{ "best_metric": null, "best_model_checkpoint": null, "epoch": 74.6268656716418, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03731343283582089, "grad_norm": 0.7508401274681091, "learning_rate": 1.0000000000000002e-06, "loss": 1.3845, "step": 10 }, { "epoch": 0.07462686567164178, "grad_norm": 0.5025657415390015, "learning_rate": 2.0000000000000003e-06, "loss": 1.4283, "step": 20 }, { "epoch": 0.11194029850746269, "grad_norm": 0.4894298315048218, "learning_rate": 3e-06, "loss": 1.4864, "step": 30 }, { "epoch": 0.14925373134328357, "grad_norm": 0.49390795826911926, "learning_rate": 4.000000000000001e-06, "loss": 1.4486, "step": 40 }, { "epoch": 0.1865671641791045, "grad_norm": 0.5841080546379089, "learning_rate": 5e-06, "loss": 1.37, "step": 50 }, { "epoch": 0.22388059701492538, "grad_norm": 0.5014635920524597, "learning_rate": 6e-06, "loss": 1.4189, "step": 60 }, { "epoch": 0.26119402985074625, "grad_norm": 0.6262728571891785, "learning_rate": 7.000000000000001e-06, "loss": 1.304, "step": 70 }, { "epoch": 0.29850746268656714, "grad_norm": 0.5421017408370972, "learning_rate": 8.000000000000001e-06, "loss": 1.2904, "step": 80 }, { "epoch": 0.3358208955223881, "grad_norm": 0.5474954843521118, "learning_rate": 9e-06, "loss": 1.2297, "step": 90 }, { "epoch": 0.373134328358209, "grad_norm": 0.8405944108963013, "learning_rate": 1e-05, "loss": 1.2028, "step": 100 }, { "epoch": 0.41044776119402987, "grad_norm": 0.7995213866233826, "learning_rate": 1.1000000000000001e-05, "loss": 1.0685, "step": 110 }, { "epoch": 0.44776119402985076, "grad_norm": 0.7961724400520325, "learning_rate": 1.2e-05, "loss": 0.9359, "step": 120 }, { "epoch": 0.48507462686567165, "grad_norm": 0.6919813752174377, "learning_rate": 1.3000000000000001e-05, "loss": 0.7111, "step": 130 }, { "epoch": 0.5223880597014925, "grad_norm": 0.600953996181488, "learning_rate": 1.4000000000000001e-05, "loss": 0.6458, "step": 140 }, { "epoch": 0.5597014925373134, "grad_norm": 0.4758719801902771, "learning_rate": 1.5e-05, "loss": 0.5448, "step": 150 }, { "epoch": 0.5970149253731343, "grad_norm": 0.46346014738082886, "learning_rate": 1.6000000000000003e-05, "loss": 0.4636, "step": 160 }, { "epoch": 0.6343283582089553, "grad_norm": 0.42867305874824524, "learning_rate": 1.7000000000000003e-05, "loss": 0.415, "step": 170 }, { "epoch": 0.6716417910447762, "grad_norm": 0.48248186707496643, "learning_rate": 1.8e-05, "loss": 0.4091, "step": 180 }, { "epoch": 0.7089552238805971, "grad_norm": 0.3397156596183777, "learning_rate": 1.9e-05, "loss": 0.3481, "step": 190 }, { "epoch": 0.746268656716418, "grad_norm": 0.3175753951072693, "learning_rate": 2e-05, "loss": 0.3818, "step": 200 }, { "epoch": 0.7835820895522388, "grad_norm": 0.26693665981292725, "learning_rate": 2.1e-05, "loss": 0.3607, "step": 210 }, { "epoch": 0.8208955223880597, "grad_norm": 0.3391198217868805, "learning_rate": 2.2000000000000003e-05, "loss": 0.3421, "step": 220 }, { "epoch": 0.8582089552238806, "grad_norm": 0.32470643520355225, "learning_rate": 2.3000000000000003e-05, "loss": 0.3389, "step": 230 }, { "epoch": 0.8955223880597015, "grad_norm": 0.30876028537750244, "learning_rate": 2.4e-05, "loss": 0.3148, "step": 240 }, { "epoch": 0.9328358208955224, "grad_norm": 0.28308507800102234, "learning_rate": 2.5e-05, "loss": 0.3067, "step": 250 }, { "epoch": 0.9701492537313433, "grad_norm": 0.32022032141685486, "learning_rate": 2.6000000000000002e-05, "loss": 0.2969, "step": 260 }, { "epoch": 1.007462686567164, "grad_norm": 0.38144516944885254, "learning_rate": 2.7000000000000002e-05, "loss": 0.3273, "step": 270 }, { "epoch": 1.044776119402985, "grad_norm": 0.37291163206100464, "learning_rate": 2.8000000000000003e-05, "loss": 0.3073, "step": 280 }, { "epoch": 1.0820895522388059, "grad_norm": 0.3465505540370941, "learning_rate": 2.9e-05, "loss": 0.3104, "step": 290 }, { "epoch": 1.1194029850746268, "grad_norm": 0.4177122414112091, "learning_rate": 3e-05, "loss": 0.264, "step": 300 }, { "epoch": 1.1567164179104479, "grad_norm": 0.36728623509407043, "learning_rate": 3.1e-05, "loss": 0.2849, "step": 310 }, { "epoch": 1.1940298507462686, "grad_norm": 0.3423154652118683, "learning_rate": 3.2000000000000005e-05, "loss": 0.2569, "step": 320 }, { "epoch": 1.2313432835820897, "grad_norm": 0.3784504532814026, "learning_rate": 3.3e-05, "loss": 0.2605, "step": 330 }, { "epoch": 1.2686567164179103, "grad_norm": 0.3675268590450287, "learning_rate": 3.4000000000000007e-05, "loss": 0.2307, "step": 340 }, { "epoch": 1.3059701492537314, "grad_norm": 0.45968109369277954, "learning_rate": 3.5e-05, "loss": 0.2422, "step": 350 }, { "epoch": 1.3432835820895521, "grad_norm": 0.5454893112182617, "learning_rate": 3.6e-05, "loss": 0.2194, "step": 360 }, { "epoch": 1.3805970149253732, "grad_norm": 0.4374641478061676, "learning_rate": 3.7e-05, "loss": 0.2308, "step": 370 }, { "epoch": 1.417910447761194, "grad_norm": 0.5563432574272156, "learning_rate": 3.8e-05, "loss": 0.2216, "step": 380 }, { "epoch": 1.455223880597015, "grad_norm": 0.6898565888404846, "learning_rate": 3.9000000000000006e-05, "loss": 0.2165, "step": 390 }, { "epoch": 1.4925373134328357, "grad_norm": 0.6653796434402466, "learning_rate": 4e-05, "loss": 0.2124, "step": 400 }, { "epoch": 1.5298507462686568, "grad_norm": 0.3551291525363922, "learning_rate": 4.1e-05, "loss": 0.2154, "step": 410 }, { "epoch": 1.5671641791044775, "grad_norm": 0.5818315744400024, "learning_rate": 4.2e-05, "loss": 0.1975, "step": 420 }, { "epoch": 1.6044776119402986, "grad_norm": 0.5571235418319702, "learning_rate": 4.3e-05, "loss": 0.2165, "step": 430 }, { "epoch": 1.6417910447761193, "grad_norm": 0.515515923500061, "learning_rate": 4.4000000000000006e-05, "loss": 0.1987, "step": 440 }, { "epoch": 1.6791044776119404, "grad_norm": 0.5274704694747925, "learning_rate": 4.5e-05, "loss": 0.2035, "step": 450 }, { "epoch": 1.716417910447761, "grad_norm": 0.42472219467163086, "learning_rate": 4.600000000000001e-05, "loss": 0.1867, "step": 460 }, { "epoch": 1.7537313432835822, "grad_norm": 0.43672966957092285, "learning_rate": 4.7e-05, "loss": 0.2053, "step": 470 }, { "epoch": 1.7910447761194028, "grad_norm": 0.5433230400085449, "learning_rate": 4.8e-05, "loss": 0.187, "step": 480 }, { "epoch": 1.828358208955224, "grad_norm": 0.9335751533508301, "learning_rate": 4.9e-05, "loss": 0.1824, "step": 490 }, { "epoch": 1.8656716417910446, "grad_norm": 0.4736366868019104, "learning_rate": 5e-05, "loss": 0.1715, "step": 500 }, { "epoch": 1.9029850746268657, "grad_norm": 0.4042463004589081, "learning_rate": 5.1000000000000006e-05, "loss": 0.1806, "step": 510 }, { "epoch": 1.9402985074626866, "grad_norm": 0.8714478015899658, "learning_rate": 5.2000000000000004e-05, "loss": 0.1699, "step": 520 }, { "epoch": 1.9776119402985075, "grad_norm": 0.48228025436401367, "learning_rate": 5.300000000000001e-05, "loss": 0.1789, "step": 530 }, { "epoch": 2.014925373134328, "grad_norm": 0.49646222591400146, "learning_rate": 5.4000000000000005e-05, "loss": 0.1765, "step": 540 }, { "epoch": 2.0522388059701493, "grad_norm": 0.6774762868881226, "learning_rate": 5.500000000000001e-05, "loss": 0.171, "step": 550 }, { "epoch": 2.08955223880597, "grad_norm": 0.6079558730125427, "learning_rate": 5.6000000000000006e-05, "loss": 0.1539, "step": 560 }, { "epoch": 2.126865671641791, "grad_norm": 0.7181674242019653, "learning_rate": 5.6999999999999996e-05, "loss": 0.1668, "step": 570 }, { "epoch": 2.1641791044776117, "grad_norm": 0.5435919165611267, "learning_rate": 5.8e-05, "loss": 0.1579, "step": 580 }, { "epoch": 2.201492537313433, "grad_norm": 1.0962722301483154, "learning_rate": 5.9e-05, "loss": 0.1542, "step": 590 }, { "epoch": 2.2388059701492535, "grad_norm": 0.5551506280899048, "learning_rate": 6e-05, "loss": 0.1631, "step": 600 }, { "epoch": 2.2761194029850746, "grad_norm": 0.6238372325897217, "learning_rate": 6.1e-05, "loss": 0.1663, "step": 610 }, { "epoch": 2.3134328358208958, "grad_norm": 0.46295881271362305, "learning_rate": 6.2e-05, "loss": 0.1463, "step": 620 }, { "epoch": 2.3507462686567164, "grad_norm": 0.39863941073417664, "learning_rate": 6.3e-05, "loss": 0.1507, "step": 630 }, { "epoch": 2.388059701492537, "grad_norm": 0.7588197588920593, "learning_rate": 6.400000000000001e-05, "loss": 0.1439, "step": 640 }, { "epoch": 2.425373134328358, "grad_norm": 0.32823893427848816, "learning_rate": 6.500000000000001e-05, "loss": 0.1582, "step": 650 }, { "epoch": 2.4626865671641793, "grad_norm": 0.5702382922172546, "learning_rate": 6.6e-05, "loss": 0.1473, "step": 660 }, { "epoch": 2.5, "grad_norm": 0.3852594494819641, "learning_rate": 6.7e-05, "loss": 0.1485, "step": 670 }, { "epoch": 2.5373134328358207, "grad_norm": 0.5872883796691895, "learning_rate": 6.800000000000001e-05, "loss": 0.1465, "step": 680 }, { "epoch": 2.574626865671642, "grad_norm": 0.4977574944496155, "learning_rate": 6.9e-05, "loss": 0.1371, "step": 690 }, { "epoch": 2.611940298507463, "grad_norm": 0.4708724617958069, "learning_rate": 7e-05, "loss": 0.1444, "step": 700 }, { "epoch": 2.6492537313432836, "grad_norm": 0.4347965717315674, "learning_rate": 7.1e-05, "loss": 0.1516, "step": 710 }, { "epoch": 2.6865671641791042, "grad_norm": 0.4048980474472046, "learning_rate": 7.2e-05, "loss": 0.1366, "step": 720 }, { "epoch": 2.7238805970149254, "grad_norm": 0.4389173090457916, "learning_rate": 7.3e-05, "loss": 0.1351, "step": 730 }, { "epoch": 2.7611940298507465, "grad_norm": 0.5517731308937073, "learning_rate": 7.4e-05, "loss": 0.1282, "step": 740 }, { "epoch": 2.798507462686567, "grad_norm": 0.608780026435852, "learning_rate": 7.500000000000001e-05, "loss": 0.1291, "step": 750 }, { "epoch": 2.835820895522388, "grad_norm": 0.4221258759498596, "learning_rate": 7.6e-05, "loss": 0.1098, "step": 760 }, { "epoch": 2.873134328358209, "grad_norm": 0.3673691749572754, "learning_rate": 7.7e-05, "loss": 0.1202, "step": 770 }, { "epoch": 2.91044776119403, "grad_norm": 0.5221970081329346, "learning_rate": 7.800000000000001e-05, "loss": 0.1233, "step": 780 }, { "epoch": 2.9477611940298507, "grad_norm": 0.35622647404670715, "learning_rate": 7.900000000000001e-05, "loss": 0.1189, "step": 790 }, { "epoch": 2.9850746268656714, "grad_norm": 0.40531468391418457, "learning_rate": 8e-05, "loss": 0.1193, "step": 800 }, { "epoch": 3.0223880597014925, "grad_norm": 0.43611589074134827, "learning_rate": 8.1e-05, "loss": 0.1293, "step": 810 }, { "epoch": 3.0597014925373136, "grad_norm": 0.5396864414215088, "learning_rate": 8.2e-05, "loss": 0.1164, "step": 820 }, { "epoch": 3.0970149253731343, "grad_norm": 0.4860381782054901, "learning_rate": 8.3e-05, "loss": 0.1186, "step": 830 }, { "epoch": 3.1343283582089554, "grad_norm": 0.4945438504219055, "learning_rate": 8.4e-05, "loss": 0.1154, "step": 840 }, { "epoch": 3.171641791044776, "grad_norm": 0.40868163108825684, "learning_rate": 8.5e-05, "loss": 0.1207, "step": 850 }, { "epoch": 3.208955223880597, "grad_norm": 0.4012243449687958, "learning_rate": 8.6e-05, "loss": 0.112, "step": 860 }, { "epoch": 3.246268656716418, "grad_norm": 0.447200745344162, "learning_rate": 8.7e-05, "loss": 0.1066, "step": 870 }, { "epoch": 3.283582089552239, "grad_norm": 0.4685825705528259, "learning_rate": 8.800000000000001e-05, "loss": 0.11, "step": 880 }, { "epoch": 3.3208955223880596, "grad_norm": 0.32511043548583984, "learning_rate": 8.900000000000001e-05, "loss": 0.1057, "step": 890 }, { "epoch": 3.3582089552238807, "grad_norm": 0.5183094143867493, "learning_rate": 9e-05, "loss": 0.1253, "step": 900 }, { "epoch": 3.3955223880597014, "grad_norm": 0.5203502178192139, "learning_rate": 9.1e-05, "loss": 0.1064, "step": 910 }, { "epoch": 3.4328358208955225, "grad_norm": 0.46369338035583496, "learning_rate": 9.200000000000001e-05, "loss": 0.1113, "step": 920 }, { "epoch": 3.470149253731343, "grad_norm": 0.4188336133956909, "learning_rate": 9.300000000000001e-05, "loss": 0.1052, "step": 930 }, { "epoch": 3.5074626865671643, "grad_norm": 0.5981103777885437, "learning_rate": 9.4e-05, "loss": 0.0975, "step": 940 }, { "epoch": 3.544776119402985, "grad_norm": 0.46332040429115295, "learning_rate": 9.5e-05, "loss": 0.1124, "step": 950 }, { "epoch": 3.582089552238806, "grad_norm": 0.38821789622306824, "learning_rate": 9.6e-05, "loss": 0.0999, "step": 960 }, { "epoch": 3.6194029850746268, "grad_norm": 0.4713737666606903, "learning_rate": 9.7e-05, "loss": 0.1004, "step": 970 }, { "epoch": 3.656716417910448, "grad_norm": 0.4151294231414795, "learning_rate": 9.8e-05, "loss": 0.106, "step": 980 }, { "epoch": 3.6940298507462686, "grad_norm": 0.49368739128112793, "learning_rate": 9.900000000000001e-05, "loss": 0.108, "step": 990 }, { "epoch": 3.7313432835820897, "grad_norm": 0.43807336688041687, "learning_rate": 0.0001, "loss": 0.1015, "step": 1000 }, { "epoch": 3.7686567164179103, "grad_norm": 0.3796832263469696, "learning_rate": 9.999993165095463e-05, "loss": 0.096, "step": 1010 }, { "epoch": 3.8059701492537314, "grad_norm": 0.40875181555747986, "learning_rate": 9.999972660400536e-05, "loss": 0.0993, "step": 1020 }, { "epoch": 3.843283582089552, "grad_norm": 0.5387396216392517, "learning_rate": 9.999938485971279e-05, "loss": 0.1049, "step": 1030 }, { "epoch": 3.8805970149253732, "grad_norm": 0.4900062680244446, "learning_rate": 9.999890641901125e-05, "loss": 0.1105, "step": 1040 }, { "epoch": 3.917910447761194, "grad_norm": 0.3660411536693573, "learning_rate": 9.999829128320874e-05, "loss": 0.0926, "step": 1050 }, { "epoch": 3.955223880597015, "grad_norm": 0.48555922508239746, "learning_rate": 9.999753945398704e-05, "loss": 0.1073, "step": 1060 }, { "epoch": 3.9925373134328357, "grad_norm": 0.46488624811172485, "learning_rate": 9.999665093340165e-05, "loss": 0.1022, "step": 1070 }, { "epoch": 4.029850746268656, "grad_norm": 0.4683173596858978, "learning_rate": 9.99956257238817e-05, "loss": 0.103, "step": 1080 }, { "epoch": 4.067164179104478, "grad_norm": 0.49612951278686523, "learning_rate": 9.999446382823013e-05, "loss": 0.0827, "step": 1090 }, { "epoch": 4.104477611940299, "grad_norm": 0.3614940643310547, "learning_rate": 9.999316524962345e-05, "loss": 0.0912, "step": 1100 }, { "epoch": 4.141791044776119, "grad_norm": 0.4512312114238739, "learning_rate": 9.999172999161198e-05, "loss": 0.09, "step": 1110 }, { "epoch": 4.17910447761194, "grad_norm": 0.41886571049690247, "learning_rate": 9.999015805811965e-05, "loss": 0.0921, "step": 1120 }, { "epoch": 4.2164179104477615, "grad_norm": 0.2905091643333435, "learning_rate": 9.998844945344405e-05, "loss": 0.0962, "step": 1130 }, { "epoch": 4.253731343283582, "grad_norm": 0.5822209715843201, "learning_rate": 9.998660418225645e-05, "loss": 0.0983, "step": 1140 }, { "epoch": 4.291044776119403, "grad_norm": 0.364227831363678, "learning_rate": 9.998462224960175e-05, "loss": 0.0966, "step": 1150 }, { "epoch": 4.3283582089552235, "grad_norm": 0.44611501693725586, "learning_rate": 9.998250366089848e-05, "loss": 0.0886, "step": 1160 }, { "epoch": 4.365671641791045, "grad_norm": 0.25390979647636414, "learning_rate": 9.998024842193876e-05, "loss": 0.0943, "step": 1170 }, { "epoch": 4.402985074626866, "grad_norm": 0.3032720386981964, "learning_rate": 9.997785653888835e-05, "loss": 0.0885, "step": 1180 }, { "epoch": 4.440298507462686, "grad_norm": 0.37883260846138, "learning_rate": 9.997532801828658e-05, "loss": 0.086, "step": 1190 }, { "epoch": 4.477611940298507, "grad_norm": 0.40362370014190674, "learning_rate": 9.997266286704631e-05, "loss": 0.0998, "step": 1200 }, { "epoch": 4.514925373134329, "grad_norm": 0.5272483229637146, "learning_rate": 9.996986109245395e-05, "loss": 0.0875, "step": 1210 }, { "epoch": 4.552238805970149, "grad_norm": 0.4320543110370636, "learning_rate": 9.996692270216947e-05, "loss": 0.079, "step": 1220 }, { "epoch": 4.58955223880597, "grad_norm": 0.45017650723457336, "learning_rate": 9.996384770422629e-05, "loss": 0.0892, "step": 1230 }, { "epoch": 4.6268656716417915, "grad_norm": 0.2561439871788025, "learning_rate": 9.996063610703137e-05, "loss": 0.0858, "step": 1240 }, { "epoch": 4.664179104477612, "grad_norm": 0.2821662724018097, "learning_rate": 9.995728791936504e-05, "loss": 0.09, "step": 1250 }, { "epoch": 4.701492537313433, "grad_norm": 0.4194805920124054, "learning_rate": 9.995380315038119e-05, "loss": 0.0847, "step": 1260 }, { "epoch": 4.7388059701492535, "grad_norm": 0.2861965000629425, "learning_rate": 9.9950181809607e-05, "loss": 0.0863, "step": 1270 }, { "epoch": 4.776119402985074, "grad_norm": 0.6766343116760254, "learning_rate": 9.994642390694308e-05, "loss": 0.0886, "step": 1280 }, { "epoch": 4.813432835820896, "grad_norm": 0.46337276697158813, "learning_rate": 9.99425294526634e-05, "loss": 0.0981, "step": 1290 }, { "epoch": 4.850746268656716, "grad_norm": 0.33428969979286194, "learning_rate": 9.993849845741524e-05, "loss": 0.0936, "step": 1300 }, { "epoch": 4.888059701492537, "grad_norm": 0.6151295900344849, "learning_rate": 9.99343309322192e-05, "loss": 0.0932, "step": 1310 }, { "epoch": 4.925373134328359, "grad_norm": 0.2884128987789154, "learning_rate": 9.993002688846913e-05, "loss": 0.0946, "step": 1320 }, { "epoch": 4.962686567164179, "grad_norm": 0.39723241329193115, "learning_rate": 9.992558633793212e-05, "loss": 0.0845, "step": 1330 }, { "epoch": 5.0, "grad_norm": 0.26714733242988586, "learning_rate": 9.992100929274846e-05, "loss": 0.0805, "step": 1340 }, { "epoch": 5.037313432835821, "grad_norm": 0.2622336447238922, "learning_rate": 9.991629576543163e-05, "loss": 0.0759, "step": 1350 }, { "epoch": 5.074626865671641, "grad_norm": 0.2800077199935913, "learning_rate": 9.991144576886823e-05, "loss": 0.0765, "step": 1360 }, { "epoch": 5.111940298507463, "grad_norm": 0.3822336792945862, "learning_rate": 9.990645931631796e-05, "loss": 0.0676, "step": 1370 }, { "epoch": 5.149253731343284, "grad_norm": 0.27242153882980347, "learning_rate": 9.990133642141359e-05, "loss": 0.0787, "step": 1380 }, { "epoch": 5.186567164179104, "grad_norm": 0.2604418992996216, "learning_rate": 9.989607709816091e-05, "loss": 0.0705, "step": 1390 }, { "epoch": 5.223880597014926, "grad_norm": 0.26919373869895935, "learning_rate": 9.989068136093873e-05, "loss": 0.0777, "step": 1400 }, { "epoch": 5.2611940298507465, "grad_norm": 0.3003658950328827, "learning_rate": 9.988514922449879e-05, "loss": 0.0743, "step": 1410 }, { "epoch": 5.298507462686567, "grad_norm": 0.3654123842716217, "learning_rate": 9.987948070396571e-05, "loss": 0.0779, "step": 1420 }, { "epoch": 5.335820895522388, "grad_norm": 0.6020807027816772, "learning_rate": 9.987367581483705e-05, "loss": 0.0831, "step": 1430 }, { "epoch": 5.373134328358209, "grad_norm": 0.2586672902107239, "learning_rate": 9.986773457298311e-05, "loss": 0.0745, "step": 1440 }, { "epoch": 5.41044776119403, "grad_norm": 0.4020416736602783, "learning_rate": 9.986165699464705e-05, "loss": 0.0744, "step": 1450 }, { "epoch": 5.447761194029851, "grad_norm": 0.3500658869743347, "learning_rate": 9.985544309644475e-05, "loss": 0.0822, "step": 1460 }, { "epoch": 5.485074626865671, "grad_norm": 0.3040224313735962, "learning_rate": 9.984909289536473e-05, "loss": 0.0728, "step": 1470 }, { "epoch": 5.522388059701493, "grad_norm": 0.315335214138031, "learning_rate": 9.984260640876821e-05, "loss": 0.0798, "step": 1480 }, { "epoch": 5.559701492537314, "grad_norm": 0.3270573019981384, "learning_rate": 9.983598365438902e-05, "loss": 0.0715, "step": 1490 }, { "epoch": 5.597014925373134, "grad_norm": 0.28146183490753174, "learning_rate": 9.98292246503335e-05, "loss": 0.0836, "step": 1500 }, { "epoch": 5.634328358208955, "grad_norm": 0.3723300099372864, "learning_rate": 9.98223294150805e-05, "loss": 0.0825, "step": 1510 }, { "epoch": 5.6716417910447765, "grad_norm": 0.262812077999115, "learning_rate": 9.981529796748134e-05, "loss": 0.0735, "step": 1520 }, { "epoch": 5.708955223880597, "grad_norm": 0.30341026186943054, "learning_rate": 9.980813032675974e-05, "loss": 0.082, "step": 1530 }, { "epoch": 5.746268656716418, "grad_norm": 0.29742923378944397, "learning_rate": 9.980082651251175e-05, "loss": 0.0827, "step": 1540 }, { "epoch": 5.7835820895522385, "grad_norm": 0.46010082960128784, "learning_rate": 9.979338654470569e-05, "loss": 0.0815, "step": 1550 }, { "epoch": 5.82089552238806, "grad_norm": 0.33839720487594604, "learning_rate": 9.97858104436822e-05, "loss": 0.069, "step": 1560 }, { "epoch": 5.858208955223881, "grad_norm": 0.3038853108882904, "learning_rate": 9.977809823015401e-05, "loss": 0.0712, "step": 1570 }, { "epoch": 5.895522388059701, "grad_norm": 0.27702105045318604, "learning_rate": 9.977024992520602e-05, "loss": 0.0767, "step": 1580 }, { "epoch": 5.932835820895522, "grad_norm": 0.2695387899875641, "learning_rate": 9.976226555029522e-05, "loss": 0.0752, "step": 1590 }, { "epoch": 5.970149253731344, "grad_norm": 0.22255517542362213, "learning_rate": 9.975414512725057e-05, "loss": 0.063, "step": 1600 }, { "epoch": 6.007462686567164, "grad_norm": 0.45302632451057434, "learning_rate": 9.974588867827301e-05, "loss": 0.0702, "step": 1610 }, { "epoch": 6.044776119402985, "grad_norm": 0.49717259407043457, "learning_rate": 9.973749622593534e-05, "loss": 0.0624, "step": 1620 }, { "epoch": 6.082089552238806, "grad_norm": 0.26096630096435547, "learning_rate": 9.972896779318219e-05, "loss": 0.0691, "step": 1630 }, { "epoch": 6.119402985074627, "grad_norm": 0.4464471638202667, "learning_rate": 9.972030340333001e-05, "loss": 0.0718, "step": 1640 }, { "epoch": 6.156716417910448, "grad_norm": 0.30284878611564636, "learning_rate": 9.97115030800669e-05, "loss": 0.0812, "step": 1650 }, { "epoch": 6.1940298507462686, "grad_norm": 0.2913932800292969, "learning_rate": 9.970256684745258e-05, "loss": 0.0638, "step": 1660 }, { "epoch": 6.231343283582089, "grad_norm": 0.296420156955719, "learning_rate": 9.969349472991838e-05, "loss": 0.0666, "step": 1670 }, { "epoch": 6.268656716417911, "grad_norm": 0.31396546959877014, "learning_rate": 9.968428675226714e-05, "loss": 0.0736, "step": 1680 }, { "epoch": 6.3059701492537314, "grad_norm": 0.37783142924308777, "learning_rate": 9.967494293967312e-05, "loss": 0.0723, "step": 1690 }, { "epoch": 6.343283582089552, "grad_norm": 0.4011059105396271, "learning_rate": 9.966546331768191e-05, "loss": 0.0648, "step": 1700 }, { "epoch": 6.380597014925373, "grad_norm": 0.30973032116889954, "learning_rate": 9.965584791221048e-05, "loss": 0.0729, "step": 1710 }, { "epoch": 6.417910447761194, "grad_norm": 0.2613247334957123, "learning_rate": 9.964609674954696e-05, "loss": 0.0716, "step": 1720 }, { "epoch": 6.455223880597015, "grad_norm": 0.6438387632369995, "learning_rate": 9.963620985635065e-05, "loss": 0.0753, "step": 1730 }, { "epoch": 6.492537313432836, "grad_norm": 0.3322042226791382, "learning_rate": 9.962618725965196e-05, "loss": 0.0794, "step": 1740 }, { "epoch": 6.529850746268656, "grad_norm": 0.2935912311077118, "learning_rate": 9.961602898685226e-05, "loss": 0.071, "step": 1750 }, { "epoch": 6.567164179104478, "grad_norm": 0.5056408047676086, "learning_rate": 9.96057350657239e-05, "loss": 0.083, "step": 1760 }, { "epoch": 6.604477611940299, "grad_norm": 0.31863516569137573, "learning_rate": 9.959530552441005e-05, "loss": 0.0717, "step": 1770 }, { "epoch": 6.641791044776119, "grad_norm": 0.3507208824157715, "learning_rate": 9.95847403914247e-05, "loss": 0.0759, "step": 1780 }, { "epoch": 6.67910447761194, "grad_norm": 0.3215446174144745, "learning_rate": 9.95740396956525e-05, "loss": 0.0755, "step": 1790 }, { "epoch": 6.7164179104477615, "grad_norm": 0.24160413444042206, "learning_rate": 9.956320346634876e-05, "loss": 0.0741, "step": 1800 }, { "epoch": 6.753731343283582, "grad_norm": 0.3066968023777008, "learning_rate": 9.955223173313931e-05, "loss": 0.067, "step": 1810 }, { "epoch": 6.791044776119403, "grad_norm": 0.2755463719367981, "learning_rate": 9.954112452602045e-05, "loss": 0.0694, "step": 1820 }, { "epoch": 6.8283582089552235, "grad_norm": 0.3000190854072571, "learning_rate": 9.952988187535886e-05, "loss": 0.0695, "step": 1830 }, { "epoch": 6.865671641791045, "grad_norm": 0.23208996653556824, "learning_rate": 9.95185038118915e-05, "loss": 0.0668, "step": 1840 }, { "epoch": 6.902985074626866, "grad_norm": 0.2938457727432251, "learning_rate": 9.950699036672559e-05, "loss": 0.0657, "step": 1850 }, { "epoch": 6.940298507462686, "grad_norm": 0.32090750336647034, "learning_rate": 9.949534157133844e-05, "loss": 0.0705, "step": 1860 }, { "epoch": 6.977611940298507, "grad_norm": 0.36553671956062317, "learning_rate": 9.948355745757741e-05, "loss": 0.0742, "step": 1870 }, { "epoch": 7.014925373134329, "grad_norm": 0.3832223415374756, "learning_rate": 9.94716380576598e-05, "loss": 0.0698, "step": 1880 }, { "epoch": 7.052238805970149, "grad_norm": 0.31990405917167664, "learning_rate": 9.945958340417283e-05, "loss": 0.0599, "step": 1890 }, { "epoch": 7.08955223880597, "grad_norm": 0.2690334618091583, "learning_rate": 9.944739353007344e-05, "loss": 0.0708, "step": 1900 }, { "epoch": 7.126865671641791, "grad_norm": 0.18849220871925354, "learning_rate": 9.943506846868826e-05, "loss": 0.0689, "step": 1910 }, { "epoch": 7.164179104477612, "grad_norm": 0.25833049416542053, "learning_rate": 9.942260825371358e-05, "loss": 0.0629, "step": 1920 }, { "epoch": 7.201492537313433, "grad_norm": 0.3614785373210907, "learning_rate": 9.941001291921512e-05, "loss": 0.0672, "step": 1930 }, { "epoch": 7.2388059701492535, "grad_norm": 0.33907780051231384, "learning_rate": 9.939728249962807e-05, "loss": 0.0642, "step": 1940 }, { "epoch": 7.276119402985074, "grad_norm": 0.26343289017677307, "learning_rate": 9.938441702975689e-05, "loss": 0.0647, "step": 1950 }, { "epoch": 7.313432835820896, "grad_norm": 0.387750506401062, "learning_rate": 9.937141654477528e-05, "loss": 0.055, "step": 1960 }, { "epoch": 7.350746268656716, "grad_norm": 0.31991636753082275, "learning_rate": 9.93582810802261e-05, "loss": 0.0656, "step": 1970 }, { "epoch": 7.388059701492537, "grad_norm": 0.3139459490776062, "learning_rate": 9.934501067202117e-05, "loss": 0.0689, "step": 1980 }, { "epoch": 7.425373134328359, "grad_norm": 0.3058273196220398, "learning_rate": 9.93316053564413e-05, "loss": 0.0652, "step": 1990 }, { "epoch": 7.462686567164179, "grad_norm": 0.2753952741622925, "learning_rate": 9.931806517013612e-05, "loss": 0.0592, "step": 2000 }, { "epoch": 7.5, "grad_norm": 0.3458659052848816, "learning_rate": 9.930439015012396e-05, "loss": 0.0612, "step": 2010 }, { "epoch": 7.537313432835821, "grad_norm": 0.31007301807403564, "learning_rate": 9.929058033379181e-05, "loss": 0.0606, "step": 2020 }, { "epoch": 7.574626865671641, "grad_norm": 0.27114179730415344, "learning_rate": 9.927663575889521e-05, "loss": 0.0722, "step": 2030 }, { "epoch": 7.611940298507463, "grad_norm": 0.4493834674358368, "learning_rate": 9.926255646355804e-05, "loss": 0.0702, "step": 2040 }, { "epoch": 7.649253731343284, "grad_norm": 0.25212913751602173, "learning_rate": 9.92483424862726e-05, "loss": 0.0681, "step": 2050 }, { "epoch": 7.686567164179104, "grad_norm": 0.33337101340293884, "learning_rate": 9.923399386589933e-05, "loss": 0.0601, "step": 2060 }, { "epoch": 7.723880597014926, "grad_norm": 0.2920111417770386, "learning_rate": 9.921951064166684e-05, "loss": 0.062, "step": 2070 }, { "epoch": 7.7611940298507465, "grad_norm": 0.2890508472919464, "learning_rate": 9.92048928531717e-05, "loss": 0.0607, "step": 2080 }, { "epoch": 7.798507462686567, "grad_norm": 0.3640911877155304, "learning_rate": 9.919014054037836e-05, "loss": 0.0564, "step": 2090 }, { "epoch": 7.835820895522388, "grad_norm": 0.24525225162506104, "learning_rate": 9.917525374361912e-05, "loss": 0.0539, "step": 2100 }, { "epoch": 7.8731343283582085, "grad_norm": 0.3678736984729767, "learning_rate": 9.91602325035939e-05, "loss": 0.0616, "step": 2110 }, { "epoch": 7.91044776119403, "grad_norm": 0.288236528635025, "learning_rate": 9.914507686137019e-05, "loss": 0.06, "step": 2120 }, { "epoch": 7.947761194029851, "grad_norm": 0.28636282682418823, "learning_rate": 9.912978685838294e-05, "loss": 0.0659, "step": 2130 }, { "epoch": 7.985074626865671, "grad_norm": 0.23699085414409637, "learning_rate": 9.911436253643445e-05, "loss": 0.0574, "step": 2140 }, { "epoch": 8.022388059701493, "grad_norm": 0.2684183418750763, "learning_rate": 9.90988039376942e-05, "loss": 0.0713, "step": 2150 }, { "epoch": 8.059701492537313, "grad_norm": 0.35374128818511963, "learning_rate": 9.90831111046988e-05, "loss": 0.0555, "step": 2160 }, { "epoch": 8.097014925373134, "grad_norm": 0.21349263191223145, "learning_rate": 9.90672840803519e-05, "loss": 0.059, "step": 2170 }, { "epoch": 8.134328358208956, "grad_norm": 0.20757345855236053, "learning_rate": 9.905132290792394e-05, "loss": 0.0585, "step": 2180 }, { "epoch": 8.171641791044776, "grad_norm": 0.25567516684532166, "learning_rate": 9.903522763105218e-05, "loss": 0.0588, "step": 2190 }, { "epoch": 8.208955223880597, "grad_norm": 0.22086405754089355, "learning_rate": 9.901899829374047e-05, "loss": 0.0594, "step": 2200 }, { "epoch": 8.246268656716419, "grad_norm": 0.43868452310562134, "learning_rate": 9.900263494035921e-05, "loss": 0.0582, "step": 2210 }, { "epoch": 8.283582089552239, "grad_norm": 0.25639423727989197, "learning_rate": 9.89861376156452e-05, "loss": 0.0547, "step": 2220 }, { "epoch": 8.32089552238806, "grad_norm": 0.2649364173412323, "learning_rate": 9.896950636470147e-05, "loss": 0.0618, "step": 2230 }, { "epoch": 8.35820895522388, "grad_norm": 0.21102295815944672, "learning_rate": 9.895274123299723e-05, "loss": 0.0589, "step": 2240 }, { "epoch": 8.395522388059701, "grad_norm": 0.2980606257915497, "learning_rate": 9.893584226636772e-05, "loss": 0.0617, "step": 2250 }, { "epoch": 8.432835820895523, "grad_norm": 0.2732321321964264, "learning_rate": 9.891880951101407e-05, "loss": 0.0586, "step": 2260 }, { "epoch": 8.470149253731343, "grad_norm": 0.32485371828079224, "learning_rate": 9.890164301350318e-05, "loss": 0.0518, "step": 2270 }, { "epoch": 8.507462686567164, "grad_norm": 0.2222377061843872, "learning_rate": 9.888434282076758e-05, "loss": 0.0605, "step": 2280 }, { "epoch": 8.544776119402986, "grad_norm": 0.277919739484787, "learning_rate": 9.886690898010535e-05, "loss": 0.0627, "step": 2290 }, { "epoch": 8.582089552238806, "grad_norm": 0.29359808564186096, "learning_rate": 9.884934153917997e-05, "loss": 0.0544, "step": 2300 }, { "epoch": 8.619402985074627, "grad_norm": 0.36508169770240784, "learning_rate": 9.883164054602012e-05, "loss": 0.062, "step": 2310 }, { "epoch": 8.656716417910447, "grad_norm": 0.34163740277290344, "learning_rate": 9.881380604901964e-05, "loss": 0.0564, "step": 2320 }, { "epoch": 8.694029850746269, "grad_norm": 0.24460600316524506, "learning_rate": 9.879583809693738e-05, "loss": 0.0626, "step": 2330 }, { "epoch": 8.73134328358209, "grad_norm": 0.1899256408214569, "learning_rate": 9.877773673889701e-05, "loss": 0.0653, "step": 2340 }, { "epoch": 8.76865671641791, "grad_norm": 0.23108312487602234, "learning_rate": 9.8759502024387e-05, "loss": 0.0619, "step": 2350 }, { "epoch": 8.805970149253731, "grad_norm": 0.18633678555488586, "learning_rate": 9.87411340032603e-05, "loss": 0.0556, "step": 2360 }, { "epoch": 8.843283582089553, "grad_norm": 0.21805410087108612, "learning_rate": 9.872263272573443e-05, "loss": 0.0559, "step": 2370 }, { "epoch": 8.880597014925373, "grad_norm": 0.17395304143428802, "learning_rate": 9.870399824239117e-05, "loss": 0.0498, "step": 2380 }, { "epoch": 8.917910447761194, "grad_norm": 0.27328982949256897, "learning_rate": 9.868523060417646e-05, "loss": 0.0588, "step": 2390 }, { "epoch": 8.955223880597014, "grad_norm": 0.39257484674453735, "learning_rate": 9.86663298624003e-05, "loss": 0.0585, "step": 2400 }, { "epoch": 8.992537313432836, "grad_norm": 0.16544950008392334, "learning_rate": 9.864729606873663e-05, "loss": 0.0574, "step": 2410 }, { "epoch": 9.029850746268657, "grad_norm": 0.22838005423545837, "learning_rate": 9.862812927522309e-05, "loss": 0.0566, "step": 2420 }, { "epoch": 9.067164179104477, "grad_norm": 0.2713736593723297, "learning_rate": 9.860882953426099e-05, "loss": 0.0538, "step": 2430 }, { "epoch": 9.104477611940299, "grad_norm": 0.19470594823360443, "learning_rate": 9.858939689861506e-05, "loss": 0.0564, "step": 2440 }, { "epoch": 9.14179104477612, "grad_norm": 0.25927573442459106, "learning_rate": 9.856983142141339e-05, "loss": 0.06, "step": 2450 }, { "epoch": 9.17910447761194, "grad_norm": 0.2696501314640045, "learning_rate": 9.855013315614725e-05, "loss": 0.0586, "step": 2460 }, { "epoch": 9.216417910447761, "grad_norm": 0.2163892239332199, "learning_rate": 9.853030215667093e-05, "loss": 0.0596, "step": 2470 }, { "epoch": 9.253731343283581, "grad_norm": 0.23102402687072754, "learning_rate": 9.851033847720166e-05, "loss": 0.0557, "step": 2480 }, { "epoch": 9.291044776119403, "grad_norm": 0.24106517434120178, "learning_rate": 9.849024217231935e-05, "loss": 0.0558, "step": 2490 }, { "epoch": 9.328358208955224, "grad_norm": 0.24170808494091034, "learning_rate": 9.847001329696653e-05, "loss": 0.0528, "step": 2500 }, { "epoch": 9.365671641791044, "grad_norm": 0.196956068277359, "learning_rate": 9.844965190644817e-05, "loss": 0.0559, "step": 2510 }, { "epoch": 9.402985074626866, "grad_norm": 0.23282545804977417, "learning_rate": 9.842915805643155e-05, "loss": 0.055, "step": 2520 }, { "epoch": 9.440298507462687, "grad_norm": 0.2290041148662567, "learning_rate": 9.840853180294608e-05, "loss": 0.051, "step": 2530 }, { "epoch": 9.477611940298507, "grad_norm": 0.21659782528877258, "learning_rate": 9.838777320238312e-05, "loss": 0.0527, "step": 2540 }, { "epoch": 9.514925373134329, "grad_norm": 0.18197275698184967, "learning_rate": 9.836688231149592e-05, "loss": 0.0592, "step": 2550 }, { "epoch": 9.552238805970148, "grad_norm": 0.3147948384284973, "learning_rate": 9.834585918739936e-05, "loss": 0.0564, "step": 2560 }, { "epoch": 9.58955223880597, "grad_norm": 0.26726973056793213, "learning_rate": 9.832470388756987e-05, "loss": 0.0565, "step": 2570 }, { "epoch": 9.626865671641792, "grad_norm": 0.25267449021339417, "learning_rate": 9.830341646984521e-05, "loss": 0.0565, "step": 2580 }, { "epoch": 9.664179104477611, "grad_norm": 0.209847092628479, "learning_rate": 9.82819969924244e-05, "loss": 0.0574, "step": 2590 }, { "epoch": 9.701492537313433, "grad_norm": 0.27112171053886414, "learning_rate": 9.826044551386744e-05, "loss": 0.0525, "step": 2600 }, { "epoch": 9.738805970149254, "grad_norm": 0.2419143170118332, "learning_rate": 9.823876209309527e-05, "loss": 0.0567, "step": 2610 }, { "epoch": 9.776119402985074, "grad_norm": 0.29285240173339844, "learning_rate": 9.821694678938953e-05, "loss": 0.0561, "step": 2620 }, { "epoch": 9.813432835820896, "grad_norm": 0.21428731083869934, "learning_rate": 9.819499966239243e-05, "loss": 0.0509, "step": 2630 }, { "epoch": 9.850746268656717, "grad_norm": 0.32968929409980774, "learning_rate": 9.817292077210659e-05, "loss": 0.0498, "step": 2640 }, { "epoch": 9.888059701492537, "grad_norm": 0.258206307888031, "learning_rate": 9.815071017889482e-05, "loss": 0.0525, "step": 2650 }, { "epoch": 9.925373134328359, "grad_norm": 0.2041812539100647, "learning_rate": 9.812836794348004e-05, "loss": 0.0628, "step": 2660 }, { "epoch": 9.962686567164178, "grad_norm": 0.35019591450691223, "learning_rate": 9.81058941269451e-05, "loss": 0.059, "step": 2670 }, { "epoch": 10.0, "grad_norm": 0.3348381519317627, "learning_rate": 9.808328879073251e-05, "loss": 0.0544, "step": 2680 }, { "epoch": 10.037313432835822, "grad_norm": 0.23715277016162872, "learning_rate": 9.806055199664446e-05, "loss": 0.0505, "step": 2690 }, { "epoch": 10.074626865671641, "grad_norm": 0.29319465160369873, "learning_rate": 9.803768380684242e-05, "loss": 0.0566, "step": 2700 }, { "epoch": 10.111940298507463, "grad_norm": 0.25703921914100647, "learning_rate": 9.801468428384716e-05, "loss": 0.0556, "step": 2710 }, { "epoch": 10.149253731343283, "grad_norm": 0.24589422345161438, "learning_rate": 9.799155349053851e-05, "loss": 0.0536, "step": 2720 }, { "epoch": 10.186567164179104, "grad_norm": 0.19085493683815002, "learning_rate": 9.796829149015517e-05, "loss": 0.0563, "step": 2730 }, { "epoch": 10.223880597014926, "grad_norm": 0.2258228361606598, "learning_rate": 9.794489834629455e-05, "loss": 0.0583, "step": 2740 }, { "epoch": 10.261194029850746, "grad_norm": 0.2357153296470642, "learning_rate": 9.792137412291265e-05, "loss": 0.0485, "step": 2750 }, { "epoch": 10.298507462686567, "grad_norm": 0.1982278823852539, "learning_rate": 9.789771888432375e-05, "loss": 0.0529, "step": 2760 }, { "epoch": 10.335820895522389, "grad_norm": 0.27283474802970886, "learning_rate": 9.787393269520039e-05, "loss": 0.0474, "step": 2770 }, { "epoch": 10.373134328358208, "grad_norm": 0.2752985656261444, "learning_rate": 9.785001562057309e-05, "loss": 0.0522, "step": 2780 }, { "epoch": 10.41044776119403, "grad_norm": 0.19233155250549316, "learning_rate": 9.782596772583026e-05, "loss": 0.05, "step": 2790 }, { "epoch": 10.447761194029852, "grad_norm": 0.31116774678230286, "learning_rate": 9.780178907671789e-05, "loss": 0.0539, "step": 2800 }, { "epoch": 10.485074626865671, "grad_norm": 0.16201527416706085, "learning_rate": 9.777747973933948e-05, "loss": 0.0569, "step": 2810 }, { "epoch": 10.522388059701493, "grad_norm": 0.17473511397838593, "learning_rate": 9.775303978015585e-05, "loss": 0.0437, "step": 2820 }, { "epoch": 10.559701492537313, "grad_norm": 0.23483754694461823, "learning_rate": 9.772846926598491e-05, "loss": 0.0586, "step": 2830 }, { "epoch": 10.597014925373134, "grad_norm": 0.20281587541103363, "learning_rate": 9.77037682640015e-05, "loss": 0.0501, "step": 2840 }, { "epoch": 10.634328358208956, "grad_norm": 0.18468862771987915, "learning_rate": 9.767893684173721e-05, "loss": 0.047, "step": 2850 }, { "epoch": 10.671641791044776, "grad_norm": 0.19350935518741608, "learning_rate": 9.765397506708023e-05, "loss": 0.0526, "step": 2860 }, { "epoch": 10.708955223880597, "grad_norm": 0.2308792918920517, "learning_rate": 9.762888300827507e-05, "loss": 0.0474, "step": 2870 }, { "epoch": 10.746268656716419, "grad_norm": 0.247776597738266, "learning_rate": 9.760366073392246e-05, "loss": 0.0503, "step": 2880 }, { "epoch": 10.783582089552239, "grad_norm": 0.23529237508773804, "learning_rate": 9.757830831297914e-05, "loss": 0.05, "step": 2890 }, { "epoch": 10.82089552238806, "grad_norm": 0.24974945187568665, "learning_rate": 9.755282581475769e-05, "loss": 0.0548, "step": 2900 }, { "epoch": 10.85820895522388, "grad_norm": 0.22085660696029663, "learning_rate": 9.752721330892624e-05, "loss": 0.061, "step": 2910 }, { "epoch": 10.895522388059701, "grad_norm": 0.18835818767547607, "learning_rate": 9.750147086550844e-05, "loss": 0.0481, "step": 2920 }, { "epoch": 10.932835820895523, "grad_norm": 0.4734271466732025, "learning_rate": 9.747559855488313e-05, "loss": 0.0492, "step": 2930 }, { "epoch": 10.970149253731343, "grad_norm": 0.23802787065505981, "learning_rate": 9.744959644778422e-05, "loss": 0.0522, "step": 2940 }, { "epoch": 11.007462686567164, "grad_norm": 0.25651365518569946, "learning_rate": 9.742346461530048e-05, "loss": 0.0479, "step": 2950 }, { "epoch": 11.044776119402986, "grad_norm": 0.3908090591430664, "learning_rate": 9.739720312887535e-05, "loss": 0.055, "step": 2960 }, { "epoch": 11.082089552238806, "grad_norm": 0.28512808680534363, "learning_rate": 9.73708120603067e-05, "loss": 0.0536, "step": 2970 }, { "epoch": 11.119402985074627, "grad_norm": 0.27310115098953247, "learning_rate": 9.734429148174675e-05, "loss": 0.0536, "step": 2980 }, { "epoch": 11.156716417910447, "grad_norm": 0.2793945074081421, "learning_rate": 9.731764146570173e-05, "loss": 0.0494, "step": 2990 }, { "epoch": 11.194029850746269, "grad_norm": 0.1958741396665573, "learning_rate": 9.729086208503174e-05, "loss": 0.0504, "step": 3000 }, { "epoch": 11.23134328358209, "grad_norm": 0.14222390949726105, "learning_rate": 9.726395341295062e-05, "loss": 0.05, "step": 3010 }, { "epoch": 11.26865671641791, "grad_norm": 0.19916026294231415, "learning_rate": 9.723691552302562e-05, "loss": 0.0579, "step": 3020 }, { "epoch": 11.305970149253731, "grad_norm": 0.2890547513961792, "learning_rate": 9.720974848917735e-05, "loss": 0.0496, "step": 3030 }, { "epoch": 11.343283582089553, "grad_norm": 0.287436306476593, "learning_rate": 9.718245238567939e-05, "loss": 0.0482, "step": 3040 }, { "epoch": 11.380597014925373, "grad_norm": 0.19894714653491974, "learning_rate": 9.715502728715826e-05, "loss": 0.049, "step": 3050 }, { "epoch": 11.417910447761194, "grad_norm": 0.35107964277267456, "learning_rate": 9.712747326859315e-05, "loss": 0.0555, "step": 3060 }, { "epoch": 11.455223880597014, "grad_norm": 0.18051770329475403, "learning_rate": 9.709979040531569e-05, "loss": 0.0554, "step": 3070 }, { "epoch": 11.492537313432836, "grad_norm": 0.27698877453804016, "learning_rate": 9.707197877300974e-05, "loss": 0.0474, "step": 3080 }, { "epoch": 11.529850746268657, "grad_norm": 0.24985937774181366, "learning_rate": 9.704403844771128e-05, "loss": 0.0519, "step": 3090 }, { "epoch": 11.567164179104477, "grad_norm": 0.2727425992488861, "learning_rate": 9.701596950580806e-05, "loss": 0.0516, "step": 3100 }, { "epoch": 11.604477611940299, "grad_norm": 0.38774731755256653, "learning_rate": 9.698777202403953e-05, "loss": 0.0543, "step": 3110 }, { "epoch": 11.64179104477612, "grad_norm": 0.1979246884584427, "learning_rate": 9.695944607949649e-05, "loss": 0.0589, "step": 3120 }, { "epoch": 11.67910447761194, "grad_norm": 0.20874737203121185, "learning_rate": 9.693099174962103e-05, "loss": 0.0521, "step": 3130 }, { "epoch": 11.716417910447761, "grad_norm": 0.18919429183006287, "learning_rate": 9.690240911220618e-05, "loss": 0.0525, "step": 3140 }, { "epoch": 11.753731343283581, "grad_norm": 0.1668858379125595, "learning_rate": 9.687369824539577e-05, "loss": 0.0491, "step": 3150 }, { "epoch": 11.791044776119403, "grad_norm": 0.18211665749549866, "learning_rate": 9.684485922768422e-05, "loss": 0.0422, "step": 3160 }, { "epoch": 11.828358208955224, "grad_norm": 0.3077932298183441, "learning_rate": 9.681589213791633e-05, "loss": 0.0531, "step": 3170 }, { "epoch": 11.865671641791044, "grad_norm": 0.2565005421638489, "learning_rate": 9.6786797055287e-05, "loss": 0.0469, "step": 3180 }, { "epoch": 11.902985074626866, "grad_norm": 0.20874324440956116, "learning_rate": 9.675757405934103e-05, "loss": 0.0446, "step": 3190 }, { "epoch": 11.940298507462687, "grad_norm": 0.37066107988357544, "learning_rate": 9.672822322997305e-05, "loss": 0.0592, "step": 3200 }, { "epoch": 11.977611940298507, "grad_norm": 0.23415519297122955, "learning_rate": 9.669874464742705e-05, "loss": 0.0477, "step": 3210 }, { "epoch": 12.014925373134329, "grad_norm": 0.24905860424041748, "learning_rate": 9.66691383922964e-05, "loss": 0.0497, "step": 3220 }, { "epoch": 12.052238805970148, "grad_norm": 0.189616858959198, "learning_rate": 9.663940454552342e-05, "loss": 0.0504, "step": 3230 }, { "epoch": 12.08955223880597, "grad_norm": 0.264218807220459, "learning_rate": 9.660954318839933e-05, "loss": 0.0413, "step": 3240 }, { "epoch": 12.126865671641792, "grad_norm": 0.23950912058353424, "learning_rate": 9.657955440256395e-05, "loss": 0.0443, "step": 3250 }, { "epoch": 12.164179104477611, "grad_norm": 0.24853253364562988, "learning_rate": 9.654943827000548e-05, "loss": 0.0549, "step": 3260 }, { "epoch": 12.201492537313433, "grad_norm": 0.25452083349227905, "learning_rate": 9.651919487306025e-05, "loss": 0.0484, "step": 3270 }, { "epoch": 12.238805970149254, "grad_norm": 0.18476618826389313, "learning_rate": 9.648882429441257e-05, "loss": 0.0505, "step": 3280 }, { "epoch": 12.276119402985074, "grad_norm": 0.19988073408603668, "learning_rate": 9.645832661709444e-05, "loss": 0.0493, "step": 3290 }, { "epoch": 12.313432835820896, "grad_norm": 0.18385781347751617, "learning_rate": 9.642770192448536e-05, "loss": 0.0437, "step": 3300 }, { "epoch": 12.350746268656717, "grad_norm": 0.16233357787132263, "learning_rate": 9.639695030031204e-05, "loss": 0.046, "step": 3310 }, { "epoch": 12.388059701492537, "grad_norm": 0.2317311316728592, "learning_rate": 9.636607182864827e-05, "loss": 0.0515, "step": 3320 }, { "epoch": 12.425373134328359, "grad_norm": 0.26591184735298157, "learning_rate": 9.63350665939146e-05, "loss": 0.0443, "step": 3330 }, { "epoch": 12.462686567164178, "grad_norm": 0.18448799848556519, "learning_rate": 9.630393468087818e-05, "loss": 0.0474, "step": 3340 }, { "epoch": 12.5, "grad_norm": 0.20527862012386322, "learning_rate": 9.627267617465243e-05, "loss": 0.049, "step": 3350 }, { "epoch": 12.537313432835822, "grad_norm": 0.22047220170497894, "learning_rate": 9.624129116069694e-05, "loss": 0.0446, "step": 3360 }, { "epoch": 12.574626865671641, "grad_norm": 0.19598756730556488, "learning_rate": 9.620977972481716e-05, "loss": 0.0574, "step": 3370 }, { "epoch": 12.611940298507463, "grad_norm": 0.2145535796880722, "learning_rate": 9.617814195316411e-05, "loss": 0.0511, "step": 3380 }, { "epoch": 12.649253731343283, "grad_norm": 0.31080666184425354, "learning_rate": 9.614637793223425e-05, "loss": 0.0559, "step": 3390 }, { "epoch": 12.686567164179104, "grad_norm": 0.2947694659233093, "learning_rate": 9.611448774886924e-05, "loss": 0.0539, "step": 3400 }, { "epoch": 12.723880597014926, "grad_norm": 0.13587631285190582, "learning_rate": 9.60824714902556e-05, "loss": 0.0525, "step": 3410 }, { "epoch": 12.761194029850746, "grad_norm": 0.2509562373161316, "learning_rate": 9.605032924392457e-05, "loss": 0.0499, "step": 3420 }, { "epoch": 12.798507462686567, "grad_norm": 0.2719811797142029, "learning_rate": 9.601806109775179e-05, "loss": 0.0474, "step": 3430 }, { "epoch": 12.835820895522389, "grad_norm": 0.21489158272743225, "learning_rate": 9.598566713995718e-05, "loss": 0.0434, "step": 3440 }, { "epoch": 12.873134328358208, "grad_norm": 0.18901456892490387, "learning_rate": 9.595314745910456e-05, "loss": 0.0539, "step": 3450 }, { "epoch": 12.91044776119403, "grad_norm": 0.18704549968242645, "learning_rate": 9.59205021441015e-05, "loss": 0.053, "step": 3460 }, { "epoch": 12.947761194029852, "grad_norm": 0.18295784294605255, "learning_rate": 9.588773128419906e-05, "loss": 0.0481, "step": 3470 }, { "epoch": 12.985074626865671, "grad_norm": 0.16313649713993073, "learning_rate": 9.58548349689915e-05, "loss": 0.0421, "step": 3480 }, { "epoch": 13.022388059701493, "grad_norm": 0.3223343789577484, "learning_rate": 9.582181328841611e-05, "loss": 0.0448, "step": 3490 }, { "epoch": 13.059701492537313, "grad_norm": 0.18513379991054535, "learning_rate": 9.578866633275288e-05, "loss": 0.0545, "step": 3500 }, { "epoch": 13.097014925373134, "grad_norm": 0.22825410962104797, "learning_rate": 9.575539419262434e-05, "loss": 0.0465, "step": 3510 }, { "epoch": 13.134328358208956, "grad_norm": 0.189821258187294, "learning_rate": 9.572199695899522e-05, "loss": 0.0457, "step": 3520 }, { "epoch": 13.171641791044776, "grad_norm": 0.1961260288953781, "learning_rate": 9.568847472317232e-05, "loss": 0.0447, "step": 3530 }, { "epoch": 13.208955223880597, "grad_norm": 0.19820477068424225, "learning_rate": 9.565482757680415e-05, "loss": 0.0459, "step": 3540 }, { "epoch": 13.246268656716419, "grad_norm": 0.15498116612434387, "learning_rate": 9.562105561188069e-05, "loss": 0.0496, "step": 3550 }, { "epoch": 13.283582089552239, "grad_norm": 0.33629029989242554, "learning_rate": 9.558715892073323e-05, "loss": 0.0494, "step": 3560 }, { "epoch": 13.32089552238806, "grad_norm": 0.2735806405544281, "learning_rate": 9.555313759603402e-05, "loss": 0.0548, "step": 3570 }, { "epoch": 13.35820895522388, "grad_norm": 0.213076651096344, "learning_rate": 9.551899173079607e-05, "loss": 0.0506, "step": 3580 }, { "epoch": 13.395522388059701, "grad_norm": 0.23117247223854065, "learning_rate": 9.548472141837286e-05, "loss": 0.0502, "step": 3590 }, { "epoch": 13.432835820895523, "grad_norm": 0.4022199809551239, "learning_rate": 9.545032675245813e-05, "loss": 0.0482, "step": 3600 }, { "epoch": 13.470149253731343, "grad_norm": 0.2162615954875946, "learning_rate": 9.541580782708557e-05, "loss": 0.045, "step": 3610 }, { "epoch": 13.507462686567164, "grad_norm": 0.2645283341407776, "learning_rate": 9.538116473662861e-05, "loss": 0.0475, "step": 3620 }, { "epoch": 13.544776119402986, "grad_norm": 0.2682011127471924, "learning_rate": 9.534639757580013e-05, "loss": 0.0486, "step": 3630 }, { "epoch": 13.582089552238806, "grad_norm": 0.19306032359600067, "learning_rate": 9.531150643965223e-05, "loss": 0.049, "step": 3640 }, { "epoch": 13.619402985074627, "grad_norm": 0.13654455542564392, "learning_rate": 9.527649142357596e-05, "loss": 0.0486, "step": 3650 }, { "epoch": 13.656716417910447, "grad_norm": 0.21462659537792206, "learning_rate": 9.524135262330098e-05, "loss": 0.0472, "step": 3660 }, { "epoch": 13.694029850746269, "grad_norm": 0.22151269018650055, "learning_rate": 9.520609013489547e-05, "loss": 0.0481, "step": 3670 }, { "epoch": 13.73134328358209, "grad_norm": 0.23680414259433746, "learning_rate": 9.517070405476575e-05, "loss": 0.0454, "step": 3680 }, { "epoch": 13.76865671641791, "grad_norm": 0.2769628167152405, "learning_rate": 9.513519447965595e-05, "loss": 0.0517, "step": 3690 }, { "epoch": 13.805970149253731, "grad_norm": 0.40475529432296753, "learning_rate": 9.509956150664796e-05, "loss": 0.0494, "step": 3700 }, { "epoch": 13.843283582089553, "grad_norm": 0.17483600974082947, "learning_rate": 9.50638052331609e-05, "loss": 0.0464, "step": 3710 }, { "epoch": 13.880597014925373, "grad_norm": 0.15413782000541687, "learning_rate": 9.502792575695112e-05, "loss": 0.0501, "step": 3720 }, { "epoch": 13.917910447761194, "grad_norm": 0.17406338453292847, "learning_rate": 9.499192317611167e-05, "loss": 0.0421, "step": 3730 }, { "epoch": 13.955223880597014, "grad_norm": 0.18288959562778473, "learning_rate": 9.49557975890723e-05, "loss": 0.0459, "step": 3740 }, { "epoch": 13.992537313432836, "grad_norm": 0.22260481119155884, "learning_rate": 9.491954909459895e-05, "loss": 0.0471, "step": 3750 }, { "epoch": 14.029850746268657, "grad_norm": 0.19988460838794708, "learning_rate": 9.488317779179361e-05, "loss": 0.0393, "step": 3760 }, { "epoch": 14.067164179104477, "grad_norm": 0.20259398221969604, "learning_rate": 9.484668378009408e-05, "loss": 0.0489, "step": 3770 }, { "epoch": 14.104477611940299, "grad_norm": 0.31647807359695435, "learning_rate": 9.481006715927351e-05, "loss": 0.0505, "step": 3780 }, { "epoch": 14.14179104477612, "grad_norm": 0.29236042499542236, "learning_rate": 9.477332802944044e-05, "loss": 0.0415, "step": 3790 }, { "epoch": 14.17910447761194, "grad_norm": 0.2563629448413849, "learning_rate": 9.473646649103818e-05, "loss": 0.0466, "step": 3800 }, { "epoch": 14.216417910447761, "grad_norm": 0.22838079929351807, "learning_rate": 9.46994826448448e-05, "loss": 0.0434, "step": 3810 }, { "epoch": 14.253731343283581, "grad_norm": 0.21613836288452148, "learning_rate": 9.46623765919727e-05, "loss": 0.0508, "step": 3820 }, { "epoch": 14.291044776119403, "grad_norm": 0.22012412548065186, "learning_rate": 9.462514843386845e-05, "loss": 0.0533, "step": 3830 }, { "epoch": 14.328358208955224, "grad_norm": 0.17455364763736725, "learning_rate": 9.458779827231237e-05, "loss": 0.0431, "step": 3840 }, { "epoch": 14.365671641791044, "grad_norm": 0.16018253564834595, "learning_rate": 9.45503262094184e-05, "loss": 0.0457, "step": 3850 }, { "epoch": 14.402985074626866, "grad_norm": 0.18833895027637482, "learning_rate": 9.451273234763371e-05, "loss": 0.0478, "step": 3860 }, { "epoch": 14.440298507462687, "grad_norm": 0.14888213574886322, "learning_rate": 9.447501678973852e-05, "loss": 0.0495, "step": 3870 }, { "epoch": 14.477611940298507, "grad_norm": 0.1761079728603363, "learning_rate": 9.443717963884569e-05, "loss": 0.0478, "step": 3880 }, { "epoch": 14.514925373134329, "grad_norm": 0.23822376132011414, "learning_rate": 9.439922099840054e-05, "loss": 0.0388, "step": 3890 }, { "epoch": 14.552238805970148, "grad_norm": 0.1618645191192627, "learning_rate": 9.43611409721806e-05, "loss": 0.0487, "step": 3900 }, { "epoch": 14.58955223880597, "grad_norm": 0.28659340739250183, "learning_rate": 9.432293966429514e-05, "loss": 0.0456, "step": 3910 }, { "epoch": 14.626865671641792, "grad_norm": 0.20425352454185486, "learning_rate": 9.428461717918511e-05, "loss": 0.0462, "step": 3920 }, { "epoch": 14.664179104477611, "grad_norm": 0.21112553775310516, "learning_rate": 9.424617362162271e-05, "loss": 0.0476, "step": 3930 }, { "epoch": 14.701492537313433, "grad_norm": 0.2742198407649994, "learning_rate": 9.420760909671118e-05, "loss": 0.0504, "step": 3940 }, { "epoch": 14.738805970149254, "grad_norm": 0.2715354859828949, "learning_rate": 9.416892370988444e-05, "loss": 0.0492, "step": 3950 }, { "epoch": 14.776119402985074, "grad_norm": 0.19375450909137726, "learning_rate": 9.413011756690685e-05, "loss": 0.0455, "step": 3960 }, { "epoch": 14.813432835820896, "grad_norm": 0.2033739686012268, "learning_rate": 9.409119077387294e-05, "loss": 0.0469, "step": 3970 }, { "epoch": 14.850746268656717, "grad_norm": 0.23652705550193787, "learning_rate": 9.405214343720707e-05, "loss": 0.0412, "step": 3980 }, { "epoch": 14.888059701492537, "grad_norm": 0.23133769631385803, "learning_rate": 9.401297566366318e-05, "loss": 0.0441, "step": 3990 }, { "epoch": 14.925373134328359, "grad_norm": 0.18082749843597412, "learning_rate": 9.397368756032445e-05, "loss": 0.0426, "step": 4000 }, { "epoch": 14.962686567164178, "grad_norm": 0.26076459884643555, "learning_rate": 9.393427923460308e-05, "loss": 0.0467, "step": 4010 }, { "epoch": 15.0, "grad_norm": 0.34363821148872375, "learning_rate": 9.389475079423988e-05, "loss": 0.0438, "step": 4020 }, { "epoch": 15.037313432835822, "grad_norm": 0.19497573375701904, "learning_rate": 9.385510234730415e-05, "loss": 0.0454, "step": 4030 }, { "epoch": 15.074626865671641, "grad_norm": 0.16470521688461304, "learning_rate": 9.381533400219318e-05, "loss": 0.0472, "step": 4040 }, { "epoch": 15.111940298507463, "grad_norm": 0.21470138430595398, "learning_rate": 9.377544586763215e-05, "loss": 0.0434, "step": 4050 }, { "epoch": 15.149253731343283, "grad_norm": 0.18492168188095093, "learning_rate": 9.373543805267368e-05, "loss": 0.0441, "step": 4060 }, { "epoch": 15.186567164179104, "grad_norm": 0.15961965918540955, "learning_rate": 9.369531066669758e-05, "loss": 0.0475, "step": 4070 }, { "epoch": 15.223880597014926, "grad_norm": 0.1778179258108139, "learning_rate": 9.365506381941066e-05, "loss": 0.0376, "step": 4080 }, { "epoch": 15.261194029850746, "grad_norm": 0.23423032462596893, "learning_rate": 9.36146976208462e-05, "loss": 0.0419, "step": 4090 }, { "epoch": 15.298507462686567, "grad_norm": 0.2188444882631302, "learning_rate": 9.357421218136386e-05, "loss": 0.0462, "step": 4100 }, { "epoch": 15.335820895522389, "grad_norm": 0.21141092479228973, "learning_rate": 9.353360761164931e-05, "loss": 0.0446, "step": 4110 }, { "epoch": 15.373134328358208, "grad_norm": 0.27159467339515686, "learning_rate": 9.349288402271388e-05, "loss": 0.0407, "step": 4120 }, { "epoch": 15.41044776119403, "grad_norm": 0.22593452036380768, "learning_rate": 9.345204152589428e-05, "loss": 0.0463, "step": 4130 }, { "epoch": 15.447761194029852, "grad_norm": 0.3661549985408783, "learning_rate": 9.341108023285238e-05, "loss": 0.0422, "step": 4140 }, { "epoch": 15.485074626865671, "grad_norm": 0.24105341732501984, "learning_rate": 9.337000025557476e-05, "loss": 0.0481, "step": 4150 }, { "epoch": 15.522388059701493, "grad_norm": 0.23946453630924225, "learning_rate": 9.332880170637252e-05, "loss": 0.0392, "step": 4160 }, { "epoch": 15.559701492537313, "grad_norm": 0.1917007863521576, "learning_rate": 9.328748469788093e-05, "loss": 0.0433, "step": 4170 }, { "epoch": 15.597014925373134, "grad_norm": 0.18548420071601868, "learning_rate": 9.32460493430591e-05, "loss": 0.0454, "step": 4180 }, { "epoch": 15.634328358208956, "grad_norm": 0.16716283559799194, "learning_rate": 9.320449575518972e-05, "loss": 0.0476, "step": 4190 }, { "epoch": 15.671641791044776, "grad_norm": 0.19687828421592712, "learning_rate": 9.316282404787871e-05, "loss": 0.0441, "step": 4200 }, { "epoch": 15.708955223880597, "grad_norm": 0.17828111350536346, "learning_rate": 9.31210343350549e-05, "loss": 0.0427, "step": 4210 }, { "epoch": 15.746268656716419, "grad_norm": 0.2108127772808075, "learning_rate": 9.30791267309698e-05, "loss": 0.0442, "step": 4220 }, { "epoch": 15.783582089552239, "grad_norm": 0.14861363172531128, "learning_rate": 9.30371013501972e-05, "loss": 0.0416, "step": 4230 }, { "epoch": 15.82089552238806, "grad_norm": 0.2654837965965271, "learning_rate": 9.299495830763286e-05, "loss": 0.043, "step": 4240 }, { "epoch": 15.85820895522388, "grad_norm": 0.19535772502422333, "learning_rate": 9.295269771849427e-05, "loss": 0.0484, "step": 4250 }, { "epoch": 15.895522388059701, "grad_norm": 0.20797879993915558, "learning_rate": 9.291031969832026e-05, "loss": 0.0497, "step": 4260 }, { "epoch": 15.932835820895523, "grad_norm": 0.14513571560382843, "learning_rate": 9.286782436297073e-05, "loss": 0.0403, "step": 4270 }, { "epoch": 15.970149253731343, "grad_norm": 0.20652088522911072, "learning_rate": 9.282521182862629e-05, "loss": 0.0409, "step": 4280 }, { "epoch": 16.007462686567163, "grad_norm": 0.2037963569164276, "learning_rate": 9.278248221178798e-05, "loss": 0.0434, "step": 4290 }, { "epoch": 16.044776119402986, "grad_norm": 0.2085045576095581, "learning_rate": 9.273963562927695e-05, "loss": 0.0461, "step": 4300 }, { "epoch": 16.082089552238806, "grad_norm": 0.18432487547397614, "learning_rate": 9.269667219823412e-05, "loss": 0.0396, "step": 4310 }, { "epoch": 16.119402985074625, "grad_norm": 0.16725443303585052, "learning_rate": 9.265359203611987e-05, "loss": 0.0431, "step": 4320 }, { "epoch": 16.15671641791045, "grad_norm": 0.25038379430770874, "learning_rate": 9.261039526071374e-05, "loss": 0.0451, "step": 4330 }, { "epoch": 16.19402985074627, "grad_norm": 0.20616401731967926, "learning_rate": 9.256708199011401e-05, "loss": 0.0374, "step": 4340 }, { "epoch": 16.23134328358209, "grad_norm": 0.22411657869815826, "learning_rate": 9.252365234273755e-05, "loss": 0.0379, "step": 4350 }, { "epoch": 16.26865671641791, "grad_norm": 0.25688955187797546, "learning_rate": 9.248010643731935e-05, "loss": 0.0416, "step": 4360 }, { "epoch": 16.30597014925373, "grad_norm": 0.2455955445766449, "learning_rate": 9.243644439291223e-05, "loss": 0.0389, "step": 4370 }, { "epoch": 16.34328358208955, "grad_norm": 0.11232680827379227, "learning_rate": 9.239266632888659e-05, "loss": 0.0382, "step": 4380 }, { "epoch": 16.380597014925375, "grad_norm": 0.19870124757289886, "learning_rate": 9.234877236492997e-05, "loss": 0.0461, "step": 4390 }, { "epoch": 16.417910447761194, "grad_norm": 0.13789547979831696, "learning_rate": 9.230476262104677e-05, "loss": 0.0466, "step": 4400 }, { "epoch": 16.455223880597014, "grad_norm": 0.19790180027484894, "learning_rate": 9.226063721755799e-05, "loss": 0.048, "step": 4410 }, { "epoch": 16.492537313432837, "grad_norm": 0.21846015751361847, "learning_rate": 9.221639627510076e-05, "loss": 0.0414, "step": 4420 }, { "epoch": 16.529850746268657, "grad_norm": 0.1878710389137268, "learning_rate": 9.217203991462815e-05, "loss": 0.0394, "step": 4430 }, { "epoch": 16.567164179104477, "grad_norm": 0.24988289177417755, "learning_rate": 9.212756825740873e-05, "loss": 0.0445, "step": 4440 }, { "epoch": 16.604477611940297, "grad_norm": 0.14291177690029144, "learning_rate": 9.208298142502636e-05, "loss": 0.0478, "step": 4450 }, { "epoch": 16.64179104477612, "grad_norm": 0.17774926126003265, "learning_rate": 9.20382795393797e-05, "loss": 0.0407, "step": 4460 }, { "epoch": 16.67910447761194, "grad_norm": 0.2323600798845291, "learning_rate": 9.199346272268199e-05, "loss": 0.0386, "step": 4470 }, { "epoch": 16.71641791044776, "grad_norm": 0.19065214693546295, "learning_rate": 9.194853109746074e-05, "loss": 0.044, "step": 4480 }, { "epoch": 16.753731343283583, "grad_norm": 0.16950172185897827, "learning_rate": 9.190348478655724e-05, "loss": 0.0476, "step": 4490 }, { "epoch": 16.791044776119403, "grad_norm": 0.24277034401893616, "learning_rate": 9.185832391312644e-05, "loss": 0.042, "step": 4500 }, { "epoch": 16.828358208955223, "grad_norm": 0.1774102747440338, "learning_rate": 9.18130486006364e-05, "loss": 0.0467, "step": 4510 }, { "epoch": 16.865671641791046, "grad_norm": 0.32764968276023865, "learning_rate": 9.176765897286813e-05, "loss": 0.0435, "step": 4520 }, { "epoch": 16.902985074626866, "grad_norm": 0.20119547843933105, "learning_rate": 9.17221551539151e-05, "loss": 0.0424, "step": 4530 }, { "epoch": 16.940298507462686, "grad_norm": 0.2355215847492218, "learning_rate": 9.167653726818305e-05, "loss": 0.0399, "step": 4540 }, { "epoch": 16.97761194029851, "grad_norm": 0.18891693651676178, "learning_rate": 9.163080544038952e-05, "loss": 0.0443, "step": 4550 }, { "epoch": 17.01492537313433, "grad_norm": 0.23788677155971527, "learning_rate": 9.158495979556358e-05, "loss": 0.0377, "step": 4560 }, { "epoch": 17.05223880597015, "grad_norm": 0.22770725190639496, "learning_rate": 9.153900045904549e-05, "loss": 0.0422, "step": 4570 }, { "epoch": 17.08955223880597, "grad_norm": 0.1889490783214569, "learning_rate": 9.14929275564863e-05, "loss": 0.0388, "step": 4580 }, { "epoch": 17.12686567164179, "grad_norm": 0.17724867165088654, "learning_rate": 9.144674121384757e-05, "loss": 0.0454, "step": 4590 }, { "epoch": 17.16417910447761, "grad_norm": 0.19548280537128448, "learning_rate": 9.140044155740101e-05, "loss": 0.0355, "step": 4600 }, { "epoch": 17.20149253731343, "grad_norm": 0.19331221282482147, "learning_rate": 9.135402871372808e-05, "loss": 0.0464, "step": 4610 }, { "epoch": 17.238805970149254, "grad_norm": 0.15202298760414124, "learning_rate": 9.130750280971978e-05, "loss": 0.0375, "step": 4620 }, { "epoch": 17.276119402985074, "grad_norm": 0.15507031977176666, "learning_rate": 9.126086397257612e-05, "loss": 0.0407, "step": 4630 }, { "epoch": 17.313432835820894, "grad_norm": 0.18652965128421783, "learning_rate": 9.121411232980588e-05, "loss": 0.0385, "step": 4640 }, { "epoch": 17.350746268656717, "grad_norm": 0.23101507127285004, "learning_rate": 9.116724800922629e-05, "loss": 0.0412, "step": 4650 }, { "epoch": 17.388059701492537, "grad_norm": 0.18489989638328552, "learning_rate": 9.112027113896262e-05, "loss": 0.0515, "step": 4660 }, { "epoch": 17.425373134328357, "grad_norm": 0.1620045155286789, "learning_rate": 9.107318184744781e-05, "loss": 0.0447, "step": 4670 }, { "epoch": 17.46268656716418, "grad_norm": 0.1474292278289795, "learning_rate": 9.102598026342222e-05, "loss": 0.0407, "step": 4680 }, { "epoch": 17.5, "grad_norm": 0.18343228101730347, "learning_rate": 9.097866651593317e-05, "loss": 0.0427, "step": 4690 }, { "epoch": 17.53731343283582, "grad_norm": 0.2731589376926422, "learning_rate": 9.093124073433463e-05, "loss": 0.0465, "step": 4700 }, { "epoch": 17.574626865671643, "grad_norm": 0.19504539668560028, "learning_rate": 9.088370304828685e-05, "loss": 0.037, "step": 4710 }, { "epoch": 17.611940298507463, "grad_norm": 0.217920184135437, "learning_rate": 9.083605358775612e-05, "loss": 0.0436, "step": 4720 }, { "epoch": 17.649253731343283, "grad_norm": 0.2485762983560562, "learning_rate": 9.078829248301417e-05, "loss": 0.0427, "step": 4730 }, { "epoch": 17.686567164179106, "grad_norm": 0.2041824907064438, "learning_rate": 9.074041986463808e-05, "loss": 0.0391, "step": 4740 }, { "epoch": 17.723880597014926, "grad_norm": 0.12309489399194717, "learning_rate": 9.069243586350975e-05, "loss": 0.0384, "step": 4750 }, { "epoch": 17.761194029850746, "grad_norm": 0.2100760042667389, "learning_rate": 9.064434061081562e-05, "loss": 0.0417, "step": 4760 }, { "epoch": 17.798507462686565, "grad_norm": 0.20066949725151062, "learning_rate": 9.059613423804623e-05, "loss": 0.0421, "step": 4770 }, { "epoch": 17.83582089552239, "grad_norm": 0.1418900340795517, "learning_rate": 9.0547816876996e-05, "loss": 0.0424, "step": 4780 }, { "epoch": 17.87313432835821, "grad_norm": 0.16109533607959747, "learning_rate": 9.049938865976275e-05, "loss": 0.0434, "step": 4790 }, { "epoch": 17.91044776119403, "grad_norm": 0.15550030767917633, "learning_rate": 9.045084971874738e-05, "loss": 0.0381, "step": 4800 }, { "epoch": 17.94776119402985, "grad_norm": 0.20752525329589844, "learning_rate": 9.040220018665347e-05, "loss": 0.0414, "step": 4810 }, { "epoch": 17.98507462686567, "grad_norm": 0.1435004323720932, "learning_rate": 9.035344019648702e-05, "loss": 0.0417, "step": 4820 }, { "epoch": 18.02238805970149, "grad_norm": 0.25393566489219666, "learning_rate": 9.030456988155596e-05, "loss": 0.0394, "step": 4830 }, { "epoch": 18.059701492537314, "grad_norm": 0.23017063736915588, "learning_rate": 9.025558937546988e-05, "loss": 0.0478, "step": 4840 }, { "epoch": 18.097014925373134, "grad_norm": 0.18687143921852112, "learning_rate": 9.020649881213958e-05, "loss": 0.0396, "step": 4850 }, { "epoch": 18.134328358208954, "grad_norm": 0.17204895615577698, "learning_rate": 9.015729832577681e-05, "loss": 0.0384, "step": 4860 }, { "epoch": 18.171641791044777, "grad_norm": 0.16985014081001282, "learning_rate": 9.010798805089384e-05, "loss": 0.0436, "step": 4870 }, { "epoch": 18.208955223880597, "grad_norm": 0.18706278502941132, "learning_rate": 9.005856812230304e-05, "loss": 0.0451, "step": 4880 }, { "epoch": 18.246268656716417, "grad_norm": 0.24117930233478546, "learning_rate": 9.000903867511666e-05, "loss": 0.0424, "step": 4890 }, { "epoch": 18.28358208955224, "grad_norm": 0.22972428798675537, "learning_rate": 8.995939984474624e-05, "loss": 0.0466, "step": 4900 }, { "epoch": 18.32089552238806, "grad_norm": 0.24179382622241974, "learning_rate": 8.990965176690252e-05, "loss": 0.044, "step": 4910 }, { "epoch": 18.35820895522388, "grad_norm": 0.18473748862743378, "learning_rate": 8.98597945775948e-05, "loss": 0.0381, "step": 4920 }, { "epoch": 18.395522388059703, "grad_norm": 0.24605973064899445, "learning_rate": 8.980982841313074e-05, "loss": 0.0414, "step": 4930 }, { "epoch": 18.432835820895523, "grad_norm": 0.201685830950737, "learning_rate": 8.975975341011596e-05, "loss": 0.0405, "step": 4940 }, { "epoch": 18.470149253731343, "grad_norm": 0.1506536900997162, "learning_rate": 8.970956970545355e-05, "loss": 0.0422, "step": 4950 }, { "epoch": 18.507462686567163, "grad_norm": 0.16673414409160614, "learning_rate": 8.965927743634391e-05, "loss": 0.0422, "step": 4960 }, { "epoch": 18.544776119402986, "grad_norm": 0.21865007281303406, "learning_rate": 8.96088767402841e-05, "loss": 0.042, "step": 4970 }, { "epoch": 18.582089552238806, "grad_norm": 0.19373145699501038, "learning_rate": 8.955836775506776e-05, "loss": 0.047, "step": 4980 }, { "epoch": 18.619402985074625, "grad_norm": 0.1440393477678299, "learning_rate": 8.950775061878453e-05, "loss": 0.0351, "step": 4990 }, { "epoch": 18.65671641791045, "grad_norm": 0.2655850052833557, "learning_rate": 8.945702546981969e-05, "loss": 0.0404, "step": 5000 }, { "epoch": 18.69402985074627, "grad_norm": 0.25791651010513306, "learning_rate": 8.940619244685388e-05, "loss": 0.0371, "step": 5010 }, { "epoch": 18.73134328358209, "grad_norm": 0.1491573303937912, "learning_rate": 8.935525168886262e-05, "loss": 0.0444, "step": 5020 }, { "epoch": 18.76865671641791, "grad_norm": 0.1620822250843048, "learning_rate": 8.930420333511606e-05, "loss": 0.041, "step": 5030 }, { "epoch": 18.80597014925373, "grad_norm": 0.23092538118362427, "learning_rate": 8.92530475251784e-05, "loss": 0.038, "step": 5040 }, { "epoch": 18.84328358208955, "grad_norm": 0.18359890580177307, "learning_rate": 8.920178439890765e-05, "loss": 0.0441, "step": 5050 }, { "epoch": 18.880597014925375, "grad_norm": 0.15026937425136566, "learning_rate": 8.91504140964553e-05, "loss": 0.0375, "step": 5060 }, { "epoch": 18.917910447761194, "grad_norm": 0.15214800834655762, "learning_rate": 8.909893675826574e-05, "loss": 0.0401, "step": 5070 }, { "epoch": 18.955223880597014, "grad_norm": 0.17139840126037598, "learning_rate": 8.90473525250761e-05, "loss": 0.0369, "step": 5080 }, { "epoch": 18.992537313432837, "grad_norm": 0.21731972694396973, "learning_rate": 8.899566153791566e-05, "loss": 0.0433, "step": 5090 }, { "epoch": 19.029850746268657, "grad_norm": 0.2552855312824249, "learning_rate": 8.894386393810563e-05, "loss": 0.0488, "step": 5100 }, { "epoch": 19.067164179104477, "grad_norm": 0.22493542730808258, "learning_rate": 8.889195986725865e-05, "loss": 0.0379, "step": 5110 }, { "epoch": 19.104477611940297, "grad_norm": 0.16157718002796173, "learning_rate": 8.883994946727849e-05, "loss": 0.0486, "step": 5120 }, { "epoch": 19.14179104477612, "grad_norm": 0.15074793994426727, "learning_rate": 8.878783288035957e-05, "loss": 0.0392, "step": 5130 }, { "epoch": 19.17910447761194, "grad_norm": 0.14178630709648132, "learning_rate": 8.873561024898668e-05, "loss": 0.0372, "step": 5140 }, { "epoch": 19.21641791044776, "grad_norm": 0.12401127070188522, "learning_rate": 8.868328171593448e-05, "loss": 0.0374, "step": 5150 }, { "epoch": 19.253731343283583, "grad_norm": 0.10974446684122086, "learning_rate": 8.863084742426719e-05, "loss": 0.0423, "step": 5160 }, { "epoch": 19.291044776119403, "grad_norm": 0.1507917046546936, "learning_rate": 8.857830751733815e-05, "loss": 0.0329, "step": 5170 }, { "epoch": 19.328358208955223, "grad_norm": 0.1831761598587036, "learning_rate": 8.852566213878947e-05, "loss": 0.0369, "step": 5180 }, { "epoch": 19.365671641791046, "grad_norm": 0.1886865645647049, "learning_rate": 8.84729114325516e-05, "loss": 0.039, "step": 5190 }, { "epoch": 19.402985074626866, "grad_norm": 0.18840211629867554, "learning_rate": 8.842005554284296e-05, "loss": 0.0474, "step": 5200 }, { "epoch": 19.440298507462686, "grad_norm": 0.2779952585697174, "learning_rate": 8.836709461416952e-05, "loss": 0.0414, "step": 5210 }, { "epoch": 19.47761194029851, "grad_norm": 0.11072772741317749, "learning_rate": 8.831402879132446e-05, "loss": 0.0448, "step": 5220 }, { "epoch": 19.51492537313433, "grad_norm": 0.16083550453186035, "learning_rate": 8.82608582193877e-05, "loss": 0.0395, "step": 5230 }, { "epoch": 19.55223880597015, "grad_norm": 0.14879973232746124, "learning_rate": 8.820758304372557e-05, "loss": 0.0383, "step": 5240 }, { "epoch": 19.58955223880597, "grad_norm": 0.13045696914196014, "learning_rate": 8.815420340999033e-05, "loss": 0.0354, "step": 5250 }, { "epoch": 19.62686567164179, "grad_norm": 0.21806840598583221, "learning_rate": 8.810071946411989e-05, "loss": 0.0409, "step": 5260 }, { "epoch": 19.66417910447761, "grad_norm": 0.2159956991672516, "learning_rate": 8.804713135233731e-05, "loss": 0.0413, "step": 5270 }, { "epoch": 19.701492537313435, "grad_norm": 0.15228591859340668, "learning_rate": 8.799343922115044e-05, "loss": 0.0377, "step": 5280 }, { "epoch": 19.738805970149254, "grad_norm": 0.18940836191177368, "learning_rate": 8.79396432173515e-05, "loss": 0.0413, "step": 5290 }, { "epoch": 19.776119402985074, "grad_norm": 0.19273892045021057, "learning_rate": 8.788574348801675e-05, "loss": 0.0372, "step": 5300 }, { "epoch": 19.813432835820894, "grad_norm": 0.23204709589481354, "learning_rate": 8.783174018050594e-05, "loss": 0.0409, "step": 5310 }, { "epoch": 19.850746268656717, "grad_norm": 0.16158196330070496, "learning_rate": 8.77776334424621e-05, "loss": 0.0366, "step": 5320 }, { "epoch": 19.888059701492537, "grad_norm": 0.18316198885440826, "learning_rate": 8.772342342181095e-05, "loss": 0.0376, "step": 5330 }, { "epoch": 19.925373134328357, "grad_norm": 0.15048454701900482, "learning_rate": 8.766911026676064e-05, "loss": 0.0385, "step": 5340 }, { "epoch": 19.96268656716418, "grad_norm": 0.16826169192790985, "learning_rate": 8.761469412580125e-05, "loss": 0.0399, "step": 5350 }, { "epoch": 20.0, "grad_norm": 0.2978197932243347, "learning_rate": 8.756017514770443e-05, "loss": 0.045, "step": 5360 }, { "epoch": 20.03731343283582, "grad_norm": 0.17185112833976746, "learning_rate": 8.750555348152298e-05, "loss": 0.0396, "step": 5370 }, { "epoch": 20.074626865671643, "grad_norm": 0.18874827027320862, "learning_rate": 8.745082927659047e-05, "loss": 0.04, "step": 5380 }, { "epoch": 20.111940298507463, "grad_norm": 0.22542424499988556, "learning_rate": 8.739600268252078e-05, "loss": 0.0383, "step": 5390 }, { "epoch": 20.149253731343283, "grad_norm": 0.23777806758880615, "learning_rate": 8.73410738492077e-05, "loss": 0.0405, "step": 5400 }, { "epoch": 20.186567164179106, "grad_norm": 0.14633609354496002, "learning_rate": 8.728604292682459e-05, "loss": 0.0389, "step": 5410 }, { "epoch": 20.223880597014926, "grad_norm": 0.17431358993053436, "learning_rate": 8.723091006582389e-05, "loss": 0.0358, "step": 5420 }, { "epoch": 20.261194029850746, "grad_norm": 0.16431669890880585, "learning_rate": 8.717567541693673e-05, "loss": 0.0353, "step": 5430 }, { "epoch": 20.298507462686565, "grad_norm": 0.20893873274326324, "learning_rate": 8.71203391311725e-05, "loss": 0.0379, "step": 5440 }, { "epoch": 20.33582089552239, "grad_norm": 0.20465858280658722, "learning_rate": 8.706490135981855e-05, "loss": 0.0424, "step": 5450 }, { "epoch": 20.37313432835821, "grad_norm": 0.18439358472824097, "learning_rate": 8.700936225443959e-05, "loss": 0.0334, "step": 5460 }, { "epoch": 20.41044776119403, "grad_norm": 0.22949454188346863, "learning_rate": 8.695372196687743e-05, "loss": 0.0411, "step": 5470 }, { "epoch": 20.44776119402985, "grad_norm": 0.18346606194972992, "learning_rate": 8.689798064925049e-05, "loss": 0.0344, "step": 5480 }, { "epoch": 20.48507462686567, "grad_norm": 0.17839306592941284, "learning_rate": 8.684213845395339e-05, "loss": 0.0384, "step": 5490 }, { "epoch": 20.52238805970149, "grad_norm": 0.1716238111257553, "learning_rate": 8.678619553365659e-05, "loss": 0.0337, "step": 5500 }, { "epoch": 20.559701492537314, "grad_norm": 0.2150501161813736, "learning_rate": 8.673015204130586e-05, "loss": 0.0375, "step": 5510 }, { "epoch": 20.597014925373134, "grad_norm": 0.18077640235424042, "learning_rate": 8.6674008130122e-05, "loss": 0.0394, "step": 5520 }, { "epoch": 20.634328358208954, "grad_norm": 0.18519295752048492, "learning_rate": 8.661776395360029e-05, "loss": 0.0404, "step": 5530 }, { "epoch": 20.671641791044777, "grad_norm": 0.20758816599845886, "learning_rate": 8.656141966551019e-05, "loss": 0.041, "step": 5540 }, { "epoch": 20.708955223880597, "grad_norm": 0.19027139246463776, "learning_rate": 8.650497541989482e-05, "loss": 0.0356, "step": 5550 }, { "epoch": 20.746268656716417, "grad_norm": 0.1323348730802536, "learning_rate": 8.644843137107059e-05, "loss": 0.0397, "step": 5560 }, { "epoch": 20.78358208955224, "grad_norm": 0.21040759980678558, "learning_rate": 8.639178767362676e-05, "loss": 0.0374, "step": 5570 }, { "epoch": 20.82089552238806, "grad_norm": 0.1928853690624237, "learning_rate": 8.633504448242505e-05, "loss": 0.0346, "step": 5580 }, { "epoch": 20.85820895522388, "grad_norm": 0.17996659874916077, "learning_rate": 8.627820195259918e-05, "loss": 0.0399, "step": 5590 }, { "epoch": 20.895522388059703, "grad_norm": 0.16209149360656738, "learning_rate": 8.622126023955446e-05, "loss": 0.0429, "step": 5600 }, { "epoch": 20.932835820895523, "grad_norm": 0.301158607006073, "learning_rate": 8.616421949896734e-05, "loss": 0.0445, "step": 5610 }, { "epoch": 20.970149253731343, "grad_norm": 0.16108687222003937, "learning_rate": 8.610707988678503e-05, "loss": 0.0404, "step": 5620 }, { "epoch": 21.007462686567163, "grad_norm": 0.1699988692998886, "learning_rate": 8.604984155922506e-05, "loss": 0.0394, "step": 5630 }, { "epoch": 21.044776119402986, "grad_norm": 0.2011076956987381, "learning_rate": 8.599250467277483e-05, "loss": 0.0381, "step": 5640 }, { "epoch": 21.082089552238806, "grad_norm": 0.17981526255607605, "learning_rate": 8.59350693841912e-05, "loss": 0.0404, "step": 5650 }, { "epoch": 21.119402985074625, "grad_norm": 0.15376441180706024, "learning_rate": 8.587753585050004e-05, "loss": 0.0365, "step": 5660 }, { "epoch": 21.15671641791045, "grad_norm": 0.25023868680000305, "learning_rate": 8.581990422899585e-05, "loss": 0.0372, "step": 5670 }, { "epoch": 21.19402985074627, "grad_norm": 0.1637163758277893, "learning_rate": 8.576217467724128e-05, "loss": 0.0407, "step": 5680 }, { "epoch": 21.23134328358209, "grad_norm": 0.10038904845714569, "learning_rate": 8.570434735306671e-05, "loss": 0.0345, "step": 5690 }, { "epoch": 21.26865671641791, "grad_norm": 0.14758236706256866, "learning_rate": 8.564642241456986e-05, "loss": 0.0429, "step": 5700 }, { "epoch": 21.30597014925373, "grad_norm": 0.1543818861246109, "learning_rate": 8.558840002011528e-05, "loss": 0.0345, "step": 5710 }, { "epoch": 21.34328358208955, "grad_norm": 0.15707038342952728, "learning_rate": 8.553028032833397e-05, "loss": 0.0431, "step": 5720 }, { "epoch": 21.380597014925375, "grad_norm": 0.1659613400697708, "learning_rate": 8.547206349812298e-05, "loss": 0.0436, "step": 5730 }, { "epoch": 21.417910447761194, "grad_norm": 0.2591764032840729, "learning_rate": 8.541374968864487e-05, "loss": 0.041, "step": 5740 }, { "epoch": 21.455223880597014, "grad_norm": 0.18910576403141022, "learning_rate": 8.535533905932738e-05, "loss": 0.0367, "step": 5750 }, { "epoch": 21.492537313432837, "grad_norm": 0.24122141301631927, "learning_rate": 8.529683176986295e-05, "loss": 0.0341, "step": 5760 }, { "epoch": 21.529850746268657, "grad_norm": 0.1295759379863739, "learning_rate": 8.523822798020827e-05, "loss": 0.0406, "step": 5770 }, { "epoch": 21.567164179104477, "grad_norm": 0.2030249983072281, "learning_rate": 8.517952785058385e-05, "loss": 0.0391, "step": 5780 }, { "epoch": 21.604477611940297, "grad_norm": 0.2473856657743454, "learning_rate": 8.512073154147362e-05, "loss": 0.0381, "step": 5790 }, { "epoch": 21.64179104477612, "grad_norm": 0.15921558439731598, "learning_rate": 8.506183921362443e-05, "loss": 0.0391, "step": 5800 }, { "epoch": 21.67910447761194, "grad_norm": 0.12598709762096405, "learning_rate": 8.500285102804568e-05, "loss": 0.0379, "step": 5810 }, { "epoch": 21.71641791044776, "grad_norm": 0.15903788805007935, "learning_rate": 8.494376714600878e-05, "loss": 0.0326, "step": 5820 }, { "epoch": 21.753731343283583, "grad_norm": 0.17107808589935303, "learning_rate": 8.488458772904684e-05, "loss": 0.0362, "step": 5830 }, { "epoch": 21.791044776119403, "grad_norm": 0.14881531894207, "learning_rate": 8.482531293895412e-05, "loss": 0.0378, "step": 5840 }, { "epoch": 21.828358208955223, "grad_norm": 0.1906944066286087, "learning_rate": 8.476594293778561e-05, "loss": 0.0407, "step": 5850 }, { "epoch": 21.865671641791046, "grad_norm": 0.09388154000043869, "learning_rate": 8.470647788785665e-05, "loss": 0.0392, "step": 5860 }, { "epoch": 21.902985074626866, "grad_norm": 0.2006390541791916, "learning_rate": 8.46469179517424e-05, "loss": 0.0407, "step": 5870 }, { "epoch": 21.940298507462686, "grad_norm": 0.20930394530296326, "learning_rate": 8.458726329227747e-05, "loss": 0.0386, "step": 5880 }, { "epoch": 21.97761194029851, "grad_norm": 0.16148430109024048, "learning_rate": 8.452751407255541e-05, "loss": 0.0399, "step": 5890 }, { "epoch": 22.01492537313433, "grad_norm": 0.1830778419971466, "learning_rate": 8.44676704559283e-05, "loss": 0.0367, "step": 5900 }, { "epoch": 22.05223880597015, "grad_norm": 0.15862196683883667, "learning_rate": 8.44077326060063e-05, "loss": 0.0396, "step": 5910 }, { "epoch": 22.08955223880597, "grad_norm": 0.2121029645204544, "learning_rate": 8.434770068665723e-05, "loss": 0.0419, "step": 5920 }, { "epoch": 22.12686567164179, "grad_norm": 0.10081466287374496, "learning_rate": 8.428757486200603e-05, "loss": 0.0353, "step": 5930 }, { "epoch": 22.16417910447761, "grad_norm": 0.11492795497179031, "learning_rate": 8.422735529643444e-05, "loss": 0.0351, "step": 5940 }, { "epoch": 22.20149253731343, "grad_norm": 0.2412245273590088, "learning_rate": 8.416704215458043e-05, "loss": 0.0306, "step": 5950 }, { "epoch": 22.238805970149254, "grad_norm": 0.1803082972764969, "learning_rate": 8.410663560133784e-05, "loss": 0.0362, "step": 5960 }, { "epoch": 22.276119402985074, "grad_norm": 0.15566259622573853, "learning_rate": 8.404613580185585e-05, "loss": 0.0325, "step": 5970 }, { "epoch": 22.313432835820894, "grad_norm": 0.12272784113883972, "learning_rate": 8.398554292153866e-05, "loss": 0.0336, "step": 5980 }, { "epoch": 22.350746268656717, "grad_norm": 0.12888449430465698, "learning_rate": 8.392485712604483e-05, "loss": 0.0335, "step": 5990 }, { "epoch": 22.388059701492537, "grad_norm": 0.20244503021240234, "learning_rate": 8.386407858128706e-05, "loss": 0.0386, "step": 6000 }, { "epoch": 22.425373134328357, "grad_norm": 0.13889527320861816, "learning_rate": 8.380320745343153e-05, "loss": 0.0357, "step": 6010 }, { "epoch": 22.46268656716418, "grad_norm": 0.2436564564704895, "learning_rate": 8.37422439088976e-05, "loss": 0.0292, "step": 6020 }, { "epoch": 22.5, "grad_norm": 0.24686101078987122, "learning_rate": 8.368118811435726e-05, "loss": 0.0397, "step": 6030 }, { "epoch": 22.53731343283582, "grad_norm": 0.20070314407348633, "learning_rate": 8.362004023673474e-05, "loss": 0.0385, "step": 6040 }, { "epoch": 22.574626865671643, "grad_norm": 0.20612193644046783, "learning_rate": 8.355880044320598e-05, "loss": 0.0309, "step": 6050 }, { "epoch": 22.611940298507463, "grad_norm": 0.12683475017547607, "learning_rate": 8.349746890119826e-05, "loss": 0.0297, "step": 6060 }, { "epoch": 22.649253731343283, "grad_norm": 0.21401409804821014, "learning_rate": 8.343604577838964e-05, "loss": 0.0389, "step": 6070 }, { "epoch": 22.686567164179106, "grad_norm": 0.1889771819114685, "learning_rate": 8.337453124270863e-05, "loss": 0.0439, "step": 6080 }, { "epoch": 22.723880597014926, "grad_norm": 0.19221380352973938, "learning_rate": 8.331292546233362e-05, "loss": 0.0368, "step": 6090 }, { "epoch": 22.761194029850746, "grad_norm": 0.17325708270072937, "learning_rate": 8.32512286056924e-05, "loss": 0.0347, "step": 6100 }, { "epoch": 22.798507462686565, "grad_norm": 0.2260226607322693, "learning_rate": 8.318944084146192e-05, "loss": 0.0344, "step": 6110 }, { "epoch": 22.83582089552239, "grad_norm": 0.24487236142158508, "learning_rate": 8.31275623385675e-05, "loss": 0.0354, "step": 6120 }, { "epoch": 22.87313432835821, "grad_norm": 0.2184796780347824, "learning_rate": 8.306559326618259e-05, "loss": 0.0366, "step": 6130 }, { "epoch": 22.91044776119403, "grad_norm": 0.13623999059200287, "learning_rate": 8.300353379372834e-05, "loss": 0.033, "step": 6140 }, { "epoch": 22.94776119402985, "grad_norm": 0.15340180695056915, "learning_rate": 8.29413840908729e-05, "loss": 0.032, "step": 6150 }, { "epoch": 22.98507462686567, "grad_norm": 0.10129690915346146, "learning_rate": 8.287914432753123e-05, "loss": 0.0336, "step": 6160 }, { "epoch": 23.02238805970149, "grad_norm": 0.18298344314098358, "learning_rate": 8.281681467386446e-05, "loss": 0.0414, "step": 6170 }, { "epoch": 23.059701492537314, "grad_norm": 0.1728634238243103, "learning_rate": 8.275439530027948e-05, "loss": 0.0352, "step": 6180 }, { "epoch": 23.097014925373134, "grad_norm": 0.20128990709781647, "learning_rate": 8.269188637742846e-05, "loss": 0.0359, "step": 6190 }, { "epoch": 23.134328358208954, "grad_norm": 0.1544492542743683, "learning_rate": 8.262928807620843e-05, "loss": 0.0365, "step": 6200 }, { "epoch": 23.171641791044777, "grad_norm": 0.14328154921531677, "learning_rate": 8.256660056776076e-05, "loss": 0.032, "step": 6210 }, { "epoch": 23.208955223880597, "grad_norm": 0.1453138291835785, "learning_rate": 8.250382402347065e-05, "loss": 0.0354, "step": 6220 }, { "epoch": 23.246268656716417, "grad_norm": 0.15712817013263702, "learning_rate": 8.244095861496686e-05, "loss": 0.0375, "step": 6230 }, { "epoch": 23.28358208955224, "grad_norm": 0.17584560811519623, "learning_rate": 8.237800451412095e-05, "loss": 0.0357, "step": 6240 }, { "epoch": 23.32089552238806, "grad_norm": 0.17542603611946106, "learning_rate": 8.231496189304704e-05, "loss": 0.0375, "step": 6250 }, { "epoch": 23.35820895522388, "grad_norm": 0.24553294479846954, "learning_rate": 8.225183092410128e-05, "loss": 0.0377, "step": 6260 }, { "epoch": 23.395522388059703, "grad_norm": 0.13672307133674622, "learning_rate": 8.218861177988129e-05, "loss": 0.037, "step": 6270 }, { "epoch": 23.432835820895523, "grad_norm": 0.228271946310997, "learning_rate": 8.212530463322583e-05, "loss": 0.0341, "step": 6280 }, { "epoch": 23.470149253731343, "grad_norm": 0.17787960171699524, "learning_rate": 8.206190965721419e-05, "loss": 0.0346, "step": 6290 }, { "epoch": 23.507462686567163, "grad_norm": 0.13236592710018158, "learning_rate": 8.199842702516583e-05, "loss": 0.0327, "step": 6300 }, { "epoch": 23.544776119402986, "grad_norm": 0.1402582973241806, "learning_rate": 8.193485691063985e-05, "loss": 0.0327, "step": 6310 }, { "epoch": 23.582089552238806, "grad_norm": 0.09977417439222336, "learning_rate": 8.18711994874345e-05, "loss": 0.0338, "step": 6320 }, { "epoch": 23.619402985074625, "grad_norm": 0.10996600985527039, "learning_rate": 8.180745492958674e-05, "loss": 0.0388, "step": 6330 }, { "epoch": 23.65671641791045, "grad_norm": 0.10995578020811081, "learning_rate": 8.174362341137177e-05, "loss": 0.0341, "step": 6340 }, { "epoch": 23.69402985074627, "grad_norm": 0.22654198110103607, "learning_rate": 8.167970510730253e-05, "loss": 0.0291, "step": 6350 }, { "epoch": 23.73134328358209, "grad_norm": 0.13473545014858246, "learning_rate": 8.161570019212921e-05, "loss": 0.0297, "step": 6360 }, { "epoch": 23.76865671641791, "grad_norm": 0.14444619417190552, "learning_rate": 8.155160884083881e-05, "loss": 0.0385, "step": 6370 }, { "epoch": 23.80597014925373, "grad_norm": 0.16078300774097443, "learning_rate": 8.148743122865463e-05, "loss": 0.0309, "step": 6380 }, { "epoch": 23.84328358208955, "grad_norm": 0.3152869641780853, "learning_rate": 8.14231675310358e-05, "loss": 0.0367, "step": 6390 }, { "epoch": 23.880597014925375, "grad_norm": 0.14878515899181366, "learning_rate": 8.135881792367686e-05, "loss": 0.0331, "step": 6400 }, { "epoch": 23.917910447761194, "grad_norm": 0.18219135701656342, "learning_rate": 8.129438258250712e-05, "loss": 0.0374, "step": 6410 }, { "epoch": 23.955223880597014, "grad_norm": 0.1473218947649002, "learning_rate": 8.12298616836904e-05, "loss": 0.0338, "step": 6420 }, { "epoch": 23.992537313432837, "grad_norm": 0.19802241027355194, "learning_rate": 8.116525540362434e-05, "loss": 0.0328, "step": 6430 }, { "epoch": 24.029850746268657, "grad_norm": 0.14013780653476715, "learning_rate": 8.110056391894005e-05, "loss": 0.0295, "step": 6440 }, { "epoch": 24.067164179104477, "grad_norm": 0.2698536813259125, "learning_rate": 8.103578740650156e-05, "loss": 0.0325, "step": 6450 }, { "epoch": 24.104477611940297, "grad_norm": 0.14893870055675507, "learning_rate": 8.097092604340542e-05, "loss": 0.0288, "step": 6460 }, { "epoch": 24.14179104477612, "grad_norm": 0.1409458965063095, "learning_rate": 8.090598000698009e-05, "loss": 0.0351, "step": 6470 }, { "epoch": 24.17910447761194, "grad_norm": 0.10835444182157516, "learning_rate": 8.084094947478556e-05, "loss": 0.0347, "step": 6480 }, { "epoch": 24.21641791044776, "grad_norm": 0.15062202513217926, "learning_rate": 8.077583462461283e-05, "loss": 0.0311, "step": 6490 }, { "epoch": 24.253731343283583, "grad_norm": 0.12380697578191757, "learning_rate": 8.07106356344834e-05, "loss": 0.0332, "step": 6500 }, { "epoch": 24.291044776119403, "grad_norm": 0.12719780206680298, "learning_rate": 8.064535268264883e-05, "loss": 0.0379, "step": 6510 }, { "epoch": 24.328358208955223, "grad_norm": 0.1722663938999176, "learning_rate": 8.057998594759022e-05, "loss": 0.0357, "step": 6520 }, { "epoch": 24.365671641791046, "grad_norm": 0.19297920167446136, "learning_rate": 8.051453560801772e-05, "loss": 0.0405, "step": 6530 }, { "epoch": 24.402985074626866, "grad_norm": 0.16749723255634308, "learning_rate": 8.044900184287007e-05, "loss": 0.0344, "step": 6540 }, { "epoch": 24.440298507462686, "grad_norm": 0.10346874594688416, "learning_rate": 8.038338483131407e-05, "loss": 0.0335, "step": 6550 }, { "epoch": 24.47761194029851, "grad_norm": 0.16637130081653595, "learning_rate": 8.031768475274413e-05, "loss": 0.0354, "step": 6560 }, { "epoch": 24.51492537313433, "grad_norm": 0.23754535615444183, "learning_rate": 8.025190178678175e-05, "loss": 0.0372, "step": 6570 }, { "epoch": 24.55223880597015, "grad_norm": 0.2080601304769516, "learning_rate": 8.018603611327504e-05, "loss": 0.0391, "step": 6580 }, { "epoch": 24.58955223880597, "grad_norm": 0.18562468886375427, "learning_rate": 8.012008791229826e-05, "loss": 0.0366, "step": 6590 }, { "epoch": 24.62686567164179, "grad_norm": 0.19797714054584503, "learning_rate": 8.005405736415126e-05, "loss": 0.0362, "step": 6600 }, { "epoch": 24.66417910447761, "grad_norm": 0.199635848402977, "learning_rate": 7.998794464935904e-05, "loss": 0.0372, "step": 6610 }, { "epoch": 24.701492537313435, "grad_norm": 0.1487186700105667, "learning_rate": 7.992174994867123e-05, "loss": 0.0377, "step": 6620 }, { "epoch": 24.738805970149254, "grad_norm": 0.2324935793876648, "learning_rate": 7.985547344306161e-05, "loss": 0.0342, "step": 6630 }, { "epoch": 24.776119402985074, "grad_norm": 0.1254553645849228, "learning_rate": 7.978911531372765e-05, "loss": 0.0363, "step": 6640 }, { "epoch": 24.813432835820894, "grad_norm": 0.17892928421497345, "learning_rate": 7.972267574208991e-05, "loss": 0.0358, "step": 6650 }, { "epoch": 24.850746268656717, "grad_norm": 0.14277411997318268, "learning_rate": 7.965615490979163e-05, "loss": 0.0341, "step": 6660 }, { "epoch": 24.888059701492537, "grad_norm": 0.12334313988685608, "learning_rate": 7.958955299869825e-05, "loss": 0.0295, "step": 6670 }, { "epoch": 24.925373134328357, "grad_norm": 0.19866636395454407, "learning_rate": 7.952287019089685e-05, "loss": 0.0361, "step": 6680 }, { "epoch": 24.96268656716418, "grad_norm": 0.11753349006175995, "learning_rate": 7.945610666869568e-05, "loss": 0.0315, "step": 6690 }, { "epoch": 25.0, "grad_norm": 0.16609805822372437, "learning_rate": 7.938926261462366e-05, "loss": 0.0356, "step": 6700 }, { "epoch": 25.03731343283582, "grad_norm": 0.11269158869981766, "learning_rate": 7.932233821142987e-05, "loss": 0.0291, "step": 6710 }, { "epoch": 25.074626865671643, "grad_norm": 0.20979076623916626, "learning_rate": 7.925533364208309e-05, "loss": 0.0338, "step": 6720 }, { "epoch": 25.111940298507463, "grad_norm": 0.10752667486667633, "learning_rate": 7.918824908977123e-05, "loss": 0.0313, "step": 6730 }, { "epoch": 25.149253731343283, "grad_norm": 0.19201111793518066, "learning_rate": 7.912108473790092e-05, "loss": 0.0391, "step": 6740 }, { "epoch": 25.186567164179106, "grad_norm": 0.15530024468898773, "learning_rate": 7.905384077009693e-05, "loss": 0.0381, "step": 6750 }, { "epoch": 25.223880597014926, "grad_norm": 0.12238435447216034, "learning_rate": 7.898651737020166e-05, "loss": 0.0307, "step": 6760 }, { "epoch": 25.261194029850746, "grad_norm": 0.1430927962064743, "learning_rate": 7.891911472227478e-05, "loss": 0.0311, "step": 6770 }, { "epoch": 25.298507462686565, "grad_norm": 0.13624276220798492, "learning_rate": 7.88516330105925e-05, "loss": 0.0285, "step": 6780 }, { "epoch": 25.33582089552239, "grad_norm": 0.14496494829654694, "learning_rate": 7.878407241964729e-05, "loss": 0.0337, "step": 6790 }, { "epoch": 25.37313432835821, "grad_norm": 0.14664921164512634, "learning_rate": 7.871643313414718e-05, "loss": 0.03, "step": 6800 }, { "epoch": 25.41044776119403, "grad_norm": 0.17611882090568542, "learning_rate": 7.864871533901544e-05, "loss": 0.0395, "step": 6810 }, { "epoch": 25.44776119402985, "grad_norm": 0.12972378730773926, "learning_rate": 7.858091921938988e-05, "loss": 0.0313, "step": 6820 }, { "epoch": 25.48507462686567, "grad_norm": 0.11367812007665634, "learning_rate": 7.851304496062254e-05, "loss": 0.0266, "step": 6830 }, { "epoch": 25.52238805970149, "grad_norm": 0.15163442492485046, "learning_rate": 7.844509274827907e-05, "loss": 0.0271, "step": 6840 }, { "epoch": 25.559701492537314, "grad_norm": 0.1871061623096466, "learning_rate": 7.837706276813819e-05, "loss": 0.0342, "step": 6850 }, { "epoch": 25.597014925373134, "grad_norm": 0.13131551444530487, "learning_rate": 7.830895520619128e-05, "loss": 0.0302, "step": 6860 }, { "epoch": 25.634328358208954, "grad_norm": 0.15808728337287903, "learning_rate": 7.824077024864179e-05, "loss": 0.0377, "step": 6870 }, { "epoch": 25.671641791044777, "grad_norm": 0.14186668395996094, "learning_rate": 7.817250808190483e-05, "loss": 0.0358, "step": 6880 }, { "epoch": 25.708955223880597, "grad_norm": 0.13903076946735382, "learning_rate": 7.810416889260653e-05, "loss": 0.0402, "step": 6890 }, { "epoch": 25.746268656716417, "grad_norm": 0.13526031374931335, "learning_rate": 7.803575286758364e-05, "loss": 0.0347, "step": 6900 }, { "epoch": 25.78358208955224, "grad_norm": 0.14555956423282623, "learning_rate": 7.796726019388295e-05, "loss": 0.0356, "step": 6910 }, { "epoch": 25.82089552238806, "grad_norm": 0.08969845622777939, "learning_rate": 7.789869105876083e-05, "loss": 0.0335, "step": 6920 }, { "epoch": 25.85820895522388, "grad_norm": 0.3439176380634308, "learning_rate": 7.783004564968263e-05, "loss": 0.0404, "step": 6930 }, { "epoch": 25.895522388059703, "grad_norm": 0.1106473058462143, "learning_rate": 7.776132415432234e-05, "loss": 0.0288, "step": 6940 }, { "epoch": 25.932835820895523, "grad_norm": 0.15126998722553253, "learning_rate": 7.769252676056187e-05, "loss": 0.0346, "step": 6950 }, { "epoch": 25.970149253731343, "grad_norm": 0.15303996205329895, "learning_rate": 7.762365365649067e-05, "loss": 0.034, "step": 6960 }, { "epoch": 26.007462686567163, "grad_norm": 0.13712307810783386, "learning_rate": 7.755470503040516e-05, "loss": 0.0332, "step": 6970 }, { "epoch": 26.044776119402986, "grad_norm": 0.15086202323436737, "learning_rate": 7.748568107080832e-05, "loss": 0.0304, "step": 6980 }, { "epoch": 26.082089552238806, "grad_norm": 0.1414598673582077, "learning_rate": 7.741658196640892e-05, "loss": 0.0332, "step": 6990 }, { "epoch": 26.119402985074625, "grad_norm": 0.13065436482429504, "learning_rate": 7.734740790612136e-05, "loss": 0.0346, "step": 7000 }, { "epoch": 26.15671641791045, "grad_norm": 0.14175507426261902, "learning_rate": 7.727815907906481e-05, "loss": 0.0298, "step": 7010 }, { "epoch": 26.19402985074627, "grad_norm": 0.14675237238407135, "learning_rate": 7.720883567456298e-05, "loss": 0.0334, "step": 7020 }, { "epoch": 26.23134328358209, "grad_norm": 0.12883435189723969, "learning_rate": 7.713943788214337e-05, "loss": 0.0296, "step": 7030 }, { "epoch": 26.26865671641791, "grad_norm": 0.18207651376724243, "learning_rate": 7.70699658915369e-05, "loss": 0.0299, "step": 7040 }, { "epoch": 26.30597014925373, "grad_norm": 0.21244533360004425, "learning_rate": 7.700041989267736e-05, "loss": 0.0386, "step": 7050 }, { "epoch": 26.34328358208955, "grad_norm": 0.16139419376850128, "learning_rate": 7.693080007570084e-05, "loss": 0.0385, "step": 7060 }, { "epoch": 26.380597014925375, "grad_norm": 0.12246864289045334, "learning_rate": 7.686110663094525e-05, "loss": 0.0356, "step": 7070 }, { "epoch": 26.417910447761194, "grad_norm": 0.16033267974853516, "learning_rate": 7.679133974894983e-05, "loss": 0.0293, "step": 7080 }, { "epoch": 26.455223880597014, "grad_norm": 0.17770257592201233, "learning_rate": 7.672149962045457e-05, "loss": 0.0343, "step": 7090 }, { "epoch": 26.492537313432837, "grad_norm": 0.170291006565094, "learning_rate": 7.66515864363997e-05, "loss": 0.0283, "step": 7100 }, { "epoch": 26.529850746268657, "grad_norm": 0.1902756541967392, "learning_rate": 7.658160038792518e-05, "loss": 0.0345, "step": 7110 }, { "epoch": 26.567164179104477, "grad_norm": 0.16938641667366028, "learning_rate": 7.651154166637025e-05, "loss": 0.0292, "step": 7120 }, { "epoch": 26.604477611940297, "grad_norm": 0.12178190052509308, "learning_rate": 7.644141046327271e-05, "loss": 0.0326, "step": 7130 }, { "epoch": 26.64179104477612, "grad_norm": 0.349470317363739, "learning_rate": 7.637120697036866e-05, "loss": 0.0337, "step": 7140 }, { "epoch": 26.67910447761194, "grad_norm": 0.16566987335681915, "learning_rate": 7.630093137959171e-05, "loss": 0.0396, "step": 7150 }, { "epoch": 26.71641791044776, "grad_norm": 0.1727592498064041, "learning_rate": 7.623058388307269e-05, "loss": 0.0321, "step": 7160 }, { "epoch": 26.753731343283583, "grad_norm": 0.10909541696310043, "learning_rate": 7.616016467313891e-05, "loss": 0.0351, "step": 7170 }, { "epoch": 26.791044776119403, "grad_norm": 0.14943735301494598, "learning_rate": 7.608967394231387e-05, "loss": 0.0353, "step": 7180 }, { "epoch": 26.828358208955223, "grad_norm": 0.0988110676407814, "learning_rate": 7.60191118833165e-05, "loss": 0.0372, "step": 7190 }, { "epoch": 26.865671641791046, "grad_norm": 0.22805768251419067, "learning_rate": 7.594847868906076e-05, "loss": 0.0354, "step": 7200 }, { "epoch": 26.902985074626866, "grad_norm": 0.12781986594200134, "learning_rate": 7.587777455265515e-05, "loss": 0.0304, "step": 7210 }, { "epoch": 26.940298507462686, "grad_norm": 0.14781200885772705, "learning_rate": 7.580699966740201e-05, "loss": 0.0313, "step": 7220 }, { "epoch": 26.97761194029851, "grad_norm": 0.1960006058216095, "learning_rate": 7.573615422679726e-05, "loss": 0.0363, "step": 7230 }, { "epoch": 27.01492537313433, "grad_norm": 0.11798709630966187, "learning_rate": 7.566523842452958e-05, "loss": 0.0298, "step": 7240 }, { "epoch": 27.05223880597015, "grad_norm": 0.117376908659935, "learning_rate": 7.559425245448006e-05, "loss": 0.0316, "step": 7250 }, { "epoch": 27.08955223880597, "grad_norm": 0.1397295892238617, "learning_rate": 7.552319651072164e-05, "loss": 0.0273, "step": 7260 }, { "epoch": 27.12686567164179, "grad_norm": 0.11225396394729614, "learning_rate": 7.545207078751857e-05, "loss": 0.0313, "step": 7270 }, { "epoch": 27.16417910447761, "grad_norm": 0.15752683579921722, "learning_rate": 7.538087547932585e-05, "loss": 0.0294, "step": 7280 }, { "epoch": 27.20149253731343, "grad_norm": 0.1239582896232605, "learning_rate": 7.530961078078873e-05, "loss": 0.0288, "step": 7290 }, { "epoch": 27.238805970149254, "grad_norm": 0.12366504967212677, "learning_rate": 7.52382768867422e-05, "loss": 0.0361, "step": 7300 }, { "epoch": 27.276119402985074, "grad_norm": 0.1711779236793518, "learning_rate": 7.516687399221037e-05, "loss": 0.0341, "step": 7310 }, { "epoch": 27.313432835820894, "grad_norm": 0.1658324897289276, "learning_rate": 7.509540229240601e-05, "loss": 0.0347, "step": 7320 }, { "epoch": 27.350746268656717, "grad_norm": 0.18168045580387115, "learning_rate": 7.50238619827301e-05, "loss": 0.0324, "step": 7330 }, { "epoch": 27.388059701492537, "grad_norm": 0.13268756866455078, "learning_rate": 7.495225325877103e-05, "loss": 0.0372, "step": 7340 }, { "epoch": 27.425373134328357, "grad_norm": 0.20096217095851898, "learning_rate": 7.488057631630437e-05, "loss": 0.0325, "step": 7350 }, { "epoch": 27.46268656716418, "grad_norm": 0.12081006169319153, "learning_rate": 7.480883135129211e-05, "loss": 0.0278, "step": 7360 }, { "epoch": 27.5, "grad_norm": 0.17097176611423492, "learning_rate": 7.473701855988227e-05, "loss": 0.0302, "step": 7370 }, { "epoch": 27.53731343283582, "grad_norm": 0.1790444701910019, "learning_rate": 7.466513813840825e-05, "loss": 0.031, "step": 7380 }, { "epoch": 27.574626865671643, "grad_norm": 0.17938077449798584, "learning_rate": 7.45931902833884e-05, "loss": 0.0355, "step": 7390 }, { "epoch": 27.611940298507463, "grad_norm": 0.18363428115844727, "learning_rate": 7.452117519152542e-05, "loss": 0.0393, "step": 7400 }, { "epoch": 27.649253731343283, "grad_norm": 0.1551889330148697, "learning_rate": 7.444909305970578e-05, "loss": 0.0287, "step": 7410 }, { "epoch": 27.686567164179106, "grad_norm": 0.15346640348434448, "learning_rate": 7.437694408499933e-05, "loss": 0.0314, "step": 7420 }, { "epoch": 27.723880597014926, "grad_norm": 0.1571803241968155, "learning_rate": 7.430472846465856e-05, "loss": 0.0346, "step": 7430 }, { "epoch": 27.761194029850746, "grad_norm": 0.11930593848228455, "learning_rate": 7.423244639611826e-05, "loss": 0.0341, "step": 7440 }, { "epoch": 27.798507462686565, "grad_norm": 0.10733676701784134, "learning_rate": 7.416009807699482e-05, "loss": 0.0304, "step": 7450 }, { "epoch": 27.83582089552239, "grad_norm": 0.11833635717630386, "learning_rate": 7.408768370508576e-05, "loss": 0.0292, "step": 7460 }, { "epoch": 27.87313432835821, "grad_norm": 0.1987493634223938, "learning_rate": 7.401520347836926e-05, "loss": 0.0372, "step": 7470 }, { "epoch": 27.91044776119403, "grad_norm": 0.14271625876426697, "learning_rate": 7.394265759500348e-05, "loss": 0.0304, "step": 7480 }, { "epoch": 27.94776119402985, "grad_norm": 0.1862812340259552, "learning_rate": 7.387004625332608e-05, "loss": 0.0351, "step": 7490 }, { "epoch": 27.98507462686567, "grad_norm": 0.13105957210063934, "learning_rate": 7.379736965185368e-05, "loss": 0.0322, "step": 7500 }, { "epoch": 28.02238805970149, "grad_norm": 0.16121017932891846, "learning_rate": 7.372462798928137e-05, "loss": 0.0282, "step": 7510 }, { "epoch": 28.059701492537314, "grad_norm": 0.25105759501457214, "learning_rate": 7.365182146448205e-05, "loss": 0.0365, "step": 7520 }, { "epoch": 28.097014925373134, "grad_norm": 0.17205365002155304, "learning_rate": 7.357895027650598e-05, "loss": 0.0286, "step": 7530 }, { "epoch": 28.134328358208954, "grad_norm": 0.12570993602275848, "learning_rate": 7.350601462458024e-05, "loss": 0.0353, "step": 7540 }, { "epoch": 28.171641791044777, "grad_norm": 0.2087439000606537, "learning_rate": 7.343301470810808e-05, "loss": 0.0286, "step": 7550 }, { "epoch": 28.208955223880597, "grad_norm": 0.24182668328285217, "learning_rate": 7.335995072666848e-05, "loss": 0.0267, "step": 7560 }, { "epoch": 28.246268656716417, "grad_norm": 0.1959223449230194, "learning_rate": 7.328682288001561e-05, "loss": 0.0285, "step": 7570 }, { "epoch": 28.28358208955224, "grad_norm": 0.09416075795888901, "learning_rate": 7.32136313680782e-05, "loss": 0.0332, "step": 7580 }, { "epoch": 28.32089552238806, "grad_norm": 0.11846543103456497, "learning_rate": 7.3140376390959e-05, "loss": 0.0358, "step": 7590 }, { "epoch": 28.35820895522388, "grad_norm": 0.1455664485692978, "learning_rate": 7.30670581489344e-05, "loss": 0.0324, "step": 7600 }, { "epoch": 28.395522388059703, "grad_norm": 0.13217243552207947, "learning_rate": 7.299367684245362e-05, "loss": 0.0302, "step": 7610 }, { "epoch": 28.432835820895523, "grad_norm": 0.1350845992565155, "learning_rate": 7.292023267213835e-05, "loss": 0.0307, "step": 7620 }, { "epoch": 28.470149253731343, "grad_norm": 0.14249691367149353, "learning_rate": 7.284672583878219e-05, "loss": 0.0279, "step": 7630 }, { "epoch": 28.507462686567163, "grad_norm": 0.1740005761384964, "learning_rate": 7.277315654334997e-05, "loss": 0.0315, "step": 7640 }, { "epoch": 28.544776119402986, "grad_norm": 0.17265993356704712, "learning_rate": 7.269952498697734e-05, "loss": 0.029, "step": 7650 }, { "epoch": 28.582089552238806, "grad_norm": 0.1168474555015564, "learning_rate": 7.262583137097018e-05, "loss": 0.0298, "step": 7660 }, { "epoch": 28.619402985074625, "grad_norm": 0.1682310402393341, "learning_rate": 7.255207589680402e-05, "loss": 0.0321, "step": 7670 }, { "epoch": 28.65671641791045, "grad_norm": 0.12167845666408539, "learning_rate": 7.247825876612353e-05, "loss": 0.0309, "step": 7680 }, { "epoch": 28.69402985074627, "grad_norm": 0.11592140048742294, "learning_rate": 7.240438018074189e-05, "loss": 0.0302, "step": 7690 }, { "epoch": 28.73134328358209, "grad_norm": 0.15492014586925507, "learning_rate": 7.233044034264034e-05, "loss": 0.0293, "step": 7700 }, { "epoch": 28.76865671641791, "grad_norm": 0.18349403142929077, "learning_rate": 7.225643945396757e-05, "loss": 0.0295, "step": 7710 }, { "epoch": 28.80597014925373, "grad_norm": 0.11238807439804077, "learning_rate": 7.218237771703921e-05, "loss": 0.0299, "step": 7720 }, { "epoch": 28.84328358208955, "grad_norm": 0.27874481678009033, "learning_rate": 7.210825533433719e-05, "loss": 0.0363, "step": 7730 }, { "epoch": 28.880597014925375, "grad_norm": 0.1895100474357605, "learning_rate": 7.203407250850928e-05, "loss": 0.0307, "step": 7740 }, { "epoch": 28.917910447761194, "grad_norm": 0.16037048399448395, "learning_rate": 7.195982944236851e-05, "loss": 0.0289, "step": 7750 }, { "epoch": 28.955223880597014, "grad_norm": 0.24020825326442719, "learning_rate": 7.188552633889259e-05, "loss": 0.0315, "step": 7760 }, { "epoch": 28.992537313432837, "grad_norm": 0.16143648326396942, "learning_rate": 7.181116340122336e-05, "loss": 0.0305, "step": 7770 }, { "epoch": 29.029850746268657, "grad_norm": 0.21622632443904877, "learning_rate": 7.173674083266624e-05, "loss": 0.0287, "step": 7780 }, { "epoch": 29.067164179104477, "grad_norm": 0.20102252066135406, "learning_rate": 7.166225883668969e-05, "loss": 0.0334, "step": 7790 }, { "epoch": 29.104477611940297, "grad_norm": 0.13965009152889252, "learning_rate": 7.158771761692464e-05, "loss": 0.0304, "step": 7800 }, { "epoch": 29.14179104477612, "grad_norm": 0.1629171222448349, "learning_rate": 7.151311737716397e-05, "loss": 0.0275, "step": 7810 }, { "epoch": 29.17910447761194, "grad_norm": 0.1336221694946289, "learning_rate": 7.143845832136188e-05, "loss": 0.0302, "step": 7820 }, { "epoch": 29.21641791044776, "grad_norm": 0.1043396145105362, "learning_rate": 7.136374065363334e-05, "loss": 0.0351, "step": 7830 }, { "epoch": 29.253731343283583, "grad_norm": 0.13451001048088074, "learning_rate": 7.128896457825364e-05, "loss": 0.0366, "step": 7840 }, { "epoch": 29.291044776119403, "grad_norm": 0.1465550661087036, "learning_rate": 7.121413029965769e-05, "loss": 0.0303, "step": 7850 }, { "epoch": 29.328358208955223, "grad_norm": 0.17387370765209198, "learning_rate": 7.113923802243957e-05, "loss": 0.0286, "step": 7860 }, { "epoch": 29.365671641791046, "grad_norm": 0.15019410848617554, "learning_rate": 7.10642879513519e-05, "loss": 0.0261, "step": 7870 }, { "epoch": 29.402985074626866, "grad_norm": 0.1326264888048172, "learning_rate": 7.09892802913053e-05, "loss": 0.0329, "step": 7880 }, { "epoch": 29.440298507462686, "grad_norm": 0.18702806532382965, "learning_rate": 7.091421524736784e-05, "loss": 0.037, "step": 7890 }, { "epoch": 29.47761194029851, "grad_norm": 0.13277067244052887, "learning_rate": 7.083909302476453e-05, "loss": 0.0332, "step": 7900 }, { "epoch": 29.51492537313433, "grad_norm": 0.17790928483009338, "learning_rate": 7.076391382887661e-05, "loss": 0.0295, "step": 7910 }, { "epoch": 29.55223880597015, "grad_norm": 0.12171241641044617, "learning_rate": 7.068867786524116e-05, "loss": 0.035, "step": 7920 }, { "epoch": 29.58955223880597, "grad_norm": 0.1449689269065857, "learning_rate": 7.061338533955043e-05, "loss": 0.0285, "step": 7930 }, { "epoch": 29.62686567164179, "grad_norm": 0.1526971459388733, "learning_rate": 7.053803645765128e-05, "loss": 0.0308, "step": 7940 }, { "epoch": 29.66417910447761, "grad_norm": 0.20465131103992462, "learning_rate": 7.04626314255447e-05, "loss": 0.0334, "step": 7950 }, { "epoch": 29.701492537313435, "grad_norm": 0.2523046135902405, "learning_rate": 7.038717044938519e-05, "loss": 0.0418, "step": 7960 }, { "epoch": 29.738805970149254, "grad_norm": 0.22158683836460114, "learning_rate": 7.031165373548014e-05, "loss": 0.0378, "step": 7970 }, { "epoch": 29.776119402985074, "grad_norm": 0.1642504334449768, "learning_rate": 7.023608149028937e-05, "loss": 0.0325, "step": 7980 }, { "epoch": 29.813432835820894, "grad_norm": 0.1396663635969162, "learning_rate": 7.016045392042452e-05, "loss": 0.0348, "step": 7990 }, { "epoch": 29.850746268656717, "grad_norm": 0.15272343158721924, "learning_rate": 7.008477123264848e-05, "loss": 0.0308, "step": 8000 }, { "epoch": 29.888059701492537, "grad_norm": 0.13813678920269012, "learning_rate": 7.000903363387482e-05, "loss": 0.033, "step": 8010 }, { "epoch": 29.925373134328357, "grad_norm": 0.15020430088043213, "learning_rate": 6.993324133116726e-05, "loss": 0.0326, "step": 8020 }, { "epoch": 29.96268656716418, "grad_norm": 0.21038872003555298, "learning_rate": 6.985739453173903e-05, "loss": 0.0339, "step": 8030 }, { "epoch": 30.0, "grad_norm": 0.12489106506109238, "learning_rate": 6.978149344295242e-05, "loss": 0.0295, "step": 8040 }, { "epoch": 30.03731343283582, "grad_norm": 0.1152721643447876, "learning_rate": 6.97055382723181e-05, "loss": 0.0278, "step": 8050 }, { "epoch": 30.074626865671643, "grad_norm": 0.08879780769348145, "learning_rate": 6.962952922749457e-05, "loss": 0.0271, "step": 8060 }, { "epoch": 30.111940298507463, "grad_norm": 0.17353716492652893, "learning_rate": 6.955346651628771e-05, "loss": 0.0315, "step": 8070 }, { "epoch": 30.149253731343283, "grad_norm": 0.17909663915634155, "learning_rate": 6.947735034665002e-05, "loss": 0.0282, "step": 8080 }, { "epoch": 30.186567164179106, "grad_norm": 0.1862180382013321, "learning_rate": 6.940118092668022e-05, "loss": 0.0292, "step": 8090 }, { "epoch": 30.223880597014926, "grad_norm": 0.14401745796203613, "learning_rate": 6.932495846462261e-05, "loss": 0.0266, "step": 8100 }, { "epoch": 30.261194029850746, "grad_norm": 0.11701787263154984, "learning_rate": 6.924868316886649e-05, "loss": 0.025, "step": 8110 }, { "epoch": 30.298507462686565, "grad_norm": 0.14499446749687195, "learning_rate": 6.917235524794558e-05, "loss": 0.0306, "step": 8120 }, { "epoch": 30.33582089552239, "grad_norm": 0.11035802960395813, "learning_rate": 6.909597491053751e-05, "loss": 0.0273, "step": 8130 }, { "epoch": 30.37313432835821, "grad_norm": 0.10410363972187042, "learning_rate": 6.901954236546323e-05, "loss": 0.0266, "step": 8140 }, { "epoch": 30.41044776119403, "grad_norm": 0.16998261213302612, "learning_rate": 6.894305782168638e-05, "loss": 0.0308, "step": 8150 }, { "epoch": 30.44776119402985, "grad_norm": 0.1439180225133896, "learning_rate": 6.886652148831279e-05, "loss": 0.0326, "step": 8160 }, { "epoch": 30.48507462686567, "grad_norm": 0.10910850763320923, "learning_rate": 6.878993357458986e-05, "loss": 0.0312, "step": 8170 }, { "epoch": 30.52238805970149, "grad_norm": 0.25548261404037476, "learning_rate": 6.871329428990602e-05, "loss": 0.0329, "step": 8180 }, { "epoch": 30.559701492537314, "grad_norm": 0.1617458164691925, "learning_rate": 6.863660384379017e-05, "loss": 0.0333, "step": 8190 }, { "epoch": 30.597014925373134, "grad_norm": 0.15894852578639984, "learning_rate": 6.855986244591104e-05, "loss": 0.0294, "step": 8200 }, { "epoch": 30.634328358208954, "grad_norm": 0.11025011539459229, "learning_rate": 6.84830703060767e-05, "loss": 0.0305, "step": 8210 }, { "epoch": 30.671641791044777, "grad_norm": 0.14748361706733704, "learning_rate": 6.840622763423391e-05, "loss": 0.0334, "step": 8220 }, { "epoch": 30.708955223880597, "grad_norm": 0.14963199198246002, "learning_rate": 6.83293346404676e-05, "loss": 0.0289, "step": 8230 }, { "epoch": 30.746268656716417, "grad_norm": 0.1172926276922226, "learning_rate": 6.825239153500029e-05, "loss": 0.0305, "step": 8240 }, { "epoch": 30.78358208955224, "grad_norm": 0.12199103832244873, "learning_rate": 6.817539852819149e-05, "loss": 0.0292, "step": 8250 }, { "epoch": 30.82089552238806, "grad_norm": 0.13366813957691193, "learning_rate": 6.809835583053715e-05, "loss": 0.0305, "step": 8260 }, { "epoch": 30.85820895522388, "grad_norm": 0.1589304655790329, "learning_rate": 6.802126365266905e-05, "loss": 0.0284, "step": 8270 }, { "epoch": 30.895522388059703, "grad_norm": 0.14448679983615875, "learning_rate": 6.794412220535426e-05, "loss": 0.0306, "step": 8280 }, { "epoch": 30.932835820895523, "grad_norm": 0.128362238407135, "learning_rate": 6.786693169949455e-05, "loss": 0.031, "step": 8290 }, { "epoch": 30.970149253731343, "grad_norm": 0.1749614030122757, "learning_rate": 6.778969234612584e-05, "loss": 0.0307, "step": 8300 }, { "epoch": 31.007462686567163, "grad_norm": 0.14325502514839172, "learning_rate": 6.771240435641754e-05, "loss": 0.0271, "step": 8310 }, { "epoch": 31.044776119402986, "grad_norm": 0.22686506807804108, "learning_rate": 6.763506794167208e-05, "loss": 0.0365, "step": 8320 }, { "epoch": 31.082089552238806, "grad_norm": 0.19101598858833313, "learning_rate": 6.755768331332424e-05, "loss": 0.0296, "step": 8330 }, { "epoch": 31.119402985074625, "grad_norm": 0.12777860462665558, "learning_rate": 6.748025068294067e-05, "loss": 0.0327, "step": 8340 }, { "epoch": 31.15671641791045, "grad_norm": 0.13496318459510803, "learning_rate": 6.740277026221923e-05, "loss": 0.0285, "step": 8350 }, { "epoch": 31.19402985074627, "grad_norm": 0.17581436038017273, "learning_rate": 6.732524226298841e-05, "loss": 0.0314, "step": 8360 }, { "epoch": 31.23134328358209, "grad_norm": 0.0864495038986206, "learning_rate": 6.72476668972068e-05, "loss": 0.024, "step": 8370 }, { "epoch": 31.26865671641791, "grad_norm": 0.18706722557544708, "learning_rate": 6.71700443769625e-05, "loss": 0.0296, "step": 8380 }, { "epoch": 31.30597014925373, "grad_norm": 0.15197384357452393, "learning_rate": 6.709237491447249e-05, "loss": 0.0319, "step": 8390 }, { "epoch": 31.34328358208955, "grad_norm": 0.16846439242362976, "learning_rate": 6.701465872208216e-05, "loss": 0.0323, "step": 8400 }, { "epoch": 31.380597014925375, "grad_norm": 0.1027967780828476, "learning_rate": 6.693689601226458e-05, "loss": 0.0214, "step": 8410 }, { "epoch": 31.417910447761194, "grad_norm": 0.10996536910533905, "learning_rate": 6.685908699762002e-05, "loss": 0.0279, "step": 8420 }, { "epoch": 31.455223880597014, "grad_norm": 0.14156757295131683, "learning_rate": 6.67812318908754e-05, "loss": 0.0297, "step": 8430 }, { "epoch": 31.492537313432837, "grad_norm": 0.1567368507385254, "learning_rate": 6.670333090488356e-05, "loss": 0.0315, "step": 8440 }, { "epoch": 31.529850746268657, "grad_norm": 0.16854074597358704, "learning_rate": 6.662538425262285e-05, "loss": 0.026, "step": 8450 }, { "epoch": 31.567164179104477, "grad_norm": 0.14378687739372253, "learning_rate": 6.654739214719641e-05, "loss": 0.0286, "step": 8460 }, { "epoch": 31.604477611940297, "grad_norm": 0.1158021092414856, "learning_rate": 6.646935480183173e-05, "loss": 0.0249, "step": 8470 }, { "epoch": 31.64179104477612, "grad_norm": 0.14877741038799286, "learning_rate": 6.639127242987988e-05, "loss": 0.0286, "step": 8480 }, { "epoch": 31.67910447761194, "grad_norm": 0.06401828676462173, "learning_rate": 6.631314524481513e-05, "loss": 0.0342, "step": 8490 }, { "epoch": 31.71641791044776, "grad_norm": 0.11828617751598358, "learning_rate": 6.623497346023418e-05, "loss": 0.0279, "step": 8500 }, { "epoch": 31.753731343283583, "grad_norm": 0.14267420768737793, "learning_rate": 6.615675728985572e-05, "loss": 0.0304, "step": 8510 }, { "epoch": 31.791044776119403, "grad_norm": 0.16918118298053741, "learning_rate": 6.607849694751977e-05, "loss": 0.0302, "step": 8520 }, { "epoch": 31.828358208955223, "grad_norm": 0.13828499615192413, "learning_rate": 6.600019264718713e-05, "loss": 0.0285, "step": 8530 }, { "epoch": 31.865671641791046, "grad_norm": 0.13972078263759613, "learning_rate": 6.592184460293877e-05, "loss": 0.0299, "step": 8540 }, { "epoch": 31.902985074626866, "grad_norm": 0.2056141495704651, "learning_rate": 6.584345302897523e-05, "loss": 0.0269, "step": 8550 }, { "epoch": 31.940298507462686, "grad_norm": 0.1933872550725937, "learning_rate": 6.576501813961609e-05, "loss": 0.0297, "step": 8560 }, { "epoch": 31.97761194029851, "grad_norm": 0.12873676419258118, "learning_rate": 6.568654014929932e-05, "loss": 0.0263, "step": 8570 }, { "epoch": 32.014925373134325, "grad_norm": 0.1017785519361496, "learning_rate": 6.56080192725808e-05, "loss": 0.0287, "step": 8580 }, { "epoch": 32.05223880597015, "grad_norm": 0.10982584953308105, "learning_rate": 6.552945572413358e-05, "loss": 0.0252, "step": 8590 }, { "epoch": 32.08955223880597, "grad_norm": 0.21709507703781128, "learning_rate": 6.545084971874738e-05, "loss": 0.0293, "step": 8600 }, { "epoch": 32.12686567164179, "grad_norm": 0.09698974341154099, "learning_rate": 6.537220147132805e-05, "loss": 0.0311, "step": 8610 }, { "epoch": 32.16417910447761, "grad_norm": 0.17902688682079315, "learning_rate": 6.529351119689688e-05, "loss": 0.0309, "step": 8620 }, { "epoch": 32.201492537313435, "grad_norm": 0.12380878627300262, "learning_rate": 6.521477911059008e-05, "loss": 0.0265, "step": 8630 }, { "epoch": 32.23880597014925, "grad_norm": 0.1910480409860611, "learning_rate": 6.513600542765817e-05, "loss": 0.0321, "step": 8640 }, { "epoch": 32.276119402985074, "grad_norm": 0.14889052510261536, "learning_rate": 6.505719036346539e-05, "loss": 0.0251, "step": 8650 }, { "epoch": 32.3134328358209, "grad_norm": 0.14452654123306274, "learning_rate": 6.497833413348909e-05, "loss": 0.0315, "step": 8660 }, { "epoch": 32.350746268656714, "grad_norm": 0.14575710892677307, "learning_rate": 6.489943695331923e-05, "loss": 0.0284, "step": 8670 }, { "epoch": 32.38805970149254, "grad_norm": 0.12503598630428314, "learning_rate": 6.48204990386577e-05, "loss": 0.0261, "step": 8680 }, { "epoch": 32.42537313432836, "grad_norm": 0.1625315248966217, "learning_rate": 6.474152060531768e-05, "loss": 0.0345, "step": 8690 }, { "epoch": 32.46268656716418, "grad_norm": 0.17368610203266144, "learning_rate": 6.466250186922325e-05, "loss": 0.0275, "step": 8700 }, { "epoch": 32.5, "grad_norm": 0.13465231657028198, "learning_rate": 6.458344304640858e-05, "loss": 0.0307, "step": 8710 }, { "epoch": 32.53731343283582, "grad_norm": 0.11459891498088837, "learning_rate": 6.450434435301751e-05, "loss": 0.0275, "step": 8720 }, { "epoch": 32.57462686567164, "grad_norm": 0.2402956634759903, "learning_rate": 6.44252060053028e-05, "loss": 0.0328, "step": 8730 }, { "epoch": 32.61194029850746, "grad_norm": 0.1454654186964035, "learning_rate": 6.43460282196257e-05, "loss": 0.0281, "step": 8740 }, { "epoch": 32.649253731343286, "grad_norm": 0.18076781928539276, "learning_rate": 6.426681121245527e-05, "loss": 0.0309, "step": 8750 }, { "epoch": 32.6865671641791, "grad_norm": 0.11452354490756989, "learning_rate": 6.418755520036775e-05, "loss": 0.0308, "step": 8760 }, { "epoch": 32.723880597014926, "grad_norm": 0.17728577554225922, "learning_rate": 6.410826040004607e-05, "loss": 0.0293, "step": 8770 }, { "epoch": 32.76119402985075, "grad_norm": 0.11097113788127899, "learning_rate": 6.402892702827916e-05, "loss": 0.0268, "step": 8780 }, { "epoch": 32.798507462686565, "grad_norm": 0.13701707124710083, "learning_rate": 6.394955530196147e-05, "loss": 0.0342, "step": 8790 }, { "epoch": 32.83582089552239, "grad_norm": 0.14271965622901917, "learning_rate": 6.387014543809223e-05, "loss": 0.0332, "step": 8800 }, { "epoch": 32.87313432835821, "grad_norm": 0.246138796210289, "learning_rate": 6.3790697653775e-05, "loss": 0.0346, "step": 8810 }, { "epoch": 32.91044776119403, "grad_norm": 0.13046270608901978, "learning_rate": 6.371121216621698e-05, "loss": 0.026, "step": 8820 }, { "epoch": 32.94776119402985, "grad_norm": 0.14272527396678925, "learning_rate": 6.363168919272846e-05, "loss": 0.0314, "step": 8830 }, { "epoch": 32.985074626865675, "grad_norm": 0.12290844321250916, "learning_rate": 6.355212895072223e-05, "loss": 0.0322, "step": 8840 }, { "epoch": 33.02238805970149, "grad_norm": 0.12390285730361938, "learning_rate": 6.34725316577129e-05, "loss": 0.0242, "step": 8850 }, { "epoch": 33.059701492537314, "grad_norm": 0.1426091194152832, "learning_rate": 6.339289753131649e-05, "loss": 0.0318, "step": 8860 }, { "epoch": 33.09701492537314, "grad_norm": 0.12006430327892303, "learning_rate": 6.331322678924962e-05, "loss": 0.0294, "step": 8870 }, { "epoch": 33.134328358208954, "grad_norm": 0.1369628757238388, "learning_rate": 6.323351964932908e-05, "loss": 0.0265, "step": 8880 }, { "epoch": 33.17164179104478, "grad_norm": 0.13940328359603882, "learning_rate": 6.315377632947115e-05, "loss": 0.0259, "step": 8890 }, { "epoch": 33.208955223880594, "grad_norm": 0.13609302043914795, "learning_rate": 6.307399704769099e-05, "loss": 0.0251, "step": 8900 }, { "epoch": 33.24626865671642, "grad_norm": 0.16875863075256348, "learning_rate": 6.299418202210214e-05, "loss": 0.0319, "step": 8910 }, { "epoch": 33.28358208955224, "grad_norm": 0.13128508627414703, "learning_rate": 6.291433147091583e-05, "loss": 0.0225, "step": 8920 }, { "epoch": 33.32089552238806, "grad_norm": 0.12581229209899902, "learning_rate": 6.283444561244042e-05, "loss": 0.0262, "step": 8930 }, { "epoch": 33.35820895522388, "grad_norm": 0.12713980674743652, "learning_rate": 6.275452466508077e-05, "loss": 0.0257, "step": 8940 }, { "epoch": 33.3955223880597, "grad_norm": 0.2022276669740677, "learning_rate": 6.26745688473377e-05, "loss": 0.0276, "step": 8950 }, { "epoch": 33.43283582089552, "grad_norm": 0.1446262001991272, "learning_rate": 6.259457837780742e-05, "loss": 0.0331, "step": 8960 }, { "epoch": 33.47014925373134, "grad_norm": 0.13466869294643402, "learning_rate": 6.251455347518073e-05, "loss": 0.0289, "step": 8970 }, { "epoch": 33.507462686567166, "grad_norm": 0.1328146755695343, "learning_rate": 6.243449435824276e-05, "loss": 0.0263, "step": 8980 }, { "epoch": 33.54477611940298, "grad_norm": 0.11357734352350235, "learning_rate": 6.235440124587198e-05, "loss": 0.0276, "step": 8990 }, { "epoch": 33.582089552238806, "grad_norm": 0.16766324639320374, "learning_rate": 6.227427435703997e-05, "loss": 0.0301, "step": 9000 }, { "epoch": 33.61940298507463, "grad_norm": 0.17010706663131714, "learning_rate": 6.219411391081055e-05, "loss": 0.0311, "step": 9010 }, { "epoch": 33.656716417910445, "grad_norm": 0.1424664556980133, "learning_rate": 6.211392012633932e-05, "loss": 0.0289, "step": 9020 }, { "epoch": 33.69402985074627, "grad_norm": 0.14431145787239075, "learning_rate": 6.203369322287306e-05, "loss": 0.0259, "step": 9030 }, { "epoch": 33.73134328358209, "grad_norm": 0.16278257966041565, "learning_rate": 6.195343341974899e-05, "loss": 0.0268, "step": 9040 }, { "epoch": 33.76865671641791, "grad_norm": 0.13748972117900848, "learning_rate": 6.187314093639444e-05, "loss": 0.0272, "step": 9050 }, { "epoch": 33.80597014925373, "grad_norm": 0.13281747698783875, "learning_rate": 6.179281599232591e-05, "loss": 0.026, "step": 9060 }, { "epoch": 33.843283582089555, "grad_norm": 0.13922417163848877, "learning_rate": 6.17124588071488e-05, "loss": 0.0288, "step": 9070 }, { "epoch": 33.88059701492537, "grad_norm": 0.1770545393228531, "learning_rate": 6.163206960055651e-05, "loss": 0.0308, "step": 9080 }, { "epoch": 33.917910447761194, "grad_norm": 0.16781681776046753, "learning_rate": 6.155164859233012e-05, "loss": 0.0241, "step": 9090 }, { "epoch": 33.95522388059702, "grad_norm": 0.1130417138338089, "learning_rate": 6.147119600233758e-05, "loss": 0.0271, "step": 9100 }, { "epoch": 33.992537313432834, "grad_norm": 0.16291223466396332, "learning_rate": 6.13907120505332e-05, "loss": 0.0301, "step": 9110 }, { "epoch": 34.02985074626866, "grad_norm": 0.2055405229330063, "learning_rate": 6.131019695695702e-05, "loss": 0.0349, "step": 9120 }, { "epoch": 34.06716417910448, "grad_norm": 0.1423303186893463, "learning_rate": 6.122965094173424e-05, "loss": 0.0278, "step": 9130 }, { "epoch": 34.1044776119403, "grad_norm": 0.10912077128887177, "learning_rate": 6.11490742250746e-05, "loss": 0.0247, "step": 9140 }, { "epoch": 34.14179104477612, "grad_norm": 0.15448538959026337, "learning_rate": 6.106846702727172e-05, "loss": 0.0267, "step": 9150 }, { "epoch": 34.17910447761194, "grad_norm": 0.13126212358474731, "learning_rate": 6.0987829568702656e-05, "loss": 0.0321, "step": 9160 }, { "epoch": 34.21641791044776, "grad_norm": 0.13185067474842072, "learning_rate": 6.090716206982714e-05, "loss": 0.0255, "step": 9170 }, { "epoch": 34.25373134328358, "grad_norm": 0.1447814702987671, "learning_rate": 6.0826464751186994e-05, "loss": 0.0333, "step": 9180 }, { "epoch": 34.291044776119406, "grad_norm": 0.11012827605009079, "learning_rate": 6.074573783340562e-05, "loss": 0.0282, "step": 9190 }, { "epoch": 34.32835820895522, "grad_norm": 0.12550827860832214, "learning_rate": 6.066498153718735e-05, "loss": 0.0259, "step": 9200 }, { "epoch": 34.365671641791046, "grad_norm": 0.13627253472805023, "learning_rate": 6.0584196083316794e-05, "loss": 0.0309, "step": 9210 }, { "epoch": 34.40298507462686, "grad_norm": 0.15665221214294434, "learning_rate": 6.05033816926583e-05, "loss": 0.0312, "step": 9220 }, { "epoch": 34.440298507462686, "grad_norm": 0.13754597306251526, "learning_rate": 6.042253858615532e-05, "loss": 0.0284, "step": 9230 }, { "epoch": 34.47761194029851, "grad_norm": 0.1104380339384079, "learning_rate": 6.034166698482984e-05, "loss": 0.0265, "step": 9240 }, { "epoch": 34.514925373134325, "grad_norm": 0.17137588560581207, "learning_rate": 6.026076710978171e-05, "loss": 0.027, "step": 9250 }, { "epoch": 34.55223880597015, "grad_norm": 0.21739983558654785, "learning_rate": 6.017983918218812e-05, "loss": 0.0332, "step": 9260 }, { "epoch": 34.58955223880597, "grad_norm": 0.13198427855968475, "learning_rate": 6.009888342330292e-05, "loss": 0.0257, "step": 9270 }, { "epoch": 34.62686567164179, "grad_norm": 0.1350526511669159, "learning_rate": 6.001790005445607e-05, "loss": 0.0251, "step": 9280 }, { "epoch": 34.66417910447761, "grad_norm": 0.1352643221616745, "learning_rate": 5.9936889297052986e-05, "loss": 0.0281, "step": 9290 }, { "epoch": 34.701492537313435, "grad_norm": 0.1326286941766739, "learning_rate": 5.985585137257401e-05, "loss": 0.0271, "step": 9300 }, { "epoch": 34.73880597014925, "grad_norm": 0.11800684779882431, "learning_rate": 5.977478650257374e-05, "loss": 0.0291, "step": 9310 }, { "epoch": 34.776119402985074, "grad_norm": 0.11421285569667816, "learning_rate": 5.969369490868042e-05, "loss": 0.0289, "step": 9320 }, { "epoch": 34.8134328358209, "grad_norm": 0.16851946711540222, "learning_rate": 5.961257681259535e-05, "loss": 0.0274, "step": 9330 }, { "epoch": 34.850746268656714, "grad_norm": 0.111990787088871, "learning_rate": 5.953143243609235e-05, "loss": 0.0335, "step": 9340 }, { "epoch": 34.88805970149254, "grad_norm": 0.15638987720012665, "learning_rate": 5.945026200101702e-05, "loss": 0.0286, "step": 9350 }, { "epoch": 34.92537313432836, "grad_norm": 0.13460654020309448, "learning_rate": 5.9369065729286245e-05, "loss": 0.0232, "step": 9360 }, { "epoch": 34.96268656716418, "grad_norm": 0.152596116065979, "learning_rate": 5.92878438428875e-05, "loss": 0.0324, "step": 9370 }, { "epoch": 35.0, "grad_norm": 0.15477623045444489, "learning_rate": 5.9206596563878357e-05, "loss": 0.0295, "step": 9380 }, { "epoch": 35.03731343283582, "grad_norm": 0.10074890404939651, "learning_rate": 5.912532411438576e-05, "loss": 0.0311, "step": 9390 }, { "epoch": 35.07462686567164, "grad_norm": 0.14076592028141022, "learning_rate": 5.90440267166055e-05, "loss": 0.0291, "step": 9400 }, { "epoch": 35.11194029850746, "grad_norm": 0.20552238821983337, "learning_rate": 5.896270459280153e-05, "loss": 0.0234, "step": 9410 }, { "epoch": 35.149253731343286, "grad_norm": 0.1296558380126953, "learning_rate": 5.888135796530544e-05, "loss": 0.0252, "step": 9420 }, { "epoch": 35.1865671641791, "grad_norm": 0.1414502114057541, "learning_rate": 5.8799987056515804e-05, "loss": 0.0272, "step": 9430 }, { "epoch": 35.223880597014926, "grad_norm": 0.11088557541370392, "learning_rate": 5.871859208889759e-05, "loss": 0.0265, "step": 9440 }, { "epoch": 35.26119402985075, "grad_norm": 0.10322070121765137, "learning_rate": 5.8637173284981526e-05, "loss": 0.0295, "step": 9450 }, { "epoch": 35.298507462686565, "grad_norm": 0.1704760044813156, "learning_rate": 5.85557308673635e-05, "loss": 0.0272, "step": 9460 }, { "epoch": 35.33582089552239, "grad_norm": 0.13821528851985931, "learning_rate": 5.847426505870399e-05, "loss": 0.031, "step": 9470 }, { "epoch": 35.37313432835821, "grad_norm": 0.16601887345314026, "learning_rate": 5.8392776081727385e-05, "loss": 0.0277, "step": 9480 }, { "epoch": 35.41044776119403, "grad_norm": 0.1347392350435257, "learning_rate": 5.831126415922148e-05, "loss": 0.0287, "step": 9490 }, { "epoch": 35.44776119402985, "grad_norm": 0.13811621069908142, "learning_rate": 5.8229729514036705e-05, "loss": 0.033, "step": 9500 }, { "epoch": 35.485074626865675, "grad_norm": 0.15594662725925446, "learning_rate": 5.8148172369085686e-05, "loss": 0.0303, "step": 9510 }, { "epoch": 35.52238805970149, "grad_norm": 0.1861082911491394, "learning_rate": 5.8066592947342555e-05, "loss": 0.0302, "step": 9520 }, { "epoch": 35.559701492537314, "grad_norm": 0.12091580033302307, "learning_rate": 5.798499147184233e-05, "loss": 0.0281, "step": 9530 }, { "epoch": 35.59701492537313, "grad_norm": 0.15308280289173126, "learning_rate": 5.7903368165680327e-05, "loss": 0.0285, "step": 9540 }, { "epoch": 35.634328358208954, "grad_norm": 0.11183473467826843, "learning_rate": 5.782172325201155e-05, "loss": 0.0328, "step": 9550 }, { "epoch": 35.67164179104478, "grad_norm": 0.12081501632928848, "learning_rate": 5.7740056954050084e-05, "loss": 0.0293, "step": 9560 }, { "epoch": 35.708955223880594, "grad_norm": 0.1798904836177826, "learning_rate": 5.765836949506843e-05, "loss": 0.0289, "step": 9570 }, { "epoch": 35.74626865671642, "grad_norm": 0.1439317762851715, "learning_rate": 5.757666109839702e-05, "loss": 0.0249, "step": 9580 }, { "epoch": 35.78358208955224, "grad_norm": 0.16547876596450806, "learning_rate": 5.74949319874235e-05, "loss": 0.0298, "step": 9590 }, { "epoch": 35.82089552238806, "grad_norm": 0.1731555014848709, "learning_rate": 5.74131823855921e-05, "loss": 0.0223, "step": 9600 }, { "epoch": 35.85820895522388, "grad_norm": 0.20106814801692963, "learning_rate": 5.733141251640315e-05, "loss": 0.0289, "step": 9610 }, { "epoch": 35.8955223880597, "grad_norm": 0.11127426475286484, "learning_rate": 5.72496226034123e-05, "loss": 0.0323, "step": 9620 }, { "epoch": 35.93283582089552, "grad_norm": 0.1440693587064743, "learning_rate": 5.7167812870230094e-05, "loss": 0.0356, "step": 9630 }, { "epoch": 35.97014925373134, "grad_norm": 0.10908407717943192, "learning_rate": 5.7085983540521216e-05, "loss": 0.0248, "step": 9640 }, { "epoch": 36.007462686567166, "grad_norm": 0.15123151242733002, "learning_rate": 5.70041348380039e-05, "loss": 0.0252, "step": 9650 }, { "epoch": 36.04477611940298, "grad_norm": 0.12714920938014984, "learning_rate": 5.692226698644938e-05, "loss": 0.0263, "step": 9660 }, { "epoch": 36.082089552238806, "grad_norm": 0.13215984404087067, "learning_rate": 5.6840380209681255e-05, "loss": 0.0258, "step": 9670 }, { "epoch": 36.11940298507463, "grad_norm": 0.16961364448070526, "learning_rate": 5.675847473157485e-05, "loss": 0.0275, "step": 9680 }, { "epoch": 36.156716417910445, "grad_norm": 0.16035287082195282, "learning_rate": 5.667655077605659e-05, "loss": 0.0263, "step": 9690 }, { "epoch": 36.19402985074627, "grad_norm": 0.13932132720947266, "learning_rate": 5.6594608567103456e-05, "loss": 0.0257, "step": 9700 }, { "epoch": 36.23134328358209, "grad_norm": 0.15304577350616455, "learning_rate": 5.65126483287423e-05, "loss": 0.0233, "step": 9710 }, { "epoch": 36.26865671641791, "grad_norm": 0.1556859016418457, "learning_rate": 5.6430670285049314e-05, "loss": 0.0327, "step": 9720 }, { "epoch": 36.30597014925373, "grad_norm": 0.16786599159240723, "learning_rate": 5.634867466014932e-05, "loss": 0.024, "step": 9730 }, { "epoch": 36.343283582089555, "grad_norm": 0.15629135072231293, "learning_rate": 5.6266661678215216e-05, "loss": 0.0282, "step": 9740 }, { "epoch": 36.38059701492537, "grad_norm": 0.10773632675409317, "learning_rate": 5.618463156346739e-05, "loss": 0.0272, "step": 9750 }, { "epoch": 36.417910447761194, "grad_norm": 0.17929421365261078, "learning_rate": 5.6102584540173006e-05, "loss": 0.0266, "step": 9760 }, { "epoch": 36.45522388059702, "grad_norm": 0.1450119912624359, "learning_rate": 5.602052083264555e-05, "loss": 0.0252, "step": 9770 }, { "epoch": 36.492537313432834, "grad_norm": 0.15753905475139618, "learning_rate": 5.5938440665244006e-05, "loss": 0.0251, "step": 9780 }, { "epoch": 36.52985074626866, "grad_norm": 0.13927458226680756, "learning_rate": 5.585634426237246e-05, "loss": 0.0227, "step": 9790 }, { "epoch": 36.56716417910448, "grad_norm": 0.12216200679540634, "learning_rate": 5.577423184847932e-05, "loss": 0.0277, "step": 9800 }, { "epoch": 36.6044776119403, "grad_norm": 0.28169167041778564, "learning_rate": 5.569210364805677e-05, "loss": 0.0273, "step": 9810 }, { "epoch": 36.64179104477612, "grad_norm": 0.1395770162343979, "learning_rate": 5.560995988564023e-05, "loss": 0.0253, "step": 9820 }, { "epoch": 36.67910447761194, "grad_norm": 0.21807795763015747, "learning_rate": 5.552780078580756e-05, "loss": 0.0296, "step": 9830 }, { "epoch": 36.71641791044776, "grad_norm": 0.1156320795416832, "learning_rate": 5.544562657317863e-05, "loss": 0.0267, "step": 9840 }, { "epoch": 36.75373134328358, "grad_norm": 0.12050265073776245, "learning_rate": 5.5363437472414595e-05, "loss": 0.0263, "step": 9850 }, { "epoch": 36.791044776119406, "grad_norm": 0.13960568606853485, "learning_rate": 5.52812337082173e-05, "loss": 0.0309, "step": 9860 }, { "epoch": 36.82835820895522, "grad_norm": 0.09472358971834183, "learning_rate": 5.519901550532871e-05, "loss": 0.0254, "step": 9870 }, { "epoch": 36.865671641791046, "grad_norm": 0.11379136890172958, "learning_rate": 5.511678308853026e-05, "loss": 0.0266, "step": 9880 }, { "epoch": 36.90298507462687, "grad_norm": 0.14129967987537384, "learning_rate": 5.5034536682642224e-05, "loss": 0.0303, "step": 9890 }, { "epoch": 36.940298507462686, "grad_norm": 0.15850168466567993, "learning_rate": 5.495227651252315e-05, "loss": 0.0304, "step": 9900 }, { "epoch": 36.97761194029851, "grad_norm": 0.12227936834096909, "learning_rate": 5.487000280306917e-05, "loss": 0.0258, "step": 9910 }, { "epoch": 37.014925373134325, "grad_norm": 0.16056831181049347, "learning_rate": 5.478771577921351e-05, "loss": 0.0293, "step": 9920 }, { "epoch": 37.05223880597015, "grad_norm": 0.1656666398048401, "learning_rate": 5.470541566592573e-05, "loss": 0.0263, "step": 9930 }, { "epoch": 37.08955223880597, "grad_norm": 0.2075061798095703, "learning_rate": 5.462310268821118e-05, "loss": 0.0259, "step": 9940 }, { "epoch": 37.12686567164179, "grad_norm": 0.15411338210105896, "learning_rate": 5.454077707111042e-05, "loss": 0.0297, "step": 9950 }, { "epoch": 37.16417910447761, "grad_norm": 0.17654858529567719, "learning_rate": 5.445843903969854e-05, "loss": 0.0317, "step": 9960 }, { "epoch": 37.201492537313435, "grad_norm": 0.10836059600114822, "learning_rate": 5.4376088819084556e-05, "loss": 0.0255, "step": 9970 }, { "epoch": 37.23880597014925, "grad_norm": 0.14221331477165222, "learning_rate": 5.4293726634410855e-05, "loss": 0.0262, "step": 9980 }, { "epoch": 37.276119402985074, "grad_norm": 0.16970522701740265, "learning_rate": 5.4211352710852495e-05, "loss": 0.0275, "step": 9990 }, { "epoch": 37.3134328358209, "grad_norm": 0.11511174589395523, "learning_rate": 5.4128967273616625e-05, "loss": 0.0276, "step": 10000 }, { "epoch": 37.350746268656714, "grad_norm": 0.11642615497112274, "learning_rate": 5.404657054794189e-05, "loss": 0.0283, "step": 10010 }, { "epoch": 37.38805970149254, "grad_norm": 0.11089030653238297, "learning_rate": 5.396416275909779e-05, "loss": 0.029, "step": 10020 }, { "epoch": 37.42537313432836, "grad_norm": 0.15315423905849457, "learning_rate": 5.3881744132384104e-05, "loss": 0.0289, "step": 10030 }, { "epoch": 37.46268656716418, "grad_norm": 0.12376224249601364, "learning_rate": 5.379931489313016e-05, "loss": 0.0255, "step": 10040 }, { "epoch": 37.5, "grad_norm": 0.1465853899717331, "learning_rate": 5.371687526669439e-05, "loss": 0.0302, "step": 10050 }, { "epoch": 37.53731343283582, "grad_norm": 0.17978793382644653, "learning_rate": 5.363442547846356e-05, "loss": 0.025, "step": 10060 }, { "epoch": 37.57462686567164, "grad_norm": 0.12211664766073227, "learning_rate": 5.355196575385225e-05, "loss": 0.0284, "step": 10070 }, { "epoch": 37.61194029850746, "grad_norm": 0.11175867915153503, "learning_rate": 5.3469496318302204e-05, "loss": 0.0281, "step": 10080 }, { "epoch": 37.649253731343286, "grad_norm": 0.1379425823688507, "learning_rate": 5.3387017397281704e-05, "loss": 0.0228, "step": 10090 }, { "epoch": 37.6865671641791, "grad_norm": 0.1415659636259079, "learning_rate": 5.330452921628497e-05, "loss": 0.0236, "step": 10100 }, { "epoch": 37.723880597014926, "grad_norm": 0.1218714565038681, "learning_rate": 5.322203200083154e-05, "loss": 0.0239, "step": 10110 }, { "epoch": 37.76119402985075, "grad_norm": 0.2530638873577118, "learning_rate": 5.313952597646568e-05, "loss": 0.0287, "step": 10120 }, { "epoch": 37.798507462686565, "grad_norm": 0.1346309930086136, "learning_rate": 5.305701136875566e-05, "loss": 0.028, "step": 10130 }, { "epoch": 37.83582089552239, "grad_norm": 0.14430050551891327, "learning_rate": 5.297448840329329e-05, "loss": 0.0241, "step": 10140 }, { "epoch": 37.87313432835821, "grad_norm": 0.14223158359527588, "learning_rate": 5.2891957305693205e-05, "loss": 0.0242, "step": 10150 }, { "epoch": 37.91044776119403, "grad_norm": 0.13965195417404175, "learning_rate": 5.280941830159227e-05, "loss": 0.0288, "step": 10160 }, { "epoch": 37.94776119402985, "grad_norm": 0.14658354222774506, "learning_rate": 5.2726871616649e-05, "loss": 0.025, "step": 10170 }, { "epoch": 37.985074626865675, "grad_norm": 0.16189296543598175, "learning_rate": 5.264431747654284e-05, "loss": 0.0273, "step": 10180 }, { "epoch": 38.02238805970149, "grad_norm": 0.2171725481748581, "learning_rate": 5.2561756106973656e-05, "loss": 0.0262, "step": 10190 }, { "epoch": 38.059701492537314, "grad_norm": 0.15309274196624756, "learning_rate": 5.247918773366112e-05, "loss": 0.0273, "step": 10200 }, { "epoch": 38.09701492537314, "grad_norm": 0.12091638147830963, "learning_rate": 5.2396612582343986e-05, "loss": 0.0259, "step": 10210 }, { "epoch": 38.134328358208954, "grad_norm": 0.12610192596912384, "learning_rate": 5.231403087877955e-05, "loss": 0.0254, "step": 10220 }, { "epoch": 38.17164179104478, "grad_norm": 0.17572349309921265, "learning_rate": 5.2231442848743064e-05, "loss": 0.0246, "step": 10230 }, { "epoch": 38.208955223880594, "grad_norm": 0.13860006630420685, "learning_rate": 5.214884871802703e-05, "loss": 0.0274, "step": 10240 }, { "epoch": 38.24626865671642, "grad_norm": 0.0987030565738678, "learning_rate": 5.2066248712440656e-05, "loss": 0.0254, "step": 10250 }, { "epoch": 38.28358208955224, "grad_norm": 0.15426771342754364, "learning_rate": 5.198364305780922e-05, "loss": 0.0234, "step": 10260 }, { "epoch": 38.32089552238806, "grad_norm": 0.17522168159484863, "learning_rate": 5.1901031979973394e-05, "loss": 0.029, "step": 10270 }, { "epoch": 38.35820895522388, "grad_norm": 0.13222624361515045, "learning_rate": 5.1818415704788725e-05, "loss": 0.0235, "step": 10280 }, { "epoch": 38.3955223880597, "grad_norm": 0.1906198114156723, "learning_rate": 5.1735794458124956e-05, "loss": 0.0304, "step": 10290 }, { "epoch": 38.43283582089552, "grad_norm": 0.12902778387069702, "learning_rate": 5.165316846586541e-05, "loss": 0.0271, "step": 10300 }, { "epoch": 38.47014925373134, "grad_norm": 0.15003792941570282, "learning_rate": 5.157053795390642e-05, "loss": 0.0229, "step": 10310 }, { "epoch": 38.507462686567166, "grad_norm": 0.1151634156703949, "learning_rate": 5.148790314815663e-05, "loss": 0.0254, "step": 10320 }, { "epoch": 38.54477611940298, "grad_norm": 0.14667057991027832, "learning_rate": 5.1405264274536445e-05, "loss": 0.0315, "step": 10330 }, { "epoch": 38.582089552238806, "grad_norm": 0.14457766711711884, "learning_rate": 5.132262155897739e-05, "loss": 0.0223, "step": 10340 }, { "epoch": 38.61940298507463, "grad_norm": 0.12631578743457794, "learning_rate": 5.123997522742151e-05, "loss": 0.0238, "step": 10350 }, { "epoch": 38.656716417910445, "grad_norm": 0.14647376537322998, "learning_rate": 5.1157325505820694e-05, "loss": 0.0239, "step": 10360 }, { "epoch": 38.69402985074627, "grad_norm": 0.1339976191520691, "learning_rate": 5.107467262013614e-05, "loss": 0.0208, "step": 10370 }, { "epoch": 38.73134328358209, "grad_norm": 0.13574305176734924, "learning_rate": 5.0992016796337686e-05, "loss": 0.0276, "step": 10380 }, { "epoch": 38.76865671641791, "grad_norm": 0.1807275265455246, "learning_rate": 5.0909358260403186e-05, "loss": 0.0265, "step": 10390 }, { "epoch": 38.80597014925373, "grad_norm": 0.15697626769542694, "learning_rate": 5.0826697238317935e-05, "loss": 0.0234, "step": 10400 }, { "epoch": 38.843283582089555, "grad_norm": 0.12683585286140442, "learning_rate": 5.074403395607399e-05, "loss": 0.0296, "step": 10410 }, { "epoch": 38.88059701492537, "grad_norm": 0.11971040815114975, "learning_rate": 5.066136863966963e-05, "loss": 0.0242, "step": 10420 }, { "epoch": 38.917910447761194, "grad_norm": 0.1630355417728424, "learning_rate": 5.057870151510864e-05, "loss": 0.0256, "step": 10430 }, { "epoch": 38.95522388059702, "grad_norm": 0.2496594339609146, "learning_rate": 5.0496032808399815e-05, "loss": 0.0217, "step": 10440 }, { "epoch": 38.992537313432834, "grad_norm": 0.17559197545051575, "learning_rate": 5.041336274555625e-05, "loss": 0.0266, "step": 10450 }, { "epoch": 39.02985074626866, "grad_norm": 0.12157560139894485, "learning_rate": 5.033069155259471e-05, "loss": 0.0253, "step": 10460 }, { "epoch": 39.06716417910448, "grad_norm": 0.18386155366897583, "learning_rate": 5.02480194555351e-05, "loss": 0.027, "step": 10470 }, { "epoch": 39.1044776119403, "grad_norm": 0.13233277201652527, "learning_rate": 5.016534668039976e-05, "loss": 0.0267, "step": 10480 }, { "epoch": 39.14179104477612, "grad_norm": 0.13411466777324677, "learning_rate": 5.0082673453212914e-05, "loss": 0.0226, "step": 10490 }, { "epoch": 39.17910447761194, "grad_norm": 0.1233869194984436, "learning_rate": 5e-05, "loss": 0.0251, "step": 10500 }, { "epoch": 39.21641791044776, "grad_norm": 0.1353301703929901, "learning_rate": 4.991732654678709e-05, "loss": 0.0237, "step": 10510 }, { "epoch": 39.25373134328358, "grad_norm": 0.14095327258110046, "learning_rate": 4.9834653319600246e-05, "loss": 0.0277, "step": 10520 }, { "epoch": 39.291044776119406, "grad_norm": 0.20331351459026337, "learning_rate": 4.975198054446492e-05, "loss": 0.0259, "step": 10530 }, { "epoch": 39.32835820895522, "grad_norm": 0.1917458176612854, "learning_rate": 4.96693084474053e-05, "loss": 0.0271, "step": 10540 }, { "epoch": 39.365671641791046, "grad_norm": 0.159153550863266, "learning_rate": 4.9586637254443756e-05, "loss": 0.0237, "step": 10550 }, { "epoch": 39.40298507462686, "grad_norm": 0.13301818072795868, "learning_rate": 4.950396719160018e-05, "loss": 0.0237, "step": 10560 }, { "epoch": 39.440298507462686, "grad_norm": 0.12468411773443222, "learning_rate": 4.942129848489137e-05, "loss": 0.0257, "step": 10570 }, { "epoch": 39.47761194029851, "grad_norm": 0.16620898246765137, "learning_rate": 4.93386313603304e-05, "loss": 0.028, "step": 10580 }, { "epoch": 39.514925373134325, "grad_norm": 0.09833827614784241, "learning_rate": 4.925596604392603e-05, "loss": 0.0217, "step": 10590 }, { "epoch": 39.55223880597015, "grad_norm": 0.1429758071899414, "learning_rate": 4.917330276168208e-05, "loss": 0.0275, "step": 10600 }, { "epoch": 39.58955223880597, "grad_norm": 0.08607587963342667, "learning_rate": 4.909064173959681e-05, "loss": 0.0259, "step": 10610 }, { "epoch": 39.62686567164179, "grad_norm": 0.09841455519199371, "learning_rate": 4.9007983203662326e-05, "loss": 0.0277, "step": 10620 }, { "epoch": 39.66417910447761, "grad_norm": 0.16310295462608337, "learning_rate": 4.892532737986387e-05, "loss": 0.0211, "step": 10630 }, { "epoch": 39.701492537313435, "grad_norm": 0.14656540751457214, "learning_rate": 4.884267449417931e-05, "loss": 0.0262, "step": 10640 }, { "epoch": 39.73880597014925, "grad_norm": 0.14029468595981598, "learning_rate": 4.87600247725785e-05, "loss": 0.023, "step": 10650 }, { "epoch": 39.776119402985074, "grad_norm": 0.13228537142276764, "learning_rate": 4.867737844102261e-05, "loss": 0.0231, "step": 10660 }, { "epoch": 39.8134328358209, "grad_norm": 0.11851055175065994, "learning_rate": 4.8594735725463567e-05, "loss": 0.0264, "step": 10670 }, { "epoch": 39.850746268656714, "grad_norm": 0.2095249593257904, "learning_rate": 4.851209685184338e-05, "loss": 0.0253, "step": 10680 }, { "epoch": 39.88805970149254, "grad_norm": 0.2000158131122589, "learning_rate": 4.8429462046093585e-05, "loss": 0.0277, "step": 10690 }, { "epoch": 39.92537313432836, "grad_norm": 0.16073594987392426, "learning_rate": 4.834683153413459e-05, "loss": 0.0241, "step": 10700 }, { "epoch": 39.96268656716418, "grad_norm": 0.15930184721946716, "learning_rate": 4.826420554187506e-05, "loss": 0.0252, "step": 10710 }, { "epoch": 40.0, "grad_norm": 0.1894836574792862, "learning_rate": 4.818158429521129e-05, "loss": 0.0278, "step": 10720 }, { "epoch": 40.03731343283582, "grad_norm": 0.09488458186388016, "learning_rate": 4.809896802002662e-05, "loss": 0.0205, "step": 10730 }, { "epoch": 40.07462686567164, "grad_norm": 0.12539008259773254, "learning_rate": 4.801635694219079e-05, "loss": 0.0213, "step": 10740 }, { "epoch": 40.11194029850746, "grad_norm": 0.1248989924788475, "learning_rate": 4.7933751287559335e-05, "loss": 0.0273, "step": 10750 }, { "epoch": 40.149253731343286, "grad_norm": 0.1184191182255745, "learning_rate": 4.785115128197298e-05, "loss": 0.0224, "step": 10760 }, { "epoch": 40.1865671641791, "grad_norm": 0.12063740193843842, "learning_rate": 4.776855715125694e-05, "loss": 0.0259, "step": 10770 }, { "epoch": 40.223880597014926, "grad_norm": 0.11314482986927032, "learning_rate": 4.7685969121220456e-05, "loss": 0.0253, "step": 10780 }, { "epoch": 40.26119402985075, "grad_norm": 0.08891874551773071, "learning_rate": 4.7603387417656026e-05, "loss": 0.0231, "step": 10790 }, { "epoch": 40.298507462686565, "grad_norm": 0.12430819869041443, "learning_rate": 4.7520812266338885e-05, "loss": 0.0248, "step": 10800 }, { "epoch": 40.33582089552239, "grad_norm": 0.19925013184547424, "learning_rate": 4.743824389302635e-05, "loss": 0.0232, "step": 10810 }, { "epoch": 40.37313432835821, "grad_norm": 0.1766035258769989, "learning_rate": 4.735568252345718e-05, "loss": 0.0227, "step": 10820 }, { "epoch": 40.41044776119403, "grad_norm": 0.1818736344575882, "learning_rate": 4.7273128383351015e-05, "loss": 0.0266, "step": 10830 }, { "epoch": 40.44776119402985, "grad_norm": 0.1865776628255844, "learning_rate": 4.7190581698407725e-05, "loss": 0.026, "step": 10840 }, { "epoch": 40.485074626865675, "grad_norm": 0.14903424680233002, "learning_rate": 4.710804269430681e-05, "loss": 0.029, "step": 10850 }, { "epoch": 40.52238805970149, "grad_norm": 0.12804944813251495, "learning_rate": 4.702551159670672e-05, "loss": 0.0226, "step": 10860 }, { "epoch": 40.559701492537314, "grad_norm": 0.17947590351104736, "learning_rate": 4.694298863124435e-05, "loss": 0.0252, "step": 10870 }, { "epoch": 40.59701492537313, "grad_norm": 0.10031769424676895, "learning_rate": 4.6860474023534335e-05, "loss": 0.0234, "step": 10880 }, { "epoch": 40.634328358208954, "grad_norm": 0.13865861296653748, "learning_rate": 4.677796799916845e-05, "loss": 0.0259, "step": 10890 }, { "epoch": 40.67164179104478, "grad_norm": 0.1440410614013672, "learning_rate": 4.669547078371504e-05, "loss": 0.0271, "step": 10900 }, { "epoch": 40.708955223880594, "grad_norm": 0.13466541469097137, "learning_rate": 4.66129826027183e-05, "loss": 0.0239, "step": 10910 }, { "epoch": 40.74626865671642, "grad_norm": 0.14159788191318512, "learning_rate": 4.65305036816978e-05, "loss": 0.0262, "step": 10920 }, { "epoch": 40.78358208955224, "grad_norm": 0.11835232377052307, "learning_rate": 4.6448034246147754e-05, "loss": 0.0227, "step": 10930 }, { "epoch": 40.82089552238806, "grad_norm": 0.12209437787532806, "learning_rate": 4.6365574521536445e-05, "loss": 0.0258, "step": 10940 }, { "epoch": 40.85820895522388, "grad_norm": 0.21525859832763672, "learning_rate": 4.6283124733305624e-05, "loss": 0.021, "step": 10950 }, { "epoch": 40.8955223880597, "grad_norm": 0.10109084844589233, "learning_rate": 4.620068510686985e-05, "loss": 0.0238, "step": 10960 }, { "epoch": 40.93283582089552, "grad_norm": 0.18143823742866516, "learning_rate": 4.611825586761591e-05, "loss": 0.0223, "step": 10970 }, { "epoch": 40.97014925373134, "grad_norm": 0.13277746737003326, "learning_rate": 4.60358372409022e-05, "loss": 0.0229, "step": 10980 }, { "epoch": 41.007462686567166, "grad_norm": 0.12579579651355743, "learning_rate": 4.5953429452058135e-05, "loss": 0.0256, "step": 10990 }, { "epoch": 41.04477611940298, "grad_norm": 0.12078782916069031, "learning_rate": 4.5871032726383386e-05, "loss": 0.0263, "step": 11000 }, { "epoch": 41.082089552238806, "grad_norm": 0.11179403215646744, "learning_rate": 4.5788647289147516e-05, "loss": 0.0304, "step": 11010 }, { "epoch": 41.11940298507463, "grad_norm": 0.137353777885437, "learning_rate": 4.570627336558915e-05, "loss": 0.0223, "step": 11020 }, { "epoch": 41.156716417910445, "grad_norm": 0.21319255232810974, "learning_rate": 4.562391118091544e-05, "loss": 0.0236, "step": 11030 }, { "epoch": 41.19402985074627, "grad_norm": 0.17310625314712524, "learning_rate": 4.554156096030149e-05, "loss": 0.0265, "step": 11040 }, { "epoch": 41.23134328358209, "grad_norm": 0.14055804908275604, "learning_rate": 4.545922292888959e-05, "loss": 0.0213, "step": 11050 }, { "epoch": 41.26865671641791, "grad_norm": 0.1475166529417038, "learning_rate": 4.537689731178883e-05, "loss": 0.0246, "step": 11060 }, { "epoch": 41.30597014925373, "grad_norm": 0.16311821341514587, "learning_rate": 4.529458433407429e-05, "loss": 0.0204, "step": 11070 }, { "epoch": 41.343283582089555, "grad_norm": 0.13256515562534332, "learning_rate": 4.5212284220786494e-05, "loss": 0.0289, "step": 11080 }, { "epoch": 41.38059701492537, "grad_norm": 0.19734111428260803, "learning_rate": 4.5129997196930845e-05, "loss": 0.0264, "step": 11090 }, { "epoch": 41.417910447761194, "grad_norm": 0.1428389698266983, "learning_rate": 4.504772348747687e-05, "loss": 0.0256, "step": 11100 }, { "epoch": 41.45522388059702, "grad_norm": 0.17340564727783203, "learning_rate": 4.496546331735778e-05, "loss": 0.024, "step": 11110 }, { "epoch": 41.492537313432834, "grad_norm": 0.15561352670192719, "learning_rate": 4.488321691146975e-05, "loss": 0.0301, "step": 11120 }, { "epoch": 41.52985074626866, "grad_norm": 0.16675710678100586, "learning_rate": 4.480098449467132e-05, "loss": 0.0245, "step": 11130 }, { "epoch": 41.56716417910448, "grad_norm": 0.10715004056692123, "learning_rate": 4.471876629178273e-05, "loss": 0.0244, "step": 11140 }, { "epoch": 41.6044776119403, "grad_norm": 0.12457656860351562, "learning_rate": 4.463656252758542e-05, "loss": 0.0286, "step": 11150 }, { "epoch": 41.64179104477612, "grad_norm": 0.14354181289672852, "learning_rate": 4.4554373426821374e-05, "loss": 0.0347, "step": 11160 }, { "epoch": 41.67910447761194, "grad_norm": 0.16612352430820465, "learning_rate": 4.447219921419244e-05, "loss": 0.0279, "step": 11170 }, { "epoch": 41.71641791044776, "grad_norm": 0.13321956992149353, "learning_rate": 4.439004011435979e-05, "loss": 0.0242, "step": 11180 }, { "epoch": 41.75373134328358, "grad_norm": 0.10558756440877914, "learning_rate": 4.430789635194324e-05, "loss": 0.0234, "step": 11190 }, { "epoch": 41.791044776119406, "grad_norm": 0.119489885866642, "learning_rate": 4.4225768151520694e-05, "loss": 0.031, "step": 11200 }, { "epoch": 41.82835820895522, "grad_norm": 0.11983241140842438, "learning_rate": 4.414365573762755e-05, "loss": 0.0281, "step": 11210 }, { "epoch": 41.865671641791046, "grad_norm": 0.14788614213466644, "learning_rate": 4.406155933475599e-05, "loss": 0.0306, "step": 11220 }, { "epoch": 41.90298507462687, "grad_norm": 0.19208259880542755, "learning_rate": 4.3979479167354477e-05, "loss": 0.0229, "step": 11230 }, { "epoch": 41.940298507462686, "grad_norm": 0.17858979105949402, "learning_rate": 4.3897415459827e-05, "loss": 0.0283, "step": 11240 }, { "epoch": 41.97761194029851, "grad_norm": 0.0867869034409523, "learning_rate": 4.381536843653262e-05, "loss": 0.0231, "step": 11250 }, { "epoch": 42.014925373134325, "grad_norm": 0.14575175940990448, "learning_rate": 4.373333832178478e-05, "loss": 0.0273, "step": 11260 }, { "epoch": 42.05223880597015, "grad_norm": 0.13052308559417725, "learning_rate": 4.365132533985071e-05, "loss": 0.0218, "step": 11270 }, { "epoch": 42.08955223880597, "grad_norm": 0.12592273950576782, "learning_rate": 4.3569329714950704e-05, "loss": 0.0215, "step": 11280 }, { "epoch": 42.12686567164179, "grad_norm": 0.1529986709356308, "learning_rate": 4.348735167125771e-05, "loss": 0.0238, "step": 11290 }, { "epoch": 42.16417910447761, "grad_norm": 0.1827235221862793, "learning_rate": 4.3405391432896555e-05, "loss": 0.0264, "step": 11300 }, { "epoch": 42.201492537313435, "grad_norm": 0.13462716341018677, "learning_rate": 4.3323449223943416e-05, "loss": 0.027, "step": 11310 }, { "epoch": 42.23880597014925, "grad_norm": 0.15314233303070068, "learning_rate": 4.324152526842517e-05, "loss": 0.0243, "step": 11320 }, { "epoch": 42.276119402985074, "grad_norm": 0.08454733341932297, "learning_rate": 4.315961979031875e-05, "loss": 0.0213, "step": 11330 }, { "epoch": 42.3134328358209, "grad_norm": 0.12747134268283844, "learning_rate": 4.307773301355062e-05, "loss": 0.0283, "step": 11340 }, { "epoch": 42.350746268656714, "grad_norm": 0.1505366563796997, "learning_rate": 4.2995865161996105e-05, "loss": 0.0247, "step": 11350 }, { "epoch": 42.38805970149254, "grad_norm": 0.12633542716503143, "learning_rate": 4.291401645947879e-05, "loss": 0.0247, "step": 11360 }, { "epoch": 42.42537313432836, "grad_norm": 0.1197117492556572, "learning_rate": 4.283218712976992e-05, "loss": 0.0194, "step": 11370 }, { "epoch": 42.46268656716418, "grad_norm": 0.1575552523136139, "learning_rate": 4.275037739658771e-05, "loss": 0.0294, "step": 11380 }, { "epoch": 42.5, "grad_norm": 0.13810384273529053, "learning_rate": 4.2668587483596864e-05, "loss": 0.0237, "step": 11390 }, { "epoch": 42.53731343283582, "grad_norm": 0.09534689038991928, "learning_rate": 4.2586817614407895e-05, "loss": 0.022, "step": 11400 }, { "epoch": 42.57462686567164, "grad_norm": 0.13130071759223938, "learning_rate": 4.250506801257653e-05, "loss": 0.0188, "step": 11410 }, { "epoch": 42.61194029850746, "grad_norm": 0.13016171753406525, "learning_rate": 4.2423338901602985e-05, "loss": 0.0236, "step": 11420 }, { "epoch": 42.649253731343286, "grad_norm": 0.11176010221242905, "learning_rate": 4.234163050493158e-05, "loss": 0.0232, "step": 11430 }, { "epoch": 42.6865671641791, "grad_norm": 0.16056294739246368, "learning_rate": 4.2259943045949934e-05, "loss": 0.0243, "step": 11440 }, { "epoch": 42.723880597014926, "grad_norm": 0.18838953971862793, "learning_rate": 4.2178276747988446e-05, "loss": 0.023, "step": 11450 }, { "epoch": 42.76119402985075, "grad_norm": 0.16921386122703552, "learning_rate": 4.209663183431969e-05, "loss": 0.0253, "step": 11460 }, { "epoch": 42.798507462686565, "grad_norm": 0.11748264729976654, "learning_rate": 4.201500852815768e-05, "loss": 0.029, "step": 11470 }, { "epoch": 42.83582089552239, "grad_norm": 0.08732422441244125, "learning_rate": 4.1933407052657456e-05, "loss": 0.0265, "step": 11480 }, { "epoch": 42.87313432835821, "grad_norm": 0.19406895339488983, "learning_rate": 4.1851827630914305e-05, "loss": 0.0265, "step": 11490 }, { "epoch": 42.91044776119403, "grad_norm": 0.11775784939527512, "learning_rate": 4.17702704859633e-05, "loss": 0.022, "step": 11500 }, { "epoch": 42.94776119402985, "grad_norm": 0.0994153693318367, "learning_rate": 4.1688735840778546e-05, "loss": 0.0255, "step": 11510 }, { "epoch": 42.985074626865675, "grad_norm": 0.1767520159482956, "learning_rate": 4.160722391827262e-05, "loss": 0.0228, "step": 11520 }, { "epoch": 43.02238805970149, "grad_norm": 0.0780944675207138, "learning_rate": 4.1525734941296026e-05, "loss": 0.0252, "step": 11530 }, { "epoch": 43.059701492537314, "grad_norm": 0.09312419593334198, "learning_rate": 4.14442691326365e-05, "loss": 0.0241, "step": 11540 }, { "epoch": 43.09701492537314, "grad_norm": 0.12718930840492249, "learning_rate": 4.13628267150185e-05, "loss": 0.0229, "step": 11550 }, { "epoch": 43.134328358208954, "grad_norm": 0.11635901778936386, "learning_rate": 4.1281407911102425e-05, "loss": 0.0239, "step": 11560 }, { "epoch": 43.17164179104478, "grad_norm": 0.14499343931674957, "learning_rate": 4.120001294348421e-05, "loss": 0.0247, "step": 11570 }, { "epoch": 43.208955223880594, "grad_norm": 0.16728025674819946, "learning_rate": 4.111864203469457e-05, "loss": 0.0244, "step": 11580 }, { "epoch": 43.24626865671642, "grad_norm": 0.1040458083152771, "learning_rate": 4.103729540719847e-05, "loss": 0.0197, "step": 11590 }, { "epoch": 43.28358208955224, "grad_norm": 0.13489454984664917, "learning_rate": 4.095597328339452e-05, "loss": 0.0271, "step": 11600 }, { "epoch": 43.32089552238806, "grad_norm": 0.23320449888706207, "learning_rate": 4.087467588561424e-05, "loss": 0.0237, "step": 11610 }, { "epoch": 43.35820895522388, "grad_norm": 0.08479960262775421, "learning_rate": 4.079340343612165e-05, "loss": 0.0238, "step": 11620 }, { "epoch": 43.3955223880597, "grad_norm": 0.22610676288604736, "learning_rate": 4.07121561571125e-05, "loss": 0.0258, "step": 11630 }, { "epoch": 43.43283582089552, "grad_norm": 0.10122967511415482, "learning_rate": 4.063093427071376e-05, "loss": 0.0227, "step": 11640 }, { "epoch": 43.47014925373134, "grad_norm": 0.12964020669460297, "learning_rate": 4.0549737998983e-05, "loss": 0.0247, "step": 11650 }, { "epoch": 43.507462686567166, "grad_norm": 0.1449606716632843, "learning_rate": 4.046856756390767e-05, "loss": 0.0215, "step": 11660 }, { "epoch": 43.54477611940298, "grad_norm": 0.10249517858028412, "learning_rate": 4.038742318740465e-05, "loss": 0.0218, "step": 11670 }, { "epoch": 43.582089552238806, "grad_norm": 0.11027264595031738, "learning_rate": 4.0306305091319595e-05, "loss": 0.0229, "step": 11680 }, { "epoch": 43.61940298507463, "grad_norm": 0.10887347906827927, "learning_rate": 4.0225213497426276e-05, "loss": 0.0229, "step": 11690 }, { "epoch": 43.656716417910445, "grad_norm": 0.15887947380542755, "learning_rate": 4.0144148627425993e-05, "loss": 0.0246, "step": 11700 }, { "epoch": 43.69402985074627, "grad_norm": 0.2000218778848648, "learning_rate": 4.006311070294702e-05, "loss": 0.0239, "step": 11710 }, { "epoch": 43.73134328358209, "grad_norm": 0.12649449706077576, "learning_rate": 3.9982099945543945e-05, "loss": 0.0321, "step": 11720 }, { "epoch": 43.76865671641791, "grad_norm": 0.17891472578048706, "learning_rate": 3.9901116576697083e-05, "loss": 0.0219, "step": 11730 }, { "epoch": 43.80597014925373, "grad_norm": 0.15477579832077026, "learning_rate": 3.982016081781189e-05, "loss": 0.0246, "step": 11740 }, { "epoch": 43.843283582089555, "grad_norm": 0.11766377091407776, "learning_rate": 3.973923289021829e-05, "loss": 0.0214, "step": 11750 }, { "epoch": 43.88059701492537, "grad_norm": 0.11015570908784866, "learning_rate": 3.965833301517017e-05, "loss": 0.0213, "step": 11760 }, { "epoch": 43.917910447761194, "grad_norm": 0.13538673520088196, "learning_rate": 3.9577461413844684e-05, "loss": 0.0207, "step": 11770 }, { "epoch": 43.95522388059702, "grad_norm": 0.1324649155139923, "learning_rate": 3.949661830734172e-05, "loss": 0.0226, "step": 11780 }, { "epoch": 43.992537313432834, "grad_norm": 0.15287022292613983, "learning_rate": 3.9415803916683224e-05, "loss": 0.0238, "step": 11790 }, { "epoch": 44.02985074626866, "grad_norm": 0.1688290238380432, "learning_rate": 3.933501846281267e-05, "loss": 0.026, "step": 11800 }, { "epoch": 44.06716417910448, "grad_norm": 0.12445367127656937, "learning_rate": 3.925426216659438e-05, "loss": 0.0239, "step": 11810 }, { "epoch": 44.1044776119403, "grad_norm": 0.13560178875923157, "learning_rate": 3.917353524881302e-05, "loss": 0.0197, "step": 11820 }, { "epoch": 44.14179104477612, "grad_norm": 0.14289440214633942, "learning_rate": 3.9092837930172884e-05, "loss": 0.026, "step": 11830 }, { "epoch": 44.17910447761194, "grad_norm": 0.14480352401733398, "learning_rate": 3.901217043129735e-05, "loss": 0.022, "step": 11840 }, { "epoch": 44.21641791044776, "grad_norm": 0.2291680872440338, "learning_rate": 3.8931532972728285e-05, "loss": 0.0212, "step": 11850 }, { "epoch": 44.25373134328358, "grad_norm": 0.11258632689714432, "learning_rate": 3.8850925774925425e-05, "loss": 0.0256, "step": 11860 }, { "epoch": 44.291044776119406, "grad_norm": 0.19027557969093323, "learning_rate": 3.877034905826577e-05, "loss": 0.0295, "step": 11870 }, { "epoch": 44.32835820895522, "grad_norm": 0.11086759716272354, "learning_rate": 3.8689803043043e-05, "loss": 0.0273, "step": 11880 }, { "epoch": 44.365671641791046, "grad_norm": 0.13299565017223358, "learning_rate": 3.860928794946682e-05, "loss": 0.0229, "step": 11890 }, { "epoch": 44.40298507462686, "grad_norm": 0.15393038094043732, "learning_rate": 3.852880399766243e-05, "loss": 0.0258, "step": 11900 }, { "epoch": 44.440298507462686, "grad_norm": 0.12500539422035217, "learning_rate": 3.844835140766988e-05, "loss": 0.0222, "step": 11910 }, { "epoch": 44.47761194029851, "grad_norm": 0.13150183856487274, "learning_rate": 3.836793039944349e-05, "loss": 0.0301, "step": 11920 }, { "epoch": 44.514925373134325, "grad_norm": 0.14131276309490204, "learning_rate": 3.828754119285123e-05, "loss": 0.0219, "step": 11930 }, { "epoch": 44.55223880597015, "grad_norm": 0.14066272974014282, "learning_rate": 3.820718400767409e-05, "loss": 0.0202, "step": 11940 }, { "epoch": 44.58955223880597, "grad_norm": 0.15809442102909088, "learning_rate": 3.812685906360557e-05, "loss": 0.0264, "step": 11950 }, { "epoch": 44.62686567164179, "grad_norm": 0.1730141043663025, "learning_rate": 3.8046566580251e-05, "loss": 0.0244, "step": 11960 }, { "epoch": 44.66417910447761, "grad_norm": 0.1478641927242279, "learning_rate": 3.796630677712697e-05, "loss": 0.0229, "step": 11970 }, { "epoch": 44.701492537313435, "grad_norm": 0.11353864520788193, "learning_rate": 3.788607987366069e-05, "loss": 0.0267, "step": 11980 }, { "epoch": 44.73880597014925, "grad_norm": 0.21580363810062408, "learning_rate": 3.780588608918947e-05, "loss": 0.0226, "step": 11990 }, { "epoch": 44.776119402985074, "grad_norm": 0.10920669883489609, "learning_rate": 3.772572564296005e-05, "loss": 0.0222, "step": 12000 }, { "epoch": 44.8134328358209, "grad_norm": 0.10214291512966156, "learning_rate": 3.764559875412803e-05, "loss": 0.0243, "step": 12010 }, { "epoch": 44.850746268656714, "grad_norm": 0.15345777571201324, "learning_rate": 3.756550564175727e-05, "loss": 0.024, "step": 12020 }, { "epoch": 44.88805970149254, "grad_norm": 0.10739658027887344, "learning_rate": 3.748544652481927e-05, "loss": 0.0269, "step": 12030 }, { "epoch": 44.92537313432836, "grad_norm": 0.16079604625701904, "learning_rate": 3.74054216221926e-05, "loss": 0.0249, "step": 12040 }, { "epoch": 44.96268656716418, "grad_norm": 0.19913697242736816, "learning_rate": 3.73254311526623e-05, "loss": 0.0233, "step": 12050 }, { "epoch": 45.0, "grad_norm": 0.1275843232870102, "learning_rate": 3.7245475334919246e-05, "loss": 0.0232, "step": 12060 }, { "epoch": 45.03731343283582, "grad_norm": 0.11753203719854355, "learning_rate": 3.716555438755961e-05, "loss": 0.0215, "step": 12070 }, { "epoch": 45.07462686567164, "grad_norm": 0.14887163043022156, "learning_rate": 3.7085668529084184e-05, "loss": 0.0288, "step": 12080 }, { "epoch": 45.11194029850746, "grad_norm": 0.1212143823504448, "learning_rate": 3.700581797789786e-05, "loss": 0.0205, "step": 12090 }, { "epoch": 45.149253731343286, "grad_norm": 0.1114036962389946, "learning_rate": 3.6926002952309016e-05, "loss": 0.0201, "step": 12100 }, { "epoch": 45.1865671641791, "grad_norm": 0.12109759449958801, "learning_rate": 3.684622367052887e-05, "loss": 0.0229, "step": 12110 }, { "epoch": 45.223880597014926, "grad_norm": 0.08198681473731995, "learning_rate": 3.676648035067093e-05, "loss": 0.0214, "step": 12120 }, { "epoch": 45.26119402985075, "grad_norm": 0.13337881863117218, "learning_rate": 3.6686773210750385e-05, "loss": 0.0237, "step": 12130 }, { "epoch": 45.298507462686565, "grad_norm": 0.11571992188692093, "learning_rate": 3.6607102468683526e-05, "loss": 0.0223, "step": 12140 }, { "epoch": 45.33582089552239, "grad_norm": 0.14970065653324127, "learning_rate": 3.65274683422871e-05, "loss": 0.0226, "step": 12150 }, { "epoch": 45.37313432835821, "grad_norm": 0.0912921130657196, "learning_rate": 3.6447871049277796e-05, "loss": 0.0238, "step": 12160 }, { "epoch": 45.41044776119403, "grad_norm": 0.15796604752540588, "learning_rate": 3.636831080727154e-05, "loss": 0.0319, "step": 12170 }, { "epoch": 45.44776119402985, "grad_norm": 0.09365538507699966, "learning_rate": 3.628878783378302e-05, "loss": 0.023, "step": 12180 }, { "epoch": 45.485074626865675, "grad_norm": 0.12802104651927948, "learning_rate": 3.6209302346225006e-05, "loss": 0.0258, "step": 12190 }, { "epoch": 45.52238805970149, "grad_norm": 0.13299128413200378, "learning_rate": 3.612985456190778e-05, "loss": 0.0281, "step": 12200 }, { "epoch": 45.559701492537314, "grad_norm": 0.12453636527061462, "learning_rate": 3.605044469803854e-05, "loss": 0.0202, "step": 12210 }, { "epoch": 45.59701492537313, "grad_norm": 0.12729981541633606, "learning_rate": 3.597107297172084e-05, "loss": 0.0231, "step": 12220 }, { "epoch": 45.634328358208954, "grad_norm": 0.16555693745613098, "learning_rate": 3.5891739599953945e-05, "loss": 0.0262, "step": 12230 }, { "epoch": 45.67164179104478, "grad_norm": 0.19759279489517212, "learning_rate": 3.581244479963225e-05, "loss": 0.0249, "step": 12240 }, { "epoch": 45.708955223880594, "grad_norm": 0.14443537592887878, "learning_rate": 3.5733188787544745e-05, "loss": 0.0233, "step": 12250 }, { "epoch": 45.74626865671642, "grad_norm": 0.10733987390995026, "learning_rate": 3.5653971780374295e-05, "loss": 0.0191, "step": 12260 }, { "epoch": 45.78358208955224, "grad_norm": 0.08959952741861343, "learning_rate": 3.557479399469721e-05, "loss": 0.0258, "step": 12270 }, { "epoch": 45.82089552238806, "grad_norm": 0.1537943333387375, "learning_rate": 3.5495655646982505e-05, "loss": 0.0243, "step": 12280 }, { "epoch": 45.85820895522388, "grad_norm": 0.21775025129318237, "learning_rate": 3.541655695359142e-05, "loss": 0.0254, "step": 12290 }, { "epoch": 45.8955223880597, "grad_norm": 0.12055443227291107, "learning_rate": 3.533749813077677e-05, "loss": 0.0237, "step": 12300 }, { "epoch": 45.93283582089552, "grad_norm": 0.060004930943250656, "learning_rate": 3.525847939468233e-05, "loss": 0.0213, "step": 12310 }, { "epoch": 45.97014925373134, "grad_norm": 0.1287485659122467, "learning_rate": 3.517950096134232e-05, "loss": 0.0239, "step": 12320 }, { "epoch": 46.007462686567166, "grad_norm": 0.1219983845949173, "learning_rate": 3.5100563046680764e-05, "loss": 0.0231, "step": 12330 }, { "epoch": 46.04477611940298, "grad_norm": 0.12523891031742096, "learning_rate": 3.5021665866510925e-05, "loss": 0.0186, "step": 12340 }, { "epoch": 46.082089552238806, "grad_norm": 0.12046749144792557, "learning_rate": 3.494280963653463e-05, "loss": 0.0223, "step": 12350 }, { "epoch": 46.11940298507463, "grad_norm": 0.0796799585223198, "learning_rate": 3.4863994572341843e-05, "loss": 0.0228, "step": 12360 }, { "epoch": 46.156716417910445, "grad_norm": 0.17884203791618347, "learning_rate": 3.478522088940993e-05, "loss": 0.0183, "step": 12370 }, { "epoch": 46.19402985074627, "grad_norm": 0.17840121686458588, "learning_rate": 3.470648880310313e-05, "loss": 0.02, "step": 12380 }, { "epoch": 46.23134328358209, "grad_norm": 0.10597117990255356, "learning_rate": 3.462779852867197e-05, "loss": 0.0242, "step": 12390 }, { "epoch": 46.26865671641791, "grad_norm": 0.12374546378850937, "learning_rate": 3.4549150281252636e-05, "loss": 0.0225, "step": 12400 }, { "epoch": 46.30597014925373, "grad_norm": 0.1119808480143547, "learning_rate": 3.447054427586644e-05, "loss": 0.0233, "step": 12410 }, { "epoch": 46.343283582089555, "grad_norm": 0.10380487143993378, "learning_rate": 3.439198072741921e-05, "loss": 0.024, "step": 12420 }, { "epoch": 46.38059701492537, "grad_norm": 0.14937321841716766, "learning_rate": 3.431345985070067e-05, "loss": 0.0247, "step": 12430 }, { "epoch": 46.417910447761194, "grad_norm": 0.07916778326034546, "learning_rate": 3.423498186038393e-05, "loss": 0.0206, "step": 12440 }, { "epoch": 46.45522388059702, "grad_norm": 0.1399843543767929, "learning_rate": 3.4156546971024784e-05, "loss": 0.0232, "step": 12450 }, { "epoch": 46.492537313432834, "grad_norm": 0.1189795508980751, "learning_rate": 3.407815539706124e-05, "loss": 0.0215, "step": 12460 }, { "epoch": 46.52985074626866, "grad_norm": 0.13561174273490906, "learning_rate": 3.399980735281286e-05, "loss": 0.0242, "step": 12470 }, { "epoch": 46.56716417910448, "grad_norm": 0.18860986828804016, "learning_rate": 3.392150305248024e-05, "loss": 0.024, "step": 12480 }, { "epoch": 46.6044776119403, "grad_norm": 0.14801068603992462, "learning_rate": 3.384324271014429e-05, "loss": 0.0251, "step": 12490 }, { "epoch": 46.64179104477612, "grad_norm": 0.13338293135166168, "learning_rate": 3.3765026539765834e-05, "loss": 0.0231, "step": 12500 }, { "epoch": 46.67910447761194, "grad_norm": 0.12494348734617233, "learning_rate": 3.368685475518488e-05, "loss": 0.0231, "step": 12510 }, { "epoch": 46.71641791044776, "grad_norm": 0.09320265054702759, "learning_rate": 3.360872757012011e-05, "loss": 0.0216, "step": 12520 }, { "epoch": 46.75373134328358, "grad_norm": 0.13676965236663818, "learning_rate": 3.3530645198168295e-05, "loss": 0.022, "step": 12530 }, { "epoch": 46.791044776119406, "grad_norm": 0.16494108736515045, "learning_rate": 3.3452607852803584e-05, "loss": 0.0215, "step": 12540 }, { "epoch": 46.82835820895522, "grad_norm": 0.11428332328796387, "learning_rate": 3.337461574737716e-05, "loss": 0.0197, "step": 12550 }, { "epoch": 46.865671641791046, "grad_norm": 0.11694550514221191, "learning_rate": 3.329666909511645e-05, "loss": 0.0234, "step": 12560 }, { "epoch": 46.90298507462687, "grad_norm": 0.12394881248474121, "learning_rate": 3.321876810912461e-05, "loss": 0.0219, "step": 12570 }, { "epoch": 46.940298507462686, "grad_norm": 0.17600436508655548, "learning_rate": 3.3140913002379995e-05, "loss": 0.0234, "step": 12580 }, { "epoch": 46.97761194029851, "grad_norm": 0.11403163522481918, "learning_rate": 3.3063103987735433e-05, "loss": 0.0226, "step": 12590 }, { "epoch": 47.014925373134325, "grad_norm": 0.09856057912111282, "learning_rate": 3.298534127791785e-05, "loss": 0.0188, "step": 12600 }, { "epoch": 47.05223880597015, "grad_norm": 0.12408200651407242, "learning_rate": 3.2907625085527503e-05, "loss": 0.0232, "step": 12610 }, { "epoch": 47.08955223880597, "grad_norm": 0.1113908588886261, "learning_rate": 3.282995562303754e-05, "loss": 0.0211, "step": 12620 }, { "epoch": 47.12686567164179, "grad_norm": 0.14994896948337555, "learning_rate": 3.275233310279321e-05, "loss": 0.0223, "step": 12630 }, { "epoch": 47.16417910447761, "grad_norm": 0.14600922167301178, "learning_rate": 3.267475773701161e-05, "loss": 0.0223, "step": 12640 }, { "epoch": 47.201492537313435, "grad_norm": 0.16115038096904755, "learning_rate": 3.2597229737780774e-05, "loss": 0.0222, "step": 12650 }, { "epoch": 47.23880597014925, "grad_norm": 0.1121780201792717, "learning_rate": 3.251974931705933e-05, "loss": 0.022, "step": 12660 }, { "epoch": 47.276119402985074, "grad_norm": 0.16974890232086182, "learning_rate": 3.244231668667578e-05, "loss": 0.0251, "step": 12670 }, { "epoch": 47.3134328358209, "grad_norm": 0.09503891319036484, "learning_rate": 3.236493205832795e-05, "loss": 0.0215, "step": 12680 }, { "epoch": 47.350746268656714, "grad_norm": 0.16446542739868164, "learning_rate": 3.228759564358248e-05, "loss": 0.0232, "step": 12690 }, { "epoch": 47.38805970149254, "grad_norm": 0.14546583592891693, "learning_rate": 3.221030765387417e-05, "loss": 0.0234, "step": 12700 }, { "epoch": 47.42537313432836, "grad_norm": 0.17383699119091034, "learning_rate": 3.2133068300505455e-05, "loss": 0.0307, "step": 12710 }, { "epoch": 47.46268656716418, "grad_norm": 0.13594375550746918, "learning_rate": 3.205587779464576e-05, "loss": 0.0201, "step": 12720 }, { "epoch": 47.5, "grad_norm": 0.14842519164085388, "learning_rate": 3.197873634733096e-05, "loss": 0.0225, "step": 12730 }, { "epoch": 47.53731343283582, "grad_norm": 0.22028860449790955, "learning_rate": 3.190164416946285e-05, "loss": 0.0203, "step": 12740 }, { "epoch": 47.57462686567164, "grad_norm": 0.12517951428890228, "learning_rate": 3.18246014718085e-05, "loss": 0.025, "step": 12750 }, { "epoch": 47.61194029850746, "grad_norm": 0.1442970186471939, "learning_rate": 3.1747608464999725e-05, "loss": 0.0224, "step": 12760 }, { "epoch": 47.649253731343286, "grad_norm": 0.18938858807086945, "learning_rate": 3.167066535953242e-05, "loss": 0.024, "step": 12770 }, { "epoch": 47.6865671641791, "grad_norm": 0.14149807393550873, "learning_rate": 3.1593772365766105e-05, "loss": 0.0208, "step": 12780 }, { "epoch": 47.723880597014926, "grad_norm": 0.12687914073467255, "learning_rate": 3.1516929693923315e-05, "loss": 0.0259, "step": 12790 }, { "epoch": 47.76119402985075, "grad_norm": 0.13417690992355347, "learning_rate": 3.144013755408895e-05, "loss": 0.0223, "step": 12800 }, { "epoch": 47.798507462686565, "grad_norm": 0.1326243132352829, "learning_rate": 3.136339615620985e-05, "loss": 0.025, "step": 12810 }, { "epoch": 47.83582089552239, "grad_norm": 0.13783913850784302, "learning_rate": 3.128670571009399e-05, "loss": 0.0288, "step": 12820 }, { "epoch": 47.87313432835821, "grad_norm": 0.14776673913002014, "learning_rate": 3.121006642541014e-05, "loss": 0.0284, "step": 12830 }, { "epoch": 47.91044776119403, "grad_norm": 0.23042693734169006, "learning_rate": 3.113347851168721e-05, "loss": 0.0216, "step": 12840 }, { "epoch": 47.94776119402985, "grad_norm": 0.1235581487417221, "learning_rate": 3.105694217831361e-05, "loss": 0.025, "step": 12850 }, { "epoch": 47.985074626865675, "grad_norm": 0.11921337246894836, "learning_rate": 3.098045763453678e-05, "loss": 0.0194, "step": 12860 }, { "epoch": 48.02238805970149, "grad_norm": 0.10627312958240509, "learning_rate": 3.090402508946249e-05, "loss": 0.0193, "step": 12870 }, { "epoch": 48.059701492537314, "grad_norm": 0.14170552790164948, "learning_rate": 3.082764475205442e-05, "loss": 0.0238, "step": 12880 }, { "epoch": 48.09701492537314, "grad_norm": 0.10793954133987427, "learning_rate": 3.075131683113352e-05, "loss": 0.0206, "step": 12890 }, { "epoch": 48.134328358208954, "grad_norm": 0.16192053258419037, "learning_rate": 3.0675041535377405e-05, "loss": 0.0222, "step": 12900 }, { "epoch": 48.17164179104478, "grad_norm": 0.1619051992893219, "learning_rate": 3.059881907331979e-05, "loss": 0.0219, "step": 12910 }, { "epoch": 48.208955223880594, "grad_norm": 0.1666599065065384, "learning_rate": 3.052264965335e-05, "loss": 0.0228, "step": 12920 }, { "epoch": 48.24626865671642, "grad_norm": 0.11401742696762085, "learning_rate": 3.0446533483712304e-05, "loss": 0.0229, "step": 12930 }, { "epoch": 48.28358208955224, "grad_norm": 0.09114587306976318, "learning_rate": 3.0370470772505433e-05, "loss": 0.0241, "step": 12940 }, { "epoch": 48.32089552238806, "grad_norm": 0.11798617243766785, "learning_rate": 3.0294461727681932e-05, "loss": 0.02, "step": 12950 }, { "epoch": 48.35820895522388, "grad_norm": 0.12310025840997696, "learning_rate": 3.0218506557047598e-05, "loss": 0.0215, "step": 12960 }, { "epoch": 48.3955223880597, "grad_norm": 0.11902189254760742, "learning_rate": 3.0142605468260978e-05, "loss": 0.0227, "step": 12970 }, { "epoch": 48.43283582089552, "grad_norm": 0.13520246744155884, "learning_rate": 3.006675866883275e-05, "loss": 0.0227, "step": 12980 }, { "epoch": 48.47014925373134, "grad_norm": 0.16067832708358765, "learning_rate": 2.999096636612518e-05, "loss": 0.0236, "step": 12990 }, { "epoch": 48.507462686567166, "grad_norm": 0.11780519038438797, "learning_rate": 2.991522876735154e-05, "loss": 0.0214, "step": 13000 }, { "epoch": 48.54477611940298, "grad_norm": 0.15027675032615662, "learning_rate": 2.9839546079575497e-05, "loss": 0.0231, "step": 13010 }, { "epoch": 48.582089552238806, "grad_norm": 0.14895616471767426, "learning_rate": 2.976391850971065e-05, "loss": 0.0263, "step": 13020 }, { "epoch": 48.61940298507463, "grad_norm": 0.14379404485225677, "learning_rate": 2.9688346264519866e-05, "loss": 0.0199, "step": 13030 }, { "epoch": 48.656716417910445, "grad_norm": 0.15204234421253204, "learning_rate": 2.9612829550614836e-05, "loss": 0.0241, "step": 13040 }, { "epoch": 48.69402985074627, "grad_norm": 0.09257636219263077, "learning_rate": 2.9537368574455304e-05, "loss": 0.021, "step": 13050 }, { "epoch": 48.73134328358209, "grad_norm": 0.09366945177316666, "learning_rate": 2.9461963542348737e-05, "loss": 0.0202, "step": 13060 }, { "epoch": 48.76865671641791, "grad_norm": 0.13519001007080078, "learning_rate": 2.9386614660449596e-05, "loss": 0.0246, "step": 13070 }, { "epoch": 48.80597014925373, "grad_norm": 0.09995035827159882, "learning_rate": 2.931132213475884e-05, "loss": 0.0187, "step": 13080 }, { "epoch": 48.843283582089555, "grad_norm": 0.1446525901556015, "learning_rate": 2.9236086171123404e-05, "loss": 0.0221, "step": 13090 }, { "epoch": 48.88059701492537, "grad_norm": 0.10364940017461777, "learning_rate": 2.916090697523549e-05, "loss": 0.0267, "step": 13100 }, { "epoch": 48.917910447761194, "grad_norm": 0.12823998928070068, "learning_rate": 2.9085784752632157e-05, "loss": 0.0215, "step": 13110 }, { "epoch": 48.95522388059702, "grad_norm": 0.11062665283679962, "learning_rate": 2.9010719708694722e-05, "loss": 0.0215, "step": 13120 }, { "epoch": 48.992537313432834, "grad_norm": 0.25769639015197754, "learning_rate": 2.8935712048648112e-05, "loss": 0.0229, "step": 13130 }, { "epoch": 49.02985074626866, "grad_norm": 0.09254038333892822, "learning_rate": 2.8860761977560436e-05, "loss": 0.0256, "step": 13140 }, { "epoch": 49.06716417910448, "grad_norm": 0.142227441072464, "learning_rate": 2.878586970034232e-05, "loss": 0.0208, "step": 13150 }, { "epoch": 49.1044776119403, "grad_norm": 0.1282985359430313, "learning_rate": 2.8711035421746367e-05, "loss": 0.0193, "step": 13160 }, { "epoch": 49.14179104477612, "grad_norm": 0.09821700304746628, "learning_rate": 2.8636259346366666e-05, "loss": 0.0167, "step": 13170 }, { "epoch": 49.17910447761194, "grad_norm": 0.1849426031112671, "learning_rate": 2.8561541678638142e-05, "loss": 0.0229, "step": 13180 }, { "epoch": 49.21641791044776, "grad_norm": 0.1085592582821846, "learning_rate": 2.8486882622836026e-05, "loss": 0.0225, "step": 13190 }, { "epoch": 49.25373134328358, "grad_norm": 0.09918786585330963, "learning_rate": 2.8412282383075363e-05, "loss": 0.0188, "step": 13200 }, { "epoch": 49.291044776119406, "grad_norm": 0.13383710384368896, "learning_rate": 2.8337741163310317e-05, "loss": 0.0262, "step": 13210 }, { "epoch": 49.32835820895522, "grad_norm": 0.16171598434448242, "learning_rate": 2.8263259167333777e-05, "loss": 0.0205, "step": 13220 }, { "epoch": 49.365671641791046, "grad_norm": 0.22855153679847717, "learning_rate": 2.8188836598776662e-05, "loss": 0.0235, "step": 13230 }, { "epoch": 49.40298507462686, "grad_norm": 0.16824224591255188, "learning_rate": 2.811447366110741e-05, "loss": 0.0225, "step": 13240 }, { "epoch": 49.440298507462686, "grad_norm": 0.13408814370632172, "learning_rate": 2.804017055763149e-05, "loss": 0.0268, "step": 13250 }, { "epoch": 49.47761194029851, "grad_norm": 0.09579440206289291, "learning_rate": 2.7965927491490705e-05, "loss": 0.0206, "step": 13260 }, { "epoch": 49.514925373134325, "grad_norm": 0.1929415613412857, "learning_rate": 2.7891744665662823e-05, "loss": 0.0246, "step": 13270 }, { "epoch": 49.55223880597015, "grad_norm": 0.11111212521791458, "learning_rate": 2.7817622282960815e-05, "loss": 0.0235, "step": 13280 }, { "epoch": 49.58955223880597, "grad_norm": 0.13075938820838928, "learning_rate": 2.774356054603243e-05, "loss": 0.0273, "step": 13290 }, { "epoch": 49.62686567164179, "grad_norm": 0.1561594158411026, "learning_rate": 2.766955965735968e-05, "loss": 0.0233, "step": 13300 }, { "epoch": 49.66417910447761, "grad_norm": 0.13065339624881744, "learning_rate": 2.7595619819258116e-05, "loss": 0.0253, "step": 13310 }, { "epoch": 49.701492537313435, "grad_norm": 0.12952469289302826, "learning_rate": 2.7521741233876496e-05, "loss": 0.0202, "step": 13320 }, { "epoch": 49.73880597014925, "grad_norm": 0.1551758199930191, "learning_rate": 2.7447924103195976e-05, "loss": 0.0193, "step": 13330 }, { "epoch": 49.776119402985074, "grad_norm": 0.1339927613735199, "learning_rate": 2.7374168629029813e-05, "loss": 0.0246, "step": 13340 }, { "epoch": 49.8134328358209, "grad_norm": 0.1858087182044983, "learning_rate": 2.7300475013022663e-05, "loss": 0.0228, "step": 13350 }, { "epoch": 49.850746268656714, "grad_norm": 0.12416113913059235, "learning_rate": 2.7226843456650037e-05, "loss": 0.0216, "step": 13360 }, { "epoch": 49.88805970149254, "grad_norm": 0.09504809975624084, "learning_rate": 2.7153274161217846e-05, "loss": 0.0213, "step": 13370 }, { "epoch": 49.92537313432836, "grad_norm": 0.11079956591129303, "learning_rate": 2.707976732786166e-05, "loss": 0.0215, "step": 13380 }, { "epoch": 49.96268656716418, "grad_norm": 0.10549938678741455, "learning_rate": 2.7006323157546386e-05, "loss": 0.0207, "step": 13390 }, { "epoch": 50.0, "grad_norm": 0.14993080496788025, "learning_rate": 2.693294185106562e-05, "loss": 0.0227, "step": 13400 }, { "epoch": 50.03731343283582, "grad_norm": 0.11769942939281464, "learning_rate": 2.6859623609040984e-05, "loss": 0.0203, "step": 13410 }, { "epoch": 50.07462686567164, "grad_norm": 0.11664724349975586, "learning_rate": 2.6786368631921836e-05, "loss": 0.019, "step": 13420 }, { "epoch": 50.11194029850746, "grad_norm": 0.12119125574827194, "learning_rate": 2.67131771199844e-05, "loss": 0.0212, "step": 13430 }, { "epoch": 50.149253731343286, "grad_norm": 0.1314798891544342, "learning_rate": 2.6640049273331515e-05, "loss": 0.0223, "step": 13440 }, { "epoch": 50.1865671641791, "grad_norm": 0.19163373112678528, "learning_rate": 2.656698529189193e-05, "loss": 0.025, "step": 13450 }, { "epoch": 50.223880597014926, "grad_norm": 0.15088340640068054, "learning_rate": 2.6493985375419778e-05, "loss": 0.0289, "step": 13460 }, { "epoch": 50.26119402985075, "grad_norm": 0.09920644760131836, "learning_rate": 2.642104972349403e-05, "loss": 0.0207, "step": 13470 }, { "epoch": 50.298507462686565, "grad_norm": 0.17476697266101837, "learning_rate": 2.6348178535517966e-05, "loss": 0.0242, "step": 13480 }, { "epoch": 50.33582089552239, "grad_norm": 0.12569792568683624, "learning_rate": 2.6275372010718635e-05, "loss": 0.017, "step": 13490 }, { "epoch": 50.37313432835821, "grad_norm": 0.16706988215446472, "learning_rate": 2.6202630348146324e-05, "loss": 0.0261, "step": 13500 }, { "epoch": 50.41044776119403, "grad_norm": 0.11610403656959534, "learning_rate": 2.612995374667394e-05, "loss": 0.0281, "step": 13510 }, { "epoch": 50.44776119402985, "grad_norm": 0.12250851094722748, "learning_rate": 2.6057342404996522e-05, "loss": 0.0244, "step": 13520 }, { "epoch": 50.485074626865675, "grad_norm": 0.06973443925380707, "learning_rate": 2.5984796521630737e-05, "loss": 0.0204, "step": 13530 }, { "epoch": 50.52238805970149, "grad_norm": 0.12069716304540634, "learning_rate": 2.591231629491423e-05, "loss": 0.0224, "step": 13540 }, { "epoch": 50.559701492537314, "grad_norm": 0.12831450998783112, "learning_rate": 2.5839901923005205e-05, "loss": 0.0228, "step": 13550 }, { "epoch": 50.59701492537313, "grad_norm": 0.2117542028427124, "learning_rate": 2.5767553603881767e-05, "loss": 0.0256, "step": 13560 }, { "epoch": 50.634328358208954, "grad_norm": 0.14169280230998993, "learning_rate": 2.5695271535341443e-05, "loss": 0.0246, "step": 13570 }, { "epoch": 50.67164179104478, "grad_norm": 0.09310466796159744, "learning_rate": 2.562305591500069e-05, "loss": 0.0191, "step": 13580 }, { "epoch": 50.708955223880594, "grad_norm": 0.15426033735275269, "learning_rate": 2.555090694029421e-05, "loss": 0.0257, "step": 13590 }, { "epoch": 50.74626865671642, "grad_norm": 0.08582637459039688, "learning_rate": 2.547882480847461e-05, "loss": 0.023, "step": 13600 }, { "epoch": 50.78358208955224, "grad_norm": 0.12210249900817871, "learning_rate": 2.540680971661161e-05, "loss": 0.0228, "step": 13610 }, { "epoch": 50.82089552238806, "grad_norm": 0.13269662857055664, "learning_rate": 2.5334861861591753e-05, "loss": 0.0228, "step": 13620 }, { "epoch": 50.85820895522388, "grad_norm": 0.20383509993553162, "learning_rate": 2.526298144011775e-05, "loss": 0.0216, "step": 13630 }, { "epoch": 50.8955223880597, "grad_norm": 0.14421682059764862, "learning_rate": 2.5191168648707887e-05, "loss": 0.0236, "step": 13640 }, { "epoch": 50.93283582089552, "grad_norm": 0.08700492978096008, "learning_rate": 2.511942368369566e-05, "loss": 0.0243, "step": 13650 }, { "epoch": 50.97014925373134, "grad_norm": 0.10098283737897873, "learning_rate": 2.5047746741228978e-05, "loss": 0.02, "step": 13660 }, { "epoch": 51.007462686567166, "grad_norm": 0.11686978489160538, "learning_rate": 2.4976138017269908e-05, "loss": 0.0219, "step": 13670 }, { "epoch": 51.04477611940298, "grad_norm": 0.07978662848472595, "learning_rate": 2.490459770759398e-05, "loss": 0.0177, "step": 13680 }, { "epoch": 51.082089552238806, "grad_norm": 0.14056360721588135, "learning_rate": 2.4833126007789653e-05, "loss": 0.0246, "step": 13690 }, { "epoch": 51.11940298507463, "grad_norm": 0.1920592039823532, "learning_rate": 2.476172311325783e-05, "loss": 0.0226, "step": 13700 }, { "epoch": 51.156716417910445, "grad_norm": 0.1461474597454071, "learning_rate": 2.4690389219211273e-05, "loss": 0.0204, "step": 13710 }, { "epoch": 51.19402985074627, "grad_norm": 0.1312243789434433, "learning_rate": 2.4619124520674146e-05, "loss": 0.0235, "step": 13720 }, { "epoch": 51.23134328358209, "grad_norm": 0.16667307913303375, "learning_rate": 2.4547929212481435e-05, "loss": 0.0197, "step": 13730 }, { "epoch": 51.26865671641791, "grad_norm": 0.06856778264045715, "learning_rate": 2.447680348927837e-05, "loss": 0.0194, "step": 13740 }, { "epoch": 51.30597014925373, "grad_norm": 0.15726716816425323, "learning_rate": 2.4405747545519963e-05, "loss": 0.0234, "step": 13750 }, { "epoch": 51.343283582089555, "grad_norm": 0.11875126510858536, "learning_rate": 2.433476157547044e-05, "loss": 0.0263, "step": 13760 }, { "epoch": 51.38059701492537, "grad_norm": 0.14230020344257355, "learning_rate": 2.4263845773202736e-05, "loss": 0.0222, "step": 13770 }, { "epoch": 51.417910447761194, "grad_norm": 0.12550948560237885, "learning_rate": 2.419300033259798e-05, "loss": 0.0199, "step": 13780 }, { "epoch": 51.45522388059702, "grad_norm": 0.12236285954713821, "learning_rate": 2.4122225447344875e-05, "loss": 0.0194, "step": 13790 }, { "epoch": 51.492537313432834, "grad_norm": 0.10739748179912567, "learning_rate": 2.405152131093926e-05, "loss": 0.0224, "step": 13800 }, { "epoch": 51.52985074626866, "grad_norm": 0.08485986292362213, "learning_rate": 2.3980888116683515e-05, "loss": 0.0213, "step": 13810 }, { "epoch": 51.56716417910448, "grad_norm": 0.23006561398506165, "learning_rate": 2.3910326057686127e-05, "loss": 0.0296, "step": 13820 }, { "epoch": 51.6044776119403, "grad_norm": 0.11056424677371979, "learning_rate": 2.3839835326861104e-05, "loss": 0.0197, "step": 13830 }, { "epoch": 51.64179104477612, "grad_norm": 0.1503966897726059, "learning_rate": 2.3769416116927335e-05, "loss": 0.0254, "step": 13840 }, { "epoch": 51.67910447761194, "grad_norm": 0.15047578513622284, "learning_rate": 2.3699068620408304e-05, "loss": 0.0221, "step": 13850 }, { "epoch": 51.71641791044776, "grad_norm": 0.14350087940692902, "learning_rate": 2.362879302963135e-05, "loss": 0.0216, "step": 13860 }, { "epoch": 51.75373134328358, "grad_norm": 0.09344734251499176, "learning_rate": 2.3558589536727277e-05, "loss": 0.0206, "step": 13870 }, { "epoch": 51.791044776119406, "grad_norm": 0.12326527386903763, "learning_rate": 2.3488458333629777e-05, "loss": 0.0225, "step": 13880 }, { "epoch": 51.82835820895522, "grad_norm": 0.15376359224319458, "learning_rate": 2.341839961207482e-05, "loss": 0.0225, "step": 13890 }, { "epoch": 51.865671641791046, "grad_norm": 0.10511427372694016, "learning_rate": 2.3348413563600325e-05, "loss": 0.0196, "step": 13900 }, { "epoch": 51.90298507462687, "grad_norm": 0.12192686647176743, "learning_rate": 2.3278500379545436e-05, "loss": 0.0218, "step": 13910 }, { "epoch": 51.940298507462686, "grad_norm": 0.130075603723526, "learning_rate": 2.3208660251050158e-05, "loss": 0.0194, "step": 13920 }, { "epoch": 51.97761194029851, "grad_norm": 0.10453350096940994, "learning_rate": 2.3138893369054766e-05, "loss": 0.023, "step": 13930 }, { "epoch": 52.014925373134325, "grad_norm": 0.08966228365898132, "learning_rate": 2.3069199924299174e-05, "loss": 0.0234, "step": 13940 }, { "epoch": 52.05223880597015, "grad_norm": 0.1274033486843109, "learning_rate": 2.2999580107322653e-05, "loss": 0.0204, "step": 13950 }, { "epoch": 52.08955223880597, "grad_norm": 0.08601605892181396, "learning_rate": 2.29300341084631e-05, "loss": 0.0216, "step": 13960 }, { "epoch": 52.12686567164179, "grad_norm": 0.08827143907546997, "learning_rate": 2.2860562117856647e-05, "loss": 0.0216, "step": 13970 }, { "epoch": 52.16417910447761, "grad_norm": 0.16426432132720947, "learning_rate": 2.279116432543705e-05, "loss": 0.0196, "step": 13980 }, { "epoch": 52.201492537313435, "grad_norm": 0.11794531345367432, "learning_rate": 2.2721840920935196e-05, "loss": 0.0226, "step": 13990 }, { "epoch": 52.23880597014925, "grad_norm": 0.13390977680683136, "learning_rate": 2.2652592093878666e-05, "loss": 0.0218, "step": 14000 }, { "epoch": 52.276119402985074, "grad_norm": 0.11990318447351456, "learning_rate": 2.258341803359108e-05, "loss": 0.0186, "step": 14010 }, { "epoch": 52.3134328358209, "grad_norm": 0.19645343720912933, "learning_rate": 2.251431892919171e-05, "loss": 0.0208, "step": 14020 }, { "epoch": 52.350746268656714, "grad_norm": 0.1454838216304779, "learning_rate": 2.2445294969594844e-05, "loss": 0.0187, "step": 14030 }, { "epoch": 52.38805970149254, "grad_norm": 0.09445475786924362, "learning_rate": 2.237634634350934e-05, "loss": 0.0198, "step": 14040 }, { "epoch": 52.42537313432836, "grad_norm": 0.16241857409477234, "learning_rate": 2.2307473239438154e-05, "loss": 0.0237, "step": 14050 }, { "epoch": 52.46268656716418, "grad_norm": 0.10022711753845215, "learning_rate": 2.2238675845677663e-05, "loss": 0.0183, "step": 14060 }, { "epoch": 52.5, "grad_norm": 0.19317051768302917, "learning_rate": 2.2169954350317374e-05, "loss": 0.0195, "step": 14070 }, { "epoch": 52.53731343283582, "grad_norm": 0.10097086429595947, "learning_rate": 2.2101308941239203e-05, "loss": 0.0207, "step": 14080 }, { "epoch": 52.57462686567164, "grad_norm": 0.11533255875110626, "learning_rate": 2.2032739806117058e-05, "loss": 0.0193, "step": 14090 }, { "epoch": 52.61194029850746, "grad_norm": 0.1833461970090866, "learning_rate": 2.196424713241637e-05, "loss": 0.0193, "step": 14100 }, { "epoch": 52.649253731343286, "grad_norm": 0.0757739394903183, "learning_rate": 2.1895831107393484e-05, "loss": 0.0182, "step": 14110 }, { "epoch": 52.6865671641791, "grad_norm": 0.10618551820516586, "learning_rate": 2.182749191809518e-05, "loss": 0.0202, "step": 14120 }, { "epoch": 52.723880597014926, "grad_norm": 0.15234141051769257, "learning_rate": 2.1759229751358217e-05, "loss": 0.0197, "step": 14130 }, { "epoch": 52.76119402985075, "grad_norm": 0.08095905929803848, "learning_rate": 2.1691044793808734e-05, "loss": 0.0194, "step": 14140 }, { "epoch": 52.798507462686565, "grad_norm": 0.1279105693101883, "learning_rate": 2.1622937231861822e-05, "loss": 0.0228, "step": 14150 }, { "epoch": 52.83582089552239, "grad_norm": 0.12276138365268707, "learning_rate": 2.1554907251720945e-05, "loss": 0.0188, "step": 14160 }, { "epoch": 52.87313432835821, "grad_norm": 0.06749910861253738, "learning_rate": 2.148695503937745e-05, "loss": 0.018, "step": 14170 }, { "epoch": 52.91044776119403, "grad_norm": 0.08617280423641205, "learning_rate": 2.1419080780610123e-05, "loss": 0.0223, "step": 14180 }, { "epoch": 52.94776119402985, "grad_norm": 0.0930023267865181, "learning_rate": 2.1351284660984572e-05, "loss": 0.0246, "step": 14190 }, { "epoch": 52.985074626865675, "grad_norm": 0.10156138241291046, "learning_rate": 2.128356686585282e-05, "loss": 0.0237, "step": 14200 }, { "epoch": 53.02238805970149, "grad_norm": 0.13922975957393646, "learning_rate": 2.121592758035273e-05, "loss": 0.0203, "step": 14210 }, { "epoch": 53.059701492537314, "grad_norm": 0.11783100664615631, "learning_rate": 2.1148366989407496e-05, "loss": 0.0187, "step": 14220 }, { "epoch": 53.09701492537314, "grad_norm": 0.13359035551548004, "learning_rate": 2.1080885277725236e-05, "loss": 0.0255, "step": 14230 }, { "epoch": 53.134328358208954, "grad_norm": 0.22380056977272034, "learning_rate": 2.1013482629798333e-05, "loss": 0.0211, "step": 14240 }, { "epoch": 53.17164179104478, "grad_norm": 0.0862349346280098, "learning_rate": 2.094615922990309e-05, "loss": 0.0189, "step": 14250 }, { "epoch": 53.208955223880594, "grad_norm": 0.23608718812465668, "learning_rate": 2.0878915262099098e-05, "loss": 0.0233, "step": 14260 }, { "epoch": 53.24626865671642, "grad_norm": 0.11717761307954788, "learning_rate": 2.0811750910228774e-05, "loss": 0.0225, "step": 14270 }, { "epoch": 53.28358208955224, "grad_norm": 0.0912686362862587, "learning_rate": 2.0744666357916925e-05, "loss": 0.0194, "step": 14280 }, { "epoch": 53.32089552238806, "grad_norm": 0.1377868950366974, "learning_rate": 2.067766178857013e-05, "loss": 0.0196, "step": 14290 }, { "epoch": 53.35820895522388, "grad_norm": 0.11210623383522034, "learning_rate": 2.061073738537635e-05, "loss": 0.0187, "step": 14300 }, { "epoch": 53.3955223880597, "grad_norm": 0.09348799288272858, "learning_rate": 2.0543893331304333e-05, "loss": 0.0181, "step": 14310 }, { "epoch": 53.43283582089552, "grad_norm": 0.06874913722276688, "learning_rate": 2.0477129809103147e-05, "loss": 0.0195, "step": 14320 }, { "epoch": 53.47014925373134, "grad_norm": 0.15592241287231445, "learning_rate": 2.0410447001301753e-05, "loss": 0.0234, "step": 14330 }, { "epoch": 53.507462686567166, "grad_norm": 0.1749936193227768, "learning_rate": 2.0343845090208368e-05, "loss": 0.0237, "step": 14340 }, { "epoch": 53.54477611940298, "grad_norm": 0.1107540875673294, "learning_rate": 2.0277324257910106e-05, "loss": 0.0209, "step": 14350 }, { "epoch": 53.582089552238806, "grad_norm": 0.12648239731788635, "learning_rate": 2.0210884686272368e-05, "loss": 0.0235, "step": 14360 }, { "epoch": 53.61940298507463, "grad_norm": 0.13177135586738586, "learning_rate": 2.0144526556938387e-05, "loss": 0.022, "step": 14370 }, { "epoch": 53.656716417910445, "grad_norm": 0.14855548739433289, "learning_rate": 2.0078250051328784e-05, "loss": 0.0266, "step": 14380 }, { "epoch": 53.69402985074627, "grad_norm": 0.1569342464208603, "learning_rate": 2.0012055350640986e-05, "loss": 0.0255, "step": 14390 }, { "epoch": 53.73134328358209, "grad_norm": 0.16979776322841644, "learning_rate": 1.9945942635848748e-05, "loss": 0.0217, "step": 14400 }, { "epoch": 53.76865671641791, "grad_norm": 0.1076108068227768, "learning_rate": 1.9879912087701753e-05, "loss": 0.017, "step": 14410 }, { "epoch": 53.80597014925373, "grad_norm": 0.21707268059253693, "learning_rate": 1.981396388672496e-05, "loss": 0.025, "step": 14420 }, { "epoch": 53.843283582089555, "grad_norm": 0.14711551368236542, "learning_rate": 1.974809821321827e-05, "loss": 0.0207, "step": 14430 }, { "epoch": 53.88059701492537, "grad_norm": 0.10578326880931854, "learning_rate": 1.9682315247255894e-05, "loss": 0.0159, "step": 14440 }, { "epoch": 53.917910447761194, "grad_norm": 0.09460412710905075, "learning_rate": 1.9616615168685943e-05, "loss": 0.0167, "step": 14450 }, { "epoch": 53.95522388059702, "grad_norm": 0.17408932745456696, "learning_rate": 1.9550998157129946e-05, "loss": 0.0187, "step": 14460 }, { "epoch": 53.992537313432834, "grad_norm": 0.11329926550388336, "learning_rate": 1.9485464391982284e-05, "loss": 0.0254, "step": 14470 }, { "epoch": 54.02985074626866, "grad_norm": 0.09702463448047638, "learning_rate": 1.942001405240979e-05, "loss": 0.0244, "step": 14480 }, { "epoch": 54.06716417910448, "grad_norm": 0.09816773235797882, "learning_rate": 1.9354647317351188e-05, "loss": 0.0211, "step": 14490 }, { "epoch": 54.1044776119403, "grad_norm": 0.1503995805978775, "learning_rate": 1.928936436551661e-05, "loss": 0.0237, "step": 14500 }, { "epoch": 54.14179104477612, "grad_norm": 0.14443016052246094, "learning_rate": 1.9224165375387193e-05, "loss": 0.0186, "step": 14510 }, { "epoch": 54.17910447761194, "grad_norm": 0.0989547073841095, "learning_rate": 1.9159050525214452e-05, "loss": 0.0196, "step": 14520 }, { "epoch": 54.21641791044776, "grad_norm": 0.20105260610580444, "learning_rate": 1.909401999301993e-05, "loss": 0.0199, "step": 14530 }, { "epoch": 54.25373134328358, "grad_norm": 0.09586000442504883, "learning_rate": 1.9029073956594606e-05, "loss": 0.0199, "step": 14540 }, { "epoch": 54.291044776119406, "grad_norm": 0.12682519853115082, "learning_rate": 1.8964212593498442e-05, "loss": 0.0228, "step": 14550 }, { "epoch": 54.32835820895522, "grad_norm": 0.143130362033844, "learning_rate": 1.8899436081059975e-05, "loss": 0.02, "step": 14560 }, { "epoch": 54.365671641791046, "grad_norm": 0.18497130274772644, "learning_rate": 1.8834744596375666e-05, "loss": 0.0224, "step": 14570 }, { "epoch": 54.40298507462686, "grad_norm": 0.11677397042512894, "learning_rate": 1.877013831630961e-05, "loss": 0.0232, "step": 14580 }, { "epoch": 54.440298507462686, "grad_norm": 0.13686029613018036, "learning_rate": 1.8705617417492883e-05, "loss": 0.0211, "step": 14590 }, { "epoch": 54.47761194029851, "grad_norm": 0.10299759358167648, "learning_rate": 1.8641182076323148e-05, "loss": 0.0203, "step": 14600 }, { "epoch": 54.514925373134325, "grad_norm": 0.1793365627527237, "learning_rate": 1.85768324689642e-05, "loss": 0.0214, "step": 14610 }, { "epoch": 54.55223880597015, "grad_norm": 0.1557474136352539, "learning_rate": 1.851256877134538e-05, "loss": 0.0183, "step": 14620 }, { "epoch": 54.58955223880597, "grad_norm": 0.2053525149822235, "learning_rate": 1.8448391159161204e-05, "loss": 0.0231, "step": 14630 }, { "epoch": 54.62686567164179, "grad_norm": 0.15260490775108337, "learning_rate": 1.838429980787081e-05, "loss": 0.0284, "step": 14640 }, { "epoch": 54.66417910447761, "grad_norm": 0.14003758132457733, "learning_rate": 1.8320294892697478e-05, "loss": 0.0217, "step": 14650 }, { "epoch": 54.701492537313435, "grad_norm": 0.20833978056907654, "learning_rate": 1.8256376588628238e-05, "loss": 0.0212, "step": 14660 }, { "epoch": 54.73880597014925, "grad_norm": 0.13922567665576935, "learning_rate": 1.8192545070413282e-05, "loss": 0.0185, "step": 14670 }, { "epoch": 54.776119402985074, "grad_norm": 0.16342200338840485, "learning_rate": 1.8128800512565513e-05, "loss": 0.0183, "step": 14680 }, { "epoch": 54.8134328358209, "grad_norm": 0.1727473884820938, "learning_rate": 1.8065143089360172e-05, "loss": 0.0223, "step": 14690 }, { "epoch": 54.850746268656714, "grad_norm": 0.11838866025209427, "learning_rate": 1.800157297483417e-05, "loss": 0.0179, "step": 14700 }, { "epoch": 54.88805970149254, "grad_norm": 0.18337324261665344, "learning_rate": 1.7938090342785817e-05, "loss": 0.0223, "step": 14710 }, { "epoch": 54.92537313432836, "grad_norm": 0.09204807877540588, "learning_rate": 1.787469536677419e-05, "loss": 0.0273, "step": 14720 }, { "epoch": 54.96268656716418, "grad_norm": 0.16529780626296997, "learning_rate": 1.7811388220118707e-05, "loss": 0.0207, "step": 14730 }, { "epoch": 55.0, "grad_norm": 0.18578030169010162, "learning_rate": 1.774816907589873e-05, "loss": 0.0217, "step": 14740 }, { "epoch": 55.03731343283582, "grad_norm": 0.15113744139671326, "learning_rate": 1.768503810695295e-05, "loss": 0.0252, "step": 14750 }, { "epoch": 55.07462686567164, "grad_norm": 0.13872158527374268, "learning_rate": 1.7621995485879062e-05, "loss": 0.0214, "step": 14760 }, { "epoch": 55.11194029850746, "grad_norm": 0.14171402156352997, "learning_rate": 1.755904138503316e-05, "loss": 0.023, "step": 14770 }, { "epoch": 55.149253731343286, "grad_norm": 0.10502675175666809, "learning_rate": 1.749617597652934e-05, "loss": 0.0223, "step": 14780 }, { "epoch": 55.1865671641791, "grad_norm": 0.11429077386856079, "learning_rate": 1.743339943223926e-05, "loss": 0.0239, "step": 14790 }, { "epoch": 55.223880597014926, "grad_norm": 0.13361825048923492, "learning_rate": 1.7370711923791567e-05, "loss": 0.0203, "step": 14800 }, { "epoch": 55.26119402985075, "grad_norm": 0.10289110243320465, "learning_rate": 1.7308113622571544e-05, "loss": 0.018, "step": 14810 }, { "epoch": 55.298507462686565, "grad_norm": 0.13233882188796997, "learning_rate": 1.7245604699720535e-05, "loss": 0.0167, "step": 14820 }, { "epoch": 55.33582089552239, "grad_norm": 0.1312631517648697, "learning_rate": 1.7183185326135543e-05, "loss": 0.018, "step": 14830 }, { "epoch": 55.37313432835821, "grad_norm": 0.1779259741306305, "learning_rate": 1.712085567246878e-05, "loss": 0.0179, "step": 14840 }, { "epoch": 55.41044776119403, "grad_norm": 0.1153745949268341, "learning_rate": 1.70586159091271e-05, "loss": 0.0187, "step": 14850 }, { "epoch": 55.44776119402985, "grad_norm": 0.12064877897500992, "learning_rate": 1.699646620627168e-05, "loss": 0.0204, "step": 14860 }, { "epoch": 55.485074626865675, "grad_norm": 0.1133665218949318, "learning_rate": 1.6934406733817414e-05, "loss": 0.0184, "step": 14870 }, { "epoch": 55.52238805970149, "grad_norm": 0.14247648417949677, "learning_rate": 1.6872437661432517e-05, "loss": 0.0187, "step": 14880 }, { "epoch": 55.559701492537314, "grad_norm": 0.12452684342861176, "learning_rate": 1.6810559158538092e-05, "loss": 0.0192, "step": 14890 }, { "epoch": 55.59701492537313, "grad_norm": 0.12780675292015076, "learning_rate": 1.6748771394307585e-05, "loss": 0.0174, "step": 14900 }, { "epoch": 55.634328358208954, "grad_norm": 0.16989034414291382, "learning_rate": 1.6687074537666398e-05, "loss": 0.0244, "step": 14910 }, { "epoch": 55.67164179104478, "grad_norm": 0.13992761075496674, "learning_rate": 1.662546875729138e-05, "loss": 0.0174, "step": 14920 }, { "epoch": 55.708955223880594, "grad_norm": 0.13120482861995697, "learning_rate": 1.6563954221610355e-05, "loss": 0.0251, "step": 14930 }, { "epoch": 55.74626865671642, "grad_norm": 0.10000721365213394, "learning_rate": 1.6502531098801753e-05, "loss": 0.0228, "step": 14940 }, { "epoch": 55.78358208955224, "grad_norm": 0.14758244156837463, "learning_rate": 1.6441199556794033e-05, "loss": 0.0195, "step": 14950 }, { "epoch": 55.82089552238806, "grad_norm": 0.12505674362182617, "learning_rate": 1.637995976326527e-05, "loss": 0.0206, "step": 14960 }, { "epoch": 55.85820895522388, "grad_norm": 0.1301531046628952, "learning_rate": 1.631881188564275e-05, "loss": 0.0179, "step": 14970 }, { "epoch": 55.8955223880597, "grad_norm": 0.13421425223350525, "learning_rate": 1.62577560911024e-05, "loss": 0.0178, "step": 14980 }, { "epoch": 55.93283582089552, "grad_norm": 0.1226482093334198, "learning_rate": 1.6196792546568472e-05, "loss": 0.0181, "step": 14990 }, { "epoch": 55.97014925373134, "grad_norm": 0.12425880134105682, "learning_rate": 1.6135921418712956e-05, "loss": 0.0222, "step": 15000 }, { "epoch": 56.007462686567166, "grad_norm": 0.15265363454818726, "learning_rate": 1.6075142873955164e-05, "loss": 0.0189, "step": 15010 }, { "epoch": 56.04477611940298, "grad_norm": 0.10370873659849167, "learning_rate": 1.6014457078461353e-05, "loss": 0.0181, "step": 15020 }, { "epoch": 56.082089552238806, "grad_norm": 0.1301751583814621, "learning_rate": 1.5953864198144135e-05, "loss": 0.0184, "step": 15030 }, { "epoch": 56.11940298507463, "grad_norm": 0.1353592425584793, "learning_rate": 1.5893364398662176e-05, "loss": 0.0208, "step": 15040 }, { "epoch": 56.156716417910445, "grad_norm": 0.1091231182217598, "learning_rate": 1.583295784541958e-05, "loss": 0.0217, "step": 15050 }, { "epoch": 56.19402985074627, "grad_norm": 0.20839501917362213, "learning_rate": 1.5772644703565565e-05, "loss": 0.0216, "step": 15060 }, { "epoch": 56.23134328358209, "grad_norm": 0.1246316060423851, "learning_rate": 1.5712425137993973e-05, "loss": 0.0176, "step": 15070 }, { "epoch": 56.26865671641791, "grad_norm": 0.15602728724479675, "learning_rate": 1.5652299313342773e-05, "loss": 0.0207, "step": 15080 }, { "epoch": 56.30597014925373, "grad_norm": 0.2218896746635437, "learning_rate": 1.5592267393993716e-05, "loss": 0.0196, "step": 15090 }, { "epoch": 56.343283582089555, "grad_norm": 0.08008897304534912, "learning_rate": 1.553232954407171e-05, "loss": 0.0196, "step": 15100 }, { "epoch": 56.38059701492537, "grad_norm": 0.10604596138000488, "learning_rate": 1.5472485927444597e-05, "loss": 0.0195, "step": 15110 }, { "epoch": 56.417910447761194, "grad_norm": 0.14185412228107452, "learning_rate": 1.5412736707722537e-05, "loss": 0.02, "step": 15120 }, { "epoch": 56.45522388059702, "grad_norm": 0.12535589933395386, "learning_rate": 1.5353082048257596e-05, "loss": 0.0273, "step": 15130 }, { "epoch": 56.492537313432834, "grad_norm": 0.19378483295440674, "learning_rate": 1.5293522112143373e-05, "loss": 0.0177, "step": 15140 }, { "epoch": 56.52985074626866, "grad_norm": 0.12723089754581451, "learning_rate": 1.5234057062214402e-05, "loss": 0.0187, "step": 15150 }, { "epoch": 56.56716417910448, "grad_norm": 0.08233541250228882, "learning_rate": 1.517468706104589e-05, "loss": 0.0167, "step": 15160 }, { "epoch": 56.6044776119403, "grad_norm": 0.1336365044116974, "learning_rate": 1.5115412270953167e-05, "loss": 0.0182, "step": 15170 }, { "epoch": 56.64179104477612, "grad_norm": 0.13425517082214355, "learning_rate": 1.5056232853991209e-05, "loss": 0.0164, "step": 15180 }, { "epoch": 56.67910447761194, "grad_norm": 0.14338354766368866, "learning_rate": 1.4997148971954344e-05, "loss": 0.0209, "step": 15190 }, { "epoch": 56.71641791044776, "grad_norm": 0.09882687032222748, "learning_rate": 1.4938160786375572e-05, "loss": 0.0182, "step": 15200 }, { "epoch": 56.75373134328358, "grad_norm": 0.129850834608078, "learning_rate": 1.4879268458526379e-05, "loss": 0.0245, "step": 15210 }, { "epoch": 56.791044776119406, "grad_norm": 0.12232585996389389, "learning_rate": 1.4820472149416154e-05, "loss": 0.0195, "step": 15220 }, { "epoch": 56.82835820895522, "grad_norm": 0.11128424108028412, "learning_rate": 1.4761772019791748e-05, "loss": 0.0203, "step": 15230 }, { "epoch": 56.865671641791046, "grad_norm": 0.18157242238521576, "learning_rate": 1.470316823013707e-05, "loss": 0.0159, "step": 15240 }, { "epoch": 56.90298507462687, "grad_norm": 0.18391671776771545, "learning_rate": 1.4644660940672627e-05, "loss": 0.0242, "step": 15250 }, { "epoch": 56.940298507462686, "grad_norm": 0.0945688858628273, "learning_rate": 1.4586250311355132e-05, "loss": 0.0256, "step": 15260 }, { "epoch": 56.97761194029851, "grad_norm": 0.11417562514543533, "learning_rate": 1.4527936501877032e-05, "loss": 0.0176, "step": 15270 }, { "epoch": 57.014925373134325, "grad_norm": 0.156963050365448, "learning_rate": 1.4469719671666043e-05, "loss": 0.0245, "step": 15280 }, { "epoch": 57.05223880597015, "grad_norm": 0.21794722974300385, "learning_rate": 1.4411599979884744e-05, "loss": 0.0196, "step": 15290 }, { "epoch": 57.08955223880597, "grad_norm": 0.12046397477388382, "learning_rate": 1.435357758543015e-05, "loss": 0.0198, "step": 15300 }, { "epoch": 57.12686567164179, "grad_norm": 0.15432411432266235, "learning_rate": 1.4295652646933277e-05, "loss": 0.0199, "step": 15310 }, { "epoch": 57.16417910447761, "grad_norm": 0.14670522511005402, "learning_rate": 1.4237825322758736e-05, "loss": 0.0189, "step": 15320 }, { "epoch": 57.201492537313435, "grad_norm": 0.14327657222747803, "learning_rate": 1.4180095771004154e-05, "loss": 0.0256, "step": 15330 }, { "epoch": 57.23880597014925, "grad_norm": 0.12205947190523148, "learning_rate": 1.412246414949997e-05, "loss": 0.0252, "step": 15340 }, { "epoch": 57.276119402985074, "grad_norm": 0.15607957541942596, "learning_rate": 1.4064930615808808e-05, "loss": 0.0183, "step": 15350 }, { "epoch": 57.3134328358209, "grad_norm": 0.12443247437477112, "learning_rate": 1.4007495327225162e-05, "loss": 0.022, "step": 15360 }, { "epoch": 57.350746268656714, "grad_norm": 0.16147422790527344, "learning_rate": 1.3950158440774957e-05, "loss": 0.0189, "step": 15370 }, { "epoch": 57.38805970149254, "grad_norm": 0.21510209143161774, "learning_rate": 1.389292011321498e-05, "loss": 0.0205, "step": 15380 }, { "epoch": 57.42537313432836, "grad_norm": 0.23851901292800903, "learning_rate": 1.383578050103268e-05, "loss": 0.0185, "step": 15390 }, { "epoch": 57.46268656716418, "grad_norm": 0.15332546830177307, "learning_rate": 1.3778739760445552e-05, "loss": 0.0213, "step": 15400 }, { "epoch": 57.5, "grad_norm": 0.17791134119033813, "learning_rate": 1.3721798047400813e-05, "loss": 0.0255, "step": 15410 }, { "epoch": 57.53731343283582, "grad_norm": 0.12996767461299896, "learning_rate": 1.3664955517574968e-05, "loss": 0.024, "step": 15420 }, { "epoch": 57.57462686567164, "grad_norm": 0.12579920887947083, "learning_rate": 1.3608212326373249e-05, "loss": 0.0235, "step": 15430 }, { "epoch": 57.61194029850746, "grad_norm": 0.11725910753011703, "learning_rate": 1.3551568628929434e-05, "loss": 0.0196, "step": 15440 }, { "epoch": 57.649253731343286, "grad_norm": 0.13588570058345795, "learning_rate": 1.3495024580105192e-05, "loss": 0.0204, "step": 15450 }, { "epoch": 57.6865671641791, "grad_norm": 0.08286559581756592, "learning_rate": 1.343858033448982e-05, "loss": 0.024, "step": 15460 }, { "epoch": 57.723880597014926, "grad_norm": 0.11547423899173737, "learning_rate": 1.3382236046399722e-05, "loss": 0.018, "step": 15470 }, { "epoch": 57.76119402985075, "grad_norm": 0.2193337231874466, "learning_rate": 1.3325991869878013e-05, "loss": 0.0192, "step": 15480 }, { "epoch": 57.798507462686565, "grad_norm": 0.1474095731973648, "learning_rate": 1.3269847958694148e-05, "loss": 0.0198, "step": 15490 }, { "epoch": 57.83582089552239, "grad_norm": 0.16049841046333313, "learning_rate": 1.3213804466343421e-05, "loss": 0.0224, "step": 15500 }, { "epoch": 57.87313432835821, "grad_norm": 0.14441774785518646, "learning_rate": 1.3157861546046613e-05, "loss": 0.022, "step": 15510 }, { "epoch": 57.91044776119403, "grad_norm": 0.09512905031442642, "learning_rate": 1.3102019350749528e-05, "loss": 0.0206, "step": 15520 }, { "epoch": 57.94776119402985, "grad_norm": 0.12467072159051895, "learning_rate": 1.3046278033122577e-05, "loss": 0.0192, "step": 15530 }, { "epoch": 57.985074626865675, "grad_norm": 0.127666175365448, "learning_rate": 1.299063774556042e-05, "loss": 0.0263, "step": 15540 }, { "epoch": 58.02238805970149, "grad_norm": 0.1259247213602066, "learning_rate": 1.293509864018146e-05, "loss": 0.0189, "step": 15550 }, { "epoch": 58.059701492537314, "grad_norm": 0.11749254912137985, "learning_rate": 1.2879660868827508e-05, "loss": 0.0248, "step": 15560 }, { "epoch": 58.09701492537314, "grad_norm": 0.11325323581695557, "learning_rate": 1.2824324583063302e-05, "loss": 0.018, "step": 15570 }, { "epoch": 58.134328358208954, "grad_norm": 0.11242391169071198, "learning_rate": 1.2769089934176126e-05, "loss": 0.0183, "step": 15580 }, { "epoch": 58.17164179104478, "grad_norm": 0.09608528017997742, "learning_rate": 1.2713957073175425e-05, "loss": 0.0194, "step": 15590 }, { "epoch": 58.208955223880594, "grad_norm": 0.11346051096916199, "learning_rate": 1.2658926150792322e-05, "loss": 0.0179, "step": 15600 }, { "epoch": 58.24626865671642, "grad_norm": 0.17065541446208954, "learning_rate": 1.2603997317479238e-05, "loss": 0.0206, "step": 15610 }, { "epoch": 58.28358208955224, "grad_norm": 0.08744276314973831, "learning_rate": 1.2549170723409549e-05, "loss": 0.0159, "step": 15620 }, { "epoch": 58.32089552238806, "grad_norm": 0.16918648779392242, "learning_rate": 1.2494446518477022e-05, "loss": 0.0235, "step": 15630 }, { "epoch": 58.35820895522388, "grad_norm": 0.10872272402048111, "learning_rate": 1.243982485229559e-05, "loss": 0.016, "step": 15640 }, { "epoch": 58.3955223880597, "grad_norm": 0.08804396539926529, "learning_rate": 1.2385305874198776e-05, "loss": 0.0159, "step": 15650 }, { "epoch": 58.43283582089552, "grad_norm": 0.1546899825334549, "learning_rate": 1.233088973323937e-05, "loss": 0.0234, "step": 15660 }, { "epoch": 58.47014925373134, "grad_norm": 0.10615945607423782, "learning_rate": 1.2276576578189064e-05, "loss": 0.0188, "step": 15670 }, { "epoch": 58.507462686567166, "grad_norm": 0.10865461081266403, "learning_rate": 1.2222366557537911e-05, "loss": 0.0208, "step": 15680 }, { "epoch": 58.54477611940298, "grad_norm": 0.17631977796554565, "learning_rate": 1.2168259819494066e-05, "loss": 0.0214, "step": 15690 }, { "epoch": 58.582089552238806, "grad_norm": 0.18886514008045197, "learning_rate": 1.2114256511983274e-05, "loss": 0.0176, "step": 15700 }, { "epoch": 58.61940298507463, "grad_norm": 0.08707905560731888, "learning_rate": 1.2060356782648503e-05, "loss": 0.0191, "step": 15710 }, { "epoch": 58.656716417910445, "grad_norm": 0.22665008902549744, "learning_rate": 1.2006560778849578e-05, "loss": 0.023, "step": 15720 }, { "epoch": 58.69402985074627, "grad_norm": 0.12604308128356934, "learning_rate": 1.1952868647662696e-05, "loss": 0.0207, "step": 15730 }, { "epoch": 58.73134328358209, "grad_norm": 0.1841406524181366, "learning_rate": 1.1899280535880119e-05, "loss": 0.0186, "step": 15740 }, { "epoch": 58.76865671641791, "grad_norm": 0.17913606762886047, "learning_rate": 1.1845796590009683e-05, "loss": 0.0197, "step": 15750 }, { "epoch": 58.80597014925373, "grad_norm": 0.22669418156147003, "learning_rate": 1.1792416956274444e-05, "loss": 0.0214, "step": 15760 }, { "epoch": 58.843283582089555, "grad_norm": 0.21109512448310852, "learning_rate": 1.1739141780612306e-05, "loss": 0.0187, "step": 15770 }, { "epoch": 58.88059701492537, "grad_norm": 0.13671617209911346, "learning_rate": 1.1685971208675539e-05, "loss": 0.0217, "step": 15780 }, { "epoch": 58.917910447761194, "grad_norm": 0.13792690634727478, "learning_rate": 1.1632905385830484e-05, "loss": 0.0217, "step": 15790 }, { "epoch": 58.95522388059702, "grad_norm": 0.21137778460979462, "learning_rate": 1.157994445715706e-05, "loss": 0.0182, "step": 15800 }, { "epoch": 58.992537313432834, "grad_norm": 0.16283412277698517, "learning_rate": 1.1527088567448407e-05, "loss": 0.0205, "step": 15810 }, { "epoch": 59.02985074626866, "grad_norm": 0.1275905817747116, "learning_rate": 1.1474337861210543e-05, "loss": 0.0224, "step": 15820 }, { "epoch": 59.06716417910448, "grad_norm": 0.15432944893836975, "learning_rate": 1.1421692482661856e-05, "loss": 0.0215, "step": 15830 }, { "epoch": 59.1044776119403, "grad_norm": 0.16244202852249146, "learning_rate": 1.1369152575732822e-05, "loss": 0.0179, "step": 15840 }, { "epoch": 59.14179104477612, "grad_norm": 0.143728569149971, "learning_rate": 1.1316718284065537e-05, "loss": 0.0186, "step": 15850 }, { "epoch": 59.17910447761194, "grad_norm": 0.18476592004299164, "learning_rate": 1.1264389751013326e-05, "loss": 0.0224, "step": 15860 }, { "epoch": 59.21641791044776, "grad_norm": 0.13196825981140137, "learning_rate": 1.1212167119640438e-05, "loss": 0.015, "step": 15870 }, { "epoch": 59.25373134328358, "grad_norm": 0.10285303741693497, "learning_rate": 1.1160050532721528e-05, "loss": 0.0165, "step": 15880 }, { "epoch": 59.291044776119406, "grad_norm": 0.1789907068014145, "learning_rate": 1.1108040132741354e-05, "loss": 0.0241, "step": 15890 }, { "epoch": 59.32835820895522, "grad_norm": 0.08315189182758331, "learning_rate": 1.1056136061894384e-05, "loss": 0.0176, "step": 15900 }, { "epoch": 59.365671641791046, "grad_norm": 0.08452967554330826, "learning_rate": 1.100433846208434e-05, "loss": 0.021, "step": 15910 }, { "epoch": 59.40298507462686, "grad_norm": 0.17031821608543396, "learning_rate": 1.095264747492391e-05, "loss": 0.0237, "step": 15920 }, { "epoch": 59.440298507462686, "grad_norm": 0.13352765142917633, "learning_rate": 1.090106324173426e-05, "loss": 0.0212, "step": 15930 }, { "epoch": 59.47761194029851, "grad_norm": 0.14547540247440338, "learning_rate": 1.0849585903544706e-05, "loss": 0.025, "step": 15940 }, { "epoch": 59.514925373134325, "grad_norm": 0.12380369752645493, "learning_rate": 1.0798215601092354e-05, "loss": 0.0176, "step": 15950 }, { "epoch": 59.55223880597015, "grad_norm": 0.1445429027080536, "learning_rate": 1.0746952474821614e-05, "loss": 0.0188, "step": 15960 }, { "epoch": 59.58955223880597, "grad_norm": 0.15998475253582, "learning_rate": 1.069579666488395e-05, "loss": 0.0234, "step": 15970 }, { "epoch": 59.62686567164179, "grad_norm": 0.12561002373695374, "learning_rate": 1.0644748311137376e-05, "loss": 0.0228, "step": 15980 }, { "epoch": 59.66417910447761, "grad_norm": 0.11739451438188553, "learning_rate": 1.059380755314613e-05, "loss": 0.0181, "step": 15990 }, { "epoch": 59.701492537313435, "grad_norm": 0.08170191198587418, "learning_rate": 1.0542974530180327e-05, "loss": 0.0173, "step": 16000 }, { "epoch": 59.73880597014925, "grad_norm": 0.15861664712429047, "learning_rate": 1.049224938121548e-05, "loss": 0.02, "step": 16010 }, { "epoch": 59.776119402985074, "grad_norm": 0.09696653485298157, "learning_rate": 1.0441632244932237e-05, "loss": 0.0179, "step": 16020 }, { "epoch": 59.8134328358209, "grad_norm": 0.1001015454530716, "learning_rate": 1.0391123259715906e-05, "loss": 0.014, "step": 16030 }, { "epoch": 59.850746268656714, "grad_norm": 0.15500189363956451, "learning_rate": 1.0340722563656107e-05, "loss": 0.0199, "step": 16040 }, { "epoch": 59.88805970149254, "grad_norm": 0.12021978199481964, "learning_rate": 1.0290430294546449e-05, "loss": 0.018, "step": 16050 }, { "epoch": 59.92537313432836, "grad_norm": 0.11726555973291397, "learning_rate": 1.0240246589884044e-05, "loss": 0.0194, "step": 16060 }, { "epoch": 59.96268656716418, "grad_norm": 0.06726031750440598, "learning_rate": 1.0190171586869258e-05, "loss": 0.0202, "step": 16070 }, { "epoch": 60.0, "grad_norm": 0.16693656146526337, "learning_rate": 1.0140205422405214e-05, "loss": 0.0179, "step": 16080 }, { "epoch": 60.03731343283582, "grad_norm": 0.12165366858243942, "learning_rate": 1.009034823309749e-05, "loss": 0.0213, "step": 16090 }, { "epoch": 60.07462686567164, "grad_norm": 0.16513405740261078, "learning_rate": 1.0040600155253765e-05, "loss": 0.0216, "step": 16100 }, { "epoch": 60.11194029850746, "grad_norm": 0.14521124958992004, "learning_rate": 9.990961324883358e-06, "loss": 0.0198, "step": 16110 }, { "epoch": 60.149253731343286, "grad_norm": 0.07179571688175201, "learning_rate": 9.941431877696955e-06, "loss": 0.019, "step": 16120 }, { "epoch": 60.1865671641791, "grad_norm": 0.1296706646680832, "learning_rate": 9.892011949106172e-06, "loss": 0.0169, "step": 16130 }, { "epoch": 60.223880597014926, "grad_norm": 0.10243833810091019, "learning_rate": 9.842701674223187e-06, "loss": 0.0191, "step": 16140 }, { "epoch": 60.26119402985075, "grad_norm": 0.16867268085479736, "learning_rate": 9.793501187860432e-06, "loss": 0.0213, "step": 16150 }, { "epoch": 60.298507462686565, "grad_norm": 0.06980166584253311, "learning_rate": 9.744410624530148e-06, "loss": 0.0223, "step": 16160 }, { "epoch": 60.33582089552239, "grad_norm": 0.1388089507818222, "learning_rate": 9.695430118444048e-06, "loss": 0.0237, "step": 16170 }, { "epoch": 60.37313432835821, "grad_norm": 0.12875203788280487, "learning_rate": 9.646559803512994e-06, "loss": 0.019, "step": 16180 }, { "epoch": 60.41044776119403, "grad_norm": 0.08692096173763275, "learning_rate": 9.597799813346525e-06, "loss": 0.0223, "step": 16190 }, { "epoch": 60.44776119402985, "grad_norm": 0.13906419277191162, "learning_rate": 9.549150281252633e-06, "loss": 0.0231, "step": 16200 }, { "epoch": 60.485074626865675, "grad_norm": 0.15726956725120544, "learning_rate": 9.500611340237258e-06, "loss": 0.0181, "step": 16210 }, { "epoch": 60.52238805970149, "grad_norm": 0.08657832443714142, "learning_rate": 9.452183123004e-06, "loss": 0.021, "step": 16220 }, { "epoch": 60.559701492537314, "grad_norm": 0.1306258738040924, "learning_rate": 9.403865761953779e-06, "loss": 0.0162, "step": 16230 }, { "epoch": 60.59701492537313, "grad_norm": 0.08822573721408844, "learning_rate": 9.355659389184396e-06, "loss": 0.0182, "step": 16240 }, { "epoch": 60.634328358208954, "grad_norm": 0.1690046340227127, "learning_rate": 9.307564136490254e-06, "loss": 0.0208, "step": 16250 }, { "epoch": 60.67164179104478, "grad_norm": 0.09607428312301636, "learning_rate": 9.259580135361929e-06, "loss": 0.0243, "step": 16260 }, { "epoch": 60.708955223880594, "grad_norm": 0.23867279291152954, "learning_rate": 9.211707516985829e-06, "loss": 0.0183, "step": 16270 }, { "epoch": 60.74626865671642, "grad_norm": 0.18673978745937347, "learning_rate": 9.163946412243896e-06, "loss": 0.0203, "step": 16280 }, { "epoch": 60.78358208955224, "grad_norm": 0.10111331939697266, "learning_rate": 9.116296951713133e-06, "loss": 0.016, "step": 16290 }, { "epoch": 60.82089552238806, "grad_norm": 0.10633347183465958, "learning_rate": 9.068759265665384e-06, "loss": 0.0262, "step": 16300 }, { "epoch": 60.85820895522388, "grad_norm": 0.1478668451309204, "learning_rate": 9.02133348406684e-06, "loss": 0.0161, "step": 16310 }, { "epoch": 60.8955223880597, "grad_norm": 0.17908719182014465, "learning_rate": 8.974019736577777e-06, "loss": 0.0219, "step": 16320 }, { "epoch": 60.93283582089552, "grad_norm": 0.09599035233259201, "learning_rate": 8.92681815255219e-06, "loss": 0.0189, "step": 16330 }, { "epoch": 60.97014925373134, "grad_norm": 0.06607712060213089, "learning_rate": 8.879728861037384e-06, "loss": 0.0163, "step": 16340 }, { "epoch": 61.007462686567166, "grad_norm": 0.09222304075956345, "learning_rate": 8.832751990773714e-06, "loss": 0.0244, "step": 16350 }, { "epoch": 61.04477611940298, "grad_norm": 0.1777472048997879, "learning_rate": 8.785887670194138e-06, "loss": 0.0164, "step": 16360 }, { "epoch": 61.082089552238806, "grad_norm": 0.15244877338409424, "learning_rate": 8.739136027423894e-06, "loss": 0.022, "step": 16370 }, { "epoch": 61.11940298507463, "grad_norm": 0.11156947910785675, "learning_rate": 8.692497190280224e-06, "loss": 0.0182, "step": 16380 }, { "epoch": 61.156716417910445, "grad_norm": 0.14859160780906677, "learning_rate": 8.645971286271904e-06, "loss": 0.0201, "step": 16390 }, { "epoch": 61.19402985074627, "grad_norm": 0.12644913792610168, "learning_rate": 8.599558442598998e-06, "loss": 0.0213, "step": 16400 }, { "epoch": 61.23134328358209, "grad_norm": 0.11389759182929993, "learning_rate": 8.55325878615244e-06, "loss": 0.0206, "step": 16410 }, { "epoch": 61.26865671641791, "grad_norm": 0.11454262584447861, "learning_rate": 8.507072443513702e-06, "loss": 0.0163, "step": 16420 }, { "epoch": 61.30597014925373, "grad_norm": 0.07392272353172302, "learning_rate": 8.460999540954517e-06, "loss": 0.0213, "step": 16430 }, { "epoch": 61.343283582089555, "grad_norm": 0.10970254987478256, "learning_rate": 8.415040204436426e-06, "loss": 0.0173, "step": 16440 }, { "epoch": 61.38059701492537, "grad_norm": 0.13220611214637756, "learning_rate": 8.369194559610482e-06, "loss": 0.0148, "step": 16450 }, { "epoch": 61.417910447761194, "grad_norm": 0.2035907655954361, "learning_rate": 8.323462731816961e-06, "loss": 0.0173, "step": 16460 }, { "epoch": 61.45522388059702, "grad_norm": 0.14891105890274048, "learning_rate": 8.277844846084898e-06, "loss": 0.0167, "step": 16470 }, { "epoch": 61.492537313432834, "grad_norm": 0.07740630209445953, "learning_rate": 8.232341027131885e-06, "loss": 0.0203, "step": 16480 }, { "epoch": 61.52985074626866, "grad_norm": 0.13667617738246918, "learning_rate": 8.186951399363613e-06, "loss": 0.0205, "step": 16490 }, { "epoch": 61.56716417910448, "grad_norm": 0.1692771464586258, "learning_rate": 8.141676086873572e-06, "loss": 0.0222, "step": 16500 }, { "epoch": 61.6044776119403, "grad_norm": 0.14342786371707916, "learning_rate": 8.096515213442762e-06, "loss": 0.0223, "step": 16510 }, { "epoch": 61.64179104477612, "grad_norm": 0.12913767993450165, "learning_rate": 8.051468902539272e-06, "loss": 0.0168, "step": 16520 }, { "epoch": 61.67910447761194, "grad_norm": 0.1333862990140915, "learning_rate": 8.00653727731801e-06, "loss": 0.0201, "step": 16530 }, { "epoch": 61.71641791044776, "grad_norm": 0.11389653384685516, "learning_rate": 7.96172046062032e-06, "loss": 0.0202, "step": 16540 }, { "epoch": 61.75373134328358, "grad_norm": 0.1464148908853531, "learning_rate": 7.917018574973645e-06, "loss": 0.0181, "step": 16550 }, { "epoch": 61.791044776119406, "grad_norm": 0.1706206202507019, "learning_rate": 7.872431742591268e-06, "loss": 0.0198, "step": 16560 }, { "epoch": 61.82835820895522, "grad_norm": 0.14118894934654236, "learning_rate": 7.827960085371855e-06, "loss": 0.0199, "step": 16570 }, { "epoch": 61.865671641791046, "grad_norm": 0.09329701215028763, "learning_rate": 7.783603724899257e-06, "loss": 0.018, "step": 16580 }, { "epoch": 61.90298507462687, "grad_norm": 0.1430530548095703, "learning_rate": 7.739362782442021e-06, "loss": 0.0204, "step": 16590 }, { "epoch": 61.940298507462686, "grad_norm": 0.09568456560373306, "learning_rate": 7.695237378953223e-06, "loss": 0.0195, "step": 16600 }, { "epoch": 61.97761194029851, "grad_norm": 0.17139685153961182, "learning_rate": 7.651227635070041e-06, "loss": 0.0166, "step": 16610 }, { "epoch": 62.014925373134325, "grad_norm": 0.13210199773311615, "learning_rate": 7.607333671113409e-06, "loss": 0.0185, "step": 16620 }, { "epoch": 62.05223880597015, "grad_norm": 0.17845605313777924, "learning_rate": 7.56355560708778e-06, "loss": 0.0194, "step": 16630 }, { "epoch": 62.08955223880597, "grad_norm": 0.14077799022197723, "learning_rate": 7.519893562680663e-06, "loss": 0.0221, "step": 16640 }, { "epoch": 62.12686567164179, "grad_norm": 0.1356247216463089, "learning_rate": 7.476347657262456e-06, "loss": 0.0174, "step": 16650 }, { "epoch": 62.16417910447761, "grad_norm": 0.1071634590625763, "learning_rate": 7.432918009885997e-06, "loss": 0.0193, "step": 16660 }, { "epoch": 62.201492537313435, "grad_norm": 0.14937230944633484, "learning_rate": 7.389604739286271e-06, "loss": 0.0222, "step": 16670 }, { "epoch": 62.23880597014925, "grad_norm": 0.1811068058013916, "learning_rate": 7.3464079638801365e-06, "loss": 0.0182, "step": 16680 }, { "epoch": 62.276119402985074, "grad_norm": 0.11141037195920944, "learning_rate": 7.30332780176588e-06, "loss": 0.0176, "step": 16690 }, { "epoch": 62.3134328358209, "grad_norm": 0.13714560866355896, "learning_rate": 7.260364370723044e-06, "loss": 0.017, "step": 16700 }, { "epoch": 62.350746268656714, "grad_norm": 0.11435215920209885, "learning_rate": 7.217517788212025e-06, "loss": 0.0197, "step": 16710 }, { "epoch": 62.38805970149254, "grad_norm": 0.10824663937091827, "learning_rate": 7.174788171373731e-06, "loss": 0.0212, "step": 16720 }, { "epoch": 62.42537313432836, "grad_norm": 0.15108585357666016, "learning_rate": 7.132175637029293e-06, "loss": 0.0149, "step": 16730 }, { "epoch": 62.46268656716418, "grad_norm": 0.11877794563770294, "learning_rate": 7.089680301679752e-06, "loss": 0.0255, "step": 16740 }, { "epoch": 62.5, "grad_norm": 0.24168604612350464, "learning_rate": 7.047302281505736e-06, "loss": 0.0169, "step": 16750 }, { "epoch": 62.53731343283582, "grad_norm": 0.11300180852413177, "learning_rate": 7.005041692367154e-06, "loss": 0.0158, "step": 16760 }, { "epoch": 62.57462686567164, "grad_norm": 0.10497759282588959, "learning_rate": 6.962898649802823e-06, "loss": 0.0167, "step": 16770 }, { "epoch": 62.61194029850746, "grad_norm": 0.13462357223033905, "learning_rate": 6.92087326903022e-06, "loss": 0.0224, "step": 16780 }, { "epoch": 62.649253731343286, "grad_norm": 0.15835444629192352, "learning_rate": 6.878965664945108e-06, "loss": 0.0166, "step": 16790 }, { "epoch": 62.6865671641791, "grad_norm": 0.1387423574924469, "learning_rate": 6.837175952121306e-06, "loss": 0.0193, "step": 16800 }, { "epoch": 62.723880597014926, "grad_norm": 0.12432363629341125, "learning_rate": 6.795504244810285e-06, "loss": 0.0176, "step": 16810 }, { "epoch": 62.76119402985075, "grad_norm": 0.08233992010354996, "learning_rate": 6.753950656940905e-06, "loss": 0.022, "step": 16820 }, { "epoch": 62.798507462686565, "grad_norm": 0.09147649258375168, "learning_rate": 6.712515302119077e-06, "loss": 0.0215, "step": 16830 }, { "epoch": 62.83582089552239, "grad_norm": 0.1478629857301712, "learning_rate": 6.671198293627479e-06, "loss": 0.0158, "step": 16840 }, { "epoch": 62.87313432835821, "grad_norm": 0.18379691243171692, "learning_rate": 6.629999744425236e-06, "loss": 0.023, "step": 16850 }, { "epoch": 62.91044776119403, "grad_norm": 0.10927613824605942, "learning_rate": 6.588919767147639e-06, "loss": 0.0191, "step": 16860 }, { "epoch": 62.94776119402985, "grad_norm": 0.1020372211933136, "learning_rate": 6.5479584741057255e-06, "loss": 0.0198, "step": 16870 }, { "epoch": 62.985074626865675, "grad_norm": 0.14007586240768433, "learning_rate": 6.5071159772861436e-06, "loss": 0.0149, "step": 16880 }, { "epoch": 63.02238805970149, "grad_norm": 0.14693738520145416, "learning_rate": 6.466392388350695e-06, "loss": 0.0213, "step": 16890 }, { "epoch": 63.059701492537314, "grad_norm": 0.10830642282962799, "learning_rate": 6.425787818636131e-06, "loss": 0.0217, "step": 16900 }, { "epoch": 63.09701492537314, "grad_norm": 0.1391148567199707, "learning_rate": 6.385302379153818e-06, "loss": 0.0234, "step": 16910 }, { "epoch": 63.134328358208954, "grad_norm": 0.17605721950531006, "learning_rate": 6.344936180589351e-06, "loss": 0.0204, "step": 16920 }, { "epoch": 63.17164179104478, "grad_norm": 0.14744633436203003, "learning_rate": 6.304689333302416e-06, "loss": 0.0154, "step": 16930 }, { "epoch": 63.208955223880594, "grad_norm": 0.11913343518972397, "learning_rate": 6.264561947326331e-06, "loss": 0.0178, "step": 16940 }, { "epoch": 63.24626865671642, "grad_norm": 0.07904637604951859, "learning_rate": 6.22455413236786e-06, "loss": 0.0196, "step": 16950 }, { "epoch": 63.28358208955224, "grad_norm": 0.10337790101766586, "learning_rate": 6.184665997806832e-06, "loss": 0.0184, "step": 16960 }, { "epoch": 63.32089552238806, "grad_norm": 0.16016288101673126, "learning_rate": 6.144897652695864e-06, "loss": 0.018, "step": 16970 }, { "epoch": 63.35820895522388, "grad_norm": 0.11716923117637634, "learning_rate": 6.1052492057601275e-06, "loss": 0.0262, "step": 16980 }, { "epoch": 63.3955223880597, "grad_norm": 0.1925889104604721, "learning_rate": 6.0657207653969315e-06, "loss": 0.0173, "step": 16990 }, { "epoch": 63.43283582089552, "grad_norm": 0.157193124294281, "learning_rate": 6.026312439675552e-06, "loss": 0.019, "step": 17000 }, { "epoch": 63.47014925373134, "grad_norm": 0.1425442099571228, "learning_rate": 5.9870243363368275e-06, "loss": 0.0217, "step": 17010 }, { "epoch": 63.507462686567166, "grad_norm": 0.1629457324743271, "learning_rate": 5.947856562792925e-06, "loss": 0.024, "step": 17020 }, { "epoch": 63.54477611940298, "grad_norm": 0.11014243960380554, "learning_rate": 5.908809226127054e-06, "loss": 0.0191, "step": 17030 }, { "epoch": 63.582089552238806, "grad_norm": 0.15194103121757507, "learning_rate": 5.869882433093155e-06, "loss": 0.0198, "step": 17040 }, { "epoch": 63.61940298507463, "grad_norm": 0.1056244969367981, "learning_rate": 5.831076290115573e-06, "loss": 0.0153, "step": 17050 }, { "epoch": 63.656716417910445, "grad_norm": 0.08925970643758774, "learning_rate": 5.79239090328883e-06, "loss": 0.02, "step": 17060 }, { "epoch": 63.69402985074627, "grad_norm": 0.10884346067905426, "learning_rate": 5.753826378377286e-06, "loss": 0.0181, "step": 17070 }, { "epoch": 63.73134328358209, "grad_norm": 0.06512469053268433, "learning_rate": 5.715382820814885e-06, "loss": 0.0161, "step": 17080 }, { "epoch": 63.76865671641791, "grad_norm": 0.1681651920080185, "learning_rate": 5.67706033570487e-06, "loss": 0.0267, "step": 17090 }, { "epoch": 63.80597014925373, "grad_norm": 0.13018307089805603, "learning_rate": 5.6388590278194096e-06, "loss": 0.0177, "step": 17100 }, { "epoch": 63.843283582089555, "grad_norm": 0.13269394636154175, "learning_rate": 5.600779001599455e-06, "loss": 0.021, "step": 17110 }, { "epoch": 63.88059701492537, "grad_norm": 0.11373662948608398, "learning_rate": 5.562820361154314e-06, "loss": 0.0174, "step": 17120 }, { "epoch": 63.917910447761194, "grad_norm": 0.14471089839935303, "learning_rate": 5.524983210261481e-06, "loss": 0.0232, "step": 17130 }, { "epoch": 63.95522388059702, "grad_norm": 0.15642648935317993, "learning_rate": 5.48726765236629e-06, "loss": 0.021, "step": 17140 }, { "epoch": 63.992537313432834, "grad_norm": 0.14102338254451752, "learning_rate": 5.449673790581611e-06, "loss": 0.0278, "step": 17150 }, { "epoch": 64.02985074626865, "grad_norm": 0.12686364352703094, "learning_rate": 5.412201727687644e-06, "loss": 0.0188, "step": 17160 }, { "epoch": 64.06716417910448, "grad_norm": 0.11900068074464798, "learning_rate": 5.374851566131561e-06, "loss": 0.019, "step": 17170 }, { "epoch": 64.1044776119403, "grad_norm": 0.07747360318899155, "learning_rate": 5.337623408027293e-06, "loss": 0.0167, "step": 17180 }, { "epoch": 64.14179104477611, "grad_norm": 0.11902616173028946, "learning_rate": 5.300517355155215e-06, "loss": 0.0172, "step": 17190 }, { "epoch": 64.17910447761194, "grad_norm": 0.11516989022493362, "learning_rate": 5.263533508961827e-06, "loss": 0.0201, "step": 17200 }, { "epoch": 64.21641791044776, "grad_norm": 0.16131870448589325, "learning_rate": 5.226671970559577e-06, "loss": 0.0177, "step": 17210 }, { "epoch": 64.25373134328358, "grad_norm": 0.11579888314008713, "learning_rate": 5.1899328407264855e-06, "loss": 0.0219, "step": 17220 }, { "epoch": 64.2910447761194, "grad_norm": 0.11769811809062958, "learning_rate": 5.153316219905946e-06, "loss": 0.0189, "step": 17230 }, { "epoch": 64.32835820895522, "grad_norm": 0.15976080298423767, "learning_rate": 5.116822208206396e-06, "loss": 0.019, "step": 17240 }, { "epoch": 64.36567164179104, "grad_norm": 0.056341491639614105, "learning_rate": 5.080450905401057e-06, "loss": 0.0157, "step": 17250 }, { "epoch": 64.40298507462687, "grad_norm": 0.1326870322227478, "learning_rate": 5.044202410927706e-06, "loss": 0.0246, "step": 17260 }, { "epoch": 64.44029850746269, "grad_norm": 0.10871471464633942, "learning_rate": 5.008076823888319e-06, "loss": 0.0173, "step": 17270 }, { "epoch": 64.4776119402985, "grad_norm": 0.15686079859733582, "learning_rate": 4.972074243048897e-06, "loss": 0.0179, "step": 17280 }, { "epoch": 64.51492537313433, "grad_norm": 0.16984419524669647, "learning_rate": 4.936194766839103e-06, "loss": 0.0161, "step": 17290 }, { "epoch": 64.55223880597015, "grad_norm": 0.12612541019916534, "learning_rate": 4.900438493352055e-06, "loss": 0.0209, "step": 17300 }, { "epoch": 64.58955223880596, "grad_norm": 0.1080985888838768, "learning_rate": 4.864805520344051e-06, "loss": 0.0185, "step": 17310 }, { "epoch": 64.6268656716418, "grad_norm": 0.17851845920085907, "learning_rate": 4.829295945234258e-06, "loss": 0.0191, "step": 17320 }, { "epoch": 64.66417910447761, "grad_norm": 0.11173732578754425, "learning_rate": 4.7939098651045235e-06, "loss": 0.0209, "step": 17330 }, { "epoch": 64.70149253731343, "grad_norm": 0.10424789041280746, "learning_rate": 4.758647376699032e-06, "loss": 0.0155, "step": 17340 }, { "epoch": 64.73880597014926, "grad_norm": 0.10223556309938431, "learning_rate": 4.723508576424062e-06, "loss": 0.0243, "step": 17350 }, { "epoch": 64.77611940298507, "grad_norm": 0.13392555713653564, "learning_rate": 4.688493560347773e-06, "loss": 0.02, "step": 17360 }, { "epoch": 64.81343283582089, "grad_norm": 0.2016138881444931, "learning_rate": 4.653602424199876e-06, "loss": 0.0223, "step": 17370 }, { "epoch": 64.85074626865672, "grad_norm": 0.1273835003376007, "learning_rate": 4.618835263371396e-06, "loss": 0.0195, "step": 17380 }, { "epoch": 64.88805970149254, "grad_norm": 0.10871824622154236, "learning_rate": 4.5841921729144424e-06, "loss": 0.0151, "step": 17390 }, { "epoch": 64.92537313432835, "grad_norm": 0.12166386842727661, "learning_rate": 4.549673247541875e-06, "loss": 0.0189, "step": 17400 }, { "epoch": 64.96268656716418, "grad_norm": 0.1331520676612854, "learning_rate": 4.515278581627141e-06, "loss": 0.0136, "step": 17410 }, { "epoch": 65.0, "grad_norm": 0.11596659570932388, "learning_rate": 4.48100826920394e-06, "loss": 0.0182, "step": 17420 }, { "epoch": 65.03731343283582, "grad_norm": 0.12967020273208618, "learning_rate": 4.446862403965984e-06, "loss": 0.0199, "step": 17430 }, { "epoch": 65.07462686567165, "grad_norm": 0.1988440304994583, "learning_rate": 4.412841079266777e-06, "loss": 0.021, "step": 17440 }, { "epoch": 65.11194029850746, "grad_norm": 0.14639997482299805, "learning_rate": 4.378944388119311e-06, "loss": 0.0191, "step": 17450 }, { "epoch": 65.14925373134328, "grad_norm": 0.12798142433166504, "learning_rate": 4.3451724231958644e-06, "loss": 0.0173, "step": 17460 }, { "epoch": 65.18656716417911, "grad_norm": 0.17876780033111572, "learning_rate": 4.311525276827682e-06, "loss": 0.0161, "step": 17470 }, { "epoch": 65.22388059701493, "grad_norm": 0.12645457684993744, "learning_rate": 4.27800304100478e-06, "loss": 0.0209, "step": 17480 }, { "epoch": 65.26119402985074, "grad_norm": 0.13220877945423126, "learning_rate": 4.244605807375679e-06, "loss": 0.0193, "step": 17490 }, { "epoch": 65.29850746268657, "grad_norm": 0.1316594034433365, "learning_rate": 4.2113336672471245e-06, "loss": 0.0209, "step": 17500 }, { "epoch": 65.33582089552239, "grad_norm": 0.12087316066026688, "learning_rate": 4.178186711583904e-06, "loss": 0.018, "step": 17510 }, { "epoch": 65.3731343283582, "grad_norm": 0.15793782472610474, "learning_rate": 4.145165031008508e-06, "loss": 0.0187, "step": 17520 }, { "epoch": 65.41044776119404, "grad_norm": 0.15775424242019653, "learning_rate": 4.112268715800943e-06, "loss": 0.0183, "step": 17530 }, { "epoch": 65.44776119402985, "grad_norm": 0.12293685972690582, "learning_rate": 4.079497855898501e-06, "loss": 0.0179, "step": 17540 }, { "epoch": 65.48507462686567, "grad_norm": 0.17893794178962708, "learning_rate": 4.046852540895446e-06, "loss": 0.0191, "step": 17550 }, { "epoch": 65.5223880597015, "grad_norm": 0.12910445034503937, "learning_rate": 4.01433286004283e-06, "loss": 0.0221, "step": 17560 }, { "epoch": 65.55970149253731, "grad_norm": 0.09562735259532928, "learning_rate": 3.981938902248222e-06, "loss": 0.0206, "step": 17570 }, { "epoch": 65.59701492537313, "grad_norm": 0.14073370397090912, "learning_rate": 3.949670756075447e-06, "loss": 0.0197, "step": 17580 }, { "epoch": 65.63432835820896, "grad_norm": 0.1126888319849968, "learning_rate": 3.917528509744412e-06, "loss": 0.0206, "step": 17590 }, { "epoch": 65.67164179104478, "grad_norm": 0.1429893523454666, "learning_rate": 3.885512251130763e-06, "loss": 0.0189, "step": 17600 }, { "epoch": 65.7089552238806, "grad_norm": 0.13565829396247864, "learning_rate": 3.8536220677657495e-06, "loss": 0.016, "step": 17610 }, { "epoch": 65.74626865671642, "grad_norm": 0.12730440497398376, "learning_rate": 3.821858046835913e-06, "loss": 0.0149, "step": 17620 }, { "epoch": 65.78358208955224, "grad_norm": 0.14973577857017517, "learning_rate": 3.790220275182854e-06, "loss": 0.0174, "step": 17630 }, { "epoch": 65.82089552238806, "grad_norm": 0.1319994479417801, "learning_rate": 3.75870883930306e-06, "loss": 0.0193, "step": 17640 }, { "epoch": 65.85820895522389, "grad_norm": 0.1149325966835022, "learning_rate": 3.7273238253475785e-06, "loss": 0.0186, "step": 17650 }, { "epoch": 65.8955223880597, "grad_norm": 0.1942172795534134, "learning_rate": 3.696065319121833e-06, "loss": 0.02, "step": 17660 }, { "epoch": 65.93283582089552, "grad_norm": 0.13044431805610657, "learning_rate": 3.664933406085402e-06, "loss": 0.0203, "step": 17670 }, { "epoch": 65.97014925373135, "grad_norm": 0.16788481175899506, "learning_rate": 3.6339281713517303e-06, "loss": 0.0211, "step": 17680 }, { "epoch": 66.00746268656717, "grad_norm": 0.12901845574378967, "learning_rate": 3.60304969968796e-06, "loss": 0.0221, "step": 17690 }, { "epoch": 66.04477611940298, "grad_norm": 0.14763857424259186, "learning_rate": 3.5722980755146517e-06, "loss": 0.0176, "step": 17700 }, { "epoch": 66.08208955223881, "grad_norm": 0.16571398079395294, "learning_rate": 3.541673382905558e-06, "loss": 0.0209, "step": 17710 }, { "epoch": 66.11940298507463, "grad_norm": 0.11846010386943817, "learning_rate": 3.511175705587433e-06, "loss": 0.0232, "step": 17720 }, { "epoch": 66.15671641791045, "grad_norm": 0.153096541762352, "learning_rate": 3.4808051269397512e-06, "loss": 0.0161, "step": 17730 }, { "epoch": 66.19402985074628, "grad_norm": 0.12656240165233612, "learning_rate": 3.4505617299945336e-06, "loss": 0.0203, "step": 17740 }, { "epoch": 66.23134328358209, "grad_norm": 0.1875738501548767, "learning_rate": 3.420445597436056e-06, "loss": 0.023, "step": 17750 }, { "epoch": 66.26865671641791, "grad_norm": 0.10410977154970169, "learning_rate": 3.390456811600673e-06, "loss": 0.0206, "step": 17760 }, { "epoch": 66.30597014925372, "grad_norm": 0.1436801552772522, "learning_rate": 3.360595454476595e-06, "loss": 0.0166, "step": 17770 }, { "epoch": 66.34328358208955, "grad_norm": 0.16608238220214844, "learning_rate": 3.3308616077036115e-06, "loss": 0.0251, "step": 17780 }, { "epoch": 66.38059701492537, "grad_norm": 0.14443938434123993, "learning_rate": 3.301255352572946e-06, "loss": 0.0151, "step": 17790 }, { "epoch": 66.41791044776119, "grad_norm": 0.18584851920604706, "learning_rate": 3.271776770026963e-06, "loss": 0.0235, "step": 17800 }, { "epoch": 66.45522388059702, "grad_norm": 0.08840323239564896, "learning_rate": 3.2424259406589664e-06, "loss": 0.0147, "step": 17810 }, { "epoch": 66.49253731343283, "grad_norm": 0.10821016877889633, "learning_rate": 3.213202944713023e-06, "loss": 0.0273, "step": 17820 }, { "epoch": 66.52985074626865, "grad_norm": 0.12930136919021606, "learning_rate": 3.1841078620836683e-06, "loss": 0.0172, "step": 17830 }, { "epoch": 66.56716417910448, "grad_norm": 0.17707459628582, "learning_rate": 3.155140772315773e-06, "loss": 0.0165, "step": 17840 }, { "epoch": 66.6044776119403, "grad_norm": 0.17191192507743835, "learning_rate": 3.126301754604233e-06, "loss": 0.0177, "step": 17850 }, { "epoch": 66.64179104477611, "grad_norm": 0.19727960228919983, "learning_rate": 3.0975908877938277e-06, "loss": 0.0224, "step": 17860 }, { "epoch": 66.67910447761194, "grad_norm": 0.07564827054738998, "learning_rate": 3.0690082503789742e-06, "loss": 0.0266, "step": 17870 }, { "epoch": 66.71641791044776, "grad_norm": 0.12017014622688293, "learning_rate": 3.040553920503503e-06, "loss": 0.0186, "step": 17880 }, { "epoch": 66.75373134328358, "grad_norm": 0.13225309550762177, "learning_rate": 3.0122279759604745e-06, "loss": 0.018, "step": 17890 }, { "epoch": 66.7910447761194, "grad_norm": 0.13635587692260742, "learning_rate": 2.9840304941919415e-06, "loss": 0.0233, "step": 17900 }, { "epoch": 66.82835820895522, "grad_norm": 0.14751172065734863, "learning_rate": 2.9559615522887273e-06, "loss": 0.0159, "step": 17910 }, { "epoch": 66.86567164179104, "grad_norm": 0.1506449580192566, "learning_rate": 2.928021226990263e-06, "loss": 0.0218, "step": 17920 }, { "epoch": 66.90298507462687, "grad_norm": 0.10639137029647827, "learning_rate": 2.9002095946843277e-06, "loss": 0.0176, "step": 17930 }, { "epoch": 66.94029850746269, "grad_norm": 0.10849863290786743, "learning_rate": 2.8725267314068495e-06, "loss": 0.024, "step": 17940 }, { "epoch": 66.9776119402985, "grad_norm": 0.09733816981315613, "learning_rate": 2.844972712841737e-06, "loss": 0.0182, "step": 17950 }, { "epoch": 67.01492537313433, "grad_norm": 0.1123482808470726, "learning_rate": 2.817547614320615e-06, "loss": 0.0198, "step": 17960 }, { "epoch": 67.05223880597015, "grad_norm": 0.15768779814243317, "learning_rate": 2.790251510822661e-06, "loss": 0.0188, "step": 17970 }, { "epoch": 67.08955223880596, "grad_norm": 0.1178063452243805, "learning_rate": 2.7630844769743757e-06, "loss": 0.0214, "step": 17980 }, { "epoch": 67.1268656716418, "grad_norm": 0.12463657557964325, "learning_rate": 2.73604658704939e-06, "loss": 0.0228, "step": 17990 }, { "epoch": 67.16417910447761, "grad_norm": 0.14056861400604248, "learning_rate": 2.7091379149682685e-06, "loss": 0.0184, "step": 18000 }, { "epoch": 67.20149253731343, "grad_norm": 0.12165476381778717, "learning_rate": 2.682358534298285e-06, "loss": 0.0208, "step": 18010 }, { "epoch": 67.23880597014926, "grad_norm": 0.17824946343898773, "learning_rate": 2.6557085182532582e-06, "loss": 0.0215, "step": 18020 }, { "epoch": 67.27611940298507, "grad_norm": 0.11440717428922653, "learning_rate": 2.6291879396933004e-06, "loss": 0.0234, "step": 18030 }, { "epoch": 67.31343283582089, "grad_norm": 0.07570961862802505, "learning_rate": 2.602796871124663e-06, "loss": 0.0173, "step": 18040 }, { "epoch": 67.35074626865672, "grad_norm": 0.10968591272830963, "learning_rate": 2.57653538469953e-06, "loss": 0.0202, "step": 18050 }, { "epoch": 67.38805970149254, "grad_norm": 0.10639911144971848, "learning_rate": 2.5504035522157854e-06, "loss": 0.0214, "step": 18060 }, { "epoch": 67.42537313432835, "grad_norm": 0.1440175473690033, "learning_rate": 2.5244014451168863e-06, "loss": 0.0226, "step": 18070 }, { "epoch": 67.46268656716418, "grad_norm": 0.12005949765443802, "learning_rate": 2.4985291344915674e-06, "loss": 0.0194, "step": 18080 }, { "epoch": 67.5, "grad_norm": 0.18498671054840088, "learning_rate": 2.4727866910737583e-06, "loss": 0.0187, "step": 18090 }, { "epoch": 67.53731343283582, "grad_norm": 0.062043447047472, "learning_rate": 2.4471741852423237e-06, "loss": 0.0202, "step": 18100 }, { "epoch": 67.57462686567165, "grad_norm": 0.06655175983905792, "learning_rate": 2.421691687020855e-06, "loss": 0.0182, "step": 18110 }, { "epoch": 67.61194029850746, "grad_norm": 0.11743638664484024, "learning_rate": 2.3963392660775575e-06, "loss": 0.0197, "step": 18120 }, { "epoch": 67.64925373134328, "grad_norm": 0.09882067888975143, "learning_rate": 2.371116991724953e-06, "loss": 0.0184, "step": 18130 }, { "epoch": 67.68656716417911, "grad_norm": 0.09239890426397324, "learning_rate": 2.3460249329197824e-06, "loss": 0.0171, "step": 18140 }, { "epoch": 67.72388059701493, "grad_norm": 0.12937159836292267, "learning_rate": 2.321063158262793e-06, "loss": 0.0173, "step": 18150 }, { "epoch": 67.76119402985074, "grad_norm": 0.08913063257932663, "learning_rate": 2.296231735998511e-06, "loss": 0.0229, "step": 18160 }, { "epoch": 67.79850746268657, "grad_norm": 0.14119577407836914, "learning_rate": 2.271530734015104e-06, "loss": 0.016, "step": 18170 }, { "epoch": 67.83582089552239, "grad_norm": 0.12339820712804794, "learning_rate": 2.2469602198441573e-06, "loss": 0.0189, "step": 18180 }, { "epoch": 67.8731343283582, "grad_norm": 0.10009341686964035, "learning_rate": 2.222520260660521e-06, "loss": 0.0154, "step": 18190 }, { "epoch": 67.91044776119404, "grad_norm": 0.17289131879806519, "learning_rate": 2.1982109232821178e-06, "loss": 0.0227, "step": 18200 }, { "epoch": 67.94776119402985, "grad_norm": 0.1440674513578415, "learning_rate": 2.174032274169746e-06, "loss": 0.0185, "step": 18210 }, { "epoch": 67.98507462686567, "grad_norm": 0.23111875355243683, "learning_rate": 2.149984379426906e-06, "loss": 0.0151, "step": 18220 }, { "epoch": 68.0223880597015, "grad_norm": 0.12018198519945145, "learning_rate": 2.1260673047996227e-06, "loss": 0.0169, "step": 18230 }, { "epoch": 68.05970149253731, "grad_norm": 0.10648278892040253, "learning_rate": 2.102281115676258e-06, "loss": 0.0173, "step": 18240 }, { "epoch": 68.09701492537313, "grad_norm": 0.08541513979434967, "learning_rate": 2.0786258770873647e-06, "loss": 0.0207, "step": 18250 }, { "epoch": 68.13432835820896, "grad_norm": 0.19868263602256775, "learning_rate": 2.0551016537054493e-06, "loss": 0.0198, "step": 18260 }, { "epoch": 68.17164179104478, "grad_norm": 0.12067861109972, "learning_rate": 2.0317085098448372e-06, "loss": 0.0204, "step": 18270 }, { "epoch": 68.2089552238806, "grad_norm": 0.1425771415233612, "learning_rate": 2.008446509461498e-06, "loss": 0.0184, "step": 18280 }, { "epoch": 68.24626865671642, "grad_norm": 0.14146816730499268, "learning_rate": 1.985315716152847e-06, "loss": 0.0172, "step": 18290 }, { "epoch": 68.28358208955224, "grad_norm": 0.1354537010192871, "learning_rate": 1.962316193157593e-06, "loss": 0.0161, "step": 18300 }, { "epoch": 68.32089552238806, "grad_norm": 0.16437850892543793, "learning_rate": 1.939448003355554e-06, "loss": 0.0232, "step": 18310 }, { "epoch": 68.35820895522389, "grad_norm": 0.13494864106178284, "learning_rate": 1.91671120926748e-06, "loss": 0.0181, "step": 18320 }, { "epoch": 68.3955223880597, "grad_norm": 0.12695229053497314, "learning_rate": 1.8941058730549132e-06, "loss": 0.0184, "step": 18330 }, { "epoch": 68.43283582089552, "grad_norm": 0.13497011363506317, "learning_rate": 1.8716320565199618e-06, "loss": 0.014, "step": 18340 }, { "epoch": 68.47014925373135, "grad_norm": 0.11022226512432098, "learning_rate": 1.849289821105199e-06, "loss": 0.0209, "step": 18350 }, { "epoch": 68.50746268656717, "grad_norm": 0.13277076184749603, "learning_rate": 1.8270792278934302e-06, "loss": 0.0212, "step": 18360 }, { "epoch": 68.54477611940298, "grad_norm": 0.1605406254529953, "learning_rate": 1.8050003376075707e-06, "loss": 0.014, "step": 18370 }, { "epoch": 68.58208955223881, "grad_norm": 0.13187871873378754, "learning_rate": 1.7830532106104747e-06, "loss": 0.0221, "step": 18380 }, { "epoch": 68.61940298507463, "grad_norm": 0.15948276221752167, "learning_rate": 1.7612379069047335e-06, "loss": 0.0211, "step": 18390 }, { "epoch": 68.65671641791045, "grad_norm": 0.11595791578292847, "learning_rate": 1.7395544861325718e-06, "loss": 0.0187, "step": 18400 }, { "epoch": 68.69402985074628, "grad_norm": 0.1577587127685547, "learning_rate": 1.7180030075756136e-06, "loss": 0.0211, "step": 18410 }, { "epoch": 68.73134328358209, "grad_norm": 0.12189193814992905, "learning_rate": 1.696583530154794e-06, "loss": 0.0222, "step": 18420 }, { "epoch": 68.76865671641791, "grad_norm": 0.1288890540599823, "learning_rate": 1.6752961124301415e-06, "loss": 0.0194, "step": 18430 }, { "epoch": 68.80597014925372, "grad_norm": 0.1630735546350479, "learning_rate": 1.6541408126006463e-06, "loss": 0.0204, "step": 18440 }, { "epoch": 68.84328358208955, "grad_norm": 0.20119670033454895, "learning_rate": 1.6331176885040878e-06, "loss": 0.021, "step": 18450 }, { "epoch": 68.88059701492537, "grad_norm": 0.10522568225860596, "learning_rate": 1.6122267976168781e-06, "loss": 0.0202, "step": 18460 }, { "epoch": 68.91791044776119, "grad_norm": 0.16778993606567383, "learning_rate": 1.5914681970539192e-06, "loss": 0.0177, "step": 18470 }, { "epoch": 68.95522388059702, "grad_norm": 0.14104974269866943, "learning_rate": 1.5708419435684462e-06, "loss": 0.0183, "step": 18480 }, { "epoch": 68.99253731343283, "grad_norm": 0.07799818366765976, "learning_rate": 1.550348093551829e-06, "loss": 0.0153, "step": 18490 }, { "epoch": 69.02985074626865, "grad_norm": 0.1240101307630539, "learning_rate": 1.5299867030334814e-06, "loss": 0.0165, "step": 18500 }, { "epoch": 69.06716417910448, "grad_norm": 0.1403922438621521, "learning_rate": 1.5097578276806633e-06, "loss": 0.0179, "step": 18510 }, { "epoch": 69.1044776119403, "grad_norm": 0.10646823793649673, "learning_rate": 1.4896615227983468e-06, "loss": 0.0172, "step": 18520 }, { "epoch": 69.14179104477611, "grad_norm": 0.17238102853298187, "learning_rate": 1.4696978433290653e-06, "loss": 0.0221, "step": 18530 }, { "epoch": 69.17910447761194, "grad_norm": 0.09721789509057999, "learning_rate": 1.4498668438527597e-06, "loss": 0.0211, "step": 18540 }, { "epoch": 69.21641791044776, "grad_norm": 0.12056535482406616, "learning_rate": 1.4301685785866214e-06, "loss": 0.0223, "step": 18550 }, { "epoch": 69.25373134328358, "grad_norm": 0.10625797510147095, "learning_rate": 1.4106031013849496e-06, "loss": 0.0228, "step": 18560 }, { "epoch": 69.2910447761194, "grad_norm": 0.16788646578788757, "learning_rate": 1.3911704657390113e-06, "loss": 0.0224, "step": 18570 }, { "epoch": 69.32835820895522, "grad_norm": 0.14030233025550842, "learning_rate": 1.3718707247769135e-06, "loss": 0.0177, "step": 18580 }, { "epoch": 69.36567164179104, "grad_norm": 0.18253101408481598, "learning_rate": 1.3527039312633827e-06, "loss": 0.0153, "step": 18590 }, { "epoch": 69.40298507462687, "grad_norm": 0.09068179875612259, "learning_rate": 1.333670137599713e-06, "loss": 0.0181, "step": 18600 }, { "epoch": 69.44029850746269, "grad_norm": 0.13773348927497864, "learning_rate": 1.3147693958235618e-06, "loss": 0.0212, "step": 18610 }, { "epoch": 69.4776119402985, "grad_norm": 0.1236392930150032, "learning_rate": 1.2960017576088446e-06, "loss": 0.0195, "step": 18620 }, { "epoch": 69.51492537313433, "grad_norm": 0.1716703474521637, "learning_rate": 1.2773672742655784e-06, "loss": 0.0198, "step": 18630 }, { "epoch": 69.55223880597015, "grad_norm": 0.1496538519859314, "learning_rate": 1.2588659967397e-06, "loss": 0.0172, "step": 18640 }, { "epoch": 69.58955223880596, "grad_norm": 0.12191520631313324, "learning_rate": 1.2404979756130142e-06, "loss": 0.0167, "step": 18650 }, { "epoch": 69.6268656716418, "grad_norm": 0.16896167397499084, "learning_rate": 1.222263261102985e-06, "loss": 0.0193, "step": 18660 }, { "epoch": 69.66417910447761, "grad_norm": 0.1616845428943634, "learning_rate": 1.2041619030626284e-06, "loss": 0.0163, "step": 18670 }, { "epoch": 69.70149253731343, "grad_norm": 0.16555984318256378, "learning_rate": 1.1861939509803687e-06, "loss": 0.0229, "step": 18680 }, { "epoch": 69.73880597014926, "grad_norm": 0.16760404407978058, "learning_rate": 1.1683594539798893e-06, "loss": 0.0273, "step": 18690 }, { "epoch": 69.77611940298507, "grad_norm": 0.14320339262485504, "learning_rate": 1.1506584608200367e-06, "loss": 0.022, "step": 18700 }, { "epoch": 69.81343283582089, "grad_norm": 0.08211534470319748, "learning_rate": 1.1330910198946442e-06, "loss": 0.018, "step": 18710 }, { "epoch": 69.85074626865672, "grad_norm": 0.12172215431928635, "learning_rate": 1.1156571792324211e-06, "loss": 0.0242, "step": 18720 }, { "epoch": 69.88805970149254, "grad_norm": 0.20841795206069946, "learning_rate": 1.0983569864968346e-06, "loss": 0.0172, "step": 18730 }, { "epoch": 69.92537313432835, "grad_norm": 0.09595713019371033, "learning_rate": 1.0811904889859336e-06, "loss": 0.0182, "step": 18740 }, { "epoch": 69.96268656716418, "grad_norm": 0.11765482276678085, "learning_rate": 1.064157733632276e-06, "loss": 0.0178, "step": 18750 }, { "epoch": 70.0, "grad_norm": 0.07378559559583664, "learning_rate": 1.0472587670027678e-06, "loss": 0.0168, "step": 18760 }, { "epoch": 70.03731343283582, "grad_norm": 0.13242511451244354, "learning_rate": 1.030493635298535e-06, "loss": 0.0162, "step": 18770 }, { "epoch": 70.07462686567165, "grad_norm": 0.0983816385269165, "learning_rate": 1.0138623843548078e-06, "loss": 0.0173, "step": 18780 }, { "epoch": 70.11194029850746, "grad_norm": 0.12736700475215912, "learning_rate": 9.97365059640787e-07, "loss": 0.0177, "step": 18790 }, { "epoch": 70.14925373134328, "grad_norm": 0.1017545685172081, "learning_rate": 9.810017062595322e-07, "loss": 0.0184, "step": 18800 }, { "epoch": 70.18656716417911, "grad_norm": 0.15892785787582397, "learning_rate": 9.647723689478305e-07, "loss": 0.025, "step": 18810 }, { "epoch": 70.22388059701493, "grad_norm": 0.1401316076517105, "learning_rate": 9.486770920760668e-07, "loss": 0.0176, "step": 18820 }, { "epoch": 70.26119402985074, "grad_norm": 0.10679785162210464, "learning_rate": 9.327159196481138e-07, "loss": 0.0184, "step": 18830 }, { "epoch": 70.29850746268657, "grad_norm": 0.13935355842113495, "learning_rate": 9.168888953011989e-07, "loss": 0.0205, "step": 18840 }, { "epoch": 70.33582089552239, "grad_norm": 0.08175674825906754, "learning_rate": 9.011960623058202e-07, "loss": 0.0178, "step": 18850 }, { "epoch": 70.3731343283582, "grad_norm": 0.12211751937866211, "learning_rate": 8.856374635655695e-07, "loss": 0.0172, "step": 18860 }, { "epoch": 70.41044776119404, "grad_norm": 0.11755157262086868, "learning_rate": 8.702131416170656e-07, "loss": 0.0175, "step": 18870 }, { "epoch": 70.44776119402985, "grad_norm": 0.1537550687789917, "learning_rate": 8.549231386298151e-07, "loss": 0.0163, "step": 18880 }, { "epoch": 70.48507462686567, "grad_norm": 0.0989036113023758, "learning_rate": 8.397674964061075e-07, "loss": 0.0159, "step": 18890 }, { "epoch": 70.5223880597015, "grad_norm": 0.17456677556037903, "learning_rate": 8.247462563808817e-07, "loss": 0.0217, "step": 18900 }, { "epoch": 70.55970149253731, "grad_norm": 0.13658224046230316, "learning_rate": 8.098594596216424e-07, "loss": 0.0174, "step": 18910 }, { "epoch": 70.59701492537313, "grad_norm": 0.13732556998729706, "learning_rate": 7.951071468283167e-07, "loss": 0.0199, "step": 18920 }, { "epoch": 70.63432835820896, "grad_norm": 0.17400671541690826, "learning_rate": 7.804893583331696e-07, "loss": 0.0201, "step": 18930 }, { "epoch": 70.67164179104478, "grad_norm": 0.14563308656215668, "learning_rate": 7.66006134100672e-07, "loss": 0.019, "step": 18940 }, { "epoch": 70.7089552238806, "grad_norm": 0.11749936640262604, "learning_rate": 7.516575137274162e-07, "loss": 0.0191, "step": 18950 }, { "epoch": 70.74626865671642, "grad_norm": 0.09877680987119675, "learning_rate": 7.374435364419674e-07, "loss": 0.0162, "step": 18960 }, { "epoch": 70.78358208955224, "grad_norm": 0.12091520428657532, "learning_rate": 7.233642411048014e-07, "loss": 0.015, "step": 18970 }, { "epoch": 70.82089552238806, "grad_norm": 0.20587463676929474, "learning_rate": 7.094196662081831e-07, "loss": 0.0195, "step": 18980 }, { "epoch": 70.85820895522389, "grad_norm": 0.12227480113506317, "learning_rate": 6.956098498760389e-07, "loss": 0.0161, "step": 18990 }, { "epoch": 70.8955223880597, "grad_norm": 0.0962904617190361, "learning_rate": 6.819348298638839e-07, "loss": 0.0165, "step": 19000 }, { "epoch": 70.93283582089552, "grad_norm": 0.06944536417722702, "learning_rate": 6.683946435586952e-07, "loss": 0.0145, "step": 19010 }, { "epoch": 70.97014925373135, "grad_norm": 0.1996118575334549, "learning_rate": 6.549893279788277e-07, "loss": 0.0188, "step": 19020 }, { "epoch": 71.00746268656717, "grad_norm": 0.18420849740505219, "learning_rate": 6.417189197739093e-07, "loss": 0.0168, "step": 19030 }, { "epoch": 71.04477611940298, "grad_norm": 0.12658478319644928, "learning_rate": 6.285834552247128e-07, "loss": 0.0226, "step": 19040 }, { "epoch": 71.08208955223881, "grad_norm": 0.0984082967042923, "learning_rate": 6.15582970243117e-07, "loss": 0.0185, "step": 19050 }, { "epoch": 71.11940298507463, "grad_norm": 0.16446450352668762, "learning_rate": 6.027175003719354e-07, "loss": 0.0176, "step": 19060 }, { "epoch": 71.15671641791045, "grad_norm": 0.12531772255897522, "learning_rate": 5.899870807848762e-07, "loss": 0.0158, "step": 19070 }, { "epoch": 71.19402985074628, "grad_norm": 0.15623462200164795, "learning_rate": 5.773917462864264e-07, "loss": 0.0175, "step": 19080 }, { "epoch": 71.23134328358209, "grad_norm": 0.12645038962364197, "learning_rate": 5.64931531311741e-07, "loss": 0.0198, "step": 19090 }, { "epoch": 71.26865671641791, "grad_norm": 0.15444409847259521, "learning_rate": 5.526064699265753e-07, "loss": 0.0167, "step": 19100 }, { "epoch": 71.30597014925372, "grad_norm": 0.14846886694431305, "learning_rate": 5.404165958271811e-07, "loss": 0.0184, "step": 19110 }, { "epoch": 71.34328358208955, "grad_norm": 0.11758224666118622, "learning_rate": 5.283619423401998e-07, "loss": 0.0176, "step": 19120 }, { "epoch": 71.38059701492537, "grad_norm": 0.17311839759349823, "learning_rate": 5.164425424226016e-07, "loss": 0.0165, "step": 19130 }, { "epoch": 71.41791044776119, "grad_norm": 0.14818927645683289, "learning_rate": 5.046584286615697e-07, "loss": 0.0175, "step": 19140 }, { "epoch": 71.45522388059702, "grad_norm": 0.1597837209701538, "learning_rate": 4.930096332744105e-07, "loss": 0.0182, "step": 19150 }, { "epoch": 71.49253731343283, "grad_norm": 0.11650685966014862, "learning_rate": 4.814961881085045e-07, "loss": 0.0169, "step": 19160 }, { "epoch": 71.52985074626865, "grad_norm": 0.09752462804317474, "learning_rate": 4.701181246411501e-07, "loss": 0.0195, "step": 19170 }, { "epoch": 71.56716417910448, "grad_norm": 0.11851171404123306, "learning_rate": 4.5887547397955864e-07, "loss": 0.0188, "step": 19180 }, { "epoch": 71.6044776119403, "grad_norm": 0.1688176542520523, "learning_rate": 4.4776826686069305e-07, "loss": 0.0222, "step": 19190 }, { "epoch": 71.64179104477611, "grad_norm": 0.14429128170013428, "learning_rate": 4.367965336512403e-07, "loss": 0.0169, "step": 19200 }, { "epoch": 71.67910447761194, "grad_norm": 0.10188789665699005, "learning_rate": 4.259603043475002e-07, "loss": 0.0168, "step": 19210 }, { "epoch": 71.71641791044776, "grad_norm": 0.1377180814743042, "learning_rate": 4.1525960857530243e-07, "loss": 0.019, "step": 19220 }, { "epoch": 71.75373134328358, "grad_norm": 0.16953866183757782, "learning_rate": 4.0469447558995065e-07, "loss": 0.0172, "step": 19230 }, { "epoch": 71.7910447761194, "grad_norm": 0.09385271370410919, "learning_rate": 3.9426493427611177e-07, "loss": 0.0189, "step": 19240 }, { "epoch": 71.82835820895522, "grad_norm": 0.08605004101991653, "learning_rate": 3.839710131477492e-07, "loss": 0.017, "step": 19250 }, { "epoch": 71.86567164179104, "grad_norm": 0.10793325304985046, "learning_rate": 3.738127403480507e-07, "loss": 0.0193, "step": 19260 }, { "epoch": 71.90298507462687, "grad_norm": 0.16877131164073944, "learning_rate": 3.637901436493507e-07, "loss": 0.0203, "step": 19270 }, { "epoch": 71.94029850746269, "grad_norm": 0.11603684723377228, "learning_rate": 3.5390325045304706e-07, "loss": 0.0156, "step": 19280 }, { "epoch": 71.9776119402985, "grad_norm": 0.1581297516822815, "learning_rate": 3.441520877895288e-07, "loss": 0.0198, "step": 19290 }, { "epoch": 72.01492537313433, "grad_norm": 0.19064193964004517, "learning_rate": 3.3453668231809286e-07, "loss": 0.0227, "step": 19300 }, { "epoch": 72.05223880597015, "grad_norm": 0.13616150617599487, "learning_rate": 3.250570603268943e-07, "loss": 0.0188, "step": 19310 }, { "epoch": 72.08955223880596, "grad_norm": 0.10166120529174805, "learning_rate": 3.157132477328628e-07, "loss": 0.0149, "step": 19320 }, { "epoch": 72.1268656716418, "grad_norm": 0.13065186142921448, "learning_rate": 3.0650527008162513e-07, "loss": 0.0182, "step": 19330 }, { "epoch": 72.16417910447761, "grad_norm": 0.15283487737178802, "learning_rate": 2.9743315254743833e-07, "loss": 0.0146, "step": 19340 }, { "epoch": 72.20149253731343, "grad_norm": 0.09659247100353241, "learning_rate": 2.8849691993311777e-07, "loss": 0.0185, "step": 19350 }, { "epoch": 72.23880597014926, "grad_norm": 0.09417303651571274, "learning_rate": 2.796965966699927e-07, "loss": 0.0173, "step": 19360 }, { "epoch": 72.27611940298507, "grad_norm": 0.0800069198012352, "learning_rate": 2.7103220681780615e-07, "loss": 0.0202, "step": 19370 }, { "epoch": 72.31343283582089, "grad_norm": 0.13767953217029572, "learning_rate": 2.625037740646763e-07, "loss": 0.0174, "step": 19380 }, { "epoch": 72.35074626865672, "grad_norm": 0.11557289212942123, "learning_rate": 2.5411132172700194e-07, "loss": 0.0155, "step": 19390 }, { "epoch": 72.38805970149254, "grad_norm": 0.09889765083789825, "learning_rate": 2.458548727494292e-07, "loss": 0.0186, "step": 19400 }, { "epoch": 72.42537313432835, "grad_norm": 0.22108830511569977, "learning_rate": 2.3773444970477955e-07, "loss": 0.0182, "step": 19410 }, { "epoch": 72.46268656716418, "grad_norm": 0.15238291025161743, "learning_rate": 2.2975007479397738e-07, "loss": 0.0202, "step": 19420 }, { "epoch": 72.5, "grad_norm": 0.08073991537094116, "learning_rate": 2.219017698460002e-07, "loss": 0.0211, "step": 19430 }, { "epoch": 72.53731343283582, "grad_norm": 0.09508215636014938, "learning_rate": 2.1418955631781202e-07, "loss": 0.0192, "step": 19440 }, { "epoch": 72.57462686567165, "grad_norm": 0.16223153471946716, "learning_rate": 2.0661345529430775e-07, "loss": 0.0163, "step": 19450 }, { "epoch": 72.61194029850746, "grad_norm": 0.14542946219444275, "learning_rate": 1.9917348748826335e-07, "loss": 0.0208, "step": 19460 }, { "epoch": 72.64925373134328, "grad_norm": 0.12122035771608353, "learning_rate": 1.918696732402636e-07, "loss": 0.0161, "step": 19470 }, { "epoch": 72.68656716417911, "grad_norm": 0.18518878519535065, "learning_rate": 1.847020325186577e-07, "loss": 0.0161, "step": 19480 }, { "epoch": 72.72388059701493, "grad_norm": 0.13667826354503632, "learning_rate": 1.776705849195037e-07, "loss": 0.0207, "step": 19490 }, { "epoch": 72.76119402985074, "grad_norm": 0.15153320133686066, "learning_rate": 1.7077534966650766e-07, "loss": 0.0242, "step": 19500 }, { "epoch": 72.79850746268657, "grad_norm": 0.16239145398139954, "learning_rate": 1.6401634561098444e-07, "loss": 0.0212, "step": 19510 }, { "epoch": 72.83582089552239, "grad_norm": 0.16250814497470856, "learning_rate": 1.5739359123178587e-07, "loss": 0.0168, "step": 19520 }, { "epoch": 72.8731343283582, "grad_norm": 0.12016879767179489, "learning_rate": 1.5090710463527836e-07, "loss": 0.0188, "step": 19530 }, { "epoch": 72.91044776119404, "grad_norm": 0.1420898139476776, "learning_rate": 1.4455690355525964e-07, "loss": 0.0155, "step": 19540 }, { "epoch": 72.94776119402985, "grad_norm": 0.1202482208609581, "learning_rate": 1.383430053529422e-07, "loss": 0.0197, "step": 19550 }, { "epoch": 72.98507462686567, "grad_norm": 0.12719324231147766, "learning_rate": 1.3226542701689215e-07, "loss": 0.0176, "step": 19560 }, { "epoch": 73.0223880597015, "grad_norm": 0.13211916387081146, "learning_rate": 1.2632418516296262e-07, "loss": 0.0225, "step": 19570 }, { "epoch": 73.05970149253731, "grad_norm": 0.1602044254541397, "learning_rate": 1.2051929603428825e-07, "loss": 0.0209, "step": 19580 }, { "epoch": 73.09701492537313, "grad_norm": 0.09601997584104538, "learning_rate": 1.1485077550122402e-07, "loss": 0.0214, "step": 19590 }, { "epoch": 73.13432835820896, "grad_norm": 0.09510338306427002, "learning_rate": 1.0931863906127327e-07, "loss": 0.02, "step": 19600 }, { "epoch": 73.17164179104478, "grad_norm": 0.09129198640584946, "learning_rate": 1.0392290183909304e-07, "loss": 0.0143, "step": 19610 }, { "epoch": 73.2089552238806, "grad_norm": 0.13121764361858368, "learning_rate": 9.866357858642205e-08, "loss": 0.0194, "step": 19620 }, { "epoch": 73.24626865671642, "grad_norm": 0.11988738924264908, "learning_rate": 9.354068368204739e-08, "loss": 0.0167, "step": 19630 }, { "epoch": 73.28358208955224, "grad_norm": 0.11255819350481033, "learning_rate": 8.855423113177664e-08, "loss": 0.0177, "step": 19640 }, { "epoch": 73.32089552238806, "grad_norm": 0.15283739566802979, "learning_rate": 8.37042345683714e-08, "loss": 0.0185, "step": 19650 }, { "epoch": 73.35820895522389, "grad_norm": 0.12407339364290237, "learning_rate": 7.899070725153613e-08, "loss": 0.015, "step": 19660 }, { "epoch": 73.3955223880597, "grad_norm": 0.13025252521038055, "learning_rate": 7.44136620678848e-08, "loss": 0.015, "step": 19670 }, { "epoch": 73.43283582089552, "grad_norm": 0.17516787350177765, "learning_rate": 6.997311153086883e-08, "loss": 0.0192, "step": 19680 }, { "epoch": 73.47014925373135, "grad_norm": 0.0927148312330246, "learning_rate": 6.566906778079917e-08, "loss": 0.0189, "step": 19690 }, { "epoch": 73.50746268656717, "grad_norm": 0.1357463002204895, "learning_rate": 6.150154258476315e-08, "loss": 0.0159, "step": 19700 }, { "epoch": 73.54477611940298, "grad_norm": 0.1569717973470688, "learning_rate": 5.747054733660773e-08, "loss": 0.0218, "step": 19710 }, { "epoch": 73.58208955223881, "grad_norm": 0.1201220154762268, "learning_rate": 5.3576093056922906e-08, "loss": 0.0202, "step": 19720 }, { "epoch": 73.61940298507463, "grad_norm": 0.1645561009645462, "learning_rate": 4.981819039300284e-08, "loss": 0.0179, "step": 19730 }, { "epoch": 73.65671641791045, "grad_norm": 0.09759602695703506, "learning_rate": 4.619684961881254e-08, "loss": 0.0183, "step": 19740 }, { "epoch": 73.69402985074628, "grad_norm": 0.13174809515476227, "learning_rate": 4.2712080634949024e-08, "loss": 0.0189, "step": 19750 }, { "epoch": 73.73134328358209, "grad_norm": 0.12171626836061478, "learning_rate": 3.936389296864129e-08, "loss": 0.0194, "step": 19760 }, { "epoch": 73.76865671641791, "grad_norm": 0.08692517131567001, "learning_rate": 3.615229577371149e-08, "loss": 0.0181, "step": 19770 }, { "epoch": 73.80597014925372, "grad_norm": 0.12346026301383972, "learning_rate": 3.3077297830541584e-08, "loss": 0.0196, "step": 19780 }, { "epoch": 73.84328358208955, "grad_norm": 0.09682221710681915, "learning_rate": 3.01389075460512e-08, "loss": 0.0177, "step": 19790 }, { "epoch": 73.88059701492537, "grad_norm": 0.09335530549287796, "learning_rate": 2.7337132953697554e-08, "loss": 0.0201, "step": 19800 }, { "epoch": 73.91791044776119, "grad_norm": 0.1098819151520729, "learning_rate": 2.467198171342e-08, "loss": 0.0185, "step": 19810 }, { "epoch": 73.95522388059702, "grad_norm": 0.15395502746105194, "learning_rate": 2.214346111164556e-08, "loss": 0.0198, "step": 19820 }, { "epoch": 73.99253731343283, "grad_norm": 0.10108119994401932, "learning_rate": 1.9751578061244504e-08, "loss": 0.0145, "step": 19830 }, { "epoch": 74.02985074626865, "grad_norm": 0.1167285218834877, "learning_rate": 1.749633910153592e-08, "loss": 0.0194, "step": 19840 }, { "epoch": 74.06716417910448, "grad_norm": 0.1497061550617218, "learning_rate": 1.5377750398265502e-08, "loss": 0.0205, "step": 19850 }, { "epoch": 74.1044776119403, "grad_norm": 0.11580044031143188, "learning_rate": 1.3395817743561134e-08, "loss": 0.0238, "step": 19860 }, { "epoch": 74.14179104477611, "grad_norm": 0.1373019814491272, "learning_rate": 1.1550546555960662e-08, "loss": 0.0216, "step": 19870 }, { "epoch": 74.17910447761194, "grad_norm": 0.09255756437778473, "learning_rate": 9.841941880361916e-09, "loss": 0.0206, "step": 19880 }, { "epoch": 74.21641791044776, "grad_norm": 0.12703175842761993, "learning_rate": 8.270008388022721e-09, "loss": 0.0194, "step": 19890 }, { "epoch": 74.25373134328358, "grad_norm": 0.13536234200000763, "learning_rate": 6.834750376549792e-09, "loss": 0.0177, "step": 19900 }, { "epoch": 74.2910447761194, "grad_norm": 0.09831994771957397, "learning_rate": 5.536171769887632e-09, "loss": 0.0168, "step": 19910 }, { "epoch": 74.32835820895522, "grad_norm": 0.15210555493831635, "learning_rate": 4.3742761183018784e-09, "loss": 0.0184, "step": 19920 }, { "epoch": 74.36567164179104, "grad_norm": 0.08395024389028549, "learning_rate": 3.349066598362649e-09, "loss": 0.0177, "step": 19930 }, { "epoch": 74.40298507462687, "grad_norm": 0.08620321750640869, "learning_rate": 2.4605460129556445e-09, "loss": 0.0175, "step": 19940 }, { "epoch": 74.44029850746269, "grad_norm": 0.13685142993927002, "learning_rate": 1.7087167912710478e-09, "loss": 0.015, "step": 19950 }, { "epoch": 74.4776119402985, "grad_norm": 0.15874415636062622, "learning_rate": 1.0935809887702154e-09, "loss": 0.0189, "step": 19960 }, { "epoch": 74.51492537313433, "grad_norm": 0.1138947606086731, "learning_rate": 6.151402872134337e-10, "loss": 0.0208, "step": 19970 }, { "epoch": 74.55223880597015, "grad_norm": 0.11884845048189163, "learning_rate": 2.7339599464326627e-10, "loss": 0.0202, "step": 19980 }, { "epoch": 74.58955223880596, "grad_norm": 0.07228806614875793, "learning_rate": 6.834904537900144e-11, "loss": 0.0167, "step": 19990 }, { "epoch": 74.6268656716418, "grad_norm": 0.1247607171535492, "learning_rate": 0.0, "loss": 0.0159, "step": 20000 } ], "logging_steps": 10, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 75, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.503433727678615e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }