{ "best_global_step": 626, "best_metric": 4.402504920959473, "best_model_checkpoint": "/home/deployer/laion/Orpheus-3B-Continued-2E-V4-WithGen/checkpoint-626", "epoch": 1.224579518412455, "eval_steps": 313, "global_step": 31613, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020166982616060984, "grad_norm": 0.2099609375, "learning_rate": 1.8e-05, "loss": 4.9769, "step": 10 }, { "epoch": 0.0004033396523212197, "grad_norm": 0.1552734375, "learning_rate": 3.8e-05, "loss": 4.8382, "step": 20 }, { "epoch": 0.0006050094784818296, "grad_norm": 0.07666015625, "learning_rate": 5.8e-05, "loss": 4.6796, "step": 30 }, { "epoch": 0.0008066793046424393, "grad_norm": 0.06787109375, "learning_rate": 7.800000000000001e-05, "loss": 4.6557, "step": 40 }, { "epoch": 0.0010083491308030493, "grad_norm": 0.058837890625, "learning_rate": 9.8e-05, "loss": 4.6592, "step": 50 }, { "epoch": 0.0012100189569636591, "grad_norm": 0.06982421875, "learning_rate": 0.000118, "loss": 4.612, "step": 60 }, { "epoch": 0.001411688783124269, "grad_norm": 0.06396484375, "learning_rate": 0.000138, "loss": 4.5519, "step": 70 }, { "epoch": 0.0016133586092848787, "grad_norm": 0.06787109375, "learning_rate": 0.00015800000000000002, "loss": 4.5649, "step": 80 }, { "epoch": 0.0018150284354454887, "grad_norm": 0.07177734375, "learning_rate": 0.00017800000000000002, "loss": 4.5665, "step": 90 }, { "epoch": 0.0020166982616060987, "grad_norm": 0.0615234375, "learning_rate": 0.00019800000000000002, "loss": 4.5854, "step": 100 }, { "epoch": 0.0022183680877667085, "grad_norm": 0.06298828125, "learning_rate": 0.00019999998367737306, "loss": 4.6197, "step": 110 }, { "epoch": 0.0024200379139273183, "grad_norm": 0.0693359375, "learning_rate": 0.00019999992725348425, "loss": 4.5047, "step": 120 }, { "epoch": 0.002621707740087928, "grad_norm": 0.0732421875, "learning_rate": 0.00019999983052684242, "loss": 4.5378, "step": 130 }, { "epoch": 0.002823377566248538, "grad_norm": 0.07763671875, "learning_rate": 0.0001999996934974865, "loss": 4.5702, "step": 140 }, { "epoch": 0.0030250473924091476, "grad_norm": 0.08154296875, "learning_rate": 0.00019999951616547182, "loss": 4.5161, "step": 150 }, { "epoch": 0.0032267172185697574, "grad_norm": 0.07177734375, "learning_rate": 0.00019999929853086975, "loss": 4.5427, "step": 160 }, { "epoch": 0.0034283870447303676, "grad_norm": 0.064453125, "learning_rate": 0.00019999904059376803, "loss": 4.5205, "step": 170 }, { "epoch": 0.0036300568708909774, "grad_norm": 0.06298828125, "learning_rate": 0.00019999874235427067, "loss": 4.5382, "step": 180 }, { "epoch": 0.003831726697051587, "grad_norm": 0.0693359375, "learning_rate": 0.0001999984038124978, "loss": 4.5279, "step": 190 }, { "epoch": 0.004033396523212197, "grad_norm": 0.06640625, "learning_rate": 0.0001999980249685859, "loss": 4.5047, "step": 200 }, { "epoch": 0.004235066349372807, "grad_norm": 0.06494140625, "learning_rate": 0.00019999760582268763, "loss": 4.5041, "step": 210 }, { "epoch": 0.004436736175533417, "grad_norm": 0.06689453125, "learning_rate": 0.00019999714637497192, "loss": 4.5513, "step": 220 }, { "epoch": 0.004638406001694026, "grad_norm": 0.0654296875, "learning_rate": 0.00019999664662562398, "loss": 4.5115, "step": 230 }, { "epoch": 0.0048400758278546365, "grad_norm": 0.06396484375, "learning_rate": 0.0001999961065748452, "loss": 4.5027, "step": 240 }, { "epoch": 0.005041745654015246, "grad_norm": 0.07373046875, "learning_rate": 0.00019999552622285317, "loss": 4.448, "step": 250 }, { "epoch": 0.005243415480175856, "grad_norm": 0.06689453125, "learning_rate": 0.0001999949055698819, "loss": 4.5337, "step": 260 }, { "epoch": 0.005445085306336466, "grad_norm": 0.0654296875, "learning_rate": 0.00019999424461618145, "loss": 4.5358, "step": 270 }, { "epoch": 0.005646755132497076, "grad_norm": 0.06103515625, "learning_rate": 0.00019999354336201828, "loss": 4.5168, "step": 280 }, { "epoch": 0.005848424958657686, "grad_norm": 0.0634765625, "learning_rate": 0.0001999928018076749, "loss": 4.4455, "step": 290 }, { "epoch": 0.006050094784818295, "grad_norm": 0.06591796875, "learning_rate": 0.00019999201995345026, "loss": 4.4747, "step": 300 }, { "epoch": 0.006251764610978905, "grad_norm": 0.064453125, "learning_rate": 0.00019999119779965947, "loss": 4.4948, "step": 310 }, { "epoch": 0.006312265558827088, "eval_loss": 4.493932723999023, "eval_runtime": 8.8208, "eval_samples_per_second": 22.674, "eval_steps_per_second": 1.474, "step": 313 }, { "epoch": 0.00028233775662485377, "grad_norm": 0.046875, "learning_rate": 0.00019996118655688004, "loss": 4.4607, "step": 320 }, { "epoch": 0.0006856774089460735, "grad_norm": 0.04541015625, "learning_rate": 0.00019995756127956854, "loss": 4.4336, "step": 330 }, { "epoch": 0.0010890170612672932, "grad_norm": 0.046142578125, "learning_rate": 0.00019995377420631467, "loss": 4.4016, "step": 340 }, { "epoch": 0.001492356713588513, "grad_norm": 0.04541015625, "learning_rate": 0.00019994982534324835, "loss": 4.2976, "step": 350 }, { "epoch": 0.0018956963659097325, "grad_norm": 0.05224609375, "learning_rate": 0.00019994571469676142, "loss": 4.2416, "step": 360 }, { "epoch": 0.0022990360182309523, "grad_norm": 0.052734375, "learning_rate": 0.00019994144227350756, "loss": 4.1895, "step": 370 }, { "epoch": 0.002702375670552172, "grad_norm": 0.058349609375, "learning_rate": 0.00019993700808040233, "loss": 4.1082, "step": 380 }, { "epoch": 0.003105715322873392, "grad_norm": 0.057861328125, "learning_rate": 0.0001999324121246231, "loss": 4.148, "step": 390 }, { "epoch": 0.0035090549751946114, "grad_norm": 0.06494140625, "learning_rate": 0.00019992765441360905, "loss": 4.1184, "step": 400 }, { "epoch": 0.003912394627515831, "grad_norm": 0.068359375, "learning_rate": 0.00019992273495506133, "loss": 4.1018, "step": 410 }, { "epoch": 0.004315734279837051, "grad_norm": 0.06005859375, "learning_rate": 0.00019991765375694276, "loss": 4.0995, "step": 420 }, { "epoch": 0.0047190739321582706, "grad_norm": 0.0634765625, "learning_rate": 0.00019991241082747795, "loss": 4.1194, "step": 430 }, { "epoch": 0.00512241358447949, "grad_norm": 0.0634765625, "learning_rate": 0.00019990700617515344, "loss": 4.0612, "step": 440 }, { "epoch": 0.00552575323680071, "grad_norm": 0.06298828125, "learning_rate": 0.00019990143980871738, "loss": 4.1001, "step": 450 }, { "epoch": 0.005929092889121929, "grad_norm": 0.0634765625, "learning_rate": 0.00019989571173717975, "loss": 4.075, "step": 460 }, { "epoch": 0.00633243254144315, "grad_norm": 0.07275390625, "learning_rate": 0.00019988982196981233, "loss": 4.1117, "step": 470 }, { "epoch": 0.006735772193764369, "grad_norm": 0.0517578125, "learning_rate": 0.00019988377051614854, "loss": 4.5104, "step": 480 }, { "epoch": 0.007139111846085589, "grad_norm": 0.047607421875, "learning_rate": 0.00019987755738598356, "loss": 4.5086, "step": 490 }, { "epoch": 0.007542451498406808, "grad_norm": 0.047607421875, "learning_rate": 0.00019987118258937416, "loss": 4.4517, "step": 500 }, { "epoch": 0.007945791150728028, "grad_norm": 0.044677734375, "learning_rate": 0.000199864646136639, "loss": 4.419, "step": 510 }, { "epoch": 0.008349130803049248, "grad_norm": 0.046142578125, "learning_rate": 0.00019985794803835825, "loss": 4.4749, "step": 520 }, { "epoch": 0.008752470455370467, "grad_norm": 0.0478515625, "learning_rate": 0.00019985108830537372, "loss": 4.4646, "step": 530 }, { "epoch": 0.009155810107691688, "grad_norm": 0.04345703125, "learning_rate": 0.00019984406694878895, "loss": 4.4207, "step": 540 }, { "epoch": 0.009559149760012906, "grad_norm": 0.044677734375, "learning_rate": 0.00019983688397996898, "loss": 4.4308, "step": 550 }, { "epoch": 0.009962489412334127, "grad_norm": 0.04345703125, "learning_rate": 0.00019982953941054054, "loss": 4.4311, "step": 560 }, { "epoch": 0.010365829064655347, "grad_norm": 0.04541015625, "learning_rate": 0.00019982203325239186, "loss": 4.4623, "step": 570 }, { "epoch": 0.010769168716976566, "grad_norm": 0.04833984375, "learning_rate": 0.00019981436551767275, "loss": 4.4461, "step": 580 }, { "epoch": 0.011172508369297786, "grad_norm": 0.046630859375, "learning_rate": 0.00019980653621879462, "loss": 4.4197, "step": 590 }, { "epoch": 0.011575848021619005, "grad_norm": 0.047119140625, "learning_rate": 0.00019979854536843027, "loss": 4.4135, "step": 600 }, { "epoch": 0.011979187673940225, "grad_norm": 0.0498046875, "learning_rate": 0.0001997903929795141, "loss": 4.4105, "step": 610 }, { "epoch": 0.012382527326261444, "grad_norm": 0.04638671875, "learning_rate": 0.00019978207906524192, "loss": 4.4107, "step": 620 }, { "epoch": 0.012624531117654176, "eval_loss": 4.402504920959473, "eval_runtime": 3.1085, "eval_samples_per_second": 64.339, "eval_steps_per_second": 8.042, "step": 626 }, { "epoch": 0.00016133586092848787, "grad_norm": 0.047119140625, "learning_rate": 0.000199773603639071, "loss": 4.2779, "step": 630 }, { "epoch": 0.0005646755132497075, "grad_norm": 0.0517578125, "learning_rate": 0.0001997649667147201, "loss": 4.2592, "step": 640 }, { "epoch": 0.0009680151655709273, "grad_norm": 0.053466796875, "learning_rate": 0.00019975616830616937, "loss": 4.2228, "step": 650 }, { "epoch": 0.001371354817892147, "grad_norm": 0.055908203125, "learning_rate": 0.00019974720842766023, "loss": 4.122, "step": 660 }, { "epoch": 0.0017746944702133668, "grad_norm": 0.057373046875, "learning_rate": 0.00019973808709369565, "loss": 4.0099, "step": 670 }, { "epoch": 0.0021780341225345863, "grad_norm": 0.0703125, "learning_rate": 0.00019972880431903977, "loss": 3.9065, "step": 680 }, { "epoch": 0.002581373774855806, "grad_norm": 0.06982421875, "learning_rate": 0.00019971936011871816, "loss": 3.7953, "step": 690 }, { "epoch": 0.002984713427177026, "grad_norm": 0.08349609375, "learning_rate": 0.00019970975450801762, "loss": 3.7788, "step": 700 }, { "epoch": 0.0033880530794982455, "grad_norm": 0.08544921875, "learning_rate": 0.00019969998750248626, "loss": 3.7368, "step": 710 }, { "epoch": 0.003791392731819465, "grad_norm": 0.09375, "learning_rate": 0.0001996900591179334, "loss": 3.7229, "step": 720 }, { "epoch": 0.004194732384140685, "grad_norm": 0.095703125, "learning_rate": 0.0001996799693704296, "loss": 3.7192, "step": 730 }, { "epoch": 0.004598072036461905, "grad_norm": 0.09228515625, "learning_rate": 0.00019966971827630654, "loss": 3.7457, "step": 740 }, { "epoch": 0.005001411688783124, "grad_norm": 0.0986328125, "learning_rate": 0.00019965930585215714, "loss": 3.7031, "step": 750 }, { "epoch": 0.005404751341104344, "grad_norm": 0.10595703125, "learning_rate": 0.00019964873211483547, "loss": 3.7335, "step": 760 }, { "epoch": 0.005808090993425563, "grad_norm": 0.10986328125, "learning_rate": 0.00019963799708145664, "loss": 3.6902, "step": 770 }, { "epoch": 0.006211430645746784, "grad_norm": 0.10400390625, "learning_rate": 0.00019962710076939686, "loss": 3.7408, "step": 780 }, { "epoch": 0.006614770298068003, "grad_norm": 0.062255859375, "learning_rate": 0.00019961604319629342, "loss": 4.204, "step": 790 }, { "epoch": 0.007018109950389223, "grad_norm": 0.053955078125, "learning_rate": 0.00019960482438004462, "loss": 4.364, "step": 800 }, { "epoch": 0.007421449602710442, "grad_norm": 0.053955078125, "learning_rate": 0.00019959344433880978, "loss": 4.3305, "step": 810 }, { "epoch": 0.007824789255031663, "grad_norm": 0.04833984375, "learning_rate": 0.0001995819030910091, "loss": 4.2992, "step": 820 }, { "epoch": 0.008228128907352882, "grad_norm": 0.049072265625, "learning_rate": 0.00019957020065532386, "loss": 4.3347, "step": 830 }, { "epoch": 0.008631468559674102, "grad_norm": 0.04931640625, "learning_rate": 0.0001995583370506961, "loss": 4.321, "step": 840 }, { "epoch": 0.00903480821199532, "grad_norm": 0.0517578125, "learning_rate": 0.00019954631229632884, "loss": 4.3122, "step": 850 }, { "epoch": 0.009438147864316541, "grad_norm": 0.05322265625, "learning_rate": 0.00019953412641168588, "loss": 4.2918, "step": 860 }, { "epoch": 0.00984148751663776, "grad_norm": 0.052490234375, "learning_rate": 0.00019952177941649185, "loss": 4.2793, "step": 870 }, { "epoch": 0.01024482716895898, "grad_norm": 0.048828125, "learning_rate": 0.00019950927133073222, "loss": 4.3363, "step": 880 }, { "epoch": 0.0106481668212802, "grad_norm": 0.05126953125, "learning_rate": 0.00019949660217465307, "loss": 4.311, "step": 890 }, { "epoch": 0.01105150647360142, "grad_norm": 0.05126953125, "learning_rate": 0.00019948377196876138, "loss": 4.2843, "step": 900 }, { "epoch": 0.01145484612592264, "grad_norm": 0.053466796875, "learning_rate": 0.00019947078073382466, "loss": 4.2645, "step": 910 }, { "epoch": 0.011858185778243858, "grad_norm": 0.05029296875, "learning_rate": 0.00019945762849087113, "loss": 4.2695, "step": 920 }, { "epoch": 0.012261525430565079, "grad_norm": 0.054931640625, "learning_rate": 0.00019944431526118964, "loss": 4.279, "step": 930 }, { "epoch": 0.012624531117654176, "eval_loss": 4.440088272094727, "eval_runtime": 3.1398, "eval_samples_per_second": 63.698, "eval_steps_per_second": 7.962, "step": 939 }, { "epoch": 4.033396523212197e-05, "grad_norm": 0.058837890625, "learning_rate": 0.0, "loss": 4.0975, "step": 940 }, { "epoch": 0.0004436736175533417, "grad_norm": 0.07421875, "learning_rate": 2e-05, "loss": 4.1075, "step": 950 }, { "epoch": 0.0008470132698745614, "grad_norm": 0.0703125, "learning_rate": 4e-05, "loss": 4.015, "step": 960 }, { "epoch": 0.001250352922195781, "grad_norm": 0.07373046875, "learning_rate": 6e-05, "loss": 3.8564, "step": 970 }, { "epoch": 0.0016536925745170008, "grad_norm": 0.1279296875, "learning_rate": 8e-05, "loss": 3.5457, "step": 980 }, { "epoch": 0.0020570322268382204, "grad_norm": 0.1982421875, "learning_rate": 0.0001, "loss": 3.2341, "step": 990 }, { "epoch": 0.00246037187915944, "grad_norm": 0.234375, "learning_rate": 0.00012, "loss": 3.0956, "step": 1000 }, { "epoch": 0.00286371153148066, "grad_norm": 0.27734375, "learning_rate": 0.00014, "loss": 2.9331, "step": 1010 }, { "epoch": 0.0032670511838018795, "grad_norm": 0.337890625, "learning_rate": 0.00016, "loss": 2.8938, "step": 1020 }, { "epoch": 0.003670390836123099, "grad_norm": 0.328125, "learning_rate": 0.00018, "loss": 2.8145, "step": 1030 }, { "epoch": 0.004073730488444319, "grad_norm": 0.296875, "learning_rate": 0.0002, "loss": 2.9022, "step": 1040 }, { "epoch": 0.004477070140765539, "grad_norm": 0.279296875, "learning_rate": 0.0001999999190676822, "loss": 2.9572, "step": 1050 }, { "epoch": 0.004880409793086758, "grad_norm": 0.33203125, "learning_rate": 0.00019999967627085973, "loss": 2.9381, "step": 1060 }, { "epoch": 0.005283749445407978, "grad_norm": 0.2470703125, "learning_rate": 0.00019999927160992563, "loss": 2.9392, "step": 1070 }, { "epoch": 0.005687089097729197, "grad_norm": 0.267578125, "learning_rate": 0.00019999870508553488, "loss": 2.8675, "step": 1080 }, { "epoch": 0.006090428750050418, "grad_norm": 0.279296875, "learning_rate": 0.00019999797669860455, "loss": 2.9042, "step": 1090 }, { "epoch": 0.006493768402371637, "grad_norm": 0.162109375, "learning_rate": 0.00019999708645031353, "loss": 3.4063, "step": 1100 }, { "epoch": 0.006897108054692857, "grad_norm": 0.08349609375, "learning_rate": 0.00019999603434210292, "loss": 4.137, "step": 1110 }, { "epoch": 0.0073004477070140765, "grad_norm": 0.0732421875, "learning_rate": 0.00019999482037567565, "loss": 4.1305, "step": 1120 }, { "epoch": 0.007703787359335296, "grad_norm": 0.06591796875, "learning_rate": 0.00019999344455299674, "loss": 4.0303, "step": 1130 }, { "epoch": 0.008107127011656516, "grad_norm": 0.0634765625, "learning_rate": 0.0001999919068762931, "loss": 4.0717, "step": 1140 }, { "epoch": 0.008510466663977735, "grad_norm": 0.059326171875, "learning_rate": 0.00019999020734805373, "loss": 4.0664, "step": 1150 }, { "epoch": 0.008913806316298956, "grad_norm": 0.060302734375, "learning_rate": 0.0001999883459710296, "loss": 4.0298, "step": 1160 }, { "epoch": 0.009317145968620174, "grad_norm": 0.057861328125, "learning_rate": 0.00019998632274823358, "loss": 4.0348, "step": 1170 }, { "epoch": 0.009720485620941395, "grad_norm": 0.06201171875, "learning_rate": 0.00019998413768294052, "loss": 4.0192, "step": 1180 }, { "epoch": 0.010123825273262615, "grad_norm": 0.064453125, "learning_rate": 0.0001999817907786873, "loss": 4.033, "step": 1190 }, { "epoch": 0.010527164925583834, "grad_norm": 0.0654296875, "learning_rate": 0.00019997928203927275, "loss": 4.0413, "step": 1200 }, { "epoch": 0.010930504577905054, "grad_norm": 0.06884765625, "learning_rate": 0.00019997661146875758, "loss": 4.0011, "step": 1210 }, { "epoch": 0.011333844230226273, "grad_norm": 0.068359375, "learning_rate": 0.00019997377907146459, "loss": 3.9817, "step": 1220 }, { "epoch": 0.011737183882547493, "grad_norm": 0.06884765625, "learning_rate": 0.0001999707848519783, "loss": 4.0007, "step": 1230 }, { "epoch": 0.012140523534868712, "grad_norm": 0.0654296875, "learning_rate": 0.0001999676288151454, "loss": 4.0265, "step": 1240 }, { "epoch": 0.012543863187189933, "grad_norm": 0.060302734375, "learning_rate": 0.00019996431096607438, "loss": 4.172, "step": 1250 }, { "epoch": 0.012624531117654176, "eval_loss": 4.49122428894043, "eval_runtime": 3.1912, "eval_samples_per_second": 62.672, "eval_steps_per_second": 7.834, "step": 1252 }, { "epoch": 0.00032267172185697574, "grad_norm": 0.08203125, "learning_rate": 1.4000000000000001e-06, "loss": 4.1169, "step": 1260 }, { "epoch": 0.0007260113741781954, "grad_norm": 0.1220703125, "learning_rate": 3.4000000000000005e-06, "loss": 4.0418, "step": 1270 }, { "epoch": 0.001129351026499415, "grad_norm": 0.1943359375, "learning_rate": 5.400000000000001e-06, "loss": 3.9211, "step": 1280 }, { "epoch": 0.0015326906788206349, "grad_norm": 0.328125, "learning_rate": 7.4e-06, "loss": 3.6888, "step": 1290 }, { "epoch": 0.0019360303311418546, "grad_norm": 0.375, "learning_rate": 9.4e-06, "loss": 3.4768, "step": 1300 }, { "epoch": 0.0023393699834630744, "grad_norm": 0.408203125, "learning_rate": 1.14e-05, "loss": 3.242, "step": 1310 }, { "epoch": 0.002742709635784294, "grad_norm": 0.3671875, "learning_rate": 1.3400000000000002e-05, "loss": 2.9401, "step": 1320 }, { "epoch": 0.0031460492881055136, "grad_norm": 0.265625, "learning_rate": 1.54e-05, "loss": 2.7321, "step": 1330 }, { "epoch": 0.0035493889404267336, "grad_norm": 0.23828125, "learning_rate": 1.7400000000000003e-05, "loss": 2.413, "step": 1340 }, { "epoch": 0.003952728592747953, "grad_norm": 0.2353515625, "learning_rate": 1.94e-05, "loss": 2.2395, "step": 1350 }, { "epoch": 0.004356068245069173, "grad_norm": 0.2353515625, "learning_rate": 1.9999999012581816e-05, "loss": 2.2062, "step": 1360 }, { "epoch": 0.004759407897390392, "grad_norm": 0.232421875, "learning_rate": 1.999999417624832e-05, "loss": 2.1174, "step": 1370 }, { "epoch": 0.005162747549711612, "grad_norm": 0.2265625, "learning_rate": 1.999998530963894e-05, "loss": 1.9498, "step": 1380 }, { "epoch": 0.005566087202032832, "grad_norm": 0.23046875, "learning_rate": 1.999997241275724e-05, "loss": 1.8648, "step": 1390 }, { "epoch": 0.005969426854354052, "grad_norm": 0.2392578125, "learning_rate": 1.9999955485608426e-05, "loss": 1.8192, "step": 1400 }, { "epoch": 0.006372766506675271, "grad_norm": 0.296875, "learning_rate": 1.999993452819932e-05, "loss": 2.0548, "step": 1410 }, { "epoch": 0.006776106158996491, "grad_norm": 0.34765625, "learning_rate": 1.999990954053836e-05, "loss": 3.5804, "step": 1420 }, { "epoch": 0.0071794458113177105, "grad_norm": 0.21484375, "learning_rate": 1.9999880522635625e-05, "loss": 3.8265, "step": 1430 }, { "epoch": 0.00758278546363893, "grad_norm": 0.130859375, "learning_rate": 1.999984747450281e-05, "loss": 3.7124, "step": 1440 }, { "epoch": 0.00798612511596015, "grad_norm": 0.1142578125, "learning_rate": 1.9999810396153232e-05, "loss": 3.7061, "step": 1450 }, { "epoch": 0.00838946476828137, "grad_norm": 0.103515625, "learning_rate": 1.9999769287601834e-05, "loss": 3.68, "step": 1460 }, { "epoch": 0.008792804420602589, "grad_norm": 0.09814453125, "learning_rate": 1.9999724148865183e-05, "loss": 3.6099, "step": 1470 }, { "epoch": 0.00919614407292381, "grad_norm": 0.09423828125, "learning_rate": 1.9999674979961473e-05, "loss": 3.613, "step": 1480 }, { "epoch": 0.00959948372524503, "grad_norm": 0.0966796875, "learning_rate": 1.999962178091052e-05, "loss": 3.5696, "step": 1490 }, { "epoch": 0.010002823377566248, "grad_norm": 0.10986328125, "learning_rate": 1.9999564551733764e-05, "loss": 3.5333, "step": 1500 }, { "epoch": 0.010406163029887469, "grad_norm": 0.09375, "learning_rate": 1.9999503292454275e-05, "loss": 3.5119, "step": 1510 }, { "epoch": 0.010809502682208687, "grad_norm": 0.103515625, "learning_rate": 1.9999438003096733e-05, "loss": 3.403, "step": 1520 }, { "epoch": 0.011212842334529908, "grad_norm": 0.10546875, "learning_rate": 1.9999368683687457e-05, "loss": 3.3882, "step": 1530 }, { "epoch": 0.011616181986851127, "grad_norm": 0.0986328125, "learning_rate": 1.999929533425439e-05, "loss": 3.3576, "step": 1540 }, { "epoch": 0.012019521639172347, "grad_norm": 0.107421875, "learning_rate": 1.999921795482708e-05, "loss": 3.4566, "step": 1550 }, { "epoch": 0.012422861291493567, "grad_norm": 0.158203125, "learning_rate": 1.9999136545436727e-05, "loss": 3.7767, "step": 1560 }, { "epoch": 0.012826200943814786, "grad_norm": 0.2177734375, "learning_rate": 1.999905110611613e-05, "loss": 4.5098, "step": 1570 }, { "epoch": 0.013229540596136007, "grad_norm": 0.1552734375, "learning_rate": 1.9998961636899736e-05, "loss": 4.6336, "step": 1580 }, { "epoch": 0.013632880248457225, "grad_norm": 0.12255859375, "learning_rate": 1.999886813782359e-05, "loss": 4.5697, "step": 1590 }, { "epoch": 0.014036219900778446, "grad_norm": 0.10498046875, "learning_rate": 1.999877060892538e-05, "loss": 4.5598, "step": 1600 }, { "epoch": 0.014439559553099664, "grad_norm": 0.08984375, "learning_rate": 1.9998669050244416e-05, "loss": 4.5326, "step": 1610 }, { "epoch": 0.014842899205420885, "grad_norm": 0.07958984375, "learning_rate": 1.999856346182163e-05, "loss": 4.5219, "step": 1620 }, { "epoch": 0.015246238857742105, "grad_norm": 0.07177734375, "learning_rate": 1.999845384369957e-05, "loss": 4.5013, "step": 1630 }, { "epoch": 0.015649578510063326, "grad_norm": 0.07080078125, "learning_rate": 1.9998340195922418e-05, "loss": 4.4859, "step": 1640 }, { "epoch": 0.016052918162384543, "grad_norm": 0.061767578125, "learning_rate": 1.999822251853598e-05, "loss": 4.5328, "step": 1650 }, { "epoch": 0.016456257814705763, "grad_norm": 0.05859375, "learning_rate": 1.9998100811587686e-05, "loss": 4.5024, "step": 1660 }, { "epoch": 0.016859597467026984, "grad_norm": 0.06201171875, "learning_rate": 1.9997975075126573e-05, "loss": 4.5178, "step": 1670 }, { "epoch": 0.017262937119348204, "grad_norm": 0.05908203125, "learning_rate": 1.9997845309203333e-05, "loss": 4.4892, "step": 1680 }, { "epoch": 0.017666276771669424, "grad_norm": 0.055908203125, "learning_rate": 1.9997711513870257e-05, "loss": 4.4645, "step": 1690 }, { "epoch": 0.01806961642399064, "grad_norm": 0.0546875, "learning_rate": 1.9997573689181272e-05, "loss": 4.4891, "step": 1700 }, { "epoch": 0.018472956076311862, "grad_norm": 0.054443359375, "learning_rate": 1.999743183519192e-05, "loss": 4.4604, "step": 1710 }, { "epoch": 0.018876295728633082, "grad_norm": 0.05224609375, "learning_rate": 1.9997285951959372e-05, "loss": 4.4935, "step": 1720 }, { "epoch": 0.019279635380954303, "grad_norm": 0.052978515625, "learning_rate": 1.999713603954243e-05, "loss": 4.4723, "step": 1730 }, { "epoch": 0.01968297503327552, "grad_norm": 0.052734375, "learning_rate": 1.9996982098001508e-05, "loss": 4.4923, "step": 1740 }, { "epoch": 0.02008631468559674, "grad_norm": 0.050537109375, "learning_rate": 1.9996824127398648e-05, "loss": 4.4402, "step": 1750 }, { "epoch": 0.02048965433791796, "grad_norm": 0.05078125, "learning_rate": 1.999666212779752e-05, "loss": 4.4716, "step": 1760 }, { "epoch": 0.02089299399023918, "grad_norm": 0.0478515625, "learning_rate": 1.999649609926341e-05, "loss": 4.4952, "step": 1770 }, { "epoch": 0.0212963336425604, "grad_norm": 0.052490234375, "learning_rate": 1.9996326041863236e-05, "loss": 4.4755, "step": 1780 }, { "epoch": 0.02169967329488162, "grad_norm": 0.04931640625, "learning_rate": 1.9996151955665535e-05, "loss": 4.4645, "step": 1790 }, { "epoch": 0.02210301294720284, "grad_norm": 0.05029296875, "learning_rate": 1.9995973840740467e-05, "loss": 4.474, "step": 1800 }, { "epoch": 0.02250635259952406, "grad_norm": 0.05029296875, "learning_rate": 1.999579169715982e-05, "loss": 4.4614, "step": 1810 }, { "epoch": 0.02290969225184528, "grad_norm": 0.049560546875, "learning_rate": 1.9995605524996996e-05, "loss": 4.4622, "step": 1820 }, { "epoch": 0.0233130319041665, "grad_norm": 0.052490234375, "learning_rate": 1.9995415324327038e-05, "loss": 4.4785, "step": 1830 }, { "epoch": 0.023716371556487717, "grad_norm": 0.048828125, "learning_rate": 1.9995221095226597e-05, "loss": 4.5104, "step": 1840 }, { "epoch": 0.024119711208808937, "grad_norm": 0.048095703125, "learning_rate": 1.999502283777395e-05, "loss": 4.4759, "step": 1850 }, { "epoch": 0.024523050861130158, "grad_norm": 0.0458984375, "learning_rate": 1.9994820552049002e-05, "loss": 4.4359, "step": 1860 }, { "epoch": 0.02492639051345138, "grad_norm": 0.045166015625, "learning_rate": 1.9994614238133282e-05, "loss": 4.4895, "step": 1870 }, { "epoch": 0.0253297301657726, "grad_norm": 0.04541015625, "learning_rate": 1.9994403896109942e-05, "loss": 4.4755, "step": 1880 }, { "epoch": 0.025733069818093816, "grad_norm": 0.045654296875, "learning_rate": 1.9994189526063746e-05, "loss": 4.4661, "step": 1890 }, { "epoch": 0.026136409470415036, "grad_norm": 0.046630859375, "learning_rate": 1.99939711280811e-05, "loss": 4.4775, "step": 1900 }, { "epoch": 0.026539749122736257, "grad_norm": 0.04931640625, "learning_rate": 1.999374870225003e-05, "loss": 4.4954, "step": 1910 }, { "epoch": 0.026943088775057477, "grad_norm": 0.046142578125, "learning_rate": 1.9993522248660163e-05, "loss": 4.4905, "step": 1920 }, { "epoch": 0.027346428427378694, "grad_norm": 0.049072265625, "learning_rate": 1.9993291767402776e-05, "loss": 4.4545, "step": 1930 }, { "epoch": 0.027749768079699914, "grad_norm": 0.045166015625, "learning_rate": 1.9993057258570762e-05, "loss": 4.4737, "step": 1940 }, { "epoch": 0.028153107732021135, "grad_norm": 0.046630859375, "learning_rate": 1.9992818722258626e-05, "loss": 4.4908, "step": 1950 }, { "epoch": 0.028556447384342355, "grad_norm": 0.047607421875, "learning_rate": 1.9992576158562515e-05, "loss": 4.4893, "step": 1960 }, { "epoch": 0.028959787036663576, "grad_norm": 0.04736328125, "learning_rate": 1.999232956758018e-05, "loss": 4.4452, "step": 1970 }, { "epoch": 0.029363126688984793, "grad_norm": 0.044677734375, "learning_rate": 1.999207894941101e-05, "loss": 4.4901, "step": 1980 }, { "epoch": 0.029766466341306013, "grad_norm": 0.0478515625, "learning_rate": 1.9991824304156006e-05, "loss": 4.419, "step": 1990 }, { "epoch": 0.030169805993627234, "grad_norm": 0.048828125, "learning_rate": 1.99915656319178e-05, "loss": 4.4521, "step": 2000 }, { "epoch": 0.030573145645948454, "grad_norm": 0.046875, "learning_rate": 1.999130293280065e-05, "loss": 4.4561, "step": 2010 }, { "epoch": 0.030976485298269674, "grad_norm": 0.0595703125, "learning_rate": 1.9991036206910417e-05, "loss": 4.4544, "step": 2020 }, { "epoch": 0.03137982495059089, "grad_norm": 0.048095703125, "learning_rate": 1.999076545435461e-05, "loss": 4.4713, "step": 2030 }, { "epoch": 0.03178316460291211, "grad_norm": 0.047119140625, "learning_rate": 1.999049067524235e-05, "loss": 4.4408, "step": 2040 }, { "epoch": 0.03218650425523333, "grad_norm": 0.04541015625, "learning_rate": 1.9990211869684374e-05, "loss": 4.4686, "step": 2050 }, { "epoch": 0.03258984390755455, "grad_norm": 0.047119140625, "learning_rate": 1.998992903779305e-05, "loss": 4.4673, "step": 2060 }, { "epoch": 0.03299318355987577, "grad_norm": 0.046630859375, "learning_rate": 1.9989642179682374e-05, "loss": 4.4302, "step": 2070 }, { "epoch": 0.033396523212196993, "grad_norm": 0.046875, "learning_rate": 1.998935129546795e-05, "loss": 4.4844, "step": 2080 }, { "epoch": 0.033799862864518214, "grad_norm": 0.0439453125, "learning_rate": 1.9989056385267015e-05, "loss": 4.4739, "step": 2090 }, { "epoch": 0.03420320251683943, "grad_norm": 0.047119140625, "learning_rate": 1.9988757449198428e-05, "loss": 4.499, "step": 2100 }, { "epoch": 0.03460654216916065, "grad_norm": 0.046875, "learning_rate": 1.9988454487382667e-05, "loss": 4.4517, "step": 2110 }, { "epoch": 0.03500988182148187, "grad_norm": 0.044677734375, "learning_rate": 1.9988147499941832e-05, "loss": 4.4608, "step": 2120 }, { "epoch": 0.03541322147380309, "grad_norm": 0.0458984375, "learning_rate": 1.998783648699965e-05, "loss": 4.4515, "step": 2130 }, { "epoch": 0.03581656112612431, "grad_norm": 0.04443359375, "learning_rate": 1.9987521448681465e-05, "loss": 4.4646, "step": 2140 }, { "epoch": 0.03621990077844553, "grad_norm": 0.05908203125, "learning_rate": 1.9987202385114252e-05, "loss": 4.4586, "step": 2150 }, { "epoch": 0.03662324043076675, "grad_norm": 0.04736328125, "learning_rate": 1.99868792964266e-05, "loss": 4.4699, "step": 2160 }, { "epoch": 0.03702658008308797, "grad_norm": 0.044189453125, "learning_rate": 1.9986552182748715e-05, "loss": 4.4678, "step": 2170 }, { "epoch": 0.03742991973540919, "grad_norm": 0.04541015625, "learning_rate": 1.9986221044212442e-05, "loss": 4.4581, "step": 2180 }, { "epoch": 0.037833259387730404, "grad_norm": 0.045654296875, "learning_rate": 1.998588588095124e-05, "loss": 4.4273, "step": 2190 }, { "epoch": 0.038236599040051625, "grad_norm": 0.044677734375, "learning_rate": 1.9985546693100186e-05, "loss": 4.4602, "step": 2200 }, { "epoch": 0.038639938692372845, "grad_norm": 0.04931640625, "learning_rate": 1.9985203480795977e-05, "loss": 4.4766, "step": 2210 }, { "epoch": 0.039043278344694066, "grad_norm": 0.045166015625, "learning_rate": 1.9984856244176948e-05, "loss": 4.4579, "step": 2220 }, { "epoch": 0.039446617997015286, "grad_norm": 0.04638671875, "learning_rate": 1.998450498338303e-05, "loss": 4.4661, "step": 2230 }, { "epoch": 0.039849957649336507, "grad_norm": 0.043701171875, "learning_rate": 1.9984149698555808e-05, "loss": 4.4415, "step": 2240 }, { "epoch": 0.04025329730165773, "grad_norm": 0.046875, "learning_rate": 1.998379038983846e-05, "loss": 4.4461, "step": 2250 }, { "epoch": 0.04065663695397895, "grad_norm": 0.04443359375, "learning_rate": 1.9983427057375802e-05, "loss": 4.4627, "step": 2260 }, { "epoch": 0.04105997660630017, "grad_norm": 0.0458984375, "learning_rate": 1.9983059701314267e-05, "loss": 4.4399, "step": 2270 }, { "epoch": 0.04146331625862139, "grad_norm": 0.045654296875, "learning_rate": 1.9982688321801906e-05, "loss": 4.4673, "step": 2280 }, { "epoch": 0.0418666559109426, "grad_norm": 0.046142578125, "learning_rate": 1.99823129189884e-05, "loss": 4.478, "step": 2290 }, { "epoch": 0.04226999556326382, "grad_norm": 0.045166015625, "learning_rate": 1.9981933493025044e-05, "loss": 4.4433, "step": 2300 }, { "epoch": 0.04267333521558504, "grad_norm": 0.045166015625, "learning_rate": 1.9981550044064756e-05, "loss": 4.4535, "step": 2310 }, { "epoch": 0.04307667486790626, "grad_norm": 0.046142578125, "learning_rate": 1.998116257226208e-05, "loss": 4.4665, "step": 2320 }, { "epoch": 0.043480014520227483, "grad_norm": 0.045166015625, "learning_rate": 1.9980771077773177e-05, "loss": 4.4419, "step": 2330 }, { "epoch": 0.043883354172548704, "grad_norm": 0.046875, "learning_rate": 1.9980375560755833e-05, "loss": 4.4724, "step": 2340 }, { "epoch": 0.044286693824869924, "grad_norm": 0.044921875, "learning_rate": 1.997997602136944e-05, "loss": 4.4485, "step": 2350 }, { "epoch": 0.044690033477191145, "grad_norm": 0.044921875, "learning_rate": 1.997957245977504e-05, "loss": 4.4484, "step": 2360 }, { "epoch": 0.045093373129512365, "grad_norm": 0.044921875, "learning_rate": 1.997916487613527e-05, "loss": 4.4594, "step": 2370 }, { "epoch": 0.04549671278183358, "grad_norm": 0.04638671875, "learning_rate": 1.9978753270614403e-05, "loss": 4.4836, "step": 2380 }, { "epoch": 0.0459000524341548, "grad_norm": 0.044921875, "learning_rate": 1.997833764337832e-05, "loss": 4.4767, "step": 2390 }, { "epoch": 0.04630339208647602, "grad_norm": 0.04736328125, "learning_rate": 1.9977917994594537e-05, "loss": 4.4358, "step": 2400 }, { "epoch": 0.04670673173879724, "grad_norm": 0.04541015625, "learning_rate": 1.997749432443218e-05, "loss": 4.4552, "step": 2410 }, { "epoch": 0.04711007139111846, "grad_norm": 0.044677734375, "learning_rate": 1.9977066633062002e-05, "loss": 4.4383, "step": 2420 }, { "epoch": 0.04751341104343968, "grad_norm": 0.045654296875, "learning_rate": 1.9976634920656374e-05, "loss": 4.4605, "step": 2430 }, { "epoch": 0.0479167506957609, "grad_norm": 0.042724609375, "learning_rate": 1.9976199187389286e-05, "loss": 4.4496, "step": 2440 }, { "epoch": 0.04832009034808212, "grad_norm": 0.04638671875, "learning_rate": 1.997575943343635e-05, "loss": 4.454, "step": 2450 }, { "epoch": 0.04872343000040334, "grad_norm": 0.044189453125, "learning_rate": 1.997531565897481e-05, "loss": 4.4212, "step": 2460 }, { "epoch": 0.04912676965272456, "grad_norm": 0.043212890625, "learning_rate": 1.9974867864183508e-05, "loss": 4.4441, "step": 2470 }, { "epoch": 0.049530109305045776, "grad_norm": 0.04541015625, "learning_rate": 1.997441604924292e-05, "loss": 4.4085, "step": 2480 }, { "epoch": 0.049933448957366997, "grad_norm": 0.044189453125, "learning_rate": 1.997396021433514e-05, "loss": 4.4616, "step": 2490 }, { "epoch": 0.05033678860968822, "grad_norm": 0.04833984375, "learning_rate": 1.9973500359643885e-05, "loss": 4.4544, "step": 2500 }, { "epoch": 0.05074012826200944, "grad_norm": 0.044677734375, "learning_rate": 1.9973036485354485e-05, "loss": 4.453, "step": 2510 }, { "epoch": 0.05114346791433066, "grad_norm": 0.04638671875, "learning_rate": 1.99725685916539e-05, "loss": 4.4722, "step": 2520 }, { "epoch": 0.05154680756665188, "grad_norm": 0.04296875, "learning_rate": 1.99720966787307e-05, "loss": 4.4332, "step": 2530 }, { "epoch": 0.0519501472189731, "grad_norm": 0.04638671875, "learning_rate": 1.9971620746775077e-05, "loss": 4.4757, "step": 2540 }, { "epoch": 0.05235348687129432, "grad_norm": 0.04638671875, "learning_rate": 1.997114079597885e-05, "loss": 4.4264, "step": 2550 }, { "epoch": 0.05275682652361554, "grad_norm": 0.04638671875, "learning_rate": 1.997065682653545e-05, "loss": 4.4372, "step": 2560 }, { "epoch": 0.05316016617593675, "grad_norm": 0.045166015625, "learning_rate": 1.997016883863993e-05, "loss": 4.4566, "step": 2570 }, { "epoch": 0.05356350582825797, "grad_norm": 0.04345703125, "learning_rate": 1.9969676832488965e-05, "loss": 4.4309, "step": 2580 }, { "epoch": 0.053966845480579194, "grad_norm": 0.046142578125, "learning_rate": 1.9969180808280845e-05, "loss": 4.4621, "step": 2590 }, { "epoch": 0.054370185132900414, "grad_norm": 0.04443359375, "learning_rate": 1.9968680766215477e-05, "loss": 4.4465, "step": 2600 }, { "epoch": 0.054773524785221635, "grad_norm": 0.04541015625, "learning_rate": 1.9968176706494403e-05, "loss": 4.4239, "step": 2610 }, { "epoch": 0.055176864437542855, "grad_norm": 0.042724609375, "learning_rate": 1.996766862932076e-05, "loss": 4.4567, "step": 2620 }, { "epoch": 0.055580204089864076, "grad_norm": 0.046142578125, "learning_rate": 1.996715653489933e-05, "loss": 4.423, "step": 2630 }, { "epoch": 0.055983543742185296, "grad_norm": 0.04248046875, "learning_rate": 1.9966640423436492e-05, "loss": 4.3849, "step": 2640 }, { "epoch": 0.056386883394506516, "grad_norm": 0.0458984375, "learning_rate": 1.9966120295140258e-05, "loss": 4.4788, "step": 2650 }, { "epoch": 0.05679022304682774, "grad_norm": 0.045166015625, "learning_rate": 1.996559615022025e-05, "loss": 4.4313, "step": 2660 }, { "epoch": 0.05719356269914895, "grad_norm": 0.044921875, "learning_rate": 1.996506798888772e-05, "loss": 4.4493, "step": 2670 }, { "epoch": 0.05759690235147017, "grad_norm": 0.047607421875, "learning_rate": 1.9964535811355524e-05, "loss": 4.4438, "step": 2680 }, { "epoch": 0.05800024200379139, "grad_norm": 0.04296875, "learning_rate": 1.996399961783815e-05, "loss": 4.413, "step": 2690 }, { "epoch": 0.05840358165611261, "grad_norm": 0.043701171875, "learning_rate": 1.9963459408551693e-05, "loss": 4.4538, "step": 2700 }, { "epoch": 0.05880692130843383, "grad_norm": 0.0439453125, "learning_rate": 1.996291518371388e-05, "loss": 4.466, "step": 2710 }, { "epoch": 0.05921026096075505, "grad_norm": 0.045654296875, "learning_rate": 1.9962366943544045e-05, "loss": 4.4963, "step": 2720 }, { "epoch": 0.05961360061307627, "grad_norm": 0.04541015625, "learning_rate": 1.9961814688263138e-05, "loss": 4.46, "step": 2730 }, { "epoch": 0.06001694026539749, "grad_norm": 0.04345703125, "learning_rate": 1.9961258418093745e-05, "loss": 4.4481, "step": 2740 }, { "epoch": 0.060420279917718714, "grad_norm": 0.044677734375, "learning_rate": 1.9960698133260053e-05, "loss": 4.442, "step": 2750 }, { "epoch": 0.06082361957003993, "grad_norm": 0.052490234375, "learning_rate": 1.9960133833987866e-05, "loss": 4.4473, "step": 2760 }, { "epoch": 0.06122695922236115, "grad_norm": 0.07958984375, "learning_rate": 1.9959565520504625e-05, "loss": 4.4286, "step": 2770 }, { "epoch": 0.06163029887468237, "grad_norm": 0.042724609375, "learning_rate": 1.9958993193039365e-05, "loss": 4.4616, "step": 2780 }, { "epoch": 0.06203363852700359, "grad_norm": 0.04296875, "learning_rate": 1.9958416851822755e-05, "loss": 4.4409, "step": 2790 }, { "epoch": 0.06243697817932481, "grad_norm": 0.042236328125, "learning_rate": 1.9957836497087074e-05, "loss": 4.4416, "step": 2800 }, { "epoch": 0.06284031783164602, "grad_norm": 0.04541015625, "learning_rate": 1.9957252129066227e-05, "loss": 4.4552, "step": 2810 }, { "epoch": 0.06324365748396725, "grad_norm": 0.046875, "learning_rate": 1.9956663747995724e-05, "loss": 4.4536, "step": 2820 }, { "epoch": 0.06364699713628846, "grad_norm": 0.045654296875, "learning_rate": 1.99560713541127e-05, "loss": 4.4544, "step": 2830 }, { "epoch": 0.06405033678860969, "grad_norm": 0.0419921875, "learning_rate": 1.9955474947655912e-05, "loss": 4.4288, "step": 2840 }, { "epoch": 0.0644536764409309, "grad_norm": 0.041748046875, "learning_rate": 1.995487452886572e-05, "loss": 4.4397, "step": 2850 }, { "epoch": 0.06485701609325213, "grad_norm": 0.044677734375, "learning_rate": 1.995427009798411e-05, "loss": 4.4909, "step": 2860 }, { "epoch": 0.06526035574557335, "grad_norm": 0.043701171875, "learning_rate": 1.9953661655254695e-05, "loss": 4.4528, "step": 2870 }, { "epoch": 0.06566369539789457, "grad_norm": 0.045654296875, "learning_rate": 1.9953049200922684e-05, "loss": 4.4308, "step": 2880 }, { "epoch": 0.06606703505021579, "grad_norm": 0.04541015625, "learning_rate": 1.9952432735234918e-05, "loss": 4.4585, "step": 2890 }, { "epoch": 0.066470374702537, "grad_norm": 0.046630859375, "learning_rate": 1.9951812258439846e-05, "loss": 4.4663, "step": 2900 }, { "epoch": 0.06687371435485823, "grad_norm": 0.0439453125, "learning_rate": 1.995118777078754e-05, "loss": 4.4549, "step": 2910 }, { "epoch": 0.06727705400717944, "grad_norm": 0.04931640625, "learning_rate": 1.9950559272529686e-05, "loss": 4.4434, "step": 2920 }, { "epoch": 0.06768039365950067, "grad_norm": 0.04443359375, "learning_rate": 1.9949926763919586e-05, "loss": 4.4086, "step": 2930 }, { "epoch": 0.06808373331182188, "grad_norm": 0.045166015625, "learning_rate": 1.9949290245212157e-05, "loss": 4.4456, "step": 2940 }, { "epoch": 0.06848707296414311, "grad_norm": 0.042724609375, "learning_rate": 1.9948649716663936e-05, "loss": 4.4395, "step": 2950 }, { "epoch": 0.06889041261646432, "grad_norm": 0.048095703125, "learning_rate": 1.994800517853307e-05, "loss": 4.4357, "step": 2960 }, { "epoch": 0.06929375226878555, "grad_norm": 0.041015625, "learning_rate": 1.9947356631079337e-05, "loss": 4.4129, "step": 2970 }, { "epoch": 0.06969709192110676, "grad_norm": 0.042724609375, "learning_rate": 1.9946704074564105e-05, "loss": 4.4587, "step": 2980 }, { "epoch": 0.07010043157342799, "grad_norm": 0.04248046875, "learning_rate": 1.994604750925038e-05, "loss": 4.4844, "step": 2990 }, { "epoch": 0.0705037712257492, "grad_norm": 0.044921875, "learning_rate": 1.9945386935402775e-05, "loss": 4.4578, "step": 3000 }, { "epoch": 0.07090711087807042, "grad_norm": 0.04443359375, "learning_rate": 1.9944722353287518e-05, "loss": 4.4306, "step": 3010 }, { "epoch": 0.07131045053039164, "grad_norm": 0.04248046875, "learning_rate": 1.994405376317246e-05, "loss": 4.4542, "step": 3020 }, { "epoch": 0.07171379018271286, "grad_norm": 0.044921875, "learning_rate": 1.9943381165327053e-05, "loss": 4.4166, "step": 3030 }, { "epoch": 0.07211712983503409, "grad_norm": 0.045166015625, "learning_rate": 1.9942704560022378e-05, "loss": 4.4745, "step": 3040 }, { "epoch": 0.0725204694873553, "grad_norm": 0.045166015625, "learning_rate": 1.9942023947531122e-05, "loss": 4.4555, "step": 3050 }, { "epoch": 0.07292380913967653, "grad_norm": 0.047119140625, "learning_rate": 1.99413393281276e-05, "loss": 4.4133, "step": 3060 }, { "epoch": 0.07332714879199774, "grad_norm": 0.04638671875, "learning_rate": 1.9940650702087718e-05, "loss": 4.4555, "step": 3070 }, { "epoch": 0.07373048844431897, "grad_norm": 0.044677734375, "learning_rate": 1.9939958069689026e-05, "loss": 4.4011, "step": 3080 }, { "epoch": 0.07413382809664018, "grad_norm": 0.04541015625, "learning_rate": 1.9939261431210664e-05, "loss": 4.4595, "step": 3090 }, { "epoch": 0.0745371677489614, "grad_norm": 0.04296875, "learning_rate": 1.9938560786933398e-05, "loss": 4.452, "step": 3100 }, { "epoch": 0.07494050740128262, "grad_norm": 0.04345703125, "learning_rate": 1.9937856137139612e-05, "loss": 4.4497, "step": 3110 }, { "epoch": 0.07534384705360384, "grad_norm": 0.044677734375, "learning_rate": 1.9937147482113296e-05, "loss": 4.4514, "step": 3120 }, { "epoch": 0.07574718670592506, "grad_norm": 0.04541015625, "learning_rate": 1.993643482214006e-05, "loss": 4.4674, "step": 3130 }, { "epoch": 0.07615052635824628, "grad_norm": 0.0419921875, "learning_rate": 1.9935718157507124e-05, "loss": 4.4503, "step": 3140 }, { "epoch": 0.0765538660105675, "grad_norm": 0.044677734375, "learning_rate": 1.9934997488503325e-05, "loss": 4.4512, "step": 3150 }, { "epoch": 0.07695720566288872, "grad_norm": 0.044677734375, "learning_rate": 1.993427281541911e-05, "loss": 4.4441, "step": 3160 }, { "epoch": 0.07736054531520994, "grad_norm": 0.044189453125, "learning_rate": 1.9933544138546542e-05, "loss": 4.4542, "step": 3170 }, { "epoch": 0.07776388496753116, "grad_norm": 0.0439453125, "learning_rate": 1.9932811458179305e-05, "loss": 4.4436, "step": 3180 }, { "epoch": 0.07816722461985237, "grad_norm": 0.047607421875, "learning_rate": 1.993207477461268e-05, "loss": 4.4158, "step": 3190 }, { "epoch": 0.0785705642721736, "grad_norm": 0.04443359375, "learning_rate": 1.993133408814358e-05, "loss": 4.4518, "step": 3200 }, { "epoch": 0.07897390392449481, "grad_norm": 0.044921875, "learning_rate": 1.9930589399070515e-05, "loss": 4.4289, "step": 3210 }, { "epoch": 0.07937724357681604, "grad_norm": 0.045654296875, "learning_rate": 1.9929840707693618e-05, "loss": 4.4452, "step": 3220 }, { "epoch": 0.07978058322913725, "grad_norm": 0.042724609375, "learning_rate": 1.9929088014314636e-05, "loss": 4.462, "step": 3230 }, { "epoch": 0.08018392288145848, "grad_norm": 0.04345703125, "learning_rate": 1.992833131923692e-05, "loss": 4.4282, "step": 3240 }, { "epoch": 0.0805872625337797, "grad_norm": 0.0439453125, "learning_rate": 1.9927570622765443e-05, "loss": 4.4584, "step": 3250 }, { "epoch": 0.08099060218610092, "grad_norm": 0.045166015625, "learning_rate": 1.9926805925206784e-05, "loss": 4.455, "step": 3260 }, { "epoch": 0.08139394183842213, "grad_norm": 0.043212890625, "learning_rate": 1.992603722686914e-05, "loss": 4.4233, "step": 3270 }, { "epoch": 0.08179728149074335, "grad_norm": 0.043701171875, "learning_rate": 1.9925264528062317e-05, "loss": 4.4435, "step": 3280 }, { "epoch": 0.08220062114306458, "grad_norm": 0.04541015625, "learning_rate": 1.9924487829097733e-05, "loss": 4.4385, "step": 3290 }, { "epoch": 0.08260396079538579, "grad_norm": 0.04248046875, "learning_rate": 1.9923707130288415e-05, "loss": 4.4204, "step": 3300 }, { "epoch": 0.08300730044770702, "grad_norm": 0.043701171875, "learning_rate": 1.9922922431949017e-05, "loss": 4.4202, "step": 3310 }, { "epoch": 0.08341064010002823, "grad_norm": 0.04296875, "learning_rate": 1.9922133734395787e-05, "loss": 4.394, "step": 3320 }, { "epoch": 0.08381397975234946, "grad_norm": 0.04443359375, "learning_rate": 1.9921341037946592e-05, "loss": 4.4216, "step": 3330 }, { "epoch": 0.08421731940467067, "grad_norm": 0.044677734375, "learning_rate": 1.9920544342920913e-05, "loss": 4.4231, "step": 3340 }, { "epoch": 0.0846206590569919, "grad_norm": 0.0458984375, "learning_rate": 1.991974364963984e-05, "loss": 4.4847, "step": 3350 }, { "epoch": 0.08502399870931311, "grad_norm": 0.04248046875, "learning_rate": 1.9918938958426075e-05, "loss": 4.4063, "step": 3360 }, { "epoch": 0.08542733836163434, "grad_norm": 0.0458984375, "learning_rate": 1.9918130269603926e-05, "loss": 4.4668, "step": 3370 }, { "epoch": 0.08583067801395555, "grad_norm": 0.0458984375, "learning_rate": 1.991731758349933e-05, "loss": 4.4454, "step": 3380 }, { "epoch": 0.08623401766627677, "grad_norm": 0.04541015625, "learning_rate": 1.9916500900439806e-05, "loss": 4.4527, "step": 3390 }, { "epoch": 0.086637357318598, "grad_norm": 0.042724609375, "learning_rate": 1.991568022075451e-05, "loss": 4.4057, "step": 3400 }, { "epoch": 0.0870406969709192, "grad_norm": 0.044189453125, "learning_rate": 1.9914855544774195e-05, "loss": 4.4503, "step": 3410 }, { "epoch": 0.08744403662324043, "grad_norm": 0.043212890625, "learning_rate": 1.991402687283123e-05, "loss": 4.4968, "step": 3420 }, { "epoch": 0.08784737627556165, "grad_norm": 0.045166015625, "learning_rate": 1.9913194205259595e-05, "loss": 4.4642, "step": 3430 }, { "epoch": 0.08825071592788288, "grad_norm": 0.0439453125, "learning_rate": 1.9912357542394873e-05, "loss": 4.4283, "step": 3440 }, { "epoch": 0.08865405558020409, "grad_norm": 0.0458984375, "learning_rate": 1.9911516884574262e-05, "loss": 4.4776, "step": 3450 }, { "epoch": 0.08905739523252532, "grad_norm": 0.045166015625, "learning_rate": 1.9910672232136578e-05, "loss": 4.4578, "step": 3460 }, { "epoch": 0.08946073488484653, "grad_norm": 0.04443359375, "learning_rate": 1.990982358542223e-05, "loss": 4.4718, "step": 3470 }, { "epoch": 0.08986407453716774, "grad_norm": 0.043212890625, "learning_rate": 1.9908970944773255e-05, "loss": 4.4575, "step": 3480 }, { "epoch": 0.09026741418948897, "grad_norm": 0.04296875, "learning_rate": 1.9908114310533285e-05, "loss": 4.4147, "step": 3490 }, { "epoch": 0.09067075384181018, "grad_norm": 0.048095703125, "learning_rate": 1.990725368304757e-05, "loss": 4.4363, "step": 3500 }, { "epoch": 0.09107409349413141, "grad_norm": 0.0439453125, "learning_rate": 1.990638906266297e-05, "loss": 4.4575, "step": 3510 }, { "epoch": 0.09147743314645262, "grad_norm": 0.043212890625, "learning_rate": 1.990552044972794e-05, "loss": 4.4545, "step": 3520 }, { "epoch": 0.09188077279877385, "grad_norm": 0.044189453125, "learning_rate": 1.9904647844592572e-05, "loss": 4.4439, "step": 3530 }, { "epoch": 0.09228411245109507, "grad_norm": 0.043701171875, "learning_rate": 1.9903771247608535e-05, "loss": 4.4192, "step": 3540 }, { "epoch": 0.09268745210341629, "grad_norm": 0.044677734375, "learning_rate": 1.9902890659129125e-05, "loss": 4.4251, "step": 3550 }, { "epoch": 0.0930907917557375, "grad_norm": 0.041259765625, "learning_rate": 1.990200607950925e-05, "loss": 4.4339, "step": 3560 }, { "epoch": 0.09349413140805872, "grad_norm": 0.04541015625, "learning_rate": 1.9901117509105417e-05, "loss": 4.4635, "step": 3570 }, { "epoch": 0.09389747106037995, "grad_norm": 0.044921875, "learning_rate": 1.990022494827574e-05, "loss": 4.4255, "step": 3580 }, { "epoch": 0.09430081071270116, "grad_norm": 0.044677734375, "learning_rate": 1.9899328397379955e-05, "loss": 4.4412, "step": 3590 }, { "epoch": 0.09470415036502239, "grad_norm": 0.042724609375, "learning_rate": 1.989842785677939e-05, "loss": 4.437, "step": 3600 }, { "epoch": 0.0951074900173436, "grad_norm": 0.04541015625, "learning_rate": 1.9897523326836987e-05, "loss": 4.4136, "step": 3610 }, { "epoch": 0.09551082966966483, "grad_norm": 0.047607421875, "learning_rate": 1.98966148079173e-05, "loss": 4.4353, "step": 3620 }, { "epoch": 0.09591416932198604, "grad_norm": 0.044677734375, "learning_rate": 1.989570230038649e-05, "loss": 4.4331, "step": 3630 }, { "epoch": 0.09631750897430727, "grad_norm": 0.043212890625, "learning_rate": 1.989478580461232e-05, "loss": 4.4311, "step": 3640 }, { "epoch": 0.09672084862662848, "grad_norm": 0.04443359375, "learning_rate": 1.9893865320964162e-05, "loss": 4.3961, "step": 3650 }, { "epoch": 0.0971241882789497, "grad_norm": 0.044921875, "learning_rate": 1.9892940849812997e-05, "loss": 4.4645, "step": 3660 }, { "epoch": 0.09752752793127092, "grad_norm": 0.046142578125, "learning_rate": 1.9892012391531413e-05, "loss": 4.495, "step": 3670 }, { "epoch": 0.09793086758359214, "grad_norm": 0.044677734375, "learning_rate": 1.989107994649361e-05, "loss": 4.4074, "step": 3680 }, { "epoch": 0.09833420723591337, "grad_norm": 0.04638671875, "learning_rate": 1.989014351507538e-05, "loss": 4.441, "step": 3690 }, { "epoch": 0.09873754688823458, "grad_norm": 0.045654296875, "learning_rate": 1.988920309765413e-05, "loss": 4.446, "step": 3700 }, { "epoch": 0.0991408865405558, "grad_norm": 0.045166015625, "learning_rate": 1.9888258694608886e-05, "loss": 4.4378, "step": 3710 }, { "epoch": 0.09954422619287702, "grad_norm": 0.044189453125, "learning_rate": 1.988731030632026e-05, "loss": 4.4068, "step": 3720 }, { "epoch": 0.09994756584519825, "grad_norm": 0.0458984375, "learning_rate": 1.988635793317048e-05, "loss": 4.4587, "step": 3730 }, { "epoch": 0.10035090549751946, "grad_norm": 0.042236328125, "learning_rate": 1.9885401575543384e-05, "loss": 4.4586, "step": 3740 }, { "epoch": 0.10075424514984069, "grad_norm": 0.044677734375, "learning_rate": 1.98844412338244e-05, "loss": 4.4342, "step": 3750 }, { "epoch": 0.1011575848021619, "grad_norm": 0.04638671875, "learning_rate": 1.9883476908400587e-05, "loss": 4.4647, "step": 3760 }, { "epoch": 0.10156092445448311, "grad_norm": 0.04345703125, "learning_rate": 1.9882508599660583e-05, "loss": 4.4362, "step": 3770 }, { "epoch": 0.10196426410680434, "grad_norm": 0.04248046875, "learning_rate": 1.9881536307994645e-05, "loss": 4.4402, "step": 3780 }, { "epoch": 0.10236760375912556, "grad_norm": 0.0439453125, "learning_rate": 1.9880560033794637e-05, "loss": 4.4238, "step": 3790 }, { "epoch": 0.10277094341144678, "grad_norm": 0.04443359375, "learning_rate": 1.9879579777454027e-05, "loss": 4.4556, "step": 3800 }, { "epoch": 0.103174283063768, "grad_norm": 0.049560546875, "learning_rate": 1.987859553936788e-05, "loss": 4.4287, "step": 3810 }, { "epoch": 0.10357762271608922, "grad_norm": 0.045654296875, "learning_rate": 1.9877607319932872e-05, "loss": 4.421, "step": 3820 }, { "epoch": 0.10398096236841044, "grad_norm": 0.04638671875, "learning_rate": 1.9876615119547286e-05, "loss": 4.4607, "step": 3830 }, { "epoch": 0.10438430202073166, "grad_norm": 0.04638671875, "learning_rate": 1.9875618938611008e-05, "loss": 4.4433, "step": 3840 }, { "epoch": 0.10478764167305288, "grad_norm": 0.04443359375, "learning_rate": 1.987461877752552e-05, "loss": 4.4713, "step": 3850 }, { "epoch": 0.10519098132537409, "grad_norm": 0.043701171875, "learning_rate": 1.9873614636693918e-05, "loss": 4.4631, "step": 3860 }, { "epoch": 0.10559432097769532, "grad_norm": 0.045166015625, "learning_rate": 1.9872606516520898e-05, "loss": 4.3911, "step": 3870 }, { "epoch": 0.10599766063001653, "grad_norm": 0.0458984375, "learning_rate": 1.9871594417412763e-05, "loss": 4.451, "step": 3880 }, { "epoch": 0.10640100028233776, "grad_norm": 0.04541015625, "learning_rate": 1.9870578339777416e-05, "loss": 4.4256, "step": 3890 }, { "epoch": 0.10680433993465897, "grad_norm": 0.04541015625, "learning_rate": 1.9869558284024363e-05, "loss": 4.4336, "step": 3900 }, { "epoch": 0.1072076795869802, "grad_norm": 0.044921875, "learning_rate": 1.9868534250564713e-05, "loss": 4.464, "step": 3910 }, { "epoch": 0.10761101923930141, "grad_norm": 0.042236328125, "learning_rate": 1.9867506239811188e-05, "loss": 4.4258, "step": 3920 }, { "epoch": 0.10801435889162264, "grad_norm": 0.04736328125, "learning_rate": 1.9866474252178096e-05, "loss": 4.4037, "step": 3930 }, { "epoch": 0.10841769854394386, "grad_norm": 0.04345703125, "learning_rate": 1.9865438288081366e-05, "loss": 4.383, "step": 3940 }, { "epoch": 0.10882103819626507, "grad_norm": 0.0439453125, "learning_rate": 1.986439834793851e-05, "loss": 4.4667, "step": 3950 }, { "epoch": 0.1092243778485863, "grad_norm": 0.044677734375, "learning_rate": 1.986335443216866e-05, "loss": 4.4176, "step": 3960 }, { "epoch": 0.10962771750090751, "grad_norm": 0.044189453125, "learning_rate": 1.9862306541192536e-05, "loss": 4.4293, "step": 3970 }, { "epoch": 0.11003105715322874, "grad_norm": 0.047119140625, "learning_rate": 1.9861254675432478e-05, "loss": 4.4302, "step": 3980 }, { "epoch": 0.11043439680554995, "grad_norm": 0.04541015625, "learning_rate": 1.9860198835312408e-05, "loss": 4.4271, "step": 3990 }, { "epoch": 0.11083773645787118, "grad_norm": 0.044677734375, "learning_rate": 1.985913902125786e-05, "loss": 4.4409, "step": 4000 }, { "epoch": 0.11124107611019239, "grad_norm": 0.04541015625, "learning_rate": 1.9858075233695974e-05, "loss": 4.4272, "step": 4010 }, { "epoch": 0.11164441576251362, "grad_norm": 0.0439453125, "learning_rate": 1.9857007473055482e-05, "loss": 4.426, "step": 4020 }, { "epoch": 0.11204775541483483, "grad_norm": 0.0439453125, "learning_rate": 1.9855935739766724e-05, "loss": 4.4782, "step": 4030 }, { "epoch": 0.11245109506715605, "grad_norm": 0.047119140625, "learning_rate": 1.9854860034261635e-05, "loss": 4.4529, "step": 4040 }, { "epoch": 0.11285443471947727, "grad_norm": 0.044677734375, "learning_rate": 1.9853780356973757e-05, "loss": 4.413, "step": 4050 }, { "epoch": 0.11325777437179849, "grad_norm": 0.045166015625, "learning_rate": 1.9852696708338224e-05, "loss": 4.4179, "step": 4060 }, { "epoch": 0.11366111402411971, "grad_norm": 0.04443359375, "learning_rate": 1.9851609088791783e-05, "loss": 4.4679, "step": 4070 }, { "epoch": 0.11406445367644093, "grad_norm": 0.04345703125, "learning_rate": 1.9850517498772775e-05, "loss": 4.4556, "step": 4080 }, { "epoch": 0.11446779332876215, "grad_norm": 0.043701171875, "learning_rate": 1.9849421938721137e-05, "loss": 4.4282, "step": 4090 }, { "epoch": 0.11487113298108337, "grad_norm": 0.0439453125, "learning_rate": 1.9848322409078412e-05, "loss": 4.4264, "step": 4100 }, { "epoch": 0.1152744726334046, "grad_norm": 0.045166015625, "learning_rate": 1.9847218910287743e-05, "loss": 4.4565, "step": 4110 }, { "epoch": 0.11567781228572581, "grad_norm": 0.04736328125, "learning_rate": 1.9846111442793866e-05, "loss": 4.4284, "step": 4120 }, { "epoch": 0.11608115193804702, "grad_norm": 0.045166015625, "learning_rate": 1.984500000704313e-05, "loss": 4.4254, "step": 4130 }, { "epoch": 0.11648449159036825, "grad_norm": 0.0458984375, "learning_rate": 1.9843884603483464e-05, "loss": 4.4437, "step": 4140 }, { "epoch": 0.11688783124268946, "grad_norm": 0.0439453125, "learning_rate": 1.9842765232564415e-05, "loss": 4.4201, "step": 4150 }, { "epoch": 0.11729117089501069, "grad_norm": 0.044921875, "learning_rate": 1.9841641894737113e-05, "loss": 4.442, "step": 4160 }, { "epoch": 0.1176945105473319, "grad_norm": 0.044189453125, "learning_rate": 1.98405145904543e-05, "loss": 4.4023, "step": 4170 }, { "epoch": 0.11809785019965313, "grad_norm": 0.044921875, "learning_rate": 1.9839383320170308e-05, "loss": 4.4158, "step": 4180 }, { "epoch": 0.11850118985197435, "grad_norm": 0.0439453125, "learning_rate": 1.9838248084341077e-05, "loss": 4.4407, "step": 4190 }, { "epoch": 0.11890452950429557, "grad_norm": 0.044189453125, "learning_rate": 1.9837108883424128e-05, "loss": 4.4053, "step": 4200 }, { "epoch": 0.11930786915661679, "grad_norm": 0.044189453125, "learning_rate": 1.98359657178786e-05, "loss": 4.453, "step": 4210 }, { "epoch": 0.11971120880893801, "grad_norm": 0.0439453125, "learning_rate": 1.9834818588165216e-05, "loss": 4.428, "step": 4220 }, { "epoch": 0.12011454846125923, "grad_norm": 0.046875, "learning_rate": 1.98336674947463e-05, "loss": 4.4208, "step": 4230 }, { "epoch": 0.12051788811358044, "grad_norm": 0.0439453125, "learning_rate": 1.9832512438085776e-05, "loss": 4.4668, "step": 4240 }, { "epoch": 0.12092122776590167, "grad_norm": 0.04345703125, "learning_rate": 1.9831353418649168e-05, "loss": 4.4151, "step": 4250 }, { "epoch": 0.12132456741822288, "grad_norm": 0.0478515625, "learning_rate": 1.9830190436903587e-05, "loss": 4.4326, "step": 4260 }, { "epoch": 0.12172790707054411, "grad_norm": 0.044677734375, "learning_rate": 1.982902349331775e-05, "loss": 4.4254, "step": 4270 }, { "epoch": 0.12213124672286532, "grad_norm": 0.043212890625, "learning_rate": 1.9827852588361966e-05, "loss": 4.46, "step": 4280 }, { "epoch": 0.12253458637518655, "grad_norm": 0.04541015625, "learning_rate": 1.982667772250815e-05, "loss": 4.4317, "step": 4290 }, { "epoch": 0.12293792602750776, "grad_norm": 0.044677734375, "learning_rate": 1.9825498896229793e-05, "loss": 4.4348, "step": 4300 }, { "epoch": 0.12334126567982899, "grad_norm": 0.043701171875, "learning_rate": 1.9824316110002e-05, "loss": 4.4246, "step": 4310 }, { "epoch": 0.1237446053321502, "grad_norm": 0.0439453125, "learning_rate": 1.9823129364301474e-05, "loss": 4.4576, "step": 4320 }, { "epoch": 0.12414794498447142, "grad_norm": 0.04541015625, "learning_rate": 1.9821938659606496e-05, "loss": 4.4288, "step": 4330 }, { "epoch": 0.12455128463679264, "grad_norm": 0.0439453125, "learning_rate": 1.9820743996396957e-05, "loss": 4.4153, "step": 4340 }, { "epoch": 0.12495462428911386, "grad_norm": 0.046142578125, "learning_rate": 1.981954537515434e-05, "loss": 4.4412, "step": 4350 }, { "epoch": 0.12535796394143509, "grad_norm": 0.0458984375, "learning_rate": 1.9818342796361723e-05, "loss": 4.4165, "step": 4360 }, { "epoch": 0.1257613035937563, "grad_norm": 0.046630859375, "learning_rate": 1.981713626050378e-05, "loss": 4.4294, "step": 4370 }, { "epoch": 0.1261646432460775, "grad_norm": 0.045166015625, "learning_rate": 1.9815925768066776e-05, "loss": 4.4297, "step": 4380 }, { "epoch": 0.12656798289839874, "grad_norm": 0.04345703125, "learning_rate": 1.9814711319538577e-05, "loss": 4.4139, "step": 4390 }, { "epoch": 0.12697132255071997, "grad_norm": 0.04443359375, "learning_rate": 1.9813492915408632e-05, "loss": 4.4248, "step": 4400 }, { "epoch": 0.1273746622030412, "grad_norm": 0.04541015625, "learning_rate": 1.9812270556167997e-05, "loss": 4.4137, "step": 4410 }, { "epoch": 0.1277780018553624, "grad_norm": 0.0439453125, "learning_rate": 1.981104424230932e-05, "loss": 4.4234, "step": 4420 }, { "epoch": 0.12818134150768362, "grad_norm": 0.0458984375, "learning_rate": 1.9809813974326834e-05, "loss": 4.4244, "step": 4430 }, { "epoch": 0.12858468116000485, "grad_norm": 0.04345703125, "learning_rate": 1.9808579752716372e-05, "loss": 4.4196, "step": 4440 }, { "epoch": 0.12898802081232605, "grad_norm": 0.046630859375, "learning_rate": 1.9807341577975362e-05, "loss": 4.4234, "step": 4450 }, { "epoch": 0.12939136046464728, "grad_norm": 0.04541015625, "learning_rate": 1.980609945060282e-05, "loss": 4.4298, "step": 4460 }, { "epoch": 0.1297947001169685, "grad_norm": 0.0458984375, "learning_rate": 1.9804853371099356e-05, "loss": 4.4431, "step": 4470 }, { "epoch": 0.13019803976928973, "grad_norm": 0.045654296875, "learning_rate": 1.9803603339967183e-05, "loss": 4.4309, "step": 4480 }, { "epoch": 0.13060137942161093, "grad_norm": 0.04443359375, "learning_rate": 1.980234935771009e-05, "loss": 4.4654, "step": 4490 }, { "epoch": 0.13100471907393216, "grad_norm": 0.046875, "learning_rate": 1.9801091424833476e-05, "loss": 4.4307, "step": 4500 }, { "epoch": 0.13140805872625339, "grad_norm": 0.044921875, "learning_rate": 1.9799829541844312e-05, "loss": 4.4234, "step": 4510 }, { "epoch": 0.13181139837857458, "grad_norm": 0.044677734375, "learning_rate": 1.979856370925118e-05, "loss": 4.46, "step": 4520 }, { "epoch": 0.1322147380308958, "grad_norm": 0.0439453125, "learning_rate": 1.979729392756424e-05, "loss": 4.4555, "step": 4530 }, { "epoch": 0.13261807768321704, "grad_norm": 0.043701171875, "learning_rate": 1.9796020197295257e-05, "loss": 4.4136, "step": 4540 }, { "epoch": 0.13302141733553827, "grad_norm": 0.04638671875, "learning_rate": 1.979474251895757e-05, "loss": 4.4088, "step": 4550 }, { "epoch": 0.13342475698785947, "grad_norm": 0.04248046875, "learning_rate": 1.9793460893066125e-05, "loss": 4.4373, "step": 4560 }, { "epoch": 0.1338280966401807, "grad_norm": 0.043212890625, "learning_rate": 1.979217532013745e-05, "loss": 4.4035, "step": 4570 }, { "epoch": 0.13423143629250192, "grad_norm": 0.044189453125, "learning_rate": 1.9790885800689674e-05, "loss": 4.4154, "step": 4580 }, { "epoch": 0.13463477594482315, "grad_norm": 0.04541015625, "learning_rate": 1.9789592335242497e-05, "loss": 4.4544, "step": 4590 }, { "epoch": 0.13503811559714435, "grad_norm": 0.046875, "learning_rate": 1.978829492431723e-05, "loss": 4.431, "step": 4600 }, { "epoch": 0.13544145524946558, "grad_norm": 0.043701171875, "learning_rate": 1.9786993568436767e-05, "loss": 4.4158, "step": 4610 }, { "epoch": 0.1358447949017868, "grad_norm": 0.045654296875, "learning_rate": 1.9785688268125586e-05, "loss": 4.4444, "step": 4620 }, { "epoch": 0.136248134554108, "grad_norm": 0.043212890625, "learning_rate": 1.978437902390976e-05, "loss": 4.4066, "step": 4630 }, { "epoch": 0.13665147420642923, "grad_norm": 0.046142578125, "learning_rate": 1.9783065836316955e-05, "loss": 4.464, "step": 4640 }, { "epoch": 0.13705481385875046, "grad_norm": 0.0439453125, "learning_rate": 1.9781748705876413e-05, "loss": 4.4413, "step": 4650 }, { "epoch": 0.13745815351107168, "grad_norm": 0.046142578125, "learning_rate": 1.9780427633118986e-05, "loss": 4.4317, "step": 4660 }, { "epoch": 0.13786149316339288, "grad_norm": 0.042724609375, "learning_rate": 1.9779102618577093e-05, "loss": 4.4638, "step": 4670 }, { "epoch": 0.1382648328157141, "grad_norm": 0.045654296875, "learning_rate": 1.9777773662784757e-05, "loss": 4.4528, "step": 4680 }, { "epoch": 0.13866817246803534, "grad_norm": 0.04296875, "learning_rate": 1.9776440766277584e-05, "loss": 4.4481, "step": 4690 }, { "epoch": 0.13907151212035654, "grad_norm": 0.04296875, "learning_rate": 1.9775103929592765e-05, "loss": 4.4317, "step": 4700 }, { "epoch": 0.13947485177267777, "grad_norm": 0.044189453125, "learning_rate": 1.9773763153269086e-05, "loss": 4.4441, "step": 4710 }, { "epoch": 0.139878191424999, "grad_norm": 0.04638671875, "learning_rate": 1.977241843784692e-05, "loss": 4.4156, "step": 4720 }, { "epoch": 0.14028153107732022, "grad_norm": 0.045166015625, "learning_rate": 1.9771069783868216e-05, "loss": 4.4555, "step": 4730 }, { "epoch": 0.14068487072964142, "grad_norm": 0.0439453125, "learning_rate": 1.976971719187652e-05, "loss": 4.4137, "step": 4740 }, { "epoch": 0.14108821038196265, "grad_norm": 0.045654296875, "learning_rate": 1.9768360662416976e-05, "loss": 4.4317, "step": 4750 }, { "epoch": 0.14149155003428388, "grad_norm": 0.04248046875, "learning_rate": 1.976700019603629e-05, "loss": 4.4435, "step": 4760 }, { "epoch": 0.1418948896866051, "grad_norm": 0.045166015625, "learning_rate": 1.9765635793282776e-05, "loss": 4.4111, "step": 4770 }, { "epoch": 0.1422982293389263, "grad_norm": 0.04345703125, "learning_rate": 1.976426745470632e-05, "loss": 4.4148, "step": 4780 }, { "epoch": 0.14270156899124753, "grad_norm": 0.043701171875, "learning_rate": 1.9762895180858407e-05, "loss": 4.4427, "step": 4790 }, { "epoch": 0.14310490864356876, "grad_norm": 0.049560546875, "learning_rate": 1.9761518972292094e-05, "loss": 4.3782, "step": 4800 }, { "epoch": 0.14350824829588996, "grad_norm": 0.047607421875, "learning_rate": 1.976013882956204e-05, "loss": 4.4193, "step": 4810 }, { "epoch": 0.14391158794821118, "grad_norm": 0.041748046875, "learning_rate": 1.9758754753224477e-05, "loss": 4.4211, "step": 4820 }, { "epoch": 0.1443149276005324, "grad_norm": 0.04541015625, "learning_rate": 1.975736674383722e-05, "loss": 4.3875, "step": 4830 }, { "epoch": 0.14471826725285364, "grad_norm": 0.044189453125, "learning_rate": 1.9755974801959686e-05, "loss": 4.4681, "step": 4840 }, { "epoch": 0.14512160690517484, "grad_norm": 0.045166015625, "learning_rate": 1.9754578928152858e-05, "loss": 4.4445, "step": 4850 }, { "epoch": 0.14552494655749607, "grad_norm": 0.044189453125, "learning_rate": 1.975317912297932e-05, "loss": 4.4988, "step": 4860 }, { "epoch": 0.1459282862098173, "grad_norm": 0.042236328125, "learning_rate": 1.9751775387003222e-05, "loss": 4.4047, "step": 4870 }, { "epoch": 0.14633162586213852, "grad_norm": 0.04150390625, "learning_rate": 1.9750367720790316e-05, "loss": 4.4024, "step": 4880 }, { "epoch": 0.14673496551445972, "grad_norm": 0.041748046875, "learning_rate": 1.9748956124907934e-05, "loss": 4.4068, "step": 4890 }, { "epoch": 0.14713830516678095, "grad_norm": 0.0498046875, "learning_rate": 1.9747540599924983e-05, "loss": 4.4456, "step": 4900 }, { "epoch": 0.14754164481910217, "grad_norm": 0.04541015625, "learning_rate": 1.9746121146411954e-05, "loss": 4.436, "step": 4910 }, { "epoch": 0.14794498447142337, "grad_norm": 0.044189453125, "learning_rate": 1.9744697764940936e-05, "loss": 4.4352, "step": 4920 }, { "epoch": 0.1483483241237446, "grad_norm": 0.044921875, "learning_rate": 1.9743270456085588e-05, "loss": 4.4355, "step": 4930 }, { "epoch": 0.14875166377606583, "grad_norm": 0.046142578125, "learning_rate": 1.974183922042115e-05, "loss": 4.4273, "step": 4940 }, { "epoch": 0.14915500342838706, "grad_norm": 0.044677734375, "learning_rate": 1.9740404058524457e-05, "loss": 4.4322, "step": 4950 }, { "epoch": 0.14955834308070826, "grad_norm": 0.041015625, "learning_rate": 1.9738964970973912e-05, "loss": 4.4242, "step": 4960 }, { "epoch": 0.14996168273302948, "grad_norm": 0.0439453125, "learning_rate": 1.973752195834952e-05, "loss": 4.4411, "step": 4970 }, { "epoch": 0.1503650223853507, "grad_norm": 0.04443359375, "learning_rate": 1.973607502123284e-05, "loss": 4.449, "step": 4980 }, { "epoch": 0.1507683620376719, "grad_norm": 0.043701171875, "learning_rate": 1.9734624160207034e-05, "loss": 4.4258, "step": 4990 }, { "epoch": 0.15117170168999314, "grad_norm": 0.04345703125, "learning_rate": 1.9733169375856845e-05, "loss": 4.4111, "step": 5000 }, { "epoch": 0.15157504134231436, "grad_norm": 0.04296875, "learning_rate": 1.9731710668768582e-05, "loss": 4.4278, "step": 5010 }, { "epoch": 0.1519783809946356, "grad_norm": 0.042236328125, "learning_rate": 1.9730248039530154e-05, "loss": 4.4062, "step": 5020 }, { "epoch": 0.1523817206469568, "grad_norm": 0.04736328125, "learning_rate": 1.9728781488731036e-05, "loss": 4.4346, "step": 5030 }, { "epoch": 0.15278506029927802, "grad_norm": 0.0458984375, "learning_rate": 1.9727311016962285e-05, "loss": 4.4375, "step": 5040 }, { "epoch": 0.15318839995159925, "grad_norm": 0.0458984375, "learning_rate": 1.972583662481655e-05, "loss": 4.4236, "step": 5050 }, { "epoch": 0.15359173960392047, "grad_norm": 0.043212890625, "learning_rate": 1.9724358312888047e-05, "loss": 4.4345, "step": 5060 }, { "epoch": 0.15399507925624167, "grad_norm": 0.046142578125, "learning_rate": 1.9722876081772576e-05, "loss": 4.4181, "step": 5070 }, { "epoch": 0.1543984189085629, "grad_norm": 0.044677734375, "learning_rate": 1.9721389932067524e-05, "loss": 4.4428, "step": 5080 }, { "epoch": 0.15480175856088413, "grad_norm": 0.04443359375, "learning_rate": 1.9719899864371842e-05, "loss": 4.407, "step": 5090 }, { "epoch": 0.15520509821320533, "grad_norm": 0.045654296875, "learning_rate": 1.971840587928608e-05, "loss": 4.4225, "step": 5100 }, { "epoch": 0.15560843786552656, "grad_norm": 0.04248046875, "learning_rate": 1.971690797741234e-05, "loss": 4.4189, "step": 5110 }, { "epoch": 0.15601177751784778, "grad_norm": 0.044921875, "learning_rate": 1.9715406159354332e-05, "loss": 4.454, "step": 5120 }, { "epoch": 0.156415117170169, "grad_norm": 0.04248046875, "learning_rate": 1.9713900425717324e-05, "loss": 4.4641, "step": 5130 }, { "epoch": 0.1568184568224902, "grad_norm": 0.045166015625, "learning_rate": 1.971239077710817e-05, "loss": 4.4274, "step": 5140 }, { "epoch": 0.15722179647481144, "grad_norm": 0.044677734375, "learning_rate": 1.97108772141353e-05, "loss": 4.4065, "step": 5150 }, { "epoch": 0.15762513612713266, "grad_norm": 0.0458984375, "learning_rate": 1.970935973740872e-05, "loss": 4.4587, "step": 5160 }, { "epoch": 0.1580284757794539, "grad_norm": 0.045166015625, "learning_rate": 1.970783834754002e-05, "loss": 4.4261, "step": 5170 }, { "epoch": 0.1584318154317751, "grad_norm": 0.043701171875, "learning_rate": 1.970631304514236e-05, "loss": 4.4182, "step": 5180 }, { "epoch": 0.15883515508409632, "grad_norm": 0.044189453125, "learning_rate": 1.970478383083048e-05, "loss": 4.4099, "step": 5190 }, { "epoch": 0.15923849473641755, "grad_norm": 0.043701171875, "learning_rate": 1.970325070522069e-05, "loss": 4.4178, "step": 5200 }, { "epoch": 0.15964183438873875, "grad_norm": 0.044677734375, "learning_rate": 1.9701713668930894e-05, "loss": 4.4069, "step": 5210 }, { "epoch": 0.16004517404105997, "grad_norm": 0.04248046875, "learning_rate": 1.970017272258055e-05, "loss": 4.4532, "step": 5220 }, { "epoch": 0.1604485136933812, "grad_norm": 0.046142578125, "learning_rate": 1.9698627866790704e-05, "loss": 4.39, "step": 5230 }, { "epoch": 0.16085185334570243, "grad_norm": 0.046630859375, "learning_rate": 1.9697079102183982e-05, "loss": 4.4278, "step": 5240 }, { "epoch": 0.16125519299802363, "grad_norm": 0.04248046875, "learning_rate": 1.9695526429384575e-05, "loss": 4.4071, "step": 5250 }, { "epoch": 0.16165853265034485, "grad_norm": 0.042724609375, "learning_rate": 1.9693969849018254e-05, "loss": 4.4265, "step": 5260 }, { "epoch": 0.16206187230266608, "grad_norm": 0.04345703125, "learning_rate": 1.9692409361712358e-05, "loss": 4.4504, "step": 5270 }, { "epoch": 0.16246521195498728, "grad_norm": 0.04638671875, "learning_rate": 1.9690844968095817e-05, "loss": 4.4269, "step": 5280 }, { "epoch": 0.1628685516073085, "grad_norm": 0.044189453125, "learning_rate": 1.968927666879912e-05, "loss": 4.436, "step": 5290 }, { "epoch": 0.16327189125962974, "grad_norm": 0.043212890625, "learning_rate": 1.9687704464454337e-05, "loss": 4.4163, "step": 5300 }, { "epoch": 0.16367523091195096, "grad_norm": 0.046630859375, "learning_rate": 1.968612835569511e-05, "loss": 4.395, "step": 5310 }, { "epoch": 0.16407857056427216, "grad_norm": 0.046142578125, "learning_rate": 1.968454834315665e-05, "loss": 4.4506, "step": 5320 }, { "epoch": 0.1644819102165934, "grad_norm": 0.043212890625, "learning_rate": 1.968296442747575e-05, "loss": 4.4192, "step": 5330 }, { "epoch": 0.16488524986891462, "grad_norm": 0.045654296875, "learning_rate": 1.9681376609290774e-05, "loss": 4.4294, "step": 5340 }, { "epoch": 0.16528858952123585, "grad_norm": 0.045654296875, "learning_rate": 1.9679784889241653e-05, "loss": 4.4436, "step": 5350 }, { "epoch": 0.16569192917355705, "grad_norm": 0.04296875, "learning_rate": 1.96781892679699e-05, "loss": 4.435, "step": 5360 }, { "epoch": 0.16609526882587827, "grad_norm": 0.044189453125, "learning_rate": 1.9676589746118588e-05, "loss": 4.4361, "step": 5370 }, { "epoch": 0.1664986084781995, "grad_norm": 0.044189453125, "learning_rate": 1.9674986324332367e-05, "loss": 4.4011, "step": 5380 }, { "epoch": 0.1669019481305207, "grad_norm": 0.045166015625, "learning_rate": 1.967337900325747e-05, "loss": 4.4528, "step": 5390 }, { "epoch": 0.16730528778284193, "grad_norm": 0.047119140625, "learning_rate": 1.967176778354169e-05, "loss": 4.4338, "step": 5400 }, { "epoch": 0.16770862743516315, "grad_norm": 0.044921875, "learning_rate": 1.9670152665834384e-05, "loss": 4.4473, "step": 5410 }, { "epoch": 0.16811196708748438, "grad_norm": 0.0458984375, "learning_rate": 1.9668533650786503e-05, "loss": 4.4477, "step": 5420 }, { "epoch": 0.16851530673980558, "grad_norm": 0.043212890625, "learning_rate": 1.9666910739050548e-05, "loss": 4.436, "step": 5430 }, { "epoch": 0.1689186463921268, "grad_norm": 0.042236328125, "learning_rate": 1.9665283931280592e-05, "loss": 4.3926, "step": 5440 }, { "epoch": 0.16932198604444804, "grad_norm": 0.045654296875, "learning_rate": 1.9663653228132293e-05, "loss": 4.4203, "step": 5450 }, { "epoch": 0.16972532569676924, "grad_norm": 0.0439453125, "learning_rate": 1.9662018630262866e-05, "loss": 4.3953, "step": 5460 }, { "epoch": 0.17012866534909046, "grad_norm": 0.0439453125, "learning_rate": 1.96603801383311e-05, "loss": 4.4506, "step": 5470 }, { "epoch": 0.1705320050014117, "grad_norm": 0.04296875, "learning_rate": 1.9658737752997355e-05, "loss": 4.3835, "step": 5480 }, { "epoch": 0.17093534465373292, "grad_norm": 0.04638671875, "learning_rate": 1.9657091474923554e-05, "loss": 4.4239, "step": 5490 }, { "epoch": 0.17133868430605412, "grad_norm": 0.045654296875, "learning_rate": 1.9655441304773192e-05, "loss": 4.4173, "step": 5500 }, { "epoch": 0.17174202395837534, "grad_norm": 0.04296875, "learning_rate": 1.9653787243211337e-05, "loss": 4.4073, "step": 5510 }, { "epoch": 0.17214536361069657, "grad_norm": 0.044921875, "learning_rate": 1.9652129290904623e-05, "loss": 4.4278, "step": 5520 }, { "epoch": 0.1725487032630178, "grad_norm": 0.043212890625, "learning_rate": 1.9650467448521246e-05, "loss": 4.445, "step": 5530 }, { "epoch": 0.172952042915339, "grad_norm": 0.044921875, "learning_rate": 1.9648801716730982e-05, "loss": 4.4233, "step": 5540 }, { "epoch": 0.17335538256766023, "grad_norm": 0.04443359375, "learning_rate": 1.964713209620516e-05, "loss": 4.4504, "step": 5550 }, { "epoch": 0.17375872221998145, "grad_norm": 0.0458984375, "learning_rate": 1.9645458587616685e-05, "loss": 4.4466, "step": 5560 }, { "epoch": 0.17416206187230265, "grad_norm": 0.045166015625, "learning_rate": 1.964378119164003e-05, "loss": 4.4244, "step": 5570 }, { "epoch": 0.17456540152462388, "grad_norm": 0.044921875, "learning_rate": 1.9642099908951232e-05, "loss": 4.465, "step": 5580 }, { "epoch": 0.1749687411769451, "grad_norm": 0.0458984375, "learning_rate": 1.964041474022789e-05, "loss": 4.3689, "step": 5590 }, { "epoch": 0.17537208082926634, "grad_norm": 0.042236328125, "learning_rate": 1.9638725686149184e-05, "loss": 4.4329, "step": 5600 }, { "epoch": 0.17577542048158754, "grad_norm": 0.04345703125, "learning_rate": 1.9637032747395838e-05, "loss": 4.3826, "step": 5610 }, { "epoch": 0.17617876013390876, "grad_norm": 0.043701171875, "learning_rate": 1.9635335924650158e-05, "loss": 4.4311, "step": 5620 }, { "epoch": 0.17658209978623, "grad_norm": 0.044921875, "learning_rate": 1.9633635218596016e-05, "loss": 4.4464, "step": 5630 }, { "epoch": 0.17698543943855122, "grad_norm": 0.04638671875, "learning_rate": 1.963193062991884e-05, "loss": 4.3879, "step": 5640 }, { "epoch": 0.17738877909087242, "grad_norm": 0.0458984375, "learning_rate": 1.963022215930562e-05, "loss": 4.4326, "step": 5650 }, { "epoch": 0.17779211874319364, "grad_norm": 0.043212890625, "learning_rate": 1.962850980744493e-05, "loss": 4.4224, "step": 5660 }, { "epoch": 0.17819545839551487, "grad_norm": 0.044921875, "learning_rate": 1.9626793575026884e-05, "loss": 4.4249, "step": 5670 }, { "epoch": 0.17859879804783607, "grad_norm": 0.043212890625, "learning_rate": 1.9625073462743178e-05, "loss": 4.4149, "step": 5680 }, { "epoch": 0.1790021377001573, "grad_norm": 0.046142578125, "learning_rate": 1.962334947128706e-05, "loss": 4.4131, "step": 5690 }, { "epoch": 0.17940547735247853, "grad_norm": 0.046142578125, "learning_rate": 1.9621621601353354e-05, "loss": 4.4174, "step": 5700 }, { "epoch": 0.17980881700479975, "grad_norm": 0.04443359375, "learning_rate": 1.9619889853638434e-05, "loss": 4.4323, "step": 5710 }, { "epoch": 0.18021215665712095, "grad_norm": 0.04443359375, "learning_rate": 1.9618154228840244e-05, "loss": 4.3831, "step": 5720 }, { "epoch": 0.18061549630944218, "grad_norm": 0.04443359375, "learning_rate": 1.961641472765829e-05, "loss": 4.3819, "step": 5730 }, { "epoch": 0.1810188359617634, "grad_norm": 0.04345703125, "learning_rate": 1.9614671350793638e-05, "loss": 4.4086, "step": 5740 }, { "epoch": 0.1814221756140846, "grad_norm": 0.0439453125, "learning_rate": 1.9612924098948917e-05, "loss": 4.4515, "step": 5750 }, { "epoch": 0.18182551526640583, "grad_norm": 0.044921875, "learning_rate": 1.961117297282832e-05, "loss": 4.4329, "step": 5760 }, { "epoch": 0.18222885491872706, "grad_norm": 0.04638671875, "learning_rate": 1.96094179731376e-05, "loss": 4.3997, "step": 5770 }, { "epoch": 0.1826321945710483, "grad_norm": 0.043701171875, "learning_rate": 1.9607659100584068e-05, "loss": 4.4264, "step": 5780 }, { "epoch": 0.1830355342233695, "grad_norm": 0.0439453125, "learning_rate": 1.9605896355876597e-05, "loss": 4.4235, "step": 5790 }, { "epoch": 0.18343887387569072, "grad_norm": 0.050048828125, "learning_rate": 1.9604129739725626e-05, "loss": 4.4217, "step": 5800 }, { "epoch": 0.18384221352801194, "grad_norm": 0.0458984375, "learning_rate": 1.960235925284315e-05, "loss": 4.4086, "step": 5810 }, { "epoch": 0.18424555318033317, "grad_norm": 0.04296875, "learning_rate": 1.9600584895942723e-05, "loss": 4.4301, "step": 5820 }, { "epoch": 0.18464889283265437, "grad_norm": 0.044921875, "learning_rate": 1.9598806669739464e-05, "loss": 4.4433, "step": 5830 }, { "epoch": 0.1850522324849756, "grad_norm": 0.044921875, "learning_rate": 1.959702457495004e-05, "loss": 4.4294, "step": 5840 }, { "epoch": 0.18545557213729683, "grad_norm": 0.046875, "learning_rate": 1.9595238612292696e-05, "loss": 4.3839, "step": 5850 }, { "epoch": 0.18585891178961803, "grad_norm": 0.045654296875, "learning_rate": 1.9593448782487216e-05, "loss": 4.436, "step": 5860 }, { "epoch": 0.18626225144193925, "grad_norm": 0.046142578125, "learning_rate": 1.9591655086254952e-05, "loss": 4.422, "step": 5870 }, { "epoch": 0.18666559109426048, "grad_norm": 0.0419921875, "learning_rate": 1.9589857524318815e-05, "loss": 4.4399, "step": 5880 }, { "epoch": 0.1870689307465817, "grad_norm": 0.045166015625, "learning_rate": 1.9588056097403272e-05, "loss": 4.4534, "step": 5890 }, { "epoch": 0.1874722703989029, "grad_norm": 0.044921875, "learning_rate": 1.9586250806234347e-05, "loss": 4.4714, "step": 5900 }, { "epoch": 0.18787561005122413, "grad_norm": 0.04345703125, "learning_rate": 1.9584441651539627e-05, "loss": 4.4086, "step": 5910 }, { "epoch": 0.18827894970354536, "grad_norm": 0.043701171875, "learning_rate": 1.9582628634048245e-05, "loss": 4.4375, "step": 5920 }, { "epoch": 0.18868228935586656, "grad_norm": 0.04541015625, "learning_rate": 1.9580811754490905e-05, "loss": 4.4239, "step": 5930 }, { "epoch": 0.1890856290081878, "grad_norm": 0.04345703125, "learning_rate": 1.957899101359985e-05, "loss": 4.4065, "step": 5940 }, { "epoch": 0.18948896866050902, "grad_norm": 0.044189453125, "learning_rate": 1.9577166412108905e-05, "loss": 4.4035, "step": 5950 }, { "epoch": 0.18989230831283024, "grad_norm": 0.043701171875, "learning_rate": 1.957533795075342e-05, "loss": 4.4268, "step": 5960 }, { "epoch": 0.19029564796515144, "grad_norm": 0.044921875, "learning_rate": 1.9573505630270323e-05, "loss": 4.4312, "step": 5970 }, { "epoch": 0.19069898761747267, "grad_norm": 0.04443359375, "learning_rate": 1.9571669451398087e-05, "loss": 4.4135, "step": 5980 }, { "epoch": 0.1911023272697939, "grad_norm": 0.04296875, "learning_rate": 1.9569829414876747e-05, "loss": 4.4179, "step": 5990 }, { "epoch": 0.19150566692211513, "grad_norm": 0.04443359375, "learning_rate": 1.9567985521447884e-05, "loss": 4.4461, "step": 6000 }, { "epoch": 0.19190900657443632, "grad_norm": 0.044677734375, "learning_rate": 1.956613777185464e-05, "loss": 4.4158, "step": 6010 }, { "epoch": 0.19231234622675755, "grad_norm": 0.04541015625, "learning_rate": 1.9564286166841716e-05, "loss": 4.4099, "step": 6020 }, { "epoch": 0.19271568587907878, "grad_norm": 0.043212890625, "learning_rate": 1.9562430707155352e-05, "loss": 4.3912, "step": 6030 }, { "epoch": 0.19311902553139998, "grad_norm": 0.046875, "learning_rate": 1.956057139354335e-05, "loss": 4.4391, "step": 6040 }, { "epoch": 0.1935223651837212, "grad_norm": 0.0419921875, "learning_rate": 1.955870822675507e-05, "loss": 4.418, "step": 6050 }, { "epoch": 0.19392570483604243, "grad_norm": 0.04443359375, "learning_rate": 1.9556841207541416e-05, "loss": 4.4192, "step": 6060 }, { "epoch": 0.19432904448836366, "grad_norm": 0.04638671875, "learning_rate": 1.9554970336654854e-05, "loss": 4.4466, "step": 6070 }, { "epoch": 0.19473238414068486, "grad_norm": 0.043212890625, "learning_rate": 1.9553095614849392e-05, "loss": 4.4524, "step": 6080 }, { "epoch": 0.1951357237930061, "grad_norm": 0.050537109375, "learning_rate": 1.9551217042880596e-05, "loss": 4.408, "step": 6090 }, { "epoch": 0.19553906344532732, "grad_norm": 0.045654296875, "learning_rate": 1.9549334621505586e-05, "loss": 4.4387, "step": 6100 }, { "epoch": 0.19594240309764854, "grad_norm": 0.044921875, "learning_rate": 1.9547448351483026e-05, "loss": 4.4344, "step": 6110 }, { "epoch": 0.19634574274996974, "grad_norm": 0.04443359375, "learning_rate": 1.9545558233573136e-05, "loss": 4.4015, "step": 6120 }, { "epoch": 0.19674908240229097, "grad_norm": 0.04296875, "learning_rate": 1.9543664268537687e-05, "loss": 4.4316, "step": 6130 }, { "epoch": 0.1971524220546122, "grad_norm": 0.045654296875, "learning_rate": 1.9541766457140002e-05, "loss": 4.4486, "step": 6140 }, { "epoch": 0.1975557617069334, "grad_norm": 0.045654296875, "learning_rate": 1.953986480014495e-05, "loss": 4.4231, "step": 6150 }, { "epoch": 0.19795910135925462, "grad_norm": 0.044921875, "learning_rate": 1.9537959298318946e-05, "loss": 4.3907, "step": 6160 }, { "epoch": 0.19836244101157585, "grad_norm": 0.04638671875, "learning_rate": 1.953604995242997e-05, "loss": 4.4468, "step": 6170 }, { "epoch": 0.19876578066389708, "grad_norm": 0.043701171875, "learning_rate": 1.9534136763247536e-05, "loss": 4.4483, "step": 6180 }, { "epoch": 0.19916912031621828, "grad_norm": 0.04541015625, "learning_rate": 1.9532219731542713e-05, "loss": 4.4409, "step": 6190 }, { "epoch": 0.1995724599685395, "grad_norm": 0.041259765625, "learning_rate": 1.9530298858088116e-05, "loss": 4.4329, "step": 6200 }, { "epoch": 0.19997579962086073, "grad_norm": 0.04345703125, "learning_rate": 1.9528374143657916e-05, "loss": 4.4215, "step": 6210 }, { "epoch": 0.20037913927318193, "grad_norm": 0.0419921875, "learning_rate": 1.9526445589027823e-05, "loss": 4.4397, "step": 6220 }, { "epoch": 0.20078247892550316, "grad_norm": 0.042724609375, "learning_rate": 1.9524513194975097e-05, "loss": 4.4591, "step": 6230 }, { "epoch": 0.2011858185778244, "grad_norm": 0.04345703125, "learning_rate": 1.952257696227855e-05, "loss": 4.4365, "step": 6240 }, { "epoch": 0.20158915823014562, "grad_norm": 0.044677734375, "learning_rate": 1.9520636891718536e-05, "loss": 4.426, "step": 6250 }, { "epoch": 0.20199249788246681, "grad_norm": 0.042236328125, "learning_rate": 1.9518692984076954e-05, "loss": 4.4257, "step": 6260 }, { "epoch": 0.20239583753478804, "grad_norm": 0.04541015625, "learning_rate": 1.9516745240137258e-05, "loss": 4.4476, "step": 6270 }, { "epoch": 0.20279917718710927, "grad_norm": 0.046630859375, "learning_rate": 1.951479366068444e-05, "loss": 4.4193, "step": 6280 }, { "epoch": 0.2032025168394305, "grad_norm": 0.044921875, "learning_rate": 1.9512838246505047e-05, "loss": 4.4487, "step": 6290 }, { "epoch": 0.2036058564917517, "grad_norm": 0.044921875, "learning_rate": 1.9510878998387153e-05, "loss": 4.406, "step": 6300 }, { "epoch": 0.20400919614407292, "grad_norm": 0.047119140625, "learning_rate": 1.95089159171204e-05, "loss": 4.4319, "step": 6310 }, { "epoch": 0.20441253579639415, "grad_norm": 0.045166015625, "learning_rate": 1.9506949003495964e-05, "loss": 4.4272, "step": 6320 }, { "epoch": 0.20481587544871535, "grad_norm": 0.0478515625, "learning_rate": 1.950497825830656e-05, "loss": 4.417, "step": 6330 }, { "epoch": 0.20521921510103658, "grad_norm": 0.04541015625, "learning_rate": 1.9503003682346457e-05, "loss": 4.4272, "step": 6340 }, { "epoch": 0.2056225547533578, "grad_norm": 0.045166015625, "learning_rate": 1.950102527641146e-05, "loss": 4.4631, "step": 6350 }, { "epoch": 0.20602589440567903, "grad_norm": 0.043212890625, "learning_rate": 1.9499043041298926e-05, "loss": 4.4216, "step": 6360 }, { "epoch": 0.20642923405800023, "grad_norm": 0.045166015625, "learning_rate": 1.9497056977807753e-05, "loss": 4.4107, "step": 6370 }, { "epoch": 0.20683257371032146, "grad_norm": 0.044677734375, "learning_rate": 1.949506708673837e-05, "loss": 4.3752, "step": 6380 }, { "epoch": 0.2072359133626427, "grad_norm": 0.044921875, "learning_rate": 1.9493073368892773e-05, "loss": 4.4042, "step": 6390 }, { "epoch": 0.20763925301496391, "grad_norm": 0.044677734375, "learning_rate": 1.9491075825074474e-05, "loss": 4.4834, "step": 6400 }, { "epoch": 0.20804259266728511, "grad_norm": 0.045654296875, "learning_rate": 1.948907445608854e-05, "loss": 4.4464, "step": 6410 }, { "epoch": 0.20844593231960634, "grad_norm": 0.04736328125, "learning_rate": 1.9487069262741587e-05, "loss": 4.4111, "step": 6420 }, { "epoch": 0.20884927197192757, "grad_norm": 0.044189453125, "learning_rate": 1.9485060245841753e-05, "loss": 4.4069, "step": 6430 }, { "epoch": 0.20925261162424877, "grad_norm": 0.04345703125, "learning_rate": 1.9483047406198734e-05, "loss": 4.4237, "step": 6440 }, { "epoch": 0.20965595127657, "grad_norm": 0.0419921875, "learning_rate": 1.948103074462376e-05, "loss": 4.4059, "step": 6450 }, { "epoch": 0.21005929092889122, "grad_norm": 0.044677734375, "learning_rate": 1.94790102619296e-05, "loss": 4.4207, "step": 6460 }, { "epoch": 0.21046263058121245, "grad_norm": 0.046630859375, "learning_rate": 1.9476985958930562e-05, "loss": 4.4224, "step": 6470 }, { "epoch": 0.21086597023353365, "grad_norm": 0.045654296875, "learning_rate": 1.9474957836442503e-05, "loss": 4.4075, "step": 6480 }, { "epoch": 0.21126930988585488, "grad_norm": 0.045654296875, "learning_rate": 1.947292589528281e-05, "loss": 4.4479, "step": 6490 }, { "epoch": 0.2116726495381761, "grad_norm": 0.043701171875, "learning_rate": 1.9470890136270406e-05, "loss": 4.3952, "step": 6500 }, { "epoch": 0.2120759891904973, "grad_norm": 0.04296875, "learning_rate": 1.946885056022577e-05, "loss": 4.3876, "step": 6510 }, { "epoch": 0.21247932884281853, "grad_norm": 0.04541015625, "learning_rate": 1.94668071679709e-05, "loss": 4.4027, "step": 6520 }, { "epoch": 0.21288266849513976, "grad_norm": 0.04296875, "learning_rate": 1.9464759960329338e-05, "loss": 4.4064, "step": 6530 }, { "epoch": 0.213286008147461, "grad_norm": 0.04638671875, "learning_rate": 1.946270893812617e-05, "loss": 4.4869, "step": 6540 }, { "epoch": 0.2136893477997822, "grad_norm": 0.043701171875, "learning_rate": 1.9460654102188016e-05, "loss": 4.3767, "step": 6550 }, { "epoch": 0.21409268745210341, "grad_norm": 0.047119140625, "learning_rate": 1.945859545334303e-05, "loss": 4.4312, "step": 6560 }, { "epoch": 0.21449602710442464, "grad_norm": 0.04736328125, "learning_rate": 1.9456532992420908e-05, "loss": 4.4624, "step": 6570 }, { "epoch": 0.21489936675674587, "grad_norm": 0.04345703125, "learning_rate": 1.9454466720252873e-05, "loss": 4.4068, "step": 6580 }, { "epoch": 0.21530270640906707, "grad_norm": 0.047119140625, "learning_rate": 1.9452396637671694e-05, "loss": 4.4328, "step": 6590 }, { "epoch": 0.2157060460613883, "grad_norm": 0.042236328125, "learning_rate": 1.945032274551167e-05, "loss": 4.4059, "step": 6600 }, { "epoch": 0.21610938571370952, "grad_norm": 0.045654296875, "learning_rate": 1.9448245044608642e-05, "loss": 4.4108, "step": 6610 }, { "epoch": 0.21651272536603072, "grad_norm": 0.0458984375, "learning_rate": 1.9446163535799977e-05, "loss": 4.412, "step": 6620 }, { "epoch": 0.21691606501835195, "grad_norm": 0.046630859375, "learning_rate": 1.944407821992458e-05, "loss": 4.4227, "step": 6630 }, { "epoch": 0.21731940467067318, "grad_norm": 0.04345703125, "learning_rate": 1.9441989097822894e-05, "loss": 4.3837, "step": 6640 }, { "epoch": 0.2177227443229944, "grad_norm": 0.0458984375, "learning_rate": 1.9439896170336888e-05, "loss": 4.4495, "step": 6650 }, { "epoch": 0.2181260839753156, "grad_norm": 0.04248046875, "learning_rate": 1.943779943831008e-05, "loss": 4.4078, "step": 6660 }, { "epoch": 0.21852942362763683, "grad_norm": 0.0439453125, "learning_rate": 1.9435698902587506e-05, "loss": 4.4187, "step": 6670 }, { "epoch": 0.21893276327995806, "grad_norm": 0.0419921875, "learning_rate": 1.9433594564015736e-05, "loss": 4.4453, "step": 6680 }, { "epoch": 0.21933610293227926, "grad_norm": 0.043701171875, "learning_rate": 1.9431486423442884e-05, "loss": 4.4233, "step": 6690 }, { "epoch": 0.2197394425846005, "grad_norm": 0.046875, "learning_rate": 1.9429374481718587e-05, "loss": 4.4194, "step": 6700 }, { "epoch": 0.2201427822369217, "grad_norm": 0.04345703125, "learning_rate": 1.9427258739694013e-05, "loss": 4.4008, "step": 6710 }, { "epoch": 0.22054612188924294, "grad_norm": 0.050048828125, "learning_rate": 1.9425139198221866e-05, "loss": 4.4237, "step": 6720 }, { "epoch": 0.22094946154156414, "grad_norm": 0.048095703125, "learning_rate": 1.9423015858156385e-05, "loss": 4.4145, "step": 6730 }, { "epoch": 0.22135280119388537, "grad_norm": 0.043701171875, "learning_rate": 1.9420888720353332e-05, "loss": 4.4356, "step": 6740 }, { "epoch": 0.2217561408462066, "grad_norm": 0.044677734375, "learning_rate": 1.9418757785670005e-05, "loss": 4.4328, "step": 6750 }, { "epoch": 0.22215948049852782, "grad_norm": 0.04443359375, "learning_rate": 1.941662305496522e-05, "loss": 4.4487, "step": 6760 }, { "epoch": 0.22256282015084902, "grad_norm": 0.04443359375, "learning_rate": 1.9414484529099345e-05, "loss": 4.4328, "step": 6770 }, { "epoch": 0.22296615980317025, "grad_norm": 0.044189453125, "learning_rate": 1.941234220893426e-05, "loss": 4.4736, "step": 6780 }, { "epoch": 0.22336949945549148, "grad_norm": 0.043701171875, "learning_rate": 1.9410196095333384e-05, "loss": 4.4082, "step": 6790 }, { "epoch": 0.22377283910781268, "grad_norm": 0.04345703125, "learning_rate": 1.9408046189161655e-05, "loss": 4.423, "step": 6800 }, { "epoch": 0.2241761787601339, "grad_norm": 0.046630859375, "learning_rate": 1.9405892491285545e-05, "loss": 4.4259, "step": 6810 }, { "epoch": 0.22457951841245513, "grad_norm": 0.044677734375, "learning_rate": 1.9403735002573057e-05, "loss": 4.4397, "step": 6820 }, { "epoch": 0.22498285806477636, "grad_norm": 0.046142578125, "learning_rate": 1.940157372389372e-05, "loss": 4.4437, "step": 6830 }, { "epoch": 0.22538619771709756, "grad_norm": 0.0458984375, "learning_rate": 1.9399408656118587e-05, "loss": 4.4269, "step": 6840 }, { "epoch": 0.22578953736941879, "grad_norm": 0.04296875, "learning_rate": 1.939723980012024e-05, "loss": 4.3989, "step": 6850 }, { "epoch": 0.22619287702174, "grad_norm": 0.044677734375, "learning_rate": 1.9395067156772788e-05, "loss": 4.4389, "step": 6860 }, { "epoch": 0.22659621667406124, "grad_norm": 0.041259765625, "learning_rate": 1.939289072695187e-05, "loss": 4.4334, "step": 6870 }, { "epoch": 0.22699955632638244, "grad_norm": 0.0439453125, "learning_rate": 1.9390710511534647e-05, "loss": 4.4335, "step": 6880 }, { "epoch": 0.22740289597870367, "grad_norm": 0.045654296875, "learning_rate": 1.9388526511399805e-05, "loss": 4.4387, "step": 6890 }, { "epoch": 0.2278062356310249, "grad_norm": 0.04296875, "learning_rate": 1.9386338727427553e-05, "loss": 4.3764, "step": 6900 }, { "epoch": 0.2282095752833461, "grad_norm": 0.044921875, "learning_rate": 1.9384147160499636e-05, "loss": 4.4588, "step": 6910 }, { "epoch": 0.22861291493566732, "grad_norm": 0.042724609375, "learning_rate": 1.9381951811499316e-05, "loss": 4.4063, "step": 6920 }, { "epoch": 0.22901625458798855, "grad_norm": 0.04638671875, "learning_rate": 1.9379752681311377e-05, "loss": 4.417, "step": 6930 }, { "epoch": 0.22941959424030978, "grad_norm": 0.04736328125, "learning_rate": 1.9377549770822126e-05, "loss": 4.4474, "step": 6940 }, { "epoch": 0.22982293389263098, "grad_norm": 0.04443359375, "learning_rate": 1.9375343080919404e-05, "loss": 4.4204, "step": 6950 }, { "epoch": 0.2302262735449522, "grad_norm": 0.045654296875, "learning_rate": 1.9373132612492567e-05, "loss": 4.443, "step": 6960 }, { "epoch": 0.23062961319727343, "grad_norm": 0.04345703125, "learning_rate": 1.937091836643249e-05, "loss": 4.4401, "step": 6970 }, { "epoch": 0.23103295284959463, "grad_norm": 0.04638671875, "learning_rate": 1.936870034363158e-05, "loss": 4.4595, "step": 6980 }, { "epoch": 0.23143629250191586, "grad_norm": 0.046142578125, "learning_rate": 1.9366478544983764e-05, "loss": 4.3969, "step": 6990 }, { "epoch": 0.23183963215423709, "grad_norm": 0.043212890625, "learning_rate": 1.9364252971384484e-05, "loss": 4.4485, "step": 7000 }, { "epoch": 0.2322429718065583, "grad_norm": 0.044189453125, "learning_rate": 1.936202362373071e-05, "loss": 4.4289, "step": 7010 }, { "epoch": 0.2326463114588795, "grad_norm": 0.043212890625, "learning_rate": 1.9359790502920933e-05, "loss": 4.3963, "step": 7020 }, { "epoch": 0.23304965111120074, "grad_norm": 0.044677734375, "learning_rate": 1.935755360985516e-05, "loss": 4.4025, "step": 7030 }, { "epoch": 0.23345299076352197, "grad_norm": 0.045654296875, "learning_rate": 1.9355312945434927e-05, "loss": 4.419, "step": 7040 }, { "epoch": 0.2338563304158432, "grad_norm": 0.04443359375, "learning_rate": 1.9353068510563273e-05, "loss": 4.4328, "step": 7050 }, { "epoch": 0.2342596700681644, "grad_norm": 0.044189453125, "learning_rate": 1.9350820306144777e-05, "loss": 4.4244, "step": 7060 }, { "epoch": 0.23466300972048562, "grad_norm": 0.044677734375, "learning_rate": 1.9348568333085524e-05, "loss": 4.4305, "step": 7070 }, { "epoch": 0.23506634937280685, "grad_norm": 0.043701171875, "learning_rate": 1.9346312592293122e-05, "loss": 4.3941, "step": 7080 }, { "epoch": 0.23546968902512805, "grad_norm": 0.04443359375, "learning_rate": 1.9344053084676703e-05, "loss": 4.4298, "step": 7090 }, { "epoch": 0.23587302867744928, "grad_norm": 0.042724609375, "learning_rate": 1.9341789811146904e-05, "loss": 4.4046, "step": 7100 }, { "epoch": 0.2362763683297705, "grad_norm": 0.04443359375, "learning_rate": 1.9339522772615885e-05, "loss": 4.4326, "step": 7110 }, { "epoch": 0.23667970798209173, "grad_norm": 0.045166015625, "learning_rate": 1.9337251969997335e-05, "loss": 4.4237, "step": 7120 }, { "epoch": 0.23708304763441293, "grad_norm": 0.046142578125, "learning_rate": 1.9334977404206444e-05, "loss": 4.4558, "step": 7130 }, { "epoch": 0.23748638728673416, "grad_norm": 0.044921875, "learning_rate": 1.9332699076159933e-05, "loss": 4.445, "step": 7140 }, { "epoch": 0.23788972693905538, "grad_norm": 0.044189453125, "learning_rate": 1.933041698677602e-05, "loss": 4.4194, "step": 7150 }, { "epoch": 0.23829306659137658, "grad_norm": 0.046142578125, "learning_rate": 1.932813113697446e-05, "loss": 4.4536, "step": 7160 }, { "epoch": 0.2386964062436978, "grad_norm": 0.045166015625, "learning_rate": 1.9325841527676508e-05, "loss": 4.4002, "step": 7170 }, { "epoch": 0.23909974589601904, "grad_norm": 0.046142578125, "learning_rate": 1.9323548159804943e-05, "loss": 4.4208, "step": 7180 }, { "epoch": 0.23950308554834027, "grad_norm": 0.04638671875, "learning_rate": 1.9321251034284055e-05, "loss": 4.4156, "step": 7190 }, { "epoch": 0.23990642520066147, "grad_norm": 0.044677734375, "learning_rate": 1.9318950152039652e-05, "loss": 4.3942, "step": 7200 }, { "epoch": 0.2403097648529827, "grad_norm": 0.0458984375, "learning_rate": 1.9316645513999054e-05, "loss": 4.441, "step": 7210 }, { "epoch": 0.24071310450530392, "grad_norm": 0.04638671875, "learning_rate": 1.931433712109109e-05, "loss": 4.4016, "step": 7220 }, { "epoch": 0.24111644415762515, "grad_norm": 0.045654296875, "learning_rate": 1.9312024974246108e-05, "loss": 4.444, "step": 7230 }, { "epoch": 0.24151978380994635, "grad_norm": 0.046630859375, "learning_rate": 1.9309709074395968e-05, "loss": 4.4248, "step": 7240 }, { "epoch": 0.24192312346226758, "grad_norm": 0.04833984375, "learning_rate": 1.9307389422474044e-05, "loss": 4.4159, "step": 7250 }, { "epoch": 0.2423264631145888, "grad_norm": 0.044921875, "learning_rate": 1.930506601941522e-05, "loss": 4.4084, "step": 7260 }, { "epoch": 0.24272980276691, "grad_norm": 0.041015625, "learning_rate": 1.9302738866155886e-05, "loss": 4.3999, "step": 7270 }, { "epoch": 0.24313314241923123, "grad_norm": 0.049560546875, "learning_rate": 1.930040796363396e-05, "loss": 4.4393, "step": 7280 }, { "epoch": 0.24353648207155246, "grad_norm": 0.046142578125, "learning_rate": 1.929807331278885e-05, "loss": 4.459, "step": 7290 }, { "epoch": 0.24393982172387368, "grad_norm": 0.043701171875, "learning_rate": 1.9295734914561492e-05, "loss": 4.3951, "step": 7300 }, { "epoch": 0.24434316137619488, "grad_norm": 0.042724609375, "learning_rate": 1.9293392769894323e-05, "loss": 4.4067, "step": 7310 }, { "epoch": 0.2447465010285161, "grad_norm": 0.045654296875, "learning_rate": 1.9291046879731293e-05, "loss": 4.4559, "step": 7320 }, { "epoch": 0.24514984068083734, "grad_norm": 0.0419921875, "learning_rate": 1.928869724501786e-05, "loss": 4.4401, "step": 7330 }, { "epoch": 0.24555318033315857, "grad_norm": 0.044921875, "learning_rate": 1.9286343866700994e-05, "loss": 4.4308, "step": 7340 }, { "epoch": 0.24595651998547977, "grad_norm": 0.0458984375, "learning_rate": 1.9283986745729174e-05, "loss": 4.4268, "step": 7350 }, { "epoch": 0.246359859637801, "grad_norm": 0.0419921875, "learning_rate": 1.928162588305238e-05, "loss": 4.3841, "step": 7360 }, { "epoch": 0.24676319929012222, "grad_norm": 0.0439453125, "learning_rate": 1.9279261279622107e-05, "loss": 4.3678, "step": 7370 }, { "epoch": 0.24716653894244342, "grad_norm": 0.044189453125, "learning_rate": 1.927689293639136e-05, "loss": 4.4294, "step": 7380 }, { "epoch": 0.24756987859476465, "grad_norm": 0.04345703125, "learning_rate": 1.9274520854314637e-05, "loss": 4.4055, "step": 7390 }, { "epoch": 0.24797321824708587, "grad_norm": 0.045166015625, "learning_rate": 1.9272145034347963e-05, "loss": 4.4209, "step": 7400 }, { "epoch": 0.2483765578994071, "grad_norm": 0.0458984375, "learning_rate": 1.926976547744886e-05, "loss": 4.3919, "step": 7410 }, { "epoch": 0.2487798975517283, "grad_norm": 0.044677734375, "learning_rate": 1.9267382184576352e-05, "loss": 4.3913, "step": 7420 }, { "epoch": 0.24918323720404953, "grad_norm": 0.047119140625, "learning_rate": 1.926499515669097e-05, "loss": 4.3959, "step": 7430 }, { "epoch": 0.24958657685637076, "grad_norm": 0.0478515625, "learning_rate": 1.9262604394754757e-05, "loss": 4.474, "step": 7440 }, { "epoch": 0.24998991650869196, "grad_norm": 0.04541015625, "learning_rate": 1.926020989973125e-05, "loss": 4.4431, "step": 7450 }, { "epoch": 0.2503932561610132, "grad_norm": 0.04638671875, "learning_rate": 1.925781167258551e-05, "loss": 4.4181, "step": 7460 }, { "epoch": 0.2507965958133344, "grad_norm": 0.043701171875, "learning_rate": 1.9255409714284077e-05, "loss": 4.4429, "step": 7470 }, { "epoch": 0.2511999354656556, "grad_norm": 0.04296875, "learning_rate": 1.9253004025795014e-05, "loss": 4.4406, "step": 7480 }, { "epoch": 0.25160327511797687, "grad_norm": 0.0439453125, "learning_rate": 1.9250594608087872e-05, "loss": 4.4064, "step": 7490 }, { "epoch": 0.25200661477029807, "grad_norm": 0.04150390625, "learning_rate": 1.9248181462133723e-05, "loss": 4.4064, "step": 7500 }, { "epoch": 0.25240995442261926, "grad_norm": 0.04443359375, "learning_rate": 1.9245764588905128e-05, "loss": 4.4175, "step": 7510 }, { "epoch": 0.2528132940749405, "grad_norm": 0.0458984375, "learning_rate": 1.9243343989376153e-05, "loss": 4.3831, "step": 7520 }, { "epoch": 0.2532166337272617, "grad_norm": 0.045166015625, "learning_rate": 1.9240919664522368e-05, "loss": 4.4314, "step": 7530 }, { "epoch": 0.2536199733795829, "grad_norm": 0.041748046875, "learning_rate": 1.9238491615320845e-05, "loss": 4.4223, "step": 7540 }, { "epoch": 0.2540233130319042, "grad_norm": 0.048583984375, "learning_rate": 1.9236059842750154e-05, "loss": 4.406, "step": 7550 }, { "epoch": 0.2544266526842254, "grad_norm": 0.044677734375, "learning_rate": 1.9233624347790363e-05, "loss": 4.4088, "step": 7560 }, { "epoch": 0.25482999233654663, "grad_norm": 0.04296875, "learning_rate": 1.9231185131423052e-05, "loss": 4.4516, "step": 7570 }, { "epoch": 0.25523333198886783, "grad_norm": 0.04248046875, "learning_rate": 1.9228742194631287e-05, "loss": 4.3987, "step": 7580 }, { "epoch": 0.25563667164118903, "grad_norm": 0.045166015625, "learning_rate": 1.9226295538399643e-05, "loss": 4.4216, "step": 7590 }, { "epoch": 0.2560400112935103, "grad_norm": 0.045654296875, "learning_rate": 1.9223845163714186e-05, "loss": 4.4326, "step": 7600 }, { "epoch": 0.2564433509458315, "grad_norm": 0.044921875, "learning_rate": 1.9221391071562494e-05, "loss": 4.3938, "step": 7610 }, { "epoch": 0.2568466905981527, "grad_norm": 0.045654296875, "learning_rate": 1.9218933262933626e-05, "loss": 4.4036, "step": 7620 }, { "epoch": 0.25725003025047394, "grad_norm": 0.0439453125, "learning_rate": 1.921647173881815e-05, "loss": 4.4347, "step": 7630 }, { "epoch": 0.25765336990279514, "grad_norm": 0.046142578125, "learning_rate": 1.921400650020813e-05, "loss": 4.4576, "step": 7640 }, { "epoch": 0.25805670955511634, "grad_norm": 0.045654296875, "learning_rate": 1.9211537548097123e-05, "loss": 4.4239, "step": 7650 }, { "epoch": 0.2584600492074376, "grad_norm": 0.0458984375, "learning_rate": 1.9209064883480186e-05, "loss": 4.4275, "step": 7660 }, { "epoch": 0.2588633888597588, "grad_norm": 0.044921875, "learning_rate": 1.9206588507353874e-05, "loss": 4.431, "step": 7670 }, { "epoch": 0.25926672851208005, "grad_norm": 0.047119140625, "learning_rate": 1.9204108420716233e-05, "loss": 4.4202, "step": 7680 }, { "epoch": 0.25967006816440125, "grad_norm": 0.041748046875, "learning_rate": 1.920162462456681e-05, "loss": 4.402, "step": 7690 }, { "epoch": 0.26007340781672245, "grad_norm": 0.044921875, "learning_rate": 1.919913711990664e-05, "loss": 4.404, "step": 7700 }, { "epoch": 0.2604767474690437, "grad_norm": 0.044189453125, "learning_rate": 1.919664590773826e-05, "loss": 4.4185, "step": 7710 }, { "epoch": 0.2608800871213649, "grad_norm": 0.045166015625, "learning_rate": 1.9194150989065695e-05, "loss": 4.4237, "step": 7720 }, { "epoch": 0.2612834267736861, "grad_norm": 0.04248046875, "learning_rate": 1.919165236489447e-05, "loss": 4.4204, "step": 7730 }, { "epoch": 0.26168676642600736, "grad_norm": 0.044677734375, "learning_rate": 1.9189150036231593e-05, "loss": 4.4148, "step": 7740 }, { "epoch": 0.26209010607832856, "grad_norm": 0.04541015625, "learning_rate": 1.9186644004085583e-05, "loss": 4.3968, "step": 7750 }, { "epoch": 0.26249344573064975, "grad_norm": 0.041748046875, "learning_rate": 1.918413426946643e-05, "loss": 4.4155, "step": 7760 }, { "epoch": 0.262896785382971, "grad_norm": 0.0419921875, "learning_rate": 1.9181620833385635e-05, "loss": 4.3753, "step": 7770 }, { "epoch": 0.2633001250352922, "grad_norm": 0.042724609375, "learning_rate": 1.9179103696856176e-05, "loss": 4.4388, "step": 7780 }, { "epoch": 0.26370346468761346, "grad_norm": 0.0439453125, "learning_rate": 1.9176582860892537e-05, "loss": 4.4089, "step": 7790 }, { "epoch": 0.26410680433993466, "grad_norm": 0.044677734375, "learning_rate": 1.9174058326510678e-05, "loss": 4.4113, "step": 7800 }, { "epoch": 0.26451014399225586, "grad_norm": 0.045166015625, "learning_rate": 1.9171530094728058e-05, "loss": 4.4383, "step": 7810 }, { "epoch": 0.2649134836445771, "grad_norm": 0.04443359375, "learning_rate": 1.9168998166563625e-05, "loss": 4.4356, "step": 7820 }, { "epoch": 0.2653168232968983, "grad_norm": 0.04345703125, "learning_rate": 1.916646254303782e-05, "loss": 4.4334, "step": 7830 }, { "epoch": 0.2657201629492195, "grad_norm": 0.044189453125, "learning_rate": 1.9163923225172566e-05, "loss": 4.4379, "step": 7840 }, { "epoch": 0.2661235026015408, "grad_norm": 0.044189453125, "learning_rate": 1.916138021399128e-05, "loss": 4.4273, "step": 7850 }, { "epoch": 0.266526842253862, "grad_norm": 0.044677734375, "learning_rate": 1.9158833510518864e-05, "loss": 4.4021, "step": 7860 }, { "epoch": 0.2669301819061832, "grad_norm": 0.0458984375, "learning_rate": 1.9156283115781715e-05, "loss": 4.4079, "step": 7870 }, { "epoch": 0.26733352155850443, "grad_norm": 0.04345703125, "learning_rate": 1.9153729030807712e-05, "loss": 4.4467, "step": 7880 }, { "epoch": 0.2677368612108256, "grad_norm": 0.04345703125, "learning_rate": 1.9151171256626217e-05, "loss": 4.4385, "step": 7890 }, { "epoch": 0.2681402008631469, "grad_norm": 0.044677734375, "learning_rate": 1.914860979426809e-05, "loss": 4.4118, "step": 7900 }, { "epoch": 0.2685435405154681, "grad_norm": 0.04296875, "learning_rate": 1.914604464476567e-05, "loss": 4.43, "step": 7910 }, { "epoch": 0.2689468801677893, "grad_norm": 0.0439453125, "learning_rate": 1.914347580915278e-05, "loss": 4.4376, "step": 7920 }, { "epoch": 0.26935021982011054, "grad_norm": 0.043212890625, "learning_rate": 1.9140903288464737e-05, "loss": 4.4191, "step": 7930 }, { "epoch": 0.26975355947243174, "grad_norm": 0.045166015625, "learning_rate": 1.913832708373834e-05, "loss": 4.4035, "step": 7940 }, { "epoch": 0.27015689912475294, "grad_norm": 0.044677734375, "learning_rate": 1.9135747196011866e-05, "loss": 4.4211, "step": 7950 }, { "epoch": 0.2705602387770742, "grad_norm": 0.045166015625, "learning_rate": 1.9133163626325083e-05, "loss": 4.4311, "step": 7960 }, { "epoch": 0.2709635784293954, "grad_norm": 0.044189453125, "learning_rate": 1.9130576375719242e-05, "loss": 4.4063, "step": 7970 }, { "epoch": 0.2713669180817166, "grad_norm": 0.045654296875, "learning_rate": 1.9127985445237074e-05, "loss": 4.4043, "step": 7980 }, { "epoch": 0.27177025773403785, "grad_norm": 0.044677734375, "learning_rate": 1.9125390835922807e-05, "loss": 4.3799, "step": 7990 }, { "epoch": 0.27217359738635905, "grad_norm": 0.044189453125, "learning_rate": 1.9122792548822127e-05, "loss": 4.4189, "step": 8000 }, { "epoch": 0.2725769370386803, "grad_norm": 0.045654296875, "learning_rate": 1.912019058498222e-05, "loss": 4.4111, "step": 8010 }, { "epoch": 0.2729802766910015, "grad_norm": 0.045166015625, "learning_rate": 1.9117584945451756e-05, "loss": 4.4074, "step": 8020 }, { "epoch": 0.2733836163433227, "grad_norm": 0.044677734375, "learning_rate": 1.9114975631280873e-05, "loss": 4.4157, "step": 8030 }, { "epoch": 0.27378695599564395, "grad_norm": 0.04736328125, "learning_rate": 1.9112362643521203e-05, "loss": 4.4158, "step": 8040 }, { "epoch": 0.27419029564796515, "grad_norm": 0.0458984375, "learning_rate": 1.9109745983225848e-05, "loss": 4.4144, "step": 8050 }, { "epoch": 0.27459363530028635, "grad_norm": 0.04296875, "learning_rate": 1.9107125651449398e-05, "loss": 4.4267, "step": 8060 }, { "epoch": 0.2749969749526076, "grad_norm": 0.0439453125, "learning_rate": 1.9104501649247917e-05, "loss": 4.3773, "step": 8070 }, { "epoch": 0.2754003146049288, "grad_norm": 0.0439453125, "learning_rate": 1.9101873977678954e-05, "loss": 4.4263, "step": 8080 }, { "epoch": 0.27580365425725, "grad_norm": 0.044677734375, "learning_rate": 1.9099242637801528e-05, "loss": 4.4308, "step": 8090 }, { "epoch": 0.27620699390957126, "grad_norm": 0.04833984375, "learning_rate": 1.909660763067615e-05, "loss": 4.4156, "step": 8100 }, { "epoch": 0.27661033356189246, "grad_norm": 0.046875, "learning_rate": 1.9093968957364795e-05, "loss": 4.4315, "step": 8110 }, { "epoch": 0.27701367321421366, "grad_norm": 0.046142578125, "learning_rate": 1.9091326618930924e-05, "loss": 4.4076, "step": 8120 }, { "epoch": 0.2774170128665349, "grad_norm": 0.045166015625, "learning_rate": 1.9088680616439474e-05, "loss": 4.4471, "step": 8130 }, { "epoch": 0.2778203525188561, "grad_norm": 0.045654296875, "learning_rate": 1.9086030950956858e-05, "loss": 4.4174, "step": 8140 }, { "epoch": 0.2782236921711774, "grad_norm": 0.045166015625, "learning_rate": 1.9083377623550963e-05, "loss": 4.4382, "step": 8150 }, { "epoch": 0.27862703182349857, "grad_norm": 0.04345703125, "learning_rate": 1.9080720635291153e-05, "loss": 4.3854, "step": 8160 }, { "epoch": 0.27903037147581977, "grad_norm": 0.04541015625, "learning_rate": 1.907805998724827e-05, "loss": 4.4166, "step": 8170 }, { "epoch": 0.279433711128141, "grad_norm": 0.0419921875, "learning_rate": 1.907539568049463e-05, "loss": 4.3843, "step": 8180 }, { "epoch": 0.2798370507804622, "grad_norm": 0.044677734375, "learning_rate": 1.9072727716104024e-05, "loss": 4.4313, "step": 8190 }, { "epoch": 0.2802403904327834, "grad_norm": 0.044677734375, "learning_rate": 1.907005609515171e-05, "loss": 4.4316, "step": 8200 }, { "epoch": 0.2806437300851047, "grad_norm": 0.04296875, "learning_rate": 1.9067380818714425e-05, "loss": 4.444, "step": 8210 }, { "epoch": 0.2810470697374259, "grad_norm": 0.0458984375, "learning_rate": 1.9064701887870388e-05, "loss": 4.4047, "step": 8220 }, { "epoch": 0.2814504093897471, "grad_norm": 0.04296875, "learning_rate": 1.906201930369928e-05, "loss": 4.4212, "step": 8230 }, { "epoch": 0.28185374904206834, "grad_norm": 0.044921875, "learning_rate": 1.9059333067282252e-05, "loss": 4.3766, "step": 8240 }, { "epoch": 0.28225708869438954, "grad_norm": 0.043701171875, "learning_rate": 1.9056643179701937e-05, "loss": 4.4128, "step": 8250 }, { "epoch": 0.2826604283467108, "grad_norm": 0.0458984375, "learning_rate": 1.9053949642042434e-05, "loss": 4.426, "step": 8260 }, { "epoch": 0.283063767999032, "grad_norm": 0.048095703125, "learning_rate": 1.9051252455389314e-05, "loss": 4.4006, "step": 8270 }, { "epoch": 0.2834671076513532, "grad_norm": 0.044189453125, "learning_rate": 1.9048551620829612e-05, "loss": 4.3984, "step": 8280 }, { "epoch": 0.28387044730367444, "grad_norm": 0.04345703125, "learning_rate": 1.904584713945185e-05, "loss": 4.4123, "step": 8290 }, { "epoch": 0.28427378695599564, "grad_norm": 0.0458984375, "learning_rate": 1.9043139012346e-05, "loss": 4.4129, "step": 8300 }, { "epoch": 0.28467712660831684, "grad_norm": 0.043212890625, "learning_rate": 1.904042724060352e-05, "loss": 4.4213, "step": 8310 }, { "epoch": 0.2850804662606381, "grad_norm": 0.043212890625, "learning_rate": 1.903771182531732e-05, "loss": 4.4572, "step": 8320 }, { "epoch": 0.2854838059129593, "grad_norm": 0.0439453125, "learning_rate": 1.9034992767581797e-05, "loss": 4.4227, "step": 8330 }, { "epoch": 0.2858871455652805, "grad_norm": 0.044921875, "learning_rate": 1.9032270068492804e-05, "loss": 4.3789, "step": 8340 }, { "epoch": 0.28629048521760175, "grad_norm": 0.046630859375, "learning_rate": 1.902954372914766e-05, "loss": 4.4301, "step": 8350 }, { "epoch": 0.28669382486992295, "grad_norm": 0.042236328125, "learning_rate": 1.9026813750645166e-05, "loss": 4.3816, "step": 8360 }, { "epoch": 0.2870971645222442, "grad_norm": 0.04833984375, "learning_rate": 1.902408013408557e-05, "loss": 4.4271, "step": 8370 }, { "epoch": 0.2875005041745654, "grad_norm": 0.044921875, "learning_rate": 1.90213428805706e-05, "loss": 4.4194, "step": 8380 }, { "epoch": 0.2879038438268866, "grad_norm": 0.043212890625, "learning_rate": 1.901860199120344e-05, "loss": 4.4439, "step": 8390 }, { "epoch": 0.28830718347920786, "grad_norm": 0.045654296875, "learning_rate": 1.901585746708875e-05, "loss": 4.4455, "step": 8400 }, { "epoch": 0.28871052313152906, "grad_norm": 0.043212890625, "learning_rate": 1.901310930933265e-05, "loss": 4.403, "step": 8410 }, { "epoch": 0.28911386278385026, "grad_norm": 0.046142578125, "learning_rate": 1.901035751904272e-05, "loss": 4.4196, "step": 8420 }, { "epoch": 0.2895172024361715, "grad_norm": 0.045166015625, "learning_rate": 1.9007602097328012e-05, "loss": 4.4364, "step": 8430 }, { "epoch": 0.2899205420884927, "grad_norm": 0.045654296875, "learning_rate": 1.9004843045299033e-05, "loss": 4.42, "step": 8440 }, { "epoch": 0.2903238817408139, "grad_norm": 0.0478515625, "learning_rate": 1.9002080364067762e-05, "loss": 4.4444, "step": 8450 }, { "epoch": 0.29072722139313517, "grad_norm": 0.0478515625, "learning_rate": 1.8999314054747634e-05, "loss": 4.4551, "step": 8460 }, { "epoch": 0.29113056104545637, "grad_norm": 0.042236328125, "learning_rate": 1.8996544118453553e-05, "loss": 4.4348, "step": 8470 }, { "epoch": 0.2915339006977776, "grad_norm": 0.046875, "learning_rate": 1.8993770556301874e-05, "loss": 4.4227, "step": 8480 }, { "epoch": 0.2919372403500988, "grad_norm": 0.04345703125, "learning_rate": 1.899099336941042e-05, "loss": 4.4347, "step": 8490 }, { "epoch": 0.29234058000242, "grad_norm": 0.046630859375, "learning_rate": 1.8988212558898483e-05, "loss": 4.4127, "step": 8500 }, { "epoch": 0.2927439196547413, "grad_norm": 0.04443359375, "learning_rate": 1.89854281258868e-05, "loss": 4.4444, "step": 8510 }, { "epoch": 0.2931472593070625, "grad_norm": 0.04541015625, "learning_rate": 1.898264007149757e-05, "loss": 4.3902, "step": 8520 }, { "epoch": 0.2935505989593837, "grad_norm": 0.04443359375, "learning_rate": 1.897984839685447e-05, "loss": 4.387, "step": 8530 }, { "epoch": 0.29395393861170493, "grad_norm": 0.0439453125, "learning_rate": 1.8977053103082612e-05, "loss": 4.4191, "step": 8540 }, { "epoch": 0.29435727826402613, "grad_norm": 0.04345703125, "learning_rate": 1.8974254191308584e-05, "loss": 4.4069, "step": 8550 }, { "epoch": 0.29476061791634733, "grad_norm": 0.043701171875, "learning_rate": 1.8971451662660416e-05, "loss": 4.4527, "step": 8560 }, { "epoch": 0.2951639575686686, "grad_norm": 0.042724609375, "learning_rate": 1.8968645518267615e-05, "loss": 4.4089, "step": 8570 }, { "epoch": 0.2955672972209898, "grad_norm": 0.042236328125, "learning_rate": 1.896583575926113e-05, "loss": 4.4067, "step": 8580 }, { "epoch": 0.295970636873311, "grad_norm": 0.04443359375, "learning_rate": 1.896302238677337e-05, "loss": 4.4074, "step": 8590 }, { "epoch": 0.29637397652563224, "grad_norm": 0.048583984375, "learning_rate": 1.896020540193821e-05, "loss": 4.4344, "step": 8600 }, { "epoch": 0.29677731617795344, "grad_norm": 0.044189453125, "learning_rate": 1.8957384805890967e-05, "loss": 4.3855, "step": 8610 }, { "epoch": 0.2971806558302747, "grad_norm": 0.042724609375, "learning_rate": 1.895456059976842e-05, "loss": 4.4141, "step": 8620 }, { "epoch": 0.2975839954825959, "grad_norm": 0.046142578125, "learning_rate": 1.8951732784708806e-05, "loss": 4.4411, "step": 8630 }, { "epoch": 0.2979873351349171, "grad_norm": 0.043701171875, "learning_rate": 1.894890136185181e-05, "loss": 4.4181, "step": 8640 }, { "epoch": 0.29839067478723835, "grad_norm": 0.043701171875, "learning_rate": 1.8946066332338576e-05, "loss": 4.3979, "step": 8650 }, { "epoch": 0.29879401443955955, "grad_norm": 0.04638671875, "learning_rate": 1.89432276973117e-05, "loss": 4.4394, "step": 8660 }, { "epoch": 0.29919735409188075, "grad_norm": 0.04248046875, "learning_rate": 1.894038545791523e-05, "loss": 4.3951, "step": 8670 }, { "epoch": 0.299600693744202, "grad_norm": 0.044677734375, "learning_rate": 1.8937539615294666e-05, "loss": 4.4228, "step": 8680 }, { "epoch": 0.3000040333965232, "grad_norm": 0.04736328125, "learning_rate": 1.8934690170596965e-05, "loss": 4.4193, "step": 8690 }, { "epoch": 0.3004073730488444, "grad_norm": 0.04443359375, "learning_rate": 1.893183712497053e-05, "loss": 4.3939, "step": 8700 }, { "epoch": 0.30081071270116566, "grad_norm": 0.045166015625, "learning_rate": 1.892898047956522e-05, "loss": 4.4423, "step": 8710 }, { "epoch": 0.30121405235348686, "grad_norm": 0.043701171875, "learning_rate": 1.892612023553234e-05, "loss": 4.43, "step": 8720 }, { "epoch": 0.3016173920058081, "grad_norm": 0.04541015625, "learning_rate": 1.892325639402465e-05, "loss": 4.4337, "step": 8730 }, { "epoch": 0.3020207316581293, "grad_norm": 0.04345703125, "learning_rate": 1.8920388956196354e-05, "loss": 4.4286, "step": 8740 }, { "epoch": 0.3024240713104505, "grad_norm": 0.04541015625, "learning_rate": 1.8917517923203116e-05, "loss": 4.4016, "step": 8750 }, { "epoch": 0.30282741096277177, "grad_norm": 0.046142578125, "learning_rate": 1.891464329620204e-05, "loss": 4.4618, "step": 8760 }, { "epoch": 0.30323075061509297, "grad_norm": 0.04638671875, "learning_rate": 1.891176507635167e-05, "loss": 4.4425, "step": 8770 }, { "epoch": 0.30363409026741417, "grad_norm": 0.0439453125, "learning_rate": 1.8908883264812025e-05, "loss": 4.4126, "step": 8780 }, { "epoch": 0.3040374299197354, "grad_norm": 0.042724609375, "learning_rate": 1.8905997862744548e-05, "loss": 4.387, "step": 8790 }, { "epoch": 0.3044407695720566, "grad_norm": 0.045166015625, "learning_rate": 1.8903108871312134e-05, "loss": 4.4012, "step": 8800 }, { "epoch": 0.3048441092243778, "grad_norm": 0.0439453125, "learning_rate": 1.8900216291679132e-05, "loss": 4.3945, "step": 8810 }, { "epoch": 0.3052474488766991, "grad_norm": 0.044921875, "learning_rate": 1.8897320125011327e-05, "loss": 4.4261, "step": 8820 }, { "epoch": 0.3056507885290203, "grad_norm": 0.046142578125, "learning_rate": 1.8894420372475957e-05, "loss": 4.4359, "step": 8830 }, { "epoch": 0.30605412818134153, "grad_norm": 0.046875, "learning_rate": 1.8891517035241704e-05, "loss": 4.4466, "step": 8840 }, { "epoch": 0.30645746783366273, "grad_norm": 0.044677734375, "learning_rate": 1.8888610114478693e-05, "loss": 4.4369, "step": 8850 }, { "epoch": 0.30686080748598393, "grad_norm": 0.044189453125, "learning_rate": 1.8885699611358493e-05, "loss": 4.4155, "step": 8860 }, { "epoch": 0.3072641471383052, "grad_norm": 0.045166015625, "learning_rate": 1.8882785527054116e-05, "loss": 4.3903, "step": 8870 }, { "epoch": 0.3076674867906264, "grad_norm": 0.043212890625, "learning_rate": 1.8879867862740023e-05, "loss": 4.4314, "step": 8880 }, { "epoch": 0.3080708264429476, "grad_norm": 0.04345703125, "learning_rate": 1.8876946619592114e-05, "loss": 4.4059, "step": 8890 }, { "epoch": 0.30847416609526884, "grad_norm": 0.04833984375, "learning_rate": 1.887402179878773e-05, "loss": 4.3865, "step": 8900 }, { "epoch": 0.30887750574759004, "grad_norm": 0.043701171875, "learning_rate": 1.8871093401505652e-05, "loss": 4.4326, "step": 8910 }, { "epoch": 0.30928084539991124, "grad_norm": 0.045654296875, "learning_rate": 1.886816142892611e-05, "loss": 4.4494, "step": 8920 }, { "epoch": 0.3096841850522325, "grad_norm": 0.045166015625, "learning_rate": 1.886522588223077e-05, "loss": 4.3864, "step": 8930 }, { "epoch": 0.3100875247045537, "grad_norm": 0.0419921875, "learning_rate": 1.8862286762602735e-05, "loss": 4.4073, "step": 8940 }, { "epoch": 0.31049086435687495, "grad_norm": 0.04443359375, "learning_rate": 1.8859344071226555e-05, "loss": 4.4264, "step": 8950 }, { "epoch": 0.31089420400919615, "grad_norm": 0.043701171875, "learning_rate": 1.8856397809288217e-05, "loss": 4.4275, "step": 8960 }, { "epoch": 0.31129754366151735, "grad_norm": 0.042724609375, "learning_rate": 1.8853447977975147e-05, "loss": 4.4002, "step": 8970 }, { "epoch": 0.3117008833138386, "grad_norm": 0.048828125, "learning_rate": 1.8850494578476207e-05, "loss": 4.3969, "step": 8980 }, { "epoch": 0.3121042229661598, "grad_norm": 0.04248046875, "learning_rate": 1.88475376119817e-05, "loss": 4.4264, "step": 8990 }, { "epoch": 0.312507562618481, "grad_norm": 0.04345703125, "learning_rate": 1.8844577079683367e-05, "loss": 4.3937, "step": 9000 }, { "epoch": 0.31291090227080226, "grad_norm": 0.045166015625, "learning_rate": 1.884161298277438e-05, "loss": 4.4644, "step": 9010 }, { "epoch": 0.31331424192312346, "grad_norm": 0.046142578125, "learning_rate": 1.8838645322449362e-05, "loss": 4.4555, "step": 9020 }, { "epoch": 0.31371758157544466, "grad_norm": 0.0458984375, "learning_rate": 1.8835674099904357e-05, "loss": 4.4047, "step": 9030 }, { "epoch": 0.3141209212277659, "grad_norm": 0.046630859375, "learning_rate": 1.8832699316336845e-05, "loss": 4.4394, "step": 9040 }, { "epoch": 0.3145242608800871, "grad_norm": 0.044677734375, "learning_rate": 1.8829720972945755e-05, "loss": 4.4275, "step": 9050 }, { "epoch": 0.3149276005324083, "grad_norm": 0.045654296875, "learning_rate": 1.8826739070931437e-05, "loss": 4.39, "step": 9060 }, { "epoch": 0.31533094018472957, "grad_norm": 0.04248046875, "learning_rate": 1.882375361149569e-05, "loss": 4.4134, "step": 9070 }, { "epoch": 0.31573427983705077, "grad_norm": 0.04296875, "learning_rate": 1.882076459584172e-05, "loss": 4.404, "step": 9080 }, { "epoch": 0.316137619489372, "grad_norm": 0.043212890625, "learning_rate": 1.88177720251742e-05, "loss": 4.4078, "step": 9090 }, { "epoch": 0.3165409591416932, "grad_norm": 0.046630859375, "learning_rate": 1.881477590069921e-05, "loss": 4.4351, "step": 9100 }, { "epoch": 0.3169442987940144, "grad_norm": 0.046142578125, "learning_rate": 1.8811776223624273e-05, "loss": 4.4317, "step": 9110 }, { "epoch": 0.3173476384463357, "grad_norm": 0.045166015625, "learning_rate": 1.8808772995158344e-05, "loss": 4.4356, "step": 9120 }, { "epoch": 0.3177509780986569, "grad_norm": 0.048095703125, "learning_rate": 1.880576621651181e-05, "loss": 4.4014, "step": 9130 }, { "epoch": 0.3181543177509781, "grad_norm": 0.044677734375, "learning_rate": 1.880275588889648e-05, "loss": 4.4171, "step": 9140 }, { "epoch": 0.31855765740329933, "grad_norm": 0.043701171875, "learning_rate": 1.8799742013525602e-05, "loss": 4.4338, "step": 9150 }, { "epoch": 0.31896099705562053, "grad_norm": 0.044189453125, "learning_rate": 1.8796724591613857e-05, "loss": 4.4023, "step": 9160 }, { "epoch": 0.31936433670794173, "grad_norm": 0.046142578125, "learning_rate": 1.8793703624377348e-05, "loss": 4.3985, "step": 9170 }, { "epoch": 0.319767676360263, "grad_norm": 0.044677734375, "learning_rate": 1.87906791130336e-05, "loss": 4.4159, "step": 9180 }, { "epoch": 0.3201710160125842, "grad_norm": 0.043701171875, "learning_rate": 1.8787651058801585e-05, "loss": 4.4059, "step": 9190 }, { "epoch": 0.32057435566490544, "grad_norm": 0.044921875, "learning_rate": 1.878461946290169e-05, "loss": 4.4271, "step": 9200 }, { "epoch": 0.32097769531722664, "grad_norm": 0.04931640625, "learning_rate": 1.8781584326555734e-05, "loss": 4.4007, "step": 9210 }, { "epoch": 0.32138103496954784, "grad_norm": 0.044921875, "learning_rate": 1.8778545650986958e-05, "loss": 4.4388, "step": 9220 }, { "epoch": 0.3217843746218691, "grad_norm": 0.046875, "learning_rate": 1.8775503437420037e-05, "loss": 4.4252, "step": 9230 }, { "epoch": 0.3221877142741903, "grad_norm": 0.04296875, "learning_rate": 1.8772457687081064e-05, "loss": 4.4398, "step": 9240 }, { "epoch": 0.3225910539265115, "grad_norm": 0.04345703125, "learning_rate": 1.8769408401197564e-05, "loss": 4.4102, "step": 9250 }, { "epoch": 0.32299439357883275, "grad_norm": 0.04443359375, "learning_rate": 1.876635558099848e-05, "loss": 4.4213, "step": 9260 }, { "epoch": 0.32339773323115395, "grad_norm": 0.04638671875, "learning_rate": 1.8763299227714187e-05, "loss": 4.4266, "step": 9270 }, { "epoch": 0.32380107288347515, "grad_norm": 0.0439453125, "learning_rate": 1.8760239342576482e-05, "loss": 4.4066, "step": 9280 }, { "epoch": 0.3242044125357964, "grad_norm": 0.04541015625, "learning_rate": 1.8757175926818578e-05, "loss": 4.4069, "step": 9290 }, { "epoch": 0.3246077521881176, "grad_norm": 0.044921875, "learning_rate": 1.875410898167512e-05, "loss": 4.4321, "step": 9300 }, { "epoch": 0.32501109184043886, "grad_norm": 0.046142578125, "learning_rate": 1.8751038508382176e-05, "loss": 4.4367, "step": 9310 }, { "epoch": 0.32541443149276006, "grad_norm": 0.0458984375, "learning_rate": 1.874796450817723e-05, "loss": 4.4207, "step": 9320 }, { "epoch": 0.32581777114508126, "grad_norm": 0.0439453125, "learning_rate": 1.8744886982299185e-05, "loss": 4.4375, "step": 9330 }, { "epoch": 0.3262211107974025, "grad_norm": 0.046875, "learning_rate": 1.874180593198837e-05, "loss": 4.4227, "step": 9340 }, { "epoch": 0.3266244504497237, "grad_norm": 0.04296875, "learning_rate": 1.873872135848654e-05, "loss": 4.4386, "step": 9350 }, { "epoch": 0.3270277901020449, "grad_norm": 0.046875, "learning_rate": 1.873563326303686e-05, "loss": 4.4088, "step": 9360 }, { "epoch": 0.32743112975436617, "grad_norm": 0.043212890625, "learning_rate": 1.8732541646883923e-05, "loss": 4.4194, "step": 9370 }, { "epoch": 0.32783446940668737, "grad_norm": 0.043701171875, "learning_rate": 1.8729446511273725e-05, "loss": 4.4227, "step": 9380 }, { "epoch": 0.32823780905900857, "grad_norm": 0.044921875, "learning_rate": 1.8726347857453705e-05, "loss": 4.415, "step": 9390 }, { "epoch": 0.3286411487113298, "grad_norm": 0.04345703125, "learning_rate": 1.8723245686672697e-05, "loss": 4.4688, "step": 9400 }, { "epoch": 0.329044488363651, "grad_norm": 0.045654296875, "learning_rate": 1.8720140000180967e-05, "loss": 4.4185, "step": 9410 }, { "epoch": 0.3294478280159723, "grad_norm": 0.041259765625, "learning_rate": 1.8717030799230195e-05, "loss": 4.4393, "step": 9420 }, { "epoch": 0.3298511676682935, "grad_norm": 0.0458984375, "learning_rate": 1.871391808507347e-05, "loss": 4.4148, "step": 9430 }, { "epoch": 0.3302545073206147, "grad_norm": 0.04345703125, "learning_rate": 1.8710801858965307e-05, "loss": 4.4414, "step": 9440 }, { "epoch": 0.33065784697293593, "grad_norm": 0.04443359375, "learning_rate": 1.8707682122161627e-05, "loss": 4.4513, "step": 9450 }, { "epoch": 0.33106118662525713, "grad_norm": 0.04248046875, "learning_rate": 1.8704558875919775e-05, "loss": 4.4272, "step": 9460 }, { "epoch": 0.33146452627757833, "grad_norm": 0.044921875, "learning_rate": 1.8701432121498505e-05, "loss": 4.398, "step": 9470 }, { "epoch": 0.3318678659298996, "grad_norm": 0.045166015625, "learning_rate": 1.8698301860157987e-05, "loss": 4.4247, "step": 9480 }, { "epoch": 0.3322712055822208, "grad_norm": 0.04638671875, "learning_rate": 1.86951680931598e-05, "loss": 4.4441, "step": 9490 }, { "epoch": 0.332674545234542, "grad_norm": 0.04443359375, "learning_rate": 1.8692030821766942e-05, "loss": 4.4122, "step": 9500 }, { "epoch": 0.33307788488686324, "grad_norm": 0.0458984375, "learning_rate": 1.868889004724382e-05, "loss": 4.4305, "step": 9510 }, { "epoch": 0.33348122453918444, "grad_norm": 0.044189453125, "learning_rate": 1.8685745770856255e-05, "loss": 4.4195, "step": 9520 }, { "epoch": 0.33388456419150564, "grad_norm": 0.04296875, "learning_rate": 1.8682597993871474e-05, "loss": 4.4311, "step": 9530 }, { "epoch": 0.3342879038438269, "grad_norm": 0.044921875, "learning_rate": 1.8679446717558125e-05, "loss": 4.4015, "step": 9540 }, { "epoch": 0.3346912434961481, "grad_norm": 0.044677734375, "learning_rate": 1.8676291943186252e-05, "loss": 4.4272, "step": 9550 }, { "epoch": 0.33509458314846935, "grad_norm": 0.045166015625, "learning_rate": 1.8673133672027324e-05, "loss": 4.4009, "step": 9560 }, { "epoch": 0.33549792280079055, "grad_norm": 0.0439453125, "learning_rate": 1.8669971905354208e-05, "loss": 4.4036, "step": 9570 }, { "epoch": 0.33590126245311175, "grad_norm": 0.045166015625, "learning_rate": 1.8666806644441188e-05, "loss": 4.409, "step": 9580 }, { "epoch": 0.336304602105433, "grad_norm": 0.046142578125, "learning_rate": 1.8663637890563942e-05, "loss": 4.4255, "step": 9590 }, { "epoch": 0.3367079417577542, "grad_norm": 0.04296875, "learning_rate": 1.866046564499958e-05, "loss": 4.457, "step": 9600 }, { "epoch": 0.3371112814100754, "grad_norm": 0.046142578125, "learning_rate": 1.86572899090266e-05, "loss": 4.4437, "step": 9610 }, { "epoch": 0.33751462106239666, "grad_norm": 0.045166015625, "learning_rate": 1.8654110683924904e-05, "loss": 4.4459, "step": 9620 }, { "epoch": 0.33791796071471786, "grad_norm": 0.04638671875, "learning_rate": 1.8650927970975818e-05, "loss": 4.4057, "step": 9630 }, { "epoch": 0.33832130036703906, "grad_norm": 0.047607421875, "learning_rate": 1.864774177146206e-05, "loss": 4.4189, "step": 9640 }, { "epoch": 0.3387246400193603, "grad_norm": 0.04638671875, "learning_rate": 1.864455208666776e-05, "loss": 4.417, "step": 9650 }, { "epoch": 0.3391279796716815, "grad_norm": 0.044677734375, "learning_rate": 1.8641358917878443e-05, "loss": 4.4045, "step": 9660 }, { "epoch": 0.33953131932400277, "grad_norm": 0.043701171875, "learning_rate": 1.8638162266381054e-05, "loss": 4.4063, "step": 9670 }, { "epoch": 0.33993465897632397, "grad_norm": 0.04443359375, "learning_rate": 1.8634962133463925e-05, "loss": 4.41, "step": 9680 }, { "epoch": 0.34033799862864517, "grad_norm": 0.044677734375, "learning_rate": 1.8631758520416804e-05, "loss": 4.4269, "step": 9690 }, { "epoch": 0.3407413382809664, "grad_norm": 0.04296875, "learning_rate": 1.862855142853083e-05, "loss": 4.3873, "step": 9700 }, { "epoch": 0.3411446779332876, "grad_norm": 0.0458984375, "learning_rate": 1.8625340859098554e-05, "loss": 4.4483, "step": 9710 }, { "epoch": 0.3415480175856088, "grad_norm": 0.044921875, "learning_rate": 1.862212681341393e-05, "loss": 4.3805, "step": 9720 }, { "epoch": 0.3419513572379301, "grad_norm": 0.04345703125, "learning_rate": 1.8618909292772297e-05, "loss": 4.421, "step": 9730 }, { "epoch": 0.3423546968902513, "grad_norm": 0.04833984375, "learning_rate": 1.861568829847041e-05, "loss": 4.4354, "step": 9740 }, { "epoch": 0.3427580365425725, "grad_norm": 0.045166015625, "learning_rate": 1.8612463831806423e-05, "loss": 4.4286, "step": 9750 }, { "epoch": 0.34316137619489373, "grad_norm": 0.042236328125, "learning_rate": 1.860923589407988e-05, "loss": 4.4136, "step": 9760 }, { "epoch": 0.34356471584721493, "grad_norm": 0.043212890625, "learning_rate": 1.8606004486591736e-05, "loss": 4.4251, "step": 9770 }, { "epoch": 0.3439680554995362, "grad_norm": 0.043701171875, "learning_rate": 1.8602769610644333e-05, "loss": 4.4423, "step": 9780 }, { "epoch": 0.3443713951518574, "grad_norm": 0.0419921875, "learning_rate": 1.859953126754142e-05, "loss": 4.4027, "step": 9790 }, { "epoch": 0.3447747348041786, "grad_norm": 0.04736328125, "learning_rate": 1.8596289458588134e-05, "loss": 4.4343, "step": 9800 }, { "epoch": 0.34517807445649984, "grad_norm": 0.044677734375, "learning_rate": 1.859304418509102e-05, "loss": 4.4054, "step": 9810 }, { "epoch": 0.34558141410882104, "grad_norm": 0.045654296875, "learning_rate": 1.858979544835801e-05, "loss": 4.4279, "step": 9820 }, { "epoch": 0.34598475376114224, "grad_norm": 0.04345703125, "learning_rate": 1.8586543249698435e-05, "loss": 4.376, "step": 9830 }, { "epoch": 0.3463880934134635, "grad_norm": 0.04296875, "learning_rate": 1.858328759042302e-05, "loss": 4.4164, "step": 9840 }, { "epoch": 0.3467914330657847, "grad_norm": 0.04443359375, "learning_rate": 1.8580028471843896e-05, "loss": 4.419, "step": 9850 }, { "epoch": 0.3471947727181059, "grad_norm": 0.044677734375, "learning_rate": 1.857676589527457e-05, "loss": 4.4521, "step": 9860 }, { "epoch": 0.34759811237042715, "grad_norm": 0.043212890625, "learning_rate": 1.857349986202995e-05, "loss": 4.4414, "step": 9870 }, { "epoch": 0.34800145202274835, "grad_norm": 0.0419921875, "learning_rate": 1.857023037342634e-05, "loss": 4.3983, "step": 9880 }, { "epoch": 0.3484047916750696, "grad_norm": 0.043701171875, "learning_rate": 1.8566957430781437e-05, "loss": 4.4159, "step": 9890 }, { "epoch": 0.3488081313273908, "grad_norm": 0.044677734375, "learning_rate": 1.8563681035414327e-05, "loss": 4.3817, "step": 9900 }, { "epoch": 0.349211470979712, "grad_norm": 0.04296875, "learning_rate": 1.8560401188645488e-05, "loss": 4.4153, "step": 9910 }, { "epoch": 0.34961481063203326, "grad_norm": 0.045166015625, "learning_rate": 1.8557117891796787e-05, "loss": 4.397, "step": 9920 }, { "epoch": 0.35001815028435446, "grad_norm": 0.0458984375, "learning_rate": 1.855383114619149e-05, "loss": 4.426, "step": 9930 }, { "epoch": 0.35042148993667566, "grad_norm": 0.046875, "learning_rate": 1.855054095315424e-05, "loss": 4.4263, "step": 9940 }, { "epoch": 0.3508248295889969, "grad_norm": 0.045654296875, "learning_rate": 1.8547247314011085e-05, "loss": 4.4345, "step": 9950 }, { "epoch": 0.3512281692413181, "grad_norm": 0.04638671875, "learning_rate": 1.8543950230089443e-05, "loss": 4.407, "step": 9960 }, { "epoch": 0.3516315088936393, "grad_norm": 0.044189453125, "learning_rate": 1.854064970271814e-05, "loss": 4.4181, "step": 9970 }, { "epoch": 0.35203484854596057, "grad_norm": 0.044677734375, "learning_rate": 1.8537345733227375e-05, "loss": 4.4058, "step": 9980 }, { "epoch": 0.35243818819828177, "grad_norm": 0.04345703125, "learning_rate": 1.853403832294874e-05, "loss": 4.4057, "step": 9990 }, { "epoch": 0.35284152785060297, "grad_norm": 0.045654296875, "learning_rate": 1.8530727473215213e-05, "loss": 4.4067, "step": 10000 }, { "epoch": 0.3532448675029242, "grad_norm": 0.044189453125, "learning_rate": 1.8527413185361164e-05, "loss": 4.389, "step": 10010 }, { "epoch": 0.3536482071552454, "grad_norm": 0.045654296875, "learning_rate": 1.8524095460722332e-05, "loss": 4.4016, "step": 10020 }, { "epoch": 0.3540515468075667, "grad_norm": 0.047119140625, "learning_rate": 1.8520774300635862e-05, "loss": 4.4335, "step": 10030 }, { "epoch": 0.3544548864598879, "grad_norm": 0.043701171875, "learning_rate": 1.851744970644027e-05, "loss": 4.4613, "step": 10040 }, { "epoch": 0.3548582261122091, "grad_norm": 0.044189453125, "learning_rate": 1.8514121679475465e-05, "loss": 4.3907, "step": 10050 }, { "epoch": 0.35526156576453033, "grad_norm": 0.04638671875, "learning_rate": 1.851079022108273e-05, "loss": 4.434, "step": 10060 }, { "epoch": 0.35566490541685153, "grad_norm": 0.044189453125, "learning_rate": 1.850745533260473e-05, "loss": 4.4022, "step": 10070 }, { "epoch": 0.35606824506917273, "grad_norm": 0.04443359375, "learning_rate": 1.850411701538553e-05, "loss": 4.3958, "step": 10080 }, { "epoch": 0.356471584721494, "grad_norm": 0.04541015625, "learning_rate": 1.8500775270770557e-05, "loss": 4.4393, "step": 10090 }, { "epoch": 0.3568749243738152, "grad_norm": 0.04296875, "learning_rate": 1.849743010010663e-05, "loss": 4.4201, "step": 10100 }, { "epoch": 0.3572782640261364, "grad_norm": 0.047607421875, "learning_rate": 1.849408150474194e-05, "loss": 4.4242, "step": 10110 }, { "epoch": 0.35768160367845764, "grad_norm": 0.045166015625, "learning_rate": 1.849072948602607e-05, "loss": 4.4078, "step": 10120 }, { "epoch": 0.35808494333077884, "grad_norm": 0.046142578125, "learning_rate": 1.8487374045309975e-05, "loss": 4.3957, "step": 10130 }, { "epoch": 0.3584882829831001, "grad_norm": 0.043701171875, "learning_rate": 1.8484015183945994e-05, "loss": 4.3943, "step": 10140 }, { "epoch": 0.3588916226354213, "grad_norm": 0.04345703125, "learning_rate": 1.848065290328784e-05, "loss": 4.4397, "step": 10150 }, { "epoch": 0.3592949622877425, "grad_norm": 0.046142578125, "learning_rate": 1.84772872046906e-05, "loss": 4.4062, "step": 10160 }, { "epoch": 0.35969830194006375, "grad_norm": 0.044677734375, "learning_rate": 1.847391808951075e-05, "loss": 4.4089, "step": 10170 }, { "epoch": 0.36010164159238495, "grad_norm": 0.04443359375, "learning_rate": 1.8470545559106135e-05, "loss": 4.4033, "step": 10180 }, { "epoch": 0.36050498124470615, "grad_norm": 0.046142578125, "learning_rate": 1.8467169614835982e-05, "loss": 4.3982, "step": 10190 }, { "epoch": 0.3609083208970274, "grad_norm": 0.0439453125, "learning_rate": 1.8463790258060887e-05, "loss": 4.4425, "step": 10200 }, { "epoch": 0.3613116605493486, "grad_norm": 0.0439453125, "learning_rate": 1.8460407490142828e-05, "loss": 4.4179, "step": 10210 }, { "epoch": 0.3617150002016698, "grad_norm": 0.046875, "learning_rate": 1.8457021312445145e-05, "loss": 4.4436, "step": 10220 }, { "epoch": 0.36211833985399106, "grad_norm": 0.047607421875, "learning_rate": 1.8453631726332576e-05, "loss": 4.4293, "step": 10230 }, { "epoch": 0.36252167950631226, "grad_norm": 0.045166015625, "learning_rate": 1.845023873317121e-05, "loss": 4.4276, "step": 10240 }, { "epoch": 0.3629250191586335, "grad_norm": 0.046142578125, "learning_rate": 1.8446842334328518e-05, "loss": 4.4233, "step": 10250 }, { "epoch": 0.3633283588109547, "grad_norm": 0.04443359375, "learning_rate": 1.8443442531173347e-05, "loss": 4.4046, "step": 10260 }, { "epoch": 0.3637316984632759, "grad_norm": 0.044189453125, "learning_rate": 1.8440039325075907e-05, "loss": 4.3952, "step": 10270 }, { "epoch": 0.36413503811559716, "grad_norm": 0.044189453125, "learning_rate": 1.843663271740779e-05, "loss": 4.4011, "step": 10280 }, { "epoch": 0.36453837776791836, "grad_norm": 0.044921875, "learning_rate": 1.8433222709541948e-05, "loss": 4.3731, "step": 10290 }, { "epoch": 0.36494171742023956, "grad_norm": 0.045166015625, "learning_rate": 1.8429809302852716e-05, "loss": 4.4287, "step": 10300 }, { "epoch": 0.3653450570725608, "grad_norm": 0.046142578125, "learning_rate": 1.842639249871579e-05, "loss": 4.4177, "step": 10310 }, { "epoch": 0.365748396724882, "grad_norm": 0.04248046875, "learning_rate": 1.8422972298508227e-05, "loss": 4.4014, "step": 10320 }, { "epoch": 0.3661517363772032, "grad_norm": 0.044921875, "learning_rate": 1.841954870360848e-05, "loss": 4.381, "step": 10330 }, { "epoch": 0.3665550760295245, "grad_norm": 0.043212890625, "learning_rate": 1.841612171539634e-05, "loss": 4.4207, "step": 10340 }, { "epoch": 0.3669584156818457, "grad_norm": 0.044677734375, "learning_rate": 1.841269133525298e-05, "loss": 4.3941, "step": 10350 }, { "epoch": 0.36736175533416693, "grad_norm": 0.04443359375, "learning_rate": 1.8409257564560947e-05, "loss": 4.4155, "step": 10360 }, { "epoch": 0.36776509498648813, "grad_norm": 0.045166015625, "learning_rate": 1.8405820404704137e-05, "loss": 4.4412, "step": 10370 }, { "epoch": 0.3681684346388093, "grad_norm": 0.04443359375, "learning_rate": 1.8402379857067826e-05, "loss": 4.414, "step": 10380 }, { "epoch": 0.3685717742911306, "grad_norm": 0.044921875, "learning_rate": 1.8398935923038647e-05, "loss": 4.4385, "step": 10390 }, { "epoch": 0.3689751139434518, "grad_norm": 0.044189453125, "learning_rate": 1.8395488604004603e-05, "loss": 4.4359, "step": 10400 }, { "epoch": 0.369378453595773, "grad_norm": 0.046630859375, "learning_rate": 1.839203790135506e-05, "loss": 4.4165, "step": 10410 }, { "epoch": 0.36978179324809424, "grad_norm": 0.04296875, "learning_rate": 1.838858381648075e-05, "loss": 4.41, "step": 10420 }, { "epoch": 0.37018513290041544, "grad_norm": 0.04296875, "learning_rate": 1.8385126350773757e-05, "loss": 4.4293, "step": 10430 }, { "epoch": 0.37058847255273664, "grad_norm": 0.045654296875, "learning_rate": 1.8381665505627547e-05, "loss": 4.461, "step": 10440 }, { "epoch": 0.3709918122050579, "grad_norm": 0.04443359375, "learning_rate": 1.8378201282436928e-05, "loss": 4.4296, "step": 10450 }, { "epoch": 0.3713951518573791, "grad_norm": 0.046630859375, "learning_rate": 1.837473368259808e-05, "loss": 4.4044, "step": 10460 }, { "epoch": 0.37179849150970035, "grad_norm": 0.04443359375, "learning_rate": 1.837126270750855e-05, "loss": 4.4241, "step": 10470 }, { "epoch": 0.37220183116202155, "grad_norm": 0.044921875, "learning_rate": 1.8367788358567225e-05, "loss": 4.3949, "step": 10480 }, { "epoch": 0.37260517081434275, "grad_norm": 0.045654296875, "learning_rate": 1.836431063717437e-05, "loss": 4.4051, "step": 10490 }, { "epoch": 0.373008510466664, "grad_norm": 0.04345703125, "learning_rate": 1.8360829544731603e-05, "loss": 4.424, "step": 10500 }, { "epoch": 0.3734118501189852, "grad_norm": 0.04443359375, "learning_rate": 1.835734508264191e-05, "loss": 4.4295, "step": 10510 }, { "epoch": 0.3738151897713064, "grad_norm": 0.045166015625, "learning_rate": 1.8353857252309607e-05, "loss": 4.3901, "step": 10520 }, { "epoch": 0.37421852942362765, "grad_norm": 0.042724609375, "learning_rate": 1.8350366055140405e-05, "loss": 4.4264, "step": 10530 }, { "epoch": 0.37462186907594885, "grad_norm": 0.04443359375, "learning_rate": 1.8346871492541344e-05, "loss": 4.4228, "step": 10540 }, { "epoch": 0.37502520872827005, "grad_norm": 0.044921875, "learning_rate": 1.8343373565920833e-05, "loss": 4.4288, "step": 10550 }, { "epoch": 0.3754285483805913, "grad_norm": 0.043701171875, "learning_rate": 1.8339872276688635e-05, "loss": 4.3978, "step": 10560 }, { "epoch": 0.3758318880329125, "grad_norm": 0.044189453125, "learning_rate": 1.8336367626255862e-05, "loss": 4.4014, "step": 10570 }, { "epoch": 0.3762352276852337, "grad_norm": 0.04345703125, "learning_rate": 1.8332859616034987e-05, "loss": 4.4109, "step": 10580 }, { "epoch": 0.37663856733755496, "grad_norm": 0.043701171875, "learning_rate": 1.832934824743984e-05, "loss": 4.4379, "step": 10590 }, { "epoch": 0.37704190698987616, "grad_norm": 0.044921875, "learning_rate": 1.8325833521885598e-05, "loss": 4.4457, "step": 10600 }, { "epoch": 0.3774452466421974, "grad_norm": 0.0419921875, "learning_rate": 1.8322315440788797e-05, "loss": 4.4255, "step": 10610 }, { "epoch": 0.3778485862945186, "grad_norm": 0.042724609375, "learning_rate": 1.8318794005567312e-05, "loss": 4.3931, "step": 10620 }, { "epoch": 0.3782519259468398, "grad_norm": 0.047119140625, "learning_rate": 1.8315269217640385e-05, "loss": 4.4185, "step": 10630 }, { "epoch": 0.3786552655991611, "grad_norm": 0.04296875, "learning_rate": 1.8311741078428605e-05, "loss": 4.4123, "step": 10640 }, { "epoch": 0.3790586052514823, "grad_norm": 0.044677734375, "learning_rate": 1.8308209589353907e-05, "loss": 4.3817, "step": 10650 }, { "epoch": 0.37946194490380347, "grad_norm": 0.044921875, "learning_rate": 1.8304674751839583e-05, "loss": 4.4471, "step": 10660 }, { "epoch": 0.3798652845561247, "grad_norm": 0.043212890625, "learning_rate": 1.8301136567310266e-05, "loss": 4.4312, "step": 10670 }, { "epoch": 0.3802686242084459, "grad_norm": 0.04345703125, "learning_rate": 1.8297595037191945e-05, "loss": 4.4396, "step": 10680 }, { "epoch": 0.3806719638607671, "grad_norm": 0.04541015625, "learning_rate": 1.829405016291196e-05, "loss": 4.4358, "step": 10690 }, { "epoch": 0.3810753035130884, "grad_norm": 0.043212890625, "learning_rate": 1.829050194589899e-05, "loss": 4.42, "step": 10700 }, { "epoch": 0.3814786431654096, "grad_norm": 0.0458984375, "learning_rate": 1.8286950387583062e-05, "loss": 4.4468, "step": 10710 }, { "epoch": 0.38188198281773084, "grad_norm": 0.04345703125, "learning_rate": 1.8283395489395557e-05, "loss": 4.4395, "step": 10720 }, { "epoch": 0.38228532247005204, "grad_norm": 0.043701171875, "learning_rate": 1.8279837252769196e-05, "loss": 4.3984, "step": 10730 }, { "epoch": 0.38268866212237324, "grad_norm": 0.044921875, "learning_rate": 1.827627567913805e-05, "loss": 4.4079, "step": 10740 }, { "epoch": 0.3830920017746945, "grad_norm": 0.0478515625, "learning_rate": 1.827271076993753e-05, "loss": 4.4547, "step": 10750 }, { "epoch": 0.3834953414270157, "grad_norm": 0.0419921875, "learning_rate": 1.8269142526604394e-05, "loss": 4.3955, "step": 10760 }, { "epoch": 0.3838986810793369, "grad_norm": 0.0439453125, "learning_rate": 1.826557095057674e-05, "loss": 4.4367, "step": 10770 }, { "epoch": 0.38430202073165814, "grad_norm": 0.046875, "learning_rate": 1.826199604329402e-05, "loss": 4.4129, "step": 10780 }, { "epoch": 0.38470536038397934, "grad_norm": 0.047119140625, "learning_rate": 1.8258417806197012e-05, "loss": 4.4268, "step": 10790 }, { "epoch": 0.38510870003630054, "grad_norm": 0.04345703125, "learning_rate": 1.8254836240727857e-05, "loss": 4.4434, "step": 10800 }, { "epoch": 0.3855120396886218, "grad_norm": 0.04443359375, "learning_rate": 1.8251251348330013e-05, "loss": 4.3948, "step": 10810 }, { "epoch": 0.385915379340943, "grad_norm": 0.045654296875, "learning_rate": 1.82476631304483e-05, "loss": 4.4062, "step": 10820 }, { "epoch": 0.38631871899326425, "grad_norm": 0.043701171875, "learning_rate": 1.8244071588528864e-05, "loss": 4.426, "step": 10830 }, { "epoch": 0.38672205864558545, "grad_norm": 0.04296875, "learning_rate": 1.82404767240192e-05, "loss": 4.399, "step": 10840 }, { "epoch": 0.38712539829790665, "grad_norm": 0.04296875, "learning_rate": 1.8236878538368135e-05, "loss": 4.4266, "step": 10850 }, { "epoch": 0.3875287379502279, "grad_norm": 0.045166015625, "learning_rate": 1.8233277033025846e-05, "loss": 4.4276, "step": 10860 }, { "epoch": 0.3879320776025491, "grad_norm": 0.04541015625, "learning_rate": 1.822967220944383e-05, "loss": 4.3799, "step": 10870 }, { "epoch": 0.3883354172548703, "grad_norm": 0.044677734375, "learning_rate": 1.8226064069074937e-05, "loss": 4.4499, "step": 10880 }, { "epoch": 0.38873875690719156, "grad_norm": 0.044189453125, "learning_rate": 1.822245261337334e-05, "loss": 4.3809, "step": 10890 }, { "epoch": 0.38914209655951276, "grad_norm": 0.04296875, "learning_rate": 1.821883784379457e-05, "loss": 4.4068, "step": 10900 }, { "epoch": 0.38954543621183396, "grad_norm": 0.04443359375, "learning_rate": 1.821521976179547e-05, "loss": 4.4326, "step": 10910 }, { "epoch": 0.3899487758641552, "grad_norm": 0.043701171875, "learning_rate": 1.821159836883423e-05, "loss": 4.4732, "step": 10920 }, { "epoch": 0.3903521155164764, "grad_norm": 0.04541015625, "learning_rate": 1.820797366637037e-05, "loss": 4.3733, "step": 10930 }, { "epoch": 0.39075545516879767, "grad_norm": 0.04443359375, "learning_rate": 1.820434565586475e-05, "loss": 4.404, "step": 10940 }, { "epoch": 0.39115879482111887, "grad_norm": 0.0419921875, "learning_rate": 1.820071433877956e-05, "loss": 4.4209, "step": 10950 }, { "epoch": 0.39156213447344007, "grad_norm": 0.0419921875, "learning_rate": 1.8197079716578315e-05, "loss": 4.4158, "step": 10960 }, { "epoch": 0.3919654741257613, "grad_norm": 0.047119140625, "learning_rate": 1.8193441790725873e-05, "loss": 4.412, "step": 10970 }, { "epoch": 0.3923688137780825, "grad_norm": 0.04443359375, "learning_rate": 1.818980056268842e-05, "loss": 4.4142, "step": 10980 }, { "epoch": 0.3927721534304037, "grad_norm": 0.04296875, "learning_rate": 1.818615603393347e-05, "loss": 4.4214, "step": 10990 }, { "epoch": 0.393175493082725, "grad_norm": 0.0419921875, "learning_rate": 1.818250820592987e-05, "loss": 4.3848, "step": 11000 }, { "epoch": 0.3935788327350462, "grad_norm": 0.04150390625, "learning_rate": 1.8178857080147798e-05, "loss": 4.4327, "step": 11010 }, { "epoch": 0.3939821723873674, "grad_norm": 0.043701171875, "learning_rate": 1.8175202658058762e-05, "loss": 4.4147, "step": 11020 }, { "epoch": 0.39438551203968863, "grad_norm": 0.0458984375, "learning_rate": 1.8171544941135584e-05, "loss": 4.4387, "step": 11030 }, { "epoch": 0.39478885169200983, "grad_norm": 0.044921875, "learning_rate": 1.8167883930852444e-05, "loss": 4.3859, "step": 11040 }, { "epoch": 0.39519219134433103, "grad_norm": 0.044189453125, "learning_rate": 1.816421962868481e-05, "loss": 4.419, "step": 11050 }, { "epoch": 0.3955955309966523, "grad_norm": 0.0458984375, "learning_rate": 1.8160552036109518e-05, "loss": 4.4185, "step": 11060 }, { "epoch": 0.3959988706489735, "grad_norm": 0.0419921875, "learning_rate": 1.8156881154604696e-05, "loss": 4.4584, "step": 11070 }, { "epoch": 0.39640221030129474, "grad_norm": 0.044921875, "learning_rate": 1.8153206985649815e-05, "loss": 4.4008, "step": 11080 }, { "epoch": 0.39680554995361594, "grad_norm": 0.04345703125, "learning_rate": 1.814952953072567e-05, "loss": 4.4109, "step": 11090 }, { "epoch": 0.39720888960593714, "grad_norm": 0.048095703125, "learning_rate": 1.814584879131437e-05, "loss": 4.436, "step": 11100 }, { "epoch": 0.3976122292582584, "grad_norm": 0.0458984375, "learning_rate": 1.8142164768899368e-05, "loss": 4.4029, "step": 11110 }, { "epoch": 0.3980155689105796, "grad_norm": 0.044921875, "learning_rate": 1.8138477464965417e-05, "loss": 4.4199, "step": 11120 }, { "epoch": 0.3984189085629008, "grad_norm": 0.046630859375, "learning_rate": 1.813478688099861e-05, "loss": 4.3939, "step": 11130 }, { "epoch": 0.39882224821522205, "grad_norm": 0.043701171875, "learning_rate": 1.813109301848635e-05, "loss": 4.42, "step": 11140 }, { "epoch": 0.39922558786754325, "grad_norm": 0.043701171875, "learning_rate": 1.8127395878917366e-05, "loss": 4.398, "step": 11150 }, { "epoch": 0.39962892751986445, "grad_norm": 0.04443359375, "learning_rate": 1.8123695463781713e-05, "loss": 4.4344, "step": 11160 }, { "epoch": 0.4000322671721857, "grad_norm": 0.044677734375, "learning_rate": 1.8119991774570757e-05, "loss": 4.3977, "step": 11170 }, { "epoch": 0.4004356068245069, "grad_norm": 0.047119140625, "learning_rate": 1.811628481277719e-05, "loss": 4.4049, "step": 11180 }, { "epoch": 0.40083894647682816, "grad_norm": 0.043701171875, "learning_rate": 1.8112574579895018e-05, "loss": 4.4336, "step": 11190 }, { "epoch": 0.40124228612914936, "grad_norm": 0.045654296875, "learning_rate": 1.8108861077419572e-05, "loss": 4.4069, "step": 11200 }, { "epoch": 0.40164562578147056, "grad_norm": 0.04296875, "learning_rate": 1.8105144306847495e-05, "loss": 4.4155, "step": 11210 }, { "epoch": 0.4020489654337918, "grad_norm": 0.046630859375, "learning_rate": 1.8101424269676752e-05, "loss": 4.4059, "step": 11220 }, { "epoch": 0.402452305086113, "grad_norm": 0.04296875, "learning_rate": 1.8097700967406613e-05, "loss": 4.397, "step": 11230 }, { "epoch": 0.4028556447384342, "grad_norm": 0.04541015625, "learning_rate": 1.809397440153768e-05, "loss": 4.4419, "step": 11240 }, { "epoch": 0.40325898439075547, "grad_norm": 0.04638671875, "learning_rate": 1.809024457357186e-05, "loss": 4.448, "step": 11250 }, { "epoch": 0.40366232404307667, "grad_norm": 0.044677734375, "learning_rate": 1.8086511485012377e-05, "loss": 4.4172, "step": 11260 }, { "epoch": 0.40406566369539787, "grad_norm": 0.043212890625, "learning_rate": 1.808277513736377e-05, "loss": 4.4013, "step": 11270 }, { "epoch": 0.4044690033477191, "grad_norm": 0.0458984375, "learning_rate": 1.8079035532131895e-05, "loss": 4.4174, "step": 11280 }, { "epoch": 0.4048723430000403, "grad_norm": 0.045166015625, "learning_rate": 1.8075292670823912e-05, "loss": 4.3907, "step": 11290 }, { "epoch": 0.4052756826523616, "grad_norm": 0.044189453125, "learning_rate": 1.80715465549483e-05, "loss": 4.4293, "step": 11300 }, { "epoch": 0.4056790223046828, "grad_norm": 0.0439453125, "learning_rate": 1.8067797186014844e-05, "loss": 4.4342, "step": 11310 }, { "epoch": 0.406082361957004, "grad_norm": 0.04248046875, "learning_rate": 1.806404456553465e-05, "loss": 4.4011, "step": 11320 }, { "epoch": 0.40648570160932523, "grad_norm": 0.04345703125, "learning_rate": 1.8060288695020126e-05, "loss": 4.4197, "step": 11330 }, { "epoch": 0.40688904126164643, "grad_norm": 0.044921875, "learning_rate": 1.8056529575984994e-05, "loss": 4.4506, "step": 11340 }, { "epoch": 0.40729238091396763, "grad_norm": 0.0458984375, "learning_rate": 1.8052767209944283e-05, "loss": 4.4184, "step": 11350 }, { "epoch": 0.4076957205662889, "grad_norm": 0.044921875, "learning_rate": 1.8049001598414326e-05, "loss": 4.3878, "step": 11360 }, { "epoch": 0.4080990602186101, "grad_norm": 0.045166015625, "learning_rate": 1.8045232742912776e-05, "loss": 4.4138, "step": 11370 }, { "epoch": 0.4085023998709313, "grad_norm": 0.045654296875, "learning_rate": 1.8041460644958585e-05, "loss": 4.3994, "step": 11380 }, { "epoch": 0.40890573952325254, "grad_norm": 0.04638671875, "learning_rate": 1.8037685306072012e-05, "loss": 4.4051, "step": 11390 }, { "epoch": 0.40930907917557374, "grad_norm": 0.046630859375, "learning_rate": 1.8033906727774625e-05, "loss": 4.4709, "step": 11400 }, { "epoch": 0.409712418827895, "grad_norm": 0.042236328125, "learning_rate": 1.8030124911589293e-05, "loss": 4.4201, "step": 11410 }, { "epoch": 0.4101157584802162, "grad_norm": 0.04541015625, "learning_rate": 1.8026339859040195e-05, "loss": 4.4581, "step": 11420 }, { "epoch": 0.4105190981325374, "grad_norm": 0.045654296875, "learning_rate": 1.8022551571652814e-05, "loss": 4.4573, "step": 11430 }, { "epoch": 0.41092243778485865, "grad_norm": 0.04541015625, "learning_rate": 1.8018760050953936e-05, "loss": 4.384, "step": 11440 }, { "epoch": 0.41132577743717985, "grad_norm": 0.047119140625, "learning_rate": 1.8014965298471646e-05, "loss": 4.4205, "step": 11450 }, { "epoch": 0.41172911708950105, "grad_norm": 0.044677734375, "learning_rate": 1.8011167315735334e-05, "loss": 4.413, "step": 11460 }, { "epoch": 0.4121324567418223, "grad_norm": 0.044921875, "learning_rate": 1.80073661042757e-05, "loss": 4.417, "step": 11470 }, { "epoch": 0.4125357963941435, "grad_norm": 0.044189453125, "learning_rate": 1.800356166562473e-05, "loss": 4.4064, "step": 11480 }, { "epoch": 0.4129391360464647, "grad_norm": 0.045654296875, "learning_rate": 1.799975400131572e-05, "loss": 4.425, "step": 11490 }, { "epoch": 0.41334247569878596, "grad_norm": 0.04248046875, "learning_rate": 1.799594311288327e-05, "loss": 4.4076, "step": 11500 }, { "epoch": 0.41374581535110716, "grad_norm": 0.0458984375, "learning_rate": 1.799212900186327e-05, "loss": 4.4149, "step": 11510 }, { "epoch": 0.41414915500342836, "grad_norm": 0.0458984375, "learning_rate": 1.798831166979291e-05, "loss": 4.4386, "step": 11520 }, { "epoch": 0.4145524946557496, "grad_norm": 0.0439453125, "learning_rate": 1.7984491118210687e-05, "loss": 4.4096, "step": 11530 }, { "epoch": 0.4149558343080708, "grad_norm": 0.046630859375, "learning_rate": 1.7980667348656388e-05, "loss": 4.4346, "step": 11540 }, { "epoch": 0.41535917396039207, "grad_norm": 0.044677734375, "learning_rate": 1.797684036267109e-05, "loss": 4.4299, "step": 11550 }, { "epoch": 0.41576251361271327, "grad_norm": 0.046630859375, "learning_rate": 1.797301016179719e-05, "loss": 4.4309, "step": 11560 }, { "epoch": 0.41616585326503447, "grad_norm": 0.04296875, "learning_rate": 1.7969176747578355e-05, "loss": 4.4038, "step": 11570 }, { "epoch": 0.4165691929173557, "grad_norm": 0.047607421875, "learning_rate": 1.7965340121559563e-05, "loss": 4.3912, "step": 11580 }, { "epoch": 0.4169725325696769, "grad_norm": 0.04345703125, "learning_rate": 1.7961500285287074e-05, "loss": 4.4288, "step": 11590 }, { "epoch": 0.4173758722219981, "grad_norm": 0.044921875, "learning_rate": 1.7957657240308456e-05, "loss": 4.4276, "step": 11600 }, { "epoch": 0.4177792118743194, "grad_norm": 0.0498046875, "learning_rate": 1.795381098817256e-05, "loss": 4.4193, "step": 11610 }, { "epoch": 0.4181825515266406, "grad_norm": 0.045166015625, "learning_rate": 1.7949961530429534e-05, "loss": 4.3987, "step": 11620 }, { "epoch": 0.4185858911789618, "grad_norm": 0.044677734375, "learning_rate": 1.7946108868630813e-05, "loss": 4.3984, "step": 11630 }, { "epoch": 0.41898923083128303, "grad_norm": 0.04443359375, "learning_rate": 1.794225300432913e-05, "loss": 4.4274, "step": 11640 }, { "epoch": 0.41939257048360423, "grad_norm": 0.045654296875, "learning_rate": 1.7938393939078508e-05, "loss": 4.4065, "step": 11650 }, { "epoch": 0.4197959101359255, "grad_norm": 0.046142578125, "learning_rate": 1.793453167443425e-05, "loss": 4.4177, "step": 11660 }, { "epoch": 0.4201992497882467, "grad_norm": 0.044677734375, "learning_rate": 1.7930666211952964e-05, "loss": 4.414, "step": 11670 }, { "epoch": 0.4206025894405679, "grad_norm": 0.04443359375, "learning_rate": 1.7926797553192533e-05, "loss": 4.4375, "step": 11680 }, { "epoch": 0.42100592909288914, "grad_norm": 0.046875, "learning_rate": 1.7922925699712138e-05, "loss": 4.4306, "step": 11690 }, { "epoch": 0.42140926874521034, "grad_norm": 0.0439453125, "learning_rate": 1.7919050653072244e-05, "loss": 4.4014, "step": 11700 }, { "epoch": 0.42181260839753154, "grad_norm": 0.044921875, "learning_rate": 1.7915172414834597e-05, "loss": 4.4227, "step": 11710 }, { "epoch": 0.4222159480498528, "grad_norm": 0.043701171875, "learning_rate": 1.7911290986562244e-05, "loss": 4.4421, "step": 11720 }, { "epoch": 0.422619287702174, "grad_norm": 0.0439453125, "learning_rate": 1.79074063698195e-05, "loss": 4.4286, "step": 11730 }, { "epoch": 0.4230226273544952, "grad_norm": 0.0439453125, "learning_rate": 1.790351856617198e-05, "loss": 4.415, "step": 11740 }, { "epoch": 0.42342596700681645, "grad_norm": 0.045654296875, "learning_rate": 1.7899627577186574e-05, "loss": 4.3777, "step": 11750 }, { "epoch": 0.42382930665913765, "grad_norm": 0.044921875, "learning_rate": 1.789573340443146e-05, "loss": 4.4006, "step": 11760 }, { "epoch": 0.4242326463114589, "grad_norm": 0.044189453125, "learning_rate": 1.789183604947609e-05, "loss": 4.4081, "step": 11770 }, { "epoch": 0.4246359859637801, "grad_norm": 0.0458984375, "learning_rate": 1.788793551389122e-05, "loss": 4.4071, "step": 11780 }, { "epoch": 0.4250393256161013, "grad_norm": 0.044921875, "learning_rate": 1.788403179924887e-05, "loss": 4.4238, "step": 11790 }, { "epoch": 0.42544266526842256, "grad_norm": 0.044677734375, "learning_rate": 1.788012490712234e-05, "loss": 4.4016, "step": 11800 }, { "epoch": 0.42584600492074376, "grad_norm": 0.045654296875, "learning_rate": 1.7876214839086218e-05, "loss": 4.3579, "step": 11810 }, { "epoch": 0.42624934457306496, "grad_norm": 0.042724609375, "learning_rate": 1.7872301596716376e-05, "loss": 4.4135, "step": 11820 }, { "epoch": 0.4266526842253862, "grad_norm": 0.0458984375, "learning_rate": 1.7868385181589954e-05, "loss": 4.4274, "step": 11830 }, { "epoch": 0.4270560238777074, "grad_norm": 0.04296875, "learning_rate": 1.7864465595285376e-05, "loss": 4.3834, "step": 11840 }, { "epoch": 0.4274593635300286, "grad_norm": 0.044677734375, "learning_rate": 1.786054283938235e-05, "loss": 4.3979, "step": 11850 }, { "epoch": 0.42786270318234987, "grad_norm": 0.0439453125, "learning_rate": 1.7856616915461847e-05, "loss": 4.4137, "step": 11860 }, { "epoch": 0.42826604283467107, "grad_norm": 0.0439453125, "learning_rate": 1.785268782510613e-05, "loss": 4.4107, "step": 11870 }, { "epoch": 0.4286693824869923, "grad_norm": 0.044677734375, "learning_rate": 1.784875556989873e-05, "loss": 4.4311, "step": 11880 }, { "epoch": 0.4290727221393135, "grad_norm": 0.043701171875, "learning_rate": 1.7844820151424456e-05, "loss": 4.4117, "step": 11890 }, { "epoch": 0.4294760617916347, "grad_norm": 0.043701171875, "learning_rate": 1.7840881571269393e-05, "loss": 4.3732, "step": 11900 }, { "epoch": 0.429879401443956, "grad_norm": 0.04345703125, "learning_rate": 1.7836939831020893e-05, "loss": 4.3994, "step": 11910 }, { "epoch": 0.4302827410962772, "grad_norm": 0.0458984375, "learning_rate": 1.7832994932267593e-05, "loss": 4.4274, "step": 11920 }, { "epoch": 0.4306860807485984, "grad_norm": 0.043701171875, "learning_rate": 1.7829046876599392e-05, "loss": 4.4171, "step": 11930 }, { "epoch": 0.43108942040091963, "grad_norm": 0.045654296875, "learning_rate": 1.782509566560747e-05, "loss": 4.4305, "step": 11940 }, { "epoch": 0.43149276005324083, "grad_norm": 0.044677734375, "learning_rate": 1.782114130088427e-05, "loss": 4.4258, "step": 11950 }, { "epoch": 0.43189609970556203, "grad_norm": 0.04736328125, "learning_rate": 1.7817183784023514e-05, "loss": 4.3952, "step": 11960 }, { "epoch": 0.4322994393578833, "grad_norm": 0.043212890625, "learning_rate": 1.7813223116620195e-05, "loss": 4.4233, "step": 11970 }, { "epoch": 0.4327027790102045, "grad_norm": 0.0439453125, "learning_rate": 1.7809259300270568e-05, "loss": 4.4212, "step": 11980 }, { "epoch": 0.4331061186625257, "grad_norm": 0.04345703125, "learning_rate": 1.780529233657216e-05, "loss": 4.4581, "step": 11990 }, { "epoch": 0.43350945831484694, "grad_norm": 0.04296875, "learning_rate": 1.7801322227123772e-05, "loss": 4.3928, "step": 12000 }, { "epoch": 0.43391279796716814, "grad_norm": 0.043212890625, "learning_rate": 1.7797348973525464e-05, "loss": 4.4179, "step": 12010 }, { "epoch": 0.4343161376194894, "grad_norm": 0.048828125, "learning_rate": 1.7793372577378573e-05, "loss": 4.4007, "step": 12020 }, { "epoch": 0.4347194772718106, "grad_norm": 0.045166015625, "learning_rate": 1.7789393040285694e-05, "loss": 4.457, "step": 12030 }, { "epoch": 0.4351228169241318, "grad_norm": 0.0439453125, "learning_rate": 1.7785410363850692e-05, "loss": 4.4354, "step": 12040 }, { "epoch": 0.43552615657645305, "grad_norm": 0.0439453125, "learning_rate": 1.778142454967869e-05, "loss": 4.4375, "step": 12050 }, { "epoch": 0.43592949622877425, "grad_norm": 0.04443359375, "learning_rate": 1.7777435599376095e-05, "loss": 4.4339, "step": 12060 }, { "epoch": 0.43633283588109545, "grad_norm": 0.046875, "learning_rate": 1.7773443514550556e-05, "loss": 4.4132, "step": 12070 }, { "epoch": 0.4367361755334167, "grad_norm": 0.0458984375, "learning_rate": 1.7769448296810995e-05, "loss": 4.4482, "step": 12080 }, { "epoch": 0.4371395151857379, "grad_norm": 0.042236328125, "learning_rate": 1.77654499477676e-05, "loss": 4.418, "step": 12090 }, { "epoch": 0.4375428548380591, "grad_norm": 0.045654296875, "learning_rate": 1.776144846903181e-05, "loss": 4.4149, "step": 12100 }, { "epoch": 0.43794619449038036, "grad_norm": 0.04296875, "learning_rate": 1.775744386221634e-05, "loss": 4.4376, "step": 12110 }, { "epoch": 0.43834953414270156, "grad_norm": 0.043212890625, "learning_rate": 1.7753436128935152e-05, "loss": 4.4041, "step": 12120 }, { "epoch": 0.4387528737950228, "grad_norm": 0.044677734375, "learning_rate": 1.774942527080348e-05, "loss": 4.4274, "step": 12130 }, { "epoch": 0.439156213447344, "grad_norm": 0.04541015625, "learning_rate": 1.7745411289437804e-05, "loss": 4.3549, "step": 12140 }, { "epoch": 0.4395595530996652, "grad_norm": 0.044921875, "learning_rate": 1.7741394186455875e-05, "loss": 4.4199, "step": 12150 }, { "epoch": 0.43996289275198647, "grad_norm": 0.044921875, "learning_rate": 1.7737373963476694e-05, "loss": 4.4152, "step": 12160 }, { "epoch": 0.44036623240430767, "grad_norm": 0.044677734375, "learning_rate": 1.7733350622120528e-05, "loss": 4.4243, "step": 12170 }, { "epoch": 0.44076957205662887, "grad_norm": 0.042724609375, "learning_rate": 1.772932416400889e-05, "loss": 4.421, "step": 12180 }, { "epoch": 0.4411729117089501, "grad_norm": 0.044189453125, "learning_rate": 1.7725294590764557e-05, "loss": 4.4259, "step": 12190 }, { "epoch": 0.4415762513612713, "grad_norm": 0.044921875, "learning_rate": 1.772126190401156e-05, "loss": 4.4352, "step": 12200 }, { "epoch": 0.4419795910135925, "grad_norm": 0.0419921875, "learning_rate": 1.7717226105375186e-05, "loss": 4.473, "step": 12210 }, { "epoch": 0.4423829306659138, "grad_norm": 0.042236328125, "learning_rate": 1.7713187196481967e-05, "loss": 4.3949, "step": 12220 }, { "epoch": 0.442786270318235, "grad_norm": 0.045166015625, "learning_rate": 1.77091451789597e-05, "loss": 4.4273, "step": 12230 }, { "epoch": 0.44318960997055623, "grad_norm": 0.048095703125, "learning_rate": 1.7705100054437436e-05, "loss": 4.4493, "step": 12240 }, { "epoch": 0.44359294962287743, "grad_norm": 0.04345703125, "learning_rate": 1.7701051824545464e-05, "loss": 4.4284, "step": 12250 }, { "epoch": 0.44399628927519863, "grad_norm": 0.044677734375, "learning_rate": 1.7697000490915336e-05, "loss": 4.4551, "step": 12260 }, { "epoch": 0.4443996289275199, "grad_norm": 0.04443359375, "learning_rate": 1.769294605517985e-05, "loss": 4.4046, "step": 12270 }, { "epoch": 0.4448029685798411, "grad_norm": 0.04150390625, "learning_rate": 1.768888851897306e-05, "loss": 4.4372, "step": 12280 }, { "epoch": 0.4452063082321623, "grad_norm": 0.042236328125, "learning_rate": 1.768482788393027e-05, "loss": 4.4184, "step": 12290 }, { "epoch": 0.44560964788448354, "grad_norm": 0.046142578125, "learning_rate": 1.768076415168802e-05, "loss": 4.4133, "step": 12300 }, { "epoch": 0.44601298753680474, "grad_norm": 0.04443359375, "learning_rate": 1.767669732388411e-05, "loss": 4.4246, "step": 12310 }, { "epoch": 0.44641632718912594, "grad_norm": 0.043701171875, "learning_rate": 1.7672627402157587e-05, "loss": 4.4047, "step": 12320 }, { "epoch": 0.4468196668414472, "grad_norm": 0.0439453125, "learning_rate": 1.7668554388148745e-05, "loss": 4.4398, "step": 12330 }, { "epoch": 0.4472230064937684, "grad_norm": 0.046142578125, "learning_rate": 1.7664478283499115e-05, "loss": 4.4028, "step": 12340 }, { "epoch": 0.44762634614608965, "grad_norm": 0.0439453125, "learning_rate": 1.7660399089851485e-05, "loss": 4.4334, "step": 12350 }, { "epoch": 0.44802968579841085, "grad_norm": 0.043212890625, "learning_rate": 1.7656316808849884e-05, "loss": 4.3991, "step": 12360 }, { "epoch": 0.44843302545073205, "grad_norm": 0.044921875, "learning_rate": 1.765223144213958e-05, "loss": 4.3845, "step": 12370 }, { "epoch": 0.4488363651030533, "grad_norm": 0.0439453125, "learning_rate": 1.7648142991367097e-05, "loss": 4.4327, "step": 12380 }, { "epoch": 0.4492397047553745, "grad_norm": 0.045654296875, "learning_rate": 1.7644051458180188e-05, "loss": 4.4033, "step": 12390 }, { "epoch": 0.4496430444076957, "grad_norm": 0.043701171875, "learning_rate": 1.7639956844227855e-05, "loss": 4.428, "step": 12400 }, { "epoch": 0.45004638406001696, "grad_norm": 0.04345703125, "learning_rate": 1.7635859151160347e-05, "loss": 4.399, "step": 12410 }, { "epoch": 0.45044972371233816, "grad_norm": 0.04541015625, "learning_rate": 1.7631758380629144e-05, "loss": 4.4122, "step": 12420 }, { "epoch": 0.45085306336465936, "grad_norm": 0.046142578125, "learning_rate": 1.762765453428697e-05, "loss": 4.4175, "step": 12430 }, { "epoch": 0.4512564030169806, "grad_norm": 0.04541015625, "learning_rate": 1.7623547613787795e-05, "loss": 4.4103, "step": 12440 }, { "epoch": 0.4516597426693018, "grad_norm": 0.0439453125, "learning_rate": 1.7619437620786812e-05, "loss": 4.4337, "step": 12450 }, { "epoch": 0.45206308232162307, "grad_norm": 0.045166015625, "learning_rate": 1.7615324556940473e-05, "loss": 4.3854, "step": 12460 }, { "epoch": 0.45246642197394427, "grad_norm": 0.044921875, "learning_rate": 1.761120842390645e-05, "loss": 4.3982, "step": 12470 }, { "epoch": 0.45286976162626547, "grad_norm": 0.046142578125, "learning_rate": 1.760708922334366e-05, "loss": 4.416, "step": 12480 }, { "epoch": 0.4532731012785867, "grad_norm": 0.043701171875, "learning_rate": 1.760296695691226e-05, "loss": 4.3945, "step": 12490 }, { "epoch": 0.4536764409309079, "grad_norm": 0.042236328125, "learning_rate": 1.7598841626273637e-05, "loss": 4.4108, "step": 12500 }, { "epoch": 0.4540797805832291, "grad_norm": 0.048095703125, "learning_rate": 1.759471323309041e-05, "loss": 4.4505, "step": 12510 }, { "epoch": 0.4544831202355504, "grad_norm": 0.044677734375, "learning_rate": 1.759058177902644e-05, "loss": 4.417, "step": 12520 }, { "epoch": 0.4548864598878716, "grad_norm": 0.044921875, "learning_rate": 1.7586447265746813e-05, "loss": 4.442, "step": 12530 }, { "epoch": 0.4552897995401928, "grad_norm": 0.044677734375, "learning_rate": 1.758230969491786e-05, "loss": 4.4262, "step": 12540 }, { "epoch": 0.45569313919251403, "grad_norm": 0.042236328125, "learning_rate": 1.757816906820713e-05, "loss": 4.4174, "step": 12550 }, { "epoch": 0.45609647884483523, "grad_norm": 0.044677734375, "learning_rate": 1.7574025387283416e-05, "loss": 4.4161, "step": 12560 }, { "epoch": 0.45649981849715643, "grad_norm": 0.043701171875, "learning_rate": 1.7569878653816735e-05, "loss": 4.41, "step": 12570 }, { "epoch": 0.4569031581494777, "grad_norm": 0.045654296875, "learning_rate": 1.7565728869478338e-05, "loss": 4.4304, "step": 12580 }, { "epoch": 0.4573064978017989, "grad_norm": 0.0439453125, "learning_rate": 1.7561576035940698e-05, "loss": 4.3971, "step": 12590 }, { "epoch": 0.45770983745412014, "grad_norm": 0.044921875, "learning_rate": 1.755742015487753e-05, "loss": 4.4415, "step": 12600 }, { "epoch": 0.45811317710644134, "grad_norm": 0.045166015625, "learning_rate": 1.7553261227963763e-05, "loss": 4.3919, "step": 12610 }, { "epoch": 0.45851651675876254, "grad_norm": 0.0439453125, "learning_rate": 1.754909925687556e-05, "loss": 4.4039, "step": 12620 }, { "epoch": 0.4589198564110838, "grad_norm": 0.043701171875, "learning_rate": 1.754493424329032e-05, "loss": 4.4289, "step": 12630 }, { "epoch": 0.459323196063405, "grad_norm": 0.0458984375, "learning_rate": 1.754076618888665e-05, "loss": 4.3887, "step": 12640 }, { "epoch": 0.4597265357157262, "grad_norm": 0.0439453125, "learning_rate": 1.7536595095344397e-05, "loss": 4.4246, "step": 12650 }, { "epoch": 0.46012987536804745, "grad_norm": 0.0439453125, "learning_rate": 1.7532420964344625e-05, "loss": 4.4414, "step": 12660 }, { "epoch": 0.46053321502036865, "grad_norm": 0.0458984375, "learning_rate": 1.752824379756963e-05, "loss": 4.4279, "step": 12670 }, { "epoch": 0.46093655467268985, "grad_norm": 0.044189453125, "learning_rate": 1.7524063596702916e-05, "loss": 4.4168, "step": 12680 }, { "epoch": 0.4613398943250111, "grad_norm": 0.046142578125, "learning_rate": 1.751988036342923e-05, "loss": 4.4306, "step": 12690 }, { "epoch": 0.4617432339773323, "grad_norm": 0.044921875, "learning_rate": 1.7515694099434532e-05, "loss": 4.423, "step": 12700 }, { "epoch": 0.46214657362965356, "grad_norm": 0.041259765625, "learning_rate": 1.751150480640599e-05, "loss": 4.4331, "step": 12710 }, { "epoch": 0.46254991328197476, "grad_norm": 0.048828125, "learning_rate": 1.7507312486032022e-05, "loss": 4.4622, "step": 12720 }, { "epoch": 0.46295325293429596, "grad_norm": 0.043701171875, "learning_rate": 1.750311714000224e-05, "loss": 4.4011, "step": 12730 }, { "epoch": 0.4633565925866172, "grad_norm": 0.0439453125, "learning_rate": 1.7498918770007486e-05, "loss": 4.437, "step": 12740 }, { "epoch": 0.4637599322389384, "grad_norm": 0.047119140625, "learning_rate": 1.7494717377739825e-05, "loss": 4.408, "step": 12750 }, { "epoch": 0.4641632718912596, "grad_norm": 0.0419921875, "learning_rate": 1.749051296489253e-05, "loss": 4.3931, "step": 12760 }, { "epoch": 0.46456661154358087, "grad_norm": 0.045654296875, "learning_rate": 1.7486305533160094e-05, "loss": 4.4574, "step": 12770 }, { "epoch": 0.46496995119590206, "grad_norm": 0.04638671875, "learning_rate": 1.7482095084238236e-05, "loss": 4.4337, "step": 12780 }, { "epoch": 0.46537329084822326, "grad_norm": 0.04296875, "learning_rate": 1.7477881619823883e-05, "loss": 4.408, "step": 12790 }, { "epoch": 0.4657766305005445, "grad_norm": 0.04638671875, "learning_rate": 1.7473665141615174e-05, "loss": 4.4558, "step": 12800 }, { "epoch": 0.4661799701528657, "grad_norm": 0.043701171875, "learning_rate": 1.7469445651311472e-05, "loss": 4.4216, "step": 12810 }, { "epoch": 0.466583309805187, "grad_norm": 0.043212890625, "learning_rate": 1.7465223150613347e-05, "loss": 4.4281, "step": 12820 }, { "epoch": 0.4669866494575082, "grad_norm": 0.043212890625, "learning_rate": 1.7460997641222582e-05, "loss": 4.4109, "step": 12830 }, { "epoch": 0.4673899891098294, "grad_norm": 0.04443359375, "learning_rate": 1.745676912484217e-05, "loss": 4.4585, "step": 12840 }, { "epoch": 0.46779332876215063, "grad_norm": 0.041259765625, "learning_rate": 1.7452537603176336e-05, "loss": 4.4275, "step": 12850 }, { "epoch": 0.46819666841447183, "grad_norm": 0.04736328125, "learning_rate": 1.744830307793049e-05, "loss": 4.3956, "step": 12860 }, { "epoch": 0.46860000806679303, "grad_norm": 0.043212890625, "learning_rate": 1.7444065550811264e-05, "loss": 4.4174, "step": 12870 }, { "epoch": 0.4690033477191143, "grad_norm": 0.04345703125, "learning_rate": 1.7439825023526503e-05, "loss": 4.4074, "step": 12880 }, { "epoch": 0.4694066873714355, "grad_norm": 0.0458984375, "learning_rate": 1.7435581497785255e-05, "loss": 4.3885, "step": 12890 }, { "epoch": 0.4698100270237567, "grad_norm": 0.044189453125, "learning_rate": 1.7431334975297778e-05, "loss": 4.4096, "step": 12900 }, { "epoch": 0.47021336667607794, "grad_norm": 0.042236328125, "learning_rate": 1.7427085457775545e-05, "loss": 4.4271, "step": 12910 }, { "epoch": 0.47061670632839914, "grad_norm": 0.044921875, "learning_rate": 1.742283294693122e-05, "loss": 4.4048, "step": 12920 }, { "epoch": 0.4710200459807204, "grad_norm": 0.04443359375, "learning_rate": 1.741857744447869e-05, "loss": 4.432, "step": 12930 }, { "epoch": 0.4714233856330416, "grad_norm": 0.04345703125, "learning_rate": 1.7414318952133035e-05, "loss": 4.422, "step": 12940 }, { "epoch": 0.4718267252853628, "grad_norm": 0.044677734375, "learning_rate": 1.7410057471610554e-05, "loss": 4.4092, "step": 12950 }, { "epoch": 0.47223006493768405, "grad_norm": 0.04248046875, "learning_rate": 1.740579300462874e-05, "loss": 4.4423, "step": 12960 }, { "epoch": 0.47263340459000525, "grad_norm": 0.045166015625, "learning_rate": 1.740152555290629e-05, "loss": 4.4233, "step": 12970 }, { "epoch": 0.47303674424232645, "grad_norm": 0.044189453125, "learning_rate": 1.73972551181631e-05, "loss": 4.3794, "step": 12980 }, { "epoch": 0.4734400838946477, "grad_norm": 0.044189453125, "learning_rate": 1.7392981702120283e-05, "loss": 4.4169, "step": 12990 }, { "epoch": 0.4738434235469689, "grad_norm": 0.04248046875, "learning_rate": 1.738870530650014e-05, "loss": 4.3977, "step": 13000 }, { "epoch": 0.4742467631992901, "grad_norm": 0.045654296875, "learning_rate": 1.7384425933026183e-05, "loss": 4.4113, "step": 13010 }, { "epoch": 0.47465010285161136, "grad_norm": 0.046142578125, "learning_rate": 1.738014358342311e-05, "loss": 4.4516, "step": 13020 }, { "epoch": 0.47505344250393255, "grad_norm": 0.04248046875, "learning_rate": 1.737585825941684e-05, "loss": 4.4504, "step": 13030 }, { "epoch": 0.47545678215625375, "grad_norm": 0.04541015625, "learning_rate": 1.737156996273446e-05, "loss": 4.3804, "step": 13040 }, { "epoch": 0.475860121808575, "grad_norm": 0.045166015625, "learning_rate": 1.7367278695104285e-05, "loss": 4.4088, "step": 13050 }, { "epoch": 0.4762634614608962, "grad_norm": 0.04296875, "learning_rate": 1.736298445825581e-05, "loss": 4.4213, "step": 13060 }, { "epoch": 0.47666680111321746, "grad_norm": 0.04296875, "learning_rate": 1.735868725391974e-05, "loss": 4.4013, "step": 13070 }, { "epoch": 0.47707014076553866, "grad_norm": 0.046142578125, "learning_rate": 1.735438708382796e-05, "loss": 4.4148, "step": 13080 }, { "epoch": 0.47747348041785986, "grad_norm": 0.046630859375, "learning_rate": 1.735008394971356e-05, "loss": 4.4202, "step": 13090 }, { "epoch": 0.4778768200701811, "grad_norm": 0.047119140625, "learning_rate": 1.734577785331082e-05, "loss": 4.4206, "step": 13100 }, { "epoch": 0.4782801597225023, "grad_norm": 0.042724609375, "learning_rate": 1.7341468796355223e-05, "loss": 4.3931, "step": 13110 }, { "epoch": 0.4786834993748235, "grad_norm": 0.045166015625, "learning_rate": 1.7337156780583436e-05, "loss": 4.4281, "step": 13120 }, { "epoch": 0.4790868390271448, "grad_norm": 0.044921875, "learning_rate": 1.733284180773332e-05, "loss": 4.4086, "step": 13130 }, { "epoch": 0.479490178679466, "grad_norm": 0.044677734375, "learning_rate": 1.732852387954393e-05, "loss": 4.4411, "step": 13140 }, { "epoch": 0.47989351833178717, "grad_norm": 0.044921875, "learning_rate": 1.732420299775551e-05, "loss": 4.4028, "step": 13150 }, { "epoch": 0.4802968579841084, "grad_norm": 0.044677734375, "learning_rate": 1.7319879164109502e-05, "loss": 4.4186, "step": 13160 }, { "epoch": 0.4807001976364296, "grad_norm": 0.044189453125, "learning_rate": 1.7315552380348523e-05, "loss": 4.4343, "step": 13170 }, { "epoch": 0.4811035372887509, "grad_norm": 0.0458984375, "learning_rate": 1.731122264821639e-05, "loss": 4.4495, "step": 13180 }, { "epoch": 0.4815068769410721, "grad_norm": 0.04296875, "learning_rate": 1.7306889969458107e-05, "loss": 4.3989, "step": 13190 }, { "epoch": 0.4819102165933933, "grad_norm": 0.04296875, "learning_rate": 1.7302554345819863e-05, "loss": 4.4283, "step": 13200 }, { "epoch": 0.48231355624571454, "grad_norm": 0.0458984375, "learning_rate": 1.7298215779049033e-05, "loss": 4.4064, "step": 13210 }, { "epoch": 0.48271689589803574, "grad_norm": 0.044189453125, "learning_rate": 1.7293874270894184e-05, "loss": 4.4111, "step": 13220 }, { "epoch": 0.48312023555035694, "grad_norm": 0.043701171875, "learning_rate": 1.728952982310506e-05, "loss": 4.409, "step": 13230 }, { "epoch": 0.4835235752026782, "grad_norm": 0.049560546875, "learning_rate": 1.72851824374326e-05, "loss": 4.4261, "step": 13240 }, { "epoch": 0.4839269148549994, "grad_norm": 0.044189453125, "learning_rate": 1.7280832115628918e-05, "loss": 4.3983, "step": 13250 }, { "epoch": 0.4843302545073206, "grad_norm": 0.044189453125, "learning_rate": 1.7276478859447316e-05, "loss": 4.4139, "step": 13260 }, { "epoch": 0.48473359415964185, "grad_norm": 0.043212890625, "learning_rate": 1.7272122670642277e-05, "loss": 4.4179, "step": 13270 }, { "epoch": 0.48513693381196304, "grad_norm": 0.045654296875, "learning_rate": 1.7267763550969463e-05, "loss": 4.4337, "step": 13280 }, { "epoch": 0.4855402734642843, "grad_norm": 0.04296875, "learning_rate": 1.7263401502185723e-05, "loss": 4.4149, "step": 13290 }, { "epoch": 0.4859436131166055, "grad_norm": 0.0439453125, "learning_rate": 1.7259036526049087e-05, "loss": 4.4216, "step": 13300 }, { "epoch": 0.4863469527689267, "grad_norm": 0.0458984375, "learning_rate": 1.7254668624318758e-05, "loss": 4.4229, "step": 13310 }, { "epoch": 0.48675029242124795, "grad_norm": 0.04736328125, "learning_rate": 1.7250297798755124e-05, "loss": 4.4513, "step": 13320 }, { "epoch": 0.48715363207356915, "grad_norm": 0.04296875, "learning_rate": 1.7245924051119745e-05, "loss": 4.387, "step": 13330 }, { "epoch": 0.48755697172589035, "grad_norm": 0.044921875, "learning_rate": 1.724154738317537e-05, "loss": 4.4285, "step": 13340 }, { "epoch": 0.4879603113782116, "grad_norm": 0.04296875, "learning_rate": 1.7237167796685917e-05, "loss": 4.4241, "step": 13350 }, { "epoch": 0.4883636510305328, "grad_norm": 0.04345703125, "learning_rate": 1.7232785293416473e-05, "loss": 4.4336, "step": 13360 }, { "epoch": 0.488766990682854, "grad_norm": 0.044677734375, "learning_rate": 1.7228399875133316e-05, "loss": 4.4279, "step": 13370 }, { "epoch": 0.48917033033517526, "grad_norm": 0.051513671875, "learning_rate": 1.7224011543603892e-05, "loss": 4.4219, "step": 13380 }, { "epoch": 0.48957366998749646, "grad_norm": 0.045654296875, "learning_rate": 1.7219620300596816e-05, "loss": 4.4288, "step": 13390 }, { "epoch": 0.4899770096398177, "grad_norm": 0.044189453125, "learning_rate": 1.7215226147881885e-05, "loss": 4.4192, "step": 13400 }, { "epoch": 0.4903803492921389, "grad_norm": 0.0439453125, "learning_rate": 1.7210829087230062e-05, "loss": 4.4216, "step": 13410 }, { "epoch": 0.4907836889444601, "grad_norm": 0.044921875, "learning_rate": 1.720642912041349e-05, "loss": 4.4575, "step": 13420 }, { "epoch": 0.49118702859678137, "grad_norm": 0.044921875, "learning_rate": 1.720202624920547e-05, "loss": 4.4018, "step": 13430 }, { "epoch": 0.49159036824910257, "grad_norm": 0.0458984375, "learning_rate": 1.7197620475380488e-05, "loss": 4.4276, "step": 13440 }, { "epoch": 0.49199370790142377, "grad_norm": 0.0439453125, "learning_rate": 1.7193211800714194e-05, "loss": 4.4114, "step": 13450 }, { "epoch": 0.492397047553745, "grad_norm": 0.04443359375, "learning_rate": 1.71888002269834e-05, "loss": 4.4432, "step": 13460 }, { "epoch": 0.4928003872060662, "grad_norm": 0.044921875, "learning_rate": 1.71843857559661e-05, "loss": 4.4151, "step": 13470 }, { "epoch": 0.4932037268583874, "grad_norm": 0.04345703125, "learning_rate": 1.7179968389441443e-05, "loss": 4.3855, "step": 13480 }, { "epoch": 0.4936070665107087, "grad_norm": 0.047119140625, "learning_rate": 1.7175548129189756e-05, "loss": 4.4261, "step": 13490 }, { "epoch": 0.4940104061630299, "grad_norm": 0.043212890625, "learning_rate": 1.7171124976992522e-05, "loss": 4.4385, "step": 13500 }, { "epoch": 0.4944137458153511, "grad_norm": 0.04443359375, "learning_rate": 1.7166698934632394e-05, "loss": 4.3938, "step": 13510 }, { "epoch": 0.49481708546767234, "grad_norm": 0.043701171875, "learning_rate": 1.71622700038932e-05, "loss": 4.4026, "step": 13520 }, { "epoch": 0.49522042511999353, "grad_norm": 0.043701171875, "learning_rate": 1.715783818655991e-05, "loss": 4.4023, "step": 13530 }, { "epoch": 0.4956237647723148, "grad_norm": 0.046630859375, "learning_rate": 1.7153403484418673e-05, "loss": 4.404, "step": 13540 }, { "epoch": 0.496027104424636, "grad_norm": 0.043212890625, "learning_rate": 1.7148965899256803e-05, "loss": 4.3942, "step": 13550 }, { "epoch": 0.4964304440769572, "grad_norm": 0.044189453125, "learning_rate": 1.7144525432862764e-05, "loss": 4.3959, "step": 13560 }, { "epoch": 0.49683378372927844, "grad_norm": 0.04833984375, "learning_rate": 1.714008208702619e-05, "loss": 4.4297, "step": 13570 }, { "epoch": 0.49723712338159964, "grad_norm": 0.048095703125, "learning_rate": 1.7135635863537874e-05, "loss": 4.4297, "step": 13580 }, { "epoch": 0.49764046303392084, "grad_norm": 0.044921875, "learning_rate": 1.7131186764189765e-05, "loss": 4.4206, "step": 13590 }, { "epoch": 0.4980438026862421, "grad_norm": 0.044677734375, "learning_rate": 1.7126734790774977e-05, "loss": 4.4502, "step": 13600 }, { "epoch": 0.4984471423385633, "grad_norm": 0.0439453125, "learning_rate": 1.7122279945087778e-05, "loss": 4.4239, "step": 13610 }, { "epoch": 0.4988504819908845, "grad_norm": 0.04541015625, "learning_rate": 1.7117822228923594e-05, "loss": 4.4222, "step": 13620 }, { "epoch": 0.49925382164320575, "grad_norm": 0.04638671875, "learning_rate": 1.7113361644079006e-05, "loss": 4.4127, "step": 13630 }, { "epoch": 0.49965716129552695, "grad_norm": 0.04931640625, "learning_rate": 1.7108898192351763e-05, "loss": 4.4484, "step": 13640 }, { "epoch": 0.5000605009478482, "grad_norm": 0.043701171875, "learning_rate": 1.710443187554075e-05, "loss": 4.4428, "step": 13650 }, { "epoch": 0.5004638406001694, "grad_norm": 0.0458984375, "learning_rate": 1.709996269544602e-05, "loss": 4.3701, "step": 13660 }, { "epoch": 0.5008671802524907, "grad_norm": 0.04345703125, "learning_rate": 1.709549065386878e-05, "loss": 4.3718, "step": 13670 }, { "epoch": 0.5012705199048119, "grad_norm": 0.044677734375, "learning_rate": 1.7091015752611383e-05, "loss": 4.415, "step": 13680 }, { "epoch": 0.5016738595571331, "grad_norm": 0.045166015625, "learning_rate": 1.7086537993477342e-05, "loss": 4.414, "step": 13690 }, { "epoch": 0.5020771992094543, "grad_norm": 0.044677734375, "learning_rate": 1.7082057378271317e-05, "loss": 4.4676, "step": 13700 }, { "epoch": 0.5024805388617755, "grad_norm": 0.047119140625, "learning_rate": 1.7077573908799122e-05, "loss": 4.4339, "step": 13710 }, { "epoch": 0.5028838785140968, "grad_norm": 0.045654296875, "learning_rate": 1.7073087586867718e-05, "loss": 4.4111, "step": 13720 }, { "epoch": 0.503287218166418, "grad_norm": 0.04736328125, "learning_rate": 1.7068598414285216e-05, "loss": 4.458, "step": 13730 }, { "epoch": 0.5036905578187392, "grad_norm": 0.045166015625, "learning_rate": 1.7064106392860885e-05, "loss": 4.4201, "step": 13740 }, { "epoch": 0.5040938974710604, "grad_norm": 0.0458984375, "learning_rate": 1.7059611524405127e-05, "loss": 4.4575, "step": 13750 }, { "epoch": 0.5044972371233816, "grad_norm": 0.046142578125, "learning_rate": 1.7055113810729502e-05, "loss": 4.4056, "step": 13760 }, { "epoch": 0.5049005767757028, "grad_norm": 0.043212890625, "learning_rate": 1.7050613253646714e-05, "loss": 4.4356, "step": 13770 }, { "epoch": 0.5053039164280241, "grad_norm": 0.044189453125, "learning_rate": 1.7046109854970612e-05, "loss": 4.3952, "step": 13780 }, { "epoch": 0.5057072560803453, "grad_norm": 0.04443359375, "learning_rate": 1.7041603616516188e-05, "loss": 4.4122, "step": 13790 }, { "epoch": 0.5061105957326665, "grad_norm": 0.04345703125, "learning_rate": 1.7037094540099588e-05, "loss": 4.4373, "step": 13800 }, { "epoch": 0.5065139353849877, "grad_norm": 0.044189453125, "learning_rate": 1.703258262753809e-05, "loss": 4.3739, "step": 13810 }, { "epoch": 0.5069172750373089, "grad_norm": 0.044677734375, "learning_rate": 1.7028067880650123e-05, "loss": 4.4019, "step": 13820 }, { "epoch": 0.5073206146896302, "grad_norm": 0.042724609375, "learning_rate": 1.7023550301255254e-05, "loss": 4.445, "step": 13830 }, { "epoch": 0.5077239543419514, "grad_norm": 0.044677734375, "learning_rate": 1.7019029891174192e-05, "loss": 4.4211, "step": 13840 }, { "epoch": 0.5081272939942726, "grad_norm": 0.045654296875, "learning_rate": 1.7014506652228794e-05, "loss": 4.4103, "step": 13850 }, { "epoch": 0.5085306336465938, "grad_norm": 0.04833984375, "learning_rate": 1.7009980586242043e-05, "loss": 4.4145, "step": 13860 }, { "epoch": 0.508933973298915, "grad_norm": 0.0458984375, "learning_rate": 1.7005451695038077e-05, "loss": 4.4281, "step": 13870 }, { "epoch": 0.5093373129512362, "grad_norm": 0.044677734375, "learning_rate": 1.7000919980442158e-05, "loss": 4.4164, "step": 13880 }, { "epoch": 0.5097406526035575, "grad_norm": 0.04443359375, "learning_rate": 1.69963854442807e-05, "loss": 4.416, "step": 13890 }, { "epoch": 0.5101439922558787, "grad_norm": 0.04443359375, "learning_rate": 1.699184808838124e-05, "loss": 4.4177, "step": 13900 }, { "epoch": 0.5105473319081999, "grad_norm": 0.04638671875, "learning_rate": 1.698730791457247e-05, "loss": 4.3975, "step": 13910 }, { "epoch": 0.5109506715605211, "grad_norm": 0.04541015625, "learning_rate": 1.6982764924684192e-05, "loss": 4.4133, "step": 13920 }, { "epoch": 0.5113540112128423, "grad_norm": 0.04541015625, "learning_rate": 1.697821912054737e-05, "loss": 4.4302, "step": 13930 }, { "epoch": 0.5117573508651636, "grad_norm": 0.04248046875, "learning_rate": 1.6973670503994078e-05, "loss": 4.437, "step": 13940 }, { "epoch": 0.5121606905174848, "grad_norm": 0.04296875, "learning_rate": 1.6969119076857542e-05, "loss": 4.418, "step": 13950 }, { "epoch": 0.512564030169806, "grad_norm": 0.043701171875, "learning_rate": 1.6964564840972117e-05, "loss": 4.4197, "step": 13960 }, { "epoch": 0.5129673698221272, "grad_norm": 0.044189453125, "learning_rate": 1.6960007798173277e-05, "loss": 4.3958, "step": 13970 }, { "epoch": 0.5133707094744484, "grad_norm": 0.045166015625, "learning_rate": 1.695544795029765e-05, "loss": 4.3988, "step": 13980 }, { "epoch": 0.5137740491267696, "grad_norm": 0.044189453125, "learning_rate": 1.695088529918297e-05, "loss": 4.4312, "step": 13990 }, { "epoch": 0.5141773887790909, "grad_norm": 0.043212890625, "learning_rate": 1.6946319846668116e-05, "loss": 4.3774, "step": 14000 }, { "epoch": 0.5145807284314121, "grad_norm": 0.046630859375, "learning_rate": 1.6941751594593094e-05, "loss": 4.433, "step": 14010 }, { "epoch": 0.5149840680837333, "grad_norm": 0.046142578125, "learning_rate": 1.6937180544799035e-05, "loss": 4.4322, "step": 14020 }, { "epoch": 0.5153874077360545, "grad_norm": 0.0439453125, "learning_rate": 1.6932606699128195e-05, "loss": 4.4204, "step": 14030 }, { "epoch": 0.5157907473883757, "grad_norm": 0.044921875, "learning_rate": 1.6928030059423976e-05, "loss": 4.4053, "step": 14040 }, { "epoch": 0.516194087040697, "grad_norm": 0.046142578125, "learning_rate": 1.6923450627530872e-05, "loss": 4.4133, "step": 14050 }, { "epoch": 0.5165974266930182, "grad_norm": 0.04248046875, "learning_rate": 1.6918868405294536e-05, "loss": 4.4056, "step": 14060 }, { "epoch": 0.5170007663453394, "grad_norm": 0.044921875, "learning_rate": 1.6914283394561725e-05, "loss": 4.3898, "step": 14070 }, { "epoch": 0.5174041059976606, "grad_norm": 0.044189453125, "learning_rate": 1.690969559718032e-05, "loss": 4.4327, "step": 14080 }, { "epoch": 0.5178074456499818, "grad_norm": 0.047119140625, "learning_rate": 1.6905105014999343e-05, "loss": 4.3907, "step": 14090 }, { "epoch": 0.518210785302303, "grad_norm": 0.0439453125, "learning_rate": 1.690051164986892e-05, "loss": 4.4062, "step": 14100 }, { "epoch": 0.5186141249546243, "grad_norm": 0.046630859375, "learning_rate": 1.6895915503640305e-05, "loss": 4.4172, "step": 14110 }, { "epoch": 0.5190174646069455, "grad_norm": 0.044189453125, "learning_rate": 1.6891316578165875e-05, "loss": 4.4294, "step": 14120 }, { "epoch": 0.5194208042592667, "grad_norm": 0.043701171875, "learning_rate": 1.6886714875299125e-05, "loss": 4.4606, "step": 14130 }, { "epoch": 0.5198241439115879, "grad_norm": 0.04345703125, "learning_rate": 1.6882110396894665e-05, "loss": 4.3943, "step": 14140 }, { "epoch": 0.5202274835639091, "grad_norm": 0.044677734375, "learning_rate": 1.6877503144808228e-05, "loss": 4.4289, "step": 14150 }, { "epoch": 0.5206308232162304, "grad_norm": 0.044189453125, "learning_rate": 1.687289312089667e-05, "loss": 4.4291, "step": 14160 }, { "epoch": 0.5210341628685516, "grad_norm": 0.04638671875, "learning_rate": 1.6868280327017958e-05, "loss": 4.4376, "step": 14170 }, { "epoch": 0.5214375025208728, "grad_norm": 0.04638671875, "learning_rate": 1.6863664765031177e-05, "loss": 4.4174, "step": 14180 }, { "epoch": 0.521840842173194, "grad_norm": 0.043701171875, "learning_rate": 1.685904643679652e-05, "loss": 4.4332, "step": 14190 }, { "epoch": 0.5222441818255152, "grad_norm": 0.0439453125, "learning_rate": 1.6854425344175308e-05, "loss": 4.4177, "step": 14200 }, { "epoch": 0.5226475214778364, "grad_norm": 0.046142578125, "learning_rate": 1.6849801489029968e-05, "loss": 4.4635, "step": 14210 }, { "epoch": 0.5230508611301578, "grad_norm": 0.044189453125, "learning_rate": 1.684517487322404e-05, "loss": 4.4382, "step": 14220 }, { "epoch": 0.523454200782479, "grad_norm": 0.041748046875, "learning_rate": 1.6840545498622183e-05, "loss": 4.4084, "step": 14230 }, { "epoch": 0.5238575404348002, "grad_norm": 0.0458984375, "learning_rate": 1.6835913367090162e-05, "loss": 4.4239, "step": 14240 }, { "epoch": 0.5242608800871214, "grad_norm": 0.04541015625, "learning_rate": 1.6831278480494854e-05, "loss": 4.4361, "step": 14250 }, { "epoch": 0.5246642197394425, "grad_norm": 0.04345703125, "learning_rate": 1.682664084070425e-05, "loss": 4.3911, "step": 14260 }, { "epoch": 0.5250675593917637, "grad_norm": 0.044189453125, "learning_rate": 1.682200044958745e-05, "loss": 4.3864, "step": 14270 }, { "epoch": 0.5254708990440851, "grad_norm": 0.04248046875, "learning_rate": 1.681735730901465e-05, "loss": 4.4192, "step": 14280 }, { "epoch": 0.5258742386964063, "grad_norm": 0.0458984375, "learning_rate": 1.6812711420857172e-05, "loss": 4.3882, "step": 14290 }, { "epoch": 0.5262775783487275, "grad_norm": 0.044921875, "learning_rate": 1.680806278698744e-05, "loss": 4.4431, "step": 14300 }, { "epoch": 0.5266809180010487, "grad_norm": 0.04296875, "learning_rate": 1.6803411409278987e-05, "loss": 4.3976, "step": 14310 }, { "epoch": 0.5270842576533699, "grad_norm": 0.043212890625, "learning_rate": 1.6798757289606434e-05, "loss": 4.4205, "step": 14320 }, { "epoch": 0.5274875973056912, "grad_norm": 0.048583984375, "learning_rate": 1.6794100429845527e-05, "loss": 4.4041, "step": 14330 }, { "epoch": 0.5278909369580124, "grad_norm": 0.046875, "learning_rate": 1.6789440831873112e-05, "loss": 4.4174, "step": 14340 }, { "epoch": 0.5282942766103336, "grad_norm": 0.04638671875, "learning_rate": 1.678477849756714e-05, "loss": 4.4661, "step": 14350 }, { "epoch": 0.5286976162626548, "grad_norm": 0.04443359375, "learning_rate": 1.6780113428806656e-05, "loss": 4.4198, "step": 14360 }, { "epoch": 0.529100955914976, "grad_norm": 0.04296875, "learning_rate": 1.677544562747181e-05, "loss": 4.4093, "step": 14370 }, { "epoch": 0.5295042955672972, "grad_norm": 0.045654296875, "learning_rate": 1.6770775095443863e-05, "loss": 4.4262, "step": 14380 }, { "epoch": 0.5299076352196185, "grad_norm": 0.04736328125, "learning_rate": 1.6766101834605163e-05, "loss": 4.4109, "step": 14390 }, { "epoch": 0.5303109748719397, "grad_norm": 0.045166015625, "learning_rate": 1.676142584683917e-05, "loss": 4.3715, "step": 14400 }, { "epoch": 0.5307143145242609, "grad_norm": 0.044189453125, "learning_rate": 1.6756747134030435e-05, "loss": 4.4018, "step": 14410 }, { "epoch": 0.5311176541765821, "grad_norm": 0.044677734375, "learning_rate": 1.6752065698064605e-05, "loss": 4.4289, "step": 14420 }, { "epoch": 0.5315209938289033, "grad_norm": 0.04345703125, "learning_rate": 1.6747381540828436e-05, "loss": 4.417, "step": 14430 }, { "epoch": 0.5319243334812246, "grad_norm": 0.044189453125, "learning_rate": 1.6742694664209768e-05, "loss": 4.4249, "step": 14440 }, { "epoch": 0.5323276731335458, "grad_norm": 0.04638671875, "learning_rate": 1.673800507009754e-05, "loss": 4.4162, "step": 14450 }, { "epoch": 0.532731012785867, "grad_norm": 0.04296875, "learning_rate": 1.6733312760381803e-05, "loss": 4.3998, "step": 14460 }, { "epoch": 0.5331343524381882, "grad_norm": 0.046142578125, "learning_rate": 1.6728617736953675e-05, "loss": 4.4323, "step": 14470 }, { "epoch": 0.5335376920905094, "grad_norm": 0.04736328125, "learning_rate": 1.6723920001705388e-05, "loss": 4.4111, "step": 14480 }, { "epoch": 0.5339410317428306, "grad_norm": 0.046630859375, "learning_rate": 1.6719219556530255e-05, "loss": 4.4001, "step": 14490 }, { "epoch": 0.5343443713951519, "grad_norm": 0.04345703125, "learning_rate": 1.6714516403322687e-05, "loss": 4.4158, "step": 14500 }, { "epoch": 0.5347477110474731, "grad_norm": 0.04638671875, "learning_rate": 1.670981054397819e-05, "loss": 4.4085, "step": 14510 }, { "epoch": 0.5351510506997943, "grad_norm": 0.0458984375, "learning_rate": 1.670510198039335e-05, "loss": 4.424, "step": 14520 }, { "epoch": 0.5355543903521155, "grad_norm": 0.0458984375, "learning_rate": 1.6700390714465856e-05, "loss": 4.4151, "step": 14530 }, { "epoch": 0.5359577300044367, "grad_norm": 0.04541015625, "learning_rate": 1.6695676748094472e-05, "loss": 4.4092, "step": 14540 }, { "epoch": 0.536361069656758, "grad_norm": 0.0439453125, "learning_rate": 1.6690960083179066e-05, "loss": 4.4203, "step": 14550 }, { "epoch": 0.5367644093090792, "grad_norm": 0.045654296875, "learning_rate": 1.6686240721620577e-05, "loss": 4.3864, "step": 14560 }, { "epoch": 0.5371677489614004, "grad_norm": 0.047119140625, "learning_rate": 1.6681518665321045e-05, "loss": 4.4021, "step": 14570 }, { "epoch": 0.5375710886137216, "grad_norm": 0.046142578125, "learning_rate": 1.6676793916183586e-05, "loss": 4.4185, "step": 14580 }, { "epoch": 0.5379744282660428, "grad_norm": 0.044189453125, "learning_rate": 1.6672066476112408e-05, "loss": 4.4131, "step": 14590 }, { "epoch": 0.538377767918364, "grad_norm": 0.043701171875, "learning_rate": 1.66673363470128e-05, "loss": 4.3964, "step": 14600 }, { "epoch": 0.5387811075706853, "grad_norm": 0.0458984375, "learning_rate": 1.6662603530791133e-05, "loss": 4.4016, "step": 14610 }, { "epoch": 0.5391844472230065, "grad_norm": 0.045166015625, "learning_rate": 1.665786802935487e-05, "loss": 4.4, "step": 14620 }, { "epoch": 0.5395877868753277, "grad_norm": 0.04541015625, "learning_rate": 1.6653129844612545e-05, "loss": 4.4189, "step": 14630 }, { "epoch": 0.5399911265276489, "grad_norm": 0.046875, "learning_rate": 1.6648388978473774e-05, "loss": 4.426, "step": 14640 }, { "epoch": 0.5403944661799701, "grad_norm": 0.04833984375, "learning_rate": 1.664364543284927e-05, "loss": 4.44, "step": 14650 }, { "epoch": 0.5407978058322914, "grad_norm": 0.043701171875, "learning_rate": 1.6638899209650803e-05, "loss": 4.4165, "step": 14660 }, { "epoch": 0.5412011454846126, "grad_norm": 0.047119140625, "learning_rate": 1.663415031079124e-05, "loss": 4.4118, "step": 14670 }, { "epoch": 0.5416044851369338, "grad_norm": 0.046875, "learning_rate": 1.6629398738184516e-05, "loss": 4.4229, "step": 14680 }, { "epoch": 0.542007824789255, "grad_norm": 0.045654296875, "learning_rate": 1.6624644493745646e-05, "loss": 4.4013, "step": 14690 }, { "epoch": 0.5424111644415762, "grad_norm": 0.043701171875, "learning_rate": 1.6619887579390727e-05, "loss": 4.4136, "step": 14700 }, { "epoch": 0.5428145040938974, "grad_norm": 0.046875, "learning_rate": 1.661512799703692e-05, "loss": 4.4148, "step": 14710 }, { "epoch": 0.5432178437462187, "grad_norm": 0.043701171875, "learning_rate": 1.6610365748602478e-05, "loss": 4.4134, "step": 14720 }, { "epoch": 0.5436211833985399, "grad_norm": 0.047119140625, "learning_rate": 1.6605600836006712e-05, "loss": 4.4216, "step": 14730 }, { "epoch": 0.5440245230508611, "grad_norm": 0.0458984375, "learning_rate": 1.660083326117002e-05, "loss": 4.4036, "step": 14740 }, { "epoch": 0.5444278627031823, "grad_norm": 0.045166015625, "learning_rate": 1.659606302601387e-05, "loss": 4.4026, "step": 14750 }, { "epoch": 0.5448312023555035, "grad_norm": 0.045166015625, "learning_rate": 1.6591290132460792e-05, "loss": 4.3951, "step": 14760 }, { "epoch": 0.5452345420078248, "grad_norm": 0.04248046875, "learning_rate": 1.6586514582434396e-05, "loss": 4.3729, "step": 14770 }, { "epoch": 0.545637881660146, "grad_norm": 0.04443359375, "learning_rate": 1.6581736377859364e-05, "loss": 4.4065, "step": 14780 }, { "epoch": 0.5460412213124672, "grad_norm": 0.043701171875, "learning_rate": 1.657695552066145e-05, "loss": 4.4455, "step": 14790 }, { "epoch": 0.5464445609647884, "grad_norm": 0.04248046875, "learning_rate": 1.6572172012767457e-05, "loss": 4.4473, "step": 14800 }, { "epoch": 0.5468479006171096, "grad_norm": 0.044189453125, "learning_rate": 1.656738585610529e-05, "loss": 4.4278, "step": 14810 }, { "epoch": 0.5472512402694308, "grad_norm": 0.045654296875, "learning_rate": 1.656259705260389e-05, "loss": 4.4028, "step": 14820 }, { "epoch": 0.5476545799217521, "grad_norm": 0.043212890625, "learning_rate": 1.655780560419329e-05, "loss": 4.4186, "step": 14830 }, { "epoch": 0.5480579195740733, "grad_norm": 0.048095703125, "learning_rate": 1.6553011512804564e-05, "loss": 4.382, "step": 14840 }, { "epoch": 0.5484612592263945, "grad_norm": 0.041748046875, "learning_rate": 1.6548214780369876e-05, "loss": 4.421, "step": 14850 }, { "epoch": 0.5488645988787157, "grad_norm": 0.04345703125, "learning_rate": 1.6543415408822433e-05, "loss": 4.3941, "step": 14860 }, { "epoch": 0.549267938531037, "grad_norm": 0.043701171875, "learning_rate": 1.653861340009652e-05, "loss": 4.4212, "step": 14870 }, { "epoch": 0.5496712781833583, "grad_norm": 0.047607421875, "learning_rate": 1.6533808756127484e-05, "loss": 4.3976, "step": 14880 }, { "epoch": 0.5500746178356795, "grad_norm": 0.04443359375, "learning_rate": 1.6529001478851723e-05, "loss": 4.4187, "step": 14890 }, { "epoch": 0.5504779574880007, "grad_norm": 0.04638671875, "learning_rate": 1.6524191570206706e-05, "loss": 4.4278, "step": 14900 }, { "epoch": 0.5508812971403219, "grad_norm": 0.046142578125, "learning_rate": 1.6519379032130957e-05, "loss": 4.4047, "step": 14910 }, { "epoch": 0.5512846367926431, "grad_norm": 0.04248046875, "learning_rate": 1.651456386656407e-05, "loss": 4.417, "step": 14920 }, { "epoch": 0.5516879764449643, "grad_norm": 0.0439453125, "learning_rate": 1.650974607544669e-05, "loss": 4.4021, "step": 14930 }, { "epoch": 0.5520913160972856, "grad_norm": 0.04345703125, "learning_rate": 1.6504925660720516e-05, "loss": 4.3912, "step": 14940 }, { "epoch": 0.5524946557496068, "grad_norm": 0.044677734375, "learning_rate": 1.6500102624328307e-05, "loss": 4.4056, "step": 14950 }, { "epoch": 0.552897995401928, "grad_norm": 0.04345703125, "learning_rate": 1.6495276968213887e-05, "loss": 4.4078, "step": 14960 }, { "epoch": 0.5533013350542492, "grad_norm": 0.043701171875, "learning_rate": 1.6490448694322124e-05, "loss": 4.4187, "step": 14970 }, { "epoch": 0.5537046747065704, "grad_norm": 0.044921875, "learning_rate": 1.6485617804598955e-05, "loss": 4.4232, "step": 14980 }, { "epoch": 0.5541080143588917, "grad_norm": 0.04443359375, "learning_rate": 1.6480784300991355e-05, "loss": 4.4227, "step": 14990 }, { "epoch": 0.5545113540112129, "grad_norm": 0.0439453125, "learning_rate": 1.6475948185447368e-05, "loss": 4.4315, "step": 15000 }, { "epoch": 0.5549146936635341, "grad_norm": 0.044189453125, "learning_rate": 1.6471109459916075e-05, "loss": 4.3981, "step": 15010 }, { "epoch": 0.5553180333158553, "grad_norm": 0.04443359375, "learning_rate": 1.6466268126347623e-05, "loss": 4.4342, "step": 15020 }, { "epoch": 0.5557213729681765, "grad_norm": 0.0439453125, "learning_rate": 1.64614241866932e-05, "loss": 4.4191, "step": 15030 }, { "epoch": 0.5561247126204977, "grad_norm": 0.043212890625, "learning_rate": 1.645657764290505e-05, "loss": 4.4324, "step": 15040 }, { "epoch": 0.556528052272819, "grad_norm": 0.044921875, "learning_rate": 1.6451728496936466e-05, "loss": 4.4155, "step": 15050 }, { "epoch": 0.5569313919251402, "grad_norm": 0.04736328125, "learning_rate": 1.6446876750741785e-05, "loss": 4.4085, "step": 15060 }, { "epoch": 0.5573347315774614, "grad_norm": 0.044921875, "learning_rate": 1.6442022406276405e-05, "loss": 4.4355, "step": 15070 }, { "epoch": 0.5577380712297826, "grad_norm": 0.0458984375, "learning_rate": 1.6437165465496755e-05, "loss": 4.3971, "step": 15080 }, { "epoch": 0.5581414108821038, "grad_norm": 0.046142578125, "learning_rate": 1.6432305930360314e-05, "loss": 4.3746, "step": 15090 }, { "epoch": 0.5585447505344251, "grad_norm": 0.043212890625, "learning_rate": 1.6427443802825612e-05, "loss": 4.39, "step": 15100 }, { "epoch": 0.5589480901867463, "grad_norm": 0.041259765625, "learning_rate": 1.642257908485223e-05, "loss": 4.4016, "step": 15110 }, { "epoch": 0.5593514298390675, "grad_norm": 0.044921875, "learning_rate": 1.6417711778400775e-05, "loss": 4.426, "step": 15120 }, { "epoch": 0.5597547694913887, "grad_norm": 0.044921875, "learning_rate": 1.641284188543291e-05, "loss": 4.4082, "step": 15130 }, { "epoch": 0.5601581091437099, "grad_norm": 0.0458984375, "learning_rate": 1.6407969407911336e-05, "loss": 4.4447, "step": 15140 }, { "epoch": 0.5605614487960311, "grad_norm": 0.043212890625, "learning_rate": 1.6403094347799798e-05, "loss": 4.4224, "step": 15150 }, { "epoch": 0.5609647884483524, "grad_norm": 0.044677734375, "learning_rate": 1.639821670706309e-05, "loss": 4.3934, "step": 15160 }, { "epoch": 0.5613681281006736, "grad_norm": 0.046630859375, "learning_rate": 1.6393336487667018e-05, "loss": 4.4138, "step": 15170 }, { "epoch": 0.5617714677529948, "grad_norm": 0.04443359375, "learning_rate": 1.638845369157846e-05, "loss": 4.3934, "step": 15180 }, { "epoch": 0.562174807405316, "grad_norm": 0.046630859375, "learning_rate": 1.6383568320765315e-05, "loss": 4.4421, "step": 15190 }, { "epoch": 0.5625781470576372, "grad_norm": 0.044921875, "learning_rate": 1.6378680377196526e-05, "loss": 4.4031, "step": 15200 }, { "epoch": 0.5629814867099584, "grad_norm": 0.044677734375, "learning_rate": 1.6373789862842064e-05, "loss": 4.4488, "step": 15210 }, { "epoch": 0.5633848263622797, "grad_norm": 0.04736328125, "learning_rate": 1.6368896779672948e-05, "loss": 4.4492, "step": 15220 }, { "epoch": 0.5637881660146009, "grad_norm": 0.0419921875, "learning_rate": 1.6364001129661226e-05, "loss": 4.3994, "step": 15230 }, { "epoch": 0.5641915056669221, "grad_norm": 0.043212890625, "learning_rate": 1.6359102914779977e-05, "loss": 4.4234, "step": 15240 }, { "epoch": 0.5645948453192433, "grad_norm": 0.0458984375, "learning_rate": 1.6354202137003323e-05, "loss": 4.4103, "step": 15250 }, { "epoch": 0.5649981849715645, "grad_norm": 0.046630859375, "learning_rate": 1.634929879830641e-05, "loss": 4.4243, "step": 15260 }, { "epoch": 0.5654015246238858, "grad_norm": 0.04443359375, "learning_rate": 1.6344392900665424e-05, "loss": 4.3975, "step": 15270 }, { "epoch": 0.565804864276207, "grad_norm": 0.044921875, "learning_rate": 1.6339484446057573e-05, "loss": 4.4337, "step": 15280 }, { "epoch": 0.5662082039285282, "grad_norm": 0.04443359375, "learning_rate": 1.6334573436461106e-05, "loss": 4.4185, "step": 15290 }, { "epoch": 0.5666115435808494, "grad_norm": 0.04443359375, "learning_rate": 1.632965987385529e-05, "loss": 4.3865, "step": 15300 }, { "epoch": 0.5670148832331706, "grad_norm": 0.041259765625, "learning_rate": 1.6324743760220436e-05, "loss": 4.4075, "step": 15310 }, { "epoch": 0.5674182228854918, "grad_norm": 0.042724609375, "learning_rate": 1.6319825097537868e-05, "loss": 4.4163, "step": 15320 }, { "epoch": 0.5678215625378131, "grad_norm": 0.044677734375, "learning_rate": 1.6314903887789947e-05, "loss": 4.3974, "step": 15330 }, { "epoch": 0.5682249021901343, "grad_norm": 0.0458984375, "learning_rate": 1.6309980132960056e-05, "loss": 4.3849, "step": 15340 }, { "epoch": 0.5686282418424555, "grad_norm": 0.043212890625, "learning_rate": 1.6305053835032607e-05, "loss": 4.4323, "step": 15350 }, { "epoch": 0.5690315814947767, "grad_norm": 0.04296875, "learning_rate": 1.630012499599303e-05, "loss": 4.454, "step": 15360 }, { "epoch": 0.5694349211470979, "grad_norm": 0.042724609375, "learning_rate": 1.6295193617827794e-05, "loss": 4.3913, "step": 15370 }, { "epoch": 0.5698382607994192, "grad_norm": 0.0458984375, "learning_rate": 1.6290259702524373e-05, "loss": 4.4038, "step": 15380 }, { "epoch": 0.5702416004517404, "grad_norm": 0.044189453125, "learning_rate": 1.6285323252071273e-05, "loss": 4.4091, "step": 15390 }, { "epoch": 0.5706449401040616, "grad_norm": 0.045166015625, "learning_rate": 1.6280384268458026e-05, "loss": 4.4327, "step": 15400 }, { "epoch": 0.5710482797563828, "grad_norm": 0.043701171875, "learning_rate": 1.6275442753675173e-05, "loss": 4.4758, "step": 15410 }, { "epoch": 0.571451619408704, "grad_norm": 0.044189453125, "learning_rate": 1.6270498709714287e-05, "loss": 4.4063, "step": 15420 }, { "epoch": 0.5718549590610252, "grad_norm": 0.046630859375, "learning_rate": 1.6265552138567954e-05, "loss": 4.4158, "step": 15430 }, { "epoch": 0.5722582987133465, "grad_norm": 0.044677734375, "learning_rate": 1.626060304222978e-05, "loss": 4.4114, "step": 15440 }, { "epoch": 0.5726616383656677, "grad_norm": 0.0439453125, "learning_rate": 1.6255651422694384e-05, "loss": 4.4322, "step": 15450 }, { "epoch": 0.573064978017989, "grad_norm": 0.046875, "learning_rate": 1.6250697281957416e-05, "loss": 4.4187, "step": 15460 }, { "epoch": 0.5734683176703101, "grad_norm": 0.044677734375, "learning_rate": 1.6245740622015523e-05, "loss": 4.4361, "step": 15470 }, { "epoch": 0.5738716573226313, "grad_norm": 0.04443359375, "learning_rate": 1.6240781444866386e-05, "loss": 4.4221, "step": 15480 }, { "epoch": 0.5742749969749527, "grad_norm": 0.043212890625, "learning_rate": 1.6235819752508685e-05, "loss": 4.4131, "step": 15490 }, { "epoch": 0.5746783366272739, "grad_norm": 0.044189453125, "learning_rate": 1.6230855546942118e-05, "loss": 4.4112, "step": 15500 }, { "epoch": 0.575081676279595, "grad_norm": 0.045654296875, "learning_rate": 1.6225888830167404e-05, "loss": 4.448, "step": 15510 }, { "epoch": 0.5754850159319163, "grad_norm": 0.046630859375, "learning_rate": 1.6220919604186265e-05, "loss": 4.4067, "step": 15520 }, { "epoch": 0.5758883555842375, "grad_norm": 0.04638671875, "learning_rate": 1.621594787100144e-05, "loss": 4.4336, "step": 15530 }, { "epoch": 0.5762916952365587, "grad_norm": 0.04296875, "learning_rate": 1.6210973632616667e-05, "loss": 4.4151, "step": 15540 }, { "epoch": 0.57669503488888, "grad_norm": 0.043701171875, "learning_rate": 1.6205996891036715e-05, "loss": 4.388, "step": 15550 }, { "epoch": 0.5770983745412012, "grad_norm": 0.0439453125, "learning_rate": 1.620101764826734e-05, "loss": 4.3875, "step": 15560 }, { "epoch": 0.5775017141935224, "grad_norm": 0.046630859375, "learning_rate": 1.619603590631532e-05, "loss": 4.4055, "step": 15570 }, { "epoch": 0.5779050538458436, "grad_norm": 0.045654296875, "learning_rate": 1.6191051667188436e-05, "loss": 4.4114, "step": 15580 }, { "epoch": 0.5783083934981648, "grad_norm": 0.044677734375, "learning_rate": 1.618606493289547e-05, "loss": 4.4387, "step": 15590 }, { "epoch": 0.5787117331504861, "grad_norm": 0.04443359375, "learning_rate": 1.618107570544622e-05, "loss": 4.4373, "step": 15600 }, { "epoch": 0.5791150728028073, "grad_norm": 0.0419921875, "learning_rate": 1.6176083986851483e-05, "loss": 4.3753, "step": 15610 }, { "epoch": 0.5795184124551285, "grad_norm": 0.047119140625, "learning_rate": 1.6171089779123058e-05, "loss": 4.4006, "step": 15620 }, { "epoch": 0.5799217521074497, "grad_norm": 0.043701171875, "learning_rate": 1.616609308427375e-05, "loss": 4.424, "step": 15630 }, { "epoch": 0.5803250917597709, "grad_norm": 0.044677734375, "learning_rate": 1.6161093904317366e-05, "loss": 4.4121, "step": 15640 }, { "epoch": 0.5807284314120921, "grad_norm": 0.046630859375, "learning_rate": 1.6156092241268716e-05, "loss": 4.4082, "step": 15650 }, { "epoch": 0.5811317710644134, "grad_norm": 0.04345703125, "learning_rate": 1.615108809714361e-05, "loss": 4.4174, "step": 15660 }, { "epoch": 0.5815351107167346, "grad_norm": 0.044677734375, "learning_rate": 1.6146081473958853e-05, "loss": 4.4353, "step": 15670 }, { "epoch": 0.5819384503690558, "grad_norm": 0.04541015625, "learning_rate": 1.6141072373732258e-05, "loss": 4.4286, "step": 15680 }, { "epoch": 0.582341790021377, "grad_norm": 0.04541015625, "learning_rate": 1.613606079848263e-05, "loss": 4.4102, "step": 15690 }, { "epoch": 0.5827451296736982, "grad_norm": 0.044189453125, "learning_rate": 1.6131046750229778e-05, "loss": 4.4182, "step": 15700 }, { "epoch": 0.5831484693260195, "grad_norm": 0.04638671875, "learning_rate": 1.612603023099449e-05, "loss": 4.4148, "step": 15710 }, { "epoch": 0.5835518089783407, "grad_norm": 0.04248046875, "learning_rate": 1.612101124279858e-05, "loss": 4.4269, "step": 15720 }, { "epoch": 0.5839551486306619, "grad_norm": 0.046142578125, "learning_rate": 1.6115989787664828e-05, "loss": 4.439, "step": 15730 }, { "epoch": 0.5843584882829831, "grad_norm": 0.044189453125, "learning_rate": 1.611096586761702e-05, "loss": 4.3966, "step": 15740 }, { "epoch": 0.5847618279353043, "grad_norm": 0.044677734375, "learning_rate": 1.610593948467994e-05, "loss": 4.393, "step": 15750 }, { "epoch": 0.5851651675876255, "grad_norm": 0.04541015625, "learning_rate": 1.6100910640879362e-05, "loss": 4.4339, "step": 15760 }, { "epoch": 0.5855685072399468, "grad_norm": 0.046875, "learning_rate": 1.6095879338242047e-05, "loss": 4.4491, "step": 15770 }, { "epoch": 0.585971846892268, "grad_norm": 0.04345703125, "learning_rate": 1.6090845578795748e-05, "loss": 4.39, "step": 15780 }, { "epoch": 0.5863751865445892, "grad_norm": 0.04443359375, "learning_rate": 1.6085809364569214e-05, "loss": 4.414, "step": 15790 }, { "epoch": 0.5867785261969104, "grad_norm": 0.044677734375, "learning_rate": 1.6080770697592176e-05, "loss": 4.42, "step": 15800 }, { "epoch": 0.5871818658492316, "grad_norm": 0.048583984375, "learning_rate": 1.607572957989536e-05, "loss": 4.3749, "step": 15810 }, { "epoch": 0.5875852055015529, "grad_norm": 0.044921875, "learning_rate": 1.6070686013510474e-05, "loss": 4.3908, "step": 15820 }, { "epoch": 0.5879885451538741, "grad_norm": 0.044189453125, "learning_rate": 1.606564000047022e-05, "loss": 4.4017, "step": 15830 }, { "epoch": 0.5883918848061953, "grad_norm": 0.046875, "learning_rate": 1.606059154280827e-05, "loss": 4.4151, "step": 15840 }, { "epoch": 0.5887952244585165, "grad_norm": 0.043701171875, "learning_rate": 1.6055540642559307e-05, "loss": 4.4113, "step": 15850 }, { "epoch": 0.5891985641108377, "grad_norm": 0.044677734375, "learning_rate": 1.605048730175898e-05, "loss": 4.4362, "step": 15860 }, { "epoch": 0.5896019037631589, "grad_norm": 0.0458984375, "learning_rate": 1.6045431522443924e-05, "loss": 4.4204, "step": 15870 }, { "epoch": 0.5900052434154802, "grad_norm": 0.043701171875, "learning_rate": 1.6040373306651757e-05, "loss": 4.4127, "step": 15880 }, { "epoch": 0.5904085830678014, "grad_norm": 0.04541015625, "learning_rate": 1.6035312656421083e-05, "loss": 4.4065, "step": 15890 }, { "epoch": 0.5908119227201226, "grad_norm": 0.046142578125, "learning_rate": 1.603024957379149e-05, "loss": 4.4673, "step": 15900 }, { "epoch": 0.5912152623724438, "grad_norm": 0.04638671875, "learning_rate": 1.6025184060803528e-05, "loss": 4.4056, "step": 15910 }, { "epoch": 0.591618602024765, "grad_norm": 0.045166015625, "learning_rate": 1.6020116119498757e-05, "loss": 4.427, "step": 15920 }, { "epoch": 0.5920219416770863, "grad_norm": 0.04638671875, "learning_rate": 1.6015045751919683e-05, "loss": 4.4411, "step": 15930 }, { "epoch": 0.5924252813294075, "grad_norm": 0.044677734375, "learning_rate": 1.600997296010981e-05, "loss": 4.4149, "step": 15940 }, { "epoch": 0.5928286209817287, "grad_norm": 0.046875, "learning_rate": 1.600489774611362e-05, "loss": 4.3579, "step": 15950 }, { "epoch": 0.5932319606340499, "grad_norm": 0.043212890625, "learning_rate": 1.599982011197656e-05, "loss": 4.4039, "step": 15960 }, { "epoch": 0.5936353002863711, "grad_norm": 0.042236328125, "learning_rate": 1.599474005974506e-05, "loss": 4.3751, "step": 15970 }, { "epoch": 0.5940386399386923, "grad_norm": 0.043212890625, "learning_rate": 1.5989657591466518e-05, "loss": 4.3616, "step": 15980 }, { "epoch": 0.5944419795910136, "grad_norm": 0.045166015625, "learning_rate": 1.5984572709189315e-05, "loss": 4.4367, "step": 15990 }, { "epoch": 0.5948453192433348, "grad_norm": 0.042236328125, "learning_rate": 1.59794854149628e-05, "loss": 4.4043, "step": 16000 }, { "epoch": 0.595248658895656, "grad_norm": 0.043701171875, "learning_rate": 1.597439571083729e-05, "loss": 4.4093, "step": 16010 }, { "epoch": 0.5956519985479772, "grad_norm": 0.045654296875, "learning_rate": 1.5969303598864082e-05, "loss": 4.4181, "step": 16020 }, { "epoch": 0.5960553382002984, "grad_norm": 0.04345703125, "learning_rate": 1.5964209081095436e-05, "loss": 4.4462, "step": 16030 }, { "epoch": 0.5964586778526197, "grad_norm": 0.046142578125, "learning_rate": 1.5959112159584586e-05, "loss": 4.4082, "step": 16040 }, { "epoch": 0.596862017504941, "grad_norm": 0.04833984375, "learning_rate": 1.595401283638573e-05, "loss": 4.4153, "step": 16050 }, { "epoch": 0.5972653571572621, "grad_norm": 0.045654296875, "learning_rate": 1.5948911113554042e-05, "loss": 4.3701, "step": 16060 }, { "epoch": 0.5976686968095833, "grad_norm": 0.046142578125, "learning_rate": 1.594380699314565e-05, "loss": 4.4197, "step": 16070 }, { "epoch": 0.5980720364619045, "grad_norm": 0.043701171875, "learning_rate": 1.593870047721767e-05, "loss": 4.4022, "step": 16080 }, { "epoch": 0.5984753761142257, "grad_norm": 0.0458984375, "learning_rate": 1.593359156782816e-05, "loss": 4.4078, "step": 16090 }, { "epoch": 0.598878715766547, "grad_norm": 0.0458984375, "learning_rate": 1.5928480267036156e-05, "loss": 4.4127, "step": 16100 }, { "epoch": 0.5992820554188683, "grad_norm": 0.0439453125, "learning_rate": 1.5923366576901646e-05, "loss": 4.3962, "step": 16110 }, { "epoch": 0.5996853950711895, "grad_norm": 0.045166015625, "learning_rate": 1.5918250499485605e-05, "loss": 4.437, "step": 16120 }, { "epoch": 0.6000887347235107, "grad_norm": 0.044677734375, "learning_rate": 1.591313203684994e-05, "loss": 4.4124, "step": 16130 }, { "epoch": 0.6004920743758319, "grad_norm": 0.044189453125, "learning_rate": 1.5908011191057545e-05, "loss": 4.3923, "step": 16140 }, { "epoch": 0.6008954140281532, "grad_norm": 0.044677734375, "learning_rate": 1.590288796417226e-05, "loss": 4.399, "step": 16150 }, { "epoch": 0.6012987536804744, "grad_norm": 0.044677734375, "learning_rate": 1.589776235825888e-05, "loss": 4.3956, "step": 16160 }, { "epoch": 0.6017020933327956, "grad_norm": 0.043701171875, "learning_rate": 1.5892634375383176e-05, "loss": 4.4188, "step": 16170 }, { "epoch": 0.6021054329851168, "grad_norm": 0.04296875, "learning_rate": 1.5887504017611866e-05, "loss": 4.4107, "step": 16180 }, { "epoch": 0.602508772637438, "grad_norm": 0.045654296875, "learning_rate": 1.5882371287012624e-05, "loss": 4.4239, "step": 16190 }, { "epoch": 0.6029121122897592, "grad_norm": 0.04638671875, "learning_rate": 1.5877236185654087e-05, "loss": 4.4182, "step": 16200 }, { "epoch": 0.6033154519420805, "grad_norm": 0.046142578125, "learning_rate": 1.5872098715605843e-05, "loss": 4.4086, "step": 16210 }, { "epoch": 0.6037187915944017, "grad_norm": 0.046630859375, "learning_rate": 1.5866958878938433e-05, "loss": 4.4102, "step": 16220 }, { "epoch": 0.6041221312467229, "grad_norm": 0.042724609375, "learning_rate": 1.5861816677723355e-05, "loss": 4.4091, "step": 16230 }, { "epoch": 0.6045254708990441, "grad_norm": 0.043212890625, "learning_rate": 1.5856672114033058e-05, "loss": 4.4336, "step": 16240 }, { "epoch": 0.6049288105513653, "grad_norm": 0.045654296875, "learning_rate": 1.585152518994095e-05, "loss": 4.4391, "step": 16250 }, { "epoch": 0.6053321502036865, "grad_norm": 0.044677734375, "learning_rate": 1.5846375907521375e-05, "loss": 4.389, "step": 16260 }, { "epoch": 0.6057354898560078, "grad_norm": 0.04345703125, "learning_rate": 1.5841224268849646e-05, "loss": 4.4128, "step": 16270 }, { "epoch": 0.606138829508329, "grad_norm": 0.04541015625, "learning_rate": 1.5836070276002013e-05, "loss": 4.4089, "step": 16280 }, { "epoch": 0.6065421691606502, "grad_norm": 0.0439453125, "learning_rate": 1.5830913931055678e-05, "loss": 4.4448, "step": 16290 }, { "epoch": 0.6069455088129714, "grad_norm": 0.044677734375, "learning_rate": 1.5825755236088794e-05, "loss": 4.4196, "step": 16300 }, { "epoch": 0.6073488484652926, "grad_norm": 0.045166015625, "learning_rate": 1.582059419318046e-05, "loss": 4.3875, "step": 16310 }, { "epoch": 0.6077521881176139, "grad_norm": 0.043701171875, "learning_rate": 1.581543080441071e-05, "loss": 4.4306, "step": 16320 }, { "epoch": 0.6081555277699351, "grad_norm": 0.04296875, "learning_rate": 1.5810265071860545e-05, "loss": 4.4306, "step": 16330 }, { "epoch": 0.6085588674222563, "grad_norm": 0.043212890625, "learning_rate": 1.5805096997611893e-05, "loss": 4.4037, "step": 16340 }, { "epoch": 0.6089622070745775, "grad_norm": 0.045166015625, "learning_rate": 1.579992658374763e-05, "loss": 4.3986, "step": 16350 }, { "epoch": 0.6093655467268987, "grad_norm": 0.046142578125, "learning_rate": 1.5794753832351584e-05, "loss": 4.3975, "step": 16360 }, { "epoch": 0.6097688863792199, "grad_norm": 0.043701171875, "learning_rate": 1.578957874550851e-05, "loss": 4.4348, "step": 16370 }, { "epoch": 0.6101722260315412, "grad_norm": 0.0439453125, "learning_rate": 1.578440132530412e-05, "loss": 4.3818, "step": 16380 }, { "epoch": 0.6105755656838624, "grad_norm": 0.04345703125, "learning_rate": 1.5779221573825046e-05, "loss": 4.407, "step": 16390 }, { "epoch": 0.6109789053361836, "grad_norm": 0.04443359375, "learning_rate": 1.5774039493158884e-05, "loss": 4.4095, "step": 16400 }, { "epoch": 0.6113822449885048, "grad_norm": 0.04296875, "learning_rate": 1.576885508539415e-05, "loss": 4.4221, "step": 16410 }, { "epoch": 0.611785584640826, "grad_norm": 0.043701171875, "learning_rate": 1.576366835262031e-05, "loss": 4.3968, "step": 16420 }, { "epoch": 0.6121889242931473, "grad_norm": 0.04541015625, "learning_rate": 1.5758479296927756e-05, "loss": 4.395, "step": 16430 }, { "epoch": 0.6125922639454685, "grad_norm": 0.04345703125, "learning_rate": 1.575328792040783e-05, "loss": 4.4439, "step": 16440 }, { "epoch": 0.6129956035977897, "grad_norm": 0.0439453125, "learning_rate": 1.574809422515279e-05, "loss": 4.3746, "step": 16450 }, { "epoch": 0.6133989432501109, "grad_norm": 0.04638671875, "learning_rate": 1.5742898213255844e-05, "loss": 4.3976, "step": 16460 }, { "epoch": 0.6138022829024321, "grad_norm": 0.046630859375, "learning_rate": 1.573769988681113e-05, "loss": 4.4185, "step": 16470 }, { "epoch": 0.6142056225547533, "grad_norm": 0.04638671875, "learning_rate": 1.5732499247913722e-05, "loss": 4.3893, "step": 16480 }, { "epoch": 0.6146089622070746, "grad_norm": 0.044921875, "learning_rate": 1.572729629865962e-05, "loss": 4.3883, "step": 16490 }, { "epoch": 0.6150123018593958, "grad_norm": 0.044921875, "learning_rate": 1.5722091041145752e-05, "loss": 4.4028, "step": 16500 }, { "epoch": 0.615415641511717, "grad_norm": 0.04541015625, "learning_rate": 1.5716883477469988e-05, "loss": 4.4199, "step": 16510 }, { "epoch": 0.6158189811640382, "grad_norm": 0.044677734375, "learning_rate": 1.571167360973112e-05, "loss": 4.414, "step": 16520 }, { "epoch": 0.6162223208163594, "grad_norm": 0.044677734375, "learning_rate": 1.5706461440028865e-05, "loss": 4.3665, "step": 16530 }, { "epoch": 0.6166256604686807, "grad_norm": 0.04296875, "learning_rate": 1.5701246970463883e-05, "loss": 4.4263, "step": 16540 }, { "epoch": 0.6170290001210019, "grad_norm": 0.044677734375, "learning_rate": 1.5696030203137737e-05, "loss": 4.429, "step": 16550 }, { "epoch": 0.6174323397733231, "grad_norm": 0.044921875, "learning_rate": 1.5690811140152937e-05, "loss": 4.3953, "step": 16560 }, { "epoch": 0.6178356794256443, "grad_norm": 0.04541015625, "learning_rate": 1.568558978361291e-05, "loss": 4.4122, "step": 16570 }, { "epoch": 0.6182390190779655, "grad_norm": 0.04248046875, "learning_rate": 1.5680366135622006e-05, "loss": 4.4138, "step": 16580 }, { "epoch": 0.6186423587302867, "grad_norm": 0.044921875, "learning_rate": 1.5675140198285505e-05, "loss": 4.4005, "step": 16590 }, { "epoch": 0.619045698382608, "grad_norm": 0.046630859375, "learning_rate": 1.5669911973709598e-05, "loss": 4.4507, "step": 16600 }, { "epoch": 0.6194490380349292, "grad_norm": 0.041748046875, "learning_rate": 1.566468146400141e-05, "loss": 4.4205, "step": 16610 }, { "epoch": 0.6198523776872504, "grad_norm": 0.044921875, "learning_rate": 1.5659448671268978e-05, "loss": 4.3944, "step": 16620 }, { "epoch": 0.6202557173395716, "grad_norm": 0.0439453125, "learning_rate": 1.565421359762127e-05, "loss": 4.416, "step": 16630 }, { "epoch": 0.6206590569918928, "grad_norm": 0.04150390625, "learning_rate": 1.5648976245168164e-05, "loss": 4.4271, "step": 16640 }, { "epoch": 0.6210623966442141, "grad_norm": 0.0458984375, "learning_rate": 1.564373661602045e-05, "loss": 4.3855, "step": 16650 }, { "epoch": 0.6214657362965353, "grad_norm": 0.044921875, "learning_rate": 1.5638494712289856e-05, "loss": 4.388, "step": 16660 }, { "epoch": 0.6218690759488565, "grad_norm": 0.044677734375, "learning_rate": 1.5633250536089007e-05, "loss": 4.4397, "step": 16670 }, { "epoch": 0.6222724156011777, "grad_norm": 0.04443359375, "learning_rate": 1.5628004089531458e-05, "loss": 4.4163, "step": 16680 }, { "epoch": 0.6226757552534989, "grad_norm": 0.044921875, "learning_rate": 1.562275537473167e-05, "loss": 4.3983, "step": 16690 }, { "epoch": 0.6230790949058201, "grad_norm": 0.044921875, "learning_rate": 1.5617504393805023e-05, "loss": 4.4323, "step": 16700 }, { "epoch": 0.6234824345581415, "grad_norm": 0.0458984375, "learning_rate": 1.5612251148867805e-05, "loss": 4.3723, "step": 16710 }, { "epoch": 0.6238857742104627, "grad_norm": 0.045166015625, "learning_rate": 1.5606995642037223e-05, "loss": 4.3998, "step": 16720 }, { "epoch": 0.6242891138627839, "grad_norm": 0.044921875, "learning_rate": 1.5601737875431385e-05, "loss": 4.4435, "step": 16730 }, { "epoch": 0.624692453515105, "grad_norm": 0.0439453125, "learning_rate": 1.559647785116933e-05, "loss": 4.4077, "step": 16740 }, { "epoch": 0.6250957931674263, "grad_norm": 0.0439453125, "learning_rate": 1.5591215571370982e-05, "loss": 4.4009, "step": 16750 }, { "epoch": 0.6254991328197476, "grad_norm": 0.045654296875, "learning_rate": 1.5585951038157193e-05, "loss": 4.4277, "step": 16760 }, { "epoch": 0.6259024724720688, "grad_norm": 0.04296875, "learning_rate": 1.558068425364972e-05, "loss": 4.4397, "step": 16770 }, { "epoch": 0.62630581212439, "grad_norm": 0.045166015625, "learning_rate": 1.557541521997121e-05, "loss": 4.4117, "step": 16780 }, { "epoch": 0.6267091517767112, "grad_norm": 0.047119140625, "learning_rate": 1.557014393924525e-05, "loss": 4.4268, "step": 16790 }, { "epoch": 0.6271124914290324, "grad_norm": 0.04150390625, "learning_rate": 1.5564870413596293e-05, "loss": 4.4001, "step": 16800 }, { "epoch": 0.6275158310813536, "grad_norm": 0.047607421875, "learning_rate": 1.5559594645149725e-05, "loss": 4.3899, "step": 16810 }, { "epoch": 0.6279191707336749, "grad_norm": 0.046142578125, "learning_rate": 1.555431663603183e-05, "loss": 4.4073, "step": 16820 }, { "epoch": 0.6283225103859961, "grad_norm": 0.043212890625, "learning_rate": 1.554903638836979e-05, "loss": 4.4274, "step": 16830 }, { "epoch": 0.6287258500383173, "grad_norm": 0.045654296875, "learning_rate": 1.5543753904291693e-05, "loss": 4.4261, "step": 16840 }, { "epoch": 0.6291291896906385, "grad_norm": 0.045166015625, "learning_rate": 1.5538469185926523e-05, "loss": 4.4079, "step": 16850 }, { "epoch": 0.6295325293429597, "grad_norm": 0.04541015625, "learning_rate": 1.5533182235404178e-05, "loss": 4.3822, "step": 16860 }, { "epoch": 0.629935868995281, "grad_norm": 0.043701171875, "learning_rate": 1.552789305485543e-05, "loss": 4.3911, "step": 16870 }, { "epoch": 0.6303392086476022, "grad_norm": 0.044921875, "learning_rate": 1.552260164641198e-05, "loss": 4.4135, "step": 16880 }, { "epoch": 0.6307425482999234, "grad_norm": 0.045166015625, "learning_rate": 1.5517308012206412e-05, "loss": 4.4121, "step": 16890 }, { "epoch": 0.6311458879522446, "grad_norm": 0.044189453125, "learning_rate": 1.55120121543722e-05, "loss": 4.43, "step": 16900 }, { "epoch": 0.6315492276045658, "grad_norm": 0.042724609375, "learning_rate": 1.5506714075043728e-05, "loss": 4.4179, "step": 16910 }, { "epoch": 0.631952567256887, "grad_norm": 0.042724609375, "learning_rate": 1.5501413776356267e-05, "loss": 4.4365, "step": 16920 }, { "epoch": 0.6323559069092083, "grad_norm": 0.044921875, "learning_rate": 1.5496111260445984e-05, "loss": 4.4277, "step": 16930 }, { "epoch": 0.6327592465615295, "grad_norm": 0.04443359375, "learning_rate": 1.5490806529449945e-05, "loss": 4.3878, "step": 16940 }, { "epoch": 0.6331625862138507, "grad_norm": 0.04443359375, "learning_rate": 1.54854995855061e-05, "loss": 4.4158, "step": 16950 }, { "epoch": 0.6335659258661719, "grad_norm": 0.042236328125, "learning_rate": 1.5480190430753297e-05, "loss": 4.383, "step": 16960 }, { "epoch": 0.6339692655184931, "grad_norm": 0.0458984375, "learning_rate": 1.547487906733127e-05, "loss": 4.4348, "step": 16970 }, { "epoch": 0.6343726051708144, "grad_norm": 0.044189453125, "learning_rate": 1.5469565497380653e-05, "loss": 4.4168, "step": 16980 }, { "epoch": 0.6347759448231356, "grad_norm": 0.04443359375, "learning_rate": 1.5464249723042954e-05, "loss": 4.3982, "step": 16990 }, { "epoch": 0.6351792844754568, "grad_norm": 0.04443359375, "learning_rate": 1.5458931746460582e-05, "loss": 4.4097, "step": 17000 }, { "epoch": 0.635582624127778, "grad_norm": 0.044921875, "learning_rate": 1.545361156977683e-05, "loss": 4.4058, "step": 17010 }, { "epoch": 0.6359859637800992, "grad_norm": 0.044677734375, "learning_rate": 1.544828919513588e-05, "loss": 4.4276, "step": 17020 }, { "epoch": 0.6363893034324204, "grad_norm": 0.046142578125, "learning_rate": 1.544296462468279e-05, "loss": 4.4223, "step": 17030 }, { "epoch": 0.6367926430847417, "grad_norm": 0.043212890625, "learning_rate": 1.5437637860563517e-05, "loss": 4.4133, "step": 17040 }, { "epoch": 0.6371959827370629, "grad_norm": 0.044921875, "learning_rate": 1.543230890492489e-05, "loss": 4.42, "step": 17050 }, { "epoch": 0.6375993223893841, "grad_norm": 0.04345703125, "learning_rate": 1.5426977759914626e-05, "loss": 4.4281, "step": 17060 }, { "epoch": 0.6380026620417053, "grad_norm": 0.044677734375, "learning_rate": 1.542164442768133e-05, "loss": 4.4388, "step": 17070 }, { "epoch": 0.6384060016940265, "grad_norm": 0.044677734375, "learning_rate": 1.5416308910374483e-05, "loss": 4.4239, "step": 17080 }, { "epoch": 0.6388093413463478, "grad_norm": 0.045166015625, "learning_rate": 1.5410971210144442e-05, "loss": 4.4148, "step": 17090 }, { "epoch": 0.639212680998669, "grad_norm": 0.04296875, "learning_rate": 1.5405631329142453e-05, "loss": 4.4202, "step": 17100 }, { "epoch": 0.6396160206509902, "grad_norm": 0.04345703125, "learning_rate": 1.540028926952063e-05, "loss": 4.4187, "step": 17110 }, { "epoch": 0.6400193603033114, "grad_norm": 0.04443359375, "learning_rate": 1.539494503343198e-05, "loss": 4.3976, "step": 17120 }, { "epoch": 0.6404226999556326, "grad_norm": 0.046630859375, "learning_rate": 1.5389598623030372e-05, "loss": 4.4154, "step": 17130 }, { "epoch": 0.6408260396079538, "grad_norm": 0.044921875, "learning_rate": 1.5384250040470568e-05, "loss": 4.4101, "step": 17140 }, { "epoch": 0.6412293792602751, "grad_norm": 0.04443359375, "learning_rate": 1.5378899287908186e-05, "loss": 4.4104, "step": 17150 }, { "epoch": 0.6416327189125963, "grad_norm": 0.049560546875, "learning_rate": 1.537354636749973e-05, "loss": 4.4146, "step": 17160 }, { "epoch": 0.6420360585649175, "grad_norm": 0.0458984375, "learning_rate": 1.536819128140258e-05, "loss": 4.4205, "step": 17170 }, { "epoch": 0.6424393982172387, "grad_norm": 0.046875, "learning_rate": 1.5362834031774975e-05, "loss": 4.4154, "step": 17180 }, { "epoch": 0.6428427378695599, "grad_norm": 0.046142578125, "learning_rate": 1.535747462077605e-05, "loss": 4.4835, "step": 17190 }, { "epoch": 0.6432460775218811, "grad_norm": 0.045166015625, "learning_rate": 1.5352113050565784e-05, "loss": 4.4198, "step": 17200 }, { "epoch": 0.6436494171742024, "grad_norm": 0.045166015625, "learning_rate": 1.5346749323305043e-05, "loss": 4.441, "step": 17210 }, { "epoch": 0.6440527568265236, "grad_norm": 0.04150390625, "learning_rate": 1.534138344115556e-05, "loss": 4.4151, "step": 17220 }, { "epoch": 0.6444560964788448, "grad_norm": 0.044189453125, "learning_rate": 1.533601540627994e-05, "loss": 4.3923, "step": 17230 }, { "epoch": 0.644859436131166, "grad_norm": 0.04345703125, "learning_rate": 1.533064522084164e-05, "loss": 4.3969, "step": 17240 }, { "epoch": 0.6452627757834872, "grad_norm": 0.045654296875, "learning_rate": 1.5325272887005002e-05, "loss": 4.4082, "step": 17250 }, { "epoch": 0.6456661154358085, "grad_norm": 0.045654296875, "learning_rate": 1.5319898406935218e-05, "loss": 4.4295, "step": 17260 }, { "epoch": 0.6460694550881297, "grad_norm": 0.044921875, "learning_rate": 1.531452178279836e-05, "loss": 4.4082, "step": 17270 }, { "epoch": 0.6464727947404509, "grad_norm": 0.047607421875, "learning_rate": 1.530914301676136e-05, "loss": 4.4244, "step": 17280 }, { "epoch": 0.6468761343927721, "grad_norm": 0.044189453125, "learning_rate": 1.5303762110992e-05, "loss": 4.4174, "step": 17290 }, { "epoch": 0.6472794740450933, "grad_norm": 0.044677734375, "learning_rate": 1.529837906765894e-05, "loss": 4.4231, "step": 17300 }, { "epoch": 0.6476828136974145, "grad_norm": 0.044677734375, "learning_rate": 1.5292993888931695e-05, "loss": 4.3812, "step": 17310 }, { "epoch": 0.6480861533497358, "grad_norm": 0.0458984375, "learning_rate": 1.5287606576980648e-05, "loss": 4.388, "step": 17320 }, { "epoch": 0.648489493002057, "grad_norm": 0.0458984375, "learning_rate": 1.5282217133977028e-05, "loss": 4.3875, "step": 17330 }, { "epoch": 0.6488928326543782, "grad_norm": 0.04638671875, "learning_rate": 1.527682556209293e-05, "loss": 4.4089, "step": 17340 }, { "epoch": 0.6492961723066994, "grad_norm": 0.041748046875, "learning_rate": 1.5271431863501317e-05, "loss": 4.418, "step": 17350 }, { "epoch": 0.6496995119590206, "grad_norm": 0.04296875, "learning_rate": 1.5266036040375988e-05, "loss": 4.4138, "step": 17360 }, { "epoch": 0.650102851611342, "grad_norm": 0.044677734375, "learning_rate": 1.526063809489162e-05, "loss": 4.3939, "step": 17370 }, { "epoch": 0.6505061912636632, "grad_norm": 0.045654296875, "learning_rate": 1.5255238029223728e-05, "loss": 4.4364, "step": 17380 }, { "epoch": 0.6509095309159844, "grad_norm": 0.044677734375, "learning_rate": 1.5249835845548688e-05, "loss": 4.39, "step": 17390 }, { "epoch": 0.6513128705683056, "grad_norm": 0.043212890625, "learning_rate": 1.5244431546043736e-05, "loss": 4.4205, "step": 17400 }, { "epoch": 0.6517162102206268, "grad_norm": 0.04833984375, "learning_rate": 1.5239025132886953e-05, "loss": 4.4002, "step": 17410 }, { "epoch": 0.652119549872948, "grad_norm": 0.044189453125, "learning_rate": 1.5233616608257273e-05, "loss": 4.3805, "step": 17420 }, { "epoch": 0.6525228895252693, "grad_norm": 0.044677734375, "learning_rate": 1.5228205974334484e-05, "loss": 4.44, "step": 17430 }, { "epoch": 0.6529262291775905, "grad_norm": 0.044189453125, "learning_rate": 1.5222793233299218e-05, "loss": 4.4114, "step": 17440 }, { "epoch": 0.6533295688299117, "grad_norm": 0.0478515625, "learning_rate": 1.5217378387332967e-05, "loss": 4.4182, "step": 17450 }, { "epoch": 0.6537329084822329, "grad_norm": 0.044921875, "learning_rate": 1.5211961438618054e-05, "loss": 4.3694, "step": 17460 }, { "epoch": 0.6541362481345541, "grad_norm": 0.044677734375, "learning_rate": 1.520654238933767e-05, "loss": 4.4295, "step": 17470 }, { "epoch": 0.6545395877868754, "grad_norm": 0.04541015625, "learning_rate": 1.5201121241675835e-05, "loss": 4.4087, "step": 17480 }, { "epoch": 0.6549429274391966, "grad_norm": 0.047607421875, "learning_rate": 1.5195697997817429e-05, "loss": 4.3858, "step": 17490 }, { "epoch": 0.6553462670915178, "grad_norm": 0.044189453125, "learning_rate": 1.5190272659948168e-05, "loss": 4.4323, "step": 17500 }, { "epoch": 0.655749606743839, "grad_norm": 0.044677734375, "learning_rate": 1.5184845230254613e-05, "loss": 4.4309, "step": 17510 }, { "epoch": 0.6561529463961602, "grad_norm": 0.0439453125, "learning_rate": 1.517941571092417e-05, "loss": 4.4129, "step": 17520 }, { "epoch": 0.6565562860484814, "grad_norm": 0.04345703125, "learning_rate": 1.5173984104145088e-05, "loss": 4.4192, "step": 17530 }, { "epoch": 0.6569596257008027, "grad_norm": 0.044921875, "learning_rate": 1.5168550412106451e-05, "loss": 4.3868, "step": 17540 }, { "epoch": 0.6573629653531239, "grad_norm": 0.04736328125, "learning_rate": 1.5163114636998191e-05, "loss": 4.415, "step": 17550 }, { "epoch": 0.6577663050054451, "grad_norm": 0.045166015625, "learning_rate": 1.5157676781011073e-05, "loss": 4.3993, "step": 17560 }, { "epoch": 0.6581696446577663, "grad_norm": 0.04248046875, "learning_rate": 1.515223684633671e-05, "loss": 4.3951, "step": 17570 }, { "epoch": 0.6585729843100875, "grad_norm": 0.045166015625, "learning_rate": 1.514679483516755e-05, "loss": 4.373, "step": 17580 }, { "epoch": 0.6589763239624088, "grad_norm": 0.043701171875, "learning_rate": 1.5141350749696861e-05, "loss": 4.4529, "step": 17590 }, { "epoch": 0.65937966361473, "grad_norm": 0.042236328125, "learning_rate": 1.5135904592118778e-05, "loss": 4.4082, "step": 17600 }, { "epoch": 0.6597830032670512, "grad_norm": 0.04345703125, "learning_rate": 1.513045636462824e-05, "loss": 4.4463, "step": 17610 }, { "epoch": 0.6601863429193724, "grad_norm": 0.046630859375, "learning_rate": 1.512500606942104e-05, "loss": 4.399, "step": 17620 }, { "epoch": 0.6605896825716936, "grad_norm": 0.04443359375, "learning_rate": 1.51195537086938e-05, "loss": 4.4163, "step": 17630 }, { "epoch": 0.6609930222240148, "grad_norm": 0.045166015625, "learning_rate": 1.511409928464397e-05, "loss": 4.4332, "step": 17640 }, { "epoch": 0.6613963618763361, "grad_norm": 0.046630859375, "learning_rate": 1.5108642799469838e-05, "loss": 4.4075, "step": 17650 }, { "epoch": 0.6617997015286573, "grad_norm": 0.04150390625, "learning_rate": 1.5103184255370516e-05, "loss": 4.3679, "step": 17660 }, { "epoch": 0.6622030411809785, "grad_norm": 0.044677734375, "learning_rate": 1.5097723654545952e-05, "loss": 4.4503, "step": 17670 }, { "epoch": 0.6626063808332997, "grad_norm": 0.044189453125, "learning_rate": 1.5092260999196917e-05, "loss": 4.4089, "step": 17680 }, { "epoch": 0.6630097204856209, "grad_norm": 0.042724609375, "learning_rate": 1.5086796291525017e-05, "loss": 4.3963, "step": 17690 }, { "epoch": 0.6634130601379422, "grad_norm": 0.04296875, "learning_rate": 1.5081329533732679e-05, "loss": 4.4211, "step": 17700 }, { "epoch": 0.6638163997902634, "grad_norm": 0.04296875, "learning_rate": 1.5075860728023158e-05, "loss": 4.4017, "step": 17710 }, { "epoch": 0.6642197394425846, "grad_norm": 0.04638671875, "learning_rate": 1.5070389876600534e-05, "loss": 4.4211, "step": 17720 }, { "epoch": 0.6646230790949058, "grad_norm": 0.045166015625, "learning_rate": 1.5064916981669714e-05, "loss": 4.4042, "step": 17730 }, { "epoch": 0.665026418747227, "grad_norm": 0.046142578125, "learning_rate": 1.5059442045436426e-05, "loss": 4.4469, "step": 17740 }, { "epoch": 0.6654297583995482, "grad_norm": 0.044921875, "learning_rate": 1.505396507010722e-05, "loss": 4.3947, "step": 17750 }, { "epoch": 0.6658330980518695, "grad_norm": 0.043701171875, "learning_rate": 1.504848605788948e-05, "loss": 4.43, "step": 17760 }, { "epoch": 0.6662364377041907, "grad_norm": 0.045166015625, "learning_rate": 1.5043005010991384e-05, "loss": 4.3977, "step": 17770 }, { "epoch": 0.6666397773565119, "grad_norm": 0.0439453125, "learning_rate": 1.503752193162196e-05, "loss": 4.4134, "step": 17780 }, { "epoch": 0.6670431170088331, "grad_norm": 0.044921875, "learning_rate": 1.5032036821991034e-05, "loss": 4.4349, "step": 17790 }, { "epoch": 0.6674464566611543, "grad_norm": 0.044677734375, "learning_rate": 1.502654968430926e-05, "loss": 4.3868, "step": 17800 }, { "epoch": 0.6678497963134756, "grad_norm": 0.04541015625, "learning_rate": 1.5021060520788107e-05, "loss": 4.4312, "step": 17810 }, { "epoch": 0.6682531359657968, "grad_norm": 0.044677734375, "learning_rate": 1.5015569333639862e-05, "loss": 4.4302, "step": 17820 }, { "epoch": 0.668656475618118, "grad_norm": 0.043212890625, "learning_rate": 1.5010076125077621e-05, "loss": 4.4205, "step": 17830 }, { "epoch": 0.6690598152704392, "grad_norm": 0.04541015625, "learning_rate": 1.5004580897315305e-05, "loss": 4.4058, "step": 17840 }, { "epoch": 0.6694631549227604, "grad_norm": 0.044189453125, "learning_rate": 1.4999083652567644e-05, "loss": 4.426, "step": 17850 }, { "epoch": 0.6698664945750816, "grad_norm": 0.046630859375, "learning_rate": 1.4993584393050179e-05, "loss": 4.4362, "step": 17860 }, { "epoch": 0.6702698342274029, "grad_norm": 0.044677734375, "learning_rate": 1.4988083120979264e-05, "loss": 4.4027, "step": 17870 }, { "epoch": 0.6706731738797241, "grad_norm": 0.0478515625, "learning_rate": 1.4982579838572066e-05, "loss": 4.4314, "step": 17880 }, { "epoch": 0.6710765135320453, "grad_norm": 0.045166015625, "learning_rate": 1.497707454804656e-05, "loss": 4.3781, "step": 17890 }, { "epoch": 0.6714798531843665, "grad_norm": 0.04248046875, "learning_rate": 1.497156725162153e-05, "loss": 4.3626, "step": 17900 }, { "epoch": 0.6718831928366877, "grad_norm": 0.043701171875, "learning_rate": 1.4966057951516573e-05, "loss": 4.4158, "step": 17910 }, { "epoch": 0.672286532489009, "grad_norm": 0.044189453125, "learning_rate": 1.4960546649952088e-05, "loss": 4.42, "step": 17920 }, { "epoch": 0.6726898721413302, "grad_norm": 0.04345703125, "learning_rate": 1.4955033349149282e-05, "loss": 4.4096, "step": 17930 }, { "epoch": 0.6730932117936514, "grad_norm": 0.044921875, "learning_rate": 1.494951805133017e-05, "loss": 4.4166, "step": 17940 }, { "epoch": 0.6734965514459726, "grad_norm": 0.045166015625, "learning_rate": 1.4944000758717572e-05, "loss": 4.438, "step": 17950 }, { "epoch": 0.6738998910982938, "grad_norm": 0.054443359375, "learning_rate": 1.4938481473535107e-05, "loss": 4.417, "step": 17960 }, { "epoch": 0.674303230750615, "grad_norm": 0.04931640625, "learning_rate": 1.4932960198007207e-05, "loss": 4.4324, "step": 17970 }, { "epoch": 0.6747065704029364, "grad_norm": 0.045166015625, "learning_rate": 1.492743693435909e-05, "loss": 4.45, "step": 17980 }, { "epoch": 0.6751099100552576, "grad_norm": 0.04638671875, "learning_rate": 1.4921911684816792e-05, "loss": 4.4373, "step": 17990 }, { "epoch": 0.6755132497075788, "grad_norm": 0.044677734375, "learning_rate": 1.491638445160714e-05, "loss": 4.4333, "step": 18000 }, { "epoch": 0.6759165893599, "grad_norm": 0.044921875, "learning_rate": 1.4910855236957762e-05, "loss": 4.4293, "step": 18010 }, { "epoch": 0.6763199290122212, "grad_norm": 0.043212890625, "learning_rate": 1.4905324043097087e-05, "loss": 4.3974, "step": 18020 }, { "epoch": 0.6767232686645425, "grad_norm": 0.044189453125, "learning_rate": 1.4899790872254338e-05, "loss": 4.4246, "step": 18030 }, { "epoch": 0.6771266083168637, "grad_norm": 0.045166015625, "learning_rate": 1.4894255726659541e-05, "loss": 4.3915, "step": 18040 }, { "epoch": 0.6775299479691849, "grad_norm": 0.0458984375, "learning_rate": 1.4888718608543507e-05, "loss": 4.4181, "step": 18050 }, { "epoch": 0.6779332876215061, "grad_norm": 0.04638671875, "learning_rate": 1.4883179520137857e-05, "loss": 4.4198, "step": 18060 }, { "epoch": 0.6783366272738273, "grad_norm": 0.04638671875, "learning_rate": 1.487763846367499e-05, "loss": 4.4086, "step": 18070 }, { "epoch": 0.6787399669261485, "grad_norm": 0.04638671875, "learning_rate": 1.487209544138811e-05, "loss": 4.4482, "step": 18080 }, { "epoch": 0.6791433065784698, "grad_norm": 0.04345703125, "learning_rate": 1.4866550455511208e-05, "loss": 4.4111, "step": 18090 }, { "epoch": 0.679546646230791, "grad_norm": 0.04443359375, "learning_rate": 1.4861003508279071e-05, "loss": 4.4079, "step": 18100 }, { "epoch": 0.6799499858831122, "grad_norm": 0.045654296875, "learning_rate": 1.4855454601927267e-05, "loss": 4.4257, "step": 18110 }, { "epoch": 0.6803533255354334, "grad_norm": 0.044189453125, "learning_rate": 1.4849903738692161e-05, "loss": 4.4484, "step": 18120 }, { "epoch": 0.6807566651877546, "grad_norm": 0.044921875, "learning_rate": 1.484435092081091e-05, "loss": 4.4123, "step": 18130 }, { "epoch": 0.6811600048400759, "grad_norm": 0.044677734375, "learning_rate": 1.4838796150521451e-05, "loss": 4.4084, "step": 18140 }, { "epoch": 0.6815633444923971, "grad_norm": 0.045654296875, "learning_rate": 1.4833239430062509e-05, "loss": 4.4268, "step": 18150 }, { "epoch": 0.6819666841447183, "grad_norm": 0.042724609375, "learning_rate": 1.4827680761673602e-05, "loss": 4.4535, "step": 18160 }, { "epoch": 0.6823700237970395, "grad_norm": 0.04638671875, "learning_rate": 1.4822120147595023e-05, "loss": 4.4543, "step": 18170 }, { "epoch": 0.6827733634493607, "grad_norm": 0.04443359375, "learning_rate": 1.4816557590067858e-05, "loss": 4.394, "step": 18180 }, { "epoch": 0.6831767031016819, "grad_norm": 0.04296875, "learning_rate": 1.4810993091333967e-05, "loss": 4.3729, "step": 18190 }, { "epoch": 0.6835800427540032, "grad_norm": 0.044921875, "learning_rate": 1.4805426653636003e-05, "loss": 4.4159, "step": 18200 }, { "epoch": 0.6839833824063244, "grad_norm": 0.04443359375, "learning_rate": 1.4799858279217396e-05, "loss": 4.4141, "step": 18210 }, { "epoch": 0.6843867220586456, "grad_norm": 0.041015625, "learning_rate": 1.4794287970322351e-05, "loss": 4.4031, "step": 18220 }, { "epoch": 0.6847900617109668, "grad_norm": 0.047119140625, "learning_rate": 1.4788715729195864e-05, "loss": 4.4169, "step": 18230 }, { "epoch": 0.685193401363288, "grad_norm": 0.0458984375, "learning_rate": 1.4783141558083698e-05, "loss": 4.4213, "step": 18240 }, { "epoch": 0.6855967410156092, "grad_norm": 0.04541015625, "learning_rate": 1.4777565459232399e-05, "loss": 4.4116, "step": 18250 }, { "epoch": 0.6860000806679305, "grad_norm": 0.04541015625, "learning_rate": 1.4771987434889292e-05, "loss": 4.4043, "step": 18260 }, { "epoch": 0.6864034203202517, "grad_norm": 0.043212890625, "learning_rate": 1.4766407487302475e-05, "loss": 4.4133, "step": 18270 }, { "epoch": 0.6868067599725729, "grad_norm": 0.04443359375, "learning_rate": 1.476082561872082e-05, "loss": 4.3862, "step": 18280 }, { "epoch": 0.6872100996248941, "grad_norm": 0.043212890625, "learning_rate": 1.4755241831393981e-05, "loss": 4.3986, "step": 18290 }, { "epoch": 0.6876134392772153, "grad_norm": 0.043212890625, "learning_rate": 1.4749656127572376e-05, "loss": 4.4199, "step": 18300 }, { "epoch": 0.6880167789295366, "grad_norm": 0.0419921875, "learning_rate": 1.4744068509507198e-05, "loss": 4.4602, "step": 18310 }, { "epoch": 0.6884201185818578, "grad_norm": 0.0458984375, "learning_rate": 1.4738478979450417e-05, "loss": 4.4353, "step": 18320 }, { "epoch": 0.688823458234179, "grad_norm": 0.041259765625, "learning_rate": 1.4732887539654764e-05, "loss": 4.4104, "step": 18330 }, { "epoch": 0.6892267978865002, "grad_norm": 0.044677734375, "learning_rate": 1.4727294192373746e-05, "loss": 4.3887, "step": 18340 }, { "epoch": 0.6896301375388214, "grad_norm": 0.044189453125, "learning_rate": 1.4721698939861642e-05, "loss": 4.3967, "step": 18350 }, { "epoch": 0.6900334771911426, "grad_norm": 0.044677734375, "learning_rate": 1.4716101784373489e-05, "loss": 4.4455, "step": 18360 }, { "epoch": 0.6904368168434639, "grad_norm": 0.04541015625, "learning_rate": 1.4710502728165097e-05, "loss": 4.3896, "step": 18370 }, { "epoch": 0.6908401564957851, "grad_norm": 0.0458984375, "learning_rate": 1.4704901773493043e-05, "loss": 4.4289, "step": 18380 }, { "epoch": 0.6912434961481063, "grad_norm": 0.044677734375, "learning_rate": 1.469929892261467e-05, "loss": 4.4343, "step": 18390 }, { "epoch": 0.6916468358004275, "grad_norm": 0.045654296875, "learning_rate": 1.469369417778808e-05, "loss": 4.4048, "step": 18400 }, { "epoch": 0.6920501754527487, "grad_norm": 0.04541015625, "learning_rate": 1.468808754127214e-05, "loss": 4.4212, "step": 18410 }, { "epoch": 0.69245351510507, "grad_norm": 0.0419921875, "learning_rate": 1.4682479015326483e-05, "loss": 4.4059, "step": 18420 }, { "epoch": 0.6928568547573912, "grad_norm": 0.04150390625, "learning_rate": 1.46768686022115e-05, "loss": 4.3935, "step": 18430 }, { "epoch": 0.6932601944097124, "grad_norm": 0.042724609375, "learning_rate": 1.4671256304188342e-05, "loss": 4.3907, "step": 18440 }, { "epoch": 0.6936635340620336, "grad_norm": 0.044677734375, "learning_rate": 1.4665642123518924e-05, "loss": 4.4123, "step": 18450 }, { "epoch": 0.6940668737143548, "grad_norm": 0.04296875, "learning_rate": 1.4660026062465915e-05, "loss": 4.4061, "step": 18460 }, { "epoch": 0.694470213366676, "grad_norm": 0.044921875, "learning_rate": 1.4654408123292742e-05, "loss": 4.4431, "step": 18470 }, { "epoch": 0.6948735530189973, "grad_norm": 0.043701171875, "learning_rate": 1.4648788308263599e-05, "loss": 4.4135, "step": 18480 }, { "epoch": 0.6952768926713185, "grad_norm": 0.044921875, "learning_rate": 1.4643166619643418e-05, "loss": 4.3764, "step": 18490 }, { "epoch": 0.6956802323236397, "grad_norm": 0.04296875, "learning_rate": 1.4637543059697902e-05, "loss": 4.4048, "step": 18500 }, { "epoch": 0.6960835719759609, "grad_norm": 0.0458984375, "learning_rate": 1.46319176306935e-05, "loss": 4.4107, "step": 18510 }, { "epoch": 0.6964869116282821, "grad_norm": 0.04638671875, "learning_rate": 1.4626290334897417e-05, "loss": 4.4038, "step": 18520 }, { "epoch": 0.6968902512806034, "grad_norm": 0.04638671875, "learning_rate": 1.462066117457761e-05, "loss": 4.4316, "step": 18530 }, { "epoch": 0.6972935909329246, "grad_norm": 0.04248046875, "learning_rate": 1.4615030152002785e-05, "loss": 4.4307, "step": 18540 }, { "epoch": 0.6976969305852458, "grad_norm": 0.044189453125, "learning_rate": 1.4609397269442403e-05, "loss": 4.4356, "step": 18550 }, { "epoch": 0.698100270237567, "grad_norm": 0.044189453125, "learning_rate": 1.460376252916667e-05, "loss": 4.4245, "step": 18560 }, { "epoch": 0.6985036098898882, "grad_norm": 0.0458984375, "learning_rate": 1.4598125933446546e-05, "loss": 4.4088, "step": 18570 }, { "epoch": 0.6989069495422094, "grad_norm": 0.041748046875, "learning_rate": 1.4592487484553736e-05, "loss": 4.4011, "step": 18580 }, { "epoch": 0.6993102891945308, "grad_norm": 0.04443359375, "learning_rate": 1.458684718476069e-05, "loss": 4.4288, "step": 18590 }, { "epoch": 0.699713628846852, "grad_norm": 0.048095703125, "learning_rate": 1.4581205036340606e-05, "loss": 4.4253, "step": 18600 }, { "epoch": 0.7001169684991732, "grad_norm": 0.041748046875, "learning_rate": 1.457556104156743e-05, "loss": 4.4261, "step": 18610 }, { "epoch": 0.7005203081514944, "grad_norm": 0.041748046875, "learning_rate": 1.4569915202715844e-05, "loss": 4.4179, "step": 18620 }, { "epoch": 0.7009236478038156, "grad_norm": 0.04541015625, "learning_rate": 1.4564267522061281e-05, "loss": 4.3952, "step": 18630 }, { "epoch": 0.7013269874561369, "grad_norm": 0.043701171875, "learning_rate": 1.4558618001879912e-05, "loss": 4.436, "step": 18640 }, { "epoch": 0.7017303271084581, "grad_norm": 0.044189453125, "learning_rate": 1.455296664444865e-05, "loss": 4.4233, "step": 18650 }, { "epoch": 0.7021336667607793, "grad_norm": 0.04296875, "learning_rate": 1.454731345204515e-05, "loss": 4.4186, "step": 18660 }, { "epoch": 0.7025370064131005, "grad_norm": 0.0439453125, "learning_rate": 1.4541658426947811e-05, "loss": 4.3764, "step": 18670 }, { "epoch": 0.7029403460654217, "grad_norm": 0.046875, "learning_rate": 1.4536001571435759e-05, "loss": 4.4473, "step": 18680 }, { "epoch": 0.7033436857177429, "grad_norm": 0.04833984375, "learning_rate": 1.4530342887788866e-05, "loss": 4.4046, "step": 18690 }, { "epoch": 0.7037470253700642, "grad_norm": 0.337890625, "learning_rate": 1.452468237828774e-05, "loss": 4.4316, "step": 18700 }, { "epoch": 0.7041503650223854, "grad_norm": 0.044921875, "learning_rate": 1.4519020045213723e-05, "loss": 4.3961, "step": 18710 }, { "epoch": 0.7045537046747066, "grad_norm": 0.045654296875, "learning_rate": 1.4513355890848892e-05, "loss": 4.405, "step": 18720 }, { "epoch": 0.7049570443270278, "grad_norm": 0.042236328125, "learning_rate": 1.450768991747606e-05, "loss": 4.4199, "step": 18730 }, { "epoch": 0.705360383979349, "grad_norm": 0.042236328125, "learning_rate": 1.4502022127378773e-05, "loss": 4.3922, "step": 18740 }, { "epoch": 0.7057637236316703, "grad_norm": 0.04296875, "learning_rate": 1.4496352522841303e-05, "loss": 4.4038, "step": 18750 }, { "epoch": 0.7061670632839915, "grad_norm": 0.044189453125, "learning_rate": 1.4490681106148666e-05, "loss": 4.4218, "step": 18760 }, { "epoch": 0.7065704029363127, "grad_norm": 0.04638671875, "learning_rate": 1.4485007879586594e-05, "loss": 4.3998, "step": 18770 }, { "epoch": 0.7069737425886339, "grad_norm": 0.044189453125, "learning_rate": 1.447933284544156e-05, "loss": 4.41, "step": 18780 }, { "epoch": 0.7073770822409551, "grad_norm": 0.0439453125, "learning_rate": 1.4473656006000755e-05, "loss": 4.4066, "step": 18790 }, { "epoch": 0.7077804218932763, "grad_norm": 0.045166015625, "learning_rate": 1.4467977363552113e-05, "loss": 4.4225, "step": 18800 }, { "epoch": 0.7081837615455976, "grad_norm": 0.046630859375, "learning_rate": 1.4462296920384274e-05, "loss": 4.3942, "step": 18810 }, { "epoch": 0.7085871011979188, "grad_norm": 0.04248046875, "learning_rate": 1.445661467878662e-05, "loss": 4.3941, "step": 18820 }, { "epoch": 0.70899044085024, "grad_norm": 0.046142578125, "learning_rate": 1.4450930641049252e-05, "loss": 4.4377, "step": 18830 }, { "epoch": 0.7093937805025612, "grad_norm": 0.046875, "learning_rate": 1.4445244809462995e-05, "loss": 4.4203, "step": 18840 }, { "epoch": 0.7097971201548824, "grad_norm": 0.045654296875, "learning_rate": 1.44395571863194e-05, "loss": 4.3923, "step": 18850 }, { "epoch": 0.7102004598072037, "grad_norm": 0.044921875, "learning_rate": 1.443386777391073e-05, "loss": 4.4131, "step": 18860 }, { "epoch": 0.7106037994595249, "grad_norm": 0.043701171875, "learning_rate": 1.4428176574529986e-05, "loss": 4.4493, "step": 18870 }, { "epoch": 0.7110071391118461, "grad_norm": 0.045654296875, "learning_rate": 1.4422483590470872e-05, "loss": 4.3775, "step": 18880 }, { "epoch": 0.7114104787641673, "grad_norm": 0.042236328125, "learning_rate": 1.4416788824027825e-05, "loss": 4.4049, "step": 18890 }, { "epoch": 0.7118138184164885, "grad_norm": 0.04248046875, "learning_rate": 1.4411092277495988e-05, "loss": 4.3952, "step": 18900 }, { "epoch": 0.7122171580688097, "grad_norm": 0.04443359375, "learning_rate": 1.4405393953171234e-05, "loss": 4.4337, "step": 18910 }, { "epoch": 0.712620497721131, "grad_norm": 0.044189453125, "learning_rate": 1.439969385335014e-05, "loss": 4.4162, "step": 18920 }, { "epoch": 0.7130238373734522, "grad_norm": 0.044677734375, "learning_rate": 1.4393991980330012e-05, "loss": 4.4041, "step": 18930 }, { "epoch": 0.7134271770257734, "grad_norm": 0.046142578125, "learning_rate": 1.438828833640886e-05, "loss": 4.4248, "step": 18940 }, { "epoch": 0.7138305166780946, "grad_norm": 0.043701171875, "learning_rate": 1.4382582923885408e-05, "loss": 4.4176, "step": 18950 }, { "epoch": 0.7142338563304158, "grad_norm": 0.0458984375, "learning_rate": 1.43768757450591e-05, "loss": 4.4374, "step": 18960 }, { "epoch": 0.7146371959827371, "grad_norm": 0.043212890625, "learning_rate": 1.4371166802230087e-05, "loss": 4.4099, "step": 18970 }, { "epoch": 0.7150405356350583, "grad_norm": 0.044189453125, "learning_rate": 1.4365456097699233e-05, "loss": 4.4265, "step": 18980 }, { "epoch": 0.7154438752873795, "grad_norm": 0.044921875, "learning_rate": 1.4359743633768107e-05, "loss": 4.4149, "step": 18990 }, { "epoch": 0.7158472149397007, "grad_norm": 0.044921875, "learning_rate": 1.4354029412738992e-05, "loss": 4.4204, "step": 19000 }, { "epoch": 0.7162505545920219, "grad_norm": 0.044677734375, "learning_rate": 1.434831343691488e-05, "loss": 4.4337, "step": 19010 }, { "epoch": 0.7166538942443431, "grad_norm": 0.046142578125, "learning_rate": 1.4342595708599471e-05, "loss": 4.4258, "step": 19020 }, { "epoch": 0.7170572338966644, "grad_norm": 0.04541015625, "learning_rate": 1.4336876230097163e-05, "loss": 4.4335, "step": 19030 }, { "epoch": 0.7174605735489856, "grad_norm": 0.044189453125, "learning_rate": 1.4331155003713067e-05, "loss": 4.414, "step": 19040 }, { "epoch": 0.7178639132013068, "grad_norm": 0.04541015625, "learning_rate": 1.4325432031752998e-05, "loss": 4.4586, "step": 19050 }, { "epoch": 0.718267252853628, "grad_norm": 0.044677734375, "learning_rate": 1.431970731652347e-05, "loss": 4.3759, "step": 19060 }, { "epoch": 0.7186705925059492, "grad_norm": 0.043212890625, "learning_rate": 1.4313980860331706e-05, "loss": 4.448, "step": 19070 }, { "epoch": 0.7190739321582705, "grad_norm": 0.04541015625, "learning_rate": 1.4308252665485623e-05, "loss": 4.4066, "step": 19080 }, { "epoch": 0.7194772718105917, "grad_norm": 0.042724609375, "learning_rate": 1.4302522734293844e-05, "loss": 4.4115, "step": 19090 }, { "epoch": 0.7198806114629129, "grad_norm": 0.047119140625, "learning_rate": 1.4296791069065695e-05, "loss": 4.4155, "step": 19100 }, { "epoch": 0.7202839511152341, "grad_norm": 0.0458984375, "learning_rate": 1.4291057672111193e-05, "loss": 4.4242, "step": 19110 }, { "epoch": 0.7206872907675553, "grad_norm": 0.045166015625, "learning_rate": 1.4285322545741053e-05, "loss": 4.4201, "step": 19120 }, { "epoch": 0.7210906304198765, "grad_norm": 0.04541015625, "learning_rate": 1.4279585692266699e-05, "loss": 4.3974, "step": 19130 }, { "epoch": 0.7214939700721978, "grad_norm": 0.0458984375, "learning_rate": 1.4273847114000237e-05, "loss": 4.4257, "step": 19140 }, { "epoch": 0.721897309724519, "grad_norm": 0.042236328125, "learning_rate": 1.4268106813254474e-05, "loss": 4.4025, "step": 19150 }, { "epoch": 0.7223006493768402, "grad_norm": 0.04443359375, "learning_rate": 1.426236479234291e-05, "loss": 4.3896, "step": 19160 }, { "epoch": 0.7227039890291614, "grad_norm": 0.045654296875, "learning_rate": 1.4256621053579744e-05, "loss": 4.4008, "step": 19170 }, { "epoch": 0.7231073286814826, "grad_norm": 0.04296875, "learning_rate": 1.4250875599279858e-05, "loss": 4.3969, "step": 19180 }, { "epoch": 0.7235106683338038, "grad_norm": 0.046875, "learning_rate": 1.424512843175883e-05, "loss": 4.3831, "step": 19190 }, { "epoch": 0.7239140079861252, "grad_norm": 0.043701171875, "learning_rate": 1.423937955333293e-05, "loss": 4.4297, "step": 19200 }, { "epoch": 0.7243173476384464, "grad_norm": 0.042724609375, "learning_rate": 1.4233628966319117e-05, "loss": 4.4131, "step": 19210 }, { "epoch": 0.7247206872907676, "grad_norm": 0.046142578125, "learning_rate": 1.4227876673035037e-05, "loss": 4.4148, "step": 19220 }, { "epoch": 0.7251240269430888, "grad_norm": 0.047607421875, "learning_rate": 1.4222122675799019e-05, "loss": 4.4097, "step": 19230 }, { "epoch": 0.72552736659541, "grad_norm": 0.04736328125, "learning_rate": 1.4216366976930093e-05, "loss": 4.3908, "step": 19240 }, { "epoch": 0.7259307062477313, "grad_norm": 0.0458984375, "learning_rate": 1.421060957874796e-05, "loss": 4.4189, "step": 19250 }, { "epoch": 0.7263340459000525, "grad_norm": 0.0458984375, "learning_rate": 1.4204850483573014e-05, "loss": 4.3955, "step": 19260 }, { "epoch": 0.7267373855523737, "grad_norm": 0.045654296875, "learning_rate": 1.4199089693726327e-05, "loss": 4.412, "step": 19270 }, { "epoch": 0.7271407252046949, "grad_norm": 0.04541015625, "learning_rate": 1.4193327211529659e-05, "loss": 4.4108, "step": 19280 }, { "epoch": 0.7275440648570161, "grad_norm": 0.0419921875, "learning_rate": 1.4187563039305454e-05, "loss": 4.4046, "step": 19290 }, { "epoch": 0.7279474045093373, "grad_norm": 0.046875, "learning_rate": 1.418179717937683e-05, "loss": 4.4248, "step": 19300 }, { "epoch": 0.7283507441616586, "grad_norm": 0.044921875, "learning_rate": 1.4176029634067594e-05, "loss": 4.3933, "step": 19310 }, { "epoch": 0.7287540838139798, "grad_norm": 0.044189453125, "learning_rate": 1.4170260405702222e-05, "loss": 4.429, "step": 19320 }, { "epoch": 0.729157423466301, "grad_norm": 0.045654296875, "learning_rate": 1.4164489496605876e-05, "loss": 4.4008, "step": 19330 }, { "epoch": 0.7295607631186222, "grad_norm": 0.042236328125, "learning_rate": 1.4158716909104388e-05, "loss": 4.3961, "step": 19340 }, { "epoch": 0.7299641027709434, "grad_norm": 0.047119140625, "learning_rate": 1.415294264552428e-05, "loss": 4.46, "step": 19350 }, { "epoch": 0.7303674424232647, "grad_norm": 0.04296875, "learning_rate": 1.4147166708192734e-05, "loss": 4.4129, "step": 19360 }, { "epoch": 0.7307707820755859, "grad_norm": 0.0458984375, "learning_rate": 1.4141389099437617e-05, "loss": 4.4165, "step": 19370 }, { "epoch": 0.7311741217279071, "grad_norm": 0.044921875, "learning_rate": 1.4135609821587465e-05, "loss": 4.415, "step": 19380 }, { "epoch": 0.7315774613802283, "grad_norm": 0.0458984375, "learning_rate": 1.4129828876971486e-05, "loss": 4.4154, "step": 19390 }, { "epoch": 0.7319808010325495, "grad_norm": 0.045166015625, "learning_rate": 1.4124046267919565e-05, "loss": 4.4235, "step": 19400 }, { "epoch": 0.7323841406848707, "grad_norm": 0.044921875, "learning_rate": 1.4118261996762253e-05, "loss": 4.3959, "step": 19410 }, { "epoch": 0.732787480337192, "grad_norm": 0.04443359375, "learning_rate": 1.411247606583077e-05, "loss": 4.4642, "step": 19420 }, { "epoch": 0.7331908199895132, "grad_norm": 0.0439453125, "learning_rate": 1.4106688477457011e-05, "loss": 4.4209, "step": 19430 }, { "epoch": 0.7335941596418344, "grad_norm": 0.04443359375, "learning_rate": 1.410089923397353e-05, "loss": 4.3873, "step": 19440 }, { "epoch": 0.7339974992941556, "grad_norm": 0.04541015625, "learning_rate": 1.409510833771356e-05, "loss": 4.4369, "step": 19450 }, { "epoch": 0.7344008389464768, "grad_norm": 0.044677734375, "learning_rate": 1.408931579101099e-05, "loss": 4.4144, "step": 19460 }, { "epoch": 0.7348041785987981, "grad_norm": 0.044921875, "learning_rate": 1.4083521596200373e-05, "loss": 4.4293, "step": 19470 }, { "epoch": 0.7352075182511193, "grad_norm": 0.044921875, "learning_rate": 1.407772575561694e-05, "loss": 4.4239, "step": 19480 }, { "epoch": 0.7356108579034405, "grad_norm": 0.04541015625, "learning_rate": 1.4071928271596566e-05, "loss": 4.3995, "step": 19490 }, { "epoch": 0.7360141975557617, "grad_norm": 0.0439453125, "learning_rate": 1.4066129146475807e-05, "loss": 4.4344, "step": 19500 }, { "epoch": 0.7364175372080829, "grad_norm": 0.0458984375, "learning_rate": 1.4060328382591867e-05, "loss": 4.4264, "step": 19510 }, { "epoch": 0.7368208768604041, "grad_norm": 0.044189453125, "learning_rate": 1.4054525982282617e-05, "loss": 4.404, "step": 19520 }, { "epoch": 0.7372242165127254, "grad_norm": 0.041748046875, "learning_rate": 1.4048721947886581e-05, "loss": 4.3936, "step": 19530 }, { "epoch": 0.7376275561650466, "grad_norm": 0.043212890625, "learning_rate": 1.4042916281742953e-05, "loss": 4.3795, "step": 19540 }, { "epoch": 0.7380308958173678, "grad_norm": 0.043701171875, "learning_rate": 1.4037108986191577e-05, "loss": 4.4185, "step": 19550 }, { "epoch": 0.738434235469689, "grad_norm": 0.042236328125, "learning_rate": 1.403130006357295e-05, "loss": 4.4431, "step": 19560 }, { "epoch": 0.7388375751220102, "grad_norm": 0.044189453125, "learning_rate": 1.4025489516228238e-05, "loss": 4.4205, "step": 19570 }, { "epoch": 0.7392409147743315, "grad_norm": 0.044189453125, "learning_rate": 1.4019677346499242e-05, "loss": 4.4582, "step": 19580 }, { "epoch": 0.7396442544266527, "grad_norm": 0.04296875, "learning_rate": 1.4013863556728436e-05, "loss": 4.4229, "step": 19590 }, { "epoch": 0.7400475940789739, "grad_norm": 0.046875, "learning_rate": 1.4008048149258935e-05, "loss": 4.4327, "step": 19600 }, { "epoch": 0.7404509337312951, "grad_norm": 0.045654296875, "learning_rate": 1.4002231126434515e-05, "loss": 4.3886, "step": 19610 }, { "epoch": 0.7408542733836163, "grad_norm": 0.042724609375, "learning_rate": 1.3996412490599594e-05, "loss": 4.4296, "step": 19620 }, { "epoch": 0.7412576130359375, "grad_norm": 0.04833984375, "learning_rate": 1.3990592244099243e-05, "loss": 4.4079, "step": 19630 }, { "epoch": 0.7416609526882588, "grad_norm": 0.045654296875, "learning_rate": 1.3984770389279185e-05, "loss": 4.4584, "step": 19640 }, { "epoch": 0.74206429234058, "grad_norm": 0.045166015625, "learning_rate": 1.3978946928485796e-05, "loss": 4.431, "step": 19650 }, { "epoch": 0.7424676319929012, "grad_norm": 0.044677734375, "learning_rate": 1.3973121864066081e-05, "loss": 4.4451, "step": 19660 }, { "epoch": 0.7428709716452224, "grad_norm": 0.04833984375, "learning_rate": 1.396729519836771e-05, "loss": 4.3805, "step": 19670 }, { "epoch": 0.7432743112975436, "grad_norm": 0.045166015625, "learning_rate": 1.3961466933738992e-05, "loss": 4.436, "step": 19680 }, { "epoch": 0.7436776509498649, "grad_norm": 0.04248046875, "learning_rate": 1.3955637072528873e-05, "loss": 4.4181, "step": 19690 }, { "epoch": 0.7440809906021861, "grad_norm": 0.040771484375, "learning_rate": 1.394980561708696e-05, "loss": 4.4108, "step": 19700 }, { "epoch": 0.7444843302545073, "grad_norm": 0.04541015625, "learning_rate": 1.3943972569763482e-05, "loss": 4.4207, "step": 19710 }, { "epoch": 0.7448876699068285, "grad_norm": 0.044921875, "learning_rate": 1.3938137932909324e-05, "loss": 4.4003, "step": 19720 }, { "epoch": 0.7452910095591497, "grad_norm": 0.045166015625, "learning_rate": 1.3932301708876005e-05, "loss": 4.4196, "step": 19730 }, { "epoch": 0.7456943492114709, "grad_norm": 0.044677734375, "learning_rate": 1.392646390001569e-05, "loss": 4.4275, "step": 19740 }, { "epoch": 0.7460976888637922, "grad_norm": 0.043701171875, "learning_rate": 1.3920624508681173e-05, "loss": 4.4135, "step": 19750 }, { "epoch": 0.7465010285161134, "grad_norm": 0.04443359375, "learning_rate": 1.3914783537225893e-05, "loss": 4.391, "step": 19760 }, { "epoch": 0.7469043681684346, "grad_norm": 0.042236328125, "learning_rate": 1.3908940988003927e-05, "loss": 4.4188, "step": 19770 }, { "epoch": 0.7473077078207558, "grad_norm": 0.045166015625, "learning_rate": 1.3903096863369983e-05, "loss": 4.3928, "step": 19780 }, { "epoch": 0.747711047473077, "grad_norm": 0.042236328125, "learning_rate": 1.3897251165679405e-05, "loss": 4.4204, "step": 19790 }, { "epoch": 0.7481143871253984, "grad_norm": 0.04345703125, "learning_rate": 1.3891403897288166e-05, "loss": 4.4125, "step": 19800 }, { "epoch": 0.7485177267777195, "grad_norm": 0.044921875, "learning_rate": 1.388555506055289e-05, "loss": 4.4199, "step": 19810 }, { "epoch": 0.7489210664300407, "grad_norm": 0.04638671875, "learning_rate": 1.3879704657830813e-05, "loss": 4.4158, "step": 19820 }, { "epoch": 0.749324406082362, "grad_norm": 0.048583984375, "learning_rate": 1.3873852691479812e-05, "loss": 4.4531, "step": 19830 }, { "epoch": 0.7497277457346831, "grad_norm": 0.0458984375, "learning_rate": 1.3867999163858392e-05, "loss": 4.425, "step": 19840 }, { "epoch": 0.7501310853870043, "grad_norm": 0.04296875, "learning_rate": 1.386214407732569e-05, "loss": 4.4374, "step": 19850 }, { "epoch": 0.7505344250393257, "grad_norm": 0.045654296875, "learning_rate": 1.3856287434241462e-05, "loss": 4.4158, "step": 19860 }, { "epoch": 0.7509377646916469, "grad_norm": 0.044189453125, "learning_rate": 1.3850429236966108e-05, "loss": 4.4408, "step": 19870 }, { "epoch": 0.7513411043439681, "grad_norm": 0.043701171875, "learning_rate": 1.3844569487860633e-05, "loss": 4.4136, "step": 19880 }, { "epoch": 0.7517444439962893, "grad_norm": 0.0458984375, "learning_rate": 1.3838708189286692e-05, "loss": 4.4031, "step": 19890 }, { "epoch": 0.7521477836486105, "grad_norm": 0.044921875, "learning_rate": 1.3832845343606537e-05, "loss": 4.4145, "step": 19900 }, { "epoch": 0.7525511233009318, "grad_norm": 0.042236328125, "learning_rate": 1.3826980953183069e-05, "loss": 4.3829, "step": 19910 }, { "epoch": 0.752954462953253, "grad_norm": 0.046142578125, "learning_rate": 1.3821115020379799e-05, "loss": 4.4157, "step": 19920 }, { "epoch": 0.7533578026055742, "grad_norm": 0.045654296875, "learning_rate": 1.3815247547560855e-05, "loss": 4.4461, "step": 19930 }, { "epoch": 0.7537611422578954, "grad_norm": 0.044189453125, "learning_rate": 1.3809378537090998e-05, "loss": 4.409, "step": 19940 }, { "epoch": 0.7541644819102166, "grad_norm": 0.045166015625, "learning_rate": 1.3803507991335601e-05, "loss": 4.4114, "step": 19950 }, { "epoch": 0.7545678215625378, "grad_norm": 0.04443359375, "learning_rate": 1.3797635912660656e-05, "loss": 4.3815, "step": 19960 }, { "epoch": 0.7549711612148591, "grad_norm": 0.046142578125, "learning_rate": 1.3791762303432774e-05, "loss": 4.3944, "step": 19970 }, { "epoch": 0.7553745008671803, "grad_norm": 0.042724609375, "learning_rate": 1.3785887166019183e-05, "loss": 4.4683, "step": 19980 }, { "epoch": 0.7557778405195015, "grad_norm": 0.044677734375, "learning_rate": 1.3780010502787727e-05, "loss": 4.4139, "step": 19990 }, { "epoch": 0.7561811801718227, "grad_norm": 0.044677734375, "learning_rate": 1.3774132316106867e-05, "loss": 4.4286, "step": 20000 }, { "epoch": 0.7565845198241439, "grad_norm": 0.043212890625, "learning_rate": 1.3768252608345675e-05, "loss": 4.3753, "step": 20010 }, { "epoch": 0.7569878594764652, "grad_norm": 0.044189453125, "learning_rate": 1.3762371381873833e-05, "loss": 4.4153, "step": 20020 }, { "epoch": 0.7573911991287864, "grad_norm": 0.0458984375, "learning_rate": 1.3756488639061644e-05, "loss": 4.415, "step": 20030 }, { "epoch": 0.7577945387811076, "grad_norm": 0.043701171875, "learning_rate": 1.3750604382280016e-05, "loss": 4.4187, "step": 20040 }, { "epoch": 0.7581978784334288, "grad_norm": 0.043212890625, "learning_rate": 1.3744718613900466e-05, "loss": 4.4188, "step": 20050 }, { "epoch": 0.75860121808575, "grad_norm": 0.04296875, "learning_rate": 1.373883133629512e-05, "loss": 4.3763, "step": 20060 }, { "epoch": 0.7590045577380712, "grad_norm": 0.048095703125, "learning_rate": 1.373294255183672e-05, "loss": 4.4066, "step": 20070 }, { "epoch": 0.7594078973903925, "grad_norm": 0.049072265625, "learning_rate": 1.3727052262898608e-05, "loss": 4.4169, "step": 20080 }, { "epoch": 0.7598112370427137, "grad_norm": 0.044677734375, "learning_rate": 1.3721160471854735e-05, "loss": 4.4063, "step": 20090 }, { "epoch": 0.7602145766950349, "grad_norm": 0.044677734375, "learning_rate": 1.3715267181079656e-05, "loss": 4.4266, "step": 20100 }, { "epoch": 0.7606179163473561, "grad_norm": 0.046630859375, "learning_rate": 1.3709372392948533e-05, "loss": 4.4242, "step": 20110 }, { "epoch": 0.7610212559996773, "grad_norm": 0.0439453125, "learning_rate": 1.3703476109837123e-05, "loss": 4.3972, "step": 20120 }, { "epoch": 0.7614245956519985, "grad_norm": 0.04248046875, "learning_rate": 1.3697578334121805e-05, "loss": 4.4476, "step": 20130 }, { "epoch": 0.7618279353043198, "grad_norm": 0.044921875, "learning_rate": 1.3691679068179532e-05, "loss": 4.3991, "step": 20140 }, { "epoch": 0.762231274956641, "grad_norm": 0.04345703125, "learning_rate": 1.3685778314387883e-05, "loss": 4.3992, "step": 20150 }, { "epoch": 0.7626346146089622, "grad_norm": 0.0458984375, "learning_rate": 1.3679876075125018e-05, "loss": 4.41, "step": 20160 }, { "epoch": 0.7630379542612834, "grad_norm": 0.04248046875, "learning_rate": 1.3673972352769709e-05, "loss": 4.4393, "step": 20170 }, { "epoch": 0.7634412939136046, "grad_norm": 0.04443359375, "learning_rate": 1.3668067149701324e-05, "loss": 4.3925, "step": 20180 }, { "epoch": 0.7638446335659259, "grad_norm": 0.04541015625, "learning_rate": 1.3662160468299812e-05, "loss": 4.3835, "step": 20190 }, { "epoch": 0.7642479732182471, "grad_norm": 0.04541015625, "learning_rate": 1.3656252310945743e-05, "loss": 4.4134, "step": 20200 }, { "epoch": 0.7646513128705683, "grad_norm": 0.043212890625, "learning_rate": 1.365034268002026e-05, "loss": 4.4187, "step": 20210 }, { "epoch": 0.7650546525228895, "grad_norm": 0.044921875, "learning_rate": 1.3644431577905112e-05, "loss": 4.4163, "step": 20220 }, { "epoch": 0.7654579921752107, "grad_norm": 0.045166015625, "learning_rate": 1.3638519006982635e-05, "loss": 4.424, "step": 20230 }, { "epoch": 0.7658613318275319, "grad_norm": 0.044677734375, "learning_rate": 1.3632604969635767e-05, "loss": 4.4082, "step": 20240 }, { "epoch": 0.7662646714798532, "grad_norm": 0.041748046875, "learning_rate": 1.362668946824802e-05, "loss": 4.3815, "step": 20250 }, { "epoch": 0.7666680111321744, "grad_norm": 0.045166015625, "learning_rate": 1.3620772505203513e-05, "loss": 4.3653, "step": 20260 }, { "epoch": 0.7670713507844956, "grad_norm": 0.048583984375, "learning_rate": 1.3614854082886941e-05, "loss": 4.4509, "step": 20270 }, { "epoch": 0.7674746904368168, "grad_norm": 0.046142578125, "learning_rate": 1.3608934203683596e-05, "loss": 4.4163, "step": 20280 }, { "epoch": 0.767878030089138, "grad_norm": 0.04150390625, "learning_rate": 1.3603012869979357e-05, "loss": 4.4118, "step": 20290 }, { "epoch": 0.7682813697414593, "grad_norm": 0.047119140625, "learning_rate": 1.359709008416068e-05, "loss": 4.4394, "step": 20300 }, { "epoch": 0.7686847093937805, "grad_norm": 0.04541015625, "learning_rate": 1.3591165848614617e-05, "loss": 4.444, "step": 20310 }, { "epoch": 0.7690880490461017, "grad_norm": 0.043212890625, "learning_rate": 1.3585240165728797e-05, "loss": 4.4359, "step": 20320 }, { "epoch": 0.7694913886984229, "grad_norm": 0.04248046875, "learning_rate": 1.3579313037891439e-05, "loss": 4.4452, "step": 20330 }, { "epoch": 0.7698947283507441, "grad_norm": 0.043212890625, "learning_rate": 1.3573384467491335e-05, "loss": 4.426, "step": 20340 }, { "epoch": 0.7702980680030653, "grad_norm": 0.044677734375, "learning_rate": 1.3567454456917865e-05, "loss": 4.4371, "step": 20350 }, { "epoch": 0.7707014076553866, "grad_norm": 0.0419921875, "learning_rate": 1.3561523008560993e-05, "loss": 4.4308, "step": 20360 }, { "epoch": 0.7711047473077078, "grad_norm": 0.04150390625, "learning_rate": 1.3555590124811256e-05, "loss": 4.4209, "step": 20370 }, { "epoch": 0.771508086960029, "grad_norm": 0.04638671875, "learning_rate": 1.3549655808059767e-05, "loss": 4.4333, "step": 20380 }, { "epoch": 0.7719114266123502, "grad_norm": 0.04345703125, "learning_rate": 1.3543720060698222e-05, "loss": 4.404, "step": 20390 }, { "epoch": 0.7723147662646714, "grad_norm": 0.044677734375, "learning_rate": 1.3537782885118898e-05, "loss": 4.4385, "step": 20400 }, { "epoch": 0.7727181059169927, "grad_norm": 0.0439453125, "learning_rate": 1.3531844283714634e-05, "loss": 4.4295, "step": 20410 }, { "epoch": 0.773121445569314, "grad_norm": 0.043701171875, "learning_rate": 1.3525904258878857e-05, "loss": 4.4387, "step": 20420 }, { "epoch": 0.7735247852216351, "grad_norm": 0.04443359375, "learning_rate": 1.351996281300556e-05, "loss": 4.4238, "step": 20430 }, { "epoch": 0.7739281248739563, "grad_norm": 0.043701171875, "learning_rate": 1.351401994848931e-05, "loss": 4.445, "step": 20440 }, { "epoch": 0.7743314645262775, "grad_norm": 0.0439453125, "learning_rate": 1.3508075667725251e-05, "loss": 4.4073, "step": 20450 }, { "epoch": 0.7747348041785987, "grad_norm": 0.045166015625, "learning_rate": 1.350212997310909e-05, "loss": 4.3998, "step": 20460 }, { "epoch": 0.7751381438309201, "grad_norm": 0.044921875, "learning_rate": 1.3496182867037108e-05, "loss": 4.4414, "step": 20470 }, { "epoch": 0.7755414834832413, "grad_norm": 0.045166015625, "learning_rate": 1.3490234351906158e-05, "loss": 4.4172, "step": 20480 }, { "epoch": 0.7759448231355625, "grad_norm": 0.044921875, "learning_rate": 1.3484284430113653e-05, "loss": 4.4258, "step": 20490 }, { "epoch": 0.7763481627878837, "grad_norm": 0.04443359375, "learning_rate": 1.3478333104057572e-05, "loss": 4.3569, "step": 20500 }, { "epoch": 0.7767515024402049, "grad_norm": 0.044921875, "learning_rate": 1.3472380376136478e-05, "loss": 4.394, "step": 20510 }, { "epoch": 0.7771548420925262, "grad_norm": 0.046142578125, "learning_rate": 1.3466426248749473e-05, "loss": 4.3779, "step": 20520 }, { "epoch": 0.7775581817448474, "grad_norm": 0.042724609375, "learning_rate": 1.3460470724296242e-05, "loss": 4.4497, "step": 20530 }, { "epoch": 0.7779615213971686, "grad_norm": 0.04443359375, "learning_rate": 1.345451380517703e-05, "loss": 4.3991, "step": 20540 }, { "epoch": 0.7783648610494898, "grad_norm": 0.044921875, "learning_rate": 1.3448555493792634e-05, "loss": 4.4475, "step": 20550 }, { "epoch": 0.778768200701811, "grad_norm": 0.044921875, "learning_rate": 1.3442595792544426e-05, "loss": 4.4145, "step": 20560 }, { "epoch": 0.7791715403541322, "grad_norm": 0.045166015625, "learning_rate": 1.343663470383433e-05, "loss": 4.4132, "step": 20570 }, { "epoch": 0.7795748800064535, "grad_norm": 0.045166015625, "learning_rate": 1.3430672230064825e-05, "loss": 4.4357, "step": 20580 }, { "epoch": 0.7799782196587747, "grad_norm": 0.045654296875, "learning_rate": 1.3424708373638961e-05, "loss": 4.4104, "step": 20590 }, { "epoch": 0.7803815593110959, "grad_norm": 0.04296875, "learning_rate": 1.3418743136960333e-05, "loss": 4.4041, "step": 20600 }, { "epoch": 0.7807848989634171, "grad_norm": 0.04345703125, "learning_rate": 1.3412776522433099e-05, "loss": 4.4434, "step": 20610 }, { "epoch": 0.7811882386157383, "grad_norm": 0.04541015625, "learning_rate": 1.3406808532461975e-05, "loss": 4.3855, "step": 20620 }, { "epoch": 0.7815915782680596, "grad_norm": 0.046875, "learning_rate": 1.340083916945222e-05, "loss": 4.4309, "step": 20630 }, { "epoch": 0.7819949179203808, "grad_norm": 0.046630859375, "learning_rate": 1.3394868435809656e-05, "loss": 4.4231, "step": 20640 }, { "epoch": 0.782398257572702, "grad_norm": 0.045166015625, "learning_rate": 1.3388896333940658e-05, "loss": 4.408, "step": 20650 }, { "epoch": 0.7828015972250232, "grad_norm": 0.04443359375, "learning_rate": 1.3382922866252144e-05, "loss": 4.417, "step": 20660 }, { "epoch": 0.7832049368773444, "grad_norm": 0.047119140625, "learning_rate": 1.337694803515159e-05, "loss": 4.4477, "step": 20670 }, { "epoch": 0.7836082765296656, "grad_norm": 0.045166015625, "learning_rate": 1.3370971843047022e-05, "loss": 4.4148, "step": 20680 }, { "epoch": 0.7840116161819869, "grad_norm": 0.049072265625, "learning_rate": 1.3364994292347007e-05, "loss": 4.4173, "step": 20690 }, { "epoch": 0.7844149558343081, "grad_norm": 0.044189453125, "learning_rate": 1.3359015385460665e-05, "loss": 4.4282, "step": 20700 }, { "epoch": 0.7848182954866293, "grad_norm": 0.045654296875, "learning_rate": 1.3353035124797663e-05, "loss": 4.3877, "step": 20710 }, { "epoch": 0.7852216351389505, "grad_norm": 0.044189453125, "learning_rate": 1.3347053512768216e-05, "loss": 4.4187, "step": 20720 }, { "epoch": 0.7856249747912717, "grad_norm": 0.043701171875, "learning_rate": 1.3341070551783076e-05, "loss": 4.4145, "step": 20730 }, { "epoch": 0.786028314443593, "grad_norm": 0.0458984375, "learning_rate": 1.3335086244253542e-05, "loss": 4.3939, "step": 20740 }, { "epoch": 0.7864316540959142, "grad_norm": 0.0458984375, "learning_rate": 1.3329100592591457e-05, "loss": 4.3903, "step": 20750 }, { "epoch": 0.7868349937482354, "grad_norm": 0.04833984375, "learning_rate": 1.3323113599209205e-05, "loss": 4.3953, "step": 20760 }, { "epoch": 0.7872383334005566, "grad_norm": 0.045166015625, "learning_rate": 1.3317125266519716e-05, "loss": 4.4027, "step": 20770 }, { "epoch": 0.7876416730528778, "grad_norm": 0.045166015625, "learning_rate": 1.3311135596936447e-05, "loss": 4.3829, "step": 20780 }, { "epoch": 0.788045012705199, "grad_norm": 0.042724609375, "learning_rate": 1.3305144592873406e-05, "loss": 4.4559, "step": 20790 }, { "epoch": 0.7884483523575203, "grad_norm": 0.04296875, "learning_rate": 1.329915225674513e-05, "loss": 4.3647, "step": 20800 }, { "epoch": 0.7888516920098415, "grad_norm": 0.044677734375, "learning_rate": 1.3293158590966709e-05, "loss": 4.392, "step": 20810 }, { "epoch": 0.7892550316621627, "grad_norm": 0.0458984375, "learning_rate": 1.3287163597953743e-05, "loss": 4.4122, "step": 20820 }, { "epoch": 0.7896583713144839, "grad_norm": 0.042236328125, "learning_rate": 1.3281167280122387e-05, "loss": 4.4082, "step": 20830 }, { "epoch": 0.7900617109668051, "grad_norm": 0.04443359375, "learning_rate": 1.3275169639889325e-05, "loss": 4.4207, "step": 20840 }, { "epoch": 0.7904650506191264, "grad_norm": 0.042236328125, "learning_rate": 1.326917067967177e-05, "loss": 4.406, "step": 20850 }, { "epoch": 0.7908683902714476, "grad_norm": 0.044189453125, "learning_rate": 1.3263170401887474e-05, "loss": 4.4129, "step": 20860 }, { "epoch": 0.7912717299237688, "grad_norm": 0.04345703125, "learning_rate": 1.325716880895471e-05, "loss": 4.3947, "step": 20870 }, { "epoch": 0.79167506957609, "grad_norm": 0.0458984375, "learning_rate": 1.3251165903292292e-05, "loss": 4.451, "step": 20880 }, { "epoch": 0.7920784092284112, "grad_norm": 0.04638671875, "learning_rate": 1.3245161687319552e-05, "loss": 4.3852, "step": 20890 }, { "epoch": 0.7924817488807324, "grad_norm": 0.04345703125, "learning_rate": 1.3239156163456366e-05, "loss": 4.4022, "step": 20900 }, { "epoch": 0.7928850885330537, "grad_norm": 0.04345703125, "learning_rate": 1.3233149334123118e-05, "loss": 4.4132, "step": 20910 }, { "epoch": 0.7932884281853749, "grad_norm": 0.04541015625, "learning_rate": 1.3227141201740732e-05, "loss": 4.3993, "step": 20920 }, { "epoch": 0.7936917678376961, "grad_norm": 0.044189453125, "learning_rate": 1.3221131768730651e-05, "loss": 4.393, "step": 20930 }, { "epoch": 0.7940951074900173, "grad_norm": 0.04345703125, "learning_rate": 1.3215121037514846e-05, "loss": 4.3794, "step": 20940 }, { "epoch": 0.7944984471423385, "grad_norm": 0.045654296875, "learning_rate": 1.3209109010515804e-05, "loss": 4.3916, "step": 20950 }, { "epoch": 0.7949017867946598, "grad_norm": 0.04541015625, "learning_rate": 1.3203095690156544e-05, "loss": 4.3905, "step": 20960 }, { "epoch": 0.795305126446981, "grad_norm": 0.04443359375, "learning_rate": 1.3197081078860598e-05, "loss": 4.4291, "step": 20970 }, { "epoch": 0.7957084660993022, "grad_norm": 0.048095703125, "learning_rate": 1.3191065179052022e-05, "loss": 4.4173, "step": 20980 }, { "epoch": 0.7961118057516234, "grad_norm": 0.042724609375, "learning_rate": 1.3185047993155397e-05, "loss": 4.4158, "step": 20990 }, { "epoch": 0.7965151454039446, "grad_norm": 0.043701171875, "learning_rate": 1.3179029523595808e-05, "loss": 4.3966, "step": 21000 }, { "epoch": 0.7969184850562658, "grad_norm": 0.04296875, "learning_rate": 1.3173009772798873e-05, "loss": 4.4588, "step": 21010 }, { "epoch": 0.7973218247085871, "grad_norm": 0.04296875, "learning_rate": 1.316698874319071e-05, "loss": 4.4218, "step": 21020 }, { "epoch": 0.7977251643609083, "grad_norm": 0.044677734375, "learning_rate": 1.316096643719797e-05, "loss": 4.4237, "step": 21030 }, { "epoch": 0.7981285040132295, "grad_norm": 0.04443359375, "learning_rate": 1.3154942857247805e-05, "loss": 4.4226, "step": 21040 }, { "epoch": 0.7985318436655507, "grad_norm": 0.045654296875, "learning_rate": 1.314891800576789e-05, "loss": 4.404, "step": 21050 }, { "epoch": 0.7989351833178719, "grad_norm": 0.043212890625, "learning_rate": 1.3142891885186402e-05, "loss": 4.3975, "step": 21060 }, { "epoch": 0.7993385229701933, "grad_norm": 0.04150390625, "learning_rate": 1.3136864497932038e-05, "loss": 4.4338, "step": 21070 }, { "epoch": 0.7997418626225145, "grad_norm": 0.04345703125, "learning_rate": 1.3130835846434002e-05, "loss": 4.4074, "step": 21080 }, { "epoch": 0.8001452022748357, "grad_norm": 0.043701171875, "learning_rate": 1.3124805933122012e-05, "loss": 4.3995, "step": 21090 }, { "epoch": 0.8005485419271569, "grad_norm": 0.04296875, "learning_rate": 1.3118774760426288e-05, "loss": 4.4304, "step": 21100 }, { "epoch": 0.800951881579478, "grad_norm": 0.0458984375, "learning_rate": 1.3112742330777557e-05, "loss": 4.4307, "step": 21110 }, { "epoch": 0.8013552212317993, "grad_norm": 0.046142578125, "learning_rate": 1.3106708646607062e-05, "loss": 4.431, "step": 21120 }, { "epoch": 0.8017585608841206, "grad_norm": 0.046630859375, "learning_rate": 1.310067371034654e-05, "loss": 4.4131, "step": 21130 }, { "epoch": 0.8021619005364418, "grad_norm": 0.0458984375, "learning_rate": 1.3094637524428246e-05, "loss": 4.3997, "step": 21140 }, { "epoch": 0.802565240188763, "grad_norm": 0.045166015625, "learning_rate": 1.3088600091284922e-05, "loss": 4.4169, "step": 21150 }, { "epoch": 0.8029685798410842, "grad_norm": 0.04345703125, "learning_rate": 1.3082561413349826e-05, "loss": 4.4194, "step": 21160 }, { "epoch": 0.8033719194934054, "grad_norm": 0.044677734375, "learning_rate": 1.3076521493056711e-05, "loss": 4.4477, "step": 21170 }, { "epoch": 0.8037752591457266, "grad_norm": 0.04345703125, "learning_rate": 1.3070480332839838e-05, "loss": 4.4326, "step": 21180 }, { "epoch": 0.8041785987980479, "grad_norm": 0.043212890625, "learning_rate": 1.3064437935133958e-05, "loss": 4.4129, "step": 21190 }, { "epoch": 0.8045819384503691, "grad_norm": 0.044189453125, "learning_rate": 1.305839430237433e-05, "loss": 4.4233, "step": 21200 }, { "epoch": 0.8049852781026903, "grad_norm": 0.044677734375, "learning_rate": 1.30523494369967e-05, "loss": 4.4403, "step": 21210 }, { "epoch": 0.8053886177550115, "grad_norm": 0.0439453125, "learning_rate": 1.3046303341437321e-05, "loss": 4.4175, "step": 21220 }, { "epoch": 0.8057919574073327, "grad_norm": 0.044677734375, "learning_rate": 1.3040256018132933e-05, "loss": 4.3849, "step": 21230 }, { "epoch": 0.806195297059654, "grad_norm": 0.04443359375, "learning_rate": 1.3034207469520782e-05, "loss": 4.4228, "step": 21240 }, { "epoch": 0.8065986367119752, "grad_norm": 0.046142578125, "learning_rate": 1.3028157698038599e-05, "loss": 4.3862, "step": 21250 }, { "epoch": 0.8070019763642964, "grad_norm": 0.04736328125, "learning_rate": 1.3022106706124608e-05, "loss": 4.4109, "step": 21260 }, { "epoch": 0.8074053160166176, "grad_norm": 0.045166015625, "learning_rate": 1.3016054496217531e-05, "loss": 4.3833, "step": 21270 }, { "epoch": 0.8078086556689388, "grad_norm": 0.045166015625, "learning_rate": 1.3010001070756572e-05, "loss": 4.439, "step": 21280 }, { "epoch": 0.80821199532126, "grad_norm": 0.0419921875, "learning_rate": 1.3003946432181436e-05, "loss": 4.4385, "step": 21290 }, { "epoch": 0.8086153349735813, "grad_norm": 0.04541015625, "learning_rate": 1.2997890582932305e-05, "loss": 4.3832, "step": 21300 }, { "epoch": 0.8090186746259025, "grad_norm": 0.047119140625, "learning_rate": 1.299183352544986e-05, "loss": 4.4072, "step": 21310 }, { "epoch": 0.8094220142782237, "grad_norm": 0.04345703125, "learning_rate": 1.2985775262175258e-05, "loss": 4.3785, "step": 21320 }, { "epoch": 0.8098253539305449, "grad_norm": 0.045654296875, "learning_rate": 1.2979715795550152e-05, "loss": 4.4147, "step": 21330 }, { "epoch": 0.8102286935828661, "grad_norm": 0.04638671875, "learning_rate": 1.2973655128016676e-05, "loss": 4.4445, "step": 21340 }, { "epoch": 0.8106320332351874, "grad_norm": 0.044677734375, "learning_rate": 1.2967593262017442e-05, "loss": 4.4082, "step": 21350 }, { "epoch": 0.8110353728875086, "grad_norm": 0.043212890625, "learning_rate": 1.2961530199995558e-05, "loss": 4.4486, "step": 21360 }, { "epoch": 0.8114387125398298, "grad_norm": 0.0439453125, "learning_rate": 1.2955465944394601e-05, "loss": 4.3921, "step": 21370 }, { "epoch": 0.811842052192151, "grad_norm": 0.045166015625, "learning_rate": 1.2949400497658638e-05, "loss": 4.4217, "step": 21380 }, { "epoch": 0.8122453918444722, "grad_norm": 0.044189453125, "learning_rate": 1.294333386223221e-05, "loss": 4.3969, "step": 21390 }, { "epoch": 0.8126487314967934, "grad_norm": 0.04443359375, "learning_rate": 1.2937266040560346e-05, "loss": 4.4185, "step": 21400 }, { "epoch": 0.8130520711491147, "grad_norm": 0.04443359375, "learning_rate": 1.2931197035088539e-05, "loss": 4.3683, "step": 21410 }, { "epoch": 0.8134554108014359, "grad_norm": 0.044677734375, "learning_rate": 1.292512684826277e-05, "loss": 4.4395, "step": 21420 }, { "epoch": 0.8138587504537571, "grad_norm": 0.043212890625, "learning_rate": 1.2919055482529494e-05, "loss": 4.4135, "step": 21430 }, { "epoch": 0.8142620901060783, "grad_norm": 0.048095703125, "learning_rate": 1.2912982940335643e-05, "loss": 4.4398, "step": 21440 }, { "epoch": 0.8146654297583995, "grad_norm": 0.045166015625, "learning_rate": 1.2906909224128619e-05, "loss": 4.4275, "step": 21450 }, { "epoch": 0.8150687694107208, "grad_norm": 0.047607421875, "learning_rate": 1.2900834336356294e-05, "loss": 4.4076, "step": 21460 }, { "epoch": 0.815472109063042, "grad_norm": 0.043212890625, "learning_rate": 1.289475827946702e-05, "loss": 4.4012, "step": 21470 }, { "epoch": 0.8158754487153632, "grad_norm": 0.0439453125, "learning_rate": 1.2888681055909618e-05, "loss": 4.4082, "step": 21480 }, { "epoch": 0.8162787883676844, "grad_norm": 0.04248046875, "learning_rate": 1.2882602668133376e-05, "loss": 4.4076, "step": 21490 }, { "epoch": 0.8166821280200056, "grad_norm": 0.04736328125, "learning_rate": 1.2876523118588056e-05, "loss": 4.3717, "step": 21500 }, { "epoch": 0.8170854676723268, "grad_norm": 0.044189453125, "learning_rate": 1.2870442409723882e-05, "loss": 4.4039, "step": 21510 }, { "epoch": 0.8174888073246481, "grad_norm": 0.044921875, "learning_rate": 1.2864360543991554e-05, "loss": 4.4545, "step": 21520 }, { "epoch": 0.8178921469769693, "grad_norm": 0.044189453125, "learning_rate": 1.2858277523842231e-05, "loss": 4.4154, "step": 21530 }, { "epoch": 0.8182954866292905, "grad_norm": 0.045654296875, "learning_rate": 1.2852193351727536e-05, "loss": 4.4125, "step": 21540 }, { "epoch": 0.8186988262816117, "grad_norm": 0.04443359375, "learning_rate": 1.2846108030099562e-05, "loss": 4.4107, "step": 21550 }, { "epoch": 0.8191021659339329, "grad_norm": 0.044921875, "learning_rate": 1.2840021561410867e-05, "loss": 4.4288, "step": 21560 }, { "epoch": 0.8195055055862542, "grad_norm": 0.044921875, "learning_rate": 1.283393394811446e-05, "loss": 4.3841, "step": 21570 }, { "epoch": 0.8199088452385754, "grad_norm": 0.044677734375, "learning_rate": 1.2827845192663823e-05, "loss": 4.3746, "step": 21580 }, { "epoch": 0.8203121848908966, "grad_norm": 0.043212890625, "learning_rate": 1.2821755297512893e-05, "loss": 4.4098, "step": 21590 }, { "epoch": 0.8207155245432178, "grad_norm": 0.046142578125, "learning_rate": 1.2815664265116066e-05, "loss": 4.4326, "step": 21600 }, { "epoch": 0.821118864195539, "grad_norm": 0.045654296875, "learning_rate": 1.2809572097928198e-05, "loss": 4.4059, "step": 21610 }, { "epoch": 0.8215222038478602, "grad_norm": 0.04638671875, "learning_rate": 1.2803478798404605e-05, "loss": 4.4284, "step": 21620 }, { "epoch": 0.8219255435001815, "grad_norm": 0.04296875, "learning_rate": 1.2797384369001052e-05, "loss": 4.3962, "step": 21630 }, { "epoch": 0.8223288831525027, "grad_norm": 0.04443359375, "learning_rate": 1.2791288812173765e-05, "loss": 4.421, "step": 21640 }, { "epoch": 0.8227322228048239, "grad_norm": 0.04541015625, "learning_rate": 1.2785192130379422e-05, "loss": 4.4429, "step": 21650 }, { "epoch": 0.8231355624571451, "grad_norm": 0.045166015625, "learning_rate": 1.2779094326075158e-05, "loss": 4.3992, "step": 21660 }, { "epoch": 0.8235389021094663, "grad_norm": 0.046142578125, "learning_rate": 1.2772995401718558e-05, "loss": 4.387, "step": 21670 }, { "epoch": 0.8239422417617877, "grad_norm": 0.046142578125, "learning_rate": 1.2766895359767657e-05, "loss": 4.4189, "step": 21680 }, { "epoch": 0.8243455814141089, "grad_norm": 0.04443359375, "learning_rate": 1.2760794202680939e-05, "loss": 4.4448, "step": 21690 }, { "epoch": 0.82474892106643, "grad_norm": 0.04638671875, "learning_rate": 1.275469193291734e-05, "loss": 4.4382, "step": 21700 }, { "epoch": 0.8251522607187513, "grad_norm": 0.046142578125, "learning_rate": 1.2748588552936251e-05, "loss": 4.4096, "step": 21710 }, { "epoch": 0.8255556003710725, "grad_norm": 0.04443359375, "learning_rate": 1.27424840651975e-05, "loss": 4.4338, "step": 21720 }, { "epoch": 0.8259589400233937, "grad_norm": 0.04638671875, "learning_rate": 1.2736378472161364e-05, "loss": 4.4277, "step": 21730 }, { "epoch": 0.826362279675715, "grad_norm": 0.047607421875, "learning_rate": 1.2730271776288567e-05, "loss": 4.4119, "step": 21740 }, { "epoch": 0.8267656193280362, "grad_norm": 0.04345703125, "learning_rate": 1.2724163980040283e-05, "loss": 4.3942, "step": 21750 }, { "epoch": 0.8271689589803574, "grad_norm": 0.04443359375, "learning_rate": 1.2718055085878114e-05, "loss": 4.4368, "step": 21760 }, { "epoch": 0.8275722986326786, "grad_norm": 0.043212890625, "learning_rate": 1.2711945096264123e-05, "loss": 4.4253, "step": 21770 }, { "epoch": 0.8279756382849998, "grad_norm": 0.042724609375, "learning_rate": 1.2705834013660798e-05, "loss": 4.4206, "step": 21780 }, { "epoch": 0.8283789779373211, "grad_norm": 0.045654296875, "learning_rate": 1.2699721840531082e-05, "loss": 4.3913, "step": 21790 }, { "epoch": 0.8287823175896423, "grad_norm": 0.046630859375, "learning_rate": 1.269360857933835e-05, "loss": 4.4065, "step": 21800 }, { "epoch": 0.8291856572419635, "grad_norm": 0.0458984375, "learning_rate": 1.2687494232546413e-05, "loss": 4.4081, "step": 21810 }, { "epoch": 0.8295889968942847, "grad_norm": 0.046142578125, "learning_rate": 1.2681378802619527e-05, "loss": 4.4289, "step": 21820 }, { "epoch": 0.8299923365466059, "grad_norm": 0.044677734375, "learning_rate": 1.2675262292022377e-05, "loss": 4.4258, "step": 21830 }, { "epoch": 0.8303956761989271, "grad_norm": 0.044189453125, "learning_rate": 1.266914470322009e-05, "loss": 4.4407, "step": 21840 }, { "epoch": 0.8307990158512484, "grad_norm": 0.04443359375, "learning_rate": 1.266302603867822e-05, "loss": 4.4, "step": 21850 }, { "epoch": 0.8312023555035696, "grad_norm": 0.044921875, "learning_rate": 1.265690630086276e-05, "loss": 4.4004, "step": 21860 }, { "epoch": 0.8316056951558908, "grad_norm": 0.043212890625, "learning_rate": 1.265078549224014e-05, "loss": 4.4184, "step": 21870 }, { "epoch": 0.832009034808212, "grad_norm": 0.0439453125, "learning_rate": 1.2644663615277211e-05, "loss": 4.4261, "step": 21880 }, { "epoch": 0.8324123744605332, "grad_norm": 0.04443359375, "learning_rate": 1.263854067244126e-05, "loss": 4.4268, "step": 21890 }, { "epoch": 0.8328157141128545, "grad_norm": 0.04248046875, "learning_rate": 1.2632416666200007e-05, "loss": 4.4209, "step": 21900 }, { "epoch": 0.8332190537651757, "grad_norm": 0.043701171875, "learning_rate": 1.2626291599021592e-05, "loss": 4.4301, "step": 21910 }, { "epoch": 0.8336223934174969, "grad_norm": 0.046875, "learning_rate": 1.2620165473374596e-05, "loss": 4.4172, "step": 21920 }, { "epoch": 0.8340257330698181, "grad_norm": 0.04443359375, "learning_rate": 1.2614038291728009e-05, "loss": 4.3953, "step": 21930 }, { "epoch": 0.8344290727221393, "grad_norm": 0.04638671875, "learning_rate": 1.2607910056551254e-05, "loss": 4.4519, "step": 21940 }, { "epoch": 0.8348324123744605, "grad_norm": 0.0439453125, "learning_rate": 1.2601780770314187e-05, "loss": 4.4074, "step": 21950 }, { "epoch": 0.8352357520267818, "grad_norm": 0.044677734375, "learning_rate": 1.2595650435487079e-05, "loss": 4.4383, "step": 21960 }, { "epoch": 0.835639091679103, "grad_norm": 0.045654296875, "learning_rate": 1.2589519054540628e-05, "loss": 4.4103, "step": 21970 }, { "epoch": 0.8360424313314242, "grad_norm": 0.0439453125, "learning_rate": 1.2583386629945945e-05, "loss": 4.4007, "step": 21980 }, { "epoch": 0.8364457709837454, "grad_norm": 0.044677734375, "learning_rate": 1.2577253164174576e-05, "loss": 4.382, "step": 21990 }, { "epoch": 0.8368491106360666, "grad_norm": 0.042724609375, "learning_rate": 1.2571118659698472e-05, "loss": 4.4461, "step": 22000 }, { "epoch": 0.8372524502883879, "grad_norm": 0.04443359375, "learning_rate": 1.256498311899001e-05, "loss": 4.4118, "step": 22010 }, { "epoch": 0.8376557899407091, "grad_norm": 0.047119140625, "learning_rate": 1.2558846544521983e-05, "loss": 4.4005, "step": 22020 }, { "epoch": 0.8380591295930303, "grad_norm": 0.045166015625, "learning_rate": 1.2552708938767606e-05, "loss": 4.4015, "step": 22030 }, { "epoch": 0.8384624692453515, "grad_norm": 0.043212890625, "learning_rate": 1.2546570304200496e-05, "loss": 4.4081, "step": 22040 }, { "epoch": 0.8388658088976727, "grad_norm": 0.043701171875, "learning_rate": 1.2540430643294701e-05, "loss": 4.4142, "step": 22050 }, { "epoch": 0.8392691485499939, "grad_norm": 0.043701171875, "learning_rate": 1.2534289958524675e-05, "loss": 4.4031, "step": 22060 }, { "epoch": 0.8396724882023152, "grad_norm": 0.044677734375, "learning_rate": 1.2528148252365282e-05, "loss": 4.4113, "step": 22070 }, { "epoch": 0.8400758278546364, "grad_norm": 0.04443359375, "learning_rate": 1.2522005527291801e-05, "loss": 4.4134, "step": 22080 }, { "epoch": 0.8404791675069576, "grad_norm": 0.0458984375, "learning_rate": 1.2515861785779922e-05, "loss": 4.4406, "step": 22090 }, { "epoch": 0.8408825071592788, "grad_norm": 0.04638671875, "learning_rate": 1.2509717030305744e-05, "loss": 4.448, "step": 22100 }, { "epoch": 0.8412858468116, "grad_norm": 0.0439453125, "learning_rate": 1.2503571263345771e-05, "loss": 4.3865, "step": 22110 }, { "epoch": 0.8416891864639212, "grad_norm": 0.044189453125, "learning_rate": 1.2497424487376924e-05, "loss": 4.3903, "step": 22120 }, { "epoch": 0.8420925261162425, "grad_norm": 0.04296875, "learning_rate": 1.2491276704876519e-05, "loss": 4.3841, "step": 22130 }, { "epoch": 0.8424958657685637, "grad_norm": 0.0458984375, "learning_rate": 1.2485127918322287e-05, "loss": 4.4004, "step": 22140 }, { "epoch": 0.8428992054208849, "grad_norm": 0.0419921875, "learning_rate": 1.2478978130192359e-05, "loss": 4.3998, "step": 22150 }, { "epoch": 0.8433025450732061, "grad_norm": 0.04541015625, "learning_rate": 1.2472827342965267e-05, "loss": 4.4013, "step": 22160 }, { "epoch": 0.8437058847255273, "grad_norm": 0.04443359375, "learning_rate": 1.2466675559119959e-05, "loss": 4.4314, "step": 22170 }, { "epoch": 0.8441092243778486, "grad_norm": 0.04638671875, "learning_rate": 1.2460522781135765e-05, "loss": 4.4064, "step": 22180 }, { "epoch": 0.8445125640301698, "grad_norm": 0.044677734375, "learning_rate": 1.2454369011492434e-05, "loss": 4.4026, "step": 22190 }, { "epoch": 0.844915903682491, "grad_norm": 0.044677734375, "learning_rate": 1.2448214252670096e-05, "loss": 4.3975, "step": 22200 }, { "epoch": 0.8453192433348122, "grad_norm": 0.047119140625, "learning_rate": 1.2442058507149302e-05, "loss": 4.4211, "step": 22210 }, { "epoch": 0.8457225829871334, "grad_norm": 0.047119140625, "learning_rate": 1.2435901777410981e-05, "loss": 4.4336, "step": 22220 }, { "epoch": 0.8461259226394546, "grad_norm": 0.044677734375, "learning_rate": 1.2429744065936466e-05, "loss": 4.4427, "step": 22230 }, { "epoch": 0.8465292622917759, "grad_norm": 0.044189453125, "learning_rate": 1.2423585375207493e-05, "loss": 4.387, "step": 22240 }, { "epoch": 0.8469326019440971, "grad_norm": 0.045654296875, "learning_rate": 1.2417425707706179e-05, "loss": 4.4026, "step": 22250 }, { "epoch": 0.8473359415964183, "grad_norm": 0.0458984375, "learning_rate": 1.2411265065915042e-05, "loss": 4.4359, "step": 22260 }, { "epoch": 0.8477392812487395, "grad_norm": 0.043212890625, "learning_rate": 1.2405103452316996e-05, "loss": 4.4049, "step": 22270 }, { "epoch": 0.8481426209010607, "grad_norm": 0.043701171875, "learning_rate": 1.2398940869395339e-05, "loss": 4.4004, "step": 22280 }, { "epoch": 0.848545960553382, "grad_norm": 0.04736328125, "learning_rate": 1.2392777319633766e-05, "loss": 4.405, "step": 22290 }, { "epoch": 0.8489493002057033, "grad_norm": 0.046875, "learning_rate": 1.2386612805516358e-05, "loss": 4.4042, "step": 22300 }, { "epoch": 0.8493526398580244, "grad_norm": 0.044189453125, "learning_rate": 1.2380447329527583e-05, "loss": 4.3783, "step": 22310 }, { "epoch": 0.8497559795103456, "grad_norm": 0.0439453125, "learning_rate": 1.2374280894152302e-05, "loss": 4.4224, "step": 22320 }, { "epoch": 0.8501593191626668, "grad_norm": 0.043212890625, "learning_rate": 1.2368113501875761e-05, "loss": 4.4233, "step": 22330 }, { "epoch": 0.850562658814988, "grad_norm": 0.044189453125, "learning_rate": 1.236194515518359e-05, "loss": 4.4213, "step": 22340 }, { "epoch": 0.8509659984673094, "grad_norm": 0.04443359375, "learning_rate": 1.2355775856561803e-05, "loss": 4.4259, "step": 22350 }, { "epoch": 0.8513693381196306, "grad_norm": 0.043701171875, "learning_rate": 1.23496056084968e-05, "loss": 4.4184, "step": 22360 }, { "epoch": 0.8517726777719518, "grad_norm": 0.046875, "learning_rate": 1.2343434413475361e-05, "loss": 4.4338, "step": 22370 }, { "epoch": 0.852176017424273, "grad_norm": 0.044921875, "learning_rate": 1.2337262273984656e-05, "loss": 4.4052, "step": 22380 }, { "epoch": 0.8525793570765942, "grad_norm": 0.042724609375, "learning_rate": 1.2331089192512218e-05, "loss": 4.4043, "step": 22390 }, { "epoch": 0.8529826967289155, "grad_norm": 0.04541015625, "learning_rate": 1.2324915171545978e-05, "loss": 4.413, "step": 22400 }, { "epoch": 0.8533860363812367, "grad_norm": 0.04736328125, "learning_rate": 1.2318740213574239e-05, "loss": 4.3875, "step": 22410 }, { "epoch": 0.8537893760335579, "grad_norm": 0.0439453125, "learning_rate": 1.2312564321085677e-05, "loss": 4.4006, "step": 22420 }, { "epoch": 0.8541927156858791, "grad_norm": 0.04296875, "learning_rate": 1.230638749656935e-05, "loss": 4.4327, "step": 22430 }, { "epoch": 0.8545960553382003, "grad_norm": 0.04248046875, "learning_rate": 1.2300209742514688e-05, "loss": 4.4337, "step": 22440 }, { "epoch": 0.8549993949905215, "grad_norm": 0.045166015625, "learning_rate": 1.2294031061411502e-05, "loss": 4.3832, "step": 22450 }, { "epoch": 0.8554027346428428, "grad_norm": 0.042724609375, "learning_rate": 1.2287851455749971e-05, "loss": 4.4055, "step": 22460 }, { "epoch": 0.855806074295164, "grad_norm": 0.042724609375, "learning_rate": 1.2281670928020647e-05, "loss": 4.4562, "step": 22470 }, { "epoch": 0.8562094139474852, "grad_norm": 0.047119140625, "learning_rate": 1.2275489480714453e-05, "loss": 4.44, "step": 22480 }, { "epoch": 0.8566127535998064, "grad_norm": 0.046142578125, "learning_rate": 1.2269307116322686e-05, "loss": 4.3771, "step": 22490 }, { "epoch": 0.8570160932521276, "grad_norm": 0.044189453125, "learning_rate": 1.2263123837337013e-05, "loss": 4.4012, "step": 22500 }, { "epoch": 0.8574194329044489, "grad_norm": 0.043701171875, "learning_rate": 1.2256939646249467e-05, "loss": 4.441, "step": 22510 }, { "epoch": 0.8578227725567701, "grad_norm": 0.0439453125, "learning_rate": 1.2250754545552446e-05, "loss": 4.4608, "step": 22520 }, { "epoch": 0.8582261122090913, "grad_norm": 0.04736328125, "learning_rate": 1.2244568537738716e-05, "loss": 4.4328, "step": 22530 }, { "epoch": 0.8586294518614125, "grad_norm": 0.043701171875, "learning_rate": 1.2238381625301419e-05, "loss": 4.4167, "step": 22540 }, { "epoch": 0.8590327915137337, "grad_norm": 0.045166015625, "learning_rate": 1.2232193810734044e-05, "loss": 4.4104, "step": 22550 }, { "epoch": 0.8594361311660549, "grad_norm": 0.04443359375, "learning_rate": 1.2226005096530458e-05, "loss": 4.4291, "step": 22560 }, { "epoch": 0.8598394708183762, "grad_norm": 0.045166015625, "learning_rate": 1.221981548518488e-05, "loss": 4.4212, "step": 22570 }, { "epoch": 0.8602428104706974, "grad_norm": 0.041259765625, "learning_rate": 1.2213624979191898e-05, "loss": 4.4034, "step": 22580 }, { "epoch": 0.8606461501230186, "grad_norm": 0.047119140625, "learning_rate": 1.220743358104646e-05, "loss": 4.412, "step": 22590 }, { "epoch": 0.8610494897753398, "grad_norm": 0.0439453125, "learning_rate": 1.2201241293243873e-05, "loss": 4.4587, "step": 22600 }, { "epoch": 0.861452829427661, "grad_norm": 0.043212890625, "learning_rate": 1.2195048118279795e-05, "loss": 4.4003, "step": 22610 }, { "epoch": 0.8618561690799823, "grad_norm": 0.0419921875, "learning_rate": 1.2188854058650254e-05, "loss": 4.4101, "step": 22620 }, { "epoch": 0.8622595087323035, "grad_norm": 0.04443359375, "learning_rate": 1.2182659116851624e-05, "loss": 4.4183, "step": 22630 }, { "epoch": 0.8626628483846247, "grad_norm": 0.044921875, "learning_rate": 1.2176463295380643e-05, "loss": 4.3674, "step": 22640 }, { "epoch": 0.8630661880369459, "grad_norm": 0.043701171875, "learning_rate": 1.2170266596734394e-05, "loss": 4.3654, "step": 22650 }, { "epoch": 0.8634695276892671, "grad_norm": 0.0439453125, "learning_rate": 1.2164069023410323e-05, "loss": 4.3956, "step": 22660 }, { "epoch": 0.8638728673415883, "grad_norm": 0.04443359375, "learning_rate": 1.2157870577906223e-05, "loss": 4.401, "step": 22670 }, { "epoch": 0.8642762069939096, "grad_norm": 0.044189453125, "learning_rate": 1.2151671262720242e-05, "loss": 4.4158, "step": 22680 }, { "epoch": 0.8646795466462308, "grad_norm": 0.044921875, "learning_rate": 1.2145471080350877e-05, "loss": 4.3932, "step": 22690 }, { "epoch": 0.865082886298552, "grad_norm": 0.045166015625, "learning_rate": 1.2139270033296969e-05, "loss": 4.41, "step": 22700 }, { "epoch": 0.8654862259508732, "grad_norm": 0.044189453125, "learning_rate": 1.2133068124057716e-05, "loss": 4.4247, "step": 22710 }, { "epoch": 0.8658895656031944, "grad_norm": 0.04345703125, "learning_rate": 1.212686535513266e-05, "loss": 4.4264, "step": 22720 }, { "epoch": 0.8662929052555157, "grad_norm": 0.04541015625, "learning_rate": 1.212066172902169e-05, "loss": 4.4356, "step": 22730 }, { "epoch": 0.8666962449078369, "grad_norm": 0.047607421875, "learning_rate": 1.2114457248225038e-05, "loss": 4.4404, "step": 22740 }, { "epoch": 0.8670995845601581, "grad_norm": 0.043212890625, "learning_rate": 1.2108251915243284e-05, "loss": 4.433, "step": 22750 }, { "epoch": 0.8675029242124793, "grad_norm": 0.046142578125, "learning_rate": 1.2102045732577348e-05, "loss": 4.4283, "step": 22760 }, { "epoch": 0.8679062638648005, "grad_norm": 0.04638671875, "learning_rate": 1.2095838702728495e-05, "loss": 4.3996, "step": 22770 }, { "epoch": 0.8683096035171217, "grad_norm": 0.046142578125, "learning_rate": 1.2089630828198333e-05, "loss": 4.4021, "step": 22780 }, { "epoch": 0.868712943169443, "grad_norm": 0.04541015625, "learning_rate": 1.2083422111488805e-05, "loss": 4.4237, "step": 22790 }, { "epoch": 0.8691162828217642, "grad_norm": 0.045654296875, "learning_rate": 1.20772125551022e-05, "loss": 4.4197, "step": 22800 }, { "epoch": 0.8695196224740854, "grad_norm": 0.04296875, "learning_rate": 1.2071002161541139e-05, "loss": 4.455, "step": 22810 }, { "epoch": 0.8699229621264066, "grad_norm": 0.044921875, "learning_rate": 1.2064790933308585e-05, "loss": 4.3833, "step": 22820 }, { "epoch": 0.8703263017787278, "grad_norm": 0.046630859375, "learning_rate": 1.2058578872907833e-05, "loss": 4.4318, "step": 22830 }, { "epoch": 0.8707296414310491, "grad_norm": 0.047607421875, "learning_rate": 1.205236598284252e-05, "loss": 4.4155, "step": 22840 }, { "epoch": 0.8711329810833703, "grad_norm": 0.044677734375, "learning_rate": 1.204615226561661e-05, "loss": 4.4482, "step": 22850 }, { "epoch": 0.8715363207356915, "grad_norm": 0.043212890625, "learning_rate": 1.2039937723734407e-05, "loss": 4.4253, "step": 22860 }, { "epoch": 0.8719396603880127, "grad_norm": 0.04541015625, "learning_rate": 1.2033722359700545e-05, "loss": 4.4296, "step": 22870 }, { "epoch": 0.8723430000403339, "grad_norm": 0.0458984375, "learning_rate": 1.2027506176019985e-05, "loss": 4.4297, "step": 22880 }, { "epoch": 0.8727463396926551, "grad_norm": 0.044921875, "learning_rate": 1.2021289175198026e-05, "loss": 4.4232, "step": 22890 }, { "epoch": 0.8731496793449764, "grad_norm": 0.0458984375, "learning_rate": 1.2015071359740287e-05, "loss": 4.4318, "step": 22900 }, { "epoch": 0.8735530189972976, "grad_norm": 0.044189453125, "learning_rate": 1.2008852732152725e-05, "loss": 4.4108, "step": 22910 }, { "epoch": 0.8739563586496188, "grad_norm": 0.049072265625, "learning_rate": 1.2002633294941617e-05, "loss": 4.3796, "step": 22920 }, { "epoch": 0.87435969830194, "grad_norm": 0.045166015625, "learning_rate": 1.1996413050613573e-05, "loss": 4.4311, "step": 22930 }, { "epoch": 0.8747630379542612, "grad_norm": 0.0439453125, "learning_rate": 1.1990192001675523e-05, "loss": 4.4064, "step": 22940 }, { "epoch": 0.8751663776065826, "grad_norm": 0.042724609375, "learning_rate": 1.1983970150634719e-05, "loss": 4.4408, "step": 22950 }, { "epoch": 0.8755697172589038, "grad_norm": 0.045654296875, "learning_rate": 1.1977747499998743e-05, "loss": 4.4232, "step": 22960 }, { "epoch": 0.875973056911225, "grad_norm": 0.0458984375, "learning_rate": 1.19715240522755e-05, "loss": 4.4037, "step": 22970 }, { "epoch": 0.8763763965635462, "grad_norm": 0.044189453125, "learning_rate": 1.1965299809973207e-05, "loss": 4.419, "step": 22980 }, { "epoch": 0.8767797362158674, "grad_norm": 0.0458984375, "learning_rate": 1.1959074775600412e-05, "loss": 4.398, "step": 22990 }, { "epoch": 0.8771830758681886, "grad_norm": 0.046630859375, "learning_rate": 1.1952848951665973e-05, "loss": 4.3844, "step": 23000 }, { "epoch": 0.8775864155205099, "grad_norm": 0.04541015625, "learning_rate": 1.194662234067907e-05, "loss": 4.4325, "step": 23010 }, { "epoch": 0.8779897551728311, "grad_norm": 0.042724609375, "learning_rate": 1.1940394945149202e-05, "loss": 4.3975, "step": 23020 }, { "epoch": 0.8783930948251523, "grad_norm": 0.04345703125, "learning_rate": 1.1934166767586182e-05, "loss": 4.4127, "step": 23030 }, { "epoch": 0.8787964344774735, "grad_norm": 0.043701171875, "learning_rate": 1.1927937810500141e-05, "loss": 4.424, "step": 23040 }, { "epoch": 0.8791997741297947, "grad_norm": 0.04541015625, "learning_rate": 1.192170807640152e-05, "loss": 4.4211, "step": 23050 }, { "epoch": 0.879603113782116, "grad_norm": 0.044189453125, "learning_rate": 1.1915477567801077e-05, "loss": 4.4288, "step": 23060 }, { "epoch": 0.8800064534344372, "grad_norm": 0.046142578125, "learning_rate": 1.1909246287209875e-05, "loss": 4.4518, "step": 23070 }, { "epoch": 0.8804097930867584, "grad_norm": 0.05126953125, "learning_rate": 1.19030142371393e-05, "loss": 4.3989, "step": 23080 }, { "epoch": 0.8808131327390796, "grad_norm": 0.044189453125, "learning_rate": 1.189678142010104e-05, "loss": 4.4043, "step": 23090 }, { "epoch": 0.8812164723914008, "grad_norm": 0.0458984375, "learning_rate": 1.1890547838607091e-05, "loss": 4.4014, "step": 23100 }, { "epoch": 0.881619812043722, "grad_norm": 0.044921875, "learning_rate": 1.1884313495169762e-05, "loss": 4.399, "step": 23110 }, { "epoch": 0.8820231516960433, "grad_norm": 0.04443359375, "learning_rate": 1.1878078392301663e-05, "loss": 4.4414, "step": 23120 }, { "epoch": 0.8824264913483645, "grad_norm": 0.044921875, "learning_rate": 1.1871842532515721e-05, "loss": 4.3869, "step": 23130 }, { "epoch": 0.8828298310006857, "grad_norm": 0.04443359375, "learning_rate": 1.1865605918325156e-05, "loss": 4.4477, "step": 23140 }, { "epoch": 0.8832331706530069, "grad_norm": 0.042724609375, "learning_rate": 1.18593685522435e-05, "loss": 4.3971, "step": 23150 }, { "epoch": 0.8836365103053281, "grad_norm": 0.043701171875, "learning_rate": 1.185313043678458e-05, "loss": 4.4012, "step": 23160 }, { "epoch": 0.8840398499576493, "grad_norm": 0.045166015625, "learning_rate": 1.1846891574462536e-05, "loss": 4.4022, "step": 23170 }, { "epoch": 0.8844431896099706, "grad_norm": 0.044189453125, "learning_rate": 1.1840651967791798e-05, "loss": 4.4229, "step": 23180 }, { "epoch": 0.8848465292622918, "grad_norm": 0.043212890625, "learning_rate": 1.1834411619287107e-05, "loss": 4.4454, "step": 23190 }, { "epoch": 0.885249868914613, "grad_norm": 0.04248046875, "learning_rate": 1.1828170531463492e-05, "loss": 4.4421, "step": 23200 }, { "epoch": 0.8856532085669342, "grad_norm": 0.044677734375, "learning_rate": 1.1821928706836285e-05, "loss": 4.4227, "step": 23210 }, { "epoch": 0.8860565482192554, "grad_norm": 0.04541015625, "learning_rate": 1.1815686147921119e-05, "loss": 4.4464, "step": 23220 }, { "epoch": 0.8864598878715767, "grad_norm": 0.04541015625, "learning_rate": 1.1809442857233915e-05, "loss": 4.4178, "step": 23230 }, { "epoch": 0.8868632275238979, "grad_norm": 0.045166015625, "learning_rate": 1.1803198837290896e-05, "loss": 4.3938, "step": 23240 }, { "epoch": 0.8872665671762191, "grad_norm": 0.04443359375, "learning_rate": 1.1796954090608573e-05, "loss": 4.4414, "step": 23250 }, { "epoch": 0.8876699068285403, "grad_norm": 0.04345703125, "learning_rate": 1.1790708619703757e-05, "loss": 4.4046, "step": 23260 }, { "epoch": 0.8880732464808615, "grad_norm": 0.042724609375, "learning_rate": 1.178446242709354e-05, "loss": 4.4023, "step": 23270 }, { "epoch": 0.8884765861331827, "grad_norm": 0.043701171875, "learning_rate": 1.1778215515295317e-05, "loss": 4.4416, "step": 23280 }, { "epoch": 0.888879925785504, "grad_norm": 0.044189453125, "learning_rate": 1.1771967886826763e-05, "loss": 4.4314, "step": 23290 }, { "epoch": 0.8892832654378252, "grad_norm": 0.041748046875, "learning_rate": 1.1765719544205846e-05, "loss": 4.4237, "step": 23300 }, { "epoch": 0.8896866050901464, "grad_norm": 0.044189453125, "learning_rate": 1.1759470489950829e-05, "loss": 4.4354, "step": 23310 }, { "epoch": 0.8900899447424676, "grad_norm": 0.04443359375, "learning_rate": 1.175322072658025e-05, "loss": 4.4152, "step": 23320 }, { "epoch": 0.8904932843947888, "grad_norm": 0.045166015625, "learning_rate": 1.1746970256612933e-05, "loss": 4.4013, "step": 23330 }, { "epoch": 0.8908966240471101, "grad_norm": 0.04443359375, "learning_rate": 1.1740719082568002e-05, "loss": 4.4568, "step": 23340 }, { "epoch": 0.8912999636994313, "grad_norm": 0.04736328125, "learning_rate": 1.1734467206964846e-05, "loss": 4.4299, "step": 23350 }, { "epoch": 0.8917033033517525, "grad_norm": 0.045654296875, "learning_rate": 1.1728214632323143e-05, "loss": 4.4227, "step": 23360 }, { "epoch": 0.8921066430040737, "grad_norm": 0.039794921875, "learning_rate": 1.1721961361162862e-05, "loss": 4.4051, "step": 23370 }, { "epoch": 0.8925099826563949, "grad_norm": 0.045654296875, "learning_rate": 1.1715707396004238e-05, "loss": 4.4133, "step": 23380 }, { "epoch": 0.8929133223087161, "grad_norm": 0.04443359375, "learning_rate": 1.1709452739367798e-05, "loss": 4.3986, "step": 23390 }, { "epoch": 0.8933166619610374, "grad_norm": 0.044189453125, "learning_rate": 1.170319739377434e-05, "loss": 4.425, "step": 23400 }, { "epoch": 0.8937200016133586, "grad_norm": 0.043212890625, "learning_rate": 1.1696941361744946e-05, "loss": 4.3856, "step": 23410 }, { "epoch": 0.8941233412656798, "grad_norm": 0.044921875, "learning_rate": 1.1690684645800964e-05, "loss": 4.4182, "step": 23420 }, { "epoch": 0.894526680918001, "grad_norm": 0.043701171875, "learning_rate": 1.1684427248464037e-05, "loss": 4.3807, "step": 23430 }, { "epoch": 0.8949300205703222, "grad_norm": 0.0458984375, "learning_rate": 1.1678169172256055e-05, "loss": 4.4196, "step": 23440 }, { "epoch": 0.8953333602226435, "grad_norm": 0.0419921875, "learning_rate": 1.1671910419699206e-05, "loss": 4.4229, "step": 23450 }, { "epoch": 0.8957366998749647, "grad_norm": 0.046875, "learning_rate": 1.1665650993315943e-05, "loss": 4.4196, "step": 23460 }, { "epoch": 0.8961400395272859, "grad_norm": 0.04248046875, "learning_rate": 1.1659390895628979e-05, "loss": 4.4322, "step": 23470 }, { "epoch": 0.8965433791796071, "grad_norm": 0.0439453125, "learning_rate": 1.1653130129161317e-05, "loss": 4.4315, "step": 23480 }, { "epoch": 0.8969467188319283, "grad_norm": 0.043701171875, "learning_rate": 1.1646868696436218e-05, "loss": 4.4106, "step": 23490 }, { "epoch": 0.8973500584842495, "grad_norm": 0.04345703125, "learning_rate": 1.164060659997721e-05, "loss": 4.4524, "step": 23500 }, { "epoch": 0.8977533981365708, "grad_norm": 0.046875, "learning_rate": 1.1634343842308098e-05, "loss": 4.425, "step": 23510 }, { "epoch": 0.898156737788892, "grad_norm": 0.0439453125, "learning_rate": 1.1628080425952944e-05, "loss": 4.4247, "step": 23520 }, { "epoch": 0.8985600774412132, "grad_norm": 0.041748046875, "learning_rate": 1.1621816353436078e-05, "loss": 4.4076, "step": 23530 }, { "epoch": 0.8989634170935344, "grad_norm": 0.04345703125, "learning_rate": 1.1615551627282098e-05, "loss": 4.3897, "step": 23540 }, { "epoch": 0.8993667567458556, "grad_norm": 0.046142578125, "learning_rate": 1.1609286250015861e-05, "loss": 4.4184, "step": 23550 }, { "epoch": 0.899770096398177, "grad_norm": 0.045654296875, "learning_rate": 1.160302022416249e-05, "loss": 4.409, "step": 23560 }, { "epoch": 0.9001734360504982, "grad_norm": 0.045654296875, "learning_rate": 1.1596753552247366e-05, "loss": 4.4085, "step": 23570 }, { "epoch": 0.9005767757028194, "grad_norm": 0.04296875, "learning_rate": 1.1590486236796135e-05, "loss": 4.4282, "step": 23580 }, { "epoch": 0.9009801153551406, "grad_norm": 0.046142578125, "learning_rate": 1.1584218280334696e-05, "loss": 4.4498, "step": 23590 }, { "epoch": 0.9013834550074618, "grad_norm": 0.044921875, "learning_rate": 1.1577949685389212e-05, "loss": 4.4578, "step": 23600 }, { "epoch": 0.901786794659783, "grad_norm": 0.046630859375, "learning_rate": 1.1571680454486102e-05, "loss": 4.4485, "step": 23610 }, { "epoch": 0.9021901343121043, "grad_norm": 0.0439453125, "learning_rate": 1.1565410590152037e-05, "loss": 4.4157, "step": 23620 }, { "epoch": 0.9025934739644255, "grad_norm": 0.0439453125, "learning_rate": 1.1559140094913952e-05, "loss": 4.408, "step": 23630 }, { "epoch": 0.9029968136167467, "grad_norm": 0.046875, "learning_rate": 1.1552868971299025e-05, "loss": 4.421, "step": 23640 }, { "epoch": 0.9034001532690679, "grad_norm": 0.048095703125, "learning_rate": 1.1546597221834697e-05, "loss": 4.4053, "step": 23650 }, { "epoch": 0.9038034929213891, "grad_norm": 0.046142578125, "learning_rate": 1.1540324849048655e-05, "loss": 4.4418, "step": 23660 }, { "epoch": 0.9042068325737104, "grad_norm": 0.04638671875, "learning_rate": 1.1534051855468844e-05, "loss": 4.3984, "step": 23670 }, { "epoch": 0.9046101722260316, "grad_norm": 0.044921875, "learning_rate": 1.1527778243623453e-05, "loss": 4.3782, "step": 23680 }, { "epoch": 0.9050135118783528, "grad_norm": 0.0478515625, "learning_rate": 1.1521504016040921e-05, "loss": 4.435, "step": 23690 }, { "epoch": 0.905416851530674, "grad_norm": 0.04443359375, "learning_rate": 1.1515229175249938e-05, "loss": 4.445, "step": 23700 }, { "epoch": 0.9058201911829952, "grad_norm": 0.047119140625, "learning_rate": 1.1508953723779437e-05, "loss": 4.3963, "step": 23710 }, { "epoch": 0.9062235308353164, "grad_norm": 0.045166015625, "learning_rate": 1.1502677664158603e-05, "loss": 4.3941, "step": 23720 }, { "epoch": 0.9066268704876377, "grad_norm": 0.047119140625, "learning_rate": 1.1496400998916857e-05, "loss": 4.4396, "step": 23730 }, { "epoch": 0.9070302101399589, "grad_norm": 0.046630859375, "learning_rate": 1.1490123730583876e-05, "loss": 4.3913, "step": 23740 }, { "epoch": 0.9074335497922801, "grad_norm": 0.042236328125, "learning_rate": 1.1483845861689568e-05, "loss": 4.4093, "step": 23750 }, { "epoch": 0.9078368894446013, "grad_norm": 0.044189453125, "learning_rate": 1.1477567394764096e-05, "loss": 4.3802, "step": 23760 }, { "epoch": 0.9082402290969225, "grad_norm": 0.04541015625, "learning_rate": 1.1471288332337851e-05, "loss": 4.3722, "step": 23770 }, { "epoch": 0.9086435687492438, "grad_norm": 0.04443359375, "learning_rate": 1.1465008676941472e-05, "loss": 4.4329, "step": 23780 }, { "epoch": 0.909046908401565, "grad_norm": 0.044189453125, "learning_rate": 1.1458728431105834e-05, "loss": 4.4334, "step": 23790 }, { "epoch": 0.9094502480538862, "grad_norm": 0.046875, "learning_rate": 1.1452447597362054e-05, "loss": 4.4154, "step": 23800 }, { "epoch": 0.9098535877062074, "grad_norm": 0.045654296875, "learning_rate": 1.1446166178241476e-05, "loss": 4.3753, "step": 23810 }, { "epoch": 0.9102569273585286, "grad_norm": 0.04443359375, "learning_rate": 1.1439884176275693e-05, "loss": 4.3838, "step": 23820 }, { "epoch": 0.9106602670108498, "grad_norm": 0.040771484375, "learning_rate": 1.1433601593996521e-05, "loss": 4.4136, "step": 23830 }, { "epoch": 0.9110636066631711, "grad_norm": 0.044921875, "learning_rate": 1.1427318433936023e-05, "loss": 4.405, "step": 23840 }, { "epoch": 0.9114669463154923, "grad_norm": 0.04541015625, "learning_rate": 1.1421034698626482e-05, "loss": 4.4619, "step": 23850 }, { "epoch": 0.9118702859678135, "grad_norm": 0.043701171875, "learning_rate": 1.1414750390600417e-05, "loss": 4.4026, "step": 23860 }, { "epoch": 0.9122736256201347, "grad_norm": 0.046142578125, "learning_rate": 1.1408465512390588e-05, "loss": 4.3981, "step": 23870 }, { "epoch": 0.9126769652724559, "grad_norm": 0.045654296875, "learning_rate": 1.1402180066529962e-05, "loss": 4.4093, "step": 23880 }, { "epoch": 0.9130803049247772, "grad_norm": 0.04541015625, "learning_rate": 1.1395894055551759e-05, "loss": 4.3777, "step": 23890 }, { "epoch": 0.9134836445770984, "grad_norm": 0.0458984375, "learning_rate": 1.138960748198941e-05, "loss": 4.4053, "step": 23900 }, { "epoch": 0.9138869842294196, "grad_norm": 0.04736328125, "learning_rate": 1.1383320348376587e-05, "loss": 4.4078, "step": 23910 }, { "epoch": 0.9142903238817408, "grad_norm": 0.043701171875, "learning_rate": 1.1377032657247171e-05, "loss": 4.4207, "step": 23920 }, { "epoch": 0.914693663534062, "grad_norm": 0.0458984375, "learning_rate": 1.137074441113528e-05, "loss": 4.404, "step": 23930 }, { "epoch": 0.9150970031863832, "grad_norm": 0.04736328125, "learning_rate": 1.1364455612575254e-05, "loss": 4.4296, "step": 23940 }, { "epoch": 0.9155003428387045, "grad_norm": 0.04345703125, "learning_rate": 1.135816626410165e-05, "loss": 4.4219, "step": 23950 }, { "epoch": 0.9159036824910257, "grad_norm": 0.0439453125, "learning_rate": 1.1351876368249258e-05, "loss": 4.3934, "step": 23960 }, { "epoch": 0.9163070221433469, "grad_norm": 0.046142578125, "learning_rate": 1.1345585927553069e-05, "loss": 4.3964, "step": 23970 }, { "epoch": 0.9167103617956681, "grad_norm": 0.044189453125, "learning_rate": 1.1339294944548315e-05, "loss": 4.4018, "step": 23980 }, { "epoch": 0.9171137014479893, "grad_norm": 0.0439453125, "learning_rate": 1.1333003421770435e-05, "loss": 4.4097, "step": 23990 }, { "epoch": 0.9175170411003106, "grad_norm": 0.04345703125, "learning_rate": 1.132671136175509e-05, "loss": 4.4303, "step": 24000 }, { "epoch": 0.9179203807526318, "grad_norm": 0.043701171875, "learning_rate": 1.1320418767038146e-05, "loss": 4.4264, "step": 24010 }, { "epoch": 0.918323720404953, "grad_norm": 0.045654296875, "learning_rate": 1.1314125640155705e-05, "loss": 4.3911, "step": 24020 }, { "epoch": 0.9187270600572742, "grad_norm": 0.0478515625, "learning_rate": 1.1307831983644064e-05, "loss": 4.4254, "step": 24030 }, { "epoch": 0.9191303997095954, "grad_norm": 0.045166015625, "learning_rate": 1.1301537800039749e-05, "loss": 4.4154, "step": 24040 }, { "epoch": 0.9195337393619166, "grad_norm": 0.044189453125, "learning_rate": 1.1295243091879484e-05, "loss": 4.4128, "step": 24050 }, { "epoch": 0.9199370790142379, "grad_norm": 0.04638671875, "learning_rate": 1.1288947861700217e-05, "loss": 4.4014, "step": 24060 }, { "epoch": 0.9203404186665591, "grad_norm": 0.044921875, "learning_rate": 1.12826521120391e-05, "loss": 4.3921, "step": 24070 }, { "epoch": 0.9207437583188803, "grad_norm": 0.044677734375, "learning_rate": 1.1276355845433491e-05, "loss": 4.3851, "step": 24080 }, { "epoch": 0.9211470979712015, "grad_norm": 0.0419921875, "learning_rate": 1.1270059064420966e-05, "loss": 4.4032, "step": 24090 }, { "epoch": 0.9215504376235227, "grad_norm": 0.0439453125, "learning_rate": 1.1263761771539297e-05, "loss": 4.4517, "step": 24100 }, { "epoch": 0.9219537772758439, "grad_norm": 0.043701171875, "learning_rate": 1.1257463969326472e-05, "loss": 4.3969, "step": 24110 }, { "epoch": 0.9223571169281652, "grad_norm": 0.046875, "learning_rate": 1.1251165660320683e-05, "loss": 4.3876, "step": 24120 }, { "epoch": 0.9227604565804864, "grad_norm": 0.043701171875, "learning_rate": 1.124486684706032e-05, "loss": 4.3916, "step": 24130 }, { "epoch": 0.9231637962328076, "grad_norm": 0.043701171875, "learning_rate": 1.1238567532083982e-05, "loss": 4.4108, "step": 24140 }, { "epoch": 0.9235671358851288, "grad_norm": 0.044677734375, "learning_rate": 1.1232267717930468e-05, "loss": 4.4031, "step": 24150 }, { "epoch": 0.92397047553745, "grad_norm": 0.045166015625, "learning_rate": 1.1225967407138773e-05, "loss": 4.4299, "step": 24160 }, { "epoch": 0.9243738151897714, "grad_norm": 0.04296875, "learning_rate": 1.1219666602248107e-05, "loss": 4.4066, "step": 24170 }, { "epoch": 0.9247771548420926, "grad_norm": 0.044921875, "learning_rate": 1.1213365305797865e-05, "loss": 4.4028, "step": 24180 }, { "epoch": 0.9251804944944138, "grad_norm": 0.044921875, "learning_rate": 1.1207063520327644e-05, "loss": 4.4098, "step": 24190 }, { "epoch": 0.925583834146735, "grad_norm": 0.04541015625, "learning_rate": 1.1200761248377236e-05, "loss": 4.4153, "step": 24200 }, { "epoch": 0.9259871737990562, "grad_norm": 0.0419921875, "learning_rate": 1.1194458492486637e-05, "loss": 4.3952, "step": 24210 }, { "epoch": 0.9263905134513774, "grad_norm": 0.04248046875, "learning_rate": 1.1188155255196034e-05, "loss": 4.3966, "step": 24220 }, { "epoch": 0.9267938531036987, "grad_norm": 0.046875, "learning_rate": 1.1181851539045803e-05, "loss": 4.4142, "step": 24230 }, { "epoch": 0.9271971927560199, "grad_norm": 0.0478515625, "learning_rate": 1.1175547346576519e-05, "loss": 4.4462, "step": 24240 }, { "epoch": 0.9276005324083411, "grad_norm": 0.0458984375, "learning_rate": 1.1169242680328946e-05, "loss": 4.393, "step": 24250 }, { "epoch": 0.9280038720606623, "grad_norm": 0.044189453125, "learning_rate": 1.1162937542844042e-05, "loss": 4.3979, "step": 24260 }, { "epoch": 0.9284072117129835, "grad_norm": 0.0439453125, "learning_rate": 1.1156631936662949e-05, "loss": 4.4215, "step": 24270 }, { "epoch": 0.9288105513653048, "grad_norm": 0.045166015625, "learning_rate": 1.1150325864327003e-05, "loss": 4.4167, "step": 24280 }, { "epoch": 0.929213891017626, "grad_norm": 0.045654296875, "learning_rate": 1.114401932837773e-05, "loss": 4.3932, "step": 24290 }, { "epoch": 0.9296172306699472, "grad_norm": 0.043701171875, "learning_rate": 1.1137712331356836e-05, "loss": 4.4349, "step": 24300 }, { "epoch": 0.9300205703222684, "grad_norm": 0.046875, "learning_rate": 1.113140487580622e-05, "loss": 4.402, "step": 24310 }, { "epoch": 0.9304239099745896, "grad_norm": 0.04443359375, "learning_rate": 1.1125096964267955e-05, "loss": 4.407, "step": 24320 }, { "epoch": 0.9308272496269108, "grad_norm": 0.04638671875, "learning_rate": 1.1118788599284314e-05, "loss": 4.4357, "step": 24330 }, { "epoch": 0.9312305892792321, "grad_norm": 0.046142578125, "learning_rate": 1.1112479783397734e-05, "loss": 4.4122, "step": 24340 }, { "epoch": 0.9316339289315533, "grad_norm": 0.0458984375, "learning_rate": 1.1106170519150852e-05, "loss": 4.4547, "step": 24350 }, { "epoch": 0.9320372685838745, "grad_norm": 0.044677734375, "learning_rate": 1.1099860809086472e-05, "loss": 4.3762, "step": 24360 }, { "epoch": 0.9324406082361957, "grad_norm": 0.043701171875, "learning_rate": 1.1093550655747583e-05, "loss": 4.4151, "step": 24370 }, { "epoch": 0.9328439478885169, "grad_norm": 0.044677734375, "learning_rate": 1.1087240061677353e-05, "loss": 4.4228, "step": 24380 }, { "epoch": 0.9332472875408382, "grad_norm": 0.04736328125, "learning_rate": 1.108092902941913e-05, "loss": 4.4209, "step": 24390 }, { "epoch": 0.9336506271931594, "grad_norm": 0.04443359375, "learning_rate": 1.107461756151643e-05, "loss": 4.3799, "step": 24400 }, { "epoch": 0.9340539668454806, "grad_norm": 0.045654296875, "learning_rate": 1.1068305660512954e-05, "loss": 4.3847, "step": 24410 }, { "epoch": 0.9344573064978018, "grad_norm": 0.04248046875, "learning_rate": 1.1061993328952576e-05, "loss": 4.4438, "step": 24420 }, { "epoch": 0.934860646150123, "grad_norm": 0.045654296875, "learning_rate": 1.1055680569379335e-05, "loss": 4.4033, "step": 24430 }, { "epoch": 0.9352639858024442, "grad_norm": 0.044189453125, "learning_rate": 1.1049367384337452e-05, "loss": 4.4359, "step": 24440 }, { "epoch": 0.9356673254547655, "grad_norm": 0.045166015625, "learning_rate": 1.1043053776371315e-05, "loss": 4.4033, "step": 24450 }, { "epoch": 0.9360706651070867, "grad_norm": 0.045654296875, "learning_rate": 1.1036739748025484e-05, "loss": 4.4224, "step": 24460 }, { "epoch": 0.9364740047594079, "grad_norm": 0.044921875, "learning_rate": 1.1030425301844692e-05, "loss": 4.4209, "step": 24470 }, { "epoch": 0.9368773444117291, "grad_norm": 0.046630859375, "learning_rate": 1.1024110440373833e-05, "loss": 4.4079, "step": 24480 }, { "epoch": 0.9372806840640503, "grad_norm": 0.044189453125, "learning_rate": 1.101779516615797e-05, "loss": 4.3951, "step": 24490 }, { "epoch": 0.9376840237163716, "grad_norm": 0.0439453125, "learning_rate": 1.1011479481742337e-05, "loss": 4.4224, "step": 24500 }, { "epoch": 0.9380873633686928, "grad_norm": 0.0439453125, "learning_rate": 1.1005163389672332e-05, "loss": 4.4094, "step": 24510 }, { "epoch": 0.938490703021014, "grad_norm": 0.0458984375, "learning_rate": 1.0998846892493512e-05, "loss": 4.3931, "step": 24520 }, { "epoch": 0.9388940426733352, "grad_norm": 0.043701171875, "learning_rate": 1.0992529992751604e-05, "loss": 4.4191, "step": 24530 }, { "epoch": 0.9392973823256564, "grad_norm": 0.044189453125, "learning_rate": 1.0986212692992492e-05, "loss": 4.4255, "step": 24540 }, { "epoch": 0.9397007219779776, "grad_norm": 0.04443359375, "learning_rate": 1.0979894995762224e-05, "loss": 4.4058, "step": 24550 }, { "epoch": 0.9401040616302989, "grad_norm": 0.044921875, "learning_rate": 1.0973576903607008e-05, "loss": 4.382, "step": 24560 }, { "epoch": 0.9405074012826201, "grad_norm": 0.046630859375, "learning_rate": 1.0967258419073218e-05, "loss": 4.4464, "step": 24570 }, { "epoch": 0.9409107409349413, "grad_norm": 0.04296875, "learning_rate": 1.0960939544707365e-05, "loss": 4.3983, "step": 24580 }, { "epoch": 0.9413140805872625, "grad_norm": 0.047119140625, "learning_rate": 1.0954620283056144e-05, "loss": 4.4083, "step": 24590 }, { "epoch": 0.9417174202395837, "grad_norm": 0.0439453125, "learning_rate": 1.0948300636666385e-05, "loss": 4.3879, "step": 24600 }, { "epoch": 0.942120759891905, "grad_norm": 0.045654296875, "learning_rate": 1.0941980608085085e-05, "loss": 4.4291, "step": 24610 }, { "epoch": 0.9425240995442262, "grad_norm": 0.044921875, "learning_rate": 1.093566019985939e-05, "loss": 4.4293, "step": 24620 }, { "epoch": 0.9429274391965474, "grad_norm": 0.0458984375, "learning_rate": 1.0929339414536604e-05, "loss": 4.4188, "step": 24630 }, { "epoch": 0.9433307788488686, "grad_norm": 0.04541015625, "learning_rate": 1.0923018254664174e-05, "loss": 4.3924, "step": 24640 }, { "epoch": 0.9437341185011898, "grad_norm": 0.04736328125, "learning_rate": 1.0916696722789706e-05, "loss": 4.4281, "step": 24650 }, { "epoch": 0.944137458153511, "grad_norm": 0.0458984375, "learning_rate": 1.0910374821460953e-05, "loss": 4.4119, "step": 24660 }, { "epoch": 0.9445407978058323, "grad_norm": 0.0439453125, "learning_rate": 1.0904052553225817e-05, "loss": 4.3981, "step": 24670 }, { "epoch": 0.9449441374581535, "grad_norm": 0.045166015625, "learning_rate": 1.089772992063235e-05, "loss": 4.4177, "step": 24680 }, { "epoch": 0.9453474771104747, "grad_norm": 0.044921875, "learning_rate": 1.0891406926228744e-05, "loss": 4.3855, "step": 24690 }, { "epoch": 0.9457508167627959, "grad_norm": 0.045166015625, "learning_rate": 1.088508357256335e-05, "loss": 4.4252, "step": 24700 }, { "epoch": 0.9461541564151171, "grad_norm": 0.045166015625, "learning_rate": 1.0878759862184645e-05, "loss": 4.4089, "step": 24710 }, { "epoch": 0.9465574960674384, "grad_norm": 0.0439453125, "learning_rate": 1.0872435797641267e-05, "loss": 4.4507, "step": 24720 }, { "epoch": 0.9469608357197596, "grad_norm": 0.04443359375, "learning_rate": 1.0866111381481987e-05, "loss": 4.39, "step": 24730 }, { "epoch": 0.9473641753720808, "grad_norm": 0.0439453125, "learning_rate": 1.0859786616255723e-05, "loss": 4.4443, "step": 24740 }, { "epoch": 0.947767515024402, "grad_norm": 0.04443359375, "learning_rate": 1.085346150451153e-05, "loss": 4.4248, "step": 24750 }, { "epoch": 0.9481708546767232, "grad_norm": 0.044677734375, "learning_rate": 1.0847136048798605e-05, "loss": 4.4304, "step": 24760 }, { "epoch": 0.9485741943290444, "grad_norm": 0.04248046875, "learning_rate": 1.0840810251666283e-05, "loss": 4.4432, "step": 24770 }, { "epoch": 0.9489775339813658, "grad_norm": 0.04443359375, "learning_rate": 1.0834484115664035e-05, "loss": 4.421, "step": 24780 }, { "epoch": 0.949380873633687, "grad_norm": 0.045166015625, "learning_rate": 1.0828157643341467e-05, "loss": 4.4599, "step": 24790 }, { "epoch": 0.9497842132860081, "grad_norm": 0.04296875, "learning_rate": 1.082183083724833e-05, "loss": 4.4128, "step": 24800 }, { "epoch": 0.9501875529383293, "grad_norm": 0.04443359375, "learning_rate": 1.0815503699934497e-05, "loss": 4.4233, "step": 24810 }, { "epoch": 0.9505908925906505, "grad_norm": 0.044921875, "learning_rate": 1.0809176233949986e-05, "loss": 4.4285, "step": 24820 }, { "epoch": 0.9509942322429719, "grad_norm": 0.046142578125, "learning_rate": 1.0802848441844936e-05, "loss": 4.4004, "step": 24830 }, { "epoch": 0.9513975718952931, "grad_norm": 0.045654296875, "learning_rate": 1.0796520326169624e-05, "loss": 4.4124, "step": 24840 }, { "epoch": 0.9518009115476143, "grad_norm": 0.04638671875, "learning_rate": 1.0790191889474464e-05, "loss": 4.427, "step": 24850 }, { "epoch": 0.9522042511999355, "grad_norm": 0.043701171875, "learning_rate": 1.0783863134309982e-05, "loss": 4.4032, "step": 24860 }, { "epoch": 0.9526075908522567, "grad_norm": 0.04443359375, "learning_rate": 1.0777534063226852e-05, "loss": 4.4265, "step": 24870 }, { "epoch": 0.9530109305045779, "grad_norm": 0.046142578125, "learning_rate": 1.0771204678775855e-05, "loss": 4.3936, "step": 24880 }, { "epoch": 0.9534142701568992, "grad_norm": 0.04541015625, "learning_rate": 1.0764874983507919e-05, "loss": 4.4314, "step": 24890 }, { "epoch": 0.9538176098092204, "grad_norm": 0.044921875, "learning_rate": 1.075854497997408e-05, "loss": 4.4247, "step": 24900 }, { "epoch": 0.9542209494615416, "grad_norm": 0.0458984375, "learning_rate": 1.075221467072551e-05, "loss": 4.3823, "step": 24910 }, { "epoch": 0.9546242891138628, "grad_norm": 0.043212890625, "learning_rate": 1.0745884058313498e-05, "loss": 4.3845, "step": 24920 }, { "epoch": 0.955027628766184, "grad_norm": 0.04296875, "learning_rate": 1.0739553145289459e-05, "loss": 4.408, "step": 24930 }, { "epoch": 0.9554309684185053, "grad_norm": 0.043701171875, "learning_rate": 1.0733221934204925e-05, "loss": 4.3919, "step": 24940 }, { "epoch": 0.9558343080708265, "grad_norm": 0.0458984375, "learning_rate": 1.072689042761155e-05, "loss": 4.4149, "step": 24950 }, { "epoch": 0.9562376477231477, "grad_norm": 0.04443359375, "learning_rate": 1.0720558628061109e-05, "loss": 4.4169, "step": 24960 }, { "epoch": 0.9566409873754689, "grad_norm": 0.045654296875, "learning_rate": 1.071422653810549e-05, "loss": 4.3931, "step": 24970 }, { "epoch": 0.9570443270277901, "grad_norm": 0.04443359375, "learning_rate": 1.0707894160296708e-05, "loss": 4.4145, "step": 24980 }, { "epoch": 0.9574476666801113, "grad_norm": 0.04443359375, "learning_rate": 1.0701561497186879e-05, "loss": 4.4124, "step": 24990 }, { "epoch": 0.9578510063324326, "grad_norm": 0.046875, "learning_rate": 1.0695228551328247e-05, "loss": 4.4177, "step": 25000 }, { "epoch": 0.9582543459847538, "grad_norm": 0.04443359375, "learning_rate": 1.0688895325273169e-05, "loss": 4.4417, "step": 25010 }, { "epoch": 0.958657685637075, "grad_norm": 0.045166015625, "learning_rate": 1.0682561821574105e-05, "loss": 4.4152, "step": 25020 }, { "epoch": 0.9590610252893962, "grad_norm": 0.043701171875, "learning_rate": 1.0676228042783637e-05, "loss": 4.411, "step": 25030 }, { "epoch": 0.9594643649417174, "grad_norm": 0.043212890625, "learning_rate": 1.066989399145445e-05, "loss": 4.3847, "step": 25040 }, { "epoch": 0.9598677045940387, "grad_norm": 0.04345703125, "learning_rate": 1.0663559670139349e-05, "loss": 4.4432, "step": 25050 }, { "epoch": 0.9602710442463599, "grad_norm": 0.045654296875, "learning_rate": 1.0657225081391235e-05, "loss": 4.4382, "step": 25060 }, { "epoch": 0.9606743838986811, "grad_norm": 0.046630859375, "learning_rate": 1.065089022776313e-05, "loss": 4.4395, "step": 25070 }, { "epoch": 0.9610777235510023, "grad_norm": 0.046630859375, "learning_rate": 1.064455511180815e-05, "loss": 4.3836, "step": 25080 }, { "epoch": 0.9614810632033235, "grad_norm": 0.04345703125, "learning_rate": 1.0638219736079528e-05, "loss": 4.426, "step": 25090 }, { "epoch": 0.9618844028556447, "grad_norm": 0.0439453125, "learning_rate": 1.0631884103130595e-05, "loss": 4.4206, "step": 25100 }, { "epoch": 0.962287742507966, "grad_norm": 0.048095703125, "learning_rate": 1.0625548215514787e-05, "loss": 4.4113, "step": 25110 }, { "epoch": 0.9626910821602872, "grad_norm": 0.0439453125, "learning_rate": 1.0619212075785643e-05, "loss": 4.3905, "step": 25120 }, { "epoch": 0.9630944218126084, "grad_norm": 0.043212890625, "learning_rate": 1.0612875686496805e-05, "loss": 4.4197, "step": 25130 }, { "epoch": 0.9634977614649296, "grad_norm": 0.044921875, "learning_rate": 1.0606539050202012e-05, "loss": 4.4197, "step": 25140 }, { "epoch": 0.9639011011172508, "grad_norm": 0.046875, "learning_rate": 1.0600202169455105e-05, "loss": 4.3972, "step": 25150 }, { "epoch": 0.964304440769572, "grad_norm": 0.044189453125, "learning_rate": 1.0593865046810025e-05, "loss": 4.4553, "step": 25160 }, { "epoch": 0.9647077804218933, "grad_norm": 0.04443359375, "learning_rate": 1.0587527684820809e-05, "loss": 4.379, "step": 25170 }, { "epoch": 0.9651111200742145, "grad_norm": 0.051025390625, "learning_rate": 1.0581190086041584e-05, "loss": 4.4021, "step": 25180 }, { "epoch": 0.9655144597265357, "grad_norm": 0.04443359375, "learning_rate": 1.0574852253026586e-05, "loss": 4.3809, "step": 25190 }, { "epoch": 0.9659177993788569, "grad_norm": 0.04736328125, "learning_rate": 1.056851418833014e-05, "loss": 4.4166, "step": 25200 }, { "epoch": 0.9663211390311781, "grad_norm": 0.04296875, "learning_rate": 1.0562175894506652e-05, "loss": 4.3942, "step": 25210 }, { "epoch": 0.9667244786834994, "grad_norm": 0.04443359375, "learning_rate": 1.055583737411064e-05, "loss": 4.431, "step": 25220 }, { "epoch": 0.9671278183358206, "grad_norm": 0.045166015625, "learning_rate": 1.05494986296967e-05, "loss": 4.4283, "step": 25230 }, { "epoch": 0.9675311579881418, "grad_norm": 0.0439453125, "learning_rate": 1.0543159663819525e-05, "loss": 4.3906, "step": 25240 }, { "epoch": 0.967934497640463, "grad_norm": 0.044189453125, "learning_rate": 1.0536820479033893e-05, "loss": 4.4038, "step": 25250 }, { "epoch": 0.9683378372927842, "grad_norm": 0.04541015625, "learning_rate": 1.053048107789467e-05, "loss": 4.4119, "step": 25260 }, { "epoch": 0.9687411769451054, "grad_norm": 0.04345703125, "learning_rate": 1.0524141462956811e-05, "loss": 4.3955, "step": 25270 }, { "epoch": 0.9691445165974267, "grad_norm": 0.046630859375, "learning_rate": 1.051780163677536e-05, "loss": 4.4041, "step": 25280 }, { "epoch": 0.9695478562497479, "grad_norm": 0.04443359375, "learning_rate": 1.0511461601905444e-05, "loss": 4.4104, "step": 25290 }, { "epoch": 0.9699511959020691, "grad_norm": 0.045654296875, "learning_rate": 1.0505121360902271e-05, "loss": 4.4316, "step": 25300 }, { "epoch": 0.9703545355543903, "grad_norm": 0.0458984375, "learning_rate": 1.0498780916321136e-05, "loss": 4.3685, "step": 25310 }, { "epoch": 0.9707578752067115, "grad_norm": 0.045166015625, "learning_rate": 1.0492440270717413e-05, "loss": 4.4434, "step": 25320 }, { "epoch": 0.9711612148590328, "grad_norm": 0.04443359375, "learning_rate": 1.048609942664656e-05, "loss": 4.3993, "step": 25330 }, { "epoch": 0.971564554511354, "grad_norm": 0.047607421875, "learning_rate": 1.0479758386664112e-05, "loss": 4.4139, "step": 25340 }, { "epoch": 0.9719678941636752, "grad_norm": 0.0458984375, "learning_rate": 1.0473417153325686e-05, "loss": 4.4462, "step": 25350 }, { "epoch": 0.9723712338159964, "grad_norm": 0.045166015625, "learning_rate": 1.0467075729186975e-05, "loss": 4.4206, "step": 25360 }, { "epoch": 0.9727745734683176, "grad_norm": 0.044677734375, "learning_rate": 1.046073411680375e-05, "loss": 4.4288, "step": 25370 }, { "epoch": 0.9731779131206388, "grad_norm": 0.04541015625, "learning_rate": 1.0454392318731858e-05, "loss": 4.4538, "step": 25380 }, { "epoch": 0.9735812527729601, "grad_norm": 0.046875, "learning_rate": 1.0448050337527216e-05, "loss": 4.3976, "step": 25390 }, { "epoch": 0.9739845924252813, "grad_norm": 0.04541015625, "learning_rate": 1.0441708175745824e-05, "loss": 4.4068, "step": 25400 }, { "epoch": 0.9743879320776025, "grad_norm": 0.045166015625, "learning_rate": 1.0435365835943745e-05, "loss": 4.4091, "step": 25410 }, { "epoch": 0.9747912717299237, "grad_norm": 0.046630859375, "learning_rate": 1.0429023320677125e-05, "loss": 4.4149, "step": 25420 }, { "epoch": 0.975194611382245, "grad_norm": 0.04345703125, "learning_rate": 1.0422680632502165e-05, "loss": 4.431, "step": 25430 }, { "epoch": 0.9755979510345663, "grad_norm": 0.043212890625, "learning_rate": 1.0416337773975151e-05, "loss": 4.3804, "step": 25440 }, { "epoch": 0.9760012906868875, "grad_norm": 0.045654296875, "learning_rate": 1.040999474765243e-05, "loss": 4.427, "step": 25450 }, { "epoch": 0.9764046303392087, "grad_norm": 0.044189453125, "learning_rate": 1.0403651556090419e-05, "loss": 4.4277, "step": 25460 }, { "epoch": 0.9768079699915299, "grad_norm": 0.04541015625, "learning_rate": 1.03973082018456e-05, "loss": 4.4513, "step": 25470 }, { "epoch": 0.9772113096438511, "grad_norm": 0.042724609375, "learning_rate": 1.0390964687474521e-05, "loss": 4.4471, "step": 25480 }, { "epoch": 0.9776146492961723, "grad_norm": 0.04296875, "learning_rate": 1.0384621015533796e-05, "loss": 4.376, "step": 25490 }, { "epoch": 0.9780179889484936, "grad_norm": 0.042724609375, "learning_rate": 1.0378277188580099e-05, "loss": 4.4139, "step": 25500 }, { "epoch": 0.9784213286008148, "grad_norm": 0.04736328125, "learning_rate": 1.037193320917017e-05, "loss": 4.4504, "step": 25510 }, { "epoch": 0.978824668253136, "grad_norm": 0.0439453125, "learning_rate": 1.036558907986081e-05, "loss": 4.4178, "step": 25520 }, { "epoch": 0.9792280079054572, "grad_norm": 0.045166015625, "learning_rate": 1.0359244803208876e-05, "loss": 4.4434, "step": 25530 }, { "epoch": 0.9796313475577784, "grad_norm": 0.046630859375, "learning_rate": 1.0352900381771294e-05, "loss": 4.3798, "step": 25540 }, { "epoch": 0.9800346872100997, "grad_norm": 0.046142578125, "learning_rate": 1.0346555818105038e-05, "loss": 4.4288, "step": 25550 }, { "epoch": 0.9804380268624209, "grad_norm": 0.046630859375, "learning_rate": 1.0340211114767145e-05, "loss": 4.3997, "step": 25560 }, { "epoch": 0.9808413665147421, "grad_norm": 0.04443359375, "learning_rate": 1.0333866274314708e-05, "loss": 4.401, "step": 25570 }, { "epoch": 0.9812447061670633, "grad_norm": 0.044921875, "learning_rate": 1.032752129930487e-05, "loss": 4.4236, "step": 25580 }, { "epoch": 0.9816480458193845, "grad_norm": 0.044921875, "learning_rate": 1.0321176192294839e-05, "loss": 4.426, "step": 25590 }, { "epoch": 0.9820513854717057, "grad_norm": 0.044921875, "learning_rate": 1.0314830955841868e-05, "loss": 4.4045, "step": 25600 }, { "epoch": 0.982454725124027, "grad_norm": 0.044189453125, "learning_rate": 1.0308485592503257e-05, "loss": 4.4227, "step": 25610 }, { "epoch": 0.9828580647763482, "grad_norm": 0.044921875, "learning_rate": 1.0302140104836372e-05, "loss": 4.4067, "step": 25620 }, { "epoch": 0.9832614044286694, "grad_norm": 0.042724609375, "learning_rate": 1.0295794495398614e-05, "loss": 4.4113, "step": 25630 }, { "epoch": 0.9836647440809906, "grad_norm": 0.046142578125, "learning_rate": 1.028944876674745e-05, "loss": 4.3905, "step": 25640 }, { "epoch": 0.9840680837333118, "grad_norm": 0.045654296875, "learning_rate": 1.0283102921440376e-05, "loss": 4.3818, "step": 25650 }, { "epoch": 0.9844714233856331, "grad_norm": 0.047607421875, "learning_rate": 1.027675696203495e-05, "loss": 4.3996, "step": 25660 }, { "epoch": 0.9848747630379543, "grad_norm": 0.0458984375, "learning_rate": 1.0270410891088767e-05, "loss": 4.4321, "step": 25670 }, { "epoch": 0.9852781026902755, "grad_norm": 0.046630859375, "learning_rate": 1.0264064711159471e-05, "loss": 4.3969, "step": 25680 }, { "epoch": 0.9856814423425967, "grad_norm": 0.044921875, "learning_rate": 1.0257718424804747e-05, "loss": 4.4101, "step": 25690 }, { "epoch": 0.9860847819949179, "grad_norm": 0.0458984375, "learning_rate": 1.0251372034582332e-05, "loss": 4.3989, "step": 25700 }, { "epoch": 0.9864881216472391, "grad_norm": 0.048095703125, "learning_rate": 1.024502554304999e-05, "loss": 4.4186, "step": 25710 }, { "epoch": 0.9868914612995604, "grad_norm": 0.0419921875, "learning_rate": 1.0238678952765535e-05, "loss": 4.4185, "step": 25720 }, { "epoch": 0.9872948009518816, "grad_norm": 0.044921875, "learning_rate": 1.0232332266286824e-05, "loss": 4.4087, "step": 25730 }, { "epoch": 0.9876981406042028, "grad_norm": 0.04248046875, "learning_rate": 1.0225985486171745e-05, "loss": 4.4226, "step": 25740 }, { "epoch": 0.988101480256524, "grad_norm": 0.043701171875, "learning_rate": 1.0219638614978227e-05, "loss": 4.4529, "step": 25750 }, { "epoch": 0.9885048199088452, "grad_norm": 0.04638671875, "learning_rate": 1.0213291655264235e-05, "loss": 4.4218, "step": 25760 }, { "epoch": 0.9889081595611665, "grad_norm": 0.04443359375, "learning_rate": 1.0206944609587775e-05, "loss": 4.4042, "step": 25770 }, { "epoch": 0.9893114992134877, "grad_norm": 0.044189453125, "learning_rate": 1.0200597480506875e-05, "loss": 4.4006, "step": 25780 }, { "epoch": 0.9897148388658089, "grad_norm": 0.046875, "learning_rate": 1.0194250270579613e-05, "loss": 4.408, "step": 25790 }, { "epoch": 0.9901181785181301, "grad_norm": 0.044189453125, "learning_rate": 1.0187902982364084e-05, "loss": 4.3921, "step": 25800 }, { "epoch": 0.9905215181704513, "grad_norm": 0.046875, "learning_rate": 1.0181555618418424e-05, "loss": 4.4087, "step": 25810 }, { "epoch": 0.9909248578227725, "grad_norm": 0.04541015625, "learning_rate": 1.01752081813008e-05, "loss": 4.4248, "step": 25820 }, { "epoch": 0.9913281974750938, "grad_norm": 0.042236328125, "learning_rate": 1.0168860673569404e-05, "loss": 4.4278, "step": 25830 }, { "epoch": 0.991731537127415, "grad_norm": 0.044921875, "learning_rate": 1.0162513097782463e-05, "loss": 4.422, "step": 25840 }, { "epoch": 0.9921348767797362, "grad_norm": 0.04443359375, "learning_rate": 1.0156165456498214e-05, "loss": 4.3904, "step": 25850 }, { "epoch": 0.9925382164320574, "grad_norm": 0.045654296875, "learning_rate": 1.0149817752274947e-05, "loss": 4.4334, "step": 25860 }, { "epoch": 0.9929415560843786, "grad_norm": 0.04443359375, "learning_rate": 1.0143469987670953e-05, "loss": 4.4248, "step": 25870 }, { "epoch": 0.9933448957366999, "grad_norm": 0.042724609375, "learning_rate": 1.0137122165244566e-05, "loss": 4.4224, "step": 25880 }, { "epoch": 0.9937482353890211, "grad_norm": 0.04345703125, "learning_rate": 1.0130774287554128e-05, "loss": 4.393, "step": 25890 }, { "epoch": 0.9941515750413423, "grad_norm": 0.044677734375, "learning_rate": 1.0124426357158015e-05, "loss": 4.4084, "step": 25900 }, { "epoch": 0.9945549146936635, "grad_norm": 0.045166015625, "learning_rate": 1.0118078376614615e-05, "loss": 4.4182, "step": 25910 }, { "epoch": 0.9949582543459847, "grad_norm": 0.043212890625, "learning_rate": 1.0111730348482348e-05, "loss": 4.4151, "step": 25920 }, { "epoch": 0.9953615939983059, "grad_norm": 0.043212890625, "learning_rate": 1.0105382275319639e-05, "loss": 4.4422, "step": 25930 }, { "epoch": 0.9957649336506272, "grad_norm": 0.044189453125, "learning_rate": 1.0099034159684946e-05, "loss": 4.3965, "step": 25940 }, { "epoch": 0.9961682733029484, "grad_norm": 0.04736328125, "learning_rate": 1.0092686004136727e-05, "loss": 4.3959, "step": 25950 }, { "epoch": 0.9965716129552696, "grad_norm": 0.042236328125, "learning_rate": 1.008633781123347e-05, "loss": 4.3795, "step": 25960 }, { "epoch": 0.9969749526075908, "grad_norm": 0.04638671875, "learning_rate": 1.0079989583533674e-05, "loss": 4.4011, "step": 25970 }, { "epoch": 0.997378292259912, "grad_norm": 0.04443359375, "learning_rate": 1.007364132359585e-05, "loss": 4.4216, "step": 25980 }, { "epoch": 0.9977816319122333, "grad_norm": 0.045166015625, "learning_rate": 1.006729303397852e-05, "loss": 4.4049, "step": 25990 }, { "epoch": 0.9981849715645545, "grad_norm": 0.044921875, "learning_rate": 1.0060944717240226e-05, "loss": 4.4239, "step": 26000 }, { "epoch": 0.9985883112168757, "grad_norm": 0.045166015625, "learning_rate": 1.005459637593952e-05, "loss": 4.4158, "step": 26010 }, { "epoch": 0.998991650869197, "grad_norm": 0.046142578125, "learning_rate": 1.0048248012634951e-05, "loss": 4.4219, "step": 26020 }, { "epoch": 0.9993949905215181, "grad_norm": 0.044189453125, "learning_rate": 1.0041899629885094e-05, "loss": 4.435, "step": 26030 }, { "epoch": 0.9997983301738393, "grad_norm": 0.044189453125, "learning_rate": 1.0035551230248517e-05, "loss": 4.4395, "step": 26040 }, { "epoch": 1.0002016698261607, "grad_norm": 0.043701171875, "learning_rate": 1.0029202816283807e-05, "loss": 4.3979, "step": 26050 }, { "epoch": 1.0006050094784817, "grad_norm": 0.04638671875, "learning_rate": 1.0022854390549547e-05, "loss": 4.3996, "step": 26060 }, { "epoch": 1.001008349130803, "grad_norm": 0.0458984375, "learning_rate": 1.0016505955604335e-05, "loss": 4.4037, "step": 26070 }, { "epoch": 1.0014116887831244, "grad_norm": 0.042724609375, "learning_rate": 1.0010157514006762e-05, "loss": 4.393, "step": 26080 }, { "epoch": 1.0018150284354455, "grad_norm": 0.045654296875, "learning_rate": 1.0003809068315428e-05, "loss": 4.3819, "step": 26090 }, { "epoch": 1.0022183680877668, "grad_norm": 0.04541015625, "learning_rate": 9.997460621088934e-06, "loss": 4.4138, "step": 26100 }, { "epoch": 1.0026217077400879, "grad_norm": 0.044189453125, "learning_rate": 9.991112174885882e-06, "loss": 4.4344, "step": 26110 }, { "epoch": 1.0030250473924092, "grad_norm": 0.047119140625, "learning_rate": 9.984763732264869e-06, "loss": 4.4317, "step": 26120 }, { "epoch": 1.0034283870447305, "grad_norm": 0.0439453125, "learning_rate": 9.978415295784495e-06, "loss": 4.3915, "step": 26130 }, { "epoch": 1.0038317266970516, "grad_norm": 0.0439453125, "learning_rate": 9.972066868003358e-06, "loss": 4.432, "step": 26140 }, { "epoch": 1.0042350663493729, "grad_norm": 0.04638671875, "learning_rate": 9.965718451480053e-06, "loss": 4.382, "step": 26150 }, { "epoch": 1.004638406001694, "grad_norm": 0.0478515625, "learning_rate": 9.959370048773161e-06, "loss": 4.4273, "step": 26160 }, { "epoch": 1.0050417456540153, "grad_norm": 0.044189453125, "learning_rate": 9.953021662441273e-06, "loss": 4.3881, "step": 26170 }, { "epoch": 1.0054450853063364, "grad_norm": 0.04833984375, "learning_rate": 9.946673295042958e-06, "loss": 4.402, "step": 26180 }, { "epoch": 1.0058484249586577, "grad_norm": 0.045654296875, "learning_rate": 9.940324949136797e-06, "loss": 4.4178, "step": 26190 }, { "epoch": 1.006251764610979, "grad_norm": 0.04736328125, "learning_rate": 9.933976627281336e-06, "loss": 4.4485, "step": 26200 }, { "epoch": 1.0066551042633, "grad_norm": 0.0478515625, "learning_rate": 9.927628332035133e-06, "loss": 4.4058, "step": 26210 }, { "epoch": 1.0070584439156214, "grad_norm": 0.047119140625, "learning_rate": 9.921280065956727e-06, "loss": 4.4325, "step": 26220 }, { "epoch": 1.0074617835679425, "grad_norm": 0.04443359375, "learning_rate": 9.914931831604642e-06, "loss": 4.4118, "step": 26230 }, { "epoch": 1.0078651232202638, "grad_norm": 0.04150390625, "learning_rate": 9.908583631537397e-06, "loss": 4.3888, "step": 26240 }, { "epoch": 1.008268462872585, "grad_norm": 0.045654296875, "learning_rate": 9.90223546831349e-06, "loss": 4.3967, "step": 26250 }, { "epoch": 1.0086718025249062, "grad_norm": 0.04345703125, "learning_rate": 9.895887344491412e-06, "loss": 4.4284, "step": 26260 }, { "epoch": 1.0090751421772275, "grad_norm": 0.04443359375, "learning_rate": 9.88953926262963e-06, "loss": 4.3973, "step": 26270 }, { "epoch": 1.0094784818295486, "grad_norm": 0.045166015625, "learning_rate": 9.883191225286597e-06, "loss": 4.4218, "step": 26280 }, { "epoch": 1.00988182148187, "grad_norm": 0.044921875, "learning_rate": 9.876843235020753e-06, "loss": 4.392, "step": 26290 }, { "epoch": 1.0102851611341912, "grad_norm": 0.043212890625, "learning_rate": 9.870495294390513e-06, "loss": 4.4108, "step": 26300 }, { "epoch": 1.0106885007865123, "grad_norm": 0.042724609375, "learning_rate": 9.86414740595427e-06, "loss": 4.4537, "step": 26310 }, { "epoch": 1.0110918404388336, "grad_norm": 0.043212890625, "learning_rate": 9.8577995722704e-06, "loss": 4.4091, "step": 26320 }, { "epoch": 1.0114951800911547, "grad_norm": 0.044921875, "learning_rate": 9.851451795897265e-06, "loss": 4.4063, "step": 26330 }, { "epoch": 1.011898519743476, "grad_norm": 0.0439453125, "learning_rate": 9.845104079393185e-06, "loss": 4.3848, "step": 26340 }, { "epoch": 1.0123018593957973, "grad_norm": 0.048583984375, "learning_rate": 9.83875642531647e-06, "loss": 4.388, "step": 26350 }, { "epoch": 1.0127051990481184, "grad_norm": 0.044921875, "learning_rate": 9.832408836225403e-06, "loss": 4.4101, "step": 26360 }, { "epoch": 1.0131085387004397, "grad_norm": 0.043701171875, "learning_rate": 9.826061314678243e-06, "loss": 4.4145, "step": 26370 }, { "epoch": 1.0135118783527608, "grad_norm": 0.044677734375, "learning_rate": 9.819713863233207e-06, "loss": 4.4532, "step": 26380 }, { "epoch": 1.013915218005082, "grad_norm": 0.044189453125, "learning_rate": 9.8133664844485e-06, "loss": 4.4021, "step": 26390 }, { "epoch": 1.0143185576574032, "grad_norm": 0.04296875, "learning_rate": 9.807019180882295e-06, "loss": 4.3851, "step": 26400 }, { "epoch": 1.0147218973097245, "grad_norm": 0.04736328125, "learning_rate": 9.800671955092732e-06, "loss": 4.4146, "step": 26410 }, { "epoch": 1.0151252369620458, "grad_norm": 0.0458984375, "learning_rate": 9.794324809637912e-06, "loss": 4.4362, "step": 26420 }, { "epoch": 1.015528576614367, "grad_norm": 0.04443359375, "learning_rate": 9.787977747075918e-06, "loss": 4.412, "step": 26430 }, { "epoch": 1.0159319162666882, "grad_norm": 0.0458984375, "learning_rate": 9.781630769964792e-06, "loss": 4.3828, "step": 26440 }, { "epoch": 1.0163352559190093, "grad_norm": 0.047119140625, "learning_rate": 9.77528388086254e-06, "loss": 4.4022, "step": 26450 }, { "epoch": 1.0167385955713306, "grad_norm": 0.043701171875, "learning_rate": 9.768937082327137e-06, "loss": 4.401, "step": 26460 }, { "epoch": 1.017141935223652, "grad_norm": 0.04833984375, "learning_rate": 9.762590376916519e-06, "loss": 4.3768, "step": 26470 }, { "epoch": 1.017545274875973, "grad_norm": 0.046630859375, "learning_rate": 9.756243767188588e-06, "loss": 4.377, "step": 26480 }, { "epoch": 1.0179486145282943, "grad_norm": 0.04541015625, "learning_rate": 9.749897255701196e-06, "loss": 4.4004, "step": 26490 }, { "epoch": 1.0183519541806154, "grad_norm": 0.0439453125, "learning_rate": 9.743550845012172e-06, "loss": 4.4329, "step": 26500 }, { "epoch": 1.0187552938329367, "grad_norm": 0.044921875, "learning_rate": 9.73720453767929e-06, "loss": 4.4321, "step": 26510 }, { "epoch": 1.019158633485258, "grad_norm": 0.044677734375, "learning_rate": 9.730858336260293e-06, "loss": 4.3985, "step": 26520 }, { "epoch": 1.0195619731375791, "grad_norm": 0.048095703125, "learning_rate": 9.724512243312873e-06, "loss": 4.3982, "step": 26530 }, { "epoch": 1.0199653127899004, "grad_norm": 0.04541015625, "learning_rate": 9.718166261394684e-06, "loss": 4.3995, "step": 26540 }, { "epoch": 1.0203686524422215, "grad_norm": 0.044677734375, "learning_rate": 9.711820393063337e-06, "loss": 4.4107, "step": 26550 }, { "epoch": 1.0207719920945428, "grad_norm": 0.043701171875, "learning_rate": 9.705474640876388e-06, "loss": 4.3595, "step": 26560 }, { "epoch": 1.0211753317468641, "grad_norm": 0.043701171875, "learning_rate": 9.699129007391351e-06, "loss": 4.4188, "step": 26570 }, { "epoch": 1.0215786713991852, "grad_norm": 0.04443359375, "learning_rate": 9.692783495165696e-06, "loss": 4.4033, "step": 26580 }, { "epoch": 1.0219820110515065, "grad_norm": 0.0439453125, "learning_rate": 9.686438106756846e-06, "loss": 4.3915, "step": 26590 }, { "epoch": 1.0223853507038276, "grad_norm": 0.042724609375, "learning_rate": 9.680092844722157e-06, "loss": 4.4125, "step": 26600 }, { "epoch": 1.022788690356149, "grad_norm": 0.045654296875, "learning_rate": 9.673747711618954e-06, "loss": 4.4068, "step": 26610 }, { "epoch": 1.02319203000847, "grad_norm": 0.042236328125, "learning_rate": 9.6674027100045e-06, "loss": 4.3956, "step": 26620 }, { "epoch": 1.0235953696607913, "grad_norm": 0.045654296875, "learning_rate": 9.66105784243601e-06, "loss": 4.3865, "step": 26630 }, { "epoch": 1.0239987093131127, "grad_norm": 0.046875, "learning_rate": 9.654713111470638e-06, "loss": 4.4351, "step": 26640 }, { "epoch": 1.0244020489654337, "grad_norm": 0.046142578125, "learning_rate": 9.648368519665489e-06, "loss": 4.3968, "step": 26650 }, { "epoch": 1.024805388617755, "grad_norm": 0.046142578125, "learning_rate": 9.642024069577611e-06, "loss": 4.4275, "step": 26660 }, { "epoch": 1.0252087282700761, "grad_norm": 0.044921875, "learning_rate": 9.635679763763997e-06, "loss": 4.3888, "step": 26670 }, { "epoch": 1.0256120679223975, "grad_norm": 0.044677734375, "learning_rate": 9.62933560478157e-06, "loss": 4.4219, "step": 26680 }, { "epoch": 1.0260154075747188, "grad_norm": 0.045166015625, "learning_rate": 9.622991595187207e-06, "loss": 4.4185, "step": 26690 }, { "epoch": 1.0264187472270399, "grad_norm": 0.045654296875, "learning_rate": 9.616647737537724e-06, "loss": 4.3841, "step": 26700 }, { "epoch": 1.0268220868793612, "grad_norm": 0.042724609375, "learning_rate": 9.610304034389867e-06, "loss": 4.3874, "step": 26710 }, { "epoch": 1.0272254265316823, "grad_norm": 0.045166015625, "learning_rate": 9.603960488300327e-06, "loss": 4.3871, "step": 26720 }, { "epoch": 1.0276287661840036, "grad_norm": 0.043701171875, "learning_rate": 9.59761710182573e-06, "loss": 4.3723, "step": 26730 }, { "epoch": 1.0280321058363249, "grad_norm": 0.044189453125, "learning_rate": 9.591273877522639e-06, "loss": 4.3945, "step": 26740 }, { "epoch": 1.028435445488646, "grad_norm": 0.045654296875, "learning_rate": 9.584930817947544e-06, "loss": 4.4227, "step": 26750 }, { "epoch": 1.0288387851409673, "grad_norm": 0.04833984375, "learning_rate": 9.578587925656878e-06, "loss": 4.4187, "step": 26760 }, { "epoch": 1.0292421247932884, "grad_norm": 0.044189453125, "learning_rate": 9.572245203207003e-06, "loss": 4.3918, "step": 26770 }, { "epoch": 1.0296454644456097, "grad_norm": 0.044189453125, "learning_rate": 9.565902653154215e-06, "loss": 4.4054, "step": 26780 }, { "epoch": 1.0300488040979308, "grad_norm": 0.0439453125, "learning_rate": 9.559560278054734e-06, "loss": 4.4021, "step": 26790 }, { "epoch": 1.030452143750252, "grad_norm": 0.045654296875, "learning_rate": 9.553218080464716e-06, "loss": 4.4218, "step": 26800 }, { "epoch": 1.0308554834025734, "grad_norm": 0.044189453125, "learning_rate": 9.546876062940246e-06, "loss": 4.4203, "step": 26810 }, { "epoch": 1.0312588230548945, "grad_norm": 0.046630859375, "learning_rate": 9.540534228037327e-06, "loss": 4.4394, "step": 26820 }, { "epoch": 1.0316621627072158, "grad_norm": 0.048583984375, "learning_rate": 9.534192578311896e-06, "loss": 4.3875, "step": 26830 }, { "epoch": 1.0320655023595369, "grad_norm": 0.043701171875, "learning_rate": 9.527851116319818e-06, "loss": 4.3633, "step": 26840 }, { "epoch": 1.0324688420118582, "grad_norm": 0.042724609375, "learning_rate": 9.52150984461688e-06, "loss": 4.4353, "step": 26850 }, { "epoch": 1.0328721816641795, "grad_norm": 0.046142578125, "learning_rate": 9.515168765758785e-06, "loss": 4.4013, "step": 26860 }, { "epoch": 1.0332755213165006, "grad_norm": 0.043701171875, "learning_rate": 9.508827882301166e-06, "loss": 4.3869, "step": 26870 }, { "epoch": 1.0336788609688219, "grad_norm": 0.044921875, "learning_rate": 9.502487196799578e-06, "loss": 4.421, "step": 26880 }, { "epoch": 1.034082200621143, "grad_norm": 0.046630859375, "learning_rate": 9.496146711809493e-06, "loss": 4.4175, "step": 26890 }, { "epoch": 1.0344855402734643, "grad_norm": 0.05029296875, "learning_rate": 9.4898064298863e-06, "loss": 4.439, "step": 26900 }, { "epoch": 1.0348888799257856, "grad_norm": 0.043701171875, "learning_rate": 9.483466353585312e-06, "loss": 4.3671, "step": 26910 }, { "epoch": 1.0352922195781067, "grad_norm": 0.043701171875, "learning_rate": 9.477126485461759e-06, "loss": 4.4065, "step": 26920 }, { "epoch": 1.035695559230428, "grad_norm": 0.04736328125, "learning_rate": 9.470786828070777e-06, "loss": 4.4044, "step": 26930 }, { "epoch": 1.036098898882749, "grad_norm": 0.044677734375, "learning_rate": 9.464447383967427e-06, "loss": 4.4131, "step": 26940 }, { "epoch": 1.0365022385350704, "grad_norm": 0.04736328125, "learning_rate": 9.458108155706682e-06, "loss": 4.4371, "step": 26950 }, { "epoch": 1.0369055781873917, "grad_norm": 0.0458984375, "learning_rate": 9.45176914584343e-06, "loss": 4.4394, "step": 26960 }, { "epoch": 1.0373089178397128, "grad_norm": 0.045166015625, "learning_rate": 9.445430356932465e-06, "loss": 4.4138, "step": 26970 }, { "epoch": 1.037712257492034, "grad_norm": 0.04443359375, "learning_rate": 9.439091791528494e-06, "loss": 4.4303, "step": 26980 }, { "epoch": 1.0381155971443552, "grad_norm": 0.04345703125, "learning_rate": 9.432753452186139e-06, "loss": 4.3481, "step": 26990 }, { "epoch": 1.0385189367966765, "grad_norm": 0.0439453125, "learning_rate": 9.42641534145993e-06, "loss": 4.3904, "step": 27000 }, { "epoch": 1.0389222764489976, "grad_norm": 0.045166015625, "learning_rate": 9.420077461904292e-06, "loss": 4.4178, "step": 27010 }, { "epoch": 1.039325616101319, "grad_norm": 0.045654296875, "learning_rate": 9.413739816073572e-06, "loss": 4.4266, "step": 27020 }, { "epoch": 1.0397289557536402, "grad_norm": 0.047119140625, "learning_rate": 9.407402406522024e-06, "loss": 4.4267, "step": 27030 }, { "epoch": 1.0401322954059613, "grad_norm": 0.046875, "learning_rate": 9.40106523580379e-06, "loss": 4.4148, "step": 27040 }, { "epoch": 1.0405356350582826, "grad_norm": 0.044921875, "learning_rate": 9.394728306472929e-06, "loss": 4.4375, "step": 27050 }, { "epoch": 1.0409389747106037, "grad_norm": 0.049560546875, "learning_rate": 9.388391621083402e-06, "loss": 4.4134, "step": 27060 }, { "epoch": 1.041342314362925, "grad_norm": 0.045654296875, "learning_rate": 9.382055182189072e-06, "loss": 4.3652, "step": 27070 }, { "epoch": 1.0417456540152463, "grad_norm": 0.046142578125, "learning_rate": 9.375718992343694e-06, "loss": 4.442, "step": 27080 }, { "epoch": 1.0421489936675674, "grad_norm": 0.042724609375, "learning_rate": 9.36938305410093e-06, "loss": 4.3954, "step": 27090 }, { "epoch": 1.0425523333198887, "grad_norm": 0.044921875, "learning_rate": 9.363047370014342e-06, "loss": 4.4089, "step": 27100 }, { "epoch": 1.0429556729722098, "grad_norm": 0.047607421875, "learning_rate": 9.356711942637391e-06, "loss": 4.354, "step": 27110 }, { "epoch": 1.0433590126245311, "grad_norm": 0.04541015625, "learning_rate": 9.350376774523422e-06, "loss": 4.3869, "step": 27120 }, { "epoch": 1.0437623522768524, "grad_norm": 0.044921875, "learning_rate": 9.344041868225687e-06, "loss": 4.4183, "step": 27130 }, { "epoch": 1.0441656919291735, "grad_norm": 0.04638671875, "learning_rate": 9.33770722629733e-06, "loss": 4.3826, "step": 27140 }, { "epoch": 1.0445690315814948, "grad_norm": 0.0458984375, "learning_rate": 9.33137285129139e-06, "loss": 4.3885, "step": 27150 }, { "epoch": 1.044972371233816, "grad_norm": 0.047607421875, "learning_rate": 9.325038745760794e-06, "loss": 4.4329, "step": 27160 }, { "epoch": 1.0453757108861372, "grad_norm": 0.048828125, "learning_rate": 9.318704912258362e-06, "loss": 4.4352, "step": 27170 }, { "epoch": 1.0457790505384585, "grad_norm": 0.044921875, "learning_rate": 9.31237135333681e-06, "loss": 4.3914, "step": 27180 }, { "epoch": 1.0461823901907796, "grad_norm": 0.044921875, "learning_rate": 9.306038071548732e-06, "loss": 4.3922, "step": 27190 }, { "epoch": 1.046585729843101, "grad_norm": 0.04150390625, "learning_rate": 9.299705069446619e-06, "loss": 4.3899, "step": 27200 }, { "epoch": 1.046989069495422, "grad_norm": 0.047119140625, "learning_rate": 9.293372349582847e-06, "loss": 4.4005, "step": 27210 }, { "epoch": 1.0473924091477433, "grad_norm": 0.04296875, "learning_rate": 9.287039914509682e-06, "loss": 4.4033, "step": 27220 }, { "epoch": 1.0477957488000644, "grad_norm": 0.043701171875, "learning_rate": 9.280707766779267e-06, "loss": 4.4157, "step": 27230 }, { "epoch": 1.0481990884523857, "grad_norm": 0.044921875, "learning_rate": 9.274375908943634e-06, "loss": 4.3876, "step": 27240 }, { "epoch": 1.048602428104707, "grad_norm": 0.044677734375, "learning_rate": 9.2680443435547e-06, "loss": 4.4209, "step": 27250 }, { "epoch": 1.0490057677570281, "grad_norm": 0.0458984375, "learning_rate": 9.261713073164262e-06, "loss": 4.4199, "step": 27260 }, { "epoch": 1.0494091074093495, "grad_norm": 0.043701171875, "learning_rate": 9.255382100323997e-06, "loss": 4.3832, "step": 27270 }, { "epoch": 1.0498124470616705, "grad_norm": 0.0458984375, "learning_rate": 9.249051427585461e-06, "loss": 4.4268, "step": 27280 }, { "epoch": 1.0502157867139919, "grad_norm": 0.044189453125, "learning_rate": 9.242721057500101e-06, "loss": 4.4268, "step": 27290 }, { "epoch": 1.0506191263663132, "grad_norm": 0.045166015625, "learning_rate": 9.236390992619218e-06, "loss": 4.435, "step": 27300 }, { "epoch": 1.0510224660186342, "grad_norm": 0.054443359375, "learning_rate": 9.230061235494013e-06, "loss": 4.3924, "step": 27310 }, { "epoch": 1.0514258056709556, "grad_norm": 0.046142578125, "learning_rate": 9.223731788675551e-06, "loss": 4.394, "step": 27320 }, { "epoch": 1.0518291453232766, "grad_norm": 0.045166015625, "learning_rate": 9.217402654714779e-06, "loss": 4.3841, "step": 27330 }, { "epoch": 1.052232484975598, "grad_norm": 0.047119140625, "learning_rate": 9.21107383616251e-06, "loss": 4.357, "step": 27340 }, { "epoch": 1.0526358246279193, "grad_norm": 0.0458984375, "learning_rate": 9.204745335569436e-06, "loss": 4.4162, "step": 27350 }, { "epoch": 1.0530391642802404, "grad_norm": 0.046630859375, "learning_rate": 9.198417155486116e-06, "loss": 4.4021, "step": 27360 }, { "epoch": 1.0534425039325617, "grad_norm": 0.047119140625, "learning_rate": 9.192089298462992e-06, "loss": 4.4296, "step": 27370 }, { "epoch": 1.0538458435848828, "grad_norm": 0.04736328125, "learning_rate": 9.185761767050356e-06, "loss": 4.4193, "step": 27380 }, { "epoch": 1.054249183237204, "grad_norm": 0.04443359375, "learning_rate": 9.179434563798381e-06, "loss": 4.3956, "step": 27390 }, { "epoch": 1.0546525228895254, "grad_norm": 0.047119140625, "learning_rate": 9.173107691257111e-06, "loss": 4.3964, "step": 27400 }, { "epoch": 1.0550558625418465, "grad_norm": 0.04638671875, "learning_rate": 9.166781151976447e-06, "loss": 4.4047, "step": 27410 }, { "epoch": 1.0554592021941678, "grad_norm": 0.042724609375, "learning_rate": 9.160454948506162e-06, "loss": 4.3869, "step": 27420 }, { "epoch": 1.0558625418464889, "grad_norm": 0.044921875, "learning_rate": 9.15412908339589e-06, "loss": 4.3973, "step": 27430 }, { "epoch": 1.0562658814988102, "grad_norm": 0.043701171875, "learning_rate": 9.147803559195137e-06, "loss": 4.4149, "step": 27440 }, { "epoch": 1.0566692211511313, "grad_norm": 0.044189453125, "learning_rate": 9.141478378453257e-06, "loss": 4.4091, "step": 27450 }, { "epoch": 1.0570725608034526, "grad_norm": 0.0439453125, "learning_rate": 9.135153543719477e-06, "loss": 4.4261, "step": 27460 }, { "epoch": 1.0574759004557739, "grad_norm": 0.045654296875, "learning_rate": 9.128829057542882e-06, "loss": 4.4123, "step": 27470 }, { "epoch": 1.057879240108095, "grad_norm": 0.0458984375, "learning_rate": 9.122504922472421e-06, "loss": 4.4434, "step": 27480 }, { "epoch": 1.0582825797604163, "grad_norm": 0.0439453125, "learning_rate": 9.116181141056887e-06, "loss": 4.4459, "step": 27490 }, { "epoch": 1.0586859194127374, "grad_norm": 0.04541015625, "learning_rate": 9.109857715844943e-06, "loss": 4.4067, "step": 27500 }, { "epoch": 1.0590892590650587, "grad_norm": 0.042236328125, "learning_rate": 9.10353464938511e-06, "loss": 4.436, "step": 27510 }, { "epoch": 1.05949259871738, "grad_norm": 0.04541015625, "learning_rate": 9.097211944225753e-06, "loss": 4.4257, "step": 27520 }, { "epoch": 1.059895938369701, "grad_norm": 0.044189453125, "learning_rate": 9.0908896029151e-06, "loss": 4.4009, "step": 27530 }, { "epoch": 1.0602992780220224, "grad_norm": 0.04345703125, "learning_rate": 9.084567628001234e-06, "loss": 4.3961, "step": 27540 }, { "epoch": 1.0607026176743435, "grad_norm": 0.0478515625, "learning_rate": 9.078246022032087e-06, "loss": 4.3885, "step": 27550 }, { "epoch": 1.0611059573266648, "grad_norm": 0.044921875, "learning_rate": 9.071924787555436e-06, "loss": 4.4014, "step": 27560 }, { "epoch": 1.061509296978986, "grad_norm": 0.044921875, "learning_rate": 9.065603927118918e-06, "loss": 4.4013, "step": 27570 }, { "epoch": 1.0619126366313072, "grad_norm": 0.045654296875, "learning_rate": 9.059283443270015e-06, "loss": 4.4064, "step": 27580 }, { "epoch": 1.0623159762836285, "grad_norm": 0.04541015625, "learning_rate": 9.052963338556062e-06, "loss": 4.4222, "step": 27590 }, { "epoch": 1.0627193159359496, "grad_norm": 0.045166015625, "learning_rate": 9.04664361552423e-06, "loss": 4.4067, "step": 27600 }, { "epoch": 1.063122655588271, "grad_norm": 0.042236328125, "learning_rate": 9.040324276721547e-06, "loss": 4.3938, "step": 27610 }, { "epoch": 1.063525995240592, "grad_norm": 0.046142578125, "learning_rate": 9.034005324694885e-06, "loss": 4.4732, "step": 27620 }, { "epoch": 1.0639293348929133, "grad_norm": 0.044921875, "learning_rate": 9.027686761990953e-06, "loss": 4.4167, "step": 27630 }, { "epoch": 1.0643326745452346, "grad_norm": 0.04443359375, "learning_rate": 9.021368591156307e-06, "loss": 4.4095, "step": 27640 }, { "epoch": 1.0647360141975557, "grad_norm": 0.04541015625, "learning_rate": 9.015050814737349e-06, "loss": 4.4079, "step": 27650 }, { "epoch": 1.065139353849877, "grad_norm": 0.041748046875, "learning_rate": 9.00873343528032e-06, "loss": 4.4124, "step": 27660 }, { "epoch": 1.065542693502198, "grad_norm": 0.04541015625, "learning_rate": 9.002416455331293e-06, "loss": 4.4578, "step": 27670 }, { "epoch": 1.0659460331545194, "grad_norm": 0.0478515625, "learning_rate": 8.996099877436191e-06, "loss": 4.4144, "step": 27680 }, { "epoch": 1.0663493728068407, "grad_norm": 0.04638671875, "learning_rate": 8.989783704140773e-06, "loss": 4.4389, "step": 27690 }, { "epoch": 1.0667527124591618, "grad_norm": 0.043701171875, "learning_rate": 8.98346793799063e-06, "loss": 4.4013, "step": 27700 }, { "epoch": 1.0671560521114831, "grad_norm": 0.0439453125, "learning_rate": 8.977152581531188e-06, "loss": 4.4222, "step": 27710 }, { "epoch": 1.0675593917638042, "grad_norm": 0.045654296875, "learning_rate": 8.970837637307717e-06, "loss": 4.4193, "step": 27720 }, { "epoch": 1.0679627314161255, "grad_norm": 0.0458984375, "learning_rate": 8.964523107865312e-06, "loss": 4.4018, "step": 27730 }, { "epoch": 1.0683660710684468, "grad_norm": 0.04296875, "learning_rate": 8.958208995748909e-06, "loss": 4.4286, "step": 27740 }, { "epoch": 1.068769410720768, "grad_norm": 0.0439453125, "learning_rate": 8.951895303503263e-06, "loss": 4.4273, "step": 27750 }, { "epoch": 1.0691727503730892, "grad_norm": 0.044189453125, "learning_rate": 8.945582033672972e-06, "loss": 4.4312, "step": 27760 }, { "epoch": 1.0695760900254103, "grad_norm": 0.04541015625, "learning_rate": 8.939269188802463e-06, "loss": 4.3792, "step": 27770 }, { "epoch": 1.0699794296777316, "grad_norm": 0.042724609375, "learning_rate": 8.932956771435979e-06, "loss": 4.4067, "step": 27780 }, { "epoch": 1.070382769330053, "grad_norm": 0.044921875, "learning_rate": 8.926644784117605e-06, "loss": 4.3986, "step": 27790 }, { "epoch": 1.070786108982374, "grad_norm": 0.043701171875, "learning_rate": 8.920333229391249e-06, "loss": 4.4031, "step": 27800 }, { "epoch": 1.0711894486346953, "grad_norm": 0.04736328125, "learning_rate": 8.914022109800647e-06, "loss": 4.3804, "step": 27810 }, { "epoch": 1.0715927882870164, "grad_norm": 0.04443359375, "learning_rate": 8.907711427889343e-06, "loss": 4.4428, "step": 27820 }, { "epoch": 1.0719961279393377, "grad_norm": 0.046142578125, "learning_rate": 8.901401186200727e-06, "loss": 4.4112, "step": 27830 }, { "epoch": 1.072399467591659, "grad_norm": 0.044921875, "learning_rate": 8.895091387278e-06, "loss": 4.414, "step": 27840 }, { "epoch": 1.0728028072439801, "grad_norm": 0.04345703125, "learning_rate": 8.888782033664187e-06, "loss": 4.4031, "step": 27850 }, { "epoch": 1.0732061468963014, "grad_norm": 0.045166015625, "learning_rate": 8.882473127902132e-06, "loss": 4.4248, "step": 27860 }, { "epoch": 1.0736094865486225, "grad_norm": 0.044189453125, "learning_rate": 8.8761646725345e-06, "loss": 4.4318, "step": 27870 }, { "epoch": 1.0740128262009438, "grad_norm": 0.043212890625, "learning_rate": 8.869856670103776e-06, "loss": 4.4124, "step": 27880 }, { "epoch": 1.074416165853265, "grad_norm": 0.04296875, "learning_rate": 8.863549123152255e-06, "loss": 4.4054, "step": 27890 }, { "epoch": 1.0748195055055862, "grad_norm": 0.04345703125, "learning_rate": 8.85724203422206e-06, "loss": 4.4016, "step": 27900 }, { "epoch": 1.0752228451579076, "grad_norm": 0.043701171875, "learning_rate": 8.850935405855118e-06, "loss": 4.4003, "step": 27910 }, { "epoch": 1.0756261848102286, "grad_norm": 0.045654296875, "learning_rate": 8.844629240593184e-06, "loss": 4.3941, "step": 27920 }, { "epoch": 1.07602952446255, "grad_norm": 0.045654296875, "learning_rate": 8.838323540977806e-06, "loss": 4.4122, "step": 27930 }, { "epoch": 1.076432864114871, "grad_norm": 0.0458984375, "learning_rate": 8.832018309550366e-06, "loss": 4.4236, "step": 27940 }, { "epoch": 1.0768362037671924, "grad_norm": 0.04248046875, "learning_rate": 8.825713548852042e-06, "loss": 4.4191, "step": 27950 }, { "epoch": 1.0772395434195137, "grad_norm": 0.042236328125, "learning_rate": 8.819409261423834e-06, "loss": 4.4218, "step": 27960 }, { "epoch": 1.0776428830718348, "grad_norm": 0.0458984375, "learning_rate": 8.81310544980654e-06, "loss": 4.4107, "step": 27970 }, { "epoch": 1.078046222724156, "grad_norm": 0.04541015625, "learning_rate": 8.806802116540772e-06, "loss": 4.4115, "step": 27980 }, { "epoch": 1.0784495623764772, "grad_norm": 0.045654296875, "learning_rate": 8.800499264166956e-06, "loss": 4.3874, "step": 27990 }, { "epoch": 1.0788529020287985, "grad_norm": 0.04443359375, "learning_rate": 8.794196895225306e-06, "loss": 4.4315, "step": 28000 }, { "epoch": 1.0792562416811196, "grad_norm": 0.042724609375, "learning_rate": 8.787895012255856e-06, "loss": 4.3869, "step": 28010 }, { "epoch": 1.0796595813334409, "grad_norm": 0.0458984375, "learning_rate": 8.781593617798442e-06, "loss": 4.41, "step": 28020 }, { "epoch": 1.0800629209857622, "grad_norm": 0.04541015625, "learning_rate": 8.775292714392702e-06, "loss": 4.3812, "step": 28030 }, { "epoch": 1.0804662606380833, "grad_norm": 0.04541015625, "learning_rate": 8.76899230457807e-06, "loss": 4.4248, "step": 28040 }, { "epoch": 1.0808696002904046, "grad_norm": 0.045166015625, "learning_rate": 8.762692390893794e-06, "loss": 4.4066, "step": 28050 }, { "epoch": 1.0812729399427257, "grad_norm": 0.043701171875, "learning_rate": 8.756392975878908e-06, "loss": 4.3729, "step": 28060 }, { "epoch": 1.081676279595047, "grad_norm": 0.0419921875, "learning_rate": 8.75009406207226e-06, "loss": 4.376, "step": 28070 }, { "epoch": 1.0820796192473683, "grad_norm": 0.045166015625, "learning_rate": 8.743795652012475e-06, "loss": 4.4038, "step": 28080 }, { "epoch": 1.0824829588996894, "grad_norm": 0.0419921875, "learning_rate": 8.737497748237996e-06, "loss": 4.4161, "step": 28090 }, { "epoch": 1.0828862985520107, "grad_norm": 0.04248046875, "learning_rate": 8.731200353287056e-06, "loss": 4.3949, "step": 28100 }, { "epoch": 1.0832896382043318, "grad_norm": 0.044677734375, "learning_rate": 8.724903469697672e-06, "loss": 4.3891, "step": 28110 }, { "epoch": 1.083692977856653, "grad_norm": 0.0458984375, "learning_rate": 8.718607100007668e-06, "loss": 4.3565, "step": 28120 }, { "epoch": 1.0840963175089744, "grad_norm": 0.046630859375, "learning_rate": 8.712311246754653e-06, "loss": 4.4161, "step": 28130 }, { "epoch": 1.0844996571612955, "grad_norm": 0.0458984375, "learning_rate": 8.706015912476038e-06, "loss": 4.4269, "step": 28140 }, { "epoch": 1.0849029968136168, "grad_norm": 0.045654296875, "learning_rate": 8.69972109970901e-06, "loss": 4.4089, "step": 28150 }, { "epoch": 1.0853063364659379, "grad_norm": 0.044189453125, "learning_rate": 8.693426810990557e-06, "loss": 4.4167, "step": 28160 }, { "epoch": 1.0857096761182592, "grad_norm": 0.04248046875, "learning_rate": 8.687133048857454e-06, "loss": 4.429, "step": 28170 }, { "epoch": 1.0861130157705805, "grad_norm": 0.045654296875, "learning_rate": 8.680839815846265e-06, "loss": 4.4092, "step": 28180 }, { "epoch": 1.0865163554229016, "grad_norm": 0.046630859375, "learning_rate": 8.674547114493328e-06, "loss": 4.3838, "step": 28190 }, { "epoch": 1.086919695075223, "grad_norm": 0.044189453125, "learning_rate": 8.668254947334785e-06, "loss": 4.4414, "step": 28200 }, { "epoch": 1.087323034727544, "grad_norm": 0.0458984375, "learning_rate": 8.66196331690655e-06, "loss": 4.3889, "step": 28210 }, { "epoch": 1.0877263743798653, "grad_norm": 0.04150390625, "learning_rate": 8.655672225744329e-06, "loss": 4.3817, "step": 28220 }, { "epoch": 1.0881297140321866, "grad_norm": 0.04541015625, "learning_rate": 8.649381676383603e-06, "loss": 4.4051, "step": 28230 }, { "epoch": 1.0885330536845077, "grad_norm": 0.04541015625, "learning_rate": 8.643091671359643e-06, "loss": 4.4297, "step": 28240 }, { "epoch": 1.088936393336829, "grad_norm": 0.044921875, "learning_rate": 8.636802213207495e-06, "loss": 4.3878, "step": 28250 }, { "epoch": 1.08933973298915, "grad_norm": 0.046142578125, "learning_rate": 8.63051330446198e-06, "loss": 4.3898, "step": 28260 }, { "epoch": 1.0897430726414714, "grad_norm": 0.045166015625, "learning_rate": 8.624224947657707e-06, "loss": 4.3961, "step": 28270 }, { "epoch": 1.0901464122937925, "grad_norm": 0.046875, "learning_rate": 8.61793714532906e-06, "loss": 4.4038, "step": 28280 }, { "epoch": 1.0905497519461138, "grad_norm": 0.04296875, "learning_rate": 8.6116499000102e-06, "loss": 4.3736, "step": 28290 }, { "epoch": 1.0909530915984351, "grad_norm": 0.044677734375, "learning_rate": 8.605363214235057e-06, "loss": 4.4246, "step": 28300 }, { "epoch": 1.0913564312507562, "grad_norm": 0.045654296875, "learning_rate": 8.599077090537342e-06, "loss": 4.4446, "step": 28310 }, { "epoch": 1.0917597709030775, "grad_norm": 0.0458984375, "learning_rate": 8.592791531450537e-06, "loss": 4.398, "step": 28320 }, { "epoch": 1.0921631105553986, "grad_norm": 0.044677734375, "learning_rate": 8.586506539507901e-06, "loss": 4.421, "step": 28330 }, { "epoch": 1.09256645020772, "grad_norm": 0.043701171875, "learning_rate": 8.580222117242457e-06, "loss": 4.3873, "step": 28340 }, { "epoch": 1.0929697898600412, "grad_norm": 0.043212890625, "learning_rate": 8.573938267187e-06, "loss": 4.4129, "step": 28350 }, { "epoch": 1.0933731295123623, "grad_norm": 0.04345703125, "learning_rate": 8.567654991874106e-06, "loss": 4.4158, "step": 28360 }, { "epoch": 1.0937764691646836, "grad_norm": 0.045166015625, "learning_rate": 8.561372293836096e-06, "loss": 4.387, "step": 28370 }, { "epoch": 1.0941798088170047, "grad_norm": 0.044189453125, "learning_rate": 8.555090175605083e-06, "loss": 4.41, "step": 28380 }, { "epoch": 1.094583148469326, "grad_norm": 0.04638671875, "learning_rate": 8.548808639712928e-06, "loss": 4.412, "step": 28390 }, { "epoch": 1.0949864881216473, "grad_norm": 0.045654296875, "learning_rate": 8.542527688691273e-06, "loss": 4.4157, "step": 28400 }, { "epoch": 1.0953898277739684, "grad_norm": 0.046142578125, "learning_rate": 8.53624732507151e-06, "loss": 4.3484, "step": 28410 }, { "epoch": 1.0957931674262897, "grad_norm": 0.04736328125, "learning_rate": 8.5299675513848e-06, "loss": 4.4387, "step": 28420 }, { "epoch": 1.0961965070786108, "grad_norm": 0.048095703125, "learning_rate": 8.523688370162068e-06, "loss": 4.396, "step": 28430 }, { "epoch": 1.0965998467309321, "grad_norm": 0.045654296875, "learning_rate": 8.517409783934006e-06, "loss": 4.3939, "step": 28440 }, { "epoch": 1.0970031863832532, "grad_norm": 0.04345703125, "learning_rate": 8.511131795231045e-06, "loss": 4.4057, "step": 28450 }, { "epoch": 1.0974065260355745, "grad_norm": 0.045166015625, "learning_rate": 8.504854406583395e-06, "loss": 4.4527, "step": 28460 }, { "epoch": 1.0978098656878958, "grad_norm": 0.045654296875, "learning_rate": 8.498577620521025e-06, "loss": 4.4299, "step": 28470 }, { "epoch": 1.098213205340217, "grad_norm": 0.0458984375, "learning_rate": 8.492301439573644e-06, "loss": 4.4051, "step": 28480 }, { "epoch": 1.0986165449925382, "grad_norm": 0.04345703125, "learning_rate": 8.486025866270732e-06, "loss": 4.3919, "step": 28490 }, { "epoch": 1.0990198846448593, "grad_norm": 0.044921875, "learning_rate": 8.479750903141522e-06, "loss": 4.3918, "step": 28500 }, { "epoch": 1.0994232242971806, "grad_norm": 0.046875, "learning_rate": 8.473476552714999e-06, "loss": 4.4178, "step": 28510 }, { "epoch": 1.099826563949502, "grad_norm": 0.046875, "learning_rate": 8.467202817519896e-06, "loss": 4.3783, "step": 28520 }, { "epoch": 1.100229903601823, "grad_norm": 0.044921875, "learning_rate": 8.460929700084704e-06, "loss": 4.3906, "step": 28530 }, { "epoch": 1.1006332432541444, "grad_norm": 0.0458984375, "learning_rate": 8.454657202937666e-06, "loss": 4.4199, "step": 28540 }, { "epoch": 1.1010365829064654, "grad_norm": 0.045166015625, "learning_rate": 8.448385328606777e-06, "loss": 4.3971, "step": 28550 }, { "epoch": 1.1014399225587868, "grad_norm": 0.04638671875, "learning_rate": 8.442114079619765e-06, "loss": 4.3995, "step": 28560 }, { "epoch": 1.101843262211108, "grad_norm": 0.045654296875, "learning_rate": 8.435843458504127e-06, "loss": 4.3979, "step": 28570 }, { "epoch": 1.1022466018634292, "grad_norm": 0.046875, "learning_rate": 8.429573467787097e-06, "loss": 4.4441, "step": 28580 }, { "epoch": 1.1026499415157505, "grad_norm": 0.04541015625, "learning_rate": 8.423304109995653e-06, "loss": 4.3853, "step": 28590 }, { "epoch": 1.1030532811680716, "grad_norm": 0.053466796875, "learning_rate": 8.41703538765652e-06, "loss": 4.39, "step": 28600 }, { "epoch": 1.1034566208203929, "grad_norm": 0.044189453125, "learning_rate": 8.410767303296172e-06, "loss": 4.411, "step": 28610 }, { "epoch": 1.1038599604727142, "grad_norm": 0.044189453125, "learning_rate": 8.40449985944082e-06, "loss": 4.421, "step": 28620 }, { "epoch": 1.1042633001250353, "grad_norm": 0.043212890625, "learning_rate": 8.398233058616415e-06, "loss": 4.391, "step": 28630 }, { "epoch": 1.1046666397773566, "grad_norm": 0.04345703125, "learning_rate": 8.391966903348653e-06, "loss": 4.4013, "step": 28640 }, { "epoch": 1.1050699794296777, "grad_norm": 0.0439453125, "learning_rate": 8.38570139616297e-06, "loss": 4.4265, "step": 28650 }, { "epoch": 1.105473319081999, "grad_norm": 0.04443359375, "learning_rate": 8.379436539584543e-06, "loss": 4.4328, "step": 28660 }, { "epoch": 1.1058766587343203, "grad_norm": 0.045166015625, "learning_rate": 8.373172336138277e-06, "loss": 4.3786, "step": 28670 }, { "epoch": 1.1062799983866414, "grad_norm": 0.043212890625, "learning_rate": 8.366908788348824e-06, "loss": 4.3949, "step": 28680 }, { "epoch": 1.1066833380389627, "grad_norm": 0.044921875, "learning_rate": 8.360645898740574e-06, "loss": 4.3879, "step": 28690 }, { "epoch": 1.1070866776912838, "grad_norm": 0.045166015625, "learning_rate": 8.354383669837632e-06, "loss": 4.3895, "step": 28700 }, { "epoch": 1.107490017343605, "grad_norm": 0.049072265625, "learning_rate": 8.348122104163859e-06, "loss": 4.4323, "step": 28710 }, { "epoch": 1.1078933569959262, "grad_norm": 0.04443359375, "learning_rate": 8.34186120424284e-06, "loss": 4.4102, "step": 28720 }, { "epoch": 1.1082966966482475, "grad_norm": 0.048095703125, "learning_rate": 8.335600972597894e-06, "loss": 4.4491, "step": 28730 }, { "epoch": 1.1087000363005688, "grad_norm": 0.04345703125, "learning_rate": 8.32934141175206e-06, "loss": 4.3994, "step": 28740 }, { "epoch": 1.1091033759528899, "grad_norm": 0.04443359375, "learning_rate": 8.323082524228123e-06, "loss": 4.3888, "step": 28750 }, { "epoch": 1.1095067156052112, "grad_norm": 0.045166015625, "learning_rate": 8.316824312548587e-06, "loss": 4.3883, "step": 28760 }, { "epoch": 1.1099100552575323, "grad_norm": 0.044189453125, "learning_rate": 8.310566779235683e-06, "loss": 4.3823, "step": 28770 }, { "epoch": 1.1103133949098536, "grad_norm": 0.0439453125, "learning_rate": 8.304309926811373e-06, "loss": 4.4182, "step": 28780 }, { "epoch": 1.110716734562175, "grad_norm": 0.043701171875, "learning_rate": 8.298053757797344e-06, "loss": 4.4025, "step": 28790 }, { "epoch": 1.111120074214496, "grad_norm": 0.043701171875, "learning_rate": 8.291798274715004e-06, "loss": 4.3926, "step": 28800 }, { "epoch": 1.1115234138668173, "grad_norm": 0.04443359375, "learning_rate": 8.285543480085491e-06, "loss": 4.4391, "step": 28810 }, { "epoch": 1.1119267535191384, "grad_norm": 0.0458984375, "learning_rate": 8.279289376429653e-06, "loss": 4.4132, "step": 28820 }, { "epoch": 1.1123300931714597, "grad_norm": 0.04638671875, "learning_rate": 8.273035966268074e-06, "loss": 4.4008, "step": 28830 }, { "epoch": 1.1127334328237808, "grad_norm": 0.046142578125, "learning_rate": 8.266783252121051e-06, "loss": 4.3889, "step": 28840 }, { "epoch": 1.113136772476102, "grad_norm": 0.044677734375, "learning_rate": 8.2605312365086e-06, "loss": 4.4199, "step": 28850 }, { "epoch": 1.1135401121284234, "grad_norm": 0.04443359375, "learning_rate": 8.25427992195046e-06, "loss": 4.4014, "step": 28860 }, { "epoch": 1.1139434517807445, "grad_norm": 0.0458984375, "learning_rate": 8.248029310966084e-06, "loss": 4.4568, "step": 28870 }, { "epoch": 1.1143467914330658, "grad_norm": 0.04541015625, "learning_rate": 8.241779406074644e-06, "loss": 4.42, "step": 28880 }, { "epoch": 1.114750131085387, "grad_norm": 0.044921875, "learning_rate": 8.23553020979502e-06, "loss": 4.3701, "step": 28890 }, { "epoch": 1.1151534707377082, "grad_norm": 0.04638671875, "learning_rate": 8.229281724645815e-06, "loss": 4.4442, "step": 28900 }, { "epoch": 1.1155568103900295, "grad_norm": 0.04541015625, "learning_rate": 8.223033953145341e-06, "loss": 4.3869, "step": 28910 }, { "epoch": 1.1159601500423506, "grad_norm": 0.045166015625, "learning_rate": 8.216786897811631e-06, "loss": 4.4061, "step": 28920 }, { "epoch": 1.116363489694672, "grad_norm": 0.046875, "learning_rate": 8.210540561162412e-06, "loss": 4.3789, "step": 28930 }, { "epoch": 1.116766829346993, "grad_norm": 0.042236328125, "learning_rate": 8.204294945715137e-06, "loss": 4.394, "step": 28940 }, { "epoch": 1.1171701689993143, "grad_norm": 0.046142578125, "learning_rate": 8.198050053986963e-06, "loss": 4.4023, "step": 28950 }, { "epoch": 1.1175735086516356, "grad_norm": 0.044921875, "learning_rate": 8.19180588849475e-06, "loss": 4.3858, "step": 28960 }, { "epoch": 1.1179768483039567, "grad_norm": 0.045166015625, "learning_rate": 8.185562451755076e-06, "loss": 4.4283, "step": 28970 }, { "epoch": 1.118380187956278, "grad_norm": 0.044677734375, "learning_rate": 8.179319746284218e-06, "loss": 4.4112, "step": 28980 }, { "epoch": 1.1187835276085991, "grad_norm": 0.044189453125, "learning_rate": 8.173077774598164e-06, "loss": 4.4003, "step": 28990 }, { "epoch": 1.1191868672609204, "grad_norm": 0.046142578125, "learning_rate": 8.166836539212593e-06, "loss": 4.3773, "step": 29000 }, { "epoch": 1.1195902069132417, "grad_norm": 0.046142578125, "learning_rate": 8.160596042642903e-06, "loss": 4.4126, "step": 29010 }, { "epoch": 1.1199935465655628, "grad_norm": 0.045166015625, "learning_rate": 8.154356287404185e-06, "loss": 4.4301, "step": 29020 }, { "epoch": 1.1203968862178841, "grad_norm": 0.04443359375, "learning_rate": 8.14811727601124e-06, "loss": 4.4097, "step": 29030 }, { "epoch": 1.1208002258702052, "grad_norm": 0.046875, "learning_rate": 8.141879010978553e-06, "loss": 4.4171, "step": 29040 }, { "epoch": 1.1212035655225265, "grad_norm": 0.046875, "learning_rate": 8.135641494820328e-06, "loss": 4.4072, "step": 29050 }, { "epoch": 1.1216069051748478, "grad_norm": 0.04248046875, "learning_rate": 8.129404730050455e-06, "loss": 4.4349, "step": 29060 }, { "epoch": 1.122010244827169, "grad_norm": 0.044189453125, "learning_rate": 8.123168719182519e-06, "loss": 4.406, "step": 29070 }, { "epoch": 1.1224135844794902, "grad_norm": 0.04443359375, "learning_rate": 8.116933464729809e-06, "loss": 4.4236, "step": 29080 }, { "epoch": 1.1228169241318113, "grad_norm": 0.044921875, "learning_rate": 8.110698969205306e-06, "loss": 4.3915, "step": 29090 }, { "epoch": 1.1232202637841326, "grad_norm": 0.04150390625, "learning_rate": 8.104465235121688e-06, "loss": 4.3883, "step": 29100 }, { "epoch": 1.123623603436454, "grad_norm": 0.044677734375, "learning_rate": 8.098232264991317e-06, "loss": 4.41, "step": 29110 }, { "epoch": 1.124026943088775, "grad_norm": 0.0439453125, "learning_rate": 8.09200006132626e-06, "loss": 4.4051, "step": 29120 }, { "epoch": 1.1244302827410964, "grad_norm": 0.04443359375, "learning_rate": 8.085768626638265e-06, "loss": 4.4307, "step": 29130 }, { "epoch": 1.1248336223934174, "grad_norm": 0.049072265625, "learning_rate": 8.079537963438775e-06, "loss": 4.3981, "step": 29140 }, { "epoch": 1.1252369620457388, "grad_norm": 0.045166015625, "learning_rate": 8.073308074238917e-06, "loss": 4.4056, "step": 29150 }, { "epoch": 1.1256403016980598, "grad_norm": 0.046142578125, "learning_rate": 8.067078961549516e-06, "loss": 4.4315, "step": 29160 }, { "epoch": 1.1260436413503812, "grad_norm": 0.046630859375, "learning_rate": 8.060850627881074e-06, "loss": 4.414, "step": 29170 }, { "epoch": 1.1264469810027025, "grad_norm": 0.044189453125, "learning_rate": 8.054623075743779e-06, "loss": 4.394, "step": 29180 }, { "epoch": 1.1268503206550236, "grad_norm": 0.049072265625, "learning_rate": 8.048396307647513e-06, "loss": 4.4032, "step": 29190 }, { "epoch": 1.1272536603073449, "grad_norm": 0.042724609375, "learning_rate": 8.042170326101835e-06, "loss": 4.3924, "step": 29200 }, { "epoch": 1.127656999959666, "grad_norm": 0.043212890625, "learning_rate": 8.035945133615992e-06, "loss": 4.3882, "step": 29210 }, { "epoch": 1.1280603396119873, "grad_norm": 0.052490234375, "learning_rate": 8.029720732698905e-06, "loss": 4.4021, "step": 29220 }, { "epoch": 1.1284636792643084, "grad_norm": 0.04443359375, "learning_rate": 8.023497125859183e-06, "loss": 4.4061, "step": 29230 }, { "epoch": 1.1288670189166297, "grad_norm": 0.04541015625, "learning_rate": 8.01727431560511e-06, "loss": 4.4165, "step": 29240 }, { "epoch": 1.129270358568951, "grad_norm": 0.0458984375, "learning_rate": 8.01105230444466e-06, "loss": 4.4318, "step": 29250 }, { "epoch": 1.129673698221272, "grad_norm": 0.043701171875, "learning_rate": 8.004831094885467e-06, "loss": 4.4251, "step": 29260 }, { "epoch": 1.1300770378735934, "grad_norm": 0.046630859375, "learning_rate": 7.998610689434857e-06, "loss": 4.3862, "step": 29270 }, { "epoch": 1.1304803775259145, "grad_norm": 0.045166015625, "learning_rate": 7.992391090599822e-06, "loss": 4.3918, "step": 29280 }, { "epoch": 1.1308837171782358, "grad_norm": 0.04345703125, "learning_rate": 7.986172300887038e-06, "loss": 4.391, "step": 29290 }, { "epoch": 1.131287056830557, "grad_norm": 0.044921875, "learning_rate": 7.979954322802849e-06, "loss": 4.416, "step": 29300 }, { "epoch": 1.1316903964828782, "grad_norm": 0.046875, "learning_rate": 7.973737158853271e-06, "loss": 4.4173, "step": 29310 }, { "epoch": 1.1320937361351995, "grad_norm": 0.043212890625, "learning_rate": 7.967520811544e-06, "loss": 4.4299, "step": 29320 }, { "epoch": 1.1324970757875206, "grad_norm": 0.044189453125, "learning_rate": 7.961305283380388e-06, "loss": 4.4083, "step": 29330 }, { "epoch": 1.1329004154398419, "grad_norm": 0.0439453125, "learning_rate": 7.955090576867471e-06, "loss": 4.3994, "step": 29340 }, { "epoch": 1.1333037550921632, "grad_norm": 0.046875, "learning_rate": 7.948876694509947e-06, "loss": 4.4066, "step": 29350 }, { "epoch": 1.1337070947444843, "grad_norm": 0.044189453125, "learning_rate": 7.942663638812191e-06, "loss": 4.4145, "step": 29360 }, { "epoch": 1.1341104343968056, "grad_norm": 0.046875, "learning_rate": 7.936451412278225e-06, "loss": 4.4202, "step": 29370 }, { "epoch": 1.1345137740491267, "grad_norm": 0.044677734375, "learning_rate": 7.930240017411757e-06, "loss": 4.4057, "step": 29380 }, { "epoch": 1.134917113701448, "grad_norm": 0.04638671875, "learning_rate": 7.924029456716149e-06, "loss": 4.4032, "step": 29390 }, { "epoch": 1.1353204533537693, "grad_norm": 0.04443359375, "learning_rate": 7.917819732694432e-06, "loss": 4.3932, "step": 29400 }, { "epoch": 1.1357237930060904, "grad_norm": 0.042236328125, "learning_rate": 7.911610847849294e-06, "loss": 4.4141, "step": 29410 }, { "epoch": 1.1361271326584117, "grad_norm": 0.043701171875, "learning_rate": 7.905402804683092e-06, "loss": 4.4464, "step": 29420 }, { "epoch": 1.1365304723107328, "grad_norm": 0.04443359375, "learning_rate": 7.89919560569784e-06, "loss": 4.4012, "step": 29430 }, { "epoch": 1.136933811963054, "grad_norm": 0.044921875, "learning_rate": 7.892989253395209e-06, "loss": 4.3992, "step": 29440 }, { "epoch": 1.1373371516153754, "grad_norm": 0.045166015625, "learning_rate": 7.886783750276532e-06, "loss": 4.4006, "step": 29450 }, { "epoch": 1.1377404912676965, "grad_norm": 0.045166015625, "learning_rate": 7.880579098842802e-06, "loss": 4.4124, "step": 29460 }, { "epoch": 1.1381438309200178, "grad_norm": 0.044921875, "learning_rate": 7.874375301594665e-06, "loss": 4.4346, "step": 29470 }, { "epoch": 1.138547170572339, "grad_norm": 0.04736328125, "learning_rate": 7.868172361032424e-06, "loss": 4.4187, "step": 29480 }, { "epoch": 1.1389505102246602, "grad_norm": 0.045654296875, "learning_rate": 7.861970279656035e-06, "loss": 4.4056, "step": 29490 }, { "epoch": 1.1393538498769815, "grad_norm": 0.04541015625, "learning_rate": 7.855769059965112e-06, "loss": 4.402, "step": 29500 }, { "epoch": 1.1397571895293026, "grad_norm": 0.044677734375, "learning_rate": 7.849568704458921e-06, "loss": 4.4054, "step": 29510 }, { "epoch": 1.140160529181624, "grad_norm": 0.043701171875, "learning_rate": 7.84336921563637e-06, "loss": 4.4124, "step": 29520 }, { "epoch": 1.140563868833945, "grad_norm": 0.04638671875, "learning_rate": 7.83717059599603e-06, "loss": 4.3995, "step": 29530 }, { "epoch": 1.1409672084862663, "grad_norm": 0.043212890625, "learning_rate": 7.830972848036124e-06, "loss": 4.382, "step": 29540 }, { "epoch": 1.1413705481385876, "grad_norm": 0.045654296875, "learning_rate": 7.824775974254504e-06, "loss": 4.3839, "step": 29550 }, { "epoch": 1.1417738877909087, "grad_norm": 0.046875, "learning_rate": 7.81857997714869e-06, "loss": 4.4116, "step": 29560 }, { "epoch": 1.14217722744323, "grad_norm": 0.0439453125, "learning_rate": 7.812384859215839e-06, "loss": 4.4014, "step": 29570 }, { "epoch": 1.1425805670955511, "grad_norm": 0.04638671875, "learning_rate": 7.806190622952759e-06, "loss": 4.3933, "step": 29580 }, { "epoch": 1.1429839067478724, "grad_norm": 0.045654296875, "learning_rate": 7.799997270855895e-06, "loss": 4.4353, "step": 29590 }, { "epoch": 1.1433872464001935, "grad_norm": 0.0498046875, "learning_rate": 7.793804805421341e-06, "loss": 4.3746, "step": 29600 }, { "epoch": 1.1437905860525148, "grad_norm": 0.044921875, "learning_rate": 7.787613229144837e-06, "loss": 4.4092, "step": 29610 }, { "epoch": 1.1441939257048361, "grad_norm": 0.04443359375, "learning_rate": 7.781422544521762e-06, "loss": 4.3969, "step": 29620 }, { "epoch": 1.1445972653571572, "grad_norm": 0.046875, "learning_rate": 7.775232754047124e-06, "loss": 4.419, "step": 29630 }, { "epoch": 1.1450006050094785, "grad_norm": 0.044921875, "learning_rate": 7.76904386021559e-06, "loss": 4.4106, "step": 29640 }, { "epoch": 1.1454039446617996, "grad_norm": 0.04736328125, "learning_rate": 7.762855865521454e-06, "loss": 4.4131, "step": 29650 }, { "epoch": 1.145807284314121, "grad_norm": 0.044189453125, "learning_rate": 7.756668772458646e-06, "loss": 4.3982, "step": 29660 }, { "epoch": 1.146210623966442, "grad_norm": 0.04296875, "learning_rate": 7.750482583520742e-06, "loss": 4.4188, "step": 29670 }, { "epoch": 1.1466139636187633, "grad_norm": 0.047119140625, "learning_rate": 7.744297301200945e-06, "loss": 4.4452, "step": 29680 }, { "epoch": 1.1470173032710846, "grad_norm": 0.0419921875, "learning_rate": 7.738112927992101e-06, "loss": 4.3804, "step": 29690 }, { "epoch": 1.1474206429234057, "grad_norm": 0.044189453125, "learning_rate": 7.731929466386679e-06, "loss": 4.397, "step": 29700 }, { "epoch": 1.147823982575727, "grad_norm": 0.044921875, "learning_rate": 7.725746918876785e-06, "loss": 4.4035, "step": 29710 }, { "epoch": 1.1482273222280481, "grad_norm": 0.04541015625, "learning_rate": 7.71956528795416e-06, "loss": 4.3903, "step": 29720 }, { "epoch": 1.1486306618803694, "grad_norm": 0.0439453125, "learning_rate": 7.713384576110178e-06, "loss": 4.4097, "step": 29730 }, { "epoch": 1.1490340015326908, "grad_norm": 0.0439453125, "learning_rate": 7.70720478583583e-06, "loss": 4.4421, "step": 29740 }, { "epoch": 1.1494373411850118, "grad_norm": 0.0458984375, "learning_rate": 7.701025919621748e-06, "loss": 4.4108, "step": 29750 }, { "epoch": 1.1498406808373332, "grad_norm": 0.046630859375, "learning_rate": 7.694847979958188e-06, "loss": 4.392, "step": 29760 }, { "epoch": 1.1502440204896542, "grad_norm": 0.04443359375, "learning_rate": 7.688670969335028e-06, "loss": 4.4114, "step": 29770 }, { "epoch": 1.1506473601419756, "grad_norm": 0.0439453125, "learning_rate": 7.682494890241775e-06, "loss": 4.4113, "step": 29780 }, { "epoch": 1.1510506997942969, "grad_norm": 0.0439453125, "learning_rate": 7.676319745167562e-06, "loss": 4.3765, "step": 29790 }, { "epoch": 1.151454039446618, "grad_norm": 0.040771484375, "learning_rate": 7.67014553660115e-06, "loss": 4.4455, "step": 29800 }, { "epoch": 1.1518573790989393, "grad_norm": 0.042724609375, "learning_rate": 7.663972267030907e-06, "loss": 4.4151, "step": 29810 }, { "epoch": 1.1522607187512603, "grad_norm": 0.047607421875, "learning_rate": 7.657799938944837e-06, "loss": 4.4118, "step": 29820 }, { "epoch": 1.1526640584035817, "grad_norm": 0.043701171875, "learning_rate": 7.65162855483056e-06, "loss": 4.4025, "step": 29830 }, { "epoch": 1.153067398055903, "grad_norm": 0.044189453125, "learning_rate": 7.645458117175316e-06, "loss": 4.3771, "step": 29840 }, { "epoch": 1.153470737708224, "grad_norm": 0.044677734375, "learning_rate": 7.63928862846596e-06, "loss": 4.3911, "step": 29850 }, { "epoch": 1.1538740773605454, "grad_norm": 0.043701171875, "learning_rate": 7.633120091188971e-06, "loss": 4.3992, "step": 29860 }, { "epoch": 1.1542774170128665, "grad_norm": 0.045654296875, "learning_rate": 7.626952507830441e-06, "loss": 4.3962, "step": 29870 }, { "epoch": 1.1546807566651878, "grad_norm": 0.048828125, "learning_rate": 7.620785880876081e-06, "loss": 4.3963, "step": 29880 }, { "epoch": 1.155084096317509, "grad_norm": 0.047607421875, "learning_rate": 7.614620212811203e-06, "loss": 4.3898, "step": 29890 }, { "epoch": 1.1554874359698302, "grad_norm": 0.048828125, "learning_rate": 7.60845550612075e-06, "loss": 4.4213, "step": 29900 }, { "epoch": 1.1558907756221515, "grad_norm": 0.05517578125, "learning_rate": 7.602291763289271e-06, "loss": 4.4136, "step": 29910 }, { "epoch": 1.1562941152744726, "grad_norm": 0.04638671875, "learning_rate": 7.596128986800921e-06, "loss": 4.402, "step": 29920 }, { "epoch": 1.1566974549267939, "grad_norm": 0.0458984375, "learning_rate": 7.589967179139476e-06, "loss": 4.4295, "step": 29930 }, { "epoch": 1.1571007945791152, "grad_norm": 0.0439453125, "learning_rate": 7.583806342788311e-06, "loss": 4.3847, "step": 29940 }, { "epoch": 1.1575041342314363, "grad_norm": 0.04345703125, "learning_rate": 7.577646480230423e-06, "loss": 4.4314, "step": 29950 }, { "epoch": 1.1579074738837576, "grad_norm": 0.0478515625, "learning_rate": 7.571487593948395e-06, "loss": 4.3909, "step": 29960 }, { "epoch": 1.1583108135360787, "grad_norm": 0.044189453125, "learning_rate": 7.565329686424439e-06, "loss": 4.3858, "step": 29970 }, { "epoch": 1.1587141531884, "grad_norm": 0.046630859375, "learning_rate": 7.559172760140358e-06, "loss": 4.4142, "step": 29980 }, { "epoch": 1.159117492840721, "grad_norm": 0.044677734375, "learning_rate": 7.553016817577574e-06, "loss": 4.3753, "step": 29990 }, { "epoch": 1.1595208324930424, "grad_norm": 0.045654296875, "learning_rate": 7.54686186121709e-06, "loss": 4.3887, "step": 30000 }, { "epoch": 1.1599241721453637, "grad_norm": 0.044189453125, "learning_rate": 7.540707893539531e-06, "loss": 4.409, "step": 30010 }, { "epoch": 1.1603275117976848, "grad_norm": 0.04736328125, "learning_rate": 7.53455491702512e-06, "loss": 4.4414, "step": 30020 }, { "epoch": 1.160730851450006, "grad_norm": 0.0458984375, "learning_rate": 7.528402934153671e-06, "loss": 4.4043, "step": 30030 }, { "epoch": 1.1611341911023272, "grad_norm": 0.042236328125, "learning_rate": 7.522251947404609e-06, "loss": 4.3978, "step": 30040 }, { "epoch": 1.1615375307546485, "grad_norm": 0.048095703125, "learning_rate": 7.5161019592569525e-06, "loss": 4.4025, "step": 30050 }, { "epoch": 1.1619408704069698, "grad_norm": 0.047119140625, "learning_rate": 7.5099529721893205e-06, "loss": 4.3936, "step": 30060 }, { "epoch": 1.162344210059291, "grad_norm": 0.048583984375, "learning_rate": 7.503804988679919e-06, "loss": 4.3882, "step": 30070 }, { "epoch": 1.1627475497116122, "grad_norm": 0.04736328125, "learning_rate": 7.497658011206559e-06, "loss": 4.3889, "step": 30080 }, { "epoch": 1.1631508893639333, "grad_norm": 0.044189453125, "learning_rate": 7.491512042246645e-06, "loss": 4.4104, "step": 30090 }, { "epoch": 1.1635542290162546, "grad_norm": 0.043212890625, "learning_rate": 7.485367084277175e-06, "loss": 4.4237, "step": 30100 }, { "epoch": 1.1639575686685757, "grad_norm": 0.04541015625, "learning_rate": 7.479223139774734e-06, "loss": 4.4303, "step": 30110 }, { "epoch": 1.164360908320897, "grad_norm": 0.044677734375, "learning_rate": 7.473080211215505e-06, "loss": 4.3747, "step": 30120 }, { "epoch": 1.1647642479732183, "grad_norm": 0.04443359375, "learning_rate": 7.466938301075262e-06, "loss": 4.3777, "step": 30130 }, { "epoch": 1.1651675876255394, "grad_norm": 0.0458984375, "learning_rate": 7.460797411829358e-06, "loss": 4.3832, "step": 30140 }, { "epoch": 1.1655709272778607, "grad_norm": 0.0458984375, "learning_rate": 7.454657545952746e-06, "loss": 4.4129, "step": 30150 }, { "epoch": 1.1659742669301818, "grad_norm": 0.044921875, "learning_rate": 7.4485187059199645e-06, "loss": 4.405, "step": 30160 }, { "epoch": 1.1663776065825031, "grad_norm": 0.048095703125, "learning_rate": 7.442380894205139e-06, "loss": 4.4032, "step": 30170 }, { "epoch": 1.1667809462348244, "grad_norm": 0.0439453125, "learning_rate": 7.436244113281972e-06, "loss": 4.4338, "step": 30180 }, { "epoch": 1.1671842858871455, "grad_norm": 0.043212890625, "learning_rate": 7.4301083656237584e-06, "loss": 4.4303, "step": 30190 }, { "epoch": 1.1675876255394668, "grad_norm": 0.04736328125, "learning_rate": 7.423973653703378e-06, "loss": 4.3878, "step": 30200 }, { "epoch": 1.167990965191788, "grad_norm": 0.04443359375, "learning_rate": 7.417839979993291e-06, "loss": 4.4176, "step": 30210 }, { "epoch": 1.1683943048441092, "grad_norm": 0.046142578125, "learning_rate": 7.411707346965535e-06, "loss": 4.438, "step": 30220 }, { "epoch": 1.1687976444964305, "grad_norm": 0.046630859375, "learning_rate": 7.405575757091733e-06, "loss": 4.402, "step": 30230 }, { "epoch": 1.1692009841487516, "grad_norm": 0.043701171875, "learning_rate": 7.399445212843092e-06, "loss": 4.4325, "step": 30240 }, { "epoch": 1.169604323801073, "grad_norm": 0.04443359375, "learning_rate": 7.393315716690382e-06, "loss": 4.4349, "step": 30250 }, { "epoch": 1.170007663453394, "grad_norm": 0.044677734375, "learning_rate": 7.387187271103963e-06, "loss": 4.3709, "step": 30260 }, { "epoch": 1.1704110031057153, "grad_norm": 0.043701171875, "learning_rate": 7.381059878553776e-06, "loss": 4.3907, "step": 30270 }, { "epoch": 1.1708143427580366, "grad_norm": 0.045654296875, "learning_rate": 7.374933541509323e-06, "loss": 4.4294, "step": 30280 }, { "epoch": 1.1712176824103577, "grad_norm": 0.04296875, "learning_rate": 7.3688082624396924e-06, "loss": 4.3864, "step": 30290 }, { "epoch": 1.171621022062679, "grad_norm": 0.045166015625, "learning_rate": 7.362684043813542e-06, "loss": 4.3593, "step": 30300 }, { "epoch": 1.1720243617150001, "grad_norm": 0.04345703125, "learning_rate": 7.356560888099099e-06, "loss": 4.3983, "step": 30310 }, { "epoch": 1.1724277013673214, "grad_norm": 0.0439453125, "learning_rate": 7.350438797764172e-06, "loss": 4.3866, "step": 30320 }, { "epoch": 1.1728310410196428, "grad_norm": 0.045654296875, "learning_rate": 7.344317775276127e-06, "loss": 4.4318, "step": 30330 }, { "epoch": 1.1732343806719638, "grad_norm": 0.046630859375, "learning_rate": 7.338197823101908e-06, "loss": 4.3798, "step": 30340 }, { "epoch": 1.1736377203242851, "grad_norm": 0.0546875, "learning_rate": 7.332078943708026e-06, "loss": 4.3902, "step": 30350 }, { "epoch": 1.1740410599766062, "grad_norm": 0.046142578125, "learning_rate": 7.325961139560564e-06, "loss": 4.4213, "step": 30360 }, { "epoch": 1.1744443996289275, "grad_norm": 0.044677734375, "learning_rate": 7.319844413125161e-06, "loss": 4.3883, "step": 30370 }, { "epoch": 1.1748477392812489, "grad_norm": 0.04296875, "learning_rate": 7.313728766867032e-06, "loss": 4.4077, "step": 30380 }, { "epoch": 1.17525107893357, "grad_norm": 0.043212890625, "learning_rate": 7.307614203250955e-06, "loss": 4.4009, "step": 30390 }, { "epoch": 1.1756544185858913, "grad_norm": 0.04443359375, "learning_rate": 7.301500724741261e-06, "loss": 4.3758, "step": 30400 }, { "epoch": 1.1760577582382123, "grad_norm": 0.046630859375, "learning_rate": 7.295388333801857e-06, "loss": 4.4395, "step": 30410 }, { "epoch": 1.1764610978905337, "grad_norm": 0.044189453125, "learning_rate": 7.289277032896207e-06, "loss": 4.3991, "step": 30420 }, { "epoch": 1.1768644375428547, "grad_norm": 0.04248046875, "learning_rate": 7.283166824487338e-06, "loss": 4.4172, "step": 30430 }, { "epoch": 1.177267777195176, "grad_norm": 0.047607421875, "learning_rate": 7.277057711037826e-06, "loss": 4.3827, "step": 30440 }, { "epoch": 1.1776711168474974, "grad_norm": 0.045166015625, "learning_rate": 7.270949695009819e-06, "loss": 4.4065, "step": 30450 }, { "epoch": 1.1780744564998185, "grad_norm": 0.04248046875, "learning_rate": 7.264842778865016e-06, "loss": 4.3897, "step": 30460 }, { "epoch": 1.1784777961521398, "grad_norm": 0.04443359375, "learning_rate": 7.258736965064677e-06, "loss": 4.4219, "step": 30470 }, { "epoch": 1.1788811358044609, "grad_norm": 0.044921875, "learning_rate": 7.252632256069609e-06, "loss": 4.3919, "step": 30480 }, { "epoch": 1.1792844754567822, "grad_norm": 0.0439453125, "learning_rate": 7.246528654340186e-06, "loss": 4.3938, "step": 30490 }, { "epoch": 1.1796878151091033, "grad_norm": 0.04345703125, "learning_rate": 7.240426162336329e-06, "loss": 4.385, "step": 30500 }, { "epoch": 1.1800911547614246, "grad_norm": 0.047607421875, "learning_rate": 7.234324782517506e-06, "loss": 4.396, "step": 30510 }, { "epoch": 1.1804944944137459, "grad_norm": 0.04443359375, "learning_rate": 7.2282245173427455e-06, "loss": 4.4393, "step": 30520 }, { "epoch": 1.180897834066067, "grad_norm": 0.044921875, "learning_rate": 7.222125369270627e-06, "loss": 4.414, "step": 30530 }, { "epoch": 1.1813011737183883, "grad_norm": 0.04248046875, "learning_rate": 7.216027340759275e-06, "loss": 4.3871, "step": 30540 }, { "epoch": 1.1817045133707094, "grad_norm": 0.04541015625, "learning_rate": 7.209930434266363e-06, "loss": 4.4298, "step": 30550 }, { "epoch": 1.1821078530230307, "grad_norm": 0.0439453125, "learning_rate": 7.203834652249116e-06, "loss": 4.3977, "step": 30560 }, { "epoch": 1.182511192675352, "grad_norm": 0.0458984375, "learning_rate": 7.197739997164304e-06, "loss": 4.3935, "step": 30570 }, { "epoch": 1.182914532327673, "grad_norm": 0.046875, "learning_rate": 7.1916464714682445e-06, "loss": 4.3923, "step": 30580 }, { "epoch": 1.1833178719799944, "grad_norm": 0.042236328125, "learning_rate": 7.185554077616791e-06, "loss": 4.3844, "step": 30590 }, { "epoch": 1.1837212116323155, "grad_norm": 0.044677734375, "learning_rate": 7.179462818065353e-06, "loss": 4.3812, "step": 30600 }, { "epoch": 1.1841245512846368, "grad_norm": 0.04443359375, "learning_rate": 7.17337269526888e-06, "loss": 4.399, "step": 30610 }, { "epoch": 1.184527890936958, "grad_norm": 0.04443359375, "learning_rate": 7.167283711681854e-06, "loss": 4.421, "step": 30620 }, { "epoch": 1.1849312305892792, "grad_norm": 0.04345703125, "learning_rate": 7.1611958697583065e-06, "loss": 4.4136, "step": 30630 }, { "epoch": 1.1853345702416005, "grad_norm": 0.047607421875, "learning_rate": 7.155109171951811e-06, "loss": 4.3822, "step": 30640 }, { "epoch": 1.1857379098939216, "grad_norm": 0.046875, "learning_rate": 7.149023620715474e-06, "loss": 4.4175, "step": 30650 }, { "epoch": 1.186141249546243, "grad_norm": 0.042724609375, "learning_rate": 7.142939218501942e-06, "loss": 4.4107, "step": 30660 }, { "epoch": 1.1865445891985642, "grad_norm": 0.044677734375, "learning_rate": 7.136855967763397e-06, "loss": 4.4178, "step": 30670 }, { "epoch": 1.1869479288508853, "grad_norm": 0.048828125, "learning_rate": 7.130773870951561e-06, "loss": 4.4189, "step": 30680 }, { "epoch": 1.1873512685032066, "grad_norm": 0.0439453125, "learning_rate": 7.124692930517689e-06, "loss": 4.4232, "step": 30690 }, { "epoch": 1.1877546081555277, "grad_norm": 0.045166015625, "learning_rate": 7.118613148912565e-06, "loss": 4.4074, "step": 30700 }, { "epoch": 1.188157947807849, "grad_norm": 0.04443359375, "learning_rate": 7.112534528586512e-06, "loss": 4.4277, "step": 30710 }, { "epoch": 1.1885612874601703, "grad_norm": 0.0478515625, "learning_rate": 7.106457071989386e-06, "loss": 4.4087, "step": 30720 }, { "epoch": 1.1889646271124914, "grad_norm": 0.04248046875, "learning_rate": 7.100380781570564e-06, "loss": 4.3823, "step": 30730 }, { "epoch": 1.1893679667648127, "grad_norm": 0.0458984375, "learning_rate": 7.0943056597789665e-06, "loss": 4.4376, "step": 30740 }, { "epoch": 1.1897713064171338, "grad_norm": 0.04443359375, "learning_rate": 7.0882317090630335e-06, "loss": 4.4296, "step": 30750 }, { "epoch": 1.1901746460694551, "grad_norm": 0.045654296875, "learning_rate": 7.082158931870742e-06, "loss": 4.4065, "step": 30760 }, { "epoch": 1.1905779857217764, "grad_norm": 0.044921875, "learning_rate": 7.076087330649581e-06, "loss": 4.3752, "step": 30770 }, { "epoch": 1.1909813253740975, "grad_norm": 0.041259765625, "learning_rate": 7.070016907846581e-06, "loss": 4.3921, "step": 30780 }, { "epoch": 1.1913846650264188, "grad_norm": 0.044921875, "learning_rate": 7.0639476659082885e-06, "loss": 4.4216, "step": 30790 }, { "epoch": 1.19178800467874, "grad_norm": 0.04541015625, "learning_rate": 7.057879607280782e-06, "loss": 4.4309, "step": 30800 }, { "epoch": 1.1921913443310612, "grad_norm": 0.04345703125, "learning_rate": 7.051812734409649e-06, "loss": 4.4111, "step": 30810 }, { "epoch": 1.1925946839833825, "grad_norm": 0.044921875, "learning_rate": 7.045747049740013e-06, "loss": 4.4166, "step": 30820 }, { "epoch": 1.1929980236357036, "grad_norm": 0.045654296875, "learning_rate": 7.039682555716516e-06, "loss": 4.3764, "step": 30830 }, { "epoch": 1.193401363288025, "grad_norm": 0.0498046875, "learning_rate": 7.033619254783314e-06, "loss": 4.4133, "step": 30840 }, { "epoch": 1.193804702940346, "grad_norm": 0.041748046875, "learning_rate": 7.027557149384087e-06, "loss": 4.3868, "step": 30850 }, { "epoch": 1.1942080425926673, "grad_norm": 0.04638671875, "learning_rate": 7.021496241962031e-06, "loss": 4.3952, "step": 30860 }, { "epoch": 1.1946113822449884, "grad_norm": 0.045654296875, "learning_rate": 7.015436534959868e-06, "loss": 4.4016, "step": 30870 }, { "epoch": 1.1950147218973097, "grad_norm": 0.044677734375, "learning_rate": 7.009378030819818e-06, "loss": 4.4212, "step": 30880 }, { "epoch": 1.195418061549631, "grad_norm": 0.0458984375, "learning_rate": 7.003320731983632e-06, "loss": 4.398, "step": 30890 }, { "epoch": 1.1958214012019521, "grad_norm": 0.04638671875, "learning_rate": 6.997264640892567e-06, "loss": 4.4301, "step": 30900 }, { "epoch": 1.1962247408542734, "grad_norm": 0.04833984375, "learning_rate": 6.991209759987402e-06, "loss": 4.4437, "step": 30910 }, { "epoch": 1.1966280805065945, "grad_norm": 0.0458984375, "learning_rate": 6.985156091708416e-06, "loss": 4.4105, "step": 30920 }, { "epoch": 1.1970314201589158, "grad_norm": 0.04638671875, "learning_rate": 6.979103638495408e-06, "loss": 4.4108, "step": 30930 }, { "epoch": 1.197434759811237, "grad_norm": 0.0458984375, "learning_rate": 6.9730524027876866e-06, "loss": 4.4157, "step": 30940 }, { "epoch": 1.1978380994635582, "grad_norm": 0.04833984375, "learning_rate": 6.967002387024071e-06, "loss": 4.3818, "step": 30950 }, { "epoch": 1.1982414391158795, "grad_norm": 0.044921875, "learning_rate": 6.960953593642878e-06, "loss": 4.4326, "step": 30960 }, { "epoch": 1.1986447787682006, "grad_norm": 0.044677734375, "learning_rate": 6.954906025081943e-06, "loss": 4.3847, "step": 30970 }, { "epoch": 1.199048118420522, "grad_norm": 0.04345703125, "learning_rate": 6.948859683778608e-06, "loss": 4.4073, "step": 30980 }, { "epoch": 1.199451458072843, "grad_norm": 0.047607421875, "learning_rate": 6.942814572169711e-06, "loss": 4.4107, "step": 30990 }, { "epoch": 1.1998547977251643, "grad_norm": 0.044677734375, "learning_rate": 6.9367706926916035e-06, "loss": 4.4069, "step": 31000 }, { "epoch": 1.2002581373774857, "grad_norm": 0.047119140625, "learning_rate": 6.930728047780138e-06, "loss": 4.4287, "step": 31010 }, { "epoch": 1.2006614770298067, "grad_norm": 0.04443359375, "learning_rate": 6.924686639870669e-06, "loss": 4.4101, "step": 31020 }, { "epoch": 1.201064816682128, "grad_norm": 0.04541015625, "learning_rate": 6.918646471398047e-06, "loss": 4.3875, "step": 31030 }, { "epoch": 1.2014681563344491, "grad_norm": 0.044677734375, "learning_rate": 6.91260754479663e-06, "loss": 4.4071, "step": 31040 }, { "epoch": 1.2018714959867705, "grad_norm": 0.044677734375, "learning_rate": 6.9065698625002765e-06, "loss": 4.3917, "step": 31050 }, { "epoch": 1.2022748356390918, "grad_norm": 0.0439453125, "learning_rate": 6.900533426942342e-06, "loss": 4.39, "step": 31060 }, { "epoch": 1.2026781752914129, "grad_norm": 0.042724609375, "learning_rate": 6.894498240555669e-06, "loss": 4.3867, "step": 31070 }, { "epoch": 1.2030815149437342, "grad_norm": 0.043701171875, "learning_rate": 6.8884643057726116e-06, "loss": 4.3749, "step": 31080 }, { "epoch": 1.2034848545960553, "grad_norm": 0.0458984375, "learning_rate": 6.882431625025016e-06, "loss": 4.4206, "step": 31090 }, { "epoch": 1.2038881942483766, "grad_norm": 0.05224609375, "learning_rate": 6.876400200744213e-06, "loss": 4.3981, "step": 31100 }, { "epoch": 1.2042915339006979, "grad_norm": 0.044189453125, "learning_rate": 6.87037003536104e-06, "loss": 4.4066, "step": 31110 }, { "epoch": 1.204694873553019, "grad_norm": 0.047119140625, "learning_rate": 6.86434113130582e-06, "loss": 4.3848, "step": 31120 }, { "epoch": 1.2050982132053403, "grad_norm": 0.04150390625, "learning_rate": 6.858313491008373e-06, "loss": 4.4187, "step": 31130 }, { "epoch": 1.2055015528576614, "grad_norm": 0.04541015625, "learning_rate": 6.852287116897998e-06, "loss": 4.3692, "step": 31140 }, { "epoch": 1.2059048925099827, "grad_norm": 0.04541015625, "learning_rate": 6.846262011403495e-06, "loss": 4.3664, "step": 31150 }, { "epoch": 1.206308232162304, "grad_norm": 0.045166015625, "learning_rate": 6.840238176953149e-06, "loss": 4.3947, "step": 31160 }, { "epoch": 1.206711571814625, "grad_norm": 0.0478515625, "learning_rate": 6.834215615974735e-06, "loss": 4.4017, "step": 31170 }, { "epoch": 1.2071149114669464, "grad_norm": 0.045654296875, "learning_rate": 6.82819433089551e-06, "loss": 4.3814, "step": 31180 }, { "epoch": 1.2075182511192675, "grad_norm": 0.043701171875, "learning_rate": 6.822174324142221e-06, "loss": 4.4097, "step": 31190 }, { "epoch": 1.2079215907715888, "grad_norm": 0.042724609375, "learning_rate": 6.816155598141101e-06, "loss": 4.4276, "step": 31200 }, { "epoch": 1.20832493042391, "grad_norm": 0.04296875, "learning_rate": 6.810138155317856e-06, "loss": 4.3912, "step": 31210 }, { "epoch": 1.2087282700762312, "grad_norm": 0.04296875, "learning_rate": 6.804121998097688e-06, "loss": 4.4028, "step": 31220 }, { "epoch": 1.2091316097285525, "grad_norm": 0.043701171875, "learning_rate": 6.798107128905275e-06, "loss": 4.3951, "step": 31230 }, { "epoch": 1.2095349493808736, "grad_norm": 0.043212890625, "learning_rate": 6.792093550164781e-06, "loss": 4.425, "step": 31240 }, { "epoch": 1.209938289033195, "grad_norm": 0.044677734375, "learning_rate": 6.786081264299838e-06, "loss": 4.3911, "step": 31250 }, { "epoch": 1.210341628685516, "grad_norm": 0.045654296875, "learning_rate": 6.7800702737335675e-06, "loss": 4.3821, "step": 31260 }, { "epoch": 1.2107449683378373, "grad_norm": 0.045166015625, "learning_rate": 6.774060580888566e-06, "loss": 4.4135, "step": 31270 }, { "epoch": 1.2111483079901586, "grad_norm": 0.0458984375, "learning_rate": 6.76805218818691e-06, "loss": 4.4127, "step": 31280 }, { "epoch": 1.2115516476424797, "grad_norm": 0.048583984375, "learning_rate": 6.762045098050144e-06, "loss": 4.4038, "step": 31290 }, { "epoch": 1.211954987294801, "grad_norm": 0.04150390625, "learning_rate": 6.756039312899294e-06, "loss": 4.4047, "step": 31300 }, { "epoch": 1.212358326947122, "grad_norm": 0.044921875, "learning_rate": 6.750034835154864e-06, "loss": 4.4125, "step": 31310 }, { "epoch": 1.2127616665994434, "grad_norm": 0.04443359375, "learning_rate": 6.744031667236817e-06, "loss": 4.4248, "step": 31320 }, { "epoch": 1.2131650062517645, "grad_norm": 0.047119140625, "learning_rate": 6.7380298115646e-06, "loss": 4.4163, "step": 31330 }, { "epoch": 1.2135683459040858, "grad_norm": 0.048095703125, "learning_rate": 6.73202927055713e-06, "loss": 4.4001, "step": 31340 }, { "epoch": 1.213971685556407, "grad_norm": 0.047119140625, "learning_rate": 6.72603004663279e-06, "loss": 4.4111, "step": 31350 }, { "epoch": 1.2143750252087282, "grad_norm": 0.044189453125, "learning_rate": 6.720032142209434e-06, "loss": 4.4117, "step": 31360 }, { "epoch": 1.2147783648610495, "grad_norm": 0.042724609375, "learning_rate": 6.714035559704384e-06, "loss": 4.4208, "step": 31370 }, { "epoch": 1.2151817045133706, "grad_norm": 0.04443359375, "learning_rate": 6.708040301534431e-06, "loss": 4.4411, "step": 31380 }, { "epoch": 1.215585044165692, "grad_norm": 0.045166015625, "learning_rate": 6.702046370115834e-06, "loss": 4.4225, "step": 31390 }, { "epoch": 1.2159883838180132, "grad_norm": 0.048095703125, "learning_rate": 6.696053767864308e-06, "loss": 4.4072, "step": 31400 }, { "epoch": 1.2163917234703343, "grad_norm": 0.0458984375, "learning_rate": 6.690062497195039e-06, "loss": 4.4116, "step": 31410 }, { "epoch": 1.2167950631226556, "grad_norm": 0.045166015625, "learning_rate": 6.684072560522678e-06, "loss": 4.3683, "step": 31420 }, { "epoch": 1.2171984027749767, "grad_norm": 0.0458984375, "learning_rate": 6.67808396026134e-06, "loss": 4.3953, "step": 31430 }, { "epoch": 1.217601742427298, "grad_norm": 0.045166015625, "learning_rate": 6.67209669882459e-06, "loss": 4.3983, "step": 31440 }, { "epoch": 1.2180050820796193, "grad_norm": 0.053955078125, "learning_rate": 6.666110778625463e-06, "loss": 4.396, "step": 31450 }, { "epoch": 1.2184084217319404, "grad_norm": 0.04443359375, "learning_rate": 6.660126202076456e-06, "loss": 4.3836, "step": 31460 }, { "epoch": 1.2188117613842617, "grad_norm": 0.0458984375, "learning_rate": 6.654142971589513e-06, "loss": 4.4379, "step": 31470 }, { "epoch": 1.2192151010365828, "grad_norm": 0.047607421875, "learning_rate": 6.6481610895760475e-06, "loss": 4.4648, "step": 31480 }, { "epoch": 1.2196184406889041, "grad_norm": 0.04443359375, "learning_rate": 6.642180558446922e-06, "loss": 4.417, "step": 31490 }, { "epoch": 1.2200217803412254, "grad_norm": 0.04443359375, "learning_rate": 6.636201380612461e-06, "loss": 4.3684, "step": 31500 }, { "epoch": 1.2204251199935465, "grad_norm": 0.044921875, "learning_rate": 6.630223558482433e-06, "loss": 4.3974, "step": 31510 }, { "epoch": 1.2208284596458678, "grad_norm": 0.045654296875, "learning_rate": 6.624247094466072e-06, "loss": 4.4056, "step": 31520 }, { "epoch": 1.221231799298189, "grad_norm": 0.04296875, "learning_rate": 6.618271990972054e-06, "loss": 4.4151, "step": 31530 }, { "epoch": 1.2216351389505102, "grad_norm": 0.04150390625, "learning_rate": 6.6122982504085185e-06, "loss": 4.4194, "step": 31540 }, { "epoch": 1.2220384786028315, "grad_norm": 0.042236328125, "learning_rate": 6.6063258751830435e-06, "loss": 4.3973, "step": 31550 }, { "epoch": 1.2224418182551526, "grad_norm": 0.04638671875, "learning_rate": 6.600354867702665e-06, "loss": 4.428, "step": 31560 }, { "epoch": 1.222845157907474, "grad_norm": 0.0439453125, "learning_rate": 6.59438523037387e-06, "loss": 4.3945, "step": 31570 }, { "epoch": 1.223248497559795, "grad_norm": 0.045166015625, "learning_rate": 6.5884169656025775e-06, "loss": 4.4172, "step": 31580 }, { "epoch": 1.2236518372121163, "grad_norm": 0.04931640625, "learning_rate": 6.58245007579417e-06, "loss": 4.4145, "step": 31590 }, { "epoch": 1.2240551768644377, "grad_norm": 0.045654296875, "learning_rate": 6.5764845633534716e-06, "loss": 4.3874, "step": 31600 }, { "epoch": 1.2244585165167587, "grad_norm": 0.046875, "learning_rate": 6.570520430684748e-06, "loss": 4.4114, "step": 31610 } ], "logging_steps": 10, "max_steps": 49586, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 313, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.551086273333979e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }