{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8089093158918155, "eval_steps": 500, "global_step": 128700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.285231669711076e-05, "grad_norm": 110.09329223632812, "learning_rate": 1.4000000000000001e-06, "loss": 13.7679, "step": 10 }, { "epoch": 0.0001257046333942215, "grad_norm": 96.14527130126953, "learning_rate": 3.4000000000000005e-06, "loss": 12.6173, "step": 20 }, { "epoch": 0.00018855695009133228, "grad_norm": 43.914146423339844, "learning_rate": 5.400000000000001e-06, "loss": 10.4195, "step": 30 }, { "epoch": 0.000251409266788443, "grad_norm": 24.050556182861328, "learning_rate": 7.4e-06, "loss": 9.2707, "step": 40 }, { "epoch": 0.0003142615834855538, "grad_norm": 17.52002716064453, "learning_rate": 9.4e-06, "loss": 8.893, "step": 50 }, { "epoch": 0.00037711390018266457, "grad_norm": 28.04815673828125, "learning_rate": 1.14e-05, "loss": 8.4257, "step": 60 }, { "epoch": 0.0004399662168797753, "grad_norm": 16.464113235473633, "learning_rate": 1.3400000000000002e-05, "loss": 7.9375, "step": 70 }, { "epoch": 0.000502818533576886, "grad_norm": 14.842829704284668, "learning_rate": 1.54e-05, "loss": 7.7756, "step": 80 }, { "epoch": 0.0005656708502739968, "grad_norm": 21.78369903564453, "learning_rate": 1.7400000000000003e-05, "loss": 7.4797, "step": 90 }, { "epoch": 0.0006285231669711076, "grad_norm": 15.628141403198242, "learning_rate": 1.94e-05, "loss": 7.532, "step": 100 }, { "epoch": 0.0006913754836682184, "grad_norm": 15.642536163330078, "learning_rate": 1.9999706629338745e-05, "loss": 7.2184, "step": 110 }, { "epoch": 0.0007542278003653291, "grad_norm": 15.071418762207031, "learning_rate": 1.9999287528394092e-05, "loss": 6.8758, "step": 120 }, { "epoch": 0.0008170801170624398, "grad_norm": 13.564769744873047, "learning_rate": 1.9998868427449436e-05, "loss": 6.9533, "step": 130 }, { "epoch": 0.0008799324337595506, "grad_norm": 12.832381248474121, "learning_rate": 1.9998449326504783e-05, "loss": 6.7193, "step": 140 }, { "epoch": 0.0009427847504566613, "grad_norm": 12.85204792022705, "learning_rate": 1.999803022556013e-05, "loss": 6.6503, "step": 150 }, { "epoch": 0.001005637067153772, "grad_norm": 10.495243072509766, "learning_rate": 1.9997611124615477e-05, "loss": 6.5755, "step": 160 }, { "epoch": 0.0010684893838508829, "grad_norm": 12.84036636352539, "learning_rate": 1.9997192023670824e-05, "loss": 6.3054, "step": 170 }, { "epoch": 0.0011313417005479936, "grad_norm": 10.062732696533203, "learning_rate": 1.9996772922726168e-05, "loss": 6.4009, "step": 180 }, { "epoch": 0.0011941940172451044, "grad_norm": 10.66489315032959, "learning_rate": 1.9996353821781515e-05, "loss": 5.9644, "step": 190 }, { "epoch": 0.0012570463339422152, "grad_norm": 10.236940383911133, "learning_rate": 1.9995934720836862e-05, "loss": 6.0764, "step": 200 }, { "epoch": 0.001319898650639326, "grad_norm": 11.32441520690918, "learning_rate": 1.999551561989221e-05, "loss": 5.8501, "step": 210 }, { "epoch": 0.0013827509673364367, "grad_norm": 10.349939346313477, "learning_rate": 1.9995096518947556e-05, "loss": 5.9544, "step": 220 }, { "epoch": 0.0014456032840335475, "grad_norm": 10.603028297424316, "learning_rate": 1.99946774180029e-05, "loss": 5.7299, "step": 230 }, { "epoch": 0.0015084556007306583, "grad_norm": 11.104246139526367, "learning_rate": 1.9994258317058247e-05, "loss": 5.708, "step": 240 }, { "epoch": 0.001571307917427769, "grad_norm": 9.362375259399414, "learning_rate": 1.9993839216113594e-05, "loss": 5.6093, "step": 250 }, { "epoch": 0.0016341602341248796, "grad_norm": 9.127053260803223, "learning_rate": 1.999342011516894e-05, "loss": 5.5071, "step": 260 }, { "epoch": 0.0016970125508219904, "grad_norm": 8.870572090148926, "learning_rate": 1.9993001014224288e-05, "loss": 5.4138, "step": 270 }, { "epoch": 0.0017598648675191011, "grad_norm": 10.999969482421875, "learning_rate": 1.9992581913279635e-05, "loss": 5.4419, "step": 280 }, { "epoch": 0.001822717184216212, "grad_norm": 8.93590259552002, "learning_rate": 1.9992162812334982e-05, "loss": 5.4908, "step": 290 }, { "epoch": 0.0018855695009133227, "grad_norm": 8.729954719543457, "learning_rate": 1.999174371139033e-05, "loss": 5.2426, "step": 300 }, { "epoch": 0.0019484218176104334, "grad_norm": 10.1044340133667, "learning_rate": 1.9991324610445673e-05, "loss": 5.2254, "step": 310 }, { "epoch": 0.002011274134307544, "grad_norm": 9.7942533493042, "learning_rate": 1.999090550950102e-05, "loss": 5.0436, "step": 320 }, { "epoch": 0.002074126451004655, "grad_norm": 10.541589736938477, "learning_rate": 1.9990486408556367e-05, "loss": 4.803, "step": 330 }, { "epoch": 0.0021369787677017658, "grad_norm": 9.875314712524414, "learning_rate": 1.9990067307611714e-05, "loss": 4.9165, "step": 340 }, { "epoch": 0.0021998310843988765, "grad_norm": 9.487140655517578, "learning_rate": 1.998964820666706e-05, "loss": 5.1093, "step": 350 }, { "epoch": 0.0022626834010959873, "grad_norm": 9.752985954284668, "learning_rate": 1.9989229105722405e-05, "loss": 5.0373, "step": 360 }, { "epoch": 0.002325535717793098, "grad_norm": 10.569908142089844, "learning_rate": 1.9988810004777752e-05, "loss": 4.9041, "step": 370 }, { "epoch": 0.002388388034490209, "grad_norm": 8.912810325622559, "learning_rate": 1.99883909038331e-05, "loss": 4.7678, "step": 380 }, { "epoch": 0.0024512403511873196, "grad_norm": 9.076282501220703, "learning_rate": 1.9987971802888446e-05, "loss": 4.6359, "step": 390 }, { "epoch": 0.0025140926678844304, "grad_norm": 10.559319496154785, "learning_rate": 1.998755270194379e-05, "loss": 4.5624, "step": 400 }, { "epoch": 0.002576944984581541, "grad_norm": 13.224327087402344, "learning_rate": 1.9987133600999137e-05, "loss": 4.6897, "step": 410 }, { "epoch": 0.002639797301278652, "grad_norm": 10.025303840637207, "learning_rate": 1.9986714500054484e-05, "loss": 4.6429, "step": 420 }, { "epoch": 0.0027026496179757627, "grad_norm": 9.266767501831055, "learning_rate": 1.998629539910983e-05, "loss": 4.6152, "step": 430 }, { "epoch": 0.0027655019346728735, "grad_norm": 9.802778244018555, "learning_rate": 1.9985876298165178e-05, "loss": 4.8169, "step": 440 }, { "epoch": 0.0028283542513699842, "grad_norm": 9.405644416809082, "learning_rate": 1.9985457197220525e-05, "loss": 4.3837, "step": 450 }, { "epoch": 0.002891206568067095, "grad_norm": 11.649224281311035, "learning_rate": 1.9985038096275872e-05, "loss": 4.5792, "step": 460 }, { "epoch": 0.0029540588847642058, "grad_norm": 11.123177528381348, "learning_rate": 1.998461899533122e-05, "loss": 4.5444, "step": 470 }, { "epoch": 0.0030169112014613165, "grad_norm": 10.083602905273438, "learning_rate": 1.9984199894386563e-05, "loss": 4.5796, "step": 480 }, { "epoch": 0.0030797635181584273, "grad_norm": 9.665024757385254, "learning_rate": 1.998378079344191e-05, "loss": 4.5045, "step": 490 }, { "epoch": 0.003142615834855538, "grad_norm": 10.009722709655762, "learning_rate": 1.9983361692497257e-05, "loss": 4.5415, "step": 500 }, { "epoch": 0.003205468151552649, "grad_norm": 8.68890380859375, "learning_rate": 1.9982942591552604e-05, "loss": 4.3695, "step": 510 }, { "epoch": 0.003268320468249759, "grad_norm": 9.695719718933105, "learning_rate": 1.998252349060795e-05, "loss": 4.4293, "step": 520 }, { "epoch": 0.00333117278494687, "grad_norm": 11.342803001403809, "learning_rate": 1.9982104389663295e-05, "loss": 4.6023, "step": 530 }, { "epoch": 0.0033940251016439807, "grad_norm": 9.516191482543945, "learning_rate": 1.9981685288718642e-05, "loss": 4.3556, "step": 540 }, { "epoch": 0.0034568774183410915, "grad_norm": 9.393511772155762, "learning_rate": 1.998126618777399e-05, "loss": 4.4067, "step": 550 }, { "epoch": 0.0035197297350382023, "grad_norm": 10.67137336730957, "learning_rate": 1.9980847086829336e-05, "loss": 4.375, "step": 560 }, { "epoch": 0.003582582051735313, "grad_norm": 9.595892906188965, "learning_rate": 1.9980427985884683e-05, "loss": 4.5256, "step": 570 }, { "epoch": 0.003645434368432424, "grad_norm": 8.754829406738281, "learning_rate": 1.9980008884940027e-05, "loss": 4.4646, "step": 580 }, { "epoch": 0.0037082866851295346, "grad_norm": 11.057015419006348, "learning_rate": 1.9979589783995374e-05, "loss": 4.5457, "step": 590 }, { "epoch": 0.0037711390018266453, "grad_norm": 10.04822826385498, "learning_rate": 1.997917068305072e-05, "loss": 4.2818, "step": 600 }, { "epoch": 0.003833991318523756, "grad_norm": 10.355257034301758, "learning_rate": 1.9978793492200532e-05, "loss": 4.309, "step": 610 }, { "epoch": 0.003896843635220867, "grad_norm": 8.481429100036621, "learning_rate": 1.997837439125588e-05, "loss": 4.1791, "step": 620 }, { "epoch": 0.003959695951917978, "grad_norm": 8.190858840942383, "learning_rate": 1.9977955290311226e-05, "loss": 4.1775, "step": 630 }, { "epoch": 0.004022548268615088, "grad_norm": 9.911380767822266, "learning_rate": 1.9977536189366573e-05, "loss": 4.3927, "step": 640 }, { "epoch": 0.004085400585312199, "grad_norm": 9.007355690002441, "learning_rate": 1.9977117088421917e-05, "loss": 4.2628, "step": 650 }, { "epoch": 0.00414825290200931, "grad_norm": 8.924678802490234, "learning_rate": 1.9976697987477264e-05, "loss": 4.2525, "step": 660 }, { "epoch": 0.004211105218706421, "grad_norm": 11.19166088104248, "learning_rate": 1.997627888653261e-05, "loss": 4.1394, "step": 670 }, { "epoch": 0.0042739575354035315, "grad_norm": 10.57885456085205, "learning_rate": 1.997585978558796e-05, "loss": 4.2921, "step": 680 }, { "epoch": 0.004336809852100642, "grad_norm": 9.213665008544922, "learning_rate": 1.9975440684643305e-05, "loss": 4.2321, "step": 690 }, { "epoch": 0.004399662168797753, "grad_norm": 9.66899299621582, "learning_rate": 1.997502158369865e-05, "loss": 4.1874, "step": 700 }, { "epoch": 0.004462514485494864, "grad_norm": 9.520936012268066, "learning_rate": 1.9974602482753996e-05, "loss": 4.2696, "step": 710 }, { "epoch": 0.004525366802191975, "grad_norm": 10.274086952209473, "learning_rate": 1.9974183381809343e-05, "loss": 4.2235, "step": 720 }, { "epoch": 0.004588219118889085, "grad_norm": 8.612703323364258, "learning_rate": 1.997376428086469e-05, "loss": 4.4711, "step": 730 }, { "epoch": 0.004651071435586196, "grad_norm": 10.191298484802246, "learning_rate": 1.9973345179920037e-05, "loss": 4.0802, "step": 740 }, { "epoch": 0.004713923752283307, "grad_norm": 17.114593505859375, "learning_rate": 1.9972926078975384e-05, "loss": 4.1454, "step": 750 }, { "epoch": 0.004776776068980418, "grad_norm": 8.410555839538574, "learning_rate": 1.997250697803073e-05, "loss": 4.1697, "step": 760 }, { "epoch": 0.0048396283856775284, "grad_norm": 9.421134948730469, "learning_rate": 1.997208787708608e-05, "loss": 4.2371, "step": 770 }, { "epoch": 0.004902480702374639, "grad_norm": 8.89936637878418, "learning_rate": 1.9971668776141426e-05, "loss": 4.1051, "step": 780 }, { "epoch": 0.00496533301907175, "grad_norm": 10.175434112548828, "learning_rate": 1.997124967519677e-05, "loss": 4.3075, "step": 790 }, { "epoch": 0.005028185335768861, "grad_norm": 8.717267990112305, "learning_rate": 1.9970830574252116e-05, "loss": 4.2175, "step": 800 }, { "epoch": 0.0050910376524659715, "grad_norm": 8.255484580993652, "learning_rate": 1.9970411473307463e-05, "loss": 4.3324, "step": 810 }, { "epoch": 0.005153889969163082, "grad_norm": 8.949331283569336, "learning_rate": 1.996999237236281e-05, "loss": 3.9377, "step": 820 }, { "epoch": 0.005216742285860193, "grad_norm": 7.299438953399658, "learning_rate": 1.9969573271418154e-05, "loss": 3.894, "step": 830 }, { "epoch": 0.005279594602557304, "grad_norm": 8.529239654541016, "learning_rate": 1.99691541704735e-05, "loss": 4.0175, "step": 840 }, { "epoch": 0.005342446919254415, "grad_norm": 8.880363464355469, "learning_rate": 1.996873506952885e-05, "loss": 4.0875, "step": 850 }, { "epoch": 0.005405299235951525, "grad_norm": 7.495782375335693, "learning_rate": 1.9968315968584195e-05, "loss": 3.9243, "step": 860 }, { "epoch": 0.005468151552648636, "grad_norm": 11.282350540161133, "learning_rate": 1.9967896867639543e-05, "loss": 4.1782, "step": 870 }, { "epoch": 0.005531003869345747, "grad_norm": 8.926542282104492, "learning_rate": 1.9967477766694886e-05, "loss": 4.0449, "step": 880 }, { "epoch": 0.005593856186042858, "grad_norm": 8.818291664123535, "learning_rate": 1.9967058665750233e-05, "loss": 4.0877, "step": 890 }, { "epoch": 0.0056567085027399685, "grad_norm": 9.448590278625488, "learning_rate": 1.996663956480558e-05, "loss": 4.1351, "step": 900 }, { "epoch": 0.005719560819437079, "grad_norm": 9.017233848571777, "learning_rate": 1.9966220463860927e-05, "loss": 4.0607, "step": 910 }, { "epoch": 0.00578241313613419, "grad_norm": 9.787857055664062, "learning_rate": 1.9965801362916274e-05, "loss": 4.0828, "step": 920 }, { "epoch": 0.005845265452831301, "grad_norm": 16.65804100036621, "learning_rate": 1.9965382261971618e-05, "loss": 4.1905, "step": 930 }, { "epoch": 0.0059081177695284115, "grad_norm": 8.049735069274902, "learning_rate": 1.9964963161026965e-05, "loss": 4.2229, "step": 940 }, { "epoch": 0.005970970086225522, "grad_norm": 9.003173828125, "learning_rate": 1.9964544060082312e-05, "loss": 3.8822, "step": 950 }, { "epoch": 0.006033822402922633, "grad_norm": 10.698800086975098, "learning_rate": 1.996412495913766e-05, "loss": 4.1255, "step": 960 }, { "epoch": 0.006096674719619744, "grad_norm": 8.154327392578125, "learning_rate": 1.9963705858193006e-05, "loss": 4.1607, "step": 970 }, { "epoch": 0.006159527036316855, "grad_norm": 8.608023643493652, "learning_rate": 1.9963286757248354e-05, "loss": 3.9839, "step": 980 }, { "epoch": 0.006222379353013965, "grad_norm": 9.532076835632324, "learning_rate": 1.99628676563037e-05, "loss": 4.1282, "step": 990 }, { "epoch": 0.006285231669711076, "grad_norm": 7.2183637619018555, "learning_rate": 1.9962448555359048e-05, "loss": 4.0516, "step": 1000 }, { "epoch": 0.006348083986408187, "grad_norm": 8.70899486541748, "learning_rate": 1.9962071364508856e-05, "loss": 3.94, "step": 1010 }, { "epoch": 0.006410936303105298, "grad_norm": 7.688412666320801, "learning_rate": 1.9961652263564203e-05, "loss": 4.2123, "step": 1020 }, { "epoch": 0.006473788619802408, "grad_norm": 11.714567184448242, "learning_rate": 1.996123316261955e-05, "loss": 3.9293, "step": 1030 }, { "epoch": 0.006536640936499518, "grad_norm": 10.026140213012695, "learning_rate": 1.9960814061674897e-05, "loss": 3.9482, "step": 1040 }, { "epoch": 0.006599493253196629, "grad_norm": 7.529440879821777, "learning_rate": 1.9960394960730244e-05, "loss": 4.0032, "step": 1050 }, { "epoch": 0.00666234556989374, "grad_norm": 9.811161041259766, "learning_rate": 1.995997585978559e-05, "loss": 4.0338, "step": 1060 }, { "epoch": 0.006725197886590851, "grad_norm": 6.567491054534912, "learning_rate": 1.9959556758840938e-05, "loss": 4.0207, "step": 1070 }, { "epoch": 0.0067880502032879615, "grad_norm": 8.35252571105957, "learning_rate": 1.9959137657896282e-05, "loss": 3.9025, "step": 1080 }, { "epoch": 0.006850902519985072, "grad_norm": 10.21352767944336, "learning_rate": 1.995871855695163e-05, "loss": 4.0365, "step": 1090 }, { "epoch": 0.006913754836682183, "grad_norm": 7.785412311553955, "learning_rate": 1.9958299456006976e-05, "loss": 3.7487, "step": 1100 }, { "epoch": 0.006976607153379294, "grad_norm": 9.56949520111084, "learning_rate": 1.9957880355062323e-05, "loss": 4.1561, "step": 1110 }, { "epoch": 0.0070394594700764045, "grad_norm": 9.330686569213867, "learning_rate": 1.995746125411767e-05, "loss": 3.6948, "step": 1120 }, { "epoch": 0.007102311786773515, "grad_norm": 7.871035099029541, "learning_rate": 1.9957042153173014e-05, "loss": 3.8271, "step": 1130 }, { "epoch": 0.007165164103470626, "grad_norm": 8.37169361114502, "learning_rate": 1.995662305222836e-05, "loss": 3.9274, "step": 1140 }, { "epoch": 0.007228016420167737, "grad_norm": 7.6004319190979, "learning_rate": 1.9956203951283708e-05, "loss": 3.7638, "step": 1150 }, { "epoch": 0.007290868736864848, "grad_norm": 8.623445510864258, "learning_rate": 1.9955784850339055e-05, "loss": 4.1957, "step": 1160 }, { "epoch": 0.007353721053561958, "grad_norm": 7.707347869873047, "learning_rate": 1.99553657493944e-05, "loss": 3.9143, "step": 1170 }, { "epoch": 0.007416573370259069, "grad_norm": 8.317936897277832, "learning_rate": 1.9954946648449746e-05, "loss": 3.8486, "step": 1180 }, { "epoch": 0.00747942568695618, "grad_norm": 8.231162071228027, "learning_rate": 1.9954527547505093e-05, "loss": 3.999, "step": 1190 }, { "epoch": 0.007542278003653291, "grad_norm": 24.25721549987793, "learning_rate": 1.995410844656044e-05, "loss": 3.956, "step": 1200 }, { "epoch": 0.0076051303203504015, "grad_norm": 7.126772403717041, "learning_rate": 1.9953689345615787e-05, "loss": 3.7612, "step": 1210 }, { "epoch": 0.007667982637047512, "grad_norm": 7.953426837921143, "learning_rate": 1.9953270244671134e-05, "loss": 3.8871, "step": 1220 }, { "epoch": 0.007730834953744623, "grad_norm": 9.395339965820312, "learning_rate": 1.995285114372648e-05, "loss": 3.9739, "step": 1230 }, { "epoch": 0.007793687270441734, "grad_norm": 9.223485946655273, "learning_rate": 1.9952432042781825e-05, "loss": 3.8702, "step": 1240 }, { "epoch": 0.007856539587138845, "grad_norm": 8.946688652038574, "learning_rate": 1.9952012941837172e-05, "loss": 3.7169, "step": 1250 }, { "epoch": 0.007919391903835955, "grad_norm": 9.54202938079834, "learning_rate": 1.995159384089252e-05, "loss": 4.0095, "step": 1260 }, { "epoch": 0.007982244220533067, "grad_norm": 8.55982780456543, "learning_rate": 1.9951174739947866e-05, "loss": 4.0312, "step": 1270 }, { "epoch": 0.008045096537230177, "grad_norm": 9.204590797424316, "learning_rate": 1.9950755639003213e-05, "loss": 3.9259, "step": 1280 }, { "epoch": 0.008107948853927288, "grad_norm": 8.297924995422363, "learning_rate": 1.995033653805856e-05, "loss": 3.9343, "step": 1290 }, { "epoch": 0.008170801170624398, "grad_norm": 8.083635330200195, "learning_rate": 1.9949917437113907e-05, "loss": 3.8386, "step": 1300 }, { "epoch": 0.00823365348732151, "grad_norm": 8.369839668273926, "learning_rate": 1.994949833616925e-05, "loss": 3.8577, "step": 1310 }, { "epoch": 0.00829650580401862, "grad_norm": 10.199877738952637, "learning_rate": 1.9949079235224598e-05, "loss": 3.8577, "step": 1320 }, { "epoch": 0.008359358120715732, "grad_norm": 8.548699378967285, "learning_rate": 1.9948660134279945e-05, "loss": 3.8987, "step": 1330 }, { "epoch": 0.008422210437412841, "grad_norm": 9.549871444702148, "learning_rate": 1.9948241033335292e-05, "loss": 3.7709, "step": 1340 }, { "epoch": 0.008485062754109953, "grad_norm": 8.54775619506836, "learning_rate": 1.9947821932390636e-05, "loss": 3.7397, "step": 1350 }, { "epoch": 0.008547915070807063, "grad_norm": 9.24419116973877, "learning_rate": 1.9947402831445983e-05, "loss": 3.8185, "step": 1360 }, { "epoch": 0.008610767387504173, "grad_norm": 8.104684829711914, "learning_rate": 1.994698373050133e-05, "loss": 3.8048, "step": 1370 }, { "epoch": 0.008673619704201285, "grad_norm": 8.316015243530273, "learning_rate": 1.9946564629556677e-05, "loss": 3.7468, "step": 1380 }, { "epoch": 0.008736472020898394, "grad_norm": 8.843308448791504, "learning_rate": 1.994614552861202e-05, "loss": 3.9003, "step": 1390 }, { "epoch": 0.008799324337595506, "grad_norm": 8.556550025939941, "learning_rate": 1.9945726427667368e-05, "loss": 3.7636, "step": 1400 }, { "epoch": 0.008862176654292616, "grad_norm": 8.34255599975586, "learning_rate": 1.9945307326722715e-05, "loss": 3.8535, "step": 1410 }, { "epoch": 0.008925028970989728, "grad_norm": 7.886868953704834, "learning_rate": 1.9944888225778062e-05, "loss": 3.9768, "step": 1420 }, { "epoch": 0.008987881287686838, "grad_norm": 7.469964027404785, "learning_rate": 1.994446912483341e-05, "loss": 3.9373, "step": 1430 }, { "epoch": 0.00905073360438395, "grad_norm": 9.098652839660645, "learning_rate": 1.9944050023888756e-05, "loss": 3.7668, "step": 1440 }, { "epoch": 0.009113585921081059, "grad_norm": 8.704818725585938, "learning_rate": 1.9943630922944103e-05, "loss": 3.9797, "step": 1450 }, { "epoch": 0.00917643823777817, "grad_norm": 9.686690330505371, "learning_rate": 1.994321182199945e-05, "loss": 3.8918, "step": 1460 }, { "epoch": 0.00923929055447528, "grad_norm": 8.338467597961426, "learning_rate": 1.9942792721054797e-05, "loss": 3.9657, "step": 1470 }, { "epoch": 0.009302142871172392, "grad_norm": 8.457193374633789, "learning_rate": 1.994237362011014e-05, "loss": 3.7973, "step": 1480 }, { "epoch": 0.009364995187869502, "grad_norm": 9.84638500213623, "learning_rate": 1.9941954519165488e-05, "loss": 3.7545, "step": 1490 }, { "epoch": 0.009427847504566614, "grad_norm": 7.546156406402588, "learning_rate": 1.9941535418220835e-05, "loss": 3.9961, "step": 1500 }, { "epoch": 0.009490699821263724, "grad_norm": 10.12843132019043, "learning_rate": 1.9941116317276182e-05, "loss": 3.9926, "step": 1510 }, { "epoch": 0.009553552137960835, "grad_norm": 9.499560356140137, "learning_rate": 1.994069721633153e-05, "loss": 3.7154, "step": 1520 }, { "epoch": 0.009616404454657945, "grad_norm": 7.583631992340088, "learning_rate": 1.9940278115386873e-05, "loss": 3.605, "step": 1530 }, { "epoch": 0.009679256771355057, "grad_norm": 7.655325889587402, "learning_rate": 1.993985901444222e-05, "loss": 3.8484, "step": 1540 }, { "epoch": 0.009742109088052167, "grad_norm": 6.684481143951416, "learning_rate": 1.9939439913497567e-05, "loss": 3.8044, "step": 1550 }, { "epoch": 0.009804961404749278, "grad_norm": 8.056528091430664, "learning_rate": 1.9939020812552914e-05, "loss": 3.7472, "step": 1560 }, { "epoch": 0.009867813721446388, "grad_norm": 9.075079917907715, "learning_rate": 1.9938601711608258e-05, "loss": 3.6593, "step": 1570 }, { "epoch": 0.0099306660381435, "grad_norm": 7.955049514770508, "learning_rate": 1.9938182610663605e-05, "loss": 3.6238, "step": 1580 }, { "epoch": 0.00999351835484061, "grad_norm": 9.027932167053223, "learning_rate": 1.9937763509718952e-05, "loss": 3.8524, "step": 1590 }, { "epoch": 0.010056370671537722, "grad_norm": 9.155461311340332, "learning_rate": 1.99373444087743e-05, "loss": 3.7894, "step": 1600 }, { "epoch": 0.010119222988234831, "grad_norm": 8.733969688415527, "learning_rate": 1.9936925307829646e-05, "loss": 3.758, "step": 1610 }, { "epoch": 0.010182075304931943, "grad_norm": 8.7479248046875, "learning_rate": 1.993650620688499e-05, "loss": 3.7762, "step": 1620 }, { "epoch": 0.010244927621629053, "grad_norm": 7.365170001983643, "learning_rate": 1.9936087105940337e-05, "loss": 3.7251, "step": 1630 }, { "epoch": 0.010307779938326165, "grad_norm": 8.145270347595215, "learning_rate": 1.9935668004995684e-05, "loss": 3.6831, "step": 1640 }, { "epoch": 0.010370632255023274, "grad_norm": 8.109509468078613, "learning_rate": 1.993524890405103e-05, "loss": 3.5559, "step": 1650 }, { "epoch": 0.010433484571720386, "grad_norm": 8.524164199829102, "learning_rate": 1.9934829803106378e-05, "loss": 3.7954, "step": 1660 }, { "epoch": 0.010496336888417496, "grad_norm": 9.258058547973633, "learning_rate": 1.9934410702161725e-05, "loss": 3.774, "step": 1670 }, { "epoch": 0.010559189205114608, "grad_norm": 9.292672157287598, "learning_rate": 1.9933991601217072e-05, "loss": 3.5493, "step": 1680 }, { "epoch": 0.010622041521811718, "grad_norm": 15.687127113342285, "learning_rate": 1.993357250027242e-05, "loss": 3.8524, "step": 1690 }, { "epoch": 0.01068489383850883, "grad_norm": 7.195960521697998, "learning_rate": 1.9933153399327763e-05, "loss": 3.716, "step": 1700 }, { "epoch": 0.010747746155205939, "grad_norm": 7.604008674621582, "learning_rate": 1.993273429838311e-05, "loss": 3.8347, "step": 1710 }, { "epoch": 0.01081059847190305, "grad_norm": 8.035575866699219, "learning_rate": 1.9932315197438457e-05, "loss": 3.7588, "step": 1720 }, { "epoch": 0.01087345078860016, "grad_norm": 7.609543323516846, "learning_rate": 1.9931896096493804e-05, "loss": 3.8961, "step": 1730 }, { "epoch": 0.010936303105297272, "grad_norm": 7.832934856414795, "learning_rate": 1.993147699554915e-05, "loss": 3.6334, "step": 1740 }, { "epoch": 0.010999155421994382, "grad_norm": 8.620970726013184, "learning_rate": 1.9931057894604495e-05, "loss": 3.6378, "step": 1750 }, { "epoch": 0.011062007738691494, "grad_norm": 7.927221298217773, "learning_rate": 1.9930638793659842e-05, "loss": 3.6141, "step": 1760 }, { "epoch": 0.011124860055388604, "grad_norm": 11.173967361450195, "learning_rate": 1.993021969271519e-05, "loss": 3.8173, "step": 1770 }, { "epoch": 0.011187712372085715, "grad_norm": 7.172491550445557, "learning_rate": 1.9929800591770536e-05, "loss": 3.725, "step": 1780 }, { "epoch": 0.011250564688782825, "grad_norm": 7.763185977935791, "learning_rate": 1.992938149082588e-05, "loss": 3.6435, "step": 1790 }, { "epoch": 0.011313417005479937, "grad_norm": 7.742452621459961, "learning_rate": 1.9928962389881227e-05, "loss": 3.6609, "step": 1800 }, { "epoch": 0.011376269322177047, "grad_norm": 8.668071746826172, "learning_rate": 1.9928543288936574e-05, "loss": 3.5743, "step": 1810 }, { "epoch": 0.011439121638874158, "grad_norm": 7.531178951263428, "learning_rate": 1.992812418799192e-05, "loss": 3.6773, "step": 1820 }, { "epoch": 0.011501973955571268, "grad_norm": 8.770427703857422, "learning_rate": 1.9927705087047268e-05, "loss": 3.6403, "step": 1830 }, { "epoch": 0.01156482627226838, "grad_norm": 7.917853832244873, "learning_rate": 1.9927285986102615e-05, "loss": 3.8062, "step": 1840 }, { "epoch": 0.01162767858896549, "grad_norm": 7.949713706970215, "learning_rate": 1.9926866885157962e-05, "loss": 3.7471, "step": 1850 }, { "epoch": 0.011690530905662602, "grad_norm": 8.924261093139648, "learning_rate": 1.992644778421331e-05, "loss": 3.559, "step": 1860 }, { "epoch": 0.011753383222359711, "grad_norm": 9.18735122680664, "learning_rate": 1.9926028683268653e-05, "loss": 3.7138, "step": 1870 }, { "epoch": 0.011816235539056823, "grad_norm": 7.951308727264404, "learning_rate": 1.9925609582324e-05, "loss": 3.6578, "step": 1880 }, { "epoch": 0.011879087855753933, "grad_norm": 8.34437370300293, "learning_rate": 1.9925190481379347e-05, "loss": 3.7569, "step": 1890 }, { "epoch": 0.011941940172451045, "grad_norm": 10.448899269104004, "learning_rate": 1.9924771380434694e-05, "loss": 3.8515, "step": 1900 }, { "epoch": 0.012004792489148155, "grad_norm": 8.515726089477539, "learning_rate": 1.992435227949004e-05, "loss": 3.5718, "step": 1910 }, { "epoch": 0.012067644805845266, "grad_norm": 8.738914489746094, "learning_rate": 1.9923933178545388e-05, "loss": 3.8008, "step": 1920 }, { "epoch": 0.012130497122542376, "grad_norm": 7.068699836730957, "learning_rate": 1.9923514077600732e-05, "loss": 3.5684, "step": 1930 }, { "epoch": 0.012193349439239488, "grad_norm": 7.559966087341309, "learning_rate": 1.992309497665608e-05, "loss": 3.752, "step": 1940 }, { "epoch": 0.012256201755936598, "grad_norm": 8.261521339416504, "learning_rate": 1.9922675875711426e-05, "loss": 3.5821, "step": 1950 }, { "epoch": 0.01231905407263371, "grad_norm": 7.729598522186279, "learning_rate": 1.9922256774766773e-05, "loss": 3.6635, "step": 1960 }, { "epoch": 0.01238190638933082, "grad_norm": 7.8209733963012695, "learning_rate": 1.9921837673822117e-05, "loss": 3.746, "step": 1970 }, { "epoch": 0.01244475870602793, "grad_norm": 7.871613502502441, "learning_rate": 1.9921418572877464e-05, "loss": 3.4679, "step": 1980 }, { "epoch": 0.01250761102272504, "grad_norm": 7.5880913734436035, "learning_rate": 1.992099947193281e-05, "loss": 3.6665, "step": 1990 }, { "epoch": 0.012570463339422152, "grad_norm": 9.116113662719727, "learning_rate": 1.9920580370988158e-05, "loss": 3.7091, "step": 2000 }, { "epoch": 0.012633315656119262, "grad_norm": 8.949613571166992, "learning_rate": 1.9920161270043505e-05, "loss": 3.659, "step": 2010 }, { "epoch": 0.012696167972816374, "grad_norm": 9.201729774475098, "learning_rate": 1.991974216909885e-05, "loss": 3.8791, "step": 2020 }, { "epoch": 0.012759020289513484, "grad_norm": 7.439731121063232, "learning_rate": 1.9919323068154196e-05, "loss": 3.7566, "step": 2030 }, { "epoch": 0.012821872606210595, "grad_norm": 8.156105041503906, "learning_rate": 1.9918903967209543e-05, "loss": 3.7228, "step": 2040 }, { "epoch": 0.012884724922907705, "grad_norm": 9.137293815612793, "learning_rate": 1.991848486626489e-05, "loss": 3.6265, "step": 2050 }, { "epoch": 0.012947577239604815, "grad_norm": 7.509158134460449, "learning_rate": 1.9918065765320237e-05, "loss": 3.7887, "step": 2060 }, { "epoch": 0.013010429556301927, "grad_norm": 7.798402309417725, "learning_rate": 1.9917646664375584e-05, "loss": 3.6766, "step": 2070 }, { "epoch": 0.013073281872999037, "grad_norm": 7.323843002319336, "learning_rate": 1.991722756343093e-05, "loss": 3.644, "step": 2080 }, { "epoch": 0.013136134189696148, "grad_norm": 8.679986000061035, "learning_rate": 1.9916808462486278e-05, "loss": 3.6055, "step": 2090 }, { "epoch": 0.013198986506393258, "grad_norm": 9.180718421936035, "learning_rate": 1.9916389361541622e-05, "loss": 3.873, "step": 2100 }, { "epoch": 0.01326183882309037, "grad_norm": 7.690215587615967, "learning_rate": 1.991597026059697e-05, "loss": 3.6968, "step": 2110 }, { "epoch": 0.01332469113978748, "grad_norm": 7.911571025848389, "learning_rate": 1.9915551159652316e-05, "loss": 3.8175, "step": 2120 }, { "epoch": 0.013387543456484591, "grad_norm": 8.48082447052002, "learning_rate": 1.9915132058707663e-05, "loss": 3.6924, "step": 2130 }, { "epoch": 0.013450395773181701, "grad_norm": 7.75557279586792, "learning_rate": 1.991471295776301e-05, "loss": 3.6379, "step": 2140 }, { "epoch": 0.013513248089878813, "grad_norm": 7.316931247711182, "learning_rate": 1.9914293856818354e-05, "loss": 3.6726, "step": 2150 }, { "epoch": 0.013576100406575923, "grad_norm": 8.54746150970459, "learning_rate": 1.99138747558737e-05, "loss": 3.6168, "step": 2160 }, { "epoch": 0.013638952723273035, "grad_norm": 7.163753032684326, "learning_rate": 1.9913455654929048e-05, "loss": 3.4964, "step": 2170 }, { "epoch": 0.013701805039970144, "grad_norm": 6.571826457977295, "learning_rate": 1.9913036553984395e-05, "loss": 3.7582, "step": 2180 }, { "epoch": 0.013764657356667256, "grad_norm": 8.665351867675781, "learning_rate": 1.991261745303974e-05, "loss": 3.5776, "step": 2190 }, { "epoch": 0.013827509673364366, "grad_norm": 7.472128391265869, "learning_rate": 1.9912198352095086e-05, "loss": 3.563, "step": 2200 }, { "epoch": 0.013890361990061478, "grad_norm": 8.150984764099121, "learning_rate": 1.9911779251150433e-05, "loss": 3.4579, "step": 2210 }, { "epoch": 0.013953214306758588, "grad_norm": 7.981696128845215, "learning_rate": 1.991136015020578e-05, "loss": 3.3768, "step": 2220 }, { "epoch": 0.0140160666234557, "grad_norm": 8.98642635345459, "learning_rate": 1.9910941049261127e-05, "loss": 3.6364, "step": 2230 }, { "epoch": 0.014078918940152809, "grad_norm": 7.533083438873291, "learning_rate": 1.991052194831647e-05, "loss": 3.714, "step": 2240 }, { "epoch": 0.01414177125684992, "grad_norm": 7.571574687957764, "learning_rate": 1.9910102847371818e-05, "loss": 3.5226, "step": 2250 }, { "epoch": 0.01420462357354703, "grad_norm": 7.607986927032471, "learning_rate": 1.9909683746427165e-05, "loss": 3.5229, "step": 2260 }, { "epoch": 0.014267475890244142, "grad_norm": 8.505463600158691, "learning_rate": 1.9909264645482512e-05, "loss": 3.5028, "step": 2270 }, { "epoch": 0.014330328206941252, "grad_norm": 8.641060829162598, "learning_rate": 1.990884554453786e-05, "loss": 3.6277, "step": 2280 }, { "epoch": 0.014393180523638364, "grad_norm": 7.744467258453369, "learning_rate": 1.9908426443593206e-05, "loss": 3.5696, "step": 2290 }, { "epoch": 0.014456032840335474, "grad_norm": 7.258744716644287, "learning_rate": 1.9908007342648553e-05, "loss": 3.7013, "step": 2300 }, { "epoch": 0.014518885157032585, "grad_norm": 7.771141052246094, "learning_rate": 1.99075882417039e-05, "loss": 3.5281, "step": 2310 }, { "epoch": 0.014581737473729695, "grad_norm": 9.042888641357422, "learning_rate": 1.9907169140759247e-05, "loss": 3.365, "step": 2320 }, { "epoch": 0.014644589790426807, "grad_norm": 8.447803497314453, "learning_rate": 1.990675003981459e-05, "loss": 3.6213, "step": 2330 }, { "epoch": 0.014707442107123917, "grad_norm": 7.852933406829834, "learning_rate": 1.9906330938869938e-05, "loss": 3.4386, "step": 2340 }, { "epoch": 0.014770294423821028, "grad_norm": 6.793748378753662, "learning_rate": 1.9905911837925285e-05, "loss": 3.5708, "step": 2350 }, { "epoch": 0.014833146740518138, "grad_norm": 9.815299987792969, "learning_rate": 1.9905492736980632e-05, "loss": 3.4692, "step": 2360 }, { "epoch": 0.01489599905721525, "grad_norm": 8.411046981811523, "learning_rate": 1.9905073636035976e-05, "loss": 3.4869, "step": 2370 }, { "epoch": 0.01495885137391236, "grad_norm": 8.23367977142334, "learning_rate": 1.9904654535091323e-05, "loss": 3.4829, "step": 2380 }, { "epoch": 0.015021703690609471, "grad_norm": 7.69027042388916, "learning_rate": 1.990423543414667e-05, "loss": 3.6509, "step": 2390 }, { "epoch": 0.015084556007306581, "grad_norm": 7.956418514251709, "learning_rate": 1.9903816333202017e-05, "loss": 3.4739, "step": 2400 }, { "epoch": 0.015147408324003693, "grad_norm": 8.069219589233398, "learning_rate": 1.990339723225736e-05, "loss": 3.345, "step": 2410 }, { "epoch": 0.015210260640700803, "grad_norm": 7.235134124755859, "learning_rate": 1.9902978131312708e-05, "loss": 3.4255, "step": 2420 }, { "epoch": 0.015273112957397915, "grad_norm": 7.432069301605225, "learning_rate": 1.9902559030368055e-05, "loss": 3.4666, "step": 2430 }, { "epoch": 0.015335965274095024, "grad_norm": 7.206005573272705, "learning_rate": 1.9902139929423402e-05, "loss": 3.5934, "step": 2440 }, { "epoch": 0.015398817590792136, "grad_norm": 9.905259132385254, "learning_rate": 1.990172082847875e-05, "loss": 3.6262, "step": 2450 }, { "epoch": 0.015461669907489246, "grad_norm": 8.01630973815918, "learning_rate": 1.9901301727534096e-05, "loss": 3.5275, "step": 2460 }, { "epoch": 0.015524522224186358, "grad_norm": 7.935608863830566, "learning_rate": 1.9900882626589443e-05, "loss": 3.5331, "step": 2470 }, { "epoch": 0.015587374540883468, "grad_norm": 8.589484214782715, "learning_rate": 1.990046352564479e-05, "loss": 3.614, "step": 2480 }, { "epoch": 0.01565022685758058, "grad_norm": 8.494813919067383, "learning_rate": 1.9900044424700134e-05, "loss": 3.4546, "step": 2490 }, { "epoch": 0.01571307917427769, "grad_norm": 6.927222728729248, "learning_rate": 1.989962532375548e-05, "loss": 3.4779, "step": 2500 }, { "epoch": 0.0157759314909748, "grad_norm": 6.986412048339844, "learning_rate": 1.9899206222810828e-05, "loss": 3.643, "step": 2510 }, { "epoch": 0.01583878380767191, "grad_norm": 7.346634864807129, "learning_rate": 1.9898787121866175e-05, "loss": 3.547, "step": 2520 }, { "epoch": 0.015901636124369022, "grad_norm": 8.32447624206543, "learning_rate": 1.9898368020921522e-05, "loss": 3.5548, "step": 2530 }, { "epoch": 0.015964488441066134, "grad_norm": 8.137677192687988, "learning_rate": 1.989794891997687e-05, "loss": 3.4738, "step": 2540 }, { "epoch": 0.016027340757763242, "grad_norm": 9.068408012390137, "learning_rate": 1.9897529819032213e-05, "loss": 3.3477, "step": 2550 }, { "epoch": 0.016090193074460354, "grad_norm": 8.339146614074707, "learning_rate": 1.989711071808756e-05, "loss": 3.6623, "step": 2560 }, { "epoch": 0.016153045391157465, "grad_norm": 8.223811149597168, "learning_rate": 1.9896691617142907e-05, "loss": 3.5537, "step": 2570 }, { "epoch": 0.016215897707854577, "grad_norm": 9.707834243774414, "learning_rate": 1.9896272516198254e-05, "loss": 3.6568, "step": 2580 }, { "epoch": 0.016278750024551685, "grad_norm": 7.555861473083496, "learning_rate": 1.9895853415253598e-05, "loss": 3.5347, "step": 2590 }, { "epoch": 0.016341602341248797, "grad_norm": 9.049849510192871, "learning_rate": 1.9895434314308945e-05, "loss": 3.6334, "step": 2600 }, { "epoch": 0.01640445465794591, "grad_norm": 7.760476589202881, "learning_rate": 1.9895015213364292e-05, "loss": 3.483, "step": 2610 }, { "epoch": 0.01646730697464302, "grad_norm": 8.10847282409668, "learning_rate": 1.989459611241964e-05, "loss": 3.6302, "step": 2620 }, { "epoch": 0.016530159291340128, "grad_norm": 8.256227493286133, "learning_rate": 1.9894177011474986e-05, "loss": 3.7809, "step": 2630 }, { "epoch": 0.01659301160803724, "grad_norm": 7.8532915115356445, "learning_rate": 1.989375791053033e-05, "loss": 3.518, "step": 2640 }, { "epoch": 0.01665586392473435, "grad_norm": 8.061572074890137, "learning_rate": 1.9893338809585677e-05, "loss": 3.4883, "step": 2650 }, { "epoch": 0.016718716241431463, "grad_norm": 7.70623254776001, "learning_rate": 1.9892919708641024e-05, "loss": 3.573, "step": 2660 }, { "epoch": 0.01678156855812857, "grad_norm": 7.417579174041748, "learning_rate": 1.989250060769637e-05, "loss": 3.4534, "step": 2670 }, { "epoch": 0.016844420874825683, "grad_norm": 7.6423020362854, "learning_rate": 1.9892081506751718e-05, "loss": 3.4455, "step": 2680 }, { "epoch": 0.016907273191522795, "grad_norm": 9.982197761535645, "learning_rate": 1.9891662405807065e-05, "loss": 3.4562, "step": 2690 }, { "epoch": 0.016970125508219906, "grad_norm": 7.2399444580078125, "learning_rate": 1.9891243304862412e-05, "loss": 3.4619, "step": 2700 }, { "epoch": 0.017032977824917014, "grad_norm": 7.721668243408203, "learning_rate": 1.989082420391776e-05, "loss": 3.334, "step": 2710 }, { "epoch": 0.017095830141614126, "grad_norm": 7.352929592132568, "learning_rate": 1.9890405102973103e-05, "loss": 3.4966, "step": 2720 }, { "epoch": 0.017158682458311238, "grad_norm": 7.685946941375732, "learning_rate": 1.988998600202845e-05, "loss": 3.2705, "step": 2730 }, { "epoch": 0.017221534775008346, "grad_norm": 8.082391738891602, "learning_rate": 1.9889566901083797e-05, "loss": 3.3639, "step": 2740 }, { "epoch": 0.017284387091705457, "grad_norm": 7.36880350112915, "learning_rate": 1.9889147800139144e-05, "loss": 3.362, "step": 2750 }, { "epoch": 0.01734723940840257, "grad_norm": 8.787617683410645, "learning_rate": 1.988872869919449e-05, "loss": 3.4222, "step": 2760 }, { "epoch": 0.01741009172509968, "grad_norm": 8.28075122833252, "learning_rate": 1.9888309598249835e-05, "loss": 3.5957, "step": 2770 }, { "epoch": 0.01747294404179679, "grad_norm": 9.33713436126709, "learning_rate": 1.9887890497305182e-05, "loss": 3.4035, "step": 2780 }, { "epoch": 0.0175357963584939, "grad_norm": 8.47854995727539, "learning_rate": 1.988747139636053e-05, "loss": 3.4141, "step": 2790 }, { "epoch": 0.017598648675191012, "grad_norm": 6.677530288696289, "learning_rate": 1.9887052295415876e-05, "loss": 3.389, "step": 2800 }, { "epoch": 0.017661500991888124, "grad_norm": 8.675537109375, "learning_rate": 1.988663319447122e-05, "loss": 3.3471, "step": 2810 }, { "epoch": 0.017724353308585232, "grad_norm": 9.454487800598145, "learning_rate": 1.9886214093526567e-05, "loss": 3.6361, "step": 2820 }, { "epoch": 0.017787205625282344, "grad_norm": 7.696407318115234, "learning_rate": 1.9885794992581914e-05, "loss": 3.3272, "step": 2830 }, { "epoch": 0.017850057941979455, "grad_norm": 8.311444282531738, "learning_rate": 1.988537589163726e-05, "loss": 3.2998, "step": 2840 }, { "epoch": 0.017912910258676567, "grad_norm": 7.864684104919434, "learning_rate": 1.9884956790692608e-05, "loss": 3.5902, "step": 2850 }, { "epoch": 0.017975762575373675, "grad_norm": 7.836400985717773, "learning_rate": 1.9884537689747955e-05, "loss": 3.4457, "step": 2860 }, { "epoch": 0.018038614892070787, "grad_norm": 7.825150966644287, "learning_rate": 1.98841185888033e-05, "loss": 3.6025, "step": 2870 }, { "epoch": 0.0181014672087679, "grad_norm": 8.043344497680664, "learning_rate": 1.9883699487858646e-05, "loss": 3.3072, "step": 2880 }, { "epoch": 0.01816431952546501, "grad_norm": 8.412161827087402, "learning_rate": 1.9883280386913993e-05, "loss": 3.4438, "step": 2890 }, { "epoch": 0.018227171842162118, "grad_norm": 8.524395942687988, "learning_rate": 1.988286128596934e-05, "loss": 3.2877, "step": 2900 }, { "epoch": 0.01829002415885923, "grad_norm": 8.123482704162598, "learning_rate": 1.9882442185024687e-05, "loss": 3.2552, "step": 2910 }, { "epoch": 0.01835287647555634, "grad_norm": 8.951177597045898, "learning_rate": 1.9882023084080034e-05, "loss": 3.5767, "step": 2920 }, { "epoch": 0.018415728792253453, "grad_norm": 7.002005100250244, "learning_rate": 1.988160398313538e-05, "loss": 3.4357, "step": 2930 }, { "epoch": 0.01847858110895056, "grad_norm": 8.552858352661133, "learning_rate": 1.988118488219073e-05, "loss": 3.5486, "step": 2940 }, { "epoch": 0.018541433425647673, "grad_norm": 8.0409517288208, "learning_rate": 1.9880765781246072e-05, "loss": 3.4463, "step": 2950 }, { "epoch": 0.018604285742344785, "grad_norm": 8.174979209899902, "learning_rate": 1.988034668030142e-05, "loss": 3.3729, "step": 2960 }, { "epoch": 0.018667138059041896, "grad_norm": 8.62437915802002, "learning_rate": 1.9879927579356766e-05, "loss": 3.5334, "step": 2970 }, { "epoch": 0.018729990375739004, "grad_norm": 7.793143272399902, "learning_rate": 1.9879508478412113e-05, "loss": 3.3313, "step": 2980 }, { "epoch": 0.018792842692436116, "grad_norm": 7.714635372161865, "learning_rate": 1.9879089377467457e-05, "loss": 3.573, "step": 2990 }, { "epoch": 0.018855695009133228, "grad_norm": 8.211463928222656, "learning_rate": 1.9878670276522804e-05, "loss": 3.4991, "step": 3000 }, { "epoch": 0.01891854732583034, "grad_norm": 7.586797714233398, "learning_rate": 1.987825117557815e-05, "loss": 3.5643, "step": 3010 }, { "epoch": 0.018981399642527447, "grad_norm": 7.573616027832031, "learning_rate": 1.9877832074633498e-05, "loss": 3.6384, "step": 3020 }, { "epoch": 0.01904425195922456, "grad_norm": 7.321767807006836, "learning_rate": 1.9877412973688842e-05, "loss": 3.3316, "step": 3030 }, { "epoch": 0.01910710427592167, "grad_norm": 8.411580085754395, "learning_rate": 1.987699387274419e-05, "loss": 3.2568, "step": 3040 }, { "epoch": 0.019169956592618782, "grad_norm": 9.95434284210205, "learning_rate": 1.9876574771799536e-05, "loss": 3.3447, "step": 3050 }, { "epoch": 0.01923280890931589, "grad_norm": 7.271300315856934, "learning_rate": 1.9876155670854883e-05, "loss": 3.1763, "step": 3060 }, { "epoch": 0.019295661226013002, "grad_norm": 6.9696502685546875, "learning_rate": 1.987573656991023e-05, "loss": 3.4721, "step": 3070 }, { "epoch": 0.019358513542710114, "grad_norm": 11.968955039978027, "learning_rate": 1.9875317468965577e-05, "loss": 3.4949, "step": 3080 }, { "epoch": 0.019421365859407225, "grad_norm": 7.978536605834961, "learning_rate": 1.9874898368020924e-05, "loss": 3.484, "step": 3090 }, { "epoch": 0.019484218176104334, "grad_norm": 8.469392776489258, "learning_rate": 1.987447926707627e-05, "loss": 3.4285, "step": 3100 }, { "epoch": 0.019547070492801445, "grad_norm": 8.721917152404785, "learning_rate": 1.987406016613162e-05, "loss": 3.5232, "step": 3110 }, { "epoch": 0.019609922809498557, "grad_norm": 7.684720516204834, "learning_rate": 1.9873641065186962e-05, "loss": 3.4044, "step": 3120 }, { "epoch": 0.01967277512619567, "grad_norm": 9.374492645263672, "learning_rate": 1.987322196424231e-05, "loss": 3.3665, "step": 3130 }, { "epoch": 0.019735627442892777, "grad_norm": 8.347115516662598, "learning_rate": 1.9872802863297656e-05, "loss": 3.3971, "step": 3140 }, { "epoch": 0.01979847975958989, "grad_norm": 9.327836036682129, "learning_rate": 1.9872383762353003e-05, "loss": 3.2726, "step": 3150 }, { "epoch": 0.019861332076287, "grad_norm": 9.030014038085938, "learning_rate": 1.987196466140835e-05, "loss": 3.3135, "step": 3160 }, { "epoch": 0.01992418439298411, "grad_norm": 8.689590454101562, "learning_rate": 1.9871545560463694e-05, "loss": 3.2192, "step": 3170 }, { "epoch": 0.01998703670968122, "grad_norm": 7.588329792022705, "learning_rate": 1.987112645951904e-05, "loss": 3.4882, "step": 3180 }, { "epoch": 0.02004988902637833, "grad_norm": 7.4032392501831055, "learning_rate": 1.9870707358574388e-05, "loss": 3.4922, "step": 3190 }, { "epoch": 0.020112741343075443, "grad_norm": 7.394667625427246, "learning_rate": 1.9870288257629735e-05, "loss": 3.3366, "step": 3200 }, { "epoch": 0.020175593659772555, "grad_norm": 8.536340713500977, "learning_rate": 1.986986915668508e-05, "loss": 3.5691, "step": 3210 }, { "epoch": 0.020238445976469663, "grad_norm": 7.568734169006348, "learning_rate": 1.9869450055740426e-05, "loss": 3.2338, "step": 3220 }, { "epoch": 0.020301298293166774, "grad_norm": 8.91421127319336, "learning_rate": 1.9869030954795773e-05, "loss": 3.3526, "step": 3230 }, { "epoch": 0.020364150609863886, "grad_norm": 8.39038372039795, "learning_rate": 1.986861185385112e-05, "loss": 3.3413, "step": 3240 }, { "epoch": 0.020427002926560998, "grad_norm": 11.154706954956055, "learning_rate": 1.9868192752906467e-05, "loss": 3.3824, "step": 3250 }, { "epoch": 0.020489855243258106, "grad_norm": 7.8558220863342285, "learning_rate": 1.986777365196181e-05, "loss": 3.3635, "step": 3260 }, { "epoch": 0.020552707559955218, "grad_norm": 10.399998664855957, "learning_rate": 1.9867354551017158e-05, "loss": 3.4351, "step": 3270 }, { "epoch": 0.02061555987665233, "grad_norm": 9.94041919708252, "learning_rate": 1.9866935450072505e-05, "loss": 3.4726, "step": 3280 }, { "epoch": 0.02067841219334944, "grad_norm": 7.788326740264893, "learning_rate": 1.9866516349127852e-05, "loss": 3.4651, "step": 3290 }, { "epoch": 0.02074126451004655, "grad_norm": 6.855205059051514, "learning_rate": 1.98660972481832e-05, "loss": 3.0607, "step": 3300 }, { "epoch": 0.02080411682674366, "grad_norm": 7.5319671630859375, "learning_rate": 1.9865678147238546e-05, "loss": 3.4578, "step": 3310 }, { "epoch": 0.020866969143440772, "grad_norm": 8.263099670410156, "learning_rate": 1.9865259046293893e-05, "loss": 3.4431, "step": 3320 }, { "epoch": 0.020929821460137884, "grad_norm": 7.396290302276611, "learning_rate": 1.986483994534924e-05, "loss": 3.3876, "step": 3330 }, { "epoch": 0.020992673776834992, "grad_norm": 8.584406852722168, "learning_rate": 1.9864420844404584e-05, "loss": 3.088, "step": 3340 }, { "epoch": 0.021055526093532104, "grad_norm": 8.801658630371094, "learning_rate": 1.986400174345993e-05, "loss": 3.147, "step": 3350 }, { "epoch": 0.021118378410229215, "grad_norm": 8.548176765441895, "learning_rate": 1.9863582642515278e-05, "loss": 3.2649, "step": 3360 }, { "epoch": 0.021181230726926327, "grad_norm": 7.954073905944824, "learning_rate": 1.9863163541570625e-05, "loss": 3.2076, "step": 3370 }, { "epoch": 0.021244083043623435, "grad_norm": 8.710152626037598, "learning_rate": 1.9862744440625972e-05, "loss": 3.4097, "step": 3380 }, { "epoch": 0.021306935360320547, "grad_norm": 8.620429992675781, "learning_rate": 1.9862325339681316e-05, "loss": 3.5115, "step": 3390 }, { "epoch": 0.02136978767701766, "grad_norm": 8.825028419494629, "learning_rate": 1.9861906238736663e-05, "loss": 3.4718, "step": 3400 }, { "epoch": 0.02143263999371477, "grad_norm": 7.917201995849609, "learning_rate": 1.986148713779201e-05, "loss": 3.3384, "step": 3410 }, { "epoch": 0.021495492310411878, "grad_norm": 7.338304042816162, "learning_rate": 1.9861068036847357e-05, "loss": 3.0676, "step": 3420 }, { "epoch": 0.02155834462710899, "grad_norm": 7.712886810302734, "learning_rate": 1.98606489359027e-05, "loss": 3.3085, "step": 3430 }, { "epoch": 0.0216211969438061, "grad_norm": 8.4638032913208, "learning_rate": 1.9860229834958048e-05, "loss": 3.3793, "step": 3440 }, { "epoch": 0.02168404926050321, "grad_norm": 8.804676055908203, "learning_rate": 1.9859810734013395e-05, "loss": 3.5193, "step": 3450 }, { "epoch": 0.02174690157720032, "grad_norm": 7.220158100128174, "learning_rate": 1.9859391633068742e-05, "loss": 3.3284, "step": 3460 }, { "epoch": 0.021809753893897433, "grad_norm": 6.993019104003906, "learning_rate": 1.985897253212409e-05, "loss": 3.3708, "step": 3470 }, { "epoch": 0.021872606210594545, "grad_norm": 8.048100471496582, "learning_rate": 1.9858553431179436e-05, "loss": 3.5168, "step": 3480 }, { "epoch": 0.021935458527291653, "grad_norm": 7.993709564208984, "learning_rate": 1.9858134330234783e-05, "loss": 3.2929, "step": 3490 }, { "epoch": 0.021998310843988764, "grad_norm": 7.402252674102783, "learning_rate": 1.9857715229290127e-05, "loss": 3.438, "step": 3500 }, { "epoch": 0.022061163160685876, "grad_norm": 8.188456535339355, "learning_rate": 1.9857296128345474e-05, "loss": 3.5718, "step": 3510 }, { "epoch": 0.022124015477382988, "grad_norm": 8.500411987304688, "learning_rate": 1.985687702740082e-05, "loss": 3.2535, "step": 3520 }, { "epoch": 0.022186867794080096, "grad_norm": 7.373854160308838, "learning_rate": 1.985645792645617e-05, "loss": 3.3569, "step": 3530 }, { "epoch": 0.022249720110777207, "grad_norm": 9.229218482971191, "learning_rate": 1.9856038825511515e-05, "loss": 3.4243, "step": 3540 }, { "epoch": 0.02231257242747432, "grad_norm": 8.59708309173584, "learning_rate": 1.9855619724566862e-05, "loss": 3.354, "step": 3550 }, { "epoch": 0.02237542474417143, "grad_norm": 7.8422417640686035, "learning_rate": 1.985520062362221e-05, "loss": 3.236, "step": 3560 }, { "epoch": 0.02243827706086854, "grad_norm": 6.979317665100098, "learning_rate": 1.9854781522677553e-05, "loss": 3.2229, "step": 3570 }, { "epoch": 0.02250112937756565, "grad_norm": 8.221169471740723, "learning_rate": 1.98543624217329e-05, "loss": 3.315, "step": 3580 }, { "epoch": 0.022563981694262762, "grad_norm": 9.011247634887695, "learning_rate": 1.9853943320788247e-05, "loss": 3.3895, "step": 3590 }, { "epoch": 0.022626834010959874, "grad_norm": 7.5295915603637695, "learning_rate": 1.9853524219843594e-05, "loss": 3.3018, "step": 3600 }, { "epoch": 0.022689686327656982, "grad_norm": 8.486215591430664, "learning_rate": 1.9853105118898938e-05, "loss": 3.2546, "step": 3610 }, { "epoch": 0.022752538644354094, "grad_norm": 6.862631320953369, "learning_rate": 1.9852686017954285e-05, "loss": 3.2158, "step": 3620 }, { "epoch": 0.022815390961051205, "grad_norm": 10.961071968078613, "learning_rate": 1.9852266917009632e-05, "loss": 3.5264, "step": 3630 }, { "epoch": 0.022878243277748317, "grad_norm": 7.940339088439941, "learning_rate": 1.985184781606498e-05, "loss": 3.3097, "step": 3640 }, { "epoch": 0.022941095594445425, "grad_norm": 8.660905838012695, "learning_rate": 1.9851428715120323e-05, "loss": 3.2254, "step": 3650 }, { "epoch": 0.023003947911142537, "grad_norm": 7.902162075042725, "learning_rate": 1.985100961417567e-05, "loss": 3.3875, "step": 3660 }, { "epoch": 0.02306680022783965, "grad_norm": 8.279126167297363, "learning_rate": 1.9850590513231017e-05, "loss": 3.3775, "step": 3670 }, { "epoch": 0.02312965254453676, "grad_norm": 7.69015645980835, "learning_rate": 1.9850171412286364e-05, "loss": 3.1918, "step": 3680 }, { "epoch": 0.023192504861233868, "grad_norm": 14.095693588256836, "learning_rate": 1.984975231134171e-05, "loss": 3.1605, "step": 3690 }, { "epoch": 0.02325535717793098, "grad_norm": 8.890153884887695, "learning_rate": 1.984933321039706e-05, "loss": 3.3755, "step": 3700 }, { "epoch": 0.02331820949462809, "grad_norm": 8.48962116241455, "learning_rate": 1.9848914109452405e-05, "loss": 3.3708, "step": 3710 }, { "epoch": 0.023381061811325203, "grad_norm": 8.391972541809082, "learning_rate": 1.9848495008507753e-05, "loss": 3.4301, "step": 3720 }, { "epoch": 0.02344391412802231, "grad_norm": 5.6870951652526855, "learning_rate": 1.98480759075631e-05, "loss": 3.048, "step": 3730 }, { "epoch": 0.023506766444719423, "grad_norm": 7.735255241394043, "learning_rate": 1.9847656806618443e-05, "loss": 3.1884, "step": 3740 }, { "epoch": 0.023569618761416535, "grad_norm": 116.45984649658203, "learning_rate": 1.984723770567379e-05, "loss": 3.4161, "step": 3750 }, { "epoch": 0.023632471078113646, "grad_norm": 8.070536613464355, "learning_rate": 1.9846818604729137e-05, "loss": 3.0678, "step": 3760 }, { "epoch": 0.023695323394810754, "grad_norm": 7.883828163146973, "learning_rate": 1.9846399503784484e-05, "loss": 3.2295, "step": 3770 }, { "epoch": 0.023758175711507866, "grad_norm": 9.798192977905273, "learning_rate": 1.984598040283983e-05, "loss": 3.2163, "step": 3780 }, { "epoch": 0.023821028028204978, "grad_norm": 7.705380439758301, "learning_rate": 1.9845561301895175e-05, "loss": 3.5051, "step": 3790 }, { "epoch": 0.02388388034490209, "grad_norm": 8.782146453857422, "learning_rate": 1.9845142200950522e-05, "loss": 3.1344, "step": 3800 }, { "epoch": 0.023946732661599197, "grad_norm": 8.779561042785645, "learning_rate": 1.984472310000587e-05, "loss": 3.0366, "step": 3810 }, { "epoch": 0.02400958497829631, "grad_norm": 8.433015823364258, "learning_rate": 1.9844303999061216e-05, "loss": 3.3192, "step": 3820 }, { "epoch": 0.02407243729499342, "grad_norm": 8.250164031982422, "learning_rate": 1.984388489811656e-05, "loss": 3.0157, "step": 3830 }, { "epoch": 0.024135289611690532, "grad_norm": 7.675574779510498, "learning_rate": 1.9843465797171907e-05, "loss": 3.3224, "step": 3840 }, { "epoch": 0.02419814192838764, "grad_norm": 7.564833641052246, "learning_rate": 1.9843046696227254e-05, "loss": 3.3004, "step": 3850 }, { "epoch": 0.024260994245084752, "grad_norm": 7.947770595550537, "learning_rate": 1.98426275952826e-05, "loss": 3.1889, "step": 3860 }, { "epoch": 0.024323846561781864, "grad_norm": 7.688364028930664, "learning_rate": 1.984220849433795e-05, "loss": 3.1756, "step": 3870 }, { "epoch": 0.024386698878478975, "grad_norm": 8.064852714538574, "learning_rate": 1.9841789393393292e-05, "loss": 3.2235, "step": 3880 }, { "epoch": 0.024449551195176084, "grad_norm": 8.496567726135254, "learning_rate": 1.984137029244864e-05, "loss": 3.2349, "step": 3890 }, { "epoch": 0.024512403511873195, "grad_norm": 7.16607141494751, "learning_rate": 1.9840951191503986e-05, "loss": 2.9673, "step": 3900 }, { "epoch": 0.024575255828570307, "grad_norm": 8.018267631530762, "learning_rate": 1.9840532090559333e-05, "loss": 3.3146, "step": 3910 }, { "epoch": 0.02463810814526742, "grad_norm": 7.622980117797852, "learning_rate": 1.984011298961468e-05, "loss": 3.2143, "step": 3920 }, { "epoch": 0.024700960461964527, "grad_norm": 9.034892082214355, "learning_rate": 1.9839693888670027e-05, "loss": 3.1585, "step": 3930 }, { "epoch": 0.02476381277866164, "grad_norm": 8.77389144897461, "learning_rate": 1.9839274787725375e-05, "loss": 3.362, "step": 3940 }, { "epoch": 0.02482666509535875, "grad_norm": 8.3350191116333, "learning_rate": 1.983885568678072e-05, "loss": 2.8545, "step": 3950 }, { "epoch": 0.02488951741205586, "grad_norm": 8.177921295166016, "learning_rate": 1.9838436585836065e-05, "loss": 3.0485, "step": 3960 }, { "epoch": 0.02495236972875297, "grad_norm": 7.447956562042236, "learning_rate": 1.9838017484891412e-05, "loss": 3.2264, "step": 3970 }, { "epoch": 0.02501522204545008, "grad_norm": 9.212813377380371, "learning_rate": 1.983759838394676e-05, "loss": 3.167, "step": 3980 }, { "epoch": 0.025078074362147193, "grad_norm": 7.669730186462402, "learning_rate": 1.9837179283002106e-05, "loss": 3.1319, "step": 3990 }, { "epoch": 0.025140926678844305, "grad_norm": 9.09615707397461, "learning_rate": 1.9836760182057454e-05, "loss": 3.2663, "step": 4000 }, { "epoch": 0.025203778995541413, "grad_norm": 8.853797912597656, "learning_rate": 1.9836341081112797e-05, "loss": 3.4463, "step": 4010 }, { "epoch": 0.025266631312238524, "grad_norm": 10.12424087524414, "learning_rate": 1.9835921980168144e-05, "loss": 3.236, "step": 4020 }, { "epoch": 0.025329483628935636, "grad_norm": 7.712778568267822, "learning_rate": 1.983550287922349e-05, "loss": 3.281, "step": 4030 }, { "epoch": 0.025392335945632748, "grad_norm": 7.615508556365967, "learning_rate": 1.983508377827884e-05, "loss": 3.1491, "step": 4040 }, { "epoch": 0.025455188262329856, "grad_norm": 8.383688926696777, "learning_rate": 1.9834664677334182e-05, "loss": 3.1655, "step": 4050 }, { "epoch": 0.025518040579026968, "grad_norm": 6.64720344543457, "learning_rate": 1.983424557638953e-05, "loss": 3.2549, "step": 4060 }, { "epoch": 0.02558089289572408, "grad_norm": 8.25935173034668, "learning_rate": 1.9833826475444876e-05, "loss": 3.1786, "step": 4070 }, { "epoch": 0.02564374521242119, "grad_norm": 7.64882755279541, "learning_rate": 1.9833407374500223e-05, "loss": 3.1456, "step": 4080 }, { "epoch": 0.0257065975291183, "grad_norm": 7.684487819671631, "learning_rate": 1.983298827355557e-05, "loss": 3.1528, "step": 4090 }, { "epoch": 0.02576944984581541, "grad_norm": 7.812594413757324, "learning_rate": 1.9832569172610917e-05, "loss": 3.2666, "step": 4100 }, { "epoch": 0.025832302162512522, "grad_norm": 7.969986915588379, "learning_rate": 1.9832150071666265e-05, "loss": 3.2326, "step": 4110 }, { "epoch": 0.02589515447920963, "grad_norm": 7.349579334259033, "learning_rate": 1.9831730970721608e-05, "loss": 3.0932, "step": 4120 }, { "epoch": 0.025958006795906742, "grad_norm": 8.839384078979492, "learning_rate": 1.9831311869776955e-05, "loss": 3.2192, "step": 4130 }, { "epoch": 0.026020859112603854, "grad_norm": 7.78971004486084, "learning_rate": 1.9830892768832302e-05, "loss": 3.154, "step": 4140 }, { "epoch": 0.026083711429300965, "grad_norm": 7.024759769439697, "learning_rate": 1.983047366788765e-05, "loss": 3.1585, "step": 4150 }, { "epoch": 0.026146563745998073, "grad_norm": 8.18656063079834, "learning_rate": 1.9830054566942997e-05, "loss": 3.2396, "step": 4160 }, { "epoch": 0.026209416062695185, "grad_norm": 7.245763778686523, "learning_rate": 1.9829635465998344e-05, "loss": 3.1889, "step": 4170 }, { "epoch": 0.026272268379392297, "grad_norm": 9.72729206085205, "learning_rate": 1.982921636505369e-05, "loss": 3.2638, "step": 4180 }, { "epoch": 0.02633512069608941, "grad_norm": 7.4755401611328125, "learning_rate": 1.9828797264109034e-05, "loss": 3.2889, "step": 4190 }, { "epoch": 0.026397973012786517, "grad_norm": 8.020759582519531, "learning_rate": 1.982837816316438e-05, "loss": 3.0979, "step": 4200 }, { "epoch": 0.026460825329483628, "grad_norm": 8.661406517028809, "learning_rate": 1.982795906221973e-05, "loss": 3.1325, "step": 4210 }, { "epoch": 0.02652367764618074, "grad_norm": 8.634723663330078, "learning_rate": 1.9827539961275076e-05, "loss": 3.4406, "step": 4220 }, { "epoch": 0.02658652996287785, "grad_norm": 8.003168106079102, "learning_rate": 1.982712086033042e-05, "loss": 3.0698, "step": 4230 }, { "epoch": 0.02664938227957496, "grad_norm": 7.931961536407471, "learning_rate": 1.9826701759385766e-05, "loss": 3.2705, "step": 4240 }, { "epoch": 0.02671223459627207, "grad_norm": 9.693182945251465, "learning_rate": 1.9826282658441113e-05, "loss": 3.346, "step": 4250 }, { "epoch": 0.026775086912969183, "grad_norm": 7.78812837600708, "learning_rate": 1.982586355749646e-05, "loss": 3.1529, "step": 4260 }, { "epoch": 0.026837939229666295, "grad_norm": 9.524435043334961, "learning_rate": 1.9825444456551804e-05, "loss": 3.1564, "step": 4270 }, { "epoch": 0.026900791546363403, "grad_norm": 7.616755485534668, "learning_rate": 1.982502535560715e-05, "loss": 3.0974, "step": 4280 }, { "epoch": 0.026963643863060514, "grad_norm": 6.952274799346924, "learning_rate": 1.9824606254662498e-05, "loss": 2.9486, "step": 4290 }, { "epoch": 0.027026496179757626, "grad_norm": 9.390430450439453, "learning_rate": 1.9824187153717845e-05, "loss": 3.1375, "step": 4300 }, { "epoch": 0.027089348496454738, "grad_norm": 8.931705474853516, "learning_rate": 1.9823768052773192e-05, "loss": 3.0853, "step": 4310 }, { "epoch": 0.027152200813151846, "grad_norm": 9.245275497436523, "learning_rate": 1.982334895182854e-05, "loss": 3.0416, "step": 4320 }, { "epoch": 0.027215053129848957, "grad_norm": 6.867707252502441, "learning_rate": 1.9822929850883887e-05, "loss": 2.9614, "step": 4330 }, { "epoch": 0.02727790544654607, "grad_norm": 7.120062351226807, "learning_rate": 1.9822510749939234e-05, "loss": 3.0628, "step": 4340 }, { "epoch": 0.02734075776324318, "grad_norm": 6.389726638793945, "learning_rate": 1.982209164899458e-05, "loss": 3.1971, "step": 4350 }, { "epoch": 0.02740361007994029, "grad_norm": 9.57553482055664, "learning_rate": 1.9821672548049924e-05, "loss": 3.0961, "step": 4360 }, { "epoch": 0.0274664623966374, "grad_norm": 7.604666233062744, "learning_rate": 1.982125344710527e-05, "loss": 2.9903, "step": 4370 }, { "epoch": 0.027529314713334512, "grad_norm": 7.997584342956543, "learning_rate": 1.982083434616062e-05, "loss": 3.1923, "step": 4380 }, { "epoch": 0.027592167030031624, "grad_norm": 8.558812141418457, "learning_rate": 1.9820415245215966e-05, "loss": 3.3072, "step": 4390 }, { "epoch": 0.027655019346728732, "grad_norm": 8.559096336364746, "learning_rate": 1.9819996144271313e-05, "loss": 2.8927, "step": 4400 }, { "epoch": 0.027717871663425844, "grad_norm": 8.305462837219238, "learning_rate": 1.9819577043326656e-05, "loss": 3.3332, "step": 4410 }, { "epoch": 0.027780723980122955, "grad_norm": 7.516127586364746, "learning_rate": 1.9819157942382003e-05, "loss": 3.2293, "step": 4420 }, { "epoch": 0.027843576296820067, "grad_norm": 13.237881660461426, "learning_rate": 1.981873884143735e-05, "loss": 3.0727, "step": 4430 }, { "epoch": 0.027906428613517175, "grad_norm": 7.675593376159668, "learning_rate": 1.9818319740492698e-05, "loss": 2.9193, "step": 4440 }, { "epoch": 0.027969280930214287, "grad_norm": 7.40706205368042, "learning_rate": 1.981790063954804e-05, "loss": 3.0294, "step": 4450 }, { "epoch": 0.0280321332469114, "grad_norm": 7.931587219238281, "learning_rate": 1.981748153860339e-05, "loss": 3.2119, "step": 4460 }, { "epoch": 0.02809498556360851, "grad_norm": 9.247722625732422, "learning_rate": 1.9817062437658735e-05, "loss": 3.2771, "step": 4470 }, { "epoch": 0.028157837880305618, "grad_norm": 8.768036842346191, "learning_rate": 1.9816643336714082e-05, "loss": 3.1498, "step": 4480 }, { "epoch": 0.02822069019700273, "grad_norm": 7.700189113616943, "learning_rate": 1.981622423576943e-05, "loss": 3.3009, "step": 4490 }, { "epoch": 0.02828354251369984, "grad_norm": 7.002710819244385, "learning_rate": 1.9815805134824773e-05, "loss": 2.8461, "step": 4500 }, { "epoch": 0.028346394830396953, "grad_norm": 7.769290447235107, "learning_rate": 1.981538603388012e-05, "loss": 3.0789, "step": 4510 }, { "epoch": 0.02840924714709406, "grad_norm": 7.786672115325928, "learning_rate": 1.9814966932935467e-05, "loss": 3.2034, "step": 4520 }, { "epoch": 0.028472099463791173, "grad_norm": 9.238251686096191, "learning_rate": 1.9814547831990814e-05, "loss": 3.0802, "step": 4530 }, { "epoch": 0.028534951780488284, "grad_norm": 9.059394836425781, "learning_rate": 1.981412873104616e-05, "loss": 3.2412, "step": 4540 }, { "epoch": 0.028597804097185396, "grad_norm": 7.74805212020874, "learning_rate": 1.981370963010151e-05, "loss": 2.9245, "step": 4550 }, { "epoch": 0.028660656413882504, "grad_norm": 7.382835388183594, "learning_rate": 1.9813290529156856e-05, "loss": 3.2211, "step": 4560 }, { "epoch": 0.028723508730579616, "grad_norm": 13.836868286132812, "learning_rate": 1.9812871428212203e-05, "loss": 3.1112, "step": 4570 }, { "epoch": 0.028786361047276728, "grad_norm": 6.769961357116699, "learning_rate": 1.9812452327267546e-05, "loss": 3.098, "step": 4580 }, { "epoch": 0.02884921336397384, "grad_norm": 7.997913360595703, "learning_rate": 1.9812033226322893e-05, "loss": 2.8804, "step": 4590 }, { "epoch": 0.028912065680670947, "grad_norm": 9.096884727478027, "learning_rate": 1.981161412537824e-05, "loss": 3.136, "step": 4600 }, { "epoch": 0.02897491799736806, "grad_norm": 7.878539562225342, "learning_rate": 1.9811195024433588e-05, "loss": 3.083, "step": 4610 }, { "epoch": 0.02903777031406517, "grad_norm": 7.577316761016846, "learning_rate": 1.9810775923488935e-05, "loss": 2.9114, "step": 4620 }, { "epoch": 0.029100622630762282, "grad_norm": 7.151461601257324, "learning_rate": 1.981035682254428e-05, "loss": 3.0634, "step": 4630 }, { "epoch": 0.02916347494745939, "grad_norm": 8.938619613647461, "learning_rate": 1.9809937721599625e-05, "loss": 3.0557, "step": 4640 }, { "epoch": 0.029226327264156502, "grad_norm": 7.312988758087158, "learning_rate": 1.9809518620654972e-05, "loss": 3.2038, "step": 4650 }, { "epoch": 0.029289179580853614, "grad_norm": 8.200956344604492, "learning_rate": 1.980909951971032e-05, "loss": 3.1253, "step": 4660 }, { "epoch": 0.029352031897550725, "grad_norm": 8.429890632629395, "learning_rate": 1.9808680418765663e-05, "loss": 2.8572, "step": 4670 }, { "epoch": 0.029414884214247834, "grad_norm": 7.834331035614014, "learning_rate": 1.980826131782101e-05, "loss": 3.2663, "step": 4680 }, { "epoch": 0.029477736530944945, "grad_norm": 8.754341125488281, "learning_rate": 1.9807842216876357e-05, "loss": 3.2611, "step": 4690 }, { "epoch": 0.029540588847642057, "grad_norm": 8.42916488647461, "learning_rate": 1.9807423115931704e-05, "loss": 3.2332, "step": 4700 }, { "epoch": 0.02960344116433917, "grad_norm": 7.737362384796143, "learning_rate": 1.980700401498705e-05, "loss": 2.9819, "step": 4710 }, { "epoch": 0.029666293481036277, "grad_norm": 9.505520820617676, "learning_rate": 1.98065849140424e-05, "loss": 3.0618, "step": 4720 }, { "epoch": 0.029729145797733388, "grad_norm": 9.08653450012207, "learning_rate": 1.9806165813097746e-05, "loss": 3.1651, "step": 4730 }, { "epoch": 0.0297919981144305, "grad_norm": 7.863708972930908, "learning_rate": 1.9805746712153093e-05, "loss": 3.077, "step": 4740 }, { "epoch": 0.02985485043112761, "grad_norm": 8.24854850769043, "learning_rate": 1.9805327611208436e-05, "loss": 3.1126, "step": 4750 }, { "epoch": 0.02991770274782472, "grad_norm": 8.799391746520996, "learning_rate": 1.9804908510263783e-05, "loss": 2.934, "step": 4760 }, { "epoch": 0.02998055506452183, "grad_norm": 9.366531372070312, "learning_rate": 1.980448940931913e-05, "loss": 2.8343, "step": 4770 }, { "epoch": 0.030043407381218943, "grad_norm": 8.352874755859375, "learning_rate": 1.9804070308374478e-05, "loss": 3.1882, "step": 4780 }, { "epoch": 0.030106259697916055, "grad_norm": 8.972452163696289, "learning_rate": 1.9803651207429825e-05, "loss": 3.0625, "step": 4790 }, { "epoch": 0.030169112014613163, "grad_norm": 7.474132061004639, "learning_rate": 1.9803232106485172e-05, "loss": 3.1211, "step": 4800 }, { "epoch": 0.030231964331310274, "grad_norm": 9.15546703338623, "learning_rate": 1.9802813005540515e-05, "loss": 3.0774, "step": 4810 }, { "epoch": 0.030294816648007386, "grad_norm": 8.463102340698242, "learning_rate": 1.9802393904595863e-05, "loss": 3.0315, "step": 4820 }, { "epoch": 0.030357668964704494, "grad_norm": 7.438838005065918, "learning_rate": 1.980197480365121e-05, "loss": 3.0363, "step": 4830 }, { "epoch": 0.030420521281401606, "grad_norm": 7.952330112457275, "learning_rate": 1.9801555702706557e-05, "loss": 3.078, "step": 4840 }, { "epoch": 0.030483373598098717, "grad_norm": 7.463561534881592, "learning_rate": 1.98011366017619e-05, "loss": 2.9217, "step": 4850 }, { "epoch": 0.03054622591479583, "grad_norm": 9.11038875579834, "learning_rate": 1.9800717500817247e-05, "loss": 3.4038, "step": 4860 }, { "epoch": 0.030609078231492937, "grad_norm": 7.781106472015381, "learning_rate": 1.9800298399872594e-05, "loss": 3.0778, "step": 4870 }, { "epoch": 0.03067193054819005, "grad_norm": 7.971752643585205, "learning_rate": 1.979987929892794e-05, "loss": 3.1256, "step": 4880 }, { "epoch": 0.03073478286488716, "grad_norm": 9.645397186279297, "learning_rate": 1.9799460197983285e-05, "loss": 2.9327, "step": 4890 }, { "epoch": 0.030797635181584272, "grad_norm": 8.911731719970703, "learning_rate": 1.9799041097038632e-05, "loss": 3.0361, "step": 4900 }, { "epoch": 0.03086048749828138, "grad_norm": 8.497078895568848, "learning_rate": 1.979862199609398e-05, "loss": 3.0149, "step": 4910 }, { "epoch": 0.030923339814978492, "grad_norm": 7.9162068367004395, "learning_rate": 1.9798202895149326e-05, "loss": 2.9479, "step": 4920 }, { "epoch": 0.030986192131675604, "grad_norm": 7.7327094078063965, "learning_rate": 1.9797783794204674e-05, "loss": 3.0199, "step": 4930 }, { "epoch": 0.031049044448372715, "grad_norm": 8.18338394165039, "learning_rate": 1.979736469326002e-05, "loss": 2.9839, "step": 4940 }, { "epoch": 0.031111896765069823, "grad_norm": 7.837664604187012, "learning_rate": 1.9796945592315368e-05, "loss": 3.004, "step": 4950 }, { "epoch": 0.031174749081766935, "grad_norm": 7.595458984375, "learning_rate": 1.9796526491370715e-05, "loss": 3.0458, "step": 4960 }, { "epoch": 0.031237601398464047, "grad_norm": 7.489948749542236, "learning_rate": 1.9796107390426062e-05, "loss": 2.8061, "step": 4970 }, { "epoch": 0.03130045371516116, "grad_norm": 7.755764484405518, "learning_rate": 1.9795688289481405e-05, "loss": 2.921, "step": 4980 }, { "epoch": 0.03136330603185827, "grad_norm": 9.916472434997559, "learning_rate": 1.9795269188536753e-05, "loss": 3.1144, "step": 4990 }, { "epoch": 0.03142615834855538, "grad_norm": 7.7621259689331055, "learning_rate": 1.97948500875921e-05, "loss": 2.9962, "step": 5000 }, { "epoch": 0.03148901066525249, "grad_norm": 8.081377983093262, "learning_rate": 1.9794430986647447e-05, "loss": 2.9858, "step": 5010 }, { "epoch": 0.0315518629819496, "grad_norm": 7.946218013763428, "learning_rate": 1.9794011885702794e-05, "loss": 2.994, "step": 5020 }, { "epoch": 0.03161471529864671, "grad_norm": 7.75227689743042, "learning_rate": 1.9793592784758137e-05, "loss": 2.8066, "step": 5030 }, { "epoch": 0.03167756761534382, "grad_norm": 8.829666137695312, "learning_rate": 1.9793173683813485e-05, "loss": 2.92, "step": 5040 }, { "epoch": 0.03174041993204093, "grad_norm": 7.233504295349121, "learning_rate": 1.979275458286883e-05, "loss": 3.2379, "step": 5050 }, { "epoch": 0.031803272248738045, "grad_norm": 8.083481788635254, "learning_rate": 1.979233548192418e-05, "loss": 3.1046, "step": 5060 }, { "epoch": 0.03186612456543515, "grad_norm": 8.806325912475586, "learning_rate": 1.9791916380979522e-05, "loss": 3.0145, "step": 5070 }, { "epoch": 0.03192897688213227, "grad_norm": 7.285245418548584, "learning_rate": 1.979149728003487e-05, "loss": 2.8619, "step": 5080 }, { "epoch": 0.031991829198829376, "grad_norm": 8.357592582702637, "learning_rate": 1.9791078179090216e-05, "loss": 3.1808, "step": 5090 }, { "epoch": 0.032054681515526484, "grad_norm": 7.444748401641846, "learning_rate": 1.9790659078145564e-05, "loss": 2.8626, "step": 5100 }, { "epoch": 0.0321175338322236, "grad_norm": 6.956993579864502, "learning_rate": 1.979023997720091e-05, "loss": 2.7885, "step": 5110 }, { "epoch": 0.03218038614892071, "grad_norm": 8.073221206665039, "learning_rate": 1.9789820876256258e-05, "loss": 3.0592, "step": 5120 }, { "epoch": 0.032243238465617816, "grad_norm": 7.597723484039307, "learning_rate": 1.97894017753116e-05, "loss": 3.0989, "step": 5130 }, { "epoch": 0.03230609078231493, "grad_norm": 7.634385585784912, "learning_rate": 1.978898267436695e-05, "loss": 3.2343, "step": 5140 }, { "epoch": 0.03236894309901204, "grad_norm": 7.463425636291504, "learning_rate": 1.9788563573422296e-05, "loss": 3.1204, "step": 5150 }, { "epoch": 0.032431795415709154, "grad_norm": 9.218866348266602, "learning_rate": 1.9788144472477643e-05, "loss": 3.0007, "step": 5160 }, { "epoch": 0.03249464773240626, "grad_norm": 6.456634044647217, "learning_rate": 1.978772537153299e-05, "loss": 3.0349, "step": 5170 }, { "epoch": 0.03255750004910337, "grad_norm": 7.182300567626953, "learning_rate": 1.9787306270588337e-05, "loss": 3.2196, "step": 5180 }, { "epoch": 0.032620352365800485, "grad_norm": 7.616100311279297, "learning_rate": 1.9786887169643684e-05, "loss": 3.185, "step": 5190 }, { "epoch": 0.032683204682497594, "grad_norm": 8.156831741333008, "learning_rate": 1.9786468068699027e-05, "loss": 2.9434, "step": 5200 }, { "epoch": 0.0327460569991947, "grad_norm": 7.594676971435547, "learning_rate": 1.9786048967754375e-05, "loss": 2.8468, "step": 5210 }, { "epoch": 0.03280890931589182, "grad_norm": 14.522555351257324, "learning_rate": 1.978562986680972e-05, "loss": 2.9002, "step": 5220 }, { "epoch": 0.032871761632588925, "grad_norm": 10.881830215454102, "learning_rate": 1.978521076586507e-05, "loss": 3.1985, "step": 5230 }, { "epoch": 0.03293461394928604, "grad_norm": 8.674522399902344, "learning_rate": 1.9784791664920416e-05, "loss": 3.1654, "step": 5240 }, { "epoch": 0.03299746626598315, "grad_norm": 8.216035842895508, "learning_rate": 1.978437256397576e-05, "loss": 2.9681, "step": 5250 }, { "epoch": 0.033060318582680256, "grad_norm": 6.300205707550049, "learning_rate": 1.9783953463031107e-05, "loss": 2.9274, "step": 5260 }, { "epoch": 0.03312317089937737, "grad_norm": 13.085122108459473, "learning_rate": 1.9783534362086454e-05, "loss": 3.1333, "step": 5270 }, { "epoch": 0.03318602321607448, "grad_norm": 8.12608528137207, "learning_rate": 1.97831152611418e-05, "loss": 3.1074, "step": 5280 }, { "epoch": 0.03324887553277159, "grad_norm": 7.285276412963867, "learning_rate": 1.9782696160197144e-05, "loss": 2.9279, "step": 5290 }, { "epoch": 0.0333117278494687, "grad_norm": 7.87554931640625, "learning_rate": 1.978227705925249e-05, "loss": 2.7172, "step": 5300 }, { "epoch": 0.03337458016616581, "grad_norm": 6.538553714752197, "learning_rate": 1.978185795830784e-05, "loss": 2.8172, "step": 5310 }, { "epoch": 0.033437432482862926, "grad_norm": 6.931741237640381, "learning_rate": 1.9781438857363186e-05, "loss": 2.8979, "step": 5320 }, { "epoch": 0.033500284799560034, "grad_norm": 8.080859184265137, "learning_rate": 1.9781019756418533e-05, "loss": 3.2293, "step": 5330 }, { "epoch": 0.03356313711625714, "grad_norm": 9.293947219848633, "learning_rate": 1.978060065547388e-05, "loss": 2.9521, "step": 5340 }, { "epoch": 0.03362598943295426, "grad_norm": 8.021860122680664, "learning_rate": 1.9780181554529227e-05, "loss": 2.9557, "step": 5350 }, { "epoch": 0.033688841749651366, "grad_norm": 6.971328258514404, "learning_rate": 1.9779762453584574e-05, "loss": 2.8615, "step": 5360 }, { "epoch": 0.033751694066348474, "grad_norm": 7.943089962005615, "learning_rate": 1.977934335263992e-05, "loss": 2.9748, "step": 5370 }, { "epoch": 0.03381454638304559, "grad_norm": 8.65058708190918, "learning_rate": 1.9778924251695265e-05, "loss": 3.0009, "step": 5380 }, { "epoch": 0.0338773986997427, "grad_norm": 8.905320167541504, "learning_rate": 1.977850515075061e-05, "loss": 3.1791, "step": 5390 }, { "epoch": 0.03394025101643981, "grad_norm": 7.219740390777588, "learning_rate": 1.977808604980596e-05, "loss": 2.8528, "step": 5400 }, { "epoch": 0.03400310333313692, "grad_norm": 7.684092044830322, "learning_rate": 1.9777666948861306e-05, "loss": 2.7724, "step": 5410 }, { "epoch": 0.03406595564983403, "grad_norm": 7.816575527191162, "learning_rate": 1.9777247847916653e-05, "loss": 2.9181, "step": 5420 }, { "epoch": 0.034128807966531144, "grad_norm": 7.647109508514404, "learning_rate": 1.9776828746971997e-05, "loss": 3.147, "step": 5430 }, { "epoch": 0.03419166028322825, "grad_norm": 7.7166643142700195, "learning_rate": 1.9776409646027344e-05, "loss": 2.8961, "step": 5440 }, { "epoch": 0.03425451259992536, "grad_norm": 7.806896686553955, "learning_rate": 1.977599054508269e-05, "loss": 2.9634, "step": 5450 }, { "epoch": 0.034317364916622475, "grad_norm": 7.769691467285156, "learning_rate": 1.9775571444138038e-05, "loss": 3.0611, "step": 5460 }, { "epoch": 0.034380217233319584, "grad_norm": 7.387162685394287, "learning_rate": 1.977515234319338e-05, "loss": 3.0066, "step": 5470 }, { "epoch": 0.03444306955001669, "grad_norm": 8.624832153320312, "learning_rate": 1.977473324224873e-05, "loss": 2.8516, "step": 5480 }, { "epoch": 0.03450592186671381, "grad_norm": 7.3755998611450195, "learning_rate": 1.9774314141304076e-05, "loss": 2.9962, "step": 5490 }, { "epoch": 0.034568774183410915, "grad_norm": 7.315758228302002, "learning_rate": 1.9773895040359423e-05, "loss": 3.3318, "step": 5500 }, { "epoch": 0.03463162650010803, "grad_norm": 8.75410270690918, "learning_rate": 1.9773475939414766e-05, "loss": 3.013, "step": 5510 }, { "epoch": 0.03469447881680514, "grad_norm": 8.508171081542969, "learning_rate": 1.9773056838470113e-05, "loss": 2.7933, "step": 5520 }, { "epoch": 0.034757331133502246, "grad_norm": 16.92315101623535, "learning_rate": 1.977263773752546e-05, "loss": 2.9606, "step": 5530 }, { "epoch": 0.03482018345019936, "grad_norm": 7.559053421020508, "learning_rate": 1.9772218636580808e-05, "loss": 2.928, "step": 5540 }, { "epoch": 0.03488303576689647, "grad_norm": 8.169791221618652, "learning_rate": 1.9771799535636155e-05, "loss": 3.0318, "step": 5550 }, { "epoch": 0.03494588808359358, "grad_norm": 8.387129783630371, "learning_rate": 1.9771380434691502e-05, "loss": 3.1214, "step": 5560 }, { "epoch": 0.03500874040029069, "grad_norm": 7.538905143737793, "learning_rate": 1.977096133374685e-05, "loss": 3.1572, "step": 5570 }, { "epoch": 0.0350715927169878, "grad_norm": 8.610246658325195, "learning_rate": 1.9770542232802196e-05, "loss": 2.86, "step": 5580 }, { "epoch": 0.035134445033684916, "grad_norm": 10.131224632263184, "learning_rate": 1.9770123131857543e-05, "loss": 3.0039, "step": 5590 }, { "epoch": 0.035197297350382024, "grad_norm": 14.25268840789795, "learning_rate": 1.9769704030912887e-05, "loss": 2.9156, "step": 5600 }, { "epoch": 0.03526014966707913, "grad_norm": 8.070310592651367, "learning_rate": 1.9769284929968234e-05, "loss": 2.9185, "step": 5610 }, { "epoch": 0.03532300198377625, "grad_norm": 8.09715747833252, "learning_rate": 1.976886582902358e-05, "loss": 2.9834, "step": 5620 }, { "epoch": 0.035385854300473356, "grad_norm": 9.936697006225586, "learning_rate": 1.9768446728078928e-05, "loss": 3.1071, "step": 5630 }, { "epoch": 0.035448706617170464, "grad_norm": 7.963923931121826, "learning_rate": 1.9768027627134275e-05, "loss": 3.1901, "step": 5640 }, { "epoch": 0.03551155893386758, "grad_norm": 10.36141300201416, "learning_rate": 1.976760852618962e-05, "loss": 2.9536, "step": 5650 }, { "epoch": 0.03557441125056469, "grad_norm": 8.136571884155273, "learning_rate": 1.9767189425244966e-05, "loss": 3.0211, "step": 5660 }, { "epoch": 0.0356372635672618, "grad_norm": 7.600170612335205, "learning_rate": 1.9766770324300313e-05, "loss": 2.8099, "step": 5670 }, { "epoch": 0.03570011588395891, "grad_norm": 7.338781356811523, "learning_rate": 1.976635122335566e-05, "loss": 2.9765, "step": 5680 }, { "epoch": 0.03576296820065602, "grad_norm": 6.763370513916016, "learning_rate": 1.9765932122411003e-05, "loss": 2.9867, "step": 5690 }, { "epoch": 0.035825820517353134, "grad_norm": 7.72622537612915, "learning_rate": 1.976551302146635e-05, "loss": 2.9673, "step": 5700 }, { "epoch": 0.03588867283405024, "grad_norm": 7.801615238189697, "learning_rate": 1.9765093920521698e-05, "loss": 2.9499, "step": 5710 }, { "epoch": 0.03595152515074735, "grad_norm": 9.447830200195312, "learning_rate": 1.9764674819577045e-05, "loss": 3.1475, "step": 5720 }, { "epoch": 0.036014377467444465, "grad_norm": 9.913958549499512, "learning_rate": 1.9764255718632392e-05, "loss": 2.7094, "step": 5730 }, { "epoch": 0.03607722978414157, "grad_norm": 7.018682479858398, "learning_rate": 1.976383661768774e-05, "loss": 2.7891, "step": 5740 }, { "epoch": 0.03614008210083869, "grad_norm": 8.090155601501465, "learning_rate": 1.9763417516743082e-05, "loss": 2.8387, "step": 5750 }, { "epoch": 0.0362029344175358, "grad_norm": 11.208479881286621, "learning_rate": 1.976299841579843e-05, "loss": 2.8956, "step": 5760 }, { "epoch": 0.036265786734232905, "grad_norm": 8.381065368652344, "learning_rate": 1.9762579314853777e-05, "loss": 2.8762, "step": 5770 }, { "epoch": 0.03632863905093002, "grad_norm": 8.11691951751709, "learning_rate": 1.9762160213909124e-05, "loss": 2.7947, "step": 5780 }, { "epoch": 0.03639149136762713, "grad_norm": 9.803716659545898, "learning_rate": 1.976174111296447e-05, "loss": 2.8678, "step": 5790 }, { "epoch": 0.036454343684324236, "grad_norm": 7.850063323974609, "learning_rate": 1.9761322012019818e-05, "loss": 2.9399, "step": 5800 }, { "epoch": 0.03651719600102135, "grad_norm": 7.630870342254639, "learning_rate": 1.9760902911075165e-05, "loss": 3.0078, "step": 5810 }, { "epoch": 0.03658004831771846, "grad_norm": 7.013731956481934, "learning_rate": 1.9760483810130512e-05, "loss": 2.9047, "step": 5820 }, { "epoch": 0.036642900634415575, "grad_norm": 12.404043197631836, "learning_rate": 1.9760064709185856e-05, "loss": 2.8254, "step": 5830 }, { "epoch": 0.03670575295111268, "grad_norm": 6.9944305419921875, "learning_rate": 1.9759645608241203e-05, "loss": 2.8782, "step": 5840 }, { "epoch": 0.03676860526780979, "grad_norm": 10.313700675964355, "learning_rate": 1.975922650729655e-05, "loss": 2.9288, "step": 5850 }, { "epoch": 0.036831457584506906, "grad_norm": 8.944605827331543, "learning_rate": 1.9758807406351897e-05, "loss": 2.9773, "step": 5860 }, { "epoch": 0.036894309901204014, "grad_norm": 7.414758205413818, "learning_rate": 1.975838830540724e-05, "loss": 2.7341, "step": 5870 }, { "epoch": 0.03695716221790112, "grad_norm": 9.151510238647461, "learning_rate": 1.9757969204462588e-05, "loss": 2.9997, "step": 5880 }, { "epoch": 0.03702001453459824, "grad_norm": 6.836916923522949, "learning_rate": 1.9757550103517935e-05, "loss": 2.9573, "step": 5890 }, { "epoch": 0.037082866851295346, "grad_norm": 6.474551677703857, "learning_rate": 1.9757131002573282e-05, "loss": 2.939, "step": 5900 }, { "epoch": 0.03714571916799246, "grad_norm": 8.964326858520508, "learning_rate": 1.9756711901628625e-05, "loss": 3.0834, "step": 5910 }, { "epoch": 0.03720857148468957, "grad_norm": 9.188521385192871, "learning_rate": 1.9756292800683973e-05, "loss": 2.7889, "step": 5920 }, { "epoch": 0.03727142380138668, "grad_norm": 8.61336612701416, "learning_rate": 1.975587369973932e-05, "loss": 3.0299, "step": 5930 }, { "epoch": 0.03733427611808379, "grad_norm": 11.761985778808594, "learning_rate": 1.9755454598794667e-05, "loss": 3.0475, "step": 5940 }, { "epoch": 0.0373971284347809, "grad_norm": 8.325447082519531, "learning_rate": 1.9755035497850014e-05, "loss": 2.8828, "step": 5950 }, { "epoch": 0.03745998075147801, "grad_norm": 8.411840438842773, "learning_rate": 1.975461639690536e-05, "loss": 3.0071, "step": 5960 }, { "epoch": 0.037522833068175124, "grad_norm": 7.479046821594238, "learning_rate": 1.9754197295960708e-05, "loss": 2.8171, "step": 5970 }, { "epoch": 0.03758568538487223, "grad_norm": 6.711784839630127, "learning_rate": 1.9753778195016055e-05, "loss": 2.7996, "step": 5980 }, { "epoch": 0.03764853770156935, "grad_norm": 8.660781860351562, "learning_rate": 1.9753359094071402e-05, "loss": 3.0605, "step": 5990 }, { "epoch": 0.037711390018266455, "grad_norm": 8.534005165100098, "learning_rate": 1.9752939993126746e-05, "loss": 2.683, "step": 6000 }, { "epoch": 0.03777424233496356, "grad_norm": 7.058311939239502, "learning_rate": 1.9752520892182093e-05, "loss": 3.0076, "step": 6010 }, { "epoch": 0.03783709465166068, "grad_norm": 6.711240291595459, "learning_rate": 1.975210179123744e-05, "loss": 3.0649, "step": 6020 }, { "epoch": 0.03789994696835779, "grad_norm": 7.963628768920898, "learning_rate": 1.9751682690292787e-05, "loss": 2.7924, "step": 6030 }, { "epoch": 0.037962799285054895, "grad_norm": 15.460878372192383, "learning_rate": 1.9751263589348134e-05, "loss": 2.5997, "step": 6040 }, { "epoch": 0.03802565160175201, "grad_norm": 8.393654823303223, "learning_rate": 1.9750844488403478e-05, "loss": 2.8854, "step": 6050 }, { "epoch": 0.03808850391844912, "grad_norm": 7.014850616455078, "learning_rate": 1.9750425387458825e-05, "loss": 2.8847, "step": 6060 }, { "epoch": 0.03815135623514623, "grad_norm": 7.304200172424316, "learning_rate": 1.9750006286514172e-05, "loss": 2.8667, "step": 6070 }, { "epoch": 0.03821420855184334, "grad_norm": 7.77520751953125, "learning_rate": 1.974958718556952e-05, "loss": 2.8748, "step": 6080 }, { "epoch": 0.03827706086854045, "grad_norm": 8.249287605285645, "learning_rate": 1.9749168084624863e-05, "loss": 2.9645, "step": 6090 }, { "epoch": 0.038339913185237565, "grad_norm": 8.533134460449219, "learning_rate": 1.974874898368021e-05, "loss": 3.1404, "step": 6100 }, { "epoch": 0.03840276550193467, "grad_norm": 7.188451766967773, "learning_rate": 1.9748329882735557e-05, "loss": 2.7063, "step": 6110 }, { "epoch": 0.03846561781863178, "grad_norm": 10.097341537475586, "learning_rate": 1.9747910781790904e-05, "loss": 2.6763, "step": 6120 }, { "epoch": 0.038528470135328896, "grad_norm": 9.750001907348633, "learning_rate": 1.974749168084625e-05, "loss": 2.8141, "step": 6130 }, { "epoch": 0.038591322452026004, "grad_norm": 8.507477760314941, "learning_rate": 1.9747072579901595e-05, "loss": 3.0148, "step": 6140 }, { "epoch": 0.03865417476872312, "grad_norm": 6.048313617706299, "learning_rate": 1.974665347895694e-05, "loss": 2.7566, "step": 6150 }, { "epoch": 0.03871702708542023, "grad_norm": 7.9719014167785645, "learning_rate": 1.974623437801229e-05, "loss": 2.7615, "step": 6160 }, { "epoch": 0.038779879402117336, "grad_norm": 16.9856014251709, "learning_rate": 1.9745815277067636e-05, "loss": 2.8321, "step": 6170 }, { "epoch": 0.03884273171881445, "grad_norm": 7.8329973220825195, "learning_rate": 1.9745396176122983e-05, "loss": 2.9907, "step": 6180 }, { "epoch": 0.03890558403551156, "grad_norm": 8.186614036560059, "learning_rate": 1.974497707517833e-05, "loss": 3.0901, "step": 6190 }, { "epoch": 0.03896843635220867, "grad_norm": 7.800524711608887, "learning_rate": 1.9744557974233677e-05, "loss": 3.0279, "step": 6200 }, { "epoch": 0.03903128866890578, "grad_norm": 8.399930000305176, "learning_rate": 1.9744138873289024e-05, "loss": 2.6055, "step": 6210 }, { "epoch": 0.03909414098560289, "grad_norm": 7.567289352416992, "learning_rate": 1.9743719772344368e-05, "loss": 2.8454, "step": 6220 }, { "epoch": 0.0391569933023, "grad_norm": 8.097586631774902, "learning_rate": 1.9743300671399715e-05, "loss": 2.8724, "step": 6230 }, { "epoch": 0.039219845618997114, "grad_norm": 7.968715190887451, "learning_rate": 1.9742881570455062e-05, "loss": 2.8206, "step": 6240 }, { "epoch": 0.03928269793569422, "grad_norm": 8.068397521972656, "learning_rate": 1.974246246951041e-05, "loss": 2.7349, "step": 6250 }, { "epoch": 0.03934555025239134, "grad_norm": 7.101130962371826, "learning_rate": 1.9742043368565756e-05, "loss": 2.6308, "step": 6260 }, { "epoch": 0.039408402569088445, "grad_norm": 7.49091100692749, "learning_rate": 1.97416242676211e-05, "loss": 2.7669, "step": 6270 }, { "epoch": 0.03947125488578555, "grad_norm": 7.830532550811768, "learning_rate": 1.9741205166676447e-05, "loss": 2.7484, "step": 6280 }, { "epoch": 0.03953410720248267, "grad_norm": 7.557278633117676, "learning_rate": 1.9740786065731794e-05, "loss": 2.8416, "step": 6290 }, { "epoch": 0.03959695951917978, "grad_norm": 7.9041972160339355, "learning_rate": 1.974036696478714e-05, "loss": 2.781, "step": 6300 }, { "epoch": 0.039659811835876885, "grad_norm": 8.118491172790527, "learning_rate": 1.9739947863842485e-05, "loss": 3.0058, "step": 6310 }, { "epoch": 0.039722664152574, "grad_norm": 8.89059066772461, "learning_rate": 1.973952876289783e-05, "loss": 2.9524, "step": 6320 }, { "epoch": 0.03978551646927111, "grad_norm": 7.545521259307861, "learning_rate": 1.973910966195318e-05, "loss": 2.9124, "step": 6330 }, { "epoch": 0.03984836878596822, "grad_norm": 8.568503379821777, "learning_rate": 1.9738690561008526e-05, "loss": 3.0594, "step": 6340 }, { "epoch": 0.03991122110266533, "grad_norm": 8.055715560913086, "learning_rate": 1.9738271460063873e-05, "loss": 3.1005, "step": 6350 }, { "epoch": 0.03997407341936244, "grad_norm": 8.731293678283691, "learning_rate": 1.973785235911922e-05, "loss": 2.8984, "step": 6360 }, { "epoch": 0.040036925736059555, "grad_norm": 7.474778652191162, "learning_rate": 1.9737433258174567e-05, "loss": 2.8651, "step": 6370 }, { "epoch": 0.04009977805275666, "grad_norm": 7.777046203613281, "learning_rate": 1.973701415722991e-05, "loss": 2.7705, "step": 6380 }, { "epoch": 0.04016263036945377, "grad_norm": 6.773117542266846, "learning_rate": 1.9736595056285258e-05, "loss": 2.8959, "step": 6390 }, { "epoch": 0.040225482686150886, "grad_norm": 8.405336380004883, "learning_rate": 1.9736175955340605e-05, "loss": 2.6969, "step": 6400 }, { "epoch": 0.040288335002847994, "grad_norm": 9.41283893585205, "learning_rate": 1.9735756854395952e-05, "loss": 2.9477, "step": 6410 }, { "epoch": 0.04035118731954511, "grad_norm": 8.614728927612305, "learning_rate": 1.97353377534513e-05, "loss": 2.8038, "step": 6420 }, { "epoch": 0.04041403963624222, "grad_norm": 7.485757827758789, "learning_rate": 1.9734918652506646e-05, "loss": 2.7371, "step": 6430 }, { "epoch": 0.040476891952939326, "grad_norm": 8.51486587524414, "learning_rate": 1.9734499551561993e-05, "loss": 2.721, "step": 6440 }, { "epoch": 0.04053974426963644, "grad_norm": 8.288865089416504, "learning_rate": 1.9734080450617337e-05, "loss": 2.745, "step": 6450 }, { "epoch": 0.04060259658633355, "grad_norm": 7.884392261505127, "learning_rate": 1.9733661349672684e-05, "loss": 2.7664, "step": 6460 }, { "epoch": 0.04066544890303066, "grad_norm": 7.100053787231445, "learning_rate": 1.973324224872803e-05, "loss": 2.7882, "step": 6470 }, { "epoch": 0.04072830121972777, "grad_norm": 7.000625133514404, "learning_rate": 1.9732823147783378e-05, "loss": 2.6478, "step": 6480 }, { "epoch": 0.04079115353642488, "grad_norm": 8.798921585083008, "learning_rate": 1.973240404683872e-05, "loss": 2.8204, "step": 6490 }, { "epoch": 0.040854005853121995, "grad_norm": 7.540419101715088, "learning_rate": 1.973198494589407e-05, "loss": 2.6759, "step": 6500 }, { "epoch": 0.040916858169819104, "grad_norm": 7.443111419677734, "learning_rate": 1.9731565844949416e-05, "loss": 2.7538, "step": 6510 }, { "epoch": 0.04097971048651621, "grad_norm": 8.110397338867188, "learning_rate": 1.9731146744004763e-05, "loss": 2.8167, "step": 6520 }, { "epoch": 0.04104256280321333, "grad_norm": 7.762920379638672, "learning_rate": 1.9730727643060107e-05, "loss": 2.7781, "step": 6530 }, { "epoch": 0.041105415119910435, "grad_norm": 7.832665920257568, "learning_rate": 1.9730308542115454e-05, "loss": 2.8305, "step": 6540 }, { "epoch": 0.04116826743660754, "grad_norm": 7.673941612243652, "learning_rate": 1.97298894411708e-05, "loss": 2.8912, "step": 6550 }, { "epoch": 0.04123111975330466, "grad_norm": 9.414925575256348, "learning_rate": 1.9729470340226148e-05, "loss": 2.7999, "step": 6560 }, { "epoch": 0.041293972070001767, "grad_norm": 10.081149101257324, "learning_rate": 1.9729051239281495e-05, "loss": 2.8094, "step": 6570 }, { "epoch": 0.04135682438669888, "grad_norm": 7.751121997833252, "learning_rate": 1.9728632138336842e-05, "loss": 2.7884, "step": 6580 }, { "epoch": 0.04141967670339599, "grad_norm": 11.314135551452637, "learning_rate": 1.972821303739219e-05, "loss": 2.7703, "step": 6590 }, { "epoch": 0.0414825290200931, "grad_norm": 8.257302284240723, "learning_rate": 1.9727793936447536e-05, "loss": 3.097, "step": 6600 }, { "epoch": 0.04154538133679021, "grad_norm": 7.9447526931762695, "learning_rate": 1.9727374835502883e-05, "loss": 2.7905, "step": 6610 }, { "epoch": 0.04160823365348732, "grad_norm": 6.873098850250244, "learning_rate": 1.9726955734558227e-05, "loss": 2.7661, "step": 6620 }, { "epoch": 0.04167108597018443, "grad_norm": 8.159378051757812, "learning_rate": 1.9726536633613574e-05, "loss": 3.0937, "step": 6630 }, { "epoch": 0.041733938286881544, "grad_norm": 8.810033798217773, "learning_rate": 1.972611753266892e-05, "loss": 2.7267, "step": 6640 }, { "epoch": 0.04179679060357865, "grad_norm": 7.456930160522461, "learning_rate": 1.9725698431724268e-05, "loss": 2.8697, "step": 6650 }, { "epoch": 0.04185964292027577, "grad_norm": 13.624564170837402, "learning_rate": 1.9725279330779615e-05, "loss": 2.6359, "step": 6660 }, { "epoch": 0.041922495236972876, "grad_norm": 7.865520477294922, "learning_rate": 1.972486022983496e-05, "loss": 2.6366, "step": 6670 }, { "epoch": 0.041985347553669984, "grad_norm": 9.472454071044922, "learning_rate": 1.9724441128890306e-05, "loss": 2.9113, "step": 6680 }, { "epoch": 0.0420481998703671, "grad_norm": 8.41500473022461, "learning_rate": 1.9724022027945653e-05, "loss": 3.1977, "step": 6690 }, { "epoch": 0.04211105218706421, "grad_norm": 7.459606170654297, "learning_rate": 1.9723602927001e-05, "loss": 2.5954, "step": 6700 }, { "epoch": 0.042173904503761316, "grad_norm": 8.650262832641602, "learning_rate": 1.9723183826056344e-05, "loss": 2.937, "step": 6710 }, { "epoch": 0.04223675682045843, "grad_norm": 8.585243225097656, "learning_rate": 1.972276472511169e-05, "loss": 2.7615, "step": 6720 }, { "epoch": 0.04229960913715554, "grad_norm": 13.335293769836426, "learning_rate": 1.9722345624167038e-05, "loss": 3.1172, "step": 6730 }, { "epoch": 0.042362461453852654, "grad_norm": 6.974452018737793, "learning_rate": 1.9721926523222385e-05, "loss": 3.0229, "step": 6740 }, { "epoch": 0.04242531377054976, "grad_norm": 8.071279525756836, "learning_rate": 1.9721507422277732e-05, "loss": 2.7496, "step": 6750 }, { "epoch": 0.04248816608724687, "grad_norm": 9.10824966430664, "learning_rate": 1.9721088321333076e-05, "loss": 2.8403, "step": 6760 }, { "epoch": 0.042551018403943985, "grad_norm": 8.561415672302246, "learning_rate": 1.9720669220388423e-05, "loss": 2.7773, "step": 6770 }, { "epoch": 0.042613870720641094, "grad_norm": 7.2878499031066895, "learning_rate": 1.972025011944377e-05, "loss": 2.5423, "step": 6780 }, { "epoch": 0.0426767230373382, "grad_norm": 8.255075454711914, "learning_rate": 1.9719831018499117e-05, "loss": 3.0136, "step": 6790 }, { "epoch": 0.04273957535403532, "grad_norm": 7.35914421081543, "learning_rate": 1.9719411917554464e-05, "loss": 2.9261, "step": 6800 }, { "epoch": 0.042802427670732425, "grad_norm": 7.392853736877441, "learning_rate": 1.971899281660981e-05, "loss": 2.7858, "step": 6810 }, { "epoch": 0.04286527998742954, "grad_norm": 7.571976661682129, "learning_rate": 1.9718573715665158e-05, "loss": 3.0002, "step": 6820 }, { "epoch": 0.04292813230412665, "grad_norm": 10.860123634338379, "learning_rate": 1.9718154614720505e-05, "loss": 3.0203, "step": 6830 }, { "epoch": 0.042990984620823756, "grad_norm": 8.156268119812012, "learning_rate": 1.971773551377585e-05, "loss": 2.9101, "step": 6840 }, { "epoch": 0.04305383693752087, "grad_norm": 8.31237506866455, "learning_rate": 1.9717316412831196e-05, "loss": 2.754, "step": 6850 }, { "epoch": 0.04311668925421798, "grad_norm": 8.099552154541016, "learning_rate": 1.9716897311886543e-05, "loss": 2.919, "step": 6860 }, { "epoch": 0.04317954157091509, "grad_norm": 7.635723114013672, "learning_rate": 1.971647821094189e-05, "loss": 2.7609, "step": 6870 }, { "epoch": 0.0432423938876122, "grad_norm": 7.842891216278076, "learning_rate": 1.9716059109997237e-05, "loss": 2.8499, "step": 6880 }, { "epoch": 0.04330524620430931, "grad_norm": 7.430498123168945, "learning_rate": 1.971564000905258e-05, "loss": 2.773, "step": 6890 }, { "epoch": 0.04336809852100642, "grad_norm": 7.651177883148193, "learning_rate": 1.9715220908107928e-05, "loss": 2.7329, "step": 6900 }, { "epoch": 0.043430950837703534, "grad_norm": 7.382655620574951, "learning_rate": 1.9714801807163275e-05, "loss": 2.8792, "step": 6910 }, { "epoch": 0.04349380315440064, "grad_norm": 8.936134338378906, "learning_rate": 1.9714382706218622e-05, "loss": 2.8317, "step": 6920 }, { "epoch": 0.04355665547109776, "grad_norm": 7.522771835327148, "learning_rate": 1.9713963605273966e-05, "loss": 2.6853, "step": 6930 }, { "epoch": 0.043619507787794866, "grad_norm": 8.205595016479492, "learning_rate": 1.9713544504329313e-05, "loss": 2.8547, "step": 6940 }, { "epoch": 0.043682360104491974, "grad_norm": 8.565117835998535, "learning_rate": 1.971312540338466e-05, "loss": 2.6827, "step": 6950 }, { "epoch": 0.04374521242118909, "grad_norm": 7.382494926452637, "learning_rate": 1.9712706302440007e-05, "loss": 2.7841, "step": 6960 }, { "epoch": 0.0438080647378862, "grad_norm": 7.452759265899658, "learning_rate": 1.9712287201495354e-05, "loss": 2.8491, "step": 6970 }, { "epoch": 0.043870917054583305, "grad_norm": 6.809290885925293, "learning_rate": 1.97118681005507e-05, "loss": 2.7978, "step": 6980 }, { "epoch": 0.04393376937128042, "grad_norm": 8.27755069732666, "learning_rate": 1.9711448999606048e-05, "loss": 2.9641, "step": 6990 }, { "epoch": 0.04399662168797753, "grad_norm": 7.654524803161621, "learning_rate": 1.9711029898661395e-05, "loss": 2.7442, "step": 7000 }, { "epoch": 0.044059474004674644, "grad_norm": 7.790019989013672, "learning_rate": 1.971061079771674e-05, "loss": 2.8155, "step": 7010 }, { "epoch": 0.04412232632137175, "grad_norm": 7.8720502853393555, "learning_rate": 1.9710191696772086e-05, "loss": 2.6249, "step": 7020 }, { "epoch": 0.04418517863806886, "grad_norm": 8.328658103942871, "learning_rate": 1.9709772595827433e-05, "loss": 2.7292, "step": 7030 }, { "epoch": 0.044248030954765975, "grad_norm": 9.934185981750488, "learning_rate": 1.970935349488278e-05, "loss": 2.8282, "step": 7040 }, { "epoch": 0.044310883271463083, "grad_norm": 7.164620876312256, "learning_rate": 1.9708934393938127e-05, "loss": 2.702, "step": 7050 }, { "epoch": 0.04437373558816019, "grad_norm": 7.628801345825195, "learning_rate": 1.9708515292993474e-05, "loss": 2.8875, "step": 7060 }, { "epoch": 0.04443658790485731, "grad_norm": 7.553658485412598, "learning_rate": 1.9708096192048818e-05, "loss": 2.5208, "step": 7070 }, { "epoch": 0.044499440221554415, "grad_norm": 8.54419994354248, "learning_rate": 1.9707677091104165e-05, "loss": 2.8091, "step": 7080 }, { "epoch": 0.04456229253825153, "grad_norm": 7.7056660652160645, "learning_rate": 1.9707257990159512e-05, "loss": 2.925, "step": 7090 }, { "epoch": 0.04462514485494864, "grad_norm": 8.290834426879883, "learning_rate": 1.970683888921486e-05, "loss": 2.7995, "step": 7100 }, { "epoch": 0.044687997171645746, "grad_norm": 7.718125820159912, "learning_rate": 1.9706419788270203e-05, "loss": 2.6879, "step": 7110 }, { "epoch": 0.04475084948834286, "grad_norm": 7.2181830406188965, "learning_rate": 1.970600068732555e-05, "loss": 2.8546, "step": 7120 }, { "epoch": 0.04481370180503997, "grad_norm": 6.926623821258545, "learning_rate": 1.9705581586380897e-05, "loss": 2.8165, "step": 7130 }, { "epoch": 0.04487655412173708, "grad_norm": 8.519497871398926, "learning_rate": 1.9705162485436244e-05, "loss": 2.9327, "step": 7140 }, { "epoch": 0.04493940643843419, "grad_norm": 7.977126121520996, "learning_rate": 1.9704743384491588e-05, "loss": 2.9805, "step": 7150 }, { "epoch": 0.0450022587551313, "grad_norm": 8.655324935913086, "learning_rate": 1.9704324283546935e-05, "loss": 2.6832, "step": 7160 }, { "epoch": 0.045065111071828416, "grad_norm": 8.400687217712402, "learning_rate": 1.9703905182602282e-05, "loss": 2.7897, "step": 7170 }, { "epoch": 0.045127963388525524, "grad_norm": 7.366981029510498, "learning_rate": 1.970348608165763e-05, "loss": 3.0097, "step": 7180 }, { "epoch": 0.04519081570522263, "grad_norm": 7.5406599044799805, "learning_rate": 1.9703066980712976e-05, "loss": 2.7499, "step": 7190 }, { "epoch": 0.04525366802191975, "grad_norm": 7.338527202606201, "learning_rate": 1.9702647879768323e-05, "loss": 2.733, "step": 7200 }, { "epoch": 0.045316520338616856, "grad_norm": 8.345789909362793, "learning_rate": 1.970222877882367e-05, "loss": 2.6885, "step": 7210 }, { "epoch": 0.045379372655313964, "grad_norm": 7.962245464324951, "learning_rate": 1.9701809677879017e-05, "loss": 2.5881, "step": 7220 }, { "epoch": 0.04544222497201108, "grad_norm": 8.405911445617676, "learning_rate": 1.9701390576934364e-05, "loss": 2.7834, "step": 7230 }, { "epoch": 0.04550507728870819, "grad_norm": 8.099193572998047, "learning_rate": 1.9700971475989708e-05, "loss": 2.8454, "step": 7240 }, { "epoch": 0.0455679296054053, "grad_norm": 9.434566497802734, "learning_rate": 1.9700552375045055e-05, "loss": 3.0116, "step": 7250 }, { "epoch": 0.04563078192210241, "grad_norm": 8.67699909210205, "learning_rate": 1.9700133274100402e-05, "loss": 2.8667, "step": 7260 }, { "epoch": 0.04569363423879952, "grad_norm": 7.456284999847412, "learning_rate": 1.969971417315575e-05, "loss": 2.5807, "step": 7270 }, { "epoch": 0.045756486555496634, "grad_norm": 7.954057693481445, "learning_rate": 1.9699295072211096e-05, "loss": 2.9401, "step": 7280 }, { "epoch": 0.04581933887219374, "grad_norm": 7.707198143005371, "learning_rate": 1.969887597126644e-05, "loss": 2.916, "step": 7290 }, { "epoch": 0.04588219118889085, "grad_norm": 9.013371467590332, "learning_rate": 1.9698456870321787e-05, "loss": 2.8672, "step": 7300 }, { "epoch": 0.045945043505587965, "grad_norm": 6.506833076477051, "learning_rate": 1.9698037769377134e-05, "loss": 2.6158, "step": 7310 }, { "epoch": 0.04600789582228507, "grad_norm": 7.845186233520508, "learning_rate": 1.969761866843248e-05, "loss": 2.6699, "step": 7320 }, { "epoch": 0.04607074813898219, "grad_norm": 7.635332107543945, "learning_rate": 1.9697199567487825e-05, "loss": 2.9555, "step": 7330 }, { "epoch": 0.0461336004556793, "grad_norm": 10.10507869720459, "learning_rate": 1.9696780466543172e-05, "loss": 2.4269, "step": 7340 }, { "epoch": 0.046196452772376405, "grad_norm": 8.401609420776367, "learning_rate": 1.969636136559852e-05, "loss": 2.8419, "step": 7350 }, { "epoch": 0.04625930508907352, "grad_norm": 8.491729736328125, "learning_rate": 1.9695942264653866e-05, "loss": 2.5032, "step": 7360 }, { "epoch": 0.04632215740577063, "grad_norm": 7.182466983795166, "learning_rate": 1.9695523163709213e-05, "loss": 2.8185, "step": 7370 }, { "epoch": 0.046385009722467736, "grad_norm": 8.073323249816895, "learning_rate": 1.969510406276456e-05, "loss": 2.8739, "step": 7380 }, { "epoch": 0.04644786203916485, "grad_norm": 8.310892105102539, "learning_rate": 1.9694684961819904e-05, "loss": 2.8783, "step": 7390 }, { "epoch": 0.04651071435586196, "grad_norm": 8.089179039001465, "learning_rate": 1.969426586087525e-05, "loss": 2.7613, "step": 7400 }, { "epoch": 0.046573566672559075, "grad_norm": 9.599477767944336, "learning_rate": 1.9693846759930598e-05, "loss": 2.6879, "step": 7410 }, { "epoch": 0.04663641898925618, "grad_norm": 8.713695526123047, "learning_rate": 1.9693427658985945e-05, "loss": 2.6981, "step": 7420 }, { "epoch": 0.04669927130595329, "grad_norm": 7.749399662017822, "learning_rate": 1.9693008558041292e-05, "loss": 2.771, "step": 7430 }, { "epoch": 0.046762123622650406, "grad_norm": 9.416159629821777, "learning_rate": 1.969258945709664e-05, "loss": 2.8693, "step": 7440 }, { "epoch": 0.046824975939347514, "grad_norm": 6.918641567230225, "learning_rate": 1.9692170356151986e-05, "loss": 2.7029, "step": 7450 }, { "epoch": 0.04688782825604462, "grad_norm": 6.363226890563965, "learning_rate": 1.969175125520733e-05, "loss": 2.9845, "step": 7460 }, { "epoch": 0.04695068057274174, "grad_norm": 7.930229663848877, "learning_rate": 1.9691332154262677e-05, "loss": 2.5707, "step": 7470 }, { "epoch": 0.047013532889438846, "grad_norm": 9.078071594238281, "learning_rate": 1.9690913053318024e-05, "loss": 2.7976, "step": 7480 }, { "epoch": 0.04707638520613596, "grad_norm": 9.267046928405762, "learning_rate": 1.969049395237337e-05, "loss": 2.8006, "step": 7490 }, { "epoch": 0.04713923752283307, "grad_norm": 7.660119533538818, "learning_rate": 1.9690074851428718e-05, "loss": 2.9633, "step": 7500 }, { "epoch": 0.04720208983953018, "grad_norm": 7.17922830581665, "learning_rate": 1.9689655750484062e-05, "loss": 2.645, "step": 7510 }, { "epoch": 0.04726494215622729, "grad_norm": 9.018447875976562, "learning_rate": 1.968923664953941e-05, "loss": 2.8149, "step": 7520 }, { "epoch": 0.0473277944729244, "grad_norm": 8.614439010620117, "learning_rate": 1.9688817548594756e-05, "loss": 2.7819, "step": 7530 }, { "epoch": 0.04739064678962151, "grad_norm": 8.659832954406738, "learning_rate": 1.9688398447650103e-05, "loss": 2.8259, "step": 7540 }, { "epoch": 0.047453499106318624, "grad_norm": 7.812225818634033, "learning_rate": 1.9687979346705447e-05, "loss": 2.7513, "step": 7550 }, { "epoch": 0.04751635142301573, "grad_norm": 7.860219955444336, "learning_rate": 1.9687560245760794e-05, "loss": 2.7221, "step": 7560 }, { "epoch": 0.04757920373971284, "grad_norm": 7.393378257751465, "learning_rate": 1.968714114481614e-05, "loss": 2.9303, "step": 7570 }, { "epoch": 0.047642056056409955, "grad_norm": 11.162703514099121, "learning_rate": 1.9686722043871488e-05, "loss": 2.6346, "step": 7580 }, { "epoch": 0.04770490837310706, "grad_norm": 6.968651294708252, "learning_rate": 1.9686302942926835e-05, "loss": 2.6713, "step": 7590 }, { "epoch": 0.04776776068980418, "grad_norm": 7.753033638000488, "learning_rate": 1.9685883841982182e-05, "loss": 2.6281, "step": 7600 }, { "epoch": 0.04783061300650129, "grad_norm": 9.735809326171875, "learning_rate": 1.968546474103753e-05, "loss": 2.6198, "step": 7610 }, { "epoch": 0.047893465323198395, "grad_norm": 9.786466598510742, "learning_rate": 1.9685045640092876e-05, "loss": 2.7877, "step": 7620 }, { "epoch": 0.04795631763989551, "grad_norm": 7.7668843269348145, "learning_rate": 1.968462653914822e-05, "loss": 2.732, "step": 7630 }, { "epoch": 0.04801916995659262, "grad_norm": 7.759322166442871, "learning_rate": 1.9684207438203567e-05, "loss": 2.5917, "step": 7640 }, { "epoch": 0.048082022273289726, "grad_norm": 8.343338966369629, "learning_rate": 1.9683788337258914e-05, "loss": 2.7432, "step": 7650 }, { "epoch": 0.04814487458998684, "grad_norm": 8.134322166442871, "learning_rate": 1.968336923631426e-05, "loss": 2.638, "step": 7660 }, { "epoch": 0.04820772690668395, "grad_norm": 7.430497169494629, "learning_rate": 1.9682950135369608e-05, "loss": 2.5861, "step": 7670 }, { "epoch": 0.048270579223381065, "grad_norm": 7.22769021987915, "learning_rate": 1.9682531034424955e-05, "loss": 2.7202, "step": 7680 }, { "epoch": 0.04833343154007817, "grad_norm": 6.811624050140381, "learning_rate": 1.96821119334803e-05, "loss": 2.6732, "step": 7690 }, { "epoch": 0.04839628385677528, "grad_norm": 6.873176097869873, "learning_rate": 1.9681692832535646e-05, "loss": 2.7445, "step": 7700 }, { "epoch": 0.048459136173472396, "grad_norm": 7.949793338775635, "learning_rate": 1.9681273731590993e-05, "loss": 2.8871, "step": 7710 }, { "epoch": 0.048521988490169504, "grad_norm": 10.053329467773438, "learning_rate": 1.968085463064634e-05, "loss": 2.6914, "step": 7720 }, { "epoch": 0.04858484080686661, "grad_norm": 6.918039798736572, "learning_rate": 1.9680435529701684e-05, "loss": 2.6115, "step": 7730 }, { "epoch": 0.04864769312356373, "grad_norm": 7.523407936096191, "learning_rate": 1.968001642875703e-05, "loss": 2.588, "step": 7740 }, { "epoch": 0.048710545440260836, "grad_norm": 7.9065260887146, "learning_rate": 1.9679597327812378e-05, "loss": 2.8345, "step": 7750 }, { "epoch": 0.04877339775695795, "grad_norm": 7.3336896896362305, "learning_rate": 1.9679178226867725e-05, "loss": 2.7611, "step": 7760 }, { "epoch": 0.04883625007365506, "grad_norm": 8.261215209960938, "learning_rate": 1.967875912592307e-05, "loss": 2.8819, "step": 7770 }, { "epoch": 0.04889910239035217, "grad_norm": 9.53640365600586, "learning_rate": 1.9678340024978416e-05, "loss": 2.8616, "step": 7780 }, { "epoch": 0.04896195470704928, "grad_norm": 6.995221138000488, "learning_rate": 1.9677920924033763e-05, "loss": 2.5584, "step": 7790 }, { "epoch": 0.04902480702374639, "grad_norm": 8.18701171875, "learning_rate": 1.967750182308911e-05, "loss": 2.7876, "step": 7800 }, { "epoch": 0.0490876593404435, "grad_norm": 8.947796821594238, "learning_rate": 1.9677082722144457e-05, "loss": 2.6307, "step": 7810 }, { "epoch": 0.049150511657140614, "grad_norm": 9.869112968444824, "learning_rate": 1.9676663621199804e-05, "loss": 2.4254, "step": 7820 }, { "epoch": 0.04921336397383772, "grad_norm": 8.49588394165039, "learning_rate": 1.967624452025515e-05, "loss": 2.7566, "step": 7830 }, { "epoch": 0.04927621629053484, "grad_norm": 8.207067489624023, "learning_rate": 1.9675825419310498e-05, "loss": 2.9028, "step": 7840 }, { "epoch": 0.049339068607231945, "grad_norm": 7.481714248657227, "learning_rate": 1.9675406318365845e-05, "loss": 2.7137, "step": 7850 }, { "epoch": 0.04940192092392905, "grad_norm": 9.47549819946289, "learning_rate": 1.967498721742119e-05, "loss": 2.5556, "step": 7860 }, { "epoch": 0.04946477324062617, "grad_norm": 7.08083438873291, "learning_rate": 1.9674568116476536e-05, "loss": 2.774, "step": 7870 }, { "epoch": 0.04952762555732328, "grad_norm": 10.015816688537598, "learning_rate": 1.9674149015531883e-05, "loss": 2.7492, "step": 7880 }, { "epoch": 0.049590477874020385, "grad_norm": 8.47620964050293, "learning_rate": 1.967372991458723e-05, "loss": 2.5963, "step": 7890 }, { "epoch": 0.0496533301907175, "grad_norm": 8.212635040283203, "learning_rate": 1.9673310813642577e-05, "loss": 2.5873, "step": 7900 }, { "epoch": 0.04971618250741461, "grad_norm": 6.361338138580322, "learning_rate": 1.967289171269792e-05, "loss": 2.6024, "step": 7910 }, { "epoch": 0.04977903482411172, "grad_norm": 7.361743450164795, "learning_rate": 1.9672472611753268e-05, "loss": 2.4698, "step": 7920 }, { "epoch": 0.04984188714080883, "grad_norm": 9.043204307556152, "learning_rate": 1.9672053510808615e-05, "loss": 2.8117, "step": 7930 }, { "epoch": 0.04990473945750594, "grad_norm": 6.981082916259766, "learning_rate": 1.9671634409863962e-05, "loss": 2.5884, "step": 7940 }, { "epoch": 0.049967591774203055, "grad_norm": 7.328317642211914, "learning_rate": 1.9671215308919306e-05, "loss": 2.7115, "step": 7950 }, { "epoch": 0.05003044409090016, "grad_norm": 7.6271653175354, "learning_rate": 1.9670796207974653e-05, "loss": 2.7678, "step": 7960 }, { "epoch": 0.05009329640759727, "grad_norm": 7.664426803588867, "learning_rate": 1.967037710703e-05, "loss": 2.5816, "step": 7970 }, { "epoch": 0.050156148724294386, "grad_norm": 9.592137336730957, "learning_rate": 1.9669958006085347e-05, "loss": 2.7122, "step": 7980 }, { "epoch": 0.050219001040991494, "grad_norm": 9.07362174987793, "learning_rate": 1.9669538905140694e-05, "loss": 2.9154, "step": 7990 }, { "epoch": 0.05028185335768861, "grad_norm": 10.417045593261719, "learning_rate": 1.966911980419604e-05, "loss": 2.6955, "step": 8000 }, { "epoch": 0.05034470567438572, "grad_norm": 7.155338287353516, "learning_rate": 1.9668700703251385e-05, "loss": 2.7882, "step": 8010 }, { "epoch": 0.050407557991082826, "grad_norm": 7.589554309844971, "learning_rate": 1.9668281602306732e-05, "loss": 2.5784, "step": 8020 }, { "epoch": 0.05047041030777994, "grad_norm": 8.70826244354248, "learning_rate": 1.966786250136208e-05, "loss": 2.8466, "step": 8030 }, { "epoch": 0.05053326262447705, "grad_norm": 6.847433090209961, "learning_rate": 1.9667443400417426e-05, "loss": 2.7549, "step": 8040 }, { "epoch": 0.05059611494117416, "grad_norm": 7.40858268737793, "learning_rate": 1.9667024299472773e-05, "loss": 2.7873, "step": 8050 }, { "epoch": 0.05065896725787127, "grad_norm": 8.859169006347656, "learning_rate": 1.966660519852812e-05, "loss": 2.6523, "step": 8060 }, { "epoch": 0.05072181957456838, "grad_norm": 7.8665361404418945, "learning_rate": 1.9666186097583467e-05, "loss": 2.8225, "step": 8070 }, { "epoch": 0.050784671891265495, "grad_norm": 8.269477844238281, "learning_rate": 1.966576699663881e-05, "loss": 2.4463, "step": 8080 }, { "epoch": 0.050847524207962604, "grad_norm": 7.107751846313477, "learning_rate": 1.9665347895694158e-05, "loss": 2.6323, "step": 8090 }, { "epoch": 0.05091037652465971, "grad_norm": 7.459254741668701, "learning_rate": 1.9664928794749505e-05, "loss": 2.6698, "step": 8100 }, { "epoch": 0.05097322884135683, "grad_norm": 7.243574142456055, "learning_rate": 1.9664509693804852e-05, "loss": 2.7532, "step": 8110 }, { "epoch": 0.051036081158053935, "grad_norm": 8.207475662231445, "learning_rate": 1.96640905928602e-05, "loss": 2.8283, "step": 8120 }, { "epoch": 0.05109893347475104, "grad_norm": 7.7275872230529785, "learning_rate": 1.9663671491915543e-05, "loss": 2.5482, "step": 8130 }, { "epoch": 0.05116178579144816, "grad_norm": 8.615901947021484, "learning_rate": 1.966325239097089e-05, "loss": 2.6718, "step": 8140 }, { "epoch": 0.051224638108145266, "grad_norm": 7.622805595397949, "learning_rate": 1.9662833290026237e-05, "loss": 2.6635, "step": 8150 }, { "epoch": 0.05128749042484238, "grad_norm": 8.606935501098633, "learning_rate": 1.9662414189081584e-05, "loss": 2.6948, "step": 8160 }, { "epoch": 0.05135034274153949, "grad_norm": 7.750791549682617, "learning_rate": 1.9661995088136928e-05, "loss": 2.8639, "step": 8170 }, { "epoch": 0.0514131950582366, "grad_norm": 7.692723274230957, "learning_rate": 1.9661617897286743e-05, "loss": 2.6692, "step": 8180 }, { "epoch": 0.05147604737493371, "grad_norm": 8.044435501098633, "learning_rate": 1.966119879634209e-05, "loss": 2.6322, "step": 8190 }, { "epoch": 0.05153889969163082, "grad_norm": 6.482088088989258, "learning_rate": 1.9660779695397437e-05, "loss": 2.6293, "step": 8200 }, { "epoch": 0.05160175200832793, "grad_norm": 8.842883110046387, "learning_rate": 1.966036059445278e-05, "loss": 2.6414, "step": 8210 }, { "epoch": 0.051664604325025044, "grad_norm": 7.571231365203857, "learning_rate": 1.9659941493508128e-05, "loss": 2.7317, "step": 8220 }, { "epoch": 0.05172745664172215, "grad_norm": 7.618865966796875, "learning_rate": 1.9659522392563475e-05, "loss": 2.7609, "step": 8230 }, { "epoch": 0.05179030895841926, "grad_norm": 8.84247875213623, "learning_rate": 1.965910329161882e-05, "loss": 2.8383, "step": 8240 }, { "epoch": 0.051853161275116376, "grad_norm": 9.494882583618164, "learning_rate": 1.9658684190674165e-05, "loss": 2.7591, "step": 8250 }, { "epoch": 0.051916013591813484, "grad_norm": 8.066009521484375, "learning_rate": 1.9658265089729512e-05, "loss": 2.6235, "step": 8260 }, { "epoch": 0.0519788659085106, "grad_norm": 7.616729736328125, "learning_rate": 1.965784598878486e-05, "loss": 2.4859, "step": 8270 }, { "epoch": 0.05204171822520771, "grad_norm": 7.720513343811035, "learning_rate": 1.9657426887840207e-05, "loss": 2.8178, "step": 8280 }, { "epoch": 0.052104570541904816, "grad_norm": 7.933021068572998, "learning_rate": 1.9657007786895554e-05, "loss": 2.5814, "step": 8290 }, { "epoch": 0.05216742285860193, "grad_norm": 8.000345230102539, "learning_rate": 1.96565886859509e-05, "loss": 2.6746, "step": 8300 }, { "epoch": 0.05223027517529904, "grad_norm": 6.778539180755615, "learning_rate": 1.9656169585006248e-05, "loss": 2.5875, "step": 8310 }, { "epoch": 0.05229312749199615, "grad_norm": 8.313685417175293, "learning_rate": 1.965575048406159e-05, "loss": 2.4838, "step": 8320 }, { "epoch": 0.05235597980869326, "grad_norm": 8.954083442687988, "learning_rate": 1.965533138311694e-05, "loss": 3.0361, "step": 8330 }, { "epoch": 0.05241883212539037, "grad_norm": 10.441940307617188, "learning_rate": 1.9654912282172286e-05, "loss": 2.766, "step": 8340 }, { "epoch": 0.052481684442087485, "grad_norm": 7.779235363006592, "learning_rate": 1.9654493181227633e-05, "loss": 2.4505, "step": 8350 }, { "epoch": 0.052544536758784594, "grad_norm": 7.268058776855469, "learning_rate": 1.965407408028298e-05, "loss": 2.7261, "step": 8360 }, { "epoch": 0.0526073890754817, "grad_norm": 6.707265377044678, "learning_rate": 1.9653654979338327e-05, "loss": 2.7951, "step": 8370 }, { "epoch": 0.05267024139217882, "grad_norm": 8.21595573425293, "learning_rate": 1.965323587839367e-05, "loss": 2.5005, "step": 8380 }, { "epoch": 0.052733093708875925, "grad_norm": 7.598992824554443, "learning_rate": 1.9652816777449018e-05, "loss": 2.8568, "step": 8390 }, { "epoch": 0.05279594602557303, "grad_norm": 8.286802291870117, "learning_rate": 1.9652397676504365e-05, "loss": 2.7548, "step": 8400 }, { "epoch": 0.05285879834227015, "grad_norm": 8.704188346862793, "learning_rate": 1.965197857555971e-05, "loss": 2.6038, "step": 8410 }, { "epoch": 0.052921650658967256, "grad_norm": 8.282207489013672, "learning_rate": 1.965155947461506e-05, "loss": 2.7913, "step": 8420 }, { "epoch": 0.05298450297566437, "grad_norm": 8.401204109191895, "learning_rate": 1.9651140373670402e-05, "loss": 3.0444, "step": 8430 }, { "epoch": 0.05304735529236148, "grad_norm": 8.609389305114746, "learning_rate": 1.965072127272575e-05, "loss": 2.7141, "step": 8440 }, { "epoch": 0.05311020760905859, "grad_norm": 7.735413074493408, "learning_rate": 1.9650302171781097e-05, "loss": 2.7262, "step": 8450 }, { "epoch": 0.0531730599257557, "grad_norm": 18.663652420043945, "learning_rate": 1.9649883070836444e-05, "loss": 2.6677, "step": 8460 }, { "epoch": 0.05323591224245281, "grad_norm": 8.87226676940918, "learning_rate": 1.9649463969891787e-05, "loss": 2.4734, "step": 8470 }, { "epoch": 0.05329876455914992, "grad_norm": 7.327389717102051, "learning_rate": 1.9649044868947134e-05, "loss": 2.765, "step": 8480 }, { "epoch": 0.053361616875847034, "grad_norm": 8.151284217834473, "learning_rate": 1.964862576800248e-05, "loss": 2.6378, "step": 8490 }, { "epoch": 0.05342446919254414, "grad_norm": 7.761752128601074, "learning_rate": 1.964820666705783e-05, "loss": 2.7119, "step": 8500 }, { "epoch": 0.05348732150924126, "grad_norm": 7.645976543426514, "learning_rate": 1.9647787566113176e-05, "loss": 2.7731, "step": 8510 }, { "epoch": 0.053550173825938366, "grad_norm": 6.9228434562683105, "learning_rate": 1.9647410375262987e-05, "loss": 2.8945, "step": 8520 }, { "epoch": 0.053613026142635474, "grad_norm": 6.676364421844482, "learning_rate": 1.9646991274318334e-05, "loss": 2.7391, "step": 8530 }, { "epoch": 0.05367587845933259, "grad_norm": 7.566768169403076, "learning_rate": 1.964657217337368e-05, "loss": 2.7572, "step": 8540 }, { "epoch": 0.0537387307760297, "grad_norm": 7.791513919830322, "learning_rate": 1.9646153072429025e-05, "loss": 2.655, "step": 8550 }, { "epoch": 0.053801583092726805, "grad_norm": 7.675039291381836, "learning_rate": 1.9645733971484372e-05, "loss": 2.7216, "step": 8560 }, { "epoch": 0.05386443540942392, "grad_norm": 7.082376480102539, "learning_rate": 1.964531487053972e-05, "loss": 2.4595, "step": 8570 }, { "epoch": 0.05392728772612103, "grad_norm": 9.083993911743164, "learning_rate": 1.9644895769595066e-05, "loss": 2.5619, "step": 8580 }, { "epoch": 0.053990140042818144, "grad_norm": 7.382872104644775, "learning_rate": 1.9644476668650413e-05, "loss": 2.6891, "step": 8590 }, { "epoch": 0.05405299235951525, "grad_norm": 7.136111736297607, "learning_rate": 1.964405756770576e-05, "loss": 2.6099, "step": 8600 }, { "epoch": 0.05411584467621236, "grad_norm": 7.041515350341797, "learning_rate": 1.9643638466761107e-05, "loss": 2.5007, "step": 8610 }, { "epoch": 0.054178696992909475, "grad_norm": 7.351996898651123, "learning_rate": 1.9643219365816454e-05, "loss": 2.7503, "step": 8620 }, { "epoch": 0.05424154930960658, "grad_norm": 6.633028984069824, "learning_rate": 1.96428002648718e-05, "loss": 2.7683, "step": 8630 }, { "epoch": 0.05430440162630369, "grad_norm": 7.365866661071777, "learning_rate": 1.9642381163927145e-05, "loss": 2.6287, "step": 8640 }, { "epoch": 0.05436725394300081, "grad_norm": 7.213832855224609, "learning_rate": 1.9641962062982492e-05, "loss": 2.5954, "step": 8650 }, { "epoch": 0.054430106259697915, "grad_norm": 6.928434371948242, "learning_rate": 1.964154296203784e-05, "loss": 2.4292, "step": 8660 }, { "epoch": 0.05449295857639503, "grad_norm": 8.020259857177734, "learning_rate": 1.9641123861093186e-05, "loss": 2.8059, "step": 8670 }, { "epoch": 0.05455581089309214, "grad_norm": 7.945987701416016, "learning_rate": 1.964070476014853e-05, "loss": 2.7137, "step": 8680 }, { "epoch": 0.054618663209789246, "grad_norm": 8.240311622619629, "learning_rate": 1.9640285659203877e-05, "loss": 2.5354, "step": 8690 }, { "epoch": 0.05468151552648636, "grad_norm": 10.10504150390625, "learning_rate": 1.9639866558259224e-05, "loss": 2.6713, "step": 8700 }, { "epoch": 0.05474436784318347, "grad_norm": 7.855795383453369, "learning_rate": 1.963944745731457e-05, "loss": 2.6301, "step": 8710 }, { "epoch": 0.05480722015988058, "grad_norm": 7.5793867111206055, "learning_rate": 1.9639028356369915e-05, "loss": 2.7625, "step": 8720 }, { "epoch": 0.05487007247657769, "grad_norm": 7.741368293762207, "learning_rate": 1.9638609255425262e-05, "loss": 2.5894, "step": 8730 }, { "epoch": 0.0549329247932748, "grad_norm": 6.912710189819336, "learning_rate": 1.963819015448061e-05, "loss": 2.7917, "step": 8740 }, { "epoch": 0.054995777109971916, "grad_norm": 7.872913837432861, "learning_rate": 1.9637771053535956e-05, "loss": 2.6818, "step": 8750 }, { "epoch": 0.055058629426669024, "grad_norm": 7.7948408126831055, "learning_rate": 1.9637351952591303e-05, "loss": 2.4908, "step": 8760 }, { "epoch": 0.05512148174336613, "grad_norm": 9.372628211975098, "learning_rate": 1.9636932851646647e-05, "loss": 2.6611, "step": 8770 }, { "epoch": 0.05518433406006325, "grad_norm": 8.036674499511719, "learning_rate": 1.9636513750701994e-05, "loss": 2.751, "step": 8780 }, { "epoch": 0.055247186376760356, "grad_norm": 8.101851463317871, "learning_rate": 1.963609464975734e-05, "loss": 2.7709, "step": 8790 }, { "epoch": 0.055310038693457464, "grad_norm": 8.577589988708496, "learning_rate": 1.9635675548812688e-05, "loss": 2.4593, "step": 8800 }, { "epoch": 0.05537289101015458, "grad_norm": 7.897840976715088, "learning_rate": 1.9635256447868035e-05, "loss": 2.8371, "step": 8810 }, { "epoch": 0.05543574332685169, "grad_norm": 8.486788749694824, "learning_rate": 1.9634837346923382e-05, "loss": 2.7736, "step": 8820 }, { "epoch": 0.0554985956435488, "grad_norm": 7.370955467224121, "learning_rate": 1.963441824597873e-05, "loss": 2.7316, "step": 8830 }, { "epoch": 0.05556144796024591, "grad_norm": 7.446674346923828, "learning_rate": 1.9633999145034076e-05, "loss": 2.719, "step": 8840 }, { "epoch": 0.05562430027694302, "grad_norm": 7.9408135414123535, "learning_rate": 1.9633580044089423e-05, "loss": 2.7738, "step": 8850 }, { "epoch": 0.055687152593640134, "grad_norm": 7.8129496574401855, "learning_rate": 1.9633160943144767e-05, "loss": 2.5191, "step": 8860 }, { "epoch": 0.05575000491033724, "grad_norm": 7.682240009307861, "learning_rate": 1.9632741842200114e-05, "loss": 2.6405, "step": 8870 }, { "epoch": 0.05581285722703435, "grad_norm": 7.207090854644775, "learning_rate": 1.963232274125546e-05, "loss": 2.5648, "step": 8880 }, { "epoch": 0.055875709543731465, "grad_norm": 8.125288009643555, "learning_rate": 1.9631903640310808e-05, "loss": 2.6379, "step": 8890 }, { "epoch": 0.05593856186042857, "grad_norm": 8.328934669494629, "learning_rate": 1.9631484539366152e-05, "loss": 2.8457, "step": 8900 }, { "epoch": 0.05600141417712568, "grad_norm": 7.958409786224365, "learning_rate": 1.96310654384215e-05, "loss": 2.8154, "step": 8910 }, { "epoch": 0.0560642664938228, "grad_norm": 8.580458641052246, "learning_rate": 1.9630646337476846e-05, "loss": 2.9781, "step": 8920 }, { "epoch": 0.056127118810519905, "grad_norm": 8.746109008789062, "learning_rate": 1.9630227236532193e-05, "loss": 2.6343, "step": 8930 }, { "epoch": 0.05618997112721702, "grad_norm": 7.417159557342529, "learning_rate": 1.962980813558754e-05, "loss": 2.5163, "step": 8940 }, { "epoch": 0.05625282344391413, "grad_norm": 6.750892639160156, "learning_rate": 1.9629389034642884e-05, "loss": 2.5631, "step": 8950 }, { "epoch": 0.056315675760611236, "grad_norm": 7.203453063964844, "learning_rate": 1.962896993369823e-05, "loss": 2.7475, "step": 8960 }, { "epoch": 0.05637852807730835, "grad_norm": 8.132242202758789, "learning_rate": 1.9628550832753578e-05, "loss": 2.6143, "step": 8970 }, { "epoch": 0.05644138039400546, "grad_norm": 8.89110279083252, "learning_rate": 1.9628131731808925e-05, "loss": 2.7637, "step": 8980 }, { "epoch": 0.05650423271070257, "grad_norm": 8.574644088745117, "learning_rate": 1.9627712630864272e-05, "loss": 2.6649, "step": 8990 }, { "epoch": 0.05656708502739968, "grad_norm": 7.429235935211182, "learning_rate": 1.962729352991962e-05, "loss": 2.4796, "step": 9000 }, { "epoch": 0.05662993734409679, "grad_norm": 7.8866400718688965, "learning_rate": 1.9626874428974966e-05, "loss": 2.7222, "step": 9010 }, { "epoch": 0.056692789660793906, "grad_norm": 7.169637680053711, "learning_rate": 1.962645532803031e-05, "loss": 2.6579, "step": 9020 }, { "epoch": 0.056755641977491014, "grad_norm": 8.332289695739746, "learning_rate": 1.9626036227085657e-05, "loss": 2.6769, "step": 9030 }, { "epoch": 0.05681849429418812, "grad_norm": 6.445282936096191, "learning_rate": 1.9625617126141004e-05, "loss": 2.5097, "step": 9040 }, { "epoch": 0.05688134661088524, "grad_norm": 8.111881256103516, "learning_rate": 1.962519802519635e-05, "loss": 2.6252, "step": 9050 }, { "epoch": 0.056944198927582346, "grad_norm": 6.78681755065918, "learning_rate": 1.9624778924251698e-05, "loss": 2.4517, "step": 9060 }, { "epoch": 0.057007051244279454, "grad_norm": 7.44216251373291, "learning_rate": 1.9624359823307045e-05, "loss": 2.8127, "step": 9070 }, { "epoch": 0.05706990356097657, "grad_norm": 8.825942039489746, "learning_rate": 1.962394072236239e-05, "loss": 2.8195, "step": 9080 }, { "epoch": 0.05713275587767368, "grad_norm": 6.7636566162109375, "learning_rate": 1.9623521621417736e-05, "loss": 2.5583, "step": 9090 }, { "epoch": 0.05719560819437079, "grad_norm": 7.559068202972412, "learning_rate": 1.9623102520473083e-05, "loss": 2.6505, "step": 9100 }, { "epoch": 0.0572584605110679, "grad_norm": 8.663436889648438, "learning_rate": 1.962268341952843e-05, "loss": 2.6025, "step": 9110 }, { "epoch": 0.05732131282776501, "grad_norm": 7.314866065979004, "learning_rate": 1.9622264318583774e-05, "loss": 2.9561, "step": 9120 }, { "epoch": 0.057384165144462124, "grad_norm": 7.5395612716674805, "learning_rate": 1.962184521763912e-05, "loss": 2.4377, "step": 9130 }, { "epoch": 0.05744701746115923, "grad_norm": 9.004176139831543, "learning_rate": 1.9621426116694468e-05, "loss": 2.4806, "step": 9140 }, { "epoch": 0.05750986977785634, "grad_norm": 17.26418113708496, "learning_rate": 1.9621007015749815e-05, "loss": 2.5967, "step": 9150 }, { "epoch": 0.057572722094553455, "grad_norm": 7.469278812408447, "learning_rate": 1.9620587914805162e-05, "loss": 2.8039, "step": 9160 }, { "epoch": 0.05763557441125056, "grad_norm": 6.7289628982543945, "learning_rate": 1.9620168813860506e-05, "loss": 2.5404, "step": 9170 }, { "epoch": 0.05769842672794768, "grad_norm": 8.37357234954834, "learning_rate": 1.9619749712915853e-05, "loss": 2.5832, "step": 9180 }, { "epoch": 0.05776127904464479, "grad_norm": 9.232333183288574, "learning_rate": 1.96193306119712e-05, "loss": 2.3753, "step": 9190 }, { "epoch": 0.057824131361341895, "grad_norm": 9.12719440460205, "learning_rate": 1.9618911511026547e-05, "loss": 2.7676, "step": 9200 }, { "epoch": 0.05788698367803901, "grad_norm": 6.722346305847168, "learning_rate": 1.9618492410081894e-05, "loss": 2.6162, "step": 9210 }, { "epoch": 0.05794983599473612, "grad_norm": 8.384921073913574, "learning_rate": 1.961807330913724e-05, "loss": 2.6299, "step": 9220 }, { "epoch": 0.058012688311433226, "grad_norm": 7.978445529937744, "learning_rate": 1.961765420819259e-05, "loss": 2.6206, "step": 9230 }, { "epoch": 0.05807554062813034, "grad_norm": 7.925615310668945, "learning_rate": 1.9617235107247935e-05, "loss": 2.758, "step": 9240 }, { "epoch": 0.05813839294482745, "grad_norm": 7.987235069274902, "learning_rate": 1.9616816006303282e-05, "loss": 2.4806, "step": 9250 }, { "epoch": 0.058201245261524565, "grad_norm": 9.439668655395508, "learning_rate": 1.9616396905358626e-05, "loss": 2.55, "step": 9260 }, { "epoch": 0.05826409757822167, "grad_norm": 8.0459566116333, "learning_rate": 1.9615977804413973e-05, "loss": 2.8098, "step": 9270 }, { "epoch": 0.05832694989491878, "grad_norm": 7.324197769165039, "learning_rate": 1.961555870346932e-05, "loss": 2.5795, "step": 9280 }, { "epoch": 0.058389802211615896, "grad_norm": 7.621894359588623, "learning_rate": 1.9615139602524667e-05, "loss": 2.6315, "step": 9290 }, { "epoch": 0.058452654528313004, "grad_norm": 6.299142360687256, "learning_rate": 1.961472050158001e-05, "loss": 2.6556, "step": 9300 }, { "epoch": 0.05851550684501011, "grad_norm": 7.16338586807251, "learning_rate": 1.9614301400635358e-05, "loss": 2.5728, "step": 9310 }, { "epoch": 0.05857835916170723, "grad_norm": 7.851929187774658, "learning_rate": 1.9613882299690705e-05, "loss": 2.4467, "step": 9320 }, { "epoch": 0.058641211478404336, "grad_norm": 9.934613227844238, "learning_rate": 1.9613463198746052e-05, "loss": 2.7476, "step": 9330 }, { "epoch": 0.05870406379510145, "grad_norm": 8.11226749420166, "learning_rate": 1.9613044097801396e-05, "loss": 2.5031, "step": 9340 }, { "epoch": 0.05876691611179856, "grad_norm": 8.113167762756348, "learning_rate": 1.9612624996856743e-05, "loss": 2.5387, "step": 9350 }, { "epoch": 0.05882976842849567, "grad_norm": 44.10908126831055, "learning_rate": 1.961220589591209e-05, "loss": 2.4559, "step": 9360 }, { "epoch": 0.05889262074519278, "grad_norm": 7.4416890144348145, "learning_rate": 1.9611786794967437e-05, "loss": 2.4803, "step": 9370 }, { "epoch": 0.05895547306188989, "grad_norm": 9.109253883361816, "learning_rate": 1.9611367694022784e-05, "loss": 2.7369, "step": 9380 }, { "epoch": 0.059018325378587, "grad_norm": 7.512287616729736, "learning_rate": 1.9610948593078128e-05, "loss": 2.5443, "step": 9390 }, { "epoch": 0.059081177695284114, "grad_norm": 8.12739372253418, "learning_rate": 1.9610529492133475e-05, "loss": 2.6543, "step": 9400 }, { "epoch": 0.05914403001198122, "grad_norm": 7.4665398597717285, "learning_rate": 1.9610110391188822e-05, "loss": 2.6158, "step": 9410 }, { "epoch": 0.05920688232867834, "grad_norm": 8.125940322875977, "learning_rate": 1.960969129024417e-05, "loss": 2.7101, "step": 9420 }, { "epoch": 0.059269734645375445, "grad_norm": 7.124801158905029, "learning_rate": 1.9609272189299516e-05, "loss": 2.7561, "step": 9430 }, { "epoch": 0.05933258696207255, "grad_norm": 7.844360828399658, "learning_rate": 1.9608853088354863e-05, "loss": 2.7748, "step": 9440 }, { "epoch": 0.05939543927876967, "grad_norm": 9.075909614562988, "learning_rate": 1.960843398741021e-05, "loss": 2.6617, "step": 9450 }, { "epoch": 0.059458291595466777, "grad_norm": 8.1566801071167, "learning_rate": 1.9608014886465557e-05, "loss": 2.6068, "step": 9460 }, { "epoch": 0.059521143912163885, "grad_norm": 5.545306205749512, "learning_rate": 1.9607595785520904e-05, "loss": 2.6708, "step": 9470 }, { "epoch": 0.059583996228861, "grad_norm": 8.167916297912598, "learning_rate": 1.9607176684576248e-05, "loss": 2.518, "step": 9480 }, { "epoch": 0.05964684854555811, "grad_norm": 6.242193222045898, "learning_rate": 1.9606757583631595e-05, "loss": 2.3471, "step": 9490 }, { "epoch": 0.05970970086225522, "grad_norm": 8.109650611877441, "learning_rate": 1.9606338482686942e-05, "loss": 2.4626, "step": 9500 }, { "epoch": 0.05977255317895233, "grad_norm": 7.323617458343506, "learning_rate": 1.960591938174229e-05, "loss": 2.4797, "step": 9510 }, { "epoch": 0.05983540549564944, "grad_norm": 7.509581089019775, "learning_rate": 1.9605500280797633e-05, "loss": 2.5529, "step": 9520 }, { "epoch": 0.059898257812346554, "grad_norm": 8.43581771850586, "learning_rate": 1.960508117985298e-05, "loss": 2.3881, "step": 9530 }, { "epoch": 0.05996111012904366, "grad_norm": 6.916092395782471, "learning_rate": 1.9604662078908327e-05, "loss": 2.6903, "step": 9540 }, { "epoch": 0.06002396244574077, "grad_norm": 8.527880668640137, "learning_rate": 1.9604242977963674e-05, "loss": 2.8299, "step": 9550 }, { "epoch": 0.060086814762437886, "grad_norm": 6.470539093017578, "learning_rate": 1.960382387701902e-05, "loss": 2.5176, "step": 9560 }, { "epoch": 0.060149667079134994, "grad_norm": 7.453949451446533, "learning_rate": 1.9603404776074365e-05, "loss": 2.4548, "step": 9570 }, { "epoch": 0.06021251939583211, "grad_norm": 7.623648643493652, "learning_rate": 1.9602985675129712e-05, "loss": 2.5387, "step": 9580 }, { "epoch": 0.06027537171252922, "grad_norm": 8.512686729431152, "learning_rate": 1.960256657418506e-05, "loss": 2.602, "step": 9590 }, { "epoch": 0.060338224029226326, "grad_norm": 7.6814751625061035, "learning_rate": 1.9602147473240406e-05, "loss": 2.6069, "step": 9600 }, { "epoch": 0.06040107634592344, "grad_norm": 7.552587509155273, "learning_rate": 1.9601728372295753e-05, "loss": 2.5909, "step": 9610 }, { "epoch": 0.06046392866262055, "grad_norm": 7.380543231964111, "learning_rate": 1.96013092713511e-05, "loss": 2.5966, "step": 9620 }, { "epoch": 0.06052678097931766, "grad_norm": 8.672584533691406, "learning_rate": 1.9600890170406447e-05, "loss": 2.503, "step": 9630 }, { "epoch": 0.06058963329601477, "grad_norm": 9.565302848815918, "learning_rate": 1.960047106946179e-05, "loss": 2.6518, "step": 9640 }, { "epoch": 0.06065248561271188, "grad_norm": 6.482096195220947, "learning_rate": 1.9600051968517138e-05, "loss": 2.6328, "step": 9650 }, { "epoch": 0.06071533792940899, "grad_norm": 11.39160442352295, "learning_rate": 1.9599632867572485e-05, "loss": 2.7147, "step": 9660 }, { "epoch": 0.060778190246106104, "grad_norm": 7.652275085449219, "learning_rate": 1.9599213766627832e-05, "loss": 2.3302, "step": 9670 }, { "epoch": 0.06084104256280321, "grad_norm": 14.219861030578613, "learning_rate": 1.959879466568318e-05, "loss": 2.614, "step": 9680 }, { "epoch": 0.06090389487950033, "grad_norm": 7.919884204864502, "learning_rate": 1.9598375564738526e-05, "loss": 2.5997, "step": 9690 }, { "epoch": 0.060966747196197435, "grad_norm": 7.179359436035156, "learning_rate": 1.959795646379387e-05, "loss": 2.4926, "step": 9700 }, { "epoch": 0.06102959951289454, "grad_norm": 8.82780647277832, "learning_rate": 1.9597537362849217e-05, "loss": 2.7258, "step": 9710 }, { "epoch": 0.06109245182959166, "grad_norm": 7.858294486999512, "learning_rate": 1.9597118261904564e-05, "loss": 2.5357, "step": 9720 }, { "epoch": 0.061155304146288766, "grad_norm": 7.4505109786987305, "learning_rate": 1.959669916095991e-05, "loss": 2.5321, "step": 9730 }, { "epoch": 0.061218156462985875, "grad_norm": 7.89233922958374, "learning_rate": 1.9596280060015255e-05, "loss": 2.4345, "step": 9740 }, { "epoch": 0.06128100877968299, "grad_norm": 8.051054000854492, "learning_rate": 1.9595860959070602e-05, "loss": 2.5858, "step": 9750 }, { "epoch": 0.0613438610963801, "grad_norm": 7.296353816986084, "learning_rate": 1.959544185812595e-05, "loss": 2.4151, "step": 9760 }, { "epoch": 0.06140671341307721, "grad_norm": 8.718103408813477, "learning_rate": 1.9595022757181296e-05, "loss": 2.8083, "step": 9770 }, { "epoch": 0.06146956572977432, "grad_norm": 7.9979472160339355, "learning_rate": 1.9594603656236643e-05, "loss": 2.6403, "step": 9780 }, { "epoch": 0.06153241804647143, "grad_norm": 6.973628997802734, "learning_rate": 1.9594184555291987e-05, "loss": 2.498, "step": 9790 }, { "epoch": 0.061595270363168544, "grad_norm": 8.49692440032959, "learning_rate": 1.9593765454347334e-05, "loss": 2.5516, "step": 9800 }, { "epoch": 0.06165812267986565, "grad_norm": 7.654516220092773, "learning_rate": 1.959334635340268e-05, "loss": 2.5383, "step": 9810 }, { "epoch": 0.06172097499656276, "grad_norm": 7.838619709014893, "learning_rate": 1.9592927252458028e-05, "loss": 2.5105, "step": 9820 }, { "epoch": 0.061783827313259876, "grad_norm": 7.077419757843018, "learning_rate": 1.9592508151513375e-05, "loss": 2.5268, "step": 9830 }, { "epoch": 0.061846679629956984, "grad_norm": 8.740164756774902, "learning_rate": 1.9592089050568722e-05, "loss": 2.5884, "step": 9840 }, { "epoch": 0.0619095319466541, "grad_norm": 8.284232139587402, "learning_rate": 1.959166994962407e-05, "loss": 2.5533, "step": 9850 }, { "epoch": 0.06197238426335121, "grad_norm": 7.148123264312744, "learning_rate": 1.9591250848679417e-05, "loss": 2.5367, "step": 9860 }, { "epoch": 0.062035236580048315, "grad_norm": 7.701465129852295, "learning_rate": 1.9590831747734764e-05, "loss": 2.6559, "step": 9870 }, { "epoch": 0.06209808889674543, "grad_norm": 6.957675457000732, "learning_rate": 1.9590412646790107e-05, "loss": 2.6157, "step": 9880 }, { "epoch": 0.06216094121344254, "grad_norm": 8.172834396362305, "learning_rate": 1.9589993545845454e-05, "loss": 2.4962, "step": 9890 }, { "epoch": 0.06222379353013965, "grad_norm": 7.706208229064941, "learning_rate": 1.95895744449008e-05, "loss": 2.4572, "step": 9900 }, { "epoch": 0.06228664584683676, "grad_norm": 11.208701133728027, "learning_rate": 1.958915534395615e-05, "loss": 2.4993, "step": 9910 }, { "epoch": 0.06234949816353387, "grad_norm": 7.338189125061035, "learning_rate": 1.9588736243011492e-05, "loss": 2.4891, "step": 9920 }, { "epoch": 0.062412350480230985, "grad_norm": 8.864147186279297, "learning_rate": 1.958831714206684e-05, "loss": 2.4935, "step": 9930 }, { "epoch": 0.062475202796928093, "grad_norm": 8.186270713806152, "learning_rate": 1.9587898041122186e-05, "loss": 2.6259, "step": 9940 }, { "epoch": 0.0625380551136252, "grad_norm": 7.757992267608643, "learning_rate": 1.9587478940177533e-05, "loss": 2.6162, "step": 9950 }, { "epoch": 0.06260090743032232, "grad_norm": 7.206307411193848, "learning_rate": 1.9587059839232877e-05, "loss": 2.6705, "step": 9960 }, { "epoch": 0.06266375974701943, "grad_norm": 7.583564758300781, "learning_rate": 1.9586640738288224e-05, "loss": 2.5045, "step": 9970 }, { "epoch": 0.06272661206371653, "grad_norm": 7.870649814605713, "learning_rate": 1.958622163734357e-05, "loss": 2.3944, "step": 9980 }, { "epoch": 0.06278946438041365, "grad_norm": 6.977264881134033, "learning_rate": 1.9585802536398918e-05, "loss": 2.4585, "step": 9990 }, { "epoch": 0.06285231669711076, "grad_norm": 7.438137054443359, "learning_rate": 1.9585383435454265e-05, "loss": 2.6557, "step": 10000 }, { "epoch": 0.06291516901380786, "grad_norm": 8.076875686645508, "learning_rate": 1.9584964334509612e-05, "loss": 2.5705, "step": 10010 }, { "epoch": 0.06297802133050498, "grad_norm": 7.514438629150391, "learning_rate": 1.9584545233564956e-05, "loss": 2.5158, "step": 10020 }, { "epoch": 0.0630408736472021, "grad_norm": 7.07708740234375, "learning_rate": 1.9584126132620303e-05, "loss": 2.5866, "step": 10030 }, { "epoch": 0.0631037259638992, "grad_norm": 8.048712730407715, "learning_rate": 1.958370703167565e-05, "loss": 2.6047, "step": 10040 }, { "epoch": 0.06316657828059631, "grad_norm": 7.391304016113281, "learning_rate": 1.9583287930730997e-05, "loss": 2.3468, "step": 10050 }, { "epoch": 0.06322943059729343, "grad_norm": 6.838722229003906, "learning_rate": 1.9582868829786344e-05, "loss": 2.2845, "step": 10060 }, { "epoch": 0.06329228291399053, "grad_norm": 8.18930721282959, "learning_rate": 1.958244972884169e-05, "loss": 2.4832, "step": 10070 }, { "epoch": 0.06335513523068764, "grad_norm": 6.990052223205566, "learning_rate": 1.958203062789704e-05, "loss": 2.7226, "step": 10080 }, { "epoch": 0.06341798754738476, "grad_norm": 7.360417366027832, "learning_rate": 1.9581611526952386e-05, "loss": 2.5438, "step": 10090 }, { "epoch": 0.06348083986408186, "grad_norm": 8.4058837890625, "learning_rate": 1.958119242600773e-05, "loss": 2.7104, "step": 10100 }, { "epoch": 0.06354369218077897, "grad_norm": 7.125432014465332, "learning_rate": 1.9580773325063076e-05, "loss": 2.3911, "step": 10110 }, { "epoch": 0.06360654449747609, "grad_norm": 7.1509222984313965, "learning_rate": 1.9580354224118423e-05, "loss": 2.4099, "step": 10120 }, { "epoch": 0.0636693968141732, "grad_norm": 7.346128463745117, "learning_rate": 1.957993512317377e-05, "loss": 2.7052, "step": 10130 }, { "epoch": 0.0637322491308703, "grad_norm": 7.011810302734375, "learning_rate": 1.9579516022229114e-05, "loss": 2.3179, "step": 10140 }, { "epoch": 0.06379510144756742, "grad_norm": 6.529143333435059, "learning_rate": 1.957909692128446e-05, "loss": 2.4885, "step": 10150 }, { "epoch": 0.06385795376426454, "grad_norm": 7.633592128753662, "learning_rate": 1.9578677820339808e-05, "loss": 2.7063, "step": 10160 }, { "epoch": 0.06392080608096164, "grad_norm": 8.110146522521973, "learning_rate": 1.9578258719395155e-05, "loss": 2.4585, "step": 10170 }, { "epoch": 0.06398365839765875, "grad_norm": 7.855094909667969, "learning_rate": 1.9577839618450502e-05, "loss": 2.3638, "step": 10180 }, { "epoch": 0.06404651071435587, "grad_norm": 7.637715816497803, "learning_rate": 1.9577420517505846e-05, "loss": 2.6611, "step": 10190 }, { "epoch": 0.06410936303105297, "grad_norm": 7.207457542419434, "learning_rate": 1.9577001416561193e-05, "loss": 2.5074, "step": 10200 }, { "epoch": 0.06417221534775008, "grad_norm": 7.327394008636475, "learning_rate": 1.957658231561654e-05, "loss": 2.6406, "step": 10210 }, { "epoch": 0.0642350676644472, "grad_norm": 6.885451316833496, "learning_rate": 1.9576163214671887e-05, "loss": 2.7146, "step": 10220 }, { "epoch": 0.0642979199811443, "grad_norm": 7.120704174041748, "learning_rate": 1.9575744113727234e-05, "loss": 2.2612, "step": 10230 }, { "epoch": 0.06436077229784141, "grad_norm": 7.489481449127197, "learning_rate": 1.957532501278258e-05, "loss": 2.4659, "step": 10240 }, { "epoch": 0.06442362461453853, "grad_norm": 7.456684589385986, "learning_rate": 1.957490591183793e-05, "loss": 2.3529, "step": 10250 }, { "epoch": 0.06448647693123563, "grad_norm": 8.414656639099121, "learning_rate": 1.9574486810893276e-05, "loss": 2.4778, "step": 10260 }, { "epoch": 0.06454932924793275, "grad_norm": 6.8723626136779785, "learning_rate": 1.957406770994862e-05, "loss": 2.4454, "step": 10270 }, { "epoch": 0.06461218156462986, "grad_norm": 20.84834861755371, "learning_rate": 1.9573648609003966e-05, "loss": 2.6826, "step": 10280 }, { "epoch": 0.06467503388132696, "grad_norm": 7.154892921447754, "learning_rate": 1.9573229508059313e-05, "loss": 2.4339, "step": 10290 }, { "epoch": 0.06473788619802408, "grad_norm": 6.664883136749268, "learning_rate": 1.957281040711466e-05, "loss": 2.3187, "step": 10300 }, { "epoch": 0.06480073851472119, "grad_norm": 8.236434936523438, "learning_rate": 1.9572391306170008e-05, "loss": 2.4366, "step": 10310 }, { "epoch": 0.06486359083141831, "grad_norm": 8.697257995605469, "learning_rate": 1.957197220522535e-05, "loss": 2.8649, "step": 10320 }, { "epoch": 0.06492644314811541, "grad_norm": 6.582972526550293, "learning_rate": 1.95715531042807e-05, "loss": 2.6505, "step": 10330 }, { "epoch": 0.06498929546481252, "grad_norm": 8.965617179870605, "learning_rate": 1.9571134003336045e-05, "loss": 2.5702, "step": 10340 }, { "epoch": 0.06505214778150964, "grad_norm": 7.132918357849121, "learning_rate": 1.9570714902391392e-05, "loss": 2.6687, "step": 10350 }, { "epoch": 0.06511500009820674, "grad_norm": 10.190888404846191, "learning_rate": 1.9570295801446736e-05, "loss": 2.4893, "step": 10360 }, { "epoch": 0.06517785241490386, "grad_norm": 8.878976821899414, "learning_rate": 1.9569876700502083e-05, "loss": 2.8093, "step": 10370 }, { "epoch": 0.06524070473160097, "grad_norm": 7.512330055236816, "learning_rate": 1.956945759955743e-05, "loss": 2.6352, "step": 10380 }, { "epoch": 0.06530355704829807, "grad_norm": 7.537259578704834, "learning_rate": 1.9569038498612777e-05, "loss": 2.5183, "step": 10390 }, { "epoch": 0.06536640936499519, "grad_norm": 6.685320854187012, "learning_rate": 1.9568619397668124e-05, "loss": 2.457, "step": 10400 }, { "epoch": 0.0654292616816923, "grad_norm": 7.5906195640563965, "learning_rate": 1.9568200296723468e-05, "loss": 2.6065, "step": 10410 }, { "epoch": 0.0654921139983894, "grad_norm": 6.875448703765869, "learning_rate": 1.9567781195778815e-05, "loss": 2.7156, "step": 10420 }, { "epoch": 0.06555496631508652, "grad_norm": 7.813136100769043, "learning_rate": 1.9567362094834162e-05, "loss": 2.7358, "step": 10430 }, { "epoch": 0.06561781863178363, "grad_norm": 7.258267879486084, "learning_rate": 1.956694299388951e-05, "loss": 2.4653, "step": 10440 }, { "epoch": 0.06568067094848073, "grad_norm": 7.039821624755859, "learning_rate": 1.9566523892944856e-05, "loss": 2.2635, "step": 10450 }, { "epoch": 0.06574352326517785, "grad_norm": 8.447144508361816, "learning_rate": 1.9566104792000203e-05, "loss": 2.3831, "step": 10460 }, { "epoch": 0.06580637558187497, "grad_norm": 7.895917892456055, "learning_rate": 1.956568569105555e-05, "loss": 2.5763, "step": 10470 }, { "epoch": 0.06586922789857208, "grad_norm": 7.778244495391846, "learning_rate": 1.9565266590110898e-05, "loss": 2.4349, "step": 10480 }, { "epoch": 0.06593208021526918, "grad_norm": 7.504886627197266, "learning_rate": 1.9564847489166245e-05, "loss": 2.5061, "step": 10490 }, { "epoch": 0.0659949325319663, "grad_norm": 11.029016494750977, "learning_rate": 1.956442838822159e-05, "loss": 2.4106, "step": 10500 }, { "epoch": 0.06605778484866341, "grad_norm": 8.39664363861084, "learning_rate": 1.9564009287276935e-05, "loss": 2.7254, "step": 10510 }, { "epoch": 0.06612063716536051, "grad_norm": 8.360917091369629, "learning_rate": 1.9563590186332283e-05, "loss": 2.2116, "step": 10520 }, { "epoch": 0.06618348948205763, "grad_norm": 6.493826866149902, "learning_rate": 1.956317108538763e-05, "loss": 2.791, "step": 10530 }, { "epoch": 0.06624634179875474, "grad_norm": 5.820472240447998, "learning_rate": 1.9562751984442973e-05, "loss": 2.3287, "step": 10540 }, { "epoch": 0.06630919411545184, "grad_norm": 8.29838752746582, "learning_rate": 1.956233288349832e-05, "loss": 2.6331, "step": 10550 }, { "epoch": 0.06637204643214896, "grad_norm": 6.8962082862854, "learning_rate": 1.9561913782553667e-05, "loss": 2.5689, "step": 10560 }, { "epoch": 0.06643489874884607, "grad_norm": 8.794946670532227, "learning_rate": 1.9561494681609014e-05, "loss": 2.8286, "step": 10570 }, { "epoch": 0.06649775106554318, "grad_norm": 7.809574604034424, "learning_rate": 1.956107558066436e-05, "loss": 2.3104, "step": 10580 }, { "epoch": 0.06656060338224029, "grad_norm": 6.886219024658203, "learning_rate": 1.9560656479719705e-05, "loss": 2.42, "step": 10590 }, { "epoch": 0.0666234556989374, "grad_norm": 9.957528114318848, "learning_rate": 1.9560237378775052e-05, "loss": 2.5591, "step": 10600 }, { "epoch": 0.06668630801563451, "grad_norm": 8.68337345123291, "learning_rate": 1.95598182778304e-05, "loss": 2.5122, "step": 10610 }, { "epoch": 0.06674916033233162, "grad_norm": 7.782831192016602, "learning_rate": 1.9559399176885746e-05, "loss": 2.5188, "step": 10620 }, { "epoch": 0.06681201264902874, "grad_norm": 8.128057479858398, "learning_rate": 1.9558980075941094e-05, "loss": 2.4072, "step": 10630 }, { "epoch": 0.06687486496572585, "grad_norm": 7.294857025146484, "learning_rate": 1.955856097499644e-05, "loss": 2.6447, "step": 10640 }, { "epoch": 0.06693771728242295, "grad_norm": 7.196445941925049, "learning_rate": 1.9558141874051784e-05, "loss": 2.4874, "step": 10650 }, { "epoch": 0.06700056959912007, "grad_norm": 9.046404838562012, "learning_rate": 1.955772277310713e-05, "loss": 2.4762, "step": 10660 }, { "epoch": 0.06706342191581718, "grad_norm": 7.872631072998047, "learning_rate": 1.955730367216248e-05, "loss": 2.7471, "step": 10670 }, { "epoch": 0.06712627423251429, "grad_norm": 7.524047374725342, "learning_rate": 1.9556884571217825e-05, "loss": 2.3539, "step": 10680 }, { "epoch": 0.0671891265492114, "grad_norm": 8.810508728027344, "learning_rate": 1.9556465470273173e-05, "loss": 2.4916, "step": 10690 }, { "epoch": 0.06725197886590852, "grad_norm": 8.720710754394531, "learning_rate": 1.955604636932852e-05, "loss": 2.7151, "step": 10700 }, { "epoch": 0.06731483118260562, "grad_norm": 7.8059587478637695, "learning_rate": 1.9555627268383867e-05, "loss": 2.5237, "step": 10710 }, { "epoch": 0.06737768349930273, "grad_norm": 9.27662467956543, "learning_rate": 1.955520816743921e-05, "loss": 2.6769, "step": 10720 }, { "epoch": 0.06744053581599985, "grad_norm": 9.518790245056152, "learning_rate": 1.9554789066494557e-05, "loss": 2.7134, "step": 10730 }, { "epoch": 0.06750338813269695, "grad_norm": 8.18859577178955, "learning_rate": 1.9554369965549905e-05, "loss": 2.7251, "step": 10740 }, { "epoch": 0.06756624044939406, "grad_norm": 8.43634033203125, "learning_rate": 1.955395086460525e-05, "loss": 2.7782, "step": 10750 }, { "epoch": 0.06762909276609118, "grad_norm": 6.717535495758057, "learning_rate": 1.9553531763660595e-05, "loss": 2.6271, "step": 10760 }, { "epoch": 0.06769194508278828, "grad_norm": 8.40380573272705, "learning_rate": 1.9553112662715942e-05, "loss": 2.4589, "step": 10770 }, { "epoch": 0.0677547973994854, "grad_norm": 7.56313419342041, "learning_rate": 1.955269356177129e-05, "loss": 2.4065, "step": 10780 }, { "epoch": 0.06781764971618251, "grad_norm": 8.096832275390625, "learning_rate": 1.9552274460826636e-05, "loss": 2.3685, "step": 10790 }, { "epoch": 0.06788050203287962, "grad_norm": 8.289287567138672, "learning_rate": 1.9551855359881984e-05, "loss": 2.3128, "step": 10800 }, { "epoch": 0.06794335434957673, "grad_norm": 6.763031482696533, "learning_rate": 1.9551436258937327e-05, "loss": 2.438, "step": 10810 }, { "epoch": 0.06800620666627384, "grad_norm": 9.077466011047363, "learning_rate": 1.9551017157992674e-05, "loss": 2.5176, "step": 10820 }, { "epoch": 0.06806905898297096, "grad_norm": 9.284290313720703, "learning_rate": 1.955059805704802e-05, "loss": 2.4859, "step": 10830 }, { "epoch": 0.06813191129966806, "grad_norm": 9.314769744873047, "learning_rate": 1.955017895610337e-05, "loss": 2.4891, "step": 10840 }, { "epoch": 0.06819476361636517, "grad_norm": 8.047820091247559, "learning_rate": 1.9549759855158716e-05, "loss": 2.4733, "step": 10850 }, { "epoch": 0.06825761593306229, "grad_norm": 9.353954315185547, "learning_rate": 1.9549340754214063e-05, "loss": 2.4225, "step": 10860 }, { "epoch": 0.06832046824975939, "grad_norm": 8.54472541809082, "learning_rate": 1.954892165326941e-05, "loss": 2.7112, "step": 10870 }, { "epoch": 0.0683833205664565, "grad_norm": 7.9946160316467285, "learning_rate": 1.9548502552324757e-05, "loss": 2.4115, "step": 10880 }, { "epoch": 0.06844617288315362, "grad_norm": 6.7684831619262695, "learning_rate": 1.9548083451380104e-05, "loss": 2.5891, "step": 10890 }, { "epoch": 0.06850902519985072, "grad_norm": 8.099825859069824, "learning_rate": 1.9547664350435447e-05, "loss": 2.3393, "step": 10900 }, { "epoch": 0.06857187751654784, "grad_norm": 9.216423034667969, "learning_rate": 1.9547245249490795e-05, "loss": 2.8297, "step": 10910 }, { "epoch": 0.06863472983324495, "grad_norm": 8.851109504699707, "learning_rate": 1.954682614854614e-05, "loss": 2.2189, "step": 10920 }, { "epoch": 0.06869758214994205, "grad_norm": 7.840999603271484, "learning_rate": 1.954640704760149e-05, "loss": 2.4758, "step": 10930 }, { "epoch": 0.06876043446663917, "grad_norm": 7.600790023803711, "learning_rate": 1.9545987946656832e-05, "loss": 2.4172, "step": 10940 }, { "epoch": 0.06882328678333628, "grad_norm": 9.331750869750977, "learning_rate": 1.954556884571218e-05, "loss": 2.4865, "step": 10950 }, { "epoch": 0.06888613910003338, "grad_norm": 7.492789268493652, "learning_rate": 1.9545149744767527e-05, "loss": 2.5539, "step": 10960 }, { "epoch": 0.0689489914167305, "grad_norm": 7.5510430335998535, "learning_rate": 1.9544730643822874e-05, "loss": 2.5407, "step": 10970 }, { "epoch": 0.06901184373342761, "grad_norm": 7.0119757652282715, "learning_rate": 1.9544311542878217e-05, "loss": 2.5054, "step": 10980 }, { "epoch": 0.06907469605012473, "grad_norm": 8.019808769226074, "learning_rate": 1.9543892441933564e-05, "loss": 2.4257, "step": 10990 }, { "epoch": 0.06913754836682183, "grad_norm": 8.645013809204102, "learning_rate": 1.954347334098891e-05, "loss": 2.5276, "step": 11000 }, { "epoch": 0.06920040068351895, "grad_norm": 6.927749156951904, "learning_rate": 1.954305424004426e-05, "loss": 2.7266, "step": 11010 }, { "epoch": 0.06926325300021606, "grad_norm": 7.718427658081055, "learning_rate": 1.9542635139099606e-05, "loss": 2.2747, "step": 11020 }, { "epoch": 0.06932610531691316, "grad_norm": 6.9200968742370605, "learning_rate": 1.954221603815495e-05, "loss": 2.4955, "step": 11030 }, { "epoch": 0.06938895763361028, "grad_norm": 7.879162311553955, "learning_rate": 1.9541796937210296e-05, "loss": 2.4849, "step": 11040 }, { "epoch": 0.06945180995030739, "grad_norm": 7.392335414886475, "learning_rate": 1.9541377836265643e-05, "loss": 2.4761, "step": 11050 }, { "epoch": 0.06951466226700449, "grad_norm": 8.46662425994873, "learning_rate": 1.954095873532099e-05, "loss": 2.5295, "step": 11060 }, { "epoch": 0.06957751458370161, "grad_norm": 7.7564287185668945, "learning_rate": 1.9540539634376338e-05, "loss": 2.4851, "step": 11070 }, { "epoch": 0.06964036690039872, "grad_norm": 8.08362865447998, "learning_rate": 1.9540120533431685e-05, "loss": 2.2313, "step": 11080 }, { "epoch": 0.06970321921709582, "grad_norm": 8.638923645019531, "learning_rate": 1.953970143248703e-05, "loss": 2.3547, "step": 11090 }, { "epoch": 0.06976607153379294, "grad_norm": 7.311507701873779, "learning_rate": 1.953928233154238e-05, "loss": 2.4118, "step": 11100 }, { "epoch": 0.06982892385049005, "grad_norm": 6.914936542510986, "learning_rate": 1.9538863230597726e-05, "loss": 2.4252, "step": 11110 }, { "epoch": 0.06989177616718716, "grad_norm": 7.176113605499268, "learning_rate": 1.953844412965307e-05, "loss": 2.6392, "step": 11120 }, { "epoch": 0.06995462848388427, "grad_norm": 8.129477500915527, "learning_rate": 1.9538025028708417e-05, "loss": 2.396, "step": 11130 }, { "epoch": 0.07001748080058139, "grad_norm": 8.857206344604492, "learning_rate": 1.9537605927763764e-05, "loss": 2.6338, "step": 11140 }, { "epoch": 0.0700803331172785, "grad_norm": 8.38614559173584, "learning_rate": 1.953718682681911e-05, "loss": 2.4761, "step": 11150 }, { "epoch": 0.0701431854339756, "grad_norm": 7.537355422973633, "learning_rate": 1.9536767725874454e-05, "loss": 2.39, "step": 11160 }, { "epoch": 0.07020603775067272, "grad_norm": 7.1328840255737305, "learning_rate": 1.95363486249298e-05, "loss": 2.361, "step": 11170 }, { "epoch": 0.07026889006736983, "grad_norm": 7.255983829498291, "learning_rate": 1.953592952398515e-05, "loss": 2.4905, "step": 11180 }, { "epoch": 0.07033174238406693, "grad_norm": 8.002547264099121, "learning_rate": 1.9535510423040496e-05, "loss": 2.3523, "step": 11190 }, { "epoch": 0.07039459470076405, "grad_norm": 7.435315132141113, "learning_rate": 1.9535091322095843e-05, "loss": 2.5576, "step": 11200 }, { "epoch": 0.07045744701746116, "grad_norm": 6.7415056228637695, "learning_rate": 1.9534672221151186e-05, "loss": 2.4429, "step": 11210 }, { "epoch": 0.07052029933415827, "grad_norm": 8.618391036987305, "learning_rate": 1.9534253120206533e-05, "loss": 2.4735, "step": 11220 }, { "epoch": 0.07058315165085538, "grad_norm": 7.767177581787109, "learning_rate": 1.953383401926188e-05, "loss": 2.9142, "step": 11230 }, { "epoch": 0.0706460039675525, "grad_norm": 7.603353023529053, "learning_rate": 1.9533414918317228e-05, "loss": 2.3744, "step": 11240 }, { "epoch": 0.0707088562842496, "grad_norm": 7.479285717010498, "learning_rate": 1.9532995817372575e-05, "loss": 2.2115, "step": 11250 }, { "epoch": 0.07077170860094671, "grad_norm": 7.70100736618042, "learning_rate": 1.953257671642792e-05, "loss": 2.5127, "step": 11260 }, { "epoch": 0.07083456091764383, "grad_norm": 7.1571736335754395, "learning_rate": 1.9532157615483265e-05, "loss": 2.4352, "step": 11270 }, { "epoch": 0.07089741323434093, "grad_norm": 7.968157768249512, "learning_rate": 1.9531738514538612e-05, "loss": 2.7268, "step": 11280 }, { "epoch": 0.07096026555103804, "grad_norm": 7.105053901672363, "learning_rate": 1.953131941359396e-05, "loss": 2.439, "step": 11290 }, { "epoch": 0.07102311786773516, "grad_norm": 6.176323413848877, "learning_rate": 1.9530900312649307e-05, "loss": 2.3334, "step": 11300 }, { "epoch": 0.07108597018443227, "grad_norm": 7.531483173370361, "learning_rate": 1.9530481211704654e-05, "loss": 2.5347, "step": 11310 }, { "epoch": 0.07114882250112937, "grad_norm": 8.726207733154297, "learning_rate": 1.953006211076e-05, "loss": 2.492, "step": 11320 }, { "epoch": 0.07121167481782649, "grad_norm": 7.908196926116943, "learning_rate": 1.9529643009815348e-05, "loss": 2.6788, "step": 11330 }, { "epoch": 0.0712745271345236, "grad_norm": 7.728970527648926, "learning_rate": 1.952922390887069e-05, "loss": 2.3624, "step": 11340 }, { "epoch": 0.0713373794512207, "grad_norm": 7.621332168579102, "learning_rate": 1.952880480792604e-05, "loss": 2.54, "step": 11350 }, { "epoch": 0.07140023176791782, "grad_norm": 6.608644008636475, "learning_rate": 1.9528385706981386e-05, "loss": 2.5584, "step": 11360 }, { "epoch": 0.07146308408461494, "grad_norm": 6.979635715484619, "learning_rate": 1.9527966606036733e-05, "loss": 2.1742, "step": 11370 }, { "epoch": 0.07152593640131204, "grad_norm": 7.518513202667236, "learning_rate": 1.9527547505092076e-05, "loss": 2.5554, "step": 11380 }, { "epoch": 0.07158878871800915, "grad_norm": 7.282397747039795, "learning_rate": 1.9527128404147423e-05, "loss": 2.4447, "step": 11390 }, { "epoch": 0.07165164103470627, "grad_norm": 7.44035005569458, "learning_rate": 1.952670930320277e-05, "loss": 2.2029, "step": 11400 }, { "epoch": 0.07171449335140337, "grad_norm": 7.8835859298706055, "learning_rate": 1.9526290202258118e-05, "loss": 2.3582, "step": 11410 }, { "epoch": 0.07177734566810048, "grad_norm": 9.01338005065918, "learning_rate": 1.9525871101313465e-05, "loss": 2.5426, "step": 11420 }, { "epoch": 0.0718401979847976, "grad_norm": 7.738584041595459, "learning_rate": 1.952545200036881e-05, "loss": 2.2553, "step": 11430 }, { "epoch": 0.0719030503014947, "grad_norm": 7.0059309005737305, "learning_rate": 1.9525032899424155e-05, "loss": 2.4217, "step": 11440 }, { "epoch": 0.07196590261819182, "grad_norm": 7.1120524406433105, "learning_rate": 1.9524613798479502e-05, "loss": 2.4836, "step": 11450 }, { "epoch": 0.07202875493488893, "grad_norm": 7.4890851974487305, "learning_rate": 1.952419469753485e-05, "loss": 2.4163, "step": 11460 }, { "epoch": 0.07209160725158605, "grad_norm": 7.924356460571289, "learning_rate": 1.9523775596590197e-05, "loss": 2.7207, "step": 11470 }, { "epoch": 0.07215445956828315, "grad_norm": 8.791280746459961, "learning_rate": 1.9523356495645544e-05, "loss": 2.3955, "step": 11480 }, { "epoch": 0.07221731188498026, "grad_norm": 7.378311634063721, "learning_rate": 1.952293739470089e-05, "loss": 2.4784, "step": 11490 }, { "epoch": 0.07228016420167738, "grad_norm": 9.543916702270508, "learning_rate": 1.9522518293756238e-05, "loss": 2.6184, "step": 11500 }, { "epoch": 0.07234301651837448, "grad_norm": 7.561647891998291, "learning_rate": 1.9522099192811585e-05, "loss": 2.5923, "step": 11510 }, { "epoch": 0.0724058688350716, "grad_norm": 7.390979290008545, "learning_rate": 1.952168009186693e-05, "loss": 2.4147, "step": 11520 }, { "epoch": 0.07246872115176871, "grad_norm": 7.260756015777588, "learning_rate": 1.9521260990922276e-05, "loss": 2.3274, "step": 11530 }, { "epoch": 0.07253157346846581, "grad_norm": 7.111080646514893, "learning_rate": 1.9520841889977623e-05, "loss": 2.488, "step": 11540 }, { "epoch": 0.07259442578516292, "grad_norm": 8.549944877624512, "learning_rate": 1.952042278903297e-05, "loss": 2.5328, "step": 11550 }, { "epoch": 0.07265727810186004, "grad_norm": 8.285893440246582, "learning_rate": 1.9520003688088313e-05, "loss": 2.5, "step": 11560 }, { "epoch": 0.07272013041855714, "grad_norm": 7.941225528717041, "learning_rate": 1.951958458714366e-05, "loss": 2.5202, "step": 11570 }, { "epoch": 0.07278298273525426, "grad_norm": 7.045867443084717, "learning_rate": 1.9519165486199008e-05, "loss": 2.541, "step": 11580 }, { "epoch": 0.07284583505195137, "grad_norm": 7.211911678314209, "learning_rate": 1.9518746385254355e-05, "loss": 2.2981, "step": 11590 }, { "epoch": 0.07290868736864847, "grad_norm": 6.470776557922363, "learning_rate": 1.95183272843097e-05, "loss": 2.5156, "step": 11600 }, { "epoch": 0.07297153968534559, "grad_norm": 7.176722049713135, "learning_rate": 1.9517908183365045e-05, "loss": 2.6306, "step": 11610 }, { "epoch": 0.0730343920020427, "grad_norm": 10.153510093688965, "learning_rate": 1.9517489082420393e-05, "loss": 2.476, "step": 11620 }, { "epoch": 0.07309724431873982, "grad_norm": 9.634344100952148, "learning_rate": 1.951706998147574e-05, "loss": 2.3988, "step": 11630 }, { "epoch": 0.07316009663543692, "grad_norm": 7.575964450836182, "learning_rate": 1.9516650880531087e-05, "loss": 2.5493, "step": 11640 }, { "epoch": 0.07322294895213403, "grad_norm": 7.2847065925598145, "learning_rate": 1.951623177958643e-05, "loss": 2.6312, "step": 11650 }, { "epoch": 0.07328580126883115, "grad_norm": 7.564513683319092, "learning_rate": 1.9515812678641777e-05, "loss": 2.3137, "step": 11660 }, { "epoch": 0.07334865358552825, "grad_norm": 7.918158531188965, "learning_rate": 1.9515393577697124e-05, "loss": 2.3743, "step": 11670 }, { "epoch": 0.07341150590222537, "grad_norm": 6.573338508605957, "learning_rate": 1.9515016386846936e-05, "loss": 2.692, "step": 11680 }, { "epoch": 0.07347435821892248, "grad_norm": 7.603901386260986, "learning_rate": 1.9514597285902283e-05, "loss": 2.4422, "step": 11690 }, { "epoch": 0.07353721053561958, "grad_norm": 8.727775573730469, "learning_rate": 1.951417818495763e-05, "loss": 2.4256, "step": 11700 }, { "epoch": 0.0736000628523167, "grad_norm": 7.944945812225342, "learning_rate": 1.9513759084012977e-05, "loss": 2.5131, "step": 11710 }, { "epoch": 0.07366291516901381, "grad_norm": 7.6638288497924805, "learning_rate": 1.951333998306832e-05, "loss": 2.6274, "step": 11720 }, { "epoch": 0.07372576748571091, "grad_norm": 7.521015644073486, "learning_rate": 1.9512920882123668e-05, "loss": 2.1635, "step": 11730 }, { "epoch": 0.07378861980240803, "grad_norm": 9.866744995117188, "learning_rate": 1.9512501781179015e-05, "loss": 2.3831, "step": 11740 }, { "epoch": 0.07385147211910514, "grad_norm": 8.70056438446045, "learning_rate": 1.9512082680234362e-05, "loss": 2.407, "step": 11750 }, { "epoch": 0.07391432443580224, "grad_norm": 8.276822090148926, "learning_rate": 1.951166357928971e-05, "loss": 2.3133, "step": 11760 }, { "epoch": 0.07397717675249936, "grad_norm": 7.402984619140625, "learning_rate": 1.9511244478345056e-05, "loss": 2.5265, "step": 11770 }, { "epoch": 0.07404002906919648, "grad_norm": 8.126667022705078, "learning_rate": 1.9510825377400403e-05, "loss": 2.3826, "step": 11780 }, { "epoch": 0.07410288138589358, "grad_norm": 7.4878950119018555, "learning_rate": 1.951040627645575e-05, "loss": 2.4999, "step": 11790 }, { "epoch": 0.07416573370259069, "grad_norm": 8.355165481567383, "learning_rate": 1.9509987175511097e-05, "loss": 2.5496, "step": 11800 }, { "epoch": 0.0742285860192878, "grad_norm": 6.998531818389893, "learning_rate": 1.950956807456644e-05, "loss": 2.5726, "step": 11810 }, { "epoch": 0.07429143833598492, "grad_norm": 8.8850736618042, "learning_rate": 1.9509148973621788e-05, "loss": 2.758, "step": 11820 }, { "epoch": 0.07435429065268202, "grad_norm": 7.972660064697266, "learning_rate": 1.9508729872677135e-05, "loss": 2.6275, "step": 11830 }, { "epoch": 0.07441714296937914, "grad_norm": 7.867787837982178, "learning_rate": 1.9508310771732482e-05, "loss": 2.3476, "step": 11840 }, { "epoch": 0.07447999528607625, "grad_norm": 8.236400604248047, "learning_rate": 1.950789167078783e-05, "loss": 2.4277, "step": 11850 }, { "epoch": 0.07454284760277335, "grad_norm": 7.272027492523193, "learning_rate": 1.9507472569843173e-05, "loss": 2.5366, "step": 11860 }, { "epoch": 0.07460569991947047, "grad_norm": 7.932986736297607, "learning_rate": 1.950705346889852e-05, "loss": 2.5213, "step": 11870 }, { "epoch": 0.07466855223616758, "grad_norm": 6.761380672454834, "learning_rate": 1.9506634367953867e-05, "loss": 2.4531, "step": 11880 }, { "epoch": 0.07473140455286469, "grad_norm": 9.38737678527832, "learning_rate": 1.9506215267009214e-05, "loss": 2.0328, "step": 11890 }, { "epoch": 0.0747942568695618, "grad_norm": 11.342555046081543, "learning_rate": 1.9505796166064558e-05, "loss": 2.4436, "step": 11900 }, { "epoch": 0.07485710918625892, "grad_norm": 8.043527603149414, "learning_rate": 1.9505377065119905e-05, "loss": 2.5713, "step": 11910 }, { "epoch": 0.07491996150295602, "grad_norm": 7.346526622772217, "learning_rate": 1.9504957964175252e-05, "loss": 2.4017, "step": 11920 }, { "epoch": 0.07498281381965313, "grad_norm": 8.646933555603027, "learning_rate": 1.95045388632306e-05, "loss": 2.5347, "step": 11930 }, { "epoch": 0.07504566613635025, "grad_norm": 7.251510143280029, "learning_rate": 1.9504119762285946e-05, "loss": 2.4801, "step": 11940 }, { "epoch": 0.07510851845304735, "grad_norm": 9.1826810836792, "learning_rate": 1.9503700661341293e-05, "loss": 2.499, "step": 11950 }, { "epoch": 0.07517137076974446, "grad_norm": 7.73813009262085, "learning_rate": 1.950328156039664e-05, "loss": 2.5228, "step": 11960 }, { "epoch": 0.07523422308644158, "grad_norm": 7.395781517028809, "learning_rate": 1.9502862459451984e-05, "loss": 2.5345, "step": 11970 }, { "epoch": 0.0752970754031387, "grad_norm": 8.236130714416504, "learning_rate": 1.950244335850733e-05, "loss": 2.3011, "step": 11980 }, { "epoch": 0.0753599277198358, "grad_norm": 7.84546422958374, "learning_rate": 1.9502024257562678e-05, "loss": 2.3883, "step": 11990 }, { "epoch": 0.07542278003653291, "grad_norm": 7.490150451660156, "learning_rate": 1.9501605156618025e-05, "loss": 2.4918, "step": 12000 }, { "epoch": 0.07548563235323003, "grad_norm": 7.3955488204956055, "learning_rate": 1.9501186055673372e-05, "loss": 2.6569, "step": 12010 }, { "epoch": 0.07554848466992713, "grad_norm": 7.579392433166504, "learning_rate": 1.950076695472872e-05, "loss": 2.526, "step": 12020 }, { "epoch": 0.07561133698662424, "grad_norm": 7.453882217407227, "learning_rate": 1.9500347853784063e-05, "loss": 2.2517, "step": 12030 }, { "epoch": 0.07567418930332136, "grad_norm": 7.354152202606201, "learning_rate": 1.949992875283941e-05, "loss": 2.5754, "step": 12040 }, { "epoch": 0.07573704162001846, "grad_norm": 7.82139253616333, "learning_rate": 1.9499509651894757e-05, "loss": 2.2389, "step": 12050 }, { "epoch": 0.07579989393671557, "grad_norm": 7.103209495544434, "learning_rate": 1.9499090550950104e-05, "loss": 2.323, "step": 12060 }, { "epoch": 0.07586274625341269, "grad_norm": 7.013645648956299, "learning_rate": 1.949867145000545e-05, "loss": 2.4986, "step": 12070 }, { "epoch": 0.07592559857010979, "grad_norm": 6.946286678314209, "learning_rate": 1.9498252349060795e-05, "loss": 2.5583, "step": 12080 }, { "epoch": 0.0759884508868069, "grad_norm": 7.744342803955078, "learning_rate": 1.9497833248116142e-05, "loss": 2.4402, "step": 12090 }, { "epoch": 0.07605130320350402, "grad_norm": 9.439397811889648, "learning_rate": 1.949741414717149e-05, "loss": 2.6846, "step": 12100 }, { "epoch": 0.07611415552020112, "grad_norm": 7.520416736602783, "learning_rate": 1.9496995046226836e-05, "loss": 2.4189, "step": 12110 }, { "epoch": 0.07617700783689824, "grad_norm": 7.313348770141602, "learning_rate": 1.949657594528218e-05, "loss": 2.332, "step": 12120 }, { "epoch": 0.07623986015359535, "grad_norm": 8.317850112915039, "learning_rate": 1.9496156844337527e-05, "loss": 2.45, "step": 12130 }, { "epoch": 0.07630271247029247, "grad_norm": 7.855832099914551, "learning_rate": 1.9495737743392874e-05, "loss": 2.3219, "step": 12140 }, { "epoch": 0.07636556478698957, "grad_norm": 7.824488639831543, "learning_rate": 1.949531864244822e-05, "loss": 2.5782, "step": 12150 }, { "epoch": 0.07642841710368668, "grad_norm": 6.932237148284912, "learning_rate": 1.9494899541503568e-05, "loss": 2.3706, "step": 12160 }, { "epoch": 0.0764912694203838, "grad_norm": 8.48770523071289, "learning_rate": 1.9494480440558915e-05, "loss": 2.335, "step": 12170 }, { "epoch": 0.0765541217370809, "grad_norm": 7.064522743225098, "learning_rate": 1.9494061339614262e-05, "loss": 2.1833, "step": 12180 }, { "epoch": 0.07661697405377801, "grad_norm": 8.988115310668945, "learning_rate": 1.949364223866961e-05, "loss": 2.6044, "step": 12190 }, { "epoch": 0.07667982637047513, "grad_norm": 7.177382469177246, "learning_rate": 1.9493223137724956e-05, "loss": 2.3806, "step": 12200 }, { "epoch": 0.07674267868717223, "grad_norm": 11.10888385772705, "learning_rate": 1.94928040367803e-05, "loss": 2.4232, "step": 12210 }, { "epoch": 0.07680553100386935, "grad_norm": 7.233067035675049, "learning_rate": 1.9492384935835647e-05, "loss": 2.2771, "step": 12220 }, { "epoch": 0.07686838332056646, "grad_norm": 7.85338830947876, "learning_rate": 1.9491965834890994e-05, "loss": 2.4547, "step": 12230 }, { "epoch": 0.07693123563726356, "grad_norm": 8.474621772766113, "learning_rate": 1.949154673394634e-05, "loss": 2.4507, "step": 12240 }, { "epoch": 0.07699408795396068, "grad_norm": 7.005478858947754, "learning_rate": 1.949112763300169e-05, "loss": 2.4807, "step": 12250 }, { "epoch": 0.07705694027065779, "grad_norm": 16.7135009765625, "learning_rate": 1.9490708532057032e-05, "loss": 2.4233, "step": 12260 }, { "epoch": 0.0771197925873549, "grad_norm": 7.83366060256958, "learning_rate": 1.949028943111238e-05, "loss": 2.4988, "step": 12270 }, { "epoch": 0.07718264490405201, "grad_norm": 7.76155424118042, "learning_rate": 1.9489870330167726e-05, "loss": 2.6238, "step": 12280 }, { "epoch": 0.07724549722074912, "grad_norm": 6.5174760818481445, "learning_rate": 1.9489451229223073e-05, "loss": 2.3573, "step": 12290 }, { "epoch": 0.07730834953744624, "grad_norm": 8.594646453857422, "learning_rate": 1.9489032128278417e-05, "loss": 2.5484, "step": 12300 }, { "epoch": 0.07737120185414334, "grad_norm": 8.022442817687988, "learning_rate": 1.9488613027333764e-05, "loss": 2.4529, "step": 12310 }, { "epoch": 0.07743405417084046, "grad_norm": 7.432999610900879, "learning_rate": 1.948819392638911e-05, "loss": 2.2009, "step": 12320 }, { "epoch": 0.07749690648753757, "grad_norm": 8.464859008789062, "learning_rate": 1.9487774825444458e-05, "loss": 2.3583, "step": 12330 }, { "epoch": 0.07755975880423467, "grad_norm": 7.359055519104004, "learning_rate": 1.9487355724499805e-05, "loss": 2.3175, "step": 12340 }, { "epoch": 0.07762261112093179, "grad_norm": 8.959524154663086, "learning_rate": 1.948693662355515e-05, "loss": 2.4113, "step": 12350 }, { "epoch": 0.0776854634376289, "grad_norm": 7.489354133605957, "learning_rate": 1.9486517522610496e-05, "loss": 2.5092, "step": 12360 }, { "epoch": 0.077748315754326, "grad_norm": 8.70345401763916, "learning_rate": 1.9486098421665843e-05, "loss": 2.2939, "step": 12370 }, { "epoch": 0.07781116807102312, "grad_norm": 6.111259937286377, "learning_rate": 1.948567932072119e-05, "loss": 2.3819, "step": 12380 }, { "epoch": 0.07787402038772023, "grad_norm": 7.629851341247559, "learning_rate": 1.9485260219776537e-05, "loss": 2.3942, "step": 12390 }, { "epoch": 0.07793687270441733, "grad_norm": 9.003440856933594, "learning_rate": 1.9484841118831884e-05, "loss": 2.5155, "step": 12400 }, { "epoch": 0.07799972502111445, "grad_norm": 6.7371039390563965, "learning_rate": 1.948442201788723e-05, "loss": 2.3328, "step": 12410 }, { "epoch": 0.07806257733781156, "grad_norm": 8.926176071166992, "learning_rate": 1.948400291694258e-05, "loss": 2.606, "step": 12420 }, { "epoch": 0.07812542965450867, "grad_norm": 7.441720962524414, "learning_rate": 1.9483583815997922e-05, "loss": 2.458, "step": 12430 }, { "epoch": 0.07818828197120578, "grad_norm": 7.5572662353515625, "learning_rate": 1.948316471505327e-05, "loss": 2.6717, "step": 12440 }, { "epoch": 0.0782511342879029, "grad_norm": 8.235877990722656, "learning_rate": 1.9482745614108616e-05, "loss": 2.5934, "step": 12450 }, { "epoch": 0.0783139866046, "grad_norm": 8.707640647888184, "learning_rate": 1.9482326513163963e-05, "loss": 2.49, "step": 12460 }, { "epoch": 0.07837683892129711, "grad_norm": 8.33491039276123, "learning_rate": 1.948190741221931e-05, "loss": 2.4845, "step": 12470 }, { "epoch": 0.07843969123799423, "grad_norm": 7.912849426269531, "learning_rate": 1.9481488311274654e-05, "loss": 2.2593, "step": 12480 }, { "epoch": 0.07850254355469134, "grad_norm": 7.304262638092041, "learning_rate": 1.948106921033e-05, "loss": 2.1579, "step": 12490 }, { "epoch": 0.07856539587138844, "grad_norm": 7.650024890899658, "learning_rate": 1.9480650109385348e-05, "loss": 2.4837, "step": 12500 }, { "epoch": 0.07862824818808556, "grad_norm": 9.099596977233887, "learning_rate": 1.9480231008440695e-05, "loss": 2.302, "step": 12510 }, { "epoch": 0.07869110050478267, "grad_norm": 7.692058563232422, "learning_rate": 1.947981190749604e-05, "loss": 2.5345, "step": 12520 }, { "epoch": 0.07875395282147978, "grad_norm": 8.309165000915527, "learning_rate": 1.9479392806551386e-05, "loss": 2.4045, "step": 12530 }, { "epoch": 0.07881680513817689, "grad_norm": 7.551580905914307, "learning_rate": 1.9478973705606733e-05, "loss": 2.2348, "step": 12540 }, { "epoch": 0.078879657454874, "grad_norm": 7.662278175354004, "learning_rate": 1.947855460466208e-05, "loss": 2.4571, "step": 12550 }, { "epoch": 0.0789425097715711, "grad_norm": 8.074088096618652, "learning_rate": 1.9478135503717427e-05, "loss": 2.2866, "step": 12560 }, { "epoch": 0.07900536208826822, "grad_norm": 7.268442153930664, "learning_rate": 1.9477716402772774e-05, "loss": 2.3656, "step": 12570 }, { "epoch": 0.07906821440496534, "grad_norm": 7.288553237915039, "learning_rate": 1.947729730182812e-05, "loss": 2.6178, "step": 12580 }, { "epoch": 0.07913106672166244, "grad_norm": 8.39034366607666, "learning_rate": 1.9476878200883465e-05, "loss": 2.4084, "step": 12590 }, { "epoch": 0.07919391903835955, "grad_norm": 7.558828353881836, "learning_rate": 1.9476459099938812e-05, "loss": 2.4615, "step": 12600 }, { "epoch": 0.07925677135505667, "grad_norm": 6.886292934417725, "learning_rate": 1.947603999899416e-05, "loss": 2.3437, "step": 12610 }, { "epoch": 0.07931962367175377, "grad_norm": 9.1254243850708, "learning_rate": 1.9475620898049506e-05, "loss": 2.3196, "step": 12620 }, { "epoch": 0.07938247598845088, "grad_norm": 7.684484958648682, "learning_rate": 1.9475201797104853e-05, "loss": 2.2172, "step": 12630 }, { "epoch": 0.079445328305148, "grad_norm": 6.235585689544678, "learning_rate": 1.94747826961602e-05, "loss": 2.1569, "step": 12640 }, { "epoch": 0.07950818062184511, "grad_norm": 7.675627708435059, "learning_rate": 1.9474363595215547e-05, "loss": 2.414, "step": 12650 }, { "epoch": 0.07957103293854222, "grad_norm": 7.568105697631836, "learning_rate": 1.947394449427089e-05, "loss": 2.2094, "step": 12660 }, { "epoch": 0.07963388525523933, "grad_norm": 8.476950645446777, "learning_rate": 1.9473525393326238e-05, "loss": 2.4608, "step": 12670 }, { "epoch": 0.07969673757193645, "grad_norm": 8.452275276184082, "learning_rate": 1.9473106292381585e-05, "loss": 2.3468, "step": 12680 }, { "epoch": 0.07975958988863355, "grad_norm": 7.434908866882324, "learning_rate": 1.9472687191436932e-05, "loss": 2.3847, "step": 12690 }, { "epoch": 0.07982244220533066, "grad_norm": 8.017882347106934, "learning_rate": 1.9472268090492276e-05, "loss": 2.2582, "step": 12700 }, { "epoch": 0.07988529452202778, "grad_norm": 7.4637837409973145, "learning_rate": 1.9471848989547623e-05, "loss": 2.3885, "step": 12710 }, { "epoch": 0.07994814683872488, "grad_norm": 8.98967456817627, "learning_rate": 1.947142988860297e-05, "loss": 2.3662, "step": 12720 }, { "epoch": 0.080010999155422, "grad_norm": 8.459840774536133, "learning_rate": 1.9471010787658317e-05, "loss": 2.2906, "step": 12730 }, { "epoch": 0.08007385147211911, "grad_norm": 7.0828986167907715, "learning_rate": 1.947059168671366e-05, "loss": 2.3257, "step": 12740 }, { "epoch": 0.08013670378881621, "grad_norm": 9.346085548400879, "learning_rate": 1.9470172585769008e-05, "loss": 2.5332, "step": 12750 }, { "epoch": 0.08019955610551333, "grad_norm": 10.998466491699219, "learning_rate": 1.9469753484824355e-05, "loss": 2.3698, "step": 12760 }, { "epoch": 0.08026240842221044, "grad_norm": 8.243062019348145, "learning_rate": 1.9469334383879702e-05, "loss": 2.4414, "step": 12770 }, { "epoch": 0.08032526073890754, "grad_norm": 8.127972602844238, "learning_rate": 1.946891528293505e-05, "loss": 2.3582, "step": 12780 }, { "epoch": 0.08038811305560466, "grad_norm": 8.575108528137207, "learning_rate": 1.9468496181990396e-05, "loss": 2.2728, "step": 12790 }, { "epoch": 0.08045096537230177, "grad_norm": 7.837752342224121, "learning_rate": 1.9468077081045743e-05, "loss": 2.1827, "step": 12800 }, { "epoch": 0.08051381768899889, "grad_norm": 7.2447190284729, "learning_rate": 1.946765798010109e-05, "loss": 2.3687, "step": 12810 }, { "epoch": 0.08057667000569599, "grad_norm": 7.327709674835205, "learning_rate": 1.9467238879156438e-05, "loss": 2.5204, "step": 12820 }, { "epoch": 0.0806395223223931, "grad_norm": 7.249586582183838, "learning_rate": 1.946681977821178e-05, "loss": 2.5019, "step": 12830 }, { "epoch": 0.08070237463909022, "grad_norm": 7.0609211921691895, "learning_rate": 1.9466400677267128e-05, "loss": 2.4995, "step": 12840 }, { "epoch": 0.08076522695578732, "grad_norm": 6.717433929443359, "learning_rate": 1.9465981576322475e-05, "loss": 2.5919, "step": 12850 }, { "epoch": 0.08082807927248443, "grad_norm": 6.879463195800781, "learning_rate": 1.9465562475377822e-05, "loss": 2.3503, "step": 12860 }, { "epoch": 0.08089093158918155, "grad_norm": 9.135047912597656, "learning_rate": 1.946514337443317e-05, "loss": 2.5435, "step": 12870 }, { "epoch": 0.08095378390587865, "grad_norm": 7.627810478210449, "learning_rate": 1.9464724273488513e-05, "loss": 2.495, "step": 12880 }, { "epoch": 0.08101663622257577, "grad_norm": 7.3780341148376465, "learning_rate": 1.946430517254386e-05, "loss": 2.3788, "step": 12890 }, { "epoch": 0.08107948853927288, "grad_norm": 7.583935260772705, "learning_rate": 1.9463886071599207e-05, "loss": 2.7461, "step": 12900 }, { "epoch": 0.08114234085596998, "grad_norm": 7.74154806137085, "learning_rate": 1.9463466970654554e-05, "loss": 2.3903, "step": 12910 }, { "epoch": 0.0812051931726671, "grad_norm": 7.6371073722839355, "learning_rate": 1.9463047869709898e-05, "loss": 2.5167, "step": 12920 }, { "epoch": 0.08126804548936421, "grad_norm": 8.727256774902344, "learning_rate": 1.9462628768765245e-05, "loss": 2.1948, "step": 12930 }, { "epoch": 0.08133089780606131, "grad_norm": 7.339801788330078, "learning_rate": 1.9462209667820592e-05, "loss": 2.3406, "step": 12940 }, { "epoch": 0.08139375012275843, "grad_norm": 7.481237888336182, "learning_rate": 1.946179056687594e-05, "loss": 2.1842, "step": 12950 }, { "epoch": 0.08145660243945554, "grad_norm": 8.10486125946045, "learning_rate": 1.9461371465931286e-05, "loss": 2.2285, "step": 12960 }, { "epoch": 0.08151945475615266, "grad_norm": 6.663724899291992, "learning_rate": 1.946095236498663e-05, "loss": 2.3499, "step": 12970 }, { "epoch": 0.08158230707284976, "grad_norm": 7.401335716247559, "learning_rate": 1.9460533264041977e-05, "loss": 2.3825, "step": 12980 }, { "epoch": 0.08164515938954688, "grad_norm": 7.259987831115723, "learning_rate": 1.9460114163097324e-05, "loss": 2.2446, "step": 12990 }, { "epoch": 0.08170801170624399, "grad_norm": 8.05159854888916, "learning_rate": 1.945969506215267e-05, "loss": 2.4418, "step": 13000 }, { "epoch": 0.08177086402294109, "grad_norm": 7.465734004974365, "learning_rate": 1.9459275961208018e-05, "loss": 2.5513, "step": 13010 }, { "epoch": 0.08183371633963821, "grad_norm": 6.613677501678467, "learning_rate": 1.9458856860263365e-05, "loss": 2.445, "step": 13020 }, { "epoch": 0.08189656865633532, "grad_norm": 7.202596664428711, "learning_rate": 1.9458437759318712e-05, "loss": 2.4428, "step": 13030 }, { "epoch": 0.08195942097303242, "grad_norm": 8.372621536254883, "learning_rate": 1.945801865837406e-05, "loss": 2.4775, "step": 13040 }, { "epoch": 0.08202227328972954, "grad_norm": 7.697512149810791, "learning_rate": 1.9457599557429403e-05, "loss": 2.4491, "step": 13050 }, { "epoch": 0.08208512560642665, "grad_norm": 7.001658916473389, "learning_rate": 1.945718045648475e-05, "loss": 2.3397, "step": 13060 }, { "epoch": 0.08214797792312376, "grad_norm": 7.712573051452637, "learning_rate": 1.9456761355540097e-05, "loss": 2.2824, "step": 13070 }, { "epoch": 0.08221083023982087, "grad_norm": 7.388295650482178, "learning_rate": 1.9456342254595444e-05, "loss": 2.4061, "step": 13080 }, { "epoch": 0.08227368255651799, "grad_norm": 6.870701313018799, "learning_rate": 1.945592315365079e-05, "loss": 2.4606, "step": 13090 }, { "epoch": 0.08233653487321509, "grad_norm": 6.018819808959961, "learning_rate": 1.9455504052706135e-05, "loss": 2.3073, "step": 13100 }, { "epoch": 0.0823993871899122, "grad_norm": 7.054746627807617, "learning_rate": 1.9455084951761482e-05, "loss": 2.3553, "step": 13110 }, { "epoch": 0.08246223950660932, "grad_norm": 6.665430545806885, "learning_rate": 1.945466585081683e-05, "loss": 2.4216, "step": 13120 }, { "epoch": 0.08252509182330642, "grad_norm": 6.984927177429199, "learning_rate": 1.9454246749872176e-05, "loss": 2.3272, "step": 13130 }, { "epoch": 0.08258794414000353, "grad_norm": 7.989890098571777, "learning_rate": 1.945382764892752e-05, "loss": 2.5116, "step": 13140 }, { "epoch": 0.08265079645670065, "grad_norm": 7.233949184417725, "learning_rate": 1.9453408547982867e-05, "loss": 2.227, "step": 13150 }, { "epoch": 0.08271364877339776, "grad_norm": 8.56431770324707, "learning_rate": 1.9452989447038214e-05, "loss": 2.6707, "step": 13160 }, { "epoch": 0.08277650109009486, "grad_norm": 8.269586563110352, "learning_rate": 1.945257034609356e-05, "loss": 2.2733, "step": 13170 }, { "epoch": 0.08283935340679198, "grad_norm": 9.309479713439941, "learning_rate": 1.945215124514891e-05, "loss": 2.6278, "step": 13180 }, { "epoch": 0.0829022057234891, "grad_norm": 8.378772735595703, "learning_rate": 1.9451732144204255e-05, "loss": 2.234, "step": 13190 }, { "epoch": 0.0829650580401862, "grad_norm": 8.369424819946289, "learning_rate": 1.9451313043259602e-05, "loss": 2.4723, "step": 13200 }, { "epoch": 0.08302791035688331, "grad_norm": 7.560756683349609, "learning_rate": 1.945089394231495e-05, "loss": 2.0368, "step": 13210 }, { "epoch": 0.08309076267358043, "grad_norm": 7.919924736022949, "learning_rate": 1.9450474841370293e-05, "loss": 2.1961, "step": 13220 }, { "epoch": 0.08315361499027753, "grad_norm": 7.153075218200684, "learning_rate": 1.945005574042564e-05, "loss": 2.3935, "step": 13230 }, { "epoch": 0.08321646730697464, "grad_norm": 9.653682708740234, "learning_rate": 1.9449636639480987e-05, "loss": 2.4894, "step": 13240 }, { "epoch": 0.08327931962367176, "grad_norm": 8.509584426879883, "learning_rate": 1.9449217538536334e-05, "loss": 2.3599, "step": 13250 }, { "epoch": 0.08334217194036886, "grad_norm": 7.730416297912598, "learning_rate": 1.944879843759168e-05, "loss": 2.4551, "step": 13260 }, { "epoch": 0.08340502425706597, "grad_norm": 7.178668975830078, "learning_rate": 1.944837933664703e-05, "loss": 2.3364, "step": 13270 }, { "epoch": 0.08346787657376309, "grad_norm": 7.232913494110107, "learning_rate": 1.9447960235702372e-05, "loss": 2.1589, "step": 13280 }, { "epoch": 0.08353072889046019, "grad_norm": 7.251108169555664, "learning_rate": 1.944754113475772e-05, "loss": 2.3956, "step": 13290 }, { "epoch": 0.0835935812071573, "grad_norm": 7.310342788696289, "learning_rate": 1.9447122033813066e-05, "loss": 2.5985, "step": 13300 }, { "epoch": 0.08365643352385442, "grad_norm": 7.670332908630371, "learning_rate": 1.9446702932868413e-05, "loss": 2.1638, "step": 13310 }, { "epoch": 0.08371928584055154, "grad_norm": 7.718730449676514, "learning_rate": 1.9446283831923757e-05, "loss": 2.4241, "step": 13320 }, { "epoch": 0.08378213815724864, "grad_norm": 7.897666931152344, "learning_rate": 1.9445864730979104e-05, "loss": 2.4343, "step": 13330 }, { "epoch": 0.08384499047394575, "grad_norm": 7.8425798416137695, "learning_rate": 1.944544563003445e-05, "loss": 2.4654, "step": 13340 }, { "epoch": 0.08390784279064287, "grad_norm": 6.980411052703857, "learning_rate": 1.94450265290898e-05, "loss": 2.4179, "step": 13350 }, { "epoch": 0.08397069510733997, "grad_norm": 7.646371364593506, "learning_rate": 1.9444607428145142e-05, "loss": 2.0922, "step": 13360 }, { "epoch": 0.08403354742403708, "grad_norm": 7.7674384117126465, "learning_rate": 1.944418832720049e-05, "loss": 2.4974, "step": 13370 }, { "epoch": 0.0840963997407342, "grad_norm": 7.743281841278076, "learning_rate": 1.9443769226255836e-05, "loss": 2.3573, "step": 13380 }, { "epoch": 0.0841592520574313, "grad_norm": 7.876406669616699, "learning_rate": 1.9443350125311183e-05, "loss": 2.3071, "step": 13390 }, { "epoch": 0.08422210437412841, "grad_norm": 7.47829532623291, "learning_rate": 1.944293102436653e-05, "loss": 2.5412, "step": 13400 }, { "epoch": 0.08428495669082553, "grad_norm": 7.951406478881836, "learning_rate": 1.9442511923421877e-05, "loss": 2.5606, "step": 13410 }, { "epoch": 0.08434780900752263, "grad_norm": 8.410359382629395, "learning_rate": 1.9442092822477224e-05, "loss": 2.3735, "step": 13420 }, { "epoch": 0.08441066132421975, "grad_norm": 7.944238662719727, "learning_rate": 1.944167372153257e-05, "loss": 2.411, "step": 13430 }, { "epoch": 0.08447351364091686, "grad_norm": 7.693236351013184, "learning_rate": 1.944125462058792e-05, "loss": 2.4179, "step": 13440 }, { "epoch": 0.08453636595761396, "grad_norm": 9.336854934692383, "learning_rate": 1.9440835519643262e-05, "loss": 2.4799, "step": 13450 }, { "epoch": 0.08459921827431108, "grad_norm": 7.201939582824707, "learning_rate": 1.944041641869861e-05, "loss": 2.2801, "step": 13460 }, { "epoch": 0.08466207059100819, "grad_norm": 7.295503616333008, "learning_rate": 1.9439997317753956e-05, "loss": 2.4658, "step": 13470 }, { "epoch": 0.08472492290770531, "grad_norm": 8.042071342468262, "learning_rate": 1.9439578216809304e-05, "loss": 2.3253, "step": 13480 }, { "epoch": 0.08478777522440241, "grad_norm": 8.716230392456055, "learning_rate": 1.943915911586465e-05, "loss": 2.4409, "step": 13490 }, { "epoch": 0.08485062754109952, "grad_norm": 8.266105651855469, "learning_rate": 1.9438740014919994e-05, "loss": 2.157, "step": 13500 }, { "epoch": 0.08491347985779664, "grad_norm": 8.517080307006836, "learning_rate": 1.943832091397534e-05, "loss": 2.4678, "step": 13510 }, { "epoch": 0.08497633217449374, "grad_norm": 7.580370903015137, "learning_rate": 1.943790181303069e-05, "loss": 2.2805, "step": 13520 }, { "epoch": 0.08503918449119086, "grad_norm": 7.174275875091553, "learning_rate": 1.9437482712086035e-05, "loss": 2.3805, "step": 13530 }, { "epoch": 0.08510203680788797, "grad_norm": 8.051619529724121, "learning_rate": 1.943706361114138e-05, "loss": 2.5313, "step": 13540 }, { "epoch": 0.08516488912458507, "grad_norm": 8.353168487548828, "learning_rate": 1.9436644510196726e-05, "loss": 2.5692, "step": 13550 }, { "epoch": 0.08522774144128219, "grad_norm": 8.41823673248291, "learning_rate": 1.9436225409252073e-05, "loss": 2.2947, "step": 13560 }, { "epoch": 0.0852905937579793, "grad_norm": 9.146581649780273, "learning_rate": 1.943580630830742e-05, "loss": 2.696, "step": 13570 }, { "epoch": 0.0853534460746764, "grad_norm": 7.7751874923706055, "learning_rate": 1.9435387207362767e-05, "loss": 2.2336, "step": 13580 }, { "epoch": 0.08541629839137352, "grad_norm": 7.192497730255127, "learning_rate": 1.9434968106418115e-05, "loss": 2.3596, "step": 13590 }, { "epoch": 0.08547915070807063, "grad_norm": 7.996962547302246, "learning_rate": 1.9434549005473458e-05, "loss": 2.318, "step": 13600 }, { "epoch": 0.08554200302476773, "grad_norm": 7.894214153289795, "learning_rate": 1.9434129904528805e-05, "loss": 2.323, "step": 13610 }, { "epoch": 0.08560485534146485, "grad_norm": 7.202610015869141, "learning_rate": 1.9433710803584152e-05, "loss": 2.3279, "step": 13620 }, { "epoch": 0.08566770765816197, "grad_norm": 7.913158416748047, "learning_rate": 1.94332917026395e-05, "loss": 2.3408, "step": 13630 }, { "epoch": 0.08573055997485908, "grad_norm": 7.302917003631592, "learning_rate": 1.9432872601694846e-05, "loss": 2.175, "step": 13640 }, { "epoch": 0.08579341229155618, "grad_norm": 7.996654510498047, "learning_rate": 1.9432453500750194e-05, "loss": 2.175, "step": 13650 }, { "epoch": 0.0858562646082533, "grad_norm": 7.731688022613525, "learning_rate": 1.943203439980554e-05, "loss": 2.555, "step": 13660 }, { "epoch": 0.08591911692495041, "grad_norm": 7.67500638961792, "learning_rate": 1.9431615298860884e-05, "loss": 2.279, "step": 13670 }, { "epoch": 0.08598196924164751, "grad_norm": 7.5716023445129395, "learning_rate": 1.943119619791623e-05, "loss": 2.3501, "step": 13680 }, { "epoch": 0.08604482155834463, "grad_norm": 7.677670955657959, "learning_rate": 1.943077709697158e-05, "loss": 2.2414, "step": 13690 }, { "epoch": 0.08610767387504174, "grad_norm": 6.14946985244751, "learning_rate": 1.9430357996026926e-05, "loss": 2.3285, "step": 13700 }, { "epoch": 0.08617052619173884, "grad_norm": 6.973321914672852, "learning_rate": 1.9429938895082273e-05, "loss": 2.2303, "step": 13710 }, { "epoch": 0.08623337850843596, "grad_norm": 7.703900337219238, "learning_rate": 1.9429519794137616e-05, "loss": 2.6025, "step": 13720 }, { "epoch": 0.08629623082513307, "grad_norm": 7.670722961425781, "learning_rate": 1.9429100693192963e-05, "loss": 2.2004, "step": 13730 }, { "epoch": 0.08635908314183018, "grad_norm": 7.5491766929626465, "learning_rate": 1.942868159224831e-05, "loss": 2.5119, "step": 13740 }, { "epoch": 0.08642193545852729, "grad_norm": 8.034729957580566, "learning_rate": 1.9428262491303657e-05, "loss": 2.4391, "step": 13750 }, { "epoch": 0.0864847877752244, "grad_norm": 6.830660820007324, "learning_rate": 1.9427843390359e-05, "loss": 2.1871, "step": 13760 }, { "epoch": 0.08654764009192151, "grad_norm": 7.351603031158447, "learning_rate": 1.9427424289414348e-05, "loss": 2.322, "step": 13770 }, { "epoch": 0.08661049240861862, "grad_norm": 7.406009674072266, "learning_rate": 1.9427005188469695e-05, "loss": 2.57, "step": 13780 }, { "epoch": 0.08667334472531574, "grad_norm": 7.952855587005615, "learning_rate": 1.9426586087525042e-05, "loss": 2.223, "step": 13790 }, { "epoch": 0.08673619704201284, "grad_norm": 7.72921085357666, "learning_rate": 1.942616698658039e-05, "loss": 2.291, "step": 13800 }, { "epoch": 0.08679904935870995, "grad_norm": 8.208253860473633, "learning_rate": 1.9425747885635737e-05, "loss": 2.4412, "step": 13810 }, { "epoch": 0.08686190167540707, "grad_norm": 7.6025800704956055, "learning_rate": 1.9425328784691084e-05, "loss": 2.3367, "step": 13820 }, { "epoch": 0.08692475399210418, "grad_norm": 8.467233657836914, "learning_rate": 1.942490968374643e-05, "loss": 2.5404, "step": 13830 }, { "epoch": 0.08698760630880129, "grad_norm": 8.328033447265625, "learning_rate": 1.9424490582801774e-05, "loss": 2.2398, "step": 13840 }, { "epoch": 0.0870504586254984, "grad_norm": 7.479990482330322, "learning_rate": 1.942407148185712e-05, "loss": 2.2371, "step": 13850 }, { "epoch": 0.08711331094219552, "grad_norm": 6.685925483703613, "learning_rate": 1.942365238091247e-05, "loss": 2.1465, "step": 13860 }, { "epoch": 0.08717616325889262, "grad_norm": 6.884483814239502, "learning_rate": 1.9423233279967816e-05, "loss": 2.3396, "step": 13870 }, { "epoch": 0.08723901557558973, "grad_norm": 7.625536918640137, "learning_rate": 1.9422814179023163e-05, "loss": 2.5991, "step": 13880 }, { "epoch": 0.08730186789228685, "grad_norm": 6.874094009399414, "learning_rate": 1.942239507807851e-05, "loss": 2.3927, "step": 13890 }, { "epoch": 0.08736472020898395, "grad_norm": 7.947897911071777, "learning_rate": 1.9421975977133853e-05, "loss": 2.4413, "step": 13900 }, { "epoch": 0.08742757252568106, "grad_norm": 7.6935715675354, "learning_rate": 1.94215568761892e-05, "loss": 2.6484, "step": 13910 }, { "epoch": 0.08749042484237818, "grad_norm": 7.301044464111328, "learning_rate": 1.9421137775244548e-05, "loss": 2.5653, "step": 13920 }, { "epoch": 0.08755327715907528, "grad_norm": 7.384383201599121, "learning_rate": 1.9420718674299895e-05, "loss": 2.3183, "step": 13930 }, { "epoch": 0.0876161294757724, "grad_norm": 7.303337097167969, "learning_rate": 1.9420299573355238e-05, "loss": 2.2904, "step": 13940 }, { "epoch": 0.08767898179246951, "grad_norm": 8.333943367004395, "learning_rate": 1.9419880472410585e-05, "loss": 2.2082, "step": 13950 }, { "epoch": 0.08774183410916661, "grad_norm": 7.08447790145874, "learning_rate": 1.9419461371465932e-05, "loss": 2.3771, "step": 13960 }, { "epoch": 0.08780468642586373, "grad_norm": 7.231597900390625, "learning_rate": 1.941904227052128e-05, "loss": 2.4866, "step": 13970 }, { "epoch": 0.08786753874256084, "grad_norm": 6.747964859008789, "learning_rate": 1.9418623169576623e-05, "loss": 2.399, "step": 13980 }, { "epoch": 0.08793039105925796, "grad_norm": 7.162939548492432, "learning_rate": 1.941820406863197e-05, "loss": 2.3983, "step": 13990 }, { "epoch": 0.08799324337595506, "grad_norm": 6.220515727996826, "learning_rate": 1.9417784967687317e-05, "loss": 2.3261, "step": 14000 }, { "epoch": 0.08805609569265217, "grad_norm": 7.695446968078613, "learning_rate": 1.9417365866742664e-05, "loss": 2.49, "step": 14010 }, { "epoch": 0.08811894800934929, "grad_norm": 8.09644889831543, "learning_rate": 1.941694676579801e-05, "loss": 2.3648, "step": 14020 }, { "epoch": 0.08818180032604639, "grad_norm": 6.877342224121094, "learning_rate": 1.941652766485336e-05, "loss": 2.088, "step": 14030 }, { "epoch": 0.0882446526427435, "grad_norm": 6.628556251525879, "learning_rate": 1.9416108563908706e-05, "loss": 2.3511, "step": 14040 }, { "epoch": 0.08830750495944062, "grad_norm": 7.792478084564209, "learning_rate": 1.9415689462964053e-05, "loss": 2.3154, "step": 14050 }, { "epoch": 0.08837035727613772, "grad_norm": 7.314945697784424, "learning_rate": 1.94152703620194e-05, "loss": 2.6679, "step": 14060 }, { "epoch": 0.08843320959283484, "grad_norm": 6.940366268157959, "learning_rate": 1.9414851261074743e-05, "loss": 2.3944, "step": 14070 }, { "epoch": 0.08849606190953195, "grad_norm": 5.507546901702881, "learning_rate": 1.941443216013009e-05, "loss": 1.9258, "step": 14080 }, { "epoch": 0.08855891422622905, "grad_norm": 7.643945217132568, "learning_rate": 1.9414013059185438e-05, "loss": 2.4071, "step": 14090 }, { "epoch": 0.08862176654292617, "grad_norm": 7.493567943572998, "learning_rate": 1.9413593958240785e-05, "loss": 2.4666, "step": 14100 }, { "epoch": 0.08868461885962328, "grad_norm": 9.023538589477539, "learning_rate": 1.941317485729613e-05, "loss": 2.173, "step": 14110 }, { "epoch": 0.08874747117632038, "grad_norm": 7.049037456512451, "learning_rate": 1.9412755756351475e-05, "loss": 2.4321, "step": 14120 }, { "epoch": 0.0888103234930175, "grad_norm": 7.310028076171875, "learning_rate": 1.9412336655406822e-05, "loss": 2.2373, "step": 14130 }, { "epoch": 0.08887317580971461, "grad_norm": 8.413848876953125, "learning_rate": 1.941191755446217e-05, "loss": 2.3431, "step": 14140 }, { "epoch": 0.08893602812641173, "grad_norm": 8.686189651489258, "learning_rate": 1.9411498453517517e-05, "loss": 2.4907, "step": 14150 }, { "epoch": 0.08899888044310883, "grad_norm": 7.3697967529296875, "learning_rate": 1.941107935257286e-05, "loss": 2.3139, "step": 14160 }, { "epoch": 0.08906173275980594, "grad_norm": 7.831506729125977, "learning_rate": 1.9410660251628207e-05, "loss": 2.5202, "step": 14170 }, { "epoch": 0.08912458507650306, "grad_norm": 8.068471908569336, "learning_rate": 1.9410241150683554e-05, "loss": 2.4672, "step": 14180 }, { "epoch": 0.08918743739320016, "grad_norm": 8.44082260131836, "learning_rate": 1.94098220497389e-05, "loss": 2.358, "step": 14190 }, { "epoch": 0.08925028970989728, "grad_norm": 7.093486309051514, "learning_rate": 1.940940294879425e-05, "loss": 2.453, "step": 14200 }, { "epoch": 0.08931314202659439, "grad_norm": 8.337098121643066, "learning_rate": 1.9408983847849596e-05, "loss": 2.4123, "step": 14210 }, { "epoch": 0.08937599434329149, "grad_norm": 6.9383087158203125, "learning_rate": 1.940856474690494e-05, "loss": 2.3185, "step": 14220 }, { "epoch": 0.08943884665998861, "grad_norm": 7.915830135345459, "learning_rate": 1.9408145645960286e-05, "loss": 2.6617, "step": 14230 }, { "epoch": 0.08950169897668572, "grad_norm": 8.545634269714355, "learning_rate": 1.9407726545015633e-05, "loss": 2.4342, "step": 14240 }, { "epoch": 0.08956455129338282, "grad_norm": 8.853870391845703, "learning_rate": 1.940730744407098e-05, "loss": 2.4135, "step": 14250 }, { "epoch": 0.08962740361007994, "grad_norm": 8.391120910644531, "learning_rate": 1.9406888343126328e-05, "loss": 2.3313, "step": 14260 }, { "epoch": 0.08969025592677705, "grad_norm": 7.608756065368652, "learning_rate": 1.9406469242181675e-05, "loss": 2.2267, "step": 14270 }, { "epoch": 0.08975310824347416, "grad_norm": 7.681011199951172, "learning_rate": 1.9406050141237022e-05, "loss": 2.5206, "step": 14280 }, { "epoch": 0.08981596056017127, "grad_norm": 7.436078071594238, "learning_rate": 1.9405631040292365e-05, "loss": 2.4975, "step": 14290 }, { "epoch": 0.08987881287686839, "grad_norm": 6.741333484649658, "learning_rate": 1.9405211939347712e-05, "loss": 2.4191, "step": 14300 }, { "epoch": 0.0899416651935655, "grad_norm": 7.723337173461914, "learning_rate": 1.940479283840306e-05, "loss": 2.4078, "step": 14310 }, { "epoch": 0.0900045175102626, "grad_norm": 6.964266300201416, "learning_rate": 1.9404373737458407e-05, "loss": 2.3508, "step": 14320 }, { "epoch": 0.09006736982695972, "grad_norm": 8.015843391418457, "learning_rate": 1.9403954636513754e-05, "loss": 2.3937, "step": 14330 }, { "epoch": 0.09013022214365683, "grad_norm": 7.15214729309082, "learning_rate": 1.9403535535569097e-05, "loss": 2.3037, "step": 14340 }, { "epoch": 0.09019307446035393, "grad_norm": 6.9445648193359375, "learning_rate": 1.9403116434624444e-05, "loss": 2.343, "step": 14350 }, { "epoch": 0.09025592677705105, "grad_norm": 6.637543201446533, "learning_rate": 1.940269733367979e-05, "loss": 2.4491, "step": 14360 }, { "epoch": 0.09031877909374816, "grad_norm": 7.8735551834106445, "learning_rate": 1.940227823273514e-05, "loss": 2.3134, "step": 14370 }, { "epoch": 0.09038163141044527, "grad_norm": 7.348316669464111, "learning_rate": 1.9401859131790482e-05, "loss": 2.5632, "step": 14380 }, { "epoch": 0.09044448372714238, "grad_norm": 7.872707366943359, "learning_rate": 1.940144003084583e-05, "loss": 2.2116, "step": 14390 }, { "epoch": 0.0905073360438395, "grad_norm": 7.157912254333496, "learning_rate": 1.9401020929901176e-05, "loss": 2.2396, "step": 14400 }, { "epoch": 0.0905701883605366, "grad_norm": 7.548715591430664, "learning_rate": 1.9400601828956523e-05, "loss": 2.608, "step": 14410 }, { "epoch": 0.09063304067723371, "grad_norm": 7.2458014488220215, "learning_rate": 1.940018272801187e-05, "loss": 2.3841, "step": 14420 }, { "epoch": 0.09069589299393083, "grad_norm": 7.721854209899902, "learning_rate": 1.9399763627067218e-05, "loss": 2.2875, "step": 14430 }, { "epoch": 0.09075874531062793, "grad_norm": 7.985480785369873, "learning_rate": 1.9399344526122565e-05, "loss": 2.4722, "step": 14440 }, { "epoch": 0.09082159762732504, "grad_norm": 7.973264694213867, "learning_rate": 1.9398925425177912e-05, "loss": 2.2483, "step": 14450 }, { "epoch": 0.09088444994402216, "grad_norm": 7.601576805114746, "learning_rate": 1.939850632423326e-05, "loss": 2.1748, "step": 14460 }, { "epoch": 0.09094730226071926, "grad_norm": 7.965937614440918, "learning_rate": 1.9398087223288603e-05, "loss": 2.4475, "step": 14470 }, { "epoch": 0.09101015457741637, "grad_norm": 7.92208194732666, "learning_rate": 1.939766812234395e-05, "loss": 2.2445, "step": 14480 }, { "epoch": 0.09107300689411349, "grad_norm": 7.090691566467285, "learning_rate": 1.9397249021399297e-05, "loss": 2.4417, "step": 14490 }, { "epoch": 0.0911358592108106, "grad_norm": 7.450453281402588, "learning_rate": 1.9396829920454644e-05, "loss": 2.2301, "step": 14500 }, { "epoch": 0.0911987115275077, "grad_norm": 8.080689430236816, "learning_rate": 1.939641081950999e-05, "loss": 2.2355, "step": 14510 }, { "epoch": 0.09126156384420482, "grad_norm": 8.256315231323242, "learning_rate": 1.9395991718565334e-05, "loss": 2.2679, "step": 14520 }, { "epoch": 0.09132441616090194, "grad_norm": 8.130385398864746, "learning_rate": 1.939557261762068e-05, "loss": 2.4544, "step": 14530 }, { "epoch": 0.09138726847759904, "grad_norm": 8.67445182800293, "learning_rate": 1.939515351667603e-05, "loss": 2.3577, "step": 14540 }, { "epoch": 0.09145012079429615, "grad_norm": 7.793080806732178, "learning_rate": 1.9394734415731376e-05, "loss": 2.298, "step": 14550 }, { "epoch": 0.09151297311099327, "grad_norm": 9.277105331420898, "learning_rate": 1.939431531478672e-05, "loss": 2.3302, "step": 14560 }, { "epoch": 0.09157582542769037, "grad_norm": 7.196310043334961, "learning_rate": 1.9393896213842066e-05, "loss": 2.3304, "step": 14570 }, { "epoch": 0.09163867774438748, "grad_norm": 6.744382381439209, "learning_rate": 1.9393477112897414e-05, "loss": 2.3452, "step": 14580 }, { "epoch": 0.0917015300610846, "grad_norm": 7.582704067230225, "learning_rate": 1.939305801195276e-05, "loss": 2.5666, "step": 14590 }, { "epoch": 0.0917643823777817, "grad_norm": 10.222574234008789, "learning_rate": 1.9392638911008104e-05, "loss": 2.466, "step": 14600 }, { "epoch": 0.09182723469447882, "grad_norm": 7.608969688415527, "learning_rate": 1.939221981006345e-05, "loss": 2.2171, "step": 14610 }, { "epoch": 0.09189008701117593, "grad_norm": 8.202703475952148, "learning_rate": 1.93918007091188e-05, "loss": 2.4784, "step": 14620 }, { "epoch": 0.09195293932787303, "grad_norm": 7.415685176849365, "learning_rate": 1.9391381608174145e-05, "loss": 2.1183, "step": 14630 }, { "epoch": 0.09201579164457015, "grad_norm": 8.944314002990723, "learning_rate": 1.9390962507229493e-05, "loss": 2.3361, "step": 14640 }, { "epoch": 0.09207864396126726, "grad_norm": 6.944596767425537, "learning_rate": 1.939054340628484e-05, "loss": 2.5046, "step": 14650 }, { "epoch": 0.09214149627796438, "grad_norm": 7.5702128410339355, "learning_rate": 1.9390124305340187e-05, "loss": 2.1109, "step": 14660 }, { "epoch": 0.09220434859466148, "grad_norm": 8.582987785339355, "learning_rate": 1.9389705204395534e-05, "loss": 2.3905, "step": 14670 }, { "epoch": 0.0922672009113586, "grad_norm": 8.8607177734375, "learning_rate": 1.938928610345088e-05, "loss": 2.3118, "step": 14680 }, { "epoch": 0.09233005322805571, "grad_norm": 8.889832496643066, "learning_rate": 1.9388867002506225e-05, "loss": 2.511, "step": 14690 }, { "epoch": 0.09239290554475281, "grad_norm": 7.495224475860596, "learning_rate": 1.938844790156157e-05, "loss": 2.4922, "step": 14700 }, { "epoch": 0.09245575786144992, "grad_norm": 7.616922855377197, "learning_rate": 1.938802880061692e-05, "loss": 2.3211, "step": 14710 }, { "epoch": 0.09251861017814704, "grad_norm": 9.176539421081543, "learning_rate": 1.9387609699672266e-05, "loss": 2.4175, "step": 14720 }, { "epoch": 0.09258146249484414, "grad_norm": 8.710726737976074, "learning_rate": 1.9387190598727613e-05, "loss": 2.34, "step": 14730 }, { "epoch": 0.09264431481154126, "grad_norm": 7.268640995025635, "learning_rate": 1.9386771497782956e-05, "loss": 2.5475, "step": 14740 }, { "epoch": 0.09270716712823837, "grad_norm": 8.120370864868164, "learning_rate": 1.9386352396838304e-05, "loss": 2.25, "step": 14750 }, { "epoch": 0.09277001944493547, "grad_norm": 8.488015174865723, "learning_rate": 1.938593329589365e-05, "loss": 2.2392, "step": 14760 }, { "epoch": 0.09283287176163259, "grad_norm": 7.086648464202881, "learning_rate": 1.9385514194948998e-05, "loss": 2.5415, "step": 14770 }, { "epoch": 0.0928957240783297, "grad_norm": 7.34189510345459, "learning_rate": 1.938509509400434e-05, "loss": 2.1996, "step": 14780 }, { "epoch": 0.0929585763950268, "grad_norm": 7.8050079345703125, "learning_rate": 1.938467599305969e-05, "loss": 2.3005, "step": 14790 }, { "epoch": 0.09302142871172392, "grad_norm": 9.377252578735352, "learning_rate": 1.9384256892115036e-05, "loss": 2.3329, "step": 14800 }, { "epoch": 0.09308428102842103, "grad_norm": 8.01342487335205, "learning_rate": 1.9383837791170383e-05, "loss": 2.3466, "step": 14810 }, { "epoch": 0.09314713334511815, "grad_norm": 6.556551933288574, "learning_rate": 1.938341869022573e-05, "loss": 2.1089, "step": 14820 }, { "epoch": 0.09320998566181525, "grad_norm": 7.6242146492004395, "learning_rate": 1.9382999589281077e-05, "loss": 2.5015, "step": 14830 }, { "epoch": 0.09327283797851237, "grad_norm": 6.905308246612549, "learning_rate": 1.9382580488336424e-05, "loss": 2.6101, "step": 14840 }, { "epoch": 0.09333569029520948, "grad_norm": 8.105611801147461, "learning_rate": 1.9382161387391767e-05, "loss": 2.3652, "step": 14850 }, { "epoch": 0.09339854261190658, "grad_norm": 8.25787353515625, "learning_rate": 1.9381742286447115e-05, "loss": 2.1882, "step": 14860 }, { "epoch": 0.0934613949286037, "grad_norm": 7.428998947143555, "learning_rate": 1.938132318550246e-05, "loss": 2.31, "step": 14870 }, { "epoch": 0.09352424724530081, "grad_norm": 6.790313720703125, "learning_rate": 1.938090408455781e-05, "loss": 2.3148, "step": 14880 }, { "epoch": 0.09358709956199791, "grad_norm": 7.349525451660156, "learning_rate": 1.9380484983613156e-05, "loss": 2.418, "step": 14890 }, { "epoch": 0.09364995187869503, "grad_norm": 8.276445388793945, "learning_rate": 1.9380065882668503e-05, "loss": 2.3691, "step": 14900 }, { "epoch": 0.09371280419539214, "grad_norm": 7.776458740234375, "learning_rate": 1.9379646781723847e-05, "loss": 2.4167, "step": 14910 }, { "epoch": 0.09377565651208924, "grad_norm": 7.6309990882873535, "learning_rate": 1.9379227680779194e-05, "loss": 2.3009, "step": 14920 }, { "epoch": 0.09383850882878636, "grad_norm": 7.005548477172852, "learning_rate": 1.937880857983454e-05, "loss": 2.1872, "step": 14930 }, { "epoch": 0.09390136114548348, "grad_norm": 8.230241775512695, "learning_rate": 1.9378389478889888e-05, "loss": 2.2369, "step": 14940 }, { "epoch": 0.09396421346218058, "grad_norm": 8.217141151428223, "learning_rate": 1.9377970377945235e-05, "loss": 2.2704, "step": 14950 }, { "epoch": 0.09402706577887769, "grad_norm": 6.3729047775268555, "learning_rate": 1.937755127700058e-05, "loss": 2.0222, "step": 14960 }, { "epoch": 0.0940899180955748, "grad_norm": 9.567404747009277, "learning_rate": 1.9377132176055926e-05, "loss": 2.2782, "step": 14970 }, { "epoch": 0.09415277041227192, "grad_norm": 7.393969535827637, "learning_rate": 1.9376713075111273e-05, "loss": 2.3824, "step": 14980 }, { "epoch": 0.09421562272896902, "grad_norm": 8.631592750549316, "learning_rate": 1.937629397416662e-05, "loss": 2.2788, "step": 14990 }, { "epoch": 0.09427847504566614, "grad_norm": 8.257186889648438, "learning_rate": 1.9375874873221963e-05, "loss": 2.2076, "step": 15000 }, { "epoch": 0.09434132736236325, "grad_norm": 7.280744552612305, "learning_rate": 1.937545577227731e-05, "loss": 2.2362, "step": 15010 }, { "epoch": 0.09440417967906035, "grad_norm": 7.483618259429932, "learning_rate": 1.9375036671332658e-05, "loss": 2.3, "step": 15020 }, { "epoch": 0.09446703199575747, "grad_norm": 10.175114631652832, "learning_rate": 1.9374617570388005e-05, "loss": 2.1979, "step": 15030 }, { "epoch": 0.09452988431245458, "grad_norm": 8.764018058776855, "learning_rate": 1.937419846944335e-05, "loss": 2.4274, "step": 15040 }, { "epoch": 0.09459273662915169, "grad_norm": 7.181217193603516, "learning_rate": 1.93737793684987e-05, "loss": 2.2674, "step": 15050 }, { "epoch": 0.0946555889458488, "grad_norm": 7.179269790649414, "learning_rate": 1.9373360267554046e-05, "loss": 2.5718, "step": 15060 }, { "epoch": 0.09471844126254592, "grad_norm": 6.558637619018555, "learning_rate": 1.9372941166609393e-05, "loss": 2.3464, "step": 15070 }, { "epoch": 0.09478129357924302, "grad_norm": 8.1760835647583, "learning_rate": 1.937252206566474e-05, "loss": 2.2597, "step": 15080 }, { "epoch": 0.09484414589594013, "grad_norm": 7.907341957092285, "learning_rate": 1.9372102964720084e-05, "loss": 2.3224, "step": 15090 }, { "epoch": 0.09490699821263725, "grad_norm": 7.5614190101623535, "learning_rate": 1.937168386377543e-05, "loss": 2.3524, "step": 15100 }, { "epoch": 0.09496985052933435, "grad_norm": 8.202812194824219, "learning_rate": 1.9371264762830778e-05, "loss": 2.4304, "step": 15110 }, { "epoch": 0.09503270284603146, "grad_norm": 7.19970178604126, "learning_rate": 1.9370845661886125e-05, "loss": 2.3235, "step": 15120 }, { "epoch": 0.09509555516272858, "grad_norm": 6.639101505279541, "learning_rate": 1.9370426560941472e-05, "loss": 2.2978, "step": 15130 }, { "epoch": 0.09515840747942568, "grad_norm": 6.895039081573486, "learning_rate": 1.9370007459996816e-05, "loss": 2.4561, "step": 15140 }, { "epoch": 0.0952212597961228, "grad_norm": 8.347858428955078, "learning_rate": 1.9369588359052163e-05, "loss": 2.0922, "step": 15150 }, { "epoch": 0.09528411211281991, "grad_norm": 7.56237268447876, "learning_rate": 1.936916925810751e-05, "loss": 2.2262, "step": 15160 }, { "epoch": 0.09534696442951703, "grad_norm": 7.407289981842041, "learning_rate": 1.9368750157162857e-05, "loss": 2.5011, "step": 15170 }, { "epoch": 0.09540981674621413, "grad_norm": 7.594213008880615, "learning_rate": 1.93683310562182e-05, "loss": 2.1702, "step": 15180 }, { "epoch": 0.09547266906291124, "grad_norm": 8.105140686035156, "learning_rate": 1.9367911955273548e-05, "loss": 2.4739, "step": 15190 }, { "epoch": 0.09553552137960836, "grad_norm": 8.515286445617676, "learning_rate": 1.9367492854328895e-05, "loss": 2.4776, "step": 15200 }, { "epoch": 0.09559837369630546, "grad_norm": 8.611160278320312, "learning_rate": 1.936707375338424e-05, "loss": 2.2731, "step": 15210 }, { "epoch": 0.09566122601300257, "grad_norm": 9.178365707397461, "learning_rate": 1.936665465243959e-05, "loss": 2.6187, "step": 15220 }, { "epoch": 0.09572407832969969, "grad_norm": 8.396846771240234, "learning_rate": 1.9366235551494932e-05, "loss": 2.2943, "step": 15230 }, { "epoch": 0.09578693064639679, "grad_norm": 6.751037120819092, "learning_rate": 1.936581645055028e-05, "loss": 2.1195, "step": 15240 }, { "epoch": 0.0958497829630939, "grad_norm": 6.924027919769287, "learning_rate": 1.9365397349605627e-05, "loss": 2.2707, "step": 15250 }, { "epoch": 0.09591263527979102, "grad_norm": 7.081989288330078, "learning_rate": 1.9364978248660974e-05, "loss": 2.3677, "step": 15260 }, { "epoch": 0.09597548759648812, "grad_norm": 9.71423053741455, "learning_rate": 1.936455914771632e-05, "loss": 2.2383, "step": 15270 }, { "epoch": 0.09603833991318524, "grad_norm": 8.501422882080078, "learning_rate": 1.9364140046771668e-05, "loss": 2.304, "step": 15280 }, { "epoch": 0.09610119222988235, "grad_norm": 7.34773063659668, "learning_rate": 1.9363720945827015e-05, "loss": 2.3234, "step": 15290 }, { "epoch": 0.09616404454657945, "grad_norm": 7.565731525421143, "learning_rate": 1.9363301844882362e-05, "loss": 2.3826, "step": 15300 }, { "epoch": 0.09622689686327657, "grad_norm": 7.900417804718018, "learning_rate": 1.9362882743937706e-05, "loss": 2.1959, "step": 15310 }, { "epoch": 0.09628974917997368, "grad_norm": 7.132443428039551, "learning_rate": 1.9362463642993053e-05, "loss": 2.0233, "step": 15320 }, { "epoch": 0.0963526014966708, "grad_norm": 8.974869728088379, "learning_rate": 1.93620445420484e-05, "loss": 2.2347, "step": 15330 }, { "epoch": 0.0964154538133679, "grad_norm": 6.8141188621521, "learning_rate": 1.9361625441103747e-05, "loss": 2.3844, "step": 15340 }, { "epoch": 0.09647830613006501, "grad_norm": 7.523212432861328, "learning_rate": 1.9361206340159094e-05, "loss": 2.4004, "step": 15350 }, { "epoch": 0.09654115844676213, "grad_norm": 8.011374473571777, "learning_rate": 1.9360787239214438e-05, "loss": 2.2032, "step": 15360 }, { "epoch": 0.09660401076345923, "grad_norm": 7.110884666442871, "learning_rate": 1.9360368138269785e-05, "loss": 2.2476, "step": 15370 }, { "epoch": 0.09666686308015635, "grad_norm": 7.431973934173584, "learning_rate": 1.9359949037325132e-05, "loss": 2.2006, "step": 15380 }, { "epoch": 0.09672971539685346, "grad_norm": 7.817060470581055, "learning_rate": 1.935952993638048e-05, "loss": 2.4115, "step": 15390 }, { "epoch": 0.09679256771355056, "grad_norm": 7.787459373474121, "learning_rate": 1.9359110835435822e-05, "loss": 2.3382, "step": 15400 }, { "epoch": 0.09685542003024768, "grad_norm": 6.05074405670166, "learning_rate": 1.935869173449117e-05, "loss": 2.1408, "step": 15410 }, { "epoch": 0.09691827234694479, "grad_norm": 8.650323867797852, "learning_rate": 1.9358272633546517e-05, "loss": 2.4014, "step": 15420 }, { "epoch": 0.0969811246636419, "grad_norm": 8.57004451751709, "learning_rate": 1.9357853532601864e-05, "loss": 2.2718, "step": 15430 }, { "epoch": 0.09704397698033901, "grad_norm": 8.429915428161621, "learning_rate": 1.935743443165721e-05, "loss": 2.2648, "step": 15440 }, { "epoch": 0.09710682929703612, "grad_norm": 7.5032148361206055, "learning_rate": 1.9357015330712558e-05, "loss": 2.3131, "step": 15450 }, { "epoch": 0.09716968161373322, "grad_norm": 8.546192169189453, "learning_rate": 1.9356596229767905e-05, "loss": 2.3559, "step": 15460 }, { "epoch": 0.09723253393043034, "grad_norm": 8.149224281311035, "learning_rate": 1.9356177128823252e-05, "loss": 2.4356, "step": 15470 }, { "epoch": 0.09729538624712745, "grad_norm": 8.380104064941406, "learning_rate": 1.9355758027878596e-05, "loss": 2.2616, "step": 15480 }, { "epoch": 0.09735823856382457, "grad_norm": 7.752674102783203, "learning_rate": 1.9355338926933943e-05, "loss": 2.2147, "step": 15490 }, { "epoch": 0.09742109088052167, "grad_norm": 7.410440444946289, "learning_rate": 1.935491982598929e-05, "loss": 2.0358, "step": 15500 }, { "epoch": 0.09748394319721879, "grad_norm": 9.173617362976074, "learning_rate": 1.9354500725044637e-05, "loss": 2.7082, "step": 15510 }, { "epoch": 0.0975467955139159, "grad_norm": 7.036073684692383, "learning_rate": 1.9354081624099984e-05, "loss": 2.2567, "step": 15520 }, { "epoch": 0.097609647830613, "grad_norm": 8.036812782287598, "learning_rate": 1.9353662523155328e-05, "loss": 2.2407, "step": 15530 }, { "epoch": 0.09767250014731012, "grad_norm": 8.202893257141113, "learning_rate": 1.9353243422210675e-05, "loss": 2.1459, "step": 15540 }, { "epoch": 0.09773535246400723, "grad_norm": 8.069165229797363, "learning_rate": 1.9352824321266022e-05, "loss": 2.4264, "step": 15550 }, { "epoch": 0.09779820478070433, "grad_norm": 7.467092990875244, "learning_rate": 1.935240522032137e-05, "loss": 2.2151, "step": 15560 }, { "epoch": 0.09786105709740145, "grad_norm": 7.783108711242676, "learning_rate": 1.9351986119376716e-05, "loss": 2.2198, "step": 15570 }, { "epoch": 0.09792390941409856, "grad_norm": 8.802495002746582, "learning_rate": 1.935156701843206e-05, "loss": 2.4794, "step": 15580 }, { "epoch": 0.09798676173079567, "grad_norm": 8.029830932617188, "learning_rate": 1.9351147917487407e-05, "loss": 2.4849, "step": 15590 }, { "epoch": 0.09804961404749278, "grad_norm": 6.598846435546875, "learning_rate": 1.9350728816542754e-05, "loss": 2.238, "step": 15600 }, { "epoch": 0.0981124663641899, "grad_norm": 7.8369879722595215, "learning_rate": 1.93503097155981e-05, "loss": 2.3688, "step": 15610 }, { "epoch": 0.098175318680887, "grad_norm": 9.714192390441895, "learning_rate": 1.9349890614653444e-05, "loss": 2.3551, "step": 15620 }, { "epoch": 0.09823817099758411, "grad_norm": 7.944300651550293, "learning_rate": 1.934947151370879e-05, "loss": 2.2602, "step": 15630 }, { "epoch": 0.09830102331428123, "grad_norm": 6.619028091430664, "learning_rate": 1.934905241276414e-05, "loss": 2.215, "step": 15640 }, { "epoch": 0.09836387563097834, "grad_norm": 8.038568496704102, "learning_rate": 1.9348633311819486e-05, "loss": 2.3041, "step": 15650 }, { "epoch": 0.09842672794767544, "grad_norm": 8.816732406616211, "learning_rate": 1.9348214210874833e-05, "loss": 2.4387, "step": 15660 }, { "epoch": 0.09848958026437256, "grad_norm": 7.7710490226745605, "learning_rate": 1.934779510993018e-05, "loss": 2.2581, "step": 15670 }, { "epoch": 0.09855243258106967, "grad_norm": 7.502630233764648, "learning_rate": 1.9347376008985527e-05, "loss": 2.2776, "step": 15680 }, { "epoch": 0.09861528489776678, "grad_norm": 7.6552276611328125, "learning_rate": 1.9346956908040874e-05, "loss": 2.0136, "step": 15690 }, { "epoch": 0.09867813721446389, "grad_norm": 7.770573616027832, "learning_rate": 1.934653780709622e-05, "loss": 2.2089, "step": 15700 }, { "epoch": 0.098740989531161, "grad_norm": 7.82880973815918, "learning_rate": 1.9346118706151565e-05, "loss": 2.4368, "step": 15710 }, { "epoch": 0.0988038418478581, "grad_norm": 6.880098342895508, "learning_rate": 1.9345699605206912e-05, "loss": 2.0097, "step": 15720 }, { "epoch": 0.09886669416455522, "grad_norm": 7.838752269744873, "learning_rate": 1.934528050426226e-05, "loss": 2.1871, "step": 15730 }, { "epoch": 0.09892954648125234, "grad_norm": 6.527927875518799, "learning_rate": 1.9344861403317606e-05, "loss": 2.0773, "step": 15740 }, { "epoch": 0.09899239879794944, "grad_norm": 7.516666412353516, "learning_rate": 1.9344442302372953e-05, "loss": 2.4226, "step": 15750 }, { "epoch": 0.09905525111464655, "grad_norm": 7.703370571136475, "learning_rate": 1.9344023201428297e-05, "loss": 2.1255, "step": 15760 }, { "epoch": 0.09911810343134367, "grad_norm": 7.8158440589904785, "learning_rate": 1.9343604100483644e-05, "loss": 2.2532, "step": 15770 }, { "epoch": 0.09918095574804077, "grad_norm": 7.7812089920043945, "learning_rate": 1.934318499953899e-05, "loss": 2.3821, "step": 15780 }, { "epoch": 0.09924380806473788, "grad_norm": 6.937283992767334, "learning_rate": 1.9342765898594338e-05, "loss": 2.2079, "step": 15790 }, { "epoch": 0.099306660381435, "grad_norm": 7.606636047363281, "learning_rate": 1.934234679764968e-05, "loss": 2.2399, "step": 15800 }, { "epoch": 0.0993695126981321, "grad_norm": 7.299315452575684, "learning_rate": 1.934192769670503e-05, "loss": 2.1152, "step": 15810 }, { "epoch": 0.09943236501482922, "grad_norm": 6.596746921539307, "learning_rate": 1.9341508595760376e-05, "loss": 2.1512, "step": 15820 }, { "epoch": 0.09949521733152633, "grad_norm": 5.329288482666016, "learning_rate": 1.9341089494815723e-05, "loss": 2.0065, "step": 15830 }, { "epoch": 0.09955806964822345, "grad_norm": 9.746723175048828, "learning_rate": 1.934067039387107e-05, "loss": 2.4375, "step": 15840 }, { "epoch": 0.09962092196492055, "grad_norm": 8.963370323181152, "learning_rate": 1.9340251292926414e-05, "loss": 2.4851, "step": 15850 }, { "epoch": 0.09968377428161766, "grad_norm": 6.951449394226074, "learning_rate": 1.933983219198176e-05, "loss": 2.5821, "step": 15860 }, { "epoch": 0.09974662659831478, "grad_norm": 8.524133682250977, "learning_rate": 1.9339413091037108e-05, "loss": 2.4172, "step": 15870 }, { "epoch": 0.09980947891501188, "grad_norm": 8.179025650024414, "learning_rate": 1.9338993990092455e-05, "loss": 2.2555, "step": 15880 }, { "epoch": 0.099872331231709, "grad_norm": 9.563819885253906, "learning_rate": 1.9338574889147802e-05, "loss": 2.3097, "step": 15890 }, { "epoch": 0.09993518354840611, "grad_norm": 7.194939136505127, "learning_rate": 1.933815578820315e-05, "loss": 2.4191, "step": 15900 }, { "epoch": 0.09999803586510321, "grad_norm": 7.345890522003174, "learning_rate": 1.9337736687258496e-05, "loss": 2.4248, "step": 15910 }, { "epoch": 0.10006088818180033, "grad_norm": 7.661227226257324, "learning_rate": 1.9337317586313843e-05, "loss": 2.3021, "step": 15920 }, { "epoch": 0.10012374049849744, "grad_norm": 7.1798415184021, "learning_rate": 1.9336898485369187e-05, "loss": 2.1467, "step": 15930 }, { "epoch": 0.10018659281519454, "grad_norm": 6.5734992027282715, "learning_rate": 1.9336479384424534e-05, "loss": 2.2988, "step": 15940 }, { "epoch": 0.10024944513189166, "grad_norm": 7.714196681976318, "learning_rate": 1.933606028347988e-05, "loss": 2.3623, "step": 15950 }, { "epoch": 0.10031229744858877, "grad_norm": 7.4762701988220215, "learning_rate": 1.9335641182535228e-05, "loss": 2.2166, "step": 15960 }, { "epoch": 0.10037514976528587, "grad_norm": 8.928500175476074, "learning_rate": 1.9335222081590575e-05, "loss": 2.2144, "step": 15970 }, { "epoch": 0.10043800208198299, "grad_norm": 8.641223907470703, "learning_rate": 1.933480298064592e-05, "loss": 2.1497, "step": 15980 }, { "epoch": 0.1005008543986801, "grad_norm": 7.675530433654785, "learning_rate": 1.9334383879701266e-05, "loss": 2.3305, "step": 15990 }, { "epoch": 0.10056370671537722, "grad_norm": 6.492575645446777, "learning_rate": 1.9333964778756613e-05, "loss": 2.1332, "step": 16000 }, { "epoch": 0.10062655903207432, "grad_norm": 7.188532829284668, "learning_rate": 1.933354567781196e-05, "loss": 2.1893, "step": 16010 }, { "epoch": 0.10068941134877143, "grad_norm": 8.255434036254883, "learning_rate": 1.9333126576867304e-05, "loss": 2.3866, "step": 16020 }, { "epoch": 0.10075226366546855, "grad_norm": 6.916716575622559, "learning_rate": 1.933270747592265e-05, "loss": 2.3914, "step": 16030 }, { "epoch": 0.10081511598216565, "grad_norm": 8.140775680541992, "learning_rate": 1.9332288374977998e-05, "loss": 2.3892, "step": 16040 }, { "epoch": 0.10087796829886277, "grad_norm": 8.864714622497559, "learning_rate": 1.9331869274033345e-05, "loss": 2.478, "step": 16050 }, { "epoch": 0.10094082061555988, "grad_norm": 8.247222900390625, "learning_rate": 1.9331450173088692e-05, "loss": 2.2977, "step": 16060 }, { "epoch": 0.10100367293225698, "grad_norm": 7.6356306076049805, "learning_rate": 1.933103107214404e-05, "loss": 2.2705, "step": 16070 }, { "epoch": 0.1010665252489541, "grad_norm": 8.452744483947754, "learning_rate": 1.9330611971199386e-05, "loss": 2.1171, "step": 16080 }, { "epoch": 0.10112937756565121, "grad_norm": 7.3948869705200195, "learning_rate": 1.9330192870254733e-05, "loss": 2.2588, "step": 16090 }, { "epoch": 0.10119222988234831, "grad_norm": 7.8918280601501465, "learning_rate": 1.9329773769310077e-05, "loss": 2.0473, "step": 16100 }, { "epoch": 0.10125508219904543, "grad_norm": 7.246849060058594, "learning_rate": 1.9329354668365424e-05, "loss": 2.4762, "step": 16110 }, { "epoch": 0.10131793451574254, "grad_norm": 12.090804100036621, "learning_rate": 1.932893556742077e-05, "loss": 2.3217, "step": 16120 }, { "epoch": 0.10138078683243965, "grad_norm": 6.78463077545166, "learning_rate": 1.9328516466476118e-05, "loss": 2.2876, "step": 16130 }, { "epoch": 0.10144363914913676, "grad_norm": 7.757300853729248, "learning_rate": 1.9328097365531465e-05, "loss": 2.2388, "step": 16140 }, { "epoch": 0.10150649146583388, "grad_norm": 7.6502861976623535, "learning_rate": 1.9327678264586812e-05, "loss": 2.1883, "step": 16150 }, { "epoch": 0.10156934378253099, "grad_norm": 7.231910705566406, "learning_rate": 1.9327259163642156e-05, "loss": 2.3303, "step": 16160 }, { "epoch": 0.10163219609922809, "grad_norm": 6.886990070343018, "learning_rate": 1.9326840062697503e-05, "loss": 2.3253, "step": 16170 }, { "epoch": 0.10169504841592521, "grad_norm": 7.37526273727417, "learning_rate": 1.932642096175285e-05, "loss": 2.3775, "step": 16180 }, { "epoch": 0.10175790073262232, "grad_norm": 7.015676498413086, "learning_rate": 1.9326001860808197e-05, "loss": 2.4584, "step": 16190 }, { "epoch": 0.10182075304931942, "grad_norm": 8.19909381866455, "learning_rate": 1.932558275986354e-05, "loss": 2.3381, "step": 16200 }, { "epoch": 0.10188360536601654, "grad_norm": 7.600981712341309, "learning_rate": 1.9325163658918888e-05, "loss": 2.3598, "step": 16210 }, { "epoch": 0.10194645768271365, "grad_norm": 7.685189723968506, "learning_rate": 1.9324744557974235e-05, "loss": 2.1358, "step": 16220 }, { "epoch": 0.10200930999941075, "grad_norm": 6.872974872589111, "learning_rate": 1.9324325457029582e-05, "loss": 2.0541, "step": 16230 }, { "epoch": 0.10207216231610787, "grad_norm": 8.120587348937988, "learning_rate": 1.9323906356084926e-05, "loss": 2.3747, "step": 16240 }, { "epoch": 0.10213501463280499, "grad_norm": 7.509024143218994, "learning_rate": 1.9323487255140273e-05, "loss": 2.3145, "step": 16250 }, { "epoch": 0.10219786694950209, "grad_norm": 8.789929389953613, "learning_rate": 1.932306815419562e-05, "loss": 2.0984, "step": 16260 }, { "epoch": 0.1022607192661992, "grad_norm": 6.380049705505371, "learning_rate": 1.9322649053250967e-05, "loss": 2.3583, "step": 16270 }, { "epoch": 0.10232357158289632, "grad_norm": 8.88233757019043, "learning_rate": 1.9322229952306314e-05, "loss": 2.6463, "step": 16280 }, { "epoch": 0.10238642389959342, "grad_norm": 6.933871269226074, "learning_rate": 1.932181085136166e-05, "loss": 2.2953, "step": 16290 }, { "epoch": 0.10244927621629053, "grad_norm": 7.950907230377197, "learning_rate": 1.9321391750417008e-05, "loss": 2.3062, "step": 16300 }, { "epoch": 0.10251212853298765, "grad_norm": 6.583611011505127, "learning_rate": 1.9320972649472355e-05, "loss": 2.2439, "step": 16310 }, { "epoch": 0.10257498084968476, "grad_norm": 7.193686485290527, "learning_rate": 1.9320553548527702e-05, "loss": 2.1577, "step": 16320 }, { "epoch": 0.10263783316638186, "grad_norm": 9.45664119720459, "learning_rate": 1.9320134447583046e-05, "loss": 2.372, "step": 16330 }, { "epoch": 0.10270068548307898, "grad_norm": 7.178765773773193, "learning_rate": 1.9319715346638393e-05, "loss": 2.2334, "step": 16340 }, { "epoch": 0.1027635377997761, "grad_norm": 9.265113830566406, "learning_rate": 1.931929624569374e-05, "loss": 2.3851, "step": 16350 }, { "epoch": 0.1028263901164732, "grad_norm": 10.580037117004395, "learning_rate": 1.9318877144749087e-05, "loss": 2.2099, "step": 16360 }, { "epoch": 0.10288924243317031, "grad_norm": 8.77173137664795, "learning_rate": 1.9318458043804434e-05, "loss": 2.0985, "step": 16370 }, { "epoch": 0.10295209474986743, "grad_norm": 7.568657875061035, "learning_rate": 1.9318038942859778e-05, "loss": 2.412, "step": 16380 }, { "epoch": 0.10301494706656453, "grad_norm": 7.412895202636719, "learning_rate": 1.9317619841915125e-05, "loss": 2.4029, "step": 16390 }, { "epoch": 0.10307779938326164, "grad_norm": 8.491913795471191, "learning_rate": 1.9317200740970472e-05, "loss": 2.325, "step": 16400 }, { "epoch": 0.10314065169995876, "grad_norm": 7.789159774780273, "learning_rate": 1.931678164002582e-05, "loss": 2.3369, "step": 16410 }, { "epoch": 0.10320350401665586, "grad_norm": 6.9769744873046875, "learning_rate": 1.9316362539081163e-05, "loss": 2.2403, "step": 16420 }, { "epoch": 0.10326635633335297, "grad_norm": 8.47653865814209, "learning_rate": 1.931594343813651e-05, "loss": 2.3198, "step": 16430 }, { "epoch": 0.10332920865005009, "grad_norm": 7.21108865737915, "learning_rate": 1.9315524337191857e-05, "loss": 2.3815, "step": 16440 }, { "epoch": 0.10339206096674719, "grad_norm": 13.637954711914062, "learning_rate": 1.9315105236247204e-05, "loss": 2.2583, "step": 16450 }, { "epoch": 0.1034549132834443, "grad_norm": 8.142091751098633, "learning_rate": 1.931468613530255e-05, "loss": 2.3985, "step": 16460 }, { "epoch": 0.10351776560014142, "grad_norm": 6.6919074058532715, "learning_rate": 1.9314267034357898e-05, "loss": 2.1173, "step": 16470 }, { "epoch": 0.10358061791683852, "grad_norm": 8.20840835571289, "learning_rate": 1.9313847933413242e-05, "loss": 2.4074, "step": 16480 }, { "epoch": 0.10364347023353564, "grad_norm": 7.9486470222473145, "learning_rate": 1.931342883246859e-05, "loss": 2.4469, "step": 16490 }, { "epoch": 0.10370632255023275, "grad_norm": 7.732548713684082, "learning_rate": 1.9313009731523936e-05, "loss": 2.5064, "step": 16500 }, { "epoch": 0.10376917486692987, "grad_norm": 8.321471214294434, "learning_rate": 1.9312590630579283e-05, "loss": 2.2244, "step": 16510 }, { "epoch": 0.10383202718362697, "grad_norm": 8.02713394165039, "learning_rate": 1.931217152963463e-05, "loss": 2.2076, "step": 16520 }, { "epoch": 0.10389487950032408, "grad_norm": 8.101451873779297, "learning_rate": 1.9311752428689977e-05, "loss": 2.1815, "step": 16530 }, { "epoch": 0.1039577318170212, "grad_norm": 7.51225471496582, "learning_rate": 1.9311333327745324e-05, "loss": 2.2129, "step": 16540 }, { "epoch": 0.1040205841337183, "grad_norm": 7.396722316741943, "learning_rate": 1.9310914226800668e-05, "loss": 2.2444, "step": 16550 }, { "epoch": 0.10408343645041541, "grad_norm": 7.46852970123291, "learning_rate": 1.9310495125856015e-05, "loss": 2.3093, "step": 16560 }, { "epoch": 0.10414628876711253, "grad_norm": 6.899946689605713, "learning_rate": 1.9310076024911362e-05, "loss": 2.2093, "step": 16570 }, { "epoch": 0.10420914108380963, "grad_norm": 7.019610404968262, "learning_rate": 1.930965692396671e-05, "loss": 2.0416, "step": 16580 }, { "epoch": 0.10427199340050675, "grad_norm": 8.696752548217773, "learning_rate": 1.9309237823022056e-05, "loss": 2.3048, "step": 16590 }, { "epoch": 0.10433484571720386, "grad_norm": 7.059429168701172, "learning_rate": 1.93088187220774e-05, "loss": 2.2203, "step": 16600 }, { "epoch": 0.10439769803390096, "grad_norm": 7.339666366577148, "learning_rate": 1.9308399621132747e-05, "loss": 2.3686, "step": 16610 }, { "epoch": 0.10446055035059808, "grad_norm": 8.099876403808594, "learning_rate": 1.9307980520188094e-05, "loss": 2.082, "step": 16620 }, { "epoch": 0.10452340266729519, "grad_norm": 6.657029628753662, "learning_rate": 1.930756141924344e-05, "loss": 2.135, "step": 16630 }, { "epoch": 0.1045862549839923, "grad_norm": 7.453880310058594, "learning_rate": 1.9307142318298785e-05, "loss": 2.3185, "step": 16640 }, { "epoch": 0.10464910730068941, "grad_norm": 8.553909301757812, "learning_rate": 1.9306723217354132e-05, "loss": 2.0539, "step": 16650 }, { "epoch": 0.10471195961738652, "grad_norm": 7.258327960968018, "learning_rate": 1.930630411640948e-05, "loss": 2.0804, "step": 16660 }, { "epoch": 0.10477481193408364, "grad_norm": 7.425248146057129, "learning_rate": 1.9305885015464826e-05, "loss": 2.2616, "step": 16670 }, { "epoch": 0.10483766425078074, "grad_norm": 8.037734985351562, "learning_rate": 1.9305465914520173e-05, "loss": 2.3576, "step": 16680 }, { "epoch": 0.10490051656747786, "grad_norm": 7.662077903747559, "learning_rate": 1.930504681357552e-05, "loss": 2.0718, "step": 16690 }, { "epoch": 0.10496336888417497, "grad_norm": 6.908252716064453, "learning_rate": 1.9304627712630867e-05, "loss": 2.2498, "step": 16700 }, { "epoch": 0.10502622120087207, "grad_norm": 7.3731770515441895, "learning_rate": 1.9304208611686214e-05, "loss": 2.378, "step": 16710 }, { "epoch": 0.10508907351756919, "grad_norm": 9.528003692626953, "learning_rate": 1.930378951074156e-05, "loss": 2.0206, "step": 16720 }, { "epoch": 0.1051519258342663, "grad_norm": 7.476037979125977, "learning_rate": 1.9303370409796905e-05, "loss": 2.1773, "step": 16730 }, { "epoch": 0.1052147781509634, "grad_norm": 8.378963470458984, "learning_rate": 1.9302951308852252e-05, "loss": 2.3907, "step": 16740 }, { "epoch": 0.10527763046766052, "grad_norm": 7.383098125457764, "learning_rate": 1.93025322079076e-05, "loss": 2.3107, "step": 16750 }, { "epoch": 0.10534048278435763, "grad_norm": 7.901841163635254, "learning_rate": 1.9302113106962946e-05, "loss": 2.1333, "step": 16760 }, { "epoch": 0.10540333510105473, "grad_norm": 6.856114387512207, "learning_rate": 1.9301694006018293e-05, "loss": 2.195, "step": 16770 }, { "epoch": 0.10546618741775185, "grad_norm": 7.679387092590332, "learning_rate": 1.9301274905073637e-05, "loss": 2.2593, "step": 16780 }, { "epoch": 0.10552903973444897, "grad_norm": 8.764408111572266, "learning_rate": 1.9300855804128984e-05, "loss": 2.2052, "step": 16790 }, { "epoch": 0.10559189205114607, "grad_norm": 6.764782905578613, "learning_rate": 1.930043670318433e-05, "loss": 2.3095, "step": 16800 }, { "epoch": 0.10565474436784318, "grad_norm": 7.6861701011657715, "learning_rate": 1.9300017602239678e-05, "loss": 2.1621, "step": 16810 }, { "epoch": 0.1057175966845403, "grad_norm": 8.404021263122559, "learning_rate": 1.9299598501295022e-05, "loss": 2.2331, "step": 16820 }, { "epoch": 0.10578044900123741, "grad_norm": 6.845357418060303, "learning_rate": 1.929917940035037e-05, "loss": 2.2765, "step": 16830 }, { "epoch": 0.10584330131793451, "grad_norm": 6.098022937774658, "learning_rate": 1.9298760299405716e-05, "loss": 2.1941, "step": 16840 }, { "epoch": 0.10590615363463163, "grad_norm": 6.772059440612793, "learning_rate": 1.9298341198461063e-05, "loss": 2.172, "step": 16850 }, { "epoch": 0.10596900595132874, "grad_norm": 8.620196342468262, "learning_rate": 1.9297922097516407e-05, "loss": 2.3347, "step": 16860 }, { "epoch": 0.10603185826802584, "grad_norm": 8.322911262512207, "learning_rate": 1.9297502996571754e-05, "loss": 2.3929, "step": 16870 }, { "epoch": 0.10609471058472296, "grad_norm": 7.095172882080078, "learning_rate": 1.92970838956271e-05, "loss": 2.0793, "step": 16880 }, { "epoch": 0.10615756290142007, "grad_norm": 8.138224601745605, "learning_rate": 1.9296664794682448e-05, "loss": 2.2524, "step": 16890 }, { "epoch": 0.10622041521811718, "grad_norm": 8.498547554016113, "learning_rate": 1.9296245693737795e-05, "loss": 2.3374, "step": 16900 }, { "epoch": 0.10628326753481429, "grad_norm": 7.647117614746094, "learning_rate": 1.9295826592793142e-05, "loss": 2.2798, "step": 16910 }, { "epoch": 0.1063461198515114, "grad_norm": 6.767284393310547, "learning_rate": 1.929540749184849e-05, "loss": 2.3106, "step": 16920 }, { "epoch": 0.10640897216820851, "grad_norm": 7.401424407958984, "learning_rate": 1.9294988390903836e-05, "loss": 2.2486, "step": 16930 }, { "epoch": 0.10647182448490562, "grad_norm": 9.371999740600586, "learning_rate": 1.9294569289959183e-05, "loss": 2.3394, "step": 16940 }, { "epoch": 0.10653467680160274, "grad_norm": 7.710360050201416, "learning_rate": 1.9294150189014527e-05, "loss": 2.1607, "step": 16950 }, { "epoch": 0.10659752911829984, "grad_norm": 8.567865371704102, "learning_rate": 1.9293731088069874e-05, "loss": 2.3452, "step": 16960 }, { "epoch": 0.10666038143499695, "grad_norm": 7.319023609161377, "learning_rate": 1.929331198712522e-05, "loss": 2.2159, "step": 16970 }, { "epoch": 0.10672323375169407, "grad_norm": 7.316112041473389, "learning_rate": 1.9292892886180568e-05, "loss": 2.4197, "step": 16980 }, { "epoch": 0.10678608606839118, "grad_norm": 8.065226554870605, "learning_rate": 1.9292473785235915e-05, "loss": 2.2532, "step": 16990 }, { "epoch": 0.10684893838508829, "grad_norm": 7.7862868309021, "learning_rate": 1.929205468429126e-05, "loss": 2.3215, "step": 17000 }, { "epoch": 0.1069117907017854, "grad_norm": 7.383972644805908, "learning_rate": 1.9291635583346606e-05, "loss": 2.2977, "step": 17010 }, { "epoch": 0.10697464301848252, "grad_norm": 7.82750129699707, "learning_rate": 1.9291216482401953e-05, "loss": 2.4872, "step": 17020 }, { "epoch": 0.10703749533517962, "grad_norm": 7.056968688964844, "learning_rate": 1.92907973814573e-05, "loss": 2.2487, "step": 17030 }, { "epoch": 0.10710034765187673, "grad_norm": 6.971473693847656, "learning_rate": 1.9290378280512644e-05, "loss": 2.1742, "step": 17040 }, { "epoch": 0.10716319996857385, "grad_norm": 7.3401570320129395, "learning_rate": 1.928995917956799e-05, "loss": 2.1839, "step": 17050 }, { "epoch": 0.10722605228527095, "grad_norm": 8.624346733093262, "learning_rate": 1.9289540078623338e-05, "loss": 2.3994, "step": 17060 }, { "epoch": 0.10728890460196806, "grad_norm": 8.388733863830566, "learning_rate": 1.9289120977678685e-05, "loss": 2.0978, "step": 17070 }, { "epoch": 0.10735175691866518, "grad_norm": 8.195749282836914, "learning_rate": 1.9288701876734032e-05, "loss": 2.0869, "step": 17080 }, { "epoch": 0.10741460923536228, "grad_norm": 8.285856246948242, "learning_rate": 1.928828277578938e-05, "loss": 2.1606, "step": 17090 }, { "epoch": 0.1074774615520594, "grad_norm": 8.132835388183594, "learning_rate": 1.9287863674844726e-05, "loss": 2.3997, "step": 17100 }, { "epoch": 0.10754031386875651, "grad_norm": 9.392642974853516, "learning_rate": 1.928744457390007e-05, "loss": 2.3096, "step": 17110 }, { "epoch": 0.10760316618545361, "grad_norm": 8.301399230957031, "learning_rate": 1.9287025472955417e-05, "loss": 2.3675, "step": 17120 }, { "epoch": 0.10766601850215073, "grad_norm": 6.58356237411499, "learning_rate": 1.9286606372010764e-05, "loss": 2.1104, "step": 17130 }, { "epoch": 0.10772887081884784, "grad_norm": 7.440036773681641, "learning_rate": 1.928618727106611e-05, "loss": 2.1014, "step": 17140 }, { "epoch": 0.10779172313554494, "grad_norm": 7.675351142883301, "learning_rate": 1.9285768170121458e-05, "loss": 2.3695, "step": 17150 }, { "epoch": 0.10785457545224206, "grad_norm": 5.981052875518799, "learning_rate": 1.9285349069176805e-05, "loss": 2.316, "step": 17160 }, { "epoch": 0.10791742776893917, "grad_norm": 8.150433540344238, "learning_rate": 1.928492996823215e-05, "loss": 2.1734, "step": 17170 }, { "epoch": 0.10798028008563629, "grad_norm": 7.965524196624756, "learning_rate": 1.9284510867287496e-05, "loss": 2.4759, "step": 17180 }, { "epoch": 0.10804313240233339, "grad_norm": 8.376654624938965, "learning_rate": 1.9284091766342843e-05, "loss": 2.1809, "step": 17190 }, { "epoch": 0.1081059847190305, "grad_norm": 8.441359519958496, "learning_rate": 1.928367266539819e-05, "loss": 2.0916, "step": 17200 }, { "epoch": 0.10816883703572762, "grad_norm": 7.79421329498291, "learning_rate": 1.9283253564453537e-05, "loss": 2.4733, "step": 17210 }, { "epoch": 0.10823168935242472, "grad_norm": 6.398135662078857, "learning_rate": 1.928283446350888e-05, "loss": 2.3177, "step": 17220 }, { "epoch": 0.10829454166912184, "grad_norm": 7.340041637420654, "learning_rate": 1.9282415362564228e-05, "loss": 2.2512, "step": 17230 }, { "epoch": 0.10835739398581895, "grad_norm": 8.736074447631836, "learning_rate": 1.9281996261619575e-05, "loss": 2.1867, "step": 17240 }, { "epoch": 0.10842024630251605, "grad_norm": 7.77802848815918, "learning_rate": 1.9281577160674922e-05, "loss": 2.3079, "step": 17250 }, { "epoch": 0.10848309861921317, "grad_norm": 7.42343282699585, "learning_rate": 1.9281158059730266e-05, "loss": 2.3138, "step": 17260 }, { "epoch": 0.10854595093591028, "grad_norm": 6.85483980178833, "learning_rate": 1.9280738958785613e-05, "loss": 2.1433, "step": 17270 }, { "epoch": 0.10860880325260738, "grad_norm": 9.421246528625488, "learning_rate": 1.928031985784096e-05, "loss": 2.4897, "step": 17280 }, { "epoch": 0.1086716555693045, "grad_norm": 6.760537624359131, "learning_rate": 1.9279900756896307e-05, "loss": 2.2983, "step": 17290 }, { "epoch": 0.10873450788600161, "grad_norm": 6.944523334503174, "learning_rate": 1.927952356604612e-05, "loss": 2.3495, "step": 17300 }, { "epoch": 0.10879736020269871, "grad_norm": 8.142277717590332, "learning_rate": 1.9279104465101465e-05, "loss": 2.32, "step": 17310 }, { "epoch": 0.10886021251939583, "grad_norm": 6.816464900970459, "learning_rate": 1.9278685364156813e-05, "loss": 2.0693, "step": 17320 }, { "epoch": 0.10892306483609294, "grad_norm": 8.537376403808594, "learning_rate": 1.927826626321216e-05, "loss": 2.2503, "step": 17330 }, { "epoch": 0.10898591715279006, "grad_norm": 7.559505462646484, "learning_rate": 1.9277847162267503e-05, "loss": 2.2101, "step": 17340 }, { "epoch": 0.10904876946948716, "grad_norm": 7.39756441116333, "learning_rate": 1.927742806132285e-05, "loss": 2.3663, "step": 17350 }, { "epoch": 0.10911162178618428, "grad_norm": 7.510336399078369, "learning_rate": 1.9277008960378197e-05, "loss": 2.6044, "step": 17360 }, { "epoch": 0.10917447410288139, "grad_norm": 7.362528324127197, "learning_rate": 1.9276589859433544e-05, "loss": 2.3271, "step": 17370 }, { "epoch": 0.10923732641957849, "grad_norm": 7.193731784820557, "learning_rate": 1.927617075848889e-05, "loss": 2.1653, "step": 17380 }, { "epoch": 0.10930017873627561, "grad_norm": 7.314977169036865, "learning_rate": 1.927575165754424e-05, "loss": 2.1461, "step": 17390 }, { "epoch": 0.10936303105297272, "grad_norm": 7.490072727203369, "learning_rate": 1.9275332556599586e-05, "loss": 2.3455, "step": 17400 }, { "epoch": 0.10942588336966982, "grad_norm": 9.052772521972656, "learning_rate": 1.9274913455654933e-05, "loss": 2.2784, "step": 17410 }, { "epoch": 0.10948873568636694, "grad_norm": 7.401103973388672, "learning_rate": 1.9274494354710276e-05, "loss": 2.0925, "step": 17420 }, { "epoch": 0.10955158800306405, "grad_norm": 7.619738578796387, "learning_rate": 1.9274075253765624e-05, "loss": 2.2382, "step": 17430 }, { "epoch": 0.10961444031976116, "grad_norm": 7.593260765075684, "learning_rate": 1.927365615282097e-05, "loss": 2.3528, "step": 17440 }, { "epoch": 0.10967729263645827, "grad_norm": 7.171609878540039, "learning_rate": 1.9273237051876318e-05, "loss": 2.1605, "step": 17450 }, { "epoch": 0.10974014495315539, "grad_norm": 6.895904541015625, "learning_rate": 1.9272817950931665e-05, "loss": 2.1842, "step": 17460 }, { "epoch": 0.10980299726985249, "grad_norm": 7.595503330230713, "learning_rate": 1.927239884998701e-05, "loss": 2.0926, "step": 17470 }, { "epoch": 0.1098658495865496, "grad_norm": 7.256607532501221, "learning_rate": 1.9271979749042355e-05, "loss": 2.3511, "step": 17480 }, { "epoch": 0.10992870190324672, "grad_norm": 8.06808853149414, "learning_rate": 1.9271560648097703e-05, "loss": 2.2478, "step": 17490 }, { "epoch": 0.10999155421994383, "grad_norm": 6.585019588470459, "learning_rate": 1.927114154715305e-05, "loss": 2.321, "step": 17500 }, { "epoch": 0.11005440653664093, "grad_norm": 7.640449523925781, "learning_rate": 1.9270722446208397e-05, "loss": 2.1923, "step": 17510 }, { "epoch": 0.11011725885333805, "grad_norm": 7.136289119720459, "learning_rate": 1.927030334526374e-05, "loss": 2.3107, "step": 17520 }, { "epoch": 0.11018011117003516, "grad_norm": 7.288252353668213, "learning_rate": 1.9269884244319087e-05, "loss": 2.3086, "step": 17530 }, { "epoch": 0.11024296348673226, "grad_norm": 6.95576810836792, "learning_rate": 1.9269465143374435e-05, "loss": 2.1987, "step": 17540 }, { "epoch": 0.11030581580342938, "grad_norm": 8.266757011413574, "learning_rate": 1.926904604242978e-05, "loss": 2.2896, "step": 17550 }, { "epoch": 0.1103686681201265, "grad_norm": 8.554996490478516, "learning_rate": 1.9268626941485125e-05, "loss": 2.1805, "step": 17560 }, { "epoch": 0.1104315204368236, "grad_norm": 8.602093696594238, "learning_rate": 1.9268207840540472e-05, "loss": 2.4654, "step": 17570 }, { "epoch": 0.11049437275352071, "grad_norm": 8.329663276672363, "learning_rate": 1.926778873959582e-05, "loss": 2.4862, "step": 17580 }, { "epoch": 0.11055722507021783, "grad_norm": 8.581228256225586, "learning_rate": 1.9267369638651166e-05, "loss": 2.1252, "step": 17590 }, { "epoch": 0.11062007738691493, "grad_norm": 7.5395402908325195, "learning_rate": 1.9266950537706514e-05, "loss": 2.3789, "step": 17600 }, { "epoch": 0.11068292970361204, "grad_norm": 6.943304538726807, "learning_rate": 1.926653143676186e-05, "loss": 2.0377, "step": 17610 }, { "epoch": 0.11074578202030916, "grad_norm": 8.480267524719238, "learning_rate": 1.9266112335817208e-05, "loss": 2.3763, "step": 17620 }, { "epoch": 0.11080863433700626, "grad_norm": 9.318236351013184, "learning_rate": 1.9265693234872555e-05, "loss": 2.322, "step": 17630 }, { "epoch": 0.11087148665370337, "grad_norm": 6.9914069175720215, "learning_rate": 1.9265274133927902e-05, "loss": 2.0475, "step": 17640 }, { "epoch": 0.11093433897040049, "grad_norm": 6.4933695793151855, "learning_rate": 1.9264855032983246e-05, "loss": 2.1868, "step": 17650 }, { "epoch": 0.1109971912870976, "grad_norm": 8.123563766479492, "learning_rate": 1.9264435932038593e-05, "loss": 2.2155, "step": 17660 }, { "epoch": 0.1110600436037947, "grad_norm": 8.205390930175781, "learning_rate": 1.926401683109394e-05, "loss": 2.1776, "step": 17670 }, { "epoch": 0.11112289592049182, "grad_norm": 7.595110893249512, "learning_rate": 1.9263597730149287e-05, "loss": 2.2943, "step": 17680 }, { "epoch": 0.11118574823718894, "grad_norm": 6.7248101234436035, "learning_rate": 1.926317862920463e-05, "loss": 2.2173, "step": 17690 }, { "epoch": 0.11124860055388604, "grad_norm": 7.3498430252075195, "learning_rate": 1.9262759528259977e-05, "loss": 2.1011, "step": 17700 }, { "epoch": 0.11131145287058315, "grad_norm": 6.804111003875732, "learning_rate": 1.9262340427315325e-05, "loss": 2.2512, "step": 17710 }, { "epoch": 0.11137430518728027, "grad_norm": 7.69357967376709, "learning_rate": 1.926192132637067e-05, "loss": 2.2153, "step": 17720 }, { "epoch": 0.11143715750397737, "grad_norm": 7.761533737182617, "learning_rate": 1.926150222542602e-05, "loss": 2.111, "step": 17730 }, { "epoch": 0.11150000982067448, "grad_norm": 9.239151954650879, "learning_rate": 1.9261083124481362e-05, "loss": 2.2681, "step": 17740 }, { "epoch": 0.1115628621373716, "grad_norm": 8.001991271972656, "learning_rate": 1.926066402353671e-05, "loss": 2.2142, "step": 17750 }, { "epoch": 0.1116257144540687, "grad_norm": 7.723743915557861, "learning_rate": 1.9260244922592057e-05, "loss": 2.1683, "step": 17760 }, { "epoch": 0.11168856677076582, "grad_norm": 7.701489448547363, "learning_rate": 1.9259825821647404e-05, "loss": 2.0378, "step": 17770 }, { "epoch": 0.11175141908746293, "grad_norm": 7.910447597503662, "learning_rate": 1.925940672070275e-05, "loss": 2.2301, "step": 17780 }, { "epoch": 0.11181427140416003, "grad_norm": 7.99406099319458, "learning_rate": 1.9258987619758098e-05, "loss": 2.0902, "step": 17790 }, { "epoch": 0.11187712372085715, "grad_norm": 7.329009532928467, "learning_rate": 1.925856851881344e-05, "loss": 2.1012, "step": 17800 }, { "epoch": 0.11193997603755426, "grad_norm": 7.231492042541504, "learning_rate": 1.925814941786879e-05, "loss": 1.9498, "step": 17810 }, { "epoch": 0.11200282835425136, "grad_norm": 7.1054487228393555, "learning_rate": 1.9257730316924136e-05, "loss": 2.0591, "step": 17820 }, { "epoch": 0.11206568067094848, "grad_norm": 8.44486141204834, "learning_rate": 1.9257311215979483e-05, "loss": 2.1928, "step": 17830 }, { "epoch": 0.1121285329876456, "grad_norm": 6.6756696701049805, "learning_rate": 1.925689211503483e-05, "loss": 2.1967, "step": 17840 }, { "epoch": 0.11219138530434271, "grad_norm": 8.86054515838623, "learning_rate": 1.9256473014090177e-05, "loss": 2.177, "step": 17850 }, { "epoch": 0.11225423762103981, "grad_norm": 7.436457633972168, "learning_rate": 1.9256053913145524e-05, "loss": 2.3267, "step": 17860 }, { "epoch": 0.11231708993773692, "grad_norm": 7.809746265411377, "learning_rate": 1.9255634812200868e-05, "loss": 2.2719, "step": 17870 }, { "epoch": 0.11237994225443404, "grad_norm": 6.934885025024414, "learning_rate": 1.9255215711256215e-05, "loss": 2.2318, "step": 17880 }, { "epoch": 0.11244279457113114, "grad_norm": 8.142309188842773, "learning_rate": 1.925479661031156e-05, "loss": 2.2587, "step": 17890 }, { "epoch": 0.11250564688782826, "grad_norm": 7.852748870849609, "learning_rate": 1.925437750936691e-05, "loss": 2.035, "step": 17900 }, { "epoch": 0.11256849920452537, "grad_norm": 7.492555141448975, "learning_rate": 1.9253958408422252e-05, "loss": 2.2448, "step": 17910 }, { "epoch": 0.11263135152122247, "grad_norm": 7.685120105743408, "learning_rate": 1.92535393074776e-05, "loss": 2.199, "step": 17920 }, { "epoch": 0.11269420383791959, "grad_norm": 7.95084810256958, "learning_rate": 1.9253120206532947e-05, "loss": 2.1024, "step": 17930 }, { "epoch": 0.1127570561546167, "grad_norm": 6.954649448394775, "learning_rate": 1.9252701105588294e-05, "loss": 2.0585, "step": 17940 }, { "epoch": 0.1128199084713138, "grad_norm": 6.373837947845459, "learning_rate": 1.925228200464364e-05, "loss": 2.1546, "step": 17950 }, { "epoch": 0.11288276078801092, "grad_norm": 6.765232086181641, "learning_rate": 1.9251862903698984e-05, "loss": 2.4648, "step": 17960 }, { "epoch": 0.11294561310470803, "grad_norm": 6.509195804595947, "learning_rate": 1.925144380275433e-05, "loss": 2.0974, "step": 17970 }, { "epoch": 0.11300846542140514, "grad_norm": 6.901458263397217, "learning_rate": 1.925102470180968e-05, "loss": 2.0435, "step": 17980 }, { "epoch": 0.11307131773810225, "grad_norm": 7.925265789031982, "learning_rate": 1.9250605600865026e-05, "loss": 2.3093, "step": 17990 }, { "epoch": 0.11313417005479937, "grad_norm": 7.479783058166504, "learning_rate": 1.9250186499920373e-05, "loss": 2.1798, "step": 18000 }, { "epoch": 0.11319702237149648, "grad_norm": 7.755842208862305, "learning_rate": 1.924976739897572e-05, "loss": 2.3681, "step": 18010 }, { "epoch": 0.11325987468819358, "grad_norm": 7.0234222412109375, "learning_rate": 1.9249348298031067e-05, "loss": 2.3122, "step": 18020 }, { "epoch": 0.1133227270048907, "grad_norm": 7.856392860412598, "learning_rate": 1.9248929197086414e-05, "loss": 2.1876, "step": 18030 }, { "epoch": 0.11338557932158781, "grad_norm": 7.414135456085205, "learning_rate": 1.924851009614176e-05, "loss": 2.4818, "step": 18040 }, { "epoch": 0.11344843163828491, "grad_norm": 7.981344223022461, "learning_rate": 1.9248090995197105e-05, "loss": 2.1543, "step": 18050 }, { "epoch": 0.11351128395498203, "grad_norm": 7.49269962310791, "learning_rate": 1.924767189425245e-05, "loss": 2.3239, "step": 18060 }, { "epoch": 0.11357413627167914, "grad_norm": 7.305643558502197, "learning_rate": 1.92472527933078e-05, "loss": 2.1973, "step": 18070 }, { "epoch": 0.11363698858837624, "grad_norm": 8.535238265991211, "learning_rate": 1.9246833692363146e-05, "loss": 2.2497, "step": 18080 }, { "epoch": 0.11369984090507336, "grad_norm": 7.401052474975586, "learning_rate": 1.924641459141849e-05, "loss": 2.1625, "step": 18090 }, { "epoch": 0.11376269322177048, "grad_norm": 8.050284385681152, "learning_rate": 1.9245995490473837e-05, "loss": 2.0429, "step": 18100 }, { "epoch": 0.11382554553846758, "grad_norm": 7.0560526847839355, "learning_rate": 1.9245576389529184e-05, "loss": 2.1123, "step": 18110 }, { "epoch": 0.11388839785516469, "grad_norm": 6.865621566772461, "learning_rate": 1.924515728858453e-05, "loss": 2.1157, "step": 18120 }, { "epoch": 0.1139512501718618, "grad_norm": 7.461267471313477, "learning_rate": 1.9244738187639878e-05, "loss": 2.3348, "step": 18130 }, { "epoch": 0.11401410248855891, "grad_norm": 8.42668342590332, "learning_rate": 1.924431908669522e-05, "loss": 2.3355, "step": 18140 }, { "epoch": 0.11407695480525602, "grad_norm": 8.142030715942383, "learning_rate": 1.924389998575057e-05, "loss": 2.0568, "step": 18150 }, { "epoch": 0.11413980712195314, "grad_norm": 7.327850818634033, "learning_rate": 1.9243480884805916e-05, "loss": 2.2661, "step": 18160 }, { "epoch": 0.11420265943865025, "grad_norm": 7.3329925537109375, "learning_rate": 1.9243061783861263e-05, "loss": 2.4782, "step": 18170 }, { "epoch": 0.11426551175534735, "grad_norm": 8.312920570373535, "learning_rate": 1.9242642682916606e-05, "loss": 2.6378, "step": 18180 }, { "epoch": 0.11432836407204447, "grad_norm": 12.4722900390625, "learning_rate": 1.9242223581971953e-05, "loss": 2.1063, "step": 18190 }, { "epoch": 0.11439121638874158, "grad_norm": 7.168670654296875, "learning_rate": 1.92418044810273e-05, "loss": 2.182, "step": 18200 }, { "epoch": 0.11445406870543869, "grad_norm": 8.269729614257812, "learning_rate": 1.9241385380082648e-05, "loss": 2.2661, "step": 18210 }, { "epoch": 0.1145169210221358, "grad_norm": 7.075867176055908, "learning_rate": 1.9240966279137995e-05, "loss": 2.243, "step": 18220 }, { "epoch": 0.11457977333883292, "grad_norm": 9.278741836547852, "learning_rate": 1.9240547178193342e-05, "loss": 2.0635, "step": 18230 }, { "epoch": 0.11464262565553002, "grad_norm": 8.0344820022583, "learning_rate": 1.924012807724869e-05, "loss": 2.2158, "step": 18240 }, { "epoch": 0.11470547797222713, "grad_norm": 8.224825859069824, "learning_rate": 1.9239708976304036e-05, "loss": 2.2555, "step": 18250 }, { "epoch": 0.11476833028892425, "grad_norm": 6.739234924316406, "learning_rate": 1.9239289875359383e-05, "loss": 1.9965, "step": 18260 }, { "epoch": 0.11483118260562135, "grad_norm": 8.875475883483887, "learning_rate": 1.9238870774414727e-05, "loss": 2.1516, "step": 18270 }, { "epoch": 0.11489403492231846, "grad_norm": 7.0612897872924805, "learning_rate": 1.9238451673470074e-05, "loss": 2.1763, "step": 18280 }, { "epoch": 0.11495688723901558, "grad_norm": 7.863101959228516, "learning_rate": 1.923803257252542e-05, "loss": 2.2357, "step": 18290 }, { "epoch": 0.11501973955571268, "grad_norm": 7.017388820648193, "learning_rate": 1.9237613471580768e-05, "loss": 2.2272, "step": 18300 }, { "epoch": 0.1150825918724098, "grad_norm": 9.14671802520752, "learning_rate": 1.923719437063611e-05, "loss": 2.2746, "step": 18310 }, { "epoch": 0.11514544418910691, "grad_norm": 8.308667182922363, "learning_rate": 1.923677526969146e-05, "loss": 2.016, "step": 18320 }, { "epoch": 0.11520829650580403, "grad_norm": 6.846327304840088, "learning_rate": 1.9236356168746806e-05, "loss": 2.1404, "step": 18330 }, { "epoch": 0.11527114882250113, "grad_norm": 8.261704444885254, "learning_rate": 1.9235937067802153e-05, "loss": 2.3775, "step": 18340 }, { "epoch": 0.11533400113919824, "grad_norm": 7.210214614868164, "learning_rate": 1.92355179668575e-05, "loss": 2.2635, "step": 18350 }, { "epoch": 0.11539685345589536, "grad_norm": 7.188510417938232, "learning_rate": 1.9235098865912843e-05, "loss": 2.4214, "step": 18360 }, { "epoch": 0.11545970577259246, "grad_norm": 7.944930553436279, "learning_rate": 1.923467976496819e-05, "loss": 2.1332, "step": 18370 }, { "epoch": 0.11552255808928957, "grad_norm": 8.27840805053711, "learning_rate": 1.9234260664023538e-05, "loss": 2.1003, "step": 18380 }, { "epoch": 0.11558541040598669, "grad_norm": 6.786845684051514, "learning_rate": 1.9233841563078885e-05, "loss": 2.1307, "step": 18390 }, { "epoch": 0.11564826272268379, "grad_norm": 7.634956359863281, "learning_rate": 1.9233422462134232e-05, "loss": 2.1637, "step": 18400 }, { "epoch": 0.1157111150393809, "grad_norm": 7.450354099273682, "learning_rate": 1.923300336118958e-05, "loss": 2.2814, "step": 18410 }, { "epoch": 0.11577396735607802, "grad_norm": 8.3748779296875, "learning_rate": 1.9232584260244923e-05, "loss": 2.5221, "step": 18420 }, { "epoch": 0.11583681967277512, "grad_norm": 7.9941558837890625, "learning_rate": 1.923216515930027e-05, "loss": 2.0243, "step": 18430 }, { "epoch": 0.11589967198947224, "grad_norm": 8.738197326660156, "learning_rate": 1.9231746058355617e-05, "loss": 2.0117, "step": 18440 }, { "epoch": 0.11596252430616935, "grad_norm": 7.011739253997803, "learning_rate": 1.9231326957410964e-05, "loss": 2.012, "step": 18450 }, { "epoch": 0.11602537662286645, "grad_norm": 7.023013114929199, "learning_rate": 1.923090785646631e-05, "loss": 2.0715, "step": 18460 }, { "epoch": 0.11608822893956357, "grad_norm": 7.268269062042236, "learning_rate": 1.9230488755521658e-05, "loss": 2.1794, "step": 18470 }, { "epoch": 0.11615108125626068, "grad_norm": 9.136162757873535, "learning_rate": 1.9230069654577005e-05, "loss": 2.2741, "step": 18480 }, { "epoch": 0.11621393357295778, "grad_norm": 7.9868621826171875, "learning_rate": 1.922965055363235e-05, "loss": 2.1819, "step": 18490 }, { "epoch": 0.1162767858896549, "grad_norm": 7.73309326171875, "learning_rate": 1.9229231452687696e-05, "loss": 2.4263, "step": 18500 }, { "epoch": 0.11633963820635201, "grad_norm": 7.7979350090026855, "learning_rate": 1.9228812351743043e-05, "loss": 2.4283, "step": 18510 }, { "epoch": 0.11640249052304913, "grad_norm": 6.465146541595459, "learning_rate": 1.922839325079839e-05, "loss": 1.9808, "step": 18520 }, { "epoch": 0.11646534283974623, "grad_norm": 8.1764554977417, "learning_rate": 1.9227974149853737e-05, "loss": 2.1467, "step": 18530 }, { "epoch": 0.11652819515644335, "grad_norm": 7.2224273681640625, "learning_rate": 1.922755504890908e-05, "loss": 2.1513, "step": 18540 }, { "epoch": 0.11659104747314046, "grad_norm": 8.304801940917969, "learning_rate": 1.9227135947964428e-05, "loss": 2.0873, "step": 18550 }, { "epoch": 0.11665389978983756, "grad_norm": 7.494878768920898, "learning_rate": 1.9226716847019775e-05, "loss": 2.1896, "step": 18560 }, { "epoch": 0.11671675210653468, "grad_norm": 6.850111484527588, "learning_rate": 1.9226297746075122e-05, "loss": 1.9097, "step": 18570 }, { "epoch": 0.11677960442323179, "grad_norm": 6.223687171936035, "learning_rate": 1.9225878645130465e-05, "loss": 2.0755, "step": 18580 }, { "epoch": 0.1168424567399289, "grad_norm": 6.9537200927734375, "learning_rate": 1.9225459544185813e-05, "loss": 2.3503, "step": 18590 }, { "epoch": 0.11690530905662601, "grad_norm": 8.668498039245605, "learning_rate": 1.922504044324116e-05, "loss": 2.2197, "step": 18600 }, { "epoch": 0.11696816137332312, "grad_norm": 8.086195945739746, "learning_rate": 1.9224621342296507e-05, "loss": 2.2773, "step": 18610 }, { "epoch": 0.11703101369002022, "grad_norm": 9.474557876586914, "learning_rate": 1.9224202241351854e-05, "loss": 2.3185, "step": 18620 }, { "epoch": 0.11709386600671734, "grad_norm": 9.221257209777832, "learning_rate": 1.92237831404072e-05, "loss": 2.3129, "step": 18630 }, { "epoch": 0.11715671832341445, "grad_norm": 7.471948146820068, "learning_rate": 1.9223364039462548e-05, "loss": 2.0476, "step": 18640 }, { "epoch": 0.11721957064011156, "grad_norm": 8.019757270812988, "learning_rate": 1.9222944938517895e-05, "loss": 2.161, "step": 18650 }, { "epoch": 0.11728242295680867, "grad_norm": 7.667059421539307, "learning_rate": 1.9222525837573242e-05, "loss": 2.1936, "step": 18660 }, { "epoch": 0.11734527527350579, "grad_norm": 8.694701194763184, "learning_rate": 1.9222106736628586e-05, "loss": 2.4176, "step": 18670 }, { "epoch": 0.1174081275902029, "grad_norm": 7.578640937805176, "learning_rate": 1.9221687635683933e-05, "loss": 1.9401, "step": 18680 }, { "epoch": 0.1174709799069, "grad_norm": 5.771047592163086, "learning_rate": 1.922126853473928e-05, "loss": 1.9679, "step": 18690 }, { "epoch": 0.11753383222359712, "grad_norm": 7.293246269226074, "learning_rate": 1.9220849433794627e-05, "loss": 2.3527, "step": 18700 }, { "epoch": 0.11759668454029423, "grad_norm": 7.9927873611450195, "learning_rate": 1.922043033284997e-05, "loss": 2.2599, "step": 18710 }, { "epoch": 0.11765953685699133, "grad_norm": 7.356894493103027, "learning_rate": 1.9220011231905318e-05, "loss": 2.3567, "step": 18720 }, { "epoch": 0.11772238917368845, "grad_norm": 6.663243770599365, "learning_rate": 1.9219592130960665e-05, "loss": 2.0537, "step": 18730 }, { "epoch": 0.11778524149038556, "grad_norm": 6.652327537536621, "learning_rate": 1.9219173030016012e-05, "loss": 2.2665, "step": 18740 }, { "epoch": 0.11784809380708267, "grad_norm": 7.32041597366333, "learning_rate": 1.921875392907136e-05, "loss": 2.1582, "step": 18750 }, { "epoch": 0.11791094612377978, "grad_norm": 7.6705451011657715, "learning_rate": 1.9218334828126703e-05, "loss": 2.3782, "step": 18760 }, { "epoch": 0.1179737984404769, "grad_norm": 8.588958740234375, "learning_rate": 1.921791572718205e-05, "loss": 2.1552, "step": 18770 }, { "epoch": 0.118036650757174, "grad_norm": 8.211400032043457, "learning_rate": 1.9217496626237397e-05, "loss": 2.1711, "step": 18780 }, { "epoch": 0.11809950307387111, "grad_norm": 6.705261707305908, "learning_rate": 1.9217077525292744e-05, "loss": 2.1008, "step": 18790 }, { "epoch": 0.11816235539056823, "grad_norm": 8.6095609664917, "learning_rate": 1.9216658424348087e-05, "loss": 2.2753, "step": 18800 }, { "epoch": 0.11822520770726533, "grad_norm": 6.959962844848633, "learning_rate": 1.9216239323403435e-05, "loss": 2.0305, "step": 18810 }, { "epoch": 0.11828806002396244, "grad_norm": 8.471665382385254, "learning_rate": 1.921582022245878e-05, "loss": 2.1081, "step": 18820 }, { "epoch": 0.11835091234065956, "grad_norm": 8.152358055114746, "learning_rate": 1.921540112151413e-05, "loss": 2.3344, "step": 18830 }, { "epoch": 0.11841376465735667, "grad_norm": 6.483791828155518, "learning_rate": 1.9214982020569476e-05, "loss": 2.1609, "step": 18840 }, { "epoch": 0.11847661697405378, "grad_norm": 7.693551540374756, "learning_rate": 1.9214562919624823e-05, "loss": 2.0621, "step": 18850 }, { "epoch": 0.11853946929075089, "grad_norm": 7.567829608917236, "learning_rate": 1.921414381868017e-05, "loss": 2.169, "step": 18860 }, { "epoch": 0.118602321607448, "grad_norm": 6.230541706085205, "learning_rate": 1.9213724717735517e-05, "loss": 2.0434, "step": 18870 }, { "epoch": 0.1186651739241451, "grad_norm": 7.5944976806640625, "learning_rate": 1.9213305616790864e-05, "loss": 2.2748, "step": 18880 }, { "epoch": 0.11872802624084222, "grad_norm": 9.224638938903809, "learning_rate": 1.9212886515846208e-05, "loss": 2.1078, "step": 18890 }, { "epoch": 0.11879087855753934, "grad_norm": 7.6371750831604, "learning_rate": 1.9212467414901555e-05, "loss": 2.1176, "step": 18900 }, { "epoch": 0.11885373087423644, "grad_norm": 8.238876342773438, "learning_rate": 1.9212048313956902e-05, "loss": 2.1418, "step": 18910 }, { "epoch": 0.11891658319093355, "grad_norm": 9.216188430786133, "learning_rate": 1.921162921301225e-05, "loss": 2.2832, "step": 18920 }, { "epoch": 0.11897943550763067, "grad_norm": 7.281813621520996, "learning_rate": 1.9211210112067593e-05, "loss": 2.1489, "step": 18930 }, { "epoch": 0.11904228782432777, "grad_norm": 7.572481632232666, "learning_rate": 1.921079101112294e-05, "loss": 2.4332, "step": 18940 }, { "epoch": 0.11910514014102488, "grad_norm": 7.893809795379639, "learning_rate": 1.9210371910178287e-05, "loss": 2.4696, "step": 18950 }, { "epoch": 0.119167992457722, "grad_norm": 7.050575256347656, "learning_rate": 1.9209952809233634e-05, "loss": 2.162, "step": 18960 }, { "epoch": 0.1192308447744191, "grad_norm": 8.660941123962402, "learning_rate": 1.920953370828898e-05, "loss": 2.413, "step": 18970 }, { "epoch": 0.11929369709111622, "grad_norm": 8.012182235717773, "learning_rate": 1.9209114607344325e-05, "loss": 2.3525, "step": 18980 }, { "epoch": 0.11935654940781333, "grad_norm": 7.362180233001709, "learning_rate": 1.920869550639967e-05, "loss": 2.3946, "step": 18990 }, { "epoch": 0.11941940172451045, "grad_norm": 7.550148010253906, "learning_rate": 1.920827640545502e-05, "loss": 2.1124, "step": 19000 }, { "epoch": 0.11948225404120755, "grad_norm": 7.317694664001465, "learning_rate": 1.9207857304510366e-05, "loss": 2.3389, "step": 19010 }, { "epoch": 0.11954510635790466, "grad_norm": 9.328289031982422, "learning_rate": 1.9207438203565713e-05, "loss": 2.1496, "step": 19020 }, { "epoch": 0.11960795867460178, "grad_norm": 8.249361038208008, "learning_rate": 1.920701910262106e-05, "loss": 2.2233, "step": 19030 }, { "epoch": 0.11967081099129888, "grad_norm": 7.633471965789795, "learning_rate": 1.9206600001676407e-05, "loss": 2.2853, "step": 19040 }, { "epoch": 0.119733663307996, "grad_norm": 9.351219177246094, "learning_rate": 1.920618090073175e-05, "loss": 2.2397, "step": 19050 }, { "epoch": 0.11979651562469311, "grad_norm": 14.492135047912598, "learning_rate": 1.9205761799787098e-05, "loss": 2.0129, "step": 19060 }, { "epoch": 0.11985936794139021, "grad_norm": 7.919447898864746, "learning_rate": 1.9205342698842445e-05, "loss": 2.1271, "step": 19070 }, { "epoch": 0.11992222025808733, "grad_norm": 7.8114142417907715, "learning_rate": 1.9204923597897792e-05, "loss": 2.0951, "step": 19080 }, { "epoch": 0.11998507257478444, "grad_norm": 7.433046817779541, "learning_rate": 1.920450449695314e-05, "loss": 1.9276, "step": 19090 }, { "epoch": 0.12004792489148154, "grad_norm": 7.817944526672363, "learning_rate": 1.9204085396008486e-05, "loss": 2.0837, "step": 19100 }, { "epoch": 0.12011077720817866, "grad_norm": 7.712526321411133, "learning_rate": 1.920366629506383e-05, "loss": 2.1003, "step": 19110 }, { "epoch": 0.12017362952487577, "grad_norm": 7.097597599029541, "learning_rate": 1.9203247194119177e-05, "loss": 2.238, "step": 19120 }, { "epoch": 0.12023648184157287, "grad_norm": 8.20934009552002, "learning_rate": 1.9202828093174524e-05, "loss": 2.1962, "step": 19130 }, { "epoch": 0.12029933415826999, "grad_norm": 9.129846572875977, "learning_rate": 1.920240899222987e-05, "loss": 2.2505, "step": 19140 }, { "epoch": 0.1203621864749671, "grad_norm": 7.770500183105469, "learning_rate": 1.9201989891285218e-05, "loss": 1.9644, "step": 19150 }, { "epoch": 0.12042503879166422, "grad_norm": 8.032506942749023, "learning_rate": 1.920157079034056e-05, "loss": 2.3757, "step": 19160 }, { "epoch": 0.12048789110836132, "grad_norm": 7.227913856506348, "learning_rate": 1.920115168939591e-05, "loss": 2.269, "step": 19170 }, { "epoch": 0.12055074342505843, "grad_norm": 9.362936973571777, "learning_rate": 1.9200732588451256e-05, "loss": 2.0911, "step": 19180 }, { "epoch": 0.12061359574175555, "grad_norm": 9.266836166381836, "learning_rate": 1.9200313487506603e-05, "loss": 2.0499, "step": 19190 }, { "epoch": 0.12067644805845265, "grad_norm": 8.412091255187988, "learning_rate": 1.9199894386561947e-05, "loss": 2.3619, "step": 19200 }, { "epoch": 0.12073930037514977, "grad_norm": 7.589006423950195, "learning_rate": 1.9199475285617294e-05, "loss": 2.4516, "step": 19210 }, { "epoch": 0.12080215269184688, "grad_norm": 8.433143615722656, "learning_rate": 1.919905618467264e-05, "loss": 2.1688, "step": 19220 }, { "epoch": 0.12086500500854398, "grad_norm": 6.455695152282715, "learning_rate": 1.9198637083727988e-05, "loss": 2.293, "step": 19230 }, { "epoch": 0.1209278573252411, "grad_norm": 8.669682502746582, "learning_rate": 1.9198217982783335e-05, "loss": 2.0135, "step": 19240 }, { "epoch": 0.12099070964193821, "grad_norm": 6.809682369232178, "learning_rate": 1.9197798881838682e-05, "loss": 2.1588, "step": 19250 }, { "epoch": 0.12105356195863531, "grad_norm": 7.939022064208984, "learning_rate": 1.919737978089403e-05, "loss": 2.4309, "step": 19260 }, { "epoch": 0.12111641427533243, "grad_norm": 8.199952125549316, "learning_rate": 1.9196960679949376e-05, "loss": 2.3256, "step": 19270 }, { "epoch": 0.12117926659202954, "grad_norm": 7.057774543762207, "learning_rate": 1.9196541579004723e-05, "loss": 2.1621, "step": 19280 }, { "epoch": 0.12124211890872665, "grad_norm": 6.977524280548096, "learning_rate": 1.9196122478060067e-05, "loss": 2.2381, "step": 19290 }, { "epoch": 0.12130497122542376, "grad_norm": 7.7442216873168945, "learning_rate": 1.9195703377115414e-05, "loss": 2.3605, "step": 19300 }, { "epoch": 0.12136782354212088, "grad_norm": 8.420924186706543, "learning_rate": 1.919528427617076e-05, "loss": 2.2136, "step": 19310 }, { "epoch": 0.12143067585881798, "grad_norm": 6.575891971588135, "learning_rate": 1.9194865175226108e-05, "loss": 2.122, "step": 19320 }, { "epoch": 0.12149352817551509, "grad_norm": 8.827194213867188, "learning_rate": 1.9194446074281452e-05, "loss": 2.2611, "step": 19330 }, { "epoch": 0.12155638049221221, "grad_norm": 6.7283525466918945, "learning_rate": 1.91940269733368e-05, "loss": 2.1378, "step": 19340 }, { "epoch": 0.12161923280890932, "grad_norm": 7.744928359985352, "learning_rate": 1.9193607872392146e-05, "loss": 2.1025, "step": 19350 }, { "epoch": 0.12168208512560642, "grad_norm": 7.292446136474609, "learning_rate": 1.9193188771447493e-05, "loss": 2.0431, "step": 19360 }, { "epoch": 0.12174493744230354, "grad_norm": 8.067106246948242, "learning_rate": 1.919276967050284e-05, "loss": 2.0828, "step": 19370 }, { "epoch": 0.12180778975900065, "grad_norm": 7.035076141357422, "learning_rate": 1.9192350569558184e-05, "loss": 2.1966, "step": 19380 }, { "epoch": 0.12187064207569775, "grad_norm": 8.168293952941895, "learning_rate": 1.919193146861353e-05, "loss": 2.3357, "step": 19390 }, { "epoch": 0.12193349439239487, "grad_norm": 7.839327335357666, "learning_rate": 1.9191512367668878e-05, "loss": 2.0244, "step": 19400 }, { "epoch": 0.12199634670909199, "grad_norm": 7.90061092376709, "learning_rate": 1.9191093266724225e-05, "loss": 2.1978, "step": 19410 }, { "epoch": 0.12205919902578909, "grad_norm": 7.999143600463867, "learning_rate": 1.9190674165779572e-05, "loss": 2.2847, "step": 19420 }, { "epoch": 0.1221220513424862, "grad_norm": 8.394352912902832, "learning_rate": 1.9190255064834916e-05, "loss": 2.2628, "step": 19430 }, { "epoch": 0.12218490365918332, "grad_norm": 7.913904190063477, "learning_rate": 1.9189835963890263e-05, "loss": 2.064, "step": 19440 }, { "epoch": 0.12224775597588042, "grad_norm": 7.781804084777832, "learning_rate": 1.918941686294561e-05, "loss": 2.0578, "step": 19450 }, { "epoch": 0.12231060829257753, "grad_norm": 8.752440452575684, "learning_rate": 1.9188997762000957e-05, "loss": 2.3629, "step": 19460 }, { "epoch": 0.12237346060927465, "grad_norm": 6.962963104248047, "learning_rate": 1.9188578661056304e-05, "loss": 2.1903, "step": 19470 }, { "epoch": 0.12243631292597175, "grad_norm": 7.6390862464904785, "learning_rate": 1.918815956011165e-05, "loss": 2.0745, "step": 19480 }, { "epoch": 0.12249916524266886, "grad_norm": 7.179372310638428, "learning_rate": 1.9187740459166998e-05, "loss": 2.3104, "step": 19490 }, { "epoch": 0.12256201755936598, "grad_norm": 7.717180252075195, "learning_rate": 1.9187321358222345e-05, "loss": 2.0472, "step": 19500 }, { "epoch": 0.1226248698760631, "grad_norm": 7.581270217895508, "learning_rate": 1.918690225727769e-05, "loss": 2.2385, "step": 19510 }, { "epoch": 0.1226877221927602, "grad_norm": 7.664039611816406, "learning_rate": 1.9186483156333036e-05, "loss": 2.1226, "step": 19520 }, { "epoch": 0.12275057450945731, "grad_norm": 9.040067672729492, "learning_rate": 1.9186105965482847e-05, "loss": 2.1236, "step": 19530 }, { "epoch": 0.12281342682615443, "grad_norm": 7.814372539520264, "learning_rate": 1.9185686864538194e-05, "loss": 1.9381, "step": 19540 }, { "epoch": 0.12287627914285153, "grad_norm": 8.149017333984375, "learning_rate": 1.918526776359354e-05, "loss": 2.1876, "step": 19550 }, { "epoch": 0.12293913145954864, "grad_norm": 7.6349310874938965, "learning_rate": 1.918484866264889e-05, "loss": 2.2091, "step": 19560 }, { "epoch": 0.12300198377624576, "grad_norm": 8.281112670898438, "learning_rate": 1.9184429561704236e-05, "loss": 2.1029, "step": 19570 }, { "epoch": 0.12306483609294286, "grad_norm": 7.725566387176514, "learning_rate": 1.9184010460759583e-05, "loss": 2.0583, "step": 19580 }, { "epoch": 0.12312768840963997, "grad_norm": 8.579814910888672, "learning_rate": 1.9183591359814926e-05, "loss": 2.4456, "step": 19590 }, { "epoch": 0.12319054072633709, "grad_norm": 9.688976287841797, "learning_rate": 1.9183172258870273e-05, "loss": 2.2219, "step": 19600 }, { "epoch": 0.12325339304303419, "grad_norm": 6.544898986816406, "learning_rate": 1.918275315792562e-05, "loss": 2.0028, "step": 19610 }, { "epoch": 0.1233162453597313, "grad_norm": 8.4871244430542, "learning_rate": 1.9182334056980967e-05, "loss": 2.2076, "step": 19620 }, { "epoch": 0.12337909767642842, "grad_norm": 6.6899518966674805, "learning_rate": 1.918191495603631e-05, "loss": 2.4654, "step": 19630 }, { "epoch": 0.12344194999312552, "grad_norm": 6.7486982345581055, "learning_rate": 1.9181495855091658e-05, "loss": 2.347, "step": 19640 }, { "epoch": 0.12350480230982264, "grad_norm": 8.075026512145996, "learning_rate": 1.9181076754147005e-05, "loss": 2.0383, "step": 19650 }, { "epoch": 0.12356765462651975, "grad_norm": 8.204153060913086, "learning_rate": 1.9180657653202352e-05, "loss": 2.2389, "step": 19660 }, { "epoch": 0.12363050694321687, "grad_norm": 7.5068230628967285, "learning_rate": 1.9180238552257696e-05, "loss": 1.9144, "step": 19670 }, { "epoch": 0.12369335925991397, "grad_norm": 7.916260242462158, "learning_rate": 1.9179819451313043e-05, "loss": 2.095, "step": 19680 }, { "epoch": 0.12375621157661108, "grad_norm": 7.666872501373291, "learning_rate": 1.917940035036839e-05, "loss": 2.2128, "step": 19690 }, { "epoch": 0.1238190638933082, "grad_norm": 7.435920238494873, "learning_rate": 1.9178981249423737e-05, "loss": 2.0676, "step": 19700 }, { "epoch": 0.1238819162100053, "grad_norm": 6.545831680297852, "learning_rate": 1.9178562148479084e-05, "loss": 2.1508, "step": 19710 }, { "epoch": 0.12394476852670241, "grad_norm": 7.308215618133545, "learning_rate": 1.917814304753443e-05, "loss": 2.1635, "step": 19720 }, { "epoch": 0.12400762084339953, "grad_norm": 7.002355098724365, "learning_rate": 1.917772394658978e-05, "loss": 2.0521, "step": 19730 }, { "epoch": 0.12407047316009663, "grad_norm": 7.835935592651367, "learning_rate": 1.9177304845645122e-05, "loss": 2.0145, "step": 19740 }, { "epoch": 0.12413332547679375, "grad_norm": 7.6545586585998535, "learning_rate": 1.917688574470047e-05, "loss": 2.123, "step": 19750 }, { "epoch": 0.12419617779349086, "grad_norm": 8.699694633483887, "learning_rate": 1.9176466643755816e-05, "loss": 2.1843, "step": 19760 }, { "epoch": 0.12425903011018796, "grad_norm": 7.266550064086914, "learning_rate": 1.9176047542811163e-05, "loss": 2.3133, "step": 19770 }, { "epoch": 0.12432188242688508, "grad_norm": 5.948155879974365, "learning_rate": 1.917562844186651e-05, "loss": 2.1415, "step": 19780 }, { "epoch": 0.12438473474358219, "grad_norm": 8.048948287963867, "learning_rate": 1.9175209340921858e-05, "loss": 2.0368, "step": 19790 }, { "epoch": 0.1244475870602793, "grad_norm": 7.991379261016846, "learning_rate": 1.9174790239977205e-05, "loss": 2.1586, "step": 19800 }, { "epoch": 0.12451043937697641, "grad_norm": 6.790831565856934, "learning_rate": 1.9174371139032548e-05, "loss": 1.9571, "step": 19810 }, { "epoch": 0.12457329169367352, "grad_norm": 8.452643394470215, "learning_rate": 1.9173952038087895e-05, "loss": 1.9298, "step": 19820 }, { "epoch": 0.12463614401037064, "grad_norm": 8.183903694152832, "learning_rate": 1.9173532937143242e-05, "loss": 2.3263, "step": 19830 }, { "epoch": 0.12469899632706774, "grad_norm": 7.026977062225342, "learning_rate": 1.917311383619859e-05, "loss": 2.0561, "step": 19840 }, { "epoch": 0.12476184864376486, "grad_norm": 7.824635982513428, "learning_rate": 1.9172694735253933e-05, "loss": 2.0271, "step": 19850 }, { "epoch": 0.12482470096046197, "grad_norm": 7.062274932861328, "learning_rate": 1.917227563430928e-05, "loss": 2.2787, "step": 19860 }, { "epoch": 0.12488755327715907, "grad_norm": 7.3444294929504395, "learning_rate": 1.9171856533364627e-05, "loss": 2.2993, "step": 19870 }, { "epoch": 0.12495040559385619, "grad_norm": 7.4288740158081055, "learning_rate": 1.9171437432419974e-05, "loss": 1.8584, "step": 19880 }, { "epoch": 0.1250132579105533, "grad_norm": 7.470634937286377, "learning_rate": 1.917101833147532e-05, "loss": 2.0709, "step": 19890 }, { "epoch": 0.1250761102272504, "grad_norm": 7.890097618103027, "learning_rate": 1.9170599230530665e-05, "loss": 2.0735, "step": 19900 }, { "epoch": 0.12513896254394752, "grad_norm": 8.124094009399414, "learning_rate": 1.9170180129586012e-05, "loss": 2.2584, "step": 19910 }, { "epoch": 0.12520181486064463, "grad_norm": 7.2056474685668945, "learning_rate": 1.916976102864136e-05, "loss": 2.0531, "step": 19920 }, { "epoch": 0.12526466717734175, "grad_norm": 7.099191188812256, "learning_rate": 1.9169341927696706e-05, "loss": 2.2185, "step": 19930 }, { "epoch": 0.12532751949403886, "grad_norm": 8.294858932495117, "learning_rate": 1.9168922826752053e-05, "loss": 2.173, "step": 19940 }, { "epoch": 0.12539037181073595, "grad_norm": 8.263566017150879, "learning_rate": 1.91685037258074e-05, "loss": 2.2138, "step": 19950 }, { "epoch": 0.12545322412743307, "grad_norm": 7.2847418785095215, "learning_rate": 1.9168084624862748e-05, "loss": 2.2492, "step": 19960 }, { "epoch": 0.12551607644413018, "grad_norm": 7.45059061050415, "learning_rate": 1.9167665523918095e-05, "loss": 2.3967, "step": 19970 }, { "epoch": 0.1255789287608273, "grad_norm": 8.305978775024414, "learning_rate": 1.916724642297344e-05, "loss": 2.075, "step": 19980 }, { "epoch": 0.1256417810775244, "grad_norm": 7.666145324707031, "learning_rate": 1.9166827322028785e-05, "loss": 2.3947, "step": 19990 }, { "epoch": 0.12570463339422153, "grad_norm": 6.912258148193359, "learning_rate": 1.9166408221084132e-05, "loss": 2.2309, "step": 20000 }, { "epoch": 0.12576748571091861, "grad_norm": 7.005916595458984, "learning_rate": 1.916598912013948e-05, "loss": 2.3093, "step": 20010 }, { "epoch": 0.12583033802761573, "grad_norm": 7.088340759277344, "learning_rate": 1.9165570019194827e-05, "loss": 2.3978, "step": 20020 }, { "epoch": 0.12589319034431284, "grad_norm": 7.318819046020508, "learning_rate": 1.916515091825017e-05, "loss": 1.9451, "step": 20030 }, { "epoch": 0.12595604266100996, "grad_norm": 7.074946880340576, "learning_rate": 1.9164731817305517e-05, "loss": 2.2586, "step": 20040 }, { "epoch": 0.12601889497770707, "grad_norm": 6.7742533683776855, "learning_rate": 1.9164312716360864e-05, "loss": 2.0808, "step": 20050 }, { "epoch": 0.1260817472944042, "grad_norm": 8.263649940490723, "learning_rate": 1.916389361541621e-05, "loss": 2.1158, "step": 20060 }, { "epoch": 0.12614459961110128, "grad_norm": 7.028926372528076, "learning_rate": 1.9163474514471555e-05, "loss": 2.1163, "step": 20070 }, { "epoch": 0.1262074519277984, "grad_norm": 6.812427043914795, "learning_rate": 1.9163055413526902e-05, "loss": 1.9023, "step": 20080 }, { "epoch": 0.1262703042444955, "grad_norm": 14.464795112609863, "learning_rate": 1.916263631258225e-05, "loss": 2.2503, "step": 20090 }, { "epoch": 0.12633315656119262, "grad_norm": 7.922114372253418, "learning_rate": 1.9162217211637596e-05, "loss": 2.1397, "step": 20100 }, { "epoch": 0.12639600887788974, "grad_norm": 6.583799362182617, "learning_rate": 1.9161798110692943e-05, "loss": 1.8107, "step": 20110 }, { "epoch": 0.12645886119458685, "grad_norm": 6.8187432289123535, "learning_rate": 1.9161379009748287e-05, "loss": 2.1158, "step": 20120 }, { "epoch": 0.12652171351128397, "grad_norm": 8.50075912475586, "learning_rate": 1.9160959908803634e-05, "loss": 2.341, "step": 20130 }, { "epoch": 0.12658456582798105, "grad_norm": 8.930004119873047, "learning_rate": 1.916054080785898e-05, "loss": 2.3068, "step": 20140 }, { "epoch": 0.12664741814467817, "grad_norm": 7.269128799438477, "learning_rate": 1.916012170691433e-05, "loss": 1.9076, "step": 20150 }, { "epoch": 0.12671027046137529, "grad_norm": 7.562623977661133, "learning_rate": 1.9159702605969675e-05, "loss": 2.26, "step": 20160 }, { "epoch": 0.1267731227780724, "grad_norm": 9.525056838989258, "learning_rate": 1.9159283505025022e-05, "loss": 2.3676, "step": 20170 }, { "epoch": 0.12683597509476952, "grad_norm": 7.904518127441406, "learning_rate": 1.915886440408037e-05, "loss": 2.291, "step": 20180 }, { "epoch": 0.12689882741146663, "grad_norm": 8.256309509277344, "learning_rate": 1.9158445303135717e-05, "loss": 2.0052, "step": 20190 }, { "epoch": 0.12696167972816372, "grad_norm": 8.02798080444336, "learning_rate": 1.9158026202191064e-05, "loss": 2.4553, "step": 20200 }, { "epoch": 0.12702453204486083, "grad_norm": 7.898191928863525, "learning_rate": 1.9157607101246407e-05, "loss": 2.126, "step": 20210 }, { "epoch": 0.12708738436155795, "grad_norm": 6.945441246032715, "learning_rate": 1.9157188000301754e-05, "loss": 2.0418, "step": 20220 }, { "epoch": 0.12715023667825506, "grad_norm": 6.388503074645996, "learning_rate": 1.91567688993571e-05, "loss": 2.0764, "step": 20230 }, { "epoch": 0.12721308899495218, "grad_norm": 8.389837265014648, "learning_rate": 1.915634979841245e-05, "loss": 2.1635, "step": 20240 }, { "epoch": 0.1272759413116493, "grad_norm": 7.304172992706299, "learning_rate": 1.9155930697467792e-05, "loss": 2.0955, "step": 20250 }, { "epoch": 0.1273387936283464, "grad_norm": 7.5525031089782715, "learning_rate": 1.915551159652314e-05, "loss": 2.2257, "step": 20260 }, { "epoch": 0.1274016459450435, "grad_norm": 7.935617446899414, "learning_rate": 1.9155092495578486e-05, "loss": 2.252, "step": 20270 }, { "epoch": 0.1274644982617406, "grad_norm": 7.920311450958252, "learning_rate": 1.9154673394633833e-05, "loss": 2.1841, "step": 20280 }, { "epoch": 0.12752735057843773, "grad_norm": 7.139548301696777, "learning_rate": 1.9154254293689177e-05, "loss": 2.0093, "step": 20290 }, { "epoch": 0.12759020289513484, "grad_norm": 9.367383003234863, "learning_rate": 1.9153835192744524e-05, "loss": 2.1182, "step": 20300 }, { "epoch": 0.12765305521183196, "grad_norm": 7.5193328857421875, "learning_rate": 1.915341609179987e-05, "loss": 2.3039, "step": 20310 }, { "epoch": 0.12771590752852907, "grad_norm": 7.702613353729248, "learning_rate": 1.915299699085522e-05, "loss": 2.3208, "step": 20320 }, { "epoch": 0.12777875984522616, "grad_norm": 7.226729393005371, "learning_rate": 1.9152577889910565e-05, "loss": 2.3608, "step": 20330 }, { "epoch": 0.12784161216192327, "grad_norm": 8.596226692199707, "learning_rate": 1.9152158788965913e-05, "loss": 2.5536, "step": 20340 }, { "epoch": 0.1279044644786204, "grad_norm": 7.296021938323975, "learning_rate": 1.915173968802126e-05, "loss": 2.3818, "step": 20350 }, { "epoch": 0.1279673167953175, "grad_norm": 7.413475036621094, "learning_rate": 1.9151320587076607e-05, "loss": 2.1525, "step": 20360 }, { "epoch": 0.12803016911201462, "grad_norm": 7.288099765777588, "learning_rate": 1.915090148613195e-05, "loss": 2.1861, "step": 20370 }, { "epoch": 0.12809302142871173, "grad_norm": 8.350249290466309, "learning_rate": 1.9150482385187297e-05, "loss": 2.0005, "step": 20380 }, { "epoch": 0.12815587374540882, "grad_norm": 7.46809720993042, "learning_rate": 1.9150063284242644e-05, "loss": 2.027, "step": 20390 }, { "epoch": 0.12821872606210594, "grad_norm": 7.459334850311279, "learning_rate": 1.914964418329799e-05, "loss": 1.9684, "step": 20400 }, { "epoch": 0.12828157837880305, "grad_norm": 7.063770771026611, "learning_rate": 1.914922508235334e-05, "loss": 2.1614, "step": 20410 }, { "epoch": 0.12834443069550017, "grad_norm": 7.142075061798096, "learning_rate": 1.9148805981408686e-05, "loss": 2.2335, "step": 20420 }, { "epoch": 0.12840728301219728, "grad_norm": 7.43182897567749, "learning_rate": 1.914838688046403e-05, "loss": 2.0091, "step": 20430 }, { "epoch": 0.1284701353288944, "grad_norm": 7.314360618591309, "learning_rate": 1.9147967779519376e-05, "loss": 2.1786, "step": 20440 }, { "epoch": 0.1285329876455915, "grad_norm": 8.008227348327637, "learning_rate": 1.9147548678574724e-05, "loss": 2.0012, "step": 20450 }, { "epoch": 0.1285958399622886, "grad_norm": 9.939228057861328, "learning_rate": 1.914712957763007e-05, "loss": 2.0807, "step": 20460 }, { "epoch": 0.12865869227898571, "grad_norm": 6.3646697998046875, "learning_rate": 1.9146710476685414e-05, "loss": 2.0031, "step": 20470 }, { "epoch": 0.12872154459568283, "grad_norm": 7.245133876800537, "learning_rate": 1.914629137574076e-05, "loss": 2.2448, "step": 20480 }, { "epoch": 0.12878439691237994, "grad_norm": 6.556634902954102, "learning_rate": 1.914587227479611e-05, "loss": 2.0929, "step": 20490 }, { "epoch": 0.12884724922907706, "grad_norm": 7.852427005767822, "learning_rate": 1.9145453173851455e-05, "loss": 2.4566, "step": 20500 }, { "epoch": 0.12891010154577418, "grad_norm": 6.728714466094971, "learning_rate": 1.9145034072906803e-05, "loss": 2.282, "step": 20510 }, { "epoch": 0.12897295386247126, "grad_norm": 6.429920673370361, "learning_rate": 1.9144614971962146e-05, "loss": 2.0266, "step": 20520 }, { "epoch": 0.12903580617916838, "grad_norm": 7.419079780578613, "learning_rate": 1.9144195871017493e-05, "loss": 2.3467, "step": 20530 }, { "epoch": 0.1290986584958655, "grad_norm": 7.593814373016357, "learning_rate": 1.914377677007284e-05, "loss": 2.2039, "step": 20540 }, { "epoch": 0.1291615108125626, "grad_norm": 34.75748062133789, "learning_rate": 1.9143357669128187e-05, "loss": 2.3577, "step": 20550 }, { "epoch": 0.12922436312925972, "grad_norm": 8.49825668334961, "learning_rate": 1.9142938568183535e-05, "loss": 2.0563, "step": 20560 }, { "epoch": 0.12928721544595684, "grad_norm": 7.967658996582031, "learning_rate": 1.914251946723888e-05, "loss": 2.0841, "step": 20570 }, { "epoch": 0.12935006776265393, "grad_norm": 8.534242630004883, "learning_rate": 1.914210036629423e-05, "loss": 2.2224, "step": 20580 }, { "epoch": 0.12941292007935104, "grad_norm": 7.604470252990723, "learning_rate": 1.9141681265349576e-05, "loss": 2.0527, "step": 20590 }, { "epoch": 0.12947577239604816, "grad_norm": 8.015196800231934, "learning_rate": 1.9141262164404923e-05, "loss": 2.1757, "step": 20600 }, { "epoch": 0.12953862471274527, "grad_norm": 7.4464111328125, "learning_rate": 1.9140843063460266e-05, "loss": 2.1135, "step": 20610 }, { "epoch": 0.12960147702944239, "grad_norm": 6.638156414031982, "learning_rate": 1.9140423962515614e-05, "loss": 2.0033, "step": 20620 }, { "epoch": 0.1296643293461395, "grad_norm": 7.73036527633667, "learning_rate": 1.914000486157096e-05, "loss": 2.0447, "step": 20630 }, { "epoch": 0.12972718166283662, "grad_norm": 6.8829216957092285, "learning_rate": 1.9139585760626308e-05, "loss": 2.1127, "step": 20640 }, { "epoch": 0.1297900339795337, "grad_norm": 7.297530174255371, "learning_rate": 1.913916665968165e-05, "loss": 2.2805, "step": 20650 }, { "epoch": 0.12985288629623082, "grad_norm": 9.506532669067383, "learning_rate": 1.9138747558737e-05, "loss": 2.3277, "step": 20660 }, { "epoch": 0.12991573861292793, "grad_norm": 7.570408821105957, "learning_rate": 1.9138328457792346e-05, "loss": 2.0331, "step": 20670 }, { "epoch": 0.12997859092962505, "grad_norm": 6.746794700622559, "learning_rate": 1.9137909356847693e-05, "loss": 1.9404, "step": 20680 }, { "epoch": 0.13004144324632216, "grad_norm": 7.1945109367370605, "learning_rate": 1.9137490255903036e-05, "loss": 2.2295, "step": 20690 }, { "epoch": 0.13010429556301928, "grad_norm": 8.07593059539795, "learning_rate": 1.9137071154958383e-05, "loss": 2.2345, "step": 20700 }, { "epoch": 0.13016714787971637, "grad_norm": 7.70260763168335, "learning_rate": 1.913665205401373e-05, "loss": 2.3744, "step": 20710 }, { "epoch": 0.13023000019641348, "grad_norm": 7.805847644805908, "learning_rate": 1.9136232953069077e-05, "loss": 2.2995, "step": 20720 }, { "epoch": 0.1302928525131106, "grad_norm": 7.288343906402588, "learning_rate": 1.9135813852124425e-05, "loss": 2.2592, "step": 20730 }, { "epoch": 0.1303557048298077, "grad_norm": 8.316977500915527, "learning_rate": 1.913539475117977e-05, "loss": 2.5339, "step": 20740 }, { "epoch": 0.13041855714650483, "grad_norm": 7.602510452270508, "learning_rate": 1.9134975650235115e-05, "loss": 2.0968, "step": 20750 }, { "epoch": 0.13048140946320194, "grad_norm": 7.784472465515137, "learning_rate": 1.9134556549290462e-05, "loss": 1.9854, "step": 20760 }, { "epoch": 0.13054426177989906, "grad_norm": 6.507996559143066, "learning_rate": 1.913413744834581e-05, "loss": 2.0867, "step": 20770 }, { "epoch": 0.13060711409659614, "grad_norm": 8.400556564331055, "learning_rate": 1.9133718347401157e-05, "loss": 2.1331, "step": 20780 }, { "epoch": 0.13066996641329326, "grad_norm": 7.46922492980957, "learning_rate": 1.9133299246456504e-05, "loss": 2.0639, "step": 20790 }, { "epoch": 0.13073281872999037, "grad_norm": 6.9902215003967285, "learning_rate": 1.913288014551185e-05, "loss": 2.1429, "step": 20800 }, { "epoch": 0.1307956710466875, "grad_norm": 6.405163288116455, "learning_rate": 1.9132461044567198e-05, "loss": 2.2893, "step": 20810 }, { "epoch": 0.1308585233633846, "grad_norm": 9.020237922668457, "learning_rate": 1.9132041943622545e-05, "loss": 2.1135, "step": 20820 }, { "epoch": 0.13092137568008172, "grad_norm": 8.635034561157227, "learning_rate": 1.913162284267789e-05, "loss": 2.1556, "step": 20830 }, { "epoch": 0.1309842279967788, "grad_norm": 8.830340385437012, "learning_rate": 1.9131203741733236e-05, "loss": 2.3283, "step": 20840 }, { "epoch": 0.13104708031347592, "grad_norm": 8.042695045471191, "learning_rate": 1.9130784640788583e-05, "loss": 2.0542, "step": 20850 }, { "epoch": 0.13110993263017304, "grad_norm": 8.38254451751709, "learning_rate": 1.913036553984393e-05, "loss": 2.2536, "step": 20860 }, { "epoch": 0.13117278494687015, "grad_norm": 7.26369571685791, "learning_rate": 1.9129946438899273e-05, "loss": 2.4977, "step": 20870 }, { "epoch": 0.13123563726356727, "grad_norm": 7.492801666259766, "learning_rate": 1.912952733795462e-05, "loss": 2.1608, "step": 20880 }, { "epoch": 0.13129848958026438, "grad_norm": 8.494919776916504, "learning_rate": 1.9129108237009968e-05, "loss": 2.2264, "step": 20890 }, { "epoch": 0.13136134189696147, "grad_norm": 6.754851818084717, "learning_rate": 1.9128689136065315e-05, "loss": 2.052, "step": 20900 }, { "epoch": 0.13142419421365859, "grad_norm": 7.529036521911621, "learning_rate": 1.912827003512066e-05, "loss": 2.1838, "step": 20910 }, { "epoch": 0.1314870465303557, "grad_norm": 7.61317253112793, "learning_rate": 1.9127850934176005e-05, "loss": 2.3485, "step": 20920 }, { "epoch": 0.13154989884705282, "grad_norm": 7.339181423187256, "learning_rate": 1.9127431833231352e-05, "loss": 2.0842, "step": 20930 }, { "epoch": 0.13161275116374993, "grad_norm": 7.108869552612305, "learning_rate": 1.91270127322867e-05, "loss": 2.0695, "step": 20940 }, { "epoch": 0.13167560348044705, "grad_norm": 7.704329490661621, "learning_rate": 1.9126593631342047e-05, "loss": 2.1022, "step": 20950 }, { "epoch": 0.13173845579714416, "grad_norm": 9.51086711883545, "learning_rate": 1.9126174530397394e-05, "loss": 1.9662, "step": 20960 }, { "epoch": 0.13180130811384125, "grad_norm": 7.567604064941406, "learning_rate": 1.912575542945274e-05, "loss": 2.1206, "step": 20970 }, { "epoch": 0.13186416043053836, "grad_norm": 7.458498001098633, "learning_rate": 1.9125336328508088e-05, "loss": 1.9615, "step": 20980 }, { "epoch": 0.13192701274723548, "grad_norm": 7.41848087310791, "learning_rate": 1.9124917227563435e-05, "loss": 1.9544, "step": 20990 }, { "epoch": 0.1319898650639326, "grad_norm": 7.613551139831543, "learning_rate": 1.912449812661878e-05, "loss": 1.8618, "step": 21000 }, { "epoch": 0.1320527173806297, "grad_norm": 7.473199844360352, "learning_rate": 1.9124079025674126e-05, "loss": 2.0437, "step": 21010 }, { "epoch": 0.13211556969732682, "grad_norm": 6.8990478515625, "learning_rate": 1.9123659924729473e-05, "loss": 2.159, "step": 21020 }, { "epoch": 0.1321784220140239, "grad_norm": 8.175252914428711, "learning_rate": 1.912324082378482e-05, "loss": 1.9783, "step": 21030 }, { "epoch": 0.13224127433072103, "grad_norm": 6.731335639953613, "learning_rate": 1.9122821722840167e-05, "loss": 1.9808, "step": 21040 }, { "epoch": 0.13230412664741814, "grad_norm": 8.126704216003418, "learning_rate": 1.912240262189551e-05, "loss": 1.9968, "step": 21050 }, { "epoch": 0.13236697896411526, "grad_norm": 8.616935729980469, "learning_rate": 1.9121983520950858e-05, "loss": 1.9534, "step": 21060 }, { "epoch": 0.13242983128081237, "grad_norm": 8.366382598876953, "learning_rate": 1.9121564420006205e-05, "loss": 2.3624, "step": 21070 }, { "epoch": 0.1324926835975095, "grad_norm": 6.051831245422363, "learning_rate": 1.9121145319061552e-05, "loss": 1.7983, "step": 21080 }, { "epoch": 0.1325555359142066, "grad_norm": 6.883232593536377, "learning_rate": 1.9120726218116895e-05, "loss": 2.0166, "step": 21090 }, { "epoch": 0.1326183882309037, "grad_norm": 8.676856994628906, "learning_rate": 1.9120307117172242e-05, "loss": 2.0505, "step": 21100 }, { "epoch": 0.1326812405476008, "grad_norm": 7.5355987548828125, "learning_rate": 1.911988801622759e-05, "loss": 2.1162, "step": 21110 }, { "epoch": 0.13274409286429792, "grad_norm": 7.496070384979248, "learning_rate": 1.9119468915282937e-05, "loss": 2.1305, "step": 21120 }, { "epoch": 0.13280694518099503, "grad_norm": 8.2671537399292, "learning_rate": 1.9119049814338284e-05, "loss": 2.2587, "step": 21130 }, { "epoch": 0.13286979749769215, "grad_norm": 7.576817512512207, "learning_rate": 1.9118630713393627e-05, "loss": 2.0564, "step": 21140 }, { "epoch": 0.13293264981438926, "grad_norm": 7.535472869873047, "learning_rate": 1.9118211612448974e-05, "loss": 2.0817, "step": 21150 }, { "epoch": 0.13299550213108635, "grad_norm": 8.097841262817383, "learning_rate": 1.911779251150432e-05, "loss": 2.2972, "step": 21160 }, { "epoch": 0.13305835444778347, "grad_norm": 22.04595947265625, "learning_rate": 1.911737341055967e-05, "loss": 2.1941, "step": 21170 }, { "epoch": 0.13312120676448058, "grad_norm": 7.512133598327637, "learning_rate": 1.9116954309615016e-05, "loss": 2.1939, "step": 21180 }, { "epoch": 0.1331840590811777, "grad_norm": 7.298905849456787, "learning_rate": 1.9116535208670363e-05, "loss": 2.2007, "step": 21190 }, { "epoch": 0.1332469113978748, "grad_norm": 6.7467474937438965, "learning_rate": 1.911611610772571e-05, "loss": 2.2847, "step": 21200 }, { "epoch": 0.13330976371457193, "grad_norm": 7.777745246887207, "learning_rate": 1.9115697006781057e-05, "loss": 2.1764, "step": 21210 }, { "epoch": 0.13337261603126901, "grad_norm": 7.369444370269775, "learning_rate": 1.9115277905836404e-05, "loss": 2.0733, "step": 21220 }, { "epoch": 0.13343546834796613, "grad_norm": 7.144137382507324, "learning_rate": 1.9114858804891748e-05, "loss": 1.9866, "step": 21230 }, { "epoch": 0.13349832066466324, "grad_norm": 8.029589653015137, "learning_rate": 1.9114439703947095e-05, "loss": 2.1095, "step": 21240 }, { "epoch": 0.13356117298136036, "grad_norm": 7.28410530090332, "learning_rate": 1.9114020603002442e-05, "loss": 2.0821, "step": 21250 }, { "epoch": 0.13362402529805747, "grad_norm": 7.584111213684082, "learning_rate": 1.911360150205779e-05, "loss": 2.043, "step": 21260 }, { "epoch": 0.1336868776147546, "grad_norm": 8.032008171081543, "learning_rate": 1.9113182401113132e-05, "loss": 2.1231, "step": 21270 }, { "epoch": 0.1337497299314517, "grad_norm": 8.244526863098145, "learning_rate": 1.911276330016848e-05, "loss": 2.183, "step": 21280 }, { "epoch": 0.1338125822481488, "grad_norm": 7.711684703826904, "learning_rate": 1.9112344199223827e-05, "loss": 1.9712, "step": 21290 }, { "epoch": 0.1338754345648459, "grad_norm": 7.845302104949951, "learning_rate": 1.9111925098279174e-05, "loss": 2.0885, "step": 21300 }, { "epoch": 0.13393828688154302, "grad_norm": 7.101912498474121, "learning_rate": 1.9111505997334517e-05, "loss": 1.847, "step": 21310 }, { "epoch": 0.13400113919824014, "grad_norm": 7.40846586227417, "learning_rate": 1.9111086896389864e-05, "loss": 2.432, "step": 21320 }, { "epoch": 0.13406399151493725, "grad_norm": 6.370995044708252, "learning_rate": 1.911066779544521e-05, "loss": 1.9739, "step": 21330 }, { "epoch": 0.13412684383163437, "grad_norm": 6.674936294555664, "learning_rate": 1.911024869450056e-05, "loss": 2.1724, "step": 21340 }, { "epoch": 0.13418969614833146, "grad_norm": 6.612419605255127, "learning_rate": 1.9109829593555906e-05, "loss": 2.0961, "step": 21350 }, { "epoch": 0.13425254846502857, "grad_norm": 6.799704551696777, "learning_rate": 1.9109410492611253e-05, "loss": 1.9552, "step": 21360 }, { "epoch": 0.13431540078172569, "grad_norm": 8.12215805053711, "learning_rate": 1.9108991391666596e-05, "loss": 2.1582, "step": 21370 }, { "epoch": 0.1343782530984228, "grad_norm": 8.388773918151855, "learning_rate": 1.9108572290721944e-05, "loss": 2.2236, "step": 21380 }, { "epoch": 0.13444110541511992, "grad_norm": 7.834544658660889, "learning_rate": 1.910815318977729e-05, "loss": 2.1054, "step": 21390 }, { "epoch": 0.13450395773181703, "grad_norm": 8.504358291625977, "learning_rate": 1.9107734088832638e-05, "loss": 2.1659, "step": 21400 }, { "epoch": 0.13456681004851412, "grad_norm": 8.723959922790527, "learning_rate": 1.9107314987887985e-05, "loss": 1.9973, "step": 21410 }, { "epoch": 0.13462966236521123, "grad_norm": 19.787212371826172, "learning_rate": 1.9106895886943332e-05, "loss": 2.2176, "step": 21420 }, { "epoch": 0.13469251468190835, "grad_norm": 7.724793910980225, "learning_rate": 1.910647678599868e-05, "loss": 1.9911, "step": 21430 }, { "epoch": 0.13475536699860546, "grad_norm": 7.064187526702881, "learning_rate": 1.9106057685054026e-05, "loss": 2.1318, "step": 21440 }, { "epoch": 0.13481821931530258, "grad_norm": 8.108419418334961, "learning_rate": 1.910563858410937e-05, "loss": 2.2516, "step": 21450 }, { "epoch": 0.1348810716319997, "grad_norm": 8.397812843322754, "learning_rate": 1.9105219483164717e-05, "loss": 2.0927, "step": 21460 }, { "epoch": 0.1349439239486968, "grad_norm": 6.9287567138671875, "learning_rate": 1.9104800382220064e-05, "loss": 2.0847, "step": 21470 }, { "epoch": 0.1350067762653939, "grad_norm": 14.46806526184082, "learning_rate": 1.910438128127541e-05, "loss": 2.0837, "step": 21480 }, { "epoch": 0.135069628582091, "grad_norm": 8.349653244018555, "learning_rate": 1.9103962180330755e-05, "loss": 2.1776, "step": 21490 }, { "epoch": 0.13513248089878813, "grad_norm": 7.929886817932129, "learning_rate": 1.91035430793861e-05, "loss": 2.2667, "step": 21500 }, { "epoch": 0.13519533321548524, "grad_norm": 6.784626007080078, "learning_rate": 1.910312397844145e-05, "loss": 2.1803, "step": 21510 }, { "epoch": 0.13525818553218236, "grad_norm": 7.63728141784668, "learning_rate": 1.9102704877496796e-05, "loss": 2.2395, "step": 21520 }, { "epoch": 0.13532103784887947, "grad_norm": 7.283819675445557, "learning_rate": 1.9102285776552143e-05, "loss": 2.3257, "step": 21530 }, { "epoch": 0.13538389016557656, "grad_norm": 6.891645431518555, "learning_rate": 1.9101866675607486e-05, "loss": 2.1525, "step": 21540 }, { "epoch": 0.13544674248227367, "grad_norm": 7.524374008178711, "learning_rate": 1.9101447574662834e-05, "loss": 1.987, "step": 21550 }, { "epoch": 0.1355095947989708, "grad_norm": 7.3678364753723145, "learning_rate": 1.910102847371818e-05, "loss": 2.0044, "step": 21560 }, { "epoch": 0.1355724471156679, "grad_norm": 7.025672912597656, "learning_rate": 1.9100609372773528e-05, "loss": 1.9908, "step": 21570 }, { "epoch": 0.13563529943236502, "grad_norm": 6.5765204429626465, "learning_rate": 1.9100190271828875e-05, "loss": 1.9447, "step": 21580 }, { "epoch": 0.13569815174906213, "grad_norm": 8.267107009887695, "learning_rate": 1.9099771170884222e-05, "loss": 2.3063, "step": 21590 }, { "epoch": 0.13576100406575925, "grad_norm": 7.098850250244141, "learning_rate": 1.909935206993957e-05, "loss": 2.1153, "step": 21600 }, { "epoch": 0.13582385638245634, "grad_norm": 7.305449962615967, "learning_rate": 1.9098932968994916e-05, "loss": 2.1471, "step": 21610 }, { "epoch": 0.13588670869915345, "grad_norm": 7.206968307495117, "learning_rate": 1.909851386805026e-05, "loss": 2.2529, "step": 21620 }, { "epoch": 0.13594956101585057, "grad_norm": 7.390930652618408, "learning_rate": 1.9098094767105607e-05, "loss": 2.094, "step": 21630 }, { "epoch": 0.13601241333254768, "grad_norm": 7.941120624542236, "learning_rate": 1.9097675666160954e-05, "loss": 2.1407, "step": 21640 }, { "epoch": 0.1360752656492448, "grad_norm": 7.612168788909912, "learning_rate": 1.90972565652163e-05, "loss": 2.1267, "step": 21650 }, { "epoch": 0.1361381179659419, "grad_norm": 8.423344612121582, "learning_rate": 1.9096837464271648e-05, "loss": 1.9587, "step": 21660 }, { "epoch": 0.136200970282639, "grad_norm": 6.990922451019287, "learning_rate": 1.909641836332699e-05, "loss": 2.2546, "step": 21670 }, { "epoch": 0.13626382259933612, "grad_norm": 10.280035972595215, "learning_rate": 1.909599926238234e-05, "loss": 2.3914, "step": 21680 }, { "epoch": 0.13632667491603323, "grad_norm": 7.910796642303467, "learning_rate": 1.9095580161437686e-05, "loss": 2.1273, "step": 21690 }, { "epoch": 0.13638952723273035, "grad_norm": 7.267086982727051, "learning_rate": 1.9095161060493033e-05, "loss": 2.0764, "step": 21700 }, { "epoch": 0.13645237954942746, "grad_norm": 7.074910640716553, "learning_rate": 1.9094783869642844e-05, "loss": 2.1828, "step": 21710 }, { "epoch": 0.13651523186612458, "grad_norm": 8.144148826599121, "learning_rate": 1.909436476869819e-05, "loss": 2.1668, "step": 21720 }, { "epoch": 0.13657808418282166, "grad_norm": 7.03750467300415, "learning_rate": 1.909394566775354e-05, "loss": 2.1083, "step": 21730 }, { "epoch": 0.13664093649951878, "grad_norm": 7.418277263641357, "learning_rate": 1.9093526566808882e-05, "loss": 2.2161, "step": 21740 }, { "epoch": 0.1367037888162159, "grad_norm": 10.619608879089355, "learning_rate": 1.909310746586423e-05, "loss": 2.0044, "step": 21750 }, { "epoch": 0.136766641132913, "grad_norm": 7.455464839935303, "learning_rate": 1.9092688364919576e-05, "loss": 2.2266, "step": 21760 }, { "epoch": 0.13682949344961012, "grad_norm": 9.03001594543457, "learning_rate": 1.9092269263974923e-05, "loss": 2.2673, "step": 21770 }, { "epoch": 0.13689234576630724, "grad_norm": 7.140875339508057, "learning_rate": 1.909185016303027e-05, "loss": 2.1292, "step": 21780 }, { "epoch": 0.13695519808300435, "grad_norm": 7.704232692718506, "learning_rate": 1.9091431062085614e-05, "loss": 2.3453, "step": 21790 }, { "epoch": 0.13701805039970144, "grad_norm": 7.573914051055908, "learning_rate": 1.909101196114096e-05, "loss": 2.0991, "step": 21800 }, { "epoch": 0.13708090271639856, "grad_norm": 8.799951553344727, "learning_rate": 1.9090592860196308e-05, "loss": 2.3587, "step": 21810 }, { "epoch": 0.13714375503309567, "grad_norm": 6.665711402893066, "learning_rate": 1.9090173759251655e-05, "loss": 2.3124, "step": 21820 }, { "epoch": 0.1372066073497928, "grad_norm": 7.785305976867676, "learning_rate": 1.9089754658307e-05, "loss": 2.323, "step": 21830 }, { "epoch": 0.1372694596664899, "grad_norm": 7.521659851074219, "learning_rate": 1.9089335557362346e-05, "loss": 2.1861, "step": 21840 }, { "epoch": 0.13733231198318702, "grad_norm": 8.873821258544922, "learning_rate": 1.9088916456417693e-05, "loss": 2.3054, "step": 21850 }, { "epoch": 0.1373951642998841, "grad_norm": 9.21068286895752, "learning_rate": 1.908849735547304e-05, "loss": 2.167, "step": 21860 }, { "epoch": 0.13745801661658122, "grad_norm": 7.585288047790527, "learning_rate": 1.9088078254528387e-05, "loss": 2.0737, "step": 21870 }, { "epoch": 0.13752086893327833, "grad_norm": 8.20755672454834, "learning_rate": 1.9087659153583734e-05, "loss": 2.1882, "step": 21880 }, { "epoch": 0.13758372124997545, "grad_norm": 7.105890274047852, "learning_rate": 1.908724005263908e-05, "loss": 2.1434, "step": 21890 }, { "epoch": 0.13764657356667256, "grad_norm": 6.78985071182251, "learning_rate": 1.908682095169443e-05, "loss": 2.3267, "step": 21900 }, { "epoch": 0.13770942588336968, "grad_norm": 8.069618225097656, "learning_rate": 1.9086401850749775e-05, "loss": 2.0262, "step": 21910 }, { "epoch": 0.13777227820006677, "grad_norm": 7.240670680999756, "learning_rate": 1.908598274980512e-05, "loss": 2.1662, "step": 21920 }, { "epoch": 0.13783513051676388, "grad_norm": 7.4934868812561035, "learning_rate": 1.9085563648860466e-05, "loss": 2.055, "step": 21930 }, { "epoch": 0.137897982833461, "grad_norm": 8.327574729919434, "learning_rate": 1.9085144547915813e-05, "loss": 2.1307, "step": 21940 }, { "epoch": 0.1379608351501581, "grad_norm": 7.624167442321777, "learning_rate": 1.908472544697116e-05, "loss": 2.144, "step": 21950 }, { "epoch": 0.13802368746685523, "grad_norm": 8.745718955993652, "learning_rate": 1.9084306346026507e-05, "loss": 2.1541, "step": 21960 }, { "epoch": 0.13808653978355234, "grad_norm": 7.69420051574707, "learning_rate": 1.908388724508185e-05, "loss": 2.1109, "step": 21970 }, { "epoch": 0.13814939210024946, "grad_norm": 7.407751560211182, "learning_rate": 1.9083468144137198e-05, "loss": 2.2494, "step": 21980 }, { "epoch": 0.13821224441694654, "grad_norm": 7.729306697845459, "learning_rate": 1.9083049043192545e-05, "loss": 2.1493, "step": 21990 }, { "epoch": 0.13827509673364366, "grad_norm": 7.324053764343262, "learning_rate": 1.9082629942247892e-05, "loss": 2.0412, "step": 22000 }, { "epoch": 0.13833794905034077, "grad_norm": 6.690671443939209, "learning_rate": 1.9082210841303236e-05, "loss": 1.9636, "step": 22010 }, { "epoch": 0.1384008013670379, "grad_norm": 6.156836986541748, "learning_rate": 1.9081791740358583e-05, "loss": 2.1582, "step": 22020 }, { "epoch": 0.138463653683735, "grad_norm": 7.035289287567139, "learning_rate": 1.908137263941393e-05, "loss": 2.1814, "step": 22030 }, { "epoch": 0.13852650600043212, "grad_norm": 7.178378582000732, "learning_rate": 1.9080953538469277e-05, "loss": 2.0645, "step": 22040 }, { "epoch": 0.1385893583171292, "grad_norm": 7.521155834197998, "learning_rate": 1.9080534437524624e-05, "loss": 2.1186, "step": 22050 }, { "epoch": 0.13865221063382632, "grad_norm": 7.570018291473389, "learning_rate": 1.908011533657997e-05, "loss": 2.1362, "step": 22060 }, { "epoch": 0.13871506295052344, "grad_norm": 7.197385787963867, "learning_rate": 1.9079696235635315e-05, "loss": 2.1849, "step": 22070 }, { "epoch": 0.13877791526722055, "grad_norm": 7.9512939453125, "learning_rate": 1.9079277134690662e-05, "loss": 2.4368, "step": 22080 }, { "epoch": 0.13884076758391767, "grad_norm": 8.198320388793945, "learning_rate": 1.907885803374601e-05, "loss": 2.2081, "step": 22090 }, { "epoch": 0.13890361990061478, "grad_norm": 8.728532791137695, "learning_rate": 1.9078438932801356e-05, "loss": 2.3773, "step": 22100 }, { "epoch": 0.1389664722173119, "grad_norm": 7.970332145690918, "learning_rate": 1.9078019831856703e-05, "loss": 2.1445, "step": 22110 }, { "epoch": 0.13902932453400899, "grad_norm": 7.041906833648682, "learning_rate": 1.907760073091205e-05, "loss": 2.1213, "step": 22120 }, { "epoch": 0.1390921768507061, "grad_norm": 6.955906867980957, "learning_rate": 1.9077181629967397e-05, "loss": 2.0388, "step": 22130 }, { "epoch": 0.13915502916740322, "grad_norm": 8.771379470825195, "learning_rate": 1.907676252902274e-05, "loss": 2.1978, "step": 22140 }, { "epoch": 0.13921788148410033, "grad_norm": 7.178952693939209, "learning_rate": 1.9076343428078088e-05, "loss": 2.0043, "step": 22150 }, { "epoch": 0.13928073380079745, "grad_norm": 7.710979461669922, "learning_rate": 1.9075924327133435e-05, "loss": 2.138, "step": 22160 }, { "epoch": 0.13934358611749456, "grad_norm": 7.282996654510498, "learning_rate": 1.9075505226188782e-05, "loss": 2.1759, "step": 22170 }, { "epoch": 0.13940643843419165, "grad_norm": 6.640257358551025, "learning_rate": 1.907508612524413e-05, "loss": 1.8781, "step": 22180 }, { "epoch": 0.13946929075088876, "grad_norm": 7.761207580566406, "learning_rate": 1.9074667024299473e-05, "loss": 2.1495, "step": 22190 }, { "epoch": 0.13953214306758588, "grad_norm": 7.394071102142334, "learning_rate": 1.907424792335482e-05, "loss": 2.0641, "step": 22200 }, { "epoch": 0.139594995384283, "grad_norm": 6.498964309692383, "learning_rate": 1.9073828822410167e-05, "loss": 2.0303, "step": 22210 }, { "epoch": 0.1396578477009801, "grad_norm": 7.536453723907471, "learning_rate": 1.9073409721465514e-05, "loss": 2.2705, "step": 22220 }, { "epoch": 0.13972070001767722, "grad_norm": 7.092509746551514, "learning_rate": 1.9072990620520858e-05, "loss": 2.0013, "step": 22230 }, { "epoch": 0.1397835523343743, "grad_norm": 8.55628776550293, "learning_rate": 1.9072571519576205e-05, "loss": 2.3906, "step": 22240 }, { "epoch": 0.13984640465107143, "grad_norm": 8.0609130859375, "learning_rate": 1.9072152418631552e-05, "loss": 2.0266, "step": 22250 }, { "epoch": 0.13990925696776854, "grad_norm": 7.505898475646973, "learning_rate": 1.90717333176869e-05, "loss": 2.1944, "step": 22260 }, { "epoch": 0.13997210928446566, "grad_norm": 8.852445602416992, "learning_rate": 1.9071314216742246e-05, "loss": 2.0867, "step": 22270 }, { "epoch": 0.14003496160116277, "grad_norm": 7.316661357879639, "learning_rate": 1.9070895115797593e-05, "loss": 2.1679, "step": 22280 }, { "epoch": 0.1400978139178599, "grad_norm": 8.005709648132324, "learning_rate": 1.907047601485294e-05, "loss": 2.1251, "step": 22290 }, { "epoch": 0.140160666234557, "grad_norm": 7.422688961029053, "learning_rate": 1.9070056913908287e-05, "loss": 1.8722, "step": 22300 }, { "epoch": 0.1402235185512541, "grad_norm": 6.957630634307861, "learning_rate": 1.906963781296363e-05, "loss": 2.3425, "step": 22310 }, { "epoch": 0.1402863708679512, "grad_norm": 7.596073627471924, "learning_rate": 1.9069218712018978e-05, "loss": 2.2905, "step": 22320 }, { "epoch": 0.14034922318464832, "grad_norm": 7.504247188568115, "learning_rate": 1.9068799611074325e-05, "loss": 2.1704, "step": 22330 }, { "epoch": 0.14041207550134543, "grad_norm": 6.982486248016357, "learning_rate": 1.9068380510129672e-05, "loss": 2.0466, "step": 22340 }, { "epoch": 0.14047492781804255, "grad_norm": 6.621249675750732, "learning_rate": 1.906796140918502e-05, "loss": 2.1644, "step": 22350 }, { "epoch": 0.14053778013473966, "grad_norm": 7.738133430480957, "learning_rate": 1.9067542308240363e-05, "loss": 2.1581, "step": 22360 }, { "epoch": 0.14060063245143675, "grad_norm": 7.586179733276367, "learning_rate": 1.906712320729571e-05, "loss": 2.0927, "step": 22370 }, { "epoch": 0.14066348476813387, "grad_norm": 6.744780540466309, "learning_rate": 1.9066704106351057e-05, "loss": 2.3744, "step": 22380 }, { "epoch": 0.14072633708483098, "grad_norm": 6.437538146972656, "learning_rate": 1.9066285005406404e-05, "loss": 2.295, "step": 22390 }, { "epoch": 0.1407891894015281, "grad_norm": 7.903862953186035, "learning_rate": 1.906586590446175e-05, "loss": 2.1277, "step": 22400 }, { "epoch": 0.1408520417182252, "grad_norm": 7.29001522064209, "learning_rate": 1.9065446803517095e-05, "loss": 2.2617, "step": 22410 }, { "epoch": 0.14091489403492233, "grad_norm": 7.974186897277832, "learning_rate": 1.9065027702572442e-05, "loss": 2.3483, "step": 22420 }, { "epoch": 0.14097774635161944, "grad_norm": 7.985724449157715, "learning_rate": 1.906460860162779e-05, "loss": 2.0671, "step": 22430 }, { "epoch": 0.14104059866831653, "grad_norm": 7.703798294067383, "learning_rate": 1.9064189500683136e-05, "loss": 2.0304, "step": 22440 }, { "epoch": 0.14110345098501365, "grad_norm": 7.514264106750488, "learning_rate": 1.906377039973848e-05, "loss": 2.1393, "step": 22450 }, { "epoch": 0.14116630330171076, "grad_norm": 8.436440467834473, "learning_rate": 1.9063351298793827e-05, "loss": 1.9612, "step": 22460 }, { "epoch": 0.14122915561840788, "grad_norm": 7.111056327819824, "learning_rate": 1.9062932197849174e-05, "loss": 2.0796, "step": 22470 }, { "epoch": 0.141292007935105, "grad_norm": 8.205460548400879, "learning_rate": 1.906251309690452e-05, "loss": 2.1031, "step": 22480 }, { "epoch": 0.1413548602518021, "grad_norm": 7.536001682281494, "learning_rate": 1.9062093995959868e-05, "loss": 2.1979, "step": 22490 }, { "epoch": 0.1414177125684992, "grad_norm": 8.101511001586914, "learning_rate": 1.9061674895015215e-05, "loss": 1.928, "step": 22500 }, { "epoch": 0.1414805648851963, "grad_norm": 7.2906599044799805, "learning_rate": 1.9061255794070562e-05, "loss": 2.2956, "step": 22510 }, { "epoch": 0.14154341720189342, "grad_norm": 7.893942832946777, "learning_rate": 1.906083669312591e-05, "loss": 2.133, "step": 22520 }, { "epoch": 0.14160626951859054, "grad_norm": 11.342302322387695, "learning_rate": 1.9060417592181257e-05, "loss": 2.1614, "step": 22530 }, { "epoch": 0.14166912183528765, "grad_norm": 8.061858177185059, "learning_rate": 1.90599984912366e-05, "loss": 2.1909, "step": 22540 }, { "epoch": 0.14173197415198477, "grad_norm": 7.32312536239624, "learning_rate": 1.9059579390291947e-05, "loss": 2.1426, "step": 22550 }, { "epoch": 0.14179482646868186, "grad_norm": 8.220121383666992, "learning_rate": 1.9059160289347294e-05, "loss": 2.295, "step": 22560 }, { "epoch": 0.14185767878537897, "grad_norm": 7.3067402839660645, "learning_rate": 1.905874118840264e-05, "loss": 2.003, "step": 22570 }, { "epoch": 0.1419205311020761, "grad_norm": 7.1083879470825195, "learning_rate": 1.905832208745799e-05, "loss": 1.9329, "step": 22580 }, { "epoch": 0.1419833834187732, "grad_norm": 7.069380283355713, "learning_rate": 1.9057902986513332e-05, "loss": 2.2562, "step": 22590 }, { "epoch": 0.14204623573547032, "grad_norm": 7.174941539764404, "learning_rate": 1.905748388556868e-05, "loss": 2.1896, "step": 22600 }, { "epoch": 0.14210908805216743, "grad_norm": 7.403316497802734, "learning_rate": 1.9057064784624026e-05, "loss": 2.0871, "step": 22610 }, { "epoch": 0.14217194036886455, "grad_norm": 7.457070827484131, "learning_rate": 1.9056645683679373e-05, "loss": 1.8901, "step": 22620 }, { "epoch": 0.14223479268556163, "grad_norm": 7.98372745513916, "learning_rate": 1.9056226582734717e-05, "loss": 2.2116, "step": 22630 }, { "epoch": 0.14229764500225875, "grad_norm": 7.929397106170654, "learning_rate": 1.9055807481790064e-05, "loss": 2.1043, "step": 22640 }, { "epoch": 0.14236049731895586, "grad_norm": 8.145434379577637, "learning_rate": 1.905538838084541e-05, "loss": 1.8081, "step": 22650 }, { "epoch": 0.14242334963565298, "grad_norm": 7.806945323944092, "learning_rate": 1.9054969279900758e-05, "loss": 2.3025, "step": 22660 }, { "epoch": 0.1424862019523501, "grad_norm": 10.782485008239746, "learning_rate": 1.9054550178956105e-05, "loss": 2.2739, "step": 22670 }, { "epoch": 0.1425490542690472, "grad_norm": 8.331992149353027, "learning_rate": 1.9054131078011452e-05, "loss": 2.1624, "step": 22680 }, { "epoch": 0.1426119065857443, "grad_norm": 7.473433494567871, "learning_rate": 1.9053711977066796e-05, "loss": 1.9709, "step": 22690 }, { "epoch": 0.1426747589024414, "grad_norm": 7.280951499938965, "learning_rate": 1.9053292876122143e-05, "loss": 2.1562, "step": 22700 }, { "epoch": 0.14273761121913853, "grad_norm": 8.735506057739258, "learning_rate": 1.905287377517749e-05, "loss": 2.4145, "step": 22710 }, { "epoch": 0.14280046353583564, "grad_norm": 8.281670570373535, "learning_rate": 1.9052454674232837e-05, "loss": 1.9165, "step": 22720 }, { "epoch": 0.14286331585253276, "grad_norm": 8.478446006774902, "learning_rate": 1.9052035573288184e-05, "loss": 2.1552, "step": 22730 }, { "epoch": 0.14292616816922987, "grad_norm": 7.284202575683594, "learning_rate": 1.905161647234353e-05, "loss": 2.4096, "step": 22740 }, { "epoch": 0.14298902048592696, "grad_norm": 6.107362747192383, "learning_rate": 1.905119737139888e-05, "loss": 2.1548, "step": 22750 }, { "epoch": 0.14305187280262407, "grad_norm": 8.200136184692383, "learning_rate": 1.9050778270454222e-05, "loss": 2.0889, "step": 22760 }, { "epoch": 0.1431147251193212, "grad_norm": 7.132558345794678, "learning_rate": 1.905035916950957e-05, "loss": 2.0952, "step": 22770 }, { "epoch": 0.1431775774360183, "grad_norm": 7.06957483291626, "learning_rate": 1.9049940068564916e-05, "loss": 2.2101, "step": 22780 }, { "epoch": 0.14324042975271542, "grad_norm": 10.873089790344238, "learning_rate": 1.9049520967620263e-05, "loss": 2.2968, "step": 22790 }, { "epoch": 0.14330328206941254, "grad_norm": 7.581949710845947, "learning_rate": 1.904910186667561e-05, "loss": 2.0683, "step": 22800 }, { "epoch": 0.14336613438610965, "grad_norm": 7.665589809417725, "learning_rate": 1.9048682765730954e-05, "loss": 2.1991, "step": 22810 }, { "epoch": 0.14342898670280674, "grad_norm": 8.55495834350586, "learning_rate": 1.90482636647863e-05, "loss": 2.1726, "step": 22820 }, { "epoch": 0.14349183901950385, "grad_norm": 6.944456100463867, "learning_rate": 1.904784456384165e-05, "loss": 1.994, "step": 22830 }, { "epoch": 0.14355469133620097, "grad_norm": 6.976284027099609, "learning_rate": 1.9047425462896995e-05, "loss": 2.3131, "step": 22840 }, { "epoch": 0.14361754365289808, "grad_norm": 7.741434097290039, "learning_rate": 1.904700636195234e-05, "loss": 1.9898, "step": 22850 }, { "epoch": 0.1436803959695952, "grad_norm": 8.721227645874023, "learning_rate": 1.9046587261007686e-05, "loss": 2.1514, "step": 22860 }, { "epoch": 0.1437432482862923, "grad_norm": 7.248230934143066, "learning_rate": 1.90462100701575e-05, "loss": 2.0825, "step": 22870 }, { "epoch": 0.1438061006029894, "grad_norm": 6.804014682769775, "learning_rate": 1.9045790969212845e-05, "loss": 2.1672, "step": 22880 }, { "epoch": 0.14386895291968652, "grad_norm": 8.10350513458252, "learning_rate": 1.904537186826819e-05, "loss": 2.2151, "step": 22890 }, { "epoch": 0.14393180523638363, "grad_norm": 7.515980243682861, "learning_rate": 1.904495276732354e-05, "loss": 2.1118, "step": 22900 }, { "epoch": 0.14399465755308075, "grad_norm": 6.625450134277344, "learning_rate": 1.9044533666378886e-05, "loss": 2.0872, "step": 22910 }, { "epoch": 0.14405750986977786, "grad_norm": 7.152846336364746, "learning_rate": 1.9044114565434233e-05, "loss": 2.2226, "step": 22920 }, { "epoch": 0.14412036218647498, "grad_norm": 7.193029403686523, "learning_rate": 1.9043695464489577e-05, "loss": 2.1758, "step": 22930 }, { "epoch": 0.1441832145031721, "grad_norm": 8.163533210754395, "learning_rate": 1.9043276363544924e-05, "loss": 2.1661, "step": 22940 }, { "epoch": 0.14424606681986918, "grad_norm": 7.087532997131348, "learning_rate": 1.904285726260027e-05, "loss": 1.9239, "step": 22950 }, { "epoch": 0.1443089191365663, "grad_norm": 6.445407390594482, "learning_rate": 1.9042438161655618e-05, "loss": 2.0338, "step": 22960 }, { "epoch": 0.1443717714532634, "grad_norm": 6.491453647613525, "learning_rate": 1.9042019060710965e-05, "loss": 2.0514, "step": 22970 }, { "epoch": 0.14443462376996052, "grad_norm": 6.91152811050415, "learning_rate": 1.9041599959766312e-05, "loss": 2.1338, "step": 22980 }, { "epoch": 0.14449747608665764, "grad_norm": 8.279658317565918, "learning_rate": 1.904118085882166e-05, "loss": 2.2453, "step": 22990 }, { "epoch": 0.14456032840335475, "grad_norm": 7.245723247528076, "learning_rate": 1.9040761757877003e-05, "loss": 2.1084, "step": 23000 }, { "epoch": 0.14462318072005184, "grad_norm": 6.900940895080566, "learning_rate": 1.904034265693235e-05, "loss": 2.2148, "step": 23010 }, { "epoch": 0.14468603303674896, "grad_norm": 7.883230686187744, "learning_rate": 1.9039923555987697e-05, "loss": 2.032, "step": 23020 }, { "epoch": 0.14474888535344607, "grad_norm": 8.119175910949707, "learning_rate": 1.9039504455043044e-05, "loss": 2.1719, "step": 23030 }, { "epoch": 0.1448117376701432, "grad_norm": 8.134607315063477, "learning_rate": 1.903908535409839e-05, "loss": 2.0942, "step": 23040 }, { "epoch": 0.1448745899868403, "grad_norm": 6.855745792388916, "learning_rate": 1.9038666253153738e-05, "loss": 2.1209, "step": 23050 }, { "epoch": 0.14493744230353742, "grad_norm": 7.171293258666992, "learning_rate": 1.903824715220908e-05, "loss": 2.2055, "step": 23060 }, { "epoch": 0.1450002946202345, "grad_norm": 7.572606563568115, "learning_rate": 1.903782805126443e-05, "loss": 2.1366, "step": 23070 }, { "epoch": 0.14506314693693162, "grad_norm": 7.0616374015808105, "learning_rate": 1.9037408950319776e-05, "loss": 2.0039, "step": 23080 }, { "epoch": 0.14512599925362873, "grad_norm": 7.565642833709717, "learning_rate": 1.9036989849375123e-05, "loss": 2.2619, "step": 23090 }, { "epoch": 0.14518885157032585, "grad_norm": 7.184761047363281, "learning_rate": 1.903657074843047e-05, "loss": 2.0735, "step": 23100 }, { "epoch": 0.14525170388702296, "grad_norm": 8.689106941223145, "learning_rate": 1.9036151647485814e-05, "loss": 2.0887, "step": 23110 }, { "epoch": 0.14531455620372008, "grad_norm": 7.586963176727295, "learning_rate": 1.903573254654116e-05, "loss": 2.0515, "step": 23120 }, { "epoch": 0.1453774085204172, "grad_norm": 9.940788269042969, "learning_rate": 1.9035313445596508e-05, "loss": 1.9153, "step": 23130 }, { "epoch": 0.14544026083711428, "grad_norm": 7.923069000244141, "learning_rate": 1.9034894344651855e-05, "loss": 2.1228, "step": 23140 }, { "epoch": 0.1455031131538114, "grad_norm": 15.715959548950195, "learning_rate": 1.90344752437072e-05, "loss": 2.0652, "step": 23150 }, { "epoch": 0.1455659654705085, "grad_norm": 6.555418491363525, "learning_rate": 1.9034056142762546e-05, "loss": 2.32, "step": 23160 }, { "epoch": 0.14562881778720563, "grad_norm": 7.920915126800537, "learning_rate": 1.9033637041817893e-05, "loss": 2.1331, "step": 23170 }, { "epoch": 0.14569167010390274, "grad_norm": 5.9372687339782715, "learning_rate": 1.903321794087324e-05, "loss": 1.8449, "step": 23180 }, { "epoch": 0.14575452242059986, "grad_norm": 7.384964466094971, "learning_rate": 1.9032798839928587e-05, "loss": 2.1567, "step": 23190 }, { "epoch": 0.14581737473729695, "grad_norm": 6.332118034362793, "learning_rate": 1.9032379738983934e-05, "loss": 2.0163, "step": 23200 }, { "epoch": 0.14588022705399406, "grad_norm": 8.333724021911621, "learning_rate": 1.903196063803928e-05, "loss": 2.067, "step": 23210 }, { "epoch": 0.14594307937069118, "grad_norm": 7.829019069671631, "learning_rate": 1.9031541537094628e-05, "loss": 2.1369, "step": 23220 }, { "epoch": 0.1460059316873883, "grad_norm": 7.6123366355896, "learning_rate": 1.9031122436149975e-05, "loss": 2.229, "step": 23230 }, { "epoch": 0.1460687840040854, "grad_norm": 7.171878337860107, "learning_rate": 1.903070333520532e-05, "loss": 1.9208, "step": 23240 }, { "epoch": 0.14613163632078252, "grad_norm": 6.105739116668701, "learning_rate": 1.9030284234260666e-05, "loss": 2.0472, "step": 23250 }, { "epoch": 0.14619448863747964, "grad_norm": 6.974096775054932, "learning_rate": 1.9029865133316013e-05, "loss": 2.0482, "step": 23260 }, { "epoch": 0.14625734095417672, "grad_norm": 7.583916664123535, "learning_rate": 1.902944603237136e-05, "loss": 2.2159, "step": 23270 }, { "epoch": 0.14632019327087384, "grad_norm": 7.2792205810546875, "learning_rate": 1.9029026931426704e-05, "loss": 2.2024, "step": 23280 }, { "epoch": 0.14638304558757095, "grad_norm": 6.52831506729126, "learning_rate": 1.902860783048205e-05, "loss": 2.045, "step": 23290 }, { "epoch": 0.14644589790426807, "grad_norm": 8.369816780090332, "learning_rate": 1.9028188729537398e-05, "loss": 2.0575, "step": 23300 }, { "epoch": 0.14650875022096518, "grad_norm": 7.134170055389404, "learning_rate": 1.9027769628592745e-05, "loss": 2.0503, "step": 23310 }, { "epoch": 0.1465716025376623, "grad_norm": 6.758161544799805, "learning_rate": 1.9027350527648092e-05, "loss": 2.1567, "step": 23320 }, { "epoch": 0.1466344548543594, "grad_norm": 10.140778541564941, "learning_rate": 1.9026931426703436e-05, "loss": 2.1435, "step": 23330 }, { "epoch": 0.1466973071710565, "grad_norm": 8.284818649291992, "learning_rate": 1.9026512325758783e-05, "loss": 1.9622, "step": 23340 }, { "epoch": 0.14676015948775362, "grad_norm": 8.317974090576172, "learning_rate": 1.902609322481413e-05, "loss": 2.3861, "step": 23350 }, { "epoch": 0.14682301180445073, "grad_norm": 7.516535758972168, "learning_rate": 1.9025674123869477e-05, "loss": 2.2544, "step": 23360 }, { "epoch": 0.14688586412114785, "grad_norm": 6.966447353363037, "learning_rate": 1.9025255022924824e-05, "loss": 2.0672, "step": 23370 }, { "epoch": 0.14694871643784496, "grad_norm": 7.023961544036865, "learning_rate": 1.9024835921980168e-05, "loss": 2.0731, "step": 23380 }, { "epoch": 0.14701156875454205, "grad_norm": 8.957491874694824, "learning_rate": 1.9024416821035515e-05, "loss": 2.3124, "step": 23390 }, { "epoch": 0.14707442107123916, "grad_norm": 7.384146690368652, "learning_rate": 1.9023997720090862e-05, "loss": 1.913, "step": 23400 }, { "epoch": 0.14713727338793628, "grad_norm": 8.35905933380127, "learning_rate": 1.902357861914621e-05, "loss": 2.0068, "step": 23410 }, { "epoch": 0.1472001257046334, "grad_norm": 7.860772132873535, "learning_rate": 1.9023159518201556e-05, "loss": 2.1883, "step": 23420 }, { "epoch": 0.1472629780213305, "grad_norm": 7.34670352935791, "learning_rate": 1.9022740417256903e-05, "loss": 1.8775, "step": 23430 }, { "epoch": 0.14732583033802762, "grad_norm": 7.937277793884277, "learning_rate": 1.902232131631225e-05, "loss": 2.1031, "step": 23440 }, { "epoch": 0.14738868265472474, "grad_norm": 8.50955581665039, "learning_rate": 1.9021902215367597e-05, "loss": 2.2568, "step": 23450 }, { "epoch": 0.14745153497142183, "grad_norm": 7.741550922393799, "learning_rate": 1.902148311442294e-05, "loss": 2.2492, "step": 23460 }, { "epoch": 0.14751438728811894, "grad_norm": 8.376154899597168, "learning_rate": 1.9021064013478288e-05, "loss": 2.088, "step": 23470 }, { "epoch": 0.14757723960481606, "grad_norm": 7.21097993850708, "learning_rate": 1.9020644912533635e-05, "loss": 2.2054, "step": 23480 }, { "epoch": 0.14764009192151317, "grad_norm": 9.768784523010254, "learning_rate": 1.9020225811588982e-05, "loss": 2.2019, "step": 23490 }, { "epoch": 0.1477029442382103, "grad_norm": 6.948235034942627, "learning_rate": 1.9019806710644326e-05, "loss": 1.9864, "step": 23500 }, { "epoch": 0.1477657965549074, "grad_norm": 9.397354125976562, "learning_rate": 1.9019387609699673e-05, "loss": 1.9141, "step": 23510 }, { "epoch": 0.1478286488716045, "grad_norm": 6.551173686981201, "learning_rate": 1.901896850875502e-05, "loss": 1.9257, "step": 23520 }, { "epoch": 0.1478915011883016, "grad_norm": 9.810015678405762, "learning_rate": 1.9018549407810367e-05, "loss": 2.2414, "step": 23530 }, { "epoch": 0.14795435350499872, "grad_norm": 6.3327717781066895, "learning_rate": 1.9018130306865714e-05, "loss": 1.9833, "step": 23540 }, { "epoch": 0.14801720582169584, "grad_norm": 7.804057598114014, "learning_rate": 1.9017711205921058e-05, "loss": 1.9943, "step": 23550 }, { "epoch": 0.14808005813839295, "grad_norm": 8.205534934997559, "learning_rate": 1.9017292104976405e-05, "loss": 2.1566, "step": 23560 }, { "epoch": 0.14814291045509007, "grad_norm": 7.657957553863525, "learning_rate": 1.9016873004031752e-05, "loss": 1.9966, "step": 23570 }, { "epoch": 0.14820576277178715, "grad_norm": 7.589319705963135, "learning_rate": 1.90164539030871e-05, "loss": 2.0899, "step": 23580 }, { "epoch": 0.14826861508848427, "grad_norm": 8.065515518188477, "learning_rate": 1.9016034802142446e-05, "loss": 2.1794, "step": 23590 }, { "epoch": 0.14833146740518138, "grad_norm": 7.099700927734375, "learning_rate": 1.9015615701197793e-05, "loss": 1.9405, "step": 23600 }, { "epoch": 0.1483943197218785, "grad_norm": 6.308840274810791, "learning_rate": 1.901519660025314e-05, "loss": 1.9217, "step": 23610 }, { "epoch": 0.1484571720385756, "grad_norm": 7.382903575897217, "learning_rate": 1.9014777499308487e-05, "loss": 2.0211, "step": 23620 }, { "epoch": 0.14852002435527273, "grad_norm": 8.541641235351562, "learning_rate": 1.901435839836383e-05, "loss": 2.2192, "step": 23630 }, { "epoch": 0.14858287667196984, "grad_norm": 7.6181793212890625, "learning_rate": 1.9013939297419178e-05, "loss": 1.9691, "step": 23640 }, { "epoch": 0.14864572898866693, "grad_norm": 8.119380950927734, "learning_rate": 1.9013520196474525e-05, "loss": 1.9171, "step": 23650 }, { "epoch": 0.14870858130536405, "grad_norm": 7.697994709014893, "learning_rate": 1.9013101095529872e-05, "loss": 2.0111, "step": 23660 }, { "epoch": 0.14877143362206116, "grad_norm": 8.19538402557373, "learning_rate": 1.901268199458522e-05, "loss": 2.153, "step": 23670 }, { "epoch": 0.14883428593875828, "grad_norm": 7.4682111740112305, "learning_rate": 1.9012262893640563e-05, "loss": 1.9928, "step": 23680 }, { "epoch": 0.1488971382554554, "grad_norm": 8.932218551635742, "learning_rate": 1.901184379269591e-05, "loss": 2.2439, "step": 23690 }, { "epoch": 0.1489599905721525, "grad_norm": 7.902842044830322, "learning_rate": 1.9011424691751257e-05, "loss": 2.2822, "step": 23700 }, { "epoch": 0.1490228428888496, "grad_norm": 7.800632476806641, "learning_rate": 1.9011005590806604e-05, "loss": 2.2285, "step": 23710 }, { "epoch": 0.1490856952055467, "grad_norm": 7.971667766571045, "learning_rate": 1.901058648986195e-05, "loss": 1.9143, "step": 23720 }, { "epoch": 0.14914854752224382, "grad_norm": 8.899446487426758, "learning_rate": 1.9010167388917295e-05, "loss": 1.9402, "step": 23730 }, { "epoch": 0.14921139983894094, "grad_norm": 7.046481609344482, "learning_rate": 1.9009748287972642e-05, "loss": 1.8834, "step": 23740 }, { "epoch": 0.14927425215563805, "grad_norm": 7.612890720367432, "learning_rate": 1.900932918702799e-05, "loss": 2.0692, "step": 23750 }, { "epoch": 0.14933710447233517, "grad_norm": 8.558869361877441, "learning_rate": 1.9008910086083336e-05, "loss": 2.1888, "step": 23760 }, { "epoch": 0.14939995678903228, "grad_norm": 7.5883331298828125, "learning_rate": 1.900849098513868e-05, "loss": 1.8002, "step": 23770 }, { "epoch": 0.14946280910572937, "grad_norm": 7.729646682739258, "learning_rate": 1.9008071884194027e-05, "loss": 2.121, "step": 23780 }, { "epoch": 0.1495256614224265, "grad_norm": 7.262862205505371, "learning_rate": 1.9007652783249374e-05, "loss": 1.982, "step": 23790 }, { "epoch": 0.1495885137391236, "grad_norm": 7.4409356117248535, "learning_rate": 1.900723368230472e-05, "loss": 2.0201, "step": 23800 }, { "epoch": 0.14965136605582072, "grad_norm": 6.645078182220459, "learning_rate": 1.9006814581360068e-05, "loss": 2.1479, "step": 23810 }, { "epoch": 0.14971421837251783, "grad_norm": 6.964990615844727, "learning_rate": 1.9006395480415415e-05, "loss": 2.1139, "step": 23820 }, { "epoch": 0.14977707068921495, "grad_norm": 6.36972713470459, "learning_rate": 1.9005976379470762e-05, "loss": 1.907, "step": 23830 }, { "epoch": 0.14983992300591203, "grad_norm": 7.909121513366699, "learning_rate": 1.900555727852611e-05, "loss": 1.9887, "step": 23840 }, { "epoch": 0.14990277532260915, "grad_norm": 7.761707782745361, "learning_rate": 1.9005138177581456e-05, "loss": 1.901, "step": 23850 }, { "epoch": 0.14996562763930626, "grad_norm": 7.285144329071045, "learning_rate": 1.90047190766368e-05, "loss": 2.0067, "step": 23860 }, { "epoch": 0.15002847995600338, "grad_norm": 7.631621360778809, "learning_rate": 1.9004299975692147e-05, "loss": 2.2314, "step": 23870 }, { "epoch": 0.1500913322727005, "grad_norm": 8.568719863891602, "learning_rate": 1.9003880874747494e-05, "loss": 1.9857, "step": 23880 }, { "epoch": 0.1501541845893976, "grad_norm": 7.1593546867370605, "learning_rate": 1.900346177380284e-05, "loss": 2.0381, "step": 23890 }, { "epoch": 0.1502170369060947, "grad_norm": 7.844557285308838, "learning_rate": 1.9003042672858185e-05, "loss": 2.0089, "step": 23900 }, { "epoch": 0.1502798892227918, "grad_norm": 6.168196201324463, "learning_rate": 1.9002623571913532e-05, "loss": 1.8796, "step": 23910 }, { "epoch": 0.15034274153948893, "grad_norm": 7.286600112915039, "learning_rate": 1.900220447096888e-05, "loss": 2.1071, "step": 23920 }, { "epoch": 0.15040559385618604, "grad_norm": 6.133782863616943, "learning_rate": 1.9001785370024226e-05, "loss": 2.1286, "step": 23930 }, { "epoch": 0.15046844617288316, "grad_norm": 7.266432762145996, "learning_rate": 1.9001366269079573e-05, "loss": 2.0999, "step": 23940 }, { "epoch": 0.15053129848958027, "grad_norm": 8.122286796569824, "learning_rate": 1.9000947168134917e-05, "loss": 2.0707, "step": 23950 }, { "epoch": 0.1505941508062774, "grad_norm": 8.332359313964844, "learning_rate": 1.9000528067190264e-05, "loss": 2.2856, "step": 23960 }, { "epoch": 0.15065700312297448, "grad_norm": 7.63896369934082, "learning_rate": 1.900010896624561e-05, "loss": 2.1809, "step": 23970 }, { "epoch": 0.1507198554396716, "grad_norm": 7.593400001525879, "learning_rate": 1.8999689865300958e-05, "loss": 1.9636, "step": 23980 }, { "epoch": 0.1507827077563687, "grad_norm": 7.072235584259033, "learning_rate": 1.8999270764356305e-05, "loss": 2.3203, "step": 23990 }, { "epoch": 0.15084556007306582, "grad_norm": 7.944361209869385, "learning_rate": 1.8998851663411652e-05, "loss": 2.1457, "step": 24000 }, { "epoch": 0.15090841238976294, "grad_norm": 7.352777481079102, "learning_rate": 1.8998432562466996e-05, "loss": 1.9976, "step": 24010 }, { "epoch": 0.15097126470646005, "grad_norm": 9.210273742675781, "learning_rate": 1.8998013461522343e-05, "loss": 1.9532, "step": 24020 }, { "epoch": 0.15103411702315714, "grad_norm": 7.580131530761719, "learning_rate": 1.899759436057769e-05, "loss": 2.2155, "step": 24030 }, { "epoch": 0.15109696933985425, "grad_norm": 7.389312744140625, "learning_rate": 1.8997175259633037e-05, "loss": 2.3535, "step": 24040 }, { "epoch": 0.15115982165655137, "grad_norm": 7.961337089538574, "learning_rate": 1.8996756158688384e-05, "loss": 2.0597, "step": 24050 }, { "epoch": 0.15122267397324848, "grad_norm": 6.754824161529541, "learning_rate": 1.899633705774373e-05, "loss": 2.0535, "step": 24060 }, { "epoch": 0.1512855262899456, "grad_norm": 6.829716682434082, "learning_rate": 1.8995917956799078e-05, "loss": 1.9852, "step": 24070 }, { "epoch": 0.15134837860664271, "grad_norm": 8.027902603149414, "learning_rate": 1.8995498855854422e-05, "loss": 1.9553, "step": 24080 }, { "epoch": 0.1514112309233398, "grad_norm": 7.129761219024658, "learning_rate": 1.899507975490977e-05, "loss": 1.9923, "step": 24090 }, { "epoch": 0.15147408324003692, "grad_norm": 7.457757472991943, "learning_rate": 1.8994660653965116e-05, "loss": 2.1197, "step": 24100 }, { "epoch": 0.15153693555673403, "grad_norm": 7.700310230255127, "learning_rate": 1.8994241553020463e-05, "loss": 1.9089, "step": 24110 }, { "epoch": 0.15159978787343115, "grad_norm": 7.856703281402588, "learning_rate": 1.8993822452075807e-05, "loss": 2.2076, "step": 24120 }, { "epoch": 0.15166264019012826, "grad_norm": 7.630815029144287, "learning_rate": 1.8993403351131154e-05, "loss": 2.0582, "step": 24130 }, { "epoch": 0.15172549250682538, "grad_norm": 7.412759780883789, "learning_rate": 1.89929842501865e-05, "loss": 2.1895, "step": 24140 }, { "epoch": 0.1517883448235225, "grad_norm": 7.12389612197876, "learning_rate": 1.8992565149241848e-05, "loss": 2.2506, "step": 24150 }, { "epoch": 0.15185119714021958, "grad_norm": 7.502507209777832, "learning_rate": 1.8992146048297195e-05, "loss": 2.0753, "step": 24160 }, { "epoch": 0.1519140494569167, "grad_norm": 7.414200305938721, "learning_rate": 1.899172694735254e-05, "loss": 1.9224, "step": 24170 }, { "epoch": 0.1519769017736138, "grad_norm": 7.306906223297119, "learning_rate": 1.8991307846407886e-05, "loss": 2.0083, "step": 24180 }, { "epoch": 0.15203975409031092, "grad_norm": 7.0531134605407715, "learning_rate": 1.8990888745463233e-05, "loss": 1.91, "step": 24190 }, { "epoch": 0.15210260640700804, "grad_norm": 7.230875492095947, "learning_rate": 1.899046964451858e-05, "loss": 2.0589, "step": 24200 }, { "epoch": 0.15216545872370515, "grad_norm": 7.221288681030273, "learning_rate": 1.8990050543573927e-05, "loss": 2.1291, "step": 24210 }, { "epoch": 0.15222831104040224, "grad_norm": 8.530590057373047, "learning_rate": 1.8989631442629274e-05, "loss": 2.1829, "step": 24220 }, { "epoch": 0.15229116335709936, "grad_norm": 7.478540897369385, "learning_rate": 1.898921234168462e-05, "loss": 1.9379, "step": 24230 }, { "epoch": 0.15235401567379647, "grad_norm": 7.320249080657959, "learning_rate": 1.8988793240739968e-05, "loss": 2.2113, "step": 24240 }, { "epoch": 0.1524168679904936, "grad_norm": 7.91867733001709, "learning_rate": 1.8988374139795315e-05, "loss": 2.217, "step": 24250 }, { "epoch": 0.1524797203071907, "grad_norm": 7.058121204376221, "learning_rate": 1.898795503885066e-05, "loss": 2.0749, "step": 24260 }, { "epoch": 0.15254257262388782, "grad_norm": 6.411896228790283, "learning_rate": 1.8987535937906006e-05, "loss": 2.1421, "step": 24270 }, { "epoch": 0.15260542494058493, "grad_norm": 7.097553253173828, "learning_rate": 1.8987116836961353e-05, "loss": 2.0138, "step": 24280 }, { "epoch": 0.15266827725728202, "grad_norm": 10.559577941894531, "learning_rate": 1.89866977360167e-05, "loss": 2.0342, "step": 24290 }, { "epoch": 0.15273112957397914, "grad_norm": 7.3536458015441895, "learning_rate": 1.8986278635072044e-05, "loss": 2.0605, "step": 24300 }, { "epoch": 0.15279398189067625, "grad_norm": 6.632030963897705, "learning_rate": 1.898585953412739e-05, "loss": 2.2258, "step": 24310 }, { "epoch": 0.15285683420737337, "grad_norm": 7.509122848510742, "learning_rate": 1.8985440433182738e-05, "loss": 2.0435, "step": 24320 }, { "epoch": 0.15291968652407048, "grad_norm": 8.334511756896973, "learning_rate": 1.8985021332238085e-05, "loss": 2.0767, "step": 24330 }, { "epoch": 0.1529825388407676, "grad_norm": 7.24440860748291, "learning_rate": 1.8984602231293432e-05, "loss": 2.1765, "step": 24340 }, { "epoch": 0.15304539115746468, "grad_norm": 7.586240768432617, "learning_rate": 1.8984183130348776e-05, "loss": 2.113, "step": 24350 }, { "epoch": 0.1531082434741618, "grad_norm": 7.222861289978027, "learning_rate": 1.8983764029404123e-05, "loss": 1.922, "step": 24360 }, { "epoch": 0.1531710957908589, "grad_norm": 6.647038459777832, "learning_rate": 1.898334492845947e-05, "loss": 2.0281, "step": 24370 }, { "epoch": 0.15323394810755603, "grad_norm": 7.2084527015686035, "learning_rate": 1.8982925827514817e-05, "loss": 2.0836, "step": 24380 }, { "epoch": 0.15329680042425314, "grad_norm": 8.349883079528809, "learning_rate": 1.898250672657016e-05, "loss": 2.1746, "step": 24390 }, { "epoch": 0.15335965274095026, "grad_norm": 7.082326889038086, "learning_rate": 1.8982087625625508e-05, "loss": 2.065, "step": 24400 }, { "epoch": 0.15342250505764735, "grad_norm": 8.142436981201172, "learning_rate": 1.8981668524680855e-05, "loss": 2.0993, "step": 24410 }, { "epoch": 0.15348535737434446, "grad_norm": 7.3239922523498535, "learning_rate": 1.8981249423736202e-05, "loss": 1.9832, "step": 24420 }, { "epoch": 0.15354820969104158, "grad_norm": 8.010440826416016, "learning_rate": 1.898083032279155e-05, "loss": 2.214, "step": 24430 }, { "epoch": 0.1536110620077387, "grad_norm": 6.904417514801025, "learning_rate": 1.8980411221846896e-05, "loss": 2.1383, "step": 24440 }, { "epoch": 0.1536739143244358, "grad_norm": 6.834875583648682, "learning_rate": 1.8979992120902243e-05, "loss": 2.1952, "step": 24450 }, { "epoch": 0.15373676664113292, "grad_norm": 6.857359409332275, "learning_rate": 1.897957301995759e-05, "loss": 2.0035, "step": 24460 }, { "epoch": 0.15379961895783004, "grad_norm": 8.004855155944824, "learning_rate": 1.8979153919012937e-05, "loss": 2.2526, "step": 24470 }, { "epoch": 0.15386247127452712, "grad_norm": 7.017568111419678, "learning_rate": 1.897873481806828e-05, "loss": 1.8192, "step": 24480 }, { "epoch": 0.15392532359122424, "grad_norm": 7.01463508605957, "learning_rate": 1.8978315717123628e-05, "loss": 2.3115, "step": 24490 }, { "epoch": 0.15398817590792135, "grad_norm": 7.071869373321533, "learning_rate": 1.8977896616178975e-05, "loss": 2.2191, "step": 24500 }, { "epoch": 0.15405102822461847, "grad_norm": 7.331428050994873, "learning_rate": 1.8977477515234322e-05, "loss": 2.1485, "step": 24510 }, { "epoch": 0.15411388054131558, "grad_norm": 7.987422466278076, "learning_rate": 1.8977058414289666e-05, "loss": 2.0528, "step": 24520 }, { "epoch": 0.1541767328580127, "grad_norm": 7.370058536529541, "learning_rate": 1.8976639313345013e-05, "loss": 2.0887, "step": 24530 }, { "epoch": 0.1542395851747098, "grad_norm": 7.986361026763916, "learning_rate": 1.897622021240036e-05, "loss": 2.2906, "step": 24540 }, { "epoch": 0.1543024374914069, "grad_norm": 8.180559158325195, "learning_rate": 1.8975801111455707e-05, "loss": 2.2125, "step": 24550 }, { "epoch": 0.15436528980810402, "grad_norm": 7.562689304351807, "learning_rate": 1.8975382010511054e-05, "loss": 2.2225, "step": 24560 }, { "epoch": 0.15442814212480113, "grad_norm": 7.62192964553833, "learning_rate": 1.8974962909566398e-05, "loss": 2.1318, "step": 24570 }, { "epoch": 0.15449099444149825, "grad_norm": 7.785002708435059, "learning_rate": 1.8974543808621745e-05, "loss": 2.1521, "step": 24580 }, { "epoch": 0.15455384675819536, "grad_norm": 6.82577657699585, "learning_rate": 1.8974124707677092e-05, "loss": 2.0823, "step": 24590 }, { "epoch": 0.15461669907489248, "grad_norm": 7.56740140914917, "learning_rate": 1.897370560673244e-05, "loss": 2.2076, "step": 24600 }, { "epoch": 0.15467955139158956, "grad_norm": 6.195840358734131, "learning_rate": 1.8973286505787786e-05, "loss": 2.0197, "step": 24610 }, { "epoch": 0.15474240370828668, "grad_norm": 7.166738033294678, "learning_rate": 1.8972867404843133e-05, "loss": 2.1333, "step": 24620 }, { "epoch": 0.1548052560249838, "grad_norm": 8.683135032653809, "learning_rate": 1.897244830389848e-05, "loss": 2.0418, "step": 24630 }, { "epoch": 0.1548681083416809, "grad_norm": 8.308981895446777, "learning_rate": 1.8972029202953824e-05, "loss": 2.1947, "step": 24640 }, { "epoch": 0.15493096065837803, "grad_norm": 7.782462120056152, "learning_rate": 1.897161010200917e-05, "loss": 2.0108, "step": 24650 }, { "epoch": 0.15499381297507514, "grad_norm": 7.015844345092773, "learning_rate": 1.8971191001064518e-05, "loss": 2.0842, "step": 24660 }, { "epoch": 0.15505666529177223, "grad_norm": 8.82336139678955, "learning_rate": 1.8970771900119865e-05, "loss": 1.9346, "step": 24670 }, { "epoch": 0.15511951760846934, "grad_norm": 8.218207359313965, "learning_rate": 1.8970352799175212e-05, "loss": 2.0771, "step": 24680 }, { "epoch": 0.15518236992516646, "grad_norm": 7.350327968597412, "learning_rate": 1.896993369823056e-05, "loss": 2.0874, "step": 24690 }, { "epoch": 0.15524522224186357, "grad_norm": 7.691098213195801, "learning_rate": 1.8969514597285903e-05, "loss": 1.8805, "step": 24700 }, { "epoch": 0.1553080745585607, "grad_norm": 7.976955890655518, "learning_rate": 1.896909549634125e-05, "loss": 2.0366, "step": 24710 }, { "epoch": 0.1553709268752578, "grad_norm": 6.466163635253906, "learning_rate": 1.8968676395396597e-05, "loss": 2.0855, "step": 24720 }, { "epoch": 0.1554337791919549, "grad_norm": 6.638965606689453, "learning_rate": 1.8968257294451944e-05, "loss": 2.2471, "step": 24730 }, { "epoch": 0.155496631508652, "grad_norm": 6.676800727844238, "learning_rate": 1.8967838193507288e-05, "loss": 2.2198, "step": 24740 }, { "epoch": 0.15555948382534912, "grad_norm": 7.223644256591797, "learning_rate": 1.8967419092562635e-05, "loss": 2.1861, "step": 24750 }, { "epoch": 0.15562233614204624, "grad_norm": 7.7987589836120605, "learning_rate": 1.8966999991617982e-05, "loss": 2.1355, "step": 24760 }, { "epoch": 0.15568518845874335, "grad_norm": 8.250946998596191, "learning_rate": 1.896658089067333e-05, "loss": 2.149, "step": 24770 }, { "epoch": 0.15574804077544047, "grad_norm": 8.638188362121582, "learning_rate": 1.8966161789728676e-05, "loss": 2.0937, "step": 24780 }, { "epoch": 0.15581089309213758, "grad_norm": 7.171464443206787, "learning_rate": 1.896574268878402e-05, "loss": 2.1431, "step": 24790 }, { "epoch": 0.15587374540883467, "grad_norm": 8.648698806762695, "learning_rate": 1.8965323587839367e-05, "loss": 2.1714, "step": 24800 }, { "epoch": 0.15593659772553178, "grad_norm": 7.166804313659668, "learning_rate": 1.8964904486894714e-05, "loss": 2.1407, "step": 24810 }, { "epoch": 0.1559994500422289, "grad_norm": 7.397407531738281, "learning_rate": 1.896448538595006e-05, "loss": 2.0526, "step": 24820 }, { "epoch": 0.15606230235892601, "grad_norm": 7.774376392364502, "learning_rate": 1.8964066285005408e-05, "loss": 1.9338, "step": 24830 }, { "epoch": 0.15612515467562313, "grad_norm": 7.6225457191467285, "learning_rate": 1.8963647184060755e-05, "loss": 1.7409, "step": 24840 }, { "epoch": 0.15618800699232024, "grad_norm": 11.696316719055176, "learning_rate": 1.8963228083116102e-05, "loss": 2.2644, "step": 24850 }, { "epoch": 0.15625085930901733, "grad_norm": 8.621893882751465, "learning_rate": 1.896280898217145e-05, "loss": 2.0853, "step": 24860 }, { "epoch": 0.15631371162571445, "grad_norm": 9.359789848327637, "learning_rate": 1.8962389881226796e-05, "loss": 2.2944, "step": 24870 }, { "epoch": 0.15637656394241156, "grad_norm": 7.055229187011719, "learning_rate": 1.896197078028214e-05, "loss": 1.7894, "step": 24880 }, { "epoch": 0.15643941625910868, "grad_norm": 7.7220258712768555, "learning_rate": 1.8961551679337487e-05, "loss": 1.9412, "step": 24890 }, { "epoch": 0.1565022685758058, "grad_norm": 8.135756492614746, "learning_rate": 1.8961132578392834e-05, "loss": 1.9745, "step": 24900 }, { "epoch": 0.1565651208925029, "grad_norm": 7.712810039520264, "learning_rate": 1.896071347744818e-05, "loss": 2.1804, "step": 24910 }, { "epoch": 0.1566279732092, "grad_norm": 7.599367618560791, "learning_rate": 1.8960294376503525e-05, "loss": 2.1436, "step": 24920 }, { "epoch": 0.1566908255258971, "grad_norm": 5.9962687492370605, "learning_rate": 1.8959875275558872e-05, "loss": 2.0119, "step": 24930 }, { "epoch": 0.15675367784259422, "grad_norm": 8.699943542480469, "learning_rate": 1.895945617461422e-05, "loss": 1.9338, "step": 24940 }, { "epoch": 0.15681653015929134, "grad_norm": 8.431902885437012, "learning_rate": 1.8959037073669566e-05, "loss": 2.1374, "step": 24950 }, { "epoch": 0.15687938247598845, "grad_norm": 7.762973785400391, "learning_rate": 1.8958617972724913e-05, "loss": 2.0149, "step": 24960 }, { "epoch": 0.15694223479268557, "grad_norm": 6.656715393066406, "learning_rate": 1.8958198871780257e-05, "loss": 2.2427, "step": 24970 }, { "epoch": 0.15700508710938268, "grad_norm": 7.184601306915283, "learning_rate": 1.8957779770835604e-05, "loss": 2.0844, "step": 24980 }, { "epoch": 0.15706793942607977, "grad_norm": 7.309114933013916, "learning_rate": 1.895736066989095e-05, "loss": 1.9691, "step": 24990 }, { "epoch": 0.1571307917427769, "grad_norm": 7.920795917510986, "learning_rate": 1.8956941568946298e-05, "loss": 2.2264, "step": 25000 }, { "epoch": 0.157193644059474, "grad_norm": 7.16301965713501, "learning_rate": 1.8956522468001642e-05, "loss": 2.1716, "step": 25010 }, { "epoch": 0.15725649637617112, "grad_norm": 8.722789764404297, "learning_rate": 1.895610336705699e-05, "loss": 2.012, "step": 25020 }, { "epoch": 0.15731934869286823, "grad_norm": 7.967345237731934, "learning_rate": 1.8955684266112336e-05, "loss": 2.0114, "step": 25030 }, { "epoch": 0.15738220100956535, "grad_norm": 8.800018310546875, "learning_rate": 1.8955265165167683e-05, "loss": 1.9146, "step": 25040 }, { "epoch": 0.15744505332626244, "grad_norm": 8.132065773010254, "learning_rate": 1.895484606422303e-05, "loss": 1.8465, "step": 25050 }, { "epoch": 0.15750790564295955, "grad_norm": 7.073143005371094, "learning_rate": 1.8954426963278377e-05, "loss": 2.1489, "step": 25060 }, { "epoch": 0.15757075795965667, "grad_norm": 7.072580814361572, "learning_rate": 1.8954007862333724e-05, "loss": 2.2444, "step": 25070 }, { "epoch": 0.15763361027635378, "grad_norm": 6.578721523284912, "learning_rate": 1.895358876138907e-05, "loss": 2.2326, "step": 25080 }, { "epoch": 0.1576964625930509, "grad_norm": 7.9277024269104, "learning_rate": 1.895316966044442e-05, "loss": 2.2134, "step": 25090 }, { "epoch": 0.157759314909748, "grad_norm": 8.125829696655273, "learning_rate": 1.8952750559499762e-05, "loss": 2.216, "step": 25100 }, { "epoch": 0.15782216722644513, "grad_norm": 13.079134941101074, "learning_rate": 1.895233145855511e-05, "loss": 2.1439, "step": 25110 }, { "epoch": 0.1578850195431422, "grad_norm": 7.656450271606445, "learning_rate": 1.8951912357610456e-05, "loss": 2.097, "step": 25120 }, { "epoch": 0.15794787185983933, "grad_norm": 6.691035270690918, "learning_rate": 1.8951493256665803e-05, "loss": 1.9697, "step": 25130 }, { "epoch": 0.15801072417653644, "grad_norm": 7.895198345184326, "learning_rate": 1.8951074155721147e-05, "loss": 2.3009, "step": 25140 }, { "epoch": 0.15807357649323356, "grad_norm": 8.710088729858398, "learning_rate": 1.8950655054776494e-05, "loss": 2.0514, "step": 25150 }, { "epoch": 0.15813642880993067, "grad_norm": 7.562583923339844, "learning_rate": 1.895023595383184e-05, "loss": 2.0919, "step": 25160 }, { "epoch": 0.1581992811266278, "grad_norm": 7.830014705657959, "learning_rate": 1.8949816852887188e-05, "loss": 2.3568, "step": 25170 }, { "epoch": 0.15826213344332488, "grad_norm": 6.225578784942627, "learning_rate": 1.8949397751942535e-05, "loss": 2.197, "step": 25180 }, { "epoch": 0.158324985760022, "grad_norm": 9.283886909484863, "learning_rate": 1.894897865099788e-05, "loss": 2.2077, "step": 25190 }, { "epoch": 0.1583878380767191, "grad_norm": 7.862932205200195, "learning_rate": 1.8948559550053226e-05, "loss": 2.0121, "step": 25200 }, { "epoch": 0.15845069039341622, "grad_norm": 7.057699680328369, "learning_rate": 1.8948140449108573e-05, "loss": 2.1178, "step": 25210 }, { "epoch": 0.15851354271011334, "grad_norm": 6.624111652374268, "learning_rate": 1.894772134816392e-05, "loss": 2.1101, "step": 25220 }, { "epoch": 0.15857639502681045, "grad_norm": 7.000601768493652, "learning_rate": 1.8947302247219267e-05, "loss": 2.126, "step": 25230 }, { "epoch": 0.15863924734350754, "grad_norm": 7.766676902770996, "learning_rate": 1.8946883146274614e-05, "loss": 2.0739, "step": 25240 }, { "epoch": 0.15870209966020465, "grad_norm": 7.2908034324646, "learning_rate": 1.894646404532996e-05, "loss": 1.8524, "step": 25250 }, { "epoch": 0.15876495197690177, "grad_norm": 6.436234951019287, "learning_rate": 1.8946044944385305e-05, "loss": 1.7905, "step": 25260 }, { "epoch": 0.15882780429359888, "grad_norm": 7.536283493041992, "learning_rate": 1.8945625843440652e-05, "loss": 2.101, "step": 25270 }, { "epoch": 0.158890656610296, "grad_norm": 7.62872838973999, "learning_rate": 1.8945206742496e-05, "loss": 2.0949, "step": 25280 }, { "epoch": 0.15895350892699311, "grad_norm": 6.854900360107422, "learning_rate": 1.8944787641551346e-05, "loss": 2.0687, "step": 25290 }, { "epoch": 0.15901636124369023, "grad_norm": 7.434235572814941, "learning_rate": 1.8944368540606693e-05, "loss": 2.0897, "step": 25300 }, { "epoch": 0.15907921356038732, "grad_norm": 6.923051357269287, "learning_rate": 1.894394943966204e-05, "loss": 1.8295, "step": 25310 }, { "epoch": 0.15914206587708443, "grad_norm": 8.220632553100586, "learning_rate": 1.8943530338717384e-05, "loss": 2.24, "step": 25320 }, { "epoch": 0.15920491819378155, "grad_norm": 7.645471096038818, "learning_rate": 1.894311123777273e-05, "loss": 2.0898, "step": 25330 }, { "epoch": 0.15926777051047866, "grad_norm": 8.202953338623047, "learning_rate": 1.8942692136828078e-05, "loss": 2.1, "step": 25340 }, { "epoch": 0.15933062282717578, "grad_norm": 6.860551834106445, "learning_rate": 1.8942273035883425e-05, "loss": 2.0844, "step": 25350 }, { "epoch": 0.1593934751438729, "grad_norm": 6.949239730834961, "learning_rate": 1.8941853934938772e-05, "loss": 2.1234, "step": 25360 }, { "epoch": 0.15945632746056998, "grad_norm": 6.777586460113525, "learning_rate": 1.8941434833994116e-05, "loss": 1.9905, "step": 25370 }, { "epoch": 0.1595191797772671, "grad_norm": 7.5527825355529785, "learning_rate": 1.8941015733049463e-05, "loss": 2.065, "step": 25380 }, { "epoch": 0.1595820320939642, "grad_norm": 7.536026954650879, "learning_rate": 1.894059663210481e-05, "loss": 1.9825, "step": 25390 }, { "epoch": 0.15964488441066133, "grad_norm": 7.107115745544434, "learning_rate": 1.8940177531160157e-05, "loss": 1.9168, "step": 25400 }, { "epoch": 0.15970773672735844, "grad_norm": 7.15897274017334, "learning_rate": 1.89397584302155e-05, "loss": 1.8647, "step": 25410 }, { "epoch": 0.15977058904405556, "grad_norm": 6.944474220275879, "learning_rate": 1.8939339329270848e-05, "loss": 2.1414, "step": 25420 }, { "epoch": 0.15983344136075264, "grad_norm": 8.514678955078125, "learning_rate": 1.8938920228326195e-05, "loss": 2.1249, "step": 25430 }, { "epoch": 0.15989629367744976, "grad_norm": 9.144061088562012, "learning_rate": 1.8938501127381542e-05, "loss": 2.322, "step": 25440 }, { "epoch": 0.15995914599414687, "grad_norm": 7.6061692237854, "learning_rate": 1.893808202643689e-05, "loss": 2.0253, "step": 25450 }, { "epoch": 0.160021998310844, "grad_norm": 7.343978404998779, "learning_rate": 1.8937662925492236e-05, "loss": 2.1732, "step": 25460 }, { "epoch": 0.1600848506275411, "grad_norm": 6.9757490158081055, "learning_rate": 1.8937243824547583e-05, "loss": 2.1448, "step": 25470 }, { "epoch": 0.16014770294423822, "grad_norm": 7.120965003967285, "learning_rate": 1.893682472360293e-05, "loss": 1.9994, "step": 25480 }, { "epoch": 0.16021055526093533, "grad_norm": 7.839221954345703, "learning_rate": 1.8936405622658278e-05, "loss": 2.1946, "step": 25490 }, { "epoch": 0.16027340757763242, "grad_norm": 7.733880519866943, "learning_rate": 1.893598652171362e-05, "loss": 2.0072, "step": 25500 }, { "epoch": 0.16033625989432954, "grad_norm": 9.782451629638672, "learning_rate": 1.8935567420768968e-05, "loss": 2.1506, "step": 25510 }, { "epoch": 0.16039911221102665, "grad_norm": 6.857192039489746, "learning_rate": 1.8935148319824315e-05, "loss": 1.9458, "step": 25520 }, { "epoch": 0.16046196452772377, "grad_norm": 7.340935707092285, "learning_rate": 1.8934729218879662e-05, "loss": 2.0559, "step": 25530 }, { "epoch": 0.16052481684442088, "grad_norm": 7.422693252563477, "learning_rate": 1.8934310117935006e-05, "loss": 2.192, "step": 25540 }, { "epoch": 0.160587669161118, "grad_norm": 9.500493049621582, "learning_rate": 1.8933891016990353e-05, "loss": 2.1425, "step": 25550 }, { "epoch": 0.16065052147781508, "grad_norm": 7.396688938140869, "learning_rate": 1.89334719160457e-05, "loss": 2.3758, "step": 25560 }, { "epoch": 0.1607133737945122, "grad_norm": 6.810682773590088, "learning_rate": 1.8933052815101047e-05, "loss": 1.9584, "step": 25570 }, { "epoch": 0.1607762261112093, "grad_norm": 7.831878662109375, "learning_rate": 1.8932633714156394e-05, "loss": 2.265, "step": 25580 }, { "epoch": 0.16083907842790643, "grad_norm": 9.585444450378418, "learning_rate": 1.8932214613211738e-05, "loss": 2.083, "step": 25590 }, { "epoch": 0.16090193074460354, "grad_norm": 6.930378437042236, "learning_rate": 1.8931795512267085e-05, "loss": 2.0618, "step": 25600 }, { "epoch": 0.16096478306130066, "grad_norm": 8.438226699829102, "learning_rate": 1.8931376411322432e-05, "loss": 1.9928, "step": 25610 }, { "epoch": 0.16102763537799777, "grad_norm": 6.970348834991455, "learning_rate": 1.893095731037778e-05, "loss": 1.9087, "step": 25620 }, { "epoch": 0.16109048769469486, "grad_norm": 8.59546184539795, "learning_rate": 1.8930538209433126e-05, "loss": 2.2538, "step": 25630 }, { "epoch": 0.16115334001139198, "grad_norm": 6.681682586669922, "learning_rate": 1.893011910848847e-05, "loss": 2.2343, "step": 25640 }, { "epoch": 0.1612161923280891, "grad_norm": 7.520928382873535, "learning_rate": 1.8929700007543817e-05, "loss": 2.2066, "step": 25650 }, { "epoch": 0.1612790446447862, "grad_norm": 7.324418544769287, "learning_rate": 1.8929280906599164e-05, "loss": 2.0636, "step": 25660 }, { "epoch": 0.16134189696148332, "grad_norm": 8.397371292114258, "learning_rate": 1.892886180565451e-05, "loss": 2.1226, "step": 25670 }, { "epoch": 0.16140474927818044, "grad_norm": 7.918264865875244, "learning_rate": 1.892844270470986e-05, "loss": 2.085, "step": 25680 }, { "epoch": 0.16146760159487752, "grad_norm": 7.92132568359375, "learning_rate": 1.8928023603765205e-05, "loss": 2.0146, "step": 25690 }, { "epoch": 0.16153045391157464, "grad_norm": 8.547807693481445, "learning_rate": 1.8927604502820552e-05, "loss": 1.9715, "step": 25700 }, { "epoch": 0.16159330622827175, "grad_norm": 7.091030597686768, "learning_rate": 1.89271854018759e-05, "loss": 2.2613, "step": 25710 }, { "epoch": 0.16165615854496887, "grad_norm": 6.6840081214904785, "learning_rate": 1.8926766300931243e-05, "loss": 2.1982, "step": 25720 }, { "epoch": 0.16171901086166598, "grad_norm": 7.225265979766846, "learning_rate": 1.892634719998659e-05, "loss": 2.0174, "step": 25730 }, { "epoch": 0.1617818631783631, "grad_norm": 7.222609043121338, "learning_rate": 1.8925928099041937e-05, "loss": 1.9866, "step": 25740 }, { "epoch": 0.1618447154950602, "grad_norm": 7.990882396697998, "learning_rate": 1.8925508998097284e-05, "loss": 2.0287, "step": 25750 }, { "epoch": 0.1619075678117573, "grad_norm": 6.329669952392578, "learning_rate": 1.8925089897152628e-05, "loss": 2.1078, "step": 25760 }, { "epoch": 0.16197042012845442, "grad_norm": 8.103155136108398, "learning_rate": 1.8924670796207975e-05, "loss": 2.0957, "step": 25770 }, { "epoch": 0.16203327244515153, "grad_norm": 8.01485538482666, "learning_rate": 1.8924251695263322e-05, "loss": 2.0673, "step": 25780 }, { "epoch": 0.16209612476184865, "grad_norm": 6.672882080078125, "learning_rate": 1.892383259431867e-05, "loss": 2.2367, "step": 25790 }, { "epoch": 0.16215897707854576, "grad_norm": 7.1393561363220215, "learning_rate": 1.8923413493374016e-05, "loss": 2.2425, "step": 25800 }, { "epoch": 0.16222182939524288, "grad_norm": 7.2413458824157715, "learning_rate": 1.892299439242936e-05, "loss": 1.9738, "step": 25810 }, { "epoch": 0.16228468171193997, "grad_norm": 7.2407121658325195, "learning_rate": 1.8922575291484707e-05, "loss": 2.1412, "step": 25820 }, { "epoch": 0.16234753402863708, "grad_norm": 8.259404182434082, "learning_rate": 1.8922156190540054e-05, "loss": 2.3482, "step": 25830 }, { "epoch": 0.1624103863453342, "grad_norm": 7.328073501586914, "learning_rate": 1.89217370895954e-05, "loss": 2.0186, "step": 25840 }, { "epoch": 0.1624732386620313, "grad_norm": 7.726652145385742, "learning_rate": 1.892131798865075e-05, "loss": 2.1059, "step": 25850 }, { "epoch": 0.16253609097872843, "grad_norm": 7.808199882507324, "learning_rate": 1.8920898887706095e-05, "loss": 2.226, "step": 25860 }, { "epoch": 0.16259894329542554, "grad_norm": 7.739694118499756, "learning_rate": 1.8920479786761442e-05, "loss": 2.3565, "step": 25870 }, { "epoch": 0.16266179561212263, "grad_norm": 8.303359031677246, "learning_rate": 1.892006068581679e-05, "loss": 2.2056, "step": 25880 }, { "epoch": 0.16272464792881974, "grad_norm": 9.105769157409668, "learning_rate": 1.8919641584872133e-05, "loss": 1.9957, "step": 25890 }, { "epoch": 0.16278750024551686, "grad_norm": 7.577997207641602, "learning_rate": 1.891922248392748e-05, "loss": 2.0289, "step": 25900 }, { "epoch": 0.16285035256221397, "grad_norm": 8.940987586975098, "learning_rate": 1.8918803382982827e-05, "loss": 2.0189, "step": 25910 }, { "epoch": 0.1629132048789111, "grad_norm": 7.030361175537109, "learning_rate": 1.8918384282038174e-05, "loss": 2.2427, "step": 25920 }, { "epoch": 0.1629760571956082, "grad_norm": 7.728364944458008, "learning_rate": 1.891796518109352e-05, "loss": 2.1514, "step": 25930 }, { "epoch": 0.16303890951230532, "grad_norm": 6.962830543518066, "learning_rate": 1.8917546080148865e-05, "loss": 2.1062, "step": 25940 }, { "epoch": 0.1631017618290024, "grad_norm": 7.344377517700195, "learning_rate": 1.8917126979204212e-05, "loss": 1.8316, "step": 25950 }, { "epoch": 0.16316461414569952, "grad_norm": 7.470015048980713, "learning_rate": 1.891670787825956e-05, "loss": 1.8542, "step": 25960 }, { "epoch": 0.16322746646239664, "grad_norm": 7.419188022613525, "learning_rate": 1.8916288777314906e-05, "loss": 2.1597, "step": 25970 }, { "epoch": 0.16329031877909375, "grad_norm": 8.001127243041992, "learning_rate": 1.8915869676370253e-05, "loss": 1.8469, "step": 25980 }, { "epoch": 0.16335317109579087, "grad_norm": 7.438764572143555, "learning_rate": 1.8915450575425597e-05, "loss": 2.0946, "step": 25990 }, { "epoch": 0.16341602341248798, "grad_norm": 7.521275997161865, "learning_rate": 1.8915031474480944e-05, "loss": 2.2205, "step": 26000 }, { "epoch": 0.16347887572918507, "grad_norm": 7.863826751708984, "learning_rate": 1.891461237353629e-05, "loss": 1.8943, "step": 26010 }, { "epoch": 0.16354172804588218, "grad_norm": 8.256706237792969, "learning_rate": 1.891419327259164e-05, "loss": 2.2367, "step": 26020 }, { "epoch": 0.1636045803625793, "grad_norm": 6.641922950744629, "learning_rate": 1.8913774171646982e-05, "loss": 2.0092, "step": 26030 }, { "epoch": 0.16366743267927641, "grad_norm": 7.038200378417969, "learning_rate": 1.891335507070233e-05, "loss": 2.1011, "step": 26040 }, { "epoch": 0.16373028499597353, "grad_norm": 7.9291090965271, "learning_rate": 1.8912935969757676e-05, "loss": 2.2102, "step": 26050 }, { "epoch": 0.16379313731267064, "grad_norm": 7.497622489929199, "learning_rate": 1.8912516868813023e-05, "loss": 2.0191, "step": 26060 }, { "epoch": 0.16385598962936773, "grad_norm": 8.030279159545898, "learning_rate": 1.891209776786837e-05, "loss": 1.9894, "step": 26070 }, { "epoch": 0.16391884194606485, "grad_norm": 7.592596054077148, "learning_rate": 1.8911678666923717e-05, "loss": 2.0592, "step": 26080 }, { "epoch": 0.16398169426276196, "grad_norm": 7.6316304206848145, "learning_rate": 1.8911259565979064e-05, "loss": 2.0823, "step": 26090 }, { "epoch": 0.16404454657945908, "grad_norm": 7.14187479019165, "learning_rate": 1.891084046503441e-05, "loss": 1.6933, "step": 26100 }, { "epoch": 0.1641073988961562, "grad_norm": 6.482819080352783, "learning_rate": 1.891042136408976e-05, "loss": 2.0323, "step": 26110 }, { "epoch": 0.1641702512128533, "grad_norm": 8.437029838562012, "learning_rate": 1.8910002263145102e-05, "loss": 1.9024, "step": 26120 }, { "epoch": 0.16423310352955042, "grad_norm": 7.370238780975342, "learning_rate": 1.890958316220045e-05, "loss": 2.0056, "step": 26130 }, { "epoch": 0.1642959558462475, "grad_norm": 7.479917526245117, "learning_rate": 1.8909164061255796e-05, "loss": 2.052, "step": 26140 }, { "epoch": 0.16435880816294463, "grad_norm": 8.088809967041016, "learning_rate": 1.8908744960311144e-05, "loss": 1.9173, "step": 26150 }, { "epoch": 0.16442166047964174, "grad_norm": 7.7105560302734375, "learning_rate": 1.8908325859366487e-05, "loss": 2.1858, "step": 26160 }, { "epoch": 0.16448451279633886, "grad_norm": 7.6701836585998535, "learning_rate": 1.8907906758421834e-05, "loss": 1.9759, "step": 26170 }, { "epoch": 0.16454736511303597, "grad_norm": 7.496591091156006, "learning_rate": 1.890748765747718e-05, "loss": 2.0881, "step": 26180 }, { "epoch": 0.16461021742973309, "grad_norm": 8.055353164672852, "learning_rate": 1.890706855653253e-05, "loss": 2.1669, "step": 26190 }, { "epoch": 0.16467306974643017, "grad_norm": 7.290207386016846, "learning_rate": 1.8906649455587875e-05, "loss": 2.2242, "step": 26200 }, { "epoch": 0.1647359220631273, "grad_norm": 7.497858047485352, "learning_rate": 1.890623035464322e-05, "loss": 1.9517, "step": 26210 }, { "epoch": 0.1647987743798244, "grad_norm": 9.148297309875488, "learning_rate": 1.8905811253698566e-05, "loss": 2.1669, "step": 26220 }, { "epoch": 0.16486162669652152, "grad_norm": 7.774691104888916, "learning_rate": 1.8905392152753913e-05, "loss": 2.1396, "step": 26230 }, { "epoch": 0.16492447901321863, "grad_norm": 7.218595504760742, "learning_rate": 1.890497305180926e-05, "loss": 2.0969, "step": 26240 }, { "epoch": 0.16498733132991575, "grad_norm": 7.237490653991699, "learning_rate": 1.8904553950864607e-05, "loss": 1.8568, "step": 26250 }, { "epoch": 0.16505018364661284, "grad_norm": 7.246071815490723, "learning_rate": 1.8904134849919955e-05, "loss": 2.1812, "step": 26260 }, { "epoch": 0.16511303596330995, "grad_norm": 8.438283920288086, "learning_rate": 1.8903715748975298e-05, "loss": 2.1437, "step": 26270 }, { "epoch": 0.16517588828000707, "grad_norm": 6.827730655670166, "learning_rate": 1.8903296648030645e-05, "loss": 1.963, "step": 26280 }, { "epoch": 0.16523874059670418, "grad_norm": 8.639034271240234, "learning_rate": 1.8902877547085992e-05, "loss": 2.0042, "step": 26290 }, { "epoch": 0.1653015929134013, "grad_norm": 6.844017028808594, "learning_rate": 1.890245844614134e-05, "loss": 2.1669, "step": 26300 }, { "epoch": 0.1653644452300984, "grad_norm": 6.324168682098389, "learning_rate": 1.8902039345196686e-05, "loss": 2.0545, "step": 26310 }, { "epoch": 0.16542729754679553, "grad_norm": 6.322104454040527, "learning_rate": 1.8901620244252034e-05, "loss": 1.9711, "step": 26320 }, { "epoch": 0.1654901498634926, "grad_norm": 12.182509422302246, "learning_rate": 1.890120114330738e-05, "loss": 1.9883, "step": 26330 }, { "epoch": 0.16555300218018973, "grad_norm": 7.797282695770264, "learning_rate": 1.8900782042362724e-05, "loss": 2.1898, "step": 26340 }, { "epoch": 0.16561585449688684, "grad_norm": 7.0130615234375, "learning_rate": 1.890036294141807e-05, "loss": 1.9196, "step": 26350 }, { "epoch": 0.16567870681358396, "grad_norm": 6.654932498931885, "learning_rate": 1.889994384047342e-05, "loss": 2.0265, "step": 26360 }, { "epoch": 0.16574155913028107, "grad_norm": 8.605035781860352, "learning_rate": 1.8899524739528766e-05, "loss": 2.2541, "step": 26370 }, { "epoch": 0.1658044114469782, "grad_norm": 6.103670120239258, "learning_rate": 1.889910563858411e-05, "loss": 2.1181, "step": 26380 }, { "epoch": 0.16586726376367528, "grad_norm": 7.237303733825684, "learning_rate": 1.8898686537639456e-05, "loss": 2.0054, "step": 26390 }, { "epoch": 0.1659301160803724, "grad_norm": 7.135437488555908, "learning_rate": 1.8898267436694803e-05, "loss": 1.9511, "step": 26400 }, { "epoch": 0.1659929683970695, "grad_norm": 7.38407039642334, "learning_rate": 1.889784833575015e-05, "loss": 1.8589, "step": 26410 }, { "epoch": 0.16605582071376662, "grad_norm": 6.739563465118408, "learning_rate": 1.8897429234805497e-05, "loss": 2.1611, "step": 26420 }, { "epoch": 0.16611867303046374, "grad_norm": 8.108920097351074, "learning_rate": 1.889701013386084e-05, "loss": 2.0602, "step": 26430 }, { "epoch": 0.16618152534716085, "grad_norm": 8.454718589782715, "learning_rate": 1.8896591032916188e-05, "loss": 2.1088, "step": 26440 }, { "epoch": 0.16624437766385797, "grad_norm": 6.543862819671631, "learning_rate": 1.8896171931971535e-05, "loss": 2.1595, "step": 26450 }, { "epoch": 0.16630722998055505, "grad_norm": 6.936466217041016, "learning_rate": 1.8895752831026882e-05, "loss": 1.9975, "step": 26460 }, { "epoch": 0.16637008229725217, "grad_norm": 8.221345901489258, "learning_rate": 1.889533373008223e-05, "loss": 2.0402, "step": 26470 }, { "epoch": 0.16643293461394928, "grad_norm": 8.026082038879395, "learning_rate": 1.8894914629137577e-05, "loss": 2.0939, "step": 26480 }, { "epoch": 0.1664957869306464, "grad_norm": 7.128113269805908, "learning_rate": 1.8894495528192924e-05, "loss": 1.9315, "step": 26490 }, { "epoch": 0.16655863924734352, "grad_norm": 7.8659491539001465, "learning_rate": 1.889407642724827e-05, "loss": 2.0228, "step": 26500 }, { "epoch": 0.16662149156404063, "grad_norm": 6.884822368621826, "learning_rate": 1.8893657326303618e-05, "loss": 2.2244, "step": 26510 }, { "epoch": 0.16668434388073772, "grad_norm": 7.679152488708496, "learning_rate": 1.889323822535896e-05, "loss": 2.0107, "step": 26520 }, { "epoch": 0.16674719619743483, "grad_norm": 7.619312763214111, "learning_rate": 1.889281912441431e-05, "loss": 1.9638, "step": 26530 }, { "epoch": 0.16681004851413195, "grad_norm": 7.260880947113037, "learning_rate": 1.8892400023469656e-05, "loss": 2.0733, "step": 26540 }, { "epoch": 0.16687290083082906, "grad_norm": 7.044139385223389, "learning_rate": 1.8891980922525003e-05, "loss": 2.1626, "step": 26550 }, { "epoch": 0.16693575314752618, "grad_norm": 7.697073936462402, "learning_rate": 1.8891561821580346e-05, "loss": 1.9792, "step": 26560 }, { "epoch": 0.1669986054642233, "grad_norm": 7.447133541107178, "learning_rate": 1.8891142720635693e-05, "loss": 2.0448, "step": 26570 }, { "epoch": 0.16706145778092038, "grad_norm": 7.185406684875488, "learning_rate": 1.889072361969104e-05, "loss": 2.2215, "step": 26580 }, { "epoch": 0.1671243100976175, "grad_norm": 8.131444931030273, "learning_rate": 1.8890304518746388e-05, "loss": 2.1474, "step": 26590 }, { "epoch": 0.1671871624143146, "grad_norm": 6.428308010101318, "learning_rate": 1.8889885417801735e-05, "loss": 2.1216, "step": 26600 }, { "epoch": 0.16725001473101173, "grad_norm": 7.927680969238281, "learning_rate": 1.8889466316857078e-05, "loss": 2.0213, "step": 26610 }, { "epoch": 0.16731286704770884, "grad_norm": 7.277192115783691, "learning_rate": 1.8889047215912425e-05, "loss": 1.8976, "step": 26620 }, { "epoch": 0.16737571936440596, "grad_norm": 7.823034763336182, "learning_rate": 1.8888628114967772e-05, "loss": 1.9142, "step": 26630 }, { "epoch": 0.16743857168110307, "grad_norm": 7.691032886505127, "learning_rate": 1.888820901402312e-05, "loss": 2.1413, "step": 26640 }, { "epoch": 0.16750142399780016, "grad_norm": 7.83949613571167, "learning_rate": 1.8887789913078463e-05, "loss": 2.1934, "step": 26650 }, { "epoch": 0.16756427631449727, "grad_norm": 7.6085944175720215, "learning_rate": 1.888737081213381e-05, "loss": 2.0881, "step": 26660 }, { "epoch": 0.1676271286311944, "grad_norm": 7.7130446434021, "learning_rate": 1.8886951711189157e-05, "loss": 2.0636, "step": 26670 }, { "epoch": 0.1676899809478915, "grad_norm": 6.7432026863098145, "learning_rate": 1.8886532610244504e-05, "loss": 1.8504, "step": 26680 }, { "epoch": 0.16775283326458862, "grad_norm": 7.776494026184082, "learning_rate": 1.888611350929985e-05, "loss": 2.0823, "step": 26690 }, { "epoch": 0.16781568558128573, "grad_norm": 6.37518835067749, "learning_rate": 1.88856944083552e-05, "loss": 2.0266, "step": 26700 }, { "epoch": 0.16787853789798282, "grad_norm": 7.716270923614502, "learning_rate": 1.8885275307410546e-05, "loss": 2.1018, "step": 26710 }, { "epoch": 0.16794139021467994, "grad_norm": 7.457879543304443, "learning_rate": 1.8884856206465893e-05, "loss": 2.296, "step": 26720 }, { "epoch": 0.16800424253137705, "grad_norm": 7.101696014404297, "learning_rate": 1.888443710552124e-05, "loss": 2.1638, "step": 26730 }, { "epoch": 0.16806709484807417, "grad_norm": 7.180225372314453, "learning_rate": 1.8884018004576583e-05, "loss": 1.7523, "step": 26740 }, { "epoch": 0.16812994716477128, "grad_norm": 6.898594379425049, "learning_rate": 1.888359890363193e-05, "loss": 2.0864, "step": 26750 }, { "epoch": 0.1681927994814684, "grad_norm": 7.147555351257324, "learning_rate": 1.8883179802687278e-05, "loss": 1.9752, "step": 26760 }, { "epoch": 0.16825565179816548, "grad_norm": 7.030953884124756, "learning_rate": 1.8882760701742625e-05, "loss": 2.0757, "step": 26770 }, { "epoch": 0.1683185041148626, "grad_norm": 8.195552825927734, "learning_rate": 1.888234160079797e-05, "loss": 1.9025, "step": 26780 }, { "epoch": 0.16838135643155971, "grad_norm": 6.486485004425049, "learning_rate": 1.8881922499853315e-05, "loss": 2.0026, "step": 26790 }, { "epoch": 0.16844420874825683, "grad_norm": 7.766326427459717, "learning_rate": 1.8881503398908662e-05, "loss": 2.0227, "step": 26800 }, { "epoch": 0.16850706106495394, "grad_norm": 7.248443603515625, "learning_rate": 1.888108429796401e-05, "loss": 1.962, "step": 26810 }, { "epoch": 0.16856991338165106, "grad_norm": 7.111790180206299, "learning_rate": 1.8880665197019357e-05, "loss": 2.094, "step": 26820 }, { "epoch": 0.16863276569834817, "grad_norm": 7.6839118003845215, "learning_rate": 1.88802460960747e-05, "loss": 2.2033, "step": 26830 }, { "epoch": 0.16869561801504526, "grad_norm": 8.051858901977539, "learning_rate": 1.8879826995130047e-05, "loss": 2.1157, "step": 26840 }, { "epoch": 0.16875847033174238, "grad_norm": 8.392078399658203, "learning_rate": 1.8879407894185394e-05, "loss": 2.0272, "step": 26850 }, { "epoch": 0.1688213226484395, "grad_norm": 7.104002475738525, "learning_rate": 1.887898879324074e-05, "loss": 2.0796, "step": 26860 }, { "epoch": 0.1688841749651366, "grad_norm": 7.312976360321045, "learning_rate": 1.887856969229609e-05, "loss": 2.0547, "step": 26870 }, { "epoch": 0.16894702728183372, "grad_norm": 6.258914947509766, "learning_rate": 1.8878150591351436e-05, "loss": 2.2227, "step": 26880 }, { "epoch": 0.16900987959853084, "grad_norm": 7.952404499053955, "learning_rate": 1.887773149040678e-05, "loss": 2.2863, "step": 26890 }, { "epoch": 0.16907273191522793, "grad_norm": 7.923120975494385, "learning_rate": 1.8877312389462126e-05, "loss": 1.9709, "step": 26900 }, { "epoch": 0.16913558423192504, "grad_norm": 6.05596399307251, "learning_rate": 1.8876893288517473e-05, "loss": 2.0636, "step": 26910 }, { "epoch": 0.16919843654862216, "grad_norm": 7.628379821777344, "learning_rate": 1.887647418757282e-05, "loss": 2.0983, "step": 26920 }, { "epoch": 0.16926128886531927, "grad_norm": 7.406306743621826, "learning_rate": 1.8876055086628168e-05, "loss": 1.9923, "step": 26930 }, { "epoch": 0.16932414118201639, "grad_norm": 7.642539024353027, "learning_rate": 1.8875635985683515e-05, "loss": 2.0245, "step": 26940 }, { "epoch": 0.1693869934987135, "grad_norm": 7.147662162780762, "learning_rate": 1.8875216884738862e-05, "loss": 1.8431, "step": 26950 }, { "epoch": 0.16944984581541062, "grad_norm": 7.125799179077148, "learning_rate": 1.8874797783794205e-05, "loss": 2.195, "step": 26960 }, { "epoch": 0.1695126981321077, "grad_norm": 8.474396705627441, "learning_rate": 1.8874378682849552e-05, "loss": 2.0423, "step": 26970 }, { "epoch": 0.16957555044880482, "grad_norm": 7.183737754821777, "learning_rate": 1.88739595819049e-05, "loss": 1.7227, "step": 26980 }, { "epoch": 0.16963840276550193, "grad_norm": 7.125051975250244, "learning_rate": 1.8873540480960247e-05, "loss": 2.0083, "step": 26990 }, { "epoch": 0.16970125508219905, "grad_norm": 7.695666790008545, "learning_rate": 1.887312138001559e-05, "loss": 2.1497, "step": 27000 }, { "epoch": 0.16976410739889616, "grad_norm": 7.176787376403809, "learning_rate": 1.8872702279070937e-05, "loss": 2.3338, "step": 27010 }, { "epoch": 0.16982695971559328, "grad_norm": 7.642787933349609, "learning_rate": 1.8872283178126284e-05, "loss": 2.1084, "step": 27020 }, { "epoch": 0.16988981203229037, "grad_norm": 7.733269691467285, "learning_rate": 1.887186407718163e-05, "loss": 2.1504, "step": 27030 }, { "epoch": 0.16995266434898748, "grad_norm": 7.00707483291626, "learning_rate": 1.887144497623698e-05, "loss": 2.192, "step": 27040 }, { "epoch": 0.1700155166656846, "grad_norm": 6.551406383514404, "learning_rate": 1.8871025875292322e-05, "loss": 1.9258, "step": 27050 }, { "epoch": 0.1700783689823817, "grad_norm": 8.196138381958008, "learning_rate": 1.887060677434767e-05, "loss": 1.7894, "step": 27060 }, { "epoch": 0.17014122129907883, "grad_norm": 7.6168107986450195, "learning_rate": 1.8870187673403016e-05, "loss": 2.0308, "step": 27070 }, { "epoch": 0.17020407361577594, "grad_norm": 7.582335948944092, "learning_rate": 1.8869768572458363e-05, "loss": 2.0958, "step": 27080 }, { "epoch": 0.17026692593247303, "grad_norm": 7.733460903167725, "learning_rate": 1.886934947151371e-05, "loss": 2.0958, "step": 27090 }, { "epoch": 0.17032977824917014, "grad_norm": 7.207364082336426, "learning_rate": 1.8868930370569058e-05, "loss": 2.0868, "step": 27100 }, { "epoch": 0.17039263056586726, "grad_norm": 7.0060296058654785, "learning_rate": 1.8868511269624405e-05, "loss": 1.9792, "step": 27110 }, { "epoch": 0.17045548288256437, "grad_norm": 6.219759464263916, "learning_rate": 1.8868092168679752e-05, "loss": 2.0264, "step": 27120 }, { "epoch": 0.1705183351992615, "grad_norm": 7.041900634765625, "learning_rate": 1.88676730677351e-05, "loss": 2.0412, "step": 27130 }, { "epoch": 0.1705811875159586, "grad_norm": 7.1881890296936035, "learning_rate": 1.8867253966790443e-05, "loss": 1.8791, "step": 27140 }, { "epoch": 0.17064403983265572, "grad_norm": 7.2067341804504395, "learning_rate": 1.886683486584579e-05, "loss": 1.9835, "step": 27150 }, { "epoch": 0.1707068921493528, "grad_norm": 6.838316440582275, "learning_rate": 1.8866415764901137e-05, "loss": 1.8088, "step": 27160 }, { "epoch": 0.17076974446604992, "grad_norm": 6.98081111907959, "learning_rate": 1.8865996663956484e-05, "loss": 2.108, "step": 27170 }, { "epoch": 0.17083259678274704, "grad_norm": 7.362522125244141, "learning_rate": 1.8865577563011827e-05, "loss": 1.9587, "step": 27180 }, { "epoch": 0.17089544909944415, "grad_norm": 7.864029407501221, "learning_rate": 1.8865158462067174e-05, "loss": 2.0782, "step": 27190 }, { "epoch": 0.17095830141614127, "grad_norm": 6.886123180389404, "learning_rate": 1.886473936112252e-05, "loss": 1.8335, "step": 27200 }, { "epoch": 0.17102115373283838, "grad_norm": 6.821227550506592, "learning_rate": 1.886432026017787e-05, "loss": 2.0717, "step": 27210 }, { "epoch": 0.17108400604953547, "grad_norm": 7.68241548538208, "learning_rate": 1.8863901159233216e-05, "loss": 2.064, "step": 27220 }, { "epoch": 0.17114685836623258, "grad_norm": 6.796754837036133, "learning_rate": 1.886348205828856e-05, "loss": 2.0502, "step": 27230 }, { "epoch": 0.1712097106829297, "grad_norm": 6.333973407745361, "learning_rate": 1.8863062957343906e-05, "loss": 1.869, "step": 27240 }, { "epoch": 0.17127256299962682, "grad_norm": 9.181356430053711, "learning_rate": 1.8862643856399254e-05, "loss": 2.0765, "step": 27250 }, { "epoch": 0.17133541531632393, "grad_norm": 6.63329553604126, "learning_rate": 1.88622247554546e-05, "loss": 1.8724, "step": 27260 }, { "epoch": 0.17139826763302105, "grad_norm": 7.677600860595703, "learning_rate": 1.8861805654509944e-05, "loss": 1.8641, "step": 27270 }, { "epoch": 0.17146111994971816, "grad_norm": 8.482681274414062, "learning_rate": 1.886138655356529e-05, "loss": 1.818, "step": 27280 }, { "epoch": 0.17152397226641525, "grad_norm": 9.265092849731445, "learning_rate": 1.886096745262064e-05, "loss": 2.2261, "step": 27290 }, { "epoch": 0.17158682458311236, "grad_norm": 8.26068115234375, "learning_rate": 1.8860548351675985e-05, "loss": 1.9023, "step": 27300 }, { "epoch": 0.17164967689980948, "grad_norm": 7.694910526275635, "learning_rate": 1.8860129250731333e-05, "loss": 2.0018, "step": 27310 }, { "epoch": 0.1717125292165066, "grad_norm": 7.138001918792725, "learning_rate": 1.885971014978668e-05, "loss": 2.173, "step": 27320 }, { "epoch": 0.1717753815332037, "grad_norm": 6.843593597412109, "learning_rate": 1.8859291048842027e-05, "loss": 1.8752, "step": 27330 }, { "epoch": 0.17183823384990082, "grad_norm": 8.503833770751953, "learning_rate": 1.8858871947897374e-05, "loss": 1.8912, "step": 27340 }, { "epoch": 0.1719010861665979, "grad_norm": 7.459292888641357, "learning_rate": 1.885845284695272e-05, "loss": 1.9018, "step": 27350 }, { "epoch": 0.17196393848329503, "grad_norm": 6.216436862945557, "learning_rate": 1.8858033746008065e-05, "loss": 1.8553, "step": 27360 }, { "epoch": 0.17202679079999214, "grad_norm": 8.26440715789795, "learning_rate": 1.885761464506341e-05, "loss": 2.0397, "step": 27370 }, { "epoch": 0.17208964311668926, "grad_norm": 6.738312721252441, "learning_rate": 1.885719554411876e-05, "loss": 2.0152, "step": 27380 }, { "epoch": 0.17215249543338637, "grad_norm": 6.979240894317627, "learning_rate": 1.8856776443174106e-05, "loss": 1.9694, "step": 27390 }, { "epoch": 0.1722153477500835, "grad_norm": 7.805268287658691, "learning_rate": 1.885635734222945e-05, "loss": 1.9635, "step": 27400 }, { "epoch": 0.17227820006678057, "grad_norm": 7.3537917137146, "learning_rate": 1.8855938241284796e-05, "loss": 2.1664, "step": 27410 }, { "epoch": 0.1723410523834777, "grad_norm": 7.4962615966796875, "learning_rate": 1.8855519140340144e-05, "loss": 2.1061, "step": 27420 }, { "epoch": 0.1724039047001748, "grad_norm": 7.050327777862549, "learning_rate": 1.885510003939549e-05, "loss": 2.2767, "step": 27430 }, { "epoch": 0.17246675701687192, "grad_norm": 7.685523986816406, "learning_rate": 1.8854680938450838e-05, "loss": 1.9987, "step": 27440 }, { "epoch": 0.17252960933356903, "grad_norm": 7.283895015716553, "learning_rate": 1.885426183750618e-05, "loss": 1.8152, "step": 27450 }, { "epoch": 0.17259246165026615, "grad_norm": 7.353915691375732, "learning_rate": 1.885384273656153e-05, "loss": 2.2302, "step": 27460 }, { "epoch": 0.17265531396696326, "grad_norm": 7.832383155822754, "learning_rate": 1.8853423635616876e-05, "loss": 2.0384, "step": 27470 }, { "epoch": 0.17271816628366035, "grad_norm": 8.037903785705566, "learning_rate": 1.8853004534672223e-05, "loss": 2.2001, "step": 27480 }, { "epoch": 0.17278101860035747, "grad_norm": 8.03400993347168, "learning_rate": 1.885258543372757e-05, "loss": 1.7838, "step": 27490 }, { "epoch": 0.17284387091705458, "grad_norm": 7.769232273101807, "learning_rate": 1.8852166332782917e-05, "loss": 1.9368, "step": 27500 }, { "epoch": 0.1729067232337517, "grad_norm": 8.408327102661133, "learning_rate": 1.8851747231838264e-05, "loss": 2.1784, "step": 27510 }, { "epoch": 0.1729695755504488, "grad_norm": 6.302871227264404, "learning_rate": 1.8851328130893607e-05, "loss": 2.1831, "step": 27520 }, { "epoch": 0.17303242786714593, "grad_norm": 7.579148769378662, "learning_rate": 1.8850909029948955e-05, "loss": 1.984, "step": 27530 }, { "epoch": 0.17309528018384301, "grad_norm": 5.984960556030273, "learning_rate": 1.88504899290043e-05, "loss": 2.1541, "step": 27540 }, { "epoch": 0.17315813250054013, "grad_norm": 6.045041561126709, "learning_rate": 1.885007082805965e-05, "loss": 1.9877, "step": 27550 }, { "epoch": 0.17322098481723724, "grad_norm": 7.484976768493652, "learning_rate": 1.8849651727114996e-05, "loss": 2.097, "step": 27560 }, { "epoch": 0.17328383713393436, "grad_norm": 8.815256118774414, "learning_rate": 1.8849232626170343e-05, "loss": 1.9681, "step": 27570 }, { "epoch": 0.17334668945063147, "grad_norm": 6.937033653259277, "learning_rate": 1.8848813525225687e-05, "loss": 1.8963, "step": 27580 }, { "epoch": 0.1734095417673286, "grad_norm": 7.550961494445801, "learning_rate": 1.8848394424281034e-05, "loss": 1.8807, "step": 27590 }, { "epoch": 0.17347239408402568, "grad_norm": 7.170886993408203, "learning_rate": 1.884797532333638e-05, "loss": 1.9588, "step": 27600 }, { "epoch": 0.1735352464007228, "grad_norm": 8.13257884979248, "learning_rate": 1.8847556222391728e-05, "loss": 2.0812, "step": 27610 }, { "epoch": 0.1735980987174199, "grad_norm": 6.796542167663574, "learning_rate": 1.884713712144707e-05, "loss": 1.788, "step": 27620 }, { "epoch": 0.17366095103411702, "grad_norm": 6.174264430999756, "learning_rate": 1.884671802050242e-05, "loss": 1.8799, "step": 27630 }, { "epoch": 0.17372380335081414, "grad_norm": 6.56820821762085, "learning_rate": 1.8846298919557766e-05, "loss": 1.9007, "step": 27640 }, { "epoch": 0.17378665566751125, "grad_norm": 8.850946426391602, "learning_rate": 1.8845879818613113e-05, "loss": 2.0807, "step": 27650 }, { "epoch": 0.17384950798420837, "grad_norm": 8.081398963928223, "learning_rate": 1.884546071766846e-05, "loss": 1.9101, "step": 27660 }, { "epoch": 0.17391236030090546, "grad_norm": 7.104781150817871, "learning_rate": 1.8845041616723803e-05, "loss": 1.8105, "step": 27670 }, { "epoch": 0.17397521261760257, "grad_norm": 6.3523173332214355, "learning_rate": 1.884462251577915e-05, "loss": 1.9426, "step": 27680 }, { "epoch": 0.17403806493429969, "grad_norm": 6.756746768951416, "learning_rate": 1.8844203414834498e-05, "loss": 2.2213, "step": 27690 }, { "epoch": 0.1741009172509968, "grad_norm": 7.961772441864014, "learning_rate": 1.8843784313889845e-05, "loss": 1.9764, "step": 27700 }, { "epoch": 0.17416376956769392, "grad_norm": 7.900453090667725, "learning_rate": 1.884336521294519e-05, "loss": 2.0987, "step": 27710 }, { "epoch": 0.17422662188439103, "grad_norm": 8.148445129394531, "learning_rate": 1.884294611200054e-05, "loss": 1.9654, "step": 27720 }, { "epoch": 0.17428947420108812, "grad_norm": 7.783229351043701, "learning_rate": 1.8842527011055886e-05, "loss": 1.9184, "step": 27730 }, { "epoch": 0.17435232651778523, "grad_norm": 8.24295711517334, "learning_rate": 1.8842107910111233e-05, "loss": 2.1139, "step": 27740 }, { "epoch": 0.17441517883448235, "grad_norm": 8.228842735290527, "learning_rate": 1.884168880916658e-05, "loss": 2.1052, "step": 27750 }, { "epoch": 0.17447803115117946, "grad_norm": 6.867830276489258, "learning_rate": 1.8841269708221924e-05, "loss": 2.0018, "step": 27760 }, { "epoch": 0.17454088346787658, "grad_norm": 6.347137928009033, "learning_rate": 1.884085060727727e-05, "loss": 1.9588, "step": 27770 }, { "epoch": 0.1746037357845737, "grad_norm": 7.055315017700195, "learning_rate": 1.8840431506332618e-05, "loss": 2.227, "step": 27780 }, { "epoch": 0.1746665881012708, "grad_norm": 7.518227577209473, "learning_rate": 1.8840012405387965e-05, "loss": 1.93, "step": 27790 }, { "epoch": 0.1747294404179679, "grad_norm": 8.064926147460938, "learning_rate": 1.883959330444331e-05, "loss": 2.0228, "step": 27800 }, { "epoch": 0.174792292734665, "grad_norm": 8.549365043640137, "learning_rate": 1.8839174203498656e-05, "loss": 1.9405, "step": 27810 }, { "epoch": 0.17485514505136213, "grad_norm": 7.1288228034973145, "learning_rate": 1.8838755102554003e-05, "loss": 2.3457, "step": 27820 }, { "epoch": 0.17491799736805924, "grad_norm": 6.547165870666504, "learning_rate": 1.883833600160935e-05, "loss": 2.0101, "step": 27830 }, { "epoch": 0.17498084968475636, "grad_norm": 7.813448905944824, "learning_rate": 1.8837916900664697e-05, "loss": 1.825, "step": 27840 }, { "epoch": 0.17504370200145347, "grad_norm": 6.4091410636901855, "learning_rate": 1.883749779972004e-05, "loss": 1.8868, "step": 27850 }, { "epoch": 0.17510655431815056, "grad_norm": 8.775810241699219, "learning_rate": 1.8837078698775388e-05, "loss": 1.9993, "step": 27860 }, { "epoch": 0.17516940663484767, "grad_norm": 7.959566593170166, "learning_rate": 1.8836659597830735e-05, "loss": 2.2209, "step": 27870 }, { "epoch": 0.1752322589515448, "grad_norm": 7.217938423156738, "learning_rate": 1.8836240496886082e-05, "loss": 2.1552, "step": 27880 }, { "epoch": 0.1752951112682419, "grad_norm": 7.476831912994385, "learning_rate": 1.883582139594143e-05, "loss": 2.0758, "step": 27890 }, { "epoch": 0.17535796358493902, "grad_norm": 6.8728461265563965, "learning_rate": 1.8835402294996772e-05, "loss": 2.0728, "step": 27900 }, { "epoch": 0.17542081590163613, "grad_norm": 7.740730285644531, "learning_rate": 1.883498319405212e-05, "loss": 1.8849, "step": 27910 }, { "epoch": 0.17548366821833322, "grad_norm": 6.934227466583252, "learning_rate": 1.8834564093107467e-05, "loss": 1.8524, "step": 27920 }, { "epoch": 0.17554652053503034, "grad_norm": 6.788956165313721, "learning_rate": 1.8834144992162814e-05, "loss": 1.92, "step": 27930 }, { "epoch": 0.17560937285172745, "grad_norm": 8.07392692565918, "learning_rate": 1.883372589121816e-05, "loss": 2.0643, "step": 27940 }, { "epoch": 0.17567222516842457, "grad_norm": 8.197890281677246, "learning_rate": 1.8833306790273508e-05, "loss": 2.2463, "step": 27950 }, { "epoch": 0.17573507748512168, "grad_norm": 7.5556721687316895, "learning_rate": 1.8832887689328855e-05, "loss": 2.1021, "step": 27960 }, { "epoch": 0.1757979298018188, "grad_norm": 6.970726013183594, "learning_rate": 1.8832468588384202e-05, "loss": 1.8875, "step": 27970 }, { "epoch": 0.1758607821185159, "grad_norm": 7.748169422149658, "learning_rate": 1.8832049487439546e-05, "loss": 1.8909, "step": 27980 }, { "epoch": 0.175923634435213, "grad_norm": 7.20039176940918, "learning_rate": 1.8831630386494893e-05, "loss": 1.8259, "step": 27990 }, { "epoch": 0.17598648675191012, "grad_norm": 8.26569938659668, "learning_rate": 1.883121128555024e-05, "loss": 2.378, "step": 28000 }, { "epoch": 0.17604933906860723, "grad_norm": 7.183000087738037, "learning_rate": 1.8830792184605587e-05, "loss": 2.3396, "step": 28010 }, { "epoch": 0.17611219138530435, "grad_norm": 5.683530330657959, "learning_rate": 1.883037308366093e-05, "loss": 1.9981, "step": 28020 }, { "epoch": 0.17617504370200146, "grad_norm": 7.313313961029053, "learning_rate": 1.8829953982716278e-05, "loss": 2.0859, "step": 28030 }, { "epoch": 0.17623789601869858, "grad_norm": 7.710696220397949, "learning_rate": 1.8829534881771625e-05, "loss": 2.0126, "step": 28040 }, { "epoch": 0.17630074833539566, "grad_norm": 7.8255510330200195, "learning_rate": 1.8829115780826972e-05, "loss": 1.8825, "step": 28050 }, { "epoch": 0.17636360065209278, "grad_norm": 8.450091361999512, "learning_rate": 1.882869667988232e-05, "loss": 2.1833, "step": 28060 }, { "epoch": 0.1764264529687899, "grad_norm": 8.356134414672852, "learning_rate": 1.8828277578937662e-05, "loss": 1.9364, "step": 28070 }, { "epoch": 0.176489305285487, "grad_norm": 6.771615505218506, "learning_rate": 1.882785847799301e-05, "loss": 1.9951, "step": 28080 }, { "epoch": 0.17655215760218412, "grad_norm": 7.926447868347168, "learning_rate": 1.8827439377048357e-05, "loss": 2.0846, "step": 28090 }, { "epoch": 0.17661500991888124, "grad_norm": 7.876333236694336, "learning_rate": 1.8827020276103704e-05, "loss": 1.9776, "step": 28100 }, { "epoch": 0.17667786223557833, "grad_norm": 6.883592128753662, "learning_rate": 1.882660117515905e-05, "loss": 2.2083, "step": 28110 }, { "epoch": 0.17674071455227544, "grad_norm": 6.218436241149902, "learning_rate": 1.8826182074214398e-05, "loss": 1.9894, "step": 28120 }, { "epoch": 0.17680356686897256, "grad_norm": 8.508634567260742, "learning_rate": 1.8825762973269745e-05, "loss": 2.1307, "step": 28130 }, { "epoch": 0.17686641918566967, "grad_norm": 7.165008068084717, "learning_rate": 1.8825343872325092e-05, "loss": 2.0333, "step": 28140 }, { "epoch": 0.1769292715023668, "grad_norm": 7.859193325042725, "learning_rate": 1.8824924771380436e-05, "loss": 1.9486, "step": 28150 }, { "epoch": 0.1769921238190639, "grad_norm": 6.977528095245361, "learning_rate": 1.8824505670435783e-05, "loss": 1.9967, "step": 28160 }, { "epoch": 0.17705497613576102, "grad_norm": 6.745163440704346, "learning_rate": 1.882408656949113e-05, "loss": 1.9882, "step": 28170 }, { "epoch": 0.1771178284524581, "grad_norm": 7.4453840255737305, "learning_rate": 1.8823667468546477e-05, "loss": 2.1332, "step": 28180 }, { "epoch": 0.17718068076915522, "grad_norm": 7.185011863708496, "learning_rate": 1.8823248367601824e-05, "loss": 1.9738, "step": 28190 }, { "epoch": 0.17724353308585233, "grad_norm": 7.705361843109131, "learning_rate": 1.8822829266657168e-05, "loss": 2.3986, "step": 28200 }, { "epoch": 0.17730638540254945, "grad_norm": 7.561318874359131, "learning_rate": 1.8822410165712515e-05, "loss": 2.006, "step": 28210 }, { "epoch": 0.17736923771924656, "grad_norm": 6.81564998626709, "learning_rate": 1.8821991064767862e-05, "loss": 1.6773, "step": 28220 }, { "epoch": 0.17743209003594368, "grad_norm": 7.152491092681885, "learning_rate": 1.882157196382321e-05, "loss": 2.0266, "step": 28230 }, { "epoch": 0.17749494235264077, "grad_norm": 7.514481067657471, "learning_rate": 1.8821152862878553e-05, "loss": 1.9871, "step": 28240 }, { "epoch": 0.17755779466933788, "grad_norm": 7.656121253967285, "learning_rate": 1.88207337619339e-05, "loss": 2.1298, "step": 28250 }, { "epoch": 0.177620646986035, "grad_norm": 7.641325950622559, "learning_rate": 1.8820314660989247e-05, "loss": 2.1423, "step": 28260 }, { "epoch": 0.1776834993027321, "grad_norm": 8.13333511352539, "learning_rate": 1.8819895560044594e-05, "loss": 1.9955, "step": 28270 }, { "epoch": 0.17774635161942923, "grad_norm": 6.73756217956543, "learning_rate": 1.881947645909994e-05, "loss": 2.2309, "step": 28280 }, { "epoch": 0.17780920393612634, "grad_norm": 7.742393970489502, "learning_rate": 1.8819057358155284e-05, "loss": 2.0561, "step": 28290 }, { "epoch": 0.17787205625282346, "grad_norm": 6.713929653167725, "learning_rate": 1.881863825721063e-05, "loss": 1.931, "step": 28300 }, { "epoch": 0.17793490856952054, "grad_norm": 6.656432151794434, "learning_rate": 1.881821915626598e-05, "loss": 1.9597, "step": 28310 }, { "epoch": 0.17799776088621766, "grad_norm": 7.4837646484375, "learning_rate": 1.8817800055321326e-05, "loss": 2.1406, "step": 28320 }, { "epoch": 0.17806061320291477, "grad_norm": 6.651392459869385, "learning_rate": 1.8817380954376673e-05, "loss": 1.9187, "step": 28330 }, { "epoch": 0.1781234655196119, "grad_norm": 7.317150592803955, "learning_rate": 1.881696185343202e-05, "loss": 2.0177, "step": 28340 }, { "epoch": 0.178186317836309, "grad_norm": 7.599892616271973, "learning_rate": 1.8816542752487367e-05, "loss": 1.8601, "step": 28350 }, { "epoch": 0.17824917015300612, "grad_norm": 6.383946895599365, "learning_rate": 1.8816123651542714e-05, "loss": 1.9067, "step": 28360 }, { "epoch": 0.1783120224697032, "grad_norm": 8.048141479492188, "learning_rate": 1.881570455059806e-05, "loss": 1.9985, "step": 28370 }, { "epoch": 0.17837487478640032, "grad_norm": 8.132674217224121, "learning_rate": 1.8815285449653405e-05, "loss": 1.9002, "step": 28380 }, { "epoch": 0.17843772710309744, "grad_norm": 7.169011116027832, "learning_rate": 1.8814866348708752e-05, "loss": 2.0158, "step": 28390 }, { "epoch": 0.17850057941979455, "grad_norm": 8.223552703857422, "learning_rate": 1.88144472477641e-05, "loss": 2.1555, "step": 28400 }, { "epoch": 0.17856343173649167, "grad_norm": 8.827797889709473, "learning_rate": 1.8814028146819446e-05, "loss": 2.0891, "step": 28410 }, { "epoch": 0.17862628405318878, "grad_norm": 7.081093788146973, "learning_rate": 1.881360904587479e-05, "loss": 1.8826, "step": 28420 }, { "epoch": 0.17868913636988587, "grad_norm": 6.889060020446777, "learning_rate": 1.8813189944930137e-05, "loss": 2.0148, "step": 28430 }, { "epoch": 0.17875198868658299, "grad_norm": 7.420534610748291, "learning_rate": 1.8812770843985484e-05, "loss": 2.0423, "step": 28440 }, { "epoch": 0.1788148410032801, "grad_norm": 8.297712326049805, "learning_rate": 1.881235174304083e-05, "loss": 1.9754, "step": 28450 }, { "epoch": 0.17887769331997722, "grad_norm": 7.373516082763672, "learning_rate": 1.8811932642096178e-05, "loss": 1.8087, "step": 28460 }, { "epoch": 0.17894054563667433, "grad_norm": 8.368450164794922, "learning_rate": 1.881151354115152e-05, "loss": 2.0844, "step": 28470 }, { "epoch": 0.17900339795337145, "grad_norm": 7.975512504577637, "learning_rate": 1.881109444020687e-05, "loss": 1.9639, "step": 28480 }, { "epoch": 0.17906625027006856, "grad_norm": 8.503561019897461, "learning_rate": 1.8810675339262216e-05, "loss": 2.1627, "step": 28490 }, { "epoch": 0.17912910258676565, "grad_norm": 6.963150978088379, "learning_rate": 1.8810256238317563e-05, "loss": 1.9203, "step": 28500 }, { "epoch": 0.17919195490346276, "grad_norm": 7.963650703430176, "learning_rate": 1.880983713737291e-05, "loss": 2.1543, "step": 28510 }, { "epoch": 0.17925480722015988, "grad_norm": 7.235218048095703, "learning_rate": 1.8809418036428254e-05, "loss": 1.9921, "step": 28520 }, { "epoch": 0.179317659536857, "grad_norm": 7.330077648162842, "learning_rate": 1.88089989354836e-05, "loss": 2.1142, "step": 28530 }, { "epoch": 0.1793805118535541, "grad_norm": 7.96385383605957, "learning_rate": 1.8808579834538948e-05, "loss": 1.9681, "step": 28540 }, { "epoch": 0.17944336417025122, "grad_norm": 7.287219524383545, "learning_rate": 1.8808160733594295e-05, "loss": 2.0364, "step": 28550 }, { "epoch": 0.1795062164869483, "grad_norm": 7.122207164764404, "learning_rate": 1.8807741632649642e-05, "loss": 1.966, "step": 28560 }, { "epoch": 0.17956906880364543, "grad_norm": 7.921074867248535, "learning_rate": 1.880732253170499e-05, "loss": 1.8209, "step": 28570 }, { "epoch": 0.17963192112034254, "grad_norm": 7.608434677124023, "learning_rate": 1.8806903430760336e-05, "loss": 1.9363, "step": 28580 }, { "epoch": 0.17969477343703966, "grad_norm": 11.472769737243652, "learning_rate": 1.8806484329815683e-05, "loss": 1.9104, "step": 28590 }, { "epoch": 0.17975762575373677, "grad_norm": 7.228365421295166, "learning_rate": 1.8806065228871027e-05, "loss": 2.1976, "step": 28600 }, { "epoch": 0.1798204780704339, "grad_norm": 8.290234565734863, "learning_rate": 1.8805646127926374e-05, "loss": 2.2106, "step": 28610 }, { "epoch": 0.179883330387131, "grad_norm": 6.307222843170166, "learning_rate": 1.880522702698172e-05, "loss": 1.8733, "step": 28620 }, { "epoch": 0.1799461827038281, "grad_norm": 7.063374042510986, "learning_rate": 1.8804807926037068e-05, "loss": 1.9629, "step": 28630 }, { "epoch": 0.1800090350205252, "grad_norm": 8.154483795166016, "learning_rate": 1.880438882509241e-05, "loss": 2.1459, "step": 28640 }, { "epoch": 0.18007188733722232, "grad_norm": 6.897121906280518, "learning_rate": 1.880396972414776e-05, "loss": 1.8751, "step": 28650 }, { "epoch": 0.18013473965391943, "grad_norm": 6.4750142097473145, "learning_rate": 1.8803550623203106e-05, "loss": 1.8537, "step": 28660 }, { "epoch": 0.18019759197061655, "grad_norm": 7.589195728302002, "learning_rate": 1.8803131522258453e-05, "loss": 1.8881, "step": 28670 }, { "epoch": 0.18026044428731366, "grad_norm": 7.621940612792969, "learning_rate": 1.88027124213138e-05, "loss": 2.2198, "step": 28680 }, { "epoch": 0.18032329660401075, "grad_norm": 7.115721225738525, "learning_rate": 1.8802293320369144e-05, "loss": 1.9749, "step": 28690 }, { "epoch": 0.18038614892070787, "grad_norm": 7.395238876342773, "learning_rate": 1.880187421942449e-05, "loss": 2.0467, "step": 28700 }, { "epoch": 0.18044900123740498, "grad_norm": 6.3072285652160645, "learning_rate": 1.8801455118479838e-05, "loss": 1.7886, "step": 28710 }, { "epoch": 0.1805118535541021, "grad_norm": 6.7934184074401855, "learning_rate": 1.8801036017535185e-05, "loss": 2.2724, "step": 28720 }, { "epoch": 0.1805747058707992, "grad_norm": 7.178077220916748, "learning_rate": 1.8800616916590532e-05, "loss": 2.0401, "step": 28730 }, { "epoch": 0.18063755818749633, "grad_norm": 7.876676082611084, "learning_rate": 1.880019781564588e-05, "loss": 1.8102, "step": 28740 }, { "epoch": 0.18070041050419341, "grad_norm": 7.284195899963379, "learning_rate": 1.8799778714701226e-05, "loss": 1.9114, "step": 28750 }, { "epoch": 0.18076326282089053, "grad_norm": 6.8634257316589355, "learning_rate": 1.8799359613756573e-05, "loss": 2.0179, "step": 28760 }, { "epoch": 0.18082611513758765, "grad_norm": 7.710941314697266, "learning_rate": 1.8798940512811917e-05, "loss": 2.0999, "step": 28770 }, { "epoch": 0.18088896745428476, "grad_norm": 7.6825852394104, "learning_rate": 1.8798521411867264e-05, "loss": 1.9196, "step": 28780 }, { "epoch": 0.18095181977098188, "grad_norm": 7.826768398284912, "learning_rate": 1.879810231092261e-05, "loss": 2.1616, "step": 28790 }, { "epoch": 0.181014672087679, "grad_norm": 6.824447154998779, "learning_rate": 1.8797683209977958e-05, "loss": 1.9437, "step": 28800 }, { "epoch": 0.1810775244043761, "grad_norm": 5.521284103393555, "learning_rate": 1.8797264109033305e-05, "loss": 1.8503, "step": 28810 }, { "epoch": 0.1811403767210732, "grad_norm": 7.975641250610352, "learning_rate": 1.879684500808865e-05, "loss": 1.9168, "step": 28820 }, { "epoch": 0.1812032290377703, "grad_norm": 7.012667179107666, "learning_rate": 1.8796425907143996e-05, "loss": 1.9218, "step": 28830 }, { "epoch": 0.18126608135446742, "grad_norm": 8.359773635864258, "learning_rate": 1.8796006806199343e-05, "loss": 2.1473, "step": 28840 }, { "epoch": 0.18132893367116454, "grad_norm": 7.304141044616699, "learning_rate": 1.879558770525469e-05, "loss": 2.2035, "step": 28850 }, { "epoch": 0.18139178598786165, "grad_norm": 6.638857841491699, "learning_rate": 1.8795168604310037e-05, "loss": 1.9289, "step": 28860 }, { "epoch": 0.18145463830455877, "grad_norm": 7.668281555175781, "learning_rate": 1.879474950336538e-05, "loss": 2.037, "step": 28870 }, { "epoch": 0.18151749062125586, "grad_norm": 6.877059459686279, "learning_rate": 1.8794330402420728e-05, "loss": 1.8873, "step": 28880 }, { "epoch": 0.18158034293795297, "grad_norm": 6.669254302978516, "learning_rate": 1.8793911301476075e-05, "loss": 1.9665, "step": 28890 }, { "epoch": 0.18164319525465009, "grad_norm": 7.703857898712158, "learning_rate": 1.8793492200531422e-05, "loss": 1.8277, "step": 28900 }, { "epoch": 0.1817060475713472, "grad_norm": 7.366462707519531, "learning_rate": 1.8793073099586766e-05, "loss": 1.9984, "step": 28910 }, { "epoch": 0.18176889988804432, "grad_norm": 7.7693867683410645, "learning_rate": 1.8792653998642113e-05, "loss": 2.0154, "step": 28920 }, { "epoch": 0.18183175220474143, "grad_norm": 7.505848407745361, "learning_rate": 1.879223489769746e-05, "loss": 1.8839, "step": 28930 }, { "epoch": 0.18189460452143852, "grad_norm": 7.814448356628418, "learning_rate": 1.8791815796752807e-05, "loss": 1.8928, "step": 28940 }, { "epoch": 0.18195745683813563, "grad_norm": 7.27949333190918, "learning_rate": 1.8791396695808154e-05, "loss": 2.1309, "step": 28950 }, { "epoch": 0.18202030915483275, "grad_norm": 6.9486165046691895, "learning_rate": 1.87909775948635e-05, "loss": 2.0047, "step": 28960 }, { "epoch": 0.18208316147152986, "grad_norm": 7.7865447998046875, "learning_rate": 1.8790558493918848e-05, "loss": 2.0098, "step": 28970 }, { "epoch": 0.18214601378822698, "grad_norm": 7.254465103149414, "learning_rate": 1.8790139392974195e-05, "loss": 2.1038, "step": 28980 }, { "epoch": 0.1822088661049241, "grad_norm": 7.637696266174316, "learning_rate": 1.8789720292029542e-05, "loss": 2.1296, "step": 28990 }, { "epoch": 0.1822717184216212, "grad_norm": 8.04129409790039, "learning_rate": 1.8789301191084886e-05, "loss": 2.2248, "step": 29000 }, { "epoch": 0.1823345707383183, "grad_norm": 7.1449408531188965, "learning_rate": 1.8788882090140233e-05, "loss": 1.9733, "step": 29010 }, { "epoch": 0.1823974230550154, "grad_norm": 7.499587059020996, "learning_rate": 1.878846298919558e-05, "loss": 2.1853, "step": 29020 }, { "epoch": 0.18246027537171253, "grad_norm": 9.368136405944824, "learning_rate": 1.8788043888250927e-05, "loss": 2.1305, "step": 29030 }, { "epoch": 0.18252312768840964, "grad_norm": 7.33623743057251, "learning_rate": 1.878762478730627e-05, "loss": 1.9875, "step": 29040 }, { "epoch": 0.18258598000510676, "grad_norm": 8.26385498046875, "learning_rate": 1.8787205686361618e-05, "loss": 2.1754, "step": 29050 }, { "epoch": 0.18264883232180387, "grad_norm": 6.485950946807861, "learning_rate": 1.8786786585416965e-05, "loss": 2.0015, "step": 29060 }, { "epoch": 0.18271168463850096, "grad_norm": 6.6667962074279785, "learning_rate": 1.8786367484472312e-05, "loss": 1.9353, "step": 29070 }, { "epoch": 0.18277453695519807, "grad_norm": 6.05470609664917, "learning_rate": 1.878594838352766e-05, "loss": 1.7624, "step": 29080 }, { "epoch": 0.1828373892718952, "grad_norm": 7.437668800354004, "learning_rate": 1.8785529282583003e-05, "loss": 2.1268, "step": 29090 }, { "epoch": 0.1829002415885923, "grad_norm": 7.7610297203063965, "learning_rate": 1.878511018163835e-05, "loss": 1.9864, "step": 29100 }, { "epoch": 0.18296309390528942, "grad_norm": 6.294562339782715, "learning_rate": 1.8784691080693697e-05, "loss": 2.1718, "step": 29110 }, { "epoch": 0.18302594622198654, "grad_norm": 7.2774553298950195, "learning_rate": 1.8784271979749044e-05, "loss": 2.0012, "step": 29120 }, { "epoch": 0.18308879853868365, "grad_norm": 6.683416366577148, "learning_rate": 1.878385287880439e-05, "loss": 1.9754, "step": 29130 }, { "epoch": 0.18315165085538074, "grad_norm": 6.475184917449951, "learning_rate": 1.8783433777859738e-05, "loss": 1.9697, "step": 29140 }, { "epoch": 0.18321450317207785, "grad_norm": 6.563970565795898, "learning_rate": 1.8783014676915082e-05, "loss": 2.2486, "step": 29150 }, { "epoch": 0.18327735548877497, "grad_norm": 8.39940357208252, "learning_rate": 1.878259557597043e-05, "loss": 1.8987, "step": 29160 }, { "epoch": 0.18334020780547208, "grad_norm": 7.151874542236328, "learning_rate": 1.8782176475025776e-05, "loss": 1.8675, "step": 29170 }, { "epoch": 0.1834030601221692, "grad_norm": 6.557036399841309, "learning_rate": 1.8781757374081123e-05, "loss": 2.0887, "step": 29180 }, { "epoch": 0.1834659124388663, "grad_norm": 7.522086143493652, "learning_rate": 1.878133827313647e-05, "loss": 1.9528, "step": 29190 }, { "epoch": 0.1835287647555634, "grad_norm": 6.171652793884277, "learning_rate": 1.8780919172191817e-05, "loss": 2.0084, "step": 29200 }, { "epoch": 0.18359161707226052, "grad_norm": 6.9197096824646, "learning_rate": 1.8780500071247164e-05, "loss": 1.9259, "step": 29210 }, { "epoch": 0.18365446938895763, "grad_norm": 7.602723598480225, "learning_rate": 1.8780080970302508e-05, "loss": 2.2391, "step": 29220 }, { "epoch": 0.18371732170565475, "grad_norm": 6.543840408325195, "learning_rate": 1.8779661869357855e-05, "loss": 1.9864, "step": 29230 }, { "epoch": 0.18378017402235186, "grad_norm": 6.674700736999512, "learning_rate": 1.8779242768413202e-05, "loss": 2.1489, "step": 29240 }, { "epoch": 0.18384302633904898, "grad_norm": 6.689058780670166, "learning_rate": 1.877882366746855e-05, "loss": 1.9458, "step": 29250 }, { "epoch": 0.18390587865574606, "grad_norm": 7.193964004516602, "learning_rate": 1.8778404566523893e-05, "loss": 1.8953, "step": 29260 }, { "epoch": 0.18396873097244318, "grad_norm": 7.274568557739258, "learning_rate": 1.877798546557924e-05, "loss": 1.9315, "step": 29270 }, { "epoch": 0.1840315832891403, "grad_norm": 7.066225528717041, "learning_rate": 1.8777566364634587e-05, "loss": 1.8457, "step": 29280 }, { "epoch": 0.1840944356058374, "grad_norm": 8.32391357421875, "learning_rate": 1.8777147263689934e-05, "loss": 1.8756, "step": 29290 }, { "epoch": 0.18415728792253452, "grad_norm": 7.687808513641357, "learning_rate": 1.877672816274528e-05, "loss": 2.0536, "step": 29300 }, { "epoch": 0.18422014023923164, "grad_norm": 8.26240062713623, "learning_rate": 1.8776309061800625e-05, "loss": 1.9104, "step": 29310 }, { "epoch": 0.18428299255592875, "grad_norm": 8.49111557006836, "learning_rate": 1.8775889960855972e-05, "loss": 1.8923, "step": 29320 }, { "epoch": 0.18434584487262584, "grad_norm": 7.489538192749023, "learning_rate": 1.877547085991132e-05, "loss": 2.1269, "step": 29330 }, { "epoch": 0.18440869718932296, "grad_norm": 6.954035758972168, "learning_rate": 1.8775051758966666e-05, "loss": 2.0588, "step": 29340 }, { "epoch": 0.18447154950602007, "grad_norm": 7.260134696960449, "learning_rate": 1.8774632658022013e-05, "loss": 1.9555, "step": 29350 }, { "epoch": 0.1845344018227172, "grad_norm": 7.261524200439453, "learning_rate": 1.877421355707736e-05, "loss": 2.1734, "step": 29360 }, { "epoch": 0.1845972541394143, "grad_norm": 6.00492525100708, "learning_rate": 1.8773794456132707e-05, "loss": 2.1805, "step": 29370 }, { "epoch": 0.18466010645611142, "grad_norm": 6.522037029266357, "learning_rate": 1.8773375355188054e-05, "loss": 1.8158, "step": 29380 }, { "epoch": 0.1847229587728085, "grad_norm": 7.0802412033081055, "learning_rate": 1.87729562542434e-05, "loss": 1.8112, "step": 29390 }, { "epoch": 0.18478581108950562, "grad_norm": 7.415716648101807, "learning_rate": 1.8772537153298745e-05, "loss": 1.7868, "step": 29400 }, { "epoch": 0.18484866340620273, "grad_norm": 5.86637020111084, "learning_rate": 1.8772118052354092e-05, "loss": 1.8952, "step": 29410 }, { "epoch": 0.18491151572289985, "grad_norm": 7.00277042388916, "learning_rate": 1.877169895140944e-05, "loss": 2.0638, "step": 29420 }, { "epoch": 0.18497436803959696, "grad_norm": 7.230973720550537, "learning_rate": 1.8771279850464786e-05, "loss": 1.8949, "step": 29430 }, { "epoch": 0.18503722035629408, "grad_norm": 6.861478328704834, "learning_rate": 1.8770902659614598e-05, "loss": 1.9096, "step": 29440 }, { "epoch": 0.18510007267299117, "grad_norm": 6.255519390106201, "learning_rate": 1.8770483558669945e-05, "loss": 2.0701, "step": 29450 }, { "epoch": 0.18516292498968828, "grad_norm": 7.750514030456543, "learning_rate": 1.8770064457725288e-05, "loss": 2.1037, "step": 29460 }, { "epoch": 0.1852257773063854, "grad_norm": 8.064445495605469, "learning_rate": 1.8769645356780635e-05, "loss": 1.9983, "step": 29470 }, { "epoch": 0.1852886296230825, "grad_norm": 6.737326622009277, "learning_rate": 1.8769226255835982e-05, "loss": 1.9377, "step": 29480 }, { "epoch": 0.18535148193977963, "grad_norm": 7.76080322265625, "learning_rate": 1.876880715489133e-05, "loss": 1.7995, "step": 29490 }, { "epoch": 0.18541433425647674, "grad_norm": 7.405367851257324, "learning_rate": 1.8768388053946677e-05, "loss": 1.8267, "step": 29500 }, { "epoch": 0.18547718657317386, "grad_norm": 6.397325038909912, "learning_rate": 1.8767968953002024e-05, "loss": 2.0647, "step": 29510 }, { "epoch": 0.18554003888987095, "grad_norm": 8.42994499206543, "learning_rate": 1.8767549852057367e-05, "loss": 2.3467, "step": 29520 }, { "epoch": 0.18560289120656806, "grad_norm": 7.365906715393066, "learning_rate": 1.8767130751112714e-05, "loss": 1.6478, "step": 29530 }, { "epoch": 0.18566574352326518, "grad_norm": 6.7625298500061035, "learning_rate": 1.876671165016806e-05, "loss": 1.7483, "step": 29540 }, { "epoch": 0.1857285958399623, "grad_norm": 6.54100227355957, "learning_rate": 1.876629254922341e-05, "loss": 1.8306, "step": 29550 }, { "epoch": 0.1857914481566594, "grad_norm": 7.321144104003906, "learning_rate": 1.8765873448278752e-05, "loss": 1.8764, "step": 29560 }, { "epoch": 0.18585430047335652, "grad_norm": 7.2385478019714355, "learning_rate": 1.87654543473341e-05, "loss": 1.7689, "step": 29570 }, { "epoch": 0.1859171527900536, "grad_norm": 7.6455302238464355, "learning_rate": 1.8765035246389446e-05, "loss": 2.1768, "step": 29580 }, { "epoch": 0.18598000510675072, "grad_norm": 6.5208635330200195, "learning_rate": 1.8764616145444793e-05, "loss": 1.8892, "step": 29590 }, { "epoch": 0.18604285742344784, "grad_norm": 8.060873031616211, "learning_rate": 1.876419704450014e-05, "loss": 2.0558, "step": 29600 }, { "epoch": 0.18610570974014495, "grad_norm": 7.863399028778076, "learning_rate": 1.8763777943555484e-05, "loss": 1.8187, "step": 29610 }, { "epoch": 0.18616856205684207, "grad_norm": 6.913415431976318, "learning_rate": 1.876335884261083e-05, "loss": 1.8612, "step": 29620 }, { "epoch": 0.18623141437353918, "grad_norm": 7.467434883117676, "learning_rate": 1.8762939741666178e-05, "loss": 2.0697, "step": 29630 }, { "epoch": 0.1862942666902363, "grad_norm": 7.388409614562988, "learning_rate": 1.8762520640721525e-05, "loss": 2.0388, "step": 29640 }, { "epoch": 0.18635711900693339, "grad_norm": 6.596724987030029, "learning_rate": 1.8762101539776872e-05, "loss": 1.8998, "step": 29650 }, { "epoch": 0.1864199713236305, "grad_norm": 7.31245756149292, "learning_rate": 1.876168243883222e-05, "loss": 2.0896, "step": 29660 }, { "epoch": 0.18648282364032762, "grad_norm": 7.265267848968506, "learning_rate": 1.8761263337887567e-05, "loss": 1.9591, "step": 29670 }, { "epoch": 0.18654567595702473, "grad_norm": 7.891168594360352, "learning_rate": 1.8760844236942914e-05, "loss": 1.8877, "step": 29680 }, { "epoch": 0.18660852827372185, "grad_norm": 6.974050998687744, "learning_rate": 1.8760425135998257e-05, "loss": 2.061, "step": 29690 }, { "epoch": 0.18667138059041896, "grad_norm": 6.938966751098633, "learning_rate": 1.8760006035053604e-05, "loss": 1.9556, "step": 29700 }, { "epoch": 0.18673423290711605, "grad_norm": 6.422309398651123, "learning_rate": 1.875958693410895e-05, "loss": 2.0012, "step": 29710 }, { "epoch": 0.18679708522381316, "grad_norm": 8.46394157409668, "learning_rate": 1.87591678331643e-05, "loss": 1.9021, "step": 29720 }, { "epoch": 0.18685993754051028, "grad_norm": 8.600244522094727, "learning_rate": 1.8758748732219646e-05, "loss": 2.0819, "step": 29730 }, { "epoch": 0.1869227898572074, "grad_norm": 7.660400390625, "learning_rate": 1.875832963127499e-05, "loss": 1.9857, "step": 29740 }, { "epoch": 0.1869856421739045, "grad_norm": 6.847285270690918, "learning_rate": 1.8757910530330336e-05, "loss": 2.1663, "step": 29750 }, { "epoch": 0.18704849449060162, "grad_norm": 7.508523941040039, "learning_rate": 1.8757491429385683e-05, "loss": 2.0756, "step": 29760 }, { "epoch": 0.1871113468072987, "grad_norm": 7.233463287353516, "learning_rate": 1.875707232844103e-05, "loss": 1.9206, "step": 29770 }, { "epoch": 0.18717419912399583, "grad_norm": 7.623024940490723, "learning_rate": 1.8756653227496374e-05, "loss": 1.8882, "step": 29780 }, { "epoch": 0.18723705144069294, "grad_norm": 8.130066871643066, "learning_rate": 1.875623412655172e-05, "loss": 2.2309, "step": 29790 }, { "epoch": 0.18729990375739006, "grad_norm": 6.728233337402344, "learning_rate": 1.875581502560707e-05, "loss": 2.1045, "step": 29800 }, { "epoch": 0.18736275607408717, "grad_norm": 7.936640739440918, "learning_rate": 1.8755395924662415e-05, "loss": 1.9853, "step": 29810 }, { "epoch": 0.1874256083907843, "grad_norm": 5.2671098709106445, "learning_rate": 1.8754976823717762e-05, "loss": 2.1318, "step": 29820 }, { "epoch": 0.1874884607074814, "grad_norm": 7.63880729675293, "learning_rate": 1.875455772277311e-05, "loss": 1.9968, "step": 29830 }, { "epoch": 0.1875513130241785, "grad_norm": 8.06202507019043, "learning_rate": 1.8754138621828453e-05, "loss": 2.0014, "step": 29840 }, { "epoch": 0.1876141653408756, "grad_norm": 6.8607611656188965, "learning_rate": 1.87537195208838e-05, "loss": 1.8825, "step": 29850 }, { "epoch": 0.18767701765757272, "grad_norm": 7.449906349182129, "learning_rate": 1.8753300419939147e-05, "loss": 2.1468, "step": 29860 }, { "epoch": 0.18773986997426984, "grad_norm": 8.055960655212402, "learning_rate": 1.8752881318994494e-05, "loss": 2.0776, "step": 29870 }, { "epoch": 0.18780272229096695, "grad_norm": 8.381570816040039, "learning_rate": 1.875246221804984e-05, "loss": 2.2393, "step": 29880 }, { "epoch": 0.18786557460766407, "grad_norm": 8.259137153625488, "learning_rate": 1.875204311710519e-05, "loss": 2.2421, "step": 29890 }, { "epoch": 0.18792842692436115, "grad_norm": 5.870490550994873, "learning_rate": 1.8751624016160536e-05, "loss": 1.7767, "step": 29900 }, { "epoch": 0.18799127924105827, "grad_norm": 7.300480842590332, "learning_rate": 1.8751204915215883e-05, "loss": 2.0607, "step": 29910 }, { "epoch": 0.18805413155775538, "grad_norm": 6.586701393127441, "learning_rate": 1.8750785814271226e-05, "loss": 2.0254, "step": 29920 }, { "epoch": 0.1881169838744525, "grad_norm": 6.941963195800781, "learning_rate": 1.8750366713326573e-05, "loss": 2.0021, "step": 29930 }, { "epoch": 0.1881798361911496, "grad_norm": 7.193849086761475, "learning_rate": 1.874994761238192e-05, "loss": 2.2742, "step": 29940 }, { "epoch": 0.18824268850784673, "grad_norm": 7.205645561218262, "learning_rate": 1.8749528511437268e-05, "loss": 2.007, "step": 29950 }, { "epoch": 0.18830554082454384, "grad_norm": 6.672652721405029, "learning_rate": 1.874910941049261e-05, "loss": 1.967, "step": 29960 }, { "epoch": 0.18836839314124093, "grad_norm": 8.637862205505371, "learning_rate": 1.874869030954796e-05, "loss": 2.0933, "step": 29970 }, { "epoch": 0.18843124545793805, "grad_norm": 6.464426517486572, "learning_rate": 1.8748271208603305e-05, "loss": 1.8952, "step": 29980 }, { "epoch": 0.18849409777463516, "grad_norm": 7.936492919921875, "learning_rate": 1.8747852107658653e-05, "loss": 1.8801, "step": 29990 }, { "epoch": 0.18855695009133228, "grad_norm": 7.535575866699219, "learning_rate": 1.8747433006713996e-05, "loss": 1.9741, "step": 30000 }, { "epoch": 0.1886198024080294, "grad_norm": 7.745148181915283, "learning_rate": 1.8747013905769343e-05, "loss": 2.1146, "step": 30010 }, { "epoch": 0.1886826547247265, "grad_norm": 6.761495113372803, "learning_rate": 1.874659480482469e-05, "loss": 2.0053, "step": 30020 }, { "epoch": 0.1887455070414236, "grad_norm": 7.184195518493652, "learning_rate": 1.8746175703880037e-05, "loss": 1.9576, "step": 30030 }, { "epoch": 0.1888083593581207, "grad_norm": 6.510901927947998, "learning_rate": 1.8745756602935384e-05, "loss": 2.0378, "step": 30040 }, { "epoch": 0.18887121167481782, "grad_norm": 6.986710548400879, "learning_rate": 1.874533750199073e-05, "loss": 1.7362, "step": 30050 }, { "epoch": 0.18893406399151494, "grad_norm": 6.512456893920898, "learning_rate": 1.874491840104608e-05, "loss": 1.8793, "step": 30060 }, { "epoch": 0.18899691630821205, "grad_norm": 7.3014702796936035, "learning_rate": 1.8744499300101426e-05, "loss": 2.1635, "step": 30070 }, { "epoch": 0.18905976862490917, "grad_norm": 6.359899997711182, "learning_rate": 1.8744080199156773e-05, "loss": 2.0233, "step": 30080 }, { "epoch": 0.18912262094160626, "grad_norm": 7.0193915367126465, "learning_rate": 1.8743661098212116e-05, "loss": 1.965, "step": 30090 }, { "epoch": 0.18918547325830337, "grad_norm": 7.0321455001831055, "learning_rate": 1.8743241997267464e-05, "loss": 2.0563, "step": 30100 }, { "epoch": 0.1892483255750005, "grad_norm": 7.861650466918945, "learning_rate": 1.874282289632281e-05, "loss": 2.0758, "step": 30110 }, { "epoch": 0.1893111778916976, "grad_norm": 6.646553039550781, "learning_rate": 1.8742403795378158e-05, "loss": 1.9899, "step": 30120 }, { "epoch": 0.18937403020839472, "grad_norm": 7.474400043487549, "learning_rate": 1.8741984694433505e-05, "loss": 2.0311, "step": 30130 }, { "epoch": 0.18943688252509183, "grad_norm": 7.869038105010986, "learning_rate": 1.874156559348885e-05, "loss": 2.3259, "step": 30140 }, { "epoch": 0.18949973484178895, "grad_norm": 7.051912784576416, "learning_rate": 1.8741146492544195e-05, "loss": 1.9394, "step": 30150 }, { "epoch": 0.18956258715848603, "grad_norm": 7.221027851104736, "learning_rate": 1.8740727391599543e-05, "loss": 2.0958, "step": 30160 }, { "epoch": 0.18962543947518315, "grad_norm": 7.719078540802002, "learning_rate": 1.874030829065489e-05, "loss": 1.7209, "step": 30170 }, { "epoch": 0.18968829179188026, "grad_norm": 7.397263526916504, "learning_rate": 1.8739889189710233e-05, "loss": 1.7915, "step": 30180 }, { "epoch": 0.18975114410857738, "grad_norm": 7.989697456359863, "learning_rate": 1.873947008876558e-05, "loss": 1.9291, "step": 30190 }, { "epoch": 0.1898139964252745, "grad_norm": 8.301801681518555, "learning_rate": 1.8739050987820927e-05, "loss": 2.3006, "step": 30200 }, { "epoch": 0.1898768487419716, "grad_norm": 6.9124040603637695, "learning_rate": 1.8738631886876275e-05, "loss": 1.7534, "step": 30210 }, { "epoch": 0.1899397010586687, "grad_norm": 8.110513687133789, "learning_rate": 1.873821278593162e-05, "loss": 2.0729, "step": 30220 }, { "epoch": 0.1900025533753658, "grad_norm": 7.219425678253174, "learning_rate": 1.8737793684986965e-05, "loss": 1.7849, "step": 30230 }, { "epoch": 0.19006540569206293, "grad_norm": 8.310271263122559, "learning_rate": 1.8737374584042312e-05, "loss": 1.9025, "step": 30240 }, { "epoch": 0.19012825800876004, "grad_norm": 7.594659328460693, "learning_rate": 1.873695548309766e-05, "loss": 2.0431, "step": 30250 }, { "epoch": 0.19019111032545716, "grad_norm": 7.479565143585205, "learning_rate": 1.8736536382153006e-05, "loss": 2.0942, "step": 30260 }, { "epoch": 0.19025396264215427, "grad_norm": 7.137730121612549, "learning_rate": 1.8736117281208354e-05, "loss": 1.7541, "step": 30270 }, { "epoch": 0.19031681495885136, "grad_norm": 8.164888381958008, "learning_rate": 1.87356981802637e-05, "loss": 2.0119, "step": 30280 }, { "epoch": 0.19037966727554848, "grad_norm": 7.230342388153076, "learning_rate": 1.8735279079319048e-05, "loss": 2.0207, "step": 30290 }, { "epoch": 0.1904425195922456, "grad_norm": 7.518143653869629, "learning_rate": 1.8734859978374395e-05, "loss": 2.2041, "step": 30300 }, { "epoch": 0.1905053719089427, "grad_norm": 7.932175636291504, "learning_rate": 1.873444087742974e-05, "loss": 2.164, "step": 30310 }, { "epoch": 0.19056822422563982, "grad_norm": 6.18129825592041, "learning_rate": 1.8734021776485086e-05, "loss": 1.8517, "step": 30320 }, { "epoch": 0.19063107654233694, "grad_norm": 7.166076183319092, "learning_rate": 1.8733602675540433e-05, "loss": 2.0263, "step": 30330 }, { "epoch": 0.19069392885903405, "grad_norm": 8.349908828735352, "learning_rate": 1.873318357459578e-05, "loss": 2.2155, "step": 30340 }, { "epoch": 0.19075678117573114, "grad_norm": 6.5167059898376465, "learning_rate": 1.8732764473651127e-05, "loss": 1.8075, "step": 30350 }, { "epoch": 0.19081963349242825, "grad_norm": 7.859280586242676, "learning_rate": 1.873234537270647e-05, "loss": 1.9008, "step": 30360 }, { "epoch": 0.19088248580912537, "grad_norm": 6.828162670135498, "learning_rate": 1.8731926271761817e-05, "loss": 1.8431, "step": 30370 }, { "epoch": 0.19094533812582248, "grad_norm": 6.228019714355469, "learning_rate": 1.8731507170817165e-05, "loss": 1.849, "step": 30380 }, { "epoch": 0.1910081904425196, "grad_norm": 6.685621738433838, "learning_rate": 1.873108806987251e-05, "loss": 1.9236, "step": 30390 }, { "epoch": 0.1910710427592167, "grad_norm": 6.393117427825928, "learning_rate": 1.8730668968927855e-05, "loss": 1.905, "step": 30400 }, { "epoch": 0.1911338950759138, "grad_norm": 6.564292907714844, "learning_rate": 1.8730249867983202e-05, "loss": 2.0825, "step": 30410 }, { "epoch": 0.19119674739261092, "grad_norm": 7.268558502197266, "learning_rate": 1.872983076703855e-05, "loss": 1.9467, "step": 30420 }, { "epoch": 0.19125959970930803, "grad_norm": 7.2220892906188965, "learning_rate": 1.8729411666093897e-05, "loss": 1.932, "step": 30430 }, { "epoch": 0.19132245202600515, "grad_norm": 6.725642681121826, "learning_rate": 1.8728992565149244e-05, "loss": 2.0276, "step": 30440 }, { "epoch": 0.19138530434270226, "grad_norm": 6.740306377410889, "learning_rate": 1.872857346420459e-05, "loss": 1.8869, "step": 30450 }, { "epoch": 0.19144815665939938, "grad_norm": 7.151871681213379, "learning_rate": 1.8728154363259938e-05, "loss": 1.9544, "step": 30460 }, { "epoch": 0.1915110089760965, "grad_norm": 6.8912811279296875, "learning_rate": 1.872773526231528e-05, "loss": 1.8118, "step": 30470 }, { "epoch": 0.19157386129279358, "grad_norm": 6.042466163635254, "learning_rate": 1.872731616137063e-05, "loss": 1.938, "step": 30480 }, { "epoch": 0.1916367136094907, "grad_norm": 6.601611137390137, "learning_rate": 1.8726897060425976e-05, "loss": 2.017, "step": 30490 }, { "epoch": 0.1916995659261878, "grad_norm": 6.856364727020264, "learning_rate": 1.8726477959481323e-05, "loss": 1.7202, "step": 30500 }, { "epoch": 0.19176241824288492, "grad_norm": 6.876039981842041, "learning_rate": 1.872605885853667e-05, "loss": 2.1173, "step": 30510 }, { "epoch": 0.19182527055958204, "grad_norm": 6.808557987213135, "learning_rate": 1.8725639757592017e-05, "loss": 1.9516, "step": 30520 }, { "epoch": 0.19188812287627915, "grad_norm": 7.098701477050781, "learning_rate": 1.8725220656647364e-05, "loss": 2.0891, "step": 30530 }, { "epoch": 0.19195097519297624, "grad_norm": 7.035343170166016, "learning_rate": 1.8724801555702708e-05, "loss": 2.0032, "step": 30540 }, { "epoch": 0.19201382750967336, "grad_norm": 7.604888439178467, "learning_rate": 1.8724382454758055e-05, "loss": 2.0704, "step": 30550 }, { "epoch": 0.19207667982637047, "grad_norm": 7.634496688842773, "learning_rate": 1.87239633538134e-05, "loss": 2.1229, "step": 30560 }, { "epoch": 0.1921395321430676, "grad_norm": 7.305230617523193, "learning_rate": 1.872354425286875e-05, "loss": 1.7444, "step": 30570 }, { "epoch": 0.1922023844597647, "grad_norm": 6.693951606750488, "learning_rate": 1.8723125151924092e-05, "loss": 1.7571, "step": 30580 }, { "epoch": 0.19226523677646182, "grad_norm": 5.700252056121826, "learning_rate": 1.872270605097944e-05, "loss": 1.9711, "step": 30590 }, { "epoch": 0.1923280890931589, "grad_norm": 7.504659652709961, "learning_rate": 1.8722286950034787e-05, "loss": 1.9572, "step": 30600 }, { "epoch": 0.19239094140985602, "grad_norm": 7.629385471343994, "learning_rate": 1.8721867849090134e-05, "loss": 1.9407, "step": 30610 }, { "epoch": 0.19245379372655314, "grad_norm": 7.406287670135498, "learning_rate": 1.8721448748145477e-05, "loss": 2.2253, "step": 30620 }, { "epoch": 0.19251664604325025, "grad_norm": 8.016263961791992, "learning_rate": 1.8721029647200824e-05, "loss": 2.0097, "step": 30630 }, { "epoch": 0.19257949835994737, "grad_norm": 7.708703994750977, "learning_rate": 1.872061054625617e-05, "loss": 1.9363, "step": 30640 }, { "epoch": 0.19264235067664448, "grad_norm": 7.221123218536377, "learning_rate": 1.872019144531152e-05, "loss": 1.8846, "step": 30650 }, { "epoch": 0.1927052029933416, "grad_norm": 7.941370010375977, "learning_rate": 1.8719772344366866e-05, "loss": 1.9737, "step": 30660 }, { "epoch": 0.19276805531003868, "grad_norm": 8.912504196166992, "learning_rate": 1.8719353243422213e-05, "loss": 2.1479, "step": 30670 }, { "epoch": 0.1928309076267358, "grad_norm": 8.278250694274902, "learning_rate": 1.871893414247756e-05, "loss": 1.6379, "step": 30680 }, { "epoch": 0.1928937599434329, "grad_norm": 6.728115558624268, "learning_rate": 1.8718515041532907e-05, "loss": 1.8149, "step": 30690 }, { "epoch": 0.19295661226013003, "grad_norm": 7.390202522277832, "learning_rate": 1.8718095940588254e-05, "loss": 1.9503, "step": 30700 }, { "epoch": 0.19301946457682714, "grad_norm": 7.695835113525391, "learning_rate": 1.8717676839643598e-05, "loss": 1.9788, "step": 30710 }, { "epoch": 0.19308231689352426, "grad_norm": 8.717729568481445, "learning_rate": 1.8717257738698945e-05, "loss": 2.011, "step": 30720 }, { "epoch": 0.19314516921022135, "grad_norm": 8.436739921569824, "learning_rate": 1.8716838637754292e-05, "loss": 2.0215, "step": 30730 }, { "epoch": 0.19320802152691846, "grad_norm": 8.262494087219238, "learning_rate": 1.871641953680964e-05, "loss": 2.0514, "step": 30740 }, { "epoch": 0.19327087384361558, "grad_norm": 8.385856628417969, "learning_rate": 1.8716000435864986e-05, "loss": 1.7285, "step": 30750 }, { "epoch": 0.1933337261603127, "grad_norm": 7.661288738250732, "learning_rate": 1.871558133492033e-05, "loss": 1.9245, "step": 30760 }, { "epoch": 0.1933965784770098, "grad_norm": 9.048144340515137, "learning_rate": 1.8715162233975677e-05, "loss": 2.4136, "step": 30770 }, { "epoch": 0.19345943079370692, "grad_norm": 7.569735050201416, "learning_rate": 1.8714743133031024e-05, "loss": 2.2554, "step": 30780 }, { "epoch": 0.19352228311040404, "grad_norm": 8.289266586303711, "learning_rate": 1.871432403208637e-05, "loss": 2.0044, "step": 30790 }, { "epoch": 0.19358513542710112, "grad_norm": 7.837000846862793, "learning_rate": 1.8713904931141714e-05, "loss": 1.9743, "step": 30800 }, { "epoch": 0.19364798774379824, "grad_norm": 6.99252986907959, "learning_rate": 1.871348583019706e-05, "loss": 1.9266, "step": 30810 }, { "epoch": 0.19371084006049535, "grad_norm": 7.3852643966674805, "learning_rate": 1.871306672925241e-05, "loss": 2.1344, "step": 30820 }, { "epoch": 0.19377369237719247, "grad_norm": 7.649234294891357, "learning_rate": 1.8712647628307756e-05, "loss": 2.159, "step": 30830 }, { "epoch": 0.19383654469388958, "grad_norm": 7.783021450042725, "learning_rate": 1.8712228527363103e-05, "loss": 2.0225, "step": 30840 }, { "epoch": 0.1938993970105867, "grad_norm": 6.159892559051514, "learning_rate": 1.8711809426418446e-05, "loss": 1.733, "step": 30850 }, { "epoch": 0.1939622493272838, "grad_norm": 7.179442882537842, "learning_rate": 1.8711390325473793e-05, "loss": 2.1753, "step": 30860 }, { "epoch": 0.1940251016439809, "grad_norm": 8.023426055908203, "learning_rate": 1.871097122452914e-05, "loss": 2.1041, "step": 30870 }, { "epoch": 0.19408795396067802, "grad_norm": 8.841620445251465, "learning_rate": 1.8710552123584488e-05, "loss": 1.9144, "step": 30880 }, { "epoch": 0.19415080627737513, "grad_norm": 9.015290260314941, "learning_rate": 1.8710133022639835e-05, "loss": 1.9328, "step": 30890 }, { "epoch": 0.19421365859407225, "grad_norm": 8.53672981262207, "learning_rate": 1.8709713921695182e-05, "loss": 1.9781, "step": 30900 }, { "epoch": 0.19427651091076936, "grad_norm": 6.604968547821045, "learning_rate": 1.870929482075053e-05, "loss": 2.0874, "step": 30910 }, { "epoch": 0.19433936322746645, "grad_norm": 7.135388374328613, "learning_rate": 1.8708875719805876e-05, "loss": 1.961, "step": 30920 }, { "epoch": 0.19440221554416356, "grad_norm": 6.9580864906311035, "learning_rate": 1.8708456618861223e-05, "loss": 1.8935, "step": 30930 }, { "epoch": 0.19446506786086068, "grad_norm": 6.016077041625977, "learning_rate": 1.8708037517916567e-05, "loss": 1.8778, "step": 30940 }, { "epoch": 0.1945279201775578, "grad_norm": 7.97602653503418, "learning_rate": 1.8707618416971914e-05, "loss": 2.0686, "step": 30950 }, { "epoch": 0.1945907724942549, "grad_norm": 6.80618953704834, "learning_rate": 1.870719931602726e-05, "loss": 1.8221, "step": 30960 }, { "epoch": 0.19465362481095203, "grad_norm": 7.454229831695557, "learning_rate": 1.8706780215082608e-05, "loss": 2.0162, "step": 30970 }, { "epoch": 0.19471647712764914, "grad_norm": 7.484661102294922, "learning_rate": 1.870636111413795e-05, "loss": 1.9677, "step": 30980 }, { "epoch": 0.19477932944434623, "grad_norm": 6.804109573364258, "learning_rate": 1.87059420131933e-05, "loss": 1.9224, "step": 30990 }, { "epoch": 0.19484218176104334, "grad_norm": 7.103456497192383, "learning_rate": 1.8705522912248646e-05, "loss": 2.0525, "step": 31000 }, { "epoch": 0.19490503407774046, "grad_norm": 7.709608554840088, "learning_rate": 1.8705103811303993e-05, "loss": 1.9993, "step": 31010 }, { "epoch": 0.19496788639443757, "grad_norm": 7.108093738555908, "learning_rate": 1.8704684710359336e-05, "loss": 2.0137, "step": 31020 }, { "epoch": 0.1950307387111347, "grad_norm": 7.940584182739258, "learning_rate": 1.8704265609414683e-05, "loss": 2.1052, "step": 31030 }, { "epoch": 0.1950935910278318, "grad_norm": 8.082210540771484, "learning_rate": 1.870384650847003e-05, "loss": 2.0226, "step": 31040 }, { "epoch": 0.1951564433445289, "grad_norm": 6.77839469909668, "learning_rate": 1.8703427407525378e-05, "loss": 1.7747, "step": 31050 }, { "epoch": 0.195219295661226, "grad_norm": 7.5021162033081055, "learning_rate": 1.8703008306580725e-05, "loss": 1.8515, "step": 31060 }, { "epoch": 0.19528214797792312, "grad_norm": 6.902249336242676, "learning_rate": 1.8702589205636072e-05, "loss": 1.9328, "step": 31070 }, { "epoch": 0.19534500029462024, "grad_norm": 6.502012252807617, "learning_rate": 1.870217010469142e-05, "loss": 1.7997, "step": 31080 }, { "epoch": 0.19540785261131735, "grad_norm": 6.4549665451049805, "learning_rate": 1.8701751003746766e-05, "loss": 2.0455, "step": 31090 }, { "epoch": 0.19547070492801447, "grad_norm": 6.250760555267334, "learning_rate": 1.870133190280211e-05, "loss": 1.9771, "step": 31100 }, { "epoch": 0.19553355724471155, "grad_norm": 6.72709321975708, "learning_rate": 1.8700912801857457e-05, "loss": 2.032, "step": 31110 }, { "epoch": 0.19559640956140867, "grad_norm": 7.2245097160339355, "learning_rate": 1.8700493700912804e-05, "loss": 1.8392, "step": 31120 }, { "epoch": 0.19565926187810578, "grad_norm": 7.101731777191162, "learning_rate": 1.870007459996815e-05, "loss": 1.9772, "step": 31130 }, { "epoch": 0.1957221141948029, "grad_norm": 6.784846305847168, "learning_rate": 1.8699655499023498e-05, "loss": 1.9331, "step": 31140 }, { "epoch": 0.1957849665115, "grad_norm": 9.258913040161133, "learning_rate": 1.8699236398078845e-05, "loss": 1.8773, "step": 31150 }, { "epoch": 0.19584781882819713, "grad_norm": 7.051684379577637, "learning_rate": 1.869881729713419e-05, "loss": 1.9141, "step": 31160 }, { "epoch": 0.19591067114489424, "grad_norm": 7.824609756469727, "learning_rate": 1.8698398196189536e-05, "loss": 1.8682, "step": 31170 }, { "epoch": 0.19597352346159133, "grad_norm": 9.092242240905762, "learning_rate": 1.8697979095244883e-05, "loss": 2.0567, "step": 31180 }, { "epoch": 0.19603637577828845, "grad_norm": 6.69179630279541, "learning_rate": 1.869755999430023e-05, "loss": 1.7153, "step": 31190 }, { "epoch": 0.19609922809498556, "grad_norm": 7.820713996887207, "learning_rate": 1.8697140893355574e-05, "loss": 1.951, "step": 31200 }, { "epoch": 0.19616208041168268, "grad_norm": 7.920715808868408, "learning_rate": 1.869672179241092e-05, "loss": 1.9306, "step": 31210 }, { "epoch": 0.1962249327283798, "grad_norm": 7.537846565246582, "learning_rate": 1.8696302691466268e-05, "loss": 1.9301, "step": 31220 }, { "epoch": 0.1962877850450769, "grad_norm": 7.648664951324463, "learning_rate": 1.8695883590521615e-05, "loss": 2.0416, "step": 31230 }, { "epoch": 0.196350637361774, "grad_norm": 6.697866439819336, "learning_rate": 1.8695464489576962e-05, "loss": 1.9286, "step": 31240 }, { "epoch": 0.1964134896784711, "grad_norm": 6.970717906951904, "learning_rate": 1.8695045388632305e-05, "loss": 1.8984, "step": 31250 }, { "epoch": 0.19647634199516822, "grad_norm": 6.391444206237793, "learning_rate": 1.8694626287687653e-05, "loss": 1.7854, "step": 31260 }, { "epoch": 0.19653919431186534, "grad_norm": 7.58518123626709, "learning_rate": 1.8694207186743e-05, "loss": 2.1818, "step": 31270 }, { "epoch": 0.19660204662856245, "grad_norm": 6.825311183929443, "learning_rate": 1.8693788085798347e-05, "loss": 2.0447, "step": 31280 }, { "epoch": 0.19666489894525957, "grad_norm": 7.749687671661377, "learning_rate": 1.8693368984853694e-05, "loss": 1.8898, "step": 31290 }, { "epoch": 0.19672775126195668, "grad_norm": 7.068374156951904, "learning_rate": 1.869294988390904e-05, "loss": 2.1863, "step": 31300 }, { "epoch": 0.19679060357865377, "grad_norm": 7.546702861785889, "learning_rate": 1.8692530782964388e-05, "loss": 2.037, "step": 31310 }, { "epoch": 0.1968534558953509, "grad_norm": 7.583136558532715, "learning_rate": 1.8692111682019735e-05, "loss": 2.0095, "step": 31320 }, { "epoch": 0.196916308212048, "grad_norm": 7.856510639190674, "learning_rate": 1.869169258107508e-05, "loss": 2.0096, "step": 31330 }, { "epoch": 0.19697916052874512, "grad_norm": 7.463872909545898, "learning_rate": 1.8691273480130426e-05, "loss": 2.0048, "step": 31340 }, { "epoch": 0.19704201284544223, "grad_norm": 7.2952799797058105, "learning_rate": 1.8690854379185773e-05, "loss": 1.908, "step": 31350 }, { "epoch": 0.19710486516213935, "grad_norm": 7.742048740386963, "learning_rate": 1.869043527824112e-05, "loss": 1.9118, "step": 31360 }, { "epoch": 0.19716771747883644, "grad_norm": 7.366936683654785, "learning_rate": 1.8690016177296467e-05, "loss": 1.9242, "step": 31370 }, { "epoch": 0.19723056979553355, "grad_norm": 6.955266952514648, "learning_rate": 1.868959707635181e-05, "loss": 2.0451, "step": 31380 }, { "epoch": 0.19729342211223067, "grad_norm": 7.954512596130371, "learning_rate": 1.8689177975407158e-05, "loss": 2.0263, "step": 31390 }, { "epoch": 0.19735627442892778, "grad_norm": 7.566021919250488, "learning_rate": 1.8688758874462505e-05, "loss": 2.0368, "step": 31400 }, { "epoch": 0.1974191267456249, "grad_norm": 6.972746849060059, "learning_rate": 1.8688339773517852e-05, "loss": 2.0718, "step": 31410 }, { "epoch": 0.197481979062322, "grad_norm": 6.899226188659668, "learning_rate": 1.8687920672573196e-05, "loss": 1.8342, "step": 31420 }, { "epoch": 0.1975448313790191, "grad_norm": 6.51491117477417, "learning_rate": 1.8687501571628543e-05, "loss": 2.0807, "step": 31430 }, { "epoch": 0.1976076836957162, "grad_norm": 7.504712104797363, "learning_rate": 1.868708247068389e-05, "loss": 2.0655, "step": 31440 }, { "epoch": 0.19767053601241333, "grad_norm": 7.929646015167236, "learning_rate": 1.8686663369739237e-05, "loss": 2.0732, "step": 31450 }, { "epoch": 0.19773338832911044, "grad_norm": 7.556617736816406, "learning_rate": 1.8686244268794584e-05, "loss": 1.8426, "step": 31460 }, { "epoch": 0.19779624064580756, "grad_norm": 6.628917217254639, "learning_rate": 1.8685825167849927e-05, "loss": 2.037, "step": 31470 }, { "epoch": 0.19785909296250467, "grad_norm": 6.637345790863037, "learning_rate": 1.8685406066905275e-05, "loss": 2.1008, "step": 31480 }, { "epoch": 0.1979219452792018, "grad_norm": 7.68168830871582, "learning_rate": 1.868498696596062e-05, "loss": 1.6905, "step": 31490 }, { "epoch": 0.19798479759589888, "grad_norm": 7.9139180183410645, "learning_rate": 1.868456786501597e-05, "loss": 2.0851, "step": 31500 }, { "epoch": 0.198047649912596, "grad_norm": 6.979345798492432, "learning_rate": 1.8684148764071316e-05, "loss": 1.8896, "step": 31510 }, { "epoch": 0.1981105022292931, "grad_norm": 7.564640998840332, "learning_rate": 1.8683729663126663e-05, "loss": 1.8157, "step": 31520 }, { "epoch": 0.19817335454599022, "grad_norm": 5.947103977203369, "learning_rate": 1.868331056218201e-05, "loss": 1.8437, "step": 31530 }, { "epoch": 0.19823620686268734, "grad_norm": 7.795741558074951, "learning_rate": 1.8682891461237357e-05, "loss": 2.0519, "step": 31540 }, { "epoch": 0.19829905917938445, "grad_norm": 7.159440040588379, "learning_rate": 1.8682472360292704e-05, "loss": 2.1248, "step": 31550 }, { "epoch": 0.19836191149608154, "grad_norm": 6.944345474243164, "learning_rate": 1.8682053259348048e-05, "loss": 2.1194, "step": 31560 }, { "epoch": 0.19842476381277865, "grad_norm": 7.7796196937561035, "learning_rate": 1.8681634158403395e-05, "loss": 1.8926, "step": 31570 }, { "epoch": 0.19848761612947577, "grad_norm": 7.708437919616699, "learning_rate": 1.8681215057458742e-05, "loss": 1.8538, "step": 31580 }, { "epoch": 0.19855046844617288, "grad_norm": 6.7379279136657715, "learning_rate": 1.868079595651409e-05, "loss": 2.2006, "step": 31590 }, { "epoch": 0.19861332076287, "grad_norm": 6.8227458000183105, "learning_rate": 1.8680376855569433e-05, "loss": 2.0633, "step": 31600 }, { "epoch": 0.19867617307956711, "grad_norm": 8.552653312683105, "learning_rate": 1.867995775462478e-05, "loss": 2.2047, "step": 31610 }, { "epoch": 0.1987390253962642, "grad_norm": 6.670979976654053, "learning_rate": 1.8679538653680127e-05, "loss": 2.1558, "step": 31620 }, { "epoch": 0.19880187771296132, "grad_norm": 6.225050449371338, "learning_rate": 1.8679119552735474e-05, "loss": 2.0291, "step": 31630 }, { "epoch": 0.19886473002965843, "grad_norm": 7.140894412994385, "learning_rate": 1.8678700451790818e-05, "loss": 2.24, "step": 31640 }, { "epoch": 0.19892758234635555, "grad_norm": 7.125330448150635, "learning_rate": 1.8678281350846165e-05, "loss": 1.9717, "step": 31650 }, { "epoch": 0.19899043466305266, "grad_norm": 8.001860618591309, "learning_rate": 1.867786224990151e-05, "loss": 2.2029, "step": 31660 }, { "epoch": 0.19905328697974978, "grad_norm": 7.351769924163818, "learning_rate": 1.867744314895686e-05, "loss": 1.6518, "step": 31670 }, { "epoch": 0.1991161392964469, "grad_norm": 8.122381210327148, "learning_rate": 1.8677024048012206e-05, "loss": 1.7484, "step": 31680 }, { "epoch": 0.19917899161314398, "grad_norm": 5.550985813140869, "learning_rate": 1.8676604947067553e-05, "loss": 1.7822, "step": 31690 }, { "epoch": 0.1992418439298411, "grad_norm": 7.294111728668213, "learning_rate": 1.86761858461229e-05, "loss": 1.7865, "step": 31700 }, { "epoch": 0.1993046962465382, "grad_norm": 7.205888271331787, "learning_rate": 1.8675766745178247e-05, "loss": 2.0595, "step": 31710 }, { "epoch": 0.19936754856323533, "grad_norm": 7.846662998199463, "learning_rate": 1.867534764423359e-05, "loss": 1.9511, "step": 31720 }, { "epoch": 0.19943040087993244, "grad_norm": 8.158760070800781, "learning_rate": 1.8674928543288938e-05, "loss": 2.076, "step": 31730 }, { "epoch": 0.19949325319662956, "grad_norm": 6.844155788421631, "learning_rate": 1.8674509442344285e-05, "loss": 1.8929, "step": 31740 }, { "epoch": 0.19955610551332664, "grad_norm": 7.20194149017334, "learning_rate": 1.8674090341399632e-05, "loss": 2.0575, "step": 31750 }, { "epoch": 0.19961895783002376, "grad_norm": 7.942824840545654, "learning_rate": 1.867367124045498e-05, "loss": 2.0269, "step": 31760 }, { "epoch": 0.19968181014672087, "grad_norm": 7.399482250213623, "learning_rate": 1.8673252139510326e-05, "loss": 2.122, "step": 31770 }, { "epoch": 0.199744662463418, "grad_norm": 6.742488861083984, "learning_rate": 1.867283303856567e-05, "loss": 2.0259, "step": 31780 }, { "epoch": 0.1998075147801151, "grad_norm": 9.902754783630371, "learning_rate": 1.8672413937621017e-05, "loss": 1.7763, "step": 31790 }, { "epoch": 0.19987036709681222, "grad_norm": 6.923020839691162, "learning_rate": 1.8671994836676364e-05, "loss": 1.9856, "step": 31800 }, { "epoch": 0.19993321941350933, "grad_norm": 7.055446147918701, "learning_rate": 1.867157573573171e-05, "loss": 2.0138, "step": 31810 }, { "epoch": 0.19999607173020642, "grad_norm": 7.5703229904174805, "learning_rate": 1.8671156634787055e-05, "loss": 2.0022, "step": 31820 }, { "epoch": 0.20005892404690354, "grad_norm": 7.893503665924072, "learning_rate": 1.8670737533842402e-05, "loss": 2.0686, "step": 31830 }, { "epoch": 0.20012177636360065, "grad_norm": 6.54017972946167, "learning_rate": 1.867031843289775e-05, "loss": 1.9186, "step": 31840 }, { "epoch": 0.20018462868029777, "grad_norm": 8.596176147460938, "learning_rate": 1.8669899331953096e-05, "loss": 1.7146, "step": 31850 }, { "epoch": 0.20024748099699488, "grad_norm": 9.219547271728516, "learning_rate": 1.8669480231008443e-05, "loss": 1.9238, "step": 31860 }, { "epoch": 0.200310333313692, "grad_norm": 7.179672718048096, "learning_rate": 1.8669061130063787e-05, "loss": 2.1072, "step": 31870 }, { "epoch": 0.20037318563038908, "grad_norm": 6.458518028259277, "learning_rate": 1.8668642029119134e-05, "loss": 1.8323, "step": 31880 }, { "epoch": 0.2004360379470862, "grad_norm": 8.046995162963867, "learning_rate": 1.866822292817448e-05, "loss": 1.8013, "step": 31890 }, { "epoch": 0.2004988902637833, "grad_norm": 7.660754203796387, "learning_rate": 1.8667803827229828e-05, "loss": 2.1948, "step": 31900 }, { "epoch": 0.20056174258048043, "grad_norm": 7.255456447601318, "learning_rate": 1.8667384726285175e-05, "loss": 1.7914, "step": 31910 }, { "epoch": 0.20062459489717754, "grad_norm": 6.695418834686279, "learning_rate": 1.8666965625340522e-05, "loss": 1.8412, "step": 31920 }, { "epoch": 0.20068744721387466, "grad_norm": 7.198409557342529, "learning_rate": 1.866654652439587e-05, "loss": 2.0424, "step": 31930 }, { "epoch": 0.20075029953057175, "grad_norm": 6.992066860198975, "learning_rate": 1.8666127423451216e-05, "loss": 2.2153, "step": 31940 }, { "epoch": 0.20081315184726886, "grad_norm": 6.353859901428223, "learning_rate": 1.866570832250656e-05, "loss": 1.888, "step": 31950 }, { "epoch": 0.20087600416396598, "grad_norm": 8.30443000793457, "learning_rate": 1.8665289221561907e-05, "loss": 2.1721, "step": 31960 }, { "epoch": 0.2009388564806631, "grad_norm": 7.140757083892822, "learning_rate": 1.8664870120617254e-05, "loss": 2.3043, "step": 31970 }, { "epoch": 0.2010017087973602, "grad_norm": 7.410743713378906, "learning_rate": 1.86644510196726e-05, "loss": 1.8216, "step": 31980 }, { "epoch": 0.20106456111405732, "grad_norm": 7.6606316566467285, "learning_rate": 1.8664031918727948e-05, "loss": 1.9886, "step": 31990 }, { "epoch": 0.20112741343075444, "grad_norm": 6.549091815948486, "learning_rate": 1.8663612817783292e-05, "loss": 1.8681, "step": 32000 }, { "epoch": 0.20119026574745152, "grad_norm": 6.990193843841553, "learning_rate": 1.866319371683864e-05, "loss": 2.0551, "step": 32010 }, { "epoch": 0.20125311806414864, "grad_norm": 6.669229984283447, "learning_rate": 1.8662774615893986e-05, "loss": 1.7833, "step": 32020 }, { "epoch": 0.20131597038084575, "grad_norm": 8.114689826965332, "learning_rate": 1.8662355514949333e-05, "loss": 2.0699, "step": 32030 }, { "epoch": 0.20137882269754287, "grad_norm": 7.928976535797119, "learning_rate": 1.8661936414004677e-05, "loss": 2.0345, "step": 32040 }, { "epoch": 0.20144167501423998, "grad_norm": 6.947659492492676, "learning_rate": 1.8661517313060024e-05, "loss": 1.9629, "step": 32050 }, { "epoch": 0.2015045273309371, "grad_norm": 8.57833480834961, "learning_rate": 1.866109821211537e-05, "loss": 2.1507, "step": 32060 }, { "epoch": 0.2015673796476342, "grad_norm": 6.773591995239258, "learning_rate": 1.8660679111170718e-05, "loss": 1.7984, "step": 32070 }, { "epoch": 0.2016302319643313, "grad_norm": 7.336679935455322, "learning_rate": 1.8660260010226065e-05, "loss": 2.0215, "step": 32080 }, { "epoch": 0.20169308428102842, "grad_norm": 8.003287315368652, "learning_rate": 1.8659840909281412e-05, "loss": 2.0209, "step": 32090 }, { "epoch": 0.20175593659772553, "grad_norm": 6.318203926086426, "learning_rate": 1.8659421808336756e-05, "loss": 1.9984, "step": 32100 }, { "epoch": 0.20181878891442265, "grad_norm": 8.037556648254395, "learning_rate": 1.8659002707392103e-05, "loss": 2.1062, "step": 32110 }, { "epoch": 0.20188164123111976, "grad_norm": 7.300333023071289, "learning_rate": 1.865858360644745e-05, "loss": 1.8028, "step": 32120 }, { "epoch": 0.20194449354781688, "grad_norm": 5.501397132873535, "learning_rate": 1.8658164505502797e-05, "loss": 1.978, "step": 32130 }, { "epoch": 0.20200734586451397, "grad_norm": 6.930439472198486, "learning_rate": 1.8657745404558144e-05, "loss": 1.9132, "step": 32140 }, { "epoch": 0.20207019818121108, "grad_norm": 6.926006317138672, "learning_rate": 1.865732630361349e-05, "loss": 1.8571, "step": 32150 }, { "epoch": 0.2021330504979082, "grad_norm": 7.173760890960693, "learning_rate": 1.8656907202668838e-05, "loss": 1.9771, "step": 32160 }, { "epoch": 0.2021959028146053, "grad_norm": 7.056827545166016, "learning_rate": 1.8656488101724185e-05, "loss": 2.0286, "step": 32170 }, { "epoch": 0.20225875513130243, "grad_norm": 7.20012092590332, "learning_rate": 1.865606900077953e-05, "loss": 1.8052, "step": 32180 }, { "epoch": 0.20232160744799954, "grad_norm": 7.294288158416748, "learning_rate": 1.8655649899834876e-05, "loss": 1.9413, "step": 32190 }, { "epoch": 0.20238445976469663, "grad_norm": 6.476958274841309, "learning_rate": 1.8655230798890223e-05, "loss": 1.9641, "step": 32200 }, { "epoch": 0.20244731208139374, "grad_norm": 6.717261791229248, "learning_rate": 1.865481169794557e-05, "loss": 1.9349, "step": 32210 }, { "epoch": 0.20251016439809086, "grad_norm": 6.926117897033691, "learning_rate": 1.8654392597000914e-05, "loss": 1.822, "step": 32220 }, { "epoch": 0.20257301671478797, "grad_norm": 6.997591495513916, "learning_rate": 1.865397349605626e-05, "loss": 2.0666, "step": 32230 }, { "epoch": 0.2026358690314851, "grad_norm": 7.3034563064575195, "learning_rate": 1.8653554395111608e-05, "loss": 2.0412, "step": 32240 }, { "epoch": 0.2026987213481822, "grad_norm": 7.561244487762451, "learning_rate": 1.8653135294166955e-05, "loss": 1.9237, "step": 32250 }, { "epoch": 0.2027615736648793, "grad_norm": 7.470678806304932, "learning_rate": 1.86527161932223e-05, "loss": 1.834, "step": 32260 }, { "epoch": 0.2028244259815764, "grad_norm": 7.097418785095215, "learning_rate": 1.8652297092277646e-05, "loss": 1.9299, "step": 32270 }, { "epoch": 0.20288727829827352, "grad_norm": 6.99167537689209, "learning_rate": 1.8651877991332993e-05, "loss": 2.2147, "step": 32280 }, { "epoch": 0.20295013061497064, "grad_norm": 6.0950927734375, "learning_rate": 1.865145889038834e-05, "loss": 1.6959, "step": 32290 }, { "epoch": 0.20301298293166775, "grad_norm": 7.768097400665283, "learning_rate": 1.8651039789443687e-05, "loss": 1.9643, "step": 32300 }, { "epoch": 0.20307583524836487, "grad_norm": 7.126907825469971, "learning_rate": 1.8650620688499034e-05, "loss": 1.663, "step": 32310 }, { "epoch": 0.20313868756506198, "grad_norm": 6.074197769165039, "learning_rate": 1.865020158755438e-05, "loss": 2.1272, "step": 32320 }, { "epoch": 0.20320153988175907, "grad_norm": 6.936085224151611, "learning_rate": 1.8649782486609728e-05, "loss": 1.8462, "step": 32330 }, { "epoch": 0.20326439219845618, "grad_norm": 6.845409393310547, "learning_rate": 1.8649363385665075e-05, "loss": 1.8656, "step": 32340 }, { "epoch": 0.2033272445151533, "grad_norm": 7.36346960067749, "learning_rate": 1.864894428472042e-05, "loss": 1.9648, "step": 32350 }, { "epoch": 0.20339009683185041, "grad_norm": 7.719435214996338, "learning_rate": 1.8648525183775766e-05, "loss": 1.8929, "step": 32360 }, { "epoch": 0.20345294914854753, "grad_norm": 8.386730194091797, "learning_rate": 1.8648106082831113e-05, "loss": 1.757, "step": 32370 }, { "epoch": 0.20351580146524464, "grad_norm": 7.338146686553955, "learning_rate": 1.864768698188646e-05, "loss": 1.9952, "step": 32380 }, { "epoch": 0.20357865378194173, "grad_norm": 6.615225315093994, "learning_rate": 1.8647267880941807e-05, "loss": 1.9445, "step": 32390 }, { "epoch": 0.20364150609863885, "grad_norm": 7.198486804962158, "learning_rate": 1.864684877999715e-05, "loss": 2.0266, "step": 32400 }, { "epoch": 0.20370435841533596, "grad_norm": 7.512393474578857, "learning_rate": 1.8646429679052498e-05, "loss": 1.9268, "step": 32410 }, { "epoch": 0.20376721073203308, "grad_norm": 7.614518165588379, "learning_rate": 1.8646010578107845e-05, "loss": 2.1856, "step": 32420 }, { "epoch": 0.2038300630487302, "grad_norm": 7.497756481170654, "learning_rate": 1.8645591477163192e-05, "loss": 2.0897, "step": 32430 }, { "epoch": 0.2038929153654273, "grad_norm": 6.641429901123047, "learning_rate": 1.8645172376218536e-05, "loss": 2.0392, "step": 32440 }, { "epoch": 0.2039557676821244, "grad_norm": 8.049214363098145, "learning_rate": 1.8644753275273883e-05, "loss": 1.8764, "step": 32450 }, { "epoch": 0.2040186199988215, "grad_norm": 8.235671997070312, "learning_rate": 1.864433417432923e-05, "loss": 2.1654, "step": 32460 }, { "epoch": 0.20408147231551862, "grad_norm": 6.430967807769775, "learning_rate": 1.8643915073384577e-05, "loss": 1.9691, "step": 32470 }, { "epoch": 0.20414432463221574, "grad_norm": 8.980220794677734, "learning_rate": 1.8643495972439924e-05, "loss": 2.0066, "step": 32480 }, { "epoch": 0.20420717694891286, "grad_norm": 9.148171424865723, "learning_rate": 1.8643076871495268e-05, "loss": 1.9771, "step": 32490 }, { "epoch": 0.20427002926560997, "grad_norm": 8.299166679382324, "learning_rate": 1.8642657770550615e-05, "loss": 1.8559, "step": 32500 }, { "epoch": 0.20433288158230709, "grad_norm": 8.3163480758667, "learning_rate": 1.8642238669605962e-05, "loss": 1.9074, "step": 32510 }, { "epoch": 0.20439573389900417, "grad_norm": 7.030807018280029, "learning_rate": 1.864181956866131e-05, "loss": 2.0593, "step": 32520 }, { "epoch": 0.2044585862157013, "grad_norm": 6.964199542999268, "learning_rate": 1.8641400467716656e-05, "loss": 2.0386, "step": 32530 }, { "epoch": 0.2045214385323984, "grad_norm": 6.957630157470703, "learning_rate": 1.8640981366772003e-05, "loss": 1.9482, "step": 32540 }, { "epoch": 0.20458429084909552, "grad_norm": 6.892584323883057, "learning_rate": 1.864056226582735e-05, "loss": 1.9165, "step": 32550 }, { "epoch": 0.20464714316579263, "grad_norm": 7.2235918045043945, "learning_rate": 1.8640143164882697e-05, "loss": 1.947, "step": 32560 }, { "epoch": 0.20470999548248975, "grad_norm": 7.948803901672363, "learning_rate": 1.863972406393804e-05, "loss": 2.0304, "step": 32570 }, { "epoch": 0.20477284779918684, "grad_norm": 6.61076021194458, "learning_rate": 1.8639304962993388e-05, "loss": 2.0487, "step": 32580 }, { "epoch": 0.20483570011588395, "grad_norm": 8.155477523803711, "learning_rate": 1.8638885862048735e-05, "loss": 1.8815, "step": 32590 }, { "epoch": 0.20489855243258107, "grad_norm": 6.9196248054504395, "learning_rate": 1.8638466761104082e-05, "loss": 1.7916, "step": 32600 }, { "epoch": 0.20496140474927818, "grad_norm": 7.408926963806152, "learning_rate": 1.863804766015943e-05, "loss": 2.0736, "step": 32610 }, { "epoch": 0.2050242570659753, "grad_norm": 8.11421012878418, "learning_rate": 1.8637628559214773e-05, "loss": 1.8672, "step": 32620 }, { "epoch": 0.2050871093826724, "grad_norm": 7.909786224365234, "learning_rate": 1.863720945827012e-05, "loss": 2.0961, "step": 32630 }, { "epoch": 0.20514996169936953, "grad_norm": 8.090932846069336, "learning_rate": 1.8636790357325467e-05, "loss": 2.0442, "step": 32640 }, { "epoch": 0.2052128140160666, "grad_norm": 6.865467548370361, "learning_rate": 1.8636371256380814e-05, "loss": 1.7117, "step": 32650 }, { "epoch": 0.20527566633276373, "grad_norm": 7.839059829711914, "learning_rate": 1.8635952155436158e-05, "loss": 1.9982, "step": 32660 }, { "epoch": 0.20533851864946084, "grad_norm": 5.283694267272949, "learning_rate": 1.8635533054491505e-05, "loss": 1.9163, "step": 32670 }, { "epoch": 0.20540137096615796, "grad_norm": 7.552374362945557, "learning_rate": 1.8635113953546852e-05, "loss": 1.8389, "step": 32680 }, { "epoch": 0.20546422328285507, "grad_norm": 7.677777290344238, "learning_rate": 1.86346948526022e-05, "loss": 2.1014, "step": 32690 }, { "epoch": 0.2055270755995522, "grad_norm": 7.4425201416015625, "learning_rate": 1.8634275751657546e-05, "loss": 1.9164, "step": 32700 }, { "epoch": 0.20558992791624928, "grad_norm": 6.851995468139648, "learning_rate": 1.8633856650712893e-05, "loss": 1.8816, "step": 32710 }, { "epoch": 0.2056527802329464, "grad_norm": 7.684861660003662, "learning_rate": 1.863343754976824e-05, "loss": 2.1476, "step": 32720 }, { "epoch": 0.2057156325496435, "grad_norm": 6.646461486816406, "learning_rate": 1.8633018448823584e-05, "loss": 1.8131, "step": 32730 }, { "epoch": 0.20577848486634062, "grad_norm": 7.502947807312012, "learning_rate": 1.863259934787893e-05, "loss": 2.1382, "step": 32740 }, { "epoch": 0.20584133718303774, "grad_norm": 7.392009258270264, "learning_rate": 1.8632180246934278e-05, "loss": 2.0775, "step": 32750 }, { "epoch": 0.20590418949973485, "grad_norm": 7.370657920837402, "learning_rate": 1.8631761145989625e-05, "loss": 1.902, "step": 32760 }, { "epoch": 0.20596704181643194, "grad_norm": 7.597288608551025, "learning_rate": 1.8631342045044972e-05, "loss": 1.9322, "step": 32770 }, { "epoch": 0.20602989413312905, "grad_norm": 6.6630353927612305, "learning_rate": 1.863092294410032e-05, "loss": 1.9984, "step": 32780 }, { "epoch": 0.20609274644982617, "grad_norm": 6.317451477050781, "learning_rate": 1.8630503843155666e-05, "loss": 2.0104, "step": 32790 }, { "epoch": 0.20615559876652328, "grad_norm": 6.763668060302734, "learning_rate": 1.863008474221101e-05, "loss": 1.9379, "step": 32800 }, { "epoch": 0.2062184510832204, "grad_norm": 8.344666481018066, "learning_rate": 1.8629665641266357e-05, "loss": 2.0772, "step": 32810 }, { "epoch": 0.20628130339991751, "grad_norm": 6.620004653930664, "learning_rate": 1.8629246540321704e-05, "loss": 1.8502, "step": 32820 }, { "epoch": 0.20634415571661463, "grad_norm": 7.310399055480957, "learning_rate": 1.862882743937705e-05, "loss": 1.946, "step": 32830 }, { "epoch": 0.20640700803331172, "grad_norm": 7.351963996887207, "learning_rate": 1.8628408338432395e-05, "loss": 1.835, "step": 32840 }, { "epoch": 0.20646986035000883, "grad_norm": 6.5745439529418945, "learning_rate": 1.8627989237487742e-05, "loss": 1.9042, "step": 32850 }, { "epoch": 0.20653271266670595, "grad_norm": 7.718234062194824, "learning_rate": 1.862757013654309e-05, "loss": 1.7437, "step": 32860 }, { "epoch": 0.20659556498340306, "grad_norm": 6.668702125549316, "learning_rate": 1.8627151035598436e-05, "loss": 1.966, "step": 32870 }, { "epoch": 0.20665841730010018, "grad_norm": 7.5997490882873535, "learning_rate": 1.862673193465378e-05, "loss": 1.7494, "step": 32880 }, { "epoch": 0.2067212696167973, "grad_norm": 8.042951583862305, "learning_rate": 1.8626312833709127e-05, "loss": 1.977, "step": 32890 }, { "epoch": 0.20678412193349438, "grad_norm": 8.362555503845215, "learning_rate": 1.8625893732764474e-05, "loss": 1.9018, "step": 32900 }, { "epoch": 0.2068469742501915, "grad_norm": Infinity, "learning_rate": 1.862547463181982e-05, "loss": 2.0126, "step": 32910 }, { "epoch": 0.2069098265668886, "grad_norm": 6.688395977020264, "learning_rate": 1.8625097440969632e-05, "loss": 1.9023, "step": 32920 }, { "epoch": 0.20697267888358573, "grad_norm": 7.301969528198242, "learning_rate": 1.862467834002498e-05, "loss": 1.9322, "step": 32930 }, { "epoch": 0.20703553120028284, "grad_norm": 6.88472843170166, "learning_rate": 1.8624259239080326e-05, "loss": 1.8666, "step": 32940 }, { "epoch": 0.20709838351697996, "grad_norm": 8.703797340393066, "learning_rate": 1.8623840138135674e-05, "loss": 2.0496, "step": 32950 }, { "epoch": 0.20716123583367704, "grad_norm": 6.187541484832764, "learning_rate": 1.8623421037191017e-05, "loss": 1.9576, "step": 32960 }, { "epoch": 0.20722408815037416, "grad_norm": 8.077150344848633, "learning_rate": 1.8623001936246364e-05, "loss": 1.8276, "step": 32970 }, { "epoch": 0.20728694046707127, "grad_norm": 7.203952789306641, "learning_rate": 1.862258283530171e-05, "loss": 2.1624, "step": 32980 }, { "epoch": 0.2073497927837684, "grad_norm": 6.861067771911621, "learning_rate": 1.862216373435706e-05, "loss": 1.8687, "step": 32990 }, { "epoch": 0.2074126451004655, "grad_norm": 6.995094299316406, "learning_rate": 1.8621744633412405e-05, "loss": 1.8891, "step": 33000 }, { "epoch": 0.20747549741716262, "grad_norm": 7.380050182342529, "learning_rate": 1.8621325532467753e-05, "loss": 1.8304, "step": 33010 }, { "epoch": 0.20753834973385973, "grad_norm": 7.955836772918701, "learning_rate": 1.86209064315231e-05, "loss": 2.1125, "step": 33020 }, { "epoch": 0.20760120205055682, "grad_norm": 7.0294084548950195, "learning_rate": 1.8620487330578447e-05, "loss": 2.0791, "step": 33030 }, { "epoch": 0.20766405436725394, "grad_norm": 7.854935169219971, "learning_rate": 1.862006822963379e-05, "loss": 1.9195, "step": 33040 }, { "epoch": 0.20772690668395105, "grad_norm": 7.9961161613464355, "learning_rate": 1.8619649128689137e-05, "loss": 1.699, "step": 33050 }, { "epoch": 0.20778975900064817, "grad_norm": 7.443821430206299, "learning_rate": 1.8619230027744485e-05, "loss": 1.9154, "step": 33060 }, { "epoch": 0.20785261131734528, "grad_norm": 8.965798377990723, "learning_rate": 1.861881092679983e-05, "loss": 1.9549, "step": 33070 }, { "epoch": 0.2079154636340424, "grad_norm": 7.658891677856445, "learning_rate": 1.861839182585518e-05, "loss": 1.9491, "step": 33080 }, { "epoch": 0.20797831595073948, "grad_norm": 8.56626033782959, "learning_rate": 1.8617972724910522e-05, "loss": 1.8583, "step": 33090 }, { "epoch": 0.2080411682674366, "grad_norm": 7.63130521774292, "learning_rate": 1.861755362396587e-05, "loss": 2.1472, "step": 33100 }, { "epoch": 0.20810402058413371, "grad_norm": 7.68650484085083, "learning_rate": 1.8617134523021216e-05, "loss": 1.8979, "step": 33110 }, { "epoch": 0.20816687290083083, "grad_norm": 6.086266040802002, "learning_rate": 1.8616715422076564e-05, "loss": 1.8511, "step": 33120 }, { "epoch": 0.20822972521752794, "grad_norm": 6.9972243309021, "learning_rate": 1.861629632113191e-05, "loss": 1.9685, "step": 33130 }, { "epoch": 0.20829257753422506, "grad_norm": 6.733551979064941, "learning_rate": 1.8615877220187254e-05, "loss": 1.9713, "step": 33140 }, { "epoch": 0.20835542985092217, "grad_norm": 6.972572326660156, "learning_rate": 1.86154581192426e-05, "loss": 1.917, "step": 33150 }, { "epoch": 0.20841828216761926, "grad_norm": 6.9418625831604, "learning_rate": 1.861503901829795e-05, "loss": 1.9291, "step": 33160 }, { "epoch": 0.20848113448431638, "grad_norm": 7.004095077514648, "learning_rate": 1.8614619917353296e-05, "loss": 1.8306, "step": 33170 }, { "epoch": 0.2085439868010135, "grad_norm": 6.511467933654785, "learning_rate": 1.861420081640864e-05, "loss": 1.9946, "step": 33180 }, { "epoch": 0.2086068391177106, "grad_norm": 8.272610664367676, "learning_rate": 1.8613781715463986e-05, "loss": 1.9819, "step": 33190 }, { "epoch": 0.20866969143440772, "grad_norm": 6.825449466705322, "learning_rate": 1.8613362614519333e-05, "loss": 1.8908, "step": 33200 }, { "epoch": 0.20873254375110484, "grad_norm": 8.018668174743652, "learning_rate": 1.861294351357468e-05, "loss": 2.0171, "step": 33210 }, { "epoch": 0.20879539606780192, "grad_norm": 7.659886360168457, "learning_rate": 1.8612524412630027e-05, "loss": 2.2547, "step": 33220 }, { "epoch": 0.20885824838449904, "grad_norm": 7.284714698791504, "learning_rate": 1.8612105311685375e-05, "loss": 1.7837, "step": 33230 }, { "epoch": 0.20892110070119616, "grad_norm": 7.291626930236816, "learning_rate": 1.861168621074072e-05, "loss": 1.972, "step": 33240 }, { "epoch": 0.20898395301789327, "grad_norm": 6.9453606605529785, "learning_rate": 1.861126710979607e-05, "loss": 2.0272, "step": 33250 }, { "epoch": 0.20904680533459039, "grad_norm": 9.36789321899414, "learning_rate": 1.8610848008851416e-05, "loss": 2.0488, "step": 33260 }, { "epoch": 0.2091096576512875, "grad_norm": 6.998178482055664, "learning_rate": 1.861042890790676e-05, "loss": 1.9161, "step": 33270 }, { "epoch": 0.2091725099679846, "grad_norm": 7.868252754211426, "learning_rate": 1.8610009806962107e-05, "loss": 1.996, "step": 33280 }, { "epoch": 0.2092353622846817, "grad_norm": 7.053635597229004, "learning_rate": 1.8609590706017454e-05, "loss": 2.0046, "step": 33290 }, { "epoch": 0.20929821460137882, "grad_norm": 6.25361967086792, "learning_rate": 1.86091716050728e-05, "loss": 1.9745, "step": 33300 }, { "epoch": 0.20936106691807593, "grad_norm": 6.859872817993164, "learning_rate": 1.8608752504128148e-05, "loss": 1.9112, "step": 33310 }, { "epoch": 0.20942391923477305, "grad_norm": 6.452934265136719, "learning_rate": 1.860833340318349e-05, "loss": 1.8774, "step": 33320 }, { "epoch": 0.20948677155147016, "grad_norm": 7.296616077423096, "learning_rate": 1.860791430223884e-05, "loss": 1.9381, "step": 33330 }, { "epoch": 0.20954962386816728, "grad_norm": 6.141557216644287, "learning_rate": 1.8607495201294186e-05, "loss": 1.8099, "step": 33340 }, { "epoch": 0.20961247618486437, "grad_norm": 7.579769611358643, "learning_rate": 1.8607076100349533e-05, "loss": 2.1197, "step": 33350 }, { "epoch": 0.20967532850156148, "grad_norm": 8.49437141418457, "learning_rate": 1.8606656999404876e-05, "loss": 2.0408, "step": 33360 }, { "epoch": 0.2097381808182586, "grad_norm": 7.307910442352295, "learning_rate": 1.8606237898460223e-05, "loss": 1.7301, "step": 33370 }, { "epoch": 0.2098010331349557, "grad_norm": 7.528223991394043, "learning_rate": 1.860581879751557e-05, "loss": 2.0548, "step": 33380 }, { "epoch": 0.20986388545165283, "grad_norm": 7.4743428230285645, "learning_rate": 1.8605399696570918e-05, "loss": 1.9875, "step": 33390 }, { "epoch": 0.20992673776834994, "grad_norm": 6.497557640075684, "learning_rate": 1.8604980595626265e-05, "loss": 2.0149, "step": 33400 }, { "epoch": 0.20998959008504703, "grad_norm": 6.687356948852539, "learning_rate": 1.860456149468161e-05, "loss": 2.0166, "step": 33410 }, { "epoch": 0.21005244240174414, "grad_norm": 7.749118328094482, "learning_rate": 1.8604142393736955e-05, "loss": 1.9113, "step": 33420 }, { "epoch": 0.21011529471844126, "grad_norm": 7.288553237915039, "learning_rate": 1.8603723292792302e-05, "loss": 2.0303, "step": 33430 }, { "epoch": 0.21017814703513837, "grad_norm": 7.221781253814697, "learning_rate": 1.860330419184765e-05, "loss": 1.6279, "step": 33440 }, { "epoch": 0.2102409993518355, "grad_norm": 6.4051666259765625, "learning_rate": 1.8602885090902997e-05, "loss": 1.9534, "step": 33450 }, { "epoch": 0.2103038516685326, "grad_norm": 6.147698402404785, "learning_rate": 1.8602465989958344e-05, "loss": 1.9419, "step": 33460 }, { "epoch": 0.21036670398522972, "grad_norm": 6.287750720977783, "learning_rate": 1.860204688901369e-05, "loss": 1.9551, "step": 33470 }, { "epoch": 0.2104295563019268, "grad_norm": 6.478549957275391, "learning_rate": 1.8601627788069038e-05, "loss": 1.8279, "step": 33480 }, { "epoch": 0.21049240861862392, "grad_norm": 8.444985389709473, "learning_rate": 1.860120868712438e-05, "loss": 2.0476, "step": 33490 }, { "epoch": 0.21055526093532104, "grad_norm": 8.121697425842285, "learning_rate": 1.860078958617973e-05, "loss": 2.0606, "step": 33500 }, { "epoch": 0.21061811325201815, "grad_norm": 6.9500017166137695, "learning_rate": 1.8600370485235076e-05, "loss": 1.9861, "step": 33510 }, { "epoch": 0.21068096556871527, "grad_norm": 5.910980701446533, "learning_rate": 1.8599951384290423e-05, "loss": 1.8387, "step": 33520 }, { "epoch": 0.21074381788541238, "grad_norm": 7.609782695770264, "learning_rate": 1.859953228334577e-05, "loss": 1.9676, "step": 33530 }, { "epoch": 0.21080667020210947, "grad_norm": 6.835140705108643, "learning_rate": 1.8599113182401113e-05, "loss": 2.1067, "step": 33540 }, { "epoch": 0.21086952251880658, "grad_norm": 7.364989757537842, "learning_rate": 1.859869408145646e-05, "loss": 1.9079, "step": 33550 }, { "epoch": 0.2109323748355037, "grad_norm": 8.439874649047852, "learning_rate": 1.8598274980511808e-05, "loss": 1.9671, "step": 33560 }, { "epoch": 0.21099522715220081, "grad_norm": 8.540244102478027, "learning_rate": 1.8597855879567155e-05, "loss": 2.2535, "step": 33570 }, { "epoch": 0.21105807946889793, "grad_norm": 7.351819038391113, "learning_rate": 1.8597436778622498e-05, "loss": 1.9451, "step": 33580 }, { "epoch": 0.21112093178559505, "grad_norm": 7.363847255706787, "learning_rate": 1.8597017677677845e-05, "loss": 1.8018, "step": 33590 }, { "epoch": 0.21118378410229213, "grad_norm": 7.224581718444824, "learning_rate": 1.8596598576733192e-05, "loss": 1.91, "step": 33600 }, { "epoch": 0.21124663641898925, "grad_norm": 8.13434886932373, "learning_rate": 1.859617947578854e-05, "loss": 2.0783, "step": 33610 }, { "epoch": 0.21130948873568636, "grad_norm": 7.822972774505615, "learning_rate": 1.859580228493835e-05, "loss": 1.9776, "step": 33620 }, { "epoch": 0.21137234105238348, "grad_norm": 6.807519435882568, "learning_rate": 1.8595383183993698e-05, "loss": 1.8579, "step": 33630 }, { "epoch": 0.2114351933690806, "grad_norm": 7.029597282409668, "learning_rate": 1.8594964083049045e-05, "loss": 2.0714, "step": 33640 }, { "epoch": 0.2114980456857777, "grad_norm": 7.00266695022583, "learning_rate": 1.8594544982104392e-05, "loss": 1.8764, "step": 33650 }, { "epoch": 0.21156089800247482, "grad_norm": 7.116758823394775, "learning_rate": 1.8594125881159736e-05, "loss": 2.0121, "step": 33660 }, { "epoch": 0.2116237503191719, "grad_norm": 7.175611972808838, "learning_rate": 1.8593706780215083e-05, "loss": 1.7897, "step": 33670 }, { "epoch": 0.21168660263586903, "grad_norm": 6.896945953369141, "learning_rate": 1.859328767927043e-05, "loss": 2.0325, "step": 33680 }, { "epoch": 0.21174945495256614, "grad_norm": 7.818874359130859, "learning_rate": 1.8592868578325777e-05, "loss": 1.9826, "step": 33690 }, { "epoch": 0.21181230726926326, "grad_norm": 7.196053504943848, "learning_rate": 1.8592449477381124e-05, "loss": 1.6508, "step": 33700 }, { "epoch": 0.21187515958596037, "grad_norm": 6.747186660766602, "learning_rate": 1.859203037643647e-05, "loss": 1.8931, "step": 33710 }, { "epoch": 0.21193801190265749, "grad_norm": 7.24590539932251, "learning_rate": 1.8591611275491818e-05, "loss": 1.9535, "step": 33720 }, { "epoch": 0.21200086421935457, "grad_norm": 8.471415519714355, "learning_rate": 1.8591192174547162e-05, "loss": 2.0143, "step": 33730 }, { "epoch": 0.2120637165360517, "grad_norm": 7.297245502471924, "learning_rate": 1.859077307360251e-05, "loss": 1.9066, "step": 33740 }, { "epoch": 0.2121265688527488, "grad_norm": 8.15941047668457, "learning_rate": 1.8590353972657856e-05, "loss": 1.81, "step": 33750 }, { "epoch": 0.21218942116944592, "grad_norm": 7.2019877433776855, "learning_rate": 1.8589934871713203e-05, "loss": 1.9632, "step": 33760 }, { "epoch": 0.21225227348614303, "grad_norm": 6.810883045196533, "learning_rate": 1.858951577076855e-05, "loss": 1.796, "step": 33770 }, { "epoch": 0.21231512580284015, "grad_norm": 7.2735419273376465, "learning_rate": 1.8589096669823897e-05, "loss": 1.993, "step": 33780 }, { "epoch": 0.21237797811953724, "grad_norm": 7.423776149749756, "learning_rate": 1.858867756887924e-05, "loss": 1.9816, "step": 33790 }, { "epoch": 0.21244083043623435, "grad_norm": 6.887326240539551, "learning_rate": 1.8588258467934588e-05, "loss": 2.0905, "step": 33800 }, { "epoch": 0.21250368275293147, "grad_norm": 8.197897911071777, "learning_rate": 1.8587839366989935e-05, "loss": 2.0615, "step": 33810 }, { "epoch": 0.21256653506962858, "grad_norm": 6.670204162597656, "learning_rate": 1.8587420266045282e-05, "loss": 1.9871, "step": 33820 }, { "epoch": 0.2126293873863257, "grad_norm": 7.086750507354736, "learning_rate": 1.8587001165100626e-05, "loss": 1.922, "step": 33830 }, { "epoch": 0.2126922397030228, "grad_norm": 7.694448471069336, "learning_rate": 1.8586582064155973e-05, "loss": 2.1166, "step": 33840 }, { "epoch": 0.21275509201971993, "grad_norm": 7.550271034240723, "learning_rate": 1.858616296321132e-05, "loss": 1.84, "step": 33850 }, { "epoch": 0.21281794433641701, "grad_norm": 6.961797714233398, "learning_rate": 1.8585743862266667e-05, "loss": 2.1871, "step": 33860 }, { "epoch": 0.21288079665311413, "grad_norm": 7.044167995452881, "learning_rate": 1.8585324761322014e-05, "loss": 1.7825, "step": 33870 }, { "epoch": 0.21294364896981124, "grad_norm": 7.101032733917236, "learning_rate": 1.8584905660377358e-05, "loss": 1.9029, "step": 33880 }, { "epoch": 0.21300650128650836, "grad_norm": 7.548702716827393, "learning_rate": 1.8584486559432705e-05, "loss": 1.9279, "step": 33890 }, { "epoch": 0.21306935360320547, "grad_norm": 8.239489555358887, "learning_rate": 1.8584067458488052e-05, "loss": 2.0406, "step": 33900 }, { "epoch": 0.2131322059199026, "grad_norm": 7.909656524658203, "learning_rate": 1.85836483575434e-05, "loss": 2.0369, "step": 33910 }, { "epoch": 0.21319505823659968, "grad_norm": 7.576388835906982, "learning_rate": 1.8583229256598746e-05, "loss": 1.8615, "step": 33920 }, { "epoch": 0.2132579105532968, "grad_norm": 7.289960861206055, "learning_rate": 1.8582810155654093e-05, "loss": 2.0094, "step": 33930 }, { "epoch": 0.2133207628699939, "grad_norm": 7.018068313598633, "learning_rate": 1.858239105470944e-05, "loss": 2.1906, "step": 33940 }, { "epoch": 0.21338361518669102, "grad_norm": 7.7681684494018555, "learning_rate": 1.8581971953764787e-05, "loss": 2.4025, "step": 33950 }, { "epoch": 0.21344646750338814, "grad_norm": 7.197399139404297, "learning_rate": 1.8581552852820134e-05, "loss": 2.0322, "step": 33960 }, { "epoch": 0.21350931982008525, "grad_norm": 7.588764190673828, "learning_rate": 1.8581133751875478e-05, "loss": 2.1111, "step": 33970 }, { "epoch": 0.21357217213678237, "grad_norm": 9.010973930358887, "learning_rate": 1.8580714650930825e-05, "loss": 2.1089, "step": 33980 }, { "epoch": 0.21363502445347946, "grad_norm": 7.639834403991699, "learning_rate": 1.8580295549986172e-05, "loss": 1.7106, "step": 33990 }, { "epoch": 0.21369787677017657, "grad_norm": 7.014494895935059, "learning_rate": 1.857987644904152e-05, "loss": 2.1511, "step": 34000 }, { "epoch": 0.21376072908687369, "grad_norm": 13.64177417755127, "learning_rate": 1.8579457348096863e-05, "loss": 2.2471, "step": 34010 }, { "epoch": 0.2138235814035708, "grad_norm": 8.525516510009766, "learning_rate": 1.857903824715221e-05, "loss": 1.927, "step": 34020 }, { "epoch": 0.21388643372026792, "grad_norm": 7.092137336730957, "learning_rate": 1.8578619146207557e-05, "loss": 2.0672, "step": 34030 }, { "epoch": 0.21394928603696503, "grad_norm": 7.667873382568359, "learning_rate": 1.8578200045262904e-05, "loss": 1.8891, "step": 34040 }, { "epoch": 0.21401213835366212, "grad_norm": 8.19531536102295, "learning_rate": 1.857778094431825e-05, "loss": 1.9668, "step": 34050 }, { "epoch": 0.21407499067035923, "grad_norm": 7.556863784790039, "learning_rate": 1.8577361843373595e-05, "loss": 2.002, "step": 34060 }, { "epoch": 0.21413784298705635, "grad_norm": 6.630819320678711, "learning_rate": 1.8576942742428942e-05, "loss": 1.8466, "step": 34070 }, { "epoch": 0.21420069530375346, "grad_norm": 8.189940452575684, "learning_rate": 1.857652364148429e-05, "loss": 1.9341, "step": 34080 }, { "epoch": 0.21426354762045058, "grad_norm": 7.160456657409668, "learning_rate": 1.8576104540539636e-05, "loss": 2.0784, "step": 34090 }, { "epoch": 0.2143263999371477, "grad_norm": 6.931938171386719, "learning_rate": 1.8575685439594983e-05, "loss": 1.8851, "step": 34100 }, { "epoch": 0.21438925225384478, "grad_norm": 7.42091178894043, "learning_rate": 1.8575266338650327e-05, "loss": 1.9543, "step": 34110 }, { "epoch": 0.2144521045705419, "grad_norm": 5.8673272132873535, "learning_rate": 1.8574847237705674e-05, "loss": 1.9578, "step": 34120 }, { "epoch": 0.214514956887239, "grad_norm": 7.70461893081665, "learning_rate": 1.857442813676102e-05, "loss": 1.9633, "step": 34130 }, { "epoch": 0.21457780920393613, "grad_norm": 7.143349647521973, "learning_rate": 1.8574009035816368e-05, "loss": 1.9404, "step": 34140 }, { "epoch": 0.21464066152063324, "grad_norm": 6.253964900970459, "learning_rate": 1.8573589934871715e-05, "loss": 1.9488, "step": 34150 }, { "epoch": 0.21470351383733036, "grad_norm": 7.41900110244751, "learning_rate": 1.8573170833927062e-05, "loss": 2.109, "step": 34160 }, { "epoch": 0.21476636615402747, "grad_norm": 8.520472526550293, "learning_rate": 1.857275173298241e-05, "loss": 1.9835, "step": 34170 }, { "epoch": 0.21482921847072456, "grad_norm": 7.361577987670898, "learning_rate": 1.8572332632037756e-05, "loss": 1.9218, "step": 34180 }, { "epoch": 0.21489207078742167, "grad_norm": 6.775546073913574, "learning_rate": 1.85719135310931e-05, "loss": 1.7766, "step": 34190 }, { "epoch": 0.2149549231041188, "grad_norm": 6.618929386138916, "learning_rate": 1.8571494430148447e-05, "loss": 1.8218, "step": 34200 }, { "epoch": 0.2150177754208159, "grad_norm": 8.674612045288086, "learning_rate": 1.8571075329203794e-05, "loss": 1.9593, "step": 34210 }, { "epoch": 0.21508062773751302, "grad_norm": 7.391805648803711, "learning_rate": 1.857065622825914e-05, "loss": 2.0312, "step": 34220 }, { "epoch": 0.21514348005421013, "grad_norm": 7.1152753829956055, "learning_rate": 1.8570237127314485e-05, "loss": 2.0281, "step": 34230 }, { "epoch": 0.21520633237090722, "grad_norm": 8.81369400024414, "learning_rate": 1.8569818026369832e-05, "loss": 2.0559, "step": 34240 }, { "epoch": 0.21526918468760434, "grad_norm": 6.1095452308654785, "learning_rate": 1.856939892542518e-05, "loss": 1.7486, "step": 34250 }, { "epoch": 0.21533203700430145, "grad_norm": 7.259755611419678, "learning_rate": 1.8568979824480526e-05, "loss": 2.0501, "step": 34260 }, { "epoch": 0.21539488932099857, "grad_norm": 7.357776165008545, "learning_rate": 1.8568560723535873e-05, "loss": 1.6965, "step": 34270 }, { "epoch": 0.21545774163769568, "grad_norm": 7.37483549118042, "learning_rate": 1.8568141622591217e-05, "loss": 2.0616, "step": 34280 }, { "epoch": 0.2155205939543928, "grad_norm": 7.865124702453613, "learning_rate": 1.8567722521646564e-05, "loss": 2.0083, "step": 34290 }, { "epoch": 0.21558344627108988, "grad_norm": 6.377746105194092, "learning_rate": 1.856730342070191e-05, "loss": 1.9817, "step": 34300 }, { "epoch": 0.215646298587787, "grad_norm": 8.451074600219727, "learning_rate": 1.8566884319757258e-05, "loss": 2.0513, "step": 34310 }, { "epoch": 0.21570915090448411, "grad_norm": 6.891283988952637, "learning_rate": 1.8566465218812605e-05, "loss": 1.7818, "step": 34320 }, { "epoch": 0.21577200322118123, "grad_norm": 7.912236213684082, "learning_rate": 1.8566046117867952e-05, "loss": 2.0117, "step": 34330 }, { "epoch": 0.21583485553787835, "grad_norm": 6.531398296356201, "learning_rate": 1.85656270169233e-05, "loss": 2.1723, "step": 34340 }, { "epoch": 0.21589770785457546, "grad_norm": 7.56461238861084, "learning_rate": 1.8565207915978646e-05, "loss": 2.1443, "step": 34350 }, { "epoch": 0.21596056017127258, "grad_norm": 7.823632717132568, "learning_rate": 1.856478881503399e-05, "loss": 1.9112, "step": 34360 }, { "epoch": 0.21602341248796966, "grad_norm": 8.01508903503418, "learning_rate": 1.8564369714089337e-05, "loss": 2.1299, "step": 34370 }, { "epoch": 0.21608626480466678, "grad_norm": 6.519814968109131, "learning_rate": 1.8563950613144684e-05, "loss": 1.9654, "step": 34380 }, { "epoch": 0.2161491171213639, "grad_norm": 7.177895545959473, "learning_rate": 1.856353151220003e-05, "loss": 2.0657, "step": 34390 }, { "epoch": 0.216211969438061, "grad_norm": 6.885359287261963, "learning_rate": 1.856311241125538e-05, "loss": 1.7852, "step": 34400 }, { "epoch": 0.21627482175475812, "grad_norm": 9.090568542480469, "learning_rate": 1.8562693310310722e-05, "loss": 1.7628, "step": 34410 }, { "epoch": 0.21633767407145524, "grad_norm": 8.141672134399414, "learning_rate": 1.856227420936607e-05, "loss": 1.9475, "step": 34420 }, { "epoch": 0.21640052638815233, "grad_norm": 7.501651287078857, "learning_rate": 1.8561855108421416e-05, "loss": 1.9273, "step": 34430 }, { "epoch": 0.21646337870484944, "grad_norm": 6.813697814941406, "learning_rate": 1.8561436007476763e-05, "loss": 1.9541, "step": 34440 }, { "epoch": 0.21652623102154656, "grad_norm": 6.917619228363037, "learning_rate": 1.8561016906532107e-05, "loss": 1.9514, "step": 34450 }, { "epoch": 0.21658908333824367, "grad_norm": 7.056396961212158, "learning_rate": 1.8560597805587454e-05, "loss": 2.0344, "step": 34460 }, { "epoch": 0.21665193565494079, "grad_norm": 7.068763256072998, "learning_rate": 1.85601787046428e-05, "loss": 2.0627, "step": 34470 }, { "epoch": 0.2167147879716379, "grad_norm": 6.844529151916504, "learning_rate": 1.8559759603698148e-05, "loss": 1.9777, "step": 34480 }, { "epoch": 0.21677764028833502, "grad_norm": 6.759030342102051, "learning_rate": 1.8559340502753495e-05, "loss": 1.8708, "step": 34490 }, { "epoch": 0.2168404926050321, "grad_norm": 6.698564052581787, "learning_rate": 1.855892140180884e-05, "loss": 2.0901, "step": 34500 }, { "epoch": 0.21690334492172922, "grad_norm": 7.497093677520752, "learning_rate": 1.8558502300864186e-05, "loss": 1.7706, "step": 34510 }, { "epoch": 0.21696619723842633, "grad_norm": 8.183006286621094, "learning_rate": 1.8558083199919533e-05, "loss": 2.0137, "step": 34520 }, { "epoch": 0.21702904955512345, "grad_norm": 7.996163845062256, "learning_rate": 1.855766409897488e-05, "loss": 2.0093, "step": 34530 }, { "epoch": 0.21709190187182056, "grad_norm": 7.119990348815918, "learning_rate": 1.8557244998030227e-05, "loss": 1.8239, "step": 34540 }, { "epoch": 0.21715475418851768, "grad_norm": 8.420408248901367, "learning_rate": 1.8556825897085574e-05, "loss": 2.3014, "step": 34550 }, { "epoch": 0.21721760650521477, "grad_norm": 7.02096700668335, "learning_rate": 1.855640679614092e-05, "loss": 1.923, "step": 34560 }, { "epoch": 0.21728045882191188, "grad_norm": 6.849606990814209, "learning_rate": 1.855598769519627e-05, "loss": 2.2225, "step": 34570 }, { "epoch": 0.217343311138609, "grad_norm": 7.89677619934082, "learning_rate": 1.8555568594251615e-05, "loss": 1.5972, "step": 34580 }, { "epoch": 0.2174061634553061, "grad_norm": 7.193415641784668, "learning_rate": 1.855514949330696e-05, "loss": 1.9054, "step": 34590 }, { "epoch": 0.21746901577200323, "grad_norm": 7.3271331787109375, "learning_rate": 1.8554730392362306e-05, "loss": 1.9486, "step": 34600 }, { "epoch": 0.21753186808870034, "grad_norm": 7.489126205444336, "learning_rate": 1.8554311291417653e-05, "loss": 1.8873, "step": 34610 }, { "epoch": 0.21759472040539743, "grad_norm": 6.797194957733154, "learning_rate": 1.8553892190473e-05, "loss": 1.9083, "step": 34620 }, { "epoch": 0.21765757272209454, "grad_norm": 7.961541175842285, "learning_rate": 1.8553473089528344e-05, "loss": 1.6529, "step": 34630 }, { "epoch": 0.21772042503879166, "grad_norm": 7.54440450668335, "learning_rate": 1.855305398858369e-05, "loss": 1.8152, "step": 34640 }, { "epoch": 0.21778327735548877, "grad_norm": 7.466855049133301, "learning_rate": 1.8552634887639038e-05, "loss": 1.6849, "step": 34650 }, { "epoch": 0.2178461296721859, "grad_norm": 7.031246185302734, "learning_rate": 1.8552215786694385e-05, "loss": 1.9987, "step": 34660 }, { "epoch": 0.217908981988883, "grad_norm": 8.001404762268066, "learning_rate": 1.8551796685749732e-05, "loss": 2.2657, "step": 34670 }, { "epoch": 0.21797183430558012, "grad_norm": 6.206151962280273, "learning_rate": 1.8551377584805076e-05, "loss": 2.0952, "step": 34680 }, { "epoch": 0.2180346866222772, "grad_norm": 8.160574913024902, "learning_rate": 1.8550958483860423e-05, "loss": 2.24, "step": 34690 }, { "epoch": 0.21809753893897432, "grad_norm": 8.026134490966797, "learning_rate": 1.855053938291577e-05, "loss": 1.9693, "step": 34700 }, { "epoch": 0.21816039125567144, "grad_norm": 7.086154460906982, "learning_rate": 1.8550120281971117e-05, "loss": 1.8228, "step": 34710 }, { "epoch": 0.21822324357236855, "grad_norm": 7.1722731590271, "learning_rate": 1.8549701181026464e-05, "loss": 1.6558, "step": 34720 }, { "epoch": 0.21828609588906567, "grad_norm": 7.947413444519043, "learning_rate": 1.854928208008181e-05, "loss": 1.6155, "step": 34730 }, { "epoch": 0.21834894820576278, "grad_norm": 8.06889533996582, "learning_rate": 1.8548862979137155e-05, "loss": 2.0601, "step": 34740 }, { "epoch": 0.21841180052245987, "grad_norm": 8.035139083862305, "learning_rate": 1.8548443878192502e-05, "loss": 1.9296, "step": 34750 }, { "epoch": 0.21847465283915699, "grad_norm": 6.365030765533447, "learning_rate": 1.854802477724785e-05, "loss": 1.8284, "step": 34760 }, { "epoch": 0.2185375051558541, "grad_norm": 7.066539764404297, "learning_rate": 1.8547605676303196e-05, "loss": 1.9459, "step": 34770 }, { "epoch": 0.21860035747255122, "grad_norm": 7.134952068328857, "learning_rate": 1.8547186575358543e-05, "loss": 1.9494, "step": 34780 }, { "epoch": 0.21866320978924833, "grad_norm": 6.721660137176514, "learning_rate": 1.854676747441389e-05, "loss": 1.9896, "step": 34790 }, { "epoch": 0.21872606210594545, "grad_norm": 5.704010009765625, "learning_rate": 1.8546348373469237e-05, "loss": 2.0603, "step": 34800 }, { "epoch": 0.21878891442264256, "grad_norm": 6.107640266418457, "learning_rate": 1.8545971182619045e-05, "loss": 1.844, "step": 34810 }, { "epoch": 0.21885176673933965, "grad_norm": 6.949453353881836, "learning_rate": 1.8545552081674392e-05, "loss": 1.992, "step": 34820 }, { "epoch": 0.21891461905603676, "grad_norm": 7.728182315826416, "learning_rate": 1.854513298072974e-05, "loss": 1.9564, "step": 34830 }, { "epoch": 0.21897747137273388, "grad_norm": 6.855501174926758, "learning_rate": 1.8544713879785087e-05, "loss": 2.0618, "step": 34840 }, { "epoch": 0.219040323689431, "grad_norm": 7.998497009277344, "learning_rate": 1.8544294778840434e-05, "loss": 2.0028, "step": 34850 }, { "epoch": 0.2191031760061281, "grad_norm": 7.709017753601074, "learning_rate": 1.854387567789578e-05, "loss": 2.2238, "step": 34860 }, { "epoch": 0.21916602832282522, "grad_norm": 7.136343955993652, "learning_rate": 1.8543456576951128e-05, "loss": 1.8661, "step": 34870 }, { "epoch": 0.2192288806395223, "grad_norm": 6.129396915435791, "learning_rate": 1.8543037476006475e-05, "loss": 1.8171, "step": 34880 }, { "epoch": 0.21929173295621943, "grad_norm": 7.82442569732666, "learning_rate": 1.854261837506182e-05, "loss": 2.0803, "step": 34890 }, { "epoch": 0.21935458527291654, "grad_norm": 7.109223365783691, "learning_rate": 1.8542199274117166e-05, "loss": 1.7726, "step": 34900 }, { "epoch": 0.21941743758961366, "grad_norm": 6.688714981079102, "learning_rate": 1.8541780173172513e-05, "loss": 1.9677, "step": 34910 }, { "epoch": 0.21948028990631077, "grad_norm": 7.616767406463623, "learning_rate": 1.854136107222786e-05, "loss": 1.9761, "step": 34920 }, { "epoch": 0.2195431422230079, "grad_norm": 8.643089294433594, "learning_rate": 1.8540941971283203e-05, "loss": 1.9013, "step": 34930 }, { "epoch": 0.21960599453970497, "grad_norm": 7.0558576583862305, "learning_rate": 1.854052287033855e-05, "loss": 2.1458, "step": 34940 }, { "epoch": 0.2196688468564021, "grad_norm": 8.11750316619873, "learning_rate": 1.8540103769393898e-05, "loss": 1.89, "step": 34950 }, { "epoch": 0.2197316991730992, "grad_norm": 7.2882513999938965, "learning_rate": 1.8539684668449245e-05, "loss": 1.97, "step": 34960 }, { "epoch": 0.21979455148979632, "grad_norm": 6.838789939880371, "learning_rate": 1.853926556750459e-05, "loss": 1.9848, "step": 34970 }, { "epoch": 0.21985740380649343, "grad_norm": 8.134601593017578, "learning_rate": 1.8538846466559935e-05, "loss": 1.9338, "step": 34980 }, { "epoch": 0.21992025612319055, "grad_norm": 7.3437089920043945, "learning_rate": 1.8538427365615282e-05, "loss": 1.9343, "step": 34990 }, { "epoch": 0.21998310843988766, "grad_norm": 7.4846343994140625, "learning_rate": 1.853800826467063e-05, "loss": 2.0585, "step": 35000 }, { "epoch": 0.22004596075658475, "grad_norm": 5.733299732208252, "learning_rate": 1.8537589163725977e-05, "loss": 1.5824, "step": 35010 }, { "epoch": 0.22010881307328187, "grad_norm": 6.123286724090576, "learning_rate": 1.8537170062781324e-05, "loss": 1.7044, "step": 35020 }, { "epoch": 0.22017166538997898, "grad_norm": 7.225106239318848, "learning_rate": 1.853675096183667e-05, "loss": 1.985, "step": 35030 }, { "epoch": 0.2202345177066761, "grad_norm": 7.095724582672119, "learning_rate": 1.8536331860892018e-05, "loss": 1.9784, "step": 35040 }, { "epoch": 0.2202973700233732, "grad_norm": 5.690850734710693, "learning_rate": 1.853591275994736e-05, "loss": 1.8355, "step": 35050 }, { "epoch": 0.22036022234007033, "grad_norm": 6.794473171234131, "learning_rate": 1.853549365900271e-05, "loss": 1.9682, "step": 35060 }, { "epoch": 0.22042307465676741, "grad_norm": 7.523730754852295, "learning_rate": 1.8535074558058056e-05, "loss": 1.9303, "step": 35070 }, { "epoch": 0.22048592697346453, "grad_norm": 6.781824111938477, "learning_rate": 1.8534655457113403e-05, "loss": 2.0273, "step": 35080 }, { "epoch": 0.22054877929016165, "grad_norm": 7.648756980895996, "learning_rate": 1.853423635616875e-05, "loss": 2.0898, "step": 35090 }, { "epoch": 0.22061163160685876, "grad_norm": 6.996502876281738, "learning_rate": 1.8533817255224097e-05, "loss": 1.8881, "step": 35100 }, { "epoch": 0.22067448392355588, "grad_norm": 7.366175174713135, "learning_rate": 1.853339815427944e-05, "loss": 1.9159, "step": 35110 }, { "epoch": 0.220737336240253, "grad_norm": 7.124420166015625, "learning_rate": 1.8532979053334788e-05, "loss": 2.0173, "step": 35120 }, { "epoch": 0.22080018855695008, "grad_norm": 9.098976135253906, "learning_rate": 1.8532559952390135e-05, "loss": 1.9402, "step": 35130 }, { "epoch": 0.2208630408736472, "grad_norm": 7.208345890045166, "learning_rate": 1.8532140851445482e-05, "loss": 2.1235, "step": 35140 }, { "epoch": 0.2209258931903443, "grad_norm": 7.569570064544678, "learning_rate": 1.8531721750500825e-05, "loss": 2.0295, "step": 35150 }, { "epoch": 0.22098874550704142, "grad_norm": 8.353118896484375, "learning_rate": 1.8531302649556173e-05, "loss": 2.1179, "step": 35160 }, { "epoch": 0.22105159782373854, "grad_norm": 6.591300010681152, "learning_rate": 1.853088354861152e-05, "loss": 1.82, "step": 35170 }, { "epoch": 0.22111445014043565, "grad_norm": 7.18334436416626, "learning_rate": 1.8530464447666867e-05, "loss": 2.1257, "step": 35180 }, { "epoch": 0.22117730245713277, "grad_norm": 7.796203136444092, "learning_rate": 1.8530045346722214e-05, "loss": 1.9239, "step": 35190 }, { "epoch": 0.22124015477382986, "grad_norm": 7.146561145782471, "learning_rate": 1.8529626245777557e-05, "loss": 1.7704, "step": 35200 }, { "epoch": 0.22130300709052697, "grad_norm": 6.450527191162109, "learning_rate": 1.8529207144832904e-05, "loss": 2.0196, "step": 35210 }, { "epoch": 0.22136585940722409, "grad_norm": 6.391997814178467, "learning_rate": 1.852878804388825e-05, "loss": 1.9764, "step": 35220 }, { "epoch": 0.2214287117239212, "grad_norm": 7.410685062408447, "learning_rate": 1.85283689429436e-05, "loss": 1.9668, "step": 35230 }, { "epoch": 0.22149156404061832, "grad_norm": 8.92043685913086, "learning_rate": 1.8527949841998946e-05, "loss": 2.041, "step": 35240 }, { "epoch": 0.22155441635731543, "grad_norm": 6.214332580566406, "learning_rate": 1.8527530741054293e-05, "loss": 1.902, "step": 35250 }, { "epoch": 0.22161726867401252, "grad_norm": 7.188347339630127, "learning_rate": 1.852711164010964e-05, "loss": 1.828, "step": 35260 }, { "epoch": 0.22168012099070963, "grad_norm": 7.721948623657227, "learning_rate": 1.8526692539164987e-05, "loss": 2.0143, "step": 35270 }, { "epoch": 0.22174297330740675, "grad_norm": 7.235950946807861, "learning_rate": 1.852627343822033e-05, "loss": 1.6793, "step": 35280 }, { "epoch": 0.22180582562410386, "grad_norm": 7.141629219055176, "learning_rate": 1.8525854337275678e-05, "loss": 1.9044, "step": 35290 }, { "epoch": 0.22186867794080098, "grad_norm": 6.771084308624268, "learning_rate": 1.8525435236331025e-05, "loss": 2.1811, "step": 35300 }, { "epoch": 0.2219315302574981, "grad_norm": 7.240036487579346, "learning_rate": 1.8525016135386372e-05, "loss": 1.8158, "step": 35310 }, { "epoch": 0.2219943825741952, "grad_norm": 6.458792209625244, "learning_rate": 1.852459703444172e-05, "loss": 1.9552, "step": 35320 }, { "epoch": 0.2220572348908923, "grad_norm": 6.815674304962158, "learning_rate": 1.8524177933497063e-05, "loss": 1.8711, "step": 35330 }, { "epoch": 0.2221200872075894, "grad_norm": 6.916085243225098, "learning_rate": 1.852375883255241e-05, "loss": 2.02, "step": 35340 }, { "epoch": 0.22218293952428653, "grad_norm": 6.218405723571777, "learning_rate": 1.8523339731607757e-05, "loss": 1.8892, "step": 35350 }, { "epoch": 0.22224579184098364, "grad_norm": 6.3282294273376465, "learning_rate": 1.8522920630663104e-05, "loss": 1.8681, "step": 35360 }, { "epoch": 0.22230864415768076, "grad_norm": 5.904386043548584, "learning_rate": 1.8522501529718447e-05, "loss": 1.9586, "step": 35370 }, { "epoch": 0.22237149647437787, "grad_norm": 6.8369526863098145, "learning_rate": 1.8522082428773795e-05, "loss": 1.9303, "step": 35380 }, { "epoch": 0.22243434879107496, "grad_norm": 8.529288291931152, "learning_rate": 1.852166332782914e-05, "loss": 2.1147, "step": 35390 }, { "epoch": 0.22249720110777207, "grad_norm": 13.044854164123535, "learning_rate": 1.852124422688449e-05, "loss": 1.7855, "step": 35400 }, { "epoch": 0.2225600534244692, "grad_norm": 7.8360819816589355, "learning_rate": 1.8520825125939836e-05, "loss": 2.1567, "step": 35410 }, { "epoch": 0.2226229057411663, "grad_norm": 7.3442769050598145, "learning_rate": 1.8520406024995183e-05, "loss": 1.9528, "step": 35420 }, { "epoch": 0.22268575805786342, "grad_norm": 6.816009521484375, "learning_rate": 1.8519986924050526e-05, "loss": 1.9875, "step": 35430 }, { "epoch": 0.22274861037456054, "grad_norm": 6.637674331665039, "learning_rate": 1.8519567823105874e-05, "loss": 1.9868, "step": 35440 }, { "epoch": 0.22281146269125762, "grad_norm": 7.885603904724121, "learning_rate": 1.851914872216122e-05, "loss": 2.0622, "step": 35450 }, { "epoch": 0.22287431500795474, "grad_norm": 7.169187068939209, "learning_rate": 1.8518729621216568e-05, "loss": 1.741, "step": 35460 }, { "epoch": 0.22293716732465185, "grad_norm": 7.062591075897217, "learning_rate": 1.8518310520271915e-05, "loss": 1.9444, "step": 35470 }, { "epoch": 0.22300001964134897, "grad_norm": 8.210655212402344, "learning_rate": 1.8517891419327262e-05, "loss": 1.9655, "step": 35480 }, { "epoch": 0.22306287195804608, "grad_norm": 7.064944267272949, "learning_rate": 1.851747231838261e-05, "loss": 2.0158, "step": 35490 }, { "epoch": 0.2231257242747432, "grad_norm": 6.961498737335205, "learning_rate": 1.8517053217437956e-05, "loss": 1.8474, "step": 35500 }, { "epoch": 0.2231885765914403, "grad_norm": 7.126535892486572, "learning_rate": 1.85166341164933e-05, "loss": 1.9145, "step": 35510 }, { "epoch": 0.2232514289081374, "grad_norm": 6.700169563293457, "learning_rate": 1.8516215015548647e-05, "loss": 1.9152, "step": 35520 }, { "epoch": 0.22331428122483452, "grad_norm": 7.212548732757568, "learning_rate": 1.8515795914603994e-05, "loss": 1.967, "step": 35530 }, { "epoch": 0.22337713354153163, "grad_norm": 6.833872318267822, "learning_rate": 1.851537681365934e-05, "loss": 1.7896, "step": 35540 }, { "epoch": 0.22343998585822875, "grad_norm": 7.667410373687744, "learning_rate": 1.8514957712714685e-05, "loss": 1.9779, "step": 35550 }, { "epoch": 0.22350283817492586, "grad_norm": 7.731854438781738, "learning_rate": 1.851453861177003e-05, "loss": 2.0346, "step": 35560 }, { "epoch": 0.22356569049162298, "grad_norm": 7.555546283721924, "learning_rate": 1.851411951082538e-05, "loss": 2.0954, "step": 35570 }, { "epoch": 0.22362854280832006, "grad_norm": 7.541079044342041, "learning_rate": 1.8513700409880726e-05, "loss": 2.0232, "step": 35580 }, { "epoch": 0.22369139512501718, "grad_norm": 7.672801971435547, "learning_rate": 1.851328130893607e-05, "loss": 1.8418, "step": 35590 }, { "epoch": 0.2237542474417143, "grad_norm": 8.663089752197266, "learning_rate": 1.8512862207991417e-05, "loss": 1.9483, "step": 35600 }, { "epoch": 0.2238170997584114, "grad_norm": 7.020554542541504, "learning_rate": 1.8512443107046764e-05, "loss": 2.0585, "step": 35610 }, { "epoch": 0.22387995207510852, "grad_norm": 7.544107437133789, "learning_rate": 1.851202400610211e-05, "loss": 1.9529, "step": 35620 }, { "epoch": 0.22394280439180564, "grad_norm": 7.0736470222473145, "learning_rate": 1.8511604905157458e-05, "loss": 1.7787, "step": 35630 }, { "epoch": 0.22400565670850273, "grad_norm": 8.328640937805176, "learning_rate": 1.8511185804212805e-05, "loss": 1.8952, "step": 35640 }, { "epoch": 0.22406850902519984, "grad_norm": 7.583932876586914, "learning_rate": 1.8510766703268152e-05, "loss": 2.076, "step": 35650 }, { "epoch": 0.22413136134189696, "grad_norm": 7.296407699584961, "learning_rate": 1.85103476023235e-05, "loss": 1.7793, "step": 35660 }, { "epoch": 0.22419421365859407, "grad_norm": 6.540462017059326, "learning_rate": 1.8509928501378846e-05, "loss": 1.9372, "step": 35670 }, { "epoch": 0.2242570659752912, "grad_norm": 7.858813285827637, "learning_rate": 1.850950940043419e-05, "loss": 1.6556, "step": 35680 }, { "epoch": 0.2243199182919883, "grad_norm": 6.697934150695801, "learning_rate": 1.8509090299489537e-05, "loss": 1.7858, "step": 35690 }, { "epoch": 0.22438277060868542, "grad_norm": 7.115276336669922, "learning_rate": 1.8508671198544884e-05, "loss": 1.9851, "step": 35700 }, { "epoch": 0.2244456229253825, "grad_norm": 7.1068925857543945, "learning_rate": 1.850825209760023e-05, "loss": 1.9345, "step": 35710 }, { "epoch": 0.22450847524207962, "grad_norm": 7.588796615600586, "learning_rate": 1.8507832996655578e-05, "loss": 2.0009, "step": 35720 }, { "epoch": 0.22457132755877673, "grad_norm": 6.471503257751465, "learning_rate": 1.850741389571092e-05, "loss": 2.0809, "step": 35730 }, { "epoch": 0.22463417987547385, "grad_norm": 8.151644706726074, "learning_rate": 1.850699479476627e-05, "loss": 2.0383, "step": 35740 }, { "epoch": 0.22469703219217096, "grad_norm": 6.93273401260376, "learning_rate": 1.8506575693821616e-05, "loss": 2.0041, "step": 35750 }, { "epoch": 0.22475988450886808, "grad_norm": 7.2442121505737305, "learning_rate": 1.8506156592876963e-05, "loss": 1.6638, "step": 35760 }, { "epoch": 0.22482273682556517, "grad_norm": 6.275068759918213, "learning_rate": 1.8505737491932307e-05, "loss": 1.8618, "step": 35770 }, { "epoch": 0.22488558914226228, "grad_norm": 8.405787467956543, "learning_rate": 1.8505318390987654e-05, "loss": 2.202, "step": 35780 }, { "epoch": 0.2249484414589594, "grad_norm": 7.45995569229126, "learning_rate": 1.8504899290043e-05, "loss": 2.0527, "step": 35790 }, { "epoch": 0.2250112937756565, "grad_norm": 7.501440048217773, "learning_rate": 1.8504480189098348e-05, "loss": 1.8311, "step": 35800 }, { "epoch": 0.22507414609235363, "grad_norm": 6.156557083129883, "learning_rate": 1.8504061088153695e-05, "loss": 2.0608, "step": 35810 }, { "epoch": 0.22513699840905074, "grad_norm": 7.16656494140625, "learning_rate": 1.850364198720904e-05, "loss": 1.8149, "step": 35820 }, { "epoch": 0.22519985072574786, "grad_norm": 8.1950101852417, "learning_rate": 1.8503222886264386e-05, "loss": 1.8656, "step": 35830 }, { "epoch": 0.22526270304244495, "grad_norm": 7.542734146118164, "learning_rate": 1.8502803785319733e-05, "loss": 2.0309, "step": 35840 }, { "epoch": 0.22532555535914206, "grad_norm": 8.013267517089844, "learning_rate": 1.850238468437508e-05, "loss": 1.7096, "step": 35850 }, { "epoch": 0.22538840767583918, "grad_norm": 7.8642964363098145, "learning_rate": 1.8501965583430427e-05, "loss": 2.0083, "step": 35860 }, { "epoch": 0.2254512599925363, "grad_norm": 7.795157432556152, "learning_rate": 1.8501546482485774e-05, "loss": 1.8551, "step": 35870 }, { "epoch": 0.2255141123092334, "grad_norm": 9.358570098876953, "learning_rate": 1.850112738154112e-05, "loss": 1.9385, "step": 35880 }, { "epoch": 0.22557696462593052, "grad_norm": 7.4309587478637695, "learning_rate": 1.8500708280596468e-05, "loss": 1.6916, "step": 35890 }, { "epoch": 0.2256398169426276, "grad_norm": 7.713372230529785, "learning_rate": 1.8500289179651812e-05, "loss": 1.9155, "step": 35900 }, { "epoch": 0.22570266925932472, "grad_norm": 6.559614181518555, "learning_rate": 1.849987007870716e-05, "loss": 1.9792, "step": 35910 }, { "epoch": 0.22576552157602184, "grad_norm": 8.431838035583496, "learning_rate": 1.8499450977762506e-05, "loss": 2.0648, "step": 35920 }, { "epoch": 0.22582837389271895, "grad_norm": 7.000290393829346, "learning_rate": 1.8499031876817853e-05, "loss": 1.8901, "step": 35930 }, { "epoch": 0.22589122620941607, "grad_norm": 6.437477111816406, "learning_rate": 1.84986127758732e-05, "loss": 1.8744, "step": 35940 }, { "epoch": 0.22595407852611318, "grad_norm": 5.661660194396973, "learning_rate": 1.8498193674928544e-05, "loss": 1.9295, "step": 35950 }, { "epoch": 0.22601693084281027, "grad_norm": 7.3603692054748535, "learning_rate": 1.849777457398389e-05, "loss": 1.9294, "step": 35960 }, { "epoch": 0.22607978315950739, "grad_norm": 8.71002197265625, "learning_rate": 1.8497355473039238e-05, "loss": 1.8093, "step": 35970 }, { "epoch": 0.2261426354762045, "grad_norm": 7.771989345550537, "learning_rate": 1.8496936372094585e-05, "loss": 1.8833, "step": 35980 }, { "epoch": 0.22620548779290162, "grad_norm": 6.397435665130615, "learning_rate": 1.849651727114993e-05, "loss": 1.7469, "step": 35990 }, { "epoch": 0.22626834010959873, "grad_norm": 7.078105449676514, "learning_rate": 1.8496098170205276e-05, "loss": 1.825, "step": 36000 }, { "epoch": 0.22633119242629585, "grad_norm": 7.667077541351318, "learning_rate": 1.8495679069260623e-05, "loss": 1.7933, "step": 36010 }, { "epoch": 0.22639404474299296, "grad_norm": 7.291276931762695, "learning_rate": 1.849525996831597e-05, "loss": 2.0556, "step": 36020 }, { "epoch": 0.22645689705969005, "grad_norm": 7.426200866699219, "learning_rate": 1.8494840867371317e-05, "loss": 1.8302, "step": 36030 }, { "epoch": 0.22651974937638716, "grad_norm": 6.769180774688721, "learning_rate": 1.8494421766426664e-05, "loss": 1.9561, "step": 36040 }, { "epoch": 0.22658260169308428, "grad_norm": 7.960482597351074, "learning_rate": 1.8494002665482008e-05, "loss": 1.7625, "step": 36050 }, { "epoch": 0.2266454540097814, "grad_norm": 7.0758585929870605, "learning_rate": 1.8493583564537355e-05, "loss": 1.8897, "step": 36060 }, { "epoch": 0.2267083063264785, "grad_norm": 6.825809955596924, "learning_rate": 1.8493164463592702e-05, "loss": 1.9526, "step": 36070 }, { "epoch": 0.22677115864317562, "grad_norm": 6.6412858963012695, "learning_rate": 1.849274536264805e-05, "loss": 1.9598, "step": 36080 }, { "epoch": 0.2268340109598727, "grad_norm": 7.068365097045898, "learning_rate": 1.8492326261703396e-05, "loss": 2.061, "step": 36090 }, { "epoch": 0.22689686327656983, "grad_norm": 8.627281188964844, "learning_rate": 1.8491907160758743e-05, "loss": 1.8702, "step": 36100 }, { "epoch": 0.22695971559326694, "grad_norm": 6.969089984893799, "learning_rate": 1.849148805981409e-05, "loss": 2.0275, "step": 36110 }, { "epoch": 0.22702256790996406, "grad_norm": 7.00883674621582, "learning_rate": 1.8491068958869437e-05, "loss": 2.0406, "step": 36120 }, { "epoch": 0.22708542022666117, "grad_norm": 7.159546375274658, "learning_rate": 1.849064985792478e-05, "loss": 2.1287, "step": 36130 }, { "epoch": 0.2271482725433583, "grad_norm": 6.845968723297119, "learning_rate": 1.8490230756980128e-05, "loss": 1.9461, "step": 36140 }, { "epoch": 0.2272111248600554, "grad_norm": 7.5593414306640625, "learning_rate": 1.8489811656035475e-05, "loss": 2.1837, "step": 36150 }, { "epoch": 0.2272739771767525, "grad_norm": 7.399803638458252, "learning_rate": 1.8489392555090822e-05, "loss": 1.9431, "step": 36160 }, { "epoch": 0.2273368294934496, "grad_norm": 6.044048309326172, "learning_rate": 1.8488973454146166e-05, "loss": 2.0285, "step": 36170 }, { "epoch": 0.22739968181014672, "grad_norm": 7.432627201080322, "learning_rate": 1.8488554353201513e-05, "loss": 2.0044, "step": 36180 }, { "epoch": 0.22746253412684383, "grad_norm": 7.868930816650391, "learning_rate": 1.848813525225686e-05, "loss": 1.8236, "step": 36190 }, { "epoch": 0.22752538644354095, "grad_norm": 7.376712322235107, "learning_rate": 1.8487716151312207e-05, "loss": 1.7301, "step": 36200 }, { "epoch": 0.22758823876023807, "grad_norm": 7.406872749328613, "learning_rate": 1.848729705036755e-05, "loss": 1.9849, "step": 36210 }, { "epoch": 0.22765109107693515, "grad_norm": 8.414949417114258, "learning_rate": 1.8486877949422898e-05, "loss": 1.8838, "step": 36220 }, { "epoch": 0.22771394339363227, "grad_norm": 8.242228507995605, "learning_rate": 1.8486458848478245e-05, "loss": 1.8699, "step": 36230 }, { "epoch": 0.22777679571032938, "grad_norm": 7.094605922698975, "learning_rate": 1.8486039747533592e-05, "loss": 2.0294, "step": 36240 }, { "epoch": 0.2278396480270265, "grad_norm": 7.137697219848633, "learning_rate": 1.848562064658894e-05, "loss": 1.9057, "step": 36250 }, { "epoch": 0.2279025003437236, "grad_norm": 7.533381462097168, "learning_rate": 1.8485201545644286e-05, "loss": 1.7952, "step": 36260 }, { "epoch": 0.22796535266042073, "grad_norm": 6.198008060455322, "learning_rate": 1.8484782444699633e-05, "loss": 1.9044, "step": 36270 }, { "epoch": 0.22802820497711782, "grad_norm": 9.231335639953613, "learning_rate": 1.848436334375498e-05, "loss": 2.181, "step": 36280 }, { "epoch": 0.22809105729381493, "grad_norm": 8.630999565124512, "learning_rate": 1.8483944242810327e-05, "loss": 1.7809, "step": 36290 }, { "epoch": 0.22815390961051205, "grad_norm": 6.130497455596924, "learning_rate": 1.848352514186567e-05, "loss": 1.9128, "step": 36300 }, { "epoch": 0.22821676192720916, "grad_norm": 6.748734951019287, "learning_rate": 1.8483106040921018e-05, "loss": 1.9731, "step": 36310 }, { "epoch": 0.22827961424390628, "grad_norm": 6.8579182624816895, "learning_rate": 1.8482686939976365e-05, "loss": 1.895, "step": 36320 }, { "epoch": 0.2283424665606034, "grad_norm": 7.391290187835693, "learning_rate": 1.8482267839031712e-05, "loss": 1.9956, "step": 36330 }, { "epoch": 0.2284053188773005, "grad_norm": 7.041627407073975, "learning_rate": 1.848184873808706e-05, "loss": 1.8528, "step": 36340 }, { "epoch": 0.2284681711939976, "grad_norm": 7.496146202087402, "learning_rate": 1.8481429637142403e-05, "loss": 1.9317, "step": 36350 }, { "epoch": 0.2285310235106947, "grad_norm": 7.350832939147949, "learning_rate": 1.848101053619775e-05, "loss": 1.9906, "step": 36360 }, { "epoch": 0.22859387582739182, "grad_norm": 7.2522664070129395, "learning_rate": 1.8480591435253097e-05, "loss": 1.7981, "step": 36370 }, { "epoch": 0.22865672814408894, "grad_norm": 6.560575008392334, "learning_rate": 1.8480172334308444e-05, "loss": 2.0547, "step": 36380 }, { "epoch": 0.22871958046078605, "grad_norm": 7.1260552406311035, "learning_rate": 1.8479753233363788e-05, "loss": 1.8038, "step": 36390 }, { "epoch": 0.22878243277748317, "grad_norm": 6.739349842071533, "learning_rate": 1.8479334132419135e-05, "loss": 1.7947, "step": 36400 }, { "epoch": 0.22884528509418026, "grad_norm": 6.752826690673828, "learning_rate": 1.8478915031474482e-05, "loss": 1.8501, "step": 36410 }, { "epoch": 0.22890813741087737, "grad_norm": 7.900572776794434, "learning_rate": 1.847849593052983e-05, "loss": 2.0682, "step": 36420 }, { "epoch": 0.2289709897275745, "grad_norm": 8.176858901977539, "learning_rate": 1.8478076829585176e-05, "loss": 1.8969, "step": 36430 }, { "epoch": 0.2290338420442716, "grad_norm": 6.831380844116211, "learning_rate": 1.847765772864052e-05, "loss": 1.9836, "step": 36440 }, { "epoch": 0.22909669436096872, "grad_norm": 7.1293721199035645, "learning_rate": 1.8477238627695867e-05, "loss": 2.1393, "step": 36450 }, { "epoch": 0.22915954667766583, "grad_norm": 7.110170364379883, "learning_rate": 1.8476819526751214e-05, "loss": 1.9506, "step": 36460 }, { "epoch": 0.22922239899436292, "grad_norm": 7.697797775268555, "learning_rate": 1.847640042580656e-05, "loss": 1.7845, "step": 36470 }, { "epoch": 0.22928525131106003, "grad_norm": 7.563226699829102, "learning_rate": 1.8475981324861908e-05, "loss": 1.9659, "step": 36480 }, { "epoch": 0.22934810362775715, "grad_norm": 7.161571025848389, "learning_rate": 1.8475562223917255e-05, "loss": 1.7976, "step": 36490 }, { "epoch": 0.22941095594445426, "grad_norm": 7.61132287979126, "learning_rate": 1.8475143122972602e-05, "loss": 1.8065, "step": 36500 }, { "epoch": 0.22947380826115138, "grad_norm": 7.758613109588623, "learning_rate": 1.847472402202795e-05, "loss": 1.8325, "step": 36510 }, { "epoch": 0.2295366605778485, "grad_norm": 6.902079105377197, "learning_rate": 1.8474304921083293e-05, "loss": 1.9519, "step": 36520 }, { "epoch": 0.2295995128945456, "grad_norm": 6.420543193817139, "learning_rate": 1.847388582013864e-05, "loss": 1.8842, "step": 36530 }, { "epoch": 0.2296623652112427, "grad_norm": 6.8175272941589355, "learning_rate": 1.8473466719193987e-05, "loss": 1.9314, "step": 36540 }, { "epoch": 0.2297252175279398, "grad_norm": 7.08127498626709, "learning_rate": 1.8473047618249334e-05, "loss": 1.9251, "step": 36550 }, { "epoch": 0.22978806984463693, "grad_norm": 6.201793670654297, "learning_rate": 1.847262851730468e-05, "loss": 1.926, "step": 36560 }, { "epoch": 0.22985092216133404, "grad_norm": 7.628470420837402, "learning_rate": 1.8472209416360025e-05, "loss": 1.9377, "step": 36570 }, { "epoch": 0.22991377447803116, "grad_norm": 7.624232769012451, "learning_rate": 1.8471790315415372e-05, "loss": 1.9868, "step": 36580 }, { "epoch": 0.22997662679472827, "grad_norm": 7.706589698791504, "learning_rate": 1.847137121447072e-05, "loss": 1.8757, "step": 36590 }, { "epoch": 0.23003947911142536, "grad_norm": 8.891164779663086, "learning_rate": 1.8470952113526066e-05, "loss": 1.9888, "step": 36600 }, { "epoch": 0.23010233142812248, "grad_norm": 6.154812335968018, "learning_rate": 1.847053301258141e-05, "loss": 1.8775, "step": 36610 }, { "epoch": 0.2301651837448196, "grad_norm": 8.748318672180176, "learning_rate": 1.8470113911636757e-05, "loss": 1.9606, "step": 36620 }, { "epoch": 0.2302280360615167, "grad_norm": 7.380129814147949, "learning_rate": 1.8469694810692104e-05, "loss": 1.8137, "step": 36630 }, { "epoch": 0.23029088837821382, "grad_norm": 6.104862213134766, "learning_rate": 1.846927570974745e-05, "loss": 1.7568, "step": 36640 }, { "epoch": 0.23035374069491094, "grad_norm": 8.323019027709961, "learning_rate": 1.8468856608802798e-05, "loss": 1.7345, "step": 36650 }, { "epoch": 0.23041659301160805, "grad_norm": 7.859176158905029, "learning_rate": 1.8468437507858145e-05, "loss": 1.8129, "step": 36660 }, { "epoch": 0.23047944532830514, "grad_norm": 8.44453239440918, "learning_rate": 1.8468018406913492e-05, "loss": 1.9672, "step": 36670 }, { "epoch": 0.23054229764500225, "grad_norm": 7.817448616027832, "learning_rate": 1.8467599305968836e-05, "loss": 1.8993, "step": 36680 }, { "epoch": 0.23060514996169937, "grad_norm": 7.425734043121338, "learning_rate": 1.8467180205024183e-05, "loss": 1.9744, "step": 36690 }, { "epoch": 0.23066800227839648, "grad_norm": 6.993030548095703, "learning_rate": 1.846676110407953e-05, "loss": 1.9328, "step": 36700 }, { "epoch": 0.2307308545950936, "grad_norm": 6.675727367401123, "learning_rate": 1.8466342003134877e-05, "loss": 1.9184, "step": 36710 }, { "epoch": 0.2307937069117907, "grad_norm": 7.259601593017578, "learning_rate": 1.8465922902190224e-05, "loss": 2.027, "step": 36720 }, { "epoch": 0.2308565592284878, "grad_norm": 6.463089942932129, "learning_rate": 1.846550380124557e-05, "loss": 1.9571, "step": 36730 }, { "epoch": 0.23091941154518492, "grad_norm": 6.317759990692139, "learning_rate": 1.8465084700300918e-05, "loss": 1.838, "step": 36740 }, { "epoch": 0.23098226386188203, "grad_norm": 7.501256942749023, "learning_rate": 1.8464665599356262e-05, "loss": 1.8231, "step": 36750 }, { "epoch": 0.23104511617857915, "grad_norm": 7.5587663650512695, "learning_rate": 1.846424649841161e-05, "loss": 1.8397, "step": 36760 }, { "epoch": 0.23110796849527626, "grad_norm": 6.539580345153809, "learning_rate": 1.8463827397466956e-05, "loss": 2.1636, "step": 36770 }, { "epoch": 0.23117082081197338, "grad_norm": 7.328402042388916, "learning_rate": 1.8463408296522303e-05, "loss": 1.6553, "step": 36780 }, { "epoch": 0.23123367312867046, "grad_norm": 7.860400676727295, "learning_rate": 1.8462989195577647e-05, "loss": 2.0599, "step": 36790 }, { "epoch": 0.23129652544536758, "grad_norm": 6.228035926818848, "learning_rate": 1.8462570094632994e-05, "loss": 1.9108, "step": 36800 }, { "epoch": 0.2313593777620647, "grad_norm": 8.228126525878906, "learning_rate": 1.846215099368834e-05, "loss": 1.9351, "step": 36810 }, { "epoch": 0.2314222300787618, "grad_norm": 6.4865617752075195, "learning_rate": 1.8461731892743688e-05, "loss": 1.9241, "step": 36820 }, { "epoch": 0.23148508239545892, "grad_norm": 8.462693214416504, "learning_rate": 1.846131279179903e-05, "loss": 1.7073, "step": 36830 }, { "epoch": 0.23154793471215604, "grad_norm": 8.767376899719238, "learning_rate": 1.846089369085438e-05, "loss": 2.2247, "step": 36840 }, { "epoch": 0.23161078702885315, "grad_norm": 7.113800525665283, "learning_rate": 1.8460474589909726e-05, "loss": 1.9196, "step": 36850 }, { "epoch": 0.23167363934555024, "grad_norm": 7.881669044494629, "learning_rate": 1.8460055488965073e-05, "loss": 1.8683, "step": 36860 }, { "epoch": 0.23173649166224736, "grad_norm": 7.283566951751709, "learning_rate": 1.845963638802042e-05, "loss": 1.9212, "step": 36870 }, { "epoch": 0.23179934397894447, "grad_norm": 6.925536155700684, "learning_rate": 1.8459217287075767e-05, "loss": 1.8082, "step": 36880 }, { "epoch": 0.2318621962956416, "grad_norm": 7.803654193878174, "learning_rate": 1.8458798186131114e-05, "loss": 1.9035, "step": 36890 }, { "epoch": 0.2319250486123387, "grad_norm": 5.596521377563477, "learning_rate": 1.845837908518646e-05, "loss": 1.7774, "step": 36900 }, { "epoch": 0.23198790092903582, "grad_norm": 6.094423770904541, "learning_rate": 1.8457959984241808e-05, "loss": 1.9895, "step": 36910 }, { "epoch": 0.2320507532457329, "grad_norm": 7.609260559082031, "learning_rate": 1.8457540883297152e-05, "loss": 2.0385, "step": 36920 }, { "epoch": 0.23211360556243002, "grad_norm": 6.813177585601807, "learning_rate": 1.84571217823525e-05, "loss": 1.9822, "step": 36930 }, { "epoch": 0.23217645787912713, "grad_norm": 6.039997577667236, "learning_rate": 1.8456702681407846e-05, "loss": 1.9013, "step": 36940 }, { "epoch": 0.23223931019582425, "grad_norm": 7.389283180236816, "learning_rate": 1.8456283580463193e-05, "loss": 2.094, "step": 36950 }, { "epoch": 0.23230216251252137, "grad_norm": 7.88203763961792, "learning_rate": 1.845586447951854e-05, "loss": 2.1993, "step": 36960 }, { "epoch": 0.23236501482921848, "grad_norm": 6.947795867919922, "learning_rate": 1.8455445378573884e-05, "loss": 2.0832, "step": 36970 }, { "epoch": 0.23242786714591557, "grad_norm": 6.959676265716553, "learning_rate": 1.845502627762923e-05, "loss": 1.9911, "step": 36980 }, { "epoch": 0.23249071946261268, "grad_norm": 7.610995292663574, "learning_rate": 1.8454607176684578e-05, "loss": 1.7864, "step": 36990 }, { "epoch": 0.2325535717793098, "grad_norm": 7.01109504699707, "learning_rate": 1.8454188075739925e-05, "loss": 1.9153, "step": 37000 }, { "epoch": 0.2326164240960069, "grad_norm": 6.2115960121154785, "learning_rate": 1.845376897479527e-05, "loss": 1.9913, "step": 37010 }, { "epoch": 0.23267927641270403, "grad_norm": 7.3208770751953125, "learning_rate": 1.8453349873850616e-05, "loss": 1.9758, "step": 37020 }, { "epoch": 0.23274212872940114, "grad_norm": 6.857991695404053, "learning_rate": 1.8452930772905963e-05, "loss": 1.8901, "step": 37030 }, { "epoch": 0.23280498104609826, "grad_norm": 6.762681007385254, "learning_rate": 1.845251167196131e-05, "loss": 1.966, "step": 37040 }, { "epoch": 0.23286783336279535, "grad_norm": 7.531749725341797, "learning_rate": 1.8452092571016657e-05, "loss": 1.8867, "step": 37050 }, { "epoch": 0.23293068567949246, "grad_norm": 7.440679550170898, "learning_rate": 1.8451673470072e-05, "loss": 1.8707, "step": 37060 }, { "epoch": 0.23299353799618958, "grad_norm": 12.17404842376709, "learning_rate": 1.8451254369127348e-05, "loss": 1.8228, "step": 37070 }, { "epoch": 0.2330563903128867, "grad_norm": 8.215689659118652, "learning_rate": 1.8450835268182695e-05, "loss": 1.9519, "step": 37080 }, { "epoch": 0.2331192426295838, "grad_norm": 8.259377479553223, "learning_rate": 1.8450416167238042e-05, "loss": 1.8253, "step": 37090 }, { "epoch": 0.23318209494628092, "grad_norm": 8.118638038635254, "learning_rate": 1.844999706629339e-05, "loss": 1.8352, "step": 37100 }, { "epoch": 0.233244947262978, "grad_norm": 7.638366222381592, "learning_rate": 1.8449577965348736e-05, "loss": 1.8802, "step": 37110 }, { "epoch": 0.23330779957967512, "grad_norm": 7.182828426361084, "learning_rate": 1.8449158864404083e-05, "loss": 1.8821, "step": 37120 }, { "epoch": 0.23337065189637224, "grad_norm": 7.47348690032959, "learning_rate": 1.844873976345943e-05, "loss": 2.0787, "step": 37130 }, { "epoch": 0.23343350421306935, "grad_norm": 8.01142692565918, "learning_rate": 1.8448320662514774e-05, "loss": 1.7158, "step": 37140 }, { "epoch": 0.23349635652976647, "grad_norm": 9.263545036315918, "learning_rate": 1.844790156157012e-05, "loss": 2.0585, "step": 37150 }, { "epoch": 0.23355920884646358, "grad_norm": 7.731915473937988, "learning_rate": 1.8447482460625468e-05, "loss": 1.8395, "step": 37160 }, { "epoch": 0.2336220611631607, "grad_norm": 7.082794666290283, "learning_rate": 1.8447063359680815e-05, "loss": 1.777, "step": 37170 }, { "epoch": 0.2336849134798578, "grad_norm": 8.51518726348877, "learning_rate": 1.8446644258736162e-05, "loss": 1.9184, "step": 37180 }, { "epoch": 0.2337477657965549, "grad_norm": 6.730716228485107, "learning_rate": 1.8446225157791506e-05, "loss": 1.8458, "step": 37190 }, { "epoch": 0.23381061811325202, "grad_norm": 7.255427837371826, "learning_rate": 1.8445806056846853e-05, "loss": 1.9946, "step": 37200 }, { "epoch": 0.23387347042994913, "grad_norm": 8.256935119628906, "learning_rate": 1.84453869559022e-05, "loss": 1.9618, "step": 37210 }, { "epoch": 0.23393632274664625, "grad_norm": 7.718365669250488, "learning_rate": 1.8444967854957547e-05, "loss": 1.7952, "step": 37220 }, { "epoch": 0.23399917506334336, "grad_norm": 6.30307674407959, "learning_rate": 1.844454875401289e-05, "loss": 1.8853, "step": 37230 }, { "epoch": 0.23406202738004045, "grad_norm": 6.986265182495117, "learning_rate": 1.8444129653068238e-05, "loss": 1.8914, "step": 37240 }, { "epoch": 0.23412487969673756, "grad_norm": 7.488687515258789, "learning_rate": 1.8443710552123585e-05, "loss": 2.0103, "step": 37250 }, { "epoch": 0.23418773201343468, "grad_norm": 7.455891132354736, "learning_rate": 1.8443291451178932e-05, "loss": 1.8, "step": 37260 }, { "epoch": 0.2342505843301318, "grad_norm": 8.035429954528809, "learning_rate": 1.844287235023428e-05, "loss": 1.9958, "step": 37270 }, { "epoch": 0.2343134366468289, "grad_norm": 6.650293827056885, "learning_rate": 1.8442453249289626e-05, "loss": 1.9657, "step": 37280 }, { "epoch": 0.23437628896352602, "grad_norm": 6.432164192199707, "learning_rate": 1.8442034148344973e-05, "loss": 1.734, "step": 37290 }, { "epoch": 0.2344391412802231, "grad_norm": 7.902127742767334, "learning_rate": 1.844161504740032e-05, "loss": 2.1061, "step": 37300 }, { "epoch": 0.23450199359692023, "grad_norm": 7.714845180511475, "learning_rate": 1.8441195946455664e-05, "loss": 1.9538, "step": 37310 }, { "epoch": 0.23456484591361734, "grad_norm": 7.619652271270752, "learning_rate": 1.844077684551101e-05, "loss": 1.9894, "step": 37320 }, { "epoch": 0.23462769823031446, "grad_norm": 7.897617816925049, "learning_rate": 1.8440357744566358e-05, "loss": 1.9426, "step": 37330 }, { "epoch": 0.23469055054701157, "grad_norm": 7.258893966674805, "learning_rate": 1.8439938643621705e-05, "loss": 1.8904, "step": 37340 }, { "epoch": 0.2347534028637087, "grad_norm": 7.671611309051514, "learning_rate": 1.8439519542677052e-05, "loss": 1.8734, "step": 37350 }, { "epoch": 0.2348162551804058, "grad_norm": 6.240477085113525, "learning_rate": 1.84391004417324e-05, "loss": 1.7576, "step": 37360 }, { "epoch": 0.2348791074971029, "grad_norm": 7.337497234344482, "learning_rate": 1.8438681340787743e-05, "loss": 1.7614, "step": 37370 }, { "epoch": 0.2349419598138, "grad_norm": 6.677089214324951, "learning_rate": 1.843826223984309e-05, "loss": 2.0978, "step": 37380 }, { "epoch": 0.23500481213049712, "grad_norm": 7.023135185241699, "learning_rate": 1.8437843138898437e-05, "loss": 1.943, "step": 37390 }, { "epoch": 0.23506766444719424, "grad_norm": 7.9640374183654785, "learning_rate": 1.8437424037953784e-05, "loss": 1.9079, "step": 37400 }, { "epoch": 0.23513051676389135, "grad_norm": 7.736348628997803, "learning_rate": 1.8437004937009128e-05, "loss": 1.9522, "step": 37410 }, { "epoch": 0.23519336908058847, "grad_norm": 7.060858249664307, "learning_rate": 1.8436585836064475e-05, "loss": 1.7814, "step": 37420 }, { "epoch": 0.23525622139728555, "grad_norm": 7.679143905639648, "learning_rate": 1.8436166735119822e-05, "loss": 2.0866, "step": 37430 }, { "epoch": 0.23531907371398267, "grad_norm": 6.9466352462768555, "learning_rate": 1.843574763417517e-05, "loss": 2.0974, "step": 37440 }, { "epoch": 0.23538192603067978, "grad_norm": 6.300509929656982, "learning_rate": 1.8435328533230513e-05, "loss": 1.7952, "step": 37450 }, { "epoch": 0.2354447783473769, "grad_norm": 7.0272135734558105, "learning_rate": 1.843490943228586e-05, "loss": 1.6515, "step": 37460 }, { "epoch": 0.235507630664074, "grad_norm": 7.288054466247559, "learning_rate": 1.8434490331341207e-05, "loss": 1.8176, "step": 37470 }, { "epoch": 0.23557048298077113, "grad_norm": 8.304322242736816, "learning_rate": 1.8434071230396554e-05, "loss": 2.1369, "step": 37480 }, { "epoch": 0.23563333529746824, "grad_norm": 6.926466464996338, "learning_rate": 1.84336521294519e-05, "loss": 2.0315, "step": 37490 }, { "epoch": 0.23569618761416533, "grad_norm": 7.540550708770752, "learning_rate": 1.8433233028507248e-05, "loss": 2.0642, "step": 37500 }, { "epoch": 0.23575903993086245, "grad_norm": 7.766870021820068, "learning_rate": 1.8432813927562595e-05, "loss": 1.8289, "step": 37510 }, { "epoch": 0.23582189224755956, "grad_norm": 6.657734394073486, "learning_rate": 1.8432394826617942e-05, "loss": 1.945, "step": 37520 }, { "epoch": 0.23588474456425668, "grad_norm": 6.852512836456299, "learning_rate": 1.843197572567329e-05, "loss": 1.7377, "step": 37530 }, { "epoch": 0.2359475968809538, "grad_norm": 7.0113444328308105, "learning_rate": 1.8431556624728633e-05, "loss": 1.9068, "step": 37540 }, { "epoch": 0.2360104491976509, "grad_norm": 7.569218635559082, "learning_rate": 1.843113752378398e-05, "loss": 1.7622, "step": 37550 }, { "epoch": 0.236073301514348, "grad_norm": 7.438565254211426, "learning_rate": 1.8430718422839327e-05, "loss": 1.9368, "step": 37560 }, { "epoch": 0.2361361538310451, "grad_norm": 10.159157752990723, "learning_rate": 1.8430299321894674e-05, "loss": 1.9322, "step": 37570 }, { "epoch": 0.23619900614774222, "grad_norm": 6.393486022949219, "learning_rate": 1.842988022095002e-05, "loss": 2.1107, "step": 37580 }, { "epoch": 0.23626185846443934, "grad_norm": 7.414916038513184, "learning_rate": 1.8429461120005365e-05, "loss": 1.8803, "step": 37590 }, { "epoch": 0.23632471078113645, "grad_norm": 7.108283519744873, "learning_rate": 1.8429042019060712e-05, "loss": 2.1052, "step": 37600 }, { "epoch": 0.23638756309783357, "grad_norm": 7.331905841827393, "learning_rate": 1.842862291811606e-05, "loss": 1.7375, "step": 37610 }, { "epoch": 0.23645041541453066, "grad_norm": 6.825594425201416, "learning_rate": 1.8428203817171406e-05, "loss": 1.8658, "step": 37620 }, { "epoch": 0.23651326773122777, "grad_norm": 7.41994047164917, "learning_rate": 1.842778471622675e-05, "loss": 1.8993, "step": 37630 }, { "epoch": 0.2365761200479249, "grad_norm": 6.07677698135376, "learning_rate": 1.8427365615282097e-05, "loss": 1.8716, "step": 37640 }, { "epoch": 0.236638972364622, "grad_norm": 6.925858497619629, "learning_rate": 1.8426946514337444e-05, "loss": 1.6922, "step": 37650 }, { "epoch": 0.23670182468131912, "grad_norm": 6.979998588562012, "learning_rate": 1.842652741339279e-05, "loss": 2.3251, "step": 37660 }, { "epoch": 0.23676467699801623, "grad_norm": 7.7621541023254395, "learning_rate": 1.8426108312448138e-05, "loss": 1.9647, "step": 37670 }, { "epoch": 0.23682752931471335, "grad_norm": 6.522325038909912, "learning_rate": 1.8425689211503482e-05, "loss": 1.9629, "step": 37680 }, { "epoch": 0.23689038163141043, "grad_norm": 6.459441661834717, "learning_rate": 1.842527011055883e-05, "loss": 1.8711, "step": 37690 }, { "epoch": 0.23695323394810755, "grad_norm": 7.515565395355225, "learning_rate": 1.8424851009614176e-05, "loss": 2.0095, "step": 37700 }, { "epoch": 0.23701608626480467, "grad_norm": 8.603837966918945, "learning_rate": 1.8424431908669523e-05, "loss": 2.128, "step": 37710 }, { "epoch": 0.23707893858150178, "grad_norm": 9.59928035736084, "learning_rate": 1.842401280772487e-05, "loss": 1.8273, "step": 37720 }, { "epoch": 0.2371417908981989, "grad_norm": 10.73585033416748, "learning_rate": 1.8423593706780217e-05, "loss": 2.014, "step": 37730 }, { "epoch": 0.237204643214896, "grad_norm": 6.706937313079834, "learning_rate": 1.8423174605835564e-05, "loss": 2.0051, "step": 37740 }, { "epoch": 0.2372674955315931, "grad_norm": 5.793028831481934, "learning_rate": 1.842275550489091e-05, "loss": 1.7939, "step": 37750 }, { "epoch": 0.2373303478482902, "grad_norm": 6.68222713470459, "learning_rate": 1.842233640394626e-05, "loss": 1.9552, "step": 37760 }, { "epoch": 0.23739320016498733, "grad_norm": 7.45481538772583, "learning_rate": 1.8421917303001602e-05, "loss": 1.7819, "step": 37770 }, { "epoch": 0.23745605248168444, "grad_norm": 6.734755039215088, "learning_rate": 1.842149820205695e-05, "loss": 1.9601, "step": 37780 }, { "epoch": 0.23751890479838156, "grad_norm": 7.868802547454834, "learning_rate": 1.8421079101112296e-05, "loss": 1.8973, "step": 37790 }, { "epoch": 0.23758175711507867, "grad_norm": 7.722999572753906, "learning_rate": 1.8420660000167643e-05, "loss": 1.8141, "step": 37800 }, { "epoch": 0.23764460943177576, "grad_norm": 6.061532974243164, "learning_rate": 1.8420240899222987e-05, "loss": 1.8178, "step": 37810 }, { "epoch": 0.23770746174847288, "grad_norm": 8.7529878616333, "learning_rate": 1.8419821798278334e-05, "loss": 2.0316, "step": 37820 }, { "epoch": 0.23777031406517, "grad_norm": 7.669527053833008, "learning_rate": 1.841940269733368e-05, "loss": 1.9007, "step": 37830 }, { "epoch": 0.2378331663818671, "grad_norm": 7.42069149017334, "learning_rate": 1.8418983596389028e-05, "loss": 1.7614, "step": 37840 }, { "epoch": 0.23789601869856422, "grad_norm": 6.95194673538208, "learning_rate": 1.8418564495444372e-05, "loss": 1.952, "step": 37850 }, { "epoch": 0.23795887101526134, "grad_norm": 11.418732643127441, "learning_rate": 1.841814539449972e-05, "loss": 1.9587, "step": 37860 }, { "epoch": 0.23802172333195845, "grad_norm": 7.482511043548584, "learning_rate": 1.8417726293555066e-05, "loss": 2.0166, "step": 37870 }, { "epoch": 0.23808457564865554, "grad_norm": 16.596914291381836, "learning_rate": 1.8417307192610413e-05, "loss": 2.0108, "step": 37880 }, { "epoch": 0.23814742796535265, "grad_norm": 6.911890506744385, "learning_rate": 1.841688809166576e-05, "loss": 1.7536, "step": 37890 }, { "epoch": 0.23821028028204977, "grad_norm": 8.240942001342773, "learning_rate": 1.8416468990721107e-05, "loss": 2.0914, "step": 37900 }, { "epoch": 0.23827313259874688, "grad_norm": 7.102033615112305, "learning_rate": 1.8416049889776454e-05, "loss": 1.9289, "step": 37910 }, { "epoch": 0.238335984915444, "grad_norm": 7.021769046783447, "learning_rate": 1.84156307888318e-05, "loss": 1.9185, "step": 37920 }, { "epoch": 0.23839883723214111, "grad_norm": 6.7171735763549805, "learning_rate": 1.8415211687887145e-05, "loss": 1.7612, "step": 37930 }, { "epoch": 0.2384616895488382, "grad_norm": 8.630457878112793, "learning_rate": 1.8414792586942492e-05, "loss": 1.9459, "step": 37940 }, { "epoch": 0.23852454186553532, "grad_norm": 7.017162322998047, "learning_rate": 1.841437348599784e-05, "loss": 1.9242, "step": 37950 }, { "epoch": 0.23858739418223243, "grad_norm": 7.716609477996826, "learning_rate": 1.8413954385053186e-05, "loss": 1.9872, "step": 37960 }, { "epoch": 0.23865024649892955, "grad_norm": 6.626461982727051, "learning_rate": 1.8413535284108533e-05, "loss": 1.9801, "step": 37970 }, { "epoch": 0.23871309881562666, "grad_norm": 7.237145900726318, "learning_rate": 1.841311618316388e-05, "loss": 2.0975, "step": 37980 }, { "epoch": 0.23877595113232378, "grad_norm": 7.1675705909729, "learning_rate": 1.8412697082219224e-05, "loss": 1.8958, "step": 37990 }, { "epoch": 0.2388388034490209, "grad_norm": 7.57642936706543, "learning_rate": 1.841227798127457e-05, "loss": 1.6876, "step": 38000 }, { "epoch": 0.23890165576571798, "grad_norm": 8.831753730773926, "learning_rate": 1.8411858880329918e-05, "loss": 2.1432, "step": 38010 }, { "epoch": 0.2389645080824151, "grad_norm": 6.9499711990356445, "learning_rate": 1.8411439779385265e-05, "loss": 1.9944, "step": 38020 }, { "epoch": 0.2390273603991122, "grad_norm": 7.618555545806885, "learning_rate": 1.841102067844061e-05, "loss": 2.0899, "step": 38030 }, { "epoch": 0.23909021271580932, "grad_norm": 7.863029956817627, "learning_rate": 1.8410601577495956e-05, "loss": 2.0186, "step": 38040 }, { "epoch": 0.23915306503250644, "grad_norm": 7.103366374969482, "learning_rate": 1.8410182476551303e-05, "loss": 1.8927, "step": 38050 }, { "epoch": 0.23921591734920356, "grad_norm": 7.5608015060424805, "learning_rate": 1.840976337560665e-05, "loss": 1.9394, "step": 38060 }, { "epoch": 0.23927876966590064, "grad_norm": 7.124314308166504, "learning_rate": 1.8409344274661997e-05, "loss": 2.0939, "step": 38070 }, { "epoch": 0.23934162198259776, "grad_norm": 7.2767720222473145, "learning_rate": 1.840892517371734e-05, "loss": 1.84, "step": 38080 }, { "epoch": 0.23940447429929487, "grad_norm": 7.567097187042236, "learning_rate": 1.8408506072772688e-05, "loss": 1.8521, "step": 38090 }, { "epoch": 0.239467326615992, "grad_norm": 8.322702407836914, "learning_rate": 1.8408086971828035e-05, "loss": 1.9675, "step": 38100 }, { "epoch": 0.2395301789326891, "grad_norm": 8.67414665222168, "learning_rate": 1.8407667870883382e-05, "loss": 1.946, "step": 38110 }, { "epoch": 0.23959303124938622, "grad_norm": 6.918972015380859, "learning_rate": 1.840724876993873e-05, "loss": 2.0705, "step": 38120 }, { "epoch": 0.2396558835660833, "grad_norm": 6.610874652862549, "learning_rate": 1.8406829668994076e-05, "loss": 1.9874, "step": 38130 }, { "epoch": 0.23971873588278042, "grad_norm": 7.756814002990723, "learning_rate": 1.8406410568049423e-05, "loss": 1.9056, "step": 38140 }, { "epoch": 0.23978158819947754, "grad_norm": 8.447721481323242, "learning_rate": 1.840599146710477e-05, "loss": 1.6769, "step": 38150 }, { "epoch": 0.23984444051617465, "grad_norm": 6.847433567047119, "learning_rate": 1.8405572366160114e-05, "loss": 1.9969, "step": 38160 }, { "epoch": 0.23990729283287177, "grad_norm": 6.875399112701416, "learning_rate": 1.840515326521546e-05, "loss": 1.82, "step": 38170 }, { "epoch": 0.23997014514956888, "grad_norm": 7.013482093811035, "learning_rate": 1.8404734164270808e-05, "loss": 1.8327, "step": 38180 }, { "epoch": 0.240032997466266, "grad_norm": 7.169978618621826, "learning_rate": 1.8404315063326155e-05, "loss": 2.073, "step": 38190 }, { "epoch": 0.24009584978296308, "grad_norm": 8.269976615905762, "learning_rate": 1.8403895962381502e-05, "loss": 1.9683, "step": 38200 }, { "epoch": 0.2401587020996602, "grad_norm": 7.200091361999512, "learning_rate": 1.8403476861436846e-05, "loss": 1.9096, "step": 38210 }, { "epoch": 0.2402215544163573, "grad_norm": 7.73632287979126, "learning_rate": 1.8403057760492193e-05, "loss": 2.2556, "step": 38220 }, { "epoch": 0.24028440673305443, "grad_norm": 6.895431041717529, "learning_rate": 1.840263865954754e-05, "loss": 2.0842, "step": 38230 }, { "epoch": 0.24034725904975154, "grad_norm": 7.235763072967529, "learning_rate": 1.8402219558602887e-05, "loss": 1.9744, "step": 38240 }, { "epoch": 0.24041011136644866, "grad_norm": 5.841657638549805, "learning_rate": 1.840180045765823e-05, "loss": 1.9609, "step": 38250 }, { "epoch": 0.24047296368314575, "grad_norm": 6.5870490074157715, "learning_rate": 1.8401381356713578e-05, "loss": 1.973, "step": 38260 }, { "epoch": 0.24053581599984286, "grad_norm": 8.151089668273926, "learning_rate": 1.8400962255768925e-05, "loss": 1.812, "step": 38270 }, { "epoch": 0.24059866831653998, "grad_norm": 5.663036346435547, "learning_rate": 1.8400543154824272e-05, "loss": 1.8347, "step": 38280 }, { "epoch": 0.2406615206332371, "grad_norm": 7.262301921844482, "learning_rate": 1.840012405387962e-05, "loss": 1.792, "step": 38290 }, { "epoch": 0.2407243729499342, "grad_norm": 7.873507499694824, "learning_rate": 1.8399704952934966e-05, "loss": 2.061, "step": 38300 }, { "epoch": 0.24078722526663132, "grad_norm": 6.455713272094727, "learning_rate": 1.839928585199031e-05, "loss": 1.8353, "step": 38310 }, { "epoch": 0.24085007758332844, "grad_norm": 7.31044864654541, "learning_rate": 1.8398866751045657e-05, "loss": 1.7257, "step": 38320 }, { "epoch": 0.24091292990002552, "grad_norm": 7.6389265060424805, "learning_rate": 1.8398447650101004e-05, "loss": 1.9959, "step": 38330 }, { "epoch": 0.24097578221672264, "grad_norm": 7.888726711273193, "learning_rate": 1.839802854915635e-05, "loss": 1.8684, "step": 38340 }, { "epoch": 0.24103863453341975, "grad_norm": 7.7473673820495605, "learning_rate": 1.83976094482117e-05, "loss": 1.9824, "step": 38350 }, { "epoch": 0.24110148685011687, "grad_norm": 7.208690643310547, "learning_rate": 1.8397190347267045e-05, "loss": 2.0381, "step": 38360 }, { "epoch": 0.24116433916681398, "grad_norm": 6.633978366851807, "learning_rate": 1.8396771246322392e-05, "loss": 1.9413, "step": 38370 }, { "epoch": 0.2412271914835111, "grad_norm": 5.579523086547852, "learning_rate": 1.839635214537774e-05, "loss": 1.7466, "step": 38380 }, { "epoch": 0.2412900438002082, "grad_norm": 6.117504596710205, "learning_rate": 1.8395933044433083e-05, "loss": 1.8218, "step": 38390 }, { "epoch": 0.2413528961169053, "grad_norm": 7.406382083892822, "learning_rate": 1.839551394348843e-05, "loss": 1.9613, "step": 38400 }, { "epoch": 0.24141574843360242, "grad_norm": 6.532168865203857, "learning_rate": 1.8395094842543777e-05, "loss": 1.7304, "step": 38410 }, { "epoch": 0.24147860075029953, "grad_norm": 7.247741222381592, "learning_rate": 1.8394675741599124e-05, "loss": 2.0489, "step": 38420 }, { "epoch": 0.24154145306699665, "grad_norm": 6.924738883972168, "learning_rate": 1.8394256640654468e-05, "loss": 1.8917, "step": 38430 }, { "epoch": 0.24160430538369376, "grad_norm": 8.146101951599121, "learning_rate": 1.8393837539709815e-05, "loss": 1.9852, "step": 38440 }, { "epoch": 0.24166715770039085, "grad_norm": 6.957603931427002, "learning_rate": 1.8393418438765162e-05, "loss": 2.0234, "step": 38450 }, { "epoch": 0.24173001001708797, "grad_norm": 7.829459190368652, "learning_rate": 1.839299933782051e-05, "loss": 2.0043, "step": 38460 }, { "epoch": 0.24179286233378508, "grad_norm": 6.800477981567383, "learning_rate": 1.8392580236875853e-05, "loss": 2.0715, "step": 38470 }, { "epoch": 0.2418557146504822, "grad_norm": 7.800566673278809, "learning_rate": 1.83921611359312e-05, "loss": 2.0213, "step": 38480 }, { "epoch": 0.2419185669671793, "grad_norm": 7.374375820159912, "learning_rate": 1.8391742034986547e-05, "loss": 1.8844, "step": 38490 }, { "epoch": 0.24198141928387643, "grad_norm": 9.049388885498047, "learning_rate": 1.8391322934041894e-05, "loss": 2.2025, "step": 38500 }, { "epoch": 0.24204427160057354, "grad_norm": 7.550909996032715, "learning_rate": 1.839090383309724e-05, "loss": 1.9928, "step": 38510 }, { "epoch": 0.24210712391727063, "grad_norm": 6.672713279724121, "learning_rate": 1.839048473215259e-05, "loss": 1.7213, "step": 38520 }, { "epoch": 0.24216997623396774, "grad_norm": 7.553778648376465, "learning_rate": 1.8390065631207935e-05, "loss": 1.8934, "step": 38530 }, { "epoch": 0.24223282855066486, "grad_norm": 8.76152229309082, "learning_rate": 1.8389646530263283e-05, "loss": 1.9237, "step": 38540 }, { "epoch": 0.24229568086736197, "grad_norm": 6.914309978485107, "learning_rate": 1.838922742931863e-05, "loss": 1.9673, "step": 38550 }, { "epoch": 0.2423585331840591, "grad_norm": 7.236188888549805, "learning_rate": 1.8388808328373973e-05, "loss": 1.798, "step": 38560 }, { "epoch": 0.2424213855007562, "grad_norm": 7.777528762817383, "learning_rate": 1.838838922742932e-05, "loss": 1.8949, "step": 38570 }, { "epoch": 0.2424842378174533, "grad_norm": 8.247822761535645, "learning_rate": 1.8387970126484667e-05, "loss": 2.0834, "step": 38580 }, { "epoch": 0.2425470901341504, "grad_norm": 8.609414100646973, "learning_rate": 1.8387551025540014e-05, "loss": 1.8722, "step": 38590 }, { "epoch": 0.24260994245084752, "grad_norm": 7.682502746582031, "learning_rate": 1.838713192459536e-05, "loss": 1.9986, "step": 38600 }, { "epoch": 0.24267279476754464, "grad_norm": 8.470084190368652, "learning_rate": 1.8386712823650705e-05, "loss": 2.0193, "step": 38610 }, { "epoch": 0.24273564708424175, "grad_norm": 7.982653617858887, "learning_rate": 1.8386293722706052e-05, "loss": 1.9688, "step": 38620 }, { "epoch": 0.24279849940093887, "grad_norm": 7.206417560577393, "learning_rate": 1.83858746217614e-05, "loss": 1.7714, "step": 38630 }, { "epoch": 0.24286135171763595, "grad_norm": 7.726312160491943, "learning_rate": 1.8385455520816746e-05, "loss": 1.9045, "step": 38640 }, { "epoch": 0.24292420403433307, "grad_norm": 5.919642448425293, "learning_rate": 1.838503641987209e-05, "loss": 1.9664, "step": 38650 }, { "epoch": 0.24298705635103018, "grad_norm": 7.361550807952881, "learning_rate": 1.8384617318927437e-05, "loss": 2.1269, "step": 38660 }, { "epoch": 0.2430499086677273, "grad_norm": 7.250823497772217, "learning_rate": 1.8384198217982784e-05, "loss": 1.7964, "step": 38670 }, { "epoch": 0.24311276098442441, "grad_norm": 7.15161657333374, "learning_rate": 1.8383821027132596e-05, "loss": 2.0504, "step": 38680 }, { "epoch": 0.24317561330112153, "grad_norm": 7.380502223968506, "learning_rate": 1.8383401926187943e-05, "loss": 1.8924, "step": 38690 }, { "epoch": 0.24323846561781864, "grad_norm": 8.092615127563477, "learning_rate": 1.838298282524329e-05, "loss": 1.9202, "step": 38700 }, { "epoch": 0.24330131793451573, "grad_norm": 6.767857074737549, "learning_rate": 1.8382563724298637e-05, "loss": 2.0366, "step": 38710 }, { "epoch": 0.24336417025121285, "grad_norm": 6.968144416809082, "learning_rate": 1.8382144623353984e-05, "loss": 1.8625, "step": 38720 }, { "epoch": 0.24342702256790996, "grad_norm": 5.415788650512695, "learning_rate": 1.8381725522409328e-05, "loss": 1.6783, "step": 38730 }, { "epoch": 0.24348987488460708, "grad_norm": 7.07074499130249, "learning_rate": 1.8381306421464675e-05, "loss": 2.0826, "step": 38740 }, { "epoch": 0.2435527272013042, "grad_norm": 6.802286624908447, "learning_rate": 1.8380887320520022e-05, "loss": 1.9283, "step": 38750 }, { "epoch": 0.2436155795180013, "grad_norm": 6.098469257354736, "learning_rate": 1.838046821957537e-05, "loss": 1.8083, "step": 38760 }, { "epoch": 0.2436784318346984, "grad_norm": 6.389626979827881, "learning_rate": 1.8380049118630712e-05, "loss": 1.9582, "step": 38770 }, { "epoch": 0.2437412841513955, "grad_norm": 5.906976222991943, "learning_rate": 1.837963001768606e-05, "loss": 2.0481, "step": 38780 }, { "epoch": 0.24380413646809262, "grad_norm": 7.177793025970459, "learning_rate": 1.8379210916741407e-05, "loss": 1.9249, "step": 38790 }, { "epoch": 0.24386698878478974, "grad_norm": 7.2348198890686035, "learning_rate": 1.8378791815796754e-05, "loss": 1.8801, "step": 38800 }, { "epoch": 0.24392984110148686, "grad_norm": 6.951122283935547, "learning_rate": 1.83783727148521e-05, "loss": 1.9686, "step": 38810 }, { "epoch": 0.24399269341818397, "grad_norm": 7.613935470581055, "learning_rate": 1.8377953613907448e-05, "loss": 2.0456, "step": 38820 }, { "epoch": 0.24405554573488109, "grad_norm": 6.578233242034912, "learning_rate": 1.8377534512962795e-05, "loss": 1.9367, "step": 38830 }, { "epoch": 0.24411839805157817, "grad_norm": 7.682235240936279, "learning_rate": 1.8377115412018142e-05, "loss": 1.6629, "step": 38840 }, { "epoch": 0.2441812503682753, "grad_norm": 6.999513149261475, "learning_rate": 1.837669631107349e-05, "loss": 2.0293, "step": 38850 }, { "epoch": 0.2442441026849724, "grad_norm": 7.855412483215332, "learning_rate": 1.8376277210128833e-05, "loss": 1.9867, "step": 38860 }, { "epoch": 0.24430695500166952, "grad_norm": 6.694448947906494, "learning_rate": 1.837585810918418e-05, "loss": 1.8862, "step": 38870 }, { "epoch": 0.24436980731836663, "grad_norm": 6.943238735198975, "learning_rate": 1.8375439008239527e-05, "loss": 1.8387, "step": 38880 }, { "epoch": 0.24443265963506375, "grad_norm": 7.439672470092773, "learning_rate": 1.8375019907294874e-05, "loss": 1.9138, "step": 38890 }, { "epoch": 0.24449551195176084, "grad_norm": 6.86573600769043, "learning_rate": 1.8374600806350218e-05, "loss": 1.945, "step": 38900 }, { "epoch": 0.24455836426845795, "grad_norm": 6.689842224121094, "learning_rate": 1.8374181705405565e-05, "loss": 1.7859, "step": 38910 }, { "epoch": 0.24462121658515507, "grad_norm": 6.846593379974365, "learning_rate": 1.8373762604460912e-05, "loss": 1.9778, "step": 38920 }, { "epoch": 0.24468406890185218, "grad_norm": 7.001317024230957, "learning_rate": 1.837334350351626e-05, "loss": 1.909, "step": 38930 }, { "epoch": 0.2447469212185493, "grad_norm": 7.235071659088135, "learning_rate": 1.8372924402571606e-05, "loss": 1.7714, "step": 38940 }, { "epoch": 0.2448097735352464, "grad_norm": 7.201655387878418, "learning_rate": 1.837250530162695e-05, "loss": 2.0205, "step": 38950 }, { "epoch": 0.2448726258519435, "grad_norm": 5.981378555297852, "learning_rate": 1.8372086200682297e-05, "loss": 1.7683, "step": 38960 }, { "epoch": 0.2449354781686406, "grad_norm": 6.676976203918457, "learning_rate": 1.8371667099737644e-05, "loss": 2.077, "step": 38970 }, { "epoch": 0.24499833048533773, "grad_norm": 6.120922565460205, "learning_rate": 1.837124799879299e-05, "loss": 1.8403, "step": 38980 }, { "epoch": 0.24506118280203484, "grad_norm": 6.761012077331543, "learning_rate": 1.8370828897848338e-05, "loss": 1.765, "step": 38990 }, { "epoch": 0.24512403511873196, "grad_norm": 7.727017402648926, "learning_rate": 1.837040979690368e-05, "loss": 1.817, "step": 39000 }, { "epoch": 0.24518688743542907, "grad_norm": 7.75016975402832, "learning_rate": 1.836999069595903e-05, "loss": 1.7469, "step": 39010 }, { "epoch": 0.2452497397521262, "grad_norm": 7.089818477630615, "learning_rate": 1.8369571595014376e-05, "loss": 1.8925, "step": 39020 }, { "epoch": 0.24531259206882328, "grad_norm": 6.474025726318359, "learning_rate": 1.8369152494069723e-05, "loss": 1.7677, "step": 39030 }, { "epoch": 0.2453754443855204, "grad_norm": 7.904554843902588, "learning_rate": 1.836873339312507e-05, "loss": 1.9615, "step": 39040 }, { "epoch": 0.2454382967022175, "grad_norm": 6.933401584625244, "learning_rate": 1.8368314292180417e-05, "loss": 1.9095, "step": 39050 }, { "epoch": 0.24550114901891462, "grad_norm": 6.717565059661865, "learning_rate": 1.8367895191235764e-05, "loss": 1.8879, "step": 39060 }, { "epoch": 0.24556400133561174, "grad_norm": 7.307586193084717, "learning_rate": 1.836747609029111e-05, "loss": 1.8777, "step": 39070 }, { "epoch": 0.24562685365230885, "grad_norm": 8.712448120117188, "learning_rate": 1.8367056989346455e-05, "loss": 1.832, "step": 39080 }, { "epoch": 0.24568970596900594, "grad_norm": 8.435969352722168, "learning_rate": 1.8366637888401802e-05, "loss": 1.9329, "step": 39090 }, { "epoch": 0.24575255828570305, "grad_norm": 8.680069923400879, "learning_rate": 1.836621878745715e-05, "loss": 2.0208, "step": 39100 }, { "epoch": 0.24581541060240017, "grad_norm": 8.355234146118164, "learning_rate": 1.8365799686512496e-05, "loss": 1.8571, "step": 39110 }, { "epoch": 0.24587826291909728, "grad_norm": 8.172979354858398, "learning_rate": 1.8365380585567843e-05, "loss": 1.2727, "step": 39120 }, { "epoch": 0.2459411152357944, "grad_norm": 7.647946834564209, "learning_rate": 1.8364961484623187e-05, "loss": 1.9499, "step": 39130 }, { "epoch": 0.24600396755249151, "grad_norm": 6.790382385253906, "learning_rate": 1.8364542383678534e-05, "loss": 1.7528, "step": 39140 }, { "epoch": 0.2460668198691886, "grad_norm": 6.446195125579834, "learning_rate": 1.836412328273388e-05, "loss": 1.7171, "step": 39150 }, { "epoch": 0.24612967218588572, "grad_norm": 7.8714776039123535, "learning_rate": 1.8363704181789228e-05, "loss": 2.0022, "step": 39160 }, { "epoch": 0.24619252450258283, "grad_norm": 7.599400997161865, "learning_rate": 1.836328508084457e-05, "loss": 1.9062, "step": 39170 }, { "epoch": 0.24625537681927995, "grad_norm": 7.171163082122803, "learning_rate": 1.836286597989992e-05, "loss": 1.9292, "step": 39180 }, { "epoch": 0.24631822913597706, "grad_norm": 6.78662633895874, "learning_rate": 1.8362446878955266e-05, "loss": 2.055, "step": 39190 }, { "epoch": 0.24638108145267418, "grad_norm": 6.9192609786987305, "learning_rate": 1.8362027778010613e-05, "loss": 1.7286, "step": 39200 }, { "epoch": 0.2464439337693713, "grad_norm": 6.885414123535156, "learning_rate": 1.836160867706596e-05, "loss": 1.9249, "step": 39210 }, { "epoch": 0.24650678608606838, "grad_norm": 7.425045013427734, "learning_rate": 1.8361189576121307e-05, "loss": 1.8222, "step": 39220 }, { "epoch": 0.2465696384027655, "grad_norm": 7.735393524169922, "learning_rate": 1.8360770475176654e-05, "loss": 2.0101, "step": 39230 }, { "epoch": 0.2466324907194626, "grad_norm": 7.208608627319336, "learning_rate": 1.8360351374232e-05, "loss": 1.8489, "step": 39240 }, { "epoch": 0.24669534303615973, "grad_norm": 6.560624122619629, "learning_rate": 1.8359932273287345e-05, "loss": 2.0849, "step": 39250 }, { "epoch": 0.24675819535285684, "grad_norm": 7.286542892456055, "learning_rate": 1.8359513172342692e-05, "loss": 1.9742, "step": 39260 }, { "epoch": 0.24682104766955396, "grad_norm": 6.513575553894043, "learning_rate": 1.835909407139804e-05, "loss": 1.8736, "step": 39270 }, { "epoch": 0.24688389998625104, "grad_norm": 7.486867427825928, "learning_rate": 1.8358674970453386e-05, "loss": 1.903, "step": 39280 }, { "epoch": 0.24694675230294816, "grad_norm": 7.773925304412842, "learning_rate": 1.8358255869508733e-05, "loss": 1.7813, "step": 39290 }, { "epoch": 0.24700960461964527, "grad_norm": 7.760753154754639, "learning_rate": 1.8357836768564077e-05, "loss": 1.7484, "step": 39300 }, { "epoch": 0.2470724569363424, "grad_norm": 7.225697040557861, "learning_rate": 1.8357417667619424e-05, "loss": 1.8298, "step": 39310 }, { "epoch": 0.2471353092530395, "grad_norm": 7.341373443603516, "learning_rate": 1.835699856667477e-05, "loss": 1.9556, "step": 39320 }, { "epoch": 0.24719816156973662, "grad_norm": 7.047973155975342, "learning_rate": 1.8356579465730118e-05, "loss": 1.7168, "step": 39330 }, { "epoch": 0.24726101388643373, "grad_norm": 7.430441856384277, "learning_rate": 1.8356160364785465e-05, "loss": 1.7909, "step": 39340 }, { "epoch": 0.24732386620313082, "grad_norm": 7.2714643478393555, "learning_rate": 1.835574126384081e-05, "loss": 2.1455, "step": 39350 }, { "epoch": 0.24738671851982794, "grad_norm": 6.922287464141846, "learning_rate": 1.8355322162896156e-05, "loss": 1.9391, "step": 39360 }, { "epoch": 0.24744957083652505, "grad_norm": 6.799683570861816, "learning_rate": 1.8354903061951503e-05, "loss": 1.8829, "step": 39370 }, { "epoch": 0.24751242315322217, "grad_norm": 5.420069217681885, "learning_rate": 1.835448396100685e-05, "loss": 1.7586, "step": 39380 }, { "epoch": 0.24757527546991928, "grad_norm": 7.595252990722656, "learning_rate": 1.8354064860062194e-05, "loss": 1.8475, "step": 39390 }, { "epoch": 0.2476381277866164, "grad_norm": 7.183959484100342, "learning_rate": 1.835364575911754e-05, "loss": 2.0457, "step": 39400 }, { "epoch": 0.24770098010331348, "grad_norm": 7.531284332275391, "learning_rate": 1.8353226658172888e-05, "loss": 1.8284, "step": 39410 }, { "epoch": 0.2477638324200106, "grad_norm": 7.272885799407959, "learning_rate": 1.8352807557228235e-05, "loss": 2.0378, "step": 39420 }, { "epoch": 0.24782668473670771, "grad_norm": 6.173305511474609, "learning_rate": 1.8352388456283582e-05, "loss": 1.9055, "step": 39430 }, { "epoch": 0.24788953705340483, "grad_norm": 7.7378692626953125, "learning_rate": 1.835196935533893e-05, "loss": 2.0607, "step": 39440 }, { "epoch": 0.24795238937010194, "grad_norm": 7.691354274749756, "learning_rate": 1.8351550254394276e-05, "loss": 1.7121, "step": 39450 }, { "epoch": 0.24801524168679906, "grad_norm": 6.638065814971924, "learning_rate": 1.8351131153449623e-05, "loss": 1.8401, "step": 39460 }, { "epoch": 0.24807809400349615, "grad_norm": 7.033689498901367, "learning_rate": 1.835071205250497e-05, "loss": 1.9848, "step": 39470 }, { "epoch": 0.24814094632019326, "grad_norm": 6.727336406707764, "learning_rate": 1.8350292951560314e-05, "loss": 1.8813, "step": 39480 }, { "epoch": 0.24820379863689038, "grad_norm": 7.711666584014893, "learning_rate": 1.834987385061566e-05, "loss": 1.8474, "step": 39490 }, { "epoch": 0.2482666509535875, "grad_norm": 8.10097599029541, "learning_rate": 1.8349454749671008e-05, "loss": 1.8589, "step": 39500 }, { "epoch": 0.2483295032702846, "grad_norm": 7.868946075439453, "learning_rate": 1.8349035648726355e-05, "loss": 1.7973, "step": 39510 }, { "epoch": 0.24839235558698172, "grad_norm": 8.510309219360352, "learning_rate": 1.83486165477817e-05, "loss": 1.6622, "step": 39520 }, { "epoch": 0.24845520790367884, "grad_norm": 5.8302764892578125, "learning_rate": 1.8348197446837046e-05, "loss": 1.7117, "step": 39530 }, { "epoch": 0.24851806022037592, "grad_norm": 6.924520969390869, "learning_rate": 1.8347778345892393e-05, "loss": 1.6787, "step": 39540 }, { "epoch": 0.24858091253707304, "grad_norm": 7.145519256591797, "learning_rate": 1.834735924494774e-05, "loss": 1.8117, "step": 39550 }, { "epoch": 0.24864376485377016, "grad_norm": 7.44929313659668, "learning_rate": 1.8346940144003087e-05, "loss": 1.9223, "step": 39560 }, { "epoch": 0.24870661717046727, "grad_norm": 7.225125789642334, "learning_rate": 1.834652104305843e-05, "loss": 1.8719, "step": 39570 }, { "epoch": 0.24876946948716439, "grad_norm": 7.701131343841553, "learning_rate": 1.8346101942113778e-05, "loss": 2.0049, "step": 39580 }, { "epoch": 0.2488323218038615, "grad_norm": 6.39308500289917, "learning_rate": 1.8345682841169125e-05, "loss": 1.7611, "step": 39590 }, { "epoch": 0.2488951741205586, "grad_norm": 6.422939777374268, "learning_rate": 1.8345263740224472e-05, "loss": 1.8618, "step": 39600 }, { "epoch": 0.2489580264372557, "grad_norm": 6.512946605682373, "learning_rate": 1.834484463927982e-05, "loss": 1.8978, "step": 39610 }, { "epoch": 0.24902087875395282, "grad_norm": 7.747503757476807, "learning_rate": 1.8344425538335166e-05, "loss": 1.6621, "step": 39620 }, { "epoch": 0.24908373107064993, "grad_norm": 7.341767311096191, "learning_rate": 1.834400643739051e-05, "loss": 1.8461, "step": 39630 }, { "epoch": 0.24914658338734705, "grad_norm": 8.208362579345703, "learning_rate": 1.8343587336445857e-05, "loss": 2.0456, "step": 39640 }, { "epoch": 0.24920943570404416, "grad_norm": 7.730144023895264, "learning_rate": 1.8343168235501204e-05, "loss": 1.9841, "step": 39650 }, { "epoch": 0.24927228802074128, "grad_norm": 7.366255760192871, "learning_rate": 1.834274913455655e-05, "loss": 1.8643, "step": 39660 }, { "epoch": 0.24933514033743837, "grad_norm": 7.58958101272583, "learning_rate": 1.8342330033611898e-05, "loss": 2.0262, "step": 39670 }, { "epoch": 0.24939799265413548, "grad_norm": 8.771005630493164, "learning_rate": 1.8341910932667245e-05, "loss": 2.0699, "step": 39680 }, { "epoch": 0.2494608449708326, "grad_norm": 6.679497718811035, "learning_rate": 1.8341491831722592e-05, "loss": 2.0086, "step": 39690 }, { "epoch": 0.2495236972875297, "grad_norm": 8.130735397338867, "learning_rate": 1.8341072730777936e-05, "loss": 1.9688, "step": 39700 }, { "epoch": 0.24958654960422683, "grad_norm": 6.9749603271484375, "learning_rate": 1.8340653629833283e-05, "loss": 1.6664, "step": 39710 }, { "epoch": 0.24964940192092394, "grad_norm": 7.334023475646973, "learning_rate": 1.834023452888863e-05, "loss": 2.0066, "step": 39720 }, { "epoch": 0.24971225423762103, "grad_norm": 8.127015113830566, "learning_rate": 1.8339815427943977e-05, "loss": 1.9115, "step": 39730 }, { "epoch": 0.24977510655431814, "grad_norm": 7.029850006103516, "learning_rate": 1.8339396326999324e-05, "loss": 1.8836, "step": 39740 }, { "epoch": 0.24983795887101526, "grad_norm": 7.777439594268799, "learning_rate": 1.8338977226054668e-05, "loss": 1.9276, "step": 39750 }, { "epoch": 0.24990081118771237, "grad_norm": 6.383263111114502, "learning_rate": 1.8338558125110015e-05, "loss": 1.8764, "step": 39760 }, { "epoch": 0.2499636635044095, "grad_norm": 7.595082759857178, "learning_rate": 1.8338139024165362e-05, "loss": 1.8711, "step": 39770 }, { "epoch": 0.2500265158211066, "grad_norm": 6.532485008239746, "learning_rate": 1.833771992322071e-05, "loss": 1.8727, "step": 39780 }, { "epoch": 0.2500893681378037, "grad_norm": 6.619588851928711, "learning_rate": 1.8337300822276053e-05, "loss": 1.7837, "step": 39790 }, { "epoch": 0.2501522204545008, "grad_norm": 7.122777462005615, "learning_rate": 1.83368817213314e-05, "loss": 1.9078, "step": 39800 }, { "epoch": 0.2502150727711979, "grad_norm": 6.726501941680908, "learning_rate": 1.8336462620386747e-05, "loss": 1.9333, "step": 39810 }, { "epoch": 0.25027792508789504, "grad_norm": 8.014927864074707, "learning_rate": 1.8336043519442094e-05, "loss": 1.8131, "step": 39820 }, { "epoch": 0.25034077740459215, "grad_norm": 6.615452766418457, "learning_rate": 1.833562441849744e-05, "loss": 1.9475, "step": 39830 }, { "epoch": 0.25040362972128927, "grad_norm": 7.125904560089111, "learning_rate": 1.8335205317552788e-05, "loss": 1.842, "step": 39840 }, { "epoch": 0.2504664820379864, "grad_norm": 7.295060157775879, "learning_rate": 1.8334786216608135e-05, "loss": 1.7838, "step": 39850 }, { "epoch": 0.2505293343546835, "grad_norm": 7.834619522094727, "learning_rate": 1.8334367115663482e-05, "loss": 1.7631, "step": 39860 }, { "epoch": 0.2505921866713806, "grad_norm": 6.960846900939941, "learning_rate": 1.833394801471883e-05, "loss": 1.9703, "step": 39870 }, { "epoch": 0.2506550389880777, "grad_norm": 8.08946418762207, "learning_rate": 1.8333528913774173e-05, "loss": 1.9355, "step": 39880 }, { "epoch": 0.2507178913047748, "grad_norm": 8.536508560180664, "learning_rate": 1.833310981282952e-05, "loss": 2.1335, "step": 39890 }, { "epoch": 0.2507807436214719, "grad_norm": 7.6163506507873535, "learning_rate": 1.8332690711884867e-05, "loss": 1.9276, "step": 39900 }, { "epoch": 0.250843595938169, "grad_norm": 8.61902141571045, "learning_rate": 1.8332271610940214e-05, "loss": 1.8752, "step": 39910 }, { "epoch": 0.25090644825486613, "grad_norm": 7.387242317199707, "learning_rate": 1.8331852509995558e-05, "loss": 2.0766, "step": 39920 }, { "epoch": 0.25096930057156325, "grad_norm": 5.936997890472412, "learning_rate": 1.8331433409050905e-05, "loss": 1.9449, "step": 39930 }, { "epoch": 0.25103215288826036, "grad_norm": 7.217583179473877, "learning_rate": 1.8331014308106252e-05, "loss": 1.9657, "step": 39940 }, { "epoch": 0.2510950052049575, "grad_norm": 6.556284427642822, "learning_rate": 1.83305952071616e-05, "loss": 1.9077, "step": 39950 }, { "epoch": 0.2511578575216546, "grad_norm": 7.746654033660889, "learning_rate": 1.8330176106216946e-05, "loss": 1.9202, "step": 39960 }, { "epoch": 0.2512207098383517, "grad_norm": 7.003026962280273, "learning_rate": 1.832975700527229e-05, "loss": 1.9193, "step": 39970 }, { "epoch": 0.2512835621550488, "grad_norm": 7.067366600036621, "learning_rate": 1.8329337904327637e-05, "loss": 1.9311, "step": 39980 }, { "epoch": 0.25134641447174594, "grad_norm": 8.074548721313477, "learning_rate": 1.8328918803382984e-05, "loss": 1.8689, "step": 39990 }, { "epoch": 0.25140926678844305, "grad_norm": 8.37764835357666, "learning_rate": 1.832849970243833e-05, "loss": 1.9689, "step": 40000 }, { "epoch": 0.25147211910514017, "grad_norm": 7.969852924346924, "learning_rate": 1.8328080601493675e-05, "loss": 2.1185, "step": 40010 }, { "epoch": 0.25153497142183723, "grad_norm": 7.347843170166016, "learning_rate": 1.8327661500549022e-05, "loss": 1.6484, "step": 40020 }, { "epoch": 0.25159782373853434, "grad_norm": 7.6527814865112305, "learning_rate": 1.832724239960437e-05, "loss": 1.9171, "step": 40030 }, { "epoch": 0.25166067605523146, "grad_norm": 6.635735034942627, "learning_rate": 1.8326823298659716e-05, "loss": 1.6042, "step": 40040 }, { "epoch": 0.2517235283719286, "grad_norm": 5.760181427001953, "learning_rate": 1.8326404197715063e-05, "loss": 2.0133, "step": 40050 }, { "epoch": 0.2517863806886257, "grad_norm": 6.241506576538086, "learning_rate": 1.832598509677041e-05, "loss": 1.815, "step": 40060 }, { "epoch": 0.2518492330053228, "grad_norm": 6.897012710571289, "learning_rate": 1.8325565995825757e-05, "loss": 1.9044, "step": 40070 }, { "epoch": 0.2519120853220199, "grad_norm": 5.902112007141113, "learning_rate": 1.8325146894881104e-05, "loss": 1.7641, "step": 40080 }, { "epoch": 0.25197493763871703, "grad_norm": 7.367774486541748, "learning_rate": 1.832472779393645e-05, "loss": 1.9469, "step": 40090 }, { "epoch": 0.25203778995541415, "grad_norm": 6.922102928161621, "learning_rate": 1.8324308692991795e-05, "loss": 1.9259, "step": 40100 }, { "epoch": 0.25210064227211126, "grad_norm": 7.971136569976807, "learning_rate": 1.8323889592047142e-05, "loss": 1.8942, "step": 40110 }, { "epoch": 0.2521634945888084, "grad_norm": 6.794959545135498, "learning_rate": 1.832347049110249e-05, "loss": 2.0783, "step": 40120 }, { "epoch": 0.2522263469055055, "grad_norm": 7.142230033874512, "learning_rate": 1.8323051390157836e-05, "loss": 1.9941, "step": 40130 }, { "epoch": 0.25228919922220255, "grad_norm": 7.810637950897217, "learning_rate": 1.8322632289213183e-05, "loss": 1.9061, "step": 40140 }, { "epoch": 0.25235205153889967, "grad_norm": 8.566405296325684, "learning_rate": 1.8322213188268527e-05, "loss": 1.9135, "step": 40150 }, { "epoch": 0.2524149038555968, "grad_norm": 7.335825443267822, "learning_rate": 1.8321794087323874e-05, "loss": 1.9056, "step": 40160 }, { "epoch": 0.2524777561722939, "grad_norm": 6.504067420959473, "learning_rate": 1.832137498637922e-05, "loss": 1.8662, "step": 40170 }, { "epoch": 0.252540608488991, "grad_norm": 6.566962718963623, "learning_rate": 1.8320955885434568e-05, "loss": 2.0712, "step": 40180 }, { "epoch": 0.25260346080568813, "grad_norm": 7.839852809906006, "learning_rate": 1.8320536784489912e-05, "loss": 1.9855, "step": 40190 }, { "epoch": 0.25266631312238524, "grad_norm": 8.374872207641602, "learning_rate": 1.832011768354526e-05, "loss": 1.9443, "step": 40200 }, { "epoch": 0.25272916543908236, "grad_norm": 7.07660436630249, "learning_rate": 1.8319698582600606e-05, "loss": 1.8826, "step": 40210 }, { "epoch": 0.2527920177557795, "grad_norm": 7.191566467285156, "learning_rate": 1.8319279481655953e-05, "loss": 1.959, "step": 40220 }, { "epoch": 0.2528548700724766, "grad_norm": 7.686609268188477, "learning_rate": 1.83188603807113e-05, "loss": 1.9148, "step": 40230 }, { "epoch": 0.2529177223891737, "grad_norm": 6.395354747772217, "learning_rate": 1.8318441279766647e-05, "loss": 1.9321, "step": 40240 }, { "epoch": 0.2529805747058708, "grad_norm": 7.376534461975098, "learning_rate": 1.8318022178821994e-05, "loss": 2.1802, "step": 40250 }, { "epoch": 0.25304342702256793, "grad_norm": 7.409773826599121, "learning_rate": 1.8317603077877338e-05, "loss": 1.8117, "step": 40260 }, { "epoch": 0.253106279339265, "grad_norm": 7.840152740478516, "learning_rate": 1.8317183976932685e-05, "loss": 1.9052, "step": 40270 }, { "epoch": 0.2531691316559621, "grad_norm": 7.386919975280762, "learning_rate": 1.8316764875988032e-05, "loss": 1.9497, "step": 40280 }, { "epoch": 0.2532319839726592, "grad_norm": 6.20576286315918, "learning_rate": 1.831634577504338e-05, "loss": 1.8981, "step": 40290 }, { "epoch": 0.25329483628935634, "grad_norm": 7.299727916717529, "learning_rate": 1.8315926674098726e-05, "loss": 1.9856, "step": 40300 }, { "epoch": 0.25335768860605345, "grad_norm": 7.2291083335876465, "learning_rate": 1.8315507573154073e-05, "loss": 1.743, "step": 40310 }, { "epoch": 0.25342054092275057, "grad_norm": 8.496075630187988, "learning_rate": 1.8315088472209417e-05, "loss": 1.796, "step": 40320 }, { "epoch": 0.2534833932394477, "grad_norm": 6.407141208648682, "learning_rate": 1.8314669371264764e-05, "loss": 1.9525, "step": 40330 }, { "epoch": 0.2535462455561448, "grad_norm": 7.616662979125977, "learning_rate": 1.831425027032011e-05, "loss": 1.9649, "step": 40340 }, { "epoch": 0.2536090978728419, "grad_norm": 6.378571510314941, "learning_rate": 1.8313831169375458e-05, "loss": 1.9117, "step": 40350 }, { "epoch": 0.25367195018953903, "grad_norm": 7.712974548339844, "learning_rate": 1.8313412068430805e-05, "loss": 1.7425, "step": 40360 }, { "epoch": 0.25373480250623615, "grad_norm": 7.338319301605225, "learning_rate": 1.831299296748615e-05, "loss": 1.8324, "step": 40370 }, { "epoch": 0.25379765482293326, "grad_norm": 6.323298454284668, "learning_rate": 1.8312573866541496e-05, "loss": 1.9462, "step": 40380 }, { "epoch": 0.2538605071396304, "grad_norm": 7.694051265716553, "learning_rate": 1.8312154765596843e-05, "loss": 1.9082, "step": 40390 }, { "epoch": 0.25392335945632744, "grad_norm": 8.786967277526855, "learning_rate": 1.831173566465219e-05, "loss": 2.0764, "step": 40400 }, { "epoch": 0.25398621177302455, "grad_norm": 7.639488220214844, "learning_rate": 1.8311316563707534e-05, "loss": 1.962, "step": 40410 }, { "epoch": 0.25404906408972167, "grad_norm": 8.818544387817383, "learning_rate": 1.831089746276288e-05, "loss": 1.8749, "step": 40420 }, { "epoch": 0.2541119164064188, "grad_norm": 7.728259086608887, "learning_rate": 1.8310478361818228e-05, "loss": 1.7827, "step": 40430 }, { "epoch": 0.2541747687231159, "grad_norm": 6.672224998474121, "learning_rate": 1.8310059260873575e-05, "loss": 1.8389, "step": 40440 }, { "epoch": 0.254237621039813, "grad_norm": 6.4283342361450195, "learning_rate": 1.8309640159928922e-05, "loss": 1.8576, "step": 40450 }, { "epoch": 0.2543004733565101, "grad_norm": 7.002668857574463, "learning_rate": 1.830922105898427e-05, "loss": 1.9462, "step": 40460 }, { "epoch": 0.25436332567320724, "grad_norm": 7.611618995666504, "learning_rate": 1.8308801958039616e-05, "loss": 2.0004, "step": 40470 }, { "epoch": 0.25442617798990436, "grad_norm": 7.9169745445251465, "learning_rate": 1.8308382857094963e-05, "loss": 1.8753, "step": 40480 }, { "epoch": 0.25448903030660147, "grad_norm": 7.655786037445068, "learning_rate": 1.830796375615031e-05, "loss": 1.929, "step": 40490 }, { "epoch": 0.2545518826232986, "grad_norm": 7.357483386993408, "learning_rate": 1.8307544655205654e-05, "loss": 1.903, "step": 40500 }, { "epoch": 0.2546147349399957, "grad_norm": 9.201723098754883, "learning_rate": 1.8307125554261e-05, "loss": 1.9587, "step": 40510 }, { "epoch": 0.2546775872566928, "grad_norm": 6.650157451629639, "learning_rate": 1.8306706453316348e-05, "loss": 1.7194, "step": 40520 }, { "epoch": 0.2547404395733899, "grad_norm": 6.331323623657227, "learning_rate": 1.8306287352371695e-05, "loss": 2.1537, "step": 40530 }, { "epoch": 0.254803291890087, "grad_norm": 7.6211724281311035, "learning_rate": 1.830586825142704e-05, "loss": 1.8748, "step": 40540 }, { "epoch": 0.2548661442067841, "grad_norm": 7.957117080688477, "learning_rate": 1.8305449150482386e-05, "loss": 1.9157, "step": 40550 }, { "epoch": 0.2549289965234812, "grad_norm": 6.765776634216309, "learning_rate": 1.8305030049537733e-05, "loss": 1.7877, "step": 40560 }, { "epoch": 0.25499184884017834, "grad_norm": 7.246833324432373, "learning_rate": 1.830461094859308e-05, "loss": 1.7688, "step": 40570 }, { "epoch": 0.25505470115687545, "grad_norm": 6.436697959899902, "learning_rate": 1.8304191847648427e-05, "loss": 2.1203, "step": 40580 }, { "epoch": 0.25511755347357257, "grad_norm": 7.299831867218018, "learning_rate": 1.830377274670377e-05, "loss": 1.8017, "step": 40590 }, { "epoch": 0.2551804057902697, "grad_norm": 7.9287004470825195, "learning_rate": 1.8303353645759118e-05, "loss": 1.8727, "step": 40600 }, { "epoch": 0.2552432581069668, "grad_norm": 6.686092853546143, "learning_rate": 1.8302934544814465e-05, "loss": 1.8771, "step": 40610 }, { "epoch": 0.2553061104236639, "grad_norm": 7.040564060211182, "learning_rate": 1.8302515443869812e-05, "loss": 1.9667, "step": 40620 }, { "epoch": 0.255368962740361, "grad_norm": 8.112922668457031, "learning_rate": 1.8302096342925156e-05, "loss": 1.8328, "step": 40630 }, { "epoch": 0.25543181505705814, "grad_norm": 7.869579792022705, "learning_rate": 1.8301677241980503e-05, "loss": 1.7613, "step": 40640 }, { "epoch": 0.2554946673737552, "grad_norm": 7.157601356506348, "learning_rate": 1.830125814103585e-05, "loss": 1.9874, "step": 40650 }, { "epoch": 0.2555575196904523, "grad_norm": 6.303585052490234, "learning_rate": 1.8300839040091197e-05, "loss": 1.8133, "step": 40660 }, { "epoch": 0.25562037200714943, "grad_norm": 6.151522636413574, "learning_rate": 1.8300419939146544e-05, "loss": 1.8912, "step": 40670 }, { "epoch": 0.25568322432384655, "grad_norm": 7.436097621917725, "learning_rate": 1.830000083820189e-05, "loss": 1.8466, "step": 40680 }, { "epoch": 0.25574607664054366, "grad_norm": 7.4817681312561035, "learning_rate": 1.8299581737257238e-05, "loss": 2.0094, "step": 40690 }, { "epoch": 0.2558089289572408, "grad_norm": 7.078312397003174, "learning_rate": 1.8299162636312585e-05, "loss": 2.038, "step": 40700 }, { "epoch": 0.2558717812739379, "grad_norm": 6.778133869171143, "learning_rate": 1.8298743535367932e-05, "loss": 2.0069, "step": 40710 }, { "epoch": 0.255934633590635, "grad_norm": 8.176437377929688, "learning_rate": 1.8298324434423276e-05, "loss": 1.9749, "step": 40720 }, { "epoch": 0.2559974859073321, "grad_norm": 7.017731189727783, "learning_rate": 1.8297905333478623e-05, "loss": 1.7378, "step": 40730 }, { "epoch": 0.25606033822402924, "grad_norm": 6.80908203125, "learning_rate": 1.829748623253397e-05, "loss": 1.9403, "step": 40740 }, { "epoch": 0.25612319054072635, "grad_norm": 7.145242214202881, "learning_rate": 1.8297067131589317e-05, "loss": 1.7958, "step": 40750 }, { "epoch": 0.25618604285742347, "grad_norm": 7.3790693283081055, "learning_rate": 1.8296648030644664e-05, "loss": 1.87, "step": 40760 }, { "epoch": 0.2562488951741206, "grad_norm": 8.010051727294922, "learning_rate": 1.8296228929700008e-05, "loss": 1.6271, "step": 40770 }, { "epoch": 0.25631174749081764, "grad_norm": 7.379945278167725, "learning_rate": 1.8295809828755355e-05, "loss": 1.8067, "step": 40780 }, { "epoch": 0.25637459980751476, "grad_norm": 7.175872802734375, "learning_rate": 1.8295390727810702e-05, "loss": 1.8745, "step": 40790 }, { "epoch": 0.2564374521242119, "grad_norm": 7.595414638519287, "learning_rate": 1.829497162686605e-05, "loss": 1.9023, "step": 40800 }, { "epoch": 0.256500304440909, "grad_norm": 7.016720294952393, "learning_rate": 1.8294552525921393e-05, "loss": 2.0608, "step": 40810 }, { "epoch": 0.2565631567576061, "grad_norm": 6.0823588371276855, "learning_rate": 1.829413342497674e-05, "loss": 1.9367, "step": 40820 }, { "epoch": 0.2566260090743032, "grad_norm": 6.850696563720703, "learning_rate": 1.8293714324032087e-05, "loss": 1.9112, "step": 40830 }, { "epoch": 0.25668886139100033, "grad_norm": 8.202967643737793, "learning_rate": 1.8293295223087434e-05, "loss": 1.6909, "step": 40840 }, { "epoch": 0.25675171370769745, "grad_norm": 6.884469509124756, "learning_rate": 1.829287612214278e-05, "loss": 1.7071, "step": 40850 }, { "epoch": 0.25681456602439456, "grad_norm": 6.499464511871338, "learning_rate": 1.8292457021198128e-05, "loss": 1.6713, "step": 40860 }, { "epoch": 0.2568774183410917, "grad_norm": 6.973784446716309, "learning_rate": 1.8292037920253475e-05, "loss": 2.105, "step": 40870 }, { "epoch": 0.2569402706577888, "grad_norm": 6.484634876251221, "learning_rate": 1.829161881930882e-05, "loss": 1.9186, "step": 40880 }, { "epoch": 0.2570031229744859, "grad_norm": 7.263411521911621, "learning_rate": 1.8291199718364166e-05, "loss": 1.9716, "step": 40890 }, { "epoch": 0.257065975291183, "grad_norm": 6.684298515319824, "learning_rate": 1.8290780617419513e-05, "loss": 1.7181, "step": 40900 }, { "epoch": 0.2571288276078801, "grad_norm": 6.107182502746582, "learning_rate": 1.829036151647486e-05, "loss": 1.7592, "step": 40910 }, { "epoch": 0.2571916799245772, "grad_norm": 7.427902698516846, "learning_rate": 1.8289942415530207e-05, "loss": 1.9022, "step": 40920 }, { "epoch": 0.2572545322412743, "grad_norm": 7.072940826416016, "learning_rate": 1.8289523314585554e-05, "loss": 1.7885, "step": 40930 }, { "epoch": 0.25731738455797143, "grad_norm": 8.010061264038086, "learning_rate": 1.8289104213640898e-05, "loss": 1.7651, "step": 40940 }, { "epoch": 0.25738023687466854, "grad_norm": 6.3123393058776855, "learning_rate": 1.8288685112696245e-05, "loss": 2.0995, "step": 40950 }, { "epoch": 0.25744308919136566, "grad_norm": 6.515552520751953, "learning_rate": 1.8288266011751592e-05, "loss": 1.7928, "step": 40960 }, { "epoch": 0.2575059415080628, "grad_norm": 7.143669128417969, "learning_rate": 1.828784691080694e-05, "loss": 1.8767, "step": 40970 }, { "epoch": 0.2575687938247599, "grad_norm": 7.594226360321045, "learning_rate": 1.8287427809862286e-05, "loss": 1.8088, "step": 40980 }, { "epoch": 0.257631646141457, "grad_norm": 8.058490753173828, "learning_rate": 1.828700870891763e-05, "loss": 2.0683, "step": 40990 }, { "epoch": 0.2576944984581541, "grad_norm": 9.095369338989258, "learning_rate": 1.8286589607972977e-05, "loss": 1.9984, "step": 41000 }, { "epoch": 0.25775735077485123, "grad_norm": 7.421639442443848, "learning_rate": 1.8286170507028324e-05, "loss": 1.7457, "step": 41010 }, { "epoch": 0.25782020309154835, "grad_norm": 6.662604331970215, "learning_rate": 1.828575140608367e-05, "loss": 2.006, "step": 41020 }, { "epoch": 0.25788305540824547, "grad_norm": 7.050024509429932, "learning_rate": 1.8285332305139015e-05, "loss": 1.8124, "step": 41030 }, { "epoch": 0.2579459077249425, "grad_norm": 7.553315162658691, "learning_rate": 1.8284913204194362e-05, "loss": 1.8247, "step": 41040 }, { "epoch": 0.25800876004163964, "grad_norm": 5.7506561279296875, "learning_rate": 1.828449410324971e-05, "loss": 1.8771, "step": 41050 }, { "epoch": 0.25807161235833675, "grad_norm": 7.674767971038818, "learning_rate": 1.8284075002305056e-05, "loss": 1.7781, "step": 41060 }, { "epoch": 0.25813446467503387, "grad_norm": 7.037848472595215, "learning_rate": 1.8283655901360403e-05, "loss": 1.7003, "step": 41070 }, { "epoch": 0.258197316991731, "grad_norm": 7.2399115562438965, "learning_rate": 1.828323680041575e-05, "loss": 1.9313, "step": 41080 }, { "epoch": 0.2582601693084281, "grad_norm": 7.564504146575928, "learning_rate": 1.8282817699471097e-05, "loss": 2.0094, "step": 41090 }, { "epoch": 0.2583230216251252, "grad_norm": 6.998504161834717, "learning_rate": 1.8282398598526444e-05, "loss": 1.8335, "step": 41100 }, { "epoch": 0.25838587394182233, "grad_norm": 7.1183762550354, "learning_rate": 1.828197949758179e-05, "loss": 1.7731, "step": 41110 }, { "epoch": 0.25844872625851945, "grad_norm": 7.143240451812744, "learning_rate": 1.8281560396637135e-05, "loss": 1.8062, "step": 41120 }, { "epoch": 0.25851157857521656, "grad_norm": 6.633826732635498, "learning_rate": 1.8281141295692482e-05, "loss": 1.9169, "step": 41130 }, { "epoch": 0.2585744308919137, "grad_norm": 6.880277156829834, "learning_rate": 1.828072219474783e-05, "loss": 1.8099, "step": 41140 }, { "epoch": 0.2586372832086108, "grad_norm": 5.943376541137695, "learning_rate": 1.8280303093803176e-05, "loss": 1.7188, "step": 41150 }, { "epoch": 0.25870013552530785, "grad_norm": 7.167764186859131, "learning_rate": 1.827988399285852e-05, "loss": 1.9132, "step": 41160 }, { "epoch": 0.25876298784200497, "grad_norm": 7.091798305511475, "learning_rate": 1.8279464891913867e-05, "loss": 1.7669, "step": 41170 }, { "epoch": 0.2588258401587021, "grad_norm": 7.426420211791992, "learning_rate": 1.8279045790969214e-05, "loss": 1.7289, "step": 41180 }, { "epoch": 0.2588886924753992, "grad_norm": 7.881027698516846, "learning_rate": 1.827862669002456e-05, "loss": 2.0903, "step": 41190 }, { "epoch": 0.2589515447920963, "grad_norm": 6.953278064727783, "learning_rate": 1.827820758907991e-05, "loss": 1.8588, "step": 41200 }, { "epoch": 0.2590143971087934, "grad_norm": 7.793636798858643, "learning_rate": 1.8277788488135252e-05, "loss": 2.1033, "step": 41210 }, { "epoch": 0.25907724942549054, "grad_norm": 8.12525463104248, "learning_rate": 1.82773693871906e-05, "loss": 2.0343, "step": 41220 }, { "epoch": 0.25914010174218766, "grad_norm": 6.306594371795654, "learning_rate": 1.8276950286245946e-05, "loss": 1.775, "step": 41230 }, { "epoch": 0.25920295405888477, "grad_norm": 8.239091873168945, "learning_rate": 1.8276531185301293e-05, "loss": 1.9468, "step": 41240 }, { "epoch": 0.2592658063755819, "grad_norm": 7.029281139373779, "learning_rate": 1.827611208435664e-05, "loss": 1.9031, "step": 41250 }, { "epoch": 0.259328658692279, "grad_norm": 7.506056785583496, "learning_rate": 1.8275692983411984e-05, "loss": 1.9074, "step": 41260 }, { "epoch": 0.2593915110089761, "grad_norm": 6.82318639755249, "learning_rate": 1.827527388246733e-05, "loss": 1.6273, "step": 41270 }, { "epoch": 0.25945436332567323, "grad_norm": 6.963465690612793, "learning_rate": 1.8274854781522678e-05, "loss": 1.6154, "step": 41280 }, { "epoch": 0.2595172156423703, "grad_norm": 7.320285320281982, "learning_rate": 1.8274435680578025e-05, "loss": 1.7584, "step": 41290 }, { "epoch": 0.2595800679590674, "grad_norm": 6.893568992614746, "learning_rate": 1.8274016579633372e-05, "loss": 1.8717, "step": 41300 }, { "epoch": 0.2596429202757645, "grad_norm": 7.00272274017334, "learning_rate": 1.827359747868872e-05, "loss": 1.7207, "step": 41310 }, { "epoch": 0.25970577259246164, "grad_norm": 6.852663993835449, "learning_rate": 1.8273178377744066e-05, "loss": 1.7521, "step": 41320 }, { "epoch": 0.25976862490915875, "grad_norm": 6.563899040222168, "learning_rate": 1.8272759276799413e-05, "loss": 1.7454, "step": 41330 }, { "epoch": 0.25983147722585587, "grad_norm": 7.41424036026001, "learning_rate": 1.8272340175854757e-05, "loss": 1.7128, "step": 41340 }, { "epoch": 0.259894329542553, "grad_norm": 6.487424373626709, "learning_rate": 1.8271921074910104e-05, "loss": 2.0147, "step": 41350 }, { "epoch": 0.2599571818592501, "grad_norm": 7.088781356811523, "learning_rate": 1.827150197396545e-05, "loss": 1.7212, "step": 41360 }, { "epoch": 0.2600200341759472, "grad_norm": 7.368875503540039, "learning_rate": 1.82710828730208e-05, "loss": 1.9403, "step": 41370 }, { "epoch": 0.2600828864926443, "grad_norm": 8.326485633850098, "learning_rate": 1.8270663772076145e-05, "loss": 1.9106, "step": 41380 }, { "epoch": 0.26014573880934144, "grad_norm": 9.53166389465332, "learning_rate": 1.827024467113149e-05, "loss": 1.987, "step": 41390 }, { "epoch": 0.26020859112603856, "grad_norm": 7.934520244598389, "learning_rate": 1.8269825570186836e-05, "loss": 1.9611, "step": 41400 }, { "epoch": 0.2602714434427357, "grad_norm": 7.475955009460449, "learning_rate": 1.8269406469242183e-05, "loss": 1.8361, "step": 41410 }, { "epoch": 0.26033429575943273, "grad_norm": 8.234063148498535, "learning_rate": 1.826898736829753e-05, "loss": 1.7954, "step": 41420 }, { "epoch": 0.26039714807612985, "grad_norm": 7.798406600952148, "learning_rate": 1.8268568267352874e-05, "loss": 1.8615, "step": 41430 }, { "epoch": 0.26046000039282696, "grad_norm": 6.932463645935059, "learning_rate": 1.826814916640822e-05, "loss": 1.9989, "step": 41440 }, { "epoch": 0.2605228527095241, "grad_norm": 8.069757461547852, "learning_rate": 1.8267730065463568e-05, "loss": 1.6508, "step": 41450 }, { "epoch": 0.2605857050262212, "grad_norm": 7.715116024017334, "learning_rate": 1.8267310964518915e-05, "loss": 1.86, "step": 41460 }, { "epoch": 0.2606485573429183, "grad_norm": 7.59549617767334, "learning_rate": 1.8266891863574262e-05, "loss": 2.0385, "step": 41470 }, { "epoch": 0.2607114096596154, "grad_norm": 6.884585380554199, "learning_rate": 1.826647276262961e-05, "loss": 1.7335, "step": 41480 }, { "epoch": 0.26077426197631254, "grad_norm": 7.218272686004639, "learning_rate": 1.8266053661684956e-05, "loss": 1.9569, "step": 41490 }, { "epoch": 0.26083711429300965, "grad_norm": 6.667884826660156, "learning_rate": 1.8265634560740303e-05, "loss": 1.7593, "step": 41500 }, { "epoch": 0.26089996660970677, "grad_norm": 6.3704304695129395, "learning_rate": 1.8265215459795647e-05, "loss": 1.8527, "step": 41510 }, { "epoch": 0.2609628189264039, "grad_norm": 7.603665828704834, "learning_rate": 1.8264796358850994e-05, "loss": 1.8057, "step": 41520 }, { "epoch": 0.261025671243101, "grad_norm": 7.23233699798584, "learning_rate": 1.826437725790634e-05, "loss": 1.669, "step": 41530 }, { "epoch": 0.2610885235597981, "grad_norm": 7.816350936889648, "learning_rate": 1.826395815696169e-05, "loss": 1.8749, "step": 41540 }, { "epoch": 0.2611513758764952, "grad_norm": 7.779391288757324, "learning_rate": 1.8263539056017035e-05, "loss": 2.0454, "step": 41550 }, { "epoch": 0.2612142281931923, "grad_norm": 6.121278762817383, "learning_rate": 1.826311995507238e-05, "loss": 2.085, "step": 41560 }, { "epoch": 0.2612770805098894, "grad_norm": 7.626347064971924, "learning_rate": 1.8262700854127726e-05, "loss": 1.8736, "step": 41570 }, { "epoch": 0.2613399328265865, "grad_norm": 6.436768054962158, "learning_rate": 1.8262281753183073e-05, "loss": 1.8548, "step": 41580 }, { "epoch": 0.26140278514328363, "grad_norm": 7.84519100189209, "learning_rate": 1.826186265223842e-05, "loss": 1.9526, "step": 41590 }, { "epoch": 0.26146563745998075, "grad_norm": 7.475573539733887, "learning_rate": 1.8261443551293767e-05, "loss": 1.9821, "step": 41600 }, { "epoch": 0.26152848977667786, "grad_norm": 6.5553178787231445, "learning_rate": 1.826102445034911e-05, "loss": 1.8634, "step": 41610 }, { "epoch": 0.261591342093375, "grad_norm": 7.0052008628845215, "learning_rate": 1.8260605349404458e-05, "loss": 1.7814, "step": 41620 }, { "epoch": 0.2616541944100721, "grad_norm": 6.52140998840332, "learning_rate": 1.8260186248459805e-05, "loss": 2.06, "step": 41630 }, { "epoch": 0.2617170467267692, "grad_norm": 8.644024848937988, "learning_rate": 1.8259767147515152e-05, "loss": 1.9339, "step": 41640 }, { "epoch": 0.2617798990434663, "grad_norm": 6.678708076477051, "learning_rate": 1.8259348046570496e-05, "loss": 2.0563, "step": 41650 }, { "epoch": 0.26184275136016344, "grad_norm": 7.915999412536621, "learning_rate": 1.8258928945625843e-05, "loss": 1.802, "step": 41660 }, { "epoch": 0.26190560367686055, "grad_norm": 6.686285018920898, "learning_rate": 1.825850984468119e-05, "loss": 1.8831, "step": 41670 }, { "epoch": 0.2619684559935576, "grad_norm": 7.126838684082031, "learning_rate": 1.8258090743736537e-05, "loss": 1.789, "step": 41680 }, { "epoch": 0.26203130831025473, "grad_norm": 7.053625106811523, "learning_rate": 1.8257671642791884e-05, "loss": 1.9077, "step": 41690 }, { "epoch": 0.26209416062695184, "grad_norm": 7.124743938446045, "learning_rate": 1.825725254184723e-05, "loss": 1.7721, "step": 41700 }, { "epoch": 0.26215701294364896, "grad_norm": 7.246686935424805, "learning_rate": 1.825683344090258e-05, "loss": 1.9152, "step": 41710 }, { "epoch": 0.2622198652603461, "grad_norm": 7.376430511474609, "learning_rate": 1.8256414339957925e-05, "loss": 1.7907, "step": 41720 }, { "epoch": 0.2622827175770432, "grad_norm": 7.931061267852783, "learning_rate": 1.8255995239013273e-05, "loss": 1.7704, "step": 41730 }, { "epoch": 0.2623455698937403, "grad_norm": 7.240325450897217, "learning_rate": 1.8255576138068616e-05, "loss": 1.7538, "step": 41740 }, { "epoch": 0.2624084222104374, "grad_norm": 7.850881099700928, "learning_rate": 1.8255157037123963e-05, "loss": 1.9528, "step": 41750 }, { "epoch": 0.26247127452713453, "grad_norm": 7.4015069007873535, "learning_rate": 1.825473793617931e-05, "loss": 1.7776, "step": 41760 }, { "epoch": 0.26253412684383165, "grad_norm": 7.406811714172363, "learning_rate": 1.8254318835234657e-05, "loss": 1.7798, "step": 41770 }, { "epoch": 0.26259697916052877, "grad_norm": 7.2380781173706055, "learning_rate": 1.825389973429e-05, "loss": 1.9065, "step": 41780 }, { "epoch": 0.2626598314772259, "grad_norm": 8.308854103088379, "learning_rate": 1.8253480633345348e-05, "loss": 1.9674, "step": 41790 }, { "epoch": 0.26272268379392294, "grad_norm": 7.051663875579834, "learning_rate": 1.8253061532400695e-05, "loss": 1.8546, "step": 41800 }, { "epoch": 0.26278553611062005, "grad_norm": 6.652577877044678, "learning_rate": 1.8252642431456042e-05, "loss": 1.8808, "step": 41810 }, { "epoch": 0.26284838842731717, "grad_norm": 7.104018688201904, "learning_rate": 1.825222333051139e-05, "loss": 1.9225, "step": 41820 }, { "epoch": 0.2629112407440143, "grad_norm": 7.6953325271606445, "learning_rate": 1.8251804229566733e-05, "loss": 1.7381, "step": 41830 }, { "epoch": 0.2629740930607114, "grad_norm": 7.816473007202148, "learning_rate": 1.825138512862208e-05, "loss": 1.7347, "step": 41840 }, { "epoch": 0.2630369453774085, "grad_norm": 7.108369827270508, "learning_rate": 1.8250966027677427e-05, "loss": 1.7597, "step": 41850 }, { "epoch": 0.26309979769410563, "grad_norm": 6.802537441253662, "learning_rate": 1.8250546926732774e-05, "loss": 1.7106, "step": 41860 }, { "epoch": 0.26316265001080275, "grad_norm": 7.982716083526611, "learning_rate": 1.825012782578812e-05, "loss": 1.7053, "step": 41870 }, { "epoch": 0.26322550232749986, "grad_norm": 6.618117809295654, "learning_rate": 1.824970872484347e-05, "loss": 1.9238, "step": 41880 }, { "epoch": 0.263288354644197, "grad_norm": 7.67053747177124, "learning_rate": 1.8249289623898812e-05, "loss": 1.9405, "step": 41890 }, { "epoch": 0.2633512069608941, "grad_norm": 8.179542541503906, "learning_rate": 1.824887052295416e-05, "loss": 2.0244, "step": 41900 }, { "epoch": 0.2634140592775912, "grad_norm": 7.588283538818359, "learning_rate": 1.8248451422009506e-05, "loss": 1.7046, "step": 41910 }, { "epoch": 0.2634769115942883, "grad_norm": 6.082485198974609, "learning_rate": 1.8248032321064853e-05, "loss": 1.8868, "step": 41920 }, { "epoch": 0.2635397639109854, "grad_norm": 9.015851974487305, "learning_rate": 1.82476132201202e-05, "loss": 1.8189, "step": 41930 }, { "epoch": 0.2636026162276825, "grad_norm": 7.327020168304443, "learning_rate": 1.8247194119175547e-05, "loss": 1.826, "step": 41940 }, { "epoch": 0.2636654685443796, "grad_norm": 8.161823272705078, "learning_rate": 1.8246775018230895e-05, "loss": 1.8783, "step": 41950 }, { "epoch": 0.2637283208610767, "grad_norm": 7.522366046905518, "learning_rate": 1.8246355917286238e-05, "loss": 1.9316, "step": 41960 }, { "epoch": 0.26379117317777384, "grad_norm": 7.7846856117248535, "learning_rate": 1.8245936816341585e-05, "loss": 2.0466, "step": 41970 }, { "epoch": 0.26385402549447096, "grad_norm": 7.288768768310547, "learning_rate": 1.8245517715396932e-05, "loss": 1.5244, "step": 41980 }, { "epoch": 0.26391687781116807, "grad_norm": 7.846412181854248, "learning_rate": 1.824509861445228e-05, "loss": 1.8989, "step": 41990 }, { "epoch": 0.2639797301278652, "grad_norm": 6.715677738189697, "learning_rate": 1.8244679513507627e-05, "loss": 1.9037, "step": 42000 }, { "epoch": 0.2640425824445623, "grad_norm": 7.158120155334473, "learning_rate": 1.824426041256297e-05, "loss": 1.9783, "step": 42010 }, { "epoch": 0.2641054347612594, "grad_norm": 6.922282695770264, "learning_rate": 1.8243841311618317e-05, "loss": 1.8341, "step": 42020 }, { "epoch": 0.26416828707795653, "grad_norm": 7.328244686126709, "learning_rate": 1.8243422210673664e-05, "loss": 2.0118, "step": 42030 }, { "epoch": 0.26423113939465365, "grad_norm": 7.900145530700684, "learning_rate": 1.824300310972901e-05, "loss": 1.8651, "step": 42040 }, { "epoch": 0.26429399171135076, "grad_norm": 9.116615295410156, "learning_rate": 1.8242584008784355e-05, "loss": 2.0223, "step": 42050 }, { "epoch": 0.2643568440280478, "grad_norm": 9.100872039794922, "learning_rate": 1.8242164907839702e-05, "loss": 2.0511, "step": 42060 }, { "epoch": 0.26441969634474494, "grad_norm": 6.846112251281738, "learning_rate": 1.824174580689505e-05, "loss": 1.9461, "step": 42070 }, { "epoch": 0.26448254866144205, "grad_norm": 7.331666946411133, "learning_rate": 1.8241326705950396e-05, "loss": 1.929, "step": 42080 }, { "epoch": 0.26454540097813917, "grad_norm": 7.769443988800049, "learning_rate": 1.8240907605005743e-05, "loss": 1.9024, "step": 42090 }, { "epoch": 0.2646082532948363, "grad_norm": 9.214117050170898, "learning_rate": 1.824048850406109e-05, "loss": 1.8484, "step": 42100 }, { "epoch": 0.2646711056115334, "grad_norm": 7.874613285064697, "learning_rate": 1.8240069403116438e-05, "loss": 2.0011, "step": 42110 }, { "epoch": 0.2647339579282305, "grad_norm": 6.289083003997803, "learning_rate": 1.8239650302171785e-05, "loss": 1.7715, "step": 42120 }, { "epoch": 0.2647968102449276, "grad_norm": 7.777981758117676, "learning_rate": 1.8239231201227128e-05, "loss": 2.0084, "step": 42130 }, { "epoch": 0.26485966256162474, "grad_norm": 6.87426233291626, "learning_rate": 1.8238812100282475e-05, "loss": 1.6479, "step": 42140 }, { "epoch": 0.26492251487832186, "grad_norm": 7.014932632446289, "learning_rate": 1.8238392999337822e-05, "loss": 1.784, "step": 42150 }, { "epoch": 0.264985367195019, "grad_norm": 7.573215484619141, "learning_rate": 1.823797389839317e-05, "loss": 1.8301, "step": 42160 }, { "epoch": 0.2650482195117161, "grad_norm": 5.516359806060791, "learning_rate": 1.8237554797448517e-05, "loss": 1.9987, "step": 42170 }, { "epoch": 0.2651110718284132, "grad_norm": 7.882081508636475, "learning_rate": 1.823713569650386e-05, "loss": 1.9692, "step": 42180 }, { "epoch": 0.26517392414511026, "grad_norm": 7.476166248321533, "learning_rate": 1.8236716595559207e-05, "loss": 1.8581, "step": 42190 }, { "epoch": 0.2652367764618074, "grad_norm": 6.487078666687012, "learning_rate": 1.8236297494614554e-05, "loss": 1.7073, "step": 42200 }, { "epoch": 0.2652996287785045, "grad_norm": 6.1671857833862305, "learning_rate": 1.82358783936699e-05, "loss": 1.8987, "step": 42210 }, { "epoch": 0.2653624810952016, "grad_norm": 6.757456302642822, "learning_rate": 1.823545929272525e-05, "loss": 1.9064, "step": 42220 }, { "epoch": 0.2654253334118987, "grad_norm": 6.9185261726379395, "learning_rate": 1.8235040191780592e-05, "loss": 1.8034, "step": 42230 }, { "epoch": 0.26548818572859584, "grad_norm": 8.581500053405762, "learning_rate": 1.823462109083594e-05, "loss": 1.939, "step": 42240 }, { "epoch": 0.26555103804529295, "grad_norm": 7.534511566162109, "learning_rate": 1.8234201989891286e-05, "loss": 1.9409, "step": 42250 }, { "epoch": 0.26561389036199007, "grad_norm": 6.689976215362549, "learning_rate": 1.8233782888946633e-05, "loss": 2.0029, "step": 42260 }, { "epoch": 0.2656767426786872, "grad_norm": 6.737766265869141, "learning_rate": 1.8233363788001977e-05, "loss": 1.9975, "step": 42270 }, { "epoch": 0.2657395949953843, "grad_norm": 7.648566722869873, "learning_rate": 1.8232944687057324e-05, "loss": 1.777, "step": 42280 }, { "epoch": 0.2658024473120814, "grad_norm": 6.44831657409668, "learning_rate": 1.823252558611267e-05, "loss": 1.9846, "step": 42290 }, { "epoch": 0.26586529962877853, "grad_norm": 6.997977256774902, "learning_rate": 1.823210648516802e-05, "loss": 1.8574, "step": 42300 }, { "epoch": 0.2659281519454756, "grad_norm": 7.669799327850342, "learning_rate": 1.8231687384223365e-05, "loss": 1.8257, "step": 42310 }, { "epoch": 0.2659910042621727, "grad_norm": 7.207052707672119, "learning_rate": 1.8231268283278712e-05, "loss": 1.7204, "step": 42320 }, { "epoch": 0.2660538565788698, "grad_norm": 7.405806541442871, "learning_rate": 1.823084918233406e-05, "loss": 1.9153, "step": 42330 }, { "epoch": 0.26611670889556693, "grad_norm": 7.449708938598633, "learning_rate": 1.8230430081389407e-05, "loss": 1.838, "step": 42340 }, { "epoch": 0.26617956121226405, "grad_norm": 6.299845218658447, "learning_rate": 1.8230010980444754e-05, "loss": 1.7433, "step": 42350 }, { "epoch": 0.26624241352896116, "grad_norm": 6.5375871658325195, "learning_rate": 1.8229591879500097e-05, "loss": 1.8614, "step": 42360 }, { "epoch": 0.2663052658456583, "grad_norm": 7.137462139129639, "learning_rate": 1.8229172778555444e-05, "loss": 2.0776, "step": 42370 }, { "epoch": 0.2663681181623554, "grad_norm": 7.907176971435547, "learning_rate": 1.822875367761079e-05, "loss": 1.9978, "step": 42380 }, { "epoch": 0.2664309704790525, "grad_norm": 7.982783794403076, "learning_rate": 1.822833457666614e-05, "loss": 1.8599, "step": 42390 }, { "epoch": 0.2664938227957496, "grad_norm": 7.208168029785156, "learning_rate": 1.8227915475721482e-05, "loss": 1.7397, "step": 42400 }, { "epoch": 0.26655667511244674, "grad_norm": 6.838236331939697, "learning_rate": 1.822749637477683e-05, "loss": 1.9251, "step": 42410 }, { "epoch": 0.26661952742914385, "grad_norm": 7.437549591064453, "learning_rate": 1.8227077273832176e-05, "loss": 1.7901, "step": 42420 }, { "epoch": 0.26668237974584097, "grad_norm": 6.826678276062012, "learning_rate": 1.8226658172887523e-05, "loss": 1.7771, "step": 42430 }, { "epoch": 0.26674523206253803, "grad_norm": 6.3887152671813965, "learning_rate": 1.822623907194287e-05, "loss": 1.8314, "step": 42440 }, { "epoch": 0.26680808437923514, "grad_norm": 7.1717071533203125, "learning_rate": 1.8225819970998214e-05, "loss": 1.877, "step": 42450 }, { "epoch": 0.26687093669593226, "grad_norm": 6.2922844886779785, "learning_rate": 1.822540087005356e-05, "loss": 1.7357, "step": 42460 }, { "epoch": 0.2669337890126294, "grad_norm": 6.810061931610107, "learning_rate": 1.822498176910891e-05, "loss": 1.7068, "step": 42470 }, { "epoch": 0.2669966413293265, "grad_norm": 7.887887954711914, "learning_rate": 1.8224562668164255e-05, "loss": 1.9572, "step": 42480 }, { "epoch": 0.2670594936460236, "grad_norm": 7.99580717086792, "learning_rate": 1.8224143567219602e-05, "loss": 1.7972, "step": 42490 }, { "epoch": 0.2671223459627207, "grad_norm": 6.92336893081665, "learning_rate": 1.822372446627495e-05, "loss": 1.7053, "step": 42500 }, { "epoch": 0.26718519827941783, "grad_norm": 7.77141809463501, "learning_rate": 1.8223305365330293e-05, "loss": 1.9202, "step": 42510 }, { "epoch": 0.26724805059611495, "grad_norm": 6.241732597351074, "learning_rate": 1.822288626438564e-05, "loss": 1.9697, "step": 42520 }, { "epoch": 0.26731090291281207, "grad_norm": 7.296006679534912, "learning_rate": 1.8222467163440987e-05, "loss": 1.8685, "step": 42530 }, { "epoch": 0.2673737552295092, "grad_norm": 16.223621368408203, "learning_rate": 1.8222048062496334e-05, "loss": 1.9736, "step": 42540 }, { "epoch": 0.2674366075462063, "grad_norm": 6.669566631317139, "learning_rate": 1.822162896155168e-05, "loss": 1.821, "step": 42550 }, { "epoch": 0.2674994598629034, "grad_norm": 7.176951885223389, "learning_rate": 1.822120986060703e-05, "loss": 1.5961, "step": 42560 }, { "epoch": 0.26756231217960047, "grad_norm": 6.5811920166015625, "learning_rate": 1.8220790759662376e-05, "loss": 1.7338, "step": 42570 }, { "epoch": 0.2676251644962976, "grad_norm": 7.482040882110596, "learning_rate": 1.822037165871772e-05, "loss": 1.877, "step": 42580 }, { "epoch": 0.2676880168129947, "grad_norm": 7.4308061599731445, "learning_rate": 1.8219952557773066e-05, "loss": 2.0624, "step": 42590 }, { "epoch": 0.2677508691296918, "grad_norm": 7.0572004318237305, "learning_rate": 1.8219533456828413e-05, "loss": 1.7521, "step": 42600 }, { "epoch": 0.26781372144638893, "grad_norm": 8.193439483642578, "learning_rate": 1.821911435588376e-05, "loss": 1.7971, "step": 42610 }, { "epoch": 0.26787657376308605, "grad_norm": 7.08612585067749, "learning_rate": 1.8218695254939108e-05, "loss": 1.8994, "step": 42620 }, { "epoch": 0.26793942607978316, "grad_norm": 6.6897969245910645, "learning_rate": 1.821827615399445e-05, "loss": 1.8881, "step": 42630 }, { "epoch": 0.2680022783964803, "grad_norm": 7.877755165100098, "learning_rate": 1.82178570530498e-05, "loss": 1.8548, "step": 42640 }, { "epoch": 0.2680651307131774, "grad_norm": 7.418746471405029, "learning_rate": 1.8217437952105145e-05, "loss": 1.8281, "step": 42650 }, { "epoch": 0.2681279830298745, "grad_norm": 7.265000343322754, "learning_rate": 1.8217018851160493e-05, "loss": 1.72, "step": 42660 }, { "epoch": 0.2681908353465716, "grad_norm": 7.055147171020508, "learning_rate": 1.8216599750215836e-05, "loss": 1.7341, "step": 42670 }, { "epoch": 0.26825368766326874, "grad_norm": 6.618005275726318, "learning_rate": 1.8216180649271183e-05, "loss": 2.2909, "step": 42680 }, { "epoch": 0.26831653997996585, "grad_norm": 6.586921691894531, "learning_rate": 1.821576154832653e-05, "loss": 2.0484, "step": 42690 }, { "epoch": 0.2683793922966629, "grad_norm": 7.860531806945801, "learning_rate": 1.8215342447381877e-05, "loss": 1.8451, "step": 42700 }, { "epoch": 0.26844224461336, "grad_norm": 8.915355682373047, "learning_rate": 1.8214923346437224e-05, "loss": 1.9683, "step": 42710 }, { "epoch": 0.26850509693005714, "grad_norm": 6.985775470733643, "learning_rate": 1.821450424549257e-05, "loss": 2.1368, "step": 42720 }, { "epoch": 0.26856794924675426, "grad_norm": 6.577744007110596, "learning_rate": 1.821408514454792e-05, "loss": 1.6015, "step": 42730 }, { "epoch": 0.26863080156345137, "grad_norm": 6.80482292175293, "learning_rate": 1.8213666043603266e-05, "loss": 1.7962, "step": 42740 }, { "epoch": 0.2686936538801485, "grad_norm": 6.925163269042969, "learning_rate": 1.8213246942658613e-05, "loss": 1.706, "step": 42750 }, { "epoch": 0.2687565061968456, "grad_norm": 7.358352184295654, "learning_rate": 1.8212827841713956e-05, "loss": 1.8237, "step": 42760 }, { "epoch": 0.2688193585135427, "grad_norm": 8.235050201416016, "learning_rate": 1.8212408740769304e-05, "loss": 1.6833, "step": 42770 }, { "epoch": 0.26888221083023983, "grad_norm": 7.137479305267334, "learning_rate": 1.821198963982465e-05, "loss": 1.6839, "step": 42780 }, { "epoch": 0.26894506314693695, "grad_norm": 8.005147933959961, "learning_rate": 1.8211570538879998e-05, "loss": 1.768, "step": 42790 }, { "epoch": 0.26900791546363406, "grad_norm": 7.089692115783691, "learning_rate": 1.821119334802981e-05, "loss": 1.9332, "step": 42800 }, { "epoch": 0.2690707677803312, "grad_norm": 6.873706817626953, "learning_rate": 1.8210774247085156e-05, "loss": 1.8431, "step": 42810 }, { "epoch": 0.26913362009702824, "grad_norm": 9.355104446411133, "learning_rate": 1.8210355146140503e-05, "loss": 1.8965, "step": 42820 }, { "epoch": 0.26919647241372535, "grad_norm": 6.228792667388916, "learning_rate": 1.8209936045195847e-05, "loss": 1.8587, "step": 42830 }, { "epoch": 0.26925932473042247, "grad_norm": 7.237106800079346, "learning_rate": 1.8209516944251194e-05, "loss": 1.9352, "step": 42840 }, { "epoch": 0.2693221770471196, "grad_norm": 6.2146453857421875, "learning_rate": 1.820909784330654e-05, "loss": 1.8494, "step": 42850 }, { "epoch": 0.2693850293638167, "grad_norm": 7.7184319496154785, "learning_rate": 1.8208678742361888e-05, "loss": 1.8093, "step": 42860 }, { "epoch": 0.2694478816805138, "grad_norm": 8.168176651000977, "learning_rate": 1.8208259641417235e-05, "loss": 1.8825, "step": 42870 }, { "epoch": 0.2695107339972109, "grad_norm": 6.3709797859191895, "learning_rate": 1.820784054047258e-05, "loss": 1.7919, "step": 42880 }, { "epoch": 0.26957358631390804, "grad_norm": 7.18712043762207, "learning_rate": 1.8207421439527926e-05, "loss": 1.7581, "step": 42890 }, { "epoch": 0.26963643863060516, "grad_norm": 7.938587188720703, "learning_rate": 1.8207002338583273e-05, "loss": 1.7975, "step": 42900 }, { "epoch": 0.2696992909473023, "grad_norm": 7.001043319702148, "learning_rate": 1.820658323763862e-05, "loss": 1.9066, "step": 42910 }, { "epoch": 0.2697621432639994, "grad_norm": 7.061951160430908, "learning_rate": 1.8206164136693964e-05, "loss": 1.7411, "step": 42920 }, { "epoch": 0.2698249955806965, "grad_norm": 7.237574577331543, "learning_rate": 1.820574503574931e-05, "loss": 1.9942, "step": 42930 }, { "epoch": 0.2698878478973936, "grad_norm": 7.545398712158203, "learning_rate": 1.8205325934804658e-05, "loss": 1.9839, "step": 42940 }, { "epoch": 0.2699507002140907, "grad_norm": 5.7841668128967285, "learning_rate": 1.8204906833860005e-05, "loss": 1.7837, "step": 42950 }, { "epoch": 0.2700135525307878, "grad_norm": 7.160608768463135, "learning_rate": 1.8204487732915352e-05, "loss": 1.9018, "step": 42960 }, { "epoch": 0.2700764048474849, "grad_norm": 6.914604187011719, "learning_rate": 1.8204068631970696e-05, "loss": 1.8397, "step": 42970 }, { "epoch": 0.270139257164182, "grad_norm": 7.4479827880859375, "learning_rate": 1.8203649531026043e-05, "loss": 1.8815, "step": 42980 }, { "epoch": 0.27020210948087914, "grad_norm": 6.855360984802246, "learning_rate": 1.820323043008139e-05, "loss": 1.8275, "step": 42990 }, { "epoch": 0.27026496179757625, "grad_norm": 7.653349876403809, "learning_rate": 1.8202811329136737e-05, "loss": 1.9389, "step": 43000 }, { "epoch": 0.27032781411427337, "grad_norm": 7.4689788818359375, "learning_rate": 1.8202392228192084e-05, "loss": 1.7573, "step": 43010 }, { "epoch": 0.2703906664309705, "grad_norm": 9.028587341308594, "learning_rate": 1.820197312724743e-05, "loss": 1.8279, "step": 43020 }, { "epoch": 0.2704535187476676, "grad_norm": 6.932178020477295, "learning_rate": 1.8201554026302778e-05, "loss": 1.8318, "step": 43030 }, { "epoch": 0.2705163710643647, "grad_norm": 6.070540428161621, "learning_rate": 1.8201134925358125e-05, "loss": 1.7882, "step": 43040 }, { "epoch": 0.27057922338106183, "grad_norm": 7.225831985473633, "learning_rate": 1.8200715824413472e-05, "loss": 1.839, "step": 43050 }, { "epoch": 0.27064207569775894, "grad_norm": 7.78971004486084, "learning_rate": 1.8200296723468816e-05, "loss": 1.7446, "step": 43060 }, { "epoch": 0.27070492801445606, "grad_norm": 7.131658554077148, "learning_rate": 1.8199877622524163e-05, "loss": 2.0806, "step": 43070 }, { "epoch": 0.2707677803311531, "grad_norm": 7.393949508666992, "learning_rate": 1.819945852157951e-05, "loss": 1.8194, "step": 43080 }, { "epoch": 0.27083063264785023, "grad_norm": 6.4248175621032715, "learning_rate": 1.8199039420634857e-05, "loss": 1.7011, "step": 43090 }, { "epoch": 0.27089348496454735, "grad_norm": 7.033029079437256, "learning_rate": 1.81986203196902e-05, "loss": 1.9835, "step": 43100 }, { "epoch": 0.27095633728124446, "grad_norm": 7.071962833404541, "learning_rate": 1.8198201218745548e-05, "loss": 1.7778, "step": 43110 }, { "epoch": 0.2710191895979416, "grad_norm": 6.914316177368164, "learning_rate": 1.8197782117800895e-05, "loss": 1.7871, "step": 43120 }, { "epoch": 0.2710820419146387, "grad_norm": 7.62619686126709, "learning_rate": 1.8197363016856242e-05, "loss": 1.7542, "step": 43130 }, { "epoch": 0.2711448942313358, "grad_norm": 7.535961151123047, "learning_rate": 1.819694391591159e-05, "loss": 1.9424, "step": 43140 }, { "epoch": 0.2712077465480329, "grad_norm": 7.0686845779418945, "learning_rate": 1.8196524814966933e-05, "loss": 2.1509, "step": 43150 }, { "epoch": 0.27127059886473004, "grad_norm": 5.649727821350098, "learning_rate": 1.819610571402228e-05, "loss": 1.9517, "step": 43160 }, { "epoch": 0.27133345118142715, "grad_norm": 7.16280460357666, "learning_rate": 1.8195686613077627e-05, "loss": 1.8772, "step": 43170 }, { "epoch": 0.27139630349812427, "grad_norm": 7.139915466308594, "learning_rate": 1.8195267512132974e-05, "loss": 2.0204, "step": 43180 }, { "epoch": 0.2714591558148214, "grad_norm": 6.4540863037109375, "learning_rate": 1.819484841118832e-05, "loss": 1.6998, "step": 43190 }, { "epoch": 0.2715220081315185, "grad_norm": 7.365581512451172, "learning_rate": 1.8194429310243665e-05, "loss": 2.0298, "step": 43200 }, { "epoch": 0.27158486044821556, "grad_norm": 7.0859246253967285, "learning_rate": 1.8194010209299012e-05, "loss": 1.9111, "step": 43210 }, { "epoch": 0.2716477127649127, "grad_norm": 6.624123573303223, "learning_rate": 1.819359110835436e-05, "loss": 2.1248, "step": 43220 }, { "epoch": 0.2717105650816098, "grad_norm": 8.226758003234863, "learning_rate": 1.8193172007409706e-05, "loss": 1.6659, "step": 43230 }, { "epoch": 0.2717734173983069, "grad_norm": 6.948604106903076, "learning_rate": 1.8192752906465053e-05, "loss": 1.9483, "step": 43240 }, { "epoch": 0.271836269715004, "grad_norm": 6.023930072784424, "learning_rate": 1.81923338055204e-05, "loss": 1.7512, "step": 43250 }, { "epoch": 0.27189912203170113, "grad_norm": 7.766272068023682, "learning_rate": 1.8191914704575747e-05, "loss": 1.9309, "step": 43260 }, { "epoch": 0.27196197434839825, "grad_norm": 6.777820110321045, "learning_rate": 1.8191495603631094e-05, "loss": 1.931, "step": 43270 }, { "epoch": 0.27202482666509537, "grad_norm": 8.068720817565918, "learning_rate": 1.8191076502686438e-05, "loss": 2.0577, "step": 43280 }, { "epoch": 0.2720876789817925, "grad_norm": 7.683823108673096, "learning_rate": 1.8190657401741785e-05, "loss": 1.9969, "step": 43290 }, { "epoch": 0.2721505312984896, "grad_norm": 6.8867363929748535, "learning_rate": 1.8190238300797132e-05, "loss": 1.9588, "step": 43300 }, { "epoch": 0.2722133836151867, "grad_norm": 7.831424236297607, "learning_rate": 1.818981919985248e-05, "loss": 1.7287, "step": 43310 }, { "epoch": 0.2722762359318838, "grad_norm": 6.731321334838867, "learning_rate": 1.8189400098907823e-05, "loss": 2.1138, "step": 43320 }, { "epoch": 0.2723390882485809, "grad_norm": 7.519513130187988, "learning_rate": 1.818898099796317e-05, "loss": 2.0652, "step": 43330 }, { "epoch": 0.272401940565278, "grad_norm": 7.941908359527588, "learning_rate": 1.8188561897018517e-05, "loss": 1.9205, "step": 43340 }, { "epoch": 0.2724647928819751, "grad_norm": 7.573431491851807, "learning_rate": 1.8188142796073864e-05, "loss": 1.8556, "step": 43350 }, { "epoch": 0.27252764519867223, "grad_norm": 8.56758975982666, "learning_rate": 1.818772369512921e-05, "loss": 1.9231, "step": 43360 }, { "epoch": 0.27259049751536935, "grad_norm": 6.786647796630859, "learning_rate": 1.8187304594184555e-05, "loss": 1.8466, "step": 43370 }, { "epoch": 0.27265334983206646, "grad_norm": 7.454209804534912, "learning_rate": 1.8186885493239902e-05, "loss": 1.8693, "step": 43380 }, { "epoch": 0.2727162021487636, "grad_norm": 8.307778358459473, "learning_rate": 1.818646639229525e-05, "loss": 1.6996, "step": 43390 }, { "epoch": 0.2727790544654607, "grad_norm": 7.2898712158203125, "learning_rate": 1.8186047291350596e-05, "loss": 1.7893, "step": 43400 }, { "epoch": 0.2728419067821578, "grad_norm": 7.7085981369018555, "learning_rate": 1.8185628190405943e-05, "loss": 2.0989, "step": 43410 }, { "epoch": 0.2729047590988549, "grad_norm": 7.155144691467285, "learning_rate": 1.818520908946129e-05, "loss": 1.7218, "step": 43420 }, { "epoch": 0.27296761141555204, "grad_norm": 6.132807731628418, "learning_rate": 1.8184789988516637e-05, "loss": 1.8792, "step": 43430 }, { "epoch": 0.27303046373224915, "grad_norm": 7.1564836502075195, "learning_rate": 1.8184370887571984e-05, "loss": 2.0505, "step": 43440 }, { "epoch": 0.27309331604894627, "grad_norm": 5.861886978149414, "learning_rate": 1.8183951786627328e-05, "loss": 1.8952, "step": 43450 }, { "epoch": 0.2731561683656433, "grad_norm": 7.8853278160095215, "learning_rate": 1.8183532685682675e-05, "loss": 1.7529, "step": 43460 }, { "epoch": 0.27321902068234044, "grad_norm": 7.434305191040039, "learning_rate": 1.8183113584738022e-05, "loss": 1.7616, "step": 43470 }, { "epoch": 0.27328187299903756, "grad_norm": 7.336535453796387, "learning_rate": 1.818269448379337e-05, "loss": 2.0926, "step": 43480 }, { "epoch": 0.27334472531573467, "grad_norm": 7.187647342681885, "learning_rate": 1.8182275382848716e-05, "loss": 1.8862, "step": 43490 }, { "epoch": 0.2734075776324318, "grad_norm": 7.7725934982299805, "learning_rate": 1.818185628190406e-05, "loss": 1.8336, "step": 43500 }, { "epoch": 0.2734704299491289, "grad_norm": 7.245877265930176, "learning_rate": 1.8181437180959407e-05, "loss": 1.9086, "step": 43510 }, { "epoch": 0.273533282265826, "grad_norm": 5.464651107788086, "learning_rate": 1.8181018080014754e-05, "loss": 1.8325, "step": 43520 }, { "epoch": 0.27359613458252313, "grad_norm": 6.858070373535156, "learning_rate": 1.81805989790701e-05, "loss": 1.9098, "step": 43530 }, { "epoch": 0.27365898689922025, "grad_norm": 8.30384349822998, "learning_rate": 1.8180179878125445e-05, "loss": 1.8923, "step": 43540 }, { "epoch": 0.27372183921591736, "grad_norm": 8.637042045593262, "learning_rate": 1.8179760777180792e-05, "loss": 1.9143, "step": 43550 }, { "epoch": 0.2737846915326145, "grad_norm": 7.063789367675781, "learning_rate": 1.817934167623614e-05, "loss": 1.7999, "step": 43560 }, { "epoch": 0.2738475438493116, "grad_norm": 6.5637030601501465, "learning_rate": 1.8178922575291486e-05, "loss": 1.9424, "step": 43570 }, { "epoch": 0.2739103961660087, "grad_norm": 6.852513790130615, "learning_rate": 1.8178503474346833e-05, "loss": 1.9062, "step": 43580 }, { "epoch": 0.27397324848270577, "grad_norm": 7.877373695373535, "learning_rate": 1.8178084373402177e-05, "loss": 1.8612, "step": 43590 }, { "epoch": 0.2740361007994029, "grad_norm": 7.566814422607422, "learning_rate": 1.8177665272457524e-05, "loss": 1.9674, "step": 43600 }, { "epoch": 0.2740989531161, "grad_norm": 6.332150936126709, "learning_rate": 1.817724617151287e-05, "loss": 1.6876, "step": 43610 }, { "epoch": 0.2741618054327971, "grad_norm": 6.841239929199219, "learning_rate": 1.8176827070568218e-05, "loss": 2.0219, "step": 43620 }, { "epoch": 0.2742246577494942, "grad_norm": 6.8319292068481445, "learning_rate": 1.8176407969623565e-05, "loss": 1.7973, "step": 43630 }, { "epoch": 0.27428751006619134, "grad_norm": 7.159534931182861, "learning_rate": 1.8175988868678912e-05, "loss": 1.77, "step": 43640 }, { "epoch": 0.27435036238288846, "grad_norm": 6.824354648590088, "learning_rate": 1.817556976773426e-05, "loss": 1.7697, "step": 43650 }, { "epoch": 0.2744132146995856, "grad_norm": 6.295945167541504, "learning_rate": 1.8175150666789606e-05, "loss": 1.8574, "step": 43660 }, { "epoch": 0.2744760670162827, "grad_norm": 7.479640483856201, "learning_rate": 1.8174731565844953e-05, "loss": 2.059, "step": 43670 }, { "epoch": 0.2745389193329798, "grad_norm": 6.682166576385498, "learning_rate": 1.8174312464900297e-05, "loss": 1.8981, "step": 43680 }, { "epoch": 0.2746017716496769, "grad_norm": 7.518378734588623, "learning_rate": 1.8173893363955644e-05, "loss": 1.8331, "step": 43690 }, { "epoch": 0.27466462396637403, "grad_norm": 6.496458053588867, "learning_rate": 1.817347426301099e-05, "loss": 2.0259, "step": 43700 }, { "epoch": 0.27472747628307115, "grad_norm": 7.358792304992676, "learning_rate": 1.8173055162066338e-05, "loss": 1.9976, "step": 43710 }, { "epoch": 0.2747903285997682, "grad_norm": 6.544776916503906, "learning_rate": 1.8172636061121682e-05, "loss": 1.7799, "step": 43720 }, { "epoch": 0.2748531809164653, "grad_norm": 7.420206069946289, "learning_rate": 1.817221696017703e-05, "loss": 1.9171, "step": 43730 }, { "epoch": 0.27491603323316244, "grad_norm": 6.45020866394043, "learning_rate": 1.8171797859232376e-05, "loss": 1.9072, "step": 43740 }, { "epoch": 0.27497888554985955, "grad_norm": 6.753374099731445, "learning_rate": 1.8171378758287723e-05, "loss": 1.9285, "step": 43750 }, { "epoch": 0.27504173786655667, "grad_norm": 8.145105361938477, "learning_rate": 1.817095965734307e-05, "loss": 1.8416, "step": 43760 }, { "epoch": 0.2751045901832538, "grad_norm": 5.977471351623535, "learning_rate": 1.8170540556398414e-05, "loss": 1.8596, "step": 43770 }, { "epoch": 0.2751674424999509, "grad_norm": 7.059874057769775, "learning_rate": 1.817012145545376e-05, "loss": 2.0057, "step": 43780 }, { "epoch": 0.275230294816648, "grad_norm": 7.4796013832092285, "learning_rate": 1.8169702354509108e-05, "loss": 1.8092, "step": 43790 }, { "epoch": 0.27529314713334513, "grad_norm": 6.353641033172607, "learning_rate": 1.8169283253564455e-05, "loss": 1.8401, "step": 43800 }, { "epoch": 0.27535599945004224, "grad_norm": 6.131389617919922, "learning_rate": 1.8168864152619802e-05, "loss": 1.8887, "step": 43810 }, { "epoch": 0.27541885176673936, "grad_norm": 6.0684943199157715, "learning_rate": 1.816844505167515e-05, "loss": 1.8536, "step": 43820 }, { "epoch": 0.2754817040834365, "grad_norm": 6.397604465484619, "learning_rate": 1.8168025950730493e-05, "loss": 1.8396, "step": 43830 }, { "epoch": 0.27554455640013353, "grad_norm": 7.1050262451171875, "learning_rate": 1.816760684978584e-05, "loss": 1.7696, "step": 43840 }, { "epoch": 0.27560740871683065, "grad_norm": 8.189983367919922, "learning_rate": 1.8167187748841187e-05, "loss": 1.7782, "step": 43850 }, { "epoch": 0.27567026103352776, "grad_norm": 6.490654468536377, "learning_rate": 1.8166768647896534e-05, "loss": 1.8053, "step": 43860 }, { "epoch": 0.2757331133502249, "grad_norm": 6.4062113761901855, "learning_rate": 1.816634954695188e-05, "loss": 1.5862, "step": 43870 }, { "epoch": 0.275795965666922, "grad_norm": 6.748137950897217, "learning_rate": 1.8165930446007228e-05, "loss": 2.0288, "step": 43880 }, { "epoch": 0.2758588179836191, "grad_norm": 7.861601829528809, "learning_rate": 1.8165511345062575e-05, "loss": 1.8009, "step": 43890 }, { "epoch": 0.2759216703003162, "grad_norm": 7.185576438903809, "learning_rate": 1.816509224411792e-05, "loss": 1.8527, "step": 43900 }, { "epoch": 0.27598452261701334, "grad_norm": 6.2016754150390625, "learning_rate": 1.8164673143173266e-05, "loss": 1.7815, "step": 43910 }, { "epoch": 0.27604737493371045, "grad_norm": 7.197715759277344, "learning_rate": 1.8164254042228613e-05, "loss": 1.9198, "step": 43920 }, { "epoch": 0.27611022725040757, "grad_norm": 6.321927547454834, "learning_rate": 1.816383494128396e-05, "loss": 1.6873, "step": 43930 }, { "epoch": 0.2761730795671047, "grad_norm": 8.296004295349121, "learning_rate": 1.8163415840339304e-05, "loss": 1.8916, "step": 43940 }, { "epoch": 0.2762359318838018, "grad_norm": 8.541367530822754, "learning_rate": 1.816299673939465e-05, "loss": 2.0147, "step": 43950 }, { "epoch": 0.2762987842004989, "grad_norm": 7.3273420333862305, "learning_rate": 1.8162577638449998e-05, "loss": 2.008, "step": 43960 }, { "epoch": 0.276361636517196, "grad_norm": 7.654440879821777, "learning_rate": 1.8162158537505345e-05, "loss": 1.7337, "step": 43970 }, { "epoch": 0.2764244888338931, "grad_norm": 7.776981353759766, "learning_rate": 1.8161739436560692e-05, "loss": 1.7561, "step": 43980 }, { "epoch": 0.2764873411505902, "grad_norm": 7.037495136260986, "learning_rate": 1.8161320335616036e-05, "loss": 1.8316, "step": 43990 }, { "epoch": 0.2765501934672873, "grad_norm": 7.595053672790527, "learning_rate": 1.8160901234671383e-05, "loss": 2.1776, "step": 44000 }, { "epoch": 0.27661304578398443, "grad_norm": 6.931766033172607, "learning_rate": 1.816048213372673e-05, "loss": 2.0454, "step": 44010 }, { "epoch": 0.27667589810068155, "grad_norm": 7.494742393493652, "learning_rate": 1.8160063032782077e-05, "loss": 1.9182, "step": 44020 }, { "epoch": 0.27673875041737866, "grad_norm": 8.088569641113281, "learning_rate": 1.8159643931837424e-05, "loss": 1.8282, "step": 44030 }, { "epoch": 0.2768016027340758, "grad_norm": 6.901574611663818, "learning_rate": 1.815922483089277e-05, "loss": 1.7486, "step": 44040 }, { "epoch": 0.2768644550507729, "grad_norm": 6.851449489593506, "learning_rate": 1.815880572994812e-05, "loss": 1.8935, "step": 44050 }, { "epoch": 0.27692730736747, "grad_norm": 8.495573997497559, "learning_rate": 1.8158386629003465e-05, "loss": 1.7771, "step": 44060 }, { "epoch": 0.2769901596841671, "grad_norm": 7.091639995574951, "learning_rate": 1.8157967528058812e-05, "loss": 1.83, "step": 44070 }, { "epoch": 0.27705301200086424, "grad_norm": 6.349162578582764, "learning_rate": 1.8157548427114156e-05, "loss": 1.8799, "step": 44080 }, { "epoch": 0.27711586431756136, "grad_norm": 7.483797073364258, "learning_rate": 1.8157129326169503e-05, "loss": 1.8951, "step": 44090 }, { "epoch": 0.2771787166342584, "grad_norm": 6.60048770904541, "learning_rate": 1.815671022522485e-05, "loss": 1.6704, "step": 44100 }, { "epoch": 0.27724156895095553, "grad_norm": 6.710370063781738, "learning_rate": 1.8156291124280197e-05, "loss": 1.8409, "step": 44110 }, { "epoch": 0.27730442126765265, "grad_norm": 7.331045150756836, "learning_rate": 1.815587202333554e-05, "loss": 1.5637, "step": 44120 }, { "epoch": 0.27736727358434976, "grad_norm": 6.952859401702881, "learning_rate": 1.8155452922390888e-05, "loss": 1.7967, "step": 44130 }, { "epoch": 0.2774301259010469, "grad_norm": 6.21740198135376, "learning_rate": 1.8155033821446235e-05, "loss": 1.8562, "step": 44140 }, { "epoch": 0.277492978217744, "grad_norm": 6.948338508605957, "learning_rate": 1.8154614720501582e-05, "loss": 2.0678, "step": 44150 }, { "epoch": 0.2775558305344411, "grad_norm": 7.098877906799316, "learning_rate": 1.8154195619556926e-05, "loss": 1.8754, "step": 44160 }, { "epoch": 0.2776186828511382, "grad_norm": 7.387324333190918, "learning_rate": 1.8153776518612273e-05, "loss": 1.9309, "step": 44170 }, { "epoch": 0.27768153516783534, "grad_norm": 6.8969407081604, "learning_rate": 1.815335741766762e-05, "loss": 2.0453, "step": 44180 }, { "epoch": 0.27774438748453245, "grad_norm": 7.333196640014648, "learning_rate": 1.8152938316722967e-05, "loss": 2.0103, "step": 44190 }, { "epoch": 0.27780723980122957, "grad_norm": 7.968249797821045, "learning_rate": 1.8152519215778314e-05, "loss": 1.9121, "step": 44200 }, { "epoch": 0.2778700921179267, "grad_norm": 6.779103755950928, "learning_rate": 1.8152100114833658e-05, "loss": 1.8259, "step": 44210 }, { "epoch": 0.2779329444346238, "grad_norm": 6.785097599029541, "learning_rate": 1.8151681013889005e-05, "loss": 1.7175, "step": 44220 }, { "epoch": 0.27799579675132086, "grad_norm": 6.160770416259766, "learning_rate": 1.8151261912944352e-05, "loss": 2.0265, "step": 44230 }, { "epoch": 0.27805864906801797, "grad_norm": 7.9139204025268555, "learning_rate": 1.81508428119997e-05, "loss": 1.747, "step": 44240 }, { "epoch": 0.2781215013847151, "grad_norm": 6.520354270935059, "learning_rate": 1.8150423711055046e-05, "loss": 1.7669, "step": 44250 }, { "epoch": 0.2781843537014122, "grad_norm": 6.231197357177734, "learning_rate": 1.8150004610110393e-05, "loss": 1.8292, "step": 44260 }, { "epoch": 0.2782472060181093, "grad_norm": 7.701934337615967, "learning_rate": 1.814958550916574e-05, "loss": 1.8001, "step": 44270 }, { "epoch": 0.27831005833480643, "grad_norm": 6.897225379943848, "learning_rate": 1.8149166408221087e-05, "loss": 1.5311, "step": 44280 }, { "epoch": 0.27837291065150355, "grad_norm": 7.523116111755371, "learning_rate": 1.8148747307276434e-05, "loss": 1.895, "step": 44290 }, { "epoch": 0.27843576296820066, "grad_norm": 7.386162757873535, "learning_rate": 1.8148328206331778e-05, "loss": 1.9553, "step": 44300 }, { "epoch": 0.2784986152848978, "grad_norm": 8.141165733337402, "learning_rate": 1.8147909105387125e-05, "loss": 1.6888, "step": 44310 }, { "epoch": 0.2785614676015949, "grad_norm": 7.3685455322265625, "learning_rate": 1.8147490004442472e-05, "loss": 1.7884, "step": 44320 }, { "epoch": 0.278624319918292, "grad_norm": 7.618593692779541, "learning_rate": 1.814707090349782e-05, "loss": 1.7598, "step": 44330 }, { "epoch": 0.2786871722349891, "grad_norm": 7.6132330894470215, "learning_rate": 1.8146651802553163e-05, "loss": 1.5731, "step": 44340 }, { "epoch": 0.27875002455168624, "grad_norm": 6.275602340698242, "learning_rate": 1.814623270160851e-05, "loss": 1.9637, "step": 44350 }, { "epoch": 0.2788128768683833, "grad_norm": 6.493110656738281, "learning_rate": 1.8145813600663857e-05, "loss": 2.0089, "step": 44360 }, { "epoch": 0.2788757291850804, "grad_norm": 7.190394401550293, "learning_rate": 1.8145394499719204e-05, "loss": 1.7452, "step": 44370 }, { "epoch": 0.2789385815017775, "grad_norm": 8.51392650604248, "learning_rate": 1.814497539877455e-05, "loss": 1.906, "step": 44380 }, { "epoch": 0.27900143381847464, "grad_norm": 7.147787570953369, "learning_rate": 1.8144556297829895e-05, "loss": 1.9689, "step": 44390 }, { "epoch": 0.27906428613517176, "grad_norm": 7.204176425933838, "learning_rate": 1.8144137196885242e-05, "loss": 1.9244, "step": 44400 }, { "epoch": 0.2791271384518689, "grad_norm": 7.5683674812316895, "learning_rate": 1.814371809594059e-05, "loss": 1.8164, "step": 44410 }, { "epoch": 0.279189990768566, "grad_norm": 7.33584451675415, "learning_rate": 1.8143298994995936e-05, "loss": 1.6963, "step": 44420 }, { "epoch": 0.2792528430852631, "grad_norm": 7.349540710449219, "learning_rate": 1.8142879894051283e-05, "loss": 1.7436, "step": 44430 }, { "epoch": 0.2793156954019602, "grad_norm": 9.453506469726562, "learning_rate": 1.814246079310663e-05, "loss": 1.8077, "step": 44440 }, { "epoch": 0.27937854771865733, "grad_norm": 9.688944816589355, "learning_rate": 1.8142041692161977e-05, "loss": 1.7876, "step": 44450 }, { "epoch": 0.27944140003535445, "grad_norm": 6.370456218719482, "learning_rate": 1.814162259121732e-05, "loss": 1.8971, "step": 44460 }, { "epoch": 0.27950425235205156, "grad_norm": 6.250444412231445, "learning_rate": 1.8141203490272668e-05, "loss": 1.6054, "step": 44470 }, { "epoch": 0.2795671046687486, "grad_norm": 7.204066753387451, "learning_rate": 1.8140784389328015e-05, "loss": 1.797, "step": 44480 }, { "epoch": 0.27962995698544574, "grad_norm": 7.6014485359191895, "learning_rate": 1.8140365288383362e-05, "loss": 1.8645, "step": 44490 }, { "epoch": 0.27969280930214285, "grad_norm": 6.935083389282227, "learning_rate": 1.813994618743871e-05, "loss": 1.821, "step": 44500 }, { "epoch": 0.27975566161883997, "grad_norm": 7.669597625732422, "learning_rate": 1.8139527086494056e-05, "loss": 2.1411, "step": 44510 }, { "epoch": 0.2798185139355371, "grad_norm": 7.0571441650390625, "learning_rate": 1.81391079855494e-05, "loss": 1.8528, "step": 44520 }, { "epoch": 0.2798813662522342, "grad_norm": 7.463881969451904, "learning_rate": 1.8138688884604747e-05, "loss": 1.9098, "step": 44530 }, { "epoch": 0.2799442185689313, "grad_norm": 6.437138557434082, "learning_rate": 1.8138269783660094e-05, "loss": 1.8339, "step": 44540 }, { "epoch": 0.28000707088562843, "grad_norm": 8.589752197265625, "learning_rate": 1.813785068271544e-05, "loss": 1.7393, "step": 44550 }, { "epoch": 0.28006992320232554, "grad_norm": 7.563274383544922, "learning_rate": 1.8137431581770785e-05, "loss": 1.9598, "step": 44560 }, { "epoch": 0.28013277551902266, "grad_norm": 7.278768062591553, "learning_rate": 1.8137012480826132e-05, "loss": 1.9527, "step": 44570 }, { "epoch": 0.2801956278357198, "grad_norm": 7.549731254577637, "learning_rate": 1.813659337988148e-05, "loss": 1.8646, "step": 44580 }, { "epoch": 0.2802584801524169, "grad_norm": 7.279454231262207, "learning_rate": 1.8136174278936826e-05, "loss": 1.7997, "step": 44590 }, { "epoch": 0.280321332469114, "grad_norm": 6.979907035827637, "learning_rate": 1.8135755177992173e-05, "loss": 1.8811, "step": 44600 }, { "epoch": 0.28038418478581106, "grad_norm": 6.6481852531433105, "learning_rate": 1.8135336077047517e-05, "loss": 1.9376, "step": 44610 }, { "epoch": 0.2804470371025082, "grad_norm": 7.196473598480225, "learning_rate": 1.8134916976102864e-05, "loss": 1.7155, "step": 44620 }, { "epoch": 0.2805098894192053, "grad_norm": 6.652624607086182, "learning_rate": 1.813449787515821e-05, "loss": 1.8019, "step": 44630 }, { "epoch": 0.2805727417359024, "grad_norm": 7.184447765350342, "learning_rate": 1.8134078774213558e-05, "loss": 1.9028, "step": 44640 }, { "epoch": 0.2806355940525995, "grad_norm": 8.438286781311035, "learning_rate": 1.8133659673268905e-05, "loss": 1.9653, "step": 44650 }, { "epoch": 0.28069844636929664, "grad_norm": 6.87973690032959, "learning_rate": 1.8133240572324252e-05, "loss": 1.7853, "step": 44660 }, { "epoch": 0.28076129868599375, "grad_norm": 7.515631675720215, "learning_rate": 1.81328214713796e-05, "loss": 1.5751, "step": 44670 }, { "epoch": 0.28082415100269087, "grad_norm": 6.332245349884033, "learning_rate": 1.8132402370434946e-05, "loss": 1.7445, "step": 44680 }, { "epoch": 0.280887003319388, "grad_norm": 6.123690128326416, "learning_rate": 1.8131983269490294e-05, "loss": 1.8947, "step": 44690 }, { "epoch": 0.2809498556360851, "grad_norm": 7.331873893737793, "learning_rate": 1.8131564168545637e-05, "loss": 1.7275, "step": 44700 }, { "epoch": 0.2810127079527822, "grad_norm": 6.67120885848999, "learning_rate": 1.8131145067600984e-05, "loss": 1.8632, "step": 44710 }, { "epoch": 0.28107556026947933, "grad_norm": 8.140172004699707, "learning_rate": 1.813072596665633e-05, "loss": 2.0517, "step": 44720 }, { "epoch": 0.28113841258617644, "grad_norm": 7.026405334472656, "learning_rate": 1.813030686571168e-05, "loss": 1.7541, "step": 44730 }, { "epoch": 0.2812012649028735, "grad_norm": 9.236125946044922, "learning_rate": 1.8129887764767022e-05, "loss": 1.8394, "step": 44740 }, { "epoch": 0.2812641172195706, "grad_norm": 9.79562759399414, "learning_rate": 1.812946866382237e-05, "loss": 1.9962, "step": 44750 }, { "epoch": 0.28132696953626773, "grad_norm": 7.946891784667969, "learning_rate": 1.8129049562877716e-05, "loss": 1.9605, "step": 44760 }, { "epoch": 0.28138982185296485, "grad_norm": 7.475961208343506, "learning_rate": 1.8128630461933063e-05, "loss": 1.6945, "step": 44770 }, { "epoch": 0.28145267416966196, "grad_norm": 7.607438087463379, "learning_rate": 1.8128211360988407e-05, "loss": 1.8782, "step": 44780 }, { "epoch": 0.2815155264863591, "grad_norm": 7.399308681488037, "learning_rate": 1.8127792260043754e-05, "loss": 1.8027, "step": 44790 }, { "epoch": 0.2815783788030562, "grad_norm": 6.98337984085083, "learning_rate": 1.81273731590991e-05, "loss": 2.0415, "step": 44800 }, { "epoch": 0.2816412311197533, "grad_norm": 7.4911017417907715, "learning_rate": 1.8126954058154448e-05, "loss": 1.8791, "step": 44810 }, { "epoch": 0.2817040834364504, "grad_norm": 6.311008930206299, "learning_rate": 1.8126534957209795e-05, "loss": 1.7386, "step": 44820 }, { "epoch": 0.28176693575314754, "grad_norm": 6.257545471191406, "learning_rate": 1.8126115856265142e-05, "loss": 1.7411, "step": 44830 }, { "epoch": 0.28182978806984466, "grad_norm": 6.519497394561768, "learning_rate": 1.8125696755320486e-05, "loss": 1.5972, "step": 44840 }, { "epoch": 0.28189264038654177, "grad_norm": 6.8323540687561035, "learning_rate": 1.8125277654375833e-05, "loss": 1.7022, "step": 44850 }, { "epoch": 0.2819554927032389, "grad_norm": 7.942287445068359, "learning_rate": 1.812485855343118e-05, "loss": 2.1369, "step": 44860 }, { "epoch": 0.28201834501993595, "grad_norm": 7.868318557739258, "learning_rate": 1.8124439452486527e-05, "loss": 1.9454, "step": 44870 }, { "epoch": 0.28208119733663306, "grad_norm": 6.588150501251221, "learning_rate": 1.8124020351541874e-05, "loss": 1.7428, "step": 44880 }, { "epoch": 0.2821440496533302, "grad_norm": 7.526489734649658, "learning_rate": 1.812360125059722e-05, "loss": 1.8493, "step": 44890 }, { "epoch": 0.2822069019700273, "grad_norm": 7.636748790740967, "learning_rate": 1.812318214965257e-05, "loss": 1.7206, "step": 44900 }, { "epoch": 0.2822697542867244, "grad_norm": 6.8038177490234375, "learning_rate": 1.8122763048707916e-05, "loss": 1.5991, "step": 44910 }, { "epoch": 0.2823326066034215, "grad_norm": 6.8554863929748535, "learning_rate": 1.812234394776326e-05, "loss": 1.6602, "step": 44920 }, { "epoch": 0.28239545892011864, "grad_norm": 8.983702659606934, "learning_rate": 1.8121924846818606e-05, "loss": 1.8623, "step": 44930 }, { "epoch": 0.28245831123681575, "grad_norm": 6.636299133300781, "learning_rate": 1.8121505745873953e-05, "loss": 2.0028, "step": 44940 }, { "epoch": 0.28252116355351287, "grad_norm": 7.728862762451172, "learning_rate": 1.81210866449293e-05, "loss": 2.0382, "step": 44950 }, { "epoch": 0.28258401587021, "grad_norm": 7.6725311279296875, "learning_rate": 1.8120667543984644e-05, "loss": 2.1286, "step": 44960 }, { "epoch": 0.2826468681869071, "grad_norm": 7.758844375610352, "learning_rate": 1.812024844303999e-05, "loss": 1.8814, "step": 44970 }, { "epoch": 0.2827097205036042, "grad_norm": 6.9506988525390625, "learning_rate": 1.8119829342095338e-05, "loss": 1.7229, "step": 44980 }, { "epoch": 0.28277257282030127, "grad_norm": 6.1850385665893555, "learning_rate": 1.8119410241150685e-05, "loss": 1.8845, "step": 44990 }, { "epoch": 0.2828354251369984, "grad_norm": 6.863743305206299, "learning_rate": 1.8118991140206032e-05, "loss": 1.8941, "step": 45000 }, { "epoch": 0.2828982774536955, "grad_norm": 5.781214714050293, "learning_rate": 1.8118572039261376e-05, "loss": 2.0665, "step": 45010 }, { "epoch": 0.2829611297703926, "grad_norm": 8.170014381408691, "learning_rate": 1.8118152938316723e-05, "loss": 1.8578, "step": 45020 }, { "epoch": 0.28302398208708973, "grad_norm": 7.467271327972412, "learning_rate": 1.811773383737207e-05, "loss": 1.9366, "step": 45030 }, { "epoch": 0.28308683440378685, "grad_norm": 7.218717098236084, "learning_rate": 1.8117314736427417e-05, "loss": 1.8072, "step": 45040 }, { "epoch": 0.28314968672048396, "grad_norm": 6.2370147705078125, "learning_rate": 1.8116895635482764e-05, "loss": 1.9271, "step": 45050 }, { "epoch": 0.2832125390371811, "grad_norm": 7.850654125213623, "learning_rate": 1.811647653453811e-05, "loss": 1.8077, "step": 45060 }, { "epoch": 0.2832753913538782, "grad_norm": 6.582813739776611, "learning_rate": 1.811605743359346e-05, "loss": 1.766, "step": 45070 }, { "epoch": 0.2833382436705753, "grad_norm": 6.800104141235352, "learning_rate": 1.8115638332648802e-05, "loss": 1.9248, "step": 45080 }, { "epoch": 0.2834010959872724, "grad_norm": 8.2304105758667, "learning_rate": 1.811521923170415e-05, "loss": 1.7349, "step": 45090 }, { "epoch": 0.28346394830396954, "grad_norm": 6.5748138427734375, "learning_rate": 1.8114800130759496e-05, "loss": 2.0388, "step": 45100 }, { "epoch": 0.28352680062066665, "grad_norm": 7.620097637176514, "learning_rate": 1.8114381029814843e-05, "loss": 2.0377, "step": 45110 }, { "epoch": 0.2835896529373637, "grad_norm": 8.494855880737305, "learning_rate": 1.811396192887019e-05, "loss": 2.0819, "step": 45120 }, { "epoch": 0.2836525052540608, "grad_norm": 8.075994491577148, "learning_rate": 1.8113542827925538e-05, "loss": 1.9846, "step": 45130 }, { "epoch": 0.28371535757075794, "grad_norm": 7.5319695472717285, "learning_rate": 1.811312372698088e-05, "loss": 1.82, "step": 45140 }, { "epoch": 0.28377820988745506, "grad_norm": 6.592759132385254, "learning_rate": 1.811270462603623e-05, "loss": 2.0664, "step": 45150 }, { "epoch": 0.2838410622041522, "grad_norm": 6.713395118713379, "learning_rate": 1.8112285525091575e-05, "loss": 1.8459, "step": 45160 }, { "epoch": 0.2839039145208493, "grad_norm": 8.242391586303711, "learning_rate": 1.8111866424146922e-05, "loss": 1.7817, "step": 45170 }, { "epoch": 0.2839667668375464, "grad_norm": 7.351894855499268, "learning_rate": 1.8111447323202266e-05, "loss": 1.8684, "step": 45180 }, { "epoch": 0.2840296191542435, "grad_norm": 6.898585319519043, "learning_rate": 1.8111028222257613e-05, "loss": 1.8582, "step": 45190 }, { "epoch": 0.28409247147094063, "grad_norm": 7.325027942657471, "learning_rate": 1.811060912131296e-05, "loss": 1.8005, "step": 45200 }, { "epoch": 0.28415532378763775, "grad_norm": 7.066969871520996, "learning_rate": 1.8110190020368307e-05, "loss": 1.9176, "step": 45210 }, { "epoch": 0.28421817610433486, "grad_norm": 7.08568811416626, "learning_rate": 1.8109770919423654e-05, "loss": 1.847, "step": 45220 }, { "epoch": 0.284281028421032, "grad_norm": 7.803054332733154, "learning_rate": 1.8109351818478998e-05, "loss": 1.7753, "step": 45230 }, { "epoch": 0.2843438807377291, "grad_norm": 7.064817428588867, "learning_rate": 1.8108932717534345e-05, "loss": 1.9478, "step": 45240 }, { "epoch": 0.28440673305442615, "grad_norm": 8.686222076416016, "learning_rate": 1.8108513616589692e-05, "loss": 1.641, "step": 45250 }, { "epoch": 0.28446958537112327, "grad_norm": 5.989514350891113, "learning_rate": 1.810809451564504e-05, "loss": 1.8908, "step": 45260 }, { "epoch": 0.2845324376878204, "grad_norm": 7.37302303314209, "learning_rate": 1.8107675414700386e-05, "loss": 1.8321, "step": 45270 }, { "epoch": 0.2845952900045175, "grad_norm": 6.899227142333984, "learning_rate": 1.8107256313755733e-05, "loss": 1.9037, "step": 45280 }, { "epoch": 0.2846581423212146, "grad_norm": 12.614656448364258, "learning_rate": 1.810683721281108e-05, "loss": 1.8213, "step": 45290 }, { "epoch": 0.28472099463791173, "grad_norm": 7.848989486694336, "learning_rate": 1.8106418111866428e-05, "loss": 1.9224, "step": 45300 }, { "epoch": 0.28478384695460884, "grad_norm": 8.137557029724121, "learning_rate": 1.8105999010921775e-05, "loss": 2.0452, "step": 45310 }, { "epoch": 0.28484669927130596, "grad_norm": 6.750499725341797, "learning_rate": 1.810557990997712e-05, "loss": 2.0184, "step": 45320 }, { "epoch": 0.2849095515880031, "grad_norm": 7.370104789733887, "learning_rate": 1.8105160809032465e-05, "loss": 1.9131, "step": 45330 }, { "epoch": 0.2849724039047002, "grad_norm": 6.264123916625977, "learning_rate": 1.8104741708087812e-05, "loss": 1.7559, "step": 45340 }, { "epoch": 0.2850352562213973, "grad_norm": 7.859541893005371, "learning_rate": 1.810432260714316e-05, "loss": 1.8596, "step": 45350 }, { "epoch": 0.2850981085380944, "grad_norm": 6.466876983642578, "learning_rate": 1.8103903506198503e-05, "loss": 1.8305, "step": 45360 }, { "epoch": 0.28516096085479153, "grad_norm": 6.512380599975586, "learning_rate": 1.810348440525385e-05, "loss": 1.7773, "step": 45370 }, { "epoch": 0.2852238131714886, "grad_norm": 6.475058078765869, "learning_rate": 1.8103065304309197e-05, "loss": 1.8184, "step": 45380 }, { "epoch": 0.2852866654881857, "grad_norm": 6.929133892059326, "learning_rate": 1.8102646203364544e-05, "loss": 1.8313, "step": 45390 }, { "epoch": 0.2853495178048828, "grad_norm": 7.595178127288818, "learning_rate": 1.8102227102419888e-05, "loss": 2.0823, "step": 45400 }, { "epoch": 0.28541237012157994, "grad_norm": 6.889100551605225, "learning_rate": 1.8101808001475235e-05, "loss": 1.8938, "step": 45410 }, { "epoch": 0.28547522243827705, "grad_norm": 7.095263957977295, "learning_rate": 1.8101388900530582e-05, "loss": 1.829, "step": 45420 }, { "epoch": 0.28553807475497417, "grad_norm": 8.837443351745605, "learning_rate": 1.810096979958593e-05, "loss": 1.853, "step": 45430 }, { "epoch": 0.2856009270716713, "grad_norm": 6.931927680969238, "learning_rate": 1.8100550698641276e-05, "loss": 1.7004, "step": 45440 }, { "epoch": 0.2856637793883684, "grad_norm": 7.829457759857178, "learning_rate": 1.8100131597696623e-05, "loss": 1.8764, "step": 45450 }, { "epoch": 0.2857266317050655, "grad_norm": 8.360833168029785, "learning_rate": 1.8099712496751967e-05, "loss": 1.702, "step": 45460 }, { "epoch": 0.28578948402176263, "grad_norm": 7.660463809967041, "learning_rate": 1.8099293395807314e-05, "loss": 1.6702, "step": 45470 }, { "epoch": 0.28585233633845974, "grad_norm": 6.292911052703857, "learning_rate": 1.809887429486266e-05, "loss": 1.8652, "step": 45480 }, { "epoch": 0.28591518865515686, "grad_norm": 6.531392574310303, "learning_rate": 1.809845519391801e-05, "loss": 1.7984, "step": 45490 }, { "epoch": 0.2859780409718539, "grad_norm": 7.413627624511719, "learning_rate": 1.8098036092973355e-05, "loss": 1.882, "step": 45500 }, { "epoch": 0.28604089328855103, "grad_norm": 6.160747528076172, "learning_rate": 1.8097616992028703e-05, "loss": 1.7704, "step": 45510 }, { "epoch": 0.28610374560524815, "grad_norm": 7.964906215667725, "learning_rate": 1.809719789108405e-05, "loss": 1.8103, "step": 45520 }, { "epoch": 0.28616659792194526, "grad_norm": 6.141238212585449, "learning_rate": 1.8096778790139397e-05, "loss": 1.959, "step": 45530 }, { "epoch": 0.2862294502386424, "grad_norm": 6.768047332763672, "learning_rate": 1.809635968919474e-05, "loss": 2.0092, "step": 45540 }, { "epoch": 0.2862923025553395, "grad_norm": 7.242840766906738, "learning_rate": 1.8095940588250087e-05, "loss": 1.7468, "step": 45550 }, { "epoch": 0.2863551548720366, "grad_norm": 7.306134223937988, "learning_rate": 1.8095521487305434e-05, "loss": 1.8085, "step": 45560 }, { "epoch": 0.2864180071887337, "grad_norm": 7.484009265899658, "learning_rate": 1.809510238636078e-05, "loss": 1.896, "step": 45570 }, { "epoch": 0.28648085950543084, "grad_norm": 7.079681873321533, "learning_rate": 1.8094683285416125e-05, "loss": 1.8512, "step": 45580 }, { "epoch": 0.28654371182212796, "grad_norm": 7.087714672088623, "learning_rate": 1.8094264184471472e-05, "loss": 1.7813, "step": 45590 }, { "epoch": 0.28660656413882507, "grad_norm": 9.201361656188965, "learning_rate": 1.809384508352682e-05, "loss": 1.6572, "step": 45600 }, { "epoch": 0.2866694164555222, "grad_norm": 6.778953552246094, "learning_rate": 1.8093425982582166e-05, "loss": 1.8233, "step": 45610 }, { "epoch": 0.2867322687722193, "grad_norm": 8.023008346557617, "learning_rate": 1.8093006881637514e-05, "loss": 1.8874, "step": 45620 }, { "epoch": 0.28679512108891636, "grad_norm": 7.189453601837158, "learning_rate": 1.8092587780692857e-05, "loss": 1.7675, "step": 45630 }, { "epoch": 0.2868579734056135, "grad_norm": 7.227446556091309, "learning_rate": 1.8092168679748204e-05, "loss": 1.9177, "step": 45640 }, { "epoch": 0.2869208257223106, "grad_norm": 6.096076488494873, "learning_rate": 1.809174957880355e-05, "loss": 2.1245, "step": 45650 }, { "epoch": 0.2869836780390077, "grad_norm": 7.4338202476501465, "learning_rate": 1.80913304778589e-05, "loss": 1.6487, "step": 45660 }, { "epoch": 0.2870465303557048, "grad_norm": 7.482662677764893, "learning_rate": 1.8090911376914245e-05, "loss": 1.8797, "step": 45670 }, { "epoch": 0.28710938267240194, "grad_norm": 7.430908203125, "learning_rate": 1.8090492275969593e-05, "loss": 1.7771, "step": 45680 }, { "epoch": 0.28717223498909905, "grad_norm": 7.199875354766846, "learning_rate": 1.809007317502494e-05, "loss": 1.8885, "step": 45690 }, { "epoch": 0.28723508730579617, "grad_norm": 7.804337501525879, "learning_rate": 1.8089654074080287e-05, "loss": 1.6821, "step": 45700 }, { "epoch": 0.2872979396224933, "grad_norm": 6.368311405181885, "learning_rate": 1.808923497313563e-05, "loss": 1.6673, "step": 45710 }, { "epoch": 0.2873607919391904, "grad_norm": 5.977096080780029, "learning_rate": 1.8088815872190977e-05, "loss": 1.6639, "step": 45720 }, { "epoch": 0.2874236442558875, "grad_norm": 5.912306308746338, "learning_rate": 1.8088396771246325e-05, "loss": 1.8296, "step": 45730 }, { "epoch": 0.2874864965725846, "grad_norm": 6.9958577156066895, "learning_rate": 1.808797767030167e-05, "loss": 1.6494, "step": 45740 }, { "epoch": 0.28754934888928174, "grad_norm": 8.182312965393066, "learning_rate": 1.808755856935702e-05, "loss": 1.9476, "step": 45750 }, { "epoch": 0.2876122012059788, "grad_norm": 8.067004203796387, "learning_rate": 1.8087139468412362e-05, "loss": 1.6992, "step": 45760 }, { "epoch": 0.2876750535226759, "grad_norm": 7.400192737579346, "learning_rate": 1.808672036746771e-05, "loss": 1.7374, "step": 45770 }, { "epoch": 0.28773790583937303, "grad_norm": 7.941074848175049, "learning_rate": 1.8086301266523056e-05, "loss": 1.9707, "step": 45780 }, { "epoch": 0.28780075815607015, "grad_norm": 7.446524143218994, "learning_rate": 1.8085882165578404e-05, "loss": 1.749, "step": 45790 }, { "epoch": 0.28786361047276726, "grad_norm": 5.95335054397583, "learning_rate": 1.8085463064633747e-05, "loss": 1.6531, "step": 45800 }, { "epoch": 0.2879264627894644, "grad_norm": 7.67755126953125, "learning_rate": 1.8085043963689094e-05, "loss": 1.8494, "step": 45810 }, { "epoch": 0.2879893151061615, "grad_norm": 6.806369304656982, "learning_rate": 1.808462486274444e-05, "loss": 1.8981, "step": 45820 }, { "epoch": 0.2880521674228586, "grad_norm": 7.677136421203613, "learning_rate": 1.808420576179979e-05, "loss": 1.8251, "step": 45830 }, { "epoch": 0.2881150197395557, "grad_norm": 7.445577621459961, "learning_rate": 1.8083786660855136e-05, "loss": 1.9718, "step": 45840 }, { "epoch": 0.28817787205625284, "grad_norm": 7.094901084899902, "learning_rate": 1.808336755991048e-05, "loss": 1.713, "step": 45850 }, { "epoch": 0.28824072437294995, "grad_norm": 6.669374942779541, "learning_rate": 1.8082948458965826e-05, "loss": 1.8079, "step": 45860 }, { "epoch": 0.28830357668964707, "grad_norm": 8.922381401062012, "learning_rate": 1.8082529358021173e-05, "loss": 1.8787, "step": 45870 }, { "epoch": 0.2883664290063442, "grad_norm": 7.130077838897705, "learning_rate": 1.808211025707652e-05, "loss": 1.8555, "step": 45880 }, { "epoch": 0.28842928132304124, "grad_norm": 6.75282096862793, "learning_rate": 1.8081691156131867e-05, "loss": 1.9025, "step": 45890 }, { "epoch": 0.28849213363973836, "grad_norm": 7.469769477844238, "learning_rate": 1.8081272055187215e-05, "loss": 1.8409, "step": 45900 }, { "epoch": 0.2885549859564355, "grad_norm": 6.7698750495910645, "learning_rate": 1.808085295424256e-05, "loss": 2.028, "step": 45910 }, { "epoch": 0.2886178382731326, "grad_norm": 8.43509292602539, "learning_rate": 1.808043385329791e-05, "loss": 2.0557, "step": 45920 }, { "epoch": 0.2886806905898297, "grad_norm": 7.978787899017334, "learning_rate": 1.8080014752353256e-05, "loss": 1.8012, "step": 45930 }, { "epoch": 0.2887435429065268, "grad_norm": 7.802064418792725, "learning_rate": 1.80795956514086e-05, "loss": 1.6254, "step": 45940 }, { "epoch": 0.28880639522322393, "grad_norm": 8.451842308044434, "learning_rate": 1.8079176550463947e-05, "loss": 1.665, "step": 45950 }, { "epoch": 0.28886924753992105, "grad_norm": 7.110132694244385, "learning_rate": 1.8078757449519294e-05, "loss": 1.9293, "step": 45960 }, { "epoch": 0.28893209985661816, "grad_norm": 6.904272556304932, "learning_rate": 1.807833834857464e-05, "loss": 1.6345, "step": 45970 }, { "epoch": 0.2889949521733153, "grad_norm": 7.582462787628174, "learning_rate": 1.8077919247629984e-05, "loss": 1.8533, "step": 45980 }, { "epoch": 0.2890578044900124, "grad_norm": 7.024543285369873, "learning_rate": 1.807750014668533e-05, "loss": 1.6023, "step": 45990 }, { "epoch": 0.2891206568067095, "grad_norm": 6.810546398162842, "learning_rate": 1.807708104574068e-05, "loss": 1.9153, "step": 46000 }, { "epoch": 0.28918350912340657, "grad_norm": 6.868898868560791, "learning_rate": 1.8076661944796026e-05, "loss": 1.9369, "step": 46010 }, { "epoch": 0.2892463614401037, "grad_norm": 6.2243971824646, "learning_rate": 1.8076242843851373e-05, "loss": 1.8618, "step": 46020 }, { "epoch": 0.2893092137568008, "grad_norm": 7.958976745605469, "learning_rate": 1.8075823742906716e-05, "loss": 1.9746, "step": 46030 }, { "epoch": 0.2893720660734979, "grad_norm": 7.3935770988464355, "learning_rate": 1.8075404641962063e-05, "loss": 1.8323, "step": 46040 }, { "epoch": 0.28943491839019503, "grad_norm": 6.162091255187988, "learning_rate": 1.807498554101741e-05, "loss": 1.6404, "step": 46050 }, { "epoch": 0.28949777070689214, "grad_norm": 6.663823127746582, "learning_rate": 1.8074566440072758e-05, "loss": 1.7306, "step": 46060 }, { "epoch": 0.28956062302358926, "grad_norm": 7.021383285522461, "learning_rate": 1.8074147339128105e-05, "loss": 1.7698, "step": 46070 }, { "epoch": 0.2896234753402864, "grad_norm": 7.511137962341309, "learning_rate": 1.807372823818345e-05, "loss": 1.8128, "step": 46080 }, { "epoch": 0.2896863276569835, "grad_norm": 6.127781867980957, "learning_rate": 1.8073309137238795e-05, "loss": 1.7309, "step": 46090 }, { "epoch": 0.2897491799736806, "grad_norm": 7.155242443084717, "learning_rate": 1.8072890036294142e-05, "loss": 1.8202, "step": 46100 }, { "epoch": 0.2898120322903777, "grad_norm": 7.369287014007568, "learning_rate": 1.807247093534949e-05, "loss": 1.84, "step": 46110 }, { "epoch": 0.28987488460707483, "grad_norm": 6.920827865600586, "learning_rate": 1.8072051834404837e-05, "loss": 1.6678, "step": 46120 }, { "epoch": 0.28993773692377195, "grad_norm": 7.506908416748047, "learning_rate": 1.8071632733460184e-05, "loss": 1.7669, "step": 46130 }, { "epoch": 0.290000589240469, "grad_norm": 6.9853901863098145, "learning_rate": 1.807121363251553e-05, "loss": 1.5735, "step": 46140 }, { "epoch": 0.2900634415571661, "grad_norm": 7.828129291534424, "learning_rate": 1.8070794531570878e-05, "loss": 1.7866, "step": 46150 }, { "epoch": 0.29012629387386324, "grad_norm": 6.989505767822266, "learning_rate": 1.807037543062622e-05, "loss": 1.7681, "step": 46160 }, { "epoch": 0.29018914619056035, "grad_norm": 6.1492414474487305, "learning_rate": 1.806995632968157e-05, "loss": 1.8462, "step": 46170 }, { "epoch": 0.29025199850725747, "grad_norm": 7.770036220550537, "learning_rate": 1.8069537228736916e-05, "loss": 1.6597, "step": 46180 }, { "epoch": 0.2903148508239546, "grad_norm": 6.923418045043945, "learning_rate": 1.8069118127792263e-05, "loss": 1.8354, "step": 46190 }, { "epoch": 0.2903777031406517, "grad_norm": 6.774586200714111, "learning_rate": 1.8068699026847606e-05, "loss": 1.8253, "step": 46200 }, { "epoch": 0.2904405554573488, "grad_norm": 7.21401309967041, "learning_rate": 1.8068279925902953e-05, "loss": 1.799, "step": 46210 }, { "epoch": 0.29050340777404593, "grad_norm": 7.436776638031006, "learning_rate": 1.80678608249583e-05, "loss": 1.6846, "step": 46220 }, { "epoch": 0.29056626009074304, "grad_norm": 6.8276214599609375, "learning_rate": 1.8067441724013648e-05, "loss": 1.8868, "step": 46230 }, { "epoch": 0.29062911240744016, "grad_norm": 7.0181193351745605, "learning_rate": 1.8067022623068995e-05, "loss": 1.564, "step": 46240 }, { "epoch": 0.2906919647241373, "grad_norm": 6.839227676391602, "learning_rate": 1.806660352212434e-05, "loss": 1.8778, "step": 46250 }, { "epoch": 0.2907548170408344, "grad_norm": 8.030241012573242, "learning_rate": 1.8066184421179685e-05, "loss": 2.032, "step": 46260 }, { "epoch": 0.29081766935753145, "grad_norm": 8.75223159790039, "learning_rate": 1.8065765320235032e-05, "loss": 1.8368, "step": 46270 }, { "epoch": 0.29088052167422856, "grad_norm": 7.519930839538574, "learning_rate": 1.806534621929038e-05, "loss": 1.8853, "step": 46280 }, { "epoch": 0.2909433739909257, "grad_norm": 6.830507755279541, "learning_rate": 1.8064927118345727e-05, "loss": 1.7432, "step": 46290 }, { "epoch": 0.2910062263076228, "grad_norm": 7.217705726623535, "learning_rate": 1.8064508017401074e-05, "loss": 1.8971, "step": 46300 }, { "epoch": 0.2910690786243199, "grad_norm": 5.745712757110596, "learning_rate": 1.806408891645642e-05, "loss": 1.7438, "step": 46310 }, { "epoch": 0.291131930941017, "grad_norm": 7.343744277954102, "learning_rate": 1.8063669815511768e-05, "loss": 1.751, "step": 46320 }, { "epoch": 0.29119478325771414, "grad_norm": 6.936794757843018, "learning_rate": 1.8063250714567115e-05, "loss": 1.74, "step": 46330 }, { "epoch": 0.29125763557441126, "grad_norm": 6.6871657371521, "learning_rate": 1.806283161362246e-05, "loss": 1.8208, "step": 46340 }, { "epoch": 0.29132048789110837, "grad_norm": 7.089998245239258, "learning_rate": 1.8062412512677806e-05, "loss": 1.9357, "step": 46350 }, { "epoch": 0.2913833402078055, "grad_norm": 7.669445037841797, "learning_rate": 1.8061993411733153e-05, "loss": 1.9424, "step": 46360 }, { "epoch": 0.2914461925245026, "grad_norm": 7.0265631675720215, "learning_rate": 1.80615743107885e-05, "loss": 1.6816, "step": 46370 }, { "epoch": 0.2915090448411997, "grad_norm": 7.4309587478637695, "learning_rate": 1.8061155209843843e-05, "loss": 1.8171, "step": 46380 }, { "epoch": 0.29157189715789683, "grad_norm": 7.687270641326904, "learning_rate": 1.806073610889919e-05, "loss": 1.868, "step": 46390 }, { "epoch": 0.2916347494745939, "grad_norm": 7.764848709106445, "learning_rate": 1.8060317007954538e-05, "loss": 2.1017, "step": 46400 }, { "epoch": 0.291697601791291, "grad_norm": 6.4793500900268555, "learning_rate": 1.8059897907009885e-05, "loss": 1.8929, "step": 46410 }, { "epoch": 0.2917604541079881, "grad_norm": 6.396585941314697, "learning_rate": 1.805947880606523e-05, "loss": 1.841, "step": 46420 }, { "epoch": 0.29182330642468524, "grad_norm": 6.193444728851318, "learning_rate": 1.8059059705120575e-05, "loss": 2.0445, "step": 46430 }, { "epoch": 0.29188615874138235, "grad_norm": 6.106712341308594, "learning_rate": 1.8058640604175922e-05, "loss": 1.872, "step": 46440 }, { "epoch": 0.29194901105807947, "grad_norm": 8.70683765411377, "learning_rate": 1.805822150323127e-05, "loss": 1.6543, "step": 46450 }, { "epoch": 0.2920118633747766, "grad_norm": 6.549591064453125, "learning_rate": 1.8057802402286617e-05, "loss": 1.893, "step": 46460 }, { "epoch": 0.2920747156914737, "grad_norm": 6.526978492736816, "learning_rate": 1.805738330134196e-05, "loss": 1.9956, "step": 46470 }, { "epoch": 0.2921375680081708, "grad_norm": 6.979129791259766, "learning_rate": 1.8056964200397307e-05, "loss": 1.6427, "step": 46480 }, { "epoch": 0.2922004203248679, "grad_norm": 7.817368984222412, "learning_rate": 1.8056545099452654e-05, "loss": 1.7581, "step": 46490 }, { "epoch": 0.29226327264156504, "grad_norm": 7.753480911254883, "learning_rate": 1.8056125998508e-05, "loss": 2.1084, "step": 46500 }, { "epoch": 0.29232612495826216, "grad_norm": 6.885097980499268, "learning_rate": 1.805570689756335e-05, "loss": 2.0702, "step": 46510 }, { "epoch": 0.29238897727495927, "grad_norm": 6.970781326293945, "learning_rate": 1.8055287796618696e-05, "loss": 1.8628, "step": 46520 }, { "epoch": 0.29245182959165633, "grad_norm": 6.915135860443115, "learning_rate": 1.8054868695674043e-05, "loss": 1.698, "step": 46530 }, { "epoch": 0.29251468190835345, "grad_norm": 6.631259918212891, "learning_rate": 1.805444959472939e-05, "loss": 1.7241, "step": 46540 }, { "epoch": 0.29257753422505056, "grad_norm": 7.211366653442383, "learning_rate": 1.8054030493784737e-05, "loss": 1.7791, "step": 46550 }, { "epoch": 0.2926403865417477, "grad_norm": 6.618610858917236, "learning_rate": 1.805361139284008e-05, "loss": 1.8971, "step": 46560 }, { "epoch": 0.2927032388584448, "grad_norm": 7.058317184448242, "learning_rate": 1.8053192291895428e-05, "loss": 1.9995, "step": 46570 }, { "epoch": 0.2927660911751419, "grad_norm": 6.45781946182251, "learning_rate": 1.8052773190950775e-05, "loss": 1.7701, "step": 46580 }, { "epoch": 0.292828943491839, "grad_norm": 6.641473293304443, "learning_rate": 1.8052354090006122e-05, "loss": 1.5614, "step": 46590 }, { "epoch": 0.29289179580853614, "grad_norm": 7.312847137451172, "learning_rate": 1.8051934989061465e-05, "loss": 1.8116, "step": 46600 }, { "epoch": 0.29295464812523325, "grad_norm": 6.886174201965332, "learning_rate": 1.8051515888116813e-05, "loss": 1.7406, "step": 46610 }, { "epoch": 0.29301750044193037, "grad_norm": 6.168983459472656, "learning_rate": 1.805109678717216e-05, "loss": 1.694, "step": 46620 }, { "epoch": 0.2930803527586275, "grad_norm": 7.689616680145264, "learning_rate": 1.8050677686227507e-05, "loss": 1.813, "step": 46630 }, { "epoch": 0.2931432050753246, "grad_norm": 5.971601486206055, "learning_rate": 1.8050258585282854e-05, "loss": 1.8417, "step": 46640 }, { "epoch": 0.29320605739202166, "grad_norm": 7.689766883850098, "learning_rate": 1.8049839484338197e-05, "loss": 2.1305, "step": 46650 }, { "epoch": 0.2932689097087188, "grad_norm": 7.959043025970459, "learning_rate": 1.8049420383393544e-05, "loss": 1.8383, "step": 46660 }, { "epoch": 0.2933317620254159, "grad_norm": 6.360825061798096, "learning_rate": 1.804900128244889e-05, "loss": 1.6932, "step": 46670 }, { "epoch": 0.293394614342113, "grad_norm": 6.770597457885742, "learning_rate": 1.804858218150424e-05, "loss": 1.7634, "step": 46680 }, { "epoch": 0.2934574666588101, "grad_norm": 6.878189563751221, "learning_rate": 1.8048163080559586e-05, "loss": 1.6384, "step": 46690 }, { "epoch": 0.29352031897550723, "grad_norm": 7.4565510749816895, "learning_rate": 1.8047743979614933e-05, "loss": 1.9091, "step": 46700 }, { "epoch": 0.29358317129220435, "grad_norm": 7.244266986846924, "learning_rate": 1.804732487867028e-05, "loss": 1.8722, "step": 46710 }, { "epoch": 0.29364602360890146, "grad_norm": 6.477084159851074, "learning_rate": 1.8046905777725624e-05, "loss": 1.6486, "step": 46720 }, { "epoch": 0.2937088759255986, "grad_norm": 6.180047512054443, "learning_rate": 1.804648667678097e-05, "loss": 1.9805, "step": 46730 }, { "epoch": 0.2937717282422957, "grad_norm": 6.933464050292969, "learning_rate": 1.8046067575836318e-05, "loss": 1.8354, "step": 46740 }, { "epoch": 0.2938345805589928, "grad_norm": 7.443251132965088, "learning_rate": 1.8045648474891665e-05, "loss": 1.7307, "step": 46750 }, { "epoch": 0.2938974328756899, "grad_norm": 6.6274003982543945, "learning_rate": 1.8045229373947012e-05, "loss": 1.9998, "step": 46760 }, { "epoch": 0.29396028519238704, "grad_norm": 6.9516472816467285, "learning_rate": 1.804481027300236e-05, "loss": 1.6248, "step": 46770 }, { "epoch": 0.2940231375090841, "grad_norm": 6.207671165466309, "learning_rate": 1.8044391172057703e-05, "loss": 1.7965, "step": 46780 }, { "epoch": 0.2940859898257812, "grad_norm": 7.07285737991333, "learning_rate": 1.804397207111305e-05, "loss": 1.8847, "step": 46790 }, { "epoch": 0.29414884214247833, "grad_norm": 7.29025936126709, "learning_rate": 1.8043552970168397e-05, "loss": 1.9441, "step": 46800 }, { "epoch": 0.29421169445917544, "grad_norm": 6.398433685302734, "learning_rate": 1.8043133869223744e-05, "loss": 1.9725, "step": 46810 }, { "epoch": 0.29427454677587256, "grad_norm": 7.145827293395996, "learning_rate": 1.8042714768279087e-05, "loss": 1.7165, "step": 46820 }, { "epoch": 0.2943373990925697, "grad_norm": 7.262965202331543, "learning_rate": 1.8042295667334435e-05, "loss": 2.0032, "step": 46830 }, { "epoch": 0.2944002514092668, "grad_norm": 7.3345866203308105, "learning_rate": 1.804187656638978e-05, "loss": 1.9671, "step": 46840 }, { "epoch": 0.2944631037259639, "grad_norm": 6.392716884613037, "learning_rate": 1.804145746544513e-05, "loss": 1.9767, "step": 46850 }, { "epoch": 0.294525956042661, "grad_norm": 7.186389446258545, "learning_rate": 1.8041038364500476e-05, "loss": 2.0334, "step": 46860 }, { "epoch": 0.29458880835935813, "grad_norm": 6.9807257652282715, "learning_rate": 1.804061926355582e-05, "loss": 1.7917, "step": 46870 }, { "epoch": 0.29465166067605525, "grad_norm": 6.366339206695557, "learning_rate": 1.8040200162611166e-05, "loss": 1.6576, "step": 46880 }, { "epoch": 0.29471451299275236, "grad_norm": 6.354554176330566, "learning_rate": 1.8039781061666514e-05, "loss": 1.682, "step": 46890 }, { "epoch": 0.2947773653094495, "grad_norm": 7.513469219207764, "learning_rate": 1.803936196072186e-05, "loss": 1.7218, "step": 46900 }, { "epoch": 0.29484021762614654, "grad_norm": 10.029930114746094, "learning_rate": 1.8038942859777208e-05, "loss": 2.184, "step": 46910 }, { "epoch": 0.29490306994284365, "grad_norm": 6.429253101348877, "learning_rate": 1.8038523758832555e-05, "loss": 1.8852, "step": 46920 }, { "epoch": 0.29496592225954077, "grad_norm": 6.832610607147217, "learning_rate": 1.8038104657887902e-05, "loss": 1.685, "step": 46930 }, { "epoch": 0.2950287745762379, "grad_norm": 6.212717056274414, "learning_rate": 1.803768555694325e-05, "loss": 1.9077, "step": 46940 }, { "epoch": 0.295091626892935, "grad_norm": 7.340845584869385, "learning_rate": 1.8037266455998596e-05, "loss": 1.8025, "step": 46950 }, { "epoch": 0.2951544792096321, "grad_norm": 7.158466339111328, "learning_rate": 1.803684735505394e-05, "loss": 1.5885, "step": 46960 }, { "epoch": 0.29521733152632923, "grad_norm": 6.452349662780762, "learning_rate": 1.8036428254109287e-05, "loss": 1.6982, "step": 46970 }, { "epoch": 0.29528018384302634, "grad_norm": 7.0004448890686035, "learning_rate": 1.8036009153164634e-05, "loss": 1.8416, "step": 46980 }, { "epoch": 0.29534303615972346, "grad_norm": 7.211448669433594, "learning_rate": 1.803559005221998e-05, "loss": 1.7706, "step": 46990 }, { "epoch": 0.2954058884764206, "grad_norm": 6.377902507781982, "learning_rate": 1.8035170951275325e-05, "loss": 1.8846, "step": 47000 }, { "epoch": 0.2954687407931177, "grad_norm": 10.337285995483398, "learning_rate": 1.803475185033067e-05, "loss": 1.8624, "step": 47010 }, { "epoch": 0.2955315931098148, "grad_norm": 7.350049018859863, "learning_rate": 1.803433274938602e-05, "loss": 1.8305, "step": 47020 }, { "epoch": 0.2955944454265119, "grad_norm": 6.6064372062683105, "learning_rate": 1.8033913648441366e-05, "loss": 1.8282, "step": 47030 }, { "epoch": 0.295657297743209, "grad_norm": 8.158204078674316, "learning_rate": 1.803349454749671e-05, "loss": 1.9169, "step": 47040 }, { "epoch": 0.2957201500599061, "grad_norm": 7.254039287567139, "learning_rate": 1.8033075446552057e-05, "loss": 1.8077, "step": 47050 }, { "epoch": 0.2957830023766032, "grad_norm": 7.76493501663208, "learning_rate": 1.8032656345607404e-05, "loss": 1.986, "step": 47060 }, { "epoch": 0.2958458546933003, "grad_norm": 6.843966007232666, "learning_rate": 1.803223724466275e-05, "loss": 1.984, "step": 47070 }, { "epoch": 0.29590870700999744, "grad_norm": 5.823937892913818, "learning_rate": 1.8031818143718098e-05, "loss": 1.9044, "step": 47080 }, { "epoch": 0.29597155932669456, "grad_norm": 6.896873950958252, "learning_rate": 1.803139904277344e-05, "loss": 1.845, "step": 47090 }, { "epoch": 0.29603441164339167, "grad_norm": 7.583927631378174, "learning_rate": 1.803097994182879e-05, "loss": 1.8615, "step": 47100 }, { "epoch": 0.2960972639600888, "grad_norm": 6.692607402801514, "learning_rate": 1.8030560840884136e-05, "loss": 1.9815, "step": 47110 }, { "epoch": 0.2961601162767859, "grad_norm": 7.335759162902832, "learning_rate": 1.8030141739939483e-05, "loss": 1.8262, "step": 47120 }, { "epoch": 0.296222968593483, "grad_norm": 5.8943986892700195, "learning_rate": 1.802972263899483e-05, "loss": 1.9539, "step": 47130 }, { "epoch": 0.29628582091018013, "grad_norm": 7.7029595375061035, "learning_rate": 1.8029303538050177e-05, "loss": 1.8097, "step": 47140 }, { "epoch": 0.29634867322687725, "grad_norm": 7.473947525024414, "learning_rate": 1.8028884437105524e-05, "loss": 1.8542, "step": 47150 }, { "epoch": 0.2964115255435743, "grad_norm": 6.227741241455078, "learning_rate": 1.802846533616087e-05, "loss": 1.6428, "step": 47160 }, { "epoch": 0.2964743778602714, "grad_norm": 7.363375186920166, "learning_rate": 1.8028046235216218e-05, "loss": 1.8623, "step": 47170 }, { "epoch": 0.29653723017696854, "grad_norm": 5.548816680908203, "learning_rate": 1.802762713427156e-05, "loss": 1.833, "step": 47180 }, { "epoch": 0.29660008249366565, "grad_norm": 6.321163654327393, "learning_rate": 1.802720803332691e-05, "loss": 1.8314, "step": 47190 }, { "epoch": 0.29666293481036277, "grad_norm": 5.64842414855957, "learning_rate": 1.8026788932382256e-05, "loss": 1.8445, "step": 47200 }, { "epoch": 0.2967257871270599, "grad_norm": 8.194724082946777, "learning_rate": 1.8026369831437603e-05, "loss": 1.9798, "step": 47210 }, { "epoch": 0.296788639443757, "grad_norm": 7.17202615737915, "learning_rate": 1.8025950730492947e-05, "loss": 1.965, "step": 47220 }, { "epoch": 0.2968514917604541, "grad_norm": 6.299026966094971, "learning_rate": 1.8025531629548294e-05, "loss": 1.6138, "step": 47230 }, { "epoch": 0.2969143440771512, "grad_norm": 7.963582515716553, "learning_rate": 1.802511252860364e-05, "loss": 1.9727, "step": 47240 }, { "epoch": 0.29697719639384834, "grad_norm": 7.803959369659424, "learning_rate": 1.8024693427658988e-05, "loss": 1.8211, "step": 47250 }, { "epoch": 0.29704004871054546, "grad_norm": 7.53137731552124, "learning_rate": 1.8024274326714335e-05, "loss": 1.6584, "step": 47260 }, { "epoch": 0.29710290102724257, "grad_norm": 7.338706970214844, "learning_rate": 1.802385522576968e-05, "loss": 1.8276, "step": 47270 }, { "epoch": 0.2971657533439397, "grad_norm": 7.736049175262451, "learning_rate": 1.8023436124825026e-05, "loss": 1.7143, "step": 47280 }, { "epoch": 0.29722860566063675, "grad_norm": 7.593161106109619, "learning_rate": 1.8023017023880373e-05, "loss": 2.0029, "step": 47290 }, { "epoch": 0.29729145797733386, "grad_norm": 7.548622131347656, "learning_rate": 1.802259792293572e-05, "loss": 1.8466, "step": 47300 }, { "epoch": 0.297354310294031, "grad_norm": 7.36475944519043, "learning_rate": 1.8022178821991067e-05, "loss": 1.7804, "step": 47310 }, { "epoch": 0.2974171626107281, "grad_norm": 7.36644172668457, "learning_rate": 1.8021759721046414e-05, "loss": 1.9277, "step": 47320 }, { "epoch": 0.2974800149274252, "grad_norm": 6.477627754211426, "learning_rate": 1.802134062010176e-05, "loss": 1.822, "step": 47330 }, { "epoch": 0.2975428672441223, "grad_norm": 7.44856595993042, "learning_rate": 1.8020921519157105e-05, "loss": 1.8445, "step": 47340 }, { "epoch": 0.29760571956081944, "grad_norm": 7.568237781524658, "learning_rate": 1.8020502418212452e-05, "loss": 1.9055, "step": 47350 }, { "epoch": 0.29766857187751655, "grad_norm": 7.216434001922607, "learning_rate": 1.80200833172678e-05, "loss": 1.905, "step": 47360 }, { "epoch": 0.29773142419421367, "grad_norm": 7.183870315551758, "learning_rate": 1.8019664216323146e-05, "loss": 1.6605, "step": 47370 }, { "epoch": 0.2977942765109108, "grad_norm": 6.750730514526367, "learning_rate": 1.8019245115378493e-05, "loss": 1.8036, "step": 47380 }, { "epoch": 0.2978571288276079, "grad_norm": 6.851963996887207, "learning_rate": 1.801882601443384e-05, "loss": 1.8385, "step": 47390 }, { "epoch": 0.297919981144305, "grad_norm": 6.923430442810059, "learning_rate": 1.8018406913489184e-05, "loss": 1.7631, "step": 47400 }, { "epoch": 0.29798283346100213, "grad_norm": 6.543471813201904, "learning_rate": 1.801798781254453e-05, "loss": 1.7407, "step": 47410 }, { "epoch": 0.2980456857776992, "grad_norm": 6.620553970336914, "learning_rate": 1.8017568711599878e-05, "loss": 1.8049, "step": 47420 }, { "epoch": 0.2981085380943963, "grad_norm": 6.741138935089111, "learning_rate": 1.8017149610655225e-05, "loss": 1.8804, "step": 47430 }, { "epoch": 0.2981713904110934, "grad_norm": 7.707770824432373, "learning_rate": 1.801673050971057e-05, "loss": 1.7485, "step": 47440 }, { "epoch": 0.29823424272779053, "grad_norm": 7.0002312660217285, "learning_rate": 1.8016311408765916e-05, "loss": 1.7442, "step": 47450 }, { "epoch": 0.29829709504448765, "grad_norm": 6.893242835998535, "learning_rate": 1.8015892307821263e-05, "loss": 1.824, "step": 47460 }, { "epoch": 0.29835994736118476, "grad_norm": 6.108610153198242, "learning_rate": 1.801547320687661e-05, "loss": 1.7694, "step": 47470 }, { "epoch": 0.2984227996778819, "grad_norm": 7.697864532470703, "learning_rate": 1.8015054105931957e-05, "loss": 1.7751, "step": 47480 }, { "epoch": 0.298485651994579, "grad_norm": 6.688353061676025, "learning_rate": 1.80146350049873e-05, "loss": 2.0156, "step": 47490 }, { "epoch": 0.2985485043112761, "grad_norm": 7.048642158508301, "learning_rate": 1.8014215904042648e-05, "loss": 1.7247, "step": 47500 }, { "epoch": 0.2986113566279732, "grad_norm": 6.9464850425720215, "learning_rate": 1.8013796803097995e-05, "loss": 1.9576, "step": 47510 }, { "epoch": 0.29867420894467034, "grad_norm": 6.998134136199951, "learning_rate": 1.8013377702153342e-05, "loss": 1.9937, "step": 47520 }, { "epoch": 0.29873706126136745, "grad_norm": 7.306562423706055, "learning_rate": 1.801295860120869e-05, "loss": 1.9435, "step": 47530 }, { "epoch": 0.29879991357806457, "grad_norm": 7.738890647888184, "learning_rate": 1.8012539500264036e-05, "loss": 1.837, "step": 47540 }, { "epoch": 0.29886276589476163, "grad_norm": 8.114222526550293, "learning_rate": 1.8012120399319383e-05, "loss": 2.0996, "step": 47550 }, { "epoch": 0.29892561821145874, "grad_norm": 7.040402412414551, "learning_rate": 1.801170129837473e-05, "loss": 1.725, "step": 47560 }, { "epoch": 0.29898847052815586, "grad_norm": 6.799810409545898, "learning_rate": 1.8011282197430077e-05, "loss": 1.8267, "step": 47570 }, { "epoch": 0.299051322844853, "grad_norm": 6.3567585945129395, "learning_rate": 1.801086309648542e-05, "loss": 1.7863, "step": 47580 }, { "epoch": 0.2991141751615501, "grad_norm": 7.715065002441406, "learning_rate": 1.8010443995540768e-05, "loss": 1.6239, "step": 47590 }, { "epoch": 0.2991770274782472, "grad_norm": 7.012716770172119, "learning_rate": 1.8010024894596115e-05, "loss": 1.7821, "step": 47600 }, { "epoch": 0.2992398797949443, "grad_norm": 6.990746021270752, "learning_rate": 1.8009605793651462e-05, "loss": 1.6305, "step": 47610 }, { "epoch": 0.29930273211164143, "grad_norm": 7.111852645874023, "learning_rate": 1.8009186692706806e-05, "loss": 2.0926, "step": 47620 }, { "epoch": 0.29936558442833855, "grad_norm": 8.18101692199707, "learning_rate": 1.8008767591762153e-05, "loss": 1.8121, "step": 47630 }, { "epoch": 0.29942843674503566, "grad_norm": 7.045371055603027, "learning_rate": 1.80083484908175e-05, "loss": 1.8821, "step": 47640 }, { "epoch": 0.2994912890617328, "grad_norm": 7.540232181549072, "learning_rate": 1.8007929389872847e-05, "loss": 1.9423, "step": 47650 }, { "epoch": 0.2995541413784299, "grad_norm": 7.191141605377197, "learning_rate": 1.800751028892819e-05, "loss": 1.8398, "step": 47660 }, { "epoch": 0.29961699369512695, "grad_norm": 7.146394729614258, "learning_rate": 1.8007091187983538e-05, "loss": 1.9698, "step": 47670 }, { "epoch": 0.29967984601182407, "grad_norm": 7.083776473999023, "learning_rate": 1.8006672087038885e-05, "loss": 1.7142, "step": 47680 }, { "epoch": 0.2997426983285212, "grad_norm": 7.160184860229492, "learning_rate": 1.8006252986094232e-05, "loss": 1.9591, "step": 47690 }, { "epoch": 0.2998055506452183, "grad_norm": 7.3451313972473145, "learning_rate": 1.800583388514958e-05, "loss": 1.9017, "step": 47700 }, { "epoch": 0.2998684029619154, "grad_norm": 6.632269382476807, "learning_rate": 1.8005414784204926e-05, "loss": 1.7079, "step": 47710 }, { "epoch": 0.29993125527861253, "grad_norm": 6.296167850494385, "learning_rate": 1.800499568326027e-05, "loss": 1.9561, "step": 47720 }, { "epoch": 0.29999410759530964, "grad_norm": 8.638937950134277, "learning_rate": 1.8004576582315617e-05, "loss": 1.7177, "step": 47730 }, { "epoch": 0.30005695991200676, "grad_norm": 6.743673324584961, "learning_rate": 1.8004157481370964e-05, "loss": 1.9555, "step": 47740 }, { "epoch": 0.3001198122287039, "grad_norm": 6.653906345367432, "learning_rate": 1.800373838042631e-05, "loss": 1.7253, "step": 47750 }, { "epoch": 0.300182664545401, "grad_norm": 8.051403999328613, "learning_rate": 1.8003319279481658e-05, "loss": 1.9951, "step": 47760 }, { "epoch": 0.3002455168620981, "grad_norm": 7.247729778289795, "learning_rate": 1.8002900178537005e-05, "loss": 1.8491, "step": 47770 }, { "epoch": 0.3003083691787952, "grad_norm": 8.287534713745117, "learning_rate": 1.8002481077592352e-05, "loss": 1.7213, "step": 47780 }, { "epoch": 0.30037122149549234, "grad_norm": 7.613092422485352, "learning_rate": 1.80020619766477e-05, "loss": 1.8323, "step": 47790 }, { "epoch": 0.3004340738121894, "grad_norm": 7.131540775299072, "learning_rate": 1.8001642875703043e-05, "loss": 1.8019, "step": 47800 }, { "epoch": 0.3004969261288865, "grad_norm": 7.072052001953125, "learning_rate": 1.800122377475839e-05, "loss": 2.1704, "step": 47810 }, { "epoch": 0.3005597784455836, "grad_norm": 8.470544815063477, "learning_rate": 1.8000804673813737e-05, "loss": 1.9052, "step": 47820 }, { "epoch": 0.30062263076228074, "grad_norm": 7.4394850730896, "learning_rate": 1.8000385572869084e-05, "loss": 1.9449, "step": 47830 }, { "epoch": 0.30068548307897786, "grad_norm": 7.438833236694336, "learning_rate": 1.7999966471924428e-05, "loss": 1.9655, "step": 47840 }, { "epoch": 0.30074833539567497, "grad_norm": 6.787623405456543, "learning_rate": 1.7999547370979775e-05, "loss": 1.8052, "step": 47850 }, { "epoch": 0.3008111877123721, "grad_norm": 6.316556930541992, "learning_rate": 1.7999128270035122e-05, "loss": 1.7274, "step": 47860 }, { "epoch": 0.3008740400290692, "grad_norm": 7.249434947967529, "learning_rate": 1.799870916909047e-05, "loss": 1.6915, "step": 47870 }, { "epoch": 0.3009368923457663, "grad_norm": 7.901944637298584, "learning_rate": 1.7998290068145816e-05, "loss": 1.7021, "step": 47880 }, { "epoch": 0.30099974466246343, "grad_norm": 6.7186760902404785, "learning_rate": 1.799787096720116e-05, "loss": 1.7394, "step": 47890 }, { "epoch": 0.30106259697916055, "grad_norm": 8.327082633972168, "learning_rate": 1.7997451866256507e-05, "loss": 1.8368, "step": 47900 }, { "epoch": 0.30112544929585766, "grad_norm": 6.639060020446777, "learning_rate": 1.7997032765311854e-05, "loss": 1.7183, "step": 47910 }, { "epoch": 0.3011883016125548, "grad_norm": 6.945801734924316, "learning_rate": 1.79966136643672e-05, "loss": 1.919, "step": 47920 }, { "epoch": 0.30125115392925184, "grad_norm": 7.766027927398682, "learning_rate": 1.7996194563422548e-05, "loss": 1.846, "step": 47930 }, { "epoch": 0.30131400624594895, "grad_norm": 6.516760349273682, "learning_rate": 1.7995775462477895e-05, "loss": 1.864, "step": 47940 }, { "epoch": 0.30137685856264607, "grad_norm": 5.535265922546387, "learning_rate": 1.7995356361533242e-05, "loss": 1.6883, "step": 47950 }, { "epoch": 0.3014397108793432, "grad_norm": 7.9575090408325195, "learning_rate": 1.799493726058859e-05, "loss": 1.8749, "step": 47960 }, { "epoch": 0.3015025631960403, "grad_norm": 7.324628829956055, "learning_rate": 1.7994518159643933e-05, "loss": 1.547, "step": 47970 }, { "epoch": 0.3015654155127374, "grad_norm": 7.135219573974609, "learning_rate": 1.799409905869928e-05, "loss": 1.9229, "step": 47980 }, { "epoch": 0.3016282678294345, "grad_norm": 7.590478897094727, "learning_rate": 1.7993679957754627e-05, "loss": 2.0679, "step": 47990 }, { "epoch": 0.30169112014613164, "grad_norm": 7.243574142456055, "learning_rate": 1.7993260856809974e-05, "loss": 2.0274, "step": 48000 }, { "epoch": 0.30175397246282876, "grad_norm": 7.255794048309326, "learning_rate": 1.799284175586532e-05, "loss": 1.6656, "step": 48010 }, { "epoch": 0.30181682477952587, "grad_norm": 6.9301934242248535, "learning_rate": 1.7992422654920665e-05, "loss": 1.9641, "step": 48020 }, { "epoch": 0.301879677096223, "grad_norm": 7.461561679840088, "learning_rate": 1.7992003553976012e-05, "loss": 1.9576, "step": 48030 }, { "epoch": 0.3019425294129201, "grad_norm": 7.48775053024292, "learning_rate": 1.799158445303136e-05, "loss": 2.0371, "step": 48040 }, { "epoch": 0.3020053817296172, "grad_norm": 7.324866771697998, "learning_rate": 1.7991165352086706e-05, "loss": 1.9388, "step": 48050 }, { "epoch": 0.3020682340463143, "grad_norm": 6.965330123901367, "learning_rate": 1.799074625114205e-05, "loss": 2.1266, "step": 48060 }, { "epoch": 0.3021310863630114, "grad_norm": 7.181867599487305, "learning_rate": 1.7990327150197397e-05, "loss": 1.8953, "step": 48070 }, { "epoch": 0.3021939386797085, "grad_norm": 8.220254898071289, "learning_rate": 1.7989908049252744e-05, "loss": 1.864, "step": 48080 }, { "epoch": 0.3022567909964056, "grad_norm": 7.029447555541992, "learning_rate": 1.798948894830809e-05, "loss": 1.5146, "step": 48090 }, { "epoch": 0.30231964331310274, "grad_norm": 6.5770792961120605, "learning_rate": 1.7989069847363438e-05, "loss": 1.921, "step": 48100 }, { "epoch": 0.30238249562979985, "grad_norm": 6.378840923309326, "learning_rate": 1.798865074641878e-05, "loss": 2.0148, "step": 48110 }, { "epoch": 0.30244534794649697, "grad_norm": 7.669800758361816, "learning_rate": 1.798823164547413e-05, "loss": 1.9965, "step": 48120 }, { "epoch": 0.3025082002631941, "grad_norm": 7.05275297164917, "learning_rate": 1.7987812544529476e-05, "loss": 1.642, "step": 48130 }, { "epoch": 0.3025710525798912, "grad_norm": 8.19581413269043, "learning_rate": 1.7987393443584823e-05, "loss": 1.7397, "step": 48140 }, { "epoch": 0.3026339048965883, "grad_norm": 6.76801872253418, "learning_rate": 1.798697434264017e-05, "loss": 2.24, "step": 48150 }, { "epoch": 0.30269675721328543, "grad_norm": 6.347983360290527, "learning_rate": 1.7986555241695517e-05, "loss": 1.9528, "step": 48160 }, { "epoch": 0.30275960952998254, "grad_norm": 7.854902744293213, "learning_rate": 1.7986136140750864e-05, "loss": 1.6761, "step": 48170 }, { "epoch": 0.3028224618466796, "grad_norm": 7.092661380767822, "learning_rate": 1.798571703980621e-05, "loss": 2.0425, "step": 48180 }, { "epoch": 0.3028853141633767, "grad_norm": 6.634575843811035, "learning_rate": 1.7985297938861558e-05, "loss": 1.9488, "step": 48190 }, { "epoch": 0.30294816648007383, "grad_norm": 7.243200302124023, "learning_rate": 1.7984878837916902e-05, "loss": 1.8827, "step": 48200 }, { "epoch": 0.30301101879677095, "grad_norm": 6.852927207946777, "learning_rate": 1.798445973697225e-05, "loss": 1.7027, "step": 48210 }, { "epoch": 0.30307387111346806, "grad_norm": 7.248819351196289, "learning_rate": 1.7984040636027596e-05, "loss": 1.9543, "step": 48220 }, { "epoch": 0.3031367234301652, "grad_norm": 7.36978006362915, "learning_rate": 1.7983621535082943e-05, "loss": 1.9045, "step": 48230 }, { "epoch": 0.3031995757468623, "grad_norm": 6.938103675842285, "learning_rate": 1.7983202434138287e-05, "loss": 1.7748, "step": 48240 }, { "epoch": 0.3032624280635594, "grad_norm": 6.852223873138428, "learning_rate": 1.7982783333193634e-05, "loss": 1.7587, "step": 48250 }, { "epoch": 0.3033252803802565, "grad_norm": 6.4312334060668945, "learning_rate": 1.798236423224898e-05, "loss": 1.6611, "step": 48260 }, { "epoch": 0.30338813269695364, "grad_norm": 6.497596263885498, "learning_rate": 1.7981945131304328e-05, "loss": 1.732, "step": 48270 }, { "epoch": 0.30345098501365075, "grad_norm": 5.9871602058410645, "learning_rate": 1.798152603035967e-05, "loss": 1.7099, "step": 48280 }, { "epoch": 0.30351383733034787, "grad_norm": 6.564183235168457, "learning_rate": 1.798110692941502e-05, "loss": 1.7541, "step": 48290 }, { "epoch": 0.303576689647045, "grad_norm": 6.92208194732666, "learning_rate": 1.7980687828470366e-05, "loss": 1.887, "step": 48300 }, { "epoch": 0.30363954196374204, "grad_norm": 6.861618995666504, "learning_rate": 1.7980268727525713e-05, "loss": 1.6396, "step": 48310 }, { "epoch": 0.30370239428043916, "grad_norm": 8.422454833984375, "learning_rate": 1.797984962658106e-05, "loss": 1.9241, "step": 48320 }, { "epoch": 0.3037652465971363, "grad_norm": 8.636983871459961, "learning_rate": 1.7979430525636407e-05, "loss": 1.772, "step": 48330 }, { "epoch": 0.3038280989138334, "grad_norm": 5.715958595275879, "learning_rate": 1.7979011424691754e-05, "loss": 1.8422, "step": 48340 }, { "epoch": 0.3038909512305305, "grad_norm": 7.868222713470459, "learning_rate": 1.7978592323747098e-05, "loss": 1.9577, "step": 48350 }, { "epoch": 0.3039538035472276, "grad_norm": 8.06572437286377, "learning_rate": 1.7978173222802445e-05, "loss": 1.9822, "step": 48360 }, { "epoch": 0.30401665586392473, "grad_norm": 7.032797336578369, "learning_rate": 1.7977754121857792e-05, "loss": 2.0774, "step": 48370 }, { "epoch": 0.30407950818062185, "grad_norm": 6.042160987854004, "learning_rate": 1.797733502091314e-05, "loss": 1.7674, "step": 48380 }, { "epoch": 0.30414236049731896, "grad_norm": 7.035040855407715, "learning_rate": 1.7976915919968486e-05, "loss": 1.9051, "step": 48390 }, { "epoch": 0.3042052128140161, "grad_norm": 7.070200443267822, "learning_rate": 1.7976496819023833e-05, "loss": 1.7928, "step": 48400 }, { "epoch": 0.3042680651307132, "grad_norm": 8.123950958251953, "learning_rate": 1.797607771807918e-05, "loss": 1.9142, "step": 48410 }, { "epoch": 0.3043309174474103, "grad_norm": 7.851001262664795, "learning_rate": 1.7975658617134524e-05, "loss": 1.7116, "step": 48420 }, { "epoch": 0.3043937697641074, "grad_norm": 7.491074085235596, "learning_rate": 1.797523951618987e-05, "loss": 1.7995, "step": 48430 }, { "epoch": 0.3044566220808045, "grad_norm": 7.63198184967041, "learning_rate": 1.7974820415245218e-05, "loss": 1.8103, "step": 48440 }, { "epoch": 0.3045194743975016, "grad_norm": 6.391507148742676, "learning_rate": 1.7974401314300565e-05, "loss": 1.7648, "step": 48450 }, { "epoch": 0.3045823267141987, "grad_norm": 7.051711082458496, "learning_rate": 1.797398221335591e-05, "loss": 1.8767, "step": 48460 }, { "epoch": 0.30464517903089583, "grad_norm": 6.65802526473999, "learning_rate": 1.7973563112411256e-05, "loss": 1.7294, "step": 48470 }, { "epoch": 0.30470803134759294, "grad_norm": 6.325926780700684, "learning_rate": 1.7973144011466603e-05, "loss": 1.8462, "step": 48480 }, { "epoch": 0.30477088366429006, "grad_norm": 6.50266170501709, "learning_rate": 1.797272491052195e-05, "loss": 1.6773, "step": 48490 }, { "epoch": 0.3048337359809872, "grad_norm": 7.261912822723389, "learning_rate": 1.7972305809577297e-05, "loss": 1.8061, "step": 48500 }, { "epoch": 0.3048965882976843, "grad_norm": 6.066399574279785, "learning_rate": 1.797188670863264e-05, "loss": 1.793, "step": 48510 }, { "epoch": 0.3049594406143814, "grad_norm": 7.92596960067749, "learning_rate": 1.7971467607687988e-05, "loss": 1.9376, "step": 48520 }, { "epoch": 0.3050222929310785, "grad_norm": 7.034647464752197, "learning_rate": 1.7971048506743335e-05, "loss": 1.9117, "step": 48530 }, { "epoch": 0.30508514524777564, "grad_norm": 6.429213523864746, "learning_rate": 1.7970629405798682e-05, "loss": 2.0952, "step": 48540 }, { "epoch": 0.30514799756447275, "grad_norm": 7.143803119659424, "learning_rate": 1.797021030485403e-05, "loss": 1.8642, "step": 48550 }, { "epoch": 0.30521084988116987, "grad_norm": Infinity, "learning_rate": 1.7969791203909376e-05, "loss": 1.8707, "step": 48560 }, { "epoch": 0.3052737021978669, "grad_norm": 7.328855514526367, "learning_rate": 1.7969414013059187e-05, "loss": 1.7873, "step": 48570 }, { "epoch": 0.30533655451456404, "grad_norm": 7.020454406738281, "learning_rate": 1.796899491211453e-05, "loss": 1.8623, "step": 48580 }, { "epoch": 0.30539940683126116, "grad_norm": 7.066745758056641, "learning_rate": 1.7968575811169878e-05, "loss": 1.6394, "step": 48590 }, { "epoch": 0.30546225914795827, "grad_norm": 6.923233985900879, "learning_rate": 1.7968156710225225e-05, "loss": 1.6722, "step": 48600 }, { "epoch": 0.3055251114646554, "grad_norm": 6.17056131362915, "learning_rate": 1.7967737609280572e-05, "loss": 1.7665, "step": 48610 }, { "epoch": 0.3055879637813525, "grad_norm": 7.043300151824951, "learning_rate": 1.796731850833592e-05, "loss": 1.6778, "step": 48620 }, { "epoch": 0.3056508160980496, "grad_norm": 7.796075344085693, "learning_rate": 1.7966899407391266e-05, "loss": 2.0575, "step": 48630 }, { "epoch": 0.30571366841474673, "grad_norm": 6.610686302185059, "learning_rate": 1.7966480306446614e-05, "loss": 1.8745, "step": 48640 }, { "epoch": 0.30577652073144385, "grad_norm": 6.844950199127197, "learning_rate": 1.796606120550196e-05, "loss": 2.0337, "step": 48650 }, { "epoch": 0.30583937304814096, "grad_norm": 7.483672618865967, "learning_rate": 1.7965642104557304e-05, "loss": 1.7752, "step": 48660 }, { "epoch": 0.3059022253648381, "grad_norm": 7.143709659576416, "learning_rate": 1.796522300361265e-05, "loss": 1.9864, "step": 48670 }, { "epoch": 0.3059650776815352, "grad_norm": 7.549054145812988, "learning_rate": 1.7964803902668e-05, "loss": 1.7346, "step": 48680 }, { "epoch": 0.30602792999823225, "grad_norm": 7.867265701293945, "learning_rate": 1.7964384801723346e-05, "loss": 1.771, "step": 48690 }, { "epoch": 0.30609078231492937, "grad_norm": 5.640731334686279, "learning_rate": 1.7963965700778693e-05, "loss": 1.7527, "step": 48700 }, { "epoch": 0.3061536346316265, "grad_norm": 6.865826606750488, "learning_rate": 1.796354659983404e-05, "loss": 1.7794, "step": 48710 }, { "epoch": 0.3062164869483236, "grad_norm": 8.240419387817383, "learning_rate": 1.7963127498889383e-05, "loss": 1.707, "step": 48720 }, { "epoch": 0.3062793392650207, "grad_norm": 7.254922866821289, "learning_rate": 1.796270839794473e-05, "loss": 1.8926, "step": 48730 }, { "epoch": 0.3063421915817178, "grad_norm": 8.631532669067383, "learning_rate": 1.7962289297000077e-05, "loss": 1.784, "step": 48740 }, { "epoch": 0.30640504389841494, "grad_norm": 7.44426965713501, "learning_rate": 1.7961870196055425e-05, "loss": 1.8762, "step": 48750 }, { "epoch": 0.30646789621511206, "grad_norm": 7.574648857116699, "learning_rate": 1.7961451095110768e-05, "loss": 1.6253, "step": 48760 }, { "epoch": 0.30653074853180917, "grad_norm": 6.930304050445557, "learning_rate": 1.7961031994166115e-05, "loss": 2.0645, "step": 48770 }, { "epoch": 0.3065936008485063, "grad_norm": 6.743585586547852, "learning_rate": 1.7960612893221462e-05, "loss": 1.9567, "step": 48780 }, { "epoch": 0.3066564531652034, "grad_norm": 8.479488372802734, "learning_rate": 1.796019379227681e-05, "loss": 1.9408, "step": 48790 }, { "epoch": 0.3067193054819005, "grad_norm": 6.826617240905762, "learning_rate": 1.7959774691332153e-05, "loss": 1.8654, "step": 48800 }, { "epoch": 0.30678215779859763, "grad_norm": 7.865091800689697, "learning_rate": 1.79593555903875e-05, "loss": 2.0285, "step": 48810 }, { "epoch": 0.3068450101152947, "grad_norm": 7.427491188049316, "learning_rate": 1.7958936489442847e-05, "loss": 1.9897, "step": 48820 }, { "epoch": 0.3069078624319918, "grad_norm": 8.017901420593262, "learning_rate": 1.7958517388498194e-05, "loss": 1.9974, "step": 48830 }, { "epoch": 0.3069707147486889, "grad_norm": 7.371147155761719, "learning_rate": 1.795809828755354e-05, "loss": 1.6837, "step": 48840 }, { "epoch": 0.30703356706538604, "grad_norm": 8.3818998336792, "learning_rate": 1.795767918660889e-05, "loss": 1.7212, "step": 48850 }, { "epoch": 0.30709641938208315, "grad_norm": 7.13516092300415, "learning_rate": 1.7957260085664236e-05, "loss": 1.6428, "step": 48860 }, { "epoch": 0.30715927169878027, "grad_norm": 7.678220748901367, "learning_rate": 1.7956840984719583e-05, "loss": 1.9003, "step": 48870 }, { "epoch": 0.3072221240154774, "grad_norm": 7.682860851287842, "learning_rate": 1.795642188377493e-05, "loss": 1.849, "step": 48880 }, { "epoch": 0.3072849763321745, "grad_norm": 6.572717666625977, "learning_rate": 1.7956002782830273e-05, "loss": 1.86, "step": 48890 }, { "epoch": 0.3073478286488716, "grad_norm": 7.628412246704102, "learning_rate": 1.795558368188562e-05, "loss": 1.8827, "step": 48900 }, { "epoch": 0.30741068096556873, "grad_norm": 7.541478633880615, "learning_rate": 1.7955164580940968e-05, "loss": 1.9272, "step": 48910 }, { "epoch": 0.30747353328226584, "grad_norm": 7.739235877990723, "learning_rate": 1.7954745479996315e-05, "loss": 1.8035, "step": 48920 }, { "epoch": 0.30753638559896296, "grad_norm": 5.950796127319336, "learning_rate": 1.795432637905166e-05, "loss": 1.9516, "step": 48930 }, { "epoch": 0.3075992379156601, "grad_norm": 6.327701568603516, "learning_rate": 1.7953907278107005e-05, "loss": 1.8042, "step": 48940 }, { "epoch": 0.30766209023235713, "grad_norm": 8.778162956237793, "learning_rate": 1.7953488177162352e-05, "loss": 1.7809, "step": 48950 }, { "epoch": 0.30772494254905425, "grad_norm": 7.731008052825928, "learning_rate": 1.79530690762177e-05, "loss": 1.7748, "step": 48960 }, { "epoch": 0.30778779486575136, "grad_norm": 7.498165607452393, "learning_rate": 1.7952649975273047e-05, "loss": 1.9784, "step": 48970 }, { "epoch": 0.3078506471824485, "grad_norm": 6.195873737335205, "learning_rate": 1.795223087432839e-05, "loss": 1.8752, "step": 48980 }, { "epoch": 0.3079134994991456, "grad_norm": 6.936201572418213, "learning_rate": 1.7951811773383737e-05, "loss": 2.047, "step": 48990 }, { "epoch": 0.3079763518158427, "grad_norm": 6.399014472961426, "learning_rate": 1.7951392672439084e-05, "loss": 1.963, "step": 49000 }, { "epoch": 0.3080392041325398, "grad_norm": 6.7194504737854, "learning_rate": 1.795097357149443e-05, "loss": 2.106, "step": 49010 }, { "epoch": 0.30810205644923694, "grad_norm": 5.826361656188965, "learning_rate": 1.795055447054978e-05, "loss": 1.5659, "step": 49020 }, { "epoch": 0.30816490876593405, "grad_norm": 7.676239490509033, "learning_rate": 1.7950135369605126e-05, "loss": 1.5651, "step": 49030 }, { "epoch": 0.30822776108263117, "grad_norm": 6.70309591293335, "learning_rate": 1.794971626866047e-05, "loss": 2.0287, "step": 49040 }, { "epoch": 0.3082906133993283, "grad_norm": 7.022078514099121, "learning_rate": 1.7949297167715816e-05, "loss": 1.8823, "step": 49050 }, { "epoch": 0.3083534657160254, "grad_norm": 7.787981033325195, "learning_rate": 1.7948878066771163e-05, "loss": 1.7635, "step": 49060 }, { "epoch": 0.3084163180327225, "grad_norm": 6.165613651275635, "learning_rate": 1.794845896582651e-05, "loss": 1.7764, "step": 49070 }, { "epoch": 0.3084791703494196, "grad_norm": 7.295814037322998, "learning_rate": 1.7948039864881858e-05, "loss": 1.9999, "step": 49080 }, { "epoch": 0.3085420226661167, "grad_norm": 6.8880486488342285, "learning_rate": 1.7947620763937205e-05, "loss": 1.7568, "step": 49090 }, { "epoch": 0.3086048749828138, "grad_norm": 7.967652320861816, "learning_rate": 1.7947201662992552e-05, "loss": 1.9172, "step": 49100 }, { "epoch": 0.3086677272995109, "grad_norm": 5.634479522705078, "learning_rate": 1.7946782562047895e-05, "loss": 1.4705, "step": 49110 }, { "epoch": 0.30873057961620803, "grad_norm": 26.34225845336914, "learning_rate": 1.7946363461103242e-05, "loss": 2.1408, "step": 49120 }, { "epoch": 0.30879343193290515, "grad_norm": 7.0778632164001465, "learning_rate": 1.794594436015859e-05, "loss": 1.5962, "step": 49130 }, { "epoch": 0.30885628424960226, "grad_norm": 7.073582172393799, "learning_rate": 1.7945525259213937e-05, "loss": 1.7534, "step": 49140 }, { "epoch": 0.3089191365662994, "grad_norm": 7.2108049392700195, "learning_rate": 1.7945106158269284e-05, "loss": 1.7949, "step": 49150 }, { "epoch": 0.3089819888829965, "grad_norm": 7.800317287445068, "learning_rate": 1.7944687057324627e-05, "loss": 2.0386, "step": 49160 }, { "epoch": 0.3090448411996936, "grad_norm": 7.265311241149902, "learning_rate": 1.7944267956379974e-05, "loss": 1.7653, "step": 49170 }, { "epoch": 0.3091076935163907, "grad_norm": 7.880399227142334, "learning_rate": 1.794384885543532e-05, "loss": 1.9162, "step": 49180 }, { "epoch": 0.30917054583308784, "grad_norm": 6.755533695220947, "learning_rate": 1.794342975449067e-05, "loss": 1.9217, "step": 49190 }, { "epoch": 0.30923339814978495, "grad_norm": 7.04044771194458, "learning_rate": 1.7943010653546012e-05, "loss": 1.8448, "step": 49200 }, { "epoch": 0.309296250466482, "grad_norm": 6.240016460418701, "learning_rate": 1.794259155260136e-05, "loss": 1.7467, "step": 49210 }, { "epoch": 0.30935910278317913, "grad_norm": 7.164734363555908, "learning_rate": 1.7942172451656706e-05, "loss": 2.0081, "step": 49220 }, { "epoch": 0.30942195509987624, "grad_norm": 7.578107833862305, "learning_rate": 1.7941753350712053e-05, "loss": 1.7392, "step": 49230 }, { "epoch": 0.30948480741657336, "grad_norm": 7.135012149810791, "learning_rate": 1.79413342497674e-05, "loss": 1.7525, "step": 49240 }, { "epoch": 0.3095476597332705, "grad_norm": 7.098381042480469, "learning_rate": 1.7940915148822748e-05, "loss": 1.9605, "step": 49250 }, { "epoch": 0.3096105120499676, "grad_norm": 8.046966552734375, "learning_rate": 1.7940496047878095e-05, "loss": 1.706, "step": 49260 }, { "epoch": 0.3096733643666647, "grad_norm": 7.1453094482421875, "learning_rate": 1.7940076946933442e-05, "loss": 2.1353, "step": 49270 }, { "epoch": 0.3097362166833618, "grad_norm": 7.904295444488525, "learning_rate": 1.793965784598879e-05, "loss": 2.0071, "step": 49280 }, { "epoch": 0.30979906900005894, "grad_norm": 8.429654121398926, "learning_rate": 1.7939238745044132e-05, "loss": 1.8943, "step": 49290 }, { "epoch": 0.30986192131675605, "grad_norm": 7.261959552764893, "learning_rate": 1.793881964409948e-05, "loss": 1.754, "step": 49300 }, { "epoch": 0.30992477363345317, "grad_norm": 6.630239009857178, "learning_rate": 1.7938400543154827e-05, "loss": 2.0018, "step": 49310 }, { "epoch": 0.3099876259501503, "grad_norm": 7.209163188934326, "learning_rate": 1.7937981442210174e-05, "loss": 1.8888, "step": 49320 }, { "epoch": 0.31005047826684734, "grad_norm": 7.261234283447266, "learning_rate": 1.793756234126552e-05, "loss": 1.8513, "step": 49330 }, { "epoch": 0.31011333058354446, "grad_norm": 6.507667541503906, "learning_rate": 1.7937143240320864e-05, "loss": 1.7614, "step": 49340 }, { "epoch": 0.31017618290024157, "grad_norm": 6.24437141418457, "learning_rate": 1.793672413937621e-05, "loss": 2.1852, "step": 49350 }, { "epoch": 0.3102390352169387, "grad_norm": 5.852570056915283, "learning_rate": 1.793630503843156e-05, "loss": 1.7248, "step": 49360 }, { "epoch": 0.3103018875336358, "grad_norm": 6.463139533996582, "learning_rate": 1.7935885937486906e-05, "loss": 1.7355, "step": 49370 }, { "epoch": 0.3103647398503329, "grad_norm": 6.722446918487549, "learning_rate": 1.793546683654225e-05, "loss": 1.7005, "step": 49380 }, { "epoch": 0.31042759216703003, "grad_norm": 6.7306718826293945, "learning_rate": 1.7935047735597596e-05, "loss": 1.9504, "step": 49390 }, { "epoch": 0.31049044448372715, "grad_norm": 6.818288803100586, "learning_rate": 1.7934628634652943e-05, "loss": 1.8979, "step": 49400 }, { "epoch": 0.31055329680042426, "grad_norm": 6.045130252838135, "learning_rate": 1.793420953370829e-05, "loss": 1.7419, "step": 49410 }, { "epoch": 0.3106161491171214, "grad_norm": 5.838948726654053, "learning_rate": 1.7933790432763634e-05, "loss": 1.6398, "step": 49420 }, { "epoch": 0.3106790014338185, "grad_norm": 7.738033294677734, "learning_rate": 1.793337133181898e-05, "loss": 1.6713, "step": 49430 }, { "epoch": 0.3107418537505156, "grad_norm": 6.2949604988098145, "learning_rate": 1.793295223087433e-05, "loss": 2.0401, "step": 49440 }, { "epoch": 0.3108047060672127, "grad_norm": 6.236536502838135, "learning_rate": 1.7932533129929675e-05, "loss": 1.8538, "step": 49450 }, { "epoch": 0.3108675583839098, "grad_norm": 6.050281524658203, "learning_rate": 1.7932114028985023e-05, "loss": 1.7109, "step": 49460 }, { "epoch": 0.3109304107006069, "grad_norm": 7.410684108734131, "learning_rate": 1.793169492804037e-05, "loss": 2.0338, "step": 49470 }, { "epoch": 0.310993263017304, "grad_norm": 6.922196388244629, "learning_rate": 1.7931275827095717e-05, "loss": 1.8601, "step": 49480 }, { "epoch": 0.3110561153340011, "grad_norm": 7.497621059417725, "learning_rate": 1.7930856726151064e-05, "loss": 1.8235, "step": 49490 }, { "epoch": 0.31111896765069824, "grad_norm": 7.287035942077637, "learning_rate": 1.793043762520641e-05, "loss": 1.8273, "step": 49500 }, { "epoch": 0.31118181996739536, "grad_norm": 7.15346622467041, "learning_rate": 1.7930018524261754e-05, "loss": 1.6921, "step": 49510 }, { "epoch": 0.31124467228409247, "grad_norm": 6.8751444816589355, "learning_rate": 1.79295994233171e-05, "loss": 1.6717, "step": 49520 }, { "epoch": 0.3113075246007896, "grad_norm": 7.623301982879639, "learning_rate": 1.792918032237245e-05, "loss": 2.1327, "step": 49530 }, { "epoch": 0.3113703769174867, "grad_norm": 6.589799880981445, "learning_rate": 1.7928761221427796e-05, "loss": 1.7977, "step": 49540 }, { "epoch": 0.3114332292341838, "grad_norm": 7.977503776550293, "learning_rate": 1.7928342120483143e-05, "loss": 1.5672, "step": 49550 }, { "epoch": 0.31149608155088093, "grad_norm": 6.824627876281738, "learning_rate": 1.7927923019538486e-05, "loss": 1.7908, "step": 49560 }, { "epoch": 0.31155893386757805, "grad_norm": 5.8089070320129395, "learning_rate": 1.7927503918593834e-05, "loss": 1.8488, "step": 49570 }, { "epoch": 0.31162178618427516, "grad_norm": 8.658555030822754, "learning_rate": 1.792708481764918e-05, "loss": 1.5759, "step": 49580 }, { "epoch": 0.3116846385009722, "grad_norm": 7.306936740875244, "learning_rate": 1.7926665716704528e-05, "loss": 1.817, "step": 49590 }, { "epoch": 0.31174749081766934, "grad_norm": 8.089118003845215, "learning_rate": 1.792624661575987e-05, "loss": 2.0118, "step": 49600 }, { "epoch": 0.31181034313436645, "grad_norm": 6.11497688293457, "learning_rate": 1.792582751481522e-05, "loss": 1.8159, "step": 49610 }, { "epoch": 0.31187319545106357, "grad_norm": 7.032067775726318, "learning_rate": 1.7925408413870565e-05, "loss": 1.9235, "step": 49620 }, { "epoch": 0.3119360477677607, "grad_norm": 6.375333786010742, "learning_rate": 1.7924989312925913e-05, "loss": 1.9218, "step": 49630 }, { "epoch": 0.3119989000844578, "grad_norm": 6.956419467926025, "learning_rate": 1.792457021198126e-05, "loss": 2.0338, "step": 49640 }, { "epoch": 0.3120617524011549, "grad_norm": 9.121163368225098, "learning_rate": 1.7924151111036607e-05, "loss": 1.6713, "step": 49650 }, { "epoch": 0.31212460471785203, "grad_norm": 8.117715835571289, "learning_rate": 1.792373201009195e-05, "loss": 1.8347, "step": 49660 }, { "epoch": 0.31218745703454914, "grad_norm": 7.388977527618408, "learning_rate": 1.7923312909147297e-05, "loss": 1.8706, "step": 49670 }, { "epoch": 0.31225030935124626, "grad_norm": 8.16901683807373, "learning_rate": 1.7922893808202645e-05, "loss": 1.9452, "step": 49680 }, { "epoch": 0.3123131616679434, "grad_norm": 7.1762213706970215, "learning_rate": 1.792247470725799e-05, "loss": 1.8667, "step": 49690 }, { "epoch": 0.3123760139846405, "grad_norm": 6.232288837432861, "learning_rate": 1.792205560631334e-05, "loss": 1.9377, "step": 49700 }, { "epoch": 0.3124388663013376, "grad_norm": 6.773346900939941, "learning_rate": 1.7921636505368686e-05, "loss": 1.8434, "step": 49710 }, { "epoch": 0.31250171861803466, "grad_norm": 7.3948869705200195, "learning_rate": 1.7921217404424033e-05, "loss": 2.0011, "step": 49720 }, { "epoch": 0.3125645709347318, "grad_norm": 6.538943767547607, "learning_rate": 1.7920798303479376e-05, "loss": 1.8106, "step": 49730 }, { "epoch": 0.3126274232514289, "grad_norm": 7.071255683898926, "learning_rate": 1.7920379202534724e-05, "loss": 2.0634, "step": 49740 }, { "epoch": 0.312690275568126, "grad_norm": 6.963105201721191, "learning_rate": 1.791996010159007e-05, "loss": 1.7158, "step": 49750 }, { "epoch": 0.3127531278848231, "grad_norm": 6.8943586349487305, "learning_rate": 1.7919541000645418e-05, "loss": 1.6281, "step": 49760 }, { "epoch": 0.31281598020152024, "grad_norm": 6.684320449829102, "learning_rate": 1.7919121899700765e-05, "loss": 1.5789, "step": 49770 }, { "epoch": 0.31287883251821735, "grad_norm": 7.835089206695557, "learning_rate": 1.791870279875611e-05, "loss": 1.9414, "step": 49780 }, { "epoch": 0.31294168483491447, "grad_norm": 8.6885347366333, "learning_rate": 1.7918283697811456e-05, "loss": 1.7312, "step": 49790 }, { "epoch": 0.3130045371516116, "grad_norm": 7.475964069366455, "learning_rate": 1.7917864596866803e-05, "loss": 1.8954, "step": 49800 }, { "epoch": 0.3130673894683087, "grad_norm": 6.240327835083008, "learning_rate": 1.791744549592215e-05, "loss": 1.8695, "step": 49810 }, { "epoch": 0.3131302417850058, "grad_norm": 8.597575187683105, "learning_rate": 1.7917026394977493e-05, "loss": 1.8715, "step": 49820 }, { "epoch": 0.31319309410170293, "grad_norm": 7.659453868865967, "learning_rate": 1.791660729403284e-05, "loss": 1.9177, "step": 49830 }, { "epoch": 0.3132559464184, "grad_norm": 7.241260528564453, "learning_rate": 1.7916188193088187e-05, "loss": 1.8623, "step": 49840 }, { "epoch": 0.3133187987350971, "grad_norm": 6.449772834777832, "learning_rate": 1.7915769092143535e-05, "loss": 2.0723, "step": 49850 }, { "epoch": 0.3133816510517942, "grad_norm": 6.881225109100342, "learning_rate": 1.791534999119888e-05, "loss": 1.8445, "step": 49860 }, { "epoch": 0.31344450336849133, "grad_norm": 6.207837104797363, "learning_rate": 1.791493089025423e-05, "loss": 2.0165, "step": 49870 }, { "epoch": 0.31350735568518845, "grad_norm": 7.346057415008545, "learning_rate": 1.7914511789309576e-05, "loss": 1.7559, "step": 49880 }, { "epoch": 0.31357020800188556, "grad_norm": 6.961569309234619, "learning_rate": 1.7914092688364923e-05, "loss": 1.8131, "step": 49890 }, { "epoch": 0.3136330603185827, "grad_norm": 7.810278415679932, "learning_rate": 1.791367358742027e-05, "loss": 1.8027, "step": 49900 }, { "epoch": 0.3136959126352798, "grad_norm": 6.59714412689209, "learning_rate": 1.7913254486475614e-05, "loss": 1.7132, "step": 49910 }, { "epoch": 0.3137587649519769, "grad_norm": 6.5777153968811035, "learning_rate": 1.791283538553096e-05, "loss": 1.7878, "step": 49920 }, { "epoch": 0.313821617268674, "grad_norm": 7.4422736167907715, "learning_rate": 1.7912416284586308e-05, "loss": 1.9738, "step": 49930 }, { "epoch": 0.31388446958537114, "grad_norm": 7.056987285614014, "learning_rate": 1.7911997183641655e-05, "loss": 2.0433, "step": 49940 }, { "epoch": 0.31394732190206825, "grad_norm": 6.372257232666016, "learning_rate": 1.7911578082697002e-05, "loss": 1.593, "step": 49950 }, { "epoch": 0.31401017421876537, "grad_norm": 7.499957084655762, "learning_rate": 1.7911158981752346e-05, "loss": 1.9129, "step": 49960 }, { "epoch": 0.31407302653546243, "grad_norm": 7.2810444831848145, "learning_rate": 1.7910739880807693e-05, "loss": 1.9261, "step": 49970 }, { "epoch": 0.31413587885215954, "grad_norm": 6.837697982788086, "learning_rate": 1.791032077986304e-05, "loss": 1.8936, "step": 49980 }, { "epoch": 0.31419873116885666, "grad_norm": 7.222091197967529, "learning_rate": 1.7909901678918387e-05, "loss": 1.9858, "step": 49990 }, { "epoch": 0.3142615834855538, "grad_norm": 7.050332069396973, "learning_rate": 1.790948257797373e-05, "loss": 1.9582, "step": 50000 }, { "epoch": 0.3143244358022509, "grad_norm": 7.38700532913208, "learning_rate": 1.7909063477029078e-05, "loss": 1.7374, "step": 50010 }, { "epoch": 0.314387288118948, "grad_norm": 7.6068501472473145, "learning_rate": 1.7908644376084425e-05, "loss": 1.9596, "step": 50020 }, { "epoch": 0.3144501404356451, "grad_norm": 6.117990493774414, "learning_rate": 1.790822527513977e-05, "loss": 1.8358, "step": 50030 }, { "epoch": 0.31451299275234224, "grad_norm": 7.923231601715088, "learning_rate": 1.7907806174195115e-05, "loss": 1.8882, "step": 50040 }, { "epoch": 0.31457584506903935, "grad_norm": 6.715137004852295, "learning_rate": 1.7907387073250462e-05, "loss": 1.7807, "step": 50050 }, { "epoch": 0.31463869738573647, "grad_norm": 6.373053550720215, "learning_rate": 1.790696797230581e-05, "loss": 1.9853, "step": 50060 }, { "epoch": 0.3147015497024336, "grad_norm": 6.411943435668945, "learning_rate": 1.7906548871361157e-05, "loss": 1.8439, "step": 50070 }, { "epoch": 0.3147644020191307, "grad_norm": 7.262099266052246, "learning_rate": 1.7906129770416504e-05, "loss": 1.8817, "step": 50080 }, { "epoch": 0.3148272543358278, "grad_norm": 6.456963062286377, "learning_rate": 1.790571066947185e-05, "loss": 1.767, "step": 50090 }, { "epoch": 0.31489010665252487, "grad_norm": 7.869819164276123, "learning_rate": 1.7905291568527198e-05, "loss": 2.1192, "step": 50100 }, { "epoch": 0.314952958969222, "grad_norm": 6.73624849319458, "learning_rate": 1.7904872467582545e-05, "loss": 1.6259, "step": 50110 }, { "epoch": 0.3150158112859191, "grad_norm": 6.531667232513428, "learning_rate": 1.7904453366637892e-05, "loss": 1.8657, "step": 50120 }, { "epoch": 0.3150786636026162, "grad_norm": 6.859806537628174, "learning_rate": 1.7904034265693236e-05, "loss": 1.9632, "step": 50130 }, { "epoch": 0.31514151591931333, "grad_norm": 8.881882667541504, "learning_rate": 1.7903615164748583e-05, "loss": 2.0181, "step": 50140 }, { "epoch": 0.31520436823601045, "grad_norm": 7.2561235427856445, "learning_rate": 1.790319606380393e-05, "loss": 1.9784, "step": 50150 }, { "epoch": 0.31526722055270756, "grad_norm": 6.737002372741699, "learning_rate": 1.7902776962859277e-05, "loss": 1.7579, "step": 50160 }, { "epoch": 0.3153300728694047, "grad_norm": 6.801862716674805, "learning_rate": 1.7902357861914624e-05, "loss": 1.812, "step": 50170 }, { "epoch": 0.3153929251861018, "grad_norm": 6.360828876495361, "learning_rate": 1.7901938760969968e-05, "loss": 1.9614, "step": 50180 }, { "epoch": 0.3154557775027989, "grad_norm": 6.732676029205322, "learning_rate": 1.7901519660025315e-05, "loss": 1.6523, "step": 50190 }, { "epoch": 0.315518629819496, "grad_norm": 7.454711437225342, "learning_rate": 1.7901100559080662e-05, "loss": 1.83, "step": 50200 }, { "epoch": 0.31558148213619314, "grad_norm": 6.141359806060791, "learning_rate": 1.790068145813601e-05, "loss": 1.6708, "step": 50210 }, { "epoch": 0.31564433445289025, "grad_norm": 9.075355529785156, "learning_rate": 1.7900262357191352e-05, "loss": 1.8219, "step": 50220 }, { "epoch": 0.3157071867695873, "grad_norm": 6.9223246574401855, "learning_rate": 1.78998432562467e-05, "loss": 1.8902, "step": 50230 }, { "epoch": 0.3157700390862844, "grad_norm": 7.6331562995910645, "learning_rate": 1.7899424155302047e-05, "loss": 1.9828, "step": 50240 }, { "epoch": 0.31583289140298154, "grad_norm": 8.22348403930664, "learning_rate": 1.7899005054357394e-05, "loss": 1.7224, "step": 50250 }, { "epoch": 0.31589574371967866, "grad_norm": 7.708650588989258, "learning_rate": 1.789858595341274e-05, "loss": 1.7092, "step": 50260 }, { "epoch": 0.31595859603637577, "grad_norm": 7.123536586761475, "learning_rate": 1.7898166852468088e-05, "loss": 1.736, "step": 50270 }, { "epoch": 0.3160214483530729, "grad_norm": 6.277158260345459, "learning_rate": 1.7897747751523435e-05, "loss": 1.794, "step": 50280 }, { "epoch": 0.31608430066977, "grad_norm": 7.302791118621826, "learning_rate": 1.789732865057878e-05, "loss": 1.8666, "step": 50290 }, { "epoch": 0.3161471529864671, "grad_norm": 6.053914546966553, "learning_rate": 1.7896909549634126e-05, "loss": 1.7688, "step": 50300 }, { "epoch": 0.31621000530316423, "grad_norm": 7.230604648590088, "learning_rate": 1.7896490448689473e-05, "loss": 1.78, "step": 50310 }, { "epoch": 0.31627285761986135, "grad_norm": 6.054196834564209, "learning_rate": 1.789607134774482e-05, "loss": 2.0002, "step": 50320 }, { "epoch": 0.31633570993655846, "grad_norm": 7.253074645996094, "learning_rate": 1.7895652246800167e-05, "loss": 1.9383, "step": 50330 }, { "epoch": 0.3163985622532556, "grad_norm": 6.574306964874268, "learning_rate": 1.7895233145855514e-05, "loss": 1.7574, "step": 50340 }, { "epoch": 0.31646141456995264, "grad_norm": 6.2237420082092285, "learning_rate": 1.7894814044910858e-05, "loss": 1.8644, "step": 50350 }, { "epoch": 0.31652426688664975, "grad_norm": 6.561565399169922, "learning_rate": 1.7894394943966205e-05, "loss": 2.0126, "step": 50360 }, { "epoch": 0.31658711920334687, "grad_norm": 6.336477756500244, "learning_rate": 1.7893975843021552e-05, "loss": 1.9635, "step": 50370 }, { "epoch": 0.316649971520044, "grad_norm": 8.280914306640625, "learning_rate": 1.78935567420769e-05, "loss": 1.8634, "step": 50380 }, { "epoch": 0.3167128238367411, "grad_norm": 7.454405784606934, "learning_rate": 1.7893137641132246e-05, "loss": 1.9546, "step": 50390 }, { "epoch": 0.3167756761534382, "grad_norm": 8.053701400756836, "learning_rate": 1.789271854018759e-05, "loss": 1.846, "step": 50400 }, { "epoch": 0.3168385284701353, "grad_norm": 7.830573081970215, "learning_rate": 1.7892299439242937e-05, "loss": 1.9054, "step": 50410 }, { "epoch": 0.31690138078683244, "grad_norm": 6.7762908935546875, "learning_rate": 1.7891880338298284e-05, "loss": 1.749, "step": 50420 }, { "epoch": 0.31696423310352956, "grad_norm": 7.049383163452148, "learning_rate": 1.789146123735363e-05, "loss": 1.7843, "step": 50430 }, { "epoch": 0.3170270854202267, "grad_norm": 6.608985424041748, "learning_rate": 1.7891042136408974e-05, "loss": 1.8268, "step": 50440 }, { "epoch": 0.3170899377369238, "grad_norm": 6.4440388679504395, "learning_rate": 1.789062303546432e-05, "loss": 1.9668, "step": 50450 }, { "epoch": 0.3171527900536209, "grad_norm": 6.358787536621094, "learning_rate": 1.789020393451967e-05, "loss": 1.7871, "step": 50460 }, { "epoch": 0.317215642370318, "grad_norm": 6.855319976806641, "learning_rate": 1.7889784833575016e-05, "loss": 1.9543, "step": 50470 }, { "epoch": 0.3172784946870151, "grad_norm": 6.997275352478027, "learning_rate": 1.7889365732630363e-05, "loss": 1.7011, "step": 50480 }, { "epoch": 0.3173413470037122, "grad_norm": 7.220674991607666, "learning_rate": 1.788894663168571e-05, "loss": 1.6187, "step": 50490 }, { "epoch": 0.3174041993204093, "grad_norm": 7.348036766052246, "learning_rate": 1.7888527530741057e-05, "loss": 1.9247, "step": 50500 }, { "epoch": 0.3174670516371064, "grad_norm": 7.006748199462891, "learning_rate": 1.7888108429796404e-05, "loss": 1.8376, "step": 50510 }, { "epoch": 0.31752990395380354, "grad_norm": 8.228358268737793, "learning_rate": 1.788768932885175e-05, "loss": 1.8103, "step": 50520 }, { "epoch": 0.31759275627050065, "grad_norm": 7.424829006195068, "learning_rate": 1.7887270227907095e-05, "loss": 1.7843, "step": 50530 }, { "epoch": 0.31765560858719777, "grad_norm": 6.651792526245117, "learning_rate": 1.7886851126962442e-05, "loss": 1.7476, "step": 50540 }, { "epoch": 0.3177184609038949, "grad_norm": 6.952655792236328, "learning_rate": 1.788643202601779e-05, "loss": 1.8087, "step": 50550 }, { "epoch": 0.317781313220592, "grad_norm": 7.491165637969971, "learning_rate": 1.7886012925073136e-05, "loss": 1.8656, "step": 50560 }, { "epoch": 0.3178441655372891, "grad_norm": 6.939685344696045, "learning_rate": 1.7885593824128483e-05, "loss": 1.8724, "step": 50570 }, { "epoch": 0.31790701785398623, "grad_norm": 6.545870780944824, "learning_rate": 1.7885174723183827e-05, "loss": 1.8012, "step": 50580 }, { "epoch": 0.31796987017068334, "grad_norm": 5.817804336547852, "learning_rate": 1.7884755622239174e-05, "loss": 1.7552, "step": 50590 }, { "epoch": 0.31803272248738046, "grad_norm": 6.606317043304443, "learning_rate": 1.788433652129452e-05, "loss": 1.7003, "step": 50600 }, { "epoch": 0.3180955748040775, "grad_norm": 7.408612251281738, "learning_rate": 1.7883917420349868e-05, "loss": 1.842, "step": 50610 }, { "epoch": 0.31815842712077463, "grad_norm": 7.27371883392334, "learning_rate": 1.788349831940521e-05, "loss": 1.8896, "step": 50620 }, { "epoch": 0.31822127943747175, "grad_norm": 7.040416717529297, "learning_rate": 1.788307921846056e-05, "loss": 1.9259, "step": 50630 }, { "epoch": 0.31828413175416886, "grad_norm": 7.271168231964111, "learning_rate": 1.7882660117515906e-05, "loss": 1.7213, "step": 50640 }, { "epoch": 0.318346984070866, "grad_norm": 6.543405532836914, "learning_rate": 1.7882241016571253e-05, "loss": 2.0821, "step": 50650 }, { "epoch": 0.3184098363875631, "grad_norm": 7.510614395141602, "learning_rate": 1.78818219156266e-05, "loss": 1.8321, "step": 50660 }, { "epoch": 0.3184726887042602, "grad_norm": 5.736481189727783, "learning_rate": 1.7881402814681944e-05, "loss": 1.779, "step": 50670 }, { "epoch": 0.3185355410209573, "grad_norm": 7.5980048179626465, "learning_rate": 1.788098371373729e-05, "loss": 1.7945, "step": 50680 }, { "epoch": 0.31859839333765444, "grad_norm": 6.607179164886475, "learning_rate": 1.7880606522887105e-05, "loss": 1.7979, "step": 50690 }, { "epoch": 0.31866124565435155, "grad_norm": 8.14891242980957, "learning_rate": 1.788018742194245e-05, "loss": 1.833, "step": 50700 }, { "epoch": 0.31872409797104867, "grad_norm": 7.936985015869141, "learning_rate": 1.7879768320997796e-05, "loss": 1.5912, "step": 50710 }, { "epoch": 0.3187869502877458, "grad_norm": 7.009968280792236, "learning_rate": 1.7879349220053143e-05, "loss": 1.8891, "step": 50720 }, { "epoch": 0.3188498026044429, "grad_norm": 5.5439863204956055, "learning_rate": 1.787893011910849e-05, "loss": 1.9336, "step": 50730 }, { "epoch": 0.31891265492113996, "grad_norm": 5.964497089385986, "learning_rate": 1.7878511018163834e-05, "loss": 1.9025, "step": 50740 }, { "epoch": 0.3189755072378371, "grad_norm": 7.623325824737549, "learning_rate": 1.787809191721918e-05, "loss": 1.8252, "step": 50750 }, { "epoch": 0.3190383595545342, "grad_norm": 7.806591987609863, "learning_rate": 1.7877672816274528e-05, "loss": 1.9262, "step": 50760 }, { "epoch": 0.3191012118712313, "grad_norm": 7.820315361022949, "learning_rate": 1.7877253715329875e-05, "loss": 1.9376, "step": 50770 }, { "epoch": 0.3191640641879284, "grad_norm": 7.506632328033447, "learning_rate": 1.7876834614385222e-05, "loss": 1.8455, "step": 50780 }, { "epoch": 0.31922691650462554, "grad_norm": 7.440194129943848, "learning_rate": 1.787641551344057e-05, "loss": 2.0055, "step": 50790 }, { "epoch": 0.31928976882132265, "grad_norm": 7.476811408996582, "learning_rate": 1.7875996412495916e-05, "loss": 1.8239, "step": 50800 }, { "epoch": 0.31935262113801977, "grad_norm": 6.638411998748779, "learning_rate": 1.7875577311551263e-05, "loss": 1.6158, "step": 50810 }, { "epoch": 0.3194154734547169, "grad_norm": 7.170370578765869, "learning_rate": 1.787515821060661e-05, "loss": 1.7892, "step": 50820 }, { "epoch": 0.319478325771414, "grad_norm": 6.0496931076049805, "learning_rate": 1.7874739109661954e-05, "loss": 1.8532, "step": 50830 }, { "epoch": 0.3195411780881111, "grad_norm": 7.209271430969238, "learning_rate": 1.78743200087173e-05, "loss": 2.1903, "step": 50840 }, { "epoch": 0.3196040304048082, "grad_norm": 6.790227890014648, "learning_rate": 1.7873900907772648e-05, "loss": 1.9094, "step": 50850 }, { "epoch": 0.3196668827215053, "grad_norm": 6.356524467468262, "learning_rate": 1.7873481806827995e-05, "loss": 1.8637, "step": 50860 }, { "epoch": 0.3197297350382024, "grad_norm": 6.975244045257568, "learning_rate": 1.787306270588334e-05, "loss": 1.8636, "step": 50870 }, { "epoch": 0.3197925873548995, "grad_norm": 6.814340114593506, "learning_rate": 1.7872643604938686e-05, "loss": 1.7427, "step": 50880 }, { "epoch": 0.31985543967159663, "grad_norm": 7.969485759735107, "learning_rate": 1.7872224503994033e-05, "loss": 1.7993, "step": 50890 }, { "epoch": 0.31991829198829375, "grad_norm": 7.632097244262695, "learning_rate": 1.787180540304938e-05, "loss": 1.7034, "step": 50900 }, { "epoch": 0.31998114430499086, "grad_norm": 7.777673721313477, "learning_rate": 1.7871386302104727e-05, "loss": 1.7781, "step": 50910 }, { "epoch": 0.320043996621688, "grad_norm": 6.5799384117126465, "learning_rate": 1.787096720116007e-05, "loss": 1.8563, "step": 50920 }, { "epoch": 0.3201068489383851, "grad_norm": 7.1976447105407715, "learning_rate": 1.7870548100215418e-05, "loss": 1.943, "step": 50930 }, { "epoch": 0.3201697012550822, "grad_norm": 6.877608299255371, "learning_rate": 1.7870128999270765e-05, "loss": 1.9797, "step": 50940 }, { "epoch": 0.3202325535717793, "grad_norm": 6.036958694458008, "learning_rate": 1.7869709898326112e-05, "loss": 1.6093, "step": 50950 }, { "epoch": 0.32029540588847644, "grad_norm": 5.940581321716309, "learning_rate": 1.786929079738146e-05, "loss": 1.6609, "step": 50960 }, { "epoch": 0.32035825820517355, "grad_norm": 7.371578216552734, "learning_rate": 1.7868871696436806e-05, "loss": 2.0982, "step": 50970 }, { "epoch": 0.32042111052187067, "grad_norm": 6.936854362487793, "learning_rate": 1.786845259549215e-05, "loss": 1.7427, "step": 50980 }, { "epoch": 0.3204839628385677, "grad_norm": 5.890512943267822, "learning_rate": 1.7868033494547497e-05, "loss": 1.8179, "step": 50990 }, { "epoch": 0.32054681515526484, "grad_norm": 7.359219074249268, "learning_rate": 1.7867614393602844e-05, "loss": 1.9621, "step": 51000 }, { "epoch": 0.32060966747196196, "grad_norm": 8.057191848754883, "learning_rate": 1.786719529265819e-05, "loss": 1.8713, "step": 51010 }, { "epoch": 0.32067251978865907, "grad_norm": 6.030718803405762, "learning_rate": 1.786677619171354e-05, "loss": 1.8131, "step": 51020 }, { "epoch": 0.3207353721053562, "grad_norm": 5.996182441711426, "learning_rate": 1.7866357090768885e-05, "loss": 1.8038, "step": 51030 }, { "epoch": 0.3207982244220533, "grad_norm": 6.6590046882629395, "learning_rate": 1.7865937989824232e-05, "loss": 1.6819, "step": 51040 }, { "epoch": 0.3208610767387504, "grad_norm": 6.439087867736816, "learning_rate": 1.7865518888879576e-05, "loss": 1.5105, "step": 51050 }, { "epoch": 0.32092392905544753, "grad_norm": 7.181485652923584, "learning_rate": 1.7865099787934923e-05, "loss": 1.6931, "step": 51060 }, { "epoch": 0.32098678137214465, "grad_norm": 6.688985347747803, "learning_rate": 1.786468068699027e-05, "loss": 1.7462, "step": 51070 }, { "epoch": 0.32104963368884176, "grad_norm": 7.273684501647949, "learning_rate": 1.7864261586045617e-05, "loss": 1.6931, "step": 51080 }, { "epoch": 0.3211124860055389, "grad_norm": 6.832235813140869, "learning_rate": 1.7863842485100964e-05, "loss": 1.8136, "step": 51090 }, { "epoch": 0.321175338322236, "grad_norm": 7.846092224121094, "learning_rate": 1.7863423384156308e-05, "loss": 2.0732, "step": 51100 }, { "epoch": 0.3212381906389331, "grad_norm": 8.31198787689209, "learning_rate": 1.7863004283211655e-05, "loss": 1.8665, "step": 51110 }, { "epoch": 0.32130104295563017, "grad_norm": 6.93269157409668, "learning_rate": 1.7862585182267002e-05, "loss": 1.6924, "step": 51120 }, { "epoch": 0.3213638952723273, "grad_norm": 5.975748538970947, "learning_rate": 1.786216608132235e-05, "loss": 1.871, "step": 51130 }, { "epoch": 0.3214267475890244, "grad_norm": 7.039586067199707, "learning_rate": 1.7861746980377693e-05, "loss": 1.6259, "step": 51140 }, { "epoch": 0.3214895999057215, "grad_norm": 7.075222969055176, "learning_rate": 1.786132787943304e-05, "loss": 1.7098, "step": 51150 }, { "epoch": 0.3215524522224186, "grad_norm": 7.806368350982666, "learning_rate": 1.7860908778488387e-05, "loss": 1.7068, "step": 51160 }, { "epoch": 0.32161530453911574, "grad_norm": 7.327364921569824, "learning_rate": 1.7860489677543734e-05, "loss": 1.6505, "step": 51170 }, { "epoch": 0.32167815685581286, "grad_norm": 7.286188125610352, "learning_rate": 1.786007057659908e-05, "loss": 1.9491, "step": 51180 }, { "epoch": 0.32174100917251, "grad_norm": 6.699967384338379, "learning_rate": 1.785965147565443e-05, "loss": 1.7551, "step": 51190 }, { "epoch": 0.3218038614892071, "grad_norm": 7.496636867523193, "learning_rate": 1.7859232374709775e-05, "loss": 1.6995, "step": 51200 }, { "epoch": 0.3218667138059042, "grad_norm": 6.468504428863525, "learning_rate": 1.7858813273765123e-05, "loss": 1.9599, "step": 51210 }, { "epoch": 0.3219295661226013, "grad_norm": 6.733185768127441, "learning_rate": 1.785839417282047e-05, "loss": 1.6771, "step": 51220 }, { "epoch": 0.32199241843929843, "grad_norm": 7.011502265930176, "learning_rate": 1.7857975071875813e-05, "loss": 1.6548, "step": 51230 }, { "epoch": 0.32205527075599555, "grad_norm": 8.619169235229492, "learning_rate": 1.785755597093116e-05, "loss": 2.0093, "step": 51240 }, { "epoch": 0.3221181230726926, "grad_norm": 7.195321083068848, "learning_rate": 1.7857136869986507e-05, "loss": 1.6438, "step": 51250 }, { "epoch": 0.3221809753893897, "grad_norm": 6.684089660644531, "learning_rate": 1.7856717769041854e-05, "loss": 1.756, "step": 51260 }, { "epoch": 0.32224382770608684, "grad_norm": 6.9751434326171875, "learning_rate": 1.7856298668097198e-05, "loss": 2.0399, "step": 51270 }, { "epoch": 0.32230668002278395, "grad_norm": 6.216689109802246, "learning_rate": 1.7855879567152545e-05, "loss": 1.8779, "step": 51280 }, { "epoch": 0.32236953233948107, "grad_norm": 7.417315483093262, "learning_rate": 1.7855460466207892e-05, "loss": 1.8112, "step": 51290 }, { "epoch": 0.3224323846561782, "grad_norm": 7.832452774047852, "learning_rate": 1.785504136526324e-05, "loss": 2.0227, "step": 51300 }, { "epoch": 0.3224952369728753, "grad_norm": 7.068687438964844, "learning_rate": 1.7854622264318586e-05, "loss": 1.7686, "step": 51310 }, { "epoch": 0.3225580892895724, "grad_norm": 7.699952602386475, "learning_rate": 1.785420316337393e-05, "loss": 1.7498, "step": 51320 }, { "epoch": 0.32262094160626953, "grad_norm": 6.384725570678711, "learning_rate": 1.7853784062429277e-05, "loss": 1.6738, "step": 51330 }, { "epoch": 0.32268379392296664, "grad_norm": 6.444098949432373, "learning_rate": 1.7853364961484624e-05, "loss": 1.8666, "step": 51340 }, { "epoch": 0.32274664623966376, "grad_norm": 6.82034969329834, "learning_rate": 1.785294586053997e-05, "loss": 1.6276, "step": 51350 }, { "epoch": 0.3228094985563609, "grad_norm": 6.927557945251465, "learning_rate": 1.7852526759595315e-05, "loss": 1.8343, "step": 51360 }, { "epoch": 0.32287235087305793, "grad_norm": 6.251853942871094, "learning_rate": 1.7852107658650662e-05, "loss": 1.819, "step": 51370 }, { "epoch": 0.32293520318975505, "grad_norm": 5.410449028015137, "learning_rate": 1.785168855770601e-05, "loss": 1.6984, "step": 51380 }, { "epoch": 0.32299805550645216, "grad_norm": 6.403854846954346, "learning_rate": 1.7851269456761356e-05, "loss": 2.1146, "step": 51390 }, { "epoch": 0.3230609078231493, "grad_norm": 6.982732772827148, "learning_rate": 1.7850850355816703e-05, "loss": 1.8227, "step": 51400 }, { "epoch": 0.3231237601398464, "grad_norm": 7.60330057144165, "learning_rate": 1.785043125487205e-05, "loss": 1.5598, "step": 51410 }, { "epoch": 0.3231866124565435, "grad_norm": 6.407942771911621, "learning_rate": 1.7850012153927397e-05, "loss": 1.741, "step": 51420 }, { "epoch": 0.3232494647732406, "grad_norm": 6.775578498840332, "learning_rate": 1.7849593052982745e-05, "loss": 1.7673, "step": 51430 }, { "epoch": 0.32331231708993774, "grad_norm": 6.695420265197754, "learning_rate": 1.784917395203809e-05, "loss": 1.9047, "step": 51440 }, { "epoch": 0.32337516940663485, "grad_norm": 6.92299222946167, "learning_rate": 1.7848754851093435e-05, "loss": 1.7528, "step": 51450 }, { "epoch": 0.32343802172333197, "grad_norm": 6.831806182861328, "learning_rate": 1.7848335750148782e-05, "loss": 1.835, "step": 51460 }, { "epoch": 0.3235008740400291, "grad_norm": 7.060969829559326, "learning_rate": 1.784791664920413e-05, "loss": 1.7881, "step": 51470 }, { "epoch": 0.3235637263567262, "grad_norm": 6.520586013793945, "learning_rate": 1.7847497548259476e-05, "loss": 1.8229, "step": 51480 }, { "epoch": 0.3236265786734233, "grad_norm": 7.508065700531006, "learning_rate": 1.784707844731482e-05, "loss": 1.8881, "step": 51490 }, { "epoch": 0.3236894309901204, "grad_norm": 7.780220031738281, "learning_rate": 1.7846659346370167e-05, "loss": 1.9137, "step": 51500 }, { "epoch": 0.3237522833068175, "grad_norm": 6.283895015716553, "learning_rate": 1.7846240245425514e-05, "loss": 1.8163, "step": 51510 }, { "epoch": 0.3238151356235146, "grad_norm": 7.579999923706055, "learning_rate": 1.784582114448086e-05, "loss": 1.7848, "step": 51520 }, { "epoch": 0.3238779879402117, "grad_norm": 6.87309455871582, "learning_rate": 1.784540204353621e-05, "loss": 1.7596, "step": 51530 }, { "epoch": 0.32394084025690884, "grad_norm": 8.016010284423828, "learning_rate": 1.7844982942591552e-05, "loss": 1.8643, "step": 51540 }, { "epoch": 0.32400369257360595, "grad_norm": 6.44862699508667, "learning_rate": 1.78445638416469e-05, "loss": 1.7422, "step": 51550 }, { "epoch": 0.32406654489030307, "grad_norm": 7.54352331161499, "learning_rate": 1.7844144740702246e-05, "loss": 1.8573, "step": 51560 }, { "epoch": 0.3241293972070002, "grad_norm": 7.547618389129639, "learning_rate": 1.7843725639757593e-05, "loss": 1.6722, "step": 51570 }, { "epoch": 0.3241922495236973, "grad_norm": 5.8921613693237305, "learning_rate": 1.784330653881294e-05, "loss": 1.9092, "step": 51580 }, { "epoch": 0.3242551018403944, "grad_norm": 6.372741222381592, "learning_rate": 1.7842887437868287e-05, "loss": 1.8572, "step": 51590 }, { "epoch": 0.3243179541570915, "grad_norm": 7.012420654296875, "learning_rate": 1.7842468336923635e-05, "loss": 1.8991, "step": 51600 }, { "epoch": 0.32438080647378864, "grad_norm": 7.101135730743408, "learning_rate": 1.7842049235978978e-05, "loss": 1.8706, "step": 51610 }, { "epoch": 0.32444365879048576, "grad_norm": 6.085858345031738, "learning_rate": 1.7841630135034325e-05, "loss": 1.577, "step": 51620 }, { "epoch": 0.3245065111071828, "grad_norm": 6.90290641784668, "learning_rate": 1.7841211034089672e-05, "loss": 1.915, "step": 51630 }, { "epoch": 0.32456936342387993, "grad_norm": 6.020164966583252, "learning_rate": 1.784079193314502e-05, "loss": 1.887, "step": 51640 }, { "epoch": 0.32463221574057705, "grad_norm": 7.010438442230225, "learning_rate": 1.7840372832200367e-05, "loss": 1.9698, "step": 51650 }, { "epoch": 0.32469506805727416, "grad_norm": 7.063260555267334, "learning_rate": 1.7839953731255714e-05, "loss": 1.5653, "step": 51660 }, { "epoch": 0.3247579203739713, "grad_norm": 7.6758952140808105, "learning_rate": 1.7839534630311057e-05, "loss": 1.7361, "step": 51670 }, { "epoch": 0.3248207726906684, "grad_norm": 6.925780773162842, "learning_rate": 1.7839115529366404e-05, "loss": 1.911, "step": 51680 }, { "epoch": 0.3248836250073655, "grad_norm": 7.4563751220703125, "learning_rate": 1.783869642842175e-05, "loss": 1.8287, "step": 51690 }, { "epoch": 0.3249464773240626, "grad_norm": 7.764344692230225, "learning_rate": 1.78382773274771e-05, "loss": 1.9369, "step": 51700 }, { "epoch": 0.32500932964075974, "grad_norm": 7.151037216186523, "learning_rate": 1.7837858226532446e-05, "loss": 1.8742, "step": 51710 }, { "epoch": 0.32507218195745685, "grad_norm": 6.498341083526611, "learning_rate": 1.783743912558779e-05, "loss": 1.9373, "step": 51720 }, { "epoch": 0.32513503427415397, "grad_norm": 6.145724296569824, "learning_rate": 1.7837020024643136e-05, "loss": 1.7477, "step": 51730 }, { "epoch": 0.3251978865908511, "grad_norm": 9.115324020385742, "learning_rate": 1.7836600923698483e-05, "loss": 1.7087, "step": 51740 }, { "epoch": 0.3252607389075482, "grad_norm": 5.0653486251831055, "learning_rate": 1.783618182275383e-05, "loss": 1.7502, "step": 51750 }, { "epoch": 0.32532359122424526, "grad_norm": 6.756585597991943, "learning_rate": 1.7835762721809174e-05, "loss": 1.7235, "step": 51760 }, { "epoch": 0.32538644354094237, "grad_norm": 7.557977199554443, "learning_rate": 1.783534362086452e-05, "loss": 1.8098, "step": 51770 }, { "epoch": 0.3254492958576395, "grad_norm": 6.938634872436523, "learning_rate": 1.7834924519919868e-05, "loss": 2.329, "step": 51780 }, { "epoch": 0.3255121481743366, "grad_norm": 7.438292026519775, "learning_rate": 1.7834505418975215e-05, "loss": 1.9686, "step": 51790 }, { "epoch": 0.3255750004910337, "grad_norm": 6.877195358276367, "learning_rate": 1.7834086318030562e-05, "loss": 1.7302, "step": 51800 }, { "epoch": 0.32563785280773083, "grad_norm": 7.628830909729004, "learning_rate": 1.783366721708591e-05, "loss": 1.9505, "step": 51810 }, { "epoch": 0.32570070512442795, "grad_norm": 7.419105529785156, "learning_rate": 1.7833248116141257e-05, "loss": 1.9519, "step": 51820 }, { "epoch": 0.32576355744112506, "grad_norm": 7.3178019523620605, "learning_rate": 1.7832829015196604e-05, "loss": 1.7879, "step": 51830 }, { "epoch": 0.3258264097578222, "grad_norm": 6.704282760620117, "learning_rate": 1.783240991425195e-05, "loss": 1.9448, "step": 51840 }, { "epoch": 0.3258892620745193, "grad_norm": 7.331361770629883, "learning_rate": 1.7831990813307294e-05, "loss": 1.6215, "step": 51850 }, { "epoch": 0.3259521143912164, "grad_norm": 7.451310157775879, "learning_rate": 1.783157171236264e-05, "loss": 1.7762, "step": 51860 }, { "epoch": 0.3260149667079135, "grad_norm": 8.309782981872559, "learning_rate": 1.783115261141799e-05, "loss": 1.9013, "step": 51870 }, { "epoch": 0.32607781902461064, "grad_norm": 7.002206325531006, "learning_rate": 1.7830733510473336e-05, "loss": 1.8646, "step": 51880 }, { "epoch": 0.3261406713413077, "grad_norm": 8.223302841186523, "learning_rate": 1.783031440952868e-05, "loss": 1.7973, "step": 51890 }, { "epoch": 0.3262035236580048, "grad_norm": 8.51438045501709, "learning_rate": 1.7829895308584026e-05, "loss": 1.9426, "step": 51900 }, { "epoch": 0.3262663759747019, "grad_norm": 7.0320563316345215, "learning_rate": 1.7829476207639373e-05, "loss": 1.8232, "step": 51910 }, { "epoch": 0.32632922829139904, "grad_norm": 8.625299453735352, "learning_rate": 1.782905710669472e-05, "loss": 1.8611, "step": 51920 }, { "epoch": 0.32639208060809616, "grad_norm": 6.61185359954834, "learning_rate": 1.7828638005750068e-05, "loss": 1.6157, "step": 51930 }, { "epoch": 0.3264549329247933, "grad_norm": 6.66688871383667, "learning_rate": 1.782821890480541e-05, "loss": 1.7748, "step": 51940 }, { "epoch": 0.3265177852414904, "grad_norm": 7.54803991317749, "learning_rate": 1.782779980386076e-05, "loss": 1.8101, "step": 51950 }, { "epoch": 0.3265806375581875, "grad_norm": 7.581246852874756, "learning_rate": 1.7827380702916105e-05, "loss": 1.7941, "step": 51960 }, { "epoch": 0.3266434898748846, "grad_norm": 7.599559783935547, "learning_rate": 1.7826961601971452e-05, "loss": 1.763, "step": 51970 }, { "epoch": 0.32670634219158173, "grad_norm": 6.512584686279297, "learning_rate": 1.78265425010268e-05, "loss": 1.7339, "step": 51980 }, { "epoch": 0.32676919450827885, "grad_norm": 6.6277055740356445, "learning_rate": 1.7826123400082143e-05, "loss": 1.7485, "step": 51990 }, { "epoch": 0.32683204682497596, "grad_norm": 8.265957832336426, "learning_rate": 1.782570429913749e-05, "loss": 1.5965, "step": 52000 }, { "epoch": 0.326894899141673, "grad_norm": 8.15899658203125, "learning_rate": 1.7825285198192837e-05, "loss": 1.8245, "step": 52010 }, { "epoch": 0.32695775145837014, "grad_norm": 6.8389081954956055, "learning_rate": 1.7824866097248184e-05, "loss": 1.6119, "step": 52020 }, { "epoch": 0.32702060377506725, "grad_norm": 7.083780765533447, "learning_rate": 1.782444699630353e-05, "loss": 1.774, "step": 52030 }, { "epoch": 0.32708345609176437, "grad_norm": 7.419913291931152, "learning_rate": 1.782402789535888e-05, "loss": 1.6966, "step": 52040 }, { "epoch": 0.3271463084084615, "grad_norm": 5.5379462242126465, "learning_rate": 1.7823608794414226e-05, "loss": 2.0746, "step": 52050 }, { "epoch": 0.3272091607251586, "grad_norm": 7.2828192710876465, "learning_rate": 1.7823189693469573e-05, "loss": 1.8912, "step": 52060 }, { "epoch": 0.3272720130418557, "grad_norm": 8.095076560974121, "learning_rate": 1.7822770592524916e-05, "loss": 2.0495, "step": 52070 }, { "epoch": 0.32733486535855283, "grad_norm": 7.0146484375, "learning_rate": 1.7822351491580263e-05, "loss": 1.7892, "step": 52080 }, { "epoch": 0.32739771767524994, "grad_norm": 7.499518871307373, "learning_rate": 1.782193239063561e-05, "loss": 1.6955, "step": 52090 }, { "epoch": 0.32746056999194706, "grad_norm": 6.1241774559021, "learning_rate": 1.7821513289690958e-05, "loss": 1.8733, "step": 52100 }, { "epoch": 0.3275234223086442, "grad_norm": 6.723125457763672, "learning_rate": 1.78210941887463e-05, "loss": 1.6152, "step": 52110 }, { "epoch": 0.3275862746253413, "grad_norm": 6.300307273864746, "learning_rate": 1.782067508780165e-05, "loss": 1.7901, "step": 52120 }, { "epoch": 0.3276491269420384, "grad_norm": 6.7384748458862305, "learning_rate": 1.7820255986856995e-05, "loss": 1.8631, "step": 52130 }, { "epoch": 0.32771197925873546, "grad_norm": 7.527587890625, "learning_rate": 1.7819836885912342e-05, "loss": 1.7774, "step": 52140 }, { "epoch": 0.3277748315754326, "grad_norm": 7.368362903594971, "learning_rate": 1.781941778496769e-05, "loss": 2.0007, "step": 52150 }, { "epoch": 0.3278376838921297, "grad_norm": 8.34139347076416, "learning_rate": 1.7818998684023033e-05, "loss": 1.7857, "step": 52160 }, { "epoch": 0.3279005362088268, "grad_norm": 7.319367408752441, "learning_rate": 1.781857958307838e-05, "loss": 1.8501, "step": 52170 }, { "epoch": 0.3279633885255239, "grad_norm": 7.252373218536377, "learning_rate": 1.7818160482133727e-05, "loss": 1.8423, "step": 52180 }, { "epoch": 0.32802624084222104, "grad_norm": 7.950265884399414, "learning_rate": 1.7817741381189074e-05, "loss": 1.7884, "step": 52190 }, { "epoch": 0.32808909315891815, "grad_norm": 6.485204219818115, "learning_rate": 1.781732228024442e-05, "loss": 1.7036, "step": 52200 }, { "epoch": 0.32815194547561527, "grad_norm": 7.385952949523926, "learning_rate": 1.781690317929977e-05, "loss": 1.7588, "step": 52210 }, { "epoch": 0.3282147977923124, "grad_norm": 6.963294506072998, "learning_rate": 1.7816484078355116e-05, "loss": 1.6952, "step": 52220 }, { "epoch": 0.3282776501090095, "grad_norm": 8.078579902648926, "learning_rate": 1.781606497741046e-05, "loss": 1.8461, "step": 52230 }, { "epoch": 0.3283405024257066, "grad_norm": 7.735701084136963, "learning_rate": 1.7815645876465806e-05, "loss": 1.9047, "step": 52240 }, { "epoch": 0.32840335474240373, "grad_norm": 6.707212924957275, "learning_rate": 1.7815226775521153e-05, "loss": 2.0122, "step": 52250 }, { "epoch": 0.32846620705910085, "grad_norm": 8.503338813781738, "learning_rate": 1.78148076745765e-05, "loss": 1.7775, "step": 52260 }, { "epoch": 0.3285290593757979, "grad_norm": 7.937143325805664, "learning_rate": 1.7814388573631848e-05, "loss": 1.9494, "step": 52270 }, { "epoch": 0.328591911692495, "grad_norm": 8.503870964050293, "learning_rate": 1.7813969472687195e-05, "loss": 1.8309, "step": 52280 }, { "epoch": 0.32865476400919214, "grad_norm": 6.416479587554932, "learning_rate": 1.781355037174254e-05, "loss": 1.6226, "step": 52290 }, { "epoch": 0.32871761632588925, "grad_norm": 7.439314365386963, "learning_rate": 1.7813131270797885e-05, "loss": 1.6882, "step": 52300 }, { "epoch": 0.32878046864258637, "grad_norm": 6.373034477233887, "learning_rate": 1.7812712169853233e-05, "loss": 1.7671, "step": 52310 }, { "epoch": 0.3288433209592835, "grad_norm": 6.135987281799316, "learning_rate": 1.781229306890858e-05, "loss": 2.0315, "step": 52320 }, { "epoch": 0.3289061732759806, "grad_norm": 8.025102615356445, "learning_rate": 1.7811873967963927e-05, "loss": 1.703, "step": 52330 }, { "epoch": 0.3289690255926777, "grad_norm": 8.254257202148438, "learning_rate": 1.781145486701927e-05, "loss": 1.8818, "step": 52340 }, { "epoch": 0.3290318779093748, "grad_norm": 6.1511688232421875, "learning_rate": 1.7811035766074617e-05, "loss": 1.8207, "step": 52350 }, { "epoch": 0.32909473022607194, "grad_norm": 6.602275371551514, "learning_rate": 1.7810616665129964e-05, "loss": 1.8641, "step": 52360 }, { "epoch": 0.32915758254276906, "grad_norm": 7.317796230316162, "learning_rate": 1.781019756418531e-05, "loss": 1.7598, "step": 52370 }, { "epoch": 0.32922043485946617, "grad_norm": 6.873623847961426, "learning_rate": 1.7809778463240655e-05, "loss": 1.9122, "step": 52380 }, { "epoch": 0.3292832871761633, "grad_norm": 7.018415451049805, "learning_rate": 1.7809359362296002e-05, "loss": 1.8689, "step": 52390 }, { "epoch": 0.32934613949286035, "grad_norm": 8.09970760345459, "learning_rate": 1.780894026135135e-05, "loss": 1.7076, "step": 52400 }, { "epoch": 0.32940899180955746, "grad_norm": 7.095788955688477, "learning_rate": 1.7808521160406696e-05, "loss": 1.8184, "step": 52410 }, { "epoch": 0.3294718441262546, "grad_norm": 6.006564617156982, "learning_rate": 1.7808102059462044e-05, "loss": 1.798, "step": 52420 }, { "epoch": 0.3295346964429517, "grad_norm": 7.368823528289795, "learning_rate": 1.780768295851739e-05, "loss": 2.0833, "step": 52430 }, { "epoch": 0.3295975487596488, "grad_norm": 6.977514266967773, "learning_rate": 1.7807263857572738e-05, "loss": 1.7669, "step": 52440 }, { "epoch": 0.3296604010763459, "grad_norm": 8.111364364624023, "learning_rate": 1.7806844756628085e-05, "loss": 1.7443, "step": 52450 }, { "epoch": 0.32972325339304304, "grad_norm": 7.839640140533447, "learning_rate": 1.7806425655683432e-05, "loss": 1.7325, "step": 52460 }, { "epoch": 0.32978610570974015, "grad_norm": 5.942218780517578, "learning_rate": 1.7806006554738775e-05, "loss": 1.6901, "step": 52470 }, { "epoch": 0.32984895802643727, "grad_norm": 7.037097930908203, "learning_rate": 1.7805587453794123e-05, "loss": 2.0469, "step": 52480 }, { "epoch": 0.3299118103431344, "grad_norm": 7.099818229675293, "learning_rate": 1.780516835284947e-05, "loss": 1.8316, "step": 52490 }, { "epoch": 0.3299746626598315, "grad_norm": 8.093073844909668, "learning_rate": 1.7804749251904817e-05, "loss": 1.8714, "step": 52500 }, { "epoch": 0.3300375149765286, "grad_norm": 6.274770736694336, "learning_rate": 1.780433015096016e-05, "loss": 1.8357, "step": 52510 }, { "epoch": 0.33010036729322567, "grad_norm": 5.988602161407471, "learning_rate": 1.7803911050015507e-05, "loss": 1.6467, "step": 52520 }, { "epoch": 0.3301632196099228, "grad_norm": 6.645780563354492, "learning_rate": 1.7803491949070855e-05, "loss": 1.7149, "step": 52530 }, { "epoch": 0.3302260719266199, "grad_norm": 6.206699848175049, "learning_rate": 1.78030728481262e-05, "loss": 1.7399, "step": 52540 }, { "epoch": 0.330288924243317, "grad_norm": 6.97754430770874, "learning_rate": 1.780265374718155e-05, "loss": 2.0122, "step": 52550 }, { "epoch": 0.33035177656001413, "grad_norm": 7.309817790985107, "learning_rate": 1.7802234646236892e-05, "loss": 1.5856, "step": 52560 }, { "epoch": 0.33041462887671125, "grad_norm": 6.189268589019775, "learning_rate": 1.780181554529224e-05, "loss": 1.9549, "step": 52570 }, { "epoch": 0.33047748119340836, "grad_norm": 7.027437210083008, "learning_rate": 1.7801396444347586e-05, "loss": 1.5906, "step": 52580 }, { "epoch": 0.3305403335101055, "grad_norm": 7.115314960479736, "learning_rate": 1.7800977343402934e-05, "loss": 1.7271, "step": 52590 }, { "epoch": 0.3306031858268026, "grad_norm": 7.591831684112549, "learning_rate": 1.780055824245828e-05, "loss": 2.0359, "step": 52600 }, { "epoch": 0.3306660381434997, "grad_norm": 7.378714561462402, "learning_rate": 1.7800139141513624e-05, "loss": 1.735, "step": 52610 }, { "epoch": 0.3307288904601968, "grad_norm": 6.624408721923828, "learning_rate": 1.779972004056897e-05, "loss": 1.8707, "step": 52620 }, { "epoch": 0.33079174277689394, "grad_norm": 7.652662754058838, "learning_rate": 1.779930093962432e-05, "loss": 1.682, "step": 52630 }, { "epoch": 0.33085459509359105, "grad_norm": 8.113582611083984, "learning_rate": 1.7798881838679666e-05, "loss": 1.8986, "step": 52640 }, { "epoch": 0.3309174474102881, "grad_norm": 5.646933555603027, "learning_rate": 1.7798462737735013e-05, "loss": 1.6427, "step": 52650 }, { "epoch": 0.3309802997269852, "grad_norm": 7.363295555114746, "learning_rate": 1.779804363679036e-05, "loss": 1.763, "step": 52660 }, { "epoch": 0.33104315204368234, "grad_norm": 6.794223785400391, "learning_rate": 1.7797624535845707e-05, "loss": 1.8697, "step": 52670 }, { "epoch": 0.33110600436037946, "grad_norm": 7.367708206176758, "learning_rate": 1.7797205434901054e-05, "loss": 1.8522, "step": 52680 }, { "epoch": 0.3311688566770766, "grad_norm": 7.230896949768066, "learning_rate": 1.7796786333956397e-05, "loss": 1.8344, "step": 52690 }, { "epoch": 0.3312317089937737, "grad_norm": 6.658416748046875, "learning_rate": 1.7796367233011745e-05, "loss": 1.7248, "step": 52700 }, { "epoch": 0.3312945613104708, "grad_norm": 6.473068714141846, "learning_rate": 1.779594813206709e-05, "loss": 1.8158, "step": 52710 }, { "epoch": 0.3313574136271679, "grad_norm": 5.824365615844727, "learning_rate": 1.779552903112244e-05, "loss": 1.8317, "step": 52720 }, { "epoch": 0.33142026594386503, "grad_norm": 7.017134189605713, "learning_rate": 1.7795109930177782e-05, "loss": 1.7481, "step": 52730 }, { "epoch": 0.33148311826056215, "grad_norm": 6.68671178817749, "learning_rate": 1.779469082923313e-05, "loss": 1.8106, "step": 52740 }, { "epoch": 0.33154597057725926, "grad_norm": 5.893295764923096, "learning_rate": 1.7794271728288477e-05, "loss": 1.636, "step": 52750 }, { "epoch": 0.3316088228939564, "grad_norm": 7.513364791870117, "learning_rate": 1.7793852627343824e-05, "loss": 1.7196, "step": 52760 }, { "epoch": 0.3316716752106535, "grad_norm": 7.483827590942383, "learning_rate": 1.779343352639917e-05, "loss": 1.8745, "step": 52770 }, { "epoch": 0.33173452752735055, "grad_norm": 7.519413948059082, "learning_rate": 1.7793014425454514e-05, "loss": 1.8776, "step": 52780 }, { "epoch": 0.33179737984404767, "grad_norm": 8.790800094604492, "learning_rate": 1.779259532450986e-05, "loss": 1.8679, "step": 52790 }, { "epoch": 0.3318602321607448, "grad_norm": 7.194511890411377, "learning_rate": 1.779217622356521e-05, "loss": 2.0707, "step": 52800 }, { "epoch": 0.3319230844774419, "grad_norm": 6.432770252227783, "learning_rate": 1.7791757122620556e-05, "loss": 1.7891, "step": 52810 }, { "epoch": 0.331985936794139, "grad_norm": 7.2645111083984375, "learning_rate": 1.7791338021675903e-05, "loss": 1.7714, "step": 52820 }, { "epoch": 0.33204878911083613, "grad_norm": 8.450857162475586, "learning_rate": 1.779091892073125e-05, "loss": 1.7657, "step": 52830 }, { "epoch": 0.33211164142753324, "grad_norm": 7.276505947113037, "learning_rate": 1.7790499819786597e-05, "loss": 1.8322, "step": 52840 }, { "epoch": 0.33217449374423036, "grad_norm": 6.824415683746338, "learning_rate": 1.7790080718841944e-05, "loss": 1.7138, "step": 52850 }, { "epoch": 0.3322373460609275, "grad_norm": 6.38921594619751, "learning_rate": 1.7789661617897288e-05, "loss": 1.7159, "step": 52860 }, { "epoch": 0.3323001983776246, "grad_norm": 6.562678813934326, "learning_rate": 1.7789242516952635e-05, "loss": 1.9596, "step": 52870 }, { "epoch": 0.3323630506943217, "grad_norm": 7.544395446777344, "learning_rate": 1.778882341600798e-05, "loss": 1.9372, "step": 52880 }, { "epoch": 0.3324259030110188, "grad_norm": 7.433197975158691, "learning_rate": 1.778840431506333e-05, "loss": 1.8617, "step": 52890 }, { "epoch": 0.33248875532771593, "grad_norm": 6.466828346252441, "learning_rate": 1.7787985214118676e-05, "loss": 1.594, "step": 52900 }, { "epoch": 0.332551607644413, "grad_norm": 6.611237049102783, "learning_rate": 1.778756611317402e-05, "loss": 1.831, "step": 52910 }, { "epoch": 0.3326144599611101, "grad_norm": 7.221104145050049, "learning_rate": 1.7787147012229367e-05, "loss": 1.5431, "step": 52920 }, { "epoch": 0.3326773122778072, "grad_norm": 6.376213073730469, "learning_rate": 1.7786727911284714e-05, "loss": 1.7078, "step": 52930 }, { "epoch": 0.33274016459450434, "grad_norm": 5.897716045379639, "learning_rate": 1.778630881034006e-05, "loss": 1.8387, "step": 52940 }, { "epoch": 0.33280301691120145, "grad_norm": 6.521352767944336, "learning_rate": 1.7785889709395408e-05, "loss": 1.7553, "step": 52950 }, { "epoch": 0.33286586922789857, "grad_norm": 5.935555458068848, "learning_rate": 1.778547060845075e-05, "loss": 1.8631, "step": 52960 }, { "epoch": 0.3329287215445957, "grad_norm": 8.263205528259277, "learning_rate": 1.77850515075061e-05, "loss": 1.8783, "step": 52970 }, { "epoch": 0.3329915738612928, "grad_norm": 6.078148365020752, "learning_rate": 1.7784632406561446e-05, "loss": 1.8977, "step": 52980 }, { "epoch": 0.3330544261779899, "grad_norm": 6.746679782867432, "learning_rate": 1.7784213305616793e-05, "loss": 1.6983, "step": 52990 }, { "epoch": 0.33311727849468703, "grad_norm": 7.494751930236816, "learning_rate": 1.7783794204672136e-05, "loss": 1.9726, "step": 53000 }, { "epoch": 0.33318013081138415, "grad_norm": 6.933189868927002, "learning_rate": 1.7783375103727483e-05, "loss": 1.7022, "step": 53010 }, { "epoch": 0.33324298312808126, "grad_norm": 7.498746871948242, "learning_rate": 1.778295600278283e-05, "loss": 1.8275, "step": 53020 }, { "epoch": 0.3333058354447783, "grad_norm": 6.554727077484131, "learning_rate": 1.7782536901838178e-05, "loss": 1.8502, "step": 53030 }, { "epoch": 0.33336868776147544, "grad_norm": 6.357724189758301, "learning_rate": 1.7782117800893525e-05, "loss": 1.7458, "step": 53040 }, { "epoch": 0.33343154007817255, "grad_norm": 6.919586658477783, "learning_rate": 1.7781698699948872e-05, "loss": 1.6825, "step": 53050 }, { "epoch": 0.33349439239486967, "grad_norm": 7.433612823486328, "learning_rate": 1.778127959900422e-05, "loss": 1.7833, "step": 53060 }, { "epoch": 0.3335572447115668, "grad_norm": 5.948964595794678, "learning_rate": 1.7780860498059566e-05, "loss": 1.6469, "step": 53070 }, { "epoch": 0.3336200970282639, "grad_norm": 8.402131080627441, "learning_rate": 1.7780441397114913e-05, "loss": 1.8364, "step": 53080 }, { "epoch": 0.333682949344961, "grad_norm": 7.735292911529541, "learning_rate": 1.7780022296170257e-05, "loss": 1.9143, "step": 53090 }, { "epoch": 0.3337458016616581, "grad_norm": 6.684525012969971, "learning_rate": 1.7779603195225604e-05, "loss": 1.7593, "step": 53100 }, { "epoch": 0.33380865397835524, "grad_norm": 6.776787757873535, "learning_rate": 1.777918409428095e-05, "loss": 1.9056, "step": 53110 }, { "epoch": 0.33387150629505236, "grad_norm": 6.470606803894043, "learning_rate": 1.7778764993336298e-05, "loss": 1.4876, "step": 53120 }, { "epoch": 0.33393435861174947, "grad_norm": 6.753844261169434, "learning_rate": 1.777834589239164e-05, "loss": 1.7287, "step": 53130 }, { "epoch": 0.3339972109284466, "grad_norm": 6.296436786651611, "learning_rate": 1.777792679144699e-05, "loss": 1.8241, "step": 53140 }, { "epoch": 0.3340600632451437, "grad_norm": 7.144308567047119, "learning_rate": 1.7777507690502336e-05, "loss": 1.7879, "step": 53150 }, { "epoch": 0.33412291556184076, "grad_norm": 6.159611701965332, "learning_rate": 1.7777088589557683e-05, "loss": 1.7772, "step": 53160 }, { "epoch": 0.3341857678785379, "grad_norm": 7.0396528244018555, "learning_rate": 1.777666948861303e-05, "loss": 1.7225, "step": 53170 }, { "epoch": 0.334248620195235, "grad_norm": 7.040628433227539, "learning_rate": 1.7776250387668373e-05, "loss": 1.626, "step": 53180 }, { "epoch": 0.3343114725119321, "grad_norm": 6.439341068267822, "learning_rate": 1.777583128672372e-05, "loss": 1.8533, "step": 53190 }, { "epoch": 0.3343743248286292, "grad_norm": 8.195597648620605, "learning_rate": 1.7775412185779068e-05, "loss": 1.7672, "step": 53200 }, { "epoch": 0.33443717714532634, "grad_norm": 8.984902381896973, "learning_rate": 1.7774993084834415e-05, "loss": 1.8945, "step": 53210 }, { "epoch": 0.33450002946202345, "grad_norm": 7.737391948699951, "learning_rate": 1.7774573983889762e-05, "loss": 1.8272, "step": 53220 }, { "epoch": 0.33456288177872057, "grad_norm": 7.130795955657959, "learning_rate": 1.777415488294511e-05, "loss": 1.9769, "step": 53230 }, { "epoch": 0.3346257340954177, "grad_norm": 6.71053409576416, "learning_rate": 1.7773735782000452e-05, "loss": 1.6326, "step": 53240 }, { "epoch": 0.3346885864121148, "grad_norm": 7.596933364868164, "learning_rate": 1.77733166810558e-05, "loss": 1.754, "step": 53250 }, { "epoch": 0.3347514387288119, "grad_norm": 6.899796009063721, "learning_rate": 1.7772897580111147e-05, "loss": 1.9545, "step": 53260 }, { "epoch": 0.334814291045509, "grad_norm": 7.802043914794922, "learning_rate": 1.7772478479166494e-05, "loss": 1.8785, "step": 53270 }, { "epoch": 0.33487714336220614, "grad_norm": 6.9452056884765625, "learning_rate": 1.777205937822184e-05, "loss": 1.8725, "step": 53280 }, { "epoch": 0.3349399956789032, "grad_norm": 6.828030109405518, "learning_rate": 1.7771640277277188e-05, "loss": 1.8363, "step": 53290 }, { "epoch": 0.3350028479956003, "grad_norm": 6.437834739685059, "learning_rate": 1.7771221176332535e-05, "loss": 1.5645, "step": 53300 }, { "epoch": 0.33506570031229743, "grad_norm": 6.539505958557129, "learning_rate": 1.777080207538788e-05, "loss": 1.8865, "step": 53310 }, { "epoch": 0.33512855262899455, "grad_norm": 6.247645854949951, "learning_rate": 1.7770382974443226e-05, "loss": 1.8822, "step": 53320 }, { "epoch": 0.33519140494569166, "grad_norm": 7.20156192779541, "learning_rate": 1.7769963873498573e-05, "loss": 1.8122, "step": 53330 }, { "epoch": 0.3352542572623888, "grad_norm": 7.3843207359313965, "learning_rate": 1.776954477255392e-05, "loss": 1.7941, "step": 53340 }, { "epoch": 0.3353171095790859, "grad_norm": 6.303149700164795, "learning_rate": 1.7769125671609263e-05, "loss": 1.5762, "step": 53350 }, { "epoch": 0.335379961895783, "grad_norm": 6.769227981567383, "learning_rate": 1.776870657066461e-05, "loss": 1.7282, "step": 53360 }, { "epoch": 0.3354428142124801, "grad_norm": 5.572915077209473, "learning_rate": 1.7768287469719958e-05, "loss": 1.6846, "step": 53370 }, { "epoch": 0.33550566652917724, "grad_norm": 8.161036491394043, "learning_rate": 1.7767868368775305e-05, "loss": 1.7322, "step": 53380 }, { "epoch": 0.33556851884587435, "grad_norm": 7.607860565185547, "learning_rate": 1.7767449267830652e-05, "loss": 1.756, "step": 53390 }, { "epoch": 0.33563137116257147, "grad_norm": 7.829543113708496, "learning_rate": 1.7767030166885995e-05, "loss": 1.6983, "step": 53400 }, { "epoch": 0.3356942234792686, "grad_norm": 7.995700359344482, "learning_rate": 1.7766611065941343e-05, "loss": 1.817, "step": 53410 }, { "epoch": 0.33575707579596564, "grad_norm": 6.766124725341797, "learning_rate": 1.776619196499669e-05, "loss": 1.7901, "step": 53420 }, { "epoch": 0.33581992811266276, "grad_norm": 7.617290019989014, "learning_rate": 1.7765772864052037e-05, "loss": 2.0277, "step": 53430 }, { "epoch": 0.3358827804293599, "grad_norm": Infinity, "learning_rate": 1.7765353763107384e-05, "loss": 1.8824, "step": 53440 }, { "epoch": 0.335945632746057, "grad_norm": 9.397178649902344, "learning_rate": 1.7764976572257195e-05, "loss": 2.0714, "step": 53450 }, { "epoch": 0.3360084850627541, "grad_norm": 7.2699360847473145, "learning_rate": 1.7764557471312542e-05, "loss": 1.7717, "step": 53460 }, { "epoch": 0.3360713373794512, "grad_norm": 5.961643218994141, "learning_rate": 1.776413837036789e-05, "loss": 1.6335, "step": 53470 }, { "epoch": 0.33613418969614833, "grad_norm": 7.276872634887695, "learning_rate": 1.7763719269423233e-05, "loss": 1.661, "step": 53480 }, { "epoch": 0.33619704201284545, "grad_norm": 6.956408500671387, "learning_rate": 1.776330016847858e-05, "loss": 1.7756, "step": 53490 }, { "epoch": 0.33625989432954256, "grad_norm": 8.05853271484375, "learning_rate": 1.7762881067533927e-05, "loss": 2.0086, "step": 53500 }, { "epoch": 0.3363227466462397, "grad_norm": 7.309393405914307, "learning_rate": 1.7762461966589274e-05, "loss": 1.965, "step": 53510 }, { "epoch": 0.3363855989629368, "grad_norm": 7.199671268463135, "learning_rate": 1.776204286564462e-05, "loss": 1.915, "step": 53520 }, { "epoch": 0.3364484512796339, "grad_norm": 7.527480602264404, "learning_rate": 1.7761623764699968e-05, "loss": 1.8693, "step": 53530 }, { "epoch": 0.33651130359633097, "grad_norm": 7.157961845397949, "learning_rate": 1.7761204663755315e-05, "loss": 1.8545, "step": 53540 }, { "epoch": 0.3365741559130281, "grad_norm": 7.603402614593506, "learning_rate": 1.776078556281066e-05, "loss": 1.8986, "step": 53550 }, { "epoch": 0.3366370082297252, "grad_norm": 6.873359680175781, "learning_rate": 1.7760366461866006e-05, "loss": 1.648, "step": 53560 }, { "epoch": 0.3366998605464223, "grad_norm": 7.6509809494018555, "learning_rate": 1.7759947360921353e-05, "loss": 1.869, "step": 53570 }, { "epoch": 0.33676271286311943, "grad_norm": 7.747193336486816, "learning_rate": 1.77595282599767e-05, "loss": 1.7266, "step": 53580 }, { "epoch": 0.33682556517981654, "grad_norm": 7.056433200836182, "learning_rate": 1.7759109159032047e-05, "loss": 1.8732, "step": 53590 }, { "epoch": 0.33688841749651366, "grad_norm": 8.493775367736816, "learning_rate": 1.7758690058087394e-05, "loss": 1.8027, "step": 53600 }, { "epoch": 0.3369512698132108, "grad_norm": 7.364907741546631, "learning_rate": 1.7758270957142738e-05, "loss": 1.8418, "step": 53610 }, { "epoch": 0.3370141221299079, "grad_norm": 7.566737651824951, "learning_rate": 1.7757851856198085e-05, "loss": 1.8186, "step": 53620 }, { "epoch": 0.337076974446605, "grad_norm": 5.394718170166016, "learning_rate": 1.7757432755253432e-05, "loss": 1.6161, "step": 53630 }, { "epoch": 0.3371398267633021, "grad_norm": 7.8384904861450195, "learning_rate": 1.775701365430878e-05, "loss": 1.8057, "step": 53640 }, { "epoch": 0.33720267907999923, "grad_norm": 7.048176288604736, "learning_rate": 1.7756594553364123e-05, "loss": 1.7877, "step": 53650 }, { "epoch": 0.33726553139669635, "grad_norm": 7.966719150543213, "learning_rate": 1.775617545241947e-05, "loss": 1.9503, "step": 53660 }, { "epoch": 0.3373283837133934, "grad_norm": 6.892568588256836, "learning_rate": 1.7755756351474817e-05, "loss": 1.8589, "step": 53670 }, { "epoch": 0.3373912360300905, "grad_norm": 5.600237846374512, "learning_rate": 1.7755337250530164e-05, "loss": 1.9034, "step": 53680 }, { "epoch": 0.33745408834678764, "grad_norm": 6.192532539367676, "learning_rate": 1.775491814958551e-05, "loss": 1.7382, "step": 53690 }, { "epoch": 0.33751694066348475, "grad_norm": 6.748945713043213, "learning_rate": 1.7754499048640855e-05, "loss": 2.1606, "step": 53700 }, { "epoch": 0.33757979298018187, "grad_norm": 5.532161712646484, "learning_rate": 1.7754079947696202e-05, "loss": 1.7523, "step": 53710 }, { "epoch": 0.337642645296879, "grad_norm": 7.209076881408691, "learning_rate": 1.775366084675155e-05, "loss": 1.8265, "step": 53720 }, { "epoch": 0.3377054976135761, "grad_norm": 7.280900955200195, "learning_rate": 1.7753241745806896e-05, "loss": 1.6002, "step": 53730 }, { "epoch": 0.3377683499302732, "grad_norm": 5.666275978088379, "learning_rate": 1.7752822644862243e-05, "loss": 1.6816, "step": 53740 }, { "epoch": 0.33783120224697033, "grad_norm": 6.765921592712402, "learning_rate": 1.775240354391759e-05, "loss": 1.9081, "step": 53750 }, { "epoch": 0.33789405456366745, "grad_norm": 7.1952290534973145, "learning_rate": 1.7751984442972937e-05, "loss": 1.6662, "step": 53760 }, { "epoch": 0.33795690688036456, "grad_norm": 7.036599159240723, "learning_rate": 1.7751565342028284e-05, "loss": 1.7319, "step": 53770 }, { "epoch": 0.3380197591970617, "grad_norm": 7.425091743469238, "learning_rate": 1.775114624108363e-05, "loss": 1.9976, "step": 53780 }, { "epoch": 0.3380826115137588, "grad_norm": 6.500268459320068, "learning_rate": 1.7750727140138975e-05, "loss": 1.6409, "step": 53790 }, { "epoch": 0.33814546383045585, "grad_norm": 7.112756729125977, "learning_rate": 1.7750308039194322e-05, "loss": 1.8804, "step": 53800 }, { "epoch": 0.33820831614715297, "grad_norm": 6.661694049835205, "learning_rate": 1.774988893824967e-05, "loss": 1.9926, "step": 53810 }, { "epoch": 0.3382711684638501, "grad_norm": 7.794707775115967, "learning_rate": 1.7749469837305016e-05, "loss": 1.6295, "step": 53820 }, { "epoch": 0.3383340207805472, "grad_norm": 6.211827754974365, "learning_rate": 1.774905073636036e-05, "loss": 2.0199, "step": 53830 }, { "epoch": 0.3383968730972443, "grad_norm": 7.719405174255371, "learning_rate": 1.7748631635415707e-05, "loss": 1.8618, "step": 53840 }, { "epoch": 0.3384597254139414, "grad_norm": 6.117846965789795, "learning_rate": 1.7748212534471054e-05, "loss": 1.78, "step": 53850 }, { "epoch": 0.33852257773063854, "grad_norm": 9.416608810424805, "learning_rate": 1.77477934335264e-05, "loss": 1.7694, "step": 53860 }, { "epoch": 0.33858543004733566, "grad_norm": 7.474032878875732, "learning_rate": 1.7747374332581745e-05, "loss": 1.9287, "step": 53870 }, { "epoch": 0.33864828236403277, "grad_norm": 6.153302192687988, "learning_rate": 1.7746955231637092e-05, "loss": 1.8605, "step": 53880 }, { "epoch": 0.3387111346807299, "grad_norm": 5.692074298858643, "learning_rate": 1.774653613069244e-05, "loss": 1.853, "step": 53890 }, { "epoch": 0.338773986997427, "grad_norm": 6.859311103820801, "learning_rate": 1.7746117029747786e-05, "loss": 1.6117, "step": 53900 }, { "epoch": 0.3388368393141241, "grad_norm": 6.528646469116211, "learning_rate": 1.7745697928803133e-05, "loss": 1.9483, "step": 53910 }, { "epoch": 0.33889969163082123, "grad_norm": 7.2122979164123535, "learning_rate": 1.774527882785848e-05, "loss": 1.8461, "step": 53920 }, { "epoch": 0.3389625439475183, "grad_norm": 7.391670227050781, "learning_rate": 1.7744859726913824e-05, "loss": 1.7604, "step": 53930 }, { "epoch": 0.3390253962642154, "grad_norm": 7.458083629608154, "learning_rate": 1.774444062596917e-05, "loss": 2.1509, "step": 53940 }, { "epoch": 0.3390882485809125, "grad_norm": 8.052411079406738, "learning_rate": 1.7744021525024518e-05, "loss": 1.7441, "step": 53950 }, { "epoch": 0.33915110089760964, "grad_norm": 7.353691101074219, "learning_rate": 1.7743602424079865e-05, "loss": 2.1435, "step": 53960 }, { "epoch": 0.33921395321430675, "grad_norm": 7.005837440490723, "learning_rate": 1.7743183323135212e-05, "loss": 1.5142, "step": 53970 }, { "epoch": 0.33927680553100387, "grad_norm": 7.651731967926025, "learning_rate": 1.774276422219056e-05, "loss": 1.8029, "step": 53980 }, { "epoch": 0.339339657847701, "grad_norm": 6.751585483551025, "learning_rate": 1.7742345121245906e-05, "loss": 1.7118, "step": 53990 }, { "epoch": 0.3394025101643981, "grad_norm": 7.8414177894592285, "learning_rate": 1.7741926020301253e-05, "loss": 1.6463, "step": 54000 }, { "epoch": 0.3394653624810952, "grad_norm": 6.46080207824707, "learning_rate": 1.7741506919356597e-05, "loss": 1.7061, "step": 54010 }, { "epoch": 0.3395282147977923, "grad_norm": 6.9414777755737305, "learning_rate": 1.7741087818411944e-05, "loss": 1.7382, "step": 54020 }, { "epoch": 0.33959106711448944, "grad_norm": 7.154740333557129, "learning_rate": 1.774066871746729e-05, "loss": 1.9247, "step": 54030 }, { "epoch": 0.33965391943118656, "grad_norm": 6.026335716247559, "learning_rate": 1.774024961652264e-05, "loss": 1.7836, "step": 54040 }, { "epoch": 0.3397167717478837, "grad_norm": 7.3941497802734375, "learning_rate": 1.7739830515577982e-05, "loss": 1.8794, "step": 54050 }, { "epoch": 0.33977962406458073, "grad_norm": 6.438807010650635, "learning_rate": 1.773941141463333e-05, "loss": 1.5947, "step": 54060 }, { "epoch": 0.33984247638127785, "grad_norm": 6.488143444061279, "learning_rate": 1.7738992313688676e-05, "loss": 1.8451, "step": 54070 }, { "epoch": 0.33990532869797496, "grad_norm": 6.967753887176514, "learning_rate": 1.7738573212744023e-05, "loss": 1.7057, "step": 54080 }, { "epoch": 0.3399681810146721, "grad_norm": 6.895341396331787, "learning_rate": 1.773815411179937e-05, "loss": 1.7863, "step": 54090 }, { "epoch": 0.3400310333313692, "grad_norm": 7.644385814666748, "learning_rate": 1.7737735010854714e-05, "loss": 1.7148, "step": 54100 }, { "epoch": 0.3400938856480663, "grad_norm": 6.567039489746094, "learning_rate": 1.773731590991006e-05, "loss": 1.5497, "step": 54110 }, { "epoch": 0.3401567379647634, "grad_norm": 6.824104309082031, "learning_rate": 1.7736896808965408e-05, "loss": 1.5358, "step": 54120 }, { "epoch": 0.34021959028146054, "grad_norm": 6.814744472503662, "learning_rate": 1.7736477708020755e-05, "loss": 1.787, "step": 54130 }, { "epoch": 0.34028244259815765, "grad_norm": 7.5279741287231445, "learning_rate": 1.7736058607076102e-05, "loss": 1.7153, "step": 54140 }, { "epoch": 0.34034529491485477, "grad_norm": 6.229637622833252, "learning_rate": 1.773563950613145e-05, "loss": 1.7908, "step": 54150 }, { "epoch": 0.3404081472315519, "grad_norm": 7.762417316436768, "learning_rate": 1.7735220405186796e-05, "loss": 1.9416, "step": 54160 }, { "epoch": 0.340470999548249, "grad_norm": 7.071630954742432, "learning_rate": 1.7734801304242144e-05, "loss": 1.9099, "step": 54170 }, { "epoch": 0.34053385186494606, "grad_norm": 7.530044078826904, "learning_rate": 1.7734382203297487e-05, "loss": 1.6938, "step": 54180 }, { "epoch": 0.3405967041816432, "grad_norm": 7.272950172424316, "learning_rate": 1.7733963102352834e-05, "loss": 1.8578, "step": 54190 }, { "epoch": 0.3406595564983403, "grad_norm": 7.612779140472412, "learning_rate": 1.773354400140818e-05, "loss": 1.7059, "step": 54200 }, { "epoch": 0.3407224088150374, "grad_norm": 6.924613952636719, "learning_rate": 1.773312490046353e-05, "loss": 1.768, "step": 54210 }, { "epoch": 0.3407852611317345, "grad_norm": 6.755637168884277, "learning_rate": 1.7732705799518875e-05, "loss": 1.9023, "step": 54220 }, { "epoch": 0.34084811344843163, "grad_norm": 6.792628288269043, "learning_rate": 1.773228669857422e-05, "loss": 1.7773, "step": 54230 }, { "epoch": 0.34091096576512875, "grad_norm": 6.276157379150391, "learning_rate": 1.7731867597629566e-05, "loss": 1.779, "step": 54240 }, { "epoch": 0.34097381808182586, "grad_norm": 6.791790962219238, "learning_rate": 1.7731448496684913e-05, "loss": 1.8692, "step": 54250 }, { "epoch": 0.341036670398523, "grad_norm": 7.605082988739014, "learning_rate": 1.773102939574026e-05, "loss": 1.5803, "step": 54260 }, { "epoch": 0.3410995227152201, "grad_norm": 6.673621654510498, "learning_rate": 1.7730610294795604e-05, "loss": 1.7076, "step": 54270 }, { "epoch": 0.3411623750319172, "grad_norm": 8.742812156677246, "learning_rate": 1.773019119385095e-05, "loss": 1.6247, "step": 54280 }, { "epoch": 0.3412252273486143, "grad_norm": 7.297802448272705, "learning_rate": 1.7729772092906298e-05, "loss": 1.5804, "step": 54290 }, { "epoch": 0.34128807966531144, "grad_norm": 7.189021587371826, "learning_rate": 1.7729352991961645e-05, "loss": 1.7823, "step": 54300 }, { "epoch": 0.3413509319820085, "grad_norm": 6.340190410614014, "learning_rate": 1.7728933891016992e-05, "loss": 1.5445, "step": 54310 }, { "epoch": 0.3414137842987056, "grad_norm": 7.6919989585876465, "learning_rate": 1.7728514790072336e-05, "loss": 1.7746, "step": 54320 }, { "epoch": 0.34147663661540273, "grad_norm": 7.8139753341674805, "learning_rate": 1.7728095689127683e-05, "loss": 1.797, "step": 54330 }, { "epoch": 0.34153948893209984, "grad_norm": 7.525743007659912, "learning_rate": 1.772767658818303e-05, "loss": 1.9476, "step": 54340 }, { "epoch": 0.34160234124879696, "grad_norm": 6.680002689361572, "learning_rate": 1.7727257487238377e-05, "loss": 1.8654, "step": 54350 }, { "epoch": 0.3416651935654941, "grad_norm": 7.364837169647217, "learning_rate": 1.7726838386293724e-05, "loss": 1.8191, "step": 54360 }, { "epoch": 0.3417280458821912, "grad_norm": 6.190256118774414, "learning_rate": 1.772641928534907e-05, "loss": 1.7854, "step": 54370 }, { "epoch": 0.3417908981988883, "grad_norm": 5.911518096923828, "learning_rate": 1.772600018440442e-05, "loss": 1.6279, "step": 54380 }, { "epoch": 0.3418537505155854, "grad_norm": 6.656449794769287, "learning_rate": 1.7725581083459766e-05, "loss": 1.7996, "step": 54390 }, { "epoch": 0.34191660283228253, "grad_norm": 8.080692291259766, "learning_rate": 1.7725161982515113e-05, "loss": 1.8572, "step": 54400 }, { "epoch": 0.34197945514897965, "grad_norm": 6.838883876800537, "learning_rate": 1.7724742881570456e-05, "loss": 1.6238, "step": 54410 }, { "epoch": 0.34204230746567676, "grad_norm": 9.394396781921387, "learning_rate": 1.7724323780625803e-05, "loss": 1.7309, "step": 54420 }, { "epoch": 0.3421051597823739, "grad_norm": 9.8051118850708, "learning_rate": 1.772390467968115e-05, "loss": 1.9172, "step": 54430 }, { "epoch": 0.34216801209907094, "grad_norm": 6.973281383514404, "learning_rate": 1.7723485578736497e-05, "loss": 1.7838, "step": 54440 }, { "epoch": 0.34223086441576805, "grad_norm": 6.628882884979248, "learning_rate": 1.772306647779184e-05, "loss": 1.7447, "step": 54450 }, { "epoch": 0.34229371673246517, "grad_norm": 7.0264363288879395, "learning_rate": 1.7722647376847188e-05, "loss": 1.9837, "step": 54460 }, { "epoch": 0.3423565690491623, "grad_norm": 7.414371013641357, "learning_rate": 1.7722228275902535e-05, "loss": 1.8179, "step": 54470 }, { "epoch": 0.3424194213658594, "grad_norm": 7.856083393096924, "learning_rate": 1.7721809174957882e-05, "loss": 1.8555, "step": 54480 }, { "epoch": 0.3424822736825565, "grad_norm": 6.976076602935791, "learning_rate": 1.7721390074013226e-05, "loss": 1.8994, "step": 54490 }, { "epoch": 0.34254512599925363, "grad_norm": 7.5447516441345215, "learning_rate": 1.7720970973068573e-05, "loss": 1.7808, "step": 54500 }, { "epoch": 0.34260797831595075, "grad_norm": 7.636044502258301, "learning_rate": 1.772055187212392e-05, "loss": 2.0319, "step": 54510 }, { "epoch": 0.34267083063264786, "grad_norm": 8.525298118591309, "learning_rate": 1.7720132771179267e-05, "loss": 1.8216, "step": 54520 }, { "epoch": 0.342733682949345, "grad_norm": 7.616324424743652, "learning_rate": 1.7719713670234614e-05, "loss": 1.9823, "step": 54530 }, { "epoch": 0.3427965352660421, "grad_norm": 7.189038276672363, "learning_rate": 1.771929456928996e-05, "loss": 1.8789, "step": 54540 }, { "epoch": 0.3428593875827392, "grad_norm": 7.485154628753662, "learning_rate": 1.771887546834531e-05, "loss": 1.7238, "step": 54550 }, { "epoch": 0.3429222398994363, "grad_norm": 7.713175296783447, "learning_rate": 1.7718456367400652e-05, "loss": 1.78, "step": 54560 }, { "epoch": 0.3429850922161334, "grad_norm": 6.5029296875, "learning_rate": 1.7718037266456e-05, "loss": 1.8195, "step": 54570 }, { "epoch": 0.3430479445328305, "grad_norm": 6.359293460845947, "learning_rate": 1.7717618165511346e-05, "loss": 1.7736, "step": 54580 }, { "epoch": 0.3431107968495276, "grad_norm": 7.335869789123535, "learning_rate": 1.7717199064566693e-05, "loss": 1.629, "step": 54590 }, { "epoch": 0.3431736491662247, "grad_norm": 6.607364654541016, "learning_rate": 1.771677996362204e-05, "loss": 1.7003, "step": 54600 }, { "epoch": 0.34323650148292184, "grad_norm": 7.606721878051758, "learning_rate": 1.7716360862677388e-05, "loss": 2.1147, "step": 54610 }, { "epoch": 0.34329935379961896, "grad_norm": 7.278506755828857, "learning_rate": 1.7715941761732735e-05, "loss": 1.7123, "step": 54620 }, { "epoch": 0.34336220611631607, "grad_norm": 6.562756538391113, "learning_rate": 1.7715522660788078e-05, "loss": 1.8377, "step": 54630 }, { "epoch": 0.3434250584330132, "grad_norm": 7.337499141693115, "learning_rate": 1.7715103559843425e-05, "loss": 1.7935, "step": 54640 }, { "epoch": 0.3434879107497103, "grad_norm": 5.972623825073242, "learning_rate": 1.7714684458898772e-05, "loss": 1.8112, "step": 54650 }, { "epoch": 0.3435507630664074, "grad_norm": 5.967720985412598, "learning_rate": 1.771426535795412e-05, "loss": 1.8698, "step": 54660 }, { "epoch": 0.34361361538310453, "grad_norm": 7.01886510848999, "learning_rate": 1.7713846257009463e-05, "loss": 1.8793, "step": 54670 }, { "epoch": 0.34367646769980165, "grad_norm": 7.248289585113525, "learning_rate": 1.771342715606481e-05, "loss": 1.6498, "step": 54680 }, { "epoch": 0.3437393200164987, "grad_norm": 6.365307331085205, "learning_rate": 1.7713008055120157e-05, "loss": 1.8024, "step": 54690 }, { "epoch": 0.3438021723331958, "grad_norm": 9.144411087036133, "learning_rate": 1.7712588954175504e-05, "loss": 1.7123, "step": 54700 }, { "epoch": 0.34386502464989294, "grad_norm": 5.928186893463135, "learning_rate": 1.771216985323085e-05, "loss": 1.5943, "step": 54710 }, { "epoch": 0.34392787696659005, "grad_norm": 6.9440436363220215, "learning_rate": 1.7711750752286195e-05, "loss": 1.9885, "step": 54720 }, { "epoch": 0.34399072928328717, "grad_norm": 6.48063850402832, "learning_rate": 1.7711331651341542e-05, "loss": 1.5523, "step": 54730 }, { "epoch": 0.3440535815999843, "grad_norm": 7.519895553588867, "learning_rate": 1.771091255039689e-05, "loss": 2.0059, "step": 54740 }, { "epoch": 0.3441164339166814, "grad_norm": 7.490455627441406, "learning_rate": 1.7710493449452236e-05, "loss": 1.5682, "step": 54750 }, { "epoch": 0.3441792862333785, "grad_norm": 8.436594009399414, "learning_rate": 1.7710074348507583e-05, "loss": 1.7293, "step": 54760 }, { "epoch": 0.3442421385500756, "grad_norm": 7.138073444366455, "learning_rate": 1.770965524756293e-05, "loss": 2.0913, "step": 54770 }, { "epoch": 0.34430499086677274, "grad_norm": 7.458728790283203, "learning_rate": 1.7709236146618278e-05, "loss": 1.8845, "step": 54780 }, { "epoch": 0.34436784318346986, "grad_norm": 7.629421710968018, "learning_rate": 1.7708817045673625e-05, "loss": 1.93, "step": 54790 }, { "epoch": 0.344430695500167, "grad_norm": 6.514352798461914, "learning_rate": 1.7708397944728968e-05, "loss": 1.6549, "step": 54800 }, { "epoch": 0.3444935478168641, "grad_norm": 7.668230056762695, "learning_rate": 1.7707978843784315e-05, "loss": 1.8372, "step": 54810 }, { "epoch": 0.34455640013356115, "grad_norm": 6.99606466293335, "learning_rate": 1.7707559742839662e-05, "loss": 1.7217, "step": 54820 }, { "epoch": 0.34461925245025826, "grad_norm": 6.23781156539917, "learning_rate": 1.770714064189501e-05, "loss": 1.6478, "step": 54830 }, { "epoch": 0.3446821047669554, "grad_norm": 7.972962856292725, "learning_rate": 1.7706721540950357e-05, "loss": 1.8881, "step": 54840 }, { "epoch": 0.3447449570836525, "grad_norm": 6.710640907287598, "learning_rate": 1.77063024400057e-05, "loss": 1.7905, "step": 54850 }, { "epoch": 0.3448078094003496, "grad_norm": 7.677894592285156, "learning_rate": 1.7705883339061047e-05, "loss": 1.8085, "step": 54860 }, { "epoch": 0.3448706617170467, "grad_norm": 7.830748081207275, "learning_rate": 1.7705464238116394e-05, "loss": 1.9429, "step": 54870 }, { "epoch": 0.34493351403374384, "grad_norm": 6.730197429656982, "learning_rate": 1.770504513717174e-05, "loss": 1.6322, "step": 54880 }, { "epoch": 0.34499636635044095, "grad_norm": 7.165801048278809, "learning_rate": 1.7704626036227085e-05, "loss": 1.9474, "step": 54890 }, { "epoch": 0.34505921866713807, "grad_norm": 6.8697710037231445, "learning_rate": 1.7704206935282432e-05, "loss": 1.7506, "step": 54900 }, { "epoch": 0.3451220709838352, "grad_norm": 6.679593086242676, "learning_rate": 1.770378783433778e-05, "loss": 1.8068, "step": 54910 }, { "epoch": 0.3451849233005323, "grad_norm": 7.5652241706848145, "learning_rate": 1.7703368733393126e-05, "loss": 1.8456, "step": 54920 }, { "epoch": 0.3452477756172294, "grad_norm": 6.1844353675842285, "learning_rate": 1.7702949632448473e-05, "loss": 1.7487, "step": 54930 }, { "epoch": 0.34531062793392653, "grad_norm": 6.531343936920166, "learning_rate": 1.7702530531503817e-05, "loss": 1.8684, "step": 54940 }, { "epoch": 0.3453734802506236, "grad_norm": 6.96888542175293, "learning_rate": 1.7702111430559164e-05, "loss": 1.8261, "step": 54950 }, { "epoch": 0.3454363325673207, "grad_norm": 6.074754238128662, "learning_rate": 1.770169232961451e-05, "loss": 1.7163, "step": 54960 }, { "epoch": 0.3454991848840178, "grad_norm": 7.811606407165527, "learning_rate": 1.770127322866986e-05, "loss": 1.9222, "step": 54970 }, { "epoch": 0.34556203720071493, "grad_norm": 6.472822666168213, "learning_rate": 1.7700854127725205e-05, "loss": 1.6429, "step": 54980 }, { "epoch": 0.34562488951741205, "grad_norm": 6.850888252258301, "learning_rate": 1.7700435026780552e-05, "loss": 1.7288, "step": 54990 }, { "epoch": 0.34568774183410916, "grad_norm": 7.054748058319092, "learning_rate": 1.77000159258359e-05, "loss": 1.972, "step": 55000 }, { "epoch": 0.3457505941508063, "grad_norm": 6.785097599029541, "learning_rate": 1.7699596824891247e-05, "loss": 1.8971, "step": 55010 }, { "epoch": 0.3458134464675034, "grad_norm": 7.310171604156494, "learning_rate": 1.7699177723946594e-05, "loss": 1.6131, "step": 55020 }, { "epoch": 0.3458762987842005, "grad_norm": 6.57820987701416, "learning_rate": 1.7698758623001937e-05, "loss": 1.8606, "step": 55030 }, { "epoch": 0.3459391511008976, "grad_norm": 7.374070167541504, "learning_rate": 1.7698339522057284e-05, "loss": 1.9107, "step": 55040 }, { "epoch": 0.34600200341759474, "grad_norm": 7.071714401245117, "learning_rate": 1.769792042111263e-05, "loss": 1.6747, "step": 55050 }, { "epoch": 0.34606485573429185, "grad_norm": 8.598275184631348, "learning_rate": 1.769750132016798e-05, "loss": 1.8978, "step": 55060 }, { "epoch": 0.34612770805098897, "grad_norm": 7.332151889801025, "learning_rate": 1.7697082219223322e-05, "loss": 1.9403, "step": 55070 }, { "epoch": 0.34619056036768603, "grad_norm": 6.51117467880249, "learning_rate": 1.769666311827867e-05, "loss": 1.7394, "step": 55080 }, { "epoch": 0.34625341268438314, "grad_norm": 6.8738603591918945, "learning_rate": 1.7696244017334016e-05, "loss": 1.6063, "step": 55090 }, { "epoch": 0.34631626500108026, "grad_norm": 7.8134307861328125, "learning_rate": 1.7695824916389363e-05, "loss": 1.7705, "step": 55100 }, { "epoch": 0.3463791173177774, "grad_norm": 6.688620090484619, "learning_rate": 1.7695405815444707e-05, "loss": 1.7648, "step": 55110 }, { "epoch": 0.3464419696344745, "grad_norm": 6.747702121734619, "learning_rate": 1.7694986714500054e-05, "loss": 2.0141, "step": 55120 }, { "epoch": 0.3465048219511716, "grad_norm": 7.8479461669921875, "learning_rate": 1.76945676135554e-05, "loss": 1.7764, "step": 55130 }, { "epoch": 0.3465676742678687, "grad_norm": 6.962363243103027, "learning_rate": 1.769414851261075e-05, "loss": 1.7716, "step": 55140 }, { "epoch": 0.34663052658456583, "grad_norm": 8.543752670288086, "learning_rate": 1.7693729411666095e-05, "loss": 1.8749, "step": 55150 }, { "epoch": 0.34669337890126295, "grad_norm": 6.781923770904541, "learning_rate": 1.7693310310721443e-05, "loss": 1.6538, "step": 55160 }, { "epoch": 0.34675623121796006, "grad_norm": 6.297466278076172, "learning_rate": 1.769289120977679e-05, "loss": 1.9625, "step": 55170 }, { "epoch": 0.3468190835346572, "grad_norm": 6.437690258026123, "learning_rate": 1.7692472108832133e-05, "loss": 1.6855, "step": 55180 }, { "epoch": 0.3468819358513543, "grad_norm": 7.9445648193359375, "learning_rate": 1.769205300788748e-05, "loss": 1.8701, "step": 55190 }, { "epoch": 0.34694478816805135, "grad_norm": 7.036828994750977, "learning_rate": 1.7691633906942827e-05, "loss": 1.7085, "step": 55200 }, { "epoch": 0.34700764048474847, "grad_norm": 5.771168231964111, "learning_rate": 1.7691214805998174e-05, "loss": 1.7395, "step": 55210 }, { "epoch": 0.3470704928014456, "grad_norm": 7.6461181640625, "learning_rate": 1.769079570505352e-05, "loss": 1.7988, "step": 55220 }, { "epoch": 0.3471333451181427, "grad_norm": 7.132679462432861, "learning_rate": 1.769037660410887e-05, "loss": 1.6534, "step": 55230 }, { "epoch": 0.3471961974348398, "grad_norm": 7.007255554199219, "learning_rate": 1.7689957503164216e-05, "loss": 2.0081, "step": 55240 }, { "epoch": 0.34725904975153693, "grad_norm": 7.715878009796143, "learning_rate": 1.768953840221956e-05, "loss": 1.8827, "step": 55250 }, { "epoch": 0.34732190206823405, "grad_norm": 7.1222453117370605, "learning_rate": 1.7689119301274906e-05, "loss": 1.7782, "step": 55260 }, { "epoch": 0.34738475438493116, "grad_norm": 8.725252151489258, "learning_rate": 1.7688700200330254e-05, "loss": 1.8346, "step": 55270 }, { "epoch": 0.3474476067016283, "grad_norm": 7.030858039855957, "learning_rate": 1.76882810993856e-05, "loss": 1.8693, "step": 55280 }, { "epoch": 0.3475104590183254, "grad_norm": 6.237267017364502, "learning_rate": 1.7687861998440944e-05, "loss": 1.7428, "step": 55290 }, { "epoch": 0.3475733113350225, "grad_norm": 6.757734775543213, "learning_rate": 1.768744289749629e-05, "loss": 1.6203, "step": 55300 }, { "epoch": 0.3476361636517196, "grad_norm": 7.839578628540039, "learning_rate": 1.768702379655164e-05, "loss": 1.6332, "step": 55310 }, { "epoch": 0.34769901596841674, "grad_norm": 7.068745136260986, "learning_rate": 1.7686604695606985e-05, "loss": 2.0161, "step": 55320 }, { "epoch": 0.3477618682851138, "grad_norm": 7.032870769500732, "learning_rate": 1.7686185594662333e-05, "loss": 1.8001, "step": 55330 }, { "epoch": 0.3478247206018109, "grad_norm": 6.8951568603515625, "learning_rate": 1.7685766493717676e-05, "loss": 1.946, "step": 55340 }, { "epoch": 0.347887572918508, "grad_norm": 6.67368745803833, "learning_rate": 1.7685347392773023e-05, "loss": 1.6941, "step": 55350 }, { "epoch": 0.34795042523520514, "grad_norm": 7.398200988769531, "learning_rate": 1.768492829182837e-05, "loss": 1.623, "step": 55360 }, { "epoch": 0.34801327755190226, "grad_norm": 7.012044429779053, "learning_rate": 1.7684509190883717e-05, "loss": 1.8401, "step": 55370 }, { "epoch": 0.34807612986859937, "grad_norm": 7.086763858795166, "learning_rate": 1.7684090089939065e-05, "loss": 1.6442, "step": 55380 }, { "epoch": 0.3481389821852965, "grad_norm": 6.619652271270752, "learning_rate": 1.768367098899441e-05, "loss": 1.6453, "step": 55390 }, { "epoch": 0.3482018345019936, "grad_norm": 8.069409370422363, "learning_rate": 1.768325188804976e-05, "loss": 1.8919, "step": 55400 }, { "epoch": 0.3482646868186907, "grad_norm": 6.691745758056641, "learning_rate": 1.7682832787105106e-05, "loss": 1.7724, "step": 55410 }, { "epoch": 0.34832753913538783, "grad_norm": 6.368143558502197, "learning_rate": 1.768241368616045e-05, "loss": 1.6134, "step": 55420 }, { "epoch": 0.34839039145208495, "grad_norm": 7.056820869445801, "learning_rate": 1.7681994585215796e-05, "loss": 2.0447, "step": 55430 }, { "epoch": 0.34845324376878206, "grad_norm": 7.876461982727051, "learning_rate": 1.7681575484271144e-05, "loss": 1.6026, "step": 55440 }, { "epoch": 0.3485160960854792, "grad_norm": 6.074370384216309, "learning_rate": 1.768115638332649e-05, "loss": 1.8327, "step": 55450 }, { "epoch": 0.34857894840217624, "grad_norm": 6.838947772979736, "learning_rate": 1.7680737282381838e-05, "loss": 1.8354, "step": 55460 }, { "epoch": 0.34864180071887335, "grad_norm": 6.722227096557617, "learning_rate": 1.768031818143718e-05, "loss": 1.7603, "step": 55470 }, { "epoch": 0.34870465303557047, "grad_norm": 7.02903413772583, "learning_rate": 1.767989908049253e-05, "loss": 1.6104, "step": 55480 }, { "epoch": 0.3487675053522676, "grad_norm": 7.181138515472412, "learning_rate": 1.7679479979547876e-05, "loss": 1.8003, "step": 55490 }, { "epoch": 0.3488303576689647, "grad_norm": 6.824518203735352, "learning_rate": 1.7679060878603223e-05, "loss": 1.761, "step": 55500 }, { "epoch": 0.3488932099856618, "grad_norm": 6.2776312828063965, "learning_rate": 1.7678641777658566e-05, "loss": 1.7548, "step": 55510 }, { "epoch": 0.3489560623023589, "grad_norm": 6.086516857147217, "learning_rate": 1.7678222676713913e-05, "loss": 1.8004, "step": 55520 }, { "epoch": 0.34901891461905604, "grad_norm": 5.898885250091553, "learning_rate": 1.767780357576926e-05, "loss": 1.9629, "step": 55530 }, { "epoch": 0.34908176693575316, "grad_norm": 7.097609519958496, "learning_rate": 1.7677384474824607e-05, "loss": 1.5366, "step": 55540 }, { "epoch": 0.3491446192524503, "grad_norm": 8.600543975830078, "learning_rate": 1.7676965373879955e-05, "loss": 1.6635, "step": 55550 }, { "epoch": 0.3492074715691474, "grad_norm": 7.00174617767334, "learning_rate": 1.7676546272935298e-05, "loss": 1.7336, "step": 55560 }, { "epoch": 0.3492703238858445, "grad_norm": 7.8161139488220215, "learning_rate": 1.7676127171990645e-05, "loss": 1.7531, "step": 55570 }, { "epoch": 0.3493331762025416, "grad_norm": 8.320804595947266, "learning_rate": 1.7675708071045992e-05, "loss": 1.6346, "step": 55580 }, { "epoch": 0.3493960285192387, "grad_norm": 6.894820213317871, "learning_rate": 1.767528897010134e-05, "loss": 1.6617, "step": 55590 }, { "epoch": 0.3494588808359358, "grad_norm": 7.591100692749023, "learning_rate": 1.7674869869156687e-05, "loss": 1.9333, "step": 55600 }, { "epoch": 0.3495217331526329, "grad_norm": 7.248533248901367, "learning_rate": 1.7674450768212034e-05, "loss": 1.8233, "step": 55610 }, { "epoch": 0.34958458546933, "grad_norm": 7.202202320098877, "learning_rate": 1.767403166726738e-05, "loss": 1.79, "step": 55620 }, { "epoch": 0.34964743778602714, "grad_norm": 7.5717620849609375, "learning_rate": 1.7673612566322728e-05, "loss": 1.808, "step": 55630 }, { "epoch": 0.34971029010272425, "grad_norm": 7.001557350158691, "learning_rate": 1.7673193465378075e-05, "loss": 1.7344, "step": 55640 }, { "epoch": 0.34977314241942137, "grad_norm": 6.482027530670166, "learning_rate": 1.767277436443342e-05, "loss": 1.7985, "step": 55650 }, { "epoch": 0.3498359947361185, "grad_norm": 7.108136177062988, "learning_rate": 1.7672355263488766e-05, "loss": 1.7908, "step": 55660 }, { "epoch": 0.3498988470528156, "grad_norm": 7.42733097076416, "learning_rate": 1.7671936162544113e-05, "loss": 1.8822, "step": 55670 }, { "epoch": 0.3499616993695127, "grad_norm": 6.271646976470947, "learning_rate": 1.767151706159946e-05, "loss": 1.685, "step": 55680 }, { "epoch": 0.35002455168620983, "grad_norm": 6.688443183898926, "learning_rate": 1.7671097960654803e-05, "loss": 1.7514, "step": 55690 }, { "epoch": 0.35008740400290694, "grad_norm": 6.73694372177124, "learning_rate": 1.767067885971015e-05, "loss": 1.658, "step": 55700 }, { "epoch": 0.350150256319604, "grad_norm": 6.9528350830078125, "learning_rate": 1.7670259758765498e-05, "loss": 1.7752, "step": 55710 }, { "epoch": 0.3502131086363011, "grad_norm": 9.307398796081543, "learning_rate": 1.7669840657820845e-05, "loss": 1.7258, "step": 55720 }, { "epoch": 0.35027596095299823, "grad_norm": 7.5922088623046875, "learning_rate": 1.7669421556876188e-05, "loss": 1.6945, "step": 55730 }, { "epoch": 0.35033881326969535, "grad_norm": 7.935495376586914, "learning_rate": 1.7669002455931535e-05, "loss": 1.9312, "step": 55740 }, { "epoch": 0.35040166558639246, "grad_norm": 7.695913791656494, "learning_rate": 1.7668583354986882e-05, "loss": 1.7118, "step": 55750 }, { "epoch": 0.3504645179030896, "grad_norm": 6.249854564666748, "learning_rate": 1.766816425404223e-05, "loss": 1.7209, "step": 55760 }, { "epoch": 0.3505273702197867, "grad_norm": 7.787451267242432, "learning_rate": 1.7667745153097577e-05, "loss": 1.7256, "step": 55770 }, { "epoch": 0.3505902225364838, "grad_norm": 6.477205753326416, "learning_rate": 1.7667326052152924e-05, "loss": 1.7085, "step": 55780 }, { "epoch": 0.3506530748531809, "grad_norm": 6.334481716156006, "learning_rate": 1.766690695120827e-05, "loss": 1.6984, "step": 55790 }, { "epoch": 0.35071592716987804, "grad_norm": 7.644689083099365, "learning_rate": 1.7666487850263618e-05, "loss": 1.8495, "step": 55800 }, { "epoch": 0.35077877948657515, "grad_norm": 6.420305252075195, "learning_rate": 1.766606874931896e-05, "loss": 1.6571, "step": 55810 }, { "epoch": 0.35084163180327227, "grad_norm": 6.823786735534668, "learning_rate": 1.766564964837431e-05, "loss": 1.942, "step": 55820 }, { "epoch": 0.3509044841199694, "grad_norm": 7.600020408630371, "learning_rate": 1.7665230547429656e-05, "loss": 1.7846, "step": 55830 }, { "epoch": 0.35096733643666644, "grad_norm": 7.309628486633301, "learning_rate": 1.7664811446485003e-05, "loss": 1.81, "step": 55840 }, { "epoch": 0.35103018875336356, "grad_norm": 8.144548416137695, "learning_rate": 1.766439234554035e-05, "loss": 2.0249, "step": 55850 }, { "epoch": 0.3510930410700607, "grad_norm": 7.690821170806885, "learning_rate": 1.7663973244595697e-05, "loss": 1.7633, "step": 55860 }, { "epoch": 0.3511558933867578, "grad_norm": 7.913681983947754, "learning_rate": 1.766355414365104e-05, "loss": 1.8007, "step": 55870 }, { "epoch": 0.3512187457034549, "grad_norm": 6.931325435638428, "learning_rate": 1.7663135042706388e-05, "loss": 1.9441, "step": 55880 }, { "epoch": 0.351281598020152, "grad_norm": 6.242430686950684, "learning_rate": 1.7662715941761735e-05, "loss": 1.5776, "step": 55890 }, { "epoch": 0.35134445033684913, "grad_norm": 6.786398410797119, "learning_rate": 1.7662296840817082e-05, "loss": 1.7039, "step": 55900 }, { "epoch": 0.35140730265354625, "grad_norm": 5.989193916320801, "learning_rate": 1.7661877739872425e-05, "loss": 1.869, "step": 55910 }, { "epoch": 0.35147015497024336, "grad_norm": 5.95859956741333, "learning_rate": 1.7661458638927772e-05, "loss": 1.704, "step": 55920 }, { "epoch": 0.3515330072869405, "grad_norm": 6.023372173309326, "learning_rate": 1.766103953798312e-05, "loss": 1.8161, "step": 55930 }, { "epoch": 0.3515958596036376, "grad_norm": 7.1503005027771, "learning_rate": 1.7660620437038467e-05, "loss": 1.911, "step": 55940 }, { "epoch": 0.3516587119203347, "grad_norm": 7.601272106170654, "learning_rate": 1.7660201336093814e-05, "loss": 1.8136, "step": 55950 }, { "epoch": 0.3517215642370318, "grad_norm": 5.760213375091553, "learning_rate": 1.7659782235149157e-05, "loss": 1.8077, "step": 55960 }, { "epoch": 0.3517844165537289, "grad_norm": 7.783840179443359, "learning_rate": 1.7659363134204504e-05, "loss": 1.9415, "step": 55970 }, { "epoch": 0.351847268870426, "grad_norm": 6.4394073486328125, "learning_rate": 1.765894403325985e-05, "loss": 1.7765, "step": 55980 }, { "epoch": 0.3519101211871231, "grad_norm": 8.876471519470215, "learning_rate": 1.76585249323152e-05, "loss": 2.0747, "step": 55990 }, { "epoch": 0.35197297350382023, "grad_norm": 6.266054630279541, "learning_rate": 1.7658105831370546e-05, "loss": 1.7959, "step": 56000 }, { "epoch": 0.35203582582051735, "grad_norm": 7.980495452880859, "learning_rate": 1.7657686730425893e-05, "loss": 1.6238, "step": 56010 }, { "epoch": 0.35209867813721446, "grad_norm": 6.909236907958984, "learning_rate": 1.765726762948124e-05, "loss": 1.7801, "step": 56020 }, { "epoch": 0.3521615304539116, "grad_norm": 7.640350341796875, "learning_rate": 1.7656848528536587e-05, "loss": 1.6987, "step": 56030 }, { "epoch": 0.3522243827706087, "grad_norm": 6.777658462524414, "learning_rate": 1.765642942759193e-05, "loss": 1.8309, "step": 56040 }, { "epoch": 0.3522872350873058, "grad_norm": 7.5713725090026855, "learning_rate": 1.7656010326647278e-05, "loss": 1.7728, "step": 56050 }, { "epoch": 0.3523500874040029, "grad_norm": 7.952373027801514, "learning_rate": 1.7655591225702625e-05, "loss": 1.9563, "step": 56060 }, { "epoch": 0.35241293972070004, "grad_norm": 7.443223476409912, "learning_rate": 1.7655172124757972e-05, "loss": 1.8321, "step": 56070 }, { "epoch": 0.35247579203739715, "grad_norm": 6.388519287109375, "learning_rate": 1.765475302381332e-05, "loss": 1.593, "step": 56080 }, { "epoch": 0.35253864435409427, "grad_norm": 6.434205055236816, "learning_rate": 1.7654333922868662e-05, "loss": 1.991, "step": 56090 }, { "epoch": 0.3526014966707913, "grad_norm": 6.544973850250244, "learning_rate": 1.765391482192401e-05, "loss": 1.7038, "step": 56100 }, { "epoch": 0.35266434898748844, "grad_norm": 6.311568260192871, "learning_rate": 1.7653495720979357e-05, "loss": 1.8253, "step": 56110 }, { "epoch": 0.35272720130418556, "grad_norm": 7.9773359298706055, "learning_rate": 1.7653076620034704e-05, "loss": 1.7429, "step": 56120 }, { "epoch": 0.35279005362088267, "grad_norm": 7.168190002441406, "learning_rate": 1.7652657519090047e-05, "loss": 1.7981, "step": 56130 }, { "epoch": 0.3528529059375798, "grad_norm": 7.579220294952393, "learning_rate": 1.7652238418145394e-05, "loss": 1.6961, "step": 56140 }, { "epoch": 0.3529157582542769, "grad_norm": 8.63365364074707, "learning_rate": 1.765181931720074e-05, "loss": 1.8155, "step": 56150 }, { "epoch": 0.352978610570974, "grad_norm": 6.581787109375, "learning_rate": 1.765140021625609e-05, "loss": 1.8945, "step": 56160 }, { "epoch": 0.35304146288767113, "grad_norm": 6.249863147735596, "learning_rate": 1.7650981115311436e-05, "loss": 1.8585, "step": 56170 }, { "epoch": 0.35310431520436825, "grad_norm": 7.682397365570068, "learning_rate": 1.7650562014366783e-05, "loss": 1.5529, "step": 56180 }, { "epoch": 0.35316716752106536, "grad_norm": 6.750224590301514, "learning_rate": 1.7650142913422126e-05, "loss": 1.7804, "step": 56190 }, { "epoch": 0.3532300198377625, "grad_norm": 7.524214267730713, "learning_rate": 1.7649723812477473e-05, "loss": 1.5981, "step": 56200 }, { "epoch": 0.3532928721544596, "grad_norm": 6.7702317237854, "learning_rate": 1.764930471153282e-05, "loss": 1.9306, "step": 56210 }, { "epoch": 0.35335572447115665, "grad_norm": 7.400282859802246, "learning_rate": 1.7648885610588168e-05, "loss": 1.7075, "step": 56220 }, { "epoch": 0.35341857678785377, "grad_norm": 7.539808750152588, "learning_rate": 1.7648466509643515e-05, "loss": 1.8542, "step": 56230 }, { "epoch": 0.3534814291045509, "grad_norm": 6.924534320831299, "learning_rate": 1.7648047408698862e-05, "loss": 1.8836, "step": 56240 }, { "epoch": 0.353544281421248, "grad_norm": 7.1117844581604, "learning_rate": 1.764762830775421e-05, "loss": 1.63, "step": 56250 }, { "epoch": 0.3536071337379451, "grad_norm": 7.669754981994629, "learning_rate": 1.7647209206809556e-05, "loss": 1.6712, "step": 56260 }, { "epoch": 0.3536699860546422, "grad_norm": 6.416409969329834, "learning_rate": 1.76467901058649e-05, "loss": 1.9183, "step": 56270 }, { "epoch": 0.35373283837133934, "grad_norm": 6.7374587059021, "learning_rate": 1.7646371004920247e-05, "loss": 1.8945, "step": 56280 }, { "epoch": 0.35379569068803646, "grad_norm": 5.99789571762085, "learning_rate": 1.7645951903975594e-05, "loss": 1.6532, "step": 56290 }, { "epoch": 0.3538585430047336, "grad_norm": 6.0619354248046875, "learning_rate": 1.764553280303094e-05, "loss": 1.7796, "step": 56300 }, { "epoch": 0.3539213953214307, "grad_norm": 6.364901542663574, "learning_rate": 1.7645113702086284e-05, "loss": 1.744, "step": 56310 }, { "epoch": 0.3539842476381278, "grad_norm": 6.619995594024658, "learning_rate": 1.764469460114163e-05, "loss": 1.7518, "step": 56320 }, { "epoch": 0.3540470999548249, "grad_norm": 9.150291442871094, "learning_rate": 1.764427550019698e-05, "loss": 1.6545, "step": 56330 }, { "epoch": 0.35410995227152203, "grad_norm": 7.180041790008545, "learning_rate": 1.7643856399252326e-05, "loss": 1.6696, "step": 56340 }, { "epoch": 0.3541728045882191, "grad_norm": 6.223598480224609, "learning_rate": 1.7643437298307673e-05, "loss": 1.9895, "step": 56350 }, { "epoch": 0.3542356569049162, "grad_norm": 6.764214038848877, "learning_rate": 1.7643018197363016e-05, "loss": 1.7926, "step": 56360 }, { "epoch": 0.3542985092216133, "grad_norm": 7.384403705596924, "learning_rate": 1.7642599096418364e-05, "loss": 1.7538, "step": 56370 }, { "epoch": 0.35436136153831044, "grad_norm": 6.367946624755859, "learning_rate": 1.764217999547371e-05, "loss": 1.9666, "step": 56380 }, { "epoch": 0.35442421385500755, "grad_norm": 6.674903869628906, "learning_rate": 1.7641760894529058e-05, "loss": 1.7066, "step": 56390 }, { "epoch": 0.35448706617170467, "grad_norm": 7.0921549797058105, "learning_rate": 1.7641341793584405e-05, "loss": 1.5394, "step": 56400 }, { "epoch": 0.3545499184884018, "grad_norm": 7.411281585693359, "learning_rate": 1.7640922692639752e-05, "loss": 1.6813, "step": 56410 }, { "epoch": 0.3546127708050989, "grad_norm": 6.548740386962891, "learning_rate": 1.76405035916951e-05, "loss": 1.7838, "step": 56420 }, { "epoch": 0.354675623121796, "grad_norm": 7.3828840255737305, "learning_rate": 1.7640084490750446e-05, "loss": 1.7112, "step": 56430 }, { "epoch": 0.35473847543849313, "grad_norm": 6.164304733276367, "learning_rate": 1.763966538980579e-05, "loss": 1.7097, "step": 56440 }, { "epoch": 0.35480132775519024, "grad_norm": 7.027005672454834, "learning_rate": 1.7639246288861137e-05, "loss": 2.0333, "step": 56450 }, { "epoch": 0.35486418007188736, "grad_norm": 7.092922210693359, "learning_rate": 1.7638827187916484e-05, "loss": 1.9929, "step": 56460 }, { "epoch": 0.3549270323885845, "grad_norm": 6.158232688903809, "learning_rate": 1.763840808697183e-05, "loss": 1.7665, "step": 56470 }, { "epoch": 0.35498988470528153, "grad_norm": 7.426692485809326, "learning_rate": 1.7637988986027178e-05, "loss": 1.9048, "step": 56480 }, { "epoch": 0.35505273702197865, "grad_norm": 7.2738871574401855, "learning_rate": 1.763756988508252e-05, "loss": 1.6266, "step": 56490 }, { "epoch": 0.35511558933867576, "grad_norm": 6.55369234085083, "learning_rate": 1.763715078413787e-05, "loss": 1.6879, "step": 56500 }, { "epoch": 0.3551784416553729, "grad_norm": 6.6980767250061035, "learning_rate": 1.7636731683193216e-05, "loss": 1.7281, "step": 56510 }, { "epoch": 0.35524129397207, "grad_norm": 6.520328044891357, "learning_rate": 1.7636312582248563e-05, "loss": 2.037, "step": 56520 }, { "epoch": 0.3553041462887671, "grad_norm": 5.384095191955566, "learning_rate": 1.7635893481303906e-05, "loss": 1.7778, "step": 56530 }, { "epoch": 0.3553669986054642, "grad_norm": 6.548918724060059, "learning_rate": 1.7635474380359254e-05, "loss": 1.9156, "step": 56540 }, { "epoch": 0.35542985092216134, "grad_norm": 7.573610782623291, "learning_rate": 1.76350552794146e-05, "loss": 1.8323, "step": 56550 }, { "epoch": 0.35549270323885845, "grad_norm": 8.479838371276855, "learning_rate": 1.7634636178469948e-05, "loss": 1.8084, "step": 56560 }, { "epoch": 0.35555555555555557, "grad_norm": 6.977817058563232, "learning_rate": 1.7634217077525295e-05, "loss": 1.6215, "step": 56570 }, { "epoch": 0.3556184078722527, "grad_norm": 6.460672855377197, "learning_rate": 1.763379797658064e-05, "loss": 1.9539, "step": 56580 }, { "epoch": 0.3556812601889498, "grad_norm": 6.926926612854004, "learning_rate": 1.7633378875635986e-05, "loss": 1.7129, "step": 56590 }, { "epoch": 0.3557441125056469, "grad_norm": 5.917463779449463, "learning_rate": 1.7632959774691333e-05, "loss": 1.8907, "step": 56600 }, { "epoch": 0.355806964822344, "grad_norm": 8.021463394165039, "learning_rate": 1.763254067374668e-05, "loss": 1.7799, "step": 56610 }, { "epoch": 0.3558698171390411, "grad_norm": 7.956747531890869, "learning_rate": 1.7632121572802027e-05, "loss": 1.598, "step": 56620 }, { "epoch": 0.3559326694557382, "grad_norm": 7.105515003204346, "learning_rate": 1.7631702471857374e-05, "loss": 1.8763, "step": 56630 }, { "epoch": 0.3559955217724353, "grad_norm": 6.7249274253845215, "learning_rate": 1.763128337091272e-05, "loss": 1.6253, "step": 56640 }, { "epoch": 0.35605837408913243, "grad_norm": 6.784914970397949, "learning_rate": 1.7630864269968068e-05, "loss": 1.8498, "step": 56650 }, { "epoch": 0.35612122640582955, "grad_norm": 7.381931304931641, "learning_rate": 1.7630445169023415e-05, "loss": 1.9151, "step": 56660 }, { "epoch": 0.35618407872252666, "grad_norm": 7.608487606048584, "learning_rate": 1.763002606807876e-05, "loss": 1.7975, "step": 56670 }, { "epoch": 0.3562469310392238, "grad_norm": 7.6619486808776855, "learning_rate": 1.7629606967134106e-05, "loss": 1.614, "step": 56680 }, { "epoch": 0.3563097833559209, "grad_norm": 5.907870292663574, "learning_rate": 1.7629187866189453e-05, "loss": 2.0249, "step": 56690 }, { "epoch": 0.356372635672618, "grad_norm": 6.495619773864746, "learning_rate": 1.76287687652448e-05, "loss": 2.0282, "step": 56700 }, { "epoch": 0.3564354879893151, "grad_norm": 7.167433738708496, "learning_rate": 1.7628349664300144e-05, "loss": 1.6641, "step": 56710 }, { "epoch": 0.35649834030601224, "grad_norm": 7.4635796546936035, "learning_rate": 1.762793056335549e-05, "loss": 2.0146, "step": 56720 }, { "epoch": 0.35656119262270936, "grad_norm": 7.188560962677002, "learning_rate": 1.7627511462410838e-05, "loss": 1.5855, "step": 56730 }, { "epoch": 0.3566240449394064, "grad_norm": 8.38038444519043, "learning_rate": 1.7627092361466185e-05, "loss": 1.8899, "step": 56740 }, { "epoch": 0.35668689725610353, "grad_norm": 8.065149307250977, "learning_rate": 1.762667326052153e-05, "loss": 1.8397, "step": 56750 }, { "epoch": 0.35674974957280065, "grad_norm": 6.2089314460754395, "learning_rate": 1.7626254159576876e-05, "loss": 1.5817, "step": 56760 }, { "epoch": 0.35681260188949776, "grad_norm": 7.829500675201416, "learning_rate": 1.7625835058632223e-05, "loss": 1.7704, "step": 56770 }, { "epoch": 0.3568754542061949, "grad_norm": 6.593550205230713, "learning_rate": 1.762541595768757e-05, "loss": 1.6037, "step": 56780 }, { "epoch": 0.356938306522892, "grad_norm": 5.2003045082092285, "learning_rate": 1.7624996856742917e-05, "loss": 1.5487, "step": 56790 }, { "epoch": 0.3570011588395891, "grad_norm": 7.296988487243652, "learning_rate": 1.7624577755798264e-05, "loss": 1.999, "step": 56800 }, { "epoch": 0.3570640111562862, "grad_norm": 6.731233596801758, "learning_rate": 1.762415865485361e-05, "loss": 1.68, "step": 56810 }, { "epoch": 0.35712686347298334, "grad_norm": 8.03806209564209, "learning_rate": 1.7623739553908955e-05, "loss": 1.9335, "step": 56820 }, { "epoch": 0.35718971578968045, "grad_norm": 7.351186275482178, "learning_rate": 1.76233204529643e-05, "loss": 1.7425, "step": 56830 }, { "epoch": 0.35725256810637757, "grad_norm": 7.279660701751709, "learning_rate": 1.762290135201965e-05, "loss": 1.8906, "step": 56840 }, { "epoch": 0.3573154204230747, "grad_norm": 7.247702598571777, "learning_rate": 1.7622482251074996e-05, "loss": 1.7041, "step": 56850 }, { "epoch": 0.35737827273977174, "grad_norm": 7.7054290771484375, "learning_rate": 1.7622063150130343e-05, "loss": 1.7601, "step": 56860 }, { "epoch": 0.35744112505646886, "grad_norm": 7.007997989654541, "learning_rate": 1.762164404918569e-05, "loss": 2.0024, "step": 56870 }, { "epoch": 0.35750397737316597, "grad_norm": 5.976104736328125, "learning_rate": 1.7621224948241037e-05, "loss": 2.0155, "step": 56880 }, { "epoch": 0.3575668296898631, "grad_norm": 6.137852668762207, "learning_rate": 1.762080584729638e-05, "loss": 1.6285, "step": 56890 }, { "epoch": 0.3576296820065602, "grad_norm": 7.945735454559326, "learning_rate": 1.7620386746351728e-05, "loss": 1.8247, "step": 56900 }, { "epoch": 0.3576925343232573, "grad_norm": 6.811553955078125, "learning_rate": 1.7619967645407075e-05, "loss": 1.7663, "step": 56910 }, { "epoch": 0.35775538663995443, "grad_norm": 7.087430000305176, "learning_rate": 1.7619548544462422e-05, "loss": 1.6412, "step": 56920 }, { "epoch": 0.35781823895665155, "grad_norm": 8.247753143310547, "learning_rate": 1.7619129443517766e-05, "loss": 1.8546, "step": 56930 }, { "epoch": 0.35788109127334866, "grad_norm": 7.284507751464844, "learning_rate": 1.7618710342573113e-05, "loss": 1.7229, "step": 56940 }, { "epoch": 0.3579439435900458, "grad_norm": 6.562889575958252, "learning_rate": 1.761829124162846e-05, "loss": 1.823, "step": 56950 }, { "epoch": 0.3580067959067429, "grad_norm": 7.026453971862793, "learning_rate": 1.7617872140683807e-05, "loss": 1.8297, "step": 56960 }, { "epoch": 0.35806964822344, "grad_norm": 6.744218349456787, "learning_rate": 1.7617453039739154e-05, "loss": 1.7583, "step": 56970 }, { "epoch": 0.3581325005401371, "grad_norm": 7.250420093536377, "learning_rate": 1.7617033938794498e-05, "loss": 1.8777, "step": 56980 }, { "epoch": 0.3581953528568342, "grad_norm": 7.0358123779296875, "learning_rate": 1.7616614837849845e-05, "loss": 1.6609, "step": 56990 }, { "epoch": 0.3582582051735313, "grad_norm": 6.225969314575195, "learning_rate": 1.7616195736905192e-05, "loss": 1.5569, "step": 57000 }, { "epoch": 0.3583210574902284, "grad_norm": 5.838583469390869, "learning_rate": 1.761577663596054e-05, "loss": 1.4379, "step": 57010 }, { "epoch": 0.3583839098069255, "grad_norm": 6.432792663574219, "learning_rate": 1.7615357535015886e-05, "loss": 2.0285, "step": 57020 }, { "epoch": 0.35844676212362264, "grad_norm": 9.680623054504395, "learning_rate": 1.7614938434071233e-05, "loss": 1.7972, "step": 57030 }, { "epoch": 0.35850961444031976, "grad_norm": 7.464869499206543, "learning_rate": 1.761451933312658e-05, "loss": 1.6548, "step": 57040 }, { "epoch": 0.35857246675701687, "grad_norm": 7.695644855499268, "learning_rate": 1.7614100232181927e-05, "loss": 2.1453, "step": 57050 }, { "epoch": 0.358635319073714, "grad_norm": 7.432145595550537, "learning_rate": 1.761368113123727e-05, "loss": 1.8533, "step": 57060 }, { "epoch": 0.3586981713904111, "grad_norm": 8.367108345031738, "learning_rate": 1.7613262030292618e-05, "loss": 2.0194, "step": 57070 }, { "epoch": 0.3587610237071082, "grad_norm": 8.933586120605469, "learning_rate": 1.7612842929347965e-05, "loss": 1.9469, "step": 57080 }, { "epoch": 0.35882387602380533, "grad_norm": 7.142405986785889, "learning_rate": 1.7612423828403312e-05, "loss": 1.8927, "step": 57090 }, { "epoch": 0.35888672834050245, "grad_norm": 6.7373738288879395, "learning_rate": 1.761200472745866e-05, "loss": 1.6524, "step": 57100 }, { "epoch": 0.35894958065719956, "grad_norm": 6.742250442504883, "learning_rate": 1.7611585626514003e-05, "loss": 1.7861, "step": 57110 }, { "epoch": 0.3590124329738966, "grad_norm": 6.655519962310791, "learning_rate": 1.761116652556935e-05, "loss": 1.8957, "step": 57120 }, { "epoch": 0.35907528529059374, "grad_norm": 7.259047508239746, "learning_rate": 1.7610747424624697e-05, "loss": 1.8484, "step": 57130 }, { "epoch": 0.35913813760729085, "grad_norm": 6.946603298187256, "learning_rate": 1.7610328323680044e-05, "loss": 1.8535, "step": 57140 }, { "epoch": 0.35920098992398797, "grad_norm": 7.809139728546143, "learning_rate": 1.7609909222735388e-05, "loss": 1.7191, "step": 57150 }, { "epoch": 0.3592638422406851, "grad_norm": 6.612559795379639, "learning_rate": 1.7609490121790735e-05, "loss": 1.9152, "step": 57160 }, { "epoch": 0.3593266945573822, "grad_norm": 6.120494365692139, "learning_rate": 1.7609071020846082e-05, "loss": 1.8226, "step": 57170 }, { "epoch": 0.3593895468740793, "grad_norm": 6.220257759094238, "learning_rate": 1.760865191990143e-05, "loss": 1.8188, "step": 57180 }, { "epoch": 0.35945239919077643, "grad_norm": 7.373902320861816, "learning_rate": 1.7608232818956776e-05, "loss": 1.8435, "step": 57190 }, { "epoch": 0.35951525150747354, "grad_norm": 6.463894367218018, "learning_rate": 1.760781371801212e-05, "loss": 1.695, "step": 57200 }, { "epoch": 0.35957810382417066, "grad_norm": 7.294466495513916, "learning_rate": 1.7607394617067467e-05, "loss": 1.7689, "step": 57210 }, { "epoch": 0.3596409561408678, "grad_norm": 7.174213409423828, "learning_rate": 1.7606975516122814e-05, "loss": 1.9515, "step": 57220 }, { "epoch": 0.3597038084575649, "grad_norm": 8.09652042388916, "learning_rate": 1.760655641517816e-05, "loss": 1.732, "step": 57230 }, { "epoch": 0.359766660774262, "grad_norm": 6.779608726501465, "learning_rate": 1.7606137314233508e-05, "loss": 1.8146, "step": 57240 }, { "epoch": 0.35982951309095906, "grad_norm": 6.073554039001465, "learning_rate": 1.7605718213288855e-05, "loss": 2.0628, "step": 57250 }, { "epoch": 0.3598923654076562, "grad_norm": 7.001215934753418, "learning_rate": 1.7605299112344202e-05, "loss": 1.7843, "step": 57260 }, { "epoch": 0.3599552177243533, "grad_norm": 6.757166862487793, "learning_rate": 1.760488001139955e-05, "loss": 1.6847, "step": 57270 }, { "epoch": 0.3600180700410504, "grad_norm": 7.732872009277344, "learning_rate": 1.7604460910454896e-05, "loss": 1.6415, "step": 57280 }, { "epoch": 0.3600809223577475, "grad_norm": 6.817984104156494, "learning_rate": 1.760404180951024e-05, "loss": 1.68, "step": 57290 }, { "epoch": 0.36014377467444464, "grad_norm": 7.102740287780762, "learning_rate": 1.7603622708565587e-05, "loss": 1.7997, "step": 57300 }, { "epoch": 0.36020662699114175, "grad_norm": 6.813784122467041, "learning_rate": 1.7603203607620934e-05, "loss": 1.8612, "step": 57310 }, { "epoch": 0.36026947930783887, "grad_norm": 6.347372055053711, "learning_rate": 1.760278450667628e-05, "loss": 1.6315, "step": 57320 }, { "epoch": 0.360332331624536, "grad_norm": 8.70351505279541, "learning_rate": 1.7602365405731625e-05, "loss": 1.7351, "step": 57330 }, { "epoch": 0.3603951839412331, "grad_norm": 7.5761823654174805, "learning_rate": 1.7601946304786972e-05, "loss": 1.7419, "step": 57340 }, { "epoch": 0.3604580362579302, "grad_norm": 6.638217449188232, "learning_rate": 1.760152720384232e-05, "loss": 1.8665, "step": 57350 }, { "epoch": 0.36052088857462733, "grad_norm": 7.505904197692871, "learning_rate": 1.7601108102897666e-05, "loss": 1.715, "step": 57360 }, { "epoch": 0.3605837408913244, "grad_norm": 7.13544225692749, "learning_rate": 1.760068900195301e-05, "loss": 1.9328, "step": 57370 }, { "epoch": 0.3606465932080215, "grad_norm": 7.589418411254883, "learning_rate": 1.7600269901008357e-05, "loss": 1.6518, "step": 57380 }, { "epoch": 0.3607094455247186, "grad_norm": 7.7186713218688965, "learning_rate": 1.7599850800063704e-05, "loss": 1.7416, "step": 57390 }, { "epoch": 0.36077229784141573, "grad_norm": 7.319962978363037, "learning_rate": 1.759943169911905e-05, "loss": 1.829, "step": 57400 }, { "epoch": 0.36083515015811285, "grad_norm": 7.251899242401123, "learning_rate": 1.7599012598174398e-05, "loss": 2.0038, "step": 57410 }, { "epoch": 0.36089800247480996, "grad_norm": 6.347203731536865, "learning_rate": 1.7598593497229745e-05, "loss": 1.791, "step": 57420 }, { "epoch": 0.3609608547915071, "grad_norm": 6.426996231079102, "learning_rate": 1.7598174396285092e-05, "loss": 1.917, "step": 57430 }, { "epoch": 0.3610237071082042, "grad_norm": 6.93100643157959, "learning_rate": 1.7597755295340436e-05, "loss": 1.7918, "step": 57440 }, { "epoch": 0.3610865594249013, "grad_norm": 6.777101516723633, "learning_rate": 1.7597336194395783e-05, "loss": 1.8492, "step": 57450 }, { "epoch": 0.3611494117415984, "grad_norm": 6.8046698570251465, "learning_rate": 1.759691709345113e-05, "loss": 1.5858, "step": 57460 }, { "epoch": 0.36121226405829554, "grad_norm": 6.279371738433838, "learning_rate": 1.7596497992506477e-05, "loss": 1.7344, "step": 57470 }, { "epoch": 0.36127511637499266, "grad_norm": 7.617173671722412, "learning_rate": 1.7596078891561824e-05, "loss": 1.8127, "step": 57480 }, { "epoch": 0.36133796869168977, "grad_norm": 6.920362949371338, "learning_rate": 1.7595701700711635e-05, "loss": 1.749, "step": 57490 }, { "epoch": 0.36140082100838683, "grad_norm": 6.679447174072266, "learning_rate": 1.7595282599766982e-05, "loss": 1.7885, "step": 57500 }, { "epoch": 0.36146367332508395, "grad_norm": 6.789583206176758, "learning_rate": 1.7594863498822326e-05, "loss": 1.9391, "step": 57510 }, { "epoch": 0.36152652564178106, "grad_norm": 7.509593486785889, "learning_rate": 1.7594444397877673e-05, "loss": 1.9867, "step": 57520 }, { "epoch": 0.3615893779584782, "grad_norm": 7.413074970245361, "learning_rate": 1.759402529693302e-05, "loss": 1.8085, "step": 57530 }, { "epoch": 0.3616522302751753, "grad_norm": 6.680906772613525, "learning_rate": 1.7593606195988367e-05, "loss": 1.8518, "step": 57540 }, { "epoch": 0.3617150825918724, "grad_norm": 6.255010604858398, "learning_rate": 1.7593187095043714e-05, "loss": 1.8553, "step": 57550 }, { "epoch": 0.3617779349085695, "grad_norm": 7.531172275543213, "learning_rate": 1.759276799409906e-05, "loss": 1.9392, "step": 57560 }, { "epoch": 0.36184078722526664, "grad_norm": 7.901610374450684, "learning_rate": 1.759234889315441e-05, "loss": 1.7256, "step": 57570 }, { "epoch": 0.36190363954196375, "grad_norm": 7.6437458992004395, "learning_rate": 1.7591929792209752e-05, "loss": 1.6673, "step": 57580 }, { "epoch": 0.36196649185866087, "grad_norm": 6.726452827453613, "learning_rate": 1.75915106912651e-05, "loss": 1.8192, "step": 57590 }, { "epoch": 0.362029344175358, "grad_norm": 7.075668811798096, "learning_rate": 1.7591091590320446e-05, "loss": 1.5295, "step": 57600 }, { "epoch": 0.3620921964920551, "grad_norm": 6.800719738006592, "learning_rate": 1.7590672489375793e-05, "loss": 1.5967, "step": 57610 }, { "epoch": 0.3621550488087522, "grad_norm": 6.986769676208496, "learning_rate": 1.759025338843114e-05, "loss": 1.7338, "step": 57620 }, { "epoch": 0.36221790112544927, "grad_norm": 6.586205005645752, "learning_rate": 1.7589834287486484e-05, "loss": 1.7687, "step": 57630 }, { "epoch": 0.3622807534421464, "grad_norm": 7.2887372970581055, "learning_rate": 1.758941518654183e-05, "loss": 1.7958, "step": 57640 }, { "epoch": 0.3623436057588435, "grad_norm": 7.047494888305664, "learning_rate": 1.7588996085597178e-05, "loss": 1.8834, "step": 57650 }, { "epoch": 0.3624064580755406, "grad_norm": 7.804978370666504, "learning_rate": 1.7588576984652525e-05, "loss": 1.6487, "step": 57660 }, { "epoch": 0.36246931039223773, "grad_norm": 6.975365161895752, "learning_rate": 1.758815788370787e-05, "loss": 1.7598, "step": 57670 }, { "epoch": 0.36253216270893485, "grad_norm": 7.373959541320801, "learning_rate": 1.7587738782763216e-05, "loss": 1.7554, "step": 57680 }, { "epoch": 0.36259501502563196, "grad_norm": 7.800610542297363, "learning_rate": 1.7587319681818563e-05, "loss": 1.7406, "step": 57690 }, { "epoch": 0.3626578673423291, "grad_norm": 7.360167980194092, "learning_rate": 1.758690058087391e-05, "loss": 1.6835, "step": 57700 }, { "epoch": 0.3627207196590262, "grad_norm": 6.3451247215271, "learning_rate": 1.7586481479929257e-05, "loss": 1.9422, "step": 57710 }, { "epoch": 0.3627835719757233, "grad_norm": 8.34115219116211, "learning_rate": 1.7586062378984604e-05, "loss": 1.9304, "step": 57720 }, { "epoch": 0.3628464242924204, "grad_norm": 6.679686546325684, "learning_rate": 1.758564327803995e-05, "loss": 1.6348, "step": 57730 }, { "epoch": 0.36290927660911754, "grad_norm": 6.9500017166137695, "learning_rate": 1.75852241770953e-05, "loss": 1.8888, "step": 57740 }, { "epoch": 0.36297212892581465, "grad_norm": 6.302811622619629, "learning_rate": 1.7584805076150642e-05, "loss": 1.7369, "step": 57750 }, { "epoch": 0.3630349812425117, "grad_norm": 7.762177467346191, "learning_rate": 1.758438597520599e-05, "loss": 1.7919, "step": 57760 }, { "epoch": 0.3630978335592088, "grad_norm": 6.639379024505615, "learning_rate": 1.7583966874261336e-05, "loss": 1.8843, "step": 57770 }, { "epoch": 0.36316068587590594, "grad_norm": 6.072000026702881, "learning_rate": 1.7583547773316683e-05, "loss": 1.5028, "step": 57780 }, { "epoch": 0.36322353819260306, "grad_norm": 7.485529899597168, "learning_rate": 1.758312867237203e-05, "loss": 1.9513, "step": 57790 }, { "epoch": 0.36328639050930017, "grad_norm": 7.676440238952637, "learning_rate": 1.7582709571427374e-05, "loss": 1.569, "step": 57800 }, { "epoch": 0.3633492428259973, "grad_norm": 7.731540679931641, "learning_rate": 1.758229047048272e-05, "loss": 2.0069, "step": 57810 }, { "epoch": 0.3634120951426944, "grad_norm": 7.498253345489502, "learning_rate": 1.758187136953807e-05, "loss": 1.9307, "step": 57820 }, { "epoch": 0.3634749474593915, "grad_norm": 5.711714267730713, "learning_rate": 1.7581452268593415e-05, "loss": 1.6662, "step": 57830 }, { "epoch": 0.36353779977608863, "grad_norm": 7.644040584564209, "learning_rate": 1.7581033167648762e-05, "loss": 1.7376, "step": 57840 }, { "epoch": 0.36360065209278575, "grad_norm": 6.760619163513184, "learning_rate": 1.7580614066704106e-05, "loss": 1.8354, "step": 57850 }, { "epoch": 0.36366350440948286, "grad_norm": 6.621782302856445, "learning_rate": 1.7580194965759453e-05, "loss": 1.7233, "step": 57860 }, { "epoch": 0.36372635672618, "grad_norm": 7.562590599060059, "learning_rate": 1.75797758648148e-05, "loss": 1.636, "step": 57870 }, { "epoch": 0.36378920904287704, "grad_norm": 6.979267120361328, "learning_rate": 1.7579356763870147e-05, "loss": 1.8222, "step": 57880 }, { "epoch": 0.36385206135957415, "grad_norm": 6.307051181793213, "learning_rate": 1.757893766292549e-05, "loss": 1.5847, "step": 57890 }, { "epoch": 0.36391491367627127, "grad_norm": 6.724384307861328, "learning_rate": 1.7578518561980838e-05, "loss": 1.8899, "step": 57900 }, { "epoch": 0.3639777659929684, "grad_norm": 6.279197692871094, "learning_rate": 1.7578099461036185e-05, "loss": 1.8336, "step": 57910 }, { "epoch": 0.3640406183096655, "grad_norm": 6.882331371307373, "learning_rate": 1.7577680360091532e-05, "loss": 1.665, "step": 57920 }, { "epoch": 0.3641034706263626, "grad_norm": 5.85809850692749, "learning_rate": 1.757726125914688e-05, "loss": 1.8763, "step": 57930 }, { "epoch": 0.36416632294305973, "grad_norm": 6.824677467346191, "learning_rate": 1.7576842158202226e-05, "loss": 1.7015, "step": 57940 }, { "epoch": 0.36422917525975684, "grad_norm": 5.771215915679932, "learning_rate": 1.7576423057257573e-05, "loss": 1.759, "step": 57950 }, { "epoch": 0.36429202757645396, "grad_norm": 6.597382545471191, "learning_rate": 1.757600395631292e-05, "loss": 1.7348, "step": 57960 }, { "epoch": 0.3643548798931511, "grad_norm": 7.467145919799805, "learning_rate": 1.7575584855368268e-05, "loss": 2.0836, "step": 57970 }, { "epoch": 0.3644177322098482, "grad_norm": 7.873134613037109, "learning_rate": 1.757516575442361e-05, "loss": 1.8236, "step": 57980 }, { "epoch": 0.3644805845265453, "grad_norm": 7.0622239112854, "learning_rate": 1.757474665347896e-05, "loss": 1.843, "step": 57990 }, { "epoch": 0.3645434368432424, "grad_norm": 7.284359931945801, "learning_rate": 1.7574327552534305e-05, "loss": 1.6614, "step": 58000 }, { "epoch": 0.3646062891599395, "grad_norm": 6.610737323760986, "learning_rate": 1.7573908451589653e-05, "loss": 1.767, "step": 58010 }, { "epoch": 0.3646691414766366, "grad_norm": 6.371236324310303, "learning_rate": 1.7573489350645e-05, "loss": 1.8746, "step": 58020 }, { "epoch": 0.3647319937933337, "grad_norm": 6.598259449005127, "learning_rate": 1.7573070249700343e-05, "loss": 1.9581, "step": 58030 }, { "epoch": 0.3647948461100308, "grad_norm": 6.762608528137207, "learning_rate": 1.757265114875569e-05, "loss": 1.6776, "step": 58040 }, { "epoch": 0.36485769842672794, "grad_norm": 6.016773700714111, "learning_rate": 1.7572232047811037e-05, "loss": 1.6855, "step": 58050 }, { "epoch": 0.36492055074342505, "grad_norm": 5.729305744171143, "learning_rate": 1.7571812946866384e-05, "loss": 1.7281, "step": 58060 }, { "epoch": 0.36498340306012217, "grad_norm": 6.6363348960876465, "learning_rate": 1.7571393845921728e-05, "loss": 1.8752, "step": 58070 }, { "epoch": 0.3650462553768193, "grad_norm": 6.771478176116943, "learning_rate": 1.7570974744977075e-05, "loss": 1.7828, "step": 58080 }, { "epoch": 0.3651091076935164, "grad_norm": 6.573596954345703, "learning_rate": 1.7570555644032422e-05, "loss": 1.7419, "step": 58090 }, { "epoch": 0.3651719600102135, "grad_norm": 6.653927803039551, "learning_rate": 1.757013654308777e-05, "loss": 1.8487, "step": 58100 }, { "epoch": 0.36523481232691063, "grad_norm": 8.33236026763916, "learning_rate": 1.7569717442143116e-05, "loss": 1.7902, "step": 58110 }, { "epoch": 0.36529766464360774, "grad_norm": 6.885913372039795, "learning_rate": 1.7569298341198464e-05, "loss": 1.8252, "step": 58120 }, { "epoch": 0.36536051696030486, "grad_norm": 6.6269121170043945, "learning_rate": 1.7568879240253807e-05, "loss": 1.6639, "step": 58130 }, { "epoch": 0.3654233692770019, "grad_norm": 7.685762882232666, "learning_rate": 1.7568460139309154e-05, "loss": 2.0038, "step": 58140 }, { "epoch": 0.36548622159369903, "grad_norm": 7.660758018493652, "learning_rate": 1.75680410383645e-05, "loss": 1.8155, "step": 58150 }, { "epoch": 0.36554907391039615, "grad_norm": 6.429567337036133, "learning_rate": 1.756762193741985e-05, "loss": 1.78, "step": 58160 }, { "epoch": 0.36561192622709326, "grad_norm": 8.127532005310059, "learning_rate": 1.7567202836475195e-05, "loss": 1.7227, "step": 58170 }, { "epoch": 0.3656747785437904, "grad_norm": 7.09838342666626, "learning_rate": 1.7566783735530543e-05, "loss": 1.9313, "step": 58180 }, { "epoch": 0.3657376308604875, "grad_norm": 8.586379051208496, "learning_rate": 1.756636463458589e-05, "loss": 1.8507, "step": 58190 }, { "epoch": 0.3658004831771846, "grad_norm": 7.072535514831543, "learning_rate": 1.7565945533641233e-05, "loss": 1.7274, "step": 58200 }, { "epoch": 0.3658633354938817, "grad_norm": 7.332555770874023, "learning_rate": 1.756552643269658e-05, "loss": 1.5983, "step": 58210 }, { "epoch": 0.36592618781057884, "grad_norm": 7.714618682861328, "learning_rate": 1.7565107331751927e-05, "loss": 1.7813, "step": 58220 }, { "epoch": 0.36598904012727596, "grad_norm": 7.031050682067871, "learning_rate": 1.7564688230807275e-05, "loss": 2.0464, "step": 58230 }, { "epoch": 0.36605189244397307, "grad_norm": 6.090056419372559, "learning_rate": 1.756426912986262e-05, "loss": 1.6593, "step": 58240 }, { "epoch": 0.3661147447606702, "grad_norm": 7.858686447143555, "learning_rate": 1.7563850028917965e-05, "loss": 1.8271, "step": 58250 }, { "epoch": 0.3661775970773673, "grad_norm": 6.74784517288208, "learning_rate": 1.7563430927973312e-05, "loss": 1.6977, "step": 58260 }, { "epoch": 0.36624044939406436, "grad_norm": 6.780416965484619, "learning_rate": 1.756301182702866e-05, "loss": 1.7107, "step": 58270 }, { "epoch": 0.3663033017107615, "grad_norm": 7.492496490478516, "learning_rate": 1.756263463617847e-05, "loss": 1.5698, "step": 58280 }, { "epoch": 0.3663661540274586, "grad_norm": 6.76776647567749, "learning_rate": 1.7562215535233818e-05, "loss": 1.8447, "step": 58290 }, { "epoch": 0.3664290063441557, "grad_norm": 6.72606086730957, "learning_rate": 1.7561796434289165e-05, "loss": 1.6991, "step": 58300 }, { "epoch": 0.3664918586608528, "grad_norm": 7.176609516143799, "learning_rate": 1.7561377333344512e-05, "loss": 1.7002, "step": 58310 }, { "epoch": 0.36655471097754994, "grad_norm": 7.325291633605957, "learning_rate": 1.7560958232399856e-05, "loss": 1.663, "step": 58320 }, { "epoch": 0.36661756329424705, "grad_norm": 7.918809413909912, "learning_rate": 1.7560539131455203e-05, "loss": 1.854, "step": 58330 }, { "epoch": 0.36668041561094417, "grad_norm": 7.725828170776367, "learning_rate": 1.756012003051055e-05, "loss": 1.7275, "step": 58340 }, { "epoch": 0.3667432679276413, "grad_norm": 6.682360649108887, "learning_rate": 1.7559700929565897e-05, "loss": 2.0832, "step": 58350 }, { "epoch": 0.3668061202443384, "grad_norm": 8.492762565612793, "learning_rate": 1.7559281828621244e-05, "loss": 1.5955, "step": 58360 }, { "epoch": 0.3668689725610355, "grad_norm": 6.3802385330200195, "learning_rate": 1.7558862727676588e-05, "loss": 1.8547, "step": 58370 }, { "epoch": 0.3669318248777326, "grad_norm": 6.269882678985596, "learning_rate": 1.7558443626731935e-05, "loss": 1.7349, "step": 58380 }, { "epoch": 0.3669946771944297, "grad_norm": 7.0758562088012695, "learning_rate": 1.7558024525787282e-05, "loss": 1.7896, "step": 58390 }, { "epoch": 0.3670575295111268, "grad_norm": 8.282215118408203, "learning_rate": 1.755760542484263e-05, "loss": 1.8961, "step": 58400 }, { "epoch": 0.3671203818278239, "grad_norm": 6.388606071472168, "learning_rate": 1.7557186323897976e-05, "loss": 1.7292, "step": 58410 }, { "epoch": 0.36718323414452103, "grad_norm": 6.925660133361816, "learning_rate": 1.7556767222953323e-05, "loss": 1.8551, "step": 58420 }, { "epoch": 0.36724608646121815, "grad_norm": 6.5192036628723145, "learning_rate": 1.755634812200867e-05, "loss": 1.8022, "step": 58430 }, { "epoch": 0.36730893877791526, "grad_norm": 7.54512357711792, "learning_rate": 1.7555929021064017e-05, "loss": 1.6527, "step": 58440 }, { "epoch": 0.3673717910946124, "grad_norm": 7.943971633911133, "learning_rate": 1.755550992011936e-05, "loss": 1.9448, "step": 58450 }, { "epoch": 0.3674346434113095, "grad_norm": 6.8775811195373535, "learning_rate": 1.7555090819174708e-05, "loss": 1.7978, "step": 58460 }, { "epoch": 0.3674974957280066, "grad_norm": 7.68377161026001, "learning_rate": 1.7554671718230055e-05, "loss": 2.0381, "step": 58470 }, { "epoch": 0.3675603480447037, "grad_norm": 6.402366638183594, "learning_rate": 1.7554252617285402e-05, "loss": 1.6738, "step": 58480 }, { "epoch": 0.36762320036140084, "grad_norm": 7.0784502029418945, "learning_rate": 1.755383351634075e-05, "loss": 1.6803, "step": 58490 }, { "epoch": 0.36768605267809795, "grad_norm": 7.339481830596924, "learning_rate": 1.7553414415396093e-05, "loss": 1.6937, "step": 58500 }, { "epoch": 0.36774890499479507, "grad_norm": 6.1548991203308105, "learning_rate": 1.755299531445144e-05, "loss": 1.9838, "step": 58510 }, { "epoch": 0.3678117573114921, "grad_norm": 6.306966781616211, "learning_rate": 1.7552576213506787e-05, "loss": 1.5747, "step": 58520 }, { "epoch": 0.36787460962818924, "grad_norm": 6.573767185211182, "learning_rate": 1.7552157112562134e-05, "loss": 1.8879, "step": 58530 }, { "epoch": 0.36793746194488636, "grad_norm": 6.0497260093688965, "learning_rate": 1.755173801161748e-05, "loss": 1.6662, "step": 58540 }, { "epoch": 0.36800031426158347, "grad_norm": 7.675485134124756, "learning_rate": 1.7551318910672825e-05, "loss": 1.7222, "step": 58550 }, { "epoch": 0.3680631665782806, "grad_norm": 7.188555717468262, "learning_rate": 1.7550899809728172e-05, "loss": 1.7689, "step": 58560 }, { "epoch": 0.3681260188949777, "grad_norm": 7.378721714019775, "learning_rate": 1.755048070878352e-05, "loss": 1.8371, "step": 58570 }, { "epoch": 0.3681888712116748, "grad_norm": 6.536661148071289, "learning_rate": 1.7550061607838866e-05, "loss": 1.8305, "step": 58580 }, { "epoch": 0.36825172352837193, "grad_norm": 6.832995891571045, "learning_rate": 1.754964250689421e-05, "loss": 1.9062, "step": 58590 }, { "epoch": 0.36831457584506905, "grad_norm": 9.356914520263672, "learning_rate": 1.7549223405949557e-05, "loss": 1.9665, "step": 58600 }, { "epoch": 0.36837742816176616, "grad_norm": 7.143482685089111, "learning_rate": 1.7548804305004904e-05, "loss": 1.7404, "step": 58610 }, { "epoch": 0.3684402804784633, "grad_norm": 6.5126214027404785, "learning_rate": 1.754838520406025e-05, "loss": 1.7681, "step": 58620 }, { "epoch": 0.3685031327951604, "grad_norm": 6.593245506286621, "learning_rate": 1.7547966103115598e-05, "loss": 1.7833, "step": 58630 }, { "epoch": 0.3685659851118575, "grad_norm": 7.299543380737305, "learning_rate": 1.7547547002170945e-05, "loss": 1.7029, "step": 58640 }, { "epoch": 0.36862883742855457, "grad_norm": 6.611456871032715, "learning_rate": 1.7547127901226292e-05, "loss": 1.8048, "step": 58650 }, { "epoch": 0.3686916897452517, "grad_norm": 6.490697383880615, "learning_rate": 1.754670880028164e-05, "loss": 1.8531, "step": 58660 }, { "epoch": 0.3687545420619488, "grad_norm": 6.8458943367004395, "learning_rate": 1.7546289699336986e-05, "loss": 1.6215, "step": 58670 }, { "epoch": 0.3688173943786459, "grad_norm": 7.012301445007324, "learning_rate": 1.754587059839233e-05, "loss": 1.8162, "step": 58680 }, { "epoch": 0.36888024669534303, "grad_norm": 6.759365558624268, "learning_rate": 1.7545451497447677e-05, "loss": 1.6949, "step": 58690 }, { "epoch": 0.36894309901204014, "grad_norm": 6.9743571281433105, "learning_rate": 1.7545032396503024e-05, "loss": 1.8654, "step": 58700 }, { "epoch": 0.36900595132873726, "grad_norm": 6.6136016845703125, "learning_rate": 1.754461329555837e-05, "loss": 1.7852, "step": 58710 }, { "epoch": 0.3690688036454344, "grad_norm": 7.7909016609191895, "learning_rate": 1.7544194194613715e-05, "loss": 1.8846, "step": 58720 }, { "epoch": 0.3691316559621315, "grad_norm": 6.4810404777526855, "learning_rate": 1.7543775093669062e-05, "loss": 1.843, "step": 58730 }, { "epoch": 0.3691945082788286, "grad_norm": 7.58692741394043, "learning_rate": 1.754335599272441e-05, "loss": 1.6527, "step": 58740 }, { "epoch": 0.3692573605955257, "grad_norm": 6.1824951171875, "learning_rate": 1.7542936891779756e-05, "loss": 1.6651, "step": 58750 }, { "epoch": 0.36932021291222283, "grad_norm": 7.710651874542236, "learning_rate": 1.7542517790835103e-05, "loss": 1.9004, "step": 58760 }, { "epoch": 0.36938306522891995, "grad_norm": 7.556338787078857, "learning_rate": 1.7542098689890447e-05, "loss": 1.6783, "step": 58770 }, { "epoch": 0.369445917545617, "grad_norm": 7.3827223777771, "learning_rate": 1.7541679588945794e-05, "loss": 1.9175, "step": 58780 }, { "epoch": 0.3695087698623141, "grad_norm": 5.807162761688232, "learning_rate": 1.754126048800114e-05, "loss": 1.8387, "step": 58790 }, { "epoch": 0.36957162217901124, "grad_norm": 6.519895553588867, "learning_rate": 1.7540841387056488e-05, "loss": 1.9513, "step": 58800 }, { "epoch": 0.36963447449570835, "grad_norm": 7.20941686630249, "learning_rate": 1.7540422286111835e-05, "loss": 1.6034, "step": 58810 }, { "epoch": 0.36969732681240547, "grad_norm": 7.563182353973389, "learning_rate": 1.754000318516718e-05, "loss": 1.5939, "step": 58820 }, { "epoch": 0.3697601791291026, "grad_norm": 7.159434795379639, "learning_rate": 1.7539584084222526e-05, "loss": 1.7039, "step": 58830 }, { "epoch": 0.3698230314457997, "grad_norm": 6.115586280822754, "learning_rate": 1.7539164983277873e-05, "loss": 1.7867, "step": 58840 }, { "epoch": 0.3698858837624968, "grad_norm": 5.896388053894043, "learning_rate": 1.753874588233322e-05, "loss": 1.7796, "step": 58850 }, { "epoch": 0.36994873607919393, "grad_norm": 6.0734100341796875, "learning_rate": 1.7538326781388567e-05, "loss": 1.5258, "step": 58860 }, { "epoch": 0.37001158839589104, "grad_norm": 6.663506031036377, "learning_rate": 1.7537907680443914e-05, "loss": 1.7604, "step": 58870 }, { "epoch": 0.37007444071258816, "grad_norm": 7.216512203216553, "learning_rate": 1.753748857949926e-05, "loss": 2.0099, "step": 58880 }, { "epoch": 0.3701372930292853, "grad_norm": 7.385026454925537, "learning_rate": 1.7537069478554608e-05, "loss": 1.7933, "step": 58890 }, { "epoch": 0.37020014534598233, "grad_norm": 7.46403694152832, "learning_rate": 1.7536650377609952e-05, "loss": 1.8139, "step": 58900 }, { "epoch": 0.37026299766267945, "grad_norm": 8.221874237060547, "learning_rate": 1.75362312766653e-05, "loss": 2.0104, "step": 58910 }, { "epoch": 0.37032584997937656, "grad_norm": 5.9333109855651855, "learning_rate": 1.7535812175720646e-05, "loss": 1.8058, "step": 58920 }, { "epoch": 0.3703887022960737, "grad_norm": 5.677400588989258, "learning_rate": 1.7535393074775993e-05, "loss": 1.5655, "step": 58930 }, { "epoch": 0.3704515546127708, "grad_norm": 6.823220252990723, "learning_rate": 1.7534973973831337e-05, "loss": 1.8187, "step": 58940 }, { "epoch": 0.3705144069294679, "grad_norm": 6.220883846282959, "learning_rate": 1.7534554872886684e-05, "loss": 1.6937, "step": 58950 }, { "epoch": 0.370577259246165, "grad_norm": 6.2850189208984375, "learning_rate": 1.753413577194203e-05, "loss": 1.6505, "step": 58960 }, { "epoch": 0.37064011156286214, "grad_norm": 6.755478382110596, "learning_rate": 1.7533716670997378e-05, "loss": 1.6412, "step": 58970 }, { "epoch": 0.37070296387955926, "grad_norm": 7.051673412322998, "learning_rate": 1.7533297570052725e-05, "loss": 1.9897, "step": 58980 }, { "epoch": 0.37076581619625637, "grad_norm": 6.870248317718506, "learning_rate": 1.753287846910807e-05, "loss": 1.7811, "step": 58990 }, { "epoch": 0.3708286685129535, "grad_norm": 7.873626232147217, "learning_rate": 1.7532459368163416e-05, "loss": 2.0148, "step": 59000 }, { "epoch": 0.3708915208296506, "grad_norm": 6.326781749725342, "learning_rate": 1.7532040267218763e-05, "loss": 1.6766, "step": 59010 }, { "epoch": 0.3709543731463477, "grad_norm": 7.179531097412109, "learning_rate": 1.753162116627411e-05, "loss": 1.8273, "step": 59020 }, { "epoch": 0.3710172254630448, "grad_norm": 6.448795318603516, "learning_rate": 1.7531202065329457e-05, "loss": 1.7084, "step": 59030 }, { "epoch": 0.3710800777797419, "grad_norm": 7.391201496124268, "learning_rate": 1.7530782964384804e-05, "loss": 1.7986, "step": 59040 }, { "epoch": 0.371142930096439, "grad_norm": 7.2317070960998535, "learning_rate": 1.753036386344015e-05, "loss": 1.6227, "step": 59050 }, { "epoch": 0.3712057824131361, "grad_norm": 6.9276227951049805, "learning_rate": 1.7529944762495498e-05, "loss": 1.9586, "step": 59060 }, { "epoch": 0.37126863472983324, "grad_norm": 6.809334754943848, "learning_rate": 1.7529525661550842e-05, "loss": 1.7877, "step": 59070 }, { "epoch": 0.37133148704653035, "grad_norm": 6.7816901206970215, "learning_rate": 1.752910656060619e-05, "loss": 1.7312, "step": 59080 }, { "epoch": 0.37139433936322747, "grad_norm": 6.264307498931885, "learning_rate": 1.7528687459661536e-05, "loss": 1.6797, "step": 59090 }, { "epoch": 0.3714571916799246, "grad_norm": 6.3187689781188965, "learning_rate": 1.7528268358716883e-05, "loss": 1.7063, "step": 59100 }, { "epoch": 0.3715200439966217, "grad_norm": 8.220011711120605, "learning_rate": 1.752784925777223e-05, "loss": 1.7625, "step": 59110 }, { "epoch": 0.3715828963133188, "grad_norm": 6.573159694671631, "learning_rate": 1.7527430156827574e-05, "loss": 1.9129, "step": 59120 }, { "epoch": 0.3716457486300159, "grad_norm": 6.078289031982422, "learning_rate": 1.752701105588292e-05, "loss": 1.9416, "step": 59130 }, { "epoch": 0.37170860094671304, "grad_norm": 6.652973175048828, "learning_rate": 1.7526591954938268e-05, "loss": 1.8134, "step": 59140 }, { "epoch": 0.37177145326341016, "grad_norm": 6.304858684539795, "learning_rate": 1.7526172853993615e-05, "loss": 1.7168, "step": 59150 }, { "epoch": 0.3718343055801072, "grad_norm": 8.342355728149414, "learning_rate": 1.7525753753048962e-05, "loss": 1.9052, "step": 59160 }, { "epoch": 0.37189715789680433, "grad_norm": 8.306211471557617, "learning_rate": 1.7525334652104306e-05, "loss": 1.7468, "step": 59170 }, { "epoch": 0.37196001021350145, "grad_norm": 7.454753398895264, "learning_rate": 1.7524915551159653e-05, "loss": 1.7553, "step": 59180 }, { "epoch": 0.37202286253019856, "grad_norm": 7.92028284072876, "learning_rate": 1.7524496450215e-05, "loss": 1.842, "step": 59190 }, { "epoch": 0.3720857148468957, "grad_norm": 5.945263385772705, "learning_rate": 1.7524077349270347e-05, "loss": 1.8005, "step": 59200 }, { "epoch": 0.3721485671635928, "grad_norm": 7.089756965637207, "learning_rate": 1.752365824832569e-05, "loss": 1.8219, "step": 59210 }, { "epoch": 0.3722114194802899, "grad_norm": 7.197872638702393, "learning_rate": 1.7523239147381038e-05, "loss": 1.9255, "step": 59220 }, { "epoch": 0.372274271796987, "grad_norm": 7.652667045593262, "learning_rate": 1.7522820046436385e-05, "loss": 1.8811, "step": 59230 }, { "epoch": 0.37233712411368414, "grad_norm": 7.3847808837890625, "learning_rate": 1.7522400945491732e-05, "loss": 1.7606, "step": 59240 }, { "epoch": 0.37239997643038125, "grad_norm": 6.941809177398682, "learning_rate": 1.752198184454708e-05, "loss": 1.8537, "step": 59250 }, { "epoch": 0.37246282874707837, "grad_norm": 7.309818267822266, "learning_rate": 1.7521562743602426e-05, "loss": 1.7466, "step": 59260 }, { "epoch": 0.3725256810637755, "grad_norm": 7.6321845054626465, "learning_rate": 1.7521143642657773e-05, "loss": 1.9646, "step": 59270 }, { "epoch": 0.3725885333804726, "grad_norm": 7.124085426330566, "learning_rate": 1.752072454171312e-05, "loss": 1.8152, "step": 59280 }, { "epoch": 0.37265138569716966, "grad_norm": 6.121925354003906, "learning_rate": 1.7520305440768467e-05, "loss": 1.7495, "step": 59290 }, { "epoch": 0.37271423801386677, "grad_norm": 6.808883190155029, "learning_rate": 1.751988633982381e-05, "loss": 1.7018, "step": 59300 }, { "epoch": 0.3727770903305639, "grad_norm": 6.999438762664795, "learning_rate": 1.7519467238879158e-05, "loss": 1.6712, "step": 59310 }, { "epoch": 0.372839942647261, "grad_norm": 6.6644086837768555, "learning_rate": 1.7519048137934505e-05, "loss": 1.7708, "step": 59320 }, { "epoch": 0.3729027949639581, "grad_norm": 7.2013840675354, "learning_rate": 1.7518629036989852e-05, "loss": 1.8504, "step": 59330 }, { "epoch": 0.37296564728065523, "grad_norm": 7.2592082023620605, "learning_rate": 1.7518209936045196e-05, "loss": 1.796, "step": 59340 }, { "epoch": 0.37302849959735235, "grad_norm": 6.626367568969727, "learning_rate": 1.7517790835100543e-05, "loss": 1.8003, "step": 59350 }, { "epoch": 0.37309135191404946, "grad_norm": 6.0213398933410645, "learning_rate": 1.751737173415589e-05, "loss": 1.5787, "step": 59360 }, { "epoch": 0.3731542042307466, "grad_norm": 7.501685619354248, "learning_rate": 1.7516952633211237e-05, "loss": 1.9043, "step": 59370 }, { "epoch": 0.3732170565474437, "grad_norm": 5.493393898010254, "learning_rate": 1.7516533532266584e-05, "loss": 1.6693, "step": 59380 }, { "epoch": 0.3732799088641408, "grad_norm": 5.734950542449951, "learning_rate": 1.7516114431321928e-05, "loss": 1.8853, "step": 59390 }, { "epoch": 0.3733427611808379, "grad_norm": 6.7122273445129395, "learning_rate": 1.7515695330377275e-05, "loss": 1.8712, "step": 59400 }, { "epoch": 0.37340561349753504, "grad_norm": 7.50879430770874, "learning_rate": 1.7515276229432622e-05, "loss": 1.9913, "step": 59410 }, { "epoch": 0.3734684658142321, "grad_norm": 8.851447105407715, "learning_rate": 1.751485712848797e-05, "loss": 1.763, "step": 59420 }, { "epoch": 0.3735313181309292, "grad_norm": 8.338323593139648, "learning_rate": 1.7514438027543316e-05, "loss": 1.708, "step": 59430 }, { "epoch": 0.37359417044762633, "grad_norm": 6.182701110839844, "learning_rate": 1.7514018926598663e-05, "loss": 1.6702, "step": 59440 }, { "epoch": 0.37365702276432344, "grad_norm": 7.472273826599121, "learning_rate": 1.7513599825654007e-05, "loss": 1.9166, "step": 59450 }, { "epoch": 0.37371987508102056, "grad_norm": 7.576421737670898, "learning_rate": 1.7513180724709354e-05, "loss": 1.6216, "step": 59460 }, { "epoch": 0.3737827273977177, "grad_norm": 7.299145698547363, "learning_rate": 1.75127616237647e-05, "loss": 1.7993, "step": 59470 }, { "epoch": 0.3738455797144148, "grad_norm": 5.993757247924805, "learning_rate": 1.7512342522820048e-05, "loss": 1.8102, "step": 59480 }, { "epoch": 0.3739084320311119, "grad_norm": 6.252512454986572, "learning_rate": 1.7511923421875395e-05, "loss": 1.6485, "step": 59490 }, { "epoch": 0.373971284347809, "grad_norm": 7.279608249664307, "learning_rate": 1.7511504320930742e-05, "loss": 1.7594, "step": 59500 }, { "epoch": 0.37403413666450613, "grad_norm": 7.6866655349731445, "learning_rate": 1.751108521998609e-05, "loss": 1.6542, "step": 59510 }, { "epoch": 0.37409698898120325, "grad_norm": 6.671911239624023, "learning_rate": 1.7510666119041433e-05, "loss": 1.6437, "step": 59520 }, { "epoch": 0.37415984129790036, "grad_norm": 6.562828063964844, "learning_rate": 1.751024701809678e-05, "loss": 1.5359, "step": 59530 }, { "epoch": 0.3742226936145974, "grad_norm": 6.7453413009643555, "learning_rate": 1.7509827917152127e-05, "loss": 1.8324, "step": 59540 }, { "epoch": 0.37428554593129454, "grad_norm": 8.931385040283203, "learning_rate": 1.7509408816207474e-05, "loss": 2.0589, "step": 59550 }, { "epoch": 0.37434839824799165, "grad_norm": 7.867937088012695, "learning_rate": 1.7508989715262818e-05, "loss": 1.8412, "step": 59560 }, { "epoch": 0.37441125056468877, "grad_norm": 6.942878246307373, "learning_rate": 1.7508570614318165e-05, "loss": 1.8466, "step": 59570 }, { "epoch": 0.3744741028813859, "grad_norm": 6.9903178215026855, "learning_rate": 1.7508151513373512e-05, "loss": 1.6826, "step": 59580 }, { "epoch": 0.374536955198083, "grad_norm": 7.423864841461182, "learning_rate": 1.750773241242886e-05, "loss": 1.6371, "step": 59590 }, { "epoch": 0.3745998075147801, "grad_norm": 7.129739761352539, "learning_rate": 1.7507313311484206e-05, "loss": 1.8208, "step": 59600 }, { "epoch": 0.37466265983147723, "grad_norm": 8.435739517211914, "learning_rate": 1.750689421053955e-05, "loss": 1.7287, "step": 59610 }, { "epoch": 0.37472551214817434, "grad_norm": 6.890371799468994, "learning_rate": 1.7506475109594897e-05, "loss": 1.7205, "step": 59620 }, { "epoch": 0.37478836446487146, "grad_norm": 7.0437726974487305, "learning_rate": 1.7506056008650244e-05, "loss": 1.9148, "step": 59630 }, { "epoch": 0.3748512167815686, "grad_norm": 6.208669662475586, "learning_rate": 1.750563690770559e-05, "loss": 1.7117, "step": 59640 }, { "epoch": 0.3749140690982657, "grad_norm": 6.926197052001953, "learning_rate": 1.7505217806760938e-05, "loss": 1.6599, "step": 59650 }, { "epoch": 0.3749769214149628, "grad_norm": 6.988066673278809, "learning_rate": 1.7504798705816285e-05, "loss": 1.6229, "step": 59660 }, { "epoch": 0.37503977373165986, "grad_norm": 8.055120468139648, "learning_rate": 1.7504379604871632e-05, "loss": 1.8998, "step": 59670 }, { "epoch": 0.375102626048357, "grad_norm": 7.21201753616333, "learning_rate": 1.750396050392698e-05, "loss": 1.8485, "step": 59680 }, { "epoch": 0.3751654783650541, "grad_norm": 5.845964431762695, "learning_rate": 1.7503541402982326e-05, "loss": 1.8329, "step": 59690 }, { "epoch": 0.3752283306817512, "grad_norm": 7.649561405181885, "learning_rate": 1.750312230203767e-05, "loss": 1.6308, "step": 59700 }, { "epoch": 0.3752911829984483, "grad_norm": 7.195222854614258, "learning_rate": 1.7502703201093017e-05, "loss": 1.8236, "step": 59710 }, { "epoch": 0.37535403531514544, "grad_norm": 7.8561248779296875, "learning_rate": 1.7502284100148364e-05, "loss": 1.8016, "step": 59720 }, { "epoch": 0.37541688763184256, "grad_norm": 6.7510600090026855, "learning_rate": 1.750186499920371e-05, "loss": 1.747, "step": 59730 }, { "epoch": 0.37547973994853967, "grad_norm": 7.114448547363281, "learning_rate": 1.7501445898259055e-05, "loss": 1.7153, "step": 59740 }, { "epoch": 0.3755425922652368, "grad_norm": 6.795496940612793, "learning_rate": 1.7501026797314402e-05, "loss": 1.7033, "step": 59750 }, { "epoch": 0.3756054445819339, "grad_norm": 9.095891952514648, "learning_rate": 1.750060769636975e-05, "loss": 1.6289, "step": 59760 }, { "epoch": 0.375668296898631, "grad_norm": 6.375208377838135, "learning_rate": 1.7500188595425096e-05, "loss": 1.7403, "step": 59770 }, { "epoch": 0.37573114921532813, "grad_norm": 7.159587383270264, "learning_rate": 1.7499769494480443e-05, "loss": 1.8371, "step": 59780 }, { "epoch": 0.37579400153202525, "grad_norm": 7.09494686126709, "learning_rate": 1.7499350393535787e-05, "loss": 1.8446, "step": 59790 }, { "epoch": 0.3758568538487223, "grad_norm": 6.484232425689697, "learning_rate": 1.7498931292591134e-05, "loss": 1.8591, "step": 59800 }, { "epoch": 0.3759197061654194, "grad_norm": 7.286050796508789, "learning_rate": 1.749851219164648e-05, "loss": 1.7384, "step": 59810 }, { "epoch": 0.37598255848211654, "grad_norm": 7.048933506011963, "learning_rate": 1.7498093090701828e-05, "loss": 1.5255, "step": 59820 }, { "epoch": 0.37604541079881365, "grad_norm": 6.8245768547058105, "learning_rate": 1.7497673989757172e-05, "loss": 1.7599, "step": 59830 }, { "epoch": 0.37610826311551077, "grad_norm": 8.256148338317871, "learning_rate": 1.749725488881252e-05, "loss": 1.7403, "step": 59840 }, { "epoch": 0.3761711154322079, "grad_norm": 7.225186347961426, "learning_rate": 1.7496835787867866e-05, "loss": 1.6914, "step": 59850 }, { "epoch": 0.376233967748905, "grad_norm": 6.334831237792969, "learning_rate": 1.7496416686923213e-05, "loss": 1.7208, "step": 59860 }, { "epoch": 0.3762968200656021, "grad_norm": 6.329988479614258, "learning_rate": 1.749599758597856e-05, "loss": 1.8027, "step": 59870 }, { "epoch": 0.3763596723822992, "grad_norm": 6.6955342292785645, "learning_rate": 1.7495578485033907e-05, "loss": 1.7901, "step": 59880 }, { "epoch": 0.37642252469899634, "grad_norm": 5.487803936004639, "learning_rate": 1.7495159384089254e-05, "loss": 1.6651, "step": 59890 }, { "epoch": 0.37648537701569346, "grad_norm": 6.468836784362793, "learning_rate": 1.74947402831446e-05, "loss": 1.683, "step": 59900 }, { "epoch": 0.37654822933239057, "grad_norm": 7.434545040130615, "learning_rate": 1.749432118219995e-05, "loss": 1.9459, "step": 59910 }, { "epoch": 0.3766110816490877, "grad_norm": 6.61981201171875, "learning_rate": 1.7493902081255292e-05, "loss": 1.8659, "step": 59920 }, { "epoch": 0.37667393396578475, "grad_norm": 7.554393768310547, "learning_rate": 1.749348298031064e-05, "loss": 1.8676, "step": 59930 }, { "epoch": 0.37673678628248186, "grad_norm": 7.570027828216553, "learning_rate": 1.7493063879365986e-05, "loss": 1.5633, "step": 59940 }, { "epoch": 0.376799638599179, "grad_norm": 7.682457447052002, "learning_rate": 1.7492644778421333e-05, "loss": 1.8556, "step": 59950 }, { "epoch": 0.3768624909158761, "grad_norm": 6.537892818450928, "learning_rate": 1.7492225677476677e-05, "loss": 1.8287, "step": 59960 }, { "epoch": 0.3769253432325732, "grad_norm": 7.347871780395508, "learning_rate": 1.7491806576532024e-05, "loss": 1.9065, "step": 59970 }, { "epoch": 0.3769881955492703, "grad_norm": 5.919852256774902, "learning_rate": 1.749138747558737e-05, "loss": 1.725, "step": 59980 }, { "epoch": 0.37705104786596744, "grad_norm": 7.147500514984131, "learning_rate": 1.7490968374642718e-05, "loss": 2.0688, "step": 59990 }, { "epoch": 0.37711390018266455, "grad_norm": 6.638935565948486, "learning_rate": 1.7490549273698065e-05, "loss": 1.752, "step": 60000 }, { "epoch": 0.37717675249936167, "grad_norm": 7.027769088745117, "learning_rate": 1.749013017275341e-05, "loss": 1.9077, "step": 60010 }, { "epoch": 0.3772396048160588, "grad_norm": 7.6059250831604, "learning_rate": 1.7489711071808756e-05, "loss": 1.8625, "step": 60020 }, { "epoch": 0.3773024571327559, "grad_norm": 6.746246814727783, "learning_rate": 1.7489291970864103e-05, "loss": 1.5552, "step": 60030 }, { "epoch": 0.377365309449453, "grad_norm": 6.389588832855225, "learning_rate": 1.748887286991945e-05, "loss": 1.7082, "step": 60040 }, { "epoch": 0.37742816176615007, "grad_norm": 6.3765363693237305, "learning_rate": 1.7488453768974797e-05, "loss": 1.888, "step": 60050 }, { "epoch": 0.3774910140828472, "grad_norm": 8.022350311279297, "learning_rate": 1.7488034668030144e-05, "loss": 1.7982, "step": 60060 }, { "epoch": 0.3775538663995443, "grad_norm": 6.376769542694092, "learning_rate": 1.748761556708549e-05, "loss": 1.5281, "step": 60070 }, { "epoch": 0.3776167187162414, "grad_norm": 6.17994499206543, "learning_rate": 1.7487196466140835e-05, "loss": 1.7159, "step": 60080 }, { "epoch": 0.37767957103293853, "grad_norm": 6.3481364250183105, "learning_rate": 1.7486777365196182e-05, "loss": 1.8968, "step": 60090 }, { "epoch": 0.37774242334963565, "grad_norm": 6.469996452331543, "learning_rate": 1.748635826425153e-05, "loss": 1.6666, "step": 60100 }, { "epoch": 0.37780527566633276, "grad_norm": 6.1791253089904785, "learning_rate": 1.7485939163306876e-05, "loss": 1.7415, "step": 60110 }, { "epoch": 0.3778681279830299, "grad_norm": 7.888062477111816, "learning_rate": 1.7485520062362223e-05, "loss": 1.7466, "step": 60120 }, { "epoch": 0.377930980299727, "grad_norm": 5.9787821769714355, "learning_rate": 1.748510096141757e-05, "loss": 1.6393, "step": 60130 }, { "epoch": 0.3779938326164241, "grad_norm": 7.3440752029418945, "learning_rate": 1.7484681860472914e-05, "loss": 1.783, "step": 60140 }, { "epoch": 0.3780566849331212, "grad_norm": 7.354781150817871, "learning_rate": 1.748426275952826e-05, "loss": 1.8741, "step": 60150 }, { "epoch": 0.37811953724981834, "grad_norm": 6.851288795471191, "learning_rate": 1.7483843658583608e-05, "loss": 2.0126, "step": 60160 }, { "epoch": 0.37818238956651545, "grad_norm": 6.357627868652344, "learning_rate": 1.7483424557638955e-05, "loss": 1.6932, "step": 60170 }, { "epoch": 0.3782452418832125, "grad_norm": 7.157871246337891, "learning_rate": 1.74830054566943e-05, "loss": 1.9015, "step": 60180 }, { "epoch": 0.37830809419990963, "grad_norm": 6.958586692810059, "learning_rate": 1.7482586355749646e-05, "loss": 1.7936, "step": 60190 }, { "epoch": 0.37837094651660674, "grad_norm": 7.132724285125732, "learning_rate": 1.7482167254804993e-05, "loss": 1.6724, "step": 60200 }, { "epoch": 0.37843379883330386, "grad_norm": 6.834670066833496, "learning_rate": 1.748174815386034e-05, "loss": 1.6689, "step": 60210 }, { "epoch": 0.378496651150001, "grad_norm": 6.891313552856445, "learning_rate": 1.7481329052915687e-05, "loss": 1.5845, "step": 60220 }, { "epoch": 0.3785595034666981, "grad_norm": 7.128862380981445, "learning_rate": 1.748090995197103e-05, "loss": 1.5651, "step": 60230 }, { "epoch": 0.3786223557833952, "grad_norm": 7.177995681762695, "learning_rate": 1.7480490851026378e-05, "loss": 1.806, "step": 60240 }, { "epoch": 0.3786852081000923, "grad_norm": 7.514861106872559, "learning_rate": 1.7480071750081725e-05, "loss": 1.7179, "step": 60250 }, { "epoch": 0.37874806041678943, "grad_norm": 7.260415077209473, "learning_rate": 1.7479652649137072e-05, "loss": 1.6272, "step": 60260 }, { "epoch": 0.37881091273348655, "grad_norm": 8.503983497619629, "learning_rate": 1.747923354819242e-05, "loss": 1.7617, "step": 60270 }, { "epoch": 0.37887376505018366, "grad_norm": 7.926694869995117, "learning_rate": 1.7478814447247766e-05, "loss": 1.7532, "step": 60280 }, { "epoch": 0.3789366173668808, "grad_norm": 7.101754665374756, "learning_rate": 1.7478395346303113e-05, "loss": 1.8402, "step": 60290 }, { "epoch": 0.3789994696835779, "grad_norm": 7.921205520629883, "learning_rate": 1.747797624535846e-05, "loss": 1.7564, "step": 60300 }, { "epoch": 0.37906232200027495, "grad_norm": 6.938370704650879, "learning_rate": 1.7477557144413808e-05, "loss": 1.7927, "step": 60310 }, { "epoch": 0.37912517431697207, "grad_norm": 7.330945014953613, "learning_rate": 1.747713804346915e-05, "loss": 1.9731, "step": 60320 }, { "epoch": 0.3791880266336692, "grad_norm": 6.738017559051514, "learning_rate": 1.7476718942524498e-05, "loss": 2.0006, "step": 60330 }, { "epoch": 0.3792508789503663, "grad_norm": 6.754965305328369, "learning_rate": 1.7476299841579845e-05, "loss": 1.7241, "step": 60340 }, { "epoch": 0.3793137312670634, "grad_norm": 6.526510238647461, "learning_rate": 1.7475880740635192e-05, "loss": 1.7795, "step": 60350 }, { "epoch": 0.37937658358376053, "grad_norm": 6.938403606414795, "learning_rate": 1.7475461639690536e-05, "loss": 1.7768, "step": 60360 }, { "epoch": 0.37943943590045764, "grad_norm": 7.293728828430176, "learning_rate": 1.7475042538745883e-05, "loss": 1.9032, "step": 60370 }, { "epoch": 0.37950228821715476, "grad_norm": 8.014728546142578, "learning_rate": 1.747462343780123e-05, "loss": 1.7381, "step": 60380 }, { "epoch": 0.3795651405338519, "grad_norm": 6.761390209197998, "learning_rate": 1.7474204336856577e-05, "loss": 1.497, "step": 60390 }, { "epoch": 0.379627992850549, "grad_norm": 5.933043003082275, "learning_rate": 1.7473785235911924e-05, "loss": 1.856, "step": 60400 }, { "epoch": 0.3796908451672461, "grad_norm": 6.399038314819336, "learning_rate": 1.7473408045061736e-05, "loss": 1.7518, "step": 60410 }, { "epoch": 0.3797536974839432, "grad_norm": 5.682687282562256, "learning_rate": 1.7472988944117083e-05, "loss": 1.6777, "step": 60420 }, { "epoch": 0.37981654980064034, "grad_norm": 7.3017497062683105, "learning_rate": 1.747256984317243e-05, "loss": 1.853, "step": 60430 }, { "epoch": 0.3798794021173374, "grad_norm": 7.732819557189941, "learning_rate": 1.7472150742227774e-05, "loss": 1.812, "step": 60440 }, { "epoch": 0.3799422544340345, "grad_norm": 8.50419807434082, "learning_rate": 1.747173164128312e-05, "loss": 1.9775, "step": 60450 }, { "epoch": 0.3800051067507316, "grad_norm": 6.747570514678955, "learning_rate": 1.7471312540338468e-05, "loss": 1.7447, "step": 60460 }, { "epoch": 0.38006795906742874, "grad_norm": 7.3443121910095215, "learning_rate": 1.7470893439393815e-05, "loss": 1.5567, "step": 60470 }, { "epoch": 0.38013081138412586, "grad_norm": 6.715612411499023, "learning_rate": 1.747047433844916e-05, "loss": 1.6435, "step": 60480 }, { "epoch": 0.38019366370082297, "grad_norm": 7.518825531005859, "learning_rate": 1.7470055237504505e-05, "loss": 1.7859, "step": 60490 }, { "epoch": 0.3802565160175201, "grad_norm": 6.818528652191162, "learning_rate": 1.7469636136559853e-05, "loss": 1.867, "step": 60500 }, { "epoch": 0.3803193683342172, "grad_norm": 6.7483296394348145, "learning_rate": 1.74692170356152e-05, "loss": 1.695, "step": 60510 }, { "epoch": 0.3803822206509143, "grad_norm": 8.435516357421875, "learning_rate": 1.7468797934670547e-05, "loss": 1.7875, "step": 60520 }, { "epoch": 0.38044507296761143, "grad_norm": 6.491191387176514, "learning_rate": 1.746837883372589e-05, "loss": 1.8847, "step": 60530 }, { "epoch": 0.38050792528430855, "grad_norm": 7.434518814086914, "learning_rate": 1.7467959732781237e-05, "loss": 1.5926, "step": 60540 }, { "epoch": 0.38057077760100566, "grad_norm": 7.427109718322754, "learning_rate": 1.7467540631836585e-05, "loss": 1.7158, "step": 60550 }, { "epoch": 0.3806336299177027, "grad_norm": 5.753595352172852, "learning_rate": 1.746712153089193e-05, "loss": 1.7352, "step": 60560 }, { "epoch": 0.38069648223439984, "grad_norm": 6.416884422302246, "learning_rate": 1.746670242994728e-05, "loss": 1.9335, "step": 60570 }, { "epoch": 0.38075933455109695, "grad_norm": 6.566325664520264, "learning_rate": 1.7466283329002626e-05, "loss": 1.7366, "step": 60580 }, { "epoch": 0.38082218686779407, "grad_norm": 7.040223121643066, "learning_rate": 1.7465864228057973e-05, "loss": 1.7657, "step": 60590 }, { "epoch": 0.3808850391844912, "grad_norm": 6.551633834838867, "learning_rate": 1.746544512711332e-05, "loss": 1.8301, "step": 60600 }, { "epoch": 0.3809478915011883, "grad_norm": 6.2591142654418945, "learning_rate": 1.7465026026168667e-05, "loss": 1.905, "step": 60610 }, { "epoch": 0.3810107438178854, "grad_norm": 7.5698561668396, "learning_rate": 1.746460692522401e-05, "loss": 1.6883, "step": 60620 }, { "epoch": 0.3810735961345825, "grad_norm": 6.377468585968018, "learning_rate": 1.7464187824279358e-05, "loss": 1.8105, "step": 60630 }, { "epoch": 0.38113644845127964, "grad_norm": 6.358702659606934, "learning_rate": 1.7463768723334705e-05, "loss": 1.6391, "step": 60640 }, { "epoch": 0.38119930076797676, "grad_norm": 7.569583892822266, "learning_rate": 1.7463349622390052e-05, "loss": 1.5885, "step": 60650 }, { "epoch": 0.38126215308467387, "grad_norm": 6.374160289764404, "learning_rate": 1.7462930521445396e-05, "loss": 1.6662, "step": 60660 }, { "epoch": 0.381325005401371, "grad_norm": 8.042657852172852, "learning_rate": 1.7462511420500743e-05, "loss": 1.6635, "step": 60670 }, { "epoch": 0.3813878577180681, "grad_norm": 6.4280781745910645, "learning_rate": 1.746209231955609e-05, "loss": 1.8211, "step": 60680 }, { "epoch": 0.38145071003476516, "grad_norm": 7.481120586395264, "learning_rate": 1.7461673218611437e-05, "loss": 1.9808, "step": 60690 }, { "epoch": 0.3815135623514623, "grad_norm": 10.263879776000977, "learning_rate": 1.746125411766678e-05, "loss": 1.8184, "step": 60700 }, { "epoch": 0.3815764146681594, "grad_norm": 6.977829456329346, "learning_rate": 1.7460835016722127e-05, "loss": 1.8257, "step": 60710 }, { "epoch": 0.3816392669848565, "grad_norm": 6.320673942565918, "learning_rate": 1.7460415915777475e-05, "loss": 1.71, "step": 60720 }, { "epoch": 0.3817021193015536, "grad_norm": 7.3813886642456055, "learning_rate": 1.745999681483282e-05, "loss": 1.7503, "step": 60730 }, { "epoch": 0.38176497161825074, "grad_norm": 6.991902828216553, "learning_rate": 1.745957771388817e-05, "loss": 1.9265, "step": 60740 }, { "epoch": 0.38182782393494785, "grad_norm": 7.4620256423950195, "learning_rate": 1.7459158612943516e-05, "loss": 1.5813, "step": 60750 }, { "epoch": 0.38189067625164497, "grad_norm": 7.323355197906494, "learning_rate": 1.7458739511998863e-05, "loss": 1.665, "step": 60760 }, { "epoch": 0.3819535285683421, "grad_norm": 6.123446941375732, "learning_rate": 1.7458320411054207e-05, "loss": 1.5442, "step": 60770 }, { "epoch": 0.3820163808850392, "grad_norm": 7.154541492462158, "learning_rate": 1.7457901310109554e-05, "loss": 1.6706, "step": 60780 }, { "epoch": 0.3820792332017363, "grad_norm": 7.16757869720459, "learning_rate": 1.74574822091649e-05, "loss": 1.778, "step": 60790 }, { "epoch": 0.3821420855184334, "grad_norm": 7.341074466705322, "learning_rate": 1.7457063108220248e-05, "loss": 1.7444, "step": 60800 }, { "epoch": 0.38220493783513054, "grad_norm": 7.525569915771484, "learning_rate": 1.7456644007275595e-05, "loss": 1.7723, "step": 60810 }, { "epoch": 0.3822677901518276, "grad_norm": 5.7784423828125, "learning_rate": 1.7456224906330942e-05, "loss": 1.8289, "step": 60820 }, { "epoch": 0.3823306424685247, "grad_norm": 6.461688995361328, "learning_rate": 1.745580580538629e-05, "loss": 1.7749, "step": 60830 }, { "epoch": 0.38239349478522183, "grad_norm": 7.5797529220581055, "learning_rate": 1.7455386704441633e-05, "loss": 1.7896, "step": 60840 }, { "epoch": 0.38245634710191895, "grad_norm": 6.6320624351501465, "learning_rate": 1.745496760349698e-05, "loss": 1.7124, "step": 60850 }, { "epoch": 0.38251919941861606, "grad_norm": 6.536380767822266, "learning_rate": 1.7454548502552327e-05, "loss": 1.7042, "step": 60860 }, { "epoch": 0.3825820517353132, "grad_norm": 6.43516731262207, "learning_rate": 1.7454129401607674e-05, "loss": 1.613, "step": 60870 }, { "epoch": 0.3826449040520103, "grad_norm": 6.561049461364746, "learning_rate": 1.7453710300663018e-05, "loss": 1.6386, "step": 60880 }, { "epoch": 0.3827077563687074, "grad_norm": 6.353400230407715, "learning_rate": 1.7453291199718365e-05, "loss": 1.6232, "step": 60890 }, { "epoch": 0.3827706086854045, "grad_norm": 8.064393043518066, "learning_rate": 1.745287209877371e-05, "loss": 1.7805, "step": 60900 }, { "epoch": 0.38283346100210164, "grad_norm": 8.582117080688477, "learning_rate": 1.745245299782906e-05, "loss": 1.9209, "step": 60910 }, { "epoch": 0.38289631331879875, "grad_norm": 6.980828762054443, "learning_rate": 1.7452033896884406e-05, "loss": 1.6657, "step": 60920 }, { "epoch": 0.38295916563549587, "grad_norm": 5.844123363494873, "learning_rate": 1.745161479593975e-05, "loss": 1.7771, "step": 60930 }, { "epoch": 0.383022017952193, "grad_norm": 7.514682292938232, "learning_rate": 1.7451195694995097e-05, "loss": 1.8881, "step": 60940 }, { "epoch": 0.38308487026889004, "grad_norm": 7.010300159454346, "learning_rate": 1.7450776594050444e-05, "loss": 1.6587, "step": 60950 }, { "epoch": 0.38314772258558716, "grad_norm": 8.163060188293457, "learning_rate": 1.745035749310579e-05, "loss": 1.8552, "step": 60960 }, { "epoch": 0.3832105749022843, "grad_norm": 5.847114562988281, "learning_rate": 1.7449938392161138e-05, "loss": 1.7179, "step": 60970 }, { "epoch": 0.3832734272189814, "grad_norm": 6.140045166015625, "learning_rate": 1.7449519291216485e-05, "loss": 1.9446, "step": 60980 }, { "epoch": 0.3833362795356785, "grad_norm": 6.357498645782471, "learning_rate": 1.7449100190271832e-05, "loss": 1.8126, "step": 60990 }, { "epoch": 0.3833991318523756, "grad_norm": 7.207118988037109, "learning_rate": 1.744868108932718e-05, "loss": 1.9658, "step": 61000 }, { "epoch": 0.38346198416907273, "grad_norm": 6.827266693115234, "learning_rate": 1.7448261988382523e-05, "loss": 2.061, "step": 61010 }, { "epoch": 0.38352483648576985, "grad_norm": 7.136649131774902, "learning_rate": 1.744784288743787e-05, "loss": 1.5873, "step": 61020 }, { "epoch": 0.38358768880246696, "grad_norm": 7.03965950012207, "learning_rate": 1.744746569658768e-05, "loss": 1.7496, "step": 61030 }, { "epoch": 0.3836505411191641, "grad_norm": 6.262448310852051, "learning_rate": 1.7447046595643028e-05, "loss": 1.8426, "step": 61040 }, { "epoch": 0.3837133934358612, "grad_norm": 7.471109390258789, "learning_rate": 1.7446627494698375e-05, "loss": 1.7518, "step": 61050 }, { "epoch": 0.3837762457525583, "grad_norm": 6.2768940925598145, "learning_rate": 1.7446208393753722e-05, "loss": 1.6824, "step": 61060 }, { "epoch": 0.38383909806925537, "grad_norm": 7.512392520904541, "learning_rate": 1.744578929280907e-05, "loss": 1.7053, "step": 61070 }, { "epoch": 0.3839019503859525, "grad_norm": 6.868130207061768, "learning_rate": 1.7445370191864413e-05, "loss": 1.7538, "step": 61080 }, { "epoch": 0.3839648027026496, "grad_norm": 6.069267749786377, "learning_rate": 1.744495109091976e-05, "loss": 1.7242, "step": 61090 }, { "epoch": 0.3840276550193467, "grad_norm": 6.6341376304626465, "learning_rate": 1.7444531989975107e-05, "loss": 1.8129, "step": 61100 }, { "epoch": 0.38409050733604383, "grad_norm": 7.144861221313477, "learning_rate": 1.7444112889030454e-05, "loss": 1.5183, "step": 61110 }, { "epoch": 0.38415335965274094, "grad_norm": 6.238708972930908, "learning_rate": 1.74436937880858e-05, "loss": 2.1609, "step": 61120 }, { "epoch": 0.38421621196943806, "grad_norm": 7.683382034301758, "learning_rate": 1.7443274687141145e-05, "loss": 1.8292, "step": 61130 }, { "epoch": 0.3842790642861352, "grad_norm": 6.920597553253174, "learning_rate": 1.7442855586196492e-05, "loss": 1.4433, "step": 61140 }, { "epoch": 0.3843419166028323, "grad_norm": 6.853091239929199, "learning_rate": 1.744243648525184e-05, "loss": 1.727, "step": 61150 }, { "epoch": 0.3844047689195294, "grad_norm": 6.160371780395508, "learning_rate": 1.7442017384307186e-05, "loss": 1.6501, "step": 61160 }, { "epoch": 0.3844676212362265, "grad_norm": 8.203758239746094, "learning_rate": 1.7441598283362533e-05, "loss": 1.7284, "step": 61170 }, { "epoch": 0.38453047355292364, "grad_norm": 8.511927604675293, "learning_rate": 1.7441179182417877e-05, "loss": 1.6751, "step": 61180 }, { "epoch": 0.38459332586962075, "grad_norm": 8.361741065979004, "learning_rate": 1.7440760081473224e-05, "loss": 1.8765, "step": 61190 }, { "epoch": 0.3846561781863178, "grad_norm": 6.204860210418701, "learning_rate": 1.744034098052857e-05, "loss": 1.8143, "step": 61200 }, { "epoch": 0.3847190305030149, "grad_norm": 6.404733657836914, "learning_rate": 1.7439921879583918e-05, "loss": 1.8478, "step": 61210 }, { "epoch": 0.38478188281971204, "grad_norm": 6.132648944854736, "learning_rate": 1.7439502778639262e-05, "loss": 1.6354, "step": 61220 }, { "epoch": 0.38484473513640916, "grad_norm": 7.414051532745361, "learning_rate": 1.743908367769461e-05, "loss": 1.6365, "step": 61230 }, { "epoch": 0.38490758745310627, "grad_norm": 6.387333393096924, "learning_rate": 1.7438664576749956e-05, "loss": 1.4594, "step": 61240 }, { "epoch": 0.3849704397698034, "grad_norm": 6.648262977600098, "learning_rate": 1.7438245475805303e-05, "loss": 1.914, "step": 61250 }, { "epoch": 0.3850332920865005, "grad_norm": 7.251654624938965, "learning_rate": 1.743782637486065e-05, "loss": 1.6843, "step": 61260 }, { "epoch": 0.3850961444031976, "grad_norm": 7.042481422424316, "learning_rate": 1.7437407273915997e-05, "loss": 1.7401, "step": 61270 }, { "epoch": 0.38515899671989473, "grad_norm": 6.673430919647217, "learning_rate": 1.7436988172971344e-05, "loss": 1.4713, "step": 61280 }, { "epoch": 0.38522184903659185, "grad_norm": 6.783602237701416, "learning_rate": 1.743656907202669e-05, "loss": 1.7352, "step": 61290 }, { "epoch": 0.38528470135328896, "grad_norm": 6.437345027923584, "learning_rate": 1.743614997108204e-05, "loss": 1.7249, "step": 61300 }, { "epoch": 0.3853475536699861, "grad_norm": 8.222586631774902, "learning_rate": 1.7435730870137382e-05, "loss": 1.7668, "step": 61310 }, { "epoch": 0.3854104059866832, "grad_norm": 7.5672383308410645, "learning_rate": 1.743531176919273e-05, "loss": 1.9099, "step": 61320 }, { "epoch": 0.38547325830338025, "grad_norm": 6.734484672546387, "learning_rate": 1.7434892668248076e-05, "loss": 1.7436, "step": 61330 }, { "epoch": 0.38553611062007737, "grad_norm": 7.042230129241943, "learning_rate": 1.7434473567303423e-05, "loss": 1.6302, "step": 61340 }, { "epoch": 0.3855989629367745, "grad_norm": 6.5508551597595215, "learning_rate": 1.743405446635877e-05, "loss": 1.6532, "step": 61350 }, { "epoch": 0.3856618152534716, "grad_norm": 5.1469502449035645, "learning_rate": 1.7433635365414114e-05, "loss": 1.4144, "step": 61360 }, { "epoch": 0.3857246675701687, "grad_norm": 6.1664652824401855, "learning_rate": 1.743321626446946e-05, "loss": 1.5423, "step": 61370 }, { "epoch": 0.3857875198868658, "grad_norm": 7.240113735198975, "learning_rate": 1.7432797163524808e-05, "loss": 1.6542, "step": 61380 }, { "epoch": 0.38585037220356294, "grad_norm": 7.81046199798584, "learning_rate": 1.7432378062580155e-05, "loss": 1.9639, "step": 61390 }, { "epoch": 0.38591322452026006, "grad_norm": 7.0568037033081055, "learning_rate": 1.74319589616355e-05, "loss": 1.9366, "step": 61400 }, { "epoch": 0.38597607683695717, "grad_norm": 6.542177677154541, "learning_rate": 1.7431539860690846e-05, "loss": 1.7901, "step": 61410 }, { "epoch": 0.3860389291536543, "grad_norm": 7.521069049835205, "learning_rate": 1.7431120759746193e-05, "loss": 1.753, "step": 61420 }, { "epoch": 0.3861017814703514, "grad_norm": 6.601457118988037, "learning_rate": 1.743070165880154e-05, "loss": 1.7678, "step": 61430 }, { "epoch": 0.3861646337870485, "grad_norm": 7.136099338531494, "learning_rate": 1.7430282557856887e-05, "loss": 1.5849, "step": 61440 }, { "epoch": 0.38622748610374563, "grad_norm": 7.009994983673096, "learning_rate": 1.7429863456912234e-05, "loss": 1.493, "step": 61450 }, { "epoch": 0.3862903384204427, "grad_norm": 6.2310686111450195, "learning_rate": 1.7429444355967578e-05, "loss": 1.7945, "step": 61460 }, { "epoch": 0.3863531907371398, "grad_norm": 5.899852275848389, "learning_rate": 1.7429025255022925e-05, "loss": 1.7684, "step": 61470 }, { "epoch": 0.3864160430538369, "grad_norm": 6.108722686767578, "learning_rate": 1.7428606154078272e-05, "loss": 1.7854, "step": 61480 }, { "epoch": 0.38647889537053404, "grad_norm": 6.275115489959717, "learning_rate": 1.742818705313362e-05, "loss": 1.6947, "step": 61490 }, { "epoch": 0.38654174768723115, "grad_norm": 6.80659294128418, "learning_rate": 1.7427767952188966e-05, "loss": 1.7221, "step": 61500 }, { "epoch": 0.38660460000392827, "grad_norm": 6.598296642303467, "learning_rate": 1.7427348851244313e-05, "loss": 1.7728, "step": 61510 }, { "epoch": 0.3866674523206254, "grad_norm": 6.046689510345459, "learning_rate": 1.742692975029966e-05, "loss": 1.8357, "step": 61520 }, { "epoch": 0.3867303046373225, "grad_norm": 6.522720813751221, "learning_rate": 1.7426510649355004e-05, "loss": 1.8675, "step": 61530 }, { "epoch": 0.3867931569540196, "grad_norm": 8.329684257507324, "learning_rate": 1.742609154841035e-05, "loss": 1.8171, "step": 61540 }, { "epoch": 0.3868560092707167, "grad_norm": 5.505519390106201, "learning_rate": 1.7425672447465698e-05, "loss": 1.5185, "step": 61550 }, { "epoch": 0.38691886158741384, "grad_norm": 7.60694694519043, "learning_rate": 1.7425253346521045e-05, "loss": 1.8365, "step": 61560 }, { "epoch": 0.38698171390411096, "grad_norm": 5.743784427642822, "learning_rate": 1.7424834245576392e-05, "loss": 1.7219, "step": 61570 }, { "epoch": 0.3870445662208081, "grad_norm": 6.197338104248047, "learning_rate": 1.7424415144631736e-05, "loss": 1.6295, "step": 61580 }, { "epoch": 0.38710741853750513, "grad_norm": 7.487074851989746, "learning_rate": 1.7423996043687083e-05, "loss": 1.9203, "step": 61590 }, { "epoch": 0.38717027085420225, "grad_norm": 6.8445515632629395, "learning_rate": 1.742357694274243e-05, "loss": 1.6947, "step": 61600 }, { "epoch": 0.38723312317089936, "grad_norm": 7.205432891845703, "learning_rate": 1.7423157841797777e-05, "loss": 1.6026, "step": 61610 }, { "epoch": 0.3872959754875965, "grad_norm": 6.194027423858643, "learning_rate": 1.742273874085312e-05, "loss": 1.7346, "step": 61620 }, { "epoch": 0.3873588278042936, "grad_norm": 6.567793369293213, "learning_rate": 1.7422319639908468e-05, "loss": 1.7449, "step": 61630 }, { "epoch": 0.3874216801209907, "grad_norm": 5.166746616363525, "learning_rate": 1.7421900538963815e-05, "loss": 1.7528, "step": 61640 }, { "epoch": 0.3874845324376878, "grad_norm": 6.169304847717285, "learning_rate": 1.7421481438019162e-05, "loss": 1.4929, "step": 61650 }, { "epoch": 0.38754738475438494, "grad_norm": 7.2083587646484375, "learning_rate": 1.742106233707451e-05, "loss": 1.7279, "step": 61660 }, { "epoch": 0.38761023707108205, "grad_norm": 7.021596908569336, "learning_rate": 1.7420643236129856e-05, "loss": 1.8054, "step": 61670 }, { "epoch": 0.38767308938777917, "grad_norm": 6.886139392852783, "learning_rate": 1.7420224135185203e-05, "loss": 1.7988, "step": 61680 }, { "epoch": 0.3877359417044763, "grad_norm": 6.56005859375, "learning_rate": 1.741980503424055e-05, "loss": 1.4582, "step": 61690 }, { "epoch": 0.3877987940211734, "grad_norm": 7.746946334838867, "learning_rate": 1.7419385933295898e-05, "loss": 1.7964, "step": 61700 }, { "epoch": 0.38786164633787046, "grad_norm": 7.342988967895508, "learning_rate": 1.741896683235124e-05, "loss": 1.9857, "step": 61710 }, { "epoch": 0.3879244986545676, "grad_norm": 7.007579326629639, "learning_rate": 1.741854773140659e-05, "loss": 1.6829, "step": 61720 }, { "epoch": 0.3879873509712647, "grad_norm": 7.58625602722168, "learning_rate": 1.7418128630461935e-05, "loss": 1.779, "step": 61730 }, { "epoch": 0.3880502032879618, "grad_norm": 6.65321683883667, "learning_rate": 1.7417709529517282e-05, "loss": 1.8255, "step": 61740 }, { "epoch": 0.3881130556046589, "grad_norm": 5.722879886627197, "learning_rate": 1.741729042857263e-05, "loss": 1.7794, "step": 61750 }, { "epoch": 0.38817590792135603, "grad_norm": 5.910167217254639, "learning_rate": 1.7416871327627973e-05, "loss": 1.7098, "step": 61760 }, { "epoch": 0.38823876023805315, "grad_norm": 7.410051345825195, "learning_rate": 1.741645222668332e-05, "loss": 1.9137, "step": 61770 }, { "epoch": 0.38830161255475026, "grad_norm": 11.012842178344727, "learning_rate": 1.7416033125738667e-05, "loss": 1.9382, "step": 61780 }, { "epoch": 0.3883644648714474, "grad_norm": 5.913793563842773, "learning_rate": 1.7415614024794014e-05, "loss": 1.7182, "step": 61790 }, { "epoch": 0.3884273171881445, "grad_norm": 6.666558742523193, "learning_rate": 1.7415194923849358e-05, "loss": 1.6937, "step": 61800 }, { "epoch": 0.3884901695048416, "grad_norm": 6.517996311187744, "learning_rate": 1.7414775822904705e-05, "loss": 1.3727, "step": 61810 }, { "epoch": 0.3885530218215387, "grad_norm": 5.659692764282227, "learning_rate": 1.7414356721960052e-05, "loss": 1.7622, "step": 61820 }, { "epoch": 0.38861587413823584, "grad_norm": 6.496676445007324, "learning_rate": 1.74139376210154e-05, "loss": 1.7122, "step": 61830 }, { "epoch": 0.3886787264549329, "grad_norm": 6.02579927444458, "learning_rate": 1.7413518520070743e-05, "loss": 1.5066, "step": 61840 }, { "epoch": 0.38874157877163, "grad_norm": 6.5224785804748535, "learning_rate": 1.741309941912609e-05, "loss": 1.6482, "step": 61850 }, { "epoch": 0.38880443108832713, "grad_norm": 5.896265983581543, "learning_rate": 1.7412680318181437e-05, "loss": 1.7601, "step": 61860 }, { "epoch": 0.38886728340502424, "grad_norm": 7.0013275146484375, "learning_rate": 1.7412261217236784e-05, "loss": 1.8265, "step": 61870 }, { "epoch": 0.38893013572172136, "grad_norm": 7.07061243057251, "learning_rate": 1.741184211629213e-05, "loss": 1.85, "step": 61880 }, { "epoch": 0.3889929880384185, "grad_norm": 7.437373161315918, "learning_rate": 1.741142301534748e-05, "loss": 1.8319, "step": 61890 }, { "epoch": 0.3890558403551156, "grad_norm": 6.504358291625977, "learning_rate": 1.7411003914402825e-05, "loss": 1.7933, "step": 61900 }, { "epoch": 0.3891186926718127, "grad_norm": 9.265369415283203, "learning_rate": 1.7410584813458172e-05, "loss": 1.9063, "step": 61910 }, { "epoch": 0.3891815449885098, "grad_norm": 7.179880142211914, "learning_rate": 1.741016571251352e-05, "loss": 1.8708, "step": 61920 }, { "epoch": 0.38924439730520694, "grad_norm": 7.085618019104004, "learning_rate": 1.7409746611568863e-05, "loss": 1.6742, "step": 61930 }, { "epoch": 0.38930724962190405, "grad_norm": 6.503653049468994, "learning_rate": 1.740932751062421e-05, "loss": 1.7477, "step": 61940 }, { "epoch": 0.38937010193860117, "grad_norm": 7.99690580368042, "learning_rate": 1.7408908409679557e-05, "loss": 1.8255, "step": 61950 }, { "epoch": 0.3894329542552983, "grad_norm": 6.84827995300293, "learning_rate": 1.7408489308734904e-05, "loss": 1.7582, "step": 61960 }, { "epoch": 0.38949580657199534, "grad_norm": 6.803924083709717, "learning_rate": 1.740807020779025e-05, "loss": 1.8283, "step": 61970 }, { "epoch": 0.38955865888869246, "grad_norm": 6.952895164489746, "learning_rate": 1.7407651106845595e-05, "loss": 1.8876, "step": 61980 }, { "epoch": 0.38962151120538957, "grad_norm": 6.024576187133789, "learning_rate": 1.7407232005900942e-05, "loss": 1.7315, "step": 61990 }, { "epoch": 0.3896843635220867, "grad_norm": 6.177873611450195, "learning_rate": 1.740681290495629e-05, "loss": 1.7841, "step": 62000 }, { "epoch": 0.3897472158387838, "grad_norm": 7.057847499847412, "learning_rate": 1.7406393804011636e-05, "loss": 2.1219, "step": 62010 }, { "epoch": 0.3898100681554809, "grad_norm": 6.767395496368408, "learning_rate": 1.740597470306698e-05, "loss": 1.749, "step": 62020 }, { "epoch": 0.38987292047217803, "grad_norm": 7.810880661010742, "learning_rate": 1.7405555602122327e-05, "loss": 1.7328, "step": 62030 }, { "epoch": 0.38993577278887515, "grad_norm": 7.274215221405029, "learning_rate": 1.7405136501177674e-05, "loss": 1.7232, "step": 62040 }, { "epoch": 0.38999862510557226, "grad_norm": 6.604606628417969, "learning_rate": 1.740471740023302e-05, "loss": 1.5776, "step": 62050 }, { "epoch": 0.3900614774222694, "grad_norm": 7.570003986358643, "learning_rate": 1.740429829928837e-05, "loss": 1.6579, "step": 62060 }, { "epoch": 0.3901243297389665, "grad_norm": 7.769916534423828, "learning_rate": 1.7403879198343715e-05, "loss": 2.3428, "step": 62070 }, { "epoch": 0.3901871820556636, "grad_norm": 8.162944793701172, "learning_rate": 1.7403460097399063e-05, "loss": 1.8261, "step": 62080 }, { "epoch": 0.3902500343723607, "grad_norm": 7.546502113342285, "learning_rate": 1.7403040996454406e-05, "loss": 1.7824, "step": 62090 }, { "epoch": 0.3903128866890578, "grad_norm": 6.176581382751465, "learning_rate": 1.7402621895509753e-05, "loss": 1.9418, "step": 62100 }, { "epoch": 0.3903757390057549, "grad_norm": 7.330261707305908, "learning_rate": 1.74022027945651e-05, "loss": 1.6838, "step": 62110 }, { "epoch": 0.390438591322452, "grad_norm": 7.880338668823242, "learning_rate": 1.7401783693620447e-05, "loss": 1.9394, "step": 62120 }, { "epoch": 0.3905014436391491, "grad_norm": 6.187344551086426, "learning_rate": 1.7401364592675794e-05, "loss": 1.6459, "step": 62130 }, { "epoch": 0.39056429595584624, "grad_norm": 7.3748297691345215, "learning_rate": 1.740094549173114e-05, "loss": 1.8857, "step": 62140 }, { "epoch": 0.39062714827254336, "grad_norm": 7.511769771575928, "learning_rate": 1.7400526390786485e-05, "loss": 1.7479, "step": 62150 }, { "epoch": 0.39069000058924047, "grad_norm": 6.027830123901367, "learning_rate": 1.7400107289841832e-05, "loss": 1.6398, "step": 62160 }, { "epoch": 0.3907528529059376, "grad_norm": 6.44254732131958, "learning_rate": 1.739968818889718e-05, "loss": 1.885, "step": 62170 }, { "epoch": 0.3908157052226347, "grad_norm": 7.95977783203125, "learning_rate": 1.7399269087952526e-05, "loss": 1.8606, "step": 62180 }, { "epoch": 0.3908785575393318, "grad_norm": 6.054286479949951, "learning_rate": 1.7398849987007874e-05, "loss": 1.6732, "step": 62190 }, { "epoch": 0.39094140985602893, "grad_norm": 7.351673603057861, "learning_rate": 1.7398430886063217e-05, "loss": 1.7577, "step": 62200 }, { "epoch": 0.39100426217272605, "grad_norm": 7.569562911987305, "learning_rate": 1.7398011785118564e-05, "loss": 1.8935, "step": 62210 }, { "epoch": 0.3910671144894231, "grad_norm": 6.332158088684082, "learning_rate": 1.739759268417391e-05, "loss": 1.5981, "step": 62220 }, { "epoch": 0.3911299668061202, "grad_norm": 6.264937877655029, "learning_rate": 1.739717358322926e-05, "loss": 1.8077, "step": 62230 }, { "epoch": 0.39119281912281734, "grad_norm": 7.290078163146973, "learning_rate": 1.7396754482284602e-05, "loss": 1.7831, "step": 62240 }, { "epoch": 0.39125567143951445, "grad_norm": 7.162696361541748, "learning_rate": 1.739633538133995e-05, "loss": 1.9486, "step": 62250 }, { "epoch": 0.39131852375621157, "grad_norm": 8.368673324584961, "learning_rate": 1.7395916280395296e-05, "loss": 1.8482, "step": 62260 }, { "epoch": 0.3913813760729087, "grad_norm": 8.074396133422852, "learning_rate": 1.7395497179450643e-05, "loss": 2.0184, "step": 62270 }, { "epoch": 0.3914442283896058, "grad_norm": 6.705972194671631, "learning_rate": 1.739507807850599e-05, "loss": 1.7899, "step": 62280 }, { "epoch": 0.3915070807063029, "grad_norm": 7.192666053771973, "learning_rate": 1.7394658977561337e-05, "loss": 1.81, "step": 62290 }, { "epoch": 0.391569933023, "grad_norm": 7.759625434875488, "learning_rate": 1.7394239876616685e-05, "loss": 1.4557, "step": 62300 }, { "epoch": 0.39163278533969714, "grad_norm": 7.5127949714660645, "learning_rate": 1.739382077567203e-05, "loss": 1.6934, "step": 62310 }, { "epoch": 0.39169563765639426, "grad_norm": 7.240886688232422, "learning_rate": 1.739340167472738e-05, "loss": 1.7088, "step": 62320 }, { "epoch": 0.3917584899730914, "grad_norm": 7.17133092880249, "learning_rate": 1.7392982573782722e-05, "loss": 2.0221, "step": 62330 }, { "epoch": 0.3918213422897885, "grad_norm": 6.620835304260254, "learning_rate": 1.739256347283807e-05, "loss": 1.5721, "step": 62340 }, { "epoch": 0.39188419460648555, "grad_norm": 16.08406639099121, "learning_rate": 1.7392144371893416e-05, "loss": 1.9188, "step": 62350 }, { "epoch": 0.39194704692318266, "grad_norm": 7.027464389801025, "learning_rate": 1.7391725270948764e-05, "loss": 1.799, "step": 62360 }, { "epoch": 0.3920098992398798, "grad_norm": 7.47585391998291, "learning_rate": 1.739130617000411e-05, "loss": 1.8251, "step": 62370 }, { "epoch": 0.3920727515565769, "grad_norm": 6.758760929107666, "learning_rate": 1.7390887069059454e-05, "loss": 1.6702, "step": 62380 }, { "epoch": 0.392135603873274, "grad_norm": 8.870641708374023, "learning_rate": 1.73904679681148e-05, "loss": 1.7451, "step": 62390 }, { "epoch": 0.3921984561899711, "grad_norm": 7.466734409332275, "learning_rate": 1.739004886717015e-05, "loss": 1.8321, "step": 62400 }, { "epoch": 0.39226130850666824, "grad_norm": 7.565104961395264, "learning_rate": 1.7389629766225496e-05, "loss": 1.5627, "step": 62410 }, { "epoch": 0.39232416082336535, "grad_norm": 6.403449535369873, "learning_rate": 1.738921066528084e-05, "loss": 1.7451, "step": 62420 }, { "epoch": 0.39238701314006247, "grad_norm": 9.319280624389648, "learning_rate": 1.7388791564336186e-05, "loss": 1.9225, "step": 62430 }, { "epoch": 0.3924498654567596, "grad_norm": 6.362649917602539, "learning_rate": 1.7388372463391533e-05, "loss": 1.766, "step": 62440 }, { "epoch": 0.3925127177734567, "grad_norm": 5.756836414337158, "learning_rate": 1.738795336244688e-05, "loss": 1.6647, "step": 62450 }, { "epoch": 0.3925755700901538, "grad_norm": 6.929123401641846, "learning_rate": 1.7387534261502224e-05, "loss": 1.5016, "step": 62460 }, { "epoch": 0.39263842240685093, "grad_norm": 6.545899391174316, "learning_rate": 1.738711516055757e-05, "loss": 1.8822, "step": 62470 }, { "epoch": 0.392701274723548, "grad_norm": 5.8037109375, "learning_rate": 1.7386696059612918e-05, "loss": 1.99, "step": 62480 }, { "epoch": 0.3927641270402451, "grad_norm": 6.601983547210693, "learning_rate": 1.7386276958668265e-05, "loss": 1.8231, "step": 62490 }, { "epoch": 0.3928269793569422, "grad_norm": 7.560104846954346, "learning_rate": 1.7385857857723612e-05, "loss": 1.8229, "step": 62500 }, { "epoch": 0.39288983167363933, "grad_norm": 6.890317916870117, "learning_rate": 1.738543875677896e-05, "loss": 1.778, "step": 62510 }, { "epoch": 0.39295268399033645, "grad_norm": 7.7869672775268555, "learning_rate": 1.7385019655834307e-05, "loss": 1.9573, "step": 62520 }, { "epoch": 0.39301553630703356, "grad_norm": 7.312685489654541, "learning_rate": 1.7384600554889654e-05, "loss": 1.902, "step": 62530 }, { "epoch": 0.3930783886237307, "grad_norm": 7.124302387237549, "learning_rate": 1.7384181453945e-05, "loss": 1.6969, "step": 62540 }, { "epoch": 0.3931412409404278, "grad_norm": 6.825855731964111, "learning_rate": 1.7383762353000344e-05, "loss": 1.8034, "step": 62550 }, { "epoch": 0.3932040932571249, "grad_norm": 5.885743618011475, "learning_rate": 1.738334325205569e-05, "loss": 1.6697, "step": 62560 }, { "epoch": 0.393266945573822, "grad_norm": 7.019294738769531, "learning_rate": 1.738292415111104e-05, "loss": 1.736, "step": 62570 }, { "epoch": 0.39332979789051914, "grad_norm": 7.226141929626465, "learning_rate": 1.7382505050166386e-05, "loss": 1.8095, "step": 62580 }, { "epoch": 0.39339265020721625, "grad_norm": 6.208901405334473, "learning_rate": 1.7382085949221733e-05, "loss": 1.5413, "step": 62590 }, { "epoch": 0.39345550252391337, "grad_norm": 8.056857109069824, "learning_rate": 1.7381666848277076e-05, "loss": 1.7156, "step": 62600 }, { "epoch": 0.39351835484061043, "grad_norm": 7.027343273162842, "learning_rate": 1.7381247747332423e-05, "loss": 1.707, "step": 62610 }, { "epoch": 0.39358120715730754, "grad_norm": 6.758009910583496, "learning_rate": 1.738082864638777e-05, "loss": 1.6405, "step": 62620 }, { "epoch": 0.39364405947400466, "grad_norm": 7.0669331550598145, "learning_rate": 1.7380409545443118e-05, "loss": 1.6977, "step": 62630 }, { "epoch": 0.3937069117907018, "grad_norm": 8.230191230773926, "learning_rate": 1.737999044449846e-05, "loss": 2.0547, "step": 62640 }, { "epoch": 0.3937697641073989, "grad_norm": 6.7205681800842285, "learning_rate": 1.7379571343553808e-05, "loss": 1.7277, "step": 62650 }, { "epoch": 0.393832616424096, "grad_norm": 6.831026077270508, "learning_rate": 1.7379152242609155e-05, "loss": 1.7897, "step": 62660 }, { "epoch": 0.3938954687407931, "grad_norm": 7.239831924438477, "learning_rate": 1.7378733141664502e-05, "loss": 1.6177, "step": 62670 }, { "epoch": 0.39395832105749023, "grad_norm": 6.872457504272461, "learning_rate": 1.737831404071985e-05, "loss": 1.9241, "step": 62680 }, { "epoch": 0.39402117337418735, "grad_norm": 8.175776481628418, "learning_rate": 1.7377894939775197e-05, "loss": 1.7966, "step": 62690 }, { "epoch": 0.39408402569088447, "grad_norm": 7.143253803253174, "learning_rate": 1.7377475838830544e-05, "loss": 1.8268, "step": 62700 }, { "epoch": 0.3941468780075816, "grad_norm": 5.8643269538879395, "learning_rate": 1.7377056737885887e-05, "loss": 1.7752, "step": 62710 }, { "epoch": 0.3942097303242787, "grad_norm": 8.041696548461914, "learning_rate": 1.7376637636941234e-05, "loss": 1.8251, "step": 62720 }, { "epoch": 0.39427258264097576, "grad_norm": 7.070854187011719, "learning_rate": 1.737621853599658e-05, "loss": 1.6292, "step": 62730 }, { "epoch": 0.39433543495767287, "grad_norm": 8.591217041015625, "learning_rate": 1.737579943505193e-05, "loss": 1.7556, "step": 62740 }, { "epoch": 0.39439828727437, "grad_norm": 7.1850996017456055, "learning_rate": 1.7375380334107276e-05, "loss": 1.8506, "step": 62750 }, { "epoch": 0.3944611395910671, "grad_norm": 6.187245845794678, "learning_rate": 1.7374961233162623e-05, "loss": 1.7453, "step": 62760 }, { "epoch": 0.3945239919077642, "grad_norm": 8.487533569335938, "learning_rate": 1.7374542132217966e-05, "loss": 1.9688, "step": 62770 }, { "epoch": 0.39458684422446133, "grad_norm": 7.419347763061523, "learning_rate": 1.7374123031273313e-05, "loss": 1.8618, "step": 62780 }, { "epoch": 0.39464969654115845, "grad_norm": 7.015792369842529, "learning_rate": 1.737370393032866e-05, "loss": 1.6115, "step": 62790 }, { "epoch": 0.39471254885785556, "grad_norm": 6.974392890930176, "learning_rate": 1.7373284829384008e-05, "loss": 1.9494, "step": 62800 }, { "epoch": 0.3947754011745527, "grad_norm": 6.271472930908203, "learning_rate": 1.7372865728439355e-05, "loss": 1.5843, "step": 62810 }, { "epoch": 0.3948382534912498, "grad_norm": 6.81941032409668, "learning_rate": 1.73724466274947e-05, "loss": 1.6105, "step": 62820 }, { "epoch": 0.3949011058079469, "grad_norm": 5.670494556427002, "learning_rate": 1.7372027526550045e-05, "loss": 1.6365, "step": 62830 }, { "epoch": 0.394963958124644, "grad_norm": 7.861020565032959, "learning_rate": 1.7371608425605392e-05, "loss": 1.8997, "step": 62840 }, { "epoch": 0.39502681044134114, "grad_norm": 6.499483585357666, "learning_rate": 1.737118932466074e-05, "loss": 1.7477, "step": 62850 }, { "epoch": 0.3950896627580382, "grad_norm": 6.675130367279053, "learning_rate": 1.7370770223716083e-05, "loss": 1.7941, "step": 62860 }, { "epoch": 0.3951525150747353, "grad_norm": 7.489518165588379, "learning_rate": 1.737035112277143e-05, "loss": 1.754, "step": 62870 }, { "epoch": 0.3952153673914324, "grad_norm": 9.1337251663208, "learning_rate": 1.7369932021826777e-05, "loss": 1.7115, "step": 62880 }, { "epoch": 0.39527821970812954, "grad_norm": 6.008447647094727, "learning_rate": 1.7369512920882124e-05, "loss": 1.8006, "step": 62890 }, { "epoch": 0.39534107202482666, "grad_norm": 6.136394500732422, "learning_rate": 1.736909381993747e-05, "loss": 1.8918, "step": 62900 }, { "epoch": 0.39540392434152377, "grad_norm": 7.775247097015381, "learning_rate": 1.736867471899282e-05, "loss": 1.8088, "step": 62910 }, { "epoch": 0.3954667766582209, "grad_norm": 6.778098106384277, "learning_rate": 1.7368255618048166e-05, "loss": 1.7606, "step": 62920 }, { "epoch": 0.395529628974918, "grad_norm": 6.544760704040527, "learning_rate": 1.7367836517103513e-05, "loss": 1.7363, "step": 62930 }, { "epoch": 0.3955924812916151, "grad_norm": 6.746118068695068, "learning_rate": 1.736741741615886e-05, "loss": 1.8645, "step": 62940 }, { "epoch": 0.39565533360831223, "grad_norm": 6.127651214599609, "learning_rate": 1.7366998315214203e-05, "loss": 1.5936, "step": 62950 }, { "epoch": 0.39571818592500935, "grad_norm": 6.827913761138916, "learning_rate": 1.736657921426955e-05, "loss": 1.9603, "step": 62960 }, { "epoch": 0.39578103824170646, "grad_norm": 7.778383731842041, "learning_rate": 1.7366160113324898e-05, "loss": 1.7447, "step": 62970 }, { "epoch": 0.3958438905584036, "grad_norm": 7.143712997436523, "learning_rate": 1.7365741012380245e-05, "loss": 1.8716, "step": 62980 }, { "epoch": 0.39590674287510064, "grad_norm": 5.906332969665527, "learning_rate": 1.7365321911435592e-05, "loss": 1.5589, "step": 62990 }, { "epoch": 0.39596959519179775, "grad_norm": 7.487616539001465, "learning_rate": 1.7364902810490935e-05, "loss": 1.8074, "step": 63000 }, { "epoch": 0.39603244750849487, "grad_norm": 6.404102325439453, "learning_rate": 1.7364483709546282e-05, "loss": 1.5414, "step": 63010 }, { "epoch": 0.396095299825192, "grad_norm": 6.926724910736084, "learning_rate": 1.736406460860163e-05, "loss": 1.9859, "step": 63020 }, { "epoch": 0.3961581521418891, "grad_norm": 6.548278331756592, "learning_rate": 1.7363645507656977e-05, "loss": 1.6036, "step": 63030 }, { "epoch": 0.3962210044585862, "grad_norm": 7.005793571472168, "learning_rate": 1.736322640671232e-05, "loss": 1.9394, "step": 63040 }, { "epoch": 0.3962838567752833, "grad_norm": 6.672649383544922, "learning_rate": 1.7362807305767667e-05, "loss": 1.9095, "step": 63050 }, { "epoch": 0.39634670909198044, "grad_norm": 6.91698694229126, "learning_rate": 1.7362388204823014e-05, "loss": 1.5982, "step": 63060 }, { "epoch": 0.39640956140867756, "grad_norm": 6.953579425811768, "learning_rate": 1.736196910387836e-05, "loss": 1.5735, "step": 63070 }, { "epoch": 0.3964724137253747, "grad_norm": 6.486627101898193, "learning_rate": 1.736155000293371e-05, "loss": 1.5392, "step": 63080 }, { "epoch": 0.3965352660420718, "grad_norm": 7.088765621185303, "learning_rate": 1.7361130901989052e-05, "loss": 1.7184, "step": 63090 }, { "epoch": 0.3965981183587689, "grad_norm": 7.858634948730469, "learning_rate": 1.73607118010444e-05, "loss": 1.587, "step": 63100 }, { "epoch": 0.396660970675466, "grad_norm": 6.86468505859375, "learning_rate": 1.7360292700099746e-05, "loss": 1.6296, "step": 63110 }, { "epoch": 0.3967238229921631, "grad_norm": 7.362751007080078, "learning_rate": 1.7359873599155093e-05, "loss": 1.8125, "step": 63120 }, { "epoch": 0.3967866753088602, "grad_norm": 6.04849910736084, "learning_rate": 1.735945449821044e-05, "loss": 1.9038, "step": 63130 }, { "epoch": 0.3968495276255573, "grad_norm": 7.203058242797852, "learning_rate": 1.7359035397265788e-05, "loss": 1.7335, "step": 63140 }, { "epoch": 0.3969123799422544, "grad_norm": 7.279017448425293, "learning_rate": 1.7358616296321135e-05, "loss": 1.9118, "step": 63150 }, { "epoch": 0.39697523225895154, "grad_norm": 7.012674808502197, "learning_rate": 1.7358197195376482e-05, "loss": 1.7824, "step": 63160 }, { "epoch": 0.39703808457564865, "grad_norm": 7.520296573638916, "learning_rate": 1.7357778094431825e-05, "loss": 1.686, "step": 63170 }, { "epoch": 0.39710093689234577, "grad_norm": 7.244654178619385, "learning_rate": 1.7357358993487173e-05, "loss": 1.749, "step": 63180 }, { "epoch": 0.3971637892090429, "grad_norm": 7.64955472946167, "learning_rate": 1.735693989254252e-05, "loss": 1.5669, "step": 63190 }, { "epoch": 0.39722664152574, "grad_norm": 8.233468055725098, "learning_rate": 1.7356520791597867e-05, "loss": 1.4642, "step": 63200 }, { "epoch": 0.3972894938424371, "grad_norm": 6.241605758666992, "learning_rate": 1.7356101690653214e-05, "loss": 1.813, "step": 63210 }, { "epoch": 0.39735234615913423, "grad_norm": 7.647136688232422, "learning_rate": 1.7355682589708557e-05, "loss": 1.7688, "step": 63220 }, { "epoch": 0.39741519847583134, "grad_norm": 6.980048179626465, "learning_rate": 1.7355263488763904e-05, "loss": 1.7126, "step": 63230 }, { "epoch": 0.3974780507925284, "grad_norm": 9.072611808776855, "learning_rate": 1.735484438781925e-05, "loss": 1.6422, "step": 63240 }, { "epoch": 0.3975409031092255, "grad_norm": 7.8335041999816895, "learning_rate": 1.73544252868746e-05, "loss": 1.997, "step": 63250 }, { "epoch": 0.39760375542592263, "grad_norm": 7.354805946350098, "learning_rate": 1.7354006185929942e-05, "loss": 1.7942, "step": 63260 }, { "epoch": 0.39766660774261975, "grad_norm": 7.97997522354126, "learning_rate": 1.735358708498529e-05, "loss": 1.856, "step": 63270 }, { "epoch": 0.39772946005931686, "grad_norm": 8.068892478942871, "learning_rate": 1.7353167984040636e-05, "loss": 1.7931, "step": 63280 }, { "epoch": 0.397792312376014, "grad_norm": 6.895265102386475, "learning_rate": 1.7352748883095984e-05, "loss": 1.9954, "step": 63290 }, { "epoch": 0.3978551646927111, "grad_norm": 7.230385780334473, "learning_rate": 1.735232978215133e-05, "loss": 1.7045, "step": 63300 }, { "epoch": 0.3979180170094082, "grad_norm": 8.533286094665527, "learning_rate": 1.7351910681206678e-05, "loss": 1.5355, "step": 63310 }, { "epoch": 0.3979808693261053, "grad_norm": 7.085193634033203, "learning_rate": 1.7351491580262025e-05, "loss": 1.5851, "step": 63320 }, { "epoch": 0.39804372164280244, "grad_norm": 6.402557849884033, "learning_rate": 1.7351072479317372e-05, "loss": 1.7385, "step": 63330 }, { "epoch": 0.39810657395949955, "grad_norm": 7.216492652893066, "learning_rate": 1.7350653378372715e-05, "loss": 1.8626, "step": 63340 }, { "epoch": 0.39816942627619667, "grad_norm": 7.015349388122559, "learning_rate": 1.7350234277428063e-05, "loss": 1.9229, "step": 63350 }, { "epoch": 0.3982322785928938, "grad_norm": 6.161546230316162, "learning_rate": 1.734981517648341e-05, "loss": 1.9836, "step": 63360 }, { "epoch": 0.39829513090959084, "grad_norm": 7.59517240524292, "learning_rate": 1.7349396075538757e-05, "loss": 1.6441, "step": 63370 }, { "epoch": 0.39835798322628796, "grad_norm": 6.2155561447143555, "learning_rate": 1.7348976974594104e-05, "loss": 1.767, "step": 63380 }, { "epoch": 0.3984208355429851, "grad_norm": 6.6172990798950195, "learning_rate": 1.7348557873649447e-05, "loss": 1.5974, "step": 63390 }, { "epoch": 0.3984836878596822, "grad_norm": 6.872488021850586, "learning_rate": 1.7348138772704795e-05, "loss": 1.7944, "step": 63400 }, { "epoch": 0.3985465401763793, "grad_norm": 6.841820240020752, "learning_rate": 1.734771967176014e-05, "loss": 1.7968, "step": 63410 }, { "epoch": 0.3986093924930764, "grad_norm": 7.346033573150635, "learning_rate": 1.734730057081549e-05, "loss": 1.8267, "step": 63420 }, { "epoch": 0.39867224480977353, "grad_norm": 6.4456305503845215, "learning_rate": 1.7346881469870836e-05, "loss": 1.8374, "step": 63430 }, { "epoch": 0.39873509712647065, "grad_norm": 5.8930463790893555, "learning_rate": 1.734646236892618e-05, "loss": 1.442, "step": 63440 }, { "epoch": 0.39879794944316777, "grad_norm": 7.168188095092773, "learning_rate": 1.7346043267981526e-05, "loss": 1.9853, "step": 63450 }, { "epoch": 0.3988608017598649, "grad_norm": 5.9383745193481445, "learning_rate": 1.7345624167036874e-05, "loss": 2.0134, "step": 63460 }, { "epoch": 0.398923654076562, "grad_norm": 6.50671911239624, "learning_rate": 1.734520506609222e-05, "loss": 1.6749, "step": 63470 }, { "epoch": 0.3989865063932591, "grad_norm": 6.94907808303833, "learning_rate": 1.7344785965147564e-05, "loss": 1.8436, "step": 63480 }, { "epoch": 0.3990493587099562, "grad_norm": 7.192526340484619, "learning_rate": 1.734436686420291e-05, "loss": 1.8829, "step": 63490 }, { "epoch": 0.3991122110266533, "grad_norm": 6.359023571014404, "learning_rate": 1.734394776325826e-05, "loss": 1.8296, "step": 63500 }, { "epoch": 0.3991750633433504, "grad_norm": 6.977709770202637, "learning_rate": 1.7343528662313606e-05, "loss": 1.658, "step": 63510 }, { "epoch": 0.3992379156600475, "grad_norm": 8.42409896850586, "learning_rate": 1.7343109561368953e-05, "loss": 1.8929, "step": 63520 }, { "epoch": 0.39930076797674463, "grad_norm": 6.620844841003418, "learning_rate": 1.73426904604243e-05, "loss": 1.7655, "step": 63530 }, { "epoch": 0.39936362029344175, "grad_norm": 7.603841304779053, "learning_rate": 1.7342271359479647e-05, "loss": 1.9693, "step": 63540 }, { "epoch": 0.39942647261013886, "grad_norm": 6.572299480438232, "learning_rate": 1.7341852258534994e-05, "loss": 1.5739, "step": 63550 }, { "epoch": 0.399489324926836, "grad_norm": 6.238868713378906, "learning_rate": 1.734143315759034e-05, "loss": 1.9381, "step": 63560 }, { "epoch": 0.3995521772435331, "grad_norm": 7.944374084472656, "learning_rate": 1.7341014056645685e-05, "loss": 1.9841, "step": 63570 }, { "epoch": 0.3996150295602302, "grad_norm": 8.332205772399902, "learning_rate": 1.734059495570103e-05, "loss": 1.8276, "step": 63580 }, { "epoch": 0.3996778818769273, "grad_norm": 7.850004196166992, "learning_rate": 1.734017585475638e-05, "loss": 1.6116, "step": 63590 }, { "epoch": 0.39974073419362444, "grad_norm": 5.909306049346924, "learning_rate": 1.7339756753811726e-05, "loss": 1.7175, "step": 63600 }, { "epoch": 0.39980358651032155, "grad_norm": 6.362872123718262, "learning_rate": 1.7339337652867073e-05, "loss": 1.6664, "step": 63610 }, { "epoch": 0.39986643882701867, "grad_norm": 6.35066032409668, "learning_rate": 1.7338918551922417e-05, "loss": 1.5737, "step": 63620 }, { "epoch": 0.3999292911437157, "grad_norm": 6.947273254394531, "learning_rate": 1.7338499450977764e-05, "loss": 1.8287, "step": 63630 }, { "epoch": 0.39999214346041284, "grad_norm": 6.292657375335693, "learning_rate": 1.733808035003311e-05, "loss": 1.8216, "step": 63640 }, { "epoch": 0.40005499577710996, "grad_norm": 6.504972457885742, "learning_rate": 1.7337661249088458e-05, "loss": 1.8389, "step": 63650 }, { "epoch": 0.40011784809380707, "grad_norm": 6.173849582672119, "learning_rate": 1.73372421481438e-05, "loss": 1.6541, "step": 63660 }, { "epoch": 0.4001807004105042, "grad_norm": 7.108821868896484, "learning_rate": 1.733682304719915e-05, "loss": 1.5928, "step": 63670 }, { "epoch": 0.4002435527272013, "grad_norm": 5.635346412658691, "learning_rate": 1.7336403946254496e-05, "loss": 1.7374, "step": 63680 }, { "epoch": 0.4003064050438984, "grad_norm": 6.614495754241943, "learning_rate": 1.7335984845309843e-05, "loss": 1.6753, "step": 63690 }, { "epoch": 0.40036925736059553, "grad_norm": 6.775355815887451, "learning_rate": 1.733556574436519e-05, "loss": 1.7663, "step": 63700 }, { "epoch": 0.40043210967729265, "grad_norm": 6.525432109832764, "learning_rate": 1.7335146643420537e-05, "loss": 1.7315, "step": 63710 }, { "epoch": 0.40049496199398976, "grad_norm": 7.270900726318359, "learning_rate": 1.733472754247588e-05, "loss": 1.9045, "step": 63720 }, { "epoch": 0.4005578143106869, "grad_norm": 7.312255859375, "learning_rate": 1.7334308441531228e-05, "loss": 1.7577, "step": 63730 }, { "epoch": 0.400620666627384, "grad_norm": 6.933480262756348, "learning_rate": 1.7333889340586575e-05, "loss": 1.6655, "step": 63740 }, { "epoch": 0.40068351894408105, "grad_norm": 7.075981616973877, "learning_rate": 1.733347023964192e-05, "loss": 1.7332, "step": 63750 }, { "epoch": 0.40074637126077817, "grad_norm": 7.954800128936768, "learning_rate": 1.733305113869727e-05, "loss": 1.8044, "step": 63760 }, { "epoch": 0.4008092235774753, "grad_norm": 6.6571455001831055, "learning_rate": 1.7332632037752616e-05, "loss": 1.6542, "step": 63770 }, { "epoch": 0.4008720758941724, "grad_norm": 6.37064266204834, "learning_rate": 1.7332212936807963e-05, "loss": 1.8351, "step": 63780 }, { "epoch": 0.4009349282108695, "grad_norm": 6.50276517868042, "learning_rate": 1.7331793835863307e-05, "loss": 1.5103, "step": 63790 }, { "epoch": 0.4009977805275666, "grad_norm": 5.96057653427124, "learning_rate": 1.7331374734918654e-05, "loss": 1.5626, "step": 63800 }, { "epoch": 0.40106063284426374, "grad_norm": 6.829263687133789, "learning_rate": 1.7330955633974e-05, "loss": 1.4947, "step": 63810 }, { "epoch": 0.40112348516096086, "grad_norm": 7.6001715660095215, "learning_rate": 1.7330536533029348e-05, "loss": 1.8695, "step": 63820 }, { "epoch": 0.401186337477658, "grad_norm": 5.44834566116333, "learning_rate": 1.7330117432084695e-05, "loss": 1.6288, "step": 63830 }, { "epoch": 0.4012491897943551, "grad_norm": 7.005091190338135, "learning_rate": 1.732969833114004e-05, "loss": 1.5295, "step": 63840 }, { "epoch": 0.4013120421110522, "grad_norm": 6.27988338470459, "learning_rate": 1.7329279230195386e-05, "loss": 1.6237, "step": 63850 }, { "epoch": 0.4013748944277493, "grad_norm": 8.009944915771484, "learning_rate": 1.7328860129250733e-05, "loss": 1.64, "step": 63860 }, { "epoch": 0.40143774674444643, "grad_norm": 6.172440528869629, "learning_rate": 1.732844102830608e-05, "loss": 1.6258, "step": 63870 }, { "epoch": 0.4015005990611435, "grad_norm": 6.858620643615723, "learning_rate": 1.7328021927361423e-05, "loss": 1.777, "step": 63880 }, { "epoch": 0.4015634513778406, "grad_norm": 7.138575553894043, "learning_rate": 1.732760282641677e-05, "loss": 1.6494, "step": 63890 }, { "epoch": 0.4016263036945377, "grad_norm": 7.080873012542725, "learning_rate": 1.7327183725472118e-05, "loss": 1.9679, "step": 63900 }, { "epoch": 0.40168915601123484, "grad_norm": 6.977520942687988, "learning_rate": 1.7326764624527465e-05, "loss": 1.8074, "step": 63910 }, { "epoch": 0.40175200832793195, "grad_norm": 6.923065662384033, "learning_rate": 1.7326345523582812e-05, "loss": 1.7449, "step": 63920 }, { "epoch": 0.40181486064462907, "grad_norm": 7.034780979156494, "learning_rate": 1.732592642263816e-05, "loss": 1.7452, "step": 63930 }, { "epoch": 0.4018777129613262, "grad_norm": 6.530808448791504, "learning_rate": 1.7325507321693506e-05, "loss": 1.4537, "step": 63940 }, { "epoch": 0.4019405652780233, "grad_norm": 7.575244426727295, "learning_rate": 1.7325088220748853e-05, "loss": 1.7735, "step": 63950 }, { "epoch": 0.4020034175947204, "grad_norm": 7.449587345123291, "learning_rate": 1.73246691198042e-05, "loss": 1.6958, "step": 63960 }, { "epoch": 0.40206626991141753, "grad_norm": 6.258814811706543, "learning_rate": 1.7324250018859544e-05, "loss": 1.8769, "step": 63970 }, { "epoch": 0.40212912222811464, "grad_norm": 6.345718860626221, "learning_rate": 1.732383091791489e-05, "loss": 1.7294, "step": 63980 }, { "epoch": 0.40219197454481176, "grad_norm": 7.540582656860352, "learning_rate": 1.7323411816970238e-05, "loss": 1.7429, "step": 63990 }, { "epoch": 0.4022548268615089, "grad_norm": 7.103134632110596, "learning_rate": 1.7322992716025585e-05, "loss": 1.6767, "step": 64000 }, { "epoch": 0.40231767917820593, "grad_norm": 5.747287750244141, "learning_rate": 1.732257361508093e-05, "loss": 1.5247, "step": 64010 }, { "epoch": 0.40238053149490305, "grad_norm": 8.805617332458496, "learning_rate": 1.7322154514136276e-05, "loss": 1.6903, "step": 64020 }, { "epoch": 0.40244338381160016, "grad_norm": 7.009683132171631, "learning_rate": 1.7321735413191623e-05, "loss": 1.7406, "step": 64030 }, { "epoch": 0.4025062361282973, "grad_norm": 6.678035736083984, "learning_rate": 1.732131631224697e-05, "loss": 1.7652, "step": 64040 }, { "epoch": 0.4025690884449944, "grad_norm": 6.537189960479736, "learning_rate": 1.7320897211302317e-05, "loss": 1.7145, "step": 64050 }, { "epoch": 0.4026319407616915, "grad_norm": 6.691839218139648, "learning_rate": 1.732047811035766e-05, "loss": 1.9445, "step": 64060 }, { "epoch": 0.4026947930783886, "grad_norm": 7.19918155670166, "learning_rate": 1.7320059009413008e-05, "loss": 1.8886, "step": 64070 }, { "epoch": 0.40275764539508574, "grad_norm": 6.629178047180176, "learning_rate": 1.7319639908468355e-05, "loss": 1.7704, "step": 64080 }, { "epoch": 0.40282049771178285, "grad_norm": 6.735805034637451, "learning_rate": 1.7319220807523702e-05, "loss": 1.6564, "step": 64090 }, { "epoch": 0.40288335002847997, "grad_norm": 7.237814426422119, "learning_rate": 1.7318801706579045e-05, "loss": 1.6536, "step": 64100 }, { "epoch": 0.4029462023451771, "grad_norm": 6.82144832611084, "learning_rate": 1.7318382605634392e-05, "loss": 1.6636, "step": 64110 }, { "epoch": 0.4030090546618742, "grad_norm": 6.825602054595947, "learning_rate": 1.731796350468974e-05, "loss": 1.8003, "step": 64120 }, { "epoch": 0.4030719069785713, "grad_norm": 6.9392619132995605, "learning_rate": 1.7317544403745087e-05, "loss": 1.6909, "step": 64130 }, { "epoch": 0.4031347592952684, "grad_norm": 7.321532249450684, "learning_rate": 1.7317125302800434e-05, "loss": 1.8516, "step": 64140 }, { "epoch": 0.4031976116119655, "grad_norm": 7.79758882522583, "learning_rate": 1.731670620185578e-05, "loss": 1.7862, "step": 64150 }, { "epoch": 0.4032604639286626, "grad_norm": 6.494131565093994, "learning_rate": 1.7316287100911128e-05, "loss": 1.7009, "step": 64160 }, { "epoch": 0.4033233162453597, "grad_norm": 6.491816997528076, "learning_rate": 1.7315867999966475e-05, "loss": 1.7408, "step": 64170 }, { "epoch": 0.40338616856205683, "grad_norm": 6.526629447937012, "learning_rate": 1.7315448899021822e-05, "loss": 1.7, "step": 64180 }, { "epoch": 0.40344902087875395, "grad_norm": 8.186490058898926, "learning_rate": 1.7315029798077166e-05, "loss": 1.7309, "step": 64190 }, { "epoch": 0.40351187319545107, "grad_norm": 7.1548638343811035, "learning_rate": 1.7314610697132513e-05, "loss": 1.827, "step": 64200 }, { "epoch": 0.4035747255121482, "grad_norm": 7.3159918785095215, "learning_rate": 1.731419159618786e-05, "loss": 1.644, "step": 64210 }, { "epoch": 0.4036375778288453, "grad_norm": 7.803012847900391, "learning_rate": 1.7313772495243207e-05, "loss": 1.7641, "step": 64220 }, { "epoch": 0.4037004301455424, "grad_norm": 7.93739652633667, "learning_rate": 1.7313353394298554e-05, "loss": 1.6864, "step": 64230 }, { "epoch": 0.4037632824622395, "grad_norm": 7.197110652923584, "learning_rate": 1.7312934293353898e-05, "loss": 1.8326, "step": 64240 }, { "epoch": 0.40382613477893664, "grad_norm": 6.009327411651611, "learning_rate": 1.7312515192409245e-05, "loss": 1.6372, "step": 64250 }, { "epoch": 0.40388898709563376, "grad_norm": 6.794830799102783, "learning_rate": 1.7312096091464592e-05, "loss": 1.9629, "step": 64260 }, { "epoch": 0.4039518394123308, "grad_norm": 6.994704723358154, "learning_rate": 1.731167699051994e-05, "loss": 1.9373, "step": 64270 }, { "epoch": 0.40401469172902793, "grad_norm": 7.244943141937256, "learning_rate": 1.7311257889575283e-05, "loss": 1.616, "step": 64280 }, { "epoch": 0.40407754404572505, "grad_norm": 5.833102703094482, "learning_rate": 1.731083878863063e-05, "loss": 1.801, "step": 64290 }, { "epoch": 0.40414039636242216, "grad_norm": 7.377758502960205, "learning_rate": 1.7310419687685977e-05, "loss": 1.7893, "step": 64300 }, { "epoch": 0.4042032486791193, "grad_norm": 6.23237419128418, "learning_rate": 1.7310000586741324e-05, "loss": 1.7011, "step": 64310 }, { "epoch": 0.4042661009958164, "grad_norm": 7.188849449157715, "learning_rate": 1.730958148579667e-05, "loss": 1.6293, "step": 64320 }, { "epoch": 0.4043289533125135, "grad_norm": 7.2781243324279785, "learning_rate": 1.7309162384852018e-05, "loss": 1.7722, "step": 64330 }, { "epoch": 0.4043918056292106, "grad_norm": 6.869524955749512, "learning_rate": 1.730874328390736e-05, "loss": 1.8963, "step": 64340 }, { "epoch": 0.40445465794590774, "grad_norm": 8.475284576416016, "learning_rate": 1.730832418296271e-05, "loss": 1.7004, "step": 64350 }, { "epoch": 0.40451751026260485, "grad_norm": 5.951559543609619, "learning_rate": 1.7307905082018056e-05, "loss": 1.4649, "step": 64360 }, { "epoch": 0.40458036257930197, "grad_norm": 8.366894721984863, "learning_rate": 1.7307485981073403e-05, "loss": 1.7709, "step": 64370 }, { "epoch": 0.4046432148959991, "grad_norm": 6.629880428314209, "learning_rate": 1.730706688012875e-05, "loss": 1.7315, "step": 64380 }, { "epoch": 0.40470606721269614, "grad_norm": 6.540175914764404, "learning_rate": 1.7306647779184097e-05, "loss": 1.691, "step": 64390 }, { "epoch": 0.40476891952939326, "grad_norm": 7.060180187225342, "learning_rate": 1.7306228678239444e-05, "loss": 1.7188, "step": 64400 }, { "epoch": 0.40483177184609037, "grad_norm": 8.70560073852539, "learning_rate": 1.7305809577294788e-05, "loss": 1.8337, "step": 64410 }, { "epoch": 0.4048946241627875, "grad_norm": 15.648215293884277, "learning_rate": 1.7305390476350135e-05, "loss": 1.5735, "step": 64420 }, { "epoch": 0.4049574764794846, "grad_norm": 6.112917900085449, "learning_rate": 1.7304971375405482e-05, "loss": 1.5983, "step": 64430 }, { "epoch": 0.4050203287961817, "grad_norm": 6.319818019866943, "learning_rate": 1.730455227446083e-05, "loss": 1.8616, "step": 64440 }, { "epoch": 0.40508318111287883, "grad_norm": 6.186047554016113, "learning_rate": 1.7304133173516176e-05, "loss": 1.515, "step": 64450 }, { "epoch": 0.40514603342957595, "grad_norm": 7.4625139236450195, "learning_rate": 1.730371407257152e-05, "loss": 1.6605, "step": 64460 }, { "epoch": 0.40520888574627306, "grad_norm": 5.847615718841553, "learning_rate": 1.7303294971626867e-05, "loss": 1.6329, "step": 64470 }, { "epoch": 0.4052717380629702, "grad_norm": 7.5985894203186035, "learning_rate": 1.7302875870682214e-05, "loss": 1.7322, "step": 64480 }, { "epoch": 0.4053345903796673, "grad_norm": 6.878131866455078, "learning_rate": 1.730245676973756e-05, "loss": 1.8163, "step": 64490 }, { "epoch": 0.4053974426963644, "grad_norm": 6.180505275726318, "learning_rate": 1.7302037668792905e-05, "loss": 1.566, "step": 64500 }, { "epoch": 0.4054602950130615, "grad_norm": 7.333800315856934, "learning_rate": 1.730161856784825e-05, "loss": 1.8852, "step": 64510 }, { "epoch": 0.4055231473297586, "grad_norm": 6.708546161651611, "learning_rate": 1.73011994669036e-05, "loss": 1.7546, "step": 64520 }, { "epoch": 0.4055859996464557, "grad_norm": 6.404630661010742, "learning_rate": 1.7300780365958946e-05, "loss": 1.9791, "step": 64530 }, { "epoch": 0.4056488519631528, "grad_norm": 6.5063557624816895, "learning_rate": 1.7300361265014293e-05, "loss": 1.8629, "step": 64540 }, { "epoch": 0.4057117042798499, "grad_norm": 6.890121936798096, "learning_rate": 1.729994216406964e-05, "loss": 1.8914, "step": 64550 }, { "epoch": 0.40577455659654704, "grad_norm": 6.5508012771606445, "learning_rate": 1.7299523063124987e-05, "loss": 1.8905, "step": 64560 }, { "epoch": 0.40583740891324416, "grad_norm": 6.7240447998046875, "learning_rate": 1.7299103962180334e-05, "loss": 1.6553, "step": 64570 }, { "epoch": 0.4059002612299413, "grad_norm": 7.4773077964782715, "learning_rate": 1.729868486123568e-05, "loss": 1.7865, "step": 64580 }, { "epoch": 0.4059631135466384, "grad_norm": 6.3153533935546875, "learning_rate": 1.7298265760291025e-05, "loss": 1.7086, "step": 64590 }, { "epoch": 0.4060259658633355, "grad_norm": 7.008026123046875, "learning_rate": 1.7297846659346372e-05, "loss": 1.5691, "step": 64600 }, { "epoch": 0.4060888181800326, "grad_norm": 7.000103950500488, "learning_rate": 1.729742755840172e-05, "loss": 2.2333, "step": 64610 }, { "epoch": 0.40615167049672973, "grad_norm": 6.496999263763428, "learning_rate": 1.7297008457457066e-05, "loss": 1.6231, "step": 64620 }, { "epoch": 0.40621452281342685, "grad_norm": 6.620175361633301, "learning_rate": 1.729658935651241e-05, "loss": 1.753, "step": 64630 }, { "epoch": 0.40627737513012396, "grad_norm": 6.827455043792725, "learning_rate": 1.7296170255567757e-05, "loss": 1.7794, "step": 64640 }, { "epoch": 0.406340227446821, "grad_norm": 8.18285846710205, "learning_rate": 1.7295751154623104e-05, "loss": 1.7071, "step": 64650 }, { "epoch": 0.40640307976351814, "grad_norm": 6.997250556945801, "learning_rate": 1.729533205367845e-05, "loss": 1.8244, "step": 64660 }, { "epoch": 0.40646593208021525, "grad_norm": 7.275681972503662, "learning_rate": 1.7294912952733798e-05, "loss": 1.8053, "step": 64670 }, { "epoch": 0.40652878439691237, "grad_norm": 6.914024353027344, "learning_rate": 1.729449385178914e-05, "loss": 1.6802, "step": 64680 }, { "epoch": 0.4065916367136095, "grad_norm": 7.151669979095459, "learning_rate": 1.729407475084449e-05, "loss": 1.5671, "step": 64690 }, { "epoch": 0.4066544890303066, "grad_norm": 7.415504455566406, "learning_rate": 1.7293655649899836e-05, "loss": 1.667, "step": 64700 }, { "epoch": 0.4067173413470037, "grad_norm": 7.724193572998047, "learning_rate": 1.7293236548955183e-05, "loss": 1.7058, "step": 64710 }, { "epoch": 0.40678019366370083, "grad_norm": 6.968172550201416, "learning_rate": 1.7292817448010527e-05, "loss": 1.8305, "step": 64720 }, { "epoch": 0.40684304598039794, "grad_norm": 6.436429023742676, "learning_rate": 1.7292398347065874e-05, "loss": 1.8455, "step": 64730 }, { "epoch": 0.40690589829709506, "grad_norm": 8.1800537109375, "learning_rate": 1.729197924612122e-05, "loss": 1.7654, "step": 64740 }, { "epoch": 0.4069687506137922, "grad_norm": 6.217105865478516, "learning_rate": 1.7291560145176568e-05, "loss": 1.6422, "step": 64750 }, { "epoch": 0.4070316029304893, "grad_norm": 6.0421624183654785, "learning_rate": 1.7291141044231915e-05, "loss": 1.781, "step": 64760 }, { "epoch": 0.4070944552471864, "grad_norm": 8.104823112487793, "learning_rate": 1.7290721943287262e-05, "loss": 1.9295, "step": 64770 }, { "epoch": 0.40715730756388346, "grad_norm": 8.161705017089844, "learning_rate": 1.729030284234261e-05, "loss": 1.6782, "step": 64780 }, { "epoch": 0.4072201598805806, "grad_norm": 7.609684467315674, "learning_rate": 1.7289883741397956e-05, "loss": 1.6898, "step": 64790 }, { "epoch": 0.4072830121972777, "grad_norm": 6.365793228149414, "learning_rate": 1.7289464640453303e-05, "loss": 1.7787, "step": 64800 }, { "epoch": 0.4073458645139748, "grad_norm": 6.17117166519165, "learning_rate": 1.7289045539508647e-05, "loss": 1.699, "step": 64810 }, { "epoch": 0.4074087168306719, "grad_norm": 6.393515110015869, "learning_rate": 1.7288626438563994e-05, "loss": 1.6626, "step": 64820 }, { "epoch": 0.40747156914736904, "grad_norm": 7.535359859466553, "learning_rate": 1.728820733761934e-05, "loss": 1.9596, "step": 64830 }, { "epoch": 0.40753442146406615, "grad_norm": 5.87127685546875, "learning_rate": 1.7287788236674688e-05, "loss": 1.9254, "step": 64840 }, { "epoch": 0.40759727378076327, "grad_norm": 7.311943531036377, "learning_rate": 1.7287369135730035e-05, "loss": 1.6165, "step": 64850 }, { "epoch": 0.4076601260974604, "grad_norm": 8.07356071472168, "learning_rate": 1.728695003478538e-05, "loss": 2.111, "step": 64860 }, { "epoch": 0.4077229784141575, "grad_norm": 6.676053047180176, "learning_rate": 1.7286530933840726e-05, "loss": 1.8156, "step": 64870 }, { "epoch": 0.4077858307308546, "grad_norm": 6.35545539855957, "learning_rate": 1.7286111832896073e-05, "loss": 1.6365, "step": 64880 }, { "epoch": 0.40784868304755173, "grad_norm": 7.015282154083252, "learning_rate": 1.728569273195142e-05, "loss": 1.6851, "step": 64890 }, { "epoch": 0.4079115353642488, "grad_norm": 6.763131618499756, "learning_rate": 1.7285273631006764e-05, "loss": 1.6802, "step": 64900 }, { "epoch": 0.4079743876809459, "grad_norm": 7.126100063323975, "learning_rate": 1.728485453006211e-05, "loss": 1.7282, "step": 64910 }, { "epoch": 0.408037239997643, "grad_norm": 7.30333948135376, "learning_rate": 1.7284435429117458e-05, "loss": 1.7624, "step": 64920 }, { "epoch": 0.40810009231434013, "grad_norm": 6.8902058601379395, "learning_rate": 1.7284016328172805e-05, "loss": 1.8086, "step": 64930 }, { "epoch": 0.40816294463103725, "grad_norm": 6.535923957824707, "learning_rate": 1.7283597227228152e-05, "loss": 1.5891, "step": 64940 }, { "epoch": 0.40822579694773437, "grad_norm": 6.411938190460205, "learning_rate": 1.72831781262835e-05, "loss": 1.6533, "step": 64950 }, { "epoch": 0.4082886492644315, "grad_norm": 6.7717742919921875, "learning_rate": 1.7282759025338846e-05, "loss": 1.9807, "step": 64960 }, { "epoch": 0.4083515015811286, "grad_norm": 7.145508289337158, "learning_rate": 1.728233992439419e-05, "loss": 1.7605, "step": 64970 }, { "epoch": 0.4084143538978257, "grad_norm": 7.144276142120361, "learning_rate": 1.7281920823449537e-05, "loss": 1.8039, "step": 64980 }, { "epoch": 0.4084772062145228, "grad_norm": 5.917950630187988, "learning_rate": 1.7281501722504884e-05, "loss": 1.6906, "step": 64990 }, { "epoch": 0.40854005853121994, "grad_norm": 6.059743881225586, "learning_rate": 1.728108262156023e-05, "loss": 1.6918, "step": 65000 }, { "epoch": 0.40860291084791706, "grad_norm": 7.979092597961426, "learning_rate": 1.7280663520615578e-05, "loss": 1.7555, "step": 65010 }, { "epoch": 0.40866576316461417, "grad_norm": 6.71980619430542, "learning_rate": 1.7280244419670925e-05, "loss": 1.6185, "step": 65020 }, { "epoch": 0.40872861548131123, "grad_norm": 7.242041110992432, "learning_rate": 1.727982531872627e-05, "loss": 1.7957, "step": 65030 }, { "epoch": 0.40879146779800835, "grad_norm": 6.984872817993164, "learning_rate": 1.7279406217781616e-05, "loss": 1.7645, "step": 65040 }, { "epoch": 0.40885432011470546, "grad_norm": 6.94974946975708, "learning_rate": 1.7278987116836963e-05, "loss": 1.9315, "step": 65050 }, { "epoch": 0.4089171724314026, "grad_norm": 7.313066482543945, "learning_rate": 1.727856801589231e-05, "loss": 1.8741, "step": 65060 }, { "epoch": 0.4089800247480997, "grad_norm": 6.230828762054443, "learning_rate": 1.7278148914947657e-05, "loss": 1.8841, "step": 65070 }, { "epoch": 0.4090428770647968, "grad_norm": 6.700216770172119, "learning_rate": 1.727777172409747e-05, "loss": 1.7687, "step": 65080 }, { "epoch": 0.4091057293814939, "grad_norm": 7.0344977378845215, "learning_rate": 1.7277352623152815e-05, "loss": 1.7114, "step": 65090 }, { "epoch": 0.40916858169819104, "grad_norm": 7.262002468109131, "learning_rate": 1.7276933522208163e-05, "loss": 1.9332, "step": 65100 }, { "epoch": 0.40923143401488815, "grad_norm": 6.58958625793457, "learning_rate": 1.7276514421263506e-05, "loss": 1.7787, "step": 65110 }, { "epoch": 0.40929428633158527, "grad_norm": 6.534708499908447, "learning_rate": 1.7276095320318853e-05, "loss": 1.6371, "step": 65120 }, { "epoch": 0.4093571386482824, "grad_norm": 5.883193016052246, "learning_rate": 1.72756762193742e-05, "loss": 1.513, "step": 65130 }, { "epoch": 0.4094199909649795, "grad_norm": 7.480133056640625, "learning_rate": 1.7275257118429547e-05, "loss": 1.6982, "step": 65140 }, { "epoch": 0.4094828432816766, "grad_norm": 6.219967842102051, "learning_rate": 1.727483801748489e-05, "loss": 1.5562, "step": 65150 }, { "epoch": 0.40954569559837367, "grad_norm": 7.834683895111084, "learning_rate": 1.7274418916540238e-05, "loss": 1.7408, "step": 65160 }, { "epoch": 0.4096085479150708, "grad_norm": 6.424805641174316, "learning_rate": 1.7273999815595585e-05, "loss": 1.8484, "step": 65170 }, { "epoch": 0.4096714002317679, "grad_norm": 7.738645553588867, "learning_rate": 1.7273580714650932e-05, "loss": 1.7559, "step": 65180 }, { "epoch": 0.409734252548465, "grad_norm": 6.217718124389648, "learning_rate": 1.727316161370628e-05, "loss": 1.7107, "step": 65190 }, { "epoch": 0.40979710486516213, "grad_norm": 7.466714859008789, "learning_rate": 1.7272742512761623e-05, "loss": 1.8146, "step": 65200 }, { "epoch": 0.40985995718185925, "grad_norm": 5.817049026489258, "learning_rate": 1.7272365321911438e-05, "loss": 1.6548, "step": 65210 }, { "epoch": 0.40992280949855636, "grad_norm": 6.468529224395752, "learning_rate": 1.7271946220966785e-05, "loss": 1.8484, "step": 65220 }, { "epoch": 0.4099856618152535, "grad_norm": 6.295289039611816, "learning_rate": 1.727152712002213e-05, "loss": 1.7962, "step": 65230 }, { "epoch": 0.4100485141319506, "grad_norm": 7.373312950134277, "learning_rate": 1.7271108019077476e-05, "loss": 1.6707, "step": 65240 }, { "epoch": 0.4101113664486477, "grad_norm": 6.659286022186279, "learning_rate": 1.7270688918132823e-05, "loss": 1.9766, "step": 65250 }, { "epoch": 0.4101742187653448, "grad_norm": 6.687599182128906, "learning_rate": 1.727026981718817e-05, "loss": 1.9147, "step": 65260 }, { "epoch": 0.41023707108204194, "grad_norm": 6.625199317932129, "learning_rate": 1.7269850716243513e-05, "loss": 1.6965, "step": 65270 }, { "epoch": 0.41029992339873905, "grad_norm": 8.33799934387207, "learning_rate": 1.726943161529886e-05, "loss": 1.9639, "step": 65280 }, { "epoch": 0.4103627757154361, "grad_norm": 8.036014556884766, "learning_rate": 1.7269012514354208e-05, "loss": 1.7142, "step": 65290 }, { "epoch": 0.4104256280321332, "grad_norm": 7.296436786651611, "learning_rate": 1.7268593413409555e-05, "loss": 1.7163, "step": 65300 }, { "epoch": 0.41048848034883034, "grad_norm": 8.26138973236084, "learning_rate": 1.7268174312464902e-05, "loss": 1.6409, "step": 65310 }, { "epoch": 0.41055133266552746, "grad_norm": 7.372732639312744, "learning_rate": 1.726775521152025e-05, "loss": 1.6704, "step": 65320 }, { "epoch": 0.4106141849822246, "grad_norm": 6.2835516929626465, "learning_rate": 1.7267336110575596e-05, "loss": 1.7482, "step": 65330 }, { "epoch": 0.4106770372989217, "grad_norm": 5.813044548034668, "learning_rate": 1.7266917009630943e-05, "loss": 1.7742, "step": 65340 }, { "epoch": 0.4107398896156188, "grad_norm": 7.238397598266602, "learning_rate": 1.7266497908686287e-05, "loss": 1.5224, "step": 65350 }, { "epoch": 0.4108027419323159, "grad_norm": 7.718009948730469, "learning_rate": 1.7266078807741634e-05, "loss": 2.0273, "step": 65360 }, { "epoch": 0.41086559424901303, "grad_norm": 6.700657367706299, "learning_rate": 1.726565970679698e-05, "loss": 1.8385, "step": 65370 }, { "epoch": 0.41092844656571015, "grad_norm": 7.539520263671875, "learning_rate": 1.7265240605852328e-05, "loss": 1.7639, "step": 65380 }, { "epoch": 0.41099129888240726, "grad_norm": 6.936416149139404, "learning_rate": 1.7264821504907675e-05, "loss": 1.8539, "step": 65390 }, { "epoch": 0.4110541511991044, "grad_norm": 7.549854755401611, "learning_rate": 1.7264402403963022e-05, "loss": 1.9081, "step": 65400 }, { "epoch": 0.41111700351580144, "grad_norm": 7.006616115570068, "learning_rate": 1.7263983303018366e-05, "loss": 1.6595, "step": 65410 }, { "epoch": 0.41117985583249855, "grad_norm": 5.82361364364624, "learning_rate": 1.7263564202073713e-05, "loss": 1.5125, "step": 65420 }, { "epoch": 0.41124270814919567, "grad_norm": 6.20650577545166, "learning_rate": 1.726314510112906e-05, "loss": 2.0813, "step": 65430 }, { "epoch": 0.4113055604658928, "grad_norm": 8.615455627441406, "learning_rate": 1.7262726000184407e-05, "loss": 1.7214, "step": 65440 }, { "epoch": 0.4113684127825899, "grad_norm": 5.808997631072998, "learning_rate": 1.726230689923975e-05, "loss": 1.6858, "step": 65450 }, { "epoch": 0.411431265099287, "grad_norm": 7.5976762771606445, "learning_rate": 1.7261887798295098e-05, "loss": 1.6086, "step": 65460 }, { "epoch": 0.41149411741598413, "grad_norm": 6.59607458114624, "learning_rate": 1.7261468697350445e-05, "loss": 1.7889, "step": 65470 }, { "epoch": 0.41155696973268124, "grad_norm": 6.698685646057129, "learning_rate": 1.7261049596405792e-05, "loss": 1.997, "step": 65480 }, { "epoch": 0.41161982204937836, "grad_norm": 6.6149749755859375, "learning_rate": 1.726063049546114e-05, "loss": 1.8442, "step": 65490 }, { "epoch": 0.4116826743660755, "grad_norm": 9.340747833251953, "learning_rate": 1.7260211394516483e-05, "loss": 1.7613, "step": 65500 }, { "epoch": 0.4117455266827726, "grad_norm": 7.007955074310303, "learning_rate": 1.725979229357183e-05, "loss": 1.6861, "step": 65510 }, { "epoch": 0.4118083789994697, "grad_norm": 7.721170425415039, "learning_rate": 1.7259373192627177e-05, "loss": 1.9393, "step": 65520 }, { "epoch": 0.4118712313161668, "grad_norm": 7.229162693023682, "learning_rate": 1.7258954091682524e-05, "loss": 1.9263, "step": 65530 }, { "epoch": 0.4119340836328639, "grad_norm": 6.689350128173828, "learning_rate": 1.725853499073787e-05, "loss": 1.6923, "step": 65540 }, { "epoch": 0.411996935949561, "grad_norm": 6.703178882598877, "learning_rate": 1.7258115889793218e-05, "loss": 1.7659, "step": 65550 }, { "epoch": 0.4120597882662581, "grad_norm": 6.903580665588379, "learning_rate": 1.7257696788848565e-05, "loss": 1.7734, "step": 65560 }, { "epoch": 0.4121226405829552, "grad_norm": 7.508144378662109, "learning_rate": 1.7257277687903912e-05, "loss": 1.8409, "step": 65570 }, { "epoch": 0.41218549289965234, "grad_norm": 7.502862453460693, "learning_rate": 1.7256858586959256e-05, "loss": 1.7433, "step": 65580 }, { "epoch": 0.41224834521634945, "grad_norm": 8.012452125549316, "learning_rate": 1.7256439486014603e-05, "loss": 1.7268, "step": 65590 }, { "epoch": 0.41231119753304657, "grad_norm": 7.023150444030762, "learning_rate": 1.725602038506995e-05, "loss": 1.7775, "step": 65600 }, { "epoch": 0.4123740498497437, "grad_norm": 7.646528720855713, "learning_rate": 1.7255601284125297e-05, "loss": 1.7078, "step": 65610 }, { "epoch": 0.4124369021664408, "grad_norm": 7.526142120361328, "learning_rate": 1.7255182183180644e-05, "loss": 1.8062, "step": 65620 }, { "epoch": 0.4124997544831379, "grad_norm": 7.300693511962891, "learning_rate": 1.7254763082235988e-05, "loss": 1.6165, "step": 65630 }, { "epoch": 0.41256260679983503, "grad_norm": 7.158872127532959, "learning_rate": 1.7254343981291335e-05, "loss": 1.741, "step": 65640 }, { "epoch": 0.41262545911653215, "grad_norm": 7.359210014343262, "learning_rate": 1.7253924880346682e-05, "loss": 1.6974, "step": 65650 }, { "epoch": 0.41268831143322926, "grad_norm": 7.219731330871582, "learning_rate": 1.725350577940203e-05, "loss": 1.879, "step": 65660 }, { "epoch": 0.4127511637499263, "grad_norm": 6.3508405685424805, "learning_rate": 1.7253086678457373e-05, "loss": 1.6002, "step": 65670 }, { "epoch": 0.41281401606662343, "grad_norm": 6.639791011810303, "learning_rate": 1.725266757751272e-05, "loss": 1.4928, "step": 65680 }, { "epoch": 0.41287686838332055, "grad_norm": 6.588821887969971, "learning_rate": 1.7252248476568067e-05, "loss": 1.8052, "step": 65690 }, { "epoch": 0.41293972070001767, "grad_norm": 7.214931964874268, "learning_rate": 1.7251829375623414e-05, "loss": 1.686, "step": 65700 }, { "epoch": 0.4130025730167148, "grad_norm": 6.759382724761963, "learning_rate": 1.725141027467876e-05, "loss": 1.7647, "step": 65710 }, { "epoch": 0.4130654253334119, "grad_norm": 6.056048393249512, "learning_rate": 1.7250991173734108e-05, "loss": 1.6579, "step": 65720 }, { "epoch": 0.413128277650109, "grad_norm": 6.751076698303223, "learning_rate": 1.725057207278945e-05, "loss": 1.8221, "step": 65730 }, { "epoch": 0.4131911299668061, "grad_norm": 7.223300457000732, "learning_rate": 1.72501529718448e-05, "loss": 1.7375, "step": 65740 }, { "epoch": 0.41325398228350324, "grad_norm": 7.070038318634033, "learning_rate": 1.7249733870900146e-05, "loss": 1.8717, "step": 65750 }, { "epoch": 0.41331683460020036, "grad_norm": 6.8550262451171875, "learning_rate": 1.7249314769955493e-05, "loss": 1.9206, "step": 65760 }, { "epoch": 0.41337968691689747, "grad_norm": 6.547621726989746, "learning_rate": 1.724889566901084e-05, "loss": 1.529, "step": 65770 }, { "epoch": 0.4134425392335946, "grad_norm": 6.848862171173096, "learning_rate": 1.7248476568066187e-05, "loss": 1.7646, "step": 65780 }, { "epoch": 0.4135053915502917, "grad_norm": 6.237651824951172, "learning_rate": 1.7248057467121534e-05, "loss": 1.7265, "step": 65790 }, { "epoch": 0.41356824386698876, "grad_norm": 7.226813793182373, "learning_rate": 1.724763836617688e-05, "loss": 1.6863, "step": 65800 }, { "epoch": 0.4136310961836859, "grad_norm": 6.415926456451416, "learning_rate": 1.7247219265232225e-05, "loss": 1.5202, "step": 65810 }, { "epoch": 0.413693948500383, "grad_norm": 7.4868245124816895, "learning_rate": 1.7246800164287572e-05, "loss": 1.7867, "step": 65820 }, { "epoch": 0.4137568008170801, "grad_norm": 5.861806392669678, "learning_rate": 1.724638106334292e-05, "loss": 1.6878, "step": 65830 }, { "epoch": 0.4138196531337772, "grad_norm": 8.50329303741455, "learning_rate": 1.7245961962398266e-05, "loss": 1.8611, "step": 65840 }, { "epoch": 0.41388250545047434, "grad_norm": 7.272984504699707, "learning_rate": 1.724554286145361e-05, "loss": 1.8541, "step": 65850 }, { "epoch": 0.41394535776717145, "grad_norm": 6.840214729309082, "learning_rate": 1.7245123760508957e-05, "loss": 1.8274, "step": 65860 }, { "epoch": 0.41400821008386857, "grad_norm": 6.844021797180176, "learning_rate": 1.7244704659564304e-05, "loss": 1.5502, "step": 65870 }, { "epoch": 0.4140710624005657, "grad_norm": 6.481375217437744, "learning_rate": 1.724428555861965e-05, "loss": 1.6152, "step": 65880 }, { "epoch": 0.4141339147172628, "grad_norm": 6.612030029296875, "learning_rate": 1.7243866457674998e-05, "loss": 1.7488, "step": 65890 }, { "epoch": 0.4141967670339599, "grad_norm": 6.541953086853027, "learning_rate": 1.724344735673034e-05, "loss": 1.7981, "step": 65900 }, { "epoch": 0.414259619350657, "grad_norm": 6.864079475402832, "learning_rate": 1.724302825578569e-05, "loss": 1.744, "step": 65910 }, { "epoch": 0.4143224716673541, "grad_norm": 6.154926300048828, "learning_rate": 1.7242609154841036e-05, "loss": 2.0588, "step": 65920 }, { "epoch": 0.4143853239840512, "grad_norm": 5.997568130493164, "learning_rate": 1.7242190053896383e-05, "loss": 1.7858, "step": 65930 }, { "epoch": 0.4144481763007483, "grad_norm": 7.9769463539123535, "learning_rate": 1.724177095295173e-05, "loss": 1.6325, "step": 65940 }, { "epoch": 0.41451102861744543, "grad_norm": 6.107345104217529, "learning_rate": 1.7241351852007077e-05, "loss": 1.9093, "step": 65950 }, { "epoch": 0.41457388093414255, "grad_norm": 6.253636837005615, "learning_rate": 1.7240932751062424e-05, "loss": 1.834, "step": 65960 }, { "epoch": 0.41463673325083966, "grad_norm": 6.682056427001953, "learning_rate": 1.7240513650117768e-05, "loss": 1.7277, "step": 65970 }, { "epoch": 0.4146995855675368, "grad_norm": 5.611754417419434, "learning_rate": 1.7240094549173115e-05, "loss": 1.8684, "step": 65980 }, { "epoch": 0.4147624378842339, "grad_norm": 7.929423809051514, "learning_rate": 1.7239675448228462e-05, "loss": 1.8078, "step": 65990 }, { "epoch": 0.414825290200931, "grad_norm": 7.0556960105896, "learning_rate": 1.723925634728381e-05, "loss": 1.6299, "step": 66000 }, { "epoch": 0.4148881425176281, "grad_norm": 5.946358680725098, "learning_rate": 1.7238837246339156e-05, "loss": 1.829, "step": 66010 }, { "epoch": 0.41495099483432524, "grad_norm": 6.1297688484191895, "learning_rate": 1.7238418145394503e-05, "loss": 1.7613, "step": 66020 }, { "epoch": 0.41501384715102235, "grad_norm": 6.7420477867126465, "learning_rate": 1.7237999044449847e-05, "loss": 1.7146, "step": 66030 }, { "epoch": 0.41507669946771947, "grad_norm": 7.604292869567871, "learning_rate": 1.7237579943505194e-05, "loss": 1.9379, "step": 66040 }, { "epoch": 0.4151395517844165, "grad_norm": 6.331554889678955, "learning_rate": 1.723716084256054e-05, "loss": 1.7841, "step": 66050 }, { "epoch": 0.41520240410111364, "grad_norm": 6.867188930511475, "learning_rate": 1.7236741741615888e-05, "loss": 1.6822, "step": 66060 }, { "epoch": 0.41526525641781076, "grad_norm": 7.866869926452637, "learning_rate": 1.723632264067123e-05, "loss": 1.5881, "step": 66070 }, { "epoch": 0.4153281087345079, "grad_norm": 7.904226303100586, "learning_rate": 1.723590353972658e-05, "loss": 1.7964, "step": 66080 }, { "epoch": 0.415390961051205, "grad_norm": 6.318147659301758, "learning_rate": 1.7235484438781926e-05, "loss": 1.7034, "step": 66090 }, { "epoch": 0.4154538133679021, "grad_norm": 7.195476055145264, "learning_rate": 1.7235065337837273e-05, "loss": 1.787, "step": 66100 }, { "epoch": 0.4155166656845992, "grad_norm": 8.014933586120605, "learning_rate": 1.723464623689262e-05, "loss": 1.6416, "step": 66110 }, { "epoch": 0.41557951800129633, "grad_norm": 6.8371100425720215, "learning_rate": 1.7234227135947964e-05, "loss": 1.8213, "step": 66120 }, { "epoch": 0.41564237031799345, "grad_norm": 6.7408976554870605, "learning_rate": 1.723380803500331e-05, "loss": 2.039, "step": 66130 }, { "epoch": 0.41570522263469056, "grad_norm": 7.483088493347168, "learning_rate": 1.7233388934058658e-05, "loss": 1.9312, "step": 66140 }, { "epoch": 0.4157680749513877, "grad_norm": 6.651445388793945, "learning_rate": 1.7232969833114005e-05, "loss": 1.7359, "step": 66150 }, { "epoch": 0.4158309272680848, "grad_norm": 7.179247856140137, "learning_rate": 1.7232550732169352e-05, "loss": 1.7614, "step": 66160 }, { "epoch": 0.4158937795847819, "grad_norm": 7.6218132972717285, "learning_rate": 1.72321316312247e-05, "loss": 1.7491, "step": 66170 }, { "epoch": 0.41595663190147897, "grad_norm": 6.225204944610596, "learning_rate": 1.7231712530280046e-05, "loss": 1.7596, "step": 66180 }, { "epoch": 0.4160194842181761, "grad_norm": 8.149336814880371, "learning_rate": 1.7231293429335393e-05, "loss": 2.0318, "step": 66190 }, { "epoch": 0.4160823365348732, "grad_norm": 6.81826639175415, "learning_rate": 1.723087432839074e-05, "loss": 1.9221, "step": 66200 }, { "epoch": 0.4161451888515703, "grad_norm": 7.789238929748535, "learning_rate": 1.7230455227446084e-05, "loss": 1.8142, "step": 66210 }, { "epoch": 0.41620804116826743, "grad_norm": 7.309952735900879, "learning_rate": 1.723003612650143e-05, "loss": 1.8951, "step": 66220 }, { "epoch": 0.41627089348496454, "grad_norm": 6.235332012176514, "learning_rate": 1.7229617025556778e-05, "loss": 1.8876, "step": 66230 }, { "epoch": 0.41633374580166166, "grad_norm": 6.2471160888671875, "learning_rate": 1.7229197924612125e-05, "loss": 1.6217, "step": 66240 }, { "epoch": 0.4163965981183588, "grad_norm": 9.248604774475098, "learning_rate": 1.722877882366747e-05, "loss": 2.0903, "step": 66250 }, { "epoch": 0.4164594504350559, "grad_norm": 6.332358360290527, "learning_rate": 1.7228359722722816e-05, "loss": 1.6627, "step": 66260 }, { "epoch": 0.416522302751753, "grad_norm": 6.084262847900391, "learning_rate": 1.7227940621778163e-05, "loss": 1.7674, "step": 66270 }, { "epoch": 0.4165851550684501, "grad_norm": 6.957821846008301, "learning_rate": 1.722752152083351e-05, "loss": 1.5903, "step": 66280 }, { "epoch": 0.41664800738514723, "grad_norm": 7.637058258056641, "learning_rate": 1.7227102419888854e-05, "loss": 1.7404, "step": 66290 }, { "epoch": 0.41671085970184435, "grad_norm": 6.842432975769043, "learning_rate": 1.72266833189442e-05, "loss": 1.8943, "step": 66300 }, { "epoch": 0.4167737120185414, "grad_norm": 7.005809783935547, "learning_rate": 1.7226264217999548e-05, "loss": 1.7298, "step": 66310 }, { "epoch": 0.4168365643352385, "grad_norm": 6.967994689941406, "learning_rate": 1.7225845117054895e-05, "loss": 1.8196, "step": 66320 }, { "epoch": 0.41689941665193564, "grad_norm": 7.2940287590026855, "learning_rate": 1.7225426016110242e-05, "loss": 1.5849, "step": 66330 }, { "epoch": 0.41696226896863275, "grad_norm": 7.64329719543457, "learning_rate": 1.722500691516559e-05, "loss": 1.8588, "step": 66340 }, { "epoch": 0.41702512128532987, "grad_norm": 7.028941631317139, "learning_rate": 1.7224587814220933e-05, "loss": 1.6441, "step": 66350 }, { "epoch": 0.417087973602027, "grad_norm": 6.863855838775635, "learning_rate": 1.722416871327628e-05, "loss": 1.8029, "step": 66360 }, { "epoch": 0.4171508259187241, "grad_norm": 5.859476089477539, "learning_rate": 1.7223749612331627e-05, "loss": 1.487, "step": 66370 }, { "epoch": 0.4172136782354212, "grad_norm": 7.921928882598877, "learning_rate": 1.7223330511386974e-05, "loss": 1.6424, "step": 66380 }, { "epoch": 0.41727653055211833, "grad_norm": 6.965579986572266, "learning_rate": 1.722291141044232e-05, "loss": 1.788, "step": 66390 }, { "epoch": 0.41733938286881544, "grad_norm": 7.220767974853516, "learning_rate": 1.7222492309497668e-05, "loss": 1.6314, "step": 66400 }, { "epoch": 0.41740223518551256, "grad_norm": 7.693778991699219, "learning_rate": 1.7222073208553015e-05, "loss": 1.5888, "step": 66410 }, { "epoch": 0.4174650875022097, "grad_norm": 6.9228410720825195, "learning_rate": 1.7221654107608362e-05, "loss": 1.7609, "step": 66420 }, { "epoch": 0.41752793981890673, "grad_norm": 6.110254287719727, "learning_rate": 1.7221235006663706e-05, "loss": 1.7262, "step": 66430 }, { "epoch": 0.41759079213560385, "grad_norm": 5.9352240562438965, "learning_rate": 1.7220815905719053e-05, "loss": 1.878, "step": 66440 }, { "epoch": 0.41765364445230097, "grad_norm": 7.226113796234131, "learning_rate": 1.72203968047744e-05, "loss": 1.6799, "step": 66450 }, { "epoch": 0.4177164967689981, "grad_norm": 7.249061584472656, "learning_rate": 1.7219977703829747e-05, "loss": 1.7067, "step": 66460 }, { "epoch": 0.4177793490856952, "grad_norm": 6.2751336097717285, "learning_rate": 1.721955860288509e-05, "loss": 1.7584, "step": 66470 }, { "epoch": 0.4178422014023923, "grad_norm": 6.313525199890137, "learning_rate": 1.7219139501940438e-05, "loss": 1.7579, "step": 66480 }, { "epoch": 0.4179050537190894, "grad_norm": 7.393110752105713, "learning_rate": 1.7218720400995785e-05, "loss": 1.6926, "step": 66490 }, { "epoch": 0.41796790603578654, "grad_norm": 7.071122646331787, "learning_rate": 1.7218301300051132e-05, "loss": 1.5097, "step": 66500 }, { "epoch": 0.41803075835248366, "grad_norm": 6.8215813636779785, "learning_rate": 1.721788219910648e-05, "loss": 2.0171, "step": 66510 }, { "epoch": 0.41809361066918077, "grad_norm": 6.949654579162598, "learning_rate": 1.7217463098161823e-05, "loss": 1.5347, "step": 66520 }, { "epoch": 0.4181564629858779, "grad_norm": 7.977286338806152, "learning_rate": 1.721704399721717e-05, "loss": 1.9713, "step": 66530 }, { "epoch": 0.418219315302575, "grad_norm": 6.974081516265869, "learning_rate": 1.7216624896272517e-05, "loss": 1.8234, "step": 66540 }, { "epoch": 0.4182821676192721, "grad_norm": 7.043870449066162, "learning_rate": 1.7216205795327864e-05, "loss": 1.7966, "step": 66550 }, { "epoch": 0.4183450199359692, "grad_norm": 6.155953884124756, "learning_rate": 1.721578669438321e-05, "loss": 1.5279, "step": 66560 }, { "epoch": 0.4184078722526663, "grad_norm": 7.128800392150879, "learning_rate": 1.7215367593438558e-05, "loss": 1.7763, "step": 66570 }, { "epoch": 0.4184707245693634, "grad_norm": 7.194254398345947, "learning_rate": 1.7214948492493905e-05, "loss": 1.7651, "step": 66580 }, { "epoch": 0.4185335768860605, "grad_norm": 6.361778736114502, "learning_rate": 1.7214529391549252e-05, "loss": 1.638, "step": 66590 }, { "epoch": 0.41859642920275764, "grad_norm": 6.098928928375244, "learning_rate": 1.7214110290604596e-05, "loss": 1.7984, "step": 66600 }, { "epoch": 0.41865928151945475, "grad_norm": 6.760110378265381, "learning_rate": 1.7213691189659943e-05, "loss": 1.7262, "step": 66610 }, { "epoch": 0.41872213383615187, "grad_norm": 7.264941215515137, "learning_rate": 1.721327208871529e-05, "loss": 1.6386, "step": 66620 }, { "epoch": 0.418784986152849, "grad_norm": 7.49856424331665, "learning_rate": 1.7212852987770637e-05, "loss": 1.7972, "step": 66630 }, { "epoch": 0.4188478384695461, "grad_norm": 6.58585786819458, "learning_rate": 1.7212433886825984e-05, "loss": 1.7253, "step": 66640 }, { "epoch": 0.4189106907862432, "grad_norm": 6.299633026123047, "learning_rate": 1.7212014785881328e-05, "loss": 1.7737, "step": 66650 }, { "epoch": 0.4189735431029403, "grad_norm": 4.980933666229248, "learning_rate": 1.7211595684936675e-05, "loss": 1.5795, "step": 66660 }, { "epoch": 0.41903639541963744, "grad_norm": 6.099027633666992, "learning_rate": 1.7211176583992022e-05, "loss": 1.9556, "step": 66670 }, { "epoch": 0.41909924773633456, "grad_norm": 7.245108127593994, "learning_rate": 1.721075748304737e-05, "loss": 1.59, "step": 66680 }, { "epoch": 0.4191621000530316, "grad_norm": 6.28397798538208, "learning_rate": 1.7210338382102713e-05, "loss": 1.795, "step": 66690 }, { "epoch": 0.41922495236972873, "grad_norm": 6.623855113983154, "learning_rate": 1.720991928115806e-05, "loss": 1.6809, "step": 66700 }, { "epoch": 0.41928780468642585, "grad_norm": 6.899521827697754, "learning_rate": 1.7209500180213407e-05, "loss": 1.6451, "step": 66710 }, { "epoch": 0.41935065700312296, "grad_norm": 6.536463737487793, "learning_rate": 1.7209081079268754e-05, "loss": 1.7583, "step": 66720 }, { "epoch": 0.4194135093198201, "grad_norm": 6.291255474090576, "learning_rate": 1.72086619783241e-05, "loss": 1.9444, "step": 66730 }, { "epoch": 0.4194763616365172, "grad_norm": 6.284396171569824, "learning_rate": 1.7208242877379445e-05, "loss": 1.5567, "step": 66740 }, { "epoch": 0.4195392139532143, "grad_norm": 6.022761821746826, "learning_rate": 1.7207823776434792e-05, "loss": 1.8496, "step": 66750 }, { "epoch": 0.4196020662699114, "grad_norm": 7.062186241149902, "learning_rate": 1.720740467549014e-05, "loss": 1.7722, "step": 66760 }, { "epoch": 0.41966491858660854, "grad_norm": 6.43408727645874, "learning_rate": 1.7206985574545486e-05, "loss": 1.8628, "step": 66770 }, { "epoch": 0.41972777090330565, "grad_norm": 7.805745601654053, "learning_rate": 1.7206566473600833e-05, "loss": 1.7588, "step": 66780 }, { "epoch": 0.41979062322000277, "grad_norm": 8.450122833251953, "learning_rate": 1.720614737265618e-05, "loss": 1.7457, "step": 66790 }, { "epoch": 0.4198534755366999, "grad_norm": 7.328549385070801, "learning_rate": 1.7205728271711527e-05, "loss": 1.7419, "step": 66800 }, { "epoch": 0.419916327853397, "grad_norm": 7.124634742736816, "learning_rate": 1.7205309170766874e-05, "loss": 1.5527, "step": 66810 }, { "epoch": 0.41997918017009406, "grad_norm": 6.787344932556152, "learning_rate": 1.720489006982222e-05, "loss": 1.9586, "step": 66820 }, { "epoch": 0.4200420324867912, "grad_norm": 6.533176898956299, "learning_rate": 1.7204470968877565e-05, "loss": 1.611, "step": 66830 }, { "epoch": 0.4201048848034883, "grad_norm": 6.889014720916748, "learning_rate": 1.7204051867932912e-05, "loss": 1.8108, "step": 66840 }, { "epoch": 0.4201677371201854, "grad_norm": 6.502508640289307, "learning_rate": 1.720363276698826e-05, "loss": 2.0798, "step": 66850 }, { "epoch": 0.4202305894368825, "grad_norm": 7.50160026550293, "learning_rate": 1.7203213666043606e-05, "loss": 1.8618, "step": 66860 }, { "epoch": 0.42029344175357963, "grad_norm": 6.552640914916992, "learning_rate": 1.720279456509895e-05, "loss": 1.7817, "step": 66870 }, { "epoch": 0.42035629407027675, "grad_norm": 6.772343635559082, "learning_rate": 1.7202375464154297e-05, "loss": 1.7162, "step": 66880 }, { "epoch": 0.42041914638697386, "grad_norm": 6.845734596252441, "learning_rate": 1.7201956363209644e-05, "loss": 1.7223, "step": 66890 }, { "epoch": 0.420481998703671, "grad_norm": 6.288269519805908, "learning_rate": 1.720153726226499e-05, "loss": 1.6642, "step": 66900 }, { "epoch": 0.4205448510203681, "grad_norm": 6.5025458335876465, "learning_rate": 1.7201118161320335e-05, "loss": 1.688, "step": 66910 }, { "epoch": 0.4206077033370652, "grad_norm": 6.833642959594727, "learning_rate": 1.7200699060375682e-05, "loss": 1.6041, "step": 66920 }, { "epoch": 0.4206705556537623, "grad_norm": 6.569134712219238, "learning_rate": 1.720027995943103e-05, "loss": 1.7687, "step": 66930 }, { "epoch": 0.42073340797045944, "grad_norm": 6.5684051513671875, "learning_rate": 1.7199860858486376e-05, "loss": 1.7422, "step": 66940 }, { "epoch": 0.4207962602871565, "grad_norm": 6.582808017730713, "learning_rate": 1.7199441757541723e-05, "loss": 1.5826, "step": 66950 }, { "epoch": 0.4208591126038536, "grad_norm": 6.532323837280273, "learning_rate": 1.719902265659707e-05, "loss": 1.549, "step": 66960 }, { "epoch": 0.42092196492055073, "grad_norm": 6.004312038421631, "learning_rate": 1.7198603555652417e-05, "loss": 1.7504, "step": 66970 }, { "epoch": 0.42098481723724784, "grad_norm": 6.040466785430908, "learning_rate": 1.719818445470776e-05, "loss": 1.686, "step": 66980 }, { "epoch": 0.42104766955394496, "grad_norm": 5.869112968444824, "learning_rate": 1.7197765353763108e-05, "loss": 1.4853, "step": 66990 }, { "epoch": 0.4211105218706421, "grad_norm": 5.894684791564941, "learning_rate": 1.7197346252818455e-05, "loss": 1.5959, "step": 67000 }, { "epoch": 0.4211733741873392, "grad_norm": 7.29726505279541, "learning_rate": 1.7196927151873802e-05, "loss": 1.5494, "step": 67010 }, { "epoch": 0.4212362265040363, "grad_norm": 6.248456954956055, "learning_rate": 1.719650805092915e-05, "loss": 1.6962, "step": 67020 }, { "epoch": 0.4212990788207334, "grad_norm": 6.856325149536133, "learning_rate": 1.7196088949984496e-05, "loss": 1.9297, "step": 67030 }, { "epoch": 0.42136193113743053, "grad_norm": 7.646697998046875, "learning_rate": 1.7195669849039843e-05, "loss": 1.9425, "step": 67040 }, { "epoch": 0.42142478345412765, "grad_norm": 7.795719623565674, "learning_rate": 1.7195250748095187e-05, "loss": 1.6795, "step": 67050 }, { "epoch": 0.42148763577082476, "grad_norm": 6.1254143714904785, "learning_rate": 1.7194831647150534e-05, "loss": 1.6857, "step": 67060 }, { "epoch": 0.4215504880875218, "grad_norm": 7.006941318511963, "learning_rate": 1.719441254620588e-05, "loss": 1.9168, "step": 67070 }, { "epoch": 0.42161334040421894, "grad_norm": 7.390153408050537, "learning_rate": 1.7193993445261228e-05, "loss": 1.5826, "step": 67080 }, { "epoch": 0.42167619272091605, "grad_norm": 6.724299907684326, "learning_rate": 1.7193574344316572e-05, "loss": 1.6652, "step": 67090 }, { "epoch": 0.42173904503761317, "grad_norm": 7.16749382019043, "learning_rate": 1.719315524337192e-05, "loss": 1.8747, "step": 67100 }, { "epoch": 0.4218018973543103, "grad_norm": 5.848663806915283, "learning_rate": 1.7192736142427266e-05, "loss": 1.5958, "step": 67110 }, { "epoch": 0.4218647496710074, "grad_norm": 7.2142815589904785, "learning_rate": 1.7192317041482613e-05, "loss": 1.7606, "step": 67120 }, { "epoch": 0.4219276019877045, "grad_norm": 6.202139854431152, "learning_rate": 1.719189794053796e-05, "loss": 1.6537, "step": 67130 }, { "epoch": 0.42199045430440163, "grad_norm": 7.26724910736084, "learning_rate": 1.7191478839593304e-05, "loss": 1.7531, "step": 67140 }, { "epoch": 0.42205330662109874, "grad_norm": 6.575324535369873, "learning_rate": 1.719105973864865e-05, "loss": 1.699, "step": 67150 }, { "epoch": 0.42211615893779586, "grad_norm": 7.1311564445495605, "learning_rate": 1.7190640637703998e-05, "loss": 1.7081, "step": 67160 }, { "epoch": 0.422179011254493, "grad_norm": 7.583156585693359, "learning_rate": 1.7190221536759345e-05, "loss": 1.6017, "step": 67170 }, { "epoch": 0.4222418635711901, "grad_norm": 4.99326229095459, "learning_rate": 1.7189802435814692e-05, "loss": 1.6263, "step": 67180 }, { "epoch": 0.4223047158878872, "grad_norm": 7.643944263458252, "learning_rate": 1.718938333487004e-05, "loss": 1.6513, "step": 67190 }, { "epoch": 0.42236756820458426, "grad_norm": 7.959160804748535, "learning_rate": 1.7188964233925386e-05, "loss": 1.6504, "step": 67200 }, { "epoch": 0.4224304205212814, "grad_norm": 8.044618606567383, "learning_rate": 1.7188545132980733e-05, "loss": 1.7734, "step": 67210 }, { "epoch": 0.4224932728379785, "grad_norm": 5.893154144287109, "learning_rate": 1.7188126032036077e-05, "loss": 1.8729, "step": 67220 }, { "epoch": 0.4225561251546756, "grad_norm": 8.05775260925293, "learning_rate": 1.7187706931091424e-05, "loss": 1.7189, "step": 67230 }, { "epoch": 0.4226189774713727, "grad_norm": 7.354771137237549, "learning_rate": 1.718728783014677e-05, "loss": 1.7098, "step": 67240 }, { "epoch": 0.42268182978806984, "grad_norm": 6.8519287109375, "learning_rate": 1.7186868729202118e-05, "loss": 1.843, "step": 67250 }, { "epoch": 0.42274468210476696, "grad_norm": 6.724393844604492, "learning_rate": 1.7186449628257465e-05, "loss": 1.9114, "step": 67260 }, { "epoch": 0.42280753442146407, "grad_norm": 6.693548679351807, "learning_rate": 1.718603052731281e-05, "loss": 1.6675, "step": 67270 }, { "epoch": 0.4228703867381612, "grad_norm": 7.3344621658325195, "learning_rate": 1.7185611426368156e-05, "loss": 1.8287, "step": 67280 }, { "epoch": 0.4229332390548583, "grad_norm": 7.433871746063232, "learning_rate": 1.7185192325423503e-05, "loss": 1.9375, "step": 67290 }, { "epoch": 0.4229960913715554, "grad_norm": 6.614596366882324, "learning_rate": 1.718477322447885e-05, "loss": 1.692, "step": 67300 }, { "epoch": 0.42305894368825253, "grad_norm": 7.543428421020508, "learning_rate": 1.7184354123534194e-05, "loss": 1.7797, "step": 67310 }, { "epoch": 0.42312179600494965, "grad_norm": 6.51392936706543, "learning_rate": 1.718393502258954e-05, "loss": 1.4786, "step": 67320 }, { "epoch": 0.4231846483216467, "grad_norm": 6.313328266143799, "learning_rate": 1.7183515921644888e-05, "loss": 1.748, "step": 67330 }, { "epoch": 0.4232475006383438, "grad_norm": 7.1494269371032715, "learning_rate": 1.7183096820700235e-05, "loss": 1.9026, "step": 67340 }, { "epoch": 0.42331035295504094, "grad_norm": 7.357445240020752, "learning_rate": 1.7182677719755582e-05, "loss": 1.7604, "step": 67350 }, { "epoch": 0.42337320527173805, "grad_norm": 7.715597629547119, "learning_rate": 1.7182258618810926e-05, "loss": 1.7441, "step": 67360 }, { "epoch": 0.42343605758843517, "grad_norm": 7.762784957885742, "learning_rate": 1.7181839517866273e-05, "loss": 1.93, "step": 67370 }, { "epoch": 0.4234989099051323, "grad_norm": 6.988029956817627, "learning_rate": 1.718142041692162e-05, "loss": 1.7947, "step": 67380 }, { "epoch": 0.4235617622218294, "grad_norm": 7.190766334533691, "learning_rate": 1.7181001315976967e-05, "loss": 1.7791, "step": 67390 }, { "epoch": 0.4236246145385265, "grad_norm": 7.197295188903809, "learning_rate": 1.7180582215032314e-05, "loss": 1.7481, "step": 67400 }, { "epoch": 0.4236874668552236, "grad_norm": 6.324467182159424, "learning_rate": 1.718016311408766e-05, "loss": 1.6548, "step": 67410 }, { "epoch": 0.42375031917192074, "grad_norm": 6.4799485206604, "learning_rate": 1.7179744013143008e-05, "loss": 1.794, "step": 67420 }, { "epoch": 0.42381317148861786, "grad_norm": 7.101329326629639, "learning_rate": 1.7179324912198355e-05, "loss": 1.6493, "step": 67430 }, { "epoch": 0.42387602380531497, "grad_norm": 7.107308387756348, "learning_rate": 1.7178905811253702e-05, "loss": 1.652, "step": 67440 }, { "epoch": 0.4239388761220121, "grad_norm": 6.932032585144043, "learning_rate": 1.7178486710309046e-05, "loss": 1.7858, "step": 67450 }, { "epoch": 0.42400172843870915, "grad_norm": 8.148683547973633, "learning_rate": 1.7178067609364393e-05, "loss": 1.6824, "step": 67460 }, { "epoch": 0.42406458075540626, "grad_norm": 6.516316890716553, "learning_rate": 1.717764850841974e-05, "loss": 1.5451, "step": 67470 }, { "epoch": 0.4241274330721034, "grad_norm": 5.768672943115234, "learning_rate": 1.7177229407475087e-05, "loss": 1.761, "step": 67480 }, { "epoch": 0.4241902853888005, "grad_norm": 6.416257381439209, "learning_rate": 1.717681030653043e-05, "loss": 1.6693, "step": 67490 }, { "epoch": 0.4242531377054976, "grad_norm": 7.094727516174316, "learning_rate": 1.7176391205585778e-05, "loss": 1.7253, "step": 67500 }, { "epoch": 0.4243159900221947, "grad_norm": 6.927624702453613, "learning_rate": 1.7175972104641125e-05, "loss": 1.942, "step": 67510 }, { "epoch": 0.42437884233889184, "grad_norm": 5.8472514152526855, "learning_rate": 1.7175553003696472e-05, "loss": 1.6038, "step": 67520 }, { "epoch": 0.42444169465558895, "grad_norm": 7.299180030822754, "learning_rate": 1.7175133902751816e-05, "loss": 1.8532, "step": 67530 }, { "epoch": 0.42450454697228607, "grad_norm": 7.734817981719971, "learning_rate": 1.7174714801807163e-05, "loss": 1.621, "step": 67540 }, { "epoch": 0.4245673992889832, "grad_norm": 6.606222629547119, "learning_rate": 1.717429570086251e-05, "loss": 1.6545, "step": 67550 }, { "epoch": 0.4246302516056803, "grad_norm": 6.60004186630249, "learning_rate": 1.7173876599917857e-05, "loss": 1.8597, "step": 67560 }, { "epoch": 0.4246931039223774, "grad_norm": 6.90226411819458, "learning_rate": 1.7173457498973204e-05, "loss": 2.0013, "step": 67570 }, { "epoch": 0.4247559562390745, "grad_norm": 8.462608337402344, "learning_rate": 1.717303839802855e-05, "loss": 1.8148, "step": 67580 }, { "epoch": 0.4248188085557716, "grad_norm": 6.221778869628906, "learning_rate": 1.71726192970839e-05, "loss": 1.6299, "step": 67590 }, { "epoch": 0.4248816608724687, "grad_norm": 6.240743160247803, "learning_rate": 1.7172200196139245e-05, "loss": 1.9142, "step": 67600 }, { "epoch": 0.4249445131891658, "grad_norm": 20.15860939025879, "learning_rate": 1.717178109519459e-05, "loss": 1.8206, "step": 67610 }, { "epoch": 0.42500736550586293, "grad_norm": 5.853695392608643, "learning_rate": 1.7171361994249936e-05, "loss": 1.4446, "step": 67620 }, { "epoch": 0.42507021782256005, "grad_norm": 8.076128005981445, "learning_rate": 1.7170942893305283e-05, "loss": 1.7001, "step": 67630 }, { "epoch": 0.42513307013925716, "grad_norm": 7.701231479644775, "learning_rate": 1.717052379236063e-05, "loss": 1.833, "step": 67640 }, { "epoch": 0.4251959224559543, "grad_norm": 7.213332653045654, "learning_rate": 1.7170104691415977e-05, "loss": 1.6603, "step": 67650 }, { "epoch": 0.4252587747726514, "grad_norm": 6.962845325469971, "learning_rate": 1.7169685590471324e-05, "loss": 1.7795, "step": 67660 }, { "epoch": 0.4253216270893485, "grad_norm": 7.002661228179932, "learning_rate": 1.7169266489526668e-05, "loss": 1.7748, "step": 67670 }, { "epoch": 0.4253844794060456, "grad_norm": 6.931869983673096, "learning_rate": 1.7168847388582015e-05, "loss": 1.7315, "step": 67680 }, { "epoch": 0.42544733172274274, "grad_norm": 6.835071563720703, "learning_rate": 1.7168428287637362e-05, "loss": 1.8342, "step": 67690 }, { "epoch": 0.42551018403943985, "grad_norm": 5.688884735107422, "learning_rate": 1.716800918669271e-05, "loss": 1.7369, "step": 67700 }, { "epoch": 0.4255730363561369, "grad_norm": 5.7779645919799805, "learning_rate": 1.7167590085748053e-05, "loss": 1.7024, "step": 67710 }, { "epoch": 0.42563588867283403, "grad_norm": 7.721713066101074, "learning_rate": 1.71671709848034e-05, "loss": 1.7237, "step": 67720 }, { "epoch": 0.42569874098953114, "grad_norm": 6.258790969848633, "learning_rate": 1.7166751883858747e-05, "loss": 1.5013, "step": 67730 }, { "epoch": 0.42576159330622826, "grad_norm": 6.449336051940918, "learning_rate": 1.7166332782914094e-05, "loss": 1.6477, "step": 67740 }, { "epoch": 0.4258244456229254, "grad_norm": 7.289432048797607, "learning_rate": 1.716591368196944e-05, "loss": 1.6998, "step": 67750 }, { "epoch": 0.4258872979396225, "grad_norm": 7.474597930908203, "learning_rate": 1.7165494581024785e-05, "loss": 1.545, "step": 67760 }, { "epoch": 0.4259501502563196, "grad_norm": 6.538856506347656, "learning_rate": 1.7165075480080132e-05, "loss": 1.9273, "step": 67770 }, { "epoch": 0.4260130025730167, "grad_norm": 6.761141777038574, "learning_rate": 1.716465637913548e-05, "loss": 1.7875, "step": 67780 }, { "epoch": 0.42607585488971383, "grad_norm": 6.8022637367248535, "learning_rate": 1.7164237278190826e-05, "loss": 1.6618, "step": 67790 }, { "epoch": 0.42613870720641095, "grad_norm": 7.009593486785889, "learning_rate": 1.7163818177246173e-05, "loss": 1.7295, "step": 67800 }, { "epoch": 0.42620155952310806, "grad_norm": 7.822202682495117, "learning_rate": 1.716339907630152e-05, "loss": 1.7239, "step": 67810 }, { "epoch": 0.4262644118398052, "grad_norm": 7.272392272949219, "learning_rate": 1.7162979975356867e-05, "loss": 1.8299, "step": 67820 }, { "epoch": 0.4263272641565023, "grad_norm": 7.358494758605957, "learning_rate": 1.7162560874412214e-05, "loss": 1.7296, "step": 67830 }, { "epoch": 0.42639011647319935, "grad_norm": 6.851416110992432, "learning_rate": 1.7162141773467558e-05, "loss": 1.5437, "step": 67840 }, { "epoch": 0.42645296878989647, "grad_norm": 6.438014507293701, "learning_rate": 1.7161722672522905e-05, "loss": 1.6468, "step": 67850 }, { "epoch": 0.4265158211065936, "grad_norm": 4.515395641326904, "learning_rate": 1.7161303571578252e-05, "loss": 1.5819, "step": 67860 }, { "epoch": 0.4265786734232907, "grad_norm": 6.547482013702393, "learning_rate": 1.71608844706336e-05, "loss": 1.6972, "step": 67870 }, { "epoch": 0.4266415257399878, "grad_norm": 8.230692863464355, "learning_rate": 1.7160465369688946e-05, "loss": 1.6475, "step": 67880 }, { "epoch": 0.42670437805668493, "grad_norm": 6.764297008514404, "learning_rate": 1.716004626874429e-05, "loss": 1.4645, "step": 67890 }, { "epoch": 0.42676723037338204, "grad_norm": 6.4913740158081055, "learning_rate": 1.7159627167799637e-05, "loss": 1.9947, "step": 67900 }, { "epoch": 0.42683008269007916, "grad_norm": 8.162996292114258, "learning_rate": 1.7159208066854984e-05, "loss": 1.8477, "step": 67910 }, { "epoch": 0.4268929350067763, "grad_norm": 5.915576934814453, "learning_rate": 1.715878896591033e-05, "loss": 1.6375, "step": 67920 }, { "epoch": 0.4269557873234734, "grad_norm": 7.290924549102783, "learning_rate": 1.7158369864965675e-05, "loss": 1.6407, "step": 67930 }, { "epoch": 0.4270186396401705, "grad_norm": 6.409750938415527, "learning_rate": 1.7157950764021022e-05, "loss": 1.6545, "step": 67940 }, { "epoch": 0.4270814919568676, "grad_norm": 6.6010823249816895, "learning_rate": 1.715753166307637e-05, "loss": 1.7267, "step": 67950 }, { "epoch": 0.42714434427356474, "grad_norm": 8.809877395629883, "learning_rate": 1.7157112562131716e-05, "loss": 1.9648, "step": 67960 }, { "epoch": 0.4272071965902618, "grad_norm": 7.118371486663818, "learning_rate": 1.7156693461187063e-05, "loss": 1.977, "step": 67970 }, { "epoch": 0.4272700489069589, "grad_norm": 6.144242286682129, "learning_rate": 1.7156274360242407e-05, "loss": 1.7578, "step": 67980 }, { "epoch": 0.427332901223656, "grad_norm": 7.426531791687012, "learning_rate": 1.7155855259297754e-05, "loss": 1.7804, "step": 67990 }, { "epoch": 0.42739575354035314, "grad_norm": 6.703150272369385, "learning_rate": 1.71554361583531e-05, "loss": 1.8699, "step": 68000 }, { "epoch": 0.42745860585705026, "grad_norm": 7.641908168792725, "learning_rate": 1.7155017057408448e-05, "loss": 1.8321, "step": 68010 }, { "epoch": 0.42752145817374737, "grad_norm": 7.908365726470947, "learning_rate": 1.7154597956463795e-05, "loss": 1.6424, "step": 68020 }, { "epoch": 0.4275843104904445, "grad_norm": 6.221214771270752, "learning_rate": 1.7154178855519142e-05, "loss": 1.5296, "step": 68030 }, { "epoch": 0.4276471628071416, "grad_norm": 6.144346237182617, "learning_rate": 1.715375975457449e-05, "loss": 1.7549, "step": 68040 }, { "epoch": 0.4277100151238387, "grad_norm": 6.489060401916504, "learning_rate": 1.7153340653629836e-05, "loss": 1.5271, "step": 68050 }, { "epoch": 0.42777286744053583, "grad_norm": 7.255828857421875, "learning_rate": 1.7152921552685184e-05, "loss": 1.8189, "step": 68060 }, { "epoch": 0.42783571975723295, "grad_norm": 6.923028469085693, "learning_rate": 1.7152502451740527e-05, "loss": 1.7012, "step": 68070 }, { "epoch": 0.42789857207393006, "grad_norm": 7.817262172698975, "learning_rate": 1.7152083350795874e-05, "loss": 1.9792, "step": 68080 }, { "epoch": 0.4279614243906271, "grad_norm": 7.205074310302734, "learning_rate": 1.715166424985122e-05, "loss": 1.7221, "step": 68090 }, { "epoch": 0.42802427670732424, "grad_norm": 6.439834117889404, "learning_rate": 1.715124514890657e-05, "loss": 1.7847, "step": 68100 }, { "epoch": 0.42808712902402135, "grad_norm": 8.112502098083496, "learning_rate": 1.7150826047961912e-05, "loss": 1.7084, "step": 68110 }, { "epoch": 0.42814998134071847, "grad_norm": 7.40590763092041, "learning_rate": 1.715040694701726e-05, "loss": 1.8056, "step": 68120 }, { "epoch": 0.4282128336574156, "grad_norm": 6.197878837585449, "learning_rate": 1.7149987846072606e-05, "loss": 1.7148, "step": 68130 }, { "epoch": 0.4282756859741127, "grad_norm": 7.215755939483643, "learning_rate": 1.7149568745127953e-05, "loss": 1.93, "step": 68140 }, { "epoch": 0.4283385382908098, "grad_norm": 7.1226806640625, "learning_rate": 1.7149149644183297e-05, "loss": 1.7791, "step": 68150 }, { "epoch": 0.4284013906075069, "grad_norm": 8.49282455444336, "learning_rate": 1.7148730543238644e-05, "loss": 1.8896, "step": 68160 }, { "epoch": 0.42846424292420404, "grad_norm": 7.0488057136535645, "learning_rate": 1.714831144229399e-05, "loss": 1.7185, "step": 68170 }, { "epoch": 0.42852709524090116, "grad_norm": 7.961507797241211, "learning_rate": 1.7147892341349338e-05, "loss": 1.6971, "step": 68180 }, { "epoch": 0.42858994755759827, "grad_norm": 7.726258754730225, "learning_rate": 1.7147473240404685e-05, "loss": 1.606, "step": 68190 }, { "epoch": 0.4286527998742954, "grad_norm": 6.355605125427246, "learning_rate": 1.7147054139460032e-05, "loss": 1.4763, "step": 68200 }, { "epoch": 0.4287156521909925, "grad_norm": 5.938056945800781, "learning_rate": 1.714663503851538e-05, "loss": 1.7344, "step": 68210 }, { "epoch": 0.42877850450768956, "grad_norm": 7.089502811431885, "learning_rate": 1.7146215937570727e-05, "loss": 1.5126, "step": 68220 }, { "epoch": 0.4288413568243867, "grad_norm": 5.78463077545166, "learning_rate": 1.714579683662607e-05, "loss": 1.7468, "step": 68230 }, { "epoch": 0.4289042091410838, "grad_norm": 6.663919448852539, "learning_rate": 1.7145377735681417e-05, "loss": 1.8403, "step": 68240 }, { "epoch": 0.4289670614577809, "grad_norm": 9.549799919128418, "learning_rate": 1.7144958634736764e-05, "loss": 2.0516, "step": 68250 }, { "epoch": 0.429029913774478, "grad_norm": 7.30894136428833, "learning_rate": 1.714453953379211e-05, "loss": 1.7817, "step": 68260 }, { "epoch": 0.42909276609117514, "grad_norm": 7.875905513763428, "learning_rate": 1.714412043284746e-05, "loss": 2.006, "step": 68270 }, { "epoch": 0.42915561840787225, "grad_norm": 7.074160099029541, "learning_rate": 1.7143701331902806e-05, "loss": 1.782, "step": 68280 }, { "epoch": 0.42921847072456937, "grad_norm": 6.277737617492676, "learning_rate": 1.714328223095815e-05, "loss": 1.6764, "step": 68290 }, { "epoch": 0.4292813230412665, "grad_norm": 6.88519811630249, "learning_rate": 1.714290504010796e-05, "loss": 1.9277, "step": 68300 }, { "epoch": 0.4293441753579636, "grad_norm": 7.190648555755615, "learning_rate": 1.7142485939163308e-05, "loss": 1.8115, "step": 68310 }, { "epoch": 0.4294070276746607, "grad_norm": 6.650430679321289, "learning_rate": 1.7142066838218655e-05, "loss": 1.9107, "step": 68320 }, { "epoch": 0.42946987999135783, "grad_norm": 7.447507381439209, "learning_rate": 1.7141647737274002e-05, "loss": 1.707, "step": 68330 }, { "epoch": 0.42953273230805494, "grad_norm": 6.656262397766113, "learning_rate": 1.714122863632935e-05, "loss": 1.6456, "step": 68340 }, { "epoch": 0.429595584624752, "grad_norm": 6.548450469970703, "learning_rate": 1.7140809535384696e-05, "loss": 1.9959, "step": 68350 }, { "epoch": 0.4296584369414491, "grad_norm": 7.175817012786865, "learning_rate": 1.714039043444004e-05, "loss": 1.7946, "step": 68360 }, { "epoch": 0.42972128925814623, "grad_norm": 6.8483710289001465, "learning_rate": 1.7139971333495387e-05, "loss": 1.8434, "step": 68370 }, { "epoch": 0.42978414157484335, "grad_norm": 6.899672031402588, "learning_rate": 1.7139552232550734e-05, "loss": 1.5863, "step": 68380 }, { "epoch": 0.42984699389154046, "grad_norm": 6.126796722412109, "learning_rate": 1.713913313160608e-05, "loss": 1.5894, "step": 68390 }, { "epoch": 0.4299098462082376, "grad_norm": 7.532485485076904, "learning_rate": 1.7138714030661428e-05, "loss": 1.8445, "step": 68400 }, { "epoch": 0.4299726985249347, "grad_norm": 8.191269874572754, "learning_rate": 1.713829492971677e-05, "loss": 1.6811, "step": 68410 }, { "epoch": 0.4300355508416318, "grad_norm": 8.057154655456543, "learning_rate": 1.713787582877212e-05, "loss": 1.8348, "step": 68420 }, { "epoch": 0.4300984031583289, "grad_norm": 7.179137229919434, "learning_rate": 1.7137456727827466e-05, "loss": 1.8079, "step": 68430 }, { "epoch": 0.43016125547502604, "grad_norm": 7.086436748504639, "learning_rate": 1.7137037626882813e-05, "loss": 1.8095, "step": 68440 }, { "epoch": 0.43022410779172315, "grad_norm": 7.228525161743164, "learning_rate": 1.7136618525938156e-05, "loss": 1.7663, "step": 68450 }, { "epoch": 0.43028696010842027, "grad_norm": 7.063848495483398, "learning_rate": 1.7136199424993504e-05, "loss": 1.706, "step": 68460 }, { "epoch": 0.4303498124251174, "grad_norm": 7.752283573150635, "learning_rate": 1.713578032404885e-05, "loss": 1.8401, "step": 68470 }, { "epoch": 0.43041266474181444, "grad_norm": 7.277685642242432, "learning_rate": 1.7135361223104198e-05, "loss": 1.65, "step": 68480 }, { "epoch": 0.43047551705851156, "grad_norm": 6.834372520446777, "learning_rate": 1.7134942122159545e-05, "loss": 1.7414, "step": 68490 }, { "epoch": 0.4305383693752087, "grad_norm": 7.359886169433594, "learning_rate": 1.7134523021214892e-05, "loss": 1.5748, "step": 68500 }, { "epoch": 0.4306012216919058, "grad_norm": 7.043386459350586, "learning_rate": 1.713410392027024e-05, "loss": 1.9207, "step": 68510 }, { "epoch": 0.4306640740086029, "grad_norm": 7.472099304199219, "learning_rate": 1.7133684819325586e-05, "loss": 1.9185, "step": 68520 }, { "epoch": 0.4307269263253, "grad_norm": 7.232861518859863, "learning_rate": 1.7133265718380933e-05, "loss": 1.8134, "step": 68530 }, { "epoch": 0.43078977864199713, "grad_norm": 7.0091423988342285, "learning_rate": 1.7132846617436277e-05, "loss": 1.7788, "step": 68540 }, { "epoch": 0.43085263095869425, "grad_norm": 7.006239414215088, "learning_rate": 1.7132427516491624e-05, "loss": 1.7941, "step": 68550 }, { "epoch": 0.43091548327539136, "grad_norm": 8.227099418640137, "learning_rate": 1.713200841554697e-05, "loss": 1.7631, "step": 68560 }, { "epoch": 0.4309783355920885, "grad_norm": 6.526317596435547, "learning_rate": 1.7131589314602318e-05, "loss": 1.5846, "step": 68570 }, { "epoch": 0.4310411879087856, "grad_norm": 7.143447399139404, "learning_rate": 1.7131170213657665e-05, "loss": 1.886, "step": 68580 }, { "epoch": 0.4311040402254827, "grad_norm": 6.684122085571289, "learning_rate": 1.713075111271301e-05, "loss": 1.9493, "step": 68590 }, { "epoch": 0.43116689254217977, "grad_norm": 7.29036808013916, "learning_rate": 1.7130332011768356e-05, "loss": 1.6145, "step": 68600 }, { "epoch": 0.4312297448588769, "grad_norm": 7.188809394836426, "learning_rate": 1.7129912910823703e-05, "loss": 1.6851, "step": 68610 }, { "epoch": 0.431292597175574, "grad_norm": 7.1672163009643555, "learning_rate": 1.712949380987905e-05, "loss": 1.6537, "step": 68620 }, { "epoch": 0.4313554494922711, "grad_norm": 6.353119850158691, "learning_rate": 1.7129074708934394e-05, "loss": 1.5965, "step": 68630 }, { "epoch": 0.43141830180896823, "grad_norm": 8.291101455688477, "learning_rate": 1.712865560798974e-05, "loss": 1.7326, "step": 68640 }, { "epoch": 0.43148115412566534, "grad_norm": 7.423107147216797, "learning_rate": 1.7128236507045088e-05, "loss": 1.7529, "step": 68650 }, { "epoch": 0.43154400644236246, "grad_norm": 7.277958393096924, "learning_rate": 1.7127817406100435e-05, "loss": 1.6147, "step": 68660 }, { "epoch": 0.4316068587590596, "grad_norm": 6.638142108917236, "learning_rate": 1.7127398305155782e-05, "loss": 1.7505, "step": 68670 }, { "epoch": 0.4316697110757567, "grad_norm": 7.172138690948486, "learning_rate": 1.7126979204211126e-05, "loss": 1.6679, "step": 68680 }, { "epoch": 0.4317325633924538, "grad_norm": 8.023433685302734, "learning_rate": 1.7126560103266473e-05, "loss": 1.7418, "step": 68690 }, { "epoch": 0.4317954157091509, "grad_norm": 7.74282693862915, "learning_rate": 1.712614100232182e-05, "loss": 1.6706, "step": 68700 }, { "epoch": 0.43185826802584804, "grad_norm": 7.428801536560059, "learning_rate": 1.7125721901377167e-05, "loss": 1.9181, "step": 68710 }, { "epoch": 0.43192112034254515, "grad_norm": 6.023731231689453, "learning_rate": 1.7125302800432514e-05, "loss": 1.7713, "step": 68720 }, { "epoch": 0.4319839726592422, "grad_norm": 4.9429731369018555, "learning_rate": 1.712488369948786e-05, "loss": 1.6503, "step": 68730 }, { "epoch": 0.4320468249759393, "grad_norm": 6.637831211090088, "learning_rate": 1.7124464598543208e-05, "loss": 1.6801, "step": 68740 }, { "epoch": 0.43210967729263644, "grad_norm": 6.555577754974365, "learning_rate": 1.7124045497598555e-05, "loss": 1.6762, "step": 68750 }, { "epoch": 0.43217252960933356, "grad_norm": 7.642809867858887, "learning_rate": 1.71236263966539e-05, "loss": 1.8938, "step": 68760 }, { "epoch": 0.43223538192603067, "grad_norm": 7.905426979064941, "learning_rate": 1.7123207295709246e-05, "loss": 1.7699, "step": 68770 }, { "epoch": 0.4322982342427278, "grad_norm": 6.7034759521484375, "learning_rate": 1.7122788194764593e-05, "loss": 1.6905, "step": 68780 }, { "epoch": 0.4323610865594249, "grad_norm": 6.467471122741699, "learning_rate": 1.712236909381994e-05, "loss": 1.7919, "step": 68790 }, { "epoch": 0.432423938876122, "grad_norm": 6.592196464538574, "learning_rate": 1.7121949992875287e-05, "loss": 2.0191, "step": 68800 }, { "epoch": 0.43248679119281913, "grad_norm": 6.909712791442871, "learning_rate": 1.712153089193063e-05, "loss": 1.6579, "step": 68810 }, { "epoch": 0.43254964350951625, "grad_norm": 7.228925704956055, "learning_rate": 1.7121111790985978e-05, "loss": 1.8762, "step": 68820 }, { "epoch": 0.43261249582621336, "grad_norm": 7.228713035583496, "learning_rate": 1.7120692690041325e-05, "loss": 1.8007, "step": 68830 }, { "epoch": 0.4326753481429105, "grad_norm": 5.614768981933594, "learning_rate": 1.7120273589096672e-05, "loss": 1.9519, "step": 68840 }, { "epoch": 0.4327382004596076, "grad_norm": 6.956311225891113, "learning_rate": 1.7119854488152016e-05, "loss": 1.7204, "step": 68850 }, { "epoch": 0.43280105277630465, "grad_norm": 6.350851535797119, "learning_rate": 1.7119435387207363e-05, "loss": 1.6029, "step": 68860 }, { "epoch": 0.43286390509300177, "grad_norm": 7.310161113739014, "learning_rate": 1.711901628626271e-05, "loss": 1.7023, "step": 68870 }, { "epoch": 0.4329267574096989, "grad_norm": 7.155307292938232, "learning_rate": 1.7118597185318057e-05, "loss": 1.7279, "step": 68880 }, { "epoch": 0.432989609726396, "grad_norm": 7.455556869506836, "learning_rate": 1.7118178084373404e-05, "loss": 1.7048, "step": 68890 }, { "epoch": 0.4330524620430931, "grad_norm": 7.175830364227295, "learning_rate": 1.711775898342875e-05, "loss": 1.7005, "step": 68900 }, { "epoch": 0.4331153143597902, "grad_norm": 6.088528633117676, "learning_rate": 1.7117339882484098e-05, "loss": 1.6173, "step": 68910 }, { "epoch": 0.43317816667648734, "grad_norm": 6.129864692687988, "learning_rate": 1.711692078153944e-05, "loss": 1.7082, "step": 68920 }, { "epoch": 0.43324101899318446, "grad_norm": 7.055078983306885, "learning_rate": 1.711650168059479e-05, "loss": 1.8112, "step": 68930 }, { "epoch": 0.43330387130988157, "grad_norm": 6.802575588226318, "learning_rate": 1.7116082579650136e-05, "loss": 1.9003, "step": 68940 }, { "epoch": 0.4333667236265787, "grad_norm": 6.301969051361084, "learning_rate": 1.7115663478705483e-05, "loss": 1.7973, "step": 68950 }, { "epoch": 0.4334295759432758, "grad_norm": 8.61343765258789, "learning_rate": 1.711524437776083e-05, "loss": 1.7553, "step": 68960 }, { "epoch": 0.4334924282599729, "grad_norm": 6.700798511505127, "learning_rate": 1.7114825276816177e-05, "loss": 1.6193, "step": 68970 }, { "epoch": 0.43355528057667003, "grad_norm": 6.971966743469238, "learning_rate": 1.711440617587152e-05, "loss": 1.6059, "step": 68980 }, { "epoch": 0.4336181328933671, "grad_norm": 7.719508171081543, "learning_rate": 1.7113987074926868e-05, "loss": 1.9536, "step": 68990 }, { "epoch": 0.4336809852100642, "grad_norm": 6.376918792724609, "learning_rate": 1.7113567973982215e-05, "loss": 1.7461, "step": 69000 }, { "epoch": 0.4337438375267613, "grad_norm": 5.054595947265625, "learning_rate": 1.7113148873037562e-05, "loss": 1.5952, "step": 69010 }, { "epoch": 0.43380668984345844, "grad_norm": 7.998342037200928, "learning_rate": 1.711272977209291e-05, "loss": 1.6446, "step": 69020 }, { "epoch": 0.43386954216015555, "grad_norm": 6.992016315460205, "learning_rate": 1.7112310671148253e-05, "loss": 1.982, "step": 69030 }, { "epoch": 0.43393239447685267, "grad_norm": 6.751170635223389, "learning_rate": 1.71118915702036e-05, "loss": 1.7092, "step": 69040 }, { "epoch": 0.4339952467935498, "grad_norm": 7.379878997802734, "learning_rate": 1.7111472469258947e-05, "loss": 1.8219, "step": 69050 }, { "epoch": 0.4340580991102469, "grad_norm": 7.311999797821045, "learning_rate": 1.7111053368314294e-05, "loss": 1.82, "step": 69060 }, { "epoch": 0.434120951426944, "grad_norm": 6.659695148468018, "learning_rate": 1.7110634267369638e-05, "loss": 1.8394, "step": 69070 }, { "epoch": 0.43418380374364113, "grad_norm": 7.174983978271484, "learning_rate": 1.7110215166424985e-05, "loss": 1.5287, "step": 69080 }, { "epoch": 0.43424665606033824, "grad_norm": 7.208707809448242, "learning_rate": 1.7109796065480332e-05, "loss": 2.0224, "step": 69090 }, { "epoch": 0.43430950837703536, "grad_norm": 7.024357318878174, "learning_rate": 1.710937696453568e-05, "loss": 1.9896, "step": 69100 }, { "epoch": 0.4343723606937325, "grad_norm": 7.056058883666992, "learning_rate": 1.7108957863591026e-05, "loss": 1.7408, "step": 69110 }, { "epoch": 0.43443521301042953, "grad_norm": 6.121402740478516, "learning_rate": 1.7108538762646373e-05, "loss": 1.6714, "step": 69120 }, { "epoch": 0.43449806532712665, "grad_norm": 5.951923370361328, "learning_rate": 1.710811966170172e-05, "loss": 1.7484, "step": 69130 }, { "epoch": 0.43456091764382376, "grad_norm": 6.897318363189697, "learning_rate": 1.7107700560757067e-05, "loss": 1.6989, "step": 69140 }, { "epoch": 0.4346237699605209, "grad_norm": 6.115614414215088, "learning_rate": 1.7107281459812414e-05, "loss": 1.9817, "step": 69150 }, { "epoch": 0.434686622277218, "grad_norm": 7.430760383605957, "learning_rate": 1.7106862358867758e-05, "loss": 1.8993, "step": 69160 }, { "epoch": 0.4347494745939151, "grad_norm": 7.004673480987549, "learning_rate": 1.7106443257923105e-05, "loss": 1.5811, "step": 69170 }, { "epoch": 0.4348123269106122, "grad_norm": 7.1759819984436035, "learning_rate": 1.7106024156978452e-05, "loss": 1.7459, "step": 69180 }, { "epoch": 0.43487517922730934, "grad_norm": 7.4145097732543945, "learning_rate": 1.71056050560338e-05, "loss": 1.7356, "step": 69190 }, { "epoch": 0.43493803154400645, "grad_norm": 7.906942367553711, "learning_rate": 1.7105185955089146e-05, "loss": 1.8496, "step": 69200 }, { "epoch": 0.43500088386070357, "grad_norm": 5.566155910491943, "learning_rate": 1.710476685414449e-05, "loss": 1.6328, "step": 69210 }, { "epoch": 0.4350637361774007, "grad_norm": 7.766795635223389, "learning_rate": 1.7104347753199837e-05, "loss": 1.777, "step": 69220 }, { "epoch": 0.4351265884940978, "grad_norm": 7.152865409851074, "learning_rate": 1.7103928652255184e-05, "loss": 1.6119, "step": 69230 }, { "epoch": 0.43518944081079486, "grad_norm": 6.858283519744873, "learning_rate": 1.710350955131053e-05, "loss": 1.8616, "step": 69240 }, { "epoch": 0.435252293127492, "grad_norm": 6.3837103843688965, "learning_rate": 1.7103090450365875e-05, "loss": 1.8093, "step": 69250 }, { "epoch": 0.4353151454441891, "grad_norm": 6.967526435852051, "learning_rate": 1.7102671349421222e-05, "loss": 1.8023, "step": 69260 }, { "epoch": 0.4353779977608862, "grad_norm": 6.755220890045166, "learning_rate": 1.710225224847657e-05, "loss": 1.7628, "step": 69270 }, { "epoch": 0.4354408500775833, "grad_norm": 6.2029337882995605, "learning_rate": 1.7101833147531916e-05, "loss": 1.6785, "step": 69280 }, { "epoch": 0.43550370239428043, "grad_norm": 7.696803569793701, "learning_rate": 1.7101414046587263e-05, "loss": 1.7776, "step": 69290 }, { "epoch": 0.43556655471097755, "grad_norm": 6.981212139129639, "learning_rate": 1.7100994945642607e-05, "loss": 1.6779, "step": 69300 }, { "epoch": 0.43562940702767466, "grad_norm": 6.717888832092285, "learning_rate": 1.7100575844697954e-05, "loss": 1.6449, "step": 69310 }, { "epoch": 0.4356922593443718, "grad_norm": 7.516409873962402, "learning_rate": 1.71001567437533e-05, "loss": 1.714, "step": 69320 }, { "epoch": 0.4357551116610689, "grad_norm": 6.508599281311035, "learning_rate": 1.7099737642808648e-05, "loss": 1.8393, "step": 69330 }, { "epoch": 0.435817963977766, "grad_norm": 7.194772243499756, "learning_rate": 1.7099318541863995e-05, "loss": 1.6077, "step": 69340 }, { "epoch": 0.4358808162944631, "grad_norm": 6.115171432495117, "learning_rate": 1.7098899440919342e-05, "loss": 1.8525, "step": 69350 }, { "epoch": 0.43594366861116024, "grad_norm": 6.4120378494262695, "learning_rate": 1.709848033997469e-05, "loss": 1.7427, "step": 69360 }, { "epoch": 0.4360065209278573, "grad_norm": 6.608808994293213, "learning_rate": 1.7098061239030036e-05, "loss": 1.746, "step": 69370 }, { "epoch": 0.4360693732445544, "grad_norm": 6.515633583068848, "learning_rate": 1.709764213808538e-05, "loss": 1.8448, "step": 69380 }, { "epoch": 0.43613222556125153, "grad_norm": 6.324481964111328, "learning_rate": 1.7097223037140727e-05, "loss": 1.7625, "step": 69390 }, { "epoch": 0.43619507787794864, "grad_norm": 5.291093349456787, "learning_rate": 1.7096803936196074e-05, "loss": 1.6256, "step": 69400 }, { "epoch": 0.43625793019464576, "grad_norm": 8.002639770507812, "learning_rate": 1.709638483525142e-05, "loss": 2.08, "step": 69410 }, { "epoch": 0.4363207825113429, "grad_norm": 6.381542205810547, "learning_rate": 1.7095965734306768e-05, "loss": 1.8249, "step": 69420 }, { "epoch": 0.43638363482804, "grad_norm": 6.60446310043335, "learning_rate": 1.7095546633362112e-05, "loss": 1.6198, "step": 69430 }, { "epoch": 0.4364464871447371, "grad_norm": 6.089707374572754, "learning_rate": 1.709512753241746e-05, "loss": 1.5444, "step": 69440 }, { "epoch": 0.4365093394614342, "grad_norm": 7.365067481994629, "learning_rate": 1.7094708431472806e-05, "loss": 1.6366, "step": 69450 }, { "epoch": 0.43657219177813134, "grad_norm": 6.4489593505859375, "learning_rate": 1.7094289330528153e-05, "loss": 1.7174, "step": 69460 }, { "epoch": 0.43663504409482845, "grad_norm": 6.833393573760986, "learning_rate": 1.7093870229583497e-05, "loss": 2.04, "step": 69470 }, { "epoch": 0.43669789641152557, "grad_norm": 8.178645133972168, "learning_rate": 1.7093451128638844e-05, "loss": 2.1001, "step": 69480 }, { "epoch": 0.4367607487282227, "grad_norm": 6.296994686126709, "learning_rate": 1.709303202769419e-05, "loss": 1.497, "step": 69490 }, { "epoch": 0.43682360104491974, "grad_norm": 6.104201793670654, "learning_rate": 1.7092612926749538e-05, "loss": 1.8025, "step": 69500 }, { "epoch": 0.43688645336161686, "grad_norm": 8.042522430419922, "learning_rate": 1.7092193825804885e-05, "loss": 1.8581, "step": 69510 }, { "epoch": 0.43694930567831397, "grad_norm": 7.047634124755859, "learning_rate": 1.7091774724860232e-05, "loss": 1.7825, "step": 69520 }, { "epoch": 0.4370121579950111, "grad_norm": 5.55924654006958, "learning_rate": 1.709135562391558e-05, "loss": 1.6944, "step": 69530 }, { "epoch": 0.4370750103117082, "grad_norm": 7.6144938468933105, "learning_rate": 1.7090936522970926e-05, "loss": 1.6686, "step": 69540 }, { "epoch": 0.4371378626284053, "grad_norm": 8.387451171875, "learning_rate": 1.709051742202627e-05, "loss": 1.7809, "step": 69550 }, { "epoch": 0.43720071494510243, "grad_norm": 5.343925952911377, "learning_rate": 1.7090098321081617e-05, "loss": 1.7923, "step": 69560 }, { "epoch": 0.43726356726179955, "grad_norm": 8.009215354919434, "learning_rate": 1.7089679220136964e-05, "loss": 1.6454, "step": 69570 }, { "epoch": 0.43732641957849666, "grad_norm": 6.277082920074463, "learning_rate": 1.708926011919231e-05, "loss": 1.6737, "step": 69580 }, { "epoch": 0.4373892718951938, "grad_norm": 8.452055931091309, "learning_rate": 1.7088841018247658e-05, "loss": 1.7448, "step": 69590 }, { "epoch": 0.4374521242118909, "grad_norm": 6.058176040649414, "learning_rate": 1.7088421917303002e-05, "loss": 1.7696, "step": 69600 }, { "epoch": 0.437514976528588, "grad_norm": 7.19420051574707, "learning_rate": 1.708800281635835e-05, "loss": 1.8384, "step": 69610 }, { "epoch": 0.4375778288452851, "grad_norm": 6.660956382751465, "learning_rate": 1.7087583715413696e-05, "loss": 1.5671, "step": 69620 }, { "epoch": 0.4376406811619822, "grad_norm": 6.775627136230469, "learning_rate": 1.7087164614469043e-05, "loss": 1.8158, "step": 69630 }, { "epoch": 0.4377035334786793, "grad_norm": 7.618368625640869, "learning_rate": 1.708674551352439e-05, "loss": 1.8476, "step": 69640 }, { "epoch": 0.4377663857953764, "grad_norm": 6.1381964683532715, "learning_rate": 1.7086326412579734e-05, "loss": 1.8181, "step": 69650 }, { "epoch": 0.4378292381120735, "grad_norm": 6.716647624969482, "learning_rate": 1.708590731163508e-05, "loss": 1.5799, "step": 69660 }, { "epoch": 0.43789209042877064, "grad_norm": 6.992799758911133, "learning_rate": 1.7085488210690428e-05, "loss": 1.9658, "step": 69670 }, { "epoch": 0.43795494274546776, "grad_norm": 6.472326755523682, "learning_rate": 1.7085069109745775e-05, "loss": 1.7605, "step": 69680 }, { "epoch": 0.43801779506216487, "grad_norm": 6.143565654754639, "learning_rate": 1.708465000880112e-05, "loss": 1.5635, "step": 69690 }, { "epoch": 0.438080647378862, "grad_norm": 6.250063896179199, "learning_rate": 1.7084230907856466e-05, "loss": 1.5388, "step": 69700 }, { "epoch": 0.4381434996955591, "grad_norm": 7.396320819854736, "learning_rate": 1.7083811806911813e-05, "loss": 1.6943, "step": 69710 }, { "epoch": 0.4382063520122562, "grad_norm": 6.950412273406982, "learning_rate": 1.708339270596716e-05, "loss": 1.6593, "step": 69720 }, { "epoch": 0.43826920432895333, "grad_norm": 7.082437038421631, "learning_rate": 1.7082973605022507e-05, "loss": 1.997, "step": 69730 }, { "epoch": 0.43833205664565045, "grad_norm": 7.463531970977783, "learning_rate": 1.7082554504077854e-05, "loss": 1.683, "step": 69740 }, { "epoch": 0.4383949089623475, "grad_norm": 6.712361812591553, "learning_rate": 1.70821354031332e-05, "loss": 2.094, "step": 69750 }, { "epoch": 0.4384577612790446, "grad_norm": 6.187252998352051, "learning_rate": 1.7081716302188548e-05, "loss": 1.6655, "step": 69760 }, { "epoch": 0.43852061359574174, "grad_norm": 5.67270040512085, "learning_rate": 1.7081297201243895e-05, "loss": 1.5296, "step": 69770 }, { "epoch": 0.43858346591243885, "grad_norm": 5.254736423492432, "learning_rate": 1.708087810029924e-05, "loss": 1.8248, "step": 69780 }, { "epoch": 0.43864631822913597, "grad_norm": 7.134040832519531, "learning_rate": 1.7080458999354586e-05, "loss": 1.8283, "step": 69790 }, { "epoch": 0.4387091705458331, "grad_norm": 7.79978609085083, "learning_rate": 1.7080039898409933e-05, "loss": 1.7114, "step": 69800 }, { "epoch": 0.4387720228625302, "grad_norm": 6.676272869110107, "learning_rate": 1.707962079746528e-05, "loss": 1.6352, "step": 69810 }, { "epoch": 0.4388348751792273, "grad_norm": 6.6913299560546875, "learning_rate": 1.7079201696520627e-05, "loss": 1.6483, "step": 69820 }, { "epoch": 0.43889772749592443, "grad_norm": 6.569377422332764, "learning_rate": 1.707878259557597e-05, "loss": 1.6161, "step": 69830 }, { "epoch": 0.43896057981262154, "grad_norm": 6.990094184875488, "learning_rate": 1.7078363494631318e-05, "loss": 1.7794, "step": 69840 }, { "epoch": 0.43902343212931866, "grad_norm": 8.219917297363281, "learning_rate": 1.7077944393686665e-05, "loss": 1.8896, "step": 69850 }, { "epoch": 0.4390862844460158, "grad_norm": 7.872071266174316, "learning_rate": 1.7077525292742012e-05, "loss": 1.7814, "step": 69860 }, { "epoch": 0.4391491367627129, "grad_norm": 6.119419574737549, "learning_rate": 1.7077106191797356e-05, "loss": 1.578, "step": 69870 }, { "epoch": 0.43921198907940995, "grad_norm": 6.6119608879089355, "learning_rate": 1.7076687090852703e-05, "loss": 1.9156, "step": 69880 }, { "epoch": 0.43927484139610706, "grad_norm": 7.422851085662842, "learning_rate": 1.707626798990805e-05, "loss": 1.5678, "step": 69890 }, { "epoch": 0.4393376937128042, "grad_norm": 8.162243843078613, "learning_rate": 1.7075848888963397e-05, "loss": 1.7778, "step": 69900 }, { "epoch": 0.4394005460295013, "grad_norm": 10.364338874816895, "learning_rate": 1.7075429788018744e-05, "loss": 1.9557, "step": 69910 }, { "epoch": 0.4394633983461984, "grad_norm": 7.00361442565918, "learning_rate": 1.707501068707409e-05, "loss": 1.6572, "step": 69920 }, { "epoch": 0.4395262506628955, "grad_norm": 6.680110454559326, "learning_rate": 1.7074591586129435e-05, "loss": 1.6079, "step": 69930 }, { "epoch": 0.43958910297959264, "grad_norm": 6.4366984367370605, "learning_rate": 1.7074172485184782e-05, "loss": 1.7348, "step": 69940 }, { "epoch": 0.43965195529628975, "grad_norm": 5.561446189880371, "learning_rate": 1.707375338424013e-05, "loss": 1.5654, "step": 69950 }, { "epoch": 0.43971480761298687, "grad_norm": 5.447123050689697, "learning_rate": 1.7073334283295476e-05, "loss": 1.6663, "step": 69960 }, { "epoch": 0.439777659929684, "grad_norm": 7.836920738220215, "learning_rate": 1.7072915182350823e-05, "loss": 1.7213, "step": 69970 }, { "epoch": 0.4398405122463811, "grad_norm": 6.675960540771484, "learning_rate": 1.707249608140617e-05, "loss": 1.5798, "step": 69980 }, { "epoch": 0.4399033645630782, "grad_norm": 6.375387668609619, "learning_rate": 1.7072076980461517e-05, "loss": 1.5938, "step": 69990 }, { "epoch": 0.43996621687977533, "grad_norm": 6.819417476654053, "learning_rate": 1.707165787951686e-05, "loss": 1.6739, "step": 70000 }, { "epoch": 0.4400290691964724, "grad_norm": 7.389351844787598, "learning_rate": 1.7071238778572208e-05, "loss": 1.882, "step": 70010 }, { "epoch": 0.4400919215131695, "grad_norm": 7.2243499755859375, "learning_rate": 1.7070819677627555e-05, "loss": 1.7576, "step": 70020 }, { "epoch": 0.4401547738298666, "grad_norm": 6.548774719238281, "learning_rate": 1.7070400576682902e-05, "loss": 1.5623, "step": 70030 }, { "epoch": 0.44021762614656373, "grad_norm": 9.590810775756836, "learning_rate": 1.706998147573825e-05, "loss": 1.6819, "step": 70040 }, { "epoch": 0.44028047846326085, "grad_norm": 7.229656219482422, "learning_rate": 1.7069562374793593e-05, "loss": 1.8326, "step": 70050 }, { "epoch": 0.44034333077995796, "grad_norm": 6.59816312789917, "learning_rate": 1.706914327384894e-05, "loss": 1.7817, "step": 70060 }, { "epoch": 0.4404061830966551, "grad_norm": 6.553726673126221, "learning_rate": 1.7068724172904287e-05, "loss": 1.923, "step": 70070 }, { "epoch": 0.4404690354133522, "grad_norm": 7.365018367767334, "learning_rate": 1.7068305071959634e-05, "loss": 1.8541, "step": 70080 }, { "epoch": 0.4405318877300493, "grad_norm": 6.91716194152832, "learning_rate": 1.7067885971014978e-05, "loss": 1.8022, "step": 70090 }, { "epoch": 0.4405947400467464, "grad_norm": 7.806150436401367, "learning_rate": 1.7067466870070325e-05, "loss": 1.695, "step": 70100 }, { "epoch": 0.44065759236344354, "grad_norm": 8.75064754486084, "learning_rate": 1.7067047769125672e-05, "loss": 2.0616, "step": 70110 }, { "epoch": 0.44072044468014065, "grad_norm": 7.268632411956787, "learning_rate": 1.706662866818102e-05, "loss": 1.4863, "step": 70120 }, { "epoch": 0.44078329699683777, "grad_norm": 7.369198799133301, "learning_rate": 1.7066209567236366e-05, "loss": 1.5836, "step": 70130 }, { "epoch": 0.44084614931353483, "grad_norm": 7.380405426025391, "learning_rate": 1.7065790466291713e-05, "loss": 1.8607, "step": 70140 }, { "epoch": 0.44090900163023194, "grad_norm": 7.630134582519531, "learning_rate": 1.706537136534706e-05, "loss": 1.9416, "step": 70150 }, { "epoch": 0.44097185394692906, "grad_norm": 7.153118133544922, "learning_rate": 1.7064952264402407e-05, "loss": 1.823, "step": 70160 }, { "epoch": 0.4410347062636262, "grad_norm": 6.811437606811523, "learning_rate": 1.7064533163457754e-05, "loss": 1.6643, "step": 70170 }, { "epoch": 0.4410975585803233, "grad_norm": 6.356449604034424, "learning_rate": 1.7064114062513098e-05, "loss": 1.6124, "step": 70180 }, { "epoch": 0.4411604108970204, "grad_norm": 5.7468671798706055, "learning_rate": 1.7063694961568445e-05, "loss": 1.8172, "step": 70190 }, { "epoch": 0.4412232632137175, "grad_norm": 7.016430377960205, "learning_rate": 1.7063275860623792e-05, "loss": 1.9987, "step": 70200 }, { "epoch": 0.44128611553041464, "grad_norm": 6.297787189483643, "learning_rate": 1.706285675967914e-05, "loss": 1.714, "step": 70210 }, { "epoch": 0.44134896784711175, "grad_norm": 6.5773186683654785, "learning_rate": 1.7062437658734483e-05, "loss": 1.7148, "step": 70220 }, { "epoch": 0.44141182016380887, "grad_norm": 6.49793815612793, "learning_rate": 1.706201855778983e-05, "loss": 1.8228, "step": 70230 }, { "epoch": 0.441474672480506, "grad_norm": 6.518189430236816, "learning_rate": 1.7061599456845177e-05, "loss": 1.6114, "step": 70240 }, { "epoch": 0.4415375247972031, "grad_norm": 8.319125175476074, "learning_rate": 1.7061180355900524e-05, "loss": 1.631, "step": 70250 }, { "epoch": 0.44160037711390016, "grad_norm": 5.953863143920898, "learning_rate": 1.706076125495587e-05, "loss": 1.8944, "step": 70260 }, { "epoch": 0.44166322943059727, "grad_norm": 5.739317893981934, "learning_rate": 1.7060342154011215e-05, "loss": 1.566, "step": 70270 }, { "epoch": 0.4417260817472944, "grad_norm": 6.951610088348389, "learning_rate": 1.7059923053066562e-05, "loss": 1.8221, "step": 70280 }, { "epoch": 0.4417889340639915, "grad_norm": 6.231076240539551, "learning_rate": 1.705950395212191e-05, "loss": 1.5316, "step": 70290 }, { "epoch": 0.4418517863806886, "grad_norm": 7.177138805389404, "learning_rate": 1.7059084851177256e-05, "loss": 1.8151, "step": 70300 }, { "epoch": 0.44191463869738573, "grad_norm": 5.447716236114502, "learning_rate": 1.70586657502326e-05, "loss": 1.5773, "step": 70310 }, { "epoch": 0.44197749101408285, "grad_norm": 7.0117621421813965, "learning_rate": 1.7058246649287947e-05, "loss": 1.6816, "step": 70320 }, { "epoch": 0.44204034333077996, "grad_norm": 6.275534152984619, "learning_rate": 1.7057827548343294e-05, "loss": 1.7795, "step": 70330 }, { "epoch": 0.4421031956474771, "grad_norm": 7.140209197998047, "learning_rate": 1.705740844739864e-05, "loss": 1.6769, "step": 70340 }, { "epoch": 0.4421660479641742, "grad_norm": 7.414947032928467, "learning_rate": 1.7056989346453988e-05, "loss": 1.8123, "step": 70350 }, { "epoch": 0.4422289002808713, "grad_norm": 7.385918140411377, "learning_rate": 1.7056570245509335e-05, "loss": 1.6472, "step": 70360 }, { "epoch": 0.4422917525975684, "grad_norm": 5.923056125640869, "learning_rate": 1.7056151144564682e-05, "loss": 1.6644, "step": 70370 }, { "epoch": 0.44235460491426554, "grad_norm": 6.5220627784729, "learning_rate": 1.705573204362003e-05, "loss": 1.8159, "step": 70380 }, { "epoch": 0.4424174572309626, "grad_norm": 7.185425758361816, "learning_rate": 1.7055312942675376e-05, "loss": 1.6309, "step": 70390 }, { "epoch": 0.4424803095476597, "grad_norm": 9.08899974822998, "learning_rate": 1.705489384173072e-05, "loss": 1.7753, "step": 70400 }, { "epoch": 0.4425431618643568, "grad_norm": 7.046308994293213, "learning_rate": 1.7054474740786067e-05, "loss": 1.6317, "step": 70410 }, { "epoch": 0.44260601418105394, "grad_norm": 6.433578014373779, "learning_rate": 1.7054055639841414e-05, "loss": 1.6979, "step": 70420 }, { "epoch": 0.44266886649775106, "grad_norm": 8.256340980529785, "learning_rate": 1.705363653889676e-05, "loss": 1.6845, "step": 70430 }, { "epoch": 0.44273171881444817, "grad_norm": 6.462786674499512, "learning_rate": 1.705321743795211e-05, "loss": 1.801, "step": 70440 }, { "epoch": 0.4427945711311453, "grad_norm": 6.751453399658203, "learning_rate": 1.7052798337007452e-05, "loss": 1.7099, "step": 70450 }, { "epoch": 0.4428574234478424, "grad_norm": 7.0939483642578125, "learning_rate": 1.70523792360628e-05, "loss": 1.6693, "step": 70460 }, { "epoch": 0.4429202757645395, "grad_norm": 5.824983596801758, "learning_rate": 1.7051960135118146e-05, "loss": 1.6418, "step": 70470 }, { "epoch": 0.44298312808123663, "grad_norm": 7.333545684814453, "learning_rate": 1.7051541034173493e-05, "loss": 1.8104, "step": 70480 }, { "epoch": 0.44304598039793375, "grad_norm": 7.342357158660889, "learning_rate": 1.7051121933228837e-05, "loss": 1.7328, "step": 70490 }, { "epoch": 0.44310883271463086, "grad_norm": 6.030719757080078, "learning_rate": 1.7050702832284184e-05, "loss": 1.7738, "step": 70500 }, { "epoch": 0.443171685031328, "grad_norm": 6.481091022491455, "learning_rate": 1.705028373133953e-05, "loss": 1.7931, "step": 70510 }, { "epoch": 0.44323453734802504, "grad_norm": 6.386503219604492, "learning_rate": 1.7049864630394878e-05, "loss": 1.8141, "step": 70520 }, { "epoch": 0.44329738966472215, "grad_norm": 6.556507587432861, "learning_rate": 1.7049445529450225e-05, "loss": 1.9126, "step": 70530 }, { "epoch": 0.44336024198141927, "grad_norm": 7.444990634918213, "learning_rate": 1.7049026428505572e-05, "loss": 1.9579, "step": 70540 }, { "epoch": 0.4434230942981164, "grad_norm": 6.353214740753174, "learning_rate": 1.7048607327560916e-05, "loss": 1.6042, "step": 70550 }, { "epoch": 0.4434859466148135, "grad_norm": 6.988030433654785, "learning_rate": 1.7048188226616263e-05, "loss": 1.8534, "step": 70560 }, { "epoch": 0.4435487989315106, "grad_norm": 6.261846542358398, "learning_rate": 1.704776912567161e-05, "loss": 1.8178, "step": 70570 }, { "epoch": 0.44361165124820773, "grad_norm": 7.959524631500244, "learning_rate": 1.7047350024726957e-05, "loss": 1.8082, "step": 70580 }, { "epoch": 0.44367450356490484, "grad_norm": 7.8182806968688965, "learning_rate": 1.7046930923782304e-05, "loss": 1.6912, "step": 70590 }, { "epoch": 0.44373735588160196, "grad_norm": 6.153756141662598, "learning_rate": 1.704651182283765e-05, "loss": 1.7776, "step": 70600 }, { "epoch": 0.4438002081982991, "grad_norm": 6.9526753425598145, "learning_rate": 1.7046092721893e-05, "loss": 1.736, "step": 70610 }, { "epoch": 0.4438630605149962, "grad_norm": 8.29808521270752, "learning_rate": 1.7045673620948342e-05, "loss": 1.6857, "step": 70620 }, { "epoch": 0.4439259128316933, "grad_norm": 7.707038402557373, "learning_rate": 1.704525452000369e-05, "loss": 1.8685, "step": 70630 }, { "epoch": 0.4439887651483904, "grad_norm": 7.182012557983398, "learning_rate": 1.7044835419059036e-05, "loss": 1.7884, "step": 70640 }, { "epoch": 0.4440516174650875, "grad_norm": 6.495720386505127, "learning_rate": 1.7044416318114383e-05, "loss": 1.6684, "step": 70650 }, { "epoch": 0.4441144697817846, "grad_norm": 7.441178321838379, "learning_rate": 1.704399721716973e-05, "loss": 1.5172, "step": 70660 }, { "epoch": 0.4441773220984817, "grad_norm": 6.320635795593262, "learning_rate": 1.7043578116225074e-05, "loss": 1.519, "step": 70670 }, { "epoch": 0.4442401744151788, "grad_norm": 6.959654808044434, "learning_rate": 1.704315901528042e-05, "loss": 1.7838, "step": 70680 }, { "epoch": 0.44430302673187594, "grad_norm": 7.093775749206543, "learning_rate": 1.7042739914335768e-05, "loss": 1.6259, "step": 70690 }, { "epoch": 0.44436587904857305, "grad_norm": 8.001116752624512, "learning_rate": 1.7042320813391115e-05, "loss": 1.9448, "step": 70700 }, { "epoch": 0.44442873136527017, "grad_norm": 7.232832431793213, "learning_rate": 1.704190171244646e-05, "loss": 1.7938, "step": 70710 }, { "epoch": 0.4444915836819673, "grad_norm": 7.079518795013428, "learning_rate": 1.7041482611501806e-05, "loss": 1.5559, "step": 70720 }, { "epoch": 0.4445544359986644, "grad_norm": 8.122894287109375, "learning_rate": 1.7041063510557153e-05, "loss": 1.6795, "step": 70730 }, { "epoch": 0.4446172883153615, "grad_norm": 6.774159908294678, "learning_rate": 1.70406444096125e-05, "loss": 1.6674, "step": 70740 }, { "epoch": 0.44468014063205863, "grad_norm": 5.806540012359619, "learning_rate": 1.7040225308667847e-05, "loss": 1.5207, "step": 70750 }, { "epoch": 0.44474299294875574, "grad_norm": 7.335987091064453, "learning_rate": 1.7039806207723194e-05, "loss": 1.7307, "step": 70760 }, { "epoch": 0.4448058452654528, "grad_norm": 6.601565837860107, "learning_rate": 1.703938710677854e-05, "loss": 1.9337, "step": 70770 }, { "epoch": 0.4448686975821499, "grad_norm": 7.305120944976807, "learning_rate": 1.703896800583389e-05, "loss": 2.0206, "step": 70780 }, { "epoch": 0.44493154989884703, "grad_norm": 5.766336917877197, "learning_rate": 1.7038548904889235e-05, "loss": 1.6814, "step": 70790 }, { "epoch": 0.44499440221554415, "grad_norm": 7.237926483154297, "learning_rate": 1.703812980394458e-05, "loss": 1.6617, "step": 70800 }, { "epoch": 0.44505725453224126, "grad_norm": 8.00814151763916, "learning_rate": 1.7037710702999926e-05, "loss": 1.7771, "step": 70810 }, { "epoch": 0.4451201068489384, "grad_norm": 7.366685390472412, "learning_rate": 1.7037291602055273e-05, "loss": 1.5731, "step": 70820 }, { "epoch": 0.4451829591656355, "grad_norm": 6.8207855224609375, "learning_rate": 1.703687250111062e-05, "loss": 1.8505, "step": 70830 }, { "epoch": 0.4452458114823326, "grad_norm": 6.556506633758545, "learning_rate": 1.7036453400165964e-05, "loss": 1.8803, "step": 70840 }, { "epoch": 0.4453086637990297, "grad_norm": 6.4768524169921875, "learning_rate": 1.703603429922131e-05, "loss": 1.9091, "step": 70850 }, { "epoch": 0.44537151611572684, "grad_norm": 7.1526970863342285, "learning_rate": 1.7035615198276658e-05, "loss": 1.7211, "step": 70860 }, { "epoch": 0.44543436843242395, "grad_norm": 7.244419574737549, "learning_rate": 1.7035196097332005e-05, "loss": 1.7652, "step": 70870 }, { "epoch": 0.44549722074912107, "grad_norm": 6.412376880645752, "learning_rate": 1.7034776996387352e-05, "loss": 1.5863, "step": 70880 }, { "epoch": 0.4455600730658182, "grad_norm": 5.949934482574463, "learning_rate": 1.7034357895442696e-05, "loss": 1.8857, "step": 70890 }, { "epoch": 0.44562292538251524, "grad_norm": 7.144877910614014, "learning_rate": 1.7033938794498043e-05, "loss": 1.7353, "step": 70900 }, { "epoch": 0.44568577769921236, "grad_norm": 6.605494499206543, "learning_rate": 1.703351969355339e-05, "loss": 1.5508, "step": 70910 }, { "epoch": 0.4457486300159095, "grad_norm": 5.3954033851623535, "learning_rate": 1.7033100592608737e-05, "loss": 1.7277, "step": 70920 }, { "epoch": 0.4458114823326066, "grad_norm": 6.628122806549072, "learning_rate": 1.703268149166408e-05, "loss": 1.8054, "step": 70930 }, { "epoch": 0.4458743346493037, "grad_norm": 7.13183069229126, "learning_rate": 1.7032262390719428e-05, "loss": 1.7426, "step": 70940 }, { "epoch": 0.4459371869660008, "grad_norm": 7.770249366760254, "learning_rate": 1.7031843289774775e-05, "loss": 1.682, "step": 70950 }, { "epoch": 0.44600003928269794, "grad_norm": 7.339083194732666, "learning_rate": 1.7031424188830122e-05, "loss": 1.8275, "step": 70960 }, { "epoch": 0.44606289159939505, "grad_norm": 6.73902702331543, "learning_rate": 1.703100508788547e-05, "loss": 1.7288, "step": 70970 }, { "epoch": 0.44612574391609217, "grad_norm": 8.257696151733398, "learning_rate": 1.7030585986940816e-05, "loss": 1.6587, "step": 70980 }, { "epoch": 0.4461885962327893, "grad_norm": 5.991543292999268, "learning_rate": 1.7030166885996163e-05, "loss": 1.5641, "step": 70990 }, { "epoch": 0.4462514485494864, "grad_norm": 6.239089488983154, "learning_rate": 1.702974778505151e-05, "loss": 1.7716, "step": 71000 }, { "epoch": 0.4463143008661835, "grad_norm": 7.279812812805176, "learning_rate": 1.7029328684106857e-05, "loss": 1.8226, "step": 71010 }, { "epoch": 0.4463771531828806, "grad_norm": 8.477140426635742, "learning_rate": 1.70289095831622e-05, "loss": 1.7446, "step": 71020 }, { "epoch": 0.4464400054995777, "grad_norm": 6.918874263763428, "learning_rate": 1.7028490482217548e-05, "loss": 1.5897, "step": 71030 }, { "epoch": 0.4465028578162748, "grad_norm": 6.781617164611816, "learning_rate": 1.7028071381272895e-05, "loss": 1.5441, "step": 71040 }, { "epoch": 0.4465657101329719, "grad_norm": 6.946547031402588, "learning_rate": 1.7027652280328242e-05, "loss": 1.5858, "step": 71050 }, { "epoch": 0.44662856244966903, "grad_norm": 6.65471887588501, "learning_rate": 1.702723317938359e-05, "loss": 1.7303, "step": 71060 }, { "epoch": 0.44669141476636615, "grad_norm": 5.427356243133545, "learning_rate": 1.7026814078438933e-05, "loss": 1.6519, "step": 71070 }, { "epoch": 0.44675426708306326, "grad_norm": 7.257509708404541, "learning_rate": 1.702639497749428e-05, "loss": 1.9381, "step": 71080 }, { "epoch": 0.4468171193997604, "grad_norm": 6.8030781745910645, "learning_rate": 1.7025975876549627e-05, "loss": 1.8593, "step": 71090 }, { "epoch": 0.4468799717164575, "grad_norm": 6.951048374176025, "learning_rate": 1.7025556775604974e-05, "loss": 1.6926, "step": 71100 }, { "epoch": 0.4469428240331546, "grad_norm": 6.181455612182617, "learning_rate": 1.7025137674660318e-05, "loss": 1.4505, "step": 71110 }, { "epoch": 0.4470056763498517, "grad_norm": 8.337657928466797, "learning_rate": 1.7024718573715665e-05, "loss": 1.8906, "step": 71120 }, { "epoch": 0.44706852866654884, "grad_norm": 6.0072550773620605, "learning_rate": 1.7024299472771012e-05, "loss": 1.6255, "step": 71130 }, { "epoch": 0.44713138098324595, "grad_norm": 6.303506851196289, "learning_rate": 1.702388037182636e-05, "loss": 1.6077, "step": 71140 }, { "epoch": 0.44719423329994307, "grad_norm": 6.815176486968994, "learning_rate": 1.7023461270881706e-05, "loss": 1.6572, "step": 71150 }, { "epoch": 0.4472570856166401, "grad_norm": 7.286723613739014, "learning_rate": 1.7023042169937053e-05, "loss": 1.6682, "step": 71160 }, { "epoch": 0.44731993793333724, "grad_norm": 7.008586883544922, "learning_rate": 1.70226230689924e-05, "loss": 1.6249, "step": 71170 }, { "epoch": 0.44738279025003436, "grad_norm": 7.969468593597412, "learning_rate": 1.7022203968047744e-05, "loss": 1.47, "step": 71180 }, { "epoch": 0.44744564256673147, "grad_norm": 8.08353042602539, "learning_rate": 1.702178486710309e-05, "loss": 1.6048, "step": 71190 }, { "epoch": 0.4475084948834286, "grad_norm": 7.194873332977295, "learning_rate": 1.7021365766158438e-05, "loss": 1.7399, "step": 71200 }, { "epoch": 0.4475713472001257, "grad_norm": 7.963724136352539, "learning_rate": 1.7020946665213785e-05, "loss": 1.6106, "step": 71210 }, { "epoch": 0.4476341995168228, "grad_norm": 6.300773620605469, "learning_rate": 1.7020527564269132e-05, "loss": 1.7566, "step": 71220 }, { "epoch": 0.44769705183351993, "grad_norm": 5.7120842933654785, "learning_rate": 1.702010846332448e-05, "loss": 1.9862, "step": 71230 }, { "epoch": 0.44775990415021705, "grad_norm": 6.56306791305542, "learning_rate": 1.7019689362379823e-05, "loss": 1.7944, "step": 71240 }, { "epoch": 0.44782275646691416, "grad_norm": 6.820886135101318, "learning_rate": 1.701927026143517e-05, "loss": 1.7516, "step": 71250 }, { "epoch": 0.4478856087836113, "grad_norm": 7.675322532653809, "learning_rate": 1.7018851160490517e-05, "loss": 1.5747, "step": 71260 }, { "epoch": 0.4479484611003084, "grad_norm": 6.196045398712158, "learning_rate": 1.7018432059545864e-05, "loss": 1.6493, "step": 71270 }, { "epoch": 0.44801131341700545, "grad_norm": 6.478281497955322, "learning_rate": 1.701801295860121e-05, "loss": 1.5458, "step": 71280 }, { "epoch": 0.44807416573370257, "grad_norm": 6.8187575340271, "learning_rate": 1.7017593857656555e-05, "loss": 1.887, "step": 71290 }, { "epoch": 0.4481370180503997, "grad_norm": 6.972844123840332, "learning_rate": 1.7017174756711902e-05, "loss": 1.7298, "step": 71300 }, { "epoch": 0.4481998703670968, "grad_norm": 6.094895362854004, "learning_rate": 1.701675565576725e-05, "loss": 1.9406, "step": 71310 }, { "epoch": 0.4482627226837939, "grad_norm": 6.521101951599121, "learning_rate": 1.7016336554822596e-05, "loss": 1.5613, "step": 71320 }, { "epoch": 0.44832557500049103, "grad_norm": 7.059791564941406, "learning_rate": 1.701591745387794e-05, "loss": 1.7796, "step": 71330 }, { "epoch": 0.44838842731718814, "grad_norm": 6.8557047843933105, "learning_rate": 1.7015498352933287e-05, "loss": 1.6299, "step": 71340 }, { "epoch": 0.44845127963388526, "grad_norm": 6.9734296798706055, "learning_rate": 1.7015079251988634e-05, "loss": 1.4914, "step": 71350 }, { "epoch": 0.4485141319505824, "grad_norm": 7.521538257598877, "learning_rate": 1.701466015104398e-05, "loss": 1.7855, "step": 71360 }, { "epoch": 0.4485769842672795, "grad_norm": 6.527483940124512, "learning_rate": 1.7014241050099328e-05, "loss": 1.8622, "step": 71370 }, { "epoch": 0.4486398365839766, "grad_norm": 6.91995096206665, "learning_rate": 1.7013821949154675e-05, "loss": 1.8563, "step": 71380 }, { "epoch": 0.4487026889006737, "grad_norm": 6.213915824890137, "learning_rate": 1.7013402848210022e-05, "loss": 1.6592, "step": 71390 }, { "epoch": 0.44876554121737083, "grad_norm": 6.30928897857666, "learning_rate": 1.701298374726537e-05, "loss": 1.6303, "step": 71400 }, { "epoch": 0.4488283935340679, "grad_norm": 6.645300388336182, "learning_rate": 1.7012564646320717e-05, "loss": 1.6825, "step": 71410 }, { "epoch": 0.448891245850765, "grad_norm": 6.3673248291015625, "learning_rate": 1.701214554537606e-05, "loss": 1.6719, "step": 71420 }, { "epoch": 0.4489540981674621, "grad_norm": 6.976233005523682, "learning_rate": 1.7011726444431407e-05, "loss": 1.8079, "step": 71430 }, { "epoch": 0.44901695048415924, "grad_norm": 6.400688171386719, "learning_rate": 1.7011307343486754e-05, "loss": 1.7852, "step": 71440 }, { "epoch": 0.44907980280085635, "grad_norm": 7.425689697265625, "learning_rate": 1.70108882425421e-05, "loss": 1.847, "step": 71450 }, { "epoch": 0.44914265511755347, "grad_norm": 6.973445892333984, "learning_rate": 1.7010469141597445e-05, "loss": 1.9481, "step": 71460 }, { "epoch": 0.4492055074342506, "grad_norm": 6.803900718688965, "learning_rate": 1.7010050040652792e-05, "loss": 1.5558, "step": 71470 }, { "epoch": 0.4492683597509477, "grad_norm": 7.7245988845825195, "learning_rate": 1.700963093970814e-05, "loss": 1.7535, "step": 71480 }, { "epoch": 0.4493312120676448, "grad_norm": 7.229686737060547, "learning_rate": 1.7009211838763486e-05, "loss": 1.7796, "step": 71490 }, { "epoch": 0.44939406438434193, "grad_norm": 6.046517848968506, "learning_rate": 1.7008792737818833e-05, "loss": 1.6553, "step": 71500 }, { "epoch": 0.44945691670103904, "grad_norm": 6.89406681060791, "learning_rate": 1.7008373636874177e-05, "loss": 1.5955, "step": 71510 }, { "epoch": 0.44951976901773616, "grad_norm": 6.609005451202393, "learning_rate": 1.7007954535929524e-05, "loss": 1.9899, "step": 71520 }, { "epoch": 0.4495826213344333, "grad_norm": 5.675281524658203, "learning_rate": 1.700753543498487e-05, "loss": 1.8324, "step": 71530 }, { "epoch": 0.44964547365113033, "grad_norm": 7.0303826332092285, "learning_rate": 1.700711633404022e-05, "loss": 1.5863, "step": 71540 }, { "epoch": 0.44970832596782745, "grad_norm": 7.449792385101318, "learning_rate": 1.7006697233095565e-05, "loss": 1.9384, "step": 71550 }, { "epoch": 0.44977117828452456, "grad_norm": 7.167794227600098, "learning_rate": 1.700627813215091e-05, "loss": 1.5772, "step": 71560 }, { "epoch": 0.4498340306012217, "grad_norm": 7.127641677856445, "learning_rate": 1.7005859031206256e-05, "loss": 1.9993, "step": 71570 }, { "epoch": 0.4498968829179188, "grad_norm": 5.376589775085449, "learning_rate": 1.7005439930261603e-05, "loss": 1.8241, "step": 71580 }, { "epoch": 0.4499597352346159, "grad_norm": 6.894751071929932, "learning_rate": 1.700502082931695e-05, "loss": 1.7184, "step": 71590 }, { "epoch": 0.450022587551313, "grad_norm": 6.802367687225342, "learning_rate": 1.7004601728372297e-05, "loss": 1.7378, "step": 71600 }, { "epoch": 0.45008543986801014, "grad_norm": 6.673643589019775, "learning_rate": 1.7004182627427644e-05, "loss": 1.5584, "step": 71610 }, { "epoch": 0.45014829218470725, "grad_norm": 8.011720657348633, "learning_rate": 1.700376352648299e-05, "loss": 1.6937, "step": 71620 }, { "epoch": 0.45021114450140437, "grad_norm": 8.510849952697754, "learning_rate": 1.700334442553834e-05, "loss": 1.6512, "step": 71630 }, { "epoch": 0.4502739968181015, "grad_norm": 6.473636627197266, "learning_rate": 1.7002925324593682e-05, "loss": 1.7975, "step": 71640 }, { "epoch": 0.4503368491347986, "grad_norm": 11.131653785705566, "learning_rate": 1.700250622364903e-05, "loss": 1.6276, "step": 71650 }, { "epoch": 0.4503997014514957, "grad_norm": 7.675611972808838, "learning_rate": 1.7002087122704376e-05, "loss": 1.5814, "step": 71660 }, { "epoch": 0.4504625537681928, "grad_norm": 5.326087951660156, "learning_rate": 1.7001668021759723e-05, "loss": 1.5803, "step": 71670 }, { "epoch": 0.4505254060848899, "grad_norm": 8.270243644714355, "learning_rate": 1.700124892081507e-05, "loss": 1.8582, "step": 71680 }, { "epoch": 0.450588258401587, "grad_norm": 6.738409042358398, "learning_rate": 1.7000829819870414e-05, "loss": 1.7065, "step": 71690 }, { "epoch": 0.4506511107182841, "grad_norm": 8.476706504821777, "learning_rate": 1.700041071892576e-05, "loss": 1.873, "step": 71700 }, { "epoch": 0.45071396303498124, "grad_norm": 7.574090480804443, "learning_rate": 1.699999161798111e-05, "loss": 1.8404, "step": 71710 }, { "epoch": 0.45077681535167835, "grad_norm": 6.264162063598633, "learning_rate": 1.6999572517036455e-05, "loss": 1.6393, "step": 71720 }, { "epoch": 0.45083966766837547, "grad_norm": 7.23504638671875, "learning_rate": 1.69991534160918e-05, "loss": 1.8151, "step": 71730 }, { "epoch": 0.4509025199850726, "grad_norm": 6.774835586547852, "learning_rate": 1.6998734315147146e-05, "loss": 1.7495, "step": 71740 }, { "epoch": 0.4509653723017697, "grad_norm": 7.305593490600586, "learning_rate": 1.6998315214202493e-05, "loss": 1.603, "step": 71750 }, { "epoch": 0.4510282246184668, "grad_norm": 6.9782328605651855, "learning_rate": 1.699789611325784e-05, "loss": 1.8465, "step": 71760 }, { "epoch": 0.4510910769351639, "grad_norm": 7.389726161956787, "learning_rate": 1.6997477012313187e-05, "loss": 1.531, "step": 71770 }, { "epoch": 0.45115392925186104, "grad_norm": 7.15920877456665, "learning_rate": 1.6997057911368534e-05, "loss": 1.6857, "step": 71780 }, { "epoch": 0.45121678156855816, "grad_norm": 7.240107536315918, "learning_rate": 1.699663881042388e-05, "loss": 1.7708, "step": 71790 }, { "epoch": 0.4512796338852552, "grad_norm": 6.3036980628967285, "learning_rate": 1.699621970947923e-05, "loss": 1.624, "step": 71800 }, { "epoch": 0.45134248620195233, "grad_norm": 7.612619876861572, "learning_rate": 1.6995800608534572e-05, "loss": 1.5405, "step": 71810 }, { "epoch": 0.45140533851864945, "grad_norm": 6.979082107543945, "learning_rate": 1.699538150758992e-05, "loss": 1.6972, "step": 71820 }, { "epoch": 0.45146819083534656, "grad_norm": 6.545334339141846, "learning_rate": 1.6994962406645266e-05, "loss": 1.5835, "step": 71830 }, { "epoch": 0.4515310431520437, "grad_norm": 6.032336711883545, "learning_rate": 1.6994543305700614e-05, "loss": 1.7, "step": 71840 }, { "epoch": 0.4515938954687408, "grad_norm": 7.708831310272217, "learning_rate": 1.699412420475596e-05, "loss": 1.6892, "step": 71850 }, { "epoch": 0.4516567477854379, "grad_norm": 6.805393695831299, "learning_rate": 1.6993705103811304e-05, "loss": 1.8059, "step": 71860 }, { "epoch": 0.451719600102135, "grad_norm": 7.413393974304199, "learning_rate": 1.699328600286665e-05, "loss": 1.8585, "step": 71870 }, { "epoch": 0.45178245241883214, "grad_norm": 5.643686294555664, "learning_rate": 1.6992866901922e-05, "loss": 1.5059, "step": 71880 }, { "epoch": 0.45184530473552925, "grad_norm": 5.965382099151611, "learning_rate": 1.6992447800977345e-05, "loss": 1.4114, "step": 71890 }, { "epoch": 0.45190815705222637, "grad_norm": 6.276783466339111, "learning_rate": 1.6992028700032693e-05, "loss": 1.6649, "step": 71900 }, { "epoch": 0.4519710093689235, "grad_norm": 8.128314971923828, "learning_rate": 1.6991609599088036e-05, "loss": 1.5287, "step": 71910 }, { "epoch": 0.45203386168562054, "grad_norm": 7.705959320068359, "learning_rate": 1.6991190498143383e-05, "loss": 1.7249, "step": 71920 }, { "epoch": 0.45209671400231766, "grad_norm": 5.786853313446045, "learning_rate": 1.699077139719873e-05, "loss": 1.6895, "step": 71930 }, { "epoch": 0.45215956631901477, "grad_norm": 6.402442455291748, "learning_rate": 1.6990352296254077e-05, "loss": 1.772, "step": 71940 }, { "epoch": 0.4522224186357119, "grad_norm": 5.8434367179870605, "learning_rate": 1.698993319530942e-05, "loss": 1.5583, "step": 71950 }, { "epoch": 0.452285270952409, "grad_norm": 6.627134799957275, "learning_rate": 1.6989514094364768e-05, "loss": 1.6571, "step": 71960 }, { "epoch": 0.4523481232691061, "grad_norm": 6.048925399780273, "learning_rate": 1.6989094993420115e-05, "loss": 1.8085, "step": 71970 }, { "epoch": 0.45241097558580323, "grad_norm": 6.704756736755371, "learning_rate": 1.6988675892475462e-05, "loss": 1.6309, "step": 71980 }, { "epoch": 0.45247382790250035, "grad_norm": 6.748854160308838, "learning_rate": 1.698825679153081e-05, "loss": 1.95, "step": 71990 }, { "epoch": 0.45253668021919746, "grad_norm": 5.85832405090332, "learning_rate": 1.6987837690586156e-05, "loss": 1.8155, "step": 72000 }, { "epoch": 0.4525995325358946, "grad_norm": 6.134837627410889, "learning_rate": 1.6987418589641504e-05, "loss": 1.7403, "step": 72010 }, { "epoch": 0.4526623848525917, "grad_norm": 8.114599227905273, "learning_rate": 1.698699948869685e-05, "loss": 1.7724, "step": 72020 }, { "epoch": 0.4527252371692888, "grad_norm": 6.4747700691223145, "learning_rate": 1.6986580387752198e-05, "loss": 1.9581, "step": 72030 }, { "epoch": 0.4527880894859859, "grad_norm": 6.428544521331787, "learning_rate": 1.698616128680754e-05, "loss": 1.6771, "step": 72040 }, { "epoch": 0.452850941802683, "grad_norm": 7.327491760253906, "learning_rate": 1.698574218586289e-05, "loss": 1.6727, "step": 72050 }, { "epoch": 0.4529137941193801, "grad_norm": 7.482061386108398, "learning_rate": 1.6985323084918236e-05, "loss": 1.6585, "step": 72060 }, { "epoch": 0.4529766464360772, "grad_norm": 6.531540870666504, "learning_rate": 1.6984903983973583e-05, "loss": 1.6971, "step": 72070 }, { "epoch": 0.45303949875277433, "grad_norm": 7.134012699127197, "learning_rate": 1.698448488302893e-05, "loss": 1.8148, "step": 72080 }, { "epoch": 0.45310235106947144, "grad_norm": 5.541568756103516, "learning_rate": 1.6984065782084273e-05, "loss": 1.7753, "step": 72090 }, { "epoch": 0.45316520338616856, "grad_norm": 6.51377010345459, "learning_rate": 1.698364668113962e-05, "loss": 2.0393, "step": 72100 }, { "epoch": 0.4532280557028657, "grad_norm": 6.593539237976074, "learning_rate": 1.6983227580194967e-05, "loss": 1.6683, "step": 72110 }, { "epoch": 0.4532909080195628, "grad_norm": 6.5326972007751465, "learning_rate": 1.6982808479250315e-05, "loss": 1.8713, "step": 72120 }, { "epoch": 0.4533537603362599, "grad_norm": 6.634287357330322, "learning_rate": 1.6982389378305658e-05, "loss": 1.6738, "step": 72130 }, { "epoch": 0.453416612652957, "grad_norm": 8.520031929016113, "learning_rate": 1.6981970277361005e-05, "loss": 1.938, "step": 72140 }, { "epoch": 0.45347946496965413, "grad_norm": 6.543417453765869, "learning_rate": 1.6981551176416352e-05, "loss": 1.7549, "step": 72150 }, { "epoch": 0.45354231728635125, "grad_norm": 6.342586040496826, "learning_rate": 1.69811320754717e-05, "loss": 1.6541, "step": 72160 }, { "epoch": 0.45360516960304836, "grad_norm": 6.786216735839844, "learning_rate": 1.6980712974527047e-05, "loss": 1.8846, "step": 72170 }, { "epoch": 0.4536680219197454, "grad_norm": 8.198033332824707, "learning_rate": 1.6980293873582394e-05, "loss": 1.6326, "step": 72180 }, { "epoch": 0.45373087423644254, "grad_norm": 7.122391223907471, "learning_rate": 1.6979874772637737e-05, "loss": 1.6513, "step": 72190 }, { "epoch": 0.45379372655313965, "grad_norm": 5.494532585144043, "learning_rate": 1.6979455671693084e-05, "loss": 1.6417, "step": 72200 }, { "epoch": 0.45385657886983677, "grad_norm": 7.167390823364258, "learning_rate": 1.697903657074843e-05, "loss": 1.7278, "step": 72210 }, { "epoch": 0.4539194311865339, "grad_norm": 7.346843719482422, "learning_rate": 1.697861746980378e-05, "loss": 1.7696, "step": 72220 }, { "epoch": 0.453982283503231, "grad_norm": 7.8218865394592285, "learning_rate": 1.6978198368859126e-05, "loss": 1.6169, "step": 72230 }, { "epoch": 0.4540451358199281, "grad_norm": 7.494811534881592, "learning_rate": 1.6977779267914473e-05, "loss": 1.7006, "step": 72240 }, { "epoch": 0.45410798813662523, "grad_norm": 6.5424017906188965, "learning_rate": 1.697736016696982e-05, "loss": 1.6564, "step": 72250 }, { "epoch": 0.45417084045332234, "grad_norm": 6.879222393035889, "learning_rate": 1.6976941066025163e-05, "loss": 1.8634, "step": 72260 }, { "epoch": 0.45423369277001946, "grad_norm": 7.229594707489014, "learning_rate": 1.697652196508051e-05, "loss": 1.6717, "step": 72270 }, { "epoch": 0.4542965450867166, "grad_norm": 7.287384510040283, "learning_rate": 1.6976102864135858e-05, "loss": 1.4973, "step": 72280 }, { "epoch": 0.4543593974034137, "grad_norm": 6.4875922203063965, "learning_rate": 1.6975683763191205e-05, "loss": 1.7654, "step": 72290 }, { "epoch": 0.4544222497201108, "grad_norm": 7.1652092933654785, "learning_rate": 1.697526466224655e-05, "loss": 1.4093, "step": 72300 }, { "epoch": 0.45448510203680786, "grad_norm": 7.26404333114624, "learning_rate": 1.6974845561301895e-05, "loss": 1.4883, "step": 72310 }, { "epoch": 0.454547954353505, "grad_norm": 6.425233364105225, "learning_rate": 1.6974426460357242e-05, "loss": 1.6787, "step": 72320 }, { "epoch": 0.4546108066702021, "grad_norm": 8.020889282226562, "learning_rate": 1.697400735941259e-05, "loss": 1.6346, "step": 72330 }, { "epoch": 0.4546736589868992, "grad_norm": 6.626036167144775, "learning_rate": 1.6973588258467937e-05, "loss": 1.838, "step": 72340 }, { "epoch": 0.4547365113035963, "grad_norm": 7.176641464233398, "learning_rate": 1.697316915752328e-05, "loss": 1.7114, "step": 72350 }, { "epoch": 0.45479936362029344, "grad_norm": 6.502776145935059, "learning_rate": 1.6972750056578627e-05, "loss": 1.6543, "step": 72360 }, { "epoch": 0.45486221593699055, "grad_norm": 5.991310119628906, "learning_rate": 1.6972330955633974e-05, "loss": 1.7302, "step": 72370 }, { "epoch": 0.45492506825368767, "grad_norm": 6.242975234985352, "learning_rate": 1.697191185468932e-05, "loss": 1.682, "step": 72380 }, { "epoch": 0.4549879205703848, "grad_norm": 7.033971786499023, "learning_rate": 1.697149275374467e-05, "loss": 1.5691, "step": 72390 }, { "epoch": 0.4550507728870819, "grad_norm": 6.049368858337402, "learning_rate": 1.6971073652800016e-05, "loss": 1.7311, "step": 72400 }, { "epoch": 0.455113625203779, "grad_norm": 8.386807441711426, "learning_rate": 1.6970654551855363e-05, "loss": 1.6526, "step": 72410 }, { "epoch": 0.45517647752047613, "grad_norm": 6.7603840827941895, "learning_rate": 1.697023545091071e-05, "loss": 1.7794, "step": 72420 }, { "epoch": 0.4552393298371732, "grad_norm": 7.872735500335693, "learning_rate": 1.6969816349966053e-05, "loss": 1.7921, "step": 72430 }, { "epoch": 0.4553021821538703, "grad_norm": 6.434782981872559, "learning_rate": 1.69693972490214e-05, "loss": 1.7876, "step": 72440 }, { "epoch": 0.4553650344705674, "grad_norm": 7.069159507751465, "learning_rate": 1.6968978148076748e-05, "loss": 1.6929, "step": 72450 }, { "epoch": 0.45542788678726454, "grad_norm": 5.2417497634887695, "learning_rate": 1.6968559047132095e-05, "loss": 1.6391, "step": 72460 }, { "epoch": 0.45549073910396165, "grad_norm": 6.261465549468994, "learning_rate": 1.696813994618744e-05, "loss": 1.766, "step": 72470 }, { "epoch": 0.45555359142065877, "grad_norm": 7.114291191101074, "learning_rate": 1.6967720845242785e-05, "loss": 1.8308, "step": 72480 }, { "epoch": 0.4556164437373559, "grad_norm": 5.9655585289001465, "learning_rate": 1.6967301744298132e-05, "loss": 1.7141, "step": 72490 }, { "epoch": 0.455679296054053, "grad_norm": 6.264899253845215, "learning_rate": 1.696688264335348e-05, "loss": 1.744, "step": 72500 }, { "epoch": 0.4557421483707501, "grad_norm": 7.141603469848633, "learning_rate": 1.6966463542408827e-05, "loss": 1.7852, "step": 72510 }, { "epoch": 0.4558050006874472, "grad_norm": 6.6123857498168945, "learning_rate": 1.6966044441464174e-05, "loss": 1.7439, "step": 72520 }, { "epoch": 0.45586785300414434, "grad_norm": 6.842229843139648, "learning_rate": 1.6965625340519517e-05, "loss": 1.6891, "step": 72530 }, { "epoch": 0.45593070532084146, "grad_norm": 6.660259246826172, "learning_rate": 1.6965206239574864e-05, "loss": 1.6473, "step": 72540 }, { "epoch": 0.45599355763753857, "grad_norm": 6.573643684387207, "learning_rate": 1.696478713863021e-05, "loss": 1.6939, "step": 72550 }, { "epoch": 0.45605640995423563, "grad_norm": 7.359166145324707, "learning_rate": 1.696436803768556e-05, "loss": 1.7793, "step": 72560 }, { "epoch": 0.45611926227093275, "grad_norm": 7.229355812072754, "learning_rate": 1.6963948936740902e-05, "loss": 1.7815, "step": 72570 }, { "epoch": 0.45618211458762986, "grad_norm": 7.012868404388428, "learning_rate": 1.696352983579625e-05, "loss": 1.6657, "step": 72580 }, { "epoch": 0.456244966904327, "grad_norm": 6.879143238067627, "learning_rate": 1.6963110734851596e-05, "loss": 1.6057, "step": 72590 }, { "epoch": 0.4563078192210241, "grad_norm": 7.133265018463135, "learning_rate": 1.6962691633906943e-05, "loss": 1.7353, "step": 72600 }, { "epoch": 0.4563706715377212, "grad_norm": 6.666014194488525, "learning_rate": 1.696227253296229e-05, "loss": 1.744, "step": 72610 }, { "epoch": 0.4564335238544183, "grad_norm": 7.25177001953125, "learning_rate": 1.6961853432017638e-05, "loss": 1.5536, "step": 72620 }, { "epoch": 0.45649637617111544, "grad_norm": 6.873335361480713, "learning_rate": 1.6961434331072985e-05, "loss": 1.8091, "step": 72630 }, { "epoch": 0.45655922848781255, "grad_norm": 5.979950904846191, "learning_rate": 1.6961015230128332e-05, "loss": 1.8286, "step": 72640 }, { "epoch": 0.45662208080450967, "grad_norm": 7.808071613311768, "learning_rate": 1.696059612918368e-05, "loss": 1.8732, "step": 72650 }, { "epoch": 0.4566849331212068, "grad_norm": 8.281644821166992, "learning_rate": 1.6960177028239022e-05, "loss": 1.8113, "step": 72660 }, { "epoch": 0.4567477854379039, "grad_norm": 7.271381378173828, "learning_rate": 1.695975792729437e-05, "loss": 1.9359, "step": 72670 }, { "epoch": 0.456810637754601, "grad_norm": 6.758279323577881, "learning_rate": 1.6959338826349717e-05, "loss": 1.5058, "step": 72680 }, { "epoch": 0.45687349007129807, "grad_norm": 7.0523834228515625, "learning_rate": 1.6958919725405064e-05, "loss": 1.8413, "step": 72690 }, { "epoch": 0.4569363423879952, "grad_norm": 6.535349369049072, "learning_rate": 1.695850062446041e-05, "loss": 1.7019, "step": 72700 }, { "epoch": 0.4569991947046923, "grad_norm": 6.913873195648193, "learning_rate": 1.6958081523515754e-05, "loss": 1.7541, "step": 72710 }, { "epoch": 0.4570620470213894, "grad_norm": 6.172971725463867, "learning_rate": 1.69576624225711e-05, "loss": 1.8061, "step": 72720 }, { "epoch": 0.45712489933808653, "grad_norm": 6.995718955993652, "learning_rate": 1.695724332162645e-05, "loss": 1.8055, "step": 72730 }, { "epoch": 0.45718775165478365, "grad_norm": 5.739918231964111, "learning_rate": 1.6956824220681796e-05, "loss": 1.8473, "step": 72740 }, { "epoch": 0.45725060397148076, "grad_norm": 8.108476638793945, "learning_rate": 1.695640511973714e-05, "loss": 1.8737, "step": 72750 }, { "epoch": 0.4573134562881779, "grad_norm": 6.1044440269470215, "learning_rate": 1.6955986018792486e-05, "loss": 1.7364, "step": 72760 }, { "epoch": 0.457376308604875, "grad_norm": 7.338126182556152, "learning_rate": 1.6955566917847833e-05, "loss": 1.8628, "step": 72770 }, { "epoch": 0.4574391609215721, "grad_norm": 5.92439603805542, "learning_rate": 1.695514781690318e-05, "loss": 1.6609, "step": 72780 }, { "epoch": 0.4575020132382692, "grad_norm": 5.798541069030762, "learning_rate": 1.6954728715958528e-05, "loss": 1.7788, "step": 72790 }, { "epoch": 0.45756486555496634, "grad_norm": 6.529398441314697, "learning_rate": 1.6954309615013875e-05, "loss": 1.7826, "step": 72800 }, { "epoch": 0.45762771787166345, "grad_norm": 7.10496187210083, "learning_rate": 1.695389051406922e-05, "loss": 1.5668, "step": 72810 }, { "epoch": 0.4576905701883605, "grad_norm": 6.67068338394165, "learning_rate": 1.6953471413124565e-05, "loss": 1.6258, "step": 72820 }, { "epoch": 0.45775342250505763, "grad_norm": 6.297972679138184, "learning_rate": 1.6953094222274377e-05, "loss": 1.8902, "step": 72830 }, { "epoch": 0.45781627482175474, "grad_norm": 5.803272247314453, "learning_rate": 1.6952675121329724e-05, "loss": 1.9535, "step": 72840 }, { "epoch": 0.45787912713845186, "grad_norm": 8.457806587219238, "learning_rate": 1.695225602038507e-05, "loss": 1.8868, "step": 72850 }, { "epoch": 0.457941979455149, "grad_norm": 6.783581733703613, "learning_rate": 1.6951836919440418e-05, "loss": 1.6544, "step": 72860 }, { "epoch": 0.4580048317718461, "grad_norm": 6.673780918121338, "learning_rate": 1.6951417818495765e-05, "loss": 1.7468, "step": 72870 }, { "epoch": 0.4580676840885432, "grad_norm": 7.6437668800354, "learning_rate": 1.695099871755111e-05, "loss": 1.8997, "step": 72880 }, { "epoch": 0.4581305364052403, "grad_norm": 6.677247524261475, "learning_rate": 1.6950579616606456e-05, "loss": 1.9311, "step": 72890 }, { "epoch": 0.45819338872193743, "grad_norm": 6.750226020812988, "learning_rate": 1.6950160515661803e-05, "loss": 1.7603, "step": 72900 }, { "epoch": 0.45825624103863455, "grad_norm": 6.881356716156006, "learning_rate": 1.694974141471715e-05, "loss": 1.5952, "step": 72910 }, { "epoch": 0.45831909335533166, "grad_norm": 6.134947776794434, "learning_rate": 1.6949322313772497e-05, "loss": 1.8821, "step": 72920 }, { "epoch": 0.4583819456720288, "grad_norm": 6.7909135818481445, "learning_rate": 1.6948903212827844e-05, "loss": 1.7302, "step": 72930 }, { "epoch": 0.45844479798872584, "grad_norm": 7.331709384918213, "learning_rate": 1.694848411188319e-05, "loss": 1.6622, "step": 72940 }, { "epoch": 0.45850765030542295, "grad_norm": 7.265378475189209, "learning_rate": 1.6948065010938538e-05, "loss": 1.7583, "step": 72950 }, { "epoch": 0.45857050262212007, "grad_norm": 5.8029279708862305, "learning_rate": 1.6947645909993882e-05, "loss": 1.6582, "step": 72960 }, { "epoch": 0.4586333549388172, "grad_norm": 6.222194194793701, "learning_rate": 1.694722680904923e-05, "loss": 1.6333, "step": 72970 }, { "epoch": 0.4586962072555143, "grad_norm": 7.174890995025635, "learning_rate": 1.6946807708104576e-05, "loss": 1.705, "step": 72980 }, { "epoch": 0.4587590595722114, "grad_norm": 6.686669826507568, "learning_rate": 1.6946388607159923e-05, "loss": 1.7082, "step": 72990 }, { "epoch": 0.45882191188890853, "grad_norm": 7.4787750244140625, "learning_rate": 1.6945969506215267e-05, "loss": 1.8003, "step": 73000 }, { "epoch": 0.45888476420560564, "grad_norm": 6.632627964019775, "learning_rate": 1.6945550405270614e-05, "loss": 1.4592, "step": 73010 }, { "epoch": 0.45894761652230276, "grad_norm": 7.123968124389648, "learning_rate": 1.694513130432596e-05, "loss": 1.9138, "step": 73020 }, { "epoch": 0.4590104688389999, "grad_norm": 6.550754070281982, "learning_rate": 1.6944712203381308e-05, "loss": 1.5374, "step": 73030 }, { "epoch": 0.459073321155697, "grad_norm": 7.170216083526611, "learning_rate": 1.6944293102436655e-05, "loss": 1.7027, "step": 73040 }, { "epoch": 0.4591361734723941, "grad_norm": 7.3206048011779785, "learning_rate": 1.6943874001492e-05, "loss": 1.7429, "step": 73050 }, { "epoch": 0.4591990257890912, "grad_norm": 9.7173433303833, "learning_rate": 1.6943454900547346e-05, "loss": 2.1381, "step": 73060 }, { "epoch": 0.4592618781057883, "grad_norm": 6.854995250701904, "learning_rate": 1.6943035799602693e-05, "loss": 1.8794, "step": 73070 }, { "epoch": 0.4593247304224854, "grad_norm": 9.510405540466309, "learning_rate": 1.694261669865804e-05, "loss": 1.5659, "step": 73080 }, { "epoch": 0.4593875827391825, "grad_norm": 6.91724157333374, "learning_rate": 1.6942197597713387e-05, "loss": 1.8883, "step": 73090 }, { "epoch": 0.4594504350558796, "grad_norm": 7.3841166496276855, "learning_rate": 1.6941778496768734e-05, "loss": 1.7679, "step": 73100 }, { "epoch": 0.45951328737257674, "grad_norm": 7.427146911621094, "learning_rate": 1.694135939582408e-05, "loss": 1.5073, "step": 73110 }, { "epoch": 0.45957613968927385, "grad_norm": 6.2699761390686035, "learning_rate": 1.6940940294879428e-05, "loss": 1.5289, "step": 73120 }, { "epoch": 0.45963899200597097, "grad_norm": 6.542108058929443, "learning_rate": 1.6940521193934772e-05, "loss": 1.8077, "step": 73130 }, { "epoch": 0.4597018443226681, "grad_norm": 6.790388584136963, "learning_rate": 1.694010209299012e-05, "loss": 1.6832, "step": 73140 }, { "epoch": 0.4597646966393652, "grad_norm": 7.237472057342529, "learning_rate": 1.6939682992045466e-05, "loss": 1.7433, "step": 73150 }, { "epoch": 0.4598275489560623, "grad_norm": 7.028692722320557, "learning_rate": 1.6939263891100813e-05, "loss": 1.6186, "step": 73160 }, { "epoch": 0.45989040127275943, "grad_norm": 7.17855978012085, "learning_rate": 1.693884479015616e-05, "loss": 1.8079, "step": 73170 }, { "epoch": 0.45995325358945655, "grad_norm": 9.221105575561523, "learning_rate": 1.6938425689211504e-05, "loss": 1.598, "step": 73180 }, { "epoch": 0.46001610590615366, "grad_norm": 7.384937286376953, "learning_rate": 1.693800658826685e-05, "loss": 1.7084, "step": 73190 }, { "epoch": 0.4600789582228507, "grad_norm": 6.056758880615234, "learning_rate": 1.6937587487322198e-05, "loss": 1.7156, "step": 73200 }, { "epoch": 0.46014181053954784, "grad_norm": 6.282230854034424, "learning_rate": 1.6937168386377545e-05, "loss": 1.5146, "step": 73210 }, { "epoch": 0.46020466285624495, "grad_norm": 7.369024753570557, "learning_rate": 1.693674928543289e-05, "loss": 1.795, "step": 73220 }, { "epoch": 0.46026751517294207, "grad_norm": 7.202399253845215, "learning_rate": 1.6936330184488236e-05, "loss": 1.867, "step": 73230 }, { "epoch": 0.4603303674896392, "grad_norm": 6.399475574493408, "learning_rate": 1.6935911083543583e-05, "loss": 1.6532, "step": 73240 }, { "epoch": 0.4603932198063363, "grad_norm": 7.484232425689697, "learning_rate": 1.693549198259893e-05, "loss": 1.7963, "step": 73250 }, { "epoch": 0.4604560721230334, "grad_norm": 6.41881799697876, "learning_rate": 1.6935072881654277e-05, "loss": 1.9135, "step": 73260 }, { "epoch": 0.4605189244397305, "grad_norm": 6.067859172821045, "learning_rate": 1.693465378070962e-05, "loss": 1.6958, "step": 73270 }, { "epoch": 0.46058177675642764, "grad_norm": 7.959996700286865, "learning_rate": 1.6934234679764968e-05, "loss": 2.0227, "step": 73280 }, { "epoch": 0.46064462907312476, "grad_norm": 5.5783610343933105, "learning_rate": 1.6933815578820315e-05, "loss": 1.5307, "step": 73290 }, { "epoch": 0.46070748138982187, "grad_norm": 6.5437445640563965, "learning_rate": 1.6933396477875662e-05, "loss": 1.5558, "step": 73300 }, { "epoch": 0.460770333706519, "grad_norm": 6.402663230895996, "learning_rate": 1.693297737693101e-05, "loss": 1.7255, "step": 73310 }, { "epoch": 0.4608331860232161, "grad_norm": 6.267522811889648, "learning_rate": 1.6932558275986356e-05, "loss": 1.7829, "step": 73320 }, { "epoch": 0.46089603833991316, "grad_norm": 6.446778297424316, "learning_rate": 1.6932139175041703e-05, "loss": 1.691, "step": 73330 }, { "epoch": 0.4609588906566103, "grad_norm": 8.30638599395752, "learning_rate": 1.693172007409705e-05, "loss": 1.7292, "step": 73340 }, { "epoch": 0.4610217429733074, "grad_norm": 8.48983383178711, "learning_rate": 1.6931300973152397e-05, "loss": 1.547, "step": 73350 }, { "epoch": 0.4610845952900045, "grad_norm": 7.699466705322266, "learning_rate": 1.693088187220774e-05, "loss": 1.7697, "step": 73360 }, { "epoch": 0.4611474476067016, "grad_norm": 7.0006208419799805, "learning_rate": 1.6930462771263088e-05, "loss": 1.5617, "step": 73370 }, { "epoch": 0.46121029992339874, "grad_norm": 7.954681873321533, "learning_rate": 1.6930043670318435e-05, "loss": 1.8519, "step": 73380 }, { "epoch": 0.46127315224009585, "grad_norm": 6.38613748550415, "learning_rate": 1.6929624569373782e-05, "loss": 1.6511, "step": 73390 }, { "epoch": 0.46133600455679297, "grad_norm": 7.790059566497803, "learning_rate": 1.6929205468429126e-05, "loss": 1.7144, "step": 73400 }, { "epoch": 0.4613988568734901, "grad_norm": 6.947166919708252, "learning_rate": 1.6928786367484473e-05, "loss": 1.8265, "step": 73410 }, { "epoch": 0.4614617091901872, "grad_norm": 6.62131929397583, "learning_rate": 1.692836726653982e-05, "loss": 1.6418, "step": 73420 }, { "epoch": 0.4615245615068843, "grad_norm": 6.732690334320068, "learning_rate": 1.6927948165595167e-05, "loss": 1.7439, "step": 73430 }, { "epoch": 0.4615874138235814, "grad_norm": 7.452075004577637, "learning_rate": 1.6927529064650514e-05, "loss": 1.7972, "step": 73440 }, { "epoch": 0.4616502661402785, "grad_norm": 6.767248630523682, "learning_rate": 1.6927109963705858e-05, "loss": 1.862, "step": 73450 }, { "epoch": 0.4617131184569756, "grad_norm": 6.166742324829102, "learning_rate": 1.6926690862761205e-05, "loss": 1.5783, "step": 73460 }, { "epoch": 0.4617759707736727, "grad_norm": 6.472208023071289, "learning_rate": 1.6926271761816552e-05, "loss": 1.7238, "step": 73470 }, { "epoch": 0.46183882309036983, "grad_norm": 9.513326644897461, "learning_rate": 1.69258526608719e-05, "loss": 1.7613, "step": 73480 }, { "epoch": 0.46190167540706695, "grad_norm": 7.1478657722473145, "learning_rate": 1.6925433559927246e-05, "loss": 1.7873, "step": 73490 }, { "epoch": 0.46196452772376406, "grad_norm": 7.685591220855713, "learning_rate": 1.692501445898259e-05, "loss": 1.6279, "step": 73500 }, { "epoch": 0.4620273800404612, "grad_norm": 7.648070812225342, "learning_rate": 1.6924595358037937e-05, "loss": 1.8225, "step": 73510 }, { "epoch": 0.4620902323571583, "grad_norm": 7.248754024505615, "learning_rate": 1.6924176257093284e-05, "loss": 1.6426, "step": 73520 }, { "epoch": 0.4621530846738554, "grad_norm": 7.591023921966553, "learning_rate": 1.692375715614863e-05, "loss": 1.8353, "step": 73530 }, { "epoch": 0.4622159369905525, "grad_norm": 6.6562347412109375, "learning_rate": 1.6923338055203978e-05, "loss": 1.8618, "step": 73540 }, { "epoch": 0.46227878930724964, "grad_norm": 6.6715617179870605, "learning_rate": 1.6922918954259325e-05, "loss": 1.8978, "step": 73550 }, { "epoch": 0.46234164162394675, "grad_norm": 7.414534091949463, "learning_rate": 1.6922499853314672e-05, "loss": 1.6267, "step": 73560 }, { "epoch": 0.46240449394064387, "grad_norm": 5.956836700439453, "learning_rate": 1.692208075237002e-05, "loss": 1.7615, "step": 73570 }, { "epoch": 0.4624673462573409, "grad_norm": 7.848029136657715, "learning_rate": 1.6921661651425363e-05, "loss": 1.7014, "step": 73580 }, { "epoch": 0.46253019857403804, "grad_norm": 6.871232986450195, "learning_rate": 1.692124255048071e-05, "loss": 1.7674, "step": 73590 }, { "epoch": 0.46259305089073516, "grad_norm": 7.2657880783081055, "learning_rate": 1.6920823449536057e-05, "loss": 1.7561, "step": 73600 }, { "epoch": 0.4626559032074323, "grad_norm": 7.009253978729248, "learning_rate": 1.6920404348591404e-05, "loss": 1.9371, "step": 73610 }, { "epoch": 0.4627187555241294, "grad_norm": 8.488468170166016, "learning_rate": 1.6919985247646748e-05, "loss": 1.634, "step": 73620 }, { "epoch": 0.4627816078408265, "grad_norm": 8.554583549499512, "learning_rate": 1.6919566146702095e-05, "loss": 1.8174, "step": 73630 }, { "epoch": 0.4628444601575236, "grad_norm": 6.670780658721924, "learning_rate": 1.6919147045757442e-05, "loss": 1.6491, "step": 73640 }, { "epoch": 0.46290731247422073, "grad_norm": 6.9372477531433105, "learning_rate": 1.691872794481279e-05, "loss": 1.5245, "step": 73650 }, { "epoch": 0.46297016479091785, "grad_norm": 7.845040321350098, "learning_rate": 1.6918308843868136e-05, "loss": 1.713, "step": 73660 }, { "epoch": 0.46303301710761496, "grad_norm": 8.10694694519043, "learning_rate": 1.691788974292348e-05, "loss": 1.5741, "step": 73670 }, { "epoch": 0.4630958694243121, "grad_norm": 7.517847537994385, "learning_rate": 1.6917470641978827e-05, "loss": 1.7044, "step": 73680 }, { "epoch": 0.4631587217410092, "grad_norm": 6.285558700561523, "learning_rate": 1.6917051541034174e-05, "loss": 1.9023, "step": 73690 }, { "epoch": 0.4632215740577063, "grad_norm": 7.671931743621826, "learning_rate": 1.691663244008952e-05, "loss": 1.6278, "step": 73700 }, { "epoch": 0.46328442637440337, "grad_norm": 6.878096580505371, "learning_rate": 1.6916213339144868e-05, "loss": 1.6788, "step": 73710 }, { "epoch": 0.4633472786911005, "grad_norm": 6.303915500640869, "learning_rate": 1.6915794238200215e-05, "loss": 1.6047, "step": 73720 }, { "epoch": 0.4634101310077976, "grad_norm": 5.825152397155762, "learning_rate": 1.6915375137255562e-05, "loss": 1.4785, "step": 73730 }, { "epoch": 0.4634729833244947, "grad_norm": 6.75026273727417, "learning_rate": 1.691495603631091e-05, "loss": 1.5219, "step": 73740 }, { "epoch": 0.46353583564119183, "grad_norm": 6.23679780960083, "learning_rate": 1.6914536935366253e-05, "loss": 1.64, "step": 73750 }, { "epoch": 0.46359868795788894, "grad_norm": 7.054623603820801, "learning_rate": 1.69141178344216e-05, "loss": 1.5494, "step": 73760 }, { "epoch": 0.46366154027458606, "grad_norm": 6.4822998046875, "learning_rate": 1.6913698733476947e-05, "loss": 1.7413, "step": 73770 }, { "epoch": 0.4637243925912832, "grad_norm": 7.870069980621338, "learning_rate": 1.6913279632532294e-05, "loss": 1.7077, "step": 73780 }, { "epoch": 0.4637872449079803, "grad_norm": 6.3491291999816895, "learning_rate": 1.691286053158764e-05, "loss": 1.6945, "step": 73790 }, { "epoch": 0.4638500972246774, "grad_norm": 7.511515140533447, "learning_rate": 1.6912441430642985e-05, "loss": 1.8034, "step": 73800 }, { "epoch": 0.4639129495413745, "grad_norm": 6.393674850463867, "learning_rate": 1.6912022329698332e-05, "loss": 1.7938, "step": 73810 }, { "epoch": 0.46397580185807163, "grad_norm": 6.74291467666626, "learning_rate": 1.691160322875368e-05, "loss": 1.7669, "step": 73820 }, { "epoch": 0.46403865417476875, "grad_norm": 6.482278347015381, "learning_rate": 1.6911184127809026e-05, "loss": 1.7606, "step": 73830 }, { "epoch": 0.4641015064914658, "grad_norm": 6.72927188873291, "learning_rate": 1.691076502686437e-05, "loss": 1.4387, "step": 73840 }, { "epoch": 0.4641643588081629, "grad_norm": 7.548161506652832, "learning_rate": 1.6910345925919717e-05, "loss": 1.5602, "step": 73850 }, { "epoch": 0.46422721112486004, "grad_norm": 6.613569736480713, "learning_rate": 1.6909926824975064e-05, "loss": 1.7397, "step": 73860 }, { "epoch": 0.46429006344155715, "grad_norm": 6.6580586433410645, "learning_rate": 1.690950772403041e-05, "loss": 1.7557, "step": 73870 }, { "epoch": 0.46435291575825427, "grad_norm": 7.397098064422607, "learning_rate": 1.6909088623085758e-05, "loss": 1.6191, "step": 73880 }, { "epoch": 0.4644157680749514, "grad_norm": 6.1506829261779785, "learning_rate": 1.6908669522141102e-05, "loss": 1.5727, "step": 73890 }, { "epoch": 0.4644786203916485, "grad_norm": 7.352633476257324, "learning_rate": 1.690825042119645e-05, "loss": 1.8109, "step": 73900 }, { "epoch": 0.4645414727083456, "grad_norm": 6.859440803527832, "learning_rate": 1.6907831320251796e-05, "loss": 1.6813, "step": 73910 }, { "epoch": 0.46460432502504273, "grad_norm": 7.155480861663818, "learning_rate": 1.6907412219307143e-05, "loss": 1.9739, "step": 73920 }, { "epoch": 0.46466717734173985, "grad_norm": 7.0241546630859375, "learning_rate": 1.690699311836249e-05, "loss": 1.9009, "step": 73930 }, { "epoch": 0.46473002965843696, "grad_norm": 6.739370822906494, "learning_rate": 1.6906574017417837e-05, "loss": 1.8424, "step": 73940 }, { "epoch": 0.4647928819751341, "grad_norm": 6.187098979949951, "learning_rate": 1.6906154916473184e-05, "loss": 1.5841, "step": 73950 }, { "epoch": 0.46485573429183114, "grad_norm": 6.150927543640137, "learning_rate": 1.690573581552853e-05, "loss": 1.619, "step": 73960 }, { "epoch": 0.46491858660852825, "grad_norm": 6.579783916473389, "learning_rate": 1.690531671458388e-05, "loss": 1.7277, "step": 73970 }, { "epoch": 0.46498143892522537, "grad_norm": 6.820524215698242, "learning_rate": 1.6904897613639222e-05, "loss": 1.5031, "step": 73980 }, { "epoch": 0.4650442912419225, "grad_norm": 6.518802642822266, "learning_rate": 1.690447851269457e-05, "loss": 1.6477, "step": 73990 }, { "epoch": 0.4651071435586196, "grad_norm": 7.290344715118408, "learning_rate": 1.6904059411749916e-05, "loss": 1.7982, "step": 74000 }, { "epoch": 0.4651699958753167, "grad_norm": 7.121784210205078, "learning_rate": 1.6903640310805263e-05, "loss": 1.7743, "step": 74010 }, { "epoch": 0.4652328481920138, "grad_norm": 6.875063896179199, "learning_rate": 1.6903221209860607e-05, "loss": 1.6827, "step": 74020 }, { "epoch": 0.46529570050871094, "grad_norm": 8.333395957946777, "learning_rate": 1.6902802108915954e-05, "loss": 1.6369, "step": 74030 }, { "epoch": 0.46535855282540806, "grad_norm": 6.934069633483887, "learning_rate": 1.69023830079713e-05, "loss": 1.6542, "step": 74040 }, { "epoch": 0.46542140514210517, "grad_norm": 7.221209526062012, "learning_rate": 1.6901963907026648e-05, "loss": 1.8351, "step": 74050 }, { "epoch": 0.4654842574588023, "grad_norm": 6.288447380065918, "learning_rate": 1.6901544806081995e-05, "loss": 1.4954, "step": 74060 }, { "epoch": 0.4655471097754994, "grad_norm": 6.521042823791504, "learning_rate": 1.690112570513734e-05, "loss": 1.6492, "step": 74070 }, { "epoch": 0.4656099620921965, "grad_norm": 8.660316467285156, "learning_rate": 1.6900706604192686e-05, "loss": 1.6829, "step": 74080 }, { "epoch": 0.4656728144088936, "grad_norm": 7.834723472595215, "learning_rate": 1.6900287503248033e-05, "loss": 1.6765, "step": 74090 }, { "epoch": 0.4657356667255907, "grad_norm": 6.806433200836182, "learning_rate": 1.689986840230338e-05, "loss": 1.5411, "step": 74100 }, { "epoch": 0.4657985190422878, "grad_norm": 7.692269802093506, "learning_rate": 1.6899449301358727e-05, "loss": 1.7579, "step": 74110 }, { "epoch": 0.4658613713589849, "grad_norm": 8.500774383544922, "learning_rate": 1.6899030200414074e-05, "loss": 1.8493, "step": 74120 }, { "epoch": 0.46592422367568204, "grad_norm": 8.580452919006348, "learning_rate": 1.6898611099469418e-05, "loss": 1.8775, "step": 74130 }, { "epoch": 0.46598707599237915, "grad_norm": 6.9834113121032715, "learning_rate": 1.6898191998524765e-05, "loss": 1.5928, "step": 74140 }, { "epoch": 0.46604992830907627, "grad_norm": 6.779479026794434, "learning_rate": 1.6897772897580112e-05, "loss": 1.7107, "step": 74150 }, { "epoch": 0.4661127806257734, "grad_norm": 7.981831073760986, "learning_rate": 1.689735379663546e-05, "loss": 1.7173, "step": 74160 }, { "epoch": 0.4661756329424705, "grad_norm": 7.5072245597839355, "learning_rate": 1.6896934695690806e-05, "loss": 1.8305, "step": 74170 }, { "epoch": 0.4662384852591676, "grad_norm": 7.0078535079956055, "learning_rate": 1.6896515594746153e-05, "loss": 1.7212, "step": 74180 }, { "epoch": 0.4663013375758647, "grad_norm": 6.888160705566406, "learning_rate": 1.68960964938015e-05, "loss": 1.6908, "step": 74190 }, { "epoch": 0.46636418989256184, "grad_norm": 6.368884086608887, "learning_rate": 1.6895677392856844e-05, "loss": 1.6535, "step": 74200 }, { "epoch": 0.46642704220925896, "grad_norm": 6.441951751708984, "learning_rate": 1.689525829191219e-05, "loss": 1.8026, "step": 74210 }, { "epoch": 0.466489894525956, "grad_norm": 7.454028606414795, "learning_rate": 1.6894839190967538e-05, "loss": 1.5824, "step": 74220 }, { "epoch": 0.46655274684265313, "grad_norm": 6.690650939941406, "learning_rate": 1.6894420090022885e-05, "loss": 1.6732, "step": 74230 }, { "epoch": 0.46661559915935025, "grad_norm": 5.540317535400391, "learning_rate": 1.689400098907823e-05, "loss": 1.4798, "step": 74240 }, { "epoch": 0.46667845147604736, "grad_norm": 7.390773773193359, "learning_rate": 1.6893581888133576e-05, "loss": 1.7011, "step": 74250 }, { "epoch": 0.4667413037927445, "grad_norm": 5.674918174743652, "learning_rate": 1.6893162787188923e-05, "loss": 1.897, "step": 74260 }, { "epoch": 0.4668041561094416, "grad_norm": 6.368553161621094, "learning_rate": 1.689274368624427e-05, "loss": 1.687, "step": 74270 }, { "epoch": 0.4668670084261387, "grad_norm": 6.636124610900879, "learning_rate": 1.6892324585299617e-05, "loss": 1.8943, "step": 74280 }, { "epoch": 0.4669298607428358, "grad_norm": 6.706605434417725, "learning_rate": 1.689190548435496e-05, "loss": 1.6624, "step": 74290 }, { "epoch": 0.46699271305953294, "grad_norm": 7.414637565612793, "learning_rate": 1.6891486383410308e-05, "loss": 1.8333, "step": 74300 }, { "epoch": 0.46705556537623005, "grad_norm": 5.616257190704346, "learning_rate": 1.6891067282465655e-05, "loss": 1.7722, "step": 74310 }, { "epoch": 0.46711841769292717, "grad_norm": 6.927336692810059, "learning_rate": 1.6890648181521002e-05, "loss": 1.5495, "step": 74320 }, { "epoch": 0.4671812700096243, "grad_norm": 6.845500469207764, "learning_rate": 1.689022908057635e-05, "loss": 1.6143, "step": 74330 }, { "epoch": 0.4672441223263214, "grad_norm": 7.378794193267822, "learning_rate": 1.6889809979631696e-05, "loss": 1.6623, "step": 74340 }, { "epoch": 0.46730697464301846, "grad_norm": 6.336019515991211, "learning_rate": 1.6889390878687043e-05, "loss": 1.7102, "step": 74350 }, { "epoch": 0.4673698269597156, "grad_norm": 6.820666313171387, "learning_rate": 1.688897177774239e-05, "loss": 1.8539, "step": 74360 }, { "epoch": 0.4674326792764127, "grad_norm": 5.6884870529174805, "learning_rate": 1.6888552676797738e-05, "loss": 1.6753, "step": 74370 }, { "epoch": 0.4674955315931098, "grad_norm": 7.648561477661133, "learning_rate": 1.688813357585308e-05, "loss": 1.6738, "step": 74380 }, { "epoch": 0.4675583839098069, "grad_norm": 6.930350303649902, "learning_rate": 1.688771447490843e-05, "loss": 1.7851, "step": 74390 }, { "epoch": 0.46762123622650403, "grad_norm": 7.078684329986572, "learning_rate": 1.6887295373963775e-05, "loss": 1.558, "step": 74400 }, { "epoch": 0.46768408854320115, "grad_norm": 8.631214141845703, "learning_rate": 1.6886876273019122e-05, "loss": 1.8714, "step": 74410 }, { "epoch": 0.46774694085989826, "grad_norm": 7.361252307891846, "learning_rate": 1.6886457172074466e-05, "loss": 1.6751, "step": 74420 }, { "epoch": 0.4678097931765954, "grad_norm": 5.998310089111328, "learning_rate": 1.6886038071129813e-05, "loss": 1.5445, "step": 74430 }, { "epoch": 0.4678726454932925, "grad_norm": 6.633802890777588, "learning_rate": 1.688561897018516e-05, "loss": 1.5037, "step": 74440 }, { "epoch": 0.4679354978099896, "grad_norm": 7.296174049377441, "learning_rate": 1.6885199869240507e-05, "loss": 1.7964, "step": 74450 }, { "epoch": 0.4679983501266867, "grad_norm": 7.240650653839111, "learning_rate": 1.6884780768295854e-05, "loss": 1.8011, "step": 74460 }, { "epoch": 0.46806120244338384, "grad_norm": 6.495932102203369, "learning_rate": 1.6884361667351198e-05, "loss": 1.7533, "step": 74470 }, { "epoch": 0.4681240547600809, "grad_norm": 7.80958366394043, "learning_rate": 1.6883942566406545e-05, "loss": 2.043, "step": 74480 }, { "epoch": 0.468186907076778, "grad_norm": 6.533959865570068, "learning_rate": 1.6883523465461892e-05, "loss": 1.6634, "step": 74490 }, { "epoch": 0.46824975939347513, "grad_norm": 6.347193241119385, "learning_rate": 1.688310436451724e-05, "loss": 1.4688, "step": 74500 }, { "epoch": 0.46831261171017224, "grad_norm": 6.545567512512207, "learning_rate": 1.6882685263572583e-05, "loss": 1.7698, "step": 74510 }, { "epoch": 0.46837546402686936, "grad_norm": 6.875192165374756, "learning_rate": 1.688226616262793e-05, "loss": 1.7476, "step": 74520 }, { "epoch": 0.4684383163435665, "grad_norm": 6.761386871337891, "learning_rate": 1.6881847061683277e-05, "loss": 1.6175, "step": 74530 }, { "epoch": 0.4685011686602636, "grad_norm": 7.0894365310668945, "learning_rate": 1.6881427960738624e-05, "loss": 1.7662, "step": 74540 }, { "epoch": 0.4685640209769607, "grad_norm": 6.652477264404297, "learning_rate": 1.688100885979397e-05, "loss": 1.6835, "step": 74550 }, { "epoch": 0.4686268732936578, "grad_norm": 6.749616622924805, "learning_rate": 1.688058975884932e-05, "loss": 1.7092, "step": 74560 }, { "epoch": 0.46868972561035493, "grad_norm": 6.41374397277832, "learning_rate": 1.6880170657904665e-05, "loss": 1.7461, "step": 74570 }, { "epoch": 0.46875257792705205, "grad_norm": 6.185507774353027, "learning_rate": 1.6879751556960013e-05, "loss": 1.5894, "step": 74580 }, { "epoch": 0.46881543024374916, "grad_norm": 6.641927242279053, "learning_rate": 1.687933245601536e-05, "loss": 1.7548, "step": 74590 }, { "epoch": 0.4688782825604462, "grad_norm": 7.611270904541016, "learning_rate": 1.6878913355070703e-05, "loss": 2.065, "step": 74600 }, { "epoch": 0.46894113487714334, "grad_norm": 6.557180404663086, "learning_rate": 1.687849425412605e-05, "loss": 1.568, "step": 74610 }, { "epoch": 0.46900398719384045, "grad_norm": 7.316890239715576, "learning_rate": 1.6878075153181397e-05, "loss": 1.592, "step": 74620 }, { "epoch": 0.46906683951053757, "grad_norm": 7.576214790344238, "learning_rate": 1.6877656052236744e-05, "loss": 1.7492, "step": 74630 }, { "epoch": 0.4691296918272347, "grad_norm": 6.319741725921631, "learning_rate": 1.6877236951292088e-05, "loss": 1.6616, "step": 74640 }, { "epoch": 0.4691925441439318, "grad_norm": 7.309403419494629, "learning_rate": 1.6876817850347435e-05, "loss": 1.7179, "step": 74650 }, { "epoch": 0.4692553964606289, "grad_norm": 8.033734321594238, "learning_rate": 1.6876398749402782e-05, "loss": 1.9204, "step": 74660 }, { "epoch": 0.46931824877732603, "grad_norm": 6.8274054527282715, "learning_rate": 1.687597964845813e-05, "loss": 1.702, "step": 74670 }, { "epoch": 0.46938110109402315, "grad_norm": 7.155275821685791, "learning_rate": 1.6875560547513476e-05, "loss": 1.7586, "step": 74680 }, { "epoch": 0.46944395341072026, "grad_norm": 5.7840471267700195, "learning_rate": 1.687514144656882e-05, "loss": 1.8591, "step": 74690 }, { "epoch": 0.4695068057274174, "grad_norm": 7.553875923156738, "learning_rate": 1.6874722345624167e-05, "loss": 1.6966, "step": 74700 }, { "epoch": 0.4695696580441145, "grad_norm": 6.658247947692871, "learning_rate": 1.6874303244679514e-05, "loss": 1.8337, "step": 74710 }, { "epoch": 0.4696325103608116, "grad_norm": 6.50600004196167, "learning_rate": 1.687388414373486e-05, "loss": 1.4966, "step": 74720 }, { "epoch": 0.46969536267750867, "grad_norm": 6.239086151123047, "learning_rate": 1.687346504279021e-05, "loss": 1.6827, "step": 74730 }, { "epoch": 0.4697582149942058, "grad_norm": 6.735509395599365, "learning_rate": 1.6873045941845555e-05, "loss": 1.6745, "step": 74740 }, { "epoch": 0.4698210673109029, "grad_norm": 7.19131326675415, "learning_rate": 1.6872626840900903e-05, "loss": 1.7236, "step": 74750 }, { "epoch": 0.4698839196276, "grad_norm": 6.133017539978027, "learning_rate": 1.6872207739956246e-05, "loss": 1.4435, "step": 74760 }, { "epoch": 0.4699467719442971, "grad_norm": 6.491494178771973, "learning_rate": 1.6871788639011593e-05, "loss": 1.7601, "step": 74770 }, { "epoch": 0.47000962426099424, "grad_norm": 7.579554080963135, "learning_rate": 1.687136953806694e-05, "loss": 1.7644, "step": 74780 }, { "epoch": 0.47007247657769136, "grad_norm": 7.176877021789551, "learning_rate": 1.6870950437122287e-05, "loss": 1.7589, "step": 74790 }, { "epoch": 0.47013532889438847, "grad_norm": 7.731554985046387, "learning_rate": 1.6870531336177635e-05, "loss": 1.8631, "step": 74800 }, { "epoch": 0.4701981812110856, "grad_norm": 6.453170299530029, "learning_rate": 1.687011223523298e-05, "loss": 1.8514, "step": 74810 }, { "epoch": 0.4702610335277827, "grad_norm": 8.215258598327637, "learning_rate": 1.6869693134288325e-05, "loss": 1.9341, "step": 74820 }, { "epoch": 0.4703238858444798, "grad_norm": 6.7729105949401855, "learning_rate": 1.6869274033343672e-05, "loss": 1.6897, "step": 74830 }, { "epoch": 0.47038673816117693, "grad_norm": 6.5331621170043945, "learning_rate": 1.686885493239902e-05, "loss": 1.6208, "step": 74840 }, { "epoch": 0.47044959047787405, "grad_norm": 7.255948543548584, "learning_rate": 1.6868435831454366e-05, "loss": 1.7441, "step": 74850 }, { "epoch": 0.4705124427945711, "grad_norm": 6.3399553298950195, "learning_rate": 1.686801673050971e-05, "loss": 1.786, "step": 74860 }, { "epoch": 0.4705752951112682, "grad_norm": 6.474696636199951, "learning_rate": 1.6867597629565057e-05, "loss": 1.7946, "step": 74870 }, { "epoch": 0.47063814742796534, "grad_norm": 7.178191661834717, "learning_rate": 1.6867178528620404e-05, "loss": 1.8623, "step": 74880 }, { "epoch": 0.47070099974466245, "grad_norm": 6.79795503616333, "learning_rate": 1.686675942767575e-05, "loss": 1.8388, "step": 74890 }, { "epoch": 0.47076385206135957, "grad_norm": 6.400870323181152, "learning_rate": 1.68663403267311e-05, "loss": 1.5224, "step": 74900 }, { "epoch": 0.4708267043780567, "grad_norm": 6.47348165512085, "learning_rate": 1.6865921225786442e-05, "loss": 1.5566, "step": 74910 }, { "epoch": 0.4708895566947538, "grad_norm": 6.684679985046387, "learning_rate": 1.686550212484179e-05, "loss": 1.6205, "step": 74920 }, { "epoch": 0.4709524090114509, "grad_norm": 7.984795093536377, "learning_rate": 1.6865083023897136e-05, "loss": 1.5545, "step": 74930 }, { "epoch": 0.471015261328148, "grad_norm": 7.289670467376709, "learning_rate": 1.6864663922952483e-05, "loss": 1.8546, "step": 74940 }, { "epoch": 0.47107811364484514, "grad_norm": 6.218448638916016, "learning_rate": 1.686424482200783e-05, "loss": 1.7009, "step": 74950 }, { "epoch": 0.47114096596154226, "grad_norm": 6.013876438140869, "learning_rate": 1.6863825721063177e-05, "loss": 1.669, "step": 74960 }, { "epoch": 0.4712038182782394, "grad_norm": 6.878289699554443, "learning_rate": 1.6863406620118525e-05, "loss": 1.7634, "step": 74970 }, { "epoch": 0.4712666705949365, "grad_norm": 6.458973407745361, "learning_rate": 1.686298751917387e-05, "loss": 1.7187, "step": 74980 }, { "epoch": 0.47132952291163355, "grad_norm": 6.173418998718262, "learning_rate": 1.686256841822922e-05, "loss": 1.9044, "step": 74990 }, { "epoch": 0.47139237522833066, "grad_norm": 6.662987232208252, "learning_rate": 1.6862149317284562e-05, "loss": 1.6913, "step": 75000 }, { "epoch": 0.4714552275450278, "grad_norm": 7.989409923553467, "learning_rate": 1.686173021633991e-05, "loss": 1.7866, "step": 75010 }, { "epoch": 0.4715180798617249, "grad_norm": 6.370418548583984, "learning_rate": 1.6861311115395257e-05, "loss": 1.6304, "step": 75020 }, { "epoch": 0.471580932178422, "grad_norm": 6.886153221130371, "learning_rate": 1.6860892014450604e-05, "loss": 1.7567, "step": 75030 }, { "epoch": 0.4716437844951191, "grad_norm": 6.359077453613281, "learning_rate": 1.6860472913505947e-05, "loss": 1.9381, "step": 75040 }, { "epoch": 0.47170663681181624, "grad_norm": 6.715882778167725, "learning_rate": 1.6860053812561294e-05, "loss": 1.8876, "step": 75050 }, { "epoch": 0.47176948912851335, "grad_norm": 6.847312927246094, "learning_rate": 1.685963471161664e-05, "loss": 1.6414, "step": 75060 }, { "epoch": 0.47183234144521047, "grad_norm": 5.654614448547363, "learning_rate": 1.685921561067199e-05, "loss": 1.9713, "step": 75070 }, { "epoch": 0.4718951937619076, "grad_norm": 6.608804702758789, "learning_rate": 1.6858796509727336e-05, "loss": 1.6387, "step": 75080 }, { "epoch": 0.4719580460786047, "grad_norm": 7.266916275024414, "learning_rate": 1.685837740878268e-05, "loss": 1.8478, "step": 75090 }, { "epoch": 0.4720208983953018, "grad_norm": 7.289012908935547, "learning_rate": 1.6857958307838026e-05, "loss": 1.8796, "step": 75100 }, { "epoch": 0.4720837507119989, "grad_norm": 6.832249641418457, "learning_rate": 1.6857539206893373e-05, "loss": 1.7268, "step": 75110 }, { "epoch": 0.472146603028696, "grad_norm": 7.332949161529541, "learning_rate": 1.685712010594872e-05, "loss": 1.9223, "step": 75120 }, { "epoch": 0.4722094553453931, "grad_norm": 6.668570041656494, "learning_rate": 1.6856701005004068e-05, "loss": 1.7615, "step": 75130 }, { "epoch": 0.4722723076620902, "grad_norm": 6.870800495147705, "learning_rate": 1.685628190405941e-05, "loss": 1.5939, "step": 75140 }, { "epoch": 0.47233515997878733, "grad_norm": 6.370723247528076, "learning_rate": 1.6855862803114758e-05, "loss": 1.7346, "step": 75150 }, { "epoch": 0.47239801229548445, "grad_norm": 7.461625576019287, "learning_rate": 1.6855443702170105e-05, "loss": 1.638, "step": 75160 }, { "epoch": 0.47246086461218156, "grad_norm": 7.121856689453125, "learning_rate": 1.6855024601225452e-05, "loss": 1.7512, "step": 75170 }, { "epoch": 0.4725237169288787, "grad_norm": 7.248109340667725, "learning_rate": 1.68546055002808e-05, "loss": 1.6313, "step": 75180 }, { "epoch": 0.4725865692455758, "grad_norm": 6.1002397537231445, "learning_rate": 1.6854186399336147e-05, "loss": 1.6249, "step": 75190 }, { "epoch": 0.4726494215622729, "grad_norm": 6.94600248336792, "learning_rate": 1.6853767298391494e-05, "loss": 1.8185, "step": 75200 }, { "epoch": 0.47271227387897, "grad_norm": 6.894454479217529, "learning_rate": 1.685334819744684e-05, "loss": 1.899, "step": 75210 }, { "epoch": 0.47277512619566714, "grad_norm": 5.67385196685791, "learning_rate": 1.6852929096502184e-05, "loss": 1.6894, "step": 75220 }, { "epoch": 0.47283797851236425, "grad_norm": 7.093928337097168, "learning_rate": 1.685250999555753e-05, "loss": 1.5867, "step": 75230 }, { "epoch": 0.4729008308290613, "grad_norm": 7.607110500335693, "learning_rate": 1.685209089461288e-05, "loss": 1.6814, "step": 75240 }, { "epoch": 0.47296368314575843, "grad_norm": 7.096514701843262, "learning_rate": 1.6851671793668226e-05, "loss": 1.728, "step": 75250 }, { "epoch": 0.47302653546245554, "grad_norm": 7.261518955230713, "learning_rate": 1.685125269272357e-05, "loss": 1.7906, "step": 75260 }, { "epoch": 0.47308938777915266, "grad_norm": 6.688602447509766, "learning_rate": 1.6850833591778916e-05, "loss": 1.5978, "step": 75270 }, { "epoch": 0.4731522400958498, "grad_norm": 6.605575084686279, "learning_rate": 1.6850414490834263e-05, "loss": 1.8219, "step": 75280 }, { "epoch": 0.4732150924125469, "grad_norm": 7.412301540374756, "learning_rate": 1.684999538988961e-05, "loss": 1.8662, "step": 75290 }, { "epoch": 0.473277944729244, "grad_norm": 6.660956859588623, "learning_rate": 1.6849576288944958e-05, "loss": 1.6767, "step": 75300 }, { "epoch": 0.4733407970459411, "grad_norm": 6.37428092956543, "learning_rate": 1.68491571880003e-05, "loss": 1.7547, "step": 75310 }, { "epoch": 0.47340364936263823, "grad_norm": 5.873388290405273, "learning_rate": 1.6848738087055648e-05, "loss": 1.5674, "step": 75320 }, { "epoch": 0.47346650167933535, "grad_norm": 7.413193702697754, "learning_rate": 1.6848318986110995e-05, "loss": 1.5481, "step": 75330 }, { "epoch": 0.47352935399603246, "grad_norm": 4.985287666320801, "learning_rate": 1.6847899885166342e-05, "loss": 1.6463, "step": 75340 }, { "epoch": 0.4735922063127296, "grad_norm": 7.10650110244751, "learning_rate": 1.684748078422169e-05, "loss": 2.0231, "step": 75350 }, { "epoch": 0.4736550586294267, "grad_norm": 6.295893669128418, "learning_rate": 1.6847061683277037e-05, "loss": 1.6833, "step": 75360 }, { "epoch": 0.47371791094612375, "grad_norm": 6.590880870819092, "learning_rate": 1.6846642582332384e-05, "loss": 1.5646, "step": 75370 }, { "epoch": 0.47378076326282087, "grad_norm": 6.749689102172852, "learning_rate": 1.6846223481387727e-05, "loss": 1.7946, "step": 75380 }, { "epoch": 0.473843615579518, "grad_norm": 5.869183540344238, "learning_rate": 1.6845804380443074e-05, "loss": 1.6161, "step": 75390 }, { "epoch": 0.4739064678962151, "grad_norm": 6.468136787414551, "learning_rate": 1.684538527949842e-05, "loss": 1.6574, "step": 75400 }, { "epoch": 0.4739693202129122, "grad_norm": 6.457572937011719, "learning_rate": 1.684496617855377e-05, "loss": 1.5288, "step": 75410 }, { "epoch": 0.47403217252960933, "grad_norm": 7.331090450286865, "learning_rate": 1.6844547077609116e-05, "loss": 1.6546, "step": 75420 }, { "epoch": 0.47409502484630645, "grad_norm": 6.027551174163818, "learning_rate": 1.6844127976664463e-05, "loss": 1.609, "step": 75430 }, { "epoch": 0.47415787716300356, "grad_norm": 6.717035293579102, "learning_rate": 1.6843708875719806e-05, "loss": 1.6238, "step": 75440 }, { "epoch": 0.4742207294797007, "grad_norm": 7.718981742858887, "learning_rate": 1.6843289774775153e-05, "loss": 1.6955, "step": 75450 }, { "epoch": 0.4742835817963978, "grad_norm": 5.82558012008667, "learning_rate": 1.68428706738305e-05, "loss": 1.5148, "step": 75460 }, { "epoch": 0.4743464341130949, "grad_norm": 6.919063091278076, "learning_rate": 1.6842451572885848e-05, "loss": 1.6523, "step": 75470 }, { "epoch": 0.474409286429792, "grad_norm": 6.56036376953125, "learning_rate": 1.684203247194119e-05, "loss": 1.6036, "step": 75480 }, { "epoch": 0.47447213874648914, "grad_norm": 7.16939640045166, "learning_rate": 1.684161337099654e-05, "loss": 1.9083, "step": 75490 }, { "epoch": 0.4745349910631862, "grad_norm": 6.926846027374268, "learning_rate": 1.6841194270051885e-05, "loss": 1.7733, "step": 75500 }, { "epoch": 0.4745978433798833, "grad_norm": 6.7428741455078125, "learning_rate": 1.6840775169107232e-05, "loss": 1.6344, "step": 75510 }, { "epoch": 0.4746606956965804, "grad_norm": 6.791428089141846, "learning_rate": 1.684035606816258e-05, "loss": 1.697, "step": 75520 }, { "epoch": 0.47472354801327754, "grad_norm": 6.997214317321777, "learning_rate": 1.6839936967217923e-05, "loss": 1.8743, "step": 75530 }, { "epoch": 0.47478640032997466, "grad_norm": 8.502320289611816, "learning_rate": 1.683951786627327e-05, "loss": 1.7792, "step": 75540 }, { "epoch": 0.47484925264667177, "grad_norm": 6.98447322845459, "learning_rate": 1.6839098765328617e-05, "loss": 1.8488, "step": 75550 }, { "epoch": 0.4749121049633689, "grad_norm": 6.466368675231934, "learning_rate": 1.6838679664383964e-05, "loss": 1.7191, "step": 75560 }, { "epoch": 0.474974957280066, "grad_norm": 6.1501383781433105, "learning_rate": 1.683826056343931e-05, "loss": 1.8504, "step": 75570 }, { "epoch": 0.4750378095967631, "grad_norm": 8.074056625366211, "learning_rate": 1.683784146249466e-05, "loss": 1.7625, "step": 75580 }, { "epoch": 0.47510066191346023, "grad_norm": 6.821253776550293, "learning_rate": 1.683746427164447e-05, "loss": 1.598, "step": 75590 }, { "epoch": 0.47516351423015735, "grad_norm": 6.949539661407471, "learning_rate": 1.6837045170699814e-05, "loss": 1.6408, "step": 75600 }, { "epoch": 0.47522636654685446, "grad_norm": 6.667018890380859, "learning_rate": 1.683662606975516e-05, "loss": 1.7095, "step": 75610 }, { "epoch": 0.4752892188635515, "grad_norm": 6.688776969909668, "learning_rate": 1.6836206968810508e-05, "loss": 2.0056, "step": 75620 }, { "epoch": 0.47535207118024864, "grad_norm": 7.515380382537842, "learning_rate": 1.6835787867865855e-05, "loss": 1.4865, "step": 75630 }, { "epoch": 0.47541492349694575, "grad_norm": 6.006285667419434, "learning_rate": 1.6835368766921202e-05, "loss": 1.7319, "step": 75640 }, { "epoch": 0.47547777581364287, "grad_norm": 8.443488121032715, "learning_rate": 1.683494966597655e-05, "loss": 1.6396, "step": 75650 }, { "epoch": 0.47554062813034, "grad_norm": 8.113880157470703, "learning_rate": 1.6834530565031896e-05, "loss": 1.5713, "step": 75660 }, { "epoch": 0.4756034804470371, "grad_norm": 5.771048069000244, "learning_rate": 1.6834111464087243e-05, "loss": 1.7873, "step": 75670 }, { "epoch": 0.4756663327637342, "grad_norm": 7.612918376922607, "learning_rate": 1.683369236314259e-05, "loss": 1.5751, "step": 75680 }, { "epoch": 0.4757291850804313, "grad_norm": 7.876788139343262, "learning_rate": 1.6833273262197934e-05, "loss": 1.8266, "step": 75690 }, { "epoch": 0.47579203739712844, "grad_norm": 6.927786827087402, "learning_rate": 1.683285416125328e-05, "loss": 2.0776, "step": 75700 }, { "epoch": 0.47585488971382556, "grad_norm": 6.485779285430908, "learning_rate": 1.6832435060308628e-05, "loss": 1.7823, "step": 75710 }, { "epoch": 0.4759177420305227, "grad_norm": 6.72318696975708, "learning_rate": 1.6832015959363975e-05, "loss": 1.9249, "step": 75720 }, { "epoch": 0.4759805943472198, "grad_norm": 6.445530414581299, "learning_rate": 1.6831596858419322e-05, "loss": 1.7381, "step": 75730 }, { "epoch": 0.4760434466639169, "grad_norm": 6.378854274749756, "learning_rate": 1.6831177757474666e-05, "loss": 1.7287, "step": 75740 }, { "epoch": 0.47610629898061396, "grad_norm": 6.627330303192139, "learning_rate": 1.6830758656530013e-05, "loss": 1.6475, "step": 75750 }, { "epoch": 0.4761691512973111, "grad_norm": 6.085504531860352, "learning_rate": 1.683033955558536e-05, "loss": 1.6755, "step": 75760 }, { "epoch": 0.4762320036140082, "grad_norm": 6.829914093017578, "learning_rate": 1.6829920454640707e-05, "loss": 1.7258, "step": 75770 }, { "epoch": 0.4762948559307053, "grad_norm": 6.772364616394043, "learning_rate": 1.682950135369605e-05, "loss": 1.504, "step": 75780 }, { "epoch": 0.4763577082474024, "grad_norm": 5.8424577713012695, "learning_rate": 1.6829082252751398e-05, "loss": 1.4472, "step": 75790 }, { "epoch": 0.47642056056409954, "grad_norm": 6.815469264984131, "learning_rate": 1.6828663151806745e-05, "loss": 1.6529, "step": 75800 }, { "epoch": 0.47648341288079665, "grad_norm": 7.46605110168457, "learning_rate": 1.6828244050862092e-05, "loss": 1.8595, "step": 75810 }, { "epoch": 0.47654626519749377, "grad_norm": 6.85030460357666, "learning_rate": 1.682782494991744e-05, "loss": 1.8455, "step": 75820 }, { "epoch": 0.4766091175141909, "grad_norm": 6.598263740539551, "learning_rate": 1.6827405848972783e-05, "loss": 1.471, "step": 75830 }, { "epoch": 0.476671969830888, "grad_norm": 6.376602649688721, "learning_rate": 1.682698674802813e-05, "loss": 1.5548, "step": 75840 }, { "epoch": 0.4767348221475851, "grad_norm": 5.831735134124756, "learning_rate": 1.6826567647083477e-05, "loss": 1.9294, "step": 75850 }, { "epoch": 0.47679767446428223, "grad_norm": 7.579250335693359, "learning_rate": 1.6826148546138824e-05, "loss": 1.7143, "step": 75860 }, { "epoch": 0.47686052678097934, "grad_norm": 6.94968318939209, "learning_rate": 1.682572944519417e-05, "loss": 1.694, "step": 75870 }, { "epoch": 0.4769233790976764, "grad_norm": 6.045395851135254, "learning_rate": 1.6825310344249518e-05, "loss": 1.7431, "step": 75880 }, { "epoch": 0.4769862314143735, "grad_norm": 15.732362747192383, "learning_rate": 1.6824891243304865e-05, "loss": 1.7184, "step": 75890 }, { "epoch": 0.47704908373107063, "grad_norm": 6.817939758300781, "learning_rate": 1.6824472142360212e-05, "loss": 1.6804, "step": 75900 }, { "epoch": 0.47711193604776775, "grad_norm": 5.561984062194824, "learning_rate": 1.6824053041415556e-05, "loss": 1.7968, "step": 75910 }, { "epoch": 0.47717478836446486, "grad_norm": 7.767377853393555, "learning_rate": 1.6823633940470903e-05, "loss": 2.0454, "step": 75920 }, { "epoch": 0.477237640681162, "grad_norm": 6.020562648773193, "learning_rate": 1.682321483952625e-05, "loss": 1.7706, "step": 75930 }, { "epoch": 0.4773004929978591, "grad_norm": 7.518442153930664, "learning_rate": 1.6822795738581597e-05, "loss": 1.729, "step": 75940 }, { "epoch": 0.4773633453145562, "grad_norm": 7.066908836364746, "learning_rate": 1.6822376637636944e-05, "loss": 1.8053, "step": 75950 }, { "epoch": 0.4774261976312533, "grad_norm": 7.802619934082031, "learning_rate": 1.6821957536692288e-05, "loss": 1.6715, "step": 75960 }, { "epoch": 0.47748904994795044, "grad_norm": 8.189960479736328, "learning_rate": 1.6821538435747635e-05, "loss": 1.8093, "step": 75970 }, { "epoch": 0.47755190226464755, "grad_norm": 5.803287982940674, "learning_rate": 1.6821119334802982e-05, "loss": 1.5923, "step": 75980 }, { "epoch": 0.47761475458134467, "grad_norm": 6.04809045791626, "learning_rate": 1.682070023385833e-05, "loss": 1.4471, "step": 75990 }, { "epoch": 0.4776776068980418, "grad_norm": 7.733794689178467, "learning_rate": 1.6820281132913673e-05, "loss": 1.7247, "step": 76000 }, { "epoch": 0.47774045921473884, "grad_norm": 6.097110271453857, "learning_rate": 1.681986203196902e-05, "loss": 1.4478, "step": 76010 }, { "epoch": 0.47780331153143596, "grad_norm": 7.205755233764648, "learning_rate": 1.6819442931024367e-05, "loss": 1.8043, "step": 76020 }, { "epoch": 0.4778661638481331, "grad_norm": 6.37203311920166, "learning_rate": 1.6819023830079714e-05, "loss": 1.6534, "step": 76030 }, { "epoch": 0.4779290161648302, "grad_norm": 6.525231838226318, "learning_rate": 1.681860472913506e-05, "loss": 1.6822, "step": 76040 }, { "epoch": 0.4779918684815273, "grad_norm": 6.487207889556885, "learning_rate": 1.6818185628190408e-05, "loss": 1.5818, "step": 76050 }, { "epoch": 0.4780547207982244, "grad_norm": 6.249277114868164, "learning_rate": 1.6817766527245755e-05, "loss": 1.4675, "step": 76060 }, { "epoch": 0.47811757311492153, "grad_norm": 7.021834373474121, "learning_rate": 1.68173474263011e-05, "loss": 1.866, "step": 76070 }, { "epoch": 0.47818042543161865, "grad_norm": 7.881227493286133, "learning_rate": 1.6816928325356446e-05, "loss": 1.6331, "step": 76080 }, { "epoch": 0.47824327774831576, "grad_norm": 6.758789539337158, "learning_rate": 1.6816509224411793e-05, "loss": 1.9898, "step": 76090 }, { "epoch": 0.4783061300650129, "grad_norm": 6.642223834991455, "learning_rate": 1.681609012346714e-05, "loss": 1.7728, "step": 76100 }, { "epoch": 0.47836898238171, "grad_norm": 6.30720329284668, "learning_rate": 1.6815671022522487e-05, "loss": 1.4039, "step": 76110 }, { "epoch": 0.4784318346984071, "grad_norm": 6.062718391418457, "learning_rate": 1.6815251921577834e-05, "loss": 1.5843, "step": 76120 }, { "epoch": 0.47849468701510417, "grad_norm": 6.683797359466553, "learning_rate": 1.681483282063318e-05, "loss": 1.6862, "step": 76130 }, { "epoch": 0.4785575393318013, "grad_norm": 6.161579608917236, "learning_rate": 1.6814413719688525e-05, "loss": 1.6474, "step": 76140 }, { "epoch": 0.4786203916484984, "grad_norm": 7.291128635406494, "learning_rate": 1.6813994618743872e-05, "loss": 1.7735, "step": 76150 }, { "epoch": 0.4786832439651955, "grad_norm": 5.702531814575195, "learning_rate": 1.681357551779922e-05, "loss": 1.7341, "step": 76160 }, { "epoch": 0.47874609628189263, "grad_norm": 6.238299369812012, "learning_rate": 1.6813156416854566e-05, "loss": 1.6386, "step": 76170 }, { "epoch": 0.47880894859858975, "grad_norm": 6.214787483215332, "learning_rate": 1.681273731590991e-05, "loss": 1.5607, "step": 76180 }, { "epoch": 0.47887180091528686, "grad_norm": 8.346977233886719, "learning_rate": 1.6812318214965257e-05, "loss": 1.5856, "step": 76190 }, { "epoch": 0.478934653231984, "grad_norm": 7.1682281494140625, "learning_rate": 1.6811899114020604e-05, "loss": 1.9296, "step": 76200 }, { "epoch": 0.4789975055486811, "grad_norm": 7.234790325164795, "learning_rate": 1.681148001307595e-05, "loss": 1.7518, "step": 76210 }, { "epoch": 0.4790603578653782, "grad_norm": 6.4557671546936035, "learning_rate": 1.6811060912131298e-05, "loss": 1.6877, "step": 76220 }, { "epoch": 0.4791232101820753, "grad_norm": 7.287628173828125, "learning_rate": 1.6810641811186642e-05, "loss": 1.7578, "step": 76230 }, { "epoch": 0.47918606249877244, "grad_norm": 7.659276962280273, "learning_rate": 1.681022271024199e-05, "loss": 1.761, "step": 76240 }, { "epoch": 0.47924891481546955, "grad_norm": 6.70458984375, "learning_rate": 1.6809803609297336e-05, "loss": 1.7808, "step": 76250 }, { "epoch": 0.4793117671321666, "grad_norm": 6.866919040679932, "learning_rate": 1.6809384508352683e-05, "loss": 1.6334, "step": 76260 }, { "epoch": 0.4793746194488637, "grad_norm": 8.058916091918945, "learning_rate": 1.680896540740803e-05, "loss": 1.6893, "step": 76270 }, { "epoch": 0.47943747176556084, "grad_norm": 7.43437385559082, "learning_rate": 1.6808546306463377e-05, "loss": 1.6279, "step": 76280 }, { "epoch": 0.47950032408225796, "grad_norm": 6.599917888641357, "learning_rate": 1.6808127205518724e-05, "loss": 1.6329, "step": 76290 }, { "epoch": 0.47956317639895507, "grad_norm": 6.738684177398682, "learning_rate": 1.680770810457407e-05, "loss": 1.5999, "step": 76300 }, { "epoch": 0.4796260287156522, "grad_norm": 6.267560005187988, "learning_rate": 1.6807289003629415e-05, "loss": 1.6746, "step": 76310 }, { "epoch": 0.4796888810323493, "grad_norm": 6.663815975189209, "learning_rate": 1.6806869902684762e-05, "loss": 1.5523, "step": 76320 }, { "epoch": 0.4797517333490464, "grad_norm": 7.0863356590271, "learning_rate": 1.680645080174011e-05, "loss": 1.4604, "step": 76330 }, { "epoch": 0.47981458566574353, "grad_norm": 7.865902423858643, "learning_rate": 1.6806031700795456e-05, "loss": 1.6081, "step": 76340 }, { "epoch": 0.47987743798244065, "grad_norm": 6.287507057189941, "learning_rate": 1.6805612599850803e-05, "loss": 1.7478, "step": 76350 }, { "epoch": 0.47994029029913776, "grad_norm": 6.601611137390137, "learning_rate": 1.6805193498906147e-05, "loss": 1.67, "step": 76360 }, { "epoch": 0.4800031426158349, "grad_norm": 7.671497344970703, "learning_rate": 1.6804774397961494e-05, "loss": 1.8226, "step": 76370 }, { "epoch": 0.480065994932532, "grad_norm": 7.103155612945557, "learning_rate": 1.680435529701684e-05, "loss": 1.8356, "step": 76380 }, { "epoch": 0.48012884724922905, "grad_norm": 6.71427059173584, "learning_rate": 1.6803936196072188e-05, "loss": 1.6828, "step": 76390 }, { "epoch": 0.48019169956592617, "grad_norm": 6.308364391326904, "learning_rate": 1.6803517095127532e-05, "loss": 1.8505, "step": 76400 }, { "epoch": 0.4802545518826233, "grad_norm": 6.404458522796631, "learning_rate": 1.680309799418288e-05, "loss": 1.7192, "step": 76410 }, { "epoch": 0.4803174041993204, "grad_norm": 7.096067428588867, "learning_rate": 1.6802678893238226e-05, "loss": 1.6234, "step": 76420 }, { "epoch": 0.4803802565160175, "grad_norm": 6.121633529663086, "learning_rate": 1.6802259792293573e-05, "loss": 1.5322, "step": 76430 }, { "epoch": 0.4804431088327146, "grad_norm": 6.883768081665039, "learning_rate": 1.680184069134892e-05, "loss": 1.4982, "step": 76440 }, { "epoch": 0.48050596114941174, "grad_norm": 5.9583635330200195, "learning_rate": 1.6801421590404264e-05, "loss": 1.6888, "step": 76450 }, { "epoch": 0.48056881346610886, "grad_norm": 7.673823356628418, "learning_rate": 1.680100248945961e-05, "loss": 1.6281, "step": 76460 }, { "epoch": 0.480631665782806, "grad_norm": 7.7770586013793945, "learning_rate": 1.6800583388514958e-05, "loss": 1.8198, "step": 76470 }, { "epoch": 0.4806945180995031, "grad_norm": 6.3411173820495605, "learning_rate": 1.6800164287570305e-05, "loss": 1.6632, "step": 76480 }, { "epoch": 0.4807573704162002, "grad_norm": 6.581920623779297, "learning_rate": 1.6799745186625652e-05, "loss": 1.9932, "step": 76490 }, { "epoch": 0.4808202227328973, "grad_norm": 6.3683085441589355, "learning_rate": 1.6799326085681e-05, "loss": 1.671, "step": 76500 }, { "epoch": 0.48088307504959443, "grad_norm": 7.540045261383057, "learning_rate": 1.6798906984736346e-05, "loss": 1.817, "step": 76510 }, { "epoch": 0.4809459273662915, "grad_norm": 6.9328389167785645, "learning_rate": 1.6798487883791693e-05, "loss": 1.8116, "step": 76520 }, { "epoch": 0.4810087796829886, "grad_norm": 6.642269611358643, "learning_rate": 1.679806878284704e-05, "loss": 1.7668, "step": 76530 }, { "epoch": 0.4810716319996857, "grad_norm": 5.931235313415527, "learning_rate": 1.6797649681902384e-05, "loss": 1.5424, "step": 76540 }, { "epoch": 0.48113448431638284, "grad_norm": 6.841514587402344, "learning_rate": 1.679723058095773e-05, "loss": 1.7151, "step": 76550 }, { "epoch": 0.48119733663307995, "grad_norm": 6.333242893218994, "learning_rate": 1.6796811480013078e-05, "loss": 1.6082, "step": 76560 }, { "epoch": 0.48126018894977707, "grad_norm": 7.489376068115234, "learning_rate": 1.6796392379068425e-05, "loss": 1.7848, "step": 76570 }, { "epoch": 0.4813230412664742, "grad_norm": 7.716389179229736, "learning_rate": 1.679597327812377e-05, "loss": 1.5968, "step": 76580 }, { "epoch": 0.4813858935831713, "grad_norm": 6.728993892669678, "learning_rate": 1.6795554177179116e-05, "loss": 1.5973, "step": 76590 }, { "epoch": 0.4814487458998684, "grad_norm": 7.20167350769043, "learning_rate": 1.6795135076234463e-05, "loss": 1.7179, "step": 76600 }, { "epoch": 0.48151159821656553, "grad_norm": 7.126608371734619, "learning_rate": 1.679471597528981e-05, "loss": 1.9309, "step": 76610 }, { "epoch": 0.48157445053326264, "grad_norm": 6.837955951690674, "learning_rate": 1.6794296874345154e-05, "loss": 1.8812, "step": 76620 }, { "epoch": 0.48163730284995976, "grad_norm": 8.498090744018555, "learning_rate": 1.67938777734005e-05, "loss": 1.6451, "step": 76630 }, { "epoch": 0.4817001551666569, "grad_norm": 6.694071292877197, "learning_rate": 1.6793458672455848e-05, "loss": 1.7285, "step": 76640 }, { "epoch": 0.48176300748335393, "grad_norm": 6.439844131469727, "learning_rate": 1.6793039571511195e-05, "loss": 1.7931, "step": 76650 }, { "epoch": 0.48182585980005105, "grad_norm": 6.896914482116699, "learning_rate": 1.6792620470566542e-05, "loss": 1.6776, "step": 76660 }, { "epoch": 0.48188871211674816, "grad_norm": 6.71177864074707, "learning_rate": 1.679220136962189e-05, "loss": 1.8234, "step": 76670 }, { "epoch": 0.4819515644334453, "grad_norm": 7.299954891204834, "learning_rate": 1.6791782268677236e-05, "loss": 1.6831, "step": 76680 }, { "epoch": 0.4820144167501424, "grad_norm": 7.252218723297119, "learning_rate": 1.6791363167732583e-05, "loss": 1.7688, "step": 76690 }, { "epoch": 0.4820772690668395, "grad_norm": 6.525442123413086, "learning_rate": 1.6790944066787927e-05, "loss": 1.8474, "step": 76700 }, { "epoch": 0.4821401213835366, "grad_norm": 6.625932693481445, "learning_rate": 1.6790524965843274e-05, "loss": 1.696, "step": 76710 }, { "epoch": 0.48220297370023374, "grad_norm": 7.926068305969238, "learning_rate": 1.679010586489862e-05, "loss": 1.569, "step": 76720 }, { "epoch": 0.48226582601693085, "grad_norm": 6.686048984527588, "learning_rate": 1.6789686763953968e-05, "loss": 1.5974, "step": 76730 }, { "epoch": 0.48232867833362797, "grad_norm": 7.3481268882751465, "learning_rate": 1.6789267663009315e-05, "loss": 1.6895, "step": 76740 }, { "epoch": 0.4823915306503251, "grad_norm": 6.539943695068359, "learning_rate": 1.6788848562064662e-05, "loss": 1.6513, "step": 76750 }, { "epoch": 0.4824543829670222, "grad_norm": 6.613016605377197, "learning_rate": 1.6788429461120006e-05, "loss": 1.8012, "step": 76760 }, { "epoch": 0.48251723528371926, "grad_norm": 6.274559020996094, "learning_rate": 1.6788010360175353e-05, "loss": 1.525, "step": 76770 }, { "epoch": 0.4825800876004164, "grad_norm": 7.586381912231445, "learning_rate": 1.67875912592307e-05, "loss": 1.7118, "step": 76780 }, { "epoch": 0.4826429399171135, "grad_norm": 6.268805027008057, "learning_rate": 1.6787172158286047e-05, "loss": 2.0469, "step": 76790 }, { "epoch": 0.4827057922338106, "grad_norm": 6.7833781242370605, "learning_rate": 1.678675305734139e-05, "loss": 1.6206, "step": 76800 }, { "epoch": 0.4827686445505077, "grad_norm": 9.56633472442627, "learning_rate": 1.6786333956396738e-05, "loss": 1.7133, "step": 76810 }, { "epoch": 0.48283149686720483, "grad_norm": 6.577396392822266, "learning_rate": 1.6785914855452085e-05, "loss": 1.6786, "step": 76820 }, { "epoch": 0.48289434918390195, "grad_norm": 6.394055366516113, "learning_rate": 1.6785495754507432e-05, "loss": 1.7993, "step": 76830 }, { "epoch": 0.48295720150059906, "grad_norm": 7.305932521820068, "learning_rate": 1.678507665356278e-05, "loss": 1.7944, "step": 76840 }, { "epoch": 0.4830200538172962, "grad_norm": 7.5725274085998535, "learning_rate": 1.6784657552618123e-05, "loss": 1.5526, "step": 76850 }, { "epoch": 0.4830829061339933, "grad_norm": 6.820903778076172, "learning_rate": 1.678423845167347e-05, "loss": 1.5832, "step": 76860 }, { "epoch": 0.4831457584506904, "grad_norm": 7.331640720367432, "learning_rate": 1.6783819350728817e-05, "loss": 1.7553, "step": 76870 }, { "epoch": 0.4832086107673875, "grad_norm": 5.045251369476318, "learning_rate": 1.6783400249784164e-05, "loss": 1.5728, "step": 76880 }, { "epoch": 0.48327146308408464, "grad_norm": 7.099085330963135, "learning_rate": 1.678298114883951e-05, "loss": 1.5843, "step": 76890 }, { "epoch": 0.4833343154007817, "grad_norm": 6.82592248916626, "learning_rate": 1.6782562047894858e-05, "loss": 1.7638, "step": 76900 }, { "epoch": 0.4833971677174788, "grad_norm": 6.2563910484313965, "learning_rate": 1.6782142946950205e-05, "loss": 1.5981, "step": 76910 }, { "epoch": 0.48346002003417593, "grad_norm": 6.211933612823486, "learning_rate": 1.6781723846005552e-05, "loss": 1.6231, "step": 76920 }, { "epoch": 0.48352287235087305, "grad_norm": 8.02187728881836, "learning_rate": 1.6781304745060896e-05, "loss": 1.5676, "step": 76930 }, { "epoch": 0.48358572466757016, "grad_norm": 7.387375831604004, "learning_rate": 1.6780885644116243e-05, "loss": 1.5103, "step": 76940 }, { "epoch": 0.4836485769842673, "grad_norm": 6.681406021118164, "learning_rate": 1.678046654317159e-05, "loss": 1.7857, "step": 76950 }, { "epoch": 0.4837114293009644, "grad_norm": 6.307636260986328, "learning_rate": 1.6780047442226937e-05, "loss": 1.6065, "step": 76960 }, { "epoch": 0.4837742816176615, "grad_norm": 7.979382038116455, "learning_rate": 1.6779628341282284e-05, "loss": 1.7488, "step": 76970 }, { "epoch": 0.4838371339343586, "grad_norm": 7.439484596252441, "learning_rate": 1.6779209240337628e-05, "loss": 1.9083, "step": 76980 }, { "epoch": 0.48389998625105574, "grad_norm": 6.592164039611816, "learning_rate": 1.6778790139392975e-05, "loss": 1.5036, "step": 76990 }, { "epoch": 0.48396283856775285, "grad_norm": 6.88593053817749, "learning_rate": 1.6778371038448322e-05, "loss": 1.7836, "step": 77000 }, { "epoch": 0.48402569088444997, "grad_norm": 6.5450849533081055, "learning_rate": 1.677795193750367e-05, "loss": 1.774, "step": 77010 }, { "epoch": 0.4840885432011471, "grad_norm": 7.682107448577881, "learning_rate": 1.6777532836559013e-05, "loss": 1.7071, "step": 77020 }, { "epoch": 0.48415139551784414, "grad_norm": 6.716879367828369, "learning_rate": 1.677711373561436e-05, "loss": 1.4301, "step": 77030 }, { "epoch": 0.48421424783454126, "grad_norm": 6.929161071777344, "learning_rate": 1.6776694634669707e-05, "loss": 1.552, "step": 77040 }, { "epoch": 0.48427710015123837, "grad_norm": 6.5387420654296875, "learning_rate": 1.6776275533725054e-05, "loss": 1.7443, "step": 77050 }, { "epoch": 0.4843399524679355, "grad_norm": 7.476447105407715, "learning_rate": 1.67758564327804e-05, "loss": 1.8439, "step": 77060 }, { "epoch": 0.4844028047846326, "grad_norm": 5.991411209106445, "learning_rate": 1.6775437331835748e-05, "loss": 1.8954, "step": 77070 }, { "epoch": 0.4844656571013297, "grad_norm": 6.784745693206787, "learning_rate": 1.6775018230891092e-05, "loss": 1.7183, "step": 77080 }, { "epoch": 0.48452850941802683, "grad_norm": 7.5521769523620605, "learning_rate": 1.677459912994644e-05, "loss": 1.8052, "step": 77090 }, { "epoch": 0.48459136173472395, "grad_norm": 6.470150947570801, "learning_rate": 1.6774180029001786e-05, "loss": 1.6111, "step": 77100 }, { "epoch": 0.48465421405142106, "grad_norm": 6.512275218963623, "learning_rate": 1.6773760928057133e-05, "loss": 1.7136, "step": 77110 }, { "epoch": 0.4847170663681182, "grad_norm": 6.011322021484375, "learning_rate": 1.677334182711248e-05, "loss": 1.7882, "step": 77120 }, { "epoch": 0.4847799186848153, "grad_norm": 7.649730205535889, "learning_rate": 1.6772922726167827e-05, "loss": 1.654, "step": 77130 }, { "epoch": 0.4848427710015124, "grad_norm": 6.561295032501221, "learning_rate": 1.6772503625223174e-05, "loss": 1.6828, "step": 77140 }, { "epoch": 0.4849056233182095, "grad_norm": 7.136097431182861, "learning_rate": 1.677208452427852e-05, "loss": 1.621, "step": 77150 }, { "epoch": 0.4849684756349066, "grad_norm": 6.467504501342773, "learning_rate": 1.6771665423333865e-05, "loss": 1.799, "step": 77160 }, { "epoch": 0.4850313279516037, "grad_norm": 6.1977314949035645, "learning_rate": 1.6771246322389212e-05, "loss": 1.8724, "step": 77170 }, { "epoch": 0.4850941802683008, "grad_norm": 7.321646690368652, "learning_rate": 1.677082722144456e-05, "loss": 1.7538, "step": 77180 }, { "epoch": 0.4851570325849979, "grad_norm": 7.129180431365967, "learning_rate": 1.6770408120499906e-05, "loss": 1.6605, "step": 77190 }, { "epoch": 0.48521988490169504, "grad_norm": 6.395766735076904, "learning_rate": 1.676998901955525e-05, "loss": 1.4973, "step": 77200 }, { "epoch": 0.48528273721839216, "grad_norm": 5.811442852020264, "learning_rate": 1.6769569918610597e-05, "loss": 1.7174, "step": 77210 }, { "epoch": 0.4853455895350893, "grad_norm": 7.0175018310546875, "learning_rate": 1.6769150817665944e-05, "loss": 1.517, "step": 77220 }, { "epoch": 0.4854084418517864, "grad_norm": 7.165356636047363, "learning_rate": 1.676873171672129e-05, "loss": 1.8301, "step": 77230 }, { "epoch": 0.4854712941684835, "grad_norm": 6.414263725280762, "learning_rate": 1.6768312615776635e-05, "loss": 1.5513, "step": 77240 }, { "epoch": 0.4855341464851806, "grad_norm": 6.830236911773682, "learning_rate": 1.6767893514831982e-05, "loss": 1.7215, "step": 77250 }, { "epoch": 0.48559699880187773, "grad_norm": 7.03541374206543, "learning_rate": 1.676747441388733e-05, "loss": 1.6454, "step": 77260 }, { "epoch": 0.48565985111857485, "grad_norm": 6.4914631843566895, "learning_rate": 1.6767055312942676e-05, "loss": 1.751, "step": 77270 }, { "epoch": 0.4857227034352719, "grad_norm": 6.393687725067139, "learning_rate": 1.6766636211998023e-05, "loss": 1.6691, "step": 77280 }, { "epoch": 0.485785555751969, "grad_norm": 5.885585784912109, "learning_rate": 1.676621711105337e-05, "loss": 1.6191, "step": 77290 }, { "epoch": 0.48584840806866614, "grad_norm": 6.8038201332092285, "learning_rate": 1.6765798010108717e-05, "loss": 1.5134, "step": 77300 }, { "epoch": 0.48591126038536325, "grad_norm": 6.377265453338623, "learning_rate": 1.6765378909164064e-05, "loss": 1.6292, "step": 77310 }, { "epoch": 0.48597411270206037, "grad_norm": 6.908006191253662, "learning_rate": 1.676495980821941e-05, "loss": 1.5764, "step": 77320 }, { "epoch": 0.4860369650187575, "grad_norm": 8.175592422485352, "learning_rate": 1.6764540707274755e-05, "loss": 1.993, "step": 77330 }, { "epoch": 0.4860998173354546, "grad_norm": 6.585998058319092, "learning_rate": 1.6764121606330102e-05, "loss": 1.8986, "step": 77340 }, { "epoch": 0.4861626696521517, "grad_norm": 6.87686824798584, "learning_rate": 1.676370250538545e-05, "loss": 1.8219, "step": 77350 }, { "epoch": 0.48622552196884883, "grad_norm": 6.103376388549805, "learning_rate": 1.6763283404440796e-05, "loss": 1.538, "step": 77360 }, { "epoch": 0.48628837428554594, "grad_norm": 7.167880535125732, "learning_rate": 1.6762864303496143e-05, "loss": 1.7229, "step": 77370 }, { "epoch": 0.48635122660224306, "grad_norm": 7.27005672454834, "learning_rate": 1.6762445202551487e-05, "loss": 1.7178, "step": 77380 }, { "epoch": 0.4864140789189402, "grad_norm": 7.195834636688232, "learning_rate": 1.6762026101606834e-05, "loss": 1.601, "step": 77390 }, { "epoch": 0.4864769312356373, "grad_norm": 6.97745418548584, "learning_rate": 1.676160700066218e-05, "loss": 1.6146, "step": 77400 }, { "epoch": 0.48653978355233435, "grad_norm": 6.174566268920898, "learning_rate": 1.676118789971753e-05, "loss": 1.5699, "step": 77410 }, { "epoch": 0.48660263586903146, "grad_norm": 6.945740699768066, "learning_rate": 1.6760768798772872e-05, "loss": 1.8153, "step": 77420 }, { "epoch": 0.4866654881857286, "grad_norm": 6.563122749328613, "learning_rate": 1.676034969782822e-05, "loss": 1.827, "step": 77430 }, { "epoch": 0.4867283405024257, "grad_norm": 7.262126445770264, "learning_rate": 1.6759930596883566e-05, "loss": 1.8561, "step": 77440 }, { "epoch": 0.4867911928191228, "grad_norm": 6.856802463531494, "learning_rate": 1.6759511495938913e-05, "loss": 1.772, "step": 77450 }, { "epoch": 0.4868540451358199, "grad_norm": 6.213690757751465, "learning_rate": 1.675909239499426e-05, "loss": 1.7232, "step": 77460 }, { "epoch": 0.48691689745251704, "grad_norm": 7.799118518829346, "learning_rate": 1.6758673294049604e-05, "loss": 1.9163, "step": 77470 }, { "epoch": 0.48697974976921415, "grad_norm": 6.059864044189453, "learning_rate": 1.675825419310495e-05, "loss": 1.714, "step": 77480 }, { "epoch": 0.48704260208591127, "grad_norm": 6.988504886627197, "learning_rate": 1.6757835092160298e-05, "loss": 1.629, "step": 77490 }, { "epoch": 0.4871054544026084, "grad_norm": 6.0013227462768555, "learning_rate": 1.6757415991215645e-05, "loss": 1.6388, "step": 77500 }, { "epoch": 0.4871683067193055, "grad_norm": 7.691008567810059, "learning_rate": 1.6756996890270992e-05, "loss": 1.8351, "step": 77510 }, { "epoch": 0.4872311590360026, "grad_norm": 6.733763217926025, "learning_rate": 1.675657778932634e-05, "loss": 1.7723, "step": 77520 }, { "epoch": 0.48729401135269973, "grad_norm": 5.495975971221924, "learning_rate": 1.6756158688381686e-05, "loss": 1.6548, "step": 77530 }, { "epoch": 0.4873568636693968, "grad_norm": 6.805240154266357, "learning_rate": 1.6755739587437033e-05, "loss": 1.6496, "step": 77540 }, { "epoch": 0.4874197159860939, "grad_norm": 6.348954677581787, "learning_rate": 1.6755320486492377e-05, "loss": 1.6526, "step": 77550 }, { "epoch": 0.487482568302791, "grad_norm": 6.872445106506348, "learning_rate": 1.6754901385547724e-05, "loss": 1.5751, "step": 77560 }, { "epoch": 0.48754542061948813, "grad_norm": 6.3247246742248535, "learning_rate": 1.675448228460307e-05, "loss": 1.898, "step": 77570 }, { "epoch": 0.48760827293618525, "grad_norm": 8.797094345092773, "learning_rate": 1.675406318365842e-05, "loss": 1.5272, "step": 77580 }, { "epoch": 0.48767112525288236, "grad_norm": 7.387877464294434, "learning_rate": 1.6753644082713765e-05, "loss": 1.5454, "step": 77590 }, { "epoch": 0.4877339775695795, "grad_norm": 6.240447521209717, "learning_rate": 1.675322498176911e-05, "loss": 1.4453, "step": 77600 }, { "epoch": 0.4877968298862766, "grad_norm": 6.7662763595581055, "learning_rate": 1.6752805880824456e-05, "loss": 1.6475, "step": 77610 }, { "epoch": 0.4878596822029737, "grad_norm": 5.877827167510986, "learning_rate": 1.6752386779879803e-05, "loss": 1.6666, "step": 77620 }, { "epoch": 0.4879225345196708, "grad_norm": 7.133870601654053, "learning_rate": 1.675196767893515e-05, "loss": 1.8037, "step": 77630 }, { "epoch": 0.48798538683636794, "grad_norm": 7.127045631408691, "learning_rate": 1.6751548577990494e-05, "loss": 1.7026, "step": 77640 }, { "epoch": 0.48804823915306506, "grad_norm": 6.184054374694824, "learning_rate": 1.675112947704584e-05, "loss": 1.7624, "step": 77650 }, { "epoch": 0.48811109146976217, "grad_norm": 6.619475841522217, "learning_rate": 1.6750710376101188e-05, "loss": 1.6668, "step": 77660 }, { "epoch": 0.48817394378645923, "grad_norm": 7.2268290519714355, "learning_rate": 1.6750291275156535e-05, "loss": 1.682, "step": 77670 }, { "epoch": 0.48823679610315635, "grad_norm": 5.725668907165527, "learning_rate": 1.6749872174211882e-05, "loss": 1.6467, "step": 77680 }, { "epoch": 0.48829964841985346, "grad_norm": 6.959593296051025, "learning_rate": 1.674945307326723e-05, "loss": 1.6727, "step": 77690 }, { "epoch": 0.4883625007365506, "grad_norm": 5.738344192504883, "learning_rate": 1.6749033972322576e-05, "loss": 1.7752, "step": 77700 }, { "epoch": 0.4884253530532477, "grad_norm": 6.951902866363525, "learning_rate": 1.674861487137792e-05, "loss": 1.5168, "step": 77710 }, { "epoch": 0.4884882053699448, "grad_norm": 6.028759479522705, "learning_rate": 1.6748195770433267e-05, "loss": 1.7481, "step": 77720 }, { "epoch": 0.4885510576866419, "grad_norm": 6.964917182922363, "learning_rate": 1.6747776669488614e-05, "loss": 1.7408, "step": 77730 }, { "epoch": 0.48861391000333904, "grad_norm": 6.195134162902832, "learning_rate": 1.674735756854396e-05, "loss": 1.6444, "step": 77740 }, { "epoch": 0.48867676232003615, "grad_norm": 7.4523115158081055, "learning_rate": 1.674693846759931e-05, "loss": 1.6964, "step": 77750 }, { "epoch": 0.48873961463673327, "grad_norm": 6.267302989959717, "learning_rate": 1.6746519366654655e-05, "loss": 1.58, "step": 77760 }, { "epoch": 0.4888024669534304, "grad_norm": 8.292152404785156, "learning_rate": 1.6746100265710003e-05, "loss": 1.698, "step": 77770 }, { "epoch": 0.4888653192701275, "grad_norm": 7.232621192932129, "learning_rate": 1.6745681164765346e-05, "loss": 1.6959, "step": 77780 }, { "epoch": 0.48892817158682456, "grad_norm": 7.289283275604248, "learning_rate": 1.6745262063820693e-05, "loss": 1.7407, "step": 77790 }, { "epoch": 0.48899102390352167, "grad_norm": 6.819472789764404, "learning_rate": 1.674484296287604e-05, "loss": 1.7476, "step": 77800 }, { "epoch": 0.4890538762202188, "grad_norm": 6.648898124694824, "learning_rate": 1.6744423861931387e-05, "loss": 1.5295, "step": 77810 }, { "epoch": 0.4891167285369159, "grad_norm": 7.825111389160156, "learning_rate": 1.674400476098673e-05, "loss": 1.7742, "step": 77820 }, { "epoch": 0.489179580853613, "grad_norm": 7.094618320465088, "learning_rate": 1.6743585660042078e-05, "loss": 1.5701, "step": 77830 }, { "epoch": 0.48924243317031013, "grad_norm": 7.05112361907959, "learning_rate": 1.6743166559097425e-05, "loss": 1.9165, "step": 77840 }, { "epoch": 0.48930528548700725, "grad_norm": 6.470577716827393, "learning_rate": 1.6742747458152772e-05, "loss": 1.768, "step": 77850 }, { "epoch": 0.48936813780370436, "grad_norm": 6.4770331382751465, "learning_rate": 1.6742328357208116e-05, "loss": 1.5896, "step": 77860 }, { "epoch": 0.4894309901204015, "grad_norm": 6.927404880523682, "learning_rate": 1.6741909256263463e-05, "loss": 1.701, "step": 77870 }, { "epoch": 0.4894938424370986, "grad_norm": 6.723846912384033, "learning_rate": 1.674149015531881e-05, "loss": 1.5947, "step": 77880 }, { "epoch": 0.4895566947537957, "grad_norm": 7.334015846252441, "learning_rate": 1.6741071054374157e-05, "loss": 1.6423, "step": 77890 }, { "epoch": 0.4896195470704928, "grad_norm": 6.482966899871826, "learning_rate": 1.6740651953429504e-05, "loss": 1.5698, "step": 77900 }, { "epoch": 0.48968239938718994, "grad_norm": 6.900713920593262, "learning_rate": 1.674023285248485e-05, "loss": 1.704, "step": 77910 }, { "epoch": 0.489745251703887, "grad_norm": 7.279245853424072, "learning_rate": 1.67398137515402e-05, "loss": 1.5754, "step": 77920 }, { "epoch": 0.4898081040205841, "grad_norm": 7.552469730377197, "learning_rate": 1.6739394650595546e-05, "loss": 1.7267, "step": 77930 }, { "epoch": 0.4898709563372812, "grad_norm": 5.7016754150390625, "learning_rate": 1.6738975549650893e-05, "loss": 1.5803, "step": 77940 }, { "epoch": 0.48993380865397834, "grad_norm": 5.878109931945801, "learning_rate": 1.6738556448706236e-05, "loss": 1.8408, "step": 77950 }, { "epoch": 0.48999666097067546, "grad_norm": 6.1266679763793945, "learning_rate": 1.6738137347761583e-05, "loss": 1.7481, "step": 77960 }, { "epoch": 0.4900595132873726, "grad_norm": 6.185999393463135, "learning_rate": 1.673771824681693e-05, "loss": 1.6293, "step": 77970 }, { "epoch": 0.4901223656040697, "grad_norm": 7.004542827606201, "learning_rate": 1.6737299145872277e-05, "loss": 1.7205, "step": 77980 }, { "epoch": 0.4901852179207668, "grad_norm": 6.641819000244141, "learning_rate": 1.6736880044927625e-05, "loss": 1.796, "step": 77990 }, { "epoch": 0.4902480702374639, "grad_norm": 6.25460958480835, "learning_rate": 1.6736460943982968e-05, "loss": 1.7323, "step": 78000 }, { "epoch": 0.49031092255416103, "grad_norm": 7.75309419631958, "learning_rate": 1.6736041843038315e-05, "loss": 1.6054, "step": 78010 }, { "epoch": 0.49037377487085815, "grad_norm": 6.942826271057129, "learning_rate": 1.6735622742093662e-05, "loss": 1.7125, "step": 78020 }, { "epoch": 0.49043662718755526, "grad_norm": 7.00164270401001, "learning_rate": 1.673520364114901e-05, "loss": 1.6341, "step": 78030 }, { "epoch": 0.4904994795042524, "grad_norm": 8.243274688720703, "learning_rate": 1.6734784540204353e-05, "loss": 1.5607, "step": 78040 }, { "epoch": 0.49056233182094944, "grad_norm": 7.010618209838867, "learning_rate": 1.67343654392597e-05, "loss": 1.6654, "step": 78050 }, { "epoch": 0.49062518413764655, "grad_norm": 7.62802791595459, "learning_rate": 1.6733946338315047e-05, "loss": 1.8686, "step": 78060 }, { "epoch": 0.49068803645434367, "grad_norm": 8.912038803100586, "learning_rate": 1.6733527237370394e-05, "loss": 1.5066, "step": 78070 }, { "epoch": 0.4907508887710408, "grad_norm": 6.84856653213501, "learning_rate": 1.673310813642574e-05, "loss": 1.5826, "step": 78080 }, { "epoch": 0.4908137410877379, "grad_norm": 7.078650951385498, "learning_rate": 1.6732689035481085e-05, "loss": 1.7848, "step": 78090 }, { "epoch": 0.490876593404435, "grad_norm": 6.207369327545166, "learning_rate": 1.6732269934536432e-05, "loss": 1.5481, "step": 78100 }, { "epoch": 0.49093944572113213, "grad_norm": 5.817479133605957, "learning_rate": 1.673185083359178e-05, "loss": 1.8581, "step": 78110 }, { "epoch": 0.49100229803782924, "grad_norm": 6.951427936553955, "learning_rate": 1.6731431732647126e-05, "loss": 1.6912, "step": 78120 }, { "epoch": 0.49106515035452636, "grad_norm": 7.501084804534912, "learning_rate": 1.6731012631702473e-05, "loss": 1.5541, "step": 78130 }, { "epoch": 0.4911280026712235, "grad_norm": 7.9845051765441895, "learning_rate": 1.673059353075782e-05, "loss": 1.7753, "step": 78140 }, { "epoch": 0.4911908549879206, "grad_norm": 6.4379563331604, "learning_rate": 1.6730174429813168e-05, "loss": 1.6814, "step": 78150 }, { "epoch": 0.4912537073046177, "grad_norm": 6.529316425323486, "learning_rate": 1.6729755328868515e-05, "loss": 1.5828, "step": 78160 }, { "epoch": 0.4913165596213148, "grad_norm": 7.063477516174316, "learning_rate": 1.6729336227923858e-05, "loss": 1.6801, "step": 78170 }, { "epoch": 0.4913794119380119, "grad_norm": 6.500266075134277, "learning_rate": 1.6728917126979205e-05, "loss": 1.5657, "step": 78180 }, { "epoch": 0.491442264254709, "grad_norm": 5.756186008453369, "learning_rate": 1.6728498026034552e-05, "loss": 1.8106, "step": 78190 }, { "epoch": 0.4915051165714061, "grad_norm": 7.048885345458984, "learning_rate": 1.67280789250899e-05, "loss": 1.6772, "step": 78200 }, { "epoch": 0.4915679688881032, "grad_norm": 7.077173709869385, "learning_rate": 1.6727659824145247e-05, "loss": 1.6553, "step": 78210 }, { "epoch": 0.49163082120480034, "grad_norm": 6.204776287078857, "learning_rate": 1.672724072320059e-05, "loss": 1.6815, "step": 78220 }, { "epoch": 0.49169367352149745, "grad_norm": 6.870537757873535, "learning_rate": 1.6726821622255937e-05, "loss": 1.7491, "step": 78230 }, { "epoch": 0.49175652583819457, "grad_norm": 6.51535177230835, "learning_rate": 1.6726402521311284e-05, "loss": 1.8883, "step": 78240 }, { "epoch": 0.4918193781548917, "grad_norm": 6.133185863494873, "learning_rate": 1.672598342036663e-05, "loss": 1.6109, "step": 78250 }, { "epoch": 0.4918822304715888, "grad_norm": 6.261854648590088, "learning_rate": 1.6725564319421975e-05, "loss": 2.0821, "step": 78260 }, { "epoch": 0.4919450827882859, "grad_norm": 6.175626754760742, "learning_rate": 1.6725145218477322e-05, "loss": 1.5385, "step": 78270 }, { "epoch": 0.49200793510498303, "grad_norm": 5.714633941650391, "learning_rate": 1.672472611753267e-05, "loss": 1.6944, "step": 78280 }, { "epoch": 0.49207078742168014, "grad_norm": 6.655765056610107, "learning_rate": 1.6724307016588016e-05, "loss": 1.6819, "step": 78290 }, { "epoch": 0.4921336397383772, "grad_norm": 7.588636875152588, "learning_rate": 1.6723887915643363e-05, "loss": 1.9455, "step": 78300 }, { "epoch": 0.4921964920550743, "grad_norm": 7.5459442138671875, "learning_rate": 1.672346881469871e-05, "loss": 1.5781, "step": 78310 }, { "epoch": 0.49225934437177143, "grad_norm": 7.523612022399902, "learning_rate": 1.6723049713754058e-05, "loss": 1.7942, "step": 78320 }, { "epoch": 0.49232219668846855, "grad_norm": 6.231264114379883, "learning_rate": 1.67226306128094e-05, "loss": 1.5826, "step": 78330 }, { "epoch": 0.49238504900516566, "grad_norm": 6.2151641845703125, "learning_rate": 1.672221151186475e-05, "loss": 1.6539, "step": 78340 }, { "epoch": 0.4924479013218628, "grad_norm": 5.359531879425049, "learning_rate": 1.6721792410920095e-05, "loss": 1.5909, "step": 78350 }, { "epoch": 0.4925107536385599, "grad_norm": 6.7791290283203125, "learning_rate": 1.6721373309975442e-05, "loss": 1.6958, "step": 78360 }, { "epoch": 0.492573605955257, "grad_norm": 6.420655250549316, "learning_rate": 1.672095420903079e-05, "loss": 1.5403, "step": 78370 }, { "epoch": 0.4926364582719541, "grad_norm": 6.764841079711914, "learning_rate": 1.6720535108086137e-05, "loss": 1.7971, "step": 78380 }, { "epoch": 0.49269931058865124, "grad_norm": 8.062089920043945, "learning_rate": 1.6720116007141484e-05, "loss": 1.9059, "step": 78390 }, { "epoch": 0.49276216290534836, "grad_norm": 7.529369831085205, "learning_rate": 1.6719696906196827e-05, "loss": 1.7136, "step": 78400 }, { "epoch": 0.49282501522204547, "grad_norm": 7.275436878204346, "learning_rate": 1.6719277805252174e-05, "loss": 1.7757, "step": 78410 }, { "epoch": 0.4928878675387426, "grad_norm": 6.625938892364502, "learning_rate": 1.671885870430752e-05, "loss": 1.7824, "step": 78420 }, { "epoch": 0.49295071985543965, "grad_norm": 7.415990829467773, "learning_rate": 1.671843960336287e-05, "loss": 1.6414, "step": 78430 }, { "epoch": 0.49301357217213676, "grad_norm": 6.522861480712891, "learning_rate": 1.6718020502418212e-05, "loss": 1.6451, "step": 78440 }, { "epoch": 0.4930764244888339, "grad_norm": 6.326927185058594, "learning_rate": 1.671760140147356e-05, "loss": 1.6692, "step": 78450 }, { "epoch": 0.493139276805531, "grad_norm": 6.64226770401001, "learning_rate": 1.6717182300528906e-05, "loss": 1.817, "step": 78460 }, { "epoch": 0.4932021291222281, "grad_norm": 7.481614112854004, "learning_rate": 1.6716763199584253e-05, "loss": 1.7001, "step": 78470 }, { "epoch": 0.4932649814389252, "grad_norm": 7.955791473388672, "learning_rate": 1.6716344098639597e-05, "loss": 1.6302, "step": 78480 }, { "epoch": 0.49332783375562234, "grad_norm": 6.260321140289307, "learning_rate": 1.6715924997694944e-05, "loss": 1.6797, "step": 78490 }, { "epoch": 0.49339068607231945, "grad_norm": 7.346848487854004, "learning_rate": 1.671550589675029e-05, "loss": 1.7796, "step": 78500 }, { "epoch": 0.49345353838901657, "grad_norm": 7.295152187347412, "learning_rate": 1.671508679580564e-05, "loss": 1.7205, "step": 78510 }, { "epoch": 0.4935163907057137, "grad_norm": 6.241100788116455, "learning_rate": 1.6714667694860985e-05, "loss": 1.6379, "step": 78520 }, { "epoch": 0.4935792430224108, "grad_norm": 6.412459850311279, "learning_rate": 1.6714290504010797e-05, "loss": 1.804, "step": 78530 }, { "epoch": 0.4936420953391079, "grad_norm": 5.450584411621094, "learning_rate": 1.6713871403066144e-05, "loss": 1.7511, "step": 78540 }, { "epoch": 0.493704947655805, "grad_norm": 6.743354797363281, "learning_rate": 1.671345230212149e-05, "loss": 1.5647, "step": 78550 }, { "epoch": 0.4937677999725021, "grad_norm": 8.319733619689941, "learning_rate": 1.6713033201176835e-05, "loss": 1.8374, "step": 78560 }, { "epoch": 0.4938306522891992, "grad_norm": 6.66218376159668, "learning_rate": 1.671261410023218e-05, "loss": 2.0294, "step": 78570 }, { "epoch": 0.4938935046058963, "grad_norm": 6.698323726654053, "learning_rate": 1.671219499928753e-05, "loss": 1.7704, "step": 78580 }, { "epoch": 0.49395635692259343, "grad_norm": 7.755212783813477, "learning_rate": 1.6711775898342876e-05, "loss": 1.6396, "step": 78590 }, { "epoch": 0.49401920923929055, "grad_norm": 8.131885528564453, "learning_rate": 1.6711356797398223e-05, "loss": 1.66, "step": 78600 }, { "epoch": 0.49408206155598766, "grad_norm": 6.574060916900635, "learning_rate": 1.671093769645357e-05, "loss": 1.6609, "step": 78610 }, { "epoch": 0.4941449138726848, "grad_norm": 5.90941047668457, "learning_rate": 1.6710518595508917e-05, "loss": 1.6647, "step": 78620 }, { "epoch": 0.4942077661893819, "grad_norm": 7.4213948249816895, "learning_rate": 1.6710099494564264e-05, "loss": 1.3468, "step": 78630 }, { "epoch": 0.494270618506079, "grad_norm": 5.807704925537109, "learning_rate": 1.670968039361961e-05, "loss": 1.6507, "step": 78640 }, { "epoch": 0.4943334708227761, "grad_norm": 7.539387226104736, "learning_rate": 1.6709261292674955e-05, "loss": 1.8559, "step": 78650 }, { "epoch": 0.49439632313947324, "grad_norm": 6.764973163604736, "learning_rate": 1.6708842191730302e-05, "loss": 1.7355, "step": 78660 }, { "epoch": 0.49445917545617035, "grad_norm": 6.689443588256836, "learning_rate": 1.670842309078565e-05, "loss": 1.6667, "step": 78670 }, { "epoch": 0.49452202777286747, "grad_norm": 7.405571460723877, "learning_rate": 1.6708003989840996e-05, "loss": 1.7678, "step": 78680 }, { "epoch": 0.4945848800895645, "grad_norm": 6.4708943367004395, "learning_rate": 1.670758488889634e-05, "loss": 1.7772, "step": 78690 }, { "epoch": 0.49464773240626164, "grad_norm": 6.734555721282959, "learning_rate": 1.6707165787951687e-05, "loss": 1.6194, "step": 78700 }, { "epoch": 0.49471058472295876, "grad_norm": 7.194657325744629, "learning_rate": 1.6706746687007034e-05, "loss": 1.6331, "step": 78710 }, { "epoch": 0.4947734370396559, "grad_norm": 6.688826560974121, "learning_rate": 1.670632758606238e-05, "loss": 1.768, "step": 78720 }, { "epoch": 0.494836289356353, "grad_norm": 8.241920471191406, "learning_rate": 1.6705908485117728e-05, "loss": 1.6582, "step": 78730 }, { "epoch": 0.4948991416730501, "grad_norm": 6.8648152351379395, "learning_rate": 1.6705489384173072e-05, "loss": 1.7388, "step": 78740 }, { "epoch": 0.4949619939897472, "grad_norm": 7.46151876449585, "learning_rate": 1.670507028322842e-05, "loss": 1.8008, "step": 78750 }, { "epoch": 0.49502484630644433, "grad_norm": 5.9637131690979, "learning_rate": 1.6704651182283766e-05, "loss": 1.6126, "step": 78760 }, { "epoch": 0.49508769862314145, "grad_norm": 6.775414943695068, "learning_rate": 1.6704232081339113e-05, "loss": 1.6424, "step": 78770 }, { "epoch": 0.49515055093983856, "grad_norm": 5.462296962738037, "learning_rate": 1.6703812980394457e-05, "loss": 1.559, "step": 78780 }, { "epoch": 0.4952134032565357, "grad_norm": 6.365492343902588, "learning_rate": 1.6703393879449804e-05, "loss": 1.6461, "step": 78790 }, { "epoch": 0.4952762555732328, "grad_norm": 6.2100510597229, "learning_rate": 1.670297477850515e-05, "loss": 1.6577, "step": 78800 }, { "epoch": 0.49533910788992985, "grad_norm": 6.8301849365234375, "learning_rate": 1.6702555677560498e-05, "loss": 1.7671, "step": 78810 }, { "epoch": 0.49540196020662697, "grad_norm": 6.008386611938477, "learning_rate": 1.6702136576615845e-05, "loss": 1.7751, "step": 78820 }, { "epoch": 0.4954648125233241, "grad_norm": 6.045980930328369, "learning_rate": 1.6701717475671192e-05, "loss": 1.5703, "step": 78830 }, { "epoch": 0.4955276648400212, "grad_norm": 7.105350971221924, "learning_rate": 1.670129837472654e-05, "loss": 1.8338, "step": 78840 }, { "epoch": 0.4955905171567183, "grad_norm": 7.170408248901367, "learning_rate": 1.6700879273781886e-05, "loss": 1.6635, "step": 78850 }, { "epoch": 0.49565336947341543, "grad_norm": 10.234091758728027, "learning_rate": 1.6700460172837233e-05, "loss": 2.009, "step": 78860 }, { "epoch": 0.49571622179011254, "grad_norm": 7.489864349365234, "learning_rate": 1.6700041071892577e-05, "loss": 1.8661, "step": 78870 }, { "epoch": 0.49577907410680966, "grad_norm": 6.527287006378174, "learning_rate": 1.6699621970947924e-05, "loss": 1.566, "step": 78880 }, { "epoch": 0.4958419264235068, "grad_norm": 7.5397443771362305, "learning_rate": 1.669920287000327e-05, "loss": 1.6793, "step": 78890 }, { "epoch": 0.4959047787402039, "grad_norm": 8.03945541381836, "learning_rate": 1.6698783769058618e-05, "loss": 1.8563, "step": 78900 }, { "epoch": 0.495967631056901, "grad_norm": 6.880329608917236, "learning_rate": 1.6698364668113965e-05, "loss": 1.5937, "step": 78910 }, { "epoch": 0.4960304833735981, "grad_norm": 6.510516166687012, "learning_rate": 1.669794556716931e-05, "loss": 1.6486, "step": 78920 }, { "epoch": 0.49609333569029523, "grad_norm": 6.234617710113525, "learning_rate": 1.6697526466224656e-05, "loss": 1.9359, "step": 78930 }, { "epoch": 0.4961561880069923, "grad_norm": 6.716326713562012, "learning_rate": 1.6697107365280003e-05, "loss": 1.9386, "step": 78940 }, { "epoch": 0.4962190403236894, "grad_norm": 5.632035255432129, "learning_rate": 1.669668826433535e-05, "loss": 1.7872, "step": 78950 }, { "epoch": 0.4962818926403865, "grad_norm": 7.106645107269287, "learning_rate": 1.6696269163390694e-05, "loss": 1.6277, "step": 78960 }, { "epoch": 0.49634474495708364, "grad_norm": 7.37431001663208, "learning_rate": 1.669585006244604e-05, "loss": 1.8052, "step": 78970 }, { "epoch": 0.49640759727378075, "grad_norm": 7.185262203216553, "learning_rate": 1.6695430961501388e-05, "loss": 1.8382, "step": 78980 }, { "epoch": 0.49647044959047787, "grad_norm": 7.572688102722168, "learning_rate": 1.6695011860556735e-05, "loss": 1.7511, "step": 78990 }, { "epoch": 0.496533301907175, "grad_norm": 6.76294469833374, "learning_rate": 1.6694592759612082e-05, "loss": 1.8781, "step": 79000 }, { "epoch": 0.4965961542238721, "grad_norm": 6.124716758728027, "learning_rate": 1.669417365866743e-05, "loss": 1.5367, "step": 79010 }, { "epoch": 0.4966590065405692, "grad_norm": 6.655550956726074, "learning_rate": 1.6693754557722773e-05, "loss": 1.6984, "step": 79020 }, { "epoch": 0.49672185885726633, "grad_norm": 7.736663341522217, "learning_rate": 1.669333545677812e-05, "loss": 1.6341, "step": 79030 }, { "epoch": 0.49678471117396344, "grad_norm": 6.517747402191162, "learning_rate": 1.6692916355833467e-05, "loss": 1.5439, "step": 79040 }, { "epoch": 0.49684756349066056, "grad_norm": 7.1836113929748535, "learning_rate": 1.6692497254888814e-05, "loss": 1.7337, "step": 79050 }, { "epoch": 0.4969104158073577, "grad_norm": 6.330719947814941, "learning_rate": 1.669207815394416e-05, "loss": 1.6814, "step": 79060 }, { "epoch": 0.49697326812405473, "grad_norm": 7.029351234436035, "learning_rate": 1.6691659052999508e-05, "loss": 1.8586, "step": 79070 }, { "epoch": 0.49703612044075185, "grad_norm": 5.868019104003906, "learning_rate": 1.6691239952054855e-05, "loss": 1.7189, "step": 79080 }, { "epoch": 0.49709897275744896, "grad_norm": 7.495911598205566, "learning_rate": 1.66908208511102e-05, "loss": 1.5853, "step": 79090 }, { "epoch": 0.4971618250741461, "grad_norm": 8.74422550201416, "learning_rate": 1.6690401750165546e-05, "loss": 1.7277, "step": 79100 }, { "epoch": 0.4972246773908432, "grad_norm": 7.246800899505615, "learning_rate": 1.6689982649220893e-05, "loss": 1.5798, "step": 79110 }, { "epoch": 0.4972875297075403, "grad_norm": 5.6232500076293945, "learning_rate": 1.668956354827624e-05, "loss": 1.8372, "step": 79120 }, { "epoch": 0.4973503820242374, "grad_norm": 6.304361820220947, "learning_rate": 1.6689144447331587e-05, "loss": 1.7998, "step": 79130 }, { "epoch": 0.49741323434093454, "grad_norm": 6.0489702224731445, "learning_rate": 1.668872534638693e-05, "loss": 1.6389, "step": 79140 }, { "epoch": 0.49747608665763166, "grad_norm": 7.240331649780273, "learning_rate": 1.6688306245442278e-05, "loss": 1.8321, "step": 79150 }, { "epoch": 0.49753893897432877, "grad_norm": 6.915675163269043, "learning_rate": 1.6687887144497625e-05, "loss": 1.7416, "step": 79160 }, { "epoch": 0.4976017912910259, "grad_norm": 6.851961612701416, "learning_rate": 1.6687468043552972e-05, "loss": 1.8687, "step": 79170 }, { "epoch": 0.497664643607723, "grad_norm": 6.435169219970703, "learning_rate": 1.6687048942608316e-05, "loss": 1.7454, "step": 79180 }, { "epoch": 0.4977274959244201, "grad_norm": 6.554869174957275, "learning_rate": 1.6686629841663663e-05, "loss": 1.667, "step": 79190 }, { "epoch": 0.4977903482411172, "grad_norm": 5.473569393157959, "learning_rate": 1.668621074071901e-05, "loss": 1.6133, "step": 79200 }, { "epoch": 0.4978532005578143, "grad_norm": 6.134435653686523, "learning_rate": 1.6685791639774357e-05, "loss": 1.4101, "step": 79210 }, { "epoch": 0.4979160528745114, "grad_norm": 6.592728614807129, "learning_rate": 1.6685372538829704e-05, "loss": 1.398, "step": 79220 }, { "epoch": 0.4979789051912085, "grad_norm": 5.825088977813721, "learning_rate": 1.668495343788505e-05, "loss": 1.629, "step": 79230 }, { "epoch": 0.49804175750790564, "grad_norm": 6.5205769538879395, "learning_rate": 1.6684534336940398e-05, "loss": 1.773, "step": 79240 }, { "epoch": 0.49810460982460275, "grad_norm": 5.9171833992004395, "learning_rate": 1.6684115235995745e-05, "loss": 1.5642, "step": 79250 }, { "epoch": 0.49816746214129987, "grad_norm": 6.904202461242676, "learning_rate": 1.6683696135051092e-05, "loss": 1.5912, "step": 79260 }, { "epoch": 0.498230314457997, "grad_norm": 5.861415863037109, "learning_rate": 1.6683277034106436e-05, "loss": 1.6165, "step": 79270 }, { "epoch": 0.4982931667746941, "grad_norm": 6.141399383544922, "learning_rate": 1.6682857933161783e-05, "loss": 1.7482, "step": 79280 }, { "epoch": 0.4983560190913912, "grad_norm": 6.803699016571045, "learning_rate": 1.668243883221713e-05, "loss": 1.6359, "step": 79290 }, { "epoch": 0.4984188714080883, "grad_norm": 7.952588081359863, "learning_rate": 1.6682019731272477e-05, "loss": 1.743, "step": 79300 }, { "epoch": 0.49848172372478544, "grad_norm": 6.018862724304199, "learning_rate": 1.668160063032782e-05, "loss": 1.6078, "step": 79310 }, { "epoch": 0.49854457604148256, "grad_norm": 6.411401748657227, "learning_rate": 1.6681181529383168e-05, "loss": 1.673, "step": 79320 }, { "epoch": 0.4986074283581796, "grad_norm": 6.5400872230529785, "learning_rate": 1.6680762428438515e-05, "loss": 1.6518, "step": 79330 }, { "epoch": 0.49867028067487673, "grad_norm": 7.193464279174805, "learning_rate": 1.6680343327493862e-05, "loss": 1.6081, "step": 79340 }, { "epoch": 0.49873313299157385, "grad_norm": 7.338587284088135, "learning_rate": 1.667992422654921e-05, "loss": 1.7216, "step": 79350 }, { "epoch": 0.49879598530827096, "grad_norm": 6.640892028808594, "learning_rate": 1.6679505125604553e-05, "loss": 1.7834, "step": 79360 }, { "epoch": 0.4988588376249681, "grad_norm": 6.7227349281311035, "learning_rate": 1.66790860246599e-05, "loss": 1.7339, "step": 79370 }, { "epoch": 0.4989216899416652, "grad_norm": 7.434032917022705, "learning_rate": 1.6678666923715247e-05, "loss": 1.6488, "step": 79380 }, { "epoch": 0.4989845422583623, "grad_norm": 8.0999174118042, "learning_rate": 1.6678247822770594e-05, "loss": 1.7659, "step": 79390 }, { "epoch": 0.4990473945750594, "grad_norm": 6.468531608581543, "learning_rate": 1.6677828721825938e-05, "loss": 1.7375, "step": 79400 }, { "epoch": 0.49911024689175654, "grad_norm": 6.481029987335205, "learning_rate": 1.6677409620881285e-05, "loss": 1.8437, "step": 79410 }, { "epoch": 0.49917309920845365, "grad_norm": 5.684129238128662, "learning_rate": 1.6676990519936632e-05, "loss": 1.7311, "step": 79420 }, { "epoch": 0.49923595152515077, "grad_norm": 5.615518569946289, "learning_rate": 1.667657141899198e-05, "loss": 1.6639, "step": 79430 }, { "epoch": 0.4992988038418479, "grad_norm": 6.510375499725342, "learning_rate": 1.6676152318047326e-05, "loss": 1.6085, "step": 79440 }, { "epoch": 0.49936165615854494, "grad_norm": 6.378992080688477, "learning_rate": 1.6675733217102673e-05, "loss": 1.5479, "step": 79450 }, { "epoch": 0.49942450847524206, "grad_norm": 5.967948913574219, "learning_rate": 1.667531411615802e-05, "loss": 1.5811, "step": 79460 }, { "epoch": 0.4994873607919392, "grad_norm": 6.28527307510376, "learning_rate": 1.6674895015213367e-05, "loss": 1.732, "step": 79470 }, { "epoch": 0.4995502131086363, "grad_norm": 7.023561954498291, "learning_rate": 1.6674475914268714e-05, "loss": 1.6311, "step": 79480 }, { "epoch": 0.4996130654253334, "grad_norm": 12.292658805847168, "learning_rate": 1.6674056813324058e-05, "loss": 1.5554, "step": 79490 }, { "epoch": 0.4996759177420305, "grad_norm": 7.064693450927734, "learning_rate": 1.6673637712379405e-05, "loss": 1.8701, "step": 79500 }, { "epoch": 0.49973877005872763, "grad_norm": 6.772129058837891, "learning_rate": 1.6673218611434752e-05, "loss": 1.728, "step": 79510 }, { "epoch": 0.49980162237542475, "grad_norm": 6.433325290679932, "learning_rate": 1.66727995104901e-05, "loss": 1.6161, "step": 79520 }, { "epoch": 0.49986447469212186, "grad_norm": 6.294651985168457, "learning_rate": 1.6672380409545446e-05, "loss": 1.8939, "step": 79530 }, { "epoch": 0.499927327008819, "grad_norm": 7.601191997528076, "learning_rate": 1.667196130860079e-05, "loss": 1.646, "step": 79540 }, { "epoch": 0.4999901793255161, "grad_norm": 6.567856311798096, "learning_rate": 1.6671542207656137e-05, "loss": 1.6362, "step": 79550 }, { "epoch": 0.5000530316422132, "grad_norm": 7.186223983764648, "learning_rate": 1.6671123106711484e-05, "loss": 1.7599, "step": 79560 }, { "epoch": 0.5001158839589103, "grad_norm": 7.041337490081787, "learning_rate": 1.667070400576683e-05, "loss": 1.6379, "step": 79570 }, { "epoch": 0.5001787362756074, "grad_norm": 6.522541522979736, "learning_rate": 1.6670284904822175e-05, "loss": 1.4968, "step": 79580 }, { "epoch": 0.5002415885923045, "grad_norm": 7.405561923980713, "learning_rate": 1.6669865803877522e-05, "loss": 1.6606, "step": 79590 }, { "epoch": 0.5003044409090016, "grad_norm": 7.2410173416137695, "learning_rate": 1.666944670293287e-05, "loss": 1.7189, "step": 79600 }, { "epoch": 0.5003672932256987, "grad_norm": 6.9519476890563965, "learning_rate": 1.6669027601988216e-05, "loss": 1.6469, "step": 79610 }, { "epoch": 0.5004301455423958, "grad_norm": 6.269879341125488, "learning_rate": 1.6668608501043563e-05, "loss": 1.6043, "step": 79620 }, { "epoch": 0.500492997859093, "grad_norm": 5.454013824462891, "learning_rate": 1.666818940009891e-05, "loss": 1.703, "step": 79630 }, { "epoch": 0.5005558501757901, "grad_norm": 6.524136066436768, "learning_rate": 1.6667770299154257e-05, "loss": 1.6494, "step": 79640 }, { "epoch": 0.5006187024924872, "grad_norm": 7.585860252380371, "learning_rate": 1.66673511982096e-05, "loss": 1.9085, "step": 79650 }, { "epoch": 0.5006815548091843, "grad_norm": 6.055868148803711, "learning_rate": 1.6666932097264948e-05, "loss": 1.7279, "step": 79660 }, { "epoch": 0.5007444071258814, "grad_norm": 6.640485763549805, "learning_rate": 1.6666512996320295e-05, "loss": 1.617, "step": 79670 }, { "epoch": 0.5008072594425785, "grad_norm": 7.06798791885376, "learning_rate": 1.6666093895375642e-05, "loss": 1.834, "step": 79680 }, { "epoch": 0.5008701117592756, "grad_norm": 6.391556262969971, "learning_rate": 1.666567479443099e-05, "loss": 1.6155, "step": 79690 }, { "epoch": 0.5009329640759728, "grad_norm": 6.465502738952637, "learning_rate": 1.6665255693486336e-05, "loss": 1.6234, "step": 79700 }, { "epoch": 0.5009958163926699, "grad_norm": 6.327841758728027, "learning_rate": 1.6664878502636144e-05, "loss": 1.5977, "step": 79710 }, { "epoch": 0.501058668709367, "grad_norm": 7.11573600769043, "learning_rate": 1.666445940169149e-05, "loss": 1.9059, "step": 79720 }, { "epoch": 0.5011215210260641, "grad_norm": 7.2466840744018555, "learning_rate": 1.666404030074684e-05, "loss": 1.8072, "step": 79730 }, { "epoch": 0.5011843733427612, "grad_norm": 7.251349925994873, "learning_rate": 1.6663621199802185e-05, "loss": 1.7005, "step": 79740 }, { "epoch": 0.5012472256594583, "grad_norm": 6.59246301651001, "learning_rate": 1.6663202098857533e-05, "loss": 1.7184, "step": 79750 }, { "epoch": 0.5013100779761555, "grad_norm": 6.917023658752441, "learning_rate": 1.666278299791288e-05, "loss": 1.7669, "step": 79760 }, { "epoch": 0.5013729302928525, "grad_norm": 7.197429656982422, "learning_rate": 1.6662363896968227e-05, "loss": 1.9412, "step": 79770 }, { "epoch": 0.5014357826095496, "grad_norm": 6.37503719329834, "learning_rate": 1.6661944796023574e-05, "loss": 1.8577, "step": 79780 }, { "epoch": 0.5014986349262467, "grad_norm": 8.113883972167969, "learning_rate": 1.6661525695078917e-05, "loss": 1.7181, "step": 79790 }, { "epoch": 0.5015614872429438, "grad_norm": 8.317326545715332, "learning_rate": 1.6661106594134264e-05, "loss": 1.6948, "step": 79800 }, { "epoch": 0.5016243395596409, "grad_norm": 6.044445037841797, "learning_rate": 1.666068749318961e-05, "loss": 1.753, "step": 79810 }, { "epoch": 0.501687191876338, "grad_norm": 5.891312599182129, "learning_rate": 1.666026839224496e-05, "loss": 1.8129, "step": 79820 }, { "epoch": 0.5017500441930351, "grad_norm": 6.871014595031738, "learning_rate": 1.6659849291300302e-05, "loss": 1.8918, "step": 79830 }, { "epoch": 0.5018128965097323, "grad_norm": 7.177075386047363, "learning_rate": 1.665943019035565e-05, "loss": 1.8326, "step": 79840 }, { "epoch": 0.5018757488264294, "grad_norm": 6.608696460723877, "learning_rate": 1.6659011089410996e-05, "loss": 1.6227, "step": 79850 }, { "epoch": 0.5019386011431265, "grad_norm": 7.607576370239258, "learning_rate": 1.6658591988466344e-05, "loss": 1.766, "step": 79860 }, { "epoch": 0.5020014534598236, "grad_norm": 6.3681254386901855, "learning_rate": 1.665817288752169e-05, "loss": 1.8021, "step": 79870 }, { "epoch": 0.5020643057765207, "grad_norm": 6.925334930419922, "learning_rate": 1.6657753786577034e-05, "loss": 1.565, "step": 79880 }, { "epoch": 0.5021271580932178, "grad_norm": 5.767662048339844, "learning_rate": 1.665733468563238e-05, "loss": 1.5511, "step": 79890 }, { "epoch": 0.502190010409915, "grad_norm": 6.4178147315979, "learning_rate": 1.665691558468773e-05, "loss": 1.982, "step": 79900 }, { "epoch": 0.5022528627266121, "grad_norm": 6.5203142166137695, "learning_rate": 1.6656496483743075e-05, "loss": 1.6557, "step": 79910 }, { "epoch": 0.5023157150433092, "grad_norm": 6.301761150360107, "learning_rate": 1.6656077382798423e-05, "loss": 1.7178, "step": 79920 }, { "epoch": 0.5023785673600063, "grad_norm": 7.500464916229248, "learning_rate": 1.665565828185377e-05, "loss": 1.5183, "step": 79930 }, { "epoch": 0.5024414196767034, "grad_norm": 6.666856288909912, "learning_rate": 1.6655239180909117e-05, "loss": 1.7092, "step": 79940 }, { "epoch": 0.5025042719934005, "grad_norm": 6.485771656036377, "learning_rate": 1.6654820079964464e-05, "loss": 1.6388, "step": 79950 }, { "epoch": 0.5025671243100976, "grad_norm": 6.726188659667969, "learning_rate": 1.6654400979019807e-05, "loss": 1.7191, "step": 79960 }, { "epoch": 0.5026299766267948, "grad_norm": 6.920906066894531, "learning_rate": 1.6653981878075155e-05, "loss": 1.7832, "step": 79970 }, { "epoch": 0.5026928289434919, "grad_norm": 7.067952632904053, "learning_rate": 1.66535627771305e-05, "loss": 1.8002, "step": 79980 }, { "epoch": 0.502755681260189, "grad_norm": 7.4023518562316895, "learning_rate": 1.665314367618585e-05, "loss": 1.5641, "step": 79990 }, { "epoch": 0.5028185335768861, "grad_norm": 6.506004333496094, "learning_rate": 1.6652724575241196e-05, "loss": 1.5715, "step": 80000 }, { "epoch": 0.5028813858935832, "grad_norm": 6.265328884124756, "learning_rate": 1.665230547429654e-05, "loss": 1.505, "step": 80010 }, { "epoch": 0.5029442382102803, "grad_norm": 6.966944217681885, "learning_rate": 1.6651886373351886e-05, "loss": 1.6603, "step": 80020 }, { "epoch": 0.5030070905269773, "grad_norm": 6.564925670623779, "learning_rate": 1.6651467272407234e-05, "loss": 1.8559, "step": 80030 }, { "epoch": 0.5030699428436745, "grad_norm": 6.52651834487915, "learning_rate": 1.665104817146258e-05, "loss": 1.8099, "step": 80040 }, { "epoch": 0.5031327951603716, "grad_norm": 5.758645057678223, "learning_rate": 1.6650629070517924e-05, "loss": 1.7989, "step": 80050 }, { "epoch": 0.5031956474770687, "grad_norm": 6.67716646194458, "learning_rate": 1.665020996957327e-05, "loss": 1.5506, "step": 80060 }, { "epoch": 0.5032584997937658, "grad_norm": 6.314333915710449, "learning_rate": 1.664979086862862e-05, "loss": 1.6887, "step": 80070 }, { "epoch": 0.5033213521104629, "grad_norm": 6.7269768714904785, "learning_rate": 1.6649371767683966e-05, "loss": 1.4443, "step": 80080 }, { "epoch": 0.50338420442716, "grad_norm": 6.974546432495117, "learning_rate": 1.6648952666739313e-05, "loss": 1.7091, "step": 80090 }, { "epoch": 0.5034470567438571, "grad_norm": 7.477077007293701, "learning_rate": 1.6648533565794656e-05, "loss": 1.6621, "step": 80100 }, { "epoch": 0.5035099090605543, "grad_norm": 6.204904556274414, "learning_rate": 1.6648114464850003e-05, "loss": 1.8354, "step": 80110 }, { "epoch": 0.5035727613772514, "grad_norm": 6.625732898712158, "learning_rate": 1.664769536390535e-05, "loss": 1.6104, "step": 80120 }, { "epoch": 0.5036356136939485, "grad_norm": 6.183162689208984, "learning_rate": 1.6647276262960697e-05, "loss": 1.557, "step": 80130 }, { "epoch": 0.5036984660106456, "grad_norm": 7.019721031188965, "learning_rate": 1.6646857162016045e-05, "loss": 1.589, "step": 80140 }, { "epoch": 0.5037613183273427, "grad_norm": 6.763312816619873, "learning_rate": 1.664643806107139e-05, "loss": 1.4686, "step": 80150 }, { "epoch": 0.5038241706440398, "grad_norm": 6.7790727615356445, "learning_rate": 1.664601896012674e-05, "loss": 1.6208, "step": 80160 }, { "epoch": 0.503887022960737, "grad_norm": 6.917080402374268, "learning_rate": 1.6645599859182086e-05, "loss": 1.8251, "step": 80170 }, { "epoch": 0.5039498752774341, "grad_norm": 6.307061672210693, "learning_rate": 1.6645180758237433e-05, "loss": 1.9444, "step": 80180 }, { "epoch": 0.5040127275941312, "grad_norm": 8.953438758850098, "learning_rate": 1.6644761657292777e-05, "loss": 1.6907, "step": 80190 }, { "epoch": 0.5040755799108283, "grad_norm": 6.719122886657715, "learning_rate": 1.6644342556348124e-05, "loss": 1.6157, "step": 80200 }, { "epoch": 0.5041384322275254, "grad_norm": 5.839183330535889, "learning_rate": 1.664392345540347e-05, "loss": 1.6481, "step": 80210 }, { "epoch": 0.5042012845442225, "grad_norm": 6.459103584289551, "learning_rate": 1.6643504354458818e-05, "loss": 1.7897, "step": 80220 }, { "epoch": 0.5042641368609196, "grad_norm": 6.475305080413818, "learning_rate": 1.664308525351416e-05, "loss": 1.6833, "step": 80230 }, { "epoch": 0.5043269891776168, "grad_norm": 7.551462650299072, "learning_rate": 1.664266615256951e-05, "loss": 1.5904, "step": 80240 }, { "epoch": 0.5043898414943139, "grad_norm": 7.036133766174316, "learning_rate": 1.6642247051624856e-05, "loss": 1.7531, "step": 80250 }, { "epoch": 0.504452693811011, "grad_norm": 7.232636451721191, "learning_rate": 1.6641827950680203e-05, "loss": 1.7086, "step": 80260 }, { "epoch": 0.5045155461277081, "grad_norm": 6.5783915519714355, "learning_rate": 1.664140884973555e-05, "loss": 1.8095, "step": 80270 }, { "epoch": 0.5045783984444051, "grad_norm": 6.384034156799316, "learning_rate": 1.6640989748790893e-05, "loss": 1.7586, "step": 80280 }, { "epoch": 0.5046412507611022, "grad_norm": 6.8381805419921875, "learning_rate": 1.664057064784624e-05, "loss": 1.6873, "step": 80290 }, { "epoch": 0.5047041030777993, "grad_norm": 6.193711280822754, "learning_rate": 1.6640151546901588e-05, "loss": 1.6226, "step": 80300 }, { "epoch": 0.5047669553944965, "grad_norm": 6.295399188995361, "learning_rate": 1.6639732445956935e-05, "loss": 1.9116, "step": 80310 }, { "epoch": 0.5048298077111936, "grad_norm": 7.165435791015625, "learning_rate": 1.663931334501228e-05, "loss": 1.6044, "step": 80320 }, { "epoch": 0.5048926600278907, "grad_norm": 7.73527193069458, "learning_rate": 1.663889424406763e-05, "loss": 1.831, "step": 80330 }, { "epoch": 0.5049555123445878, "grad_norm": 7.749932765960693, "learning_rate": 1.6638475143122972e-05, "loss": 1.6931, "step": 80340 }, { "epoch": 0.5050183646612849, "grad_norm": 6.340909481048584, "learning_rate": 1.663805604217832e-05, "loss": 1.6823, "step": 80350 }, { "epoch": 0.505081216977982, "grad_norm": 6.750431537628174, "learning_rate": 1.6637636941233667e-05, "loss": 1.7272, "step": 80360 }, { "epoch": 0.5051440692946791, "grad_norm": 6.4755120277404785, "learning_rate": 1.6637217840289014e-05, "loss": 1.8015, "step": 80370 }, { "epoch": 0.5052069216113763, "grad_norm": 6.332870960235596, "learning_rate": 1.663679873934436e-05, "loss": 1.7403, "step": 80380 }, { "epoch": 0.5052697739280734, "grad_norm": 6.5690412521362305, "learning_rate": 1.6636379638399708e-05, "loss": 1.8548, "step": 80390 }, { "epoch": 0.5053326262447705, "grad_norm": 7.2789225578308105, "learning_rate": 1.6635960537455055e-05, "loss": 1.7637, "step": 80400 }, { "epoch": 0.5053954785614676, "grad_norm": 7.356903076171875, "learning_rate": 1.66355414365104e-05, "loss": 1.7046, "step": 80410 }, { "epoch": 0.5054583308781647, "grad_norm": 6.851481914520264, "learning_rate": 1.6635122335565746e-05, "loss": 1.7586, "step": 80420 }, { "epoch": 0.5055211831948618, "grad_norm": 6.518064498901367, "learning_rate": 1.6634703234621093e-05, "loss": 1.7848, "step": 80430 }, { "epoch": 0.505584035511559, "grad_norm": 6.317898273468018, "learning_rate": 1.663428413367644e-05, "loss": 1.6302, "step": 80440 }, { "epoch": 0.5056468878282561, "grad_norm": 6.76554536819458, "learning_rate": 1.6633865032731783e-05, "loss": 1.7885, "step": 80450 }, { "epoch": 0.5057097401449532, "grad_norm": 7.445808410644531, "learning_rate": 1.663344593178713e-05, "loss": 1.763, "step": 80460 }, { "epoch": 0.5057725924616503, "grad_norm": 7.02715539932251, "learning_rate": 1.6633026830842478e-05, "loss": 1.8355, "step": 80470 }, { "epoch": 0.5058354447783474, "grad_norm": 6.42625093460083, "learning_rate": 1.6632607729897825e-05, "loss": 1.5474, "step": 80480 }, { "epoch": 0.5058982970950445, "grad_norm": 7.86593770980835, "learning_rate": 1.6632188628953172e-05, "loss": 1.7894, "step": 80490 }, { "epoch": 0.5059611494117416, "grad_norm": 6.331607341766357, "learning_rate": 1.6631769528008515e-05, "loss": 1.7241, "step": 80500 }, { "epoch": 0.5060240017284388, "grad_norm": 7.302580833435059, "learning_rate": 1.6631350427063862e-05, "loss": 1.7968, "step": 80510 }, { "epoch": 0.5060868540451359, "grad_norm": 5.495927810668945, "learning_rate": 1.663093132611921e-05, "loss": 1.6062, "step": 80520 }, { "epoch": 0.506149706361833, "grad_norm": 6.512253761291504, "learning_rate": 1.6630512225174557e-05, "loss": 1.6953, "step": 80530 }, { "epoch": 0.50621255867853, "grad_norm": 8.227531433105469, "learning_rate": 1.6630093124229904e-05, "loss": 1.688, "step": 80540 }, { "epoch": 0.5062754109952271, "grad_norm": 7.652024745941162, "learning_rate": 1.662967402328525e-05, "loss": 1.719, "step": 80550 }, { "epoch": 0.5063382633119242, "grad_norm": 6.868397235870361, "learning_rate": 1.6629254922340598e-05, "loss": 1.5406, "step": 80560 }, { "epoch": 0.5064011156286213, "grad_norm": 5.889806270599365, "learning_rate": 1.6628835821395945e-05, "loss": 1.7383, "step": 80570 }, { "epoch": 0.5064639679453184, "grad_norm": 5.755928039550781, "learning_rate": 1.6628416720451292e-05, "loss": 1.7932, "step": 80580 }, { "epoch": 0.5065268202620156, "grad_norm": 7.116064071655273, "learning_rate": 1.6627997619506636e-05, "loss": 1.6301, "step": 80590 }, { "epoch": 0.5065896725787127, "grad_norm": 6.306654930114746, "learning_rate": 1.6627578518561983e-05, "loss": 1.5587, "step": 80600 }, { "epoch": 0.5066525248954098, "grad_norm": 6.208273887634277, "learning_rate": 1.662715941761733e-05, "loss": 1.6283, "step": 80610 }, { "epoch": 0.5067153772121069, "grad_norm": 6.317222595214844, "learning_rate": 1.6626740316672677e-05, "loss": 1.6919, "step": 80620 }, { "epoch": 0.506778229528804, "grad_norm": 6.808119297027588, "learning_rate": 1.662632121572802e-05, "loss": 1.7396, "step": 80630 }, { "epoch": 0.5068410818455011, "grad_norm": 6.985289573669434, "learning_rate": 1.6625902114783368e-05, "loss": 1.8456, "step": 80640 }, { "epoch": 0.5069039341621983, "grad_norm": 7.419642925262451, "learning_rate": 1.6625483013838715e-05, "loss": 1.6966, "step": 80650 }, { "epoch": 0.5069667864788954, "grad_norm": 5.950889587402344, "learning_rate": 1.6625063912894062e-05, "loss": 1.8473, "step": 80660 }, { "epoch": 0.5070296387955925, "grad_norm": 6.5306878089904785, "learning_rate": 1.662464481194941e-05, "loss": 1.8264, "step": 80670 }, { "epoch": 0.5070924911122896, "grad_norm": 6.157708644866943, "learning_rate": 1.6624225711004752e-05, "loss": 1.642, "step": 80680 }, { "epoch": 0.5071553434289867, "grad_norm": 6.647695064544678, "learning_rate": 1.66238066100601e-05, "loss": 1.7642, "step": 80690 }, { "epoch": 0.5072181957456838, "grad_norm": 7.1048359870910645, "learning_rate": 1.6623387509115447e-05, "loss": 1.6765, "step": 80700 }, { "epoch": 0.507281048062381, "grad_norm": 7.053614139556885, "learning_rate": 1.6622968408170794e-05, "loss": 1.5628, "step": 80710 }, { "epoch": 0.5073439003790781, "grad_norm": 7.329823970794678, "learning_rate": 1.6622549307226137e-05, "loss": 1.7809, "step": 80720 }, { "epoch": 0.5074067526957752, "grad_norm": 5.393566608428955, "learning_rate": 1.6622130206281484e-05, "loss": 1.5963, "step": 80730 }, { "epoch": 0.5074696050124723, "grad_norm": 6.427209854125977, "learning_rate": 1.662171110533683e-05, "loss": 1.5768, "step": 80740 }, { "epoch": 0.5075324573291694, "grad_norm": 6.104846954345703, "learning_rate": 1.662129200439218e-05, "loss": 1.9616, "step": 80750 }, { "epoch": 0.5075953096458665, "grad_norm": 6.925151824951172, "learning_rate": 1.6620872903447526e-05, "loss": 1.6709, "step": 80760 }, { "epoch": 0.5076581619625636, "grad_norm": 8.292290687561035, "learning_rate": 1.6620453802502873e-05, "loss": 1.6986, "step": 80770 }, { "epoch": 0.5077210142792608, "grad_norm": 7.496538162231445, "learning_rate": 1.662003470155822e-05, "loss": 1.6919, "step": 80780 }, { "epoch": 0.5077838665959578, "grad_norm": 5.652065753936768, "learning_rate": 1.6619615600613567e-05, "loss": 1.6888, "step": 80790 }, { "epoch": 0.5078467189126549, "grad_norm": 6.844841480255127, "learning_rate": 1.6619196499668914e-05, "loss": 1.8496, "step": 80800 }, { "epoch": 0.507909571229352, "grad_norm": 5.9920806884765625, "learning_rate": 1.6618777398724258e-05, "loss": 1.5783, "step": 80810 }, { "epoch": 0.5079724235460491, "grad_norm": 7.539910793304443, "learning_rate": 1.6618358297779605e-05, "loss": 1.8375, "step": 80820 }, { "epoch": 0.5080352758627462, "grad_norm": 7.230218887329102, "learning_rate": 1.6617939196834952e-05, "loss": 1.7045, "step": 80830 }, { "epoch": 0.5080981281794433, "grad_norm": 6.424299716949463, "learning_rate": 1.66175200958903e-05, "loss": 1.5901, "step": 80840 }, { "epoch": 0.5081609804961404, "grad_norm": 8.0871000289917, "learning_rate": 1.6617100994945643e-05, "loss": 1.7978, "step": 80850 }, { "epoch": 0.5082238328128376, "grad_norm": 6.569265365600586, "learning_rate": 1.661668189400099e-05, "loss": 1.5899, "step": 80860 }, { "epoch": 0.5082866851295347, "grad_norm": 6.608719348907471, "learning_rate": 1.6616262793056337e-05, "loss": 1.5594, "step": 80870 }, { "epoch": 0.5083495374462318, "grad_norm": 7.097671031951904, "learning_rate": 1.6615843692111684e-05, "loss": 1.6149, "step": 80880 }, { "epoch": 0.5084123897629289, "grad_norm": 7.51013708114624, "learning_rate": 1.661542459116703e-05, "loss": 1.6666, "step": 80890 }, { "epoch": 0.508475242079626, "grad_norm": 7.3628058433532715, "learning_rate": 1.6615005490222374e-05, "loss": 1.4975, "step": 80900 }, { "epoch": 0.5085380943963231, "grad_norm": 7.488697528839111, "learning_rate": 1.661458638927772e-05, "loss": 1.8612, "step": 80910 }, { "epoch": 0.5086009467130203, "grad_norm": 6.688188076019287, "learning_rate": 1.661416728833307e-05, "loss": 1.6634, "step": 80920 }, { "epoch": 0.5086637990297174, "grad_norm": 5.443098068237305, "learning_rate": 1.6613748187388416e-05, "loss": 1.551, "step": 80930 }, { "epoch": 0.5087266513464145, "grad_norm": 6.645493030548096, "learning_rate": 1.6613329086443763e-05, "loss": 1.6804, "step": 80940 }, { "epoch": 0.5087895036631116, "grad_norm": 6.472682476043701, "learning_rate": 1.661290998549911e-05, "loss": 1.4649, "step": 80950 }, { "epoch": 0.5088523559798087, "grad_norm": 6.152359485626221, "learning_rate": 1.6612490884554457e-05, "loss": 1.6103, "step": 80960 }, { "epoch": 0.5089152082965058, "grad_norm": 7.1260986328125, "learning_rate": 1.66120717836098e-05, "loss": 1.6803, "step": 80970 }, { "epoch": 0.5089780606132029, "grad_norm": 7.128537178039551, "learning_rate": 1.6611652682665148e-05, "loss": 1.7006, "step": 80980 }, { "epoch": 0.5090409129299001, "grad_norm": 6.813705921173096, "learning_rate": 1.6611233581720495e-05, "loss": 1.9884, "step": 80990 }, { "epoch": 0.5091037652465972, "grad_norm": 6.137548923492432, "learning_rate": 1.6610814480775842e-05, "loss": 1.7066, "step": 81000 }, { "epoch": 0.5091666175632943, "grad_norm": 7.284581184387207, "learning_rate": 1.661039537983119e-05, "loss": 1.6337, "step": 81010 }, { "epoch": 0.5092294698799914, "grad_norm": 7.394177436828613, "learning_rate": 1.6609976278886536e-05, "loss": 1.6749, "step": 81020 }, { "epoch": 0.5092923221966885, "grad_norm": 7.670345783233643, "learning_rate": 1.660955717794188e-05, "loss": 1.7117, "step": 81030 }, { "epoch": 0.5093551745133856, "grad_norm": 5.4331817626953125, "learning_rate": 1.6609138076997227e-05, "loss": 1.5852, "step": 81040 }, { "epoch": 0.5094180268300826, "grad_norm": 7.567220687866211, "learning_rate": 1.6608718976052574e-05, "loss": 1.5489, "step": 81050 }, { "epoch": 0.5094808791467798, "grad_norm": 6.857569217681885, "learning_rate": 1.660829987510792e-05, "loss": 1.9249, "step": 81060 }, { "epoch": 0.5095437314634769, "grad_norm": 6.4807562828063965, "learning_rate": 1.6607880774163265e-05, "loss": 1.9061, "step": 81070 }, { "epoch": 0.509606583780174, "grad_norm": 6.4150567054748535, "learning_rate": 1.660746167321861e-05, "loss": 1.6716, "step": 81080 }, { "epoch": 0.5096694360968711, "grad_norm": 6.111120223999023, "learning_rate": 1.660704257227396e-05, "loss": 1.858, "step": 81090 }, { "epoch": 0.5097322884135682, "grad_norm": 6.444390773773193, "learning_rate": 1.6606623471329306e-05, "loss": 1.4956, "step": 81100 }, { "epoch": 0.5097951407302653, "grad_norm": 7.489157199859619, "learning_rate": 1.6606204370384653e-05, "loss": 1.582, "step": 81110 }, { "epoch": 0.5098579930469624, "grad_norm": 6.417622089385986, "learning_rate": 1.6605785269439996e-05, "loss": 1.6753, "step": 81120 }, { "epoch": 0.5099208453636596, "grad_norm": 6.800785064697266, "learning_rate": 1.6605366168495344e-05, "loss": 1.6678, "step": 81130 }, { "epoch": 0.5099836976803567, "grad_norm": 5.283477783203125, "learning_rate": 1.660494706755069e-05, "loss": 1.6663, "step": 81140 }, { "epoch": 0.5100465499970538, "grad_norm": 6.489543437957764, "learning_rate": 1.6604527966606038e-05, "loss": 1.7745, "step": 81150 }, { "epoch": 0.5101094023137509, "grad_norm": 7.533888816833496, "learning_rate": 1.6604108865661385e-05, "loss": 1.7328, "step": 81160 }, { "epoch": 0.510172254630448, "grad_norm": 7.438111782073975, "learning_rate": 1.6603689764716732e-05, "loss": 1.5361, "step": 81170 }, { "epoch": 0.5102351069471451, "grad_norm": 7.251165866851807, "learning_rate": 1.660327066377208e-05, "loss": 1.5971, "step": 81180 }, { "epoch": 0.5102979592638422, "grad_norm": 7.350485801696777, "learning_rate": 1.6602851562827426e-05, "loss": 1.7068, "step": 81190 }, { "epoch": 0.5103608115805394, "grad_norm": 7.594714164733887, "learning_rate": 1.6602432461882773e-05, "loss": 1.6654, "step": 81200 }, { "epoch": 0.5104236638972365, "grad_norm": 9.837715148925781, "learning_rate": 1.6602013360938117e-05, "loss": 1.7133, "step": 81210 }, { "epoch": 0.5104865162139336, "grad_norm": 6.73118782043457, "learning_rate": 1.6601594259993464e-05, "loss": 1.6478, "step": 81220 }, { "epoch": 0.5105493685306307, "grad_norm": 7.012758255004883, "learning_rate": 1.660117515904881e-05, "loss": 1.5899, "step": 81230 }, { "epoch": 0.5106122208473278, "grad_norm": 7.66168737411499, "learning_rate": 1.6600756058104158e-05, "loss": 1.4999, "step": 81240 }, { "epoch": 0.5106750731640249, "grad_norm": 6.692058563232422, "learning_rate": 1.66003369571595e-05, "loss": 1.7145, "step": 81250 }, { "epoch": 0.510737925480722, "grad_norm": 7.03905725479126, "learning_rate": 1.659991785621485e-05, "loss": 1.6896, "step": 81260 }, { "epoch": 0.5108007777974192, "grad_norm": 6.233604431152344, "learning_rate": 1.6599498755270196e-05, "loss": 1.63, "step": 81270 }, { "epoch": 0.5108636301141163, "grad_norm": 6.349859714508057, "learning_rate": 1.6599079654325543e-05, "loss": 1.5801, "step": 81280 }, { "epoch": 0.5109264824308134, "grad_norm": 6.295588970184326, "learning_rate": 1.659866055338089e-05, "loss": 1.8677, "step": 81290 }, { "epoch": 0.5109893347475104, "grad_norm": 7.059310436248779, "learning_rate": 1.6598241452436234e-05, "loss": 1.6375, "step": 81300 }, { "epoch": 0.5110521870642075, "grad_norm": 8.250588417053223, "learning_rate": 1.659782235149158e-05, "loss": 1.9543, "step": 81310 }, { "epoch": 0.5111150393809046, "grad_norm": 5.822747707366943, "learning_rate": 1.6597403250546928e-05, "loss": 1.6694, "step": 81320 }, { "epoch": 0.5111778916976017, "grad_norm": 6.962681293487549, "learning_rate": 1.6596984149602275e-05, "loss": 1.5185, "step": 81330 }, { "epoch": 0.5112407440142989, "grad_norm": 7.692488193511963, "learning_rate": 1.6596565048657622e-05, "loss": 1.7951, "step": 81340 }, { "epoch": 0.511303596330996, "grad_norm": 7.1396284103393555, "learning_rate": 1.6596145947712966e-05, "loss": 1.8221, "step": 81350 }, { "epoch": 0.5113664486476931, "grad_norm": 5.930846214294434, "learning_rate": 1.6595726846768313e-05, "loss": 1.7014, "step": 81360 }, { "epoch": 0.5114293009643902, "grad_norm": 7.050151824951172, "learning_rate": 1.659530774582366e-05, "loss": 1.6139, "step": 81370 }, { "epoch": 0.5114921532810873, "grad_norm": 5.861669540405273, "learning_rate": 1.6594888644879007e-05, "loss": 1.6394, "step": 81380 }, { "epoch": 0.5115550055977844, "grad_norm": 6.0154266357421875, "learning_rate": 1.6594469543934354e-05, "loss": 1.5412, "step": 81390 }, { "epoch": 0.5116178579144816, "grad_norm": 6.396653175354004, "learning_rate": 1.65940504429897e-05, "loss": 1.6857, "step": 81400 }, { "epoch": 0.5116807102311787, "grad_norm": 6.678314685821533, "learning_rate": 1.6593631342045048e-05, "loss": 1.5498, "step": 81410 }, { "epoch": 0.5117435625478758, "grad_norm": 6.268189907073975, "learning_rate": 1.6593212241100395e-05, "loss": 1.5248, "step": 81420 }, { "epoch": 0.5118064148645729, "grad_norm": 6.835800647735596, "learning_rate": 1.659279314015574e-05, "loss": 1.5798, "step": 81430 }, { "epoch": 0.51186926718127, "grad_norm": 5.7435832023620605, "learning_rate": 1.6592374039211086e-05, "loss": 1.6341, "step": 81440 }, { "epoch": 0.5119321194979671, "grad_norm": 5.660763263702393, "learning_rate": 1.6591954938266433e-05, "loss": 1.4495, "step": 81450 }, { "epoch": 0.5119949718146642, "grad_norm": 7.291779041290283, "learning_rate": 1.659153583732178e-05, "loss": 1.6463, "step": 81460 }, { "epoch": 0.5120578241313614, "grad_norm": 5.4878363609313965, "learning_rate": 1.6591116736377124e-05, "loss": 1.6303, "step": 81470 }, { "epoch": 0.5121206764480585, "grad_norm": 5.558329105377197, "learning_rate": 1.659069763543247e-05, "loss": 1.7346, "step": 81480 }, { "epoch": 0.5121835287647556, "grad_norm": 6.605159282684326, "learning_rate": 1.6590278534487818e-05, "loss": 1.4828, "step": 81490 }, { "epoch": 0.5122463810814527, "grad_norm": 7.170902252197266, "learning_rate": 1.6589859433543165e-05, "loss": 1.7989, "step": 81500 }, { "epoch": 0.5123092333981498, "grad_norm": 6.16931676864624, "learning_rate": 1.6589440332598512e-05, "loss": 1.5974, "step": 81510 }, { "epoch": 0.5123720857148469, "grad_norm": 6.78040885925293, "learning_rate": 1.6589021231653856e-05, "loss": 1.7621, "step": 81520 }, { "epoch": 0.512434938031544, "grad_norm": 6.225697994232178, "learning_rate": 1.6588602130709203e-05, "loss": 1.8161, "step": 81530 }, { "epoch": 0.5124977903482412, "grad_norm": 6.077652454376221, "learning_rate": 1.658818302976455e-05, "loss": 1.5478, "step": 81540 }, { "epoch": 0.5125606426649383, "grad_norm": 5.984597682952881, "learning_rate": 1.6587763928819897e-05, "loss": 1.3905, "step": 81550 }, { "epoch": 0.5126234949816353, "grad_norm": 7.048511028289795, "learning_rate": 1.6587344827875244e-05, "loss": 1.4659, "step": 81560 }, { "epoch": 0.5126863472983324, "grad_norm": 7.914498329162598, "learning_rate": 1.658692572693059e-05, "loss": 1.7662, "step": 81570 }, { "epoch": 0.5127491996150295, "grad_norm": 7.3455071449279785, "learning_rate": 1.6586506625985938e-05, "loss": 1.9286, "step": 81580 }, { "epoch": 0.5128120519317266, "grad_norm": 7.239782333374023, "learning_rate": 1.6586087525041282e-05, "loss": 1.8077, "step": 81590 }, { "epoch": 0.5128749042484237, "grad_norm": 6.27215051651001, "learning_rate": 1.658566842409663e-05, "loss": 1.8022, "step": 81600 }, { "epoch": 0.5129377565651209, "grad_norm": 4.653055191040039, "learning_rate": 1.6585249323151976e-05, "loss": 1.6557, "step": 81610 }, { "epoch": 0.513000608881818, "grad_norm": 8.42682933807373, "learning_rate": 1.6584830222207323e-05, "loss": 1.8114, "step": 81620 }, { "epoch": 0.5130634611985151, "grad_norm": 7.246777057647705, "learning_rate": 1.658441112126267e-05, "loss": 1.5247, "step": 81630 }, { "epoch": 0.5131263135152122, "grad_norm": 6.788954257965088, "learning_rate": 1.6583992020318017e-05, "loss": 1.6471, "step": 81640 }, { "epoch": 0.5131891658319093, "grad_norm": 6.673451900482178, "learning_rate": 1.658357291937336e-05, "loss": 1.595, "step": 81650 }, { "epoch": 0.5132520181486064, "grad_norm": 6.126130104064941, "learning_rate": 1.6583153818428708e-05, "loss": 1.6165, "step": 81660 }, { "epoch": 0.5133148704653036, "grad_norm": 6.588374137878418, "learning_rate": 1.6582734717484055e-05, "loss": 1.742, "step": 81670 }, { "epoch": 0.5133777227820007, "grad_norm": 6.954014301300049, "learning_rate": 1.6582315616539402e-05, "loss": 1.5431, "step": 81680 }, { "epoch": 0.5134405750986978, "grad_norm": 5.762803554534912, "learning_rate": 1.6581896515594746e-05, "loss": 1.7215, "step": 81690 }, { "epoch": 0.5135034274153949, "grad_norm": 6.05938720703125, "learning_rate": 1.6581477414650093e-05, "loss": 1.7686, "step": 81700 }, { "epoch": 0.513566279732092, "grad_norm": 5.955667972564697, "learning_rate": 1.658105831370544e-05, "loss": 1.6845, "step": 81710 }, { "epoch": 0.5136291320487891, "grad_norm": 6.757447719573975, "learning_rate": 1.6580639212760787e-05, "loss": 1.6399, "step": 81720 }, { "epoch": 0.5136919843654862, "grad_norm": 5.686888694763184, "learning_rate": 1.6580220111816134e-05, "loss": 1.2893, "step": 81730 }, { "epoch": 0.5137548366821834, "grad_norm": 6.872438430786133, "learning_rate": 1.6579801010871478e-05, "loss": 1.742, "step": 81740 }, { "epoch": 0.5138176889988805, "grad_norm": 6.417477607727051, "learning_rate": 1.6579381909926825e-05, "loss": 1.432, "step": 81750 }, { "epoch": 0.5138805413155776, "grad_norm": 6.8781418800354, "learning_rate": 1.6578962808982172e-05, "loss": 1.8819, "step": 81760 }, { "epoch": 0.5139433936322747, "grad_norm": 8.25013256072998, "learning_rate": 1.657854370803752e-05, "loss": 1.7962, "step": 81770 }, { "epoch": 0.5140062459489718, "grad_norm": 7.275095462799072, "learning_rate": 1.6578124607092866e-05, "loss": 1.905, "step": 81780 }, { "epoch": 0.5140690982656689, "grad_norm": 8.735372543334961, "learning_rate": 1.6577705506148213e-05, "loss": 1.9513, "step": 81790 }, { "epoch": 0.514131950582366, "grad_norm": 6.6073689460754395, "learning_rate": 1.657728640520356e-05, "loss": 1.7017, "step": 81800 }, { "epoch": 0.514194802899063, "grad_norm": 6.993113040924072, "learning_rate": 1.6576867304258907e-05, "loss": 1.8756, "step": 81810 }, { "epoch": 0.5142576552157602, "grad_norm": 8.592490196228027, "learning_rate": 1.6576448203314254e-05, "loss": 1.7714, "step": 81820 }, { "epoch": 0.5143205075324573, "grad_norm": 7.213768005371094, "learning_rate": 1.6576029102369598e-05, "loss": 1.724, "step": 81830 }, { "epoch": 0.5143833598491544, "grad_norm": 6.945169925689697, "learning_rate": 1.6575610001424945e-05, "loss": 1.6812, "step": 81840 }, { "epoch": 0.5144462121658515, "grad_norm": 7.712201118469238, "learning_rate": 1.6575190900480292e-05, "loss": 1.6623, "step": 81850 }, { "epoch": 0.5145090644825486, "grad_norm": 6.78476619720459, "learning_rate": 1.657477179953564e-05, "loss": 1.5049, "step": 81860 }, { "epoch": 0.5145719167992457, "grad_norm": 6.764843463897705, "learning_rate": 1.6574352698590983e-05, "loss": 1.9898, "step": 81870 }, { "epoch": 0.5146347691159429, "grad_norm": 7.237852573394775, "learning_rate": 1.657393359764633e-05, "loss": 1.6505, "step": 81880 }, { "epoch": 0.51469762143264, "grad_norm": 6.3692851066589355, "learning_rate": 1.6573514496701677e-05, "loss": 1.728, "step": 81890 }, { "epoch": 0.5147604737493371, "grad_norm": 7.024084568023682, "learning_rate": 1.6573095395757024e-05, "loss": 1.4154, "step": 81900 }, { "epoch": 0.5148233260660342, "grad_norm": 6.015324115753174, "learning_rate": 1.657267629481237e-05, "loss": 1.6963, "step": 81910 }, { "epoch": 0.5148861783827313, "grad_norm": 6.13345193862915, "learning_rate": 1.6572257193867715e-05, "loss": 1.5816, "step": 81920 }, { "epoch": 0.5149490306994284, "grad_norm": 6.69673490524292, "learning_rate": 1.6571838092923062e-05, "loss": 1.7643, "step": 81930 }, { "epoch": 0.5150118830161255, "grad_norm": 6.703579902648926, "learning_rate": 1.657141899197841e-05, "loss": 1.9539, "step": 81940 }, { "epoch": 0.5150747353328227, "grad_norm": 5.560588359832764, "learning_rate": 1.6570999891033756e-05, "loss": 1.5905, "step": 81950 }, { "epoch": 0.5151375876495198, "grad_norm": 5.567323207855225, "learning_rate": 1.6570580790089103e-05, "loss": 1.5554, "step": 81960 }, { "epoch": 0.5152004399662169, "grad_norm": 8.046873092651367, "learning_rate": 1.6570161689144447e-05, "loss": 1.8316, "step": 81970 }, { "epoch": 0.515263292282914, "grad_norm": 6.263387203216553, "learning_rate": 1.6569742588199794e-05, "loss": 1.6225, "step": 81980 }, { "epoch": 0.5153261445996111, "grad_norm": 7.945009708404541, "learning_rate": 1.656932348725514e-05, "loss": 1.7196, "step": 81990 }, { "epoch": 0.5153889969163082, "grad_norm": 7.492959022521973, "learning_rate": 1.6568904386310488e-05, "loss": 1.5255, "step": 82000 }, { "epoch": 0.5154518492330054, "grad_norm": 5.629178047180176, "learning_rate": 1.6568485285365835e-05, "loss": 1.6738, "step": 82010 }, { "epoch": 0.5155147015497025, "grad_norm": 5.612534523010254, "learning_rate": 1.6568066184421182e-05, "loss": 1.5211, "step": 82020 }, { "epoch": 0.5155775538663996, "grad_norm": 6.696208477020264, "learning_rate": 1.656764708347653e-05, "loss": 1.715, "step": 82030 }, { "epoch": 0.5156404061830967, "grad_norm": 7.129432678222656, "learning_rate": 1.6567227982531876e-05, "loss": 1.8996, "step": 82040 }, { "epoch": 0.5157032584997938, "grad_norm": 6.362168312072754, "learning_rate": 1.656680888158722e-05, "loss": 1.7316, "step": 82050 }, { "epoch": 0.5157661108164909, "grad_norm": 6.4442925453186035, "learning_rate": 1.6566389780642567e-05, "loss": 1.8083, "step": 82060 }, { "epoch": 0.5158289631331879, "grad_norm": 6.292686939239502, "learning_rate": 1.6565970679697914e-05, "loss": 1.4529, "step": 82070 }, { "epoch": 0.515891815449885, "grad_norm": 7.054361820220947, "learning_rate": 1.656555157875326e-05, "loss": 1.8437, "step": 82080 }, { "epoch": 0.5159546677665822, "grad_norm": 8.293606758117676, "learning_rate": 1.6565132477808605e-05, "loss": 1.6266, "step": 82090 }, { "epoch": 0.5160175200832793, "grad_norm": 7.016659259796143, "learning_rate": 1.6564713376863952e-05, "loss": 1.5936, "step": 82100 }, { "epoch": 0.5160803723999764, "grad_norm": 6.724484443664551, "learning_rate": 1.65642942759193e-05, "loss": 1.6597, "step": 82110 }, { "epoch": 0.5161432247166735, "grad_norm": 6.709836006164551, "learning_rate": 1.6563875174974646e-05, "loss": 1.7444, "step": 82120 }, { "epoch": 0.5162060770333706, "grad_norm": 6.972070217132568, "learning_rate": 1.6563456074029993e-05, "loss": 1.4151, "step": 82130 }, { "epoch": 0.5162689293500677, "grad_norm": 7.710244655609131, "learning_rate": 1.6563036973085337e-05, "loss": 1.801, "step": 82140 }, { "epoch": 0.5163317816667649, "grad_norm": 6.5838541984558105, "learning_rate": 1.6562617872140684e-05, "loss": 1.6151, "step": 82150 }, { "epoch": 0.516394633983462, "grad_norm": 7.036131858825684, "learning_rate": 1.656219877119603e-05, "loss": 1.6963, "step": 82160 }, { "epoch": 0.5164574863001591, "grad_norm": 6.047787189483643, "learning_rate": 1.6561779670251378e-05, "loss": 1.8514, "step": 82170 }, { "epoch": 0.5165203386168562, "grad_norm": 6.680669784545898, "learning_rate": 1.6561360569306725e-05, "loss": 1.7074, "step": 82180 }, { "epoch": 0.5165831909335533, "grad_norm": 7.738285541534424, "learning_rate": 1.6560941468362072e-05, "loss": 1.5607, "step": 82190 }, { "epoch": 0.5166460432502504, "grad_norm": 7.540075302124023, "learning_rate": 1.656052236741742e-05, "loss": 1.6826, "step": 82200 }, { "epoch": 0.5167088955669475, "grad_norm": 7.239392280578613, "learning_rate": 1.6560103266472766e-05, "loss": 1.631, "step": 82210 }, { "epoch": 0.5167717478836447, "grad_norm": 5.825963497161865, "learning_rate": 1.655968416552811e-05, "loss": 1.7823, "step": 82220 }, { "epoch": 0.5168346002003418, "grad_norm": 6.491040229797363, "learning_rate": 1.6559265064583457e-05, "loss": 1.765, "step": 82230 }, { "epoch": 0.5168974525170389, "grad_norm": 7.147906303405762, "learning_rate": 1.6558845963638804e-05, "loss": 1.7067, "step": 82240 }, { "epoch": 0.516960304833736, "grad_norm": 7.335332870483398, "learning_rate": 1.655842686269415e-05, "loss": 1.7845, "step": 82250 }, { "epoch": 0.5170231571504331, "grad_norm": 6.657925605773926, "learning_rate": 1.6558007761749498e-05, "loss": 1.6984, "step": 82260 }, { "epoch": 0.5170860094671302, "grad_norm": 6.173869609832764, "learning_rate": 1.6557588660804842e-05, "loss": 1.6487, "step": 82270 }, { "epoch": 0.5171488617838274, "grad_norm": 6.072544097900391, "learning_rate": 1.655716955986019e-05, "loss": 1.5735, "step": 82280 }, { "epoch": 0.5172117141005245, "grad_norm": 7.850886344909668, "learning_rate": 1.6556750458915536e-05, "loss": 1.7009, "step": 82290 }, { "epoch": 0.5172745664172216, "grad_norm": 6.694455623626709, "learning_rate": 1.6556331357970883e-05, "loss": 1.7019, "step": 82300 }, { "epoch": 0.5173374187339187, "grad_norm": 5.889588356018066, "learning_rate": 1.6555912257026227e-05, "loss": 1.7359, "step": 82310 }, { "epoch": 0.5174002710506157, "grad_norm": 7.2895002365112305, "learning_rate": 1.6555493156081574e-05, "loss": 1.6788, "step": 82320 }, { "epoch": 0.5174631233673128, "grad_norm": 7.424088001251221, "learning_rate": 1.655507405513692e-05, "loss": 1.6296, "step": 82330 }, { "epoch": 0.5175259756840099, "grad_norm": 6.089168548583984, "learning_rate": 1.6554654954192268e-05, "loss": 1.5945, "step": 82340 }, { "epoch": 0.517588828000707, "grad_norm": 7.069284915924072, "learning_rate": 1.6554235853247615e-05, "loss": 1.8223, "step": 82350 }, { "epoch": 0.5176516803174042, "grad_norm": 7.048602104187012, "learning_rate": 1.655381675230296e-05, "loss": 1.8519, "step": 82360 }, { "epoch": 0.5177145326341013, "grad_norm": 6.624363899230957, "learning_rate": 1.6553397651358306e-05, "loss": 1.6981, "step": 82370 }, { "epoch": 0.5177773849507984, "grad_norm": 8.180917739868164, "learning_rate": 1.6552978550413653e-05, "loss": 1.8844, "step": 82380 }, { "epoch": 0.5178402372674955, "grad_norm": 5.791408538818359, "learning_rate": 1.6552559449469e-05, "loss": 1.6947, "step": 82390 }, { "epoch": 0.5179030895841926, "grad_norm": 6.24849271774292, "learning_rate": 1.6552140348524347e-05, "loss": 1.8205, "step": 82400 }, { "epoch": 0.5179659419008897, "grad_norm": 7.0733561515808105, "learning_rate": 1.6551721247579694e-05, "loss": 1.5899, "step": 82410 }, { "epoch": 0.5180287942175869, "grad_norm": 5.714791774749756, "learning_rate": 1.655130214663504e-05, "loss": 1.6854, "step": 82420 }, { "epoch": 0.518091646534284, "grad_norm": 6.8169965744018555, "learning_rate": 1.6550883045690388e-05, "loss": 1.749, "step": 82430 }, { "epoch": 0.5181544988509811, "grad_norm": 7.001819133758545, "learning_rate": 1.6550463944745735e-05, "loss": 1.5122, "step": 82440 }, { "epoch": 0.5182173511676782, "grad_norm": 7.239821910858154, "learning_rate": 1.655004484380108e-05, "loss": 1.6865, "step": 82450 }, { "epoch": 0.5182802034843753, "grad_norm": 7.571619987487793, "learning_rate": 1.6549625742856426e-05, "loss": 1.8882, "step": 82460 }, { "epoch": 0.5183430558010724, "grad_norm": 7.204845905303955, "learning_rate": 1.6549206641911773e-05, "loss": 1.7303, "step": 82470 }, { "epoch": 0.5184059081177695, "grad_norm": 6.98240852355957, "learning_rate": 1.654878754096712e-05, "loss": 1.6622, "step": 82480 }, { "epoch": 0.5184687604344667, "grad_norm": 7.970244407653809, "learning_rate": 1.6548368440022464e-05, "loss": 1.5693, "step": 82490 }, { "epoch": 0.5185316127511638, "grad_norm": 7.878420352935791, "learning_rate": 1.654794933907781e-05, "loss": 1.8428, "step": 82500 }, { "epoch": 0.5185944650678609, "grad_norm": 6.624403953552246, "learning_rate": 1.6547530238133158e-05, "loss": 1.8425, "step": 82510 }, { "epoch": 0.518657317384558, "grad_norm": 6.755704879760742, "learning_rate": 1.6547111137188505e-05, "loss": 1.572, "step": 82520 }, { "epoch": 0.5187201697012551, "grad_norm": 6.136546611785889, "learning_rate": 1.6546692036243852e-05, "loss": 1.6231, "step": 82530 }, { "epoch": 0.5187830220179522, "grad_norm": 5.777431488037109, "learning_rate": 1.6546272935299196e-05, "loss": 1.4797, "step": 82540 }, { "epoch": 0.5188458743346493, "grad_norm": 7.702548980712891, "learning_rate": 1.6545853834354543e-05, "loss": 1.7375, "step": 82550 }, { "epoch": 0.5189087266513465, "grad_norm": 6.238503456115723, "learning_rate": 1.654543473340989e-05, "loss": 1.8284, "step": 82560 }, { "epoch": 0.5189715789680436, "grad_norm": 5.85338020324707, "learning_rate": 1.6545015632465237e-05, "loss": 1.6781, "step": 82570 }, { "epoch": 0.5190344312847406, "grad_norm": 6.3454179763793945, "learning_rate": 1.6544596531520584e-05, "loss": 1.496, "step": 82580 }, { "epoch": 0.5190972836014377, "grad_norm": 6.02119255065918, "learning_rate": 1.654417743057593e-05, "loss": 1.6278, "step": 82590 }, { "epoch": 0.5191601359181348, "grad_norm": 6.290589332580566, "learning_rate": 1.6543758329631275e-05, "loss": 1.5677, "step": 82600 }, { "epoch": 0.5192229882348319, "grad_norm": 5.991605758666992, "learning_rate": 1.6543339228686622e-05, "loss": 1.679, "step": 82610 }, { "epoch": 0.519285840551529, "grad_norm": 6.47032356262207, "learning_rate": 1.654292012774197e-05, "loss": 1.5912, "step": 82620 }, { "epoch": 0.5193486928682262, "grad_norm": 7.9203572273254395, "learning_rate": 1.6542501026797316e-05, "loss": 1.8486, "step": 82630 }, { "epoch": 0.5194115451849233, "grad_norm": 7.584958553314209, "learning_rate": 1.6542081925852663e-05, "loss": 1.7711, "step": 82640 }, { "epoch": 0.5194743975016204, "grad_norm": 6.896289348602295, "learning_rate": 1.654166282490801e-05, "loss": 1.7535, "step": 82650 }, { "epoch": 0.5195372498183175, "grad_norm": 5.558168888092041, "learning_rate": 1.6541243723963357e-05, "loss": 1.5942, "step": 82660 }, { "epoch": 0.5196001021350146, "grad_norm": 6.410091876983643, "learning_rate": 1.65408246230187e-05, "loss": 1.6743, "step": 82670 }, { "epoch": 0.5196629544517117, "grad_norm": 6.681957721710205, "learning_rate": 1.6540405522074048e-05, "loss": 1.5559, "step": 82680 }, { "epoch": 0.5197258067684088, "grad_norm": 7.247389793395996, "learning_rate": 1.6539986421129395e-05, "loss": 1.5263, "step": 82690 }, { "epoch": 0.519788659085106, "grad_norm": 7.7606916427612305, "learning_rate": 1.6539567320184742e-05, "loss": 1.7305, "step": 82700 }, { "epoch": 0.5198515114018031, "grad_norm": 5.6875433921813965, "learning_rate": 1.6539148219240086e-05, "loss": 1.7554, "step": 82710 }, { "epoch": 0.5199143637185002, "grad_norm": 6.5668463706970215, "learning_rate": 1.6538729118295433e-05, "loss": 1.6081, "step": 82720 }, { "epoch": 0.5199772160351973, "grad_norm": 6.804002285003662, "learning_rate": 1.653831001735078e-05, "loss": 1.8088, "step": 82730 }, { "epoch": 0.5200400683518944, "grad_norm": 6.785575866699219, "learning_rate": 1.6537890916406127e-05, "loss": 1.8513, "step": 82740 }, { "epoch": 0.5201029206685915, "grad_norm": 6.491295337677002, "learning_rate": 1.6537471815461474e-05, "loss": 1.5833, "step": 82750 }, { "epoch": 0.5201657729852887, "grad_norm": 5.610404968261719, "learning_rate": 1.6537052714516818e-05, "loss": 1.5928, "step": 82760 }, { "epoch": 0.5202286253019858, "grad_norm": 7.637632846832275, "learning_rate": 1.6536633613572165e-05, "loss": 1.7613, "step": 82770 }, { "epoch": 0.5202914776186829, "grad_norm": 7.387691020965576, "learning_rate": 1.6536214512627512e-05, "loss": 1.6803, "step": 82780 }, { "epoch": 0.52035432993538, "grad_norm": 5.746232032775879, "learning_rate": 1.653579541168286e-05, "loss": 1.6212, "step": 82790 }, { "epoch": 0.5204171822520771, "grad_norm": 7.148149490356445, "learning_rate": 1.6535376310738206e-05, "loss": 1.4678, "step": 82800 }, { "epoch": 0.5204800345687742, "grad_norm": 6.671700477600098, "learning_rate": 1.6534957209793553e-05, "loss": 1.6183, "step": 82810 }, { "epoch": 0.5205428868854713, "grad_norm": 6.904843330383301, "learning_rate": 1.65345381088489e-05, "loss": 1.7249, "step": 82820 }, { "epoch": 0.5206057392021683, "grad_norm": 9.225653648376465, "learning_rate": 1.6534119007904247e-05, "loss": 1.9133, "step": 82830 }, { "epoch": 0.5206685915188655, "grad_norm": 7.4347147941589355, "learning_rate": 1.6533699906959594e-05, "loss": 1.6362, "step": 82840 }, { "epoch": 0.5207314438355626, "grad_norm": 6.336967945098877, "learning_rate": 1.6533280806014938e-05, "loss": 1.4811, "step": 82850 }, { "epoch": 0.5207942961522597, "grad_norm": 6.682900905609131, "learning_rate": 1.6532861705070285e-05, "loss": 1.6655, "step": 82860 }, { "epoch": 0.5208571484689568, "grad_norm": 5.187028408050537, "learning_rate": 1.6532442604125632e-05, "loss": 1.5937, "step": 82870 }, { "epoch": 0.5209200007856539, "grad_norm": 6.478296279907227, "learning_rate": 1.653202350318098e-05, "loss": 1.8416, "step": 82880 }, { "epoch": 0.520982853102351, "grad_norm": 7.833890438079834, "learning_rate": 1.6531604402236323e-05, "loss": 1.9684, "step": 82890 }, { "epoch": 0.5210457054190482, "grad_norm": 6.321444988250732, "learning_rate": 1.653118530129167e-05, "loss": 1.6874, "step": 82900 }, { "epoch": 0.5211085577357453, "grad_norm": 5.837413311004639, "learning_rate": 1.6530766200347017e-05, "loss": 1.5814, "step": 82910 }, { "epoch": 0.5211714100524424, "grad_norm": 6.95207405090332, "learning_rate": 1.6530347099402364e-05, "loss": 1.5639, "step": 82920 }, { "epoch": 0.5212342623691395, "grad_norm": 6.530197620391846, "learning_rate": 1.6529927998457708e-05, "loss": 1.5572, "step": 82930 }, { "epoch": 0.5212971146858366, "grad_norm": 7.431722164154053, "learning_rate": 1.6529508897513055e-05, "loss": 1.9132, "step": 82940 }, { "epoch": 0.5213599670025337, "grad_norm": 7.086301803588867, "learning_rate": 1.6529089796568402e-05, "loss": 1.7854, "step": 82950 }, { "epoch": 0.5214228193192308, "grad_norm": 7.270763874053955, "learning_rate": 1.652867069562375e-05, "loss": 1.7947, "step": 82960 }, { "epoch": 0.521485671635928, "grad_norm": 7.24395751953125, "learning_rate": 1.6528251594679096e-05, "loss": 1.8873, "step": 82970 }, { "epoch": 0.5215485239526251, "grad_norm": 7.02109956741333, "learning_rate": 1.652783249373444e-05, "loss": 1.6871, "step": 82980 }, { "epoch": 0.5216113762693222, "grad_norm": 7.27868127822876, "learning_rate": 1.6527413392789787e-05, "loss": 1.5118, "step": 82990 }, { "epoch": 0.5216742285860193, "grad_norm": 7.981987953186035, "learning_rate": 1.6526994291845134e-05, "loss": 1.7591, "step": 83000 }, { "epoch": 0.5217370809027164, "grad_norm": 6.476938724517822, "learning_rate": 1.652657519090048e-05, "loss": 1.6087, "step": 83010 }, { "epoch": 0.5217999332194135, "grad_norm": 6.41933012008667, "learning_rate": 1.6526156089955828e-05, "loss": 1.7027, "step": 83020 }, { "epoch": 0.5218627855361107, "grad_norm": 7.0129828453063965, "learning_rate": 1.6525736989011175e-05, "loss": 1.4111, "step": 83030 }, { "epoch": 0.5219256378528078, "grad_norm": 6.466144561767578, "learning_rate": 1.6525317888066522e-05, "loss": 1.7989, "step": 83040 }, { "epoch": 0.5219884901695049, "grad_norm": 6.608112335205078, "learning_rate": 1.652489878712187e-05, "loss": 1.6697, "step": 83050 }, { "epoch": 0.522051342486202, "grad_norm": 7.534452438354492, "learning_rate": 1.6524479686177216e-05, "loss": 1.9912, "step": 83060 }, { "epoch": 0.5221141948028991, "grad_norm": 5.754380702972412, "learning_rate": 1.652406058523256e-05, "loss": 1.6117, "step": 83070 }, { "epoch": 0.5221770471195962, "grad_norm": 6.875207901000977, "learning_rate": 1.6523641484287907e-05, "loss": 1.5582, "step": 83080 }, { "epoch": 0.5222398994362932, "grad_norm": 8.083128929138184, "learning_rate": 1.6523222383343254e-05, "loss": 1.702, "step": 83090 }, { "epoch": 0.5223027517529903, "grad_norm": 6.761566638946533, "learning_rate": 1.65228032823986e-05, "loss": 1.7788, "step": 83100 }, { "epoch": 0.5223656040696875, "grad_norm": 7.703314304351807, "learning_rate": 1.6522384181453945e-05, "loss": 1.7674, "step": 83110 }, { "epoch": 0.5224284563863846, "grad_norm": 6.8022942543029785, "learning_rate": 1.6521965080509292e-05, "loss": 1.6648, "step": 83120 }, { "epoch": 0.5224913087030817, "grad_norm": 5.865825176239014, "learning_rate": 1.652154597956464e-05, "loss": 1.5966, "step": 83130 }, { "epoch": 0.5225541610197788, "grad_norm": 7.486507892608643, "learning_rate": 1.6521126878619986e-05, "loss": 1.6999, "step": 83140 }, { "epoch": 0.5226170133364759, "grad_norm": 6.923404216766357, "learning_rate": 1.6520707777675333e-05, "loss": 1.7957, "step": 83150 }, { "epoch": 0.522679865653173, "grad_norm": 5.716740131378174, "learning_rate": 1.6520288676730677e-05, "loss": 1.5891, "step": 83160 }, { "epoch": 0.5227427179698702, "grad_norm": 8.053667068481445, "learning_rate": 1.6519869575786024e-05, "loss": 1.7406, "step": 83170 }, { "epoch": 0.5228055702865673, "grad_norm": 6.939223289489746, "learning_rate": 1.651945047484137e-05, "loss": 1.8066, "step": 83180 }, { "epoch": 0.5228684226032644, "grad_norm": 6.253711223602295, "learning_rate": 1.6519031373896718e-05, "loss": 1.7324, "step": 83190 }, { "epoch": 0.5229312749199615, "grad_norm": 7.140364646911621, "learning_rate": 1.6518612272952065e-05, "loss": 1.5482, "step": 83200 }, { "epoch": 0.5229941272366586, "grad_norm": 5.5362935066223145, "learning_rate": 1.6518193172007412e-05, "loss": 1.4722, "step": 83210 }, { "epoch": 0.5230569795533557, "grad_norm": 6.500285625457764, "learning_rate": 1.651777407106276e-05, "loss": 1.7503, "step": 83220 }, { "epoch": 0.5231198318700528, "grad_norm": 6.48651647567749, "learning_rate": 1.6517354970118103e-05, "loss": 1.6214, "step": 83230 }, { "epoch": 0.52318268418675, "grad_norm": 6.936561107635498, "learning_rate": 1.651693586917345e-05, "loss": 1.5603, "step": 83240 }, { "epoch": 0.5232455365034471, "grad_norm": 6.254504203796387, "learning_rate": 1.6516516768228797e-05, "loss": 1.6045, "step": 83250 }, { "epoch": 0.5233083888201442, "grad_norm": 7.0453338623046875, "learning_rate": 1.6516097667284144e-05, "loss": 1.7993, "step": 83260 }, { "epoch": 0.5233712411368413, "grad_norm": 6.0243611335754395, "learning_rate": 1.651567856633949e-05, "loss": 1.9001, "step": 83270 }, { "epoch": 0.5234340934535384, "grad_norm": 7.146152019500732, "learning_rate": 1.651525946539484e-05, "loss": 1.7487, "step": 83280 }, { "epoch": 0.5234969457702355, "grad_norm": 6.603753566741943, "learning_rate": 1.6514840364450182e-05, "loss": 1.5815, "step": 83290 }, { "epoch": 0.5235597980869326, "grad_norm": 6.5526204109191895, "learning_rate": 1.651442126350553e-05, "loss": 1.802, "step": 83300 }, { "epoch": 0.5236226504036298, "grad_norm": 6.115887641906738, "learning_rate": 1.6514002162560876e-05, "loss": 1.5732, "step": 83310 }, { "epoch": 0.5236855027203269, "grad_norm": 7.639376163482666, "learning_rate": 1.6513583061616223e-05, "loss": 1.7702, "step": 83320 }, { "epoch": 0.523748355037024, "grad_norm": 6.723336219787598, "learning_rate": 1.6513163960671567e-05, "loss": 1.6207, "step": 83330 }, { "epoch": 0.5238112073537211, "grad_norm": 6.08930778503418, "learning_rate": 1.6512744859726914e-05, "loss": 1.7635, "step": 83340 }, { "epoch": 0.5238740596704181, "grad_norm": 7.516968250274658, "learning_rate": 1.651232575878226e-05, "loss": 1.6921, "step": 83350 }, { "epoch": 0.5239369119871152, "grad_norm": 6.24344539642334, "learning_rate": 1.6511906657837608e-05, "loss": 1.7873, "step": 83360 }, { "epoch": 0.5239997643038123, "grad_norm": 6.595812797546387, "learning_rate": 1.6511487556892955e-05, "loss": 1.5824, "step": 83370 }, { "epoch": 0.5240626166205095, "grad_norm": 6.358027458190918, "learning_rate": 1.65110684559483e-05, "loss": 1.6681, "step": 83380 }, { "epoch": 0.5241254689372066, "grad_norm": 7.562269687652588, "learning_rate": 1.6510649355003646e-05, "loss": 1.6935, "step": 83390 }, { "epoch": 0.5241883212539037, "grad_norm": 7.25498104095459, "learning_rate": 1.6510230254058993e-05, "loss": 1.6679, "step": 83400 }, { "epoch": 0.5242511735706008, "grad_norm": 7.283073902130127, "learning_rate": 1.650981115311434e-05, "loss": 1.602, "step": 83410 }, { "epoch": 0.5243140258872979, "grad_norm": 7.198612689971924, "learning_rate": 1.6509392052169687e-05, "loss": 1.7284, "step": 83420 }, { "epoch": 0.524376878203995, "grad_norm": 6.337793827056885, "learning_rate": 1.6508972951225034e-05, "loss": 2.0844, "step": 83430 }, { "epoch": 0.5244397305206921, "grad_norm": 6.039976596832275, "learning_rate": 1.650855385028038e-05, "loss": 1.6695, "step": 83440 }, { "epoch": 0.5245025828373893, "grad_norm": 6.3445916175842285, "learning_rate": 1.650813474933573e-05, "loss": 1.7213, "step": 83450 }, { "epoch": 0.5245654351540864, "grad_norm": 7.413024425506592, "learning_rate": 1.6507715648391075e-05, "loss": 1.5914, "step": 83460 }, { "epoch": 0.5246282874707835, "grad_norm": 6.500877857208252, "learning_rate": 1.650729654744642e-05, "loss": 1.948, "step": 83470 }, { "epoch": 0.5246911397874806, "grad_norm": 6.975460052490234, "learning_rate": 1.6506877446501766e-05, "loss": 1.6333, "step": 83480 }, { "epoch": 0.5247539921041777, "grad_norm": 6.222975730895996, "learning_rate": 1.6506458345557113e-05, "loss": 1.6955, "step": 83490 }, { "epoch": 0.5248168444208748, "grad_norm": 6.043398857116699, "learning_rate": 1.650603924461246e-05, "loss": 1.6554, "step": 83500 }, { "epoch": 0.524879696737572, "grad_norm": 6.9041290283203125, "learning_rate": 1.6505620143667804e-05, "loss": 1.6828, "step": 83510 }, { "epoch": 0.5249425490542691, "grad_norm": 7.749858379364014, "learning_rate": 1.650520104272315e-05, "loss": 1.8912, "step": 83520 }, { "epoch": 0.5250054013709662, "grad_norm": 6.439682483673096, "learning_rate": 1.6504781941778498e-05, "loss": 1.6681, "step": 83530 }, { "epoch": 0.5250682536876633, "grad_norm": 6.8574299812316895, "learning_rate": 1.6504362840833845e-05, "loss": 1.5521, "step": 83540 }, { "epoch": 0.5251311060043604, "grad_norm": 7.462523460388184, "learning_rate": 1.650394373988919e-05, "loss": 1.6394, "step": 83550 }, { "epoch": 0.5251939583210575, "grad_norm": 7.904224872589111, "learning_rate": 1.6503524638944536e-05, "loss": 1.6388, "step": 83560 }, { "epoch": 0.5252568106377546, "grad_norm": 5.839818954467773, "learning_rate": 1.6503105537999883e-05, "loss": 1.9303, "step": 83570 }, { "epoch": 0.5253196629544518, "grad_norm": 6.612687587738037, "learning_rate": 1.650268643705523e-05, "loss": 1.685, "step": 83580 }, { "epoch": 0.5253825152711489, "grad_norm": 7.205349922180176, "learning_rate": 1.6502267336110577e-05, "loss": 1.9504, "step": 83590 }, { "epoch": 0.5254453675878459, "grad_norm": 6.072460651397705, "learning_rate": 1.650184823516592e-05, "loss": 1.694, "step": 83600 }, { "epoch": 0.525508219904543, "grad_norm": 7.299000263214111, "learning_rate": 1.6501429134221268e-05, "loss": 1.7088, "step": 83610 }, { "epoch": 0.5255710722212401, "grad_norm": 6.392491340637207, "learning_rate": 1.6501010033276615e-05, "loss": 1.7243, "step": 83620 }, { "epoch": 0.5256339245379372, "grad_norm": 7.418911457061768, "learning_rate": 1.6500590932331962e-05, "loss": 1.8272, "step": 83630 }, { "epoch": 0.5256967768546343, "grad_norm": 7.277531147003174, "learning_rate": 1.650017183138731e-05, "loss": 1.6829, "step": 83640 }, { "epoch": 0.5257596291713315, "grad_norm": 6.8729681968688965, "learning_rate": 1.6499752730442656e-05, "loss": 1.5724, "step": 83650 }, { "epoch": 0.5258224814880286, "grad_norm": 5.070959091186523, "learning_rate": 1.6499333629498003e-05, "loss": 1.7617, "step": 83660 }, { "epoch": 0.5258853338047257, "grad_norm": 6.0253448486328125, "learning_rate": 1.649891452855335e-05, "loss": 1.8828, "step": 83670 }, { "epoch": 0.5259481861214228, "grad_norm": 6.4799723625183105, "learning_rate": 1.6498495427608697e-05, "loss": 1.6959, "step": 83680 }, { "epoch": 0.5260110384381199, "grad_norm": 6.933954238891602, "learning_rate": 1.649807632666404e-05, "loss": 1.7841, "step": 83690 }, { "epoch": 0.526073890754817, "grad_norm": 6.568515777587891, "learning_rate": 1.6497657225719388e-05, "loss": 1.7374, "step": 83700 }, { "epoch": 0.5261367430715141, "grad_norm": 7.105417251586914, "learning_rate": 1.6497238124774735e-05, "loss": 1.5518, "step": 83710 }, { "epoch": 0.5261995953882113, "grad_norm": 5.699483394622803, "learning_rate": 1.6496819023830082e-05, "loss": 1.6673, "step": 83720 }, { "epoch": 0.5262624477049084, "grad_norm": 6.796374320983887, "learning_rate": 1.6496399922885426e-05, "loss": 1.666, "step": 83730 }, { "epoch": 0.5263253000216055, "grad_norm": 6.832988262176514, "learning_rate": 1.6495980821940773e-05, "loss": 1.7855, "step": 83740 }, { "epoch": 0.5263881523383026, "grad_norm": 7.052558898925781, "learning_rate": 1.649556172099612e-05, "loss": 1.5677, "step": 83750 }, { "epoch": 0.5264510046549997, "grad_norm": 7.3553924560546875, "learning_rate": 1.6495142620051467e-05, "loss": 1.7541, "step": 83760 }, { "epoch": 0.5265138569716968, "grad_norm": 6.884090423583984, "learning_rate": 1.6494723519106814e-05, "loss": 1.8688, "step": 83770 }, { "epoch": 0.526576709288394, "grad_norm": 6.454369068145752, "learning_rate": 1.6494304418162158e-05, "loss": 1.6198, "step": 83780 }, { "epoch": 0.5266395616050911, "grad_norm": 6.577486515045166, "learning_rate": 1.6493885317217505e-05, "loss": 1.4164, "step": 83790 }, { "epoch": 0.5267024139217882, "grad_norm": 6.482428550720215, "learning_rate": 1.6493466216272852e-05, "loss": 1.6054, "step": 83800 }, { "epoch": 0.5267652662384853, "grad_norm": 6.978704929351807, "learning_rate": 1.64930471153282e-05, "loss": 1.8539, "step": 83810 }, { "epoch": 0.5268281185551824, "grad_norm": 6.174668788909912, "learning_rate": 1.6492628014383546e-05, "loss": 1.6333, "step": 83820 }, { "epoch": 0.5268909708718795, "grad_norm": 6.739724636077881, "learning_rate": 1.6492208913438893e-05, "loss": 1.5853, "step": 83830 }, { "epoch": 0.5269538231885766, "grad_norm": 6.9323272705078125, "learning_rate": 1.649178981249424e-05, "loss": 1.7365, "step": 83840 }, { "epoch": 0.5270166755052738, "grad_norm": 6.932864665985107, "learning_rate": 1.6491370711549584e-05, "loss": 1.8743, "step": 83850 }, { "epoch": 0.5270795278219708, "grad_norm": 6.934847354888916, "learning_rate": 1.649095161060493e-05, "loss": 1.6195, "step": 83860 }, { "epoch": 0.5271423801386679, "grad_norm": 7.109174728393555, "learning_rate": 1.6490532509660278e-05, "loss": 1.7509, "step": 83870 }, { "epoch": 0.527205232455365, "grad_norm": 7.261030673980713, "learning_rate": 1.6490113408715625e-05, "loss": 1.6738, "step": 83880 }, { "epoch": 0.5272680847720621, "grad_norm": 6.480356693267822, "learning_rate": 1.6489694307770972e-05, "loss": 1.6618, "step": 83890 }, { "epoch": 0.5273309370887592, "grad_norm": 7.185296058654785, "learning_rate": 1.648927520682632e-05, "loss": 1.9007, "step": 83900 }, { "epoch": 0.5273937894054563, "grad_norm": 6.617218494415283, "learning_rate": 1.6488856105881663e-05, "loss": 1.6509, "step": 83910 }, { "epoch": 0.5274566417221535, "grad_norm": 5.1567888259887695, "learning_rate": 1.648843700493701e-05, "loss": 1.53, "step": 83920 }, { "epoch": 0.5275194940388506, "grad_norm": 7.2307329177856445, "learning_rate": 1.6488017903992357e-05, "loss": 1.6642, "step": 83930 }, { "epoch": 0.5275823463555477, "grad_norm": 6.329922199249268, "learning_rate": 1.6487598803047704e-05, "loss": 1.6353, "step": 83940 }, { "epoch": 0.5276451986722448, "grad_norm": 6.501959323883057, "learning_rate": 1.6487179702103048e-05, "loss": 1.5327, "step": 83950 }, { "epoch": 0.5277080509889419, "grad_norm": 5.8954033851623535, "learning_rate": 1.6486760601158395e-05, "loss": 1.7865, "step": 83960 }, { "epoch": 0.527770903305639, "grad_norm": 6.62356424331665, "learning_rate": 1.6486341500213742e-05, "loss": 1.5822, "step": 83970 }, { "epoch": 0.5278337556223361, "grad_norm": 7.6494035720825195, "learning_rate": 1.648592239926909e-05, "loss": 1.5285, "step": 83980 }, { "epoch": 0.5278966079390333, "grad_norm": 6.179942607879639, "learning_rate": 1.6485503298324436e-05, "loss": 1.7896, "step": 83990 }, { "epoch": 0.5279594602557304, "grad_norm": 7.346772193908691, "learning_rate": 1.648508419737978e-05, "loss": 1.5969, "step": 84000 }, { "epoch": 0.5280223125724275, "grad_norm": 6.414741039276123, "learning_rate": 1.6484665096435127e-05, "loss": 1.6566, "step": 84010 }, { "epoch": 0.5280851648891246, "grad_norm": 7.321435451507568, "learning_rate": 1.6484245995490474e-05, "loss": 1.7269, "step": 84020 }, { "epoch": 0.5281480172058217, "grad_norm": 6.323916912078857, "learning_rate": 1.648382689454582e-05, "loss": 1.6105, "step": 84030 }, { "epoch": 0.5282108695225188, "grad_norm": 6.770116806030273, "learning_rate": 1.648340779360117e-05, "loss": 1.8191, "step": 84040 }, { "epoch": 0.528273721839216, "grad_norm": 6.618961334228516, "learning_rate": 1.6482988692656515e-05, "loss": 1.6701, "step": 84050 }, { "epoch": 0.5283365741559131, "grad_norm": 8.083182334899902, "learning_rate": 1.6482569591711862e-05, "loss": 1.8281, "step": 84060 }, { "epoch": 0.5283994264726102, "grad_norm": 6.384860038757324, "learning_rate": 1.648215049076721e-05, "loss": 1.5342, "step": 84070 }, { "epoch": 0.5284622787893073, "grad_norm": 6.723288536071777, "learning_rate": 1.6481731389822557e-05, "loss": 1.6023, "step": 84080 }, { "epoch": 0.5285251311060044, "grad_norm": 7.6965227127075195, "learning_rate": 1.64813122888779e-05, "loss": 1.7709, "step": 84090 }, { "epoch": 0.5285879834227015, "grad_norm": 7.332645893096924, "learning_rate": 1.6480893187933247e-05, "loss": 1.7903, "step": 84100 }, { "epoch": 0.5286508357393985, "grad_norm": 8.037694931030273, "learning_rate": 1.6480474086988594e-05, "loss": 1.9066, "step": 84110 }, { "epoch": 0.5287136880560956, "grad_norm": 6.930173397064209, "learning_rate": 1.648005498604394e-05, "loss": 1.7766, "step": 84120 }, { "epoch": 0.5287765403727928, "grad_norm": 6.96900749206543, "learning_rate": 1.6479635885099285e-05, "loss": 1.5314, "step": 84130 }, { "epoch": 0.5288393926894899, "grad_norm": 6.857990741729736, "learning_rate": 1.6479216784154632e-05, "loss": 1.4845, "step": 84140 }, { "epoch": 0.528902245006187, "grad_norm": 7.52931022644043, "learning_rate": 1.647879768320998e-05, "loss": 1.8119, "step": 84150 }, { "epoch": 0.5289650973228841, "grad_norm": 6.831870079040527, "learning_rate": 1.6478378582265326e-05, "loss": 1.728, "step": 84160 }, { "epoch": 0.5290279496395812, "grad_norm": 7.987659931182861, "learning_rate": 1.647795948132067e-05, "loss": 1.7008, "step": 84170 }, { "epoch": 0.5290908019562783, "grad_norm": 7.758392810821533, "learning_rate": 1.6477540380376017e-05, "loss": 1.5586, "step": 84180 }, { "epoch": 0.5291536542729754, "grad_norm": 6.975496768951416, "learning_rate": 1.6477121279431364e-05, "loss": 1.8404, "step": 84190 }, { "epoch": 0.5292165065896726, "grad_norm": 6.656536102294922, "learning_rate": 1.647670217848671e-05, "loss": 1.6575, "step": 84200 }, { "epoch": 0.5292793589063697, "grad_norm": 6.5500006675720215, "learning_rate": 1.647628307754206e-05, "loss": 1.731, "step": 84210 }, { "epoch": 0.5293422112230668, "grad_norm": 6.406815528869629, "learning_rate": 1.6475863976597405e-05, "loss": 1.5357, "step": 84220 }, { "epoch": 0.5294050635397639, "grad_norm": 7.080085754394531, "learning_rate": 1.647544487565275e-05, "loss": 1.5831, "step": 84230 }, { "epoch": 0.529467915856461, "grad_norm": 11.400970458984375, "learning_rate": 1.6475025774708096e-05, "loss": 1.7842, "step": 84240 }, { "epoch": 0.5295307681731581, "grad_norm": 7.819911479949951, "learning_rate": 1.6474606673763443e-05, "loss": 1.6955, "step": 84250 }, { "epoch": 0.5295936204898553, "grad_norm": 7.373455047607422, "learning_rate": 1.647418757281879e-05, "loss": 1.7008, "step": 84260 }, { "epoch": 0.5296564728065524, "grad_norm": 6.902364253997803, "learning_rate": 1.6473768471874137e-05, "loss": 1.7394, "step": 84270 }, { "epoch": 0.5297193251232495, "grad_norm": 7.253707408905029, "learning_rate": 1.6473349370929484e-05, "loss": 1.4435, "step": 84280 }, { "epoch": 0.5297821774399466, "grad_norm": 6.589545249938965, "learning_rate": 1.647293026998483e-05, "loss": 1.8562, "step": 84290 }, { "epoch": 0.5298450297566437, "grad_norm": 6.285915851593018, "learning_rate": 1.647251116904018e-05, "loss": 1.7756, "step": 84300 }, { "epoch": 0.5299078820733408, "grad_norm": 6.506764888763428, "learning_rate": 1.6472092068095522e-05, "loss": 1.6378, "step": 84310 }, { "epoch": 0.529970734390038, "grad_norm": 6.875381946563721, "learning_rate": 1.647167296715087e-05, "loss": 1.5785, "step": 84320 }, { "epoch": 0.5300335867067351, "grad_norm": 7.490484237670898, "learning_rate": 1.6471253866206216e-05, "loss": 1.7469, "step": 84330 }, { "epoch": 0.5300964390234322, "grad_norm": 5.694377899169922, "learning_rate": 1.6470834765261563e-05, "loss": 1.6922, "step": 84340 }, { "epoch": 0.5301592913401293, "grad_norm": 5.9290337562561035, "learning_rate": 1.6470415664316907e-05, "loss": 1.6742, "step": 84350 }, { "epoch": 0.5302221436568264, "grad_norm": 6.733861923217773, "learning_rate": 1.6469996563372254e-05, "loss": 1.6141, "step": 84360 }, { "epoch": 0.5302849959735234, "grad_norm": 7.116500377655029, "learning_rate": 1.64695774624276e-05, "loss": 1.8392, "step": 84370 }, { "epoch": 0.5303478482902205, "grad_norm": 6.626927852630615, "learning_rate": 1.646915836148295e-05, "loss": 1.6648, "step": 84380 }, { "epoch": 0.5304107006069176, "grad_norm": 7.265458106994629, "learning_rate": 1.6468739260538295e-05, "loss": 1.7129, "step": 84390 }, { "epoch": 0.5304735529236148, "grad_norm": 7.232100009918213, "learning_rate": 1.646832015959364e-05, "loss": 1.6931, "step": 84400 }, { "epoch": 0.5305364052403119, "grad_norm": 7.313258647918701, "learning_rate": 1.6467901058648986e-05, "loss": 1.5399, "step": 84410 }, { "epoch": 0.530599257557009, "grad_norm": 6.102558612823486, "learning_rate": 1.6467481957704333e-05, "loss": 1.5518, "step": 84420 }, { "epoch": 0.5306621098737061, "grad_norm": 7.185632228851318, "learning_rate": 1.646706285675968e-05, "loss": 1.7073, "step": 84430 }, { "epoch": 0.5307249621904032, "grad_norm": 7.501678943634033, "learning_rate": 1.6466643755815027e-05, "loss": 1.6756, "step": 84440 }, { "epoch": 0.5307878145071003, "grad_norm": 6.88824987411499, "learning_rate": 1.6466224654870374e-05, "loss": 1.788, "step": 84450 }, { "epoch": 0.5308506668237974, "grad_norm": 7.567595481872559, "learning_rate": 1.646580555392572e-05, "loss": 1.8341, "step": 84460 }, { "epoch": 0.5309135191404946, "grad_norm": 5.518832683563232, "learning_rate": 1.646538645298107e-05, "loss": 1.6301, "step": 84470 }, { "epoch": 0.5309763714571917, "grad_norm": 6.848931789398193, "learning_rate": 1.6464967352036412e-05, "loss": 1.8876, "step": 84480 }, { "epoch": 0.5310392237738888, "grad_norm": 7.0313401222229, "learning_rate": 1.646454825109176e-05, "loss": 1.5669, "step": 84490 }, { "epoch": 0.5311020760905859, "grad_norm": 6.8558478355407715, "learning_rate": 1.6464129150147106e-05, "loss": 1.65, "step": 84500 }, { "epoch": 0.531164928407283, "grad_norm": 6.209257125854492, "learning_rate": 1.6463710049202454e-05, "loss": 1.7208, "step": 84510 }, { "epoch": 0.5312277807239801, "grad_norm": 6.593898296356201, "learning_rate": 1.64632909482578e-05, "loss": 1.6445, "step": 84520 }, { "epoch": 0.5312906330406773, "grad_norm": 5.721263885498047, "learning_rate": 1.6462871847313144e-05, "loss": 1.7591, "step": 84530 }, { "epoch": 0.5313534853573744, "grad_norm": 8.044561386108398, "learning_rate": 1.646245274636849e-05, "loss": 1.482, "step": 84540 }, { "epoch": 0.5314163376740715, "grad_norm": 6.970468521118164, "learning_rate": 1.646203364542384e-05, "loss": 1.5991, "step": 84550 }, { "epoch": 0.5314791899907686, "grad_norm": 5.181437969207764, "learning_rate": 1.6461614544479185e-05, "loss": 1.4328, "step": 84560 }, { "epoch": 0.5315420423074657, "grad_norm": 7.308653831481934, "learning_rate": 1.646119544353453e-05, "loss": 1.6158, "step": 84570 }, { "epoch": 0.5316048946241628, "grad_norm": 6.897716045379639, "learning_rate": 1.6460776342589876e-05, "loss": 1.7081, "step": 84580 }, { "epoch": 0.5316677469408599, "grad_norm": 6.542665958404541, "learning_rate": 1.6460357241645223e-05, "loss": 1.6932, "step": 84590 }, { "epoch": 0.5317305992575571, "grad_norm": 4.889917850494385, "learning_rate": 1.645993814070057e-05, "loss": 1.589, "step": 84600 }, { "epoch": 0.5317934515742542, "grad_norm": 6.633420944213867, "learning_rate": 1.6459519039755917e-05, "loss": 1.792, "step": 84610 }, { "epoch": 0.5318563038909512, "grad_norm": 6.323985576629639, "learning_rate": 1.645909993881126e-05, "loss": 1.6676, "step": 84620 }, { "epoch": 0.5319191562076483, "grad_norm": 6.963869094848633, "learning_rate": 1.6458680837866608e-05, "loss": 1.6703, "step": 84630 }, { "epoch": 0.5319820085243454, "grad_norm": 7.031579494476318, "learning_rate": 1.6458261736921955e-05, "loss": 1.9484, "step": 84640 }, { "epoch": 0.5320448608410425, "grad_norm": 6.2956156730651855, "learning_rate": 1.6457842635977302e-05, "loss": 1.6665, "step": 84650 }, { "epoch": 0.5321077131577396, "grad_norm": 6.316678047180176, "learning_rate": 1.645742353503265e-05, "loss": 1.5173, "step": 84660 }, { "epoch": 0.5321705654744368, "grad_norm": 6.737529277801514, "learning_rate": 1.6457004434087996e-05, "loss": 1.5123, "step": 84670 }, { "epoch": 0.5322334177911339, "grad_norm": 7.0265069007873535, "learning_rate": 1.6456585333143344e-05, "loss": 1.759, "step": 84680 }, { "epoch": 0.532296270107831, "grad_norm": 6.733026504516602, "learning_rate": 1.645616623219869e-05, "loss": 1.9251, "step": 84690 }, { "epoch": 0.5323591224245281, "grad_norm": 7.538741588592529, "learning_rate": 1.6455747131254038e-05, "loss": 1.6288, "step": 84700 }, { "epoch": 0.5324219747412252, "grad_norm": 6.046632766723633, "learning_rate": 1.645532803030938e-05, "loss": 1.5321, "step": 84710 }, { "epoch": 0.5324848270579223, "grad_norm": 5.785271644592285, "learning_rate": 1.645490892936473e-05, "loss": 1.6415, "step": 84720 }, { "epoch": 0.5325476793746194, "grad_norm": 7.816107749938965, "learning_rate": 1.6454489828420076e-05, "loss": 1.7927, "step": 84730 }, { "epoch": 0.5326105316913166, "grad_norm": 7.172451972961426, "learning_rate": 1.6454070727475423e-05, "loss": 1.7184, "step": 84740 }, { "epoch": 0.5326733840080137, "grad_norm": 7.187384128570557, "learning_rate": 1.6453651626530766e-05, "loss": 1.6256, "step": 84750 }, { "epoch": 0.5327362363247108, "grad_norm": 5.67566442489624, "learning_rate": 1.6453232525586113e-05, "loss": 1.4756, "step": 84760 }, { "epoch": 0.5327990886414079, "grad_norm": 6.156361103057861, "learning_rate": 1.645281342464146e-05, "loss": 1.5702, "step": 84770 }, { "epoch": 0.532861940958105, "grad_norm": 6.500582218170166, "learning_rate": 1.6452394323696807e-05, "loss": 1.8808, "step": 84780 }, { "epoch": 0.5329247932748021, "grad_norm": 6.974032878875732, "learning_rate": 1.6451975222752155e-05, "loss": 1.5445, "step": 84790 }, { "epoch": 0.5329876455914992, "grad_norm": 6.717978000640869, "learning_rate": 1.6451556121807498e-05, "loss": 1.842, "step": 84800 }, { "epoch": 0.5330504979081964, "grad_norm": 6.938985824584961, "learning_rate": 1.6451137020862845e-05, "loss": 1.9241, "step": 84810 }, { "epoch": 0.5331133502248935, "grad_norm": 6.2072296142578125, "learning_rate": 1.6450717919918192e-05, "loss": 1.7266, "step": 84820 }, { "epoch": 0.5331762025415906, "grad_norm": 6.984010219573975, "learning_rate": 1.645029881897354e-05, "loss": 1.6843, "step": 84830 }, { "epoch": 0.5332390548582877, "grad_norm": 7.583876609802246, "learning_rate": 1.6449879718028887e-05, "loss": 1.6457, "step": 84840 }, { "epoch": 0.5333019071749848, "grad_norm": 6.7121992111206055, "learning_rate": 1.6449460617084234e-05, "loss": 1.4486, "step": 84850 }, { "epoch": 0.5333647594916819, "grad_norm": 6.792508602142334, "learning_rate": 1.6449041516139577e-05, "loss": 1.5144, "step": 84860 }, { "epoch": 0.533427611808379, "grad_norm": 7.822506904602051, "learning_rate": 1.6448622415194924e-05, "loss": 1.6888, "step": 84870 }, { "epoch": 0.5334904641250761, "grad_norm": 7.036062240600586, "learning_rate": 1.644820331425027e-05, "loss": 1.9722, "step": 84880 }, { "epoch": 0.5335533164417732, "grad_norm": 6.179880619049072, "learning_rate": 1.644778421330562e-05, "loss": 1.569, "step": 84890 }, { "epoch": 0.5336161687584703, "grad_norm": 7.492377281188965, "learning_rate": 1.6447365112360966e-05, "loss": 1.6265, "step": 84900 }, { "epoch": 0.5336790210751674, "grad_norm": 6.322450160980225, "learning_rate": 1.6446946011416313e-05, "loss": 1.4699, "step": 84910 }, { "epoch": 0.5337418733918645, "grad_norm": 6.580765247344971, "learning_rate": 1.644652691047166e-05, "loss": 1.6407, "step": 84920 }, { "epoch": 0.5338047257085616, "grad_norm": 6.971106052398682, "learning_rate": 1.6446107809527003e-05, "loss": 1.5293, "step": 84930 }, { "epoch": 0.5338675780252587, "grad_norm": 6.923962593078613, "learning_rate": 1.644568870858235e-05, "loss": 1.5995, "step": 84940 }, { "epoch": 0.5339304303419559, "grad_norm": 8.402005195617676, "learning_rate": 1.6445269607637698e-05, "loss": 1.6225, "step": 84950 }, { "epoch": 0.533993282658653, "grad_norm": 6.86106538772583, "learning_rate": 1.6444850506693045e-05, "loss": 1.6665, "step": 84960 }, { "epoch": 0.5340561349753501, "grad_norm": 5.663937091827393, "learning_rate": 1.6444431405748388e-05, "loss": 1.5055, "step": 84970 }, { "epoch": 0.5341189872920472, "grad_norm": 6.386752605438232, "learning_rate": 1.6444012304803735e-05, "loss": 1.8918, "step": 84980 }, { "epoch": 0.5341818396087443, "grad_norm": 5.876289367675781, "learning_rate": 1.6443593203859082e-05, "loss": 1.6871, "step": 84990 }, { "epoch": 0.5342446919254414, "grad_norm": 6.044205188751221, "learning_rate": 1.644317410291443e-05, "loss": 1.6087, "step": 85000 }, { "epoch": 0.5343075442421386, "grad_norm": 7.124244689941406, "learning_rate": 1.6442755001969777e-05, "loss": 1.8886, "step": 85010 }, { "epoch": 0.5343703965588357, "grad_norm": 6.722472190856934, "learning_rate": 1.644233590102512e-05, "loss": 1.6599, "step": 85020 }, { "epoch": 0.5344332488755328, "grad_norm": 6.756608486175537, "learning_rate": 1.6441916800080467e-05, "loss": 1.7099, "step": 85030 }, { "epoch": 0.5344961011922299, "grad_norm": 6.803463459014893, "learning_rate": 1.6441497699135814e-05, "loss": 1.5995, "step": 85040 }, { "epoch": 0.534558953508927, "grad_norm": 7.612119197845459, "learning_rate": 1.644107859819116e-05, "loss": 1.788, "step": 85050 }, { "epoch": 0.5346218058256241, "grad_norm": 6.436509132385254, "learning_rate": 1.644065949724651e-05, "loss": 1.4981, "step": 85060 }, { "epoch": 0.5346846581423212, "grad_norm": 6.51688814163208, "learning_rate": 1.6440240396301856e-05, "loss": 1.7471, "step": 85070 }, { "epoch": 0.5347475104590184, "grad_norm": 6.626704216003418, "learning_rate": 1.6439821295357203e-05, "loss": 1.765, "step": 85080 }, { "epoch": 0.5348103627757155, "grad_norm": 6.392571449279785, "learning_rate": 1.643940219441255e-05, "loss": 1.6547, "step": 85090 }, { "epoch": 0.5348732150924126, "grad_norm": 6.908417701721191, "learning_rate": 1.6438983093467893e-05, "loss": 1.9132, "step": 85100 }, { "epoch": 0.5349360674091097, "grad_norm": 6.740513801574707, "learning_rate": 1.643856399252324e-05, "loss": 1.8449, "step": 85110 }, { "epoch": 0.5349989197258068, "grad_norm": 6.186057090759277, "learning_rate": 1.6438144891578588e-05, "loss": 1.8939, "step": 85120 }, { "epoch": 0.5350617720425038, "grad_norm": 6.962738513946533, "learning_rate": 1.6437725790633935e-05, "loss": 1.8209, "step": 85130 }, { "epoch": 0.5351246243592009, "grad_norm": 6.540321350097656, "learning_rate": 1.6437306689689282e-05, "loss": 1.6953, "step": 85140 }, { "epoch": 0.535187476675898, "grad_norm": 7.460124492645264, "learning_rate": 1.6436887588744625e-05, "loss": 1.7276, "step": 85150 }, { "epoch": 0.5352503289925952, "grad_norm": 7.499505519866943, "learning_rate": 1.6436468487799972e-05, "loss": 1.8471, "step": 85160 }, { "epoch": 0.5353131813092923, "grad_norm": 6.599464416503906, "learning_rate": 1.643604938685532e-05, "loss": 1.6341, "step": 85170 }, { "epoch": 0.5353760336259894, "grad_norm": 6.362654685974121, "learning_rate": 1.6435630285910667e-05, "loss": 1.6101, "step": 85180 }, { "epoch": 0.5354388859426865, "grad_norm": 6.673609256744385, "learning_rate": 1.643521118496601e-05, "loss": 1.7353, "step": 85190 }, { "epoch": 0.5355017382593836, "grad_norm": 6.538881778717041, "learning_rate": 1.6434792084021357e-05, "loss": 1.6995, "step": 85200 }, { "epoch": 0.5355645905760807, "grad_norm": 8.122904777526855, "learning_rate": 1.6434372983076704e-05, "loss": 1.6756, "step": 85210 }, { "epoch": 0.5356274428927779, "grad_norm": 7.356871604919434, "learning_rate": 1.643395388213205e-05, "loss": 1.6809, "step": 85220 }, { "epoch": 0.535690295209475, "grad_norm": 5.879265308380127, "learning_rate": 1.64335347811874e-05, "loss": 1.7661, "step": 85230 }, { "epoch": 0.5357531475261721, "grad_norm": 5.349161148071289, "learning_rate": 1.6433115680242742e-05, "loss": 1.654, "step": 85240 }, { "epoch": 0.5358159998428692, "grad_norm": 6.181868553161621, "learning_rate": 1.643269657929809e-05, "loss": 1.8045, "step": 85250 }, { "epoch": 0.5358788521595663, "grad_norm": 6.501641273498535, "learning_rate": 1.6432277478353436e-05, "loss": 1.7423, "step": 85260 }, { "epoch": 0.5359417044762634, "grad_norm": 6.203233242034912, "learning_rate": 1.6431900287503248e-05, "loss": 1.6475, "step": 85270 }, { "epoch": 0.5360045567929606, "grad_norm": 7.076571464538574, "learning_rate": 1.6431481186558595e-05, "loss": 1.4486, "step": 85280 }, { "epoch": 0.5360674091096577, "grad_norm": 6.285316467285156, "learning_rate": 1.6431062085613942e-05, "loss": 1.5754, "step": 85290 }, { "epoch": 0.5361302614263548, "grad_norm": 6.930043697357178, "learning_rate": 1.643064298466929e-05, "loss": 1.5767, "step": 85300 }, { "epoch": 0.5361931137430519, "grad_norm": 6.204666614532471, "learning_rate": 1.6430223883724633e-05, "loss": 1.6366, "step": 85310 }, { "epoch": 0.536255966059749, "grad_norm": 7.860479831695557, "learning_rate": 1.642980478277998e-05, "loss": 1.8423, "step": 85320 }, { "epoch": 0.5363188183764461, "grad_norm": 7.237555503845215, "learning_rate": 1.6429385681835327e-05, "loss": 1.7368, "step": 85330 }, { "epoch": 0.5363816706931432, "grad_norm": 6.343367576599121, "learning_rate": 1.6428966580890674e-05, "loss": 1.489, "step": 85340 }, { "epoch": 0.5364445230098404, "grad_norm": 6.436690330505371, "learning_rate": 1.642854747994602e-05, "loss": 1.8278, "step": 85350 }, { "epoch": 0.5365073753265375, "grad_norm": 6.2652153968811035, "learning_rate": 1.6428128379001368e-05, "loss": 1.4449, "step": 85360 }, { "epoch": 0.5365702276432346, "grad_norm": 6.447495937347412, "learning_rate": 1.6427709278056715e-05, "loss": 1.6627, "step": 85370 }, { "epoch": 0.5366330799599317, "grad_norm": 5.91556978225708, "learning_rate": 1.6427290177112062e-05, "loss": 1.487, "step": 85380 }, { "epoch": 0.5366959322766287, "grad_norm": 6.8496222496032715, "learning_rate": 1.642687107616741e-05, "loss": 1.5192, "step": 85390 }, { "epoch": 0.5367587845933258, "grad_norm": 6.301149845123291, "learning_rate": 1.6426451975222753e-05, "loss": 1.6423, "step": 85400 }, { "epoch": 0.5368216369100229, "grad_norm": 8.504805564880371, "learning_rate": 1.64260328742781e-05, "loss": 1.818, "step": 85410 }, { "epoch": 0.53688448922672, "grad_norm": 6.374795436859131, "learning_rate": 1.6425613773333447e-05, "loss": 1.9748, "step": 85420 }, { "epoch": 0.5369473415434172, "grad_norm": 6.491119861602783, "learning_rate": 1.6425194672388794e-05, "loss": 1.9015, "step": 85430 }, { "epoch": 0.5370101938601143, "grad_norm": 7.207005023956299, "learning_rate": 1.642477557144414e-05, "loss": 1.7151, "step": 85440 }, { "epoch": 0.5370730461768114, "grad_norm": 7.193447113037109, "learning_rate": 1.6424356470499485e-05, "loss": 1.8635, "step": 85450 }, { "epoch": 0.5371358984935085, "grad_norm": 7.126465320587158, "learning_rate": 1.6423937369554832e-05, "loss": 1.8622, "step": 85460 }, { "epoch": 0.5371987508102056, "grad_norm": 6.056211948394775, "learning_rate": 1.642351826861018e-05, "loss": 1.4906, "step": 85470 }, { "epoch": 0.5372616031269027, "grad_norm": 6.354976654052734, "learning_rate": 1.6423099167665526e-05, "loss": 1.7693, "step": 85480 }, { "epoch": 0.5373244554435999, "grad_norm": 5.696046352386475, "learning_rate": 1.642268006672087e-05, "loss": 1.7025, "step": 85490 }, { "epoch": 0.537387307760297, "grad_norm": 5.817015171051025, "learning_rate": 1.6422260965776217e-05, "loss": 1.7058, "step": 85500 }, { "epoch": 0.5374501600769941, "grad_norm": 7.481677532196045, "learning_rate": 1.6421841864831564e-05, "loss": 1.6445, "step": 85510 }, { "epoch": 0.5375130123936912, "grad_norm": 6.951291084289551, "learning_rate": 1.642142276388691e-05, "loss": 1.8054, "step": 85520 }, { "epoch": 0.5375758647103883, "grad_norm": 6.017706394195557, "learning_rate": 1.6421003662942258e-05, "loss": 1.4114, "step": 85530 }, { "epoch": 0.5376387170270854, "grad_norm": 8.327905654907227, "learning_rate": 1.6420584561997605e-05, "loss": 1.6243, "step": 85540 }, { "epoch": 0.5377015693437825, "grad_norm": 6.597580909729004, "learning_rate": 1.642016546105295e-05, "loss": 1.6225, "step": 85550 }, { "epoch": 0.5377644216604797, "grad_norm": 8.693310737609863, "learning_rate": 1.6419746360108296e-05, "loss": 1.8426, "step": 85560 }, { "epoch": 0.5378272739771768, "grad_norm": 7.165815353393555, "learning_rate": 1.6419327259163643e-05, "loss": 1.5574, "step": 85570 }, { "epoch": 0.5378901262938739, "grad_norm": 6.516468048095703, "learning_rate": 1.641890815821899e-05, "loss": 1.7302, "step": 85580 }, { "epoch": 0.537952978610571, "grad_norm": 5.70261812210083, "learning_rate": 1.6418489057274337e-05, "loss": 1.7721, "step": 85590 }, { "epoch": 0.5380158309272681, "grad_norm": 7.128627300262451, "learning_rate": 1.6418069956329684e-05, "loss": 1.6988, "step": 85600 }, { "epoch": 0.5380786832439652, "grad_norm": 5.911867618560791, "learning_rate": 1.641765085538503e-05, "loss": 1.5781, "step": 85610 }, { "epoch": 0.5381415355606624, "grad_norm": 5.839148044586182, "learning_rate": 1.6417231754440375e-05, "loss": 1.3336, "step": 85620 }, { "epoch": 0.5382043878773595, "grad_norm": 6.416183948516846, "learning_rate": 1.6416812653495722e-05, "loss": 1.6185, "step": 85630 }, { "epoch": 0.5382672401940565, "grad_norm": 6.362691879272461, "learning_rate": 1.641639355255107e-05, "loss": 2.0266, "step": 85640 }, { "epoch": 0.5383300925107536, "grad_norm": 6.19903039932251, "learning_rate": 1.6415974451606416e-05, "loss": 1.5156, "step": 85650 }, { "epoch": 0.5383929448274507, "grad_norm": 6.622096538543701, "learning_rate": 1.6415555350661763e-05, "loss": 1.6729, "step": 85660 }, { "epoch": 0.5384557971441478, "grad_norm": 7.795952320098877, "learning_rate": 1.6415136249717107e-05, "loss": 1.5863, "step": 85670 }, { "epoch": 0.5385186494608449, "grad_norm": 6.2659831047058105, "learning_rate": 1.6414717148772454e-05, "loss": 1.8809, "step": 85680 }, { "epoch": 0.538581501777542, "grad_norm": 7.086266994476318, "learning_rate": 1.64142980478278e-05, "loss": 1.7379, "step": 85690 }, { "epoch": 0.5386443540942392, "grad_norm": 6.989285469055176, "learning_rate": 1.6413878946883148e-05, "loss": 2.0259, "step": 85700 }, { "epoch": 0.5387072064109363, "grad_norm": 6.347990036010742, "learning_rate": 1.6413459845938492e-05, "loss": 1.8471, "step": 85710 }, { "epoch": 0.5387700587276334, "grad_norm": 6.735945701599121, "learning_rate": 1.641304074499384e-05, "loss": 1.7722, "step": 85720 }, { "epoch": 0.5388329110443305, "grad_norm": 6.259716987609863, "learning_rate": 1.6412621644049186e-05, "loss": 1.6662, "step": 85730 }, { "epoch": 0.5388957633610276, "grad_norm": 7.281030178070068, "learning_rate": 1.6412202543104533e-05, "loss": 1.7436, "step": 85740 }, { "epoch": 0.5389586156777247, "grad_norm": 6.42799711227417, "learning_rate": 1.641178344215988e-05, "loss": 1.6246, "step": 85750 }, { "epoch": 0.5390214679944219, "grad_norm": 7.063828945159912, "learning_rate": 1.6411364341215227e-05, "loss": 1.642, "step": 85760 }, { "epoch": 0.539084320311119, "grad_norm": 6.377913475036621, "learning_rate": 1.6410945240270574e-05, "loss": 1.6268, "step": 85770 }, { "epoch": 0.5391471726278161, "grad_norm": 6.6367878913879395, "learning_rate": 1.641052613932592e-05, "loss": 1.5504, "step": 85780 }, { "epoch": 0.5392100249445132, "grad_norm": 6.8672709465026855, "learning_rate": 1.6410107038381268e-05, "loss": 1.7591, "step": 85790 }, { "epoch": 0.5392728772612103, "grad_norm": 7.131933212280273, "learning_rate": 1.6409687937436612e-05, "loss": 1.7588, "step": 85800 }, { "epoch": 0.5393357295779074, "grad_norm": 7.047543525695801, "learning_rate": 1.640926883649196e-05, "loss": 1.8538, "step": 85810 }, { "epoch": 0.5393985818946045, "grad_norm": 6.190505504608154, "learning_rate": 1.6408849735547306e-05, "loss": 1.7086, "step": 85820 }, { "epoch": 0.5394614342113017, "grad_norm": 5.566699028015137, "learning_rate": 1.6408430634602653e-05, "loss": 1.5023, "step": 85830 }, { "epoch": 0.5395242865279988, "grad_norm": 8.045251846313477, "learning_rate": 1.6408011533658e-05, "loss": 1.599, "step": 85840 }, { "epoch": 0.5395871388446959, "grad_norm": 8.263165473937988, "learning_rate": 1.6407592432713344e-05, "loss": 1.7251, "step": 85850 }, { "epoch": 0.539649991161393, "grad_norm": 6.604892253875732, "learning_rate": 1.640717333176869e-05, "loss": 1.6502, "step": 85860 }, { "epoch": 0.5397128434780901, "grad_norm": 7.067389011383057, "learning_rate": 1.6406754230824038e-05, "loss": 1.6586, "step": 85870 }, { "epoch": 0.5397756957947872, "grad_norm": 6.605795383453369, "learning_rate": 1.6406335129879385e-05, "loss": 1.6772, "step": 85880 }, { "epoch": 0.5398385481114844, "grad_norm": 6.340599060058594, "learning_rate": 1.640591602893473e-05, "loss": 1.6492, "step": 85890 }, { "epoch": 0.5399014004281814, "grad_norm": 7.894109725952148, "learning_rate": 1.6405496927990076e-05, "loss": 1.6686, "step": 85900 }, { "epoch": 0.5399642527448785, "grad_norm": 6.646517753601074, "learning_rate": 1.6405077827045423e-05, "loss": 1.8185, "step": 85910 }, { "epoch": 0.5400271050615756, "grad_norm": 5.803297519683838, "learning_rate": 1.640465872610077e-05, "loss": 1.4937, "step": 85920 }, { "epoch": 0.5400899573782727, "grad_norm": 5.791867733001709, "learning_rate": 1.6404239625156114e-05, "loss": 1.8236, "step": 85930 }, { "epoch": 0.5401528096949698, "grad_norm": 6.289924144744873, "learning_rate": 1.640382052421146e-05, "loss": 1.7929, "step": 85940 }, { "epoch": 0.5402156620116669, "grad_norm": 6.742290019989014, "learning_rate": 1.6403401423266808e-05, "loss": 1.7026, "step": 85950 }, { "epoch": 0.540278514328364, "grad_norm": 6.8754987716674805, "learning_rate": 1.6402982322322155e-05, "loss": 1.6969, "step": 85960 }, { "epoch": 0.5403413666450612, "grad_norm": 5.435462474822998, "learning_rate": 1.6402563221377502e-05, "loss": 1.5332, "step": 85970 }, { "epoch": 0.5404042189617583, "grad_norm": 7.776442527770996, "learning_rate": 1.640214412043285e-05, "loss": 1.8064, "step": 85980 }, { "epoch": 0.5404670712784554, "grad_norm": 7.129558086395264, "learning_rate": 1.6401725019488196e-05, "loss": 1.8947, "step": 85990 }, { "epoch": 0.5405299235951525, "grad_norm": 6.044556140899658, "learning_rate": 1.6401305918543543e-05, "loss": 1.656, "step": 86000 }, { "epoch": 0.5405927759118496, "grad_norm": 7.796718120574951, "learning_rate": 1.640088681759889e-05, "loss": 1.9945, "step": 86010 }, { "epoch": 0.5406556282285467, "grad_norm": 6.118916034698486, "learning_rate": 1.6400467716654234e-05, "loss": 1.6391, "step": 86020 }, { "epoch": 0.5407184805452439, "grad_norm": 5.638603687286377, "learning_rate": 1.640004861570958e-05, "loss": 1.5943, "step": 86030 }, { "epoch": 0.540781332861941, "grad_norm": 6.42252254486084, "learning_rate": 1.6399629514764928e-05, "loss": 1.5481, "step": 86040 }, { "epoch": 0.5408441851786381, "grad_norm": 7.297764778137207, "learning_rate": 1.6399210413820275e-05, "loss": 1.7197, "step": 86050 }, { "epoch": 0.5409070374953352, "grad_norm": 6.368427753448486, "learning_rate": 1.6398791312875622e-05, "loss": 1.6432, "step": 86060 }, { "epoch": 0.5409698898120323, "grad_norm": 6.701649188995361, "learning_rate": 1.6398372211930966e-05, "loss": 1.7287, "step": 86070 }, { "epoch": 0.5410327421287294, "grad_norm": 6.809452056884766, "learning_rate": 1.6397953110986313e-05, "loss": 1.6901, "step": 86080 }, { "epoch": 0.5410955944454265, "grad_norm": 7.57839298248291, "learning_rate": 1.639753401004166e-05, "loss": 1.6649, "step": 86090 }, { "epoch": 0.5411584467621237, "grad_norm": 11.420140266418457, "learning_rate": 1.6397114909097007e-05, "loss": 1.6038, "step": 86100 }, { "epoch": 0.5412212990788208, "grad_norm": 6.622860431671143, "learning_rate": 1.639669580815235e-05, "loss": 1.8323, "step": 86110 }, { "epoch": 0.5412841513955179, "grad_norm": 5.812434196472168, "learning_rate": 1.6396276707207698e-05, "loss": 1.7367, "step": 86120 }, { "epoch": 0.541347003712215, "grad_norm": 7.1364240646362305, "learning_rate": 1.6395857606263045e-05, "loss": 1.7987, "step": 86130 }, { "epoch": 0.5414098560289121, "grad_norm": 7.4703569412231445, "learning_rate": 1.6395438505318392e-05, "loss": 1.8512, "step": 86140 }, { "epoch": 0.5414727083456091, "grad_norm": 6.3275651931762695, "learning_rate": 1.639501940437374e-05, "loss": 1.7602, "step": 86150 }, { "epoch": 0.5415355606623062, "grad_norm": 6.205627918243408, "learning_rate": 1.6394600303429086e-05, "loss": 1.6417, "step": 86160 }, { "epoch": 0.5415984129790034, "grad_norm": 6.279049396514893, "learning_rate": 1.639418120248443e-05, "loss": 1.6631, "step": 86170 }, { "epoch": 0.5416612652957005, "grad_norm": 7.833987236022949, "learning_rate": 1.6393762101539777e-05, "loss": 1.5057, "step": 86180 }, { "epoch": 0.5417241176123976, "grad_norm": 6.630548000335693, "learning_rate": 1.6393343000595124e-05, "loss": 1.6445, "step": 86190 }, { "epoch": 0.5417869699290947, "grad_norm": 6.6342453956604, "learning_rate": 1.639292389965047e-05, "loss": 1.8989, "step": 86200 }, { "epoch": 0.5418498222457918, "grad_norm": 6.155070781707764, "learning_rate": 1.6392504798705818e-05, "loss": 1.6871, "step": 86210 }, { "epoch": 0.5419126745624889, "grad_norm": 6.080020904541016, "learning_rate": 1.6392085697761165e-05, "loss": 1.666, "step": 86220 }, { "epoch": 0.541975526879186, "grad_norm": 7.034708023071289, "learning_rate": 1.6391666596816512e-05, "loss": 1.7103, "step": 86230 }, { "epoch": 0.5420383791958832, "grad_norm": 6.662476539611816, "learning_rate": 1.6391247495871856e-05, "loss": 1.8543, "step": 86240 }, { "epoch": 0.5421012315125803, "grad_norm": 6.4750776290893555, "learning_rate": 1.6390828394927203e-05, "loss": 1.8943, "step": 86250 }, { "epoch": 0.5421640838292774, "grad_norm": 6.518702983856201, "learning_rate": 1.639040929398255e-05, "loss": 1.8582, "step": 86260 }, { "epoch": 0.5422269361459745, "grad_norm": 7.378445148468018, "learning_rate": 1.6389990193037897e-05, "loss": 1.5996, "step": 86270 }, { "epoch": 0.5422897884626716, "grad_norm": 5.998882293701172, "learning_rate": 1.6389571092093244e-05, "loss": 1.7373, "step": 86280 }, { "epoch": 0.5423526407793687, "grad_norm": 6.869247913360596, "learning_rate": 1.6389151991148588e-05, "loss": 1.7492, "step": 86290 }, { "epoch": 0.5424154930960658, "grad_norm": 9.03229808807373, "learning_rate": 1.6388732890203935e-05, "loss": 1.8033, "step": 86300 }, { "epoch": 0.542478345412763, "grad_norm": 7.205885410308838, "learning_rate": 1.6388313789259282e-05, "loss": 1.7698, "step": 86310 }, { "epoch": 0.5425411977294601, "grad_norm": 6.515379905700684, "learning_rate": 1.638789468831463e-05, "loss": 1.5568, "step": 86320 }, { "epoch": 0.5426040500461572, "grad_norm": 6.291879177093506, "learning_rate": 1.6387475587369973e-05, "loss": 1.7063, "step": 86330 }, { "epoch": 0.5426669023628543, "grad_norm": 6.5558271408081055, "learning_rate": 1.638705648642532e-05, "loss": 1.554, "step": 86340 }, { "epoch": 0.5427297546795514, "grad_norm": 6.825722694396973, "learning_rate": 1.6386637385480667e-05, "loss": 1.8762, "step": 86350 }, { "epoch": 0.5427926069962485, "grad_norm": 6.950225830078125, "learning_rate": 1.6386218284536014e-05, "loss": 1.4571, "step": 86360 }, { "epoch": 0.5428554593129457, "grad_norm": 6.12157678604126, "learning_rate": 1.638579918359136e-05, "loss": 1.6073, "step": 86370 }, { "epoch": 0.5429183116296428, "grad_norm": 6.891550540924072, "learning_rate": 1.6385380082646708e-05, "loss": 1.5565, "step": 86380 }, { "epoch": 0.5429811639463399, "grad_norm": 6.586368560791016, "learning_rate": 1.6384960981702055e-05, "loss": 1.389, "step": 86390 }, { "epoch": 0.543044016263037, "grad_norm": 6.79563570022583, "learning_rate": 1.6384541880757402e-05, "loss": 1.6659, "step": 86400 }, { "epoch": 0.543106868579734, "grad_norm": 7.297008991241455, "learning_rate": 1.638412277981275e-05, "loss": 1.5353, "step": 86410 }, { "epoch": 0.5431697208964311, "grad_norm": 6.567941665649414, "learning_rate": 1.6383703678868093e-05, "loss": 1.6088, "step": 86420 }, { "epoch": 0.5432325732131282, "grad_norm": 6.119392395019531, "learning_rate": 1.638328457792344e-05, "loss": 1.5637, "step": 86430 }, { "epoch": 0.5432954255298253, "grad_norm": 6.379626750946045, "learning_rate": 1.6382865476978787e-05, "loss": 1.7865, "step": 86440 }, { "epoch": 0.5433582778465225, "grad_norm": 5.699408531188965, "learning_rate": 1.6382446376034134e-05, "loss": 1.7122, "step": 86450 }, { "epoch": 0.5434211301632196, "grad_norm": 6.399451732635498, "learning_rate": 1.638202727508948e-05, "loss": 1.7304, "step": 86460 }, { "epoch": 0.5434839824799167, "grad_norm": 6.494431495666504, "learning_rate": 1.6381608174144825e-05, "loss": 1.4777, "step": 86470 }, { "epoch": 0.5435468347966138, "grad_norm": 6.212937355041504, "learning_rate": 1.6381189073200172e-05, "loss": 1.5896, "step": 86480 }, { "epoch": 0.5436096871133109, "grad_norm": 6.120029449462891, "learning_rate": 1.638076997225552e-05, "loss": 1.843, "step": 86490 }, { "epoch": 0.543672539430008, "grad_norm": 6.218716144561768, "learning_rate": 1.6380350871310866e-05, "loss": 1.6116, "step": 86500 }, { "epoch": 0.5437353917467052, "grad_norm": 5.772097587585449, "learning_rate": 1.637993177036621e-05, "loss": 1.6283, "step": 86510 }, { "epoch": 0.5437982440634023, "grad_norm": 7.5236358642578125, "learning_rate": 1.6379512669421557e-05, "loss": 1.6003, "step": 86520 }, { "epoch": 0.5438610963800994, "grad_norm": 7.945309638977051, "learning_rate": 1.6379093568476904e-05, "loss": 1.701, "step": 86530 }, { "epoch": 0.5439239486967965, "grad_norm": 6.487154006958008, "learning_rate": 1.637867446753225e-05, "loss": 1.5663, "step": 86540 }, { "epoch": 0.5439868010134936, "grad_norm": 7.235054969787598, "learning_rate": 1.6378255366587598e-05, "loss": 1.6827, "step": 86550 }, { "epoch": 0.5440496533301907, "grad_norm": 6.165349960327148, "learning_rate": 1.6377836265642942e-05, "loss": 1.4464, "step": 86560 }, { "epoch": 0.5441125056468878, "grad_norm": 7.019979953765869, "learning_rate": 1.637741716469829e-05, "loss": 1.5645, "step": 86570 }, { "epoch": 0.544175357963585, "grad_norm": 7.073566436767578, "learning_rate": 1.6376998063753636e-05, "loss": 1.6864, "step": 86580 }, { "epoch": 0.5442382102802821, "grad_norm": 7.4121527671813965, "learning_rate": 1.6376578962808983e-05, "loss": 1.8141, "step": 86590 }, { "epoch": 0.5443010625969792, "grad_norm": 6.5008463859558105, "learning_rate": 1.637615986186433e-05, "loss": 1.6266, "step": 86600 }, { "epoch": 0.5443639149136763, "grad_norm": 7.9003753662109375, "learning_rate": 1.6375740760919677e-05, "loss": 1.6775, "step": 86610 }, { "epoch": 0.5444267672303734, "grad_norm": 7.4179863929748535, "learning_rate": 1.6375321659975024e-05, "loss": 1.6263, "step": 86620 }, { "epoch": 0.5444896195470705, "grad_norm": 6.775514602661133, "learning_rate": 1.637490255903037e-05, "loss": 1.5732, "step": 86630 }, { "epoch": 0.5445524718637677, "grad_norm": 6.417285919189453, "learning_rate": 1.6374483458085715e-05, "loss": 1.6495, "step": 86640 }, { "epoch": 0.5446153241804648, "grad_norm": 6.63004207611084, "learning_rate": 1.6374064357141062e-05, "loss": 1.882, "step": 86650 }, { "epoch": 0.5446781764971618, "grad_norm": 6.118006229400635, "learning_rate": 1.637364525619641e-05, "loss": 1.6263, "step": 86660 }, { "epoch": 0.5447410288138589, "grad_norm": 6.563739776611328, "learning_rate": 1.6373226155251756e-05, "loss": 1.6761, "step": 86670 }, { "epoch": 0.544803881130556, "grad_norm": 6.6065545082092285, "learning_rate": 1.6372807054307103e-05, "loss": 1.4558, "step": 86680 }, { "epoch": 0.5448667334472531, "grad_norm": 6.14282751083374, "learning_rate": 1.6372387953362447e-05, "loss": 1.6269, "step": 86690 }, { "epoch": 0.5449295857639502, "grad_norm": 6.8921098709106445, "learning_rate": 1.6371968852417794e-05, "loss": 1.423, "step": 86700 }, { "epoch": 0.5449924380806473, "grad_norm": 7.513279438018799, "learning_rate": 1.637154975147314e-05, "loss": 1.629, "step": 86710 }, { "epoch": 0.5450552903973445, "grad_norm": 5.247377872467041, "learning_rate": 1.6371130650528488e-05, "loss": 1.6349, "step": 86720 }, { "epoch": 0.5451181427140416, "grad_norm": 7.0045318603515625, "learning_rate": 1.6370711549583832e-05, "loss": 1.5124, "step": 86730 }, { "epoch": 0.5451809950307387, "grad_norm": 7.852500915527344, "learning_rate": 1.637029244863918e-05, "loss": 1.6307, "step": 86740 }, { "epoch": 0.5452438473474358, "grad_norm": 7.170313358306885, "learning_rate": 1.6369873347694526e-05, "loss": 1.5946, "step": 86750 }, { "epoch": 0.5453066996641329, "grad_norm": 7.545778274536133, "learning_rate": 1.6369454246749873e-05, "loss": 1.6845, "step": 86760 }, { "epoch": 0.54536955198083, "grad_norm": 5.048802375793457, "learning_rate": 1.636903514580522e-05, "loss": 1.7877, "step": 86770 }, { "epoch": 0.5454324042975272, "grad_norm": 6.210660457611084, "learning_rate": 1.6368616044860567e-05, "loss": 1.6917, "step": 86780 }, { "epoch": 0.5454952566142243, "grad_norm": 6.029952526092529, "learning_rate": 1.6368196943915914e-05, "loss": 1.7503, "step": 86790 }, { "epoch": 0.5455581089309214, "grad_norm": 7.362029552459717, "learning_rate": 1.6367777842971258e-05, "loss": 1.4296, "step": 86800 }, { "epoch": 0.5456209612476185, "grad_norm": 7.046006202697754, "learning_rate": 1.6367358742026605e-05, "loss": 1.6642, "step": 86810 }, { "epoch": 0.5456838135643156, "grad_norm": 7.1073503494262695, "learning_rate": 1.6366939641081952e-05, "loss": 1.6695, "step": 86820 }, { "epoch": 0.5457466658810127, "grad_norm": 6.6842522621154785, "learning_rate": 1.63665205401373e-05, "loss": 1.5171, "step": 86830 }, { "epoch": 0.5458095181977098, "grad_norm": 6.862412452697754, "learning_rate": 1.6366101439192646e-05, "loss": 1.6675, "step": 86840 }, { "epoch": 0.545872370514407, "grad_norm": 7.693253040313721, "learning_rate": 1.6365682338247993e-05, "loss": 1.621, "step": 86850 }, { "epoch": 0.5459352228311041, "grad_norm": 5.722990036010742, "learning_rate": 1.636526323730334e-05, "loss": 1.6731, "step": 86860 }, { "epoch": 0.5459980751478012, "grad_norm": 6.423402786254883, "learning_rate": 1.6364844136358684e-05, "loss": 1.7002, "step": 86870 }, { "epoch": 0.5460609274644983, "grad_norm": 7.07973051071167, "learning_rate": 1.636442503541403e-05, "loss": 1.7934, "step": 86880 }, { "epoch": 0.5461237797811954, "grad_norm": 6.4761643409729, "learning_rate": 1.6364005934469378e-05, "loss": 1.8783, "step": 86890 }, { "epoch": 0.5461866320978925, "grad_norm": 7.523335933685303, "learning_rate": 1.6363586833524725e-05, "loss": 1.7059, "step": 86900 }, { "epoch": 0.5462494844145896, "grad_norm": 9.280854225158691, "learning_rate": 1.636316773258007e-05, "loss": 1.6908, "step": 86910 }, { "epoch": 0.5463123367312867, "grad_norm": 6.539122104644775, "learning_rate": 1.6362748631635416e-05, "loss": 1.8692, "step": 86920 }, { "epoch": 0.5463751890479838, "grad_norm": 7.5889434814453125, "learning_rate": 1.6362329530690763e-05, "loss": 1.7153, "step": 86930 }, { "epoch": 0.5464380413646809, "grad_norm": 6.818871974945068, "learning_rate": 1.636191042974611e-05, "loss": 1.9356, "step": 86940 }, { "epoch": 0.546500893681378, "grad_norm": 6.093920707702637, "learning_rate": 1.6361491328801454e-05, "loss": 1.4935, "step": 86950 }, { "epoch": 0.5465637459980751, "grad_norm": 5.412710189819336, "learning_rate": 1.63610722278568e-05, "loss": 1.6619, "step": 86960 }, { "epoch": 0.5466265983147722, "grad_norm": 7.041706085205078, "learning_rate": 1.6360653126912148e-05, "loss": 1.7782, "step": 86970 }, { "epoch": 0.5466894506314693, "grad_norm": 6.9528961181640625, "learning_rate": 1.6360234025967495e-05, "loss": 1.6705, "step": 86980 }, { "epoch": 0.5467523029481665, "grad_norm": 6.929596424102783, "learning_rate": 1.6359814925022842e-05, "loss": 1.7646, "step": 86990 }, { "epoch": 0.5468151552648636, "grad_norm": 6.04166316986084, "learning_rate": 1.635939582407819e-05, "loss": 1.7182, "step": 87000 }, { "epoch": 0.5468780075815607, "grad_norm": 7.2244486808776855, "learning_rate": 1.6358976723133536e-05, "loss": 1.5931, "step": 87010 }, { "epoch": 0.5469408598982578, "grad_norm": 6.446841716766357, "learning_rate": 1.6358557622188883e-05, "loss": 1.8676, "step": 87020 }, { "epoch": 0.5470037122149549, "grad_norm": 7.067605018615723, "learning_rate": 1.635813852124423e-05, "loss": 1.7028, "step": 87030 }, { "epoch": 0.547066564531652, "grad_norm": 6.6622724533081055, "learning_rate": 1.6357719420299574e-05, "loss": 1.5149, "step": 87040 }, { "epoch": 0.5471294168483491, "grad_norm": 7.171753883361816, "learning_rate": 1.635730031935492e-05, "loss": 1.6603, "step": 87050 }, { "epoch": 0.5471922691650463, "grad_norm": 6.751748561859131, "learning_rate": 1.635688121841027e-05, "loss": 1.6392, "step": 87060 }, { "epoch": 0.5472551214817434, "grad_norm": 7.492148399353027, "learning_rate": 1.6356462117465615e-05, "loss": 1.815, "step": 87070 }, { "epoch": 0.5473179737984405, "grad_norm": 7.068755626678467, "learning_rate": 1.6356043016520962e-05, "loss": 1.5841, "step": 87080 }, { "epoch": 0.5473808261151376, "grad_norm": 6.7877278327941895, "learning_rate": 1.6355623915576306e-05, "loss": 1.7141, "step": 87090 }, { "epoch": 0.5474436784318347, "grad_norm": 7.786384582519531, "learning_rate": 1.6355204814631653e-05, "loss": 1.6763, "step": 87100 }, { "epoch": 0.5475065307485318, "grad_norm": 6.93129825592041, "learning_rate": 1.6354785713687e-05, "loss": 1.7916, "step": 87110 }, { "epoch": 0.547569383065229, "grad_norm": 6.792569637298584, "learning_rate": 1.6354366612742347e-05, "loss": 1.7659, "step": 87120 }, { "epoch": 0.5476322353819261, "grad_norm": 6.7060394287109375, "learning_rate": 1.635394751179769e-05, "loss": 1.6655, "step": 87130 }, { "epoch": 0.5476950876986232, "grad_norm": 7.103468418121338, "learning_rate": 1.6353528410853038e-05, "loss": 1.8176, "step": 87140 }, { "epoch": 0.5477579400153203, "grad_norm": 6.279816627502441, "learning_rate": 1.6353109309908385e-05, "loss": 1.5718, "step": 87150 }, { "epoch": 0.5478207923320174, "grad_norm": 6.506233215332031, "learning_rate": 1.6352690208963732e-05, "loss": 1.8861, "step": 87160 }, { "epoch": 0.5478836446487144, "grad_norm": 8.267227172851562, "learning_rate": 1.635227110801908e-05, "loss": 1.6025, "step": 87170 }, { "epoch": 0.5479464969654115, "grad_norm": 6.034891605377197, "learning_rate": 1.6351852007074423e-05, "loss": 1.7394, "step": 87180 }, { "epoch": 0.5480093492821086, "grad_norm": 6.6851043701171875, "learning_rate": 1.635143290612977e-05, "loss": 1.4129, "step": 87190 }, { "epoch": 0.5480722015988058, "grad_norm": 6.170161724090576, "learning_rate": 1.6351013805185117e-05, "loss": 1.7897, "step": 87200 }, { "epoch": 0.5481350539155029, "grad_norm": 8.239502906799316, "learning_rate": 1.6350594704240464e-05, "loss": 1.6594, "step": 87210 }, { "epoch": 0.5481979062322, "grad_norm": 8.009893417358398, "learning_rate": 1.635017560329581e-05, "loss": 1.9211, "step": 87220 }, { "epoch": 0.5482607585488971, "grad_norm": 5.180368900299072, "learning_rate": 1.634975650235116e-05, "loss": 1.6533, "step": 87230 }, { "epoch": 0.5483236108655942, "grad_norm": 5.979281425476074, "learning_rate": 1.6349337401406505e-05, "loss": 1.5581, "step": 87240 }, { "epoch": 0.5483864631822913, "grad_norm": 5.247679233551025, "learning_rate": 1.6348918300461853e-05, "loss": 1.4401, "step": 87250 }, { "epoch": 0.5484493154989885, "grad_norm": 5.989559173583984, "learning_rate": 1.6348499199517196e-05, "loss": 1.4667, "step": 87260 }, { "epoch": 0.5485121678156856, "grad_norm": 7.64725399017334, "learning_rate": 1.6348080098572543e-05, "loss": 1.8236, "step": 87270 }, { "epoch": 0.5485750201323827, "grad_norm": 6.5117011070251465, "learning_rate": 1.634766099762789e-05, "loss": 1.6047, "step": 87280 }, { "epoch": 0.5486378724490798, "grad_norm": 6.34282922744751, "learning_rate": 1.6347241896683237e-05, "loss": 1.3005, "step": 87290 }, { "epoch": 0.5487007247657769, "grad_norm": 7.494961261749268, "learning_rate": 1.6346822795738584e-05, "loss": 1.5657, "step": 87300 }, { "epoch": 0.548763577082474, "grad_norm": 7.219995498657227, "learning_rate": 1.6346403694793928e-05, "loss": 1.7029, "step": 87310 }, { "epoch": 0.5488264293991711, "grad_norm": 5.836889266967773, "learning_rate": 1.6345984593849275e-05, "loss": 1.8481, "step": 87320 }, { "epoch": 0.5488892817158683, "grad_norm": 8.275524139404297, "learning_rate": 1.6345565492904622e-05, "loss": 1.868, "step": 87330 }, { "epoch": 0.5489521340325654, "grad_norm": 5.790599822998047, "learning_rate": 1.634514639195997e-05, "loss": 1.5417, "step": 87340 }, { "epoch": 0.5490149863492625, "grad_norm": 6.310551643371582, "learning_rate": 1.6344727291015313e-05, "loss": 1.8961, "step": 87350 }, { "epoch": 0.5490778386659596, "grad_norm": 8.10694694519043, "learning_rate": 1.634430819007066e-05, "loss": 1.3983, "step": 87360 }, { "epoch": 0.5491406909826567, "grad_norm": 6.893750190734863, "learning_rate": 1.6343889089126007e-05, "loss": 1.7624, "step": 87370 }, { "epoch": 0.5492035432993538, "grad_norm": 7.259228229522705, "learning_rate": 1.6343469988181354e-05, "loss": 1.7395, "step": 87380 }, { "epoch": 0.549266395616051, "grad_norm": 6.815544128417969, "learning_rate": 1.63430508872367e-05, "loss": 1.6337, "step": 87390 }, { "epoch": 0.5493292479327481, "grad_norm": 7.200207233428955, "learning_rate": 1.634263178629205e-05, "loss": 1.5261, "step": 87400 }, { "epoch": 0.5493921002494452, "grad_norm": 7.834887504577637, "learning_rate": 1.6342212685347395e-05, "loss": 1.8426, "step": 87410 }, { "epoch": 0.5494549525661423, "grad_norm": 7.845221519470215, "learning_rate": 1.6341793584402743e-05, "loss": 1.8138, "step": 87420 }, { "epoch": 0.5495178048828393, "grad_norm": 6.522507667541504, "learning_rate": 1.6341374483458086e-05, "loss": 1.5218, "step": 87430 }, { "epoch": 0.5495806571995364, "grad_norm": 7.382540225982666, "learning_rate": 1.6340955382513433e-05, "loss": 1.9934, "step": 87440 }, { "epoch": 0.5496435095162335, "grad_norm": 6.693045616149902, "learning_rate": 1.634053628156878e-05, "loss": 1.6689, "step": 87450 }, { "epoch": 0.5497063618329306, "grad_norm": 6.891082286834717, "learning_rate": 1.6340117180624127e-05, "loss": 1.7584, "step": 87460 }, { "epoch": 0.5497692141496278, "grad_norm": 8.451995849609375, "learning_rate": 1.6339698079679475e-05, "loss": 1.8392, "step": 87470 }, { "epoch": 0.5498320664663249, "grad_norm": 6.282824993133545, "learning_rate": 1.633927897873482e-05, "loss": 1.6657, "step": 87480 }, { "epoch": 0.549894918783022, "grad_norm": 6.958793640136719, "learning_rate": 1.6338859877790165e-05, "loss": 1.7762, "step": 87490 }, { "epoch": 0.5499577710997191, "grad_norm": 5.975584506988525, "learning_rate": 1.6338440776845512e-05, "loss": 1.8833, "step": 87500 }, { "epoch": 0.5500206234164162, "grad_norm": 7.92507791519165, "learning_rate": 1.633802167590086e-05, "loss": 1.7359, "step": 87510 }, { "epoch": 0.5500834757331133, "grad_norm": 7.211165428161621, "learning_rate": 1.6337602574956206e-05, "loss": 1.5616, "step": 87520 }, { "epoch": 0.5501463280498105, "grad_norm": 6.03791618347168, "learning_rate": 1.633718347401155e-05, "loss": 1.7102, "step": 87530 }, { "epoch": 0.5502091803665076, "grad_norm": 6.354442596435547, "learning_rate": 1.6336764373066897e-05, "loss": 1.5821, "step": 87540 }, { "epoch": 0.5502720326832047, "grad_norm": 5.757572174072266, "learning_rate": 1.6336345272122244e-05, "loss": 1.7117, "step": 87550 }, { "epoch": 0.5503348849999018, "grad_norm": 6.304782390594482, "learning_rate": 1.633592617117759e-05, "loss": 1.7086, "step": 87560 }, { "epoch": 0.5503977373165989, "grad_norm": 6.215713977813721, "learning_rate": 1.6335507070232935e-05, "loss": 1.4925, "step": 87570 }, { "epoch": 0.550460589633296, "grad_norm": 5.681164264678955, "learning_rate": 1.6335087969288282e-05, "loss": 1.5901, "step": 87580 }, { "epoch": 0.5505234419499931, "grad_norm": 7.626335144042969, "learning_rate": 1.633466886834363e-05, "loss": 1.8527, "step": 87590 }, { "epoch": 0.5505862942666903, "grad_norm": 6.560548782348633, "learning_rate": 1.6334249767398976e-05, "loss": 1.9203, "step": 87600 }, { "epoch": 0.5506491465833874, "grad_norm": 7.164431571960449, "learning_rate": 1.6333830666454323e-05, "loss": 1.5093, "step": 87610 }, { "epoch": 0.5507119989000845, "grad_norm": 6.229931354522705, "learning_rate": 1.633341156550967e-05, "loss": 1.6094, "step": 87620 }, { "epoch": 0.5507748512167816, "grad_norm": 6.629180908203125, "learning_rate": 1.6332992464565017e-05, "loss": 1.7345, "step": 87630 }, { "epoch": 0.5508377035334787, "grad_norm": 7.034588813781738, "learning_rate": 1.6332573363620365e-05, "loss": 1.7259, "step": 87640 }, { "epoch": 0.5509005558501758, "grad_norm": 7.558387279510498, "learning_rate": 1.633215426267571e-05, "loss": 1.7842, "step": 87650 }, { "epoch": 0.550963408166873, "grad_norm": 7.3063273429870605, "learning_rate": 1.6331735161731055e-05, "loss": 1.8985, "step": 87660 }, { "epoch": 0.5510262604835701, "grad_norm": 5.6917266845703125, "learning_rate": 1.6331316060786402e-05, "loss": 1.7987, "step": 87670 }, { "epoch": 0.5510891128002671, "grad_norm": 6.492855548858643, "learning_rate": 1.633089695984175e-05, "loss": 1.9732, "step": 87680 }, { "epoch": 0.5511519651169642, "grad_norm": 7.414215087890625, "learning_rate": 1.6330477858897097e-05, "loss": 1.7681, "step": 87690 }, { "epoch": 0.5512148174336613, "grad_norm": 6.806081771850586, "learning_rate": 1.6330058757952444e-05, "loss": 1.642, "step": 87700 }, { "epoch": 0.5512776697503584, "grad_norm": 6.732658386230469, "learning_rate": 1.6329639657007787e-05, "loss": 1.9905, "step": 87710 }, { "epoch": 0.5513405220670555, "grad_norm": 6.367004871368408, "learning_rate": 1.6329220556063134e-05, "loss": 1.87, "step": 87720 }, { "epoch": 0.5514033743837526, "grad_norm": 6.659083843231201, "learning_rate": 1.632880145511848e-05, "loss": 1.6762, "step": 87730 }, { "epoch": 0.5514662267004498, "grad_norm": 6.704216480255127, "learning_rate": 1.632838235417383e-05, "loss": 1.9091, "step": 87740 }, { "epoch": 0.5515290790171469, "grad_norm": 6.720300197601318, "learning_rate": 1.6327963253229172e-05, "loss": 1.5238, "step": 87750 }, { "epoch": 0.551591931333844, "grad_norm": 8.09980583190918, "learning_rate": 1.632754415228452e-05, "loss": 2.0008, "step": 87760 }, { "epoch": 0.5516547836505411, "grad_norm": 6.566691875457764, "learning_rate": 1.6327125051339866e-05, "loss": 1.7393, "step": 87770 }, { "epoch": 0.5517176359672382, "grad_norm": 5.294040203094482, "learning_rate": 1.6326705950395213e-05, "loss": 1.6083, "step": 87780 }, { "epoch": 0.5517804882839353, "grad_norm": 6.389209747314453, "learning_rate": 1.632628684945056e-05, "loss": 1.7037, "step": 87790 }, { "epoch": 0.5518433406006324, "grad_norm": 7.676865100860596, "learning_rate": 1.6325867748505908e-05, "loss": 1.6174, "step": 87800 }, { "epoch": 0.5519061929173296, "grad_norm": 6.478692531585693, "learning_rate": 1.632544864756125e-05, "loss": 1.5484, "step": 87810 }, { "epoch": 0.5519690452340267, "grad_norm": 7.029147148132324, "learning_rate": 1.6325029546616598e-05, "loss": 1.6543, "step": 87820 }, { "epoch": 0.5520318975507238, "grad_norm": 7.854307651519775, "learning_rate": 1.6324610445671945e-05, "loss": 1.6173, "step": 87830 }, { "epoch": 0.5520947498674209, "grad_norm": 6.4751877784729, "learning_rate": 1.6324191344727292e-05, "loss": 1.7093, "step": 87840 }, { "epoch": 0.552157602184118, "grad_norm": 5.035534858703613, "learning_rate": 1.632377224378264e-05, "loss": 1.7226, "step": 87850 }, { "epoch": 0.5522204545008151, "grad_norm": 6.849821090698242, "learning_rate": 1.6323353142837987e-05, "loss": 1.8909, "step": 87860 }, { "epoch": 0.5522833068175123, "grad_norm": 7.671096324920654, "learning_rate": 1.6322934041893334e-05, "loss": 1.51, "step": 87870 }, { "epoch": 0.5523461591342094, "grad_norm": 6.942525386810303, "learning_rate": 1.6322514940948677e-05, "loss": 1.7381, "step": 87880 }, { "epoch": 0.5524090114509065, "grad_norm": 6.023004531860352, "learning_rate": 1.6322095840004024e-05, "loss": 1.7272, "step": 87890 }, { "epoch": 0.5524718637676036, "grad_norm": 6.889723300933838, "learning_rate": 1.632167673905937e-05, "loss": 1.4791, "step": 87900 }, { "epoch": 0.5525347160843007, "grad_norm": 6.181000232696533, "learning_rate": 1.632125763811472e-05, "loss": 1.6955, "step": 87910 }, { "epoch": 0.5525975684009978, "grad_norm": 5.688227653503418, "learning_rate": 1.6320838537170066e-05, "loss": 1.7663, "step": 87920 }, { "epoch": 0.552660420717695, "grad_norm": 6.987216949462891, "learning_rate": 1.632041943622541e-05, "loss": 1.5974, "step": 87930 }, { "epoch": 0.552723273034392, "grad_norm": 5.845030784606934, "learning_rate": 1.6320000335280756e-05, "loss": 1.8496, "step": 87940 }, { "epoch": 0.5527861253510891, "grad_norm": 6.893259048461914, "learning_rate": 1.6319581234336103e-05, "loss": 1.7192, "step": 87950 }, { "epoch": 0.5528489776677862, "grad_norm": 6.025664806365967, "learning_rate": 1.631916213339145e-05, "loss": 1.5366, "step": 87960 }, { "epoch": 0.5529118299844833, "grad_norm": 7.317282676696777, "learning_rate": 1.6318743032446794e-05, "loss": 1.6679, "step": 87970 }, { "epoch": 0.5529746823011804, "grad_norm": 6.57348108291626, "learning_rate": 1.631832393150214e-05, "loss": 1.842, "step": 87980 }, { "epoch": 0.5530375346178775, "grad_norm": 6.035397052764893, "learning_rate": 1.631790483055749e-05, "loss": 1.4517, "step": 87990 }, { "epoch": 0.5531003869345746, "grad_norm": 6.152946472167969, "learning_rate": 1.6317485729612835e-05, "loss": 1.533, "step": 88000 }, { "epoch": 0.5531632392512718, "grad_norm": 6.46749210357666, "learning_rate": 1.6317066628668182e-05, "loss": 1.5675, "step": 88010 }, { "epoch": 0.5532260915679689, "grad_norm": 7.725470542907715, "learning_rate": 1.631664752772353e-05, "loss": 1.4354, "step": 88020 }, { "epoch": 0.553288943884666, "grad_norm": 6.083590984344482, "learning_rate": 1.6316228426778877e-05, "loss": 1.8766, "step": 88030 }, { "epoch": 0.5533517962013631, "grad_norm": 6.757826805114746, "learning_rate": 1.6315809325834224e-05, "loss": 1.5701, "step": 88040 }, { "epoch": 0.5534146485180602, "grad_norm": 6.432161331176758, "learning_rate": 1.6315390224889567e-05, "loss": 1.7546, "step": 88050 }, { "epoch": 0.5534775008347573, "grad_norm": 6.68278169631958, "learning_rate": 1.6314971123944914e-05, "loss": 1.5692, "step": 88060 }, { "epoch": 0.5535403531514544, "grad_norm": 7.217104434967041, "learning_rate": 1.631455202300026e-05, "loss": 1.548, "step": 88070 }, { "epoch": 0.5536032054681516, "grad_norm": 5.770365238189697, "learning_rate": 1.6314174832150073e-05, "loss": 1.5295, "step": 88080 }, { "epoch": 0.5536660577848487, "grad_norm": 7.724226951599121, "learning_rate": 1.631375573120542e-05, "loss": 1.6381, "step": 88090 }, { "epoch": 0.5537289101015458, "grad_norm": 8.098963737487793, "learning_rate": 1.6313336630260767e-05, "loss": 1.6756, "step": 88100 }, { "epoch": 0.5537917624182429, "grad_norm": 6.411672592163086, "learning_rate": 1.6312917529316114e-05, "loss": 1.5901, "step": 88110 }, { "epoch": 0.55385461473494, "grad_norm": 7.5321574211120605, "learning_rate": 1.6312498428371458e-05, "loss": 1.7832, "step": 88120 }, { "epoch": 0.5539174670516371, "grad_norm": 6.153202056884766, "learning_rate": 1.6312079327426805e-05, "loss": 1.5185, "step": 88130 }, { "epoch": 0.5539803193683343, "grad_norm": 6.171274662017822, "learning_rate": 1.6311660226482152e-05, "loss": 1.5899, "step": 88140 }, { "epoch": 0.5540431716850314, "grad_norm": 4.977878093719482, "learning_rate": 1.63112411255375e-05, "loss": 1.4111, "step": 88150 }, { "epoch": 0.5541060240017285, "grad_norm": 6.700924873352051, "learning_rate": 1.6310822024592846e-05, "loss": 1.7089, "step": 88160 }, { "epoch": 0.5541688763184256, "grad_norm": 6.37979793548584, "learning_rate": 1.6310402923648193e-05, "loss": 1.5482, "step": 88170 }, { "epoch": 0.5542317286351227, "grad_norm": 6.379642963409424, "learning_rate": 1.6309983822703537e-05, "loss": 1.6192, "step": 88180 }, { "epoch": 0.5542945809518198, "grad_norm": 6.328685760498047, "learning_rate": 1.6309564721758884e-05, "loss": 1.8287, "step": 88190 }, { "epoch": 0.5543574332685168, "grad_norm": 7.2171149253845215, "learning_rate": 1.630914562081423e-05, "loss": 1.5507, "step": 88200 }, { "epoch": 0.554420285585214, "grad_norm": 6.692591190338135, "learning_rate": 1.6308726519869578e-05, "loss": 1.5776, "step": 88210 }, { "epoch": 0.5544831379019111, "grad_norm": 6.94549036026001, "learning_rate": 1.6308307418924925e-05, "loss": 1.882, "step": 88220 }, { "epoch": 0.5545459902186082, "grad_norm": 5.682509899139404, "learning_rate": 1.630788831798027e-05, "loss": 1.648, "step": 88230 }, { "epoch": 0.5546088425353053, "grad_norm": 6.2639055252075195, "learning_rate": 1.6307469217035616e-05, "loss": 1.7571, "step": 88240 }, { "epoch": 0.5546716948520024, "grad_norm": 6.274423599243164, "learning_rate": 1.6307050116090963e-05, "loss": 1.7917, "step": 88250 }, { "epoch": 0.5547345471686995, "grad_norm": 5.863760471343994, "learning_rate": 1.630663101514631e-05, "loss": 1.7114, "step": 88260 }, { "epoch": 0.5547973994853966, "grad_norm": 6.9797468185424805, "learning_rate": 1.6306211914201654e-05, "loss": 1.6295, "step": 88270 }, { "epoch": 0.5548602518020938, "grad_norm": 7.441795349121094, "learning_rate": 1.6305792813257e-05, "loss": 1.6251, "step": 88280 }, { "epoch": 0.5549231041187909, "grad_norm": 6.823574066162109, "learning_rate": 1.6305373712312348e-05, "loss": 1.4674, "step": 88290 }, { "epoch": 0.554985956435488, "grad_norm": 6.422065258026123, "learning_rate": 1.6304954611367695e-05, "loss": 1.765, "step": 88300 }, { "epoch": 0.5550488087521851, "grad_norm": 6.555672645568848, "learning_rate": 1.6304535510423042e-05, "loss": 1.7314, "step": 88310 }, { "epoch": 0.5551116610688822, "grad_norm": 6.252634525299072, "learning_rate": 1.630411640947839e-05, "loss": 1.5402, "step": 88320 }, { "epoch": 0.5551745133855793, "grad_norm": 6.4778594970703125, "learning_rate": 1.6303697308533736e-05, "loss": 1.6508, "step": 88330 }, { "epoch": 0.5552373657022764, "grad_norm": 6.431690692901611, "learning_rate": 1.6303278207589083e-05, "loss": 1.8059, "step": 88340 }, { "epoch": 0.5553002180189736, "grad_norm": 6.253882884979248, "learning_rate": 1.630285910664443e-05, "loss": 1.7937, "step": 88350 }, { "epoch": 0.5553630703356707, "grad_norm": 7.070196628570557, "learning_rate": 1.6302440005699774e-05, "loss": 1.9511, "step": 88360 }, { "epoch": 0.5554259226523678, "grad_norm": 6.050371170043945, "learning_rate": 1.630202090475512e-05, "loss": 1.4563, "step": 88370 }, { "epoch": 0.5554887749690649, "grad_norm": 7.460027694702148, "learning_rate": 1.6301601803810468e-05, "loss": 1.8283, "step": 88380 }, { "epoch": 0.555551627285762, "grad_norm": 6.309556484222412, "learning_rate": 1.6301182702865815e-05, "loss": 1.7847, "step": 88390 }, { "epoch": 0.5556144796024591, "grad_norm": 7.3537139892578125, "learning_rate": 1.630076360192116e-05, "loss": 1.7832, "step": 88400 }, { "epoch": 0.5556773319191562, "grad_norm": 6.594983100891113, "learning_rate": 1.6300344500976506e-05, "loss": 1.7553, "step": 88410 }, { "epoch": 0.5557401842358534, "grad_norm": 7.0119428634643555, "learning_rate": 1.6299925400031853e-05, "loss": 1.6977, "step": 88420 }, { "epoch": 0.5558030365525505, "grad_norm": 7.39605188369751, "learning_rate": 1.62995062990872e-05, "loss": 1.7559, "step": 88430 }, { "epoch": 0.5558658888692476, "grad_norm": 5.62230110168457, "learning_rate": 1.6299087198142547e-05, "loss": 1.4533, "step": 88440 }, { "epoch": 0.5559287411859446, "grad_norm": 5.8085503578186035, "learning_rate": 1.629866809719789e-05, "loss": 1.6466, "step": 88450 }, { "epoch": 0.5559915935026417, "grad_norm": 6.4032511711120605, "learning_rate": 1.6298248996253238e-05, "loss": 1.7441, "step": 88460 }, { "epoch": 0.5560544458193388, "grad_norm": 7.032168388366699, "learning_rate": 1.6297829895308585e-05, "loss": 1.9077, "step": 88470 }, { "epoch": 0.5561172981360359, "grad_norm": 6.47052001953125, "learning_rate": 1.6297410794363932e-05, "loss": 1.6508, "step": 88480 }, { "epoch": 0.5561801504527331, "grad_norm": 6.570706844329834, "learning_rate": 1.629699169341928e-05, "loss": 1.7408, "step": 88490 }, { "epoch": 0.5562430027694302, "grad_norm": 6.553955078125, "learning_rate": 1.6296572592474623e-05, "loss": 1.9257, "step": 88500 }, { "epoch": 0.5563058550861273, "grad_norm": 6.742047309875488, "learning_rate": 1.629615349152997e-05, "loss": 1.6006, "step": 88510 }, { "epoch": 0.5563687074028244, "grad_norm": 6.896884441375732, "learning_rate": 1.6295734390585317e-05, "loss": 1.6857, "step": 88520 }, { "epoch": 0.5564315597195215, "grad_norm": 5.0939202308654785, "learning_rate": 1.6295315289640664e-05, "loss": 1.4175, "step": 88530 }, { "epoch": 0.5564944120362186, "grad_norm": 7.0617547035217285, "learning_rate": 1.629489618869601e-05, "loss": 1.6025, "step": 88540 }, { "epoch": 0.5565572643529157, "grad_norm": 6.168315887451172, "learning_rate": 1.6294477087751358e-05, "loss": 1.6803, "step": 88550 }, { "epoch": 0.5566201166696129, "grad_norm": 6.095003604888916, "learning_rate": 1.6294057986806705e-05, "loss": 1.5083, "step": 88560 }, { "epoch": 0.55668296898631, "grad_norm": 6.832040309906006, "learning_rate": 1.6293638885862052e-05, "loss": 1.5936, "step": 88570 }, { "epoch": 0.5567458213030071, "grad_norm": 7.0723557472229, "learning_rate": 1.6293219784917396e-05, "loss": 1.5651, "step": 88580 }, { "epoch": 0.5568086736197042, "grad_norm": 6.6852850914001465, "learning_rate": 1.6292800683972743e-05, "loss": 1.6691, "step": 88590 }, { "epoch": 0.5568715259364013, "grad_norm": 6.6617231369018555, "learning_rate": 1.629238158302809e-05, "loss": 1.7027, "step": 88600 }, { "epoch": 0.5569343782530984, "grad_norm": 5.725902557373047, "learning_rate": 1.6291962482083437e-05, "loss": 1.6941, "step": 88610 }, { "epoch": 0.5569972305697956, "grad_norm": 7.879228591918945, "learning_rate": 1.629154338113878e-05, "loss": 1.6248, "step": 88620 }, { "epoch": 0.5570600828864927, "grad_norm": 6.585484504699707, "learning_rate": 1.6291124280194128e-05, "loss": 1.6381, "step": 88630 }, { "epoch": 0.5571229352031898, "grad_norm": 6.741808891296387, "learning_rate": 1.6290705179249475e-05, "loss": 1.6211, "step": 88640 }, { "epoch": 0.5571857875198869, "grad_norm": 6.813026428222656, "learning_rate": 1.6290286078304822e-05, "loss": 1.6066, "step": 88650 }, { "epoch": 0.557248639836584, "grad_norm": 6.790803909301758, "learning_rate": 1.628986697736017e-05, "loss": 1.8055, "step": 88660 }, { "epoch": 0.5573114921532811, "grad_norm": 6.520722389221191, "learning_rate": 1.6289447876415513e-05, "loss": 1.6868, "step": 88670 }, { "epoch": 0.5573743444699782, "grad_norm": 6.6449174880981445, "learning_rate": 1.628902877547086e-05, "loss": 1.7712, "step": 88680 }, { "epoch": 0.5574371967866754, "grad_norm": 5.842750549316406, "learning_rate": 1.6288609674526207e-05, "loss": 1.3447, "step": 88690 }, { "epoch": 0.5575000491033725, "grad_norm": 6.170193195343018, "learning_rate": 1.6288190573581554e-05, "loss": 1.7034, "step": 88700 }, { "epoch": 0.5575629014200695, "grad_norm": 6.261218070983887, "learning_rate": 1.62877714726369e-05, "loss": 1.5003, "step": 88710 }, { "epoch": 0.5576257537367666, "grad_norm": 6.723862648010254, "learning_rate": 1.6287352371692248e-05, "loss": 1.5344, "step": 88720 }, { "epoch": 0.5576886060534637, "grad_norm": 6.398962497711182, "learning_rate": 1.6286933270747595e-05, "loss": 1.6921, "step": 88730 }, { "epoch": 0.5577514583701608, "grad_norm": 6.833871364593506, "learning_rate": 1.6286514169802942e-05, "loss": 1.8374, "step": 88740 }, { "epoch": 0.5578143106868579, "grad_norm": 6.804493427276611, "learning_rate": 1.6286095068858286e-05, "loss": 1.7986, "step": 88750 }, { "epoch": 0.557877163003555, "grad_norm": 6.8638787269592285, "learning_rate": 1.6285675967913633e-05, "loss": 1.5772, "step": 88760 }, { "epoch": 0.5579400153202522, "grad_norm": 5.638939380645752, "learning_rate": 1.628525686696898e-05, "loss": 1.6406, "step": 88770 }, { "epoch": 0.5580028676369493, "grad_norm": 9.554666519165039, "learning_rate": 1.6284837766024327e-05, "loss": 1.7268, "step": 88780 }, { "epoch": 0.5580657199536464, "grad_norm": 6.101341724395752, "learning_rate": 1.6284418665079674e-05, "loss": 1.5086, "step": 88790 }, { "epoch": 0.5581285722703435, "grad_norm": 6.6777520179748535, "learning_rate": 1.6283999564135018e-05, "loss": 1.6133, "step": 88800 }, { "epoch": 0.5581914245870406, "grad_norm": 7.209829330444336, "learning_rate": 1.6283580463190365e-05, "loss": 1.8295, "step": 88810 }, { "epoch": 0.5582542769037377, "grad_norm": 6.874991416931152, "learning_rate": 1.6283161362245712e-05, "loss": 1.8068, "step": 88820 }, { "epoch": 0.5583171292204349, "grad_norm": 7.330787181854248, "learning_rate": 1.628274226130106e-05, "loss": 1.6279, "step": 88830 }, { "epoch": 0.558379981537132, "grad_norm": 6.485179901123047, "learning_rate": 1.6282323160356406e-05, "loss": 1.6222, "step": 88840 }, { "epoch": 0.5584428338538291, "grad_norm": 6.6356329917907715, "learning_rate": 1.628190405941175e-05, "loss": 1.9118, "step": 88850 }, { "epoch": 0.5585056861705262, "grad_norm": 6.619465351104736, "learning_rate": 1.6281484958467097e-05, "loss": 1.8015, "step": 88860 }, { "epoch": 0.5585685384872233, "grad_norm": 6.195246696472168, "learning_rate": 1.6281065857522444e-05, "loss": 1.6933, "step": 88870 }, { "epoch": 0.5586313908039204, "grad_norm": 6.337784767150879, "learning_rate": 1.628064675657779e-05, "loss": 1.4796, "step": 88880 }, { "epoch": 0.5586942431206176, "grad_norm": 5.8104448318481445, "learning_rate": 1.6280227655633135e-05, "loss": 1.9172, "step": 88890 }, { "epoch": 0.5587570954373147, "grad_norm": 5.997087478637695, "learning_rate": 1.6279808554688482e-05, "loss": 1.7153, "step": 88900 }, { "epoch": 0.5588199477540118, "grad_norm": 5.885451316833496, "learning_rate": 1.627938945374383e-05, "loss": 1.5284, "step": 88910 }, { "epoch": 0.5588828000707089, "grad_norm": 7.631045818328857, "learning_rate": 1.6278970352799176e-05, "loss": 1.7715, "step": 88920 }, { "epoch": 0.558945652387406, "grad_norm": 6.923070430755615, "learning_rate": 1.6278551251854523e-05, "loss": 1.5471, "step": 88930 }, { "epoch": 0.5590085047041031, "grad_norm": 6.820895671844482, "learning_rate": 1.627813215090987e-05, "loss": 1.6448, "step": 88940 }, { "epoch": 0.5590713570208002, "grad_norm": 6.700250148773193, "learning_rate": 1.6277713049965217e-05, "loss": 1.6986, "step": 88950 }, { "epoch": 0.5591342093374972, "grad_norm": 8.055480003356934, "learning_rate": 1.6277293949020564e-05, "loss": 1.5795, "step": 88960 }, { "epoch": 0.5591970616541944, "grad_norm": 6.974710464477539, "learning_rate": 1.627687484807591e-05, "loss": 1.816, "step": 88970 }, { "epoch": 0.5592599139708915, "grad_norm": 6.205530166625977, "learning_rate": 1.6276455747131255e-05, "loss": 1.6085, "step": 88980 }, { "epoch": 0.5593227662875886, "grad_norm": 6.516695499420166, "learning_rate": 1.6276036646186602e-05, "loss": 1.822, "step": 88990 }, { "epoch": 0.5593856186042857, "grad_norm": 7.034592151641846, "learning_rate": 1.627561754524195e-05, "loss": 1.7036, "step": 89000 }, { "epoch": 0.5594484709209828, "grad_norm": 6.519965171813965, "learning_rate": 1.6275198444297296e-05, "loss": 1.6152, "step": 89010 }, { "epoch": 0.5595113232376799, "grad_norm": 6.692155361175537, "learning_rate": 1.627477934335264e-05, "loss": 1.8324, "step": 89020 }, { "epoch": 0.559574175554377, "grad_norm": 5.974812030792236, "learning_rate": 1.6274360242407987e-05, "loss": 1.6236, "step": 89030 }, { "epoch": 0.5596370278710742, "grad_norm": 6.819674491882324, "learning_rate": 1.6273941141463334e-05, "loss": 1.7876, "step": 89040 }, { "epoch": 0.5596998801877713, "grad_norm": 5.531889915466309, "learning_rate": 1.627352204051868e-05, "loss": 1.8333, "step": 89050 }, { "epoch": 0.5597627325044684, "grad_norm": 6.38810920715332, "learning_rate": 1.6273102939574028e-05, "loss": 1.582, "step": 89060 }, { "epoch": 0.5598255848211655, "grad_norm": 6.805556774139404, "learning_rate": 1.6272683838629372e-05, "loss": 1.6396, "step": 89070 }, { "epoch": 0.5598884371378626, "grad_norm": 6.56950044631958, "learning_rate": 1.627226473768472e-05, "loss": 1.4835, "step": 89080 }, { "epoch": 0.5599512894545597, "grad_norm": 6.350771903991699, "learning_rate": 1.6271845636740066e-05, "loss": 1.5145, "step": 89090 }, { "epoch": 0.5600141417712569, "grad_norm": 5.643241882324219, "learning_rate": 1.6271426535795413e-05, "loss": 1.728, "step": 89100 }, { "epoch": 0.560076994087954, "grad_norm": 6.5237321853637695, "learning_rate": 1.627100743485076e-05, "loss": 1.5731, "step": 89110 }, { "epoch": 0.5601398464046511, "grad_norm": 6.321384429931641, "learning_rate": 1.6270588333906104e-05, "loss": 1.6608, "step": 89120 }, { "epoch": 0.5602026987213482, "grad_norm": 6.6404829025268555, "learning_rate": 1.627016923296145e-05, "loss": 1.7645, "step": 89130 }, { "epoch": 0.5602655510380453, "grad_norm": 6.8443121910095215, "learning_rate": 1.6269750132016798e-05, "loss": 1.8534, "step": 89140 }, { "epoch": 0.5603284033547424, "grad_norm": 7.051900863647461, "learning_rate": 1.6269331031072145e-05, "loss": 1.4937, "step": 89150 }, { "epoch": 0.5603912556714395, "grad_norm": 6.721617221832275, "learning_rate": 1.6268911930127492e-05, "loss": 1.635, "step": 89160 }, { "epoch": 0.5604541079881367, "grad_norm": 7.16155481338501, "learning_rate": 1.626849282918284e-05, "loss": 1.7732, "step": 89170 }, { "epoch": 0.5605169603048338, "grad_norm": 9.203920364379883, "learning_rate": 1.6268073728238186e-05, "loss": 2.0461, "step": 89180 }, { "epoch": 0.5605798126215309, "grad_norm": 6.358094215393066, "learning_rate": 1.6267654627293533e-05, "loss": 1.6883, "step": 89190 }, { "epoch": 0.560642664938228, "grad_norm": 7.03914737701416, "learning_rate": 1.6267235526348877e-05, "loss": 1.6068, "step": 89200 }, { "epoch": 0.5607055172549251, "grad_norm": 6.174300670623779, "learning_rate": 1.6266816425404224e-05, "loss": 1.7287, "step": 89210 }, { "epoch": 0.5607683695716221, "grad_norm": 6.269082069396973, "learning_rate": 1.626639732445957e-05, "loss": 1.6998, "step": 89220 }, { "epoch": 0.5608312218883192, "grad_norm": 5.566357135772705, "learning_rate": 1.6265978223514918e-05, "loss": 1.3758, "step": 89230 }, { "epoch": 0.5608940742050164, "grad_norm": 7.715774059295654, "learning_rate": 1.6265559122570265e-05, "loss": 1.5952, "step": 89240 }, { "epoch": 0.5609569265217135, "grad_norm": 6.553233623504639, "learning_rate": 1.626514002162561e-05, "loss": 1.5755, "step": 89250 }, { "epoch": 0.5610197788384106, "grad_norm": 6.500826835632324, "learning_rate": 1.6264720920680956e-05, "loss": 1.6818, "step": 89260 }, { "epoch": 0.5610826311551077, "grad_norm": 7.091058254241943, "learning_rate": 1.6264301819736303e-05, "loss": 1.9517, "step": 89270 }, { "epoch": 0.5611454834718048, "grad_norm": 6.283771514892578, "learning_rate": 1.626388271879165e-05, "loss": 1.7043, "step": 89280 }, { "epoch": 0.5612083357885019, "grad_norm": 6.668854236602783, "learning_rate": 1.6263463617846994e-05, "loss": 1.5439, "step": 89290 }, { "epoch": 0.561271188105199, "grad_norm": 5.6727614402771, "learning_rate": 1.626304451690234e-05, "loss": 1.5474, "step": 89300 }, { "epoch": 0.5613340404218962, "grad_norm": 6.778054714202881, "learning_rate": 1.6262625415957688e-05, "loss": 1.8569, "step": 89310 }, { "epoch": 0.5613968927385933, "grad_norm": 5.522119045257568, "learning_rate": 1.6262206315013035e-05, "loss": 1.4112, "step": 89320 }, { "epoch": 0.5614597450552904, "grad_norm": 6.095500469207764, "learning_rate": 1.6261787214068382e-05, "loss": 1.6091, "step": 89330 }, { "epoch": 0.5615225973719875, "grad_norm": 6.398715496063232, "learning_rate": 1.626136811312373e-05, "loss": 1.4782, "step": 89340 }, { "epoch": 0.5615854496886846, "grad_norm": 6.670832633972168, "learning_rate": 1.6260949012179076e-05, "loss": 1.5155, "step": 89350 }, { "epoch": 0.5616483020053817, "grad_norm": 6.177496910095215, "learning_rate": 1.6260529911234423e-05, "loss": 1.6604, "step": 89360 }, { "epoch": 0.5617111543220789, "grad_norm": 6.3406524658203125, "learning_rate": 1.6260110810289767e-05, "loss": 1.7501, "step": 89370 }, { "epoch": 0.561774006638776, "grad_norm": 6.5295939445495605, "learning_rate": 1.6259691709345114e-05, "loss": 1.7353, "step": 89380 }, { "epoch": 0.5618368589554731, "grad_norm": 5.897647380828857, "learning_rate": 1.625927260840046e-05, "loss": 1.8933, "step": 89390 }, { "epoch": 0.5618997112721702, "grad_norm": 4.999483585357666, "learning_rate": 1.6258853507455808e-05, "loss": 1.6702, "step": 89400 }, { "epoch": 0.5619625635888673, "grad_norm": 6.448941230773926, "learning_rate": 1.6258434406511155e-05, "loss": 1.6223, "step": 89410 }, { "epoch": 0.5620254159055644, "grad_norm": 6.9571123123168945, "learning_rate": 1.62580153055665e-05, "loss": 1.7999, "step": 89420 }, { "epoch": 0.5620882682222615, "grad_norm": 6.229642868041992, "learning_rate": 1.6257596204621846e-05, "loss": 1.7164, "step": 89430 }, { "epoch": 0.5621511205389587, "grad_norm": 6.478581428527832, "learning_rate": 1.6257177103677193e-05, "loss": 1.5214, "step": 89440 }, { "epoch": 0.5622139728556558, "grad_norm": 5.852070331573486, "learning_rate": 1.625675800273254e-05, "loss": 1.5816, "step": 89450 }, { "epoch": 0.5622768251723529, "grad_norm": 6.88888692855835, "learning_rate": 1.6256338901787887e-05, "loss": 1.6473, "step": 89460 }, { "epoch": 0.5623396774890499, "grad_norm": 5.942054271697998, "learning_rate": 1.625591980084323e-05, "loss": 1.6023, "step": 89470 }, { "epoch": 0.562402529805747, "grad_norm": 7.349301815032959, "learning_rate": 1.6255500699898578e-05, "loss": 1.891, "step": 89480 }, { "epoch": 0.5624653821224441, "grad_norm": 6.585026741027832, "learning_rate": 1.6255081598953925e-05, "loss": 1.6427, "step": 89490 }, { "epoch": 0.5625282344391412, "grad_norm": 6.790942668914795, "learning_rate": 1.6254662498009272e-05, "loss": 1.671, "step": 89500 }, { "epoch": 0.5625910867558384, "grad_norm": 6.546286582946777, "learning_rate": 1.6254243397064616e-05, "loss": 1.5507, "step": 89510 }, { "epoch": 0.5626539390725355, "grad_norm": 6.672111511230469, "learning_rate": 1.6253824296119963e-05, "loss": 1.6603, "step": 89520 }, { "epoch": 0.5627167913892326, "grad_norm": 8.207008361816406, "learning_rate": 1.625340519517531e-05, "loss": 1.7917, "step": 89530 }, { "epoch": 0.5627796437059297, "grad_norm": 8.121054649353027, "learning_rate": 1.6252986094230657e-05, "loss": 1.9549, "step": 89540 }, { "epoch": 0.5628424960226268, "grad_norm": 6.844808101654053, "learning_rate": 1.6252566993286004e-05, "loss": 1.6826, "step": 89550 }, { "epoch": 0.5629053483393239, "grad_norm": 6.727411270141602, "learning_rate": 1.625214789234135e-05, "loss": 1.7397, "step": 89560 }, { "epoch": 0.562968200656021, "grad_norm": 7.244187355041504, "learning_rate": 1.6251728791396698e-05, "loss": 1.7125, "step": 89570 }, { "epoch": 0.5630310529727182, "grad_norm": 6.885039806365967, "learning_rate": 1.6251309690452045e-05, "loss": 1.4795, "step": 89580 }, { "epoch": 0.5630939052894153, "grad_norm": 6.604867458343506, "learning_rate": 1.6250890589507392e-05, "loss": 1.6503, "step": 89590 }, { "epoch": 0.5631567576061124, "grad_norm": 5.984154224395752, "learning_rate": 1.6250471488562736e-05, "loss": 1.8947, "step": 89600 }, { "epoch": 0.5632196099228095, "grad_norm": 9.04930591583252, "learning_rate": 1.6250052387618083e-05, "loss": 1.7429, "step": 89610 }, { "epoch": 0.5632824622395066, "grad_norm": 6.829537391662598, "learning_rate": 1.624963328667343e-05, "loss": 1.659, "step": 89620 }, { "epoch": 0.5633453145562037, "grad_norm": 6.806103229522705, "learning_rate": 1.6249214185728777e-05, "loss": 1.6567, "step": 89630 }, { "epoch": 0.5634081668729009, "grad_norm": 6.35833215713501, "learning_rate": 1.624879508478412e-05, "loss": 1.8157, "step": 89640 }, { "epoch": 0.563471019189598, "grad_norm": 6.641960620880127, "learning_rate": 1.6248375983839468e-05, "loss": 1.6399, "step": 89650 }, { "epoch": 0.5635338715062951, "grad_norm": 6.535701751708984, "learning_rate": 1.6247956882894815e-05, "loss": 1.5097, "step": 89660 }, { "epoch": 0.5635967238229922, "grad_norm": 6.738907814025879, "learning_rate": 1.6247537781950162e-05, "loss": 1.6767, "step": 89670 }, { "epoch": 0.5636595761396893, "grad_norm": 6.377346515655518, "learning_rate": 1.624711868100551e-05, "loss": 1.5594, "step": 89680 }, { "epoch": 0.5637224284563864, "grad_norm": 6.713415145874023, "learning_rate": 1.6246699580060853e-05, "loss": 1.6798, "step": 89690 }, { "epoch": 0.5637852807730835, "grad_norm": 5.800888538360596, "learning_rate": 1.62462804791162e-05, "loss": 1.518, "step": 89700 }, { "epoch": 0.5638481330897807, "grad_norm": 6.9739670753479, "learning_rate": 1.6245861378171547e-05, "loss": 1.6548, "step": 89710 }, { "epoch": 0.5639109854064778, "grad_norm": 6.058177947998047, "learning_rate": 1.6245442277226894e-05, "loss": 1.7046, "step": 89720 }, { "epoch": 0.5639738377231748, "grad_norm": 6.74522590637207, "learning_rate": 1.624502317628224e-05, "loss": 1.5815, "step": 89730 }, { "epoch": 0.5640366900398719, "grad_norm": 6.15690803527832, "learning_rate": 1.6244604075337588e-05, "loss": 1.7372, "step": 89740 }, { "epoch": 0.564099542356569, "grad_norm": 5.959741115570068, "learning_rate": 1.6244184974392932e-05, "loss": 1.7466, "step": 89750 }, { "epoch": 0.5641623946732661, "grad_norm": 8.733022689819336, "learning_rate": 1.624376587344828e-05, "loss": 1.5584, "step": 89760 }, { "epoch": 0.5642252469899632, "grad_norm": 6.4488677978515625, "learning_rate": 1.6243346772503626e-05, "loss": 1.7363, "step": 89770 }, { "epoch": 0.5642880993066604, "grad_norm": 6.470970630645752, "learning_rate": 1.6242927671558973e-05, "loss": 1.5498, "step": 89780 }, { "epoch": 0.5643509516233575, "grad_norm": 6.511645317077637, "learning_rate": 1.624250857061432e-05, "loss": 1.5934, "step": 89790 }, { "epoch": 0.5644138039400546, "grad_norm": 6.307367324829102, "learning_rate": 1.6242089469669667e-05, "loss": 1.759, "step": 89800 }, { "epoch": 0.5644766562567517, "grad_norm": 7.514179706573486, "learning_rate": 1.6241670368725014e-05, "loss": 1.8545, "step": 89810 }, { "epoch": 0.5645395085734488, "grad_norm": 5.769866943359375, "learning_rate": 1.6241251267780358e-05, "loss": 1.8331, "step": 89820 }, { "epoch": 0.5646023608901459, "grad_norm": 5.930098056793213, "learning_rate": 1.6240832166835705e-05, "loss": 1.713, "step": 89830 }, { "epoch": 0.564665213206843, "grad_norm": 7.681197166442871, "learning_rate": 1.6240413065891052e-05, "loss": 1.5439, "step": 89840 }, { "epoch": 0.5647280655235402, "grad_norm": 7.412057399749756, "learning_rate": 1.62399939649464e-05, "loss": 1.5705, "step": 89850 }, { "epoch": 0.5647909178402373, "grad_norm": 6.578376293182373, "learning_rate": 1.6239574864001746e-05, "loss": 1.7744, "step": 89860 }, { "epoch": 0.5648537701569344, "grad_norm": 9.134100914001465, "learning_rate": 1.623915576305709e-05, "loss": 1.5815, "step": 89870 }, { "epoch": 0.5649166224736315, "grad_norm": 6.486658573150635, "learning_rate": 1.6238736662112437e-05, "loss": 1.6856, "step": 89880 }, { "epoch": 0.5649794747903286, "grad_norm": 7.928880214691162, "learning_rate": 1.6238317561167784e-05, "loss": 1.7607, "step": 89890 }, { "epoch": 0.5650423271070257, "grad_norm": 6.201712131500244, "learning_rate": 1.623789846022313e-05, "loss": 1.5833, "step": 89900 }, { "epoch": 0.5651051794237228, "grad_norm": 4.976857662200928, "learning_rate": 1.6237479359278475e-05, "loss": 1.5414, "step": 89910 }, { "epoch": 0.56516803174042, "grad_norm": 7.770523548126221, "learning_rate": 1.6237060258333822e-05, "loss": 1.6504, "step": 89920 }, { "epoch": 0.5652308840571171, "grad_norm": 6.774728298187256, "learning_rate": 1.623664115738917e-05, "loss": 1.5659, "step": 89930 }, { "epoch": 0.5652937363738142, "grad_norm": 7.217031478881836, "learning_rate": 1.6236222056444516e-05, "loss": 1.7839, "step": 89940 }, { "epoch": 0.5653565886905113, "grad_norm": 6.508360385894775, "learning_rate": 1.6235802955499863e-05, "loss": 1.6238, "step": 89950 }, { "epoch": 0.5654194410072084, "grad_norm": 6.7677435874938965, "learning_rate": 1.623538385455521e-05, "loss": 1.4442, "step": 89960 }, { "epoch": 0.5654822933239055, "grad_norm": 6.441218376159668, "learning_rate": 1.6234964753610557e-05, "loss": 1.554, "step": 89970 }, { "epoch": 0.5655451456406025, "grad_norm": 6.243203163146973, "learning_rate": 1.6234545652665904e-05, "loss": 1.8936, "step": 89980 }, { "epoch": 0.5656079979572997, "grad_norm": 6.367725849151611, "learning_rate": 1.623412655172125e-05, "loss": 1.7235, "step": 89990 }, { "epoch": 0.5656708502739968, "grad_norm": 6.914323329925537, "learning_rate": 1.6233707450776595e-05, "loss": 1.8123, "step": 90000 }, { "epoch": 0.5657337025906939, "grad_norm": 6.8788299560546875, "learning_rate": 1.6233288349831942e-05, "loss": 1.5721, "step": 90010 }, { "epoch": 0.565796554907391, "grad_norm": 7.315779209136963, "learning_rate": 1.623286924888729e-05, "loss": 1.7572, "step": 90020 }, { "epoch": 0.5658594072240881, "grad_norm": 6.532388210296631, "learning_rate": 1.6232450147942636e-05, "loss": 1.7725, "step": 90030 }, { "epoch": 0.5659222595407852, "grad_norm": 7.474238395690918, "learning_rate": 1.623203104699798e-05, "loss": 1.8216, "step": 90040 }, { "epoch": 0.5659851118574823, "grad_norm": 6.514171600341797, "learning_rate": 1.6231611946053327e-05, "loss": 1.4655, "step": 90050 }, { "epoch": 0.5660479641741795, "grad_norm": 7.061287879943848, "learning_rate": 1.6231192845108674e-05, "loss": 1.7795, "step": 90060 }, { "epoch": 0.5661108164908766, "grad_norm": 6.0305094718933105, "learning_rate": 1.623077374416402e-05, "loss": 1.4329, "step": 90070 }, { "epoch": 0.5661736688075737, "grad_norm": 6.642032623291016, "learning_rate": 1.623035464321937e-05, "loss": 1.6796, "step": 90080 }, { "epoch": 0.5662365211242708, "grad_norm": 6.137035846710205, "learning_rate": 1.6229935542274712e-05, "loss": 1.5379, "step": 90090 }, { "epoch": 0.5662993734409679, "grad_norm": 8.072105407714844, "learning_rate": 1.622951644133006e-05, "loss": 1.8794, "step": 90100 }, { "epoch": 0.566362225757665, "grad_norm": 5.9996418952941895, "learning_rate": 1.6229097340385406e-05, "loss": 1.3494, "step": 90110 }, { "epoch": 0.5664250780743622, "grad_norm": 6.265005588531494, "learning_rate": 1.6228678239440753e-05, "loss": 1.7446, "step": 90120 }, { "epoch": 0.5664879303910593, "grad_norm": 8.894662857055664, "learning_rate": 1.6228259138496097e-05, "loss": 1.9204, "step": 90130 }, { "epoch": 0.5665507827077564, "grad_norm": 6.5717034339904785, "learning_rate": 1.6227840037551444e-05, "loss": 1.6384, "step": 90140 }, { "epoch": 0.5666136350244535, "grad_norm": 6.242033004760742, "learning_rate": 1.622742093660679e-05, "loss": 1.803, "step": 90150 }, { "epoch": 0.5666764873411506, "grad_norm": 6.6735405921936035, "learning_rate": 1.6227001835662138e-05, "loss": 1.7524, "step": 90160 }, { "epoch": 0.5667393396578477, "grad_norm": 6.145554065704346, "learning_rate": 1.6226582734717485e-05, "loss": 1.7816, "step": 90170 }, { "epoch": 0.5668021919745448, "grad_norm": 6.141727447509766, "learning_rate": 1.6226163633772832e-05, "loss": 1.8193, "step": 90180 }, { "epoch": 0.566865044291242, "grad_norm": 6.712489604949951, "learning_rate": 1.622574453282818e-05, "loss": 1.6914, "step": 90190 }, { "epoch": 0.5669278966079391, "grad_norm": 6.391608715057373, "learning_rate": 1.6225325431883526e-05, "loss": 2.0208, "step": 90200 }, { "epoch": 0.5669907489246362, "grad_norm": 6.549454212188721, "learning_rate": 1.6224906330938874e-05, "loss": 1.7772, "step": 90210 }, { "epoch": 0.5670536012413333, "grad_norm": 7.447284698486328, "learning_rate": 1.6224487229994217e-05, "loss": 1.6078, "step": 90220 }, { "epoch": 0.5671164535580304, "grad_norm": 6.7304463386535645, "learning_rate": 1.6224068129049564e-05, "loss": 1.8029, "step": 90230 }, { "epoch": 0.5671793058747274, "grad_norm": 6.139402389526367, "learning_rate": 1.622364902810491e-05, "loss": 1.4951, "step": 90240 }, { "epoch": 0.5672421581914245, "grad_norm": 6.552315711975098, "learning_rate": 1.622322992716026e-05, "loss": 1.6295, "step": 90250 }, { "epoch": 0.5673050105081217, "grad_norm": 6.577693939208984, "learning_rate": 1.6222810826215602e-05, "loss": 1.7602, "step": 90260 }, { "epoch": 0.5673678628248188, "grad_norm": 7.049777507781982, "learning_rate": 1.622239172527095e-05, "loss": 1.4879, "step": 90270 }, { "epoch": 0.5674307151415159, "grad_norm": 6.171783447265625, "learning_rate": 1.6221972624326296e-05, "loss": 1.3778, "step": 90280 }, { "epoch": 0.567493567458213, "grad_norm": 7.835484981536865, "learning_rate": 1.6221553523381643e-05, "loss": 1.6952, "step": 90290 }, { "epoch": 0.5675564197749101, "grad_norm": 6.65958309173584, "learning_rate": 1.622113442243699e-05, "loss": 1.6576, "step": 90300 }, { "epoch": 0.5676192720916072, "grad_norm": 6.794559001922607, "learning_rate": 1.6220715321492334e-05, "loss": 1.6117, "step": 90310 }, { "epoch": 0.5676821244083043, "grad_norm": 7.645313739776611, "learning_rate": 1.622029622054768e-05, "loss": 1.7929, "step": 90320 }, { "epoch": 0.5677449767250015, "grad_norm": 6.500690460205078, "learning_rate": 1.6219877119603028e-05, "loss": 1.5474, "step": 90330 }, { "epoch": 0.5678078290416986, "grad_norm": 6.779569625854492, "learning_rate": 1.6219458018658375e-05, "loss": 1.7564, "step": 90340 }, { "epoch": 0.5678706813583957, "grad_norm": 8.097848892211914, "learning_rate": 1.6219038917713722e-05, "loss": 1.6125, "step": 90350 }, { "epoch": 0.5679335336750928, "grad_norm": 7.679541110992432, "learning_rate": 1.621861981676907e-05, "loss": 1.6187, "step": 90360 }, { "epoch": 0.5679963859917899, "grad_norm": 7.951330661773682, "learning_rate": 1.6218200715824416e-05, "loss": 1.8312, "step": 90370 }, { "epoch": 0.568059238308487, "grad_norm": 6.817192554473877, "learning_rate": 1.621778161487976e-05, "loss": 1.3995, "step": 90380 }, { "epoch": 0.5681220906251842, "grad_norm": 7.9999308586120605, "learning_rate": 1.6217362513935107e-05, "loss": 1.7167, "step": 90390 }, { "epoch": 0.5681849429418813, "grad_norm": 7.261706352233887, "learning_rate": 1.6216943412990454e-05, "loss": 1.7325, "step": 90400 }, { "epoch": 0.5682477952585784, "grad_norm": 6.690458297729492, "learning_rate": 1.62165243120458e-05, "loss": 1.8187, "step": 90410 }, { "epoch": 0.5683106475752755, "grad_norm": 7.295557498931885, "learning_rate": 1.621610521110115e-05, "loss": 1.5807, "step": 90420 }, { "epoch": 0.5683734998919726, "grad_norm": 5.740212917327881, "learning_rate": 1.6215686110156496e-05, "loss": 1.6022, "step": 90430 }, { "epoch": 0.5684363522086697, "grad_norm": 7.598939418792725, "learning_rate": 1.621526700921184e-05, "loss": 1.8893, "step": 90440 }, { "epoch": 0.5684992045253668, "grad_norm": 6.767308235168457, "learning_rate": 1.6214847908267186e-05, "loss": 1.6985, "step": 90450 }, { "epoch": 0.568562056842064, "grad_norm": 7.9719929695129395, "learning_rate": 1.6214428807322533e-05, "loss": 1.6289, "step": 90460 }, { "epoch": 0.5686249091587611, "grad_norm": 6.249841690063477, "learning_rate": 1.621400970637788e-05, "loss": 1.6309, "step": 90470 }, { "epoch": 0.5686877614754582, "grad_norm": 6.916226387023926, "learning_rate": 1.6213590605433227e-05, "loss": 1.4816, "step": 90480 }, { "epoch": 0.5687506137921552, "grad_norm": 7.875985145568848, "learning_rate": 1.621317150448857e-05, "loss": 1.6301, "step": 90490 }, { "epoch": 0.5688134661088523, "grad_norm": 6.240650177001953, "learning_rate": 1.6212752403543918e-05, "loss": 1.8665, "step": 90500 }, { "epoch": 0.5688763184255494, "grad_norm": 7.0537614822387695, "learning_rate": 1.6212333302599265e-05, "loss": 1.8948, "step": 90510 }, { "epoch": 0.5689391707422465, "grad_norm": 7.471006393432617, "learning_rate": 1.6211914201654612e-05, "loss": 1.8681, "step": 90520 }, { "epoch": 0.5690020230589437, "grad_norm": 7.404205322265625, "learning_rate": 1.6211495100709956e-05, "loss": 1.5513, "step": 90530 }, { "epoch": 0.5690648753756408, "grad_norm": 7.2312140464782715, "learning_rate": 1.6211075999765303e-05, "loss": 1.8007, "step": 90540 }, { "epoch": 0.5691277276923379, "grad_norm": 6.615874290466309, "learning_rate": 1.621065689882065e-05, "loss": 1.5656, "step": 90550 }, { "epoch": 0.569190580009035, "grad_norm": 3.9499099254608154, "learning_rate": 1.6210237797875997e-05, "loss": 1.3342, "step": 90560 }, { "epoch": 0.5692534323257321, "grad_norm": 6.043526649475098, "learning_rate": 1.6209818696931344e-05, "loss": 1.6464, "step": 90570 }, { "epoch": 0.5693162846424292, "grad_norm": 7.471343517303467, "learning_rate": 1.620939959598669e-05, "loss": 1.652, "step": 90580 }, { "epoch": 0.5693791369591263, "grad_norm": 6.765534400939941, "learning_rate": 1.620898049504204e-05, "loss": 1.3959, "step": 90590 }, { "epoch": 0.5694419892758235, "grad_norm": 6.237020015716553, "learning_rate": 1.6208561394097386e-05, "loss": 1.6979, "step": 90600 }, { "epoch": 0.5695048415925206, "grad_norm": 7.709216117858887, "learning_rate": 1.6208142293152733e-05, "loss": 1.8288, "step": 90610 }, { "epoch": 0.5695676939092177, "grad_norm": 9.005006790161133, "learning_rate": 1.6207723192208076e-05, "loss": 1.6457, "step": 90620 }, { "epoch": 0.5696305462259148, "grad_norm": 6.673562526702881, "learning_rate": 1.6207304091263423e-05, "loss": 1.7839, "step": 90630 }, { "epoch": 0.5696933985426119, "grad_norm": 6.20949649810791, "learning_rate": 1.620688499031877e-05, "loss": 1.6451, "step": 90640 }, { "epoch": 0.569756250859309, "grad_norm": 7.498359680175781, "learning_rate": 1.6206465889374118e-05, "loss": 1.5566, "step": 90650 }, { "epoch": 0.5698191031760061, "grad_norm": 6.984977722167969, "learning_rate": 1.620604678842946e-05, "loss": 1.7864, "step": 90660 }, { "epoch": 0.5698819554927033, "grad_norm": 7.174541473388672, "learning_rate": 1.6205627687484808e-05, "loss": 1.6211, "step": 90670 }, { "epoch": 0.5699448078094004, "grad_norm": 6.415432453155518, "learning_rate": 1.6205208586540155e-05, "loss": 1.7804, "step": 90680 }, { "epoch": 0.5700076601260975, "grad_norm": 6.4624223709106445, "learning_rate": 1.6204789485595502e-05, "loss": 1.6502, "step": 90690 }, { "epoch": 0.5700705124427946, "grad_norm": 5.442400932312012, "learning_rate": 1.620437038465085e-05, "loss": 1.4261, "step": 90700 }, { "epoch": 0.5701333647594917, "grad_norm": 7.268397331237793, "learning_rate": 1.6203951283706193e-05, "loss": 1.6389, "step": 90710 }, { "epoch": 0.5701962170761888, "grad_norm": 6.997256755828857, "learning_rate": 1.620353218276154e-05, "loss": 1.7754, "step": 90720 }, { "epoch": 0.570259069392886, "grad_norm": 6.781765937805176, "learning_rate": 1.6203113081816887e-05, "loss": 1.7912, "step": 90730 }, { "epoch": 0.5703219217095831, "grad_norm": 6.821094036102295, "learning_rate": 1.6202693980872234e-05, "loss": 1.6553, "step": 90740 }, { "epoch": 0.5703847740262801, "grad_norm": 7.494826316833496, "learning_rate": 1.6202274879927578e-05, "loss": 1.6582, "step": 90750 }, { "epoch": 0.5704476263429772, "grad_norm": 6.2762041091918945, "learning_rate": 1.6201855778982925e-05, "loss": 1.7972, "step": 90760 }, { "epoch": 0.5705104786596743, "grad_norm": 6.95667839050293, "learning_rate": 1.6201436678038272e-05, "loss": 1.7072, "step": 90770 }, { "epoch": 0.5705733309763714, "grad_norm": 7.874874114990234, "learning_rate": 1.620101757709362e-05, "loss": 1.6645, "step": 90780 }, { "epoch": 0.5706361832930685, "grad_norm": 7.321478843688965, "learning_rate": 1.6200598476148966e-05, "loss": 1.6508, "step": 90790 }, { "epoch": 0.5706990356097656, "grad_norm": 5.930144786834717, "learning_rate": 1.6200179375204313e-05, "loss": 1.6602, "step": 90800 }, { "epoch": 0.5707618879264628, "grad_norm": 6.673276901245117, "learning_rate": 1.619976027425966e-05, "loss": 1.5876, "step": 90810 }, { "epoch": 0.5708247402431599, "grad_norm": 6.345154285430908, "learning_rate": 1.6199341173315008e-05, "loss": 1.5658, "step": 90820 }, { "epoch": 0.570887592559857, "grad_norm": 7.179617404937744, "learning_rate": 1.6198922072370355e-05, "loss": 1.6392, "step": 90830 }, { "epoch": 0.5709504448765541, "grad_norm": 7.312381744384766, "learning_rate": 1.6198502971425698e-05, "loss": 1.936, "step": 90840 }, { "epoch": 0.5710132971932512, "grad_norm": 6.575345993041992, "learning_rate": 1.6198083870481045e-05, "loss": 1.5589, "step": 90850 }, { "epoch": 0.5710761495099483, "grad_norm": 6.817177772521973, "learning_rate": 1.6197664769536392e-05, "loss": 1.626, "step": 90860 }, { "epoch": 0.5711390018266455, "grad_norm": 6.790354251861572, "learning_rate": 1.619724566859174e-05, "loss": 1.6123, "step": 90870 }, { "epoch": 0.5712018541433426, "grad_norm": 5.912313461303711, "learning_rate": 1.6196826567647083e-05, "loss": 1.7207, "step": 90880 }, { "epoch": 0.5712647064600397, "grad_norm": 6.774677753448486, "learning_rate": 1.619640746670243e-05, "loss": 1.933, "step": 90890 }, { "epoch": 0.5713275587767368, "grad_norm": 7.587268352508545, "learning_rate": 1.6195988365757777e-05, "loss": 1.7738, "step": 90900 }, { "epoch": 0.5713904110934339, "grad_norm": 6.267664432525635, "learning_rate": 1.6195569264813124e-05, "loss": 1.5228, "step": 90910 }, { "epoch": 0.571453263410131, "grad_norm": 5.794346332550049, "learning_rate": 1.619515016386847e-05, "loss": 1.6022, "step": 90920 }, { "epoch": 0.5715161157268281, "grad_norm": 6.912841796875, "learning_rate": 1.6194731062923815e-05, "loss": 1.7417, "step": 90930 }, { "epoch": 0.5715789680435253, "grad_norm": 7.007205486297607, "learning_rate": 1.6194311961979162e-05, "loss": 1.5764, "step": 90940 }, { "epoch": 0.5716418203602224, "grad_norm": 6.86815071105957, "learning_rate": 1.619389286103451e-05, "loss": 1.8537, "step": 90950 }, { "epoch": 0.5717046726769195, "grad_norm": 5.955638408660889, "learning_rate": 1.6193473760089856e-05, "loss": 1.7025, "step": 90960 }, { "epoch": 0.5717675249936166, "grad_norm": 6.138908386230469, "learning_rate": 1.6193054659145203e-05, "loss": 1.6357, "step": 90970 }, { "epoch": 0.5718303773103137, "grad_norm": 5.944375514984131, "learning_rate": 1.619263555820055e-05, "loss": 1.8174, "step": 90980 }, { "epoch": 0.5718932296270108, "grad_norm": 7.687257289886475, "learning_rate": 1.6192216457255898e-05, "loss": 1.6123, "step": 90990 }, { "epoch": 0.5719560819437078, "grad_norm": 7.10957670211792, "learning_rate": 1.619179735631124e-05, "loss": 1.6329, "step": 91000 }, { "epoch": 0.572018934260405, "grad_norm": 6.9417853355407715, "learning_rate": 1.619137825536659e-05, "loss": 1.5804, "step": 91010 }, { "epoch": 0.5720817865771021, "grad_norm": 6.256638526916504, "learning_rate": 1.6190959154421935e-05, "loss": 1.7376, "step": 91020 }, { "epoch": 0.5721446388937992, "grad_norm": 7.385227203369141, "learning_rate": 1.6190540053477282e-05, "loss": 1.4635, "step": 91030 }, { "epoch": 0.5722074912104963, "grad_norm": 6.41224479675293, "learning_rate": 1.619012095253263e-05, "loss": 1.672, "step": 91040 }, { "epoch": 0.5722703435271934, "grad_norm": 6.543980121612549, "learning_rate": 1.6189701851587977e-05, "loss": 1.6074, "step": 91050 }, { "epoch": 0.5723331958438905, "grad_norm": 6.76102876663208, "learning_rate": 1.618928275064332e-05, "loss": 1.7029, "step": 91060 }, { "epoch": 0.5723960481605876, "grad_norm": 7.033742904663086, "learning_rate": 1.6188863649698667e-05, "loss": 1.783, "step": 91070 }, { "epoch": 0.5724589004772848, "grad_norm": 7.322498321533203, "learning_rate": 1.6188444548754014e-05, "loss": 1.6939, "step": 91080 }, { "epoch": 0.5725217527939819, "grad_norm": 6.82847785949707, "learning_rate": 1.618802544780936e-05, "loss": 1.8255, "step": 91090 }, { "epoch": 0.572584605110679, "grad_norm": 6.518442153930664, "learning_rate": 1.618760634686471e-05, "loss": 1.6425, "step": 91100 }, { "epoch": 0.5726474574273761, "grad_norm": 6.245519638061523, "learning_rate": 1.6187187245920052e-05, "loss": 1.6791, "step": 91110 }, { "epoch": 0.5727103097440732, "grad_norm": 6.8035359382629395, "learning_rate": 1.61867681449754e-05, "loss": 1.5144, "step": 91120 }, { "epoch": 0.5727731620607703, "grad_norm": 6.895239353179932, "learning_rate": 1.6186349044030746e-05, "loss": 1.6901, "step": 91130 }, { "epoch": 0.5728360143774675, "grad_norm": 6.584578514099121, "learning_rate": 1.6185929943086093e-05, "loss": 1.7268, "step": 91140 }, { "epoch": 0.5728988666941646, "grad_norm": 6.822965621948242, "learning_rate": 1.6185510842141437e-05, "loss": 1.8395, "step": 91150 }, { "epoch": 0.5729617190108617, "grad_norm": 8.831889152526855, "learning_rate": 1.6185091741196784e-05, "loss": 1.5425, "step": 91160 }, { "epoch": 0.5730245713275588, "grad_norm": 6.542859077453613, "learning_rate": 1.618467264025213e-05, "loss": 1.743, "step": 91170 }, { "epoch": 0.5730874236442559, "grad_norm": 5.4479594230651855, "learning_rate": 1.618425353930748e-05, "loss": 1.6769, "step": 91180 }, { "epoch": 0.573150275960953, "grad_norm": 5.9766998291015625, "learning_rate": 1.6183834438362825e-05, "loss": 1.651, "step": 91190 }, { "epoch": 0.5732131282776501, "grad_norm": 7.001051425933838, "learning_rate": 1.6183415337418173e-05, "loss": 1.4927, "step": 91200 }, { "epoch": 0.5732759805943473, "grad_norm": 5.923704624176025, "learning_rate": 1.618299623647352e-05, "loss": 1.8324, "step": 91210 }, { "epoch": 0.5733388329110444, "grad_norm": 5.801931381225586, "learning_rate": 1.6182577135528867e-05, "loss": 1.5198, "step": 91220 }, { "epoch": 0.5734016852277415, "grad_norm": 7.863893508911133, "learning_rate": 1.6182158034584214e-05, "loss": 1.9263, "step": 91230 }, { "epoch": 0.5734645375444386, "grad_norm": 5.9496612548828125, "learning_rate": 1.6181738933639557e-05, "loss": 1.7619, "step": 91240 }, { "epoch": 0.5735273898611357, "grad_norm": 5.7954630851745605, "learning_rate": 1.6181319832694904e-05, "loss": 1.4125, "step": 91250 }, { "epoch": 0.5735902421778327, "grad_norm": 7.1357574462890625, "learning_rate": 1.618090073175025e-05, "loss": 1.5966, "step": 91260 }, { "epoch": 0.5736530944945298, "grad_norm": 6.261843681335449, "learning_rate": 1.61804816308056e-05, "loss": 1.817, "step": 91270 }, { "epoch": 0.573715946811227, "grad_norm": 6.486170291900635, "learning_rate": 1.6180062529860942e-05, "loss": 1.6445, "step": 91280 }, { "epoch": 0.5737787991279241, "grad_norm": 7.065108776092529, "learning_rate": 1.617964342891629e-05, "loss": 1.7621, "step": 91290 }, { "epoch": 0.5738416514446212, "grad_norm": 5.309915542602539, "learning_rate": 1.6179224327971636e-05, "loss": 1.6687, "step": 91300 }, { "epoch": 0.5739045037613183, "grad_norm": 6.505954265594482, "learning_rate": 1.6178805227026984e-05, "loss": 1.5949, "step": 91310 }, { "epoch": 0.5739673560780154, "grad_norm": 7.580758094787598, "learning_rate": 1.617838612608233e-05, "loss": 1.6881, "step": 91320 }, { "epoch": 0.5740302083947125, "grad_norm": 7.761985778808594, "learning_rate": 1.6177967025137674e-05, "loss": 1.9408, "step": 91330 }, { "epoch": 0.5740930607114096, "grad_norm": 6.892176151275635, "learning_rate": 1.617754792419302e-05, "loss": 1.7251, "step": 91340 }, { "epoch": 0.5741559130281068, "grad_norm": 7.198729515075684, "learning_rate": 1.617712882324837e-05, "loss": 1.6614, "step": 91350 }, { "epoch": 0.5742187653448039, "grad_norm": 6.735234260559082, "learning_rate": 1.6176709722303715e-05, "loss": 1.8572, "step": 91360 }, { "epoch": 0.574281617661501, "grad_norm": 6.974170207977295, "learning_rate": 1.6176290621359063e-05, "loss": 1.7196, "step": 91370 }, { "epoch": 0.5743444699781981, "grad_norm": 6.183765411376953, "learning_rate": 1.6175871520414406e-05, "loss": 1.6468, "step": 91380 }, { "epoch": 0.5744073222948952, "grad_norm": 6.75325870513916, "learning_rate": 1.6175452419469753e-05, "loss": 1.8088, "step": 91390 }, { "epoch": 0.5744701746115923, "grad_norm": 6.08672571182251, "learning_rate": 1.61750333185251e-05, "loss": 1.6807, "step": 91400 }, { "epoch": 0.5745330269282894, "grad_norm": 8.254101753234863, "learning_rate": 1.6174614217580447e-05, "loss": 1.5649, "step": 91410 }, { "epoch": 0.5745958792449866, "grad_norm": 6.380203723907471, "learning_rate": 1.6174195116635795e-05, "loss": 1.4865, "step": 91420 }, { "epoch": 0.5746587315616837, "grad_norm": 6.785824775695801, "learning_rate": 1.617377601569114e-05, "loss": 1.7694, "step": 91430 }, { "epoch": 0.5747215838783808, "grad_norm": 6.6405558586120605, "learning_rate": 1.617335691474649e-05, "loss": 1.7948, "step": 91440 }, { "epoch": 0.5747844361950779, "grad_norm": 6.775050163269043, "learning_rate": 1.6172937813801836e-05, "loss": 1.6598, "step": 91450 }, { "epoch": 0.574847288511775, "grad_norm": 6.293248653411865, "learning_rate": 1.617251871285718e-05, "loss": 1.7985, "step": 91460 }, { "epoch": 0.5749101408284721, "grad_norm": 6.873047351837158, "learning_rate": 1.6172099611912526e-05, "loss": 1.5276, "step": 91470 }, { "epoch": 0.5749729931451693, "grad_norm": 7.247620105743408, "learning_rate": 1.6171680510967874e-05, "loss": 1.6014, "step": 91480 }, { "epoch": 0.5750358454618664, "grad_norm": 7.733684062957764, "learning_rate": 1.6171303320117685e-05, "loss": 1.615, "step": 91490 }, { "epoch": 0.5750986977785635, "grad_norm": 9.061704635620117, "learning_rate": 1.6170884219173032e-05, "loss": 1.7163, "step": 91500 }, { "epoch": 0.5751615500952605, "grad_norm": 6.1667022705078125, "learning_rate": 1.617046511822838e-05, "loss": 1.5393, "step": 91510 }, { "epoch": 0.5752244024119576, "grad_norm": 6.157415390014648, "learning_rate": 1.6170046017283726e-05, "loss": 1.6257, "step": 91520 }, { "epoch": 0.5752872547286547, "grad_norm": 7.665525913238525, "learning_rate": 1.6169626916339073e-05, "loss": 1.786, "step": 91530 }, { "epoch": 0.5753501070453518, "grad_norm": 5.009360313415527, "learning_rate": 1.6169207815394417e-05, "loss": 1.6624, "step": 91540 }, { "epoch": 0.575412959362049, "grad_norm": 7.433297157287598, "learning_rate": 1.6168788714449764e-05, "loss": 1.887, "step": 91550 }, { "epoch": 0.5754758116787461, "grad_norm": 6.069151878356934, "learning_rate": 1.616836961350511e-05, "loss": 1.6577, "step": 91560 }, { "epoch": 0.5755386639954432, "grad_norm": 5.833019733428955, "learning_rate": 1.6167950512560458e-05, "loss": 1.7991, "step": 91570 }, { "epoch": 0.5756015163121403, "grad_norm": 7.223495006561279, "learning_rate": 1.6167531411615802e-05, "loss": 1.7811, "step": 91580 }, { "epoch": 0.5756643686288374, "grad_norm": 7.341835975646973, "learning_rate": 1.616711231067115e-05, "loss": 1.6356, "step": 91590 }, { "epoch": 0.5757272209455345, "grad_norm": 7.553828239440918, "learning_rate": 1.6166693209726496e-05, "loss": 1.8357, "step": 91600 }, { "epoch": 0.5757900732622316, "grad_norm": 6.463942050933838, "learning_rate": 1.6166274108781843e-05, "loss": 1.5733, "step": 91610 }, { "epoch": 0.5758529255789288, "grad_norm": 7.367928981781006, "learning_rate": 1.616585500783719e-05, "loss": 1.364, "step": 91620 }, { "epoch": 0.5759157778956259, "grad_norm": 6.926959991455078, "learning_rate": 1.6165435906892534e-05, "loss": 1.8664, "step": 91630 }, { "epoch": 0.575978630212323, "grad_norm": 6.353264808654785, "learning_rate": 1.616501680594788e-05, "loss": 1.5886, "step": 91640 }, { "epoch": 0.5760414825290201, "grad_norm": 7.789851188659668, "learning_rate": 1.6164597705003228e-05, "loss": 1.7212, "step": 91650 }, { "epoch": 0.5761043348457172, "grad_norm": 6.7941412925720215, "learning_rate": 1.6164178604058575e-05, "loss": 1.345, "step": 91660 }, { "epoch": 0.5761671871624143, "grad_norm": 6.814610481262207, "learning_rate": 1.6163759503113922e-05, "loss": 1.4953, "step": 91670 }, { "epoch": 0.5762300394791114, "grad_norm": 6.6490092277526855, "learning_rate": 1.616334040216927e-05, "loss": 1.5953, "step": 91680 }, { "epoch": 0.5762928917958086, "grad_norm": 7.200467109680176, "learning_rate": 1.6162921301224613e-05, "loss": 1.7973, "step": 91690 }, { "epoch": 0.5763557441125057, "grad_norm": 7.003600597381592, "learning_rate": 1.616250220027996e-05, "loss": 1.7018, "step": 91700 }, { "epoch": 0.5764185964292028, "grad_norm": 5.625233173370361, "learning_rate": 1.6162083099335307e-05, "loss": 1.6863, "step": 91710 }, { "epoch": 0.5764814487458999, "grad_norm": 6.291018486022949, "learning_rate": 1.6161663998390654e-05, "loss": 1.6589, "step": 91720 }, { "epoch": 0.576544301062597, "grad_norm": 5.673514366149902, "learning_rate": 1.6161244897446e-05, "loss": 1.8417, "step": 91730 }, { "epoch": 0.5766071533792941, "grad_norm": 7.698174476623535, "learning_rate": 1.6160825796501348e-05, "loss": 1.7826, "step": 91740 }, { "epoch": 0.5766700056959913, "grad_norm": 6.972863674163818, "learning_rate": 1.6160406695556695e-05, "loss": 1.6353, "step": 91750 }, { "epoch": 0.5767328580126884, "grad_norm": 6.908972263336182, "learning_rate": 1.615998759461204e-05, "loss": 1.743, "step": 91760 }, { "epoch": 0.5767957103293854, "grad_norm": 5.623591423034668, "learning_rate": 1.6159568493667386e-05, "loss": 1.4776, "step": 91770 }, { "epoch": 0.5768585626460825, "grad_norm": 7.277279853820801, "learning_rate": 1.6159149392722733e-05, "loss": 1.6441, "step": 91780 }, { "epoch": 0.5769214149627796, "grad_norm": 7.286955833435059, "learning_rate": 1.615873029177808e-05, "loss": 1.6584, "step": 91790 }, { "epoch": 0.5769842672794767, "grad_norm": 7.2920732498168945, "learning_rate": 1.6158311190833424e-05, "loss": 1.5623, "step": 91800 }, { "epoch": 0.5770471195961738, "grad_norm": 7.998461723327637, "learning_rate": 1.615789208988877e-05, "loss": 1.83, "step": 91810 }, { "epoch": 0.577109971912871, "grad_norm": 6.427152633666992, "learning_rate": 1.6157514899038586e-05, "loss": 1.6626, "step": 91820 }, { "epoch": 0.5771728242295681, "grad_norm": 6.673247814178467, "learning_rate": 1.615709579809393e-05, "loss": 1.7358, "step": 91830 }, { "epoch": 0.5772356765462652, "grad_norm": 6.672535419464111, "learning_rate": 1.6156676697149276e-05, "loss": 1.7213, "step": 91840 }, { "epoch": 0.5772985288629623, "grad_norm": 5.9849419593811035, "learning_rate": 1.6156257596204623e-05, "loss": 1.7514, "step": 91850 }, { "epoch": 0.5773613811796594, "grad_norm": 8.018648147583008, "learning_rate": 1.615583849525997e-05, "loss": 1.8444, "step": 91860 }, { "epoch": 0.5774242334963565, "grad_norm": 5.908487796783447, "learning_rate": 1.6155419394315318e-05, "loss": 1.5183, "step": 91870 }, { "epoch": 0.5774870858130536, "grad_norm": 7.056883335113525, "learning_rate": 1.615500029337066e-05, "loss": 1.6636, "step": 91880 }, { "epoch": 0.5775499381297508, "grad_norm": 6.9238762855529785, "learning_rate": 1.6154581192426008e-05, "loss": 1.5482, "step": 91890 }, { "epoch": 0.5776127904464479, "grad_norm": 7.2767181396484375, "learning_rate": 1.6154162091481355e-05, "loss": 1.7856, "step": 91900 }, { "epoch": 0.577675642763145, "grad_norm": 6.490273952484131, "learning_rate": 1.6153742990536702e-05, "loss": 1.6382, "step": 91910 }, { "epoch": 0.5777384950798421, "grad_norm": 7.068709373474121, "learning_rate": 1.6153323889592046e-05, "loss": 1.6674, "step": 91920 }, { "epoch": 0.5778013473965392, "grad_norm": 6.099994659423828, "learning_rate": 1.6152904788647393e-05, "loss": 1.8337, "step": 91930 }, { "epoch": 0.5778641997132363, "grad_norm": 6.2828826904296875, "learning_rate": 1.615248568770274e-05, "loss": 1.7471, "step": 91940 }, { "epoch": 0.5779270520299334, "grad_norm": 7.636531352996826, "learning_rate": 1.6152066586758087e-05, "loss": 1.6994, "step": 91950 }, { "epoch": 0.5779899043466306, "grad_norm": 6.466481685638428, "learning_rate": 1.6151647485813434e-05, "loss": 1.5933, "step": 91960 }, { "epoch": 0.5780527566633277, "grad_norm": 8.078164100646973, "learning_rate": 1.615122838486878e-05, "loss": 1.3894, "step": 91970 }, { "epoch": 0.5781156089800248, "grad_norm": 7.178411483764648, "learning_rate": 1.615080928392413e-05, "loss": 1.9705, "step": 91980 }, { "epoch": 0.5781784612967219, "grad_norm": 6.905796051025391, "learning_rate": 1.6150390182979476e-05, "loss": 1.6892, "step": 91990 }, { "epoch": 0.578241313613419, "grad_norm": 7.062839508056641, "learning_rate": 1.6149971082034823e-05, "loss": 1.4788, "step": 92000 }, { "epoch": 0.5783041659301161, "grad_norm": 7.051177501678467, "learning_rate": 1.6149551981090166e-05, "loss": 1.6358, "step": 92010 }, { "epoch": 0.5783670182468131, "grad_norm": 7.292420864105225, "learning_rate": 1.6149132880145513e-05, "loss": 1.6342, "step": 92020 }, { "epoch": 0.5784298705635103, "grad_norm": 6.565810680389404, "learning_rate": 1.614871377920086e-05, "loss": 1.7552, "step": 92030 }, { "epoch": 0.5784927228802074, "grad_norm": 6.324333190917969, "learning_rate": 1.6148294678256208e-05, "loss": 1.7477, "step": 92040 }, { "epoch": 0.5785555751969045, "grad_norm": 6.976558208465576, "learning_rate": 1.6147875577311555e-05, "loss": 1.9807, "step": 92050 }, { "epoch": 0.5786184275136016, "grad_norm": 5.373647689819336, "learning_rate": 1.61474564763669e-05, "loss": 1.8094, "step": 92060 }, { "epoch": 0.5786812798302987, "grad_norm": 6.824400901794434, "learning_rate": 1.6147037375422245e-05, "loss": 1.6915, "step": 92070 }, { "epoch": 0.5787441321469958, "grad_norm": 5.881832122802734, "learning_rate": 1.6146618274477592e-05, "loss": 1.5915, "step": 92080 }, { "epoch": 0.5788069844636929, "grad_norm": 5.642775058746338, "learning_rate": 1.614619917353294e-05, "loss": 1.5, "step": 92090 }, { "epoch": 0.5788698367803901, "grad_norm": 6.9122443199157715, "learning_rate": 1.6145780072588283e-05, "loss": 1.6575, "step": 92100 }, { "epoch": 0.5789326890970872, "grad_norm": 6.7870330810546875, "learning_rate": 1.614536097164363e-05, "loss": 1.7135, "step": 92110 }, { "epoch": 0.5789955414137843, "grad_norm": 6.732898235321045, "learning_rate": 1.6144941870698977e-05, "loss": 1.6904, "step": 92120 }, { "epoch": 0.5790583937304814, "grad_norm": 6.3459672927856445, "learning_rate": 1.6144522769754324e-05, "loss": 1.6486, "step": 92130 }, { "epoch": 0.5791212460471785, "grad_norm": 6.703038215637207, "learning_rate": 1.6144103668809668e-05, "loss": 1.4906, "step": 92140 }, { "epoch": 0.5791840983638756, "grad_norm": 6.147325038909912, "learning_rate": 1.6143684567865015e-05, "loss": 1.7452, "step": 92150 }, { "epoch": 0.5792469506805727, "grad_norm": 6.5492353439331055, "learning_rate": 1.6143265466920362e-05, "loss": 1.6346, "step": 92160 }, { "epoch": 0.5793098029972699, "grad_norm": 6.067101955413818, "learning_rate": 1.614284636597571e-05, "loss": 1.9467, "step": 92170 }, { "epoch": 0.579372655313967, "grad_norm": 6.0446367263793945, "learning_rate": 1.6142427265031056e-05, "loss": 1.6943, "step": 92180 }, { "epoch": 0.5794355076306641, "grad_norm": 7.993139743804932, "learning_rate": 1.6142008164086403e-05, "loss": 1.5548, "step": 92190 }, { "epoch": 0.5794983599473612, "grad_norm": 6.8349609375, "learning_rate": 1.614158906314175e-05, "loss": 1.6541, "step": 92200 }, { "epoch": 0.5795612122640583, "grad_norm": 7.241196155548096, "learning_rate": 1.6141169962197098e-05, "loss": 1.6765, "step": 92210 }, { "epoch": 0.5796240645807554, "grad_norm": 6.830817222595215, "learning_rate": 1.6140750861252445e-05, "loss": 1.4719, "step": 92220 }, { "epoch": 0.5796869168974526, "grad_norm": 6.541594505310059, "learning_rate": 1.614033176030779e-05, "loss": 1.7481, "step": 92230 }, { "epoch": 0.5797497692141497, "grad_norm": 7.100144386291504, "learning_rate": 1.6139912659363135e-05, "loss": 1.6503, "step": 92240 }, { "epoch": 0.5798126215308468, "grad_norm": 5.921083927154541, "learning_rate": 1.6139493558418482e-05, "loss": 1.7248, "step": 92250 }, { "epoch": 0.5798754738475439, "grad_norm": 7.404361724853516, "learning_rate": 1.613907445747383e-05, "loss": 1.8192, "step": 92260 }, { "epoch": 0.579938326164241, "grad_norm": 6.89762020111084, "learning_rate": 1.6138655356529177e-05, "loss": 1.7486, "step": 92270 }, { "epoch": 0.580001178480938, "grad_norm": 6.422845363616943, "learning_rate": 1.613823625558452e-05, "loss": 1.5891, "step": 92280 }, { "epoch": 0.5800640307976351, "grad_norm": 6.47426176071167, "learning_rate": 1.6137817154639867e-05, "loss": 1.6937, "step": 92290 }, { "epoch": 0.5801268831143322, "grad_norm": 6.403764724731445, "learning_rate": 1.6137398053695214e-05, "loss": 1.3126, "step": 92300 }, { "epoch": 0.5801897354310294, "grad_norm": 7.819459438323975, "learning_rate": 1.613697895275056e-05, "loss": 1.6318, "step": 92310 }, { "epoch": 0.5802525877477265, "grad_norm": 6.455129623413086, "learning_rate": 1.6136559851805905e-05, "loss": 1.636, "step": 92320 }, { "epoch": 0.5803154400644236, "grad_norm": 7.5750346183776855, "learning_rate": 1.6136140750861252e-05, "loss": 1.7812, "step": 92330 }, { "epoch": 0.5803782923811207, "grad_norm": 6.966227054595947, "learning_rate": 1.61357216499166e-05, "loss": 1.8077, "step": 92340 }, { "epoch": 0.5804411446978178, "grad_norm": 5.989964485168457, "learning_rate": 1.6135302548971946e-05, "loss": 1.5901, "step": 92350 }, { "epoch": 0.5805039970145149, "grad_norm": 6.2141313552856445, "learning_rate": 1.6134883448027293e-05, "loss": 1.7269, "step": 92360 }, { "epoch": 0.580566849331212, "grad_norm": 7.187416076660156, "learning_rate": 1.613446434708264e-05, "loss": 1.5185, "step": 92370 }, { "epoch": 0.5806297016479092, "grad_norm": 7.019697666168213, "learning_rate": 1.6134045246137988e-05, "loss": 1.609, "step": 92380 }, { "epoch": 0.5806925539646063, "grad_norm": 5.8373918533325195, "learning_rate": 1.613362614519333e-05, "loss": 1.5661, "step": 92390 }, { "epoch": 0.5807554062813034, "grad_norm": 6.427718162536621, "learning_rate": 1.613320704424868e-05, "loss": 1.5865, "step": 92400 }, { "epoch": 0.5808182585980005, "grad_norm": 6.802637577056885, "learning_rate": 1.6132787943304025e-05, "loss": 1.6558, "step": 92410 }, { "epoch": 0.5808811109146976, "grad_norm": 8.224874496459961, "learning_rate": 1.6132368842359373e-05, "loss": 1.6988, "step": 92420 }, { "epoch": 0.5809439632313947, "grad_norm": 6.821275234222412, "learning_rate": 1.613194974141472e-05, "loss": 1.8426, "step": 92430 }, { "epoch": 0.5810068155480919, "grad_norm": 7.150043487548828, "learning_rate": 1.6131530640470067e-05, "loss": 1.7512, "step": 92440 }, { "epoch": 0.581069667864789, "grad_norm": 6.87367057800293, "learning_rate": 1.613111153952541e-05, "loss": 1.6349, "step": 92450 }, { "epoch": 0.5811325201814861, "grad_norm": 7.385997295379639, "learning_rate": 1.6130692438580757e-05, "loss": 1.6678, "step": 92460 }, { "epoch": 0.5811953724981832, "grad_norm": 7.349902153015137, "learning_rate": 1.6130273337636104e-05, "loss": 1.6892, "step": 92470 }, { "epoch": 0.5812582248148803, "grad_norm": 5.5534234046936035, "learning_rate": 1.612985423669145e-05, "loss": 1.7053, "step": 92480 }, { "epoch": 0.5813210771315774, "grad_norm": 7.304937839508057, "learning_rate": 1.61294351357468e-05, "loss": 1.7921, "step": 92490 }, { "epoch": 0.5813839294482746, "grad_norm": 5.410800457000732, "learning_rate": 1.6129016034802142e-05, "loss": 1.5984, "step": 92500 }, { "epoch": 0.5814467817649717, "grad_norm": 6.12760066986084, "learning_rate": 1.612859693385749e-05, "loss": 1.6574, "step": 92510 }, { "epoch": 0.5815096340816688, "grad_norm": 7.921392440795898, "learning_rate": 1.6128177832912836e-05, "loss": 1.8178, "step": 92520 }, { "epoch": 0.5815724863983658, "grad_norm": 6.937547206878662, "learning_rate": 1.6127758731968184e-05, "loss": 1.7, "step": 92530 }, { "epoch": 0.5816353387150629, "grad_norm": 6.305153846740723, "learning_rate": 1.6127339631023527e-05, "loss": 1.8536, "step": 92540 }, { "epoch": 0.58169819103176, "grad_norm": 6.381948471069336, "learning_rate": 1.6126920530078874e-05, "loss": 1.6026, "step": 92550 }, { "epoch": 0.5817610433484571, "grad_norm": 6.949012279510498, "learning_rate": 1.612650142913422e-05, "loss": 1.7281, "step": 92560 }, { "epoch": 0.5818238956651542, "grad_norm": 6.717901706695557, "learning_rate": 1.612608232818957e-05, "loss": 1.7287, "step": 92570 }, { "epoch": 0.5818867479818514, "grad_norm": 6.39068603515625, "learning_rate": 1.6125663227244915e-05, "loss": 1.6692, "step": 92580 }, { "epoch": 0.5819496002985485, "grad_norm": 7.127432346343994, "learning_rate": 1.6125244126300263e-05, "loss": 1.676, "step": 92590 }, { "epoch": 0.5820124526152456, "grad_norm": 6.248184680938721, "learning_rate": 1.612482502535561e-05, "loss": 1.5538, "step": 92600 }, { "epoch": 0.5820753049319427, "grad_norm": 6.269472599029541, "learning_rate": 1.6124405924410957e-05, "loss": 1.6206, "step": 92610 }, { "epoch": 0.5821381572486398, "grad_norm": 6.745823860168457, "learning_rate": 1.6123986823466304e-05, "loss": 1.6224, "step": 92620 }, { "epoch": 0.5822010095653369, "grad_norm": 6.95557975769043, "learning_rate": 1.6123567722521647e-05, "loss": 1.7648, "step": 92630 }, { "epoch": 0.582263861882034, "grad_norm": 5.934445381164551, "learning_rate": 1.6123148621576995e-05, "loss": 1.3874, "step": 92640 }, { "epoch": 0.5823267141987312, "grad_norm": 6.739503860473633, "learning_rate": 1.612272952063234e-05, "loss": 1.6298, "step": 92650 }, { "epoch": 0.5823895665154283, "grad_norm": 5.960019588470459, "learning_rate": 1.612231041968769e-05, "loss": 1.7667, "step": 92660 }, { "epoch": 0.5824524188321254, "grad_norm": 5.629377365112305, "learning_rate": 1.6121891318743036e-05, "loss": 1.6309, "step": 92670 }, { "epoch": 0.5825152711488225, "grad_norm": 6.700377464294434, "learning_rate": 1.612147221779838e-05, "loss": 1.6084, "step": 92680 }, { "epoch": 0.5825781234655196, "grad_norm": 7.131255626678467, "learning_rate": 1.6121053116853726e-05, "loss": 1.5905, "step": 92690 }, { "epoch": 0.5826409757822167, "grad_norm": 6.773268222808838, "learning_rate": 1.6120634015909074e-05, "loss": 1.6531, "step": 92700 }, { "epoch": 0.5827038280989139, "grad_norm": 7.491827964782715, "learning_rate": 1.612021491496442e-05, "loss": 1.5632, "step": 92710 }, { "epoch": 0.582766680415611, "grad_norm": 6.638195037841797, "learning_rate": 1.6119795814019764e-05, "loss": 1.6553, "step": 92720 }, { "epoch": 0.5828295327323081, "grad_norm": 6.352215766906738, "learning_rate": 1.611937671307511e-05, "loss": 1.6827, "step": 92730 }, { "epoch": 0.5828923850490052, "grad_norm": 6.999631881713867, "learning_rate": 1.611895761213046e-05, "loss": 1.8146, "step": 92740 }, { "epoch": 0.5829552373657023, "grad_norm": 6.245862007141113, "learning_rate": 1.6118538511185806e-05, "loss": 1.5633, "step": 92750 }, { "epoch": 0.5830180896823994, "grad_norm": 6.060407638549805, "learning_rate": 1.611811941024115e-05, "loss": 1.5717, "step": 92760 }, { "epoch": 0.5830809419990965, "grad_norm": 6.836421966552734, "learning_rate": 1.6117700309296496e-05, "loss": 1.7283, "step": 92770 }, { "epoch": 0.5831437943157937, "grad_norm": 8.111465454101562, "learning_rate": 1.6117281208351843e-05, "loss": 1.886, "step": 92780 }, { "epoch": 0.5832066466324907, "grad_norm": 6.941701412200928, "learning_rate": 1.611686210740719e-05, "loss": 1.7525, "step": 92790 }, { "epoch": 0.5832694989491878, "grad_norm": 7.790984153747559, "learning_rate": 1.6116443006462537e-05, "loss": 2.0233, "step": 92800 }, { "epoch": 0.5833323512658849, "grad_norm": 6.319355487823486, "learning_rate": 1.6116023905517885e-05, "loss": 1.7986, "step": 92810 }, { "epoch": 0.583395203582582, "grad_norm": 6.105554580688477, "learning_rate": 1.611560480457323e-05, "loss": 1.6814, "step": 92820 }, { "epoch": 0.5834580558992791, "grad_norm": 6.465337753295898, "learning_rate": 1.611518570362858e-05, "loss": 1.7728, "step": 92830 }, { "epoch": 0.5835209082159762, "grad_norm": 7.23231840133667, "learning_rate": 1.6114766602683926e-05, "loss": 1.673, "step": 92840 }, { "epoch": 0.5835837605326734, "grad_norm": 6.262798309326172, "learning_rate": 1.611434750173927e-05, "loss": 1.5981, "step": 92850 }, { "epoch": 0.5836466128493705, "grad_norm": 6.214344024658203, "learning_rate": 1.6113928400794617e-05, "loss": 1.7211, "step": 92860 }, { "epoch": 0.5837094651660676, "grad_norm": 7.631073474884033, "learning_rate": 1.6113509299849964e-05, "loss": 1.6097, "step": 92870 }, { "epoch": 0.5837723174827647, "grad_norm": 6.744692802429199, "learning_rate": 1.611309019890531e-05, "loss": 1.6152, "step": 92880 }, { "epoch": 0.5838351697994618, "grad_norm": 6.149967670440674, "learning_rate": 1.6112671097960658e-05, "loss": 1.6721, "step": 92890 }, { "epoch": 0.5838980221161589, "grad_norm": 7.210057735443115, "learning_rate": 1.6112251997016e-05, "loss": 1.8445, "step": 92900 }, { "epoch": 0.583960874432856, "grad_norm": 7.1479387283325195, "learning_rate": 1.611183289607135e-05, "loss": 1.5608, "step": 92910 }, { "epoch": 0.5840237267495532, "grad_norm": 5.437380790710449, "learning_rate": 1.6111413795126696e-05, "loss": 1.6195, "step": 92920 }, { "epoch": 0.5840865790662503, "grad_norm": 6.878098011016846, "learning_rate": 1.6110994694182043e-05, "loss": 1.7123, "step": 92930 }, { "epoch": 0.5841494313829474, "grad_norm": 7.800525665283203, "learning_rate": 1.6110575593237386e-05, "loss": 1.7621, "step": 92940 }, { "epoch": 0.5842122836996445, "grad_norm": 6.723679065704346, "learning_rate": 1.6110156492292733e-05, "loss": 1.5929, "step": 92950 }, { "epoch": 0.5842751360163416, "grad_norm": 7.580997943878174, "learning_rate": 1.610973739134808e-05, "loss": 1.6705, "step": 92960 }, { "epoch": 0.5843379883330387, "grad_norm": 6.25993537902832, "learning_rate": 1.6109318290403428e-05, "loss": 1.5129, "step": 92970 }, { "epoch": 0.5844008406497359, "grad_norm": 8.702542304992676, "learning_rate": 1.6108899189458775e-05, "loss": 1.8442, "step": 92980 }, { "epoch": 0.584463692966433, "grad_norm": 6.514962196350098, "learning_rate": 1.610848008851412e-05, "loss": 1.6069, "step": 92990 }, { "epoch": 0.5845265452831301, "grad_norm": 5.985245704650879, "learning_rate": 1.610806098756947e-05, "loss": 1.6856, "step": 93000 }, { "epoch": 0.5845893975998272, "grad_norm": 7.467013835906982, "learning_rate": 1.6107641886624812e-05, "loss": 1.7394, "step": 93010 }, { "epoch": 0.5846522499165243, "grad_norm": 6.320962429046631, "learning_rate": 1.610722278568016e-05, "loss": 1.6715, "step": 93020 }, { "epoch": 0.5847151022332214, "grad_norm": 6.738753795623779, "learning_rate": 1.6106803684735507e-05, "loss": 1.7898, "step": 93030 }, { "epoch": 0.5847779545499185, "grad_norm": 6.880959510803223, "learning_rate": 1.6106384583790854e-05, "loss": 1.6081, "step": 93040 }, { "epoch": 0.5848408068666155, "grad_norm": 6.13414192199707, "learning_rate": 1.61059654828462e-05, "loss": 1.7875, "step": 93050 }, { "epoch": 0.5849036591833127, "grad_norm": 6.073904514312744, "learning_rate": 1.6105546381901548e-05, "loss": 1.6608, "step": 93060 }, { "epoch": 0.5849665115000098, "grad_norm": 5.76240348815918, "learning_rate": 1.610512728095689e-05, "loss": 1.7652, "step": 93070 }, { "epoch": 0.5850293638167069, "grad_norm": 5.705706596374512, "learning_rate": 1.610470818001224e-05, "loss": 1.5047, "step": 93080 }, { "epoch": 0.585092216133404, "grad_norm": 7.057455062866211, "learning_rate": 1.6104289079067586e-05, "loss": 1.6391, "step": 93090 }, { "epoch": 0.5851550684501011, "grad_norm": 5.5291748046875, "learning_rate": 1.6103869978122933e-05, "loss": 1.3893, "step": 93100 }, { "epoch": 0.5852179207667982, "grad_norm": 7.446898460388184, "learning_rate": 1.610345087717828e-05, "loss": 1.4326, "step": 93110 }, { "epoch": 0.5852807730834954, "grad_norm": 6.40448522567749, "learning_rate": 1.6103031776233623e-05, "loss": 1.5855, "step": 93120 }, { "epoch": 0.5853436254001925, "grad_norm": 7.22370719909668, "learning_rate": 1.610261267528897e-05, "loss": 1.5817, "step": 93130 }, { "epoch": 0.5854064777168896, "grad_norm": 5.670430660247803, "learning_rate": 1.6102193574344318e-05, "loss": 1.6276, "step": 93140 }, { "epoch": 0.5854693300335867, "grad_norm": 7.9834089279174805, "learning_rate": 1.6101774473399665e-05, "loss": 1.6822, "step": 93150 }, { "epoch": 0.5855321823502838, "grad_norm": 6.9317827224731445, "learning_rate": 1.610135537245501e-05, "loss": 1.8368, "step": 93160 }, { "epoch": 0.5855950346669809, "grad_norm": 7.470687389373779, "learning_rate": 1.6100936271510355e-05, "loss": 1.4557, "step": 93170 }, { "epoch": 0.585657886983678, "grad_norm": 8.429841995239258, "learning_rate": 1.6100517170565702e-05, "loss": 1.7588, "step": 93180 }, { "epoch": 0.5857207393003752, "grad_norm": 6.873202323913574, "learning_rate": 1.610009806962105e-05, "loss": 1.6446, "step": 93190 }, { "epoch": 0.5857835916170723, "grad_norm": 8.124137878417969, "learning_rate": 1.6099678968676397e-05, "loss": 1.7304, "step": 93200 }, { "epoch": 0.5858464439337694, "grad_norm": 5.538909435272217, "learning_rate": 1.6099259867731744e-05, "loss": 1.7024, "step": 93210 }, { "epoch": 0.5859092962504665, "grad_norm": 6.4878034591674805, "learning_rate": 1.609884076678709e-05, "loss": 1.7013, "step": 93220 }, { "epoch": 0.5859721485671636, "grad_norm": 8.979625701904297, "learning_rate": 1.6098421665842438e-05, "loss": 1.5524, "step": 93230 }, { "epoch": 0.5860350008838607, "grad_norm": 7.3275861740112305, "learning_rate": 1.6098002564897785e-05, "loss": 1.687, "step": 93240 }, { "epoch": 0.5860978532005579, "grad_norm": 7.95927619934082, "learning_rate": 1.609758346395313e-05, "loss": 1.6542, "step": 93250 }, { "epoch": 0.586160705517255, "grad_norm": 7.133495807647705, "learning_rate": 1.6097164363008476e-05, "loss": 1.6232, "step": 93260 }, { "epoch": 0.5862235578339521, "grad_norm": 6.226972579956055, "learning_rate": 1.6096745262063823e-05, "loss": 1.5965, "step": 93270 }, { "epoch": 0.5862864101506492, "grad_norm": 8.452329635620117, "learning_rate": 1.609632616111917e-05, "loss": 1.8258, "step": 93280 }, { "epoch": 0.5863492624673463, "grad_norm": 7.127265930175781, "learning_rate": 1.6095907060174517e-05, "loss": 1.6476, "step": 93290 }, { "epoch": 0.5864121147840433, "grad_norm": 7.24031400680542, "learning_rate": 1.609548795922986e-05, "loss": 1.8477, "step": 93300 }, { "epoch": 0.5864749671007404, "grad_norm": 6.776553630828857, "learning_rate": 1.6095068858285208e-05, "loss": 1.5119, "step": 93310 }, { "epoch": 0.5865378194174375, "grad_norm": 7.938908100128174, "learning_rate": 1.6094649757340555e-05, "loss": 1.7587, "step": 93320 }, { "epoch": 0.5866006717341347, "grad_norm": 6.02783727645874, "learning_rate": 1.6094230656395902e-05, "loss": 1.6484, "step": 93330 }, { "epoch": 0.5866635240508318, "grad_norm": 5.759817600250244, "learning_rate": 1.6093811555451245e-05, "loss": 1.5388, "step": 93340 }, { "epoch": 0.5867263763675289, "grad_norm": 6.911100387573242, "learning_rate": 1.6093392454506593e-05, "loss": 1.5686, "step": 93350 }, { "epoch": 0.586789228684226, "grad_norm": 6.46895170211792, "learning_rate": 1.609297335356194e-05, "loss": 1.6428, "step": 93360 }, { "epoch": 0.5868520810009231, "grad_norm": 7.600230693817139, "learning_rate": 1.6092554252617287e-05, "loss": 1.5964, "step": 93370 }, { "epoch": 0.5869149333176202, "grad_norm": 6.355671405792236, "learning_rate": 1.6092135151672634e-05, "loss": 1.5837, "step": 93380 }, { "epoch": 0.5869777856343174, "grad_norm": 7.148404598236084, "learning_rate": 1.6091716050727977e-05, "loss": 1.7842, "step": 93390 }, { "epoch": 0.5870406379510145, "grad_norm": 6.771876335144043, "learning_rate": 1.6091296949783324e-05, "loss": 1.488, "step": 93400 }, { "epoch": 0.5871034902677116, "grad_norm": 8.27407169342041, "learning_rate": 1.609087784883867e-05, "loss": 1.4837, "step": 93410 }, { "epoch": 0.5871663425844087, "grad_norm": 5.6078972816467285, "learning_rate": 1.609045874789402e-05, "loss": 1.6057, "step": 93420 }, { "epoch": 0.5872291949011058, "grad_norm": 7.58414888381958, "learning_rate": 1.6090039646949366e-05, "loss": 1.5732, "step": 93430 }, { "epoch": 0.5872920472178029, "grad_norm": 6.562372207641602, "learning_rate": 1.6089620546004713e-05, "loss": 1.8322, "step": 93440 }, { "epoch": 0.5873548995345, "grad_norm": 6.939548969268799, "learning_rate": 1.608920144506006e-05, "loss": 1.4416, "step": 93450 }, { "epoch": 0.5874177518511972, "grad_norm": 6.7253570556640625, "learning_rate": 1.6088782344115407e-05, "loss": 1.6307, "step": 93460 }, { "epoch": 0.5874806041678943, "grad_norm": 6.343280792236328, "learning_rate": 1.608836324317075e-05, "loss": 1.7455, "step": 93470 }, { "epoch": 0.5875434564845914, "grad_norm": 6.016113758087158, "learning_rate": 1.6087944142226098e-05, "loss": 1.6355, "step": 93480 }, { "epoch": 0.5876063088012885, "grad_norm": 7.350913047790527, "learning_rate": 1.6087525041281445e-05, "loss": 1.5951, "step": 93490 }, { "epoch": 0.5876691611179856, "grad_norm": 6.3639092445373535, "learning_rate": 1.6087105940336792e-05, "loss": 1.6209, "step": 93500 }, { "epoch": 0.5877320134346827, "grad_norm": 6.527315616607666, "learning_rate": 1.608668683939214e-05, "loss": 1.6205, "step": 93510 }, { "epoch": 0.5877948657513798, "grad_norm": 6.714054584503174, "learning_rate": 1.6086267738447483e-05, "loss": 1.5192, "step": 93520 }, { "epoch": 0.587857718068077, "grad_norm": 6.581670761108398, "learning_rate": 1.608584863750283e-05, "loss": 1.7955, "step": 93530 }, { "epoch": 0.5879205703847741, "grad_norm": 7.485185623168945, "learning_rate": 1.6085429536558177e-05, "loss": 1.6857, "step": 93540 }, { "epoch": 0.5879834227014712, "grad_norm": 6.0929765701293945, "learning_rate": 1.6085010435613524e-05, "loss": 1.4265, "step": 93550 }, { "epoch": 0.5880462750181682, "grad_norm": 6.712284564971924, "learning_rate": 1.6084591334668867e-05, "loss": 1.8785, "step": 93560 }, { "epoch": 0.5881091273348653, "grad_norm": 8.242375373840332, "learning_rate": 1.6084172233724215e-05, "loss": 1.7111, "step": 93570 }, { "epoch": 0.5881719796515624, "grad_norm": 7.200745582580566, "learning_rate": 1.608375313277956e-05, "loss": 1.6196, "step": 93580 }, { "epoch": 0.5882348319682595, "grad_norm": 6.68762731552124, "learning_rate": 1.608333403183491e-05, "loss": 1.3562, "step": 93590 }, { "epoch": 0.5882976842849567, "grad_norm": 6.237014293670654, "learning_rate": 1.6082914930890256e-05, "loss": 1.5764, "step": 93600 }, { "epoch": 0.5883605366016538, "grad_norm": 7.350156307220459, "learning_rate": 1.6082495829945603e-05, "loss": 1.4858, "step": 93610 }, { "epoch": 0.5884233889183509, "grad_norm": 6.49483585357666, "learning_rate": 1.608207672900095e-05, "loss": 1.4508, "step": 93620 }, { "epoch": 0.588486241235048, "grad_norm": 6.597403049468994, "learning_rate": 1.6081657628056297e-05, "loss": 1.7434, "step": 93630 }, { "epoch": 0.5885490935517451, "grad_norm": 6.692638397216797, "learning_rate": 1.608123852711164e-05, "loss": 1.728, "step": 93640 }, { "epoch": 0.5886119458684422, "grad_norm": 6.858451843261719, "learning_rate": 1.6080819426166988e-05, "loss": 1.5272, "step": 93650 }, { "epoch": 0.5886747981851393, "grad_norm": 7.041205406188965, "learning_rate": 1.6080400325222335e-05, "loss": 1.5382, "step": 93660 }, { "epoch": 0.5887376505018365, "grad_norm": 5.92672872543335, "learning_rate": 1.6079981224277682e-05, "loss": 1.6031, "step": 93670 }, { "epoch": 0.5888005028185336, "grad_norm": 6.83441162109375, "learning_rate": 1.607956212333303e-05, "loss": 1.7608, "step": 93680 }, { "epoch": 0.5888633551352307, "grad_norm": 6.780746936798096, "learning_rate": 1.6079143022388376e-05, "loss": 1.6298, "step": 93690 }, { "epoch": 0.5889262074519278, "grad_norm": 6.454309940338135, "learning_rate": 1.607872392144372e-05, "loss": 1.6783, "step": 93700 }, { "epoch": 0.5889890597686249, "grad_norm": 7.5611371994018555, "learning_rate": 1.6078304820499067e-05, "loss": 1.5631, "step": 93710 }, { "epoch": 0.589051912085322, "grad_norm": 6.402606964111328, "learning_rate": 1.6077885719554414e-05, "loss": 1.6421, "step": 93720 }, { "epoch": 0.5891147644020192, "grad_norm": 5.505886554718018, "learning_rate": 1.607746661860976e-05, "loss": 1.782, "step": 93730 }, { "epoch": 0.5891776167187163, "grad_norm": 6.116056442260742, "learning_rate": 1.6077047517665105e-05, "loss": 1.7706, "step": 93740 }, { "epoch": 0.5892404690354134, "grad_norm": 7.178369045257568, "learning_rate": 1.607662841672045e-05, "loss": 1.4497, "step": 93750 }, { "epoch": 0.5893033213521105, "grad_norm": 5.941424369812012, "learning_rate": 1.60762093157758e-05, "loss": 1.7454, "step": 93760 }, { "epoch": 0.5893661736688076, "grad_norm": 21.873794555664062, "learning_rate": 1.6075790214831146e-05, "loss": 1.5727, "step": 93770 }, { "epoch": 0.5894290259855047, "grad_norm": 6.942519187927246, "learning_rate": 1.607537111388649e-05, "loss": 1.776, "step": 93780 }, { "epoch": 0.5894918783022018, "grad_norm": 6.549501419067383, "learning_rate": 1.6074952012941837e-05, "loss": 1.5357, "step": 93790 }, { "epoch": 0.589554730618899, "grad_norm": 5.837566375732422, "learning_rate": 1.6074532911997184e-05, "loss": 1.6878, "step": 93800 }, { "epoch": 0.589617582935596, "grad_norm": 7.793712139129639, "learning_rate": 1.607411381105253e-05, "loss": 1.6156, "step": 93810 }, { "epoch": 0.5896804352522931, "grad_norm": 5.924585819244385, "learning_rate": 1.6073694710107878e-05, "loss": 1.5394, "step": 93820 }, { "epoch": 0.5897432875689902, "grad_norm": 5.860642433166504, "learning_rate": 1.6073275609163225e-05, "loss": 1.7339, "step": 93830 }, { "epoch": 0.5898061398856873, "grad_norm": 7.303297519683838, "learning_rate": 1.6072856508218572e-05, "loss": 1.5512, "step": 93840 }, { "epoch": 0.5898689922023844, "grad_norm": 6.974435806274414, "learning_rate": 1.607243740727392e-05, "loss": 1.6649, "step": 93850 }, { "epoch": 0.5899318445190815, "grad_norm": 6.702293395996094, "learning_rate": 1.6072018306329266e-05, "loss": 1.6807, "step": 93860 }, { "epoch": 0.5899946968357787, "grad_norm": 6.116212844848633, "learning_rate": 1.607159920538461e-05, "loss": 1.5708, "step": 93870 }, { "epoch": 0.5900575491524758, "grad_norm": 6.604289531707764, "learning_rate": 1.6071180104439957e-05, "loss": 1.626, "step": 93880 }, { "epoch": 0.5901204014691729, "grad_norm": 6.858876705169678, "learning_rate": 1.6070761003495304e-05, "loss": 1.5147, "step": 93890 }, { "epoch": 0.59018325378587, "grad_norm": 5.5290446281433105, "learning_rate": 1.607034190255065e-05, "loss": 1.7127, "step": 93900 }, { "epoch": 0.5902461061025671, "grad_norm": 5.859738349914551, "learning_rate": 1.6069922801605998e-05, "loss": 1.4923, "step": 93910 }, { "epoch": 0.5903089584192642, "grad_norm": 6.121007442474365, "learning_rate": 1.606950370066134e-05, "loss": 1.4182, "step": 93920 }, { "epoch": 0.5903718107359613, "grad_norm": 5.847887992858887, "learning_rate": 1.606908459971669e-05, "loss": 1.6474, "step": 93930 }, { "epoch": 0.5904346630526585, "grad_norm": 6.725071907043457, "learning_rate": 1.6068665498772036e-05, "loss": 1.7763, "step": 93940 }, { "epoch": 0.5904975153693556, "grad_norm": 6.250272750854492, "learning_rate": 1.6068246397827383e-05, "loss": 1.612, "step": 93950 }, { "epoch": 0.5905603676860527, "grad_norm": 6.042320251464844, "learning_rate": 1.6067827296882727e-05, "loss": 1.7588, "step": 93960 }, { "epoch": 0.5906232200027498, "grad_norm": 6.307615756988525, "learning_rate": 1.6067408195938074e-05, "loss": 1.843, "step": 93970 }, { "epoch": 0.5906860723194469, "grad_norm": 7.286334991455078, "learning_rate": 1.606698909499342e-05, "loss": 1.7702, "step": 93980 }, { "epoch": 0.590748924636144, "grad_norm": 7.581210136413574, "learning_rate": 1.6066569994048768e-05, "loss": 1.6187, "step": 93990 }, { "epoch": 0.5908117769528412, "grad_norm": 6.826915740966797, "learning_rate": 1.6066150893104115e-05, "loss": 1.7507, "step": 94000 }, { "epoch": 0.5908746292695383, "grad_norm": 6.500402450561523, "learning_rate": 1.6065731792159462e-05, "loss": 1.7423, "step": 94010 }, { "epoch": 0.5909374815862354, "grad_norm": 6.2059197425842285, "learning_rate": 1.6065312691214806e-05, "loss": 1.6294, "step": 94020 }, { "epoch": 0.5910003339029325, "grad_norm": 7.754387855529785, "learning_rate": 1.6064893590270153e-05, "loss": 2.0808, "step": 94030 }, { "epoch": 0.5910631862196296, "grad_norm": 6.315862655639648, "learning_rate": 1.60644744893255e-05, "loss": 1.6505, "step": 94040 }, { "epoch": 0.5911260385363267, "grad_norm": 5.965827465057373, "learning_rate": 1.6064055388380847e-05, "loss": 1.8246, "step": 94050 }, { "epoch": 0.5911888908530238, "grad_norm": 6.464879035949707, "learning_rate": 1.6063636287436194e-05, "loss": 1.8034, "step": 94060 }, { "epoch": 0.5912517431697208, "grad_norm": 5.9997172355651855, "learning_rate": 1.606321718649154e-05, "loss": 1.682, "step": 94070 }, { "epoch": 0.591314595486418, "grad_norm": 5.9515557289123535, "learning_rate": 1.6062798085546888e-05, "loss": 1.5475, "step": 94080 }, { "epoch": 0.5913774478031151, "grad_norm": 6.149786949157715, "learning_rate": 1.606237898460223e-05, "loss": 1.6599, "step": 94090 }, { "epoch": 0.5914403001198122, "grad_norm": 7.075897693634033, "learning_rate": 1.606195988365758e-05, "loss": 1.8625, "step": 94100 }, { "epoch": 0.5915031524365093, "grad_norm": 5.6168365478515625, "learning_rate": 1.6061540782712926e-05, "loss": 1.5913, "step": 94110 }, { "epoch": 0.5915660047532064, "grad_norm": 7.652287006378174, "learning_rate": 1.6061121681768273e-05, "loss": 1.8451, "step": 94120 }, { "epoch": 0.5916288570699035, "grad_norm": 5.249066352844238, "learning_rate": 1.606070258082362e-05, "loss": 1.7546, "step": 94130 }, { "epoch": 0.5916917093866007, "grad_norm": 7.445679187774658, "learning_rate": 1.6060283479878964e-05, "loss": 1.7411, "step": 94140 }, { "epoch": 0.5917545617032978, "grad_norm": 7.669356822967529, "learning_rate": 1.605986437893431e-05, "loss": 1.7584, "step": 94150 }, { "epoch": 0.5918174140199949, "grad_norm": 6.608617305755615, "learning_rate": 1.6059445277989658e-05, "loss": 1.7146, "step": 94160 }, { "epoch": 0.591880266336692, "grad_norm": 8.506745338439941, "learning_rate": 1.6059026177045005e-05, "loss": 1.6163, "step": 94170 }, { "epoch": 0.5919431186533891, "grad_norm": 6.032199382781982, "learning_rate": 1.605860707610035e-05, "loss": 1.6001, "step": 94180 }, { "epoch": 0.5920059709700862, "grad_norm": 7.232632637023926, "learning_rate": 1.6058187975155696e-05, "loss": 1.7259, "step": 94190 }, { "epoch": 0.5920688232867833, "grad_norm": 6.042525768280029, "learning_rate": 1.6057768874211043e-05, "loss": 1.6663, "step": 94200 }, { "epoch": 0.5921316756034805, "grad_norm": 7.102494716644287, "learning_rate": 1.605734977326639e-05, "loss": 1.5914, "step": 94210 }, { "epoch": 0.5921945279201776, "grad_norm": 8.22027587890625, "learning_rate": 1.6056930672321737e-05, "loss": 1.6215, "step": 94220 }, { "epoch": 0.5922573802368747, "grad_norm": 5.757334232330322, "learning_rate": 1.6056511571377084e-05, "loss": 1.4659, "step": 94230 }, { "epoch": 0.5923202325535718, "grad_norm": 6.860438346862793, "learning_rate": 1.605609247043243e-05, "loss": 1.6507, "step": 94240 }, { "epoch": 0.5923830848702689, "grad_norm": 6.159027099609375, "learning_rate": 1.6055673369487778e-05, "loss": 1.766, "step": 94250 }, { "epoch": 0.592445937186966, "grad_norm": 8.29791259765625, "learning_rate": 1.6055254268543122e-05, "loss": 1.7368, "step": 94260 }, { "epoch": 0.5925087895036631, "grad_norm": 6.134650707244873, "learning_rate": 1.605483516759847e-05, "loss": 2.0518, "step": 94270 }, { "epoch": 0.5925716418203603, "grad_norm": 5.710776329040527, "learning_rate": 1.6054416066653816e-05, "loss": 1.5536, "step": 94280 }, { "epoch": 0.5926344941370574, "grad_norm": 7.502482891082764, "learning_rate": 1.6053996965709163e-05, "loss": 1.5654, "step": 94290 }, { "epoch": 0.5926973464537545, "grad_norm": 6.617005348205566, "learning_rate": 1.605357786476451e-05, "loss": 1.5087, "step": 94300 }, { "epoch": 0.5927601987704516, "grad_norm": 6.882579803466797, "learning_rate": 1.6053158763819857e-05, "loss": 1.5574, "step": 94310 }, { "epoch": 0.5928230510871486, "grad_norm": 6.2495245933532715, "learning_rate": 1.60527396628752e-05, "loss": 1.6714, "step": 94320 }, { "epoch": 0.5928859034038457, "grad_norm": 7.2485246658325195, "learning_rate": 1.6052320561930548e-05, "loss": 1.6908, "step": 94330 }, { "epoch": 0.5929487557205428, "grad_norm": 6.475519180297852, "learning_rate": 1.6051901460985895e-05, "loss": 1.8505, "step": 94340 }, { "epoch": 0.59301160803724, "grad_norm": 6.150235176086426, "learning_rate": 1.6051482360041242e-05, "loss": 1.5998, "step": 94350 }, { "epoch": 0.5930744603539371, "grad_norm": 7.224484920501709, "learning_rate": 1.6051063259096586e-05, "loss": 1.5895, "step": 94360 }, { "epoch": 0.5931373126706342, "grad_norm": 6.305262088775635, "learning_rate": 1.6050644158151933e-05, "loss": 1.5623, "step": 94370 }, { "epoch": 0.5932001649873313, "grad_norm": 11.27811050415039, "learning_rate": 1.605022505720728e-05, "loss": 1.6536, "step": 94380 }, { "epoch": 0.5932630173040284, "grad_norm": 5.835448741912842, "learning_rate": 1.6049805956262627e-05, "loss": 1.4614, "step": 94390 }, { "epoch": 0.5933258696207255, "grad_norm": 6.761912822723389, "learning_rate": 1.604938685531797e-05, "loss": 1.6031, "step": 94400 }, { "epoch": 0.5933887219374226, "grad_norm": 8.251826286315918, "learning_rate": 1.6048967754373318e-05, "loss": 1.8298, "step": 94410 }, { "epoch": 0.5934515742541198, "grad_norm": 7.148452281951904, "learning_rate": 1.6048548653428665e-05, "loss": 1.6455, "step": 94420 }, { "epoch": 0.5935144265708169, "grad_norm": 6.777230739593506, "learning_rate": 1.6048129552484012e-05, "loss": 1.4488, "step": 94430 }, { "epoch": 0.593577278887514, "grad_norm": 6.385249137878418, "learning_rate": 1.604771045153936e-05, "loss": 1.6662, "step": 94440 }, { "epoch": 0.5936401312042111, "grad_norm": 6.66254186630249, "learning_rate": 1.6047291350594706e-05, "loss": 1.5689, "step": 94450 }, { "epoch": 0.5937029835209082, "grad_norm": 6.666104793548584, "learning_rate": 1.6046872249650053e-05, "loss": 1.5781, "step": 94460 }, { "epoch": 0.5937658358376053, "grad_norm": 6.386703968048096, "learning_rate": 1.60464531487054e-05, "loss": 1.6384, "step": 94470 }, { "epoch": 0.5938286881543025, "grad_norm": 6.9321794509887695, "learning_rate": 1.6046034047760747e-05, "loss": 1.7958, "step": 94480 }, { "epoch": 0.5938915404709996, "grad_norm": 5.967465877532959, "learning_rate": 1.604561494681609e-05, "loss": 1.605, "step": 94490 }, { "epoch": 0.5939543927876967, "grad_norm": 7.165401458740234, "learning_rate": 1.6045195845871438e-05, "loss": 1.6437, "step": 94500 }, { "epoch": 0.5940172451043938, "grad_norm": 6.284710884094238, "learning_rate": 1.6044776744926785e-05, "loss": 1.6488, "step": 94510 }, { "epoch": 0.5940800974210909, "grad_norm": 5.183877944946289, "learning_rate": 1.6044357643982132e-05, "loss": 1.5784, "step": 94520 }, { "epoch": 0.594142949737788, "grad_norm": 7.364722728729248, "learning_rate": 1.604393854303748e-05, "loss": 1.5545, "step": 94530 }, { "epoch": 0.5942058020544851, "grad_norm": 6.855818271636963, "learning_rate": 1.6043519442092823e-05, "loss": 1.6052, "step": 94540 }, { "epoch": 0.5942686543711823, "grad_norm": 6.377351760864258, "learning_rate": 1.604310034114817e-05, "loss": 1.5647, "step": 94550 }, { "epoch": 0.5943315066878794, "grad_norm": 5.653633117675781, "learning_rate": 1.6042681240203517e-05, "loss": 1.5609, "step": 94560 }, { "epoch": 0.5943943590045765, "grad_norm": 7.1275763511657715, "learning_rate": 1.6042262139258864e-05, "loss": 1.437, "step": 94570 }, { "epoch": 0.5944572113212735, "grad_norm": 6.853404998779297, "learning_rate": 1.6041843038314208e-05, "loss": 1.5824, "step": 94580 }, { "epoch": 0.5945200636379706, "grad_norm": 8.112689018249512, "learning_rate": 1.6041423937369555e-05, "loss": 1.6326, "step": 94590 }, { "epoch": 0.5945829159546677, "grad_norm": 5.817140579223633, "learning_rate": 1.6041004836424902e-05, "loss": 1.704, "step": 94600 }, { "epoch": 0.5946457682713648, "grad_norm": 6.160879135131836, "learning_rate": 1.604058573548025e-05, "loss": 1.7157, "step": 94610 }, { "epoch": 0.594708620588062, "grad_norm": 6.327479839324951, "learning_rate": 1.6040166634535596e-05, "loss": 1.7189, "step": 94620 }, { "epoch": 0.5947714729047591, "grad_norm": 6.345967769622803, "learning_rate": 1.6039747533590943e-05, "loss": 1.7162, "step": 94630 }, { "epoch": 0.5948343252214562, "grad_norm": 7.8932414054870605, "learning_rate": 1.6039328432646287e-05, "loss": 1.6821, "step": 94640 }, { "epoch": 0.5948971775381533, "grad_norm": 5.700346946716309, "learning_rate": 1.6038909331701634e-05, "loss": 1.5322, "step": 94650 }, { "epoch": 0.5949600298548504, "grad_norm": 6.52507209777832, "learning_rate": 1.603849023075698e-05, "loss": 1.722, "step": 94660 }, { "epoch": 0.5950228821715475, "grad_norm": 6.896712303161621, "learning_rate": 1.6038071129812328e-05, "loss": 1.7281, "step": 94670 }, { "epoch": 0.5950857344882446, "grad_norm": 6.592931270599365, "learning_rate": 1.6037652028867675e-05, "loss": 1.714, "step": 94680 }, { "epoch": 0.5951485868049418, "grad_norm": 6.37777042388916, "learning_rate": 1.6037232927923022e-05, "loss": 1.6428, "step": 94690 }, { "epoch": 0.5952114391216389, "grad_norm": 5.731248378753662, "learning_rate": 1.603681382697837e-05, "loss": 1.7142, "step": 94700 }, { "epoch": 0.595274291438336, "grad_norm": 6.787526607513428, "learning_rate": 1.6036394726033713e-05, "loss": 1.448, "step": 94710 }, { "epoch": 0.5953371437550331, "grad_norm": 7.775476932525635, "learning_rate": 1.603597562508906e-05, "loss": 1.5785, "step": 94720 }, { "epoch": 0.5953999960717302, "grad_norm": 6.696002006530762, "learning_rate": 1.6035556524144407e-05, "loss": 1.6204, "step": 94730 }, { "epoch": 0.5954628483884273, "grad_norm": 5.686933994293213, "learning_rate": 1.6035137423199754e-05, "loss": 1.4842, "step": 94740 }, { "epoch": 0.5955257007051244, "grad_norm": 9.774834632873535, "learning_rate": 1.60347183222551e-05, "loss": 1.7541, "step": 94750 }, { "epoch": 0.5955885530218216, "grad_norm": 5.845921039581299, "learning_rate": 1.6034299221310445e-05, "loss": 1.7277, "step": 94760 }, { "epoch": 0.5956514053385187, "grad_norm": 7.821176528930664, "learning_rate": 1.6033880120365792e-05, "loss": 1.6146, "step": 94770 }, { "epoch": 0.5957142576552158, "grad_norm": 7.6437482833862305, "learning_rate": 1.603346101942114e-05, "loss": 1.6718, "step": 94780 }, { "epoch": 0.5957771099719129, "grad_norm": 6.291599273681641, "learning_rate": 1.6033041918476486e-05, "loss": 1.7452, "step": 94790 }, { "epoch": 0.59583996228861, "grad_norm": 7.090104103088379, "learning_rate": 1.603262281753183e-05, "loss": 1.9078, "step": 94800 }, { "epoch": 0.5959028146053071, "grad_norm": 7.310959815979004, "learning_rate": 1.6032203716587177e-05, "loss": 1.8254, "step": 94810 }, { "epoch": 0.5959656669220043, "grad_norm": 6.400932312011719, "learning_rate": 1.6031784615642524e-05, "loss": 1.6835, "step": 94820 }, { "epoch": 0.5960285192387013, "grad_norm": 6.25166654586792, "learning_rate": 1.603136551469787e-05, "loss": 1.8227, "step": 94830 }, { "epoch": 0.5960913715553984, "grad_norm": 6.510306358337402, "learning_rate": 1.6030946413753218e-05, "loss": 1.6051, "step": 94840 }, { "epoch": 0.5961542238720955, "grad_norm": 7.328588485717773, "learning_rate": 1.6030527312808565e-05, "loss": 1.7419, "step": 94850 }, { "epoch": 0.5962170761887926, "grad_norm": 6.656290054321289, "learning_rate": 1.6030108211863912e-05, "loss": 1.4313, "step": 94860 }, { "epoch": 0.5962799285054897, "grad_norm": 6.53260612487793, "learning_rate": 1.602968911091926e-05, "loss": 1.6663, "step": 94870 }, { "epoch": 0.5963427808221868, "grad_norm": 6.379949569702148, "learning_rate": 1.6029270009974606e-05, "loss": 1.6955, "step": 94880 }, { "epoch": 0.596405633138884, "grad_norm": 7.013059616088867, "learning_rate": 1.602885090902995e-05, "loss": 1.6558, "step": 94890 }, { "epoch": 0.5964684854555811, "grad_norm": 6.310605049133301, "learning_rate": 1.6028431808085297e-05, "loss": 1.6859, "step": 94900 }, { "epoch": 0.5965313377722782, "grad_norm": 5.933061599731445, "learning_rate": 1.6028012707140644e-05, "loss": 1.6247, "step": 94910 }, { "epoch": 0.5965941900889753, "grad_norm": 6.350223064422607, "learning_rate": 1.602759360619599e-05, "loss": 1.6963, "step": 94920 }, { "epoch": 0.5966570424056724, "grad_norm": 7.618001461029053, "learning_rate": 1.6027174505251338e-05, "loss": 1.7173, "step": 94930 }, { "epoch": 0.5967198947223695, "grad_norm": 6.708108901977539, "learning_rate": 1.6026755404306682e-05, "loss": 1.4079, "step": 94940 }, { "epoch": 0.5967827470390666, "grad_norm": 6.737509727478027, "learning_rate": 1.602633630336203e-05, "loss": 1.7482, "step": 94950 }, { "epoch": 0.5968455993557638, "grad_norm": 6.866915225982666, "learning_rate": 1.6025917202417376e-05, "loss": 1.3871, "step": 94960 }, { "epoch": 0.5969084516724609, "grad_norm": 7.664914131164551, "learning_rate": 1.6025498101472723e-05, "loss": 1.7733, "step": 94970 }, { "epoch": 0.596971303989158, "grad_norm": 6.510303974151611, "learning_rate": 1.6025079000528067e-05, "loss": 1.7395, "step": 94980 }, { "epoch": 0.5970341563058551, "grad_norm": 6.773441791534424, "learning_rate": 1.6024659899583414e-05, "loss": 1.5984, "step": 94990 }, { "epoch": 0.5970970086225522, "grad_norm": 6.100302696228027, "learning_rate": 1.602424079863876e-05, "loss": 1.5435, "step": 95000 }, { "epoch": 0.5971598609392493, "grad_norm": 7.015550136566162, "learning_rate": 1.6023821697694108e-05, "loss": 1.8034, "step": 95010 }, { "epoch": 0.5972227132559464, "grad_norm": 7.05791711807251, "learning_rate": 1.602340259674945e-05, "loss": 1.6437, "step": 95020 }, { "epoch": 0.5972855655726436, "grad_norm": 6.620450496673584, "learning_rate": 1.60229834958048e-05, "loss": 1.7381, "step": 95030 }, { "epoch": 0.5973484178893407, "grad_norm": 6.6620941162109375, "learning_rate": 1.6022564394860146e-05, "loss": 1.4741, "step": 95040 }, { "epoch": 0.5974112702060378, "grad_norm": 6.4213104248046875, "learning_rate": 1.602218720400996e-05, "loss": 1.549, "step": 95050 }, { "epoch": 0.5974741225227349, "grad_norm": 6.880028247833252, "learning_rate": 1.6021768103065304e-05, "loss": 1.5293, "step": 95060 }, { "epoch": 0.597536974839432, "grad_norm": 6.232072830200195, "learning_rate": 1.602134900212065e-05, "loss": 1.6536, "step": 95070 }, { "epoch": 0.5975998271561291, "grad_norm": 6.660600662231445, "learning_rate": 1.6020929901176e-05, "loss": 1.6193, "step": 95080 }, { "epoch": 0.5976626794728261, "grad_norm": 6.547708988189697, "learning_rate": 1.6020510800231345e-05, "loss": 1.6307, "step": 95090 }, { "epoch": 0.5977255317895233, "grad_norm": 6.666970729827881, "learning_rate": 1.602009169928669e-05, "loss": 1.6335, "step": 95100 }, { "epoch": 0.5977883841062204, "grad_norm": 7.2162861824035645, "learning_rate": 1.6019672598342036e-05, "loss": 1.5506, "step": 95110 }, { "epoch": 0.5978512364229175, "grad_norm": 6.301713466644287, "learning_rate": 1.6019253497397383e-05, "loss": 1.639, "step": 95120 }, { "epoch": 0.5979140887396146, "grad_norm": 6.836359024047852, "learning_rate": 1.601883439645273e-05, "loss": 1.7094, "step": 95130 }, { "epoch": 0.5979769410563117, "grad_norm": 6.2631144523620605, "learning_rate": 1.6018415295508077e-05, "loss": 1.561, "step": 95140 }, { "epoch": 0.5980397933730088, "grad_norm": 7.471103668212891, "learning_rate": 1.6017996194563424e-05, "loss": 1.8125, "step": 95150 }, { "epoch": 0.598102645689706, "grad_norm": 6.798772811889648, "learning_rate": 1.601757709361877e-05, "loss": 1.6111, "step": 95160 }, { "epoch": 0.5981654980064031, "grad_norm": 6.729697227478027, "learning_rate": 1.601715799267412e-05, "loss": 1.6082, "step": 95170 }, { "epoch": 0.5982283503231002, "grad_norm": 6.938941955566406, "learning_rate": 1.6016738891729466e-05, "loss": 1.299, "step": 95180 }, { "epoch": 0.5982912026397973, "grad_norm": 7.27415657043457, "learning_rate": 1.601631979078481e-05, "loss": 1.5815, "step": 95190 }, { "epoch": 0.5983540549564944, "grad_norm": 7.754593849182129, "learning_rate": 1.6015900689840156e-05, "loss": 1.7821, "step": 95200 }, { "epoch": 0.5984169072731915, "grad_norm": 7.225983619689941, "learning_rate": 1.6015481588895503e-05, "loss": 1.7128, "step": 95210 }, { "epoch": 0.5984797595898886, "grad_norm": 6.401730537414551, "learning_rate": 1.601506248795085e-05, "loss": 1.6446, "step": 95220 }, { "epoch": 0.5985426119065858, "grad_norm": 7.39094352722168, "learning_rate": 1.6014643387006194e-05, "loss": 1.5911, "step": 95230 }, { "epoch": 0.5986054642232829, "grad_norm": 8.610921859741211, "learning_rate": 1.601422428606154e-05, "loss": 1.6109, "step": 95240 }, { "epoch": 0.59866831653998, "grad_norm": 6.567859649658203, "learning_rate": 1.601380518511689e-05, "loss": 1.7104, "step": 95250 }, { "epoch": 0.5987311688566771, "grad_norm": 6.0750250816345215, "learning_rate": 1.6013386084172235e-05, "loss": 1.568, "step": 95260 }, { "epoch": 0.5987940211733742, "grad_norm": 7.271208763122559, "learning_rate": 1.6012966983227583e-05, "loss": 1.8688, "step": 95270 }, { "epoch": 0.5988568734900713, "grad_norm": 6.858160972595215, "learning_rate": 1.6012547882282926e-05, "loss": 1.6456, "step": 95280 }, { "epoch": 0.5989197258067684, "grad_norm": 7.072115421295166, "learning_rate": 1.6012128781338273e-05, "loss": 1.6997, "step": 95290 }, { "epoch": 0.5989825781234656, "grad_norm": 6.856001853942871, "learning_rate": 1.601170968039362e-05, "loss": 1.8486, "step": 95300 }, { "epoch": 0.5990454304401627, "grad_norm": 6.55513334274292, "learning_rate": 1.6011290579448967e-05, "loss": 1.7358, "step": 95310 }, { "epoch": 0.5991082827568598, "grad_norm": 6.499383926391602, "learning_rate": 1.6010871478504314e-05, "loss": 1.5296, "step": 95320 }, { "epoch": 0.5991711350735569, "grad_norm": 6.556888580322266, "learning_rate": 1.6010452377559658e-05, "loss": 1.7428, "step": 95330 }, { "epoch": 0.5992339873902539, "grad_norm": 6.718261241912842, "learning_rate": 1.6010033276615005e-05, "loss": 1.4371, "step": 95340 }, { "epoch": 0.599296839706951, "grad_norm": 6.831932544708252, "learning_rate": 1.6009614175670352e-05, "loss": 1.9143, "step": 95350 }, { "epoch": 0.5993596920236481, "grad_norm": 7.682587146759033, "learning_rate": 1.60091950747257e-05, "loss": 1.6924, "step": 95360 }, { "epoch": 0.5994225443403453, "grad_norm": 7.187410354614258, "learning_rate": 1.6008775973781046e-05, "loss": 1.5765, "step": 95370 }, { "epoch": 0.5994853966570424, "grad_norm": 6.18812894821167, "learning_rate": 1.6008356872836394e-05, "loss": 1.5242, "step": 95380 }, { "epoch": 0.5995482489737395, "grad_norm": 7.400139808654785, "learning_rate": 1.600793777189174e-05, "loss": 1.4055, "step": 95390 }, { "epoch": 0.5996111012904366, "grad_norm": 7.379140853881836, "learning_rate": 1.6007518670947088e-05, "loss": 1.8885, "step": 95400 }, { "epoch": 0.5996739536071337, "grad_norm": 7.497847557067871, "learning_rate": 1.600709957000243e-05, "loss": 1.7633, "step": 95410 }, { "epoch": 0.5997368059238308, "grad_norm": 9.0486478805542, "learning_rate": 1.600668046905778e-05, "loss": 1.6708, "step": 95420 }, { "epoch": 0.5997996582405279, "grad_norm": 7.280218124389648, "learning_rate": 1.6006261368113125e-05, "loss": 1.6665, "step": 95430 }, { "epoch": 0.5998625105572251, "grad_norm": 8.033541679382324, "learning_rate": 1.6005842267168473e-05, "loss": 1.4409, "step": 95440 }, { "epoch": 0.5999253628739222, "grad_norm": 6.502048969268799, "learning_rate": 1.600542316622382e-05, "loss": 1.7335, "step": 95450 }, { "epoch": 0.5999882151906193, "grad_norm": 6.807484149932861, "learning_rate": 1.6005004065279163e-05, "loss": 1.689, "step": 95460 }, { "epoch": 0.6000510675073164, "grad_norm": 7.029139041900635, "learning_rate": 1.600458496433451e-05, "loss": 1.674, "step": 95470 }, { "epoch": 0.6001139198240135, "grad_norm": 7.192809581756592, "learning_rate": 1.6004165863389857e-05, "loss": 1.8582, "step": 95480 }, { "epoch": 0.6001767721407106, "grad_norm": 7.5557661056518555, "learning_rate": 1.6003746762445205e-05, "loss": 1.8241, "step": 95490 }, { "epoch": 0.6002396244574077, "grad_norm": 7.017920017242432, "learning_rate": 1.6003327661500548e-05, "loss": 1.8574, "step": 95500 }, { "epoch": 0.6003024767741049, "grad_norm": 7.448575496673584, "learning_rate": 1.6002908560555895e-05, "loss": 1.6413, "step": 95510 }, { "epoch": 0.600365329090802, "grad_norm": 5.390271186828613, "learning_rate": 1.6002489459611242e-05, "loss": 1.4774, "step": 95520 }, { "epoch": 0.6004281814074991, "grad_norm": 6.9333062171936035, "learning_rate": 1.600207035866659e-05, "loss": 1.6295, "step": 95530 }, { "epoch": 0.6004910337241962, "grad_norm": 7.013101577758789, "learning_rate": 1.6001651257721936e-05, "loss": 1.464, "step": 95540 }, { "epoch": 0.6005538860408933, "grad_norm": 6.239163398742676, "learning_rate": 1.6001232156777284e-05, "loss": 2.0033, "step": 95550 }, { "epoch": 0.6006167383575904, "grad_norm": 7.4665069580078125, "learning_rate": 1.600081305583263e-05, "loss": 1.6143, "step": 95560 }, { "epoch": 0.6006795906742876, "grad_norm": 6.792352199554443, "learning_rate": 1.6000393954887978e-05, "loss": 1.6973, "step": 95570 }, { "epoch": 0.6007424429909847, "grad_norm": 7.63231086730957, "learning_rate": 1.599997485394332e-05, "loss": 1.7097, "step": 95580 }, { "epoch": 0.6008052953076818, "grad_norm": 5.52643346786499, "learning_rate": 1.599955575299867e-05, "loss": 1.4769, "step": 95590 }, { "epoch": 0.6008681476243788, "grad_norm": 6.46755313873291, "learning_rate": 1.5999136652054016e-05, "loss": 1.5199, "step": 95600 }, { "epoch": 0.6009309999410759, "grad_norm": 6.4414896965026855, "learning_rate": 1.5998717551109363e-05, "loss": 1.9104, "step": 95610 }, { "epoch": 0.600993852257773, "grad_norm": 7.002188682556152, "learning_rate": 1.599829845016471e-05, "loss": 1.5368, "step": 95620 }, { "epoch": 0.6010567045744701, "grad_norm": 6.919557094573975, "learning_rate": 1.5997879349220053e-05, "loss": 1.8452, "step": 95630 }, { "epoch": 0.6011195568911673, "grad_norm": 8.59383487701416, "learning_rate": 1.59974602482754e-05, "loss": 2.0311, "step": 95640 }, { "epoch": 0.6011824092078644, "grad_norm": 5.873063564300537, "learning_rate": 1.5997041147330747e-05, "loss": 1.7351, "step": 95650 }, { "epoch": 0.6012452615245615, "grad_norm": 8.141200065612793, "learning_rate": 1.5996622046386095e-05, "loss": 1.766, "step": 95660 }, { "epoch": 0.6013081138412586, "grad_norm": 6.853367328643799, "learning_rate": 1.599620294544144e-05, "loss": 1.8471, "step": 95670 }, { "epoch": 0.6013709661579557, "grad_norm": 7.046663761138916, "learning_rate": 1.5995783844496785e-05, "loss": 1.8587, "step": 95680 }, { "epoch": 0.6014338184746528, "grad_norm": 6.066128730773926, "learning_rate": 1.5995364743552132e-05, "loss": 1.4011, "step": 95690 }, { "epoch": 0.6014966707913499, "grad_norm": 5.925566673278809, "learning_rate": 1.599494564260748e-05, "loss": 1.803, "step": 95700 }, { "epoch": 0.6015595231080471, "grad_norm": 6.03173303604126, "learning_rate": 1.5994526541662827e-05, "loss": 1.6507, "step": 95710 }, { "epoch": 0.6016223754247442, "grad_norm": 6.576630592346191, "learning_rate": 1.599410744071817e-05, "loss": 1.5175, "step": 95720 }, { "epoch": 0.6016852277414413, "grad_norm": 7.683470726013184, "learning_rate": 1.5993688339773517e-05, "loss": 1.6615, "step": 95730 }, { "epoch": 0.6017480800581384, "grad_norm": 6.241876602172852, "learning_rate": 1.5993269238828864e-05, "loss": 1.6691, "step": 95740 }, { "epoch": 0.6018109323748355, "grad_norm": 6.379269599914551, "learning_rate": 1.599285013788421e-05, "loss": 1.5956, "step": 95750 }, { "epoch": 0.6018737846915326, "grad_norm": 7.8299102783203125, "learning_rate": 1.599243103693956e-05, "loss": 1.7619, "step": 95760 }, { "epoch": 0.6019366370082297, "grad_norm": 5.8008928298950195, "learning_rate": 1.5992011935994906e-05, "loss": 1.6117, "step": 95770 }, { "epoch": 0.6019994893249269, "grad_norm": 5.89392614364624, "learning_rate": 1.5991592835050253e-05, "loss": 1.8185, "step": 95780 }, { "epoch": 0.602062341641624, "grad_norm": 6.424180030822754, "learning_rate": 1.59911737341056e-05, "loss": 1.6598, "step": 95790 }, { "epoch": 0.6021251939583211, "grad_norm": 6.174285411834717, "learning_rate": 1.5990754633160947e-05, "loss": 1.6731, "step": 95800 }, { "epoch": 0.6021880462750182, "grad_norm": 6.065310001373291, "learning_rate": 1.599033553221629e-05, "loss": 1.7175, "step": 95810 }, { "epoch": 0.6022508985917153, "grad_norm": 6.311932563781738, "learning_rate": 1.5989916431271638e-05, "loss": 1.7119, "step": 95820 }, { "epoch": 0.6023137509084124, "grad_norm": 6.33814001083374, "learning_rate": 1.5989497330326985e-05, "loss": 1.5467, "step": 95830 }, { "epoch": 0.6023766032251096, "grad_norm": 6.47866153717041, "learning_rate": 1.598907822938233e-05, "loss": 1.6905, "step": 95840 }, { "epoch": 0.6024394555418066, "grad_norm": 6.675044536590576, "learning_rate": 1.5988659128437675e-05, "loss": 1.5809, "step": 95850 }, { "epoch": 0.6025023078585037, "grad_norm": 5.273915767669678, "learning_rate": 1.5988240027493022e-05, "loss": 1.6373, "step": 95860 }, { "epoch": 0.6025651601752008, "grad_norm": 7.633127689361572, "learning_rate": 1.598782092654837e-05, "loss": 1.6152, "step": 95870 }, { "epoch": 0.6026280124918979, "grad_norm": 6.930589199066162, "learning_rate": 1.5987401825603717e-05, "loss": 1.6847, "step": 95880 }, { "epoch": 0.602690864808595, "grad_norm": 8.631400108337402, "learning_rate": 1.5986982724659064e-05, "loss": 1.5672, "step": 95890 }, { "epoch": 0.6027537171252921, "grad_norm": 6.0152106285095215, "learning_rate": 1.5986563623714407e-05, "loss": 1.7107, "step": 95900 }, { "epoch": 0.6028165694419892, "grad_norm": 6.5257158279418945, "learning_rate": 1.5986144522769754e-05, "loss": 1.5632, "step": 95910 }, { "epoch": 0.6028794217586864, "grad_norm": 6.976310729980469, "learning_rate": 1.59857254218251e-05, "loss": 1.9114, "step": 95920 }, { "epoch": 0.6029422740753835, "grad_norm": 6.798217296600342, "learning_rate": 1.598530632088045e-05, "loss": 1.6182, "step": 95930 }, { "epoch": 0.6030051263920806, "grad_norm": 6.026243209838867, "learning_rate": 1.5984887219935796e-05, "loss": 1.6752, "step": 95940 }, { "epoch": 0.6030679787087777, "grad_norm": 7.162676811218262, "learning_rate": 1.5984468118991143e-05, "loss": 1.7328, "step": 95950 }, { "epoch": 0.6031308310254748, "grad_norm": 6.474308490753174, "learning_rate": 1.5984049018046486e-05, "loss": 1.6895, "step": 95960 }, { "epoch": 0.6031936833421719, "grad_norm": 6.281386375427246, "learning_rate": 1.5983629917101833e-05, "loss": 1.6435, "step": 95970 }, { "epoch": 0.603256535658869, "grad_norm": 7.172461986541748, "learning_rate": 1.598321081615718e-05, "loss": 1.6507, "step": 95980 }, { "epoch": 0.6033193879755662, "grad_norm": 6.905472755432129, "learning_rate": 1.5982791715212528e-05, "loss": 1.5973, "step": 95990 }, { "epoch": 0.6033822402922633, "grad_norm": 7.273667335510254, "learning_rate": 1.5982372614267875e-05, "loss": 1.4689, "step": 96000 }, { "epoch": 0.6034450926089604, "grad_norm": 6.713540077209473, "learning_rate": 1.5981953513323222e-05, "loss": 1.7759, "step": 96010 }, { "epoch": 0.6035079449256575, "grad_norm": 7.233867168426514, "learning_rate": 1.598153441237857e-05, "loss": 1.671, "step": 96020 }, { "epoch": 0.6035707972423546, "grad_norm": 7.417419910430908, "learning_rate": 1.5981115311433912e-05, "loss": 1.836, "step": 96030 }, { "epoch": 0.6036336495590517, "grad_norm": 5.484426498413086, "learning_rate": 1.598069621048926e-05, "loss": 1.499, "step": 96040 }, { "epoch": 0.6036965018757489, "grad_norm": 6.9189534187316895, "learning_rate": 1.5980277109544607e-05, "loss": 1.4944, "step": 96050 }, { "epoch": 0.603759354192446, "grad_norm": 6.4656805992126465, "learning_rate": 1.5979858008599954e-05, "loss": 1.6776, "step": 96060 }, { "epoch": 0.6038222065091431, "grad_norm": 7.165610313415527, "learning_rate": 1.59794389076553e-05, "loss": 1.911, "step": 96070 }, { "epoch": 0.6038850588258402, "grad_norm": 6.205036163330078, "learning_rate": 1.5979019806710644e-05, "loss": 1.6976, "step": 96080 }, { "epoch": 0.6039479111425373, "grad_norm": 6.048856258392334, "learning_rate": 1.597860070576599e-05, "loss": 1.3312, "step": 96090 }, { "epoch": 0.6040107634592344, "grad_norm": 5.5724778175354, "learning_rate": 1.597818160482134e-05, "loss": 1.6017, "step": 96100 }, { "epoch": 0.6040736157759314, "grad_norm": 6.729308605194092, "learning_rate": 1.5977762503876686e-05, "loss": 1.6707, "step": 96110 }, { "epoch": 0.6041364680926286, "grad_norm": 6.612854957580566, "learning_rate": 1.597734340293203e-05, "loss": 1.596, "step": 96120 }, { "epoch": 0.6041993204093257, "grad_norm": 7.262803554534912, "learning_rate": 1.5976924301987376e-05, "loss": 1.86, "step": 96130 }, { "epoch": 0.6042621727260228, "grad_norm": 7.318043231964111, "learning_rate": 1.5976505201042723e-05, "loss": 1.63, "step": 96140 }, { "epoch": 0.6043250250427199, "grad_norm": 6.175392150878906, "learning_rate": 1.597608610009807e-05, "loss": 1.4979, "step": 96150 }, { "epoch": 0.604387877359417, "grad_norm": 6.861042022705078, "learning_rate": 1.5975666999153418e-05, "loss": 1.6917, "step": 96160 }, { "epoch": 0.6044507296761141, "grad_norm": 6.393370151519775, "learning_rate": 1.5975247898208765e-05, "loss": 1.6265, "step": 96170 }, { "epoch": 0.6045135819928112, "grad_norm": 8.243154525756836, "learning_rate": 1.5974828797264112e-05, "loss": 1.6527, "step": 96180 }, { "epoch": 0.6045764343095084, "grad_norm": 7.203034400939941, "learning_rate": 1.597440969631946e-05, "loss": 1.786, "step": 96190 }, { "epoch": 0.6046392866262055, "grad_norm": 5.90932559967041, "learning_rate": 1.5973990595374806e-05, "loss": 1.7499, "step": 96200 }, { "epoch": 0.6047021389429026, "grad_norm": 6.472553730010986, "learning_rate": 1.597357149443015e-05, "loss": 1.4541, "step": 96210 }, { "epoch": 0.6047649912595997, "grad_norm": 6.357101917266846, "learning_rate": 1.5973152393485497e-05, "loss": 1.4883, "step": 96220 }, { "epoch": 0.6048278435762968, "grad_norm": 6.183920383453369, "learning_rate": 1.5972733292540844e-05, "loss": 1.7241, "step": 96230 }, { "epoch": 0.6048906958929939, "grad_norm": 7.03749942779541, "learning_rate": 1.597231419159619e-05, "loss": 1.624, "step": 96240 }, { "epoch": 0.604953548209691, "grad_norm": 7.242808818817139, "learning_rate": 1.5971895090651534e-05, "loss": 1.4835, "step": 96250 }, { "epoch": 0.6050164005263882, "grad_norm": 6.75843620300293, "learning_rate": 1.597147598970688e-05, "loss": 1.7205, "step": 96260 }, { "epoch": 0.6050792528430853, "grad_norm": 7.639392852783203, "learning_rate": 1.597105688876223e-05, "loss": 1.7237, "step": 96270 }, { "epoch": 0.6051421051597824, "grad_norm": 6.621246814727783, "learning_rate": 1.5970637787817576e-05, "loss": 1.4742, "step": 96280 }, { "epoch": 0.6052049574764795, "grad_norm": 7.6240458488464355, "learning_rate": 1.5970218686872923e-05, "loss": 1.6599, "step": 96290 }, { "epoch": 0.6052678097931766, "grad_norm": 6.757303237915039, "learning_rate": 1.5969799585928266e-05, "loss": 1.7977, "step": 96300 }, { "epoch": 0.6053306621098737, "grad_norm": 7.339069843292236, "learning_rate": 1.5969380484983613e-05, "loss": 1.497, "step": 96310 }, { "epoch": 0.6053935144265709, "grad_norm": 6.622323513031006, "learning_rate": 1.596896138403896e-05, "loss": 1.5652, "step": 96320 }, { "epoch": 0.605456366743268, "grad_norm": 7.0324273109436035, "learning_rate": 1.5968542283094308e-05, "loss": 1.6223, "step": 96330 }, { "epoch": 0.6055192190599651, "grad_norm": 6.163375377655029, "learning_rate": 1.596812318214965e-05, "loss": 1.6602, "step": 96340 }, { "epoch": 0.6055820713766622, "grad_norm": 6.083081245422363, "learning_rate": 1.5967704081205e-05, "loss": 1.6352, "step": 96350 }, { "epoch": 0.6056449236933592, "grad_norm": 7.394678592681885, "learning_rate": 1.5967284980260345e-05, "loss": 1.6288, "step": 96360 }, { "epoch": 0.6057077760100563, "grad_norm": 5.2517523765563965, "learning_rate": 1.5966865879315693e-05, "loss": 1.52, "step": 96370 }, { "epoch": 0.6057706283267534, "grad_norm": 5.803227424621582, "learning_rate": 1.596644677837104e-05, "loss": 1.6651, "step": 96380 }, { "epoch": 0.6058334806434506, "grad_norm": 6.1749701499938965, "learning_rate": 1.5966027677426387e-05, "loss": 1.4957, "step": 96390 }, { "epoch": 0.6058963329601477, "grad_norm": 6.390466690063477, "learning_rate": 1.5965608576481734e-05, "loss": 1.4403, "step": 96400 }, { "epoch": 0.6059591852768448, "grad_norm": 7.035869598388672, "learning_rate": 1.596518947553708e-05, "loss": 1.5197, "step": 96410 }, { "epoch": 0.6060220375935419, "grad_norm": 6.534717082977295, "learning_rate": 1.5964770374592428e-05, "loss": 1.9955, "step": 96420 }, { "epoch": 0.606084889910239, "grad_norm": 6.712184429168701, "learning_rate": 1.596435127364777e-05, "loss": 1.4664, "step": 96430 }, { "epoch": 0.6061477422269361, "grad_norm": 7.0679931640625, "learning_rate": 1.596393217270312e-05, "loss": 1.7205, "step": 96440 }, { "epoch": 0.6062105945436332, "grad_norm": 9.562171936035156, "learning_rate": 1.5963513071758466e-05, "loss": 1.7928, "step": 96450 }, { "epoch": 0.6062734468603304, "grad_norm": 4.552233695983887, "learning_rate": 1.5963093970813813e-05, "loss": 1.5129, "step": 96460 }, { "epoch": 0.6063362991770275, "grad_norm": 6.2462263107299805, "learning_rate": 1.5962674869869156e-05, "loss": 1.674, "step": 96470 }, { "epoch": 0.6063991514937246, "grad_norm": 5.8251237869262695, "learning_rate": 1.5962255768924504e-05, "loss": 1.6712, "step": 96480 }, { "epoch": 0.6064620038104217, "grad_norm": 6.038680553436279, "learning_rate": 1.596183666797985e-05, "loss": 1.6022, "step": 96490 }, { "epoch": 0.6065248561271188, "grad_norm": 6.039565086364746, "learning_rate": 1.5961417567035198e-05, "loss": 1.5418, "step": 96500 }, { "epoch": 0.6065877084438159, "grad_norm": 6.144685745239258, "learning_rate": 1.5960998466090545e-05, "loss": 1.6655, "step": 96510 }, { "epoch": 0.606650560760513, "grad_norm": 5.955746173858643, "learning_rate": 1.596057936514589e-05, "loss": 1.556, "step": 96520 }, { "epoch": 0.6067134130772102, "grad_norm": 6.442933082580566, "learning_rate": 1.5960160264201235e-05, "loss": 1.5626, "step": 96530 }, { "epoch": 0.6067762653939073, "grad_norm": 6.719789981842041, "learning_rate": 1.5959741163256583e-05, "loss": 1.8061, "step": 96540 }, { "epoch": 0.6068391177106044, "grad_norm": 7.188895225524902, "learning_rate": 1.595932206231193e-05, "loss": 1.6047, "step": 96550 }, { "epoch": 0.6069019700273015, "grad_norm": 6.589751720428467, "learning_rate": 1.5958902961367277e-05, "loss": 1.5288, "step": 96560 }, { "epoch": 0.6069648223439986, "grad_norm": 5.951982021331787, "learning_rate": 1.5958483860422624e-05, "loss": 1.6871, "step": 96570 }, { "epoch": 0.6070276746606957, "grad_norm": 6.021017551422119, "learning_rate": 1.595806475947797e-05, "loss": 1.6616, "step": 96580 }, { "epoch": 0.6070905269773929, "grad_norm": 7.280405044555664, "learning_rate": 1.5957645658533315e-05, "loss": 1.7106, "step": 96590 }, { "epoch": 0.60715337929409, "grad_norm": 6.818925380706787, "learning_rate": 1.595722655758866e-05, "loss": 1.6977, "step": 96600 }, { "epoch": 0.6072162316107871, "grad_norm": 5.315835475921631, "learning_rate": 1.595680745664401e-05, "loss": 1.7101, "step": 96610 }, { "epoch": 0.6072790839274841, "grad_norm": 6.7951979637146, "learning_rate": 1.5956388355699356e-05, "loss": 1.5392, "step": 96620 }, { "epoch": 0.6073419362441812, "grad_norm": 6.182480812072754, "learning_rate": 1.5955969254754703e-05, "loss": 1.7713, "step": 96630 }, { "epoch": 0.6074047885608783, "grad_norm": 5.1290483474731445, "learning_rate": 1.595555015381005e-05, "loss": 1.6297, "step": 96640 }, { "epoch": 0.6074676408775754, "grad_norm": 6.374390125274658, "learning_rate": 1.5955131052865394e-05, "loss": 1.6293, "step": 96650 }, { "epoch": 0.6075304931942725, "grad_norm": 6.011179447174072, "learning_rate": 1.595471195192074e-05, "loss": 1.6367, "step": 96660 }, { "epoch": 0.6075933455109697, "grad_norm": 6.635353088378906, "learning_rate": 1.5954292850976088e-05, "loss": 1.7165, "step": 96670 }, { "epoch": 0.6076561978276668, "grad_norm": 6.711002349853516, "learning_rate": 1.5953873750031435e-05, "loss": 1.6859, "step": 96680 }, { "epoch": 0.6077190501443639, "grad_norm": 6.37709379196167, "learning_rate": 1.5953454649086782e-05, "loss": 1.6371, "step": 96690 }, { "epoch": 0.607781902461061, "grad_norm": 7.191039085388184, "learning_rate": 1.5953035548142126e-05, "loss": 1.7203, "step": 96700 }, { "epoch": 0.6078447547777581, "grad_norm": 6.104044437408447, "learning_rate": 1.5952616447197473e-05, "loss": 1.6676, "step": 96710 }, { "epoch": 0.6079076070944552, "grad_norm": 6.773703098297119, "learning_rate": 1.595219734625282e-05, "loss": 1.9815, "step": 96720 }, { "epoch": 0.6079704594111524, "grad_norm": 6.631420135498047, "learning_rate": 1.5951778245308167e-05, "loss": 1.5296, "step": 96730 }, { "epoch": 0.6080333117278495, "grad_norm": 8.33754825592041, "learning_rate": 1.595135914436351e-05, "loss": 1.8381, "step": 96740 }, { "epoch": 0.6080961640445466, "grad_norm": 6.7561116218566895, "learning_rate": 1.5950940043418857e-05, "loss": 1.7066, "step": 96750 }, { "epoch": 0.6081590163612437, "grad_norm": 6.796525955200195, "learning_rate": 1.5950520942474205e-05, "loss": 1.7786, "step": 96760 }, { "epoch": 0.6082218686779408, "grad_norm": 7.03566837310791, "learning_rate": 1.595010184152955e-05, "loss": 1.5446, "step": 96770 }, { "epoch": 0.6082847209946379, "grad_norm": 6.7658586502075195, "learning_rate": 1.59496827405849e-05, "loss": 1.544, "step": 96780 }, { "epoch": 0.608347573311335, "grad_norm": 7.732165813446045, "learning_rate": 1.5949263639640246e-05, "loss": 1.6133, "step": 96790 }, { "epoch": 0.6084104256280322, "grad_norm": 7.413866996765137, "learning_rate": 1.5948844538695593e-05, "loss": 1.8205, "step": 96800 }, { "epoch": 0.6084732779447293, "grad_norm": 6.353707790374756, "learning_rate": 1.594842543775094e-05, "loss": 1.5322, "step": 96810 }, { "epoch": 0.6085361302614264, "grad_norm": 6.494779586791992, "learning_rate": 1.5948006336806287e-05, "loss": 1.6119, "step": 96820 }, { "epoch": 0.6085989825781235, "grad_norm": 6.874396800994873, "learning_rate": 1.594758723586163e-05, "loss": 1.6729, "step": 96830 }, { "epoch": 0.6086618348948206, "grad_norm": 7.573245048522949, "learning_rate": 1.5947168134916978e-05, "loss": 1.43, "step": 96840 }, { "epoch": 0.6087246872115177, "grad_norm": 6.823930740356445, "learning_rate": 1.5946749033972325e-05, "loss": 1.7342, "step": 96850 }, { "epoch": 0.6087875395282148, "grad_norm": 7.079809188842773, "learning_rate": 1.5946329933027672e-05, "loss": 1.8017, "step": 96860 }, { "epoch": 0.6088503918449119, "grad_norm": 7.149454593658447, "learning_rate": 1.5945910832083016e-05, "loss": 1.6538, "step": 96870 }, { "epoch": 0.608913244161609, "grad_norm": 6.1625566482543945, "learning_rate": 1.5945491731138363e-05, "loss": 1.2721, "step": 96880 }, { "epoch": 0.6089760964783061, "grad_norm": 7.1358323097229, "learning_rate": 1.594507263019371e-05, "loss": 1.7929, "step": 96890 }, { "epoch": 0.6090389487950032, "grad_norm": 6.218572616577148, "learning_rate": 1.5944653529249057e-05, "loss": 1.6383, "step": 96900 }, { "epoch": 0.6091018011117003, "grad_norm": 6.8250298500061035, "learning_rate": 1.5944234428304404e-05, "loss": 1.5742, "step": 96910 }, { "epoch": 0.6091646534283974, "grad_norm": 6.283374786376953, "learning_rate": 1.5943815327359748e-05, "loss": 1.9339, "step": 96920 }, { "epoch": 0.6092275057450945, "grad_norm": 6.209414958953857, "learning_rate": 1.5943396226415095e-05, "loss": 1.8073, "step": 96930 }, { "epoch": 0.6092903580617917, "grad_norm": 5.9464826583862305, "learning_rate": 1.594297712547044e-05, "loss": 1.7446, "step": 96940 }, { "epoch": 0.6093532103784888, "grad_norm": 6.390690803527832, "learning_rate": 1.594255802452579e-05, "loss": 1.4427, "step": 96950 }, { "epoch": 0.6094160626951859, "grad_norm": 7.447775840759277, "learning_rate": 1.5942138923581136e-05, "loss": 1.6181, "step": 96960 }, { "epoch": 0.609478915011883, "grad_norm": 6.484543323516846, "learning_rate": 1.594171982263648e-05, "loss": 1.694, "step": 96970 }, { "epoch": 0.6095417673285801, "grad_norm": 6.508401393890381, "learning_rate": 1.5941300721691827e-05, "loss": 1.4989, "step": 96980 }, { "epoch": 0.6096046196452772, "grad_norm": 6.89706563949585, "learning_rate": 1.5940881620747174e-05, "loss": 1.6738, "step": 96990 }, { "epoch": 0.6096674719619743, "grad_norm": 7.620410919189453, "learning_rate": 1.594046251980252e-05, "loss": 1.8045, "step": 97000 }, { "epoch": 0.6097303242786715, "grad_norm": 7.822571754455566, "learning_rate": 1.5940043418857868e-05, "loss": 1.5355, "step": 97010 }, { "epoch": 0.6097931765953686, "grad_norm": 5.538037300109863, "learning_rate": 1.5939624317913215e-05, "loss": 1.6928, "step": 97020 }, { "epoch": 0.6098560289120657, "grad_norm": 6.762993812561035, "learning_rate": 1.5939205216968562e-05, "loss": 1.5583, "step": 97030 }, { "epoch": 0.6099188812287628, "grad_norm": 10.59797191619873, "learning_rate": 1.593878611602391e-05, "loss": 1.7227, "step": 97040 }, { "epoch": 0.6099817335454599, "grad_norm": 7.5071702003479, "learning_rate": 1.5938367015079253e-05, "loss": 1.7092, "step": 97050 }, { "epoch": 0.610044585862157, "grad_norm": 6.896035671234131, "learning_rate": 1.59379479141346e-05, "loss": 1.6339, "step": 97060 }, { "epoch": 0.6101074381788542, "grad_norm": 7.5746307373046875, "learning_rate": 1.5937528813189947e-05, "loss": 1.6787, "step": 97070 }, { "epoch": 0.6101702904955513, "grad_norm": 6.058924198150635, "learning_rate": 1.5937109712245294e-05, "loss": 1.6495, "step": 97080 }, { "epoch": 0.6102331428122484, "grad_norm": 7.73477840423584, "learning_rate": 1.5936690611300638e-05, "loss": 1.5875, "step": 97090 }, { "epoch": 0.6102959951289455, "grad_norm": 6.746033668518066, "learning_rate": 1.5936271510355985e-05, "loss": 1.6142, "step": 97100 }, { "epoch": 0.6103588474456426, "grad_norm": 6.900345802307129, "learning_rate": 1.5935852409411332e-05, "loss": 1.889, "step": 97110 }, { "epoch": 0.6104216997623397, "grad_norm": 5.7613525390625, "learning_rate": 1.593543330846668e-05, "loss": 1.5652, "step": 97120 }, { "epoch": 0.6104845520790367, "grad_norm": 6.890136241912842, "learning_rate": 1.5935014207522026e-05, "loss": 1.7257, "step": 97130 }, { "epoch": 0.6105474043957339, "grad_norm": 6.521233558654785, "learning_rate": 1.593459510657737e-05, "loss": 1.6479, "step": 97140 }, { "epoch": 0.610610256712431, "grad_norm": 7.145533084869385, "learning_rate": 1.5934176005632717e-05, "loss": 1.4238, "step": 97150 }, { "epoch": 0.6106731090291281, "grad_norm": 6.435394763946533, "learning_rate": 1.5933756904688064e-05, "loss": 1.7891, "step": 97160 }, { "epoch": 0.6107359613458252, "grad_norm": 6.097654342651367, "learning_rate": 1.593333780374341e-05, "loss": 1.5306, "step": 97170 }, { "epoch": 0.6107988136625223, "grad_norm": 7.007704257965088, "learning_rate": 1.5932918702798758e-05, "loss": 1.5745, "step": 97180 }, { "epoch": 0.6108616659792194, "grad_norm": 6.812733173370361, "learning_rate": 1.5932499601854105e-05, "loss": 1.5193, "step": 97190 }, { "epoch": 0.6109245182959165, "grad_norm": 6.507319927215576, "learning_rate": 1.5932080500909452e-05, "loss": 1.6541, "step": 97200 }, { "epoch": 0.6109873706126137, "grad_norm": 5.9998908042907715, "learning_rate": 1.5931661399964796e-05, "loss": 1.506, "step": 97210 }, { "epoch": 0.6110502229293108, "grad_norm": 7.0761237144470215, "learning_rate": 1.5931242299020143e-05, "loss": 1.5575, "step": 97220 }, { "epoch": 0.6111130752460079, "grad_norm": 6.050992488861084, "learning_rate": 1.593082319807549e-05, "loss": 1.5718, "step": 97230 }, { "epoch": 0.611175927562705, "grad_norm": 6.629901885986328, "learning_rate": 1.5930404097130837e-05, "loss": 1.8976, "step": 97240 }, { "epoch": 0.6112387798794021, "grad_norm": 6.821048259735107, "learning_rate": 1.5929984996186184e-05, "loss": 1.623, "step": 97250 }, { "epoch": 0.6113016321960992, "grad_norm": 6.956756114959717, "learning_rate": 1.592956589524153e-05, "loss": 1.6399, "step": 97260 }, { "epoch": 0.6113644845127963, "grad_norm": 5.7625627517700195, "learning_rate": 1.5929146794296875e-05, "loss": 1.6874, "step": 97270 }, { "epoch": 0.6114273368294935, "grad_norm": 6.572230339050293, "learning_rate": 1.5928727693352222e-05, "loss": 1.8282, "step": 97280 }, { "epoch": 0.6114901891461906, "grad_norm": 6.548996925354004, "learning_rate": 1.592830859240757e-05, "loss": 1.6299, "step": 97290 }, { "epoch": 0.6115530414628877, "grad_norm": 6.3550639152526855, "learning_rate": 1.5927889491462916e-05, "loss": 1.4761, "step": 97300 }, { "epoch": 0.6116158937795848, "grad_norm": 5.993685722351074, "learning_rate": 1.5927470390518263e-05, "loss": 1.618, "step": 97310 }, { "epoch": 0.6116787460962819, "grad_norm": 6.1411213874816895, "learning_rate": 1.5927051289573607e-05, "loss": 1.5549, "step": 97320 }, { "epoch": 0.611741598412979, "grad_norm": 6.283779144287109, "learning_rate": 1.5926632188628954e-05, "loss": 1.5801, "step": 97330 }, { "epoch": 0.6118044507296762, "grad_norm": 7.2673845291137695, "learning_rate": 1.59262130876843e-05, "loss": 1.6625, "step": 97340 }, { "epoch": 0.6118673030463733, "grad_norm": 6.941159725189209, "learning_rate": 1.5925793986739648e-05, "loss": 1.5375, "step": 97350 }, { "epoch": 0.6119301553630704, "grad_norm": 6.971386432647705, "learning_rate": 1.592537488579499e-05, "loss": 1.769, "step": 97360 }, { "epoch": 0.6119930076797675, "grad_norm": 6.657928943634033, "learning_rate": 1.592495578485034e-05, "loss": 1.8468, "step": 97370 }, { "epoch": 0.6120558599964645, "grad_norm": 6.511856555938721, "learning_rate": 1.5924536683905686e-05, "loss": 1.5713, "step": 97380 }, { "epoch": 0.6121187123131616, "grad_norm": 6.132238864898682, "learning_rate": 1.5924117582961033e-05, "loss": 1.6285, "step": 97390 }, { "epoch": 0.6121815646298587, "grad_norm": 7.924448013305664, "learning_rate": 1.592369848201638e-05, "loss": 1.8515, "step": 97400 }, { "epoch": 0.6122444169465558, "grad_norm": 7.811633586883545, "learning_rate": 1.5923279381071727e-05, "loss": 1.8305, "step": 97410 }, { "epoch": 0.612307269263253, "grad_norm": 6.808788776397705, "learning_rate": 1.5922860280127074e-05, "loss": 1.4275, "step": 97420 }, { "epoch": 0.6123701215799501, "grad_norm": 7.154580593109131, "learning_rate": 1.592244117918242e-05, "loss": 1.7556, "step": 97430 }, { "epoch": 0.6124329738966472, "grad_norm": 6.226292133331299, "learning_rate": 1.5922022078237768e-05, "loss": 1.594, "step": 97440 }, { "epoch": 0.6124958262133443, "grad_norm": 6.5397491455078125, "learning_rate": 1.5921602977293112e-05, "loss": 1.8206, "step": 97450 }, { "epoch": 0.6125586785300414, "grad_norm": 6.531113147735596, "learning_rate": 1.592118387634846e-05, "loss": 1.5511, "step": 97460 }, { "epoch": 0.6126215308467385, "grad_norm": 6.342073440551758, "learning_rate": 1.5920764775403806e-05, "loss": 1.5654, "step": 97470 }, { "epoch": 0.6126843831634357, "grad_norm": 6.173723220825195, "learning_rate": 1.5920345674459153e-05, "loss": 1.6765, "step": 97480 }, { "epoch": 0.6127472354801328, "grad_norm": 7.431957244873047, "learning_rate": 1.5919926573514497e-05, "loss": 1.6724, "step": 97490 }, { "epoch": 0.6128100877968299, "grad_norm": 7.1538801193237305, "learning_rate": 1.5919507472569844e-05, "loss": 1.728, "step": 97500 }, { "epoch": 0.612872940113527, "grad_norm": 5.422327995300293, "learning_rate": 1.591908837162519e-05, "loss": 1.8189, "step": 97510 }, { "epoch": 0.6129357924302241, "grad_norm": 6.470592498779297, "learning_rate": 1.5918669270680538e-05, "loss": 1.7974, "step": 97520 }, { "epoch": 0.6129986447469212, "grad_norm": 8.223944664001465, "learning_rate": 1.5918250169735885e-05, "loss": 1.6255, "step": 97530 }, { "epoch": 0.6130614970636183, "grad_norm": 6.294012546539307, "learning_rate": 1.591783106879123e-05, "loss": 1.7991, "step": 97540 }, { "epoch": 0.6131243493803155, "grad_norm": 6.184208393096924, "learning_rate": 1.5917411967846576e-05, "loss": 1.789, "step": 97550 }, { "epoch": 0.6131872016970126, "grad_norm": 6.930463790893555, "learning_rate": 1.5916992866901923e-05, "loss": 1.6868, "step": 97560 }, { "epoch": 0.6132500540137097, "grad_norm": 7.4079670906066895, "learning_rate": 1.591657376595727e-05, "loss": 1.5491, "step": 97570 }, { "epoch": 0.6133129063304068, "grad_norm": 6.005690574645996, "learning_rate": 1.5916154665012617e-05, "loss": 1.467, "step": 97580 }, { "epoch": 0.6133757586471039, "grad_norm": 6.448835372924805, "learning_rate": 1.591573556406796e-05, "loss": 1.7108, "step": 97590 }, { "epoch": 0.613438610963801, "grad_norm": 6.382637977600098, "learning_rate": 1.5915316463123308e-05, "loss": 1.7321, "step": 97600 }, { "epoch": 0.6135014632804981, "grad_norm": 7.751312732696533, "learning_rate": 1.5914897362178655e-05, "loss": 1.5915, "step": 97610 }, { "epoch": 0.6135643155971953, "grad_norm": 6.062617778778076, "learning_rate": 1.5914478261234002e-05, "loss": 1.6638, "step": 97620 }, { "epoch": 0.6136271679138924, "grad_norm": 6.349198818206787, "learning_rate": 1.591405916028935e-05, "loss": 1.6401, "step": 97630 }, { "epoch": 0.6136900202305894, "grad_norm": 5.5992045402526855, "learning_rate": 1.5913640059344696e-05, "loss": 1.5345, "step": 97640 }, { "epoch": 0.6137528725472865, "grad_norm": 7.2889838218688965, "learning_rate": 1.5913220958400043e-05, "loss": 1.9279, "step": 97650 }, { "epoch": 0.6138157248639836, "grad_norm": 6.606493949890137, "learning_rate": 1.591280185745539e-05, "loss": 1.6457, "step": 97660 }, { "epoch": 0.6138785771806807, "grad_norm": 7.579192161560059, "learning_rate": 1.5912382756510734e-05, "loss": 1.6003, "step": 97670 }, { "epoch": 0.6139414294973778, "grad_norm": 6.619809627532959, "learning_rate": 1.591196365556608e-05, "loss": 1.6042, "step": 97680 }, { "epoch": 0.614004281814075, "grad_norm": 5.405503273010254, "learning_rate": 1.5911544554621428e-05, "loss": 1.7067, "step": 97690 }, { "epoch": 0.6140671341307721, "grad_norm": 6.7340312004089355, "learning_rate": 1.5911125453676775e-05, "loss": 1.5633, "step": 97700 }, { "epoch": 0.6141299864474692, "grad_norm": 6.256981372833252, "learning_rate": 1.591070635273212e-05, "loss": 1.6141, "step": 97710 }, { "epoch": 0.6141928387641663, "grad_norm": 7.2453083992004395, "learning_rate": 1.5910287251787466e-05, "loss": 1.8788, "step": 97720 }, { "epoch": 0.6142556910808634, "grad_norm": 6.870968818664551, "learning_rate": 1.5909868150842813e-05, "loss": 1.3222, "step": 97730 }, { "epoch": 0.6143185433975605, "grad_norm": 6.767097473144531, "learning_rate": 1.590944904989816e-05, "loss": 1.8889, "step": 97740 }, { "epoch": 0.6143813957142576, "grad_norm": 7.072991371154785, "learning_rate": 1.5909029948953507e-05, "loss": 1.7936, "step": 97750 }, { "epoch": 0.6144442480309548, "grad_norm": 5.585392475128174, "learning_rate": 1.590861084800885e-05, "loss": 1.3332, "step": 97760 }, { "epoch": 0.6145071003476519, "grad_norm": 5.266753673553467, "learning_rate": 1.5908191747064198e-05, "loss": 1.4711, "step": 97770 }, { "epoch": 0.614569952664349, "grad_norm": 7.481316089630127, "learning_rate": 1.5907772646119545e-05, "loss": 1.5279, "step": 97780 }, { "epoch": 0.6146328049810461, "grad_norm": 6.820082187652588, "learning_rate": 1.5907353545174892e-05, "loss": 1.5929, "step": 97790 }, { "epoch": 0.6146956572977432, "grad_norm": 7.213798999786377, "learning_rate": 1.590693444423024e-05, "loss": 1.7012, "step": 97800 }, { "epoch": 0.6147585096144403, "grad_norm": 7.327370643615723, "learning_rate": 1.5906515343285586e-05, "loss": 1.6506, "step": 97810 }, { "epoch": 0.6148213619311375, "grad_norm": 7.922680377960205, "learning_rate": 1.5906096242340933e-05, "loss": 1.7617, "step": 97820 }, { "epoch": 0.6148842142478346, "grad_norm": 6.1932501792907715, "learning_rate": 1.590567714139628e-05, "loss": 1.648, "step": 97830 }, { "epoch": 0.6149470665645317, "grad_norm": 8.107322692871094, "learning_rate": 1.5905258040451624e-05, "loss": 1.6619, "step": 97840 }, { "epoch": 0.6150099188812288, "grad_norm": 7.934214115142822, "learning_rate": 1.590483893950697e-05, "loss": 1.8224, "step": 97850 }, { "epoch": 0.6150727711979259, "grad_norm": 6.7235517501831055, "learning_rate": 1.5904419838562318e-05, "loss": 1.7292, "step": 97860 }, { "epoch": 0.615135623514623, "grad_norm": 6.896998405456543, "learning_rate": 1.5904000737617665e-05, "loss": 1.6557, "step": 97870 }, { "epoch": 0.6151984758313201, "grad_norm": 7.2692999839782715, "learning_rate": 1.5903581636673012e-05, "loss": 1.6229, "step": 97880 }, { "epoch": 0.6152613281480172, "grad_norm": 6.0252461433410645, "learning_rate": 1.5903162535728356e-05, "loss": 1.4469, "step": 97890 }, { "epoch": 0.6153241804647143, "grad_norm": 7.354637145996094, "learning_rate": 1.5902743434783703e-05, "loss": 1.7811, "step": 97900 }, { "epoch": 0.6153870327814114, "grad_norm": 5.593834400177002, "learning_rate": 1.590232433383905e-05, "loss": 1.4064, "step": 97910 }, { "epoch": 0.6154498850981085, "grad_norm": 7.0428619384765625, "learning_rate": 1.5901905232894397e-05, "loss": 1.7334, "step": 97920 }, { "epoch": 0.6155127374148056, "grad_norm": 6.362512588500977, "learning_rate": 1.5901486131949744e-05, "loss": 1.9084, "step": 97930 }, { "epoch": 0.6155755897315027, "grad_norm": 6.2018208503723145, "learning_rate": 1.5901067031005088e-05, "loss": 1.5764, "step": 97940 }, { "epoch": 0.6156384420481998, "grad_norm": 6.819106578826904, "learning_rate": 1.5900647930060435e-05, "loss": 1.6431, "step": 97950 }, { "epoch": 0.615701294364897, "grad_norm": 6.307815074920654, "learning_rate": 1.5900228829115782e-05, "loss": 1.632, "step": 97960 }, { "epoch": 0.6157641466815941, "grad_norm": 6.985607147216797, "learning_rate": 1.589980972817113e-05, "loss": 1.8224, "step": 97970 }, { "epoch": 0.6158269989982912, "grad_norm": 6.369467735290527, "learning_rate": 1.5899390627226473e-05, "loss": 1.7077, "step": 97980 }, { "epoch": 0.6158898513149883, "grad_norm": 6.204318046569824, "learning_rate": 1.589897152628182e-05, "loss": 1.465, "step": 97990 }, { "epoch": 0.6159527036316854, "grad_norm": 7.323031902313232, "learning_rate": 1.5898552425337167e-05, "loss": 1.6542, "step": 98000 }, { "epoch": 0.6160155559483825, "grad_norm": 6.1911444664001465, "learning_rate": 1.5898133324392514e-05, "loss": 1.4795, "step": 98010 }, { "epoch": 0.6160784082650796, "grad_norm": 5.6918110847473145, "learning_rate": 1.589771422344786e-05, "loss": 1.52, "step": 98020 }, { "epoch": 0.6161412605817768, "grad_norm": 6.485158443450928, "learning_rate": 1.5897295122503208e-05, "loss": 1.6236, "step": 98030 }, { "epoch": 0.6162041128984739, "grad_norm": 6.915482044219971, "learning_rate": 1.5896876021558555e-05, "loss": 1.5711, "step": 98040 }, { "epoch": 0.616266965215171, "grad_norm": 7.597339153289795, "learning_rate": 1.5896456920613902e-05, "loss": 1.8487, "step": 98050 }, { "epoch": 0.6163298175318681, "grad_norm": 7.884457111358643, "learning_rate": 1.589603781966925e-05, "loss": 1.6207, "step": 98060 }, { "epoch": 0.6163926698485652, "grad_norm": 6.6080002784729, "learning_rate": 1.5895618718724593e-05, "loss": 1.5521, "step": 98070 }, { "epoch": 0.6164555221652623, "grad_norm": 7.194190502166748, "learning_rate": 1.589519961777994e-05, "loss": 1.3555, "step": 98080 }, { "epoch": 0.6165183744819595, "grad_norm": 5.772176742553711, "learning_rate": 1.5894780516835287e-05, "loss": 1.7029, "step": 98090 }, { "epoch": 0.6165812267986566, "grad_norm": 7.119681358337402, "learning_rate": 1.5894361415890634e-05, "loss": 1.757, "step": 98100 }, { "epoch": 0.6166440791153537, "grad_norm": 6.931814670562744, "learning_rate": 1.5893942314945978e-05, "loss": 1.7993, "step": 98110 }, { "epoch": 0.6167069314320508, "grad_norm": 6.7501301765441895, "learning_rate": 1.5893523214001325e-05, "loss": 1.6404, "step": 98120 }, { "epoch": 0.6167697837487479, "grad_norm": 7.078472137451172, "learning_rate": 1.5893104113056672e-05, "loss": 1.611, "step": 98130 }, { "epoch": 0.616832636065445, "grad_norm": 6.168127536773682, "learning_rate": 1.589268501211202e-05, "loss": 1.5652, "step": 98140 }, { "epoch": 0.616895488382142, "grad_norm": 6.353023052215576, "learning_rate": 1.5892265911167366e-05, "loss": 1.5572, "step": 98150 }, { "epoch": 0.6169583406988391, "grad_norm": 5.670644283294678, "learning_rate": 1.589184681022271e-05, "loss": 1.4688, "step": 98160 }, { "epoch": 0.6170211930155363, "grad_norm": 7.587639331817627, "learning_rate": 1.5891427709278057e-05, "loss": 1.8769, "step": 98170 }, { "epoch": 0.6170840453322334, "grad_norm": 6.383285045623779, "learning_rate": 1.5891008608333404e-05, "loss": 1.8693, "step": 98180 }, { "epoch": 0.6171468976489305, "grad_norm": 7.242428779602051, "learning_rate": 1.589058950738875e-05, "loss": 1.7981, "step": 98190 }, { "epoch": 0.6172097499656276, "grad_norm": 7.480915546417236, "learning_rate": 1.5890170406444098e-05, "loss": 1.8373, "step": 98200 }, { "epoch": 0.6172726022823247, "grad_norm": 7.819182395935059, "learning_rate": 1.5889751305499445e-05, "loss": 1.5363, "step": 98210 }, { "epoch": 0.6173354545990218, "grad_norm": 5.920949935913086, "learning_rate": 1.588933220455479e-05, "loss": 1.6519, "step": 98220 }, { "epoch": 0.617398306915719, "grad_norm": 6.396178722381592, "learning_rate": 1.5888913103610136e-05, "loss": 1.564, "step": 98230 }, { "epoch": 0.6174611592324161, "grad_norm": 6.648460388183594, "learning_rate": 1.5888494002665483e-05, "loss": 1.7183, "step": 98240 }, { "epoch": 0.6175240115491132, "grad_norm": 6.459324359893799, "learning_rate": 1.588807490172083e-05, "loss": 1.7778, "step": 98250 }, { "epoch": 0.6175868638658103, "grad_norm": 7.500816345214844, "learning_rate": 1.5887655800776177e-05, "loss": 1.5668, "step": 98260 }, { "epoch": 0.6176497161825074, "grad_norm": 5.732392311096191, "learning_rate": 1.5887236699831524e-05, "loss": 1.6429, "step": 98270 }, { "epoch": 0.6177125684992045, "grad_norm": 7.986827373504639, "learning_rate": 1.588681759888687e-05, "loss": 1.8901, "step": 98280 }, { "epoch": 0.6177754208159016, "grad_norm": 7.052105903625488, "learning_rate": 1.5886398497942215e-05, "loss": 1.4003, "step": 98290 }, { "epoch": 0.6178382731325988, "grad_norm": 6.326847076416016, "learning_rate": 1.5885979396997562e-05, "loss": 1.5663, "step": 98300 }, { "epoch": 0.6179011254492959, "grad_norm": 7.511728763580322, "learning_rate": 1.588556029605291e-05, "loss": 1.7506, "step": 98310 }, { "epoch": 0.617963977765993, "grad_norm": 7.672304630279541, "learning_rate": 1.5885141195108256e-05, "loss": 1.8009, "step": 98320 }, { "epoch": 0.6180268300826901, "grad_norm": 6.804565906524658, "learning_rate": 1.58847220941636e-05, "loss": 1.5392, "step": 98330 }, { "epoch": 0.6180896823993872, "grad_norm": 7.108956813812256, "learning_rate": 1.5884302993218947e-05, "loss": 1.4983, "step": 98340 }, { "epoch": 0.6181525347160843, "grad_norm": 9.570054054260254, "learning_rate": 1.5883883892274294e-05, "loss": 1.577, "step": 98350 }, { "epoch": 0.6182153870327814, "grad_norm": 7.5288004875183105, "learning_rate": 1.588346479132964e-05, "loss": 1.862, "step": 98360 }, { "epoch": 0.6182782393494786, "grad_norm": 5.870203018188477, "learning_rate": 1.5883045690384988e-05, "loss": 1.8158, "step": 98370 }, { "epoch": 0.6183410916661757, "grad_norm": 6.705949783325195, "learning_rate": 1.5882626589440332e-05, "loss": 1.6715, "step": 98380 }, { "epoch": 0.6184039439828728, "grad_norm": 5.2269978523254395, "learning_rate": 1.588220748849568e-05, "loss": 1.5872, "step": 98390 }, { "epoch": 0.6184667962995699, "grad_norm": 7.804924011230469, "learning_rate": 1.5881788387551026e-05, "loss": 1.7103, "step": 98400 }, { "epoch": 0.6185296486162669, "grad_norm": 7.38302755355835, "learning_rate": 1.5881369286606373e-05, "loss": 1.7875, "step": 98410 }, { "epoch": 0.618592500932964, "grad_norm": 6.590409755706787, "learning_rate": 1.588095018566172e-05, "loss": 1.733, "step": 98420 }, { "epoch": 0.6186553532496611, "grad_norm": 7.335029125213623, "learning_rate": 1.5880531084717067e-05, "loss": 1.8107, "step": 98430 }, { "epoch": 0.6187182055663583, "grad_norm": 6.710123062133789, "learning_rate": 1.5880111983772414e-05, "loss": 1.926, "step": 98440 }, { "epoch": 0.6187810578830554, "grad_norm": 6.156457424163818, "learning_rate": 1.587969288282776e-05, "loss": 1.7721, "step": 98450 }, { "epoch": 0.6188439101997525, "grad_norm": 6.876556396484375, "learning_rate": 1.587927378188311e-05, "loss": 1.74, "step": 98460 }, { "epoch": 0.6189067625164496, "grad_norm": 7.142808437347412, "learning_rate": 1.5878854680938452e-05, "loss": 1.6317, "step": 98470 }, { "epoch": 0.6189696148331467, "grad_norm": 6.802892684936523, "learning_rate": 1.58784355799938e-05, "loss": 1.6254, "step": 98480 }, { "epoch": 0.6190324671498438, "grad_norm": 5.966985702514648, "learning_rate": 1.5878016479049146e-05, "loss": 1.7586, "step": 98490 }, { "epoch": 0.619095319466541, "grad_norm": 6.2259321212768555, "learning_rate": 1.5877597378104493e-05, "loss": 1.7226, "step": 98500 }, { "epoch": 0.6191581717832381, "grad_norm": 6.297909259796143, "learning_rate": 1.5877178277159837e-05, "loss": 1.7483, "step": 98510 }, { "epoch": 0.6192210240999352, "grad_norm": 6.961431980133057, "learning_rate": 1.5876759176215184e-05, "loss": 1.606, "step": 98520 }, { "epoch": 0.6192838764166323, "grad_norm": 6.00648307800293, "learning_rate": 1.587634007527053e-05, "loss": 1.4804, "step": 98530 }, { "epoch": 0.6193467287333294, "grad_norm": 7.5596771240234375, "learning_rate": 1.5875920974325878e-05, "loss": 1.7532, "step": 98540 }, { "epoch": 0.6194095810500265, "grad_norm": 6.770756244659424, "learning_rate": 1.5875501873381225e-05, "loss": 1.6694, "step": 98550 }, { "epoch": 0.6194724333667236, "grad_norm": 7.217389106750488, "learning_rate": 1.587508277243657e-05, "loss": 1.4545, "step": 98560 }, { "epoch": 0.6195352856834208, "grad_norm": 7.36926794052124, "learning_rate": 1.5874663671491916e-05, "loss": 1.67, "step": 98570 }, { "epoch": 0.6195981380001179, "grad_norm": 6.236382961273193, "learning_rate": 1.5874244570547263e-05, "loss": 1.7156, "step": 98580 }, { "epoch": 0.619660990316815, "grad_norm": 6.446400165557861, "learning_rate": 1.587382546960261e-05, "loss": 1.6656, "step": 98590 }, { "epoch": 0.6197238426335121, "grad_norm": 6.381457328796387, "learning_rate": 1.5873406368657954e-05, "loss": 1.8684, "step": 98600 }, { "epoch": 0.6197866949502092, "grad_norm": 7.383438587188721, "learning_rate": 1.58729872677133e-05, "loss": 1.4665, "step": 98610 }, { "epoch": 0.6198495472669063, "grad_norm": 6.446352958679199, "learning_rate": 1.5872568166768648e-05, "loss": 1.7084, "step": 98620 }, { "epoch": 0.6199123995836034, "grad_norm": 6.78460168838501, "learning_rate": 1.5872149065823995e-05, "loss": 1.5823, "step": 98630 }, { "epoch": 0.6199752519003006, "grad_norm": 6.465644836425781, "learning_rate": 1.5871729964879342e-05, "loss": 1.8514, "step": 98640 }, { "epoch": 0.6200381042169977, "grad_norm": 6.511555194854736, "learning_rate": 1.587131086393469e-05, "loss": 1.6222, "step": 98650 }, { "epoch": 0.6201009565336947, "grad_norm": 7.202386856079102, "learning_rate": 1.5870891762990036e-05, "loss": 1.5518, "step": 98660 }, { "epoch": 0.6201638088503918, "grad_norm": 6.277887344360352, "learning_rate": 1.5870472662045383e-05, "loss": 1.7382, "step": 98670 }, { "epoch": 0.6202266611670889, "grad_norm": 7.642271518707275, "learning_rate": 1.587005356110073e-05, "loss": 1.5345, "step": 98680 }, { "epoch": 0.620289513483786, "grad_norm": 6.236955642700195, "learning_rate": 1.5869634460156074e-05, "loss": 1.6613, "step": 98690 }, { "epoch": 0.6203523658004831, "grad_norm": 6.624298572540283, "learning_rate": 1.586921535921142e-05, "loss": 1.7777, "step": 98700 }, { "epoch": 0.6204152181171803, "grad_norm": 6.969363212585449, "learning_rate": 1.5868796258266768e-05, "loss": 1.5978, "step": 98710 }, { "epoch": 0.6204780704338774, "grad_norm": 6.063605308532715, "learning_rate": 1.5868377157322115e-05, "loss": 1.5309, "step": 98720 }, { "epoch": 0.6205409227505745, "grad_norm": 8.052339553833008, "learning_rate": 1.586795805637746e-05, "loss": 1.7814, "step": 98730 }, { "epoch": 0.6206037750672716, "grad_norm": 6.75014066696167, "learning_rate": 1.5867538955432806e-05, "loss": 1.6291, "step": 98740 }, { "epoch": 0.6206666273839687, "grad_norm": 6.427272319793701, "learning_rate": 1.5867119854488153e-05, "loss": 1.5339, "step": 98750 }, { "epoch": 0.6207294797006658, "grad_norm": 7.240400791168213, "learning_rate": 1.58667007535435e-05, "loss": 1.8073, "step": 98760 }, { "epoch": 0.620792332017363, "grad_norm": 7.051139831542969, "learning_rate": 1.5866281652598847e-05, "loss": 1.5311, "step": 98770 }, { "epoch": 0.6208551843340601, "grad_norm": 7.380163192749023, "learning_rate": 1.586586255165419e-05, "loss": 1.6614, "step": 98780 }, { "epoch": 0.6209180366507572, "grad_norm": 6.821539402008057, "learning_rate": 1.5865443450709538e-05, "loss": 1.6406, "step": 98790 }, { "epoch": 0.6209808889674543, "grad_norm": 6.191500663757324, "learning_rate": 1.5865024349764885e-05, "loss": 1.5525, "step": 98800 }, { "epoch": 0.6210437412841514, "grad_norm": 6.564124584197998, "learning_rate": 1.5864605248820232e-05, "loss": 1.6494, "step": 98810 }, { "epoch": 0.6211065936008485, "grad_norm": 7.13649845123291, "learning_rate": 1.586418614787558e-05, "loss": 1.6922, "step": 98820 }, { "epoch": 0.6211694459175456, "grad_norm": 5.814675331115723, "learning_rate": 1.5863767046930926e-05, "loss": 1.7732, "step": 98830 }, { "epoch": 0.6212322982342428, "grad_norm": 6.690029144287109, "learning_rate": 1.5863347945986273e-05, "loss": 1.5637, "step": 98840 }, { "epoch": 0.6212951505509399, "grad_norm": 6.871272087097168, "learning_rate": 1.5862928845041617e-05, "loss": 1.5397, "step": 98850 }, { "epoch": 0.621358002867637, "grad_norm": 5.294637680053711, "learning_rate": 1.5862509744096964e-05, "loss": 1.5828, "step": 98860 }, { "epoch": 0.6214208551843341, "grad_norm": 7.080363750457764, "learning_rate": 1.586209064315231e-05, "loss": 1.8897, "step": 98870 }, { "epoch": 0.6214837075010312, "grad_norm": 6.606733798980713, "learning_rate": 1.5861671542207658e-05, "loss": 1.6083, "step": 98880 }, { "epoch": 0.6215465598177283, "grad_norm": 6.138832092285156, "learning_rate": 1.5861252441263005e-05, "loss": 1.7771, "step": 98890 }, { "epoch": 0.6216094121344254, "grad_norm": 5.845148086547852, "learning_rate": 1.5860833340318352e-05, "loss": 1.5498, "step": 98900 }, { "epoch": 0.6216722644511226, "grad_norm": 7.054131984710693, "learning_rate": 1.5860414239373696e-05, "loss": 1.5525, "step": 98910 }, { "epoch": 0.6217351167678196, "grad_norm": 6.092016696929932, "learning_rate": 1.5859995138429043e-05, "loss": 1.7413, "step": 98920 }, { "epoch": 0.6217979690845167, "grad_norm": 7.949788570404053, "learning_rate": 1.585957603748439e-05, "loss": 1.8284, "step": 98930 }, { "epoch": 0.6218608214012138, "grad_norm": 6.822504997253418, "learning_rate": 1.5859156936539737e-05, "loss": 1.7476, "step": 98940 }, { "epoch": 0.6219236737179109, "grad_norm": 6.951657295227051, "learning_rate": 1.585873783559508e-05, "loss": 1.7433, "step": 98950 }, { "epoch": 0.621986526034608, "grad_norm": 7.07517671585083, "learning_rate": 1.5858318734650428e-05, "loss": 1.9471, "step": 98960 }, { "epoch": 0.6220493783513051, "grad_norm": 6.086246967315674, "learning_rate": 1.5857899633705775e-05, "loss": 1.4152, "step": 98970 }, { "epoch": 0.6221122306680023, "grad_norm": 6.832052230834961, "learning_rate": 1.5857480532761122e-05, "loss": 1.4304, "step": 98980 }, { "epoch": 0.6221750829846994, "grad_norm": 6.731996059417725, "learning_rate": 1.585706143181647e-05, "loss": 1.6459, "step": 98990 }, { "epoch": 0.6222379353013965, "grad_norm": 6.495983123779297, "learning_rate": 1.5856642330871813e-05, "loss": 1.6378, "step": 99000 }, { "epoch": 0.6223007876180936, "grad_norm": 6.157533645629883, "learning_rate": 1.585622322992716e-05, "loss": 1.7741, "step": 99010 }, { "epoch": 0.6223636399347907, "grad_norm": 7.105135440826416, "learning_rate": 1.5855804128982507e-05, "loss": 1.8572, "step": 99020 }, { "epoch": 0.6224264922514878, "grad_norm": 6.373230457305908, "learning_rate": 1.5855385028037854e-05, "loss": 1.7724, "step": 99030 }, { "epoch": 0.6224893445681849, "grad_norm": 6.70004415512085, "learning_rate": 1.58549659270932e-05, "loss": 1.6024, "step": 99040 }, { "epoch": 0.6225521968848821, "grad_norm": 6.644879341125488, "learning_rate": 1.5854588736243012e-05, "loss": 1.6875, "step": 99050 }, { "epoch": 0.6226150492015792, "grad_norm": 6.124501705169678, "learning_rate": 1.585416963529836e-05, "loss": 1.5633, "step": 99060 }, { "epoch": 0.6226779015182763, "grad_norm": 5.908251762390137, "learning_rate": 1.5853750534353707e-05, "loss": 1.5969, "step": 99070 }, { "epoch": 0.6227407538349734, "grad_norm": 6.524896144866943, "learning_rate": 1.585333143340905e-05, "loss": 1.8025, "step": 99080 }, { "epoch": 0.6228036061516705, "grad_norm": 5.9396562576293945, "learning_rate": 1.5852912332464397e-05, "loss": 1.5912, "step": 99090 }, { "epoch": 0.6228664584683676, "grad_norm": 9.76508903503418, "learning_rate": 1.5852493231519744e-05, "loss": 1.4613, "step": 99100 }, { "epoch": 0.6229293107850647, "grad_norm": 6.710978031158447, "learning_rate": 1.585207413057509e-05, "loss": 1.7373, "step": 99110 }, { "epoch": 0.6229921631017619, "grad_norm": 6.091090202331543, "learning_rate": 1.585165502963044e-05, "loss": 1.4173, "step": 99120 }, { "epoch": 0.623055015418459, "grad_norm": 6.581202030181885, "learning_rate": 1.5851235928685786e-05, "loss": 1.7805, "step": 99130 }, { "epoch": 0.6231178677351561, "grad_norm": 6.508041858673096, "learning_rate": 1.5850816827741133e-05, "loss": 1.8844, "step": 99140 }, { "epoch": 0.6231807200518532, "grad_norm": 7.098252773284912, "learning_rate": 1.585039772679648e-05, "loss": 1.6756, "step": 99150 }, { "epoch": 0.6232435723685503, "grad_norm": 6.896302700042725, "learning_rate": 1.5849978625851823e-05, "loss": 1.5964, "step": 99160 }, { "epoch": 0.6233064246852473, "grad_norm": 7.160173416137695, "learning_rate": 1.584955952490717e-05, "loss": 1.5011, "step": 99170 }, { "epoch": 0.6233692770019444, "grad_norm": 6.765491008758545, "learning_rate": 1.5849140423962518e-05, "loss": 1.7538, "step": 99180 }, { "epoch": 0.6234321293186416, "grad_norm": 7.4504499435424805, "learning_rate": 1.5848721323017865e-05, "loss": 1.856, "step": 99190 }, { "epoch": 0.6234949816353387, "grad_norm": 7.950074195861816, "learning_rate": 1.5848302222073212e-05, "loss": 1.5604, "step": 99200 }, { "epoch": 0.6235578339520358, "grad_norm": 6.2002482414245605, "learning_rate": 1.5847883121128555e-05, "loss": 1.7324, "step": 99210 }, { "epoch": 0.6236206862687329, "grad_norm": 5.960773468017578, "learning_rate": 1.5847464020183903e-05, "loss": 1.5907, "step": 99220 }, { "epoch": 0.62368353858543, "grad_norm": 5.5091094970703125, "learning_rate": 1.584704491923925e-05, "loss": 1.6034, "step": 99230 }, { "epoch": 0.6237463909021271, "grad_norm": 6.118611812591553, "learning_rate": 1.5846625818294597e-05, "loss": 1.6115, "step": 99240 }, { "epoch": 0.6238092432188242, "grad_norm": 6.906420707702637, "learning_rate": 1.584620671734994e-05, "loss": 1.7667, "step": 99250 }, { "epoch": 0.6238720955355214, "grad_norm": 6.723021507263184, "learning_rate": 1.5845787616405287e-05, "loss": 1.4951, "step": 99260 }, { "epoch": 0.6239349478522185, "grad_norm": 6.305889129638672, "learning_rate": 1.5845368515460634e-05, "loss": 1.5258, "step": 99270 }, { "epoch": 0.6239978001689156, "grad_norm": 6.666172981262207, "learning_rate": 1.584494941451598e-05, "loss": 1.7402, "step": 99280 }, { "epoch": 0.6240606524856127, "grad_norm": 6.768346786499023, "learning_rate": 1.584453031357133e-05, "loss": 1.8403, "step": 99290 }, { "epoch": 0.6241235048023098, "grad_norm": 5.794680118560791, "learning_rate": 1.5844111212626672e-05, "loss": 1.5025, "step": 99300 }, { "epoch": 0.6241863571190069, "grad_norm": 7.722352981567383, "learning_rate": 1.584369211168202e-05, "loss": 1.5967, "step": 99310 }, { "epoch": 0.6242492094357041, "grad_norm": 6.2834906578063965, "learning_rate": 1.5843273010737366e-05, "loss": 1.7963, "step": 99320 }, { "epoch": 0.6243120617524012, "grad_norm": 6.741940975189209, "learning_rate": 1.5842853909792714e-05, "loss": 1.4822, "step": 99330 }, { "epoch": 0.6243749140690983, "grad_norm": 6.804315090179443, "learning_rate": 1.584243480884806e-05, "loss": 1.7334, "step": 99340 }, { "epoch": 0.6244377663857954, "grad_norm": 7.085864067077637, "learning_rate": 1.5842015707903408e-05, "loss": 1.7177, "step": 99350 }, { "epoch": 0.6245006187024925, "grad_norm": 7.832089900970459, "learning_rate": 1.5841596606958755e-05, "loss": 1.5916, "step": 99360 }, { "epoch": 0.6245634710191896, "grad_norm": 6.379815101623535, "learning_rate": 1.5841177506014102e-05, "loss": 1.7412, "step": 99370 }, { "epoch": 0.6246263233358867, "grad_norm": 6.425118923187256, "learning_rate": 1.584075840506945e-05, "loss": 1.5342, "step": 99380 }, { "epoch": 0.6246891756525839, "grad_norm": 6.985279083251953, "learning_rate": 1.5840339304124793e-05, "loss": 1.8981, "step": 99390 }, { "epoch": 0.624752027969281, "grad_norm": 5.208855152130127, "learning_rate": 1.583992020318014e-05, "loss": 1.6735, "step": 99400 }, { "epoch": 0.6248148802859781, "grad_norm": 5.808994770050049, "learning_rate": 1.5839501102235487e-05, "loss": 1.4748, "step": 99410 }, { "epoch": 0.6248777326026752, "grad_norm": 6.836221694946289, "learning_rate": 1.5839082001290834e-05, "loss": 1.739, "step": 99420 }, { "epoch": 0.6249405849193722, "grad_norm": 6.197317123413086, "learning_rate": 1.5838662900346177e-05, "loss": 1.5554, "step": 99430 }, { "epoch": 0.6250034372360693, "grad_norm": 8.52884578704834, "learning_rate": 1.5838243799401525e-05, "loss": 1.7089, "step": 99440 }, { "epoch": 0.6250662895527664, "grad_norm": 6.217985153198242, "learning_rate": 1.583782469845687e-05, "loss": 1.7057, "step": 99450 }, { "epoch": 0.6251291418694636, "grad_norm": 6.842314720153809, "learning_rate": 1.583740559751222e-05, "loss": 1.776, "step": 99460 }, { "epoch": 0.6251919941861607, "grad_norm": 6.270942211151123, "learning_rate": 1.5836986496567562e-05, "loss": 1.7268, "step": 99470 }, { "epoch": 0.6252548465028578, "grad_norm": 6.600823879241943, "learning_rate": 1.583656739562291e-05, "loss": 1.3532, "step": 99480 }, { "epoch": 0.6253176988195549, "grad_norm": 6.996237754821777, "learning_rate": 1.5836148294678256e-05, "loss": 1.7153, "step": 99490 }, { "epoch": 0.625380551136252, "grad_norm": 6.548280239105225, "learning_rate": 1.5835729193733604e-05, "loss": 1.8595, "step": 99500 }, { "epoch": 0.6254434034529491, "grad_norm": 5.533222198486328, "learning_rate": 1.583531009278895e-05, "loss": 1.4936, "step": 99510 }, { "epoch": 0.6255062557696462, "grad_norm": 6.712929725646973, "learning_rate": 1.5834890991844298e-05, "loss": 1.8575, "step": 99520 }, { "epoch": 0.6255691080863434, "grad_norm": 6.34805154800415, "learning_rate": 1.5834471890899645e-05, "loss": 1.7679, "step": 99530 }, { "epoch": 0.6256319604030405, "grad_norm": 6.5129218101501465, "learning_rate": 1.583405278995499e-05, "loss": 1.6556, "step": 99540 }, { "epoch": 0.6256948127197376, "grad_norm": 6.358635902404785, "learning_rate": 1.5833633689010336e-05, "loss": 1.7738, "step": 99550 }, { "epoch": 0.6257576650364347, "grad_norm": 6.160097122192383, "learning_rate": 1.5833214588065683e-05, "loss": 1.5183, "step": 99560 }, { "epoch": 0.6258205173531318, "grad_norm": 7.927013397216797, "learning_rate": 1.583279548712103e-05, "loss": 1.751, "step": 99570 }, { "epoch": 0.6258833696698289, "grad_norm": 5.808539390563965, "learning_rate": 1.5832376386176377e-05, "loss": 1.6338, "step": 99580 }, { "epoch": 0.625946221986526, "grad_norm": 5.886746883392334, "learning_rate": 1.5831957285231724e-05, "loss": 1.8113, "step": 99590 }, { "epoch": 0.6260090743032232, "grad_norm": 6.869828224182129, "learning_rate": 1.583153818428707e-05, "loss": 1.7536, "step": 99600 }, { "epoch": 0.6260719266199203, "grad_norm": 6.9043097496032715, "learning_rate": 1.5831119083342415e-05, "loss": 1.5779, "step": 99610 }, { "epoch": 0.6261347789366174, "grad_norm": 7.80623197555542, "learning_rate": 1.583069998239776e-05, "loss": 1.6508, "step": 99620 }, { "epoch": 0.6261976312533145, "grad_norm": 7.624624729156494, "learning_rate": 1.583028088145311e-05, "loss": 1.7147, "step": 99630 }, { "epoch": 0.6262604835700116, "grad_norm": 7.440316677093506, "learning_rate": 1.5829861780508456e-05, "loss": 1.6111, "step": 99640 }, { "epoch": 0.6263233358867087, "grad_norm": 7.38603401184082, "learning_rate": 1.58294426795638e-05, "loss": 1.5613, "step": 99650 }, { "epoch": 0.6263861882034059, "grad_norm": 7.0097246170043945, "learning_rate": 1.5829023578619147e-05, "loss": 1.867, "step": 99660 }, { "epoch": 0.626449040520103, "grad_norm": 6.874953269958496, "learning_rate": 1.5828604477674494e-05, "loss": 1.549, "step": 99670 }, { "epoch": 0.6265118928368, "grad_norm": 6.660768508911133, "learning_rate": 1.582818537672984e-05, "loss": 1.6114, "step": 99680 }, { "epoch": 0.6265747451534971, "grad_norm": 5.996919631958008, "learning_rate": 1.5827766275785188e-05, "loss": 1.6485, "step": 99690 }, { "epoch": 0.6266375974701942, "grad_norm": 6.453237533569336, "learning_rate": 1.582734717484053e-05, "loss": 1.6214, "step": 99700 }, { "epoch": 0.6267004497868913, "grad_norm": 8.109010696411133, "learning_rate": 1.582692807389588e-05, "loss": 1.769, "step": 99710 }, { "epoch": 0.6267633021035884, "grad_norm": 6.3128228187561035, "learning_rate": 1.5826508972951226e-05, "loss": 1.8159, "step": 99720 }, { "epoch": 0.6268261544202856, "grad_norm": 6.217896461486816, "learning_rate": 1.5826089872006573e-05, "loss": 1.4919, "step": 99730 }, { "epoch": 0.6268890067369827, "grad_norm": 7.381162166595459, "learning_rate": 1.582567077106192e-05, "loss": 1.7286, "step": 99740 }, { "epoch": 0.6269518590536798, "grad_norm": 7.417752265930176, "learning_rate": 1.5825251670117267e-05, "loss": 1.6283, "step": 99750 }, { "epoch": 0.6270147113703769, "grad_norm": 6.68156099319458, "learning_rate": 1.5824832569172614e-05, "loss": 1.6258, "step": 99760 }, { "epoch": 0.627077563687074, "grad_norm": 6.632552146911621, "learning_rate": 1.582441346822796e-05, "loss": 1.8614, "step": 99770 }, { "epoch": 0.6271404160037711, "grad_norm": 7.476672649383545, "learning_rate": 1.5823994367283305e-05, "loss": 1.8872, "step": 99780 }, { "epoch": 0.6272032683204682, "grad_norm": 6.832566738128662, "learning_rate": 1.582357526633865e-05, "loss": 1.7, "step": 99790 }, { "epoch": 0.6272661206371654, "grad_norm": 7.108794689178467, "learning_rate": 1.5823156165394e-05, "loss": 1.7619, "step": 99800 }, { "epoch": 0.6273289729538625, "grad_norm": 9.079567909240723, "learning_rate": 1.5822737064449346e-05, "loss": 1.7159, "step": 99810 }, { "epoch": 0.6273918252705596, "grad_norm": 6.384576320648193, "learning_rate": 1.5822317963504693e-05, "loss": 1.7503, "step": 99820 }, { "epoch": 0.6274546775872567, "grad_norm": 7.277607440948486, "learning_rate": 1.5821898862560037e-05, "loss": 1.7782, "step": 99830 }, { "epoch": 0.6275175299039538, "grad_norm": 6.08124303817749, "learning_rate": 1.5821479761615384e-05, "loss": 1.6447, "step": 99840 }, { "epoch": 0.6275803822206509, "grad_norm": 8.458341598510742, "learning_rate": 1.582106066067073e-05, "loss": 1.6659, "step": 99850 }, { "epoch": 0.627643234537348, "grad_norm": 7.769322872161865, "learning_rate": 1.5820641559726078e-05, "loss": 1.612, "step": 99860 }, { "epoch": 0.6277060868540452, "grad_norm": 6.998531818389893, "learning_rate": 1.582022245878142e-05, "loss": 1.3814, "step": 99870 }, { "epoch": 0.6277689391707423, "grad_norm": 6.404359340667725, "learning_rate": 1.581980335783677e-05, "loss": 1.6981, "step": 99880 }, { "epoch": 0.6278317914874394, "grad_norm": 6.724607944488525, "learning_rate": 1.5819384256892116e-05, "loss": 1.6364, "step": 99890 }, { "epoch": 0.6278946438041365, "grad_norm": 7.201076030731201, "learning_rate": 1.5818965155947463e-05, "loss": 1.7632, "step": 99900 }, { "epoch": 0.6279574961208336, "grad_norm": 6.761590003967285, "learning_rate": 1.581854605500281e-05, "loss": 1.4493, "step": 99910 }, { "epoch": 0.6280203484375307, "grad_norm": 6.595172882080078, "learning_rate": 1.5818126954058153e-05, "loss": 1.5363, "step": 99920 }, { "epoch": 0.6280832007542279, "grad_norm": 6.319594383239746, "learning_rate": 1.58177078531135e-05, "loss": 1.5987, "step": 99930 }, { "epoch": 0.6281460530709249, "grad_norm": 5.454696178436279, "learning_rate": 1.5817288752168848e-05, "loss": 1.5252, "step": 99940 }, { "epoch": 0.628208905387622, "grad_norm": 7.6446919441223145, "learning_rate": 1.5816869651224195e-05, "loss": 1.9102, "step": 99950 }, { "epoch": 0.6282717577043191, "grad_norm": 9.58559799194336, "learning_rate": 1.5816450550279542e-05, "loss": 1.6907, "step": 99960 }, { "epoch": 0.6283346100210162, "grad_norm": 6.981563091278076, "learning_rate": 1.581603144933489e-05, "loss": 1.7332, "step": 99970 }, { "epoch": 0.6283974623377133, "grad_norm": 5.508819580078125, "learning_rate": 1.5815612348390236e-05, "loss": 1.6118, "step": 99980 }, { "epoch": 0.6284603146544104, "grad_norm": 6.374399662017822, "learning_rate": 1.5815193247445583e-05, "loss": 1.4557, "step": 99990 }, { "epoch": 0.6285231669711075, "grad_norm": 7.851498603820801, "learning_rate": 1.581477414650093e-05, "loss": 1.6887, "step": 100000 }, { "epoch": 0.6285860192878047, "grad_norm": 6.407346248626709, "learning_rate": 1.5814355045556274e-05, "loss": 1.9452, "step": 100010 }, { "epoch": 0.6286488716045018, "grad_norm": 5.7401933670043945, "learning_rate": 1.581393594461162e-05, "loss": 1.8405, "step": 100020 }, { "epoch": 0.6287117239211989, "grad_norm": 9.775955200195312, "learning_rate": 1.5813516843666968e-05, "loss": 1.6058, "step": 100030 }, { "epoch": 0.628774576237896, "grad_norm": 6.347497463226318, "learning_rate": 1.5813097742722315e-05, "loss": 1.8753, "step": 100040 }, { "epoch": 0.6288374285545931, "grad_norm": 6.3936052322387695, "learning_rate": 1.581267864177766e-05, "loss": 1.4589, "step": 100050 }, { "epoch": 0.6289002808712902, "grad_norm": 5.969712257385254, "learning_rate": 1.5812259540833006e-05, "loss": 1.4234, "step": 100060 }, { "epoch": 0.6289631331879874, "grad_norm": 7.093703269958496, "learning_rate": 1.5811840439888353e-05, "loss": 1.7106, "step": 100070 }, { "epoch": 0.6290259855046845, "grad_norm": 7.396203517913818, "learning_rate": 1.58114213389437e-05, "loss": 1.7015, "step": 100080 }, { "epoch": 0.6290888378213816, "grad_norm": 6.682152271270752, "learning_rate": 1.5811002237999043e-05, "loss": 1.6464, "step": 100090 }, { "epoch": 0.6291516901380787, "grad_norm": 6.390876770019531, "learning_rate": 1.581058313705439e-05, "loss": 1.6781, "step": 100100 }, { "epoch": 0.6292145424547758, "grad_norm": 6.433990001678467, "learning_rate": 1.5810164036109738e-05, "loss": 1.4025, "step": 100110 }, { "epoch": 0.6292773947714729, "grad_norm": 6.061580181121826, "learning_rate": 1.5809744935165085e-05, "loss": 1.5259, "step": 100120 }, { "epoch": 0.62934024708817, "grad_norm": 6.715423107147217, "learning_rate": 1.5809325834220432e-05, "loss": 1.7404, "step": 100130 }, { "epoch": 0.6294030994048672, "grad_norm": 6.707705020904541, "learning_rate": 1.580890673327578e-05, "loss": 1.8089, "step": 100140 }, { "epoch": 0.6294659517215643, "grad_norm": 6.904034614562988, "learning_rate": 1.5808487632331126e-05, "loss": 1.8503, "step": 100150 }, { "epoch": 0.6295288040382614, "grad_norm": 6.170143127441406, "learning_rate": 1.580806853138647e-05, "loss": 1.4703, "step": 100160 }, { "epoch": 0.6295916563549585, "grad_norm": 6.437007427215576, "learning_rate": 1.5807649430441817e-05, "loss": 1.4874, "step": 100170 }, { "epoch": 0.6296545086716556, "grad_norm": 6.416689395904541, "learning_rate": 1.5807230329497164e-05, "loss": 1.9164, "step": 100180 }, { "epoch": 0.6297173609883526, "grad_norm": 7.378628730773926, "learning_rate": 1.580681122855251e-05, "loss": 1.6999, "step": 100190 }, { "epoch": 0.6297802133050497, "grad_norm": 7.50117301940918, "learning_rate": 1.5806392127607858e-05, "loss": 1.6007, "step": 100200 }, { "epoch": 0.6298430656217469, "grad_norm": 5.30483865737915, "learning_rate": 1.5805973026663205e-05, "loss": 1.7694, "step": 100210 }, { "epoch": 0.629905917938444, "grad_norm": 5.686487674713135, "learning_rate": 1.5805553925718552e-05, "loss": 1.5099, "step": 100220 }, { "epoch": 0.6299687702551411, "grad_norm": 7.767488479614258, "learning_rate": 1.5805134824773896e-05, "loss": 1.9495, "step": 100230 }, { "epoch": 0.6300316225718382, "grad_norm": 8.49234390258789, "learning_rate": 1.5804715723829243e-05, "loss": 1.7609, "step": 100240 }, { "epoch": 0.6300944748885353, "grad_norm": 5.479924201965332, "learning_rate": 1.580429662288459e-05, "loss": 1.6703, "step": 100250 }, { "epoch": 0.6301573272052324, "grad_norm": 7.043915271759033, "learning_rate": 1.5803877521939937e-05, "loss": 1.6396, "step": 100260 }, { "epoch": 0.6302201795219295, "grad_norm": 6.031806468963623, "learning_rate": 1.580345842099528e-05, "loss": 1.6458, "step": 100270 }, { "epoch": 0.6302830318386267, "grad_norm": 7.834958553314209, "learning_rate": 1.5803039320050628e-05, "loss": 1.6333, "step": 100280 }, { "epoch": 0.6303458841553238, "grad_norm": 6.062341690063477, "learning_rate": 1.5802620219105975e-05, "loss": 1.8101, "step": 100290 }, { "epoch": 0.6304087364720209, "grad_norm": 7.53291130065918, "learning_rate": 1.5802201118161322e-05, "loss": 1.8961, "step": 100300 }, { "epoch": 0.630471588788718, "grad_norm": 7.180240154266357, "learning_rate": 1.580178201721667e-05, "loss": 1.8702, "step": 100310 }, { "epoch": 0.6305344411054151, "grad_norm": 6.761497497558594, "learning_rate": 1.5801362916272013e-05, "loss": 1.6263, "step": 100320 }, { "epoch": 0.6305972934221122, "grad_norm": 7.409494400024414, "learning_rate": 1.580094381532736e-05, "loss": 1.6978, "step": 100330 }, { "epoch": 0.6306601457388094, "grad_norm": 7.477789878845215, "learning_rate": 1.5800524714382707e-05, "loss": 1.6104, "step": 100340 }, { "epoch": 0.6307229980555065, "grad_norm": 6.731983661651611, "learning_rate": 1.5800105613438054e-05, "loss": 1.6432, "step": 100350 }, { "epoch": 0.6307858503722036, "grad_norm": 7.559552192687988, "learning_rate": 1.57996865124934e-05, "loss": 1.6665, "step": 100360 }, { "epoch": 0.6308487026889007, "grad_norm": 6.713873386383057, "learning_rate": 1.5799267411548748e-05, "loss": 1.6159, "step": 100370 }, { "epoch": 0.6309115550055978, "grad_norm": 6.6399078369140625, "learning_rate": 1.5798848310604095e-05, "loss": 1.5398, "step": 100380 }, { "epoch": 0.6309744073222949, "grad_norm": 6.55191707611084, "learning_rate": 1.5798429209659442e-05, "loss": 1.5245, "step": 100390 }, { "epoch": 0.631037259638992, "grad_norm": 6.504763126373291, "learning_rate": 1.5798010108714786e-05, "loss": 1.6908, "step": 100400 }, { "epoch": 0.6311001119556892, "grad_norm": 5.631613731384277, "learning_rate": 1.5797591007770133e-05, "loss": 1.5776, "step": 100410 }, { "epoch": 0.6311629642723863, "grad_norm": 6.911037921905518, "learning_rate": 1.579717190682548e-05, "loss": 1.6871, "step": 100420 }, { "epoch": 0.6312258165890834, "grad_norm": 6.991258144378662, "learning_rate": 1.5796752805880827e-05, "loss": 1.7642, "step": 100430 }, { "epoch": 0.6312886689057805, "grad_norm": 7.394499778747559, "learning_rate": 1.5796333704936174e-05, "loss": 1.7376, "step": 100440 }, { "epoch": 0.6313515212224775, "grad_norm": 6.803630828857422, "learning_rate": 1.5795914603991518e-05, "loss": 1.6368, "step": 100450 }, { "epoch": 0.6314143735391746, "grad_norm": 6.7257914543151855, "learning_rate": 1.5795495503046865e-05, "loss": 1.7314, "step": 100460 }, { "epoch": 0.6314772258558717, "grad_norm": 7.259373188018799, "learning_rate": 1.5795076402102212e-05, "loss": 1.8811, "step": 100470 }, { "epoch": 0.6315400781725689, "grad_norm": 7.685807704925537, "learning_rate": 1.579465730115756e-05, "loss": 1.6636, "step": 100480 }, { "epoch": 0.631602930489266, "grad_norm": 6.555708885192871, "learning_rate": 1.5794238200212903e-05, "loss": 1.5783, "step": 100490 }, { "epoch": 0.6316657828059631, "grad_norm": 5.685708045959473, "learning_rate": 1.579381909926825e-05, "loss": 1.5042, "step": 100500 }, { "epoch": 0.6317286351226602, "grad_norm": 6.318507194519043, "learning_rate": 1.5793399998323597e-05, "loss": 1.5881, "step": 100510 }, { "epoch": 0.6317914874393573, "grad_norm": 6.510059356689453, "learning_rate": 1.5792980897378944e-05, "loss": 1.629, "step": 100520 }, { "epoch": 0.6318543397560544, "grad_norm": 6.250816822052002, "learning_rate": 1.579256179643429e-05, "loss": 1.7354, "step": 100530 }, { "epoch": 0.6319171920727515, "grad_norm": 7.057291030883789, "learning_rate": 1.5792142695489635e-05, "loss": 1.5878, "step": 100540 }, { "epoch": 0.6319800443894487, "grad_norm": 6.631834983825684, "learning_rate": 1.579172359454498e-05, "loss": 1.6387, "step": 100550 }, { "epoch": 0.6320428967061458, "grad_norm": 6.286482810974121, "learning_rate": 1.579130449360033e-05, "loss": 1.4187, "step": 100560 }, { "epoch": 0.6321057490228429, "grad_norm": 6.1507368087768555, "learning_rate": 1.5790885392655676e-05, "loss": 1.681, "step": 100570 }, { "epoch": 0.63216860133954, "grad_norm": 6.44645881652832, "learning_rate": 1.5790466291711023e-05, "loss": 1.5956, "step": 100580 }, { "epoch": 0.6322314536562371, "grad_norm": 6.74025297164917, "learning_rate": 1.579004719076637e-05, "loss": 1.5966, "step": 100590 }, { "epoch": 0.6322943059729342, "grad_norm": 6.204047203063965, "learning_rate": 1.5789628089821717e-05, "loss": 1.5122, "step": 100600 }, { "epoch": 0.6323571582896313, "grad_norm": 6.540848731994629, "learning_rate": 1.5789208988877064e-05, "loss": 1.7332, "step": 100610 }, { "epoch": 0.6324200106063285, "grad_norm": 5.2913737297058105, "learning_rate": 1.578878988793241e-05, "loss": 1.7636, "step": 100620 }, { "epoch": 0.6324828629230256, "grad_norm": 7.495303630828857, "learning_rate": 1.5788370786987755e-05, "loss": 1.7023, "step": 100630 }, { "epoch": 0.6325457152397227, "grad_norm": 6.820461750030518, "learning_rate": 1.5787951686043102e-05, "loss": 1.5975, "step": 100640 }, { "epoch": 0.6326085675564198, "grad_norm": 5.557898044586182, "learning_rate": 1.578753258509845e-05, "loss": 1.3953, "step": 100650 }, { "epoch": 0.6326714198731169, "grad_norm": 6.902298927307129, "learning_rate": 1.5787113484153796e-05, "loss": 1.7426, "step": 100660 }, { "epoch": 0.632734272189814, "grad_norm": 6.433447360992432, "learning_rate": 1.578669438320914e-05, "loss": 1.756, "step": 100670 }, { "epoch": 0.6327971245065112, "grad_norm": 6.460206508636475, "learning_rate": 1.5786275282264487e-05, "loss": 1.7636, "step": 100680 }, { "epoch": 0.6328599768232083, "grad_norm": 6.105504512786865, "learning_rate": 1.5785856181319834e-05, "loss": 1.7044, "step": 100690 }, { "epoch": 0.6329228291399053, "grad_norm": 6.065561294555664, "learning_rate": 1.578543708037518e-05, "loss": 1.5518, "step": 100700 }, { "epoch": 0.6329856814566024, "grad_norm": 6.046292304992676, "learning_rate": 1.5785017979430525e-05, "loss": 1.6447, "step": 100710 }, { "epoch": 0.6330485337732995, "grad_norm": 6.256049633026123, "learning_rate": 1.578459887848587e-05, "loss": 1.8812, "step": 100720 }, { "epoch": 0.6331113860899966, "grad_norm": 5.4630818367004395, "learning_rate": 1.578417977754122e-05, "loss": 1.5341, "step": 100730 }, { "epoch": 0.6331742384066937, "grad_norm": 7.070973873138428, "learning_rate": 1.5783760676596566e-05, "loss": 1.7251, "step": 100740 }, { "epoch": 0.6332370907233908, "grad_norm": 9.157733917236328, "learning_rate": 1.5783341575651913e-05, "loss": 1.7675, "step": 100750 }, { "epoch": 0.633299943040088, "grad_norm": 6.445616722106934, "learning_rate": 1.578292247470726e-05, "loss": 1.81, "step": 100760 }, { "epoch": 0.6333627953567851, "grad_norm": 6.646481990814209, "learning_rate": 1.5782503373762607e-05, "loss": 1.7616, "step": 100770 }, { "epoch": 0.6334256476734822, "grad_norm": 6.220576763153076, "learning_rate": 1.5782084272817954e-05, "loss": 1.8266, "step": 100780 }, { "epoch": 0.6334884999901793, "grad_norm": 7.9391374588012695, "learning_rate": 1.5781665171873298e-05, "loss": 1.8282, "step": 100790 }, { "epoch": 0.6335513523068764, "grad_norm": 5.481481552124023, "learning_rate": 1.5781246070928645e-05, "loss": 1.6849, "step": 100800 }, { "epoch": 0.6336142046235735, "grad_norm": 7.201502323150635, "learning_rate": 1.5780826969983992e-05, "loss": 1.8536, "step": 100810 }, { "epoch": 0.6336770569402707, "grad_norm": 6.303986072540283, "learning_rate": 1.578040786903934e-05, "loss": 1.5363, "step": 100820 }, { "epoch": 0.6337399092569678, "grad_norm": 6.826161861419678, "learning_rate": 1.5779988768094686e-05, "loss": 1.6907, "step": 100830 }, { "epoch": 0.6338027615736649, "grad_norm": 6.250027179718018, "learning_rate": 1.5779569667150033e-05, "loss": 1.7413, "step": 100840 }, { "epoch": 0.633865613890362, "grad_norm": 5.6930060386657715, "learning_rate": 1.5779150566205377e-05, "loss": 1.7811, "step": 100850 }, { "epoch": 0.6339284662070591, "grad_norm": 7.0470147132873535, "learning_rate": 1.5778731465260724e-05, "loss": 1.5405, "step": 100860 }, { "epoch": 0.6339913185237562, "grad_norm": 5.79329776763916, "learning_rate": 1.577831236431607e-05, "loss": 1.6439, "step": 100870 }, { "epoch": 0.6340541708404533, "grad_norm": 6.484084606170654, "learning_rate": 1.5777893263371418e-05, "loss": 1.5257, "step": 100880 }, { "epoch": 0.6341170231571505, "grad_norm": 8.19410514831543, "learning_rate": 1.577747416242676e-05, "loss": 1.779, "step": 100890 }, { "epoch": 0.6341798754738476, "grad_norm": 5.868208885192871, "learning_rate": 1.577705506148211e-05, "loss": 1.5361, "step": 100900 }, { "epoch": 0.6342427277905447, "grad_norm": 6.817616939544678, "learning_rate": 1.5776635960537456e-05, "loss": 1.7818, "step": 100910 }, { "epoch": 0.6343055801072418, "grad_norm": 6.6354451179504395, "learning_rate": 1.5776216859592803e-05, "loss": 1.8738, "step": 100920 }, { "epoch": 0.6343684324239389, "grad_norm": 5.966352939605713, "learning_rate": 1.577579775864815e-05, "loss": 1.8658, "step": 100930 }, { "epoch": 0.634431284740636, "grad_norm": 7.401264667510986, "learning_rate": 1.5775378657703494e-05, "loss": 1.8332, "step": 100940 }, { "epoch": 0.6344941370573332, "grad_norm": 6.14589262008667, "learning_rate": 1.577495955675884e-05, "loss": 1.7332, "step": 100950 }, { "epoch": 0.6345569893740302, "grad_norm": 6.752025127410889, "learning_rate": 1.5774540455814188e-05, "loss": 1.6043, "step": 100960 }, { "epoch": 0.6346198416907273, "grad_norm": 6.3684258460998535, "learning_rate": 1.5774121354869535e-05, "loss": 1.5993, "step": 100970 }, { "epoch": 0.6346826940074244, "grad_norm": 6.318819522857666, "learning_rate": 1.5773702253924882e-05, "loss": 1.6613, "step": 100980 }, { "epoch": 0.6347455463241215, "grad_norm": 5.311209678649902, "learning_rate": 1.577328315298023e-05, "loss": 1.6114, "step": 100990 }, { "epoch": 0.6348083986408186, "grad_norm": 6.803685188293457, "learning_rate": 1.5772864052035576e-05, "loss": 1.6155, "step": 101000 }, { "epoch": 0.6348712509575157, "grad_norm": 6.295719623565674, "learning_rate": 1.5772444951090923e-05, "loss": 1.6225, "step": 101010 }, { "epoch": 0.6349341032742128, "grad_norm": 6.521117687225342, "learning_rate": 1.5772025850146267e-05, "loss": 1.5671, "step": 101020 }, { "epoch": 0.63499695559091, "grad_norm": 6.544680595397949, "learning_rate": 1.5771606749201614e-05, "loss": 1.4785, "step": 101030 }, { "epoch": 0.6350598079076071, "grad_norm": 6.518697738647461, "learning_rate": 1.577118764825696e-05, "loss": 1.6918, "step": 101040 }, { "epoch": 0.6351226602243042, "grad_norm": 7.030117511749268, "learning_rate": 1.5770768547312308e-05, "loss": 1.6475, "step": 101050 }, { "epoch": 0.6351855125410013, "grad_norm": 6.561397075653076, "learning_rate": 1.5770349446367655e-05, "loss": 1.5397, "step": 101060 }, { "epoch": 0.6352483648576984, "grad_norm": 6.138189792633057, "learning_rate": 1.5769930345423e-05, "loss": 1.7127, "step": 101070 }, { "epoch": 0.6353112171743955, "grad_norm": 6.9077277183532715, "learning_rate": 1.5769511244478346e-05, "loss": 1.7314, "step": 101080 }, { "epoch": 0.6353740694910927, "grad_norm": 6.724200248718262, "learning_rate": 1.5769092143533693e-05, "loss": 1.6853, "step": 101090 }, { "epoch": 0.6354369218077898, "grad_norm": 7.189093589782715, "learning_rate": 1.576867304258904e-05, "loss": 1.6719, "step": 101100 }, { "epoch": 0.6354997741244869, "grad_norm": 6.973060607910156, "learning_rate": 1.5768253941644384e-05, "loss": 1.5184, "step": 101110 }, { "epoch": 0.635562626441184, "grad_norm": 7.763575553894043, "learning_rate": 1.576783484069973e-05, "loss": 1.6132, "step": 101120 }, { "epoch": 0.6356254787578811, "grad_norm": 5.495080471038818, "learning_rate": 1.5767415739755078e-05, "loss": 1.694, "step": 101130 }, { "epoch": 0.6356883310745782, "grad_norm": 6.750556468963623, "learning_rate": 1.5766996638810425e-05, "loss": 1.5549, "step": 101140 }, { "epoch": 0.6357511833912753, "grad_norm": 6.03955078125, "learning_rate": 1.5766577537865772e-05, "loss": 1.716, "step": 101150 }, { "epoch": 0.6358140357079725, "grad_norm": 4.914484024047852, "learning_rate": 1.576615843692112e-05, "loss": 1.5294, "step": 101160 }, { "epoch": 0.6358768880246696, "grad_norm": 6.851123809814453, "learning_rate": 1.5765739335976463e-05, "loss": 1.582, "step": 101170 }, { "epoch": 0.6359397403413667, "grad_norm": 6.215464115142822, "learning_rate": 1.576532023503181e-05, "loss": 1.5753, "step": 101180 }, { "epoch": 0.6360025926580638, "grad_norm": 7.140315532684326, "learning_rate": 1.5764901134087157e-05, "loss": 1.7042, "step": 101190 }, { "epoch": 0.6360654449747609, "grad_norm": 6.683956146240234, "learning_rate": 1.5764482033142504e-05, "loss": 1.4798, "step": 101200 }, { "epoch": 0.6361282972914579, "grad_norm": 6.934081077575684, "learning_rate": 1.576406293219785e-05, "loss": 1.6671, "step": 101210 }, { "epoch": 0.636191149608155, "grad_norm": 6.101939678192139, "learning_rate": 1.5763643831253198e-05, "loss": 1.5294, "step": 101220 }, { "epoch": 0.6362540019248522, "grad_norm": 6.583488941192627, "learning_rate": 1.5763224730308545e-05, "loss": 1.6551, "step": 101230 }, { "epoch": 0.6363168542415493, "grad_norm": 5.876173973083496, "learning_rate": 1.5762805629363892e-05, "loss": 1.6241, "step": 101240 }, { "epoch": 0.6363797065582464, "grad_norm": 7.093881130218506, "learning_rate": 1.5762386528419236e-05, "loss": 1.5319, "step": 101250 }, { "epoch": 0.6364425588749435, "grad_norm": 6.061024188995361, "learning_rate": 1.5761967427474583e-05, "loss": 1.6872, "step": 101260 }, { "epoch": 0.6365054111916406, "grad_norm": 6.763847351074219, "learning_rate": 1.576154832652993e-05, "loss": 1.7427, "step": 101270 }, { "epoch": 0.6365682635083377, "grad_norm": 6.708096027374268, "learning_rate": 1.5761129225585277e-05, "loss": 1.7042, "step": 101280 }, { "epoch": 0.6366311158250348, "grad_norm": 6.899614334106445, "learning_rate": 1.576071012464062e-05, "loss": 1.7199, "step": 101290 }, { "epoch": 0.636693968141732, "grad_norm": 6.982869625091553, "learning_rate": 1.5760291023695968e-05, "loss": 1.4169, "step": 101300 }, { "epoch": 0.6367568204584291, "grad_norm": 5.419250011444092, "learning_rate": 1.5759871922751315e-05, "loss": 1.4193, "step": 101310 }, { "epoch": 0.6368196727751262, "grad_norm": 6.4072041511535645, "learning_rate": 1.5759452821806662e-05, "loss": 1.724, "step": 101320 }, { "epoch": 0.6368825250918233, "grad_norm": 6.207793235778809, "learning_rate": 1.575903372086201e-05, "loss": 1.648, "step": 101330 }, { "epoch": 0.6369453774085204, "grad_norm": 6.272695064544678, "learning_rate": 1.5758614619917353e-05, "loss": 1.4553, "step": 101340 }, { "epoch": 0.6370082297252175, "grad_norm": 7.17915678024292, "learning_rate": 1.57581955189727e-05, "loss": 1.6907, "step": 101350 }, { "epoch": 0.6370710820419146, "grad_norm": 7.949512958526611, "learning_rate": 1.5757776418028047e-05, "loss": 1.4746, "step": 101360 }, { "epoch": 0.6371339343586118, "grad_norm": 7.116039276123047, "learning_rate": 1.5757357317083394e-05, "loss": 1.6751, "step": 101370 }, { "epoch": 0.6371967866753089, "grad_norm": 6.2278923988342285, "learning_rate": 1.575693821613874e-05, "loss": 1.528, "step": 101380 }, { "epoch": 0.637259638992006, "grad_norm": 6.393247127532959, "learning_rate": 1.5756519115194088e-05, "loss": 1.664, "step": 101390 }, { "epoch": 0.6373224913087031, "grad_norm": 7.355311393737793, "learning_rate": 1.5756100014249435e-05, "loss": 1.6343, "step": 101400 }, { "epoch": 0.6373853436254002, "grad_norm": 5.815723896026611, "learning_rate": 1.5755680913304782e-05, "loss": 1.637, "step": 101410 }, { "epoch": 0.6374481959420973, "grad_norm": 6.428781032562256, "learning_rate": 1.5755261812360126e-05, "loss": 1.7356, "step": 101420 }, { "epoch": 0.6375110482587945, "grad_norm": 6.101195335388184, "learning_rate": 1.5754842711415473e-05, "loss": 1.514, "step": 101430 }, { "epoch": 0.6375739005754916, "grad_norm": 6.362186908721924, "learning_rate": 1.575442361047082e-05, "loss": 1.7776, "step": 101440 }, { "epoch": 0.6376367528921887, "grad_norm": 7.063998222351074, "learning_rate": 1.5754004509526167e-05, "loss": 1.6367, "step": 101450 }, { "epoch": 0.6376996052088858, "grad_norm": 7.601218223571777, "learning_rate": 1.5753585408581514e-05, "loss": 1.6837, "step": 101460 }, { "epoch": 0.6377624575255828, "grad_norm": 6.573344707489014, "learning_rate": 1.5753166307636858e-05, "loss": 1.5347, "step": 101470 }, { "epoch": 0.6378253098422799, "grad_norm": 5.948842525482178, "learning_rate": 1.5752747206692205e-05, "loss": 1.528, "step": 101480 }, { "epoch": 0.637888162158977, "grad_norm": 6.568027019500732, "learning_rate": 1.5752328105747552e-05, "loss": 1.4821, "step": 101490 }, { "epoch": 0.6379510144756741, "grad_norm": 5.7263875007629395, "learning_rate": 1.57519090048029e-05, "loss": 1.4768, "step": 101500 }, { "epoch": 0.6380138667923713, "grad_norm": 7.680974960327148, "learning_rate": 1.5751489903858243e-05, "loss": 1.8568, "step": 101510 }, { "epoch": 0.6380767191090684, "grad_norm": 6.494784355163574, "learning_rate": 1.575107080291359e-05, "loss": 1.6616, "step": 101520 }, { "epoch": 0.6381395714257655, "grad_norm": 6.3536224365234375, "learning_rate": 1.5750651701968937e-05, "loss": 1.6196, "step": 101530 }, { "epoch": 0.6382024237424626, "grad_norm": 5.7544145584106445, "learning_rate": 1.5750232601024284e-05, "loss": 1.5604, "step": 101540 }, { "epoch": 0.6382652760591597, "grad_norm": 7.191973686218262, "learning_rate": 1.574981350007963e-05, "loss": 1.6539, "step": 101550 }, { "epoch": 0.6383281283758568, "grad_norm": 7.5846028327941895, "learning_rate": 1.5749394399134975e-05, "loss": 1.6648, "step": 101560 }, { "epoch": 0.638390980692554, "grad_norm": 6.780168056488037, "learning_rate": 1.5748975298190322e-05, "loss": 1.8323, "step": 101570 }, { "epoch": 0.6384538330092511, "grad_norm": 7.254518508911133, "learning_rate": 1.574855619724567e-05, "loss": 1.6612, "step": 101580 }, { "epoch": 0.6385166853259482, "grad_norm": 7.408272743225098, "learning_rate": 1.5748137096301016e-05, "loss": 1.6778, "step": 101590 }, { "epoch": 0.6385795376426453, "grad_norm": 6.666190147399902, "learning_rate": 1.5747717995356363e-05, "loss": 1.6954, "step": 101600 }, { "epoch": 0.6386423899593424, "grad_norm": 7.656550407409668, "learning_rate": 1.574729889441171e-05, "loss": 1.8318, "step": 101610 }, { "epoch": 0.6387052422760395, "grad_norm": 7.480816841125488, "learning_rate": 1.5746879793467057e-05, "loss": 1.4682, "step": 101620 }, { "epoch": 0.6387680945927366, "grad_norm": 7.185154914855957, "learning_rate": 1.5746460692522404e-05, "loss": 1.5891, "step": 101630 }, { "epoch": 0.6388309469094338, "grad_norm": 6.208061695098877, "learning_rate": 1.574604159157775e-05, "loss": 1.6934, "step": 101640 }, { "epoch": 0.6388937992261309, "grad_norm": 6.467697620391846, "learning_rate": 1.5745622490633095e-05, "loss": 1.5176, "step": 101650 }, { "epoch": 0.638956651542828, "grad_norm": 6.263512134552002, "learning_rate": 1.5745203389688442e-05, "loss": 1.8768, "step": 101660 }, { "epoch": 0.6390195038595251, "grad_norm": 5.4175872802734375, "learning_rate": 1.574478428874379e-05, "loss": 1.65, "step": 101670 }, { "epoch": 0.6390823561762222, "grad_norm": 5.825191020965576, "learning_rate": 1.5744365187799136e-05, "loss": 1.7513, "step": 101680 }, { "epoch": 0.6391452084929193, "grad_norm": 5.984965801239014, "learning_rate": 1.574394608685448e-05, "loss": 1.8157, "step": 101690 }, { "epoch": 0.6392080608096165, "grad_norm": 7.22231388092041, "learning_rate": 1.5743526985909827e-05, "loss": 1.6587, "step": 101700 }, { "epoch": 0.6392709131263136, "grad_norm": 6.928992748260498, "learning_rate": 1.5743107884965174e-05, "loss": 1.7003, "step": 101710 }, { "epoch": 0.6393337654430106, "grad_norm": 7.389366626739502, "learning_rate": 1.574268878402052e-05, "loss": 1.4202, "step": 101720 }, { "epoch": 0.6393966177597077, "grad_norm": 5.17368745803833, "learning_rate": 1.5742269683075865e-05, "loss": 1.6994, "step": 101730 }, { "epoch": 0.6394594700764048, "grad_norm": 6.973698139190674, "learning_rate": 1.5741850582131212e-05, "loss": 1.7086, "step": 101740 }, { "epoch": 0.6395223223931019, "grad_norm": 6.596184730529785, "learning_rate": 1.574143148118656e-05, "loss": 1.7873, "step": 101750 }, { "epoch": 0.639585174709799, "grad_norm": 6.920825481414795, "learning_rate": 1.5741012380241906e-05, "loss": 1.6646, "step": 101760 }, { "epoch": 0.6396480270264961, "grad_norm": 6.680747985839844, "learning_rate": 1.5740593279297253e-05, "loss": 1.67, "step": 101770 }, { "epoch": 0.6397108793431933, "grad_norm": 6.86875581741333, "learning_rate": 1.57401741783526e-05, "loss": 1.8445, "step": 101780 }, { "epoch": 0.6397737316598904, "grad_norm": 7.263949394226074, "learning_rate": 1.5739755077407944e-05, "loss": 1.6791, "step": 101790 }, { "epoch": 0.6398365839765875, "grad_norm": 6.624402046203613, "learning_rate": 1.573933597646329e-05, "loss": 1.7059, "step": 101800 }, { "epoch": 0.6398994362932846, "grad_norm": 5.630288124084473, "learning_rate": 1.5738916875518638e-05, "loss": 1.4169, "step": 101810 }, { "epoch": 0.6399622886099817, "grad_norm": 7.361122131347656, "learning_rate": 1.5738497774573985e-05, "loss": 1.6202, "step": 101820 }, { "epoch": 0.6400251409266788, "grad_norm": 6.6272430419921875, "learning_rate": 1.5738078673629332e-05, "loss": 1.5626, "step": 101830 }, { "epoch": 0.640087993243376, "grad_norm": 5.221563339233398, "learning_rate": 1.573765957268468e-05, "loss": 1.4445, "step": 101840 }, { "epoch": 0.6401508455600731, "grad_norm": 7.25012731552124, "learning_rate": 1.5737240471740026e-05, "loss": 1.494, "step": 101850 }, { "epoch": 0.6402136978767702, "grad_norm": 7.599103927612305, "learning_rate": 1.5736821370795373e-05, "loss": 1.6157, "step": 101860 }, { "epoch": 0.6402765501934673, "grad_norm": 6.354990005493164, "learning_rate": 1.5736402269850717e-05, "loss": 1.7489, "step": 101870 }, { "epoch": 0.6403394025101644, "grad_norm": 5.892938613891602, "learning_rate": 1.5735983168906064e-05, "loss": 1.4894, "step": 101880 }, { "epoch": 0.6404022548268615, "grad_norm": 6.195607662200928, "learning_rate": 1.573556406796141e-05, "loss": 1.7119, "step": 101890 }, { "epoch": 0.6404651071435586, "grad_norm": 6.346561431884766, "learning_rate": 1.5735144967016758e-05, "loss": 1.5715, "step": 101900 }, { "epoch": 0.6405279594602558, "grad_norm": 7.593139171600342, "learning_rate": 1.5734725866072102e-05, "loss": 1.742, "step": 101910 }, { "epoch": 0.6405908117769529, "grad_norm": 5.391120433807373, "learning_rate": 1.573430676512745e-05, "loss": 1.5101, "step": 101920 }, { "epoch": 0.64065366409365, "grad_norm": 8.564781188964844, "learning_rate": 1.5733887664182796e-05, "loss": 1.6782, "step": 101930 }, { "epoch": 0.6407165164103471, "grad_norm": 6.898707866668701, "learning_rate": 1.5733468563238143e-05, "loss": 1.8672, "step": 101940 }, { "epoch": 0.6407793687270442, "grad_norm": 6.528625965118408, "learning_rate": 1.573304946229349e-05, "loss": 1.7425, "step": 101950 }, { "epoch": 0.6408422210437413, "grad_norm": Infinity, "learning_rate": 1.5732630361348834e-05, "loss": 1.5653, "step": 101960 }, { "epoch": 0.6409050733604384, "grad_norm": 7.259417533874512, "learning_rate": 1.573225317049865e-05, "loss": 1.67, "step": 101970 }, { "epoch": 0.6409679256771355, "grad_norm": 6.333691596984863, "learning_rate": 1.5731834069553996e-05, "loss": 1.4398, "step": 101980 }, { "epoch": 0.6410307779938326, "grad_norm": 6.996382713317871, "learning_rate": 1.573141496860934e-05, "loss": 1.6902, "step": 101990 }, { "epoch": 0.6410936303105297, "grad_norm": 6.314414024353027, "learning_rate": 1.5730995867664686e-05, "loss": 1.6564, "step": 102000 }, { "epoch": 0.6411564826272268, "grad_norm": 6.953320503234863, "learning_rate": 1.5730576766720033e-05, "loss": 1.7557, "step": 102010 }, { "epoch": 0.6412193349439239, "grad_norm": 5.498722553253174, "learning_rate": 1.573015766577538e-05, "loss": 1.5373, "step": 102020 }, { "epoch": 0.641282187260621, "grad_norm": 6.309254169464111, "learning_rate": 1.5729738564830724e-05, "loss": 1.6494, "step": 102030 }, { "epoch": 0.6413450395773181, "grad_norm": 6.082536697387695, "learning_rate": 1.572931946388607e-05, "loss": 1.4346, "step": 102040 }, { "epoch": 0.6414078918940153, "grad_norm": 6.319396018981934, "learning_rate": 1.572890036294142e-05, "loss": 1.5892, "step": 102050 }, { "epoch": 0.6414707442107124, "grad_norm": 6.706999778747559, "learning_rate": 1.5728481261996765e-05, "loss": 1.7324, "step": 102060 }, { "epoch": 0.6415335965274095, "grad_norm": 6.6370649337768555, "learning_rate": 1.5728062161052113e-05, "loss": 1.7239, "step": 102070 }, { "epoch": 0.6415964488441066, "grad_norm": 6.0964274406433105, "learning_rate": 1.572764306010746e-05, "loss": 1.7505, "step": 102080 }, { "epoch": 0.6416593011608037, "grad_norm": 7.073576927185059, "learning_rate": 1.5727223959162807e-05, "loss": 1.6065, "step": 102090 }, { "epoch": 0.6417221534775008, "grad_norm": 6.858996391296387, "learning_rate": 1.5726804858218154e-05, "loss": 1.6198, "step": 102100 }, { "epoch": 0.641785005794198, "grad_norm": 6.487966537475586, "learning_rate": 1.5726385757273497e-05, "loss": 1.7107, "step": 102110 }, { "epoch": 0.6418478581108951, "grad_norm": 6.950842380523682, "learning_rate": 1.5725966656328844e-05, "loss": 1.6784, "step": 102120 }, { "epoch": 0.6419107104275922, "grad_norm": 6.250916957855225, "learning_rate": 1.572554755538419e-05, "loss": 1.6726, "step": 102130 }, { "epoch": 0.6419735627442893, "grad_norm": 6.4210028648376465, "learning_rate": 1.572512845443954e-05, "loss": 1.6568, "step": 102140 }, { "epoch": 0.6420364150609864, "grad_norm": 6.273014068603516, "learning_rate": 1.5724709353494886e-05, "loss": 1.5464, "step": 102150 }, { "epoch": 0.6420992673776835, "grad_norm": 6.026818752288818, "learning_rate": 1.572429025255023e-05, "loss": 1.5868, "step": 102160 }, { "epoch": 0.6421621196943806, "grad_norm": 7.14760160446167, "learning_rate": 1.5723871151605576e-05, "loss": 1.7496, "step": 102170 }, { "epoch": 0.6422249720110778, "grad_norm": 6.712249755859375, "learning_rate": 1.5723452050660924e-05, "loss": 1.7964, "step": 102180 }, { "epoch": 0.6422878243277749, "grad_norm": 7.693052291870117, "learning_rate": 1.572303294971627e-05, "loss": 1.5236, "step": 102190 }, { "epoch": 0.642350676644472, "grad_norm": 6.573236465454102, "learning_rate": 1.5722613848771618e-05, "loss": 1.4585, "step": 102200 }, { "epoch": 0.6424135289611691, "grad_norm": 6.692652702331543, "learning_rate": 1.572219474782696e-05, "loss": 1.5833, "step": 102210 }, { "epoch": 0.6424763812778662, "grad_norm": 6.677134037017822, "learning_rate": 1.572177564688231e-05, "loss": 1.6617, "step": 102220 }, { "epoch": 0.6425392335945632, "grad_norm": 6.623659610748291, "learning_rate": 1.5721356545937655e-05, "loss": 1.9046, "step": 102230 }, { "epoch": 0.6426020859112603, "grad_norm": 5.9338812828063965, "learning_rate": 1.5720937444993003e-05, "loss": 1.5888, "step": 102240 }, { "epoch": 0.6426649382279574, "grad_norm": 6.421466827392578, "learning_rate": 1.5720518344048346e-05, "loss": 1.5223, "step": 102250 }, { "epoch": 0.6427277905446546, "grad_norm": 6.812142372131348, "learning_rate": 1.5720099243103693e-05, "loss": 1.8059, "step": 102260 }, { "epoch": 0.6427906428613517, "grad_norm": 6.643022060394287, "learning_rate": 1.571968014215904e-05, "loss": 1.5783, "step": 102270 }, { "epoch": 0.6428534951780488, "grad_norm": 7.271981716156006, "learning_rate": 1.5719261041214387e-05, "loss": 1.5519, "step": 102280 }, { "epoch": 0.6429163474947459, "grad_norm": 6.32779598236084, "learning_rate": 1.5718841940269735e-05, "loss": 1.602, "step": 102290 }, { "epoch": 0.642979199811443, "grad_norm": 6.529623985290527, "learning_rate": 1.571842283932508e-05, "loss": 1.6133, "step": 102300 }, { "epoch": 0.6430420521281401, "grad_norm": 7.365950584411621, "learning_rate": 1.571800373838043e-05, "loss": 1.6673, "step": 102310 }, { "epoch": 0.6431049044448373, "grad_norm": 6.111841678619385, "learning_rate": 1.5717584637435776e-05, "loss": 1.484, "step": 102320 }, { "epoch": 0.6431677567615344, "grad_norm": 7.701827049255371, "learning_rate": 1.5717165536491123e-05, "loss": 1.7046, "step": 102330 }, { "epoch": 0.6432306090782315, "grad_norm": 6.581363201141357, "learning_rate": 1.5716746435546466e-05, "loss": 1.7479, "step": 102340 }, { "epoch": 0.6432934613949286, "grad_norm": 6.214954853057861, "learning_rate": 1.5716327334601814e-05, "loss": 1.5456, "step": 102350 }, { "epoch": 0.6433563137116257, "grad_norm": 6.552243709564209, "learning_rate": 1.571590823365716e-05, "loss": 1.4382, "step": 102360 }, { "epoch": 0.6434191660283228, "grad_norm": 5.998539447784424, "learning_rate": 1.5715489132712508e-05, "loss": 1.5903, "step": 102370 }, { "epoch": 0.64348201834502, "grad_norm": 8.151195526123047, "learning_rate": 1.5715070031767855e-05, "loss": 1.8683, "step": 102380 }, { "epoch": 0.6435448706617171, "grad_norm": 6.959873676300049, "learning_rate": 1.57146509308232e-05, "loss": 1.6179, "step": 102390 }, { "epoch": 0.6436077229784142, "grad_norm": 7.281445026397705, "learning_rate": 1.5714231829878546e-05, "loss": 1.7423, "step": 102400 }, { "epoch": 0.6436705752951113, "grad_norm": 6.711086750030518, "learning_rate": 1.5713812728933893e-05, "loss": 1.5798, "step": 102410 }, { "epoch": 0.6437334276118084, "grad_norm": 6.808206081390381, "learning_rate": 1.571339362798924e-05, "loss": 1.5961, "step": 102420 }, { "epoch": 0.6437962799285055, "grad_norm": 6.403003692626953, "learning_rate": 1.5712974527044583e-05, "loss": 1.7072, "step": 102430 }, { "epoch": 0.6438591322452026, "grad_norm": 6.481310844421387, "learning_rate": 1.571255542609993e-05, "loss": 1.758, "step": 102440 }, { "epoch": 0.6439219845618998, "grad_norm": 7.031172275543213, "learning_rate": 1.5712136325155277e-05, "loss": 1.7948, "step": 102450 }, { "epoch": 0.6439848368785969, "grad_norm": 6.796384334564209, "learning_rate": 1.5711717224210625e-05, "loss": 1.5936, "step": 102460 }, { "epoch": 0.644047689195294, "grad_norm": 7.230562686920166, "learning_rate": 1.571129812326597e-05, "loss": 1.6324, "step": 102470 }, { "epoch": 0.6441105415119911, "grad_norm": 7.450069427490234, "learning_rate": 1.571087902232132e-05, "loss": 1.5685, "step": 102480 }, { "epoch": 0.6441733938286881, "grad_norm": 5.21614933013916, "learning_rate": 1.5710459921376662e-05, "loss": 1.654, "step": 102490 }, { "epoch": 0.6442362461453852, "grad_norm": 6.1010332107543945, "learning_rate": 1.571004082043201e-05, "loss": 1.4736, "step": 102500 }, { "epoch": 0.6442990984620823, "grad_norm": 6.872131824493408, "learning_rate": 1.5709621719487357e-05, "loss": 1.9806, "step": 102510 }, { "epoch": 0.6443619507787794, "grad_norm": 6.7846760749816895, "learning_rate": 1.5709202618542704e-05, "loss": 1.8059, "step": 102520 }, { "epoch": 0.6444248030954766, "grad_norm": 6.188438415527344, "learning_rate": 1.570878351759805e-05, "loss": 1.5085, "step": 102530 }, { "epoch": 0.6444876554121737, "grad_norm": 6.045825004577637, "learning_rate": 1.5708364416653398e-05, "loss": 1.613, "step": 102540 }, { "epoch": 0.6445505077288708, "grad_norm": 7.195934295654297, "learning_rate": 1.5707945315708745e-05, "loss": 1.7093, "step": 102550 }, { "epoch": 0.6446133600455679, "grad_norm": 7.038455009460449, "learning_rate": 1.570752621476409e-05, "loss": 1.7824, "step": 102560 }, { "epoch": 0.644676212362265, "grad_norm": 7.097109317779541, "learning_rate": 1.5707107113819436e-05, "loss": 1.571, "step": 102570 }, { "epoch": 0.6447390646789621, "grad_norm": 6.5821757316589355, "learning_rate": 1.5706688012874783e-05, "loss": 1.6042, "step": 102580 }, { "epoch": 0.6448019169956593, "grad_norm": 6.128716945648193, "learning_rate": 1.570626891193013e-05, "loss": 1.5795, "step": 102590 }, { "epoch": 0.6448647693123564, "grad_norm": 7.007327556610107, "learning_rate": 1.5705849810985477e-05, "loss": 1.6193, "step": 102600 }, { "epoch": 0.6449276216290535, "grad_norm": 7.820053577423096, "learning_rate": 1.570543071004082e-05, "loss": 1.6681, "step": 102610 }, { "epoch": 0.6449904739457506, "grad_norm": 5.884525299072266, "learning_rate": 1.5705011609096168e-05, "loss": 1.4219, "step": 102620 }, { "epoch": 0.6450533262624477, "grad_norm": 6.69332218170166, "learning_rate": 1.5704592508151515e-05, "loss": 1.6222, "step": 102630 }, { "epoch": 0.6451161785791448, "grad_norm": 5.9894938468933105, "learning_rate": 1.570417340720686e-05, "loss": 1.5548, "step": 102640 }, { "epoch": 0.6451790308958419, "grad_norm": 7.076059341430664, "learning_rate": 1.5703754306262205e-05, "loss": 1.6576, "step": 102650 }, { "epoch": 0.6452418832125391, "grad_norm": 7.720175743103027, "learning_rate": 1.5703335205317552e-05, "loss": 1.6047, "step": 102660 }, { "epoch": 0.6453047355292362, "grad_norm": 5.937623500823975, "learning_rate": 1.57029161043729e-05, "loss": 1.5105, "step": 102670 }, { "epoch": 0.6453675878459333, "grad_norm": 7.381980895996094, "learning_rate": 1.5702497003428247e-05, "loss": 1.4033, "step": 102680 }, { "epoch": 0.6454304401626304, "grad_norm": 6.697049140930176, "learning_rate": 1.5702077902483594e-05, "loss": 1.7542, "step": 102690 }, { "epoch": 0.6454932924793275, "grad_norm": 6.8227972984313965, "learning_rate": 1.570165880153894e-05, "loss": 1.6011, "step": 102700 }, { "epoch": 0.6455561447960246, "grad_norm": 7.084945201873779, "learning_rate": 1.5701239700594288e-05, "loss": 1.7403, "step": 102710 }, { "epoch": 0.6456189971127217, "grad_norm": 6.482322692871094, "learning_rate": 1.5700820599649635e-05, "loss": 1.657, "step": 102720 }, { "epoch": 0.6456818494294189, "grad_norm": 6.782051086425781, "learning_rate": 1.570040149870498e-05, "loss": 1.6043, "step": 102730 }, { "epoch": 0.6457447017461159, "grad_norm": 7.116504192352295, "learning_rate": 1.5699982397760326e-05, "loss": 1.6849, "step": 102740 }, { "epoch": 0.645807554062813, "grad_norm": 5.7617506980896, "learning_rate": 1.5699563296815673e-05, "loss": 1.5332, "step": 102750 }, { "epoch": 0.6458704063795101, "grad_norm": 6.336673259735107, "learning_rate": 1.569914419587102e-05, "loss": 1.7876, "step": 102760 }, { "epoch": 0.6459332586962072, "grad_norm": 7.29729700088501, "learning_rate": 1.5698725094926367e-05, "loss": 1.7959, "step": 102770 }, { "epoch": 0.6459961110129043, "grad_norm": 6.527895450592041, "learning_rate": 1.569830599398171e-05, "loss": 1.8514, "step": 102780 }, { "epoch": 0.6460589633296014, "grad_norm": 6.2483744621276855, "learning_rate": 1.5697886893037058e-05, "loss": 1.7979, "step": 102790 }, { "epoch": 0.6461218156462986, "grad_norm": 7.259784698486328, "learning_rate": 1.5697467792092405e-05, "loss": 1.6971, "step": 102800 }, { "epoch": 0.6461846679629957, "grad_norm": 6.024012088775635, "learning_rate": 1.5697048691147752e-05, "loss": 1.6908, "step": 102810 }, { "epoch": 0.6462475202796928, "grad_norm": 6.680114269256592, "learning_rate": 1.56966295902031e-05, "loss": 1.7216, "step": 102820 }, { "epoch": 0.6463103725963899, "grad_norm": 7.588250160217285, "learning_rate": 1.5696210489258442e-05, "loss": 1.5704, "step": 102830 }, { "epoch": 0.646373224913087, "grad_norm": 5.725822925567627, "learning_rate": 1.569579138831379e-05, "loss": 1.8783, "step": 102840 }, { "epoch": 0.6464360772297841, "grad_norm": 6.266225337982178, "learning_rate": 1.5695372287369137e-05, "loss": 1.33, "step": 102850 }, { "epoch": 0.6464989295464812, "grad_norm": 6.439865589141846, "learning_rate": 1.5694953186424484e-05, "loss": 1.7816, "step": 102860 }, { "epoch": 0.6465617818631784, "grad_norm": 8.650877952575684, "learning_rate": 1.5694534085479827e-05, "loss": 1.7552, "step": 102870 }, { "epoch": 0.6466246341798755, "grad_norm": 6.840614318847656, "learning_rate": 1.5694114984535174e-05, "loss": 1.6594, "step": 102880 }, { "epoch": 0.6466874864965726, "grad_norm": 6.866539478302002, "learning_rate": 1.569369588359052e-05, "loss": 1.6927, "step": 102890 }, { "epoch": 0.6467503388132697, "grad_norm": 6.2211785316467285, "learning_rate": 1.569327678264587e-05, "loss": 1.6892, "step": 102900 }, { "epoch": 0.6468131911299668, "grad_norm": 6.687888145446777, "learning_rate": 1.5692857681701216e-05, "loss": 1.6261, "step": 102910 }, { "epoch": 0.6468760434466639, "grad_norm": 6.953901290893555, "learning_rate": 1.5692438580756563e-05, "loss": 1.6963, "step": 102920 }, { "epoch": 0.646938895763361, "grad_norm": 7.242424488067627, "learning_rate": 1.569201947981191e-05, "loss": 1.7554, "step": 102930 }, { "epoch": 0.6470017480800582, "grad_norm": 7.113307476043701, "learning_rate": 1.5691600378867257e-05, "loss": 1.8331, "step": 102940 }, { "epoch": 0.6470646003967553, "grad_norm": 6.461259365081787, "learning_rate": 1.5691181277922604e-05, "loss": 1.6479, "step": 102950 }, { "epoch": 0.6471274527134524, "grad_norm": 7.612675666809082, "learning_rate": 1.5690762176977948e-05, "loss": 1.7094, "step": 102960 }, { "epoch": 0.6471903050301495, "grad_norm": 7.381922721862793, "learning_rate": 1.5690343076033295e-05, "loss": 1.5518, "step": 102970 }, { "epoch": 0.6472531573468466, "grad_norm": 5.682344436645508, "learning_rate": 1.5689923975088642e-05, "loss": 1.4742, "step": 102980 }, { "epoch": 0.6473160096635437, "grad_norm": 6.8673272132873535, "learning_rate": 1.568950487414399e-05, "loss": 1.5129, "step": 102990 }, { "epoch": 0.6473788619802407, "grad_norm": 6.724424839019775, "learning_rate": 1.5689085773199336e-05, "loss": 1.5424, "step": 103000 }, { "epoch": 0.6474417142969379, "grad_norm": 6.3348212242126465, "learning_rate": 1.568866667225468e-05, "loss": 1.6309, "step": 103010 }, { "epoch": 0.647504566613635, "grad_norm": 6.291509628295898, "learning_rate": 1.5688247571310027e-05, "loss": 1.5779, "step": 103020 }, { "epoch": 0.6475674189303321, "grad_norm": 6.038863182067871, "learning_rate": 1.5687828470365374e-05, "loss": 1.7678, "step": 103030 }, { "epoch": 0.6476302712470292, "grad_norm": 8.962211608886719, "learning_rate": 1.568740936942072e-05, "loss": 1.6524, "step": 103040 }, { "epoch": 0.6476931235637263, "grad_norm": 6.943170070648193, "learning_rate": 1.5686990268476064e-05, "loss": 1.5671, "step": 103050 }, { "epoch": 0.6477559758804234, "grad_norm": 6.481675148010254, "learning_rate": 1.568657116753141e-05, "loss": 1.551, "step": 103060 }, { "epoch": 0.6478188281971206, "grad_norm": 6.505732536315918, "learning_rate": 1.568615206658676e-05, "loss": 1.763, "step": 103070 }, { "epoch": 0.6478816805138177, "grad_norm": 7.271378040313721, "learning_rate": 1.5685732965642106e-05, "loss": 1.7787, "step": 103080 }, { "epoch": 0.6479445328305148, "grad_norm": 8.255097389221191, "learning_rate": 1.5685313864697453e-05, "loss": 1.6224, "step": 103090 }, { "epoch": 0.6480073851472119, "grad_norm": 5.565671443939209, "learning_rate": 1.56848947637528e-05, "loss": 1.5235, "step": 103100 }, { "epoch": 0.648070237463909, "grad_norm": 6.7757134437561035, "learning_rate": 1.5684475662808143e-05, "loss": 1.5838, "step": 103110 }, { "epoch": 0.6481330897806061, "grad_norm": 7.188182353973389, "learning_rate": 1.568405656186349e-05, "loss": 1.7577, "step": 103120 }, { "epoch": 0.6481959420973032, "grad_norm": 6.870747089385986, "learning_rate": 1.5683637460918838e-05, "loss": 1.7028, "step": 103130 }, { "epoch": 0.6482587944140004, "grad_norm": 5.249590873718262, "learning_rate": 1.5683218359974185e-05, "loss": 1.7051, "step": 103140 }, { "epoch": 0.6483216467306975, "grad_norm": 5.709137916564941, "learning_rate": 1.5682799259029532e-05, "loss": 1.6306, "step": 103150 }, { "epoch": 0.6483844990473946, "grad_norm": 6.573286056518555, "learning_rate": 1.568238015808488e-05, "loss": 1.6941, "step": 103160 }, { "epoch": 0.6484473513640917, "grad_norm": 6.8041558265686035, "learning_rate": 1.5681961057140226e-05, "loss": 1.662, "step": 103170 }, { "epoch": 0.6485102036807888, "grad_norm": 6.927123546600342, "learning_rate": 1.568154195619557e-05, "loss": 1.8207, "step": 103180 }, { "epoch": 0.6485730559974859, "grad_norm": 5.384753704071045, "learning_rate": 1.5681122855250917e-05, "loss": 1.6552, "step": 103190 }, { "epoch": 0.648635908314183, "grad_norm": 7.417147159576416, "learning_rate": 1.5680703754306264e-05, "loss": 1.649, "step": 103200 }, { "epoch": 0.6486987606308802, "grad_norm": 7.085951805114746, "learning_rate": 1.568028465336161e-05, "loss": 1.4693, "step": 103210 }, { "epoch": 0.6487616129475773, "grad_norm": 7.058738708496094, "learning_rate": 1.5679865552416958e-05, "loss": 1.7497, "step": 103220 }, { "epoch": 0.6488244652642744, "grad_norm": 6.559217929840088, "learning_rate": 1.56794464514723e-05, "loss": 1.6933, "step": 103230 }, { "epoch": 0.6488873175809715, "grad_norm": 7.10174036026001, "learning_rate": 1.567902735052765e-05, "loss": 1.744, "step": 103240 }, { "epoch": 0.6489501698976686, "grad_norm": 8.040226936340332, "learning_rate": 1.5678608249582996e-05, "loss": 1.7605, "step": 103250 }, { "epoch": 0.6490130222143656, "grad_norm": 7.494002342224121, "learning_rate": 1.5678189148638343e-05, "loss": 1.4333, "step": 103260 }, { "epoch": 0.6490758745310627, "grad_norm": 8.5501708984375, "learning_rate": 1.5677770047693686e-05, "loss": 1.5655, "step": 103270 }, { "epoch": 0.6491387268477599, "grad_norm": 6.449906826019287, "learning_rate": 1.5677350946749034e-05, "loss": 1.6743, "step": 103280 }, { "epoch": 0.649201579164457, "grad_norm": 6.372710227966309, "learning_rate": 1.567693184580438e-05, "loss": 1.8011, "step": 103290 }, { "epoch": 0.6492644314811541, "grad_norm": 5.954245090484619, "learning_rate": 1.5676512744859728e-05, "loss": 1.595, "step": 103300 }, { "epoch": 0.6493272837978512, "grad_norm": 7.211219787597656, "learning_rate": 1.5676093643915075e-05, "loss": 1.7222, "step": 103310 }, { "epoch": 0.6493901361145483, "grad_norm": 6.185349464416504, "learning_rate": 1.5675674542970422e-05, "loss": 1.4639, "step": 103320 }, { "epoch": 0.6494529884312454, "grad_norm": 7.957022666931152, "learning_rate": 1.567525544202577e-05, "loss": 1.4498, "step": 103330 }, { "epoch": 0.6495158407479426, "grad_norm": 7.452658176422119, "learning_rate": 1.5674836341081116e-05, "loss": 1.7506, "step": 103340 }, { "epoch": 0.6495786930646397, "grad_norm": 7.014586448669434, "learning_rate": 1.5674417240136463e-05, "loss": 1.6089, "step": 103350 }, { "epoch": 0.6496415453813368, "grad_norm": 5.949844837188721, "learning_rate": 1.5673998139191807e-05, "loss": 1.6528, "step": 103360 }, { "epoch": 0.6497043976980339, "grad_norm": 7.606826305389404, "learning_rate": 1.5673579038247154e-05, "loss": 1.6338, "step": 103370 }, { "epoch": 0.649767250014731, "grad_norm": 6.81550407409668, "learning_rate": 1.56731599373025e-05, "loss": 1.4132, "step": 103380 }, { "epoch": 0.6498301023314281, "grad_norm": 5.845203876495361, "learning_rate": 1.5672740836357848e-05, "loss": 1.665, "step": 103390 }, { "epoch": 0.6498929546481252, "grad_norm": 7.393462181091309, "learning_rate": 1.567232173541319e-05, "loss": 1.5338, "step": 103400 }, { "epoch": 0.6499558069648224, "grad_norm": 6.444361686706543, "learning_rate": 1.567190263446854e-05, "loss": 1.6614, "step": 103410 }, { "epoch": 0.6500186592815195, "grad_norm": 6.161467552185059, "learning_rate": 1.5671483533523886e-05, "loss": 1.5387, "step": 103420 }, { "epoch": 0.6500815115982166, "grad_norm": 5.715375900268555, "learning_rate": 1.5671064432579233e-05, "loss": 1.708, "step": 103430 }, { "epoch": 0.6501443639149137, "grad_norm": 6.9999284744262695, "learning_rate": 1.567064533163458e-05, "loss": 1.5844, "step": 103440 }, { "epoch": 0.6502072162316108, "grad_norm": 6.0578765869140625, "learning_rate": 1.5670226230689924e-05, "loss": 1.5845, "step": 103450 }, { "epoch": 0.6502700685483079, "grad_norm": 7.019611358642578, "learning_rate": 1.566980712974527e-05, "loss": 1.6105, "step": 103460 }, { "epoch": 0.650332920865005, "grad_norm": 7.245494842529297, "learning_rate": 1.5669388028800618e-05, "loss": 1.739, "step": 103470 }, { "epoch": 0.6503957731817022, "grad_norm": 7.221858501434326, "learning_rate": 1.5668968927855965e-05, "loss": 1.669, "step": 103480 }, { "epoch": 0.6504586254983993, "grad_norm": 6.329747200012207, "learning_rate": 1.566854982691131e-05, "loss": 1.5785, "step": 103490 }, { "epoch": 0.6505214778150964, "grad_norm": 6.927884578704834, "learning_rate": 1.5668130725966656e-05, "loss": 1.5369, "step": 103500 }, { "epoch": 0.6505843301317934, "grad_norm": 6.898622035980225, "learning_rate": 1.5667711625022003e-05, "loss": 1.4242, "step": 103510 }, { "epoch": 0.6506471824484905, "grad_norm": 6.615671157836914, "learning_rate": 1.566729252407735e-05, "loss": 1.6846, "step": 103520 }, { "epoch": 0.6507100347651876, "grad_norm": 6.230980396270752, "learning_rate": 1.5666873423132697e-05, "loss": 1.6307, "step": 103530 }, { "epoch": 0.6507728870818847, "grad_norm": 7.339495658874512, "learning_rate": 1.5666454322188044e-05, "loss": 1.5946, "step": 103540 }, { "epoch": 0.6508357393985819, "grad_norm": 6.651918411254883, "learning_rate": 1.566603522124339e-05, "loss": 1.6696, "step": 103550 }, { "epoch": 0.650898591715279, "grad_norm": 7.0048980712890625, "learning_rate": 1.5665616120298738e-05, "loss": 1.6126, "step": 103560 }, { "epoch": 0.6509614440319761, "grad_norm": 6.759527683258057, "learning_rate": 1.5665197019354085e-05, "loss": 1.7289, "step": 103570 }, { "epoch": 0.6510242963486732, "grad_norm": 7.07916259765625, "learning_rate": 1.566477791840943e-05, "loss": 1.7425, "step": 103580 }, { "epoch": 0.6510871486653703, "grad_norm": 5.696152210235596, "learning_rate": 1.5664358817464776e-05, "loss": 1.4232, "step": 103590 }, { "epoch": 0.6511500009820674, "grad_norm": 6.269619941711426, "learning_rate": 1.5663939716520123e-05, "loss": 1.7295, "step": 103600 }, { "epoch": 0.6512128532987645, "grad_norm": 6.450016498565674, "learning_rate": 1.566352061557547e-05, "loss": 1.6174, "step": 103610 }, { "epoch": 0.6512757056154617, "grad_norm": 6.65458345413208, "learning_rate": 1.5663101514630817e-05, "loss": 1.9206, "step": 103620 }, { "epoch": 0.6513385579321588, "grad_norm": 5.862672328948975, "learning_rate": 1.566268241368616e-05, "loss": 1.5831, "step": 103630 }, { "epoch": 0.6514014102488559, "grad_norm": 5.754400253295898, "learning_rate": 1.5662263312741508e-05, "loss": 1.6507, "step": 103640 }, { "epoch": 0.651464262565553, "grad_norm": 7.132193565368652, "learning_rate": 1.5661844211796855e-05, "loss": 1.8128, "step": 103650 }, { "epoch": 0.6515271148822501, "grad_norm": 7.32619571685791, "learning_rate": 1.5661425110852202e-05, "loss": 1.5302, "step": 103660 }, { "epoch": 0.6515899671989472, "grad_norm": 5.260908126831055, "learning_rate": 1.5661006009907546e-05, "loss": 1.8892, "step": 103670 }, { "epoch": 0.6516528195156444, "grad_norm": 6.902319431304932, "learning_rate": 1.5660586908962893e-05, "loss": 1.7225, "step": 103680 }, { "epoch": 0.6517156718323415, "grad_norm": 6.186056137084961, "learning_rate": 1.566016780801824e-05, "loss": 1.8432, "step": 103690 }, { "epoch": 0.6517785241490386, "grad_norm": 7.764537811279297, "learning_rate": 1.5659748707073587e-05, "loss": 1.7789, "step": 103700 }, { "epoch": 0.6518413764657357, "grad_norm": 6.304036617279053, "learning_rate": 1.5659329606128934e-05, "loss": 1.7729, "step": 103710 }, { "epoch": 0.6519042287824328, "grad_norm": 6.370176315307617, "learning_rate": 1.565891050518428e-05, "loss": 1.5822, "step": 103720 }, { "epoch": 0.6519670810991299, "grad_norm": 6.075143337249756, "learning_rate": 1.5658491404239628e-05, "loss": 1.4602, "step": 103730 }, { "epoch": 0.652029933415827, "grad_norm": 7.602111339569092, "learning_rate": 1.565807230329497e-05, "loss": 1.86, "step": 103740 }, { "epoch": 0.6520927857325242, "grad_norm": 5.180424690246582, "learning_rate": 1.565765320235032e-05, "loss": 1.3146, "step": 103750 }, { "epoch": 0.6521556380492213, "grad_norm": 7.499391555786133, "learning_rate": 1.5657234101405666e-05, "loss": 1.6131, "step": 103760 }, { "epoch": 0.6522184903659183, "grad_norm": 6.732677459716797, "learning_rate": 1.5656815000461013e-05, "loss": 1.4125, "step": 103770 }, { "epoch": 0.6522813426826154, "grad_norm": 5.746530055999756, "learning_rate": 1.565639589951636e-05, "loss": 1.7176, "step": 103780 }, { "epoch": 0.6523441949993125, "grad_norm": 5.902767658233643, "learning_rate": 1.5655976798571707e-05, "loss": 1.64, "step": 103790 }, { "epoch": 0.6524070473160096, "grad_norm": 5.808832168579102, "learning_rate": 1.565555769762705e-05, "loss": 1.5475, "step": 103800 }, { "epoch": 0.6524698996327067, "grad_norm": 6.253108978271484, "learning_rate": 1.5655138596682398e-05, "loss": 1.6154, "step": 103810 }, { "epoch": 0.6525327519494039, "grad_norm": 6.812736511230469, "learning_rate": 1.5654719495737745e-05, "loss": 1.6477, "step": 103820 }, { "epoch": 0.652595604266101, "grad_norm": 7.912046909332275, "learning_rate": 1.5654300394793092e-05, "loss": 1.7127, "step": 103830 }, { "epoch": 0.6526584565827981, "grad_norm": 6.249855995178223, "learning_rate": 1.565388129384844e-05, "loss": 1.6528, "step": 103840 }, { "epoch": 0.6527213088994952, "grad_norm": 7.224602699279785, "learning_rate": 1.5653462192903783e-05, "loss": 1.5417, "step": 103850 }, { "epoch": 0.6527841612161923, "grad_norm": 6.1871161460876465, "learning_rate": 1.565304309195913e-05, "loss": 1.5821, "step": 103860 }, { "epoch": 0.6528470135328894, "grad_norm": 5.854806900024414, "learning_rate": 1.5652623991014477e-05, "loss": 1.5883, "step": 103870 }, { "epoch": 0.6529098658495865, "grad_norm": 6.067279815673828, "learning_rate": 1.5652204890069824e-05, "loss": 1.7265, "step": 103880 }, { "epoch": 0.6529727181662837, "grad_norm": 6.640414714813232, "learning_rate": 1.5651785789125168e-05, "loss": 1.7412, "step": 103890 }, { "epoch": 0.6530355704829808, "grad_norm": 6.987613201141357, "learning_rate": 1.5651366688180515e-05, "loss": 1.608, "step": 103900 }, { "epoch": 0.6530984227996779, "grad_norm": 8.038615226745605, "learning_rate": 1.5650947587235862e-05, "loss": 1.5996, "step": 103910 }, { "epoch": 0.653161275116375, "grad_norm": 6.497643947601318, "learning_rate": 1.565052848629121e-05, "loss": 1.5211, "step": 103920 }, { "epoch": 0.6532241274330721, "grad_norm": 7.015405654907227, "learning_rate": 1.5650109385346556e-05, "loss": 1.6312, "step": 103930 }, { "epoch": 0.6532869797497692, "grad_norm": 6.187511920928955, "learning_rate": 1.5649690284401903e-05, "loss": 1.5689, "step": 103940 }, { "epoch": 0.6533498320664664, "grad_norm": 7.025246620178223, "learning_rate": 1.564927118345725e-05, "loss": 1.5311, "step": 103950 }, { "epoch": 0.6534126843831635, "grad_norm": 6.445618629455566, "learning_rate": 1.5648852082512597e-05, "loss": 1.8203, "step": 103960 }, { "epoch": 0.6534755366998606, "grad_norm": 6.659660816192627, "learning_rate": 1.5648432981567944e-05, "loss": 1.6104, "step": 103970 }, { "epoch": 0.6535383890165577, "grad_norm": 6.287054061889648, "learning_rate": 1.5648013880623288e-05, "loss": 1.4779, "step": 103980 }, { "epoch": 0.6536012413332548, "grad_norm": 6.669844150543213, "learning_rate": 1.5647594779678635e-05, "loss": 1.5118, "step": 103990 }, { "epoch": 0.6536640936499519, "grad_norm": 7.070347309112549, "learning_rate": 1.5647175678733982e-05, "loss": 1.7626, "step": 104000 }, { "epoch": 0.653726945966649, "grad_norm": 6.286337852478027, "learning_rate": 1.564675657778933e-05, "loss": 1.5305, "step": 104010 }, { "epoch": 0.653789798283346, "grad_norm": 6.582416534423828, "learning_rate": 1.5646337476844676e-05, "loss": 1.6839, "step": 104020 }, { "epoch": 0.6538526506000432, "grad_norm": 7.97673225402832, "learning_rate": 1.564591837590002e-05, "loss": 1.8633, "step": 104030 }, { "epoch": 0.6539155029167403, "grad_norm": 10.233292579650879, "learning_rate": 1.5645499274955367e-05, "loss": 1.6929, "step": 104040 }, { "epoch": 0.6539783552334374, "grad_norm": 6.176262378692627, "learning_rate": 1.5645080174010714e-05, "loss": 1.3382, "step": 104050 }, { "epoch": 0.6540412075501345, "grad_norm": 6.435325622558594, "learning_rate": 1.564466107306606e-05, "loss": 1.8239, "step": 104060 }, { "epoch": 0.6541040598668316, "grad_norm": 6.884663105010986, "learning_rate": 1.5644241972121405e-05, "loss": 1.6751, "step": 104070 }, { "epoch": 0.6541669121835287, "grad_norm": 6.993259429931641, "learning_rate": 1.5643822871176752e-05, "loss": 1.7147, "step": 104080 }, { "epoch": 0.6542297645002259, "grad_norm": 6.018107891082764, "learning_rate": 1.56434037702321e-05, "loss": 1.751, "step": 104090 }, { "epoch": 0.654292616816923, "grad_norm": 5.587553024291992, "learning_rate": 1.5642984669287446e-05, "loss": 1.6907, "step": 104100 }, { "epoch": 0.6543554691336201, "grad_norm": 6.716568470001221, "learning_rate": 1.5642565568342793e-05, "loss": 1.7155, "step": 104110 }, { "epoch": 0.6544183214503172, "grad_norm": 6.479062080383301, "learning_rate": 1.5642146467398137e-05, "loss": 1.7387, "step": 104120 }, { "epoch": 0.6544811737670143, "grad_norm": 7.63808536529541, "learning_rate": 1.5641727366453484e-05, "loss": 1.624, "step": 104130 }, { "epoch": 0.6545440260837114, "grad_norm": 8.376218795776367, "learning_rate": 1.564130826550883e-05, "loss": 1.9181, "step": 104140 }, { "epoch": 0.6546068784004085, "grad_norm": 5.5728864669799805, "learning_rate": 1.5640889164564178e-05, "loss": 1.6911, "step": 104150 }, { "epoch": 0.6546697307171057, "grad_norm": 6.79200553894043, "learning_rate": 1.5640470063619525e-05, "loss": 1.596, "step": 104160 }, { "epoch": 0.6547325830338028, "grad_norm": 6.636775970458984, "learning_rate": 1.5640050962674872e-05, "loss": 1.5336, "step": 104170 }, { "epoch": 0.6547954353504999, "grad_norm": 7.076252460479736, "learning_rate": 1.563963186173022e-05, "loss": 1.7462, "step": 104180 }, { "epoch": 0.654858287667197, "grad_norm": 6.61713981628418, "learning_rate": 1.5639212760785566e-05, "loss": 1.5213, "step": 104190 }, { "epoch": 0.6549211399838941, "grad_norm": 6.114788055419922, "learning_rate": 1.563879365984091e-05, "loss": 1.5887, "step": 104200 }, { "epoch": 0.6549839923005912, "grad_norm": 5.9853596687316895, "learning_rate": 1.5638374558896257e-05, "loss": 1.6344, "step": 104210 }, { "epoch": 0.6550468446172883, "grad_norm": 5.697851657867432, "learning_rate": 1.5637955457951604e-05, "loss": 1.7218, "step": 104220 }, { "epoch": 0.6551096969339855, "grad_norm": 5.63599967956543, "learning_rate": 1.563753635700695e-05, "loss": 1.7532, "step": 104230 }, { "epoch": 0.6551725492506826, "grad_norm": 6.949346542358398, "learning_rate": 1.5637117256062298e-05, "loss": 1.5974, "step": 104240 }, { "epoch": 0.6552354015673797, "grad_norm": 6.5578694343566895, "learning_rate": 1.5636698155117642e-05, "loss": 1.6962, "step": 104250 }, { "epoch": 0.6552982538840768, "grad_norm": 6.383918285369873, "learning_rate": 1.563627905417299e-05, "loss": 1.5176, "step": 104260 }, { "epoch": 0.6553611062007739, "grad_norm": 5.7543487548828125, "learning_rate": 1.5635859953228336e-05, "loss": 1.5252, "step": 104270 }, { "epoch": 0.6554239585174709, "grad_norm": 6.0621771812438965, "learning_rate": 1.5635440852283683e-05, "loss": 1.6052, "step": 104280 }, { "epoch": 0.655486810834168, "grad_norm": 6.2976393699646, "learning_rate": 1.5635021751339027e-05, "loss": 1.6621, "step": 104290 }, { "epoch": 0.6555496631508652, "grad_norm": 7.7909088134765625, "learning_rate": 1.5634602650394374e-05, "loss": 1.8192, "step": 104300 }, { "epoch": 0.6556125154675623, "grad_norm": 6.488101482391357, "learning_rate": 1.563418354944972e-05, "loss": 1.4934, "step": 104310 }, { "epoch": 0.6556753677842594, "grad_norm": 6.674446105957031, "learning_rate": 1.5633764448505068e-05, "loss": 1.546, "step": 104320 }, { "epoch": 0.6557382201009565, "grad_norm": 6.641434192657471, "learning_rate": 1.5633345347560415e-05, "loss": 1.6793, "step": 104330 }, { "epoch": 0.6558010724176536, "grad_norm": 6.575596332550049, "learning_rate": 1.5632926246615762e-05, "loss": 1.7416, "step": 104340 }, { "epoch": 0.6558639247343507, "grad_norm": 5.703613758087158, "learning_rate": 1.563250714567111e-05, "loss": 1.5926, "step": 104350 }, { "epoch": 0.6559267770510478, "grad_norm": 7.271982669830322, "learning_rate": 1.5632088044726453e-05, "loss": 1.6012, "step": 104360 }, { "epoch": 0.655989629367745, "grad_norm": 7.279525279998779, "learning_rate": 1.56316689437818e-05, "loss": 1.8358, "step": 104370 }, { "epoch": 0.6560524816844421, "grad_norm": 7.8478779792785645, "learning_rate": 1.5631249842837147e-05, "loss": 1.5804, "step": 104380 }, { "epoch": 0.6561153340011392, "grad_norm": 7.450168609619141, "learning_rate": 1.5630830741892494e-05, "loss": 1.6337, "step": 104390 }, { "epoch": 0.6561781863178363, "grad_norm": 6.73850679397583, "learning_rate": 1.563041164094784e-05, "loss": 1.6636, "step": 104400 }, { "epoch": 0.6562410386345334, "grad_norm": 6.8981852531433105, "learning_rate": 1.5629992540003188e-05, "loss": 1.4187, "step": 104410 }, { "epoch": 0.6563038909512305, "grad_norm": 6.81155252456665, "learning_rate": 1.5629573439058532e-05, "loss": 1.5495, "step": 104420 }, { "epoch": 0.6563667432679277, "grad_norm": 7.1588544845581055, "learning_rate": 1.562915433811388e-05, "loss": 1.5536, "step": 104430 }, { "epoch": 0.6564295955846248, "grad_norm": 5.820721626281738, "learning_rate": 1.5628735237169226e-05, "loss": 1.4353, "step": 104440 }, { "epoch": 0.6564924479013219, "grad_norm": 7.019535064697266, "learning_rate": 1.5628316136224573e-05, "loss": 1.6098, "step": 104450 }, { "epoch": 0.656555300218019, "grad_norm": 5.81046724319458, "learning_rate": 1.562789703527992e-05, "loss": 1.4722, "step": 104460 }, { "epoch": 0.6566181525347161, "grad_norm": 6.771027088165283, "learning_rate": 1.5627477934335264e-05, "loss": 1.5313, "step": 104470 }, { "epoch": 0.6566810048514132, "grad_norm": 6.619336128234863, "learning_rate": 1.562705883339061e-05, "loss": 1.7025, "step": 104480 }, { "epoch": 0.6567438571681103, "grad_norm": 6.592316627502441, "learning_rate": 1.5626639732445958e-05, "loss": 1.6013, "step": 104490 }, { "epoch": 0.6568067094848075, "grad_norm": 6.460874080657959, "learning_rate": 1.5626220631501305e-05, "loss": 1.653, "step": 104500 }, { "epoch": 0.6568695618015046, "grad_norm": 6.1259074211120605, "learning_rate": 1.562580153055665e-05, "loss": 1.5678, "step": 104510 }, { "epoch": 0.6569324141182017, "grad_norm": 5.821610927581787, "learning_rate": 1.5625382429611996e-05, "loss": 1.514, "step": 104520 }, { "epoch": 0.6569952664348987, "grad_norm": 7.125027656555176, "learning_rate": 1.5624963328667343e-05, "loss": 1.5448, "step": 104530 }, { "epoch": 0.6570581187515958, "grad_norm": 7.306406021118164, "learning_rate": 1.562454422772269e-05, "loss": 1.7979, "step": 104540 }, { "epoch": 0.6571209710682929, "grad_norm": 6.649415493011475, "learning_rate": 1.5624125126778037e-05, "loss": 1.6668, "step": 104550 }, { "epoch": 0.65718382338499, "grad_norm": 6.722707271575928, "learning_rate": 1.5623706025833384e-05, "loss": 1.4713, "step": 104560 }, { "epoch": 0.6572466757016872, "grad_norm": 7.2857666015625, "learning_rate": 1.562328692488873e-05, "loss": 1.6118, "step": 104570 }, { "epoch": 0.6573095280183843, "grad_norm": 7.040681838989258, "learning_rate": 1.5622867823944078e-05, "loss": 1.8266, "step": 104580 }, { "epoch": 0.6573723803350814, "grad_norm": 6.947454929351807, "learning_rate": 1.5622448722999425e-05, "loss": 1.6549, "step": 104590 }, { "epoch": 0.6574352326517785, "grad_norm": 7.445709228515625, "learning_rate": 1.562202962205477e-05, "loss": 1.7482, "step": 104600 }, { "epoch": 0.6574980849684756, "grad_norm": 6.887095928192139, "learning_rate": 1.5621610521110116e-05, "loss": 1.4813, "step": 104610 }, { "epoch": 0.6575609372851727, "grad_norm": 5.7909159660339355, "learning_rate": 1.5621191420165463e-05, "loss": 1.7325, "step": 104620 }, { "epoch": 0.6576237896018698, "grad_norm": 5.971845626831055, "learning_rate": 1.562077231922081e-05, "loss": 1.849, "step": 104630 }, { "epoch": 0.657686641918567, "grad_norm": 6.557778358459473, "learning_rate": 1.5620353218276157e-05, "loss": 1.6898, "step": 104640 }, { "epoch": 0.6577494942352641, "grad_norm": 7.787708759307861, "learning_rate": 1.56199341173315e-05, "loss": 1.6621, "step": 104650 }, { "epoch": 0.6578123465519612, "grad_norm": 5.77874755859375, "learning_rate": 1.5619515016386848e-05, "loss": 1.5902, "step": 104660 }, { "epoch": 0.6578751988686583, "grad_norm": 6.337566375732422, "learning_rate": 1.5619095915442195e-05, "loss": 1.7225, "step": 104670 }, { "epoch": 0.6579380511853554, "grad_norm": 6.183952331542969, "learning_rate": 1.5618676814497542e-05, "loss": 1.6201, "step": 104680 }, { "epoch": 0.6580009035020525, "grad_norm": 5.826647758483887, "learning_rate": 1.5618257713552886e-05, "loss": 1.4433, "step": 104690 }, { "epoch": 0.6580637558187497, "grad_norm": 5.762514591217041, "learning_rate": 1.5617838612608233e-05, "loss": 1.611, "step": 104700 }, { "epoch": 0.6581266081354468, "grad_norm": 5.980803966522217, "learning_rate": 1.561741951166358e-05, "loss": 1.6396, "step": 104710 }, { "epoch": 0.6581894604521439, "grad_norm": 7.105878829956055, "learning_rate": 1.5617000410718927e-05, "loss": 1.5972, "step": 104720 }, { "epoch": 0.658252312768841, "grad_norm": 7.1127495765686035, "learning_rate": 1.5616581309774274e-05, "loss": 1.6492, "step": 104730 }, { "epoch": 0.6583151650855381, "grad_norm": 8.201815605163574, "learning_rate": 1.5616162208829618e-05, "loss": 1.612, "step": 104740 }, { "epoch": 0.6583780174022352, "grad_norm": 5.925051212310791, "learning_rate": 1.5615743107884965e-05, "loss": 1.8186, "step": 104750 }, { "epoch": 0.6584408697189323, "grad_norm": 6.60505485534668, "learning_rate": 1.5615324006940312e-05, "loss": 1.6606, "step": 104760 }, { "epoch": 0.6585037220356295, "grad_norm": 5.959009170532227, "learning_rate": 1.561490490599566e-05, "loss": 1.6531, "step": 104770 }, { "epoch": 0.6585665743523266, "grad_norm": 6.67812442779541, "learning_rate": 1.5614485805051006e-05, "loss": 1.7244, "step": 104780 }, { "epoch": 0.6586294266690236, "grad_norm": 6.7957234382629395, "learning_rate": 1.5614066704106353e-05, "loss": 1.6334, "step": 104790 }, { "epoch": 0.6586922789857207, "grad_norm": 5.9729390144348145, "learning_rate": 1.56136476031617e-05, "loss": 1.6634, "step": 104800 }, { "epoch": 0.6587551313024178, "grad_norm": 6.336520671844482, "learning_rate": 1.5613228502217047e-05, "loss": 1.6001, "step": 104810 }, { "epoch": 0.6588179836191149, "grad_norm": 7.992360591888428, "learning_rate": 1.561280940127239e-05, "loss": 1.5651, "step": 104820 }, { "epoch": 0.658880835935812, "grad_norm": 6.051232814788818, "learning_rate": 1.5612390300327738e-05, "loss": 1.5507, "step": 104830 }, { "epoch": 0.6589436882525092, "grad_norm": 6.314977169036865, "learning_rate": 1.5611971199383085e-05, "loss": 1.5137, "step": 104840 }, { "epoch": 0.6590065405692063, "grad_norm": 7.841394424438477, "learning_rate": 1.5611552098438432e-05, "loss": 1.8054, "step": 104850 }, { "epoch": 0.6590693928859034, "grad_norm": 6.797749042510986, "learning_rate": 1.561113299749378e-05, "loss": 1.7056, "step": 104860 }, { "epoch": 0.6591322452026005, "grad_norm": 7.025812149047852, "learning_rate": 1.5610713896549123e-05, "loss": 1.6572, "step": 104870 }, { "epoch": 0.6591950975192976, "grad_norm": 6.532901287078857, "learning_rate": 1.561029479560447e-05, "loss": 1.4581, "step": 104880 }, { "epoch": 0.6592579498359947, "grad_norm": 6.782949924468994, "learning_rate": 1.5609875694659817e-05, "loss": 1.6079, "step": 104890 }, { "epoch": 0.6593208021526918, "grad_norm": 6.53534460067749, "learning_rate": 1.5609456593715164e-05, "loss": 1.3991, "step": 104900 }, { "epoch": 0.659383654469389, "grad_norm": 6.3602213859558105, "learning_rate": 1.5609037492770508e-05, "loss": 1.6587, "step": 104910 }, { "epoch": 0.6594465067860861, "grad_norm": 7.155568599700928, "learning_rate": 1.5608618391825855e-05, "loss": 1.5366, "step": 104920 }, { "epoch": 0.6595093591027832, "grad_norm": 6.3102264404296875, "learning_rate": 1.5608199290881202e-05, "loss": 1.8082, "step": 104930 }, { "epoch": 0.6595722114194803, "grad_norm": 6.084749698638916, "learning_rate": 1.560778018993655e-05, "loss": 1.6722, "step": 104940 }, { "epoch": 0.6596350637361774, "grad_norm": 5.354098320007324, "learning_rate": 1.5607361088991896e-05, "loss": 1.6343, "step": 104950 }, { "epoch": 0.6596979160528745, "grad_norm": 6.635079383850098, "learning_rate": 1.5606941988047243e-05, "loss": 1.6978, "step": 104960 }, { "epoch": 0.6597607683695716, "grad_norm": 6.3366875648498535, "learning_rate": 1.560652288710259e-05, "loss": 1.7198, "step": 104970 }, { "epoch": 0.6598236206862688, "grad_norm": 6.161338806152344, "learning_rate": 1.5606103786157937e-05, "loss": 1.7174, "step": 104980 }, { "epoch": 0.6598864730029659, "grad_norm": 6.67751407623291, "learning_rate": 1.560568468521328e-05, "loss": 1.5836, "step": 104990 }, { "epoch": 0.659949325319663, "grad_norm": 6.224991798400879, "learning_rate": 1.5605265584268628e-05, "loss": 1.7559, "step": 105000 }, { "epoch": 0.6600121776363601, "grad_norm": 6.425436496734619, "learning_rate": 1.5604846483323975e-05, "loss": 1.8432, "step": 105010 }, { "epoch": 0.6600750299530572, "grad_norm": 7.239255905151367, "learning_rate": 1.5604427382379322e-05, "loss": 1.7081, "step": 105020 }, { "epoch": 0.6601378822697543, "grad_norm": 6.133124351501465, "learning_rate": 1.560400828143467e-05, "loss": 1.5968, "step": 105030 }, { "epoch": 0.6602007345864513, "grad_norm": 6.762413024902344, "learning_rate": 1.5603589180490013e-05, "loss": 1.9224, "step": 105040 }, { "epoch": 0.6602635869031485, "grad_norm": 6.529871463775635, "learning_rate": 1.560317007954536e-05, "loss": 1.6763, "step": 105050 }, { "epoch": 0.6603264392198456, "grad_norm": 6.822028160095215, "learning_rate": 1.5602750978600707e-05, "loss": 1.7081, "step": 105060 }, { "epoch": 0.6603892915365427, "grad_norm": 6.54036808013916, "learning_rate": 1.5602331877656054e-05, "loss": 1.6399, "step": 105070 }, { "epoch": 0.6604521438532398, "grad_norm": 5.844644069671631, "learning_rate": 1.56019127767114e-05, "loss": 1.466, "step": 105080 }, { "epoch": 0.6605149961699369, "grad_norm": 7.746768951416016, "learning_rate": 1.5601493675766745e-05, "loss": 1.664, "step": 105090 }, { "epoch": 0.660577848486634, "grad_norm": 7.402467727661133, "learning_rate": 1.5601074574822092e-05, "loss": 1.564, "step": 105100 }, { "epoch": 0.6606407008033311, "grad_norm": 6.537257671356201, "learning_rate": 1.560065547387744e-05, "loss": 1.5916, "step": 105110 }, { "epoch": 0.6607035531200283, "grad_norm": 6.990403652191162, "learning_rate": 1.5600236372932786e-05, "loss": 1.7585, "step": 105120 }, { "epoch": 0.6607664054367254, "grad_norm": 6.598796844482422, "learning_rate": 1.559981727198813e-05, "loss": 1.7664, "step": 105130 }, { "epoch": 0.6608292577534225, "grad_norm": 6.536150932312012, "learning_rate": 1.5599398171043477e-05, "loss": 1.6822, "step": 105140 }, { "epoch": 0.6608921100701196, "grad_norm": 6.739705562591553, "learning_rate": 1.5598979070098824e-05, "loss": 1.6046, "step": 105150 }, { "epoch": 0.6609549623868167, "grad_norm": 6.427530288696289, "learning_rate": 1.559855996915417e-05, "loss": 1.7359, "step": 105160 }, { "epoch": 0.6610178147035138, "grad_norm": 6.731106758117676, "learning_rate": 1.5598140868209518e-05, "loss": 1.6558, "step": 105170 }, { "epoch": 0.661080667020211, "grad_norm": 6.3343400955200195, "learning_rate": 1.5597721767264865e-05, "loss": 1.477, "step": 105180 }, { "epoch": 0.6611435193369081, "grad_norm": 5.260409832000732, "learning_rate": 1.5597302666320212e-05, "loss": 1.7898, "step": 105190 }, { "epoch": 0.6612063716536052, "grad_norm": 6.937628269195557, "learning_rate": 1.559688356537556e-05, "loss": 1.522, "step": 105200 }, { "epoch": 0.6612692239703023, "grad_norm": 6.389298915863037, "learning_rate": 1.5596464464430906e-05, "loss": 1.8267, "step": 105210 }, { "epoch": 0.6613320762869994, "grad_norm": 6.643794536590576, "learning_rate": 1.559604536348625e-05, "loss": 1.4692, "step": 105220 }, { "epoch": 0.6613949286036965, "grad_norm": 8.866166114807129, "learning_rate": 1.5595626262541597e-05, "loss": 1.7742, "step": 105230 }, { "epoch": 0.6614577809203936, "grad_norm": 5.795926094055176, "learning_rate": 1.5595207161596944e-05, "loss": 1.7647, "step": 105240 }, { "epoch": 0.6615206332370908, "grad_norm": 6.502231597900391, "learning_rate": 1.559478806065229e-05, "loss": 1.8489, "step": 105250 }, { "epoch": 0.6615834855537879, "grad_norm": 5.95989465713501, "learning_rate": 1.559436895970764e-05, "loss": 1.4893, "step": 105260 }, { "epoch": 0.661646337870485, "grad_norm": 6.643709659576416, "learning_rate": 1.5593949858762982e-05, "loss": 1.6102, "step": 105270 }, { "epoch": 0.6617091901871821, "grad_norm": 6.032772064208984, "learning_rate": 1.559353075781833e-05, "loss": 1.6731, "step": 105280 }, { "epoch": 0.6617720425038792, "grad_norm": 6.644000053405762, "learning_rate": 1.5593111656873676e-05, "loss": 1.4855, "step": 105290 }, { "epoch": 0.6618348948205762, "grad_norm": 7.02887487411499, "learning_rate": 1.5592692555929023e-05, "loss": 1.5104, "step": 105300 }, { "epoch": 0.6618977471372733, "grad_norm": 6.866332530975342, "learning_rate": 1.5592273454984367e-05, "loss": 1.5726, "step": 105310 }, { "epoch": 0.6619605994539705, "grad_norm": 6.947324752807617, "learning_rate": 1.5591854354039714e-05, "loss": 1.545, "step": 105320 }, { "epoch": 0.6620234517706676, "grad_norm": 7.434155464172363, "learning_rate": 1.559143525309506e-05, "loss": 1.642, "step": 105330 }, { "epoch": 0.6620863040873647, "grad_norm": 6.633470058441162, "learning_rate": 1.5591016152150408e-05, "loss": 1.6313, "step": 105340 }, { "epoch": 0.6621491564040618, "grad_norm": 7.02971887588501, "learning_rate": 1.5590597051205755e-05, "loss": 1.4788, "step": 105350 }, { "epoch": 0.6622120087207589, "grad_norm": 5.845401763916016, "learning_rate": 1.5590177950261102e-05, "loss": 1.6663, "step": 105360 }, { "epoch": 0.662274861037456, "grad_norm": 6.136750221252441, "learning_rate": 1.5589758849316446e-05, "loss": 1.781, "step": 105370 }, { "epoch": 0.6623377133541531, "grad_norm": 5.939883708953857, "learning_rate": 1.5589339748371793e-05, "loss": 1.8151, "step": 105380 }, { "epoch": 0.6624005656708503, "grad_norm": 7.601027965545654, "learning_rate": 1.558892064742714e-05, "loss": 1.5708, "step": 105390 }, { "epoch": 0.6624634179875474, "grad_norm": 6.943202018737793, "learning_rate": 1.5588501546482487e-05, "loss": 1.7305, "step": 105400 }, { "epoch": 0.6625262703042445, "grad_norm": 6.401005744934082, "learning_rate": 1.5588082445537834e-05, "loss": 1.8093, "step": 105410 }, { "epoch": 0.6625891226209416, "grad_norm": 6.7155070304870605, "learning_rate": 1.558766334459318e-05, "loss": 1.7965, "step": 105420 }, { "epoch": 0.6626519749376387, "grad_norm": 8.192904472351074, "learning_rate": 1.558724424364853e-05, "loss": 1.7007, "step": 105430 }, { "epoch": 0.6627148272543358, "grad_norm": 6.841559410095215, "learning_rate": 1.5586825142703872e-05, "loss": 1.6327, "step": 105440 }, { "epoch": 0.662777679571033, "grad_norm": 7.527811050415039, "learning_rate": 1.558640604175922e-05, "loss": 1.7499, "step": 105450 }, { "epoch": 0.6628405318877301, "grad_norm": 5.782719135284424, "learning_rate": 1.5585986940814566e-05, "loss": 1.5712, "step": 105460 }, { "epoch": 0.6629033842044272, "grad_norm": 6.337703227996826, "learning_rate": 1.5585567839869913e-05, "loss": 1.7707, "step": 105470 }, { "epoch": 0.6629662365211243, "grad_norm": 5.9730544090271, "learning_rate": 1.558514873892526e-05, "loss": 1.5823, "step": 105480 }, { "epoch": 0.6630290888378214, "grad_norm": 6.3289666175842285, "learning_rate": 1.5584729637980604e-05, "loss": 1.5429, "step": 105490 }, { "epoch": 0.6630919411545185, "grad_norm": 6.009237289428711, "learning_rate": 1.558431053703595e-05, "loss": 1.6163, "step": 105500 }, { "epoch": 0.6631547934712156, "grad_norm": 7.3155436515808105, "learning_rate": 1.5583891436091298e-05, "loss": 1.7001, "step": 105510 }, { "epoch": 0.6632176457879128, "grad_norm": 7.042203426361084, "learning_rate": 1.5583472335146645e-05, "loss": 1.8785, "step": 105520 }, { "epoch": 0.6632804981046099, "grad_norm": 6.458547115325928, "learning_rate": 1.558305323420199e-05, "loss": 1.4179, "step": 105530 }, { "epoch": 0.663343350421307, "grad_norm": 6.338866233825684, "learning_rate": 1.5582634133257336e-05, "loss": 1.6736, "step": 105540 }, { "epoch": 0.663406202738004, "grad_norm": 6.325279712677002, "learning_rate": 1.5582215032312683e-05, "loss": 1.5982, "step": 105550 }, { "epoch": 0.6634690550547011, "grad_norm": 6.828125953674316, "learning_rate": 1.558179593136803e-05, "loss": 1.951, "step": 105560 }, { "epoch": 0.6635319073713982, "grad_norm": 6.712599277496338, "learning_rate": 1.5581376830423377e-05, "loss": 1.534, "step": 105570 }, { "epoch": 0.6635947596880953, "grad_norm": 6.4551262855529785, "learning_rate": 1.5580957729478724e-05, "loss": 1.5952, "step": 105580 }, { "epoch": 0.6636576120047925, "grad_norm": 6.0207977294921875, "learning_rate": 1.558053862853407e-05, "loss": 1.6773, "step": 105590 }, { "epoch": 0.6637204643214896, "grad_norm": 7.634629249572754, "learning_rate": 1.558011952758942e-05, "loss": 1.752, "step": 105600 }, { "epoch": 0.6637833166381867, "grad_norm": 6.328338623046875, "learning_rate": 1.5579700426644765e-05, "loss": 1.5793, "step": 105610 }, { "epoch": 0.6638461689548838, "grad_norm": 5.922032356262207, "learning_rate": 1.557928132570011e-05, "loss": 1.4599, "step": 105620 }, { "epoch": 0.6639090212715809, "grad_norm": 6.268474102020264, "learning_rate": 1.5578862224755456e-05, "loss": 1.6257, "step": 105630 }, { "epoch": 0.663971873588278, "grad_norm": 6.577045917510986, "learning_rate": 1.5578443123810803e-05, "loss": 1.5157, "step": 105640 }, { "epoch": 0.6640347259049751, "grad_norm": 5.638345241546631, "learning_rate": 1.557802402286615e-05, "loss": 1.578, "step": 105650 }, { "epoch": 0.6640975782216723, "grad_norm": 6.140465259552002, "learning_rate": 1.5577604921921494e-05, "loss": 1.5578, "step": 105660 }, { "epoch": 0.6641604305383694, "grad_norm": 6.0519843101501465, "learning_rate": 1.557718582097684e-05, "loss": 1.5569, "step": 105670 }, { "epoch": 0.6642232828550665, "grad_norm": 7.764479160308838, "learning_rate": 1.5576766720032188e-05, "loss": 1.5647, "step": 105680 }, { "epoch": 0.6642861351717636, "grad_norm": 6.630612373352051, "learning_rate": 1.5576347619087535e-05, "loss": 1.6348, "step": 105690 }, { "epoch": 0.6643489874884607, "grad_norm": 6.928365707397461, "learning_rate": 1.5575928518142882e-05, "loss": 1.5322, "step": 105700 }, { "epoch": 0.6644118398051578, "grad_norm": 6.307169437408447, "learning_rate": 1.5575509417198226e-05, "loss": 1.6617, "step": 105710 }, { "epoch": 0.664474692121855, "grad_norm": 6.200599193572998, "learning_rate": 1.5575090316253573e-05, "loss": 1.862, "step": 105720 }, { "epoch": 0.6645375444385521, "grad_norm": 6.4765167236328125, "learning_rate": 1.557467121530892e-05, "loss": 1.7521, "step": 105730 }, { "epoch": 0.6646003967552492, "grad_norm": 8.003438949584961, "learning_rate": 1.5574252114364267e-05, "loss": 1.591, "step": 105740 }, { "epoch": 0.6646632490719463, "grad_norm": 5.496792793273926, "learning_rate": 1.557383301341961e-05, "loss": 1.4939, "step": 105750 }, { "epoch": 0.6647261013886434, "grad_norm": 7.4300713539123535, "learning_rate": 1.5573413912474958e-05, "loss": 1.728, "step": 105760 }, { "epoch": 0.6647889537053405, "grad_norm": 7.0992431640625, "learning_rate": 1.5572994811530305e-05, "loss": 1.5779, "step": 105770 }, { "epoch": 0.6648518060220376, "grad_norm": 6.628957748413086, "learning_rate": 1.5572575710585652e-05, "loss": 1.4178, "step": 105780 }, { "epoch": 0.6649146583387348, "grad_norm": 7.31161642074585, "learning_rate": 1.5572156609641e-05, "loss": 1.7454, "step": 105790 }, { "epoch": 0.6649775106554319, "grad_norm": 7.662426471710205, "learning_rate": 1.5571737508696346e-05, "loss": 1.6708, "step": 105800 }, { "epoch": 0.6650403629721289, "grad_norm": 6.7243781089782715, "learning_rate": 1.5571318407751693e-05, "loss": 1.619, "step": 105810 }, { "epoch": 0.665103215288826, "grad_norm": 6.5975260734558105, "learning_rate": 1.557089930680704e-05, "loss": 1.649, "step": 105820 }, { "epoch": 0.6651660676055231, "grad_norm": 7.845924377441406, "learning_rate": 1.5570480205862387e-05, "loss": 1.6569, "step": 105830 }, { "epoch": 0.6652289199222202, "grad_norm": 7.023687839508057, "learning_rate": 1.557006110491773e-05, "loss": 1.5158, "step": 105840 }, { "epoch": 0.6652917722389173, "grad_norm": 6.436861991882324, "learning_rate": 1.5569642003973078e-05, "loss": 1.3093, "step": 105850 }, { "epoch": 0.6653546245556144, "grad_norm": 6.995415210723877, "learning_rate": 1.5569222903028425e-05, "loss": 1.8804, "step": 105860 }, { "epoch": 0.6654174768723116, "grad_norm": 6.78193998336792, "learning_rate": 1.5568803802083772e-05, "loss": 1.9103, "step": 105870 }, { "epoch": 0.6654803291890087, "grad_norm": 7.750466346740723, "learning_rate": 1.556838470113912e-05, "loss": 1.573, "step": 105880 }, { "epoch": 0.6655431815057058, "grad_norm": 8.08763313293457, "learning_rate": 1.5567965600194463e-05, "loss": 1.6636, "step": 105890 }, { "epoch": 0.6656060338224029, "grad_norm": 6.756246089935303, "learning_rate": 1.556754649924981e-05, "loss": 1.5212, "step": 105900 }, { "epoch": 0.6656688861391, "grad_norm": 6.320174217224121, "learning_rate": 1.5567127398305157e-05, "loss": 1.6462, "step": 105910 }, { "epoch": 0.6657317384557971, "grad_norm": 6.737569808959961, "learning_rate": 1.5566708297360504e-05, "loss": 1.6862, "step": 105920 }, { "epoch": 0.6657945907724943, "grad_norm": 7.224164009094238, "learning_rate": 1.5566289196415848e-05, "loss": 1.7514, "step": 105930 }, { "epoch": 0.6658574430891914, "grad_norm": 6.790672302246094, "learning_rate": 1.5565870095471195e-05, "loss": 1.6229, "step": 105940 }, { "epoch": 0.6659202954058885, "grad_norm": 5.7221269607543945, "learning_rate": 1.5565450994526542e-05, "loss": 1.7798, "step": 105950 }, { "epoch": 0.6659831477225856, "grad_norm": 6.954379558563232, "learning_rate": 1.556503189358189e-05, "loss": 1.6222, "step": 105960 }, { "epoch": 0.6660460000392827, "grad_norm": 6.151113033294678, "learning_rate": 1.55646547027317e-05, "loss": 1.3968, "step": 105970 }, { "epoch": 0.6661088523559798, "grad_norm": 6.866469860076904, "learning_rate": 1.5564235601787048e-05, "loss": 1.4886, "step": 105980 }, { "epoch": 0.666171704672677, "grad_norm": 6.896879196166992, "learning_rate": 1.5563816500842395e-05, "loss": 1.7967, "step": 105990 }, { "epoch": 0.6662345569893741, "grad_norm": 5.111074924468994, "learning_rate": 1.5563397399897742e-05, "loss": 1.4791, "step": 106000 }, { "epoch": 0.6662974093060712, "grad_norm": 6.863555908203125, "learning_rate": 1.5562978298953085e-05, "loss": 1.6016, "step": 106010 }, { "epoch": 0.6663602616227683, "grad_norm": 6.542895793914795, "learning_rate": 1.5562559198008433e-05, "loss": 1.6089, "step": 106020 }, { "epoch": 0.6664231139394654, "grad_norm": 7.639867305755615, "learning_rate": 1.556214009706378e-05, "loss": 1.8336, "step": 106030 }, { "epoch": 0.6664859662561625, "grad_norm": 6.384363651275635, "learning_rate": 1.5561720996119127e-05, "loss": 1.7023, "step": 106040 }, { "epoch": 0.6665488185728596, "grad_norm": 6.245970249176025, "learning_rate": 1.5561301895174474e-05, "loss": 1.6612, "step": 106050 }, { "epoch": 0.6666116708895566, "grad_norm": 5.9655442237854, "learning_rate": 1.5560882794229817e-05, "loss": 1.5802, "step": 106060 }, { "epoch": 0.6666745232062538, "grad_norm": 6.154887676239014, "learning_rate": 1.5560463693285164e-05, "loss": 1.5052, "step": 106070 }, { "epoch": 0.6667373755229509, "grad_norm": 7.505038738250732, "learning_rate": 1.556004459234051e-05, "loss": 1.5395, "step": 106080 }, { "epoch": 0.666800227839648, "grad_norm": 5.876123428344727, "learning_rate": 1.5559667401490323e-05, "loss": 1.5256, "step": 106090 }, { "epoch": 0.6668630801563451, "grad_norm": 6.18739652633667, "learning_rate": 1.555924830054567e-05, "loss": 1.5564, "step": 106100 }, { "epoch": 0.6669259324730422, "grad_norm": 7.498041152954102, "learning_rate": 1.5558829199601017e-05, "loss": 1.5418, "step": 106110 }, { "epoch": 0.6669887847897393, "grad_norm": 6.704996109008789, "learning_rate": 1.5558410098656364e-05, "loss": 1.64, "step": 106120 }, { "epoch": 0.6670516371064364, "grad_norm": 6.852806091308594, "learning_rate": 1.5557990997711708e-05, "loss": 1.6613, "step": 106130 }, { "epoch": 0.6671144894231336, "grad_norm": 6.0546650886535645, "learning_rate": 1.5557571896767055e-05, "loss": 1.6197, "step": 106140 }, { "epoch": 0.6671773417398307, "grad_norm": 7.7422356605529785, "learning_rate": 1.5557152795822402e-05, "loss": 1.7779, "step": 106150 }, { "epoch": 0.6672401940565278, "grad_norm": 6.542478561401367, "learning_rate": 1.555673369487775e-05, "loss": 1.6599, "step": 106160 }, { "epoch": 0.6673030463732249, "grad_norm": 6.31635046005249, "learning_rate": 1.5556314593933096e-05, "loss": 1.4869, "step": 106170 }, { "epoch": 0.667365898689922, "grad_norm": 5.072801113128662, "learning_rate": 1.5555895492988443e-05, "loss": 1.4812, "step": 106180 }, { "epoch": 0.6674287510066191, "grad_norm": 7.292965412139893, "learning_rate": 1.555547639204379e-05, "loss": 1.5579, "step": 106190 }, { "epoch": 0.6674916033233163, "grad_norm": 8.103798866271973, "learning_rate": 1.5555057291099137e-05, "loss": 1.7417, "step": 106200 }, { "epoch": 0.6675544556400134, "grad_norm": 6.62891960144043, "learning_rate": 1.5554638190154484e-05, "loss": 1.8203, "step": 106210 }, { "epoch": 0.6676173079567105, "grad_norm": 6.723920822143555, "learning_rate": 1.5554219089209828e-05, "loss": 1.5491, "step": 106220 }, { "epoch": 0.6676801602734076, "grad_norm": 5.83394718170166, "learning_rate": 1.5553799988265175e-05, "loss": 1.7686, "step": 106230 }, { "epoch": 0.6677430125901047, "grad_norm": 6.1524810791015625, "learning_rate": 1.5553380887320522e-05, "loss": 1.651, "step": 106240 }, { "epoch": 0.6678058649068018, "grad_norm": 5.9737372398376465, "learning_rate": 1.555296178637587e-05, "loss": 1.8207, "step": 106250 }, { "epoch": 0.6678687172234989, "grad_norm": 7.384658336639404, "learning_rate": 1.5552542685431213e-05, "loss": 1.7481, "step": 106260 }, { "epoch": 0.6679315695401961, "grad_norm": 6.399549961090088, "learning_rate": 1.555212358448656e-05, "loss": 1.7386, "step": 106270 }, { "epoch": 0.6679944218568932, "grad_norm": 6.9837727546691895, "learning_rate": 1.5551704483541907e-05, "loss": 1.7751, "step": 106280 }, { "epoch": 0.6680572741735903, "grad_norm": 5.711150169372559, "learning_rate": 1.5551285382597254e-05, "loss": 1.577, "step": 106290 }, { "epoch": 0.6681201264902874, "grad_norm": 5.359866142272949, "learning_rate": 1.5550866281652598e-05, "loss": 1.6143, "step": 106300 }, { "epoch": 0.6681829788069845, "grad_norm": 6.565360069274902, "learning_rate": 1.5550447180707945e-05, "loss": 1.5832, "step": 106310 }, { "epoch": 0.6682458311236815, "grad_norm": 6.1983771324157715, "learning_rate": 1.5550028079763292e-05, "loss": 1.7735, "step": 106320 }, { "epoch": 0.6683086834403786, "grad_norm": 6.371673107147217, "learning_rate": 1.554960897881864e-05, "loss": 1.4869, "step": 106330 }, { "epoch": 0.6683715357570758, "grad_norm": 5.785719394683838, "learning_rate": 1.5549189877873986e-05, "loss": 1.7531, "step": 106340 }, { "epoch": 0.6684343880737729, "grad_norm": 7.046372890472412, "learning_rate": 1.5548770776929333e-05, "loss": 1.6951, "step": 106350 }, { "epoch": 0.66849724039047, "grad_norm": 7.223507404327393, "learning_rate": 1.554835167598468e-05, "loss": 1.6526, "step": 106360 }, { "epoch": 0.6685600927071671, "grad_norm": 6.702063083648682, "learning_rate": 1.5547932575040024e-05, "loss": 1.476, "step": 106370 }, { "epoch": 0.6686229450238642, "grad_norm": 5.8352370262146, "learning_rate": 1.554751347409537e-05, "loss": 1.5178, "step": 106380 }, { "epoch": 0.6686857973405613, "grad_norm": 6.32943058013916, "learning_rate": 1.5547094373150718e-05, "loss": 1.577, "step": 106390 }, { "epoch": 0.6687486496572584, "grad_norm": 6.566135406494141, "learning_rate": 1.5546675272206065e-05, "loss": 1.5621, "step": 106400 }, { "epoch": 0.6688115019739556, "grad_norm": 6.144376754760742, "learning_rate": 1.5546256171261412e-05, "loss": 1.6911, "step": 106410 }, { "epoch": 0.6688743542906527, "grad_norm": 6.189729690551758, "learning_rate": 1.554583707031676e-05, "loss": 1.4312, "step": 106420 }, { "epoch": 0.6689372066073498, "grad_norm": 6.6637773513793945, "learning_rate": 1.5545417969372106e-05, "loss": 1.6125, "step": 106430 }, { "epoch": 0.6690000589240469, "grad_norm": 6.491280555725098, "learning_rate": 1.554499886842745e-05, "loss": 1.7233, "step": 106440 }, { "epoch": 0.669062911240744, "grad_norm": 7.1595354080200195, "learning_rate": 1.5544579767482797e-05, "loss": 1.7563, "step": 106450 }, { "epoch": 0.6691257635574411, "grad_norm": 6.155887603759766, "learning_rate": 1.5544160666538144e-05, "loss": 1.8459, "step": 106460 }, { "epoch": 0.6691886158741382, "grad_norm": 7.118120193481445, "learning_rate": 1.554374156559349e-05, "loss": 1.6809, "step": 106470 }, { "epoch": 0.6692514681908354, "grad_norm": 5.1893486976623535, "learning_rate": 1.5543322464648835e-05, "loss": 1.6264, "step": 106480 }, { "epoch": 0.6693143205075325, "grad_norm": 6.951686382293701, "learning_rate": 1.5542903363704182e-05, "loss": 1.6391, "step": 106490 }, { "epoch": 0.6693771728242296, "grad_norm": 6.366652488708496, "learning_rate": 1.554248426275953e-05, "loss": 1.4176, "step": 106500 }, { "epoch": 0.6694400251409267, "grad_norm": 6.808743953704834, "learning_rate": 1.5542065161814876e-05, "loss": 1.7209, "step": 106510 }, { "epoch": 0.6695028774576238, "grad_norm": 5.900963306427002, "learning_rate": 1.5541646060870223e-05, "loss": 1.5499, "step": 106520 }, { "epoch": 0.6695657297743209, "grad_norm": 7.198376178741455, "learning_rate": 1.5541226959925567e-05, "loss": 1.5471, "step": 106530 }, { "epoch": 0.669628582091018, "grad_norm": 7.565441131591797, "learning_rate": 1.5540807858980914e-05, "loss": 1.6335, "step": 106540 }, { "epoch": 0.6696914344077152, "grad_norm": 7.076322078704834, "learning_rate": 1.554038875803626e-05, "loss": 1.8162, "step": 106550 }, { "epoch": 0.6697542867244123, "grad_norm": 5.171063423156738, "learning_rate": 1.5539969657091608e-05, "loss": 1.7026, "step": 106560 }, { "epoch": 0.6698171390411093, "grad_norm": 5.83885383605957, "learning_rate": 1.5539550556146955e-05, "loss": 1.3154, "step": 106570 }, { "epoch": 0.6698799913578064, "grad_norm": 5.799517631530762, "learning_rate": 1.5539131455202302e-05, "loss": 1.5434, "step": 106580 }, { "epoch": 0.6699428436745035, "grad_norm": 6.511415958404541, "learning_rate": 1.553871235425765e-05, "loss": 1.6854, "step": 106590 }, { "epoch": 0.6700056959912006, "grad_norm": 6.911275863647461, "learning_rate": 1.5538293253312996e-05, "loss": 1.8594, "step": 106600 }, { "epoch": 0.6700685483078977, "grad_norm": 6.210766315460205, "learning_rate": 1.553787415236834e-05, "loss": 1.6294, "step": 106610 }, { "epoch": 0.6701314006245949, "grad_norm": 5.71303653717041, "learning_rate": 1.5537455051423687e-05, "loss": 1.7189, "step": 106620 }, { "epoch": 0.670194252941292, "grad_norm": 6.001368045806885, "learning_rate": 1.5537035950479034e-05, "loss": 1.6858, "step": 106630 }, { "epoch": 0.6702571052579891, "grad_norm": 6.6295695304870605, "learning_rate": 1.553661684953438e-05, "loss": 1.7003, "step": 106640 }, { "epoch": 0.6703199575746862, "grad_norm": 7.705511093139648, "learning_rate": 1.553619774858973e-05, "loss": 1.5779, "step": 106650 }, { "epoch": 0.6703828098913833, "grad_norm": 7.167219638824463, "learning_rate": 1.5535778647645072e-05, "loss": 1.7056, "step": 106660 }, { "epoch": 0.6704456622080804, "grad_norm": 4.897042751312256, "learning_rate": 1.553535954670042e-05, "loss": 1.574, "step": 106670 }, { "epoch": 0.6705085145247776, "grad_norm": 6.355395317077637, "learning_rate": 1.5534940445755766e-05, "loss": 1.8371, "step": 106680 }, { "epoch": 0.6705713668414747, "grad_norm": 5.6377482414245605, "learning_rate": 1.5534521344811113e-05, "loss": 1.6737, "step": 106690 }, { "epoch": 0.6706342191581718, "grad_norm": 6.316112518310547, "learning_rate": 1.5534102243866457e-05, "loss": 1.6156, "step": 106700 }, { "epoch": 0.6706970714748689, "grad_norm": 6.347810745239258, "learning_rate": 1.5533683142921804e-05, "loss": 1.9162, "step": 106710 }, { "epoch": 0.670759923791566, "grad_norm": 6.955513000488281, "learning_rate": 1.553326404197715e-05, "loss": 1.7304, "step": 106720 }, { "epoch": 0.6708227761082631, "grad_norm": 6.361878871917725, "learning_rate": 1.5532844941032498e-05, "loss": 1.6015, "step": 106730 }, { "epoch": 0.6708856284249602, "grad_norm": 7.454643249511719, "learning_rate": 1.5532425840087845e-05, "loss": 1.7404, "step": 106740 }, { "epoch": 0.6709484807416574, "grad_norm": 7.591629981994629, "learning_rate": 1.553200673914319e-05, "loss": 1.5899, "step": 106750 }, { "epoch": 0.6710113330583545, "grad_norm": 5.682295799255371, "learning_rate": 1.5531587638198536e-05, "loss": 1.7101, "step": 106760 }, { "epoch": 0.6710741853750516, "grad_norm": 7.851317882537842, "learning_rate": 1.5531168537253883e-05, "loss": 1.8985, "step": 106770 }, { "epoch": 0.6711370376917487, "grad_norm": 6.232215881347656, "learning_rate": 1.553074943630923e-05, "loss": 1.7223, "step": 106780 }, { "epoch": 0.6711998900084458, "grad_norm": 5.8570146560668945, "learning_rate": 1.5530330335364577e-05, "loss": 1.6537, "step": 106790 }, { "epoch": 0.6712627423251429, "grad_norm": 7.223216533660889, "learning_rate": 1.5529911234419924e-05, "loss": 1.7117, "step": 106800 }, { "epoch": 0.67132559464184, "grad_norm": 7.296375751495361, "learning_rate": 1.552949213347527e-05, "loss": 1.4305, "step": 106810 }, { "epoch": 0.6713884469585372, "grad_norm": 6.399417400360107, "learning_rate": 1.552907303253062e-05, "loss": 1.6044, "step": 106820 }, { "epoch": 0.6714512992752342, "grad_norm": 5.918104648590088, "learning_rate": 1.5528653931585965e-05, "loss": 1.8545, "step": 106830 }, { "epoch": 0.6715141515919313, "grad_norm": 6.380157947540283, "learning_rate": 1.552823483064131e-05, "loss": 1.7125, "step": 106840 }, { "epoch": 0.6715770039086284, "grad_norm": 6.100411415100098, "learning_rate": 1.5527815729696656e-05, "loss": 1.5642, "step": 106850 }, { "epoch": 0.6716398562253255, "grad_norm": 6.3617730140686035, "learning_rate": 1.5527396628752003e-05, "loss": 1.6316, "step": 106860 }, { "epoch": 0.6717027085420226, "grad_norm": 6.027811527252197, "learning_rate": 1.552697752780735e-05, "loss": 1.5886, "step": 106870 }, { "epoch": 0.6717655608587197, "grad_norm": 6.4630537033081055, "learning_rate": 1.5526558426862694e-05, "loss": 1.5688, "step": 106880 }, { "epoch": 0.6718284131754169, "grad_norm": 6.114879608154297, "learning_rate": 1.552613932591804e-05, "loss": 1.4731, "step": 106890 }, { "epoch": 0.671891265492114, "grad_norm": 6.567522048950195, "learning_rate": 1.5525720224973388e-05, "loss": 1.6373, "step": 106900 }, { "epoch": 0.6719541178088111, "grad_norm": 7.470694065093994, "learning_rate": 1.5525301124028735e-05, "loss": 1.8952, "step": 106910 }, { "epoch": 0.6720169701255082, "grad_norm": 6.166024208068848, "learning_rate": 1.552488202308408e-05, "loss": 1.8146, "step": 106920 }, { "epoch": 0.6720798224422053, "grad_norm": 6.314844608306885, "learning_rate": 1.5524462922139426e-05, "loss": 1.5766, "step": 106930 }, { "epoch": 0.6721426747589024, "grad_norm": 6.448526859283447, "learning_rate": 1.5524043821194773e-05, "loss": 1.7526, "step": 106940 }, { "epoch": 0.6722055270755996, "grad_norm": 5.572268009185791, "learning_rate": 1.552362472025012e-05, "loss": 1.6016, "step": 106950 }, { "epoch": 0.6722683793922967, "grad_norm": 7.105794429779053, "learning_rate": 1.5523205619305467e-05, "loss": 1.6695, "step": 106960 }, { "epoch": 0.6723312317089938, "grad_norm": 8.571908950805664, "learning_rate": 1.5522786518360814e-05, "loss": 1.9172, "step": 106970 }, { "epoch": 0.6723940840256909, "grad_norm": 6.695889949798584, "learning_rate": 1.552236741741616e-05, "loss": 1.4954, "step": 106980 }, { "epoch": 0.672456936342388, "grad_norm": 6.310705184936523, "learning_rate": 1.552194831647151e-05, "loss": 1.8722, "step": 106990 }, { "epoch": 0.6725197886590851, "grad_norm": 6.111076354980469, "learning_rate": 1.5521529215526852e-05, "loss": 1.8558, "step": 107000 }, { "epoch": 0.6725826409757822, "grad_norm": 6.492177963256836, "learning_rate": 1.55211101145822e-05, "loss": 1.3883, "step": 107010 }, { "epoch": 0.6726454932924794, "grad_norm": 6.950572490692139, "learning_rate": 1.5520691013637546e-05, "loss": 1.6791, "step": 107020 }, { "epoch": 0.6727083456091765, "grad_norm": 7.7460103034973145, "learning_rate": 1.5520271912692893e-05, "loss": 1.6308, "step": 107030 }, { "epoch": 0.6727711979258736, "grad_norm": 6.830924987792969, "learning_rate": 1.551985281174824e-05, "loss": 1.7531, "step": 107040 }, { "epoch": 0.6728340502425707, "grad_norm": 6.992501258850098, "learning_rate": 1.5519433710803587e-05, "loss": 1.7381, "step": 107050 }, { "epoch": 0.6728969025592678, "grad_norm": 6.135435581207275, "learning_rate": 1.551901460985893e-05, "loss": 1.5797, "step": 107060 }, { "epoch": 0.6729597548759649, "grad_norm": 5.973193645477295, "learning_rate": 1.5518595508914278e-05, "loss": 1.7572, "step": 107070 }, { "epoch": 0.6730226071926619, "grad_norm": 7.702726364135742, "learning_rate": 1.5518176407969625e-05, "loss": 1.598, "step": 107080 }, { "epoch": 0.673085459509359, "grad_norm": 6.913188934326172, "learning_rate": 1.5517757307024972e-05, "loss": 1.4042, "step": 107090 }, { "epoch": 0.6731483118260562, "grad_norm": 7.558887004852295, "learning_rate": 1.5517338206080316e-05, "loss": 1.7481, "step": 107100 }, { "epoch": 0.6732111641427533, "grad_norm": 7.296529293060303, "learning_rate": 1.5516919105135663e-05, "loss": 1.4847, "step": 107110 }, { "epoch": 0.6732740164594504, "grad_norm": 6.7288289070129395, "learning_rate": 1.551650000419101e-05, "loss": 1.6209, "step": 107120 }, { "epoch": 0.6733368687761475, "grad_norm": 7.862184524536133, "learning_rate": 1.5516080903246357e-05, "loss": 1.6374, "step": 107130 }, { "epoch": 0.6733997210928446, "grad_norm": 7.252203464508057, "learning_rate": 1.5515661802301704e-05, "loss": 1.5912, "step": 107140 }, { "epoch": 0.6734625734095417, "grad_norm": 6.187528133392334, "learning_rate": 1.5515242701357048e-05, "loss": 1.3804, "step": 107150 }, { "epoch": 0.6735254257262389, "grad_norm": 6.828664779663086, "learning_rate": 1.5514823600412395e-05, "loss": 1.6811, "step": 107160 }, { "epoch": 0.673588278042936, "grad_norm": 6.430910587310791, "learning_rate": 1.5514404499467742e-05, "loss": 1.6207, "step": 107170 }, { "epoch": 0.6736511303596331, "grad_norm": 7.479432106018066, "learning_rate": 1.551398539852309e-05, "loss": 1.7641, "step": 107180 }, { "epoch": 0.6737139826763302, "grad_norm": 7.683620929718018, "learning_rate": 1.5513566297578436e-05, "loss": 1.5278, "step": 107190 }, { "epoch": 0.6737768349930273, "grad_norm": 6.850433349609375, "learning_rate": 1.5513147196633783e-05, "loss": 1.6015, "step": 107200 }, { "epoch": 0.6738396873097244, "grad_norm": 6.360098361968994, "learning_rate": 1.551272809568913e-05, "loss": 1.5139, "step": 107210 }, { "epoch": 0.6739025396264215, "grad_norm": 7.004693031311035, "learning_rate": 1.5512308994744478e-05, "loss": 1.7587, "step": 107220 }, { "epoch": 0.6739653919431187, "grad_norm": 6.2940778732299805, "learning_rate": 1.551188989379982e-05, "loss": 1.5376, "step": 107230 }, { "epoch": 0.6740282442598158, "grad_norm": 6.842554092407227, "learning_rate": 1.5511470792855168e-05, "loss": 1.6649, "step": 107240 }, { "epoch": 0.6740910965765129, "grad_norm": 7.063696384429932, "learning_rate": 1.5511051691910515e-05, "loss": 1.5728, "step": 107250 }, { "epoch": 0.67415394889321, "grad_norm": 5.923314094543457, "learning_rate": 1.5510632590965862e-05, "loss": 1.8809, "step": 107260 }, { "epoch": 0.6742168012099071, "grad_norm": 5.884190082550049, "learning_rate": 1.551021349002121e-05, "loss": 1.6403, "step": 107270 }, { "epoch": 0.6742796535266042, "grad_norm": 6.68449592590332, "learning_rate": 1.5509794389076553e-05, "loss": 1.7118, "step": 107280 }, { "epoch": 0.6743425058433014, "grad_norm": 6.421838283538818, "learning_rate": 1.55093752881319e-05, "loss": 1.6536, "step": 107290 }, { "epoch": 0.6744053581599985, "grad_norm": 5.841495990753174, "learning_rate": 1.5508956187187247e-05, "loss": 1.6271, "step": 107300 }, { "epoch": 0.6744682104766956, "grad_norm": 7.0780720710754395, "learning_rate": 1.5508537086242594e-05, "loss": 1.7483, "step": 107310 }, { "epoch": 0.6745310627933927, "grad_norm": 6.905803680419922, "learning_rate": 1.5508117985297938e-05, "loss": 1.9378, "step": 107320 }, { "epoch": 0.6745939151100898, "grad_norm": 6.076014518737793, "learning_rate": 1.5507698884353285e-05, "loss": 1.6559, "step": 107330 }, { "epoch": 0.6746567674267868, "grad_norm": 6.738631725311279, "learning_rate": 1.5507279783408632e-05, "loss": 1.6098, "step": 107340 }, { "epoch": 0.6747196197434839, "grad_norm": 7.530428409576416, "learning_rate": 1.550686068246398e-05, "loss": 1.9081, "step": 107350 }, { "epoch": 0.674782472060181, "grad_norm": 6.086501121520996, "learning_rate": 1.5506441581519326e-05, "loss": 1.5324, "step": 107360 }, { "epoch": 0.6748453243768782, "grad_norm": 6.828165054321289, "learning_rate": 1.5506022480574673e-05, "loss": 1.6179, "step": 107370 }, { "epoch": 0.6749081766935753, "grad_norm": 7.415923118591309, "learning_rate": 1.5505603379630017e-05, "loss": 1.6776, "step": 107380 }, { "epoch": 0.6749710290102724, "grad_norm": 5.763005256652832, "learning_rate": 1.5505184278685364e-05, "loss": 1.6505, "step": 107390 }, { "epoch": 0.6750338813269695, "grad_norm": 7.107841491699219, "learning_rate": 1.550476517774071e-05, "loss": 1.5215, "step": 107400 }, { "epoch": 0.6750967336436666, "grad_norm": 5.866076469421387, "learning_rate": 1.5504346076796058e-05, "loss": 1.4398, "step": 107410 }, { "epoch": 0.6751595859603637, "grad_norm": 6.280481815338135, "learning_rate": 1.5503926975851405e-05, "loss": 1.8825, "step": 107420 }, { "epoch": 0.6752224382770609, "grad_norm": 6.061824798583984, "learning_rate": 1.5503507874906752e-05, "loss": 1.4968, "step": 107430 }, { "epoch": 0.675285290593758, "grad_norm": 5.807056427001953, "learning_rate": 1.55030887739621e-05, "loss": 1.8132, "step": 107440 }, { "epoch": 0.6753481429104551, "grad_norm": 6.263597011566162, "learning_rate": 1.5502669673017447e-05, "loss": 1.5875, "step": 107450 }, { "epoch": 0.6754109952271522, "grad_norm": 6.837041854858398, "learning_rate": 1.550225057207279e-05, "loss": 1.7016, "step": 107460 }, { "epoch": 0.6754738475438493, "grad_norm": 7.085463047027588, "learning_rate": 1.5501831471128137e-05, "loss": 1.7122, "step": 107470 }, { "epoch": 0.6755366998605464, "grad_norm": 5.949290752410889, "learning_rate": 1.5501412370183484e-05, "loss": 1.6366, "step": 107480 }, { "epoch": 0.6755995521772435, "grad_norm": 6.12795877456665, "learning_rate": 1.550099326923883e-05, "loss": 1.5677, "step": 107490 }, { "epoch": 0.6756624044939407, "grad_norm": 6.4892778396606445, "learning_rate": 1.5500574168294175e-05, "loss": 1.5485, "step": 107500 }, { "epoch": 0.6757252568106378, "grad_norm": 7.078492164611816, "learning_rate": 1.5500155067349522e-05, "loss": 1.6116, "step": 107510 }, { "epoch": 0.6757881091273349, "grad_norm": 6.096056938171387, "learning_rate": 1.549973596640487e-05, "loss": 1.5864, "step": 107520 }, { "epoch": 0.675850961444032, "grad_norm": 6.814564228057861, "learning_rate": 1.5499316865460216e-05, "loss": 1.4588, "step": 107530 }, { "epoch": 0.6759138137607291, "grad_norm": 7.397793292999268, "learning_rate": 1.549889776451556e-05, "loss": 1.6345, "step": 107540 }, { "epoch": 0.6759766660774262, "grad_norm": 6.652068138122559, "learning_rate": 1.5498478663570907e-05, "loss": 1.7347, "step": 107550 }, { "epoch": 0.6760395183941234, "grad_norm": 7.2200822830200195, "learning_rate": 1.5498059562626254e-05, "loss": 1.598, "step": 107560 }, { "epoch": 0.6761023707108205, "grad_norm": 5.757396221160889, "learning_rate": 1.54976404616816e-05, "loss": 1.6958, "step": 107570 }, { "epoch": 0.6761652230275176, "grad_norm": 6.409972667694092, "learning_rate": 1.549722136073695e-05, "loss": 1.6045, "step": 107580 }, { "epoch": 0.6762280753442146, "grad_norm": 5.316153526306152, "learning_rate": 1.5496802259792295e-05, "loss": 1.4484, "step": 107590 }, { "epoch": 0.6762909276609117, "grad_norm": 6.884145259857178, "learning_rate": 1.5496383158847642e-05, "loss": 1.8094, "step": 107600 }, { "epoch": 0.6763537799776088, "grad_norm": 6.612267017364502, "learning_rate": 1.549596405790299e-05, "loss": 1.6474, "step": 107610 }, { "epoch": 0.6764166322943059, "grad_norm": 6.848570823669434, "learning_rate": 1.5495544956958337e-05, "loss": 1.6618, "step": 107620 }, { "epoch": 0.676479484611003, "grad_norm": 6.698587417602539, "learning_rate": 1.549512585601368e-05, "loss": 1.681, "step": 107630 }, { "epoch": 0.6765423369277002, "grad_norm": 6.573361873626709, "learning_rate": 1.5494706755069027e-05, "loss": 1.6353, "step": 107640 }, { "epoch": 0.6766051892443973, "grad_norm": 6.124057769775391, "learning_rate": 1.5494287654124374e-05, "loss": 1.5539, "step": 107650 }, { "epoch": 0.6766680415610944, "grad_norm": 7.117899417877197, "learning_rate": 1.549386855317972e-05, "loss": 1.4745, "step": 107660 }, { "epoch": 0.6767308938777915, "grad_norm": 6.080042839050293, "learning_rate": 1.549344945223507e-05, "loss": 1.805, "step": 107670 }, { "epoch": 0.6767937461944886, "grad_norm": 7.0961174964904785, "learning_rate": 1.5493030351290412e-05, "loss": 1.6328, "step": 107680 }, { "epoch": 0.6768565985111857, "grad_norm": 7.08535623550415, "learning_rate": 1.549261125034576e-05, "loss": 1.7396, "step": 107690 }, { "epoch": 0.6769194508278829, "grad_norm": 6.2137837409973145, "learning_rate": 1.5492192149401106e-05, "loss": 1.7535, "step": 107700 }, { "epoch": 0.67698230314458, "grad_norm": 6.663417339324951, "learning_rate": 1.5491773048456453e-05, "loss": 1.8152, "step": 107710 }, { "epoch": 0.6770451554612771, "grad_norm": 7.089684963226318, "learning_rate": 1.5491353947511797e-05, "loss": 1.7627, "step": 107720 }, { "epoch": 0.6771080077779742, "grad_norm": 7.1028218269348145, "learning_rate": 1.5490934846567144e-05, "loss": 1.5991, "step": 107730 }, { "epoch": 0.6771708600946713, "grad_norm": 6.861858367919922, "learning_rate": 1.549051574562249e-05, "loss": 1.4481, "step": 107740 }, { "epoch": 0.6772337124113684, "grad_norm": 7.217982292175293, "learning_rate": 1.549009664467784e-05, "loss": 1.6189, "step": 107750 }, { "epoch": 0.6772965647280655, "grad_norm": 6.6078972816467285, "learning_rate": 1.5489677543733185e-05, "loss": 1.7096, "step": 107760 }, { "epoch": 0.6773594170447627, "grad_norm": 6.024550914764404, "learning_rate": 1.548925844278853e-05, "loss": 1.777, "step": 107770 }, { "epoch": 0.6774222693614598, "grad_norm": 5.867654323577881, "learning_rate": 1.5488839341843876e-05, "loss": 1.4508, "step": 107780 }, { "epoch": 0.6774851216781569, "grad_norm": 6.194303035736084, "learning_rate": 1.5488420240899223e-05, "loss": 1.7473, "step": 107790 }, { "epoch": 0.677547973994854, "grad_norm": 7.7232160568237305, "learning_rate": 1.548800113995457e-05, "loss": 1.4863, "step": 107800 }, { "epoch": 0.6776108263115511, "grad_norm": 6.032736778259277, "learning_rate": 1.5487582039009917e-05, "loss": 1.5798, "step": 107810 }, { "epoch": 0.6776736786282482, "grad_norm": 7.0337114334106445, "learning_rate": 1.5487162938065264e-05, "loss": 1.5471, "step": 107820 }, { "epoch": 0.6777365309449453, "grad_norm": 6.021357536315918, "learning_rate": 1.548674383712061e-05, "loss": 1.4817, "step": 107830 }, { "epoch": 0.6777993832616425, "grad_norm": 5.410083770751953, "learning_rate": 1.548632473617596e-05, "loss": 1.6508, "step": 107840 }, { "epoch": 0.6778622355783395, "grad_norm": 7.354559421539307, "learning_rate": 1.5485905635231302e-05, "loss": 1.5473, "step": 107850 }, { "epoch": 0.6779250878950366, "grad_norm": 6.202705383300781, "learning_rate": 1.548548653428665e-05, "loss": 1.609, "step": 107860 }, { "epoch": 0.6779879402117337, "grad_norm": 6.383668899536133, "learning_rate": 1.5485067433341996e-05, "loss": 1.8578, "step": 107870 }, { "epoch": 0.6780507925284308, "grad_norm": 7.749847888946533, "learning_rate": 1.5484648332397344e-05, "loss": 1.6429, "step": 107880 }, { "epoch": 0.6781136448451279, "grad_norm": 5.802626132965088, "learning_rate": 1.548422923145269e-05, "loss": 1.5985, "step": 107890 }, { "epoch": 0.678176497161825, "grad_norm": 5.312656402587891, "learning_rate": 1.5483810130508034e-05, "loss": 1.702, "step": 107900 }, { "epoch": 0.6782393494785222, "grad_norm": 6.155538082122803, "learning_rate": 1.548339102956338e-05, "loss": 1.6247, "step": 107910 }, { "epoch": 0.6783022017952193, "grad_norm": 6.4692816734313965, "learning_rate": 1.548297192861873e-05, "loss": 1.6866, "step": 107920 }, { "epoch": 0.6783650541119164, "grad_norm": 6.633361339569092, "learning_rate": 1.5482552827674075e-05, "loss": 1.8328, "step": 107930 }, { "epoch": 0.6784279064286135, "grad_norm": 6.464073181152344, "learning_rate": 1.548213372672942e-05, "loss": 1.585, "step": 107940 }, { "epoch": 0.6784907587453106, "grad_norm": 7.110156059265137, "learning_rate": 1.5481714625784766e-05, "loss": 1.6967, "step": 107950 }, { "epoch": 0.6785536110620077, "grad_norm": 6.3033318519592285, "learning_rate": 1.5481295524840113e-05, "loss": 1.5713, "step": 107960 }, { "epoch": 0.6786164633787048, "grad_norm": 6.375494480133057, "learning_rate": 1.548087642389546e-05, "loss": 1.6137, "step": 107970 }, { "epoch": 0.678679315695402, "grad_norm": 7.028976917266846, "learning_rate": 1.5480457322950807e-05, "loss": 1.5442, "step": 107980 }, { "epoch": 0.6787421680120991, "grad_norm": 6.737597465515137, "learning_rate": 1.5480038222006155e-05, "loss": 1.448, "step": 107990 }, { "epoch": 0.6788050203287962, "grad_norm": 5.59172248840332, "learning_rate": 1.54796191210615e-05, "loss": 1.545, "step": 108000 }, { "epoch": 0.6788678726454933, "grad_norm": 6.734803199768066, "learning_rate": 1.5479200020116845e-05, "loss": 1.7618, "step": 108010 }, { "epoch": 0.6789307249621904, "grad_norm": 6.651988983154297, "learning_rate": 1.5478780919172192e-05, "loss": 1.575, "step": 108020 }, { "epoch": 0.6789935772788875, "grad_norm": 7.12288236618042, "learning_rate": 1.547836181822754e-05, "loss": 1.5762, "step": 108030 }, { "epoch": 0.6790564295955847, "grad_norm": 5.929279804229736, "learning_rate": 1.5477942717282886e-05, "loss": 1.6749, "step": 108040 }, { "epoch": 0.6791192819122818, "grad_norm": 6.111502647399902, "learning_rate": 1.5477523616338234e-05, "loss": 1.5812, "step": 108050 }, { "epoch": 0.6791821342289789, "grad_norm": 5.756034851074219, "learning_rate": 1.547710451539358e-05, "loss": 1.6907, "step": 108060 }, { "epoch": 0.679244986545676, "grad_norm": 5.800210952758789, "learning_rate": 1.5476685414448928e-05, "loss": 1.7198, "step": 108070 }, { "epoch": 0.6793078388623731, "grad_norm": 6.871427536010742, "learning_rate": 1.547626631350427e-05, "loss": 1.72, "step": 108080 }, { "epoch": 0.6793706911790702, "grad_norm": 5.799198150634766, "learning_rate": 1.547584721255962e-05, "loss": 1.7499, "step": 108090 }, { "epoch": 0.6794335434957673, "grad_norm": 6.329927921295166, "learning_rate": 1.5475428111614966e-05, "loss": 1.6051, "step": 108100 }, { "epoch": 0.6794963958124643, "grad_norm": 7.542634963989258, "learning_rate": 1.5475009010670313e-05, "loss": 1.5641, "step": 108110 }, { "epoch": 0.6795592481291615, "grad_norm": 7.232003211975098, "learning_rate": 1.5474589909725656e-05, "loss": 1.6142, "step": 108120 }, { "epoch": 0.6796221004458586, "grad_norm": 5.938115119934082, "learning_rate": 1.5474170808781003e-05, "loss": 1.7776, "step": 108130 }, { "epoch": 0.6796849527625557, "grad_norm": 6.24160099029541, "learning_rate": 1.547375170783635e-05, "loss": 1.5408, "step": 108140 }, { "epoch": 0.6797478050792528, "grad_norm": 6.338478088378906, "learning_rate": 1.5473332606891697e-05, "loss": 1.5938, "step": 108150 }, { "epoch": 0.6798106573959499, "grad_norm": 6.602304935455322, "learning_rate": 1.5472913505947045e-05, "loss": 1.5456, "step": 108160 }, { "epoch": 0.679873509712647, "grad_norm": 6.956899166107178, "learning_rate": 1.5472494405002388e-05, "loss": 1.6943, "step": 108170 }, { "epoch": 0.6799363620293442, "grad_norm": 7.508603096008301, "learning_rate": 1.5472075304057735e-05, "loss": 1.5053, "step": 108180 }, { "epoch": 0.6799992143460413, "grad_norm": 7.045706272125244, "learning_rate": 1.5471656203113082e-05, "loss": 1.6037, "step": 108190 }, { "epoch": 0.6800620666627384, "grad_norm": 7.3159871101379395, "learning_rate": 1.547123710216843e-05, "loss": 1.6272, "step": 108200 }, { "epoch": 0.6801249189794355, "grad_norm": 6.823150634765625, "learning_rate": 1.5470818001223777e-05, "loss": 1.6574, "step": 108210 }, { "epoch": 0.6801877712961326, "grad_norm": 6.396905422210693, "learning_rate": 1.5470398900279124e-05, "loss": 1.7211, "step": 108220 }, { "epoch": 0.6802506236128297, "grad_norm": 5.912825107574463, "learning_rate": 1.546997979933447e-05, "loss": 1.5153, "step": 108230 }, { "epoch": 0.6803134759295268, "grad_norm": 6.832945346832275, "learning_rate": 1.5469560698389818e-05, "loss": 1.4702, "step": 108240 }, { "epoch": 0.680376328246224, "grad_norm": 6.325376987457275, "learning_rate": 1.546914159744516e-05, "loss": 1.6689, "step": 108250 }, { "epoch": 0.6804391805629211, "grad_norm": 6.241364479064941, "learning_rate": 1.546872249650051e-05, "loss": 1.6209, "step": 108260 }, { "epoch": 0.6805020328796182, "grad_norm": 6.7398905754089355, "learning_rate": 1.5468303395555856e-05, "loss": 1.6831, "step": 108270 }, { "epoch": 0.6805648851963153, "grad_norm": 6.899637699127197, "learning_rate": 1.5467884294611203e-05, "loss": 1.7488, "step": 108280 }, { "epoch": 0.6806277375130124, "grad_norm": 6.30224609375, "learning_rate": 1.546746519366655e-05, "loss": 1.6354, "step": 108290 }, { "epoch": 0.6806905898297095, "grad_norm": 6.6264495849609375, "learning_rate": 1.5467046092721893e-05, "loss": 1.7231, "step": 108300 }, { "epoch": 0.6807534421464067, "grad_norm": 6.702651500701904, "learning_rate": 1.546662699177724e-05, "loss": 1.5692, "step": 108310 }, { "epoch": 0.6808162944631038, "grad_norm": 7.259090423583984, "learning_rate": 1.5466207890832588e-05, "loss": 1.7154, "step": 108320 }, { "epoch": 0.6808791467798009, "grad_norm": 7.181676387786865, "learning_rate": 1.5465788789887935e-05, "loss": 1.7076, "step": 108330 }, { "epoch": 0.680941999096498, "grad_norm": 7.554019927978516, "learning_rate": 1.5465369688943278e-05, "loss": 1.5567, "step": 108340 }, { "epoch": 0.6810048514131951, "grad_norm": 6.8995137214660645, "learning_rate": 1.5464950587998625e-05, "loss": 1.6748, "step": 108350 }, { "epoch": 0.6810677037298921, "grad_norm": 6.453468322753906, "learning_rate": 1.5464531487053972e-05, "loss": 1.4256, "step": 108360 }, { "epoch": 0.6811305560465892, "grad_norm": 7.019639015197754, "learning_rate": 1.546411238610932e-05, "loss": 1.451, "step": 108370 }, { "epoch": 0.6811934083632863, "grad_norm": 7.011302947998047, "learning_rate": 1.5463693285164667e-05, "loss": 1.4287, "step": 108380 }, { "epoch": 0.6812562606799835, "grad_norm": 6.763635635375977, "learning_rate": 1.546327418422001e-05, "loss": 1.6974, "step": 108390 }, { "epoch": 0.6813191129966806, "grad_norm": 6.361536026000977, "learning_rate": 1.5462855083275357e-05, "loss": 1.6616, "step": 108400 }, { "epoch": 0.6813819653133777, "grad_norm": 7.214605808258057, "learning_rate": 1.5462435982330704e-05, "loss": 1.4605, "step": 108410 }, { "epoch": 0.6814448176300748, "grad_norm": 7.430278778076172, "learning_rate": 1.546201688138605e-05, "loss": 1.5119, "step": 108420 }, { "epoch": 0.6815076699467719, "grad_norm": 5.081109046936035, "learning_rate": 1.54615977804414e-05, "loss": 1.7151, "step": 108430 }, { "epoch": 0.681570522263469, "grad_norm": 6.070030212402344, "learning_rate": 1.5461178679496746e-05, "loss": 1.617, "step": 108440 }, { "epoch": 0.6816333745801662, "grad_norm": 7.476914405822754, "learning_rate": 1.5460759578552093e-05, "loss": 1.921, "step": 108450 }, { "epoch": 0.6816962268968633, "grad_norm": 5.63602352142334, "learning_rate": 1.546034047760744e-05, "loss": 1.575, "step": 108460 }, { "epoch": 0.6817590792135604, "grad_norm": 6.046362400054932, "learning_rate": 1.5459921376662787e-05, "loss": 1.6911, "step": 108470 }, { "epoch": 0.6818219315302575, "grad_norm": 7.255295276641846, "learning_rate": 1.545950227571813e-05, "loss": 1.6588, "step": 108480 }, { "epoch": 0.6818847838469546, "grad_norm": 6.208819389343262, "learning_rate": 1.5459083174773478e-05, "loss": 1.5616, "step": 108490 }, { "epoch": 0.6819476361636517, "grad_norm": 6.300037384033203, "learning_rate": 1.5458664073828825e-05, "loss": 1.5962, "step": 108500 }, { "epoch": 0.6820104884803488, "grad_norm": 6.827779769897461, "learning_rate": 1.545824497288417e-05, "loss": 1.7844, "step": 108510 }, { "epoch": 0.682073340797046, "grad_norm": 6.917608737945557, "learning_rate": 1.5457825871939515e-05, "loss": 1.5409, "step": 108520 }, { "epoch": 0.6821361931137431, "grad_norm": 6.608980655670166, "learning_rate": 1.5457406770994862e-05, "loss": 1.4922, "step": 108530 }, { "epoch": 0.6821990454304402, "grad_norm": 7.339383125305176, "learning_rate": 1.545698767005021e-05, "loss": 1.4428, "step": 108540 }, { "epoch": 0.6822618977471373, "grad_norm": 6.288963794708252, "learning_rate": 1.5456568569105557e-05, "loss": 1.4886, "step": 108550 }, { "epoch": 0.6823247500638344, "grad_norm": 7.163339138031006, "learning_rate": 1.54561494681609e-05, "loss": 1.6897, "step": 108560 }, { "epoch": 0.6823876023805315, "grad_norm": 6.777314186096191, "learning_rate": 1.5455730367216247e-05, "loss": 1.6044, "step": 108570 }, { "epoch": 0.6824504546972286, "grad_norm": 5.41243839263916, "learning_rate": 1.5455311266271594e-05, "loss": 1.8293, "step": 108580 }, { "epoch": 0.6825133070139258, "grad_norm": 6.540939807891846, "learning_rate": 1.545489216532694e-05, "loss": 1.6995, "step": 108590 }, { "epoch": 0.6825761593306229, "grad_norm": 8.392694473266602, "learning_rate": 1.545447306438229e-05, "loss": 1.6126, "step": 108600 }, { "epoch": 0.68263901164732, "grad_norm": 5.375041961669922, "learning_rate": 1.5454053963437636e-05, "loss": 1.3655, "step": 108610 }, { "epoch": 0.682701863964017, "grad_norm": 6.172969341278076, "learning_rate": 1.5453634862492983e-05, "loss": 1.7403, "step": 108620 }, { "epoch": 0.6827647162807141, "grad_norm": 6.109135627746582, "learning_rate": 1.5453215761548326e-05, "loss": 1.3881, "step": 108630 }, { "epoch": 0.6828275685974112, "grad_norm": 5.76917839050293, "learning_rate": 1.5452796660603673e-05, "loss": 1.5411, "step": 108640 }, { "epoch": 0.6828904209141083, "grad_norm": 6.600244522094727, "learning_rate": 1.545237755965902e-05, "loss": 1.8191, "step": 108650 }, { "epoch": 0.6829532732308055, "grad_norm": 6.708452224731445, "learning_rate": 1.5451958458714368e-05, "loss": 1.64, "step": 108660 }, { "epoch": 0.6830161255475026, "grad_norm": 7.054609775543213, "learning_rate": 1.5451539357769715e-05, "loss": 1.6014, "step": 108670 }, { "epoch": 0.6830789778641997, "grad_norm": 7.061050891876221, "learning_rate": 1.5451120256825062e-05, "loss": 1.504, "step": 108680 }, { "epoch": 0.6831418301808968, "grad_norm": 6.879281520843506, "learning_rate": 1.545070115588041e-05, "loss": 1.7542, "step": 108690 }, { "epoch": 0.6832046824975939, "grad_norm": 6.8982343673706055, "learning_rate": 1.5450282054935752e-05, "loss": 1.7823, "step": 108700 }, { "epoch": 0.683267534814291, "grad_norm": 5.512999057769775, "learning_rate": 1.54498629539911e-05, "loss": 1.6196, "step": 108710 }, { "epoch": 0.6833303871309881, "grad_norm": 6.384003639221191, "learning_rate": 1.5449443853046447e-05, "loss": 1.5271, "step": 108720 }, { "epoch": 0.6833932394476853, "grad_norm": 7.060078144073486, "learning_rate": 1.5449024752101794e-05, "loss": 1.4296, "step": 108730 }, { "epoch": 0.6834560917643824, "grad_norm": 6.49919319152832, "learning_rate": 1.5448605651157137e-05, "loss": 1.4782, "step": 108740 }, { "epoch": 0.6835189440810795, "grad_norm": 4.964798927307129, "learning_rate": 1.5448186550212484e-05, "loss": 1.3391, "step": 108750 }, { "epoch": 0.6835817963977766, "grad_norm": 6.3799729347229, "learning_rate": 1.544776744926783e-05, "loss": 1.3969, "step": 108760 }, { "epoch": 0.6836446487144737, "grad_norm": 6.024362564086914, "learning_rate": 1.544734834832318e-05, "loss": 1.5368, "step": 108770 }, { "epoch": 0.6837075010311708, "grad_norm": 6.578910827636719, "learning_rate": 1.5446929247378526e-05, "loss": 1.7696, "step": 108780 }, { "epoch": 0.683770353347868, "grad_norm": 6.558101177215576, "learning_rate": 1.544651014643387e-05, "loss": 1.686, "step": 108790 }, { "epoch": 0.6838332056645651, "grad_norm": 6.501033306121826, "learning_rate": 1.5446091045489216e-05, "loss": 1.6505, "step": 108800 }, { "epoch": 0.6838960579812622, "grad_norm": 6.418697834014893, "learning_rate": 1.5445671944544563e-05, "loss": 1.5026, "step": 108810 }, { "epoch": 0.6839589102979593, "grad_norm": 7.684604167938232, "learning_rate": 1.544525284359991e-05, "loss": 1.7611, "step": 108820 }, { "epoch": 0.6840217626146564, "grad_norm": 5.792769432067871, "learning_rate": 1.5444833742655258e-05, "loss": 1.7072, "step": 108830 }, { "epoch": 0.6840846149313535, "grad_norm": 6.451221942901611, "learning_rate": 1.5444414641710605e-05, "loss": 1.5386, "step": 108840 }, { "epoch": 0.6841474672480506, "grad_norm": 6.220157146453857, "learning_rate": 1.5443995540765952e-05, "loss": 1.6949, "step": 108850 }, { "epoch": 0.6842103195647478, "grad_norm": 6.748058319091797, "learning_rate": 1.54435764398213e-05, "loss": 1.5862, "step": 108860 }, { "epoch": 0.6842731718814448, "grad_norm": 6.666525363922119, "learning_rate": 1.5443157338876643e-05, "loss": 1.4896, "step": 108870 }, { "epoch": 0.6843360241981419, "grad_norm": 5.935793876647949, "learning_rate": 1.544273823793199e-05, "loss": 1.5257, "step": 108880 }, { "epoch": 0.684398876514839, "grad_norm": 5.911357879638672, "learning_rate": 1.5442319136987337e-05, "loss": 1.6697, "step": 108890 }, { "epoch": 0.6844617288315361, "grad_norm": 6.261645317077637, "learning_rate": 1.5441900036042684e-05, "loss": 1.4149, "step": 108900 }, { "epoch": 0.6845245811482332, "grad_norm": 7.229828357696533, "learning_rate": 1.544148093509803e-05, "loss": 1.5759, "step": 108910 }, { "epoch": 0.6845874334649303, "grad_norm": 6.908501625061035, "learning_rate": 1.5441061834153374e-05, "loss": 1.7641, "step": 108920 }, { "epoch": 0.6846502857816275, "grad_norm": 6.935072898864746, "learning_rate": 1.544064273320872e-05, "loss": 1.6499, "step": 108930 }, { "epoch": 0.6847131380983246, "grad_norm": 6.077419757843018, "learning_rate": 1.544022363226407e-05, "loss": 1.7182, "step": 108940 }, { "epoch": 0.6847759904150217, "grad_norm": 8.297808647155762, "learning_rate": 1.5439804531319416e-05, "loss": 1.6095, "step": 108950 }, { "epoch": 0.6848388427317188, "grad_norm": 6.749807834625244, "learning_rate": 1.543938543037476e-05, "loss": 1.7973, "step": 108960 }, { "epoch": 0.6849016950484159, "grad_norm": 6.1888837814331055, "learning_rate": 1.5438966329430106e-05, "loss": 1.5648, "step": 108970 }, { "epoch": 0.684964547365113, "grad_norm": 5.637599945068359, "learning_rate": 1.5438547228485454e-05, "loss": 1.485, "step": 108980 }, { "epoch": 0.6850273996818101, "grad_norm": 5.449834823608398, "learning_rate": 1.54381281275408e-05, "loss": 1.412, "step": 108990 }, { "epoch": 0.6850902519985073, "grad_norm": 6.458430767059326, "learning_rate": 1.5437709026596148e-05, "loss": 1.7153, "step": 109000 }, { "epoch": 0.6851531043152044, "grad_norm": 7.78114652633667, "learning_rate": 1.543728992565149e-05, "loss": 1.4838, "step": 109010 }, { "epoch": 0.6852159566319015, "grad_norm": 6.260158538818359, "learning_rate": 1.543687082470684e-05, "loss": 1.5003, "step": 109020 }, { "epoch": 0.6852788089485986, "grad_norm": 6.7112507820129395, "learning_rate": 1.5436451723762185e-05, "loss": 1.4536, "step": 109030 }, { "epoch": 0.6853416612652957, "grad_norm": 7.102040767669678, "learning_rate": 1.5436032622817533e-05, "loss": 1.5841, "step": 109040 }, { "epoch": 0.6854045135819928, "grad_norm": 7.388706684112549, "learning_rate": 1.543561352187288e-05, "loss": 1.7225, "step": 109050 }, { "epoch": 0.68546736589869, "grad_norm": 6.066342830657959, "learning_rate": 1.5435194420928227e-05, "loss": 1.434, "step": 109060 }, { "epoch": 0.6855302182153871, "grad_norm": 6.081060409545898, "learning_rate": 1.5434775319983574e-05, "loss": 1.6384, "step": 109070 }, { "epoch": 0.6855930705320842, "grad_norm": 7.088042259216309, "learning_rate": 1.543435621903892e-05, "loss": 1.7854, "step": 109080 }, { "epoch": 0.6856559228487813, "grad_norm": 6.102912425994873, "learning_rate": 1.5433937118094268e-05, "loss": 1.8214, "step": 109090 }, { "epoch": 0.6857187751654784, "grad_norm": 6.492491722106934, "learning_rate": 1.543351801714961e-05, "loss": 1.56, "step": 109100 }, { "epoch": 0.6857816274821755, "grad_norm": 7.235417366027832, "learning_rate": 1.543309891620496e-05, "loss": 1.7355, "step": 109110 }, { "epoch": 0.6858444797988726, "grad_norm": 6.384990692138672, "learning_rate": 1.5432679815260306e-05, "loss": 1.2698, "step": 109120 }, { "epoch": 0.6859073321155696, "grad_norm": 6.30357027053833, "learning_rate": 1.5432260714315653e-05, "loss": 1.4585, "step": 109130 }, { "epoch": 0.6859701844322668, "grad_norm": 7.86502742767334, "learning_rate": 1.5431841613370996e-05, "loss": 1.6503, "step": 109140 }, { "epoch": 0.6860330367489639, "grad_norm": 7.20648193359375, "learning_rate": 1.5431422512426344e-05, "loss": 1.5368, "step": 109150 }, { "epoch": 0.686095889065661, "grad_norm": 6.791904449462891, "learning_rate": 1.543100341148169e-05, "loss": 1.7353, "step": 109160 }, { "epoch": 0.6861587413823581, "grad_norm": 6.168340682983398, "learning_rate": 1.5430584310537038e-05, "loss": 1.6341, "step": 109170 }, { "epoch": 0.6862215936990552, "grad_norm": 6.9182000160217285, "learning_rate": 1.543016520959238e-05, "loss": 1.6674, "step": 109180 }, { "epoch": 0.6862844460157523, "grad_norm": 6.65117883682251, "learning_rate": 1.542974610864773e-05, "loss": 1.8268, "step": 109190 }, { "epoch": 0.6863472983324495, "grad_norm": 4.9110236167907715, "learning_rate": 1.5429327007703076e-05, "loss": 1.5559, "step": 109200 }, { "epoch": 0.6864101506491466, "grad_norm": 6.228017807006836, "learning_rate": 1.5428907906758423e-05, "loss": 1.4989, "step": 109210 }, { "epoch": 0.6864730029658437, "grad_norm": 7.477842807769775, "learning_rate": 1.542848880581377e-05, "loss": 1.5137, "step": 109220 }, { "epoch": 0.6865358552825408, "grad_norm": 6.381414413452148, "learning_rate": 1.5428069704869117e-05, "loss": 1.4476, "step": 109230 }, { "epoch": 0.6865987075992379, "grad_norm": 7.051868438720703, "learning_rate": 1.5427650603924464e-05, "loss": 1.6207, "step": 109240 }, { "epoch": 0.686661559915935, "grad_norm": 6.752513408660889, "learning_rate": 1.542723150297981e-05, "loss": 1.6355, "step": 109250 }, { "epoch": 0.6867244122326321, "grad_norm": 6.904550075531006, "learning_rate": 1.5426812402035155e-05, "loss": 1.8425, "step": 109260 }, { "epoch": 0.6867872645493293, "grad_norm": 7.052554607391357, "learning_rate": 1.54263933010905e-05, "loss": 1.7125, "step": 109270 }, { "epoch": 0.6868501168660264, "grad_norm": 7.331358909606934, "learning_rate": 1.542597420014585e-05, "loss": 1.6841, "step": 109280 }, { "epoch": 0.6869129691827235, "grad_norm": 7.771301746368408, "learning_rate": 1.5425555099201196e-05, "loss": 1.7112, "step": 109290 }, { "epoch": 0.6869758214994206, "grad_norm": 5.575971603393555, "learning_rate": 1.5425135998256543e-05, "loss": 1.4082, "step": 109300 }, { "epoch": 0.6870386738161177, "grad_norm": 6.857352256774902, "learning_rate": 1.542471689731189e-05, "loss": 1.6333, "step": 109310 }, { "epoch": 0.6871015261328148, "grad_norm": 6.6807732582092285, "learning_rate": 1.5424297796367234e-05, "loss": 1.6542, "step": 109320 }, { "epoch": 0.687164378449512, "grad_norm": 7.070662498474121, "learning_rate": 1.542387869542258e-05, "loss": 1.7512, "step": 109330 }, { "epoch": 0.6872272307662091, "grad_norm": 5.746772289276123, "learning_rate": 1.5423459594477928e-05, "loss": 1.7782, "step": 109340 }, { "epoch": 0.6872900830829062, "grad_norm": 6.7605390548706055, "learning_rate": 1.5423040493533275e-05, "loss": 1.6884, "step": 109350 }, { "epoch": 0.6873529353996033, "grad_norm": 7.406123161315918, "learning_rate": 1.542262139258862e-05, "loss": 1.8441, "step": 109360 }, { "epoch": 0.6874157877163004, "grad_norm": 5.762146472930908, "learning_rate": 1.5422202291643966e-05, "loss": 1.4661, "step": 109370 }, { "epoch": 0.6874786400329974, "grad_norm": 6.332161903381348, "learning_rate": 1.5421783190699313e-05, "loss": 1.3427, "step": 109380 }, { "epoch": 0.6875414923496945, "grad_norm": 7.25848388671875, "learning_rate": 1.542136408975466e-05, "loss": 1.7529, "step": 109390 }, { "epoch": 0.6876043446663916, "grad_norm": 7.368528842926025, "learning_rate": 1.5420944988810007e-05, "loss": 2.0065, "step": 109400 }, { "epoch": 0.6876671969830888, "grad_norm": 7.377408027648926, "learning_rate": 1.542052588786535e-05, "loss": 1.7156, "step": 109410 }, { "epoch": 0.6877300492997859, "grad_norm": 6.124571323394775, "learning_rate": 1.5420106786920698e-05, "loss": 1.505, "step": 109420 }, { "epoch": 0.687792901616483, "grad_norm": 6.970246315002441, "learning_rate": 1.5419687685976045e-05, "loss": 1.5657, "step": 109430 }, { "epoch": 0.6878557539331801, "grad_norm": 7.313261032104492, "learning_rate": 1.541926858503139e-05, "loss": 1.6251, "step": 109440 }, { "epoch": 0.6879186062498772, "grad_norm": 7.775289058685303, "learning_rate": 1.541884948408674e-05, "loss": 1.6768, "step": 109450 }, { "epoch": 0.6879814585665743, "grad_norm": 8.135762214660645, "learning_rate": 1.5418430383142086e-05, "loss": 1.8218, "step": 109460 }, { "epoch": 0.6880443108832714, "grad_norm": 6.805774688720703, "learning_rate": 1.5418011282197433e-05, "loss": 1.3224, "step": 109470 }, { "epoch": 0.6881071631999686, "grad_norm": 6.121889591217041, "learning_rate": 1.541759218125278e-05, "loss": 1.63, "step": 109480 }, { "epoch": 0.6881700155166657, "grad_norm": 7.373378753662109, "learning_rate": 1.5417173080308124e-05, "loss": 1.6781, "step": 109490 }, { "epoch": 0.6882328678333628, "grad_norm": 6.9453864097595215, "learning_rate": 1.541675397936347e-05, "loss": 1.6407, "step": 109500 }, { "epoch": 0.6882957201500599, "grad_norm": 7.068073272705078, "learning_rate": 1.5416334878418818e-05, "loss": 1.6273, "step": 109510 }, { "epoch": 0.688358572466757, "grad_norm": 8.614026069641113, "learning_rate": 1.5415915777474165e-05, "loss": 1.7811, "step": 109520 }, { "epoch": 0.6884214247834541, "grad_norm": 7.80880069732666, "learning_rate": 1.5415496676529512e-05, "loss": 1.5961, "step": 109530 }, { "epoch": 0.6884842771001513, "grad_norm": 6.941601753234863, "learning_rate": 1.5415077575584856e-05, "loss": 1.7483, "step": 109540 }, { "epoch": 0.6885471294168484, "grad_norm": 6.181019306182861, "learning_rate": 1.5414658474640203e-05, "loss": 1.601, "step": 109550 }, { "epoch": 0.6886099817335455, "grad_norm": 6.1155219078063965, "learning_rate": 1.541423937369555e-05, "loss": 1.7136, "step": 109560 }, { "epoch": 0.6886728340502426, "grad_norm": 7.124586582183838, "learning_rate": 1.5413820272750897e-05, "loss": 1.5453, "step": 109570 }, { "epoch": 0.6887356863669397, "grad_norm": 6.8544511795043945, "learning_rate": 1.541340117180624e-05, "loss": 1.5531, "step": 109580 }, { "epoch": 0.6887985386836368, "grad_norm": 7.167695999145508, "learning_rate": 1.5412982070861588e-05, "loss": 1.6281, "step": 109590 }, { "epoch": 0.688861391000334, "grad_norm": 6.573579788208008, "learning_rate": 1.5412562969916935e-05, "loss": 1.6064, "step": 109600 }, { "epoch": 0.6889242433170311, "grad_norm": 6.685288429260254, "learning_rate": 1.541214386897228e-05, "loss": 1.5467, "step": 109610 }, { "epoch": 0.6889870956337282, "grad_norm": 6.076452255249023, "learning_rate": 1.541172476802763e-05, "loss": 1.554, "step": 109620 }, { "epoch": 0.6890499479504253, "grad_norm": 5.2713623046875, "learning_rate": 1.5411305667082976e-05, "loss": 1.489, "step": 109630 }, { "epoch": 0.6891128002671223, "grad_norm": 5.7808966636657715, "learning_rate": 1.541088656613832e-05, "loss": 1.5726, "step": 109640 }, { "epoch": 0.6891756525838194, "grad_norm": 7.051931858062744, "learning_rate": 1.5410467465193667e-05, "loss": 1.7635, "step": 109650 }, { "epoch": 0.6892385049005165, "grad_norm": 7.634355545043945, "learning_rate": 1.5410048364249014e-05, "loss": 1.663, "step": 109660 }, { "epoch": 0.6893013572172136, "grad_norm": 7.906497955322266, "learning_rate": 1.540962926330436e-05, "loss": 1.6142, "step": 109670 }, { "epoch": 0.6893642095339108, "grad_norm": 7.191876411437988, "learning_rate": 1.5409210162359708e-05, "loss": 1.55, "step": 109680 }, { "epoch": 0.6894270618506079, "grad_norm": 7.026230812072754, "learning_rate": 1.5408791061415055e-05, "loss": 1.6485, "step": 109690 }, { "epoch": 0.689489914167305, "grad_norm": 6.550210475921631, "learning_rate": 1.5408371960470402e-05, "loss": 1.5969, "step": 109700 }, { "epoch": 0.6895527664840021, "grad_norm": 6.096058368682861, "learning_rate": 1.540795285952575e-05, "loss": 1.6915, "step": 109710 }, { "epoch": 0.6896156188006992, "grad_norm": 7.495835304260254, "learning_rate": 1.5407533758581093e-05, "loss": 1.7327, "step": 109720 }, { "epoch": 0.6896784711173963, "grad_norm": 6.865203380584717, "learning_rate": 1.540711465763644e-05, "loss": 1.6517, "step": 109730 }, { "epoch": 0.6897413234340934, "grad_norm": 5.886893272399902, "learning_rate": 1.5406695556691787e-05, "loss": 1.6512, "step": 109740 }, { "epoch": 0.6898041757507906, "grad_norm": 6.635234832763672, "learning_rate": 1.5406276455747134e-05, "loss": 1.6374, "step": 109750 }, { "epoch": 0.6898670280674877, "grad_norm": 7.9032816886901855, "learning_rate": 1.5405857354802478e-05, "loss": 1.5856, "step": 109760 }, { "epoch": 0.6899298803841848, "grad_norm": 5.6491379737854, "learning_rate": 1.5405438253857825e-05, "loss": 1.5709, "step": 109770 }, { "epoch": 0.6899927327008819, "grad_norm": 5.710926532745361, "learning_rate": 1.5405019152913172e-05, "loss": 1.4562, "step": 109780 }, { "epoch": 0.690055585017579, "grad_norm": 6.611181259155273, "learning_rate": 1.540460005196852e-05, "loss": 1.7436, "step": 109790 }, { "epoch": 0.6901184373342761, "grad_norm": 6.378403663635254, "learning_rate": 1.5404180951023862e-05, "loss": 1.618, "step": 109800 }, { "epoch": 0.6901812896509733, "grad_norm": 7.318225860595703, "learning_rate": 1.540376185007921e-05, "loss": 1.4601, "step": 109810 }, { "epoch": 0.6902441419676704, "grad_norm": 6.113973140716553, "learning_rate": 1.5403342749134557e-05, "loss": 1.4868, "step": 109820 }, { "epoch": 0.6903069942843675, "grad_norm": 6.777023792266846, "learning_rate": 1.5402923648189904e-05, "loss": 1.5545, "step": 109830 }, { "epoch": 0.6903698466010646, "grad_norm": 7.185040473937988, "learning_rate": 1.540250454724525e-05, "loss": 1.7578, "step": 109840 }, { "epoch": 0.6904326989177617, "grad_norm": 6.249362468719482, "learning_rate": 1.5402085446300598e-05, "loss": 1.7373, "step": 109850 }, { "epoch": 0.6904955512344588, "grad_norm": 6.4665608406066895, "learning_rate": 1.5401666345355945e-05, "loss": 1.7623, "step": 109860 }, { "epoch": 0.6905584035511559, "grad_norm": 7.5931267738342285, "learning_rate": 1.5401247244411292e-05, "loss": 1.5157, "step": 109870 }, { "epoch": 0.6906212558678531, "grad_norm": 6.92786169052124, "learning_rate": 1.5400828143466636e-05, "loss": 1.569, "step": 109880 }, { "epoch": 0.6906841081845501, "grad_norm": 6.552675724029541, "learning_rate": 1.5400409042521983e-05, "loss": 1.5763, "step": 109890 }, { "epoch": 0.6907469605012472, "grad_norm": 8.306509017944336, "learning_rate": 1.539998994157733e-05, "loss": 1.7155, "step": 109900 }, { "epoch": 0.6908098128179443, "grad_norm": 6.897550106048584, "learning_rate": 1.5399570840632677e-05, "loss": 1.684, "step": 109910 }, { "epoch": 0.6908726651346414, "grad_norm": 6.713376522064209, "learning_rate": 1.5399151739688024e-05, "loss": 1.6465, "step": 109920 }, { "epoch": 0.6909355174513385, "grad_norm": 6.562190532684326, "learning_rate": 1.539873263874337e-05, "loss": 1.5514, "step": 109930 }, { "epoch": 0.6909983697680356, "grad_norm": 5.659005165100098, "learning_rate": 1.5398313537798715e-05, "loss": 1.5351, "step": 109940 }, { "epoch": 0.6910612220847328, "grad_norm": 6.241729259490967, "learning_rate": 1.5397894436854062e-05, "loss": 1.7717, "step": 109950 }, { "epoch": 0.6911240744014299, "grad_norm": 6.490688800811768, "learning_rate": 1.539747533590941e-05, "loss": 1.6363, "step": 109960 }, { "epoch": 0.691186926718127, "grad_norm": 6.120362281799316, "learning_rate": 1.5397056234964756e-05, "loss": 1.4055, "step": 109970 }, { "epoch": 0.6912497790348241, "grad_norm": 6.8912034034729, "learning_rate": 1.53966371340201e-05, "loss": 1.4607, "step": 109980 }, { "epoch": 0.6913126313515212, "grad_norm": 6.576463222503662, "learning_rate": 1.5396218033075447e-05, "loss": 1.8473, "step": 109990 }, { "epoch": 0.6913754836682183, "grad_norm": 6.136288642883301, "learning_rate": 1.5395798932130794e-05, "loss": 1.7056, "step": 110000 }, { "epoch": 0.6914383359849154, "grad_norm": 7.206757068634033, "learning_rate": 1.539537983118614e-05, "loss": 1.5955, "step": 110010 }, { "epoch": 0.6915011883016126, "grad_norm": 6.368089199066162, "learning_rate": 1.5394960730241488e-05, "loss": 1.6546, "step": 110020 }, { "epoch": 0.6915640406183097, "grad_norm": 6.267004013061523, "learning_rate": 1.539454162929683e-05, "loss": 1.6784, "step": 110030 }, { "epoch": 0.6916268929350068, "grad_norm": 6.834345817565918, "learning_rate": 1.539412252835218e-05, "loss": 1.775, "step": 110040 }, { "epoch": 0.6916897452517039, "grad_norm": 5.859621524810791, "learning_rate": 1.5393703427407526e-05, "loss": 1.4838, "step": 110050 }, { "epoch": 0.691752597568401, "grad_norm": 5.676794052124023, "learning_rate": 1.5393284326462873e-05, "loss": 1.5864, "step": 110060 }, { "epoch": 0.6918154498850981, "grad_norm": 6.597995758056641, "learning_rate": 1.539286522551822e-05, "loss": 1.5039, "step": 110070 }, { "epoch": 0.6918783022017952, "grad_norm": 5.67252779006958, "learning_rate": 1.5392446124573567e-05, "loss": 1.6945, "step": 110080 }, { "epoch": 0.6919411545184924, "grad_norm": 6.437417030334473, "learning_rate": 1.5392027023628914e-05, "loss": 1.6711, "step": 110090 }, { "epoch": 0.6920040068351895, "grad_norm": 5.708282947540283, "learning_rate": 1.539160792268426e-05, "loss": 1.5275, "step": 110100 }, { "epoch": 0.6920668591518866, "grad_norm": 6.925989151000977, "learning_rate": 1.5391188821739605e-05, "loss": 1.6202, "step": 110110 }, { "epoch": 0.6921297114685837, "grad_norm": 5.939941883087158, "learning_rate": 1.5390769720794952e-05, "loss": 1.7532, "step": 110120 }, { "epoch": 0.6921925637852808, "grad_norm": 6.466796875, "learning_rate": 1.5390392529944763e-05, "loss": 1.6942, "step": 110130 }, { "epoch": 0.6922554161019779, "grad_norm": 6.570621490478516, "learning_rate": 1.538997342900011e-05, "loss": 1.4633, "step": 110140 }, { "epoch": 0.6923182684186749, "grad_norm": 7.082062721252441, "learning_rate": 1.5389554328055457e-05, "loss": 1.6256, "step": 110150 }, { "epoch": 0.6923811207353721, "grad_norm": 6.98456335067749, "learning_rate": 1.5389135227110804e-05, "loss": 1.7756, "step": 110160 }, { "epoch": 0.6924439730520692, "grad_norm": 6.79068660736084, "learning_rate": 1.538871612616615e-05, "loss": 1.5717, "step": 110170 }, { "epoch": 0.6925068253687663, "grad_norm": 7.054384231567383, "learning_rate": 1.53882970252215e-05, "loss": 1.5873, "step": 110180 }, { "epoch": 0.6925696776854634, "grad_norm": 5.819924354553223, "learning_rate": 1.5387877924276842e-05, "loss": 1.6075, "step": 110190 }, { "epoch": 0.6926325300021605, "grad_norm": 6.592485427856445, "learning_rate": 1.538745882333219e-05, "loss": 1.8599, "step": 110200 }, { "epoch": 0.6926953823188576, "grad_norm": 7.693783283233643, "learning_rate": 1.5387039722387536e-05, "loss": 1.4913, "step": 110210 }, { "epoch": 0.6927582346355547, "grad_norm": 7.944255352020264, "learning_rate": 1.5386620621442883e-05, "loss": 1.4727, "step": 110220 }, { "epoch": 0.6928210869522519, "grad_norm": 6.045512676239014, "learning_rate": 1.538620152049823e-05, "loss": 1.7314, "step": 110230 }, { "epoch": 0.692883939268949, "grad_norm": 7.270375728607178, "learning_rate": 1.5385782419553574e-05, "loss": 1.5352, "step": 110240 }, { "epoch": 0.6929467915856461, "grad_norm": 7.211243152618408, "learning_rate": 1.538536331860892e-05, "loss": 1.7192, "step": 110250 }, { "epoch": 0.6930096439023432, "grad_norm": 6.737878322601318, "learning_rate": 1.5384944217664268e-05, "loss": 1.5612, "step": 110260 }, { "epoch": 0.6930724962190403, "grad_norm": 6.604902267456055, "learning_rate": 1.5384525116719615e-05, "loss": 1.7959, "step": 110270 }, { "epoch": 0.6931353485357374, "grad_norm": 6.718039035797119, "learning_rate": 1.538410601577496e-05, "loss": 1.6063, "step": 110280 }, { "epoch": 0.6931982008524346, "grad_norm": 6.575676918029785, "learning_rate": 1.5383686914830306e-05, "loss": 1.4797, "step": 110290 }, { "epoch": 0.6932610531691317, "grad_norm": 6.538565158843994, "learning_rate": 1.5383267813885653e-05, "loss": 1.5373, "step": 110300 }, { "epoch": 0.6933239054858288, "grad_norm": 6.900051116943359, "learning_rate": 1.5382848712941e-05, "loss": 1.5909, "step": 110310 }, { "epoch": 0.6933867578025259, "grad_norm": 6.351840496063232, "learning_rate": 1.5382429611996347e-05, "loss": 1.4559, "step": 110320 }, { "epoch": 0.693449610119223, "grad_norm": 6.0556817054748535, "learning_rate": 1.538201051105169e-05, "loss": 1.7691, "step": 110330 }, { "epoch": 0.6935124624359201, "grad_norm": 6.344932556152344, "learning_rate": 1.5381591410107038e-05, "loss": 1.4947, "step": 110340 }, { "epoch": 0.6935753147526172, "grad_norm": 7.3224358558654785, "learning_rate": 1.5381172309162385e-05, "loss": 1.7113, "step": 110350 }, { "epoch": 0.6936381670693144, "grad_norm": 7.070436954498291, "learning_rate": 1.5380753208217732e-05, "loss": 1.6708, "step": 110360 }, { "epoch": 0.6937010193860115, "grad_norm": 5.970061779022217, "learning_rate": 1.538033410727308e-05, "loss": 1.3929, "step": 110370 }, { "epoch": 0.6937638717027086, "grad_norm": 7.096200466156006, "learning_rate": 1.5379915006328426e-05, "loss": 1.5439, "step": 110380 }, { "epoch": 0.6938267240194057, "grad_norm": 6.618981838226318, "learning_rate": 1.5379495905383773e-05, "loss": 1.7176, "step": 110390 }, { "epoch": 0.6938895763361027, "grad_norm": 6.2274909019470215, "learning_rate": 1.537907680443912e-05, "loss": 1.6271, "step": 110400 }, { "epoch": 0.6939524286527998, "grad_norm": 6.041452407836914, "learning_rate": 1.5378657703494464e-05, "loss": 1.655, "step": 110410 }, { "epoch": 0.6940152809694969, "grad_norm": 6.7422261238098145, "learning_rate": 1.537823860254981e-05, "loss": 1.6391, "step": 110420 }, { "epoch": 0.694078133286194, "grad_norm": 6.288084506988525, "learning_rate": 1.537781950160516e-05, "loss": 1.8139, "step": 110430 }, { "epoch": 0.6941409856028912, "grad_norm": 6.951841831207275, "learning_rate": 1.5377400400660505e-05, "loss": 1.5908, "step": 110440 }, { "epoch": 0.6942038379195883, "grad_norm": 5.775060176849365, "learning_rate": 1.5376981299715852e-05, "loss": 1.6025, "step": 110450 }, { "epoch": 0.6942666902362854, "grad_norm": 8.272947311401367, "learning_rate": 1.5376562198771196e-05, "loss": 1.3625, "step": 110460 }, { "epoch": 0.6943295425529825, "grad_norm": 7.3116230964660645, "learning_rate": 1.5376143097826543e-05, "loss": 1.6746, "step": 110470 }, { "epoch": 0.6943923948696796, "grad_norm": 7.830728054046631, "learning_rate": 1.537572399688189e-05, "loss": 1.6529, "step": 110480 }, { "epoch": 0.6944552471863767, "grad_norm": 6.397856712341309, "learning_rate": 1.5375304895937237e-05, "loss": 1.5174, "step": 110490 }, { "epoch": 0.6945180995030739, "grad_norm": 7.6474480628967285, "learning_rate": 1.537488579499258e-05, "loss": 1.6356, "step": 110500 }, { "epoch": 0.694580951819771, "grad_norm": 7.2004218101501465, "learning_rate": 1.5374466694047928e-05, "loss": 1.8332, "step": 110510 }, { "epoch": 0.6946438041364681, "grad_norm": 6.119325637817383, "learning_rate": 1.5374047593103275e-05, "loss": 1.6274, "step": 110520 }, { "epoch": 0.6947066564531652, "grad_norm": 6.005640029907227, "learning_rate": 1.5373628492158622e-05, "loss": 1.3396, "step": 110530 }, { "epoch": 0.6947695087698623, "grad_norm": 6.580428123474121, "learning_rate": 1.537320939121397e-05, "loss": 1.5653, "step": 110540 }, { "epoch": 0.6948323610865594, "grad_norm": 6.162042140960693, "learning_rate": 1.5372790290269316e-05, "loss": 1.4884, "step": 110550 }, { "epoch": 0.6948952134032566, "grad_norm": 5.7658371925354, "learning_rate": 1.5372371189324663e-05, "loss": 1.4138, "step": 110560 }, { "epoch": 0.6949580657199537, "grad_norm": 6.4251861572265625, "learning_rate": 1.537195208838001e-05, "loss": 1.7541, "step": 110570 }, { "epoch": 0.6950209180366508, "grad_norm": 5.379714488983154, "learning_rate": 1.5371532987435354e-05, "loss": 1.743, "step": 110580 }, { "epoch": 0.6950837703533479, "grad_norm": 7.091882228851318, "learning_rate": 1.53711138864907e-05, "loss": 1.9623, "step": 110590 }, { "epoch": 0.695146622670045, "grad_norm": 7.727228164672852, "learning_rate": 1.537069478554605e-05, "loss": 1.8519, "step": 110600 }, { "epoch": 0.6952094749867421, "grad_norm": 7.1329450607299805, "learning_rate": 1.5370275684601395e-05, "loss": 1.6731, "step": 110610 }, { "epoch": 0.6952723273034392, "grad_norm": 5.80506706237793, "learning_rate": 1.5369856583656743e-05, "loss": 1.6767, "step": 110620 }, { "epoch": 0.6953351796201364, "grad_norm": 5.10790491104126, "learning_rate": 1.5369437482712086e-05, "loss": 1.6245, "step": 110630 }, { "epoch": 0.6953980319368335, "grad_norm": 5.793562889099121, "learning_rate": 1.5369018381767433e-05, "loss": 1.7881, "step": 110640 }, { "epoch": 0.6954608842535306, "grad_norm": 6.144549369812012, "learning_rate": 1.536859928082278e-05, "loss": 1.7062, "step": 110650 }, { "epoch": 0.6955237365702276, "grad_norm": 5.768095970153809, "learning_rate": 1.5368180179878127e-05, "loss": 1.5482, "step": 110660 }, { "epoch": 0.6955865888869247, "grad_norm": 6.386692047119141, "learning_rate": 1.5367761078933474e-05, "loss": 1.3336, "step": 110670 }, { "epoch": 0.6956494412036218, "grad_norm": 6.446012020111084, "learning_rate": 1.5367341977988818e-05, "loss": 1.6281, "step": 110680 }, { "epoch": 0.6957122935203189, "grad_norm": 6.755799293518066, "learning_rate": 1.5366922877044165e-05, "loss": 1.4132, "step": 110690 }, { "epoch": 0.695775145837016, "grad_norm": 6.42385721206665, "learning_rate": 1.5366503776099512e-05, "loss": 1.6404, "step": 110700 }, { "epoch": 0.6958379981537132, "grad_norm": 7.034458160400391, "learning_rate": 1.536608467515486e-05, "loss": 1.8111, "step": 110710 }, { "epoch": 0.6959008504704103, "grad_norm": 6.789862632751465, "learning_rate": 1.5365665574210203e-05, "loss": 1.5472, "step": 110720 }, { "epoch": 0.6959637027871074, "grad_norm": 7.90257453918457, "learning_rate": 1.536524647326555e-05, "loss": 1.4989, "step": 110730 }, { "epoch": 0.6960265551038045, "grad_norm": 5.922754764556885, "learning_rate": 1.5364827372320897e-05, "loss": 1.5503, "step": 110740 }, { "epoch": 0.6960894074205016, "grad_norm": 6.36511754989624, "learning_rate": 1.5364408271376244e-05, "loss": 1.4924, "step": 110750 }, { "epoch": 0.6961522597371987, "grad_norm": 7.440944194793701, "learning_rate": 1.536398917043159e-05, "loss": 1.6945, "step": 110760 }, { "epoch": 0.6962151120538959, "grad_norm": 6.556360721588135, "learning_rate": 1.536357006948694e-05, "loss": 1.7309, "step": 110770 }, { "epoch": 0.696277964370593, "grad_norm": 6.454293727874756, "learning_rate": 1.5363150968542285e-05, "loss": 1.4156, "step": 110780 }, { "epoch": 0.6963408166872901, "grad_norm": 7.992779731750488, "learning_rate": 1.5362731867597633e-05, "loss": 1.5207, "step": 110790 }, { "epoch": 0.6964036690039872, "grad_norm": 5.340725898742676, "learning_rate": 1.536231276665298e-05, "loss": 1.7143, "step": 110800 }, { "epoch": 0.6964665213206843, "grad_norm": 8.281492233276367, "learning_rate": 1.5361893665708323e-05, "loss": 1.8827, "step": 110810 }, { "epoch": 0.6965293736373814, "grad_norm": 5.960738658905029, "learning_rate": 1.536147456476367e-05, "loss": 1.4878, "step": 110820 }, { "epoch": 0.6965922259540785, "grad_norm": 7.015256404876709, "learning_rate": 1.5361055463819017e-05, "loss": 1.4828, "step": 110830 }, { "epoch": 0.6966550782707757, "grad_norm": 5.641343116760254, "learning_rate": 1.5360636362874365e-05, "loss": 1.4729, "step": 110840 }, { "epoch": 0.6967179305874728, "grad_norm": 7.063246250152588, "learning_rate": 1.536021726192971e-05, "loss": 1.635, "step": 110850 }, { "epoch": 0.6967807829041699, "grad_norm": 6.208061695098877, "learning_rate": 1.5359798160985055e-05, "loss": 1.6084, "step": 110860 }, { "epoch": 0.696843635220867, "grad_norm": 6.916558265686035, "learning_rate": 1.5359379060040402e-05, "loss": 1.7269, "step": 110870 }, { "epoch": 0.6969064875375641, "grad_norm": 6.103330135345459, "learning_rate": 1.535895995909575e-05, "loss": 1.5001, "step": 110880 }, { "epoch": 0.6969693398542612, "grad_norm": 6.57139253616333, "learning_rate": 1.5358540858151096e-05, "loss": 1.9416, "step": 110890 }, { "epoch": 0.6970321921709584, "grad_norm": 6.957958221435547, "learning_rate": 1.535812175720644e-05, "loss": 1.822, "step": 110900 }, { "epoch": 0.6970950444876554, "grad_norm": 5.670475006103516, "learning_rate": 1.5357702656261787e-05, "loss": 1.4883, "step": 110910 }, { "epoch": 0.6971578968043525, "grad_norm": 6.034177780151367, "learning_rate": 1.5357283555317134e-05, "loss": 1.7143, "step": 110920 }, { "epoch": 0.6972207491210496, "grad_norm": 6.624166011810303, "learning_rate": 1.535686445437248e-05, "loss": 1.5443, "step": 110930 }, { "epoch": 0.6972836014377467, "grad_norm": 6.640042781829834, "learning_rate": 1.535644535342783e-05, "loss": 1.6603, "step": 110940 }, { "epoch": 0.6973464537544438, "grad_norm": 5.989957809448242, "learning_rate": 1.5356026252483172e-05, "loss": 1.4335, "step": 110950 }, { "epoch": 0.6974093060711409, "grad_norm": 5.535582542419434, "learning_rate": 1.535560715153852e-05, "loss": 1.554, "step": 110960 }, { "epoch": 0.697472158387838, "grad_norm": 7.012519359588623, "learning_rate": 1.5355188050593866e-05, "loss": 1.7497, "step": 110970 }, { "epoch": 0.6975350107045352, "grad_norm": 6.441524028778076, "learning_rate": 1.5354768949649213e-05, "loss": 1.4893, "step": 110980 }, { "epoch": 0.6975978630212323, "grad_norm": 6.06854248046875, "learning_rate": 1.535434984870456e-05, "loss": 1.5386, "step": 110990 }, { "epoch": 0.6976607153379294, "grad_norm": 6.238610744476318, "learning_rate": 1.5353930747759907e-05, "loss": 1.8803, "step": 111000 }, { "epoch": 0.6977235676546265, "grad_norm": 5.91594934463501, "learning_rate": 1.5353511646815255e-05, "loss": 1.7155, "step": 111010 }, { "epoch": 0.6977864199713236, "grad_norm": 6.111907482147217, "learning_rate": 1.53530925458706e-05, "loss": 1.6746, "step": 111020 }, { "epoch": 0.6978492722880207, "grad_norm": 5.388387680053711, "learning_rate": 1.5352673444925945e-05, "loss": 1.657, "step": 111030 }, { "epoch": 0.6979121246047179, "grad_norm": 5.682615280151367, "learning_rate": 1.5352254343981292e-05, "loss": 1.6807, "step": 111040 }, { "epoch": 0.697974976921415, "grad_norm": 6.166755676269531, "learning_rate": 1.535183524303664e-05, "loss": 1.6375, "step": 111050 }, { "epoch": 0.6980378292381121, "grad_norm": 5.698094367980957, "learning_rate": 1.5351416142091987e-05, "loss": 1.5027, "step": 111060 }, { "epoch": 0.6981006815548092, "grad_norm": 6.914948463439941, "learning_rate": 1.5350997041147334e-05, "loss": 1.4712, "step": 111070 }, { "epoch": 0.6981635338715063, "grad_norm": 7.217332363128662, "learning_rate": 1.5350577940202677e-05, "loss": 1.65, "step": 111080 }, { "epoch": 0.6982263861882034, "grad_norm": 7.417016983032227, "learning_rate": 1.5350158839258024e-05, "loss": 1.7447, "step": 111090 }, { "epoch": 0.6982892385049005, "grad_norm": 6.69106388092041, "learning_rate": 1.534973973831337e-05, "loss": 1.5877, "step": 111100 }, { "epoch": 0.6983520908215977, "grad_norm": 7.806684494018555, "learning_rate": 1.534932063736872e-05, "loss": 1.6986, "step": 111110 }, { "epoch": 0.6984149431382948, "grad_norm": 5.269705772399902, "learning_rate": 1.5348901536424062e-05, "loss": 1.4404, "step": 111120 }, { "epoch": 0.6984777954549919, "grad_norm": 7.750637531280518, "learning_rate": 1.534848243547941e-05, "loss": 1.4203, "step": 111130 }, { "epoch": 0.698540647771689, "grad_norm": 6.469650745391846, "learning_rate": 1.5348063334534756e-05, "loss": 1.9374, "step": 111140 }, { "epoch": 0.6986035000883861, "grad_norm": 6.945512294769287, "learning_rate": 1.5347644233590103e-05, "loss": 1.541, "step": 111150 }, { "epoch": 0.6986663524050832, "grad_norm": 7.122017860412598, "learning_rate": 1.534722513264545e-05, "loss": 1.6571, "step": 111160 }, { "epoch": 0.6987292047217802, "grad_norm": 7.253368377685547, "learning_rate": 1.5346806031700798e-05, "loss": 1.849, "step": 111170 }, { "epoch": 0.6987920570384774, "grad_norm": 6.511105060577393, "learning_rate": 1.5346386930756145e-05, "loss": 1.9503, "step": 111180 }, { "epoch": 0.6988549093551745, "grad_norm": 6.037867069244385, "learning_rate": 1.534596782981149e-05, "loss": 1.6691, "step": 111190 }, { "epoch": 0.6989177616718716, "grad_norm": 7.932125091552734, "learning_rate": 1.5345548728866835e-05, "loss": 1.6816, "step": 111200 }, { "epoch": 0.6989806139885687, "grad_norm": 7.179452419281006, "learning_rate": 1.5345129627922182e-05, "loss": 1.5332, "step": 111210 }, { "epoch": 0.6990434663052658, "grad_norm": 5.7000732421875, "learning_rate": 1.534471052697753e-05, "loss": 1.5691, "step": 111220 }, { "epoch": 0.6991063186219629, "grad_norm": 6.47969913482666, "learning_rate": 1.5344291426032877e-05, "loss": 1.5694, "step": 111230 }, { "epoch": 0.69916917093866, "grad_norm": 7.466422080993652, "learning_rate": 1.5343872325088224e-05, "loss": 1.7279, "step": 111240 }, { "epoch": 0.6992320232553572, "grad_norm": 7.074092864990234, "learning_rate": 1.5343453224143567e-05, "loss": 1.6808, "step": 111250 }, { "epoch": 0.6992948755720543, "grad_norm": 6.891863822937012, "learning_rate": 1.5343034123198914e-05, "loss": 1.5505, "step": 111260 }, { "epoch": 0.6993577278887514, "grad_norm": 5.30508279800415, "learning_rate": 1.534261502225426e-05, "loss": 1.6563, "step": 111270 }, { "epoch": 0.6994205802054485, "grad_norm": 6.897408962249756, "learning_rate": 1.534219592130961e-05, "loss": 1.7412, "step": 111280 }, { "epoch": 0.6994834325221456, "grad_norm": 7.533118724822998, "learning_rate": 1.5341776820364956e-05, "loss": 1.7652, "step": 111290 }, { "epoch": 0.6995462848388427, "grad_norm": 5.944263458251953, "learning_rate": 1.53413577194203e-05, "loss": 1.5952, "step": 111300 }, { "epoch": 0.6996091371555399, "grad_norm": 7.44816255569458, "learning_rate": 1.5340938618475646e-05, "loss": 1.7116, "step": 111310 }, { "epoch": 0.699671989472237, "grad_norm": 6.531190872192383, "learning_rate": 1.5340519517530993e-05, "loss": 1.5211, "step": 111320 }, { "epoch": 0.6997348417889341, "grad_norm": 7.204296588897705, "learning_rate": 1.534010041658634e-05, "loss": 1.6991, "step": 111330 }, { "epoch": 0.6997976941056312, "grad_norm": 6.985568523406982, "learning_rate": 1.5339681315641684e-05, "loss": 1.5637, "step": 111340 }, { "epoch": 0.6998605464223283, "grad_norm": 5.216329574584961, "learning_rate": 1.533926221469703e-05, "loss": 1.3214, "step": 111350 }, { "epoch": 0.6999233987390254, "grad_norm": 6.626350402832031, "learning_rate": 1.5338843113752378e-05, "loss": 1.7145, "step": 111360 }, { "epoch": 0.6999862510557225, "grad_norm": 7.1996965408325195, "learning_rate": 1.5338424012807725e-05, "loss": 1.71, "step": 111370 }, { "epoch": 0.7000491033724197, "grad_norm": 6.929688930511475, "learning_rate": 1.5338004911863072e-05, "loss": 1.7993, "step": 111380 }, { "epoch": 0.7001119556891168, "grad_norm": 6.898329257965088, "learning_rate": 1.533758581091842e-05, "loss": 1.615, "step": 111390 }, { "epoch": 0.7001748080058139, "grad_norm": 6.943667411804199, "learning_rate": 1.5337166709973767e-05, "loss": 1.6869, "step": 111400 }, { "epoch": 0.700237660322511, "grad_norm": 6.866006374359131, "learning_rate": 1.5336747609029114e-05, "loss": 1.5386, "step": 111410 }, { "epoch": 0.700300512639208, "grad_norm": 7.109818935394287, "learning_rate": 1.533632850808446e-05, "loss": 1.7766, "step": 111420 }, { "epoch": 0.7003633649559051, "grad_norm": 5.786563873291016, "learning_rate": 1.5335909407139804e-05, "loss": 1.5939, "step": 111430 }, { "epoch": 0.7004262172726022, "grad_norm": 5.846156120300293, "learning_rate": 1.533549030619515e-05, "loss": 1.3397, "step": 111440 }, { "epoch": 0.7004890695892994, "grad_norm": 7.736020565032959, "learning_rate": 1.53350712052505e-05, "loss": 1.6997, "step": 111450 }, { "epoch": 0.7005519219059965, "grad_norm": 7.803137302398682, "learning_rate": 1.5334652104305846e-05, "loss": 1.7629, "step": 111460 }, { "epoch": 0.7006147742226936, "grad_norm": 6.8571906089782715, "learning_rate": 1.5334233003361193e-05, "loss": 1.6626, "step": 111470 }, { "epoch": 0.7006776265393907, "grad_norm": 6.03222131729126, "learning_rate": 1.5333813902416536e-05, "loss": 1.6276, "step": 111480 }, { "epoch": 0.7007404788560878, "grad_norm": 7.02052640914917, "learning_rate": 1.5333394801471883e-05, "loss": 1.5892, "step": 111490 }, { "epoch": 0.7008033311727849, "grad_norm": 5.841487407684326, "learning_rate": 1.5333017610621698e-05, "loss": 1.5184, "step": 111500 }, { "epoch": 0.700866183489482, "grad_norm": 6.02025842666626, "learning_rate": 1.5332598509677042e-05, "loss": 1.7335, "step": 111510 }, { "epoch": 0.7009290358061792, "grad_norm": 7.783817768096924, "learning_rate": 1.533217940873239e-05, "loss": 1.7158, "step": 111520 }, { "epoch": 0.7009918881228763, "grad_norm": 6.168801307678223, "learning_rate": 1.5331760307787736e-05, "loss": 1.5162, "step": 111530 }, { "epoch": 0.7010547404395734, "grad_norm": 6.441208362579346, "learning_rate": 1.5331341206843083e-05, "loss": 1.7334, "step": 111540 }, { "epoch": 0.7011175927562705, "grad_norm": 6.061811447143555, "learning_rate": 1.5330922105898427e-05, "loss": 1.5543, "step": 111550 }, { "epoch": 0.7011804450729676, "grad_norm": 7.365804195404053, "learning_rate": 1.5330503004953774e-05, "loss": 1.6776, "step": 111560 }, { "epoch": 0.7012432973896647, "grad_norm": 5.934698104858398, "learning_rate": 1.533008390400912e-05, "loss": 1.5696, "step": 111570 }, { "epoch": 0.7013061497063618, "grad_norm": 6.140161037445068, "learning_rate": 1.5329664803064468e-05, "loss": 1.8776, "step": 111580 }, { "epoch": 0.701369002023059, "grad_norm": 6.963189125061035, "learning_rate": 1.5329245702119815e-05, "loss": 1.6721, "step": 111590 }, { "epoch": 0.7014318543397561, "grad_norm": 7.292041778564453, "learning_rate": 1.532882660117516e-05, "loss": 1.8092, "step": 111600 }, { "epoch": 0.7014947066564532, "grad_norm": 7.88812255859375, "learning_rate": 1.5328407500230506e-05, "loss": 1.4997, "step": 111610 }, { "epoch": 0.7015575589731503, "grad_norm": 7.066206932067871, "learning_rate": 1.5327988399285853e-05, "loss": 1.8031, "step": 111620 }, { "epoch": 0.7016204112898474, "grad_norm": 6.974310398101807, "learning_rate": 1.53275692983412e-05, "loss": 1.57, "step": 111630 }, { "epoch": 0.7016832636065445, "grad_norm": 5.402825832366943, "learning_rate": 1.5327150197396547e-05, "loss": 1.6978, "step": 111640 }, { "epoch": 0.7017461159232417, "grad_norm": 5.739223003387451, "learning_rate": 1.532673109645189e-05, "loss": 1.4454, "step": 111650 }, { "epoch": 0.7018089682399388, "grad_norm": 6.70404577255249, "learning_rate": 1.5326311995507238e-05, "loss": 1.6804, "step": 111660 }, { "epoch": 0.7018718205566359, "grad_norm": 8.330659866333008, "learning_rate": 1.5325892894562585e-05, "loss": 1.7975, "step": 111670 }, { "epoch": 0.7019346728733329, "grad_norm": 6.400068283081055, "learning_rate": 1.5325473793617932e-05, "loss": 1.6265, "step": 111680 }, { "epoch": 0.70199752519003, "grad_norm": 7.074087619781494, "learning_rate": 1.532505469267328e-05, "loss": 1.483, "step": 111690 }, { "epoch": 0.7020603775067271, "grad_norm": 7.001039505004883, "learning_rate": 1.5324635591728626e-05, "loss": 1.5174, "step": 111700 }, { "epoch": 0.7021232298234242, "grad_norm": 6.503770351409912, "learning_rate": 1.5324216490783973e-05, "loss": 1.5599, "step": 111710 }, { "epoch": 0.7021860821401213, "grad_norm": 6.884154796600342, "learning_rate": 1.532379738983932e-05, "loss": 1.547, "step": 111720 }, { "epoch": 0.7022489344568185, "grad_norm": 6.284861087799072, "learning_rate": 1.5323378288894664e-05, "loss": 1.6342, "step": 111730 }, { "epoch": 0.7023117867735156, "grad_norm": 7.665338516235352, "learning_rate": 1.532295918795001e-05, "loss": 1.6949, "step": 111740 }, { "epoch": 0.7023746390902127, "grad_norm": 6.341705322265625, "learning_rate": 1.5322540087005358e-05, "loss": 1.334, "step": 111750 }, { "epoch": 0.7024374914069098, "grad_norm": 5.66460657119751, "learning_rate": 1.5322120986060705e-05, "loss": 1.6776, "step": 111760 }, { "epoch": 0.7025003437236069, "grad_norm": 6.004974842071533, "learning_rate": 1.532170188511605e-05, "loss": 1.6779, "step": 111770 }, { "epoch": 0.702563196040304, "grad_norm": 6.916309356689453, "learning_rate": 1.5321282784171396e-05, "loss": 1.6225, "step": 111780 }, { "epoch": 0.7026260483570012, "grad_norm": 6.033321857452393, "learning_rate": 1.5320863683226743e-05, "loss": 1.6739, "step": 111790 }, { "epoch": 0.7026889006736983, "grad_norm": 6.991604804992676, "learning_rate": 1.532044458228209e-05, "loss": 1.8158, "step": 111800 }, { "epoch": 0.7027517529903954, "grad_norm": 6.926016807556152, "learning_rate": 1.5320025481337437e-05, "loss": 1.5259, "step": 111810 }, { "epoch": 0.7028146053070925, "grad_norm": 7.260146141052246, "learning_rate": 1.531960638039278e-05, "loss": 1.642, "step": 111820 }, { "epoch": 0.7028774576237896, "grad_norm": 5.637368202209473, "learning_rate": 1.5319187279448128e-05, "loss": 1.4252, "step": 111830 }, { "epoch": 0.7029403099404867, "grad_norm": 6.351644515991211, "learning_rate": 1.5318768178503475e-05, "loss": 1.9248, "step": 111840 }, { "epoch": 0.7030031622571838, "grad_norm": 6.875118732452393, "learning_rate": 1.5318349077558822e-05, "loss": 1.6857, "step": 111850 }, { "epoch": 0.703066014573881, "grad_norm": 5.7353515625, "learning_rate": 1.531792997661417e-05, "loss": 1.4319, "step": 111860 }, { "epoch": 0.7031288668905781, "grad_norm": 5.102963924407959, "learning_rate": 1.5317510875669516e-05, "loss": 1.6871, "step": 111870 }, { "epoch": 0.7031917192072752, "grad_norm": 6.1228837966918945, "learning_rate": 1.5317091774724863e-05, "loss": 1.7169, "step": 111880 }, { "epoch": 0.7032545715239723, "grad_norm": 7.117494583129883, "learning_rate": 1.5316672673780207e-05, "loss": 1.7236, "step": 111890 }, { "epoch": 0.7033174238406694, "grad_norm": 7.3301167488098145, "learning_rate": 1.5316253572835554e-05, "loss": 1.6067, "step": 111900 }, { "epoch": 0.7033802761573665, "grad_norm": 6.822005271911621, "learning_rate": 1.53158344718909e-05, "loss": 1.4782, "step": 111910 }, { "epoch": 0.7034431284740637, "grad_norm": 5.827635765075684, "learning_rate": 1.5315415370946248e-05, "loss": 1.6321, "step": 111920 }, { "epoch": 0.7035059807907607, "grad_norm": 6.484631538391113, "learning_rate": 1.5314996270001595e-05, "loss": 1.7872, "step": 111930 }, { "epoch": 0.7035688331074578, "grad_norm": 6.064381122589111, "learning_rate": 1.5314577169056942e-05, "loss": 1.4691, "step": 111940 }, { "epoch": 0.7036316854241549, "grad_norm": 6.730330944061279, "learning_rate": 1.5314158068112286e-05, "loss": 1.531, "step": 111950 }, { "epoch": 0.703694537740852, "grad_norm": 6.129236698150635, "learning_rate": 1.5313738967167633e-05, "loss": 1.7533, "step": 111960 }, { "epoch": 0.7037573900575491, "grad_norm": 5.432794570922852, "learning_rate": 1.531331986622298e-05, "loss": 1.4477, "step": 111970 }, { "epoch": 0.7038202423742462, "grad_norm": 6.189945697784424, "learning_rate": 1.5312900765278327e-05, "loss": 1.7233, "step": 111980 }, { "epoch": 0.7038830946909433, "grad_norm": 6.620745658874512, "learning_rate": 1.531248166433367e-05, "loss": 1.5436, "step": 111990 }, { "epoch": 0.7039459470076405, "grad_norm": 5.968117713928223, "learning_rate": 1.5312062563389018e-05, "loss": 1.622, "step": 112000 }, { "epoch": 0.7040087993243376, "grad_norm": 7.784635066986084, "learning_rate": 1.5311643462444365e-05, "loss": 1.7669, "step": 112010 }, { "epoch": 0.7040716516410347, "grad_norm": 6.146176338195801, "learning_rate": 1.5311224361499712e-05, "loss": 1.4989, "step": 112020 }, { "epoch": 0.7041345039577318, "grad_norm": 5.737129211425781, "learning_rate": 1.531080526055506e-05, "loss": 1.4719, "step": 112030 }, { "epoch": 0.7041973562744289, "grad_norm": 6.738218307495117, "learning_rate": 1.5310386159610403e-05, "loss": 1.5552, "step": 112040 }, { "epoch": 0.704260208591126, "grad_norm": 6.180079460144043, "learning_rate": 1.530996705866575e-05, "loss": 1.6742, "step": 112050 }, { "epoch": 0.7043230609078232, "grad_norm": 6.585474491119385, "learning_rate": 1.5309547957721097e-05, "loss": 1.9372, "step": 112060 }, { "epoch": 0.7043859132245203, "grad_norm": 5.886623382568359, "learning_rate": 1.5309128856776444e-05, "loss": 1.6076, "step": 112070 }, { "epoch": 0.7044487655412174, "grad_norm": 6.316711902618408, "learning_rate": 1.530870975583179e-05, "loss": 1.5736, "step": 112080 }, { "epoch": 0.7045116178579145, "grad_norm": 5.902610778808594, "learning_rate": 1.5308290654887138e-05, "loss": 1.4446, "step": 112090 }, { "epoch": 0.7045744701746116, "grad_norm": 6.4041643142700195, "learning_rate": 1.5307871553942485e-05, "loss": 1.8072, "step": 112100 }, { "epoch": 0.7046373224913087, "grad_norm": 7.238096237182617, "learning_rate": 1.5307452452997832e-05, "loss": 1.6929, "step": 112110 }, { "epoch": 0.7047001748080058, "grad_norm": 5.849660396575928, "learning_rate": 1.530703335205318e-05, "loss": 1.327, "step": 112120 }, { "epoch": 0.704763027124703, "grad_norm": 5.922614574432373, "learning_rate": 1.5306614251108523e-05, "loss": 1.4867, "step": 112130 }, { "epoch": 0.7048258794414001, "grad_norm": 7.565813064575195, "learning_rate": 1.530619515016387e-05, "loss": 1.8852, "step": 112140 }, { "epoch": 0.7048887317580972, "grad_norm": 6.607747554779053, "learning_rate": 1.5305776049219217e-05, "loss": 1.6111, "step": 112150 }, { "epoch": 0.7049515840747943, "grad_norm": 5.848597526550293, "learning_rate": 1.5305356948274564e-05, "loss": 1.6545, "step": 112160 }, { "epoch": 0.7050144363914914, "grad_norm": 6.902886867523193, "learning_rate": 1.5304937847329908e-05, "loss": 1.7418, "step": 112170 }, { "epoch": 0.7050772887081885, "grad_norm": 6.819500923156738, "learning_rate": 1.5304518746385255e-05, "loss": 1.7065, "step": 112180 }, { "epoch": 0.7051401410248855, "grad_norm": 6.405013561248779, "learning_rate": 1.5304099645440602e-05, "loss": 1.5845, "step": 112190 }, { "epoch": 0.7052029933415827, "grad_norm": 6.885857582092285, "learning_rate": 1.530368054449595e-05, "loss": 1.546, "step": 112200 }, { "epoch": 0.7052658456582798, "grad_norm": 6.973810195922852, "learning_rate": 1.5303261443551296e-05, "loss": 1.6993, "step": 112210 }, { "epoch": 0.7053286979749769, "grad_norm": 6.513957500457764, "learning_rate": 1.530284234260664e-05, "loss": 1.3631, "step": 112220 }, { "epoch": 0.705391550291674, "grad_norm": 5.68147087097168, "learning_rate": 1.5302423241661987e-05, "loss": 1.6635, "step": 112230 }, { "epoch": 0.7054544026083711, "grad_norm": 7.037917613983154, "learning_rate": 1.5302004140717334e-05, "loss": 1.6377, "step": 112240 }, { "epoch": 0.7055172549250682, "grad_norm": 6.520129680633545, "learning_rate": 1.530158503977268e-05, "loss": 1.7185, "step": 112250 }, { "epoch": 0.7055801072417653, "grad_norm": 5.36246919631958, "learning_rate": 1.5301165938828028e-05, "loss": 1.76, "step": 112260 }, { "epoch": 0.7056429595584625, "grad_norm": 6.547013282775879, "learning_rate": 1.5300746837883372e-05, "loss": 1.6227, "step": 112270 }, { "epoch": 0.7057058118751596, "grad_norm": 6.3188581466674805, "learning_rate": 1.530032773693872e-05, "loss": 1.5041, "step": 112280 }, { "epoch": 0.7057686641918567, "grad_norm": 6.86865234375, "learning_rate": 1.5299908635994066e-05, "loss": 1.5423, "step": 112290 }, { "epoch": 0.7058315165085538, "grad_norm": 5.936668872833252, "learning_rate": 1.5299489535049413e-05, "loss": 1.6973, "step": 112300 }, { "epoch": 0.7058943688252509, "grad_norm": 6.411205291748047, "learning_rate": 1.529907043410476e-05, "loss": 1.6422, "step": 112310 }, { "epoch": 0.705957221141948, "grad_norm": 5.711047649383545, "learning_rate": 1.5298651333160107e-05, "loss": 1.4986, "step": 112320 }, { "epoch": 0.7060200734586451, "grad_norm": 7.482402324676514, "learning_rate": 1.5298232232215454e-05, "loss": 1.5474, "step": 112330 }, { "epoch": 0.7060829257753423, "grad_norm": 7.547204971313477, "learning_rate": 1.52978131312708e-05, "loss": 1.6577, "step": 112340 }, { "epoch": 0.7061457780920394, "grad_norm": 6.965823173522949, "learning_rate": 1.5297394030326145e-05, "loss": 1.6739, "step": 112350 }, { "epoch": 0.7062086304087365, "grad_norm": 7.063565254211426, "learning_rate": 1.5296974929381492e-05, "loss": 1.647, "step": 112360 }, { "epoch": 0.7062714827254336, "grad_norm": 7.207581520080566, "learning_rate": 1.529655582843684e-05, "loss": 1.7837, "step": 112370 }, { "epoch": 0.7063343350421307, "grad_norm": 7.736453056335449, "learning_rate": 1.5296136727492186e-05, "loss": 1.5431, "step": 112380 }, { "epoch": 0.7063971873588278, "grad_norm": 6.191543102264404, "learning_rate": 1.529571762654753e-05, "loss": 1.5448, "step": 112390 }, { "epoch": 0.706460039675525, "grad_norm": 6.08800745010376, "learning_rate": 1.5295298525602877e-05, "loss": 1.5879, "step": 112400 }, { "epoch": 0.7065228919922221, "grad_norm": 6.827498435974121, "learning_rate": 1.5294879424658224e-05, "loss": 1.695, "step": 112410 }, { "epoch": 0.7065857443089192, "grad_norm": 6.868002891540527, "learning_rate": 1.529446032371357e-05, "loss": 1.6397, "step": 112420 }, { "epoch": 0.7066485966256163, "grad_norm": 6.5036468505859375, "learning_rate": 1.5294041222768918e-05, "loss": 1.4834, "step": 112430 }, { "epoch": 0.7067114489423133, "grad_norm": 7.184835910797119, "learning_rate": 1.5293622121824262e-05, "loss": 1.4564, "step": 112440 }, { "epoch": 0.7067743012590104, "grad_norm": 6.301793575286865, "learning_rate": 1.529320302087961e-05, "loss": 1.4185, "step": 112450 }, { "epoch": 0.7068371535757075, "grad_norm": 7.015898704528809, "learning_rate": 1.5292783919934956e-05, "loss": 1.6782, "step": 112460 }, { "epoch": 0.7069000058924046, "grad_norm": 7.5571699142456055, "learning_rate": 1.5292364818990303e-05, "loss": 1.8978, "step": 112470 }, { "epoch": 0.7069628582091018, "grad_norm": 7.434046745300293, "learning_rate": 1.529194571804565e-05, "loss": 1.6505, "step": 112480 }, { "epoch": 0.7070257105257989, "grad_norm": 6.994179725646973, "learning_rate": 1.5291526617100997e-05, "loss": 1.5477, "step": 112490 }, { "epoch": 0.707088562842496, "grad_norm": 6.415890693664551, "learning_rate": 1.5291107516156344e-05, "loss": 1.4122, "step": 112500 }, { "epoch": 0.7071514151591931, "grad_norm": 6.995442867279053, "learning_rate": 1.529068841521169e-05, "loss": 1.4985, "step": 112510 }, { "epoch": 0.7072142674758902, "grad_norm": 7.962581634521484, "learning_rate": 1.5290269314267035e-05, "loss": 1.8319, "step": 112520 }, { "epoch": 0.7072771197925873, "grad_norm": 5.557328701019287, "learning_rate": 1.5289850213322382e-05, "loss": 1.5311, "step": 112530 }, { "epoch": 0.7073399721092845, "grad_norm": 6.887127876281738, "learning_rate": 1.528943111237773e-05, "loss": 1.682, "step": 112540 }, { "epoch": 0.7074028244259816, "grad_norm": 7.099719524383545, "learning_rate": 1.5289012011433076e-05, "loss": 1.5395, "step": 112550 }, { "epoch": 0.7074656767426787, "grad_norm": 6.0728559494018555, "learning_rate": 1.5288592910488423e-05, "loss": 1.6686, "step": 112560 }, { "epoch": 0.7075285290593758, "grad_norm": 7.208877086639404, "learning_rate": 1.5288173809543767e-05, "loss": 1.7004, "step": 112570 }, { "epoch": 0.7075913813760729, "grad_norm": 5.693336486816406, "learning_rate": 1.5287754708599114e-05, "loss": 1.4082, "step": 112580 }, { "epoch": 0.70765423369277, "grad_norm": 6.975170612335205, "learning_rate": 1.528733560765446e-05, "loss": 1.6212, "step": 112590 }, { "epoch": 0.7077170860094671, "grad_norm": 6.518259048461914, "learning_rate": 1.5286916506709808e-05, "loss": 1.5739, "step": 112600 }, { "epoch": 0.7077799383261643, "grad_norm": 7.409762382507324, "learning_rate": 1.5286497405765155e-05, "loss": 1.7499, "step": 112610 }, { "epoch": 0.7078427906428614, "grad_norm": 6.29955530166626, "learning_rate": 1.52860783048205e-05, "loss": 1.5993, "step": 112620 }, { "epoch": 0.7079056429595585, "grad_norm": 6.510418891906738, "learning_rate": 1.5285659203875846e-05, "loss": 1.6247, "step": 112630 }, { "epoch": 0.7079684952762556, "grad_norm": 6.022556781768799, "learning_rate": 1.5285240102931193e-05, "loss": 1.7137, "step": 112640 }, { "epoch": 0.7080313475929527, "grad_norm": 6.4454827308654785, "learning_rate": 1.528482100198654e-05, "loss": 1.6566, "step": 112650 }, { "epoch": 0.7080941999096498, "grad_norm": 6.457005977630615, "learning_rate": 1.5284401901041884e-05, "loss": 1.5795, "step": 112660 }, { "epoch": 0.708157052226347, "grad_norm": 6.971104621887207, "learning_rate": 1.528398280009723e-05, "loss": 1.6491, "step": 112670 }, { "epoch": 0.7082199045430441, "grad_norm": 7.521648406982422, "learning_rate": 1.5283563699152578e-05, "loss": 1.5301, "step": 112680 }, { "epoch": 0.7082827568597412, "grad_norm": 4.746102809906006, "learning_rate": 1.5283144598207925e-05, "loss": 1.4043, "step": 112690 }, { "epoch": 0.7083456091764382, "grad_norm": 6.2397050857543945, "learning_rate": 1.5282725497263272e-05, "loss": 1.8456, "step": 112700 }, { "epoch": 0.7084084614931353, "grad_norm": 6.495884895324707, "learning_rate": 1.528230639631862e-05, "loss": 1.8223, "step": 112710 }, { "epoch": 0.7084713138098324, "grad_norm": 6.451614856719971, "learning_rate": 1.5281887295373966e-05, "loss": 1.7124, "step": 112720 }, { "epoch": 0.7085341661265295, "grad_norm": 5.592561721801758, "learning_rate": 1.5281468194429313e-05, "loss": 1.516, "step": 112730 }, { "epoch": 0.7085970184432266, "grad_norm": 7.746678829193115, "learning_rate": 1.528104909348466e-05, "loss": 1.7306, "step": 112740 }, { "epoch": 0.7086598707599238, "grad_norm": 6.638991832733154, "learning_rate": 1.5280629992540004e-05, "loss": 1.6608, "step": 112750 }, { "epoch": 0.7087227230766209, "grad_norm": 6.159857749938965, "learning_rate": 1.528021089159535e-05, "loss": 1.8309, "step": 112760 }, { "epoch": 0.708785575393318, "grad_norm": 6.521195411682129, "learning_rate": 1.5279791790650698e-05, "loss": 1.4588, "step": 112770 }, { "epoch": 0.7088484277100151, "grad_norm": 6.476180076599121, "learning_rate": 1.5279372689706045e-05, "loss": 1.5082, "step": 112780 }, { "epoch": 0.7089112800267122, "grad_norm": 6.665990352630615, "learning_rate": 1.527895358876139e-05, "loss": 1.7222, "step": 112790 }, { "epoch": 0.7089741323434093, "grad_norm": 5.634333610534668, "learning_rate": 1.5278534487816736e-05, "loss": 1.6938, "step": 112800 }, { "epoch": 0.7090369846601065, "grad_norm": 5.693087100982666, "learning_rate": 1.5278115386872083e-05, "loss": 1.7999, "step": 112810 }, { "epoch": 0.7090998369768036, "grad_norm": 6.429813861846924, "learning_rate": 1.527769628592743e-05, "loss": 1.4559, "step": 112820 }, { "epoch": 0.7091626892935007, "grad_norm": 6.750729084014893, "learning_rate": 1.5277277184982777e-05, "loss": 1.4969, "step": 112830 }, { "epoch": 0.7092255416101978, "grad_norm": 5.89532470703125, "learning_rate": 1.527685808403812e-05, "loss": 1.785, "step": 112840 }, { "epoch": 0.7092883939268949, "grad_norm": 6.74790096282959, "learning_rate": 1.5276438983093468e-05, "loss": 1.6283, "step": 112850 }, { "epoch": 0.709351246243592, "grad_norm": 6.837890148162842, "learning_rate": 1.5276019882148815e-05, "loss": 1.7477, "step": 112860 }, { "epoch": 0.7094140985602891, "grad_norm": 5.5891008377075195, "learning_rate": 1.5275600781204162e-05, "loss": 1.6749, "step": 112870 }, { "epoch": 0.7094769508769863, "grad_norm": 6.491587162017822, "learning_rate": 1.527518168025951e-05, "loss": 1.7476, "step": 112880 }, { "epoch": 0.7095398031936834, "grad_norm": 6.33069372177124, "learning_rate": 1.5274762579314856e-05, "loss": 1.5088, "step": 112890 }, { "epoch": 0.7096026555103805, "grad_norm": 6.677149295806885, "learning_rate": 1.52743434783702e-05, "loss": 1.7174, "step": 112900 }, { "epoch": 0.7096655078270776, "grad_norm": 5.773202419281006, "learning_rate": 1.5273924377425547e-05, "loss": 1.5749, "step": 112910 }, { "epoch": 0.7097283601437747, "grad_norm": 6.393325328826904, "learning_rate": 1.5273505276480894e-05, "loss": 1.5115, "step": 112920 }, { "epoch": 0.7097912124604718, "grad_norm": 7.29564094543457, "learning_rate": 1.527308617553624e-05, "loss": 1.5899, "step": 112930 }, { "epoch": 0.709854064777169, "grad_norm": 6.721494674682617, "learning_rate": 1.5272667074591588e-05, "loss": 1.3894, "step": 112940 }, { "epoch": 0.709916917093866, "grad_norm": 6.804461479187012, "learning_rate": 1.5272247973646935e-05, "loss": 1.7747, "step": 112950 }, { "epoch": 0.7099797694105631, "grad_norm": 6.622951984405518, "learning_rate": 1.5271828872702282e-05, "loss": 1.6428, "step": 112960 }, { "epoch": 0.7100426217272602, "grad_norm": 5.707070350646973, "learning_rate": 1.5271409771757626e-05, "loss": 1.5076, "step": 112970 }, { "epoch": 0.7101054740439573, "grad_norm": 6.101596832275391, "learning_rate": 1.5270990670812973e-05, "loss": 1.7525, "step": 112980 }, { "epoch": 0.7101683263606544, "grad_norm": 7.57112455368042, "learning_rate": 1.527057156986832e-05, "loss": 1.7756, "step": 112990 }, { "epoch": 0.7102311786773515, "grad_norm": 6.470608711242676, "learning_rate": 1.5270152468923667e-05, "loss": 1.6362, "step": 113000 }, { "epoch": 0.7102940309940486, "grad_norm": 6.327256202697754, "learning_rate": 1.526973336797901e-05, "loss": 1.5283, "step": 113010 }, { "epoch": 0.7103568833107458, "grad_norm": 6.72871208190918, "learning_rate": 1.5269314267034358e-05, "loss": 1.3429, "step": 113020 }, { "epoch": 0.7104197356274429, "grad_norm": 7.564001083374023, "learning_rate": 1.5268895166089705e-05, "loss": 1.7246, "step": 113030 }, { "epoch": 0.71048258794414, "grad_norm": 6.237943172454834, "learning_rate": 1.5268476065145052e-05, "loss": 1.6596, "step": 113040 }, { "epoch": 0.7105454402608371, "grad_norm": 6.461047172546387, "learning_rate": 1.52680569642004e-05, "loss": 1.5815, "step": 113050 }, { "epoch": 0.7106082925775342, "grad_norm": 6.568318843841553, "learning_rate": 1.5267637863255743e-05, "loss": 1.7275, "step": 113060 }, { "epoch": 0.7106711448942313, "grad_norm": 7.531859397888184, "learning_rate": 1.526721876231109e-05, "loss": 1.5079, "step": 113070 }, { "epoch": 0.7107339972109284, "grad_norm": 5.017735004425049, "learning_rate": 1.5266799661366437e-05, "loss": 1.353, "step": 113080 }, { "epoch": 0.7107968495276256, "grad_norm": 6.397959232330322, "learning_rate": 1.5266380560421784e-05, "loss": 1.6848, "step": 113090 }, { "epoch": 0.7108597018443227, "grad_norm": 7.199172019958496, "learning_rate": 1.526596145947713e-05, "loss": 1.6795, "step": 113100 }, { "epoch": 0.7109225541610198, "grad_norm": 5.098223686218262, "learning_rate": 1.5265542358532478e-05, "loss": 1.3205, "step": 113110 }, { "epoch": 0.7109854064777169, "grad_norm": 5.997243404388428, "learning_rate": 1.5265123257587825e-05, "loss": 1.642, "step": 113120 }, { "epoch": 0.711048258794414, "grad_norm": 6.338582515716553, "learning_rate": 1.5264704156643172e-05, "loss": 1.503, "step": 113130 }, { "epoch": 0.7111111111111111, "grad_norm": 5.719226360321045, "learning_rate": 1.526428505569852e-05, "loss": 1.5103, "step": 113140 }, { "epoch": 0.7111739634278083, "grad_norm": 6.2346014976501465, "learning_rate": 1.5263865954753863e-05, "loss": 1.545, "step": 113150 }, { "epoch": 0.7112368157445054, "grad_norm": 6.652457237243652, "learning_rate": 1.526344685380921e-05, "loss": 1.5949, "step": 113160 }, { "epoch": 0.7112996680612025, "grad_norm": 6.260744571685791, "learning_rate": 1.5263027752864557e-05, "loss": 1.7655, "step": 113170 }, { "epoch": 0.7113625203778996, "grad_norm": 6.24083137512207, "learning_rate": 1.5262608651919904e-05, "loss": 1.4542, "step": 113180 }, { "epoch": 0.7114253726945967, "grad_norm": 7.184473037719727, "learning_rate": 1.5262189550975248e-05, "loss": 1.6336, "step": 113190 }, { "epoch": 0.7114882250112938, "grad_norm": 5.504934310913086, "learning_rate": 1.5261770450030595e-05, "loss": 1.5596, "step": 113200 }, { "epoch": 0.7115510773279908, "grad_norm": 6.840089797973633, "learning_rate": 1.5261351349085942e-05, "loss": 1.619, "step": 113210 }, { "epoch": 0.711613929644688, "grad_norm": 6.864090442657471, "learning_rate": 1.526093224814129e-05, "loss": 1.4888, "step": 113220 }, { "epoch": 0.7116767819613851, "grad_norm": 6.57045316696167, "learning_rate": 1.5260513147196636e-05, "loss": 1.6587, "step": 113230 }, { "epoch": 0.7117396342780822, "grad_norm": 6.458637714385986, "learning_rate": 1.526009404625198e-05, "loss": 1.5771, "step": 113240 }, { "epoch": 0.7118024865947793, "grad_norm": 6.163811206817627, "learning_rate": 1.5259674945307327e-05, "loss": 1.3506, "step": 113250 }, { "epoch": 0.7118653389114764, "grad_norm": 6.221703052520752, "learning_rate": 1.5259255844362674e-05, "loss": 1.4874, "step": 113260 }, { "epoch": 0.7119281912281735, "grad_norm": 6.887861251831055, "learning_rate": 1.525883674341802e-05, "loss": 1.4691, "step": 113270 }, { "epoch": 0.7119910435448706, "grad_norm": 5.505577087402344, "learning_rate": 1.5258417642473367e-05, "loss": 1.6029, "step": 113280 }, { "epoch": 0.7120538958615678, "grad_norm": 6.861028671264648, "learning_rate": 1.5257998541528714e-05, "loss": 1.532, "step": 113290 }, { "epoch": 0.7121167481782649, "grad_norm": 7.401561260223389, "learning_rate": 1.525757944058406e-05, "loss": 1.5277, "step": 113300 }, { "epoch": 0.712179600494962, "grad_norm": 5.729288578033447, "learning_rate": 1.5257160339639408e-05, "loss": 1.3577, "step": 113310 }, { "epoch": 0.7122424528116591, "grad_norm": 5.505568981170654, "learning_rate": 1.5256741238694752e-05, "loss": 1.3835, "step": 113320 }, { "epoch": 0.7123053051283562, "grad_norm": 6.931628227233887, "learning_rate": 1.5256322137750099e-05, "loss": 1.6917, "step": 113330 }, { "epoch": 0.7123681574450533, "grad_norm": 6.793735027313232, "learning_rate": 1.5255903036805446e-05, "loss": 1.486, "step": 113340 }, { "epoch": 0.7124310097617504, "grad_norm": 8.003320693969727, "learning_rate": 1.5255483935860793e-05, "loss": 1.8749, "step": 113350 }, { "epoch": 0.7124938620784476, "grad_norm": 5.8099870681762695, "learning_rate": 1.525506483491614e-05, "loss": 1.422, "step": 113360 }, { "epoch": 0.7125567143951447, "grad_norm": 5.143426418304443, "learning_rate": 1.5254645733971485e-05, "loss": 1.661, "step": 113370 }, { "epoch": 0.7126195667118418, "grad_norm": 6.967090606689453, "learning_rate": 1.5254226633026832e-05, "loss": 1.4827, "step": 113380 }, { "epoch": 0.7126824190285389, "grad_norm": 5.961867332458496, "learning_rate": 1.525380753208218e-05, "loss": 1.4611, "step": 113390 }, { "epoch": 0.712745271345236, "grad_norm": 7.57195520401001, "learning_rate": 1.5253388431137526e-05, "loss": 1.6686, "step": 113400 }, { "epoch": 0.7128081236619331, "grad_norm": 5.611908912658691, "learning_rate": 1.525296933019287e-05, "loss": 1.5144, "step": 113410 }, { "epoch": 0.7128709759786303, "grad_norm": 5.720475673675537, "learning_rate": 1.5252550229248217e-05, "loss": 1.6377, "step": 113420 }, { "epoch": 0.7129338282953274, "grad_norm": 6.286452770233154, "learning_rate": 1.5252131128303564e-05, "loss": 1.6228, "step": 113430 }, { "epoch": 0.7129966806120245, "grad_norm": 5.332391738891602, "learning_rate": 1.5251712027358911e-05, "loss": 1.3138, "step": 113440 }, { "epoch": 0.7130595329287216, "grad_norm": 7.41482400894165, "learning_rate": 1.5251292926414258e-05, "loss": 1.6605, "step": 113450 }, { "epoch": 0.7131223852454187, "grad_norm": 6.277690887451172, "learning_rate": 1.5250873825469604e-05, "loss": 1.6301, "step": 113460 }, { "epoch": 0.7131852375621157, "grad_norm": 6.387878894805908, "learning_rate": 1.525045472452495e-05, "loss": 1.4176, "step": 113470 }, { "epoch": 0.7132480898788128, "grad_norm": 7.576438903808594, "learning_rate": 1.5250035623580298e-05, "loss": 1.6814, "step": 113480 }, { "epoch": 0.71331094219551, "grad_norm": 6.340225696563721, "learning_rate": 1.5249616522635643e-05, "loss": 1.6373, "step": 113490 }, { "epoch": 0.7133737945122071, "grad_norm": 5.660751819610596, "learning_rate": 1.5249197421690989e-05, "loss": 1.5514, "step": 113500 }, { "epoch": 0.7134366468289042, "grad_norm": 7.049087047576904, "learning_rate": 1.5248778320746336e-05, "loss": 1.7007, "step": 113510 }, { "epoch": 0.7134994991456013, "grad_norm": 7.369273662567139, "learning_rate": 1.5248359219801683e-05, "loss": 1.7085, "step": 113520 }, { "epoch": 0.7135623514622984, "grad_norm": 6.360713005065918, "learning_rate": 1.524794011885703e-05, "loss": 1.5136, "step": 113530 }, { "epoch": 0.7136252037789955, "grad_norm": 6.789496898651123, "learning_rate": 1.5247521017912377e-05, "loss": 1.8079, "step": 113540 }, { "epoch": 0.7136880560956926, "grad_norm": 6.46088171005249, "learning_rate": 1.5247101916967722e-05, "loss": 1.6976, "step": 113550 }, { "epoch": 0.7137509084123898, "grad_norm": 5.728837490081787, "learning_rate": 1.5246682816023068e-05, "loss": 1.4974, "step": 113560 }, { "epoch": 0.7138137607290869, "grad_norm": 7.043184757232666, "learning_rate": 1.5246263715078415e-05, "loss": 1.7299, "step": 113570 }, { "epoch": 0.713876613045784, "grad_norm": 7.5252203941345215, "learning_rate": 1.5245844614133762e-05, "loss": 1.7266, "step": 113580 }, { "epoch": 0.7139394653624811, "grad_norm": 6.560543060302734, "learning_rate": 1.5245425513189107e-05, "loss": 1.8253, "step": 113590 }, { "epoch": 0.7140023176791782, "grad_norm": 6.411479473114014, "learning_rate": 1.5245006412244454e-05, "loss": 1.6435, "step": 113600 }, { "epoch": 0.7140651699958753, "grad_norm": 6.163437843322754, "learning_rate": 1.5244587311299801e-05, "loss": 1.7005, "step": 113610 }, { "epoch": 0.7141280223125724, "grad_norm": 7.1745381355285645, "learning_rate": 1.5244168210355148e-05, "loss": 1.5691, "step": 113620 }, { "epoch": 0.7141908746292696, "grad_norm": 6.296079635620117, "learning_rate": 1.5243749109410492e-05, "loss": 1.6268, "step": 113630 }, { "epoch": 0.7142537269459667, "grad_norm": 5.797415733337402, "learning_rate": 1.5243330008465839e-05, "loss": 1.6974, "step": 113640 }, { "epoch": 0.7143165792626638, "grad_norm": 6.801391124725342, "learning_rate": 1.5242910907521186e-05, "loss": 1.4754, "step": 113650 }, { "epoch": 0.7143794315793609, "grad_norm": 6.8657026290893555, "learning_rate": 1.5242491806576533e-05, "loss": 1.7169, "step": 113660 }, { "epoch": 0.714442283896058, "grad_norm": 6.456601619720459, "learning_rate": 1.524207270563188e-05, "loss": 1.618, "step": 113670 }, { "epoch": 0.7145051362127551, "grad_norm": 6.933406829833984, "learning_rate": 1.5241653604687226e-05, "loss": 1.6887, "step": 113680 }, { "epoch": 0.7145679885294522, "grad_norm": 6.51193380355835, "learning_rate": 1.5241234503742573e-05, "loss": 1.6549, "step": 113690 }, { "epoch": 0.7146308408461494, "grad_norm": 7.263623237609863, "learning_rate": 1.524081540279792e-05, "loss": 1.3845, "step": 113700 }, { "epoch": 0.7146936931628465, "grad_norm": 6.034303665161133, "learning_rate": 1.5240396301853267e-05, "loss": 1.7341, "step": 113710 }, { "epoch": 0.7147565454795435, "grad_norm": 6.4354143142700195, "learning_rate": 1.523997720090861e-05, "loss": 1.4352, "step": 113720 }, { "epoch": 0.7148193977962406, "grad_norm": 7.792895793914795, "learning_rate": 1.5239558099963958e-05, "loss": 1.5413, "step": 113730 }, { "epoch": 0.7148822501129377, "grad_norm": 7.444430828094482, "learning_rate": 1.5239138999019305e-05, "loss": 1.5746, "step": 113740 }, { "epoch": 0.7149451024296348, "grad_norm": 6.952875137329102, "learning_rate": 1.5238719898074652e-05, "loss": 1.6593, "step": 113750 }, { "epoch": 0.7150079547463319, "grad_norm": 6.499113082885742, "learning_rate": 1.5238300797129999e-05, "loss": 1.7686, "step": 113760 }, { "epoch": 0.7150708070630291, "grad_norm": 7.119709491729736, "learning_rate": 1.5237881696185344e-05, "loss": 1.6387, "step": 113770 }, { "epoch": 0.7151336593797262, "grad_norm": 6.934809684753418, "learning_rate": 1.5237462595240691e-05, "loss": 1.5899, "step": 113780 }, { "epoch": 0.7151965116964233, "grad_norm": 5.7645440101623535, "learning_rate": 1.5237043494296038e-05, "loss": 1.8011, "step": 113790 }, { "epoch": 0.7152593640131204, "grad_norm": 6.047173500061035, "learning_rate": 1.5236624393351385e-05, "loss": 1.5378, "step": 113800 }, { "epoch": 0.7153222163298175, "grad_norm": 5.505337715148926, "learning_rate": 1.523620529240673e-05, "loss": 1.7135, "step": 113810 }, { "epoch": 0.7153850686465146, "grad_norm": 6.926814079284668, "learning_rate": 1.5235786191462076e-05, "loss": 1.9873, "step": 113820 }, { "epoch": 0.7154479209632117, "grad_norm": 6.95925760269165, "learning_rate": 1.5235367090517423e-05, "loss": 1.8136, "step": 113830 }, { "epoch": 0.7155107732799089, "grad_norm": 7.495611190795898, "learning_rate": 1.523494798957277e-05, "loss": 1.5423, "step": 113840 }, { "epoch": 0.715573625596606, "grad_norm": 6.385137557983398, "learning_rate": 1.5234528888628117e-05, "loss": 1.7796, "step": 113850 }, { "epoch": 0.7156364779133031, "grad_norm": 6.669556140899658, "learning_rate": 1.5234109787683463e-05, "loss": 1.5417, "step": 113860 }, { "epoch": 0.7156993302300002, "grad_norm": 7.049135208129883, "learning_rate": 1.5233690686738808e-05, "loss": 1.4325, "step": 113870 }, { "epoch": 0.7157621825466973, "grad_norm": 6.6187825202941895, "learning_rate": 1.5233271585794155e-05, "loss": 1.5089, "step": 113880 }, { "epoch": 0.7158250348633944, "grad_norm": 5.775775909423828, "learning_rate": 1.5232852484849502e-05, "loss": 1.725, "step": 113890 }, { "epoch": 0.7158878871800916, "grad_norm": 6.5200395584106445, "learning_rate": 1.5232433383904848e-05, "loss": 1.6679, "step": 113900 }, { "epoch": 0.7159507394967887, "grad_norm": 6.957881927490234, "learning_rate": 1.5232014282960195e-05, "loss": 1.5921, "step": 113910 }, { "epoch": 0.7160135918134858, "grad_norm": 6.852334976196289, "learning_rate": 1.5231595182015542e-05, "loss": 1.7087, "step": 113920 }, { "epoch": 0.7160764441301829, "grad_norm": 6.447397708892822, "learning_rate": 1.5231176081070889e-05, "loss": 1.7099, "step": 113930 }, { "epoch": 0.71613929644688, "grad_norm": 6.0002121925354, "learning_rate": 1.5230756980126233e-05, "loss": 1.845, "step": 113940 }, { "epoch": 0.7162021487635771, "grad_norm": 6.524419784545898, "learning_rate": 1.523033787918158e-05, "loss": 1.6429, "step": 113950 }, { "epoch": 0.7162650010802742, "grad_norm": 5.7728400230407715, "learning_rate": 1.5229918778236927e-05, "loss": 1.5089, "step": 113960 }, { "epoch": 0.7163278533969714, "grad_norm": 7.010677814483643, "learning_rate": 1.5229499677292274e-05, "loss": 1.3965, "step": 113970 }, { "epoch": 0.7163907057136684, "grad_norm": 6.472847938537598, "learning_rate": 1.5229080576347621e-05, "loss": 1.6774, "step": 113980 }, { "epoch": 0.7164535580303655, "grad_norm": 6.118325233459473, "learning_rate": 1.5228661475402966e-05, "loss": 1.7889, "step": 113990 }, { "epoch": 0.7165164103470626, "grad_norm": 6.4376678466796875, "learning_rate": 1.5228242374458313e-05, "loss": 1.515, "step": 114000 }, { "epoch": 0.7165792626637597, "grad_norm": 6.637792587280273, "learning_rate": 1.522782327351366e-05, "loss": 1.7016, "step": 114010 }, { "epoch": 0.7166421149804568, "grad_norm": 6.195915699005127, "learning_rate": 1.5227404172569007e-05, "loss": 1.7905, "step": 114020 }, { "epoch": 0.7167049672971539, "grad_norm": 6.304981708526611, "learning_rate": 1.5226985071624351e-05, "loss": 1.7846, "step": 114030 }, { "epoch": 0.716767819613851, "grad_norm": 7.737209320068359, "learning_rate": 1.5226565970679698e-05, "loss": 1.3662, "step": 114040 }, { "epoch": 0.7168306719305482, "grad_norm": 6.3326497077941895, "learning_rate": 1.5226146869735045e-05, "loss": 1.5719, "step": 114050 }, { "epoch": 0.7168935242472453, "grad_norm": 6.7617669105529785, "learning_rate": 1.5225727768790392e-05, "loss": 1.4707, "step": 114060 }, { "epoch": 0.7169563765639424, "grad_norm": 5.8286237716674805, "learning_rate": 1.522530866784574e-05, "loss": 1.3576, "step": 114070 }, { "epoch": 0.7170192288806395, "grad_norm": 7.066000938415527, "learning_rate": 1.5224889566901085e-05, "loss": 1.7826, "step": 114080 }, { "epoch": 0.7170820811973366, "grad_norm": 6.6880717277526855, "learning_rate": 1.5224470465956432e-05, "loss": 1.4572, "step": 114090 }, { "epoch": 0.7171449335140337, "grad_norm": 7.817972660064697, "learning_rate": 1.5224051365011779e-05, "loss": 1.6042, "step": 114100 }, { "epoch": 0.7172077858307309, "grad_norm": 6.40554141998291, "learning_rate": 1.5223632264067126e-05, "loss": 1.5907, "step": 114110 }, { "epoch": 0.717270638147428, "grad_norm": 7.366568088531494, "learning_rate": 1.522321316312247e-05, "loss": 1.6764, "step": 114120 }, { "epoch": 0.7173334904641251, "grad_norm": 7.0088725090026855, "learning_rate": 1.5222794062177817e-05, "loss": 1.7399, "step": 114130 }, { "epoch": 0.7173963427808222, "grad_norm": 6.872783184051514, "learning_rate": 1.5222374961233164e-05, "loss": 1.5902, "step": 114140 }, { "epoch": 0.7174591950975193, "grad_norm": 6.1382341384887695, "learning_rate": 1.5221955860288511e-05, "loss": 1.6572, "step": 114150 }, { "epoch": 0.7175220474142164, "grad_norm": 6.142192363739014, "learning_rate": 1.5221536759343858e-05, "loss": 1.4925, "step": 114160 }, { "epoch": 0.7175848997309136, "grad_norm": 5.902562618255615, "learning_rate": 1.5221117658399203e-05, "loss": 1.5269, "step": 114170 }, { "epoch": 0.7176477520476107, "grad_norm": 6.210169315338135, "learning_rate": 1.5220698557454549e-05, "loss": 1.6386, "step": 114180 }, { "epoch": 0.7177106043643078, "grad_norm": 6.398752212524414, "learning_rate": 1.5220279456509896e-05, "loss": 1.5583, "step": 114190 }, { "epoch": 0.7177734566810049, "grad_norm": 6.617867469787598, "learning_rate": 1.5219860355565243e-05, "loss": 1.6815, "step": 114200 }, { "epoch": 0.717836308997702, "grad_norm": 5.834376811981201, "learning_rate": 1.5219441254620588e-05, "loss": 1.4698, "step": 114210 }, { "epoch": 0.7178991613143991, "grad_norm": 6.115474700927734, "learning_rate": 1.5219022153675935e-05, "loss": 1.7216, "step": 114220 }, { "epoch": 0.7179620136310961, "grad_norm": 6.723456859588623, "learning_rate": 1.5218603052731282e-05, "loss": 1.6805, "step": 114230 }, { "epoch": 0.7180248659477932, "grad_norm": 5.87972354888916, "learning_rate": 1.521818395178663e-05, "loss": 1.6633, "step": 114240 }, { "epoch": 0.7180877182644904, "grad_norm": 6.3831634521484375, "learning_rate": 1.5217764850841973e-05, "loss": 1.732, "step": 114250 }, { "epoch": 0.7181505705811875, "grad_norm": 6.4843974113464355, "learning_rate": 1.521734574989732e-05, "loss": 1.7449, "step": 114260 }, { "epoch": 0.7182134228978846, "grad_norm": 6.328886985778809, "learning_rate": 1.5216926648952667e-05, "loss": 1.7362, "step": 114270 }, { "epoch": 0.7182762752145817, "grad_norm": 5.957115650177002, "learning_rate": 1.5216507548008014e-05, "loss": 1.6076, "step": 114280 }, { "epoch": 0.7183391275312788, "grad_norm": 5.09821891784668, "learning_rate": 1.5216088447063361e-05, "loss": 1.6868, "step": 114290 }, { "epoch": 0.7184019798479759, "grad_norm": 6.222084045410156, "learning_rate": 1.5215669346118707e-05, "loss": 1.5835, "step": 114300 }, { "epoch": 0.718464832164673, "grad_norm": 7.457278728485107, "learning_rate": 1.5215250245174054e-05, "loss": 1.4989, "step": 114310 }, { "epoch": 0.7185276844813702, "grad_norm": 6.377392768859863, "learning_rate": 1.5214831144229401e-05, "loss": 1.6346, "step": 114320 }, { "epoch": 0.7185905367980673, "grad_norm": 7.023126125335693, "learning_rate": 1.5214412043284748e-05, "loss": 1.4804, "step": 114330 }, { "epoch": 0.7186533891147644, "grad_norm": 6.258632183074951, "learning_rate": 1.5213992942340092e-05, "loss": 1.8114, "step": 114340 }, { "epoch": 0.7187162414314615, "grad_norm": 6.734646797180176, "learning_rate": 1.5213573841395439e-05, "loss": 1.5873, "step": 114350 }, { "epoch": 0.7187790937481586, "grad_norm": 6.044432640075684, "learning_rate": 1.5213154740450786e-05, "loss": 1.7029, "step": 114360 }, { "epoch": 0.7188419460648557, "grad_norm": 6.947794437408447, "learning_rate": 1.5212735639506133e-05, "loss": 1.6726, "step": 114370 }, { "epoch": 0.7189047983815529, "grad_norm": 6.909889221191406, "learning_rate": 1.521231653856148e-05, "loss": 1.6194, "step": 114380 }, { "epoch": 0.71896765069825, "grad_norm": 6.474206447601318, "learning_rate": 1.5211897437616825e-05, "loss": 1.5683, "step": 114390 }, { "epoch": 0.7190305030149471, "grad_norm": 6.785498142242432, "learning_rate": 1.5211478336672172e-05, "loss": 1.8991, "step": 114400 }, { "epoch": 0.7190933553316442, "grad_norm": 6.298401832580566, "learning_rate": 1.521105923572752e-05, "loss": 1.61, "step": 114410 }, { "epoch": 0.7191562076483413, "grad_norm": 5.573666095733643, "learning_rate": 1.5210640134782867e-05, "loss": 1.6632, "step": 114420 }, { "epoch": 0.7192190599650384, "grad_norm": 7.085525035858154, "learning_rate": 1.521022103383821e-05, "loss": 1.5195, "step": 114430 }, { "epoch": 0.7192819122817355, "grad_norm": 6.482182025909424, "learning_rate": 1.5209801932893557e-05, "loss": 1.6174, "step": 114440 }, { "epoch": 0.7193447645984327, "grad_norm": 6.55352783203125, "learning_rate": 1.5209382831948904e-05, "loss": 1.5703, "step": 114450 }, { "epoch": 0.7194076169151298, "grad_norm": 7.438021659851074, "learning_rate": 1.5208963731004252e-05, "loss": 1.8293, "step": 114460 }, { "epoch": 0.7194704692318269, "grad_norm": 6.282613277435303, "learning_rate": 1.5208544630059599e-05, "loss": 1.6461, "step": 114470 }, { "epoch": 0.719533321548524, "grad_norm": 7.057091236114502, "learning_rate": 1.5208125529114944e-05, "loss": 1.5788, "step": 114480 }, { "epoch": 0.719596173865221, "grad_norm": 6.355682373046875, "learning_rate": 1.5207706428170291e-05, "loss": 1.7209, "step": 114490 }, { "epoch": 0.7196590261819181, "grad_norm": 6.700733184814453, "learning_rate": 1.5207287327225636e-05, "loss": 1.4957, "step": 114500 }, { "epoch": 0.7197218784986152, "grad_norm": 6.967093467712402, "learning_rate": 1.5206868226280983e-05, "loss": 1.5567, "step": 114510 }, { "epoch": 0.7197847308153124, "grad_norm": 5.767993927001953, "learning_rate": 1.5206449125336329e-05, "loss": 1.6891, "step": 114520 }, { "epoch": 0.7198475831320095, "grad_norm": 5.1797895431518555, "learning_rate": 1.5206030024391676e-05, "loss": 1.3634, "step": 114530 }, { "epoch": 0.7199104354487066, "grad_norm": 6.135411739349365, "learning_rate": 1.5205610923447023e-05, "loss": 1.7077, "step": 114540 }, { "epoch": 0.7199732877654037, "grad_norm": 7.227076530456543, "learning_rate": 1.520519182250237e-05, "loss": 1.7955, "step": 114550 }, { "epoch": 0.7200361400821008, "grad_norm": 5.616551399230957, "learning_rate": 1.520481463165218e-05, "loss": 1.7635, "step": 114560 }, { "epoch": 0.7200989923987979, "grad_norm": 5.438570022583008, "learning_rate": 1.5204395530707527e-05, "loss": 1.4608, "step": 114570 }, { "epoch": 0.720161844715495, "grad_norm": 6.289920330047607, "learning_rate": 1.5203976429762874e-05, "loss": 1.3727, "step": 114580 }, { "epoch": 0.7202246970321922, "grad_norm": 6.528743743896484, "learning_rate": 1.5203557328818221e-05, "loss": 1.6865, "step": 114590 }, { "epoch": 0.7202875493488893, "grad_norm": 6.531278133392334, "learning_rate": 1.5203138227873566e-05, "loss": 1.639, "step": 114600 }, { "epoch": 0.7203504016655864, "grad_norm": 7.219261646270752, "learning_rate": 1.5202719126928913e-05, "loss": 1.7829, "step": 114610 }, { "epoch": 0.7204132539822835, "grad_norm": 7.336671352386475, "learning_rate": 1.520230002598426e-05, "loss": 1.3718, "step": 114620 }, { "epoch": 0.7204761062989806, "grad_norm": 6.335206985473633, "learning_rate": 1.5201880925039608e-05, "loss": 1.5391, "step": 114630 }, { "epoch": 0.7205389586156777, "grad_norm": 7.5218505859375, "learning_rate": 1.5201461824094951e-05, "loss": 1.5025, "step": 114640 }, { "epoch": 0.7206018109323749, "grad_norm": 6.53808069229126, "learning_rate": 1.5201042723150298e-05, "loss": 1.5093, "step": 114650 }, { "epoch": 0.720664663249072, "grad_norm": 7.6231303215026855, "learning_rate": 1.5200623622205645e-05, "loss": 1.6067, "step": 114660 }, { "epoch": 0.7207275155657691, "grad_norm": 6.8570637702941895, "learning_rate": 1.5200204521260992e-05, "loss": 1.7287, "step": 114670 }, { "epoch": 0.7207903678824662, "grad_norm": 7.506631374359131, "learning_rate": 1.519978542031634e-05, "loss": 1.5974, "step": 114680 }, { "epoch": 0.7208532201991633, "grad_norm": 7.381155490875244, "learning_rate": 1.5199366319371685e-05, "loss": 1.7563, "step": 114690 }, { "epoch": 0.7209160725158604, "grad_norm": 6.632617473602295, "learning_rate": 1.5198947218427032e-05, "loss": 1.5796, "step": 114700 }, { "epoch": 0.7209789248325575, "grad_norm": 6.989343166351318, "learning_rate": 1.5198528117482379e-05, "loss": 1.6069, "step": 114710 }, { "epoch": 0.7210417771492547, "grad_norm": 7.608463287353516, "learning_rate": 1.5198109016537726e-05, "loss": 1.6091, "step": 114720 }, { "epoch": 0.7211046294659518, "grad_norm": 6.86716365814209, "learning_rate": 1.519768991559307e-05, "loss": 1.4456, "step": 114730 }, { "epoch": 0.7211674817826488, "grad_norm": 6.527399063110352, "learning_rate": 1.5197270814648417e-05, "loss": 1.6957, "step": 114740 }, { "epoch": 0.7212303340993459, "grad_norm": 8.976424217224121, "learning_rate": 1.5196851713703764e-05, "loss": 1.557, "step": 114750 }, { "epoch": 0.721293186416043, "grad_norm": 7.675748348236084, "learning_rate": 1.5196432612759111e-05, "loss": 1.8114, "step": 114760 }, { "epoch": 0.7213560387327401, "grad_norm": 7.14664363861084, "learning_rate": 1.5196013511814456e-05, "loss": 1.6201, "step": 114770 }, { "epoch": 0.7214188910494372, "grad_norm": 6.664819240570068, "learning_rate": 1.5195594410869803e-05, "loss": 1.7189, "step": 114780 }, { "epoch": 0.7214817433661344, "grad_norm": 8.908856391906738, "learning_rate": 1.519517530992515e-05, "loss": 1.7485, "step": 114790 }, { "epoch": 0.7215445956828315, "grad_norm": 5.326156139373779, "learning_rate": 1.5194756208980498e-05, "loss": 1.4227, "step": 114800 }, { "epoch": 0.7216074479995286, "grad_norm": 7.644719123840332, "learning_rate": 1.5194337108035843e-05, "loss": 1.7497, "step": 114810 }, { "epoch": 0.7216703003162257, "grad_norm": 5.888164520263672, "learning_rate": 1.5193918007091188e-05, "loss": 1.8029, "step": 114820 }, { "epoch": 0.7217331526329228, "grad_norm": 5.382526874542236, "learning_rate": 1.5193498906146535e-05, "loss": 1.5768, "step": 114830 }, { "epoch": 0.7217960049496199, "grad_norm": 7.742189407348633, "learning_rate": 1.5193079805201882e-05, "loss": 1.5128, "step": 114840 }, { "epoch": 0.721858857266317, "grad_norm": 7.856515884399414, "learning_rate": 1.519266070425723e-05, "loss": 1.5368, "step": 114850 }, { "epoch": 0.7219217095830142, "grad_norm": 7.214084148406982, "learning_rate": 1.5192241603312575e-05, "loss": 1.6149, "step": 114860 }, { "epoch": 0.7219845618997113, "grad_norm": 5.069043159484863, "learning_rate": 1.5191822502367922e-05, "loss": 1.4039, "step": 114870 }, { "epoch": 0.7220474142164084, "grad_norm": 6.424645900726318, "learning_rate": 1.5191403401423267e-05, "loss": 1.639, "step": 114880 }, { "epoch": 0.7221102665331055, "grad_norm": 7.57219123840332, "learning_rate": 1.5190984300478614e-05, "loss": 1.7926, "step": 114890 }, { "epoch": 0.7221731188498026, "grad_norm": 6.031521320343018, "learning_rate": 1.5190565199533961e-05, "loss": 1.784, "step": 114900 }, { "epoch": 0.7222359711664997, "grad_norm": 6.382801532745361, "learning_rate": 1.5190146098589307e-05, "loss": 1.6667, "step": 114910 }, { "epoch": 0.7222988234831969, "grad_norm": 6.570066452026367, "learning_rate": 1.5189726997644654e-05, "loss": 1.7855, "step": 114920 }, { "epoch": 0.722361675799894, "grad_norm": 6.704894065856934, "learning_rate": 1.5189307896700001e-05, "loss": 1.4182, "step": 114930 }, { "epoch": 0.7224245281165911, "grad_norm": 6.024482250213623, "learning_rate": 1.5188888795755348e-05, "loss": 1.5817, "step": 114940 }, { "epoch": 0.7224873804332882, "grad_norm": 8.764252662658691, "learning_rate": 1.5188469694810692e-05, "loss": 1.7421, "step": 114950 }, { "epoch": 0.7225502327499853, "grad_norm": 6.126072883605957, "learning_rate": 1.5188050593866039e-05, "loss": 1.6377, "step": 114960 }, { "epoch": 0.7226130850666824, "grad_norm": 5.632742404937744, "learning_rate": 1.5187631492921386e-05, "loss": 1.692, "step": 114970 }, { "epoch": 0.7226759373833795, "grad_norm": 5.550741672515869, "learning_rate": 1.5187212391976733e-05, "loss": 1.4384, "step": 114980 }, { "epoch": 0.7227387897000767, "grad_norm": 6.033601760864258, "learning_rate": 1.518679329103208e-05, "loss": 1.7988, "step": 114990 }, { "epoch": 0.7228016420167737, "grad_norm": 6.537342548370361, "learning_rate": 1.5186374190087425e-05, "loss": 1.64, "step": 115000 }, { "epoch": 0.7228644943334708, "grad_norm": 6.672082424163818, "learning_rate": 1.5185955089142772e-05, "loss": 1.2782, "step": 115010 }, { "epoch": 0.7229273466501679, "grad_norm": 6.4160075187683105, "learning_rate": 1.518553598819812e-05, "loss": 1.5656, "step": 115020 }, { "epoch": 0.722990198966865, "grad_norm": 6.561112403869629, "learning_rate": 1.5185116887253467e-05, "loss": 1.8092, "step": 115030 }, { "epoch": 0.7230530512835621, "grad_norm": 6.481167316436768, "learning_rate": 1.518469778630881e-05, "loss": 1.7376, "step": 115040 }, { "epoch": 0.7231159036002592, "grad_norm": 5.722866058349609, "learning_rate": 1.5184278685364157e-05, "loss": 1.431, "step": 115050 }, { "epoch": 0.7231787559169564, "grad_norm": 6.623687267303467, "learning_rate": 1.5183859584419504e-05, "loss": 1.5459, "step": 115060 }, { "epoch": 0.7232416082336535, "grad_norm": 6.44396448135376, "learning_rate": 1.5183440483474852e-05, "loss": 1.4087, "step": 115070 }, { "epoch": 0.7233044605503506, "grad_norm": 5.95437479019165, "learning_rate": 1.5183021382530197e-05, "loss": 1.6956, "step": 115080 }, { "epoch": 0.7233673128670477, "grad_norm": 5.641931056976318, "learning_rate": 1.5182602281585544e-05, "loss": 1.452, "step": 115090 }, { "epoch": 0.7234301651837448, "grad_norm": 7.695308208465576, "learning_rate": 1.5182183180640891e-05, "loss": 1.5653, "step": 115100 }, { "epoch": 0.7234930175004419, "grad_norm": 6.782965660095215, "learning_rate": 1.5181764079696238e-05, "loss": 1.5719, "step": 115110 }, { "epoch": 0.723555869817139, "grad_norm": 6.327223300933838, "learning_rate": 1.5181344978751583e-05, "loss": 1.5051, "step": 115120 }, { "epoch": 0.7236187221338362, "grad_norm": 6.862222194671631, "learning_rate": 1.5180925877806929e-05, "loss": 1.7584, "step": 115130 }, { "epoch": 0.7236815744505333, "grad_norm": 6.9344635009765625, "learning_rate": 1.5180506776862276e-05, "loss": 1.7514, "step": 115140 }, { "epoch": 0.7237444267672304, "grad_norm": 6.590836048126221, "learning_rate": 1.5180087675917623e-05, "loss": 1.5174, "step": 115150 }, { "epoch": 0.7238072790839275, "grad_norm": 6.448301792144775, "learning_rate": 1.517966857497297e-05, "loss": 1.7572, "step": 115160 }, { "epoch": 0.7238701314006246, "grad_norm": 7.644365310668945, "learning_rate": 1.5179249474028315e-05, "loss": 1.4495, "step": 115170 }, { "epoch": 0.7239329837173217, "grad_norm": 7.2462663650512695, "learning_rate": 1.5178830373083663e-05, "loss": 1.7079, "step": 115180 }, { "epoch": 0.7239958360340188, "grad_norm": 6.857507228851318, "learning_rate": 1.5178411272139008e-05, "loss": 1.5334, "step": 115190 }, { "epoch": 0.724058688350716, "grad_norm": 7.825425624847412, "learning_rate": 1.5177992171194355e-05, "loss": 1.486, "step": 115200 }, { "epoch": 0.7241215406674131, "grad_norm": 7.303027629852295, "learning_rate": 1.5177573070249702e-05, "loss": 1.6061, "step": 115210 }, { "epoch": 0.7241843929841102, "grad_norm": 6.9621262550354, "learning_rate": 1.5177153969305047e-05, "loss": 1.8309, "step": 115220 }, { "epoch": 0.7242472453008073, "grad_norm": 5.888366222381592, "learning_rate": 1.5176734868360394e-05, "loss": 1.5802, "step": 115230 }, { "epoch": 0.7243100976175044, "grad_norm": 7.329326629638672, "learning_rate": 1.5176315767415742e-05, "loss": 1.7735, "step": 115240 }, { "epoch": 0.7243729499342014, "grad_norm": 7.154123306274414, "learning_rate": 1.5175896666471089e-05, "loss": 1.6783, "step": 115250 }, { "epoch": 0.7244358022508985, "grad_norm": 6.366217613220215, "learning_rate": 1.5175477565526432e-05, "loss": 1.6962, "step": 115260 }, { "epoch": 0.7244986545675957, "grad_norm": 7.0050177574157715, "learning_rate": 1.517505846458178e-05, "loss": 1.6533, "step": 115270 }, { "epoch": 0.7245615068842928, "grad_norm": 6.864089488983154, "learning_rate": 1.5174639363637126e-05, "loss": 1.6909, "step": 115280 }, { "epoch": 0.7246243592009899, "grad_norm": 6.635605335235596, "learning_rate": 1.5174220262692474e-05, "loss": 1.4402, "step": 115290 }, { "epoch": 0.724687211517687, "grad_norm": 6.629925727844238, "learning_rate": 1.517380116174782e-05, "loss": 1.5788, "step": 115300 }, { "epoch": 0.7247500638343841, "grad_norm": 6.296230316162109, "learning_rate": 1.5173382060803166e-05, "loss": 1.5403, "step": 115310 }, { "epoch": 0.7248129161510812, "grad_norm": 6.700946807861328, "learning_rate": 1.5172962959858513e-05, "loss": 1.8149, "step": 115320 }, { "epoch": 0.7248757684677783, "grad_norm": 6.950066566467285, "learning_rate": 1.517254385891386e-05, "loss": 1.498, "step": 115330 }, { "epoch": 0.7249386207844755, "grad_norm": 6.15997314453125, "learning_rate": 1.5172124757969207e-05, "loss": 1.4936, "step": 115340 }, { "epoch": 0.7250014731011726, "grad_norm": 6.174920082092285, "learning_rate": 1.5171705657024551e-05, "loss": 1.9595, "step": 115350 }, { "epoch": 0.7250643254178697, "grad_norm": 6.02477502822876, "learning_rate": 1.5171286556079898e-05, "loss": 1.5151, "step": 115360 }, { "epoch": 0.7251271777345668, "grad_norm": 7.505553245544434, "learning_rate": 1.5170867455135245e-05, "loss": 1.6931, "step": 115370 }, { "epoch": 0.7251900300512639, "grad_norm": 6.075516700744629, "learning_rate": 1.5170448354190592e-05, "loss": 1.5881, "step": 115380 }, { "epoch": 0.725252882367961, "grad_norm": 7.066200256347656, "learning_rate": 1.5170029253245937e-05, "loss": 1.6859, "step": 115390 }, { "epoch": 0.7253157346846582, "grad_norm": 6.309739112854004, "learning_rate": 1.5169610152301285e-05, "loss": 1.67, "step": 115400 }, { "epoch": 0.7253785870013553, "grad_norm": 6.822290420532227, "learning_rate": 1.5169191051356632e-05, "loss": 1.6519, "step": 115410 }, { "epoch": 0.7254414393180524, "grad_norm": 6.343250274658203, "learning_rate": 1.5168771950411979e-05, "loss": 1.725, "step": 115420 }, { "epoch": 0.7255042916347495, "grad_norm": 6.2186689376831055, "learning_rate": 1.5168352849467326e-05, "loss": 1.5672, "step": 115430 }, { "epoch": 0.7255671439514466, "grad_norm": 6.9470367431640625, "learning_rate": 1.516793374852267e-05, "loss": 1.6098, "step": 115440 }, { "epoch": 0.7256299962681437, "grad_norm": 6.316976070404053, "learning_rate": 1.5167514647578016e-05, "loss": 1.4436, "step": 115450 }, { "epoch": 0.7256928485848408, "grad_norm": 6.690842628479004, "learning_rate": 1.5167095546633364e-05, "loss": 1.5943, "step": 115460 }, { "epoch": 0.725755700901538, "grad_norm": 7.384859085083008, "learning_rate": 1.516667644568871e-05, "loss": 1.6767, "step": 115470 }, { "epoch": 0.7258185532182351, "grad_norm": 5.442703723907471, "learning_rate": 1.5166257344744056e-05, "loss": 1.4782, "step": 115480 }, { "epoch": 0.7258814055349322, "grad_norm": 7.1188130378723145, "learning_rate": 1.5165838243799403e-05, "loss": 1.7413, "step": 115490 }, { "epoch": 0.7259442578516293, "grad_norm": 6.402884483337402, "learning_rate": 1.5165419142854748e-05, "loss": 1.4255, "step": 115500 }, { "epoch": 0.7260071101683263, "grad_norm": 6.383247375488281, "learning_rate": 1.5165000041910096e-05, "loss": 1.8272, "step": 115510 }, { "epoch": 0.7260699624850234, "grad_norm": 5.225858211517334, "learning_rate": 1.5164580940965443e-05, "loss": 1.489, "step": 115520 }, { "epoch": 0.7261328148017205, "grad_norm": 7.787111759185791, "learning_rate": 1.5164161840020788e-05, "loss": 1.6115, "step": 115530 }, { "epoch": 0.7261956671184177, "grad_norm": 6.8042802810668945, "learning_rate": 1.5163742739076135e-05, "loss": 1.7215, "step": 115540 }, { "epoch": 0.7262585194351148, "grad_norm": 7.336939811706543, "learning_rate": 1.5163323638131482e-05, "loss": 1.6679, "step": 115550 }, { "epoch": 0.7263213717518119, "grad_norm": 5.344560146331787, "learning_rate": 1.516290453718683e-05, "loss": 1.6706, "step": 115560 }, { "epoch": 0.726384224068509, "grad_norm": 7.378252029418945, "learning_rate": 1.5162485436242173e-05, "loss": 1.6384, "step": 115570 }, { "epoch": 0.7264470763852061, "grad_norm": 5.6592254638671875, "learning_rate": 1.516206633529752e-05, "loss": 1.7509, "step": 115580 }, { "epoch": 0.7265099287019032, "grad_norm": 6.7056403160095215, "learning_rate": 1.5161647234352867e-05, "loss": 1.6448, "step": 115590 }, { "epoch": 0.7265727810186003, "grad_norm": 7.759737014770508, "learning_rate": 1.5161228133408214e-05, "loss": 1.4573, "step": 115600 }, { "epoch": 0.7266356333352975, "grad_norm": 7.28309965133667, "learning_rate": 1.5160809032463561e-05, "loss": 1.6218, "step": 115610 }, { "epoch": 0.7266984856519946, "grad_norm": 6.890913963317871, "learning_rate": 1.5160389931518907e-05, "loss": 1.8592, "step": 115620 }, { "epoch": 0.7267613379686917, "grad_norm": 7.0204362869262695, "learning_rate": 1.5159970830574254e-05, "loss": 1.8612, "step": 115630 }, { "epoch": 0.7268241902853888, "grad_norm": 5.921641826629639, "learning_rate": 1.51595517296296e-05, "loss": 1.5616, "step": 115640 }, { "epoch": 0.7268870426020859, "grad_norm": 6.536056041717529, "learning_rate": 1.5159132628684948e-05, "loss": 1.6371, "step": 115650 }, { "epoch": 0.726949894918783, "grad_norm": 6.2884063720703125, "learning_rate": 1.5158713527740291e-05, "loss": 1.8226, "step": 115660 }, { "epoch": 0.7270127472354802, "grad_norm": 5.8661699295043945, "learning_rate": 1.5158294426795638e-05, "loss": 1.4365, "step": 115670 }, { "epoch": 0.7270755995521773, "grad_norm": 7.0693254470825195, "learning_rate": 1.5157875325850986e-05, "loss": 1.6888, "step": 115680 }, { "epoch": 0.7271384518688744, "grad_norm": 6.45350980758667, "learning_rate": 1.5157456224906333e-05, "loss": 1.6169, "step": 115690 }, { "epoch": 0.7272013041855715, "grad_norm": 5.505177021026611, "learning_rate": 1.5157037123961678e-05, "loss": 1.4611, "step": 115700 }, { "epoch": 0.7272641565022686, "grad_norm": 6.742366313934326, "learning_rate": 1.5156618023017025e-05, "loss": 1.6813, "step": 115710 }, { "epoch": 0.7273270088189657, "grad_norm": 6.366575717926025, "learning_rate": 1.5156198922072372e-05, "loss": 1.5547, "step": 115720 }, { "epoch": 0.7273898611356628, "grad_norm": 5.64211368560791, "learning_rate": 1.515577982112772e-05, "loss": 1.4973, "step": 115730 }, { "epoch": 0.72745271345236, "grad_norm": 6.600712299346924, "learning_rate": 1.5155360720183066e-05, "loss": 1.5888, "step": 115740 }, { "epoch": 0.7275155657690571, "grad_norm": 6.504429817199707, "learning_rate": 1.515494161923841e-05, "loss": 1.6392, "step": 115750 }, { "epoch": 0.7275784180857541, "grad_norm": 5.690182685852051, "learning_rate": 1.5154522518293757e-05, "loss": 1.4292, "step": 115760 }, { "epoch": 0.7276412704024512, "grad_norm": 7.890905380249023, "learning_rate": 1.5154103417349104e-05, "loss": 1.8914, "step": 115770 }, { "epoch": 0.7277041227191483, "grad_norm": 5.442069053649902, "learning_rate": 1.5153684316404451e-05, "loss": 1.4884, "step": 115780 }, { "epoch": 0.7277669750358454, "grad_norm": 7.099849700927734, "learning_rate": 1.5153265215459797e-05, "loss": 1.7281, "step": 115790 }, { "epoch": 0.7278298273525425, "grad_norm": 7.283675670623779, "learning_rate": 1.5152846114515144e-05, "loss": 1.6031, "step": 115800 }, { "epoch": 0.7278926796692397, "grad_norm": 5.814444541931152, "learning_rate": 1.5152427013570489e-05, "loss": 1.5404, "step": 115810 }, { "epoch": 0.7279555319859368, "grad_norm": 6.222223281860352, "learning_rate": 1.5152007912625836e-05, "loss": 1.4462, "step": 115820 }, { "epoch": 0.7280183843026339, "grad_norm": 8.399040222167969, "learning_rate": 1.5151588811681183e-05, "loss": 1.7329, "step": 115830 }, { "epoch": 0.728081236619331, "grad_norm": 7.937182903289795, "learning_rate": 1.5151169710736529e-05, "loss": 1.581, "step": 115840 }, { "epoch": 0.7281440889360281, "grad_norm": 6.623231410980225, "learning_rate": 1.5150750609791876e-05, "loss": 1.7026, "step": 115850 }, { "epoch": 0.7282069412527252, "grad_norm": 6.111772060394287, "learning_rate": 1.5150331508847223e-05, "loss": 1.4106, "step": 115860 }, { "epoch": 0.7282697935694223, "grad_norm": 5.90593147277832, "learning_rate": 1.514991240790257e-05, "loss": 1.7615, "step": 115870 }, { "epoch": 0.7283326458861195, "grad_norm": 5.499425888061523, "learning_rate": 1.5149493306957913e-05, "loss": 1.7468, "step": 115880 }, { "epoch": 0.7283954982028166, "grad_norm": 6.455041408538818, "learning_rate": 1.514907420601326e-05, "loss": 1.7094, "step": 115890 }, { "epoch": 0.7284583505195137, "grad_norm": 6.752415657043457, "learning_rate": 1.5148655105068608e-05, "loss": 1.6646, "step": 115900 }, { "epoch": 0.7285212028362108, "grad_norm": 7.045289039611816, "learning_rate": 1.5148236004123955e-05, "loss": 1.8899, "step": 115910 }, { "epoch": 0.7285840551529079, "grad_norm": 7.456486225128174, "learning_rate": 1.5147816903179302e-05, "loss": 1.7274, "step": 115920 }, { "epoch": 0.728646907469605, "grad_norm": 5.679928779602051, "learning_rate": 1.5147397802234647e-05, "loss": 1.7484, "step": 115930 }, { "epoch": 0.7287097597863021, "grad_norm": 6.834784984588623, "learning_rate": 1.5146978701289994e-05, "loss": 1.5582, "step": 115940 }, { "epoch": 0.7287726121029993, "grad_norm": 6.277487277984619, "learning_rate": 1.5146559600345341e-05, "loss": 1.6715, "step": 115950 }, { "epoch": 0.7288354644196964, "grad_norm": 7.477523326873779, "learning_rate": 1.5146140499400688e-05, "loss": 1.559, "step": 115960 }, { "epoch": 0.7288983167363935, "grad_norm": 6.518509864807129, "learning_rate": 1.5145721398456032e-05, "loss": 1.4773, "step": 115970 }, { "epoch": 0.7289611690530906, "grad_norm": 6.6805572509765625, "learning_rate": 1.5145302297511379e-05, "loss": 1.6848, "step": 115980 }, { "epoch": 0.7290240213697877, "grad_norm": 5.526790618896484, "learning_rate": 1.5144883196566726e-05, "loss": 1.5908, "step": 115990 }, { "epoch": 0.7290868736864848, "grad_norm": 6.3358588218688965, "learning_rate": 1.5144464095622073e-05, "loss": 1.6823, "step": 116000 }, { "epoch": 0.729149726003182, "grad_norm": 6.603145122528076, "learning_rate": 1.5144044994677419e-05, "loss": 1.569, "step": 116010 }, { "epoch": 0.729212578319879, "grad_norm": 7.549184799194336, "learning_rate": 1.5143625893732766e-05, "loss": 1.677, "step": 116020 }, { "epoch": 0.7292754306365761, "grad_norm": 5.670623779296875, "learning_rate": 1.5143206792788113e-05, "loss": 1.6699, "step": 116030 }, { "epoch": 0.7293382829532732, "grad_norm": 6.9575371742248535, "learning_rate": 1.514278769184346e-05, "loss": 1.4866, "step": 116040 }, { "epoch": 0.7294011352699703, "grad_norm": 6.037784576416016, "learning_rate": 1.5142368590898807e-05, "loss": 1.7322, "step": 116050 }, { "epoch": 0.7294639875866674, "grad_norm": 7.633106231689453, "learning_rate": 1.514194948995415e-05, "loss": 1.5799, "step": 116060 }, { "epoch": 0.7295268399033645, "grad_norm": 7.580491065979004, "learning_rate": 1.5141530389009498e-05, "loss": 1.7369, "step": 116070 }, { "epoch": 0.7295896922200616, "grad_norm": 5.560009956359863, "learning_rate": 1.5141111288064845e-05, "loss": 1.5047, "step": 116080 }, { "epoch": 0.7296525445367588, "grad_norm": 6.160469055175781, "learning_rate": 1.5140692187120192e-05, "loss": 1.5171, "step": 116090 }, { "epoch": 0.7297153968534559, "grad_norm": 6.144357204437256, "learning_rate": 1.5140273086175537e-05, "loss": 1.4165, "step": 116100 }, { "epoch": 0.729778249170153, "grad_norm": 7.0018815994262695, "learning_rate": 1.5139853985230884e-05, "loss": 1.4202, "step": 116110 }, { "epoch": 0.7298411014868501, "grad_norm": 6.264974594116211, "learning_rate": 1.5139434884286231e-05, "loss": 1.5731, "step": 116120 }, { "epoch": 0.7299039538035472, "grad_norm": 6.4756550788879395, "learning_rate": 1.5139015783341577e-05, "loss": 1.7918, "step": 116130 }, { "epoch": 0.7299668061202443, "grad_norm": 6.437472820281982, "learning_rate": 1.5138596682396924e-05, "loss": 1.9329, "step": 116140 }, { "epoch": 0.7300296584369415, "grad_norm": 5.813361644744873, "learning_rate": 1.5138177581452269e-05, "loss": 1.6144, "step": 116150 }, { "epoch": 0.7300925107536386, "grad_norm": 7.2174787521362305, "learning_rate": 1.5137758480507616e-05, "loss": 1.7227, "step": 116160 }, { "epoch": 0.7301553630703357, "grad_norm": 7.056854248046875, "learning_rate": 1.5137339379562963e-05, "loss": 1.6733, "step": 116170 }, { "epoch": 0.7302182153870328, "grad_norm": 6.974052906036377, "learning_rate": 1.513692027861831e-05, "loss": 1.6408, "step": 116180 }, { "epoch": 0.7302810677037299, "grad_norm": 7.3612518310546875, "learning_rate": 1.5136501177673654e-05, "loss": 1.569, "step": 116190 }, { "epoch": 0.730343920020427, "grad_norm": 7.1358323097229, "learning_rate": 1.5136082076729001e-05, "loss": 1.6857, "step": 116200 }, { "epoch": 0.7304067723371241, "grad_norm": 5.359313011169434, "learning_rate": 1.5135662975784348e-05, "loss": 1.475, "step": 116210 }, { "epoch": 0.7304696246538213, "grad_norm": 6.692825794219971, "learning_rate": 1.5135243874839695e-05, "loss": 1.5207, "step": 116220 }, { "epoch": 0.7305324769705184, "grad_norm": 6.0015363693237305, "learning_rate": 1.5134824773895042e-05, "loss": 1.6661, "step": 116230 }, { "epoch": 0.7305953292872155, "grad_norm": 5.568943500518799, "learning_rate": 1.5134405672950388e-05, "loss": 1.5029, "step": 116240 }, { "epoch": 0.7306581816039126, "grad_norm": 6.180572032928467, "learning_rate": 1.5133986572005735e-05, "loss": 1.6326, "step": 116250 }, { "epoch": 0.7307210339206097, "grad_norm": 6.116061687469482, "learning_rate": 1.5133567471061082e-05, "loss": 1.5924, "step": 116260 }, { "epoch": 0.7307838862373067, "grad_norm": 5.452944755554199, "learning_rate": 1.5133148370116429e-05, "loss": 1.5732, "step": 116270 }, { "epoch": 0.7308467385540038, "grad_norm": 5.4776787757873535, "learning_rate": 1.5132729269171773e-05, "loss": 1.5846, "step": 116280 }, { "epoch": 0.730909590870701, "grad_norm": 7.125429153442383, "learning_rate": 1.513231016822712e-05, "loss": 1.6662, "step": 116290 }, { "epoch": 0.7309724431873981, "grad_norm": 6.490479946136475, "learning_rate": 1.5131891067282467e-05, "loss": 1.5348, "step": 116300 }, { "epoch": 0.7310352955040952, "grad_norm": 5.33502197265625, "learning_rate": 1.5131471966337814e-05, "loss": 1.6706, "step": 116310 }, { "epoch": 0.7310981478207923, "grad_norm": 6.439328670501709, "learning_rate": 1.5131052865393159e-05, "loss": 1.9088, "step": 116320 }, { "epoch": 0.7311610001374894, "grad_norm": 6.690783500671387, "learning_rate": 1.5130633764448506e-05, "loss": 1.8729, "step": 116330 }, { "epoch": 0.7312238524541865, "grad_norm": 6.990403652191162, "learning_rate": 1.5130214663503853e-05, "loss": 1.5862, "step": 116340 }, { "epoch": 0.7312867047708836, "grad_norm": 7.408207416534424, "learning_rate": 1.51297955625592e-05, "loss": 1.6638, "step": 116350 }, { "epoch": 0.7313495570875808, "grad_norm": 6.472781658172607, "learning_rate": 1.5129376461614547e-05, "loss": 1.6145, "step": 116360 }, { "epoch": 0.7314124094042779, "grad_norm": 5.800587177276611, "learning_rate": 1.5128957360669891e-05, "loss": 1.7008, "step": 116370 }, { "epoch": 0.731475261720975, "grad_norm": 5.939591407775879, "learning_rate": 1.5128538259725238e-05, "loss": 1.6694, "step": 116380 }, { "epoch": 0.7315381140376721, "grad_norm": 6.2899956703186035, "learning_rate": 1.5128119158780585e-05, "loss": 1.6128, "step": 116390 }, { "epoch": 0.7316009663543692, "grad_norm": 6.833620548248291, "learning_rate": 1.5127700057835932e-05, "loss": 1.7073, "step": 116400 }, { "epoch": 0.7316638186710663, "grad_norm": 5.662091255187988, "learning_rate": 1.5127280956891278e-05, "loss": 1.6163, "step": 116410 }, { "epoch": 0.7317266709877635, "grad_norm": 6.265413284301758, "learning_rate": 1.5126861855946625e-05, "loss": 1.6779, "step": 116420 }, { "epoch": 0.7317895233044606, "grad_norm": 6.575082778930664, "learning_rate": 1.5126442755001972e-05, "loss": 1.6361, "step": 116430 }, { "epoch": 0.7318523756211577, "grad_norm": 5.75390100479126, "learning_rate": 1.5126023654057317e-05, "loss": 1.6087, "step": 116440 }, { "epoch": 0.7319152279378548, "grad_norm": 6.973175525665283, "learning_rate": 1.5125604553112664e-05, "loss": 1.6973, "step": 116450 }, { "epoch": 0.7319780802545519, "grad_norm": 5.981052875518799, "learning_rate": 1.512518545216801e-05, "loss": 1.6895, "step": 116460 }, { "epoch": 0.732040932571249, "grad_norm": 5.5195746421813965, "learning_rate": 1.5124766351223357e-05, "loss": 1.4089, "step": 116470 }, { "epoch": 0.7321037848879461, "grad_norm": 7.317570209503174, "learning_rate": 1.5124347250278704e-05, "loss": 1.729, "step": 116480 }, { "epoch": 0.7321666372046433, "grad_norm": 6.834884166717529, "learning_rate": 1.512392814933405e-05, "loss": 1.5518, "step": 116490 }, { "epoch": 0.7322294895213404, "grad_norm": 6.461305141448975, "learning_rate": 1.5123509048389396e-05, "loss": 1.5368, "step": 116500 }, { "epoch": 0.7322923418380375, "grad_norm": 6.329416751861572, "learning_rate": 1.5123089947444742e-05, "loss": 1.6686, "step": 116510 }, { "epoch": 0.7323551941547346, "grad_norm": 5.427972316741943, "learning_rate": 1.5122670846500089e-05, "loss": 1.7472, "step": 116520 }, { "epoch": 0.7324180464714316, "grad_norm": 7.150508880615234, "learning_rate": 1.5122251745555436e-05, "loss": 1.3206, "step": 116530 }, { "epoch": 0.7324808987881287, "grad_norm": 7.156881809234619, "learning_rate": 1.5121832644610783e-05, "loss": 1.5712, "step": 116540 }, { "epoch": 0.7325437511048258, "grad_norm": 6.0363335609436035, "learning_rate": 1.5121413543666128e-05, "loss": 1.6576, "step": 116550 }, { "epoch": 0.732606603421523, "grad_norm": 5.923098087310791, "learning_rate": 1.5120994442721475e-05, "loss": 1.568, "step": 116560 }, { "epoch": 0.7326694557382201, "grad_norm": 6.134315490722656, "learning_rate": 1.5120575341776822e-05, "loss": 1.3467, "step": 116570 }, { "epoch": 0.7327323080549172, "grad_norm": 5.646820068359375, "learning_rate": 1.512015624083217e-05, "loss": 1.672, "step": 116580 }, { "epoch": 0.7327951603716143, "grad_norm": 6.210283279418945, "learning_rate": 1.5119737139887513e-05, "loss": 1.6509, "step": 116590 }, { "epoch": 0.7328580126883114, "grad_norm": 6.5223469734191895, "learning_rate": 1.511931803894286e-05, "loss": 1.7307, "step": 116600 }, { "epoch": 0.7329208650050085, "grad_norm": 5.664274215698242, "learning_rate": 1.5118898937998207e-05, "loss": 1.5186, "step": 116610 }, { "epoch": 0.7329837173217056, "grad_norm": 6.588070392608643, "learning_rate": 1.5118479837053554e-05, "loss": 1.4739, "step": 116620 }, { "epoch": 0.7330465696384028, "grad_norm": 7.984457492828369, "learning_rate": 1.51180607361089e-05, "loss": 1.5591, "step": 116630 }, { "epoch": 0.7331094219550999, "grad_norm": 6.535252094268799, "learning_rate": 1.5117641635164247e-05, "loss": 1.5877, "step": 116640 }, { "epoch": 0.733172274271797, "grad_norm": 8.361675262451172, "learning_rate": 1.5117222534219594e-05, "loss": 1.5656, "step": 116650 }, { "epoch": 0.7332351265884941, "grad_norm": 5.582570552825928, "learning_rate": 1.5116803433274941e-05, "loss": 1.4592, "step": 116660 }, { "epoch": 0.7332979789051912, "grad_norm": 6.3676533699035645, "learning_rate": 1.5116384332330288e-05, "loss": 1.6096, "step": 116670 }, { "epoch": 0.7333608312218883, "grad_norm": 5.959659576416016, "learning_rate": 1.5115965231385632e-05, "loss": 1.6329, "step": 116680 }, { "epoch": 0.7334236835385854, "grad_norm": 6.887051105499268, "learning_rate": 1.5115546130440979e-05, "loss": 1.8432, "step": 116690 }, { "epoch": 0.7334865358552826, "grad_norm": 7.40312385559082, "learning_rate": 1.5115127029496326e-05, "loss": 1.5736, "step": 116700 }, { "epoch": 0.7335493881719797, "grad_norm": 5.859505653381348, "learning_rate": 1.5114707928551673e-05, "loss": 1.4872, "step": 116710 }, { "epoch": 0.7336122404886768, "grad_norm": 6.143748760223389, "learning_rate": 1.5114288827607018e-05, "loss": 1.5369, "step": 116720 }, { "epoch": 0.7336750928053739, "grad_norm": 6.233851432800293, "learning_rate": 1.5113869726662365e-05, "loss": 1.3973, "step": 116730 }, { "epoch": 0.733737945122071, "grad_norm": 6.276238441467285, "learning_rate": 1.5113450625717712e-05, "loss": 1.5194, "step": 116740 }, { "epoch": 0.7338007974387681, "grad_norm": 7.023739814758301, "learning_rate": 1.5113031524773058e-05, "loss": 1.5849, "step": 116750 }, { "epoch": 0.7338636497554653, "grad_norm": 6.218929767608643, "learning_rate": 1.5112612423828405e-05, "loss": 1.4176, "step": 116760 }, { "epoch": 0.7339265020721624, "grad_norm": 6.902184963226318, "learning_rate": 1.511219332288375e-05, "loss": 1.5564, "step": 116770 }, { "epoch": 0.7339893543888594, "grad_norm": 6.392886638641357, "learning_rate": 1.5111774221939097e-05, "loss": 1.674, "step": 116780 }, { "epoch": 0.7340522067055565, "grad_norm": 6.936563014984131, "learning_rate": 1.5111355120994444e-05, "loss": 1.6001, "step": 116790 }, { "epoch": 0.7341150590222536, "grad_norm": 7.19557523727417, "learning_rate": 1.5110936020049791e-05, "loss": 1.646, "step": 116800 }, { "epoch": 0.7341779113389507, "grad_norm": 7.987978935241699, "learning_rate": 1.5110516919105137e-05, "loss": 1.5771, "step": 116810 }, { "epoch": 0.7342407636556478, "grad_norm": 5.6326189041137695, "learning_rate": 1.5110097818160482e-05, "loss": 1.6144, "step": 116820 }, { "epoch": 0.734303615972345, "grad_norm": 8.156567573547363, "learning_rate": 1.510967871721583e-05, "loss": 1.568, "step": 116830 }, { "epoch": 0.7343664682890421, "grad_norm": 5.975371360778809, "learning_rate": 1.5109259616271176e-05, "loss": 1.5546, "step": 116840 }, { "epoch": 0.7344293206057392, "grad_norm": 6.602675437927246, "learning_rate": 1.5108840515326523e-05, "loss": 1.661, "step": 116850 }, { "epoch": 0.7344921729224363, "grad_norm": 5.703989028930664, "learning_rate": 1.5108421414381869e-05, "loss": 1.5246, "step": 116860 }, { "epoch": 0.7345550252391334, "grad_norm": 6.698914051055908, "learning_rate": 1.5108002313437216e-05, "loss": 1.6645, "step": 116870 }, { "epoch": 0.7346178775558305, "grad_norm": 5.767265796661377, "learning_rate": 1.5107583212492563e-05, "loss": 1.478, "step": 116880 }, { "epoch": 0.7346807298725276, "grad_norm": 6.4239821434021, "learning_rate": 1.510716411154791e-05, "loss": 1.6222, "step": 116890 }, { "epoch": 0.7347435821892248, "grad_norm": 6.624935150146484, "learning_rate": 1.5106745010603254e-05, "loss": 1.8779, "step": 116900 }, { "epoch": 0.7348064345059219, "grad_norm": 6.570802211761475, "learning_rate": 1.51063259096586e-05, "loss": 1.6508, "step": 116910 }, { "epoch": 0.734869286822619, "grad_norm": 6.047118663787842, "learning_rate": 1.5105906808713948e-05, "loss": 1.9309, "step": 116920 }, { "epoch": 0.7349321391393161, "grad_norm": 6.4321699142456055, "learning_rate": 1.5105487707769295e-05, "loss": 1.6348, "step": 116930 }, { "epoch": 0.7349949914560132, "grad_norm": 7.588624477386475, "learning_rate": 1.510506860682464e-05, "loss": 1.6869, "step": 116940 }, { "epoch": 0.7350578437727103, "grad_norm": 8.491658210754395, "learning_rate": 1.5104649505879987e-05, "loss": 1.9243, "step": 116950 }, { "epoch": 0.7351206960894074, "grad_norm": 5.338583946228027, "learning_rate": 1.5104230404935334e-05, "loss": 1.6569, "step": 116960 }, { "epoch": 0.7351835484061046, "grad_norm": 5.248413562774658, "learning_rate": 1.5103811303990681e-05, "loss": 1.6199, "step": 116970 }, { "epoch": 0.7352464007228017, "grad_norm": 6.071453094482422, "learning_rate": 1.5103392203046028e-05, "loss": 1.6443, "step": 116980 }, { "epoch": 0.7353092530394988, "grad_norm": 5.989220142364502, "learning_rate": 1.5102973102101372e-05, "loss": 1.4356, "step": 116990 }, { "epoch": 0.7353721053561959, "grad_norm": 6.5885210037231445, "learning_rate": 1.510255400115672e-05, "loss": 1.4912, "step": 117000 }, { "epoch": 0.735434957672893, "grad_norm": 8.531759262084961, "learning_rate": 1.5102134900212066e-05, "loss": 1.5257, "step": 117010 }, { "epoch": 0.7354978099895901, "grad_norm": 6.679140567779541, "learning_rate": 1.5101715799267413e-05, "loss": 1.6421, "step": 117020 }, { "epoch": 0.7355606623062872, "grad_norm": 5.821613311767578, "learning_rate": 1.5101296698322759e-05, "loss": 1.4254, "step": 117030 }, { "epoch": 0.7356235146229843, "grad_norm": 6.059935569763184, "learning_rate": 1.5100877597378106e-05, "loss": 1.5685, "step": 117040 }, { "epoch": 0.7356863669396814, "grad_norm": 7.621307373046875, "learning_rate": 1.5100458496433453e-05, "loss": 1.6196, "step": 117050 }, { "epoch": 0.7357492192563785, "grad_norm": 6.805684566497803, "learning_rate": 1.51000393954888e-05, "loss": 1.9584, "step": 117060 }, { "epoch": 0.7358120715730756, "grad_norm": 5.543520450592041, "learning_rate": 1.5099620294544145e-05, "loss": 1.5554, "step": 117070 }, { "epoch": 0.7358749238897727, "grad_norm": 6.204122066497803, "learning_rate": 1.509920119359949e-05, "loss": 1.7006, "step": 117080 }, { "epoch": 0.7359377762064698, "grad_norm": 5.589944362640381, "learning_rate": 1.5098782092654838e-05, "loss": 1.7825, "step": 117090 }, { "epoch": 0.7360006285231669, "grad_norm": 7.761539459228516, "learning_rate": 1.5098362991710185e-05, "loss": 1.4863, "step": 117100 }, { "epoch": 0.7360634808398641, "grad_norm": 8.452686309814453, "learning_rate": 1.5097943890765532e-05, "loss": 1.5597, "step": 117110 }, { "epoch": 0.7361263331565612, "grad_norm": 6.617719650268555, "learning_rate": 1.5097524789820877e-05, "loss": 1.7412, "step": 117120 }, { "epoch": 0.7361891854732583, "grad_norm": 5.494043350219727, "learning_rate": 1.5097105688876223e-05, "loss": 1.6649, "step": 117130 }, { "epoch": 0.7362520377899554, "grad_norm": 7.095428943634033, "learning_rate": 1.509668658793157e-05, "loss": 1.4653, "step": 117140 }, { "epoch": 0.7363148901066525, "grad_norm": 6.8330254554748535, "learning_rate": 1.5096267486986917e-05, "loss": 1.6437, "step": 117150 }, { "epoch": 0.7363777424233496, "grad_norm": 7.541816711425781, "learning_rate": 1.5095848386042264e-05, "loss": 1.9871, "step": 117160 }, { "epoch": 0.7364405947400468, "grad_norm": 6.407535552978516, "learning_rate": 1.509542928509761e-05, "loss": 1.5611, "step": 117170 }, { "epoch": 0.7365034470567439, "grad_norm": 6.419076442718506, "learning_rate": 1.5095010184152956e-05, "loss": 1.6267, "step": 117180 }, { "epoch": 0.736566299373441, "grad_norm": 7.079204082489014, "learning_rate": 1.5094591083208303e-05, "loss": 1.5027, "step": 117190 }, { "epoch": 0.7366291516901381, "grad_norm": 5.94259786605835, "learning_rate": 1.509417198226365e-05, "loss": 1.5696, "step": 117200 }, { "epoch": 0.7366920040068352, "grad_norm": 6.831979274749756, "learning_rate": 1.5093752881318994e-05, "loss": 1.8869, "step": 117210 }, { "epoch": 0.7367548563235323, "grad_norm": 6.6600751876831055, "learning_rate": 1.5093333780374341e-05, "loss": 1.5593, "step": 117220 }, { "epoch": 0.7368177086402294, "grad_norm": 6.9768524169921875, "learning_rate": 1.5092914679429688e-05, "loss": 1.5743, "step": 117230 }, { "epoch": 0.7368805609569266, "grad_norm": 7.4975104331970215, "learning_rate": 1.5092495578485035e-05, "loss": 1.7093, "step": 117240 }, { "epoch": 0.7369434132736237, "grad_norm": 7.1950507164001465, "learning_rate": 1.509207647754038e-05, "loss": 1.486, "step": 117250 }, { "epoch": 0.7370062655903208, "grad_norm": 6.349445343017578, "learning_rate": 1.5091657376595728e-05, "loss": 1.6197, "step": 117260 }, { "epoch": 0.7370691179070179, "grad_norm": 6.088489532470703, "learning_rate": 1.5091238275651075e-05, "loss": 1.5926, "step": 117270 }, { "epoch": 0.737131970223715, "grad_norm": 6.358327388763428, "learning_rate": 1.5090819174706422e-05, "loss": 1.752, "step": 117280 }, { "epoch": 0.737194822540412, "grad_norm": 7.0250372886657715, "learning_rate": 1.5090400073761769e-05, "loss": 1.7792, "step": 117290 }, { "epoch": 0.7372576748571091, "grad_norm": 6.476198196411133, "learning_rate": 1.5089980972817113e-05, "loss": 1.5934, "step": 117300 }, { "epoch": 0.7373205271738063, "grad_norm": 5.442954063415527, "learning_rate": 1.508956187187246e-05, "loss": 1.3738, "step": 117310 }, { "epoch": 0.7373833794905034, "grad_norm": 6.9679412841796875, "learning_rate": 1.5089142770927807e-05, "loss": 1.7288, "step": 117320 }, { "epoch": 0.7374462318072005, "grad_norm": 8.034341812133789, "learning_rate": 1.5088723669983154e-05, "loss": 1.6302, "step": 117330 }, { "epoch": 0.7375090841238976, "grad_norm": 7.061643600463867, "learning_rate": 1.50883045690385e-05, "loss": 1.6806, "step": 117340 }, { "epoch": 0.7375719364405947, "grad_norm": 7.586759090423584, "learning_rate": 1.5087885468093846e-05, "loss": 1.6545, "step": 117350 }, { "epoch": 0.7376347887572918, "grad_norm": 7.27079439163208, "learning_rate": 1.5087466367149193e-05, "loss": 1.6971, "step": 117360 }, { "epoch": 0.7376976410739889, "grad_norm": 6.415982246398926, "learning_rate": 1.508704726620454e-05, "loss": 1.5838, "step": 117370 }, { "epoch": 0.7377604933906861, "grad_norm": 5.629652976989746, "learning_rate": 1.5086628165259886e-05, "loss": 1.5687, "step": 117380 }, { "epoch": 0.7378233457073832, "grad_norm": 6.433467388153076, "learning_rate": 1.5086209064315231e-05, "loss": 1.6912, "step": 117390 }, { "epoch": 0.7378861980240803, "grad_norm": 6.706539154052734, "learning_rate": 1.5085789963370578e-05, "loss": 1.7466, "step": 117400 }, { "epoch": 0.7379490503407774, "grad_norm": 5.629751682281494, "learning_rate": 1.5085370862425925e-05, "loss": 1.5597, "step": 117410 }, { "epoch": 0.7380119026574745, "grad_norm": 6.2336249351501465, "learning_rate": 1.5084951761481272e-05, "loss": 1.5987, "step": 117420 }, { "epoch": 0.7380747549741716, "grad_norm": 6.122241497039795, "learning_rate": 1.5084532660536618e-05, "loss": 1.5728, "step": 117430 }, { "epoch": 0.7381376072908687, "grad_norm": 7.282014846801758, "learning_rate": 1.5084113559591965e-05, "loss": 1.7043, "step": 117440 }, { "epoch": 0.7382004596075659, "grad_norm": 6.606590270996094, "learning_rate": 1.508369445864731e-05, "loss": 1.6365, "step": 117450 }, { "epoch": 0.738263311924263, "grad_norm": 6.349709987640381, "learning_rate": 1.5083275357702657e-05, "loss": 1.6036, "step": 117460 }, { "epoch": 0.7383261642409601, "grad_norm": 5.677004814147949, "learning_rate": 1.5082856256758004e-05, "loss": 1.5725, "step": 117470 }, { "epoch": 0.7383890165576572, "grad_norm": 6.100164890289307, "learning_rate": 1.508243715581335e-05, "loss": 1.5149, "step": 117480 }, { "epoch": 0.7384518688743543, "grad_norm": 5.808437824249268, "learning_rate": 1.5082018054868697e-05, "loss": 1.6419, "step": 117490 }, { "epoch": 0.7385147211910514, "grad_norm": 6.135100364685059, "learning_rate": 1.5081598953924044e-05, "loss": 1.6567, "step": 117500 }, { "epoch": 0.7385775735077486, "grad_norm": 6.83665132522583, "learning_rate": 1.5081179852979391e-05, "loss": 1.7174, "step": 117510 }, { "epoch": 0.7386404258244457, "grad_norm": 6.4246087074279785, "learning_rate": 1.5080760752034735e-05, "loss": 1.6823, "step": 117520 }, { "epoch": 0.7387032781411428, "grad_norm": 6.082372665405273, "learning_rate": 1.5080341651090082e-05, "loss": 1.862, "step": 117530 }, { "epoch": 0.7387661304578399, "grad_norm": 6.764336109161377, "learning_rate": 1.5079922550145429e-05, "loss": 1.6196, "step": 117540 }, { "epoch": 0.7388289827745369, "grad_norm": 7.09053373336792, "learning_rate": 1.5079503449200776e-05, "loss": 1.6565, "step": 117550 }, { "epoch": 0.738891835091234, "grad_norm": 6.333735466003418, "learning_rate": 1.5079084348256121e-05, "loss": 1.6949, "step": 117560 }, { "epoch": 0.7389546874079311, "grad_norm": 5.893936634063721, "learning_rate": 1.5078665247311468e-05, "loss": 1.6049, "step": 117570 }, { "epoch": 0.7390175397246282, "grad_norm": 6.619492530822754, "learning_rate": 1.5078246146366815e-05, "loss": 1.5969, "step": 117580 }, { "epoch": 0.7390803920413254, "grad_norm": 5.940426349639893, "learning_rate": 1.5077827045422163e-05, "loss": 1.8313, "step": 117590 }, { "epoch": 0.7391432443580225, "grad_norm": 6.792811870574951, "learning_rate": 1.507740794447751e-05, "loss": 1.7358, "step": 117600 }, { "epoch": 0.7392060966747196, "grad_norm": 7.276131629943848, "learning_rate": 1.5076988843532853e-05, "loss": 1.7425, "step": 117610 }, { "epoch": 0.7392689489914167, "grad_norm": 6.933396816253662, "learning_rate": 1.50765697425882e-05, "loss": 1.5703, "step": 117620 }, { "epoch": 0.7393318013081138, "grad_norm": 7.595689296722412, "learning_rate": 1.5076150641643547e-05, "loss": 1.4989, "step": 117630 }, { "epoch": 0.7393946536248109, "grad_norm": 5.8264288902282715, "learning_rate": 1.5075731540698894e-05, "loss": 1.7436, "step": 117640 }, { "epoch": 0.739457505941508, "grad_norm": 7.674973487854004, "learning_rate": 1.507531243975424e-05, "loss": 1.665, "step": 117650 }, { "epoch": 0.7395203582582052, "grad_norm": 7.688501834869385, "learning_rate": 1.5074893338809587e-05, "loss": 1.5542, "step": 117660 }, { "epoch": 0.7395832105749023, "grad_norm": 6.045027732849121, "learning_rate": 1.5074474237864934e-05, "loss": 1.4263, "step": 117670 }, { "epoch": 0.7396460628915994, "grad_norm": 7.995920181274414, "learning_rate": 1.5074055136920281e-05, "loss": 1.8176, "step": 117680 }, { "epoch": 0.7397089152082965, "grad_norm": 6.833772659301758, "learning_rate": 1.5073636035975626e-05, "loss": 1.6143, "step": 117690 }, { "epoch": 0.7397717675249936, "grad_norm": 6.990723609924316, "learning_rate": 1.5073216935030972e-05, "loss": 1.845, "step": 117700 }, { "epoch": 0.7398346198416907, "grad_norm": 5.987464904785156, "learning_rate": 1.5072797834086319e-05, "loss": 1.535, "step": 117710 }, { "epoch": 0.7398974721583879, "grad_norm": 6.514626979827881, "learning_rate": 1.5072378733141666e-05, "loss": 1.7832, "step": 117720 }, { "epoch": 0.739960324475085, "grad_norm": 8.164240837097168, "learning_rate": 1.5071959632197013e-05, "loss": 1.6897, "step": 117730 }, { "epoch": 0.7400231767917821, "grad_norm": 6.876088619232178, "learning_rate": 1.5071540531252358e-05, "loss": 1.5586, "step": 117740 }, { "epoch": 0.7400860291084792, "grad_norm": 6.978507041931152, "learning_rate": 1.5071121430307705e-05, "loss": 1.6416, "step": 117750 }, { "epoch": 0.7401488814251763, "grad_norm": 6.801281452178955, "learning_rate": 1.5070702329363051e-05, "loss": 1.5543, "step": 117760 }, { "epoch": 0.7402117337418734, "grad_norm": 7.367517948150635, "learning_rate": 1.5070283228418398e-05, "loss": 1.66, "step": 117770 }, { "epoch": 0.7402745860585705, "grad_norm": 6.92649507522583, "learning_rate": 1.5069864127473745e-05, "loss": 1.564, "step": 117780 }, { "epoch": 0.7403374383752677, "grad_norm": 6.037821292877197, "learning_rate": 1.506944502652909e-05, "loss": 1.5933, "step": 117790 }, { "epoch": 0.7404002906919647, "grad_norm": 6.205532550811768, "learning_rate": 1.5069025925584437e-05, "loss": 1.4872, "step": 117800 }, { "epoch": 0.7404631430086618, "grad_norm": 5.795130252838135, "learning_rate": 1.5068606824639785e-05, "loss": 1.5236, "step": 117810 }, { "epoch": 0.7405259953253589, "grad_norm": 7.018015384674072, "learning_rate": 1.5068187723695132e-05, "loss": 1.585, "step": 117820 }, { "epoch": 0.740588847642056, "grad_norm": 6.7126946449279785, "learning_rate": 1.5067768622750475e-05, "loss": 1.6935, "step": 117830 }, { "epoch": 0.7406516999587531, "grad_norm": 5.657297134399414, "learning_rate": 1.5067349521805822e-05, "loss": 1.5176, "step": 117840 }, { "epoch": 0.7407145522754502, "grad_norm": 5.1493144035339355, "learning_rate": 1.506693042086117e-05, "loss": 1.8007, "step": 117850 }, { "epoch": 0.7407774045921474, "grad_norm": 7.843067646026611, "learning_rate": 1.5066511319916516e-05, "loss": 1.6827, "step": 117860 }, { "epoch": 0.7408402569088445, "grad_norm": 5.899585247039795, "learning_rate": 1.5066092218971862e-05, "loss": 1.5232, "step": 117870 }, { "epoch": 0.7409031092255416, "grad_norm": 6.4450483322143555, "learning_rate": 1.5065673118027209e-05, "loss": 1.8019, "step": 117880 }, { "epoch": 0.7409659615422387, "grad_norm": 5.604441165924072, "learning_rate": 1.5065254017082556e-05, "loss": 1.3844, "step": 117890 }, { "epoch": 0.7410288138589358, "grad_norm": 6.067689895629883, "learning_rate": 1.5064834916137903e-05, "loss": 1.4794, "step": 117900 }, { "epoch": 0.7410916661756329, "grad_norm": 7.927680015563965, "learning_rate": 1.506441581519325e-05, "loss": 1.7118, "step": 117910 }, { "epoch": 0.74115451849233, "grad_norm": 5.2320404052734375, "learning_rate": 1.5063996714248594e-05, "loss": 1.5452, "step": 117920 }, { "epoch": 0.7412173708090272, "grad_norm": 6.294632911682129, "learning_rate": 1.5063577613303941e-05, "loss": 1.5066, "step": 117930 }, { "epoch": 0.7412802231257243, "grad_norm": 7.3671417236328125, "learning_rate": 1.5063158512359288e-05, "loss": 1.6631, "step": 117940 }, { "epoch": 0.7413430754424214, "grad_norm": 5.318902015686035, "learning_rate": 1.5062739411414635e-05, "loss": 1.5723, "step": 117950 }, { "epoch": 0.7414059277591185, "grad_norm": 5.740302085876465, "learning_rate": 1.506232031046998e-05, "loss": 1.6862, "step": 117960 }, { "epoch": 0.7414687800758156, "grad_norm": 6.682391166687012, "learning_rate": 1.5061901209525327e-05, "loss": 1.5793, "step": 117970 }, { "epoch": 0.7415316323925127, "grad_norm": 6.816469669342041, "learning_rate": 1.5061482108580675e-05, "loss": 1.5008, "step": 117980 }, { "epoch": 0.7415944847092099, "grad_norm": 5.641001224517822, "learning_rate": 1.5061063007636022e-05, "loss": 1.6833, "step": 117990 }, { "epoch": 0.741657337025907, "grad_norm": 5.387622833251953, "learning_rate": 1.5060643906691369e-05, "loss": 1.5852, "step": 118000 }, { "epoch": 0.7417201893426041, "grad_norm": 6.281401634216309, "learning_rate": 1.5060224805746712e-05, "loss": 1.6492, "step": 118010 }, { "epoch": 0.7417830416593012, "grad_norm": 5.9567413330078125, "learning_rate": 1.505980570480206e-05, "loss": 1.6401, "step": 118020 }, { "epoch": 0.7418458939759983, "grad_norm": 7.386930465698242, "learning_rate": 1.5059386603857407e-05, "loss": 1.748, "step": 118030 }, { "epoch": 0.7419087462926954, "grad_norm": 7.229539394378662, "learning_rate": 1.5058967502912754e-05, "loss": 1.646, "step": 118040 }, { "epoch": 0.7419715986093925, "grad_norm": 6.037184715270996, "learning_rate": 1.5058548401968099e-05, "loss": 1.4495, "step": 118050 }, { "epoch": 0.7420344509260896, "grad_norm": 6.56737756729126, "learning_rate": 1.5058129301023446e-05, "loss": 1.8005, "step": 118060 }, { "epoch": 0.7420973032427867, "grad_norm": 5.887204647064209, "learning_rate": 1.5057710200078791e-05, "loss": 1.5325, "step": 118070 }, { "epoch": 0.7421601555594838, "grad_norm": 5.897393703460693, "learning_rate": 1.5057291099134138e-05, "loss": 1.6846, "step": 118080 }, { "epoch": 0.7422230078761809, "grad_norm": 6.135063648223877, "learning_rate": 1.5056871998189486e-05, "loss": 1.4418, "step": 118090 }, { "epoch": 0.742285860192878, "grad_norm": 6.761139392852783, "learning_rate": 1.5056452897244831e-05, "loss": 1.7583, "step": 118100 }, { "epoch": 0.7423487125095751, "grad_norm": 6.599857807159424, "learning_rate": 1.5056033796300178e-05, "loss": 1.5573, "step": 118110 }, { "epoch": 0.7424115648262722, "grad_norm": 6.822390556335449, "learning_rate": 1.5055614695355525e-05, "loss": 1.5575, "step": 118120 }, { "epoch": 0.7424744171429694, "grad_norm": 6.472692966461182, "learning_rate": 1.5055195594410872e-05, "loss": 1.6348, "step": 118130 }, { "epoch": 0.7425372694596665, "grad_norm": 6.222887992858887, "learning_rate": 1.5054776493466216e-05, "loss": 1.6309, "step": 118140 }, { "epoch": 0.7426001217763636, "grad_norm": 8.920105934143066, "learning_rate": 1.5054357392521563e-05, "loss": 1.9112, "step": 118150 }, { "epoch": 0.7426629740930607, "grad_norm": 7.522040843963623, "learning_rate": 1.505393829157691e-05, "loss": 1.8268, "step": 118160 }, { "epoch": 0.7427258264097578, "grad_norm": 6.264855861663818, "learning_rate": 1.5053519190632257e-05, "loss": 1.6398, "step": 118170 }, { "epoch": 0.7427886787264549, "grad_norm": 6.111952781677246, "learning_rate": 1.5053100089687602e-05, "loss": 1.6049, "step": 118180 }, { "epoch": 0.742851531043152, "grad_norm": 6.81397008895874, "learning_rate": 1.505268098874295e-05, "loss": 1.532, "step": 118190 }, { "epoch": 0.7429143833598492, "grad_norm": 6.4752302169799805, "learning_rate": 1.5052261887798297e-05, "loss": 1.7698, "step": 118200 }, { "epoch": 0.7429772356765463, "grad_norm": 6.936573028564453, "learning_rate": 1.5051842786853644e-05, "loss": 1.6254, "step": 118210 }, { "epoch": 0.7430400879932434, "grad_norm": 7.293984889984131, "learning_rate": 1.505142368590899e-05, "loss": 1.511, "step": 118220 }, { "epoch": 0.7431029403099405, "grad_norm": 6.396081447601318, "learning_rate": 1.5051004584964334e-05, "loss": 1.5334, "step": 118230 }, { "epoch": 0.7431657926266376, "grad_norm": 6.191473484039307, "learning_rate": 1.5050585484019681e-05, "loss": 1.6911, "step": 118240 }, { "epoch": 0.7432286449433347, "grad_norm": 7.289321422576904, "learning_rate": 1.5050166383075029e-05, "loss": 1.8485, "step": 118250 }, { "epoch": 0.7432914972600319, "grad_norm": 6.679356098175049, "learning_rate": 1.5049747282130376e-05, "loss": 1.7827, "step": 118260 }, { "epoch": 0.743354349576729, "grad_norm": 6.273317337036133, "learning_rate": 1.5049328181185721e-05, "loss": 1.5529, "step": 118270 }, { "epoch": 0.7434172018934261, "grad_norm": 5.995965957641602, "learning_rate": 1.5048909080241068e-05, "loss": 1.4336, "step": 118280 }, { "epoch": 0.7434800542101232, "grad_norm": 6.3854522705078125, "learning_rate": 1.5048489979296415e-05, "loss": 1.5366, "step": 118290 }, { "epoch": 0.7435429065268203, "grad_norm": 5.601490020751953, "learning_rate": 1.5048070878351762e-05, "loss": 1.6345, "step": 118300 }, { "epoch": 0.7436057588435174, "grad_norm": 6.8942461013793945, "learning_rate": 1.504765177740711e-05, "loss": 1.8003, "step": 118310 }, { "epoch": 0.7436686111602144, "grad_norm": 5.988004684448242, "learning_rate": 1.5047232676462453e-05, "loss": 1.8467, "step": 118320 }, { "epoch": 0.7437314634769115, "grad_norm": 5.545806884765625, "learning_rate": 1.50468135755178e-05, "loss": 1.3995, "step": 118330 }, { "epoch": 0.7437943157936087, "grad_norm": 6.340879917144775, "learning_rate": 1.5046394474573147e-05, "loss": 1.5756, "step": 118340 }, { "epoch": 0.7438571681103058, "grad_norm": 5.379518508911133, "learning_rate": 1.5045975373628494e-05, "loss": 1.6435, "step": 118350 }, { "epoch": 0.7439200204270029, "grad_norm": 6.786842346191406, "learning_rate": 1.504555627268384e-05, "loss": 1.6649, "step": 118360 }, { "epoch": 0.7439828727437, "grad_norm": 5.5345988273620605, "learning_rate": 1.5045137171739187e-05, "loss": 1.5497, "step": 118370 }, { "epoch": 0.7440457250603971, "grad_norm": 6.6930437088012695, "learning_rate": 1.5044718070794534e-05, "loss": 1.4766, "step": 118380 }, { "epoch": 0.7441085773770942, "grad_norm": 6.040542125701904, "learning_rate": 1.5044298969849879e-05, "loss": 1.647, "step": 118390 }, { "epoch": 0.7441714296937914, "grad_norm": 6.675861358642578, "learning_rate": 1.5043879868905226e-05, "loss": 1.5709, "step": 118400 }, { "epoch": 0.7442342820104885, "grad_norm": 6.339999675750732, "learning_rate": 1.5043460767960571e-05, "loss": 1.6079, "step": 118410 }, { "epoch": 0.7442971343271856, "grad_norm": 5.432798862457275, "learning_rate": 1.5043041667015919e-05, "loss": 1.6047, "step": 118420 }, { "epoch": 0.7443599866438827, "grad_norm": 6.235202312469482, "learning_rate": 1.5042622566071266e-05, "loss": 1.6056, "step": 118430 }, { "epoch": 0.7444228389605798, "grad_norm": 5.5712995529174805, "learning_rate": 1.5042203465126613e-05, "loss": 1.4556, "step": 118440 }, { "epoch": 0.7444856912772769, "grad_norm": 6.592348098754883, "learning_rate": 1.5041784364181956e-05, "loss": 1.7613, "step": 118450 }, { "epoch": 0.744548543593974, "grad_norm": 6.759244441986084, "learning_rate": 1.5041365263237303e-05, "loss": 1.739, "step": 118460 }, { "epoch": 0.7446113959106712, "grad_norm": 8.218925476074219, "learning_rate": 1.504094616229265e-05, "loss": 1.6422, "step": 118470 }, { "epoch": 0.7446742482273683, "grad_norm": 6.363652229309082, "learning_rate": 1.5040527061347998e-05, "loss": 1.5126, "step": 118480 }, { "epoch": 0.7447371005440654, "grad_norm": 7.040666103363037, "learning_rate": 1.5040107960403345e-05, "loss": 1.6639, "step": 118490 }, { "epoch": 0.7447999528607625, "grad_norm": 6.67605447769165, "learning_rate": 1.503968885945869e-05, "loss": 1.3955, "step": 118500 }, { "epoch": 0.7448628051774596, "grad_norm": 7.312140464782715, "learning_rate": 1.5039269758514037e-05, "loss": 1.4657, "step": 118510 }, { "epoch": 0.7449256574941567, "grad_norm": 7.492226600646973, "learning_rate": 1.5038850657569384e-05, "loss": 1.8691, "step": 118520 }, { "epoch": 0.7449885098108538, "grad_norm": 5.2623186111450195, "learning_rate": 1.5038431556624731e-05, "loss": 1.4467, "step": 118530 }, { "epoch": 0.745051362127551, "grad_norm": 6.508378505706787, "learning_rate": 1.5038012455680075e-05, "loss": 1.5585, "step": 118540 }, { "epoch": 0.7451142144442481, "grad_norm": 6.904928684234619, "learning_rate": 1.5037593354735422e-05, "loss": 1.7736, "step": 118550 }, { "epoch": 0.7451770667609452, "grad_norm": 5.544427394866943, "learning_rate": 1.5037174253790769e-05, "loss": 1.3645, "step": 118560 }, { "epoch": 0.7452399190776422, "grad_norm": 5.96975564956665, "learning_rate": 1.5036755152846116e-05, "loss": 1.5651, "step": 118570 }, { "epoch": 0.7453027713943393, "grad_norm": 6.607202053070068, "learning_rate": 1.5036336051901462e-05, "loss": 1.5868, "step": 118580 }, { "epoch": 0.7453656237110364, "grad_norm": 6.926027774810791, "learning_rate": 1.5035916950956809e-05, "loss": 1.6423, "step": 118590 }, { "epoch": 0.7454284760277335, "grad_norm": 7.431971549987793, "learning_rate": 1.5035497850012156e-05, "loss": 1.7106, "step": 118600 }, { "epoch": 0.7454913283444307, "grad_norm": 6.554296493530273, "learning_rate": 1.5035078749067503e-05, "loss": 1.5635, "step": 118610 }, { "epoch": 0.7455541806611278, "grad_norm": 8.098177909851074, "learning_rate": 1.5034701558217312e-05, "loss": 2.0012, "step": 118620 }, { "epoch": 0.7456170329778249, "grad_norm": 6.262833595275879, "learning_rate": 1.503428245727266e-05, "loss": 1.6464, "step": 118630 }, { "epoch": 0.745679885294522, "grad_norm": 6.295077800750732, "learning_rate": 1.5033863356328007e-05, "loss": 1.6451, "step": 118640 }, { "epoch": 0.7457427376112191, "grad_norm": 7.018307685852051, "learning_rate": 1.5033444255383354e-05, "loss": 1.4181, "step": 118650 }, { "epoch": 0.7458055899279162, "grad_norm": 6.842716693878174, "learning_rate": 1.5033025154438699e-05, "loss": 1.5012, "step": 118660 }, { "epoch": 0.7458684422446133, "grad_norm": 5.80836820602417, "learning_rate": 1.5032606053494046e-05, "loss": 1.5672, "step": 118670 }, { "epoch": 0.7459312945613105, "grad_norm": 6.2121100425720215, "learning_rate": 1.5032186952549393e-05, "loss": 1.8919, "step": 118680 }, { "epoch": 0.7459941468780076, "grad_norm": 7.220552444458008, "learning_rate": 1.503176785160474e-05, "loss": 1.4593, "step": 118690 }, { "epoch": 0.7460569991947047, "grad_norm": 6.2886552810668945, "learning_rate": 1.5031348750660084e-05, "loss": 1.6792, "step": 118700 }, { "epoch": 0.7461198515114018, "grad_norm": 5.707417964935303, "learning_rate": 1.5030929649715431e-05, "loss": 1.4881, "step": 118710 }, { "epoch": 0.7461827038280989, "grad_norm": 5.562893390655518, "learning_rate": 1.5030510548770778e-05, "loss": 1.6741, "step": 118720 }, { "epoch": 0.746245556144796, "grad_norm": 4.820675849914551, "learning_rate": 1.5030091447826125e-05, "loss": 1.5545, "step": 118730 }, { "epoch": 0.7463084084614932, "grad_norm": 6.513208389282227, "learning_rate": 1.5029672346881472e-05, "loss": 1.7086, "step": 118740 }, { "epoch": 0.7463712607781903, "grad_norm": 5.914416790008545, "learning_rate": 1.5029253245936818e-05, "loss": 1.6179, "step": 118750 }, { "epoch": 0.7464341130948874, "grad_norm": 6.555986404418945, "learning_rate": 1.5028834144992163e-05, "loss": 1.4677, "step": 118760 }, { "epoch": 0.7464969654115845, "grad_norm": 7.121492385864258, "learning_rate": 1.502841504404751e-05, "loss": 1.6739, "step": 118770 }, { "epoch": 0.7465598177282816, "grad_norm": 6.156549453735352, "learning_rate": 1.5027995943102857e-05, "loss": 1.5818, "step": 118780 }, { "epoch": 0.7466226700449787, "grad_norm": 5.9737067222595215, "learning_rate": 1.5027576842158202e-05, "loss": 1.3584, "step": 118790 }, { "epoch": 0.7466855223616758, "grad_norm": 6.020236492156982, "learning_rate": 1.502715774121355e-05, "loss": 1.654, "step": 118800 }, { "epoch": 0.746748374678373, "grad_norm": 7.43925142288208, "learning_rate": 1.5026738640268897e-05, "loss": 1.6574, "step": 118810 }, { "epoch": 0.7468112269950701, "grad_norm": 7.011662006378174, "learning_rate": 1.5026319539324244e-05, "loss": 1.5776, "step": 118820 }, { "epoch": 0.7468740793117671, "grad_norm": 6.51852560043335, "learning_rate": 1.502590043837959e-05, "loss": 1.7336, "step": 118830 }, { "epoch": 0.7469369316284642, "grad_norm": 5.864762306213379, "learning_rate": 1.5025481337434934e-05, "loss": 1.5665, "step": 118840 }, { "epoch": 0.7469997839451613, "grad_norm": 6.758666038513184, "learning_rate": 1.5025062236490281e-05, "loss": 1.6257, "step": 118850 }, { "epoch": 0.7470626362618584, "grad_norm": 6.533952713012695, "learning_rate": 1.5024643135545629e-05, "loss": 1.512, "step": 118860 }, { "epoch": 0.7471254885785555, "grad_norm": 5.908234596252441, "learning_rate": 1.5024224034600976e-05, "loss": 1.5964, "step": 118870 }, { "epoch": 0.7471883408952527, "grad_norm": 7.8281049728393555, "learning_rate": 1.5023804933656321e-05, "loss": 1.6632, "step": 118880 }, { "epoch": 0.7472511932119498, "grad_norm": 6.972465991973877, "learning_rate": 1.5023385832711668e-05, "loss": 1.7618, "step": 118890 }, { "epoch": 0.7473140455286469, "grad_norm": 6.552032947540283, "learning_rate": 1.5022966731767015e-05, "loss": 1.6267, "step": 118900 }, { "epoch": 0.747376897845344, "grad_norm": 5.361602783203125, "learning_rate": 1.5022547630822362e-05, "loss": 1.8534, "step": 118910 }, { "epoch": 0.7474397501620411, "grad_norm": 7.634783744812012, "learning_rate": 1.502212852987771e-05, "loss": 1.691, "step": 118920 }, { "epoch": 0.7475026024787382, "grad_norm": 6.120603084564209, "learning_rate": 1.5021709428933053e-05, "loss": 1.5766, "step": 118930 }, { "epoch": 0.7475654547954353, "grad_norm": 5.479430675506592, "learning_rate": 1.50212903279884e-05, "loss": 1.3384, "step": 118940 }, { "epoch": 0.7476283071121325, "grad_norm": 6.822821617126465, "learning_rate": 1.5020871227043747e-05, "loss": 1.3687, "step": 118950 }, { "epoch": 0.7476911594288296, "grad_norm": 7.230306148529053, "learning_rate": 1.5020452126099094e-05, "loss": 1.532, "step": 118960 }, { "epoch": 0.7477540117455267, "grad_norm": 8.492520332336426, "learning_rate": 1.502003302515444e-05, "loss": 1.6345, "step": 118970 }, { "epoch": 0.7478168640622238, "grad_norm": 7.006572723388672, "learning_rate": 1.5019613924209787e-05, "loss": 1.5281, "step": 118980 }, { "epoch": 0.7478797163789209, "grad_norm": 6.370782375335693, "learning_rate": 1.5019194823265134e-05, "loss": 1.5704, "step": 118990 }, { "epoch": 0.747942568695618, "grad_norm": 6.308587074279785, "learning_rate": 1.501877572232048e-05, "loss": 1.3109, "step": 119000 }, { "epoch": 0.7480054210123152, "grad_norm": 5.765300273895264, "learning_rate": 1.5018356621375824e-05, "loss": 1.5817, "step": 119010 }, { "epoch": 0.7480682733290123, "grad_norm": 6.795331001281738, "learning_rate": 1.5017937520431172e-05, "loss": 1.5964, "step": 119020 }, { "epoch": 0.7481311256457094, "grad_norm": 7.549683570861816, "learning_rate": 1.5017518419486519e-05, "loss": 1.5406, "step": 119030 }, { "epoch": 0.7481939779624065, "grad_norm": 5.843475341796875, "learning_rate": 1.5017099318541866e-05, "loss": 1.3048, "step": 119040 }, { "epoch": 0.7482568302791036, "grad_norm": 6.7381486892700195, "learning_rate": 1.5016680217597213e-05, "loss": 1.7444, "step": 119050 }, { "epoch": 0.7483196825958007, "grad_norm": 7.63314962387085, "learning_rate": 1.5016261116652558e-05, "loss": 1.6775, "step": 119060 }, { "epoch": 0.7483825349124978, "grad_norm": 6.3762946128845215, "learning_rate": 1.5015842015707905e-05, "loss": 1.5391, "step": 119070 }, { "epoch": 0.7484453872291948, "grad_norm": 6.232476234436035, "learning_rate": 1.501542291476325e-05, "loss": 1.5976, "step": 119080 }, { "epoch": 0.748508239545892, "grad_norm": 6.704739093780518, "learning_rate": 1.5015003813818598e-05, "loss": 1.6854, "step": 119090 }, { "epoch": 0.7485710918625891, "grad_norm": 6.483764171600342, "learning_rate": 1.5014584712873943e-05, "loss": 1.7323, "step": 119100 }, { "epoch": 0.7486339441792862, "grad_norm": 5.834470272064209, "learning_rate": 1.501416561192929e-05, "loss": 1.6401, "step": 119110 }, { "epoch": 0.7486967964959833, "grad_norm": 7.356104850769043, "learning_rate": 1.5013746510984637e-05, "loss": 1.5479, "step": 119120 }, { "epoch": 0.7487596488126804, "grad_norm": 5.908942222595215, "learning_rate": 1.5013327410039984e-05, "loss": 1.6411, "step": 119130 }, { "epoch": 0.7488225011293775, "grad_norm": 6.813505172729492, "learning_rate": 1.5012908309095331e-05, "loss": 1.4182, "step": 119140 }, { "epoch": 0.7488853534460747, "grad_norm": 5.873258113861084, "learning_rate": 1.5012489208150675e-05, "loss": 1.5243, "step": 119150 }, { "epoch": 0.7489482057627718, "grad_norm": 6.333630084991455, "learning_rate": 1.5012070107206022e-05, "loss": 1.605, "step": 119160 }, { "epoch": 0.7490110580794689, "grad_norm": 6.718225002288818, "learning_rate": 1.5011651006261369e-05, "loss": 1.6831, "step": 119170 }, { "epoch": 0.749073910396166, "grad_norm": 6.953636169433594, "learning_rate": 1.5011231905316716e-05, "loss": 1.6459, "step": 119180 }, { "epoch": 0.7491367627128631, "grad_norm": 6.216082572937012, "learning_rate": 1.5010812804372062e-05, "loss": 1.5992, "step": 119190 }, { "epoch": 0.7491996150295602, "grad_norm": 7.1688385009765625, "learning_rate": 1.5010393703427409e-05, "loss": 1.6605, "step": 119200 }, { "epoch": 0.7492624673462573, "grad_norm": 5.808324337005615, "learning_rate": 1.5009974602482756e-05, "loss": 1.6583, "step": 119210 }, { "epoch": 0.7493253196629545, "grad_norm": 6.335639953613281, "learning_rate": 1.5009555501538103e-05, "loss": 1.655, "step": 119220 }, { "epoch": 0.7493881719796516, "grad_norm": 7.026367664337158, "learning_rate": 1.500913640059345e-05, "loss": 1.6521, "step": 119230 }, { "epoch": 0.7494510242963487, "grad_norm": 6.402663707733154, "learning_rate": 1.5008717299648794e-05, "loss": 1.5132, "step": 119240 }, { "epoch": 0.7495138766130458, "grad_norm": 7.0340070724487305, "learning_rate": 1.500829819870414e-05, "loss": 1.4258, "step": 119250 }, { "epoch": 0.7495767289297429, "grad_norm": 6.382259845733643, "learning_rate": 1.5007879097759488e-05, "loss": 1.6958, "step": 119260 }, { "epoch": 0.74963958124644, "grad_norm": 6.875700950622559, "learning_rate": 1.5007459996814835e-05, "loss": 1.6692, "step": 119270 }, { "epoch": 0.7497024335631371, "grad_norm": 6.259668350219727, "learning_rate": 1.500704089587018e-05, "loss": 1.84, "step": 119280 }, { "epoch": 0.7497652858798343, "grad_norm": 6.386197090148926, "learning_rate": 1.5006621794925527e-05, "loss": 1.5187, "step": 119290 }, { "epoch": 0.7498281381965314, "grad_norm": 6.529397010803223, "learning_rate": 1.5006202693980874e-05, "loss": 1.6688, "step": 119300 }, { "epoch": 0.7498909905132285, "grad_norm": 5.699202537536621, "learning_rate": 1.5005783593036221e-05, "loss": 1.6539, "step": 119310 }, { "epoch": 0.7499538428299256, "grad_norm": 6.4420366287231445, "learning_rate": 1.5005364492091565e-05, "loss": 1.6209, "step": 119320 }, { "epoch": 0.7500166951466227, "grad_norm": 7.651759147644043, "learning_rate": 1.5004945391146912e-05, "loss": 1.6806, "step": 119330 }, { "epoch": 0.7500795474633197, "grad_norm": 6.842735767364502, "learning_rate": 1.5004526290202259e-05, "loss": 1.7389, "step": 119340 }, { "epoch": 0.7501423997800168, "grad_norm": 6.2523956298828125, "learning_rate": 1.5004107189257606e-05, "loss": 1.6368, "step": 119350 }, { "epoch": 0.750205252096714, "grad_norm": 6.198376178741455, "learning_rate": 1.5003688088312953e-05, "loss": 1.8505, "step": 119360 }, { "epoch": 0.7502681044134111, "grad_norm": 6.618760108947754, "learning_rate": 1.5003268987368299e-05, "loss": 1.9014, "step": 119370 }, { "epoch": 0.7503309567301082, "grad_norm": 6.725455284118652, "learning_rate": 1.5002849886423646e-05, "loss": 1.6399, "step": 119380 }, { "epoch": 0.7503938090468053, "grad_norm": 6.938900947570801, "learning_rate": 1.5002430785478991e-05, "loss": 1.3884, "step": 119390 }, { "epoch": 0.7504566613635024, "grad_norm": 6.5777997970581055, "learning_rate": 1.5002011684534338e-05, "loss": 1.7021, "step": 119400 }, { "epoch": 0.7505195136801995, "grad_norm": 6.52875280380249, "learning_rate": 1.5001592583589684e-05, "loss": 1.6928, "step": 119410 }, { "epoch": 0.7505823659968966, "grad_norm": 5.5812225341796875, "learning_rate": 1.500117348264503e-05, "loss": 1.5077, "step": 119420 }, { "epoch": 0.7506452183135938, "grad_norm": 6.043531894683838, "learning_rate": 1.5000754381700378e-05, "loss": 1.4386, "step": 119430 }, { "epoch": 0.7507080706302909, "grad_norm": 7.530872821807861, "learning_rate": 1.5000335280755725e-05, "loss": 1.7013, "step": 119440 }, { "epoch": 0.750770922946988, "grad_norm": 7.171182632446289, "learning_rate": 1.4999916179811072e-05, "loss": 1.7723, "step": 119450 }, { "epoch": 0.7508337752636851, "grad_norm": 5.6413469314575195, "learning_rate": 1.4999497078866416e-05, "loss": 1.6987, "step": 119460 }, { "epoch": 0.7508966275803822, "grad_norm": 7.155804634094238, "learning_rate": 1.4999077977921763e-05, "loss": 1.4665, "step": 119470 }, { "epoch": 0.7509594798970793, "grad_norm": 7.0509562492370605, "learning_rate": 1.499865887697711e-05, "loss": 1.6415, "step": 119480 }, { "epoch": 0.7510223322137765, "grad_norm": 7.219264030456543, "learning_rate": 1.4998239776032457e-05, "loss": 1.497, "step": 119490 }, { "epoch": 0.7510851845304736, "grad_norm": 6.636020660400391, "learning_rate": 1.4997820675087802e-05, "loss": 1.8566, "step": 119500 }, { "epoch": 0.7511480368471707, "grad_norm": 6.3326311111450195, "learning_rate": 1.499740157414315e-05, "loss": 1.5311, "step": 119510 }, { "epoch": 0.7512108891638678, "grad_norm": 7.0651445388793945, "learning_rate": 1.4996982473198496e-05, "loss": 1.5763, "step": 119520 }, { "epoch": 0.7512737414805649, "grad_norm": 5.846452713012695, "learning_rate": 1.4996563372253843e-05, "loss": 1.4839, "step": 119530 }, { "epoch": 0.751336593797262, "grad_norm": 5.834648132324219, "learning_rate": 1.499614427130919e-05, "loss": 1.4879, "step": 119540 }, { "epoch": 0.7513994461139591, "grad_norm": 6.99166202545166, "learning_rate": 1.4995725170364534e-05, "loss": 1.5047, "step": 119550 }, { "epoch": 0.7514622984306563, "grad_norm": 6.823217391967773, "learning_rate": 1.4995306069419881e-05, "loss": 1.6005, "step": 119560 }, { "epoch": 0.7515251507473534, "grad_norm": 6.487685203552246, "learning_rate": 1.4994886968475228e-05, "loss": 1.4634, "step": 119570 }, { "epoch": 0.7515880030640505, "grad_norm": 6.586220741271973, "learning_rate": 1.4994467867530575e-05, "loss": 1.8928, "step": 119580 }, { "epoch": 0.7516508553807475, "grad_norm": 8.22240161895752, "learning_rate": 1.499404876658592e-05, "loss": 1.5348, "step": 119590 }, { "epoch": 0.7517137076974446, "grad_norm": 6.863974571228027, "learning_rate": 1.4993629665641268e-05, "loss": 1.5618, "step": 119600 }, { "epoch": 0.7517765600141417, "grad_norm": 6.535792350769043, "learning_rate": 1.4993210564696615e-05, "loss": 1.6731, "step": 119610 }, { "epoch": 0.7518394123308388, "grad_norm": 5.779008865356445, "learning_rate": 1.4992791463751962e-05, "loss": 1.7479, "step": 119620 }, { "epoch": 0.751902264647536, "grad_norm": 6.919078350067139, "learning_rate": 1.4992372362807306e-05, "loss": 1.6353, "step": 119630 }, { "epoch": 0.7519651169642331, "grad_norm": 5.948938369750977, "learning_rate": 1.4991953261862653e-05, "loss": 1.4725, "step": 119640 }, { "epoch": 0.7520279692809302, "grad_norm": 7.857065200805664, "learning_rate": 1.4991534160918e-05, "loss": 1.7719, "step": 119650 }, { "epoch": 0.7520908215976273, "grad_norm": 5.382479667663574, "learning_rate": 1.4991115059973347e-05, "loss": 1.6679, "step": 119660 }, { "epoch": 0.7521536739143244, "grad_norm": 6.866490364074707, "learning_rate": 1.4990695959028694e-05, "loss": 1.685, "step": 119670 }, { "epoch": 0.7522165262310215, "grad_norm": 6.799786567687988, "learning_rate": 1.499027685808404e-05, "loss": 1.7707, "step": 119680 }, { "epoch": 0.7522793785477186, "grad_norm": 7.421419620513916, "learning_rate": 1.4989857757139386e-05, "loss": 1.4406, "step": 119690 }, { "epoch": 0.7523422308644158, "grad_norm": 6.190274715423584, "learning_rate": 1.4989438656194732e-05, "loss": 1.4512, "step": 119700 }, { "epoch": 0.7524050831811129, "grad_norm": 7.217263698577881, "learning_rate": 1.4989019555250079e-05, "loss": 1.3324, "step": 119710 }, { "epoch": 0.75246793549781, "grad_norm": 5.917891502380371, "learning_rate": 1.4988600454305424e-05, "loss": 1.4482, "step": 119720 }, { "epoch": 0.7525307878145071, "grad_norm": 6.145440578460693, "learning_rate": 1.4988181353360771e-05, "loss": 1.4712, "step": 119730 }, { "epoch": 0.7525936401312042, "grad_norm": 5.475290775299072, "learning_rate": 1.4987762252416118e-05, "loss": 1.4071, "step": 119740 }, { "epoch": 0.7526564924479013, "grad_norm": 8.057433128356934, "learning_rate": 1.4987343151471465e-05, "loss": 1.7007, "step": 119750 }, { "epoch": 0.7527193447645985, "grad_norm": 6.021699905395508, "learning_rate": 1.4986924050526812e-05, "loss": 1.5067, "step": 119760 }, { "epoch": 0.7527821970812956, "grad_norm": 6.090139865875244, "learning_rate": 1.4986504949582156e-05, "loss": 1.5609, "step": 119770 }, { "epoch": 0.7528450493979927, "grad_norm": 6.361364841461182, "learning_rate": 1.4986085848637503e-05, "loss": 1.5204, "step": 119780 }, { "epoch": 0.7529079017146898, "grad_norm": 7.001276016235352, "learning_rate": 1.498566674769285e-05, "loss": 1.4831, "step": 119790 }, { "epoch": 0.7529707540313869, "grad_norm": 6.353245258331299, "learning_rate": 1.4985247646748197e-05, "loss": 1.5124, "step": 119800 }, { "epoch": 0.753033606348084, "grad_norm": 5.2376790046691895, "learning_rate": 1.4984828545803543e-05, "loss": 1.4544, "step": 119810 }, { "epoch": 0.7530964586647811, "grad_norm": 5.625237941741943, "learning_rate": 1.498440944485889e-05, "loss": 1.4652, "step": 119820 }, { "epoch": 0.7531593109814783, "grad_norm": 5.442669868469238, "learning_rate": 1.4983990343914237e-05, "loss": 1.5617, "step": 119830 }, { "epoch": 0.7532221632981754, "grad_norm": 6.807565212249756, "learning_rate": 1.4983571242969584e-05, "loss": 1.6231, "step": 119840 }, { "epoch": 0.7532850156148724, "grad_norm": 6.105103969573975, "learning_rate": 1.4983152142024931e-05, "loss": 1.5512, "step": 119850 }, { "epoch": 0.7533478679315695, "grad_norm": 5.972720623016357, "learning_rate": 1.4982733041080275e-05, "loss": 1.6881, "step": 119860 }, { "epoch": 0.7534107202482666, "grad_norm": 6.814130783081055, "learning_rate": 1.4982313940135622e-05, "loss": 1.5766, "step": 119870 }, { "epoch": 0.7534735725649637, "grad_norm": 7.6490559577941895, "learning_rate": 1.4981894839190969e-05, "loss": 1.4441, "step": 119880 }, { "epoch": 0.7535364248816608, "grad_norm": 6.740894317626953, "learning_rate": 1.4981475738246316e-05, "loss": 1.5884, "step": 119890 }, { "epoch": 0.753599277198358, "grad_norm": 5.973379135131836, "learning_rate": 1.4981056637301661e-05, "loss": 1.7099, "step": 119900 }, { "epoch": 0.7536621295150551, "grad_norm": 5.714463710784912, "learning_rate": 1.4980637536357008e-05, "loss": 1.4491, "step": 119910 }, { "epoch": 0.7537249818317522, "grad_norm": 7.492783069610596, "learning_rate": 1.4980218435412355e-05, "loss": 1.6315, "step": 119920 }, { "epoch": 0.7537878341484493, "grad_norm": 5.984746932983398, "learning_rate": 1.4979799334467702e-05, "loss": 1.551, "step": 119930 }, { "epoch": 0.7538506864651464, "grad_norm": 5.90654182434082, "learning_rate": 1.4979380233523046e-05, "loss": 1.502, "step": 119940 }, { "epoch": 0.7539135387818435, "grad_norm": 6.398062705993652, "learning_rate": 1.4978961132578393e-05, "loss": 1.5511, "step": 119950 }, { "epoch": 0.7539763910985406, "grad_norm": 6.428770065307617, "learning_rate": 1.497854203163374e-05, "loss": 1.4763, "step": 119960 }, { "epoch": 0.7540392434152378, "grad_norm": 6.965630531311035, "learning_rate": 1.4978122930689087e-05, "loss": 1.34, "step": 119970 }, { "epoch": 0.7541020957319349, "grad_norm": 6.642297744750977, "learning_rate": 1.4977703829744434e-05, "loss": 1.7019, "step": 119980 }, { "epoch": 0.754164948048632, "grad_norm": 6.695052623748779, "learning_rate": 1.497728472879978e-05, "loss": 1.6922, "step": 119990 }, { "epoch": 0.7542278003653291, "grad_norm": 5.795705318450928, "learning_rate": 1.4976865627855127e-05, "loss": 1.5766, "step": 120000 }, { "epoch": 0.7542906526820262, "grad_norm": 6.503615856170654, "learning_rate": 1.4976446526910474e-05, "loss": 1.4383, "step": 120010 }, { "epoch": 0.7543535049987233, "grad_norm": 7.517498016357422, "learning_rate": 1.497602742596582e-05, "loss": 1.6784, "step": 120020 }, { "epoch": 0.7544163573154204, "grad_norm": 6.060889720916748, "learning_rate": 1.4975608325021165e-05, "loss": 1.7626, "step": 120030 }, { "epoch": 0.7544792096321176, "grad_norm": 7.414714336395264, "learning_rate": 1.4975189224076512e-05, "loss": 1.4736, "step": 120040 }, { "epoch": 0.7545420619488147, "grad_norm": 8.387202262878418, "learning_rate": 1.4974770123131859e-05, "loss": 1.7192, "step": 120050 }, { "epoch": 0.7546049142655118, "grad_norm": 5.452805042266846, "learning_rate": 1.4974351022187206e-05, "loss": 1.6026, "step": 120060 }, { "epoch": 0.7546677665822089, "grad_norm": 5.210594654083252, "learning_rate": 1.4973931921242553e-05, "loss": 1.4089, "step": 120070 }, { "epoch": 0.754730618898906, "grad_norm": 6.504967212677002, "learning_rate": 1.4973512820297897e-05, "loss": 1.5778, "step": 120080 }, { "epoch": 0.7547934712156031, "grad_norm": 8.21863079071045, "learning_rate": 1.4973093719353244e-05, "loss": 1.6274, "step": 120090 }, { "epoch": 0.7548563235323001, "grad_norm": 5.795107364654541, "learning_rate": 1.497267461840859e-05, "loss": 1.5769, "step": 120100 }, { "epoch": 0.7549191758489973, "grad_norm": 7.059028625488281, "learning_rate": 1.4972255517463938e-05, "loss": 1.5492, "step": 120110 }, { "epoch": 0.7549820281656944, "grad_norm": 7.099276542663574, "learning_rate": 1.4971836416519283e-05, "loss": 1.607, "step": 120120 }, { "epoch": 0.7550448804823915, "grad_norm": 6.764489650726318, "learning_rate": 1.497141731557463e-05, "loss": 1.638, "step": 120130 }, { "epoch": 0.7551077327990886, "grad_norm": 6.1144609451293945, "learning_rate": 1.4970998214629977e-05, "loss": 1.6679, "step": 120140 }, { "epoch": 0.7551705851157857, "grad_norm": 7.087555408477783, "learning_rate": 1.4970579113685324e-05, "loss": 1.7779, "step": 120150 }, { "epoch": 0.7552334374324828, "grad_norm": 6.355130672454834, "learning_rate": 1.4970160012740671e-05, "loss": 1.5195, "step": 120160 }, { "epoch": 0.75529628974918, "grad_norm": 5.404919624328613, "learning_rate": 1.4969740911796015e-05, "loss": 1.526, "step": 120170 }, { "epoch": 0.7553591420658771, "grad_norm": 7.248736381530762, "learning_rate": 1.4969321810851362e-05, "loss": 1.8076, "step": 120180 }, { "epoch": 0.7554219943825742, "grad_norm": 5.894480228424072, "learning_rate": 1.496890270990671e-05, "loss": 1.4873, "step": 120190 }, { "epoch": 0.7554848466992713, "grad_norm": 6.088755130767822, "learning_rate": 1.4968483608962056e-05, "loss": 1.9181, "step": 120200 }, { "epoch": 0.7555476990159684, "grad_norm": 6.1901068687438965, "learning_rate": 1.4968064508017402e-05, "loss": 1.5689, "step": 120210 }, { "epoch": 0.7556105513326655, "grad_norm": 6.730910301208496, "learning_rate": 1.4967645407072749e-05, "loss": 1.8808, "step": 120220 }, { "epoch": 0.7556734036493626, "grad_norm": 5.562646865844727, "learning_rate": 1.4967226306128096e-05, "loss": 1.4225, "step": 120230 }, { "epoch": 0.7557362559660598, "grad_norm": 6.61494779586792, "learning_rate": 1.4966807205183443e-05, "loss": 1.8968, "step": 120240 }, { "epoch": 0.7557991082827569, "grad_norm": 7.165287971496582, "learning_rate": 1.4966388104238787e-05, "loss": 1.8284, "step": 120250 }, { "epoch": 0.755861960599454, "grad_norm": 7.3369598388671875, "learning_rate": 1.4965969003294134e-05, "loss": 1.6924, "step": 120260 }, { "epoch": 0.7559248129161511, "grad_norm": 7.2790632247924805, "learning_rate": 1.496554990234948e-05, "loss": 1.7475, "step": 120270 }, { "epoch": 0.7559876652328482, "grad_norm": 6.322650909423828, "learning_rate": 1.4965130801404828e-05, "loss": 1.6261, "step": 120280 }, { "epoch": 0.7560505175495453, "grad_norm": 5.456332683563232, "learning_rate": 1.4964711700460175e-05, "loss": 1.4581, "step": 120290 }, { "epoch": 0.7561133698662424, "grad_norm": 6.35498046875, "learning_rate": 1.496429259951552e-05, "loss": 1.6147, "step": 120300 }, { "epoch": 0.7561762221829396, "grad_norm": 6.338553428649902, "learning_rate": 1.4963873498570867e-05, "loss": 1.611, "step": 120310 }, { "epoch": 0.7562390744996367, "grad_norm": 6.472460746765137, "learning_rate": 1.4963454397626214e-05, "loss": 1.5694, "step": 120320 }, { "epoch": 0.7563019268163338, "grad_norm": 6.855090141296387, "learning_rate": 1.496303529668156e-05, "loss": 1.6434, "step": 120330 }, { "epoch": 0.7563647791330309, "grad_norm": 6.079153537750244, "learning_rate": 1.4962616195736905e-05, "loss": 1.6676, "step": 120340 }, { "epoch": 0.756427631449728, "grad_norm": 6.099603176116943, "learning_rate": 1.4962197094792252e-05, "loss": 1.6, "step": 120350 }, { "epoch": 0.756490483766425, "grad_norm": 6.089348793029785, "learning_rate": 1.49617779938476e-05, "loss": 1.4022, "step": 120360 }, { "epoch": 0.7565533360831221, "grad_norm": 6.879031181335449, "learning_rate": 1.4961358892902946e-05, "loss": 1.6911, "step": 120370 }, { "epoch": 0.7566161883998193, "grad_norm": 6.240353107452393, "learning_rate": 1.4960939791958293e-05, "loss": 1.7932, "step": 120380 }, { "epoch": 0.7566790407165164, "grad_norm": 6.377612590789795, "learning_rate": 1.4960520691013639e-05, "loss": 1.5124, "step": 120390 }, { "epoch": 0.7567418930332135, "grad_norm": 7.6781415939331055, "learning_rate": 1.496014350016345e-05, "loss": 1.5271, "step": 120400 }, { "epoch": 0.7568047453499106, "grad_norm": 7.081296920776367, "learning_rate": 1.4959724399218797e-05, "loss": 1.6089, "step": 120410 }, { "epoch": 0.7568675976666077, "grad_norm": 6.226444721221924, "learning_rate": 1.4959305298274143e-05, "loss": 1.4901, "step": 120420 }, { "epoch": 0.7569304499833048, "grad_norm": 6.075373649597168, "learning_rate": 1.495888619732949e-05, "loss": 1.7908, "step": 120430 }, { "epoch": 0.756993302300002, "grad_norm": 6.699268817901611, "learning_rate": 1.4958467096384837e-05, "loss": 1.7134, "step": 120440 }, { "epoch": 0.7570561546166991, "grad_norm": 6.797584533691406, "learning_rate": 1.4958047995440184e-05, "loss": 1.7966, "step": 120450 }, { "epoch": 0.7571190069333962, "grad_norm": 6.222202777862549, "learning_rate": 1.4957628894495528e-05, "loss": 1.5158, "step": 120460 }, { "epoch": 0.7571818592500933, "grad_norm": 5.914909839630127, "learning_rate": 1.4957209793550875e-05, "loss": 1.5251, "step": 120470 }, { "epoch": 0.7572447115667904, "grad_norm": 5.490307807922363, "learning_rate": 1.4956790692606222e-05, "loss": 1.6727, "step": 120480 }, { "epoch": 0.7573075638834875, "grad_norm": 6.202897548675537, "learning_rate": 1.4956371591661569e-05, "loss": 1.7193, "step": 120490 }, { "epoch": 0.7573704162001846, "grad_norm": 6.767843246459961, "learning_rate": 1.4955952490716916e-05, "loss": 1.6305, "step": 120500 }, { "epoch": 0.7574332685168818, "grad_norm": 6.597473621368408, "learning_rate": 1.4955533389772261e-05, "loss": 1.6484, "step": 120510 }, { "epoch": 0.7574961208335789, "grad_norm": 5.778598308563232, "learning_rate": 1.4955114288827608e-05, "loss": 1.6532, "step": 120520 }, { "epoch": 0.757558973150276, "grad_norm": 6.088018417358398, "learning_rate": 1.4954695187882955e-05, "loss": 1.7389, "step": 120530 }, { "epoch": 0.7576218254669731, "grad_norm": 6.116697788238525, "learning_rate": 1.4954276086938302e-05, "loss": 1.69, "step": 120540 }, { "epoch": 0.7576846777836702, "grad_norm": 5.867340087890625, "learning_rate": 1.4953856985993646e-05, "loss": 1.4857, "step": 120550 }, { "epoch": 0.7577475301003673, "grad_norm": 6.752843379974365, "learning_rate": 1.4953437885048993e-05, "loss": 1.5705, "step": 120560 }, { "epoch": 0.7578103824170644, "grad_norm": 5.58306884765625, "learning_rate": 1.495301878410434e-05, "loss": 1.4287, "step": 120570 }, { "epoch": 0.7578732347337616, "grad_norm": 6.616302490234375, "learning_rate": 1.4952599683159687e-05, "loss": 1.7193, "step": 120580 }, { "epoch": 0.7579360870504587, "grad_norm": 7.261279106140137, "learning_rate": 1.4952180582215034e-05, "loss": 1.7444, "step": 120590 }, { "epoch": 0.7579989393671558, "grad_norm": 6.934172630310059, "learning_rate": 1.495176148127038e-05, "loss": 1.602, "step": 120600 }, { "epoch": 0.7580617916838528, "grad_norm": 6.805965900421143, "learning_rate": 1.4951342380325727e-05, "loss": 1.6606, "step": 120610 }, { "epoch": 0.7581246440005499, "grad_norm": 6.8719282150268555, "learning_rate": 1.4950923279381074e-05, "loss": 1.5321, "step": 120620 }, { "epoch": 0.758187496317247, "grad_norm": 6.2738189697265625, "learning_rate": 1.4950504178436421e-05, "loss": 1.7606, "step": 120630 }, { "epoch": 0.7582503486339441, "grad_norm": 6.845386028289795, "learning_rate": 1.4950085077491765e-05, "loss": 1.5743, "step": 120640 }, { "epoch": 0.7583132009506413, "grad_norm": 6.946133136749268, "learning_rate": 1.4949665976547112e-05, "loss": 1.5849, "step": 120650 }, { "epoch": 0.7583760532673384, "grad_norm": 6.396969318389893, "learning_rate": 1.4949246875602459e-05, "loss": 1.5044, "step": 120660 }, { "epoch": 0.7584389055840355, "grad_norm": 5.696493148803711, "learning_rate": 1.4948827774657806e-05, "loss": 1.5822, "step": 120670 }, { "epoch": 0.7585017579007326, "grad_norm": 6.477611064910889, "learning_rate": 1.4948408673713153e-05, "loss": 1.6271, "step": 120680 }, { "epoch": 0.7585646102174297, "grad_norm": 6.455690860748291, "learning_rate": 1.4947989572768498e-05, "loss": 1.5837, "step": 120690 }, { "epoch": 0.7586274625341268, "grad_norm": 6.538627624511719, "learning_rate": 1.4947570471823845e-05, "loss": 1.7551, "step": 120700 }, { "epoch": 0.7586903148508239, "grad_norm": 5.95809268951416, "learning_rate": 1.494715137087919e-05, "loss": 1.7291, "step": 120710 }, { "epoch": 0.7587531671675211, "grad_norm": 6.785862445831299, "learning_rate": 1.4946732269934538e-05, "loss": 1.5176, "step": 120720 }, { "epoch": 0.7588160194842182, "grad_norm": 7.060803413391113, "learning_rate": 1.4946313168989883e-05, "loss": 1.714, "step": 120730 }, { "epoch": 0.7588788718009153, "grad_norm": 6.920099258422852, "learning_rate": 1.494589406804523e-05, "loss": 1.5574, "step": 120740 }, { "epoch": 0.7589417241176124, "grad_norm": 6.748926639556885, "learning_rate": 1.4945474967100577e-05, "loss": 1.6358, "step": 120750 }, { "epoch": 0.7590045764343095, "grad_norm": 6.599475860595703, "learning_rate": 1.4945055866155924e-05, "loss": 1.7025, "step": 120760 }, { "epoch": 0.7590674287510066, "grad_norm": 7.630424976348877, "learning_rate": 1.4944636765211268e-05, "loss": 1.5113, "step": 120770 }, { "epoch": 0.7591302810677037, "grad_norm": 6.237452983856201, "learning_rate": 1.4944217664266615e-05, "loss": 1.5949, "step": 120780 }, { "epoch": 0.7591931333844009, "grad_norm": 6.928414821624756, "learning_rate": 1.4943798563321962e-05, "loss": 1.4555, "step": 120790 }, { "epoch": 0.759255985701098, "grad_norm": 5.862486839294434, "learning_rate": 1.494337946237731e-05, "loss": 1.742, "step": 120800 }, { "epoch": 0.7593188380177951, "grad_norm": 5.778768539428711, "learning_rate": 1.4942960361432656e-05, "loss": 1.4755, "step": 120810 }, { "epoch": 0.7593816903344922, "grad_norm": 6.168492794036865, "learning_rate": 1.4942541260488002e-05, "loss": 1.6806, "step": 120820 }, { "epoch": 0.7594445426511893, "grad_norm": 6.794562339782715, "learning_rate": 1.4942122159543349e-05, "loss": 1.6003, "step": 120830 }, { "epoch": 0.7595073949678864, "grad_norm": 5.526988506317139, "learning_rate": 1.4941703058598696e-05, "loss": 1.6219, "step": 120840 }, { "epoch": 0.7595702472845836, "grad_norm": 6.265909671783447, "learning_rate": 1.4941283957654043e-05, "loss": 1.7621, "step": 120850 }, { "epoch": 0.7596330996012807, "grad_norm": 6.518588066101074, "learning_rate": 1.4940864856709387e-05, "loss": 1.4799, "step": 120860 }, { "epoch": 0.7596959519179777, "grad_norm": 7.25003719329834, "learning_rate": 1.4940445755764734e-05, "loss": 1.6973, "step": 120870 }, { "epoch": 0.7597588042346748, "grad_norm": 7.085589408874512, "learning_rate": 1.494002665482008e-05, "loss": 1.4295, "step": 120880 }, { "epoch": 0.7598216565513719, "grad_norm": 7.0398406982421875, "learning_rate": 1.4939607553875428e-05, "loss": 1.6731, "step": 120890 }, { "epoch": 0.759884508868069, "grad_norm": 5.306856155395508, "learning_rate": 1.4939188452930775e-05, "loss": 1.6904, "step": 120900 }, { "epoch": 0.7599473611847661, "grad_norm": 7.085124492645264, "learning_rate": 1.493876935198612e-05, "loss": 1.4914, "step": 120910 }, { "epoch": 0.7600102135014632, "grad_norm": 6.8141069412231445, "learning_rate": 1.4938350251041467e-05, "loss": 1.7216, "step": 120920 }, { "epoch": 0.7600730658181604, "grad_norm": 7.061438083648682, "learning_rate": 1.4937931150096814e-05, "loss": 1.6401, "step": 120930 }, { "epoch": 0.7601359181348575, "grad_norm": 6.064600467681885, "learning_rate": 1.4937512049152162e-05, "loss": 1.9376, "step": 120940 }, { "epoch": 0.7601987704515546, "grad_norm": 6.246726989746094, "learning_rate": 1.4937092948207505e-05, "loss": 1.8237, "step": 120950 }, { "epoch": 0.7602616227682517, "grad_norm": 10.821596145629883, "learning_rate": 1.4936673847262852e-05, "loss": 1.7269, "step": 120960 }, { "epoch": 0.7603244750849488, "grad_norm": 5.70432710647583, "learning_rate": 1.49362547463182e-05, "loss": 1.817, "step": 120970 }, { "epoch": 0.7603873274016459, "grad_norm": 6.571020126342773, "learning_rate": 1.4935835645373546e-05, "loss": 1.8675, "step": 120980 }, { "epoch": 0.7604501797183431, "grad_norm": 6.246038913726807, "learning_rate": 1.4935416544428893e-05, "loss": 1.5206, "step": 120990 }, { "epoch": 0.7605130320350402, "grad_norm": 5.737758636474609, "learning_rate": 1.4934997443484239e-05, "loss": 1.6251, "step": 121000 }, { "epoch": 0.7605758843517373, "grad_norm": 6.515652656555176, "learning_rate": 1.4934578342539586e-05, "loss": 1.4381, "step": 121010 }, { "epoch": 0.7606387366684344, "grad_norm": 6.005425930023193, "learning_rate": 1.4934159241594931e-05, "loss": 1.652, "step": 121020 }, { "epoch": 0.7607015889851315, "grad_norm": 6.146503925323486, "learning_rate": 1.4933740140650278e-05, "loss": 1.5105, "step": 121030 }, { "epoch": 0.7607644413018286, "grad_norm": 6.9523749351501465, "learning_rate": 1.4933321039705624e-05, "loss": 1.7115, "step": 121040 }, { "epoch": 0.7608272936185257, "grad_norm": 6.038151264190674, "learning_rate": 1.493290193876097e-05, "loss": 1.5377, "step": 121050 }, { "epoch": 0.7608901459352229, "grad_norm": 6.48395299911499, "learning_rate": 1.4932482837816318e-05, "loss": 1.7367, "step": 121060 }, { "epoch": 0.76095299825192, "grad_norm": 5.40743350982666, "learning_rate": 1.4932063736871665e-05, "loss": 1.6873, "step": 121070 }, { "epoch": 0.7610158505686171, "grad_norm": 5.547600269317627, "learning_rate": 1.493164463592701e-05, "loss": 1.546, "step": 121080 }, { "epoch": 0.7610787028853142, "grad_norm": 6.854653358459473, "learning_rate": 1.4931225534982356e-05, "loss": 1.6727, "step": 121090 }, { "epoch": 0.7611415552020113, "grad_norm": 7.525597095489502, "learning_rate": 1.4930806434037703e-05, "loss": 1.6834, "step": 121100 }, { "epoch": 0.7612044075187084, "grad_norm": 5.9096879959106445, "learning_rate": 1.493038733309305e-05, "loss": 1.9445, "step": 121110 }, { "epoch": 0.7612672598354054, "grad_norm": 6.041938304901123, "learning_rate": 1.4929968232148397e-05, "loss": 1.7379, "step": 121120 }, { "epoch": 0.7613301121521026, "grad_norm": 6.086507320404053, "learning_rate": 1.4929549131203742e-05, "loss": 1.6905, "step": 121130 }, { "epoch": 0.7613929644687997, "grad_norm": 6.364861965179443, "learning_rate": 1.492913003025909e-05, "loss": 1.6181, "step": 121140 }, { "epoch": 0.7614558167854968, "grad_norm": 5.678732395172119, "learning_rate": 1.4928710929314436e-05, "loss": 1.6366, "step": 121150 }, { "epoch": 0.7615186691021939, "grad_norm": 6.15526819229126, "learning_rate": 1.4928291828369784e-05, "loss": 1.6084, "step": 121160 }, { "epoch": 0.761581521418891, "grad_norm": 6.950185775756836, "learning_rate": 1.4927872727425127e-05, "loss": 1.6141, "step": 121170 }, { "epoch": 0.7616443737355881, "grad_norm": 7.115409851074219, "learning_rate": 1.4927453626480474e-05, "loss": 1.5466, "step": 121180 }, { "epoch": 0.7617072260522852, "grad_norm": 7.771196365356445, "learning_rate": 1.4927034525535821e-05, "loss": 1.633, "step": 121190 }, { "epoch": 0.7617700783689824, "grad_norm": 6.16953706741333, "learning_rate": 1.4926615424591168e-05, "loss": 1.725, "step": 121200 }, { "epoch": 0.7618329306856795, "grad_norm": 6.843245983123779, "learning_rate": 1.4926196323646515e-05, "loss": 1.595, "step": 121210 }, { "epoch": 0.7618957830023766, "grad_norm": 6.203823566436768, "learning_rate": 1.4925777222701861e-05, "loss": 1.4425, "step": 121220 }, { "epoch": 0.7619586353190737, "grad_norm": 6.376070976257324, "learning_rate": 1.4925358121757208e-05, "loss": 1.6897, "step": 121230 }, { "epoch": 0.7620214876357708, "grad_norm": 6.103188991546631, "learning_rate": 1.4924939020812555e-05, "loss": 1.6413, "step": 121240 }, { "epoch": 0.7620843399524679, "grad_norm": 5.701595783233643, "learning_rate": 1.4924519919867902e-05, "loss": 1.6432, "step": 121250 }, { "epoch": 0.762147192269165, "grad_norm": 7.023159980773926, "learning_rate": 1.4924100818923246e-05, "loss": 1.5428, "step": 121260 }, { "epoch": 0.7622100445858622, "grad_norm": 7.466994285583496, "learning_rate": 1.4923681717978593e-05, "loss": 1.9101, "step": 121270 }, { "epoch": 0.7622728969025593, "grad_norm": 5.344905853271484, "learning_rate": 1.492326261703394e-05, "loss": 1.2814, "step": 121280 }, { "epoch": 0.7623357492192564, "grad_norm": 6.5035719871521, "learning_rate": 1.4922843516089287e-05, "loss": 1.5778, "step": 121290 }, { "epoch": 0.7623986015359535, "grad_norm": 6.424167156219482, "learning_rate": 1.4922424415144634e-05, "loss": 1.7851, "step": 121300 }, { "epoch": 0.7624614538526506, "grad_norm": 7.77842903137207, "learning_rate": 1.492200531419998e-05, "loss": 1.6441, "step": 121310 }, { "epoch": 0.7625243061693477, "grad_norm": 5.498336315155029, "learning_rate": 1.4921586213255326e-05, "loss": 1.5566, "step": 121320 }, { "epoch": 0.7625871584860449, "grad_norm": 6.364030838012695, "learning_rate": 1.4921167112310672e-05, "loss": 1.9447, "step": 121330 }, { "epoch": 0.762650010802742, "grad_norm": 5.463682651519775, "learning_rate": 1.4920748011366019e-05, "loss": 1.482, "step": 121340 }, { "epoch": 0.7627128631194391, "grad_norm": 6.897846698760986, "learning_rate": 1.4920328910421364e-05, "loss": 1.4963, "step": 121350 }, { "epoch": 0.7627757154361362, "grad_norm": 5.895562171936035, "learning_rate": 1.4919909809476711e-05, "loss": 1.7759, "step": 121360 }, { "epoch": 0.7628385677528333, "grad_norm": 6.170264720916748, "learning_rate": 1.4919490708532058e-05, "loss": 1.6302, "step": 121370 }, { "epoch": 0.7629014200695303, "grad_norm": 6.082094192504883, "learning_rate": 1.4919071607587406e-05, "loss": 1.5568, "step": 121380 }, { "epoch": 0.7629642723862274, "grad_norm": 6.674420356750488, "learning_rate": 1.4918652506642751e-05, "loss": 1.7198, "step": 121390 }, { "epoch": 0.7630271247029246, "grad_norm": 6.145451068878174, "learning_rate": 1.4918233405698096e-05, "loss": 1.4018, "step": 121400 }, { "epoch": 0.7630899770196217, "grad_norm": 7.611517906188965, "learning_rate": 1.4917814304753443e-05, "loss": 1.8585, "step": 121410 }, { "epoch": 0.7631528293363188, "grad_norm": 6.481110572814941, "learning_rate": 1.491739520380879e-05, "loss": 1.6318, "step": 121420 }, { "epoch": 0.7632156816530159, "grad_norm": 6.759619235992432, "learning_rate": 1.4916976102864137e-05, "loss": 1.7117, "step": 121430 }, { "epoch": 0.763278533969713, "grad_norm": 6.643283367156982, "learning_rate": 1.4916557001919483e-05, "loss": 1.5947, "step": 121440 }, { "epoch": 0.7633413862864101, "grad_norm": 6.595210075378418, "learning_rate": 1.491613790097483e-05, "loss": 1.604, "step": 121450 }, { "epoch": 0.7634042386031072, "grad_norm": 6.565183639526367, "learning_rate": 1.4915718800030177e-05, "loss": 1.6944, "step": 121460 }, { "epoch": 0.7634670909198044, "grad_norm": 5.534493923187256, "learning_rate": 1.4915299699085524e-05, "loss": 1.7532, "step": 121470 }, { "epoch": 0.7635299432365015, "grad_norm": 6.0962443351745605, "learning_rate": 1.4914880598140868e-05, "loss": 1.6787, "step": 121480 }, { "epoch": 0.7635927955531986, "grad_norm": 6.684833526611328, "learning_rate": 1.4914461497196215e-05, "loss": 1.6874, "step": 121490 }, { "epoch": 0.7636556478698957, "grad_norm": 6.234200477600098, "learning_rate": 1.4914042396251562e-05, "loss": 1.4909, "step": 121500 }, { "epoch": 0.7637185001865928, "grad_norm": 6.338442802429199, "learning_rate": 1.4913623295306909e-05, "loss": 1.5812, "step": 121510 }, { "epoch": 0.7637813525032899, "grad_norm": 6.367171287536621, "learning_rate": 1.4913204194362256e-05, "loss": 1.6601, "step": 121520 }, { "epoch": 0.763844204819987, "grad_norm": 5.642130374908447, "learning_rate": 1.4912785093417601e-05, "loss": 1.6207, "step": 121530 }, { "epoch": 0.7639070571366842, "grad_norm": 6.396183967590332, "learning_rate": 1.4912365992472949e-05, "loss": 1.5957, "step": 121540 }, { "epoch": 0.7639699094533813, "grad_norm": 7.066701412200928, "learning_rate": 1.4911946891528296e-05, "loss": 1.7143, "step": 121550 }, { "epoch": 0.7640327617700784, "grad_norm": 5.673250675201416, "learning_rate": 1.4911527790583643e-05, "loss": 1.5213, "step": 121560 }, { "epoch": 0.7640956140867755, "grad_norm": 6.310756683349609, "learning_rate": 1.4911108689638986e-05, "loss": 1.9226, "step": 121570 }, { "epoch": 0.7641584664034726, "grad_norm": 7.087517261505127, "learning_rate": 1.4910689588694333e-05, "loss": 1.6778, "step": 121580 }, { "epoch": 0.7642213187201697, "grad_norm": 7.2179694175720215, "learning_rate": 1.491027048774968e-05, "loss": 1.7289, "step": 121590 }, { "epoch": 0.7642841710368669, "grad_norm": 7.069692611694336, "learning_rate": 1.4909851386805028e-05, "loss": 1.7558, "step": 121600 }, { "epoch": 0.764347023353564, "grad_norm": 7.267943859100342, "learning_rate": 1.4909432285860375e-05, "loss": 1.7601, "step": 121610 }, { "epoch": 0.7644098756702611, "grad_norm": 7.698588848114014, "learning_rate": 1.490901318491572e-05, "loss": 1.5349, "step": 121620 }, { "epoch": 0.7644727279869581, "grad_norm": 7.444325923919678, "learning_rate": 1.4908594083971067e-05, "loss": 1.6643, "step": 121630 }, { "epoch": 0.7645355803036552, "grad_norm": 6.691740989685059, "learning_rate": 1.4908174983026414e-05, "loss": 1.7189, "step": 121640 }, { "epoch": 0.7645984326203523, "grad_norm": 6.304153919219971, "learning_rate": 1.490775588208176e-05, "loss": 1.5654, "step": 121650 }, { "epoch": 0.7646612849370494, "grad_norm": 6.096138954162598, "learning_rate": 1.4907336781137105e-05, "loss": 1.6734, "step": 121660 }, { "epoch": 0.7647241372537465, "grad_norm": 6.932074069976807, "learning_rate": 1.4906917680192452e-05, "loss": 1.9373, "step": 121670 }, { "epoch": 0.7647869895704437, "grad_norm": 6.2231221199035645, "learning_rate": 1.4906498579247799e-05, "loss": 1.4769, "step": 121680 }, { "epoch": 0.7648498418871408, "grad_norm": 4.926756381988525, "learning_rate": 1.4906079478303146e-05, "loss": 1.5477, "step": 121690 }, { "epoch": 0.7649126942038379, "grad_norm": 7.400648593902588, "learning_rate": 1.4905660377358491e-05, "loss": 1.7029, "step": 121700 }, { "epoch": 0.764975546520535, "grad_norm": 6.2708210945129395, "learning_rate": 1.4905241276413837e-05, "loss": 1.5563, "step": 121710 }, { "epoch": 0.7650383988372321, "grad_norm": 5.93113899230957, "learning_rate": 1.4904822175469184e-05, "loss": 1.536, "step": 121720 }, { "epoch": 0.7651012511539292, "grad_norm": 6.963664531707764, "learning_rate": 1.4904403074524531e-05, "loss": 1.6805, "step": 121730 }, { "epoch": 0.7651641034706264, "grad_norm": 6.695322036743164, "learning_rate": 1.4903983973579878e-05, "loss": 1.7509, "step": 121740 }, { "epoch": 0.7652269557873235, "grad_norm": 7.450059413909912, "learning_rate": 1.4903564872635223e-05, "loss": 1.7181, "step": 121750 }, { "epoch": 0.7652898081040206, "grad_norm": 7.113367080688477, "learning_rate": 1.490314577169057e-05, "loss": 1.6927, "step": 121760 }, { "epoch": 0.7653526604207177, "grad_norm": 8.61163330078125, "learning_rate": 1.4902726670745918e-05, "loss": 1.7477, "step": 121770 }, { "epoch": 0.7654155127374148, "grad_norm": 6.27892541885376, "learning_rate": 1.4902307569801265e-05, "loss": 1.6296, "step": 121780 }, { "epoch": 0.7654783650541119, "grad_norm": 7.385900497436523, "learning_rate": 1.4901888468856608e-05, "loss": 1.4883, "step": 121790 }, { "epoch": 0.765541217370809, "grad_norm": 6.854226112365723, "learning_rate": 1.4901469367911955e-05, "loss": 1.8356, "step": 121800 }, { "epoch": 0.7656040696875062, "grad_norm": 7.135695457458496, "learning_rate": 1.4901050266967302e-05, "loss": 1.5556, "step": 121810 }, { "epoch": 0.7656669220042033, "grad_norm": 6.962805271148682, "learning_rate": 1.490063116602265e-05, "loss": 1.522, "step": 121820 }, { "epoch": 0.7657297743209004, "grad_norm": 6.554911136627197, "learning_rate": 1.4900212065077997e-05, "loss": 1.7597, "step": 121830 }, { "epoch": 0.7657926266375975, "grad_norm": 6.736742973327637, "learning_rate": 1.4899792964133342e-05, "loss": 1.7353, "step": 121840 }, { "epoch": 0.7658554789542946, "grad_norm": 6.078730583190918, "learning_rate": 1.4899373863188689e-05, "loss": 1.5013, "step": 121850 }, { "epoch": 0.7659183312709917, "grad_norm": 6.418303489685059, "learning_rate": 1.4898954762244036e-05, "loss": 1.5643, "step": 121860 }, { "epoch": 0.7659811835876889, "grad_norm": 6.1626996994018555, "learning_rate": 1.4898535661299383e-05, "loss": 1.6183, "step": 121870 }, { "epoch": 0.766044035904386, "grad_norm": 6.378403186798096, "learning_rate": 1.4898116560354727e-05, "loss": 1.4926, "step": 121880 }, { "epoch": 0.766106888221083, "grad_norm": 6.052713394165039, "learning_rate": 1.4897697459410074e-05, "loss": 1.7107, "step": 121890 }, { "epoch": 0.7661697405377801, "grad_norm": 5.871499061584473, "learning_rate": 1.4897278358465421e-05, "loss": 1.6582, "step": 121900 }, { "epoch": 0.7662325928544772, "grad_norm": 7.141749858856201, "learning_rate": 1.4896859257520768e-05, "loss": 1.647, "step": 121910 }, { "epoch": 0.7662954451711743, "grad_norm": 6.619952201843262, "learning_rate": 1.4896440156576115e-05, "loss": 1.5802, "step": 121920 }, { "epoch": 0.7663582974878714, "grad_norm": 6.620635509490967, "learning_rate": 1.489602105563146e-05, "loss": 1.5038, "step": 121930 }, { "epoch": 0.7664211498045685, "grad_norm": 6.27406644821167, "learning_rate": 1.4895601954686808e-05, "loss": 1.7344, "step": 121940 }, { "epoch": 0.7664840021212657, "grad_norm": 6.9420270919799805, "learning_rate": 1.4895182853742155e-05, "loss": 1.6119, "step": 121950 }, { "epoch": 0.7665468544379628, "grad_norm": 5.954476833343506, "learning_rate": 1.48947637527975e-05, "loss": 1.3455, "step": 121960 }, { "epoch": 0.7666097067546599, "grad_norm": 5.4925127029418945, "learning_rate": 1.4894344651852845e-05, "loss": 1.3755, "step": 121970 }, { "epoch": 0.766672559071357, "grad_norm": 6.041146755218506, "learning_rate": 1.4893925550908193e-05, "loss": 1.5138, "step": 121980 }, { "epoch": 0.7667354113880541, "grad_norm": 6.702816963195801, "learning_rate": 1.489350644996354e-05, "loss": 1.6429, "step": 121990 }, { "epoch": 0.7667982637047512, "grad_norm": 5.840723037719727, "learning_rate": 1.4893087349018887e-05, "loss": 1.5491, "step": 122000 }, { "epoch": 0.7668611160214484, "grad_norm": 6.275969505310059, "learning_rate": 1.4892668248074232e-05, "loss": 1.6287, "step": 122010 }, { "epoch": 0.7669239683381455, "grad_norm": 5.27000093460083, "learning_rate": 1.4892249147129579e-05, "loss": 1.4055, "step": 122020 }, { "epoch": 0.7669868206548426, "grad_norm": 7.497596740722656, "learning_rate": 1.4891830046184924e-05, "loss": 1.6989, "step": 122030 }, { "epoch": 0.7670496729715397, "grad_norm": 6.702113151550293, "learning_rate": 1.4891410945240272e-05, "loss": 1.7854, "step": 122040 }, { "epoch": 0.7671125252882368, "grad_norm": 6.365345478057861, "learning_rate": 1.4890991844295619e-05, "loss": 1.5324, "step": 122050 }, { "epoch": 0.7671753776049339, "grad_norm": 6.774776458740234, "learning_rate": 1.4890572743350964e-05, "loss": 1.493, "step": 122060 }, { "epoch": 0.767238229921631, "grad_norm": 7.4185285568237305, "learning_rate": 1.4890153642406311e-05, "loss": 1.7349, "step": 122070 }, { "epoch": 0.7673010822383282, "grad_norm": 6.927353382110596, "learning_rate": 1.4889734541461658e-05, "loss": 1.6425, "step": 122080 }, { "epoch": 0.7673639345550253, "grad_norm": 5.199800491333008, "learning_rate": 1.4889315440517005e-05, "loss": 1.4486, "step": 122090 }, { "epoch": 0.7674267868717224, "grad_norm": 6.803194999694824, "learning_rate": 1.4888938249666815e-05, "loss": 1.7102, "step": 122100 }, { "epoch": 0.7674896391884195, "grad_norm": 6.105362415313721, "learning_rate": 1.4888519148722162e-05, "loss": 1.4752, "step": 122110 }, { "epoch": 0.7675524915051166, "grad_norm": 7.666367530822754, "learning_rate": 1.4888100047777509e-05, "loss": 1.6505, "step": 122120 }, { "epoch": 0.7676153438218137, "grad_norm": 6.379772186279297, "learning_rate": 1.4887680946832856e-05, "loss": 1.5318, "step": 122130 }, { "epoch": 0.7676781961385107, "grad_norm": 6.910738945007324, "learning_rate": 1.4887261845888201e-05, "loss": 1.5936, "step": 122140 }, { "epoch": 0.7677410484552079, "grad_norm": 5.412835597991943, "learning_rate": 1.4886842744943549e-05, "loss": 1.6097, "step": 122150 }, { "epoch": 0.767803900771905, "grad_norm": 6.722506046295166, "learning_rate": 1.4886423643998896e-05, "loss": 1.6577, "step": 122160 }, { "epoch": 0.7678667530886021, "grad_norm": 6.645995616912842, "learning_rate": 1.4886004543054243e-05, "loss": 1.4969, "step": 122170 }, { "epoch": 0.7679296054052992, "grad_norm": 5.618885517120361, "learning_rate": 1.4885585442109586e-05, "loss": 1.6188, "step": 122180 }, { "epoch": 0.7679924577219963, "grad_norm": 6.013381481170654, "learning_rate": 1.4885166341164933e-05, "loss": 1.5751, "step": 122190 }, { "epoch": 0.7680553100386934, "grad_norm": 6.78753137588501, "learning_rate": 1.488474724022028e-05, "loss": 1.6351, "step": 122200 }, { "epoch": 0.7681181623553905, "grad_norm": 6.380014419555664, "learning_rate": 1.4884328139275628e-05, "loss": 1.741, "step": 122210 }, { "epoch": 0.7681810146720877, "grad_norm": 6.304038047790527, "learning_rate": 1.4883909038330973e-05, "loss": 1.8592, "step": 122220 }, { "epoch": 0.7682438669887848, "grad_norm": 5.757291316986084, "learning_rate": 1.488348993738632e-05, "loss": 1.4222, "step": 122230 }, { "epoch": 0.7683067193054819, "grad_norm": 7.418156147003174, "learning_rate": 1.4883070836441667e-05, "loss": 1.5522, "step": 122240 }, { "epoch": 0.768369571622179, "grad_norm": 6.327040195465088, "learning_rate": 1.4882651735497014e-05, "loss": 1.8053, "step": 122250 }, { "epoch": 0.7684324239388761, "grad_norm": 5.413448810577393, "learning_rate": 1.4882232634552361e-05, "loss": 1.4365, "step": 122260 }, { "epoch": 0.7684952762555732, "grad_norm": 7.084179878234863, "learning_rate": 1.4881813533607705e-05, "loss": 1.5816, "step": 122270 }, { "epoch": 0.7685581285722703, "grad_norm": 6.731881141662598, "learning_rate": 1.4881394432663052e-05, "loss": 1.4995, "step": 122280 }, { "epoch": 0.7686209808889675, "grad_norm": 7.3260498046875, "learning_rate": 1.4880975331718399e-05, "loss": 1.361, "step": 122290 }, { "epoch": 0.7686838332056646, "grad_norm": 6.583715915679932, "learning_rate": 1.4880556230773746e-05, "loss": 1.752, "step": 122300 }, { "epoch": 0.7687466855223617, "grad_norm": 6.189996242523193, "learning_rate": 1.4880137129829091e-05, "loss": 1.7227, "step": 122310 }, { "epoch": 0.7688095378390588, "grad_norm": 7.296090602874756, "learning_rate": 1.4879718028884439e-05, "loss": 1.8045, "step": 122320 }, { "epoch": 0.7688723901557559, "grad_norm": 6.088799953460693, "learning_rate": 1.4879298927939786e-05, "loss": 1.5412, "step": 122330 }, { "epoch": 0.768935242472453, "grad_norm": 6.83470344543457, "learning_rate": 1.4878879826995131e-05, "loss": 1.5028, "step": 122340 }, { "epoch": 0.7689980947891502, "grad_norm": 6.843228340148926, "learning_rate": 1.4878460726050478e-05, "loss": 1.4771, "step": 122350 }, { "epoch": 0.7690609471058473, "grad_norm": 6.270680904388428, "learning_rate": 1.4878041625105823e-05, "loss": 1.6067, "step": 122360 }, { "epoch": 0.7691237994225444, "grad_norm": 23.329750061035156, "learning_rate": 1.487762252416117e-05, "loss": 1.5755, "step": 122370 }, { "epoch": 0.7691866517392415, "grad_norm": 6.056442737579346, "learning_rate": 1.4877203423216518e-05, "loss": 1.6251, "step": 122380 }, { "epoch": 0.7692495040559386, "grad_norm": 5.918461322784424, "learning_rate": 1.4876784322271865e-05, "loss": 1.5211, "step": 122390 }, { "epoch": 0.7693123563726356, "grad_norm": 5.789246559143066, "learning_rate": 1.4876365221327208e-05, "loss": 1.8626, "step": 122400 }, { "epoch": 0.7693752086893327, "grad_norm": 6.11323356628418, "learning_rate": 1.4875946120382555e-05, "loss": 1.4109, "step": 122410 }, { "epoch": 0.7694380610060298, "grad_norm": 7.065954208374023, "learning_rate": 1.4875527019437902e-05, "loss": 1.6697, "step": 122420 }, { "epoch": 0.769500913322727, "grad_norm": 5.824702739715576, "learning_rate": 1.487510791849325e-05, "loss": 1.6175, "step": 122430 }, { "epoch": 0.7695637656394241, "grad_norm": 6.183588981628418, "learning_rate": 1.4874688817548597e-05, "loss": 1.5873, "step": 122440 }, { "epoch": 0.7696266179561212, "grad_norm": 6.583012580871582, "learning_rate": 1.4874269716603942e-05, "loss": 1.7353, "step": 122450 }, { "epoch": 0.7696894702728183, "grad_norm": 6.278700828552246, "learning_rate": 1.4873850615659289e-05, "loss": 1.5125, "step": 122460 }, { "epoch": 0.7697523225895154, "grad_norm": 6.665764808654785, "learning_rate": 1.4873431514714636e-05, "loss": 1.6511, "step": 122470 }, { "epoch": 0.7698151749062125, "grad_norm": 7.0793657302856445, "learning_rate": 1.4873012413769983e-05, "loss": 1.4307, "step": 122480 }, { "epoch": 0.7698780272229097, "grad_norm": 6.780641078948975, "learning_rate": 1.4872593312825327e-05, "loss": 1.72, "step": 122490 }, { "epoch": 0.7699408795396068, "grad_norm": 6.398270606994629, "learning_rate": 1.4872174211880674e-05, "loss": 1.4372, "step": 122500 }, { "epoch": 0.7700037318563039, "grad_norm": 7.318230152130127, "learning_rate": 1.4871755110936021e-05, "loss": 1.6557, "step": 122510 }, { "epoch": 0.770066584173001, "grad_norm": 4.567630290985107, "learning_rate": 1.4871336009991368e-05, "loss": 1.5319, "step": 122520 }, { "epoch": 0.7701294364896981, "grad_norm": 6.257627964019775, "learning_rate": 1.4870916909046713e-05, "loss": 1.7295, "step": 122530 }, { "epoch": 0.7701922888063952, "grad_norm": 6.4811201095581055, "learning_rate": 1.487049780810206e-05, "loss": 1.533, "step": 122540 }, { "epoch": 0.7702551411230923, "grad_norm": 6.295751571655273, "learning_rate": 1.4870078707157408e-05, "loss": 1.5123, "step": 122550 }, { "epoch": 0.7703179934397895, "grad_norm": 6.236231327056885, "learning_rate": 1.4869659606212755e-05, "loss": 1.6454, "step": 122560 }, { "epoch": 0.7703808457564866, "grad_norm": 6.155300140380859, "learning_rate": 1.4869240505268102e-05, "loss": 1.7777, "step": 122570 }, { "epoch": 0.7704436980731837, "grad_norm": 7.4999680519104, "learning_rate": 1.4868821404323445e-05, "loss": 1.8179, "step": 122580 }, { "epoch": 0.7705065503898808, "grad_norm": 6.615935325622559, "learning_rate": 1.4868402303378793e-05, "loss": 1.7054, "step": 122590 }, { "epoch": 0.7705694027065779, "grad_norm": 6.052435874938965, "learning_rate": 1.486798320243414e-05, "loss": 1.3716, "step": 122600 }, { "epoch": 0.770632255023275, "grad_norm": 6.248154640197754, "learning_rate": 1.4867564101489487e-05, "loss": 1.6163, "step": 122610 }, { "epoch": 0.7706951073399722, "grad_norm": 6.153942108154297, "learning_rate": 1.4867145000544832e-05, "loss": 1.5133, "step": 122620 }, { "epoch": 0.7707579596566693, "grad_norm": 6.781783103942871, "learning_rate": 1.4866725899600179e-05, "loss": 1.5677, "step": 122630 }, { "epoch": 0.7708208119733664, "grad_norm": 6.606148719787598, "learning_rate": 1.4866306798655526e-05, "loss": 1.5422, "step": 122640 }, { "epoch": 0.7708836642900634, "grad_norm": 6.399189472198486, "learning_rate": 1.4865887697710872e-05, "loss": 1.7016, "step": 122650 }, { "epoch": 0.7709465166067605, "grad_norm": 6.381160259246826, "learning_rate": 1.4865468596766219e-05, "loss": 1.5201, "step": 122660 }, { "epoch": 0.7710093689234576, "grad_norm": 5.457291126251221, "learning_rate": 1.4865049495821564e-05, "loss": 1.626, "step": 122670 }, { "epoch": 0.7710722212401547, "grad_norm": 7.976124286651611, "learning_rate": 1.4864630394876911e-05, "loss": 1.3152, "step": 122680 }, { "epoch": 0.7711350735568518, "grad_norm": 6.42151403427124, "learning_rate": 1.4864211293932258e-05, "loss": 1.5678, "step": 122690 }, { "epoch": 0.771197925873549, "grad_norm": 10.819112777709961, "learning_rate": 1.4863792192987605e-05, "loss": 1.5978, "step": 122700 }, { "epoch": 0.7712607781902461, "grad_norm": 6.606426239013672, "learning_rate": 1.486337309204295e-05, "loss": 1.6483, "step": 122710 }, { "epoch": 0.7713236305069432, "grad_norm": 6.718689441680908, "learning_rate": 1.4862953991098296e-05, "loss": 1.6981, "step": 122720 }, { "epoch": 0.7713864828236403, "grad_norm": 7.290799617767334, "learning_rate": 1.4862534890153643e-05, "loss": 1.558, "step": 122730 }, { "epoch": 0.7714493351403374, "grad_norm": 6.368934154510498, "learning_rate": 1.486211578920899e-05, "loss": 1.7425, "step": 122740 }, { "epoch": 0.7715121874570345, "grad_norm": 6.9713897705078125, "learning_rate": 1.4861696688264337e-05, "loss": 1.9851, "step": 122750 }, { "epoch": 0.7715750397737317, "grad_norm": 7.266341686248779, "learning_rate": 1.4861277587319683e-05, "loss": 1.619, "step": 122760 }, { "epoch": 0.7716378920904288, "grad_norm": 6.003509998321533, "learning_rate": 1.486085848637503e-05, "loss": 1.665, "step": 122770 }, { "epoch": 0.7717007444071259, "grad_norm": 6.83775520324707, "learning_rate": 1.4860439385430377e-05, "loss": 1.6166, "step": 122780 }, { "epoch": 0.771763596723823, "grad_norm": 7.0539422035217285, "learning_rate": 1.4860020284485724e-05, "loss": 1.454, "step": 122790 }, { "epoch": 0.7718264490405201, "grad_norm": 5.957339763641357, "learning_rate": 1.4859601183541067e-05, "loss": 1.6903, "step": 122800 }, { "epoch": 0.7718893013572172, "grad_norm": 5.7905192375183105, "learning_rate": 1.4859182082596415e-05, "loss": 1.3768, "step": 122810 }, { "epoch": 0.7719521536739143, "grad_norm": 7.8568010330200195, "learning_rate": 1.4858762981651762e-05, "loss": 1.6476, "step": 122820 }, { "epoch": 0.7720150059906115, "grad_norm": 4.974095344543457, "learning_rate": 1.4858343880707109e-05, "loss": 1.4265, "step": 122830 }, { "epoch": 0.7720778583073086, "grad_norm": 6.108072280883789, "learning_rate": 1.4857924779762454e-05, "loss": 2.0263, "step": 122840 }, { "epoch": 0.7721407106240057, "grad_norm": 6.359543323516846, "learning_rate": 1.4857505678817801e-05, "loss": 1.6471, "step": 122850 }, { "epoch": 0.7722035629407028, "grad_norm": 8.34288215637207, "learning_rate": 1.4857086577873148e-05, "loss": 1.6121, "step": 122860 }, { "epoch": 0.7722664152573999, "grad_norm": 6.9957275390625, "learning_rate": 1.4856667476928495e-05, "loss": 1.8025, "step": 122870 }, { "epoch": 0.772329267574097, "grad_norm": 7.1086201667785645, "learning_rate": 1.4856248375983842e-05, "loss": 1.6208, "step": 122880 }, { "epoch": 0.7723921198907941, "grad_norm": 6.970386981964111, "learning_rate": 1.4855829275039186e-05, "loss": 1.6963, "step": 122890 }, { "epoch": 0.7724549722074913, "grad_norm": 6.1792378425598145, "learning_rate": 1.4855410174094533e-05, "loss": 1.6584, "step": 122900 }, { "epoch": 0.7725178245241883, "grad_norm": 7.270213603973389, "learning_rate": 1.485499107314988e-05, "loss": 1.6117, "step": 122910 }, { "epoch": 0.7725806768408854, "grad_norm": 6.2892351150512695, "learning_rate": 1.4854571972205227e-05, "loss": 1.5591, "step": 122920 }, { "epoch": 0.7726435291575825, "grad_norm": 6.083685874938965, "learning_rate": 1.4854152871260573e-05, "loss": 1.5936, "step": 122930 }, { "epoch": 0.7727063814742796, "grad_norm": 6.550130844116211, "learning_rate": 1.485373377031592e-05, "loss": 1.6718, "step": 122940 }, { "epoch": 0.7727692337909767, "grad_norm": 5.467560768127441, "learning_rate": 1.4853314669371267e-05, "loss": 1.3637, "step": 122950 }, { "epoch": 0.7728320861076738, "grad_norm": 6.171477794647217, "learning_rate": 1.4852895568426614e-05, "loss": 1.7116, "step": 122960 }, { "epoch": 0.772894938424371, "grad_norm": 6.136664867401123, "learning_rate": 1.485247646748196e-05, "loss": 1.5895, "step": 122970 }, { "epoch": 0.7729577907410681, "grad_norm": 7.264563083648682, "learning_rate": 1.4852057366537305e-05, "loss": 1.7034, "step": 122980 }, { "epoch": 0.7730206430577652, "grad_norm": 6.602540016174316, "learning_rate": 1.4851638265592652e-05, "loss": 1.648, "step": 122990 }, { "epoch": 0.7730834953744623, "grad_norm": 8.665788650512695, "learning_rate": 1.4851219164647999e-05, "loss": 1.6093, "step": 123000 }, { "epoch": 0.7731463476911594, "grad_norm": 7.170557975769043, "learning_rate": 1.4850800063703346e-05, "loss": 1.5464, "step": 123010 }, { "epoch": 0.7732092000078565, "grad_norm": 6.581963539123535, "learning_rate": 1.4850380962758691e-05, "loss": 1.6343, "step": 123020 }, { "epoch": 0.7732720523245536, "grad_norm": 6.883604526519775, "learning_rate": 1.4849961861814037e-05, "loss": 1.4948, "step": 123030 }, { "epoch": 0.7733349046412508, "grad_norm": 6.112658977508545, "learning_rate": 1.4849542760869384e-05, "loss": 1.3697, "step": 123040 }, { "epoch": 0.7733977569579479, "grad_norm": 7.500575542449951, "learning_rate": 1.484912365992473e-05, "loss": 1.8255, "step": 123050 }, { "epoch": 0.773460609274645, "grad_norm": 7.116211891174316, "learning_rate": 1.4848704558980078e-05, "loss": 1.5943, "step": 123060 }, { "epoch": 0.7735234615913421, "grad_norm": 6.220788478851318, "learning_rate": 1.4848285458035423e-05, "loss": 1.4484, "step": 123070 }, { "epoch": 0.7735863139080392, "grad_norm": 6.182985305786133, "learning_rate": 1.484786635709077e-05, "loss": 1.4905, "step": 123080 }, { "epoch": 0.7736491662247363, "grad_norm": 6.292008876800537, "learning_rate": 1.4847447256146117e-05, "loss": 1.6471, "step": 123090 }, { "epoch": 0.7737120185414335, "grad_norm": 5.59975004196167, "learning_rate": 1.4847028155201464e-05, "loss": 1.6638, "step": 123100 }, { "epoch": 0.7737748708581306, "grad_norm": 6.636292457580566, "learning_rate": 1.4846609054256808e-05, "loss": 1.6325, "step": 123110 }, { "epoch": 0.7738377231748277, "grad_norm": 5.51218318939209, "learning_rate": 1.4846189953312155e-05, "loss": 1.7696, "step": 123120 }, { "epoch": 0.7739005754915248, "grad_norm": 6.089381694793701, "learning_rate": 1.4845770852367502e-05, "loss": 1.5208, "step": 123130 }, { "epoch": 0.7739634278082219, "grad_norm": 6.221317768096924, "learning_rate": 1.484535175142285e-05, "loss": 1.5267, "step": 123140 }, { "epoch": 0.774026280124919, "grad_norm": 5.6198601722717285, "learning_rate": 1.4844932650478195e-05, "loss": 1.5002, "step": 123150 }, { "epoch": 0.7740891324416161, "grad_norm": 7.691695690155029, "learning_rate": 1.4844513549533542e-05, "loss": 1.6479, "step": 123160 }, { "epoch": 0.7741519847583131, "grad_norm": 6.768416404724121, "learning_rate": 1.4844094448588889e-05, "loss": 1.2711, "step": 123170 }, { "epoch": 0.7742148370750103, "grad_norm": 7.375920295715332, "learning_rate": 1.4843675347644236e-05, "loss": 1.5467, "step": 123180 }, { "epoch": 0.7742776893917074, "grad_norm": 6.8250346183776855, "learning_rate": 1.4843256246699583e-05, "loss": 1.561, "step": 123190 }, { "epoch": 0.7743405417084045, "grad_norm": 7.026341438293457, "learning_rate": 1.4842837145754927e-05, "loss": 1.823, "step": 123200 }, { "epoch": 0.7744033940251016, "grad_norm": 6.942145347595215, "learning_rate": 1.4842418044810274e-05, "loss": 1.6228, "step": 123210 }, { "epoch": 0.7744662463417987, "grad_norm": 6.909519672393799, "learning_rate": 1.484199894386562e-05, "loss": 1.6935, "step": 123220 }, { "epoch": 0.7745290986584958, "grad_norm": 6.394974708557129, "learning_rate": 1.4841579842920968e-05, "loss": 1.7623, "step": 123230 }, { "epoch": 0.774591950975193, "grad_norm": 6.175522804260254, "learning_rate": 1.4841160741976313e-05, "loss": 1.4766, "step": 123240 }, { "epoch": 0.7746548032918901, "grad_norm": 6.601080417633057, "learning_rate": 1.484074164103166e-05, "loss": 1.6036, "step": 123250 }, { "epoch": 0.7747176556085872, "grad_norm": 6.722240924835205, "learning_rate": 1.4840322540087007e-05, "loss": 1.3793, "step": 123260 }, { "epoch": 0.7747805079252843, "grad_norm": 6.2471232414245605, "learning_rate": 1.4839903439142354e-05, "loss": 1.4916, "step": 123270 }, { "epoch": 0.7748433602419814, "grad_norm": 5.863661289215088, "learning_rate": 1.48394843381977e-05, "loss": 1.817, "step": 123280 }, { "epoch": 0.7749062125586785, "grad_norm": 6.506509780883789, "learning_rate": 1.4839065237253045e-05, "loss": 1.4565, "step": 123290 }, { "epoch": 0.7749690648753756, "grad_norm": 7.0432963371276855, "learning_rate": 1.4838646136308392e-05, "loss": 1.5648, "step": 123300 }, { "epoch": 0.7750319171920728, "grad_norm": 6.736435890197754, "learning_rate": 1.483822703536374e-05, "loss": 1.646, "step": 123310 }, { "epoch": 0.7750947695087699, "grad_norm": 6.402501106262207, "learning_rate": 1.4837807934419086e-05, "loss": 1.6051, "step": 123320 }, { "epoch": 0.775157621825467, "grad_norm": 6.6225409507751465, "learning_rate": 1.4837388833474432e-05, "loss": 1.7524, "step": 123330 }, { "epoch": 0.7752204741421641, "grad_norm": 5.071566104888916, "learning_rate": 1.4836969732529777e-05, "loss": 1.4562, "step": 123340 }, { "epoch": 0.7752833264588612, "grad_norm": 5.7586188316345215, "learning_rate": 1.4836550631585124e-05, "loss": 1.6082, "step": 123350 }, { "epoch": 0.7753461787755583, "grad_norm": 8.436548233032227, "learning_rate": 1.4836131530640471e-05, "loss": 1.4547, "step": 123360 }, { "epoch": 0.7754090310922555, "grad_norm": 7.204708099365234, "learning_rate": 1.4835712429695818e-05, "loss": 1.5851, "step": 123370 }, { "epoch": 0.7754718834089526, "grad_norm": 6.138442516326904, "learning_rate": 1.4835293328751164e-05, "loss": 1.669, "step": 123380 }, { "epoch": 0.7755347357256497, "grad_norm": 6.008743762969971, "learning_rate": 1.483487422780651e-05, "loss": 1.5813, "step": 123390 }, { "epoch": 0.7755975880423468, "grad_norm": 5.30795955657959, "learning_rate": 1.4834455126861858e-05, "loss": 1.6783, "step": 123400 }, { "epoch": 0.7756604403590439, "grad_norm": 6.605019569396973, "learning_rate": 1.4834036025917205e-05, "loss": 1.768, "step": 123410 }, { "epoch": 0.7757232926757409, "grad_norm": 6.18769645690918, "learning_rate": 1.4833616924972549e-05, "loss": 1.4924, "step": 123420 }, { "epoch": 0.775786144992438, "grad_norm": 6.021730422973633, "learning_rate": 1.4833197824027896e-05, "loss": 1.446, "step": 123430 }, { "epoch": 0.7758489973091351, "grad_norm": 6.385734558105469, "learning_rate": 1.4832778723083243e-05, "loss": 1.6865, "step": 123440 }, { "epoch": 0.7759118496258323, "grad_norm": 5.815408229827881, "learning_rate": 1.483235962213859e-05, "loss": 1.5873, "step": 123450 }, { "epoch": 0.7759747019425294, "grad_norm": 6.392560005187988, "learning_rate": 1.4831940521193935e-05, "loss": 1.5407, "step": 123460 }, { "epoch": 0.7760375542592265, "grad_norm": 6.754059791564941, "learning_rate": 1.4831521420249282e-05, "loss": 1.7748, "step": 123470 }, { "epoch": 0.7761004065759236, "grad_norm": 5.6090312004089355, "learning_rate": 1.483110231930463e-05, "loss": 1.6933, "step": 123480 }, { "epoch": 0.7761632588926207, "grad_norm": 6.376821517944336, "learning_rate": 1.4830683218359976e-05, "loss": 1.709, "step": 123490 }, { "epoch": 0.7762261112093178, "grad_norm": 6.824679851531982, "learning_rate": 1.4830264117415323e-05, "loss": 1.7707, "step": 123500 }, { "epoch": 0.776288963526015, "grad_norm": 6.89393949508667, "learning_rate": 1.4829845016470667e-05, "loss": 1.7062, "step": 123510 }, { "epoch": 0.7763518158427121, "grad_norm": 7.074446678161621, "learning_rate": 1.4829425915526014e-05, "loss": 1.9918, "step": 123520 }, { "epoch": 0.7764146681594092, "grad_norm": 6.103003978729248, "learning_rate": 1.4829006814581361e-05, "loss": 1.3948, "step": 123530 }, { "epoch": 0.7764775204761063, "grad_norm": 6.617283344268799, "learning_rate": 1.4828587713636708e-05, "loss": 1.585, "step": 123540 }, { "epoch": 0.7765403727928034, "grad_norm": 6.929607391357422, "learning_rate": 1.4828168612692054e-05, "loss": 1.4903, "step": 123550 }, { "epoch": 0.7766032251095005, "grad_norm": 5.60888147354126, "learning_rate": 1.48277495117474e-05, "loss": 1.6008, "step": 123560 }, { "epoch": 0.7766660774261976, "grad_norm": 6.84712553024292, "learning_rate": 1.4827330410802748e-05, "loss": 1.5952, "step": 123570 }, { "epoch": 0.7767289297428948, "grad_norm": 5.9359846115112305, "learning_rate": 1.4826911309858095e-05, "loss": 1.7788, "step": 123580 }, { "epoch": 0.7767917820595919, "grad_norm": 5.723293304443359, "learning_rate": 1.482649220891344e-05, "loss": 1.6442, "step": 123590 }, { "epoch": 0.776854634376289, "grad_norm": 7.651445388793945, "learning_rate": 1.4826073107968786e-05, "loss": 1.5432, "step": 123600 }, { "epoch": 0.7769174866929861, "grad_norm": 6.02742862701416, "learning_rate": 1.4825654007024133e-05, "loss": 1.5669, "step": 123610 }, { "epoch": 0.7769803390096832, "grad_norm": 4.787092685699463, "learning_rate": 1.482523490607948e-05, "loss": 1.7363, "step": 123620 }, { "epoch": 0.7770431913263803, "grad_norm": 5.4203362464904785, "learning_rate": 1.4824815805134827e-05, "loss": 1.5502, "step": 123630 }, { "epoch": 0.7771060436430774, "grad_norm": 6.5744853019714355, "learning_rate": 1.4824396704190172e-05, "loss": 1.5197, "step": 123640 }, { "epoch": 0.7771688959597746, "grad_norm": 7.488888263702393, "learning_rate": 1.482397760324552e-05, "loss": 1.5051, "step": 123650 }, { "epoch": 0.7772317482764717, "grad_norm": 7.3996124267578125, "learning_rate": 1.4823558502300865e-05, "loss": 1.7411, "step": 123660 }, { "epoch": 0.7772946005931688, "grad_norm": 6.990914344787598, "learning_rate": 1.4823139401356212e-05, "loss": 1.6559, "step": 123670 }, { "epoch": 0.7773574529098658, "grad_norm": 4.7599968910217285, "learning_rate": 1.4822720300411559e-05, "loss": 1.3768, "step": 123680 }, { "epoch": 0.7774203052265629, "grad_norm": 6.931900501251221, "learning_rate": 1.4822301199466904e-05, "loss": 1.5697, "step": 123690 }, { "epoch": 0.77748315754326, "grad_norm": 7.484622001647949, "learning_rate": 1.4821882098522251e-05, "loss": 1.7696, "step": 123700 }, { "epoch": 0.7775460098599571, "grad_norm": 6.000463008880615, "learning_rate": 1.4821462997577598e-05, "loss": 1.4683, "step": 123710 }, { "epoch": 0.7776088621766543, "grad_norm": 6.402760982513428, "learning_rate": 1.4821043896632945e-05, "loss": 1.3881, "step": 123720 }, { "epoch": 0.7776717144933514, "grad_norm": 6.27096700668335, "learning_rate": 1.4820624795688289e-05, "loss": 1.5401, "step": 123730 }, { "epoch": 0.7777345668100485, "grad_norm": 7.476364612579346, "learning_rate": 1.4820205694743636e-05, "loss": 1.6563, "step": 123740 }, { "epoch": 0.7777974191267456, "grad_norm": 7.78745698928833, "learning_rate": 1.4819786593798983e-05, "loss": 1.5223, "step": 123750 }, { "epoch": 0.7778602714434427, "grad_norm": 6.533060550689697, "learning_rate": 1.481936749285433e-05, "loss": 1.6728, "step": 123760 }, { "epoch": 0.7779231237601398, "grad_norm": 6.133152484893799, "learning_rate": 1.4818948391909676e-05, "loss": 1.5037, "step": 123770 }, { "epoch": 0.777985976076837, "grad_norm": 5.495619773864746, "learning_rate": 1.4818529290965023e-05, "loss": 1.6131, "step": 123780 }, { "epoch": 0.7780488283935341, "grad_norm": 6.892193794250488, "learning_rate": 1.481811019002037e-05, "loss": 1.4951, "step": 123790 }, { "epoch": 0.7781116807102312, "grad_norm": 8.555862426757812, "learning_rate": 1.4817691089075717e-05, "loss": 1.4632, "step": 123800 }, { "epoch": 0.7781745330269283, "grad_norm": 7.705578804016113, "learning_rate": 1.4817271988131064e-05, "loss": 1.817, "step": 123810 }, { "epoch": 0.7782373853436254, "grad_norm": 6.521873474121094, "learning_rate": 1.4816852887186408e-05, "loss": 1.4848, "step": 123820 }, { "epoch": 0.7783002376603225, "grad_norm": 6.840022087097168, "learning_rate": 1.4816433786241755e-05, "loss": 1.6988, "step": 123830 }, { "epoch": 0.7783630899770196, "grad_norm": 7.270923137664795, "learning_rate": 1.4816014685297102e-05, "loss": 1.5247, "step": 123840 }, { "epoch": 0.7784259422937168, "grad_norm": 6.0143256187438965, "learning_rate": 1.4815595584352449e-05, "loss": 1.5974, "step": 123850 }, { "epoch": 0.7784887946104139, "grad_norm": 6.783865928649902, "learning_rate": 1.4815176483407794e-05, "loss": 1.6884, "step": 123860 }, { "epoch": 0.778551646927111, "grad_norm": 7.086910247802734, "learning_rate": 1.4814757382463141e-05, "loss": 1.7329, "step": 123870 }, { "epoch": 0.7786144992438081, "grad_norm": 6.956778049468994, "learning_rate": 1.4814338281518488e-05, "loss": 1.6973, "step": 123880 }, { "epoch": 0.7786773515605052, "grad_norm": 6.142144680023193, "learning_rate": 1.4813919180573835e-05, "loss": 1.7874, "step": 123890 }, { "epoch": 0.7787402038772023, "grad_norm": 6.882087230682373, "learning_rate": 1.4813500079629183e-05, "loss": 1.6705, "step": 123900 }, { "epoch": 0.7788030561938994, "grad_norm": 5.532877445220947, "learning_rate": 1.4813080978684526e-05, "loss": 1.7642, "step": 123910 }, { "epoch": 0.7788659085105966, "grad_norm": 8.262584686279297, "learning_rate": 1.4812661877739873e-05, "loss": 1.6743, "step": 123920 }, { "epoch": 0.7789287608272936, "grad_norm": 6.192286014556885, "learning_rate": 1.481224277679522e-05, "loss": 1.5542, "step": 123930 }, { "epoch": 0.7789916131439907, "grad_norm": 6.131222248077393, "learning_rate": 1.4811823675850567e-05, "loss": 1.5012, "step": 123940 }, { "epoch": 0.7790544654606878, "grad_norm": 6.869941234588623, "learning_rate": 1.4811404574905913e-05, "loss": 1.7849, "step": 123950 }, { "epoch": 0.7791173177773849, "grad_norm": 6.112270832061768, "learning_rate": 1.481098547396126e-05, "loss": 1.3971, "step": 123960 }, { "epoch": 0.779180170094082, "grad_norm": 8.105659484863281, "learning_rate": 1.4810566373016605e-05, "loss": 1.8585, "step": 123970 }, { "epoch": 0.7792430224107791, "grad_norm": 6.909567356109619, "learning_rate": 1.4810147272071952e-05, "loss": 1.5651, "step": 123980 }, { "epoch": 0.7793058747274763, "grad_norm": 6.206493377685547, "learning_rate": 1.48097281711273e-05, "loss": 1.6405, "step": 123990 }, { "epoch": 0.7793687270441734, "grad_norm": 6.888452529907227, "learning_rate": 1.4809309070182645e-05, "loss": 1.5659, "step": 124000 }, { "epoch": 0.7794315793608705, "grad_norm": 5.350739479064941, "learning_rate": 1.4808889969237992e-05, "loss": 1.4354, "step": 124010 }, { "epoch": 0.7794944316775676, "grad_norm": 6.917699813842773, "learning_rate": 1.4808470868293339e-05, "loss": 1.5024, "step": 124020 }, { "epoch": 0.7795572839942647, "grad_norm": 6.004730224609375, "learning_rate": 1.4808051767348686e-05, "loss": 1.8792, "step": 124030 }, { "epoch": 0.7796201363109618, "grad_norm": 6.729684352874756, "learning_rate": 1.480763266640403e-05, "loss": 1.7051, "step": 124040 }, { "epoch": 0.779682988627659, "grad_norm": 6.808758735656738, "learning_rate": 1.4807213565459377e-05, "loss": 1.6628, "step": 124050 }, { "epoch": 0.7797458409443561, "grad_norm": 7.212730407714844, "learning_rate": 1.4806794464514724e-05, "loss": 1.601, "step": 124060 }, { "epoch": 0.7798086932610532, "grad_norm": 7.3029375076293945, "learning_rate": 1.4806375363570071e-05, "loss": 1.4577, "step": 124070 }, { "epoch": 0.7798715455777503, "grad_norm": 5.240572929382324, "learning_rate": 1.4805956262625416e-05, "loss": 1.4778, "step": 124080 }, { "epoch": 0.7799343978944474, "grad_norm": 6.397128582000732, "learning_rate": 1.4805537161680763e-05, "loss": 1.6948, "step": 124090 }, { "epoch": 0.7799972502111445, "grad_norm": 5.794658660888672, "learning_rate": 1.480511806073611e-05, "loss": 1.6997, "step": 124100 }, { "epoch": 0.7800601025278416, "grad_norm": 7.616403102874756, "learning_rate": 1.4804698959791457e-05, "loss": 1.8753, "step": 124110 }, { "epoch": 0.7801229548445388, "grad_norm": 7.255417823791504, "learning_rate": 1.4804279858846805e-05, "loss": 1.653, "step": 124120 }, { "epoch": 0.7801858071612359, "grad_norm": 6.627250671386719, "learning_rate": 1.4803860757902148e-05, "loss": 1.7013, "step": 124130 }, { "epoch": 0.780248659477933, "grad_norm": 6.056670665740967, "learning_rate": 1.4803441656957495e-05, "loss": 1.7038, "step": 124140 }, { "epoch": 0.7803115117946301, "grad_norm": 6.512842655181885, "learning_rate": 1.4803022556012842e-05, "loss": 1.5134, "step": 124150 }, { "epoch": 0.7803743641113272, "grad_norm": 6.630873203277588, "learning_rate": 1.480260345506819e-05, "loss": 1.5681, "step": 124160 }, { "epoch": 0.7804372164280243, "grad_norm": 7.094987869262695, "learning_rate": 1.4802184354123535e-05, "loss": 1.5769, "step": 124170 }, { "epoch": 0.7805000687447214, "grad_norm": 6.1788554191589355, "learning_rate": 1.4801765253178882e-05, "loss": 1.7874, "step": 124180 }, { "epoch": 0.7805629210614184, "grad_norm": 5.971871376037598, "learning_rate": 1.4801346152234229e-05, "loss": 1.5268, "step": 124190 }, { "epoch": 0.7806257733781156, "grad_norm": 6.372351169586182, "learning_rate": 1.4800927051289576e-05, "loss": 1.6221, "step": 124200 }, { "epoch": 0.7806886256948127, "grad_norm": 6.5118279457092285, "learning_rate": 1.4800507950344923e-05, "loss": 1.7938, "step": 124210 }, { "epoch": 0.7807514780115098, "grad_norm": 6.687560558319092, "learning_rate": 1.4800088849400267e-05, "loss": 1.9373, "step": 124220 }, { "epoch": 0.7808143303282069, "grad_norm": 5.854617118835449, "learning_rate": 1.4799669748455614e-05, "loss": 1.6248, "step": 124230 }, { "epoch": 0.780877182644904, "grad_norm": 7.229498386383057, "learning_rate": 1.4799250647510961e-05, "loss": 1.6186, "step": 124240 }, { "epoch": 0.7809400349616011, "grad_norm": 6.641530513763428, "learning_rate": 1.4798831546566308e-05, "loss": 1.578, "step": 124250 }, { "epoch": 0.7810028872782983, "grad_norm": 6.960888385772705, "learning_rate": 1.4798412445621653e-05, "loss": 1.7438, "step": 124260 }, { "epoch": 0.7810657395949954, "grad_norm": 6.048774719238281, "learning_rate": 1.4797993344677e-05, "loss": 1.4759, "step": 124270 }, { "epoch": 0.7811285919116925, "grad_norm": 6.7953104972839355, "learning_rate": 1.4797574243732346e-05, "loss": 1.5316, "step": 124280 }, { "epoch": 0.7811914442283896, "grad_norm": 6.214657306671143, "learning_rate": 1.4797155142787693e-05, "loss": 1.6755, "step": 124290 }, { "epoch": 0.7812542965450867, "grad_norm": 6.976639747619629, "learning_rate": 1.479673604184304e-05, "loss": 1.7267, "step": 124300 }, { "epoch": 0.7813171488617838, "grad_norm": 7.6554694175720215, "learning_rate": 1.4796316940898385e-05, "loss": 1.5184, "step": 124310 }, { "epoch": 0.7813800011784809, "grad_norm": 7.21309232711792, "learning_rate": 1.4795897839953732e-05, "loss": 1.702, "step": 124320 }, { "epoch": 0.7814428534951781, "grad_norm": 6.164888381958008, "learning_rate": 1.479547873900908e-05, "loss": 1.4974, "step": 124330 }, { "epoch": 0.7815057058118752, "grad_norm": 6.911844730377197, "learning_rate": 1.4795059638064427e-05, "loss": 1.7752, "step": 124340 }, { "epoch": 0.7815685581285723, "grad_norm": 5.653620719909668, "learning_rate": 1.479464053711977e-05, "loss": 1.6673, "step": 124350 }, { "epoch": 0.7816314104452694, "grad_norm": 6.437528133392334, "learning_rate": 1.4794221436175117e-05, "loss": 1.8561, "step": 124360 }, { "epoch": 0.7816942627619665, "grad_norm": 6.037184238433838, "learning_rate": 1.4793802335230464e-05, "loss": 1.5835, "step": 124370 }, { "epoch": 0.7817571150786636, "grad_norm": 5.882865905761719, "learning_rate": 1.4793383234285811e-05, "loss": 1.7838, "step": 124380 }, { "epoch": 0.7818199673953607, "grad_norm": 6.115137577056885, "learning_rate": 1.4792964133341157e-05, "loss": 1.625, "step": 124390 }, { "epoch": 0.7818828197120579, "grad_norm": 7.6811347007751465, "learning_rate": 1.4792545032396504e-05, "loss": 1.5176, "step": 124400 }, { "epoch": 0.781945672028755, "grad_norm": 8.001575469970703, "learning_rate": 1.4792125931451851e-05, "loss": 1.7655, "step": 124410 }, { "epoch": 0.7820085243454521, "grad_norm": 5.630826473236084, "learning_rate": 1.4791706830507198e-05, "loss": 1.601, "step": 124420 }, { "epoch": 0.7820713766621492, "grad_norm": 5.482722759246826, "learning_rate": 1.4791287729562545e-05, "loss": 1.4681, "step": 124430 }, { "epoch": 0.7821342289788462, "grad_norm": 5.712696552276611, "learning_rate": 1.4790868628617889e-05, "loss": 1.5048, "step": 124440 }, { "epoch": 0.7821970812955433, "grad_norm": 6.567152500152588, "learning_rate": 1.4790449527673236e-05, "loss": 1.6015, "step": 124450 }, { "epoch": 0.7822599336122404, "grad_norm": 5.875940322875977, "learning_rate": 1.4790030426728583e-05, "loss": 1.3869, "step": 124460 }, { "epoch": 0.7823227859289376, "grad_norm": 7.083212852478027, "learning_rate": 1.478961132578393e-05, "loss": 1.9333, "step": 124470 }, { "epoch": 0.7823856382456347, "grad_norm": 6.764925003051758, "learning_rate": 1.4789192224839275e-05, "loss": 1.6252, "step": 124480 }, { "epoch": 0.7824484905623318, "grad_norm": 6.155638217926025, "learning_rate": 1.4788773123894622e-05, "loss": 1.6241, "step": 124490 }, { "epoch": 0.7825113428790289, "grad_norm": 7.117341995239258, "learning_rate": 1.478835402294997e-05, "loss": 1.362, "step": 124500 }, { "epoch": 0.782574195195726, "grad_norm": 6.133358478546143, "learning_rate": 1.4787934922005317e-05, "loss": 1.3876, "step": 124510 }, { "epoch": 0.7826370475124231, "grad_norm": 7.302773475646973, "learning_rate": 1.4787515821060664e-05, "loss": 1.5911, "step": 124520 }, { "epoch": 0.7826998998291202, "grad_norm": 6.74974250793457, "learning_rate": 1.4787096720116007e-05, "loss": 1.9227, "step": 124530 }, { "epoch": 0.7827627521458174, "grad_norm": 6.843846321105957, "learning_rate": 1.4786677619171354e-05, "loss": 1.5501, "step": 124540 }, { "epoch": 0.7828256044625145, "grad_norm": 7.022478103637695, "learning_rate": 1.4786258518226701e-05, "loss": 1.6664, "step": 124550 }, { "epoch": 0.7828884567792116, "grad_norm": 6.756307125091553, "learning_rate": 1.4785839417282049e-05, "loss": 1.9876, "step": 124560 }, { "epoch": 0.7829513090959087, "grad_norm": 6.578350067138672, "learning_rate": 1.4785420316337394e-05, "loss": 1.4891, "step": 124570 }, { "epoch": 0.7830141614126058, "grad_norm": 6.477244853973389, "learning_rate": 1.4785001215392741e-05, "loss": 1.6783, "step": 124580 }, { "epoch": 0.7830770137293029, "grad_norm": 6.223769187927246, "learning_rate": 1.4784582114448088e-05, "loss": 1.6556, "step": 124590 }, { "epoch": 0.783139866046, "grad_norm": 4.901460647583008, "learning_rate": 1.4784163013503433e-05, "loss": 1.5048, "step": 124600 }, { "epoch": 0.7832027183626972, "grad_norm": 7.089406490325928, "learning_rate": 1.478374391255878e-05, "loss": 1.714, "step": 124610 }, { "epoch": 0.7832655706793943, "grad_norm": 6.149925231933594, "learning_rate": 1.4783324811614126e-05, "loss": 1.5439, "step": 124620 }, { "epoch": 0.7833284229960914, "grad_norm": 6.35594367980957, "learning_rate": 1.4782905710669473e-05, "loss": 1.6053, "step": 124630 }, { "epoch": 0.7833912753127885, "grad_norm": 7.102924823760986, "learning_rate": 1.478248660972482e-05, "loss": 1.5252, "step": 124640 }, { "epoch": 0.7834541276294856, "grad_norm": 6.171487331390381, "learning_rate": 1.4782067508780167e-05, "loss": 1.5834, "step": 124650 }, { "epoch": 0.7835169799461827, "grad_norm": 5.921186447143555, "learning_rate": 1.478164840783551e-05, "loss": 1.3824, "step": 124660 }, { "epoch": 0.7835798322628799, "grad_norm": 5.365461826324463, "learning_rate": 1.4781229306890858e-05, "loss": 1.4792, "step": 124670 }, { "epoch": 0.783642684579577, "grad_norm": 6.067823886871338, "learning_rate": 1.4780810205946205e-05, "loss": 1.5823, "step": 124680 }, { "epoch": 0.7837055368962741, "grad_norm": 5.599748134613037, "learning_rate": 1.4780391105001552e-05, "loss": 1.7637, "step": 124690 }, { "epoch": 0.7837683892129711, "grad_norm": 5.318389415740967, "learning_rate": 1.4779972004056897e-05, "loss": 1.5311, "step": 124700 }, { "epoch": 0.7838312415296682, "grad_norm": 6.72338342666626, "learning_rate": 1.4779552903112244e-05, "loss": 1.5609, "step": 124710 }, { "epoch": 0.7838940938463653, "grad_norm": 6.061426639556885, "learning_rate": 1.4779133802167591e-05, "loss": 1.7527, "step": 124720 }, { "epoch": 0.7839569461630624, "grad_norm": 5.294411659240723, "learning_rate": 1.4778714701222939e-05, "loss": 1.3349, "step": 124730 }, { "epoch": 0.7840197984797596, "grad_norm": 6.9191765785217285, "learning_rate": 1.4778295600278286e-05, "loss": 1.3523, "step": 124740 }, { "epoch": 0.7840826507964567, "grad_norm": 5.913338661193848, "learning_rate": 1.477787649933363e-05, "loss": 1.5144, "step": 124750 }, { "epoch": 0.7841455031131538, "grad_norm": 5.615120887756348, "learning_rate": 1.4777457398388976e-05, "loss": 1.4487, "step": 124760 }, { "epoch": 0.7842083554298509, "grad_norm": 7.352620601654053, "learning_rate": 1.4777038297444323e-05, "loss": 1.4897, "step": 124770 }, { "epoch": 0.784271207746548, "grad_norm": 6.531876564025879, "learning_rate": 1.477661919649967e-05, "loss": 1.5274, "step": 124780 }, { "epoch": 0.7843340600632451, "grad_norm": 5.982143402099609, "learning_rate": 1.4776200095555016e-05, "loss": 1.5094, "step": 124790 }, { "epoch": 0.7843969123799422, "grad_norm": 5.861140727996826, "learning_rate": 1.4775780994610363e-05, "loss": 1.795, "step": 124800 }, { "epoch": 0.7844597646966394, "grad_norm": 6.9778289794921875, "learning_rate": 1.477536189366571e-05, "loss": 1.5421, "step": 124810 }, { "epoch": 0.7845226170133365, "grad_norm": 5.670233249664307, "learning_rate": 1.4774942792721057e-05, "loss": 1.3728, "step": 124820 }, { "epoch": 0.7845854693300336, "grad_norm": 6.536532402038574, "learning_rate": 1.4774523691776404e-05, "loss": 1.4614, "step": 124830 }, { "epoch": 0.7846483216467307, "grad_norm": 6.704484462738037, "learning_rate": 1.4774104590831748e-05, "loss": 1.6172, "step": 124840 }, { "epoch": 0.7847111739634278, "grad_norm": 7.304074764251709, "learning_rate": 1.4773685489887095e-05, "loss": 1.7006, "step": 124850 }, { "epoch": 0.7847740262801249, "grad_norm": 7.6201395988464355, "learning_rate": 1.4773266388942442e-05, "loss": 1.6694, "step": 124860 }, { "epoch": 0.784836878596822, "grad_norm": 7.4897284507751465, "learning_rate": 1.4772847287997789e-05, "loss": 1.5719, "step": 124870 }, { "epoch": 0.7848997309135192, "grad_norm": 5.951974868774414, "learning_rate": 1.4772428187053134e-05, "loss": 1.6502, "step": 124880 }, { "epoch": 0.7849625832302163, "grad_norm": 4.7976837158203125, "learning_rate": 1.4772009086108482e-05, "loss": 1.4351, "step": 124890 }, { "epoch": 0.7850254355469134, "grad_norm": 6.078033447265625, "learning_rate": 1.4771589985163829e-05, "loss": 1.6329, "step": 124900 }, { "epoch": 0.7850882878636105, "grad_norm": 6.36521053314209, "learning_rate": 1.4771170884219174e-05, "loss": 1.4933, "step": 124910 }, { "epoch": 0.7851511401803076, "grad_norm": 6.84323787689209, "learning_rate": 1.4770751783274521e-05, "loss": 1.6187, "step": 124920 }, { "epoch": 0.7852139924970047, "grad_norm": 6.295436859130859, "learning_rate": 1.4770332682329866e-05, "loss": 1.5152, "step": 124930 }, { "epoch": 0.7852768448137019, "grad_norm": 6.235681056976318, "learning_rate": 1.4769913581385213e-05, "loss": 1.3081, "step": 124940 }, { "epoch": 0.7853396971303989, "grad_norm": 5.9656476974487305, "learning_rate": 1.476949448044056e-05, "loss": 1.549, "step": 124950 }, { "epoch": 0.785402549447096, "grad_norm": 6.388768196105957, "learning_rate": 1.4769075379495908e-05, "loss": 1.5615, "step": 124960 }, { "epoch": 0.7854654017637931, "grad_norm": 5.354004383087158, "learning_rate": 1.4768656278551253e-05, "loss": 1.6545, "step": 124970 }, { "epoch": 0.7855282540804902, "grad_norm": 6.536776542663574, "learning_rate": 1.4768237177606598e-05, "loss": 1.4357, "step": 124980 }, { "epoch": 0.7855911063971873, "grad_norm": 6.764211654663086, "learning_rate": 1.4767818076661945e-05, "loss": 1.638, "step": 124990 }, { "epoch": 0.7856539587138844, "grad_norm": 7.431994438171387, "learning_rate": 1.4767398975717293e-05, "loss": 1.7454, "step": 125000 }, { "epoch": 0.7857168110305816, "grad_norm": 7.752243518829346, "learning_rate": 1.476697987477264e-05, "loss": 1.6082, "step": 125010 }, { "epoch": 0.7857796633472787, "grad_norm": 8.0689058303833, "learning_rate": 1.4766560773827985e-05, "loss": 1.3525, "step": 125020 }, { "epoch": 0.7858425156639758, "grad_norm": 6.900312900543213, "learning_rate": 1.4766141672883332e-05, "loss": 1.524, "step": 125030 }, { "epoch": 0.7859053679806729, "grad_norm": 6.975441932678223, "learning_rate": 1.4765722571938679e-05, "loss": 1.5605, "step": 125040 }, { "epoch": 0.78596822029737, "grad_norm": 7.432410717010498, "learning_rate": 1.4765303470994026e-05, "loss": 1.6976, "step": 125050 }, { "epoch": 0.7860310726140671, "grad_norm": 5.973094463348389, "learning_rate": 1.476488437004937e-05, "loss": 1.5579, "step": 125060 }, { "epoch": 0.7860939249307642, "grad_norm": 6.02004337310791, "learning_rate": 1.4764465269104717e-05, "loss": 1.6811, "step": 125070 }, { "epoch": 0.7861567772474614, "grad_norm": 6.000905513763428, "learning_rate": 1.4764046168160064e-05, "loss": 1.3433, "step": 125080 }, { "epoch": 0.7862196295641585, "grad_norm": 7.1472249031066895, "learning_rate": 1.4763627067215411e-05, "loss": 1.5057, "step": 125090 }, { "epoch": 0.7862824818808556, "grad_norm": 6.639767646789551, "learning_rate": 1.4763207966270756e-05, "loss": 1.4208, "step": 125100 }, { "epoch": 0.7863453341975527, "grad_norm": 6.315929889678955, "learning_rate": 1.4762788865326104e-05, "loss": 1.7368, "step": 125110 }, { "epoch": 0.7864081865142498, "grad_norm": 6.559476375579834, "learning_rate": 1.476236976438145e-05, "loss": 1.5957, "step": 125120 }, { "epoch": 0.7864710388309469, "grad_norm": 6.75954532623291, "learning_rate": 1.4761950663436798e-05, "loss": 1.6511, "step": 125130 }, { "epoch": 0.786533891147644, "grad_norm": 7.152742385864258, "learning_rate": 1.4761531562492145e-05, "loss": 1.3636, "step": 125140 }, { "epoch": 0.7865967434643412, "grad_norm": 6.649284839630127, "learning_rate": 1.4761112461547488e-05, "loss": 1.5758, "step": 125150 }, { "epoch": 0.7866595957810383, "grad_norm": 6.113519191741943, "learning_rate": 1.4760693360602835e-05, "loss": 1.5074, "step": 125160 }, { "epoch": 0.7867224480977354, "grad_norm": 6.174967288970947, "learning_rate": 1.4760274259658183e-05, "loss": 1.7528, "step": 125170 }, { "epoch": 0.7867853004144325, "grad_norm": 6.821481704711914, "learning_rate": 1.475985515871353e-05, "loss": 1.5735, "step": 125180 }, { "epoch": 0.7868481527311296, "grad_norm": 6.076359272003174, "learning_rate": 1.4759436057768875e-05, "loss": 1.9036, "step": 125190 }, { "epoch": 0.7869110050478267, "grad_norm": 5.820423126220703, "learning_rate": 1.4759016956824222e-05, "loss": 1.4209, "step": 125200 }, { "epoch": 0.7869738573645237, "grad_norm": 6.18796968460083, "learning_rate": 1.475859785587957e-05, "loss": 1.6042, "step": 125210 }, { "epoch": 0.7870367096812209, "grad_norm": 6.335443019866943, "learning_rate": 1.4758178754934915e-05, "loss": 1.5408, "step": 125220 }, { "epoch": 0.787099561997918, "grad_norm": 7.0418596267700195, "learning_rate": 1.4757759653990262e-05, "loss": 1.4753, "step": 125230 }, { "epoch": 0.7871624143146151, "grad_norm": 6.072787284851074, "learning_rate": 1.4757340553045607e-05, "loss": 1.7044, "step": 125240 }, { "epoch": 0.7872252666313122, "grad_norm": 6.614512920379639, "learning_rate": 1.4756921452100954e-05, "loss": 1.5064, "step": 125250 }, { "epoch": 0.7872881189480093, "grad_norm": 5.504802703857422, "learning_rate": 1.4756502351156301e-05, "loss": 1.4929, "step": 125260 }, { "epoch": 0.7873509712647064, "grad_norm": 6.460712909698486, "learning_rate": 1.4756083250211648e-05, "loss": 1.6838, "step": 125270 }, { "epoch": 0.7874138235814035, "grad_norm": 5.779965877532959, "learning_rate": 1.4755664149266994e-05, "loss": 1.7015, "step": 125280 }, { "epoch": 0.7874766758981007, "grad_norm": 7.005980014801025, "learning_rate": 1.4755245048322339e-05, "loss": 1.7211, "step": 125290 }, { "epoch": 0.7875395282147978, "grad_norm": 6.12052583694458, "learning_rate": 1.4754825947377686e-05, "loss": 1.7615, "step": 125300 }, { "epoch": 0.7876023805314949, "grad_norm": 6.373183727264404, "learning_rate": 1.4754406846433033e-05, "loss": 1.5446, "step": 125310 }, { "epoch": 0.787665232848192, "grad_norm": 6.870295524597168, "learning_rate": 1.475398774548838e-05, "loss": 1.5771, "step": 125320 }, { "epoch": 0.7877280851648891, "grad_norm": 6.176692485809326, "learning_rate": 1.4753568644543726e-05, "loss": 1.4018, "step": 125330 }, { "epoch": 0.7877909374815862, "grad_norm": 6.8330607414245605, "learning_rate": 1.4753149543599073e-05, "loss": 1.289, "step": 125340 }, { "epoch": 0.7878537897982834, "grad_norm": 6.328117847442627, "learning_rate": 1.475273044265442e-05, "loss": 1.4145, "step": 125350 }, { "epoch": 0.7879166421149805, "grad_norm": 6.612305641174316, "learning_rate": 1.4752311341709767e-05, "loss": 1.6084, "step": 125360 }, { "epoch": 0.7879794944316776, "grad_norm": 6.249629020690918, "learning_rate": 1.475189224076511e-05, "loss": 1.6003, "step": 125370 }, { "epoch": 0.7880423467483747, "grad_norm": 6.391242027282715, "learning_rate": 1.4751473139820457e-05, "loss": 1.6501, "step": 125380 }, { "epoch": 0.7881051990650718, "grad_norm": 6.851719379425049, "learning_rate": 1.4751054038875805e-05, "loss": 1.6964, "step": 125390 }, { "epoch": 0.7881680513817689, "grad_norm": 7.299596309661865, "learning_rate": 1.4750634937931152e-05, "loss": 1.6259, "step": 125400 }, { "epoch": 0.788230903698466, "grad_norm": 6.680347919464111, "learning_rate": 1.4750215836986497e-05, "loss": 1.601, "step": 125410 }, { "epoch": 0.7882937560151632, "grad_norm": 5.847686290740967, "learning_rate": 1.4749796736041844e-05, "loss": 1.7636, "step": 125420 }, { "epoch": 0.7883566083318603, "grad_norm": 6.313074588775635, "learning_rate": 1.4749377635097191e-05, "loss": 1.7038, "step": 125430 }, { "epoch": 0.7884194606485574, "grad_norm": 6.959484577178955, "learning_rate": 1.4748958534152538e-05, "loss": 1.6529, "step": 125440 }, { "epoch": 0.7884823129652545, "grad_norm": 7.320756912231445, "learning_rate": 1.4748539433207885e-05, "loss": 1.505, "step": 125450 }, { "epoch": 0.7885451652819515, "grad_norm": 7.231621265411377, "learning_rate": 1.4748120332263229e-05, "loss": 1.5173, "step": 125460 }, { "epoch": 0.7886080175986486, "grad_norm": 7.906031131744385, "learning_rate": 1.4747701231318576e-05, "loss": 1.6583, "step": 125470 }, { "epoch": 0.7886708699153457, "grad_norm": 6.090366363525391, "learning_rate": 1.4747282130373923e-05, "loss": 1.5974, "step": 125480 }, { "epoch": 0.7887337222320429, "grad_norm": 6.336813926696777, "learning_rate": 1.474686302942927e-05, "loss": 1.5699, "step": 125490 }, { "epoch": 0.78879657454874, "grad_norm": 6.168365478515625, "learning_rate": 1.4746443928484616e-05, "loss": 1.7668, "step": 125500 }, { "epoch": 0.7888594268654371, "grad_norm": 6.6962714195251465, "learning_rate": 1.4746024827539963e-05, "loss": 1.7307, "step": 125510 }, { "epoch": 0.7889222791821342, "grad_norm": 6.099548816680908, "learning_rate": 1.474560572659531e-05, "loss": 1.7463, "step": 125520 }, { "epoch": 0.7889851314988313, "grad_norm": 6.7555437088012695, "learning_rate": 1.4745186625650657e-05, "loss": 1.636, "step": 125530 }, { "epoch": 0.7890479838155284, "grad_norm": 7.971730709075928, "learning_rate": 1.4744767524706002e-05, "loss": 1.6252, "step": 125540 }, { "epoch": 0.7891108361322255, "grad_norm": 6.346310138702393, "learning_rate": 1.4744348423761348e-05, "loss": 1.6358, "step": 125550 }, { "epoch": 0.7891736884489227, "grad_norm": 6.743438720703125, "learning_rate": 1.4743929322816695e-05, "loss": 1.5607, "step": 125560 }, { "epoch": 0.7892365407656198, "grad_norm": 7.675486087799072, "learning_rate": 1.4743510221872042e-05, "loss": 1.5282, "step": 125570 }, { "epoch": 0.7892993930823169, "grad_norm": 6.808964729309082, "learning_rate": 1.4743091120927389e-05, "loss": 1.3673, "step": 125580 }, { "epoch": 0.789362245399014, "grad_norm": 5.577764987945557, "learning_rate": 1.4742672019982734e-05, "loss": 1.5494, "step": 125590 }, { "epoch": 0.7894250977157111, "grad_norm": 5.431988716125488, "learning_rate": 1.474225291903808e-05, "loss": 1.3762, "step": 125600 }, { "epoch": 0.7894879500324082, "grad_norm": 5.761918067932129, "learning_rate": 1.4741833818093427e-05, "loss": 1.3322, "step": 125610 }, { "epoch": 0.7895508023491054, "grad_norm": 6.490145206451416, "learning_rate": 1.4741414717148774e-05, "loss": 1.8115, "step": 125620 }, { "epoch": 0.7896136546658025, "grad_norm": 6.952646732330322, "learning_rate": 1.474099561620412e-05, "loss": 1.6042, "step": 125630 }, { "epoch": 0.7896765069824996, "grad_norm": 6.986286640167236, "learning_rate": 1.4740576515259466e-05, "loss": 1.5033, "step": 125640 }, { "epoch": 0.7897393592991967, "grad_norm": 5.9131951332092285, "learning_rate": 1.4740157414314813e-05, "loss": 1.4577, "step": 125650 }, { "epoch": 0.7898022116158938, "grad_norm": 7.052176475524902, "learning_rate": 1.473973831337016e-05, "loss": 1.5199, "step": 125660 }, { "epoch": 0.7898650639325909, "grad_norm": 7.309354305267334, "learning_rate": 1.4739319212425507e-05, "loss": 1.7211, "step": 125670 }, { "epoch": 0.789927916249288, "grad_norm": 5.919618606567383, "learning_rate": 1.4738900111480851e-05, "loss": 1.6385, "step": 125680 }, { "epoch": 0.7899907685659852, "grad_norm": 6.132586479187012, "learning_rate": 1.4738481010536198e-05, "loss": 1.68, "step": 125690 }, { "epoch": 0.7900536208826823, "grad_norm": 6.035669326782227, "learning_rate": 1.4738061909591545e-05, "loss": 1.5735, "step": 125700 }, { "epoch": 0.7901164731993794, "grad_norm": 6.01634407043457, "learning_rate": 1.4737642808646892e-05, "loss": 1.5432, "step": 125710 }, { "epoch": 0.7901793255160764, "grad_norm": 7.341683387756348, "learning_rate": 1.4737223707702238e-05, "loss": 1.5898, "step": 125720 }, { "epoch": 0.7902421778327735, "grad_norm": 7.209320068359375, "learning_rate": 1.4736804606757585e-05, "loss": 1.7538, "step": 125730 }, { "epoch": 0.7903050301494706, "grad_norm": 5.579822063446045, "learning_rate": 1.4736385505812932e-05, "loss": 1.493, "step": 125740 }, { "epoch": 0.7903678824661677, "grad_norm": 6.049619674682617, "learning_rate": 1.4735966404868279e-05, "loss": 1.5774, "step": 125750 }, { "epoch": 0.7904307347828649, "grad_norm": 6.2716851234436035, "learning_rate": 1.4735547303923626e-05, "loss": 1.5059, "step": 125760 }, { "epoch": 0.790493587099562, "grad_norm": 6.8997483253479, "learning_rate": 1.473512820297897e-05, "loss": 1.6502, "step": 125770 }, { "epoch": 0.7905564394162591, "grad_norm": 6.963823318481445, "learning_rate": 1.4734709102034317e-05, "loss": 1.5679, "step": 125780 }, { "epoch": 0.7906192917329562, "grad_norm": 7.256715297698975, "learning_rate": 1.4734290001089664e-05, "loss": 1.9256, "step": 125790 }, { "epoch": 0.7906821440496533, "grad_norm": 6.907870292663574, "learning_rate": 1.473387090014501e-05, "loss": 1.4705, "step": 125800 }, { "epoch": 0.7907449963663504, "grad_norm": 5.856288909912109, "learning_rate": 1.4733451799200356e-05, "loss": 1.4704, "step": 125810 }, { "epoch": 0.7908078486830475, "grad_norm": 6.106492519378662, "learning_rate": 1.4733032698255703e-05, "loss": 1.6956, "step": 125820 }, { "epoch": 0.7908707009997447, "grad_norm": 7.309217929840088, "learning_rate": 1.473261359731105e-05, "loss": 1.8171, "step": 125830 }, { "epoch": 0.7909335533164418, "grad_norm": 6.840932369232178, "learning_rate": 1.4732194496366397e-05, "loss": 1.6197, "step": 125840 }, { "epoch": 0.7909964056331389, "grad_norm": 5.846725940704346, "learning_rate": 1.4731775395421743e-05, "loss": 1.6506, "step": 125850 }, { "epoch": 0.791059257949836, "grad_norm": 7.0063605308532715, "learning_rate": 1.4731356294477088e-05, "loss": 1.9317, "step": 125860 }, { "epoch": 0.7911221102665331, "grad_norm": 5.581164360046387, "learning_rate": 1.4730937193532435e-05, "loss": 1.5135, "step": 125870 }, { "epoch": 0.7911849625832302, "grad_norm": 7.6870598793029785, "learning_rate": 1.4730518092587782e-05, "loss": 1.8118, "step": 125880 }, { "epoch": 0.7912478148999273, "grad_norm": 5.364130973815918, "learning_rate": 1.473009899164313e-05, "loss": 1.4011, "step": 125890 }, { "epoch": 0.7913106672166245, "grad_norm": 7.236602306365967, "learning_rate": 1.4729679890698475e-05, "loss": 1.557, "step": 125900 }, { "epoch": 0.7913735195333216, "grad_norm": 6.039979457855225, "learning_rate": 1.472926078975382e-05, "loss": 1.6451, "step": 125910 }, { "epoch": 0.7914363718500187, "grad_norm": 5.8714823722839355, "learning_rate": 1.4728841688809167e-05, "loss": 1.6081, "step": 125920 }, { "epoch": 0.7914992241667158, "grad_norm": 7.167581558227539, "learning_rate": 1.4728422587864514e-05, "loss": 1.85, "step": 125930 }, { "epoch": 0.7915620764834129, "grad_norm": 7.117419242858887, "learning_rate": 1.4728003486919861e-05, "loss": 1.4089, "step": 125940 }, { "epoch": 0.79162492880011, "grad_norm": 6.153970241546631, "learning_rate": 1.4727584385975207e-05, "loss": 1.4535, "step": 125950 }, { "epoch": 0.7916877811168072, "grad_norm": 7.502902030944824, "learning_rate": 1.4727165285030554e-05, "loss": 1.6855, "step": 125960 }, { "epoch": 0.7917506334335042, "grad_norm": 5.559210300445557, "learning_rate": 1.47267461840859e-05, "loss": 1.4268, "step": 125970 }, { "epoch": 0.7918134857502013, "grad_norm": 5.9198527336120605, "learning_rate": 1.4726327083141248e-05, "loss": 1.5697, "step": 125980 }, { "epoch": 0.7918763380668984, "grad_norm": 6.3073601722717285, "learning_rate": 1.4725907982196592e-05, "loss": 1.6166, "step": 125990 }, { "epoch": 0.7919391903835955, "grad_norm": 6.572141170501709, "learning_rate": 1.4725488881251939e-05, "loss": 1.5899, "step": 126000 }, { "epoch": 0.7920020427002926, "grad_norm": 6.698461532592773, "learning_rate": 1.4725069780307286e-05, "loss": 1.4765, "step": 126010 }, { "epoch": 0.7920648950169897, "grad_norm": 7.350159168243408, "learning_rate": 1.4724650679362633e-05, "loss": 1.5179, "step": 126020 }, { "epoch": 0.7921277473336868, "grad_norm": 7.004650592803955, "learning_rate": 1.4724231578417978e-05, "loss": 1.5526, "step": 126030 }, { "epoch": 0.792190599650384, "grad_norm": 6.276726245880127, "learning_rate": 1.4723812477473325e-05, "loss": 1.5309, "step": 126040 }, { "epoch": 0.7922534519670811, "grad_norm": 7.76663064956665, "learning_rate": 1.4723393376528672e-05, "loss": 1.6354, "step": 126050 }, { "epoch": 0.7923163042837782, "grad_norm": 6.963621139526367, "learning_rate": 1.472297427558402e-05, "loss": 1.453, "step": 126060 }, { "epoch": 0.7923791566004753, "grad_norm": 5.757785797119141, "learning_rate": 1.4722555174639366e-05, "loss": 1.6305, "step": 126070 }, { "epoch": 0.7924420089171724, "grad_norm": 6.344888687133789, "learning_rate": 1.472213607369471e-05, "loss": 1.5846, "step": 126080 }, { "epoch": 0.7925048612338695, "grad_norm": 5.937010288238525, "learning_rate": 1.4721716972750057e-05, "loss": 1.5592, "step": 126090 }, { "epoch": 0.7925677135505667, "grad_norm": 6.348926544189453, "learning_rate": 1.4721297871805404e-05, "loss": 1.6011, "step": 126100 }, { "epoch": 0.7926305658672638, "grad_norm": 6.579949855804443, "learning_rate": 1.4720878770860751e-05, "loss": 1.7442, "step": 126110 }, { "epoch": 0.7926934181839609, "grad_norm": 5.932890892028809, "learning_rate": 1.4720459669916097e-05, "loss": 1.6936, "step": 126120 }, { "epoch": 0.792756270500658, "grad_norm": 6.792901992797852, "learning_rate": 1.4720040568971444e-05, "loss": 1.8082, "step": 126130 }, { "epoch": 0.7928191228173551, "grad_norm": 5.802411079406738, "learning_rate": 1.471962146802679e-05, "loss": 1.654, "step": 126140 }, { "epoch": 0.7928819751340522, "grad_norm": 7.658969879150391, "learning_rate": 1.4719202367082138e-05, "loss": 1.8559, "step": 126150 }, { "epoch": 0.7929448274507493, "grad_norm": 6.637668132781982, "learning_rate": 1.4718783266137483e-05, "loss": 1.6167, "step": 126160 }, { "epoch": 0.7930076797674465, "grad_norm": 6.326990127563477, "learning_rate": 1.4718364165192829e-05, "loss": 1.7941, "step": 126170 }, { "epoch": 0.7930705320841436, "grad_norm": 6.690548419952393, "learning_rate": 1.4717945064248176e-05, "loss": 1.7078, "step": 126180 }, { "epoch": 0.7931333844008407, "grad_norm": 6.911334037780762, "learning_rate": 1.4717525963303523e-05, "loss": 1.5828, "step": 126190 }, { "epoch": 0.7931962367175378, "grad_norm": 7.0854926109313965, "learning_rate": 1.471710686235887e-05, "loss": 1.4639, "step": 126200 }, { "epoch": 0.7932590890342349, "grad_norm": 6.275550842285156, "learning_rate": 1.4716687761414215e-05, "loss": 1.7028, "step": 126210 }, { "epoch": 0.793321941350932, "grad_norm": 6.527076244354248, "learning_rate": 1.4716268660469562e-05, "loss": 1.6216, "step": 126220 }, { "epoch": 0.793384793667629, "grad_norm": 7.024774074554443, "learning_rate": 1.4715849559524908e-05, "loss": 1.609, "step": 126230 }, { "epoch": 0.7934476459843262, "grad_norm": 5.79679536819458, "learning_rate": 1.4715430458580255e-05, "loss": 1.6176, "step": 126240 }, { "epoch": 0.7935104983010233, "grad_norm": 6.3160247802734375, "learning_rate": 1.4715011357635602e-05, "loss": 1.6761, "step": 126250 }, { "epoch": 0.7935733506177204, "grad_norm": 6.258047103881836, "learning_rate": 1.4714592256690947e-05, "loss": 1.4803, "step": 126260 }, { "epoch": 0.7936362029344175, "grad_norm": 7.629281997680664, "learning_rate": 1.4714173155746294e-05, "loss": 1.5293, "step": 126270 }, { "epoch": 0.7936990552511146, "grad_norm": 6.507336139678955, "learning_rate": 1.4713754054801641e-05, "loss": 1.8054, "step": 126280 }, { "epoch": 0.7937619075678117, "grad_norm": 6.551023483276367, "learning_rate": 1.4713334953856988e-05, "loss": 1.616, "step": 126290 }, { "epoch": 0.7938247598845088, "grad_norm": 6.801031112670898, "learning_rate": 1.4712915852912332e-05, "loss": 1.5809, "step": 126300 }, { "epoch": 0.793887612201206, "grad_norm": 7.758546352386475, "learning_rate": 1.471249675196768e-05, "loss": 1.7554, "step": 126310 }, { "epoch": 0.7939504645179031, "grad_norm": 5.507966041564941, "learning_rate": 1.4712077651023026e-05, "loss": 1.595, "step": 126320 }, { "epoch": 0.7940133168346002, "grad_norm": 6.165992259979248, "learning_rate": 1.4711658550078373e-05, "loss": 1.5877, "step": 126330 }, { "epoch": 0.7940761691512973, "grad_norm": 5.525192737579346, "learning_rate": 1.4711239449133719e-05, "loss": 1.7429, "step": 126340 }, { "epoch": 0.7941390214679944, "grad_norm": 5.700862407684326, "learning_rate": 1.4710820348189066e-05, "loss": 1.5167, "step": 126350 }, { "epoch": 0.7942018737846915, "grad_norm": 5.368999004364014, "learning_rate": 1.4710401247244413e-05, "loss": 1.5961, "step": 126360 }, { "epoch": 0.7942647261013887, "grad_norm": 7.041489124298096, "learning_rate": 1.470998214629976e-05, "loss": 1.5955, "step": 126370 }, { "epoch": 0.7943275784180858, "grad_norm": 6.7174506187438965, "learning_rate": 1.4709563045355107e-05, "loss": 1.461, "step": 126380 }, { "epoch": 0.7943904307347829, "grad_norm": 6.197600364685059, "learning_rate": 1.470914394441045e-05, "loss": 1.6482, "step": 126390 }, { "epoch": 0.79445328305148, "grad_norm": 7.42714786529541, "learning_rate": 1.4708724843465798e-05, "loss": 1.5775, "step": 126400 }, { "epoch": 0.7945161353681771, "grad_norm": 7.02616548538208, "learning_rate": 1.4708305742521145e-05, "loss": 1.495, "step": 126410 }, { "epoch": 0.7945789876848742, "grad_norm": 5.982561111450195, "learning_rate": 1.4707886641576492e-05, "loss": 1.4115, "step": 126420 }, { "epoch": 0.7946418400015713, "grad_norm": 6.265834331512451, "learning_rate": 1.4707467540631837e-05, "loss": 1.7257, "step": 126430 }, { "epoch": 0.7947046923182685, "grad_norm": 6.405435562133789, "learning_rate": 1.4707048439687184e-05, "loss": 1.4261, "step": 126440 }, { "epoch": 0.7947675446349656, "grad_norm": 5.93803596496582, "learning_rate": 1.4706629338742531e-05, "loss": 1.471, "step": 126450 }, { "epoch": 0.7948303969516627, "grad_norm": 6.864293098449707, "learning_rate": 1.4706210237797878e-05, "loss": 1.5623, "step": 126460 }, { "epoch": 0.7948932492683598, "grad_norm": 7.847011566162109, "learning_rate": 1.4705791136853226e-05, "loss": 1.6047, "step": 126470 }, { "epoch": 0.7949561015850568, "grad_norm": 7.684126853942871, "learning_rate": 1.470537203590857e-05, "loss": 1.809, "step": 126480 }, { "epoch": 0.7950189539017539, "grad_norm": 6.885912895202637, "learning_rate": 1.4704952934963916e-05, "loss": 1.6312, "step": 126490 }, { "epoch": 0.795081806218451, "grad_norm": 6.973878860473633, "learning_rate": 1.4704533834019263e-05, "loss": 1.6712, "step": 126500 }, { "epoch": 0.7951446585351482, "grad_norm": 5.735217094421387, "learning_rate": 1.470411473307461e-05, "loss": 1.7193, "step": 126510 }, { "epoch": 0.7952075108518453, "grad_norm": 6.294557094573975, "learning_rate": 1.4703695632129956e-05, "loss": 1.6743, "step": 126520 }, { "epoch": 0.7952703631685424, "grad_norm": 7.289360046386719, "learning_rate": 1.4703276531185303e-05, "loss": 1.5616, "step": 126530 }, { "epoch": 0.7953332154852395, "grad_norm": 6.456623077392578, "learning_rate": 1.4702857430240648e-05, "loss": 1.642, "step": 126540 }, { "epoch": 0.7953960678019366, "grad_norm": 7.75649881362915, "learning_rate": 1.4702438329295995e-05, "loss": 1.523, "step": 126550 }, { "epoch": 0.7954589201186337, "grad_norm": 7.781533241271973, "learning_rate": 1.4702019228351342e-05, "loss": 1.7725, "step": 126560 }, { "epoch": 0.7955217724353308, "grad_norm": 6.417851448059082, "learning_rate": 1.4701600127406688e-05, "loss": 1.5826, "step": 126570 }, { "epoch": 0.795584624752028, "grad_norm": 7.097602844238281, "learning_rate": 1.4701181026462035e-05, "loss": 1.6525, "step": 126580 }, { "epoch": 0.7956474770687251, "grad_norm": 6.180857181549072, "learning_rate": 1.4700761925517382e-05, "loss": 1.4011, "step": 126590 }, { "epoch": 0.7957103293854222, "grad_norm": 5.929043292999268, "learning_rate": 1.4700342824572729e-05, "loss": 1.5811, "step": 126600 }, { "epoch": 0.7957731817021193, "grad_norm": 6.822539806365967, "learning_rate": 1.4699923723628073e-05, "loss": 1.6045, "step": 126610 }, { "epoch": 0.7958360340188164, "grad_norm": 6.456976413726807, "learning_rate": 1.469950462268342e-05, "loss": 1.5757, "step": 126620 }, { "epoch": 0.7958988863355135, "grad_norm": 5.858137130737305, "learning_rate": 1.4699085521738767e-05, "loss": 1.4446, "step": 126630 }, { "epoch": 0.7959617386522106, "grad_norm": 6.887723445892334, "learning_rate": 1.4698666420794114e-05, "loss": 1.5294, "step": 126640 }, { "epoch": 0.7960245909689078, "grad_norm": 6.020218372344971, "learning_rate": 1.469824731984946e-05, "loss": 1.5737, "step": 126650 }, { "epoch": 0.7960874432856049, "grad_norm": 6.273682117462158, "learning_rate": 1.4697828218904806e-05, "loss": 1.595, "step": 126660 }, { "epoch": 0.796150295602302, "grad_norm": 5.848662376403809, "learning_rate": 1.4697409117960153e-05, "loss": 1.4215, "step": 126670 }, { "epoch": 0.7962131479189991, "grad_norm": 6.93732213973999, "learning_rate": 1.46969900170155e-05, "loss": 1.4694, "step": 126680 }, { "epoch": 0.7962760002356962, "grad_norm": 5.907258033752441, "learning_rate": 1.4696570916070848e-05, "loss": 1.4548, "step": 126690 }, { "epoch": 0.7963388525523933, "grad_norm": 6.12294864654541, "learning_rate": 1.4696151815126191e-05, "loss": 1.5239, "step": 126700 }, { "epoch": 0.7964017048690905, "grad_norm": 5.543499946594238, "learning_rate": 1.4695732714181538e-05, "loss": 1.399, "step": 126710 }, { "epoch": 0.7964645571857876, "grad_norm": 5.761287212371826, "learning_rate": 1.4695313613236885e-05, "loss": 1.5, "step": 126720 }, { "epoch": 0.7965274095024847, "grad_norm": 6.135310173034668, "learning_rate": 1.4694894512292232e-05, "loss": 1.7174, "step": 126730 }, { "epoch": 0.7965902618191817, "grad_norm": 6.743239879608154, "learning_rate": 1.4694475411347578e-05, "loss": 1.5867, "step": 126740 }, { "epoch": 0.7966531141358788, "grad_norm": 6.6734466552734375, "learning_rate": 1.4694056310402925e-05, "loss": 1.7619, "step": 126750 }, { "epoch": 0.7967159664525759, "grad_norm": 6.193628787994385, "learning_rate": 1.4693637209458272e-05, "loss": 1.6588, "step": 126760 }, { "epoch": 0.796778818769273, "grad_norm": 5.738262176513672, "learning_rate": 1.4693218108513619e-05, "loss": 1.3049, "step": 126770 }, { "epoch": 0.7968416710859701, "grad_norm": 5.9922003746032715, "learning_rate": 1.4692799007568966e-05, "loss": 1.5419, "step": 126780 }, { "epoch": 0.7969045234026673, "grad_norm": 6.955031871795654, "learning_rate": 1.469237990662431e-05, "loss": 1.661, "step": 126790 }, { "epoch": 0.7969673757193644, "grad_norm": 4.9513421058654785, "learning_rate": 1.4691960805679657e-05, "loss": 1.5246, "step": 126800 }, { "epoch": 0.7970302280360615, "grad_norm": 7.062002182006836, "learning_rate": 1.4691541704735004e-05, "loss": 1.5576, "step": 126810 }, { "epoch": 0.7970930803527586, "grad_norm": 5.826161861419678, "learning_rate": 1.4691122603790351e-05, "loss": 1.5153, "step": 126820 }, { "epoch": 0.7971559326694557, "grad_norm": 6.426275730133057, "learning_rate": 1.4690703502845696e-05, "loss": 1.4776, "step": 126830 }, { "epoch": 0.7972187849861528, "grad_norm": 6.657223224639893, "learning_rate": 1.4690284401901043e-05, "loss": 1.704, "step": 126840 }, { "epoch": 0.79728163730285, "grad_norm": 7.099383354187012, "learning_rate": 1.4689865300956389e-05, "loss": 1.7583, "step": 126850 }, { "epoch": 0.7973444896195471, "grad_norm": 8.146724700927734, "learning_rate": 1.4689446200011736e-05, "loss": 1.7484, "step": 126860 }, { "epoch": 0.7974073419362442, "grad_norm": 6.495960235595703, "learning_rate": 1.4689027099067083e-05, "loss": 1.4996, "step": 126870 }, { "epoch": 0.7974701942529413, "grad_norm": 6.567388534545898, "learning_rate": 1.4688607998122428e-05, "loss": 1.6315, "step": 126880 }, { "epoch": 0.7975330465696384, "grad_norm": 5.827784538269043, "learning_rate": 1.4688188897177775e-05, "loss": 1.6036, "step": 126890 }, { "epoch": 0.7975958988863355, "grad_norm": 7.262020587921143, "learning_rate": 1.4687769796233122e-05, "loss": 1.5515, "step": 126900 }, { "epoch": 0.7976587512030326, "grad_norm": 6.969066619873047, "learning_rate": 1.468735069528847e-05, "loss": 1.4091, "step": 126910 }, { "epoch": 0.7977216035197298, "grad_norm": 8.006285667419434, "learning_rate": 1.468697350443828e-05, "loss": 1.5577, "step": 126920 }, { "epoch": 0.7977844558364269, "grad_norm": 6.737123966217041, "learning_rate": 1.4686554403493626e-05, "loss": 1.6531, "step": 126930 }, { "epoch": 0.797847308153124, "grad_norm": 6.720739364624023, "learning_rate": 1.4686135302548973e-05, "loss": 1.6536, "step": 126940 }, { "epoch": 0.7979101604698211, "grad_norm": 6.321871757507324, "learning_rate": 1.4685716201604319e-05, "loss": 1.6377, "step": 126950 }, { "epoch": 0.7979730127865182, "grad_norm": 5.901580810546875, "learning_rate": 1.4685297100659666e-05, "loss": 1.4752, "step": 126960 }, { "epoch": 0.7980358651032153, "grad_norm": 7.815974712371826, "learning_rate": 1.4684877999715013e-05, "loss": 1.6385, "step": 126970 }, { "epoch": 0.7980987174199125, "grad_norm": 7.375422954559326, "learning_rate": 1.468445889877036e-05, "loss": 1.3614, "step": 126980 }, { "epoch": 0.7981615697366095, "grad_norm": 7.201641082763672, "learning_rate": 1.4684039797825707e-05, "loss": 1.5465, "step": 126990 }, { "epoch": 0.7982244220533066, "grad_norm": 6.602486610412598, "learning_rate": 1.468362069688105e-05, "loss": 1.5143, "step": 127000 }, { "epoch": 0.7982872743700037, "grad_norm": 6.114508152008057, "learning_rate": 1.4683201595936398e-05, "loss": 1.7175, "step": 127010 }, { "epoch": 0.7983501266867008, "grad_norm": 5.410009384155273, "learning_rate": 1.4682782494991745e-05, "loss": 1.4378, "step": 127020 }, { "epoch": 0.7984129790033979, "grad_norm": 7.354188919067383, "learning_rate": 1.4682363394047092e-05, "loss": 1.5651, "step": 127030 }, { "epoch": 0.798475831320095, "grad_norm": 7.028811931610107, "learning_rate": 1.4681944293102437e-05, "loss": 1.6209, "step": 127040 }, { "epoch": 0.7985386836367921, "grad_norm": 5.333646774291992, "learning_rate": 1.4681525192157784e-05, "loss": 1.6728, "step": 127050 }, { "epoch": 0.7986015359534893, "grad_norm": 6.855044364929199, "learning_rate": 1.4681106091213131e-05, "loss": 1.5802, "step": 127060 }, { "epoch": 0.7986643882701864, "grad_norm": 4.967195987701416, "learning_rate": 1.4680686990268478e-05, "loss": 1.6561, "step": 127070 }, { "epoch": 0.7987272405868835, "grad_norm": 6.009040832519531, "learning_rate": 1.4680267889323822e-05, "loss": 1.4554, "step": 127080 }, { "epoch": 0.7987900929035806, "grad_norm": 7.387078285217285, "learning_rate": 1.467984878837917e-05, "loss": 1.5842, "step": 127090 }, { "epoch": 0.7988529452202777, "grad_norm": 7.210890769958496, "learning_rate": 1.4679429687434516e-05, "loss": 1.7337, "step": 127100 }, { "epoch": 0.7989157975369748, "grad_norm": 5.606622695922852, "learning_rate": 1.4679010586489863e-05, "loss": 1.4004, "step": 127110 }, { "epoch": 0.798978649853672, "grad_norm": 6.040121555328369, "learning_rate": 1.467859148554521e-05, "loss": 1.7046, "step": 127120 }, { "epoch": 0.7990415021703691, "grad_norm": 6.001274585723877, "learning_rate": 1.4678172384600556e-05, "loss": 1.6245, "step": 127130 }, { "epoch": 0.7991043544870662, "grad_norm": 6.918370246887207, "learning_rate": 1.4677753283655903e-05, "loss": 1.6022, "step": 127140 }, { "epoch": 0.7991672068037633, "grad_norm": 6.731177806854248, "learning_rate": 1.467733418271125e-05, "loss": 1.8094, "step": 127150 }, { "epoch": 0.7992300591204604, "grad_norm": 6.004929065704346, "learning_rate": 1.4676915081766597e-05, "loss": 1.742, "step": 127160 }, { "epoch": 0.7992929114371575, "grad_norm": 6.8932976722717285, "learning_rate": 1.467649598082194e-05, "loss": 1.5241, "step": 127170 }, { "epoch": 0.7993557637538546, "grad_norm": 5.122315406799316, "learning_rate": 1.4676076879877288e-05, "loss": 1.5932, "step": 127180 }, { "epoch": 0.7994186160705518, "grad_norm": 5.804356098175049, "learning_rate": 1.4675657778932635e-05, "loss": 1.564, "step": 127190 }, { "epoch": 0.7994814683872489, "grad_norm": 5.46740198135376, "learning_rate": 1.4675238677987982e-05, "loss": 1.5938, "step": 127200 }, { "epoch": 0.799544320703946, "grad_norm": 7.471740245819092, "learning_rate": 1.4674819577043329e-05, "loss": 1.5785, "step": 127210 }, { "epoch": 0.7996071730206431, "grad_norm": 5.829692840576172, "learning_rate": 1.4674400476098674e-05, "loss": 1.613, "step": 127220 }, { "epoch": 0.7996700253373402, "grad_norm": 7.80814790725708, "learning_rate": 1.467398137515402e-05, "loss": 1.6094, "step": 127230 }, { "epoch": 0.7997328776540373, "grad_norm": 6.667564868927002, "learning_rate": 1.4673562274209367e-05, "loss": 1.7065, "step": 127240 }, { "epoch": 0.7997957299707343, "grad_norm": 6.570542812347412, "learning_rate": 1.4673143173264714e-05, "loss": 1.7314, "step": 127250 }, { "epoch": 0.7998585822874315, "grad_norm": 7.139489650726318, "learning_rate": 1.467272407232006e-05, "loss": 1.6567, "step": 127260 }, { "epoch": 0.7999214346041286, "grad_norm": 6.807154655456543, "learning_rate": 1.4672304971375406e-05, "loss": 1.6603, "step": 127270 }, { "epoch": 0.7999842869208257, "grad_norm": 7.466335296630859, "learning_rate": 1.4671885870430753e-05, "loss": 1.5509, "step": 127280 }, { "epoch": 0.8000471392375228, "grad_norm": 5.848179340362549, "learning_rate": 1.46714667694861e-05, "loss": 1.5739, "step": 127290 }, { "epoch": 0.8001099915542199, "grad_norm": 5.600815296173096, "learning_rate": 1.4671047668541448e-05, "loss": 1.5581, "step": 127300 }, { "epoch": 0.800172843870917, "grad_norm": 4.473421096801758, "learning_rate": 1.4670628567596791e-05, "loss": 1.2654, "step": 127310 }, { "epoch": 0.8002356961876141, "grad_norm": 5.2405829429626465, "learning_rate": 1.4670209466652138e-05, "loss": 1.5498, "step": 127320 }, { "epoch": 0.8002985485043113, "grad_norm": 5.992538928985596, "learning_rate": 1.4669790365707485e-05, "loss": 1.5347, "step": 127330 }, { "epoch": 0.8003614008210084, "grad_norm": 7.98852014541626, "learning_rate": 1.4669371264762832e-05, "loss": 1.8067, "step": 127340 }, { "epoch": 0.8004242531377055, "grad_norm": 6.850035190582275, "learning_rate": 1.4668952163818178e-05, "loss": 1.8199, "step": 127350 }, { "epoch": 0.8004871054544026, "grad_norm": 5.654129981994629, "learning_rate": 1.4668533062873525e-05, "loss": 1.4623, "step": 127360 }, { "epoch": 0.8005499577710997, "grad_norm": 6.825001239776611, "learning_rate": 1.4668113961928872e-05, "loss": 1.5293, "step": 127370 }, { "epoch": 0.8006128100877968, "grad_norm": 7.440295219421387, "learning_rate": 1.4667694860984219e-05, "loss": 1.4749, "step": 127380 }, { "epoch": 0.800675662404494, "grad_norm": 5.969831466674805, "learning_rate": 1.4667275760039566e-05, "loss": 1.5684, "step": 127390 }, { "epoch": 0.8007385147211911, "grad_norm": 6.326897621154785, "learning_rate": 1.466685665909491e-05, "loss": 1.5869, "step": 127400 }, { "epoch": 0.8008013670378882, "grad_norm": 6.353309154510498, "learning_rate": 1.4666437558150257e-05, "loss": 1.6948, "step": 127410 }, { "epoch": 0.8008642193545853, "grad_norm": 6.800481796264648, "learning_rate": 1.4666018457205604e-05, "loss": 1.7255, "step": 127420 }, { "epoch": 0.8009270716712824, "grad_norm": 7.093092918395996, "learning_rate": 1.4665599356260951e-05, "loss": 1.8261, "step": 127430 }, { "epoch": 0.8009899239879795, "grad_norm": 6.4873881340026855, "learning_rate": 1.4665180255316296e-05, "loss": 1.7308, "step": 127440 }, { "epoch": 0.8010527763046766, "grad_norm": 5.64680814743042, "learning_rate": 1.4664761154371643e-05, "loss": 1.4815, "step": 127450 }, { "epoch": 0.8011156286213738, "grad_norm": 7.599221706390381, "learning_rate": 1.466434205342699e-05, "loss": 1.4035, "step": 127460 }, { "epoch": 0.8011784809380709, "grad_norm": 6.603479862213135, "learning_rate": 1.4663922952482338e-05, "loss": 1.5161, "step": 127470 }, { "epoch": 0.801241333254768, "grad_norm": 9.680336952209473, "learning_rate": 1.4663503851537681e-05, "loss": 1.5093, "step": 127480 }, { "epoch": 0.8013041855714651, "grad_norm": 7.357080936431885, "learning_rate": 1.4663084750593028e-05, "loss": 1.4461, "step": 127490 }, { "epoch": 0.8013670378881621, "grad_norm": 6.3268914222717285, "learning_rate": 1.4662665649648375e-05, "loss": 1.6055, "step": 127500 }, { "epoch": 0.8014298902048592, "grad_norm": 5.655613422393799, "learning_rate": 1.4662246548703722e-05, "loss": 1.3888, "step": 127510 }, { "epoch": 0.8014927425215563, "grad_norm": 6.709163665771484, "learning_rate": 1.466182744775907e-05, "loss": 1.522, "step": 127520 }, { "epoch": 0.8015555948382534, "grad_norm": 5.913792610168457, "learning_rate": 1.4661408346814415e-05, "loss": 1.4601, "step": 127530 }, { "epoch": 0.8016184471549506, "grad_norm": 6.938288688659668, "learning_rate": 1.4660989245869762e-05, "loss": 1.4908, "step": 127540 }, { "epoch": 0.8016812994716477, "grad_norm": 5.863032341003418, "learning_rate": 1.4660570144925107e-05, "loss": 1.4513, "step": 127550 }, { "epoch": 0.8017441517883448, "grad_norm": 6.508087635040283, "learning_rate": 1.4660151043980454e-05, "loss": 1.6434, "step": 127560 }, { "epoch": 0.8018070041050419, "grad_norm": 7.035425662994385, "learning_rate": 1.46597319430358e-05, "loss": 1.7906, "step": 127570 }, { "epoch": 0.801869856421739, "grad_norm": 5.500781536102295, "learning_rate": 1.4659312842091147e-05, "loss": 1.7829, "step": 127580 }, { "epoch": 0.8019327087384361, "grad_norm": 6.251988887786865, "learning_rate": 1.4658893741146494e-05, "loss": 1.6834, "step": 127590 }, { "epoch": 0.8019955610551333, "grad_norm": 6.672684192657471, "learning_rate": 1.4658474640201841e-05, "loss": 1.8085, "step": 127600 }, { "epoch": 0.8020584133718304, "grad_norm": 7.56199836730957, "learning_rate": 1.4658055539257188e-05, "loss": 1.7226, "step": 127610 }, { "epoch": 0.8021212656885275, "grad_norm": 5.457205772399902, "learning_rate": 1.4657636438312532e-05, "loss": 1.7512, "step": 127620 }, { "epoch": 0.8021841180052246, "grad_norm": 6.948217868804932, "learning_rate": 1.4657217337367879e-05, "loss": 1.4926, "step": 127630 }, { "epoch": 0.8022469703219217, "grad_norm": 5.721208095550537, "learning_rate": 1.4656798236423226e-05, "loss": 1.5069, "step": 127640 }, { "epoch": 0.8023098226386188, "grad_norm": 6.864531517028809, "learning_rate": 1.4656379135478573e-05, "loss": 1.5754, "step": 127650 }, { "epoch": 0.802372674955316, "grad_norm": 5.888504505157471, "learning_rate": 1.4655960034533918e-05, "loss": 1.5975, "step": 127660 }, { "epoch": 0.8024355272720131, "grad_norm": 6.1465888023376465, "learning_rate": 1.4655540933589265e-05, "loss": 1.5006, "step": 127670 }, { "epoch": 0.8024983795887102, "grad_norm": 5.462531089782715, "learning_rate": 1.4655121832644612e-05, "loss": 1.6284, "step": 127680 }, { "epoch": 0.8025612319054073, "grad_norm": 5.80675745010376, "learning_rate": 1.465470273169996e-05, "loss": 1.4707, "step": 127690 }, { "epoch": 0.8026240842221044, "grad_norm": 5.968596935272217, "learning_rate": 1.4654283630755307e-05, "loss": 1.6834, "step": 127700 }, { "epoch": 0.8026869365388015, "grad_norm": 6.94851541519165, "learning_rate": 1.465386452981065e-05, "loss": 1.7579, "step": 127710 }, { "epoch": 0.8027497888554986, "grad_norm": 6.325634002685547, "learning_rate": 1.4653445428865997e-05, "loss": 1.6801, "step": 127720 }, { "epoch": 0.8028126411721958, "grad_norm": 7.044205665588379, "learning_rate": 1.4653026327921344e-05, "loss": 1.6491, "step": 127730 }, { "epoch": 0.8028754934888929, "grad_norm": 6.051846504211426, "learning_rate": 1.4652607226976692e-05, "loss": 1.3398, "step": 127740 }, { "epoch": 0.80293834580559, "grad_norm": 7.053153038024902, "learning_rate": 1.4652188126032037e-05, "loss": 1.6287, "step": 127750 }, { "epoch": 0.803001198122287, "grad_norm": 6.5534234046936035, "learning_rate": 1.4651769025087384e-05, "loss": 1.4482, "step": 127760 }, { "epoch": 0.8030640504389841, "grad_norm": 5.681098461151123, "learning_rate": 1.4651349924142731e-05, "loss": 1.4801, "step": 127770 }, { "epoch": 0.8031269027556812, "grad_norm": 5.653262615203857, "learning_rate": 1.4650930823198078e-05, "loss": 1.4962, "step": 127780 }, { "epoch": 0.8031897550723783, "grad_norm": 6.983116626739502, "learning_rate": 1.4650511722253422e-05, "loss": 1.6625, "step": 127790 }, { "epoch": 0.8032526073890754, "grad_norm": 6.244621276855469, "learning_rate": 1.4650092621308769e-05, "loss": 1.7325, "step": 127800 }, { "epoch": 0.8033154597057726, "grad_norm": 7.330923080444336, "learning_rate": 1.4649673520364116e-05, "loss": 1.4747, "step": 127810 }, { "epoch": 0.8033783120224697, "grad_norm": 6.687651634216309, "learning_rate": 1.4649254419419463e-05, "loss": 1.593, "step": 127820 }, { "epoch": 0.8034411643391668, "grad_norm": 5.817078590393066, "learning_rate": 1.464883531847481e-05, "loss": 1.491, "step": 127830 }, { "epoch": 0.8035040166558639, "grad_norm": 6.812237739562988, "learning_rate": 1.4648416217530155e-05, "loss": 1.8285, "step": 127840 }, { "epoch": 0.803566868972561, "grad_norm": 6.4954423904418945, "learning_rate": 1.4647997116585503e-05, "loss": 1.603, "step": 127850 }, { "epoch": 0.8036297212892581, "grad_norm": 6.715160369873047, "learning_rate": 1.4647578015640848e-05, "loss": 1.6452, "step": 127860 }, { "epoch": 0.8036925736059553, "grad_norm": 7.735368251800537, "learning_rate": 1.4647158914696195e-05, "loss": 1.8229, "step": 127870 }, { "epoch": 0.8037554259226524, "grad_norm": 6.043717384338379, "learning_rate": 1.464673981375154e-05, "loss": 1.5291, "step": 127880 }, { "epoch": 0.8038182782393495, "grad_norm": 7.275433540344238, "learning_rate": 1.4646320712806887e-05, "loss": 1.5868, "step": 127890 }, { "epoch": 0.8038811305560466, "grad_norm": 6.811432838439941, "learning_rate": 1.4645901611862234e-05, "loss": 1.6473, "step": 127900 }, { "epoch": 0.8039439828727437, "grad_norm": 7.094412803649902, "learning_rate": 1.4645482510917582e-05, "loss": 1.5549, "step": 127910 }, { "epoch": 0.8040068351894408, "grad_norm": 7.31378173828125, "learning_rate": 1.4645063409972929e-05, "loss": 1.5162, "step": 127920 }, { "epoch": 0.8040696875061379, "grad_norm": 6.261271953582764, "learning_rate": 1.4644644309028272e-05, "loss": 1.6291, "step": 127930 }, { "epoch": 0.8041325398228351, "grad_norm": 6.118953704833984, "learning_rate": 1.464422520808362e-05, "loss": 1.5994, "step": 127940 }, { "epoch": 0.8041953921395322, "grad_norm": 5.418634414672852, "learning_rate": 1.4643806107138966e-05, "loss": 1.589, "step": 127950 }, { "epoch": 0.8042582444562293, "grad_norm": 6.648963451385498, "learning_rate": 1.4643387006194314e-05, "loss": 1.6008, "step": 127960 }, { "epoch": 0.8043210967729264, "grad_norm": 7.5236382484436035, "learning_rate": 1.4642967905249659e-05, "loss": 1.6685, "step": 127970 }, { "epoch": 0.8043839490896235, "grad_norm": 5.9270477294921875, "learning_rate": 1.4642548804305006e-05, "loss": 1.6547, "step": 127980 }, { "epoch": 0.8044468014063206, "grad_norm": 7.353875637054443, "learning_rate": 1.4642129703360353e-05, "loss": 1.4871, "step": 127990 }, { "epoch": 0.8045096537230177, "grad_norm": 7.18826961517334, "learning_rate": 1.46417106024157e-05, "loss": 1.8931, "step": 128000 }, { "epoch": 0.8045725060397148, "grad_norm": 5.180515766143799, "learning_rate": 1.4641291501471047e-05, "loss": 1.5294, "step": 128010 }, { "epoch": 0.8046353583564119, "grad_norm": 5.289478778839111, "learning_rate": 1.4640872400526391e-05, "loss": 1.4159, "step": 128020 }, { "epoch": 0.804698210673109, "grad_norm": 6.481279373168945, "learning_rate": 1.4640453299581738e-05, "loss": 1.9098, "step": 128030 }, { "epoch": 0.8047610629898061, "grad_norm": 6.723080635070801, "learning_rate": 1.4640034198637085e-05, "loss": 1.5707, "step": 128040 }, { "epoch": 0.8048239153065032, "grad_norm": 5.434211254119873, "learning_rate": 1.4639615097692432e-05, "loss": 1.3818, "step": 128050 }, { "epoch": 0.8048867676232003, "grad_norm": 7.487259387969971, "learning_rate": 1.4639195996747777e-05, "loss": 1.3277, "step": 128060 }, { "epoch": 0.8049496199398974, "grad_norm": 6.7534942626953125, "learning_rate": 1.4638776895803125e-05, "loss": 1.6875, "step": 128070 }, { "epoch": 0.8050124722565946, "grad_norm": 6.124451160430908, "learning_rate": 1.4638357794858472e-05, "loss": 1.6727, "step": 128080 }, { "epoch": 0.8050753245732917, "grad_norm": 6.262798309326172, "learning_rate": 1.4637938693913819e-05, "loss": 1.5039, "step": 128090 }, { "epoch": 0.8051381768899888, "grad_norm": 6.658420562744141, "learning_rate": 1.4637519592969162e-05, "loss": 1.5885, "step": 128100 }, { "epoch": 0.8052010292066859, "grad_norm": 7.250082015991211, "learning_rate": 1.463710049202451e-05, "loss": 1.4267, "step": 128110 }, { "epoch": 0.805263881523383, "grad_norm": 6.2989420890808105, "learning_rate": 1.4636681391079856e-05, "loss": 1.4532, "step": 128120 }, { "epoch": 0.8053267338400801, "grad_norm": 7.128006935119629, "learning_rate": 1.4636262290135204e-05, "loss": 1.7059, "step": 128130 }, { "epoch": 0.8053895861567772, "grad_norm": 6.768283367156982, "learning_rate": 1.463584318919055e-05, "loss": 1.7792, "step": 128140 }, { "epoch": 0.8054524384734744, "grad_norm": 7.063636302947998, "learning_rate": 1.4635424088245896e-05, "loss": 1.4833, "step": 128150 }, { "epoch": 0.8055152907901715, "grad_norm": 4.943026542663574, "learning_rate": 1.4635004987301243e-05, "loss": 1.4822, "step": 128160 }, { "epoch": 0.8055781431068686, "grad_norm": 6.337063312530518, "learning_rate": 1.4634585886356588e-05, "loss": 1.4695, "step": 128170 }, { "epoch": 0.8056409954235657, "grad_norm": 6.414618492126465, "learning_rate": 1.4634166785411936e-05, "loss": 1.5663, "step": 128180 }, { "epoch": 0.8057038477402628, "grad_norm": 6.596250534057617, "learning_rate": 1.4633747684467281e-05, "loss": 1.6292, "step": 128190 }, { "epoch": 0.8057667000569599, "grad_norm": 7.159796237945557, "learning_rate": 1.4633328583522628e-05, "loss": 1.6192, "step": 128200 }, { "epoch": 0.805829552373657, "grad_norm": 5.642171382904053, "learning_rate": 1.4632909482577975e-05, "loss": 1.3166, "step": 128210 }, { "epoch": 0.8058924046903542, "grad_norm": 6.385321140289307, "learning_rate": 1.4632490381633322e-05, "loss": 1.5261, "step": 128220 }, { "epoch": 0.8059552570070513, "grad_norm": 7.153017997741699, "learning_rate": 1.463207128068867e-05, "loss": 1.5968, "step": 128230 }, { "epoch": 0.8060181093237484, "grad_norm": 4.993671894073486, "learning_rate": 1.4631652179744013e-05, "loss": 1.6347, "step": 128240 }, { "epoch": 0.8060809616404455, "grad_norm": 6.2314982414245605, "learning_rate": 1.463123307879936e-05, "loss": 1.4873, "step": 128250 }, { "epoch": 0.8061438139571426, "grad_norm": 5.749682903289795, "learning_rate": 1.4630813977854707e-05, "loss": 1.2734, "step": 128260 }, { "epoch": 0.8062066662738396, "grad_norm": 6.511654376983643, "learning_rate": 1.4630394876910054e-05, "loss": 1.7047, "step": 128270 }, { "epoch": 0.8062695185905367, "grad_norm": 6.978699684143066, "learning_rate": 1.46299757759654e-05, "loss": 1.6343, "step": 128280 }, { "epoch": 0.8063323709072339, "grad_norm": 6.761673450469971, "learning_rate": 1.4629556675020747e-05, "loss": 1.5412, "step": 128290 }, { "epoch": 0.806395223223931, "grad_norm": 6.2292914390563965, "learning_rate": 1.4629137574076094e-05, "loss": 1.7083, "step": 128300 }, { "epoch": 0.8064580755406281, "grad_norm": 6.495267868041992, "learning_rate": 1.462871847313144e-05, "loss": 1.5973, "step": 128310 }, { "epoch": 0.8065209278573252, "grad_norm": 6.518203258514404, "learning_rate": 1.4628299372186788e-05, "loss": 1.5758, "step": 128320 }, { "epoch": 0.8065837801740223, "grad_norm": 5.8363566398620605, "learning_rate": 1.4627880271242131e-05, "loss": 1.2377, "step": 128330 }, { "epoch": 0.8066466324907194, "grad_norm": 7.506755352020264, "learning_rate": 1.4627461170297478e-05, "loss": 1.4138, "step": 128340 }, { "epoch": 0.8067094848074166, "grad_norm": 6.106832027435303, "learning_rate": 1.4627042069352826e-05, "loss": 1.6611, "step": 128350 }, { "epoch": 0.8067723371241137, "grad_norm": 5.887393474578857, "learning_rate": 1.4626622968408173e-05, "loss": 1.6214, "step": 128360 }, { "epoch": 0.8068351894408108, "grad_norm": 5.715047836303711, "learning_rate": 1.4626203867463518e-05, "loss": 1.7647, "step": 128370 }, { "epoch": 0.8068980417575079, "grad_norm": 4.8202080726623535, "learning_rate": 1.4625784766518865e-05, "loss": 1.6554, "step": 128380 }, { "epoch": 0.806960894074205, "grad_norm": 6.009983539581299, "learning_rate": 1.4625365665574212e-05, "loss": 1.4481, "step": 128390 }, { "epoch": 0.8070237463909021, "grad_norm": 6.740313529968262, "learning_rate": 1.462494656462956e-05, "loss": 1.6375, "step": 128400 }, { "epoch": 0.8070865987075992, "grad_norm": 6.739039421081543, "learning_rate": 1.4624527463684903e-05, "loss": 1.4724, "step": 128410 }, { "epoch": 0.8071494510242964, "grad_norm": 6.603867053985596, "learning_rate": 1.462410836274025e-05, "loss": 1.462, "step": 128420 }, { "epoch": 0.8072123033409935, "grad_norm": 6.111894130706787, "learning_rate": 1.4623689261795597e-05, "loss": 1.3688, "step": 128430 }, { "epoch": 0.8072751556576906, "grad_norm": 5.348015308380127, "learning_rate": 1.4623270160850944e-05, "loss": 1.4991, "step": 128440 }, { "epoch": 0.8073380079743877, "grad_norm": 5.96295690536499, "learning_rate": 1.4622851059906291e-05, "loss": 1.5457, "step": 128450 }, { "epoch": 0.8074008602910848, "grad_norm": 5.46589469909668, "learning_rate": 1.4622431958961637e-05, "loss": 1.4834, "step": 128460 }, { "epoch": 0.8074637126077819, "grad_norm": 11.722580909729004, "learning_rate": 1.4622012858016984e-05, "loss": 1.7034, "step": 128470 }, { "epoch": 0.807526564924479, "grad_norm": 6.253749370574951, "learning_rate": 1.462159375707233e-05, "loss": 1.4491, "step": 128480 }, { "epoch": 0.8075894172411762, "grad_norm": 5.999065399169922, "learning_rate": 1.4621174656127676e-05, "loss": 1.6275, "step": 128490 }, { "epoch": 0.8076522695578733, "grad_norm": 6.462035179138184, "learning_rate": 1.4620755555183021e-05, "loss": 1.5508, "step": 128500 }, { "epoch": 0.8077151218745704, "grad_norm": 6.563819408416748, "learning_rate": 1.4620336454238369e-05, "loss": 1.7511, "step": 128510 }, { "epoch": 0.8077779741912675, "grad_norm": 5.843883037567139, "learning_rate": 1.4619917353293716e-05, "loss": 1.9093, "step": 128520 }, { "epoch": 0.8078408265079645, "grad_norm": 6.107423305511475, "learning_rate": 1.4619498252349063e-05, "loss": 1.6832, "step": 128530 }, { "epoch": 0.8079036788246616, "grad_norm": 5.802535057067871, "learning_rate": 1.461907915140441e-05, "loss": 1.5708, "step": 128540 }, { "epoch": 0.8079665311413587, "grad_norm": 7.047648906707764, "learning_rate": 1.4618660050459753e-05, "loss": 1.6832, "step": 128550 }, { "epoch": 0.8080293834580559, "grad_norm": 6.248017311096191, "learning_rate": 1.46182409495151e-05, "loss": 1.3829, "step": 128560 }, { "epoch": 0.808092235774753, "grad_norm": 5.463923454284668, "learning_rate": 1.4617821848570448e-05, "loss": 1.6062, "step": 128570 }, { "epoch": 0.8081550880914501, "grad_norm": 6.9560136795043945, "learning_rate": 1.4617402747625795e-05, "loss": 1.7184, "step": 128580 }, { "epoch": 0.8082179404081472, "grad_norm": 7.091958045959473, "learning_rate": 1.461698364668114e-05, "loss": 1.4344, "step": 128590 }, { "epoch": 0.8082807927248443, "grad_norm": 6.31084680557251, "learning_rate": 1.4616564545736487e-05, "loss": 1.6064, "step": 128600 }, { "epoch": 0.8083436450415414, "grad_norm": 6.201137542724609, "learning_rate": 1.4616145444791834e-05, "loss": 1.4612, "step": 128610 }, { "epoch": 0.8084064973582386, "grad_norm": 3.948305606842041, "learning_rate": 1.4615726343847181e-05, "loss": 1.4763, "step": 128620 }, { "epoch": 0.8084693496749357, "grad_norm": 6.799100875854492, "learning_rate": 1.4615307242902528e-05, "loss": 1.5969, "step": 128630 }, { "epoch": 0.8085322019916328, "grad_norm": 5.397566795349121, "learning_rate": 1.4614888141957872e-05, "loss": 1.4547, "step": 128640 }, { "epoch": 0.8085950543083299, "grad_norm": 6.227335453033447, "learning_rate": 1.4614469041013219e-05, "loss": 1.4481, "step": 128650 }, { "epoch": 0.808657906625027, "grad_norm": 6.401673793792725, "learning_rate": 1.4614049940068566e-05, "loss": 1.5091, "step": 128660 }, { "epoch": 0.8087207589417241, "grad_norm": 6.572157382965088, "learning_rate": 1.4613630839123913e-05, "loss": 1.7148, "step": 128670 }, { "epoch": 0.8087836112584212, "grad_norm": 5.99587345123291, "learning_rate": 1.4613211738179259e-05, "loss": 1.5073, "step": 128680 }, { "epoch": 0.8088464635751184, "grad_norm": 6.44499397277832, "learning_rate": 1.4612792637234606e-05, "loss": 1.6425, "step": 128690 }, { "epoch": 0.8089093158918155, "grad_norm": 6.990484714508057, "learning_rate": 1.4612373536289953e-05, "loss": 1.5177, "step": 128700 } ], "logging_steps": 10, "max_steps": 477312, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.892036676517192e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }