{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 30324, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00065955447095487, "grad_norm": 2.413745403289795, "learning_rate": 1.4836795252225522e-07, "loss": 2.2717, "step": 10 }, { "epoch": 0.00131910894190974, "grad_norm": 2.212498426437378, "learning_rate": 3.1322123310253876e-07, "loss": 2.2465, "step": 20 }, { "epoch": 0.00197866341286461, "grad_norm": 2.1338729858398438, "learning_rate": 4.780745136828223e-07, "loss": 2.2954, "step": 30 }, { "epoch": 0.00263821788381948, "grad_norm": 1.911232352256775, "learning_rate": 6.429277942631058e-07, "loss": 2.2998, "step": 40 }, { "epoch": 0.00329777235477435, "grad_norm": 2.3705928325653076, "learning_rate": 8.077810748433894e-07, "loss": 2.2484, "step": 50 }, { "epoch": 0.00395732682572922, "grad_norm": 2.46266508102417, "learning_rate": 9.72634355423673e-07, "loss": 2.2459, "step": 60 }, { "epoch": 0.00461688129668409, "grad_norm": 2.2639856338500977, "learning_rate": 1.1374876360039564e-06, "loss": 2.1686, "step": 70 }, { "epoch": 0.00527643576763896, "grad_norm": 2.0852646827697754, "learning_rate": 1.3023409165842401e-06, "loss": 2.1975, "step": 80 }, { "epoch": 0.00593599023859383, "grad_norm": 3.11877179145813, "learning_rate": 1.4671941971645236e-06, "loss": 2.2112, "step": 90 }, { "epoch": 0.0065955447095487, "grad_norm": 1.8753658533096313, "learning_rate": 1.6320474777448073e-06, "loss": 2.2048, "step": 100 }, { "epoch": 0.00725509918050357, "grad_norm": 2.552967071533203, "learning_rate": 1.796900758325091e-06, "loss": 2.1385, "step": 110 }, { "epoch": 0.00791465365145844, "grad_norm": 2.5597939491271973, "learning_rate": 1.9617540389053744e-06, "loss": 2.106, "step": 120 }, { "epoch": 0.00857420812241331, "grad_norm": 2.459486722946167, "learning_rate": 2.126607319485658e-06, "loss": 2.1547, "step": 130 }, { "epoch": 0.00923376259336818, "grad_norm": 2.0308799743652344, "learning_rate": 2.2914606000659413e-06, "loss": 2.0145, "step": 140 }, { "epoch": 0.009893317064323049, "grad_norm": 2.0622429847717285, "learning_rate": 2.456313880646225e-06, "loss": 1.9606, "step": 150 }, { "epoch": 0.01055287153527792, "grad_norm": 1.494454026222229, "learning_rate": 2.6211671612265087e-06, "loss": 1.7742, "step": 160 }, { "epoch": 0.01121242600623279, "grad_norm": 1.3403377532958984, "learning_rate": 2.786020441806792e-06, "loss": 1.7, "step": 170 }, { "epoch": 0.01187198047718766, "grad_norm": 1.1371599435806274, "learning_rate": 2.9508737223870756e-06, "loss": 1.693, "step": 180 }, { "epoch": 0.01253153494814253, "grad_norm": 1.0440349578857422, "learning_rate": 3.115727002967359e-06, "loss": 1.6249, "step": 190 }, { "epoch": 0.0131910894190974, "grad_norm": 0.8132500052452087, "learning_rate": 3.280580283547643e-06, "loss": 1.5336, "step": 200 }, { "epoch": 0.013850643890052269, "grad_norm": 0.8266546130180359, "learning_rate": 3.4454335641279264e-06, "loss": 1.4346, "step": 210 }, { "epoch": 0.01451019836100714, "grad_norm": 0.6412438154220581, "learning_rate": 3.61028684470821e-06, "loss": 1.4265, "step": 220 }, { "epoch": 0.01516975283196201, "grad_norm": 0.5342578887939453, "learning_rate": 3.7751401252884933e-06, "loss": 1.2813, "step": 230 }, { "epoch": 0.01582930730291688, "grad_norm": 0.3916451036930084, "learning_rate": 3.939993405868777e-06, "loss": 1.3416, "step": 240 }, { "epoch": 0.01648886177387175, "grad_norm": 0.4304097294807434, "learning_rate": 4.104846686449061e-06, "loss": 1.3471, "step": 250 }, { "epoch": 0.01714841624482662, "grad_norm": 0.46680748462677, "learning_rate": 4.269699967029344e-06, "loss": 1.2997, "step": 260 }, { "epoch": 0.01780797071578149, "grad_norm": 0.37838730216026306, "learning_rate": 4.434553247609628e-06, "loss": 1.3106, "step": 270 }, { "epoch": 0.01846752518673636, "grad_norm": 0.3575062155723572, "learning_rate": 4.599406528189911e-06, "loss": 1.219, "step": 280 }, { "epoch": 0.01912707965769123, "grad_norm": 0.43343842029571533, "learning_rate": 4.7642598087701945e-06, "loss": 1.2032, "step": 290 }, { "epoch": 0.019786634128646098, "grad_norm": 0.4978269636631012, "learning_rate": 4.929113089350478e-06, "loss": 1.2446, "step": 300 }, { "epoch": 0.02044618859960097, "grad_norm": 0.3755980432033539, "learning_rate": 5.0939663699307614e-06, "loss": 1.3004, "step": 310 }, { "epoch": 0.02110574307055584, "grad_norm": 0.5298649668693542, "learning_rate": 5.258819650511045e-06, "loss": 1.2482, "step": 320 }, { "epoch": 0.02176529754151071, "grad_norm": 0.38744017481803894, "learning_rate": 5.423672931091329e-06, "loss": 1.1939, "step": 330 }, { "epoch": 0.02242485201246558, "grad_norm": 0.46404343843460083, "learning_rate": 5.588526211671613e-06, "loss": 1.1862, "step": 340 }, { "epoch": 0.023084406483420448, "grad_norm": 0.46498826146125793, "learning_rate": 5.753379492251896e-06, "loss": 1.1794, "step": 350 }, { "epoch": 0.02374396095437532, "grad_norm": 0.5952163934707642, "learning_rate": 5.91823277283218e-06, "loss": 1.129, "step": 360 }, { "epoch": 0.02440351542533019, "grad_norm": 0.5538243055343628, "learning_rate": 6.083086053412463e-06, "loss": 1.1399, "step": 370 }, { "epoch": 0.02506306989628506, "grad_norm": 0.6970240473747253, "learning_rate": 6.2479393339927465e-06, "loss": 1.1051, "step": 380 }, { "epoch": 0.02572262436723993, "grad_norm": 0.7881635427474976, "learning_rate": 6.412792614573031e-06, "loss": 1.1201, "step": 390 }, { "epoch": 0.0263821788381948, "grad_norm": 0.39730632305145264, "learning_rate": 6.577645895153314e-06, "loss": 1.1826, "step": 400 }, { "epoch": 0.02704173330914967, "grad_norm": 0.5324128866195679, "learning_rate": 6.742499175733598e-06, "loss": 1.1355, "step": 410 }, { "epoch": 0.027701287780104538, "grad_norm": 0.446857213973999, "learning_rate": 6.907352456313881e-06, "loss": 1.1181, "step": 420 }, { "epoch": 0.02836084225105941, "grad_norm": 0.7503374218940735, "learning_rate": 7.072205736894165e-06, "loss": 1.1523, "step": 430 }, { "epoch": 0.02902039672201428, "grad_norm": 0.6200227737426758, "learning_rate": 7.237059017474448e-06, "loss": 1.0842, "step": 440 }, { "epoch": 0.02967995119296915, "grad_norm": 0.5611582398414612, "learning_rate": 7.401912298054732e-06, "loss": 1.1075, "step": 450 }, { "epoch": 0.03033950566392402, "grad_norm": 0.41381916403770447, "learning_rate": 7.566765578635016e-06, "loss": 1.075, "step": 460 }, { "epoch": 0.030999060134878888, "grad_norm": 0.657398521900177, "learning_rate": 7.731618859215299e-06, "loss": 1.1104, "step": 470 }, { "epoch": 0.03165861460583376, "grad_norm": 0.9275014400482178, "learning_rate": 7.896472139795583e-06, "loss": 1.1215, "step": 480 }, { "epoch": 0.03231816907678863, "grad_norm": 0.6613135933876038, "learning_rate": 8.061325420375865e-06, "loss": 1.1092, "step": 490 }, { "epoch": 0.0329777235477435, "grad_norm": 0.7839298248291016, "learning_rate": 8.22617870095615e-06, "loss": 1.0522, "step": 500 }, { "epoch": 0.03363727801869837, "grad_norm": 0.6185851693153381, "learning_rate": 8.391031981536434e-06, "loss": 1.074, "step": 510 }, { "epoch": 0.03429683248965324, "grad_norm": 0.6220463514328003, "learning_rate": 8.555885262116717e-06, "loss": 1.0636, "step": 520 }, { "epoch": 0.034956386960608106, "grad_norm": 0.6293200254440308, "learning_rate": 8.720738542697001e-06, "loss": 1.0748, "step": 530 }, { "epoch": 0.03561594143156298, "grad_norm": 0.6412210464477539, "learning_rate": 8.885591823277284e-06, "loss": 1.0866, "step": 540 }, { "epoch": 0.03627549590251785, "grad_norm": 0.8587160706520081, "learning_rate": 9.050445103857568e-06, "loss": 1.0973, "step": 550 }, { "epoch": 0.03693505037347272, "grad_norm": 0.6351908445358276, "learning_rate": 9.21529838443785e-06, "loss": 1.0165, "step": 560 }, { "epoch": 0.03759460484442759, "grad_norm": 0.8718788623809814, "learning_rate": 9.380151665018135e-06, "loss": 1.0535, "step": 570 }, { "epoch": 0.03825415931538246, "grad_norm": 0.6819501519203186, "learning_rate": 9.545004945598417e-06, "loss": 1.0329, "step": 580 }, { "epoch": 0.03891371378633733, "grad_norm": 0.6043257117271423, "learning_rate": 9.709858226178702e-06, "loss": 0.9932, "step": 590 }, { "epoch": 0.039573268257292196, "grad_norm": 0.8498331308364868, "learning_rate": 9.874711506758984e-06, "loss": 1.0686, "step": 600 }, { "epoch": 0.04023282272824707, "grad_norm": 0.6236681938171387, "learning_rate": 1.0039564787339269e-05, "loss": 1.0402, "step": 610 }, { "epoch": 0.04089237719920194, "grad_norm": 0.8242946267127991, "learning_rate": 1.0204418067919553e-05, "loss": 1.0139, "step": 620 }, { "epoch": 0.04155193167015681, "grad_norm": 0.7272655367851257, "learning_rate": 1.0369271348499836e-05, "loss": 1.0754, "step": 630 }, { "epoch": 0.04221148614111168, "grad_norm": 0.6365894079208374, "learning_rate": 1.053412462908012e-05, "loss": 1.0287, "step": 640 }, { "epoch": 0.04287104061206655, "grad_norm": 0.5730026960372925, "learning_rate": 1.0698977909660403e-05, "loss": 1.0192, "step": 650 }, { "epoch": 0.04353059508302142, "grad_norm": 0.8422584533691406, "learning_rate": 1.0863831190240687e-05, "loss": 1.0297, "step": 660 }, { "epoch": 0.044190149553976286, "grad_norm": 0.5889145135879517, "learning_rate": 1.102868447082097e-05, "loss": 1.0257, "step": 670 }, { "epoch": 0.04484970402493116, "grad_norm": 0.7314364910125732, "learning_rate": 1.1193537751401254e-05, "loss": 1.0222, "step": 680 }, { "epoch": 0.04550925849588603, "grad_norm": 0.755777895450592, "learning_rate": 1.1358391031981536e-05, "loss": 1.0133, "step": 690 }, { "epoch": 0.046168812966840896, "grad_norm": 0.712040364742279, "learning_rate": 1.152324431256182e-05, "loss": 1.0136, "step": 700 }, { "epoch": 0.04682836743779577, "grad_norm": 0.843165397644043, "learning_rate": 1.1688097593142103e-05, "loss": 1.0341, "step": 710 }, { "epoch": 0.04748792190875064, "grad_norm": 0.75517338514328, "learning_rate": 1.1852950873722388e-05, "loss": 0.9576, "step": 720 }, { "epoch": 0.04814747637970551, "grad_norm": 0.619825541973114, "learning_rate": 1.2017804154302672e-05, "loss": 1.0275, "step": 730 }, { "epoch": 0.04880703085066038, "grad_norm": 0.5250429511070251, "learning_rate": 1.2182657434882955e-05, "loss": 0.9979, "step": 740 }, { "epoch": 0.04946658532161525, "grad_norm": 0.7191141843795776, "learning_rate": 1.2347510715463239e-05, "loss": 0.9829, "step": 750 }, { "epoch": 0.05012613979257012, "grad_norm": 0.6733546257019043, "learning_rate": 1.2512363996043521e-05, "loss": 1.0365, "step": 760 }, { "epoch": 0.050785694263524986, "grad_norm": 0.5763381719589233, "learning_rate": 1.2677217276623808e-05, "loss": 1.0175, "step": 770 }, { "epoch": 0.05144524873447986, "grad_norm": 0.6568360924720764, "learning_rate": 1.2842070557204088e-05, "loss": 0.9574, "step": 780 }, { "epoch": 0.05210480320543473, "grad_norm": 0.5359265208244324, "learning_rate": 1.3006923837784374e-05, "loss": 1.0253, "step": 790 }, { "epoch": 0.0527643576763896, "grad_norm": 0.6500004529953003, "learning_rate": 1.3171777118364655e-05, "loss": 1.0054, "step": 800 }, { "epoch": 0.05342391214734447, "grad_norm": 0.5643205642700195, "learning_rate": 1.3336630398944941e-05, "loss": 1.0252, "step": 810 }, { "epoch": 0.05408346661829934, "grad_norm": 0.675159752368927, "learning_rate": 1.3501483679525222e-05, "loss": 0.9847, "step": 820 }, { "epoch": 0.05474302108925421, "grad_norm": 0.6375489234924316, "learning_rate": 1.3666336960105508e-05, "loss": 0.9607, "step": 830 }, { "epoch": 0.055402575560209076, "grad_norm": 0.5561162233352661, "learning_rate": 1.3831190240685791e-05, "loss": 1.0017, "step": 840 }, { "epoch": 0.05606213003116395, "grad_norm": 0.5009911060333252, "learning_rate": 1.3996043521266075e-05, "loss": 0.9559, "step": 850 }, { "epoch": 0.05672168450211882, "grad_norm": 0.6449034214019775, "learning_rate": 1.4160896801846358e-05, "loss": 1.0037, "step": 860 }, { "epoch": 0.05738123897307369, "grad_norm": 0.5964744687080383, "learning_rate": 1.4325750082426642e-05, "loss": 1.0194, "step": 870 }, { "epoch": 0.05804079344402856, "grad_norm": 0.8459124565124512, "learning_rate": 1.4490603363006925e-05, "loss": 0.9388, "step": 880 }, { "epoch": 0.05870034791498343, "grad_norm": 0.630128026008606, "learning_rate": 1.4655456643587209e-05, "loss": 0.9996, "step": 890 }, { "epoch": 0.0593599023859383, "grad_norm": 0.5865899920463562, "learning_rate": 1.4820309924167492e-05, "loss": 0.9518, "step": 900 }, { "epoch": 0.060019456856893166, "grad_norm": 0.6959146857261658, "learning_rate": 1.4985163204747776e-05, "loss": 0.9677, "step": 910 }, { "epoch": 0.06067901132784804, "grad_norm": 0.5942516922950745, "learning_rate": 1.5150016485328059e-05, "loss": 0.9827, "step": 920 }, { "epoch": 0.06133856579880291, "grad_norm": 0.545923113822937, "learning_rate": 1.5314869765908343e-05, "loss": 0.952, "step": 930 }, { "epoch": 0.061998120269757775, "grad_norm": 0.6769543886184692, "learning_rate": 1.5479723046488627e-05, "loss": 0.9755, "step": 940 }, { "epoch": 0.06265767474071265, "grad_norm": 0.6097630262374878, "learning_rate": 1.564457632706891e-05, "loss": 1.004, "step": 950 }, { "epoch": 0.06331722921166752, "grad_norm": 0.6021948456764221, "learning_rate": 1.5809429607649192e-05, "loss": 0.9628, "step": 960 }, { "epoch": 0.06397678368262238, "grad_norm": 0.6364136934280396, "learning_rate": 1.5974282888229477e-05, "loss": 0.9575, "step": 970 }, { "epoch": 0.06463633815357726, "grad_norm": 0.6477786302566528, "learning_rate": 1.613913616880976e-05, "loss": 1.0281, "step": 980 }, { "epoch": 0.06529589262453213, "grad_norm": 0.6194107532501221, "learning_rate": 1.6303989449390045e-05, "loss": 1.0091, "step": 990 }, { "epoch": 0.065955447095487, "grad_norm": 0.6899977922439575, "learning_rate": 1.6468842729970326e-05, "loss": 0.9559, "step": 1000 }, { "epoch": 0.06661500156644187, "grad_norm": 0.6325615048408508, "learning_rate": 1.663369601055061e-05, "loss": 0.9716, "step": 1010 }, { "epoch": 0.06727455603739674, "grad_norm": 0.5693467259407043, "learning_rate": 1.6798549291130895e-05, "loss": 0.981, "step": 1020 }, { "epoch": 0.0679341105083516, "grad_norm": 0.6441985368728638, "learning_rate": 1.696340257171118e-05, "loss": 0.9821, "step": 1030 }, { "epoch": 0.06859366497930648, "grad_norm": 0.6741989850997925, "learning_rate": 1.712825585229146e-05, "loss": 0.9725, "step": 1040 }, { "epoch": 0.06925321945026135, "grad_norm": 0.6150327920913696, "learning_rate": 1.7293109132871744e-05, "loss": 0.9871, "step": 1050 }, { "epoch": 0.06991277392121621, "grad_norm": 0.5626732110977173, "learning_rate": 1.745796241345203e-05, "loss": 0.9707, "step": 1060 }, { "epoch": 0.07057232839217109, "grad_norm": 0.5015641450881958, "learning_rate": 1.7622815694032313e-05, "loss": 0.974, "step": 1070 }, { "epoch": 0.07123188286312596, "grad_norm": 0.5498797297477722, "learning_rate": 1.7787668974612594e-05, "loss": 0.9254, "step": 1080 }, { "epoch": 0.07189143733408083, "grad_norm": 0.6015375852584839, "learning_rate": 1.795252225519288e-05, "loss": 0.9425, "step": 1090 }, { "epoch": 0.0725509918050357, "grad_norm": 0.6349003911018372, "learning_rate": 1.8117375535773163e-05, "loss": 0.9542, "step": 1100 }, { "epoch": 0.07321054627599057, "grad_norm": 0.5576942563056946, "learning_rate": 1.8282228816353447e-05, "loss": 0.9377, "step": 1110 }, { "epoch": 0.07387010074694544, "grad_norm": 0.5731057524681091, "learning_rate": 1.8447082096933728e-05, "loss": 0.9705, "step": 1120 }, { "epoch": 0.07452965521790031, "grad_norm": 0.5626952648162842, "learning_rate": 1.8611935377514016e-05, "loss": 0.9557, "step": 1130 }, { "epoch": 0.07518920968885517, "grad_norm": 0.5902084708213806, "learning_rate": 1.8776788658094296e-05, "loss": 0.9629, "step": 1140 }, { "epoch": 0.07584876415981005, "grad_norm": 0.640737771987915, "learning_rate": 1.894164193867458e-05, "loss": 1.0292, "step": 1150 }, { "epoch": 0.07650831863076492, "grad_norm": 0.5191999673843384, "learning_rate": 1.9106495219254865e-05, "loss": 1.0054, "step": 1160 }, { "epoch": 0.07716787310171978, "grad_norm": 0.5990976095199585, "learning_rate": 1.927134849983515e-05, "loss": 0.9793, "step": 1170 }, { "epoch": 0.07782742757267466, "grad_norm": 0.5550162196159363, "learning_rate": 1.943620178041543e-05, "loss": 0.9916, "step": 1180 }, { "epoch": 0.07848698204362953, "grad_norm": 0.6130297183990479, "learning_rate": 1.9601055060995715e-05, "loss": 0.9418, "step": 1190 }, { "epoch": 0.07914653651458439, "grad_norm": 0.6504642963409424, "learning_rate": 1.9765908341576e-05, "loss": 0.9064, "step": 1200 }, { "epoch": 0.07980609098553927, "grad_norm": 0.6972726583480835, "learning_rate": 1.9930761622156283e-05, "loss": 0.9085, "step": 1210 }, { "epoch": 0.08046564545649414, "grad_norm": 0.5826172828674316, "learning_rate": 2.0095614902736564e-05, "loss": 0.9312, "step": 1220 }, { "epoch": 0.081125199927449, "grad_norm": 0.6361492872238159, "learning_rate": 2.026046818331685e-05, "loss": 0.9591, "step": 1230 }, { "epoch": 0.08178475439840388, "grad_norm": 0.6554346084594727, "learning_rate": 2.0425321463897133e-05, "loss": 0.9251, "step": 1240 }, { "epoch": 0.08244430886935875, "grad_norm": 0.5757082104682922, "learning_rate": 2.0590174744477417e-05, "loss": 0.9673, "step": 1250 }, { "epoch": 0.08310386334031362, "grad_norm": 0.7459129095077515, "learning_rate": 2.0755028025057698e-05, "loss": 0.9309, "step": 1260 }, { "epoch": 0.08376341781126849, "grad_norm": 0.670549750328064, "learning_rate": 2.0919881305637982e-05, "loss": 0.9372, "step": 1270 }, { "epoch": 0.08442297228222335, "grad_norm": 0.6436546444892883, "learning_rate": 2.1084734586218267e-05, "loss": 0.9501, "step": 1280 }, { "epoch": 0.08508252675317823, "grad_norm": 0.6000171303749084, "learning_rate": 2.124958786679855e-05, "loss": 0.9261, "step": 1290 }, { "epoch": 0.0857420812241331, "grad_norm": 0.6313101053237915, "learning_rate": 2.1414441147378832e-05, "loss": 0.9222, "step": 1300 }, { "epoch": 0.08640163569508796, "grad_norm": 0.833135724067688, "learning_rate": 2.157929442795912e-05, "loss": 0.9619, "step": 1310 }, { "epoch": 0.08706119016604284, "grad_norm": 0.5731329321861267, "learning_rate": 2.17441477085394e-05, "loss": 0.967, "step": 1320 }, { "epoch": 0.08772074463699771, "grad_norm": 0.6162570714950562, "learning_rate": 2.1909000989119685e-05, "loss": 0.9502, "step": 1330 }, { "epoch": 0.08838029910795257, "grad_norm": 0.6271604895591736, "learning_rate": 2.2073854269699966e-05, "loss": 0.9719, "step": 1340 }, { "epoch": 0.08903985357890745, "grad_norm": 0.7032260894775391, "learning_rate": 2.2238707550280253e-05, "loss": 0.9074, "step": 1350 }, { "epoch": 0.08969940804986232, "grad_norm": 0.62782222032547, "learning_rate": 2.2403560830860534e-05, "loss": 0.9138, "step": 1360 }, { "epoch": 0.09035896252081718, "grad_norm": 0.5636465549468994, "learning_rate": 2.256841411144082e-05, "loss": 0.9307, "step": 1370 }, { "epoch": 0.09101851699177206, "grad_norm": 0.6232348084449768, "learning_rate": 2.2733267392021103e-05, "loss": 0.9217, "step": 1380 }, { "epoch": 0.09167807146272693, "grad_norm": 0.5922995209693909, "learning_rate": 2.2898120672601387e-05, "loss": 0.9524, "step": 1390 }, { "epoch": 0.09233762593368179, "grad_norm": 0.5596097111701965, "learning_rate": 2.3062973953181668e-05, "loss": 0.9447, "step": 1400 }, { "epoch": 0.09299718040463667, "grad_norm": 0.5623578429222107, "learning_rate": 2.3227827233761952e-05, "loss": 0.9515, "step": 1410 }, { "epoch": 0.09365673487559154, "grad_norm": 0.6712508797645569, "learning_rate": 2.3392680514342237e-05, "loss": 0.9612, "step": 1420 }, { "epoch": 0.09431628934654641, "grad_norm": 0.781801164150238, "learning_rate": 2.355753379492252e-05, "loss": 0.9395, "step": 1430 }, { "epoch": 0.09497584381750128, "grad_norm": 0.6273669004440308, "learning_rate": 2.3722387075502802e-05, "loss": 0.9254, "step": 1440 }, { "epoch": 0.09563539828845614, "grad_norm": 0.5787047743797302, "learning_rate": 2.3887240356083086e-05, "loss": 0.8994, "step": 1450 }, { "epoch": 0.09629495275941102, "grad_norm": 0.5493133664131165, "learning_rate": 2.405209363666337e-05, "loss": 0.8898, "step": 1460 }, { "epoch": 0.09695450723036589, "grad_norm": 0.5380603671073914, "learning_rate": 2.4216946917243655e-05, "loss": 0.8835, "step": 1470 }, { "epoch": 0.09761406170132075, "grad_norm": 0.6819421052932739, "learning_rate": 2.4381800197823936e-05, "loss": 0.9235, "step": 1480 }, { "epoch": 0.09827361617227563, "grad_norm": 0.5991073250770569, "learning_rate": 2.454665347840422e-05, "loss": 0.8996, "step": 1490 }, { "epoch": 0.0989331706432305, "grad_norm": 0.6017669439315796, "learning_rate": 2.4711506758984505e-05, "loss": 0.946, "step": 1500 }, { "epoch": 0.09959272511418536, "grad_norm": 0.6362821459770203, "learning_rate": 2.487636003956479e-05, "loss": 0.9609, "step": 1510 }, { "epoch": 0.10025227958514024, "grad_norm": 0.6949606537818909, "learning_rate": 2.5041213320145073e-05, "loss": 0.9661, "step": 1520 }, { "epoch": 0.1009118340560951, "grad_norm": 0.6807888746261597, "learning_rate": 2.5206066600725354e-05, "loss": 0.9111, "step": 1530 }, { "epoch": 0.10157138852704997, "grad_norm": 0.6306922435760498, "learning_rate": 2.537091988130564e-05, "loss": 0.9029, "step": 1540 }, { "epoch": 0.10223094299800485, "grad_norm": 0.578364372253418, "learning_rate": 2.5535773161885923e-05, "loss": 0.9389, "step": 1550 }, { "epoch": 0.10289049746895972, "grad_norm": 0.5773734450340271, "learning_rate": 2.5700626442466207e-05, "loss": 0.944, "step": 1560 }, { "epoch": 0.10355005193991458, "grad_norm": 0.7100627422332764, "learning_rate": 2.5865479723046488e-05, "loss": 0.9705, "step": 1570 }, { "epoch": 0.10420960641086946, "grad_norm": 0.6515047550201416, "learning_rate": 2.6030333003626772e-05, "loss": 0.9532, "step": 1580 }, { "epoch": 0.10486916088182432, "grad_norm": 0.6922847032546997, "learning_rate": 2.6195186284207057e-05, "loss": 0.9124, "step": 1590 }, { "epoch": 0.1055287153527792, "grad_norm": 0.7992035150527954, "learning_rate": 2.636003956478734e-05, "loss": 0.9313, "step": 1600 }, { "epoch": 0.10618826982373407, "grad_norm": 0.6953135132789612, "learning_rate": 2.6524892845367622e-05, "loss": 0.9398, "step": 1610 }, { "epoch": 0.10684782429468893, "grad_norm": 0.7350666522979736, "learning_rate": 2.6689746125947906e-05, "loss": 0.8814, "step": 1620 }, { "epoch": 0.10750737876564381, "grad_norm": 0.7054291367530823, "learning_rate": 2.685459940652819e-05, "loss": 0.9421, "step": 1630 }, { "epoch": 0.10816693323659868, "grad_norm": 0.5834742784500122, "learning_rate": 2.7019452687108475e-05, "loss": 0.9328, "step": 1640 }, { "epoch": 0.10882648770755354, "grad_norm": 0.6302939653396606, "learning_rate": 2.7184305967688756e-05, "loss": 1.0036, "step": 1650 }, { "epoch": 0.10948604217850842, "grad_norm": 0.6931148767471313, "learning_rate": 2.734915924826904e-05, "loss": 0.9166, "step": 1660 }, { "epoch": 0.11014559664946329, "grad_norm": 0.5533245801925659, "learning_rate": 2.7514012528849324e-05, "loss": 0.8807, "step": 1670 }, { "epoch": 0.11080515112041815, "grad_norm": 0.6529316306114197, "learning_rate": 2.7678865809429612e-05, "loss": 0.8954, "step": 1680 }, { "epoch": 0.11146470559137303, "grad_norm": 0.567509114742279, "learning_rate": 2.784371909000989e-05, "loss": 0.878, "step": 1690 }, { "epoch": 0.1121242600623279, "grad_norm": 0.6449151039123535, "learning_rate": 2.8008572370590174e-05, "loss": 0.9475, "step": 1700 }, { "epoch": 0.11278381453328276, "grad_norm": 0.6593036651611328, "learning_rate": 2.8173425651170458e-05, "loss": 0.9124, "step": 1710 }, { "epoch": 0.11344336900423764, "grad_norm": 0.5768759846687317, "learning_rate": 2.8338278931750746e-05, "loss": 0.9553, "step": 1720 }, { "epoch": 0.1141029234751925, "grad_norm": 0.7477861046791077, "learning_rate": 2.8503132212331023e-05, "loss": 0.9001, "step": 1730 }, { "epoch": 0.11476247794614738, "grad_norm": 0.6525564193725586, "learning_rate": 2.8667985492911308e-05, "loss": 0.9107, "step": 1740 }, { "epoch": 0.11542203241710225, "grad_norm": 0.6128164529800415, "learning_rate": 2.8832838773491595e-05, "loss": 0.95, "step": 1750 }, { "epoch": 0.11608158688805711, "grad_norm": 0.5953107476234436, "learning_rate": 2.899769205407188e-05, "loss": 0.9213, "step": 1760 }, { "epoch": 0.116741141359012, "grad_norm": 0.679503858089447, "learning_rate": 2.9162545334652157e-05, "loss": 0.9104, "step": 1770 }, { "epoch": 0.11740069582996686, "grad_norm": 0.6580171585083008, "learning_rate": 2.932739861523244e-05, "loss": 0.9161, "step": 1780 }, { "epoch": 0.11806025030092172, "grad_norm": 0.6093540787696838, "learning_rate": 2.949225189581273e-05, "loss": 0.9435, "step": 1790 }, { "epoch": 0.1187198047718766, "grad_norm": 0.5813942551612854, "learning_rate": 2.9657105176393013e-05, "loss": 0.9012, "step": 1800 }, { "epoch": 0.11937935924283147, "grad_norm": 0.6638506650924683, "learning_rate": 2.9821958456973298e-05, "loss": 0.9067, "step": 1810 }, { "epoch": 0.12003891371378633, "grad_norm": 0.5925025343894958, "learning_rate": 2.998681173755358e-05, "loss": 0.9353, "step": 1820 }, { "epoch": 0.12069846818474121, "grad_norm": 0.688334584236145, "learning_rate": 3.0151665018133863e-05, "loss": 0.935, "step": 1830 }, { "epoch": 0.12135802265569608, "grad_norm": 0.6738244891166687, "learning_rate": 3.0316518298714147e-05, "loss": 0.9315, "step": 1840 }, { "epoch": 0.12201757712665094, "grad_norm": 0.6192309856414795, "learning_rate": 3.048137157929443e-05, "loss": 0.923, "step": 1850 }, { "epoch": 0.12267713159760582, "grad_norm": 0.6709905862808228, "learning_rate": 3.064622485987471e-05, "loss": 0.8941, "step": 1860 }, { "epoch": 0.12333668606856069, "grad_norm": 0.6975002288818359, "learning_rate": 3.0811078140455e-05, "loss": 0.95, "step": 1870 }, { "epoch": 0.12399624053951555, "grad_norm": 0.6484049558639526, "learning_rate": 3.0975931421035285e-05, "loss": 0.8955, "step": 1880 }, { "epoch": 0.12465579501047043, "grad_norm": 0.5713815093040466, "learning_rate": 3.1140784701615565e-05, "loss": 0.8926, "step": 1890 }, { "epoch": 0.1253153494814253, "grad_norm": 0.7232243418693542, "learning_rate": 3.1305637982195846e-05, "loss": 0.9017, "step": 1900 }, { "epoch": 0.12597490395238017, "grad_norm": 0.604157030582428, "learning_rate": 3.147049126277613e-05, "loss": 0.9463, "step": 1910 }, { "epoch": 0.12663445842333504, "grad_norm": 0.6578916311264038, "learning_rate": 3.1635344543356415e-05, "loss": 0.9106, "step": 1920 }, { "epoch": 0.1272940128942899, "grad_norm": 0.6498791575431824, "learning_rate": 3.18001978239367e-05, "loss": 0.8915, "step": 1930 }, { "epoch": 0.12795356736524477, "grad_norm": 0.5826857686042786, "learning_rate": 3.196505110451698e-05, "loss": 0.8851, "step": 1940 }, { "epoch": 0.12861312183619963, "grad_norm": 0.622488796710968, "learning_rate": 3.2129904385097265e-05, "loss": 0.8704, "step": 1950 }, { "epoch": 0.12927267630715453, "grad_norm": 0.5181499719619751, "learning_rate": 3.229475766567755e-05, "loss": 0.925, "step": 1960 }, { "epoch": 0.1299322307781094, "grad_norm": 0.6072003841400146, "learning_rate": 3.245961094625783e-05, "loss": 0.8958, "step": 1970 }, { "epoch": 0.13059178524906426, "grad_norm": 0.5885782241821289, "learning_rate": 3.2624464226838114e-05, "loss": 0.9435, "step": 1980 }, { "epoch": 0.13125133972001912, "grad_norm": 0.6380855441093445, "learning_rate": 3.27893175074184e-05, "loss": 0.9126, "step": 1990 }, { "epoch": 0.131910894190974, "grad_norm": 0.6280835866928101, "learning_rate": 3.295417078799868e-05, "loss": 0.9274, "step": 2000 }, { "epoch": 0.13257044866192888, "grad_norm": 0.6791607141494751, "learning_rate": 3.311902406857897e-05, "loss": 0.9361, "step": 2010 }, { "epoch": 0.13323000313288375, "grad_norm": 0.6630708575248718, "learning_rate": 3.328387734915925e-05, "loss": 0.9806, "step": 2020 }, { "epoch": 0.1338895576038386, "grad_norm": 0.6305481195449829, "learning_rate": 3.344873062973953e-05, "loss": 0.9137, "step": 2030 }, { "epoch": 0.13454911207479348, "grad_norm": 0.7444617748260498, "learning_rate": 3.361358391031982e-05, "loss": 0.92, "step": 2040 }, { "epoch": 0.13520866654574834, "grad_norm": 0.6238999962806702, "learning_rate": 3.37784371909001e-05, "loss": 0.9301, "step": 2050 }, { "epoch": 0.1358682210167032, "grad_norm": 0.6913322806358337, "learning_rate": 3.394329047148038e-05, "loss": 0.909, "step": 2060 }, { "epoch": 0.1365277754876581, "grad_norm": 0.6888453960418701, "learning_rate": 3.410814375206067e-05, "loss": 0.8859, "step": 2070 }, { "epoch": 0.13718732995861296, "grad_norm": 0.644161581993103, "learning_rate": 3.427299703264095e-05, "loss": 0.8954, "step": 2080 }, { "epoch": 0.13784688442956783, "grad_norm": 0.6524642705917358, "learning_rate": 3.443785031322124e-05, "loss": 0.8958, "step": 2090 }, { "epoch": 0.1385064389005227, "grad_norm": 0.6420117020606995, "learning_rate": 3.460270359380152e-05, "loss": 0.9183, "step": 2100 }, { "epoch": 0.13916599337147756, "grad_norm": 0.595023512840271, "learning_rate": 3.47675568743818e-05, "loss": 0.9226, "step": 2110 }, { "epoch": 0.13982554784243242, "grad_norm": 0.6285088062286377, "learning_rate": 3.493241015496209e-05, "loss": 0.8814, "step": 2120 }, { "epoch": 0.14048510231338732, "grad_norm": 0.6196804642677307, "learning_rate": 3.509726343554237e-05, "loss": 0.8598, "step": 2130 }, { "epoch": 0.14114465678434218, "grad_norm": 0.599234938621521, "learning_rate": 3.526211671612265e-05, "loss": 0.8996, "step": 2140 }, { "epoch": 0.14180421125529705, "grad_norm": 0.5277974009513855, "learning_rate": 3.542696999670294e-05, "loss": 0.9318, "step": 2150 }, { "epoch": 0.1424637657262519, "grad_norm": 0.6323888897895813, "learning_rate": 3.559182327728322e-05, "loss": 0.8764, "step": 2160 }, { "epoch": 0.14312332019720678, "grad_norm": 0.6974547505378723, "learning_rate": 3.5756676557863506e-05, "loss": 0.9063, "step": 2170 }, { "epoch": 0.14378287466816167, "grad_norm": 0.6774551272392273, "learning_rate": 3.592152983844379e-05, "loss": 0.8704, "step": 2180 }, { "epoch": 0.14444242913911653, "grad_norm": 0.673445999622345, "learning_rate": 3.608638311902407e-05, "loss": 0.9432, "step": 2190 }, { "epoch": 0.1451019836100714, "grad_norm": 0.6598107218742371, "learning_rate": 3.6251236399604355e-05, "loss": 0.9339, "step": 2200 }, { "epoch": 0.14576153808102627, "grad_norm": 0.6883885264396667, "learning_rate": 3.6416089680184636e-05, "loss": 0.9098, "step": 2210 }, { "epoch": 0.14642109255198113, "grad_norm": 0.6021624207496643, "learning_rate": 3.658094296076492e-05, "loss": 0.8606, "step": 2220 }, { "epoch": 0.147080647022936, "grad_norm": 0.5796533226966858, "learning_rate": 3.6745796241345205e-05, "loss": 0.899, "step": 2230 }, { "epoch": 0.1477402014938909, "grad_norm": 0.6242685317993164, "learning_rate": 3.6910649521925486e-05, "loss": 0.9218, "step": 2240 }, { "epoch": 0.14839975596484575, "grad_norm": 0.7199188470840454, "learning_rate": 3.7075502802505774e-05, "loss": 0.8937, "step": 2250 }, { "epoch": 0.14905931043580062, "grad_norm": 0.694506049156189, "learning_rate": 3.7240356083086054e-05, "loss": 0.8794, "step": 2260 }, { "epoch": 0.14971886490675548, "grad_norm": 0.6348100304603577, "learning_rate": 3.7405209363666335e-05, "loss": 0.9325, "step": 2270 }, { "epoch": 0.15037841937771035, "grad_norm": 0.730395495891571, "learning_rate": 3.757006264424662e-05, "loss": 0.9095, "step": 2280 }, { "epoch": 0.1510379738486652, "grad_norm": 0.639094352722168, "learning_rate": 3.773491592482691e-05, "loss": 0.8881, "step": 2290 }, { "epoch": 0.1516975283196201, "grad_norm": 0.7560307383537292, "learning_rate": 3.7899769205407185e-05, "loss": 0.8777, "step": 2300 }, { "epoch": 0.15235708279057497, "grad_norm": 0.6120066046714783, "learning_rate": 3.806462248598747e-05, "loss": 0.9273, "step": 2310 }, { "epoch": 0.15301663726152984, "grad_norm": 0.6584673523902893, "learning_rate": 3.822947576656776e-05, "loss": 0.9507, "step": 2320 }, { "epoch": 0.1536761917324847, "grad_norm": 0.7297015190124512, "learning_rate": 3.839432904714804e-05, "loss": 0.9074, "step": 2330 }, { "epoch": 0.15433574620343957, "grad_norm": 0.6097325682640076, "learning_rate": 3.855918232772832e-05, "loss": 0.9008, "step": 2340 }, { "epoch": 0.15499530067439446, "grad_norm": 0.6594650149345398, "learning_rate": 3.87240356083086e-05, "loss": 0.9434, "step": 2350 }, { "epoch": 0.15565485514534932, "grad_norm": 0.7125281691551208, "learning_rate": 3.888888888888889e-05, "loss": 0.8952, "step": 2360 }, { "epoch": 0.1563144096163042, "grad_norm": 0.6079130172729492, "learning_rate": 3.905374216946918e-05, "loss": 0.8867, "step": 2370 }, { "epoch": 0.15697396408725905, "grad_norm": 0.6090205907821655, "learning_rate": 3.921859545004945e-05, "loss": 0.8838, "step": 2380 }, { "epoch": 0.15763351855821392, "grad_norm": 0.7213959097862244, "learning_rate": 3.938344873062974e-05, "loss": 0.8975, "step": 2390 }, { "epoch": 0.15829307302916878, "grad_norm": 0.7012157440185547, "learning_rate": 3.954830201121003e-05, "loss": 0.9334, "step": 2400 }, { "epoch": 0.15895262750012368, "grad_norm": 0.8252381682395935, "learning_rate": 3.971315529179031e-05, "loss": 0.9535, "step": 2410 }, { "epoch": 0.15961218197107854, "grad_norm": 0.7257195711135864, "learning_rate": 3.987800857237059e-05, "loss": 0.92, "step": 2420 }, { "epoch": 0.1602717364420334, "grad_norm": 0.6549400091171265, "learning_rate": 4.004286185295088e-05, "loss": 0.9118, "step": 2430 }, { "epoch": 0.16093129091298827, "grad_norm": 0.6405781507492065, "learning_rate": 4.020771513353116e-05, "loss": 0.8934, "step": 2440 }, { "epoch": 0.16159084538394314, "grad_norm": 0.6783310770988464, "learning_rate": 4.0372568414111446e-05, "loss": 0.9367, "step": 2450 }, { "epoch": 0.162250399854898, "grad_norm": 0.6306205987930298, "learning_rate": 4.053742169469173e-05, "loss": 0.8781, "step": 2460 }, { "epoch": 0.1629099543258529, "grad_norm": 0.7541260123252869, "learning_rate": 4.070227497527201e-05, "loss": 0.9119, "step": 2470 }, { "epoch": 0.16356950879680776, "grad_norm": 0.6016479134559631, "learning_rate": 4.0867128255852296e-05, "loss": 0.9254, "step": 2480 }, { "epoch": 0.16422906326776263, "grad_norm": 0.6229802966117859, "learning_rate": 4.103198153643258e-05, "loss": 0.8907, "step": 2490 }, { "epoch": 0.1648886177387175, "grad_norm": 0.5935705900192261, "learning_rate": 4.119683481701286e-05, "loss": 0.901, "step": 2500 }, { "epoch": 0.16554817220967236, "grad_norm": 0.7060070037841797, "learning_rate": 4.1361688097593145e-05, "loss": 0.8912, "step": 2510 }, { "epoch": 0.16620772668062725, "grad_norm": 0.704928457736969, "learning_rate": 4.1526541378173426e-05, "loss": 0.9041, "step": 2520 }, { "epoch": 0.16686728115158211, "grad_norm": 0.7448880672454834, "learning_rate": 4.1691394658753714e-05, "loss": 0.8736, "step": 2530 }, { "epoch": 0.16752683562253698, "grad_norm": 0.6432943940162659, "learning_rate": 4.1856247939333995e-05, "loss": 0.8916, "step": 2540 }, { "epoch": 0.16818639009349184, "grad_norm": 0.692283034324646, "learning_rate": 4.2021101219914276e-05, "loss": 0.8787, "step": 2550 }, { "epoch": 0.1688459445644467, "grad_norm": 0.6590387225151062, "learning_rate": 4.2185954500494563e-05, "loss": 0.8528, "step": 2560 }, { "epoch": 0.16950549903540157, "grad_norm": 0.6531996726989746, "learning_rate": 4.2350807781074844e-05, "loss": 0.8857, "step": 2570 }, { "epoch": 0.17016505350635647, "grad_norm": 0.6231783628463745, "learning_rate": 4.2515661061655125e-05, "loss": 0.9153, "step": 2580 }, { "epoch": 0.17082460797731133, "grad_norm": 0.6619783639907837, "learning_rate": 4.268051434223541e-05, "loss": 0.929, "step": 2590 }, { "epoch": 0.1714841624482662, "grad_norm": 0.6574437618255615, "learning_rate": 4.2845367622815694e-05, "loss": 0.9076, "step": 2600 }, { "epoch": 0.17214371691922106, "grad_norm": 0.6736916303634644, "learning_rate": 4.301022090339598e-05, "loss": 0.9293, "step": 2610 }, { "epoch": 0.17280327139017593, "grad_norm": 0.7516410946846008, "learning_rate": 4.317507418397626e-05, "loss": 0.9069, "step": 2620 }, { "epoch": 0.1734628258611308, "grad_norm": 0.5540705919265747, "learning_rate": 4.3339927464556543e-05, "loss": 0.8764, "step": 2630 }, { "epoch": 0.17412238033208569, "grad_norm": 0.6677078008651733, "learning_rate": 4.350478074513683e-05, "loss": 0.9231, "step": 2640 }, { "epoch": 0.17478193480304055, "grad_norm": 0.5827996730804443, "learning_rate": 4.366963402571712e-05, "loss": 0.8681, "step": 2650 }, { "epoch": 0.17544148927399542, "grad_norm": 0.6666778922080994, "learning_rate": 4.383448730629739e-05, "loss": 0.9257, "step": 2660 }, { "epoch": 0.17610104374495028, "grad_norm": 0.5299721956253052, "learning_rate": 4.399934058687768e-05, "loss": 0.9241, "step": 2670 }, { "epoch": 0.17676059821590515, "grad_norm": 0.5545915365219116, "learning_rate": 4.416419386745796e-05, "loss": 0.862, "step": 2680 }, { "epoch": 0.17742015268686004, "grad_norm": 0.6056398749351501, "learning_rate": 4.432904714803825e-05, "loss": 0.9276, "step": 2690 }, { "epoch": 0.1780797071578149, "grad_norm": 0.740776538848877, "learning_rate": 4.449390042861853e-05, "loss": 0.9382, "step": 2700 }, { "epoch": 0.17873926162876977, "grad_norm": 0.6620045900344849, "learning_rate": 4.465875370919881e-05, "loss": 0.9182, "step": 2710 }, { "epoch": 0.17939881609972463, "grad_norm": 0.6999577879905701, "learning_rate": 4.48236069897791e-05, "loss": 0.9137, "step": 2720 }, { "epoch": 0.1800583705706795, "grad_norm": 0.6420654058456421, "learning_rate": 4.4988460270359386e-05, "loss": 0.9166, "step": 2730 }, { "epoch": 0.18071792504163436, "grad_norm": 0.7118934392929077, "learning_rate": 4.515331355093966e-05, "loss": 0.9266, "step": 2740 }, { "epoch": 0.18137747951258926, "grad_norm": 0.5614007115364075, "learning_rate": 4.531816683151995e-05, "loss": 0.9392, "step": 2750 }, { "epoch": 0.18203703398354412, "grad_norm": 0.6258883476257324, "learning_rate": 4.5483020112100236e-05, "loss": 0.8736, "step": 2760 }, { "epoch": 0.182696588454499, "grad_norm": 0.5963823795318604, "learning_rate": 4.564787339268052e-05, "loss": 0.8908, "step": 2770 }, { "epoch": 0.18335614292545385, "grad_norm": 0.703870415687561, "learning_rate": 4.58127266732608e-05, "loss": 0.9045, "step": 2780 }, { "epoch": 0.18401569739640872, "grad_norm": 0.5508717894554138, "learning_rate": 4.5977579953841086e-05, "loss": 0.8899, "step": 2790 }, { "epoch": 0.18467525186736358, "grad_norm": 0.6593130230903625, "learning_rate": 4.6142433234421366e-05, "loss": 0.9196, "step": 2800 }, { "epoch": 0.18533480633831848, "grad_norm": 0.6316845417022705, "learning_rate": 4.6307286515001654e-05, "loss": 0.8912, "step": 2810 }, { "epoch": 0.18599436080927334, "grad_norm": 0.6781242489814758, "learning_rate": 4.647213979558193e-05, "loss": 0.8971, "step": 2820 }, { "epoch": 0.1866539152802282, "grad_norm": 0.6441785097122192, "learning_rate": 4.6636993076162216e-05, "loss": 0.917, "step": 2830 }, { "epoch": 0.18731346975118307, "grad_norm": 0.636024534702301, "learning_rate": 4.6801846356742504e-05, "loss": 0.8869, "step": 2840 }, { "epoch": 0.18797302422213794, "grad_norm": 0.5574973821640015, "learning_rate": 4.6966699637322785e-05, "loss": 0.8681, "step": 2850 }, { "epoch": 0.18863257869309283, "grad_norm": 0.6535305976867676, "learning_rate": 4.713155291790307e-05, "loss": 0.8982, "step": 2860 }, { "epoch": 0.1892921331640477, "grad_norm": 0.5861814618110657, "learning_rate": 4.729640619848335e-05, "loss": 0.9378, "step": 2870 }, { "epoch": 0.18995168763500256, "grad_norm": 0.6891550421714783, "learning_rate": 4.7461259479063634e-05, "loss": 0.9189, "step": 2880 }, { "epoch": 0.19061124210595742, "grad_norm": 0.5748773813247681, "learning_rate": 4.762611275964392e-05, "loss": 0.8761, "step": 2890 }, { "epoch": 0.1912707965769123, "grad_norm": 0.673592209815979, "learning_rate": 4.77909660402242e-05, "loss": 0.8495, "step": 2900 }, { "epoch": 0.19193035104786715, "grad_norm": 0.539668083190918, "learning_rate": 4.7955819320804484e-05, "loss": 0.8722, "step": 2910 }, { "epoch": 0.19258990551882205, "grad_norm": 0.6330294013023376, "learning_rate": 4.812067260138477e-05, "loss": 0.9154, "step": 2920 }, { "epoch": 0.1932494599897769, "grad_norm": 0.7211683392524719, "learning_rate": 4.828552588196505e-05, "loss": 0.8343, "step": 2930 }, { "epoch": 0.19390901446073178, "grad_norm": 0.6515793800354004, "learning_rate": 4.845037916254534e-05, "loss": 0.9021, "step": 2940 }, { "epoch": 0.19456856893168664, "grad_norm": 0.6246060132980347, "learning_rate": 4.861523244312562e-05, "loss": 0.8516, "step": 2950 }, { "epoch": 0.1952281234026415, "grad_norm": 0.6002302765846252, "learning_rate": 4.87800857237059e-05, "loss": 0.862, "step": 2960 }, { "epoch": 0.19588767787359637, "grad_norm": 0.6544924378395081, "learning_rate": 4.894493900428619e-05, "loss": 0.8543, "step": 2970 }, { "epoch": 0.19654723234455126, "grad_norm": 0.6579413414001465, "learning_rate": 4.910979228486647e-05, "loss": 0.9218, "step": 2980 }, { "epoch": 0.19720678681550613, "grad_norm": 0.6065923571586609, "learning_rate": 4.927464556544675e-05, "loss": 0.8727, "step": 2990 }, { "epoch": 0.197866341286461, "grad_norm": 0.8123323917388916, "learning_rate": 4.943949884602704e-05, "loss": 0.911, "step": 3000 }, { "epoch": 0.19852589575741586, "grad_norm": 0.6149587631225586, "learning_rate": 4.960435212660732e-05, "loss": 0.8881, "step": 3010 }, { "epoch": 0.19918545022837073, "grad_norm": 0.6561806201934814, "learning_rate": 4.976920540718761e-05, "loss": 0.8765, "step": 3020 }, { "epoch": 0.19984500469932562, "grad_norm": 0.5923125743865967, "learning_rate": 4.993405868776789e-05, "loss": 0.9018, "step": 3030 }, { "epoch": 0.20050455917028048, "grad_norm": 0.616742730140686, "learning_rate": 4.999999403688283e-05, "loss": 0.8803, "step": 3040 }, { "epoch": 0.20116411364123535, "grad_norm": 0.5280983448028564, "learning_rate": 4.9999957595621525e-05, "loss": 0.9116, "step": 3050 }, { "epoch": 0.2018236681121902, "grad_norm": 0.5969537496566772, "learning_rate": 4.999988802599003e-05, "loss": 0.8889, "step": 3060 }, { "epoch": 0.20248322258314508, "grad_norm": 0.6396189332008362, "learning_rate": 4.999978532808053e-05, "loss": 0.8778, "step": 3070 }, { "epoch": 0.20314277705409994, "grad_norm": 0.6153563857078552, "learning_rate": 4.99996495020291e-05, "loss": 0.8914, "step": 3080 }, { "epoch": 0.20380233152505484, "grad_norm": 0.6899839043617249, "learning_rate": 4.999948054801574e-05, "loss": 0.8639, "step": 3090 }, { "epoch": 0.2044618859960097, "grad_norm": 0.6921502351760864, "learning_rate": 4.999927846626434e-05, "loss": 0.8647, "step": 3100 }, { "epoch": 0.20512144046696457, "grad_norm": 0.6061365008354187, "learning_rate": 4.999904325704268e-05, "loss": 0.8942, "step": 3110 }, { "epoch": 0.20578099493791943, "grad_norm": 0.6922749280929565, "learning_rate": 4.999877492066245e-05, "loss": 0.8897, "step": 3120 }, { "epoch": 0.2064405494088743, "grad_norm": 0.6063687205314636, "learning_rate": 4.9998473457479234e-05, "loss": 0.8576, "step": 3130 }, { "epoch": 0.20710010387982916, "grad_norm": 0.6878769993782043, "learning_rate": 4.99981388678925e-05, "loss": 0.8683, "step": 3140 }, { "epoch": 0.20775965835078405, "grad_norm": 0.7053576707839966, "learning_rate": 4.999777115234563e-05, "loss": 0.8899, "step": 3150 }, { "epoch": 0.20841921282173892, "grad_norm": 0.6048821806907654, "learning_rate": 4.9997370311325904e-05, "loss": 0.8854, "step": 3160 }, { "epoch": 0.20907876729269378, "grad_norm": 0.5910998582839966, "learning_rate": 4.9996936345364485e-05, "loss": 0.8622, "step": 3170 }, { "epoch": 0.20973832176364865, "grad_norm": 0.5993175506591797, "learning_rate": 4.999646925503644e-05, "loss": 0.9227, "step": 3180 }, { "epoch": 0.21039787623460351, "grad_norm": 0.5830239653587341, "learning_rate": 4.999596904096073e-05, "loss": 0.883, "step": 3190 }, { "epoch": 0.2110574307055584, "grad_norm": 0.5703751444816589, "learning_rate": 4.99954357038002e-05, "loss": 0.9075, "step": 3200 }, { "epoch": 0.21171698517651327, "grad_norm": 0.6441596150398254, "learning_rate": 4.99948692442616e-05, "loss": 0.9032, "step": 3210 }, { "epoch": 0.21237653964746814, "grad_norm": 0.6591752171516418, "learning_rate": 4.999426966309557e-05, "loss": 0.8922, "step": 3220 }, { "epoch": 0.213036094118423, "grad_norm": 0.6065967679023743, "learning_rate": 4.999363696109663e-05, "loss": 0.8931, "step": 3230 }, { "epoch": 0.21369564858937787, "grad_norm": 0.5592631101608276, "learning_rate": 4.9992971139103194e-05, "loss": 0.8682, "step": 3240 }, { "epoch": 0.21435520306033273, "grad_norm": 0.5951939821243286, "learning_rate": 4.999227219799758e-05, "loss": 0.8172, "step": 3250 }, { "epoch": 0.21501475753128763, "grad_norm": 0.6042636036872864, "learning_rate": 4.9991540138705975e-05, "loss": 0.9106, "step": 3260 }, { "epoch": 0.2156743120022425, "grad_norm": 0.5808688998222351, "learning_rate": 4.999077496219845e-05, "loss": 0.926, "step": 3270 }, { "epoch": 0.21633386647319736, "grad_norm": 0.7375866174697876, "learning_rate": 4.998997666948898e-05, "loss": 0.9025, "step": 3280 }, { "epoch": 0.21699342094415222, "grad_norm": 0.5514616370201111, "learning_rate": 4.9989145261635415e-05, "loss": 0.9091, "step": 3290 }, { "epoch": 0.21765297541510709, "grad_norm": 0.5729548931121826, "learning_rate": 4.998828073973947e-05, "loss": 0.933, "step": 3300 }, { "epoch": 0.21831252988606198, "grad_norm": 0.644983172416687, "learning_rate": 4.998738310494676e-05, "loss": 0.8818, "step": 3310 }, { "epoch": 0.21897208435701684, "grad_norm": 0.5531386137008667, "learning_rate": 4.998645235844678e-05, "loss": 0.9143, "step": 3320 }, { "epoch": 0.2196316388279717, "grad_norm": 0.5550761222839355, "learning_rate": 4.9985488501472885e-05, "loss": 0.975, "step": 3330 }, { "epoch": 0.22029119329892657, "grad_norm": 0.5695024132728577, "learning_rate": 4.998449153530234e-05, "loss": 0.9387, "step": 3340 }, { "epoch": 0.22095074776988144, "grad_norm": 0.621046781539917, "learning_rate": 4.998346146125623e-05, "loss": 0.9138, "step": 3350 }, { "epoch": 0.2216103022408363, "grad_norm": 0.6257467269897461, "learning_rate": 4.9982398280699575e-05, "loss": 0.9066, "step": 3360 }, { "epoch": 0.2222698567117912, "grad_norm": 0.5778996348381042, "learning_rate": 4.998130199504122e-05, "loss": 0.906, "step": 3370 }, { "epoch": 0.22292941118274606, "grad_norm": 0.5707346200942993, "learning_rate": 4.998017260573389e-05, "loss": 0.9184, "step": 3380 }, { "epoch": 0.22358896565370093, "grad_norm": 0.6047236919403076, "learning_rate": 4.9979010114274195e-05, "loss": 0.8492, "step": 3390 }, { "epoch": 0.2242485201246558, "grad_norm": 0.6120710372924805, "learning_rate": 4.9977814522202574e-05, "loss": 0.8874, "step": 3400 }, { "epoch": 0.22490807459561066, "grad_norm": 0.5748836398124695, "learning_rate": 4.997658583110337e-05, "loss": 0.8931, "step": 3410 }, { "epoch": 0.22556762906656552, "grad_norm": 0.6227717399597168, "learning_rate": 4.997532404260476e-05, "loss": 0.8616, "step": 3420 }, { "epoch": 0.22622718353752042, "grad_norm": 0.5952726006507874, "learning_rate": 4.997402915837879e-05, "loss": 0.8802, "step": 3430 }, { "epoch": 0.22688673800847528, "grad_norm": 0.5556307435035706, "learning_rate": 4.997270118014135e-05, "loss": 0.9117, "step": 3440 }, { "epoch": 0.22754629247943015, "grad_norm": 0.6270772814750671, "learning_rate": 4.99713401096522e-05, "loss": 0.8638, "step": 3450 }, { "epoch": 0.228205846950385, "grad_norm": 0.5514934062957764, "learning_rate": 4.996994594871495e-05, "loss": 0.8688, "step": 3460 }, { "epoch": 0.22886540142133988, "grad_norm": 0.6048349738121033, "learning_rate": 4.9968518699177035e-05, "loss": 0.8659, "step": 3470 }, { "epoch": 0.22952495589229477, "grad_norm": 0.6672205924987793, "learning_rate": 4.996705836292979e-05, "loss": 0.884, "step": 3480 }, { "epoch": 0.23018451036324963, "grad_norm": 0.5777316093444824, "learning_rate": 4.996556494190832e-05, "loss": 0.9188, "step": 3490 }, { "epoch": 0.2308440648342045, "grad_norm": 0.5783727765083313, "learning_rate": 4.996403843809164e-05, "loss": 0.884, "step": 3500 }, { "epoch": 0.23150361930515936, "grad_norm": 0.5978668928146362, "learning_rate": 4.996247885350257e-05, "loss": 0.8894, "step": 3510 }, { "epoch": 0.23216317377611423, "grad_norm": 0.5237613320350647, "learning_rate": 4.9960886190207774e-05, "loss": 0.9107, "step": 3520 }, { "epoch": 0.2328227282470691, "grad_norm": 0.6135637760162354, "learning_rate": 4.9959260450317746e-05, "loss": 0.8781, "step": 3530 }, { "epoch": 0.233482282718024, "grad_norm": 0.6045156717300415, "learning_rate": 4.9957601635986815e-05, "loss": 0.9053, "step": 3540 }, { "epoch": 0.23414183718897885, "grad_norm": 0.6048551797866821, "learning_rate": 4.995590974941314e-05, "loss": 0.9054, "step": 3550 }, { "epoch": 0.23480139165993372, "grad_norm": 0.6331484317779541, "learning_rate": 4.99541847928387e-05, "loss": 0.8917, "step": 3560 }, { "epoch": 0.23546094613088858, "grad_norm": 0.5210322737693787, "learning_rate": 4.99524267685493e-05, "loss": 0.899, "step": 3570 }, { "epoch": 0.23612050060184345, "grad_norm": 0.6640326976776123, "learning_rate": 4.9950635678874564e-05, "loss": 0.8845, "step": 3580 }, { "epoch": 0.2367800550727983, "grad_norm": 0.7048312425613403, "learning_rate": 4.9948811526187935e-05, "loss": 0.8704, "step": 3590 }, { "epoch": 0.2374396095437532, "grad_norm": 0.5205860137939453, "learning_rate": 4.9946954312906645e-05, "loss": 0.907, "step": 3600 }, { "epoch": 0.23809916401470807, "grad_norm": 0.6026431918144226, "learning_rate": 4.9945064041491786e-05, "loss": 0.8609, "step": 3610 }, { "epoch": 0.23875871848566294, "grad_norm": 0.7078092694282532, "learning_rate": 4.994314071444821e-05, "loss": 0.8966, "step": 3620 }, { "epoch": 0.2394182729566178, "grad_norm": 0.6005507707595825, "learning_rate": 4.994118433432459e-05, "loss": 0.8864, "step": 3630 }, { "epoch": 0.24007782742757267, "grad_norm": 0.5615901350975037, "learning_rate": 4.99391949037134e-05, "loss": 0.8206, "step": 3640 }, { "epoch": 0.24073738189852756, "grad_norm": 0.5781015157699585, "learning_rate": 4.99371724252509e-05, "loss": 0.891, "step": 3650 }, { "epoch": 0.24139693636948242, "grad_norm": 0.516188383102417, "learning_rate": 4.993511690161717e-05, "loss": 0.8413, "step": 3660 }, { "epoch": 0.2420564908404373, "grad_norm": 0.5980455875396729, "learning_rate": 4.993302833553605e-05, "loss": 0.8802, "step": 3670 }, { "epoch": 0.24271604531139215, "grad_norm": 0.6498844623565674, "learning_rate": 4.9930906729775164e-05, "loss": 0.8944, "step": 3680 }, { "epoch": 0.24337559978234702, "grad_norm": 0.6646761298179626, "learning_rate": 4.9928752087145945e-05, "loss": 0.8729, "step": 3690 }, { "epoch": 0.24403515425330188, "grad_norm": 0.5892811417579651, "learning_rate": 4.9926564410503593e-05, "loss": 0.9241, "step": 3700 }, { "epoch": 0.24469470872425678, "grad_norm": 0.588645339012146, "learning_rate": 4.992434370274707e-05, "loss": 0.8868, "step": 3710 }, { "epoch": 0.24535426319521164, "grad_norm": 0.6737022399902344, "learning_rate": 4.992208996681912e-05, "loss": 0.8572, "step": 3720 }, { "epoch": 0.2460138176661665, "grad_norm": 0.5201873779296875, "learning_rate": 4.991980320570625e-05, "loss": 0.8591, "step": 3730 }, { "epoch": 0.24667337213712137, "grad_norm": 0.6893309354782104, "learning_rate": 4.991748342243874e-05, "loss": 0.9197, "step": 3740 }, { "epoch": 0.24733292660807624, "grad_norm": 0.6144750118255615, "learning_rate": 4.9915130620090616e-05, "loss": 0.8604, "step": 3750 }, { "epoch": 0.2479924810790311, "grad_norm": 0.6070724129676819, "learning_rate": 4.9912744801779664e-05, "loss": 0.8649, "step": 3760 }, { "epoch": 0.248652035549986, "grad_norm": 0.5637763142585754, "learning_rate": 4.991032597066742e-05, "loss": 0.8973, "step": 3770 }, { "epoch": 0.24931159002094086, "grad_norm": 0.6145963668823242, "learning_rate": 4.990787412995916e-05, "loss": 0.8506, "step": 3780 }, { "epoch": 0.24997114449189572, "grad_norm": 0.6317866444587708, "learning_rate": 4.990538928290392e-05, "loss": 0.8798, "step": 3790 }, { "epoch": 0.2506306989628506, "grad_norm": 0.6320456862449646, "learning_rate": 4.990287143279445e-05, "loss": 0.9093, "step": 3800 }, { "epoch": 0.25129025343380545, "grad_norm": 0.651431143283844, "learning_rate": 4.9900320582967263e-05, "loss": 0.8826, "step": 3810 }, { "epoch": 0.25194980790476035, "grad_norm": 0.6451202034950256, "learning_rate": 4.989773673680258e-05, "loss": 0.9028, "step": 3820 }, { "epoch": 0.2526093623757152, "grad_norm": 0.6628925800323486, "learning_rate": 4.989511989772434e-05, "loss": 0.8489, "step": 3830 }, { "epoch": 0.2532689168466701, "grad_norm": 0.8010949492454529, "learning_rate": 4.989247006920023e-05, "loss": 0.8915, "step": 3840 }, { "epoch": 0.25392847131762497, "grad_norm": 0.5844048261642456, "learning_rate": 4.988978725474162e-05, "loss": 0.9375, "step": 3850 }, { "epoch": 0.2545880257885798, "grad_norm": 0.6060191988945007, "learning_rate": 4.988707145790361e-05, "loss": 0.86, "step": 3860 }, { "epoch": 0.2552475802595347, "grad_norm": 0.5984094738960266, "learning_rate": 4.988432268228501e-05, "loss": 0.8814, "step": 3870 }, { "epoch": 0.25590713473048954, "grad_norm": 0.6448334455490112, "learning_rate": 4.988154093152833e-05, "loss": 0.8941, "step": 3880 }, { "epoch": 0.25656668920144443, "grad_norm": 0.5510440468788147, "learning_rate": 4.987872620931975e-05, "loss": 0.8881, "step": 3890 }, { "epoch": 0.25722624367239927, "grad_norm": 0.5189316868782043, "learning_rate": 4.987587851938918e-05, "loss": 0.8564, "step": 3900 }, { "epoch": 0.25788579814335416, "grad_norm": 0.5691280961036682, "learning_rate": 4.98729978655102e-05, "loss": 0.8402, "step": 3910 }, { "epoch": 0.25854535261430905, "grad_norm": 0.6573358178138733, "learning_rate": 4.987008425150005e-05, "loss": 0.8846, "step": 3920 }, { "epoch": 0.2592049070852639, "grad_norm": 0.5777154564857483, "learning_rate": 4.98671376812197e-05, "loss": 0.8662, "step": 3930 }, { "epoch": 0.2598644615562188, "grad_norm": 0.5385237336158752, "learning_rate": 4.986415815857374e-05, "loss": 0.8887, "step": 3940 }, { "epoch": 0.2605240160271736, "grad_norm": 0.6187642216682434, "learning_rate": 4.9861145687510445e-05, "loss": 0.84, "step": 3950 }, { "epoch": 0.2611835704981285, "grad_norm": 0.5783065557479858, "learning_rate": 4.9858100272021754e-05, "loss": 0.86, "step": 3960 }, { "epoch": 0.2618431249690834, "grad_norm": 0.5744683742523193, "learning_rate": 4.985502191614326e-05, "loss": 0.9069, "step": 3970 }, { "epoch": 0.26250267944003824, "grad_norm": 0.5620474815368652, "learning_rate": 4.985191062395422e-05, "loss": 0.8804, "step": 3980 }, { "epoch": 0.26316223391099314, "grad_norm": 0.6070266366004944, "learning_rate": 4.984876639957751e-05, "loss": 0.8759, "step": 3990 }, { "epoch": 0.263821788381948, "grad_norm": 0.689795196056366, "learning_rate": 4.984558924717965e-05, "loss": 0.8584, "step": 4000 }, { "epoch": 0.26448134285290287, "grad_norm": 0.5451216101646423, "learning_rate": 4.984237917097081e-05, "loss": 0.8829, "step": 4010 }, { "epoch": 0.26514089732385776, "grad_norm": 0.5520846843719482, "learning_rate": 4.9839136175204795e-05, "loss": 0.9094, "step": 4020 }, { "epoch": 0.2658004517948126, "grad_norm": 0.6379942297935486, "learning_rate": 4.983586026417899e-05, "loss": 0.9022, "step": 4030 }, { "epoch": 0.2664600062657675, "grad_norm": 0.5754224061965942, "learning_rate": 4.983255144223445e-05, "loss": 0.8302, "step": 4040 }, { "epoch": 0.26711956073672233, "grad_norm": 0.5679827332496643, "learning_rate": 4.9829209713755815e-05, "loss": 0.84, "step": 4050 }, { "epoch": 0.2677791152076772, "grad_norm": 0.5987281203269958, "learning_rate": 4.9825835083171326e-05, "loss": 0.8516, "step": 4060 }, { "epoch": 0.26843866967863206, "grad_norm": 0.5965194702148438, "learning_rate": 4.9822427554952826e-05, "loss": 0.8453, "step": 4070 }, { "epoch": 0.26909822414958695, "grad_norm": 0.6309374570846558, "learning_rate": 4.981898713361577e-05, "loss": 0.8861, "step": 4080 }, { "epoch": 0.26975777862054184, "grad_norm": 0.5262725949287415, "learning_rate": 4.9815513823719185e-05, "loss": 0.8558, "step": 4090 }, { "epoch": 0.2704173330914967, "grad_norm": 0.564869225025177, "learning_rate": 4.9812007629865676e-05, "loss": 0.8498, "step": 4100 }, { "epoch": 0.2710768875624516, "grad_norm": 0.6223157644271851, "learning_rate": 4.980846855670143e-05, "loss": 0.9029, "step": 4110 }, { "epoch": 0.2717364420334064, "grad_norm": 0.5574937462806702, "learning_rate": 4.9804896608916215e-05, "loss": 0.8667, "step": 4120 }, { "epoch": 0.2723959965043613, "grad_norm": 0.5297979712486267, "learning_rate": 4.980129179124334e-05, "loss": 0.8944, "step": 4130 }, { "epoch": 0.2730555509753162, "grad_norm": 0.5349012017250061, "learning_rate": 4.979765410845969e-05, "loss": 0.9075, "step": 4140 }, { "epoch": 0.27371510544627103, "grad_norm": 0.5487902164459229, "learning_rate": 4.979398356538568e-05, "loss": 0.8637, "step": 4150 }, { "epoch": 0.2743746599172259, "grad_norm": 0.6216309666633606, "learning_rate": 4.9790280166885306e-05, "loss": 0.888, "step": 4160 }, { "epoch": 0.27503421438818076, "grad_norm": 0.6944143772125244, "learning_rate": 4.9786543917866057e-05, "loss": 0.9287, "step": 4170 }, { "epoch": 0.27569376885913566, "grad_norm": 0.5620063543319702, "learning_rate": 4.978277482327899e-05, "loss": 0.9018, "step": 4180 }, { "epoch": 0.27635332333009055, "grad_norm": 0.6146485209465027, "learning_rate": 4.9778972888118655e-05, "loss": 0.8805, "step": 4190 }, { "epoch": 0.2770128778010454, "grad_norm": 0.4971786439418793, "learning_rate": 4.977513811742316e-05, "loss": 0.8887, "step": 4200 }, { "epoch": 0.2776724322720003, "grad_norm": 0.6936700940132141, "learning_rate": 4.977127051627408e-05, "loss": 0.9038, "step": 4210 }, { "epoch": 0.2783319867429551, "grad_norm": 0.5615195035934448, "learning_rate": 4.976737008979653e-05, "loss": 0.8549, "step": 4220 }, { "epoch": 0.27899154121391, "grad_norm": 0.5746992230415344, "learning_rate": 4.9763436843159103e-05, "loss": 0.8949, "step": 4230 }, { "epoch": 0.27965109568486485, "grad_norm": 0.5601503849029541, "learning_rate": 4.97594707815739e-05, "loss": 0.8856, "step": 4240 }, { "epoch": 0.28031065015581974, "grad_norm": 0.5905441045761108, "learning_rate": 4.975547191029648e-05, "loss": 0.8566, "step": 4250 }, { "epoch": 0.28097020462677463, "grad_norm": 0.5898568034172058, "learning_rate": 4.97514402346259e-05, "loss": 0.8271, "step": 4260 }, { "epoch": 0.28162975909772947, "grad_norm": 0.5321268439292908, "learning_rate": 4.9747375759904694e-05, "loss": 0.9163, "step": 4270 }, { "epoch": 0.28228931356868436, "grad_norm": 0.5248478055000305, "learning_rate": 4.974327849151884e-05, "loss": 0.8777, "step": 4280 }, { "epoch": 0.2829488680396392, "grad_norm": 0.6414687037467957, "learning_rate": 4.973914843489779e-05, "loss": 0.8378, "step": 4290 }, { "epoch": 0.2836084225105941, "grad_norm": 0.66009122133255, "learning_rate": 4.9734985595514404e-05, "loss": 0.8621, "step": 4300 }, { "epoch": 0.284267976981549, "grad_norm": 0.5482301115989685, "learning_rate": 4.973078997888505e-05, "loss": 0.8923, "step": 4310 }, { "epoch": 0.2849275314525038, "grad_norm": 0.5216924548149109, "learning_rate": 4.972656159056949e-05, "loss": 0.8672, "step": 4320 }, { "epoch": 0.2855870859234587, "grad_norm": 0.6303116679191589, "learning_rate": 4.97223004361709e-05, "loss": 0.9094, "step": 4330 }, { "epoch": 0.28624664039441355, "grad_norm": 0.5526524782180786, "learning_rate": 4.97180065213359e-05, "loss": 0.8468, "step": 4340 }, { "epoch": 0.28690619486536845, "grad_norm": 0.5693706274032593, "learning_rate": 4.9713679851754526e-05, "loss": 0.8807, "step": 4350 }, { "epoch": 0.28756574933632334, "grad_norm": 0.55186927318573, "learning_rate": 4.97093204331602e-05, "loss": 0.8718, "step": 4360 }, { "epoch": 0.2882253038072782, "grad_norm": 0.6338434219360352, "learning_rate": 4.970492827132975e-05, "loss": 0.8295, "step": 4370 }, { "epoch": 0.28888485827823307, "grad_norm": 0.6424077749252319, "learning_rate": 4.970050337208339e-05, "loss": 0.8789, "step": 4380 }, { "epoch": 0.2895444127491879, "grad_norm": 0.6192151308059692, "learning_rate": 4.969604574128472e-05, "loss": 0.8799, "step": 4390 }, { "epoch": 0.2902039672201428, "grad_norm": 0.5858079195022583, "learning_rate": 4.969155538484071e-05, "loss": 0.8652, "step": 4400 }, { "epoch": 0.29086352169109764, "grad_norm": 0.6232174038887024, "learning_rate": 4.9687032308701715e-05, "loss": 0.8596, "step": 4410 }, { "epoch": 0.29152307616205253, "grad_norm": 0.6015047430992126, "learning_rate": 4.968247651886141e-05, "loss": 0.9113, "step": 4420 }, { "epoch": 0.2921826306330074, "grad_norm": 0.5569378137588501, "learning_rate": 4.9677888021356846e-05, "loss": 0.8941, "step": 4430 }, { "epoch": 0.29284218510396226, "grad_norm": 0.6491048336029053, "learning_rate": 4.9673266822268405e-05, "loss": 0.8547, "step": 4440 }, { "epoch": 0.29350173957491715, "grad_norm": 0.6606380939483643, "learning_rate": 4.966861292771982e-05, "loss": 0.8491, "step": 4450 }, { "epoch": 0.294161294045872, "grad_norm": 0.5726791620254517, "learning_rate": 4.966392634387814e-05, "loss": 0.8852, "step": 4460 }, { "epoch": 0.2948208485168269, "grad_norm": 0.6234275102615356, "learning_rate": 4.965920707695372e-05, "loss": 0.862, "step": 4470 }, { "epoch": 0.2954804029877818, "grad_norm": 0.5283318161964417, "learning_rate": 4.965445513320025e-05, "loss": 0.8395, "step": 4480 }, { "epoch": 0.2961399574587366, "grad_norm": 0.5711292028427124, "learning_rate": 4.9649670518914696e-05, "loss": 0.8736, "step": 4490 }, { "epoch": 0.2967995119296915, "grad_norm": 0.5051360130310059, "learning_rate": 4.9644853240437323e-05, "loss": 0.8488, "step": 4500 }, { "epoch": 0.29745906640064634, "grad_norm": 0.5224673748016357, "learning_rate": 4.96400033041517e-05, "loss": 0.8795, "step": 4510 }, { "epoch": 0.29811862087160124, "grad_norm": 0.5676709413528442, "learning_rate": 4.963512071648464e-05, "loss": 0.8647, "step": 4520 }, { "epoch": 0.29877817534255613, "grad_norm": 0.5240904092788696, "learning_rate": 4.963020548390626e-05, "loss": 0.8622, "step": 4530 }, { "epoch": 0.29943772981351097, "grad_norm": 0.6189314723014832, "learning_rate": 4.96252576129299e-05, "loss": 0.8645, "step": 4540 }, { "epoch": 0.30009728428446586, "grad_norm": 0.6082637906074524, "learning_rate": 4.962027711011218e-05, "loss": 0.8999, "step": 4550 }, { "epoch": 0.3007568387554207, "grad_norm": 0.4707930088043213, "learning_rate": 4.9615263982052945e-05, "loss": 0.8538, "step": 4560 }, { "epoch": 0.3014163932263756, "grad_norm": 0.5452629327774048, "learning_rate": 4.961021823539527e-05, "loss": 0.8774, "step": 4570 }, { "epoch": 0.3020759476973304, "grad_norm": 0.6488273739814758, "learning_rate": 4.960513987682547e-05, "loss": 0.8377, "step": 4580 }, { "epoch": 0.3027355021682853, "grad_norm": 0.6546377539634705, "learning_rate": 4.960002891307306e-05, "loss": 0.8474, "step": 4590 }, { "epoch": 0.3033950566392402, "grad_norm": 0.5141822099685669, "learning_rate": 4.9594885350910775e-05, "loss": 0.9065, "step": 4600 }, { "epoch": 0.30405461111019505, "grad_norm": 0.5785159468650818, "learning_rate": 4.958970919715453e-05, "loss": 0.8542, "step": 4610 }, { "epoch": 0.30471416558114994, "grad_norm": 0.5312493443489075, "learning_rate": 4.958450045866344e-05, "loss": 0.875, "step": 4620 }, { "epoch": 0.3053737200521048, "grad_norm": 0.5114297866821289, "learning_rate": 4.9579259142339803e-05, "loss": 0.865, "step": 4630 }, { "epoch": 0.3060332745230597, "grad_norm": 0.5772585868835449, "learning_rate": 4.957398525512908e-05, "loss": 0.8766, "step": 4640 }, { "epoch": 0.30669282899401457, "grad_norm": 0.6451815366744995, "learning_rate": 4.956867880401989e-05, "loss": 0.8471, "step": 4650 }, { "epoch": 0.3073523834649694, "grad_norm": 0.5841494798660278, "learning_rate": 4.9563339796044004e-05, "loss": 0.8858, "step": 4660 }, { "epoch": 0.3080119379359243, "grad_norm": 0.6031407117843628, "learning_rate": 4.955796823827636e-05, "loss": 0.8522, "step": 4670 }, { "epoch": 0.30867149240687913, "grad_norm": 0.6057419776916504, "learning_rate": 4.955256413783499e-05, "loss": 0.8584, "step": 4680 }, { "epoch": 0.309331046877834, "grad_norm": 0.6070857048034668, "learning_rate": 4.954712750188106e-05, "loss": 0.8726, "step": 4690 }, { "epoch": 0.3099906013487889, "grad_norm": 0.5966079235076904, "learning_rate": 4.954165833761889e-05, "loss": 0.8269, "step": 4700 }, { "epoch": 0.31065015581974376, "grad_norm": 0.6299596428871155, "learning_rate": 4.953615665229584e-05, "loss": 0.853, "step": 4710 }, { "epoch": 0.31130971029069865, "grad_norm": 0.531351625919342, "learning_rate": 4.9530622453202414e-05, "loss": 0.8558, "step": 4720 }, { "epoch": 0.3119692647616535, "grad_norm": 0.5519111752510071, "learning_rate": 4.952505574767217e-05, "loss": 0.8969, "step": 4730 }, { "epoch": 0.3126288192326084, "grad_norm": 0.542658805847168, "learning_rate": 4.951945654308178e-05, "loss": 0.8505, "step": 4740 }, { "epoch": 0.3132883737035632, "grad_norm": 0.6531395316123962, "learning_rate": 4.951382484685093e-05, "loss": 0.8559, "step": 4750 }, { "epoch": 0.3139479281745181, "grad_norm": 0.5937902331352234, "learning_rate": 4.9508160666442404e-05, "loss": 0.8593, "step": 4760 }, { "epoch": 0.314607482645473, "grad_norm": 0.5703521370887756, "learning_rate": 4.9502464009362015e-05, "loss": 0.8447, "step": 4770 }, { "epoch": 0.31526703711642784, "grad_norm": 0.5206552147865295, "learning_rate": 4.949673488315862e-05, "loss": 0.8426, "step": 4780 }, { "epoch": 0.31592659158738273, "grad_norm": 0.6926104426383972, "learning_rate": 4.949097329542409e-05, "loss": 0.8905, "step": 4790 }, { "epoch": 0.31658614605833757, "grad_norm": 0.5506643652915955, "learning_rate": 4.948517925379332e-05, "loss": 0.8486, "step": 4800 }, { "epoch": 0.31724570052929246, "grad_norm": 0.48270678520202637, "learning_rate": 4.947935276594421e-05, "loss": 0.8676, "step": 4810 }, { "epoch": 0.31790525500024736, "grad_norm": 0.6450955867767334, "learning_rate": 4.947349383959765e-05, "loss": 0.8027, "step": 4820 }, { "epoch": 0.3185648094712022, "grad_norm": 0.6653099656105042, "learning_rate": 4.946760248251752e-05, "loss": 0.8735, "step": 4830 }, { "epoch": 0.3192243639421571, "grad_norm": 0.6305930614471436, "learning_rate": 4.946167870251069e-05, "loss": 0.8307, "step": 4840 }, { "epoch": 0.3198839184131119, "grad_norm": 0.5568416118621826, "learning_rate": 4.945572250742696e-05, "loss": 0.8584, "step": 4850 }, { "epoch": 0.3205434728840668, "grad_norm": 0.5373754501342773, "learning_rate": 4.9449733905159125e-05, "loss": 0.9037, "step": 4860 }, { "epoch": 0.3212030273550217, "grad_norm": 0.6957933306694031, "learning_rate": 4.944371290364289e-05, "loss": 0.8741, "step": 4870 }, { "epoch": 0.32186258182597655, "grad_norm": 0.5444389581680298, "learning_rate": 4.943765951085691e-05, "loss": 0.888, "step": 4880 }, { "epoch": 0.32252213629693144, "grad_norm": 0.5078149437904358, "learning_rate": 4.943157373482276e-05, "loss": 0.8617, "step": 4890 }, { "epoch": 0.3231816907678863, "grad_norm": 0.5339057445526123, "learning_rate": 4.942545558360493e-05, "loss": 0.8304, "step": 4900 }, { "epoch": 0.32384124523884117, "grad_norm": 0.5843328237533569, "learning_rate": 4.941930506531081e-05, "loss": 0.8492, "step": 4910 }, { "epoch": 0.324500799709796, "grad_norm": 0.5101600885391235, "learning_rate": 4.9413122188090676e-05, "loss": 0.8542, "step": 4920 }, { "epoch": 0.3251603541807509, "grad_norm": 0.4721750020980835, "learning_rate": 4.940690696013769e-05, "loss": 0.8755, "step": 4930 }, { "epoch": 0.3258199086517058, "grad_norm": 0.5229089260101318, "learning_rate": 4.940065938968787e-05, "loss": 0.8836, "step": 4940 }, { "epoch": 0.32647946312266063, "grad_norm": 0.4514390528202057, "learning_rate": 4.9394379485020124e-05, "loss": 0.8757, "step": 4950 }, { "epoch": 0.3271390175936155, "grad_norm": 0.5412085652351379, "learning_rate": 4.938806725445617e-05, "loss": 0.8467, "step": 4960 }, { "epoch": 0.32779857206457036, "grad_norm": 0.5420681238174438, "learning_rate": 4.938172270636059e-05, "loss": 0.8671, "step": 4970 }, { "epoch": 0.32845812653552525, "grad_norm": 0.5458070635795593, "learning_rate": 4.937534584914076e-05, "loss": 0.8665, "step": 4980 }, { "epoch": 0.32911768100648015, "grad_norm": 0.5852974057197571, "learning_rate": 4.936893669124691e-05, "loss": 0.8574, "step": 4990 }, { "epoch": 0.329777235477435, "grad_norm": 0.525571346282959, "learning_rate": 4.936249524117206e-05, "loss": 0.852, "step": 5000 }, { "epoch": 0.3304367899483899, "grad_norm": 0.5699165463447571, "learning_rate": 4.935602150745198e-05, "loss": 0.8888, "step": 5010 }, { "epoch": 0.3310963444193447, "grad_norm": 0.5384656190872192, "learning_rate": 4.934951549866529e-05, "loss": 0.8972, "step": 5020 }, { "epoch": 0.3317558988902996, "grad_norm": 0.6388717889785767, "learning_rate": 4.934297722343332e-05, "loss": 0.8702, "step": 5030 }, { "epoch": 0.3324154533612545, "grad_norm": 0.5993049740791321, "learning_rate": 4.933640669042019e-05, "loss": 0.9064, "step": 5040 }, { "epoch": 0.33307500783220934, "grad_norm": 0.49405619502067566, "learning_rate": 4.932980390833275e-05, "loss": 0.8532, "step": 5050 }, { "epoch": 0.33373456230316423, "grad_norm": 0.5631572604179382, "learning_rate": 4.9323168885920604e-05, "loss": 0.8464, "step": 5060 }, { "epoch": 0.33439411677411907, "grad_norm": 0.5703868269920349, "learning_rate": 4.9316501631976056e-05, "loss": 0.8859, "step": 5070 }, { "epoch": 0.33505367124507396, "grad_norm": 0.5599934458732605, "learning_rate": 4.930980215533413e-05, "loss": 0.8796, "step": 5080 }, { "epoch": 0.3357132257160288, "grad_norm": 0.5828800797462463, "learning_rate": 4.9303070464872556e-05, "loss": 0.8847, "step": 5090 }, { "epoch": 0.3363727801869837, "grad_norm": 0.5994510650634766, "learning_rate": 4.929630656951175e-05, "loss": 0.914, "step": 5100 }, { "epoch": 0.3370323346579386, "grad_norm": 0.5373006463050842, "learning_rate": 4.9289510478214795e-05, "loss": 0.9014, "step": 5110 }, { "epoch": 0.3376918891288934, "grad_norm": 0.5972238779067993, "learning_rate": 4.9282682199987446e-05, "loss": 0.8267, "step": 5120 }, { "epoch": 0.3383514435998483, "grad_norm": 0.5829002261161804, "learning_rate": 4.92758217438781e-05, "loss": 0.8404, "step": 5130 }, { "epoch": 0.33901099807080315, "grad_norm": 0.5077376961708069, "learning_rate": 4.926892911897782e-05, "loss": 0.8797, "step": 5140 }, { "epoch": 0.33967055254175804, "grad_norm": 0.5396333932876587, "learning_rate": 4.926200433442025e-05, "loss": 0.8627, "step": 5150 }, { "epoch": 0.34033010701271293, "grad_norm": 0.5384257435798645, "learning_rate": 4.92550473993817e-05, "loss": 0.901, "step": 5160 }, { "epoch": 0.34098966148366777, "grad_norm": 0.5948606729507446, "learning_rate": 4.924805832308106e-05, "loss": 0.8521, "step": 5170 }, { "epoch": 0.34164921595462266, "grad_norm": 0.5079233646392822, "learning_rate": 4.92410371147798e-05, "loss": 0.8591, "step": 5180 }, { "epoch": 0.3423087704255775, "grad_norm": 0.49939635396003723, "learning_rate": 4.9233983783782005e-05, "loss": 0.8761, "step": 5190 }, { "epoch": 0.3429683248965324, "grad_norm": 0.5150532126426697, "learning_rate": 4.9226898339434294e-05, "loss": 0.8774, "step": 5200 }, { "epoch": 0.3436278793674873, "grad_norm": 0.5863020420074463, "learning_rate": 4.921978079112585e-05, "loss": 0.8877, "step": 5210 }, { "epoch": 0.3442874338384421, "grad_norm": 0.6055648326873779, "learning_rate": 4.92126311482884e-05, "loss": 0.8641, "step": 5220 }, { "epoch": 0.344946988309397, "grad_norm": 0.5826128125190735, "learning_rate": 4.92054494203962e-05, "loss": 0.8957, "step": 5230 }, { "epoch": 0.34560654278035186, "grad_norm": 0.47612279653549194, "learning_rate": 4.9198235616966034e-05, "loss": 0.8525, "step": 5240 }, { "epoch": 0.34626609725130675, "grad_norm": 0.5482989549636841, "learning_rate": 4.919098974755717e-05, "loss": 0.9164, "step": 5250 }, { "epoch": 0.3469256517222616, "grad_norm": 0.5377515554428101, "learning_rate": 4.918371182177138e-05, "loss": 0.867, "step": 5260 }, { "epoch": 0.3475852061932165, "grad_norm": 0.6391288638114929, "learning_rate": 4.917640184925292e-05, "loss": 0.8724, "step": 5270 }, { "epoch": 0.34824476066417137, "grad_norm": 0.550122082233429, "learning_rate": 4.916905983968849e-05, "loss": 0.8763, "step": 5280 }, { "epoch": 0.3489043151351262, "grad_norm": 0.5827004313468933, "learning_rate": 4.9161685802807276e-05, "loss": 0.8563, "step": 5290 }, { "epoch": 0.3495638696060811, "grad_norm": 0.5390400886535645, "learning_rate": 4.9154279748380884e-05, "loss": 0.8564, "step": 5300 }, { "epoch": 0.35022342407703594, "grad_norm": 0.4911317229270935, "learning_rate": 4.9146841686223336e-05, "loss": 0.8702, "step": 5310 }, { "epoch": 0.35088297854799083, "grad_norm": 0.5611599087715149, "learning_rate": 4.91393716261911e-05, "loss": 0.8464, "step": 5320 }, { "epoch": 0.3515425330189457, "grad_norm": 0.6718890070915222, "learning_rate": 4.913186957818303e-05, "loss": 0.8805, "step": 5330 }, { "epoch": 0.35220208748990056, "grad_norm": 0.6611254215240479, "learning_rate": 4.9124335552140353e-05, "loss": 0.8526, "step": 5340 }, { "epoch": 0.35286164196085545, "grad_norm": 0.5236311554908752, "learning_rate": 4.91167695580467e-05, "loss": 0.8691, "step": 5350 }, { "epoch": 0.3535211964318103, "grad_norm": 0.6203802824020386, "learning_rate": 4.910917160592804e-05, "loss": 0.859, "step": 5360 }, { "epoch": 0.3541807509027652, "grad_norm": 0.4989217519760132, "learning_rate": 4.910154170585272e-05, "loss": 0.9082, "step": 5370 }, { "epoch": 0.3548403053737201, "grad_norm": 0.5358820557594299, "learning_rate": 4.909387986793137e-05, "loss": 0.8199, "step": 5380 }, { "epoch": 0.3554998598446749, "grad_norm": 0.5449334979057312, "learning_rate": 4.908618610231701e-05, "loss": 0.866, "step": 5390 }, { "epoch": 0.3561594143156298, "grad_norm": 0.5716169476509094, "learning_rate": 4.9078460419204905e-05, "loss": 0.8263, "step": 5400 }, { "epoch": 0.35681896878658464, "grad_norm": 0.5559737682342529, "learning_rate": 4.907070282883267e-05, "loss": 0.8876, "step": 5410 }, { "epoch": 0.35747852325753954, "grad_norm": 0.5706018805503845, "learning_rate": 4.9062913341480163e-05, "loss": 0.8184, "step": 5420 }, { "epoch": 0.3581380777284944, "grad_norm": 0.4763959050178528, "learning_rate": 4.905509196746952e-05, "loss": 0.8376, "step": 5430 }, { "epoch": 0.35879763219944927, "grad_norm": 0.6196017265319824, "learning_rate": 4.9047238717165135e-05, "loss": 0.8774, "step": 5440 }, { "epoch": 0.35945718667040416, "grad_norm": 0.5639172196388245, "learning_rate": 4.9039353600973645e-05, "loss": 0.8914, "step": 5450 }, { "epoch": 0.360116741141359, "grad_norm": 0.539543867111206, "learning_rate": 4.90314366293439e-05, "loss": 0.8769, "step": 5460 }, { "epoch": 0.3607762956123139, "grad_norm": 0.5149198770523071, "learning_rate": 4.902348781276699e-05, "loss": 0.8533, "step": 5470 }, { "epoch": 0.36143585008326873, "grad_norm": 0.5052158832550049, "learning_rate": 4.901550716177616e-05, "loss": 0.8773, "step": 5480 }, { "epoch": 0.3620954045542236, "grad_norm": 0.48542529344558716, "learning_rate": 4.9007494686946896e-05, "loss": 0.8475, "step": 5490 }, { "epoch": 0.3627549590251785, "grad_norm": 0.5748151540756226, "learning_rate": 4.8999450398896804e-05, "loss": 0.8471, "step": 5500 }, { "epoch": 0.36341451349613335, "grad_norm": 0.5332309007644653, "learning_rate": 4.899137430828568e-05, "loss": 0.8578, "step": 5510 }, { "epoch": 0.36407406796708824, "grad_norm": 0.5747234225273132, "learning_rate": 4.8983266425815446e-05, "loss": 0.8573, "step": 5520 }, { "epoch": 0.3647336224380431, "grad_norm": 0.5445331335067749, "learning_rate": 4.897512676223016e-05, "loss": 0.8938, "step": 5530 }, { "epoch": 0.365393176908998, "grad_norm": 0.5994349122047424, "learning_rate": 4.8966955328315985e-05, "loss": 0.8726, "step": 5540 }, { "epoch": 0.36605273137995287, "grad_norm": 0.656285285949707, "learning_rate": 4.8958752134901196e-05, "loss": 0.8654, "step": 5550 }, { "epoch": 0.3667122858509077, "grad_norm": 0.4907941222190857, "learning_rate": 4.8950517192856146e-05, "loss": 0.8306, "step": 5560 }, { "epoch": 0.3673718403218626, "grad_norm": 0.544032633304596, "learning_rate": 4.894225051309327e-05, "loss": 0.9078, "step": 5570 }, { "epoch": 0.36803139479281743, "grad_norm": 0.5852176547050476, "learning_rate": 4.893395210656704e-05, "loss": 0.8421, "step": 5580 }, { "epoch": 0.3686909492637723, "grad_norm": 0.49195295572280884, "learning_rate": 4.8925621984274e-05, "loss": 0.8807, "step": 5590 }, { "epoch": 0.36935050373472716, "grad_norm": 0.6170905828475952, "learning_rate": 4.8917260157252674e-05, "loss": 0.8737, "step": 5600 }, { "epoch": 0.37001005820568206, "grad_norm": 0.5538198351860046, "learning_rate": 4.890886663658366e-05, "loss": 0.896, "step": 5610 }, { "epoch": 0.37066961267663695, "grad_norm": 0.5857877135276794, "learning_rate": 4.89004414333895e-05, "loss": 0.895, "step": 5620 }, { "epoch": 0.3713291671475918, "grad_norm": 0.46444687247276306, "learning_rate": 4.889198455883476e-05, "loss": 0.8998, "step": 5630 }, { "epoch": 0.3719887216185467, "grad_norm": 0.6267735362052917, "learning_rate": 4.888349602412595e-05, "loss": 0.8512, "step": 5640 }, { "epoch": 0.3726482760895015, "grad_norm": 0.5671398043632507, "learning_rate": 4.887497584051154e-05, "loss": 0.8621, "step": 5650 }, { "epoch": 0.3733078305604564, "grad_norm": 0.596243679523468, "learning_rate": 4.8866424019281935e-05, "loss": 0.8371, "step": 5660 }, { "epoch": 0.3739673850314113, "grad_norm": 0.5437570810317993, "learning_rate": 4.885784057176949e-05, "loss": 0.8937, "step": 5670 }, { "epoch": 0.37462693950236614, "grad_norm": 0.5501529574394226, "learning_rate": 4.884922550934843e-05, "loss": 0.8591, "step": 5680 }, { "epoch": 0.37528649397332103, "grad_norm": 0.5578798651695251, "learning_rate": 4.884057884343491e-05, "loss": 0.8518, "step": 5690 }, { "epoch": 0.37594604844427587, "grad_norm": 0.56142258644104, "learning_rate": 4.8831900585486936e-05, "loss": 0.9065, "step": 5700 }, { "epoch": 0.37660560291523076, "grad_norm": 0.5245373845100403, "learning_rate": 4.882319074700439e-05, "loss": 0.8545, "step": 5710 }, { "epoch": 0.37726515738618566, "grad_norm": 0.5014517903327942, "learning_rate": 4.8814449339529e-05, "loss": 0.8646, "step": 5720 }, { "epoch": 0.3779247118571405, "grad_norm": 0.5961427688598633, "learning_rate": 4.880567637464434e-05, "loss": 0.855, "step": 5730 }, { "epoch": 0.3785842663280954, "grad_norm": 0.5327067971229553, "learning_rate": 4.8796871863975776e-05, "loss": 0.8777, "step": 5740 }, { "epoch": 0.3792438207990502, "grad_norm": 0.4703054130077362, "learning_rate": 4.8788035819190504e-05, "loss": 0.8834, "step": 5750 }, { "epoch": 0.3799033752700051, "grad_norm": 0.6308974027633667, "learning_rate": 4.877916825199749e-05, "loss": 0.8532, "step": 5760 }, { "epoch": 0.38056292974095995, "grad_norm": 0.45056989789009094, "learning_rate": 4.8770269174147465e-05, "loss": 0.8441, "step": 5770 }, { "epoch": 0.38122248421191485, "grad_norm": 0.5260857939720154, "learning_rate": 4.8761338597432945e-05, "loss": 0.8776, "step": 5780 }, { "epoch": 0.38188203868286974, "grad_norm": 0.5394748449325562, "learning_rate": 4.875237653368816e-05, "loss": 0.8358, "step": 5790 }, { "epoch": 0.3825415931538246, "grad_norm": 0.5730496048927307, "learning_rate": 4.874338299478907e-05, "loss": 0.9312, "step": 5800 }, { "epoch": 0.38320114762477947, "grad_norm": 0.5718327760696411, "learning_rate": 4.8734357992653355e-05, "loss": 0.8601, "step": 5810 }, { "epoch": 0.3838607020957343, "grad_norm": 0.5419636368751526, "learning_rate": 4.872530153924037e-05, "loss": 0.857, "step": 5820 }, { "epoch": 0.3845202565666892, "grad_norm": 0.63425213098526, "learning_rate": 4.871621364655116e-05, "loss": 0.888, "step": 5830 }, { "epoch": 0.3851798110376441, "grad_norm": 0.567084789276123, "learning_rate": 4.870709432662843e-05, "loss": 0.8543, "step": 5840 }, { "epoch": 0.38583936550859893, "grad_norm": 0.7061660289764404, "learning_rate": 4.869794359155653e-05, "loss": 0.8304, "step": 5850 }, { "epoch": 0.3864989199795538, "grad_norm": 0.5780564546585083, "learning_rate": 4.868876145346144e-05, "loss": 0.9157, "step": 5860 }, { "epoch": 0.38715847445050866, "grad_norm": 0.6022658348083496, "learning_rate": 4.8679547924510735e-05, "loss": 0.8334, "step": 5870 }, { "epoch": 0.38781802892146355, "grad_norm": 0.5134875774383545, "learning_rate": 4.867030301691363e-05, "loss": 0.8873, "step": 5880 }, { "epoch": 0.38847758339241845, "grad_norm": 0.5205914378166199, "learning_rate": 4.866102674292088e-05, "loss": 0.8931, "step": 5890 }, { "epoch": 0.3891371378633733, "grad_norm": 0.5714764595031738, "learning_rate": 4.865171911482481e-05, "loss": 0.86, "step": 5900 }, { "epoch": 0.3897966923343282, "grad_norm": 0.5874856114387512, "learning_rate": 4.864238014495933e-05, "loss": 0.862, "step": 5910 }, { "epoch": 0.390456246805283, "grad_norm": 0.5221552848815918, "learning_rate": 4.8633009845699824e-05, "loss": 0.8148, "step": 5920 }, { "epoch": 0.3911158012762379, "grad_norm": 0.5328670144081116, "learning_rate": 4.862360822946324e-05, "loss": 0.8696, "step": 5930 }, { "epoch": 0.39177535574719274, "grad_norm": 0.5030723214149475, "learning_rate": 4.861417530870801e-05, "loss": 0.8793, "step": 5940 }, { "epoch": 0.39243491021814764, "grad_norm": 0.5878767967224121, "learning_rate": 4.860471109593404e-05, "loss": 0.8511, "step": 5950 }, { "epoch": 0.39309446468910253, "grad_norm": 0.441631019115448, "learning_rate": 4.859521560368271e-05, "loss": 0.9012, "step": 5960 }, { "epoch": 0.39375401916005737, "grad_norm": 0.5347902774810791, "learning_rate": 4.858568884453685e-05, "loss": 0.9162, "step": 5970 }, { "epoch": 0.39441357363101226, "grad_norm": 0.49979016184806824, "learning_rate": 4.857613083112072e-05, "loss": 0.8954, "step": 5980 }, { "epoch": 0.3950731281019671, "grad_norm": 0.5070289969444275, "learning_rate": 4.85665415761e-05, "loss": 0.9112, "step": 5990 }, { "epoch": 0.395732682572922, "grad_norm": 0.49998021125793457, "learning_rate": 4.855692109218177e-05, "loss": 0.849, "step": 6000 }, { "epoch": 0.3963922370438769, "grad_norm": 0.5425897836685181, "learning_rate": 4.854726939211448e-05, "loss": 0.8485, "step": 6010 }, { "epoch": 0.3970517915148317, "grad_norm": 0.5229442119598389, "learning_rate": 4.8537586488687964e-05, "loss": 0.8821, "step": 6020 }, { "epoch": 0.3977113459857866, "grad_norm": 0.5876744985580444, "learning_rate": 4.852787239473339e-05, "loss": 0.8534, "step": 6030 }, { "epoch": 0.39837090045674145, "grad_norm": 0.5264390707015991, "learning_rate": 4.851812712312327e-05, "loss": 0.8467, "step": 6040 }, { "epoch": 0.39903045492769634, "grad_norm": 0.48363423347473145, "learning_rate": 4.850835068677143e-05, "loss": 0.8573, "step": 6050 }, { "epoch": 0.39969000939865124, "grad_norm": 0.5530417561531067, "learning_rate": 4.849854309863297e-05, "loss": 0.9285, "step": 6060 }, { "epoch": 0.4003495638696061, "grad_norm": 0.5027384161949158, "learning_rate": 4.84887043717043e-05, "loss": 0.8676, "step": 6070 }, { "epoch": 0.40100911834056097, "grad_norm": 0.546110212802887, "learning_rate": 4.8478834519023084e-05, "loss": 0.8827, "step": 6080 }, { "epoch": 0.4016686728115158, "grad_norm": 0.614757776260376, "learning_rate": 4.8468933553668225e-05, "loss": 0.8417, "step": 6090 }, { "epoch": 0.4023282272824707, "grad_norm": 0.6290607452392578, "learning_rate": 4.845900148875986e-05, "loss": 0.8582, "step": 6100 }, { "epoch": 0.40298778175342553, "grad_norm": 0.5472835898399353, "learning_rate": 4.844903833745933e-05, "loss": 0.8602, "step": 6110 }, { "epoch": 0.4036473362243804, "grad_norm": 0.5838117599487305, "learning_rate": 4.8439044112969186e-05, "loss": 0.8411, "step": 6120 }, { "epoch": 0.4043068906953353, "grad_norm": 0.4958944022655487, "learning_rate": 4.842901882853314e-05, "loss": 0.8941, "step": 6130 }, { "epoch": 0.40496644516629016, "grad_norm": 0.5282686352729797, "learning_rate": 4.841896249743607e-05, "loss": 0.8996, "step": 6140 }, { "epoch": 0.40562599963724505, "grad_norm": 0.4729621410369873, "learning_rate": 4.8408875133003995e-05, "loss": 0.8869, "step": 6150 }, { "epoch": 0.4062855541081999, "grad_norm": 0.4891512095928192, "learning_rate": 4.839875674860405e-05, "loss": 0.9026, "step": 6160 }, { "epoch": 0.4069451085791548, "grad_norm": 0.5252025127410889, "learning_rate": 4.8388607357644475e-05, "loss": 0.8563, "step": 6170 }, { "epoch": 0.4076046630501097, "grad_norm": 0.5011799931526184, "learning_rate": 4.837842697357462e-05, "loss": 0.8434, "step": 6180 }, { "epoch": 0.4082642175210645, "grad_norm": 0.542057454586029, "learning_rate": 4.836821560988489e-05, "loss": 0.8237, "step": 6190 }, { "epoch": 0.4089237719920194, "grad_norm": 0.5338612794876099, "learning_rate": 4.835797328010672e-05, "loss": 0.8907, "step": 6200 }, { "epoch": 0.40958332646297424, "grad_norm": 0.5403425693511963, "learning_rate": 4.834769999781262e-05, "loss": 0.8345, "step": 6210 }, { "epoch": 0.41024288093392913, "grad_norm": 0.5578640699386597, "learning_rate": 4.833739577661609e-05, "loss": 0.8822, "step": 6220 }, { "epoch": 0.410902435404884, "grad_norm": 0.556004524230957, "learning_rate": 4.832706063017164e-05, "loss": 0.8276, "step": 6230 }, { "epoch": 0.41156198987583886, "grad_norm": 0.5602332949638367, "learning_rate": 4.831669457217475e-05, "loss": 0.8635, "step": 6240 }, { "epoch": 0.41222154434679376, "grad_norm": 0.5787188410758972, "learning_rate": 4.830629761636186e-05, "loss": 0.8235, "step": 6250 }, { "epoch": 0.4128810988177486, "grad_norm": 0.6306541562080383, "learning_rate": 4.829586977651038e-05, "loss": 0.876, "step": 6260 }, { "epoch": 0.4135406532887035, "grad_norm": 0.5497257113456726, "learning_rate": 4.828541106643862e-05, "loss": 0.7759, "step": 6270 }, { "epoch": 0.4142002077596583, "grad_norm": 0.48434269428253174, "learning_rate": 4.82749215000058e-05, "loss": 0.9067, "step": 6280 }, { "epoch": 0.4148597622306132, "grad_norm": 0.5243717432022095, "learning_rate": 4.8264401091112035e-05, "loss": 0.8673, "step": 6290 }, { "epoch": 0.4155193167015681, "grad_norm": 0.5414081811904907, "learning_rate": 4.8253849853698307e-05, "loss": 0.8885, "step": 6300 }, { "epoch": 0.41617887117252295, "grad_norm": 0.5486317873001099, "learning_rate": 4.824326780174645e-05, "loss": 0.8785, "step": 6310 }, { "epoch": 0.41683842564347784, "grad_norm": 0.5972316861152649, "learning_rate": 4.8232654949279145e-05, "loss": 0.8628, "step": 6320 }, { "epoch": 0.4174979801144327, "grad_norm": 0.4799783229827881, "learning_rate": 4.822201131035986e-05, "loss": 0.8222, "step": 6330 }, { "epoch": 0.41815753458538757, "grad_norm": 0.4656096398830414, "learning_rate": 4.821133689909288e-05, "loss": 0.8367, "step": 6340 }, { "epoch": 0.41881708905634246, "grad_norm": 0.4220138490200043, "learning_rate": 4.820063172962328e-05, "loss": 0.8608, "step": 6350 }, { "epoch": 0.4194766435272973, "grad_norm": 0.5239322185516357, "learning_rate": 4.8189895816136855e-05, "loss": 0.8596, "step": 6360 }, { "epoch": 0.4201361979982522, "grad_norm": 0.562239944934845, "learning_rate": 4.8179129172860173e-05, "loss": 0.892, "step": 6370 }, { "epoch": 0.42079575246920703, "grad_norm": 0.5668872594833374, "learning_rate": 4.8168331814060505e-05, "loss": 0.8036, "step": 6380 }, { "epoch": 0.4214553069401619, "grad_norm": 0.5321053266525269, "learning_rate": 4.8157503754045835e-05, "loss": 0.8519, "step": 6390 }, { "epoch": 0.4221148614111168, "grad_norm": 0.5495429635047913, "learning_rate": 4.814664500716483e-05, "loss": 0.8603, "step": 6400 }, { "epoch": 0.42277441588207165, "grad_norm": 0.5138208270072937, "learning_rate": 4.8135755587806806e-05, "loss": 0.8398, "step": 6410 }, { "epoch": 0.42343397035302655, "grad_norm": 0.549922525882721, "learning_rate": 4.812483551040176e-05, "loss": 0.8564, "step": 6420 }, { "epoch": 0.4240935248239814, "grad_norm": 0.49002325534820557, "learning_rate": 4.811388478942026e-05, "loss": 0.8315, "step": 6430 }, { "epoch": 0.4247530792949363, "grad_norm": 0.43466535210609436, "learning_rate": 4.810290343937353e-05, "loss": 0.8757, "step": 6440 }, { "epoch": 0.4254126337658911, "grad_norm": 0.5885916948318481, "learning_rate": 4.8091891474813366e-05, "loss": 0.8468, "step": 6450 }, { "epoch": 0.426072188236846, "grad_norm": 0.6256899237632751, "learning_rate": 4.808084891033212e-05, "loss": 0.8636, "step": 6460 }, { "epoch": 0.4267317427078009, "grad_norm": 0.49635109305381775, "learning_rate": 4.806977576056272e-05, "loss": 0.8742, "step": 6470 }, { "epoch": 0.42739129717875574, "grad_norm": 0.5088977217674255, "learning_rate": 4.8058672040178586e-05, "loss": 0.8276, "step": 6480 }, { "epoch": 0.42805085164971063, "grad_norm": 0.4978698194026947, "learning_rate": 4.804753776389369e-05, "loss": 0.8841, "step": 6490 }, { "epoch": 0.42871040612066547, "grad_norm": 0.5396628379821777, "learning_rate": 4.803637294646245e-05, "loss": 0.8505, "step": 6500 }, { "epoch": 0.42936996059162036, "grad_norm": 0.5257627367973328, "learning_rate": 4.8025177602679805e-05, "loss": 0.8388, "step": 6510 }, { "epoch": 0.43002951506257525, "grad_norm": 0.4984934628009796, "learning_rate": 4.8013951747381104e-05, "loss": 0.8656, "step": 6520 }, { "epoch": 0.4306890695335301, "grad_norm": 0.5394895076751709, "learning_rate": 4.800269539544214e-05, "loss": 0.9021, "step": 6530 }, { "epoch": 0.431348624004485, "grad_norm": 0.5981066226959229, "learning_rate": 4.799140856177914e-05, "loss": 0.847, "step": 6540 }, { "epoch": 0.4320081784754398, "grad_norm": 0.5400550365447998, "learning_rate": 4.798009126134869e-05, "loss": 0.8804, "step": 6550 }, { "epoch": 0.4326677329463947, "grad_norm": 0.5571463108062744, "learning_rate": 4.7968743509147776e-05, "loss": 0.8671, "step": 6560 }, { "epoch": 0.4333272874173496, "grad_norm": 0.5644498467445374, "learning_rate": 4.795736532021372e-05, "loss": 0.8762, "step": 6570 }, { "epoch": 0.43398684188830444, "grad_norm": 0.4889192581176758, "learning_rate": 4.794595670962417e-05, "loss": 0.8543, "step": 6580 }, { "epoch": 0.43464639635925933, "grad_norm": 0.6175575256347656, "learning_rate": 4.793451769249712e-05, "loss": 0.8644, "step": 6590 }, { "epoch": 0.43530595083021417, "grad_norm": 0.548516035079956, "learning_rate": 4.7923048283990814e-05, "loss": 0.9286, "step": 6600 }, { "epoch": 0.43596550530116907, "grad_norm": 0.5567188262939453, "learning_rate": 4.7911548499303806e-05, "loss": 0.8522, "step": 6610 }, { "epoch": 0.43662505977212396, "grad_norm": 0.602759599685669, "learning_rate": 4.790001835367488e-05, "loss": 0.8517, "step": 6620 }, { "epoch": 0.4372846142430788, "grad_norm": 0.5450781583786011, "learning_rate": 4.7888457862383064e-05, "loss": 0.8317, "step": 6630 }, { "epoch": 0.4379441687140337, "grad_norm": 0.5020095705986023, "learning_rate": 4.787686704074759e-05, "loss": 0.8516, "step": 6640 }, { "epoch": 0.4386037231849885, "grad_norm": 0.527065098285675, "learning_rate": 4.7865245904127874e-05, "loss": 0.839, "step": 6650 }, { "epoch": 0.4392632776559434, "grad_norm": 0.5484679937362671, "learning_rate": 4.7853594467923533e-05, "loss": 0.8759, "step": 6660 }, { "epoch": 0.43992283212689826, "grad_norm": 0.5647040605545044, "learning_rate": 4.784191274757431e-05, "loss": 0.8639, "step": 6670 }, { "epoch": 0.44058238659785315, "grad_norm": 0.5041146874427795, "learning_rate": 4.783020075856009e-05, "loss": 0.8089, "step": 6680 }, { "epoch": 0.44124194106880804, "grad_norm": 0.5434051156044006, "learning_rate": 4.781845851640086e-05, "loss": 0.8887, "step": 6690 }, { "epoch": 0.4419014955397629, "grad_norm": 0.5963016152381897, "learning_rate": 4.780668603665669e-05, "loss": 0.8474, "step": 6700 }, { "epoch": 0.44256105001071777, "grad_norm": 0.6325017213821411, "learning_rate": 4.7794883334927756e-05, "loss": 0.867, "step": 6710 }, { "epoch": 0.4432206044816726, "grad_norm": 0.514064610004425, "learning_rate": 4.778305042685422e-05, "loss": 0.8779, "step": 6720 }, { "epoch": 0.4438801589526275, "grad_norm": 0.5603863596916199, "learning_rate": 4.777118732811633e-05, "loss": 0.8183, "step": 6730 }, { "epoch": 0.4445397134235824, "grad_norm": 0.5008962750434875, "learning_rate": 4.7759294054434314e-05, "loss": 0.8398, "step": 6740 }, { "epoch": 0.44519926789453723, "grad_norm": 0.4778035581111908, "learning_rate": 4.774737062156839e-05, "loss": 0.8348, "step": 6750 }, { "epoch": 0.4458588223654921, "grad_norm": 0.4997784495353699, "learning_rate": 4.773541704531873e-05, "loss": 0.8549, "step": 6760 }, { "epoch": 0.44651837683644696, "grad_norm": 0.5219247341156006, "learning_rate": 4.772343334152548e-05, "loss": 0.8739, "step": 6770 }, { "epoch": 0.44717793130740185, "grad_norm": 0.5919073224067688, "learning_rate": 4.771141952606868e-05, "loss": 0.8334, "step": 6780 }, { "epoch": 0.44783748577835675, "grad_norm": 0.5725424885749817, "learning_rate": 4.7699375614868275e-05, "loss": 0.8633, "step": 6790 }, { "epoch": 0.4484970402493116, "grad_norm": 0.592842161655426, "learning_rate": 4.7687301623884105e-05, "loss": 0.8395, "step": 6800 }, { "epoch": 0.4491565947202665, "grad_norm": 0.4690439999103546, "learning_rate": 4.767519756911587e-05, "loss": 0.8987, "step": 6810 }, { "epoch": 0.4498161491912213, "grad_norm": 0.48259902000427246, "learning_rate": 4.766306346660309e-05, "loss": 0.8764, "step": 6820 }, { "epoch": 0.4504757036621762, "grad_norm": 0.565538763999939, "learning_rate": 4.765089933242511e-05, "loss": 0.8634, "step": 6830 }, { "epoch": 0.45113525813313105, "grad_norm": 0.5153759717941284, "learning_rate": 4.763870518270109e-05, "loss": 0.8678, "step": 6840 }, { "epoch": 0.45179481260408594, "grad_norm": 0.4666423797607422, "learning_rate": 4.762648103358995e-05, "loss": 0.8452, "step": 6850 }, { "epoch": 0.45245436707504083, "grad_norm": 0.543533980846405, "learning_rate": 4.761422690129035e-05, "loss": 0.8482, "step": 6860 }, { "epoch": 0.45311392154599567, "grad_norm": 0.5554654598236084, "learning_rate": 4.7601942802040696e-05, "loss": 0.8684, "step": 6870 }, { "epoch": 0.45377347601695056, "grad_norm": 0.5532326102256775, "learning_rate": 4.758962875211911e-05, "loss": 0.8608, "step": 6880 }, { "epoch": 0.4544330304879054, "grad_norm": 0.47074058651924133, "learning_rate": 4.757728476784339e-05, "loss": 0.8137, "step": 6890 }, { "epoch": 0.4550925849588603, "grad_norm": 0.4835397005081177, "learning_rate": 4.756491086557102e-05, "loss": 0.8586, "step": 6900 }, { "epoch": 0.4557521394298152, "grad_norm": 0.49764856696128845, "learning_rate": 4.7552507061699094e-05, "loss": 0.8595, "step": 6910 }, { "epoch": 0.45641169390077, "grad_norm": 0.5029451847076416, "learning_rate": 4.754007337266437e-05, "loss": 0.8581, "step": 6920 }, { "epoch": 0.4570712483717249, "grad_norm": 0.5425214767456055, "learning_rate": 4.752760981494319e-05, "loss": 0.8458, "step": 6930 }, { "epoch": 0.45773080284267975, "grad_norm": 0.48952221870422363, "learning_rate": 4.751511640505147e-05, "loss": 0.8655, "step": 6940 }, { "epoch": 0.45839035731363464, "grad_norm": 0.5727320909500122, "learning_rate": 4.750259315954469e-05, "loss": 0.8527, "step": 6950 }, { "epoch": 0.45904991178458954, "grad_norm": 0.4817902147769928, "learning_rate": 4.749004009501787e-05, "loss": 0.8289, "step": 6960 }, { "epoch": 0.4597094662555444, "grad_norm": 0.5458888411521912, "learning_rate": 4.7477457228105535e-05, "loss": 0.8764, "step": 6970 }, { "epoch": 0.46036902072649927, "grad_norm": 0.5658895373344421, "learning_rate": 4.746484457548171e-05, "loss": 0.8694, "step": 6980 }, { "epoch": 0.4610285751974541, "grad_norm": 0.5030274987220764, "learning_rate": 4.74522021538599e-05, "loss": 0.8161, "step": 6990 }, { "epoch": 0.461688129668409, "grad_norm": 0.6383715271949768, "learning_rate": 4.743952997999303e-05, "loss": 0.8224, "step": 7000 }, { "epoch": 0.46234768413936383, "grad_norm": 0.5009424090385437, "learning_rate": 4.7426828070673476e-05, "loss": 0.87, "step": 7010 }, { "epoch": 0.4630072386103187, "grad_norm": 0.5131643414497375, "learning_rate": 4.7414096442733015e-05, "loss": 0.8434, "step": 7020 }, { "epoch": 0.4636667930812736, "grad_norm": 0.516572117805481, "learning_rate": 4.740133511304278e-05, "loss": 0.8653, "step": 7030 }, { "epoch": 0.46432634755222846, "grad_norm": 0.5132176876068115, "learning_rate": 4.7388544098513295e-05, "loss": 0.8502, "step": 7040 }, { "epoch": 0.46498590202318335, "grad_norm": 0.5855733156204224, "learning_rate": 4.737572341609441e-05, "loss": 0.8573, "step": 7050 }, { "epoch": 0.4656454564941382, "grad_norm": 0.585620641708374, "learning_rate": 4.736287308277527e-05, "loss": 0.8575, "step": 7060 }, { "epoch": 0.4663050109650931, "grad_norm": 0.506710410118103, "learning_rate": 4.7349993115584354e-05, "loss": 0.8729, "step": 7070 }, { "epoch": 0.466964565436048, "grad_norm": 0.5265117883682251, "learning_rate": 4.733708353158936e-05, "loss": 0.8097, "step": 7080 }, { "epoch": 0.4676241199070028, "grad_norm": 0.5592107772827148, "learning_rate": 4.732414434789727e-05, "loss": 0.8626, "step": 7090 }, { "epoch": 0.4682836743779577, "grad_norm": 0.5142652988433838, "learning_rate": 4.7311175581654266e-05, "loss": 0.8759, "step": 7100 }, { "epoch": 0.46894322884891254, "grad_norm": 0.4814768433570862, "learning_rate": 4.729817725004575e-05, "loss": 0.8939, "step": 7110 }, { "epoch": 0.46960278331986743, "grad_norm": 0.4561408460140228, "learning_rate": 4.728514937029629e-05, "loss": 0.8657, "step": 7120 }, { "epoch": 0.4702623377908223, "grad_norm": 0.573836088180542, "learning_rate": 4.7272091959669606e-05, "loss": 0.8114, "step": 7130 }, { "epoch": 0.47092189226177716, "grad_norm": 0.5914710164070129, "learning_rate": 4.725900503546856e-05, "loss": 0.8626, "step": 7140 }, { "epoch": 0.47158144673273206, "grad_norm": 0.4675734341144562, "learning_rate": 4.724588861503513e-05, "loss": 0.8399, "step": 7150 }, { "epoch": 0.4722410012036869, "grad_norm": 0.5302778482437134, "learning_rate": 4.7232742715750356e-05, "loss": 0.8578, "step": 7160 }, { "epoch": 0.4729005556746418, "grad_norm": 0.48141634464263916, "learning_rate": 4.721956735503437e-05, "loss": 0.8934, "step": 7170 }, { "epoch": 0.4735601101455966, "grad_norm": 0.46267980337142944, "learning_rate": 4.7206362550346314e-05, "loss": 0.8764, "step": 7180 }, { "epoch": 0.4742196646165515, "grad_norm": 0.5466578006744385, "learning_rate": 4.719312831918437e-05, "loss": 0.8604, "step": 7190 }, { "epoch": 0.4748792190875064, "grad_norm": 0.5205459594726562, "learning_rate": 4.717986467908573e-05, "loss": 0.8327, "step": 7200 }, { "epoch": 0.47553877355846125, "grad_norm": 0.6165599822998047, "learning_rate": 4.71665716476265e-05, "loss": 0.8117, "step": 7210 }, { "epoch": 0.47619832802941614, "grad_norm": 0.5680373311042786, "learning_rate": 4.7153249242421794e-05, "loss": 0.8502, "step": 7220 }, { "epoch": 0.476857882500371, "grad_norm": 0.46300211548805237, "learning_rate": 4.713989748112561e-05, "loss": 0.8657, "step": 7230 }, { "epoch": 0.47751743697132587, "grad_norm": 0.5191219449043274, "learning_rate": 4.7126516381430877e-05, "loss": 0.8613, "step": 7240 }, { "epoch": 0.47817699144228076, "grad_norm": 0.5691511631011963, "learning_rate": 4.711310596106937e-05, "loss": 0.8751, "step": 7250 }, { "epoch": 0.4788365459132356, "grad_norm": 0.5520512461662292, "learning_rate": 4.7099666237811736e-05, "loss": 0.8751, "step": 7260 }, { "epoch": 0.4794961003841905, "grad_norm": 0.5244108438491821, "learning_rate": 4.708619722946746e-05, "loss": 0.8205, "step": 7270 }, { "epoch": 0.48015565485514533, "grad_norm": 0.4873800277709961, "learning_rate": 4.707269895388482e-05, "loss": 0.8659, "step": 7280 }, { "epoch": 0.4808152093261002, "grad_norm": 0.5746051669120789, "learning_rate": 4.705917142895087e-05, "loss": 0.8204, "step": 7290 }, { "epoch": 0.4814747637970551, "grad_norm": 0.506810188293457, "learning_rate": 4.7045614672591466e-05, "loss": 0.8652, "step": 7300 }, { "epoch": 0.48213431826800995, "grad_norm": 0.46993669867515564, "learning_rate": 4.703202870277113e-05, "loss": 0.861, "step": 7310 }, { "epoch": 0.48279387273896485, "grad_norm": 0.5876075625419617, "learning_rate": 4.701841353749316e-05, "loss": 0.8545, "step": 7320 }, { "epoch": 0.4834534272099197, "grad_norm": 0.5827206969261169, "learning_rate": 4.7004769194799495e-05, "loss": 0.8505, "step": 7330 }, { "epoch": 0.4841129816808746, "grad_norm": 0.5576984882354736, "learning_rate": 4.699109569277078e-05, "loss": 0.8291, "step": 7340 }, { "epoch": 0.4847725361518294, "grad_norm": 0.5203624367713928, "learning_rate": 4.697739304952627e-05, "loss": 0.8648, "step": 7350 }, { "epoch": 0.4854320906227843, "grad_norm": 0.5325854420661926, "learning_rate": 4.696366128322385e-05, "loss": 0.8756, "step": 7360 }, { "epoch": 0.4860916450937392, "grad_norm": 0.5148476958274841, "learning_rate": 4.6949900412059976e-05, "loss": 0.8653, "step": 7370 }, { "epoch": 0.48675119956469404, "grad_norm": 0.5403642654418945, "learning_rate": 4.6936110454269706e-05, "loss": 0.8666, "step": 7380 }, { "epoch": 0.48741075403564893, "grad_norm": 0.6091113090515137, "learning_rate": 4.692229142812662e-05, "loss": 0.8594, "step": 7390 }, { "epoch": 0.48807030850660377, "grad_norm": 0.5950354337692261, "learning_rate": 4.6908443351942824e-05, "loss": 0.8594, "step": 7400 }, { "epoch": 0.48872986297755866, "grad_norm": 0.5460037589073181, "learning_rate": 4.68945662440689e-05, "loss": 0.8146, "step": 7410 }, { "epoch": 0.48938941744851355, "grad_norm": 0.4589973986148834, "learning_rate": 4.688066012289394e-05, "loss": 0.8469, "step": 7420 }, { "epoch": 0.4900489719194684, "grad_norm": 0.5626265406608582, "learning_rate": 4.686672500684545e-05, "loss": 0.8655, "step": 7430 }, { "epoch": 0.4907085263904233, "grad_norm": 0.5438959002494812, "learning_rate": 4.685276091438937e-05, "loss": 0.8631, "step": 7440 }, { "epoch": 0.4913680808613781, "grad_norm": 0.5762490034103394, "learning_rate": 4.683876786403003e-05, "loss": 0.8649, "step": 7450 }, { "epoch": 0.492027635332333, "grad_norm": 0.5192339420318604, "learning_rate": 4.6824745874310155e-05, "loss": 0.8716, "step": 7460 }, { "epoch": 0.4926871898032879, "grad_norm": 0.6523929238319397, "learning_rate": 4.6810694963810784e-05, "loss": 0.8636, "step": 7470 }, { "epoch": 0.49334674427424274, "grad_norm": 0.4878346920013428, "learning_rate": 4.679661515115132e-05, "loss": 0.8706, "step": 7480 }, { "epoch": 0.49400629874519764, "grad_norm": 0.5933941602706909, "learning_rate": 4.678250645498943e-05, "loss": 0.842, "step": 7490 }, { "epoch": 0.4946658532161525, "grad_norm": 0.5156781673431396, "learning_rate": 4.6768368894021074e-05, "loss": 0.8545, "step": 7500 }, { "epoch": 0.49532540768710737, "grad_norm": 0.5238683223724365, "learning_rate": 4.675420248698045e-05, "loss": 0.8971, "step": 7510 }, { "epoch": 0.4959849621580622, "grad_norm": 0.5473213791847229, "learning_rate": 4.6740007252640006e-05, "loss": 0.8299, "step": 7520 }, { "epoch": 0.4966445166290171, "grad_norm": 0.5568445920944214, "learning_rate": 4.672578320981036e-05, "loss": 0.9001, "step": 7530 }, { "epoch": 0.497304071099972, "grad_norm": 0.5153902769088745, "learning_rate": 4.671153037734032e-05, "loss": 0.8358, "step": 7540 }, { "epoch": 0.4979636255709268, "grad_norm": 0.6011136174201965, "learning_rate": 4.669724877411685e-05, "loss": 0.8451, "step": 7550 }, { "epoch": 0.4986231800418817, "grad_norm": 0.5671474933624268, "learning_rate": 4.668293841906502e-05, "loss": 0.8882, "step": 7560 }, { "epoch": 0.49928273451283656, "grad_norm": 0.5087859034538269, "learning_rate": 4.6668599331148025e-05, "loss": 0.817, "step": 7570 }, { "epoch": 0.49994228898379145, "grad_norm": 0.5660974383354187, "learning_rate": 4.66542315293671e-05, "loss": 0.8382, "step": 7580 }, { "epoch": 0.5006018434547463, "grad_norm": 0.5032976269721985, "learning_rate": 4.663983503276158e-05, "loss": 0.8495, "step": 7590 }, { "epoch": 0.5012613979257012, "grad_norm": 0.6020365357398987, "learning_rate": 4.662540986040878e-05, "loss": 0.8475, "step": 7600 }, { "epoch": 0.501920952396656, "grad_norm": 0.5403103232383728, "learning_rate": 4.6610956031424026e-05, "loss": 0.8442, "step": 7610 }, { "epoch": 0.5025805068676109, "grad_norm": 0.5416961908340454, "learning_rate": 4.659647356496063e-05, "loss": 0.8514, "step": 7620 }, { "epoch": 0.5032400613385658, "grad_norm": 0.5349296927452087, "learning_rate": 4.658196248020984e-05, "loss": 0.8336, "step": 7630 }, { "epoch": 0.5038996158095207, "grad_norm": 0.5633635520935059, "learning_rate": 4.6567422796400846e-05, "loss": 0.8008, "step": 7640 }, { "epoch": 0.5045591702804756, "grad_norm": 0.5842314958572388, "learning_rate": 4.6552854532800705e-05, "loss": 0.8616, "step": 7650 }, { "epoch": 0.5052187247514304, "grad_norm": 0.5005068182945251, "learning_rate": 4.6538257708714375e-05, "loss": 0.8432, "step": 7660 }, { "epoch": 0.5058782792223853, "grad_norm": 0.5463336110115051, "learning_rate": 4.652363234348464e-05, "loss": 0.8573, "step": 7670 }, { "epoch": 0.5065378336933402, "grad_norm": 0.5448721647262573, "learning_rate": 4.6508978456492115e-05, "loss": 0.9031, "step": 7680 }, { "epoch": 0.507197388164295, "grad_norm": 0.5476658940315247, "learning_rate": 4.649429606715521e-05, "loss": 0.8108, "step": 7690 }, { "epoch": 0.5078569426352499, "grad_norm": 0.5261156558990479, "learning_rate": 4.6479585194930105e-05, "loss": 0.8532, "step": 7700 }, { "epoch": 0.5085164971062047, "grad_norm": 0.7172783613204956, "learning_rate": 4.646484585931072e-05, "loss": 0.8915, "step": 7710 }, { "epoch": 0.5091760515771596, "grad_norm": 0.4560132324695587, "learning_rate": 4.64500780798287e-05, "loss": 0.8142, "step": 7720 }, { "epoch": 0.5098356060481145, "grad_norm": 0.5214531421661377, "learning_rate": 4.643528187605336e-05, "loss": 0.8289, "step": 7730 }, { "epoch": 0.5104951605190694, "grad_norm": 0.5078032612800598, "learning_rate": 4.6420457267591724e-05, "loss": 0.8697, "step": 7740 }, { "epoch": 0.5111547149900242, "grad_norm": 0.5201875567436218, "learning_rate": 4.64056042740884e-05, "loss": 0.8752, "step": 7750 }, { "epoch": 0.5118142694609791, "grad_norm": 0.4340056777000427, "learning_rate": 4.639072291522567e-05, "loss": 0.8809, "step": 7760 }, { "epoch": 0.512473823931934, "grad_norm": 0.5306442379951477, "learning_rate": 4.637581321072335e-05, "loss": 0.832, "step": 7770 }, { "epoch": 0.5131333784028889, "grad_norm": 0.48031529784202576, "learning_rate": 4.636087518033886e-05, "loss": 0.8208, "step": 7780 }, { "epoch": 0.5137929328738438, "grad_norm": 0.4846910834312439, "learning_rate": 4.6345908843867126e-05, "loss": 0.9174, "step": 7790 }, { "epoch": 0.5144524873447985, "grad_norm": 0.5625187158584595, "learning_rate": 4.6330914221140596e-05, "loss": 0.8346, "step": 7800 }, { "epoch": 0.5151120418157534, "grad_norm": 0.6281886100769043, "learning_rate": 4.631589133202921e-05, "loss": 0.8391, "step": 7810 }, { "epoch": 0.5157715962867083, "grad_norm": 0.516608715057373, "learning_rate": 4.630084019644034e-05, "loss": 0.8417, "step": 7820 }, { "epoch": 0.5164311507576632, "grad_norm": 0.5382763147354126, "learning_rate": 4.6285760834318814e-05, "loss": 0.8164, "step": 7830 }, { "epoch": 0.5170907052286181, "grad_norm": 0.4136751592159271, "learning_rate": 4.6270653265646855e-05, "loss": 0.8465, "step": 7840 }, { "epoch": 0.5177502596995729, "grad_norm": 0.4957684874534607, "learning_rate": 4.6255517510444054e-05, "loss": 0.8992, "step": 7850 }, { "epoch": 0.5184098141705278, "grad_norm": 0.4853816628456116, "learning_rate": 4.624035358876736e-05, "loss": 0.8319, "step": 7860 }, { "epoch": 0.5190693686414827, "grad_norm": 0.5266784429550171, "learning_rate": 4.622516152071106e-05, "loss": 0.893, "step": 7870 }, { "epoch": 0.5197289231124376, "grad_norm": 0.5038085579872131, "learning_rate": 4.620994132640671e-05, "loss": 0.8838, "step": 7880 }, { "epoch": 0.5203884775833925, "grad_norm": 0.511411190032959, "learning_rate": 4.619469302602317e-05, "loss": 0.8379, "step": 7890 }, { "epoch": 0.5210480320543472, "grad_norm": 0.4921332001686096, "learning_rate": 4.6179416639766524e-05, "loss": 0.863, "step": 7900 }, { "epoch": 0.5217075865253021, "grad_norm": 0.5133247971534729, "learning_rate": 4.616411218788007e-05, "loss": 0.8444, "step": 7910 }, { "epoch": 0.522367140996257, "grad_norm": 0.4950665235519409, "learning_rate": 4.614877969064431e-05, "loss": 0.8385, "step": 7920 }, { "epoch": 0.5230266954672119, "grad_norm": 0.6108483076095581, "learning_rate": 4.613341916837689e-05, "loss": 0.8486, "step": 7930 }, { "epoch": 0.5236862499381668, "grad_norm": 0.5163877010345459, "learning_rate": 4.611803064143263e-05, "loss": 0.8779, "step": 7940 }, { "epoch": 0.5243458044091216, "grad_norm": 0.4994930624961853, "learning_rate": 4.6102614130203424e-05, "loss": 0.8292, "step": 7950 }, { "epoch": 0.5250053588800765, "grad_norm": 0.45950081944465637, "learning_rate": 4.608716965511827e-05, "loss": 0.8807, "step": 7960 }, { "epoch": 0.5256649133510314, "grad_norm": 0.5794354677200317, "learning_rate": 4.60716972366432e-05, "loss": 0.8288, "step": 7970 }, { "epoch": 0.5263244678219863, "grad_norm": 0.593536376953125, "learning_rate": 4.60561968952813e-05, "loss": 0.8667, "step": 7980 }, { "epoch": 0.5269840222929412, "grad_norm": 0.5221846103668213, "learning_rate": 4.604066865157265e-05, "loss": 0.8593, "step": 7990 }, { "epoch": 0.527643576763896, "grad_norm": 0.5120049118995667, "learning_rate": 4.602511252609429e-05, "loss": 0.8116, "step": 8000 }, { "epoch": 0.5283031312348508, "grad_norm": 0.594819962978363, "learning_rate": 4.600952853946023e-05, "loss": 0.8882, "step": 8010 }, { "epoch": 0.5289626857058057, "grad_norm": 0.4388793110847473, "learning_rate": 4.599391671232138e-05, "loss": 0.854, "step": 8020 }, { "epoch": 0.5296222401767606, "grad_norm": 0.49736639857292175, "learning_rate": 4.597827706536556e-05, "loss": 0.8745, "step": 8030 }, { "epoch": 0.5302817946477155, "grad_norm": 0.6666948795318604, "learning_rate": 4.5962609619317434e-05, "loss": 0.8671, "step": 8040 }, { "epoch": 0.5309413491186703, "grad_norm": 0.4953151047229767, "learning_rate": 4.5946914394938514e-05, "loss": 0.874, "step": 8050 }, { "epoch": 0.5316009035896252, "grad_norm": 0.6658579707145691, "learning_rate": 4.593119141302714e-05, "loss": 0.8242, "step": 8060 }, { "epoch": 0.5322604580605801, "grad_norm": 0.484624445438385, "learning_rate": 4.59154406944184e-05, "loss": 0.8286, "step": 8070 }, { "epoch": 0.532920012531535, "grad_norm": 0.4317564368247986, "learning_rate": 4.5899662259984166e-05, "loss": 0.8598, "step": 8080 }, { "epoch": 0.5335795670024898, "grad_norm": 0.6070392727851868, "learning_rate": 4.588385613063302e-05, "loss": 0.8192, "step": 8090 }, { "epoch": 0.5342391214734447, "grad_norm": 0.5171172022819519, "learning_rate": 4.5868022327310264e-05, "loss": 0.84, "step": 8100 }, { "epoch": 0.5348986759443995, "grad_norm": 0.482692688703537, "learning_rate": 4.585216087099783e-05, "loss": 0.8015, "step": 8110 }, { "epoch": 0.5355582304153544, "grad_norm": 0.5581316351890564, "learning_rate": 4.583627178271435e-05, "loss": 0.8459, "step": 8120 }, { "epoch": 0.5362177848863093, "grad_norm": 0.6078279614448547, "learning_rate": 4.582035508351502e-05, "loss": 0.8771, "step": 8130 }, { "epoch": 0.5368773393572641, "grad_norm": 0.5564948916435242, "learning_rate": 4.580441079449167e-05, "loss": 0.8413, "step": 8140 }, { "epoch": 0.537536893828219, "grad_norm": 0.5623395442962646, "learning_rate": 4.5788438936772647e-05, "loss": 0.8595, "step": 8150 }, { "epoch": 0.5381964482991739, "grad_norm": 0.4650050699710846, "learning_rate": 4.5772439531522874e-05, "loss": 0.8825, "step": 8160 }, { "epoch": 0.5388560027701288, "grad_norm": 0.4734998643398285, "learning_rate": 4.5756412599943734e-05, "loss": 0.8794, "step": 8170 }, { "epoch": 0.5395155572410837, "grad_norm": 0.48804497718811035, "learning_rate": 4.5740358163273114e-05, "loss": 0.8392, "step": 8180 }, { "epoch": 0.5401751117120385, "grad_norm": 0.518511176109314, "learning_rate": 4.572427624278536e-05, "loss": 0.8408, "step": 8190 }, { "epoch": 0.5408346661829934, "grad_norm": 0.4985922574996948, "learning_rate": 4.57081668597912e-05, "loss": 0.8528, "step": 8200 }, { "epoch": 0.5414942206539483, "grad_norm": 0.5364166498184204, "learning_rate": 4.569203003563778e-05, "loss": 0.8069, "step": 8210 }, { "epoch": 0.5421537751249031, "grad_norm": 0.5090025067329407, "learning_rate": 4.56758657917086e-05, "loss": 0.854, "step": 8220 }, { "epoch": 0.542813329595858, "grad_norm": 0.5399594306945801, "learning_rate": 4.565967414942351e-05, "loss": 0.8182, "step": 8230 }, { "epoch": 0.5434728840668128, "grad_norm": 0.48023220896720886, "learning_rate": 4.564345513023865e-05, "loss": 0.8335, "step": 8240 }, { "epoch": 0.5441324385377677, "grad_norm": 0.47295162081718445, "learning_rate": 4.562720875564644e-05, "loss": 0.8727, "step": 8250 }, { "epoch": 0.5447919930087226, "grad_norm": 0.46377071738243103, "learning_rate": 4.561093504717555e-05, "loss": 0.82, "step": 8260 }, { "epoch": 0.5454515474796775, "grad_norm": 0.5018013119697571, "learning_rate": 4.5594634026390896e-05, "loss": 0.8799, "step": 8270 }, { "epoch": 0.5461111019506324, "grad_norm": 0.5019428730010986, "learning_rate": 4.5578305714893535e-05, "loss": 0.8223, "step": 8280 }, { "epoch": 0.5467706564215872, "grad_norm": 0.5506460666656494, "learning_rate": 4.556195013432073e-05, "loss": 0.9144, "step": 8290 }, { "epoch": 0.5474302108925421, "grad_norm": 0.5080007910728455, "learning_rate": 4.5545567306345874e-05, "loss": 0.9037, "step": 8300 }, { "epoch": 0.548089765363497, "grad_norm": 0.47640281915664673, "learning_rate": 4.5529157252678446e-05, "loss": 0.8722, "step": 8310 }, { "epoch": 0.5487493198344519, "grad_norm": 0.5623273849487305, "learning_rate": 4.551271999506403e-05, "loss": 0.8497, "step": 8320 }, { "epoch": 0.5494088743054067, "grad_norm": 0.4991755187511444, "learning_rate": 4.5496255555284236e-05, "loss": 0.8393, "step": 8330 }, { "epoch": 0.5500684287763615, "grad_norm": 0.48409467935562134, "learning_rate": 4.5479763955156706e-05, "loss": 0.836, "step": 8340 }, { "epoch": 0.5507279832473164, "grad_norm": 0.5671091675758362, "learning_rate": 4.546324521653507e-05, "loss": 0.9025, "step": 8350 }, { "epoch": 0.5513875377182713, "grad_norm": 0.506266713142395, "learning_rate": 4.5446699361308914e-05, "loss": 0.8011, "step": 8360 }, { "epoch": 0.5520470921892262, "grad_norm": 0.5483802556991577, "learning_rate": 4.5430126411403776e-05, "loss": 0.851, "step": 8370 }, { "epoch": 0.5527066466601811, "grad_norm": 0.46331244707107544, "learning_rate": 4.541352638878108e-05, "loss": 0.8544, "step": 8380 }, { "epoch": 0.5533662011311359, "grad_norm": 0.5024694800376892, "learning_rate": 4.5396899315438135e-05, "loss": 0.8617, "step": 8390 }, { "epoch": 0.5540257556020908, "grad_norm": 0.4586072564125061, "learning_rate": 4.538024521340809e-05, "loss": 0.8427, "step": 8400 }, { "epoch": 0.5546853100730457, "grad_norm": 0.5485617518424988, "learning_rate": 4.5363564104759905e-05, "loss": 0.8684, "step": 8410 }, { "epoch": 0.5553448645440006, "grad_norm": 0.4568479359149933, "learning_rate": 4.5346856011598345e-05, "loss": 0.8274, "step": 8420 }, { "epoch": 0.5560044190149553, "grad_norm": 0.574080765247345, "learning_rate": 4.533012095606391e-05, "loss": 0.8643, "step": 8430 }, { "epoch": 0.5566639734859102, "grad_norm": 0.5243128538131714, "learning_rate": 4.5313358960332864e-05, "loss": 0.8536, "step": 8440 }, { "epoch": 0.5573235279568651, "grad_norm": 0.4808977544307709, "learning_rate": 4.5296570046617136e-05, "loss": 0.8712, "step": 8450 }, { "epoch": 0.55798308242782, "grad_norm": 0.5325448513031006, "learning_rate": 4.527975423716434e-05, "loss": 0.8249, "step": 8460 }, { "epoch": 0.5586426368987749, "grad_norm": 0.53151535987854, "learning_rate": 4.5262911554257723e-05, "loss": 0.8731, "step": 8470 }, { "epoch": 0.5593021913697297, "grad_norm": 0.48107099533081055, "learning_rate": 4.524604202021615e-05, "loss": 0.8429, "step": 8480 }, { "epoch": 0.5599617458406846, "grad_norm": 0.49375805258750916, "learning_rate": 4.5229145657394075e-05, "loss": 0.8207, "step": 8490 }, { "epoch": 0.5606213003116395, "grad_norm": 0.5281050205230713, "learning_rate": 4.521222248818148e-05, "loss": 0.8357, "step": 8500 }, { "epoch": 0.5612808547825944, "grad_norm": 0.5373192429542542, "learning_rate": 4.51952725350039e-05, "loss": 0.8558, "step": 8510 }, { "epoch": 0.5619404092535493, "grad_norm": 0.42790329456329346, "learning_rate": 4.5178295820322326e-05, "loss": 0.8332, "step": 8520 }, { "epoch": 0.562599963724504, "grad_norm": 0.5260501503944397, "learning_rate": 4.5161292366633246e-05, "loss": 0.853, "step": 8530 }, { "epoch": 0.5632595181954589, "grad_norm": 0.4377802312374115, "learning_rate": 4.514426219646857e-05, "loss": 0.8405, "step": 8540 }, { "epoch": 0.5639190726664138, "grad_norm": 0.5629652738571167, "learning_rate": 4.512720533239558e-05, "loss": 0.8894, "step": 8550 }, { "epoch": 0.5645786271373687, "grad_norm": 0.5291570425033569, "learning_rate": 4.5110121797017005e-05, "loss": 0.8372, "step": 8560 }, { "epoch": 0.5652381816083236, "grad_norm": 0.5068237781524658, "learning_rate": 4.509301161297083e-05, "loss": 0.8577, "step": 8570 }, { "epoch": 0.5658977360792784, "grad_norm": 0.4621034264564514, "learning_rate": 4.507587480293041e-05, "loss": 0.8374, "step": 8580 }, { "epoch": 0.5665572905502333, "grad_norm": 0.572524905204773, "learning_rate": 4.505871138960437e-05, "loss": 0.8846, "step": 8590 }, { "epoch": 0.5672168450211882, "grad_norm": 0.49350565671920776, "learning_rate": 4.504152139573659e-05, "loss": 0.8411, "step": 8600 }, { "epoch": 0.5678763994921431, "grad_norm": 0.4351065754890442, "learning_rate": 4.502430484410614e-05, "loss": 0.8809, "step": 8610 }, { "epoch": 0.568535953963098, "grad_norm": 0.5555775761604309, "learning_rate": 4.500706175752733e-05, "loss": 0.8555, "step": 8620 }, { "epoch": 0.5691955084340528, "grad_norm": 0.48449385166168213, "learning_rate": 4.498979215884963e-05, "loss": 0.8341, "step": 8630 }, { "epoch": 0.5698550629050076, "grad_norm": 0.5540316104888916, "learning_rate": 4.4972496070957596e-05, "loss": 0.8344, "step": 8640 }, { "epoch": 0.5705146173759625, "grad_norm": 0.6112542152404785, "learning_rate": 4.495517351677093e-05, "loss": 0.8555, "step": 8650 }, { "epoch": 0.5711741718469174, "grad_norm": 0.4980996549129486, "learning_rate": 4.4937824519244384e-05, "loss": 0.8395, "step": 8660 }, { "epoch": 0.5718337263178723, "grad_norm": 0.5588312149047852, "learning_rate": 4.4920449101367765e-05, "loss": 0.8709, "step": 8670 }, { "epoch": 0.5724932807888271, "grad_norm": 0.5733287930488586, "learning_rate": 4.4903047286165875e-05, "loss": 0.8522, "step": 8680 }, { "epoch": 0.573152835259782, "grad_norm": 0.5462709665298462, "learning_rate": 4.488561909669851e-05, "loss": 0.8962, "step": 8690 }, { "epoch": 0.5738123897307369, "grad_norm": 0.506364107131958, "learning_rate": 4.48681645560604e-05, "loss": 0.8295, "step": 8700 }, { "epoch": 0.5744719442016918, "grad_norm": 0.5398162603378296, "learning_rate": 4.485068368738122e-05, "loss": 0.8408, "step": 8710 }, { "epoch": 0.5751314986726467, "grad_norm": 0.5593112111091614, "learning_rate": 4.4833176513825504e-05, "loss": 0.8576, "step": 8720 }, { "epoch": 0.5757910531436015, "grad_norm": 0.5460782051086426, "learning_rate": 4.481564305859266e-05, "loss": 0.8577, "step": 8730 }, { "epoch": 0.5764506076145564, "grad_norm": 0.4949951767921448, "learning_rate": 4.479808334491692e-05, "loss": 0.8214, "step": 8740 }, { "epoch": 0.5771101620855112, "grad_norm": 0.5325927138328552, "learning_rate": 4.478049739606731e-05, "loss": 0.8378, "step": 8750 }, { "epoch": 0.5777697165564661, "grad_norm": 0.4853087067604065, "learning_rate": 4.476288523534762e-05, "loss": 0.8298, "step": 8760 }, { "epoch": 0.578429271027421, "grad_norm": 0.49878838658332825, "learning_rate": 4.4745246886096385e-05, "loss": 0.8152, "step": 8770 }, { "epoch": 0.5790888254983758, "grad_norm": 0.46912261843681335, "learning_rate": 4.4727582371686825e-05, "loss": 0.8127, "step": 8780 }, { "epoch": 0.5797483799693307, "grad_norm": 0.5477004647254944, "learning_rate": 4.470989171552685e-05, "loss": 0.8446, "step": 8790 }, { "epoch": 0.5804079344402856, "grad_norm": 0.5392802953720093, "learning_rate": 4.4692174941059006e-05, "loss": 0.8225, "step": 8800 }, { "epoch": 0.5810674889112405, "grad_norm": 0.5593754053115845, "learning_rate": 4.467443207176044e-05, "loss": 0.8241, "step": 8810 }, { "epoch": 0.5817270433821953, "grad_norm": 0.5052483677864075, "learning_rate": 4.4656663131142894e-05, "loss": 0.8303, "step": 8820 }, { "epoch": 0.5823865978531502, "grad_norm": 0.5355677604675293, "learning_rate": 4.463886814275264e-05, "loss": 0.8278, "step": 8830 }, { "epoch": 0.5830461523241051, "grad_norm": 0.4736327826976776, "learning_rate": 4.462104713017048e-05, "loss": 0.8249, "step": 8840 }, { "epoch": 0.58370570679506, "grad_norm": 0.5423553586006165, "learning_rate": 4.460320011701169e-05, "loss": 0.8718, "step": 8850 }, { "epoch": 0.5843652612660148, "grad_norm": 0.5245490074157715, "learning_rate": 4.458532712692603e-05, "loss": 0.8582, "step": 8860 }, { "epoch": 0.5850248157369696, "grad_norm": 0.5676358342170715, "learning_rate": 4.456742818359765e-05, "loss": 0.9181, "step": 8870 }, { "epoch": 0.5856843702079245, "grad_norm": 0.5473547577857971, "learning_rate": 4.45495033107451e-05, "loss": 0.8865, "step": 8880 }, { "epoch": 0.5863439246788794, "grad_norm": 0.6121447086334229, "learning_rate": 4.453155253212129e-05, "loss": 0.8642, "step": 8890 }, { "epoch": 0.5870034791498343, "grad_norm": 0.4944417178630829, "learning_rate": 4.451357587151347e-05, "loss": 0.8767, "step": 8900 }, { "epoch": 0.5876630336207892, "grad_norm": 0.5933345556259155, "learning_rate": 4.449557335274318e-05, "loss": 0.875, "step": 8910 }, { "epoch": 0.588322588091744, "grad_norm": 0.5733058452606201, "learning_rate": 4.4477544999666214e-05, "loss": 0.8545, "step": 8920 }, { "epoch": 0.5889821425626989, "grad_norm": 0.5385257005691528, "learning_rate": 4.4459490836172626e-05, "loss": 0.8053, "step": 8930 }, { "epoch": 0.5896416970336538, "grad_norm": 0.5365327596664429, "learning_rate": 4.4441410886186644e-05, "loss": 0.8491, "step": 8940 }, { "epoch": 0.5903012515046087, "grad_norm": 0.5605241656303406, "learning_rate": 4.442330517366668e-05, "loss": 0.8735, "step": 8950 }, { "epoch": 0.5909608059755636, "grad_norm": 0.4860929548740387, "learning_rate": 4.44051737226053e-05, "loss": 0.8801, "step": 8960 }, { "epoch": 0.5916203604465183, "grad_norm": 0.5219595432281494, "learning_rate": 4.438701655702914e-05, "loss": 0.8252, "step": 8970 }, { "epoch": 0.5922799149174732, "grad_norm": 0.49364590644836426, "learning_rate": 4.436883370099895e-05, "loss": 0.8633, "step": 8980 }, { "epoch": 0.5929394693884281, "grad_norm": 0.5454347729682922, "learning_rate": 4.43506251786095e-05, "loss": 0.9105, "step": 8990 }, { "epoch": 0.593599023859383, "grad_norm": 0.5115081667900085, "learning_rate": 4.4332391013989574e-05, "loss": 0.808, "step": 9000 }, { "epoch": 0.5942585783303379, "grad_norm": 0.5812116861343384, "learning_rate": 4.4314131231301944e-05, "loss": 0.8058, "step": 9010 }, { "epoch": 0.5949181328012927, "grad_norm": 0.5520011186599731, "learning_rate": 4.4295845854743326e-05, "loss": 0.854, "step": 9020 }, { "epoch": 0.5955776872722476, "grad_norm": 0.5113941431045532, "learning_rate": 4.4277534908544335e-05, "loss": 0.8387, "step": 9030 }, { "epoch": 0.5962372417432025, "grad_norm": 0.5096448659896851, "learning_rate": 4.425919841696952e-05, "loss": 0.8877, "step": 9040 }, { "epoch": 0.5968967962141574, "grad_norm": 0.5333675146102905, "learning_rate": 4.42408364043172e-05, "loss": 0.8456, "step": 9050 }, { "epoch": 0.5975563506851123, "grad_norm": 0.5552167296409607, "learning_rate": 4.42224488949196e-05, "loss": 0.8681, "step": 9060 }, { "epoch": 0.598215905156067, "grad_norm": 0.45667341351509094, "learning_rate": 4.420403591314266e-05, "loss": 0.8792, "step": 9070 }, { "epoch": 0.5988754596270219, "grad_norm": 0.6120560169219971, "learning_rate": 4.418559748338611e-05, "loss": 0.8355, "step": 9080 }, { "epoch": 0.5995350140979768, "grad_norm": 0.4900742471218109, "learning_rate": 4.416713363008341e-05, "loss": 0.8652, "step": 9090 }, { "epoch": 0.6001945685689317, "grad_norm": 0.5414189696311951, "learning_rate": 4.414864437770169e-05, "loss": 0.8779, "step": 9100 }, { "epoch": 0.6008541230398866, "grad_norm": 0.5061354637145996, "learning_rate": 4.413012975074174e-05, "loss": 0.8185, "step": 9110 }, { "epoch": 0.6015136775108414, "grad_norm": 0.5018121600151062, "learning_rate": 4.4111589773737976e-05, "loss": 0.8634, "step": 9120 }, { "epoch": 0.6021732319817963, "grad_norm": 0.5141509175300598, "learning_rate": 4.4093024471258414e-05, "loss": 0.8258, "step": 9130 }, { "epoch": 0.6028327864527512, "grad_norm": 0.5262400507926941, "learning_rate": 4.407443386790462e-05, "loss": 0.8597, "step": 9140 }, { "epoch": 0.6034923409237061, "grad_norm": 0.5369293093681335, "learning_rate": 4.40558179883117e-05, "loss": 0.9019, "step": 9150 }, { "epoch": 0.6041518953946609, "grad_norm": 0.5289623141288757, "learning_rate": 4.4037176857148235e-05, "loss": 0.8262, "step": 9160 }, { "epoch": 0.6048114498656157, "grad_norm": 0.5164844393730164, "learning_rate": 4.401851049911629e-05, "loss": 0.8415, "step": 9170 }, { "epoch": 0.6054710043365706, "grad_norm": 0.47341203689575195, "learning_rate": 4.399981893895133e-05, "loss": 0.8433, "step": 9180 }, { "epoch": 0.6061305588075255, "grad_norm": 0.4960174560546875, "learning_rate": 4.398110220142225e-05, "loss": 0.8375, "step": 9190 }, { "epoch": 0.6067901132784804, "grad_norm": 0.5548086762428284, "learning_rate": 4.39623603113313e-05, "loss": 0.8448, "step": 9200 }, { "epoch": 0.6074496677494352, "grad_norm": 0.5246376991271973, "learning_rate": 4.3943593293514024e-05, "loss": 0.865, "step": 9210 }, { "epoch": 0.6081092222203901, "grad_norm": 0.5469093918800354, "learning_rate": 4.3924801172839315e-05, "loss": 0.8506, "step": 9220 }, { "epoch": 0.608768776691345, "grad_norm": 0.4608471393585205, "learning_rate": 4.390598397420931e-05, "loss": 0.864, "step": 9230 }, { "epoch": 0.6094283311622999, "grad_norm": 0.5221256017684937, "learning_rate": 4.3887141722559356e-05, "loss": 0.8849, "step": 9240 }, { "epoch": 0.6100878856332548, "grad_norm": 0.49641260504722595, "learning_rate": 4.3868274442858034e-05, "loss": 0.8584, "step": 9250 }, { "epoch": 0.6107474401042096, "grad_norm": 0.5827483534812927, "learning_rate": 4.384938216010708e-05, "loss": 0.8585, "step": 9260 }, { "epoch": 0.6114069945751645, "grad_norm": 0.48872390389442444, "learning_rate": 4.383046489934135e-05, "loss": 0.8287, "step": 9270 }, { "epoch": 0.6120665490461193, "grad_norm": 0.6012468934059143, "learning_rate": 4.3811522685628806e-05, "loss": 0.8265, "step": 9280 }, { "epoch": 0.6127261035170742, "grad_norm": 0.48777174949645996, "learning_rate": 4.379255554407048e-05, "loss": 0.8712, "step": 9290 }, { "epoch": 0.6133856579880291, "grad_norm": 0.5294257998466492, "learning_rate": 4.377356349980044e-05, "loss": 0.8713, "step": 9300 }, { "epoch": 0.6140452124589839, "grad_norm": 0.534724771976471, "learning_rate": 4.3754546577985734e-05, "loss": 0.8705, "step": 9310 }, { "epoch": 0.6147047669299388, "grad_norm": 0.49359211325645447, "learning_rate": 4.37355048038264e-05, "loss": 0.8156, "step": 9320 }, { "epoch": 0.6153643214008937, "grad_norm": 0.48252472281455994, "learning_rate": 4.37164382025554e-05, "loss": 0.8668, "step": 9330 }, { "epoch": 0.6160238758718486, "grad_norm": 0.5378819704055786, "learning_rate": 4.3697346799438584e-05, "loss": 0.8605, "step": 9340 }, { "epoch": 0.6166834303428035, "grad_norm": 0.46722471714019775, "learning_rate": 4.3678230619774685e-05, "loss": 0.8482, "step": 9350 }, { "epoch": 0.6173429848137583, "grad_norm": 0.5129494667053223, "learning_rate": 4.3659089688895264e-05, "loss": 0.8591, "step": 9360 }, { "epoch": 0.6180025392847132, "grad_norm": 0.5393134951591492, "learning_rate": 4.363992403216467e-05, "loss": 0.8435, "step": 9370 }, { "epoch": 0.618662093755668, "grad_norm": 0.5477026104927063, "learning_rate": 4.362073367498002e-05, "loss": 0.8633, "step": 9380 }, { "epoch": 0.619321648226623, "grad_norm": 0.49567267298698425, "learning_rate": 4.360151864277119e-05, "loss": 0.8314, "step": 9390 }, { "epoch": 0.6199812026975778, "grad_norm": 0.5262508392333984, "learning_rate": 4.358227896100071e-05, "loss": 0.8515, "step": 9400 }, { "epoch": 0.6206407571685326, "grad_norm": 0.4469965100288391, "learning_rate": 4.3563014655163804e-05, "loss": 0.8674, "step": 9410 }, { "epoch": 0.6213003116394875, "grad_norm": 0.5764192938804626, "learning_rate": 4.354372575078831e-05, "loss": 0.875, "step": 9420 }, { "epoch": 0.6219598661104424, "grad_norm": 0.5805740356445312, "learning_rate": 4.3524412273434676e-05, "loss": 0.8142, "step": 9430 }, { "epoch": 0.6226194205813973, "grad_norm": 0.5671592950820923, "learning_rate": 4.350507424869591e-05, "loss": 0.8581, "step": 9440 }, { "epoch": 0.6232789750523522, "grad_norm": 0.4964553713798523, "learning_rate": 4.348571170219754e-05, "loss": 0.8241, "step": 9450 }, { "epoch": 0.623938529523307, "grad_norm": 0.5607429146766663, "learning_rate": 4.34663246595976e-05, "loss": 0.8813, "step": 9460 }, { "epoch": 0.6245980839942619, "grad_norm": 0.4957119822502136, "learning_rate": 4.344691314658657e-05, "loss": 0.8554, "step": 9470 }, { "epoch": 0.6252576384652168, "grad_norm": 0.5064859986305237, "learning_rate": 4.3427477188887366e-05, "loss": 0.8196, "step": 9480 }, { "epoch": 0.6259171929361717, "grad_norm": 0.5270329713821411, "learning_rate": 4.3408016812255305e-05, "loss": 0.8072, "step": 9490 }, { "epoch": 0.6265767474071264, "grad_norm": 0.6006602048873901, "learning_rate": 4.3388532042478046e-05, "loss": 0.8736, "step": 9500 }, { "epoch": 0.6272363018780813, "grad_norm": 0.5389840006828308, "learning_rate": 4.336902290537558e-05, "loss": 0.879, "step": 9510 }, { "epoch": 0.6278958563490362, "grad_norm": 0.551805853843689, "learning_rate": 4.334948942680018e-05, "loss": 0.8526, "step": 9520 }, { "epoch": 0.6285554108199911, "grad_norm": 0.512858510017395, "learning_rate": 4.33299316326364e-05, "loss": 0.8474, "step": 9530 }, { "epoch": 0.629214965290946, "grad_norm": 0.4484001100063324, "learning_rate": 4.331034954880098e-05, "loss": 0.8638, "step": 9540 }, { "epoch": 0.6298745197619008, "grad_norm": 0.47472772002220154, "learning_rate": 4.329074320124288e-05, "loss": 0.8579, "step": 9550 }, { "epoch": 0.6305340742328557, "grad_norm": 0.4683813750743866, "learning_rate": 4.327111261594318e-05, "loss": 0.845, "step": 9560 }, { "epoch": 0.6311936287038106, "grad_norm": 0.5788334012031555, "learning_rate": 4.325145781891511e-05, "loss": 0.8298, "step": 9570 }, { "epoch": 0.6318531831747655, "grad_norm": 0.5225703716278076, "learning_rate": 4.323177883620397e-05, "loss": 0.8652, "step": 9580 }, { "epoch": 0.6325127376457204, "grad_norm": 0.5140812397003174, "learning_rate": 4.321207569388711e-05, "loss": 0.8526, "step": 9590 }, { "epoch": 0.6331722921166751, "grad_norm": 0.5085943937301636, "learning_rate": 4.319234841807388e-05, "loss": 0.8725, "step": 9600 }, { "epoch": 0.63383184658763, "grad_norm": 0.5138686299324036, "learning_rate": 4.317259703490564e-05, "loss": 0.8721, "step": 9610 }, { "epoch": 0.6344914010585849, "grad_norm": 0.5103156566619873, "learning_rate": 4.315282157055567e-05, "loss": 0.8488, "step": 9620 }, { "epoch": 0.6351509555295398, "grad_norm": 0.4980023205280304, "learning_rate": 4.313302205122918e-05, "loss": 0.8676, "step": 9630 }, { "epoch": 0.6358105100004947, "grad_norm": 0.5121146440505981, "learning_rate": 4.311319850316323e-05, "loss": 0.8798, "step": 9640 }, { "epoch": 0.6364700644714495, "grad_norm": 0.4848631024360657, "learning_rate": 4.309335095262676e-05, "loss": 0.8552, "step": 9650 }, { "epoch": 0.6371296189424044, "grad_norm": 0.5741939544677734, "learning_rate": 4.307347942592047e-05, "loss": 0.8477, "step": 9660 }, { "epoch": 0.6377891734133593, "grad_norm": 0.62933349609375, "learning_rate": 4.3053583949376886e-05, "loss": 0.8501, "step": 9670 }, { "epoch": 0.6384487278843142, "grad_norm": 0.5315343737602234, "learning_rate": 4.3033664549360215e-05, "loss": 0.8368, "step": 9680 }, { "epoch": 0.6391082823552691, "grad_norm": 0.5381109714508057, "learning_rate": 4.3013721252266404e-05, "loss": 0.8963, "step": 9690 }, { "epoch": 0.6397678368262238, "grad_norm": 0.45453372597694397, "learning_rate": 4.2993754084523055e-05, "loss": 0.8361, "step": 9700 }, { "epoch": 0.6404273912971787, "grad_norm": 0.5069344639778137, "learning_rate": 4.29737630725894e-05, "loss": 0.8874, "step": 9710 }, { "epoch": 0.6410869457681336, "grad_norm": 0.45548906922340393, "learning_rate": 4.295374824295627e-05, "loss": 0.8352, "step": 9720 }, { "epoch": 0.6417465002390885, "grad_norm": 0.5694235563278198, "learning_rate": 4.293370962214605e-05, "loss": 0.7936, "step": 9730 }, { "epoch": 0.6424060547100434, "grad_norm": 0.5499682426452637, "learning_rate": 4.291364723671267e-05, "loss": 0.8706, "step": 9740 }, { "epoch": 0.6430656091809982, "grad_norm": 0.5891594290733337, "learning_rate": 4.289356111324154e-05, "loss": 0.8444, "step": 9750 }, { "epoch": 0.6437251636519531, "grad_norm": 0.5436473488807678, "learning_rate": 4.287345127834952e-05, "loss": 0.8477, "step": 9760 }, { "epoch": 0.644384718122908, "grad_norm": 0.5410086512565613, "learning_rate": 4.285331775868491e-05, "loss": 0.8566, "step": 9770 }, { "epoch": 0.6450442725938629, "grad_norm": 0.5957720875740051, "learning_rate": 4.2833160580927354e-05, "loss": 0.8366, "step": 9780 }, { "epoch": 0.6457038270648178, "grad_norm": 0.5900678634643555, "learning_rate": 4.281297977178791e-05, "loss": 0.8485, "step": 9790 }, { "epoch": 0.6463633815357726, "grad_norm": 0.46779167652130127, "learning_rate": 4.27927753580089e-05, "loss": 0.8603, "step": 9800 }, { "epoch": 0.6470229360067274, "grad_norm": 0.5066987872123718, "learning_rate": 4.2772547366363935e-05, "loss": 0.811, "step": 9810 }, { "epoch": 0.6476824904776823, "grad_norm": 0.4854544699192047, "learning_rate": 4.275229582365789e-05, "loss": 0.8084, "step": 9820 }, { "epoch": 0.6483420449486372, "grad_norm": 0.5501772165298462, "learning_rate": 4.273202075672682e-05, "loss": 0.8513, "step": 9830 }, { "epoch": 0.649001599419592, "grad_norm": 0.5020511150360107, "learning_rate": 4.271172219243798e-05, "loss": 0.8533, "step": 9840 }, { "epoch": 0.6496611538905469, "grad_norm": 0.5452893972396851, "learning_rate": 4.269140015768974e-05, "loss": 0.872, "step": 9850 }, { "epoch": 0.6503207083615018, "grad_norm": 0.5562975406646729, "learning_rate": 4.2671054679411605e-05, "loss": 0.8741, "step": 9860 }, { "epoch": 0.6509802628324567, "grad_norm": 0.5018196105957031, "learning_rate": 4.2650685784564085e-05, "loss": 0.8489, "step": 9870 }, { "epoch": 0.6516398173034116, "grad_norm": 0.5853243470191956, "learning_rate": 4.263029350013878e-05, "loss": 0.8669, "step": 9880 }, { "epoch": 0.6522993717743664, "grad_norm": 0.5556926131248474, "learning_rate": 4.260987785315826e-05, "loss": 0.8974, "step": 9890 }, { "epoch": 0.6529589262453213, "grad_norm": 0.4784821569919586, "learning_rate": 4.258943887067605e-05, "loss": 0.8753, "step": 9900 }, { "epoch": 0.6536184807162762, "grad_norm": 0.4800739884376526, "learning_rate": 4.256897657977661e-05, "loss": 0.8215, "step": 9910 }, { "epoch": 0.654278035187231, "grad_norm": 0.5254513621330261, "learning_rate": 4.254849100757527e-05, "loss": 0.8362, "step": 9920 }, { "epoch": 0.6549375896581859, "grad_norm": 0.5375306010246277, "learning_rate": 4.252798218121823e-05, "loss": 0.8502, "step": 9930 }, { "epoch": 0.6555971441291407, "grad_norm": 0.5457652807235718, "learning_rate": 4.2507450127882496e-05, "loss": 0.8469, "step": 9940 }, { "epoch": 0.6562566986000956, "grad_norm": 0.49668949842453003, "learning_rate": 4.248689487477584e-05, "loss": 0.8365, "step": 9950 }, { "epoch": 0.6569162530710505, "grad_norm": 0.4837133586406708, "learning_rate": 4.246631644913681e-05, "loss": 0.876, "step": 9960 }, { "epoch": 0.6575758075420054, "grad_norm": 0.5117802023887634, "learning_rate": 4.244571487823462e-05, "loss": 0.8463, "step": 9970 }, { "epoch": 0.6582353620129603, "grad_norm": 0.4953489601612091, "learning_rate": 4.242509018936919e-05, "loss": 0.8449, "step": 9980 }, { "epoch": 0.6588949164839151, "grad_norm": 0.48785701394081116, "learning_rate": 4.2404442409871056e-05, "loss": 0.8167, "step": 9990 }, { "epoch": 0.65955447095487, "grad_norm": 0.48401758074760437, "learning_rate": 4.238377156710135e-05, "loss": 0.8466, "step": 10000 }, { "epoch": 0.6602140254258249, "grad_norm": 0.5855180621147156, "learning_rate": 4.236307768845177e-05, "loss": 0.8657, "step": 10010 }, { "epoch": 0.6608735798967798, "grad_norm": 0.510765790939331, "learning_rate": 4.234236080134455e-05, "loss": 0.8918, "step": 10020 }, { "epoch": 0.6615331343677346, "grad_norm": 0.569969654083252, "learning_rate": 4.232162093323241e-05, "loss": 0.8425, "step": 10030 }, { "epoch": 0.6621926888386894, "grad_norm": 0.4895627498626709, "learning_rate": 4.23008581115985e-05, "loss": 0.8155, "step": 10040 }, { "epoch": 0.6628522433096443, "grad_norm": 0.5220820903778076, "learning_rate": 4.2280072363956415e-05, "loss": 0.8213, "step": 10050 }, { "epoch": 0.6635117977805992, "grad_norm": 0.5542130470275879, "learning_rate": 4.225926371785012e-05, "loss": 0.828, "step": 10060 }, { "epoch": 0.6641713522515541, "grad_norm": 0.5008354187011719, "learning_rate": 4.2238432200853914e-05, "loss": 0.8678, "step": 10070 }, { "epoch": 0.664830906722509, "grad_norm": 0.504158079624176, "learning_rate": 4.221757784057243e-05, "loss": 0.8491, "step": 10080 }, { "epoch": 0.6654904611934638, "grad_norm": 0.5875375270843506, "learning_rate": 4.219670066464053e-05, "loss": 0.8821, "step": 10090 }, { "epoch": 0.6661500156644187, "grad_norm": 0.5509904026985168, "learning_rate": 4.217580070072336e-05, "loss": 0.8331, "step": 10100 }, { "epoch": 0.6668095701353736, "grad_norm": 0.4190371632575989, "learning_rate": 4.215487797651621e-05, "loss": 0.8379, "step": 10110 }, { "epoch": 0.6674691246063285, "grad_norm": 0.49308207631111145, "learning_rate": 4.213393251974458e-05, "loss": 0.8703, "step": 10120 }, { "epoch": 0.6681286790772833, "grad_norm": 0.47358161211013794, "learning_rate": 4.2112964358164056e-05, "loss": 0.8307, "step": 10130 }, { "epoch": 0.6687882335482381, "grad_norm": 0.4836333692073822, "learning_rate": 4.209197351956032e-05, "loss": 0.8192, "step": 10140 }, { "epoch": 0.669447788019193, "grad_norm": 0.46903476119041443, "learning_rate": 4.207096003174914e-05, "loss": 0.871, "step": 10150 }, { "epoch": 0.6701073424901479, "grad_norm": 0.45738083124160767, "learning_rate": 4.204992392257624e-05, "loss": 0.8276, "step": 10160 }, { "epoch": 0.6707668969611028, "grad_norm": 0.5130215287208557, "learning_rate": 4.202886521991735e-05, "loss": 0.8392, "step": 10170 }, { "epoch": 0.6714264514320576, "grad_norm": 0.4978039860725403, "learning_rate": 4.200778395167815e-05, "loss": 0.8435, "step": 10180 }, { "epoch": 0.6720860059030125, "grad_norm": 0.4951028823852539, "learning_rate": 4.19866801457942e-05, "loss": 0.8354, "step": 10190 }, { "epoch": 0.6727455603739674, "grad_norm": 0.5361219644546509, "learning_rate": 4.196555383023094e-05, "loss": 0.8466, "step": 10200 }, { "epoch": 0.6734051148449223, "grad_norm": 0.5519912838935852, "learning_rate": 4.194440503298363e-05, "loss": 0.8152, "step": 10210 }, { "epoch": 0.6740646693158772, "grad_norm": 0.5510390996932983, "learning_rate": 4.1923233782077334e-05, "loss": 0.816, "step": 10220 }, { "epoch": 0.674724223786832, "grad_norm": 0.5109158754348755, "learning_rate": 4.1902040105566844e-05, "loss": 0.873, "step": 10230 }, { "epoch": 0.6753837782577868, "grad_norm": 0.47579264640808105, "learning_rate": 4.188082403153669e-05, "loss": 0.8319, "step": 10240 }, { "epoch": 0.6760433327287417, "grad_norm": 0.5012093186378479, "learning_rate": 4.1859585588101095e-05, "loss": 0.8384, "step": 10250 }, { "epoch": 0.6767028871996966, "grad_norm": 0.5368977189064026, "learning_rate": 4.1838324803403894e-05, "loss": 0.8212, "step": 10260 }, { "epoch": 0.6773624416706515, "grad_norm": 0.5367230772972107, "learning_rate": 4.181704170561854e-05, "loss": 0.8183, "step": 10270 }, { "epoch": 0.6780219961416063, "grad_norm": 0.606758713722229, "learning_rate": 4.179573632294806e-05, "loss": 0.8444, "step": 10280 }, { "epoch": 0.6786815506125612, "grad_norm": 0.5097817182540894, "learning_rate": 4.1774408683625e-05, "loss": 0.8486, "step": 10290 }, { "epoch": 0.6793411050835161, "grad_norm": 0.4653185307979584, "learning_rate": 4.175305881591141e-05, "loss": 0.8338, "step": 10300 }, { "epoch": 0.680000659554471, "grad_norm": 0.46998143196105957, "learning_rate": 4.1731686748098795e-05, "loss": 0.8318, "step": 10310 }, { "epoch": 0.6806602140254259, "grad_norm": 0.5375470519065857, "learning_rate": 4.1710292508508055e-05, "loss": 0.8311, "step": 10320 }, { "epoch": 0.6813197684963807, "grad_norm": 0.4817380905151367, "learning_rate": 4.168887612548951e-05, "loss": 0.867, "step": 10330 }, { "epoch": 0.6819793229673355, "grad_norm": 0.4903552234172821, "learning_rate": 4.16674376274228e-05, "loss": 0.8758, "step": 10340 }, { "epoch": 0.6826388774382904, "grad_norm": 0.5465636253356934, "learning_rate": 4.164597704271686e-05, "loss": 0.8036, "step": 10350 }, { "epoch": 0.6832984319092453, "grad_norm": 0.5072474479675293, "learning_rate": 4.1624494399809923e-05, "loss": 0.7913, "step": 10360 }, { "epoch": 0.6839579863802002, "grad_norm": 0.5057288408279419, "learning_rate": 4.1602989727169414e-05, "loss": 0.8626, "step": 10370 }, { "epoch": 0.684617540851155, "grad_norm": 0.4934908151626587, "learning_rate": 4.1581463053292e-05, "loss": 0.8859, "step": 10380 }, { "epoch": 0.6852770953221099, "grad_norm": 0.45849695801734924, "learning_rate": 4.1559914406703446e-05, "loss": 0.7938, "step": 10390 }, { "epoch": 0.6859366497930648, "grad_norm": 0.5369190573692322, "learning_rate": 4.153834381595869e-05, "loss": 0.8507, "step": 10400 }, { "epoch": 0.6865962042640197, "grad_norm": 0.48543307185173035, "learning_rate": 4.151675130964169e-05, "loss": 0.8423, "step": 10410 }, { "epoch": 0.6872557587349746, "grad_norm": 0.5510615706443787, "learning_rate": 4.149513691636551e-05, "loss": 0.8856, "step": 10420 }, { "epoch": 0.6879153132059294, "grad_norm": 0.495917946100235, "learning_rate": 4.147350066477215e-05, "loss": 0.8118, "step": 10430 }, { "epoch": 0.6885748676768843, "grad_norm": 0.5462771058082581, "learning_rate": 4.145184258353263e-05, "loss": 0.8556, "step": 10440 }, { "epoch": 0.6892344221478391, "grad_norm": 0.5103912949562073, "learning_rate": 4.143016270134686e-05, "loss": 0.8913, "step": 10450 }, { "epoch": 0.689893976618794, "grad_norm": 0.5578963756561279, "learning_rate": 4.140846104694368e-05, "loss": 0.8662, "step": 10460 }, { "epoch": 0.6905535310897489, "grad_norm": 0.51603102684021, "learning_rate": 4.138673764908074e-05, "loss": 0.8427, "step": 10470 }, { "epoch": 0.6912130855607037, "grad_norm": 0.5023142695426941, "learning_rate": 4.136499253654452e-05, "loss": 0.8672, "step": 10480 }, { "epoch": 0.6918726400316586, "grad_norm": 0.49633529782295227, "learning_rate": 4.134322573815028e-05, "loss": 0.8462, "step": 10490 }, { "epoch": 0.6925321945026135, "grad_norm": 0.48940548300743103, "learning_rate": 4.1321437282742e-05, "loss": 0.835, "step": 10500 }, { "epoch": 0.6931917489735684, "grad_norm": 0.481265664100647, "learning_rate": 4.129962719919238e-05, "loss": 0.8219, "step": 10510 }, { "epoch": 0.6938513034445232, "grad_norm": 0.5408857464790344, "learning_rate": 4.127779551640276e-05, "loss": 0.7663, "step": 10520 }, { "epoch": 0.6945108579154781, "grad_norm": 0.5185447931289673, "learning_rate": 4.1255942263303127e-05, "loss": 0.8399, "step": 10530 }, { "epoch": 0.695170412386433, "grad_norm": 0.46000975370407104, "learning_rate": 4.1234067468852023e-05, "loss": 0.8613, "step": 10540 }, { "epoch": 0.6958299668573878, "grad_norm": 0.47393959760665894, "learning_rate": 4.121217116203656e-05, "loss": 0.8552, "step": 10550 }, { "epoch": 0.6964895213283427, "grad_norm": 0.5146589875221252, "learning_rate": 4.1190253371872335e-05, "loss": 0.8211, "step": 10560 }, { "epoch": 0.6971490757992975, "grad_norm": 0.530214250087738, "learning_rate": 4.116831412740342e-05, "loss": 0.8309, "step": 10570 }, { "epoch": 0.6978086302702524, "grad_norm": 0.4867837131023407, "learning_rate": 4.114635345770234e-05, "loss": 0.8603, "step": 10580 }, { "epoch": 0.6984681847412073, "grad_norm": 0.4901922345161438, "learning_rate": 4.112437139186998e-05, "loss": 0.839, "step": 10590 }, { "epoch": 0.6991277392121622, "grad_norm": 0.5245567560195923, "learning_rate": 4.110236795903559e-05, "loss": 0.809, "step": 10600 }, { "epoch": 0.6997872936831171, "grad_norm": 0.5026851892471313, "learning_rate": 4.1080343188356734e-05, "loss": 0.8655, "step": 10610 }, { "epoch": 0.7004468481540719, "grad_norm": 0.4868167042732239, "learning_rate": 4.105829710901926e-05, "loss": 0.8839, "step": 10620 }, { "epoch": 0.7011064026250268, "grad_norm": 0.5156816840171814, "learning_rate": 4.1036229750237225e-05, "loss": 0.8233, "step": 10630 }, { "epoch": 0.7017659570959817, "grad_norm": 0.5261308550834656, "learning_rate": 4.1014141141252925e-05, "loss": 0.8245, "step": 10640 }, { "epoch": 0.7024255115669366, "grad_norm": 0.484281450510025, "learning_rate": 4.099203131133678e-05, "loss": 0.8896, "step": 10650 }, { "epoch": 0.7030850660378914, "grad_norm": 0.5240840315818787, "learning_rate": 4.0969900289787355e-05, "loss": 0.8351, "step": 10660 }, { "epoch": 0.7037446205088462, "grad_norm": 0.5067978501319885, "learning_rate": 4.094774810593128e-05, "loss": 0.8063, "step": 10670 }, { "epoch": 0.7044041749798011, "grad_norm": 0.4869655668735504, "learning_rate": 4.0925574789123245e-05, "loss": 0.8333, "step": 10680 }, { "epoch": 0.705063729450756, "grad_norm": 0.4879327118396759, "learning_rate": 4.090338036874592e-05, "loss": 0.8978, "step": 10690 }, { "epoch": 0.7057232839217109, "grad_norm": 0.503984808921814, "learning_rate": 4.088116487420995e-05, "loss": 0.8225, "step": 10700 }, { "epoch": 0.7063828383926658, "grad_norm": 0.5124876499176025, "learning_rate": 4.085892833495394e-05, "loss": 0.8247, "step": 10710 }, { "epoch": 0.7070423928636206, "grad_norm": 0.471996933221817, "learning_rate": 4.083667078044432e-05, "loss": 0.8366, "step": 10720 }, { "epoch": 0.7077019473345755, "grad_norm": 0.495221346616745, "learning_rate": 4.081439224017541e-05, "loss": 0.836, "step": 10730 }, { "epoch": 0.7083615018055304, "grad_norm": 0.501978874206543, "learning_rate": 4.0792092743669344e-05, "loss": 0.8057, "step": 10740 }, { "epoch": 0.7090210562764853, "grad_norm": 0.517574667930603, "learning_rate": 4.076977232047599e-05, "loss": 0.8133, "step": 10750 }, { "epoch": 0.7096806107474402, "grad_norm": 0.5121254920959473, "learning_rate": 4.074743100017298e-05, "loss": 0.8108, "step": 10760 }, { "epoch": 0.7103401652183949, "grad_norm": 0.5190317630767822, "learning_rate": 4.072506881236561e-05, "loss": 0.854, "step": 10770 }, { "epoch": 0.7109997196893498, "grad_norm": 0.5075172185897827, "learning_rate": 4.070268578668687e-05, "loss": 0.8511, "step": 10780 }, { "epoch": 0.7116592741603047, "grad_norm": 0.5328771471977234, "learning_rate": 4.068028195279731e-05, "loss": 0.761, "step": 10790 }, { "epoch": 0.7123188286312596, "grad_norm": 0.5213194489479065, "learning_rate": 4.06578573403851e-05, "loss": 0.822, "step": 10800 }, { "epoch": 0.7129783831022145, "grad_norm": 0.476565957069397, "learning_rate": 4.063541197916592e-05, "loss": 0.8382, "step": 10810 }, { "epoch": 0.7136379375731693, "grad_norm": 0.5263732075691223, "learning_rate": 4.0612945898882946e-05, "loss": 0.8203, "step": 10820 }, { "epoch": 0.7142974920441242, "grad_norm": 0.46448376774787903, "learning_rate": 4.0590459129306815e-05, "loss": 0.8388, "step": 10830 }, { "epoch": 0.7149570465150791, "grad_norm": 0.5530455708503723, "learning_rate": 4.056795170023557e-05, "loss": 0.8416, "step": 10840 }, { "epoch": 0.715616600986034, "grad_norm": 0.5095324516296387, "learning_rate": 4.054542364149466e-05, "loss": 0.8055, "step": 10850 }, { "epoch": 0.7162761554569888, "grad_norm": 0.48809099197387695, "learning_rate": 4.052287498293683e-05, "loss": 0.8078, "step": 10860 }, { "epoch": 0.7169357099279436, "grad_norm": 0.44991356134414673, "learning_rate": 4.050030575444216e-05, "loss": 0.8114, "step": 10870 }, { "epoch": 0.7175952643988985, "grad_norm": 0.5083290338516235, "learning_rate": 4.047771598591795e-05, "loss": 0.8888, "step": 10880 }, { "epoch": 0.7182548188698534, "grad_norm": 0.5286580920219421, "learning_rate": 4.045510570729877e-05, "loss": 0.8571, "step": 10890 }, { "epoch": 0.7189143733408083, "grad_norm": 0.4988810420036316, "learning_rate": 4.04324749485463e-05, "loss": 0.8191, "step": 10900 }, { "epoch": 0.7195739278117631, "grad_norm": 0.49191129207611084, "learning_rate": 4.040982373964943e-05, "loss": 0.802, "step": 10910 }, { "epoch": 0.720233482282718, "grad_norm": 0.5243684649467468, "learning_rate": 4.0387152110624096e-05, "loss": 0.8477, "step": 10920 }, { "epoch": 0.7208930367536729, "grad_norm": 0.44547760486602783, "learning_rate": 4.0364460091513325e-05, "loss": 0.8592, "step": 10930 }, { "epoch": 0.7215525912246278, "grad_norm": 0.5532107353210449, "learning_rate": 4.034174771238715e-05, "loss": 0.8735, "step": 10940 }, { "epoch": 0.7222121456955827, "grad_norm": 0.5193709135055542, "learning_rate": 4.0319015003342574e-05, "loss": 0.841, "step": 10950 }, { "epoch": 0.7228717001665375, "grad_norm": 0.44667038321495056, "learning_rate": 4.029626199450357e-05, "loss": 0.8137, "step": 10960 }, { "epoch": 0.7235312546374923, "grad_norm": 0.482125461101532, "learning_rate": 4.0273488716020986e-05, "loss": 0.8391, "step": 10970 }, { "epoch": 0.7241908091084472, "grad_norm": 0.5427354574203491, "learning_rate": 4.025069519807253e-05, "loss": 0.7965, "step": 10980 }, { "epoch": 0.7248503635794021, "grad_norm": 0.5725659728050232, "learning_rate": 4.022788147086275e-05, "loss": 0.8399, "step": 10990 }, { "epoch": 0.725509918050357, "grad_norm": 0.5463594198226929, "learning_rate": 4.020504756462297e-05, "loss": 0.8382, "step": 11000 }, { "epoch": 0.7261694725213118, "grad_norm": 0.5389062166213989, "learning_rate": 4.018219350961121e-05, "loss": 0.8262, "step": 11010 }, { "epoch": 0.7268290269922667, "grad_norm": 0.5420926213264465, "learning_rate": 4.015931933611225e-05, "loss": 0.837, "step": 11020 }, { "epoch": 0.7274885814632216, "grad_norm": 0.5854398012161255, "learning_rate": 4.01364250744375e-05, "loss": 0.8117, "step": 11030 }, { "epoch": 0.7281481359341765, "grad_norm": 0.512503981590271, "learning_rate": 4.0113510754925e-05, "loss": 0.8319, "step": 11040 }, { "epoch": 0.7288076904051314, "grad_norm": 0.4752545654773712, "learning_rate": 4.0090576407939354e-05, "loss": 0.859, "step": 11050 }, { "epoch": 0.7294672448760862, "grad_norm": 0.5591562986373901, "learning_rate": 4.006762206387172e-05, "loss": 0.83, "step": 11060 }, { "epoch": 0.7301267993470411, "grad_norm": 0.5371208190917969, "learning_rate": 4.004464775313976e-05, "loss": 0.8883, "step": 11070 }, { "epoch": 0.730786353817996, "grad_norm": 0.471000075340271, "learning_rate": 4.002165350618756e-05, "loss": 0.8262, "step": 11080 }, { "epoch": 0.7314459082889508, "grad_norm": 0.5140637159347534, "learning_rate": 3.999863935348568e-05, "loss": 0.8794, "step": 11090 }, { "epoch": 0.7321054627599057, "grad_norm": 0.5318006277084351, "learning_rate": 3.9975605325530995e-05, "loss": 0.8302, "step": 11100 }, { "epoch": 0.7327650172308605, "grad_norm": 0.5049440860748291, "learning_rate": 3.995255145284678e-05, "loss": 0.8566, "step": 11110 }, { "epoch": 0.7334245717018154, "grad_norm": 0.46503761410713196, "learning_rate": 3.992947776598254e-05, "loss": 0.8444, "step": 11120 }, { "epoch": 0.7340841261727703, "grad_norm": 0.5713391900062561, "learning_rate": 3.9906384295514106e-05, "loss": 0.8181, "step": 11130 }, { "epoch": 0.7347436806437252, "grad_norm": 0.499016672372818, "learning_rate": 3.988327107204347e-05, "loss": 0.8153, "step": 11140 }, { "epoch": 0.7354032351146801, "grad_norm": 0.4445125162601471, "learning_rate": 3.9860138126198834e-05, "loss": 0.8197, "step": 11150 }, { "epoch": 0.7360627895856349, "grad_norm": 0.5101156830787659, "learning_rate": 3.9836985488634514e-05, "loss": 0.7927, "step": 11160 }, { "epoch": 0.7367223440565898, "grad_norm": 0.47717103362083435, "learning_rate": 3.981381319003093e-05, "loss": 0.8049, "step": 11170 }, { "epoch": 0.7373818985275447, "grad_norm": 0.5079829692840576, "learning_rate": 3.9790621261094554e-05, "loss": 0.8554, "step": 11180 }, { "epoch": 0.7380414529984995, "grad_norm": 0.46226266026496887, "learning_rate": 3.976740973255787e-05, "loss": 0.8502, "step": 11190 }, { "epoch": 0.7387010074694543, "grad_norm": 0.4857373833656311, "learning_rate": 3.974417863517933e-05, "loss": 0.7853, "step": 11200 }, { "epoch": 0.7393605619404092, "grad_norm": 0.48326465487480164, "learning_rate": 3.972092799974333e-05, "loss": 0.8034, "step": 11210 }, { "epoch": 0.7400201164113641, "grad_norm": 0.5540308952331543, "learning_rate": 3.969765785706016e-05, "loss": 0.8369, "step": 11220 }, { "epoch": 0.740679670882319, "grad_norm": 0.5187276601791382, "learning_rate": 3.967436823796592e-05, "loss": 0.8689, "step": 11230 }, { "epoch": 0.7413392253532739, "grad_norm": 0.5328775644302368, "learning_rate": 3.965105917332256e-05, "loss": 0.8345, "step": 11240 }, { "epoch": 0.7419987798242287, "grad_norm": 0.4606923460960388, "learning_rate": 3.962773069401778e-05, "loss": 0.799, "step": 11250 }, { "epoch": 0.7426583342951836, "grad_norm": 0.5835855603218079, "learning_rate": 3.960438283096504e-05, "loss": 0.8093, "step": 11260 }, { "epoch": 0.7433178887661385, "grad_norm": 0.5286555886268616, "learning_rate": 3.958101561510342e-05, "loss": 0.8687, "step": 11270 }, { "epoch": 0.7439774432370934, "grad_norm": 0.5591707825660706, "learning_rate": 3.9557629077397705e-05, "loss": 0.8547, "step": 11280 }, { "epoch": 0.7446369977080483, "grad_norm": 0.48347362875938416, "learning_rate": 3.953422324883826e-05, "loss": 0.8076, "step": 11290 }, { "epoch": 0.745296552179003, "grad_norm": 0.4533845782279968, "learning_rate": 3.951079816044101e-05, "loss": 0.7979, "step": 11300 }, { "epoch": 0.7459561066499579, "grad_norm": 0.49205514788627625, "learning_rate": 3.948735384324742e-05, "loss": 0.858, "step": 11310 }, { "epoch": 0.7466156611209128, "grad_norm": 0.5320116281509399, "learning_rate": 3.94638903283244e-05, "loss": 0.8193, "step": 11320 }, { "epoch": 0.7472752155918677, "grad_norm": 0.5026687979698181, "learning_rate": 3.9440407646764355e-05, "loss": 0.8121, "step": 11330 }, { "epoch": 0.7479347700628226, "grad_norm": 0.5017873644828796, "learning_rate": 3.9416905829685034e-05, "loss": 0.8568, "step": 11340 }, { "epoch": 0.7485943245337774, "grad_norm": 0.5668044090270996, "learning_rate": 3.939338490822957e-05, "loss": 0.8492, "step": 11350 }, { "epoch": 0.7492538790047323, "grad_norm": 0.47770196199417114, "learning_rate": 3.9369844913566414e-05, "loss": 0.8135, "step": 11360 }, { "epoch": 0.7499134334756872, "grad_norm": 0.5444761514663696, "learning_rate": 3.934628587688928e-05, "loss": 0.8244, "step": 11370 }, { "epoch": 0.7505729879466421, "grad_norm": 0.4261077642440796, "learning_rate": 3.9322707829417116e-05, "loss": 0.8383, "step": 11380 }, { "epoch": 0.751232542417597, "grad_norm": 0.4794468581676483, "learning_rate": 3.929911080239407e-05, "loss": 0.8073, "step": 11390 }, { "epoch": 0.7518920968885517, "grad_norm": 0.4978042542934418, "learning_rate": 3.9275494827089445e-05, "loss": 0.8459, "step": 11400 }, { "epoch": 0.7525516513595066, "grad_norm": 0.5170986652374268, "learning_rate": 3.925185993479763e-05, "loss": 0.8554, "step": 11410 }, { "epoch": 0.7532112058304615, "grad_norm": 0.49450716376304626, "learning_rate": 3.922820615683811e-05, "loss": 0.8524, "step": 11420 }, { "epoch": 0.7538707603014164, "grad_norm": 0.530701756477356, "learning_rate": 3.920453352455538e-05, "loss": 0.8002, "step": 11430 }, { "epoch": 0.7545303147723713, "grad_norm": 0.4432068169116974, "learning_rate": 3.918084206931892e-05, "loss": 0.8412, "step": 11440 }, { "epoch": 0.7551898692433261, "grad_norm": 0.48112452030181885, "learning_rate": 3.915713182252315e-05, "loss": 0.853, "step": 11450 }, { "epoch": 0.755849423714281, "grad_norm": 0.5220328569412231, "learning_rate": 3.9133402815587404e-05, "loss": 0.8331, "step": 11460 }, { "epoch": 0.7565089781852359, "grad_norm": 0.4995824992656708, "learning_rate": 3.9109655079955864e-05, "loss": 0.8682, "step": 11470 }, { "epoch": 0.7571685326561908, "grad_norm": 0.4851597845554352, "learning_rate": 3.908588864709754e-05, "loss": 0.8431, "step": 11480 }, { "epoch": 0.7578280871271457, "grad_norm": 0.49197956919670105, "learning_rate": 3.9062103548506215e-05, "loss": 0.871, "step": 11490 }, { "epoch": 0.7584876415981004, "grad_norm": 0.4794784486293793, "learning_rate": 3.9038299815700385e-05, "loss": 0.8256, "step": 11500 }, { "epoch": 0.7591471960690553, "grad_norm": 0.5070955753326416, "learning_rate": 3.901447748022328e-05, "loss": 0.8067, "step": 11510 }, { "epoch": 0.7598067505400102, "grad_norm": 0.5093052983283997, "learning_rate": 3.899063657364275e-05, "loss": 0.804, "step": 11520 }, { "epoch": 0.7604663050109651, "grad_norm": 0.5523203611373901, "learning_rate": 3.896677712755127e-05, "loss": 0.8963, "step": 11530 }, { "epoch": 0.7611258594819199, "grad_norm": 0.5485988855361938, "learning_rate": 3.894289917356586e-05, "loss": 0.8572, "step": 11540 }, { "epoch": 0.7617854139528748, "grad_norm": 0.5128387212753296, "learning_rate": 3.8919002743328105e-05, "loss": 0.8724, "step": 11550 }, { "epoch": 0.7624449684238297, "grad_norm": 0.5283038020133972, "learning_rate": 3.889508786850403e-05, "loss": 0.8275, "step": 11560 }, { "epoch": 0.7631045228947846, "grad_norm": 0.5048648118972778, "learning_rate": 3.887115458078414e-05, "loss": 0.8876, "step": 11570 }, { "epoch": 0.7637640773657395, "grad_norm": 0.4777819812297821, "learning_rate": 3.884720291188331e-05, "loss": 0.8258, "step": 11580 }, { "epoch": 0.7644236318366943, "grad_norm": 0.4839961528778076, "learning_rate": 3.8823232893540774e-05, "loss": 0.8219, "step": 11590 }, { "epoch": 0.7650831863076492, "grad_norm": 0.5506725907325745, "learning_rate": 3.879924455752012e-05, "loss": 0.8657, "step": 11600 }, { "epoch": 0.765742740778604, "grad_norm": 0.505973756313324, "learning_rate": 3.877523793560915e-05, "loss": 0.8274, "step": 11610 }, { "epoch": 0.7664022952495589, "grad_norm": 0.5126368403434753, "learning_rate": 3.875121305961995e-05, "loss": 0.8468, "step": 11620 }, { "epoch": 0.7670618497205138, "grad_norm": 0.5028843879699707, "learning_rate": 3.872716996138876e-05, "loss": 0.8406, "step": 11630 }, { "epoch": 0.7677214041914686, "grad_norm": 0.5137712955474854, "learning_rate": 3.870310867277599e-05, "loss": 0.8498, "step": 11640 }, { "epoch": 0.7683809586624235, "grad_norm": 0.5019781589508057, "learning_rate": 3.8679029225666145e-05, "loss": 0.8329, "step": 11650 }, { "epoch": 0.7690405131333784, "grad_norm": 0.554481029510498, "learning_rate": 3.865493165196778e-05, "loss": 0.8429, "step": 11660 }, { "epoch": 0.7697000676043333, "grad_norm": 0.5420853495597839, "learning_rate": 3.86308159836135e-05, "loss": 0.7977, "step": 11670 }, { "epoch": 0.7703596220752882, "grad_norm": 0.5133484601974487, "learning_rate": 3.8606682252559856e-05, "loss": 0.8264, "step": 11680 }, { "epoch": 0.771019176546243, "grad_norm": 0.5209570527076721, "learning_rate": 3.8582530490787363e-05, "loss": 0.8342, "step": 11690 }, { "epoch": 0.7716787310171979, "grad_norm": 0.5342729091644287, "learning_rate": 3.85583607303004e-05, "loss": 0.8546, "step": 11700 }, { "epoch": 0.7723382854881528, "grad_norm": 0.512787938117981, "learning_rate": 3.853417300312724e-05, "loss": 0.852, "step": 11710 }, { "epoch": 0.7729978399591076, "grad_norm": 0.46172672510147095, "learning_rate": 3.8509967341319894e-05, "loss": 0.8465, "step": 11720 }, { "epoch": 0.7736573944300625, "grad_norm": 0.4403376877307892, "learning_rate": 3.848574377695421e-05, "loss": 0.8159, "step": 11730 }, { "epoch": 0.7743169489010173, "grad_norm": 0.46622994542121887, "learning_rate": 3.8461502342129736e-05, "loss": 0.8515, "step": 11740 }, { "epoch": 0.7749765033719722, "grad_norm": 0.4755195081233978, "learning_rate": 3.8437243068969674e-05, "loss": 0.8223, "step": 11750 }, { "epoch": 0.7756360578429271, "grad_norm": 0.48838281631469727, "learning_rate": 3.8412965989620905e-05, "loss": 0.8511, "step": 11760 }, { "epoch": 0.776295612313882, "grad_norm": 0.4364047944545746, "learning_rate": 3.838867113625387e-05, "loss": 0.8278, "step": 11770 }, { "epoch": 0.7769551667848369, "grad_norm": 0.5896735787391663, "learning_rate": 3.83643585410626e-05, "loss": 0.8102, "step": 11780 }, { "epoch": 0.7776147212557917, "grad_norm": 0.5205544829368591, "learning_rate": 3.834002823626459e-05, "loss": 0.8274, "step": 11790 }, { "epoch": 0.7782742757267466, "grad_norm": 0.5001713633537292, "learning_rate": 3.8315680254100846e-05, "loss": 0.8401, "step": 11800 }, { "epoch": 0.7789338301977015, "grad_norm": 0.49767130613327026, "learning_rate": 3.8291314626835784e-05, "loss": 0.827, "step": 11810 }, { "epoch": 0.7795933846686564, "grad_norm": 0.5469626188278198, "learning_rate": 3.826693138675719e-05, "loss": 0.8394, "step": 11820 }, { "epoch": 0.7802529391396112, "grad_norm": 0.4951440989971161, "learning_rate": 3.82425305661762e-05, "loss": 0.8815, "step": 11830 }, { "epoch": 0.780912493610566, "grad_norm": 0.5210491418838501, "learning_rate": 3.8218112197427244e-05, "loss": 0.8384, "step": 11840 }, { "epoch": 0.7815720480815209, "grad_norm": 0.5787615776062012, "learning_rate": 3.819367631286802e-05, "loss": 0.862, "step": 11850 }, { "epoch": 0.7822316025524758, "grad_norm": 0.5131245255470276, "learning_rate": 3.81692229448794e-05, "loss": 0.8308, "step": 11860 }, { "epoch": 0.7828911570234307, "grad_norm": 0.5724840760231018, "learning_rate": 3.814475212586548e-05, "loss": 0.8058, "step": 11870 }, { "epoch": 0.7835507114943855, "grad_norm": 0.5495114922523499, "learning_rate": 3.812026388825342e-05, "loss": 0.8645, "step": 11880 }, { "epoch": 0.7842102659653404, "grad_norm": 0.5270974636077881, "learning_rate": 3.8095758264493506e-05, "loss": 0.8307, "step": 11890 }, { "epoch": 0.7848698204362953, "grad_norm": 0.5352598428726196, "learning_rate": 3.807123528705905e-05, "loss": 0.8296, "step": 11900 }, { "epoch": 0.7855293749072502, "grad_norm": 0.517625093460083, "learning_rate": 3.804669498844635e-05, "loss": 0.8251, "step": 11910 }, { "epoch": 0.7861889293782051, "grad_norm": 0.5091532468795776, "learning_rate": 3.8022137401174676e-05, "loss": 0.8293, "step": 11920 }, { "epoch": 0.7868484838491598, "grad_norm": 0.5616522431373596, "learning_rate": 3.799756255778617e-05, "loss": 0.8256, "step": 11930 }, { "epoch": 0.7875080383201147, "grad_norm": 0.5082550644874573, "learning_rate": 3.79729704908459e-05, "loss": 0.8563, "step": 11940 }, { "epoch": 0.7881675927910696, "grad_norm": 0.4487828016281128, "learning_rate": 3.794836123294171e-05, "loss": 0.8187, "step": 11950 }, { "epoch": 0.7888271472620245, "grad_norm": 0.47633489966392517, "learning_rate": 3.792373481668424e-05, "loss": 0.8589, "step": 11960 }, { "epoch": 0.7894867017329794, "grad_norm": 0.5292962789535522, "learning_rate": 3.789909127470687e-05, "loss": 0.8168, "step": 11970 }, { "epoch": 0.7901462562039342, "grad_norm": 0.5220751762390137, "learning_rate": 3.787443063966567e-05, "loss": 0.8347, "step": 11980 }, { "epoch": 0.7908058106748891, "grad_norm": 0.46419110894203186, "learning_rate": 3.7849752944239356e-05, "loss": 0.8396, "step": 11990 }, { "epoch": 0.791465365145844, "grad_norm": 0.5395681262016296, "learning_rate": 3.7825058221129264e-05, "loss": 0.7882, "step": 12000 }, { "epoch": 0.7921249196167989, "grad_norm": 0.5546279549598694, "learning_rate": 3.780034650305928e-05, "loss": 0.8386, "step": 12010 }, { "epoch": 0.7927844740877538, "grad_norm": 0.526649534702301, "learning_rate": 3.7775617822775826e-05, "loss": 0.8477, "step": 12020 }, { "epoch": 0.7934440285587085, "grad_norm": 0.4924740195274353, "learning_rate": 3.775087221304779e-05, "loss": 0.7887, "step": 12030 }, { "epoch": 0.7941035830296634, "grad_norm": 0.5638056993484497, "learning_rate": 3.772610970666649e-05, "loss": 0.8524, "step": 12040 }, { "epoch": 0.7947631375006183, "grad_norm": 0.5174016952514648, "learning_rate": 3.770133033644565e-05, "loss": 0.8334, "step": 12050 }, { "epoch": 0.7954226919715732, "grad_norm": 0.5178600549697876, "learning_rate": 3.7676534135221306e-05, "loss": 0.8554, "step": 12060 }, { "epoch": 0.7960822464425281, "grad_norm": 0.5493482947349548, "learning_rate": 3.765172113585186e-05, "loss": 0.8356, "step": 12070 }, { "epoch": 0.7967418009134829, "grad_norm": 0.4699559211730957, "learning_rate": 3.762689137121792e-05, "loss": 0.8766, "step": 12080 }, { "epoch": 0.7974013553844378, "grad_norm": 0.5163393616676331, "learning_rate": 3.760204487422233e-05, "loss": 0.8348, "step": 12090 }, { "epoch": 0.7980609098553927, "grad_norm": 0.5244756937026978, "learning_rate": 3.75771816777901e-05, "loss": 0.8443, "step": 12100 }, { "epoch": 0.7987204643263476, "grad_norm": 0.5232709646224976, "learning_rate": 3.755230181486837e-05, "loss": 0.8496, "step": 12110 }, { "epoch": 0.7993800187973025, "grad_norm": 0.5520874261856079, "learning_rate": 3.7527405318426366e-05, "loss": 0.8428, "step": 12120 }, { "epoch": 0.8000395732682573, "grad_norm": 0.5144450664520264, "learning_rate": 3.7502492221455375e-05, "loss": 0.8245, "step": 12130 }, { "epoch": 0.8006991277392121, "grad_norm": 0.4852900207042694, "learning_rate": 3.747756255696866e-05, "loss": 0.8377, "step": 12140 }, { "epoch": 0.801358682210167, "grad_norm": 0.5000022649765015, "learning_rate": 3.745261635800142e-05, "loss": 0.8679, "step": 12150 }, { "epoch": 0.8020182366811219, "grad_norm": 0.45508676767349243, "learning_rate": 3.742765365761082e-05, "loss": 0.8336, "step": 12160 }, { "epoch": 0.8026777911520768, "grad_norm": 0.6492980718612671, "learning_rate": 3.7402674488875835e-05, "loss": 0.8476, "step": 12170 }, { "epoch": 0.8033373456230316, "grad_norm": 0.5550660490989685, "learning_rate": 3.7377678884897305e-05, "loss": 0.8518, "step": 12180 }, { "epoch": 0.8039969000939865, "grad_norm": 0.4505656957626343, "learning_rate": 3.735266687879782e-05, "loss": 0.812, "step": 12190 }, { "epoch": 0.8046564545649414, "grad_norm": 0.47150373458862305, "learning_rate": 3.732763850372172e-05, "loss": 0.8279, "step": 12200 }, { "epoch": 0.8053160090358963, "grad_norm": 0.587897777557373, "learning_rate": 3.730259379283503e-05, "loss": 0.8301, "step": 12210 }, { "epoch": 0.8059755635068511, "grad_norm": 0.501718282699585, "learning_rate": 3.727753277932543e-05, "loss": 0.8305, "step": 12220 }, { "epoch": 0.806635117977806, "grad_norm": 0.5089925527572632, "learning_rate": 3.725245549640218e-05, "loss": 0.8058, "step": 12230 }, { "epoch": 0.8072946724487609, "grad_norm": 0.4705756902694702, "learning_rate": 3.722736197729614e-05, "loss": 0.8453, "step": 12240 }, { "epoch": 0.8079542269197157, "grad_norm": 0.5152075290679932, "learning_rate": 3.720225225525966e-05, "loss": 0.8424, "step": 12250 }, { "epoch": 0.8086137813906706, "grad_norm": 0.5138453245162964, "learning_rate": 3.7177126363566536e-05, "loss": 0.8107, "step": 12260 }, { "epoch": 0.8092733358616254, "grad_norm": 0.5220133066177368, "learning_rate": 3.715198433551206e-05, "loss": 0.8531, "step": 12270 }, { "epoch": 0.8099328903325803, "grad_norm": 0.473230242729187, "learning_rate": 3.7126826204412824e-05, "loss": 0.8469, "step": 12280 }, { "epoch": 0.8105924448035352, "grad_norm": 0.49255087971687317, "learning_rate": 3.710165200360682e-05, "loss": 0.8169, "step": 12290 }, { "epoch": 0.8112519992744901, "grad_norm": 0.5157859325408936, "learning_rate": 3.707646176645332e-05, "loss": 0.8276, "step": 12300 }, { "epoch": 0.811911553745445, "grad_norm": 0.4958195686340332, "learning_rate": 3.7051255526332835e-05, "loss": 0.821, "step": 12310 }, { "epoch": 0.8125711082163998, "grad_norm": 0.447382390499115, "learning_rate": 3.702603331664708e-05, "loss": 0.8235, "step": 12320 }, { "epoch": 0.8132306626873547, "grad_norm": 0.5194646120071411, "learning_rate": 3.7000795170818945e-05, "loss": 0.8696, "step": 12330 }, { "epoch": 0.8138902171583096, "grad_norm": 0.4164382517337799, "learning_rate": 3.697554112229244e-05, "loss": 0.8149, "step": 12340 }, { "epoch": 0.8145497716292645, "grad_norm": 0.44350144267082214, "learning_rate": 3.6950271204532624e-05, "loss": 0.8193, "step": 12350 }, { "epoch": 0.8152093261002193, "grad_norm": 0.5147350430488586, "learning_rate": 3.6924985451025626e-05, "loss": 0.8306, "step": 12360 }, { "epoch": 0.8158688805711741, "grad_norm": 0.49076810479164124, "learning_rate": 3.689968389527851e-05, "loss": 0.8554, "step": 12370 }, { "epoch": 0.816528435042129, "grad_norm": 0.4826480746269226, "learning_rate": 3.6874366570819306e-05, "loss": 0.8187, "step": 12380 }, { "epoch": 0.8171879895130839, "grad_norm": 0.5235629081726074, "learning_rate": 3.684903351119696e-05, "loss": 0.86, "step": 12390 }, { "epoch": 0.8178475439840388, "grad_norm": 0.5923688411712646, "learning_rate": 3.6823684749981234e-05, "loss": 0.8607, "step": 12400 }, { "epoch": 0.8185070984549937, "grad_norm": 0.5231000781059265, "learning_rate": 3.6798320320762715e-05, "loss": 0.826, "step": 12410 }, { "epoch": 0.8191666529259485, "grad_norm": 0.6260348558425903, "learning_rate": 3.6772940257152736e-05, "loss": 0.825, "step": 12420 }, { "epoch": 0.8198262073969034, "grad_norm": 0.49430742859840393, "learning_rate": 3.674754459278338e-05, "loss": 0.8276, "step": 12430 }, { "epoch": 0.8204857618678583, "grad_norm": 0.5140009522438049, "learning_rate": 3.672213336130737e-05, "loss": 0.8656, "step": 12440 }, { "epoch": 0.8211453163388132, "grad_norm": 0.5198078751564026, "learning_rate": 3.669670659639807e-05, "loss": 0.8917, "step": 12450 }, { "epoch": 0.821804870809768, "grad_norm": 0.5191746354103088, "learning_rate": 3.667126433174943e-05, "loss": 0.841, "step": 12460 }, { "epoch": 0.8224644252807228, "grad_norm": 0.5955049395561218, "learning_rate": 3.6645806601075935e-05, "loss": 0.846, "step": 12470 }, { "epoch": 0.8231239797516777, "grad_norm": 0.4628657102584839, "learning_rate": 3.662033343811258e-05, "loss": 0.8451, "step": 12480 }, { "epoch": 0.8237835342226326, "grad_norm": 0.4703860580921173, "learning_rate": 3.6594844876614785e-05, "loss": 0.8507, "step": 12490 }, { "epoch": 0.8244430886935875, "grad_norm": 0.5772825479507446, "learning_rate": 3.6569340950358395e-05, "loss": 0.8423, "step": 12500 }, { "epoch": 0.8251026431645424, "grad_norm": 0.5526529550552368, "learning_rate": 3.6543821693139595e-05, "loss": 0.8013, "step": 12510 }, { "epoch": 0.8257621976354972, "grad_norm": 0.48744431138038635, "learning_rate": 3.651828713877491e-05, "loss": 0.8268, "step": 12520 }, { "epoch": 0.8264217521064521, "grad_norm": 0.5575173497200012, "learning_rate": 3.6492737321101115e-05, "loss": 0.8754, "step": 12530 }, { "epoch": 0.827081306577407, "grad_norm": 0.5068231821060181, "learning_rate": 3.646717227397524e-05, "loss": 0.8603, "step": 12540 }, { "epoch": 0.8277408610483619, "grad_norm": 0.5143702626228333, "learning_rate": 3.6441592031274454e-05, "loss": 0.7895, "step": 12550 }, { "epoch": 0.8284004155193166, "grad_norm": 0.45512455701828003, "learning_rate": 3.64159966268961e-05, "loss": 0.8014, "step": 12560 }, { "epoch": 0.8290599699902715, "grad_norm": 0.5064200162887573, "learning_rate": 3.639038609475758e-05, "loss": 0.8284, "step": 12570 }, { "epoch": 0.8297195244612264, "grad_norm": 0.5486271977424622, "learning_rate": 3.636476046879637e-05, "loss": 0.8553, "step": 12580 }, { "epoch": 0.8303790789321813, "grad_norm": 0.6321308016777039, "learning_rate": 3.633911978296994e-05, "loss": 0.8198, "step": 12590 }, { "epoch": 0.8310386334031362, "grad_norm": 0.47299566864967346, "learning_rate": 3.6313464071255703e-05, "loss": 0.8246, "step": 12600 }, { "epoch": 0.831698187874091, "grad_norm": 0.5452206134796143, "learning_rate": 3.6287793367651e-05, "loss": 0.8276, "step": 12610 }, { "epoch": 0.8323577423450459, "grad_norm": 0.5663066506385803, "learning_rate": 3.6262107706173034e-05, "loss": 0.8369, "step": 12620 }, { "epoch": 0.8330172968160008, "grad_norm": 0.5083215832710266, "learning_rate": 3.623640712085882e-05, "loss": 0.8315, "step": 12630 }, { "epoch": 0.8336768512869557, "grad_norm": 0.496698260307312, "learning_rate": 3.621069164576516e-05, "loss": 0.8508, "step": 12640 }, { "epoch": 0.8343364057579106, "grad_norm": 0.4482015073299408, "learning_rate": 3.618496131496858e-05, "loss": 0.8738, "step": 12650 }, { "epoch": 0.8349959602288654, "grad_norm": 0.5575942397117615, "learning_rate": 3.6159216162565296e-05, "loss": 0.8085, "step": 12660 }, { "epoch": 0.8356555146998202, "grad_norm": 0.5119989514350891, "learning_rate": 3.613345622267116e-05, "loss": 0.8429, "step": 12670 }, { "epoch": 0.8363150691707751, "grad_norm": 0.49572911858558655, "learning_rate": 3.6107681529421634e-05, "loss": 0.8728, "step": 12680 }, { "epoch": 0.83697462364173, "grad_norm": 0.5054008364677429, "learning_rate": 3.6081892116971715e-05, "loss": 0.836, "step": 12690 }, { "epoch": 0.8376341781126849, "grad_norm": 0.5312760472297668, "learning_rate": 3.60560880194959e-05, "loss": 0.8696, "step": 12700 }, { "epoch": 0.8382937325836397, "grad_norm": 0.49542853236198425, "learning_rate": 3.6030269271188164e-05, "loss": 0.8946, "step": 12710 }, { "epoch": 0.8389532870545946, "grad_norm": 0.5444157123565674, "learning_rate": 3.600443590626189e-05, "loss": 0.8179, "step": 12720 }, { "epoch": 0.8396128415255495, "grad_norm": 0.6010988354682922, "learning_rate": 3.5978587958949804e-05, "loss": 0.8384, "step": 12730 }, { "epoch": 0.8402723959965044, "grad_norm": 0.558444082736969, "learning_rate": 3.5952725463504005e-05, "loss": 0.8369, "step": 12740 }, { "epoch": 0.8409319504674593, "grad_norm": 0.5344332456588745, "learning_rate": 3.592684845419584e-05, "loss": 0.8345, "step": 12750 }, { "epoch": 0.8415915049384141, "grad_norm": 0.5073506236076355, "learning_rate": 3.590095696531588e-05, "loss": 0.8583, "step": 12760 }, { "epoch": 0.842251059409369, "grad_norm": 0.5145412683486938, "learning_rate": 3.5875051031173907e-05, "loss": 0.7981, "step": 12770 }, { "epoch": 0.8429106138803238, "grad_norm": 0.48051124811172485, "learning_rate": 3.584913068609884e-05, "loss": 0.8321, "step": 12780 }, { "epoch": 0.8435701683512787, "grad_norm": 0.573727011680603, "learning_rate": 3.5823195964438665e-05, "loss": 0.8189, "step": 12790 }, { "epoch": 0.8442297228222336, "grad_norm": 0.5168202519416809, "learning_rate": 3.5797246900560465e-05, "loss": 0.8698, "step": 12800 }, { "epoch": 0.8448892772931884, "grad_norm": 0.518841028213501, "learning_rate": 3.57712835288503e-05, "loss": 0.8891, "step": 12810 }, { "epoch": 0.8455488317641433, "grad_norm": 0.48710545897483826, "learning_rate": 3.574530588371319e-05, "loss": 0.8082, "step": 12820 }, { "epoch": 0.8462083862350982, "grad_norm": 0.5257467031478882, "learning_rate": 3.571931399957309e-05, "loss": 0.8382, "step": 12830 }, { "epoch": 0.8468679407060531, "grad_norm": 0.5244994759559631, "learning_rate": 3.5693307910872796e-05, "loss": 0.8125, "step": 12840 }, { "epoch": 0.847527495177008, "grad_norm": 0.5413283705711365, "learning_rate": 3.566728765207394e-05, "loss": 0.8259, "step": 12850 }, { "epoch": 0.8481870496479628, "grad_norm": 0.4778168499469757, "learning_rate": 3.564125325765694e-05, "loss": 0.8147, "step": 12860 }, { "epoch": 0.8488466041189177, "grad_norm": 0.5366325974464417, "learning_rate": 3.5615204762120925e-05, "loss": 0.8248, "step": 12870 }, { "epoch": 0.8495061585898726, "grad_norm": 0.460050106048584, "learning_rate": 3.558914219998373e-05, "loss": 0.857, "step": 12880 }, { "epoch": 0.8501657130608274, "grad_norm": 0.5721178650856018, "learning_rate": 3.556306560578181e-05, "loss": 0.8649, "step": 12890 }, { "epoch": 0.8508252675317822, "grad_norm": 0.5086795091629028, "learning_rate": 3.553697501407025e-05, "loss": 0.8229, "step": 12900 }, { "epoch": 0.8514848220027371, "grad_norm": 0.5210456252098083, "learning_rate": 3.551087045942263e-05, "loss": 0.8378, "step": 12910 }, { "epoch": 0.852144376473692, "grad_norm": 0.4959641993045807, "learning_rate": 3.548475197643109e-05, "loss": 0.8136, "step": 12920 }, { "epoch": 0.8528039309446469, "grad_norm": 0.42477983236312866, "learning_rate": 3.545861959970618e-05, "loss": 0.85, "step": 12930 }, { "epoch": 0.8534634854156018, "grad_norm": 0.4822991192340851, "learning_rate": 3.54324733638769e-05, "loss": 0.8232, "step": 12940 }, { "epoch": 0.8541230398865566, "grad_norm": 0.607531726360321, "learning_rate": 3.5406313303590577e-05, "loss": 0.8402, "step": 12950 }, { "epoch": 0.8547825943575115, "grad_norm": 0.4119485020637512, "learning_rate": 3.538013945351288e-05, "loss": 0.8571, "step": 12960 }, { "epoch": 0.8554421488284664, "grad_norm": 0.5254050493240356, "learning_rate": 3.5353951848327756e-05, "loss": 0.8428, "step": 12970 }, { "epoch": 0.8561017032994213, "grad_norm": 0.4991281032562256, "learning_rate": 3.532775052273737e-05, "loss": 0.8443, "step": 12980 }, { "epoch": 0.8567612577703762, "grad_norm": 0.5751249194145203, "learning_rate": 3.530153551146206e-05, "loss": 0.8199, "step": 12990 }, { "epoch": 0.8574208122413309, "grad_norm": 0.5402325987815857, "learning_rate": 3.527530684924031e-05, "loss": 0.8254, "step": 13000 }, { "epoch": 0.8580803667122858, "grad_norm": 0.5223280191421509, "learning_rate": 3.5249064570828706e-05, "loss": 0.8566, "step": 13010 }, { "epoch": 0.8587399211832407, "grad_norm": 0.4691770672798157, "learning_rate": 3.5222808711001854e-05, "loss": 0.8522, "step": 13020 }, { "epoch": 0.8593994756541956, "grad_norm": 0.5179622173309326, "learning_rate": 3.5196539304552364e-05, "loss": 0.8167, "step": 13030 }, { "epoch": 0.8600590301251505, "grad_norm": 0.5165730118751526, "learning_rate": 3.51702563862908e-05, "loss": 0.8628, "step": 13040 }, { "epoch": 0.8607185845961053, "grad_norm": 0.630574107170105, "learning_rate": 3.514395999104564e-05, "loss": 0.8352, "step": 13050 }, { "epoch": 0.8613781390670602, "grad_norm": 0.5269815921783447, "learning_rate": 3.51176501536632e-05, "loss": 0.8429, "step": 13060 }, { "epoch": 0.8620376935380151, "grad_norm": 0.5187968015670776, "learning_rate": 3.509132690900763e-05, "loss": 0.8003, "step": 13070 }, { "epoch": 0.86269724800897, "grad_norm": 0.5297034978866577, "learning_rate": 3.5064990291960835e-05, "loss": 0.7996, "step": 13080 }, { "epoch": 0.8633568024799249, "grad_norm": 0.489135205745697, "learning_rate": 3.503864033742244e-05, "loss": 0.8399, "step": 13090 }, { "epoch": 0.8640163569508796, "grad_norm": 0.6154001355171204, "learning_rate": 3.501227708030975e-05, "loss": 0.8121, "step": 13100 }, { "epoch": 0.8646759114218345, "grad_norm": 0.4901886284351349, "learning_rate": 3.498590055555768e-05, "loss": 0.8694, "step": 13110 }, { "epoch": 0.8653354658927894, "grad_norm": 0.48025843501091003, "learning_rate": 3.4959510798118757e-05, "loss": 0.852, "step": 13120 }, { "epoch": 0.8659950203637443, "grad_norm": 0.4628942012786865, "learning_rate": 3.493310784296301e-05, "loss": 0.8329, "step": 13130 }, { "epoch": 0.8666545748346992, "grad_norm": 0.6147623658180237, "learning_rate": 3.4906691725078e-05, "loss": 0.7919, "step": 13140 }, { "epoch": 0.867314129305654, "grad_norm": 0.5047041177749634, "learning_rate": 3.488026247946868e-05, "loss": 0.8365, "step": 13150 }, { "epoch": 0.8679736837766089, "grad_norm": 0.6306891441345215, "learning_rate": 3.4853820141157434e-05, "loss": 0.8611, "step": 13160 }, { "epoch": 0.8686332382475638, "grad_norm": 0.5087846517562866, "learning_rate": 3.482736474518397e-05, "loss": 0.8394, "step": 13170 }, { "epoch": 0.8692927927185187, "grad_norm": 0.4533502459526062, "learning_rate": 3.480089632660533e-05, "loss": 0.8554, "step": 13180 }, { "epoch": 0.8699523471894736, "grad_norm": 0.54240483045578, "learning_rate": 3.477441492049579e-05, "loss": 0.817, "step": 13190 }, { "epoch": 0.8706119016604283, "grad_norm": 0.5537872910499573, "learning_rate": 3.474792056194684e-05, "loss": 0.8232, "step": 13200 }, { "epoch": 0.8712714561313832, "grad_norm": 0.5160180330276489, "learning_rate": 3.472141328606714e-05, "loss": 0.8162, "step": 13210 }, { "epoch": 0.8719310106023381, "grad_norm": 0.506102442741394, "learning_rate": 3.469489312798246e-05, "loss": 0.8329, "step": 13220 }, { "epoch": 0.872590565073293, "grad_norm": 0.507501482963562, "learning_rate": 3.466836012283566e-05, "loss": 0.8237, "step": 13230 }, { "epoch": 0.8732501195442479, "grad_norm": 0.5725156664848328, "learning_rate": 3.464181430578658e-05, "loss": 0.847, "step": 13240 }, { "epoch": 0.8739096740152027, "grad_norm": 0.5154231190681458, "learning_rate": 3.4615255712012084e-05, "loss": 0.8328, "step": 13250 }, { "epoch": 0.8745692284861576, "grad_norm": 0.4981846511363983, "learning_rate": 3.458868437670594e-05, "loss": 0.814, "step": 13260 }, { "epoch": 0.8752287829571125, "grad_norm": 0.47902771830558777, "learning_rate": 3.4562100335078826e-05, "loss": 0.8807, "step": 13270 }, { "epoch": 0.8758883374280674, "grad_norm": 0.4744430482387543, "learning_rate": 3.453550362235822e-05, "loss": 0.8027, "step": 13280 }, { "epoch": 0.8765478918990222, "grad_norm": 0.5479256510734558, "learning_rate": 3.4508894273788426e-05, "loss": 0.8511, "step": 13290 }, { "epoch": 0.877207446369977, "grad_norm": 0.6024276614189148, "learning_rate": 3.448227232463047e-05, "loss": 0.8601, "step": 13300 }, { "epoch": 0.8778670008409319, "grad_norm": 0.4918282628059387, "learning_rate": 3.445563781016209e-05, "loss": 0.7992, "step": 13310 }, { "epoch": 0.8785265553118868, "grad_norm": 0.4895969033241272, "learning_rate": 3.442899076567766e-05, "loss": 0.813, "step": 13320 }, { "epoch": 0.8791861097828417, "grad_norm": 0.47569799423217773, "learning_rate": 3.440233122648817e-05, "loss": 0.7922, "step": 13330 }, { "epoch": 0.8798456642537965, "grad_norm": 0.5393938422203064, "learning_rate": 3.437565922792116e-05, "loss": 0.8801, "step": 13340 }, { "epoch": 0.8805052187247514, "grad_norm": 0.5467188954353333, "learning_rate": 3.4348974805320696e-05, "loss": 0.871, "step": 13350 }, { "epoch": 0.8811647731957063, "grad_norm": 0.5176929831504822, "learning_rate": 3.432227799404728e-05, "loss": 0.8133, "step": 13360 }, { "epoch": 0.8818243276666612, "grad_norm": 0.5406689047813416, "learning_rate": 3.4295568829477854e-05, "loss": 0.7797, "step": 13370 }, { "epoch": 0.8824838821376161, "grad_norm": 0.481183797121048, "learning_rate": 3.426884734700572e-05, "loss": 0.8246, "step": 13380 }, { "epoch": 0.8831434366085709, "grad_norm": 0.5114526748657227, "learning_rate": 3.42421135820405e-05, "loss": 0.7745, "step": 13390 }, { "epoch": 0.8838029910795258, "grad_norm": 0.5307304263114929, "learning_rate": 3.42153675700081e-05, "loss": 0.8493, "step": 13400 }, { "epoch": 0.8844625455504806, "grad_norm": 0.564628005027771, "learning_rate": 3.418860934635065e-05, "loss": 0.8158, "step": 13410 }, { "epoch": 0.8851221000214355, "grad_norm": 0.4864290952682495, "learning_rate": 3.4161838946526473e-05, "loss": 0.8065, "step": 13420 }, { "epoch": 0.8857816544923904, "grad_norm": 0.5225094556808472, "learning_rate": 3.4135056406010014e-05, "loss": 0.8429, "step": 13430 }, { "epoch": 0.8864412089633452, "grad_norm": 0.5745679140090942, "learning_rate": 3.4108261760291806e-05, "loss": 0.8411, "step": 13440 }, { "epoch": 0.8871007634343001, "grad_norm": 0.5746591687202454, "learning_rate": 3.408145504487843e-05, "loss": 0.7902, "step": 13450 }, { "epoch": 0.887760317905255, "grad_norm": 0.497823566198349, "learning_rate": 3.405463629529244e-05, "loss": 0.7747, "step": 13460 }, { "epoch": 0.8884198723762099, "grad_norm": 0.4694047272205353, "learning_rate": 3.402780554707238e-05, "loss": 0.8492, "step": 13470 }, { "epoch": 0.8890794268471648, "grad_norm": 0.585241436958313, "learning_rate": 3.400096283577266e-05, "loss": 0.8628, "step": 13480 }, { "epoch": 0.8897389813181196, "grad_norm": 0.5171390771865845, "learning_rate": 3.397410819696355e-05, "loss": 0.8546, "step": 13490 }, { "epoch": 0.8903985357890745, "grad_norm": 0.44888442754745483, "learning_rate": 3.394724166623113e-05, "loss": 0.8302, "step": 13500 }, { "epoch": 0.8910580902600294, "grad_norm": 0.47432762384414673, "learning_rate": 3.392036327917723e-05, "loss": 0.7981, "step": 13510 }, { "epoch": 0.8917176447309842, "grad_norm": 0.5711329579353333, "learning_rate": 3.38934730714194e-05, "loss": 0.7863, "step": 13520 }, { "epoch": 0.8923771992019391, "grad_norm": 0.5666019320487976, "learning_rate": 3.3866571078590854e-05, "loss": 0.831, "step": 13530 }, { "epoch": 0.8930367536728939, "grad_norm": 0.4173568785190582, "learning_rate": 3.3839657336340415e-05, "loss": 0.8111, "step": 13540 }, { "epoch": 0.8936963081438488, "grad_norm": 0.48306772112846375, "learning_rate": 3.3812731880332476e-05, "loss": 0.8548, "step": 13550 }, { "epoch": 0.8943558626148037, "grad_norm": 0.5220154523849487, "learning_rate": 3.378579474624697e-05, "loss": 0.8328, "step": 13560 }, { "epoch": 0.8950154170857586, "grad_norm": 0.5229178667068481, "learning_rate": 3.375884596977927e-05, "loss": 0.8584, "step": 13570 }, { "epoch": 0.8956749715567135, "grad_norm": 0.5190449953079224, "learning_rate": 3.373188558664022e-05, "loss": 0.8653, "step": 13580 }, { "epoch": 0.8963345260276683, "grad_norm": 0.4401300549507141, "learning_rate": 3.370491363255602e-05, "loss": 0.8604, "step": 13590 }, { "epoch": 0.8969940804986232, "grad_norm": 0.4707847833633423, "learning_rate": 3.367793014326819e-05, "loss": 0.8574, "step": 13600 }, { "epoch": 0.8976536349695781, "grad_norm": 0.4264417588710785, "learning_rate": 3.365093515453357e-05, "loss": 0.8218, "step": 13610 }, { "epoch": 0.898313189440533, "grad_norm": 0.5680892467498779, "learning_rate": 3.362392870212421e-05, "loss": 0.7665, "step": 13620 }, { "epoch": 0.8989727439114877, "grad_norm": 0.591608464717865, "learning_rate": 3.359691082182735e-05, "loss": 0.8213, "step": 13630 }, { "epoch": 0.8996322983824426, "grad_norm": 0.538254976272583, "learning_rate": 3.356988154944541e-05, "loss": 0.8613, "step": 13640 }, { "epoch": 0.9002918528533975, "grad_norm": 0.5444620847702026, "learning_rate": 3.354284092079587e-05, "loss": 0.8091, "step": 13650 }, { "epoch": 0.9009514073243524, "grad_norm": 0.5183736681938171, "learning_rate": 3.351578897171125e-05, "loss": 0.8324, "step": 13660 }, { "epoch": 0.9016109617953073, "grad_norm": 0.544813871383667, "learning_rate": 3.348872573803913e-05, "loss": 0.8321, "step": 13670 }, { "epoch": 0.9022705162662621, "grad_norm": 0.5314427614212036, "learning_rate": 3.3461651255641954e-05, "loss": 0.8347, "step": 13680 }, { "epoch": 0.902930070737217, "grad_norm": 0.6030846834182739, "learning_rate": 3.3434565560397165e-05, "loss": 0.8629, "step": 13690 }, { "epoch": 0.9035896252081719, "grad_norm": 0.47967347502708435, "learning_rate": 3.3407468688197e-05, "loss": 0.7961, "step": 13700 }, { "epoch": 0.9042491796791268, "grad_norm": 0.4342898726463318, "learning_rate": 3.338036067494853e-05, "loss": 0.828, "step": 13710 }, { "epoch": 0.9049087341500817, "grad_norm": 0.5623383522033691, "learning_rate": 3.3353241556573596e-05, "loss": 0.8206, "step": 13720 }, { "epoch": 0.9055682886210364, "grad_norm": 0.5118647813796997, "learning_rate": 3.332611136900874e-05, "loss": 0.8407, "step": 13730 }, { "epoch": 0.9062278430919913, "grad_norm": 0.4637552797794342, "learning_rate": 3.329897014820518e-05, "loss": 0.8396, "step": 13740 }, { "epoch": 0.9068873975629462, "grad_norm": 0.5614678859710693, "learning_rate": 3.327181793012875e-05, "loss": 0.851, "step": 13750 }, { "epoch": 0.9075469520339011, "grad_norm": 0.4977450370788574, "learning_rate": 3.324465475075987e-05, "loss": 0.8334, "step": 13760 }, { "epoch": 0.908206506504856, "grad_norm": 0.5911426544189453, "learning_rate": 3.3217480646093466e-05, "loss": 0.7991, "step": 13770 }, { "epoch": 0.9088660609758108, "grad_norm": 0.5079803466796875, "learning_rate": 3.3190295652138946e-05, "loss": 0.8507, "step": 13780 }, { "epoch": 0.9095256154467657, "grad_norm": 0.5313887596130371, "learning_rate": 3.3163099804920175e-05, "loss": 0.8012, "step": 13790 }, { "epoch": 0.9101851699177206, "grad_norm": 0.5302901864051819, "learning_rate": 3.313589314047536e-05, "loss": 0.8105, "step": 13800 }, { "epoch": 0.9108447243886755, "grad_norm": 0.47778141498565674, "learning_rate": 3.310867569485706e-05, "loss": 0.8443, "step": 13810 }, { "epoch": 0.9115042788596304, "grad_norm": 0.46718910336494446, "learning_rate": 3.3081447504132136e-05, "loss": 0.8608, "step": 13820 }, { "epoch": 0.9121638333305851, "grad_norm": 0.5107888579368591, "learning_rate": 3.305420860438165e-05, "loss": 0.8679, "step": 13830 }, { "epoch": 0.91282338780154, "grad_norm": 0.5221997499465942, "learning_rate": 3.30269590317009e-05, "loss": 0.8332, "step": 13840 }, { "epoch": 0.9134829422724949, "grad_norm": 0.49340206384658813, "learning_rate": 3.299969882219928e-05, "loss": 0.8448, "step": 13850 }, { "epoch": 0.9141424967434498, "grad_norm": 0.49549180269241333, "learning_rate": 3.297242801200033e-05, "loss": 0.793, "step": 13860 }, { "epoch": 0.9148020512144047, "grad_norm": 0.5069233775138855, "learning_rate": 3.29451466372416e-05, "loss": 0.8548, "step": 13870 }, { "epoch": 0.9154616056853595, "grad_norm": 0.5417354106903076, "learning_rate": 3.291785473407464e-05, "loss": 0.814, "step": 13880 }, { "epoch": 0.9161211601563144, "grad_norm": 0.569395124912262, "learning_rate": 3.289055233866497e-05, "loss": 0.8133, "step": 13890 }, { "epoch": 0.9167807146272693, "grad_norm": 0.5674502849578857, "learning_rate": 3.2863239487192014e-05, "loss": 0.8036, "step": 13900 }, { "epoch": 0.9174402690982242, "grad_norm": 0.4945697784423828, "learning_rate": 3.283591621584903e-05, "loss": 0.8145, "step": 13910 }, { "epoch": 0.9180998235691791, "grad_norm": 0.5142458081245422, "learning_rate": 3.280858256084311e-05, "loss": 0.8277, "step": 13920 }, { "epoch": 0.9187593780401339, "grad_norm": 0.49605944752693176, "learning_rate": 3.278123855839508e-05, "loss": 0.855, "step": 13930 }, { "epoch": 0.9194189325110887, "grad_norm": 0.5406461954116821, "learning_rate": 3.275388424473951e-05, "loss": 0.818, "step": 13940 }, { "epoch": 0.9200784869820436, "grad_norm": 0.5674294829368591, "learning_rate": 3.27265196561246e-05, "loss": 0.7944, "step": 13950 }, { "epoch": 0.9207380414529985, "grad_norm": 0.5565577745437622, "learning_rate": 3.269914482881218e-05, "loss": 0.8585, "step": 13960 }, { "epoch": 0.9213975959239533, "grad_norm": 0.5268924236297607, "learning_rate": 3.267175979907765e-05, "loss": 0.8569, "step": 13970 }, { "epoch": 0.9220571503949082, "grad_norm": 0.494615375995636, "learning_rate": 3.264436460320993e-05, "loss": 0.8264, "step": 13980 }, { "epoch": 0.9227167048658631, "grad_norm": 0.45897209644317627, "learning_rate": 3.261695927751141e-05, "loss": 0.8705, "step": 13990 }, { "epoch": 0.923376259336818, "grad_norm": 0.5044166445732117, "learning_rate": 3.25895438582979e-05, "loss": 0.8121, "step": 14000 }, { "epoch": 0.9240358138077729, "grad_norm": 0.6285238265991211, "learning_rate": 3.256211838189859e-05, "loss": 0.8331, "step": 14010 }, { "epoch": 0.9246953682787277, "grad_norm": 0.519390881061554, "learning_rate": 3.2534682884655985e-05, "loss": 0.8593, "step": 14020 }, { "epoch": 0.9253549227496826, "grad_norm": 0.5182341933250427, "learning_rate": 3.2507237402925896e-05, "loss": 0.8422, "step": 14030 }, { "epoch": 0.9260144772206375, "grad_norm": 0.5825724005699158, "learning_rate": 3.247978197307734e-05, "loss": 0.8037, "step": 14040 }, { "epoch": 0.9266740316915923, "grad_norm": 0.5468394756317139, "learning_rate": 3.2452316631492523e-05, "loss": 0.8499, "step": 14050 }, { "epoch": 0.9273335861625472, "grad_norm": 0.4748375117778778, "learning_rate": 3.2424841414566785e-05, "loss": 0.8832, "step": 14060 }, { "epoch": 0.927993140633502, "grad_norm": 0.5006450414657593, "learning_rate": 3.2397356358708555e-05, "loss": 0.8283, "step": 14070 }, { "epoch": 0.9286526951044569, "grad_norm": 0.5645299553871155, "learning_rate": 3.23698615003393e-05, "loss": 0.8264, "step": 14080 }, { "epoch": 0.9293122495754118, "grad_norm": 0.5048816204071045, "learning_rate": 3.2342356875893484e-05, "loss": 0.8275, "step": 14090 }, { "epoch": 0.9299718040463667, "grad_norm": 0.4915793538093567, "learning_rate": 3.2314842521818495e-05, "loss": 0.8119, "step": 14100 }, { "epoch": 0.9306313585173216, "grad_norm": 0.5013352036476135, "learning_rate": 3.2287318474574625e-05, "loss": 0.8539, "step": 14110 }, { "epoch": 0.9312909129882764, "grad_norm": 0.5083145499229431, "learning_rate": 3.225978477063501e-05, "loss": 0.846, "step": 14120 }, { "epoch": 0.9319504674592313, "grad_norm": 0.5598704814910889, "learning_rate": 3.223224144648558e-05, "loss": 0.8469, "step": 14130 }, { "epoch": 0.9326100219301862, "grad_norm": 0.5258391499519348, "learning_rate": 3.2204688538625006e-05, "loss": 0.8316, "step": 14140 }, { "epoch": 0.933269576401141, "grad_norm": 0.6012523174285889, "learning_rate": 3.217712608356468e-05, "loss": 0.8153, "step": 14150 }, { "epoch": 0.933929130872096, "grad_norm": 0.5268067717552185, "learning_rate": 3.2149554117828634e-05, "loss": 0.8159, "step": 14160 }, { "epoch": 0.9345886853430507, "grad_norm": 0.4189505875110626, "learning_rate": 3.212197267795349e-05, "loss": 0.842, "step": 14170 }, { "epoch": 0.9352482398140056, "grad_norm": 0.502161979675293, "learning_rate": 3.2094381800488436e-05, "loss": 0.8211, "step": 14180 }, { "epoch": 0.9359077942849605, "grad_norm": 0.4730813205242157, "learning_rate": 3.2066781521995184e-05, "loss": 0.8689, "step": 14190 }, { "epoch": 0.9365673487559154, "grad_norm": 0.49195387959480286, "learning_rate": 3.203917187904787e-05, "loss": 0.8266, "step": 14200 }, { "epoch": 0.9372269032268703, "grad_norm": 0.5385797619819641, "learning_rate": 3.201155290823306e-05, "loss": 0.8066, "step": 14210 }, { "epoch": 0.9378864576978251, "grad_norm": 0.537335991859436, "learning_rate": 3.198392464614967e-05, "loss": 0.8148, "step": 14220 }, { "epoch": 0.93854601216878, "grad_norm": 0.524181604385376, "learning_rate": 3.1956287129408944e-05, "loss": 0.8533, "step": 14230 }, { "epoch": 0.9392055666397349, "grad_norm": 0.4703523814678192, "learning_rate": 3.1928640394634365e-05, "loss": 0.8427, "step": 14240 }, { "epoch": 0.9398651211106898, "grad_norm": 0.459078848361969, "learning_rate": 3.1900984478461666e-05, "loss": 0.8372, "step": 14250 }, { "epoch": 0.9405246755816447, "grad_norm": 0.46345698833465576, "learning_rate": 3.18733194175387e-05, "loss": 0.8589, "step": 14260 }, { "epoch": 0.9411842300525994, "grad_norm": 0.48149171471595764, "learning_rate": 3.184564524852549e-05, "loss": 0.8208, "step": 14270 }, { "epoch": 0.9418437845235543, "grad_norm": 0.5642602443695068, "learning_rate": 3.181796200809409e-05, "loss": 0.8048, "step": 14280 }, { "epoch": 0.9425033389945092, "grad_norm": 0.5288432240486145, "learning_rate": 3.179026973292859e-05, "loss": 0.851, "step": 14290 }, { "epoch": 0.9431628934654641, "grad_norm": 0.5397107005119324, "learning_rate": 3.176256845972505e-05, "loss": 0.8217, "step": 14300 }, { "epoch": 0.9438224479364189, "grad_norm": 0.5059977769851685, "learning_rate": 3.173485822519145e-05, "loss": 0.8279, "step": 14310 }, { "epoch": 0.9444820024073738, "grad_norm": 0.5274835824966431, "learning_rate": 3.170713906604765e-05, "loss": 0.8246, "step": 14320 }, { "epoch": 0.9451415568783287, "grad_norm": 0.5186430811882019, "learning_rate": 3.1679411019025346e-05, "loss": 0.8268, "step": 14330 }, { "epoch": 0.9458011113492836, "grad_norm": 0.45758917927742004, "learning_rate": 3.165167412086799e-05, "loss": 0.8733, "step": 14340 }, { "epoch": 0.9464606658202385, "grad_norm": 0.5606436729431152, "learning_rate": 3.162392840833077e-05, "loss": 0.7949, "step": 14350 }, { "epoch": 0.9471202202911932, "grad_norm": 0.4964209198951721, "learning_rate": 3.159617391818056e-05, "loss": 0.8411, "step": 14360 }, { "epoch": 0.9477797747621481, "grad_norm": 0.5006893277168274, "learning_rate": 3.156841068719587e-05, "loss": 0.8317, "step": 14370 }, { "epoch": 0.948439329233103, "grad_norm": 0.47448766231536865, "learning_rate": 3.154063875216678e-05, "loss": 0.8454, "step": 14380 }, { "epoch": 0.9490988837040579, "grad_norm": 0.5972414612770081, "learning_rate": 3.151285814989492e-05, "loss": 0.8395, "step": 14390 }, { "epoch": 0.9497584381750128, "grad_norm": 0.4944043755531311, "learning_rate": 3.1485068917193385e-05, "loss": 0.8119, "step": 14400 }, { "epoch": 0.9504179926459676, "grad_norm": 0.5087505578994751, "learning_rate": 3.145727109088672e-05, "loss": 0.8401, "step": 14410 }, { "epoch": 0.9510775471169225, "grad_norm": 0.5083771347999573, "learning_rate": 3.142946470781085e-05, "loss": 0.8638, "step": 14420 }, { "epoch": 0.9517371015878774, "grad_norm": 0.5725495219230652, "learning_rate": 3.140164980481305e-05, "loss": 0.8505, "step": 14430 }, { "epoch": 0.9523966560588323, "grad_norm": 0.5729572176933289, "learning_rate": 3.137382641875187e-05, "loss": 0.8358, "step": 14440 }, { "epoch": 0.9530562105297872, "grad_norm": 0.6390442252159119, "learning_rate": 3.134599458649713e-05, "loss": 0.8283, "step": 14450 }, { "epoch": 0.953715765000742, "grad_norm": 0.5202282071113586, "learning_rate": 3.131815434492981e-05, "loss": 0.8617, "step": 14460 }, { "epoch": 0.9543753194716968, "grad_norm": 0.5068888068199158, "learning_rate": 3.1290305730942035e-05, "loss": 0.8538, "step": 14470 }, { "epoch": 0.9550348739426517, "grad_norm": 0.46176913380622864, "learning_rate": 3.126244878143705e-05, "loss": 0.8406, "step": 14480 }, { "epoch": 0.9556944284136066, "grad_norm": 0.5682269334793091, "learning_rate": 3.123458353332912e-05, "loss": 0.8245, "step": 14490 }, { "epoch": 0.9563539828845615, "grad_norm": 0.47708868980407715, "learning_rate": 3.120671002354353e-05, "loss": 0.8426, "step": 14500 }, { "epoch": 0.9570135373555163, "grad_norm": 0.5253884196281433, "learning_rate": 3.1178828289016495e-05, "loss": 0.8186, "step": 14510 }, { "epoch": 0.9576730918264712, "grad_norm": 0.5317881107330322, "learning_rate": 3.115093836669513e-05, "loss": 0.8278, "step": 14520 }, { "epoch": 0.9583326462974261, "grad_norm": 0.5529583692550659, "learning_rate": 3.1123040293537414e-05, "loss": 0.8201, "step": 14530 }, { "epoch": 0.958992200768381, "grad_norm": 0.580334484577179, "learning_rate": 3.1095134106512116e-05, "loss": 0.8389, "step": 14540 }, { "epoch": 0.9596517552393359, "grad_norm": 0.507512092590332, "learning_rate": 3.106721984259874e-05, "loss": 0.8326, "step": 14550 }, { "epoch": 0.9603113097102907, "grad_norm": 0.46313729882240295, "learning_rate": 3.103929753878754e-05, "loss": 0.8121, "step": 14560 }, { "epoch": 0.9609708641812456, "grad_norm": 0.5425809025764465, "learning_rate": 3.101136723207938e-05, "loss": 0.8336, "step": 14570 }, { "epoch": 0.9616304186522004, "grad_norm": 0.5370546579360962, "learning_rate": 3.098342895948574e-05, "loss": 0.8395, "step": 14580 }, { "epoch": 0.9622899731231553, "grad_norm": 0.5421480536460876, "learning_rate": 3.095548275802867e-05, "loss": 0.8375, "step": 14590 }, { "epoch": 0.9629495275941102, "grad_norm": 0.5985087752342224, "learning_rate": 3.092752866474071e-05, "loss": 0.8259, "step": 14600 }, { "epoch": 0.963609082065065, "grad_norm": 0.5083304047584534, "learning_rate": 3.089956671666488e-05, "loss": 0.8183, "step": 14610 }, { "epoch": 0.9642686365360199, "grad_norm": 0.5227060317993164, "learning_rate": 3.0871596950854575e-05, "loss": 0.8691, "step": 14620 }, { "epoch": 0.9649281910069748, "grad_norm": 0.47051557898521423, "learning_rate": 3.084361940437357e-05, "loss": 0.7931, "step": 14630 }, { "epoch": 0.9655877454779297, "grad_norm": 0.5483131408691406, "learning_rate": 3.081563411429596e-05, "loss": 0.8439, "step": 14640 }, { "epoch": 0.9662472999488845, "grad_norm": 0.495633989572525, "learning_rate": 3.078764111770609e-05, "loss": 0.8545, "step": 14650 }, { "epoch": 0.9669068544198394, "grad_norm": 0.5021789073944092, "learning_rate": 3.075964045169849e-05, "loss": 0.8494, "step": 14660 }, { "epoch": 0.9675664088907943, "grad_norm": 0.5027854442596436, "learning_rate": 3.073163215337792e-05, "loss": 0.8507, "step": 14670 }, { "epoch": 0.9682259633617492, "grad_norm": 0.5345577597618103, "learning_rate": 3.0703616259859184e-05, "loss": 0.8456, "step": 14680 }, { "epoch": 0.968885517832704, "grad_norm": 0.5073565244674683, "learning_rate": 3.0675592808267197e-05, "loss": 0.8598, "step": 14690 }, { "epoch": 0.9695450723036588, "grad_norm": 0.5810648202896118, "learning_rate": 3.064756183573687e-05, "loss": 0.785, "step": 14700 }, { "epoch": 0.9702046267746137, "grad_norm": 0.5641012787818909, "learning_rate": 3.061952337941307e-05, "loss": 0.801, "step": 14710 }, { "epoch": 0.9708641812455686, "grad_norm": 0.524024486541748, "learning_rate": 3.059147747645062e-05, "loss": 0.8343, "step": 14720 }, { "epoch": 0.9715237357165235, "grad_norm": 0.5879754424095154, "learning_rate": 3.0563424164014166e-05, "loss": 0.7986, "step": 14730 }, { "epoch": 0.9721832901874784, "grad_norm": 0.49657177925109863, "learning_rate": 3.053536347927819e-05, "loss": 0.8507, "step": 14740 }, { "epoch": 0.9728428446584332, "grad_norm": 0.5545039772987366, "learning_rate": 3.050729545942695e-05, "loss": 0.8096, "step": 14750 }, { "epoch": 0.9735023991293881, "grad_norm": 0.482943058013916, "learning_rate": 3.0479220141654425e-05, "loss": 0.832, "step": 14760 }, { "epoch": 0.974161953600343, "grad_norm": 0.5274185538291931, "learning_rate": 3.0451137563164263e-05, "loss": 0.8248, "step": 14770 }, { "epoch": 0.9748215080712979, "grad_norm": 0.50125652551651, "learning_rate": 3.042304776116971e-05, "loss": 0.7765, "step": 14780 }, { "epoch": 0.9754810625422528, "grad_norm": 0.507879376411438, "learning_rate": 3.039495077289362e-05, "loss": 0.8509, "step": 14790 }, { "epoch": 0.9761406170132075, "grad_norm": 0.5121268630027771, "learning_rate": 3.0366846635568347e-05, "loss": 0.7931, "step": 14800 }, { "epoch": 0.9768001714841624, "grad_norm": 0.5252723693847656, "learning_rate": 3.0338735386435723e-05, "loss": 0.8121, "step": 14810 }, { "epoch": 0.9774597259551173, "grad_norm": 0.5790934562683105, "learning_rate": 3.031061706274701e-05, "loss": 0.8163, "step": 14820 }, { "epoch": 0.9781192804260722, "grad_norm": 0.5557512044906616, "learning_rate": 3.0282491701762845e-05, "loss": 0.8722, "step": 14830 }, { "epoch": 0.9787788348970271, "grad_norm": 0.5133851170539856, "learning_rate": 3.025435934075317e-05, "loss": 0.7813, "step": 14840 }, { "epoch": 0.9794383893679819, "grad_norm": 0.5220972299575806, "learning_rate": 3.0226220016997237e-05, "loss": 0.8075, "step": 14850 }, { "epoch": 0.9800979438389368, "grad_norm": 0.5013805627822876, "learning_rate": 3.0198073767783495e-05, "loss": 0.8391, "step": 14860 }, { "epoch": 0.9807574983098917, "grad_norm": 0.5171005129814148, "learning_rate": 3.0169920630409586e-05, "loss": 0.8054, "step": 14870 }, { "epoch": 0.9814170527808466, "grad_norm": 0.5466331243515015, "learning_rate": 3.0141760642182275e-05, "loss": 0.863, "step": 14880 }, { "epoch": 0.9820766072518015, "grad_norm": 0.48877766728401184, "learning_rate": 3.011359384041741e-05, "loss": 0.8445, "step": 14890 }, { "epoch": 0.9827361617227562, "grad_norm": 0.5366662740707397, "learning_rate": 3.0085420262439863e-05, "loss": 0.8236, "step": 14900 }, { "epoch": 0.9833957161937111, "grad_norm": 0.5145524740219116, "learning_rate": 3.0057239945583476e-05, "loss": 0.85, "step": 14910 }, { "epoch": 0.984055270664666, "grad_norm": 0.49465933442115784, "learning_rate": 3.0029052927191042e-05, "loss": 0.7925, "step": 14920 }, { "epoch": 0.9847148251356209, "grad_norm": 0.5308068990707397, "learning_rate": 3.0000859244614222e-05, "loss": 0.843, "step": 14930 }, { "epoch": 0.9853743796065758, "grad_norm": 0.4998134970664978, "learning_rate": 2.9972658935213506e-05, "loss": 0.8538, "step": 14940 }, { "epoch": 0.9860339340775306, "grad_norm": 0.507693350315094, "learning_rate": 2.9944452036358167e-05, "loss": 0.8029, "step": 14950 }, { "epoch": 0.9866934885484855, "grad_norm": 0.6143972873687744, "learning_rate": 2.9916238585426216e-05, "loss": 0.845, "step": 14960 }, { "epoch": 0.9873530430194404, "grad_norm": 0.5952014923095703, "learning_rate": 2.9888018619804336e-05, "loss": 0.8529, "step": 14970 }, { "epoch": 0.9880125974903953, "grad_norm": 0.528600811958313, "learning_rate": 2.985979217688786e-05, "loss": 0.7844, "step": 14980 }, { "epoch": 0.98867215196135, "grad_norm": 0.4936237633228302, "learning_rate": 2.983155929408069e-05, "loss": 0.7997, "step": 14990 }, { "epoch": 0.989331706432305, "grad_norm": 0.5711715221405029, "learning_rate": 2.9803320008795266e-05, "loss": 0.8221, "step": 15000 }, { "epoch": 0.9899912609032598, "grad_norm": 0.48582160472869873, "learning_rate": 2.9775074358452503e-05, "loss": 0.8028, "step": 15010 }, { "epoch": 0.9906508153742147, "grad_norm": 0.48469483852386475, "learning_rate": 2.9746822380481763e-05, "loss": 0.8425, "step": 15020 }, { "epoch": 0.9913103698451696, "grad_norm": 0.5462239384651184, "learning_rate": 2.9718564112320796e-05, "loss": 0.8491, "step": 15030 }, { "epoch": 0.9919699243161244, "grad_norm": 0.5127155184745789, "learning_rate": 2.969029959141567e-05, "loss": 0.8045, "step": 15040 }, { "epoch": 0.9926294787870793, "grad_norm": 0.5171392560005188, "learning_rate": 2.966202885522076e-05, "loss": 0.8607, "step": 15050 }, { "epoch": 0.9932890332580342, "grad_norm": 0.5148513317108154, "learning_rate": 2.9633751941198657e-05, "loss": 0.803, "step": 15060 }, { "epoch": 0.9939485877289891, "grad_norm": 0.5123173594474792, "learning_rate": 2.9605468886820147e-05, "loss": 0.8307, "step": 15070 }, { "epoch": 0.994608142199944, "grad_norm": 0.5569455027580261, "learning_rate": 2.9577179729564163e-05, "loss": 0.8174, "step": 15080 }, { "epoch": 0.9952676966708988, "grad_norm": 0.534589409828186, "learning_rate": 2.9548884506917706e-05, "loss": 0.8208, "step": 15090 }, { "epoch": 0.9959272511418537, "grad_norm": 0.6001644730567932, "learning_rate": 2.9520583256375823e-05, "loss": 0.8314, "step": 15100 }, { "epoch": 0.9965868056128085, "grad_norm": 0.5668461918830872, "learning_rate": 2.9492276015441556e-05, "loss": 0.8692, "step": 15110 }, { "epoch": 0.9972463600837634, "grad_norm": 0.5646265149116516, "learning_rate": 2.9463962821625883e-05, "loss": 0.8541, "step": 15120 }, { "epoch": 0.9979059145547183, "grad_norm": 0.450910359621048, "learning_rate": 2.943564371244766e-05, "loss": 0.7934, "step": 15130 }, { "epoch": 0.9985654690256731, "grad_norm": 0.5016583204269409, "learning_rate": 2.9407318725433587e-05, "loss": 0.8428, "step": 15140 }, { "epoch": 0.999225023496628, "grad_norm": 0.6422037482261658, "learning_rate": 2.9378987898118154e-05, "loss": 0.792, "step": 15150 }, { "epoch": 0.9998845779675829, "grad_norm": 0.4914742112159729, "learning_rate": 2.935065126804359e-05, "loss": 0.849, "step": 15160 }, { "epoch": 1.0005276435767638, "grad_norm": 0.48679330945014954, "learning_rate": 2.932230887275982e-05, "loss": 0.8407, "step": 15170 }, { "epoch": 1.0011871980477187, "grad_norm": 0.582068145275116, "learning_rate": 2.929396074982439e-05, "loss": 0.8316, "step": 15180 }, { "epoch": 1.0018467525186736, "grad_norm": 0.540686845779419, "learning_rate": 2.9265606936802457e-05, "loss": 0.8298, "step": 15190 }, { "epoch": 1.0025063069896285, "grad_norm": 0.5291965007781982, "learning_rate": 2.923724747126671e-05, "loss": 0.8475, "step": 15200 }, { "epoch": 1.0031658614605834, "grad_norm": 0.5179721713066101, "learning_rate": 2.9208882390797327e-05, "loss": 0.8287, "step": 15210 }, { "epoch": 1.0038254159315383, "grad_norm": 0.5664460062980652, "learning_rate": 2.918051173298192e-05, "loss": 0.7661, "step": 15220 }, { "epoch": 1.0044849704024932, "grad_norm": 0.5750847458839417, "learning_rate": 2.9152135535415513e-05, "loss": 0.8322, "step": 15230 }, { "epoch": 1.005144524873448, "grad_norm": 0.5466136932373047, "learning_rate": 2.912375383570044e-05, "loss": 0.8145, "step": 15240 }, { "epoch": 1.0058040793444027, "grad_norm": 0.4758152961730957, "learning_rate": 2.9095366671446354e-05, "loss": 0.8167, "step": 15250 }, { "epoch": 1.0064636338153576, "grad_norm": 0.522501528263092, "learning_rate": 2.906697408027013e-05, "loss": 0.8623, "step": 15260 }, { "epoch": 1.0071231882863125, "grad_norm": 0.5495953559875488, "learning_rate": 2.9038576099795854e-05, "loss": 0.8087, "step": 15270 }, { "epoch": 1.0077827427572674, "grad_norm": 0.5437273979187012, "learning_rate": 2.901017276765474e-05, "loss": 0.8211, "step": 15280 }, { "epoch": 1.0084422972282223, "grad_norm": 0.5142714381217957, "learning_rate": 2.898176412148509e-05, "loss": 0.774, "step": 15290 }, { "epoch": 1.0091018516991772, "grad_norm": 0.6193010210990906, "learning_rate": 2.8953350198932257e-05, "loss": 0.8004, "step": 15300 }, { "epoch": 1.009761406170132, "grad_norm": 0.5164871215820312, "learning_rate": 2.8924931037648578e-05, "loss": 0.79, "step": 15310 }, { "epoch": 1.010420960641087, "grad_norm": 0.4344189763069153, "learning_rate": 2.8896506675293344e-05, "loss": 0.8296, "step": 15320 }, { "epoch": 1.0110805151120419, "grad_norm": 0.5434548854827881, "learning_rate": 2.8868077149532723e-05, "loss": 0.812, "step": 15330 }, { "epoch": 1.0117400695829968, "grad_norm": 0.5497419238090515, "learning_rate": 2.8839642498039736e-05, "loss": 0.8377, "step": 15340 }, { "epoch": 1.0123996240539515, "grad_norm": 0.6154009103775024, "learning_rate": 2.8811202758494204e-05, "loss": 0.7947, "step": 15350 }, { "epoch": 1.0130591785249063, "grad_norm": 0.5419930219650269, "learning_rate": 2.8782757968582664e-05, "loss": 0.8418, "step": 15360 }, { "epoch": 1.0137187329958612, "grad_norm": 0.469460666179657, "learning_rate": 2.875430816599837e-05, "loss": 0.8352, "step": 15370 }, { "epoch": 1.0143782874668161, "grad_norm": 0.5413352251052856, "learning_rate": 2.872585338844121e-05, "loss": 0.8183, "step": 15380 }, { "epoch": 1.015037841937771, "grad_norm": 0.4633040726184845, "learning_rate": 2.8697393673617657e-05, "loss": 0.7765, "step": 15390 }, { "epoch": 1.015697396408726, "grad_norm": 0.565757691860199, "learning_rate": 2.866892905924074e-05, "loss": 0.8224, "step": 15400 }, { "epoch": 1.0163569508796808, "grad_norm": 0.5182371139526367, "learning_rate": 2.8640459583029965e-05, "loss": 0.8232, "step": 15410 }, { "epoch": 1.0170165053506357, "grad_norm": 0.5305317640304565, "learning_rate": 2.8611985282711306e-05, "loss": 0.801, "step": 15420 }, { "epoch": 1.0176760598215906, "grad_norm": 0.5024542212486267, "learning_rate": 2.8583506196017107e-05, "loss": 0.8176, "step": 15430 }, { "epoch": 1.0183356142925455, "grad_norm": 0.5164063572883606, "learning_rate": 2.855502236068605e-05, "loss": 0.7778, "step": 15440 }, { "epoch": 1.0189951687635002, "grad_norm": 0.5681871771812439, "learning_rate": 2.8526533814463137e-05, "loss": 0.7978, "step": 15450 }, { "epoch": 1.019654723234455, "grad_norm": 0.4896976947784424, "learning_rate": 2.8498040595099585e-05, "loss": 0.8054, "step": 15460 }, { "epoch": 1.02031427770541, "grad_norm": 0.5637657046318054, "learning_rate": 2.8469542740352818e-05, "loss": 0.7965, "step": 15470 }, { "epoch": 1.0209738321763648, "grad_norm": 0.5734666585922241, "learning_rate": 2.8441040287986398e-05, "loss": 0.815, "step": 15480 }, { "epoch": 1.0216333866473197, "grad_norm": 0.5376142859458923, "learning_rate": 2.8412533275769987e-05, "loss": 0.7993, "step": 15490 }, { "epoch": 1.0222929411182746, "grad_norm": 0.47688788175582886, "learning_rate": 2.8384021741479285e-05, "loss": 0.8396, "step": 15500 }, { "epoch": 1.0229524955892295, "grad_norm": 0.5853601098060608, "learning_rate": 2.8355505722895963e-05, "loss": 0.8377, "step": 15510 }, { "epoch": 1.0236120500601844, "grad_norm": 0.46929389238357544, "learning_rate": 2.8326985257807675e-05, "loss": 0.832, "step": 15520 }, { "epoch": 1.0242716045311393, "grad_norm": 0.5821115374565125, "learning_rate": 2.829846038400794e-05, "loss": 0.8017, "step": 15530 }, { "epoch": 1.0249311590020942, "grad_norm": 0.5275147557258606, "learning_rate": 2.826993113929612e-05, "loss": 0.7748, "step": 15540 }, { "epoch": 1.0255907134730489, "grad_norm": 0.4697074294090271, "learning_rate": 2.8241397561477377e-05, "loss": 0.8377, "step": 15550 }, { "epoch": 1.0262502679440038, "grad_norm": 0.5319390892982483, "learning_rate": 2.8212859688362612e-05, "loss": 0.8135, "step": 15560 }, { "epoch": 1.0269098224149587, "grad_norm": 0.5258188843727112, "learning_rate": 2.818431755776843e-05, "loss": 0.8006, "step": 15570 }, { "epoch": 1.0275693768859135, "grad_norm": 0.5168179869651794, "learning_rate": 2.8155771207517044e-05, "loss": 0.8035, "step": 15580 }, { "epoch": 1.0282289313568684, "grad_norm": 0.5735469460487366, "learning_rate": 2.8127220675436305e-05, "loss": 0.7609, "step": 15590 }, { "epoch": 1.0288884858278233, "grad_norm": 0.5791534781455994, "learning_rate": 2.8098665999359558e-05, "loss": 0.8124, "step": 15600 }, { "epoch": 1.0295480402987782, "grad_norm": 0.57262122631073, "learning_rate": 2.8070107217125674e-05, "loss": 0.8088, "step": 15610 }, { "epoch": 1.0302075947697331, "grad_norm": 0.5607044100761414, "learning_rate": 2.804154436657895e-05, "loss": 0.7969, "step": 15620 }, { "epoch": 1.030867149240688, "grad_norm": 0.5467885136604309, "learning_rate": 2.8012977485569082e-05, "loss": 0.8088, "step": 15630 }, { "epoch": 1.0315267037116427, "grad_norm": 0.46850356459617615, "learning_rate": 2.79844066119511e-05, "loss": 0.8295, "step": 15640 }, { "epoch": 1.0321862581825976, "grad_norm": 0.5874578952789307, "learning_rate": 2.795583178358533e-05, "loss": 0.8339, "step": 15650 }, { "epoch": 1.0328458126535525, "grad_norm": 0.5937914848327637, "learning_rate": 2.7927253038337335e-05, "loss": 0.8057, "step": 15660 }, { "epoch": 1.0335053671245074, "grad_norm": 0.5366748571395874, "learning_rate": 2.7898670414077866e-05, "loss": 0.8177, "step": 15670 }, { "epoch": 1.0341649215954622, "grad_norm": 0.47821250557899475, "learning_rate": 2.787008394868282e-05, "loss": 0.8155, "step": 15680 }, { "epoch": 1.0348244760664171, "grad_norm": 0.5660244226455688, "learning_rate": 2.7841493680033177e-05, "loss": 0.7954, "step": 15690 }, { "epoch": 1.035484030537372, "grad_norm": 0.6502612233161926, "learning_rate": 2.7812899646014966e-05, "loss": 0.7861, "step": 15700 }, { "epoch": 1.036143585008327, "grad_norm": 0.5137171149253845, "learning_rate": 2.7784301884519204e-05, "loss": 0.8112, "step": 15710 }, { "epoch": 1.0368031394792818, "grad_norm": 0.5020664930343628, "learning_rate": 2.775570043344184e-05, "loss": 0.8135, "step": 15720 }, { "epoch": 1.0374626939502367, "grad_norm": 0.5886096954345703, "learning_rate": 2.7727095330683716e-05, "loss": 0.8039, "step": 15730 }, { "epoch": 1.0381222484211914, "grad_norm": 0.5411577820777893, "learning_rate": 2.7698486614150516e-05, "loss": 0.8122, "step": 15740 }, { "epoch": 1.0387818028921463, "grad_norm": 0.5698654055595398, "learning_rate": 2.7669874321752715e-05, "loss": 0.8299, "step": 15750 }, { "epoch": 1.0394413573631012, "grad_norm": 0.5780178308486938, "learning_rate": 2.7641258491405508e-05, "loss": 0.7836, "step": 15760 }, { "epoch": 1.040100911834056, "grad_norm": 0.4700949490070343, "learning_rate": 2.7612639161028804e-05, "loss": 0.8376, "step": 15770 }, { "epoch": 1.040760466305011, "grad_norm": 0.5386998057365417, "learning_rate": 2.7584016368547135e-05, "loss": 0.8318, "step": 15780 }, { "epoch": 1.0414200207759658, "grad_norm": 0.5147479772567749, "learning_rate": 2.7555390151889626e-05, "loss": 0.8306, "step": 15790 }, { "epoch": 1.0420795752469207, "grad_norm": 0.4795542061328888, "learning_rate": 2.7526760548989933e-05, "loss": 0.8083, "step": 15800 }, { "epoch": 1.0427391297178756, "grad_norm": 0.5005327463150024, "learning_rate": 2.7498127597786212e-05, "loss": 0.7766, "step": 15810 }, { "epoch": 1.0433986841888305, "grad_norm": 0.5824893116950989, "learning_rate": 2.746949133622103e-05, "loss": 0.8, "step": 15820 }, { "epoch": 1.0440582386597854, "grad_norm": 0.5469245314598083, "learning_rate": 2.7440851802241373e-05, "loss": 0.8154, "step": 15830 }, { "epoch": 1.04471779313074, "grad_norm": 0.6703712940216064, "learning_rate": 2.7412209033798552e-05, "loss": 0.8327, "step": 15840 }, { "epoch": 1.045377347601695, "grad_norm": 0.5922505259513855, "learning_rate": 2.738356306884815e-05, "loss": 0.8196, "step": 15850 }, { "epoch": 1.0460369020726499, "grad_norm": 0.5021489262580872, "learning_rate": 2.7354913945350006e-05, "loss": 0.7733, "step": 15860 }, { "epoch": 1.0466964565436048, "grad_norm": 0.5870568156242371, "learning_rate": 2.7326261701268124e-05, "loss": 0.8064, "step": 15870 }, { "epoch": 1.0473560110145597, "grad_norm": 0.5342341661453247, "learning_rate": 2.7297606374570665e-05, "loss": 0.7878, "step": 15880 }, { "epoch": 1.0480155654855146, "grad_norm": 0.5717883706092834, "learning_rate": 2.726894800322986e-05, "loss": 0.8263, "step": 15890 }, { "epoch": 1.0486751199564694, "grad_norm": 0.5930851697921753, "learning_rate": 2.7240286625221984e-05, "loss": 0.8175, "step": 15900 }, { "epoch": 1.0493346744274243, "grad_norm": 0.555741012096405, "learning_rate": 2.7211622278527278e-05, "loss": 0.8492, "step": 15910 }, { "epoch": 1.0499942288983792, "grad_norm": 0.5682945251464844, "learning_rate": 2.7182955001129932e-05, "loss": 0.8579, "step": 15920 }, { "epoch": 1.050653783369334, "grad_norm": 0.6149784922599792, "learning_rate": 2.7154284831018036e-05, "loss": 0.8437, "step": 15930 }, { "epoch": 1.0513133378402888, "grad_norm": 0.5615754723548889, "learning_rate": 2.7125611806183472e-05, "loss": 0.834, "step": 15940 }, { "epoch": 1.0519728923112437, "grad_norm": 0.5518723726272583, "learning_rate": 2.709693596462195e-05, "loss": 0.7846, "step": 15950 }, { "epoch": 1.0526324467821986, "grad_norm": 0.521699070930481, "learning_rate": 2.7068257344332864e-05, "loss": 0.787, "step": 15960 }, { "epoch": 1.0532920012531535, "grad_norm": 0.5240887999534607, "learning_rate": 2.703957598331934e-05, "loss": 0.7999, "step": 15970 }, { "epoch": 1.0539515557241084, "grad_norm": 0.5104392170906067, "learning_rate": 2.7010891919588103e-05, "loss": 0.8562, "step": 15980 }, { "epoch": 1.0546111101950633, "grad_norm": 0.5361871719360352, "learning_rate": 2.6982205191149462e-05, "loss": 0.8085, "step": 15990 }, { "epoch": 1.0552706646660182, "grad_norm": 0.5302392244338989, "learning_rate": 2.6953515836017273e-05, "loss": 0.8436, "step": 16000 }, { "epoch": 1.055930219136973, "grad_norm": 0.5710680484771729, "learning_rate": 2.6924823892208866e-05, "loss": 0.7765, "step": 16010 }, { "epoch": 1.056589773607928, "grad_norm": 0.5633246898651123, "learning_rate": 2.689612939774499e-05, "loss": 0.8259, "step": 16020 }, { "epoch": 1.0572493280788826, "grad_norm": 0.5586814284324646, "learning_rate": 2.686743239064979e-05, "loss": 0.8446, "step": 16030 }, { "epoch": 1.0579088825498375, "grad_norm": 0.6109247207641602, "learning_rate": 2.6838732908950732e-05, "loss": 0.788, "step": 16040 }, { "epoch": 1.0585684370207924, "grad_norm": 0.5148352384567261, "learning_rate": 2.6810030990678552e-05, "loss": 0.8345, "step": 16050 }, { "epoch": 1.0592279914917473, "grad_norm": 0.5439609289169312, "learning_rate": 2.678132667386724e-05, "loss": 0.8561, "step": 16060 }, { "epoch": 1.0598875459627022, "grad_norm": 0.6023163199424744, "learning_rate": 2.675261999655394e-05, "loss": 0.8097, "step": 16070 }, { "epoch": 1.060547100433657, "grad_norm": 0.5256965160369873, "learning_rate": 2.672391099677895e-05, "loss": 0.775, "step": 16080 }, { "epoch": 1.061206654904612, "grad_norm": 0.5441160798072815, "learning_rate": 2.6695199712585605e-05, "loss": 0.8455, "step": 16090 }, { "epoch": 1.0618662093755669, "grad_norm": 0.5260411500930786, "learning_rate": 2.6666486182020317e-05, "loss": 0.7795, "step": 16100 }, { "epoch": 1.0625257638465218, "grad_norm": 0.473296195268631, "learning_rate": 2.6637770443132427e-05, "loss": 0.8121, "step": 16110 }, { "epoch": 1.0631853183174766, "grad_norm": 0.6172069311141968, "learning_rate": 2.6609052533974233e-05, "loss": 0.828, "step": 16120 }, { "epoch": 1.0638448727884313, "grad_norm": 0.5585634708404541, "learning_rate": 2.6580332492600905e-05, "loss": 0.7759, "step": 16130 }, { "epoch": 1.0645044272593862, "grad_norm": 0.5809791088104248, "learning_rate": 2.6551610357070434e-05, "loss": 0.8133, "step": 16140 }, { "epoch": 1.065163981730341, "grad_norm": 0.5252553820610046, "learning_rate": 2.6522886165443578e-05, "loss": 0.7999, "step": 16150 }, { "epoch": 1.065823536201296, "grad_norm": 0.5203016400337219, "learning_rate": 2.649415995578383e-05, "loss": 0.822, "step": 16160 }, { "epoch": 1.0664830906722509, "grad_norm": 0.5954844355583191, "learning_rate": 2.646543176615737e-05, "loss": 0.8165, "step": 16170 }, { "epoch": 1.0671426451432058, "grad_norm": 0.5122602581977844, "learning_rate": 2.6436701634632964e-05, "loss": 0.7921, "step": 16180 }, { "epoch": 1.0678021996141607, "grad_norm": 0.623115599155426, "learning_rate": 2.640796959928199e-05, "loss": 0.8371, "step": 16190 }, { "epoch": 1.0684617540851156, "grad_norm": 0.48406779766082764, "learning_rate": 2.6379235698178324e-05, "loss": 0.8037, "step": 16200 }, { "epoch": 1.0691213085560705, "grad_norm": 0.5848739743232727, "learning_rate": 2.6350499969398324e-05, "loss": 0.8351, "step": 16210 }, { "epoch": 1.0697808630270251, "grad_norm": 0.5309697985649109, "learning_rate": 2.632176245102077e-05, "loss": 0.8565, "step": 16220 }, { "epoch": 1.07044041749798, "grad_norm": 0.5303778648376465, "learning_rate": 2.629302318112682e-05, "loss": 0.7997, "step": 16230 }, { "epoch": 1.071099971968935, "grad_norm": 0.6161729097366333, "learning_rate": 2.626428219779994e-05, "loss": 0.8235, "step": 16240 }, { "epoch": 1.0717595264398898, "grad_norm": 0.5845171213150024, "learning_rate": 2.6235539539125862e-05, "loss": 0.8098, "step": 16250 }, { "epoch": 1.0724190809108447, "grad_norm": 0.5690910220146179, "learning_rate": 2.6206795243192565e-05, "loss": 0.8089, "step": 16260 }, { "epoch": 1.0730786353817996, "grad_norm": 0.571356475353241, "learning_rate": 2.6178049348090168e-05, "loss": 0.8165, "step": 16270 }, { "epoch": 1.0737381898527545, "grad_norm": 0.5281323194503784, "learning_rate": 2.6149301891910928e-05, "loss": 0.7906, "step": 16280 }, { "epoch": 1.0743977443237094, "grad_norm": 0.5516245365142822, "learning_rate": 2.612055291274916e-05, "loss": 0.8007, "step": 16290 }, { "epoch": 1.0750572987946643, "grad_norm": 0.50286465883255, "learning_rate": 2.6091802448701215e-05, "loss": 0.8348, "step": 16300 }, { "epoch": 1.0757168532656192, "grad_norm": 0.604536771774292, "learning_rate": 2.6063050537865374e-05, "loss": 0.8203, "step": 16310 }, { "epoch": 1.076376407736574, "grad_norm": 0.5543378591537476, "learning_rate": 2.603429721834188e-05, "loss": 0.8479, "step": 16320 }, { "epoch": 1.0770359622075287, "grad_norm": 0.5020339488983154, "learning_rate": 2.6005542528232818e-05, "loss": 0.7891, "step": 16330 }, { "epoch": 1.0776955166784836, "grad_norm": 0.5786985158920288, "learning_rate": 2.5976786505642088e-05, "loss": 0.8337, "step": 16340 }, { "epoch": 1.0783550711494385, "grad_norm": 0.5452788472175598, "learning_rate": 2.5948029188675365e-05, "loss": 0.8378, "step": 16350 }, { "epoch": 1.0790146256203934, "grad_norm": 0.610916018486023, "learning_rate": 2.5919270615440034e-05, "loss": 0.7981, "step": 16360 }, { "epoch": 1.0796741800913483, "grad_norm": 0.6328546404838562, "learning_rate": 2.5890510824045143e-05, "loss": 0.7947, "step": 16370 }, { "epoch": 1.0803337345623032, "grad_norm": 0.5229448676109314, "learning_rate": 2.5861749852601363e-05, "loss": 0.8286, "step": 16380 }, { "epoch": 1.080993289033258, "grad_norm": 0.5723561644554138, "learning_rate": 2.5832987739220926e-05, "loss": 0.8037, "step": 16390 }, { "epoch": 1.081652843504213, "grad_norm": 0.5695706009864807, "learning_rate": 2.580422452201756e-05, "loss": 0.807, "step": 16400 }, { "epoch": 1.0823123979751679, "grad_norm": 0.5206248164176941, "learning_rate": 2.5775460239106493e-05, "loss": 0.7963, "step": 16410 }, { "epoch": 1.0829719524461225, "grad_norm": 0.4780265986919403, "learning_rate": 2.5746694928604316e-05, "loss": 0.8223, "step": 16420 }, { "epoch": 1.0836315069170774, "grad_norm": 0.5492380857467651, "learning_rate": 2.571792862862902e-05, "loss": 0.8205, "step": 16430 }, { "epoch": 1.0842910613880323, "grad_norm": 0.5519129633903503, "learning_rate": 2.568916137729989e-05, "loss": 0.7784, "step": 16440 }, { "epoch": 1.0849506158589872, "grad_norm": 0.509762167930603, "learning_rate": 2.5660393212737495e-05, "loss": 0.803, "step": 16450 }, { "epoch": 1.0856101703299421, "grad_norm": 0.5750563740730286, "learning_rate": 2.563162417306358e-05, "loss": 0.8163, "step": 16460 }, { "epoch": 1.086269724800897, "grad_norm": 0.6123660802841187, "learning_rate": 2.5602854296401056e-05, "loss": 0.7731, "step": 16470 }, { "epoch": 1.086929279271852, "grad_norm": 0.5463810563087463, "learning_rate": 2.5574083620873978e-05, "loss": 0.8075, "step": 16480 }, { "epoch": 1.0875888337428068, "grad_norm": 0.5543496012687683, "learning_rate": 2.5545312184607416e-05, "loss": 0.817, "step": 16490 }, { "epoch": 1.0882483882137617, "grad_norm": 0.5343895554542542, "learning_rate": 2.551654002572747e-05, "loss": 0.8106, "step": 16500 }, { "epoch": 1.0889079426847164, "grad_norm": 0.6146511435508728, "learning_rate": 2.5487767182361193e-05, "loss": 0.8039, "step": 16510 }, { "epoch": 1.0895674971556712, "grad_norm": 0.6023949384689331, "learning_rate": 2.5458993692636553e-05, "loss": 0.8165, "step": 16520 }, { "epoch": 1.0902270516266261, "grad_norm": 0.6236518621444702, "learning_rate": 2.5430219594682365e-05, "loss": 0.8269, "step": 16530 }, { "epoch": 1.090886606097581, "grad_norm": 0.4938434064388275, "learning_rate": 2.5401444926628248e-05, "loss": 0.8249, "step": 16540 }, { "epoch": 1.091546160568536, "grad_norm": 0.6218588948249817, "learning_rate": 2.5372669726604585e-05, "loss": 0.8198, "step": 16550 }, { "epoch": 1.0922057150394908, "grad_norm": 0.5304175615310669, "learning_rate": 2.5343894032742448e-05, "loss": 0.8154, "step": 16560 }, { "epoch": 1.0928652695104457, "grad_norm": 0.5762113928794861, "learning_rate": 2.53151178831736e-05, "loss": 0.8324, "step": 16570 }, { "epoch": 1.0935248239814006, "grad_norm": 0.6506617665290833, "learning_rate": 2.528634131603036e-05, "loss": 0.7989, "step": 16580 }, { "epoch": 1.0941843784523555, "grad_norm": 0.5646471977233887, "learning_rate": 2.5257564369445626e-05, "loss": 0.8396, "step": 16590 }, { "epoch": 1.0948439329233104, "grad_norm": 0.5536471009254456, "learning_rate": 2.5228787081552806e-05, "loss": 0.814, "step": 16600 }, { "epoch": 1.0955034873942653, "grad_norm": 0.49897870421409607, "learning_rate": 2.520000949048574e-05, "loss": 0.7905, "step": 16610 }, { "epoch": 1.09616304186522, "grad_norm": 0.4896376430988312, "learning_rate": 2.5171231634378707e-05, "loss": 0.8267, "step": 16620 }, { "epoch": 1.0968225963361748, "grad_norm": 0.5398492217063904, "learning_rate": 2.5142453551366275e-05, "loss": 0.8251, "step": 16630 }, { "epoch": 1.0974821508071297, "grad_norm": 0.5313453078269958, "learning_rate": 2.5113675279583365e-05, "loss": 0.807, "step": 16640 }, { "epoch": 1.0981417052780846, "grad_norm": 0.6137615442276001, "learning_rate": 2.508489685716513e-05, "loss": 0.777, "step": 16650 }, { "epoch": 1.0988012597490395, "grad_norm": 0.522820770740509, "learning_rate": 2.5056118322246924e-05, "loss": 0.7904, "step": 16660 }, { "epoch": 1.0994608142199944, "grad_norm": 0.6334253549575806, "learning_rate": 2.5027339712964247e-05, "loss": 0.8444, "step": 16670 }, { "epoch": 1.1001203686909493, "grad_norm": 0.5604522824287415, "learning_rate": 2.499856106745271e-05, "loss": 0.8036, "step": 16680 }, { "epoch": 1.1007799231619042, "grad_norm": 0.5039092302322388, "learning_rate": 2.496978242384795e-05, "loss": 0.8408, "step": 16690 }, { "epoch": 1.101439477632859, "grad_norm": 0.5355373620986938, "learning_rate": 2.494100382028562e-05, "loss": 0.8177, "step": 16700 }, { "epoch": 1.1020990321038138, "grad_norm": 0.4862341582775116, "learning_rate": 2.4912225294901314e-05, "loss": 0.7895, "step": 16710 }, { "epoch": 1.1027585865747687, "grad_norm": 0.5653737783432007, "learning_rate": 2.4883446885830528e-05, "loss": 0.9132, "step": 16720 }, { "epoch": 1.1034181410457236, "grad_norm": 0.49577951431274414, "learning_rate": 2.485466863120859e-05, "loss": 0.8172, "step": 16730 }, { "epoch": 1.1040776955166784, "grad_norm": 0.550129771232605, "learning_rate": 2.4825890569170628e-05, "loss": 0.842, "step": 16740 }, { "epoch": 1.1047372499876333, "grad_norm": 0.5835773944854736, "learning_rate": 2.4797112737851528e-05, "loss": 0.8226, "step": 16750 }, { "epoch": 1.1053968044585882, "grad_norm": 0.5645239353179932, "learning_rate": 2.4768335175385867e-05, "loss": 0.8272, "step": 16760 }, { "epoch": 1.1060563589295431, "grad_norm": 0.5530699491500854, "learning_rate": 2.4739557919907842e-05, "loss": 0.8276, "step": 16770 }, { "epoch": 1.106715913400498, "grad_norm": 0.5654169321060181, "learning_rate": 2.471078100955126e-05, "loss": 0.8476, "step": 16780 }, { "epoch": 1.107375467871453, "grad_norm": 0.6649032831192017, "learning_rate": 2.468200448244949e-05, "loss": 0.7893, "step": 16790 }, { "epoch": 1.1080350223424078, "grad_norm": 0.5665352940559387, "learning_rate": 2.4653228376735362e-05, "loss": 0.7691, "step": 16800 }, { "epoch": 1.1086945768133625, "grad_norm": 0.5382755398750305, "learning_rate": 2.462445273054116e-05, "loss": 0.7964, "step": 16810 }, { "epoch": 1.1093541312843174, "grad_norm": 0.5529101490974426, "learning_rate": 2.4595677581998557e-05, "loss": 0.8219, "step": 16820 }, { "epoch": 1.1100136857552723, "grad_norm": 0.5242228507995605, "learning_rate": 2.4566902969238574e-05, "loss": 0.8117, "step": 16830 }, { "epoch": 1.1106732402262272, "grad_norm": 0.6301599740982056, "learning_rate": 2.4538128930391518e-05, "loss": 0.8044, "step": 16840 }, { "epoch": 1.111332794697182, "grad_norm": 0.5453548431396484, "learning_rate": 2.450935550358694e-05, "loss": 0.8587, "step": 16850 }, { "epoch": 1.111992349168137, "grad_norm": 0.5987652540206909, "learning_rate": 2.4480582726953553e-05, "loss": 0.805, "step": 16860 }, { "epoch": 1.1126519036390918, "grad_norm": 0.5183432102203369, "learning_rate": 2.4451810638619243e-05, "loss": 0.7719, "step": 16870 }, { "epoch": 1.1133114581100467, "grad_norm": 0.5366532206535339, "learning_rate": 2.4423039276710973e-05, "loss": 0.7863, "step": 16880 }, { "epoch": 1.1139710125810016, "grad_norm": 0.5672691464424133, "learning_rate": 2.4394268679354736e-05, "loss": 0.8007, "step": 16890 }, { "epoch": 1.1146305670519565, "grad_norm": 0.5237433314323425, "learning_rate": 2.4365498884675515e-05, "loss": 0.7789, "step": 16900 }, { "epoch": 1.1152901215229112, "grad_norm": 0.5496178865432739, "learning_rate": 2.4336729930797234e-05, "loss": 0.8217, "step": 16910 }, { "epoch": 1.115949675993866, "grad_norm": 0.5697668790817261, "learning_rate": 2.43079618558427e-05, "loss": 0.8014, "step": 16920 }, { "epoch": 1.116609230464821, "grad_norm": 0.6329261064529419, "learning_rate": 2.4279194697933556e-05, "loss": 0.8646, "step": 16930 }, { "epoch": 1.1172687849357759, "grad_norm": 0.5397175550460815, "learning_rate": 2.4250428495190227e-05, "loss": 0.855, "step": 16940 }, { "epoch": 1.1179283394067308, "grad_norm": 0.5569603443145752, "learning_rate": 2.4221663285731875e-05, "loss": 0.7462, "step": 16950 }, { "epoch": 1.1185878938776856, "grad_norm": 0.6040708422660828, "learning_rate": 2.4192899107676343e-05, "loss": 0.7782, "step": 16960 }, { "epoch": 1.1192474483486405, "grad_norm": 0.5611158013343811, "learning_rate": 2.4164135999140112e-05, "loss": 0.831, "step": 16970 }, { "epoch": 1.1199070028195954, "grad_norm": 0.6430832743644714, "learning_rate": 2.4135373998238254e-05, "loss": 0.8552, "step": 16980 }, { "epoch": 1.1205665572905503, "grad_norm": 0.5347371101379395, "learning_rate": 2.4106613143084335e-05, "loss": 0.8524, "step": 16990 }, { "epoch": 1.121226111761505, "grad_norm": 0.5940733551979065, "learning_rate": 2.407785347179045e-05, "loss": 0.8171, "step": 17000 }, { "epoch": 1.1218856662324599, "grad_norm": 0.6383303999900818, "learning_rate": 2.4049095022467094e-05, "loss": 0.8148, "step": 17010 }, { "epoch": 1.1225452207034148, "grad_norm": 0.5206311345100403, "learning_rate": 2.4020337833223165e-05, "loss": 0.8517, "step": 17020 }, { "epoch": 1.1232047751743697, "grad_norm": 0.5240987539291382, "learning_rate": 2.399158194216588e-05, "loss": 0.806, "step": 17030 }, { "epoch": 1.1238643296453246, "grad_norm": 0.5234224796295166, "learning_rate": 2.3962827387400724e-05, "loss": 0.8178, "step": 17040 }, { "epoch": 1.1245238841162795, "grad_norm": 0.6092124581336975, "learning_rate": 2.393407420703143e-05, "loss": 0.8218, "step": 17050 }, { "epoch": 1.1251834385872344, "grad_norm": 0.5292211771011353, "learning_rate": 2.3905322439159905e-05, "loss": 0.8425, "step": 17060 }, { "epoch": 1.1258429930581892, "grad_norm": 0.5749512314796448, "learning_rate": 2.3876572121886186e-05, "loss": 0.8278, "step": 17070 }, { "epoch": 1.1265025475291441, "grad_norm": 0.4919281005859375, "learning_rate": 2.3847823293308372e-05, "loss": 0.8333, "step": 17080 }, { "epoch": 1.1271621020000988, "grad_norm": 0.5225867033004761, "learning_rate": 2.3819075991522604e-05, "loss": 0.8135, "step": 17090 }, { "epoch": 1.127821656471054, "grad_norm": 0.5524780750274658, "learning_rate": 2.3790330254623e-05, "loss": 0.7968, "step": 17100 }, { "epoch": 1.1284812109420086, "grad_norm": 0.5935143828392029, "learning_rate": 2.3761586120701597e-05, "loss": 0.7822, "step": 17110 }, { "epoch": 1.1291407654129635, "grad_norm": 0.501223623752594, "learning_rate": 2.3732843627848327e-05, "loss": 0.7745, "step": 17120 }, { "epoch": 1.1298003198839184, "grad_norm": 0.6654418706893921, "learning_rate": 2.3704102814150912e-05, "loss": 0.7994, "step": 17130 }, { "epoch": 1.1304598743548733, "grad_norm": 0.5945008993148804, "learning_rate": 2.3675363717694878e-05, "loss": 0.8149, "step": 17140 }, { "epoch": 1.1311194288258282, "grad_norm": 0.48068150877952576, "learning_rate": 2.3646626376563468e-05, "loss": 0.8087, "step": 17150 }, { "epoch": 1.131778983296783, "grad_norm": 0.6289824843406677, "learning_rate": 2.36178908288376e-05, "loss": 0.8473, "step": 17160 }, { "epoch": 1.132438537767738, "grad_norm": 0.5678028464317322, "learning_rate": 2.358915711259581e-05, "loss": 0.7846, "step": 17170 }, { "epoch": 1.1330980922386928, "grad_norm": 0.598111093044281, "learning_rate": 2.3560425265914204e-05, "loss": 0.803, "step": 17180 }, { "epoch": 1.1337576467096477, "grad_norm": 0.6262847185134888, "learning_rate": 2.3531695326866423e-05, "loss": 0.8234, "step": 17190 }, { "epoch": 1.1344172011806024, "grad_norm": 0.510326087474823, "learning_rate": 2.3502967333523575e-05, "loss": 0.7743, "step": 17200 }, { "epoch": 1.1350767556515573, "grad_norm": 0.5270165205001831, "learning_rate": 2.3474241323954197e-05, "loss": 0.8047, "step": 17210 }, { "epoch": 1.1357363101225122, "grad_norm": 0.6041849851608276, "learning_rate": 2.3445517336224166e-05, "loss": 0.8178, "step": 17220 }, { "epoch": 1.136395864593467, "grad_norm": 0.5742443799972534, "learning_rate": 2.3416795408396717e-05, "loss": 0.8408, "step": 17230 }, { "epoch": 1.137055419064422, "grad_norm": 0.5951201319694519, "learning_rate": 2.3388075578532338e-05, "loss": 0.8082, "step": 17240 }, { "epoch": 1.1377149735353769, "grad_norm": 0.5675894021987915, "learning_rate": 2.3359357884688747e-05, "loss": 0.8107, "step": 17250 }, { "epoch": 1.1383745280063318, "grad_norm": 0.5660733580589294, "learning_rate": 2.333064236492081e-05, "loss": 0.8182, "step": 17260 }, { "epoch": 1.1390340824772867, "grad_norm": 0.5753691792488098, "learning_rate": 2.3301929057280536e-05, "loss": 0.8055, "step": 17270 }, { "epoch": 1.1396936369482416, "grad_norm": 0.5021584033966064, "learning_rate": 2.327321799981699e-05, "loss": 0.8195, "step": 17280 }, { "epoch": 1.1403531914191962, "grad_norm": 0.5370116233825684, "learning_rate": 2.3244509230576262e-05, "loss": 0.7708, "step": 17290 }, { "epoch": 1.1410127458901511, "grad_norm": 0.6154167056083679, "learning_rate": 2.321580278760141e-05, "loss": 0.82, "step": 17300 }, { "epoch": 1.141672300361106, "grad_norm": 0.5904589891433716, "learning_rate": 2.318709870893239e-05, "loss": 0.8274, "step": 17310 }, { "epoch": 1.142331854832061, "grad_norm": 0.5207957625389099, "learning_rate": 2.315839703260605e-05, "loss": 0.7682, "step": 17320 }, { "epoch": 1.1429914093030158, "grad_norm": 0.6100203394889832, "learning_rate": 2.312969779665604e-05, "loss": 0.7972, "step": 17330 }, { "epoch": 1.1436509637739707, "grad_norm": 0.5301212072372437, "learning_rate": 2.3101001039112794e-05, "loss": 0.8483, "step": 17340 }, { "epoch": 1.1443105182449256, "grad_norm": 0.5791840553283691, "learning_rate": 2.3072306798003433e-05, "loss": 0.7987, "step": 17350 }, { "epoch": 1.1449700727158805, "grad_norm": 0.5505053400993347, "learning_rate": 2.3043615111351766e-05, "loss": 0.8104, "step": 17360 }, { "epoch": 1.1456296271868354, "grad_norm": 0.5380775332450867, "learning_rate": 2.301492601717821e-05, "loss": 0.8407, "step": 17370 }, { "epoch": 1.1462891816577903, "grad_norm": 0.5672085285186768, "learning_rate": 2.298623955349975e-05, "loss": 0.7932, "step": 17380 }, { "epoch": 1.1469487361287451, "grad_norm": 0.6575245261192322, "learning_rate": 2.2957555758329874e-05, "loss": 0.8142, "step": 17390 }, { "epoch": 1.1476082905996998, "grad_norm": 0.5814910531044006, "learning_rate": 2.2928874669678548e-05, "loss": 0.8402, "step": 17400 }, { "epoch": 1.1482678450706547, "grad_norm": 0.557349681854248, "learning_rate": 2.2900196325552143e-05, "loss": 0.7956, "step": 17410 }, { "epoch": 1.1489273995416096, "grad_norm": 0.6413754820823669, "learning_rate": 2.2871520763953395e-05, "loss": 0.8004, "step": 17420 }, { "epoch": 1.1495869540125645, "grad_norm": 0.5818390846252441, "learning_rate": 2.2842848022881367e-05, "loss": 0.8082, "step": 17430 }, { "epoch": 1.1502465084835194, "grad_norm": 0.5735812783241272, "learning_rate": 2.281417814033135e-05, "loss": 0.8373, "step": 17440 }, { "epoch": 1.1509060629544743, "grad_norm": 0.6169028282165527, "learning_rate": 2.2785511154294872e-05, "loss": 0.8308, "step": 17450 }, { "epoch": 1.1515656174254292, "grad_norm": 0.5504375100135803, "learning_rate": 2.2756847102759625e-05, "loss": 0.8558, "step": 17460 }, { "epoch": 1.152225171896384, "grad_norm": 0.43717899918556213, "learning_rate": 2.2728186023709406e-05, "loss": 0.7973, "step": 17470 }, { "epoch": 1.152884726367339, "grad_norm": 0.5762133002281189, "learning_rate": 2.269952795512407e-05, "loss": 0.8107, "step": 17480 }, { "epoch": 1.1535442808382936, "grad_norm": 0.5894545912742615, "learning_rate": 2.267087293497948e-05, "loss": 0.8131, "step": 17490 }, { "epoch": 1.1542038353092485, "grad_norm": 0.5659452080726624, "learning_rate": 2.2642221001247475e-05, "loss": 0.8045, "step": 17500 }, { "epoch": 1.1548633897802034, "grad_norm": 0.5233924388885498, "learning_rate": 2.2613572191895792e-05, "loss": 0.7889, "step": 17510 }, { "epoch": 1.1555229442511583, "grad_norm": 0.5157888531684875, "learning_rate": 2.2584926544888034e-05, "loss": 0.8099, "step": 17520 }, { "epoch": 1.1561824987221132, "grad_norm": 0.5643179416656494, "learning_rate": 2.2556284098183593e-05, "loss": 0.7983, "step": 17530 }, { "epoch": 1.156842053193068, "grad_norm": 0.6216257214546204, "learning_rate": 2.252764488973765e-05, "loss": 0.8089, "step": 17540 }, { "epoch": 1.157501607664023, "grad_norm": 0.5228362083435059, "learning_rate": 2.249900895750107e-05, "loss": 0.7835, "step": 17550 }, { "epoch": 1.1581611621349779, "grad_norm": 0.5488897562026978, "learning_rate": 2.247037633942041e-05, "loss": 0.8333, "step": 17560 }, { "epoch": 1.1588207166059328, "grad_norm": 0.654600977897644, "learning_rate": 2.244174707343779e-05, "loss": 0.7936, "step": 17570 }, { "epoch": 1.1594802710768874, "grad_norm": 0.620481014251709, "learning_rate": 2.2413121197490924e-05, "loss": 0.8162, "step": 17580 }, { "epoch": 1.1601398255478423, "grad_norm": 0.5952094793319702, "learning_rate": 2.2384498749513014e-05, "loss": 0.8404, "step": 17590 }, { "epoch": 1.1607993800187972, "grad_norm": 0.5122694373130798, "learning_rate": 2.235587976743273e-05, "loss": 0.8346, "step": 17600 }, { "epoch": 1.1614589344897521, "grad_norm": 0.5466095209121704, "learning_rate": 2.2327264289174155e-05, "loss": 0.812, "step": 17610 }, { "epoch": 1.162118488960707, "grad_norm": 0.5331703424453735, "learning_rate": 2.2298652352656698e-05, "loss": 0.805, "step": 17620 }, { "epoch": 1.162778043431662, "grad_norm": 0.5187802314758301, "learning_rate": 2.2270043995795114e-05, "loss": 0.7978, "step": 17630 }, { "epoch": 1.1634375979026168, "grad_norm": 0.5421300530433655, "learning_rate": 2.2241439256499388e-05, "loss": 0.8455, "step": 17640 }, { "epoch": 1.1640971523735717, "grad_norm": 0.5664952993392944, "learning_rate": 2.2212838172674736e-05, "loss": 0.8071, "step": 17650 }, { "epoch": 1.1647567068445266, "grad_norm": 0.5498470067977905, "learning_rate": 2.218424078222149e-05, "loss": 0.7909, "step": 17660 }, { "epoch": 1.1654162613154815, "grad_norm": 0.5885456800460815, "learning_rate": 2.2155647123035124e-05, "loss": 0.7942, "step": 17670 }, { "epoch": 1.1660758157864364, "grad_norm": 0.5358192324638367, "learning_rate": 2.2127057233006152e-05, "loss": 0.8011, "step": 17680 }, { "epoch": 1.166735370257391, "grad_norm": 0.5777158737182617, "learning_rate": 2.2098471150020102e-05, "loss": 0.8387, "step": 17690 }, { "epoch": 1.167394924728346, "grad_norm": 0.6417843699455261, "learning_rate": 2.2069888911957447e-05, "loss": 0.814, "step": 17700 }, { "epoch": 1.1680544791993008, "grad_norm": 0.5849559903144836, "learning_rate": 2.2041310556693573e-05, "loss": 0.801, "step": 17710 }, { "epoch": 1.1687140336702557, "grad_norm": 0.5827060341835022, "learning_rate": 2.2012736122098714e-05, "loss": 0.7806, "step": 17720 }, { "epoch": 1.1693735881412106, "grad_norm": 0.5556344389915466, "learning_rate": 2.1984165646037917e-05, "loss": 0.8079, "step": 17730 }, { "epoch": 1.1700331426121655, "grad_norm": 0.5267961621284485, "learning_rate": 2.1955599166370984e-05, "loss": 0.8203, "step": 17740 }, { "epoch": 1.1706926970831204, "grad_norm": 0.6056419610977173, "learning_rate": 2.19270367209524e-05, "loss": 0.8116, "step": 17750 }, { "epoch": 1.1713522515540753, "grad_norm": 0.575733482837677, "learning_rate": 2.189847834763132e-05, "loss": 0.792, "step": 17760 }, { "epoch": 1.1720118060250302, "grad_norm": 0.5400353670120239, "learning_rate": 2.1869924084251516e-05, "loss": 0.7937, "step": 17770 }, { "epoch": 1.1726713604959849, "grad_norm": 0.5470884442329407, "learning_rate": 2.184137396865129e-05, "loss": 0.768, "step": 17780 }, { "epoch": 1.1733309149669398, "grad_norm": 0.5165489912033081, "learning_rate": 2.181282803866348e-05, "loss": 0.7874, "step": 17790 }, { "epoch": 1.1739904694378946, "grad_norm": 0.5373079776763916, "learning_rate": 2.1784286332115323e-05, "loss": 0.7938, "step": 17800 }, { "epoch": 1.1746500239088495, "grad_norm": 0.5437031984329224, "learning_rate": 2.1755748886828507e-05, "loss": 0.823, "step": 17810 }, { "epoch": 1.1753095783798044, "grad_norm": 0.5490434169769287, "learning_rate": 2.172721574061906e-05, "loss": 0.7868, "step": 17820 }, { "epoch": 1.1759691328507593, "grad_norm": 0.5717520117759705, "learning_rate": 2.1698686931297317e-05, "loss": 0.8605, "step": 17830 }, { "epoch": 1.1766286873217142, "grad_norm": 0.5349151492118835, "learning_rate": 2.167016249666785e-05, "loss": 0.8, "step": 17840 }, { "epoch": 1.177288241792669, "grad_norm": 0.513151228427887, "learning_rate": 2.164164247452944e-05, "loss": 0.783, "step": 17850 }, { "epoch": 1.177947796263624, "grad_norm": 0.5884870290756226, "learning_rate": 2.161312690267504e-05, "loss": 0.778, "step": 17860 }, { "epoch": 1.1786073507345787, "grad_norm": 0.48820847272872925, "learning_rate": 2.158461581889169e-05, "loss": 0.8613, "step": 17870 }, { "epoch": 1.1792669052055336, "grad_norm": 0.5307297706604004, "learning_rate": 2.155610926096047e-05, "loss": 0.7844, "step": 17880 }, { "epoch": 1.1799264596764885, "grad_norm": 0.5557736158370972, "learning_rate": 2.152760726665648e-05, "loss": 0.7863, "step": 17890 }, { "epoch": 1.1805860141474434, "grad_norm": 0.6041250824928284, "learning_rate": 2.1499109873748777e-05, "loss": 0.8033, "step": 17900 }, { "epoch": 1.1812455686183982, "grad_norm": 0.6773850917816162, "learning_rate": 2.1470617120000307e-05, "loss": 0.8152, "step": 17910 }, { "epoch": 1.1819051230893531, "grad_norm": 0.5740440487861633, "learning_rate": 2.1442129043167874e-05, "loss": 0.793, "step": 17920 }, { "epoch": 1.182564677560308, "grad_norm": 0.5717465877532959, "learning_rate": 2.1413645681002087e-05, "loss": 0.83, "step": 17930 }, { "epoch": 1.183224232031263, "grad_norm": 0.6836726069450378, "learning_rate": 2.13851670712473e-05, "loss": 0.775, "step": 17940 }, { "epoch": 1.1838837865022178, "grad_norm": 0.6140259504318237, "learning_rate": 2.1356693251641575e-05, "loss": 0.8166, "step": 17950 }, { "epoch": 1.1845433409731727, "grad_norm": 0.5447438359260559, "learning_rate": 2.1328224259916636e-05, "loss": 0.7571, "step": 17960 }, { "epoch": 1.1852028954441276, "grad_norm": 0.6259800791740417, "learning_rate": 2.129976013379778e-05, "loss": 0.7944, "step": 17970 }, { "epoch": 1.1858624499150823, "grad_norm": 0.5480833649635315, "learning_rate": 2.1271300911003888e-05, "loss": 0.8111, "step": 17980 }, { "epoch": 1.1865220043860372, "grad_norm": 0.5970847606658936, "learning_rate": 2.1242846629247327e-05, "loss": 0.825, "step": 17990 }, { "epoch": 1.187181558856992, "grad_norm": 0.5720780491828918, "learning_rate": 2.1214397326233916e-05, "loss": 0.8312, "step": 18000 }, { "epoch": 1.187841113327947, "grad_norm": 0.6329047083854675, "learning_rate": 2.11859530396629e-05, "loss": 0.8134, "step": 18010 }, { "epoch": 1.1885006677989018, "grad_norm": 0.5721922516822815, "learning_rate": 2.115751380722683e-05, "loss": 0.7974, "step": 18020 }, { "epoch": 1.1891602222698567, "grad_norm": 0.5312685966491699, "learning_rate": 2.1129079666611602e-05, "loss": 0.7841, "step": 18030 }, { "epoch": 1.1898197767408116, "grad_norm": 0.6476850509643555, "learning_rate": 2.1100650655496347e-05, "loss": 0.8379, "step": 18040 }, { "epoch": 1.1904793312117665, "grad_norm": 0.5220320820808411, "learning_rate": 2.107222681155341e-05, "loss": 0.7927, "step": 18050 }, { "epoch": 1.1911388856827214, "grad_norm": 0.6444506645202637, "learning_rate": 2.104380817244826e-05, "loss": 0.7943, "step": 18060 }, { "epoch": 1.191798440153676, "grad_norm": 0.5259702205657959, "learning_rate": 2.1015394775839502e-05, "loss": 0.7614, "step": 18070 }, { "epoch": 1.192457994624631, "grad_norm": 0.5620884895324707, "learning_rate": 2.0986986659378777e-05, "loss": 0.8022, "step": 18080 }, { "epoch": 1.1931175490955859, "grad_norm": 0.5884404182434082, "learning_rate": 2.0958583860710733e-05, "loss": 0.8566, "step": 18090 }, { "epoch": 1.1937771035665408, "grad_norm": 0.5265540480613708, "learning_rate": 2.093018641747299e-05, "loss": 0.7796, "step": 18100 }, { "epoch": 1.1944366580374957, "grad_norm": 0.5781798958778381, "learning_rate": 2.0901794367296014e-05, "loss": 0.8111, "step": 18110 }, { "epoch": 1.1950962125084506, "grad_norm": 0.6055285930633545, "learning_rate": 2.0873407747803188e-05, "loss": 0.8399, "step": 18120 }, { "epoch": 1.1957557669794054, "grad_norm": 0.5819869637489319, "learning_rate": 2.084502659661067e-05, "loss": 0.8185, "step": 18130 }, { "epoch": 1.1964153214503603, "grad_norm": 0.5730735063552856, "learning_rate": 2.0816650951327375e-05, "loss": 0.827, "step": 18140 }, { "epoch": 1.1970748759213152, "grad_norm": 0.5140936374664307, "learning_rate": 2.078828084955492e-05, "loss": 0.83, "step": 18150 }, { "epoch": 1.19773443039227, "grad_norm": 0.6362972855567932, "learning_rate": 2.0759916328887574e-05, "loss": 0.8249, "step": 18160 }, { "epoch": 1.198393984863225, "grad_norm": 0.5397179126739502, "learning_rate": 2.0731557426912224e-05, "loss": 0.7908, "step": 18170 }, { "epoch": 1.1990535393341797, "grad_norm": 0.5912061929702759, "learning_rate": 2.0703204181208292e-05, "loss": 0.8028, "step": 18180 }, { "epoch": 1.1997130938051346, "grad_norm": 0.6152624487876892, "learning_rate": 2.0674856629347733e-05, "loss": 0.8105, "step": 18190 }, { "epoch": 1.2003726482760895, "grad_norm": 0.47451311349868774, "learning_rate": 2.0646514808894914e-05, "loss": 0.808, "step": 18200 }, { "epoch": 1.2010322027470444, "grad_norm": 0.6080948710441589, "learning_rate": 2.0618178757406645e-05, "loss": 0.7924, "step": 18210 }, { "epoch": 1.2016917572179993, "grad_norm": 0.5713484287261963, "learning_rate": 2.0589848512432065e-05, "loss": 0.8009, "step": 18220 }, { "epoch": 1.2023513116889541, "grad_norm": 0.589370608329773, "learning_rate": 2.0561524111512662e-05, "loss": 0.8316, "step": 18230 }, { "epoch": 1.203010866159909, "grad_norm": 0.5649095177650452, "learning_rate": 2.0533205592182118e-05, "loss": 0.8261, "step": 18240 }, { "epoch": 1.203670420630864, "grad_norm": 0.6353856325149536, "learning_rate": 2.0504892991966362e-05, "loss": 0.8201, "step": 18250 }, { "epoch": 1.2043299751018188, "grad_norm": 0.5868256092071533, "learning_rate": 2.047658634838347e-05, "loss": 0.8213, "step": 18260 }, { "epoch": 1.2049895295727735, "grad_norm": 0.6138498783111572, "learning_rate": 2.0448285698943627e-05, "loss": 0.807, "step": 18270 }, { "epoch": 1.2056490840437284, "grad_norm": 0.5619494318962097, "learning_rate": 2.0419991081149076e-05, "loss": 0.8276, "step": 18280 }, { "epoch": 1.2063086385146833, "grad_norm": 0.6716960668563843, "learning_rate": 2.0391702532494054e-05, "loss": 0.8525, "step": 18290 }, { "epoch": 1.2069681929856382, "grad_norm": 0.5778971910476685, "learning_rate": 2.0363420090464766e-05, "loss": 0.7981, "step": 18300 }, { "epoch": 1.207627747456593, "grad_norm": 0.5850443243980408, "learning_rate": 2.033514379253933e-05, "loss": 0.7916, "step": 18310 }, { "epoch": 1.208287301927548, "grad_norm": 0.5164930820465088, "learning_rate": 2.030687367618772e-05, "loss": 0.8356, "step": 18320 }, { "epoch": 1.2089468563985029, "grad_norm": 0.5958451628684998, "learning_rate": 2.0278609778871702e-05, "loss": 0.7961, "step": 18330 }, { "epoch": 1.2096064108694577, "grad_norm": 0.598099946975708, "learning_rate": 2.025035213804482e-05, "loss": 0.8413, "step": 18340 }, { "epoch": 1.2102659653404126, "grad_norm": 0.6404171586036682, "learning_rate": 2.0222100791152318e-05, "loss": 0.7793, "step": 18350 }, { "epoch": 1.2109255198113673, "grad_norm": 0.5784912109375, "learning_rate": 2.019385577563111e-05, "loss": 0.7858, "step": 18360 }, { "epoch": 1.2115850742823222, "grad_norm": 0.525601327419281, "learning_rate": 2.0165617128909706e-05, "loss": 0.779, "step": 18370 }, { "epoch": 1.212244628753277, "grad_norm": 0.5646904706954956, "learning_rate": 2.013738488840818e-05, "loss": 0.8152, "step": 18380 }, { "epoch": 1.212904183224232, "grad_norm": 0.6017470955848694, "learning_rate": 2.010915909153813e-05, "loss": 0.7865, "step": 18390 }, { "epoch": 1.2135637376951869, "grad_norm": 0.5971163511276245, "learning_rate": 2.00809397757026e-05, "loss": 0.8572, "step": 18400 }, { "epoch": 1.2142232921661418, "grad_norm": 0.5196258425712585, "learning_rate": 2.005272697829606e-05, "loss": 0.8182, "step": 18410 }, { "epoch": 1.2148828466370967, "grad_norm": 0.59040766954422, "learning_rate": 2.0024520736704317e-05, "loss": 0.8146, "step": 18420 }, { "epoch": 1.2155424011080516, "grad_norm": 0.6149417757987976, "learning_rate": 1.999632108830451e-05, "loss": 0.8172, "step": 18430 }, { "epoch": 1.2162019555790065, "grad_norm": 0.5045343041419983, "learning_rate": 1.9968128070465046e-05, "loss": 0.8656, "step": 18440 }, { "epoch": 1.2168615100499613, "grad_norm": 0.6588855385780334, "learning_rate": 1.9939941720545545e-05, "loss": 0.7979, "step": 18450 }, { "epoch": 1.2175210645209162, "grad_norm": 0.559449315071106, "learning_rate": 1.9911762075896765e-05, "loss": 0.7894, "step": 18460 }, { "epoch": 1.218180618991871, "grad_norm": 0.5257613062858582, "learning_rate": 1.9883589173860604e-05, "loss": 0.8484, "step": 18470 }, { "epoch": 1.2188401734628258, "grad_norm": 0.5910244584083557, "learning_rate": 1.9855423051770023e-05, "loss": 0.7977, "step": 18480 }, { "epoch": 1.2194997279337807, "grad_norm": 0.6656187176704407, "learning_rate": 1.982726374694899e-05, "loss": 0.853, "step": 18490 }, { "epoch": 1.2201592824047356, "grad_norm": 0.5211578607559204, "learning_rate": 1.9799111296712448e-05, "loss": 0.8342, "step": 18500 }, { "epoch": 1.2208188368756905, "grad_norm": 0.6329109072685242, "learning_rate": 1.9770965738366242e-05, "loss": 0.7913, "step": 18510 }, { "epoch": 1.2214783913466454, "grad_norm": 0.549485981464386, "learning_rate": 1.97428271092071e-05, "loss": 0.81, "step": 18520 }, { "epoch": 1.2221379458176003, "grad_norm": 0.5727259516716003, "learning_rate": 1.971469544652257e-05, "loss": 0.8258, "step": 18530 }, { "epoch": 1.2227975002885552, "grad_norm": 0.5799168944358826, "learning_rate": 1.9686570787590957e-05, "loss": 0.8108, "step": 18540 }, { "epoch": 1.22345705475951, "grad_norm": 0.6752129197120667, "learning_rate": 1.965845316968128e-05, "loss": 0.8412, "step": 18550 }, { "epoch": 1.2241166092304647, "grad_norm": 0.6387220621109009, "learning_rate": 1.963034263005324e-05, "loss": 0.7992, "step": 18560 }, { "epoch": 1.2247761637014196, "grad_norm": 0.6659034490585327, "learning_rate": 1.9602239205957156e-05, "loss": 0.8107, "step": 18570 }, { "epoch": 1.2254357181723745, "grad_norm": 0.5545511245727539, "learning_rate": 1.957414293463392e-05, "loss": 0.7758, "step": 18580 }, { "epoch": 1.2260952726433294, "grad_norm": 0.6981990933418274, "learning_rate": 1.9546053853314937e-05, "loss": 0.8423, "step": 18590 }, { "epoch": 1.2267548271142843, "grad_norm": 0.561258852481842, "learning_rate": 1.9517971999222097e-05, "loss": 0.8234, "step": 18600 }, { "epoch": 1.2274143815852392, "grad_norm": 0.5442118644714355, "learning_rate": 1.9489897409567694e-05, "loss": 0.8102, "step": 18610 }, { "epoch": 1.228073936056194, "grad_norm": 0.5950818657875061, "learning_rate": 1.9461830121554414e-05, "loss": 0.838, "step": 18620 }, { "epoch": 1.228733490527149, "grad_norm": 0.5580155253410339, "learning_rate": 1.9433770172375272e-05, "loss": 0.8407, "step": 18630 }, { "epoch": 1.2293930449981039, "grad_norm": 0.5553641319274902, "learning_rate": 1.9405717599213526e-05, "loss": 0.8295, "step": 18640 }, { "epoch": 1.2300525994690585, "grad_norm": 0.5126636624336243, "learning_rate": 1.9377672439242676e-05, "loss": 0.8066, "step": 18650 }, { "epoch": 1.2307121539400134, "grad_norm": 0.5395209193229675, "learning_rate": 1.9349634729626427e-05, "loss": 0.8221, "step": 18660 }, { "epoch": 1.2313717084109683, "grad_norm": 0.5603570342063904, "learning_rate": 1.932160450751857e-05, "loss": 0.8099, "step": 18670 }, { "epoch": 1.2320312628819232, "grad_norm": 0.5477350950241089, "learning_rate": 1.929358181006301e-05, "loss": 0.8187, "step": 18680 }, { "epoch": 1.232690817352878, "grad_norm": 0.5807533264160156, "learning_rate": 1.926556667439364e-05, "loss": 0.8133, "step": 18690 }, { "epoch": 1.233350371823833, "grad_norm": 0.5522672533988953, "learning_rate": 1.9237559137634358e-05, "loss": 0.8245, "step": 18700 }, { "epoch": 1.234009926294788, "grad_norm": 0.5667465329170227, "learning_rate": 1.9209559236898996e-05, "loss": 0.7869, "step": 18710 }, { "epoch": 1.2346694807657428, "grad_norm": 0.5482111573219299, "learning_rate": 1.918156700929126e-05, "loss": 0.7505, "step": 18720 }, { "epoch": 1.2353290352366977, "grad_norm": 0.5499863028526306, "learning_rate": 1.915358249190468e-05, "loss": 0.8484, "step": 18730 }, { "epoch": 1.2359885897076526, "grad_norm": 0.6239943504333496, "learning_rate": 1.9125605721822584e-05, "loss": 0.837, "step": 18740 }, { "epoch": 1.2366481441786075, "grad_norm": 0.6499251127243042, "learning_rate": 1.9097636736118025e-05, "loss": 0.8174, "step": 18750 }, { "epoch": 1.2373076986495621, "grad_norm": 0.6384209990501404, "learning_rate": 1.906967557185374e-05, "loss": 0.7907, "step": 18760 }, { "epoch": 1.237967253120517, "grad_norm": 0.5927095413208008, "learning_rate": 1.904172226608212e-05, "loss": 0.7915, "step": 18770 }, { "epoch": 1.238626807591472, "grad_norm": 0.6062419414520264, "learning_rate": 1.90137768558451e-05, "loss": 0.7449, "step": 18780 }, { "epoch": 1.2392863620624268, "grad_norm": 0.6219165325164795, "learning_rate": 1.898583937817419e-05, "loss": 0.7835, "step": 18790 }, { "epoch": 1.2399459165333817, "grad_norm": 0.6713700294494629, "learning_rate": 1.895790987009038e-05, "loss": 0.8172, "step": 18800 }, { "epoch": 1.2406054710043366, "grad_norm": 0.5766702890396118, "learning_rate": 1.89299883686041e-05, "loss": 0.7811, "step": 18810 }, { "epoch": 1.2412650254752915, "grad_norm": 0.6060327887535095, "learning_rate": 1.8902074910715156e-05, "loss": 0.8735, "step": 18820 }, { "epoch": 1.2419245799462464, "grad_norm": 0.5441230535507202, "learning_rate": 1.8874169533412705e-05, "loss": 0.8124, "step": 18830 }, { "epoch": 1.2425841344172013, "grad_norm": 0.6223664879798889, "learning_rate": 1.8846272273675206e-05, "loss": 0.814, "step": 18840 }, { "epoch": 1.243243688888156, "grad_norm": 0.5244922041893005, "learning_rate": 1.8818383168470353e-05, "loss": 0.8526, "step": 18850 }, { "epoch": 1.2439032433591108, "grad_norm": 0.6009203791618347, "learning_rate": 1.8790502254755023e-05, "loss": 0.79, "step": 18860 }, { "epoch": 1.2445627978300657, "grad_norm": 0.6063975691795349, "learning_rate": 1.876262956947524e-05, "loss": 0.8173, "step": 18870 }, { "epoch": 1.2452223523010206, "grad_norm": 0.6352492570877075, "learning_rate": 1.8734765149566154e-05, "loss": 0.8327, "step": 18880 }, { "epoch": 1.2458819067719755, "grad_norm": 0.5097781419754028, "learning_rate": 1.8706909031951926e-05, "loss": 0.8425, "step": 18890 }, { "epoch": 1.2465414612429304, "grad_norm": 0.5806178450584412, "learning_rate": 1.8679061253545742e-05, "loss": 0.8452, "step": 18900 }, { "epoch": 1.2472010157138853, "grad_norm": 0.5763145685195923, "learning_rate": 1.865122185124971e-05, "loss": 0.7994, "step": 18910 }, { "epoch": 1.2478605701848402, "grad_norm": 0.558208703994751, "learning_rate": 1.862339086195486e-05, "loss": 0.8302, "step": 18920 }, { "epoch": 1.248520124655795, "grad_norm": 0.5959612727165222, "learning_rate": 1.8595568322541067e-05, "loss": 0.7906, "step": 18930 }, { "epoch": 1.2491796791267498, "grad_norm": 0.5050669312477112, "learning_rate": 1.856775426987702e-05, "loss": 0.8195, "step": 18940 }, { "epoch": 1.2498392335977047, "grad_norm": 0.6480620503425598, "learning_rate": 1.8539948740820128e-05, "loss": 0.7802, "step": 18950 }, { "epoch": 1.2504987880686596, "grad_norm": 0.5807694792747498, "learning_rate": 1.8512151772216544e-05, "loss": 0.805, "step": 18960 }, { "epoch": 1.2511583425396144, "grad_norm": 0.5697056651115417, "learning_rate": 1.8484363400901057e-05, "loss": 0.7891, "step": 18970 }, { "epoch": 1.2518178970105693, "grad_norm": 0.6128772497177124, "learning_rate": 1.8456583663697066e-05, "loss": 0.8099, "step": 18980 }, { "epoch": 1.2524774514815242, "grad_norm": 0.5341264009475708, "learning_rate": 1.8428812597416544e-05, "loss": 0.8081, "step": 18990 }, { "epoch": 1.2531370059524791, "grad_norm": 0.5600847601890564, "learning_rate": 1.840105023885994e-05, "loss": 0.8213, "step": 19000 }, { "epoch": 1.253796560423434, "grad_norm": 0.4721827507019043, "learning_rate": 1.837329662481619e-05, "loss": 0.7826, "step": 19010 }, { "epoch": 1.254456114894389, "grad_norm": 0.5889362692832947, "learning_rate": 1.834555179206264e-05, "loss": 0.7959, "step": 19020 }, { "epoch": 1.2551156693653436, "grad_norm": 0.5458769202232361, "learning_rate": 1.8317815777364993e-05, "loss": 0.8449, "step": 19030 }, { "epoch": 1.2557752238362987, "grad_norm": 0.6288903951644897, "learning_rate": 1.8290088617477274e-05, "loss": 0.8013, "step": 19040 }, { "epoch": 1.2564347783072534, "grad_norm": 0.5463530421257019, "learning_rate": 1.8262370349141768e-05, "loss": 0.7887, "step": 19050 }, { "epoch": 1.2570943327782083, "grad_norm": 0.5617839694023132, "learning_rate": 1.8234661009088972e-05, "loss": 0.8045, "step": 19060 }, { "epoch": 1.2577538872491631, "grad_norm": 0.5814275741577148, "learning_rate": 1.8206960634037572e-05, "loss": 0.7853, "step": 19070 }, { "epoch": 1.258413441720118, "grad_norm": 0.6037193536758423, "learning_rate": 1.8179269260694365e-05, "loss": 0.8129, "step": 19080 }, { "epoch": 1.259072996191073, "grad_norm": 0.5609439611434937, "learning_rate": 1.815158692575419e-05, "loss": 0.7916, "step": 19090 }, { "epoch": 1.2597325506620278, "grad_norm": 0.6056429147720337, "learning_rate": 1.8123913665899956e-05, "loss": 0.7878, "step": 19100 }, { "epoch": 1.2603921051329827, "grad_norm": 0.5459847450256348, "learning_rate": 1.8096249517802522e-05, "loss": 0.8033, "step": 19110 }, { "epoch": 1.2610516596039376, "grad_norm": 0.5837607979774475, "learning_rate": 1.8068594518120684e-05, "loss": 0.7992, "step": 19120 }, { "epoch": 1.2617112140748925, "grad_norm": 0.541070818901062, "learning_rate": 1.8040948703501087e-05, "loss": 0.8201, "step": 19130 }, { "epoch": 1.2623707685458472, "grad_norm": 0.5953345894813538, "learning_rate": 1.8013312110578236e-05, "loss": 0.8148, "step": 19140 }, { "epoch": 1.263030323016802, "grad_norm": 0.593059241771698, "learning_rate": 1.7985684775974397e-05, "loss": 0.785, "step": 19150 }, { "epoch": 1.263689877487757, "grad_norm": 0.5525000691413879, "learning_rate": 1.795806673629959e-05, "loss": 0.8148, "step": 19160 }, { "epoch": 1.2643494319587119, "grad_norm": 0.6783982515335083, "learning_rate": 1.793045802815149e-05, "loss": 0.8104, "step": 19170 }, { "epoch": 1.2650089864296667, "grad_norm": 0.5801666975021362, "learning_rate": 1.790285868811542e-05, "loss": 0.8218, "step": 19180 }, { "epoch": 1.2656685409006216, "grad_norm": 0.5675482749938965, "learning_rate": 1.7875268752764296e-05, "loss": 0.8307, "step": 19190 }, { "epoch": 1.2663280953715765, "grad_norm": 0.5449127554893494, "learning_rate": 1.7847688258658552e-05, "loss": 0.8229, "step": 19200 }, { "epoch": 1.2669876498425314, "grad_norm": 0.6582222580909729, "learning_rate": 1.7820117242346144e-05, "loss": 0.8356, "step": 19210 }, { "epoch": 1.2676472043134863, "grad_norm": 0.5748285055160522, "learning_rate": 1.7792555740362422e-05, "loss": 0.8247, "step": 19220 }, { "epoch": 1.268306758784441, "grad_norm": 0.5172967910766602, "learning_rate": 1.776500378923016e-05, "loss": 0.7851, "step": 19230 }, { "epoch": 1.268966313255396, "grad_norm": 0.5694426894187927, "learning_rate": 1.773746142545948e-05, "loss": 0.8206, "step": 19240 }, { "epoch": 1.2696258677263508, "grad_norm": 0.5869324207305908, "learning_rate": 1.7709928685547777e-05, "loss": 0.8184, "step": 19250 }, { "epoch": 1.2702854221973057, "grad_norm": 0.62547367811203, "learning_rate": 1.768240560597972e-05, "loss": 0.8131, "step": 19260 }, { "epoch": 1.2709449766682606, "grad_norm": 0.5633248090744019, "learning_rate": 1.765489222322714e-05, "loss": 0.8112, "step": 19270 }, { "epoch": 1.2716045311392155, "grad_norm": 0.5703989267349243, "learning_rate": 1.762738857374906e-05, "loss": 0.8278, "step": 19280 }, { "epoch": 1.2722640856101703, "grad_norm": 0.5290929079055786, "learning_rate": 1.759989469399157e-05, "loss": 0.8018, "step": 19290 }, { "epoch": 1.2729236400811252, "grad_norm": 0.6253542900085449, "learning_rate": 1.757241062038785e-05, "loss": 0.7834, "step": 19300 }, { "epoch": 1.2735831945520801, "grad_norm": 0.5754891037940979, "learning_rate": 1.7544936389358034e-05, "loss": 0.8103, "step": 19310 }, { "epoch": 1.2742427490230348, "grad_norm": 0.6191249489784241, "learning_rate": 1.7517472037309267e-05, "loss": 0.7984, "step": 19320 }, { "epoch": 1.27490230349399, "grad_norm": 0.6206302046775818, "learning_rate": 1.7490017600635573e-05, "loss": 0.7922, "step": 19330 }, { "epoch": 1.2755618579649446, "grad_norm": 0.47136321663856506, "learning_rate": 1.746257311571785e-05, "loss": 0.8027, "step": 19340 }, { "epoch": 1.2762214124358995, "grad_norm": 0.6640087962150574, "learning_rate": 1.743513861892379e-05, "loss": 0.8399, "step": 19350 }, { "epoch": 1.2768809669068544, "grad_norm": 0.4717612564563751, "learning_rate": 1.7407714146607862e-05, "loss": 0.8017, "step": 19360 }, { "epoch": 1.2775405213778093, "grad_norm": 0.5602179765701294, "learning_rate": 1.7380299735111256e-05, "loss": 0.7809, "step": 19370 }, { "epoch": 1.2782000758487642, "grad_norm": 0.5533621907234192, "learning_rate": 1.735289542076182e-05, "loss": 0.7739, "step": 19380 }, { "epoch": 1.278859630319719, "grad_norm": 0.5811960697174072, "learning_rate": 1.7325501239874037e-05, "loss": 0.8069, "step": 19390 }, { "epoch": 1.279519184790674, "grad_norm": 0.5109241604804993, "learning_rate": 1.7298117228748938e-05, "loss": 0.7767, "step": 19400 }, { "epoch": 1.2801787392616288, "grad_norm": 0.5800501704216003, "learning_rate": 1.72707434236741e-05, "loss": 0.8309, "step": 19410 }, { "epoch": 1.2808382937325837, "grad_norm": 0.5540037751197815, "learning_rate": 1.724337986092356e-05, "loss": 0.8158, "step": 19420 }, { "epoch": 1.2814978482035384, "grad_norm": 0.6083894968032837, "learning_rate": 1.7216026576757805e-05, "loss": 0.8392, "step": 19430 }, { "epoch": 1.2821574026744935, "grad_norm": 0.5300758481025696, "learning_rate": 1.718868360742366e-05, "loss": 0.8052, "step": 19440 }, { "epoch": 1.2828169571454482, "grad_norm": 0.5607723593711853, "learning_rate": 1.716135098915432e-05, "loss": 0.825, "step": 19450 }, { "epoch": 1.283476511616403, "grad_norm": 0.5369413495063782, "learning_rate": 1.7134028758169253e-05, "loss": 0.8093, "step": 19460 }, { "epoch": 1.284136066087358, "grad_norm": 0.53282630443573, "learning_rate": 1.710671695067415e-05, "loss": 0.7952, "step": 19470 }, { "epoch": 1.2847956205583129, "grad_norm": 0.541825532913208, "learning_rate": 1.7079415602860917e-05, "loss": 0.7975, "step": 19480 }, { "epoch": 1.2854551750292678, "grad_norm": 0.6871068477630615, "learning_rate": 1.705212475090756e-05, "loss": 0.8133, "step": 19490 }, { "epoch": 1.2861147295002227, "grad_norm": 0.5954208374023438, "learning_rate": 1.702484443097821e-05, "loss": 0.7942, "step": 19500 }, { "epoch": 1.2867742839711775, "grad_norm": 0.6732329726219177, "learning_rate": 1.699757467922303e-05, "loss": 0.7494, "step": 19510 }, { "epoch": 1.2874338384421322, "grad_norm": 0.5208237171173096, "learning_rate": 1.6970315531778187e-05, "loss": 0.8084, "step": 19520 }, { "epoch": 1.2880933929130873, "grad_norm": 0.5553262829780579, "learning_rate": 1.6943067024765756e-05, "loss": 0.8072, "step": 19530 }, { "epoch": 1.288752947384042, "grad_norm": 0.6095374226570129, "learning_rate": 1.6915829194293777e-05, "loss": 0.8065, "step": 19540 }, { "epoch": 1.289412501854997, "grad_norm": 0.566206157207489, "learning_rate": 1.6888602076456096e-05, "loss": 0.8571, "step": 19550 }, { "epoch": 1.2900720563259518, "grad_norm": 0.6199831366539001, "learning_rate": 1.6861385707332372e-05, "loss": 0.795, "step": 19560 }, { "epoch": 1.2907316107969067, "grad_norm": 0.5906867980957031, "learning_rate": 1.6834180122988043e-05, "loss": 0.8285, "step": 19570 }, { "epoch": 1.2913911652678616, "grad_norm": 0.5739659070968628, "learning_rate": 1.6806985359474206e-05, "loss": 0.8048, "step": 19580 }, { "epoch": 1.2920507197388165, "grad_norm": 0.5545616745948792, "learning_rate": 1.677980145282767e-05, "loss": 0.8139, "step": 19590 }, { "epoch": 1.2927102742097714, "grad_norm": 0.5878023505210876, "learning_rate": 1.675262843907083e-05, "loss": 0.8206, "step": 19600 }, { "epoch": 1.2933698286807263, "grad_norm": 0.6396400332450867, "learning_rate": 1.6725466354211663e-05, "loss": 0.8593, "step": 19610 }, { "epoch": 1.2940293831516811, "grad_norm": 0.5396580100059509, "learning_rate": 1.6698315234243645e-05, "loss": 0.7799, "step": 19620 }, { "epoch": 1.2946889376226358, "grad_norm": 0.5915452241897583, "learning_rate": 1.667117511514573e-05, "loss": 0.7915, "step": 19630 }, { "epoch": 1.2953484920935907, "grad_norm": 0.5672121644020081, "learning_rate": 1.6644046032882298e-05, "loss": 0.7882, "step": 19640 }, { "epoch": 1.2960080465645456, "grad_norm": 0.7311338186264038, "learning_rate": 1.6616928023403107e-05, "loss": 0.7996, "step": 19650 }, { "epoch": 1.2966676010355005, "grad_norm": 0.5876666307449341, "learning_rate": 1.6589821122643238e-05, "loss": 0.8324, "step": 19660 }, { "epoch": 1.2973271555064554, "grad_norm": 0.6038510799407959, "learning_rate": 1.6562725366523037e-05, "loss": 0.8012, "step": 19670 }, { "epoch": 1.2979867099774103, "grad_norm": 0.5467290282249451, "learning_rate": 1.653564079094809e-05, "loss": 0.7902, "step": 19680 }, { "epoch": 1.2986462644483652, "grad_norm": 0.7234504818916321, "learning_rate": 1.6508567431809197e-05, "loss": 0.8189, "step": 19690 }, { "epoch": 1.29930581891932, "grad_norm": 0.5546754598617554, "learning_rate": 1.648150532498225e-05, "loss": 0.7958, "step": 19700 }, { "epoch": 1.299965373390275, "grad_norm": 0.5073449611663818, "learning_rate": 1.6454454506328253e-05, "loss": 0.8478, "step": 19710 }, { "epoch": 1.3006249278612296, "grad_norm": 0.6308967471122742, "learning_rate": 1.6427415011693248e-05, "loss": 0.7708, "step": 19720 }, { "epoch": 1.3012844823321847, "grad_norm": 0.5615613460540771, "learning_rate": 1.6400386876908275e-05, "loss": 0.7973, "step": 19730 }, { "epoch": 1.3019440368031394, "grad_norm": 0.584670901298523, "learning_rate": 1.6373370137789312e-05, "loss": 0.8181, "step": 19740 }, { "epoch": 1.3026035912740943, "grad_norm": 0.5000048279762268, "learning_rate": 1.6346364830137254e-05, "loss": 0.8122, "step": 19750 }, { "epoch": 1.3032631457450492, "grad_norm": 0.6242878437042236, "learning_rate": 1.6319370989737815e-05, "loss": 0.7971, "step": 19760 }, { "epoch": 1.303922700216004, "grad_norm": 0.5634641647338867, "learning_rate": 1.6292388652361544e-05, "loss": 0.8355, "step": 19770 }, { "epoch": 1.304582254686959, "grad_norm": 0.6160987615585327, "learning_rate": 1.6265417853763737e-05, "loss": 0.773, "step": 19780 }, { "epoch": 1.3052418091579139, "grad_norm": 0.5563802123069763, "learning_rate": 1.6238458629684407e-05, "loss": 0.842, "step": 19790 }, { "epoch": 1.3059013636288688, "grad_norm": 0.5255377292633057, "learning_rate": 1.6211511015848195e-05, "loss": 0.8243, "step": 19800 }, { "epoch": 1.3065609180998234, "grad_norm": 0.4894976317882538, "learning_rate": 1.6184575047964405e-05, "loss": 0.764, "step": 19810 }, { "epoch": 1.3072204725707786, "grad_norm": 0.6574572920799255, "learning_rate": 1.615765076172688e-05, "loss": 0.776, "step": 19820 }, { "epoch": 1.3078800270417332, "grad_norm": 0.5359966158866882, "learning_rate": 1.613073819281398e-05, "loss": 0.778, "step": 19830 }, { "epoch": 1.3085395815126881, "grad_norm": 0.6031800508499146, "learning_rate": 1.610383737688857e-05, "loss": 0.7966, "step": 19840 }, { "epoch": 1.309199135983643, "grad_norm": 0.5762466788291931, "learning_rate": 1.6076948349597894e-05, "loss": 0.8185, "step": 19850 }, { "epoch": 1.309858690454598, "grad_norm": 0.5590838193893433, "learning_rate": 1.605007114657361e-05, "loss": 0.8083, "step": 19860 }, { "epoch": 1.3105182449255528, "grad_norm": 0.6155121922492981, "learning_rate": 1.6023205803431696e-05, "loss": 0.8529, "step": 19870 }, { "epoch": 1.3111777993965077, "grad_norm": 0.6039925813674927, "learning_rate": 1.5996352355772422e-05, "loss": 0.7866, "step": 19880 }, { "epoch": 1.3118373538674626, "grad_norm": 0.6229190230369568, "learning_rate": 1.596951083918026e-05, "loss": 0.8194, "step": 19890 }, { "epoch": 1.3124969083384175, "grad_norm": 0.6370930671691895, "learning_rate": 1.5942681289223923e-05, "loss": 0.8628, "step": 19900 }, { "epoch": 1.3131564628093724, "grad_norm": 0.6257620453834534, "learning_rate": 1.5915863741456233e-05, "loss": 0.7803, "step": 19910 }, { "epoch": 1.313816017280327, "grad_norm": 0.5882974863052368, "learning_rate": 1.5889058231414116e-05, "loss": 0.7776, "step": 19920 }, { "epoch": 1.314475571751282, "grad_norm": 0.5556590557098389, "learning_rate": 1.586226479461855e-05, "loss": 0.7769, "step": 19930 }, { "epoch": 1.3151351262222368, "grad_norm": 0.5639545321464539, "learning_rate": 1.5835483466574504e-05, "loss": 0.7975, "step": 19940 }, { "epoch": 1.3157946806931917, "grad_norm": 0.5980276465415955, "learning_rate": 1.580871428277091e-05, "loss": 0.8253, "step": 19950 }, { "epoch": 1.3164542351641466, "grad_norm": 0.6568490862846375, "learning_rate": 1.578195727868061e-05, "loss": 0.8241, "step": 19960 }, { "epoch": 1.3171137896351015, "grad_norm": 0.5813195109367371, "learning_rate": 1.5755212489760305e-05, "loss": 0.8222, "step": 19970 }, { "epoch": 1.3177733441060564, "grad_norm": 0.626581072807312, "learning_rate": 1.5728479951450496e-05, "loss": 0.7894, "step": 19980 }, { "epoch": 1.3184328985770113, "grad_norm": 0.6403607726097107, "learning_rate": 1.5701759699175466e-05, "loss": 0.8277, "step": 19990 }, { "epoch": 1.3190924530479662, "grad_norm": 0.5415562391281128, "learning_rate": 1.567505176834321e-05, "loss": 0.809, "step": 20000 }, { "epoch": 1.3197520075189209, "grad_norm": 0.5711784958839417, "learning_rate": 1.5648356194345408e-05, "loss": 0.8135, "step": 20010 }, { "epoch": 1.320411561989876, "grad_norm": 0.5610942840576172, "learning_rate": 1.562167301255733e-05, "loss": 0.778, "step": 20020 }, { "epoch": 1.3210711164608306, "grad_norm": 0.6548299789428711, "learning_rate": 1.5595002258337864e-05, "loss": 0.7675, "step": 20030 }, { "epoch": 1.3217306709317855, "grad_norm": 0.6827725768089294, "learning_rate": 1.5568343967029414e-05, "loss": 0.8149, "step": 20040 }, { "epoch": 1.3223902254027404, "grad_norm": 0.6757158637046814, "learning_rate": 1.5541698173957865e-05, "loss": 0.8296, "step": 20050 }, { "epoch": 1.3230497798736953, "grad_norm": 0.5480896830558777, "learning_rate": 1.5515064914432553e-05, "loss": 0.8166, "step": 20060 }, { "epoch": 1.3237093343446502, "grad_norm": 0.5392537117004395, "learning_rate": 1.5488444223746192e-05, "loss": 0.8068, "step": 20070 }, { "epoch": 1.324368888815605, "grad_norm": 0.6353445649147034, "learning_rate": 1.546183613717484e-05, "loss": 0.7939, "step": 20080 }, { "epoch": 1.32502844328656, "grad_norm": 0.6121338605880737, "learning_rate": 1.543524068997787e-05, "loss": 0.7803, "step": 20090 }, { "epoch": 1.3256879977575147, "grad_norm": 0.6256228685379028, "learning_rate": 1.54086579173979e-05, "loss": 0.8092, "step": 20100 }, { "epoch": 1.3263475522284698, "grad_norm": 0.6484335660934448, "learning_rate": 1.538208785466072e-05, "loss": 0.7915, "step": 20110 }, { "epoch": 1.3270071066994245, "grad_norm": 0.6085736751556396, "learning_rate": 1.5355530536975325e-05, "loss": 0.8024, "step": 20120 }, { "epoch": 1.3276666611703793, "grad_norm": 0.5266491174697876, "learning_rate": 1.5328985999533797e-05, "loss": 0.8293, "step": 20130 }, { "epoch": 1.3283262156413342, "grad_norm": 0.6135145425796509, "learning_rate": 1.5302454277511286e-05, "loss": 0.8145, "step": 20140 }, { "epoch": 1.3289857701122891, "grad_norm": 0.6410892605781555, "learning_rate": 1.527593540606596e-05, "loss": 0.8378, "step": 20150 }, { "epoch": 1.329645324583244, "grad_norm": 0.49080774188041687, "learning_rate": 1.524942942033896e-05, "loss": 0.8334, "step": 20160 }, { "epoch": 1.330304879054199, "grad_norm": 0.5814183354377747, "learning_rate": 1.5222936355454342e-05, "loss": 0.8157, "step": 20170 }, { "epoch": 1.3309644335251538, "grad_norm": 0.585276186466217, "learning_rate": 1.5196456246519058e-05, "loss": 0.7868, "step": 20180 }, { "epoch": 1.3316239879961087, "grad_norm": 0.5762225389480591, "learning_rate": 1.516998912862288e-05, "loss": 0.8178, "step": 20190 }, { "epoch": 1.3322835424670636, "grad_norm": 0.5100105404853821, "learning_rate": 1.514353503683836e-05, "loss": 0.7861, "step": 20200 }, { "epoch": 1.3329430969380183, "grad_norm": 0.6492648720741272, "learning_rate": 1.51170940062208e-05, "loss": 0.7841, "step": 20210 }, { "epoch": 1.3336026514089732, "grad_norm": 0.6658373475074768, "learning_rate": 1.509066607180819e-05, "loss": 0.8538, "step": 20220 }, { "epoch": 1.334262205879928, "grad_norm": 0.5783854126930237, "learning_rate": 1.506425126862116e-05, "loss": 0.7978, "step": 20230 }, { "epoch": 1.334921760350883, "grad_norm": 0.5754417777061462, "learning_rate": 1.5037849631662961e-05, "loss": 0.7594, "step": 20240 }, { "epoch": 1.3355813148218378, "grad_norm": 0.5717854499816895, "learning_rate": 1.5011461195919358e-05, "loss": 0.8123, "step": 20250 }, { "epoch": 1.3362408692927927, "grad_norm": 0.6231403946876526, "learning_rate": 1.4985085996358656e-05, "loss": 0.8193, "step": 20260 }, { "epoch": 1.3369004237637476, "grad_norm": 0.6809998154640198, "learning_rate": 1.4958724067931607e-05, "loss": 0.8199, "step": 20270 }, { "epoch": 1.3375599782347025, "grad_norm": 0.5629172921180725, "learning_rate": 1.493237544557139e-05, "loss": 0.8214, "step": 20280 }, { "epoch": 1.3382195327056574, "grad_norm": 0.538102924823761, "learning_rate": 1.4906040164193519e-05, "loss": 0.8026, "step": 20290 }, { "epoch": 1.338879087176612, "grad_norm": 0.5235173106193542, "learning_rate": 1.4879718258695868e-05, "loss": 0.7747, "step": 20300 }, { "epoch": 1.3395386416475672, "grad_norm": 0.5327746868133545, "learning_rate": 1.4853409763958565e-05, "loss": 0.8357, "step": 20310 }, { "epoch": 1.3401981961185219, "grad_norm": 0.6447291970252991, "learning_rate": 1.482711471484397e-05, "loss": 0.7684, "step": 20320 }, { "epoch": 1.3408577505894768, "grad_norm": 0.6309868693351746, "learning_rate": 1.480083314619664e-05, "loss": 0.7857, "step": 20330 }, { "epoch": 1.3415173050604317, "grad_norm": 0.5822016596794128, "learning_rate": 1.4774565092843234e-05, "loss": 0.7944, "step": 20340 }, { "epoch": 1.3421768595313865, "grad_norm": 0.6021371483802795, "learning_rate": 1.474831058959253e-05, "loss": 0.8116, "step": 20350 }, { "epoch": 1.3428364140023414, "grad_norm": 0.6604745984077454, "learning_rate": 1.472206967123535e-05, "loss": 0.798, "step": 20360 }, { "epoch": 1.3434959684732963, "grad_norm": 0.5935235619544983, "learning_rate": 1.4695842372544505e-05, "loss": 0.7998, "step": 20370 }, { "epoch": 1.3441555229442512, "grad_norm": 0.6029685139656067, "learning_rate": 1.4669628728274753e-05, "loss": 0.8018, "step": 20380 }, { "epoch": 1.344815077415206, "grad_norm": 0.6155779361724854, "learning_rate": 1.4643428773162771e-05, "loss": 0.7862, "step": 20390 }, { "epoch": 1.345474631886161, "grad_norm": 0.697088360786438, "learning_rate": 1.461724254192709e-05, "loss": 0.7783, "step": 20400 }, { "epoch": 1.3461341863571157, "grad_norm": 0.620465874671936, "learning_rate": 1.4591070069268065e-05, "loss": 0.7687, "step": 20410 }, { "epoch": 1.3467937408280706, "grad_norm": 0.6178200244903564, "learning_rate": 1.4564911389867785e-05, "loss": 0.8391, "step": 20420 }, { "epoch": 1.3474532952990255, "grad_norm": 0.6703351736068726, "learning_rate": 1.4538766538390114e-05, "loss": 0.8219, "step": 20430 }, { "epoch": 1.3481128497699804, "grad_norm": 0.6984212398529053, "learning_rate": 1.4512635549480535e-05, "loss": 0.803, "step": 20440 }, { "epoch": 1.3487724042409353, "grad_norm": 0.5434896945953369, "learning_rate": 1.4486518457766219e-05, "loss": 0.7914, "step": 20450 }, { "epoch": 1.3494319587118901, "grad_norm": 0.6022245287895203, "learning_rate": 1.4460415297855872e-05, "loss": 0.7914, "step": 20460 }, { "epoch": 1.350091513182845, "grad_norm": 0.624377429485321, "learning_rate": 1.443432610433976e-05, "loss": 0.8008, "step": 20470 }, { "epoch": 1.3507510676538, "grad_norm": 0.5371820330619812, "learning_rate": 1.440825091178965e-05, "loss": 0.7979, "step": 20480 }, { "epoch": 1.3514106221247548, "grad_norm": 0.5285878777503967, "learning_rate": 1.4382189754758732e-05, "loss": 0.8222, "step": 20490 }, { "epoch": 1.3520701765957095, "grad_norm": 0.6214131712913513, "learning_rate": 1.4356142667781636e-05, "loss": 0.8126, "step": 20500 }, { "epoch": 1.3527297310666644, "grad_norm": 0.6240976452827454, "learning_rate": 1.4330109685374288e-05, "loss": 0.8467, "step": 20510 }, { "epoch": 1.3533892855376193, "grad_norm": 0.5760096311569214, "learning_rate": 1.4304090842033984e-05, "loss": 0.8397, "step": 20520 }, { "epoch": 1.3540488400085742, "grad_norm": 0.5903918147087097, "learning_rate": 1.4278086172239235e-05, "loss": 0.8482, "step": 20530 }, { "epoch": 1.354708394479529, "grad_norm": 0.5928918719291687, "learning_rate": 1.425209571044982e-05, "loss": 0.8182, "step": 20540 }, { "epoch": 1.355367948950484, "grad_norm": 0.5865376591682434, "learning_rate": 1.4226119491106648e-05, "loss": 0.8207, "step": 20550 }, { "epoch": 1.3560275034214389, "grad_norm": 0.6136580109596252, "learning_rate": 1.420015754863176e-05, "loss": 0.8, "step": 20560 }, { "epoch": 1.3566870578923937, "grad_norm": 0.5566912293434143, "learning_rate": 1.4174209917428306e-05, "loss": 0.7866, "step": 20570 }, { "epoch": 1.3573466123633486, "grad_norm": 0.6884405016899109, "learning_rate": 1.4148276631880437e-05, "loss": 0.8446, "step": 20580 }, { "epoch": 1.3580061668343033, "grad_norm": 0.5476433038711548, "learning_rate": 1.412235772635334e-05, "loss": 0.7751, "step": 20590 }, { "epoch": 1.3586657213052584, "grad_norm": 0.5802808403968811, "learning_rate": 1.4096453235193076e-05, "loss": 0.799, "step": 20600 }, { "epoch": 1.359325275776213, "grad_norm": 0.5228874683380127, "learning_rate": 1.4070563192726671e-05, "loss": 0.8439, "step": 20610 }, { "epoch": 1.359984830247168, "grad_norm": 0.5136884450912476, "learning_rate": 1.4044687633261983e-05, "loss": 0.7783, "step": 20620 }, { "epoch": 1.3606443847181229, "grad_norm": 0.5910024642944336, "learning_rate": 1.4018826591087658e-05, "loss": 0.7912, "step": 20630 }, { "epoch": 1.3613039391890778, "grad_norm": 0.5563416481018066, "learning_rate": 1.3992980100473146e-05, "loss": 0.7553, "step": 20640 }, { "epoch": 1.3619634936600327, "grad_norm": 0.6473105549812317, "learning_rate": 1.3967148195668562e-05, "loss": 0.8117, "step": 20650 }, { "epoch": 1.3626230481309876, "grad_norm": 0.5593595504760742, "learning_rate": 1.3941330910904737e-05, "loss": 0.8115, "step": 20660 }, { "epoch": 1.3632826026019425, "grad_norm": 0.6227688789367676, "learning_rate": 1.3915528280393097e-05, "loss": 0.7801, "step": 20670 }, { "epoch": 1.3639421570728971, "grad_norm": 0.6196003556251526, "learning_rate": 1.3889740338325686e-05, "loss": 0.8236, "step": 20680 }, { "epoch": 1.3646017115438522, "grad_norm": 0.5288597941398621, "learning_rate": 1.3863967118875048e-05, "loss": 0.7288, "step": 20690 }, { "epoch": 1.365261266014807, "grad_norm": 0.5875284075737, "learning_rate": 1.3838208656194223e-05, "loss": 0.7876, "step": 20700 }, { "epoch": 1.3659208204857618, "grad_norm": 0.5908956527709961, "learning_rate": 1.3812464984416726e-05, "loss": 0.796, "step": 20710 }, { "epoch": 1.3665803749567167, "grad_norm": 0.5640059113502502, "learning_rate": 1.3786736137656425e-05, "loss": 0.8001, "step": 20720 }, { "epoch": 1.3672399294276716, "grad_norm": 0.6048946380615234, "learning_rate": 1.3761022150007605e-05, "loss": 0.8347, "step": 20730 }, { "epoch": 1.3678994838986265, "grad_norm": 0.5536830425262451, "learning_rate": 1.373532305554478e-05, "loss": 0.8168, "step": 20740 }, { "epoch": 1.3685590383695814, "grad_norm": 0.6291602253913879, "learning_rate": 1.37096388883228e-05, "loss": 0.7887, "step": 20750 }, { "epoch": 1.3692185928405363, "grad_norm": 0.6404354572296143, "learning_rate": 1.3683969682376693e-05, "loss": 0.7914, "step": 20760 }, { "epoch": 1.3698781473114912, "grad_norm": 0.5645378232002258, "learning_rate": 1.3658315471721688e-05, "loss": 0.8385, "step": 20770 }, { "epoch": 1.370537701782446, "grad_norm": 0.6588670611381531, "learning_rate": 1.363267629035313e-05, "loss": 0.8016, "step": 20780 }, { "epoch": 1.3711972562534007, "grad_norm": 0.6370968222618103, "learning_rate": 1.3607052172246426e-05, "loss": 0.7955, "step": 20790 }, { "epoch": 1.3718568107243558, "grad_norm": 0.5848293900489807, "learning_rate": 1.3581443151357074e-05, "loss": 0.8195, "step": 20800 }, { "epoch": 1.3725163651953105, "grad_norm": 0.5646858811378479, "learning_rate": 1.3555849261620512e-05, "loss": 0.7744, "step": 20810 }, { "epoch": 1.3731759196662654, "grad_norm": 0.5530049800872803, "learning_rate": 1.3530270536952172e-05, "loss": 0.791, "step": 20820 }, { "epoch": 1.3738354741372203, "grad_norm": 0.5670651793479919, "learning_rate": 1.3504707011247365e-05, "loss": 0.8341, "step": 20830 }, { "epoch": 1.3744950286081752, "grad_norm": 0.5433035492897034, "learning_rate": 1.3479158718381246e-05, "loss": 0.8089, "step": 20840 }, { "epoch": 1.37515458307913, "grad_norm": 0.5983085632324219, "learning_rate": 1.3453625692208832e-05, "loss": 0.8004, "step": 20850 }, { "epoch": 1.375814137550085, "grad_norm": 0.5690645575523376, "learning_rate": 1.342810796656487e-05, "loss": 0.7438, "step": 20860 }, { "epoch": 1.3764736920210399, "grad_norm": 0.5865938663482666, "learning_rate": 1.3402605575263845e-05, "loss": 0.7654, "step": 20870 }, { "epoch": 1.3771332464919945, "grad_norm": 0.5614089369773865, "learning_rate": 1.3377118552099912e-05, "loss": 0.7979, "step": 20880 }, { "epoch": 1.3777928009629496, "grad_norm": 0.6096092462539673, "learning_rate": 1.3351646930846886e-05, "loss": 0.7826, "step": 20890 }, { "epoch": 1.3784523554339043, "grad_norm": 0.5356308221817017, "learning_rate": 1.332619074525815e-05, "loss": 0.8327, "step": 20900 }, { "epoch": 1.3791119099048592, "grad_norm": 0.49735626578330994, "learning_rate": 1.3300750029066628e-05, "loss": 0.8438, "step": 20910 }, { "epoch": 1.379771464375814, "grad_norm": 0.5439593195915222, "learning_rate": 1.3275324815984769e-05, "loss": 0.8126, "step": 20920 }, { "epoch": 1.380431018846769, "grad_norm": 0.5968343019485474, "learning_rate": 1.3249915139704455e-05, "loss": 0.7955, "step": 20930 }, { "epoch": 1.381090573317724, "grad_norm": 0.5874291658401489, "learning_rate": 1.3224521033897003e-05, "loss": 0.7936, "step": 20940 }, { "epoch": 1.3817501277886788, "grad_norm": 0.5604689121246338, "learning_rate": 1.3199142532213076e-05, "loss": 0.7942, "step": 20950 }, { "epoch": 1.3824096822596337, "grad_norm": 0.5957276225090027, "learning_rate": 1.3173779668282671e-05, "loss": 0.8042, "step": 20960 }, { "epoch": 1.3830692367305886, "grad_norm": 0.6727865934371948, "learning_rate": 1.3148432475715042e-05, "loss": 0.812, "step": 20970 }, { "epoch": 1.3837287912015435, "grad_norm": 0.6120534539222717, "learning_rate": 1.3123100988098714e-05, "loss": 0.8201, "step": 20980 }, { "epoch": 1.3843883456724981, "grad_norm": 0.5892711877822876, "learning_rate": 1.3097785239001375e-05, "loss": 0.7579, "step": 20990 }, { "epoch": 1.385047900143453, "grad_norm": 0.5758843421936035, "learning_rate": 1.3072485261969846e-05, "loss": 0.8137, "step": 21000 }, { "epoch": 1.385707454614408, "grad_norm": 0.66371750831604, "learning_rate": 1.3047201090530089e-05, "loss": 0.8254, "step": 21010 }, { "epoch": 1.3863670090853628, "grad_norm": 0.5467724204063416, "learning_rate": 1.3021932758187073e-05, "loss": 0.7762, "step": 21020 }, { "epoch": 1.3870265635563177, "grad_norm": 0.5854601860046387, "learning_rate": 1.2996680298424824e-05, "loss": 0.8477, "step": 21030 }, { "epoch": 1.3876861180272726, "grad_norm": 0.5929408669471741, "learning_rate": 1.2971443744706302e-05, "loss": 0.8353, "step": 21040 }, { "epoch": 1.3883456724982275, "grad_norm": 0.5941386818885803, "learning_rate": 1.2946223130473389e-05, "loss": 0.7822, "step": 21050 }, { "epoch": 1.3890052269691824, "grad_norm": 0.552693247795105, "learning_rate": 1.2921018489146875e-05, "loss": 0.8083, "step": 21060 }, { "epoch": 1.3896647814401373, "grad_norm": 0.5452876091003418, "learning_rate": 1.289582985412634e-05, "loss": 0.8044, "step": 21070 }, { "epoch": 1.390324335911092, "grad_norm": 0.6474664211273193, "learning_rate": 1.2870657258790217e-05, "loss": 0.801, "step": 21080 }, { "epoch": 1.390983890382047, "grad_norm": 0.5309171080589294, "learning_rate": 1.2845500736495597e-05, "loss": 0.7875, "step": 21090 }, { "epoch": 1.3916434448530017, "grad_norm": 0.5736203193664551, "learning_rate": 1.2820360320578356e-05, "loss": 0.8088, "step": 21100 }, { "epoch": 1.3923029993239566, "grad_norm": 0.7004844546318054, "learning_rate": 1.2795236044352965e-05, "loss": 0.826, "step": 21110 }, { "epoch": 1.3929625537949115, "grad_norm": 0.6375095248222351, "learning_rate": 1.2770127941112555e-05, "loss": 0.8044, "step": 21120 }, { "epoch": 1.3936221082658664, "grad_norm": 0.5890763401985168, "learning_rate": 1.2745036044128796e-05, "loss": 0.8475, "step": 21130 }, { "epoch": 1.3942816627368213, "grad_norm": 0.6262532472610474, "learning_rate": 1.271996038665188e-05, "loss": 0.8054, "step": 21140 }, { "epoch": 1.3949412172077762, "grad_norm": 0.6479079723358154, "learning_rate": 1.2694901001910509e-05, "loss": 0.8084, "step": 21150 }, { "epoch": 1.395600771678731, "grad_norm": 0.544293999671936, "learning_rate": 1.2669857923111782e-05, "loss": 0.8119, "step": 21160 }, { "epoch": 1.3962603261496858, "grad_norm": 0.601423442363739, "learning_rate": 1.2644831183441244e-05, "loss": 0.8142, "step": 21170 }, { "epoch": 1.3969198806206409, "grad_norm": 0.6186248660087585, "learning_rate": 1.2619820816062711e-05, "loss": 0.8067, "step": 21180 }, { "epoch": 1.3975794350915955, "grad_norm": 0.5760650634765625, "learning_rate": 1.259482685411838e-05, "loss": 0.7817, "step": 21190 }, { "epoch": 1.3982389895625504, "grad_norm": 0.5994721055030823, "learning_rate": 1.256984933072865e-05, "loss": 0.8261, "step": 21200 }, { "epoch": 1.3988985440335053, "grad_norm": 0.605767548084259, "learning_rate": 1.2544888278992195e-05, "loss": 0.8046, "step": 21210 }, { "epoch": 1.3995580985044602, "grad_norm": 0.5736032128334045, "learning_rate": 1.2519943731985806e-05, "loss": 0.7868, "step": 21220 }, { "epoch": 1.4002176529754151, "grad_norm": 0.595744788646698, "learning_rate": 1.2495015722764428e-05, "loss": 0.8441, "step": 21230 }, { "epoch": 1.40087720744637, "grad_norm": 0.6784983277320862, "learning_rate": 1.2470104284361104e-05, "loss": 0.7841, "step": 21240 }, { "epoch": 1.401536761917325, "grad_norm": 0.5943467617034912, "learning_rate": 1.2445209449786884e-05, "loss": 0.7988, "step": 21250 }, { "epoch": 1.4021963163882798, "grad_norm": 0.5512073636054993, "learning_rate": 1.2420331252030855e-05, "loss": 0.7966, "step": 21260 }, { "epoch": 1.4028558708592347, "grad_norm": 0.6070830225944519, "learning_rate": 1.2395469724060035e-05, "loss": 0.8414, "step": 21270 }, { "epoch": 1.4035154253301894, "grad_norm": 0.5669506192207336, "learning_rate": 1.237062489881934e-05, "loss": 0.8604, "step": 21280 }, { "epoch": 1.4041749798011443, "grad_norm": 0.5757513642311096, "learning_rate": 1.2345796809231588e-05, "loss": 0.8134, "step": 21290 }, { "epoch": 1.4048345342720991, "grad_norm": 0.5894919633865356, "learning_rate": 1.2320985488197383e-05, "loss": 0.8274, "step": 21300 }, { "epoch": 1.405494088743054, "grad_norm": 0.6311694979667664, "learning_rate": 1.2296190968595155e-05, "loss": 0.7954, "step": 21310 }, { "epoch": 1.406153643214009, "grad_norm": 0.6621250510215759, "learning_rate": 1.2271413283281006e-05, "loss": 0.7888, "step": 21320 }, { "epoch": 1.4068131976849638, "grad_norm": 0.6430502533912659, "learning_rate": 1.2246652465088784e-05, "loss": 0.8383, "step": 21330 }, { "epoch": 1.4074727521559187, "grad_norm": 0.6386721134185791, "learning_rate": 1.2221908546829957e-05, "loss": 0.8225, "step": 21340 }, { "epoch": 1.4081323066268736, "grad_norm": 0.5988573431968689, "learning_rate": 1.2197181561293622e-05, "loss": 0.816, "step": 21350 }, { "epoch": 1.4087918610978285, "grad_norm": 0.6191495060920715, "learning_rate": 1.2172471541246416e-05, "loss": 0.7815, "step": 21360 }, { "epoch": 1.4094514155687832, "grad_norm": 0.6011860966682434, "learning_rate": 1.2147778519432493e-05, "loss": 0.8307, "step": 21370 }, { "epoch": 1.4101109700397383, "grad_norm": 0.5985183715820312, "learning_rate": 1.2123102528573507e-05, "loss": 0.803, "step": 21380 }, { "epoch": 1.410770524510693, "grad_norm": 0.5957480072975159, "learning_rate": 1.2098443601368526e-05, "loss": 0.8124, "step": 21390 }, { "epoch": 1.4114300789816479, "grad_norm": 0.6131159067153931, "learning_rate": 1.2073801770493997e-05, "loss": 0.8531, "step": 21400 }, { "epoch": 1.4120896334526027, "grad_norm": 0.5503156781196594, "learning_rate": 1.2049177068603723e-05, "loss": 0.8175, "step": 21410 }, { "epoch": 1.4127491879235576, "grad_norm": 0.5735180974006653, "learning_rate": 1.2024569528328824e-05, "loss": 0.7863, "step": 21420 }, { "epoch": 1.4134087423945125, "grad_norm": 0.6213884949684143, "learning_rate": 1.1999979182277646e-05, "loss": 0.7929, "step": 21430 }, { "epoch": 1.4140682968654674, "grad_norm": 0.531531035900116, "learning_rate": 1.1975406063035787e-05, "loss": 0.8341, "step": 21440 }, { "epoch": 1.4147278513364223, "grad_norm": 0.5258073806762695, "learning_rate": 1.1950850203165986e-05, "loss": 0.7921, "step": 21450 }, { "epoch": 1.415387405807377, "grad_norm": 0.5849917531013489, "learning_rate": 1.1926311635208115e-05, "loss": 0.8012, "step": 21460 }, { "epoch": 1.416046960278332, "grad_norm": 0.6284214854240417, "learning_rate": 1.1901790391679161e-05, "loss": 0.7843, "step": 21470 }, { "epoch": 1.4167065147492868, "grad_norm": 0.5382357239723206, "learning_rate": 1.1877286505073114e-05, "loss": 0.8127, "step": 21480 }, { "epoch": 1.4173660692202417, "grad_norm": 0.5823057293891907, "learning_rate": 1.1852800007860984e-05, "loss": 0.8127, "step": 21490 }, { "epoch": 1.4180256236911966, "grad_norm": 0.5972256064414978, "learning_rate": 1.182833093249075e-05, "loss": 0.8037, "step": 21500 }, { "epoch": 1.4186851781621515, "grad_norm": 0.6301577687263489, "learning_rate": 1.180387931138727e-05, "loss": 0.8301, "step": 21510 }, { "epoch": 1.4193447326331063, "grad_norm": 0.6335862874984741, "learning_rate": 1.1779445176952314e-05, "loss": 0.8143, "step": 21520 }, { "epoch": 1.4200042871040612, "grad_norm": 0.6473941206932068, "learning_rate": 1.1755028561564448e-05, "loss": 0.8091, "step": 21530 }, { "epoch": 1.4206638415750161, "grad_norm": 0.6132245659828186, "learning_rate": 1.1730629497579036e-05, "loss": 0.8476, "step": 21540 }, { "epoch": 1.421323396045971, "grad_norm": 0.6118532419204712, "learning_rate": 1.170624801732817e-05, "loss": 0.7832, "step": 21550 }, { "epoch": 1.421982950516926, "grad_norm": 0.6644175052642822, "learning_rate": 1.1681884153120674e-05, "loss": 0.8288, "step": 21560 }, { "epoch": 1.4226425049878806, "grad_norm": 0.6317498087882996, "learning_rate": 1.1657537937241996e-05, "loss": 0.8498, "step": 21570 }, { "epoch": 1.4233020594588355, "grad_norm": 0.514884352684021, "learning_rate": 1.1633209401954202e-05, "loss": 0.8146, "step": 21580 }, { "epoch": 1.4239616139297904, "grad_norm": 0.5476719737052917, "learning_rate": 1.160889857949595e-05, "loss": 0.8387, "step": 21590 }, { "epoch": 1.4246211684007453, "grad_norm": 0.605689287185669, "learning_rate": 1.15846055020824e-05, "loss": 0.7732, "step": 21600 }, { "epoch": 1.4252807228717002, "grad_norm": 0.6338589787483215, "learning_rate": 1.156033020190522e-05, "loss": 0.8314, "step": 21610 }, { "epoch": 1.425940277342655, "grad_norm": 0.5500427484512329, "learning_rate": 1.1536072711132504e-05, "loss": 0.7729, "step": 21620 }, { "epoch": 1.42659983181361, "grad_norm": 0.5607891082763672, "learning_rate": 1.1511833061908753e-05, "loss": 0.7731, "step": 21630 }, { "epoch": 1.4272593862845648, "grad_norm": 0.6563683152198792, "learning_rate": 1.1487611286354812e-05, "loss": 0.8208, "step": 21640 }, { "epoch": 1.4279189407555197, "grad_norm": 0.5528565049171448, "learning_rate": 1.1463407416567876e-05, "loss": 0.8296, "step": 21650 }, { "epoch": 1.4285784952264744, "grad_norm": 0.6151896119117737, "learning_rate": 1.1439221484621376e-05, "loss": 0.7722, "step": 21660 }, { "epoch": 1.4292380496974295, "grad_norm": 0.5941686630249023, "learning_rate": 1.1415053522564981e-05, "loss": 0.8092, "step": 21670 }, { "epoch": 1.4298976041683842, "grad_norm": 0.6373192667961121, "learning_rate": 1.1390903562424567e-05, "loss": 0.7926, "step": 21680 }, { "epoch": 1.430557158639339, "grad_norm": 0.5912952423095703, "learning_rate": 1.136677163620213e-05, "loss": 0.8041, "step": 21690 }, { "epoch": 1.431216713110294, "grad_norm": 0.5902479290962219, "learning_rate": 1.1342657775875793e-05, "loss": 0.7994, "step": 21700 }, { "epoch": 1.4318762675812489, "grad_norm": 0.616900622844696, "learning_rate": 1.1318562013399717e-05, "loss": 0.7985, "step": 21710 }, { "epoch": 1.4325358220522038, "grad_norm": 0.5874533653259277, "learning_rate": 1.1294484380704082e-05, "loss": 0.8003, "step": 21720 }, { "epoch": 1.4331953765231586, "grad_norm": 0.5619797706604004, "learning_rate": 1.1270424909695068e-05, "loss": 0.8339, "step": 21730 }, { "epoch": 1.4338549309941135, "grad_norm": 0.5925664901733398, "learning_rate": 1.1246383632254753e-05, "loss": 0.7941, "step": 21740 }, { "epoch": 1.4345144854650682, "grad_norm": 0.6364315748214722, "learning_rate": 1.1222360580241157e-05, "loss": 0.8333, "step": 21750 }, { "epoch": 1.4351740399360233, "grad_norm": 0.5474036931991577, "learning_rate": 1.1198355785488072e-05, "loss": 0.8145, "step": 21760 }, { "epoch": 1.435833594406978, "grad_norm": 0.6324875950813293, "learning_rate": 1.1174369279805175e-05, "loss": 0.8044, "step": 21770 }, { "epoch": 1.436493148877933, "grad_norm": 0.5442603826522827, "learning_rate": 1.115040109497785e-05, "loss": 0.7998, "step": 21780 }, { "epoch": 1.4371527033488878, "grad_norm": 0.610609233379364, "learning_rate": 1.1126451262767249e-05, "loss": 0.7931, "step": 21790 }, { "epoch": 1.4378122578198427, "grad_norm": 0.6423842906951904, "learning_rate": 1.110251981491017e-05, "loss": 0.8003, "step": 21800 }, { "epoch": 1.4384718122907976, "grad_norm": 0.6783219575881958, "learning_rate": 1.1078606783119055e-05, "loss": 0.777, "step": 21810 }, { "epoch": 1.4391313667617525, "grad_norm": 0.6192532181739807, "learning_rate": 1.1054712199081967e-05, "loss": 0.8204, "step": 21820 }, { "epoch": 1.4397909212327074, "grad_norm": 0.6282913088798523, "learning_rate": 1.1030836094462484e-05, "loss": 0.8036, "step": 21830 }, { "epoch": 1.4404504757036622, "grad_norm": 0.5335955619812012, "learning_rate": 1.1006978500899748e-05, "loss": 0.8666, "step": 21840 }, { "epoch": 1.4411100301746171, "grad_norm": 0.463339239358902, "learning_rate": 1.0983139450008298e-05, "loss": 0.7961, "step": 21850 }, { "epoch": 1.4417695846455718, "grad_norm": 0.6162631511688232, "learning_rate": 1.0959318973378177e-05, "loss": 0.837, "step": 21860 }, { "epoch": 1.4424291391165267, "grad_norm": 0.6249439716339111, "learning_rate": 1.0935517102574757e-05, "loss": 0.7661, "step": 21870 }, { "epoch": 1.4430886935874816, "grad_norm": 0.5610096454620361, "learning_rate": 1.0911733869138796e-05, "loss": 0.7936, "step": 21880 }, { "epoch": 1.4437482480584365, "grad_norm": 0.5870029926300049, "learning_rate": 1.0887969304586348e-05, "loss": 0.8329, "step": 21890 }, { "epoch": 1.4444078025293914, "grad_norm": 0.6591834425926208, "learning_rate": 1.0864223440408689e-05, "loss": 0.8199, "step": 21900 }, { "epoch": 1.4450673570003463, "grad_norm": 0.5896702408790588, "learning_rate": 1.0840496308072364e-05, "loss": 0.7952, "step": 21910 }, { "epoch": 1.4457269114713012, "grad_norm": 0.6215192079544067, "learning_rate": 1.0816787939019062e-05, "loss": 0.8418, "step": 21920 }, { "epoch": 1.446386465942256, "grad_norm": 0.6321289539337158, "learning_rate": 1.0793098364665639e-05, "loss": 0.7868, "step": 21930 }, { "epoch": 1.447046020413211, "grad_norm": 0.6170499920845032, "learning_rate": 1.0769427616404015e-05, "loss": 0.7809, "step": 21940 }, { "epoch": 1.4477055748841656, "grad_norm": 0.5628238320350647, "learning_rate": 1.074577572560117e-05, "loss": 0.7912, "step": 21950 }, { "epoch": 1.4483651293551207, "grad_norm": 0.6782822608947754, "learning_rate": 1.0722142723599119e-05, "loss": 0.8704, "step": 21960 }, { "epoch": 1.4490246838260754, "grad_norm": 0.5879037976264954, "learning_rate": 1.069852864171482e-05, "loss": 0.8164, "step": 21970 }, { "epoch": 1.4496842382970303, "grad_norm": 0.5934215188026428, "learning_rate": 1.0674933511240165e-05, "loss": 0.8124, "step": 21980 }, { "epoch": 1.4503437927679852, "grad_norm": 0.5908352136611938, "learning_rate": 1.0651357363441927e-05, "loss": 0.8108, "step": 21990 }, { "epoch": 1.45100334723894, "grad_norm": 0.6745395064353943, "learning_rate": 1.0627800229561755e-05, "loss": 0.781, "step": 22000 }, { "epoch": 1.451662901709895, "grad_norm": 0.5967004299163818, "learning_rate": 1.060426214081606e-05, "loss": 0.8093, "step": 22010 }, { "epoch": 1.4523224561808499, "grad_norm": 0.6017720103263855, "learning_rate": 1.058074312839605e-05, "loss": 0.8131, "step": 22020 }, { "epoch": 1.4529820106518048, "grad_norm": 0.5551460385322571, "learning_rate": 1.0557243223467636e-05, "loss": 0.8141, "step": 22030 }, { "epoch": 1.4536415651227594, "grad_norm": 0.5525084733963013, "learning_rate": 1.0533762457171403e-05, "loss": 0.7938, "step": 22040 }, { "epoch": 1.4543011195937146, "grad_norm": 0.602660059928894, "learning_rate": 1.0510300860622602e-05, "loss": 0.8027, "step": 22050 }, { "epoch": 1.4549606740646692, "grad_norm": 0.5001670122146606, "learning_rate": 1.0486858464911053e-05, "loss": 0.7908, "step": 22060 }, { "epoch": 1.4556202285356241, "grad_norm": 0.5953605771064758, "learning_rate": 1.0463435301101144e-05, "loss": 0.8254, "step": 22070 }, { "epoch": 1.456279783006579, "grad_norm": 0.5867521166801453, "learning_rate": 1.044003140023177e-05, "loss": 0.741, "step": 22080 }, { "epoch": 1.456939337477534, "grad_norm": 0.5637813210487366, "learning_rate": 1.0416646793316324e-05, "loss": 0.7721, "step": 22090 }, { "epoch": 1.4575988919484888, "grad_norm": 0.5925215482711792, "learning_rate": 1.0393281511342598e-05, "loss": 0.815, "step": 22100 }, { "epoch": 1.4582584464194437, "grad_norm": 0.6085634827613831, "learning_rate": 1.0369935585272808e-05, "loss": 0.79, "step": 22110 }, { "epoch": 1.4589180008903986, "grad_norm": 0.594815731048584, "learning_rate": 1.0346609046043501e-05, "loss": 0.8484, "step": 22120 }, { "epoch": 1.4595775553613535, "grad_norm": 0.6512571573257446, "learning_rate": 1.0323301924565528e-05, "loss": 0.805, "step": 22130 }, { "epoch": 1.4602371098323084, "grad_norm": 0.6098020672798157, "learning_rate": 1.0300014251724036e-05, "loss": 0.7833, "step": 22140 }, { "epoch": 1.460896664303263, "grad_norm": 0.5692249536514282, "learning_rate": 1.0276746058378375e-05, "loss": 0.7819, "step": 22150 }, { "epoch": 1.4615562187742182, "grad_norm": 0.5564795136451721, "learning_rate": 1.0253497375362084e-05, "loss": 0.8166, "step": 22160 }, { "epoch": 1.4622157732451728, "grad_norm": 0.6502294540405273, "learning_rate": 1.0230268233482867e-05, "loss": 0.8091, "step": 22170 }, { "epoch": 1.4628753277161277, "grad_norm": 0.5914169549942017, "learning_rate": 1.0207058663522506e-05, "loss": 0.8058, "step": 22180 }, { "epoch": 1.4635348821870826, "grad_norm": 0.6946161985397339, "learning_rate": 1.0183868696236878e-05, "loss": 0.7911, "step": 22190 }, { "epoch": 1.4641944366580375, "grad_norm": 0.5570001602172852, "learning_rate": 1.0160698362355864e-05, "loss": 0.8019, "step": 22200 }, { "epoch": 1.4648539911289924, "grad_norm": 0.558890163898468, "learning_rate": 1.0137547692583328e-05, "loss": 0.8003, "step": 22210 }, { "epoch": 1.4655135455999473, "grad_norm": 0.5632757544517517, "learning_rate": 1.011441671759707e-05, "loss": 0.8181, "step": 22220 }, { "epoch": 1.4661731000709022, "grad_norm": 0.6365344524383545, "learning_rate": 1.0091305468048822e-05, "loss": 0.8286, "step": 22230 }, { "epoch": 1.4668326545418569, "grad_norm": 0.5856372117996216, "learning_rate": 1.0068213974564147e-05, "loss": 0.8314, "step": 22240 }, { "epoch": 1.467492209012812, "grad_norm": 0.6081796288490295, "learning_rate": 1.0045142267742435e-05, "loss": 0.7878, "step": 22250 }, { "epoch": 1.4681517634837666, "grad_norm": 0.5490699410438538, "learning_rate": 1.0022090378156871e-05, "loss": 0.7427, "step": 22260 }, { "epoch": 1.4688113179547215, "grad_norm": 0.62840735912323, "learning_rate": 9.999058336354353e-06, "loss": 0.8142, "step": 22270 }, { "epoch": 1.4694708724256764, "grad_norm": 0.5533925294876099, "learning_rate": 9.976046172855511e-06, "loss": 0.8006, "step": 22280 }, { "epoch": 1.4701304268966313, "grad_norm": 0.584366500377655, "learning_rate": 9.953053918154607e-06, "loss": 0.8489, "step": 22290 }, { "epoch": 1.4707899813675862, "grad_norm": 0.6402396559715271, "learning_rate": 9.930081602719526e-06, "loss": 0.7938, "step": 22300 }, { "epoch": 1.471449535838541, "grad_norm": 0.5986910462379456, "learning_rate": 9.907129256991732e-06, "loss": 0.8286, "step": 22310 }, { "epoch": 1.472109090309496, "grad_norm": 0.6115344762802124, "learning_rate": 9.884196911386231e-06, "loss": 0.8422, "step": 22320 }, { "epoch": 1.4727686447804509, "grad_norm": 0.57796710729599, "learning_rate": 9.861284596291548e-06, "loss": 0.7795, "step": 22330 }, { "epoch": 1.4734281992514058, "grad_norm": 0.6050494909286499, "learning_rate": 9.838392342069602e-06, "loss": 0.8121, "step": 22340 }, { "epoch": 1.4740877537223604, "grad_norm": 0.5895455479621887, "learning_rate": 9.815520179055793e-06, "loss": 0.8373, "step": 22350 }, { "epoch": 1.4747473081933153, "grad_norm": 0.5442635416984558, "learning_rate": 9.792668137558855e-06, "loss": 0.7792, "step": 22360 }, { "epoch": 1.4754068626642702, "grad_norm": 0.5683884024620056, "learning_rate": 9.769836247860891e-06, "loss": 0.7822, "step": 22370 }, { "epoch": 1.4760664171352251, "grad_norm": 0.5949370861053467, "learning_rate": 9.74702454021728e-06, "loss": 0.8117, "step": 22380 }, { "epoch": 1.47672597160618, "grad_norm": 0.7127247452735901, "learning_rate": 9.72423304485665e-06, "loss": 0.7423, "step": 22390 }, { "epoch": 1.477385526077135, "grad_norm": 0.5755226016044617, "learning_rate": 9.701461791980871e-06, "loss": 0.8108, "step": 22400 }, { "epoch": 1.4780450805480898, "grad_norm": 0.6091025471687317, "learning_rate": 9.67871081176496e-06, "loss": 0.7684, "step": 22410 }, { "epoch": 1.4787046350190447, "grad_norm": 0.5700154900550842, "learning_rate": 9.655980134357107e-06, "loss": 0.8291, "step": 22420 }, { "epoch": 1.4793641894899996, "grad_norm": 0.5324963331222534, "learning_rate": 9.633269789878544e-06, "loss": 0.8019, "step": 22430 }, { "epoch": 1.4800237439609543, "grad_norm": 0.5627251267433167, "learning_rate": 9.610579808423612e-06, "loss": 0.8254, "step": 22440 }, { "epoch": 1.4806832984319094, "grad_norm": 0.5750599503517151, "learning_rate": 9.587910220059631e-06, "loss": 0.8373, "step": 22450 }, { "epoch": 1.481342852902864, "grad_norm": 0.6064298748970032, "learning_rate": 9.56526105482693e-06, "loss": 0.8165, "step": 22460 }, { "epoch": 1.482002407373819, "grad_norm": 0.6086931824684143, "learning_rate": 9.542632342738746e-06, "loss": 0.8612, "step": 22470 }, { "epoch": 1.4826619618447738, "grad_norm": 0.6298277974128723, "learning_rate": 9.52002411378122e-06, "loss": 0.8165, "step": 22480 }, { "epoch": 1.4833215163157287, "grad_norm": 0.5675653219223022, "learning_rate": 9.497436397913368e-06, "loss": 0.7859, "step": 22490 }, { "epoch": 1.4839810707866836, "grad_norm": 0.6803301572799683, "learning_rate": 9.474869225066993e-06, "loss": 0.7845, "step": 22500 }, { "epoch": 1.4846406252576385, "grad_norm": 0.6293306350708008, "learning_rate": 9.452322625146715e-06, "loss": 0.7989, "step": 22510 }, { "epoch": 1.4853001797285934, "grad_norm": 0.6525318622589111, "learning_rate": 9.429796628029838e-06, "loss": 0.8538, "step": 22520 }, { "epoch": 1.485959734199548, "grad_norm": 0.6509277820587158, "learning_rate": 9.407291263566424e-06, "loss": 0.7948, "step": 22530 }, { "epoch": 1.4866192886705032, "grad_norm": 0.5920518636703491, "learning_rate": 9.384806561579141e-06, "loss": 0.7992, "step": 22540 }, { "epoch": 1.4872788431414579, "grad_norm": 0.5769726634025574, "learning_rate": 9.36234255186332e-06, "loss": 0.8238, "step": 22550 }, { "epoch": 1.4879383976124128, "grad_norm": 0.6404539346694946, "learning_rate": 9.339899264186846e-06, "loss": 0.8269, "step": 22560 }, { "epoch": 1.4885979520833676, "grad_norm": 0.5562175512313843, "learning_rate": 9.317476728290146e-06, "loss": 0.8011, "step": 22570 }, { "epoch": 1.4892575065543225, "grad_norm": 0.7294158935546875, "learning_rate": 9.295074973886164e-06, "loss": 0.8319, "step": 22580 }, { "epoch": 1.4899170610252774, "grad_norm": 0.6766993999481201, "learning_rate": 9.272694030660279e-06, "loss": 0.7976, "step": 22590 }, { "epoch": 1.4905766154962323, "grad_norm": 0.6774852871894836, "learning_rate": 9.25033392827033e-06, "loss": 0.8286, "step": 22600 }, { "epoch": 1.4912361699671872, "grad_norm": 0.6171419024467468, "learning_rate": 9.227994696346502e-06, "loss": 0.8103, "step": 22610 }, { "epoch": 1.4918957244381421, "grad_norm": 0.6780225038528442, "learning_rate": 9.205676364491333e-06, "loss": 0.8463, "step": 22620 }, { "epoch": 1.492555278909097, "grad_norm": 0.560507595539093, "learning_rate": 9.183378962279693e-06, "loss": 0.813, "step": 22630 }, { "epoch": 1.4932148333800517, "grad_norm": 0.5771291851997375, "learning_rate": 9.161102519258683e-06, "loss": 0.8009, "step": 22640 }, { "epoch": 1.4938743878510066, "grad_norm": 0.5692350268363953, "learning_rate": 9.138847064947642e-06, "loss": 0.8096, "step": 22650 }, { "epoch": 1.4945339423219615, "grad_norm": 0.6242731809616089, "learning_rate": 9.116612628838091e-06, "loss": 0.7972, "step": 22660 }, { "epoch": 1.4951934967929164, "grad_norm": 0.5729048848152161, "learning_rate": 9.094399240393722e-06, "loss": 0.7981, "step": 22670 }, { "epoch": 1.4958530512638712, "grad_norm": 0.5291873812675476, "learning_rate": 9.072206929050303e-06, "loss": 0.7915, "step": 22680 }, { "epoch": 1.4965126057348261, "grad_norm": 0.6954770088195801, "learning_rate": 9.050035724215703e-06, "loss": 0.8027, "step": 22690 }, { "epoch": 1.497172160205781, "grad_norm": 0.5207001566886902, "learning_rate": 9.0278856552698e-06, "loss": 0.8056, "step": 22700 }, { "epoch": 1.497831714676736, "grad_norm": 0.6069022417068481, "learning_rate": 9.005756751564459e-06, "loss": 0.7766, "step": 22710 }, { "epoch": 1.4984912691476908, "grad_norm": 0.5734812617301941, "learning_rate": 8.983649042423534e-06, "loss": 0.7816, "step": 22720 }, { "epoch": 1.4991508236186455, "grad_norm": 0.81162428855896, "learning_rate": 8.961562557142758e-06, "loss": 0.7697, "step": 22730 }, { "epoch": 1.4998103780896006, "grad_norm": 0.595378041267395, "learning_rate": 8.939497324989748e-06, "loss": 0.769, "step": 22740 }, { "epoch": 1.5004699325605553, "grad_norm": 0.5689677000045776, "learning_rate": 8.917453375203958e-06, "loss": 0.8124, "step": 22750 }, { "epoch": 1.5011294870315102, "grad_norm": 0.5219847559928894, "learning_rate": 8.895430736996645e-06, "loss": 0.8031, "step": 22760 }, { "epoch": 1.501789041502465, "grad_norm": 0.610152542591095, "learning_rate": 8.87342943955084e-06, "loss": 0.8004, "step": 22770 }, { "epoch": 1.50244859597342, "grad_norm": 0.6241317391395569, "learning_rate": 8.85144951202126e-06, "loss": 0.7737, "step": 22780 }, { "epoch": 1.5031081504443748, "grad_norm": 0.6606419086456299, "learning_rate": 8.829490983534324e-06, "loss": 0.798, "step": 22790 }, { "epoch": 1.5037677049153297, "grad_norm": 0.6633086800575256, "learning_rate": 8.807553883188086e-06, "loss": 0.8282, "step": 22800 }, { "epoch": 1.5044272593862846, "grad_norm": 0.7518271803855896, "learning_rate": 8.785638240052227e-06, "loss": 0.8244, "step": 22810 }, { "epoch": 1.5050868138572393, "grad_norm": 0.6326174139976501, "learning_rate": 8.763744083167965e-06, "loss": 0.801, "step": 22820 }, { "epoch": 1.5057463683281944, "grad_norm": 0.5996015667915344, "learning_rate": 8.741871441548056e-06, "loss": 0.8471, "step": 22830 }, { "epoch": 1.506405922799149, "grad_norm": 0.5443564057350159, "learning_rate": 8.720020344176764e-06, "loss": 0.7987, "step": 22840 }, { "epoch": 1.5070654772701042, "grad_norm": 0.6426756381988525, "learning_rate": 8.698190820009764e-06, "loss": 0.8308, "step": 22850 }, { "epoch": 1.5077250317410589, "grad_norm": 0.6051197052001953, "learning_rate": 8.676382897974191e-06, "loss": 0.8043, "step": 22860 }, { "epoch": 1.5083845862120138, "grad_norm": 0.5542495250701904, "learning_rate": 8.654596606968524e-06, "loss": 0.8075, "step": 22870 }, { "epoch": 1.5090441406829687, "grad_norm": 0.5416606664657593, "learning_rate": 8.632831975862579e-06, "loss": 0.8075, "step": 22880 }, { "epoch": 1.5097036951539236, "grad_norm": 0.5079529285430908, "learning_rate": 8.61108903349747e-06, "loss": 0.7696, "step": 22890 }, { "epoch": 1.5103632496248784, "grad_norm": 0.6498621702194214, "learning_rate": 8.589367808685586e-06, "loss": 0.7949, "step": 22900 }, { "epoch": 1.5110228040958331, "grad_norm": 0.6200118064880371, "learning_rate": 8.56766833021053e-06, "loss": 0.8284, "step": 22910 }, { "epoch": 1.5116823585667882, "grad_norm": 0.5316246747970581, "learning_rate": 8.545990626827066e-06, "loss": 0.8584, "step": 22920 }, { "epoch": 1.512341913037743, "grad_norm": 0.5837723016738892, "learning_rate": 8.524334727261145e-06, "loss": 0.8238, "step": 22930 }, { "epoch": 1.513001467508698, "grad_norm": 0.5774195194244385, "learning_rate": 8.502700660209784e-06, "loss": 0.8266, "step": 22940 }, { "epoch": 1.5136610219796527, "grad_norm": 0.6202636361122131, "learning_rate": 8.481088454341114e-06, "loss": 0.7763, "step": 22950 }, { "epoch": 1.5143205764506076, "grad_norm": 0.599242627620697, "learning_rate": 8.459498138294228e-06, "loss": 0.7837, "step": 22960 }, { "epoch": 1.5149801309215625, "grad_norm": 0.5883403420448303, "learning_rate": 8.43792974067929e-06, "loss": 0.8001, "step": 22970 }, { "epoch": 1.5156396853925174, "grad_norm": 0.6413730978965759, "learning_rate": 8.416383290077356e-06, "loss": 0.7961, "step": 22980 }, { "epoch": 1.5162992398634723, "grad_norm": 0.6023602485656738, "learning_rate": 8.394858815040443e-06, "loss": 0.7996, "step": 22990 }, { "epoch": 1.516958794334427, "grad_norm": 0.5290365219116211, "learning_rate": 8.373356344091442e-06, "loss": 0.7872, "step": 23000 }, { "epoch": 1.517618348805382, "grad_norm": 0.5551934242248535, "learning_rate": 8.351875905724044e-06, "loss": 0.7933, "step": 23010 }, { "epoch": 1.5182779032763367, "grad_norm": 0.5995215177536011, "learning_rate": 8.330417528402795e-06, "loss": 0.8193, "step": 23020 }, { "epoch": 1.5189374577472918, "grad_norm": 0.6809059381484985, "learning_rate": 8.308981240562974e-06, "loss": 0.8003, "step": 23030 }, { "epoch": 1.5195970122182465, "grad_norm": 0.5909712910652161, "learning_rate": 8.287567070610613e-06, "loss": 0.8035, "step": 23040 }, { "epoch": 1.5202565666892014, "grad_norm": 0.5868057608604431, "learning_rate": 8.266175046922414e-06, "loss": 0.8191, "step": 23050 }, { "epoch": 1.5209161211601563, "grad_norm": 0.56301349401474, "learning_rate": 8.244805197845734e-06, "loss": 0.8133, "step": 23060 }, { "epoch": 1.5215756756311112, "grad_norm": 0.6976560950279236, "learning_rate": 8.223457551698568e-06, "loss": 0.797, "step": 23070 }, { "epoch": 1.522235230102066, "grad_norm": 0.5786426067352295, "learning_rate": 8.202132136769458e-06, "loss": 0.797, "step": 23080 }, { "epoch": 1.522894784573021, "grad_norm": 0.6411847472190857, "learning_rate": 8.18082898131752e-06, "loss": 0.8056, "step": 23090 }, { "epoch": 1.5235543390439759, "grad_norm": 0.5769123435020447, "learning_rate": 8.159548113572329e-06, "loss": 0.7846, "step": 23100 }, { "epoch": 1.5242138935149305, "grad_norm": 0.6662389636039734, "learning_rate": 8.138289561733973e-06, "loss": 0.7897, "step": 23110 }, { "epoch": 1.5248734479858856, "grad_norm": 0.5101078748703003, "learning_rate": 8.117053353972931e-06, "loss": 0.7911, "step": 23120 }, { "epoch": 1.5255330024568403, "grad_norm": 0.5832508206367493, "learning_rate": 8.095839518430105e-06, "loss": 0.8233, "step": 23130 }, { "epoch": 1.5261925569277954, "grad_norm": 0.6068770289421082, "learning_rate": 8.074648083216723e-06, "loss": 0.787, "step": 23140 }, { "epoch": 1.52685211139875, "grad_norm": 0.5033300518989563, "learning_rate": 8.053479076414338e-06, "loss": 0.7904, "step": 23150 }, { "epoch": 1.527511665869705, "grad_norm": 0.5479113459587097, "learning_rate": 8.032332526074801e-06, "loss": 0.8433, "step": 23160 }, { "epoch": 1.5281712203406599, "grad_norm": 0.5920894145965576, "learning_rate": 8.01120846022017e-06, "loss": 0.7985, "step": 23170 }, { "epoch": 1.5288307748116148, "grad_norm": 0.7134125232696533, "learning_rate": 7.990106906842753e-06, "loss": 0.7729, "step": 23180 }, { "epoch": 1.5294903292825697, "grad_norm": 0.6525259017944336, "learning_rate": 7.96902789390497e-06, "loss": 0.792, "step": 23190 }, { "epoch": 1.5301498837535243, "grad_norm": 0.6140827536582947, "learning_rate": 7.94797144933942e-06, "loss": 0.8135, "step": 23200 }, { "epoch": 1.5308094382244795, "grad_norm": 0.6362495422363281, "learning_rate": 7.926937601048779e-06, "loss": 0.769, "step": 23210 }, { "epoch": 1.5314689926954341, "grad_norm": 0.6009213924407959, "learning_rate": 7.905926376905776e-06, "loss": 0.804, "step": 23220 }, { "epoch": 1.5321285471663892, "grad_norm": 0.6461767554283142, "learning_rate": 7.88493780475316e-06, "loss": 0.7764, "step": 23230 }, { "epoch": 1.532788101637344, "grad_norm": 0.6046391725540161, "learning_rate": 7.863971912403659e-06, "loss": 0.7996, "step": 23240 }, { "epoch": 1.5334476561082988, "grad_norm": 0.6695650219917297, "learning_rate": 7.84302872763997e-06, "loss": 0.7954, "step": 23250 }, { "epoch": 1.5341072105792537, "grad_norm": 0.6348613500595093, "learning_rate": 7.822108278214668e-06, "loss": 0.8027, "step": 23260 }, { "epoch": 1.5347667650502086, "grad_norm": 0.593072772026062, "learning_rate": 7.80121059185023e-06, "loss": 0.8247, "step": 23270 }, { "epoch": 1.5354263195211635, "grad_norm": 0.5665187835693359, "learning_rate": 7.78033569623895e-06, "loss": 0.7775, "step": 23280 }, { "epoch": 1.5360858739921184, "grad_norm": 0.6689664721488953, "learning_rate": 7.759483619042912e-06, "loss": 0.8052, "step": 23290 }, { "epoch": 1.5367454284630733, "grad_norm": 0.5643017292022705, "learning_rate": 7.738654387893995e-06, "loss": 0.8193, "step": 23300 }, { "epoch": 1.537404982934028, "grad_norm": 0.6332219243049622, "learning_rate": 7.717848030393782e-06, "loss": 0.8418, "step": 23310 }, { "epoch": 1.538064537404983, "grad_norm": 0.5949552655220032, "learning_rate": 7.697064574113539e-06, "loss": 0.8179, "step": 23320 }, { "epoch": 1.5387240918759377, "grad_norm": 0.6400283575057983, "learning_rate": 7.67630404659419e-06, "loss": 0.8082, "step": 23330 }, { "epoch": 1.5393836463468926, "grad_norm": 0.6538004279136658, "learning_rate": 7.655566475346298e-06, "loss": 0.835, "step": 23340 }, { "epoch": 1.5400432008178475, "grad_norm": 0.5514785647392273, "learning_rate": 7.63485188784997e-06, "loss": 0.8312, "step": 23350 }, { "epoch": 1.5407027552888024, "grad_norm": 0.6204501390457153, "learning_rate": 7.614160311554888e-06, "loss": 0.8094, "step": 23360 }, { "epoch": 1.5413623097597573, "grad_norm": 0.6015772819519043, "learning_rate": 7.593491773880218e-06, "loss": 0.877, "step": 23370 }, { "epoch": 1.5420218642307122, "grad_norm": 0.5586115121841431, "learning_rate": 7.572846302214601e-06, "loss": 0.8028, "step": 23380 }, { "epoch": 1.542681418701667, "grad_norm": 0.7056943774223328, "learning_rate": 7.552223923916127e-06, "loss": 0.7808, "step": 23390 }, { "epoch": 1.5433409731726218, "grad_norm": 0.6403433084487915, "learning_rate": 7.53162466631227e-06, "loss": 0.825, "step": 23400 }, { "epoch": 1.5440005276435769, "grad_norm": 0.6124839186668396, "learning_rate": 7.511048556699868e-06, "loss": 0.8026, "step": 23410 }, { "epoch": 1.5446600821145315, "grad_norm": 0.5926290154457092, "learning_rate": 7.490495622345075e-06, "loss": 0.8174, "step": 23420 }, { "epoch": 1.5453196365854867, "grad_norm": 0.6216310262680054, "learning_rate": 7.46996589048336e-06, "loss": 0.7698, "step": 23430 }, { "epoch": 1.5459791910564413, "grad_norm": 0.6308226585388184, "learning_rate": 7.449459388319447e-06, "loss": 0.7954, "step": 23440 }, { "epoch": 1.5466387455273962, "grad_norm": 0.5584129095077515, "learning_rate": 7.428976143027224e-06, "loss": 0.7876, "step": 23450 }, { "epoch": 1.5472982999983511, "grad_norm": 0.6883158087730408, "learning_rate": 7.408516181749825e-06, "loss": 0.7904, "step": 23460 }, { "epoch": 1.547957854469306, "grad_norm": 0.5625373721122742, "learning_rate": 7.38807953159949e-06, "loss": 0.8231, "step": 23470 }, { "epoch": 1.548617408940261, "grad_norm": 0.6092795729637146, "learning_rate": 7.367666219657596e-06, "loss": 0.8077, "step": 23480 }, { "epoch": 1.5492769634112156, "grad_norm": 0.649097740650177, "learning_rate": 7.347276272974568e-06, "loss": 0.7777, "step": 23490 }, { "epoch": 1.5499365178821707, "grad_norm": 0.6178075075149536, "learning_rate": 7.3269097185698744e-06, "loss": 0.7762, "step": 23500 }, { "epoch": 1.5505960723531254, "grad_norm": 0.6393544673919678, "learning_rate": 7.306566583432009e-06, "loss": 0.8093, "step": 23510 }, { "epoch": 1.5512556268240805, "grad_norm": 0.547970712184906, "learning_rate": 7.286246894518395e-06, "loss": 0.8173, "step": 23520 }, { "epoch": 1.5519151812950351, "grad_norm": 0.5908211469650269, "learning_rate": 7.2659506787554284e-06, "loss": 0.7633, "step": 23530 }, { "epoch": 1.55257473576599, "grad_norm": 0.5850244760513306, "learning_rate": 7.245677963038347e-06, "loss": 0.8296, "step": 23540 }, { "epoch": 1.553234290236945, "grad_norm": 0.6321101188659668, "learning_rate": 7.225428774231302e-06, "loss": 0.7984, "step": 23550 }, { "epoch": 1.5538938447078998, "grad_norm": 0.6044578552246094, "learning_rate": 7.205203139167227e-06, "loss": 0.8495, "step": 23560 }, { "epoch": 1.5545533991788547, "grad_norm": 0.649852991104126, "learning_rate": 7.185001084647874e-06, "loss": 0.7805, "step": 23570 }, { "epoch": 1.5552129536498096, "grad_norm": 0.5769763588905334, "learning_rate": 7.16482263744373e-06, "loss": 0.7945, "step": 23580 }, { "epoch": 1.5558725081207645, "grad_norm": 0.617280900478363, "learning_rate": 7.144667824293994e-06, "loss": 0.8014, "step": 23590 }, { "epoch": 1.5565320625917192, "grad_norm": 0.5664177536964417, "learning_rate": 7.124536671906571e-06, "loss": 0.806, "step": 23600 }, { "epoch": 1.5571916170626743, "grad_norm": 0.6676650643348694, "learning_rate": 7.104429206957983e-06, "loss": 0.7894, "step": 23610 }, { "epoch": 1.557851171533629, "grad_norm": 0.613895833492279, "learning_rate": 7.0843454560933945e-06, "loss": 0.7845, "step": 23620 }, { "epoch": 1.558510726004584, "grad_norm": 0.6392267346382141, "learning_rate": 7.064285445926505e-06, "loss": 0.7939, "step": 23630 }, { "epoch": 1.5591702804755387, "grad_norm": 0.6425051093101501, "learning_rate": 7.044249203039588e-06, "loss": 0.8511, "step": 23640 }, { "epoch": 1.5598298349464936, "grad_norm": 0.556755006313324, "learning_rate": 7.024236753983418e-06, "loss": 0.8516, "step": 23650 }, { "epoch": 1.5604893894174485, "grad_norm": 0.6224756836891174, "learning_rate": 7.004248125277221e-06, "loss": 0.8272, "step": 23660 }, { "epoch": 1.5611489438884034, "grad_norm": 0.6352970004081726, "learning_rate": 6.9842833434086905e-06, "loss": 0.8294, "step": 23670 }, { "epoch": 1.5618084983593583, "grad_norm": 0.6254895329475403, "learning_rate": 6.96434243483387e-06, "loss": 0.7893, "step": 23680 }, { "epoch": 1.562468052830313, "grad_norm": 0.6049076318740845, "learning_rate": 6.944425425977216e-06, "loss": 0.8025, "step": 23690 }, { "epoch": 1.563127607301268, "grad_norm": 0.6346916556358337, "learning_rate": 6.924532343231485e-06, "loss": 0.7668, "step": 23700 }, { "epoch": 1.5637871617722228, "grad_norm": 0.6637734770774841, "learning_rate": 6.904663212957749e-06, "loss": 0.7904, "step": 23710 }, { "epoch": 1.5644467162431779, "grad_norm": 0.591850996017456, "learning_rate": 6.884818061485321e-06, "loss": 0.8569, "step": 23720 }, { "epoch": 1.5651062707141326, "grad_norm": 0.5571227073669434, "learning_rate": 6.864996915111741e-06, "loss": 0.8004, "step": 23730 }, { "epoch": 1.5657658251850874, "grad_norm": 0.5591459274291992, "learning_rate": 6.8451998001027555e-06, "loss": 0.8331, "step": 23740 }, { "epoch": 1.5664253796560423, "grad_norm": 0.6120739579200745, "learning_rate": 6.825426742692248e-06, "loss": 0.8481, "step": 23750 }, { "epoch": 1.5670849341269972, "grad_norm": 0.6025487780570984, "learning_rate": 6.805677769082247e-06, "loss": 0.8433, "step": 23760 }, { "epoch": 1.5677444885979521, "grad_norm": 0.5513527393341064, "learning_rate": 6.785952905442819e-06, "loss": 0.8163, "step": 23770 }, { "epoch": 1.5684040430689068, "grad_norm": 0.6942810416221619, "learning_rate": 6.766252177912133e-06, "loss": 0.7812, "step": 23780 }, { "epoch": 1.569063597539862, "grad_norm": 0.6479464769363403, "learning_rate": 6.7465756125963425e-06, "loss": 0.7861, "step": 23790 }, { "epoch": 1.5697231520108166, "grad_norm": 0.6747413277626038, "learning_rate": 6.726923235569607e-06, "loss": 0.7849, "step": 23800 }, { "epoch": 1.5703827064817717, "grad_norm": 0.49388283491134644, "learning_rate": 6.707295072874009e-06, "loss": 0.7807, "step": 23810 }, { "epoch": 1.5710422609527264, "grad_norm": 0.6372382640838623, "learning_rate": 6.687691150519551e-06, "loss": 0.7993, "step": 23820 }, { "epoch": 1.5717018154236813, "grad_norm": 0.6567792296409607, "learning_rate": 6.668111494484128e-06, "loss": 0.8044, "step": 23830 }, { "epoch": 1.5723613698946362, "grad_norm": 0.6088330149650574, "learning_rate": 6.648556130713454e-06, "loss": 0.8587, "step": 23840 }, { "epoch": 1.573020924365591, "grad_norm": 0.5622130036354065, "learning_rate": 6.629025085121082e-06, "loss": 0.8884, "step": 23850 }, { "epoch": 1.573680478836546, "grad_norm": 0.5609302520751953, "learning_rate": 6.609518383588317e-06, "loss": 0.7804, "step": 23860 }, { "epoch": 1.5743400333075008, "grad_norm": 0.5331741571426392, "learning_rate": 6.590036051964205e-06, "loss": 0.7765, "step": 23870 }, { "epoch": 1.5749995877784557, "grad_norm": 0.5762697458267212, "learning_rate": 6.570578116065518e-06, "loss": 0.8276, "step": 23880 }, { "epoch": 1.5756591422494104, "grad_norm": 0.6534186601638794, "learning_rate": 6.551144601676681e-06, "loss": 0.8163, "step": 23890 }, { "epoch": 1.5763186967203655, "grad_norm": 0.6218444108963013, "learning_rate": 6.531735534549766e-06, "loss": 0.8002, "step": 23900 }, { "epoch": 1.5769782511913202, "grad_norm": 0.598573625087738, "learning_rate": 6.512350940404438e-06, "loss": 0.7777, "step": 23910 }, { "epoch": 1.5776378056622753, "grad_norm": 0.5644959211349487, "learning_rate": 6.492990844927957e-06, "loss": 0.811, "step": 23920 }, { "epoch": 1.57829736013323, "grad_norm": 0.5573763251304626, "learning_rate": 6.473655273775087e-06, "loss": 0.8174, "step": 23930 }, { "epoch": 1.5789569146041849, "grad_norm": 0.6255615949630737, "learning_rate": 6.4543442525681255e-06, "loss": 0.7781, "step": 23940 }, { "epoch": 1.5796164690751398, "grad_norm": 0.6131150126457214, "learning_rate": 6.435057806896813e-06, "loss": 0.8318, "step": 23950 }, { "epoch": 1.5802760235460946, "grad_norm": 0.5963195562362671, "learning_rate": 6.41579596231833e-06, "loss": 0.8412, "step": 23960 }, { "epoch": 1.5809355780170495, "grad_norm": 0.6147268414497375, "learning_rate": 6.39655874435727e-06, "loss": 0.8097, "step": 23970 }, { "epoch": 1.5815951324880042, "grad_norm": 0.6898005604743958, "learning_rate": 6.377346178505581e-06, "loss": 0.7921, "step": 23980 }, { "epoch": 1.5822546869589593, "grad_norm": 0.5684288144111633, "learning_rate": 6.358158290222541e-06, "loss": 0.8226, "step": 23990 }, { "epoch": 1.582914241429914, "grad_norm": 0.5642861127853394, "learning_rate": 6.338995104934728e-06, "loss": 0.8034, "step": 24000 }, { "epoch": 1.583573795900869, "grad_norm": 0.5649825930595398, "learning_rate": 6.3198566480360045e-06, "loss": 0.7769, "step": 24010 }, { "epoch": 1.5842333503718238, "grad_norm": 0.583200216293335, "learning_rate": 6.3007429448874385e-06, "loss": 0.7858, "step": 24020 }, { "epoch": 1.5848929048427787, "grad_norm": 0.5720763802528381, "learning_rate": 6.281654020817299e-06, "loss": 0.7903, "step": 24030 }, { "epoch": 1.5855524593137336, "grad_norm": 0.5873232483863831, "learning_rate": 6.262589901121044e-06, "loss": 0.8214, "step": 24040 }, { "epoch": 1.5862120137846885, "grad_norm": 0.5910772681236267, "learning_rate": 6.24355061106123e-06, "loss": 0.8017, "step": 24050 }, { "epoch": 1.5868715682556433, "grad_norm": 0.6636596918106079, "learning_rate": 6.224536175867538e-06, "loss": 0.8145, "step": 24060 }, { "epoch": 1.587531122726598, "grad_norm": 0.6654101014137268, "learning_rate": 6.205546620736694e-06, "loss": 0.7943, "step": 24070 }, { "epoch": 1.5881906771975531, "grad_norm": 0.6048585176467896, "learning_rate": 6.186581970832453e-06, "loss": 0.8313, "step": 24080 }, { "epoch": 1.5888502316685078, "grad_norm": 0.6448466777801514, "learning_rate": 6.167642251285593e-06, "loss": 0.7644, "step": 24090 }, { "epoch": 1.589509786139463, "grad_norm": 0.6216910481452942, "learning_rate": 6.148727487193823e-06, "loss": 0.8317, "step": 24100 }, { "epoch": 1.5901693406104176, "grad_norm": 0.6383108496665955, "learning_rate": 6.129837703621818e-06, "loss": 0.8389, "step": 24110 }, { "epoch": 1.5908288950813725, "grad_norm": 0.63627028465271, "learning_rate": 6.110972925601102e-06, "loss": 0.8508, "step": 24120 }, { "epoch": 1.5914884495523274, "grad_norm": 0.593573808670044, "learning_rate": 6.0921331781301106e-06, "loss": 0.8329, "step": 24130 }, { "epoch": 1.5921480040232823, "grad_norm": 0.5346869826316833, "learning_rate": 6.073318486174082e-06, "loss": 0.7971, "step": 24140 }, { "epoch": 1.5928075584942372, "grad_norm": 0.4986163377761841, "learning_rate": 6.054528874665072e-06, "loss": 0.8252, "step": 24150 }, { "epoch": 1.593467112965192, "grad_norm": 0.5987104773521423, "learning_rate": 6.035764368501884e-06, "loss": 0.8334, "step": 24160 }, { "epoch": 1.594126667436147, "grad_norm": 0.6206068396568298, "learning_rate": 6.0170249925500575e-06, "loss": 0.7933, "step": 24170 }, { "epoch": 1.5947862219071016, "grad_norm": 0.5876137018203735, "learning_rate": 5.9983107716418435e-06, "loss": 0.8059, "step": 24180 }, { "epoch": 1.5954457763780567, "grad_norm": 0.581540584564209, "learning_rate": 5.979621730576135e-06, "loss": 0.8066, "step": 24190 }, { "epoch": 1.5961053308490114, "grad_norm": 0.593189001083374, "learning_rate": 5.960957894118499e-06, "loss": 0.8295, "step": 24200 }, { "epoch": 1.5967648853199665, "grad_norm": 0.5631604790687561, "learning_rate": 5.942319287001044e-06, "loss": 0.7807, "step": 24210 }, { "epoch": 1.5974244397909212, "grad_norm": 0.6551171541213989, "learning_rate": 5.923705933922502e-06, "loss": 0.7679, "step": 24220 }, { "epoch": 1.598083994261876, "grad_norm": 0.5785092115402222, "learning_rate": 5.905117859548104e-06, "loss": 0.8023, "step": 24230 }, { "epoch": 1.598743548732831, "grad_norm": 0.5633843541145325, "learning_rate": 5.8865550885096074e-06, "loss": 0.7987, "step": 24240 }, { "epoch": 1.5994031032037859, "grad_norm": 0.6311590075492859, "learning_rate": 5.868017645405221e-06, "loss": 0.7853, "step": 24250 }, { "epoch": 1.6000626576747408, "grad_norm": 0.656356155872345, "learning_rate": 5.849505554799595e-06, "loss": 0.7754, "step": 24260 }, { "epoch": 1.6007222121456954, "grad_norm": 0.7022365927696228, "learning_rate": 5.831018841223795e-06, "loss": 0.8057, "step": 24270 }, { "epoch": 1.6013817666166505, "grad_norm": 0.6113805174827576, "learning_rate": 5.812557529175239e-06, "loss": 0.7862, "step": 24280 }, { "epoch": 1.6020413210876052, "grad_norm": 0.5850211381912231, "learning_rate": 5.794121643117711e-06, "loss": 0.7823, "step": 24290 }, { "epoch": 1.6027008755585603, "grad_norm": 0.5751693248748779, "learning_rate": 5.775711207481277e-06, "loss": 0.8013, "step": 24300 }, { "epoch": 1.603360430029515, "grad_norm": 0.612724781036377, "learning_rate": 5.757326246662281e-06, "loss": 0.8005, "step": 24310 }, { "epoch": 1.60401998450047, "grad_norm": 0.6902143955230713, "learning_rate": 5.738966785023331e-06, "loss": 0.8164, "step": 24320 }, { "epoch": 1.6046795389714248, "grad_norm": 0.5919621586799622, "learning_rate": 5.720632846893217e-06, "loss": 0.7999, "step": 24330 }, { "epoch": 1.6053390934423797, "grad_norm": 0.6050026416778564, "learning_rate": 5.702324456566938e-06, "loss": 0.8025, "step": 24340 }, { "epoch": 1.6059986479133346, "grad_norm": 0.6934067606925964, "learning_rate": 5.684041638305593e-06, "loss": 0.816, "step": 24350 }, { "epoch": 1.6066582023842892, "grad_norm": 0.6082236170768738, "learning_rate": 5.665784416336445e-06, "loss": 0.8474, "step": 24360 }, { "epoch": 1.6073177568552444, "grad_norm": 0.6929888129234314, "learning_rate": 5.6475528148528e-06, "loss": 0.8254, "step": 24370 }, { "epoch": 1.607977311326199, "grad_norm": 0.6141098141670227, "learning_rate": 5.6293468580140395e-06, "loss": 0.8182, "step": 24380 }, { "epoch": 1.6086368657971541, "grad_norm": 0.5887795090675354, "learning_rate": 5.61116656994555e-06, "loss": 0.8084, "step": 24390 }, { "epoch": 1.6092964202681088, "grad_norm": 0.5878868699073792, "learning_rate": 5.593011974738696e-06, "loss": 0.7687, "step": 24400 }, { "epoch": 1.6099559747390637, "grad_norm": 0.6481924653053284, "learning_rate": 5.57488309645082e-06, "loss": 0.841, "step": 24410 }, { "epoch": 1.6106155292100186, "grad_norm": 0.6095759272575378, "learning_rate": 5.556779959105152e-06, "loss": 0.7907, "step": 24420 }, { "epoch": 1.6112750836809735, "grad_norm": 0.5441837310791016, "learning_rate": 5.538702586690853e-06, "loss": 0.8129, "step": 24430 }, { "epoch": 1.6119346381519284, "grad_norm": 0.5181625485420227, "learning_rate": 5.520651003162891e-06, "loss": 0.7661, "step": 24440 }, { "epoch": 1.6125941926228833, "grad_norm": 0.6456611156463623, "learning_rate": 5.50262523244211e-06, "loss": 0.8086, "step": 24450 }, { "epoch": 1.6132537470938382, "grad_norm": 0.5870319604873657, "learning_rate": 5.484625298415114e-06, "loss": 0.8152, "step": 24460 }, { "epoch": 1.6139133015647928, "grad_norm": 0.6919241547584534, "learning_rate": 5.466651224934294e-06, "loss": 0.7845, "step": 24470 }, { "epoch": 1.614572856035748, "grad_norm": 0.6638587117195129, "learning_rate": 5.448703035817756e-06, "loss": 0.8435, "step": 24480 }, { "epoch": 1.6152324105067026, "grad_norm": 0.5628007650375366, "learning_rate": 5.430780754849301e-06, "loss": 0.8099, "step": 24490 }, { "epoch": 1.6158919649776577, "grad_norm": 0.5648647546768188, "learning_rate": 5.412884405778424e-06, "loss": 0.8164, "step": 24500 }, { "epoch": 1.6165515194486124, "grad_norm": 0.6228564977645874, "learning_rate": 5.395014012320232e-06, "loss": 0.811, "step": 24510 }, { "epoch": 1.6172110739195673, "grad_norm": 0.5722883939743042, "learning_rate": 5.377169598155441e-06, "loss": 0.8016, "step": 24520 }, { "epoch": 1.6178706283905222, "grad_norm": 0.5846453309059143, "learning_rate": 5.3593511869303535e-06, "loss": 0.7895, "step": 24530 }, { "epoch": 1.618530182861477, "grad_norm": 0.5563955903053284, "learning_rate": 5.341558802256799e-06, "loss": 0.853, "step": 24540 }, { "epoch": 1.619189737332432, "grad_norm": 0.589167594909668, "learning_rate": 5.323792467712135e-06, "loss": 0.785, "step": 24550 }, { "epoch": 1.6198492918033867, "grad_norm": 0.6283437013626099, "learning_rate": 5.306052206839188e-06, "loss": 0.8389, "step": 24560 }, { "epoch": 1.6205088462743418, "grad_norm": 0.5898224115371704, "learning_rate": 5.288338043146229e-06, "loss": 0.8151, "step": 24570 }, { "epoch": 1.6211684007452964, "grad_norm": 0.6777439117431641, "learning_rate": 5.27065000010695e-06, "loss": 0.818, "step": 24580 }, { "epoch": 1.6218279552162516, "grad_norm": 0.5743446946144104, "learning_rate": 5.252988101160442e-06, "loss": 0.8047, "step": 24590 }, { "epoch": 1.6224875096872062, "grad_norm": 0.6315315365791321, "learning_rate": 5.235352369711141e-06, "loss": 0.8442, "step": 24600 }, { "epoch": 1.6231470641581611, "grad_norm": 0.5853379368782043, "learning_rate": 5.217742829128797e-06, "loss": 0.8024, "step": 24610 }, { "epoch": 1.623806618629116, "grad_norm": 0.6441596746444702, "learning_rate": 5.200159502748478e-06, "loss": 0.7766, "step": 24620 }, { "epoch": 1.624466173100071, "grad_norm": 0.5711107850074768, "learning_rate": 5.182602413870491e-06, "loss": 0.8099, "step": 24630 }, { "epoch": 1.6251257275710258, "grad_norm": 0.5894733667373657, "learning_rate": 5.165071585760398e-06, "loss": 0.8102, "step": 24640 }, { "epoch": 1.6257852820419807, "grad_norm": 0.6057029962539673, "learning_rate": 5.147567041648937e-06, "loss": 0.7781, "step": 24650 }, { "epoch": 1.6264448365129356, "grad_norm": 0.7251249551773071, "learning_rate": 5.130088804732036e-06, "loss": 0.7977, "step": 24660 }, { "epoch": 1.6271043909838903, "grad_norm": 0.6940408945083618, "learning_rate": 5.112636898170742e-06, "loss": 0.8161, "step": 24670 }, { "epoch": 1.6277639454548454, "grad_norm": 0.5553697347640991, "learning_rate": 5.0952113450912355e-06, "loss": 0.7842, "step": 24680 }, { "epoch": 1.6284234999258, "grad_norm": 0.5917607545852661, "learning_rate": 5.077812168584761e-06, "loss": 0.7922, "step": 24690 }, { "epoch": 1.629083054396755, "grad_norm": 0.6597845554351807, "learning_rate": 5.0604393917076055e-06, "loss": 0.8443, "step": 24700 }, { "epoch": 1.6297426088677098, "grad_norm": 0.6031802296638489, "learning_rate": 5.043093037481086e-06, "loss": 0.783, "step": 24710 }, { "epoch": 1.6304021633386647, "grad_norm": 0.5948882699012756, "learning_rate": 5.025773128891492e-06, "loss": 0.8233, "step": 24720 }, { "epoch": 1.6310617178096196, "grad_norm": 0.6422739028930664, "learning_rate": 5.008479688890089e-06, "loss": 0.8248, "step": 24730 }, { "epoch": 1.6317212722805745, "grad_norm": 0.6182518005371094, "learning_rate": 4.991212740393045e-06, "loss": 0.8165, "step": 24740 }, { "epoch": 1.6323808267515294, "grad_norm": 0.6159992814064026, "learning_rate": 4.9739723062814295e-06, "loss": 0.8042, "step": 24750 }, { "epoch": 1.633040381222484, "grad_norm": 0.5987078547477722, "learning_rate": 4.956758409401194e-06, "loss": 0.7775, "step": 24760 }, { "epoch": 1.6336999356934392, "grad_norm": 0.6602343916893005, "learning_rate": 4.939571072563096e-06, "loss": 0.8174, "step": 24770 }, { "epoch": 1.6343594901643939, "grad_norm": 0.6065938472747803, "learning_rate": 4.922410318542733e-06, "loss": 0.7932, "step": 24780 }, { "epoch": 1.635019044635349, "grad_norm": 0.6987325549125671, "learning_rate": 4.905276170080431e-06, "loss": 0.7737, "step": 24790 }, { "epoch": 1.6356785991063036, "grad_norm": 0.5951793193817139, "learning_rate": 4.8881686498813e-06, "loss": 0.7763, "step": 24800 }, { "epoch": 1.6363381535772585, "grad_norm": 0.6437860727310181, "learning_rate": 4.871087780615144e-06, "loss": 0.8006, "step": 24810 }, { "epoch": 1.6369977080482134, "grad_norm": 0.5470296144485474, "learning_rate": 4.854033584916462e-06, "loss": 0.8303, "step": 24820 }, { "epoch": 1.6376572625191683, "grad_norm": 0.5686923265457153, "learning_rate": 4.837006085384399e-06, "loss": 0.8304, "step": 24830 }, { "epoch": 1.6383168169901232, "grad_norm": 0.6447607278823853, "learning_rate": 4.820005304582714e-06, "loss": 0.7821, "step": 24840 }, { "epoch": 1.6389763714610779, "grad_norm": 0.6381845474243164, "learning_rate": 4.80303126503979e-06, "loss": 0.8364, "step": 24850 }, { "epoch": 1.639635925932033, "grad_norm": 0.4987076222896576, "learning_rate": 4.786083989248538e-06, "loss": 0.8175, "step": 24860 }, { "epoch": 1.6402954804029877, "grad_norm": 0.6408962607383728, "learning_rate": 4.769163499666443e-06, "loss": 0.8169, "step": 24870 }, { "epoch": 1.6409550348739428, "grad_norm": 0.5356785655021667, "learning_rate": 4.752269818715446e-06, "loss": 0.7907, "step": 24880 }, { "epoch": 1.6416145893448975, "grad_norm": 0.5934824347496033, "learning_rate": 4.73540296878201e-06, "loss": 0.8481, "step": 24890 }, { "epoch": 1.6422741438158523, "grad_norm": 0.6478430032730103, "learning_rate": 4.718562972217011e-06, "loss": 0.7804, "step": 24900 }, { "epoch": 1.6429336982868072, "grad_norm": 0.5883824229240417, "learning_rate": 4.7017498513357545e-06, "loss": 0.788, "step": 24910 }, { "epoch": 1.6435932527577621, "grad_norm": 0.681681215763092, "learning_rate": 4.684963628417949e-06, "loss": 0.7992, "step": 24920 }, { "epoch": 1.644252807228717, "grad_norm": 0.6568331718444824, "learning_rate": 4.668204325707609e-06, "loss": 0.7977, "step": 24930 }, { "epoch": 1.644912361699672, "grad_norm": 0.5915022492408752, "learning_rate": 4.6514719654131286e-06, "loss": 0.7947, "step": 24940 }, { "epoch": 1.6455719161706268, "grad_norm": 0.619634211063385, "learning_rate": 4.634766569707169e-06, "loss": 0.8562, "step": 24950 }, { "epoch": 1.6462314706415815, "grad_norm": 0.6007986068725586, "learning_rate": 4.618088160726677e-06, "loss": 0.7471, "step": 24960 }, { "epoch": 1.6468910251125366, "grad_norm": 0.6730979681015015, "learning_rate": 4.601436760572831e-06, "loss": 0.8011, "step": 24970 }, { "epoch": 1.6475505795834913, "grad_norm": 0.5380508899688721, "learning_rate": 4.584812391311003e-06, "loss": 0.8208, "step": 24980 }, { "epoch": 1.6482101340544464, "grad_norm": 0.6031770706176758, "learning_rate": 4.568215074970778e-06, "loss": 0.791, "step": 24990 }, { "epoch": 1.648869688525401, "grad_norm": 0.615338146686554, "learning_rate": 4.551644833545868e-06, "loss": 0.8092, "step": 25000 }, { "epoch": 1.649529242996356, "grad_norm": 0.6194599866867065, "learning_rate": 4.5351016889941145e-06, "loss": 0.8308, "step": 25010 }, { "epoch": 1.6501887974673108, "grad_norm": 0.6836225390434265, "learning_rate": 4.518585663237446e-06, "loss": 0.825, "step": 25020 }, { "epoch": 1.6508483519382657, "grad_norm": 0.6452698111534119, "learning_rate": 4.502096778161871e-06, "loss": 0.7912, "step": 25030 }, { "epoch": 1.6515079064092206, "grad_norm": 0.636465311050415, "learning_rate": 4.48563505561741e-06, "loss": 0.8178, "step": 25040 }, { "epoch": 1.6521674608801753, "grad_norm": 0.5928276181221008, "learning_rate": 4.469200517418115e-06, "loss": 0.7823, "step": 25050 }, { "epoch": 1.6528270153511304, "grad_norm": 0.6056523323059082, "learning_rate": 4.452793185341994e-06, "loss": 0.8313, "step": 25060 }, { "epoch": 1.653486569822085, "grad_norm": 0.5615975260734558, "learning_rate": 4.436413081131005e-06, "loss": 0.8348, "step": 25070 }, { "epoch": 1.6541461242930402, "grad_norm": 0.638456404209137, "learning_rate": 4.420060226491041e-06, "loss": 0.8151, "step": 25080 }, { "epoch": 1.6548056787639949, "grad_norm": 0.6386964917182922, "learning_rate": 4.403734643091875e-06, "loss": 0.8095, "step": 25090 }, { "epoch": 1.6554652332349498, "grad_norm": 0.6466909646987915, "learning_rate": 4.38743635256714e-06, "loss": 0.797, "step": 25100 }, { "epoch": 1.6561247877059047, "grad_norm": 0.6175558567047119, "learning_rate": 4.371165376514297e-06, "loss": 0.8249, "step": 25110 }, { "epoch": 1.6567843421768595, "grad_norm": 0.6386156678199768, "learning_rate": 4.35492173649463e-06, "loss": 0.7941, "step": 25120 }, { "epoch": 1.6574438966478144, "grad_norm": 0.6918299198150635, "learning_rate": 4.338705454033182e-06, "loss": 0.8136, "step": 25130 }, { "epoch": 1.6581034511187691, "grad_norm": 0.7022925615310669, "learning_rate": 4.322516550618757e-06, "loss": 0.8031, "step": 25140 }, { "epoch": 1.6587630055897242, "grad_norm": 0.6506940126419067, "learning_rate": 4.306355047703867e-06, "loss": 0.7453, "step": 25150 }, { "epoch": 1.659422560060679, "grad_norm": 0.6400471329689026, "learning_rate": 4.290220966704711e-06, "loss": 0.7688, "step": 25160 }, { "epoch": 1.660082114531634, "grad_norm": 0.6685969829559326, "learning_rate": 4.274114329001172e-06, "loss": 0.7827, "step": 25170 }, { "epoch": 1.6607416690025887, "grad_norm": 0.5573239326477051, "learning_rate": 4.258035155936743e-06, "loss": 0.8142, "step": 25180 }, { "epoch": 1.6614012234735436, "grad_norm": 0.5721292495727539, "learning_rate": 4.24198346881853e-06, "loss": 0.8086, "step": 25190 }, { "epoch": 1.6620607779444985, "grad_norm": 0.6257614493370056, "learning_rate": 4.225959288917228e-06, "loss": 0.8277, "step": 25200 }, { "epoch": 1.6627203324154534, "grad_norm": 0.6866161823272705, "learning_rate": 4.209962637467058e-06, "loss": 0.8228, "step": 25210 }, { "epoch": 1.6633798868864083, "grad_norm": 0.7327253818511963, "learning_rate": 4.193993535665791e-06, "loss": 0.7469, "step": 25220 }, { "epoch": 1.6640394413573631, "grad_norm": 0.6483702063560486, "learning_rate": 4.178052004674665e-06, "loss": 0.7842, "step": 25230 }, { "epoch": 1.664698995828318, "grad_norm": 0.634652316570282, "learning_rate": 4.162138065618401e-06, "loss": 0.8013, "step": 25240 }, { "epoch": 1.6653585502992727, "grad_norm": 0.6225734353065491, "learning_rate": 4.146251739585135e-06, "loss": 0.8043, "step": 25250 }, { "epoch": 1.6660181047702278, "grad_norm": 0.6606937646865845, "learning_rate": 4.130393047626444e-06, "loss": 0.8123, "step": 25260 }, { "epoch": 1.6666776592411825, "grad_norm": 0.6344702243804932, "learning_rate": 4.1145620107572614e-06, "loss": 0.7918, "step": 25270 }, { "epoch": 1.6673372137121376, "grad_norm": 0.6618313789367676, "learning_rate": 4.098758649955872e-06, "loss": 0.8129, "step": 25280 }, { "epoch": 1.6679967681830923, "grad_norm": 0.7696662545204163, "learning_rate": 4.082982986163908e-06, "loss": 0.7915, "step": 25290 }, { "epoch": 1.6686563226540472, "grad_norm": 0.5740074515342712, "learning_rate": 4.067235040286277e-06, "loss": 0.8715, "step": 25300 }, { "epoch": 1.669315877125002, "grad_norm": 0.680923581123352, "learning_rate": 4.051514833191172e-06, "loss": 0.787, "step": 25310 }, { "epoch": 1.669975431595957, "grad_norm": 0.6620702743530273, "learning_rate": 4.0358223857100226e-06, "loss": 0.8038, "step": 25320 }, { "epoch": 1.6706349860669119, "grad_norm": 0.5888247489929199, "learning_rate": 4.020157718637469e-06, "loss": 0.7963, "step": 25330 }, { "epoch": 1.6712945405378665, "grad_norm": 0.5931589603424072, "learning_rate": 4.0045208527313335e-06, "loss": 0.8, "step": 25340 }, { "epoch": 1.6719540950088216, "grad_norm": 0.6023751497268677, "learning_rate": 3.988911808712617e-06, "loss": 0.7914, "step": 25350 }, { "epoch": 1.6726136494797763, "grad_norm": 0.6235506534576416, "learning_rate": 3.973330607265452e-06, "loss": 0.7857, "step": 25360 }, { "epoch": 1.6732732039507314, "grad_norm": 0.5739419460296631, "learning_rate": 3.957777269037042e-06, "loss": 0.7726, "step": 25370 }, { "epoch": 1.673932758421686, "grad_norm": 0.6558060050010681, "learning_rate": 3.942251814637709e-06, "loss": 0.8036, "step": 25380 }, { "epoch": 1.674592312892641, "grad_norm": 0.6836333274841309, "learning_rate": 3.926754264640798e-06, "loss": 0.7994, "step": 25390 }, { "epoch": 1.6752518673635959, "grad_norm": 0.625515341758728, "learning_rate": 3.911284639582699e-06, "loss": 0.7524, "step": 25400 }, { "epoch": 1.6759114218345508, "grad_norm": 0.5542259216308594, "learning_rate": 3.8958429599627755e-06, "loss": 0.8588, "step": 25410 }, { "epoch": 1.6765709763055057, "grad_norm": 0.6386640071868896, "learning_rate": 3.880429246243367e-06, "loss": 0.8087, "step": 25420 }, { "epoch": 1.6772305307764603, "grad_norm": 0.5683929920196533, "learning_rate": 3.865043518849767e-06, "loss": 0.7986, "step": 25430 }, { "epoch": 1.6778900852474155, "grad_norm": 0.6959534883499146, "learning_rate": 3.849685798170163e-06, "loss": 0.8188, "step": 25440 }, { "epoch": 1.6785496397183701, "grad_norm": 0.5607390999794006, "learning_rate": 3.834356104555659e-06, "loss": 0.8532, "step": 25450 }, { "epoch": 1.6792091941893252, "grad_norm": 0.599860668182373, "learning_rate": 3.81905445832017e-06, "loss": 0.8207, "step": 25460 }, { "epoch": 1.67986874866028, "grad_norm": 0.5843977332115173, "learning_rate": 3.8037808797404954e-06, "loss": 0.7927, "step": 25470 }, { "epoch": 1.6805283031312348, "grad_norm": 0.5897167921066284, "learning_rate": 3.788535389056211e-06, "loss": 0.7981, "step": 25480 }, { "epoch": 1.6811878576021897, "grad_norm": 0.6084034442901611, "learning_rate": 3.7733180064696893e-06, "loss": 0.815, "step": 25490 }, { "epoch": 1.6818474120731446, "grad_norm": 0.6056513786315918, "learning_rate": 3.758128752146045e-06, "loss": 0.8144, "step": 25500 }, { "epoch": 1.6825069665440995, "grad_norm": 0.5559295415878296, "learning_rate": 3.7429676462131136e-06, "loss": 0.766, "step": 25510 }, { "epoch": 1.6831665210150544, "grad_norm": 0.5697713494300842, "learning_rate": 3.727834708761452e-06, "loss": 0.7816, "step": 25520 }, { "epoch": 1.6838260754860093, "grad_norm": 0.6037575006484985, "learning_rate": 3.712729959844263e-06, "loss": 0.8125, "step": 25530 }, { "epoch": 1.684485629956964, "grad_norm": 0.5951526761054993, "learning_rate": 3.6976534194774287e-06, "loss": 0.8355, "step": 25540 }, { "epoch": 1.685145184427919, "grad_norm": 0.635144054889679, "learning_rate": 3.682605107639406e-06, "loss": 0.7882, "step": 25550 }, { "epoch": 1.6858047388988737, "grad_norm": 0.6611377000808716, "learning_rate": 3.667585044271288e-06, "loss": 0.7948, "step": 25560 }, { "epoch": 1.6864642933698288, "grad_norm": 0.5945231318473816, "learning_rate": 3.652593249276709e-06, "loss": 0.7924, "step": 25570 }, { "epoch": 1.6871238478407835, "grad_norm": 0.5711615085601807, "learning_rate": 3.6376297425218585e-06, "loss": 0.7651, "step": 25580 }, { "epoch": 1.6877834023117384, "grad_norm": 0.6354359984397888, "learning_rate": 3.622694543835431e-06, "loss": 0.8049, "step": 25590 }, { "epoch": 1.6884429567826933, "grad_norm": 0.6226228475570679, "learning_rate": 3.607787673008606e-06, "loss": 0.7954, "step": 25600 }, { "epoch": 1.6891025112536482, "grad_norm": 0.6653103232383728, "learning_rate": 3.592909149795043e-06, "loss": 0.7887, "step": 25610 }, { "epoch": 1.689762065724603, "grad_norm": 0.6024723649024963, "learning_rate": 3.57805899391081e-06, "loss": 0.7584, "step": 25620 }, { "epoch": 1.6904216201955578, "grad_norm": 0.5744298696517944, "learning_rate": 3.563237225034413e-06, "loss": 0.7976, "step": 25630 }, { "epoch": 1.6910811746665129, "grad_norm": 0.5463761687278748, "learning_rate": 3.548443862806719e-06, "loss": 0.7559, "step": 25640 }, { "epoch": 1.6917407291374675, "grad_norm": 0.6243197321891785, "learning_rate": 3.533678926830955e-06, "loss": 0.7711, "step": 25650 }, { "epoch": 1.6924002836084227, "grad_norm": 0.6674763560295105, "learning_rate": 3.518942436672701e-06, "loss": 0.8286, "step": 25660 }, { "epoch": 1.6930598380793773, "grad_norm": 0.6699982285499573, "learning_rate": 3.5042344118598137e-06, "loss": 0.8206, "step": 25670 }, { "epoch": 1.6937193925503322, "grad_norm": 0.5957103371620178, "learning_rate": 3.4895548718824443e-06, "loss": 0.7995, "step": 25680 }, { "epoch": 1.694378947021287, "grad_norm": 0.7029277682304382, "learning_rate": 3.4749038361929913e-06, "loss": 0.7842, "step": 25690 }, { "epoch": 1.695038501492242, "grad_norm": 0.5681647062301636, "learning_rate": 3.460281324206094e-06, "loss": 0.7992, "step": 25700 }, { "epoch": 1.695698055963197, "grad_norm": 0.7148775458335876, "learning_rate": 3.4456873552985743e-06, "loss": 0.7899, "step": 25710 }, { "epoch": 1.6963576104341518, "grad_norm": 0.5896807312965393, "learning_rate": 3.4311219488094544e-06, "loss": 0.7733, "step": 25720 }, { "epoch": 1.6970171649051067, "grad_norm": 0.5539336204528809, "learning_rate": 3.4165851240398866e-06, "loss": 0.764, "step": 25730 }, { "epoch": 1.6976767193760613, "grad_norm": 0.6221079230308533, "learning_rate": 3.402076900253148e-06, "loss": 0.8234, "step": 25740 }, { "epoch": 1.6983362738470165, "grad_norm": 0.6422823667526245, "learning_rate": 3.387597296674641e-06, "loss": 0.8512, "step": 25750 }, { "epoch": 1.6989958283179711, "grad_norm": 0.6487817168235779, "learning_rate": 3.373146332491814e-06, "loss": 0.7879, "step": 25760 }, { "epoch": 1.699655382788926, "grad_norm": 0.6562302708625793, "learning_rate": 3.3587240268541762e-06, "loss": 0.7887, "step": 25770 }, { "epoch": 1.700314937259881, "grad_norm": 0.5367435812950134, "learning_rate": 3.3443303988732578e-06, "loss": 0.7913, "step": 25780 }, { "epoch": 1.7009744917308358, "grad_norm": 0.6663900017738342, "learning_rate": 3.3299654676225867e-06, "loss": 0.795, "step": 25790 }, { "epoch": 1.7016340462017907, "grad_norm": 0.6704986095428467, "learning_rate": 3.315629252137678e-06, "loss": 0.8316, "step": 25800 }, { "epoch": 1.7022936006727456, "grad_norm": 0.5732877254486084, "learning_rate": 3.3013217714159715e-06, "loss": 0.8099, "step": 25810 }, { "epoch": 1.7029531551437005, "grad_norm": 0.587375819683075, "learning_rate": 3.287043044416843e-06, "loss": 0.7608, "step": 25820 }, { "epoch": 1.7036127096146552, "grad_norm": 0.552054762840271, "learning_rate": 3.2727930900615572e-06, "loss": 0.8175, "step": 25830 }, { "epoch": 1.7042722640856103, "grad_norm": 0.6114928126335144, "learning_rate": 3.2585719272332643e-06, "loss": 0.7723, "step": 25840 }, { "epoch": 1.704931818556565, "grad_norm": 0.6393599510192871, "learning_rate": 3.244379574776954e-06, "loss": 0.7694, "step": 25850 }, { "epoch": 1.70559137302752, "grad_norm": 0.5469321012496948, "learning_rate": 3.2302160514994296e-06, "loss": 0.8425, "step": 25860 }, { "epoch": 1.7062509274984747, "grad_norm": 0.760794460773468, "learning_rate": 3.216081376169311e-06, "loss": 0.8194, "step": 25870 }, { "epoch": 1.7069104819694296, "grad_norm": 0.6197438836097717, "learning_rate": 3.201975567516974e-06, "loss": 0.816, "step": 25880 }, { "epoch": 1.7075700364403845, "grad_norm": 0.6593007445335388, "learning_rate": 3.187898644234552e-06, "loss": 0.8373, "step": 25890 }, { "epoch": 1.7082295909113394, "grad_norm": 0.596043050289154, "learning_rate": 3.1738506249758966e-06, "loss": 0.7892, "step": 25900 }, { "epoch": 1.7088891453822943, "grad_norm": 0.6729983687400818, "learning_rate": 3.1598315283565605e-06, "loss": 0.8246, "step": 25910 }, { "epoch": 1.709548699853249, "grad_norm": 0.592121422290802, "learning_rate": 3.145841372953759e-06, "loss": 0.808, "step": 25920 }, { "epoch": 1.710208254324204, "grad_norm": 0.5267177820205688, "learning_rate": 3.131880177306376e-06, "loss": 0.7904, "step": 25930 }, { "epoch": 1.7108678087951588, "grad_norm": 0.682929277420044, "learning_rate": 3.1179479599149077e-06, "loss": 0.7997, "step": 25940 }, { "epoch": 1.7115273632661139, "grad_norm": 0.6856069564819336, "learning_rate": 3.1040447392414434e-06, "loss": 0.7882, "step": 25950 }, { "epoch": 1.7121869177370685, "grad_norm": 0.5914722681045532, "learning_rate": 3.090170533709666e-06, "loss": 0.8497, "step": 25960 }, { "epoch": 1.7128464722080234, "grad_norm": 0.5459190011024475, "learning_rate": 3.0763253617047904e-06, "loss": 0.8171, "step": 25970 }, { "epoch": 1.7135060266789783, "grad_norm": 0.6196326017379761, "learning_rate": 3.0625092415735775e-06, "loss": 0.7907, "step": 25980 }, { "epoch": 1.7141655811499332, "grad_norm": 0.5496552586555481, "learning_rate": 3.0487221916242733e-06, "loss": 0.7832, "step": 25990 }, { "epoch": 1.7148251356208881, "grad_norm": 0.5956296920776367, "learning_rate": 3.034964230126611e-06, "loss": 0.8034, "step": 26000 }, { "epoch": 1.715484690091843, "grad_norm": 0.5823291540145874, "learning_rate": 3.021235375311768e-06, "loss": 0.7987, "step": 26010 }, { "epoch": 1.716144244562798, "grad_norm": 0.6842591166496277, "learning_rate": 3.0075356453723667e-06, "loss": 0.808, "step": 26020 }, { "epoch": 1.7168037990337526, "grad_norm": 0.7035054564476013, "learning_rate": 2.9938650584624333e-06, "loss": 0.7812, "step": 26030 }, { "epoch": 1.7174633535047077, "grad_norm": 0.5906645059585571, "learning_rate": 2.9802236326973487e-06, "loss": 0.7919, "step": 26040 }, { "epoch": 1.7181229079756624, "grad_norm": 0.5931512117385864, "learning_rate": 2.966611386153892e-06, "loss": 0.8114, "step": 26050 }, { "epoch": 1.7187824624466175, "grad_norm": 0.6326853632926941, "learning_rate": 2.9530283368701394e-06, "loss": 0.7874, "step": 26060 }, { "epoch": 1.7194420169175721, "grad_norm": 0.5746793150901794, "learning_rate": 2.9394745028455045e-06, "loss": 0.786, "step": 26070 }, { "epoch": 1.720101571388527, "grad_norm": 0.6155476570129395, "learning_rate": 2.9259499020406726e-06, "loss": 0.8124, "step": 26080 }, { "epoch": 1.720761125859482, "grad_norm": 0.6925846338272095, "learning_rate": 2.912454552377586e-06, "loss": 0.8228, "step": 26090 }, { "epoch": 1.7214206803304368, "grad_norm": 0.6041793823242188, "learning_rate": 2.8989884717394444e-06, "loss": 0.8044, "step": 26100 }, { "epoch": 1.7220802348013917, "grad_norm": 0.5684294700622559, "learning_rate": 2.8855516779706382e-06, "loss": 0.787, "step": 26110 }, { "epoch": 1.7227397892723464, "grad_norm": 0.5601034164428711, "learning_rate": 2.872144188876777e-06, "loss": 0.8011, "step": 26120 }, { "epoch": 1.7233993437433015, "grad_norm": 0.5804260969161987, "learning_rate": 2.8587660222246016e-06, "loss": 0.8106, "step": 26130 }, { "epoch": 1.7240588982142562, "grad_norm": 0.6315391063690186, "learning_rate": 2.8454171957420285e-06, "loss": 0.829, "step": 26140 }, { "epoch": 1.7247184526852113, "grad_norm": 0.5435738563537598, "learning_rate": 2.832097727118077e-06, "loss": 0.7895, "step": 26150 }, { "epoch": 1.725378007156166, "grad_norm": 0.6043064594268799, "learning_rate": 2.8188076340028735e-06, "loss": 0.7951, "step": 26160 }, { "epoch": 1.7260375616271209, "grad_norm": 0.593038022518158, "learning_rate": 2.8055469340076134e-06, "loss": 0.7747, "step": 26170 }, { "epoch": 1.7266971160980757, "grad_norm": 0.5918024778366089, "learning_rate": 2.7923156447045323e-06, "loss": 0.8139, "step": 26180 }, { "epoch": 1.7273566705690306, "grad_norm": 0.6056832671165466, "learning_rate": 2.779113783626916e-06, "loss": 0.7975, "step": 26190 }, { "epoch": 1.7280162250399855, "grad_norm": 0.6603617668151855, "learning_rate": 2.7659413682690234e-06, "loss": 0.8294, "step": 26200 }, { "epoch": 1.7286757795109402, "grad_norm": 0.6331311464309692, "learning_rate": 2.752798416086133e-06, "loss": 0.7895, "step": 26210 }, { "epoch": 1.7293353339818953, "grad_norm": 0.5651764869689941, "learning_rate": 2.7396849444944307e-06, "loss": 0.8386, "step": 26220 }, { "epoch": 1.72999488845285, "grad_norm": 0.6461435556411743, "learning_rate": 2.7266009708710766e-06, "loss": 0.8364, "step": 26230 }, { "epoch": 1.730654442923805, "grad_norm": 0.565617561340332, "learning_rate": 2.7135465125541354e-06, "loss": 0.8264, "step": 26240 }, { "epoch": 1.7313139973947598, "grad_norm": 0.6211186647415161, "learning_rate": 2.700521586842547e-06, "loss": 0.796, "step": 26250 }, { "epoch": 1.7319735518657147, "grad_norm": 0.6177099943161011, "learning_rate": 2.687526210996125e-06, "loss": 0.85, "step": 26260 }, { "epoch": 1.7326331063366696, "grad_norm": 0.6954526305198669, "learning_rate": 2.6745604022355153e-06, "loss": 0.8372, "step": 26270 }, { "epoch": 1.7332926608076245, "grad_norm": 0.6717360019683838, "learning_rate": 2.661624177742203e-06, "loss": 0.8176, "step": 26280 }, { "epoch": 1.7339522152785793, "grad_norm": 0.5314779281616211, "learning_rate": 2.6487175546584505e-06, "loss": 0.7842, "step": 26290 }, { "epoch": 1.7346117697495342, "grad_norm": 0.6311419606208801, "learning_rate": 2.635840550087307e-06, "loss": 0.8061, "step": 26300 }, { "epoch": 1.7352713242204891, "grad_norm": 0.6443468928337097, "learning_rate": 2.62299318109257e-06, "loss": 0.8335, "step": 26310 }, { "epoch": 1.7359308786914438, "grad_norm": 0.5492504835128784, "learning_rate": 2.610175464698755e-06, "loss": 0.8256, "step": 26320 }, { "epoch": 1.736590433162399, "grad_norm": 0.6499750018119812, "learning_rate": 2.5973874178911035e-06, "loss": 0.792, "step": 26330 }, { "epoch": 1.7372499876333536, "grad_norm": 0.5789830088615417, "learning_rate": 2.5846290576155258e-06, "loss": 0.7956, "step": 26340 }, { "epoch": 1.7379095421043087, "grad_norm": 0.646263062953949, "learning_rate": 2.5719004007785987e-06, "loss": 0.8111, "step": 26350 }, { "epoch": 1.7385690965752634, "grad_norm": 0.5217257738113403, "learning_rate": 2.559201464247532e-06, "loss": 0.8302, "step": 26360 }, { "epoch": 1.7392286510462183, "grad_norm": 0.5562120676040649, "learning_rate": 2.5465322648501655e-06, "loss": 0.82, "step": 26370 }, { "epoch": 1.7398882055171732, "grad_norm": 0.587908148765564, "learning_rate": 2.533892819374914e-06, "loss": 0.773, "step": 26380 }, { "epoch": 1.740547759988128, "grad_norm": 0.6600366234779358, "learning_rate": 2.521283144570788e-06, "loss": 0.7903, "step": 26390 }, { "epoch": 1.741207314459083, "grad_norm": 0.6248031258583069, "learning_rate": 2.5087032571473266e-06, "loss": 0.7825, "step": 26400 }, { "epoch": 1.7418668689300376, "grad_norm": 0.615217924118042, "learning_rate": 2.496153173774604e-06, "loss": 0.8362, "step": 26410 }, { "epoch": 1.7425264234009927, "grad_norm": 0.6468397378921509, "learning_rate": 2.4836329110832086e-06, "loss": 0.7988, "step": 26420 }, { "epoch": 1.7431859778719474, "grad_norm": 0.6291540861129761, "learning_rate": 2.4711424856641964e-06, "loss": 0.8245, "step": 26430 }, { "epoch": 1.7438455323429025, "grad_norm": 0.5626056790351868, "learning_rate": 2.4586819140690943e-06, "loss": 0.7779, "step": 26440 }, { "epoch": 1.7445050868138572, "grad_norm": 0.6262189149856567, "learning_rate": 2.4462512128098717e-06, "loss": 0.7705, "step": 26450 }, { "epoch": 1.745164641284812, "grad_norm": 0.60690838098526, "learning_rate": 2.4338503983589026e-06, "loss": 0.769, "step": 26460 }, { "epoch": 1.745824195755767, "grad_norm": 0.5185971856117249, "learning_rate": 2.421479487148978e-06, "loss": 0.7812, "step": 26470 }, { "epoch": 1.7464837502267219, "grad_norm": 0.6107097268104553, "learning_rate": 2.409138495573246e-06, "loss": 0.7936, "step": 26480 }, { "epoch": 1.7471433046976768, "grad_norm": 0.624666690826416, "learning_rate": 2.396827439985208e-06, "loss": 0.8287, "step": 26490 }, { "epoch": 1.7478028591686314, "grad_norm": 0.573738694190979, "learning_rate": 2.3845463366987007e-06, "loss": 0.8372, "step": 26500 }, { "epoch": 1.7484624136395865, "grad_norm": 0.643282949924469, "learning_rate": 2.372295201987873e-06, "loss": 0.8416, "step": 26510 }, { "epoch": 1.7491219681105412, "grad_norm": 0.6036027073860168, "learning_rate": 2.360074052087155e-06, "loss": 0.7965, "step": 26520 }, { "epoch": 1.7497815225814963, "grad_norm": 0.668479323387146, "learning_rate": 2.3478829031912426e-06, "loss": 0.8128, "step": 26530 }, { "epoch": 1.750441077052451, "grad_norm": 0.560921847820282, "learning_rate": 2.3357217714550848e-06, "loss": 0.7554, "step": 26540 }, { "epoch": 1.751100631523406, "grad_norm": 0.5599755048751831, "learning_rate": 2.3235906729938417e-06, "loss": 0.8108, "step": 26550 }, { "epoch": 1.7517601859943608, "grad_norm": 0.6397618055343628, "learning_rate": 2.3114896238828925e-06, "loss": 0.8349, "step": 26560 }, { "epoch": 1.7524197404653157, "grad_norm": 0.6178655624389648, "learning_rate": 2.2994186401577716e-06, "loss": 0.791, "step": 26570 }, { "epoch": 1.7530792949362706, "grad_norm": 0.7424609661102295, "learning_rate": 2.2873777378141947e-06, "loss": 0.7657, "step": 26580 }, { "epoch": 1.7537388494072255, "grad_norm": 0.5651452541351318, "learning_rate": 2.275366932808004e-06, "loss": 0.7932, "step": 26590 }, { "epoch": 1.7543984038781804, "grad_norm": 0.6681116223335266, "learning_rate": 2.2633862410551683e-06, "loss": 0.8513, "step": 26600 }, { "epoch": 1.755057958349135, "grad_norm": 0.6477376818656921, "learning_rate": 2.251435678431743e-06, "loss": 0.7905, "step": 26610 }, { "epoch": 1.7557175128200901, "grad_norm": 0.5751684308052063, "learning_rate": 2.239515260773853e-06, "loss": 0.831, "step": 26620 }, { "epoch": 1.7563770672910448, "grad_norm": 0.5635110139846802, "learning_rate": 2.227625003877701e-06, "loss": 0.7815, "step": 26630 }, { "epoch": 1.757036621762, "grad_norm": 0.6373135447502136, "learning_rate": 2.2157649234994944e-06, "loss": 0.8066, "step": 26640 }, { "epoch": 1.7576961762329546, "grad_norm": 0.6144300103187561, "learning_rate": 2.2039350353554785e-06, "loss": 0.8136, "step": 26650 }, { "epoch": 1.7583557307039095, "grad_norm": 0.6329615712165833, "learning_rate": 2.1921353551218603e-06, "loss": 0.7747, "step": 26660 }, { "epoch": 1.7590152851748644, "grad_norm": 0.6645671725273132, "learning_rate": 2.1803658984348414e-06, "loss": 0.801, "step": 26670 }, { "epoch": 1.7596748396458193, "grad_norm": 0.6029779314994812, "learning_rate": 2.1686266808905676e-06, "loss": 0.8115, "step": 26680 }, { "epoch": 1.7603343941167742, "grad_norm": 0.7071924209594727, "learning_rate": 2.156917718045104e-06, "loss": 0.7652, "step": 26690 }, { "epoch": 1.7609939485877288, "grad_norm": 0.5901879668235779, "learning_rate": 2.1452390254144446e-06, "loss": 0.8238, "step": 26700 }, { "epoch": 1.761653503058684, "grad_norm": 0.6033055186271667, "learning_rate": 2.1335906184744374e-06, "loss": 0.7974, "step": 26710 }, { "epoch": 1.7623130575296386, "grad_norm": 0.6236467957496643, "learning_rate": 2.121972512660836e-06, "loss": 0.7641, "step": 26720 }, { "epoch": 1.7629726120005937, "grad_norm": 0.6290685534477234, "learning_rate": 2.11038472336921e-06, "loss": 0.7739, "step": 26730 }, { "epoch": 1.7636321664715484, "grad_norm": 0.6045517921447754, "learning_rate": 2.0988272659549808e-06, "loss": 0.8081, "step": 26740 }, { "epoch": 1.7642917209425033, "grad_norm": 0.6901437640190125, "learning_rate": 2.087300155733357e-06, "loss": 0.811, "step": 26750 }, { "epoch": 1.7649512754134582, "grad_norm": 0.5917714238166809, "learning_rate": 2.0758034079793347e-06, "loss": 0.8048, "step": 26760 }, { "epoch": 1.765610829884413, "grad_norm": 0.5725721716880798, "learning_rate": 2.064337037927688e-06, "loss": 0.7878, "step": 26770 }, { "epoch": 1.766270384355368, "grad_norm": 0.6000796556472778, "learning_rate": 2.0529010607729242e-06, "loss": 0.8137, "step": 26780 }, { "epoch": 1.7669299388263227, "grad_norm": 0.6986436247825623, "learning_rate": 2.0414954916692904e-06, "loss": 0.8074, "step": 26790 }, { "epoch": 1.7675894932972778, "grad_norm": 0.5333727598190308, "learning_rate": 2.03012034573071e-06, "loss": 0.7536, "step": 26800 }, { "epoch": 1.7682490477682324, "grad_norm": 0.6346567273139954, "learning_rate": 2.018775638030826e-06, "loss": 0.7852, "step": 26810 }, { "epoch": 1.7689086022391876, "grad_norm": 0.5741221308708191, "learning_rate": 2.0074613836029236e-06, "loss": 0.7825, "step": 26820 }, { "epoch": 1.7695681567101422, "grad_norm": 0.5961235761642456, "learning_rate": 1.9961775974399468e-06, "loss": 0.8353, "step": 26830 }, { "epoch": 1.7702277111810971, "grad_norm": 0.6274139285087585, "learning_rate": 1.984924294494453e-06, "loss": 0.7683, "step": 26840 }, { "epoch": 1.770887265652052, "grad_norm": 0.6172131896018982, "learning_rate": 1.9737014896786115e-06, "loss": 0.8453, "step": 26850 }, { "epoch": 1.771546820123007, "grad_norm": 0.7044884562492371, "learning_rate": 1.962509197864182e-06, "loss": 0.8254, "step": 26860 }, { "epoch": 1.7722063745939618, "grad_norm": 0.5985585451126099, "learning_rate": 1.9513474338824748e-06, "loss": 0.8301, "step": 26870 }, { "epoch": 1.7728659290649167, "grad_norm": 0.6298454403877258, "learning_rate": 1.9402162125243706e-06, "loss": 0.8077, "step": 26880 }, { "epoch": 1.7735254835358716, "grad_norm": 0.6074488162994385, "learning_rate": 1.929115548540253e-06, "loss": 0.7929, "step": 26890 }, { "epoch": 1.7741850380068263, "grad_norm": 0.682905912399292, "learning_rate": 1.9180454566400247e-06, "loss": 0.8093, "step": 26900 }, { "epoch": 1.7748445924777814, "grad_norm": 0.6187024712562561, "learning_rate": 1.9070059514930822e-06, "loss": 0.7925, "step": 26910 }, { "epoch": 1.775504146948736, "grad_norm": 0.7148305773735046, "learning_rate": 1.8959970477282735e-06, "loss": 0.7884, "step": 26920 }, { "epoch": 1.7761637014196912, "grad_norm": 0.6598269939422607, "learning_rate": 1.8850187599339096e-06, "loss": 0.8044, "step": 26930 }, { "epoch": 1.7768232558906458, "grad_norm": 0.6087578535079956, "learning_rate": 1.8740711026577206e-06, "loss": 0.8131, "step": 26940 }, { "epoch": 1.7774828103616007, "grad_norm": 0.6049370169639587, "learning_rate": 1.8631540904068629e-06, "loss": 0.7979, "step": 26950 }, { "epoch": 1.7781423648325556, "grad_norm": 0.5292810201644897, "learning_rate": 1.8522677376478642e-06, "loss": 0.8086, "step": 26960 }, { "epoch": 1.7788019193035105, "grad_norm": 0.5981762409210205, "learning_rate": 1.8414120588066436e-06, "loss": 0.7902, "step": 26970 }, { "epoch": 1.7794614737744654, "grad_norm": 0.6287865042686462, "learning_rate": 1.8305870682684578e-06, "loss": 0.8154, "step": 26980 }, { "epoch": 1.78012102824542, "grad_norm": 0.6655606627464294, "learning_rate": 1.819792780377899e-06, "loss": 0.7849, "step": 26990 }, { "epoch": 1.7807805827163752, "grad_norm": 0.6155992746353149, "learning_rate": 1.809029209438884e-06, "loss": 0.7921, "step": 27000 }, { "epoch": 1.7814401371873299, "grad_norm": 0.7183905243873596, "learning_rate": 1.798296369714622e-06, "loss": 0.8068, "step": 27010 }, { "epoch": 1.782099691658285, "grad_norm": 0.6222487092018127, "learning_rate": 1.7875942754275898e-06, "loss": 0.8344, "step": 27020 }, { "epoch": 1.7827592461292396, "grad_norm": 0.5872424244880676, "learning_rate": 1.776922940759529e-06, "loss": 0.8019, "step": 27030 }, { "epoch": 1.7834188006001945, "grad_norm": 0.48592233657836914, "learning_rate": 1.7662823798514249e-06, "loss": 0.8084, "step": 27040 }, { "epoch": 1.7840783550711494, "grad_norm": 0.6957306265830994, "learning_rate": 1.7556726068034795e-06, "loss": 0.7976, "step": 27050 }, { "epoch": 1.7847379095421043, "grad_norm": 0.5849895477294922, "learning_rate": 1.7450936356750907e-06, "loss": 0.7798, "step": 27060 }, { "epoch": 1.7853974640130592, "grad_norm": 0.6629789471626282, "learning_rate": 1.7345454804848504e-06, "loss": 0.7876, "step": 27070 }, { "epoch": 1.786057018484014, "grad_norm": 0.7732569575309753, "learning_rate": 1.7240281552105081e-06, "loss": 0.7914, "step": 27080 }, { "epoch": 1.786716572954969, "grad_norm": 0.6583324670791626, "learning_rate": 1.7135416737889625e-06, "loss": 0.8115, "step": 27090 }, { "epoch": 1.7873761274259237, "grad_norm": 0.6596052050590515, "learning_rate": 1.7030860501162371e-06, "loss": 0.8074, "step": 27100 }, { "epoch": 1.7880356818968788, "grad_norm": 0.5933216214179993, "learning_rate": 1.6926612980474655e-06, "loss": 0.8023, "step": 27110 }, { "epoch": 1.7886952363678335, "grad_norm": 0.6136610507965088, "learning_rate": 1.682267431396875e-06, "loss": 0.795, "step": 27120 }, { "epoch": 1.7893547908387883, "grad_norm": 0.6610358953475952, "learning_rate": 1.6719044639377595e-06, "loss": 0.7924, "step": 27130 }, { "epoch": 1.7900143453097432, "grad_norm": 0.607771098613739, "learning_rate": 1.6615724094024843e-06, "loss": 0.7755, "step": 27140 }, { "epoch": 1.7906738997806981, "grad_norm": 0.6132540106773376, "learning_rate": 1.6512712814824167e-06, "loss": 0.7878, "step": 27150 }, { "epoch": 1.791333454251653, "grad_norm": 0.6049851775169373, "learning_rate": 1.6410010938279818e-06, "loss": 0.7968, "step": 27160 }, { "epoch": 1.791993008722608, "grad_norm": 0.5582095980644226, "learning_rate": 1.6307618600485712e-06, "loss": 0.7967, "step": 27170 }, { "epoch": 1.7926525631935628, "grad_norm": 0.5795011520385742, "learning_rate": 1.6205535937125844e-06, "loss": 0.7944, "step": 27180 }, { "epoch": 1.7933121176645175, "grad_norm": 0.6117453575134277, "learning_rate": 1.6103763083473694e-06, "loss": 0.7785, "step": 27190 }, { "epoch": 1.7939716721354726, "grad_norm": 0.6477991938591003, "learning_rate": 1.6002300174392193e-06, "loss": 0.8261, "step": 27200 }, { "epoch": 1.7946312266064273, "grad_norm": 0.6463562846183777, "learning_rate": 1.5901147344333705e-06, "loss": 0.7661, "step": 27210 }, { "epoch": 1.7952907810773824, "grad_norm": 0.587986409664154, "learning_rate": 1.5800304727339482e-06, "loss": 0.8229, "step": 27220 }, { "epoch": 1.795950335548337, "grad_norm": 0.56048184633255, "learning_rate": 1.5699772457039936e-06, "loss": 0.8257, "step": 27230 }, { "epoch": 1.796609890019292, "grad_norm": 0.5849249958992004, "learning_rate": 1.5599550666653945e-06, "loss": 0.8239, "step": 27240 }, { "epoch": 1.7972694444902468, "grad_norm": 0.5611611008644104, "learning_rate": 1.5499639488989248e-06, "loss": 0.8773, "step": 27250 }, { "epoch": 1.7979289989612017, "grad_norm": 0.6698682904243469, "learning_rate": 1.5400039056441772e-06, "loss": 0.8124, "step": 27260 }, { "epoch": 1.7985885534321566, "grad_norm": 0.6456633806228638, "learning_rate": 1.530074950099583e-06, "loss": 0.7882, "step": 27270 }, { "epoch": 1.7992481079031113, "grad_norm": 0.642745852470398, "learning_rate": 1.520177095422362e-06, "loss": 0.8279, "step": 27280 }, { "epoch": 1.7999076623740664, "grad_norm": 0.5709918141365051, "learning_rate": 1.5103103547285252e-06, "loss": 0.7625, "step": 27290 }, { "epoch": 1.800567216845021, "grad_norm": 0.5563238859176636, "learning_rate": 1.5004747410928639e-06, "loss": 0.7995, "step": 27300 }, { "epoch": 1.8012267713159762, "grad_norm": 0.6466971039772034, "learning_rate": 1.4906702675489076e-06, "loss": 0.81, "step": 27310 }, { "epoch": 1.8018863257869309, "grad_norm": 0.6266707181930542, "learning_rate": 1.480896947088936e-06, "loss": 0.7501, "step": 27320 }, { "epoch": 1.8025458802578858, "grad_norm": 0.5325832962989807, "learning_rate": 1.4711547926639342e-06, "loss": 0.7601, "step": 27330 }, { "epoch": 1.8032054347288407, "grad_norm": 0.5419653654098511, "learning_rate": 1.4614438171835858e-06, "loss": 0.8051, "step": 27340 }, { "epoch": 1.8038649891997955, "grad_norm": 0.5886746048927307, "learning_rate": 1.451764033516273e-06, "loss": 0.8162, "step": 27350 }, { "epoch": 1.8045245436707504, "grad_norm": 0.6023467779159546, "learning_rate": 1.4421154544890293e-06, "loss": 0.7919, "step": 27360 }, { "epoch": 1.8051840981417053, "grad_norm": 0.6059507727622986, "learning_rate": 1.4324980928875547e-06, "loss": 0.7967, "step": 27370 }, { "epoch": 1.8058436526126602, "grad_norm": 0.5828139185905457, "learning_rate": 1.4229119614561555e-06, "loss": 0.8134, "step": 27380 }, { "epoch": 1.806503207083615, "grad_norm": 0.6271647810935974, "learning_rate": 1.4133570728977836e-06, "loss": 0.7957, "step": 27390 }, { "epoch": 1.80716276155457, "grad_norm": 0.5952603220939636, "learning_rate": 1.4038334398739666e-06, "loss": 0.7744, "step": 27400 }, { "epoch": 1.8078223160255247, "grad_norm": 0.5841818451881409, "learning_rate": 1.3943410750048291e-06, "loss": 0.8163, "step": 27410 }, { "epoch": 1.8084818704964798, "grad_norm": 0.583544135093689, "learning_rate": 1.3848799908690563e-06, "loss": 0.7842, "step": 27420 }, { "epoch": 1.8091414249674345, "grad_norm": 0.6535754203796387, "learning_rate": 1.375450200003875e-06, "loss": 0.8113, "step": 27430 }, { "epoch": 1.8098009794383894, "grad_norm": 0.5610626935958862, "learning_rate": 1.366051714905056e-06, "loss": 0.7657, "step": 27440 }, { "epoch": 1.8104605339093442, "grad_norm": 0.5700557827949524, "learning_rate": 1.3566845480268765e-06, "loss": 0.7881, "step": 27450 }, { "epoch": 1.8111200883802991, "grad_norm": 0.5087742805480957, "learning_rate": 1.3473487117821259e-06, "loss": 0.7937, "step": 27460 }, { "epoch": 1.811779642851254, "grad_norm": 0.5830112099647522, "learning_rate": 1.3380442185420505e-06, "loss": 0.8397, "step": 27470 }, { "epoch": 1.8124391973222087, "grad_norm": 0.5971534848213196, "learning_rate": 1.3287710806363958e-06, "loss": 0.8171, "step": 27480 }, { "epoch": 1.8130987517931638, "grad_norm": 0.6187090277671814, "learning_rate": 1.3195293103533263e-06, "loss": 0.7883, "step": 27490 }, { "epoch": 1.8137583062641185, "grad_norm": 0.5818140506744385, "learning_rate": 1.3103189199394628e-06, "loss": 0.8735, "step": 27500 }, { "epoch": 1.8144178607350736, "grad_norm": 0.6792356967926025, "learning_rate": 1.3011399215998338e-06, "loss": 0.7959, "step": 27510 }, { "epoch": 1.8150774152060283, "grad_norm": 0.5853057503700256, "learning_rate": 1.291992327497865e-06, "loss": 0.8166, "step": 27520 }, { "epoch": 1.8157369696769832, "grad_norm": 0.528980553150177, "learning_rate": 1.2828761497553825e-06, "loss": 0.7907, "step": 27530 }, { "epoch": 1.816396524147938, "grad_norm": 0.6840847730636597, "learning_rate": 1.273791400452562e-06, "loss": 0.7674, "step": 27540 }, { "epoch": 1.817056078618893, "grad_norm": 0.6091808676719666, "learning_rate": 1.2647380916279427e-06, "loss": 0.8216, "step": 27550 }, { "epoch": 1.8177156330898478, "grad_norm": 0.6639935374259949, "learning_rate": 1.2557162352784064e-06, "loss": 0.7533, "step": 27560 }, { "epoch": 1.8183751875608025, "grad_norm": 0.6252728700637817, "learning_rate": 1.2467258433591395e-06, "loss": 0.8153, "step": 27570 }, { "epoch": 1.8190347420317576, "grad_norm": 0.6612904071807861, "learning_rate": 1.2377669277836512e-06, "loss": 0.8006, "step": 27580 }, { "epoch": 1.8196942965027123, "grad_norm": 0.6393854022026062, "learning_rate": 1.2288395004237286e-06, "loss": 0.8181, "step": 27590 }, { "epoch": 1.8203538509736674, "grad_norm": 0.6946678757667542, "learning_rate": 1.219943573109439e-06, "loss": 0.8324, "step": 27600 }, { "epoch": 1.821013405444622, "grad_norm": 0.5817633867263794, "learning_rate": 1.2110791576290997e-06, "loss": 0.8268, "step": 27610 }, { "epoch": 1.821672959915577, "grad_norm": 0.4907081127166748, "learning_rate": 1.2022462657292843e-06, "loss": 0.7987, "step": 27620 }, { "epoch": 1.8223325143865319, "grad_norm": 0.6870168447494507, "learning_rate": 1.1934449091147825e-06, "loss": 0.8311, "step": 27630 }, { "epoch": 1.8229920688574868, "grad_norm": 0.6277094483375549, "learning_rate": 1.18467509944859e-06, "loss": 0.8072, "step": 27640 }, { "epoch": 1.8236516233284417, "grad_norm": 0.6111871004104614, "learning_rate": 1.175936848351919e-06, "loss": 0.8094, "step": 27650 }, { "epoch": 1.8243111777993966, "grad_norm": 0.6213318705558777, "learning_rate": 1.1672301674041403e-06, "loss": 0.7951, "step": 27660 }, { "epoch": 1.8249707322703514, "grad_norm": 0.5851269960403442, "learning_rate": 1.1585550681428054e-06, "loss": 0.7909, "step": 27670 }, { "epoch": 1.8256302867413061, "grad_norm": 0.6120691895484924, "learning_rate": 1.1499115620636103e-06, "loss": 0.7837, "step": 27680 }, { "epoch": 1.8262898412122612, "grad_norm": 0.6957132816314697, "learning_rate": 1.1412996606203818e-06, "loss": 0.8056, "step": 27690 }, { "epoch": 1.826949395683216, "grad_norm": 0.6566064953804016, "learning_rate": 1.1327193752250665e-06, "loss": 0.8448, "step": 27700 }, { "epoch": 1.827608950154171, "grad_norm": 0.6958504319190979, "learning_rate": 1.1241707172477245e-06, "loss": 0.818, "step": 27710 }, { "epoch": 1.8282685046251257, "grad_norm": 0.6409597396850586, "learning_rate": 1.1156536980164972e-06, "loss": 0.8035, "step": 27720 }, { "epoch": 1.8289280590960806, "grad_norm": 0.5831571817398071, "learning_rate": 1.107168328817601e-06, "loss": 0.8198, "step": 27730 }, { "epoch": 1.8295876135670355, "grad_norm": 0.6030648946762085, "learning_rate": 1.0987146208953164e-06, "loss": 0.7906, "step": 27740 }, { "epoch": 1.8302471680379904, "grad_norm": 0.6270460486412048, "learning_rate": 1.0902925854519603e-06, "loss": 0.816, "step": 27750 }, { "epoch": 1.8309067225089453, "grad_norm": 0.6238800287246704, "learning_rate": 1.0819022336478884e-06, "loss": 0.8364, "step": 27760 }, { "epoch": 1.8315662769799, "grad_norm": 0.7053459882736206, "learning_rate": 1.0735435766014657e-06, "loss": 0.8146, "step": 27770 }, { "epoch": 1.832225831450855, "grad_norm": 0.6827749609947205, "learning_rate": 1.0652166253890567e-06, "loss": 0.7585, "step": 27780 }, { "epoch": 1.8328853859218097, "grad_norm": 0.6241556406021118, "learning_rate": 1.0569213910450153e-06, "loss": 0.7673, "step": 27790 }, { "epoch": 1.8335449403927648, "grad_norm": 0.6750614643096924, "learning_rate": 1.04865788456166e-06, "loss": 0.8058, "step": 27800 }, { "epoch": 1.8342044948637195, "grad_norm": 0.6766597628593445, "learning_rate": 1.0404261168892786e-06, "loss": 0.8014, "step": 27810 }, { "epoch": 1.8348640493346744, "grad_norm": 0.6603577733039856, "learning_rate": 1.0322260989360788e-06, "loss": 0.7803, "step": 27820 }, { "epoch": 1.8355236038056293, "grad_norm": 0.5669465661048889, "learning_rate": 1.0240578415682157e-06, "loss": 0.7876, "step": 27830 }, { "epoch": 1.8361831582765842, "grad_norm": 0.6441722512245178, "learning_rate": 1.0159213556097474e-06, "loss": 0.7779, "step": 27840 }, { "epoch": 1.836842712747539, "grad_norm": 0.5952627658843994, "learning_rate": 1.0078166518426356e-06, "loss": 0.8614, "step": 27850 }, { "epoch": 1.8375022672184937, "grad_norm": 0.6190049052238464, "learning_rate": 9.997437410067228e-07, "loss": 0.7864, "step": 27860 }, { "epoch": 1.8381618216894489, "grad_norm": 0.6481174230575562, "learning_rate": 9.91702633799721e-07, "loss": 0.7902, "step": 27870 }, { "epoch": 1.8388213761604035, "grad_norm": 0.5373259782791138, "learning_rate": 9.836933408772009e-07, "loss": 0.7392, "step": 27880 }, { "epoch": 1.8394809306313586, "grad_norm": 0.600075364112854, "learning_rate": 9.757158728525673e-07, "loss": 0.7742, "step": 27890 }, { "epoch": 1.8401404851023133, "grad_norm": 0.5878193974494934, "learning_rate": 9.677702402970724e-07, "loss": 0.7823, "step": 27900 }, { "epoch": 1.8408000395732682, "grad_norm": 0.5557110905647278, "learning_rate": 9.598564537397548e-07, "loss": 0.8337, "step": 27910 }, { "epoch": 1.841459594044223, "grad_norm": 0.6779967546463013, "learning_rate": 9.519745236674704e-07, "loss": 0.7992, "step": 27920 }, { "epoch": 1.842119148515178, "grad_norm": 0.5710042119026184, "learning_rate": 9.441244605248561e-07, "loss": 0.7914, "step": 27930 }, { "epoch": 1.8427787029861329, "grad_norm": 0.5980215668678284, "learning_rate": 9.363062747143242e-07, "loss": 0.8313, "step": 27940 }, { "epoch": 1.8434382574570878, "grad_norm": 0.6596368551254272, "learning_rate": 9.285199765960456e-07, "loss": 0.7727, "step": 27950 }, { "epoch": 1.8440978119280427, "grad_norm": 0.6728708744049072, "learning_rate": 9.207655764879225e-07, "loss": 0.8028, "step": 27960 }, { "epoch": 1.8447573663989973, "grad_norm": 0.5726572871208191, "learning_rate": 9.130430846656074e-07, "loss": 0.7922, "step": 27970 }, { "epoch": 1.8454169208699525, "grad_norm": 0.5376827716827393, "learning_rate": 9.053525113624534e-07, "loss": 0.7349, "step": 27980 }, { "epoch": 1.8460764753409071, "grad_norm": 0.5951704978942871, "learning_rate": 8.976938667695278e-07, "loss": 0.7845, "step": 27990 }, { "epoch": 1.8467360298118622, "grad_norm": 0.5681121349334717, "learning_rate": 8.900671610355876e-07, "loss": 0.7981, "step": 28000 }, { "epoch": 1.847395584282817, "grad_norm": 0.5649076104164124, "learning_rate": 8.82472404267054e-07, "loss": 0.7924, "step": 28010 }, { "epoch": 1.8480551387537718, "grad_norm": 0.6413310766220093, "learning_rate": 8.749096065280266e-07, "loss": 0.7917, "step": 28020 }, { "epoch": 1.8487146932247267, "grad_norm": 0.6765559315681458, "learning_rate": 8.673787778402498e-07, "loss": 0.8659, "step": 28030 }, { "epoch": 1.8493742476956816, "grad_norm": 0.6600339412689209, "learning_rate": 8.59879928183105e-07, "loss": 0.8127, "step": 28040 }, { "epoch": 1.8500338021666365, "grad_norm": 0.569463312625885, "learning_rate": 8.524130674935876e-07, "loss": 0.808, "step": 28050 }, { "epoch": 1.8506933566375912, "grad_norm": 0.61843341588974, "learning_rate": 8.449782056663191e-07, "loss": 0.8086, "step": 28060 }, { "epoch": 1.8513529111085463, "grad_norm": 0.5859857201576233, "learning_rate": 8.375753525535046e-07, "loss": 0.8057, "step": 28070 }, { "epoch": 1.852012465579501, "grad_norm": 0.5519652366638184, "learning_rate": 8.30204517964947e-07, "loss": 0.7882, "step": 28080 }, { "epoch": 1.852672020050456, "grad_norm": 0.6738079190254211, "learning_rate": 8.228657116680056e-07, "loss": 0.8171, "step": 28090 }, { "epoch": 1.8533315745214107, "grad_norm": 0.6553736329078674, "learning_rate": 8.155589433876065e-07, "loss": 0.793, "step": 28100 }, { "epoch": 1.8539911289923656, "grad_norm": 0.6549698710441589, "learning_rate": 8.082842228062215e-07, "loss": 0.815, "step": 28110 }, { "epoch": 1.8546506834633205, "grad_norm": 0.5773028135299683, "learning_rate": 8.010415595638476e-07, "loss": 0.7916, "step": 28120 }, { "epoch": 1.8553102379342754, "grad_norm": 0.6732603311538696, "learning_rate": 7.938309632580132e-07, "loss": 0.8104, "step": 28130 }, { "epoch": 1.8559697924052303, "grad_norm": 0.5733674168586731, "learning_rate": 7.866524434437361e-07, "loss": 0.801, "step": 28140 }, { "epoch": 1.856629346876185, "grad_norm": 0.6804933547973633, "learning_rate": 7.795060096335488e-07, "loss": 0.809, "step": 28150 }, { "epoch": 1.85728890134714, "grad_norm": 0.7227495312690735, "learning_rate": 7.72391671297451e-07, "loss": 0.8407, "step": 28160 }, { "epoch": 1.8579484558180948, "grad_norm": 0.5979621410369873, "learning_rate": 7.653094378629183e-07, "loss": 0.7738, "step": 28170 }, { "epoch": 1.8586080102890499, "grad_norm": 0.5470470786094666, "learning_rate": 7.582593187148795e-07, "loss": 0.771, "step": 28180 }, { "epoch": 1.8592675647600045, "grad_norm": 0.647771954536438, "learning_rate": 7.512413231957061e-07, "loss": 0.8138, "step": 28190 }, { "epoch": 1.8599271192309594, "grad_norm": 0.6679745316505432, "learning_rate": 7.442554606052088e-07, "loss": 0.7772, "step": 28200 }, { "epoch": 1.8605866737019143, "grad_norm": 0.6812871098518372, "learning_rate": 7.373017402006133e-07, "loss": 0.8325, "step": 28210 }, { "epoch": 1.8612462281728692, "grad_norm": 0.5566513538360596, "learning_rate": 7.30380171196543e-07, "loss": 0.7712, "step": 28220 }, { "epoch": 1.8619057826438241, "grad_norm": 0.6013004779815674, "learning_rate": 7.234907627650361e-07, "loss": 0.8472, "step": 28230 }, { "epoch": 1.862565337114779, "grad_norm": 0.577357292175293, "learning_rate": 7.166335240354982e-07, "loss": 0.7835, "step": 28240 }, { "epoch": 1.863224891585734, "grad_norm": 0.6533622741699219, "learning_rate": 7.098084640947105e-07, "loss": 0.8029, "step": 28250 }, { "epoch": 1.8638844460566886, "grad_norm": 0.6401820778846741, "learning_rate": 7.030155919868165e-07, "loss": 0.7857, "step": 28260 }, { "epoch": 1.8645440005276437, "grad_norm": 0.5723109245300293, "learning_rate": 6.962549167132987e-07, "loss": 0.7685, "step": 28270 }, { "epoch": 1.8652035549985984, "grad_norm": 0.5786372423171997, "learning_rate": 6.895264472329771e-07, "loss": 0.7816, "step": 28280 }, { "epoch": 1.8658631094695535, "grad_norm": 0.6384173631668091, "learning_rate": 6.828301924620001e-07, "loss": 0.7928, "step": 28290 }, { "epoch": 1.8665226639405081, "grad_norm": 0.631347119808197, "learning_rate": 6.761661612738224e-07, "loss": 0.802, "step": 28300 }, { "epoch": 1.867182218411463, "grad_norm": 0.744253933429718, "learning_rate": 6.69534362499194e-07, "loss": 0.7876, "step": 28310 }, { "epoch": 1.867841772882418, "grad_norm": 0.6918014883995056, "learning_rate": 6.62934804926163e-07, "loss": 0.7935, "step": 28320 }, { "epoch": 1.8685013273533728, "grad_norm": 0.604925811290741, "learning_rate": 6.563674973000422e-07, "loss": 0.7913, "step": 28330 }, { "epoch": 1.8691608818243277, "grad_norm": 0.585300624370575, "learning_rate": 6.498324483234231e-07, "loss": 0.8186, "step": 28340 }, { "epoch": 1.8698204362952824, "grad_norm": 0.6663399934768677, "learning_rate": 6.433296666561339e-07, "loss": 0.8498, "step": 28350 }, { "epoch": 1.8704799907662375, "grad_norm": 0.5360284447669983, "learning_rate": 6.368591609152514e-07, "loss": 0.7987, "step": 28360 }, { "epoch": 1.8711395452371922, "grad_norm": 0.6080397367477417, "learning_rate": 6.30420939675086e-07, "loss": 0.8008, "step": 28370 }, { "epoch": 1.8717990997081473, "grad_norm": 0.5916748046875, "learning_rate": 6.240150114671578e-07, "loss": 0.7896, "step": 28380 }, { "epoch": 1.872458654179102, "grad_norm": 0.6351580023765564, "learning_rate": 6.176413847802098e-07, "loss": 0.8017, "step": 28390 }, { "epoch": 1.8731182086500568, "grad_norm": 0.6313516497612, "learning_rate": 6.113000680601611e-07, "loss": 0.8033, "step": 28400 }, { "epoch": 1.8737777631210117, "grad_norm": 0.5805239677429199, "learning_rate": 6.049910697101313e-07, "loss": 0.7932, "step": 28410 }, { "epoch": 1.8744373175919666, "grad_norm": 0.5967887043952942, "learning_rate": 5.987143980904026e-07, "loss": 0.833, "step": 28420 }, { "epoch": 1.8750968720629215, "grad_norm": 0.5833791494369507, "learning_rate": 5.924700615184298e-07, "loss": 0.7703, "step": 28430 }, { "epoch": 1.8757564265338764, "grad_norm": 0.6744887232780457, "learning_rate": 5.862580682688135e-07, "loss": 0.8229, "step": 28440 }, { "epoch": 1.8764159810048313, "grad_norm": 0.5459935069084167, "learning_rate": 5.80078426573294e-07, "loss": 0.795, "step": 28450 }, { "epoch": 1.877075535475786, "grad_norm": 0.6006172299385071, "learning_rate": 5.739311446207457e-07, "loss": 0.7936, "step": 28460 }, { "epoch": 1.877735089946741, "grad_norm": 0.6047661304473877, "learning_rate": 5.678162305571583e-07, "loss": 0.8336, "step": 28470 }, { "epoch": 1.8783946444176958, "grad_norm": 0.5724004507064819, "learning_rate": 5.61733692485636e-07, "loss": 0.7871, "step": 28480 }, { "epoch": 1.8790541988886507, "grad_norm": 0.658601701259613, "learning_rate": 5.556835384663673e-07, "loss": 0.7798, "step": 28490 }, { "epoch": 1.8797137533596056, "grad_norm": 0.6241977214813232, "learning_rate": 5.496657765166419e-07, "loss": 0.7971, "step": 28500 }, { "epoch": 1.8803733078305604, "grad_norm": 0.633460521697998, "learning_rate": 5.436804146108143e-07, "loss": 0.8022, "step": 28510 }, { "epoch": 1.8810328623015153, "grad_norm": 0.6179897785186768, "learning_rate": 5.377274606803123e-07, "loss": 0.8115, "step": 28520 }, { "epoch": 1.8816924167724702, "grad_norm": 0.5969645977020264, "learning_rate": 5.318069226136174e-07, "loss": 0.8075, "step": 28530 }, { "epoch": 1.8823519712434251, "grad_norm": 0.5832700133323669, "learning_rate": 5.259188082562483e-07, "loss": 0.8137, "step": 28540 }, { "epoch": 1.8830115257143798, "grad_norm": 0.6285104751586914, "learning_rate": 5.20063125410769e-07, "loss": 0.7557, "step": 28550 }, { "epoch": 1.883671080185335, "grad_norm": 0.5918856859207153, "learning_rate": 5.142398818367589e-07, "loss": 0.7935, "step": 28560 }, { "epoch": 1.8843306346562896, "grad_norm": 0.6324704885482788, "learning_rate": 5.084490852508172e-07, "loss": 0.7444, "step": 28570 }, { "epoch": 1.8849901891272447, "grad_norm": 0.5948597192764282, "learning_rate": 5.026907433265393e-07, "loss": 0.8408, "step": 28580 }, { "epoch": 1.8856497435981994, "grad_norm": 0.5209887623786926, "learning_rate": 4.969648636945184e-07, "loss": 0.8001, "step": 28590 }, { "epoch": 1.8863092980691543, "grad_norm": 0.6631389856338501, "learning_rate": 4.912714539423296e-07, "loss": 0.7824, "step": 28600 }, { "epoch": 1.8869688525401092, "grad_norm": 0.5967811942100525, "learning_rate": 4.856105216145212e-07, "loss": 0.7951, "step": 28610 }, { "epoch": 1.887628407011064, "grad_norm": 0.5898823738098145, "learning_rate": 4.799820742126066e-07, "loss": 0.7931, "step": 28620 }, { "epoch": 1.888287961482019, "grad_norm": 0.717066764831543, "learning_rate": 4.743861191950472e-07, "loss": 0.8245, "step": 28630 }, { "epoch": 1.8889475159529736, "grad_norm": 0.585137665271759, "learning_rate": 4.6882266397725305e-07, "loss": 0.8382, "step": 28640 }, { "epoch": 1.8896070704239287, "grad_norm": 0.6448143720626831, "learning_rate": 4.632917159315603e-07, "loss": 0.7635, "step": 28650 }, { "epoch": 1.8902666248948834, "grad_norm": 0.6555895209312439, "learning_rate": 4.577932823872394e-07, "loss": 0.8097, "step": 28660 }, { "epoch": 1.8909261793658385, "grad_norm": 0.5938135981559753, "learning_rate": 4.523273706304648e-07, "loss": 0.8214, "step": 28670 }, { "epoch": 1.8915857338367932, "grad_norm": 0.6311590075492859, "learning_rate": 4.4689398790431494e-07, "loss": 0.7846, "step": 28680 }, { "epoch": 1.892245288307748, "grad_norm": 0.6728568077087402, "learning_rate": 4.4149314140877474e-07, "loss": 0.7662, "step": 28690 }, { "epoch": 1.892904842778703, "grad_norm": 0.5750419497489929, "learning_rate": 4.361248383007027e-07, "loss": 0.7597, "step": 28700 }, { "epoch": 1.8935643972496579, "grad_norm": 0.6412707567214966, "learning_rate": 4.3078908569383346e-07, "loss": 0.7943, "step": 28710 }, { "epoch": 1.8942239517206128, "grad_norm": 0.6414803266525269, "learning_rate": 4.2548589065876945e-07, "loss": 0.8001, "step": 28720 }, { "epoch": 1.8948835061915676, "grad_norm": 0.6530488133430481, "learning_rate": 4.202152602229781e-07, "loss": 0.8507, "step": 28730 }, { "epoch": 1.8955430606625225, "grad_norm": 0.679801344871521, "learning_rate": 4.1497720137076145e-07, "loss": 0.7956, "step": 28740 }, { "epoch": 1.8962026151334772, "grad_norm": 0.5268911719322205, "learning_rate": 4.0977172104326723e-07, "loss": 0.7891, "step": 28750 }, { "epoch": 1.8968621696044323, "grad_norm": 0.5881314277648926, "learning_rate": 4.04598826138472e-07, "loss": 0.7669, "step": 28760 }, { "epoch": 1.897521724075387, "grad_norm": 0.6459960341453552, "learning_rate": 3.9945852351116474e-07, "loss": 0.8379, "step": 28770 }, { "epoch": 1.898181278546342, "grad_norm": 0.5741415023803711, "learning_rate": 3.943508199729579e-07, "loss": 0.8246, "step": 28780 }, { "epoch": 1.8988408330172968, "grad_norm": 0.6332618594169617, "learning_rate": 3.8927572229225384e-07, "loss": 0.7727, "step": 28790 }, { "epoch": 1.8995003874882517, "grad_norm": 0.5780948996543884, "learning_rate": 3.8423323719425365e-07, "loss": 0.7893, "step": 28800 }, { "epoch": 1.9001599419592066, "grad_norm": 0.5841256976127625, "learning_rate": 3.792233713609428e-07, "loss": 0.779, "step": 28810 }, { "epoch": 1.9008194964301615, "grad_norm": 0.5591938495635986, "learning_rate": 3.7424613143107746e-07, "loss": 0.7964, "step": 28820 }, { "epoch": 1.9014790509011164, "grad_norm": 0.5748834609985352, "learning_rate": 3.693015240001846e-07, "loss": 0.8756, "step": 28830 }, { "epoch": 1.902138605372071, "grad_norm": 0.5563206672668457, "learning_rate": 3.6438955562055065e-07, "loss": 0.7879, "step": 28840 }, { "epoch": 1.9027981598430261, "grad_norm": 0.6695645451545715, "learning_rate": 3.5951023280119953e-07, "loss": 0.8213, "step": 28850 }, { "epoch": 1.9034577143139808, "grad_norm": 0.6426726579666138, "learning_rate": 3.5466356200790364e-07, "loss": 0.7981, "step": 28860 }, { "epoch": 1.904117268784936, "grad_norm": 0.6324830651283264, "learning_rate": 3.498495496631726e-07, "loss": 0.7583, "step": 28870 }, { "epoch": 1.9047768232558906, "grad_norm": 0.70176100730896, "learning_rate": 3.4506820214622594e-07, "loss": 0.7816, "step": 28880 }, { "epoch": 1.9054363777268455, "grad_norm": 0.6347261667251587, "learning_rate": 3.403195257930064e-07, "loss": 0.8076, "step": 28890 }, { "epoch": 1.9060959321978004, "grad_norm": 0.6823022365570068, "learning_rate": 3.35603526896161e-07, "loss": 0.7579, "step": 28900 }, { "epoch": 1.9067554866687553, "grad_norm": 0.5782615542411804, "learning_rate": 3.309202117050353e-07, "loss": 0.8361, "step": 28910 }, { "epoch": 1.9074150411397102, "grad_norm": 0.6254626512527466, "learning_rate": 3.2626958642566487e-07, "loss": 0.8308, "step": 28920 }, { "epoch": 1.9080745956106648, "grad_norm": 0.5811460018157959, "learning_rate": 3.2165165722076453e-07, "loss": 0.84, "step": 28930 }, { "epoch": 1.90873415008162, "grad_norm": 0.51547771692276, "learning_rate": 3.170664302097226e-07, "loss": 0.8014, "step": 28940 }, { "epoch": 1.9093937045525746, "grad_norm": 0.5484378933906555, "learning_rate": 3.1251391146859264e-07, "loss": 0.7637, "step": 28950 }, { "epoch": 1.9100532590235297, "grad_norm": 0.5861138105392456, "learning_rate": 3.079941070300879e-07, "loss": 0.7827, "step": 28960 }, { "epoch": 1.9107128134944844, "grad_norm": 0.6178954243659973, "learning_rate": 3.035070228835674e-07, "loss": 0.7855, "step": 28970 }, { "epoch": 1.9113723679654393, "grad_norm": 0.5996459722518921, "learning_rate": 2.99052664975033e-07, "loss": 0.7935, "step": 28980 }, { "epoch": 1.9120319224363942, "grad_norm": 0.7222851514816284, "learning_rate": 2.9463103920712163e-07, "loss": 0.7818, "step": 28990 }, { "epoch": 1.912691476907349, "grad_norm": 0.6564216017723083, "learning_rate": 2.9024215143908796e-07, "loss": 0.8046, "step": 29000 }, { "epoch": 1.913351031378304, "grad_norm": 0.5750806927680969, "learning_rate": 2.858860074868158e-07, "loss": 0.854, "step": 29010 }, { "epoch": 1.9140105858492589, "grad_norm": 0.5890952348709106, "learning_rate": 2.8156261312279055e-07, "loss": 0.78, "step": 29020 }, { "epoch": 1.9146701403202138, "grad_norm": 0.5353766083717346, "learning_rate": 2.772719740760987e-07, "loss": 0.787, "step": 29030 }, { "epoch": 1.9153296947911684, "grad_norm": 0.6585463881492615, "learning_rate": 2.730140960324312e-07, "loss": 0.8043, "step": 29040 }, { "epoch": 1.9159892492621236, "grad_norm": 0.7087804079055786, "learning_rate": 2.6878898463405523e-07, "loss": 0.8154, "step": 29050 }, { "epoch": 1.9166488037330782, "grad_norm": 0.6948942542076111, "learning_rate": 2.645966454798282e-07, "loss": 0.8357, "step": 29060 }, { "epoch": 1.9173083582040333, "grad_norm": 0.6298587918281555, "learning_rate": 2.604370841251674e-07, "loss": 0.8252, "step": 29070 }, { "epoch": 1.917967912674988, "grad_norm": 0.5921646356582642, "learning_rate": 2.5631030608206653e-07, "loss": 0.7834, "step": 29080 }, { "epoch": 1.918627467145943, "grad_norm": 0.6113035678863525, "learning_rate": 2.522163168190678e-07, "loss": 0.8191, "step": 29090 }, { "epoch": 1.9192870216168978, "grad_norm": 0.6097885370254517, "learning_rate": 2.4815512176127044e-07, "loss": 0.8086, "step": 29100 }, { "epoch": 1.9199465760878527, "grad_norm": 0.5496877431869507, "learning_rate": 2.441267262903169e-07, "loss": 0.8146, "step": 29110 }, { "epoch": 1.9206061305588076, "grad_norm": 0.6302570104598999, "learning_rate": 2.401311357443786e-07, "loss": 0.7706, "step": 29120 }, { "epoch": 1.9212656850297622, "grad_norm": 0.5880969762802124, "learning_rate": 2.3616835541816483e-07, "loss": 0.833, "step": 29130 }, { "epoch": 1.9219252395007174, "grad_norm": 0.7146965861320496, "learning_rate": 2.322383905628972e-07, "loss": 0.814, "step": 29140 }, { "epoch": 1.922584793971672, "grad_norm": 0.6757823824882507, "learning_rate": 2.2834124638632658e-07, "loss": 0.834, "step": 29150 }, { "epoch": 1.9232443484426271, "grad_norm": 0.6999145746231079, "learning_rate": 2.2447692805269427e-07, "loss": 0.7789, "step": 29160 }, { "epoch": 1.9239039029135818, "grad_norm": 0.5567458271980286, "learning_rate": 2.2064544068275682e-07, "loss": 0.7861, "step": 29170 }, { "epoch": 1.9245634573845367, "grad_norm": 0.6425588726997375, "learning_rate": 2.1684678935375845e-07, "loss": 0.7737, "step": 29180 }, { "epoch": 1.9252230118554916, "grad_norm": 0.6440041661262512, "learning_rate": 2.130809790994337e-07, "loss": 0.7504, "step": 29190 }, { "epoch": 1.9258825663264465, "grad_norm": 0.6200813055038452, "learning_rate": 2.0934801490999635e-07, "loss": 0.821, "step": 29200 }, { "epoch": 1.9265421207974014, "grad_norm": 0.6447944045066833, "learning_rate": 2.0564790173213388e-07, "loss": 0.7804, "step": 29210 }, { "epoch": 1.927201675268356, "grad_norm": 0.5899184942245483, "learning_rate": 2.0198064446900756e-07, "loss": 0.814, "step": 29220 }, { "epoch": 1.9278612297393112, "grad_norm": 0.6010910272598267, "learning_rate": 1.9834624798023006e-07, "loss": 0.8052, "step": 29230 }, { "epoch": 1.9285207842102658, "grad_norm": 0.6138426065444946, "learning_rate": 1.9474471708188225e-07, "loss": 0.8303, "step": 29240 }, { "epoch": 1.929180338681221, "grad_norm": 0.5905739068984985, "learning_rate": 1.9117605654648264e-07, "loss": 0.8096, "step": 29250 }, { "epoch": 1.9298398931521756, "grad_norm": 0.5698313117027283, "learning_rate": 1.8764027110299008e-07, "loss": 0.8557, "step": 29260 }, { "epoch": 1.9304994476231305, "grad_norm": 0.6312277913093567, "learning_rate": 1.8413736543681503e-07, "loss": 0.7648, "step": 29270 }, { "epoch": 1.9311590020940854, "grad_norm": 0.7178078293800354, "learning_rate": 1.8066734418978047e-07, "loss": 0.7871, "step": 29280 }, { "epoch": 1.9318185565650403, "grad_norm": 0.5866417288780212, "learning_rate": 1.7723021196014433e-07, "loss": 0.7638, "step": 29290 }, { "epoch": 1.9324781110359952, "grad_norm": 0.5741631388664246, "learning_rate": 1.7382597330257432e-07, "loss": 0.8046, "step": 29300 }, { "epoch": 1.93313766550695, "grad_norm": 0.5900196433067322, "learning_rate": 1.7045463272815366e-07, "loss": 0.7694, "step": 29310 }, { "epoch": 1.933797219977905, "grad_norm": 0.6223501563072205, "learning_rate": 1.6711619470437258e-07, "loss": 0.8038, "step": 29320 }, { "epoch": 1.9344567744488597, "grad_norm": 0.5457992553710938, "learning_rate": 1.638106636551201e-07, "loss": 0.8517, "step": 29330 }, { "epoch": 1.9351163289198148, "grad_norm": 0.5838644504547119, "learning_rate": 1.6053804396067295e-07, "loss": 0.7684, "step": 29340 }, { "epoch": 1.9357758833907694, "grad_norm": 0.6603797078132629, "learning_rate": 1.5729833995770383e-07, "loss": 0.797, "step": 29350 }, { "epoch": 1.9364354378617246, "grad_norm": 0.6109535694122314, "learning_rate": 1.540915559392675e-07, "loss": 0.8132, "step": 29360 }, { "epoch": 1.9370949923326792, "grad_norm": 0.6293129324913025, "learning_rate": 1.5091769615478702e-07, "loss": 0.8362, "step": 29370 }, { "epoch": 1.9377545468036341, "grad_norm": 0.6027792692184448, "learning_rate": 1.4777676481006475e-07, "loss": 0.7832, "step": 29380 }, { "epoch": 1.938414101274589, "grad_norm": 0.6462914943695068, "learning_rate": 1.4466876606726297e-07, "loss": 0.7979, "step": 29390 }, { "epoch": 1.939073655745544, "grad_norm": 0.5773576498031616, "learning_rate": 1.4159370404490945e-07, "loss": 0.7953, "step": 29400 }, { "epoch": 1.9397332102164988, "grad_norm": 0.5936535000801086, "learning_rate": 1.3855158281787794e-07, "loss": 0.7555, "step": 29410 }, { "epoch": 1.9403927646874535, "grad_norm": 0.5910260081291199, "learning_rate": 1.3554240641740211e-07, "loss": 0.7682, "step": 29420 }, { "epoch": 1.9410523191584086, "grad_norm": 0.6144441366195679, "learning_rate": 1.3256617883104783e-07, "loss": 0.7915, "step": 29430 }, { "epoch": 1.9417118736293633, "grad_norm": 0.6245831251144409, "learning_rate": 1.296229040027269e-07, "loss": 0.8063, "step": 29440 }, { "epoch": 1.9423714281003184, "grad_norm": 0.5887926816940308, "learning_rate": 1.2671258583268064e-07, "loss": 0.7825, "step": 29450 }, { "epoch": 1.943030982571273, "grad_norm": 0.7208525538444519, "learning_rate": 1.238352281774824e-07, "loss": 0.8249, "step": 29460 }, { "epoch": 1.943690537042228, "grad_norm": 0.585756778717041, "learning_rate": 1.2099083485002105e-07, "loss": 0.7675, "step": 29470 }, { "epoch": 1.9443500915131828, "grad_norm": 0.6151425242424011, "learning_rate": 1.1817940961950935e-07, "loss": 0.7904, "step": 29480 }, { "epoch": 1.9450096459841377, "grad_norm": 0.6283534169197083, "learning_rate": 1.1540095621147274e-07, "loss": 0.8098, "step": 29490 }, { "epoch": 1.9456692004550926, "grad_norm": 0.6851007342338562, "learning_rate": 1.1265547830774381e-07, "loss": 0.8109, "step": 29500 }, { "epoch": 1.9463287549260473, "grad_norm": 0.5581753849983215, "learning_rate": 1.0994297954645127e-07, "loss": 0.7712, "step": 29510 }, { "epoch": 1.9469883093970024, "grad_norm": 0.6845225691795349, "learning_rate": 1.0726346352203376e-07, "loss": 0.8152, "step": 29520 }, { "epoch": 1.947647863867957, "grad_norm": 0.605595588684082, "learning_rate": 1.0461693378521487e-07, "loss": 0.8162, "step": 29530 }, { "epoch": 1.9483074183389122, "grad_norm": 0.6354922652244568, "learning_rate": 1.0200339384300872e-07, "loss": 0.8063, "step": 29540 }, { "epoch": 1.9489669728098669, "grad_norm": 0.6054199934005737, "learning_rate": 9.942284715871442e-08, "loss": 0.7971, "step": 29550 }, { "epoch": 1.9496265272808218, "grad_norm": 0.6663618683815002, "learning_rate": 9.687529715191047e-08, "loss": 0.7953, "step": 29560 }, { "epoch": 1.9502860817517766, "grad_norm": 0.5727202296257019, "learning_rate": 9.436074719844923e-08, "loss": 0.7885, "step": 29570 }, { "epoch": 1.9509456362227315, "grad_norm": 0.6474148035049438, "learning_rate": 9.187920063045141e-08, "loss": 0.832, "step": 29580 }, { "epoch": 1.9516051906936864, "grad_norm": 0.6238139867782593, "learning_rate": 8.943066073631435e-08, "loss": 0.8041, "step": 29590 }, { "epoch": 1.9522647451646413, "grad_norm": 0.5499809980392456, "learning_rate": 8.701513076067869e-08, "loss": 0.7788, "step": 29600 }, { "epoch": 1.9529242996355962, "grad_norm": 0.6836000084877014, "learning_rate": 8.463261390446175e-08, "loss": 0.7739, "step": 29610 }, { "epoch": 1.9535838541065509, "grad_norm": 0.6283951997756958, "learning_rate": 8.228311332481864e-08, "loss": 0.772, "step": 29620 }, { "epoch": 1.954243408577506, "grad_norm": 0.5444614291191101, "learning_rate": 7.996663213516165e-08, "loss": 0.7846, "step": 29630 }, { "epoch": 1.9549029630484607, "grad_norm": 0.5624345541000366, "learning_rate": 7.768317340514642e-08, "loss": 0.7867, "step": 29640 }, { "epoch": 1.9555625175194158, "grad_norm": 0.5255836844444275, "learning_rate": 7.543274016066915e-08, "loss": 0.8102, "step": 29650 }, { "epoch": 1.9562220719903705, "grad_norm": 0.574588418006897, "learning_rate": 7.321533538386105e-08, "loss": 0.8184, "step": 29660 }, { "epoch": 1.9568816264613254, "grad_norm": 0.5666007995605469, "learning_rate": 7.103096201308556e-08, "loss": 0.765, "step": 29670 }, { "epoch": 1.9575411809322802, "grad_norm": 0.5659819841384888, "learning_rate": 6.887962294294393e-08, "loss": 0.806, "step": 29680 }, { "epoch": 1.9582007354032351, "grad_norm": 0.5693690180778503, "learning_rate": 6.676132102424737e-08, "loss": 0.8027, "step": 29690 }, { "epoch": 1.95886028987419, "grad_norm": 0.5768720507621765, "learning_rate": 6.46760590640394e-08, "loss": 0.779, "step": 29700 }, { "epoch": 1.9595198443451447, "grad_norm": 0.606207013130188, "learning_rate": 6.26238398255763e-08, "loss": 0.7832, "step": 29710 }, { "epoch": 1.9601793988160998, "grad_norm": 0.6650753021240234, "learning_rate": 6.060466602833271e-08, "loss": 0.8116, "step": 29720 }, { "epoch": 1.9608389532870545, "grad_norm": 0.6604939103126526, "learning_rate": 5.8618540347990525e-08, "loss": 0.8095, "step": 29730 }, { "epoch": 1.9614985077580096, "grad_norm": 0.5078802108764648, "learning_rate": 5.6665465416436136e-08, "loss": 0.7879, "step": 29740 }, { "epoch": 1.9621580622289643, "grad_norm": 0.6700655221939087, "learning_rate": 5.4745443821763166e-08, "loss": 0.8114, "step": 29750 }, { "epoch": 1.9628176166999192, "grad_norm": 0.6484373807907104, "learning_rate": 5.285847810826139e-08, "loss": 0.8497, "step": 29760 }, { "epoch": 1.963477171170874, "grad_norm": 0.6642829775810242, "learning_rate": 5.100457077642229e-08, "loss": 0.8363, "step": 29770 }, { "epoch": 1.964136725641829, "grad_norm": 0.5557263493537903, "learning_rate": 4.918372428292517e-08, "loss": 0.8005, "step": 29780 }, { "epoch": 1.9647962801127838, "grad_norm": 0.6279803514480591, "learning_rate": 4.7395941040639933e-08, "loss": 0.8498, "step": 29790 }, { "epoch": 1.9654558345837387, "grad_norm": 0.5895106196403503, "learning_rate": 4.564122341862709e-08, "loss": 0.8074, "step": 29800 }, { "epoch": 1.9661153890546936, "grad_norm": 0.6393919587135315, "learning_rate": 4.391957374212385e-08, "loss": 0.8016, "step": 29810 }, { "epoch": 1.9667749435256483, "grad_norm": 0.6000730395317078, "learning_rate": 4.2230994292558054e-08, "loss": 0.7748, "step": 29820 }, { "epoch": 1.9674344979966034, "grad_norm": 0.587570309638977, "learning_rate": 4.057548730752592e-08, "loss": 0.7784, "step": 29830 }, { "epoch": 1.968094052467558, "grad_norm": 0.6112786531448364, "learning_rate": 3.8953054980800376e-08, "loss": 0.8131, "step": 29840 }, { "epoch": 1.9687536069385132, "grad_norm": 0.6138783693313599, "learning_rate": 3.736369946232554e-08, "loss": 0.8195, "step": 29850 }, { "epoch": 1.9694131614094679, "grad_norm": 0.728312611579895, "learning_rate": 3.58074228582167e-08, "loss": 0.8106, "step": 29860 }, { "epoch": 1.9700727158804228, "grad_norm": 0.6900039911270142, "learning_rate": 3.428422723075475e-08, "loss": 0.7994, "step": 29870 }, { "epoch": 1.9707322703513777, "grad_norm": 0.5480231046676636, "learning_rate": 3.279411459838067e-08, "loss": 0.7692, "step": 29880 }, { "epoch": 1.9713918248223326, "grad_norm": 0.6333634853363037, "learning_rate": 3.133708693570103e-08, "loss": 0.7794, "step": 29890 }, { "epoch": 1.9720513792932874, "grad_norm": 0.6649506688117981, "learning_rate": 2.9913146173474185e-08, "loss": 0.787, "step": 29900 }, { "epoch": 1.9727109337642421, "grad_norm": 0.4889037311077118, "learning_rate": 2.8522294198618516e-08, "loss": 0.7896, "step": 29910 }, { "epoch": 1.9733704882351972, "grad_norm": 0.6194847226142883, "learning_rate": 2.7164532854201395e-08, "loss": 0.808, "step": 29920 }, { "epoch": 1.974030042706152, "grad_norm": 0.6829218864440918, "learning_rate": 2.583986393944471e-08, "loss": 0.8069, "step": 29930 }, { "epoch": 1.974689597177107, "grad_norm": 0.6882838606834412, "learning_rate": 2.454828920971375e-08, "loss": 0.788, "step": 29940 }, { "epoch": 1.9753491516480617, "grad_norm": 0.6496096849441528, "learning_rate": 2.328981037652833e-08, "loss": 0.7632, "step": 29950 }, { "epoch": 1.9760087061190166, "grad_norm": 0.5673264861106873, "learning_rate": 2.206442910753781e-08, "loss": 0.7861, "step": 29960 }, { "epoch": 1.9766682605899715, "grad_norm": 0.6036912798881531, "learning_rate": 2.0872147026543277e-08, "loss": 0.806, "step": 29970 }, { "epoch": 1.9773278150609264, "grad_norm": 0.5649986863136292, "learning_rate": 1.971296571348369e-08, "loss": 0.8245, "step": 29980 }, { "epoch": 1.9779873695318813, "grad_norm": 0.5428677201271057, "learning_rate": 1.8586886704430318e-08, "loss": 0.8154, "step": 29990 }, { "epoch": 1.978646924002836, "grad_norm": 0.602078378200531, "learning_rate": 1.7493911491595072e-08, "loss": 0.774, "step": 30000 }, { "epoch": 1.979306478473791, "grad_norm": 0.6510676741600037, "learning_rate": 1.6434041523316622e-08, "loss": 0.793, "step": 30010 }, { "epoch": 1.9799660329447457, "grad_norm": 0.6513907313346863, "learning_rate": 1.5407278204068732e-08, "loss": 0.7836, "step": 30020 }, { "epoch": 1.9806255874157008, "grad_norm": 0.6857253313064575, "learning_rate": 1.4413622894457468e-08, "loss": 0.7807, "step": 30030 }, { "epoch": 1.9812851418866555, "grad_norm": 0.6612505316734314, "learning_rate": 1.345307691120734e-08, "loss": 0.7765, "step": 30040 }, { "epoch": 1.9819446963576104, "grad_norm": 0.6458035111427307, "learning_rate": 1.2525641527175168e-08, "loss": 0.815, "step": 30050 }, { "epoch": 1.9826042508285653, "grad_norm": 0.5740944743156433, "learning_rate": 1.1631317971341759e-08, "loss": 0.8259, "step": 30060 }, { "epoch": 1.9832638052995202, "grad_norm": 0.6129186749458313, "learning_rate": 1.0770107428806352e-08, "loss": 0.755, "step": 30070 }, { "epoch": 1.983923359770475, "grad_norm": 0.6856721043586731, "learning_rate": 9.9420110407894e-09, "loss": 0.83, "step": 30080 }, { "epoch": 1.98458291424143, "grad_norm": 0.5338472127914429, "learning_rate": 9.14702990463534e-09, "loss": 0.7711, "step": 30090 }, { "epoch": 1.9852424687123849, "grad_norm": 0.5985947251319885, "learning_rate": 8.385165073801493e-09, "loss": 0.7747, "step": 30100 }, { "epoch": 1.9859020231833395, "grad_norm": 0.6340281367301941, "learning_rate": 7.656417557863615e-09, "loss": 0.7788, "step": 30110 }, { "epoch": 1.9865615776542946, "grad_norm": 0.6839861869812012, "learning_rate": 6.960788322513123e-09, "loss": 0.8268, "step": 30120 }, { "epoch": 1.9872211321252493, "grad_norm": 0.6112245321273804, "learning_rate": 6.29827828955154e-09, "loss": 0.7713, "step": 30130 }, { "epoch": 1.9878806865962044, "grad_norm": 0.6058304309844971, "learning_rate": 5.668888336898826e-09, "loss": 0.8565, "step": 30140 }, { "epoch": 1.988540241067159, "grad_norm": 0.5842820405960083, "learning_rate": 5.0726192985794955e-09, "loss": 0.788, "step": 30150 }, { "epoch": 1.989199795538114, "grad_norm": 0.6127521395683289, "learning_rate": 4.509471964730949e-09, "loss": 0.8039, "step": 30160 }, { "epoch": 1.9898593500090689, "grad_norm": 0.5821914076805115, "learning_rate": 3.979447081606247e-09, "loss": 0.8399, "step": 30170 }, { "epoch": 1.9905189044800238, "grad_norm": 0.5806571245193481, "learning_rate": 3.4825453515574536e-09, "loss": 0.8254, "step": 30180 }, { "epoch": 1.9911784589509787, "grad_norm": 0.6134341359138489, "learning_rate": 3.018767433046743e-09, "loss": 0.7945, "step": 30190 }, { "epoch": 1.9918380134219333, "grad_norm": 0.5684574842453003, "learning_rate": 2.5881139406436216e-09, "loss": 0.7578, "step": 30200 }, { "epoch": 1.9924975678928885, "grad_norm": 0.575101912021637, "learning_rate": 2.190585445022153e-09, "loss": 0.8154, "step": 30210 }, { "epoch": 1.9931571223638431, "grad_norm": 0.5798953771591187, "learning_rate": 1.826182472966509e-09, "loss": 0.7824, "step": 30220 }, { "epoch": 1.9938166768347982, "grad_norm": 0.5315937399864197, "learning_rate": 1.4949055073570916e-09, "loss": 0.7947, "step": 30230 }, { "epoch": 1.994476231305753, "grad_norm": 0.5395314693450928, "learning_rate": 1.1967549871816363e-09, "loss": 0.7722, "step": 30240 }, { "epoch": 1.9951357857767078, "grad_norm": 0.5870974659919739, "learning_rate": 9.317313075296597e-10, "loss": 0.8507, "step": 30250 }, { "epoch": 1.9957953402476627, "grad_norm": 0.5886339545249939, "learning_rate": 6.998348195980109e-10, "loss": 0.8084, "step": 30260 }, { "epoch": 1.9964548947186176, "grad_norm": 0.619418740272522, "learning_rate": 5.010658306742188e-10, "loss": 0.7772, "step": 30270 }, { "epoch": 1.9971144491895725, "grad_norm": 0.6857271790504456, "learning_rate": 3.354246041614717e-10, "loss": 0.8017, "step": 30280 }, { "epoch": 1.9977740036605272, "grad_norm": 0.6156572699546814, "learning_rate": 2.0291135955086138e-10, "loss": 0.7994, "step": 30290 }, { "epoch": 1.9984335581314823, "grad_norm": 0.590694010257721, "learning_rate": 1.035262724463637e-10, "loss": 0.7933, "step": 30300 }, { "epoch": 1.999093112602437, "grad_norm": 0.5420209169387817, "learning_rate": 3.726947454263385e-11, "loss": 0.8108, "step": 30310 }, { "epoch": 1.999752667073392, "grad_norm": 0.6134430170059204, "learning_rate": 4.1410536416597314e-12, "loss": 0.7825, "step": 30320 }, { "epoch": 2.0, "step": 30324, "total_flos": 1.277030840488113e+19, "train_loss": 0.8469410036818174, "train_runtime": 209948.1178, "train_samples_per_second": 2.311, "train_steps_per_second": 0.144 } ], "logging_steps": 10, "max_steps": 30324, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.277030840488113e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }