{ "best_global_step": 8530, "best_metric": 3.07828903, "best_model_checkpoint": "/inspire/hdd/project/deepanalysis/guitao-25013/Muse/workspace/Finals/ckpt/Muse_1.7b_main_3e-4/v0-20251228-133339/checkpoint-8530", "epoch": 5.0, "eval_steps": 500, "global_step": 8530, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005863383172090296, "grad_norm": 314.3364817194584, "learning_rate": 1.7584994138335285e-07, "loss": 21.1610107421875, "step": 1, "token_acc": 0.0073373194124074295 }, { "epoch": 0.0011726766344180592, "grad_norm": 312.62903799335515, "learning_rate": 3.516998827667057e-07, "loss": 21.174728393554688, "step": 2, "token_acc": 0.007801702527263515 }, { "epoch": 0.001759014951627089, "grad_norm": 314.0437118110648, "learning_rate": 5.275498241500586e-07, "loss": 21.19637107849121, "step": 3, "token_acc": 0.007983309100539968 }, { "epoch": 0.0023453532688361184, "grad_norm": 316.275601610318, "learning_rate": 7.033997655334114e-07, "loss": 21.180936813354492, "step": 4, "token_acc": 0.008063862867296412 }, { "epoch": 0.002931691586045148, "grad_norm": 314.08093038743345, "learning_rate": 8.792497069167643e-07, "loss": 21.104143142700195, "step": 5, "token_acc": 0.007759480832185766 }, { "epoch": 0.003518029903254178, "grad_norm": 312.4682647769296, "learning_rate": 1.0550996483001172e-06, "loss": 21.098918914794922, "step": 6, "token_acc": 0.007825808905820153 }, { "epoch": 0.004104368220463207, "grad_norm": 309.28568466174437, "learning_rate": 1.2309495896834702e-06, "loss": 20.95001220703125, "step": 7, "token_acc": 0.007920341602319884 }, { "epoch": 0.004690706537672237, "grad_norm": 308.1929230830652, "learning_rate": 1.4067995310668228e-06, "loss": 20.90537452697754, "step": 8, "token_acc": 0.007910641394804291 }, { "epoch": 0.005277044854881266, "grad_norm": 284.315801687854, "learning_rate": 1.5826494724501758e-06, "loss": 20.338947296142578, "step": 9, "token_acc": 0.008087204886554487 }, { "epoch": 0.005863383172090296, "grad_norm": 275.5431419082845, "learning_rate": 1.7584994138335286e-06, "loss": 20.183818817138672, "step": 10, "token_acc": 0.007973904505471146 }, { "epoch": 0.006449721489299325, "grad_norm": 139.3389914655089, "learning_rate": 1.9343493552168814e-06, "loss": 18.946557998657227, "step": 11, "token_acc": 0.008094707025011912 }, { "epoch": 0.007036059806508356, "grad_norm": 132.4378913844216, "learning_rate": 2.1101992966002344e-06, "loss": 18.853530883789062, "step": 12, "token_acc": 0.008527825313519165 }, { "epoch": 0.007622398123717385, "grad_norm": 121.25647774247827, "learning_rate": 2.286049237983587e-06, "loss": 18.666290283203125, "step": 13, "token_acc": 0.007512774624691704 }, { "epoch": 0.008208736440926415, "grad_norm": 116.29502434897812, "learning_rate": 2.4618991793669404e-06, "loss": 18.575685501098633, "step": 14, "token_acc": 0.007071640011564332 }, { "epoch": 0.008795074758135445, "grad_norm": 105.87930744716077, "learning_rate": 2.637749120750293e-06, "loss": 17.826993942260742, "step": 15, "token_acc": 0.0073926671608984616 }, { "epoch": 0.009381413075344474, "grad_norm": 107.22465537666224, "learning_rate": 2.8135990621336455e-06, "loss": 17.77083969116211, "step": 16, "token_acc": 0.007343478704933102 }, { "epoch": 0.009967751392553504, "grad_norm": 104.42431797549932, "learning_rate": 2.9894490035169985e-06, "loss": 17.671676635742188, "step": 17, "token_acc": 0.007584964061656072 }, { "epoch": 0.010554089709762533, "grad_norm": 97.1517823913138, "learning_rate": 3.1652989449003515e-06, "loss": 17.443300247192383, "step": 18, "token_acc": 0.008030187308725584 }, { "epoch": 0.011140428026971563, "grad_norm": 92.92248978237245, "learning_rate": 3.3411488862837045e-06, "loss": 17.25950813293457, "step": 19, "token_acc": 0.007852151242614505 }, { "epoch": 0.011726766344180592, "grad_norm": 90.75908176544272, "learning_rate": 3.516998827667057e-06, "loss": 17.083738327026367, "step": 20, "token_acc": 0.008399767699699065 }, { "epoch": 0.012313104661389622, "grad_norm": 89.40671696025731, "learning_rate": 3.6928487690504097e-06, "loss": 16.089990615844727, "step": 21, "token_acc": 0.008510286705775428 }, { "epoch": 0.01289944297859865, "grad_norm": 86.40882487547461, "learning_rate": 3.868698710433763e-06, "loss": 15.813337326049805, "step": 22, "token_acc": 0.007770580466637318 }, { "epoch": 0.013485781295807681, "grad_norm": 80.45076228251688, "learning_rate": 4.044548651817115e-06, "loss": 15.548316955566406, "step": 23, "token_acc": 0.007433676784055582 }, { "epoch": 0.014072119613016711, "grad_norm": 74.73025400426289, "learning_rate": 4.220398593200469e-06, "loss": 15.295755386352539, "step": 24, "token_acc": 0.00691768826619965 }, { "epoch": 0.01465845793022574, "grad_norm": 67.36641983888083, "learning_rate": 4.396248534583821e-06, "loss": 14.949100494384766, "step": 25, "token_acc": 0.007036510698205677 }, { "epoch": 0.01524479624743477, "grad_norm": 62.37981132861096, "learning_rate": 4.572098475967174e-06, "loss": 14.670554161071777, "step": 26, "token_acc": 0.006551926712826025 }, { "epoch": 0.0158311345646438, "grad_norm": 58.27214205890374, "learning_rate": 4.7479484173505265e-06, "loss": 14.401126861572266, "step": 27, "token_acc": 0.006652412715889741 }, { "epoch": 0.01641747288185283, "grad_norm": 50.65676588449631, "learning_rate": 4.923798358733881e-06, "loss": 13.930343627929688, "step": 28, "token_acc": 0.0062067179789200054 }, { "epoch": 0.017003811199061858, "grad_norm": 44.302864497754314, "learning_rate": 5.099648300117233e-06, "loss": 13.473762512207031, "step": 29, "token_acc": 0.006410205259560116 }, { "epoch": 0.01759014951627089, "grad_norm": 37.42459128972729, "learning_rate": 5.275498241500586e-06, "loss": 13.176072120666504, "step": 30, "token_acc": 0.005870950610727562 }, { "epoch": 0.01817648783347992, "grad_norm": 30.528500032723407, "learning_rate": 5.4513481828839385e-06, "loss": 12.937468528747559, "step": 31, "token_acc": 0.005615265927283757 }, { "epoch": 0.018762826150688947, "grad_norm": 28.116763303453823, "learning_rate": 5.627198124267291e-06, "loss": 12.739139556884766, "step": 32, "token_acc": 0.006037305845732461 }, { "epoch": 0.019349164467897976, "grad_norm": 21.40897064271037, "learning_rate": 5.8030480656506445e-06, "loss": 12.558815956115723, "step": 33, "token_acc": 0.005980296496805262 }, { "epoch": 0.019935502785107008, "grad_norm": 18.852855583335938, "learning_rate": 5.978898007033997e-06, "loss": 12.391048431396484, "step": 34, "token_acc": 0.006346509549639421 }, { "epoch": 0.020521841102316037, "grad_norm": 14.823310534361715, "learning_rate": 6.15474794841735e-06, "loss": 12.246835708618164, "step": 35, "token_acc": 0.007395827547076982 }, { "epoch": 0.021108179419525065, "grad_norm": 22.27816060587619, "learning_rate": 6.330597889800703e-06, "loss": 12.179847717285156, "step": 36, "token_acc": 0.007873791190272485 }, { "epoch": 0.021694517736734097, "grad_norm": 12.621183835541824, "learning_rate": 6.506447831184056e-06, "loss": 12.111846923828125, "step": 37, "token_acc": 0.008229866944553877 }, { "epoch": 0.022280856053943126, "grad_norm": 12.78972863393983, "learning_rate": 6.682297772567409e-06, "loss": 12.076262474060059, "step": 38, "token_acc": 0.008645978076269878 }, { "epoch": 0.022867194371152155, "grad_norm": 4.961409677636954, "learning_rate": 6.858147713950762e-06, "loss": 12.016976356506348, "step": 39, "token_acc": 0.009117272958534725 }, { "epoch": 0.023453532688361183, "grad_norm": 6.941489122321791, "learning_rate": 7.033997655334114e-06, "loss": 11.989545822143555, "step": 40, "token_acc": 0.009394958210124087 }, { "epoch": 0.024039871005570215, "grad_norm": 4.170593409239079, "learning_rate": 7.209847596717467e-06, "loss": 11.965530395507812, "step": 41, "token_acc": 0.00956037138569709 }, { "epoch": 0.024626209322779244, "grad_norm": 2.3209278853204203, "learning_rate": 7.385697538100819e-06, "loss": 11.943653106689453, "step": 42, "token_acc": 0.009718547282948487 }, { "epoch": 0.025212547639988273, "grad_norm": 1.5273919402872675, "learning_rate": 7.561547479484174e-06, "loss": 11.94092845916748, "step": 43, "token_acc": 0.008713464673327469 }, { "epoch": 0.0257988859571973, "grad_norm": 1.105332600457078, "learning_rate": 7.737397420867525e-06, "loss": 11.921232223510742, "step": 44, "token_acc": 0.00915239400365783 }, { "epoch": 0.026385224274406333, "grad_norm": 0.9484775593306607, "learning_rate": 7.913247362250878e-06, "loss": 11.907386779785156, "step": 45, "token_acc": 0.00945462625920596 }, { "epoch": 0.026971562591615362, "grad_norm": 0.7503746648125605, "learning_rate": 8.08909730363423e-06, "loss": 11.89285659790039, "step": 46, "token_acc": 0.009954296567402753 }, { "epoch": 0.02755790090882439, "grad_norm": 0.9308641772913042, "learning_rate": 8.264947245017583e-06, "loss": 11.886062622070312, "step": 47, "token_acc": 0.00974159406980461 }, { "epoch": 0.028144239226033423, "grad_norm": 0.7601436610741698, "learning_rate": 8.440797186400937e-06, "loss": 11.872444152832031, "step": 48, "token_acc": 0.010114053269194533 }, { "epoch": 0.02873057754324245, "grad_norm": 0.6319880135257516, "learning_rate": 8.61664712778429e-06, "loss": 11.867204666137695, "step": 49, "token_acc": 0.00965653692444547 }, { "epoch": 0.02931691586045148, "grad_norm": 0.628498674085733, "learning_rate": 8.792497069167643e-06, "loss": 11.849407196044922, "step": 50, "token_acc": 0.010177322843888137 }, { "epoch": 0.02990325417766051, "grad_norm": 0.7146005612349628, "learning_rate": 8.968347010550995e-06, "loss": 11.840921401977539, "step": 51, "token_acc": 0.00994983299427297 }, { "epoch": 0.03048959249486954, "grad_norm": 0.6985955833355848, "learning_rate": 9.144196951934348e-06, "loss": 11.820819854736328, "step": 52, "token_acc": 0.010665230272017236 }, { "epoch": 0.03107593081207857, "grad_norm": 0.7025370418351381, "learning_rate": 9.320046893317702e-06, "loss": 11.81194019317627, "step": 53, "token_acc": 0.010381088131673077 }, { "epoch": 0.0316622691292876, "grad_norm": 0.6873922833533456, "learning_rate": 9.495896834701053e-06, "loss": 11.805252075195312, "step": 54, "token_acc": 0.009833599549249058 }, { "epoch": 0.03224860744649663, "grad_norm": 0.6698310516097201, "learning_rate": 9.671746776084405e-06, "loss": 11.791781425476074, "step": 55, "token_acc": 0.009808991012105984 }, { "epoch": 0.03283494576370566, "grad_norm": 0.9373125049565595, "learning_rate": 9.847596717467761e-06, "loss": 11.773946762084961, "step": 56, "token_acc": 0.010166980877996229 }, { "epoch": 0.03342128408091469, "grad_norm": 0.8621283626238764, "learning_rate": 1.0023446658851114e-05, "loss": 11.760116577148438, "step": 57, "token_acc": 0.009926121865523092 }, { "epoch": 0.034007622398123716, "grad_norm": 0.8257835226977261, "learning_rate": 1.0199296600234467e-05, "loss": 11.747591972351074, "step": 58, "token_acc": 0.009541249531284511 }, { "epoch": 0.034593960715332744, "grad_norm": 1.0365097764198536, "learning_rate": 1.037514654161782e-05, "loss": 11.720434188842773, "step": 59, "token_acc": 0.010312098545578402 }, { "epoch": 0.03518029903254178, "grad_norm": 0.8725835366263507, "learning_rate": 1.0550996483001172e-05, "loss": 11.705840110778809, "step": 60, "token_acc": 0.009987452006606464 }, { "epoch": 0.03576663734975081, "grad_norm": 1.074544665508291, "learning_rate": 1.0726846424384524e-05, "loss": 11.683185577392578, "step": 61, "token_acc": 0.010081490959432815 }, { "epoch": 0.03635297566695984, "grad_norm": 0.9846162435087352, "learning_rate": 1.0902696365767877e-05, "loss": 11.665786743164062, "step": 62, "token_acc": 0.009682069716251167 }, { "epoch": 0.036939313984168866, "grad_norm": 1.3421678425032828, "learning_rate": 1.107854630715123e-05, "loss": 11.642001152038574, "step": 63, "token_acc": 0.009598904815762525 }, { "epoch": 0.037525652301377894, "grad_norm": 1.1774469227022621, "learning_rate": 1.1254396248534582e-05, "loss": 11.619728088378906, "step": 64, "token_acc": 0.009494669682006818 }, { "epoch": 0.03811199061858692, "grad_norm": 1.2998491921754616, "learning_rate": 1.1430246189917935e-05, "loss": 11.584670066833496, "step": 65, "token_acc": 0.009765726590864169 }, { "epoch": 0.03869832893579595, "grad_norm": 1.3331250444441887, "learning_rate": 1.1606096131301289e-05, "loss": 11.553152084350586, "step": 66, "token_acc": 0.009664113140836771 }, { "epoch": 0.03928466725300499, "grad_norm": 1.6226622150984584, "learning_rate": 1.1781946072684642e-05, "loss": 11.519734382629395, "step": 67, "token_acc": 0.009575511602198475 }, { "epoch": 0.039871005570214016, "grad_norm": 1.6983746926329137, "learning_rate": 1.1957796014067994e-05, "loss": 11.476907730102539, "step": 68, "token_acc": 0.009746186611111684 }, { "epoch": 0.040457343887423045, "grad_norm": 1.9643348599757893, "learning_rate": 1.2133645955451347e-05, "loss": 11.434260368347168, "step": 69, "token_acc": 0.01034937890285831 }, { "epoch": 0.04104368220463207, "grad_norm": 1.7242948284798472, "learning_rate": 1.23094958968347e-05, "loss": 11.389060020446777, "step": 70, "token_acc": 0.009599121765713144 }, { "epoch": 0.0416300205218411, "grad_norm": 2.5625702568153867, "learning_rate": 1.2485345838218052e-05, "loss": 11.341590881347656, "step": 71, "token_acc": 0.009460101329955669 }, { "epoch": 0.04221635883905013, "grad_norm": 3.350991324498559, "learning_rate": 1.2661195779601406e-05, "loss": 11.279542922973633, "step": 72, "token_acc": 0.010042107779976887 }, { "epoch": 0.04280269715625916, "grad_norm": 4.160970757348774, "learning_rate": 1.2837045720984759e-05, "loss": 11.229930877685547, "step": 73, "token_acc": 0.00965858873464549 }, { "epoch": 0.043389035473468195, "grad_norm": 2.9397270316351465, "learning_rate": 1.3012895662368111e-05, "loss": 11.172969818115234, "step": 74, "token_acc": 0.00988893140261154 }, { "epoch": 0.04397537379067722, "grad_norm": 4.680081569445933, "learning_rate": 1.3188745603751464e-05, "loss": 11.112567901611328, "step": 75, "token_acc": 0.010311249857864977 }, { "epoch": 0.04456171210788625, "grad_norm": 4.699299452058756, "learning_rate": 1.3364595545134818e-05, "loss": 11.063923835754395, "step": 76, "token_acc": 0.009845417740154582 }, { "epoch": 0.04514805042509528, "grad_norm": 5.538504792405696, "learning_rate": 1.354044548651817e-05, "loss": 11.009982109069824, "step": 77, "token_acc": 0.010230417527201116 }, { "epoch": 0.04573438874230431, "grad_norm": 6.241477072762899, "learning_rate": 1.3716295427901523e-05, "loss": 10.952494621276855, "step": 78, "token_acc": 0.009848835496833646 }, { "epoch": 0.04632072705951334, "grad_norm": 3.968667046539815, "learning_rate": 1.3892145369284876e-05, "loss": 10.884750366210938, "step": 79, "token_acc": 0.010374260292944979 }, { "epoch": 0.046907065376722366, "grad_norm": 4.08469338419477, "learning_rate": 1.4067995310668228e-05, "loss": 10.834638595581055, "step": 80, "token_acc": 0.01003153216963363 }, { "epoch": 0.047493403693931395, "grad_norm": 6.6259636972686575, "learning_rate": 1.4243845252051581e-05, "loss": 10.7899169921875, "step": 81, "token_acc": 0.010143984210553942 }, { "epoch": 0.04807974201114043, "grad_norm": 8.644032921976954, "learning_rate": 1.4419695193434934e-05, "loss": 10.74351692199707, "step": 82, "token_acc": 0.009235846796731522 }, { "epoch": 0.04866608032834946, "grad_norm": 4.081780657697027, "learning_rate": 1.4595545134818286e-05, "loss": 10.667047500610352, "step": 83, "token_acc": 0.009777380678504742 }, { "epoch": 0.04925241864555849, "grad_norm": 11.90175869423382, "learning_rate": 1.4771395076201639e-05, "loss": 10.624881744384766, "step": 84, "token_acc": 0.00937833267980549 }, { "epoch": 0.049838756962767516, "grad_norm": 7.994709891197614, "learning_rate": 1.4947245017584991e-05, "loss": 10.56396484375, "step": 85, "token_acc": 0.00981186471434669 }, { "epoch": 0.050425095279976545, "grad_norm": 8.543642313368, "learning_rate": 1.5123094958968347e-05, "loss": 10.482460021972656, "step": 86, "token_acc": 0.008725180999740156 }, { "epoch": 0.051011433597185574, "grad_norm": 7.198160934436331, "learning_rate": 1.52989449003517e-05, "loss": 10.4072265625, "step": 87, "token_acc": 0.009421692097833329 }, { "epoch": 0.0515977719143946, "grad_norm": 6.920251688037826, "learning_rate": 1.547479484173505e-05, "loss": 10.33627700805664, "step": 88, "token_acc": 0.00978225640119037 }, { "epoch": 0.05218411023160364, "grad_norm": 11.391122689653942, "learning_rate": 1.5650644783118405e-05, "loss": 10.275107383728027, "step": 89, "token_acc": 0.009228338721805463 }, { "epoch": 0.052770448548812667, "grad_norm": 6.991426182765631, "learning_rate": 1.5826494724501756e-05, "loss": 10.193984985351562, "step": 90, "token_acc": 0.009725028527649565 }, { "epoch": 0.053356786866021695, "grad_norm": 4.3113444626016895, "learning_rate": 1.600234466588511e-05, "loss": 10.132678985595703, "step": 91, "token_acc": 0.010158740210668524 }, { "epoch": 0.053943125183230724, "grad_norm": 11.940257207318611, "learning_rate": 1.617819460726846e-05, "loss": 10.072431564331055, "step": 92, "token_acc": 0.009815266901281929 }, { "epoch": 0.05452946350043975, "grad_norm": 9.681323523382128, "learning_rate": 1.6354044548651815e-05, "loss": 10.024391174316406, "step": 93, "token_acc": 0.009961478678110428 }, { "epoch": 0.05511580181764878, "grad_norm": 4.861946877215656, "learning_rate": 1.6529894490035166e-05, "loss": 9.97166919708252, "step": 94, "token_acc": 0.009475612488303746 }, { "epoch": 0.05570214013485781, "grad_norm": 4.3772589334491965, "learning_rate": 1.670574443141852e-05, "loss": 9.933237075805664, "step": 95, "token_acc": 0.01003043354640326 }, { "epoch": 0.056288478452066845, "grad_norm": 14.964595161107606, "learning_rate": 1.6881594372801875e-05, "loss": 9.862251281738281, "step": 96, "token_acc": 0.01016556857484778 }, { "epoch": 0.056874816769275874, "grad_norm": 4.65280285736792, "learning_rate": 1.705744431418523e-05, "loss": 9.84132194519043, "step": 97, "token_acc": 0.009567075299070265 }, { "epoch": 0.0574611550864849, "grad_norm": 14.368870071890083, "learning_rate": 1.723329425556858e-05, "loss": 9.78306770324707, "step": 98, "token_acc": 0.009784511204843476 }, { "epoch": 0.05804749340369393, "grad_norm": 12.283347945856411, "learning_rate": 1.7409144196951934e-05, "loss": 9.761861801147461, "step": 99, "token_acc": 0.009183320907057518 }, { "epoch": 0.05863383172090296, "grad_norm": 6.289929174215331, "learning_rate": 1.7584994138335285e-05, "loss": 9.696324348449707, "step": 100, "token_acc": 0.009611648446278735 }, { "epoch": 0.05922017003811199, "grad_norm": 35.78537698520067, "learning_rate": 1.776084407971864e-05, "loss": 9.738545417785645, "step": 101, "token_acc": 0.009845722446886983 }, { "epoch": 0.05980650835532102, "grad_norm": 23.291932077851612, "learning_rate": 1.793669402110199e-05, "loss": 9.63571548461914, "step": 102, "token_acc": 0.009211965975727201 }, { "epoch": 0.06039284667253005, "grad_norm": 15.106667317942211, "learning_rate": 1.8112543962485345e-05, "loss": 9.648144721984863, "step": 103, "token_acc": 0.009499501927771785 }, { "epoch": 0.06097918498973908, "grad_norm": 14.292865410024833, "learning_rate": 1.8288393903868696e-05, "loss": 9.568429946899414, "step": 104, "token_acc": 0.009660574412532636 }, { "epoch": 0.06156552330694811, "grad_norm": 8.461322653026425, "learning_rate": 1.846424384525205e-05, "loss": 9.560866355895996, "step": 105, "token_acc": 0.00936985872091505 }, { "epoch": 0.06215186162415714, "grad_norm": 4.292496271157349, "learning_rate": 1.8640093786635404e-05, "loss": 9.511852264404297, "step": 106, "token_acc": 0.010042017917074073 }, { "epoch": 0.06273819994136617, "grad_norm": 14.131710084857392, "learning_rate": 1.8815943728018755e-05, "loss": 9.496458053588867, "step": 107, "token_acc": 0.009481680473185036 }, { "epoch": 0.0633245382585752, "grad_norm": 12.579733144874412, "learning_rate": 1.8991793669402106e-05, "loss": 9.469629287719727, "step": 108, "token_acc": 0.009342529673495926 }, { "epoch": 0.06391087657578423, "grad_norm": 6.298168771686222, "learning_rate": 1.916764361078546e-05, "loss": 9.43764877319336, "step": 109, "token_acc": 0.009872353553661641 }, { "epoch": 0.06449721489299326, "grad_norm": 6.871301089559754, "learning_rate": 1.934349355216881e-05, "loss": 9.39526081085205, "step": 110, "token_acc": 0.010972125821086198 }, { "epoch": 0.06508355321020229, "grad_norm": 8.538197061328741, "learning_rate": 1.951934349355217e-05, "loss": 9.375219345092773, "step": 111, "token_acc": 0.011830798349507945 }, { "epoch": 0.06566989152741132, "grad_norm": 6.714641229555084, "learning_rate": 1.9695193434935523e-05, "loss": 9.35539436340332, "step": 112, "token_acc": 0.011527377521613832 }, { "epoch": 0.06625622984462035, "grad_norm": 11.079604572182392, "learning_rate": 1.9871043376318874e-05, "loss": 9.405292510986328, "step": 113, "token_acc": 0.01187268776480163 }, { "epoch": 0.06684256816182937, "grad_norm": 10.866949986340893, "learning_rate": 2.0046893317702228e-05, "loss": 9.310361862182617, "step": 114, "token_acc": 0.011660611129989283 }, { "epoch": 0.0674289064790384, "grad_norm": 5.631543407563885, "learning_rate": 2.022274325908558e-05, "loss": 9.294713020324707, "step": 115, "token_acc": 0.011267989449274766 }, { "epoch": 0.06801524479624743, "grad_norm": 4.298235727891895, "learning_rate": 2.0398593200468933e-05, "loss": 9.283777236938477, "step": 116, "token_acc": 0.011911465212838175 }, { "epoch": 0.06860158311345646, "grad_norm": 7.189953099216086, "learning_rate": 2.0574443141852284e-05, "loss": 9.287104606628418, "step": 117, "token_acc": 0.012719671956544894 }, { "epoch": 0.06918792143066549, "grad_norm": 4.938561701190064, "learning_rate": 2.075029308323564e-05, "loss": 9.19372272491455, "step": 118, "token_acc": 0.012120262190280664 }, { "epoch": 0.06977425974787452, "grad_norm": 5.582734578739099, "learning_rate": 2.092614302461899e-05, "loss": 9.248631477355957, "step": 119, "token_acc": 0.01245375783353619 }, { "epoch": 0.07036059806508356, "grad_norm": 5.747290575446226, "learning_rate": 2.1101992966002344e-05, "loss": 9.195615768432617, "step": 120, "token_acc": 0.011600977541594821 }, { "epoch": 0.07094693638229259, "grad_norm": 2.153343444667456, "learning_rate": 2.1277842907385698e-05, "loss": 9.178709030151367, "step": 121, "token_acc": 0.011623717839846424 }, { "epoch": 0.07153327469950162, "grad_norm": 2.6292161448695706, "learning_rate": 2.145369284876905e-05, "loss": 9.207082748413086, "step": 122, "token_acc": 0.012685543089992982 }, { "epoch": 0.07211961301671065, "grad_norm": 3.1307465945296826, "learning_rate": 2.1629542790152403e-05, "loss": 9.16843318939209, "step": 123, "token_acc": 0.01121300853208481 }, { "epoch": 0.07270595133391967, "grad_norm": 1.743800307119476, "learning_rate": 2.1805392731535754e-05, "loss": 9.134980201721191, "step": 124, "token_acc": 0.011759137608839346 }, { "epoch": 0.0732922896511287, "grad_norm": 6.051628470549426, "learning_rate": 2.1981242672919108e-05, "loss": 9.122943878173828, "step": 125, "token_acc": 0.011672197221885172 }, { "epoch": 0.07387862796833773, "grad_norm": 3.288730122021578, "learning_rate": 2.215709261430246e-05, "loss": 9.163206100463867, "step": 126, "token_acc": 0.01227240259302686 }, { "epoch": 0.07446496628554676, "grad_norm": 2.5453981083810584, "learning_rate": 2.2332942555685813e-05, "loss": 9.145343780517578, "step": 127, "token_acc": 0.011243629485630222 }, { "epoch": 0.07505130460275579, "grad_norm": 2.0002052789726297, "learning_rate": 2.2508792497069164e-05, "loss": 9.193897247314453, "step": 128, "token_acc": 0.01193651676083668 }, { "epoch": 0.07563764291996482, "grad_norm": 1.1792973472423087, "learning_rate": 2.268464243845252e-05, "loss": 9.138301849365234, "step": 129, "token_acc": 0.011933714892797067 }, { "epoch": 0.07622398123717385, "grad_norm": 1.615342470156613, "learning_rate": 2.286049237983587e-05, "loss": 9.069543838500977, "step": 130, "token_acc": 0.01162119458725517 }, { "epoch": 0.07681031955438287, "grad_norm": 1.2425419124387802, "learning_rate": 2.3036342321219224e-05, "loss": 9.160469055175781, "step": 131, "token_acc": 0.011514993959598123 }, { "epoch": 0.0773966578715919, "grad_norm": 1.1494220190885984, "learning_rate": 2.3212192262602578e-05, "loss": 9.09598159790039, "step": 132, "token_acc": 0.011932376180533798 }, { "epoch": 0.07798299618880093, "grad_norm": 1.0804073201496034, "learning_rate": 2.338804220398593e-05, "loss": 9.150649070739746, "step": 133, "token_acc": 0.011491540561662048 }, { "epoch": 0.07856933450600997, "grad_norm": 1.0158259606356494, "learning_rate": 2.3563892145369283e-05, "loss": 9.1259126663208, "step": 134, "token_acc": 0.011596265228679849 }, { "epoch": 0.079155672823219, "grad_norm": 1.1407371077285182, "learning_rate": 2.3739742086752634e-05, "loss": 9.168603897094727, "step": 135, "token_acc": 0.011914977458501088 }, { "epoch": 0.07974201114042803, "grad_norm": 1.3923991247388563, "learning_rate": 2.3915592028135988e-05, "loss": 9.018424987792969, "step": 136, "token_acc": 0.012036606417946918 }, { "epoch": 0.08032834945763706, "grad_norm": 0.866237784620666, "learning_rate": 2.409144196951934e-05, "loss": 9.09442138671875, "step": 137, "token_acc": 0.011247779705740424 }, { "epoch": 0.08091468777484609, "grad_norm": 1.133934093662852, "learning_rate": 2.4267291910902693e-05, "loss": 9.096515655517578, "step": 138, "token_acc": 0.011347487130256101 }, { "epoch": 0.08150102609205512, "grad_norm": 0.7484998749546738, "learning_rate": 2.4443141852286044e-05, "loss": 9.116138458251953, "step": 139, "token_acc": 0.011388677224704985 }, { "epoch": 0.08208736440926415, "grad_norm": 1.2316770385528586, "learning_rate": 2.46189917936694e-05, "loss": 9.02778148651123, "step": 140, "token_acc": 0.012339055793991416 }, { "epoch": 0.08267370272647317, "grad_norm": 0.8202402062747113, "learning_rate": 2.4794841735052756e-05, "loss": 9.107624053955078, "step": 141, "token_acc": 0.01242040491313653 }, { "epoch": 0.0832600410436822, "grad_norm": 0.763879798575539, "learning_rate": 2.4970691676436104e-05, "loss": 9.10135269165039, "step": 142, "token_acc": 0.010823608152419579 }, { "epoch": 0.08384637936089123, "grad_norm": 0.8725460797459627, "learning_rate": 2.514654161781946e-05, "loss": 9.088061332702637, "step": 143, "token_acc": 0.011498018818663035 }, { "epoch": 0.08443271767810026, "grad_norm": 0.8102709264413125, "learning_rate": 2.5322391559202812e-05, "loss": 9.01123046875, "step": 144, "token_acc": 0.011969924970549557 }, { "epoch": 0.08501905599530929, "grad_norm": 0.9890874533700922, "learning_rate": 2.5498241500586167e-05, "loss": 9.156105995178223, "step": 145, "token_acc": 0.01190416711588879 }, { "epoch": 0.08560539431251832, "grad_norm": 0.822954879412597, "learning_rate": 2.5674091441969517e-05, "loss": 9.027963638305664, "step": 146, "token_acc": 0.011730904214639666 }, { "epoch": 0.08619173262972735, "grad_norm": 0.7282515853418581, "learning_rate": 2.5849941383352872e-05, "loss": 9.03160285949707, "step": 147, "token_acc": 0.01175353477542632 }, { "epoch": 0.08677807094693639, "grad_norm": 0.8368857688504768, "learning_rate": 2.6025791324736223e-05, "loss": 9.071564674377441, "step": 148, "token_acc": 0.010754451413656923 }, { "epoch": 0.08736440926414542, "grad_norm": 0.6278830429440386, "learning_rate": 2.6201641266119577e-05, "loss": 9.053972244262695, "step": 149, "token_acc": 0.011633656390825269 }, { "epoch": 0.08795074758135445, "grad_norm": 0.6397821727954456, "learning_rate": 2.6377491207502928e-05, "loss": 9.06360149383545, "step": 150, "token_acc": 0.011848974862455198 }, { "epoch": 0.08853708589856348, "grad_norm": 0.6004148577981634, "learning_rate": 2.6553341148886282e-05, "loss": 9.099259376525879, "step": 151, "token_acc": 0.013133470907262038 }, { "epoch": 0.0891234242157725, "grad_norm": 0.6865229704240047, "learning_rate": 2.6729191090269636e-05, "loss": 9.064840316772461, "step": 152, "token_acc": 0.013405342600970635 }, { "epoch": 0.08970976253298153, "grad_norm": 0.7632232621520254, "learning_rate": 2.6905041031652987e-05, "loss": 9.049582481384277, "step": 153, "token_acc": 0.013849242159155785 }, { "epoch": 0.09029610085019056, "grad_norm": 1.2343339843541559, "learning_rate": 2.708089097303634e-05, "loss": 9.073572158813477, "step": 154, "token_acc": 0.014364270067302789 }, { "epoch": 0.09088243916739959, "grad_norm": 2.4046900523131223, "learning_rate": 2.7256740914419692e-05, "loss": 9.104389190673828, "step": 155, "token_acc": 0.013243885722438934 }, { "epoch": 0.09146877748460862, "grad_norm": 0.7626259540919172, "learning_rate": 2.7432590855803047e-05, "loss": 9.041627883911133, "step": 156, "token_acc": 0.01367964734120157 }, { "epoch": 0.09205511580181765, "grad_norm": 1.0603069789698447, "learning_rate": 2.7608440797186398e-05, "loss": 9.078733444213867, "step": 157, "token_acc": 0.014265893766521792 }, { "epoch": 0.09264145411902668, "grad_norm": 2.7869681246712026, "learning_rate": 2.7784290738569752e-05, "loss": 9.044357299804688, "step": 158, "token_acc": 0.013175206298416975 }, { "epoch": 0.0932277924362357, "grad_norm": 1.0510916482917174, "learning_rate": 2.7960140679953103e-05, "loss": 9.011998176574707, "step": 159, "token_acc": 0.013896452113121632 }, { "epoch": 0.09381413075344473, "grad_norm": 1.7118843563336834, "learning_rate": 2.8135990621336457e-05, "loss": 9.12320327758789, "step": 160, "token_acc": 0.013295854970914525 }, { "epoch": 0.09440046907065376, "grad_norm": 1.5244266222430018, "learning_rate": 2.831184056271981e-05, "loss": 9.09963321685791, "step": 161, "token_acc": 0.0138716387466804 }, { "epoch": 0.09498680738786279, "grad_norm": 1.290101654649091, "learning_rate": 2.8487690504103162e-05, "loss": 9.068549156188965, "step": 162, "token_acc": 0.01399306481728461 }, { "epoch": 0.09557314570507183, "grad_norm": 0.9054428849915294, "learning_rate": 2.8663540445486516e-05, "loss": 9.050338745117188, "step": 163, "token_acc": 0.012928927807758884 }, { "epoch": 0.09615948402228086, "grad_norm": 0.7724962200914788, "learning_rate": 2.8839390386869867e-05, "loss": 9.063183784484863, "step": 164, "token_acc": 0.012395917789531936 }, { "epoch": 0.09674582233948989, "grad_norm": 1.0176998059967046, "learning_rate": 2.901524032825322e-05, "loss": 9.074644088745117, "step": 165, "token_acc": 0.01415834091881162 }, { "epoch": 0.09733216065669892, "grad_norm": 7.90819084770369, "learning_rate": 2.9191090269636572e-05, "loss": 9.12739372253418, "step": 166, "token_acc": 0.013206636782467937 }, { "epoch": 0.09791849897390795, "grad_norm": 3.423448470393187, "learning_rate": 2.9366940211019927e-05, "loss": 9.009647369384766, "step": 167, "token_acc": 0.01391107680709375 }, { "epoch": 0.09850483729111698, "grad_norm": 1.6457841087557514, "learning_rate": 2.9542790152403278e-05, "loss": 9.003883361816406, "step": 168, "token_acc": 0.0134522029454538 }, { "epoch": 0.099091175608326, "grad_norm": 2.198473395726332, "learning_rate": 2.9718640093786632e-05, "loss": 8.992986679077148, "step": 169, "token_acc": 0.014436789322999822 }, { "epoch": 0.09967751392553503, "grad_norm": 1.1970813445148931, "learning_rate": 2.9894490035169983e-05, "loss": 9.024124145507812, "step": 170, "token_acc": 0.012878132201587086 }, { "epoch": 0.10026385224274406, "grad_norm": 1.3429372910828548, "learning_rate": 3.0070339976553337e-05, "loss": 9.00933837890625, "step": 171, "token_acc": 0.01384946771697629 }, { "epoch": 0.10085019055995309, "grad_norm": 1.495172842814261, "learning_rate": 3.0246189917936695e-05, "loss": 9.008771896362305, "step": 172, "token_acc": 0.01305237598834097 }, { "epoch": 0.10143652887716212, "grad_norm": 3.111441907901365, "learning_rate": 3.0422039859320042e-05, "loss": 9.036519050598145, "step": 173, "token_acc": 0.014005433773596254 }, { "epoch": 0.10202286719437115, "grad_norm": 4.205589247307397, "learning_rate": 3.05978898007034e-05, "loss": 8.98863410949707, "step": 174, "token_acc": 0.01340088550485345 }, { "epoch": 0.10260920551158018, "grad_norm": 0.9788653267005818, "learning_rate": 3.077373974208675e-05, "loss": 9.032859802246094, "step": 175, "token_acc": 0.013573408634849708 }, { "epoch": 0.1031955438287892, "grad_norm": 1.9078470133423255, "learning_rate": 3.09495896834701e-05, "loss": 9.032963752746582, "step": 176, "token_acc": 0.014906284454244762 }, { "epoch": 0.10378188214599825, "grad_norm": 4.835289045264976, "learning_rate": 3.112543962485345e-05, "loss": 9.001845359802246, "step": 177, "token_acc": 0.01326270955625822 }, { "epoch": 0.10436822046320728, "grad_norm": 2.640550938937187, "learning_rate": 3.130128956623681e-05, "loss": 8.940900802612305, "step": 178, "token_acc": 0.014488685382291665 }, { "epoch": 0.1049545587804163, "grad_norm": 14.014989868496693, "learning_rate": 3.147713950762016e-05, "loss": 9.075488090515137, "step": 179, "token_acc": 0.014488467594862496 }, { "epoch": 0.10554089709762533, "grad_norm": 12.789330347693321, "learning_rate": 3.165298944900351e-05, "loss": 9.076740264892578, "step": 180, "token_acc": 0.014073287307488051 }, { "epoch": 0.10612723541483436, "grad_norm": 1.2837946677339216, "learning_rate": 3.182883939038687e-05, "loss": 9.014093399047852, "step": 181, "token_acc": 0.013479452603939608 }, { "epoch": 0.10671357373204339, "grad_norm": 3.4257055015124567, "learning_rate": 3.200468933177022e-05, "loss": 8.930099487304688, "step": 182, "token_acc": 0.0146207031420874 }, { "epoch": 0.10729991204925242, "grad_norm": 3.1713835244548565, "learning_rate": 3.218053927315357e-05, "loss": 9.024892807006836, "step": 183, "token_acc": 0.014562824791921922 }, { "epoch": 0.10788625036646145, "grad_norm": 2.588075444088043, "learning_rate": 3.235638921453692e-05, "loss": 8.943979263305664, "step": 184, "token_acc": 0.015625941420402006 }, { "epoch": 0.10847258868367048, "grad_norm": 2.0213650702088932, "learning_rate": 3.253223915592028e-05, "loss": 8.963343620300293, "step": 185, "token_acc": 0.014867770427393616 }, { "epoch": 0.1090589270008795, "grad_norm": 3.1662236379348765, "learning_rate": 3.270808909730363e-05, "loss": 8.966333389282227, "step": 186, "token_acc": 0.01482275064500185 }, { "epoch": 0.10964526531808853, "grad_norm": 0.8511160102277255, "learning_rate": 3.288393903868698e-05, "loss": 8.944194793701172, "step": 187, "token_acc": 0.014568748124071655 }, { "epoch": 0.11023160363529756, "grad_norm": 1.8901798550851767, "learning_rate": 3.305978898007033e-05, "loss": 8.878313064575195, "step": 188, "token_acc": 0.016518586737219787 }, { "epoch": 0.11081794195250659, "grad_norm": 0.6449265028125587, "learning_rate": 3.323563892145369e-05, "loss": 8.910615921020508, "step": 189, "token_acc": 0.016138486579772928 }, { "epoch": 0.11140428026971562, "grad_norm": 1.2665135816480648, "learning_rate": 3.341148886283704e-05, "loss": 8.943984985351562, "step": 190, "token_acc": 0.015834434958471803 }, { "epoch": 0.11199061858692466, "grad_norm": 3.125132540955922, "learning_rate": 3.35873388042204e-05, "loss": 8.913708686828613, "step": 191, "token_acc": 0.01523594838495425 }, { "epoch": 0.11257695690413369, "grad_norm": 2.8226529772726283, "learning_rate": 3.376318874560375e-05, "loss": 8.998905181884766, "step": 192, "token_acc": 0.0156307628373289 }, { "epoch": 0.11316329522134272, "grad_norm": 6.25845402660554, "learning_rate": 3.39390386869871e-05, "loss": 8.950105667114258, "step": 193, "token_acc": 0.015614232481913702 }, { "epoch": 0.11374963353855175, "grad_norm": 6.9238518423791975, "learning_rate": 3.411488862837046e-05, "loss": 8.931921005249023, "step": 194, "token_acc": 0.01613472933932224 }, { "epoch": 0.11433597185576078, "grad_norm": 0.964011993194305, "learning_rate": 3.429073856975381e-05, "loss": 8.915523529052734, "step": 195, "token_acc": 0.01629271261858786 }, { "epoch": 0.1149223101729698, "grad_norm": 2.1872053944233167, "learning_rate": 3.446658851113716e-05, "loss": 8.878673553466797, "step": 196, "token_acc": 0.016522522475200457 }, { "epoch": 0.11550864849017883, "grad_norm": 0.9929085286069286, "learning_rate": 3.464243845252051e-05, "loss": 8.950199127197266, "step": 197, "token_acc": 0.016348160576175472 }, { "epoch": 0.11609498680738786, "grad_norm": 2.0389524983194645, "learning_rate": 3.481828839390387e-05, "loss": 8.845256805419922, "step": 198, "token_acc": 0.017225800810283017 }, { "epoch": 0.11668132512459689, "grad_norm": 2.5318152346551335, "learning_rate": 3.499413833528722e-05, "loss": 8.826522827148438, "step": 199, "token_acc": 0.016965894507602764 }, { "epoch": 0.11726766344180592, "grad_norm": 1.3147557263545948, "learning_rate": 3.516998827667057e-05, "loss": 8.790542602539062, "step": 200, "token_acc": 0.015927815029847085 }, { "epoch": 0.11785400175901495, "grad_norm": 1.0529821309507168, "learning_rate": 3.534583821805393e-05, "loss": 8.906332015991211, "step": 201, "token_acc": 0.016561514195583597 }, { "epoch": 0.11844034007622398, "grad_norm": 2.198041382203583, "learning_rate": 3.552168815943728e-05, "loss": 8.760419845581055, "step": 202, "token_acc": 0.01693612737038842 }, { "epoch": 0.119026678393433, "grad_norm": 1.8636648000552203, "learning_rate": 3.569753810082063e-05, "loss": 8.882649421691895, "step": 203, "token_acc": 0.016757771881621718 }, { "epoch": 0.11961301671064203, "grad_norm": 2.334628036106764, "learning_rate": 3.587338804220398e-05, "loss": 8.732261657714844, "step": 204, "token_acc": 0.01689587109335083 }, { "epoch": 0.12019935502785108, "grad_norm": 3.6417434376433997, "learning_rate": 3.604923798358734e-05, "loss": 8.859725952148438, "step": 205, "token_acc": 0.016732605176935728 }, { "epoch": 0.1207856933450601, "grad_norm": 1.1461520465739654, "learning_rate": 3.622508792497069e-05, "loss": 8.789977073669434, "step": 206, "token_acc": 0.016978721576879307 }, { "epoch": 0.12137203166226913, "grad_norm": 5.478124128309508, "learning_rate": 3.640093786635404e-05, "loss": 8.8179292678833, "step": 207, "token_acc": 0.01666530404487685 }, { "epoch": 0.12195836997947816, "grad_norm": 1.9413287312440262, "learning_rate": 3.657678780773739e-05, "loss": 8.845304489135742, "step": 208, "token_acc": 0.01686858289122918 }, { "epoch": 0.12254470829668719, "grad_norm": 4.362775383731732, "learning_rate": 3.675263774912075e-05, "loss": 8.80113410949707, "step": 209, "token_acc": 0.016194827554091417 }, { "epoch": 0.12313104661389622, "grad_norm": 5.170439158455645, "learning_rate": 3.69284876905041e-05, "loss": 8.72726058959961, "step": 210, "token_acc": 0.016813022914523166 }, { "epoch": 0.12371738493110525, "grad_norm": 1.2544001304062333, "learning_rate": 3.710433763188745e-05, "loss": 8.8226957321167, "step": 211, "token_acc": 0.017576808370558257 }, { "epoch": 0.12430372324831428, "grad_norm": 4.064034908986902, "learning_rate": 3.728018757327081e-05, "loss": 8.691914558410645, "step": 212, "token_acc": 0.016029177630140615 }, { "epoch": 0.1248900615655233, "grad_norm": 5.66673725924405, "learning_rate": 3.745603751465416e-05, "loss": 8.654747009277344, "step": 213, "token_acc": 0.01710718629585339 }, { "epoch": 0.12547639988273235, "grad_norm": 1.8032447842916424, "learning_rate": 3.763188745603751e-05, "loss": 8.759397506713867, "step": 214, "token_acc": 0.016445591198048948 }, { "epoch": 0.12606273819994138, "grad_norm": 3.881359930935999, "learning_rate": 3.780773739742087e-05, "loss": 8.737679481506348, "step": 215, "token_acc": 0.015620007719264166 }, { "epoch": 0.1266490765171504, "grad_norm": 5.353254463820007, "learning_rate": 3.798358733880421e-05, "loss": 8.705495834350586, "step": 216, "token_acc": 0.017007621773868435 }, { "epoch": 0.12723541483435943, "grad_norm": 3.084177121198912, "learning_rate": 3.815943728018757e-05, "loss": 8.637994766235352, "step": 217, "token_acc": 0.01672063190892592 }, { "epoch": 0.12782175315156846, "grad_norm": 4.906960521179897, "learning_rate": 3.833528722157092e-05, "loss": 8.752494812011719, "step": 218, "token_acc": 0.016543936245969365 }, { "epoch": 0.1284080914687775, "grad_norm": 4.68498485188667, "learning_rate": 3.851113716295428e-05, "loss": 8.630697250366211, "step": 219, "token_acc": 0.01716079216489793 }, { "epoch": 0.12899442978598652, "grad_norm": 4.421641931399822, "learning_rate": 3.868698710433762e-05, "loss": 8.625170707702637, "step": 220, "token_acc": 0.01652241526557979 }, { "epoch": 0.12958076810319555, "grad_norm": 3.6243561487515494, "learning_rate": 3.886283704572098e-05, "loss": 8.68350601196289, "step": 221, "token_acc": 0.017743194418739387 }, { "epoch": 0.13016710642040458, "grad_norm": 6.731615648535092, "learning_rate": 3.903868698710434e-05, "loss": 8.61314868927002, "step": 222, "token_acc": 0.016826560453083856 }, { "epoch": 0.1307534447376136, "grad_norm": 3.8951372260537527, "learning_rate": 3.921453692848769e-05, "loss": 8.674982070922852, "step": 223, "token_acc": 0.01645953334735309 }, { "epoch": 0.13133978305482263, "grad_norm": 5.0161197585727555, "learning_rate": 3.9390386869871046e-05, "loss": 8.68167495727539, "step": 224, "token_acc": 0.016467335622679977 }, { "epoch": 0.13192612137203166, "grad_norm": 7.150299923498034, "learning_rate": 3.956623681125439e-05, "loss": 8.575281143188477, "step": 225, "token_acc": 0.016570919559520274 }, { "epoch": 0.1325124596892407, "grad_norm": 2.30292100939367, "learning_rate": 3.974208675263775e-05, "loss": 8.574335098266602, "step": 226, "token_acc": 0.017459028310447343 }, { "epoch": 0.13309879800644972, "grad_norm": 6.818015262946809, "learning_rate": 3.99179366940211e-05, "loss": 8.623686790466309, "step": 227, "token_acc": 0.016416210808624765 }, { "epoch": 0.13368513632365875, "grad_norm": 8.081805549191621, "learning_rate": 4.0093786635404456e-05, "loss": 8.6261625289917, "step": 228, "token_acc": 0.015715693644919895 }, { "epoch": 0.13427147464086778, "grad_norm": 2.9986313751668847, "learning_rate": 4.02696365767878e-05, "loss": 8.618754386901855, "step": 229, "token_acc": 0.016192745002832402 }, { "epoch": 0.1348578129580768, "grad_norm": 4.477099581502254, "learning_rate": 4.044548651817116e-05, "loss": 8.553512573242188, "step": 230, "token_acc": 0.0174617371039869 }, { "epoch": 0.13544415127528583, "grad_norm": 5.3811405918676245, "learning_rate": 4.062133645955451e-05, "loss": 8.630196571350098, "step": 231, "token_acc": 0.01684603615051477 }, { "epoch": 0.13603048959249486, "grad_norm": 2.3900266067121794, "learning_rate": 4.0797186400937866e-05, "loss": 8.560566902160645, "step": 232, "token_acc": 0.01697804930027705 }, { "epoch": 0.1366168279097039, "grad_norm": 3.2667017471281685, "learning_rate": 4.097303634232122e-05, "loss": 8.522208213806152, "step": 233, "token_acc": 0.017987175160310495 }, { "epoch": 0.13720316622691292, "grad_norm": 13.065247310607706, "learning_rate": 4.114888628370457e-05, "loss": 8.572076797485352, "step": 234, "token_acc": 0.01687565618236646 }, { "epoch": 0.13778950454412195, "grad_norm": 7.388370678270873, "learning_rate": 4.132473622508792e-05, "loss": 8.521028518676758, "step": 235, "token_acc": 0.01644102089325559 }, { "epoch": 0.13837584286133098, "grad_norm": 20.440602046224782, "learning_rate": 4.150058616647128e-05, "loss": 8.51113510131836, "step": 236, "token_acc": 0.017621761295818262 }, { "epoch": 0.13896218117854, "grad_norm": 18.541476919661964, "learning_rate": 4.167643610785463e-05, "loss": 8.62530517578125, "step": 237, "token_acc": 0.017359247213363224 }, { "epoch": 0.13954851949574903, "grad_norm": 2.5282089501337346, "learning_rate": 4.185228604923798e-05, "loss": 8.510663986206055, "step": 238, "token_acc": 0.019206657079544116 }, { "epoch": 0.14013485781295806, "grad_norm": 9.401277860536702, "learning_rate": 4.202813599062133e-05, "loss": 8.505946159362793, "step": 239, "token_acc": 0.018394363576213713 }, { "epoch": 0.14072119613016712, "grad_norm": 5.567415916578747, "learning_rate": 4.220398593200469e-05, "loss": 8.481810569763184, "step": 240, "token_acc": 0.017143819031710885 }, { "epoch": 0.14130753444737615, "grad_norm": 2.1675158960189513, "learning_rate": 4.237983587338804e-05, "loss": 8.493383407592773, "step": 241, "token_acc": 0.01822428262522767 }, { "epoch": 0.14189387276458518, "grad_norm": 8.929633214833405, "learning_rate": 4.2555685814771396e-05, "loss": 8.513069152832031, "step": 242, "token_acc": 0.017859501381529246 }, { "epoch": 0.1424802110817942, "grad_norm": 3.6897452741754173, "learning_rate": 4.273153575615474e-05, "loss": 8.453872680664062, "step": 243, "token_acc": 0.018653342204239025 }, { "epoch": 0.14306654939900323, "grad_norm": 4.625537063062409, "learning_rate": 4.29073856975381e-05, "loss": 8.47091293334961, "step": 244, "token_acc": 0.018603806974158858 }, { "epoch": 0.14365288771621226, "grad_norm": 7.870832093972414, "learning_rate": 4.308323563892145e-05, "loss": 8.564062118530273, "step": 245, "token_acc": 0.01740024713169218 }, { "epoch": 0.1442392260334213, "grad_norm": 4.259223115226154, "learning_rate": 4.3259085580304806e-05, "loss": 8.369973182678223, "step": 246, "token_acc": 0.01828073112807893 }, { "epoch": 0.14482556435063032, "grad_norm": 2.2867801139341593, "learning_rate": 4.343493552168815e-05, "loss": 8.349054336547852, "step": 247, "token_acc": 0.018252581127587885 }, { "epoch": 0.14541190266783935, "grad_norm": 11.668153759621728, "learning_rate": 4.361078546307151e-05, "loss": 8.439401626586914, "step": 248, "token_acc": 0.017820318677740622 }, { "epoch": 0.14599824098504838, "grad_norm": 2.1768025335983254, "learning_rate": 4.378663540445486e-05, "loss": 8.447366714477539, "step": 249, "token_acc": 0.018823249920810897 }, { "epoch": 0.1465845793022574, "grad_norm": 17.87671040443596, "learning_rate": 4.3962485345838216e-05, "loss": 8.423364639282227, "step": 250, "token_acc": 0.01875484276925797 }, { "epoch": 0.14717091761946643, "grad_norm": 14.547813900905627, "learning_rate": 4.4138335287221574e-05, "loss": 8.468502044677734, "step": 251, "token_acc": 0.01637811915792896 }, { "epoch": 0.14775725593667546, "grad_norm": 7.662344409516869, "learning_rate": 4.431418522860492e-05, "loss": 8.409443855285645, "step": 252, "token_acc": 0.018396897829317815 }, { "epoch": 0.1483435942538845, "grad_norm": 4.610852221268246, "learning_rate": 4.449003516998827e-05, "loss": 8.30703353881836, "step": 253, "token_acc": 0.018592773049497083 }, { "epoch": 0.14892993257109352, "grad_norm": 15.582364757774345, "learning_rate": 4.466588511137163e-05, "loss": 8.416754722595215, "step": 254, "token_acc": 0.01929136306630321 }, { "epoch": 0.14951627088830255, "grad_norm": 12.564851101593918, "learning_rate": 4.4841735052754984e-05, "loss": 8.483627319335938, "step": 255, "token_acc": 0.01938644858848068 }, { "epoch": 0.15010260920551158, "grad_norm": 8.015261268760222, "learning_rate": 4.501758499413833e-05, "loss": 8.404739379882812, "step": 256, "token_acc": 0.01865865619087942 }, { "epoch": 0.1506889475227206, "grad_norm": 7.364009400186939, "learning_rate": 4.5193434935521686e-05, "loss": 8.311761856079102, "step": 257, "token_acc": 0.019295205673469283 }, { "epoch": 0.15127528583992964, "grad_norm": 11.539319319363312, "learning_rate": 4.536928487690504e-05, "loss": 8.299919128417969, "step": 258, "token_acc": 0.02016530879220357 }, { "epoch": 0.15186162415713866, "grad_norm": 8.916585738074229, "learning_rate": 4.5545134818288395e-05, "loss": 8.352657318115234, "step": 259, "token_acc": 0.01924070080372876 }, { "epoch": 0.1524479624743477, "grad_norm": 14.799399472136862, "learning_rate": 4.572098475967174e-05, "loss": 8.296621322631836, "step": 260, "token_acc": 0.018318659471740984 }, { "epoch": 0.15303430079155672, "grad_norm": 12.52200798352578, "learning_rate": 4.5896834701055096e-05, "loss": 8.391961097717285, "step": 261, "token_acc": 0.019134395123230914 }, { "epoch": 0.15362063910876575, "grad_norm": 7.726866027566938, "learning_rate": 4.607268464243845e-05, "loss": 8.309759140014648, "step": 262, "token_acc": 0.018757194540371648 }, { "epoch": 0.15420697742597478, "grad_norm": 6.663272791005106, "learning_rate": 4.6248534583821805e-05, "loss": 8.346975326538086, "step": 263, "token_acc": 0.019628551090403158 }, { "epoch": 0.1547933157431838, "grad_norm": 8.849650123022013, "learning_rate": 4.6424384525205156e-05, "loss": 8.265969276428223, "step": 264, "token_acc": 0.01917901182204468 }, { "epoch": 0.15537965406039284, "grad_norm": 6.278597013879949, "learning_rate": 4.660023446658851e-05, "loss": 8.262683868408203, "step": 265, "token_acc": 0.020237367940757707 }, { "epoch": 0.15596599237760186, "grad_norm": 14.83723807809333, "learning_rate": 4.677608440797186e-05, "loss": 8.24969482421875, "step": 266, "token_acc": 0.02095503230969343 }, { "epoch": 0.1565523306948109, "grad_norm": 14.013194643054666, "learning_rate": 4.6951934349355215e-05, "loss": 8.28728199005127, "step": 267, "token_acc": 0.020356644541047995 }, { "epoch": 0.15713866901201995, "grad_norm": 3.299461201611789, "learning_rate": 4.7127784290738566e-05, "loss": 8.28223705291748, "step": 268, "token_acc": 0.01932153093571812 }, { "epoch": 0.15772500732922898, "grad_norm": 1.4370879623196615, "learning_rate": 4.730363423212192e-05, "loss": 8.154370307922363, "step": 269, "token_acc": 0.020513134692529394 }, { "epoch": 0.158311345646438, "grad_norm": 11.690469326957365, "learning_rate": 4.747948417350527e-05, "loss": 8.307836532592773, "step": 270, "token_acc": 0.0186597039960265 }, { "epoch": 0.15889768396364704, "grad_norm": 7.034490509655638, "learning_rate": 4.7655334114888626e-05, "loss": 8.156492233276367, "step": 271, "token_acc": 0.020116473273681313 }, { "epoch": 0.15948402228085606, "grad_norm": 8.610409267769095, "learning_rate": 4.7831184056271977e-05, "loss": 8.175551414489746, "step": 272, "token_acc": 0.020806123220426027 }, { "epoch": 0.1600703605980651, "grad_norm": 4.911789439993146, "learning_rate": 4.8007033997655334e-05, "loss": 8.205076217651367, "step": 273, "token_acc": 0.019922229874769194 }, { "epoch": 0.16065669891527412, "grad_norm": 7.221215383403958, "learning_rate": 4.818288393903868e-05, "loss": 8.238790512084961, "step": 274, "token_acc": 0.019895751451924324 }, { "epoch": 0.16124303723248315, "grad_norm": 4.435402344576878, "learning_rate": 4.8358733880422036e-05, "loss": 8.225730895996094, "step": 275, "token_acc": 0.020369095309631113 }, { "epoch": 0.16182937554969218, "grad_norm": 6.71909090750322, "learning_rate": 4.853458382180539e-05, "loss": 8.196975708007812, "step": 276, "token_acc": 0.02137586040254431 }, { "epoch": 0.1624157138669012, "grad_norm": 3.6340838143635477, "learning_rate": 4.8710433763188744e-05, "loss": 8.132326126098633, "step": 277, "token_acc": 0.02181880979874629 }, { "epoch": 0.16300205218411024, "grad_norm": 3.374835225665365, "learning_rate": 4.888628370457209e-05, "loss": 8.0545654296875, "step": 278, "token_acc": 0.021444238143077304 }, { "epoch": 0.16358839050131926, "grad_norm": 12.31289490424, "learning_rate": 4.9062133645955446e-05, "loss": 8.135686874389648, "step": 279, "token_acc": 0.02138458539874521 }, { "epoch": 0.1641747288185283, "grad_norm": 9.090867008689703, "learning_rate": 4.92379835873388e-05, "loss": 8.082733154296875, "step": 280, "token_acc": 0.021298596585837062 }, { "epoch": 0.16476106713573732, "grad_norm": 13.052408170393587, "learning_rate": 4.9413833528722155e-05, "loss": 8.079601287841797, "step": 281, "token_acc": 0.021224666252589126 }, { "epoch": 0.16534740545294635, "grad_norm": 11.778781185258065, "learning_rate": 4.958968347010551e-05, "loss": 8.064671516418457, "step": 282, "token_acc": 0.021834183530466955 }, { "epoch": 0.16593374377015538, "grad_norm": 6.78001123641741, "learning_rate": 4.9765533411488857e-05, "loss": 8.138179779052734, "step": 283, "token_acc": 0.0203531588851116 }, { "epoch": 0.1665200820873644, "grad_norm": 5.049128069374832, "learning_rate": 4.994138335287221e-05, "loss": 8.085536003112793, "step": 284, "token_acc": 0.022625034070080593 }, { "epoch": 0.16710642040457344, "grad_norm": 12.528670847425603, "learning_rate": 5.0117233294255565e-05, "loss": 8.023820877075195, "step": 285, "token_acc": 0.022043720754737036 }, { "epoch": 0.16769275872178246, "grad_norm": 10.784200550540923, "learning_rate": 5.029308323563892e-05, "loss": 8.108606338500977, "step": 286, "token_acc": 0.02092908479595299 }, { "epoch": 0.1682790970389915, "grad_norm": 8.126861750504569, "learning_rate": 5.046893317702227e-05, "loss": 7.985041618347168, "step": 287, "token_acc": 0.02380128160747117 }, { "epoch": 0.16886543535620052, "grad_norm": 6.561806312125951, "learning_rate": 5.0644783118405625e-05, "loss": 8.01936149597168, "step": 288, "token_acc": 0.0228410804370893 }, { "epoch": 0.16945177367340955, "grad_norm": 7.933481262999368, "learning_rate": 5.0820633059788975e-05, "loss": 8.02853012084961, "step": 289, "token_acc": 0.022237328872628023 }, { "epoch": 0.17003811199061858, "grad_norm": 4.366074984576223, "learning_rate": 5.099648300117233e-05, "loss": 7.944267749786377, "step": 290, "token_acc": 0.025122139500665743 }, { "epoch": 0.1706244503078276, "grad_norm": 11.692846671580012, "learning_rate": 5.117233294255568e-05, "loss": 7.985602378845215, "step": 291, "token_acc": 0.023169997823331328 }, { "epoch": 0.17121078862503664, "grad_norm": 13.074716716989984, "learning_rate": 5.1348182883939035e-05, "loss": 7.920740604400635, "step": 292, "token_acc": 0.024088712879455885 }, { "epoch": 0.17179712694224566, "grad_norm": 3.668348138002314, "learning_rate": 5.1524032825322386e-05, "loss": 7.948727607727051, "step": 293, "token_acc": 0.024311563090047828 }, { "epoch": 0.1723834652594547, "grad_norm": 3.6593951371713347, "learning_rate": 5.1699882766705743e-05, "loss": 7.921501159667969, "step": 294, "token_acc": 0.02397616188662831 }, { "epoch": 0.17296980357666372, "grad_norm": 2.3311775966786583, "learning_rate": 5.1875732708089094e-05, "loss": 7.8770036697387695, "step": 295, "token_acc": 0.02581199573513264 }, { "epoch": 0.17355614189387278, "grad_norm": 6.422375969880502, "learning_rate": 5.2051582649472445e-05, "loss": 7.884852409362793, "step": 296, "token_acc": 0.024368458420172687 }, { "epoch": 0.1741424802110818, "grad_norm": 5.166149248909966, "learning_rate": 5.2227432590855796e-05, "loss": 7.842950820922852, "step": 297, "token_acc": 0.024369453211405143 }, { "epoch": 0.17472881852829084, "grad_norm": 5.864410777685613, "learning_rate": 5.2403282532239154e-05, "loss": 7.882455825805664, "step": 298, "token_acc": 0.0243029159866849 }, { "epoch": 0.17531515684549986, "grad_norm": 4.555465681072166, "learning_rate": 5.2579132473622505e-05, "loss": 7.719220161437988, "step": 299, "token_acc": 0.026509222169598015 }, { "epoch": 0.1759014951627089, "grad_norm": 8.146444251304837, "learning_rate": 5.2754982415005856e-05, "loss": 7.7608819007873535, "step": 300, "token_acc": 0.02517310504945128 }, { "epoch": 0.17648783347991792, "grad_norm": 5.81969392025838, "learning_rate": 5.2930832356389206e-05, "loss": 7.8868088722229, "step": 301, "token_acc": 0.026045193782157562 }, { "epoch": 0.17707417179712695, "grad_norm": 3.55087871225099, "learning_rate": 5.3106682297772564e-05, "loss": 7.767951488494873, "step": 302, "token_acc": 0.026804178446754007 }, { "epoch": 0.17766051011433598, "grad_norm": 6.103333882605994, "learning_rate": 5.3282532239155915e-05, "loss": 7.703026294708252, "step": 303, "token_acc": 0.028081661460994327 }, { "epoch": 0.178246848431545, "grad_norm": 6.364365303635605, "learning_rate": 5.345838218053927e-05, "loss": 7.696715354919434, "step": 304, "token_acc": 0.02772256186845454 }, { "epoch": 0.17883318674875404, "grad_norm": 3.1244894707851656, "learning_rate": 5.363423212192262e-05, "loss": 7.673195838928223, "step": 305, "token_acc": 0.027334839978056535 }, { "epoch": 0.17941952506596306, "grad_norm": 2.245808538247048, "learning_rate": 5.3810082063305974e-05, "loss": 7.595062255859375, "step": 306, "token_acc": 0.029556739893409595 }, { "epoch": 0.1800058633831721, "grad_norm": 3.2670214584944497, "learning_rate": 5.3985932004689325e-05, "loss": 7.662883758544922, "step": 307, "token_acc": 0.030627175805047868 }, { "epoch": 0.18059220170038112, "grad_norm": 9.105293586837126, "learning_rate": 5.416178194607268e-05, "loss": 7.673460483551025, "step": 308, "token_acc": 0.027661091777773858 }, { "epoch": 0.18117854001759015, "grad_norm": 3.1321173873360406, "learning_rate": 5.433763188745603e-05, "loss": 7.641384124755859, "step": 309, "token_acc": 0.0288999058869512 }, { "epoch": 0.18176487833479918, "grad_norm": 13.919280211384155, "learning_rate": 5.4513481828839385e-05, "loss": 7.5895843505859375, "step": 310, "token_acc": 0.031850296366024974 }, { "epoch": 0.1823512166520082, "grad_norm": 10.78428048946607, "learning_rate": 5.4689331770222736e-05, "loss": 7.651038646697998, "step": 311, "token_acc": 0.03031201206764231 }, { "epoch": 0.18293755496921724, "grad_norm": 11.317942213652632, "learning_rate": 5.486518171160609e-05, "loss": 7.543618202209473, "step": 312, "token_acc": 0.028990082072378468 }, { "epoch": 0.18352389328642627, "grad_norm": 10.367792510592034, "learning_rate": 5.504103165298945e-05, "loss": 7.542943954467773, "step": 313, "token_acc": 0.03029279538609945 }, { "epoch": 0.1841102316036353, "grad_norm": 6.268839393388123, "learning_rate": 5.5216881594372795e-05, "loss": 7.507347106933594, "step": 314, "token_acc": 0.03205096252579529 }, { "epoch": 0.18469656992084432, "grad_norm": 4.508204916844255, "learning_rate": 5.5392731535756146e-05, "loss": 7.416807174682617, "step": 315, "token_acc": 0.03401838061716707 }, { "epoch": 0.18528290823805335, "grad_norm": 11.750848656420674, "learning_rate": 5.5568581477139504e-05, "loss": 7.428244113922119, "step": 316, "token_acc": 0.032806324110671935 }, { "epoch": 0.18586924655526238, "grad_norm": 11.627313030276985, "learning_rate": 5.574443141852286e-05, "loss": 7.453977108001709, "step": 317, "token_acc": 0.03260713590193003 }, { "epoch": 0.1864555848724714, "grad_norm": 5.214110239696652, "learning_rate": 5.5920281359906205e-05, "loss": 7.3778076171875, "step": 318, "token_acc": 0.03553857399851728 }, { "epoch": 0.18704192318968044, "grad_norm": 6.391817140754896, "learning_rate": 5.609613130128956e-05, "loss": 7.440142631530762, "step": 319, "token_acc": 0.03374119201973295 }, { "epoch": 0.18762826150688947, "grad_norm": 6.405246099739629, "learning_rate": 5.6271981242672914e-05, "loss": 7.383110523223877, "step": 320, "token_acc": 0.03369989912707122 }, { "epoch": 0.1882145998240985, "grad_norm": 4.008618115738209, "learning_rate": 5.644783118405627e-05, "loss": 7.389673709869385, "step": 321, "token_acc": 0.03433148960518787 }, { "epoch": 0.18880093814130752, "grad_norm": 5.767811435642842, "learning_rate": 5.662368112543962e-05, "loss": 7.326108932495117, "step": 322, "token_acc": 0.0385379304515945 }, { "epoch": 0.18938727645851655, "grad_norm": 3.7583662533508426, "learning_rate": 5.679953106682297e-05, "loss": 7.279698848724365, "step": 323, "token_acc": 0.03957132602866975 }, { "epoch": 0.18997361477572558, "grad_norm": 5.943633506455475, "learning_rate": 5.6975381008206324e-05, "loss": 7.280203342437744, "step": 324, "token_acc": 0.0367412308614768 }, { "epoch": 0.19055995309293464, "grad_norm": 3.2342148152791133, "learning_rate": 5.715123094958968e-05, "loss": 7.163398742675781, "step": 325, "token_acc": 0.04174379956665334 }, { "epoch": 0.19114629141014366, "grad_norm": 3.165610295503623, "learning_rate": 5.732708089097303e-05, "loss": 7.16325044631958, "step": 326, "token_acc": 0.04046002494111126 }, { "epoch": 0.1917326297273527, "grad_norm": 5.039046477812103, "learning_rate": 5.7502930832356384e-05, "loss": 7.215337753295898, "step": 327, "token_acc": 0.03968510953556506 }, { "epoch": 0.19231896804456172, "grad_norm": 2.883439081513902, "learning_rate": 5.7678780773739735e-05, "loss": 7.11888313293457, "step": 328, "token_acc": 0.04250615736031269 }, { "epoch": 0.19290530636177075, "grad_norm": 3.6553171107398628, "learning_rate": 5.785463071512309e-05, "loss": 7.125880241394043, "step": 329, "token_acc": 0.04175203327745701 }, { "epoch": 0.19349164467897978, "grad_norm": 5.058030098319214, "learning_rate": 5.803048065650644e-05, "loss": 7.135167121887207, "step": 330, "token_acc": 0.04496769124697356 }, { "epoch": 0.1940779829961888, "grad_norm": 3.816963267149248, "learning_rate": 5.8206330597889794e-05, "loss": 7.082459449768066, "step": 331, "token_acc": 0.04591925420991124 }, { "epoch": 0.19466432131339784, "grad_norm": 4.971782718161779, "learning_rate": 5.8382180539273145e-05, "loss": 7.1755266189575195, "step": 332, "token_acc": 0.04363740940406607 }, { "epoch": 0.19525065963060687, "grad_norm": 4.059838755757486, "learning_rate": 5.85580304806565e-05, "loss": 7.031644344329834, "step": 333, "token_acc": 0.04727827833332024 }, { "epoch": 0.1958369979478159, "grad_norm": 6.9386590373189, "learning_rate": 5.8733880422039853e-05, "loss": 7.099215507507324, "step": 334, "token_acc": 0.045780148259895674 }, { "epoch": 0.19642333626502492, "grad_norm": 2.6967921812083, "learning_rate": 5.890973036342321e-05, "loss": 6.985239028930664, "step": 335, "token_acc": 0.05013862557090652 }, { "epoch": 0.19700967458223395, "grad_norm": 7.550407977501261, "learning_rate": 5.9085580304806555e-05, "loss": 7.04837703704834, "step": 336, "token_acc": 0.04612652287519834 }, { "epoch": 0.19759601289944298, "grad_norm": 5.27521125963567, "learning_rate": 5.926143024618991e-05, "loss": 6.9652886390686035, "step": 337, "token_acc": 0.051093080386549805 }, { "epoch": 0.198182351216652, "grad_norm": 3.6158798804507555, "learning_rate": 5.9437280187573264e-05, "loss": 6.937475204467773, "step": 338, "token_acc": 0.05021848416495554 }, { "epoch": 0.19876868953386104, "grad_norm": 6.222175978899776, "learning_rate": 5.961313012895662e-05, "loss": 7.00045108795166, "step": 339, "token_acc": 0.04891469585423589 }, { "epoch": 0.19935502785107007, "grad_norm": 3.1054397194678796, "learning_rate": 5.9788980070339966e-05, "loss": 6.8834381103515625, "step": 340, "token_acc": 0.0526119639685359 }, { "epoch": 0.1999413661682791, "grad_norm": 4.397389351686467, "learning_rate": 5.996483001172332e-05, "loss": 6.9578094482421875, "step": 341, "token_acc": 0.052191083494864225 }, { "epoch": 0.20052770448548812, "grad_norm": 4.801319634690989, "learning_rate": 6.0140679953106674e-05, "loss": 6.844263076782227, "step": 342, "token_acc": 0.052490422443043486 }, { "epoch": 0.20111404280269715, "grad_norm": 4.767657258047003, "learning_rate": 6.031652989449003e-05, "loss": 6.938053131103516, "step": 343, "token_acc": 0.05282566016014473 }, { "epoch": 0.20170038111990618, "grad_norm": 2.253603185039558, "learning_rate": 6.049237983587339e-05, "loss": 6.819204807281494, "step": 344, "token_acc": 0.05629140825095781 }, { "epoch": 0.2022867194371152, "grad_norm": 4.344731956548509, "learning_rate": 6.0668229777256734e-05, "loss": 6.8875885009765625, "step": 345, "token_acc": 0.05337461936760417 }, { "epoch": 0.20287305775432424, "grad_norm": 2.811385796889861, "learning_rate": 6.0844079718640084e-05, "loss": 6.836453437805176, "step": 346, "token_acc": 0.05516626219443596 }, { "epoch": 0.20345939607153327, "grad_norm": 6.092241745161295, "learning_rate": 6.101992966002344e-05, "loss": 6.767938613891602, "step": 347, "token_acc": 0.058459332297080176 }, { "epoch": 0.2040457343887423, "grad_norm": 3.9864307005875705, "learning_rate": 6.11957796014068e-05, "loss": 6.770617485046387, "step": 348, "token_acc": 0.05724495126315673 }, { "epoch": 0.20463207270595132, "grad_norm": 4.058901327550995, "learning_rate": 6.137162954279014e-05, "loss": 6.76531982421875, "step": 349, "token_acc": 0.06027002080049777 }, { "epoch": 0.20521841102316035, "grad_norm": 4.317791212637859, "learning_rate": 6.15474794841735e-05, "loss": 6.773094177246094, "step": 350, "token_acc": 0.062060392762690104 }, { "epoch": 0.20580474934036938, "grad_norm": 4.934932155666583, "learning_rate": 6.172332942555685e-05, "loss": 6.6705732345581055, "step": 351, "token_acc": 0.06350626808100289 }, { "epoch": 0.2063910876575784, "grad_norm": 3.9845913931971464, "learning_rate": 6.18991793669402e-05, "loss": 6.6311445236206055, "step": 352, "token_acc": 0.06489040217171262 }, { "epoch": 0.20697742597478747, "grad_norm": 5.705320434840597, "learning_rate": 6.207502930832357e-05, "loss": 6.754546165466309, "step": 353, "token_acc": 0.06055406773701478 }, { "epoch": 0.2075637642919965, "grad_norm": 3.9814825127524904, "learning_rate": 6.22508792497069e-05, "loss": 6.669569969177246, "step": 354, "token_acc": 0.0641871223260956 }, { "epoch": 0.20815010260920552, "grad_norm": 2.932258869299944, "learning_rate": 6.242672919109027e-05, "loss": 6.554462432861328, "step": 355, "token_acc": 0.06948275862068966 }, { "epoch": 0.20873644092641455, "grad_norm": 4.052589265141277, "learning_rate": 6.260257913247362e-05, "loss": 6.661326885223389, "step": 356, "token_acc": 0.06407248499617613 }, { "epoch": 0.20932277924362358, "grad_norm": 2.5844799515788988, "learning_rate": 6.277842907385697e-05, "loss": 6.592467308044434, "step": 357, "token_acc": 0.06882969230088795 }, { "epoch": 0.2099091175608326, "grad_norm": 6.63740309439479, "learning_rate": 6.295427901524032e-05, "loss": 6.533319473266602, "step": 358, "token_acc": 0.07381255312297078 }, { "epoch": 0.21049545587804164, "grad_norm": 2.867880931416568, "learning_rate": 6.313012895662367e-05, "loss": 6.496272563934326, "step": 359, "token_acc": 0.07298163480118769 }, { "epoch": 0.21108179419525067, "grad_norm": 6.466092282670102, "learning_rate": 6.330597889800702e-05, "loss": 6.492397308349609, "step": 360, "token_acc": 0.0708952157789965 }, { "epoch": 0.2116681325124597, "grad_norm": 4.081236904055039, "learning_rate": 6.348182883939039e-05, "loss": 6.564023494720459, "step": 361, "token_acc": 0.07080278501869934 }, { "epoch": 0.21225447082966872, "grad_norm": 4.576791166092956, "learning_rate": 6.365767878077374e-05, "loss": 6.539773941040039, "step": 362, "token_acc": 0.07262635282723645 }, { "epoch": 0.21284080914687775, "grad_norm": 3.4615545149694227, "learning_rate": 6.383352872215709e-05, "loss": 6.419950485229492, "step": 363, "token_acc": 0.07525542701156895 }, { "epoch": 0.21342714746408678, "grad_norm": 3.8621845560128123, "learning_rate": 6.400937866354044e-05, "loss": 6.485685348510742, "step": 364, "token_acc": 0.0747659922549649 }, { "epoch": 0.2140134857812958, "grad_norm": 4.310845577707348, "learning_rate": 6.418522860492379e-05, "loss": 6.360536575317383, "step": 365, "token_acc": 0.08212773393915768 }, { "epoch": 0.21459982409850484, "grad_norm": 5.130310485378813, "learning_rate": 6.436107854630714e-05, "loss": 6.332820892333984, "step": 366, "token_acc": 0.08399961197565657 }, { "epoch": 0.21518616241571387, "grad_norm": 3.2627227667341243, "learning_rate": 6.45369284876905e-05, "loss": 6.339569091796875, "step": 367, "token_acc": 0.0825173213119373 }, { "epoch": 0.2157725007329229, "grad_norm": 2.7698115072089378, "learning_rate": 6.471277842907384e-05, "loss": 6.3140997886657715, "step": 368, "token_acc": 0.08358076882747544 }, { "epoch": 0.21635883905013192, "grad_norm": 6.237769593166118, "learning_rate": 6.488862837045721e-05, "loss": 6.258779525756836, "step": 369, "token_acc": 0.0876622606762058 }, { "epoch": 0.21694517736734095, "grad_norm": 3.198082732767584, "learning_rate": 6.506447831184056e-05, "loss": 6.240322589874268, "step": 370, "token_acc": 0.08722307407603236 }, { "epoch": 0.21753151568454998, "grad_norm": 5.419124343030608, "learning_rate": 6.524032825322391e-05, "loss": 6.224976539611816, "step": 371, "token_acc": 0.09026953041111344 }, { "epoch": 0.218117854001759, "grad_norm": 2.691783564878796, "learning_rate": 6.541617819460726e-05, "loss": 6.274032115936279, "step": 372, "token_acc": 0.08869360367395016 }, { "epoch": 0.21870419231896804, "grad_norm": 6.321056109320655, "learning_rate": 6.559202813599061e-05, "loss": 6.243311882019043, "step": 373, "token_acc": 0.08884785339860503 }, { "epoch": 0.21929053063617707, "grad_norm": 3.5963317047768624, "learning_rate": 6.576787807737396e-05, "loss": 6.225800037384033, "step": 374, "token_acc": 0.08838464533626929 }, { "epoch": 0.2198768689533861, "grad_norm": 4.891786070067984, "learning_rate": 6.594372801875733e-05, "loss": 6.2503886222839355, "step": 375, "token_acc": 0.08695025951783629 }, { "epoch": 0.22046320727059512, "grad_norm": 3.3030936613989357, "learning_rate": 6.611957796014067e-05, "loss": 6.132002353668213, "step": 376, "token_acc": 0.09638771588071487 }, { "epoch": 0.22104954558780415, "grad_norm": 5.179555144508683, "learning_rate": 6.629542790152403e-05, "loss": 6.224666595458984, "step": 377, "token_acc": 0.09155244630836488 }, { "epoch": 0.22163588390501318, "grad_norm": 4.359283222832193, "learning_rate": 6.647127784290738e-05, "loss": 6.115947246551514, "step": 378, "token_acc": 0.09782691594585567 }, { "epoch": 0.2222222222222222, "grad_norm": 3.2069770676956164, "learning_rate": 6.664712778429073e-05, "loss": 6.088150978088379, "step": 379, "token_acc": 0.09868489873210612 }, { "epoch": 0.22280856053943124, "grad_norm": 5.21906238950017, "learning_rate": 6.682297772567408e-05, "loss": 6.070379257202148, "step": 380, "token_acc": 0.09922988287031165 }, { "epoch": 0.2233948988566403, "grad_norm": 3.671361985579552, "learning_rate": 6.699882766705743e-05, "loss": 6.007410049438477, "step": 381, "token_acc": 0.10148445547954252 }, { "epoch": 0.22398123717384932, "grad_norm": 3.6539275815743, "learning_rate": 6.71746776084408e-05, "loss": 5.959519386291504, "step": 382, "token_acc": 0.10463915681433661 }, { "epoch": 0.22456757549105835, "grad_norm": 3.484047399329108, "learning_rate": 6.735052754982415e-05, "loss": 5.968883514404297, "step": 383, "token_acc": 0.10419276129566506 }, { "epoch": 0.22515391380826738, "grad_norm": 3.7392798077416334, "learning_rate": 6.75263774912075e-05, "loss": 5.93845272064209, "step": 384, "token_acc": 0.107687439169029 }, { "epoch": 0.2257402521254764, "grad_norm": 5.104027979643277, "learning_rate": 6.770222743259085e-05, "loss": 5.954387664794922, "step": 385, "token_acc": 0.10229585508398396 }, { "epoch": 0.22632659044268544, "grad_norm": 3.2295895983045044, "learning_rate": 6.78780773739742e-05, "loss": 5.954471588134766, "step": 386, "token_acc": 0.10523225769795812 }, { "epoch": 0.22691292875989447, "grad_norm": 3.902262975674632, "learning_rate": 6.805392731535755e-05, "loss": 5.940810203552246, "step": 387, "token_acc": 0.10337094696979714 }, { "epoch": 0.2274992670771035, "grad_norm": 4.215680276073613, "learning_rate": 6.822977725674092e-05, "loss": 5.91622257232666, "step": 388, "token_acc": 0.11124847764272017 }, { "epoch": 0.22808560539431252, "grad_norm": 4.02224878232534, "learning_rate": 6.840562719812425e-05, "loss": 5.936064720153809, "step": 389, "token_acc": 0.10599353059489497 }, { "epoch": 0.22867194371152155, "grad_norm": 4.400738551378376, "learning_rate": 6.858147713950762e-05, "loss": 5.819920063018799, "step": 390, "token_acc": 0.11496178107801813 }, { "epoch": 0.22925828202873058, "grad_norm": 3.106468815757425, "learning_rate": 6.875732708089097e-05, "loss": 5.793968200683594, "step": 391, "token_acc": 0.11380289997024727 }, { "epoch": 0.2298446203459396, "grad_norm": 4.270459329469786, "learning_rate": 6.893317702227432e-05, "loss": 5.831222057342529, "step": 392, "token_acc": 0.1124405064729474 }, { "epoch": 0.23043095866314864, "grad_norm": 3.667073317503492, "learning_rate": 6.910902696365767e-05, "loss": 5.7944536209106445, "step": 393, "token_acc": 0.11555909335349523 }, { "epoch": 0.23101729698035767, "grad_norm": 4.715840322471299, "learning_rate": 6.928487690504102e-05, "loss": 5.698849678039551, "step": 394, "token_acc": 0.11793997132776834 }, { "epoch": 0.2316036352975667, "grad_norm": 3.5271256105819915, "learning_rate": 6.946072684642437e-05, "loss": 5.790862083435059, "step": 395, "token_acc": 0.11205064554684319 }, { "epoch": 0.23218997361477572, "grad_norm": 4.123248336581656, "learning_rate": 6.963657678780774e-05, "loss": 5.726106643676758, "step": 396, "token_acc": 0.122620957309185 }, { "epoch": 0.23277631193198475, "grad_norm": 4.601396240828345, "learning_rate": 6.981242672919109e-05, "loss": 5.763605117797852, "step": 397, "token_acc": 0.11514154754465278 }, { "epoch": 0.23336265024919378, "grad_norm": 4.1784116657872366, "learning_rate": 6.998827667057444e-05, "loss": 5.816640853881836, "step": 398, "token_acc": 0.11001271044238153 }, { "epoch": 0.2339489885664028, "grad_norm": 4.303445038580127, "learning_rate": 7.016412661195779e-05, "loss": 5.6869001388549805, "step": 399, "token_acc": 0.1205205334365583 }, { "epoch": 0.23453532688361184, "grad_norm": 3.483165080467426, "learning_rate": 7.033997655334114e-05, "loss": 5.675357818603516, "step": 400, "token_acc": 0.12406015037593984 }, { "epoch": 0.23512166520082087, "grad_norm": 4.513334435815259, "learning_rate": 7.051582649472449e-05, "loss": 5.637521266937256, "step": 401, "token_acc": 0.12292262927553806 }, { "epoch": 0.2357080035180299, "grad_norm": 4.018562496654571, "learning_rate": 7.069167643610786e-05, "loss": 5.609130382537842, "step": 402, "token_acc": 0.1258798253374222 }, { "epoch": 0.23629434183523892, "grad_norm": 4.310385199445542, "learning_rate": 7.08675263774912e-05, "loss": 5.596505165100098, "step": 403, "token_acc": 0.12747931954290223 }, { "epoch": 0.23688068015244795, "grad_norm": 3.6719739018035673, "learning_rate": 7.104337631887456e-05, "loss": 5.603360176086426, "step": 404, "token_acc": 0.12725657856703648 }, { "epoch": 0.23746701846965698, "grad_norm": 5.107645944979772, "learning_rate": 7.121922626025791e-05, "loss": 5.512840270996094, "step": 405, "token_acc": 0.13123216097307128 }, { "epoch": 0.238053356786866, "grad_norm": 3.4242288333948783, "learning_rate": 7.139507620164126e-05, "loss": 5.566375732421875, "step": 406, "token_acc": 0.12973804470499303 }, { "epoch": 0.23863969510407504, "grad_norm": 5.634245072157221, "learning_rate": 7.157092614302461e-05, "loss": 5.549043655395508, "step": 407, "token_acc": 0.13178647372061392 }, { "epoch": 0.23922603342128407, "grad_norm": 3.1258971889919236, "learning_rate": 7.174677608440796e-05, "loss": 5.5727949142456055, "step": 408, "token_acc": 0.12761968317523872 }, { "epoch": 0.23981237173849312, "grad_norm": 5.159396424110232, "learning_rate": 7.192262602579131e-05, "loss": 5.5011444091796875, "step": 409, "token_acc": 0.1339217887080706 }, { "epoch": 0.24039871005570215, "grad_norm": 3.651002171915324, "learning_rate": 7.209847596717468e-05, "loss": 5.50866174697876, "step": 410, "token_acc": 0.13396123861211842 }, { "epoch": 0.24098504837291118, "grad_norm": 3.511854270534502, "learning_rate": 7.227432590855801e-05, "loss": 5.360819339752197, "step": 411, "token_acc": 0.1451115018560384 }, { "epoch": 0.2415713866901202, "grad_norm": 3.8741646298104424, "learning_rate": 7.245017584994138e-05, "loss": 5.452428817749023, "step": 412, "token_acc": 0.1357357762727176 }, { "epoch": 0.24215772500732924, "grad_norm": 4.140739124810105, "learning_rate": 7.262602579132473e-05, "loss": 5.4548258781433105, "step": 413, "token_acc": 0.13496192185614472 }, { "epoch": 0.24274406332453827, "grad_norm": 3.6336401396883113, "learning_rate": 7.280187573270808e-05, "loss": 5.409458160400391, "step": 414, "token_acc": 0.1404517432057197 }, { "epoch": 0.2433304016417473, "grad_norm": 4.455997416997323, "learning_rate": 7.297772567409144e-05, "loss": 5.440122127532959, "step": 415, "token_acc": 0.13411181413925255 }, { "epoch": 0.24391673995895632, "grad_norm": 3.997954407390372, "learning_rate": 7.315357561547478e-05, "loss": 5.495820045471191, "step": 416, "token_acc": 0.13153217390851768 }, { "epoch": 0.24450307827616535, "grad_norm": 2.6517978433218174, "learning_rate": 7.332942555685815e-05, "loss": 5.326896667480469, "step": 417, "token_acc": 0.14230757201388672 }, { "epoch": 0.24508941659337438, "grad_norm": 4.808739982288075, "learning_rate": 7.35052754982415e-05, "loss": 5.362764835357666, "step": 418, "token_acc": 0.1434285236929573 }, { "epoch": 0.2456757549105834, "grad_norm": 4.112335927290701, "learning_rate": 7.368112543962485e-05, "loss": 5.388121604919434, "step": 419, "token_acc": 0.13707269712610917 }, { "epoch": 0.24626209322779244, "grad_norm": 4.197959879073062, "learning_rate": 7.38569753810082e-05, "loss": 5.376265525817871, "step": 420, "token_acc": 0.1362363219270727 }, { "epoch": 0.24684843154500147, "grad_norm": 3.5180398232718897, "learning_rate": 7.403282532239155e-05, "loss": 5.385644435882568, "step": 421, "token_acc": 0.14158413430974454 }, { "epoch": 0.2474347698622105, "grad_norm": 3.4469219080041453, "learning_rate": 7.42086752637749e-05, "loss": 5.353095054626465, "step": 422, "token_acc": 0.14258298020324836 }, { "epoch": 0.24802110817941952, "grad_norm": 5.808916479823406, "learning_rate": 7.438452520515827e-05, "loss": 5.38712739944458, "step": 423, "token_acc": 0.13573487786979768 }, { "epoch": 0.24860744649662855, "grad_norm": 2.6387248735386915, "learning_rate": 7.456037514654162e-05, "loss": 5.203732490539551, "step": 424, "token_acc": 0.15196587265665865 }, { "epoch": 0.24919378481383758, "grad_norm": 5.867979613408349, "learning_rate": 7.473622508792497e-05, "loss": 5.249689102172852, "step": 425, "token_acc": 0.15209264427618951 }, { "epoch": 0.2497801231310466, "grad_norm": 3.5276269628564294, "learning_rate": 7.491207502930832e-05, "loss": 5.266202449798584, "step": 426, "token_acc": 0.14987937820826758 }, { "epoch": 0.25036646144825564, "grad_norm": 3.946052632053937, "learning_rate": 7.508792497069167e-05, "loss": 5.274470806121826, "step": 427, "token_acc": 0.14590072504182933 }, { "epoch": 0.2509527997654647, "grad_norm": 3.662742158072335, "learning_rate": 7.526377491207502e-05, "loss": 5.274651050567627, "step": 428, "token_acc": 0.14401235857865766 }, { "epoch": 0.2515391380826737, "grad_norm": 4.264731892009234, "learning_rate": 7.543962485345838e-05, "loss": 5.256073951721191, "step": 429, "token_acc": 0.15129322948510063 }, { "epoch": 0.25212547639988275, "grad_norm": 2.697176414371289, "learning_rate": 7.561547479484174e-05, "loss": 5.273626804351807, "step": 430, "token_acc": 0.147494960395838 }, { "epoch": 0.25271181471709175, "grad_norm": 3.4506169814872174, "learning_rate": 7.579132473622507e-05, "loss": 5.143132209777832, "step": 431, "token_acc": 0.15700888715082084 }, { "epoch": 0.2532981530343008, "grad_norm": 4.122652348529465, "learning_rate": 7.596717467760842e-05, "loss": 5.217945098876953, "step": 432, "token_acc": 0.1505449056450857 }, { "epoch": 0.2538844913515098, "grad_norm": 2.290202740182317, "learning_rate": 7.614302461899179e-05, "loss": 5.189580917358398, "step": 433, "token_acc": 0.15147783566518605 }, { "epoch": 0.25447082966871887, "grad_norm": 6.086241792160422, "learning_rate": 7.631887456037514e-05, "loss": 5.193511962890625, "step": 434, "token_acc": 0.14781076057130063 }, { "epoch": 0.25505716798592787, "grad_norm": 3.937639776278552, "learning_rate": 7.649472450175849e-05, "loss": 5.153887748718262, "step": 435, "token_acc": 0.15459014119955417 }, { "epoch": 0.2556435063031369, "grad_norm": 3.5701669774395466, "learning_rate": 7.667057444314184e-05, "loss": 5.151963233947754, "step": 436, "token_acc": 0.1553613392837893 }, { "epoch": 0.2562298446203459, "grad_norm": 4.321933004097814, "learning_rate": 7.68464243845252e-05, "loss": 5.219295501708984, "step": 437, "token_acc": 0.14973951439553573 }, { "epoch": 0.256816182937555, "grad_norm": 3.6446538960209045, "learning_rate": 7.702227432590856e-05, "loss": 5.178834438323975, "step": 438, "token_acc": 0.15094032488008002 }, { "epoch": 0.257402521254764, "grad_norm": 4.215823977064856, "learning_rate": 7.71981242672919e-05, "loss": 5.084681987762451, "step": 439, "token_acc": 0.1602725085422162 }, { "epoch": 0.25798885957197304, "grad_norm": 2.082208848443483, "learning_rate": 7.737397420867524e-05, "loss": 5.073691368103027, "step": 440, "token_acc": 0.16210726514031104 }, { "epoch": 0.25857519788918204, "grad_norm": 4.609460695706422, "learning_rate": 7.754982415005861e-05, "loss": 5.116462707519531, "step": 441, "token_acc": 0.15574350719150967 }, { "epoch": 0.2591615362063911, "grad_norm": 3.525751510753823, "learning_rate": 7.772567409144196e-05, "loss": 5.100196838378906, "step": 442, "token_acc": 0.15798017903801853 }, { "epoch": 0.2597478745236001, "grad_norm": 4.617431093703994, "learning_rate": 7.790152403282531e-05, "loss": 5.1339311599731445, "step": 443, "token_acc": 0.15189041921971075 }, { "epoch": 0.26033421284080915, "grad_norm": 3.017863480202891, "learning_rate": 7.807737397420867e-05, "loss": 5.0896759033203125, "step": 444, "token_acc": 0.15756112126815827 }, { "epoch": 0.26092055115801815, "grad_norm": 4.171071570802497, "learning_rate": 7.825322391559203e-05, "loss": 5.082404613494873, "step": 445, "token_acc": 0.15682236355408383 }, { "epoch": 0.2615068894752272, "grad_norm": 3.3636820336741913, "learning_rate": 7.842907385697538e-05, "loss": 5.070326805114746, "step": 446, "token_acc": 0.15744733466252453 }, { "epoch": 0.2620932277924362, "grad_norm": 3.3012431566827765, "learning_rate": 7.860492379835873e-05, "loss": 5.013023376464844, "step": 447, "token_acc": 0.1633589272139496 }, { "epoch": 0.26267956610964527, "grad_norm": 3.845746490923582, "learning_rate": 7.878077373974209e-05, "loss": 5.1147918701171875, "step": 448, "token_acc": 0.1536516467588527 }, { "epoch": 0.26326590442685427, "grad_norm": 3.5345488842199626, "learning_rate": 7.895662368112543e-05, "loss": 4.982331275939941, "step": 449, "token_acc": 0.1635154152537532 }, { "epoch": 0.2638522427440633, "grad_norm": 3.3299318537006655, "learning_rate": 7.913247362250878e-05, "loss": 5.0843048095703125, "step": 450, "token_acc": 0.1566409435481122 }, { "epoch": 0.2644385810612723, "grad_norm": 3.8323982942537427, "learning_rate": 7.930832356389213e-05, "loss": 5.060731410980225, "step": 451, "token_acc": 0.15935043929874754 }, { "epoch": 0.2650249193784814, "grad_norm": 4.121507885756696, "learning_rate": 7.94841735052755e-05, "loss": 5.041284084320068, "step": 452, "token_acc": 0.16059973702967037 }, { "epoch": 0.26561125769569044, "grad_norm": 3.5580531202180463, "learning_rate": 7.966002344665885e-05, "loss": 4.961620330810547, "step": 453, "token_acc": 0.16602636059784323 }, { "epoch": 0.26619759601289944, "grad_norm": 3.0163193561626094, "learning_rate": 7.98358733880422e-05, "loss": 4.878084659576416, "step": 454, "token_acc": 0.17350987909780022 }, { "epoch": 0.2667839343301085, "grad_norm": 3.431317820756719, "learning_rate": 8.001172332942555e-05, "loss": 5.0134687423706055, "step": 455, "token_acc": 0.16012941459456173 }, { "epoch": 0.2673702726473175, "grad_norm": 2.705360648318704, "learning_rate": 8.018757327080891e-05, "loss": 4.891533374786377, "step": 456, "token_acc": 0.17029746225505943 }, { "epoch": 0.26795661096452655, "grad_norm": 5.1314399238445985, "learning_rate": 8.036342321219226e-05, "loss": 4.998048782348633, "step": 457, "token_acc": 0.16485212848348013 }, { "epoch": 0.26854294928173555, "grad_norm": 2.3647801017291785, "learning_rate": 8.05392731535756e-05, "loss": 4.95503568649292, "step": 458, "token_acc": 0.16324241382047144 }, { "epoch": 0.2691292875989446, "grad_norm": 5.2296717763972715, "learning_rate": 8.071512309495895e-05, "loss": 5.009551048278809, "step": 459, "token_acc": 0.1602560937901933 }, { "epoch": 0.2697156259161536, "grad_norm": 2.9545150306653385, "learning_rate": 8.089097303634232e-05, "loss": 4.97419548034668, "step": 460, "token_acc": 0.16263180369871563 }, { "epoch": 0.27030196423336267, "grad_norm": 4.945318371027142, "learning_rate": 8.106682297772567e-05, "loss": 5.022619724273682, "step": 461, "token_acc": 0.15620776778894177 }, { "epoch": 0.27088830255057167, "grad_norm": 3.397431079817508, "learning_rate": 8.124267291910902e-05, "loss": 4.894867897033691, "step": 462, "token_acc": 0.17118931692385772 }, { "epoch": 0.2714746408677807, "grad_norm": 4.150211893682385, "learning_rate": 8.141852286049237e-05, "loss": 4.851693153381348, "step": 463, "token_acc": 0.1745461052905577 }, { "epoch": 0.2720609791849897, "grad_norm": 3.0270975998642644, "learning_rate": 8.159437280187573e-05, "loss": 4.882989883422852, "step": 464, "token_acc": 0.1707711409927879 }, { "epoch": 0.2726473175021988, "grad_norm": 2.977653736872163, "learning_rate": 8.177022274325908e-05, "loss": 4.971857070922852, "step": 465, "token_acc": 0.16176777654636276 }, { "epoch": 0.2732336558194078, "grad_norm": 2.944303587003063, "learning_rate": 8.194607268464243e-05, "loss": 4.874970436096191, "step": 466, "token_acc": 0.16954215285843582 }, { "epoch": 0.27381999413661684, "grad_norm": 3.1896761394478617, "learning_rate": 8.212192262602577e-05, "loss": 4.909939289093018, "step": 467, "token_acc": 0.165068006112229 }, { "epoch": 0.27440633245382584, "grad_norm": 4.293542865739274, "learning_rate": 8.229777256740914e-05, "loss": 4.90077018737793, "step": 468, "token_acc": 0.17014762781262913 }, { "epoch": 0.2749926707710349, "grad_norm": 3.1330889330046467, "learning_rate": 8.247362250879249e-05, "loss": 4.843530654907227, "step": 469, "token_acc": 0.16903433113478775 }, { "epoch": 0.2755790090882439, "grad_norm": 4.1621931176916265, "learning_rate": 8.264947245017584e-05, "loss": 4.883255958557129, "step": 470, "token_acc": 0.1669339839505141 }, { "epoch": 0.27616534740545295, "grad_norm": 3.8399961219958905, "learning_rate": 8.282532239155919e-05, "loss": 4.845980644226074, "step": 471, "token_acc": 0.1738849117154105 }, { "epoch": 0.27675168572266196, "grad_norm": 2.9752735036838027, "learning_rate": 8.300117233294255e-05, "loss": 4.86611270904541, "step": 472, "token_acc": 0.16837167979722756 }, { "epoch": 0.277338024039871, "grad_norm": 4.740099255306814, "learning_rate": 8.31770222743259e-05, "loss": 4.873641490936279, "step": 473, "token_acc": 0.17010540902164664 }, { "epoch": 0.27792436235708, "grad_norm": 2.470868235822677, "learning_rate": 8.335287221570926e-05, "loss": 4.788944244384766, "step": 474, "token_acc": 0.17623830305281293 }, { "epoch": 0.27851070067428907, "grad_norm": 4.622013550828369, "learning_rate": 8.352872215709262e-05, "loss": 4.74179744720459, "step": 475, "token_acc": 0.17933142703676203 }, { "epoch": 0.27909703899149807, "grad_norm": 2.8544668474720765, "learning_rate": 8.370457209847596e-05, "loss": 4.844178676605225, "step": 476, "token_acc": 0.17099172926625691 }, { "epoch": 0.2796833773087071, "grad_norm": 3.7744684942917344, "learning_rate": 8.388042203985931e-05, "loss": 4.800683975219727, "step": 477, "token_acc": 0.17339959664316332 }, { "epoch": 0.2802697156259161, "grad_norm": 3.405329217039666, "learning_rate": 8.405627198124266e-05, "loss": 4.812726020812988, "step": 478, "token_acc": 0.1753202502074898 }, { "epoch": 0.2808560539431252, "grad_norm": 4.9716612594809115, "learning_rate": 8.423212192262602e-05, "loss": 4.8752336502075195, "step": 479, "token_acc": 0.16625593114356127 }, { "epoch": 0.28144239226033424, "grad_norm": 3.3820338181300476, "learning_rate": 8.440797186400937e-05, "loss": 4.74002742767334, "step": 480, "token_acc": 0.17751355588946233 }, { "epoch": 0.28202873057754324, "grad_norm": 3.9900300042135575, "learning_rate": 8.458382180539273e-05, "loss": 4.752828121185303, "step": 481, "token_acc": 0.1753159773799286 }, { "epoch": 0.2826150688947523, "grad_norm": 3.4269369834902768, "learning_rate": 8.475967174677608e-05, "loss": 4.7765607833862305, "step": 482, "token_acc": 0.17440990121973074 }, { "epoch": 0.2832014072119613, "grad_norm": 4.231019460263849, "learning_rate": 8.493552168815944e-05, "loss": 4.792545318603516, "step": 483, "token_acc": 0.17406913421442466 }, { "epoch": 0.28378774552917035, "grad_norm": 3.139852211027999, "learning_rate": 8.511137162954279e-05, "loss": 4.7755303382873535, "step": 484, "token_acc": 0.17380265764877958 }, { "epoch": 0.28437408384637936, "grad_norm": 3.592729782124692, "learning_rate": 8.528722157092613e-05, "loss": 4.770051956176758, "step": 485, "token_acc": 0.17371479612439794 }, { "epoch": 0.2849604221635884, "grad_norm": 3.6187401118272366, "learning_rate": 8.546307151230948e-05, "loss": 4.7032694816589355, "step": 486, "token_acc": 0.1809716962696539 }, { "epoch": 0.2855467604807974, "grad_norm": 3.0045234882401974, "learning_rate": 8.563892145369284e-05, "loss": 4.725955963134766, "step": 487, "token_acc": 0.17817942676699738 }, { "epoch": 0.28613309879800647, "grad_norm": 4.53269932950948, "learning_rate": 8.58147713950762e-05, "loss": 4.784139633178711, "step": 488, "token_acc": 0.1743480132779322 }, { "epoch": 0.28671943711521547, "grad_norm": 2.6017795957109873, "learning_rate": 8.599062133645955e-05, "loss": 4.644981384277344, "step": 489, "token_acc": 0.18411112787487527 }, { "epoch": 0.2873057754324245, "grad_norm": 4.297472803697688, "learning_rate": 8.61664712778429e-05, "loss": 4.752431869506836, "step": 490, "token_acc": 0.17491059019496108 }, { "epoch": 0.2878921137496335, "grad_norm": 2.6511703891836964, "learning_rate": 8.634232121922626e-05, "loss": 4.645848274230957, "step": 491, "token_acc": 0.1850725632069195 }, { "epoch": 0.2884784520668426, "grad_norm": 4.107964009200134, "learning_rate": 8.651817116060961e-05, "loss": 4.729785919189453, "step": 492, "token_acc": 0.17807644837625158 }, { "epoch": 0.2890647903840516, "grad_norm": 2.9920451555349845, "learning_rate": 8.669402110199296e-05, "loss": 4.704058647155762, "step": 493, "token_acc": 0.18044468256094295 }, { "epoch": 0.28965112870126064, "grad_norm": 2.9469871810814134, "learning_rate": 8.68698710433763e-05, "loss": 4.713854789733887, "step": 494, "token_acc": 0.17756162788189878 }, { "epoch": 0.29023746701846964, "grad_norm": 4.06770303189972, "learning_rate": 8.704572098475966e-05, "loss": 4.729300498962402, "step": 495, "token_acc": 0.17551730736409854 }, { "epoch": 0.2908238053356787, "grad_norm": 2.7396804316485883, "learning_rate": 8.722157092614302e-05, "loss": 4.679933071136475, "step": 496, "token_acc": 0.17935259463524694 }, { "epoch": 0.2914101436528877, "grad_norm": 2.384751615316691, "learning_rate": 8.739742086752637e-05, "loss": 4.717489242553711, "step": 497, "token_acc": 0.1777613188759721 }, { "epoch": 0.29199648197009676, "grad_norm": 4.735842023000457, "learning_rate": 8.757327080890972e-05, "loss": 4.666316032409668, "step": 498, "token_acc": 0.18178123094720716 }, { "epoch": 0.29258282028730576, "grad_norm": 3.0220471186635507, "learning_rate": 8.774912075029308e-05, "loss": 4.643878936767578, "step": 499, "token_acc": 0.18439965365922964 }, { "epoch": 0.2931691586045148, "grad_norm": 4.441331901875158, "learning_rate": 8.792497069167643e-05, "loss": 4.796988487243652, "step": 500, "token_acc": 0.16914572075667747 }, { "epoch": 0.2937554969217238, "grad_norm": 2.7759859802199425, "learning_rate": 8.810082063305978e-05, "loss": 4.621725082397461, "step": 501, "token_acc": 0.18621704262064828 }, { "epoch": 0.29434183523893287, "grad_norm": 3.667398896824923, "learning_rate": 8.827667057444315e-05, "loss": 4.643711090087891, "step": 502, "token_acc": 0.18315541211519365 }, { "epoch": 0.29492817355614187, "grad_norm": 3.627269345593771, "learning_rate": 8.845252051582649e-05, "loss": 4.585537910461426, "step": 503, "token_acc": 0.19074013340669527 }, { "epoch": 0.2955145118733509, "grad_norm": 2.589392057406107, "learning_rate": 8.862837045720984e-05, "loss": 4.666231155395508, "step": 504, "token_acc": 0.17902322230917128 }, { "epoch": 0.2961008501905599, "grad_norm": 3.8257322006077916, "learning_rate": 8.880422039859319e-05, "loss": 4.708887100219727, "step": 505, "token_acc": 0.1744964024146941 }, { "epoch": 0.296687188507769, "grad_norm": 2.6419895463335266, "learning_rate": 8.898007033997654e-05, "loss": 4.63877010345459, "step": 506, "token_acc": 0.18197642962997812 }, { "epoch": 0.297273526824978, "grad_norm": 3.9906103928536814, "learning_rate": 8.91559202813599e-05, "loss": 4.671177387237549, "step": 507, "token_acc": 0.1801969474719116 }, { "epoch": 0.29785986514218704, "grad_norm": 3.152919983469968, "learning_rate": 8.933177022274325e-05, "loss": 4.592166423797607, "step": 508, "token_acc": 0.18710155788229335 }, { "epoch": 0.2984462034593961, "grad_norm": 3.655488570590753, "learning_rate": 8.95076201641266e-05, "loss": 4.644621849060059, "step": 509, "token_acc": 0.18176177812277072 }, { "epoch": 0.2990325417766051, "grad_norm": 2.750159953084596, "learning_rate": 8.968347010550997e-05, "loss": 4.562835693359375, "step": 510, "token_acc": 0.18830252762930944 }, { "epoch": 0.29961888009381415, "grad_norm": 2.4937239782422487, "learning_rate": 8.98593200468933e-05, "loss": 4.600914001464844, "step": 511, "token_acc": 0.1866240176996692 }, { "epoch": 0.30020521841102316, "grad_norm": 3.957279725521061, "learning_rate": 9.003516998827666e-05, "loss": 4.687961101531982, "step": 512, "token_acc": 0.17401908059621032 }, { "epoch": 0.3007915567282322, "grad_norm": 3.1854409145444658, "learning_rate": 9.021101992966001e-05, "loss": 4.646778106689453, "step": 513, "token_acc": 0.1782020477069379 }, { "epoch": 0.3013778950454412, "grad_norm": 3.0475452320689795, "learning_rate": 9.038686987104337e-05, "loss": 4.586252212524414, "step": 514, "token_acc": 0.18421430642140973 }, { "epoch": 0.30196423336265027, "grad_norm": 4.192017323997992, "learning_rate": 9.056271981242672e-05, "loss": 4.605769634246826, "step": 515, "token_acc": 0.1835988645528915 }, { "epoch": 0.30255057167985927, "grad_norm": 2.647703459193893, "learning_rate": 9.073856975381007e-05, "loss": 4.662216663360596, "step": 516, "token_acc": 0.1794005688948381 }, { "epoch": 0.3031369099970683, "grad_norm": 3.863217520699813, "learning_rate": 9.091441969519342e-05, "loss": 4.577106475830078, "step": 517, "token_acc": 0.1861207437025398 }, { "epoch": 0.3037232483142773, "grad_norm": 2.5968035759892323, "learning_rate": 9.109026963657679e-05, "loss": 4.558673858642578, "step": 518, "token_acc": 0.18617459820496765 }, { "epoch": 0.3043095866314864, "grad_norm": 4.1088138179372855, "learning_rate": 9.126611957796014e-05, "loss": 4.632473945617676, "step": 519, "token_acc": 0.1817299846101913 }, { "epoch": 0.3048959249486954, "grad_norm": 2.8407541010955017, "learning_rate": 9.144196951934348e-05, "loss": 4.575881481170654, "step": 520, "token_acc": 0.1831301395875154 }, { "epoch": 0.30548226326590444, "grad_norm": 3.525659096633153, "learning_rate": 9.161781946072683e-05, "loss": 4.6428117752075195, "step": 521, "token_acc": 0.1788675985954233 }, { "epoch": 0.30606860158311344, "grad_norm": 3.491119122494435, "learning_rate": 9.179366940211019e-05, "loss": 4.5430474281311035, "step": 522, "token_acc": 0.188283048651937 }, { "epoch": 0.3066549399003225, "grad_norm": 2.1428754929118288, "learning_rate": 9.196951934349354e-05, "loss": 4.5450439453125, "step": 523, "token_acc": 0.18679540229885058 }, { "epoch": 0.3072412782175315, "grad_norm": 3.0113952438674327, "learning_rate": 9.21453692848769e-05, "loss": 4.616668701171875, "step": 524, "token_acc": 0.1794623708766547 }, { "epoch": 0.30782761653474056, "grad_norm": 3.6984238294448684, "learning_rate": 9.232121922626025e-05, "loss": 4.533224582672119, "step": 525, "token_acc": 0.18899855325414847 }, { "epoch": 0.30841395485194956, "grad_norm": 3.3527459968983324, "learning_rate": 9.249706916764361e-05, "loss": 4.5262064933776855, "step": 526, "token_acc": 0.1895146528468351 }, { "epoch": 0.3090002931691586, "grad_norm": 2.9330124270233995, "learning_rate": 9.267291910902696e-05, "loss": 4.551050662994385, "step": 527, "token_acc": 0.1870796960986278 }, { "epoch": 0.3095866314863676, "grad_norm": 3.1240841176918908, "learning_rate": 9.284876905041031e-05, "loss": 4.535248756408691, "step": 528, "token_acc": 0.189337813691762 }, { "epoch": 0.31017296980357667, "grad_norm": 2.5427880866609045, "learning_rate": 9.302461899179365e-05, "loss": 4.5670037269592285, "step": 529, "token_acc": 0.18567923509948453 }, { "epoch": 0.31075930812078567, "grad_norm": 3.3224454254455917, "learning_rate": 9.320046893317701e-05, "loss": 4.515107154846191, "step": 530, "token_acc": 0.18920715739291882 }, { "epoch": 0.3113456464379947, "grad_norm": 3.0015687472127315, "learning_rate": 9.337631887456036e-05, "loss": 4.516177177429199, "step": 531, "token_acc": 0.1877613323520593 }, { "epoch": 0.31193198475520373, "grad_norm": 3.8973902201992665, "learning_rate": 9.355216881594372e-05, "loss": 4.48250675201416, "step": 532, "token_acc": 0.19162787497386688 }, { "epoch": 0.3125183230724128, "grad_norm": 3.5247420708446597, "learning_rate": 9.372801875732707e-05, "loss": 4.455173492431641, "step": 533, "token_acc": 0.19637005033275398 }, { "epoch": 0.3131046613896218, "grad_norm": 3.1246786854744526, "learning_rate": 9.390386869871043e-05, "loss": 4.451881408691406, "step": 534, "token_acc": 0.19472246072902177 }, { "epoch": 0.31369099970683084, "grad_norm": 3.4385940497905843, "learning_rate": 9.407971864009378e-05, "loss": 4.48879337310791, "step": 535, "token_acc": 0.19036605683808225 }, { "epoch": 0.3142773380240399, "grad_norm": 2.597097818650952, "learning_rate": 9.425556858147713e-05, "loss": 4.520144462585449, "step": 536, "token_acc": 0.18691259070500538 }, { "epoch": 0.3148636763412489, "grad_norm": 3.4226212241298195, "learning_rate": 9.44314185228605e-05, "loss": 4.540506362915039, "step": 537, "token_acc": 0.18508584126606056 }, { "epoch": 0.31545001465845796, "grad_norm": 2.4824914064908863, "learning_rate": 9.460726846424383e-05, "loss": 4.465728282928467, "step": 538, "token_acc": 0.1915556646554954 }, { "epoch": 0.31603635297566696, "grad_norm": 2.882341396541977, "learning_rate": 9.478311840562719e-05, "loss": 4.470610618591309, "step": 539, "token_acc": 0.19236570611700032 }, { "epoch": 0.316622691292876, "grad_norm": 2.8894471474624734, "learning_rate": 9.495896834701054e-05, "loss": 4.502852439880371, "step": 540, "token_acc": 0.19015903572175072 }, { "epoch": 0.317209029610085, "grad_norm": 3.023208244491889, "learning_rate": 9.51348182883939e-05, "loss": 4.464916229248047, "step": 541, "token_acc": 0.19146902867502907 }, { "epoch": 0.31779536792729407, "grad_norm": 2.6554894722122824, "learning_rate": 9.531066822977725e-05, "loss": 4.520737171173096, "step": 542, "token_acc": 0.1874107119148145 }, { "epoch": 0.31838170624450307, "grad_norm": 2.6524765319037744, "learning_rate": 9.54865181711606e-05, "loss": 4.5033159255981445, "step": 543, "token_acc": 0.18648980221769396 }, { "epoch": 0.3189680445617121, "grad_norm": 4.00175608469235, "learning_rate": 9.566236811254395e-05, "loss": 4.572734832763672, "step": 544, "token_acc": 0.18185341252883927 }, { "epoch": 0.31955438287892113, "grad_norm": 1.9363148550412852, "learning_rate": 9.583821805392732e-05, "loss": 4.3937578201293945, "step": 545, "token_acc": 0.19903716700733623 }, { "epoch": 0.3201407211961302, "grad_norm": 4.791396426586729, "learning_rate": 9.601406799531067e-05, "loss": 4.483650207519531, "step": 546, "token_acc": 0.18751971428280712 }, { "epoch": 0.3207270595133392, "grad_norm": 2.479850389374486, "learning_rate": 9.6189917936694e-05, "loss": 4.4839067459106445, "step": 547, "token_acc": 0.19110425840509074 }, { "epoch": 0.32131339783054824, "grad_norm": 4.057648818161033, "learning_rate": 9.636576787807736e-05, "loss": 4.555606842041016, "step": 548, "token_acc": 0.18468672920757026 }, { "epoch": 0.32189973614775724, "grad_norm": 3.0281361500272883, "learning_rate": 9.654161781946072e-05, "loss": 4.48748779296875, "step": 549, "token_acc": 0.1893791359047858 }, { "epoch": 0.3224860744649663, "grad_norm": 2.533216036098531, "learning_rate": 9.671746776084407e-05, "loss": 4.413464546203613, "step": 550, "token_acc": 0.19599664254347804 }, { "epoch": 0.3230724127821753, "grad_norm": 3.4476353025632274, "learning_rate": 9.689331770222742e-05, "loss": 4.490583419799805, "step": 551, "token_acc": 0.18889616934157516 }, { "epoch": 0.32365875109938436, "grad_norm": 2.2956474462733985, "learning_rate": 9.706916764361077e-05, "loss": 4.4333600997924805, "step": 552, "token_acc": 0.19354196538820573 }, { "epoch": 0.32424508941659336, "grad_norm": 3.4186757712042337, "learning_rate": 9.724501758499414e-05, "loss": 4.487241744995117, "step": 553, "token_acc": 0.1880563631111547 }, { "epoch": 0.3248314277338024, "grad_norm": 3.0509457750185494, "learning_rate": 9.742086752637749e-05, "loss": 4.4048943519592285, "step": 554, "token_acc": 0.19973247110884276 }, { "epoch": 0.3254177660510114, "grad_norm": 2.4010549397705123, "learning_rate": 9.759671746776084e-05, "loss": 4.403026580810547, "step": 555, "token_acc": 0.1987846912851113 }, { "epoch": 0.32600410436822047, "grad_norm": 3.5560588153925865, "learning_rate": 9.777256740914418e-05, "loss": 4.4723663330078125, "step": 556, "token_acc": 0.18985294391046498 }, { "epoch": 0.32659044268542947, "grad_norm": 3.58796477016802, "learning_rate": 9.794841735052754e-05, "loss": 4.515393257141113, "step": 557, "token_acc": 0.18431624659840234 }, { "epoch": 0.32717678100263853, "grad_norm": 3.286244235333189, "learning_rate": 9.812426729191089e-05, "loss": 4.417842864990234, "step": 558, "token_acc": 0.19316545292982115 }, { "epoch": 0.32776311931984753, "grad_norm": 3.4206552730028688, "learning_rate": 9.830011723329424e-05, "loss": 4.427712440490723, "step": 559, "token_acc": 0.19389070662139743 }, { "epoch": 0.3283494576370566, "grad_norm": 2.912858358910191, "learning_rate": 9.84759671746776e-05, "loss": 4.458911418914795, "step": 560, "token_acc": 0.19085674411960243 }, { "epoch": 0.3289357959542656, "grad_norm": 2.9886374417871826, "learning_rate": 9.865181711606096e-05, "loss": 4.424046993255615, "step": 561, "token_acc": 0.19432127840272603 }, { "epoch": 0.32952213427147464, "grad_norm": 3.031565975255019, "learning_rate": 9.882766705744431e-05, "loss": 4.379822731018066, "step": 562, "token_acc": 0.19693805093364478 }, { "epoch": 0.33010847258868364, "grad_norm": 2.217911618496524, "learning_rate": 9.900351699882766e-05, "loss": 4.356549263000488, "step": 563, "token_acc": 0.20240716012847812 }, { "epoch": 0.3306948109058927, "grad_norm": 3.542208484689449, "learning_rate": 9.917936694021102e-05, "loss": 4.406285285949707, "step": 564, "token_acc": 0.19587714060267428 }, { "epoch": 0.33128114922310176, "grad_norm": 2.8100336992529793, "learning_rate": 9.935521688159436e-05, "loss": 4.383203029632568, "step": 565, "token_acc": 0.1975497224701587 }, { "epoch": 0.33186748754031076, "grad_norm": 3.047854864923898, "learning_rate": 9.953106682297771e-05, "loss": 4.409743309020996, "step": 566, "token_acc": 0.19337355691375438 }, { "epoch": 0.3324538258575198, "grad_norm": 2.57546646636222, "learning_rate": 9.970691676436106e-05, "loss": 4.444423675537109, "step": 567, "token_acc": 0.19188884618979862 }, { "epoch": 0.3330401641747288, "grad_norm": 2.8645692339766526, "learning_rate": 9.988276670574441e-05, "loss": 4.416524887084961, "step": 568, "token_acc": 0.19409797988740699 }, { "epoch": 0.33362650249193787, "grad_norm": 2.4472261273565428, "learning_rate": 0.00010005861664712778, "loss": 4.395447254180908, "step": 569, "token_acc": 0.1946796231039515 }, { "epoch": 0.33421284080914687, "grad_norm": 2.631956417804853, "learning_rate": 0.00010023446658851113, "loss": 4.44894552230835, "step": 570, "token_acc": 0.18897370470265215 }, { "epoch": 0.33479917912635593, "grad_norm": 2.4865008225004246, "learning_rate": 0.00010041031652989448, "loss": 4.335729598999023, "step": 571, "token_acc": 0.20097561750288503 }, { "epoch": 0.33538551744356493, "grad_norm": 3.043697322532231, "learning_rate": 0.00010058616647127785, "loss": 4.373074054718018, "step": 572, "token_acc": 0.1970346345418742 }, { "epoch": 0.335971855760774, "grad_norm": 2.4409304224176256, "learning_rate": 0.0001007620164126612, "loss": 4.353499412536621, "step": 573, "token_acc": 0.19993152211994777 }, { "epoch": 0.336558194077983, "grad_norm": 3.301446026270895, "learning_rate": 0.00010093786635404453, "loss": 4.405664443969727, "step": 574, "token_acc": 0.19363844537156122 }, { "epoch": 0.33714453239519204, "grad_norm": 2.864402730813161, "learning_rate": 0.00010111371629542788, "loss": 4.3503851890563965, "step": 575, "token_acc": 0.20062480377580755 }, { "epoch": 0.33773087071240104, "grad_norm": 2.858342867160818, "learning_rate": 0.00010128956623681125, "loss": 4.364161968231201, "step": 576, "token_acc": 0.1978775154897844 }, { "epoch": 0.3383172090296101, "grad_norm": 3.1758831041141122, "learning_rate": 0.0001014654161781946, "loss": 4.376925468444824, "step": 577, "token_acc": 0.19712086473176674 }, { "epoch": 0.3389035473468191, "grad_norm": 3.220219510727813, "learning_rate": 0.00010164126611957795, "loss": 4.422541618347168, "step": 578, "token_acc": 0.19094777177834732 }, { "epoch": 0.33948988566402816, "grad_norm": 2.2530355822059134, "learning_rate": 0.0001018171160609613, "loss": 4.348875045776367, "step": 579, "token_acc": 0.19977448538088372 }, { "epoch": 0.34007622398123716, "grad_norm": 3.634942922257256, "learning_rate": 0.00010199296600234467, "loss": 4.317217826843262, "step": 580, "token_acc": 0.201233524626348 }, { "epoch": 0.3406625622984462, "grad_norm": 2.3007134675353265, "learning_rate": 0.00010216881594372802, "loss": 4.3593339920043945, "step": 581, "token_acc": 0.19616560671854927 }, { "epoch": 0.3412489006156552, "grad_norm": 3.2173056706541288, "learning_rate": 0.00010234466588511135, "loss": 4.379700660705566, "step": 582, "token_acc": 0.19660178935802858 }, { "epoch": 0.34183523893286427, "grad_norm": 2.549620696797565, "learning_rate": 0.0001025205158264947, "loss": 4.396203994750977, "step": 583, "token_acc": 0.1929887877702395 }, { "epoch": 0.3424215772500733, "grad_norm": 2.7627648261715705, "learning_rate": 0.00010269636576787807, "loss": 4.392889499664307, "step": 584, "token_acc": 0.1943292951280639 }, { "epoch": 0.34300791556728233, "grad_norm": 2.4908181813130406, "learning_rate": 0.00010287221570926142, "loss": 4.396252632141113, "step": 585, "token_acc": 0.1933978866250837 }, { "epoch": 0.34359425388449133, "grad_norm": 3.4710452873198543, "learning_rate": 0.00010304806565064477, "loss": 4.371337413787842, "step": 586, "token_acc": 0.19775096212354693 }, { "epoch": 0.3441805922017004, "grad_norm": 2.1366264885882886, "learning_rate": 0.00010322391559202812, "loss": 4.3373212814331055, "step": 587, "token_acc": 0.1990349207027401 }, { "epoch": 0.3447669305189094, "grad_norm": 2.793102114015419, "learning_rate": 0.00010339976553341149, "loss": 4.3192620277404785, "step": 588, "token_acc": 0.20043531213122015 }, { "epoch": 0.34535326883611844, "grad_norm": 3.2110398057106577, "learning_rate": 0.00010357561547479484, "loss": 4.3520426750183105, "step": 589, "token_acc": 0.19888331982015184 }, { "epoch": 0.34593960715332744, "grad_norm": 2.818657134738977, "learning_rate": 0.00010375146541617819, "loss": 4.319580078125, "step": 590, "token_acc": 0.20022879471458502 }, { "epoch": 0.3465259454705365, "grad_norm": 3.1142050834669304, "learning_rate": 0.00010392731535756153, "loss": 4.402172088623047, "step": 591, "token_acc": 0.19109598588234056 }, { "epoch": 0.34711228378774556, "grad_norm": 2.481377475388217, "learning_rate": 0.00010410316529894489, "loss": 4.254935264587402, "step": 592, "token_acc": 0.2041862762942354 }, { "epoch": 0.34769862210495456, "grad_norm": 2.7899472797326803, "learning_rate": 0.00010427901524032824, "loss": 4.301620960235596, "step": 593, "token_acc": 0.201591428676059 }, { "epoch": 0.3482849604221636, "grad_norm": 2.306420877176881, "learning_rate": 0.00010445486518171159, "loss": 4.326542854309082, "step": 594, "token_acc": 0.1977195497922805 }, { "epoch": 0.3488712987393726, "grad_norm": 3.790533075561843, "learning_rate": 0.00010463071512309494, "loss": 4.306258201599121, "step": 595, "token_acc": 0.19972011730456374 }, { "epoch": 0.34945763705658167, "grad_norm": 2.131097301711391, "learning_rate": 0.00010480656506447831, "loss": 4.290352821350098, "step": 596, "token_acc": 0.2039510487647645 }, { "epoch": 0.3500439753737907, "grad_norm": 3.4011241264489187, "learning_rate": 0.00010498241500586166, "loss": 4.348827362060547, "step": 597, "token_acc": 0.19694226298252568 }, { "epoch": 0.35063031369099973, "grad_norm": 2.3812567144661907, "learning_rate": 0.00010515826494724501, "loss": 4.334071159362793, "step": 598, "token_acc": 0.19651532832614105 }, { "epoch": 0.35121665200820873, "grad_norm": 3.361051181363213, "learning_rate": 0.00010533411488862837, "loss": 4.35249137878418, "step": 599, "token_acc": 0.19578733204581367 }, { "epoch": 0.3518029903254178, "grad_norm": 2.0689154004556807, "learning_rate": 0.00010550996483001171, "loss": 4.308032035827637, "step": 600, "token_acc": 0.2002745968991027 }, { "epoch": 0.3523893286426268, "grad_norm": 3.3945659225723444, "learning_rate": 0.00010568581477139506, "loss": 4.315279960632324, "step": 601, "token_acc": 0.19940086223738882 }, { "epoch": 0.35297566695983584, "grad_norm": 2.394583028279431, "learning_rate": 0.00010586166471277841, "loss": 4.375241756439209, "step": 602, "token_acc": 0.1922472512387402 }, { "epoch": 0.35356200527704484, "grad_norm": 2.5966869299785698, "learning_rate": 0.00010603751465416176, "loss": 4.277009963989258, "step": 603, "token_acc": 0.20603324495261147 }, { "epoch": 0.3541483435942539, "grad_norm": 2.7232463688132698, "learning_rate": 0.00010621336459554513, "loss": 4.286871910095215, "step": 604, "token_acc": 0.2023142725869977 }, { "epoch": 0.3547346819114629, "grad_norm": 2.8417721138990166, "learning_rate": 0.00010638921453692848, "loss": 4.277947425842285, "step": 605, "token_acc": 0.2047835391203939 }, { "epoch": 0.35532102022867196, "grad_norm": 2.682389888868531, "learning_rate": 0.00010656506447831183, "loss": 4.279004096984863, "step": 606, "token_acc": 0.204964619805314 }, { "epoch": 0.35590735854588096, "grad_norm": 2.5595296108724948, "learning_rate": 0.0001067409144196952, "loss": 4.303957939147949, "step": 607, "token_acc": 0.20128981673075075 }, { "epoch": 0.35649369686309, "grad_norm": 2.837959032860929, "learning_rate": 0.00010691676436107855, "loss": 4.348471641540527, "step": 608, "token_acc": 0.1935732804072 }, { "epoch": 0.357080035180299, "grad_norm": 2.2017623995570577, "learning_rate": 0.00010709261430246188, "loss": 4.283245086669922, "step": 609, "token_acc": 0.20343393742860674 }, { "epoch": 0.35766637349750807, "grad_norm": 2.4879621393285696, "learning_rate": 0.00010726846424384523, "loss": 4.3325886726379395, "step": 610, "token_acc": 0.19625486754644958 }, { "epoch": 0.3582527118147171, "grad_norm": 3.0133670311804273, "learning_rate": 0.0001074443141852286, "loss": 4.32450008392334, "step": 611, "token_acc": 0.19944296375266524 }, { "epoch": 0.35883905013192613, "grad_norm": 2.983908195578399, "learning_rate": 0.00010762016412661195, "loss": 4.303168296813965, "step": 612, "token_acc": 0.20124181467953536 }, { "epoch": 0.35942538844913513, "grad_norm": 2.4041157929692636, "learning_rate": 0.0001077960140679953, "loss": 4.294432640075684, "step": 613, "token_acc": 0.20261561643765524 }, { "epoch": 0.3600117267663442, "grad_norm": 3.153780987731585, "learning_rate": 0.00010797186400937865, "loss": 4.37457275390625, "step": 614, "token_acc": 0.19388188385159102 }, { "epoch": 0.3605980650835532, "grad_norm": 1.950667897766203, "learning_rate": 0.00010814771395076202, "loss": 4.259453773498535, "step": 615, "token_acc": 0.20556281620577127 }, { "epoch": 0.36118440340076224, "grad_norm": 3.157750730971191, "learning_rate": 0.00010832356389214537, "loss": 4.238499641418457, "step": 616, "token_acc": 0.20794253185145026 }, { "epoch": 0.36177074171797124, "grad_norm": 1.8891889691306176, "learning_rate": 0.00010849941383352872, "loss": 4.216545581817627, "step": 617, "token_acc": 0.20939565325467746 }, { "epoch": 0.3623570800351803, "grad_norm": 2.8123821881610946, "learning_rate": 0.00010867526377491205, "loss": 4.283915996551514, "step": 618, "token_acc": 0.20297554659149647 }, { "epoch": 0.3629434183523893, "grad_norm": 2.314371069394081, "learning_rate": 0.00010885111371629542, "loss": 4.32310676574707, "step": 619, "token_acc": 0.19798544100220078 }, { "epoch": 0.36352975666959836, "grad_norm": 2.444273779518254, "learning_rate": 0.00010902696365767877, "loss": 4.273144721984863, "step": 620, "token_acc": 0.19962574558496746 }, { "epoch": 0.3641160949868074, "grad_norm": 2.5626319954303383, "learning_rate": 0.00010920281359906212, "loss": 4.27290678024292, "step": 621, "token_acc": 0.20420688587209002 }, { "epoch": 0.3647024333040164, "grad_norm": 2.3094596676423484, "learning_rate": 0.00010937866354044547, "loss": 4.219508171081543, "step": 622, "token_acc": 0.20828157133759956 }, { "epoch": 0.36528877162122547, "grad_norm": 2.8523401518170703, "learning_rate": 0.00010955451348182884, "loss": 4.30706787109375, "step": 623, "token_acc": 0.2004713204445251 }, { "epoch": 0.3658751099384345, "grad_norm": 2.8177216268057608, "learning_rate": 0.00010973036342321219, "loss": 4.278560638427734, "step": 624, "token_acc": 0.20058306429364967 }, { "epoch": 0.36646144825564353, "grad_norm": 2.6895108643874464, "learning_rate": 0.00010990621336459554, "loss": 4.2217254638671875, "step": 625, "token_acc": 0.20855649584958172 }, { "epoch": 0.36704778657285253, "grad_norm": 2.7574646136296157, "learning_rate": 0.0001100820633059789, "loss": 4.265744209289551, "step": 626, "token_acc": 0.20324141228766132 }, { "epoch": 0.3676341248900616, "grad_norm": 2.7351237617082598, "learning_rate": 0.00011025791324736224, "loss": 4.2529072761535645, "step": 627, "token_acc": 0.20443375057449512 }, { "epoch": 0.3682204632072706, "grad_norm": 2.4151446067812237, "learning_rate": 0.00011043376318874559, "loss": 4.239971160888672, "step": 628, "token_acc": 0.20523713465038262 }, { "epoch": 0.36880680152447964, "grad_norm": 2.081580812922123, "learning_rate": 0.00011060961313012894, "loss": 4.287620544433594, "step": 629, "token_acc": 0.20006133661848102 }, { "epoch": 0.36939313984168864, "grad_norm": 2.8298152822135116, "learning_rate": 0.00011078546307151229, "loss": 4.236255168914795, "step": 630, "token_acc": 0.20463056475457536 }, { "epoch": 0.3699794781588977, "grad_norm": 2.5510842902255084, "learning_rate": 0.00011096131301289566, "loss": 4.258484363555908, "step": 631, "token_acc": 0.20242658123646584 }, { "epoch": 0.3705658164761067, "grad_norm": 2.6950367431077433, "learning_rate": 0.00011113716295427901, "loss": 4.230443954467773, "step": 632, "token_acc": 0.20410526425107228 }, { "epoch": 0.37115215479331576, "grad_norm": 2.1898464073824186, "learning_rate": 0.00011131301289566236, "loss": 4.242029190063477, "step": 633, "token_acc": 0.20153682109107485 }, { "epoch": 0.37173849311052476, "grad_norm": 2.53603820659676, "learning_rate": 0.00011148886283704572, "loss": 4.224470615386963, "step": 634, "token_acc": 0.206255272208163 }, { "epoch": 0.3723248314277338, "grad_norm": 2.3673784961554274, "learning_rate": 0.00011166471277842907, "loss": 4.228389739990234, "step": 635, "token_acc": 0.20586544294116096 }, { "epoch": 0.3729111697449428, "grad_norm": 2.3218680839207737, "learning_rate": 0.00011184056271981241, "loss": 4.2403340339660645, "step": 636, "token_acc": 0.20616360392140776 }, { "epoch": 0.3734975080621519, "grad_norm": 2.637923432956788, "learning_rate": 0.00011201641266119576, "loss": 4.205411911010742, "step": 637, "token_acc": 0.20736103108180387 }, { "epoch": 0.3740838463793609, "grad_norm": 2.579854481699513, "learning_rate": 0.00011219226260257913, "loss": 4.257320404052734, "step": 638, "token_acc": 0.20180283509459596 }, { "epoch": 0.37467018469656993, "grad_norm": 2.2090510651164434, "learning_rate": 0.00011236811254396248, "loss": 4.209136009216309, "step": 639, "token_acc": 0.20608312144419577 }, { "epoch": 0.37525652301377893, "grad_norm": 3.867673247689839, "learning_rate": 0.00011254396248534583, "loss": 4.302282333374023, "step": 640, "token_acc": 0.196326914892245 }, { "epoch": 0.375842861330988, "grad_norm": 2.1654413354163498, "learning_rate": 0.00011271981242672918, "loss": 4.222521781921387, "step": 641, "token_acc": 0.20369304518540127 }, { "epoch": 0.376429199648197, "grad_norm": 3.087594103609828, "learning_rate": 0.00011289566236811254, "loss": 4.248073101043701, "step": 642, "token_acc": 0.2020766406022564 }, { "epoch": 0.37701553796540604, "grad_norm": 2.1813258304419834, "learning_rate": 0.0001130715123094959, "loss": 4.154412746429443, "step": 643, "token_acc": 0.21357638275083968 }, { "epoch": 0.37760187628261505, "grad_norm": 3.3076625794336065, "learning_rate": 0.00011324736225087924, "loss": 4.20400333404541, "step": 644, "token_acc": 0.20735396905902806 }, { "epoch": 0.3781882145998241, "grad_norm": 2.644500244743364, "learning_rate": 0.00011342321219226258, "loss": 4.2519965171813965, "step": 645, "token_acc": 0.20269862100570762 }, { "epoch": 0.3787745529170331, "grad_norm": 2.6667352505531925, "learning_rate": 0.00011359906213364595, "loss": 4.311054229736328, "step": 646, "token_acc": 0.19621835768676682 }, { "epoch": 0.37936089123424216, "grad_norm": 2.317800667982946, "learning_rate": 0.0001137749120750293, "loss": 4.217830657958984, "step": 647, "token_acc": 0.20569010622603304 }, { "epoch": 0.37994722955145116, "grad_norm": 2.3337578059583532, "learning_rate": 0.00011395076201641265, "loss": 4.20598030090332, "step": 648, "token_acc": 0.20874728009092175 }, { "epoch": 0.3805335678686602, "grad_norm": 2.7678048866848206, "learning_rate": 0.000114126611957796, "loss": 4.308103561401367, "step": 649, "token_acc": 0.19802635305263916 }, { "epoch": 0.3811199061858693, "grad_norm": 2.3082928696944434, "learning_rate": 0.00011430246189917936, "loss": 4.234099864959717, "step": 650, "token_acc": 0.201914854157866 }, { "epoch": 0.3817062445030783, "grad_norm": 2.3968763914936235, "learning_rate": 0.00011447831184056271, "loss": 4.15907096862793, "step": 651, "token_acc": 0.21309123583573925 }, { "epoch": 0.38229258282028733, "grad_norm": 2.098436379878401, "learning_rate": 0.00011465416178194607, "loss": 4.287763595581055, "step": 652, "token_acc": 0.19773169624454057 }, { "epoch": 0.38287892113749633, "grad_norm": 2.993266576352787, "learning_rate": 0.00011483001172332943, "loss": 4.265036582946777, "step": 653, "token_acc": 0.20070331774496447 }, { "epoch": 0.3834652594547054, "grad_norm": 1.993385308869202, "learning_rate": 0.00011500586166471277, "loss": 4.185324668884277, "step": 654, "token_acc": 0.20959614325641318 }, { "epoch": 0.3840515977719144, "grad_norm": 2.805813696939635, "learning_rate": 0.00011518171160609612, "loss": 4.205728530883789, "step": 655, "token_acc": 0.2054641234455115 }, { "epoch": 0.38463793608912344, "grad_norm": 1.8986969693751168, "learning_rate": 0.00011535756154747947, "loss": 4.19041109085083, "step": 656, "token_acc": 0.2067963687612689 }, { "epoch": 0.38522427440633245, "grad_norm": 3.0643280073221306, "learning_rate": 0.00011553341148886282, "loss": 4.218249797821045, "step": 657, "token_acc": 0.20460666375971479 }, { "epoch": 0.3858106127235415, "grad_norm": 2.089830978419018, "learning_rate": 0.00011570926143024618, "loss": 4.217906951904297, "step": 658, "token_acc": 0.20533666437482614 }, { "epoch": 0.3863969510407505, "grad_norm": 2.36016733149989, "learning_rate": 0.00011588511137162954, "loss": 4.215489864349365, "step": 659, "token_acc": 0.20456186439097432 }, { "epoch": 0.38698328935795956, "grad_norm": 1.7515316899120266, "learning_rate": 0.00011606096131301289, "loss": 4.174760818481445, "step": 660, "token_acc": 0.20799731780232508 }, { "epoch": 0.38756962767516856, "grad_norm": 2.4015701846986444, "learning_rate": 0.00011623681125439625, "loss": 4.227086067199707, "step": 661, "token_acc": 0.20463953133450785 }, { "epoch": 0.3881559659923776, "grad_norm": 2.214148268420861, "learning_rate": 0.00011641266119577959, "loss": 4.22163200378418, "step": 662, "token_acc": 0.2040581809843447 }, { "epoch": 0.3887423043095866, "grad_norm": 2.5996513782251367, "learning_rate": 0.00011658851113716294, "loss": 4.125226974487305, "step": 663, "token_acc": 0.21308594008114287 }, { "epoch": 0.3893286426267957, "grad_norm": 2.4595480251748545, "learning_rate": 0.00011676436107854629, "loss": 4.155521869659424, "step": 664, "token_acc": 0.2104404733733833 }, { "epoch": 0.3899149809440047, "grad_norm": 2.9530630107298155, "learning_rate": 0.00011694021101992964, "loss": 4.183135032653809, "step": 665, "token_acc": 0.20813966313202137 }, { "epoch": 0.39050131926121373, "grad_norm": 2.155911546664777, "learning_rate": 0.000117116060961313, "loss": 4.197364330291748, "step": 666, "token_acc": 0.20681188405722106 }, { "epoch": 0.39108765757842273, "grad_norm": 2.5509361326367093, "learning_rate": 0.00011729191090269636, "loss": 4.22589111328125, "step": 667, "token_acc": 0.20329033424436102 }, { "epoch": 0.3916739958956318, "grad_norm": 2.2358115413515742, "learning_rate": 0.00011746776084407971, "loss": 4.122133255004883, "step": 668, "token_acc": 0.21357097298148756 }, { "epoch": 0.3922603342128408, "grad_norm": 2.8407879200869472, "learning_rate": 0.00011764361078546307, "loss": 4.207402229309082, "step": 669, "token_acc": 0.20472913509328908 }, { "epoch": 0.39284667253004985, "grad_norm": 1.8183668097342147, "learning_rate": 0.00011781946072684642, "loss": 4.237727165222168, "step": 670, "token_acc": 0.20184391156729947 }, { "epoch": 0.39343301084725885, "grad_norm": 3.066399712432076, "learning_rate": 0.00011799531066822976, "loss": 4.168529510498047, "step": 671, "token_acc": 0.20791175331945214 }, { "epoch": 0.3940193491644679, "grad_norm": 1.8797947561138206, "learning_rate": 0.00011817116060961311, "loss": 4.148011684417725, "step": 672, "token_acc": 0.21082481445175746 }, { "epoch": 0.3946056874816769, "grad_norm": 3.0675257288605158, "learning_rate": 0.00011834701055099647, "loss": 4.161212921142578, "step": 673, "token_acc": 0.21053175423257292 }, { "epoch": 0.39519202579888596, "grad_norm": 1.9745773494119627, "learning_rate": 0.00011852286049237983, "loss": 4.147210121154785, "step": 674, "token_acc": 0.21428515538399343 }, { "epoch": 0.39577836411609496, "grad_norm": 3.347622964535245, "learning_rate": 0.00011869871043376318, "loss": 4.205878257751465, "step": 675, "token_acc": 0.20560969431965126 }, { "epoch": 0.396364702433304, "grad_norm": 1.8890336780637331, "learning_rate": 0.00011887456037514653, "loss": 4.118740081787109, "step": 676, "token_acc": 0.21519614131410833 }, { "epoch": 0.3969510407505131, "grad_norm": 2.896736132578702, "learning_rate": 0.00011905041031652989, "loss": 4.152634620666504, "step": 677, "token_acc": 0.21091203352553353 }, { "epoch": 0.3975373790677221, "grad_norm": 2.4418785524177085, "learning_rate": 0.00011922626025791324, "loss": 4.120591640472412, "step": 678, "token_acc": 0.21206497987500625 }, { "epoch": 0.39812371738493113, "grad_norm": 2.6209867136785245, "learning_rate": 0.0001194021101992966, "loss": 4.203255653381348, "step": 679, "token_acc": 0.20534793897665768 }, { "epoch": 0.39871005570214013, "grad_norm": 2.745224729395226, "learning_rate": 0.00011957796014067993, "loss": 4.108470439910889, "step": 680, "token_acc": 0.21366317434515705 }, { "epoch": 0.3992963940193492, "grad_norm": 2.01585643774109, "learning_rate": 0.0001197538100820633, "loss": 4.250920295715332, "step": 681, "token_acc": 0.19864673880981756 }, { "epoch": 0.3998827323365582, "grad_norm": 2.302510751856157, "learning_rate": 0.00011992966002344665, "loss": 4.157665252685547, "step": 682, "token_acc": 0.206980312926534 }, { "epoch": 0.40046907065376725, "grad_norm": 2.4412374279570797, "learning_rate": 0.00012010550996483, "loss": 4.190195083618164, "step": 683, "token_acc": 0.20590817684878351 }, { "epoch": 0.40105540897097625, "grad_norm": 2.3821393502986323, "learning_rate": 0.00012028135990621335, "loss": 4.153336524963379, "step": 684, "token_acc": 0.2114520132006094 }, { "epoch": 0.4016417472881853, "grad_norm": 1.7445297299224478, "learning_rate": 0.00012045720984759671, "loss": 4.111824989318848, "step": 685, "token_acc": 0.21490644946029513 }, { "epoch": 0.4022280856053943, "grad_norm": 2.8060739284301577, "learning_rate": 0.00012063305978898006, "loss": 4.160216331481934, "step": 686, "token_acc": 0.20775726209173198 }, { "epoch": 0.40281442392260336, "grad_norm": 2.100722450230539, "learning_rate": 0.00012080890973036341, "loss": 4.152647018432617, "step": 687, "token_acc": 0.2080494564739898 }, { "epoch": 0.40340076223981236, "grad_norm": 2.3389537338761506, "learning_rate": 0.00012098475967174678, "loss": 4.160815715789795, "step": 688, "token_acc": 0.2103728859607417 }, { "epoch": 0.4039871005570214, "grad_norm": 1.9789338697041972, "learning_rate": 0.00012116060961313012, "loss": 4.152812480926514, "step": 689, "token_acc": 0.21057015699745346 }, { "epoch": 0.4045734388742304, "grad_norm": 2.6818345095100184, "learning_rate": 0.00012133645955451347, "loss": 4.12637996673584, "step": 690, "token_acc": 0.21129294216812977 }, { "epoch": 0.4051597771914395, "grad_norm": 1.6244029783563276, "learning_rate": 0.00012151230949589682, "loss": 4.156166076660156, "step": 691, "token_acc": 0.20764431291188676 }, { "epoch": 0.4057461155086485, "grad_norm": 3.167276504124015, "learning_rate": 0.00012168815943728017, "loss": 4.206124305725098, "step": 692, "token_acc": 0.20199026921829388 }, { "epoch": 0.40633245382585753, "grad_norm": 1.8681993380613036, "learning_rate": 0.00012186400937866353, "loss": 4.181087017059326, "step": 693, "token_acc": 0.2067197459318676 }, { "epoch": 0.40691879214306653, "grad_norm": 2.9568119691184727, "learning_rate": 0.00012203985932004688, "loss": 4.21808385848999, "step": 694, "token_acc": 0.20395758325103489 }, { "epoch": 0.4075051304602756, "grad_norm": 2.0223093452670757, "learning_rate": 0.00012221570926143025, "loss": 4.093393802642822, "step": 695, "token_acc": 0.21467751036989294 }, { "epoch": 0.4080914687774846, "grad_norm": 2.5376842035994196, "learning_rate": 0.0001223915592028136, "loss": 4.146512508392334, "step": 696, "token_acc": 0.2089104967012327 }, { "epoch": 0.40867780709469365, "grad_norm": 2.400952595156885, "learning_rate": 0.00012256740914419695, "loss": 4.119112014770508, "step": 697, "token_acc": 0.2138407156976025 }, { "epoch": 0.40926414541190265, "grad_norm": 2.051615767217494, "learning_rate": 0.00012274325908558027, "loss": 4.1630353927612305, "step": 698, "token_acc": 0.20607875209548546 }, { "epoch": 0.4098504837291117, "grad_norm": 2.4070556767455265, "learning_rate": 0.00012291910902696365, "loss": 4.065162658691406, "step": 699, "token_acc": 0.2178281688708309 }, { "epoch": 0.4104368220463207, "grad_norm": 1.7767561279855022, "learning_rate": 0.000123094958968347, "loss": 4.160017967224121, "step": 700, "token_acc": 0.20539266823583685 }, { "epoch": 0.41102316036352976, "grad_norm": 2.1998659108616163, "learning_rate": 0.00012327080890973035, "loss": 4.164645671844482, "step": 701, "token_acc": 0.20756730548799607 }, { "epoch": 0.41160949868073876, "grad_norm": 2.475840659601759, "learning_rate": 0.0001234466588511137, "loss": 4.168675422668457, "step": 702, "token_acc": 0.20841045210664227 }, { "epoch": 0.4121958369979478, "grad_norm": 2.0596512499590083, "learning_rate": 0.00012362250879249706, "loss": 4.1293182373046875, "step": 703, "token_acc": 0.21049262945697367 }, { "epoch": 0.4127821753151568, "grad_norm": 2.0376017958400836, "learning_rate": 0.0001237983587338804, "loss": 4.152271270751953, "step": 704, "token_acc": 0.20728880788457588 }, { "epoch": 0.4133685136323659, "grad_norm": 2.10338543770458, "learning_rate": 0.00012397420867526376, "loss": 4.191035747528076, "step": 705, "token_acc": 0.20290883662190864 }, { "epoch": 0.41395485194957493, "grad_norm": 2.4527919996484133, "learning_rate": 0.00012415005861664714, "loss": 4.105487823486328, "step": 706, "token_acc": 0.21175861728189577 }, { "epoch": 0.41454119026678393, "grad_norm": 2.076112620592698, "learning_rate": 0.00012432590855803046, "loss": 4.096744060516357, "step": 707, "token_acc": 0.21373644682541718 }, { "epoch": 0.415127528583993, "grad_norm": 2.4501405078458998, "learning_rate": 0.0001245017584994138, "loss": 4.080202102661133, "step": 708, "token_acc": 0.2151697631678618 }, { "epoch": 0.415713866901202, "grad_norm": 2.7005766434794767, "learning_rate": 0.00012467760844079716, "loss": 4.129821300506592, "step": 709, "token_acc": 0.21251860720988933 }, { "epoch": 0.41630020521841105, "grad_norm": 2.134171405553577, "learning_rate": 0.00012485345838218054, "loss": 4.124048233032227, "step": 710, "token_acc": 0.21154408364384392 }, { "epoch": 0.41688654353562005, "grad_norm": 2.4148764514565535, "learning_rate": 0.0001250293083235639, "loss": 4.112054824829102, "step": 711, "token_acc": 0.21169364507636662 }, { "epoch": 0.4174728818528291, "grad_norm": 2.0495547010381676, "learning_rate": 0.00012520515826494724, "loss": 4.157209396362305, "step": 712, "token_acc": 0.20692587091511516 }, { "epoch": 0.4180592201700381, "grad_norm": 2.3772134147936392, "learning_rate": 0.0001253810082063306, "loss": 4.160642623901367, "step": 713, "token_acc": 0.20857684472166943 }, { "epoch": 0.41864555848724716, "grad_norm": 2.3651051237165013, "learning_rate": 0.00012555685814771394, "loss": 4.122062683105469, "step": 714, "token_acc": 0.20909662902728834 }, { "epoch": 0.41923189680445616, "grad_norm": 2.122922504207128, "learning_rate": 0.0001257327080890973, "loss": 4.099079132080078, "step": 715, "token_acc": 0.21240977833447983 }, { "epoch": 0.4198182351216652, "grad_norm": 2.437035772579928, "learning_rate": 0.00012590855803048064, "loss": 4.065517425537109, "step": 716, "token_acc": 0.21513740691195213 }, { "epoch": 0.4204045734388742, "grad_norm": 1.8086811220859031, "learning_rate": 0.000126084407971864, "loss": 4.120763778686523, "step": 717, "token_acc": 0.21111116827261925 }, { "epoch": 0.4209909117560833, "grad_norm": 3.16220286536545, "learning_rate": 0.00012626025791324735, "loss": 4.158533573150635, "step": 718, "token_acc": 0.20581005527464008 }, { "epoch": 0.4215772500732923, "grad_norm": 1.679460962803792, "learning_rate": 0.0001264361078546307, "loss": 4.08233642578125, "step": 719, "token_acc": 0.2153272678999072 }, { "epoch": 0.42216358839050133, "grad_norm": 2.804379712588806, "learning_rate": 0.00012661195779601405, "loss": 4.186943054199219, "step": 720, "token_acc": 0.2050246496088657 }, { "epoch": 0.42274992670771033, "grad_norm": 2.391357394029273, "learning_rate": 0.0001267878077373974, "loss": 4.042555809020996, "step": 721, "token_acc": 0.21814060011994127 }, { "epoch": 0.4233362650249194, "grad_norm": 2.126805986126612, "learning_rate": 0.00012696365767878078, "loss": 4.137605667114258, "step": 722, "token_acc": 0.20761405227816362 }, { "epoch": 0.4239226033421284, "grad_norm": 2.316979942646839, "learning_rate": 0.00012713950762016413, "loss": 4.13259220123291, "step": 723, "token_acc": 0.21073668832495607 }, { "epoch": 0.42450894165933745, "grad_norm": 1.8990744805574709, "learning_rate": 0.00012731535756154748, "loss": 4.040066719055176, "step": 724, "token_acc": 0.21839272368776733 }, { "epoch": 0.42509527997654645, "grad_norm": 2.564686818764265, "learning_rate": 0.0001274912075029308, "loss": 4.155131816864014, "step": 725, "token_acc": 0.20508214090890017 }, { "epoch": 0.4256816182937555, "grad_norm": 2.3381168743589287, "learning_rate": 0.00012766705744431418, "loss": 4.170192718505859, "step": 726, "token_acc": 0.2039467182354088 }, { "epoch": 0.4262679566109645, "grad_norm": 2.5315902542143087, "learning_rate": 0.00012784290738569753, "loss": 4.092622756958008, "step": 727, "token_acc": 0.21180726637490152 }, { "epoch": 0.42685429492817356, "grad_norm": 2.1824803687804173, "learning_rate": 0.00012801875732708088, "loss": 4.109076499938965, "step": 728, "token_acc": 0.21181478696422426 }, { "epoch": 0.42744063324538256, "grad_norm": 2.036872391261052, "learning_rate": 0.00012819460726846423, "loss": 4.081572532653809, "step": 729, "token_acc": 0.21358976825722592 }, { "epoch": 0.4280269715625916, "grad_norm": 1.9012882818076624, "learning_rate": 0.00012837045720984758, "loss": 4.100808620452881, "step": 730, "token_acc": 0.21383671182236574 }, { "epoch": 0.4286133098798006, "grad_norm": 2.2856031050832324, "learning_rate": 0.00012854630715123093, "loss": 4.118395805358887, "step": 731, "token_acc": 0.2106478149100257 }, { "epoch": 0.4291996481970097, "grad_norm": 2.127030448882872, "learning_rate": 0.00012872215709261429, "loss": 4.081466197967529, "step": 732, "token_acc": 0.21411614365840226 }, { "epoch": 0.42978598651421873, "grad_norm": 2.415004072002858, "learning_rate": 0.00012889800703399766, "loss": 4.068760871887207, "step": 733, "token_acc": 0.21585425691862434 }, { "epoch": 0.43037232483142773, "grad_norm": 2.7470040560462947, "learning_rate": 0.000129073856975381, "loss": 4.063611030578613, "step": 734, "token_acc": 0.21337652724079142 }, { "epoch": 0.4309586631486368, "grad_norm": 1.8332247268435393, "learning_rate": 0.00012924970691676434, "loss": 4.081305503845215, "step": 735, "token_acc": 0.21493379559620845 }, { "epoch": 0.4315450014658458, "grad_norm": 2.3766773644982737, "learning_rate": 0.0001294255568581477, "loss": 4.039984226226807, "step": 736, "token_acc": 0.21901808893073973 }, { "epoch": 0.43213133978305485, "grad_norm": 1.925927827075631, "learning_rate": 0.00012960140679953107, "loss": 4.093031883239746, "step": 737, "token_acc": 0.21356167080821198 }, { "epoch": 0.43271767810026385, "grad_norm": 2.7307736873525905, "learning_rate": 0.00012977725674091442, "loss": 4.111199378967285, "step": 738, "token_acc": 0.21000319009541285 }, { "epoch": 0.4333040164174729, "grad_norm": 2.0236355732370246, "learning_rate": 0.00012995310668229777, "loss": 4.092278480529785, "step": 739, "token_acc": 0.21215870362034595 }, { "epoch": 0.4338903547346819, "grad_norm": 2.711432149669485, "learning_rate": 0.00013012895662368112, "loss": 4.053503513336182, "step": 740, "token_acc": 0.21752965989981546 }, { "epoch": 0.43447669305189096, "grad_norm": 1.9142003930487304, "learning_rate": 0.00013030480656506447, "loss": 4.143960952758789, "step": 741, "token_acc": 0.2083737747581514 }, { "epoch": 0.43506303136909996, "grad_norm": 2.4806757993224045, "learning_rate": 0.00013048065650644782, "loss": 4.038522243499756, "step": 742, "token_acc": 0.21859457056322765 }, { "epoch": 0.435649369686309, "grad_norm": 1.9143472174167695, "learning_rate": 0.00013065650644783117, "loss": 4.072566986083984, "step": 743, "token_acc": 0.21407456476829098 }, { "epoch": 0.436235708003518, "grad_norm": 1.9707957125227447, "learning_rate": 0.00013083235638921452, "loss": 4.032047271728516, "step": 744, "token_acc": 0.21724656823796615 }, { "epoch": 0.4368220463207271, "grad_norm": 2.096178781940885, "learning_rate": 0.00013100820633059787, "loss": 4.087066650390625, "step": 745, "token_acc": 0.21413080960956357 }, { "epoch": 0.4374083846379361, "grad_norm": 2.224574019360642, "learning_rate": 0.00013118405627198123, "loss": 4.112011432647705, "step": 746, "token_acc": 0.20901167998047712 }, { "epoch": 0.43799472295514513, "grad_norm": 1.8161649689655095, "learning_rate": 0.00013135990621336458, "loss": 4.048702239990234, "step": 747, "token_acc": 0.21569269355158074 }, { "epoch": 0.43858106127235413, "grad_norm": 2.298111949720826, "learning_rate": 0.00013153575615474793, "loss": 4.138697147369385, "step": 748, "token_acc": 0.20531864787507698 }, { "epoch": 0.4391673995895632, "grad_norm": 2.1554211028757497, "learning_rate": 0.0001317116060961313, "loss": 4.076339244842529, "step": 749, "token_acc": 0.2136930807484062 }, { "epoch": 0.4397537379067722, "grad_norm": 2.1022149460459323, "learning_rate": 0.00013188745603751466, "loss": 4.093334197998047, "step": 750, "token_acc": 0.2121363986452981 }, { "epoch": 0.44034007622398125, "grad_norm": 2.184088089058623, "learning_rate": 0.00013206330597889798, "loss": 4.112935543060303, "step": 751, "token_acc": 0.20786556616614218 }, { "epoch": 0.44092641454119025, "grad_norm": 2.0327734832578823, "learning_rate": 0.00013223915592028133, "loss": 4.0729804039001465, "step": 752, "token_acc": 0.21243168701159407 }, { "epoch": 0.4415127528583993, "grad_norm": 2.355493907916843, "learning_rate": 0.0001324150058616647, "loss": 4.067388534545898, "step": 753, "token_acc": 0.21615596780896967 }, { "epoch": 0.4420990911756083, "grad_norm": 2.06448310854639, "learning_rate": 0.00013259085580304806, "loss": 4.034196853637695, "step": 754, "token_acc": 0.21728687740084712 }, { "epoch": 0.44268542949281736, "grad_norm": 2.417821533403658, "learning_rate": 0.0001327667057444314, "loss": 4.091578960418701, "step": 755, "token_acc": 0.20998901081399607 }, { "epoch": 0.44327176781002636, "grad_norm": 2.1743948418696575, "learning_rate": 0.00013294255568581476, "loss": 4.0719451904296875, "step": 756, "token_acc": 0.21568946121439436 }, { "epoch": 0.4438581061272354, "grad_norm": 2.0290993848132914, "learning_rate": 0.0001331184056271981, "loss": 4.0861968994140625, "step": 757, "token_acc": 0.211518461739941 }, { "epoch": 0.4444444444444444, "grad_norm": 2.1300144802947054, "learning_rate": 0.00013329425556858146, "loss": 4.091272354125977, "step": 758, "token_acc": 0.20996469162627446 }, { "epoch": 0.4450307827616535, "grad_norm": 2.099180534718681, "learning_rate": 0.00013347010550996481, "loss": 4.030974864959717, "step": 759, "token_acc": 0.2173153673304594 }, { "epoch": 0.4456171210788625, "grad_norm": 2.0165908506525287, "learning_rate": 0.00013364595545134816, "loss": 4.082425117492676, "step": 760, "token_acc": 0.2113496503858436 }, { "epoch": 0.44620345939607153, "grad_norm": 2.3107332528859645, "learning_rate": 0.00013382180539273152, "loss": 4.047661781311035, "step": 761, "token_acc": 0.21523623553626325 }, { "epoch": 0.4467897977132806, "grad_norm": 1.5887295852701293, "learning_rate": 0.00013399765533411487, "loss": 4.032632827758789, "step": 762, "token_acc": 0.2163661256842384 }, { "epoch": 0.4473761360304896, "grad_norm": 2.780848673395678, "learning_rate": 0.00013417350527549822, "loss": 4.060242176055908, "step": 763, "token_acc": 0.21551964482038496 }, { "epoch": 0.44796247434769865, "grad_norm": 1.7776689254813058, "learning_rate": 0.0001343493552168816, "loss": 4.008596897125244, "step": 764, "token_acc": 0.22028683387647935 }, { "epoch": 0.44854881266490765, "grad_norm": 2.439113486656821, "learning_rate": 0.00013452520515826495, "loss": 4.083590984344482, "step": 765, "token_acc": 0.21233907507907085 }, { "epoch": 0.4491351509821167, "grad_norm": 1.9902075433071043, "learning_rate": 0.0001347010550996483, "loss": 4.019782066345215, "step": 766, "token_acc": 0.21966962152321648 }, { "epoch": 0.4497214892993257, "grad_norm": 2.2612576620568463, "learning_rate": 0.00013487690504103165, "loss": 4.063292503356934, "step": 767, "token_acc": 0.2138527930567899 }, { "epoch": 0.45030782761653476, "grad_norm": 2.05662811076168, "learning_rate": 0.000135052754982415, "loss": 4.059133529663086, "step": 768, "token_acc": 0.21186494597202635 }, { "epoch": 0.45089416593374376, "grad_norm": 1.8830594810898171, "learning_rate": 0.00013522860492379835, "loss": 4.1018829345703125, "step": 769, "token_acc": 0.20955638328609022 }, { "epoch": 0.4514805042509528, "grad_norm": 2.331390668269172, "learning_rate": 0.0001354044548651817, "loss": 4.039198875427246, "step": 770, "token_acc": 0.21756219618677752 }, { "epoch": 0.4520668425681618, "grad_norm": 2.45959116003766, "learning_rate": 0.00013558030480656505, "loss": 4.031373500823975, "step": 771, "token_acc": 0.21664699392845296 }, { "epoch": 0.4526531808853709, "grad_norm": 2.1000476049218855, "learning_rate": 0.0001357561547479484, "loss": 3.9976091384887695, "step": 772, "token_acc": 0.2205907226521206 }, { "epoch": 0.4532395192025799, "grad_norm": 2.1774641066309153, "learning_rate": 0.00013593200468933175, "loss": 4.0507893562316895, "step": 773, "token_acc": 0.2165560684109642 }, { "epoch": 0.45382585751978893, "grad_norm": 1.8722671773171955, "learning_rate": 0.0001361078546307151, "loss": 4.077295780181885, "step": 774, "token_acc": 0.21288626930533328 }, { "epoch": 0.45441219583699793, "grad_norm": 2.3845653862423277, "learning_rate": 0.00013628370457209846, "loss": 4.086780548095703, "step": 775, "token_acc": 0.21148281264399593 }, { "epoch": 0.454998534154207, "grad_norm": 1.8095497490989678, "learning_rate": 0.00013645955451348183, "loss": 4.035080909729004, "step": 776, "token_acc": 0.21685192353776106 }, { "epoch": 0.455584872471416, "grad_norm": 2.318434675374333, "learning_rate": 0.00013663540445486518, "loss": 4.0308427810668945, "step": 777, "token_acc": 0.21576059770324024 }, { "epoch": 0.45617121078862505, "grad_norm": 2.0287197433760653, "learning_rate": 0.0001368112543962485, "loss": 4.029142379760742, "step": 778, "token_acc": 0.21721333636137516 }, { "epoch": 0.45675754910583405, "grad_norm": 2.475126604314923, "learning_rate": 0.00013698710433763186, "loss": 4.030572891235352, "step": 779, "token_acc": 0.21645246257494938 }, { "epoch": 0.4573438874230431, "grad_norm": 1.7161855550958882, "learning_rate": 0.00013716295427901524, "loss": 4.023125648498535, "step": 780, "token_acc": 0.2155937526460423 }, { "epoch": 0.4579302257402521, "grad_norm": 2.3769327424182167, "learning_rate": 0.0001373388042203986, "loss": 4.012986183166504, "step": 781, "token_acc": 0.2186782110085776 }, { "epoch": 0.45851656405746116, "grad_norm": 2.0624369022885913, "learning_rate": 0.00013751465416178194, "loss": 4.023934364318848, "step": 782, "token_acc": 0.21677102767909628 }, { "epoch": 0.45910290237467016, "grad_norm": 2.177195634816114, "learning_rate": 0.0001376905041031653, "loss": 4.044404029846191, "step": 783, "token_acc": 0.21445519159211984 }, { "epoch": 0.4596892406918792, "grad_norm": 2.2563262707201686, "learning_rate": 0.00013786635404454864, "loss": 4.055820941925049, "step": 784, "token_acc": 0.21395694057232478 }, { "epoch": 0.4602755790090882, "grad_norm": 2.4828346512876682, "learning_rate": 0.000138042203985932, "loss": 4.045603275299072, "step": 785, "token_acc": 0.21446117121626304 }, { "epoch": 0.4608619173262973, "grad_norm": 2.042905256187356, "learning_rate": 0.00013821805392731534, "loss": 3.9925954341888428, "step": 786, "token_acc": 0.2192796224314044 }, { "epoch": 0.4614482556435063, "grad_norm": 2.1699196072038323, "learning_rate": 0.0001383939038686987, "loss": 4.077231407165527, "step": 787, "token_acc": 0.210341239740475 }, { "epoch": 0.46203459396071533, "grad_norm": 1.668827224585959, "learning_rate": 0.00013856975381008204, "loss": 4.014451026916504, "step": 788, "token_acc": 0.21867900758189443 }, { "epoch": 0.46262093227792433, "grad_norm": 2.4213183982831046, "learning_rate": 0.0001387456037514654, "loss": 4.035038948059082, "step": 789, "token_acc": 0.21588062358957372 }, { "epoch": 0.4632072705951334, "grad_norm": 1.8628530124538194, "learning_rate": 0.00013892145369284875, "loss": 4.022771835327148, "step": 790, "token_acc": 0.21723966118584953 }, { "epoch": 0.46379360891234245, "grad_norm": 2.1894736172037446, "learning_rate": 0.00013909730363423212, "loss": 4.059389591217041, "step": 791, "token_acc": 0.21307001231043243 }, { "epoch": 0.46437994722955145, "grad_norm": 1.875901227982124, "learning_rate": 0.00013927315357561547, "loss": 3.989318370819092, "step": 792, "token_acc": 0.22080802675585284 }, { "epoch": 0.4649662855467605, "grad_norm": 2.6561148005205633, "learning_rate": 0.00013944900351699883, "loss": 4.044867992401123, "step": 793, "token_acc": 0.21448236026615256 }, { "epoch": 0.4655526238639695, "grad_norm": 1.7302015259189907, "learning_rate": 0.00013962485345838218, "loss": 4.015591621398926, "step": 794, "token_acc": 0.21665450972049155 }, { "epoch": 0.46613896218117856, "grad_norm": 2.3466255090535415, "learning_rate": 0.00013980070339976553, "loss": 4.007062911987305, "step": 795, "token_acc": 0.21982217586273153 }, { "epoch": 0.46672530049838756, "grad_norm": 2.0480849198278555, "learning_rate": 0.00013997655334114888, "loss": 4.026554107666016, "step": 796, "token_acc": 0.21854175107224186 }, { "epoch": 0.4673116388155966, "grad_norm": 2.100109175539408, "learning_rate": 0.00014015240328253223, "loss": 4.008111953735352, "step": 797, "token_acc": 0.2188090430190523 }, { "epoch": 0.4678979771328056, "grad_norm": 2.415989266838997, "learning_rate": 0.00014032825322391558, "loss": 3.945232629776001, "step": 798, "token_acc": 0.22416460155930695 }, { "epoch": 0.4684843154500147, "grad_norm": 1.7068560656200347, "learning_rate": 0.00014050410316529893, "loss": 4.011207580566406, "step": 799, "token_acc": 0.21589051259290665 }, { "epoch": 0.4690706537672237, "grad_norm": 2.1426999987947446, "learning_rate": 0.00014067995310668228, "loss": 4.03364372253418, "step": 800, "token_acc": 0.21555533808562363 }, { "epoch": 0.46965699208443273, "grad_norm": 1.8094369551718885, "learning_rate": 0.00014085580304806563, "loss": 4.028426170349121, "step": 801, "token_acc": 0.21638462670394498 }, { "epoch": 0.47024333040164173, "grad_norm": 1.845072807159866, "learning_rate": 0.00014103165298944898, "loss": 4.014816761016846, "step": 802, "token_acc": 0.21553700107652607 }, { "epoch": 0.4708296687188508, "grad_norm": 2.2812061529373335, "learning_rate": 0.00014120750293083236, "loss": 4.042592525482178, "step": 803, "token_acc": 0.21553874828794584 }, { "epoch": 0.4714160070360598, "grad_norm": 1.702609737313762, "learning_rate": 0.0001413833528722157, "loss": 4.070908546447754, "step": 804, "token_acc": 0.21147798536627446 }, { "epoch": 0.47200234535326885, "grad_norm": 2.5034058601638933, "learning_rate": 0.00014155920281359904, "loss": 4.0488786697387695, "step": 805, "token_acc": 0.21286942351624663 }, { "epoch": 0.47258868367047785, "grad_norm": 1.7491534141236686, "learning_rate": 0.0001417350527549824, "loss": 3.9564085006713867, "step": 806, "token_acc": 0.22351506149152733 }, { "epoch": 0.4731750219876869, "grad_norm": 2.6368328801590466, "learning_rate": 0.00014191090269636576, "loss": 4.078041076660156, "step": 807, "token_acc": 0.2092960879751978 }, { "epoch": 0.4737613603048959, "grad_norm": 1.7049492759816158, "learning_rate": 0.00014208675263774912, "loss": 4.031122207641602, "step": 808, "token_acc": 0.21409589593522838 }, { "epoch": 0.47434769862210496, "grad_norm": 2.175849449615241, "learning_rate": 0.00014226260257913247, "loss": 4.014800071716309, "step": 809, "token_acc": 0.21730702544387703 }, { "epoch": 0.47493403693931396, "grad_norm": 1.6008167829662476, "learning_rate": 0.00014243845252051582, "loss": 3.988290786743164, "step": 810, "token_acc": 0.2181024523077002 }, { "epoch": 0.475520375256523, "grad_norm": 1.9951804005247435, "learning_rate": 0.00014261430246189917, "loss": 4.050525188446045, "step": 811, "token_acc": 0.21434982865196386 }, { "epoch": 0.476106713573732, "grad_norm": 1.9175919238515464, "learning_rate": 0.00014279015240328252, "loss": 4.008174419403076, "step": 812, "token_acc": 0.2191672900319906 }, { "epoch": 0.4766930518909411, "grad_norm": 2.287598029487112, "learning_rate": 0.00014296600234466587, "loss": 3.989607334136963, "step": 813, "token_acc": 0.21782770163820522 }, { "epoch": 0.4772793902081501, "grad_norm": 2.0477310229223096, "learning_rate": 0.00014314185228604922, "loss": 4.016474723815918, "step": 814, "token_acc": 0.21551566633214794 }, { "epoch": 0.47786572852535913, "grad_norm": 1.7085568657747554, "learning_rate": 0.00014331770222743257, "loss": 4.027948379516602, "step": 815, "token_acc": 0.21404618439295173 }, { "epoch": 0.47845206684256814, "grad_norm": 2.110394743573738, "learning_rate": 0.00014349355216881592, "loss": 4.040744781494141, "step": 816, "token_acc": 0.21310861903236142 }, { "epoch": 0.4790384051597772, "grad_norm": 2.2496327045668245, "learning_rate": 0.00014366940211019927, "loss": 4.01626443862915, "step": 817, "token_acc": 0.21752420540376136 }, { "epoch": 0.47962474347698625, "grad_norm": 1.8336540992901764, "learning_rate": 0.00014384525205158262, "loss": 3.9970903396606445, "step": 818, "token_acc": 0.21812795487287706 }, { "epoch": 0.48021108179419525, "grad_norm": 1.538716532624314, "learning_rate": 0.000144021101992966, "loss": 3.980809450149536, "step": 819, "token_acc": 0.2216139102236775 }, { "epoch": 0.4807974201114043, "grad_norm": 1.8764621654293472, "learning_rate": 0.00014419695193434935, "loss": 3.9870188236236572, "step": 820, "token_acc": 0.2181704702999422 }, { "epoch": 0.4813837584286133, "grad_norm": 2.1105175838345693, "learning_rate": 0.0001443728018757327, "loss": 4.027533531188965, "step": 821, "token_acc": 0.21529521725956624 }, { "epoch": 0.48197009674582236, "grad_norm": 1.9414430591555694, "learning_rate": 0.00014454865181711603, "loss": 4.012726783752441, "step": 822, "token_acc": 0.21794045963291417 }, { "epoch": 0.48255643506303136, "grad_norm": 1.7616903861393867, "learning_rate": 0.0001447245017584994, "loss": 3.963686943054199, "step": 823, "token_acc": 0.22228820780293837 }, { "epoch": 0.4831427733802404, "grad_norm": 1.9193893003192424, "learning_rate": 0.00014490035169988276, "loss": 3.987231731414795, "step": 824, "token_acc": 0.21857036266380822 }, { "epoch": 0.4837291116974494, "grad_norm": 2.8079381968028243, "learning_rate": 0.0001450762016412661, "loss": 4.059203147888184, "step": 825, "token_acc": 0.21176744366443004 }, { "epoch": 0.4843154500146585, "grad_norm": 1.321918916420202, "learning_rate": 0.00014525205158264946, "loss": 3.9564807415008545, "step": 826, "token_acc": 0.22537002932026354 }, { "epoch": 0.4849017883318675, "grad_norm": 2.1831183683445126, "learning_rate": 0.0001454279015240328, "loss": 3.943370819091797, "step": 827, "token_acc": 0.22395926813560396 }, { "epoch": 0.48548812664907653, "grad_norm": 1.8073220827352843, "learning_rate": 0.00014560375146541616, "loss": 4.050217628479004, "step": 828, "token_acc": 0.21223494574670754 }, { "epoch": 0.48607446496628554, "grad_norm": 2.0773636021346413, "learning_rate": 0.0001457796014067995, "loss": 4.001766204833984, "step": 829, "token_acc": 0.21755831781842078 }, { "epoch": 0.4866608032834946, "grad_norm": 1.8394367672718561, "learning_rate": 0.0001459554513481829, "loss": 3.990412712097168, "step": 830, "token_acc": 0.2188863208288564 }, { "epoch": 0.4872471416007036, "grad_norm": 1.9226135370128277, "learning_rate": 0.0001461313012895662, "loss": 3.954904556274414, "step": 831, "token_acc": 0.2220279589095575 }, { "epoch": 0.48783347991791265, "grad_norm": 2.1943672410085626, "learning_rate": 0.00014630715123094956, "loss": 4.047689914703369, "step": 832, "token_acc": 0.21354716951744082 }, { "epoch": 0.48841981823512165, "grad_norm": 1.6164406326378484, "learning_rate": 0.00014648300117233291, "loss": 3.950265645980835, "step": 833, "token_acc": 0.22424753026151406 }, { "epoch": 0.4890061565523307, "grad_norm": 2.4966985525759995, "learning_rate": 0.0001466588511137163, "loss": 3.9795613288879395, "step": 834, "token_acc": 0.21804594930371415 }, { "epoch": 0.4895924948695397, "grad_norm": 1.60918508625833, "learning_rate": 0.00014683470105509964, "loss": 3.9786455631256104, "step": 835, "token_acc": 0.2197452313378655 }, { "epoch": 0.49017883318674876, "grad_norm": 1.6759984207154885, "learning_rate": 0.000147010550996483, "loss": 3.938727378845215, "step": 836, "token_acc": 0.2215581564308025 }, { "epoch": 0.49076517150395776, "grad_norm": 2.1113533339907185, "learning_rate": 0.00014718640093786635, "loss": 4.035755157470703, "step": 837, "token_acc": 0.21481765655822055 }, { "epoch": 0.4913515098211668, "grad_norm": 2.0609562966744095, "learning_rate": 0.0001473622508792497, "loss": 3.9650158882141113, "step": 838, "token_acc": 0.222173579109063 }, { "epoch": 0.4919378481383758, "grad_norm": 1.791600221391203, "learning_rate": 0.00014753810082063305, "loss": 3.995253324508667, "step": 839, "token_acc": 0.21673336504024238 }, { "epoch": 0.4925241864555849, "grad_norm": 2.4579608974993046, "learning_rate": 0.0001477139507620164, "loss": 3.959195613861084, "step": 840, "token_acc": 0.22332642714560785 }, { "epoch": 0.4931105247727939, "grad_norm": 1.6621475859581218, "learning_rate": 0.00014788980070339975, "loss": 3.998112678527832, "step": 841, "token_acc": 0.21640121066498919 }, { "epoch": 0.49369686309000294, "grad_norm": 1.469163654760437, "learning_rate": 0.0001480656506447831, "loss": 3.97268009185791, "step": 842, "token_acc": 0.22018389029796032 }, { "epoch": 0.49428320140721194, "grad_norm": 2.30500065737085, "learning_rate": 0.00014824150058616645, "loss": 4.016376495361328, "step": 843, "token_acc": 0.2150182527541018 }, { "epoch": 0.494869539724421, "grad_norm": 1.7887059826943665, "learning_rate": 0.0001484173505275498, "loss": 3.9573819637298584, "step": 844, "token_acc": 0.22371168462373675 }, { "epoch": 0.49545587804163, "grad_norm": 2.3988451252707566, "learning_rate": 0.00014859320046893315, "loss": 3.932079315185547, "step": 845, "token_acc": 0.2243850140891421 }, { "epoch": 0.49604221635883905, "grad_norm": 1.5338282934520788, "learning_rate": 0.00014876905041031653, "loss": 3.955700397491455, "step": 846, "token_acc": 0.21945676654977947 }, { "epoch": 0.4966285546760481, "grad_norm": 1.9250776311428506, "learning_rate": 0.00014894490035169988, "loss": 4.006363868713379, "step": 847, "token_acc": 0.21841865037567587 }, { "epoch": 0.4972148929932571, "grad_norm": 1.4048102487483758, "learning_rate": 0.00014912075029308323, "loss": 3.9912896156311035, "step": 848, "token_acc": 0.21928597364317906 }, { "epoch": 0.49780123131046616, "grad_norm": 2.1756339607300013, "learning_rate": 0.00014929660023446656, "loss": 4.036858558654785, "step": 849, "token_acc": 0.21234694548563163 }, { "epoch": 0.49838756962767516, "grad_norm": 1.5716772729800585, "learning_rate": 0.00014947245017584993, "loss": 4.011584281921387, "step": 850, "token_acc": 0.2162639758639049 }, { "epoch": 0.4989739079448842, "grad_norm": 1.949847969488178, "learning_rate": 0.00014964830011723329, "loss": 3.9427664279937744, "step": 851, "token_acc": 0.22413635735390378 }, { "epoch": 0.4995602462620932, "grad_norm": 1.9896352603864982, "learning_rate": 0.00014982415005861664, "loss": 4.000300407409668, "step": 852, "token_acc": 0.21806170210052417 }, { "epoch": 0.5001465845793023, "grad_norm": 1.7644891475355713, "learning_rate": 0.00015, "loss": 3.979973793029785, "step": 853, "token_acc": 0.22028406398483627 }, { "epoch": 0.5007329228965113, "grad_norm": 2.2017802075397044, "learning_rate": 0.00015017584994138334, "loss": 3.9872255325317383, "step": 854, "token_acc": 0.21750855616181755 }, { "epoch": 0.5013192612137203, "grad_norm": 1.9348175449343203, "learning_rate": 0.0001503516998827667, "loss": 4.005254745483398, "step": 855, "token_acc": 0.21751924966692182 }, { "epoch": 0.5019055995309294, "grad_norm": 1.8726980600246896, "learning_rate": 0.00015052754982415004, "loss": 3.987555980682373, "step": 856, "token_acc": 0.21767533062651193 }, { "epoch": 0.5024919378481384, "grad_norm": 1.990956672328526, "learning_rate": 0.00015070339976553342, "loss": 3.960458517074585, "step": 857, "token_acc": 0.22118701835268162 }, { "epoch": 0.5030782761653474, "grad_norm": 1.8878931629909304, "learning_rate": 0.00015087924970691677, "loss": 3.9807324409484863, "step": 858, "token_acc": 0.21681157292974515 }, { "epoch": 0.5036646144825564, "grad_norm": 2.1481393464654372, "learning_rate": 0.00015105509964830012, "loss": 4.004087448120117, "step": 859, "token_acc": 0.21796587704822173 }, { "epoch": 0.5042509527997655, "grad_norm": 1.704303098304262, "learning_rate": 0.00015123094958968347, "loss": 3.969203472137451, "step": 860, "token_acc": 0.21856859915087284 }, { "epoch": 0.5048372911169745, "grad_norm": 1.7188916071284426, "learning_rate": 0.00015140679953106682, "loss": 3.929818630218506, "step": 861, "token_acc": 0.22368165993378253 }, { "epoch": 0.5054236294341835, "grad_norm": 1.8650604888006674, "learning_rate": 0.00015158264947245014, "loss": 3.9721601009368896, "step": 862, "token_acc": 0.219752282996307 }, { "epoch": 0.5060099677513925, "grad_norm": 2.0548758658245267, "learning_rate": 0.0001517584994138335, "loss": 3.990267276763916, "step": 863, "token_acc": 0.21548263359781408 }, { "epoch": 0.5065963060686016, "grad_norm": 1.5728431121297184, "learning_rate": 0.00015193434935521685, "loss": 4.012087345123291, "step": 864, "token_acc": 0.21419306105438632 }, { "epoch": 0.5071826443858106, "grad_norm": 1.8105860775151201, "learning_rate": 0.0001521101992966002, "loss": 3.946206569671631, "step": 865, "token_acc": 0.2221922390484231 }, { "epoch": 0.5077689827030196, "grad_norm": 1.5569200935759993, "learning_rate": 0.00015228604923798358, "loss": 3.9780349731445312, "step": 866, "token_acc": 0.219986240560004 }, { "epoch": 0.5083553210202286, "grad_norm": 1.8787609377561236, "learning_rate": 0.00015246189917936693, "loss": 3.9243876934051514, "step": 867, "token_acc": 0.22371271038121932 }, { "epoch": 0.5089416593374377, "grad_norm": 1.6512207327099913, "learning_rate": 0.00015263774912075028, "loss": 3.876498222351074, "step": 868, "token_acc": 0.22979409141109505 }, { "epoch": 0.5095279976546467, "grad_norm": 1.756021093611537, "learning_rate": 0.00015281359906213363, "loss": 3.99798846244812, "step": 869, "token_acc": 0.21573954844783863 }, { "epoch": 0.5101143359718557, "grad_norm": 1.718331963623993, "learning_rate": 0.00015298944900351698, "loss": 3.944492816925049, "step": 870, "token_acc": 0.2218630920848912 }, { "epoch": 0.5107006742890647, "grad_norm": 1.7128046050177161, "learning_rate": 0.00015316529894490033, "loss": 3.983834743499756, "step": 871, "token_acc": 0.21704382516494622 }, { "epoch": 0.5112870126062738, "grad_norm": 1.8836001278405181, "learning_rate": 0.00015334114888628368, "loss": 3.883991003036499, "step": 872, "token_acc": 0.23023657659451197 }, { "epoch": 0.5118733509234829, "grad_norm": 1.8942648121354546, "learning_rate": 0.00015351699882766706, "loss": 3.985807180404663, "step": 873, "token_acc": 0.21714366547539296 }, { "epoch": 0.5124596892406919, "grad_norm": 2.0609604959415804, "learning_rate": 0.0001536928487690504, "loss": 3.9133596420288086, "step": 874, "token_acc": 0.2254088136198552 }, { "epoch": 0.513046027557901, "grad_norm": 1.7925070501652656, "learning_rate": 0.00015386869871043376, "loss": 3.8894381523132324, "step": 875, "token_acc": 0.22630443301120085 }, { "epoch": 0.51363236587511, "grad_norm": 1.979811186356441, "learning_rate": 0.0001540445486518171, "loss": 3.910457134246826, "step": 876, "token_acc": 0.22494236041874377 }, { "epoch": 0.514218704192319, "grad_norm": 2.2961128449422707, "learning_rate": 0.00015422039859320046, "loss": 3.9257307052612305, "step": 877, "token_acc": 0.2224986092346817 }, { "epoch": 0.514805042509528, "grad_norm": 2.08409492393019, "learning_rate": 0.0001543962485345838, "loss": 3.9230222702026367, "step": 878, "token_acc": 0.2227323400969341 }, { "epoch": 0.5153913808267371, "grad_norm": 1.8807552297427133, "learning_rate": 0.00015457209847596716, "loss": 3.9706668853759766, "step": 879, "token_acc": 0.2191473859669354 }, { "epoch": 0.5159777191439461, "grad_norm": 2.150173111629756, "learning_rate": 0.0001547479484173505, "loss": 3.8807334899902344, "step": 880, "token_acc": 0.22980057458007425 }, { "epoch": 0.5165640574611551, "grad_norm": 1.7047678488505607, "learning_rate": 0.00015492379835873387, "loss": 3.9462296962738037, "step": 881, "token_acc": 0.22053098532321688 }, { "epoch": 0.5171503957783641, "grad_norm": 1.9234628884001572, "learning_rate": 0.00015509964830011722, "loss": 3.891421318054199, "step": 882, "token_acc": 0.22498364604857382 }, { "epoch": 0.5177367340955732, "grad_norm": 1.7389757320470693, "learning_rate": 0.00015527549824150057, "loss": 3.930666446685791, "step": 883, "token_acc": 0.22066262914143214 }, { "epoch": 0.5183230724127822, "grad_norm": 2.142997139557562, "learning_rate": 0.00015545134818288392, "loss": 3.8838438987731934, "step": 884, "token_acc": 0.22805118803282837 }, { "epoch": 0.5189094107299912, "grad_norm": 1.68542605805146, "learning_rate": 0.00015562719812426727, "loss": 3.9110770225524902, "step": 885, "token_acc": 0.22470971281123966 }, { "epoch": 0.5194957490472002, "grad_norm": 2.198383007322505, "learning_rate": 0.00015580304806565062, "loss": 3.9386661052703857, "step": 886, "token_acc": 0.22072635121799747 }, { "epoch": 0.5200820873644093, "grad_norm": 1.460136455490652, "learning_rate": 0.00015597889800703397, "loss": 3.934483528137207, "step": 887, "token_acc": 0.2227731440213931 }, { "epoch": 0.5206684256816183, "grad_norm": 2.0341208810793736, "learning_rate": 0.00015615474794841735, "loss": 3.849897861480713, "step": 888, "token_acc": 0.23172450308457923 }, { "epoch": 0.5212547639988273, "grad_norm": 1.7702890794575363, "learning_rate": 0.0001563305978898007, "loss": 3.8413984775543213, "step": 889, "token_acc": 0.23266234041201753 }, { "epoch": 0.5218411023160363, "grad_norm": 1.3172823022436198, "learning_rate": 0.00015650644783118405, "loss": 4.023441791534424, "step": 890, "token_acc": 0.2140231777076869 }, { "epoch": 0.5224274406332454, "grad_norm": 1.7987630372430532, "learning_rate": 0.0001566822977725674, "loss": 3.9418463706970215, "step": 891, "token_acc": 0.21902007098738144 }, { "epoch": 0.5230137789504544, "grad_norm": 1.5457812760931215, "learning_rate": 0.00015685814771395075, "loss": 3.8915979862213135, "step": 892, "token_acc": 0.22530290641492604 }, { "epoch": 0.5236001172676634, "grad_norm": 1.8524408684101006, "learning_rate": 0.0001570339976553341, "loss": 3.956329822540283, "step": 893, "token_acc": 0.21877838663433324 }, { "epoch": 0.5241864555848724, "grad_norm": 1.6202311706340022, "learning_rate": 0.00015720984759671745, "loss": 3.970872163772583, "step": 894, "token_acc": 0.2151157834919733 }, { "epoch": 0.5247727939020815, "grad_norm": 1.7296810237840348, "learning_rate": 0.0001573856975381008, "loss": 3.884525775909424, "step": 895, "token_acc": 0.22611281955704657 }, { "epoch": 0.5253591322192905, "grad_norm": 1.9700508466213726, "learning_rate": 0.00015756154747948418, "loss": 3.886685371398926, "step": 896, "token_acc": 0.22721692152073364 }, { "epoch": 0.5259454705364995, "grad_norm": 1.6643932546054272, "learning_rate": 0.00015773739742086753, "loss": 3.9216156005859375, "step": 897, "token_acc": 0.22382920825500358 }, { "epoch": 0.5265318088537085, "grad_norm": 2.0422673315712836, "learning_rate": 0.00015791324736225086, "loss": 3.9734396934509277, "step": 898, "token_acc": 0.21721680088589893 }, { "epoch": 0.5271181471709177, "grad_norm": 1.8285500570488256, "learning_rate": 0.0001580890973036342, "loss": 3.9177701473236084, "step": 899, "token_acc": 0.22320209577067432 }, { "epoch": 0.5277044854881267, "grad_norm": 1.892132984584803, "learning_rate": 0.00015826494724501756, "loss": 3.8765602111816406, "step": 900, "token_acc": 0.2274972571764112 }, { "epoch": 0.5282908238053357, "grad_norm": 2.0480632835267034, "learning_rate": 0.0001584407971864009, "loss": 3.8768796920776367, "step": 901, "token_acc": 0.22793550519704706 }, { "epoch": 0.5288771621225447, "grad_norm": 1.8469982288869577, "learning_rate": 0.00015861664712778426, "loss": 3.9207754135131836, "step": 902, "token_acc": 0.2237546468401487 }, { "epoch": 0.5294635004397538, "grad_norm": 1.8078244233765264, "learning_rate": 0.0001587924970691676, "loss": 3.962942361831665, "step": 903, "token_acc": 0.21719667952510122 }, { "epoch": 0.5300498387569628, "grad_norm": 2.0112665418099884, "learning_rate": 0.000158968347010551, "loss": 3.9658801555633545, "step": 904, "token_acc": 0.21717460219436382 }, { "epoch": 0.5306361770741718, "grad_norm": 1.5803050145359165, "learning_rate": 0.00015914419695193434, "loss": 3.930394172668457, "step": 905, "token_acc": 0.2199539958041269 }, { "epoch": 0.5312225153913809, "grad_norm": 2.265693151130477, "learning_rate": 0.0001593200468933177, "loss": 3.926424264907837, "step": 906, "token_acc": 0.22296035584276616 }, { "epoch": 0.5318088537085899, "grad_norm": 1.6345979565422217, "learning_rate": 0.00015949589683470104, "loss": 3.871497392654419, "step": 907, "token_acc": 0.22907294205388415 }, { "epoch": 0.5323951920257989, "grad_norm": 1.8870322544597933, "learning_rate": 0.0001596717467760844, "loss": 3.909820079803467, "step": 908, "token_acc": 0.22357154341252924 }, { "epoch": 0.5329815303430079, "grad_norm": 1.7884835913229296, "learning_rate": 0.00015984759671746774, "loss": 3.866403818130493, "step": 909, "token_acc": 0.2279854950392159 }, { "epoch": 0.533567868660217, "grad_norm": 1.6228249739147715, "learning_rate": 0.0001600234466588511, "loss": 3.926779270172119, "step": 910, "token_acc": 0.22188930077975202 }, { "epoch": 0.534154206977426, "grad_norm": 2.0371392394497576, "learning_rate": 0.00016019929660023447, "loss": 3.983499526977539, "step": 911, "token_acc": 0.21486293120309652 }, { "epoch": 0.534740545294635, "grad_norm": 1.9346715671081824, "learning_rate": 0.00016037514654161782, "loss": 3.9338266849517822, "step": 912, "token_acc": 0.2199525671567353 }, { "epoch": 0.535326883611844, "grad_norm": 1.6310952632069726, "learning_rate": 0.00016055099648300118, "loss": 3.907869577407837, "step": 913, "token_acc": 0.22508152728216785 }, { "epoch": 0.5359132219290531, "grad_norm": 1.6824459491486203, "learning_rate": 0.00016072684642438453, "loss": 3.873809337615967, "step": 914, "token_acc": 0.2271099704744425 }, { "epoch": 0.5364995602462621, "grad_norm": 1.7378065073842361, "learning_rate": 0.00016090269636576788, "loss": 3.869752883911133, "step": 915, "token_acc": 0.22676870176028938 }, { "epoch": 0.5370858985634711, "grad_norm": 2.2672502519030124, "learning_rate": 0.0001610785463071512, "loss": 3.85251522064209, "step": 916, "token_acc": 0.22783467121248743 }, { "epoch": 0.5376722368806801, "grad_norm": 1.4910400619078958, "learning_rate": 0.00016125439624853455, "loss": 3.8842687606811523, "step": 917, "token_acc": 0.22513684256666794 }, { "epoch": 0.5382585751978892, "grad_norm": 2.1919057211015858, "learning_rate": 0.0001614302461899179, "loss": 3.872668981552124, "step": 918, "token_acc": 0.22686832740213522 }, { "epoch": 0.5388449135150982, "grad_norm": 1.6830724282109406, "learning_rate": 0.00016160609613130125, "loss": 3.909123420715332, "step": 919, "token_acc": 0.22325362460046513 }, { "epoch": 0.5394312518323072, "grad_norm": 2.350468600195527, "learning_rate": 0.00016178194607268463, "loss": 3.887495279312134, "step": 920, "token_acc": 0.22444137889215657 }, { "epoch": 0.5400175901495162, "grad_norm": 1.4240916291357377, "learning_rate": 0.00016195779601406798, "loss": 3.901359796524048, "step": 921, "token_acc": 0.22152898734381993 }, { "epoch": 0.5406039284667253, "grad_norm": 2.5578139137799214, "learning_rate": 0.00016213364595545133, "loss": 3.8996005058288574, "step": 922, "token_acc": 0.2224880568208536 }, { "epoch": 0.5411902667839343, "grad_norm": 1.654339863655892, "learning_rate": 0.00016230949589683468, "loss": 3.8932442665100098, "step": 923, "token_acc": 0.22398950144584012 }, { "epoch": 0.5417766051011433, "grad_norm": 2.305658898130935, "learning_rate": 0.00016248534583821804, "loss": 3.9399142265319824, "step": 924, "token_acc": 0.21933216359067673 }, { "epoch": 0.5423629434183523, "grad_norm": 1.5671466964981424, "learning_rate": 0.00016266119577960139, "loss": 3.9029433727264404, "step": 925, "token_acc": 0.2244351100811124 }, { "epoch": 0.5429492817355615, "grad_norm": 1.4155163165349693, "learning_rate": 0.00016283704572098474, "loss": 3.8714826107025146, "step": 926, "token_acc": 0.22622458346082167 }, { "epoch": 0.5435356200527705, "grad_norm": 1.8090848368862913, "learning_rate": 0.00016301289566236812, "loss": 3.9201483726501465, "step": 927, "token_acc": 0.22064769828416583 }, { "epoch": 0.5441219583699795, "grad_norm": 1.7077767009824116, "learning_rate": 0.00016318874560375147, "loss": 3.915027141571045, "step": 928, "token_acc": 0.21957110632731466 }, { "epoch": 0.5447082966871885, "grad_norm": 2.3053761811942692, "learning_rate": 0.00016336459554513482, "loss": 3.8680150508880615, "step": 929, "token_acc": 0.22908105340198773 }, { "epoch": 0.5452946350043976, "grad_norm": 1.7038704881339632, "learning_rate": 0.00016354044548651817, "loss": 3.8649606704711914, "step": 930, "token_acc": 0.22599985395977634 }, { "epoch": 0.5458809733216066, "grad_norm": 1.9590465692716896, "learning_rate": 0.00016371629542790152, "loss": 3.916379928588867, "step": 931, "token_acc": 0.22121331822576118 }, { "epoch": 0.5464673116388156, "grad_norm": 2.107097180155753, "learning_rate": 0.00016389214536928487, "loss": 3.860433578491211, "step": 932, "token_acc": 0.22758102385476603 }, { "epoch": 0.5470536499560247, "grad_norm": 2.020049755319126, "learning_rate": 0.0001640679953106682, "loss": 3.876265048980713, "step": 933, "token_acc": 0.2249887457394585 }, { "epoch": 0.5476399882732337, "grad_norm": 1.8651374267246572, "learning_rate": 0.00016424384525205154, "loss": 3.887596368789673, "step": 934, "token_acc": 0.22486311756928515 }, { "epoch": 0.5482263265904427, "grad_norm": 1.6270706204818088, "learning_rate": 0.0001644196951934349, "loss": 3.7863473892211914, "step": 935, "token_acc": 0.23526055088422362 }, { "epoch": 0.5488126649076517, "grad_norm": 1.8790362280306379, "learning_rate": 0.00016459554513481827, "loss": 3.892228603363037, "step": 936, "token_acc": 0.22367166868935198 }, { "epoch": 0.5493990032248608, "grad_norm": 1.6262894298631754, "learning_rate": 0.00016477139507620162, "loss": 3.8819103240966797, "step": 937, "token_acc": 0.22461407509378842 }, { "epoch": 0.5499853415420698, "grad_norm": 1.6835260779443002, "learning_rate": 0.00016494724501758497, "loss": 3.8713810443878174, "step": 938, "token_acc": 0.22596033205135427 }, { "epoch": 0.5505716798592788, "grad_norm": 1.6139327305337157, "learning_rate": 0.00016512309495896833, "loss": 3.8582606315612793, "step": 939, "token_acc": 0.22724204171955678 }, { "epoch": 0.5511580181764878, "grad_norm": 1.6328977273897982, "learning_rate": 0.00016529894490035168, "loss": 3.918926239013672, "step": 940, "token_acc": 0.21781373723104538 }, { "epoch": 0.5517443564936969, "grad_norm": 2.540165440159551, "learning_rate": 0.00016547479484173503, "loss": 3.8559579849243164, "step": 941, "token_acc": 0.2290715995555544 }, { "epoch": 0.5523306948109059, "grad_norm": 1.6070015667916382, "learning_rate": 0.00016565064478311838, "loss": 3.8953933715820312, "step": 942, "token_acc": 0.2232832507228418 }, { "epoch": 0.5529170331281149, "grad_norm": 2.6087940129834606, "learning_rate": 0.00016582649472450176, "loss": 3.8771557807922363, "step": 943, "token_acc": 0.22429230231061525 }, { "epoch": 0.5535033714453239, "grad_norm": 1.6772942708898626, "learning_rate": 0.0001660023446658851, "loss": 3.8471951484680176, "step": 944, "token_acc": 0.22528464797687553 }, { "epoch": 0.554089709762533, "grad_norm": 2.1197199190074536, "learning_rate": 0.00016617819460726846, "loss": 3.889328956604004, "step": 945, "token_acc": 0.22346470983584693 }, { "epoch": 0.554676048079742, "grad_norm": 1.446835203621188, "learning_rate": 0.0001663540445486518, "loss": 3.8504536151885986, "step": 946, "token_acc": 0.22807207216709605 }, { "epoch": 0.555262386396951, "grad_norm": 2.0554873327067367, "learning_rate": 0.00016652989449003516, "loss": 3.887838840484619, "step": 947, "token_acc": 0.22247060174613476 }, { "epoch": 0.55584872471416, "grad_norm": 1.4985531094150735, "learning_rate": 0.0001667057444314185, "loss": 3.8691744804382324, "step": 948, "token_acc": 0.22617778441134767 }, { "epoch": 0.5564350630313691, "grad_norm": 1.7896530441901979, "learning_rate": 0.00016688159437280186, "loss": 3.8619778156280518, "step": 949, "token_acc": 0.22637287973650477 }, { "epoch": 0.5570214013485781, "grad_norm": 1.6209176814096278, "learning_rate": 0.00016705744431418524, "loss": 3.883054256439209, "step": 950, "token_acc": 0.2237000823657571 }, { "epoch": 0.5576077396657871, "grad_norm": 2.033597766300496, "learning_rate": 0.00016723329425556856, "loss": 3.8718981742858887, "step": 951, "token_acc": 0.2251093113518304 }, { "epoch": 0.5581940779829961, "grad_norm": 2.042305998126389, "learning_rate": 0.00016740914419695191, "loss": 3.8430066108703613, "step": 952, "token_acc": 0.22733380394166275 }, { "epoch": 0.5587804163002053, "grad_norm": 1.4804159926717073, "learning_rate": 0.00016758499413833527, "loss": 3.889702796936035, "step": 953, "token_acc": 0.22307067936988267 }, { "epoch": 0.5593667546174143, "grad_norm": 1.9605146504877211, "learning_rate": 0.00016776084407971862, "loss": 3.805490732192993, "step": 954, "token_acc": 0.2299566384257852 }, { "epoch": 0.5599530929346233, "grad_norm": 1.329651864074579, "learning_rate": 0.00016793669402110197, "loss": 3.8096566200256348, "step": 955, "token_acc": 0.2306157819879429 }, { "epoch": 0.5605394312518323, "grad_norm": 2.1099994418681005, "learning_rate": 0.00016811254396248532, "loss": 3.752127170562744, "step": 956, "token_acc": 0.2341067109216535 }, { "epoch": 0.5611257695690414, "grad_norm": 1.6716877531090917, "learning_rate": 0.00016828839390386867, "loss": 3.792173147201538, "step": 957, "token_acc": 0.23246892109500805 }, { "epoch": 0.5617121078862504, "grad_norm": 1.776225589174403, "learning_rate": 0.00016846424384525205, "loss": 3.7778494358062744, "step": 958, "token_acc": 0.23393346090008563 }, { "epoch": 0.5622984462034594, "grad_norm": 2.0573797546130765, "learning_rate": 0.0001686400937866354, "loss": 3.80838680267334, "step": 959, "token_acc": 0.23018234395930243 }, { "epoch": 0.5628847845206685, "grad_norm": 2.627189945187957, "learning_rate": 0.00016881594372801875, "loss": 3.849771499633789, "step": 960, "token_acc": 0.22590771705431384 }, { "epoch": 0.5634711228378775, "grad_norm": 1.4022259144079376, "learning_rate": 0.0001689917936694021, "loss": 3.798753261566162, "step": 961, "token_acc": 0.23199926703602303 }, { "epoch": 0.5640574611550865, "grad_norm": 2.641507792972365, "learning_rate": 0.00016916764361078545, "loss": 3.8406167030334473, "step": 962, "token_acc": 0.22649577042069668 }, { "epoch": 0.5646437994722955, "grad_norm": 1.6975595373942463, "learning_rate": 0.0001693434935521688, "loss": 3.775881290435791, "step": 963, "token_acc": 0.2357917643034074 }, { "epoch": 0.5652301377895046, "grad_norm": 1.910419676601113, "learning_rate": 0.00016951934349355215, "loss": 3.7803802490234375, "step": 964, "token_acc": 0.2310104062999735 }, { "epoch": 0.5658164761067136, "grad_norm": 2.0456848023221528, "learning_rate": 0.00016969519343493553, "loss": 3.8123297691345215, "step": 965, "token_acc": 0.22908259662709057 }, { "epoch": 0.5664028144239226, "grad_norm": 2.0004497909113788, "learning_rate": 0.00016987104337631888, "loss": 3.8176627159118652, "step": 966, "token_acc": 0.23051391374577732 }, { "epoch": 0.5669891527411316, "grad_norm": 1.6749179829807868, "learning_rate": 0.00017004689331770223, "loss": 3.865586757659912, "step": 967, "token_acc": 0.22201220415835765 }, { "epoch": 0.5675754910583407, "grad_norm": 1.43314195297598, "learning_rate": 0.00017022274325908558, "loss": 3.8020176887512207, "step": 968, "token_acc": 0.22984268895728757 }, { "epoch": 0.5681618293755497, "grad_norm": 1.8604374951102054, "learning_rate": 0.0001703985932004689, "loss": 3.7938385009765625, "step": 969, "token_acc": 0.2336951736781865 }, { "epoch": 0.5687481676927587, "grad_norm": 2.058462608515411, "learning_rate": 0.00017057444314185226, "loss": 3.8569295406341553, "step": 970, "token_acc": 0.2216671669861014 }, { "epoch": 0.5693345060099677, "grad_norm": 1.5182828879918648, "learning_rate": 0.0001707502930832356, "loss": 3.7685647010803223, "step": 971, "token_acc": 0.23434252893801036 }, { "epoch": 0.5699208443271768, "grad_norm": 1.876005075206123, "learning_rate": 0.00017092614302461896, "loss": 3.760934829711914, "step": 972, "token_acc": 0.23547494620350445 }, { "epoch": 0.5705071826443858, "grad_norm": 1.5505384496975014, "learning_rate": 0.0001711019929660023, "loss": 3.8039684295654297, "step": 973, "token_acc": 0.22771953432640576 }, { "epoch": 0.5710935209615948, "grad_norm": 1.8705460505803075, "learning_rate": 0.0001712778429073857, "loss": 3.7606213092803955, "step": 974, "token_acc": 0.23188597824420837 }, { "epoch": 0.5716798592788038, "grad_norm": 1.7528434236491515, "learning_rate": 0.00017145369284876904, "loss": 3.761270523071289, "step": 975, "token_acc": 0.23444719557424942 }, { "epoch": 0.5722661975960129, "grad_norm": 2.0821731093703497, "learning_rate": 0.0001716295427901524, "loss": 3.7339396476745605, "step": 976, "token_acc": 0.2378353388722311 }, { "epoch": 0.5728525359132219, "grad_norm": 1.9691817522107247, "learning_rate": 0.00017180539273153574, "loss": 3.732114553451538, "step": 977, "token_acc": 0.238153206172103 }, { "epoch": 0.5734388742304309, "grad_norm": 2.4070037941293068, "learning_rate": 0.0001719812426729191, "loss": 3.7604122161865234, "step": 978, "token_acc": 0.23372010539116336 }, { "epoch": 0.5740252125476399, "grad_norm": 1.6260488737434329, "learning_rate": 0.00017215709261430244, "loss": 3.8127951622009277, "step": 979, "token_acc": 0.22550215952647382 }, { "epoch": 0.574611550864849, "grad_norm": 1.9366604197758266, "learning_rate": 0.0001723329425556858, "loss": 3.763200044631958, "step": 980, "token_acc": 0.23357217573221759 }, { "epoch": 0.575197889182058, "grad_norm": 2.0556119984650834, "learning_rate": 0.00017250879249706917, "loss": 3.8390207290649414, "step": 981, "token_acc": 0.2248840603714209 }, { "epoch": 0.575784227499267, "grad_norm": 1.772028914817992, "learning_rate": 0.00017268464243845252, "loss": 3.772087574005127, "step": 982, "token_acc": 0.23338080091563634 }, { "epoch": 0.576370565816476, "grad_norm": 1.4915034547494663, "learning_rate": 0.00017286049237983587, "loss": 3.791701078414917, "step": 983, "token_acc": 0.2299218870445552 }, { "epoch": 0.5769569041336852, "grad_norm": 1.7591559593946273, "learning_rate": 0.00017303634232121922, "loss": 3.71233868598938, "step": 984, "token_acc": 0.23830011919449615 }, { "epoch": 0.5775432424508942, "grad_norm": 1.7977195385366684, "learning_rate": 0.00017321219226260257, "loss": 3.7185568809509277, "step": 985, "token_acc": 0.23748368930705308 }, { "epoch": 0.5781295807681032, "grad_norm": 2.032938491014217, "learning_rate": 0.00017338804220398593, "loss": 3.732858657836914, "step": 986, "token_acc": 0.23777224019303106 }, { "epoch": 0.5787159190853123, "grad_norm": 1.6908459291620666, "learning_rate": 0.00017356389214536925, "loss": 3.807938814163208, "step": 987, "token_acc": 0.22720925697198205 }, { "epoch": 0.5793022574025213, "grad_norm": 1.9746385185012727, "learning_rate": 0.0001737397420867526, "loss": 3.789818286895752, "step": 988, "token_acc": 0.23036192354658744 }, { "epoch": 0.5798885957197303, "grad_norm": 1.7685715705150378, "learning_rate": 0.00017391559202813595, "loss": 3.7408945560455322, "step": 989, "token_acc": 0.23466769868750528 }, { "epoch": 0.5804749340369393, "grad_norm": 2.085513015081606, "learning_rate": 0.00017409144196951933, "loss": 3.779965400695801, "step": 990, "token_acc": 0.23080559853564314 }, { "epoch": 0.5810612723541484, "grad_norm": 1.97289393508748, "learning_rate": 0.00017426729191090268, "loss": 3.73659610748291, "step": 991, "token_acc": 0.23539004534212696 }, { "epoch": 0.5816476106713574, "grad_norm": 1.5719566316629405, "learning_rate": 0.00017444314185228603, "loss": 3.767144203186035, "step": 992, "token_acc": 0.231133577248078 }, { "epoch": 0.5822339489885664, "grad_norm": 2.221009110169234, "learning_rate": 0.00017461899179366938, "loss": 3.813162088394165, "step": 993, "token_acc": 0.22603453370172047 }, { "epoch": 0.5828202873057754, "grad_norm": 1.5729398388240954, "learning_rate": 0.00017479484173505273, "loss": 3.738103151321411, "step": 994, "token_acc": 0.23470266655182373 }, { "epoch": 0.5834066256229845, "grad_norm": 2.0343612603791104, "learning_rate": 0.00017497069167643608, "loss": 3.756946563720703, "step": 995, "token_acc": 0.2318222254497921 }, { "epoch": 0.5839929639401935, "grad_norm": 1.780455478426327, "learning_rate": 0.00017514654161781943, "loss": 3.763497829437256, "step": 996, "token_acc": 0.23103976772785306 }, { "epoch": 0.5845793022574025, "grad_norm": 1.6800980913892105, "learning_rate": 0.0001753223915592028, "loss": 3.753526210784912, "step": 997, "token_acc": 0.2321179654670696 }, { "epoch": 0.5851656405746115, "grad_norm": 2.265499265235378, "learning_rate": 0.00017549824150058616, "loss": 3.7691006660461426, "step": 998, "token_acc": 0.22900453955901426 }, { "epoch": 0.5857519788918206, "grad_norm": 1.6240050212863657, "learning_rate": 0.00017567409144196951, "loss": 3.7468395233154297, "step": 999, "token_acc": 0.23339079070451846 }, { "epoch": 0.5863383172090296, "grad_norm": 2.2018163113534612, "learning_rate": 0.00017584994138335287, "loss": 3.7238576412200928, "step": 1000, "token_acc": 0.23595961924924083 }, { "epoch": 0.5869246555262386, "grad_norm": 1.6378836151373348, "learning_rate": 0.00017602579132473622, "loss": 3.7456817626953125, "step": 1001, "token_acc": 0.23389593975459477 }, { "epoch": 0.5875109938434476, "grad_norm": 2.28179104386628, "learning_rate": 0.00017620164126611957, "loss": 3.812601089477539, "step": 1002, "token_acc": 0.22701729235274462 }, { "epoch": 0.5880973321606567, "grad_norm": 1.8069343466352854, "learning_rate": 0.00017637749120750292, "loss": 3.777170181274414, "step": 1003, "token_acc": 0.22912079498592827 }, { "epoch": 0.5886836704778657, "grad_norm": 1.8631877657548856, "learning_rate": 0.0001765533411488863, "loss": 3.7404391765594482, "step": 1004, "token_acc": 0.23333145963655635 }, { "epoch": 0.5892700087950747, "grad_norm": 1.7310119191640232, "learning_rate": 0.0001767291910902696, "loss": 3.7471632957458496, "step": 1005, "token_acc": 0.2318820998103318 }, { "epoch": 0.5898563471122837, "grad_norm": 1.6105529593792534, "learning_rate": 0.00017690504103165297, "loss": 3.741431713104248, "step": 1006, "token_acc": 0.23254195223558424 }, { "epoch": 0.5904426854294929, "grad_norm": 1.7639133298148444, "learning_rate": 0.00017708089097303632, "loss": 3.74269962310791, "step": 1007, "token_acc": 0.23368833585421506 }, { "epoch": 0.5910290237467019, "grad_norm": 2.024488540150968, "learning_rate": 0.00017725674091441967, "loss": 3.6688318252563477, "step": 1008, "token_acc": 0.241715951512101 }, { "epoch": 0.5916153620639109, "grad_norm": 2.0853110137348727, "learning_rate": 0.00017743259085580302, "loss": 3.6972496509552, "step": 1009, "token_acc": 0.23908795827385176 }, { "epoch": 0.5922017003811199, "grad_norm": 1.9697296507358653, "learning_rate": 0.00017760844079718637, "loss": 3.709904432296753, "step": 1010, "token_acc": 0.2352049776967273 }, { "epoch": 0.592788038698329, "grad_norm": 1.5305082980186744, "learning_rate": 0.00017778429073856973, "loss": 3.7503957748413086, "step": 1011, "token_acc": 0.2309733824521351 }, { "epoch": 0.593374377015538, "grad_norm": 1.7420866171252485, "learning_rate": 0.00017796014067995308, "loss": 3.666067600250244, "step": 1012, "token_acc": 0.2417316030365603 }, { "epoch": 0.593960715332747, "grad_norm": 1.754435111239744, "learning_rate": 0.00017813599062133645, "loss": 3.6906795501708984, "step": 1013, "token_acc": 0.23827355143870713 }, { "epoch": 0.594547053649956, "grad_norm": 1.5697168034186035, "learning_rate": 0.0001783118405627198, "loss": 3.729680061340332, "step": 1014, "token_acc": 0.2347010557082746 }, { "epoch": 0.5951333919671651, "grad_norm": 2.116488133678572, "learning_rate": 0.00017848769050410316, "loss": 3.6820173263549805, "step": 1015, "token_acc": 0.2389939750963643 }, { "epoch": 0.5957197302843741, "grad_norm": 1.8672920267123638, "learning_rate": 0.0001786635404454865, "loss": 3.724274158477783, "step": 1016, "token_acc": 0.2332079088786238 }, { "epoch": 0.5963060686015831, "grad_norm": 1.657482172222701, "learning_rate": 0.00017883939038686986, "loss": 3.7396392822265625, "step": 1017, "token_acc": 0.23240976745649644 }, { "epoch": 0.5968924069187922, "grad_norm": 2.088800459388359, "learning_rate": 0.0001790152403282532, "loss": 3.7212891578674316, "step": 1018, "token_acc": 0.23728237360001714 }, { "epoch": 0.5974787452360012, "grad_norm": 1.6670460566976946, "learning_rate": 0.00017919109026963656, "loss": 3.6965465545654297, "step": 1019, "token_acc": 0.23797888513069232 }, { "epoch": 0.5980650835532102, "grad_norm": 2.0990693646240994, "learning_rate": 0.00017936694021101994, "loss": 3.704850912094116, "step": 1020, "token_acc": 0.2369784629430917 }, { "epoch": 0.5986514218704192, "grad_norm": 1.555766524748678, "learning_rate": 0.0001795427901524033, "loss": 3.713559865951538, "step": 1021, "token_acc": 0.23499892092597552 }, { "epoch": 0.5992377601876283, "grad_norm": 1.9341881331459105, "learning_rate": 0.0001797186400937866, "loss": 3.6927831172943115, "step": 1022, "token_acc": 0.2388497459018265 }, { "epoch": 0.5998240985048373, "grad_norm": 1.5790932430517346, "learning_rate": 0.00017989449003516996, "loss": 3.6795473098754883, "step": 1023, "token_acc": 0.24034447449260904 }, { "epoch": 0.6004104368220463, "grad_norm": 2.1328626002951987, "learning_rate": 0.00018007033997655331, "loss": 3.679569721221924, "step": 1024, "token_acc": 0.2380116269532051 }, { "epoch": 0.6009967751392553, "grad_norm": 1.9213149808541714, "learning_rate": 0.00018024618991793666, "loss": 3.6670780181884766, "step": 1025, "token_acc": 0.23923851256260337 }, { "epoch": 0.6015831134564644, "grad_norm": 1.7268151431963263, "learning_rate": 0.00018042203985932002, "loss": 3.6794185638427734, "step": 1026, "token_acc": 0.2390296501440673 }, { "epoch": 0.6021694517736734, "grad_norm": 1.7862344592153199, "learning_rate": 0.00018059788980070337, "loss": 3.689814567565918, "step": 1027, "token_acc": 0.23739270313746164 }, { "epoch": 0.6027557900908824, "grad_norm": 1.408158527334921, "learning_rate": 0.00018077373974208674, "loss": 3.6734933853149414, "step": 1028, "token_acc": 0.2403965814432246 }, { "epoch": 0.6033421284080914, "grad_norm": 1.6313996795919927, "learning_rate": 0.0001809495896834701, "loss": 3.7151341438293457, "step": 1029, "token_acc": 0.23451085368897953 }, { "epoch": 0.6039284667253005, "grad_norm": 2.028145277378289, "learning_rate": 0.00018112543962485345, "loss": 3.6773934364318848, "step": 1030, "token_acc": 0.23917269304132632 }, { "epoch": 0.6045148050425095, "grad_norm": 1.7517553346273191, "learning_rate": 0.0001813012895662368, "loss": 3.7363381385803223, "step": 1031, "token_acc": 0.23308443964873776 }, { "epoch": 0.6051011433597185, "grad_norm": 2.3260384753709644, "learning_rate": 0.00018147713950762015, "loss": 3.682816982269287, "step": 1032, "token_acc": 0.23894650039905288 }, { "epoch": 0.6056874816769275, "grad_norm": 1.590409744856486, "learning_rate": 0.0001816529894490035, "loss": 3.698338508605957, "step": 1033, "token_acc": 0.23675260240352064 }, { "epoch": 0.6062738199941367, "grad_norm": 1.9647215600580257, "learning_rate": 0.00018182883939038685, "loss": 3.7348413467407227, "step": 1034, "token_acc": 0.23381419503750722 }, { "epoch": 0.6068601583113457, "grad_norm": 1.6307737572790548, "learning_rate": 0.00018200468933177023, "loss": 3.6462979316711426, "step": 1035, "token_acc": 0.24204562518929898 }, { "epoch": 0.6074464966285547, "grad_norm": 2.2673121932280202, "learning_rate": 0.00018218053927315358, "loss": 3.691218376159668, "step": 1036, "token_acc": 0.23765262246002258 }, { "epoch": 0.6080328349457637, "grad_norm": 1.3696256691750233, "learning_rate": 0.00018235638921453693, "loss": 3.693535089492798, "step": 1037, "token_acc": 0.23815732174201115 }, { "epoch": 0.6086191732629728, "grad_norm": 1.692119290208484, "learning_rate": 0.00018253223915592028, "loss": 3.665832757949829, "step": 1038, "token_acc": 0.2424768155079654 }, { "epoch": 0.6092055115801818, "grad_norm": 1.6373115228902064, "learning_rate": 0.00018270808909730363, "loss": 3.656285285949707, "step": 1039, "token_acc": 0.2417843345761936 }, { "epoch": 0.6097918498973908, "grad_norm": 1.5045690498071511, "learning_rate": 0.00018288393903868696, "loss": 3.6519041061401367, "step": 1040, "token_acc": 0.24315208174568248 }, { "epoch": 0.6103781882145998, "grad_norm": 1.472894308715283, "learning_rate": 0.0001830597889800703, "loss": 3.6748061180114746, "step": 1041, "token_acc": 0.24159138061626617 }, { "epoch": 0.6109645265318089, "grad_norm": 1.917801013938851, "learning_rate": 0.00018323563892145366, "loss": 3.6534295082092285, "step": 1042, "token_acc": 0.24099850572124537 }, { "epoch": 0.6115508648490179, "grad_norm": 1.2835467482420932, "learning_rate": 0.000183411488862837, "loss": 3.679368495941162, "step": 1043, "token_acc": 0.23895935009895655 }, { "epoch": 0.6121372031662269, "grad_norm": 2.029075993248839, "learning_rate": 0.00018358733880422039, "loss": 3.6091785430908203, "step": 1044, "token_acc": 0.2475746132420901 }, { "epoch": 0.612723541483436, "grad_norm": 1.9608272585512443, "learning_rate": 0.00018376318874560374, "loss": 3.6709554195404053, "step": 1045, "token_acc": 0.24014582389446734 }, { "epoch": 0.613309879800645, "grad_norm": 1.5798332645599171, "learning_rate": 0.0001839390386869871, "loss": 3.655616283416748, "step": 1046, "token_acc": 0.241416062746174 }, { "epoch": 0.613896218117854, "grad_norm": 2.2355287749237194, "learning_rate": 0.00018411488862837044, "loss": 3.693601131439209, "step": 1047, "token_acc": 0.23598704514745084 }, { "epoch": 0.614482556435063, "grad_norm": 1.591001188998099, "learning_rate": 0.0001842907385697538, "loss": 3.6287569999694824, "step": 1048, "token_acc": 0.24424157776493666 }, { "epoch": 0.6150688947522721, "grad_norm": 1.7694997501888765, "learning_rate": 0.00018446658851113714, "loss": 3.659522533416748, "step": 1049, "token_acc": 0.2394644289504535 }, { "epoch": 0.6156552330694811, "grad_norm": 1.9080366732285265, "learning_rate": 0.0001846424384525205, "loss": 3.722494125366211, "step": 1050, "token_acc": 0.23213453163904016 }, { "epoch": 0.6162415713866901, "grad_norm": 1.9899739540283183, "learning_rate": 0.00018481828839390387, "loss": 3.6280910968780518, "step": 1051, "token_acc": 0.24328213095055012 }, { "epoch": 0.6168279097038991, "grad_norm": 1.447630401827886, "learning_rate": 0.00018499413833528722, "loss": 3.714254856109619, "step": 1052, "token_acc": 0.23404759915592482 }, { "epoch": 0.6174142480211082, "grad_norm": 1.8101851488016054, "learning_rate": 0.00018516998827667057, "loss": 3.641073226928711, "step": 1053, "token_acc": 0.24086692705595122 }, { "epoch": 0.6180005863383172, "grad_norm": 1.4578780084340344, "learning_rate": 0.00018534583821805392, "loss": 3.684634208679199, "step": 1054, "token_acc": 0.2376295647930862 }, { "epoch": 0.6185869246555262, "grad_norm": 1.4171255394024795, "learning_rate": 0.00018552168815943727, "loss": 3.6276702880859375, "step": 1055, "token_acc": 0.2427771930878332 }, { "epoch": 0.6191732629727352, "grad_norm": 1.4895494083038259, "learning_rate": 0.00018569753810082062, "loss": 3.659527540206909, "step": 1056, "token_acc": 0.24059873154759448 }, { "epoch": 0.6197596012899443, "grad_norm": 1.8089027556266084, "learning_rate": 0.00018587338804220397, "loss": 3.662781000137329, "step": 1057, "token_acc": 0.24168235542258087 }, { "epoch": 0.6203459396071533, "grad_norm": 1.351720852943308, "learning_rate": 0.0001860492379835873, "loss": 3.641920566558838, "step": 1058, "token_acc": 0.2428657342359603 }, { "epoch": 0.6209322779243623, "grad_norm": 1.8125143419574568, "learning_rate": 0.00018622508792497065, "loss": 3.6598668098449707, "step": 1059, "token_acc": 0.24041304608178418 }, { "epoch": 0.6215186162415713, "grad_norm": 1.3673553699320966, "learning_rate": 0.00018640093786635403, "loss": 3.65216064453125, "step": 1060, "token_acc": 0.23978535367012135 }, { "epoch": 0.6221049545587805, "grad_norm": 2.2542466699915336, "learning_rate": 0.00018657678780773738, "loss": 3.6763997077941895, "step": 1061, "token_acc": 0.2391366067771996 }, { "epoch": 0.6226912928759895, "grad_norm": 1.3538918594347018, "learning_rate": 0.00018675263774912073, "loss": 3.6599977016448975, "step": 1062, "token_acc": 0.23938073813094643 }, { "epoch": 0.6232776311931985, "grad_norm": 1.4829739761686875, "learning_rate": 0.00018692848769050408, "loss": 3.636746644973755, "step": 1063, "token_acc": 0.24287322295444516 }, { "epoch": 0.6238639695104075, "grad_norm": 1.748101916192368, "learning_rate": 0.00018710433763188743, "loss": 3.645778179168701, "step": 1064, "token_acc": 0.24184308710219526 }, { "epoch": 0.6244503078276166, "grad_norm": 1.7216664092839264, "learning_rate": 0.00018728018757327078, "loss": 3.697787284851074, "step": 1065, "token_acc": 0.2372536064310098 }, { "epoch": 0.6250366461448256, "grad_norm": 2.0037505338687174, "learning_rate": 0.00018745603751465413, "loss": 3.711813449859619, "step": 1066, "token_acc": 0.23334084492106205 }, { "epoch": 0.6256229844620346, "grad_norm": 1.5125001231386015, "learning_rate": 0.0001876318874560375, "loss": 3.677464485168457, "step": 1067, "token_acc": 0.23677737437968593 }, { "epoch": 0.6262093227792436, "grad_norm": 1.7578623567318084, "learning_rate": 0.00018780773739742086, "loss": 3.688357353210449, "step": 1068, "token_acc": 0.2363699147964349 }, { "epoch": 0.6267956610964527, "grad_norm": 2.0244761456840568, "learning_rate": 0.0001879835873388042, "loss": 3.589259624481201, "step": 1069, "token_acc": 0.24947612333847974 }, { "epoch": 0.6273819994136617, "grad_norm": 1.555361145103413, "learning_rate": 0.00018815943728018756, "loss": 3.671065330505371, "step": 1070, "token_acc": 0.23687182823682498 }, { "epoch": 0.6279683377308707, "grad_norm": 1.951366711556471, "learning_rate": 0.00018833528722157091, "loss": 3.684986114501953, "step": 1071, "token_acc": 0.23795057469478242 }, { "epoch": 0.6285546760480798, "grad_norm": 1.3478851758346075, "learning_rate": 0.00018851113716295426, "loss": 3.6856579780578613, "step": 1072, "token_acc": 0.2373701700030501 }, { "epoch": 0.6291410143652888, "grad_norm": 1.6806400160384543, "learning_rate": 0.00018868698710433762, "loss": 3.6672234535217285, "step": 1073, "token_acc": 0.2386756230416714 }, { "epoch": 0.6297273526824978, "grad_norm": 1.4137867333499683, "learning_rate": 0.000188862837045721, "loss": 3.689209461212158, "step": 1074, "token_acc": 0.23769485812679747 }, { "epoch": 0.6303136909997068, "grad_norm": 1.6981345647704025, "learning_rate": 0.00018903868698710434, "loss": 3.621795654296875, "step": 1075, "token_acc": 0.24479109793827836 }, { "epoch": 0.6309000293169159, "grad_norm": 1.9866745076502357, "learning_rate": 0.00018921453692848767, "loss": 3.642712116241455, "step": 1076, "token_acc": 0.24154126055880443 }, { "epoch": 0.6314863676341249, "grad_norm": 1.976887452629786, "learning_rate": 0.00018939038686987102, "loss": 3.6317319869995117, "step": 1077, "token_acc": 0.24141695793897028 }, { "epoch": 0.6320727059513339, "grad_norm": 1.7329902202194336, "learning_rate": 0.00018956623681125437, "loss": 3.6605727672576904, "step": 1078, "token_acc": 0.24026168531492628 }, { "epoch": 0.6326590442685429, "grad_norm": 1.4726478368834104, "learning_rate": 0.00018974208675263772, "loss": 3.6747255325317383, "step": 1079, "token_acc": 0.23477749759247646 }, { "epoch": 0.633245382585752, "grad_norm": 1.7585814971059355, "learning_rate": 0.00018991793669402107, "loss": 3.6440324783325195, "step": 1080, "token_acc": 0.24026081611929131 }, { "epoch": 0.633831720902961, "grad_norm": 1.7674480720415773, "learning_rate": 0.00019009378663540442, "loss": 3.6040310859680176, "step": 1081, "token_acc": 0.2475406848066658 }, { "epoch": 0.63441805922017, "grad_norm": 1.5517546055726985, "learning_rate": 0.0001902696365767878, "loss": 3.6312811374664307, "step": 1082, "token_acc": 0.24221097517235882 }, { "epoch": 0.635004397537379, "grad_norm": 1.7430612793323368, "learning_rate": 0.00019044548651817115, "loss": 3.7135157585144043, "step": 1083, "token_acc": 0.23036504350489043 }, { "epoch": 0.6355907358545881, "grad_norm": 2.040381816418502, "learning_rate": 0.0001906213364595545, "loss": 3.657586097717285, "step": 1084, "token_acc": 0.24113301401118153 }, { "epoch": 0.6361770741717971, "grad_norm": 1.556818247580028, "learning_rate": 0.00019079718640093785, "loss": 3.674015760421753, "step": 1085, "token_acc": 0.2372200040603641 }, { "epoch": 0.6367634124890061, "grad_norm": 2.269620149573287, "learning_rate": 0.0001909730363423212, "loss": 3.6781411170959473, "step": 1086, "token_acc": 0.23656198212669913 }, { "epoch": 0.6373497508062151, "grad_norm": 1.7357260290331344, "learning_rate": 0.00019114888628370456, "loss": 3.654287338256836, "step": 1087, "token_acc": 0.2386494909949324 }, { "epoch": 0.6379360891234243, "grad_norm": 1.9773427735767597, "learning_rate": 0.0001913247362250879, "loss": 3.639333963394165, "step": 1088, "token_acc": 0.24030843219460793 }, { "epoch": 0.6385224274406333, "grad_norm": 1.5183334730919866, "learning_rate": 0.00019150058616647126, "loss": 3.646763324737549, "step": 1089, "token_acc": 0.24163768412438624 }, { "epoch": 0.6391087657578423, "grad_norm": 1.6136655963608397, "learning_rate": 0.00019167643610785463, "loss": 3.6259384155273438, "step": 1090, "token_acc": 0.24393340668031388 }, { "epoch": 0.6396951040750513, "grad_norm": 1.7046718930028641, "learning_rate": 0.00019185228604923799, "loss": 3.6054673194885254, "step": 1091, "token_acc": 0.24627198951426857 }, { "epoch": 0.6402814423922604, "grad_norm": 1.3540144432232415, "learning_rate": 0.00019202813599062134, "loss": 3.6118674278259277, "step": 1092, "token_acc": 0.24329294288664136 }, { "epoch": 0.6408677807094694, "grad_norm": 1.4413944342971239, "learning_rate": 0.00019220398593200466, "loss": 3.6929872035980225, "step": 1093, "token_acc": 0.2353969362149911 }, { "epoch": 0.6414541190266784, "grad_norm": 1.7950871278456584, "learning_rate": 0.000192379835873388, "loss": 3.623234748840332, "step": 1094, "token_acc": 0.24344740572953516 }, { "epoch": 0.6420404573438874, "grad_norm": 1.6697604715179724, "learning_rate": 0.00019255568581477136, "loss": 3.5792489051818848, "step": 1095, "token_acc": 0.2470687203421172 }, { "epoch": 0.6426267956610965, "grad_norm": 2.039536708982461, "learning_rate": 0.0001927315357561547, "loss": 3.612475872039795, "step": 1096, "token_acc": 0.24463368341136277 }, { "epoch": 0.6432131339783055, "grad_norm": 1.6127142992973633, "learning_rate": 0.00019290738569753806, "loss": 3.606299638748169, "step": 1097, "token_acc": 0.24438323038241083 }, { "epoch": 0.6437994722955145, "grad_norm": 1.7625034583598775, "learning_rate": 0.00019308323563892144, "loss": 3.6384198665618896, "step": 1098, "token_acc": 0.24053047297002822 }, { "epoch": 0.6443858106127235, "grad_norm": 1.2513562102792075, "learning_rate": 0.0001932590855803048, "loss": 3.6397864818573, "step": 1099, "token_acc": 0.24160905220037274 }, { "epoch": 0.6449721489299326, "grad_norm": 1.6512767352844167, "learning_rate": 0.00019343493552168814, "loss": 3.67323637008667, "step": 1100, "token_acc": 0.2357519688049545 }, { "epoch": 0.6455584872471416, "grad_norm": 1.8225156712466453, "learning_rate": 0.0001936107854630715, "loss": 3.6741366386413574, "step": 1101, "token_acc": 0.2391971488724046 }, { "epoch": 0.6461448255643506, "grad_norm": 1.6907654254018762, "learning_rate": 0.00019378663540445485, "loss": 3.6123390197753906, "step": 1102, "token_acc": 0.24138375590889558 }, { "epoch": 0.6467311638815597, "grad_norm": 1.7215367096404044, "learning_rate": 0.0001939624853458382, "loss": 3.6283833980560303, "step": 1103, "token_acc": 0.24226162319691608 }, { "epoch": 0.6473175021987687, "grad_norm": 1.3985995856220952, "learning_rate": 0.00019413833528722155, "loss": 3.6100409030914307, "step": 1104, "token_acc": 0.24666047537661515 }, { "epoch": 0.6479038405159777, "grad_norm": 2.0689539464013955, "learning_rate": 0.00019431418522860493, "loss": 3.6854305267333984, "step": 1105, "token_acc": 0.23523078526030633 }, { "epoch": 0.6484901788331867, "grad_norm": 1.2157216193055542, "learning_rate": 0.00019449003516998828, "loss": 3.612168312072754, "step": 1106, "token_acc": 0.24397058589628717 }, { "epoch": 0.6490765171503958, "grad_norm": 2.011225265689044, "learning_rate": 0.00019466588511137163, "loss": 3.645557165145874, "step": 1107, "token_acc": 0.24132552895656384 }, { "epoch": 0.6496628554676048, "grad_norm": 1.4751809413167396, "learning_rate": 0.00019484173505275498, "loss": 3.6708264350891113, "step": 1108, "token_acc": 0.23719865422073816 }, { "epoch": 0.6502491937848138, "grad_norm": 1.8382469128246788, "learning_rate": 0.00019501758499413833, "loss": 3.5735676288604736, "step": 1109, "token_acc": 0.2489281631097561 }, { "epoch": 0.6508355321020228, "grad_norm": 1.3940802207683847, "learning_rate": 0.00019519343493552168, "loss": 3.6366333961486816, "step": 1110, "token_acc": 0.24249216659694872 }, { "epoch": 0.6514218704192319, "grad_norm": 1.657597656876021, "learning_rate": 0.000195369284876905, "loss": 3.5903561115264893, "step": 1111, "token_acc": 0.244454957618766 }, { "epoch": 0.6520082087364409, "grad_norm": 1.6175099314586001, "learning_rate": 0.00019554513481828835, "loss": 3.6336400508880615, "step": 1112, "token_acc": 0.24179001828782218 }, { "epoch": 0.6525945470536499, "grad_norm": 1.470480683499547, "learning_rate": 0.0001957209847596717, "loss": 3.5976552963256836, "step": 1113, "token_acc": 0.24569830395401945 }, { "epoch": 0.6531808853708589, "grad_norm": 2.1813622949923626, "learning_rate": 0.00019589683470105508, "loss": 3.6614279747009277, "step": 1114, "token_acc": 0.24019228690003908 }, { "epoch": 0.653767223688068, "grad_norm": 1.5860689099835066, "learning_rate": 0.00019607268464243843, "loss": 3.63978910446167, "step": 1115, "token_acc": 0.24227566265819628 }, { "epoch": 0.6543535620052771, "grad_norm": 1.6480716077173239, "learning_rate": 0.00019624853458382179, "loss": 3.6350765228271484, "step": 1116, "token_acc": 0.23837553165774356 }, { "epoch": 0.6549399003224861, "grad_norm": 1.4176358370845636, "learning_rate": 0.00019642438452520514, "loss": 3.5681934356689453, "step": 1117, "token_acc": 0.24877584755013568 }, { "epoch": 0.6555262386396951, "grad_norm": 1.516793444920232, "learning_rate": 0.0001966002344665885, "loss": 3.673952102661133, "step": 1118, "token_acc": 0.2350283096839975 }, { "epoch": 0.6561125769569042, "grad_norm": 1.337234260253422, "learning_rate": 0.00019677608440797184, "loss": 3.6203818321228027, "step": 1119, "token_acc": 0.2439845069435977 }, { "epoch": 0.6566989152741132, "grad_norm": 1.2978707681197799, "learning_rate": 0.0001969519343493552, "loss": 3.5939743518829346, "step": 1120, "token_acc": 0.24607298556314597 }, { "epoch": 0.6572852535913222, "grad_norm": 1.6691223741547296, "learning_rate": 0.00019712778429073857, "loss": 3.6283607482910156, "step": 1121, "token_acc": 0.24155875527778564 }, { "epoch": 0.6578715919085312, "grad_norm": 1.5328612206431984, "learning_rate": 0.00019730363423212192, "loss": 3.661714553833008, "step": 1122, "token_acc": 0.23627960263029865 }, { "epoch": 0.6584579302257403, "grad_norm": 1.6956253356889974, "learning_rate": 0.00019747948417350527, "loss": 3.649235725402832, "step": 1123, "token_acc": 0.23973618822016637 }, { "epoch": 0.6590442685429493, "grad_norm": 1.385611834215062, "learning_rate": 0.00019765533411488862, "loss": 3.6056106090545654, "step": 1124, "token_acc": 0.24425279978638953 }, { "epoch": 0.6596306068601583, "grad_norm": 1.6153349029702886, "learning_rate": 0.00019783118405627197, "loss": 3.5426769256591797, "step": 1125, "token_acc": 0.2524058340488067 }, { "epoch": 0.6602169451773673, "grad_norm": 1.60034514549438, "learning_rate": 0.00019800703399765532, "loss": 3.610332489013672, "step": 1126, "token_acc": 0.24038042033142212 }, { "epoch": 0.6608032834945764, "grad_norm": 1.450280750845652, "learning_rate": 0.00019818288393903867, "loss": 3.6011390686035156, "step": 1127, "token_acc": 0.24470297132638846 }, { "epoch": 0.6613896218117854, "grad_norm": 1.5520459264059963, "learning_rate": 0.00019835873388042205, "loss": 3.645643472671509, "step": 1128, "token_acc": 0.24071357710635938 }, { "epoch": 0.6619759601289944, "grad_norm": 1.4093507282595426, "learning_rate": 0.00019853458382180535, "loss": 3.5918097496032715, "step": 1129, "token_acc": 0.24674497689983607 }, { "epoch": 0.6625622984462035, "grad_norm": 1.2870749759372013, "learning_rate": 0.00019871043376318872, "loss": 3.579008102416992, "step": 1130, "token_acc": 0.2460683717638452 }, { "epoch": 0.6631486367634125, "grad_norm": 1.5116505122615504, "learning_rate": 0.00019888628370457208, "loss": 3.627666473388672, "step": 1131, "token_acc": 0.2396118058513041 }, { "epoch": 0.6637349750806215, "grad_norm": 1.644916560866073, "learning_rate": 0.00019906213364595543, "loss": 3.6390559673309326, "step": 1132, "token_acc": 0.2402390395769362 }, { "epoch": 0.6643213133978305, "grad_norm": 1.7802925249319201, "learning_rate": 0.00019923798358733878, "loss": 3.6603431701660156, "step": 1133, "token_acc": 0.23859199223284577 }, { "epoch": 0.6649076517150396, "grad_norm": 1.8720354744807326, "learning_rate": 0.00019941383352872213, "loss": 3.652681827545166, "step": 1134, "token_acc": 0.23852036998233903 }, { "epoch": 0.6654939900322486, "grad_norm": 1.2546442608542687, "learning_rate": 0.00019958968347010548, "loss": 3.5934860706329346, "step": 1135, "token_acc": 0.24600937747587442 }, { "epoch": 0.6660803283494576, "grad_norm": 1.7291911612479556, "learning_rate": 0.00019976553341148883, "loss": 3.571781873703003, "step": 1136, "token_acc": 0.24974058375564373 }, { "epoch": 0.6666666666666666, "grad_norm": 1.356670588130926, "learning_rate": 0.0001999413833528722, "loss": 3.6166539192199707, "step": 1137, "token_acc": 0.2420671408410662 }, { "epoch": 0.6672530049838757, "grad_norm": 1.7900598056883277, "learning_rate": 0.00020011723329425556, "loss": 3.593308448791504, "step": 1138, "token_acc": 0.2445009106939989 }, { "epoch": 0.6678393433010847, "grad_norm": 1.3943622144762045, "learning_rate": 0.0002002930832356389, "loss": 3.6098599433898926, "step": 1139, "token_acc": 0.24256794400247142 }, { "epoch": 0.6684256816182937, "grad_norm": 1.7776906887504345, "learning_rate": 0.00020046893317702226, "loss": 3.6094508171081543, "step": 1140, "token_acc": 0.24432004608583488 }, { "epoch": 0.6690120199355027, "grad_norm": 1.5104074155476455, "learning_rate": 0.0002006447831184056, "loss": 3.618520736694336, "step": 1141, "token_acc": 0.24201057955666866 }, { "epoch": 0.6695983582527119, "grad_norm": 1.4851445853862546, "learning_rate": 0.00020082063305978896, "loss": 3.5834691524505615, "step": 1142, "token_acc": 0.24682063231896365 }, { "epoch": 0.6701846965699209, "grad_norm": 1.5474687686537965, "learning_rate": 0.0002009964830011723, "loss": 3.592369318008423, "step": 1143, "token_acc": 0.24598476895568652 }, { "epoch": 0.6707710348871299, "grad_norm": 1.5860675010366894, "learning_rate": 0.0002011723329425557, "loss": 3.6278696060180664, "step": 1144, "token_acc": 0.24052580886428082 }, { "epoch": 0.6713573732043389, "grad_norm": 1.6020826511503836, "learning_rate": 0.00020134818288393904, "loss": 3.6181278228759766, "step": 1145, "token_acc": 0.24164455708232685 }, { "epoch": 0.671943711521548, "grad_norm": 1.4014698945420845, "learning_rate": 0.0002015240328253224, "loss": 3.558528423309326, "step": 1146, "token_acc": 0.2504937928743287 }, { "epoch": 0.672530049838757, "grad_norm": 1.4833780015077336, "learning_rate": 0.00020169988276670572, "loss": 3.5694637298583984, "step": 1147, "token_acc": 0.24811560383281017 }, { "epoch": 0.673116388155966, "grad_norm": 1.5150151091636253, "learning_rate": 0.00020187573270808907, "loss": 3.621929168701172, "step": 1148, "token_acc": 0.24057901662878334 }, { "epoch": 0.673702726473175, "grad_norm": 1.413507895436423, "learning_rate": 0.00020205158264947242, "loss": 3.6022233963012695, "step": 1149, "token_acc": 0.242924326177315 }, { "epoch": 0.6742890647903841, "grad_norm": 1.5325210819111854, "learning_rate": 0.00020222743259085577, "loss": 3.613931894302368, "step": 1150, "token_acc": 0.24302687267349957 }, { "epoch": 0.6748754031075931, "grad_norm": 1.4181193199346183, "learning_rate": 0.00020240328253223912, "loss": 3.588520050048828, "step": 1151, "token_acc": 0.24631008162526843 }, { "epoch": 0.6754617414248021, "grad_norm": 1.8156740867821797, "learning_rate": 0.0002025791324736225, "loss": 3.585069179534912, "step": 1152, "token_acc": 0.24621286904183853 }, { "epoch": 0.6760480797420111, "grad_norm": 1.519099142119853, "learning_rate": 0.00020275498241500585, "loss": 3.588890552520752, "step": 1153, "token_acc": 0.24348914506900032 }, { "epoch": 0.6766344180592202, "grad_norm": 2.284298859116246, "learning_rate": 0.0002029308323563892, "loss": 3.6400070190429688, "step": 1154, "token_acc": 0.2388069264901405 }, { "epoch": 0.6772207563764292, "grad_norm": 1.412118723214527, "learning_rate": 0.00020310668229777255, "loss": 3.5892252922058105, "step": 1155, "token_acc": 0.2460910084248711 }, { "epoch": 0.6778070946936382, "grad_norm": 1.4038632447176342, "learning_rate": 0.0002032825322391559, "loss": 3.617854356765747, "step": 1156, "token_acc": 0.24162731584198796 }, { "epoch": 0.6783934330108473, "grad_norm": 1.4735367884794843, "learning_rate": 0.00020345838218053925, "loss": 3.5383706092834473, "step": 1157, "token_acc": 0.25039964451958663 }, { "epoch": 0.6789797713280563, "grad_norm": 1.3508446984252374, "learning_rate": 0.0002036342321219226, "loss": 3.6222965717315674, "step": 1158, "token_acc": 0.24018951750390588 }, { "epoch": 0.6795661096452653, "grad_norm": 1.6147453261994797, "learning_rate": 0.00020381008206330598, "loss": 3.589507579803467, "step": 1159, "token_acc": 0.2461748835834134 }, { "epoch": 0.6801524479624743, "grad_norm": 1.6299601848386351, "learning_rate": 0.00020398593200468933, "loss": 3.5018553733825684, "step": 1160, "token_acc": 0.25406549485889246 }, { "epoch": 0.6807387862796834, "grad_norm": 1.6419355929097839, "learning_rate": 0.00020416178194607268, "loss": 3.636781692504883, "step": 1161, "token_acc": 0.23880242611949928 }, { "epoch": 0.6813251245968924, "grad_norm": 1.4434573456199389, "learning_rate": 0.00020433763188745603, "loss": 3.5546317100524902, "step": 1162, "token_acc": 0.2501913386514349 }, { "epoch": 0.6819114629141014, "grad_norm": 1.7254479135607967, "learning_rate": 0.00020451348182883939, "loss": 3.614981174468994, "step": 1163, "token_acc": 0.24098654802627437 }, { "epoch": 0.6824978012313104, "grad_norm": 1.9072933428320171, "learning_rate": 0.0002046893317702227, "loss": 3.5855712890625, "step": 1164, "token_acc": 0.24768261233786498 }, { "epoch": 0.6830841395485195, "grad_norm": 1.2978073301636897, "learning_rate": 0.00020486518171160606, "loss": 3.6052474975585938, "step": 1165, "token_acc": 0.24182108715763506 }, { "epoch": 0.6836704778657285, "grad_norm": 1.5958120732363972, "learning_rate": 0.0002050410316529894, "loss": 3.599301338195801, "step": 1166, "token_acc": 0.24437615147621186 }, { "epoch": 0.6842568161829375, "grad_norm": 1.5180439439214437, "learning_rate": 0.00020521688159437276, "loss": 3.622044324874878, "step": 1167, "token_acc": 0.24223399113347088 }, { "epoch": 0.6848431545001465, "grad_norm": 1.4818090899694143, "learning_rate": 0.00020539273153575614, "loss": 3.617368698120117, "step": 1168, "token_acc": 0.24405604228664182 }, { "epoch": 0.6854294928173557, "grad_norm": 1.347070011833193, "learning_rate": 0.0002055685814771395, "loss": 3.596374273300171, "step": 1169, "token_acc": 0.2420882549587698 }, { "epoch": 0.6860158311345647, "grad_norm": 1.803849208222528, "learning_rate": 0.00020574443141852284, "loss": 3.563307285308838, "step": 1170, "token_acc": 0.24695189651165794 }, { "epoch": 0.6866021694517737, "grad_norm": 1.5364140968501123, "learning_rate": 0.0002059202813599062, "loss": 3.5895957946777344, "step": 1171, "token_acc": 0.2443650122895845 }, { "epoch": 0.6871885077689827, "grad_norm": 1.6945213045144898, "learning_rate": 0.00020609613130128954, "loss": 3.592796564102173, "step": 1172, "token_acc": 0.24319801449991763 }, { "epoch": 0.6877748460861918, "grad_norm": 1.584319083932552, "learning_rate": 0.0002062719812426729, "loss": 3.58577036857605, "step": 1173, "token_acc": 0.24424933687002653 }, { "epoch": 0.6883611844034008, "grad_norm": 1.5081160231626272, "learning_rate": 0.00020644783118405624, "loss": 3.591670036315918, "step": 1174, "token_acc": 0.24489660758771417 }, { "epoch": 0.6889475227206098, "grad_norm": 1.5839956729679818, "learning_rate": 0.00020662368112543962, "loss": 3.609102725982666, "step": 1175, "token_acc": 0.24326734804760783 }, { "epoch": 0.6895338610378188, "grad_norm": 1.6374116490283572, "learning_rate": 0.00020679953106682297, "loss": 3.6066267490386963, "step": 1176, "token_acc": 0.24193709988715104 }, { "epoch": 0.6901201993550279, "grad_norm": 1.5307115289616988, "learning_rate": 0.00020697538100820632, "loss": 3.5727434158325195, "step": 1177, "token_acc": 0.24325560608848812 }, { "epoch": 0.6907065376722369, "grad_norm": 1.9058690211782219, "learning_rate": 0.00020715123094958968, "loss": 3.6209356784820557, "step": 1178, "token_acc": 0.24030822129729526 }, { "epoch": 0.6912928759894459, "grad_norm": 1.0561626946870013, "learning_rate": 0.00020732708089097303, "loss": 3.5838255882263184, "step": 1179, "token_acc": 0.2469547475887084 }, { "epoch": 0.6918792143066549, "grad_norm": 1.401383225761813, "learning_rate": 0.00020750293083235638, "loss": 3.564133644104004, "step": 1180, "token_acc": 0.2470333343493147 }, { "epoch": 0.692465552623864, "grad_norm": 1.5602268152648973, "learning_rate": 0.00020767878077373973, "loss": 3.618037223815918, "step": 1181, "token_acc": 0.24118230654455783 }, { "epoch": 0.693051890941073, "grad_norm": 1.6271596903446885, "learning_rate": 0.00020785463071512305, "loss": 3.5246148109436035, "step": 1182, "token_acc": 0.25002085761454623 }, { "epoch": 0.693638229258282, "grad_norm": 1.3045394781437147, "learning_rate": 0.0002080304806565064, "loss": 3.544750213623047, "step": 1183, "token_acc": 0.2496232633279483 }, { "epoch": 0.6942245675754911, "grad_norm": 1.6538527007391917, "learning_rate": 0.00020820633059788978, "loss": 3.5446419715881348, "step": 1184, "token_acc": 0.2500187660967813 }, { "epoch": 0.6948109058927001, "grad_norm": 1.3739265379760683, "learning_rate": 0.00020838218053927313, "loss": 3.572628974914551, "step": 1185, "token_acc": 0.24697953610956327 }, { "epoch": 0.6953972442099091, "grad_norm": 1.5753787047227474, "learning_rate": 0.00020855803048065648, "loss": 3.561522960662842, "step": 1186, "token_acc": 0.24755836342687462 }, { "epoch": 0.6959835825271181, "grad_norm": 1.2660102525575596, "learning_rate": 0.00020873388042203983, "loss": 3.59983491897583, "step": 1187, "token_acc": 0.24312022202342226 }, { "epoch": 0.6965699208443272, "grad_norm": 1.4852802655773794, "learning_rate": 0.00020890973036342318, "loss": 3.5600967407226562, "step": 1188, "token_acc": 0.24581967234460328 }, { "epoch": 0.6971562591615362, "grad_norm": 1.3948117157455504, "learning_rate": 0.00020908558030480654, "loss": 3.570460319519043, "step": 1189, "token_acc": 0.2457847772348982 }, { "epoch": 0.6977425974787452, "grad_norm": 1.3912107301734813, "learning_rate": 0.00020926143024618989, "loss": 3.6175618171691895, "step": 1190, "token_acc": 0.24230702399251486 }, { "epoch": 0.6983289357959542, "grad_norm": 1.5759277073923086, "learning_rate": 0.00020943728018757326, "loss": 3.5924034118652344, "step": 1191, "token_acc": 0.24593419385763687 }, { "epoch": 0.6989152741131633, "grad_norm": 1.39277861785629, "learning_rate": 0.00020961313012895662, "loss": 3.5540080070495605, "step": 1192, "token_acc": 0.24777259334493126 }, { "epoch": 0.6995016124303723, "grad_norm": 1.385638697451805, "learning_rate": 0.00020978898007033997, "loss": 3.5938775539398193, "step": 1193, "token_acc": 0.2435192618929286 }, { "epoch": 0.7000879507475813, "grad_norm": 1.8584368658135042, "learning_rate": 0.00020996483001172332, "loss": 3.549201488494873, "step": 1194, "token_acc": 0.24996201185230207 }, { "epoch": 0.7006742890647903, "grad_norm": 1.4064310655448726, "learning_rate": 0.00021014067995310667, "loss": 3.604276657104492, "step": 1195, "token_acc": 0.24198638540074036 }, { "epoch": 0.7012606273819995, "grad_norm": 1.7018986780837322, "learning_rate": 0.00021031652989449002, "loss": 3.57584547996521, "step": 1196, "token_acc": 0.2464382163438025 }, { "epoch": 0.7018469656992085, "grad_norm": 1.5146780956860744, "learning_rate": 0.00021049237983587337, "loss": 3.5720572471618652, "step": 1197, "token_acc": 0.2452082644585072 }, { "epoch": 0.7024333040164175, "grad_norm": 1.5270076813463245, "learning_rate": 0.00021066822977725675, "loss": 3.633413791656494, "step": 1198, "token_acc": 0.23917427063087535 }, { "epoch": 0.7030196423336265, "grad_norm": 1.8373426947599365, "learning_rate": 0.0002108440797186401, "loss": 3.605698347091675, "step": 1199, "token_acc": 0.24348149891261342 }, { "epoch": 0.7036059806508356, "grad_norm": 1.4501453703784004, "learning_rate": 0.00021101992966002342, "loss": 3.598822593688965, "step": 1200, "token_acc": 0.24447639640430693 }, { "epoch": 0.7041923189680446, "grad_norm": 1.2155396450137024, "learning_rate": 0.00021119577960140677, "loss": 3.5956811904907227, "step": 1201, "token_acc": 0.2441128386132825 }, { "epoch": 0.7047786572852536, "grad_norm": 1.2907905813875884, "learning_rate": 0.00021137162954279012, "loss": 3.5836386680603027, "step": 1202, "token_acc": 0.24527807333973478 }, { "epoch": 0.7053649956024626, "grad_norm": 1.4178030166108693, "learning_rate": 0.00021154747948417347, "loss": 3.615964651107788, "step": 1203, "token_acc": 0.24103439992186035 }, { "epoch": 0.7059513339196717, "grad_norm": 1.6241951295929389, "learning_rate": 0.00021172332942555683, "loss": 3.535749912261963, "step": 1204, "token_acc": 0.24910567304039508 }, { "epoch": 0.7065376722368807, "grad_norm": 1.3848919579031367, "learning_rate": 0.00021189917936694018, "loss": 3.566925525665283, "step": 1205, "token_acc": 0.24736881976317418 }, { "epoch": 0.7071240105540897, "grad_norm": 1.4890576918850404, "learning_rate": 0.00021207502930832353, "loss": 3.581563949584961, "step": 1206, "token_acc": 0.24462583288565312 }, { "epoch": 0.7077103488712987, "grad_norm": 1.7218651703673185, "learning_rate": 0.0002122508792497069, "loss": 3.6127965450286865, "step": 1207, "token_acc": 0.24181569023322472 }, { "epoch": 0.7082966871885078, "grad_norm": 1.1565119321273731, "learning_rate": 0.00021242672919109026, "loss": 3.5695741176605225, "step": 1208, "token_acc": 0.2475268265348339 }, { "epoch": 0.7088830255057168, "grad_norm": 1.7283381721414417, "learning_rate": 0.0002126025791324736, "loss": 3.5483202934265137, "step": 1209, "token_acc": 0.24790857560055568 }, { "epoch": 0.7094693638229258, "grad_norm": 1.542720787689113, "learning_rate": 0.00021277842907385696, "loss": 3.587327480316162, "step": 1210, "token_acc": 0.24341776459203965 }, { "epoch": 0.7100557021401348, "grad_norm": 1.5495727319473576, "learning_rate": 0.0002129542790152403, "loss": 3.558802366256714, "step": 1211, "token_acc": 0.24773354150403332 }, { "epoch": 0.7106420404573439, "grad_norm": 1.2971421551206894, "learning_rate": 0.00021313012895662366, "loss": 3.569840908050537, "step": 1212, "token_acc": 0.24753208967059892 }, { "epoch": 0.7112283787745529, "grad_norm": 1.719753069320933, "learning_rate": 0.000213305978898007, "loss": 3.549684524536133, "step": 1213, "token_acc": 0.24809236004551197 }, { "epoch": 0.7118147170917619, "grad_norm": 1.3880068430694603, "learning_rate": 0.0002134818288393904, "loss": 3.5489330291748047, "step": 1214, "token_acc": 0.2470236733033285 }, { "epoch": 0.712401055408971, "grad_norm": 1.5015538155788861, "learning_rate": 0.00021365767878077374, "loss": 3.543576717376709, "step": 1215, "token_acc": 0.24917295392648137 }, { "epoch": 0.71298739372618, "grad_norm": 1.5967968755855506, "learning_rate": 0.0002138335287221571, "loss": 3.53983736038208, "step": 1216, "token_acc": 0.2496313938981513 }, { "epoch": 0.713573732043389, "grad_norm": 1.3867352012821892, "learning_rate": 0.00021400937866354044, "loss": 3.5764036178588867, "step": 1217, "token_acc": 0.24566393184290522 }, { "epoch": 0.714160070360598, "grad_norm": 1.738247725706473, "learning_rate": 0.00021418522860492377, "loss": 3.579580307006836, "step": 1218, "token_acc": 0.24417444338892128 }, { "epoch": 0.7147464086778071, "grad_norm": 1.171318680757039, "learning_rate": 0.00021436107854630712, "loss": 3.5330920219421387, "step": 1219, "token_acc": 0.24978086902561053 }, { "epoch": 0.7153327469950161, "grad_norm": 1.72557106420474, "learning_rate": 0.00021453692848769047, "loss": 3.580744743347168, "step": 1220, "token_acc": 0.24503237830745797 }, { "epoch": 0.7159190853122251, "grad_norm": 1.065274071322266, "learning_rate": 0.00021471277842907382, "loss": 3.554166555404663, "step": 1221, "token_acc": 0.24794841439762222 }, { "epoch": 0.7165054236294341, "grad_norm": 1.7524479882390889, "learning_rate": 0.0002148886283704572, "loss": 3.5375688076019287, "step": 1222, "token_acc": 0.25004202960986016 }, { "epoch": 0.7170917619466433, "grad_norm": 1.148305853613259, "learning_rate": 0.00021506447831184055, "loss": 3.5806210041046143, "step": 1223, "token_acc": 0.2460832130269096 }, { "epoch": 0.7176781002638523, "grad_norm": 1.3143303464959835, "learning_rate": 0.0002152403282532239, "loss": 3.5024471282958984, "step": 1224, "token_acc": 0.2549667489229997 }, { "epoch": 0.7182644385810613, "grad_norm": 1.7154443943940636, "learning_rate": 0.00021541617819460725, "loss": 3.604999542236328, "step": 1225, "token_acc": 0.2437934262396667 }, { "epoch": 0.7188507768982703, "grad_norm": 1.3277525488232813, "learning_rate": 0.0002155920281359906, "loss": 3.598742961883545, "step": 1226, "token_acc": 0.24198334135041394 }, { "epoch": 0.7194371152154794, "grad_norm": 1.6416711338903025, "learning_rate": 0.00021576787807737395, "loss": 3.569950580596924, "step": 1227, "token_acc": 0.24519790527360977 }, { "epoch": 0.7200234535326884, "grad_norm": 1.2723851660667214, "learning_rate": 0.0002159437280187573, "loss": 3.6088290214538574, "step": 1228, "token_acc": 0.2403587070732291 }, { "epoch": 0.7206097918498974, "grad_norm": 1.6001371153841353, "learning_rate": 0.00021611957796014068, "loss": 3.5068674087524414, "step": 1229, "token_acc": 0.2534285654463416 }, { "epoch": 0.7211961301671064, "grad_norm": 1.220262358170032, "learning_rate": 0.00021629542790152403, "loss": 3.573232650756836, "step": 1230, "token_acc": 0.2471954032973013 }, { "epoch": 0.7217824684843155, "grad_norm": 1.518819589412541, "learning_rate": 0.00021647127784290738, "loss": 3.580660343170166, "step": 1231, "token_acc": 0.2448288050159823 }, { "epoch": 0.7223688068015245, "grad_norm": 1.1318360269614194, "learning_rate": 0.00021664712778429073, "loss": 3.5644583702087402, "step": 1232, "token_acc": 0.24850441054324857 }, { "epoch": 0.7229551451187335, "grad_norm": 1.395018834103563, "learning_rate": 0.00021682297772567408, "loss": 3.598156213760376, "step": 1233, "token_acc": 0.2420621761658031 }, { "epoch": 0.7235414834359425, "grad_norm": 1.5448312665379487, "learning_rate": 0.00021699882766705743, "loss": 3.524854898452759, "step": 1234, "token_acc": 0.2517297511730025 }, { "epoch": 0.7241278217531516, "grad_norm": 1.3364464261702895, "learning_rate": 0.00021717467760844078, "loss": 3.5365190505981445, "step": 1235, "token_acc": 0.2520886573619029 }, { "epoch": 0.7247141600703606, "grad_norm": 1.5887730415281391, "learning_rate": 0.0002173505275498241, "loss": 3.5950169563293457, "step": 1236, "token_acc": 0.2435176786077198 }, { "epoch": 0.7253004983875696, "grad_norm": 1.3903154856538764, "learning_rate": 0.00021752637749120746, "loss": 3.5647411346435547, "step": 1237, "token_acc": 0.24810043553549876 }, { "epoch": 0.7258868367047786, "grad_norm": 1.775432142168979, "learning_rate": 0.00021770222743259084, "loss": 3.5696730613708496, "step": 1238, "token_acc": 0.24437369492923616 }, { "epoch": 0.7264731750219877, "grad_norm": 1.157195309336081, "learning_rate": 0.0002178780773739742, "loss": 3.5941247940063477, "step": 1239, "token_acc": 0.24282366954790882 }, { "epoch": 0.7270595133391967, "grad_norm": 1.884945186646098, "learning_rate": 0.00021805392731535754, "loss": 3.533627510070801, "step": 1240, "token_acc": 0.2510889445695497 }, { "epoch": 0.7276458516564057, "grad_norm": 1.169212926292052, "learning_rate": 0.0002182297772567409, "loss": 3.5517265796661377, "step": 1241, "token_acc": 0.2504540121819906 }, { "epoch": 0.7282321899736148, "grad_norm": 1.6188861606613274, "learning_rate": 0.00021840562719812424, "loss": 3.5319066047668457, "step": 1242, "token_acc": 0.2509352471782027 }, { "epoch": 0.7288185282908238, "grad_norm": 1.3036897567680428, "learning_rate": 0.0002185814771395076, "loss": 3.621346950531006, "step": 1243, "token_acc": 0.24057557222096146 }, { "epoch": 0.7294048666080328, "grad_norm": 1.3072549451955513, "learning_rate": 0.00021875732708089094, "loss": 3.517343044281006, "step": 1244, "token_acc": 0.25246771634567106 }, { "epoch": 0.7299912049252418, "grad_norm": 1.492085170051368, "learning_rate": 0.00021893317702227432, "loss": 3.6194374561309814, "step": 1245, "token_acc": 0.2387442709596701 }, { "epoch": 0.7305775432424509, "grad_norm": 1.4440577619767259, "learning_rate": 0.00021910902696365767, "loss": 3.5678908824920654, "step": 1246, "token_acc": 0.24672867937343465 }, { "epoch": 0.73116388155966, "grad_norm": 1.4249551650165415, "learning_rate": 0.00021928487690504102, "loss": 3.564143657684326, "step": 1247, "token_acc": 0.24619896642899347 }, { "epoch": 0.731750219876869, "grad_norm": 1.4700480170173016, "learning_rate": 0.00021946072684642437, "loss": 3.542144298553467, "step": 1248, "token_acc": 0.2491643685752169 }, { "epoch": 0.732336558194078, "grad_norm": 1.4625998782950218, "learning_rate": 0.00021963657678780772, "loss": 3.5754032135009766, "step": 1249, "token_acc": 0.24517069183060547 }, { "epoch": 0.7329228965112871, "grad_norm": 1.261337351964851, "learning_rate": 0.00021981242672919107, "loss": 3.5500237941741943, "step": 1250, "token_acc": 0.24797190974705768 }, { "epoch": 0.7335092348284961, "grad_norm": 1.4197345876805896, "learning_rate": 0.00021998827667057443, "loss": 3.545901298522949, "step": 1251, "token_acc": 0.25066345983728244 }, { "epoch": 0.7340955731457051, "grad_norm": 1.1981523835627474, "learning_rate": 0.0002201641266119578, "loss": 3.526506185531616, "step": 1252, "token_acc": 0.25086497567701355 }, { "epoch": 0.7346819114629141, "grad_norm": 1.5868929873386077, "learning_rate": 0.0002203399765533411, "loss": 3.557638168334961, "step": 1253, "token_acc": 0.24662984533986684 }, { "epoch": 0.7352682497801232, "grad_norm": 1.203787460106692, "learning_rate": 0.00022051582649472448, "loss": 3.622738838195801, "step": 1254, "token_acc": 0.23892821607727707 }, { "epoch": 0.7358545880973322, "grad_norm": 1.5349738914300173, "learning_rate": 0.00022069167643610783, "loss": 3.570497512817383, "step": 1255, "token_acc": 0.24549297209207577 }, { "epoch": 0.7364409264145412, "grad_norm": 1.2023519482228637, "learning_rate": 0.00022086752637749118, "loss": 3.5299220085144043, "step": 1256, "token_acc": 0.24937345313117448 }, { "epoch": 0.7370272647317502, "grad_norm": 1.4428375285935824, "learning_rate": 0.00022104337631887453, "loss": 3.625183343887329, "step": 1257, "token_acc": 0.23908045381815168 }, { "epoch": 0.7376136030489593, "grad_norm": 1.492136020848117, "learning_rate": 0.00022121922626025788, "loss": 3.583981513977051, "step": 1258, "token_acc": 0.2430647087898509 }, { "epoch": 0.7381999413661683, "grad_norm": 1.2650077279455343, "learning_rate": 0.00022139507620164123, "loss": 3.5299758911132812, "step": 1259, "token_acc": 0.24919815537854642 }, { "epoch": 0.7387862796833773, "grad_norm": 1.3894114980479488, "learning_rate": 0.00022157092614302458, "loss": 3.5406646728515625, "step": 1260, "token_acc": 0.2473780787615509 }, { "epoch": 0.7393726180005863, "grad_norm": 1.5554310690509106, "learning_rate": 0.00022174677608440796, "loss": 3.5819411277770996, "step": 1261, "token_acc": 0.2432205219966681 }, { "epoch": 0.7399589563177954, "grad_norm": 1.078788867409666, "learning_rate": 0.0002219226260257913, "loss": 3.516218900680542, "step": 1262, "token_acc": 0.25198958201319577 }, { "epoch": 0.7405452946350044, "grad_norm": 1.4517875795545918, "learning_rate": 0.00022209847596717466, "loss": 3.539255142211914, "step": 1263, "token_acc": 0.24697214860528 }, { "epoch": 0.7411316329522134, "grad_norm": 0.9449188512997851, "learning_rate": 0.00022227432590855801, "loss": 3.493272542953491, "step": 1264, "token_acc": 0.25410398605315265 }, { "epoch": 0.7417179712694224, "grad_norm": 1.6967023908024002, "learning_rate": 0.00022245017584994137, "loss": 3.605579376220703, "step": 1265, "token_acc": 0.24067534189118514 }, { "epoch": 0.7423043095866315, "grad_norm": 1.376175745966463, "learning_rate": 0.00022262602579132472, "loss": 3.514482021331787, "step": 1266, "token_acc": 0.25100421732727846 }, { "epoch": 0.7428906479038405, "grad_norm": 1.4282552361013525, "learning_rate": 0.00022280187573270807, "loss": 3.4941563606262207, "step": 1267, "token_acc": 0.25457826957735685 }, { "epoch": 0.7434769862210495, "grad_norm": 1.2853485429608942, "learning_rate": 0.00022297772567409145, "loss": 3.5218427181243896, "step": 1268, "token_acc": 0.2522233605440694 }, { "epoch": 0.7440633245382586, "grad_norm": 1.2983254477011472, "learning_rate": 0.0002231535756154748, "loss": 3.5729188919067383, "step": 1269, "token_acc": 0.24637513486462417 }, { "epoch": 0.7446496628554676, "grad_norm": 1.3241285215527823, "learning_rate": 0.00022332942555685815, "loss": 3.5643608570098877, "step": 1270, "token_acc": 0.24493266380488915 }, { "epoch": 0.7452360011726766, "grad_norm": 1.7079998418405293, "learning_rate": 0.00022350527549824147, "loss": 3.5267105102539062, "step": 1271, "token_acc": 0.2508754537373613 }, { "epoch": 0.7458223394898856, "grad_norm": 1.0785260467019164, "learning_rate": 0.00022368112543962482, "loss": 3.516601085662842, "step": 1272, "token_acc": 0.25197126247112284 }, { "epoch": 0.7464086778070947, "grad_norm": 1.5906117477639026, "learning_rate": 0.00022385697538100817, "loss": 3.5439701080322266, "step": 1273, "token_acc": 0.24772254168470084 }, { "epoch": 0.7469950161243037, "grad_norm": 1.3551127023366434, "learning_rate": 0.00022403282532239152, "loss": 3.4956958293914795, "step": 1274, "token_acc": 0.2531258331291185 }, { "epoch": 0.7475813544415127, "grad_norm": 1.7966361203245316, "learning_rate": 0.00022420867526377487, "loss": 3.5369644165039062, "step": 1275, "token_acc": 0.2475377049095056 }, { "epoch": 0.7481676927587217, "grad_norm": 1.1403500735637413, "learning_rate": 0.00022438452520515825, "loss": 3.5866994857788086, "step": 1276, "token_acc": 0.2411107143982274 }, { "epoch": 0.7487540310759309, "grad_norm": 1.5401020454499297, "learning_rate": 0.0002245603751465416, "loss": 3.5151519775390625, "step": 1277, "token_acc": 0.25217215120593206 }, { "epoch": 0.7493403693931399, "grad_norm": 1.3311392424656527, "learning_rate": 0.00022473622508792495, "loss": 3.5742123126983643, "step": 1278, "token_acc": 0.2442783841016452 }, { "epoch": 0.7499267077103489, "grad_norm": 1.159911663442692, "learning_rate": 0.0002249120750293083, "loss": 3.5540332794189453, "step": 1279, "token_acc": 0.24720883844592179 }, { "epoch": 0.7505130460275579, "grad_norm": 1.3012220176281875, "learning_rate": 0.00022508792497069166, "loss": 3.5348312854766846, "step": 1280, "token_acc": 0.24802389364569077 }, { "epoch": 0.751099384344767, "grad_norm": 1.3022062175616274, "learning_rate": 0.000225263774912075, "loss": 3.5225284099578857, "step": 1281, "token_acc": 0.2503072616380958 }, { "epoch": 0.751685722661976, "grad_norm": 1.505998046236882, "learning_rate": 0.00022543962485345836, "loss": 3.4947967529296875, "step": 1282, "token_acc": 0.2542147387019762 }, { "epoch": 0.752272060979185, "grad_norm": 1.232094591628959, "learning_rate": 0.0002256154747948417, "loss": 3.543626546859741, "step": 1283, "token_acc": 0.24764154703884605 }, { "epoch": 0.752858399296394, "grad_norm": 1.407284196813894, "learning_rate": 0.00022579132473622509, "loss": 3.586953639984131, "step": 1284, "token_acc": 0.24211429649676508 }, { "epoch": 0.7534447376136031, "grad_norm": 1.2439733052125364, "learning_rate": 0.00022596717467760844, "loss": 3.5615909099578857, "step": 1285, "token_acc": 0.24607328480976437 }, { "epoch": 0.7540310759308121, "grad_norm": 1.2459773478657614, "learning_rate": 0.0002261430246189918, "loss": 3.5112464427948, "step": 1286, "token_acc": 0.2525803560639626 }, { "epoch": 0.7546174142480211, "grad_norm": 1.3251173409506307, "learning_rate": 0.00022631887456037514, "loss": 3.5989558696746826, "step": 1287, "token_acc": 0.2408991245533842 }, { "epoch": 0.7552037525652301, "grad_norm": 1.519826396769631, "learning_rate": 0.0002264947245017585, "loss": 3.538268566131592, "step": 1288, "token_acc": 0.249512409602184 }, { "epoch": 0.7557900908824392, "grad_norm": 1.3617912994699326, "learning_rate": 0.00022667057444314181, "loss": 3.6270439624786377, "step": 1289, "token_acc": 0.23806774588078 }, { "epoch": 0.7563764291996482, "grad_norm": 1.4984868102122595, "learning_rate": 0.00022684642438452516, "loss": 3.518787384033203, "step": 1290, "token_acc": 0.2492241644275672 }, { "epoch": 0.7569627675168572, "grad_norm": 1.2724412606452153, "learning_rate": 0.00022702227432590852, "loss": 3.547226905822754, "step": 1291, "token_acc": 0.2482909813492878 }, { "epoch": 0.7575491058340662, "grad_norm": 1.6547817954334374, "learning_rate": 0.0002271981242672919, "loss": 3.5538692474365234, "step": 1292, "token_acc": 0.24455592292783035 }, { "epoch": 0.7581354441512753, "grad_norm": 1.0969874831633897, "learning_rate": 0.00022737397420867524, "loss": 3.552861452102661, "step": 1293, "token_acc": 0.24882529681936705 }, { "epoch": 0.7587217824684843, "grad_norm": 1.5525991567533708, "learning_rate": 0.0002275498241500586, "loss": 3.5503973960876465, "step": 1294, "token_acc": 0.24610411590368234 }, { "epoch": 0.7593081207856933, "grad_norm": 1.111227001399496, "learning_rate": 0.00022772567409144195, "loss": 3.5445427894592285, "step": 1295, "token_acc": 0.24689116348563217 }, { "epoch": 0.7598944591029023, "grad_norm": 1.401945661859676, "learning_rate": 0.0002279015240328253, "loss": 3.498046398162842, "step": 1296, "token_acc": 0.25129064787773225 }, { "epoch": 0.7604807974201114, "grad_norm": 1.3553859273140412, "learning_rate": 0.00022807737397420865, "loss": 3.550243377685547, "step": 1297, "token_acc": 0.2473780663402885 }, { "epoch": 0.7610671357373204, "grad_norm": 1.4746703457145391, "learning_rate": 0.000228253223915592, "loss": 3.568816661834717, "step": 1298, "token_acc": 0.24337994489785308 }, { "epoch": 0.7616534740545294, "grad_norm": 1.5822841647471388, "learning_rate": 0.00022842907385697538, "loss": 3.5240468978881836, "step": 1299, "token_acc": 0.2501253146413563 }, { "epoch": 0.7622398123717385, "grad_norm": 1.3058461990043648, "learning_rate": 0.00022860492379835873, "loss": 3.5019290447235107, "step": 1300, "token_acc": 0.2524095486566734 }, { "epoch": 0.7628261506889475, "grad_norm": 1.444597586491725, "learning_rate": 0.00022878077373974208, "loss": 3.551189422607422, "step": 1301, "token_acc": 0.24687636278815045 }, { "epoch": 0.7634124890061565, "grad_norm": 0.999517254996965, "learning_rate": 0.00022895662368112543, "loss": 3.5050086975097656, "step": 1302, "token_acc": 0.2526176091602818 }, { "epoch": 0.7639988273233655, "grad_norm": 1.6290560761411603, "learning_rate": 0.00022913247362250878, "loss": 3.491086483001709, "step": 1303, "token_acc": 0.2532722361715415 }, { "epoch": 0.7645851656405747, "grad_norm": 1.06139807828867, "learning_rate": 0.00022930832356389213, "loss": 3.552790880203247, "step": 1304, "token_acc": 0.24734570364114017 }, { "epoch": 0.7651715039577837, "grad_norm": 1.6256699714520118, "learning_rate": 0.00022948417350527548, "loss": 3.5898325443267822, "step": 1305, "token_acc": 0.24198415788826494 }, { "epoch": 0.7657578422749927, "grad_norm": 1.2347533772466504, "learning_rate": 0.00022966002344665886, "loss": 3.5725343227386475, "step": 1306, "token_acc": 0.2442365366564663 }, { "epoch": 0.7663441805922017, "grad_norm": 1.2867475843845748, "learning_rate": 0.00022983587338804216, "loss": 3.5599560737609863, "step": 1307, "token_acc": 0.24574363502424326 }, { "epoch": 0.7669305189094108, "grad_norm": 1.469264263974974, "learning_rate": 0.00023001172332942553, "loss": 3.493713855743408, "step": 1308, "token_acc": 0.2529612903395894 }, { "epoch": 0.7675168572266198, "grad_norm": 1.698079236558699, "learning_rate": 0.00023018757327080889, "loss": 3.517455577850342, "step": 1309, "token_acc": 0.24846891548628752 }, { "epoch": 0.7681031955438288, "grad_norm": 1.15350549639968, "learning_rate": 0.00023036342321219224, "loss": 3.480907917022705, "step": 1310, "token_acc": 0.257596941618342 }, { "epoch": 0.7686895338610378, "grad_norm": 1.7379349550715335, "learning_rate": 0.0002305392731535756, "loss": 3.5841002464294434, "step": 1311, "token_acc": 0.2439927620783476 }, { "epoch": 0.7692758721782469, "grad_norm": 1.1002585500093949, "learning_rate": 0.00023071512309495894, "loss": 3.546715259552002, "step": 1312, "token_acc": 0.24598529893721616 }, { "epoch": 0.7698622104954559, "grad_norm": 1.562290812434698, "learning_rate": 0.0002308909730363423, "loss": 3.565049409866333, "step": 1313, "token_acc": 0.24550648273441925 }, { "epoch": 0.7704485488126649, "grad_norm": 1.1663479522914595, "learning_rate": 0.00023106682297772564, "loss": 3.5516159534454346, "step": 1314, "token_acc": 0.24810693781708118 }, { "epoch": 0.7710348871298739, "grad_norm": 1.3549712160228458, "learning_rate": 0.00023124267291910902, "loss": 3.5216691493988037, "step": 1315, "token_acc": 0.24940417776531615 }, { "epoch": 0.771621225447083, "grad_norm": 1.2017379180206698, "learning_rate": 0.00023141852286049237, "loss": 3.535663604736328, "step": 1316, "token_acc": 0.2503336766911664 }, { "epoch": 0.772207563764292, "grad_norm": 1.2752756703528207, "learning_rate": 0.00023159437280187572, "loss": 3.4778430461883545, "step": 1317, "token_acc": 0.25549548416839546 }, { "epoch": 0.772793902081501, "grad_norm": 1.2451082557090631, "learning_rate": 0.00023177022274325907, "loss": 3.548982620239258, "step": 1318, "token_acc": 0.2455617512873189 }, { "epoch": 0.77338024039871, "grad_norm": 1.4158895523275319, "learning_rate": 0.00023194607268464242, "loss": 3.553767204284668, "step": 1319, "token_acc": 0.24403253697502258 }, { "epoch": 0.7739665787159191, "grad_norm": 1.1767115162226616, "learning_rate": 0.00023212192262602577, "loss": 3.521721363067627, "step": 1320, "token_acc": 0.25231452970398655 }, { "epoch": 0.7745529170331281, "grad_norm": 1.020067149395257, "learning_rate": 0.00023229777256740912, "loss": 3.490562915802002, "step": 1321, "token_acc": 0.2524878938498624 }, { "epoch": 0.7751392553503371, "grad_norm": 1.4522842208103106, "learning_rate": 0.0002324736225087925, "loss": 3.5425868034362793, "step": 1322, "token_acc": 0.2478709940366382 }, { "epoch": 0.7757255936675461, "grad_norm": 1.2347430944392428, "learning_rate": 0.00023264947245017585, "loss": 3.540163040161133, "step": 1323, "token_acc": 0.24787067330162482 }, { "epoch": 0.7763119319847552, "grad_norm": 1.4725162345597291, "learning_rate": 0.00023282532239155918, "loss": 3.52274227142334, "step": 1324, "token_acc": 0.2501067548118743 }, { "epoch": 0.7768982703019642, "grad_norm": 1.4164951525512393, "learning_rate": 0.00023300117233294253, "loss": 3.5144240856170654, "step": 1325, "token_acc": 0.2494186136449186 }, { "epoch": 0.7774846086191732, "grad_norm": 1.3358592704000456, "learning_rate": 0.00023317702227432588, "loss": 3.5409460067749023, "step": 1326, "token_acc": 0.24992991646042081 }, { "epoch": 0.7780709469363823, "grad_norm": 1.429921604376815, "learning_rate": 0.00023335287221570923, "loss": 3.551036834716797, "step": 1327, "token_acc": 0.24525578265401718 }, { "epoch": 0.7786572852535913, "grad_norm": 1.129976355717462, "learning_rate": 0.00023352872215709258, "loss": 3.491189479827881, "step": 1328, "token_acc": 0.25371180054285647 }, { "epoch": 0.7792436235708003, "grad_norm": 1.425989839314161, "learning_rate": 0.00023370457209847593, "loss": 3.498746871948242, "step": 1329, "token_acc": 0.25239680547217525 }, { "epoch": 0.7798299618880093, "grad_norm": 1.218087099427935, "learning_rate": 0.00023388042203985928, "loss": 3.551755905151367, "step": 1330, "token_acc": 0.2462798623966187 }, { "epoch": 0.7804163002052185, "grad_norm": 1.4064826474628556, "learning_rate": 0.00023405627198124266, "loss": 3.503357410430908, "step": 1331, "token_acc": 0.25205527006485345 }, { "epoch": 0.7810026385224275, "grad_norm": 1.2702129287944959, "learning_rate": 0.000234232121922626, "loss": 3.5402162075042725, "step": 1332, "token_acc": 0.2482242861177273 }, { "epoch": 0.7815889768396365, "grad_norm": 1.4589042288206073, "learning_rate": 0.00023440797186400936, "loss": 3.5561366081237793, "step": 1333, "token_acc": 0.2433288151198179 }, { "epoch": 0.7821753151568455, "grad_norm": 1.1382322207361673, "learning_rate": 0.0002345838218053927, "loss": 3.5850577354431152, "step": 1334, "token_acc": 0.24253468594996558 }, { "epoch": 0.7827616534740546, "grad_norm": 1.4318276791262674, "learning_rate": 0.00023475967174677606, "loss": 3.525158166885376, "step": 1335, "token_acc": 0.2482472920822298 }, { "epoch": 0.7833479917912636, "grad_norm": 1.2806354855902475, "learning_rate": 0.00023493552168815941, "loss": 3.493321418762207, "step": 1336, "token_acc": 0.2520449795999602 }, { "epoch": 0.7839343301084726, "grad_norm": 1.6088123510738195, "learning_rate": 0.00023511137162954276, "loss": 3.478659152984619, "step": 1337, "token_acc": 0.25558739102558986 }, { "epoch": 0.7845206684256816, "grad_norm": 1.176673622710618, "learning_rate": 0.00023528722157092614, "loss": 3.543164014816284, "step": 1338, "token_acc": 0.24751044947341028 }, { "epoch": 0.7851070067428907, "grad_norm": 1.736002962508232, "learning_rate": 0.0002354630715123095, "loss": 3.5470120906829834, "step": 1339, "token_acc": 0.24651919184366672 }, { "epoch": 0.7856933450600997, "grad_norm": 1.0430661484266757, "learning_rate": 0.00023563892145369284, "loss": 3.5405569076538086, "step": 1340, "token_acc": 0.24673323006027154 }, { "epoch": 0.7862796833773087, "grad_norm": 1.4860223772155738, "learning_rate": 0.0002358147713950762, "loss": 3.554518699645996, "step": 1341, "token_acc": 0.24535291146277385 }, { "epoch": 0.7868660216945177, "grad_norm": 1.1371472770558677, "learning_rate": 0.00023599062133645952, "loss": 3.539022207260132, "step": 1342, "token_acc": 0.24746841571493366 }, { "epoch": 0.7874523600117268, "grad_norm": 1.2986564156057583, "learning_rate": 0.00023616647127784287, "loss": 3.512289524078369, "step": 1343, "token_acc": 0.25061844192491545 }, { "epoch": 0.7880386983289358, "grad_norm": 1.4232989622410008, "learning_rate": 0.00023634232121922622, "loss": 3.569448709487915, "step": 1344, "token_acc": 0.24304652773993662 }, { "epoch": 0.7886250366461448, "grad_norm": 0.9015955375223305, "learning_rate": 0.00023651817116060957, "loss": 3.5098624229431152, "step": 1345, "token_acc": 0.25226367114779813 }, { "epoch": 0.7892113749633538, "grad_norm": 1.1018489389970407, "learning_rate": 0.00023669402110199295, "loss": 3.506894111633301, "step": 1346, "token_acc": 0.25030938985201284 }, { "epoch": 0.7897977132805629, "grad_norm": 1.3289606406901249, "learning_rate": 0.0002368698710433763, "loss": 3.5221610069274902, "step": 1347, "token_acc": 0.2521557298714594 }, { "epoch": 0.7903840515977719, "grad_norm": 1.1043491605511258, "learning_rate": 0.00023704572098475965, "loss": 3.551619529724121, "step": 1348, "token_acc": 0.24603639851847905 }, { "epoch": 0.7909703899149809, "grad_norm": 1.4888513969907502, "learning_rate": 0.000237221570926143, "loss": 3.5470314025878906, "step": 1349, "token_acc": 0.24807980075970826 }, { "epoch": 0.7915567282321899, "grad_norm": 1.0276340009004528, "learning_rate": 0.00023739742086752635, "loss": 3.478890895843506, "step": 1350, "token_acc": 0.25571374463647356 }, { "epoch": 0.792143066549399, "grad_norm": 1.3879383216328915, "learning_rate": 0.0002375732708089097, "loss": 3.57604718208313, "step": 1351, "token_acc": 0.24262518199658162 }, { "epoch": 0.792729404866608, "grad_norm": 1.340463944902041, "learning_rate": 0.00023774912075029306, "loss": 3.539947509765625, "step": 1352, "token_acc": 0.24951912190290865 }, { "epoch": 0.793315743183817, "grad_norm": 1.2984350731495375, "learning_rate": 0.00023792497069167643, "loss": 3.5628278255462646, "step": 1353, "token_acc": 0.24179955683097823 }, { "epoch": 0.7939020815010261, "grad_norm": 1.4053193273464923, "learning_rate": 0.00023810082063305978, "loss": 3.596524238586426, "step": 1354, "token_acc": 0.24081179590900115 }, { "epoch": 0.7944884198182351, "grad_norm": 1.1068320873107373, "learning_rate": 0.00023827667057444313, "loss": 3.514730215072632, "step": 1355, "token_acc": 0.25075754431115355 }, { "epoch": 0.7950747581354441, "grad_norm": 1.6625948931249201, "learning_rate": 0.00023845252051582649, "loss": 3.5009069442749023, "step": 1356, "token_acc": 0.2532228289834475 }, { "epoch": 0.7956610964526531, "grad_norm": 0.9555384726549012, "learning_rate": 0.00023862837045720984, "loss": 3.426203727722168, "step": 1357, "token_acc": 0.2634332709767469 }, { "epoch": 0.7962474347698623, "grad_norm": 1.4347928093908564, "learning_rate": 0.0002388042203985932, "loss": 3.5869579315185547, "step": 1358, "token_acc": 0.23961607866725945 }, { "epoch": 0.7968337730870713, "grad_norm": 1.5869928118072407, "learning_rate": 0.00023898007033997654, "loss": 3.531996250152588, "step": 1359, "token_acc": 0.24907132385747976 }, { "epoch": 0.7974201114042803, "grad_norm": 1.043270178257646, "learning_rate": 0.00023915592028135986, "loss": 3.5149025917053223, "step": 1360, "token_acc": 0.24939670495375799 }, { "epoch": 0.7980064497214893, "grad_norm": 1.4997250379643066, "learning_rate": 0.0002393317702227432, "loss": 3.499387741088867, "step": 1361, "token_acc": 0.25247028794354515 }, { "epoch": 0.7985927880386984, "grad_norm": 0.9161935273599092, "learning_rate": 0.0002395076201641266, "loss": 3.4430837631225586, "step": 1362, "token_acc": 0.2608140271196803 }, { "epoch": 0.7991791263559074, "grad_norm": 1.4636381710982649, "learning_rate": 0.00023968347010550994, "loss": 3.538482189178467, "step": 1363, "token_acc": 0.2481892192903008 }, { "epoch": 0.7997654646731164, "grad_norm": 1.222398277145788, "learning_rate": 0.0002398593200468933, "loss": 3.4975392818450928, "step": 1364, "token_acc": 0.2520004106177095 }, { "epoch": 0.8003518029903254, "grad_norm": 1.402610637836429, "learning_rate": 0.00024003516998827664, "loss": 3.52131986618042, "step": 1365, "token_acc": 0.24899112837790638 }, { "epoch": 0.8009381413075345, "grad_norm": 1.1913008491251893, "learning_rate": 0.00024021101992966, "loss": 3.5296170711517334, "step": 1366, "token_acc": 0.24790734861005034 }, { "epoch": 0.8015244796247435, "grad_norm": 1.2684558105859447, "learning_rate": 0.00024038686987104335, "loss": 3.518589973449707, "step": 1367, "token_acc": 0.24943038184146413 }, { "epoch": 0.8021108179419525, "grad_norm": 1.2108378798749466, "learning_rate": 0.0002405627198124267, "loss": 3.5225632190704346, "step": 1368, "token_acc": 0.24759843948226407 }, { "epoch": 0.8026971562591615, "grad_norm": 1.3267338901291388, "learning_rate": 0.00024073856975381007, "loss": 3.4838345050811768, "step": 1369, "token_acc": 0.25506533749819404 }, { "epoch": 0.8032834945763706, "grad_norm": 1.1158571791648582, "learning_rate": 0.00024091441969519343, "loss": 3.486650228500366, "step": 1370, "token_acc": 0.2515 }, { "epoch": 0.8038698328935796, "grad_norm": 1.4953364879371343, "learning_rate": 0.00024109026963657678, "loss": 3.5227785110473633, "step": 1371, "token_acc": 0.25013988470897475 }, { "epoch": 0.8044561712107886, "grad_norm": 1.1646909892992259, "learning_rate": 0.00024126611957796013, "loss": 3.5241241455078125, "step": 1372, "token_acc": 0.25021427500053567 }, { "epoch": 0.8050425095279976, "grad_norm": 1.1741380221992452, "learning_rate": 0.00024144196951934348, "loss": 3.509476661682129, "step": 1373, "token_acc": 0.2512670580540594 }, { "epoch": 0.8056288478452067, "grad_norm": 1.373415991157362, "learning_rate": 0.00024161781946072683, "loss": 3.523770809173584, "step": 1374, "token_acc": 0.24859482287172632 }, { "epoch": 0.8062151861624157, "grad_norm": 1.2750483087366524, "learning_rate": 0.00024179366940211018, "loss": 3.505979061126709, "step": 1375, "token_acc": 0.2518861855581306 }, { "epoch": 0.8068015244796247, "grad_norm": 1.381538930759667, "learning_rate": 0.00024196951934349356, "loss": 3.4995217323303223, "step": 1376, "token_acc": 0.2513723346657306 }, { "epoch": 0.8073878627968337, "grad_norm": 1.279395434103982, "learning_rate": 0.0002421453692848769, "loss": 3.5280673503875732, "step": 1377, "token_acc": 0.24813645966458883 }, { "epoch": 0.8079742011140428, "grad_norm": 1.2955129904578537, "learning_rate": 0.00024232121922626023, "loss": 3.5247902870178223, "step": 1378, "token_acc": 0.24923996416049057 }, { "epoch": 0.8085605394312518, "grad_norm": 1.2994083419952702, "learning_rate": 0.00024249706916764358, "loss": 3.4935412406921387, "step": 1379, "token_acc": 0.2509757844096378 }, { "epoch": 0.8091468777484608, "grad_norm": 1.2562584084945727, "learning_rate": 0.00024267291910902693, "loss": 3.541517734527588, "step": 1380, "token_acc": 0.24732372626236554 }, { "epoch": 0.8097332160656698, "grad_norm": 1.0297583497336373, "learning_rate": 0.00024284876905041029, "loss": 3.521821975708008, "step": 1381, "token_acc": 0.24713079980351002 }, { "epoch": 0.810319554382879, "grad_norm": 1.4345717094664914, "learning_rate": 0.00024302461899179364, "loss": 3.5253238677978516, "step": 1382, "token_acc": 0.24893450971573697 }, { "epoch": 0.810905892700088, "grad_norm": 1.2128476575553961, "learning_rate": 0.000243200468933177, "loss": 3.5326619148254395, "step": 1383, "token_acc": 0.2455768928268432 }, { "epoch": 0.811492231017297, "grad_norm": 1.5176449352451746, "learning_rate": 0.00024337631887456034, "loss": 3.498926877975464, "step": 1384, "token_acc": 0.2520472989373041 }, { "epoch": 0.8120785693345061, "grad_norm": 1.2248775542869774, "learning_rate": 0.00024355216881594372, "loss": 3.486328125, "step": 1385, "token_acc": 0.2532828461001748 }, { "epoch": 0.8126649076517151, "grad_norm": 1.260784729825427, "learning_rate": 0.00024372801875732707, "loss": 3.4956254959106445, "step": 1386, "token_acc": 0.2521185409166037 }, { "epoch": 0.8132512459689241, "grad_norm": 0.9384761279512973, "learning_rate": 0.00024390386869871042, "loss": 3.4776968955993652, "step": 1387, "token_acc": 0.25448890559810167 }, { "epoch": 0.8138375842861331, "grad_norm": 1.4143018745024274, "learning_rate": 0.00024407971864009377, "loss": 3.569000720977783, "step": 1388, "token_acc": 0.2423899957586822 }, { "epoch": 0.8144239226033422, "grad_norm": 1.1364348124033246, "learning_rate": 0.0002442555685814771, "loss": 3.503652334213257, "step": 1389, "token_acc": 0.2497558160772283 }, { "epoch": 0.8150102609205512, "grad_norm": 1.112221727500393, "learning_rate": 0.0002444314185228605, "loss": 3.47096586227417, "step": 1390, "token_acc": 0.254377988248935 }, { "epoch": 0.8155965992377602, "grad_norm": 1.4408744725241267, "learning_rate": 0.0002446072684642438, "loss": 3.4749674797058105, "step": 1391, "token_acc": 0.25344111187866664 }, { "epoch": 0.8161829375549692, "grad_norm": 0.999034055474413, "learning_rate": 0.0002447831184056272, "loss": 3.519740581512451, "step": 1392, "token_acc": 0.25079816690376244 }, { "epoch": 0.8167692758721783, "grad_norm": 1.4358392785129175, "learning_rate": 0.0002449589683470105, "loss": 3.4820048809051514, "step": 1393, "token_acc": 0.25344169676288947 }, { "epoch": 0.8173556141893873, "grad_norm": 1.2158700851637072, "learning_rate": 0.0002451348182883939, "loss": 3.487466812133789, "step": 1394, "token_acc": 0.25218459529117676 }, { "epoch": 0.8179419525065963, "grad_norm": 1.3575663748216873, "learning_rate": 0.0002453106682297773, "loss": 3.4985780715942383, "step": 1395, "token_acc": 0.25081551568878313 }, { "epoch": 0.8185282908238053, "grad_norm": 1.2461491284729482, "learning_rate": 0.00024548651817116055, "loss": 3.4759790897369385, "step": 1396, "token_acc": 0.2531578725022179 }, { "epoch": 0.8191146291410144, "grad_norm": 1.2489309189556415, "learning_rate": 0.0002456623681125439, "loss": 3.5269782543182373, "step": 1397, "token_acc": 0.25053125659894204 }, { "epoch": 0.8197009674582234, "grad_norm": 1.1061098127228195, "learning_rate": 0.0002458382180539273, "loss": 3.565739154815674, "step": 1398, "token_acc": 0.24388808520125893 }, { "epoch": 0.8202873057754324, "grad_norm": 1.2921953537227937, "learning_rate": 0.00024601406799531063, "loss": 3.5449059009552, "step": 1399, "token_acc": 0.24675700525281913 }, { "epoch": 0.8208736440926414, "grad_norm": 1.2131059564246882, "learning_rate": 0.000246189917936694, "loss": 3.487881898880005, "step": 1400, "token_acc": 0.2518350118713242 }, { "epoch": 0.8214599824098505, "grad_norm": 1.1314907200938653, "learning_rate": 0.00024636576787807733, "loss": 3.5301811695098877, "step": 1401, "token_acc": 0.24820517924634553 }, { "epoch": 0.8220463207270595, "grad_norm": 1.2883489472056389, "learning_rate": 0.0002465416178194607, "loss": 3.4905428886413574, "step": 1402, "token_acc": 0.25414038879133893 }, { "epoch": 0.8226326590442685, "grad_norm": 1.1617520850905985, "learning_rate": 0.00024671746776084403, "loss": 3.490755558013916, "step": 1403, "token_acc": 0.2537040115536069 }, { "epoch": 0.8232189973614775, "grad_norm": 1.3902940951835465, "learning_rate": 0.0002468933177022274, "loss": 3.459394931793213, "step": 1404, "token_acc": 0.2569570406355315 }, { "epoch": 0.8238053356786866, "grad_norm": 1.16037314307343, "learning_rate": 0.0002470691676436108, "loss": 3.4644532203674316, "step": 1405, "token_acc": 0.2531717787801028 }, { "epoch": 0.8243916739958956, "grad_norm": 1.1317272249494483, "learning_rate": 0.0002472450175849941, "loss": 3.4927358627319336, "step": 1406, "token_acc": 0.2506719120256865 }, { "epoch": 0.8249780123131046, "grad_norm": 1.0801859633177322, "learning_rate": 0.0002474208675263775, "loss": 3.5378341674804688, "step": 1407, "token_acc": 0.24590614226267948 }, { "epoch": 0.8255643506303136, "grad_norm": 1.0145950275544406, "learning_rate": 0.0002475967174677608, "loss": 3.432802200317383, "step": 1408, "token_acc": 0.260275911601341 }, { "epoch": 0.8261506889475227, "grad_norm": 1.3244002168746623, "learning_rate": 0.0002477725674091442, "loss": 3.5539357662200928, "step": 1409, "token_acc": 0.24332076884005668 }, { "epoch": 0.8267370272647317, "grad_norm": 1.3588962961468913, "learning_rate": 0.0002479484173505275, "loss": 3.4740781784057617, "step": 1410, "token_acc": 0.25389171162129387 }, { "epoch": 0.8273233655819408, "grad_norm": 1.2332645972401635, "learning_rate": 0.0002481242672919109, "loss": 3.5055789947509766, "step": 1411, "token_acc": 0.25093262276360867 }, { "epoch": 0.8279097038991499, "grad_norm": 1.2857659721300096, "learning_rate": 0.00024830011723329427, "loss": 3.4982221126556396, "step": 1412, "token_acc": 0.2508262315663979 }, { "epoch": 0.8284960422163589, "grad_norm": 1.2115742894605912, "learning_rate": 0.0002484759671746776, "loss": 3.4536831378936768, "step": 1413, "token_acc": 0.25567958773425087 }, { "epoch": 0.8290823805335679, "grad_norm": 1.0186740097083244, "learning_rate": 0.0002486518171160609, "loss": 3.525965690612793, "step": 1414, "token_acc": 0.247292948605744 }, { "epoch": 0.8296687188507769, "grad_norm": 1.0860146122015826, "learning_rate": 0.0002488276670574443, "loss": 3.5002598762512207, "step": 1415, "token_acc": 0.2515596551706045 }, { "epoch": 0.830255057167986, "grad_norm": 1.4454233866292083, "learning_rate": 0.0002490035169988276, "loss": 3.537914752960205, "step": 1416, "token_acc": 0.24730110892160273 }, { "epoch": 0.830841395485195, "grad_norm": 1.0400082681968426, "learning_rate": 0.000249179366940211, "loss": 3.445465087890625, "step": 1417, "token_acc": 0.2572701816602924 }, { "epoch": 0.831427733802404, "grad_norm": 1.4586003171229651, "learning_rate": 0.0002493552168815943, "loss": 3.524172782897949, "step": 1418, "token_acc": 0.2477750959022223 }, { "epoch": 0.832014072119613, "grad_norm": 0.9737932195192143, "learning_rate": 0.0002495310668229777, "loss": 3.504889965057373, "step": 1419, "token_acc": 0.24958140576413168 }, { "epoch": 0.8326004104368221, "grad_norm": 1.6420739394542294, "learning_rate": 0.0002497069167643611, "loss": 3.4851255416870117, "step": 1420, "token_acc": 0.25322547614171614 }, { "epoch": 0.8331867487540311, "grad_norm": 1.1223518955111516, "learning_rate": 0.0002498827667057444, "loss": 3.542442560195923, "step": 1421, "token_acc": 0.24554698040089198 }, { "epoch": 0.8337730870712401, "grad_norm": 1.3367038739023618, "learning_rate": 0.0002500586166471278, "loss": 3.515259265899658, "step": 1422, "token_acc": 0.24910493711883128 }, { "epoch": 0.8343594253884491, "grad_norm": 1.03374443830272, "learning_rate": 0.0002502344665885111, "loss": 3.444234848022461, "step": 1423, "token_acc": 0.2579337291317806 }, { "epoch": 0.8349457637056582, "grad_norm": 1.2791814816590374, "learning_rate": 0.0002504103165298945, "loss": 3.44527530670166, "step": 1424, "token_acc": 0.25652109548482604 }, { "epoch": 0.8355321020228672, "grad_norm": 1.3864977882348868, "learning_rate": 0.0002505861664712778, "loss": 3.455772638320923, "step": 1425, "token_acc": 0.25790059227569306 }, { "epoch": 0.8361184403400762, "grad_norm": 1.4144485065719277, "learning_rate": 0.0002507620164126612, "loss": 3.488882541656494, "step": 1426, "token_acc": 0.25109528330143516 }, { "epoch": 0.8367047786572852, "grad_norm": 1.1264031573258386, "learning_rate": 0.00025093786635404456, "loss": 3.5070533752441406, "step": 1427, "token_acc": 0.25087618485559127 }, { "epoch": 0.8372911169744943, "grad_norm": 1.152685641703167, "learning_rate": 0.0002511137162954279, "loss": 3.5346567630767822, "step": 1428, "token_acc": 0.24735420126290364 }, { "epoch": 0.8378774552917033, "grad_norm": 1.3353647284902894, "learning_rate": 0.00025128956623681126, "loss": 3.505019426345825, "step": 1429, "token_acc": 0.24829969020798873 }, { "epoch": 0.8384637936089123, "grad_norm": 1.0996475557570604, "learning_rate": 0.0002514654161781946, "loss": 3.502018451690674, "step": 1430, "token_acc": 0.25136771144445275 }, { "epoch": 0.8390501319261213, "grad_norm": 1.2944463810727704, "learning_rate": 0.0002516412661195779, "loss": 3.5040979385375977, "step": 1431, "token_acc": 0.25034546526041984 }, { "epoch": 0.8396364702433304, "grad_norm": 1.0922213874510516, "learning_rate": 0.0002518171160609613, "loss": 3.4808406829833984, "step": 1432, "token_acc": 0.25272973577262503 }, { "epoch": 0.8402228085605394, "grad_norm": 1.3836978652417502, "learning_rate": 0.0002519929660023446, "loss": 3.5306410789489746, "step": 1433, "token_acc": 0.24835992873415202 }, { "epoch": 0.8408091468777484, "grad_norm": 1.0227616473534067, "learning_rate": 0.000252168815943728, "loss": 3.477841854095459, "step": 1434, "token_acc": 0.2519114740119637 }, { "epoch": 0.8413954851949574, "grad_norm": 1.4117895464330559, "learning_rate": 0.0002523446658851113, "loss": 3.4867329597473145, "step": 1435, "token_acc": 0.25292114434123525 }, { "epoch": 0.8419818235121665, "grad_norm": 0.979133012776937, "learning_rate": 0.0002525205158264947, "loss": 3.526702880859375, "step": 1436, "token_acc": 0.24739271984843747 }, { "epoch": 0.8425681618293756, "grad_norm": 1.460079217104386, "learning_rate": 0.00025269636576787807, "loss": 3.5548555850982666, "step": 1437, "token_acc": 0.24606543045867876 }, { "epoch": 0.8431545001465846, "grad_norm": 0.8637701731042254, "learning_rate": 0.0002528722157092614, "loss": 3.5132594108581543, "step": 1438, "token_acc": 0.25090352816248 }, { "epoch": 0.8437408384637937, "grad_norm": 1.2024678017098835, "learning_rate": 0.00025304806565064477, "loss": 3.492680072784424, "step": 1439, "token_acc": 0.24954935197616512 }, { "epoch": 0.8443271767810027, "grad_norm": 1.0795016627432437, "learning_rate": 0.0002532239155920281, "loss": 3.4910407066345215, "step": 1440, "token_acc": 0.25258379139807996 }, { "epoch": 0.8449135150982117, "grad_norm": 1.2303250210827923, "learning_rate": 0.0002533997655334115, "loss": 3.5061259269714355, "step": 1441, "token_acc": 0.250391739739532 }, { "epoch": 0.8454998534154207, "grad_norm": 0.8931192989937837, "learning_rate": 0.0002535756154747948, "loss": 3.4786667823791504, "step": 1442, "token_acc": 0.25215940034550405 }, { "epoch": 0.8460861917326298, "grad_norm": 1.228573698795334, "learning_rate": 0.0002537514654161782, "loss": 3.5324888229370117, "step": 1443, "token_acc": 0.2465188989406279 }, { "epoch": 0.8466725300498388, "grad_norm": 1.1340169813949923, "learning_rate": 0.00025392731535756155, "loss": 3.5123674869537354, "step": 1444, "token_acc": 0.24927085358740034 }, { "epoch": 0.8472588683670478, "grad_norm": 1.5219406858487863, "learning_rate": 0.0002541031652989449, "loss": 3.4856228828430176, "step": 1445, "token_acc": 0.2519374781168183 }, { "epoch": 0.8478452066842568, "grad_norm": 1.2730331229444234, "learning_rate": 0.00025427901524032826, "loss": 3.509390354156494, "step": 1446, "token_acc": 0.24904399460755258 }, { "epoch": 0.8484315450014659, "grad_norm": 1.3087532698596622, "learning_rate": 0.0002544548651817116, "loss": 3.457827568054199, "step": 1447, "token_acc": 0.2574022526492273 }, { "epoch": 0.8490178833186749, "grad_norm": 1.0173605101250207, "learning_rate": 0.00025463071512309496, "loss": 3.519615650177002, "step": 1448, "token_acc": 0.2480974204944206 }, { "epoch": 0.8496042216358839, "grad_norm": 1.2744942717422465, "learning_rate": 0.0002548065650644783, "loss": 3.4946985244750977, "step": 1449, "token_acc": 0.2520526711775526 }, { "epoch": 0.8501905599530929, "grad_norm": 1.0618891422752543, "learning_rate": 0.0002549824150058616, "loss": 3.4787182807922363, "step": 1450, "token_acc": 0.2524193865900232 }, { "epoch": 0.850776898270302, "grad_norm": 0.9765908292231744, "learning_rate": 0.000255158264947245, "loss": 3.4466562271118164, "step": 1451, "token_acc": 0.2585026765760615 }, { "epoch": 0.851363236587511, "grad_norm": 0.9419277848629152, "learning_rate": 0.00025533411488862836, "loss": 3.4913783073425293, "step": 1452, "token_acc": 0.2518612762745918 }, { "epoch": 0.85194957490472, "grad_norm": 1.2032763069489223, "learning_rate": 0.0002555099648300117, "loss": 3.507528781890869, "step": 1453, "token_acc": 0.250432230255326 }, { "epoch": 0.852535913221929, "grad_norm": 1.1184302831915998, "learning_rate": 0.00025568581477139506, "loss": 3.502519369125366, "step": 1454, "token_acc": 0.24943050925985508 }, { "epoch": 0.8531222515391381, "grad_norm": 1.2133641109908768, "learning_rate": 0.0002558616647127784, "loss": 3.4678733348846436, "step": 1455, "token_acc": 0.2559625269397266 }, { "epoch": 0.8537085898563471, "grad_norm": 1.3048733888561916, "learning_rate": 0.00025603751465416176, "loss": 3.3941495418548584, "step": 1456, "token_acc": 0.2624058203529579 }, { "epoch": 0.8542949281735561, "grad_norm": 0.9244945988282646, "learning_rate": 0.0002562133645955451, "loss": 3.4439454078674316, "step": 1457, "token_acc": 0.25687500498236904 }, { "epoch": 0.8548812664907651, "grad_norm": 1.1368826972867665, "learning_rate": 0.00025638921453692847, "loss": 3.504081964492798, "step": 1458, "token_acc": 0.2516719909315746 }, { "epoch": 0.8554676048079742, "grad_norm": 1.1832458636905463, "learning_rate": 0.00025656506447831184, "loss": 3.424797773361206, "step": 1459, "token_acc": 0.26008319627513493 }, { "epoch": 0.8560539431251832, "grad_norm": 1.3148642348600923, "learning_rate": 0.00025674091441969517, "loss": 3.488696813583374, "step": 1460, "token_acc": 0.25248551812633613 }, { "epoch": 0.8566402814423922, "grad_norm": 0.9801609890583167, "learning_rate": 0.00025691676436107855, "loss": 3.450690746307373, "step": 1461, "token_acc": 0.25557135958326627 }, { "epoch": 0.8572266197596012, "grad_norm": 1.227028490260521, "learning_rate": 0.00025709261430246187, "loss": 3.500920057296753, "step": 1462, "token_acc": 0.2508067284586337 }, { "epoch": 0.8578129580768104, "grad_norm": 1.0108741979870475, "learning_rate": 0.00025726846424384525, "loss": 3.5010433197021484, "step": 1463, "token_acc": 0.2509909872319119 }, { "epoch": 0.8583992963940194, "grad_norm": 1.1379483325284288, "learning_rate": 0.00025744431418522857, "loss": 3.4791853427886963, "step": 1464, "token_acc": 0.25361647491251477 }, { "epoch": 0.8589856347112284, "grad_norm": 1.020050248471449, "learning_rate": 0.00025762016412661195, "loss": 3.4631359577178955, "step": 1465, "token_acc": 0.2546912619533825 }, { "epoch": 0.8595719730284375, "grad_norm": 1.3453424739355984, "learning_rate": 0.00025779601406799533, "loss": 3.4491984844207764, "step": 1466, "token_acc": 0.25708798020709844 }, { "epoch": 0.8601583113456465, "grad_norm": 1.1373548787033998, "learning_rate": 0.00025797186400937865, "loss": 3.50360107421875, "step": 1467, "token_acc": 0.2490274178007018 }, { "epoch": 0.8607446496628555, "grad_norm": 1.2029776923453677, "learning_rate": 0.000258147713950762, "loss": 3.505368232727051, "step": 1468, "token_acc": 0.2486163043707882 }, { "epoch": 0.8613309879800645, "grad_norm": 1.0136470358274647, "learning_rate": 0.00025832356389214535, "loss": 3.478726863861084, "step": 1469, "token_acc": 0.25389309378519237 }, { "epoch": 0.8619173262972736, "grad_norm": 1.3792194661646142, "learning_rate": 0.0002584994138335287, "loss": 3.445176601409912, "step": 1470, "token_acc": 0.2578973587173398 }, { "epoch": 0.8625036646144826, "grad_norm": 1.148264023600593, "learning_rate": 0.00025867526377491205, "loss": 3.522228240966797, "step": 1471, "token_acc": 0.2464372049549487 }, { "epoch": 0.8630900029316916, "grad_norm": 1.0318068798469455, "learning_rate": 0.0002588511137162954, "loss": 3.5284390449523926, "step": 1472, "token_acc": 0.24818001323626737 }, { "epoch": 0.8636763412489006, "grad_norm": 1.2969094563008188, "learning_rate": 0.00025902696365767876, "loss": 3.4596054553985596, "step": 1473, "token_acc": 0.25765682803296125 }, { "epoch": 0.8642626795661097, "grad_norm": 1.1230446180911924, "learning_rate": 0.00025920281359906213, "loss": 3.5157618522644043, "step": 1474, "token_acc": 0.2497594150625521 }, { "epoch": 0.8648490178833187, "grad_norm": 1.287610683595465, "learning_rate": 0.00025937866354044546, "loss": 3.4883811473846436, "step": 1475, "token_acc": 0.2500610084099765 }, { "epoch": 0.8654353562005277, "grad_norm": 1.127778010493277, "learning_rate": 0.00025955451348182884, "loss": 3.4494807720184326, "step": 1476, "token_acc": 0.2569076566417355 }, { "epoch": 0.8660216945177367, "grad_norm": 1.2590031047059904, "learning_rate": 0.00025973036342321216, "loss": 3.4717233180999756, "step": 1477, "token_acc": 0.25256045831364743 }, { "epoch": 0.8666080328349458, "grad_norm": 1.0091424360495187, "learning_rate": 0.00025990621336459554, "loss": 3.5164389610290527, "step": 1478, "token_acc": 0.24728033472803349 }, { "epoch": 0.8671943711521548, "grad_norm": 1.1052879637029678, "learning_rate": 0.00026008206330597886, "loss": 3.457829475402832, "step": 1479, "token_acc": 0.25569888995534784 }, { "epoch": 0.8677807094693638, "grad_norm": 1.1066971925333116, "learning_rate": 0.00026025791324736224, "loss": 3.4888057708740234, "step": 1480, "token_acc": 0.25156994341360706 }, { "epoch": 0.8683670477865728, "grad_norm": 1.390212179035056, "learning_rate": 0.0002604337631887456, "loss": 3.4768948554992676, "step": 1481, "token_acc": 0.2531208139395487 }, { "epoch": 0.8689533861037819, "grad_norm": 1.1820990083916247, "learning_rate": 0.00026060961313012894, "loss": 3.4105687141418457, "step": 1482, "token_acc": 0.2611815216069455 }, { "epoch": 0.8695397244209909, "grad_norm": 1.0701695103759878, "learning_rate": 0.0002607854630715123, "loss": 3.462088108062744, "step": 1483, "token_acc": 0.2538903665261109 }, { "epoch": 0.8701260627381999, "grad_norm": 1.1792212192386595, "learning_rate": 0.00026096131301289564, "loss": 3.4792885780334473, "step": 1484, "token_acc": 0.25245618275050435 }, { "epoch": 0.8707124010554089, "grad_norm": 0.9898559172407613, "learning_rate": 0.00026113716295427897, "loss": 3.492661952972412, "step": 1485, "token_acc": 0.25025937611398713 }, { "epoch": 0.871298739372618, "grad_norm": 1.212503342664103, "learning_rate": 0.00026131301289566234, "loss": 3.5061206817626953, "step": 1486, "token_acc": 0.24855186662942017 }, { "epoch": 0.871885077689827, "grad_norm": 1.2683829743754083, "learning_rate": 0.00026148886283704567, "loss": 3.4554123878479004, "step": 1487, "token_acc": 0.2552584960599458 }, { "epoch": 0.872471416007036, "grad_norm": 1.1643304855501575, "learning_rate": 0.00026166471277842905, "loss": 3.4975650310516357, "step": 1488, "token_acc": 0.2510860008635983 }, { "epoch": 0.873057754324245, "grad_norm": 1.0068136487743475, "learning_rate": 0.00026184056271981237, "loss": 3.483488082885742, "step": 1489, "token_acc": 0.25227338553748446 }, { "epoch": 0.8736440926414542, "grad_norm": 1.2632821978307662, "learning_rate": 0.00026201641266119575, "loss": 3.4896020889282227, "step": 1490, "token_acc": 0.25088503419267155 }, { "epoch": 0.8742304309586632, "grad_norm": 0.9838168543250088, "learning_rate": 0.0002621922626025791, "loss": 3.4653091430664062, "step": 1491, "token_acc": 0.2544072444359416 }, { "epoch": 0.8748167692758722, "grad_norm": 1.471163288906277, "learning_rate": 0.00026236811254396245, "loss": 3.4483559131622314, "step": 1492, "token_acc": 0.2559266484011058 }, { "epoch": 0.8754031075930812, "grad_norm": 1.101568292463219, "learning_rate": 0.00026254396248534583, "loss": 3.469982862472534, "step": 1493, "token_acc": 0.2517078089365656 }, { "epoch": 0.8759894459102903, "grad_norm": 1.2615179350877874, "learning_rate": 0.00026271981242672915, "loss": 3.4580917358398438, "step": 1494, "token_acc": 0.25682331239959877 }, { "epoch": 0.8765757842274993, "grad_norm": 1.06422370883166, "learning_rate": 0.00026289566236811253, "loss": 3.4955356121063232, "step": 1495, "token_acc": 0.25027668230636135 }, { "epoch": 0.8771621225447083, "grad_norm": 1.2603191881587898, "learning_rate": 0.00026307151230949585, "loss": 3.482748508453369, "step": 1496, "token_acc": 0.2543479033133891 }, { "epoch": 0.8777484608619174, "grad_norm": 0.9783108614081832, "learning_rate": 0.00026324736225087923, "loss": 3.5418219566345215, "step": 1497, "token_acc": 0.2436677761502971 }, { "epoch": 0.8783347991791264, "grad_norm": 1.2429591111490474, "learning_rate": 0.0002634232121922626, "loss": 3.4759297370910645, "step": 1498, "token_acc": 0.2520641356431994 }, { "epoch": 0.8789211374963354, "grad_norm": 1.0581699407986824, "learning_rate": 0.00026359906213364593, "loss": 3.494903326034546, "step": 1499, "token_acc": 0.2509341234861309 }, { "epoch": 0.8795074758135444, "grad_norm": 0.8789727902423904, "learning_rate": 0.0002637749120750293, "loss": 3.4828197956085205, "step": 1500, "token_acc": 0.25208624216644476 }, { "epoch": 0.8800938141307535, "grad_norm": 0.9957855287579016, "learning_rate": 0.00026395076201641264, "loss": 3.466064453125, "step": 1501, "token_acc": 0.2543825885223967 }, { "epoch": 0.8806801524479625, "grad_norm": 1.1784501014555773, "learning_rate": 0.00026412661195779596, "loss": 3.4562320709228516, "step": 1502, "token_acc": 0.25585390534911634 }, { "epoch": 0.8812664907651715, "grad_norm": 1.2146156037236144, "learning_rate": 0.00026430246189917934, "loss": 3.491581439971924, "step": 1503, "token_acc": 0.2509083900809457 }, { "epoch": 0.8818528290823805, "grad_norm": 1.2503935349588382, "learning_rate": 0.00026447831184056266, "loss": 3.500335216522217, "step": 1504, "token_acc": 0.2490078088268899 }, { "epoch": 0.8824391673995896, "grad_norm": 0.808137274840268, "learning_rate": 0.00026465416178194604, "loss": 3.4292941093444824, "step": 1505, "token_acc": 0.2593210806571452 }, { "epoch": 0.8830255057167986, "grad_norm": 1.1391797718055179, "learning_rate": 0.0002648300117233294, "loss": 3.427213191986084, "step": 1506, "token_acc": 0.2594526072285175 }, { "epoch": 0.8836118440340076, "grad_norm": 1.0590003334045317, "learning_rate": 0.00026500586166471274, "loss": 3.5022454261779785, "step": 1507, "token_acc": 0.24925489858827135 }, { "epoch": 0.8841981823512166, "grad_norm": 1.1006071145397718, "learning_rate": 0.0002651817116060961, "loss": 3.477682590484619, "step": 1508, "token_acc": 0.2546573643082078 }, { "epoch": 0.8847845206684257, "grad_norm": 1.3300564427698305, "learning_rate": 0.00026535756154747944, "loss": 3.443197250366211, "step": 1509, "token_acc": 0.2561571114761236 }, { "epoch": 0.8853708589856347, "grad_norm": 1.0665700378584235, "learning_rate": 0.0002655334114888628, "loss": 3.4111695289611816, "step": 1510, "token_acc": 0.263408075437938 }, { "epoch": 0.8859571973028437, "grad_norm": 1.0033152514507064, "learning_rate": 0.00026570926143024614, "loss": 3.500072956085205, "step": 1511, "token_acc": 0.2512841083393881 }, { "epoch": 0.8865435356200527, "grad_norm": 1.1281883883466841, "learning_rate": 0.0002658851113716295, "loss": 3.456676721572876, "step": 1512, "token_acc": 0.2554430868374071 }, { "epoch": 0.8871298739372618, "grad_norm": 1.2163668516851136, "learning_rate": 0.0002660609613130129, "loss": 3.4494264125823975, "step": 1513, "token_acc": 0.25702038132807364 }, { "epoch": 0.8877162122544708, "grad_norm": 0.9904118753122958, "learning_rate": 0.0002662368112543962, "loss": 3.5263233184814453, "step": 1514, "token_acc": 0.24631515893651815 }, { "epoch": 0.8883025505716798, "grad_norm": 1.1719530863935192, "learning_rate": 0.0002664126611957796, "loss": 3.51806902885437, "step": 1515, "token_acc": 0.2499573497832286 }, { "epoch": 0.8888888888888888, "grad_norm": 0.9593989649835573, "learning_rate": 0.0002665885111371629, "loss": 3.4662561416625977, "step": 1516, "token_acc": 0.2530683585629148 }, { "epoch": 0.889475227206098, "grad_norm": 1.197077472172848, "learning_rate": 0.0002667643610785463, "loss": 3.467747449874878, "step": 1517, "token_acc": 0.2523390872657972 }, { "epoch": 0.890061565523307, "grad_norm": 0.933390047550227, "learning_rate": 0.00026694021101992963, "loss": 3.4822099208831787, "step": 1518, "token_acc": 0.25373968487202025 }, { "epoch": 0.890647903840516, "grad_norm": 1.1438148656892082, "learning_rate": 0.000267116060961313, "loss": 3.454476833343506, "step": 1519, "token_acc": 0.2563312180513519 }, { "epoch": 0.891234242157725, "grad_norm": 1.1179665355696513, "learning_rate": 0.00026729191090269633, "loss": 3.4433393478393555, "step": 1520, "token_acc": 0.2554607718712013 }, { "epoch": 0.8918205804749341, "grad_norm": 0.944106697077151, "learning_rate": 0.0002674677608440797, "loss": 3.4730758666992188, "step": 1521, "token_acc": 0.2535616381458714 }, { "epoch": 0.8924069187921431, "grad_norm": 1.1242940117241675, "learning_rate": 0.00026764361078546303, "loss": 3.5014383792877197, "step": 1522, "token_acc": 0.2501024229977295 }, { "epoch": 0.8929932571093521, "grad_norm": 1.056202439618883, "learning_rate": 0.0002678194607268464, "loss": 3.487800121307373, "step": 1523, "token_acc": 0.2504834817899256 }, { "epoch": 0.8935795954265612, "grad_norm": 1.1450304903088528, "learning_rate": 0.00026799531066822973, "loss": 3.4891011714935303, "step": 1524, "token_acc": 0.2487708844090349 }, { "epoch": 0.8941659337437702, "grad_norm": 1.1963699191797805, "learning_rate": 0.0002681711606096131, "loss": 3.471309185028076, "step": 1525, "token_acc": 0.25321512030353027 }, { "epoch": 0.8947522720609792, "grad_norm": 1.2911891429700755, "learning_rate": 0.00026834701055099643, "loss": 3.4688570499420166, "step": 1526, "token_acc": 0.2543395530606078 }, { "epoch": 0.8953386103781882, "grad_norm": 1.3541671300085827, "learning_rate": 0.0002685228604923798, "loss": 3.4217777252197266, "step": 1527, "token_acc": 0.25732142290147436 }, { "epoch": 0.8959249486953973, "grad_norm": 1.0340294987781176, "learning_rate": 0.0002686987104337632, "loss": 3.526620864868164, "step": 1528, "token_acc": 0.24828612247415052 }, { "epoch": 0.8965112870126063, "grad_norm": 1.1843352082829566, "learning_rate": 0.0002688745603751465, "loss": 3.4540655612945557, "step": 1529, "token_acc": 0.2562146528039365 }, { "epoch": 0.8970976253298153, "grad_norm": 0.9312341910820744, "learning_rate": 0.0002690504103165299, "loss": 3.4109597206115723, "step": 1530, "token_acc": 0.26077811972492254 }, { "epoch": 0.8976839636470243, "grad_norm": 1.1389510210835876, "learning_rate": 0.0002692262602579132, "loss": 3.4806084632873535, "step": 1531, "token_acc": 0.25224521258902977 }, { "epoch": 0.8982703019642334, "grad_norm": 1.204245093576224, "learning_rate": 0.0002694021101992966, "loss": 3.506807327270508, "step": 1532, "token_acc": 0.24877111207612038 }, { "epoch": 0.8988566402814424, "grad_norm": 0.9354575749316849, "learning_rate": 0.0002695779601406799, "loss": 3.431272029876709, "step": 1533, "token_acc": 0.2572023602915654 }, { "epoch": 0.8994429785986514, "grad_norm": 1.0973157385520398, "learning_rate": 0.0002697538100820633, "loss": 3.4384636878967285, "step": 1534, "token_acc": 0.2583302555720576 }, { "epoch": 0.9000293169158604, "grad_norm": 1.2470823605175088, "learning_rate": 0.0002699296600234467, "loss": 3.445343494415283, "step": 1535, "token_acc": 0.25776050573665904 }, { "epoch": 0.9006156552330695, "grad_norm": 1.21117306060003, "learning_rate": 0.00027010550996483, "loss": 3.459315538406372, "step": 1536, "token_acc": 0.254273623326833 }, { "epoch": 0.9012019935502785, "grad_norm": 1.2707438527261994, "learning_rate": 0.0002702813599062134, "loss": 3.4351089000701904, "step": 1537, "token_acc": 0.2573327377810262 }, { "epoch": 0.9017883318674875, "grad_norm": 1.0563873238335013, "learning_rate": 0.0002704572098475967, "loss": 3.4426050186157227, "step": 1538, "token_acc": 0.25467843458135725 }, { "epoch": 0.9023746701846965, "grad_norm": 1.301847498796856, "learning_rate": 0.00027063305978898, "loss": 3.4981689453125, "step": 1539, "token_acc": 0.2497133292035165 }, { "epoch": 0.9029610085019056, "grad_norm": 0.9476036329108056, "learning_rate": 0.0002708089097303634, "loss": 3.442716121673584, "step": 1540, "token_acc": 0.2575194305277856 }, { "epoch": 0.9035473468191146, "grad_norm": 1.0257906465617297, "learning_rate": 0.0002709847596717467, "loss": 3.4262518882751465, "step": 1541, "token_acc": 0.25883133784487244 }, { "epoch": 0.9041336851363236, "grad_norm": 1.3430203391119773, "learning_rate": 0.0002711606096131301, "loss": 3.4443607330322266, "step": 1542, "token_acc": 0.2563439717168789 }, { "epoch": 0.9047200234535326, "grad_norm": 0.997431049613658, "learning_rate": 0.0002713364595545134, "loss": 3.441514015197754, "step": 1543, "token_acc": 0.25662779507196354 }, { "epoch": 0.9053063617707418, "grad_norm": 1.4781970651884355, "learning_rate": 0.0002715123094958968, "loss": 3.4904818534851074, "step": 1544, "token_acc": 0.25204524014495816 }, { "epoch": 0.9058927000879508, "grad_norm": 0.898564746248468, "learning_rate": 0.0002716881594372802, "loss": 3.405942916870117, "step": 1545, "token_acc": 0.2619123811122594 }, { "epoch": 0.9064790384051598, "grad_norm": 1.2525403228685923, "learning_rate": 0.0002718640093786635, "loss": 3.3839802742004395, "step": 1546, "token_acc": 0.26291074945578935 }, { "epoch": 0.9070653767223688, "grad_norm": 1.1600874729832094, "learning_rate": 0.0002720398593200469, "loss": 3.4191513061523438, "step": 1547, "token_acc": 0.260927032794369 }, { "epoch": 0.9076517150395779, "grad_norm": 1.1537609840819099, "learning_rate": 0.0002722157092614302, "loss": 3.491884231567383, "step": 1548, "token_acc": 0.2488000781037672 }, { "epoch": 0.9082380533567869, "grad_norm": 0.9037660294633072, "learning_rate": 0.0002723915592028136, "loss": 3.4326887130737305, "step": 1549, "token_acc": 0.25638336083622676 }, { "epoch": 0.9088243916739959, "grad_norm": 0.9686272469080168, "learning_rate": 0.0002725674091441969, "loss": 3.4177675247192383, "step": 1550, "token_acc": 0.26115206565009647 }, { "epoch": 0.909410729991205, "grad_norm": 0.9481275062618324, "learning_rate": 0.0002727432590855803, "loss": 3.45444393157959, "step": 1551, "token_acc": 0.25564734718585236 }, { "epoch": 0.909997068308414, "grad_norm": 1.129928778529221, "learning_rate": 0.00027291910902696367, "loss": 3.4867782592773438, "step": 1552, "token_acc": 0.250120702029099 }, { "epoch": 0.910583406625623, "grad_norm": 1.1799087427895225, "learning_rate": 0.000273094958968347, "loss": 3.4504122734069824, "step": 1553, "token_acc": 0.25701350966743186 }, { "epoch": 0.911169744942832, "grad_norm": 1.1453521579593329, "learning_rate": 0.00027327080890973037, "loss": 3.485167980194092, "step": 1554, "token_acc": 0.2528238922868001 }, { "epoch": 0.9117560832600411, "grad_norm": 1.0363219830339416, "learning_rate": 0.0002734466588511137, "loss": 3.503322124481201, "step": 1555, "token_acc": 0.24887578345660666 }, { "epoch": 0.9123424215772501, "grad_norm": 1.1860029539500596, "learning_rate": 0.000273622508792497, "loss": 3.515435218811035, "step": 1556, "token_acc": 0.24652886453957976 }, { "epoch": 0.9129287598944591, "grad_norm": 1.0213078858930382, "learning_rate": 0.0002737983587338804, "loss": 3.53326153755188, "step": 1557, "token_acc": 0.24519392484369182 }, { "epoch": 0.9135150982116681, "grad_norm": 1.0297035281757607, "learning_rate": 0.0002739742086752637, "loss": 3.4802563190460205, "step": 1558, "token_acc": 0.2525334820568045 }, { "epoch": 0.9141014365288772, "grad_norm": 1.1029739860383216, "learning_rate": 0.0002741500586166471, "loss": 3.4929981231689453, "step": 1559, "token_acc": 0.24966556494526077 }, { "epoch": 0.9146877748460862, "grad_norm": 0.9430169525700198, "learning_rate": 0.0002743259085580305, "loss": 3.467041492462158, "step": 1560, "token_acc": 0.2545945973305529 }, { "epoch": 0.9152741131632952, "grad_norm": 1.2943235615356024, "learning_rate": 0.0002745017584994138, "loss": 3.38330340385437, "step": 1561, "token_acc": 0.2645694722901279 }, { "epoch": 0.9158604514805042, "grad_norm": 0.9078577818826634, "learning_rate": 0.0002746776084407972, "loss": 3.4435367584228516, "step": 1562, "token_acc": 0.25605095205164885 }, { "epoch": 0.9164467897977133, "grad_norm": 1.0582256931438696, "learning_rate": 0.0002748534583821805, "loss": 3.4161782264709473, "step": 1563, "token_acc": 0.261538744389201 }, { "epoch": 0.9170331281149223, "grad_norm": 0.9571577507235014, "learning_rate": 0.0002750293083235639, "loss": 3.3922221660614014, "step": 1564, "token_acc": 0.26461262143024716 }, { "epoch": 0.9176194664321313, "grad_norm": 1.1759342304557798, "learning_rate": 0.0002752051582649472, "loss": 3.4620676040649414, "step": 1565, "token_acc": 0.2534643031075604 }, { "epoch": 0.9182058047493403, "grad_norm": 1.1334047859869674, "learning_rate": 0.0002753810082063306, "loss": 3.427372694015503, "step": 1566, "token_acc": 0.2580983700096032 }, { "epoch": 0.9187921430665494, "grad_norm": 1.3161157829942292, "learning_rate": 0.00027555685814771396, "loss": 3.5011913776397705, "step": 1567, "token_acc": 0.24847892459419685 }, { "epoch": 0.9193784813837584, "grad_norm": 1.0831452504834898, "learning_rate": 0.0002757327080890973, "loss": 3.4381520748138428, "step": 1568, "token_acc": 0.25972817126945436 }, { "epoch": 0.9199648197009674, "grad_norm": 1.0470354314887476, "learning_rate": 0.00027590855803048066, "loss": 3.4123809337615967, "step": 1569, "token_acc": 0.2594358897434969 }, { "epoch": 0.9205511580181764, "grad_norm": 1.0322005775210283, "learning_rate": 0.000276084407971864, "loss": 3.4311747550964355, "step": 1570, "token_acc": 0.25962873549108667 }, { "epoch": 0.9211374963353856, "grad_norm": 1.3650104051929608, "learning_rate": 0.00027626025791324736, "loss": 3.44868803024292, "step": 1571, "token_acc": 0.25470813620129956 }, { "epoch": 0.9217238346525946, "grad_norm": 1.0078466222217648, "learning_rate": 0.0002764361078546307, "loss": 3.454195976257324, "step": 1572, "token_acc": 0.25561077530713333 }, { "epoch": 0.9223101729698036, "grad_norm": 1.1215704139131795, "learning_rate": 0.000276611957796014, "loss": 3.435854911804199, "step": 1573, "token_acc": 0.2560089361284383 }, { "epoch": 0.9228965112870126, "grad_norm": 1.1436980541290278, "learning_rate": 0.0002767878077373974, "loss": 3.4624428749084473, "step": 1574, "token_acc": 0.2537039373891008 }, { "epoch": 0.9234828496042217, "grad_norm": 1.213858884230976, "learning_rate": 0.00027696365767878076, "loss": 3.419650077819824, "step": 1575, "token_acc": 0.25872801152678004 }, { "epoch": 0.9240691879214307, "grad_norm": 0.9928902206662252, "learning_rate": 0.0002771395076201641, "loss": 3.447695016860962, "step": 1576, "token_acc": 0.25604969877128303 }, { "epoch": 0.9246555262386397, "grad_norm": 1.1170179466519534, "learning_rate": 0.00027731535756154747, "loss": 3.4231395721435547, "step": 1577, "token_acc": 0.2588927730986735 }, { "epoch": 0.9252418645558487, "grad_norm": 0.8570173868715074, "learning_rate": 0.0002774912075029308, "loss": 3.5291895866394043, "step": 1578, "token_acc": 0.24584225128814902 }, { "epoch": 0.9258282028730578, "grad_norm": 0.9676214841379986, "learning_rate": 0.00027766705744431417, "loss": 3.414877414703369, "step": 1579, "token_acc": 0.2584987040285545 }, { "epoch": 0.9264145411902668, "grad_norm": 1.1782797758324282, "learning_rate": 0.0002778429073856975, "loss": 3.4611570835113525, "step": 1580, "token_acc": 0.2526968036408508 }, { "epoch": 0.9270008795074758, "grad_norm": 0.8791078117414856, "learning_rate": 0.00027801875732708087, "loss": 3.416090965270996, "step": 1581, "token_acc": 0.25959511297139976 }, { "epoch": 0.9275872178246849, "grad_norm": 0.91160368854889, "learning_rate": 0.00027819460726846425, "loss": 3.4486334323883057, "step": 1582, "token_acc": 0.2559706543219903 }, { "epoch": 0.9281735561418939, "grad_norm": 1.04591211316074, "learning_rate": 0.00027837045720984757, "loss": 3.436034679412842, "step": 1583, "token_acc": 0.2574375156626383 }, { "epoch": 0.9287598944591029, "grad_norm": 0.9027223087739822, "learning_rate": 0.00027854630715123095, "loss": 3.4619922637939453, "step": 1584, "token_acc": 0.25507177510212525 }, { "epoch": 0.9293462327763119, "grad_norm": 1.0490646679277922, "learning_rate": 0.00027872215709261427, "loss": 3.4477930068969727, "step": 1585, "token_acc": 0.2542764857881137 }, { "epoch": 0.929932571093521, "grad_norm": 0.9890043199810226, "learning_rate": 0.00027889800703399765, "loss": 3.4564199447631836, "step": 1586, "token_acc": 0.2542775148070373 }, { "epoch": 0.93051890941073, "grad_norm": 1.114124375382977, "learning_rate": 0.000279073856975381, "loss": 3.4412851333618164, "step": 1587, "token_acc": 0.2556504217042669 }, { "epoch": 0.931105247727939, "grad_norm": 1.2382464201645553, "learning_rate": 0.00027924970691676435, "loss": 3.4975357055664062, "step": 1588, "token_acc": 0.24976999841018732 }, { "epoch": 0.931691586045148, "grad_norm": 1.0018365063275843, "learning_rate": 0.00027942555685814773, "loss": 3.4665846824645996, "step": 1589, "token_acc": 0.25362675646197996 }, { "epoch": 0.9322779243623571, "grad_norm": 1.0706134072461655, "learning_rate": 0.00027960140679953105, "loss": 3.4295220375061035, "step": 1590, "token_acc": 0.2591023108282688 }, { "epoch": 0.9328642626795661, "grad_norm": 1.1521618515120322, "learning_rate": 0.0002797772567409144, "loss": 3.4484362602233887, "step": 1591, "token_acc": 0.2554142581888247 }, { "epoch": 0.9334506009967751, "grad_norm": 1.0522691885661983, "learning_rate": 0.00027995310668229776, "loss": 3.455878496170044, "step": 1592, "token_acc": 0.25255059935216323 }, { "epoch": 0.9340369393139841, "grad_norm": 0.8390656539948494, "learning_rate": 0.0002801289566236811, "loss": 3.419095039367676, "step": 1593, "token_acc": 0.26043699919502916 }, { "epoch": 0.9346232776311932, "grad_norm": 1.0104283054265661, "learning_rate": 0.00028030480656506446, "loss": 3.393857002258301, "step": 1594, "token_acc": 0.2604841721112415 }, { "epoch": 0.9352096159484022, "grad_norm": 1.273394420912752, "learning_rate": 0.0002804806565064478, "loss": 3.4570064544677734, "step": 1595, "token_acc": 0.2550111272425326 }, { "epoch": 0.9357959542656112, "grad_norm": 0.8500555303463819, "learning_rate": 0.00028065650644783116, "loss": 3.3855323791503906, "step": 1596, "token_acc": 0.26343645660142206 }, { "epoch": 0.9363822925828202, "grad_norm": 0.9823160060984943, "learning_rate": 0.0002808323563892145, "loss": 3.4690091609954834, "step": 1597, "token_acc": 0.25358774294978353 }, { "epoch": 0.9369686309000294, "grad_norm": 1.0324244245829983, "learning_rate": 0.00028100820633059786, "loss": 3.4059247970581055, "step": 1598, "token_acc": 0.25973350998798966 }, { "epoch": 0.9375549692172384, "grad_norm": 1.1353441800518547, "learning_rate": 0.00028118405627198124, "loss": 3.423762321472168, "step": 1599, "token_acc": 0.25978963105344655 }, { "epoch": 0.9381413075344474, "grad_norm": 0.9101082160968709, "learning_rate": 0.00028135990621336456, "loss": 3.438351631164551, "step": 1600, "token_acc": 0.25616560754659295 }, { "epoch": 0.9387276458516564, "grad_norm": 0.8532083561356912, "learning_rate": 0.00028153575615474794, "loss": 3.45070743560791, "step": 1601, "token_acc": 0.25418973383457893 }, { "epoch": 0.9393139841688655, "grad_norm": 0.9842250500653741, "learning_rate": 0.00028171160609613126, "loss": 3.3930931091308594, "step": 1602, "token_acc": 0.26322528928287 }, { "epoch": 0.9399003224860745, "grad_norm": 1.1484976018054966, "learning_rate": 0.00028188745603751464, "loss": 3.463414192199707, "step": 1603, "token_acc": 0.2519882191284152 }, { "epoch": 0.9404866608032835, "grad_norm": 1.0558181128485224, "learning_rate": 0.00028206330597889797, "loss": 3.4456119537353516, "step": 1604, "token_acc": 0.2534280556385314 }, { "epoch": 0.9410729991204925, "grad_norm": 1.395649076515183, "learning_rate": 0.00028223915592028134, "loss": 3.5010666847229004, "step": 1605, "token_acc": 0.2490527628416582 }, { "epoch": 0.9416593374377016, "grad_norm": 1.1699134005984961, "learning_rate": 0.0002824150058616647, "loss": 3.485076904296875, "step": 1606, "token_acc": 0.2505359893426585 }, { "epoch": 0.9422456757549106, "grad_norm": 1.0050844548368318, "learning_rate": 0.00028259085580304805, "loss": 3.414064884185791, "step": 1607, "token_acc": 0.2585769322525138 }, { "epoch": 0.9428320140721196, "grad_norm": 1.0017520470621288, "learning_rate": 0.0002827667057444314, "loss": 3.426929473876953, "step": 1608, "token_acc": 0.25771782568771745 }, { "epoch": 0.9434183523893287, "grad_norm": 1.0464793362631202, "learning_rate": 0.00028294255568581475, "loss": 3.459712505340576, "step": 1609, "token_acc": 0.2544558884700207 }, { "epoch": 0.9440046907065377, "grad_norm": 0.8884348039241726, "learning_rate": 0.00028311840562719807, "loss": 3.4620704650878906, "step": 1610, "token_acc": 0.25350297036689257 }, { "epoch": 0.9445910290237467, "grad_norm": 1.151615454096963, "learning_rate": 0.00028329425556858145, "loss": 3.430426597595215, "step": 1611, "token_acc": 0.25567336001852525 }, { "epoch": 0.9451773673409557, "grad_norm": 1.1225200597210718, "learning_rate": 0.0002834701055099648, "loss": 3.4575791358947754, "step": 1612, "token_acc": 0.25634345896767924 }, { "epoch": 0.9457637056581648, "grad_norm": 1.056428813670255, "learning_rate": 0.00028364595545134815, "loss": 3.4641103744506836, "step": 1613, "token_acc": 0.25412191202893847 }, { "epoch": 0.9463500439753738, "grad_norm": 1.2067914765995427, "learning_rate": 0.00028382180539273153, "loss": 3.397404193878174, "step": 1614, "token_acc": 0.2611007607514361 }, { "epoch": 0.9469363822925828, "grad_norm": 0.8468931035795969, "learning_rate": 0.00028399765533411485, "loss": 3.431407928466797, "step": 1615, "token_acc": 0.2580003772280029 }, { "epoch": 0.9475227206097918, "grad_norm": 0.6781329738531857, "learning_rate": 0.00028417350527549823, "loss": 3.418238401412964, "step": 1616, "token_acc": 0.25974239546751826 }, { "epoch": 0.9481090589270009, "grad_norm": 0.8887112611673056, "learning_rate": 0.00028434935521688156, "loss": 3.4343841075897217, "step": 1617, "token_acc": 0.25546847490903657 }, { "epoch": 0.9486953972442099, "grad_norm": 0.9606695431904696, "learning_rate": 0.00028452520515826493, "loss": 3.4573464393615723, "step": 1618, "token_acc": 0.2544670120243435 }, { "epoch": 0.9492817355614189, "grad_norm": 0.9466212276822745, "learning_rate": 0.00028470105509964826, "loss": 3.428793430328369, "step": 1619, "token_acc": 0.2570824747714905 }, { "epoch": 0.9498680738786279, "grad_norm": 1.247266489564097, "learning_rate": 0.00028487690504103163, "loss": 3.483790636062622, "step": 1620, "token_acc": 0.2524136429156599 }, { "epoch": 0.950454412195837, "grad_norm": 0.9881453608434886, "learning_rate": 0.000285052754982415, "loss": 3.4680538177490234, "step": 1621, "token_acc": 0.25247324958089284 }, { "epoch": 0.951040750513046, "grad_norm": 1.2373340402330901, "learning_rate": 0.00028522860492379834, "loss": 3.401545763015747, "step": 1622, "token_acc": 0.25966873933282625 }, { "epoch": 0.951627088830255, "grad_norm": 1.0211834418326686, "learning_rate": 0.0002854044548651817, "loss": 3.4372189044952393, "step": 1623, "token_acc": 0.25602884016106214 }, { "epoch": 0.952213427147464, "grad_norm": 1.0192162955522874, "learning_rate": 0.00028558030480656504, "loss": 3.450834274291992, "step": 1624, "token_acc": 0.25508543961123 }, { "epoch": 0.9527997654646732, "grad_norm": 1.1325298124281291, "learning_rate": 0.0002857561547479484, "loss": 3.4558000564575195, "step": 1625, "token_acc": 0.25267409347121744 }, { "epoch": 0.9533861037818822, "grad_norm": 1.099952880945093, "learning_rate": 0.00028593200468933174, "loss": 3.42177152633667, "step": 1626, "token_acc": 0.258832141675274 }, { "epoch": 0.9539724420990912, "grad_norm": 0.8946557370056859, "learning_rate": 0.00028610785463071506, "loss": 3.3207759857177734, "step": 1627, "token_acc": 0.2713037641456994 }, { "epoch": 0.9545587804163002, "grad_norm": 0.8901828893814432, "learning_rate": 0.00028628370457209844, "loss": 3.427022695541382, "step": 1628, "token_acc": 0.2590191883936757 }, { "epoch": 0.9551451187335093, "grad_norm": 1.1268292660227681, "learning_rate": 0.00028645955451348177, "loss": 3.4051380157470703, "step": 1629, "token_acc": 0.2599669184799224 }, { "epoch": 0.9557314570507183, "grad_norm": 1.0518598816999978, "learning_rate": 0.00028663540445486514, "loss": 3.4638655185699463, "step": 1630, "token_acc": 0.2531246365164797 }, { "epoch": 0.9563177953679273, "grad_norm": 1.0118229097954303, "learning_rate": 0.0002868112543962485, "loss": 3.407078266143799, "step": 1631, "token_acc": 0.2619734458121537 }, { "epoch": 0.9569041336851363, "grad_norm": 1.0145122324754978, "learning_rate": 0.00028698710433763185, "loss": 3.438203811645508, "step": 1632, "token_acc": 0.25449237586341716 }, { "epoch": 0.9574904720023454, "grad_norm": 1.2528340922136278, "learning_rate": 0.0002871629542790152, "loss": 3.444272518157959, "step": 1633, "token_acc": 0.25588460659506107 }, { "epoch": 0.9580768103195544, "grad_norm": 0.9964204749519147, "learning_rate": 0.00028733880422039855, "loss": 3.477262258529663, "step": 1634, "token_acc": 0.2513322627498312 }, { "epoch": 0.9586631486367634, "grad_norm": 1.1931086643729485, "learning_rate": 0.0002875146541617819, "loss": 3.4402215480804443, "step": 1635, "token_acc": 0.25753290746181273 }, { "epoch": 0.9592494869539725, "grad_norm": 0.8850779833023668, "learning_rate": 0.00028769050410316525, "loss": 3.4351909160614014, "step": 1636, "token_acc": 0.2557032609064188 }, { "epoch": 0.9598358252711815, "grad_norm": 0.8319326232433024, "learning_rate": 0.0002878663540445486, "loss": 3.412260055541992, "step": 1637, "token_acc": 0.26149460866382074 }, { "epoch": 0.9604221635883905, "grad_norm": 0.9458665141032183, "learning_rate": 0.000288042203985932, "loss": 3.4018025398254395, "step": 1638, "token_acc": 0.259475120000833 }, { "epoch": 0.9610085019055995, "grad_norm": 1.0007726259310146, "learning_rate": 0.00028821805392731533, "loss": 3.47475528717041, "step": 1639, "token_acc": 0.25281027579492144 }, { "epoch": 0.9615948402228086, "grad_norm": 1.0352169379895728, "learning_rate": 0.0002883939038686987, "loss": 3.4306507110595703, "step": 1640, "token_acc": 0.2567856643223238 }, { "epoch": 0.9621811785400176, "grad_norm": 1.0827222011867332, "learning_rate": 0.00028856975381008203, "loss": 3.4324893951416016, "step": 1641, "token_acc": 0.2558575349720682 }, { "epoch": 0.9627675168572266, "grad_norm": 1.1095957533359941, "learning_rate": 0.0002887456037514654, "loss": 3.485936164855957, "step": 1642, "token_acc": 0.24930181553582134 }, { "epoch": 0.9633538551744356, "grad_norm": 1.1520469133687614, "learning_rate": 0.00028892145369284873, "loss": 3.506608009338379, "step": 1643, "token_acc": 0.24794244891212808 }, { "epoch": 0.9639401934916447, "grad_norm": 0.8331536357057102, "learning_rate": 0.00028909730363423206, "loss": 3.452690601348877, "step": 1644, "token_acc": 0.2532282272813269 }, { "epoch": 0.9645265318088537, "grad_norm": 1.1806714446698217, "learning_rate": 0.00028927315357561543, "loss": 3.4091579914093018, "step": 1645, "token_acc": 0.25954991374608205 }, { "epoch": 0.9651128701260627, "grad_norm": 1.1003504159123314, "learning_rate": 0.0002894490035169988, "loss": 3.449368715286255, "step": 1646, "token_acc": 0.2527903231542209 }, { "epoch": 0.9656992084432717, "grad_norm": 0.9521412616048038, "learning_rate": 0.00028962485345838214, "loss": 3.372286558151245, "step": 1647, "token_acc": 0.2640550807217474 }, { "epoch": 0.9662855467604808, "grad_norm": 0.9085826341092487, "learning_rate": 0.0002898007033997655, "loss": 3.4189882278442383, "step": 1648, "token_acc": 0.259935453412688 }, { "epoch": 0.9668718850776898, "grad_norm": 0.8411084758914664, "learning_rate": 0.00028997655334114884, "loss": 3.4541051387786865, "step": 1649, "token_acc": 0.2536862395053104 }, { "epoch": 0.9674582233948988, "grad_norm": 0.9785933359551604, "learning_rate": 0.0002901524032825322, "loss": 3.3913700580596924, "step": 1650, "token_acc": 0.26226255371691387 }, { "epoch": 0.9680445617121078, "grad_norm": 1.1455793192318853, "learning_rate": 0.00029032825322391554, "loss": 3.462675094604492, "step": 1651, "token_acc": 0.2526551920618869 }, { "epoch": 0.968630900029317, "grad_norm": 0.7993899416606041, "learning_rate": 0.0002905041031652989, "loss": 3.3909566402435303, "step": 1652, "token_acc": 0.2631858142732802 }, { "epoch": 0.969217238346526, "grad_norm": 0.875426702268605, "learning_rate": 0.0002906799531066823, "loss": 3.4527156352996826, "step": 1653, "token_acc": 0.2534200314845774 }, { "epoch": 0.969803576663735, "grad_norm": 1.024280615064505, "learning_rate": 0.0002908558030480656, "loss": 3.4014289379119873, "step": 1654, "token_acc": 0.2612895972625327 }, { "epoch": 0.970389914980944, "grad_norm": 0.9195103456149949, "learning_rate": 0.000291031652989449, "loss": 3.4070944786071777, "step": 1655, "token_acc": 0.2599360876147802 }, { "epoch": 0.9709762532981531, "grad_norm": 1.2107313775202413, "learning_rate": 0.0002912075029308323, "loss": 3.4211158752441406, "step": 1656, "token_acc": 0.25745008537943737 }, { "epoch": 0.9715625916153621, "grad_norm": 0.9604788197685252, "learning_rate": 0.0002913833528722157, "loss": 3.3969945907592773, "step": 1657, "token_acc": 0.2608897465317618 }, { "epoch": 0.9721489299325711, "grad_norm": 0.9546786830863327, "learning_rate": 0.000291559202813599, "loss": 3.439542770385742, "step": 1658, "token_acc": 0.2548316751753569 }, { "epoch": 0.9727352682497801, "grad_norm": 1.1081209240364918, "learning_rate": 0.0002917350527549824, "loss": 3.4130752086639404, "step": 1659, "token_acc": 0.2589106143792215 }, { "epoch": 0.9733216065669892, "grad_norm": 0.9994860287529672, "learning_rate": 0.0002919109026963658, "loss": 3.4299957752227783, "step": 1660, "token_acc": 0.25488947319741556 }, { "epoch": 0.9739079448841982, "grad_norm": 0.8782120536730222, "learning_rate": 0.0002920867526377491, "loss": 3.405499219894409, "step": 1661, "token_acc": 0.2595201857750302 }, { "epoch": 0.9744942832014072, "grad_norm": 0.9933984036179534, "learning_rate": 0.0002922626025791324, "loss": 3.397000312805176, "step": 1662, "token_acc": 0.25998143486553654 }, { "epoch": 0.9750806215186162, "grad_norm": 1.1258193437404802, "learning_rate": 0.0002924384525205158, "loss": 3.431743860244751, "step": 1663, "token_acc": 0.25656518648453763 }, { "epoch": 0.9756669598358253, "grad_norm": 1.1338615165102073, "learning_rate": 0.00029261430246189913, "loss": 3.479358434677124, "step": 1664, "token_acc": 0.25233220663895994 }, { "epoch": 0.9762532981530343, "grad_norm": 1.0556484863970759, "learning_rate": 0.0002927901524032825, "loss": 3.420722007751465, "step": 1665, "token_acc": 0.25751524015376054 }, { "epoch": 0.9768396364702433, "grad_norm": 0.8963897877390183, "learning_rate": 0.00029296600234466583, "loss": 3.443148136138916, "step": 1666, "token_acc": 0.25492007068033706 }, { "epoch": 0.9774259747874524, "grad_norm": 0.9650260165038219, "learning_rate": 0.0002931418522860492, "loss": 3.41599702835083, "step": 1667, "token_acc": 0.25878221038451804 }, { "epoch": 0.9780123131046614, "grad_norm": 0.917491397482388, "learning_rate": 0.0002933177022274326, "loss": 3.41109037399292, "step": 1668, "token_acc": 0.2582422657423119 }, { "epoch": 0.9785986514218704, "grad_norm": 1.0246960695630682, "learning_rate": 0.0002934935521688159, "loss": 3.4457051753997803, "step": 1669, "token_acc": 0.2550188870884565 }, { "epoch": 0.9791849897390794, "grad_norm": 1.1251563619561569, "learning_rate": 0.0002936694021101993, "loss": 3.450312614440918, "step": 1670, "token_acc": 0.25354109093134003 }, { "epoch": 0.9797713280562885, "grad_norm": 0.8059031273350623, "learning_rate": 0.0002938452520515826, "loss": 3.352806568145752, "step": 1671, "token_acc": 0.2665452773984363 }, { "epoch": 0.9803576663734975, "grad_norm": 0.9693959222114189, "learning_rate": 0.000294021101992966, "loss": 3.464014768600464, "step": 1672, "token_acc": 0.2524639658217273 }, { "epoch": 0.9809440046907065, "grad_norm": 1.0537377134456585, "learning_rate": 0.0002941969519343493, "loss": 3.4342567920684814, "step": 1673, "token_acc": 0.2546251981106909 }, { "epoch": 0.9815303430079155, "grad_norm": 0.8635266937934581, "learning_rate": 0.0002943728018757327, "loss": 3.4285411834716797, "step": 1674, "token_acc": 0.2589354350273712 }, { "epoch": 0.9821166813251246, "grad_norm": 0.9324102609728363, "learning_rate": 0.00029454865181711607, "loss": 3.4739909172058105, "step": 1675, "token_acc": 0.2521764907791681 }, { "epoch": 0.9827030196423336, "grad_norm": 0.8812169433582375, "learning_rate": 0.0002947245017584994, "loss": 3.38845157623291, "step": 1676, "token_acc": 0.2618837295643379 }, { "epoch": 0.9832893579595426, "grad_norm": 0.9133348923682535, "learning_rate": 0.00029490035169988277, "loss": 3.4450371265411377, "step": 1677, "token_acc": 0.25328833461679595 }, { "epoch": 0.9838756962767516, "grad_norm": 0.9242693428256067, "learning_rate": 0.0002950762016412661, "loss": 3.441045045852661, "step": 1678, "token_acc": 0.25596900947650514 }, { "epoch": 0.9844620345939608, "grad_norm": 0.8511730515132478, "learning_rate": 0.00029525205158264947, "loss": 3.4234378337860107, "step": 1679, "token_acc": 0.2579017299923499 }, { "epoch": 0.9850483729111698, "grad_norm": 0.7143281472029915, "learning_rate": 0.0002954279015240328, "loss": 3.4041244983673096, "step": 1680, "token_acc": 0.2593249099357803 }, { "epoch": 0.9856347112283788, "grad_norm": 0.712568877758948, "learning_rate": 0.0002956037514654161, "loss": 3.4403133392333984, "step": 1681, "token_acc": 0.25675481053611565 }, { "epoch": 0.9862210495455878, "grad_norm": 0.862445178621824, "learning_rate": 0.0002957796014067995, "loss": 3.3996949195861816, "step": 1682, "token_acc": 0.2594488686638857 }, { "epoch": 0.9868073878627969, "grad_norm": 0.9530327210573329, "learning_rate": 0.0002959554513481828, "loss": 3.453869104385376, "step": 1683, "token_acc": 0.25481199071028715 }, { "epoch": 0.9873937261800059, "grad_norm": 1.065627650859129, "learning_rate": 0.0002961313012895662, "loss": 3.4256539344787598, "step": 1684, "token_acc": 0.2574449525113286 }, { "epoch": 0.9879800644972149, "grad_norm": 1.0511687294908194, "learning_rate": 0.0002963071512309496, "loss": 3.4245340824127197, "step": 1685, "token_acc": 0.2563528258738974 }, { "epoch": 0.9885664028144239, "grad_norm": 0.8970258445831942, "learning_rate": 0.0002964830011723329, "loss": 3.4051733016967773, "step": 1686, "token_acc": 0.2606317553235485 }, { "epoch": 0.989152741131633, "grad_norm": 1.1716050343225166, "learning_rate": 0.0002966588511137163, "loss": 3.3879857063293457, "step": 1687, "token_acc": 0.2617099571079236 }, { "epoch": 0.989739079448842, "grad_norm": 0.9696488751938213, "learning_rate": 0.0002968347010550996, "loss": 3.4367923736572266, "step": 1688, "token_acc": 0.25598060517837457 }, { "epoch": 0.990325417766051, "grad_norm": 0.9268558263660351, "learning_rate": 0.000297010550996483, "loss": 3.436521053314209, "step": 1689, "token_acc": 0.25622815233019824 }, { "epoch": 0.99091175608326, "grad_norm": 0.7939532103834785, "learning_rate": 0.0002971864009378663, "loss": 3.399738073348999, "step": 1690, "token_acc": 0.26121320531228526 }, { "epoch": 0.9914980944004691, "grad_norm": 0.7943310120893513, "learning_rate": 0.0002973622508792497, "loss": 3.4149017333984375, "step": 1691, "token_acc": 0.25951125415413 }, { "epoch": 0.9920844327176781, "grad_norm": 0.8748408720329571, "learning_rate": 0.00029753810082063306, "loss": 3.45902156829834, "step": 1692, "token_acc": 0.2526114892472697 }, { "epoch": 0.9926707710348871, "grad_norm": 1.1103174216671499, "learning_rate": 0.0002977139507620164, "loss": 3.4256043434143066, "step": 1693, "token_acc": 0.2564982095258609 }, { "epoch": 0.9932571093520962, "grad_norm": 1.2286517604869127, "learning_rate": 0.00029788980070339976, "loss": 3.4395804405212402, "step": 1694, "token_acc": 0.25493543139971436 }, { "epoch": 0.9938434476693052, "grad_norm": 1.0210072953526748, "learning_rate": 0.0002980656506447831, "loss": 3.4143528938293457, "step": 1695, "token_acc": 0.2597158684178792 }, { "epoch": 0.9944297859865142, "grad_norm": 0.9802514881528891, "learning_rate": 0.00029824150058616646, "loss": 3.375446319580078, "step": 1696, "token_acc": 0.2642880572615679 }, { "epoch": 0.9950161243037232, "grad_norm": 0.9943322618687697, "learning_rate": 0.0002984173505275498, "loss": 3.421415328979492, "step": 1697, "token_acc": 0.25697515562022133 }, { "epoch": 0.9956024626209323, "grad_norm": 1.0951892710373659, "learning_rate": 0.0002985932004689331, "loss": 3.4430785179138184, "step": 1698, "token_acc": 0.2549121346846752 }, { "epoch": 0.9961888009381413, "grad_norm": 0.94806515266285, "learning_rate": 0.0002987690504103165, "loss": 3.436896800994873, "step": 1699, "token_acc": 0.2558370881853824 }, { "epoch": 0.9967751392553503, "grad_norm": 0.9653025378040253, "learning_rate": 0.00029894490035169987, "loss": 3.3707313537597656, "step": 1700, "token_acc": 0.2640488333377375 }, { "epoch": 0.9973614775725593, "grad_norm": 1.155369457124877, "learning_rate": 0.0002991207502930832, "loss": 3.3923587799072266, "step": 1701, "token_acc": 0.26086674199036375 }, { "epoch": 0.9979478158897684, "grad_norm": 1.2166474726195073, "learning_rate": 0.00029929660023446657, "loss": 3.393880844116211, "step": 1702, "token_acc": 0.2593271936556664 }, { "epoch": 0.9985341542069774, "grad_norm": 0.7563388412989023, "learning_rate": 0.0002994724501758499, "loss": 3.429440975189209, "step": 1703, "token_acc": 0.2594440978720455 }, { "epoch": 0.9991204925241864, "grad_norm": 0.756823285765525, "learning_rate": 0.00029964830011723327, "loss": 3.382394313812256, "step": 1704, "token_acc": 0.26312419476509824 }, { "epoch": 0.9997068308413954, "grad_norm": 0.8732197525299397, "learning_rate": 0.0002998241500586166, "loss": 3.435934066772461, "step": 1705, "token_acc": 0.2552384780278671 }, { "epoch": 1.0, "grad_norm": 0.9633250349032103, "learning_rate": 0.0003, "loss": 3.3410556316375732, "step": 1706, "token_acc": 0.27049430741970304 }, { "epoch": 1.0, "eval_loss": 3.3927879333496094, "eval_runtime": 8.222, "eval_samples_per_second": 31.136, "eval_steps_per_second": 3.892, "eval_token_acc": 0.26011028975955314, "step": 1706 }, { "epoch": 1.0005863383172091, "grad_norm": 1.0278859598089938, "learning_rate": 0.000299999999295476, "loss": 3.4180030822753906, "step": 1707, "token_acc": 0.25876622434251667 }, { "epoch": 1.001172676634418, "grad_norm": 1.0864264429910573, "learning_rate": 0.00029999999718190423, "loss": 3.3736886978149414, "step": 1708, "token_acc": 0.2637199628222097 }, { "epoch": 1.0017590149516271, "grad_norm": 0.9769255217125332, "learning_rate": 0.0002999999936592846, "loss": 3.409825325012207, "step": 1709, "token_acc": 0.25942727451812764 }, { "epoch": 1.0023453532688362, "grad_norm": 0.8333681661466221, "learning_rate": 0.00029999998872761715, "loss": 3.37786602973938, "step": 1710, "token_acc": 0.2628836363454688 }, { "epoch": 1.0029316915860451, "grad_norm": 0.8084953482684752, "learning_rate": 0.0002999999823869019, "loss": 3.392746925354004, "step": 1711, "token_acc": 0.26052477321027856 }, { "epoch": 1.0035180299032542, "grad_norm": 0.9261746325531222, "learning_rate": 0.00029999997463713897, "loss": 3.4910969734191895, "step": 1712, "token_acc": 0.25134285714285715 }, { "epoch": 1.0041043682204631, "grad_norm": 1.1063628391454878, "learning_rate": 0.0002999999654783284, "loss": 3.391463279724121, "step": 1713, "token_acc": 0.2600584345177497 }, { "epoch": 1.0046907065376722, "grad_norm": 0.9810377414032089, "learning_rate": 0.00029999995491047026, "loss": 3.3883185386657715, "step": 1714, "token_acc": 0.25857074704600147 }, { "epoch": 1.0052770448548813, "grad_norm": 0.8932328364487947, "learning_rate": 0.00029999994293356474, "loss": 3.3523049354553223, "step": 1715, "token_acc": 0.2667162173751465 }, { "epoch": 1.0058633831720902, "grad_norm": 0.739044666395893, "learning_rate": 0.0002999999295476118, "loss": 3.3763883113861084, "step": 1716, "token_acc": 0.2626059459045688 }, { "epoch": 1.0064497214892993, "grad_norm": 0.8456230589026097, "learning_rate": 0.00029999991475261167, "loss": 3.399453639984131, "step": 1717, "token_acc": 0.2597213931399532 }, { "epoch": 1.0070360598065085, "grad_norm": 0.8751833313184247, "learning_rate": 0.0002999998985485645, "loss": 3.3983278274536133, "step": 1718, "token_acc": 0.2583237978996618 }, { "epoch": 1.0076223981237173, "grad_norm": 0.9920140389801315, "learning_rate": 0.0002999998809354704, "loss": 3.4007740020751953, "step": 1719, "token_acc": 0.25923446534313105 }, { "epoch": 1.0082087364409265, "grad_norm": 1.2084044953976938, "learning_rate": 0.00029999986191332957, "loss": 3.424487590789795, "step": 1720, "token_acc": 0.25755801703237646 }, { "epoch": 1.0087950747581353, "grad_norm": 0.6820958420544017, "learning_rate": 0.0002999998414821421, "loss": 3.4095396995544434, "step": 1721, "token_acc": 0.26009093340465367 }, { "epoch": 1.0093814130753445, "grad_norm": 0.7412080973084054, "learning_rate": 0.0002999998196419083, "loss": 3.4043984413146973, "step": 1722, "token_acc": 0.2596113732532137 }, { "epoch": 1.0099677513925536, "grad_norm": 1.0373002169063243, "learning_rate": 0.0002999997963926283, "loss": 3.354407787322998, "step": 1723, "token_acc": 0.26333875781941 }, { "epoch": 1.0105540897097625, "grad_norm": 0.9080489688837124, "learning_rate": 0.00029999977173430234, "loss": 3.428590774536133, "step": 1724, "token_acc": 0.2568598981812835 }, { "epoch": 1.0111404280269716, "grad_norm": 0.9875429010714675, "learning_rate": 0.00029999974566693067, "loss": 3.4000887870788574, "step": 1725, "token_acc": 0.2605814519190159 }, { "epoch": 1.0117267663441807, "grad_norm": 1.0907716369815048, "learning_rate": 0.0002999997181905135, "loss": 3.415714740753174, "step": 1726, "token_acc": 0.25532852890809754 }, { "epoch": 1.0123131046613896, "grad_norm": 0.965178027918728, "learning_rate": 0.0002999996893050511, "loss": 3.4131994247436523, "step": 1727, "token_acc": 0.2566456520606471 }, { "epoch": 1.0128994429785987, "grad_norm": 0.9547230227619361, "learning_rate": 0.0002999996590105438, "loss": 3.4405550956726074, "step": 1728, "token_acc": 0.25368073740722774 }, { "epoch": 1.0134857812958076, "grad_norm": 0.8847965964508416, "learning_rate": 0.00029999962730699174, "loss": 3.389859676361084, "step": 1729, "token_acc": 0.2604225765926406 }, { "epoch": 1.0140721196130167, "grad_norm": 0.8294793922589502, "learning_rate": 0.00029999959419439536, "loss": 3.4114487171173096, "step": 1730, "token_acc": 0.25742371223732746 }, { "epoch": 1.0146584579302258, "grad_norm": 1.0027198034549007, "learning_rate": 0.00029999955967275493, "loss": 3.35640025138855, "step": 1731, "token_acc": 0.26197248549718116 }, { "epoch": 1.0152447962474347, "grad_norm": 0.9592782797974249, "learning_rate": 0.0002999995237420707, "loss": 3.3589940071105957, "step": 1732, "token_acc": 0.2638201270316613 }, { "epoch": 1.0158311345646438, "grad_norm": 0.943559877890224, "learning_rate": 0.00029999948640234317, "loss": 3.314626455307007, "step": 1733, "token_acc": 0.2709107539526983 }, { "epoch": 1.016417472881853, "grad_norm": 0.8642867669201103, "learning_rate": 0.00029999944765357253, "loss": 3.400038242340088, "step": 1734, "token_acc": 0.25767984943731037 }, { "epoch": 1.0170038111990618, "grad_norm": 0.8459321748467306, "learning_rate": 0.0002999994074957592, "loss": 3.4074056148529053, "step": 1735, "token_acc": 0.2597568777991043 }, { "epoch": 1.017590149516271, "grad_norm": 0.894745378019444, "learning_rate": 0.00029999936592890356, "loss": 3.372298240661621, "step": 1736, "token_acc": 0.2629354410082604 }, { "epoch": 1.01817648783348, "grad_norm": 0.9224572091580456, "learning_rate": 0.00029999932295300604, "loss": 3.4247336387634277, "step": 1737, "token_acc": 0.25723349397265083 }, { "epoch": 1.018762826150689, "grad_norm": 0.7911471956349633, "learning_rate": 0.00029999927856806695, "loss": 3.3763225078582764, "step": 1738, "token_acc": 0.2620417021538588 }, { "epoch": 1.019349164467898, "grad_norm": 0.7174391704005636, "learning_rate": 0.00029999923277408686, "loss": 3.3414993286132812, "step": 1739, "token_acc": 0.2666880440478332 }, { "epoch": 1.019935502785107, "grad_norm": 0.8386471678889612, "learning_rate": 0.00029999918557106607, "loss": 3.394024133682251, "step": 1740, "token_acc": 0.2607623532406376 }, { "epoch": 1.020521841102316, "grad_norm": 1.0113645296909612, "learning_rate": 0.000299999136959005, "loss": 3.3740687370300293, "step": 1741, "token_acc": 0.26362324544142723 }, { "epoch": 1.0211081794195251, "grad_norm": 1.128621336111224, "learning_rate": 0.0002999990869379042, "loss": 3.3917953968048096, "step": 1742, "token_acc": 0.26263016121899246 }, { "epoch": 1.021694517736734, "grad_norm": 0.710250082745488, "learning_rate": 0.0002999990355077641, "loss": 3.450207233428955, "step": 1743, "token_acc": 0.2519529839669444 }, { "epoch": 1.0222808560539431, "grad_norm": 0.8119155944613001, "learning_rate": 0.00029999898266858517, "loss": 3.3751885890960693, "step": 1744, "token_acc": 0.2633274532274056 }, { "epoch": 1.0228671943711523, "grad_norm": 0.8324607460854886, "learning_rate": 0.00029999892842036803, "loss": 3.4380359649658203, "step": 1745, "token_acc": 0.2558347268818098 }, { "epoch": 1.0234535326883611, "grad_norm": 0.7664113526347227, "learning_rate": 0.00029999887276311297, "loss": 3.3703644275665283, "step": 1746, "token_acc": 0.2644394860979511 }, { "epoch": 1.0240398710055703, "grad_norm": 0.8877303154156282, "learning_rate": 0.00029999881569682063, "loss": 3.363328695297241, "step": 1747, "token_acc": 0.26397908341443105 }, { "epoch": 1.0246262093227791, "grad_norm": 0.7224506263196261, "learning_rate": 0.0002999987572214916, "loss": 3.36671781539917, "step": 1748, "token_acc": 0.26416642265677964 }, { "epoch": 1.0252125476399883, "grad_norm": 0.6699390918986655, "learning_rate": 0.00029999869733712635, "loss": 3.3922994136810303, "step": 1749, "token_acc": 0.2604790495493773 }, { "epoch": 1.0257988859571974, "grad_norm": 0.8220225165178263, "learning_rate": 0.00029999863604372544, "loss": 3.3872084617614746, "step": 1750, "token_acc": 0.2595554774573166 }, { "epoch": 1.0263852242744063, "grad_norm": 0.8485810972963475, "learning_rate": 0.0002999985733412895, "loss": 3.3624041080474854, "step": 1751, "token_acc": 0.2649622390525435 }, { "epoch": 1.0269715625916154, "grad_norm": 0.9047595067154075, "learning_rate": 0.00029999850922981906, "loss": 3.362987756729126, "step": 1752, "token_acc": 0.26315309662853864 }, { "epoch": 1.0275579009088245, "grad_norm": 1.1178257155612394, "learning_rate": 0.0002999984437093148, "loss": 3.3803837299346924, "step": 1753, "token_acc": 0.26111043660492295 }, { "epoch": 1.0281442392260334, "grad_norm": 0.9210282713304254, "learning_rate": 0.00029999837677977724, "loss": 3.372103214263916, "step": 1754, "token_acc": 0.2640985176492804 }, { "epoch": 1.0287305775432425, "grad_norm": 0.7476643013868809, "learning_rate": 0.0002999983084412071, "loss": 3.4022512435913086, "step": 1755, "token_acc": 0.2576047129090288 }, { "epoch": 1.0293169158604514, "grad_norm": 0.81344564858752, "learning_rate": 0.0002999982386936049, "loss": 3.372739791870117, "step": 1756, "token_acc": 0.2634982415879752 }, { "epoch": 1.0299032541776605, "grad_norm": 0.7536285278639427, "learning_rate": 0.00029999816753697143, "loss": 3.3786659240722656, "step": 1757, "token_acc": 0.26132991417888957 }, { "epoch": 1.0304895924948696, "grad_norm": 0.7842789862506705, "learning_rate": 0.0002999980949713073, "loss": 3.348092555999756, "step": 1758, "token_acc": 0.2655454906348118 }, { "epoch": 1.0310759308120785, "grad_norm": 0.8396611512827058, "learning_rate": 0.0002999980209966132, "loss": 3.3995118141174316, "step": 1759, "token_acc": 0.2603121619861858 }, { "epoch": 1.0316622691292876, "grad_norm": 1.1018203486439937, "learning_rate": 0.0002999979456128898, "loss": 3.3909754753112793, "step": 1760, "token_acc": 0.2593186258946939 }, { "epoch": 1.0322486074464967, "grad_norm": 1.0576369226958502, "learning_rate": 0.0002999978688201378, "loss": 3.3943753242492676, "step": 1761, "token_acc": 0.260843463121654 }, { "epoch": 1.0328349457637056, "grad_norm": 1.019445181119772, "learning_rate": 0.000299997790618358, "loss": 3.3849689960479736, "step": 1762, "token_acc": 0.261232816955356 }, { "epoch": 1.0334212840809147, "grad_norm": 0.8755984761795024, "learning_rate": 0.00029999771100755105, "loss": 3.3875365257263184, "step": 1763, "token_acc": 0.25996019017955674 }, { "epoch": 1.0340076223981236, "grad_norm": 0.8994273455788394, "learning_rate": 0.00029999762998771774, "loss": 3.393388271331787, "step": 1764, "token_acc": 0.2599351092190374 }, { "epoch": 1.0345939607153327, "grad_norm": 0.9717786494864452, "learning_rate": 0.0002999975475588588, "loss": 3.350372314453125, "step": 1765, "token_acc": 0.2650019770248897 }, { "epoch": 1.0351802990325418, "grad_norm": 1.0468605375312847, "learning_rate": 0.00029999746372097507, "loss": 3.3480684757232666, "step": 1766, "token_acc": 0.26507298053939743 }, { "epoch": 1.0357666373497507, "grad_norm": 0.8851988192954556, "learning_rate": 0.0002999973784740672, "loss": 3.3836312294006348, "step": 1767, "token_acc": 0.26240938520281115 }, { "epoch": 1.0363529756669598, "grad_norm": 0.9331771450498074, "learning_rate": 0.00029999729181813615, "loss": 3.4141969680786133, "step": 1768, "token_acc": 0.2595553457020279 }, { "epoch": 1.036939313984169, "grad_norm": 0.77731891668261, "learning_rate": 0.00029999720375318266, "loss": 3.3435940742492676, "step": 1769, "token_acc": 0.26451718240192285 }, { "epoch": 1.0375256523013778, "grad_norm": 0.8986994009828604, "learning_rate": 0.0002999971142792076, "loss": 3.3709797859191895, "step": 1770, "token_acc": 0.262259951557326 }, { "epoch": 1.038111990618587, "grad_norm": 0.8776321376775235, "learning_rate": 0.0002999970233962117, "loss": 3.3955564498901367, "step": 1771, "token_acc": 0.26160457333350695 }, { "epoch": 1.038698328935796, "grad_norm": 0.8175033022470682, "learning_rate": 0.00029999693110419593, "loss": 3.419679641723633, "step": 1772, "token_acc": 0.25593657649161256 }, { "epoch": 1.039284667253005, "grad_norm": 0.8277918762478872, "learning_rate": 0.00029999683740316116, "loss": 3.4046270847320557, "step": 1773, "token_acc": 0.25829507278095404 }, { "epoch": 1.039871005570214, "grad_norm": 0.7603283305041085, "learning_rate": 0.0002999967422931082, "loss": 3.3523402214050293, "step": 1774, "token_acc": 0.2653741866942922 }, { "epoch": 1.040457343887423, "grad_norm": 0.7606691782279854, "learning_rate": 0.00029999664577403794, "loss": 3.380520820617676, "step": 1775, "token_acc": 0.2620260087961379 }, { "epoch": 1.041043682204632, "grad_norm": 0.7226846040608634, "learning_rate": 0.00029999654784595135, "loss": 3.411355495452881, "step": 1776, "token_acc": 0.2576562091014471 }, { "epoch": 1.0416300205218412, "grad_norm": 0.689014870912869, "learning_rate": 0.0002999964485088493, "loss": 3.382324695587158, "step": 1777, "token_acc": 0.25940353790152626 }, { "epoch": 1.04221635883905, "grad_norm": 0.7761575920904261, "learning_rate": 0.0002999963477627327, "loss": 3.3927111625671387, "step": 1778, "token_acc": 0.26143789183304567 }, { "epoch": 1.0428026971562592, "grad_norm": 0.7044803022677012, "learning_rate": 0.0002999962456076026, "loss": 3.3545103073120117, "step": 1779, "token_acc": 0.263890076684056 }, { "epoch": 1.0433890354734683, "grad_norm": 0.7699425146010567, "learning_rate": 0.00029999614204345986, "loss": 3.3955256938934326, "step": 1780, "token_acc": 0.2587263670070707 }, { "epoch": 1.0439753737906772, "grad_norm": 0.9232243393083946, "learning_rate": 0.00029999603707030545, "loss": 3.3742294311523438, "step": 1781, "token_acc": 0.2616142697768505 }, { "epoch": 1.0445617121078863, "grad_norm": 0.9426066110575911, "learning_rate": 0.00029999593068814044, "loss": 3.305436611175537, "step": 1782, "token_acc": 0.26920904756133096 }, { "epoch": 1.0451480504250952, "grad_norm": 0.8670073963620424, "learning_rate": 0.0002999958228969658, "loss": 3.376132011413574, "step": 1783, "token_acc": 0.2624668927582907 }, { "epoch": 1.0457343887423043, "grad_norm": 1.0426661507890371, "learning_rate": 0.0002999957136967825, "loss": 3.334969997406006, "step": 1784, "token_acc": 0.2681092555750532 }, { "epoch": 1.0463207270595134, "grad_norm": 0.9671634793867665, "learning_rate": 0.0002999956030875916, "loss": 3.3868446350097656, "step": 1785, "token_acc": 0.26027436522101643 }, { "epoch": 1.0469070653767223, "grad_norm": 0.8985632541483063, "learning_rate": 0.00029999549106939414, "loss": 3.3959574699401855, "step": 1786, "token_acc": 0.2607896363587497 }, { "epoch": 1.0474934036939314, "grad_norm": 0.8612457539310248, "learning_rate": 0.0002999953776421911, "loss": 3.416934013366699, "step": 1787, "token_acc": 0.25827750621775314 }, { "epoch": 1.0480797420111405, "grad_norm": 0.7170934421803308, "learning_rate": 0.00029999526280598374, "loss": 3.3741862773895264, "step": 1788, "token_acc": 0.2614633512952022 }, { "epoch": 1.0486660803283494, "grad_norm": 0.8041434643072711, "learning_rate": 0.00029999514656077285, "loss": 3.404782772064209, "step": 1789, "token_acc": 0.2589871424822881 }, { "epoch": 1.0492524186455585, "grad_norm": 0.7943564409743228, "learning_rate": 0.00029999502890655977, "loss": 3.348447322845459, "step": 1790, "token_acc": 0.26582131135510745 }, { "epoch": 1.0498387569627674, "grad_norm": 0.6660538951113505, "learning_rate": 0.0002999949098433455, "loss": 3.4021592140197754, "step": 1791, "token_acc": 0.25776594328318464 }, { "epoch": 1.0504250952799765, "grad_norm": 0.7349703975298816, "learning_rate": 0.0002999947893711312, "loss": 3.380229949951172, "step": 1792, "token_acc": 0.26187537359199486 }, { "epoch": 1.0510114335971856, "grad_norm": 0.7399682433070527, "learning_rate": 0.0002999946674899179, "loss": 3.330737829208374, "step": 1793, "token_acc": 0.2670677534825549 }, { "epoch": 1.0515977719143945, "grad_norm": 0.7233746919487639, "learning_rate": 0.0002999945441997069, "loss": 3.3915553092956543, "step": 1794, "token_acc": 0.2597853714336429 }, { "epoch": 1.0521841102316036, "grad_norm": 0.6939501488533247, "learning_rate": 0.0002999944195004992, "loss": 3.3740787506103516, "step": 1795, "token_acc": 0.2618271831453743 }, { "epoch": 1.0527704485488127, "grad_norm": 0.6525540005774914, "learning_rate": 0.0002999942933922961, "loss": 3.3524160385131836, "step": 1796, "token_acc": 0.264246341495198 }, { "epoch": 1.0533567868660216, "grad_norm": 0.7278215743329536, "learning_rate": 0.00029999416587509875, "loss": 3.349514961242676, "step": 1797, "token_acc": 0.26369993675947173 }, { "epoch": 1.0539431251832307, "grad_norm": 0.74157067501199, "learning_rate": 0.0002999940369489083, "loss": 3.3952043056488037, "step": 1798, "token_acc": 0.2597982444648238 }, { "epoch": 1.0545294635004399, "grad_norm": 0.6652350999524858, "learning_rate": 0.000299993906613726, "loss": 3.3528342247009277, "step": 1799, "token_acc": 0.2653861775033546 }, { "epoch": 1.0551158018176487, "grad_norm": 0.766098439126724, "learning_rate": 0.00029999377486955304, "loss": 3.345491647720337, "step": 1800, "token_acc": 0.2650788052054133 }, { "epoch": 1.0557021401348579, "grad_norm": 0.9866872815918452, "learning_rate": 0.00029999364171639077, "loss": 3.407106876373291, "step": 1801, "token_acc": 0.2580140856371538 }, { "epoch": 1.0562884784520667, "grad_norm": 1.1768840730452808, "learning_rate": 0.0002999935071542403, "loss": 3.4136111736297607, "step": 1802, "token_acc": 0.25747453552966904 }, { "epoch": 1.0568748167692759, "grad_norm": 0.7892104606692729, "learning_rate": 0.0002999933711831029, "loss": 3.4035239219665527, "step": 1803, "token_acc": 0.2581690174726764 }, { "epoch": 1.057461155086485, "grad_norm": 0.7375141856192511, "learning_rate": 0.00029999323380298, "loss": 3.389493227005005, "step": 1804, "token_acc": 0.26071410652181376 }, { "epoch": 1.0580474934036939, "grad_norm": 0.9593468572012652, "learning_rate": 0.0002999930950138727, "loss": 3.3721256256103516, "step": 1805, "token_acc": 0.2624717719774853 }, { "epoch": 1.058633831720903, "grad_norm": 1.0766239421207, "learning_rate": 0.0002999929548157824, "loss": 3.3696908950805664, "step": 1806, "token_acc": 0.26196856726142603 }, { "epoch": 1.059220170038112, "grad_norm": 0.9456005406147554, "learning_rate": 0.00029999281320871045, "loss": 3.359165668487549, "step": 1807, "token_acc": 0.2623298447226212 }, { "epoch": 1.059806508355321, "grad_norm": 0.8439669142285969, "learning_rate": 0.0002999926701926581, "loss": 3.3571033477783203, "step": 1808, "token_acc": 0.26622653285334696 }, { "epoch": 1.06039284667253, "grad_norm": 0.8205434561019966, "learning_rate": 0.0002999925257676267, "loss": 3.384303092956543, "step": 1809, "token_acc": 0.25844957316782885 }, { "epoch": 1.060979184989739, "grad_norm": 0.8971858413606124, "learning_rate": 0.00029999237993361767, "loss": 3.3800411224365234, "step": 1810, "token_acc": 0.260637640565144 }, { "epoch": 1.061565523306948, "grad_norm": 0.7823903778009983, "learning_rate": 0.0002999922326906324, "loss": 3.379568099975586, "step": 1811, "token_acc": 0.2601063569776506 }, { "epoch": 1.0621518616241572, "grad_norm": 0.9159004083745563, "learning_rate": 0.0002999920840386722, "loss": 3.3650574684143066, "step": 1812, "token_acc": 0.2624144561590261 }, { "epoch": 1.062738199941366, "grad_norm": 1.030104968460944, "learning_rate": 0.00029999193397773846, "loss": 3.3583874702453613, "step": 1813, "token_acc": 0.2638046572030603 }, { "epoch": 1.0633245382585752, "grad_norm": 0.9624137254531098, "learning_rate": 0.0002999917825078326, "loss": 3.400360107421875, "step": 1814, "token_acc": 0.26012860373840685 }, { "epoch": 1.0639108765757843, "grad_norm": 0.7893157710454682, "learning_rate": 0.00029999162962895606, "loss": 3.3766489028930664, "step": 1815, "token_acc": 0.2627018532277808 }, { "epoch": 1.0644972148929932, "grad_norm": 0.6949594233793593, "learning_rate": 0.0002999914753411103, "loss": 3.3800716400146484, "step": 1816, "token_acc": 0.26135095090219 }, { "epoch": 1.0650835532102023, "grad_norm": 0.745899088250051, "learning_rate": 0.00029999131964429676, "loss": 3.3419995307922363, "step": 1817, "token_acc": 0.26539760067210405 }, { "epoch": 1.0656698915274112, "grad_norm": 0.9529758359560807, "learning_rate": 0.0002999911625385169, "loss": 3.3772125244140625, "step": 1818, "token_acc": 0.2623152772573517 }, { "epoch": 1.0662562298446203, "grad_norm": 0.857926572563159, "learning_rate": 0.00029999100402377214, "loss": 3.392021656036377, "step": 1819, "token_acc": 0.25674741018237357 }, { "epoch": 1.0668425681618294, "grad_norm": 0.9550570270986178, "learning_rate": 0.000299990844100064, "loss": 3.392603874206543, "step": 1820, "token_acc": 0.2594738689607106 }, { "epoch": 1.0674289064790383, "grad_norm": 0.9535639342697362, "learning_rate": 0.0002999906827673941, "loss": 3.389920711517334, "step": 1821, "token_acc": 0.2597747740643187 }, { "epoch": 1.0680152447962474, "grad_norm": 1.0020147431310158, "learning_rate": 0.00029999052002576375, "loss": 3.3925065994262695, "step": 1822, "token_acc": 0.2601016069554256 }, { "epoch": 1.0686015831134565, "grad_norm": 0.8487964557424253, "learning_rate": 0.0002999903558751746, "loss": 3.354167938232422, "step": 1823, "token_acc": 0.26329381444657307 }, { "epoch": 1.0691879214306654, "grad_norm": 0.7935267537680214, "learning_rate": 0.00029999019031562814, "loss": 3.3833789825439453, "step": 1824, "token_acc": 0.2610409315544568 }, { "epoch": 1.0697742597478745, "grad_norm": 0.8960275105104439, "learning_rate": 0.000299990023347126, "loss": 3.3536205291748047, "step": 1825, "token_acc": 0.26386308371728845 }, { "epoch": 1.0703605980650837, "grad_norm": 0.9312218639291532, "learning_rate": 0.0002999898549696697, "loss": 3.3346824645996094, "step": 1826, "token_acc": 0.2663930145599632 }, { "epoch": 1.0709469363822925, "grad_norm": 0.8228525371779517, "learning_rate": 0.00029998968518326084, "loss": 3.3215909004211426, "step": 1827, "token_acc": 0.2687290038815346 }, { "epoch": 1.0715332746995017, "grad_norm": 0.7293975570159389, "learning_rate": 0.000299989513987901, "loss": 3.334415912628174, "step": 1828, "token_acc": 0.2639500933935404 }, { "epoch": 1.0721196130167105, "grad_norm": 0.845495366430657, "learning_rate": 0.00029998934138359177, "loss": 3.384962797164917, "step": 1829, "token_acc": 0.25753564664448714 }, { "epoch": 1.0727059513339197, "grad_norm": 0.9174501715512632, "learning_rate": 0.0002999891673703348, "loss": 3.4210264682769775, "step": 1830, "token_acc": 0.2531643301602578 }, { "epoch": 1.0732922896511288, "grad_norm": 0.851762399027193, "learning_rate": 0.00029998899194813165, "loss": 3.3407535552978516, "step": 1831, "token_acc": 0.2675511755328695 }, { "epoch": 1.0738786279683377, "grad_norm": 0.7976284371502028, "learning_rate": 0.0002999888151169841, "loss": 3.3013997077941895, "step": 1832, "token_acc": 0.27040328165224425 }, { "epoch": 1.0744649662855468, "grad_norm": 0.9812288980132592, "learning_rate": 0.0002999886368768938, "loss": 3.3562488555908203, "step": 1833, "token_acc": 0.2630545586935039 }, { "epoch": 1.0750513046027559, "grad_norm": 0.9791616359697202, "learning_rate": 0.0002999884572278623, "loss": 3.3331212997436523, "step": 1834, "token_acc": 0.2662169065005254 }, { "epoch": 1.0756376429199648, "grad_norm": 0.7719278901610888, "learning_rate": 0.0002999882761698913, "loss": 3.3892135620117188, "step": 1835, "token_acc": 0.2607259208156069 }, { "epoch": 1.0762239812371739, "grad_norm": 0.7422384611730193, "learning_rate": 0.00029998809370298266, "loss": 3.343909740447998, "step": 1836, "token_acc": 0.2646412836875938 }, { "epoch": 1.0768103195543828, "grad_norm": 0.8532424680345285, "learning_rate": 0.00029998790982713793, "loss": 3.401294708251953, "step": 1837, "token_acc": 0.2601695338928915 }, { "epoch": 1.077396657871592, "grad_norm": 0.7758794825761017, "learning_rate": 0.00029998772454235893, "loss": 3.353382110595703, "step": 1838, "token_acc": 0.2618015035145331 }, { "epoch": 1.077982996188801, "grad_norm": 0.6932813798239679, "learning_rate": 0.0002999875378486474, "loss": 3.373500347137451, "step": 1839, "token_acc": 0.2617260594030746 }, { "epoch": 1.07856933450601, "grad_norm": 0.7131069637005163, "learning_rate": 0.000299987349746005, "loss": 3.385409116744995, "step": 1840, "token_acc": 0.25893486904826496 }, { "epoch": 1.079155672823219, "grad_norm": 0.6980254047974559, "learning_rate": 0.00029998716023443356, "loss": 3.332803726196289, "step": 1841, "token_acc": 0.26615579121191013 }, { "epoch": 1.0797420111404281, "grad_norm": 0.6515846096456306, "learning_rate": 0.00029998696931393486, "loss": 3.340400218963623, "step": 1842, "token_acc": 0.26582544097814875 }, { "epoch": 1.080328349457637, "grad_norm": 0.7106931322015347, "learning_rate": 0.00029998677698451077, "loss": 3.389979124069214, "step": 1843, "token_acc": 0.2595262230234107 }, { "epoch": 1.0809146877748461, "grad_norm": 0.6975720815938058, "learning_rate": 0.0002999865832461629, "loss": 3.358290910720825, "step": 1844, "token_acc": 0.26243774346509835 }, { "epoch": 1.081501026092055, "grad_norm": 0.6874789787841085, "learning_rate": 0.00029998638809889327, "loss": 3.3440351486206055, "step": 1845, "token_acc": 0.26515496479568335 }, { "epoch": 1.0820873644092641, "grad_norm": 0.7484763116980394, "learning_rate": 0.00029998619154270365, "loss": 3.3031270503997803, "step": 1846, "token_acc": 0.27085472806441013 }, { "epoch": 1.0826737027264732, "grad_norm": 0.6716394854257814, "learning_rate": 0.0002999859935775958, "loss": 3.3274431228637695, "step": 1847, "token_acc": 0.2670102902325925 }, { "epoch": 1.0832600410436821, "grad_norm": 0.7246323707521469, "learning_rate": 0.00029998579420357173, "loss": 3.3061628341674805, "step": 1848, "token_acc": 0.2686076228303312 }, { "epoch": 1.0838463793608912, "grad_norm": 0.8409240959189997, "learning_rate": 0.0002999855934206331, "loss": 3.273674964904785, "step": 1849, "token_acc": 0.27425059355728765 }, { "epoch": 1.0844327176781003, "grad_norm": 0.7850085431693875, "learning_rate": 0.00029998539122878207, "loss": 3.364394187927246, "step": 1850, "token_acc": 0.26277502407017794 }, { "epoch": 1.0850190559953092, "grad_norm": 0.8227303629573909, "learning_rate": 0.00029998518762802033, "loss": 3.3496623039245605, "step": 1851, "token_acc": 0.2631613021569537 }, { "epoch": 1.0856053943125183, "grad_norm": 0.8260038363963389, "learning_rate": 0.0002999849826183499, "loss": 3.358795404434204, "step": 1852, "token_acc": 0.26455691762210704 }, { "epoch": 1.0861917326297275, "grad_norm": 0.697201027188689, "learning_rate": 0.0002999847761997726, "loss": 3.376037836074829, "step": 1853, "token_acc": 0.26037088869724184 }, { "epoch": 1.0867780709469363, "grad_norm": 0.6127033600818833, "learning_rate": 0.0002999845683722905, "loss": 3.396925449371338, "step": 1854, "token_acc": 0.2584458122608201 }, { "epoch": 1.0873644092641455, "grad_norm": 0.722678643055183, "learning_rate": 0.00029998435913590547, "loss": 3.305473804473877, "step": 1855, "token_acc": 0.2696824059852351 }, { "epoch": 1.0879507475813543, "grad_norm": 0.7044692180510782, "learning_rate": 0.0002999841484906195, "loss": 3.361895799636841, "step": 1856, "token_acc": 0.26089728895107256 }, { "epoch": 1.0885370858985635, "grad_norm": 0.6668830006063075, "learning_rate": 0.0002999839364364345, "loss": 3.415842056274414, "step": 1857, "token_acc": 0.2564124743679114 }, { "epoch": 1.0891234242157726, "grad_norm": 0.7317548277285817, "learning_rate": 0.0002999837229733526, "loss": 3.3588547706604004, "step": 1858, "token_acc": 0.2610472786462046 }, { "epoch": 1.0897097625329815, "grad_norm": 0.9015859051332197, "learning_rate": 0.0002999835081013756, "loss": 3.366499423980713, "step": 1859, "token_acc": 0.26132335417598257 }, { "epoch": 1.0902961008501906, "grad_norm": 1.0877575177463663, "learning_rate": 0.0002999832918205058, "loss": 3.3732504844665527, "step": 1860, "token_acc": 0.259486928952226 }, { "epoch": 1.0908824391673997, "grad_norm": 1.0735482612044867, "learning_rate": 0.00029998307413074503, "loss": 3.423321008682251, "step": 1861, "token_acc": 0.25715507538769505 }, { "epoch": 1.0914687774846086, "grad_norm": 0.868524363994832, "learning_rate": 0.0002999828550320953, "loss": 3.3437538146972656, "step": 1862, "token_acc": 0.2673338852475441 }, { "epoch": 1.0920551158018177, "grad_norm": 0.6891365587706741, "learning_rate": 0.0002999826345245589, "loss": 3.3648080825805664, "step": 1863, "token_acc": 0.26254205175843265 }, { "epoch": 1.0926414541190266, "grad_norm": 0.9074006485523566, "learning_rate": 0.00029998241260813767, "loss": 3.3081865310668945, "step": 1864, "token_acc": 0.27017784222366753 }, { "epoch": 1.0932277924362357, "grad_norm": 0.7593773774507495, "learning_rate": 0.0002999821892828338, "loss": 3.315218448638916, "step": 1865, "token_acc": 0.26843527895785935 }, { "epoch": 1.0938141307534448, "grad_norm": 0.6497742092191846, "learning_rate": 0.00029998196454864934, "loss": 3.3000540733337402, "step": 1866, "token_acc": 0.27061344441312496 }, { "epoch": 1.0944004690706537, "grad_norm": 0.7048221938275748, "learning_rate": 0.0002999817384055864, "loss": 3.3393657207489014, "step": 1867, "token_acc": 0.2679184850062057 }, { "epoch": 1.0949868073878628, "grad_norm": 0.7908784387797431, "learning_rate": 0.00029998151085364714, "loss": 3.378603458404541, "step": 1868, "token_acc": 0.259456069449902 }, { "epoch": 1.095573145705072, "grad_norm": 0.8068618658514728, "learning_rate": 0.00029998128189283373, "loss": 3.36336612701416, "step": 1869, "token_acc": 0.2633997413482546 }, { "epoch": 1.0961594840222808, "grad_norm": 0.8659161802934441, "learning_rate": 0.00029998105152314827, "loss": 3.376901865005493, "step": 1870, "token_acc": 0.2623447480247874 }, { "epoch": 1.09674582233949, "grad_norm": 0.9134912370764321, "learning_rate": 0.00029998081974459294, "loss": 3.3381829261779785, "step": 1871, "token_acc": 0.2662731509391431 }, { "epoch": 1.0973321606566988, "grad_norm": 0.7803111438405833, "learning_rate": 0.0002999805865571699, "loss": 3.3406195640563965, "step": 1872, "token_acc": 0.2671025408892982 }, { "epoch": 1.097918498973908, "grad_norm": 0.6447883447424895, "learning_rate": 0.0002999803519608813, "loss": 3.3208789825439453, "step": 1873, "token_acc": 0.2666537644785086 }, { "epoch": 1.098504837291117, "grad_norm": 0.5818048970487153, "learning_rate": 0.0002999801159557295, "loss": 3.3246748447418213, "step": 1874, "token_acc": 0.269779814782225 }, { "epoch": 1.099091175608326, "grad_norm": 0.7194365061619795, "learning_rate": 0.00029997987854171656, "loss": 3.3521764278411865, "step": 1875, "token_acc": 0.2627429729652276 }, { "epoch": 1.099677513925535, "grad_norm": 0.7418192473254891, "learning_rate": 0.00029997963971884476, "loss": 3.3831262588500977, "step": 1876, "token_acc": 0.26058105384542124 }, { "epoch": 1.1002638522427441, "grad_norm": 0.7360889287029435, "learning_rate": 0.0002999793994871163, "loss": 3.2953128814697266, "step": 1877, "token_acc": 0.2726489190900754 }, { "epoch": 1.100850190559953, "grad_norm": 0.6914228117661096, "learning_rate": 0.0002999791578465335, "loss": 3.3322625160217285, "step": 1878, "token_acc": 0.26731065516909897 }, { "epoch": 1.1014365288771621, "grad_norm": 0.6818145221951787, "learning_rate": 0.00029997891479709865, "loss": 3.431363105773926, "step": 1879, "token_acc": 0.2527140101432057 }, { "epoch": 1.1020228671943713, "grad_norm": 0.6380090435573896, "learning_rate": 0.000299978670338814, "loss": 3.3048906326293945, "step": 1880, "token_acc": 0.26954131041178025 }, { "epoch": 1.1026092055115801, "grad_norm": 0.6626461330029279, "learning_rate": 0.0002999784244716818, "loss": 3.4155962467193604, "step": 1881, "token_acc": 0.25694825620672185 }, { "epoch": 1.1031955438287893, "grad_norm": 0.8506625506583408, "learning_rate": 0.0002999781771957044, "loss": 3.3267247676849365, "step": 1882, "token_acc": 0.2661804153999438 }, { "epoch": 1.1037818821459981, "grad_norm": 1.0169888861684042, "learning_rate": 0.0002999779285108841, "loss": 3.3621134757995605, "step": 1883, "token_acc": 0.2641188721236499 }, { "epoch": 1.1043682204632073, "grad_norm": 0.9996126969486607, "learning_rate": 0.0002999776784172234, "loss": 3.4028024673461914, "step": 1884, "token_acc": 0.25747495324083963 }, { "epoch": 1.1049545587804164, "grad_norm": 0.8139645269537045, "learning_rate": 0.0002999774269147244, "loss": 3.353316307067871, "step": 1885, "token_acc": 0.26401771069917107 }, { "epoch": 1.1055408970976253, "grad_norm": 0.80233470952487, "learning_rate": 0.00029997717400338954, "loss": 3.3447303771972656, "step": 1886, "token_acc": 0.2648047630674455 }, { "epoch": 1.1061272354148344, "grad_norm": 0.9067542121117775, "learning_rate": 0.00029997691968322126, "loss": 3.34726619720459, "step": 1887, "token_acc": 0.26563167601421334 }, { "epoch": 1.1067135737320435, "grad_norm": 0.8790008003714386, "learning_rate": 0.0002999766639542219, "loss": 3.383406162261963, "step": 1888, "token_acc": 0.26137054282255673 }, { "epoch": 1.1072999120492524, "grad_norm": 0.7723925882335825, "learning_rate": 0.0002999764068163939, "loss": 3.375410556793213, "step": 1889, "token_acc": 0.2602062758529014 }, { "epoch": 1.1078862503664615, "grad_norm": 0.6862856940559612, "learning_rate": 0.0002999761482697397, "loss": 3.3254730701446533, "step": 1890, "token_acc": 0.2677047978495208 }, { "epoch": 1.1084725886836704, "grad_norm": 0.7005596297591745, "learning_rate": 0.0002999758883142616, "loss": 3.3733391761779785, "step": 1891, "token_acc": 0.26148496348237105 }, { "epoch": 1.1090589270008795, "grad_norm": 0.6574644069151541, "learning_rate": 0.0002999756269499622, "loss": 3.3428986072540283, "step": 1892, "token_acc": 0.263758845483692 }, { "epoch": 1.1096452653180886, "grad_norm": 0.7374671406319119, "learning_rate": 0.0002999753641768438, "loss": 3.34000301361084, "step": 1893, "token_acc": 0.26486557171252223 }, { "epoch": 1.1102316036352975, "grad_norm": 0.7089183877569187, "learning_rate": 0.000299975099994909, "loss": 3.343444585800171, "step": 1894, "token_acc": 0.2662241450838618 }, { "epoch": 1.1108179419525066, "grad_norm": 0.7150249875529383, "learning_rate": 0.00029997483440416024, "loss": 3.355675220489502, "step": 1895, "token_acc": 0.2632219663815144 }, { "epoch": 1.1114042802697157, "grad_norm": 0.68784403268337, "learning_rate": 0.0002999745674046, "loss": 3.355212688446045, "step": 1896, "token_acc": 0.26356509016888774 }, { "epoch": 1.1119906185869246, "grad_norm": 0.688190966174502, "learning_rate": 0.00029997429899623077, "loss": 3.3388872146606445, "step": 1897, "token_acc": 0.2651463904585778 }, { "epoch": 1.1125769569041337, "grad_norm": 0.7259598076374931, "learning_rate": 0.0002999740291790551, "loss": 3.3208112716674805, "step": 1898, "token_acc": 0.26923304342217014 }, { "epoch": 1.1131632952213426, "grad_norm": 0.8619283887543173, "learning_rate": 0.0002999737579530755, "loss": 3.371981143951416, "step": 1899, "token_acc": 0.26365867501918205 }, { "epoch": 1.1137496335385517, "grad_norm": 0.7331512846683491, "learning_rate": 0.00029997348531829454, "loss": 3.286670684814453, "step": 1900, "token_acc": 0.2714882102821801 }, { "epoch": 1.1143359718557608, "grad_norm": 0.6969807793380998, "learning_rate": 0.0002999732112747148, "loss": 3.376579761505127, "step": 1901, "token_acc": 0.2624837004382263 }, { "epoch": 1.1149223101729697, "grad_norm": 0.6956932096027781, "learning_rate": 0.0002999729358223388, "loss": 3.3629045486450195, "step": 1902, "token_acc": 0.2634608450689611 }, { "epoch": 1.1155086484901788, "grad_norm": 0.6236796923785131, "learning_rate": 0.00029997265896116923, "loss": 3.304075002670288, "step": 1903, "token_acc": 0.27021195106626383 }, { "epoch": 1.116094986807388, "grad_norm": 0.5722823413972503, "learning_rate": 0.00029997238069120856, "loss": 3.3786699771881104, "step": 1904, "token_acc": 0.2614218425268879 }, { "epoch": 1.1166813251245968, "grad_norm": 0.6006724897405271, "learning_rate": 0.00029997210101245943, "loss": 3.277489185333252, "step": 1905, "token_acc": 0.27338236577241487 }, { "epoch": 1.117267663441806, "grad_norm": 0.6142685318454425, "learning_rate": 0.0002999718199249246, "loss": 3.367166042327881, "step": 1906, "token_acc": 0.2626761168846522 }, { "epoch": 1.117854001759015, "grad_norm": 0.7853224904120498, "learning_rate": 0.00029997153742860656, "loss": 3.393280267715454, "step": 1907, "token_acc": 0.25774006826009216 }, { "epoch": 1.118440340076224, "grad_norm": 0.9933270488076913, "learning_rate": 0.000299971253523508, "loss": 3.3495571613311768, "step": 1908, "token_acc": 0.26349458047421176 }, { "epoch": 1.119026678393433, "grad_norm": 1.0631415138893656, "learning_rate": 0.0002999709682096316, "loss": 3.311051368713379, "step": 1909, "token_acc": 0.2663021597683087 }, { "epoch": 1.119613016710642, "grad_norm": 0.9524295589845513, "learning_rate": 0.0002999706814869801, "loss": 3.3175277709960938, "step": 1910, "token_acc": 0.267980606631419 }, { "epoch": 1.120199355027851, "grad_norm": 0.8239734835740735, "learning_rate": 0.000299970393355556, "loss": 3.3693792819976807, "step": 1911, "token_acc": 0.2606141410774551 }, { "epoch": 1.1207856933450602, "grad_norm": 0.7856677472781632, "learning_rate": 0.0002999701038153623, "loss": 3.362311840057373, "step": 1912, "token_acc": 0.2618071321468802 }, { "epoch": 1.121372031662269, "grad_norm": 0.706648113129583, "learning_rate": 0.0002999698128664015, "loss": 3.335555076599121, "step": 1913, "token_acc": 0.2662314007183171 }, { "epoch": 1.1219583699794782, "grad_norm": 0.7102652196766037, "learning_rate": 0.0002999695205086764, "loss": 3.388075113296509, "step": 1914, "token_acc": 0.2599739683509952 }, { "epoch": 1.1225447082966873, "grad_norm": 0.6667556280557873, "learning_rate": 0.0002999692267421897, "loss": 3.3488645553588867, "step": 1915, "token_acc": 0.26448339628374307 }, { "epoch": 1.1231310466138962, "grad_norm": 0.6601217240036369, "learning_rate": 0.00029996893156694426, "loss": 3.3449301719665527, "step": 1916, "token_acc": 0.2628081070982206 }, { "epoch": 1.1237173849311053, "grad_norm": 0.7229753657573753, "learning_rate": 0.00029996863498294276, "loss": 3.33357310295105, "step": 1917, "token_acc": 0.2662222444199685 }, { "epoch": 1.1243037232483142, "grad_norm": 0.8791007568872177, "learning_rate": 0.00029996833699018805, "loss": 3.35505747795105, "step": 1918, "token_acc": 0.26359758503775726 }, { "epoch": 1.1248900615655233, "grad_norm": 1.0999690846524, "learning_rate": 0.0002999680375886829, "loss": 3.3831076622009277, "step": 1919, "token_acc": 0.260181298035195 }, { "epoch": 1.1254763998827324, "grad_norm": 0.8369759326026645, "learning_rate": 0.00029996773677843004, "loss": 3.359495162963867, "step": 1920, "token_acc": 0.2637172948018531 }, { "epoch": 1.1260627381999413, "grad_norm": 0.7845576540450709, "learning_rate": 0.0002999674345594325, "loss": 3.3230931758880615, "step": 1921, "token_acc": 0.26525522740727936 }, { "epoch": 1.1266490765171504, "grad_norm": 0.8410633495346551, "learning_rate": 0.00029996713093169284, "loss": 3.359400749206543, "step": 1922, "token_acc": 0.26570629795092193 }, { "epoch": 1.1272354148343595, "grad_norm": 0.7303650681504213, "learning_rate": 0.0002999668258952142, "loss": 3.332303524017334, "step": 1923, "token_acc": 0.26684466823821207 }, { "epoch": 1.1278217531515684, "grad_norm": 0.6731782228706984, "learning_rate": 0.0002999665194499992, "loss": 3.3381526470184326, "step": 1924, "token_acc": 0.2665927327288147 }, { "epoch": 1.1284080914687775, "grad_norm": 0.875065253599284, "learning_rate": 0.000299966211596051, "loss": 3.337651491165161, "step": 1925, "token_acc": 0.2667549806915921 }, { "epoch": 1.1289944297859864, "grad_norm": 0.8093063015717047, "learning_rate": 0.00029996590233337216, "loss": 3.352814197540283, "step": 1926, "token_acc": 0.26216892438334005 }, { "epoch": 1.1295807681031955, "grad_norm": 0.7451980234972398, "learning_rate": 0.0002999655916619658, "loss": 3.338909149169922, "step": 1927, "token_acc": 0.26322852809931563 }, { "epoch": 1.1301671064204046, "grad_norm": 0.6049228277401555, "learning_rate": 0.0002999652795818348, "loss": 3.3408162593841553, "step": 1928, "token_acc": 0.2655704082368825 }, { "epoch": 1.1307534447376135, "grad_norm": 0.619707357719485, "learning_rate": 0.00029996496609298215, "loss": 3.289022445678711, "step": 1929, "token_acc": 0.27188357714557826 }, { "epoch": 1.1313397830548226, "grad_norm": 0.8673011790171524, "learning_rate": 0.00029996465119541064, "loss": 3.334179639816284, "step": 1930, "token_acc": 0.2659278985940694 }, { "epoch": 1.1319261213720317, "grad_norm": 0.7982743584041475, "learning_rate": 0.0002999643348891233, "loss": 3.3319764137268066, "step": 1931, "token_acc": 0.266465007929949 }, { "epoch": 1.1325124596892406, "grad_norm": 0.7837746878822562, "learning_rate": 0.0002999640171741232, "loss": 3.323746681213379, "step": 1932, "token_acc": 0.2686681787230121 }, { "epoch": 1.1330987980064497, "grad_norm": 0.7451614648824124, "learning_rate": 0.00029996369805041314, "loss": 3.3534750938415527, "step": 1933, "token_acc": 0.26392933798243023 }, { "epoch": 1.1336851363236589, "grad_norm": 0.6631188997441021, "learning_rate": 0.00029996337751799624, "loss": 3.380854368209839, "step": 1934, "token_acc": 0.2589157624066479 }, { "epoch": 1.1342714746408677, "grad_norm": 0.6273536900390022, "learning_rate": 0.00029996305557687555, "loss": 3.3189263343811035, "step": 1935, "token_acc": 0.2681923947343257 }, { "epoch": 1.1348578129580769, "grad_norm": 0.5243896912843286, "learning_rate": 0.00029996273222705397, "loss": 3.301912307739258, "step": 1936, "token_acc": 0.27202488927476914 }, { "epoch": 1.1354441512752858, "grad_norm": 0.552250075539328, "learning_rate": 0.0002999624074685346, "loss": 3.3436598777770996, "step": 1937, "token_acc": 0.2636355927262705 }, { "epoch": 1.1360304895924949, "grad_norm": 0.5929807875975288, "learning_rate": 0.0002999620813013205, "loss": 3.3347506523132324, "step": 1938, "token_acc": 0.26471321695760597 }, { "epoch": 1.136616827909704, "grad_norm": 0.7032005102990087, "learning_rate": 0.0002999617537254147, "loss": 3.374617099761963, "step": 1939, "token_acc": 0.2601524976615624 }, { "epoch": 1.1372031662269129, "grad_norm": 0.7285075832661516, "learning_rate": 0.0002999614247408204, "loss": 3.360698699951172, "step": 1940, "token_acc": 0.26319181077740567 }, { "epoch": 1.137789504544122, "grad_norm": 0.8826469909185993, "learning_rate": 0.0002999610943475404, "loss": 3.3209612369537354, "step": 1941, "token_acc": 0.26784537231409444 }, { "epoch": 1.1383758428613309, "grad_norm": 1.0492315463364479, "learning_rate": 0.00029996076254557816, "loss": 3.338071346282959, "step": 1942, "token_acc": 0.2661972017069852 }, { "epoch": 1.13896218117854, "grad_norm": 0.860027410585991, "learning_rate": 0.0002999604293349366, "loss": 3.377962350845337, "step": 1943, "token_acc": 0.2602863675511627 }, { "epoch": 1.139548519495749, "grad_norm": 0.650907376080584, "learning_rate": 0.0002999600947156188, "loss": 3.389468193054199, "step": 1944, "token_acc": 0.25905989249603856 }, { "epoch": 1.140134857812958, "grad_norm": 0.5969851658839853, "learning_rate": 0.0002999597586876281, "loss": 3.2962870597839355, "step": 1945, "token_acc": 0.2698709445534775 }, { "epoch": 1.140721196130167, "grad_norm": 0.7242920636922087, "learning_rate": 0.00029995942125096746, "loss": 3.3191795349121094, "step": 1946, "token_acc": 0.26826151214228594 }, { "epoch": 1.1413075344473762, "grad_norm": 0.8087653998720067, "learning_rate": 0.0002999590824056401, "loss": 3.3854565620422363, "step": 1947, "token_acc": 0.25963435618733316 }, { "epoch": 1.141893872764585, "grad_norm": 0.7869087307846857, "learning_rate": 0.00029995874215164934, "loss": 3.305237054824829, "step": 1948, "token_acc": 0.26998533896463617 }, { "epoch": 1.1424802110817942, "grad_norm": 0.6910460118517419, "learning_rate": 0.0002999584004889982, "loss": 3.338068962097168, "step": 1949, "token_acc": 0.2669910527853209 }, { "epoch": 1.1430665493990033, "grad_norm": 0.7108637470013623, "learning_rate": 0.00029995805741769, "loss": 3.3965020179748535, "step": 1950, "token_acc": 0.25785697121285334 }, { "epoch": 1.1436528877162122, "grad_norm": 0.7178269562933071, "learning_rate": 0.0002999577129377279, "loss": 3.3654632568359375, "step": 1951, "token_acc": 0.2610368533812058 }, { "epoch": 1.1442392260334213, "grad_norm": 0.65037033338626, "learning_rate": 0.0002999573670491151, "loss": 3.357334613800049, "step": 1952, "token_acc": 0.26266939619784835 }, { "epoch": 1.1448255643506302, "grad_norm": 0.7189769920989445, "learning_rate": 0.000299957019751855, "loss": 3.327446460723877, "step": 1953, "token_acc": 0.26656079129405996 }, { "epoch": 1.1454119026678393, "grad_norm": 0.8095041811195423, "learning_rate": 0.00029995667104595067, "loss": 3.321824073791504, "step": 1954, "token_acc": 0.26718440404792615 }, { "epoch": 1.1459982409850484, "grad_norm": 0.7130710663788038, "learning_rate": 0.00029995632093140557, "loss": 3.3693463802337646, "step": 1955, "token_acc": 0.2625461369695096 }, { "epoch": 1.1465845793022573, "grad_norm": 0.6500165133231406, "learning_rate": 0.0002999559694082229, "loss": 3.375372886657715, "step": 1956, "token_acc": 0.2590127074425589 }, { "epoch": 1.1471709176194664, "grad_norm": 0.7787068125894548, "learning_rate": 0.0002999556164764059, "loss": 3.3446969985961914, "step": 1957, "token_acc": 0.2645939922055181 }, { "epoch": 1.1477572559366755, "grad_norm": 0.8195009412002108, "learning_rate": 0.00029995526213595804, "loss": 3.3114867210388184, "step": 1958, "token_acc": 0.2697273415397499 }, { "epoch": 1.1483435942538844, "grad_norm": 0.8081389286150346, "learning_rate": 0.0002999549063868825, "loss": 3.3430967330932617, "step": 1959, "token_acc": 0.2663364055895911 }, { "epoch": 1.1489299325710935, "grad_norm": 0.7759123761988718, "learning_rate": 0.00029995454922918265, "loss": 3.3480381965637207, "step": 1960, "token_acc": 0.2663450319231223 }, { "epoch": 1.1495162708883027, "grad_norm": 0.719609283634722, "learning_rate": 0.00029995419066286197, "loss": 3.284679412841797, "step": 1961, "token_acc": 0.271282795366428 }, { "epoch": 1.1501026092055116, "grad_norm": 0.7015104305571958, "learning_rate": 0.00029995383068792366, "loss": 3.3615212440490723, "step": 1962, "token_acc": 0.2632653326234596 }, { "epoch": 1.1506889475227207, "grad_norm": 0.5689924705042977, "learning_rate": 0.0002999534693043712, "loss": 3.3382349014282227, "step": 1963, "token_acc": 0.2646267546566649 }, { "epoch": 1.1512752858399296, "grad_norm": 0.6262209952327991, "learning_rate": 0.000299953106512208, "loss": 3.286991834640503, "step": 1964, "token_acc": 0.27078639958660117 }, { "epoch": 1.1518616241571387, "grad_norm": 0.7388822545265264, "learning_rate": 0.00029995274231143734, "loss": 3.3689193725585938, "step": 1965, "token_acc": 0.2620067219652344 }, { "epoch": 1.1524479624743478, "grad_norm": 0.7812418842743305, "learning_rate": 0.0002999523767020628, "loss": 3.352614641189575, "step": 1966, "token_acc": 0.2621751615766729 }, { "epoch": 1.1530343007915567, "grad_norm": 0.6051669723795385, "learning_rate": 0.0002999520096840877, "loss": 3.3416333198547363, "step": 1967, "token_acc": 0.2651756764996726 }, { "epoch": 1.1536206391087658, "grad_norm": 0.6706426620703033, "learning_rate": 0.0002999516412575156, "loss": 3.3312907218933105, "step": 1968, "token_acc": 0.26512975722841137 }, { "epoch": 1.1542069774259747, "grad_norm": 0.765885205827473, "learning_rate": 0.00029995127142234983, "loss": 3.288945436477661, "step": 1969, "token_acc": 0.27127552373375763 }, { "epoch": 1.1547933157431838, "grad_norm": 0.6795166117387362, "learning_rate": 0.00029995090017859394, "loss": 3.293445587158203, "step": 1970, "token_acc": 0.27237392049574793 }, { "epoch": 1.155379654060393, "grad_norm": 0.7916104091506182, "learning_rate": 0.0002999505275262514, "loss": 3.379434585571289, "step": 1971, "token_acc": 0.2616566646034608 }, { "epoch": 1.1559659923776018, "grad_norm": 0.7188022054827424, "learning_rate": 0.00029995015346532573, "loss": 3.304378032684326, "step": 1972, "token_acc": 0.2698788996793357 }, { "epoch": 1.156552330694811, "grad_norm": 0.569945433372692, "learning_rate": 0.0002999497779958204, "loss": 3.291182041168213, "step": 1973, "token_acc": 0.2715777536506419 }, { "epoch": 1.15713866901202, "grad_norm": 0.6201960741217912, "learning_rate": 0.000299949401117739, "loss": 3.3668785095214844, "step": 1974, "token_acc": 0.2601957358099146 }, { "epoch": 1.157725007329229, "grad_norm": 0.7329999714887171, "learning_rate": 0.00029994902283108504, "loss": 3.3286333084106445, "step": 1975, "token_acc": 0.267235791596362 }, { "epoch": 1.158311345646438, "grad_norm": 0.7213961361869522, "learning_rate": 0.000299948643135862, "loss": 3.282303810119629, "step": 1976, "token_acc": 0.2734704559352425 }, { "epoch": 1.1588976839636471, "grad_norm": 0.6427228218191209, "learning_rate": 0.0002999482620320736, "loss": 3.368710517883301, "step": 1977, "token_acc": 0.26174031511292994 }, { "epoch": 1.159484022280856, "grad_norm": 0.5613460399318453, "learning_rate": 0.00029994787951972333, "loss": 3.356588363647461, "step": 1978, "token_acc": 0.26274101309296055 }, { "epoch": 1.1600703605980651, "grad_norm": 0.6097384180377884, "learning_rate": 0.00029994749559881476, "loss": 3.332831859588623, "step": 1979, "token_acc": 0.26737021804919686 }, { "epoch": 1.160656698915274, "grad_norm": 0.6316714066209003, "learning_rate": 0.0002999471102693515, "loss": 3.332819938659668, "step": 1980, "token_acc": 0.2647212923507352 }, { "epoch": 1.1612430372324831, "grad_norm": 0.6795550957237514, "learning_rate": 0.00029994672353133726, "loss": 3.3542909622192383, "step": 1981, "token_acc": 0.2649227253279347 }, { "epoch": 1.1618293755496922, "grad_norm": 0.6264201925215888, "learning_rate": 0.00029994633538477555, "loss": 3.322784900665283, "step": 1982, "token_acc": 0.2667106243600671 }, { "epoch": 1.1624157138669011, "grad_norm": 0.6633182100862598, "learning_rate": 0.0002999459458296701, "loss": 3.3186521530151367, "step": 1983, "token_acc": 0.2680578769918358 }, { "epoch": 1.1630020521841102, "grad_norm": 0.6881980978248731, "learning_rate": 0.0002999455548660245, "loss": 3.3235373497009277, "step": 1984, "token_acc": 0.26742215491840887 }, { "epoch": 1.1635883905013193, "grad_norm": 0.5706343264002469, "learning_rate": 0.0002999451624938425, "loss": 3.3294029235839844, "step": 1985, "token_acc": 0.265123171920702 }, { "epoch": 1.1641747288185282, "grad_norm": 0.6942383787856404, "learning_rate": 0.00029994476871312783, "loss": 3.34446382522583, "step": 1986, "token_acc": 0.2650353065399591 }, { "epoch": 1.1647610671357373, "grad_norm": 0.7099706142371945, "learning_rate": 0.00029994437352388404, "loss": 3.2746572494506836, "step": 1987, "token_acc": 0.27445367288568606 }, { "epoch": 1.1653474054529465, "grad_norm": 0.5898544531498877, "learning_rate": 0.00029994397692611487, "loss": 3.3302407264709473, "step": 1988, "token_acc": 0.2661184072943243 }, { "epoch": 1.1659337437701554, "grad_norm": 0.6300487622753115, "learning_rate": 0.00029994357891982413, "loss": 3.3049159049987793, "step": 1989, "token_acc": 0.26821039513661626 }, { "epoch": 1.1665200820873645, "grad_norm": 0.6516852825734706, "learning_rate": 0.00029994317950501556, "loss": 3.3591113090515137, "step": 1990, "token_acc": 0.26239879899201113 }, { "epoch": 1.1671064204045734, "grad_norm": 0.6863933468120201, "learning_rate": 0.00029994277868169284, "loss": 3.338371753692627, "step": 1991, "token_acc": 0.2648220212142904 }, { "epoch": 1.1676927587217825, "grad_norm": 0.8924121778566915, "learning_rate": 0.00029994237644985977, "loss": 3.317682981491089, "step": 1992, "token_acc": 0.2689219445659727 }, { "epoch": 1.1682790970389916, "grad_norm": 1.0840220933324198, "learning_rate": 0.0002999419728095201, "loss": 3.3796143531799316, "step": 1993, "token_acc": 0.2584881158708978 }, { "epoch": 1.1688654353562005, "grad_norm": 1.2412303166841476, "learning_rate": 0.00029994156776067763, "loss": 3.3809375762939453, "step": 1994, "token_acc": 0.25951028021167694 }, { "epoch": 1.1694517736734096, "grad_norm": 0.7827911872446766, "learning_rate": 0.0002999411613033362, "loss": 3.3278648853302, "step": 1995, "token_acc": 0.26477981639327036 }, { "epoch": 1.1700381119906185, "grad_norm": 0.7929071837072107, "learning_rate": 0.0002999407534374996, "loss": 3.3798394203186035, "step": 1996, "token_acc": 0.2608343818359324 }, { "epoch": 1.1706244503078276, "grad_norm": 0.7986184390573784, "learning_rate": 0.00029994034416317165, "loss": 3.3762965202331543, "step": 1997, "token_acc": 0.25908969564208567 }, { "epoch": 1.1712107886250367, "grad_norm": 0.7999578194088334, "learning_rate": 0.00029993993348035626, "loss": 3.3295092582702637, "step": 1998, "token_acc": 0.26638387307608513 }, { "epoch": 1.1717971269422456, "grad_norm": 0.7126091151644154, "learning_rate": 0.00029993952138905724, "loss": 3.3363890647888184, "step": 1999, "token_acc": 0.2659289195422305 }, { "epoch": 1.1723834652594547, "grad_norm": 0.893795821143371, "learning_rate": 0.0002999391078892784, "loss": 3.3406131267547607, "step": 2000, "token_acc": 0.26555549402883144 }, { "epoch": 1.1729698035766638, "grad_norm": 1.0080838403923484, "learning_rate": 0.0002999386929810238, "loss": 3.332104206085205, "step": 2001, "token_acc": 0.265471036794742 }, { "epoch": 1.1735561418938727, "grad_norm": 0.7842580658459681, "learning_rate": 0.00029993827666429713, "loss": 3.2980542182922363, "step": 2002, "token_acc": 0.27015937623092384 }, { "epoch": 1.1741424802110818, "grad_norm": 0.6727063001572678, "learning_rate": 0.0002999378589391024, "loss": 3.3635566234588623, "step": 2003, "token_acc": 0.2622830703961223 }, { "epoch": 1.174728818528291, "grad_norm": 0.8024219203341931, "learning_rate": 0.00029993743980544354, "loss": 3.331637144088745, "step": 2004, "token_acc": 0.26550438163009815 }, { "epoch": 1.1753151568454998, "grad_norm": 0.6713497173993388, "learning_rate": 0.0002999370192633245, "loss": 3.337149143218994, "step": 2005, "token_acc": 0.265036261659202 }, { "epoch": 1.175901495162709, "grad_norm": 0.6415603183059132, "learning_rate": 0.0002999365973127492, "loss": 3.307196855545044, "step": 2006, "token_acc": 0.2695347666004744 }, { "epoch": 1.1764878334799178, "grad_norm": 0.5961756599005775, "learning_rate": 0.00029993617395372165, "loss": 3.362119197845459, "step": 2007, "token_acc": 0.260503534487951 }, { "epoch": 1.177074171797127, "grad_norm": 0.682835441678395, "learning_rate": 0.00029993574918624574, "loss": 3.3564674854278564, "step": 2008, "token_acc": 0.26026551084234156 }, { "epoch": 1.177660510114336, "grad_norm": 0.6625098307529516, "learning_rate": 0.0002999353230103255, "loss": 3.310089111328125, "step": 2009, "token_acc": 0.2677890816436882 }, { "epoch": 1.178246848431545, "grad_norm": 0.6123330858898528, "learning_rate": 0.0002999348954259649, "loss": 3.3231968879699707, "step": 2010, "token_acc": 0.26636346379378045 }, { "epoch": 1.178833186748754, "grad_norm": 0.5702395878478161, "learning_rate": 0.0002999344664331681, "loss": 3.2643089294433594, "step": 2011, "token_acc": 0.27394198960915056 }, { "epoch": 1.1794195250659631, "grad_norm": 0.6425606904272094, "learning_rate": 0.00029993403603193895, "loss": 3.3612194061279297, "step": 2012, "token_acc": 0.2606042466011451 }, { "epoch": 1.180005863383172, "grad_norm": 0.6899898103543229, "learning_rate": 0.0002999336042222816, "loss": 3.321394920349121, "step": 2013, "token_acc": 0.26843857951443134 }, { "epoch": 1.1805922017003811, "grad_norm": 0.593362004041284, "learning_rate": 0.00029993317100420006, "loss": 3.314401149749756, "step": 2014, "token_acc": 0.2682603445690519 }, { "epoch": 1.1811785400175903, "grad_norm": 0.5580832500825603, "learning_rate": 0.00029993273637769844, "loss": 3.3018605709075928, "step": 2015, "token_acc": 0.2695492449314311 }, { "epoch": 1.1817648783347992, "grad_norm": 0.6903425112203811, "learning_rate": 0.0002999323003427808, "loss": 3.3319854736328125, "step": 2016, "token_acc": 0.2660228246601794 }, { "epoch": 1.1823512166520083, "grad_norm": 0.6967150806433087, "learning_rate": 0.0002999318628994512, "loss": 3.294027090072632, "step": 2017, "token_acc": 0.2699184743561589 }, { "epoch": 1.1829375549692172, "grad_norm": 0.756701148809816, "learning_rate": 0.0002999314240477138, "loss": 3.3299171924591064, "step": 2018, "token_acc": 0.2662063167072781 }, { "epoch": 1.1835238932864263, "grad_norm": 0.628809448696654, "learning_rate": 0.00029993098378757274, "loss": 3.2967135906219482, "step": 2019, "token_acc": 0.2696584922702615 }, { "epoch": 1.1841102316036354, "grad_norm": 0.6411136343546889, "learning_rate": 0.0002999305421190321, "loss": 3.3544015884399414, "step": 2020, "token_acc": 0.26283174762143213 }, { "epoch": 1.1846965699208443, "grad_norm": 0.6333796484586045, "learning_rate": 0.00029993009904209604, "loss": 3.3268680572509766, "step": 2021, "token_acc": 0.265913253455844 }, { "epoch": 1.1852829082380534, "grad_norm": 0.6107410027655137, "learning_rate": 0.00029992965455676875, "loss": 3.3241801261901855, "step": 2022, "token_acc": 0.26734860660873355 }, { "epoch": 1.1858692465552623, "grad_norm": 0.7356535562613199, "learning_rate": 0.00029992920866305433, "loss": 3.2969963550567627, "step": 2023, "token_acc": 0.27054739263723854 }, { "epoch": 1.1864555848724714, "grad_norm": 0.7695591936268781, "learning_rate": 0.00029992876136095706, "loss": 3.3530123233795166, "step": 2024, "token_acc": 0.26591298729045404 }, { "epoch": 1.1870419231896805, "grad_norm": 0.6838717378354494, "learning_rate": 0.00029992831265048117, "loss": 3.3304524421691895, "step": 2025, "token_acc": 0.26564195578705857 }, { "epoch": 1.1876282615068894, "grad_norm": 0.7090986012275077, "learning_rate": 0.00029992786253163077, "loss": 3.334564447402954, "step": 2026, "token_acc": 0.263806425270407 }, { "epoch": 1.1882145998240985, "grad_norm": 0.8212285587289102, "learning_rate": 0.0002999274110044101, "loss": 3.3476967811584473, "step": 2027, "token_acc": 0.2638440899530132 }, { "epoch": 1.1888009381413076, "grad_norm": 0.6842681880156068, "learning_rate": 0.00029992695806882344, "loss": 3.309312105178833, "step": 2028, "token_acc": 0.26729882987588954 }, { "epoch": 1.1893872764585165, "grad_norm": 0.7247761063798277, "learning_rate": 0.00029992650372487507, "loss": 3.32065486907959, "step": 2029, "token_acc": 0.2655774066670965 }, { "epoch": 1.1899736147757256, "grad_norm": 0.7610741636989733, "learning_rate": 0.0002999260479725692, "loss": 3.3356432914733887, "step": 2030, "token_acc": 0.26399924497307664 }, { "epoch": 1.1905599530929347, "grad_norm": 0.7515939632275938, "learning_rate": 0.0002999255908119101, "loss": 3.298417091369629, "step": 2031, "token_acc": 0.2671629509828957 }, { "epoch": 1.1911462914101436, "grad_norm": 0.6858455339466643, "learning_rate": 0.0002999251322429022, "loss": 3.303715229034424, "step": 2032, "token_acc": 0.269738735405361 }, { "epoch": 1.1917326297273527, "grad_norm": 0.6665509936380439, "learning_rate": 0.0002999246722655497, "loss": 3.3836960792541504, "step": 2033, "token_acc": 0.2594626330508719 }, { "epoch": 1.1923189680445616, "grad_norm": 0.8279750421855766, "learning_rate": 0.00029992421087985684, "loss": 3.2851297855377197, "step": 2034, "token_acc": 0.27180549127884934 }, { "epoch": 1.1929053063617707, "grad_norm": 0.8588560314510714, "learning_rate": 0.0002999237480858281, "loss": 3.3636527061462402, "step": 2035, "token_acc": 0.26119030348901995 }, { "epoch": 1.1934916446789798, "grad_norm": 0.6777087470019907, "learning_rate": 0.0002999232838834678, "loss": 3.3011584281921387, "step": 2036, "token_acc": 0.27084546902627493 }, { "epoch": 1.1940779829961887, "grad_norm": 0.6595819956758585, "learning_rate": 0.0002999228182727802, "loss": 3.333683490753174, "step": 2037, "token_acc": 0.2648266417208313 }, { "epoch": 1.1946643213133978, "grad_norm": 0.6792667286840062, "learning_rate": 0.0002999223512537698, "loss": 3.2768514156341553, "step": 2038, "token_acc": 0.27290284993735087 }, { "epoch": 1.195250659630607, "grad_norm": 0.7246748573378871, "learning_rate": 0.00029992188282644094, "loss": 3.319718599319458, "step": 2039, "token_acc": 0.26794886323207356 }, { "epoch": 1.1958369979478158, "grad_norm": 0.6551161235233398, "learning_rate": 0.00029992141299079795, "loss": 3.3104288578033447, "step": 2040, "token_acc": 0.26690501895865604 }, { "epoch": 1.196423336265025, "grad_norm": 0.6345854619264507, "learning_rate": 0.00029992094174684534, "loss": 3.337841033935547, "step": 2041, "token_acc": 0.26508669058902135 }, { "epoch": 1.197009674582234, "grad_norm": 0.605743758218586, "learning_rate": 0.00029992046909458757, "loss": 3.3117339611053467, "step": 2042, "token_acc": 0.26860436925895 }, { "epoch": 1.197596012899443, "grad_norm": 0.6815455246425074, "learning_rate": 0.0002999199950340289, "loss": 3.359574317932129, "step": 2043, "token_acc": 0.2614712308812819 }, { "epoch": 1.198182351216652, "grad_norm": 0.6085384835152693, "learning_rate": 0.000299919519565174, "loss": 3.288266181945801, "step": 2044, "token_acc": 0.2713576653036435 }, { "epoch": 1.198768689533861, "grad_norm": 0.6388341826321748, "learning_rate": 0.00029991904268802716, "loss": 3.3446741104125977, "step": 2045, "token_acc": 0.26320325363280656 }, { "epoch": 1.19935502785107, "grad_norm": 0.6225496120296988, "learning_rate": 0.00029991856440259295, "loss": 3.305783271789551, "step": 2046, "token_acc": 0.27007207788571536 }, { "epoch": 1.1999413661682792, "grad_norm": 0.6117408936972606, "learning_rate": 0.00029991808470887586, "loss": 3.259596586227417, "step": 2047, "token_acc": 0.27576484625261 }, { "epoch": 1.200527704485488, "grad_norm": 0.6212806005482442, "learning_rate": 0.0002999176036068804, "loss": 3.3930552005767822, "step": 2048, "token_acc": 0.2598445319987222 }, { "epoch": 1.2011140428026972, "grad_norm": 0.6205630689809001, "learning_rate": 0.000299917121096611, "loss": 3.3198723793029785, "step": 2049, "token_acc": 0.26617474176503886 }, { "epoch": 1.201700381119906, "grad_norm": 0.5045313452997845, "learning_rate": 0.0002999166371780723, "loss": 3.3389158248901367, "step": 2050, "token_acc": 0.2651433625332621 }, { "epoch": 1.2022867194371152, "grad_norm": 0.6515204260945489, "learning_rate": 0.0002999161518512688, "loss": 3.2973780632019043, "step": 2051, "token_acc": 0.2703869284946264 }, { "epoch": 1.2028730577543243, "grad_norm": 0.7038350642826086, "learning_rate": 0.0002999156651162051, "loss": 3.31166934967041, "step": 2052, "token_acc": 0.26836957652689625 }, { "epoch": 1.2034593960715332, "grad_norm": 0.6779333257048654, "learning_rate": 0.0002999151769728857, "loss": 3.2851827144622803, "step": 2053, "token_acc": 0.27303459285729387 }, { "epoch": 1.2040457343887423, "grad_norm": 0.6554007635654845, "learning_rate": 0.00029991468742131527, "loss": 3.3222532272338867, "step": 2054, "token_acc": 0.2669137360037849 }, { "epoch": 1.2046320727059514, "grad_norm": 0.6947073524264371, "learning_rate": 0.00029991419646149836, "loss": 3.258507490158081, "step": 2055, "token_acc": 0.2737537868355825 }, { "epoch": 1.2052184110231603, "grad_norm": 0.9867715450035401, "learning_rate": 0.00029991370409343954, "loss": 3.3458991050720215, "step": 2056, "token_acc": 0.2644667105110449 }, { "epoch": 1.2058047493403694, "grad_norm": 1.014434252127322, "learning_rate": 0.0002999132103171435, "loss": 3.3140206336975098, "step": 2057, "token_acc": 0.2679720580220517 }, { "epoch": 1.2063910876575785, "grad_norm": 0.8976812794199026, "learning_rate": 0.0002999127151326149, "loss": 3.374518871307373, "step": 2058, "token_acc": 0.26009924829384207 }, { "epoch": 1.2069774259747874, "grad_norm": 0.7563778389108587, "learning_rate": 0.0002999122185398583, "loss": 3.319183349609375, "step": 2059, "token_acc": 0.26805570631252384 }, { "epoch": 1.2075637642919965, "grad_norm": 0.6415493707011815, "learning_rate": 0.00029991172053887844, "loss": 3.3099513053894043, "step": 2060, "token_acc": 0.2680718605018229 }, { "epoch": 1.2081501026092054, "grad_norm": 0.730315208009389, "learning_rate": 0.00029991122112968, "loss": 3.347346305847168, "step": 2061, "token_acc": 0.26325385942183804 }, { "epoch": 1.2087364409264145, "grad_norm": 0.7558494702149601, "learning_rate": 0.0002999107203122676, "loss": 3.3253543376922607, "step": 2062, "token_acc": 0.26826446543544563 }, { "epoch": 1.2093227792436236, "grad_norm": 0.7042346754398479, "learning_rate": 0.000299910218086646, "loss": 3.3066747188568115, "step": 2063, "token_acc": 0.2686815020082981 }, { "epoch": 1.2099091175608325, "grad_norm": 0.6783900312066156, "learning_rate": 0.00029990971445281994, "loss": 3.24532413482666, "step": 2064, "token_acc": 0.27718448487988084 }, { "epoch": 1.2104954558780416, "grad_norm": 0.6581625381317505, "learning_rate": 0.0002999092094107941, "loss": 3.334491014480591, "step": 2065, "token_acc": 0.26478884441027545 }, { "epoch": 1.2110817941952507, "grad_norm": 0.6606948607208916, "learning_rate": 0.0002999087029605732, "loss": 3.343390941619873, "step": 2066, "token_acc": 0.26227076693719525 }, { "epoch": 1.2116681325124596, "grad_norm": 0.6933547540981225, "learning_rate": 0.00029990819510216206, "loss": 3.2889957427978516, "step": 2067, "token_acc": 0.2707499161853571 }, { "epoch": 1.2122544708296688, "grad_norm": 0.6648536542965798, "learning_rate": 0.00029990768583556545, "loss": 3.3179306983947754, "step": 2068, "token_acc": 0.2670377563584098 }, { "epoch": 1.2128408091468779, "grad_norm": 0.6622009355891474, "learning_rate": 0.00029990717516078814, "loss": 3.267537832260132, "step": 2069, "token_acc": 0.27337674834722997 }, { "epoch": 1.2134271474640868, "grad_norm": 0.5744462168261999, "learning_rate": 0.00029990666307783495, "loss": 3.283907890319824, "step": 2070, "token_acc": 0.2718030100029918 }, { "epoch": 1.2140134857812959, "grad_norm": 0.658076160083686, "learning_rate": 0.0002999061495867106, "loss": 3.3672618865966797, "step": 2071, "token_acc": 0.26128527523143646 }, { "epoch": 1.2145998240985048, "grad_norm": 0.5899163347167272, "learning_rate": 0.00029990563468741997, "loss": 3.342081069946289, "step": 2072, "token_acc": 0.26453657439598416 }, { "epoch": 1.2151861624157139, "grad_norm": 0.6603644157144087, "learning_rate": 0.00029990511837996793, "loss": 3.330611228942871, "step": 2073, "token_acc": 0.2653444757807728 }, { "epoch": 1.215772500732923, "grad_norm": 0.6987118690256372, "learning_rate": 0.0002999046006643593, "loss": 3.318565845489502, "step": 2074, "token_acc": 0.2684502602749571 }, { "epoch": 1.2163588390501319, "grad_norm": 0.7429090514141424, "learning_rate": 0.00029990408154059896, "loss": 3.3112151622772217, "step": 2075, "token_acc": 0.2694966418029221 }, { "epoch": 1.216945177367341, "grad_norm": 0.8263653609898882, "learning_rate": 0.0002999035610086918, "loss": 3.2740535736083984, "step": 2076, "token_acc": 0.27266720219740354 }, { "epoch": 1.2175315156845499, "grad_norm": 0.5784419288905609, "learning_rate": 0.0002999030390686426, "loss": 3.2838969230651855, "step": 2077, "token_acc": 0.2708504916925395 }, { "epoch": 1.218117854001759, "grad_norm": 0.5494118620339634, "learning_rate": 0.0002999025157204564, "loss": 3.299497127532959, "step": 2078, "token_acc": 0.26937650438698657 }, { "epoch": 1.218704192318968, "grad_norm": 0.7012737830317644, "learning_rate": 0.00029990199096413805, "loss": 3.2877395153045654, "step": 2079, "token_acc": 0.27112755504958824 }, { "epoch": 1.219290530636177, "grad_norm": 0.6027322829673991, "learning_rate": 0.0002999014647996925, "loss": 3.3021931648254395, "step": 2080, "token_acc": 0.26911808449310193 }, { "epoch": 1.219876868953386, "grad_norm": 0.6353499255322259, "learning_rate": 0.0002999009372271247, "loss": 3.288527011871338, "step": 2081, "token_acc": 0.27196617088599173 }, { "epoch": 1.2204632072705952, "grad_norm": 0.6948989016078879, "learning_rate": 0.00029990040824643955, "loss": 3.3416969776153564, "step": 2082, "token_acc": 0.2640719622441191 }, { "epoch": 1.221049545587804, "grad_norm": 0.5545287494977666, "learning_rate": 0.00029989987785764206, "loss": 3.3221025466918945, "step": 2083, "token_acc": 0.26650663705858285 }, { "epoch": 1.2216358839050132, "grad_norm": 0.5481319206238314, "learning_rate": 0.0002998993460607372, "loss": 3.2976815700531006, "step": 2084, "token_acc": 0.2691629015802952 }, { "epoch": 1.2222222222222223, "grad_norm": 0.660616497730608, "learning_rate": 0.00029989881285573004, "loss": 3.343980312347412, "step": 2085, "token_acc": 0.2617564609454103 }, { "epoch": 1.2228085605394312, "grad_norm": 0.6250957253394741, "learning_rate": 0.00029989827824262544, "loss": 3.300699472427368, "step": 2086, "token_acc": 0.2693041229655468 }, { "epoch": 1.2233948988566403, "grad_norm": 0.7360852306767541, "learning_rate": 0.00029989774222142856, "loss": 3.3323047161102295, "step": 2087, "token_acc": 0.2669919517928538 }, { "epoch": 1.2239812371738492, "grad_norm": 0.5814890530796598, "learning_rate": 0.0002998972047921444, "loss": 3.2863571643829346, "step": 2088, "token_acc": 0.2708321311853791 }, { "epoch": 1.2245675754910583, "grad_norm": 0.6940225721146759, "learning_rate": 0.00029989666595477794, "loss": 3.307682991027832, "step": 2089, "token_acc": 0.2691685352651767 }, { "epoch": 1.2251539138082674, "grad_norm": 0.680532291131526, "learning_rate": 0.0002998961257093343, "loss": 3.308253049850464, "step": 2090, "token_acc": 0.2669477857321235 }, { "epoch": 1.2257402521254763, "grad_norm": 0.6947177813801204, "learning_rate": 0.0002998955840558186, "loss": 3.3120388984680176, "step": 2091, "token_acc": 0.2693546326558857 }, { "epoch": 1.2263265904426854, "grad_norm": 0.5968647826683237, "learning_rate": 0.0002998950409942358, "loss": 3.290501117706299, "step": 2092, "token_acc": 0.26951688733753715 }, { "epoch": 1.2269129287598945, "grad_norm": 0.5968713433910895, "learning_rate": 0.00029989449652459107, "loss": 3.3237626552581787, "step": 2093, "token_acc": 0.26627131744870497 }, { "epoch": 1.2274992670771034, "grad_norm": 0.6173449887506648, "learning_rate": 0.00029989395064688963, "loss": 3.336862564086914, "step": 2094, "token_acc": 0.2635481364488165 }, { "epoch": 1.2280856053943126, "grad_norm": 0.5203168234892392, "learning_rate": 0.00029989340336113645, "loss": 3.3187806606292725, "step": 2095, "token_acc": 0.26681958605693373 }, { "epoch": 1.2286719437115217, "grad_norm": 0.5637549069752911, "learning_rate": 0.0002998928546673367, "loss": 3.275979995727539, "step": 2096, "token_acc": 0.27155464321759 }, { "epoch": 1.2292582820287306, "grad_norm": 0.5190385652435072, "learning_rate": 0.00029989230456549555, "loss": 3.337808132171631, "step": 2097, "token_acc": 0.26503801536820215 }, { "epoch": 1.2298446203459397, "grad_norm": 0.5540363970569976, "learning_rate": 0.00029989175305561824, "loss": 3.313361883163452, "step": 2098, "token_acc": 0.26775947617046136 }, { "epoch": 1.2304309586631486, "grad_norm": 0.6106092818476985, "learning_rate": 0.0002998912001377099, "loss": 3.3052308559417725, "step": 2099, "token_acc": 0.26819892095965714 }, { "epoch": 1.2310172969803577, "grad_norm": 0.5022731012521641, "learning_rate": 0.00029989064581177567, "loss": 3.3475341796875, "step": 2100, "token_acc": 0.2632253307149824 }, { "epoch": 1.2316036352975668, "grad_norm": 0.5254456969685513, "learning_rate": 0.0002998900900778208, "loss": 3.3345439434051514, "step": 2101, "token_acc": 0.2657941701368233 }, { "epoch": 1.2321899736147757, "grad_norm": 0.5776643481379248, "learning_rate": 0.0002998895329358506, "loss": 3.4040868282318115, "step": 2102, "token_acc": 0.25648854556837114 }, { "epoch": 1.2327763119319848, "grad_norm": 0.7126250083185364, "learning_rate": 0.0002998889743858701, "loss": 3.308574676513672, "step": 2103, "token_acc": 0.271233421251954 }, { "epoch": 1.2333626502491937, "grad_norm": 0.7493689898375907, "learning_rate": 0.0002998884144278847, "loss": 3.332040309906006, "step": 2104, "token_acc": 0.2659551196009431 }, { "epoch": 1.2339489885664028, "grad_norm": 0.6717319061364705, "learning_rate": 0.00029988785306189964, "loss": 3.3404903411865234, "step": 2105, "token_acc": 0.26252485571560213 }, { "epoch": 1.234535326883612, "grad_norm": 0.5721399807371714, "learning_rate": 0.0002998872902879202, "loss": 3.326077938079834, "step": 2106, "token_acc": 0.2647285406209432 }, { "epoch": 1.2351216652008208, "grad_norm": 0.7785647488256546, "learning_rate": 0.0002998867261059516, "loss": 3.348332405090332, "step": 2107, "token_acc": 0.26316347234966464 }, { "epoch": 1.23570800351803, "grad_norm": 0.7699710288909138, "learning_rate": 0.00029988616051599917, "loss": 3.365997552871704, "step": 2108, "token_acc": 0.26083881518406926 }, { "epoch": 1.236294341835239, "grad_norm": 0.5983932136814264, "learning_rate": 0.0002998855935180683, "loss": 3.294644832611084, "step": 2109, "token_acc": 0.26960612325314015 }, { "epoch": 1.236880680152448, "grad_norm": 0.5634442210006753, "learning_rate": 0.00029988502511216425, "loss": 3.3013739585876465, "step": 2110, "token_acc": 0.26949246456810777 }, { "epoch": 1.237467018469657, "grad_norm": 0.6218794532833704, "learning_rate": 0.0002998844552982923, "loss": 3.334766387939453, "step": 2111, "token_acc": 0.26451730324224537 }, { "epoch": 1.2380533567868661, "grad_norm": 0.6565069088306307, "learning_rate": 0.0002998838840764579, "loss": 3.270565986633301, "step": 2112, "token_acc": 0.2734178335559079 }, { "epoch": 1.238639695104075, "grad_norm": 0.70861216376591, "learning_rate": 0.00029988331144666634, "loss": 3.326977252960205, "step": 2113, "token_acc": 0.26604877996723814 }, { "epoch": 1.2392260334212841, "grad_norm": 0.6303386920349181, "learning_rate": 0.0002998827374089231, "loss": 3.306929588317871, "step": 2114, "token_acc": 0.26793842901214593 }, { "epoch": 1.239812371738493, "grad_norm": 0.7554077898506995, "learning_rate": 0.0002998821619632335, "loss": 3.3262505531311035, "step": 2115, "token_acc": 0.2663884511126521 }, { "epoch": 1.2403987100557021, "grad_norm": 0.7159698777971694, "learning_rate": 0.0002998815851096029, "loss": 3.327603816986084, "step": 2116, "token_acc": 0.26626987362413373 }, { "epoch": 1.2409850483729112, "grad_norm": 0.6951961866627198, "learning_rate": 0.00029988100684803684, "loss": 3.2882933616638184, "step": 2117, "token_acc": 0.2700128567755207 }, { "epoch": 1.2415713866901201, "grad_norm": 0.6730787396317341, "learning_rate": 0.0002998804271785407, "loss": 3.351698875427246, "step": 2118, "token_acc": 0.2627433005467766 }, { "epoch": 1.2421577250073292, "grad_norm": 0.5918778624078417, "learning_rate": 0.00029987984610111985, "loss": 3.31082820892334, "step": 2119, "token_acc": 0.2667638290120026 }, { "epoch": 1.2427440633245384, "grad_norm": 0.5570989689294167, "learning_rate": 0.00029987926361577983, "loss": 3.3346266746520996, "step": 2120, "token_acc": 0.26360399516591426 }, { "epoch": 1.2433304016417472, "grad_norm": 0.5726842910744052, "learning_rate": 0.0002998786797225261, "loss": 3.3188109397888184, "step": 2121, "token_acc": 0.26551651479008254 }, { "epoch": 1.2439167399589564, "grad_norm": 0.5654421812957456, "learning_rate": 0.0002998780944213642, "loss": 3.3154945373535156, "step": 2122, "token_acc": 0.26729529812825625 }, { "epoch": 1.2445030782761655, "grad_norm": 0.5035371758236616, "learning_rate": 0.0002998775077122995, "loss": 3.2638792991638184, "step": 2123, "token_acc": 0.2725763459344415 }, { "epoch": 1.2450894165933744, "grad_norm": 0.5344870894728856, "learning_rate": 0.00029987691959533757, "loss": 3.3320822715759277, "step": 2124, "token_acc": 0.2656573648055263 }, { "epoch": 1.2456757549105835, "grad_norm": 0.550705871090821, "learning_rate": 0.00029987633007048394, "loss": 3.335256576538086, "step": 2125, "token_acc": 0.26625558448750514 }, { "epoch": 1.2462620932277924, "grad_norm": 0.6969259635416923, "learning_rate": 0.0002998757391377442, "loss": 3.3727118968963623, "step": 2126, "token_acc": 0.25852920045160627 }, { "epoch": 1.2468484315450015, "grad_norm": 0.7668450938910254, "learning_rate": 0.0002998751467971238, "loss": 3.354109048843384, "step": 2127, "token_acc": 0.2606746758061422 }, { "epoch": 1.2474347698622106, "grad_norm": 0.828645404198967, "learning_rate": 0.0002998745530486284, "loss": 3.310776472091675, "step": 2128, "token_acc": 0.2677291944231717 }, { "epoch": 1.2480211081794195, "grad_norm": 0.8397830233194004, "learning_rate": 0.00029987395789226343, "loss": 3.284890651702881, "step": 2129, "token_acc": 0.27193671556716664 }, { "epoch": 1.2486074464966286, "grad_norm": 0.8319792205900353, "learning_rate": 0.0002998733613280347, "loss": 3.3155770301818848, "step": 2130, "token_acc": 0.2663592983399422 }, { "epoch": 1.2491937848138375, "grad_norm": 0.6358709918855738, "learning_rate": 0.00029987276335594764, "loss": 3.3107969760894775, "step": 2131, "token_acc": 0.26838361579233844 }, { "epoch": 1.2497801231310466, "grad_norm": 0.8187670448347658, "learning_rate": 0.000299872163976008, "loss": 3.327176570892334, "step": 2132, "token_acc": 0.265643496393001 }, { "epoch": 1.2503664614482557, "grad_norm": 0.8799754377740789, "learning_rate": 0.00029987156318822127, "loss": 3.3459300994873047, "step": 2133, "token_acc": 0.2619655418437908 }, { "epoch": 1.2509527997654648, "grad_norm": 0.8607173781795262, "learning_rate": 0.00029987096099259316, "loss": 3.3512959480285645, "step": 2134, "token_acc": 0.2622847739708617 }, { "epoch": 1.2515391380826737, "grad_norm": 0.7310525150665035, "learning_rate": 0.0002998703573891293, "loss": 3.311583995819092, "step": 2135, "token_acc": 0.2678241671033153 }, { "epoch": 1.2521254763998828, "grad_norm": 0.6104269828926102, "learning_rate": 0.0002998697523778354, "loss": 3.307077169418335, "step": 2136, "token_acc": 0.26972578460034025 }, { "epoch": 1.2527118147170917, "grad_norm": 0.6156106157703805, "learning_rate": 0.00029986914595871714, "loss": 3.2971267700195312, "step": 2137, "token_acc": 0.26959025959009825 }, { "epoch": 1.2532981530343008, "grad_norm": 0.7302402155802491, "learning_rate": 0.00029986853813178025, "loss": 3.3069005012512207, "step": 2138, "token_acc": 0.26683005227004186 }, { "epoch": 1.25388449135151, "grad_norm": 0.6563072697916883, "learning_rate": 0.0002998679288970303, "loss": 3.2868282794952393, "step": 2139, "token_acc": 0.2711505057068376 }, { "epoch": 1.2544708296687188, "grad_norm": 0.6429617948728832, "learning_rate": 0.0002998673182544732, "loss": 3.359733819961548, "step": 2140, "token_acc": 0.26204009305651266 }, { "epoch": 1.255057167985928, "grad_norm": 0.7273487865264391, "learning_rate": 0.00029986670620411453, "loss": 3.303636074066162, "step": 2141, "token_acc": 0.2696956868148483 }, { "epoch": 1.2556435063031368, "grad_norm": 0.6274279233808178, "learning_rate": 0.00029986609274596014, "loss": 3.3028361797332764, "step": 2142, "token_acc": 0.26970384555553767 }, { "epoch": 1.256229844620346, "grad_norm": 0.6306918888174531, "learning_rate": 0.00029986547788001574, "loss": 3.300806999206543, "step": 2143, "token_acc": 0.2689138383225023 }, { "epoch": 1.256816182937555, "grad_norm": 0.734844206472174, "learning_rate": 0.00029986486160628713, "loss": 3.381234645843506, "step": 2144, "token_acc": 0.26008237340921425 }, { "epoch": 1.257402521254764, "grad_norm": 0.5949168326818668, "learning_rate": 0.0002998642439247801, "loss": 3.3264591693878174, "step": 2145, "token_acc": 0.26503336622213736 }, { "epoch": 1.257988859571973, "grad_norm": 0.6565293655178087, "learning_rate": 0.00029986362483550044, "loss": 3.32952880859375, "step": 2146, "token_acc": 0.2647331726408004 }, { "epoch": 1.258575197889182, "grad_norm": 0.621292523357113, "learning_rate": 0.00029986300433845395, "loss": 3.257190704345703, "step": 2147, "token_acc": 0.27434129537484353 }, { "epoch": 1.259161536206391, "grad_norm": 0.5922717530220168, "learning_rate": 0.00029986238243364646, "loss": 3.345968008041382, "step": 2148, "token_acc": 0.26301378221506966 }, { "epoch": 1.2597478745236002, "grad_norm": 0.5224942640936433, "learning_rate": 0.00029986175912108387, "loss": 3.2582998275756836, "step": 2149, "token_acc": 0.2728868559638074 }, { "epoch": 1.2603342128408093, "grad_norm": 0.5731204151215652, "learning_rate": 0.000299861134400772, "loss": 3.289397716522217, "step": 2150, "token_acc": 0.2709508107414401 }, { "epoch": 1.2609205511580182, "grad_norm": 0.6082071246763562, "learning_rate": 0.00029986050827271666, "loss": 3.2928662300109863, "step": 2151, "token_acc": 0.26959458607957054 }, { "epoch": 1.2615068894752273, "grad_norm": 0.5060442865288772, "learning_rate": 0.0002998598807369238, "loss": 3.28161883354187, "step": 2152, "token_acc": 0.2724963848125584 }, { "epoch": 1.2620932277924362, "grad_norm": 0.5695834381928475, "learning_rate": 0.0002998592517933993, "loss": 3.289191246032715, "step": 2153, "token_acc": 0.270518978365867 }, { "epoch": 1.2626795661096453, "grad_norm": 0.5509504992003815, "learning_rate": 0.0002998586214421491, "loss": 3.3023438453674316, "step": 2154, "token_acc": 0.26857629581654063 }, { "epoch": 1.2632659044268544, "grad_norm": 0.5772255455075851, "learning_rate": 0.0002998579896831791, "loss": 3.2766575813293457, "step": 2155, "token_acc": 0.2724554690592426 }, { "epoch": 1.2638522427440633, "grad_norm": 0.5637676735137469, "learning_rate": 0.00029985735651649514, "loss": 3.2982006072998047, "step": 2156, "token_acc": 0.26910432723269 }, { "epoch": 1.2644385810612724, "grad_norm": 0.5368074090825047, "learning_rate": 0.0002998567219421033, "loss": 3.3654940128326416, "step": 2157, "token_acc": 0.25963998813782174 }, { "epoch": 1.2650249193784813, "grad_norm": 0.5540173743536623, "learning_rate": 0.00029985608596000955, "loss": 3.3347549438476562, "step": 2158, "token_acc": 0.2637975215842937 }, { "epoch": 1.2656112576956904, "grad_norm": 0.5834929108363435, "learning_rate": 0.00029985544857021973, "loss": 3.280578136444092, "step": 2159, "token_acc": 0.2717119415351722 }, { "epoch": 1.2661975960128995, "grad_norm": 0.5646854900649066, "learning_rate": 0.00029985480977273997, "loss": 3.2801413536071777, "step": 2160, "token_acc": 0.2709653277562486 }, { "epoch": 1.2667839343301086, "grad_norm": 0.5810387304476278, "learning_rate": 0.00029985416956757613, "loss": 3.297654151916504, "step": 2161, "token_acc": 0.2692621505658639 }, { "epoch": 1.2673702726473175, "grad_norm": 0.5157863683401149, "learning_rate": 0.0002998535279547343, "loss": 3.268155336380005, "step": 2162, "token_acc": 0.27402030403450633 }, { "epoch": 1.2679566109645266, "grad_norm": 0.5262582893830188, "learning_rate": 0.00029985288493422055, "loss": 3.33351469039917, "step": 2163, "token_acc": 0.2630807953017006 }, { "epoch": 1.2685429492817355, "grad_norm": 0.5693554269915471, "learning_rate": 0.00029985224050604084, "loss": 3.2613868713378906, "step": 2164, "token_acc": 0.272203195903776 }, { "epoch": 1.2691292875989446, "grad_norm": 0.5628776134524188, "learning_rate": 0.0002998515946702013, "loss": 3.261300563812256, "step": 2165, "token_acc": 0.27635754927622497 }, { "epoch": 1.2697156259161537, "grad_norm": 0.5315611500611698, "learning_rate": 0.00029985094742670794, "loss": 3.29467511177063, "step": 2166, "token_acc": 0.2689114566322074 }, { "epoch": 1.2703019642333626, "grad_norm": 0.6780690989771148, "learning_rate": 0.0002998502987755668, "loss": 3.312556743621826, "step": 2167, "token_acc": 0.2678858355306526 }, { "epoch": 1.2708883025505717, "grad_norm": 0.7336877112335726, "learning_rate": 0.0002998496487167841, "loss": 3.3274106979370117, "step": 2168, "token_acc": 0.2637095072144464 }, { "epoch": 1.2714746408677806, "grad_norm": 0.7929841867017643, "learning_rate": 0.00029984899725036586, "loss": 3.3308005332946777, "step": 2169, "token_acc": 0.26378217565644807 }, { "epoch": 1.2720609791849897, "grad_norm": 0.7609426399054392, "learning_rate": 0.0002998483443763182, "loss": 3.307753086090088, "step": 2170, "token_acc": 0.2681502766484272 }, { "epoch": 1.2726473175021988, "grad_norm": 0.670693378394733, "learning_rate": 0.0002998476900946473, "loss": 3.342557668685913, "step": 2171, "token_acc": 0.2627241317916868 }, { "epoch": 1.2732336558194077, "grad_norm": 0.7124681031691964, "learning_rate": 0.0002998470344053592, "loss": 3.3329200744628906, "step": 2172, "token_acc": 0.26445544732905407 }, { "epoch": 1.2738199941366168, "grad_norm": 0.8109197732369587, "learning_rate": 0.0002998463773084602, "loss": 3.3145103454589844, "step": 2173, "token_acc": 0.26800017507769075 }, { "epoch": 1.2744063324538257, "grad_norm": 0.7220782542966019, "learning_rate": 0.0002998457188039564, "loss": 3.304135799407959, "step": 2174, "token_acc": 0.26850617613080957 }, { "epoch": 1.2749926707710348, "grad_norm": 0.5695688236095846, "learning_rate": 0.00029984505889185393, "loss": 3.3218445777893066, "step": 2175, "token_acc": 0.265615223517853 }, { "epoch": 1.275579009088244, "grad_norm": 0.5632506471567461, "learning_rate": 0.0002998443975721591, "loss": 3.31032657623291, "step": 2176, "token_acc": 0.26712138626345544 }, { "epoch": 1.276165347405453, "grad_norm": 0.5493900285684672, "learning_rate": 0.0002998437348448781, "loss": 3.300459146499634, "step": 2177, "token_acc": 0.2686587265487423 }, { "epoch": 1.276751685722662, "grad_norm": 0.49386124711919854, "learning_rate": 0.0002998430707100171, "loss": 3.303426504135132, "step": 2178, "token_acc": 0.2671934050417376 }, { "epoch": 1.277338024039871, "grad_norm": 0.5057559180714568, "learning_rate": 0.00029984240516758235, "loss": 3.2868337631225586, "step": 2179, "token_acc": 0.2698031723822465 }, { "epoch": 1.27792436235708, "grad_norm": 0.5357431217413532, "learning_rate": 0.0002998417382175802, "loss": 3.3203537464141846, "step": 2180, "token_acc": 0.26492578571315745 }, { "epoch": 1.278510700674289, "grad_norm": 0.6264817217942751, "learning_rate": 0.0002998410698600167, "loss": 3.31192684173584, "step": 2181, "token_acc": 0.2670937979174721 }, { "epoch": 1.2790970389914982, "grad_norm": 0.6113179654224454, "learning_rate": 0.00029984040009489835, "loss": 3.338350296020508, "step": 2182, "token_acc": 0.26445690526793636 }, { "epoch": 1.279683377308707, "grad_norm": 0.6258366014006113, "learning_rate": 0.00029983972892223137, "loss": 3.2962443828582764, "step": 2183, "token_acc": 0.2697837823874351 }, { "epoch": 1.2802697156259162, "grad_norm": 0.5354736116305405, "learning_rate": 0.00029983905634202196, "loss": 3.266925811767578, "step": 2184, "token_acc": 0.27151789719703806 }, { "epoch": 1.280856053943125, "grad_norm": 0.6242763361588439, "learning_rate": 0.00029983838235427663, "loss": 3.326183557510376, "step": 2185, "token_acc": 0.26395035729367183 }, { "epoch": 1.2814423922603342, "grad_norm": 0.6884774051788985, "learning_rate": 0.0002998377069590015, "loss": 3.266263484954834, "step": 2186, "token_acc": 0.27291413885433147 }, { "epoch": 1.2820287305775433, "grad_norm": 0.7406855818729945, "learning_rate": 0.0002998370301562031, "loss": 3.340160846710205, "step": 2187, "token_acc": 0.2630227821000322 }, { "epoch": 1.2826150688947524, "grad_norm": 0.8701427670352354, "learning_rate": 0.0002998363519458877, "loss": 3.3322606086730957, "step": 2188, "token_acc": 0.26376326131152666 }, { "epoch": 1.2832014072119613, "grad_norm": 0.7544234078823233, "learning_rate": 0.00029983567232806164, "loss": 3.2814273834228516, "step": 2189, "token_acc": 0.2704299604478318 }, { "epoch": 1.2837877455291704, "grad_norm": 0.6431713594079861, "learning_rate": 0.0002998349913027314, "loss": 3.301873207092285, "step": 2190, "token_acc": 0.2690658332514611 }, { "epoch": 1.2843740838463793, "grad_norm": 0.6027721183581168, "learning_rate": 0.00029983430886990325, "loss": 3.302947521209717, "step": 2191, "token_acc": 0.2693317864681523 }, { "epoch": 1.2849604221635884, "grad_norm": 0.6557131651669685, "learning_rate": 0.00029983362502958375, "loss": 3.30894136428833, "step": 2192, "token_acc": 0.2691040586622573 }, { "epoch": 1.2855467604807975, "grad_norm": 0.7842394522723533, "learning_rate": 0.0002998329397817792, "loss": 3.342106580734253, "step": 2193, "token_acc": 0.26160489929919817 }, { "epoch": 1.2861330987980064, "grad_norm": 0.6925606265064673, "learning_rate": 0.0002998322531264961, "loss": 3.2745885848999023, "step": 2194, "token_acc": 0.2721911261384877 }, { "epoch": 1.2867194371152155, "grad_norm": 0.6734126567631493, "learning_rate": 0.0002998315650637409, "loss": 3.3048202991485596, "step": 2195, "token_acc": 0.2682049500296348 }, { "epoch": 1.2873057754324244, "grad_norm": 0.6690636141458522, "learning_rate": 0.00029983087559352, "loss": 3.375196933746338, "step": 2196, "token_acc": 0.2598232836366127 }, { "epoch": 1.2878921137496335, "grad_norm": 0.6019589446764615, "learning_rate": 0.00029983018471583996, "loss": 3.290217876434326, "step": 2197, "token_acc": 0.2707252430754147 }, { "epoch": 1.2884784520668426, "grad_norm": 0.49823082026053717, "learning_rate": 0.0002998294924307072, "loss": 3.3269500732421875, "step": 2198, "token_acc": 0.26613234798009616 }, { "epoch": 1.2890647903840515, "grad_norm": 0.5598927830961332, "learning_rate": 0.0002998287987381283, "loss": 3.3405699729919434, "step": 2199, "token_acc": 0.26544866571603176 }, { "epoch": 1.2896511287012606, "grad_norm": 0.6150140558428371, "learning_rate": 0.00029982810363810973, "loss": 3.242016315460205, "step": 2200, "token_acc": 0.2765774777684134 }, { "epoch": 1.2902374670184695, "grad_norm": 0.5821702231390028, "learning_rate": 0.00029982740713065803, "loss": 3.3304026126861572, "step": 2201, "token_acc": 0.26573595847740283 }, { "epoch": 1.2908238053356786, "grad_norm": 0.50217474958973, "learning_rate": 0.0002998267092157797, "loss": 3.270297050476074, "step": 2202, "token_acc": 0.2706662825294923 }, { "epoch": 1.2914101436528878, "grad_norm": 0.5219864888913429, "learning_rate": 0.0002998260098934813, "loss": 3.229401111602783, "step": 2203, "token_acc": 0.2787177716390424 }, { "epoch": 1.2919964819700969, "grad_norm": 0.47728938688848777, "learning_rate": 0.0002998253091637695, "loss": 3.327571153640747, "step": 2204, "token_acc": 0.2667041876046901 }, { "epoch": 1.2925828202873058, "grad_norm": 0.6053655829327786, "learning_rate": 0.00029982460702665075, "loss": 3.2703559398651123, "step": 2205, "token_acc": 0.2750776633646159 }, { "epoch": 1.2931691586045149, "grad_norm": 0.5543560292064499, "learning_rate": 0.0002998239034821318, "loss": 3.2333743572235107, "step": 2206, "token_acc": 0.2766598417471794 }, { "epoch": 1.2937554969217238, "grad_norm": 0.6200252388513652, "learning_rate": 0.00029982319853021907, "loss": 3.3022828102111816, "step": 2207, "token_acc": 0.26804412565243846 }, { "epoch": 1.2943418352389329, "grad_norm": 0.6137523939801273, "learning_rate": 0.00029982249217091935, "loss": 3.2986888885498047, "step": 2208, "token_acc": 0.2690404348293827 }, { "epoch": 1.294928173556142, "grad_norm": 0.5815537682012434, "learning_rate": 0.0002998217844042392, "loss": 3.3160715103149414, "step": 2209, "token_acc": 0.2676236498010233 }, { "epoch": 1.2955145118733509, "grad_norm": 0.5360001246244428, "learning_rate": 0.00029982107523018523, "loss": 3.304793357849121, "step": 2210, "token_acc": 0.2670159527002698 }, { "epoch": 1.29610085019056, "grad_norm": 0.5506234846767539, "learning_rate": 0.0002998203646487641, "loss": 3.326186418533325, "step": 2211, "token_acc": 0.26720169951611 }, { "epoch": 1.2966871885077689, "grad_norm": 0.6450482166492333, "learning_rate": 0.0002998196526599826, "loss": 3.287656784057617, "step": 2212, "token_acc": 0.27086998553684066 }, { "epoch": 1.297273526824978, "grad_norm": 0.5588678446519576, "learning_rate": 0.00029981893926384734, "loss": 3.283034086227417, "step": 2213, "token_acc": 0.27199671424807426 }, { "epoch": 1.297859865142187, "grad_norm": 0.506091078079632, "learning_rate": 0.00029981822446036504, "loss": 3.298727512359619, "step": 2214, "token_acc": 0.2684945198066332 }, { "epoch": 1.2984462034593962, "grad_norm": 0.5701354761565592, "learning_rate": 0.00029981750824954235, "loss": 3.315445899963379, "step": 2215, "token_acc": 0.2670569210002272 }, { "epoch": 1.299032541776605, "grad_norm": 0.565753216094878, "learning_rate": 0.0002998167906313861, "loss": 3.2935709953308105, "step": 2216, "token_acc": 0.27084292473319654 }, { "epoch": 1.2996188800938142, "grad_norm": 0.5214210663432226, "learning_rate": 0.00029981607160590296, "loss": 3.3164734840393066, "step": 2217, "token_acc": 0.2651342564656032 }, { "epoch": 1.300205218411023, "grad_norm": 0.6998020169228616, "learning_rate": 0.00029981535117309976, "loss": 3.308281898498535, "step": 2218, "token_acc": 0.2670380981598282 }, { "epoch": 1.3007915567282322, "grad_norm": 0.6891429930221858, "learning_rate": 0.00029981462933298316, "loss": 3.268507957458496, "step": 2219, "token_acc": 0.27166720579672643 }, { "epoch": 1.3013778950454413, "grad_norm": 0.6163740036075614, "learning_rate": 0.00029981390608556005, "loss": 3.270542621612549, "step": 2220, "token_acc": 0.27115693944444297 }, { "epoch": 1.3019642333626502, "grad_norm": 0.46880773007916293, "learning_rate": 0.0002998131814308371, "loss": 3.3270061016082764, "step": 2221, "token_acc": 0.26720261579919397 }, { "epoch": 1.3025505716798593, "grad_norm": 0.5666034478188257, "learning_rate": 0.0002998124553688212, "loss": 3.2751564979553223, "step": 2222, "token_acc": 0.27265387243028383 }, { "epoch": 1.3031369099970682, "grad_norm": 0.5425295643701308, "learning_rate": 0.0002998117278995192, "loss": 3.2680187225341797, "step": 2223, "token_acc": 0.27294542354507517 }, { "epoch": 1.3037232483142773, "grad_norm": 0.5345252467687768, "learning_rate": 0.00029981099902293785, "loss": 3.282696008682251, "step": 2224, "token_acc": 0.2712544818053268 }, { "epoch": 1.3043095866314864, "grad_norm": 0.5976044303423198, "learning_rate": 0.00029981026873908406, "loss": 3.2881011962890625, "step": 2225, "token_acc": 0.2695382115199115 }, { "epoch": 1.3048959249486953, "grad_norm": 0.6646242809482138, "learning_rate": 0.00029980953704796464, "loss": 3.3530287742614746, "step": 2226, "token_acc": 0.2618140612067457 }, { "epoch": 1.3054822632659044, "grad_norm": 0.7626656999327691, "learning_rate": 0.0002998088039495865, "loss": 3.312908172607422, "step": 2227, "token_acc": 0.26741101721053606 }, { "epoch": 1.3060686015831133, "grad_norm": 0.7075294236327936, "learning_rate": 0.0002998080694439566, "loss": 3.2757482528686523, "step": 2228, "token_acc": 0.27232796119856223 }, { "epoch": 1.3066549399003224, "grad_norm": 0.6467313184595685, "learning_rate": 0.00029980733353108163, "loss": 3.259946346282959, "step": 2229, "token_acc": 0.2749463767074785 }, { "epoch": 1.3072412782175316, "grad_norm": 0.5313339026228017, "learning_rate": 0.0002998065962109687, "loss": 3.274688482284546, "step": 2230, "token_acc": 0.2707252904584371 }, { "epoch": 1.3078276165347407, "grad_norm": 0.5687353383839434, "learning_rate": 0.0002998058574836246, "loss": 3.2825818061828613, "step": 2231, "token_acc": 0.272266373274864 }, { "epoch": 1.3084139548519496, "grad_norm": 0.6345212048483689, "learning_rate": 0.0002998051173490564, "loss": 3.3325624465942383, "step": 2232, "token_acc": 0.26486385848219296 }, { "epoch": 1.3090002931691587, "grad_norm": 0.5905731563190346, "learning_rate": 0.00029980437580727097, "loss": 3.298100233078003, "step": 2233, "token_acc": 0.2694881684002884 }, { "epoch": 1.3095866314863676, "grad_norm": 0.6631967215698978, "learning_rate": 0.00029980363285827524, "loss": 3.282688617706299, "step": 2234, "token_acc": 0.2703405792626444 }, { "epoch": 1.3101729698035767, "grad_norm": 0.6697413420367967, "learning_rate": 0.00029980288850207633, "loss": 3.307828426361084, "step": 2235, "token_acc": 0.26742929706570484 }, { "epoch": 1.3107593081207858, "grad_norm": 0.6421058698742232, "learning_rate": 0.0002998021427386811, "loss": 3.334409713745117, "step": 2236, "token_acc": 0.26487303204272716 }, { "epoch": 1.3113456464379947, "grad_norm": 0.6595973161924285, "learning_rate": 0.0002998013955680966, "loss": 3.331205129623413, "step": 2237, "token_acc": 0.2625318072819041 }, { "epoch": 1.3119319847552038, "grad_norm": 0.7495110331718255, "learning_rate": 0.00029980064699032993, "loss": 3.3111205101013184, "step": 2238, "token_acc": 0.26595421879158904 }, { "epoch": 1.3125183230724127, "grad_norm": 0.6875944127587434, "learning_rate": 0.00029979989700538794, "loss": 3.328988552093506, "step": 2239, "token_acc": 0.264067124763369 }, { "epoch": 1.3131046613896218, "grad_norm": 0.5537285182729828, "learning_rate": 0.0002997991456132778, "loss": 3.2696051597595215, "step": 2240, "token_acc": 0.2711926762707038 }, { "epoch": 1.313690999706831, "grad_norm": 0.5038321969219325, "learning_rate": 0.0002997983928140065, "loss": 3.243635654449463, "step": 2241, "token_acc": 0.27540068579160754 }, { "epoch": 1.31427733802404, "grad_norm": 0.560099986162438, "learning_rate": 0.00029979763860758123, "loss": 3.3241019248962402, "step": 2242, "token_acc": 0.2646147170818195 }, { "epoch": 1.314863676341249, "grad_norm": 0.4985661792619478, "learning_rate": 0.000299796882994009, "loss": 3.276165008544922, "step": 2243, "token_acc": 0.27285826459326656 }, { "epoch": 1.315450014658458, "grad_norm": 0.541273585665269, "learning_rate": 0.0002997961259732968, "loss": 3.294405698776245, "step": 2244, "token_acc": 0.2686876611005267 }, { "epoch": 1.316036352975667, "grad_norm": 0.5535245116614177, "learning_rate": 0.00029979536754545197, "loss": 3.253227949142456, "step": 2245, "token_acc": 0.2758418513501213 }, { "epoch": 1.316622691292876, "grad_norm": 0.6179274535339254, "learning_rate": 0.0002997946077104815, "loss": 3.2938966751098633, "step": 2246, "token_acc": 0.2703017518349082 }, { "epoch": 1.3172090296100851, "grad_norm": 0.5951189275555914, "learning_rate": 0.00029979384646839247, "loss": 3.2918429374694824, "step": 2247, "token_acc": 0.27033843980167194 }, { "epoch": 1.317795367927294, "grad_norm": 0.5024457090964609, "learning_rate": 0.00029979308381919217, "loss": 3.294224977493286, "step": 2248, "token_acc": 0.2694072544057162 }, { "epoch": 1.3183817062445031, "grad_norm": 0.5121211423651184, "learning_rate": 0.00029979231976288767, "loss": 3.2594432830810547, "step": 2249, "token_acc": 0.272581892813331 }, { "epoch": 1.318968044561712, "grad_norm": 0.6351375919838699, "learning_rate": 0.0002997915542994862, "loss": 3.2962005138397217, "step": 2250, "token_acc": 0.2674360636767293 }, { "epoch": 1.3195543828789211, "grad_norm": 0.6626472556212978, "learning_rate": 0.00029979078742899487, "loss": 3.227412223815918, "step": 2251, "token_acc": 0.2785516448274187 }, { "epoch": 1.3201407211961302, "grad_norm": 0.6226872992898699, "learning_rate": 0.000299790019151421, "loss": 3.343895435333252, "step": 2252, "token_acc": 0.26338035186394887 }, { "epoch": 1.3207270595133391, "grad_norm": 0.529315840672225, "learning_rate": 0.0002997892494667717, "loss": 3.3050756454467773, "step": 2253, "token_acc": 0.2683314780718646 }, { "epoch": 1.3213133978305482, "grad_norm": 0.4544086896748536, "learning_rate": 0.00029978847837505425, "loss": 3.3003289699554443, "step": 2254, "token_acc": 0.26957124667498705 }, { "epoch": 1.3218997361477571, "grad_norm": 0.5899682285454962, "learning_rate": 0.00029978770587627587, "loss": 3.3053488731384277, "step": 2255, "token_acc": 0.26736030324815374 }, { "epoch": 1.3224860744649662, "grad_norm": 0.6577105034602148, "learning_rate": 0.00029978693197044387, "loss": 3.3156371116638184, "step": 2256, "token_acc": 0.267959370174683 }, { "epoch": 1.3230724127821754, "grad_norm": 0.5921161976960245, "learning_rate": 0.00029978615665756547, "loss": 3.2383341789245605, "step": 2257, "token_acc": 0.2739855408900975 }, { "epoch": 1.3236587510993845, "grad_norm": 0.5663747775677623, "learning_rate": 0.00029978537993764797, "loss": 3.2610905170440674, "step": 2258, "token_acc": 0.27425446710973833 }, { "epoch": 1.3242450894165934, "grad_norm": 0.5432351136244487, "learning_rate": 0.0002997846018106987, "loss": 3.3185648918151855, "step": 2259, "token_acc": 0.2668741444768718 }, { "epoch": 1.3248314277338025, "grad_norm": 0.5823335899973447, "learning_rate": 0.00029978382227672487, "loss": 3.2780375480651855, "step": 2260, "token_acc": 0.2708189737822669 }, { "epoch": 1.3254177660510114, "grad_norm": 0.5675378446185119, "learning_rate": 0.0002997830413357339, "loss": 3.320552349090576, "step": 2261, "token_acc": 0.265323481784585 }, { "epoch": 1.3260041043682205, "grad_norm": 0.5865634430756527, "learning_rate": 0.00029978225898773307, "loss": 3.2714736461639404, "step": 2262, "token_acc": 0.27051810035013035 }, { "epoch": 1.3265904426854296, "grad_norm": 0.6176964931750497, "learning_rate": 0.0002997814752327298, "loss": 3.268202304840088, "step": 2263, "token_acc": 0.27317149897761234 }, { "epoch": 1.3271767810026385, "grad_norm": 0.5047083333210624, "learning_rate": 0.00029978069007073133, "loss": 3.2561774253845215, "step": 2264, "token_acc": 0.2755039722930905 }, { "epoch": 1.3277631193198476, "grad_norm": 0.588378636125102, "learning_rate": 0.00029977990350174517, "loss": 3.3012447357177734, "step": 2265, "token_acc": 0.26793030979583815 }, { "epoch": 1.3283494576370565, "grad_norm": 0.743236948460754, "learning_rate": 0.00029977911552577863, "loss": 3.2998924255371094, "step": 2266, "token_acc": 0.26817500695626867 }, { "epoch": 1.3289357959542656, "grad_norm": 0.6282502429321196, "learning_rate": 0.00029977832614283914, "loss": 3.278174877166748, "step": 2267, "token_acc": 0.2730944084780009 }, { "epoch": 1.3295221342714747, "grad_norm": 0.5959214202056641, "learning_rate": 0.0002997775353529341, "loss": 3.267796039581299, "step": 2268, "token_acc": 0.27000682980889423 }, { "epoch": 1.3301084725886836, "grad_norm": 0.6101010501919545, "learning_rate": 0.00029977674315607094, "loss": 3.326608180999756, "step": 2269, "token_acc": 0.26466105136576284 }, { "epoch": 1.3306948109058927, "grad_norm": 0.5333506217601538, "learning_rate": 0.0002997759495522571, "loss": 3.256826400756836, "step": 2270, "token_acc": 0.27342956359489995 }, { "epoch": 1.3312811492231018, "grad_norm": 0.5751998940429687, "learning_rate": 0.00029977515454150005, "loss": 3.280289649963379, "step": 2271, "token_acc": 0.26893647421556277 }, { "epoch": 1.3318674875403107, "grad_norm": 0.5674708798246865, "learning_rate": 0.0002997743581238072, "loss": 3.2676568031311035, "step": 2272, "token_acc": 0.2717521335569251 }, { "epoch": 1.3324538258575198, "grad_norm": 0.5209257182537923, "learning_rate": 0.00029977356029918615, "loss": 3.300467014312744, "step": 2273, "token_acc": 0.2677439458457302 }, { "epoch": 1.333040164174729, "grad_norm": 0.5370827990424959, "learning_rate": 0.00029977276106764425, "loss": 3.3089516162872314, "step": 2274, "token_acc": 0.2687172844075504 }, { "epoch": 1.3336265024919378, "grad_norm": 0.5117476927981967, "learning_rate": 0.00029977196042918914, "loss": 3.3296666145324707, "step": 2275, "token_acc": 0.2638010496518057 }, { "epoch": 1.334212840809147, "grad_norm": 0.5240396460556052, "learning_rate": 0.00029977115838382824, "loss": 3.3252065181732178, "step": 2276, "token_acc": 0.26566904884115766 }, { "epoch": 1.3347991791263558, "grad_norm": 0.5433609453309353, "learning_rate": 0.00029977035493156915, "loss": 3.2982425689697266, "step": 2277, "token_acc": 0.2677894071368362 }, { "epoch": 1.335385517443565, "grad_norm": 0.5132947145326135, "learning_rate": 0.0002997695500724194, "loss": 3.21835994720459, "step": 2278, "token_acc": 0.27913682908391174 }, { "epoch": 1.335971855760774, "grad_norm": 0.5682318840731981, "learning_rate": 0.00029976874380638655, "loss": 3.3052456378936768, "step": 2279, "token_acc": 0.26642364995259366 }, { "epoch": 1.336558194077983, "grad_norm": 0.5318389133635304, "learning_rate": 0.0002997679361334781, "loss": 3.2847137451171875, "step": 2280, "token_acc": 0.2714304702661031 }, { "epoch": 1.337144532395192, "grad_norm": 0.6270022632832412, "learning_rate": 0.0002997671270537018, "loss": 3.3030083179473877, "step": 2281, "token_acc": 0.2691590412104362 }, { "epoch": 1.337730870712401, "grad_norm": 0.70998324144504, "learning_rate": 0.0002997663165670651, "loss": 3.225252628326416, "step": 2282, "token_acc": 0.2787204019371605 }, { "epoch": 1.33831720902961, "grad_norm": 0.8485721915014303, "learning_rate": 0.0002997655046735757, "loss": 3.262086868286133, "step": 2283, "token_acc": 0.27217654694937254 }, { "epoch": 1.3389035473468192, "grad_norm": 0.8247834873770069, "learning_rate": 0.00029976469137324115, "loss": 3.30789852142334, "step": 2284, "token_acc": 0.2660406803940995 }, { "epoch": 1.3394898856640283, "grad_norm": 0.5625533896064778, "learning_rate": 0.0002997638766660692, "loss": 3.29653263092041, "step": 2285, "token_acc": 0.26793905552334746 }, { "epoch": 1.3400762239812372, "grad_norm": 0.5372431669185745, "learning_rate": 0.00029976306055206736, "loss": 3.3913815021514893, "step": 2286, "token_acc": 0.25672483877416535 }, { "epoch": 1.3406625622984463, "grad_norm": 0.6821846701498606, "learning_rate": 0.0002997622430312434, "loss": 3.2478461265563965, "step": 2287, "token_acc": 0.27491543099404936 }, { "epoch": 1.3412489006156552, "grad_norm": 0.6297146434549384, "learning_rate": 0.00029976142410360505, "loss": 3.3208537101745605, "step": 2288, "token_acc": 0.2639167947051176 }, { "epoch": 1.3418352389328643, "grad_norm": 0.5580286324362485, "learning_rate": 0.0002997606037691599, "loss": 3.3070850372314453, "step": 2289, "token_acc": 0.2686028368980932 }, { "epoch": 1.3424215772500734, "grad_norm": 0.5895264944518573, "learning_rate": 0.0002997597820279156, "loss": 3.278052806854248, "step": 2290, "token_acc": 0.2703500987619902 }, { "epoch": 1.3430079155672823, "grad_norm": 0.5111750332848601, "learning_rate": 0.00029975895887987997, "loss": 3.281803607940674, "step": 2291, "token_acc": 0.2708238112499803 }, { "epoch": 1.3435942538844914, "grad_norm": 0.6031842677870055, "learning_rate": 0.0002997581343250608, "loss": 3.2881124019622803, "step": 2292, "token_acc": 0.268021370745733 }, { "epoch": 1.3441805922017003, "grad_norm": 0.6392491756576695, "learning_rate": 0.00029975730836346567, "loss": 3.310147762298584, "step": 2293, "token_acc": 0.26805406132329207 }, { "epoch": 1.3447669305189094, "grad_norm": 0.5598359262732455, "learning_rate": 0.0002997564809951025, "loss": 3.336435317993164, "step": 2294, "token_acc": 0.26340598981112007 }, { "epoch": 1.3453532688361185, "grad_norm": 0.5837609017456445, "learning_rate": 0.00029975565221997894, "loss": 3.22463321685791, "step": 2295, "token_acc": 0.2779614562411489 }, { "epoch": 1.3459396071533274, "grad_norm": 0.6692356965286059, "learning_rate": 0.0002997548220381029, "loss": 3.294100761413574, "step": 2296, "token_acc": 0.2688499578845879 }, { "epoch": 1.3465259454705365, "grad_norm": 0.5471252942064326, "learning_rate": 0.00029975399044948197, "loss": 3.299065589904785, "step": 2297, "token_acc": 0.26853766617429836 }, { "epoch": 1.3471122837877456, "grad_norm": 0.5528774624095585, "learning_rate": 0.0002997531574541242, "loss": 3.2783710956573486, "step": 2298, "token_acc": 0.27027325073058533 }, { "epoch": 1.3476986221049545, "grad_norm": 0.5230929152248286, "learning_rate": 0.0002997523230520373, "loss": 3.3362293243408203, "step": 2299, "token_acc": 0.2632044637859116 }, { "epoch": 1.3482849604221636, "grad_norm": 0.515685632881243, "learning_rate": 0.0002997514872432291, "loss": 3.2717537879943848, "step": 2300, "token_acc": 0.27259666650907965 }, { "epoch": 1.3488712987393727, "grad_norm": 0.5154966451679686, "learning_rate": 0.0002997506500277075, "loss": 3.2458300590515137, "step": 2301, "token_acc": 0.2752459647033405 }, { "epoch": 1.3494576370565816, "grad_norm": 0.50216030163088, "learning_rate": 0.0002997498114054803, "loss": 3.2456283569335938, "step": 2302, "token_acc": 0.27291917246404374 }, { "epoch": 1.3500439753737907, "grad_norm": 0.5399919049371538, "learning_rate": 0.00029974897137655544, "loss": 3.27744460105896, "step": 2303, "token_acc": 0.2716898248868578 }, { "epoch": 1.3506303136909996, "grad_norm": 0.5884172704660784, "learning_rate": 0.00029974812994094073, "loss": 3.263371229171753, "step": 2304, "token_acc": 0.27181613916596753 }, { "epoch": 1.3512166520082087, "grad_norm": 0.5957021930861959, "learning_rate": 0.0002997472870986442, "loss": 3.3235244750976562, "step": 2305, "token_acc": 0.2657339947294993 }, { "epoch": 1.3518029903254178, "grad_norm": 0.5954054239633952, "learning_rate": 0.00029974644284967364, "loss": 3.2575554847717285, "step": 2306, "token_acc": 0.2736577356130108 }, { "epoch": 1.3523893286426267, "grad_norm": 0.5734337005440051, "learning_rate": 0.00029974559719403703, "loss": 3.283459186553955, "step": 2307, "token_acc": 0.2715370737687761 }, { "epoch": 1.3529756669598358, "grad_norm": 0.4995061504747152, "learning_rate": 0.0002997447501317424, "loss": 3.3653249740600586, "step": 2308, "token_acc": 0.26058986776084037 }, { "epoch": 1.3535620052770447, "grad_norm": 0.46836013653694625, "learning_rate": 0.00029974390166279753, "loss": 3.3367135524749756, "step": 2309, "token_acc": 0.26363310947625995 }, { "epoch": 1.3541483435942538, "grad_norm": 0.5519592326440945, "learning_rate": 0.0002997430517872105, "loss": 3.263765335083008, "step": 2310, "token_acc": 0.27324423567623685 }, { "epoch": 1.354734681911463, "grad_norm": 0.4686918427230092, "learning_rate": 0.0002997422005049894, "loss": 3.27644681930542, "step": 2311, "token_acc": 0.2715050032342289 }, { "epoch": 1.355321020228672, "grad_norm": 0.5265614040160932, "learning_rate": 0.00029974134781614195, "loss": 3.2583961486816406, "step": 2312, "token_acc": 0.27383285759188797 }, { "epoch": 1.355907358545881, "grad_norm": 0.5415736335051998, "learning_rate": 0.0002997404937206764, "loss": 3.265746593475342, "step": 2313, "token_acc": 0.26987788838870413 }, { "epoch": 1.35649369686309, "grad_norm": 0.5451073129843799, "learning_rate": 0.0002997396382186006, "loss": 3.313465118408203, "step": 2314, "token_acc": 0.2647275781957552 }, { "epoch": 1.357080035180299, "grad_norm": 0.5048112950713483, "learning_rate": 0.00029973878130992276, "loss": 3.254274368286133, "step": 2315, "token_acc": 0.2741221464856752 }, { "epoch": 1.357666373497508, "grad_norm": 0.5186716972014628, "learning_rate": 0.0002997379229946509, "loss": 3.2755894660949707, "step": 2316, "token_acc": 0.2700103412616339 }, { "epoch": 1.3582527118147172, "grad_norm": 0.6155813111810708, "learning_rate": 0.00029973706327279294, "loss": 3.264812469482422, "step": 2317, "token_acc": 0.2711816984424038 }, { "epoch": 1.358839050131926, "grad_norm": 0.7629225360859194, "learning_rate": 0.000299736202144357, "loss": 3.282783031463623, "step": 2318, "token_acc": 0.2679287160063328 }, { "epoch": 1.3594253884491352, "grad_norm": 0.8317424796066938, "learning_rate": 0.0002997353396093513, "loss": 3.2683820724487305, "step": 2319, "token_acc": 0.2729518172617446 }, { "epoch": 1.360011726766344, "grad_norm": 0.7492516148193727, "learning_rate": 0.0002997344756677838, "loss": 3.2349185943603516, "step": 2320, "token_acc": 0.2771028025181669 }, { "epoch": 1.3605980650835532, "grad_norm": 0.4687240034529518, "learning_rate": 0.00029973361031966275, "loss": 3.3158764839172363, "step": 2321, "token_acc": 0.26525094437277424 }, { "epoch": 1.3611844034007623, "grad_norm": 0.5595775221457051, "learning_rate": 0.0002997327435649961, "loss": 3.285459041595459, "step": 2322, "token_acc": 0.27008047433073845 }, { "epoch": 1.3617707417179712, "grad_norm": 0.714260067304, "learning_rate": 0.0002997318754037922, "loss": 3.298811912536621, "step": 2323, "token_acc": 0.2685461804958983 }, { "epoch": 1.3623570800351803, "grad_norm": 0.647391508108505, "learning_rate": 0.000299731005836059, "loss": 3.2870395183563232, "step": 2324, "token_acc": 0.27038044195686034 }, { "epoch": 1.3629434183523892, "grad_norm": 0.6276206671948867, "learning_rate": 0.0002997301348618048, "loss": 3.3104248046875, "step": 2325, "token_acc": 0.26754086657077764 }, { "epoch": 1.3635297566695983, "grad_norm": 0.5811453966151366, "learning_rate": 0.00029972926248103776, "loss": 3.317330837249756, "step": 2326, "token_acc": 0.26639975988564046 }, { "epoch": 1.3641160949868074, "grad_norm": 0.5399857548100404, "learning_rate": 0.000299728388693766, "loss": 3.2647323608398438, "step": 2327, "token_acc": 0.2717178676930631 }, { "epoch": 1.3647024333040165, "grad_norm": 0.5896669596479472, "learning_rate": 0.0002997275134999979, "loss": 3.323700428009033, "step": 2328, "token_acc": 0.2641683661758537 }, { "epoch": 1.3652887716212254, "grad_norm": 0.4968272930499289, "learning_rate": 0.0002997266368997415, "loss": 3.252244234085083, "step": 2329, "token_acc": 0.27375088689857996 }, { "epoch": 1.3658751099384345, "grad_norm": 0.5661909633476219, "learning_rate": 0.0002997257588930051, "loss": 3.303410768508911, "step": 2330, "token_acc": 0.26806372108880716 }, { "epoch": 1.3664614482556434, "grad_norm": 0.4889002229709549, "learning_rate": 0.000299724879479797, "loss": 3.2588319778442383, "step": 2331, "token_acc": 0.27261013297282455 }, { "epoch": 1.3670477865728525, "grad_norm": 0.523452646609507, "learning_rate": 0.00029972399866012536, "loss": 3.3019442558288574, "step": 2332, "token_acc": 0.26883919464917316 }, { "epoch": 1.3676341248900616, "grad_norm": 0.5991602508845554, "learning_rate": 0.0002997231164339985, "loss": 3.303267002105713, "step": 2333, "token_acc": 0.2693654809997365 }, { "epoch": 1.3682204632072705, "grad_norm": 0.47922301702977266, "learning_rate": 0.00029972223280142477, "loss": 3.276179790496826, "step": 2334, "token_acc": 0.270415965323388 }, { "epoch": 1.3688068015244796, "grad_norm": 0.5374123016409618, "learning_rate": 0.0002997213477624124, "loss": 3.2509799003601074, "step": 2335, "token_acc": 0.2733777940268025 }, { "epoch": 1.3693931398416885, "grad_norm": 0.5131823712888214, "learning_rate": 0.0002997204613169697, "loss": 3.2458348274230957, "step": 2336, "token_acc": 0.27555502597071996 }, { "epoch": 1.3699794781588976, "grad_norm": 0.4720311535025932, "learning_rate": 0.00029971957346510504, "loss": 3.2800302505493164, "step": 2337, "token_acc": 0.2712871592371743 }, { "epoch": 1.3705658164761068, "grad_norm": 0.5885560370358275, "learning_rate": 0.0002997186842068267, "loss": 3.286646842956543, "step": 2338, "token_acc": 0.26988527067000606 }, { "epoch": 1.3711521547933159, "grad_norm": 0.5253751759878109, "learning_rate": 0.0002997177935421431, "loss": 3.2615408897399902, "step": 2339, "token_acc": 0.2730822049669512 }, { "epoch": 1.3717384931105248, "grad_norm": 0.5917183244379426, "learning_rate": 0.0002997169014710626, "loss": 3.2781829833984375, "step": 2340, "token_acc": 0.2718929845048517 }, { "epoch": 1.3723248314277339, "grad_norm": 0.55712570027739, "learning_rate": 0.0002997160079935936, "loss": 3.3119707107543945, "step": 2341, "token_acc": 0.26857088250284233 }, { "epoch": 1.3729111697449428, "grad_norm": 0.5413481180585094, "learning_rate": 0.0002997151131097443, "loss": 3.3005857467651367, "step": 2342, "token_acc": 0.27016522415079375 }, { "epoch": 1.3734975080621519, "grad_norm": 0.6250074378623586, "learning_rate": 0.00029971421681952335, "loss": 3.269123077392578, "step": 2343, "token_acc": 0.2720781157235526 }, { "epoch": 1.374083846379361, "grad_norm": 0.5787286755194841, "learning_rate": 0.0002997133191229391, "loss": 3.2638661861419678, "step": 2344, "token_acc": 0.2729091496580322 }, { "epoch": 1.3746701846965699, "grad_norm": 0.5382273479345289, "learning_rate": 0.0002997124200199999, "loss": 3.3391809463500977, "step": 2345, "token_acc": 0.2620415033259662 }, { "epoch": 1.375256523013779, "grad_norm": 0.5154236967742366, "learning_rate": 0.00029971151951071426, "loss": 3.2429590225219727, "step": 2346, "token_acc": 0.2757056478511695 }, { "epoch": 1.3758428613309879, "grad_norm": 0.560470275544452, "learning_rate": 0.0002997106175950907, "loss": 3.2860841751098633, "step": 2347, "token_acc": 0.2716123313618209 }, { "epoch": 1.376429199648197, "grad_norm": 0.6224421636749669, "learning_rate": 0.00029970971427313754, "loss": 3.2779831886291504, "step": 2348, "token_acc": 0.26955697385638366 }, { "epoch": 1.377015537965406, "grad_norm": 0.5577765452220557, "learning_rate": 0.00029970880954486337, "loss": 3.2940945625305176, "step": 2349, "token_acc": 0.2670984949514193 }, { "epoch": 1.377601876282615, "grad_norm": 0.46331051423448555, "learning_rate": 0.00029970790341027673, "loss": 3.2180240154266357, "step": 2350, "token_acc": 0.27915366951166093 }, { "epoch": 1.378188214599824, "grad_norm": 0.47276994060129046, "learning_rate": 0.000299706995869386, "loss": 3.3227555751800537, "step": 2351, "token_acc": 0.2656742777847862 }, { "epoch": 1.378774552917033, "grad_norm": 0.42270136977989675, "learning_rate": 0.0002997060869221998, "loss": 3.291868209838867, "step": 2352, "token_acc": 0.26848096194483795 }, { "epoch": 1.379360891234242, "grad_norm": 0.5199632673138261, "learning_rate": 0.0002997051765687266, "loss": 3.3101983070373535, "step": 2353, "token_acc": 0.26690153528760036 }, { "epoch": 1.3799472295514512, "grad_norm": 0.5586597279144494, "learning_rate": 0.00029970426480897507, "loss": 3.2956953048706055, "step": 2354, "token_acc": 0.2680487070170344 }, { "epoch": 1.3805335678686603, "grad_norm": 0.5608563153143209, "learning_rate": 0.00029970335164295365, "loss": 3.2905173301696777, "step": 2355, "token_acc": 0.27045489587334937 }, { "epoch": 1.3811199061858692, "grad_norm": 0.5536133785227201, "learning_rate": 0.000299702437070671, "loss": 3.300647735595703, "step": 2356, "token_acc": 0.26856061683049826 }, { "epoch": 1.3817062445030783, "grad_norm": 0.6649924455336458, "learning_rate": 0.0002997015210921357, "loss": 3.2790932655334473, "step": 2357, "token_acc": 0.27087235785508185 }, { "epoch": 1.3822925828202872, "grad_norm": 0.5964107461280297, "learning_rate": 0.0002997006037073563, "loss": 3.231511116027832, "step": 2358, "token_acc": 0.2764793733863132 }, { "epoch": 1.3828789211374963, "grad_norm": 0.6224699335368878, "learning_rate": 0.0002996996849163414, "loss": 3.2941975593566895, "step": 2359, "token_acc": 0.26802402326596514 }, { "epoch": 1.3834652594547054, "grad_norm": 0.5651333259570841, "learning_rate": 0.0002996987647190998, "loss": 3.2717056274414062, "step": 2360, "token_acc": 0.26955286794936356 }, { "epoch": 1.3840515977719143, "grad_norm": 0.5860490237091708, "learning_rate": 0.00029969784311563994, "loss": 3.322009563446045, "step": 2361, "token_acc": 0.2652774888238249 }, { "epoch": 1.3846379360891234, "grad_norm": 0.5573661304462181, "learning_rate": 0.0002996969201059706, "loss": 3.2978432178497314, "step": 2362, "token_acc": 0.267980186658104 }, { "epoch": 1.3852242744063323, "grad_norm": 0.5667250924505168, "learning_rate": 0.0002996959956901004, "loss": 3.2606427669525146, "step": 2363, "token_acc": 0.27151666839368604 }, { "epoch": 1.3858106127235414, "grad_norm": 0.44134976736199516, "learning_rate": 0.00029969506986803805, "loss": 3.294374465942383, "step": 2364, "token_acc": 0.2676413468497316 }, { "epoch": 1.3863969510407506, "grad_norm": 0.5073717246582231, "learning_rate": 0.00029969414263979226, "loss": 3.272484302520752, "step": 2365, "token_acc": 0.27024941205917474 }, { "epoch": 1.3869832893579597, "grad_norm": 0.4450021797425381, "learning_rate": 0.0002996932140053717, "loss": 3.306857109069824, "step": 2366, "token_acc": 0.2669401195573612 }, { "epoch": 1.3875696276751686, "grad_norm": 0.429587450942379, "learning_rate": 0.00029969228396478507, "loss": 3.266036033630371, "step": 2367, "token_acc": 0.27178082770047435 }, { "epoch": 1.3881559659923777, "grad_norm": 0.5459278389143569, "learning_rate": 0.00029969135251804117, "loss": 3.3118112087249756, "step": 2368, "token_acc": 0.2668911205989404 }, { "epoch": 1.3887423043095866, "grad_norm": 0.5746779701717927, "learning_rate": 0.00029969041966514874, "loss": 3.2996721267700195, "step": 2369, "token_acc": 0.2670568911294909 }, { "epoch": 1.3893286426267957, "grad_norm": 0.5943429117838637, "learning_rate": 0.0002996894854061165, "loss": 3.2907819747924805, "step": 2370, "token_acc": 0.2694884014850312 }, { "epoch": 1.3899149809440048, "grad_norm": 0.6759504312438539, "learning_rate": 0.0002996885497409533, "loss": 3.2052154541015625, "step": 2371, "token_acc": 0.2799557856083761 }, { "epoch": 1.3905013192612137, "grad_norm": 0.6419167773567508, "learning_rate": 0.0002996876126696678, "loss": 3.2945470809936523, "step": 2372, "token_acc": 0.26735053694502325 }, { "epoch": 1.3910876575784228, "grad_norm": 0.6068298106982998, "learning_rate": 0.000299686674192269, "loss": 3.2378623485565186, "step": 2373, "token_acc": 0.27459639395454055 }, { "epoch": 1.3916739958956317, "grad_norm": 0.48792939042572236, "learning_rate": 0.0002996857343087655, "loss": 3.264981746673584, "step": 2374, "token_acc": 0.26990316896938676 }, { "epoch": 1.3922603342128408, "grad_norm": 0.5495416257441966, "learning_rate": 0.00029968479301916627, "loss": 3.294529914855957, "step": 2375, "token_acc": 0.26965454461963173 }, { "epoch": 1.39284667253005, "grad_norm": 0.6417954643977561, "learning_rate": 0.0002996838503234801, "loss": 3.2646875381469727, "step": 2376, "token_acc": 0.2729160587875012 }, { "epoch": 1.3934330108472588, "grad_norm": 0.586601301747982, "learning_rate": 0.0002996829062217159, "loss": 3.3135623931884766, "step": 2377, "token_acc": 0.2653031278156777 }, { "epoch": 1.394019349164468, "grad_norm": 0.6027622068121812, "learning_rate": 0.00029968196071388246, "loss": 3.2888572216033936, "step": 2378, "token_acc": 0.2704266764329258 }, { "epoch": 1.3946056874816768, "grad_norm": 0.5032874618310406, "learning_rate": 0.0002996810137999887, "loss": 3.2596287727355957, "step": 2379, "token_acc": 0.2735786967766483 }, { "epoch": 1.395192025798886, "grad_norm": 0.4906476191151634, "learning_rate": 0.0002996800654800435, "loss": 3.323955535888672, "step": 2380, "token_acc": 0.2652195641769417 }, { "epoch": 1.395778364116095, "grad_norm": 0.5560025432747817, "learning_rate": 0.0002996791157540558, "loss": 3.3399150371551514, "step": 2381, "token_acc": 0.26121292736913354 }, { "epoch": 1.3963647024333041, "grad_norm": 0.49341289746083594, "learning_rate": 0.0002996781646220345, "loss": 3.26633882522583, "step": 2382, "token_acc": 0.2722567751767122 }, { "epoch": 1.396951040750513, "grad_norm": 0.5448553545367426, "learning_rate": 0.00029967721208398854, "loss": 3.292320728302002, "step": 2383, "token_acc": 0.2673366496890594 }, { "epoch": 1.3975373790677221, "grad_norm": 0.5225034765496931, "learning_rate": 0.00029967625813992683, "loss": 3.3125152587890625, "step": 2384, "token_acc": 0.26563303134382255 }, { "epoch": 1.398123717384931, "grad_norm": 0.5500913852697767, "learning_rate": 0.0002996753027898584, "loss": 3.2874464988708496, "step": 2385, "token_acc": 0.27049894403379093 }, { "epoch": 1.3987100557021401, "grad_norm": 0.4645968084795441, "learning_rate": 0.0002996743460337922, "loss": 3.300896644592285, "step": 2386, "token_acc": 0.2677324724901682 }, { "epoch": 1.3992963940193492, "grad_norm": 0.49463476764608016, "learning_rate": 0.0002996733878717372, "loss": 3.307324171066284, "step": 2387, "token_acc": 0.26644562201364336 }, { "epoch": 1.3998827323365581, "grad_norm": 0.5539459308849792, "learning_rate": 0.0002996724283037024, "loss": 3.290579319000244, "step": 2388, "token_acc": 0.27121335645449546 }, { "epoch": 1.4004690706537672, "grad_norm": 0.548340525269374, "learning_rate": 0.0002996714673296968, "loss": 3.2658143043518066, "step": 2389, "token_acc": 0.2707590904790438 }, { "epoch": 1.4010554089709761, "grad_norm": 0.5190091175210023, "learning_rate": 0.0002996705049497295, "loss": 3.2678635120391846, "step": 2390, "token_acc": 0.2721067853626518 }, { "epoch": 1.4016417472881852, "grad_norm": 0.6100558408639117, "learning_rate": 0.0002996695411638095, "loss": 3.272930145263672, "step": 2391, "token_acc": 0.2702527083410063 }, { "epoch": 1.4022280856053944, "grad_norm": 0.6171545359403329, "learning_rate": 0.00029966857597194576, "loss": 3.299881935119629, "step": 2392, "token_acc": 0.2691575993616335 }, { "epoch": 1.4028144239226035, "grad_norm": 0.5992004860102884, "learning_rate": 0.0002996676093741475, "loss": 3.26491117477417, "step": 2393, "token_acc": 0.271936449112317 }, { "epoch": 1.4034007622398124, "grad_norm": 0.6052576275531372, "learning_rate": 0.0002996666413704237, "loss": 3.2830424308776855, "step": 2394, "token_acc": 0.27201498142862324 }, { "epoch": 1.4039871005570215, "grad_norm": 0.5422850506693514, "learning_rate": 0.00029966567196078347, "loss": 3.2469875812530518, "step": 2395, "token_acc": 0.27374289101304 }, { "epoch": 1.4045734388742304, "grad_norm": 0.46615885839772603, "learning_rate": 0.000299664701145236, "loss": 3.272097110748291, "step": 2396, "token_acc": 0.2695859074306419 }, { "epoch": 1.4051597771914395, "grad_norm": 0.5547669742528543, "learning_rate": 0.00029966372892379023, "loss": 3.2573513984680176, "step": 2397, "token_acc": 0.2730984196410369 }, { "epoch": 1.4057461155086486, "grad_norm": 0.5888378255725208, "learning_rate": 0.00029966275529645544, "loss": 3.2564949989318848, "step": 2398, "token_acc": 0.2733417284964659 }, { "epoch": 1.4063324538258575, "grad_norm": 0.6999742138984829, "learning_rate": 0.0002996617802632408, "loss": 3.3245022296905518, "step": 2399, "token_acc": 0.26458784607706437 }, { "epoch": 1.4069187921430666, "grad_norm": 0.6187743950601527, "learning_rate": 0.00029966080382415534, "loss": 3.289876937866211, "step": 2400, "token_acc": 0.2689946765163462 }, { "epoch": 1.4075051304602755, "grad_norm": 0.5574328057106079, "learning_rate": 0.00029965982597920834, "loss": 3.3295211791992188, "step": 2401, "token_acc": 0.26311146040639377 }, { "epoch": 1.4080914687774846, "grad_norm": 0.5444022659333221, "learning_rate": 0.0002996588467284089, "loss": 3.319186210632324, "step": 2402, "token_acc": 0.26461354050163755 }, { "epoch": 1.4086778070946937, "grad_norm": 0.5520874374873401, "learning_rate": 0.00029965786607176627, "loss": 3.270364761352539, "step": 2403, "token_acc": 0.27180070329508715 }, { "epoch": 1.4092641454119026, "grad_norm": 0.6257688853954135, "learning_rate": 0.0002996568840092897, "loss": 3.250545024871826, "step": 2404, "token_acc": 0.27658181478075167 }, { "epoch": 1.4098504837291117, "grad_norm": 0.6514541129360853, "learning_rate": 0.00029965590054098837, "loss": 3.2695682048797607, "step": 2405, "token_acc": 0.2693352791587935 }, { "epoch": 1.4104368220463206, "grad_norm": 0.5837524493790546, "learning_rate": 0.0002996549156668715, "loss": 3.2856874465942383, "step": 2406, "token_acc": 0.269773548605153 }, { "epoch": 1.4110231603635297, "grad_norm": 0.637914880472307, "learning_rate": 0.0002996539293869483, "loss": 3.3023757934570312, "step": 2407, "token_acc": 0.2672429490814686 }, { "epoch": 1.4116094986807388, "grad_norm": 0.6191978222907231, "learning_rate": 0.00029965294170122814, "loss": 3.2671241760253906, "step": 2408, "token_acc": 0.27227085209306223 }, { "epoch": 1.412195836997948, "grad_norm": 0.6222856911579304, "learning_rate": 0.0002996519526097203, "loss": 3.317592144012451, "step": 2409, "token_acc": 0.2658801899016178 }, { "epoch": 1.4127821753151568, "grad_norm": 0.6804586920788489, "learning_rate": 0.00029965096211243393, "loss": 3.284654140472412, "step": 2410, "token_acc": 0.2704206942915532 }, { "epoch": 1.413368513632366, "grad_norm": 0.6742883424313039, "learning_rate": 0.0002996499702093785, "loss": 3.230648994445801, "step": 2411, "token_acc": 0.275978607073568 }, { "epoch": 1.4139548519495748, "grad_norm": 0.5872339128456647, "learning_rate": 0.0002996489769005632, "loss": 3.290733575820923, "step": 2412, "token_acc": 0.26818145768895235 }, { "epoch": 1.414541190266784, "grad_norm": 0.48972058786107486, "learning_rate": 0.0002996479821859975, "loss": 3.28164005279541, "step": 2413, "token_acc": 0.2707975110690797 }, { "epoch": 1.415127528583993, "grad_norm": 0.5794363584208544, "learning_rate": 0.0002996469860656906, "loss": 3.244933605194092, "step": 2414, "token_acc": 0.27554281094849875 }, { "epoch": 1.415713866901202, "grad_norm": 0.6710234464995446, "learning_rate": 0.0002996459885396519, "loss": 3.302269220352173, "step": 2415, "token_acc": 0.26643798839579413 }, { "epoch": 1.416300205218411, "grad_norm": 0.5854137865708379, "learning_rate": 0.00029964498960789087, "loss": 3.2613236904144287, "step": 2416, "token_acc": 0.2721644409823628 }, { "epoch": 1.41688654353562, "grad_norm": 0.5571072159267442, "learning_rate": 0.00029964398927041677, "loss": 3.266636848449707, "step": 2417, "token_acc": 0.27184649630578067 }, { "epoch": 1.417472881852829, "grad_norm": 0.5983563131923706, "learning_rate": 0.000299642987527239, "loss": 3.273965835571289, "step": 2418, "token_acc": 0.2718297727084839 }, { "epoch": 1.4180592201700382, "grad_norm": 0.5485429302425923, "learning_rate": 0.000299641984378367, "loss": 3.3078503608703613, "step": 2419, "token_acc": 0.26543168160362646 }, { "epoch": 1.4186455584872473, "grad_norm": 0.43728030135305784, "learning_rate": 0.00029964097982381025, "loss": 3.2973577976226807, "step": 2420, "token_acc": 0.2682629762202354 }, { "epoch": 1.4192318968044562, "grad_norm": 0.4855300240859427, "learning_rate": 0.00029963997386357814, "loss": 3.2981672286987305, "step": 2421, "token_acc": 0.26688090869201003 }, { "epoch": 1.4198182351216653, "grad_norm": 0.5246237559632434, "learning_rate": 0.0002996389664976801, "loss": 3.255279541015625, "step": 2422, "token_acc": 0.2724458204334365 }, { "epoch": 1.4204045734388742, "grad_norm": 0.488053208292944, "learning_rate": 0.0002996379577261256, "loss": 3.2573599815368652, "step": 2423, "token_acc": 0.272176678993509 }, { "epoch": 1.4209909117560833, "grad_norm": 0.4910361792162045, "learning_rate": 0.0002996369475489242, "loss": 3.227957248687744, "step": 2424, "token_acc": 0.2761903787417567 }, { "epoch": 1.4215772500732924, "grad_norm": 0.5108161096315005, "learning_rate": 0.0002996359359660852, "loss": 3.2387242317199707, "step": 2425, "token_acc": 0.2769740314578275 }, { "epoch": 1.4221635883905013, "grad_norm": 0.531490502074727, "learning_rate": 0.0002996349229776183, "loss": 3.2796196937561035, "step": 2426, "token_acc": 0.2711439280782347 }, { "epoch": 1.4227499267077104, "grad_norm": 0.5284219595620157, "learning_rate": 0.0002996339085835329, "loss": 3.295367956161499, "step": 2427, "token_acc": 0.2688898236226973 }, { "epoch": 1.4233362650249193, "grad_norm": 0.5785852547202086, "learning_rate": 0.0002996328927838386, "loss": 3.335418701171875, "step": 2428, "token_acc": 0.2637451063486298 }, { "epoch": 1.4239226033421284, "grad_norm": 0.6141501793925301, "learning_rate": 0.00029963187557854485, "loss": 3.254877805709839, "step": 2429, "token_acc": 0.2738140096932212 }, { "epoch": 1.4245089416593375, "grad_norm": 0.5626640791176556, "learning_rate": 0.00029963085696766133, "loss": 3.276174545288086, "step": 2430, "token_acc": 0.26854819357419013 }, { "epoch": 1.4250952799765464, "grad_norm": 0.47233076945503105, "learning_rate": 0.00029962983695119746, "loss": 3.2461767196655273, "step": 2431, "token_acc": 0.27423772091689236 }, { "epoch": 1.4256816182937555, "grad_norm": 0.5179452046766786, "learning_rate": 0.00029962881552916294, "loss": 3.239842414855957, "step": 2432, "token_acc": 0.2751083173520847 }, { "epoch": 1.4262679566109644, "grad_norm": 0.5498096003757862, "learning_rate": 0.0002996277927015673, "loss": 3.311431646347046, "step": 2433, "token_acc": 0.26475392461252634 }, { "epoch": 1.4268542949281735, "grad_norm": 0.5162105974718673, "learning_rate": 0.00029962676846842024, "loss": 3.2687466144561768, "step": 2434, "token_acc": 0.27074020709256474 }, { "epoch": 1.4274406332453826, "grad_norm": 0.5041546692426218, "learning_rate": 0.00029962574282973124, "loss": 3.2616050243377686, "step": 2435, "token_acc": 0.2734515198591612 }, { "epoch": 1.4280269715625917, "grad_norm": 0.5773355803832131, "learning_rate": 0.0002996247157855101, "loss": 3.2735462188720703, "step": 2436, "token_acc": 0.2715997471842514 }, { "epoch": 1.4286133098798006, "grad_norm": 0.5803850793501364, "learning_rate": 0.00029962368733576627, "loss": 3.261991262435913, "step": 2437, "token_acc": 0.2722853880144978 }, { "epoch": 1.4291996481970097, "grad_norm": 0.5861804962959183, "learning_rate": 0.0002996226574805096, "loss": 3.2808706760406494, "step": 2438, "token_acc": 0.26932716153167163 }, { "epoch": 1.4297859865142186, "grad_norm": 0.5955044303907834, "learning_rate": 0.00029962162621974964, "loss": 3.2741129398345947, "step": 2439, "token_acc": 0.2702591623036649 }, { "epoch": 1.4303723248314277, "grad_norm": 0.5591415897897387, "learning_rate": 0.00029962059355349613, "loss": 3.2834272384643555, "step": 2440, "token_acc": 0.2697684864129831 }, { "epoch": 1.4309586631486368, "grad_norm": 0.5410184203513996, "learning_rate": 0.0002996195594817587, "loss": 3.269676923751831, "step": 2441, "token_acc": 0.26933012109167015 }, { "epoch": 1.4315450014658457, "grad_norm": 0.524093599477998, "learning_rate": 0.00029961852400454725, "loss": 3.237142324447632, "step": 2442, "token_acc": 0.2752851239349041 }, { "epoch": 1.4321313397830548, "grad_norm": 0.5862095138113799, "learning_rate": 0.0002996174871218713, "loss": 3.263597011566162, "step": 2443, "token_acc": 0.27275309835073225 }, { "epoch": 1.4327176781002637, "grad_norm": 0.5760703519676676, "learning_rate": 0.0002996164488337407, "loss": 3.2759804725646973, "step": 2444, "token_acc": 0.26915015393300123 }, { "epoch": 1.4333040164174728, "grad_norm": 0.5577065017705736, "learning_rate": 0.00029961540914016514, "loss": 3.264591932296753, "step": 2445, "token_acc": 0.27057818435130054 }, { "epoch": 1.433890354734682, "grad_norm": 0.5956832881235639, "learning_rate": 0.00029961436804115443, "loss": 3.2746620178222656, "step": 2446, "token_acc": 0.2715555018284022 }, { "epoch": 1.434476693051891, "grad_norm": 0.5882495532963262, "learning_rate": 0.00029961332553671836, "loss": 3.293856620788574, "step": 2447, "token_acc": 0.2683404549091803 }, { "epoch": 1.4350630313691, "grad_norm": 0.6746391294561705, "learning_rate": 0.0002996122816268667, "loss": 3.255826950073242, "step": 2448, "token_acc": 0.2724063393034722 }, { "epoch": 1.435649369686309, "grad_norm": 0.5670825996457249, "learning_rate": 0.00029961123631160925, "loss": 3.2983407974243164, "step": 2449, "token_acc": 0.26728366079646887 }, { "epoch": 1.436235708003518, "grad_norm": 0.48334039553728575, "learning_rate": 0.0002996101895909558, "loss": 3.231581926345825, "step": 2450, "token_acc": 0.278606310691341 }, { "epoch": 1.436822046320727, "grad_norm": 0.5454619521333512, "learning_rate": 0.0002996091414649163, "loss": 3.273036003112793, "step": 2451, "token_acc": 0.2712632334469765 }, { "epoch": 1.4374083846379362, "grad_norm": 0.49612619988177964, "learning_rate": 0.00029960809193350045, "loss": 3.209500312805176, "step": 2452, "token_acc": 0.27882888695334257 }, { "epoch": 1.437994722955145, "grad_norm": 0.4780020641798862, "learning_rate": 0.0002996070409967182, "loss": 3.240847110748291, "step": 2453, "token_acc": 0.2755924029691783 }, { "epoch": 1.4385810612723542, "grad_norm": 0.5562260160280741, "learning_rate": 0.00029960598865457936, "loss": 3.300473213195801, "step": 2454, "token_acc": 0.2660521690753455 }, { "epoch": 1.439167399589563, "grad_norm": 0.41455540352615194, "learning_rate": 0.00029960493490709393, "loss": 3.225064754486084, "step": 2455, "token_acc": 0.27859735068088615 }, { "epoch": 1.4397537379067722, "grad_norm": 0.49060360934727537, "learning_rate": 0.0002996038797542717, "loss": 3.2779016494750977, "step": 2456, "token_acc": 0.2693551582520789 }, { "epoch": 1.4403400762239813, "grad_norm": 0.48134117070452553, "learning_rate": 0.0002996028231961226, "loss": 3.2480998039245605, "step": 2457, "token_acc": 0.2726294566841121 }, { "epoch": 1.4409264145411902, "grad_norm": 0.4846791705278586, "learning_rate": 0.00029960176523265657, "loss": 3.3117456436157227, "step": 2458, "token_acc": 0.2671325050363942 }, { "epoch": 1.4415127528583993, "grad_norm": 0.4864709920787233, "learning_rate": 0.0002996007058638836, "loss": 3.265331745147705, "step": 2459, "token_acc": 0.2698780669107512 }, { "epoch": 1.4420990911756082, "grad_norm": 0.5242517564470773, "learning_rate": 0.0002995996450898135, "loss": 3.267516613006592, "step": 2460, "token_acc": 0.2705900488416999 }, { "epoch": 1.4426854294928173, "grad_norm": 0.5732991431130952, "learning_rate": 0.0002995985829104564, "loss": 3.2622485160827637, "step": 2461, "token_acc": 0.27178177676418025 }, { "epoch": 1.4432717678100264, "grad_norm": 0.6434024227946015, "learning_rate": 0.0002995975193258221, "loss": 3.246582508087158, "step": 2462, "token_acc": 0.2724080968976392 }, { "epoch": 1.4438581061272355, "grad_norm": 0.6597641503705125, "learning_rate": 0.0002995964543359208, "loss": 3.2463173866271973, "step": 2463, "token_acc": 0.2752161421231569 }, { "epoch": 1.4444444444444444, "grad_norm": 0.5226985575894212, "learning_rate": 0.00029959538794076235, "loss": 3.26845645904541, "step": 2464, "token_acc": 0.2713407559059991 }, { "epoch": 1.4450307827616535, "grad_norm": 0.556848476299921, "learning_rate": 0.0002995943201403568, "loss": 3.30072021484375, "step": 2465, "token_acc": 0.26717867732532596 }, { "epoch": 1.4456171210788624, "grad_norm": 0.6705222556786472, "learning_rate": 0.00029959325093471416, "loss": 3.3088722229003906, "step": 2466, "token_acc": 0.26592082616179 }, { "epoch": 1.4462034593960715, "grad_norm": 0.6075013009060733, "learning_rate": 0.00029959218032384456, "loss": 3.2549691200256348, "step": 2467, "token_acc": 0.2737410320176459 }, { "epoch": 1.4467897977132806, "grad_norm": 0.5750620115404984, "learning_rate": 0.00029959110830775804, "loss": 3.267874002456665, "step": 2468, "token_acc": 0.27124176950102064 }, { "epoch": 1.4473761360304895, "grad_norm": 0.52606530692236, "learning_rate": 0.0002995900348864646, "loss": 3.3183746337890625, "step": 2469, "token_acc": 0.2662360618263493 }, { "epoch": 1.4479624743476986, "grad_norm": 0.6000248817758309, "learning_rate": 0.00029958896005997433, "loss": 3.267632484436035, "step": 2470, "token_acc": 0.2703505277730602 }, { "epoch": 1.4485488126649075, "grad_norm": 0.5913805316104338, "learning_rate": 0.00029958788382829736, "loss": 3.2394769191741943, "step": 2471, "token_acc": 0.27615980468536777 }, { "epoch": 1.4491351509821166, "grad_norm": 0.5939759552617605, "learning_rate": 0.0002995868061914438, "loss": 3.3121910095214844, "step": 2472, "token_acc": 0.266173446718813 }, { "epoch": 1.4497214892993258, "grad_norm": 0.5798415072155725, "learning_rate": 0.0002995857271494238, "loss": 3.2216055393218994, "step": 2473, "token_acc": 0.2757765561722712 }, { "epoch": 1.4503078276165349, "grad_norm": 0.5187684984997788, "learning_rate": 0.0002995846467022474, "loss": 3.2719712257385254, "step": 2474, "token_acc": 0.2699009506975831 }, { "epoch": 1.4508941659337438, "grad_norm": 0.5129690742529915, "learning_rate": 0.0002995835648499249, "loss": 3.314084053039551, "step": 2475, "token_acc": 0.26405700872426946 }, { "epoch": 1.4514805042509529, "grad_norm": 0.5269854602482452, "learning_rate": 0.00029958248159246627, "loss": 3.3296351432800293, "step": 2476, "token_acc": 0.26090290648858433 }, { "epoch": 1.4520668425681618, "grad_norm": 0.3886974435375772, "learning_rate": 0.00029958139692988186, "loss": 3.2300639152526855, "step": 2477, "token_acc": 0.27662006091775054 }, { "epoch": 1.4526531808853709, "grad_norm": 0.43471057332652185, "learning_rate": 0.00029958031086218173, "loss": 3.313685894012451, "step": 2478, "token_acc": 0.26468260757977585 }, { "epoch": 1.45323951920258, "grad_norm": 0.46782072655399637, "learning_rate": 0.00029957922338937624, "loss": 3.2337112426757812, "step": 2479, "token_acc": 0.2764421660479642 }, { "epoch": 1.4538258575197889, "grad_norm": 0.47160646249434063, "learning_rate": 0.0002995781345114754, "loss": 3.2678468227386475, "step": 2480, "token_acc": 0.2707927508459634 }, { "epoch": 1.454412195836998, "grad_norm": 0.5304232935220056, "learning_rate": 0.0002995770442284896, "loss": 3.2588658332824707, "step": 2481, "token_acc": 0.27340846374519817 }, { "epoch": 1.4549985341542069, "grad_norm": 0.4583550574197023, "learning_rate": 0.0002995759525404291, "loss": 3.225100040435791, "step": 2482, "token_acc": 0.2775747265453676 }, { "epoch": 1.455584872471416, "grad_norm": 0.4530442909995885, "learning_rate": 0.00029957485944730395, "loss": 3.2562572956085205, "step": 2483, "token_acc": 0.2740214823612326 }, { "epoch": 1.456171210788625, "grad_norm": 0.5157824579350037, "learning_rate": 0.00029957376494912463, "loss": 3.2559168338775635, "step": 2484, "token_acc": 0.2738537465506262 }, { "epoch": 1.456757549105834, "grad_norm": 0.48449141848147914, "learning_rate": 0.00029957266904590127, "loss": 3.2081894874572754, "step": 2485, "token_acc": 0.2790809295988357 }, { "epoch": 1.457343887423043, "grad_norm": 0.4741869173861958, "learning_rate": 0.0002995715717376443, "loss": 3.2100329399108887, "step": 2486, "token_acc": 0.27951631442792496 }, { "epoch": 1.457930225740252, "grad_norm": 0.5442997792728896, "learning_rate": 0.0002995704730243639, "loss": 3.2460222244262695, "step": 2487, "token_acc": 0.27498667292422824 }, { "epoch": 1.458516564057461, "grad_norm": 0.4962109401276086, "learning_rate": 0.0002995693729060705, "loss": 3.248276710510254, "step": 2488, "token_acc": 0.2753291479053252 }, { "epoch": 1.4591029023746702, "grad_norm": 0.5220143437471754, "learning_rate": 0.00029956827138277444, "loss": 3.2501816749572754, "step": 2489, "token_acc": 0.2723430666340558 }, { "epoch": 1.4596892406918793, "grad_norm": 0.5453953855106521, "learning_rate": 0.00029956716845448597, "loss": 3.215519905090332, "step": 2490, "token_acc": 0.2766432557093625 }, { "epoch": 1.4602755790090882, "grad_norm": 0.5130281087212133, "learning_rate": 0.00029956606412121547, "loss": 3.227383613586426, "step": 2491, "token_acc": 0.2762241683403509 }, { "epoch": 1.4608619173262973, "grad_norm": 0.5057125488916887, "learning_rate": 0.00029956495838297334, "loss": 3.2652158737182617, "step": 2492, "token_acc": 0.27056826503336723 }, { "epoch": 1.4614482556435062, "grad_norm": 0.5762047330434874, "learning_rate": 0.00029956385123977, "loss": 3.2863574028015137, "step": 2493, "token_acc": 0.2694660741562191 }, { "epoch": 1.4620345939607153, "grad_norm": 0.49294866725919084, "learning_rate": 0.00029956274269161585, "loss": 3.2374112606048584, "step": 2494, "token_acc": 0.2745273269853321 }, { "epoch": 1.4626209322779244, "grad_norm": 0.44041006703346686, "learning_rate": 0.0002995616327385212, "loss": 3.2690324783325195, "step": 2495, "token_acc": 0.27038789471204927 }, { "epoch": 1.4632072705951333, "grad_norm": 0.44474094645923584, "learning_rate": 0.00029956052138049654, "loss": 3.2864012718200684, "step": 2496, "token_acc": 0.26769168636364815 }, { "epoch": 1.4637936089123424, "grad_norm": 0.4583071001481383, "learning_rate": 0.00029955940861755236, "loss": 3.2882251739501953, "step": 2497, "token_acc": 0.26651701048883386 }, { "epoch": 1.4643799472295513, "grad_norm": 0.5544376954959978, "learning_rate": 0.0002995582944496991, "loss": 3.298161745071411, "step": 2498, "token_acc": 0.267579183259073 }, { "epoch": 1.4649662855467604, "grad_norm": 0.6396720187667903, "learning_rate": 0.0002995571788769471, "loss": 3.286190986633301, "step": 2499, "token_acc": 0.2688598415982279 }, { "epoch": 1.4655526238639696, "grad_norm": 0.7066212189495559, "learning_rate": 0.000299556061899307, "loss": 3.3011059761047363, "step": 2500, "token_acc": 0.2670835573413316 }, { "epoch": 1.4661389621811787, "grad_norm": 0.6975891701218879, "learning_rate": 0.0002995549435167893, "loss": 3.2530603408813477, "step": 2501, "token_acc": 0.2746035058430718 }, { "epoch": 1.4667253004983876, "grad_norm": 0.5440023876639446, "learning_rate": 0.0002995538237294043, "loss": 3.218017578125, "step": 2502, "token_acc": 0.2800704732415317 }, { "epoch": 1.4673116388155967, "grad_norm": 0.42733706521677034, "learning_rate": 0.0002995527025371627, "loss": 3.296980619430542, "step": 2503, "token_acc": 0.2661848333118229 }, { "epoch": 1.4678979771328056, "grad_norm": 0.5775984262825813, "learning_rate": 0.00029955157994007497, "loss": 3.2467880249023438, "step": 2504, "token_acc": 0.2720436499181045 }, { "epoch": 1.4684843154500147, "grad_norm": 0.607685630956546, "learning_rate": 0.0002995504559381517, "loss": 3.257120370864868, "step": 2505, "token_acc": 0.27233091228061096 }, { "epoch": 1.4690706537672238, "grad_norm": 0.5537875300196272, "learning_rate": 0.00029954933053140344, "loss": 3.267056465148926, "step": 2506, "token_acc": 0.27111471013819377 }, { "epoch": 1.4696569920844327, "grad_norm": 0.6099697965632727, "learning_rate": 0.00029954820371984065, "loss": 3.2631402015686035, "step": 2507, "token_acc": 0.27125433870658633 }, { "epoch": 1.4702433304016418, "grad_norm": 0.5226995278710362, "learning_rate": 0.00029954707550347413, "loss": 3.2801647186279297, "step": 2508, "token_acc": 0.2701979993757968 }, { "epoch": 1.4708296687188507, "grad_norm": 0.5569274326717343, "learning_rate": 0.0002995459458823143, "loss": 3.2543892860412598, "step": 2509, "token_acc": 0.27370963672742443 }, { "epoch": 1.4714160070360598, "grad_norm": 0.5216950567724785, "learning_rate": 0.0002995448148563718, "loss": 3.2383432388305664, "step": 2510, "token_acc": 0.27730205157462934 }, { "epoch": 1.472002345353269, "grad_norm": 0.49669954943848504, "learning_rate": 0.00029954368242565726, "loss": 3.2582168579101562, "step": 2511, "token_acc": 0.2735029777471202 }, { "epoch": 1.4725886836704778, "grad_norm": 0.5435656498050259, "learning_rate": 0.0002995425485901814, "loss": 3.3101158142089844, "step": 2512, "token_acc": 0.26606064979716393 }, { "epoch": 1.473175021987687, "grad_norm": 0.4658556567794753, "learning_rate": 0.00029954141334995475, "loss": 3.258605480194092, "step": 2513, "token_acc": 0.27394187380894275 }, { "epoch": 1.4737613603048958, "grad_norm": 0.4794428128559778, "learning_rate": 0.000299540276704988, "loss": 3.2497177124023438, "step": 2514, "token_acc": 0.27413987613493496 }, { "epoch": 1.474347698622105, "grad_norm": 0.3791618840094815, "learning_rate": 0.0002995391386552919, "loss": 3.2589731216430664, "step": 2515, "token_acc": 0.2722100528946372 }, { "epoch": 1.474934036939314, "grad_norm": 0.48772423027768735, "learning_rate": 0.00029953799920087715, "loss": 3.268817663192749, "step": 2516, "token_acc": 0.2708611584304379 }, { "epoch": 1.4755203752565231, "grad_norm": 0.508323440313134, "learning_rate": 0.0002995368583417544, "loss": 3.2834722995758057, "step": 2517, "token_acc": 0.2681912789286397 }, { "epoch": 1.476106713573732, "grad_norm": 0.5191147168077236, "learning_rate": 0.00029953571607793433, "loss": 3.2675676345825195, "step": 2518, "token_acc": 0.271583866244889 }, { "epoch": 1.4766930518909411, "grad_norm": 0.45053672047382126, "learning_rate": 0.0002995345724094277, "loss": 3.214259386062622, "step": 2519, "token_acc": 0.27819643693603263 }, { "epoch": 1.47727939020815, "grad_norm": 0.6000153775055845, "learning_rate": 0.0002995334273362452, "loss": 3.277830123901367, "step": 2520, "token_acc": 0.26850099042119374 }, { "epoch": 1.4778657285253591, "grad_norm": 0.6966706102589892, "learning_rate": 0.00029953228085839777, "loss": 3.2829794883728027, "step": 2521, "token_acc": 0.26748123373438953 }, { "epoch": 1.4784520668425682, "grad_norm": 0.5717307347648334, "learning_rate": 0.00029953113297589604, "loss": 3.198235034942627, "step": 2522, "token_acc": 0.27981393167070595 }, { "epoch": 1.4790384051597771, "grad_norm": 0.5715470010392587, "learning_rate": 0.0002995299836887507, "loss": 3.3156356811523438, "step": 2523, "token_acc": 0.26235408896485907 }, { "epoch": 1.4796247434769862, "grad_norm": 0.5552363518654317, "learning_rate": 0.0002995288329969728, "loss": 3.2741451263427734, "step": 2524, "token_acc": 0.2698504146005226 }, { "epoch": 1.4802110817941951, "grad_norm": 0.4539637922710809, "learning_rate": 0.0002995276809005729, "loss": 3.2360451221466064, "step": 2525, "token_acc": 0.27472926261265496 }, { "epoch": 1.4807974201114043, "grad_norm": 0.5802624707121228, "learning_rate": 0.000299526527399562, "loss": 3.282024383544922, "step": 2526, "token_acc": 0.2711384485135072 }, { "epoch": 1.4813837584286134, "grad_norm": 0.4483308268578799, "learning_rate": 0.00029952537249395086, "loss": 3.2770121097564697, "step": 2527, "token_acc": 0.26936493499545233 }, { "epoch": 1.4819700967458225, "grad_norm": 0.49547409787206204, "learning_rate": 0.00029952421618375033, "loss": 3.2745680809020996, "step": 2528, "token_acc": 0.26975465639049456 }, { "epoch": 1.4825564350630314, "grad_norm": 0.5260362243889769, "learning_rate": 0.00029952305846897125, "loss": 3.2819323539733887, "step": 2529, "token_acc": 0.2681352753574125 }, { "epoch": 1.4831427733802405, "grad_norm": 0.4988861512922218, "learning_rate": 0.0002995218993496245, "loss": 3.2443206310272217, "step": 2530, "token_acc": 0.2739736906149531 }, { "epoch": 1.4837291116974494, "grad_norm": 0.4954154624170425, "learning_rate": 0.00029952073882572104, "loss": 3.2632195949554443, "step": 2531, "token_acc": 0.2720941903107508 }, { "epoch": 1.4843154500146585, "grad_norm": 0.4384017291872042, "learning_rate": 0.0002995195768972717, "loss": 3.2593283653259277, "step": 2532, "token_acc": 0.2712260456708239 }, { "epoch": 1.4849017883318676, "grad_norm": 0.5259080653794915, "learning_rate": 0.00029951841356428744, "loss": 3.2523694038391113, "step": 2533, "token_acc": 0.27217687126680334 }, { "epoch": 1.4854881266490765, "grad_norm": 0.5265600721444553, "learning_rate": 0.0002995172488267791, "loss": 3.270918369293213, "step": 2534, "token_acc": 0.2706597149881245 }, { "epoch": 1.4860744649662856, "grad_norm": 0.4707641302751526, "learning_rate": 0.00029951608268475775, "loss": 3.2479538917541504, "step": 2535, "token_acc": 0.27229970352884797 }, { "epoch": 1.4866608032834945, "grad_norm": 0.44607243468239133, "learning_rate": 0.00029951491513823425, "loss": 3.277556896209717, "step": 2536, "token_acc": 0.26924773143384073 }, { "epoch": 1.4872471416007036, "grad_norm": 0.4228502518496769, "learning_rate": 0.0002995137461872196, "loss": 3.2539877891540527, "step": 2537, "token_acc": 0.2751127456955257 }, { "epoch": 1.4878334799179127, "grad_norm": 0.49795194784284313, "learning_rate": 0.00029951257583172474, "loss": 3.2471823692321777, "step": 2538, "token_acc": 0.2748196531822004 }, { "epoch": 1.4884198182351216, "grad_norm": 0.4719773575178683, "learning_rate": 0.0002995114040717608, "loss": 3.24350905418396, "step": 2539, "token_acc": 0.2763765569563413 }, { "epoch": 1.4890061565523307, "grad_norm": 0.45785338985826807, "learning_rate": 0.00029951023090733856, "loss": 3.3011040687561035, "step": 2540, "token_acc": 0.2656882657065162 }, { "epoch": 1.4895924948695396, "grad_norm": 0.4834041683333375, "learning_rate": 0.00029950905633846926, "loss": 3.2872345447540283, "step": 2541, "token_acc": 0.26841136074257543 }, { "epoch": 1.4901788331867487, "grad_norm": 0.4940169380604645, "learning_rate": 0.00029950788036516376, "loss": 3.238436698913574, "step": 2542, "token_acc": 0.2766128439210413 }, { "epoch": 1.4907651715039578, "grad_norm": 0.4058187144725442, "learning_rate": 0.00029950670298743324, "loss": 3.281667470932007, "step": 2543, "token_acc": 0.2691628245446668 }, { "epoch": 1.491351509821167, "grad_norm": 0.49064419683017546, "learning_rate": 0.00029950552420528875, "loss": 3.2514026165008545, "step": 2544, "token_acc": 0.2731924800712513 }, { "epoch": 1.4919378481383758, "grad_norm": 0.4368608617111464, "learning_rate": 0.0002995043440187413, "loss": 3.2364449501037598, "step": 2545, "token_acc": 0.27491803722871 }, { "epoch": 1.492524186455585, "grad_norm": 0.4860734974790874, "learning_rate": 0.00029950316242780196, "loss": 3.2919654846191406, "step": 2546, "token_acc": 0.26726588918143573 }, { "epoch": 1.4931105247727938, "grad_norm": 0.5389055302919861, "learning_rate": 0.00029950197943248187, "loss": 3.270216941833496, "step": 2547, "token_acc": 0.26860392407339323 }, { "epoch": 1.493696863090003, "grad_norm": 0.604460852909286, "learning_rate": 0.00029950079503279217, "loss": 3.2792298793792725, "step": 2548, "token_acc": 0.2682338042660968 }, { "epoch": 1.494283201407212, "grad_norm": 0.6795394845777744, "learning_rate": 0.00029949960922874395, "loss": 3.2567477226257324, "step": 2549, "token_acc": 0.27264136418897755 }, { "epoch": 1.494869539724421, "grad_norm": 0.7381154273102846, "learning_rate": 0.00029949842202034834, "loss": 3.2532291412353516, "step": 2550, "token_acc": 0.268973641211415 }, { "epoch": 1.49545587804163, "grad_norm": 0.6467591076161764, "learning_rate": 0.0002994972334076165, "loss": 3.2274885177612305, "step": 2551, "token_acc": 0.2757874817666925 }, { "epoch": 1.496042216358839, "grad_norm": 0.5478815680891119, "learning_rate": 0.0002994960433905596, "loss": 3.291308641433716, "step": 2552, "token_acc": 0.26750355269814396 }, { "epoch": 1.496628554676048, "grad_norm": 0.5303150284439865, "learning_rate": 0.00029949485196918886, "loss": 3.228041172027588, "step": 2553, "token_acc": 0.27546946845338643 }, { "epoch": 1.4972148929932572, "grad_norm": 0.6857198423433041, "learning_rate": 0.00029949365914351544, "loss": 3.227492570877075, "step": 2554, "token_acc": 0.2766444544376763 }, { "epoch": 1.4978012313104663, "grad_norm": 0.5785615169596057, "learning_rate": 0.0002994924649135504, "loss": 3.2660446166992188, "step": 2555, "token_acc": 0.2707877834578582 }, { "epoch": 1.4983875696276752, "grad_norm": 0.5484608991133217, "learning_rate": 0.00029949126927930527, "loss": 3.2557449340820312, "step": 2556, "token_acc": 0.2738700768334664 }, { "epoch": 1.4989739079448843, "grad_norm": 0.6157931503054472, "learning_rate": 0.00029949007224079106, "loss": 3.308929443359375, "step": 2557, "token_acc": 0.2669024173319344 }, { "epoch": 1.4995602462620932, "grad_norm": 0.5611333465156132, "learning_rate": 0.000299488873798019, "loss": 3.266162157058716, "step": 2558, "token_acc": 0.27174382703925715 }, { "epoch": 1.5001465845793023, "grad_norm": 0.5025520046643134, "learning_rate": 0.00029948767395100045, "loss": 3.2401084899902344, "step": 2559, "token_acc": 0.27444810720703383 }, { "epoch": 1.5007329228965114, "grad_norm": 0.5886691227972639, "learning_rate": 0.0002994864726997466, "loss": 3.246023654937744, "step": 2560, "token_acc": 0.27449062796746626 }, { "epoch": 1.5013192612137203, "grad_norm": 0.4680933992063443, "learning_rate": 0.0002994852700442689, "loss": 3.238621711730957, "step": 2561, "token_acc": 0.27515372696794815 }, { "epoch": 1.5019055995309294, "grad_norm": 0.5113795499723659, "learning_rate": 0.0002994840659845784, "loss": 3.309816360473633, "step": 2562, "token_acc": 0.26539564393491394 }, { "epoch": 1.5024919378481383, "grad_norm": 0.5286890906881446, "learning_rate": 0.00029948286052068656, "loss": 3.233323574066162, "step": 2563, "token_acc": 0.2752145175354863 }, { "epoch": 1.5030782761653474, "grad_norm": 0.4918747975834918, "learning_rate": 0.0002994816536526047, "loss": 3.2589094638824463, "step": 2564, "token_acc": 0.27382327151618374 }, { "epoch": 1.5036646144825565, "grad_norm": 0.5063870917674708, "learning_rate": 0.0002994804453803441, "loss": 3.249185085296631, "step": 2565, "token_acc": 0.2723194007251863 }, { "epoch": 1.5042509527997656, "grad_norm": 0.5342237900882338, "learning_rate": 0.00029947923570391614, "loss": 3.2482357025146484, "step": 2566, "token_acc": 0.2719251910233412 }, { "epoch": 1.5048372911169745, "grad_norm": 0.4536384078553172, "learning_rate": 0.00029947802462333223, "loss": 3.2479400634765625, "step": 2567, "token_acc": 0.2740501510749105 }, { "epoch": 1.5054236294341834, "grad_norm": 0.440432686457559, "learning_rate": 0.00029947681213860367, "loss": 3.255180835723877, "step": 2568, "token_acc": 0.2716750321588014 }, { "epoch": 1.5060099677513925, "grad_norm": 0.45371671423950166, "learning_rate": 0.0002994755982497419, "loss": 3.273400068283081, "step": 2569, "token_acc": 0.26875115146729833 }, { "epoch": 1.5065963060686016, "grad_norm": 0.5039327713370054, "learning_rate": 0.0002994743829567583, "loss": 3.2287909984588623, "step": 2570, "token_acc": 0.2754379212781892 }, { "epoch": 1.5071826443858107, "grad_norm": 0.446406781702838, "learning_rate": 0.00029947316625966426, "loss": 3.2684402465820312, "step": 2571, "token_acc": 0.2712274565912423 }, { "epoch": 1.5077689827030196, "grad_norm": 0.5615606327568824, "learning_rate": 0.00029947194815847127, "loss": 3.2657227516174316, "step": 2572, "token_acc": 0.2721600185111388 }, { "epoch": 1.5083553210202285, "grad_norm": 0.5297100396298892, "learning_rate": 0.00029947072865319077, "loss": 3.244450092315674, "step": 2573, "token_acc": 0.2752240192537881 }, { "epoch": 1.5089416593374376, "grad_norm": 0.4599382675637798, "learning_rate": 0.00029946950774383413, "loss": 3.282733917236328, "step": 2574, "token_acc": 0.26875566925201544 }, { "epoch": 1.5095279976546467, "grad_norm": 0.43347404231698083, "learning_rate": 0.0002994682854304129, "loss": 3.2786543369293213, "step": 2575, "token_acc": 0.2705432246026227 }, { "epoch": 1.5101143359718558, "grad_norm": 0.488587773862481, "learning_rate": 0.00029946706171293856, "loss": 3.239985466003418, "step": 2576, "token_acc": 0.27749877542602286 }, { "epoch": 1.5107006742890647, "grad_norm": 0.4545033777558242, "learning_rate": 0.0002994658365914226, "loss": 3.250013828277588, "step": 2577, "token_acc": 0.27398447090441574 }, { "epoch": 1.5112870126062738, "grad_norm": 0.391870837510196, "learning_rate": 0.0002994646100658765, "loss": 3.305877447128296, "step": 2578, "token_acc": 0.2647496858888263 }, { "epoch": 1.5118733509234827, "grad_norm": 0.4191626082476612, "learning_rate": 0.00029946338213631177, "loss": 3.2644736766815186, "step": 2579, "token_acc": 0.2711852438823465 }, { "epoch": 1.5124596892406919, "grad_norm": 0.42134967997244777, "learning_rate": 0.00029946215280274, "loss": 3.2502541542053223, "step": 2580, "token_acc": 0.27193422706976217 }, { "epoch": 1.513046027557901, "grad_norm": 0.47330365609355307, "learning_rate": 0.0002994609220651726, "loss": 3.3019003868103027, "step": 2581, "token_acc": 0.26723835582697253 }, { "epoch": 1.51363236587511, "grad_norm": 0.5174094776256812, "learning_rate": 0.00029945968992362135, "loss": 3.304482936859131, "step": 2582, "token_acc": 0.26464804916643586 }, { "epoch": 1.514218704192319, "grad_norm": 0.5783972425432596, "learning_rate": 0.0002994584563780977, "loss": 3.2893614768981934, "step": 2583, "token_acc": 0.268629930680548 }, { "epoch": 1.5148050425095279, "grad_norm": 0.5168786804593303, "learning_rate": 0.00029945722142861323, "loss": 3.2524969577789307, "step": 2584, "token_acc": 0.27404341412905603 }, { "epoch": 1.515391380826737, "grad_norm": 0.4719959664394992, "learning_rate": 0.0002994559850751796, "loss": 3.265166997909546, "step": 2585, "token_acc": 0.272577012544325 }, { "epoch": 1.515977719143946, "grad_norm": 0.5017331559458807, "learning_rate": 0.00029945474731780827, "loss": 3.2844796180725098, "step": 2586, "token_acc": 0.26557221925600133 }, { "epoch": 1.5165640574611552, "grad_norm": 0.46152632999954957, "learning_rate": 0.0002994535081565111, "loss": 3.2420570850372314, "step": 2587, "token_acc": 0.27436457048353646 }, { "epoch": 1.517150395778364, "grad_norm": 0.5222176614826465, "learning_rate": 0.00029945226759129956, "loss": 3.255290985107422, "step": 2588, "token_acc": 0.27016946108801637 }, { "epoch": 1.5177367340955732, "grad_norm": 0.49994409525667705, "learning_rate": 0.0002994510256221854, "loss": 3.2530574798583984, "step": 2589, "token_acc": 0.2727175178947644 }, { "epoch": 1.518323072412782, "grad_norm": 0.4409735661011459, "learning_rate": 0.00029944978224918017, "loss": 3.2479567527770996, "step": 2590, "token_acc": 0.2735926242682123 }, { "epoch": 1.5189094107299912, "grad_norm": 0.46460038040727386, "learning_rate": 0.0002994485374722957, "loss": 3.2456612586975098, "step": 2591, "token_acc": 0.2726161522610505 }, { "epoch": 1.5194957490472003, "grad_norm": 0.41028938097018913, "learning_rate": 0.00029944729129154356, "loss": 3.2668919563293457, "step": 2592, "token_acc": 0.27042461135299656 }, { "epoch": 1.5200820873644094, "grad_norm": 0.5114396729619312, "learning_rate": 0.00029944604370693556, "loss": 3.2745766639709473, "step": 2593, "token_acc": 0.27089257463110983 }, { "epoch": 1.5206684256816183, "grad_norm": 0.48592698234181303, "learning_rate": 0.0002994447947184833, "loss": 3.260713815689087, "step": 2594, "token_acc": 0.2726756232001147 }, { "epoch": 1.5212547639988272, "grad_norm": 0.44329972163254655, "learning_rate": 0.0002994435443261986, "loss": 3.282197952270508, "step": 2595, "token_acc": 0.27206243577044925 }, { "epoch": 1.5218411023160363, "grad_norm": 0.41586322560510586, "learning_rate": 0.0002994422925300931, "loss": 3.2038583755493164, "step": 2596, "token_acc": 0.2789060609726591 }, { "epoch": 1.5224274406332454, "grad_norm": 0.5548817611130542, "learning_rate": 0.00029944103933017877, "loss": 3.2753467559814453, "step": 2597, "token_acc": 0.2711772745861943 }, { "epoch": 1.5230137789504545, "grad_norm": 0.5008536769083188, "learning_rate": 0.00029943978472646716, "loss": 3.2493529319763184, "step": 2598, "token_acc": 0.27152359422006217 }, { "epoch": 1.5236001172676634, "grad_norm": 0.5939104116254532, "learning_rate": 0.00029943852871897015, "loss": 3.245530843734741, "step": 2599, "token_acc": 0.2729049298462572 }, { "epoch": 1.5241864555848723, "grad_norm": 0.6654278794585069, "learning_rate": 0.0002994372713076995, "loss": 3.2976255416870117, "step": 2600, "token_acc": 0.26559652839716935 }, { "epoch": 1.5247727939020814, "grad_norm": 0.5722669192820382, "learning_rate": 0.0002994360124926672, "loss": 3.202688694000244, "step": 2601, "token_acc": 0.2794341849182823 }, { "epoch": 1.5253591322192905, "grad_norm": 0.4347782463005387, "learning_rate": 0.0002994347522738848, "loss": 3.2058236598968506, "step": 2602, "token_acc": 0.2794052739756437 }, { "epoch": 1.5259454705364996, "grad_norm": 0.4843206431020174, "learning_rate": 0.0002994334906513643, "loss": 3.234999895095825, "step": 2603, "token_acc": 0.27464556109033733 }, { "epoch": 1.5265318088537085, "grad_norm": 0.46618686937105536, "learning_rate": 0.0002994322276251175, "loss": 3.247464656829834, "step": 2604, "token_acc": 0.27201700205884305 }, { "epoch": 1.5271181471709177, "grad_norm": 0.49863968721517776, "learning_rate": 0.00029943096319515634, "loss": 3.25022292137146, "step": 2605, "token_acc": 0.2744658458134877 }, { "epoch": 1.5277044854881265, "grad_norm": 0.42156666304996887, "learning_rate": 0.0002994296973614926, "loss": 3.2403554916381836, "step": 2606, "token_acc": 0.27340907657041913 }, { "epoch": 1.5282908238053357, "grad_norm": 0.4976139037400748, "learning_rate": 0.0002994284301241382, "loss": 3.2825558185577393, "step": 2607, "token_acc": 0.2681418460954357 }, { "epoch": 1.5288771621225448, "grad_norm": 0.5069534446103522, "learning_rate": 0.0002994271614831051, "loss": 3.2484450340270996, "step": 2608, "token_acc": 0.27271462623160453 }, { "epoch": 1.5294635004397539, "grad_norm": 0.5699437655237528, "learning_rate": 0.0002994258914384051, "loss": 3.2138636112213135, "step": 2609, "token_acc": 0.2766587980420409 }, { "epoch": 1.5300498387569628, "grad_norm": 0.45998378336695894, "learning_rate": 0.0002994246199900503, "loss": 3.2879927158355713, "step": 2610, "token_acc": 0.2679223544928722 }, { "epoch": 1.5306361770741717, "grad_norm": 0.6154288515773049, "learning_rate": 0.0002994233471380525, "loss": 3.2672739028930664, "step": 2611, "token_acc": 0.27183243794909523 }, { "epoch": 1.5312225153913808, "grad_norm": 0.56610889987781, "learning_rate": 0.00029942207288242366, "loss": 3.26328182220459, "step": 2612, "token_acc": 0.27076172494849576 }, { "epoch": 1.5318088537085899, "grad_norm": 0.5501456404692603, "learning_rate": 0.0002994207972231759, "loss": 3.206486701965332, "step": 2613, "token_acc": 0.27696600736541566 }, { "epoch": 1.532395192025799, "grad_norm": 0.517087751685438, "learning_rate": 0.000299419520160321, "loss": 3.2929186820983887, "step": 2614, "token_acc": 0.2689949875132426 }, { "epoch": 1.5329815303430079, "grad_norm": 0.5075746750818896, "learning_rate": 0.0002994182416938711, "loss": 3.207481861114502, "step": 2615, "token_acc": 0.27807730634079825 }, { "epoch": 1.533567868660217, "grad_norm": 0.4878958821266842, "learning_rate": 0.0002994169618238382, "loss": 3.2330331802368164, "step": 2616, "token_acc": 0.27519103656947774 }, { "epoch": 1.5341542069774259, "grad_norm": 0.4831997251494441, "learning_rate": 0.00029941568055023415, "loss": 3.2875823974609375, "step": 2617, "token_acc": 0.2694377162413971 }, { "epoch": 1.534740545294635, "grad_norm": 0.5153014499430449, "learning_rate": 0.00029941439787307126, "loss": 3.2962586879730225, "step": 2618, "token_acc": 0.26776753712237583 }, { "epoch": 1.535326883611844, "grad_norm": 0.5186886324905743, "learning_rate": 0.0002994131137923614, "loss": 3.2489466667175293, "step": 2619, "token_acc": 0.27368841690673507 }, { "epoch": 1.5359132219290532, "grad_norm": 0.48159006395388476, "learning_rate": 0.0002994118283081166, "loss": 3.233454704284668, "step": 2620, "token_acc": 0.27397085893084916 }, { "epoch": 1.536499560246262, "grad_norm": 0.4378062199857777, "learning_rate": 0.0002994105414203491, "loss": 3.257981061935425, "step": 2621, "token_acc": 0.27231493367565257 }, { "epoch": 1.537085898563471, "grad_norm": 0.4942286525519708, "learning_rate": 0.00029940925312907086, "loss": 3.2460618019104004, "step": 2622, "token_acc": 0.27283662691695704 }, { "epoch": 1.53767223688068, "grad_norm": 0.5398683189916152, "learning_rate": 0.00029940796343429406, "loss": 3.278038740158081, "step": 2623, "token_acc": 0.2706649588910444 }, { "epoch": 1.5382585751978892, "grad_norm": 0.5380032709716642, "learning_rate": 0.0002994066723360307, "loss": 3.3141283988952637, "step": 2624, "token_acc": 0.2665029070840076 }, { "epoch": 1.5388449135150983, "grad_norm": 0.4758418024750476, "learning_rate": 0.00029940537983429307, "loss": 3.2510204315185547, "step": 2625, "token_acc": 0.2709560046683319 }, { "epoch": 1.5394312518323072, "grad_norm": 0.602552575717143, "learning_rate": 0.0002994040859290932, "loss": 3.279886484146118, "step": 2626, "token_acc": 0.2686613327256005 }, { "epoch": 1.5400175901495161, "grad_norm": 0.6005688332614376, "learning_rate": 0.0002994027906204432, "loss": 3.2397189140319824, "step": 2627, "token_acc": 0.2728930623339717 }, { "epoch": 1.5406039284667252, "grad_norm": 0.5543763550345601, "learning_rate": 0.0002994014939083553, "loss": 3.2142765522003174, "step": 2628, "token_acc": 0.27727140988171306 }, { "epoch": 1.5411902667839343, "grad_norm": 0.5186816863153937, "learning_rate": 0.0002994001957928418, "loss": 3.245211601257324, "step": 2629, "token_acc": 0.2738477770850319 }, { "epoch": 1.5417766051011434, "grad_norm": 0.5585858915856783, "learning_rate": 0.00029939889627391466, "loss": 3.2058024406433105, "step": 2630, "token_acc": 0.27804180291187236 }, { "epoch": 1.5423629434183523, "grad_norm": 0.5168940902518459, "learning_rate": 0.0002993975953515863, "loss": 3.2340588569641113, "step": 2631, "token_acc": 0.2759878002448394 }, { "epoch": 1.5429492817355615, "grad_norm": 0.508829846026294, "learning_rate": 0.00029939629302586877, "loss": 3.201805353164673, "step": 2632, "token_acc": 0.28036247465123154 }, { "epoch": 1.5435356200527703, "grad_norm": 0.5624196940686363, "learning_rate": 0.0002993949892967744, "loss": 3.2233927249908447, "step": 2633, "token_acc": 0.2764161731782088 }, { "epoch": 1.5441219583699795, "grad_norm": 0.5209484581224846, "learning_rate": 0.00029939368416431544, "loss": 3.266388416290283, "step": 2634, "token_acc": 0.2720329470497581 }, { "epoch": 1.5447082966871886, "grad_norm": 0.5955666412975484, "learning_rate": 0.00029939237762850415, "loss": 3.2704360485076904, "step": 2635, "token_acc": 0.2716294378773017 }, { "epoch": 1.5452946350043977, "grad_norm": 0.5188519140174653, "learning_rate": 0.00029939106968935274, "loss": 3.205293655395508, "step": 2636, "token_acc": 0.2799473088335506 }, { "epoch": 1.5458809733216066, "grad_norm": 0.4332729019256432, "learning_rate": 0.0002993897603468736, "loss": 3.257351875305176, "step": 2637, "token_acc": 0.2719226067026084 }, { "epoch": 1.5464673116388155, "grad_norm": 0.5313701918927636, "learning_rate": 0.00029938844960107885, "loss": 3.2809062004089355, "step": 2638, "token_acc": 0.2687805468310211 }, { "epoch": 1.5470536499560246, "grad_norm": 0.5217645341071558, "learning_rate": 0.00029938713745198103, "loss": 3.228593349456787, "step": 2639, "token_acc": 0.27617740058655116 }, { "epoch": 1.5476399882732337, "grad_norm": 0.49795970880218154, "learning_rate": 0.0002993858238995923, "loss": 3.2364792823791504, "step": 2640, "token_acc": 0.27504581632275815 }, { "epoch": 1.5482263265904428, "grad_norm": 0.41881416357796636, "learning_rate": 0.00029938450894392507, "loss": 3.3082218170166016, "step": 2641, "token_acc": 0.2665928531224979 }, { "epoch": 1.5488126649076517, "grad_norm": 0.42763663208929126, "learning_rate": 0.0002993831925849917, "loss": 3.226113796234131, "step": 2642, "token_acc": 0.2754932061667102 }, { "epoch": 1.5493990032248608, "grad_norm": 0.4859469581253531, "learning_rate": 0.00029938187482280446, "loss": 3.2522401809692383, "step": 2643, "token_acc": 0.27337353322900587 }, { "epoch": 1.5499853415420697, "grad_norm": 0.5248984494678559, "learning_rate": 0.0002993805556573759, "loss": 3.245025157928467, "step": 2644, "token_acc": 0.2746379663966521 }, { "epoch": 1.5505716798592788, "grad_norm": 0.5210220205602817, "learning_rate": 0.00029937923508871825, "loss": 3.226012706756592, "step": 2645, "token_acc": 0.27477981698167403 }, { "epoch": 1.551158018176488, "grad_norm": 0.5661392376928134, "learning_rate": 0.00029937791311684394, "loss": 3.2766690254211426, "step": 2646, "token_acc": 0.26932523335245345 }, { "epoch": 1.551744356493697, "grad_norm": 0.4764228029122335, "learning_rate": 0.00029937658974176553, "loss": 3.226942539215088, "step": 2647, "token_acc": 0.2755716987730077 }, { "epoch": 1.552330694810906, "grad_norm": 0.5232041755602224, "learning_rate": 0.0002993752649634952, "loss": 3.2254648208618164, "step": 2648, "token_acc": 0.2772045858875839 }, { "epoch": 1.5529170331281148, "grad_norm": 0.4967070088009408, "learning_rate": 0.0002993739387820457, "loss": 3.2678744792938232, "step": 2649, "token_acc": 0.27159833341439293 }, { "epoch": 1.553503371445324, "grad_norm": 0.5475988180413215, "learning_rate": 0.0002993726111974292, "loss": 3.2653234004974365, "step": 2650, "token_acc": 0.2714494517638791 }, { "epoch": 1.554089709762533, "grad_norm": 0.6757056347179742, "learning_rate": 0.0002993712822096584, "loss": 3.2989320755004883, "step": 2651, "token_acc": 0.2675348856242583 }, { "epoch": 1.5546760480797421, "grad_norm": 0.5549801353491003, "learning_rate": 0.00029936995181874563, "loss": 3.2337820529937744, "step": 2652, "token_acc": 0.27528565271777405 }, { "epoch": 1.555262386396951, "grad_norm": 0.5457228611590723, "learning_rate": 0.00029936862002470345, "loss": 3.2703475952148438, "step": 2653, "token_acc": 0.2722524574066917 }, { "epoch": 1.55584872471416, "grad_norm": 0.5383024036623529, "learning_rate": 0.0002993672868275444, "loss": 3.264014720916748, "step": 2654, "token_acc": 0.2710610812624135 }, { "epoch": 1.556435063031369, "grad_norm": 0.5650581786893103, "learning_rate": 0.0002993659522272809, "loss": 3.2954115867614746, "step": 2655, "token_acc": 0.2676637079668734 }, { "epoch": 1.5570214013485781, "grad_norm": 0.44519816202854007, "learning_rate": 0.0002993646162239256, "loss": 3.2726893424987793, "step": 2656, "token_acc": 0.2704731085852894 }, { "epoch": 1.5576077396657872, "grad_norm": 0.46865346415689174, "learning_rate": 0.00029936327881749093, "loss": 3.277496576309204, "step": 2657, "token_acc": 0.2702793251525586 }, { "epoch": 1.5581940779829961, "grad_norm": 0.4602070319871101, "learning_rate": 0.00029936194000798963, "loss": 3.246164321899414, "step": 2658, "token_acc": 0.2727380072285198 }, { "epoch": 1.5587804163002053, "grad_norm": 0.4626760257442825, "learning_rate": 0.0002993605997954341, "loss": 3.2376041412353516, "step": 2659, "token_acc": 0.2741505412078711 }, { "epoch": 1.5593667546174141, "grad_norm": 0.42804034608065045, "learning_rate": 0.000299359258179837, "loss": 3.24965763092041, "step": 2660, "token_acc": 0.2718377953399089 }, { "epoch": 1.5599530929346233, "grad_norm": 0.41635655726721815, "learning_rate": 0.00029935791516121096, "loss": 3.249459743499756, "step": 2661, "token_acc": 0.27161737626031757 }, { "epoch": 1.5605394312518324, "grad_norm": 0.42683243209117, "learning_rate": 0.0002993565707395686, "loss": 3.229635238647461, "step": 2662, "token_acc": 0.27518101071330847 }, { "epoch": 1.5611257695690415, "grad_norm": 0.44940662108950113, "learning_rate": 0.00029935522491492247, "loss": 3.217538833618164, "step": 2663, "token_acc": 0.2748281791377717 }, { "epoch": 1.5617121078862504, "grad_norm": 0.48855416948332153, "learning_rate": 0.00029935387768728524, "loss": 3.2292075157165527, "step": 2664, "token_acc": 0.2742352341409843 }, { "epoch": 1.5622984462034593, "grad_norm": 0.5076304708868539, "learning_rate": 0.0002993525290566697, "loss": 3.2850582599639893, "step": 2665, "token_acc": 0.2696157428239554 }, { "epoch": 1.5628847845206684, "grad_norm": 0.5492576815214156, "learning_rate": 0.0002993511790230883, "loss": 3.2378196716308594, "step": 2666, "token_acc": 0.2737053469185579 }, { "epoch": 1.5634711228378775, "grad_norm": 0.5677140969281658, "learning_rate": 0.00029934982758655383, "loss": 3.2756845951080322, "step": 2667, "token_acc": 0.2711083272361071 }, { "epoch": 1.5640574611550866, "grad_norm": 0.5576659664714068, "learning_rate": 0.000299348474747079, "loss": 3.328035354614258, "step": 2668, "token_acc": 0.26395129240124104 }, { "epoch": 1.5646437994722955, "grad_norm": 0.4934578169744558, "learning_rate": 0.0002993471205046765, "loss": 3.269674301147461, "step": 2669, "token_acc": 0.27037367648403177 }, { "epoch": 1.5652301377895046, "grad_norm": 0.5148148350339178, "learning_rate": 0.0002993457648593591, "loss": 3.2371411323547363, "step": 2670, "token_acc": 0.2724076585328459 }, { "epoch": 1.5658164761067135, "grad_norm": 0.4760550046760065, "learning_rate": 0.0002993444078111394, "loss": 3.307738780975342, "step": 2671, "token_acc": 0.26641953934475854 }, { "epoch": 1.5664028144239226, "grad_norm": 0.4691676065456747, "learning_rate": 0.00029934304936003026, "loss": 3.2141170501708984, "step": 2672, "token_acc": 0.2774571558808868 }, { "epoch": 1.5669891527411317, "grad_norm": 0.49930497229939125, "learning_rate": 0.0002993416895060444, "loss": 3.2581887245178223, "step": 2673, "token_acc": 0.2731966198103066 }, { "epoch": 1.5675754910583408, "grad_norm": 0.5530823212722192, "learning_rate": 0.0002993403282491947, "loss": 3.2199554443359375, "step": 2674, "token_acc": 0.27857337408074684 }, { "epoch": 1.5681618293755497, "grad_norm": 0.416715109705561, "learning_rate": 0.00029933896558949374, "loss": 3.2289445400238037, "step": 2675, "token_acc": 0.2758312410126748 }, { "epoch": 1.5687481676927586, "grad_norm": 0.42689102131088197, "learning_rate": 0.0002993376015269545, "loss": 3.243030548095703, "step": 2676, "token_acc": 0.27342611275656553 }, { "epoch": 1.5693345060099677, "grad_norm": 0.40203869612757054, "learning_rate": 0.0002993362360615897, "loss": 3.266345500946045, "step": 2677, "token_acc": 0.27061593465686135 }, { "epoch": 1.5699208443271768, "grad_norm": 0.4417616604422087, "learning_rate": 0.0002993348691934122, "loss": 3.2280187606811523, "step": 2678, "token_acc": 0.2741423602023626 }, { "epoch": 1.570507182644386, "grad_norm": 0.45648041885251595, "learning_rate": 0.0002993335009224348, "loss": 3.225739002227783, "step": 2679, "token_acc": 0.2772000304427242 }, { "epoch": 1.5710935209615948, "grad_norm": 0.44526434210683724, "learning_rate": 0.00029933213124867043, "loss": 3.2088475227355957, "step": 2680, "token_acc": 0.2777221585338652 }, { "epoch": 1.5716798592788037, "grad_norm": 0.3985179985085914, "learning_rate": 0.00029933076017213194, "loss": 3.222714424133301, "step": 2681, "token_acc": 0.27779638757291053 }, { "epoch": 1.5722661975960128, "grad_norm": 0.47015768871391006, "learning_rate": 0.00029932938769283214, "loss": 3.2354116439819336, "step": 2682, "token_acc": 0.2743406095173012 }, { "epoch": 1.572852535913222, "grad_norm": 0.4333603783048628, "learning_rate": 0.00029932801381078397, "loss": 3.2490038871765137, "step": 2683, "token_acc": 0.27225055105792967 }, { "epoch": 1.573438874230431, "grad_norm": 0.43622030814722107, "learning_rate": 0.00029932663852600034, "loss": 3.2444024085998535, "step": 2684, "token_acc": 0.2730213596844202 }, { "epoch": 1.57402521254764, "grad_norm": 0.3895775479547454, "learning_rate": 0.00029932526183849416, "loss": 3.258131980895996, "step": 2685, "token_acc": 0.27289741883877516 }, { "epoch": 1.574611550864849, "grad_norm": 0.4321711157940707, "learning_rate": 0.0002993238837482784, "loss": 3.253261089324951, "step": 2686, "token_acc": 0.27271341182903397 }, { "epoch": 1.575197889182058, "grad_norm": 0.40630776391228274, "learning_rate": 0.0002993225042553659, "loss": 3.2729482650756836, "step": 2687, "token_acc": 0.26927156014512976 }, { "epoch": 1.575784227499267, "grad_norm": 0.4560080758374562, "learning_rate": 0.00029932112335976974, "loss": 3.1932506561279297, "step": 2688, "token_acc": 0.2811465122869519 }, { "epoch": 1.5763705658164762, "grad_norm": 0.5089141075761099, "learning_rate": 0.00029931974106150284, "loss": 3.294118881225586, "step": 2689, "token_acc": 0.265711969174377 }, { "epoch": 1.5769569041336853, "grad_norm": 0.487999939243647, "learning_rate": 0.00029931835736057816, "loss": 3.25174617767334, "step": 2690, "token_acc": 0.27106734028266294 }, { "epoch": 1.5775432424508942, "grad_norm": 0.49475367666993286, "learning_rate": 0.0002993169722570087, "loss": 3.263667583465576, "step": 2691, "token_acc": 0.27103474973036273 }, { "epoch": 1.578129580768103, "grad_norm": 0.5130871297944195, "learning_rate": 0.00029931558575080753, "loss": 3.210399866104126, "step": 2692, "token_acc": 0.27724994649798185 }, { "epoch": 1.5787159190853122, "grad_norm": 0.5226661877928755, "learning_rate": 0.00029931419784198765, "loss": 3.2222676277160645, "step": 2693, "token_acc": 0.27536330691895805 }, { "epoch": 1.5793022574025213, "grad_norm": 0.5056452111250206, "learning_rate": 0.0002993128085305621, "loss": 3.2211380004882812, "step": 2694, "token_acc": 0.27632935474613923 }, { "epoch": 1.5798885957197304, "grad_norm": 0.4688092746118987, "learning_rate": 0.00029931141781654386, "loss": 3.260437488555908, "step": 2695, "token_acc": 0.27175967242374927 }, { "epoch": 1.5804749340369393, "grad_norm": 0.4810384518672649, "learning_rate": 0.00029931002569994603, "loss": 3.218383550643921, "step": 2696, "token_acc": 0.27488023398934663 }, { "epoch": 1.5810612723541484, "grad_norm": 0.47643477944060103, "learning_rate": 0.00029930863218078176, "loss": 3.2615809440612793, "step": 2697, "token_acc": 0.27051356914304736 }, { "epoch": 1.5816476106713573, "grad_norm": 0.45321266835930557, "learning_rate": 0.0002993072372590641, "loss": 3.238734245300293, "step": 2698, "token_acc": 0.27531953263768555 }, { "epoch": 1.5822339489885664, "grad_norm": 0.5161826450875109, "learning_rate": 0.0002993058409348061, "loss": 3.2160561084747314, "step": 2699, "token_acc": 0.27693314867124147 }, { "epoch": 1.5828202873057755, "grad_norm": 0.4200191476602458, "learning_rate": 0.0002993044432080209, "loss": 3.2701847553253174, "step": 2700, "token_acc": 0.2706830794647455 }, { "epoch": 1.5834066256229846, "grad_norm": 0.5149505536802924, "learning_rate": 0.0002993030440787217, "loss": 3.25761079788208, "step": 2701, "token_acc": 0.27214159917348735 }, { "epoch": 1.5839929639401935, "grad_norm": 0.5389300366798491, "learning_rate": 0.00029930164354692156, "loss": 3.242964267730713, "step": 2702, "token_acc": 0.27478999582895547 }, { "epoch": 1.5845793022574024, "grad_norm": 0.5419886449745961, "learning_rate": 0.00029930024161263367, "loss": 3.2561569213867188, "step": 2703, "token_acc": 0.2730442207122274 }, { "epoch": 1.5851656405746115, "grad_norm": 0.5264087051899076, "learning_rate": 0.00029929883827587117, "loss": 3.213006019592285, "step": 2704, "token_acc": 0.2771937969630755 }, { "epoch": 1.5857519788918206, "grad_norm": 0.5082829910876778, "learning_rate": 0.0002992974335366473, "loss": 3.240527391433716, "step": 2705, "token_acc": 0.27348725277958763 }, { "epoch": 1.5863383172090297, "grad_norm": 0.4936121740552838, "learning_rate": 0.00029929602739497523, "loss": 3.2195537090301514, "step": 2706, "token_acc": 0.2755467313225083 }, { "epoch": 1.5869246555262386, "grad_norm": 0.5363283151732983, "learning_rate": 0.00029929461985086814, "loss": 3.261514186859131, "step": 2707, "token_acc": 0.2724921294936598 }, { "epoch": 1.5875109938434475, "grad_norm": 0.47631800183404005, "learning_rate": 0.00029929321090433925, "loss": 3.191333293914795, "step": 2708, "token_acc": 0.27947795971107203 }, { "epoch": 1.5880973321606566, "grad_norm": 0.41306246136794844, "learning_rate": 0.0002992918005554019, "loss": 3.22574520111084, "step": 2709, "token_acc": 0.2748240597474446 }, { "epoch": 1.5886836704778657, "grad_norm": 0.45490859389660776, "learning_rate": 0.0002992903888040692, "loss": 3.2315351963043213, "step": 2710, "token_acc": 0.2770321729886836 }, { "epoch": 1.5892700087950749, "grad_norm": 0.46520111116611357, "learning_rate": 0.00029928897565035444, "loss": 3.2533652782440186, "step": 2711, "token_acc": 0.27225410858792914 }, { "epoch": 1.5898563471122837, "grad_norm": 0.5047586063427044, "learning_rate": 0.00029928756109427095, "loss": 3.244349479675293, "step": 2712, "token_acc": 0.27429234946526354 }, { "epoch": 1.5904426854294929, "grad_norm": 0.41400183297954696, "learning_rate": 0.000299286145135832, "loss": 3.1837916374206543, "step": 2713, "token_acc": 0.2806259953448487 }, { "epoch": 1.5910290237467017, "grad_norm": 0.501299474034693, "learning_rate": 0.0002992847277750509, "loss": 3.243840456008911, "step": 2714, "token_acc": 0.27279809027869467 }, { "epoch": 1.5916153620639109, "grad_norm": 0.6278109894191083, "learning_rate": 0.0002992833090119409, "loss": 3.300935745239258, "step": 2715, "token_acc": 0.26612083525452745 }, { "epoch": 1.59220170038112, "grad_norm": 0.5981522934222099, "learning_rate": 0.0002992818888465154, "loss": 3.284961700439453, "step": 2716, "token_acc": 0.26882906429691583 }, { "epoch": 1.592788038698329, "grad_norm": 0.5970889935366824, "learning_rate": 0.00029928046727878773, "loss": 3.2843661308288574, "step": 2717, "token_acc": 0.2701027348020585 }, { "epoch": 1.593374377015538, "grad_norm": 0.5153818190393806, "learning_rate": 0.0002992790443087712, "loss": 3.223395824432373, "step": 2718, "token_acc": 0.27696721000037994 }, { "epoch": 1.5939607153327469, "grad_norm": 0.43980250099170276, "learning_rate": 0.00029927761993647924, "loss": 3.222972869873047, "step": 2719, "token_acc": 0.2748214569116013 }, { "epoch": 1.594547053649956, "grad_norm": 0.5223287700086423, "learning_rate": 0.00029927619416192516, "loss": 3.2458887100219727, "step": 2720, "token_acc": 0.273188400236354 }, { "epoch": 1.595133391967165, "grad_norm": 0.4125632968524281, "learning_rate": 0.00029927476698512237, "loss": 3.230388641357422, "step": 2721, "token_acc": 0.2733371837580833 }, { "epoch": 1.5957197302843742, "grad_norm": 0.4879504470758975, "learning_rate": 0.00029927333840608437, "loss": 3.23653244972229, "step": 2722, "token_acc": 0.2762215148940487 }, { "epoch": 1.596306068601583, "grad_norm": 0.5367539081977291, "learning_rate": 0.00029927190842482445, "loss": 3.249480724334717, "step": 2723, "token_acc": 0.27174708835914824 }, { "epoch": 1.5968924069187922, "grad_norm": 0.499365320177642, "learning_rate": 0.0002992704770413561, "loss": 3.2621824741363525, "step": 2724, "token_acc": 0.270077554037611 }, { "epoch": 1.597478745236001, "grad_norm": 0.4439137441648015, "learning_rate": 0.00029926904425569276, "loss": 3.209418535232544, "step": 2725, "token_acc": 0.2776703536729811 }, { "epoch": 1.5980650835532102, "grad_norm": 0.45933860630408435, "learning_rate": 0.0002992676100678479, "loss": 3.2532382011413574, "step": 2726, "token_acc": 0.27090482116543174 }, { "epoch": 1.5986514218704193, "grad_norm": 0.5137245456825731, "learning_rate": 0.0002992661744778349, "loss": 3.261685371398926, "step": 2727, "token_acc": 0.27290040684158084 }, { "epoch": 1.5992377601876284, "grad_norm": 0.4879246372246115, "learning_rate": 0.00029926473748566746, "loss": 3.224544048309326, "step": 2728, "token_acc": 0.2759846919768348 }, { "epoch": 1.5998240985048373, "grad_norm": 0.5040858080934653, "learning_rate": 0.0002992632990913589, "loss": 3.3423006534576416, "step": 2729, "token_acc": 0.26068816977565573 }, { "epoch": 1.6004104368220462, "grad_norm": 0.4117497050223786, "learning_rate": 0.0002992618592949228, "loss": 3.1993706226348877, "step": 2730, "token_acc": 0.2792337058871694 }, { "epoch": 1.6009967751392553, "grad_norm": 0.5381450468096101, "learning_rate": 0.00029926041809637266, "loss": 3.237950325012207, "step": 2731, "token_acc": 0.2730349412367786 }, { "epoch": 1.6015831134564644, "grad_norm": 0.536469442746549, "learning_rate": 0.00029925897549572196, "loss": 3.270899772644043, "step": 2732, "token_acc": 0.2713140905773085 }, { "epoch": 1.6021694517736735, "grad_norm": 0.41947074181675853, "learning_rate": 0.0002992575314929844, "loss": 3.2282652854919434, "step": 2733, "token_acc": 0.2757453991014926 }, { "epoch": 1.6027557900908824, "grad_norm": 0.4478211929321843, "learning_rate": 0.0002992560860881734, "loss": 3.2585651874542236, "step": 2734, "token_acc": 0.2718842639428697 }, { "epoch": 1.6033421284080913, "grad_norm": 0.4323518882342909, "learning_rate": 0.00029925463928130264, "loss": 3.2802717685699463, "step": 2735, "token_acc": 0.266565710522085 }, { "epoch": 1.6039284667253004, "grad_norm": 0.4342113935040127, "learning_rate": 0.00029925319107238565, "loss": 3.2652697563171387, "step": 2736, "token_acc": 0.2708841463414634 }, { "epoch": 1.6045148050425095, "grad_norm": 0.4983534563241922, "learning_rate": 0.00029925174146143603, "loss": 3.2986245155334473, "step": 2737, "token_acc": 0.26620232410019773 }, { "epoch": 1.6051011433597187, "grad_norm": 0.4018549426682999, "learning_rate": 0.0002992502904484674, "loss": 3.2220096588134766, "step": 2738, "token_acc": 0.2759158841423092 }, { "epoch": 1.6056874816769275, "grad_norm": 0.46023540523421264, "learning_rate": 0.00029924883803349346, "loss": 3.2141122817993164, "step": 2739, "token_acc": 0.2767691241772431 }, { "epoch": 1.6062738199941367, "grad_norm": 0.44112152099692786, "learning_rate": 0.0002992473842165278, "loss": 3.2150025367736816, "step": 2740, "token_acc": 0.2763198864767466 }, { "epoch": 1.6068601583113455, "grad_norm": 0.48571007093696955, "learning_rate": 0.00029924592899758406, "loss": 3.2276647090911865, "step": 2741, "token_acc": 0.2774720826533366 }, { "epoch": 1.6074464966285547, "grad_norm": 0.4882611553540648, "learning_rate": 0.0002992444723766759, "loss": 3.209761142730713, "step": 2742, "token_acc": 0.27640525304761554 }, { "epoch": 1.6080328349457638, "grad_norm": 0.5037432511400337, "learning_rate": 0.00029924301435381705, "loss": 3.2221689224243164, "step": 2743, "token_acc": 0.27641195846345956 }, { "epoch": 1.6086191732629729, "grad_norm": 0.4108345978356018, "learning_rate": 0.00029924155492902125, "loss": 3.169506311416626, "step": 2744, "token_acc": 0.2839673159275413 }, { "epoch": 1.6092055115801818, "grad_norm": 0.48161915532035654, "learning_rate": 0.00029924009410230206, "loss": 3.207735061645508, "step": 2745, "token_acc": 0.2782410996520518 }, { "epoch": 1.6097918498973907, "grad_norm": 0.5395821224512068, "learning_rate": 0.00029923863187367335, "loss": 3.244959831237793, "step": 2746, "token_acc": 0.27297992521679176 }, { "epoch": 1.6103781882145998, "grad_norm": 0.4860412555961269, "learning_rate": 0.00029923716824314874, "loss": 3.272493362426758, "step": 2747, "token_acc": 0.26890261920037056 }, { "epoch": 1.6109645265318089, "grad_norm": 0.4486647506364991, "learning_rate": 0.00029923570321074204, "loss": 3.2505991458892822, "step": 2748, "token_acc": 0.2723867367127596 }, { "epoch": 1.611550864849018, "grad_norm": 0.5571826196354732, "learning_rate": 0.00029923423677646703, "loss": 3.2746028900146484, "step": 2749, "token_acc": 0.268941748565943 }, { "epoch": 1.6121372031662269, "grad_norm": 0.5401522424899917, "learning_rate": 0.00029923276894033753, "loss": 3.2452683448791504, "step": 2750, "token_acc": 0.2735822378576673 }, { "epoch": 1.612723541483436, "grad_norm": 0.44585486045488004, "learning_rate": 0.0002992312997023672, "loss": 3.2004213333129883, "step": 2751, "token_acc": 0.27908434080450767 }, { "epoch": 1.6133098798006449, "grad_norm": 0.5152388818495092, "learning_rate": 0.0002992298290625698, "loss": 3.2106189727783203, "step": 2752, "token_acc": 0.278890324148552 }, { "epoch": 1.613896218117854, "grad_norm": 0.5149415375917549, "learning_rate": 0.00029922835702095936, "loss": 3.225625991821289, "step": 2753, "token_acc": 0.2752305933239503 }, { "epoch": 1.614482556435063, "grad_norm": 0.522854781758891, "learning_rate": 0.00029922688357754965, "loss": 3.2174530029296875, "step": 2754, "token_acc": 0.2757915401042565 }, { "epoch": 1.6150688947522722, "grad_norm": 0.5171855737146633, "learning_rate": 0.0002992254087323543, "loss": 3.257126808166504, "step": 2755, "token_acc": 0.2725901898991652 }, { "epoch": 1.6156552330694811, "grad_norm": 0.5110765602229769, "learning_rate": 0.00029922393248538745, "loss": 3.2495858669281006, "step": 2756, "token_acc": 0.27403209522021704 }, { "epoch": 1.61624157138669, "grad_norm": 0.45370606672750524, "learning_rate": 0.0002992224548366627, "loss": 3.2649779319763184, "step": 2757, "token_acc": 0.2720630886214788 }, { "epoch": 1.6168279097038991, "grad_norm": 0.45805663282030973, "learning_rate": 0.0002992209757861942, "loss": 3.239689350128174, "step": 2758, "token_acc": 0.27263965648735333 }, { "epoch": 1.6174142480211082, "grad_norm": 0.44054215322263024, "learning_rate": 0.0002992194953339957, "loss": 3.2765979766845703, "step": 2759, "token_acc": 0.2669946503387101 }, { "epoch": 1.6180005863383173, "grad_norm": 0.4916839250635167, "learning_rate": 0.000299218013480081, "loss": 3.258183479309082, "step": 2760, "token_acc": 0.2724705329601863 }, { "epoch": 1.6185869246555262, "grad_norm": 0.5048261077039959, "learning_rate": 0.00029921653022446426, "loss": 3.2572457790374756, "step": 2761, "token_acc": 0.27138767349844034 }, { "epoch": 1.6191732629727351, "grad_norm": 0.49811486906674163, "learning_rate": 0.00029921504556715923, "loss": 3.169217109680176, "step": 2762, "token_acc": 0.28300437315892607 }, { "epoch": 1.6197596012899442, "grad_norm": 0.4595125569053117, "learning_rate": 0.0002992135595081799, "loss": 3.201615810394287, "step": 2763, "token_acc": 0.2788792931707706 }, { "epoch": 1.6203459396071533, "grad_norm": 0.48850151544162984, "learning_rate": 0.00029921207204754033, "loss": 3.2450265884399414, "step": 2764, "token_acc": 0.2718133545391581 }, { "epoch": 1.6209322779243625, "grad_norm": 0.4523349350879401, "learning_rate": 0.0002992105831852543, "loss": 3.185438632965088, "step": 2765, "token_acc": 0.2796368855363484 }, { "epoch": 1.6215186162415713, "grad_norm": 0.4299534610222147, "learning_rate": 0.000299209092921336, "loss": 3.2155470848083496, "step": 2766, "token_acc": 0.27669452181987003 }, { "epoch": 1.6221049545587805, "grad_norm": 0.47199304565738287, "learning_rate": 0.0002992076012557993, "loss": 3.2312304973602295, "step": 2767, "token_acc": 0.27646653047146535 }, { "epoch": 1.6226912928759893, "grad_norm": 0.48169586292190014, "learning_rate": 0.0002992061081886582, "loss": 3.2011427879333496, "step": 2768, "token_acc": 0.28045878121603496 }, { "epoch": 1.6232776311931985, "grad_norm": 0.450042128295733, "learning_rate": 0.00029920461371992684, "loss": 3.2608869075775146, "step": 2769, "token_acc": 0.27108049953050684 }, { "epoch": 1.6238639695104076, "grad_norm": 0.40807286227511486, "learning_rate": 0.00029920311784961917, "loss": 3.227682590484619, "step": 2770, "token_acc": 0.2750270653625147 }, { "epoch": 1.6244503078276167, "grad_norm": 0.4278745158532367, "learning_rate": 0.0002992016205777492, "loss": 3.2256436347961426, "step": 2771, "token_acc": 0.2752127717243996 }, { "epoch": 1.6250366461448256, "grad_norm": 0.4527504295828738, "learning_rate": 0.00029920012190433115, "loss": 3.231621265411377, "step": 2772, "token_acc": 0.2775668531097732 }, { "epoch": 1.6256229844620345, "grad_norm": 0.47534107910061496, "learning_rate": 0.000299198621829379, "loss": 3.2577579021453857, "step": 2773, "token_acc": 0.2708343340825258 }, { "epoch": 1.6262093227792436, "grad_norm": 0.4663353596532772, "learning_rate": 0.00029919712035290675, "loss": 3.305983304977417, "step": 2774, "token_acc": 0.26527691200509385 }, { "epoch": 1.6267956610964527, "grad_norm": 0.38004499709417205, "learning_rate": 0.0002991956174749287, "loss": 3.2501847743988037, "step": 2775, "token_acc": 0.27291044601275066 }, { "epoch": 1.6273819994136618, "grad_norm": 0.39649814703538755, "learning_rate": 0.0002991941131954588, "loss": 3.2686281204223633, "step": 2776, "token_acc": 0.2713234548249413 }, { "epoch": 1.6279683377308707, "grad_norm": 0.4395841431592008, "learning_rate": 0.00029919260751451124, "loss": 3.2062840461730957, "step": 2777, "token_acc": 0.2773292518622142 }, { "epoch": 1.6285546760480798, "grad_norm": 0.542379940537786, "learning_rate": 0.0002991911004321002, "loss": 3.2093567848205566, "step": 2778, "token_acc": 0.27836100292176286 }, { "epoch": 1.6291410143652887, "grad_norm": 0.6236099249715498, "learning_rate": 0.0002991895919482398, "loss": 3.229440450668335, "step": 2779, "token_acc": 0.27503906433726505 }, { "epoch": 1.6297273526824978, "grad_norm": 0.6686920349885959, "learning_rate": 0.0002991880820629443, "loss": 3.2797927856445312, "step": 2780, "token_acc": 0.269106760687393 }, { "epoch": 1.630313690999707, "grad_norm": 0.5891680262330786, "learning_rate": 0.0002991865707762277, "loss": 3.2230935096740723, "step": 2781, "token_acc": 0.2747641177659158 }, { "epoch": 1.630900029316916, "grad_norm": 0.38169271362579993, "learning_rate": 0.00029918505808810436, "loss": 3.233772039413452, "step": 2782, "token_acc": 0.27561514416166616 }, { "epoch": 1.631486367634125, "grad_norm": 0.5040408203568694, "learning_rate": 0.0002991835439985884, "loss": 3.1938376426696777, "step": 2783, "token_acc": 0.28025278125205716 }, { "epoch": 1.6320727059513338, "grad_norm": 0.50511812583065, "learning_rate": 0.0002991820285076941, "loss": 3.227950096130371, "step": 2784, "token_acc": 0.27614113745402313 }, { "epoch": 1.632659044268543, "grad_norm": 0.3981822428392917, "learning_rate": 0.00029918051161543564, "loss": 3.2353432178497314, "step": 2785, "token_acc": 0.27576602393031296 }, { "epoch": 1.633245382585752, "grad_norm": 0.40274345346499313, "learning_rate": 0.00029917899332182723, "loss": 3.197986602783203, "step": 2786, "token_acc": 0.2774161572773831 }, { "epoch": 1.6338317209029611, "grad_norm": 0.5216991829369977, "learning_rate": 0.0002991774736268833, "loss": 3.2200732231140137, "step": 2787, "token_acc": 0.2773095291831721 }, { "epoch": 1.63441805922017, "grad_norm": 0.5249615058286641, "learning_rate": 0.000299175952530618, "loss": 3.2722325325012207, "step": 2788, "token_acc": 0.26963473444804786 }, { "epoch": 1.635004397537379, "grad_norm": 0.44309677495433253, "learning_rate": 0.0002991744300330456, "loss": 3.272714138031006, "step": 2789, "token_acc": 0.2709898232737305 }, { "epoch": 1.635590735854588, "grad_norm": 0.44954046283565063, "learning_rate": 0.0002991729061341805, "loss": 3.2553958892822266, "step": 2790, "token_acc": 0.27128811550479476 }, { "epoch": 1.6361770741717971, "grad_norm": 0.4612430435001581, "learning_rate": 0.0002991713808340369, "loss": 3.2683799266815186, "step": 2791, "token_acc": 0.2695307918652867 }, { "epoch": 1.6367634124890063, "grad_norm": 0.5374043302255161, "learning_rate": 0.00029916985413262927, "loss": 3.208031177520752, "step": 2792, "token_acc": 0.27587530652376585 }, { "epoch": 1.6373497508062151, "grad_norm": 0.4996969858384595, "learning_rate": 0.0002991683260299718, "loss": 3.3217482566833496, "step": 2793, "token_acc": 0.2642883546075864 }, { "epoch": 1.6379360891234243, "grad_norm": 0.479591947993097, "learning_rate": 0.00029916679652607894, "loss": 3.261453628540039, "step": 2794, "token_acc": 0.27161909561434405 }, { "epoch": 1.6385224274406331, "grad_norm": 0.48740664193297306, "learning_rate": 0.00029916526562096506, "loss": 3.21614670753479, "step": 2795, "token_acc": 0.2767442921271737 }, { "epoch": 1.6391087657578423, "grad_norm": 0.5434620208433892, "learning_rate": 0.0002991637333146445, "loss": 3.2481441497802734, "step": 2796, "token_acc": 0.274214273877408 }, { "epoch": 1.6396951040750514, "grad_norm": 0.5048102115116913, "learning_rate": 0.0002991621996071316, "loss": 3.2879605293273926, "step": 2797, "token_acc": 0.26718519094656606 }, { "epoch": 1.6402814423922605, "grad_norm": 0.46198781192539773, "learning_rate": 0.00029916066449844095, "loss": 3.2883810997009277, "step": 2798, "token_acc": 0.26736141055636076 }, { "epoch": 1.6408677807094694, "grad_norm": 0.5059493436244304, "learning_rate": 0.00029915912798858676, "loss": 3.278900146484375, "step": 2799, "token_acc": 0.2674969855380721 }, { "epoch": 1.6414541190266783, "grad_norm": 0.48827666203430803, "learning_rate": 0.00029915759007758357, "loss": 3.2965686321258545, "step": 2800, "token_acc": 0.2656863049577109 }, { "epoch": 1.6420404573438874, "grad_norm": 0.4605077587581276, "learning_rate": 0.0002991560507654458, "loss": 3.2295122146606445, "step": 2801, "token_acc": 0.27451548025353484 }, { "epoch": 1.6426267956610965, "grad_norm": 0.47374081971298215, "learning_rate": 0.000299154510052188, "loss": 3.173492670059204, "step": 2802, "token_acc": 0.28155719855655564 }, { "epoch": 1.6432131339783056, "grad_norm": 0.42688371855016066, "learning_rate": 0.0002991529679378245, "loss": 3.226634979248047, "step": 2803, "token_acc": 0.2757227573063262 }, { "epoch": 1.6437994722955145, "grad_norm": 0.4361767904572582, "learning_rate": 0.00029915142442236986, "loss": 3.251396417617798, "step": 2804, "token_acc": 0.2725779039874364 }, { "epoch": 1.6443858106127234, "grad_norm": 0.475816140310604, "learning_rate": 0.0002991498795058386, "loss": 3.2423527240753174, "step": 2805, "token_acc": 0.273065127004381 }, { "epoch": 1.6449721489299325, "grad_norm": 0.40756243927030267, "learning_rate": 0.00029914833318824517, "loss": 3.249202251434326, "step": 2806, "token_acc": 0.27335952089638743 }, { "epoch": 1.6455584872471416, "grad_norm": 0.46369295312067865, "learning_rate": 0.00029914678546960415, "loss": 3.229423999786377, "step": 2807, "token_acc": 0.27506253062066477 }, { "epoch": 1.6461448255643507, "grad_norm": 0.5065792103297269, "learning_rate": 0.0002991452363499301, "loss": 3.241318702697754, "step": 2808, "token_acc": 0.27285611557821793 }, { "epoch": 1.6467311638815598, "grad_norm": 0.4592479492270207, "learning_rate": 0.00029914368582923746, "loss": 3.2161073684692383, "step": 2809, "token_acc": 0.2769729802727943 }, { "epoch": 1.6473175021987687, "grad_norm": 0.46478065073603425, "learning_rate": 0.0002991421339075409, "loss": 3.233269691467285, "step": 2810, "token_acc": 0.2751621844481755 }, { "epoch": 1.6479038405159776, "grad_norm": 0.43303168277559084, "learning_rate": 0.0002991405805848549, "loss": 3.1588401794433594, "step": 2811, "token_acc": 0.28553875870209655 }, { "epoch": 1.6484901788331867, "grad_norm": 0.46259567860613987, "learning_rate": 0.0002991390258611942, "loss": 3.1970856189727783, "step": 2812, "token_acc": 0.28027978035741086 }, { "epoch": 1.6490765171503958, "grad_norm": 0.5284415102704099, "learning_rate": 0.0002991374697365733, "loss": 3.2642972469329834, "step": 2813, "token_acc": 0.27382538104261084 }, { "epoch": 1.649662855467605, "grad_norm": 0.459088700642807, "learning_rate": 0.00029913591221100683, "loss": 3.212010145187378, "step": 2814, "token_acc": 0.2750689247735329 }, { "epoch": 1.6502491937848138, "grad_norm": 0.4932365296610275, "learning_rate": 0.0002991343532845094, "loss": 3.2581164836883545, "step": 2815, "token_acc": 0.27183697772624354 }, { "epoch": 1.6508355321020227, "grad_norm": 0.4632231285174976, "learning_rate": 0.0002991327929570957, "loss": 3.225412368774414, "step": 2816, "token_acc": 0.275295327450117 }, { "epoch": 1.6514218704192318, "grad_norm": 0.4481535710117751, "learning_rate": 0.0002991312312287804, "loss": 3.177954912185669, "step": 2817, "token_acc": 0.2818033981080582 }, { "epoch": 1.652008208736441, "grad_norm": 0.3972418048032003, "learning_rate": 0.0002991296680995781, "loss": 3.233959436416626, "step": 2818, "token_acc": 0.27440802215881877 }, { "epoch": 1.65259454705365, "grad_norm": 0.4602011784551465, "learning_rate": 0.0002991281035695035, "loss": 3.171598434448242, "step": 2819, "token_acc": 0.28290302186081656 }, { "epoch": 1.653180885370859, "grad_norm": 0.5057108721084328, "learning_rate": 0.0002991265376385714, "loss": 3.280641555786133, "step": 2820, "token_acc": 0.2681088861606198 }, { "epoch": 1.653767223688068, "grad_norm": 0.4435909377590639, "learning_rate": 0.0002991249703067964, "loss": 3.2387359142303467, "step": 2821, "token_acc": 0.2715591222014698 }, { "epoch": 1.654353562005277, "grad_norm": 0.4202352843563113, "learning_rate": 0.0002991234015741932, "loss": 3.1910641193389893, "step": 2822, "token_acc": 0.28135926746824796 }, { "epoch": 1.654939900322486, "grad_norm": 0.4206557046682502, "learning_rate": 0.00029912183144077664, "loss": 3.204280376434326, "step": 2823, "token_acc": 0.2804414409022423 }, { "epoch": 1.6555262386396952, "grad_norm": 0.4542571943296922, "learning_rate": 0.0002991202599065614, "loss": 3.228511095046997, "step": 2824, "token_acc": 0.27397860258541423 }, { "epoch": 1.6561125769569043, "grad_norm": 0.4749885204709126, "learning_rate": 0.0002991186869715623, "loss": 3.245894432067871, "step": 2825, "token_acc": 0.2718803233989589 }, { "epoch": 1.6566989152741132, "grad_norm": 0.35464321955210626, "learning_rate": 0.00029911711263579403, "loss": 3.2186279296875, "step": 2826, "token_acc": 0.2747720522431616 }, { "epoch": 1.657285253591322, "grad_norm": 0.42248244633721693, "learning_rate": 0.00029911553689927143, "loss": 3.230262041091919, "step": 2827, "token_acc": 0.27456979577936935 }, { "epoch": 1.6578715919085312, "grad_norm": 0.4667035752094275, "learning_rate": 0.0002991139597620093, "loss": 3.218432903289795, "step": 2828, "token_acc": 0.27650287918788025 }, { "epoch": 1.6584579302257403, "grad_norm": 0.47507935857195155, "learning_rate": 0.00029911238122402243, "loss": 3.2209808826446533, "step": 2829, "token_acc": 0.27657500185950634 }, { "epoch": 1.6590442685429494, "grad_norm": 0.47861342126262924, "learning_rate": 0.0002991108012853257, "loss": 3.2544357776641846, "step": 2830, "token_acc": 0.2702499202838164 }, { "epoch": 1.6596306068601583, "grad_norm": 0.39978005899052843, "learning_rate": 0.0002991092199459339, "loss": 3.1622705459594727, "step": 2831, "token_acc": 0.28341896682200185 }, { "epoch": 1.6602169451773672, "grad_norm": 0.4428136135693279, "learning_rate": 0.0002991076372058619, "loss": 3.2114734649658203, "step": 2832, "token_acc": 0.2784724793483396 }, { "epoch": 1.6608032834945763, "grad_norm": 0.44145785388244313, "learning_rate": 0.0002991060530651246, "loss": 3.2212629318237305, "step": 2833, "token_acc": 0.27632747845331423 }, { "epoch": 1.6613896218117854, "grad_norm": 0.44206036860571757, "learning_rate": 0.00029910446752373686, "loss": 3.214123010635376, "step": 2834, "token_acc": 0.2752622766839999 }, { "epoch": 1.6619759601289945, "grad_norm": 0.4133133760550389, "learning_rate": 0.00029910288058171354, "loss": 3.2422633171081543, "step": 2835, "token_acc": 0.27304393150784484 }, { "epoch": 1.6625622984462036, "grad_norm": 0.48891310044777486, "learning_rate": 0.0002991012922390696, "loss": 3.2133827209472656, "step": 2836, "token_acc": 0.27682369127956497 }, { "epoch": 1.6631486367634125, "grad_norm": 0.5363772781414919, "learning_rate": 0.0002990997024958199, "loss": 3.214707374572754, "step": 2837, "token_acc": 0.27630177905702463 }, { "epoch": 1.6637349750806214, "grad_norm": 0.4364771881159486, "learning_rate": 0.0002990981113519795, "loss": 3.2803666591644287, "step": 2838, "token_acc": 0.2696617585794232 }, { "epoch": 1.6643213133978305, "grad_norm": 0.4486979724519655, "learning_rate": 0.00029909651880756315, "loss": 3.2419018745422363, "step": 2839, "token_acc": 0.27528582101987176 }, { "epoch": 1.6649076517150396, "grad_norm": 0.5822000068867318, "learning_rate": 0.00029909492486258595, "loss": 3.227296829223633, "step": 2840, "token_acc": 0.274400288273448 }, { "epoch": 1.6654939900322487, "grad_norm": 0.5261569567025727, "learning_rate": 0.00029909332951706284, "loss": 3.1907236576080322, "step": 2841, "token_acc": 0.27935683677230194 }, { "epoch": 1.6660803283494576, "grad_norm": 0.49724005142374633, "learning_rate": 0.00029909173277100883, "loss": 3.233206272125244, "step": 2842, "token_acc": 0.2747210110117508 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5274464042339126, "learning_rate": 0.0002990901346244389, "loss": 3.245659112930298, "step": 2843, "token_acc": 0.27321410845180066 }, { "epoch": 1.6672530049838756, "grad_norm": 0.5558296269198623, "learning_rate": 0.000299088535077368, "loss": 3.2522130012512207, "step": 2844, "token_acc": 0.2724277417222365 }, { "epoch": 1.6678393433010847, "grad_norm": 0.5166506452069556, "learning_rate": 0.00029908693412981127, "loss": 3.2887752056121826, "step": 2845, "token_acc": 0.2664081697862963 }, { "epoch": 1.6684256816182939, "grad_norm": 0.3833862431555096, "learning_rate": 0.0002990853317817837, "loss": 3.2319154739379883, "step": 2846, "token_acc": 0.27538357016009085 }, { "epoch": 1.6690120199355027, "grad_norm": 0.497694338400513, "learning_rate": 0.00029908372803330027, "loss": 3.257106065750122, "step": 2847, "token_acc": 0.2720572313071932 }, { "epoch": 1.6695983582527119, "grad_norm": 0.504161388079991, "learning_rate": 0.0002990821228843761, "loss": 3.2067041397094727, "step": 2848, "token_acc": 0.2784462372165262 }, { "epoch": 1.6701846965699207, "grad_norm": 0.46879493539524586, "learning_rate": 0.00029908051633502635, "loss": 3.2096762657165527, "step": 2849, "token_acc": 0.27803603455028963 }, { "epoch": 1.6707710348871299, "grad_norm": 0.42829641333100577, "learning_rate": 0.000299078908385266, "loss": 3.200063467025757, "step": 2850, "token_acc": 0.27725802874722144 }, { "epoch": 1.671357373204339, "grad_norm": 0.45899376762916194, "learning_rate": 0.0002990772990351102, "loss": 3.238583564758301, "step": 2851, "token_acc": 0.273020788974488 }, { "epoch": 1.671943711521548, "grad_norm": 0.38646537877138676, "learning_rate": 0.000299075688284574, "loss": 3.297233819961548, "step": 2852, "token_acc": 0.2647120688657864 }, { "epoch": 1.672530049838757, "grad_norm": 0.3923370194459343, "learning_rate": 0.0002990740761336727, "loss": 3.247020959854126, "step": 2853, "token_acc": 0.27318545774921615 }, { "epoch": 1.6731163881559659, "grad_norm": 0.4644511971706538, "learning_rate": 0.00029907246258242126, "loss": 3.2347538471221924, "step": 2854, "token_acc": 0.27459427590272967 }, { "epoch": 1.673702726473175, "grad_norm": 0.39475447247516626, "learning_rate": 0.00029907084763083495, "loss": 3.208374500274658, "step": 2855, "token_acc": 0.278306472843667 }, { "epoch": 1.674289064790384, "grad_norm": 0.4430346427796089, "learning_rate": 0.00029906923127892885, "loss": 3.2527387142181396, "step": 2856, "token_acc": 0.27363797881404905 }, { "epoch": 1.6748754031075932, "grad_norm": 0.4868100026820017, "learning_rate": 0.00029906761352671823, "loss": 3.2725353240966797, "step": 2857, "token_acc": 0.26958010228243817 }, { "epoch": 1.675461741424802, "grad_norm": 0.43962473874674607, "learning_rate": 0.0002990659943742182, "loss": 3.2349705696105957, "step": 2858, "token_acc": 0.2738408823776341 }, { "epoch": 1.676048079742011, "grad_norm": 0.4669380202583218, "learning_rate": 0.0002990643738214441, "loss": 3.220262289047241, "step": 2859, "token_acc": 0.27696553781888705 }, { "epoch": 1.67663441805922, "grad_norm": 0.5290029478657404, "learning_rate": 0.00029906275186841107, "loss": 3.2136669158935547, "step": 2860, "token_acc": 0.2748721816010062 }, { "epoch": 1.6772207563764292, "grad_norm": 0.5053608750044666, "learning_rate": 0.00029906112851513434, "loss": 3.2090916633605957, "step": 2861, "token_acc": 0.2775112673444542 }, { "epoch": 1.6778070946936383, "grad_norm": 0.4995945617978085, "learning_rate": 0.00029905950376162916, "loss": 3.249103307723999, "step": 2862, "token_acc": 0.2719103179585188 }, { "epoch": 1.6783934330108474, "grad_norm": 0.4899050876239883, "learning_rate": 0.00029905787760791075, "loss": 3.2164385318756104, "step": 2863, "token_acc": 0.27659480793933616 }, { "epoch": 1.6789797713280563, "grad_norm": 0.4720769696939573, "learning_rate": 0.0002990562500539945, "loss": 3.2586801052093506, "step": 2864, "token_acc": 0.2702825130584553 }, { "epoch": 1.6795661096452652, "grad_norm": 0.46991483081344027, "learning_rate": 0.0002990546210998956, "loss": 3.2369980812072754, "step": 2865, "token_acc": 0.2757360598814078 }, { "epoch": 1.6801524479624743, "grad_norm": 0.47195767603199723, "learning_rate": 0.0002990529907456294, "loss": 3.2156364917755127, "step": 2866, "token_acc": 0.276450387257051 }, { "epoch": 1.6807387862796834, "grad_norm": 0.4275220616730097, "learning_rate": 0.00029905135899121126, "loss": 3.2788164615631104, "step": 2867, "token_acc": 0.26949163071986343 }, { "epoch": 1.6813251245968925, "grad_norm": 0.44233258282252413, "learning_rate": 0.00029904972583665637, "loss": 3.2445950508117676, "step": 2868, "token_acc": 0.27418893053883964 }, { "epoch": 1.6819114629141014, "grad_norm": 0.447608962005346, "learning_rate": 0.00029904809128198024, "loss": 3.188769817352295, "step": 2869, "token_acc": 0.2783678439657999 }, { "epoch": 1.6824978012313103, "grad_norm": 0.47553117662997496, "learning_rate": 0.00029904645532719806, "loss": 3.2812047004699707, "step": 2870, "token_acc": 0.2672803478397117 }, { "epoch": 1.6830841395485194, "grad_norm": 0.5071577131050147, "learning_rate": 0.00029904481797232534, "loss": 3.223794460296631, "step": 2871, "token_acc": 0.2739874377301278 }, { "epoch": 1.6836704778657285, "grad_norm": 0.4543249449654351, "learning_rate": 0.0002990431792173773, "loss": 3.2305033206939697, "step": 2872, "token_acc": 0.2742884819896181 }, { "epoch": 1.6842568161829377, "grad_norm": 0.40414519758355155, "learning_rate": 0.0002990415390623695, "loss": 3.212996482849121, "step": 2873, "token_acc": 0.27560592413666046 }, { "epoch": 1.6848431545001465, "grad_norm": 0.4417170995846462, "learning_rate": 0.0002990398975073173, "loss": 3.2344865798950195, "step": 2874, "token_acc": 0.2741679172507783 }, { "epoch": 1.6854294928173557, "grad_norm": 0.4642149503113735, "learning_rate": 0.000299038254552236, "loss": 3.227048873901367, "step": 2875, "token_acc": 0.2755617465566173 }, { "epoch": 1.6860158311345645, "grad_norm": 0.5137941631016929, "learning_rate": 0.0002990366101971412, "loss": 3.248990297317505, "step": 2876, "token_acc": 0.2727422981166032 }, { "epoch": 1.6866021694517737, "grad_norm": 0.4132054370104702, "learning_rate": 0.0002990349644420483, "loss": 3.285066604614258, "step": 2877, "token_acc": 0.2674370614940157 }, { "epoch": 1.6871885077689828, "grad_norm": 0.5001672302139621, "learning_rate": 0.0002990333172869727, "loss": 3.2116456031799316, "step": 2878, "token_acc": 0.27631278244266366 }, { "epoch": 1.6877748460861919, "grad_norm": 0.5272204765261164, "learning_rate": 0.0002990316687319299, "loss": 3.1955928802490234, "step": 2879, "token_acc": 0.2793326700772171 }, { "epoch": 1.6883611844034008, "grad_norm": 0.4621699302757071, "learning_rate": 0.0002990300187769354, "loss": 3.2302982807159424, "step": 2880, "token_acc": 0.27474503596545713 }, { "epoch": 1.6889475227206097, "grad_norm": 0.4440285107378725, "learning_rate": 0.00029902836742200467, "loss": 3.2027597427368164, "step": 2881, "token_acc": 0.28094687522222017 }, { "epoch": 1.6895338610378188, "grad_norm": 0.4905970091795241, "learning_rate": 0.0002990267146671533, "loss": 3.2533726692199707, "step": 2882, "token_acc": 0.27134760606425345 }, { "epoch": 1.6901201993550279, "grad_norm": 0.43316419970321957, "learning_rate": 0.00029902506051239676, "loss": 3.2209091186523438, "step": 2883, "token_acc": 0.2758372362648307 }, { "epoch": 1.690706537672237, "grad_norm": 0.4193898788072306, "learning_rate": 0.0002990234049577506, "loss": 3.2143421173095703, "step": 2884, "token_acc": 0.2753898372557133 }, { "epoch": 1.6912928759894459, "grad_norm": 0.44630613298124766, "learning_rate": 0.00029902174800323033, "loss": 3.251372814178467, "step": 2885, "token_acc": 0.2712665947927897 }, { "epoch": 1.6918792143066548, "grad_norm": 0.46453857681112154, "learning_rate": 0.0002990200896488515, "loss": 3.2222745418548584, "step": 2886, "token_acc": 0.27526902306139867 }, { "epoch": 1.692465552623864, "grad_norm": 0.45375993751477894, "learning_rate": 0.0002990184298946298, "loss": 3.201394557952881, "step": 2887, "token_acc": 0.27790923924182376 }, { "epoch": 1.693051890941073, "grad_norm": 0.4553226339871933, "learning_rate": 0.0002990167687405807, "loss": 3.18430757522583, "step": 2888, "token_acc": 0.28163958169486775 }, { "epoch": 1.6936382292582821, "grad_norm": 0.4548508088697926, "learning_rate": 0.0002990151061867199, "loss": 3.154754638671875, "step": 2889, "token_acc": 0.28382355272125914 }, { "epoch": 1.6942245675754912, "grad_norm": 0.41502553854796614, "learning_rate": 0.000299013442233063, "loss": 3.2125260829925537, "step": 2890, "token_acc": 0.27621796398688403 }, { "epoch": 1.6948109058927001, "grad_norm": 0.4230302723983323, "learning_rate": 0.0002990117768796256, "loss": 3.2401626110076904, "step": 2891, "token_acc": 0.2733638656577725 }, { "epoch": 1.695397244209909, "grad_norm": 0.3779837201386293, "learning_rate": 0.00029901011012642333, "loss": 3.2548422813415527, "step": 2892, "token_acc": 0.2714419669189004 }, { "epoch": 1.6959835825271181, "grad_norm": 0.49682878472345093, "learning_rate": 0.0002990084419734719, "loss": 3.244354248046875, "step": 2893, "token_acc": 0.2716279167864007 }, { "epoch": 1.6965699208443272, "grad_norm": 0.4271309472781941, "learning_rate": 0.0002990067724207869, "loss": 3.2306809425354004, "step": 2894, "token_acc": 0.27270328578745595 }, { "epoch": 1.6971562591615363, "grad_norm": 0.4582096075377558, "learning_rate": 0.00029900510146838407, "loss": 3.207265853881836, "step": 2895, "token_acc": 0.276791251498739 }, { "epoch": 1.6977425974787452, "grad_norm": 0.4281921138993705, "learning_rate": 0.00029900342911627913, "loss": 3.2117080688476562, "step": 2896, "token_acc": 0.27532571193161337 }, { "epoch": 1.6983289357959541, "grad_norm": 0.3957950439960161, "learning_rate": 0.0002990017553644877, "loss": 3.229311943054199, "step": 2897, "token_acc": 0.2743480964973934 }, { "epoch": 1.6989152741131632, "grad_norm": 0.3932749004914916, "learning_rate": 0.0002990000802130256, "loss": 3.1969499588012695, "step": 2898, "token_acc": 0.2780155003205345 }, { "epoch": 1.6995016124303723, "grad_norm": 0.37133416075991915, "learning_rate": 0.00029899840366190856, "loss": 3.246971368789673, "step": 2899, "token_acc": 0.2717865912245116 }, { "epoch": 1.7000879507475815, "grad_norm": 0.4429710927873473, "learning_rate": 0.0002989967257111523, "loss": 3.248538017272949, "step": 2900, "token_acc": 0.2719268858126851 }, { "epoch": 1.7006742890647903, "grad_norm": 0.43667158614321094, "learning_rate": 0.0002989950463607725, "loss": 3.2323217391967773, "step": 2901, "token_acc": 0.2745708641483001 }, { "epoch": 1.7012606273819995, "grad_norm": 0.500344499868889, "learning_rate": 0.0002989933656107851, "loss": 3.2650294303894043, "step": 2902, "token_acc": 0.27021428869943165 }, { "epoch": 1.7018469656992083, "grad_norm": 0.46211898363266674, "learning_rate": 0.00029899168346120573, "loss": 3.228623390197754, "step": 2903, "token_acc": 0.27370896727940786 }, { "epoch": 1.7024333040164175, "grad_norm": 0.4027540192380709, "learning_rate": 0.0002989899999120503, "loss": 3.202935218811035, "step": 2904, "token_acc": 0.2783711668134439 }, { "epoch": 1.7030196423336266, "grad_norm": 0.4546474344483752, "learning_rate": 0.0002989883149633346, "loss": 3.2350871562957764, "step": 2905, "token_acc": 0.2718994041615107 }, { "epoch": 1.7036059806508357, "grad_norm": 0.4412110771246422, "learning_rate": 0.0002989866286150744, "loss": 3.204291582107544, "step": 2906, "token_acc": 0.2771382781014429 }, { "epoch": 1.7041923189680446, "grad_norm": 0.4109946291592292, "learning_rate": 0.0002989849408672856, "loss": 3.199084758758545, "step": 2907, "token_acc": 0.2799282061702056 }, { "epoch": 1.7047786572852535, "grad_norm": 0.44875402976443873, "learning_rate": 0.00029898325171998406, "loss": 3.1845109462738037, "step": 2908, "token_acc": 0.2806975317132005 }, { "epoch": 1.7053649956024626, "grad_norm": 0.49086532630772184, "learning_rate": 0.0002989815611731856, "loss": 3.2574119567871094, "step": 2909, "token_acc": 0.26958782313082136 }, { "epoch": 1.7059513339196717, "grad_norm": 0.5105773884194543, "learning_rate": 0.00029897986922690616, "loss": 3.2497825622558594, "step": 2910, "token_acc": 0.27092517036567093 }, { "epoch": 1.7065376722368808, "grad_norm": 0.47524729864649207, "learning_rate": 0.00029897817588116156, "loss": 3.282766342163086, "step": 2911, "token_acc": 0.26748043497752105 }, { "epoch": 1.7071240105540897, "grad_norm": 0.5017116256579833, "learning_rate": 0.00029897648113596777, "loss": 3.2044057846069336, "step": 2912, "token_acc": 0.27696646556821214 }, { "epoch": 1.7077103488712986, "grad_norm": 0.5954270924794584, "learning_rate": 0.00029897478499134073, "loss": 3.224130153656006, "step": 2913, "token_acc": 0.27602873692921087 }, { "epoch": 1.7082966871885077, "grad_norm": 0.45629157144050586, "learning_rate": 0.00029897308744729627, "loss": 3.2441396713256836, "step": 2914, "token_acc": 0.27346893747570367 }, { "epoch": 1.7088830255057168, "grad_norm": 0.42205608635520075, "learning_rate": 0.00029897138850385044, "loss": 3.206831932067871, "step": 2915, "token_acc": 0.2775562948502928 }, { "epoch": 1.709469363822926, "grad_norm": 0.4064019208811385, "learning_rate": 0.0002989696881610191, "loss": 3.198359489440918, "step": 2916, "token_acc": 0.27816397437176044 }, { "epoch": 1.7100557021401348, "grad_norm": 0.40080909909490353, "learning_rate": 0.00029896798641881834, "loss": 3.207329750061035, "step": 2917, "token_acc": 0.2773968619890265 }, { "epoch": 1.710642040457344, "grad_norm": 0.43281358375828005, "learning_rate": 0.00029896628327726407, "loss": 3.2210593223571777, "step": 2918, "token_acc": 0.27596926979196346 }, { "epoch": 1.7112283787745528, "grad_norm": 0.3944509896842094, "learning_rate": 0.0002989645787363723, "loss": 3.258690357208252, "step": 2919, "token_acc": 0.27137178542420837 }, { "epoch": 1.711814717091762, "grad_norm": 0.43436806475673606, "learning_rate": 0.000298962872796159, "loss": 3.1930580139160156, "step": 2920, "token_acc": 0.27941056078591897 }, { "epoch": 1.712401055408971, "grad_norm": 0.45859538596934785, "learning_rate": 0.0002989611654566403, "loss": 3.235530376434326, "step": 2921, "token_acc": 0.27269450002873546 }, { "epoch": 1.7129873937261801, "grad_norm": 0.440224515759211, "learning_rate": 0.0002989594567178322, "loss": 3.2450520992279053, "step": 2922, "token_acc": 0.2715480846434769 }, { "epoch": 1.713573732043389, "grad_norm": 0.36498507951938525, "learning_rate": 0.00029895774657975063, "loss": 3.2106118202209473, "step": 2923, "token_acc": 0.27674604946866505 }, { "epoch": 1.714160070360598, "grad_norm": 0.4002467923174126, "learning_rate": 0.00029895603504241186, "loss": 3.2360215187072754, "step": 2924, "token_acc": 0.2749669419696083 }, { "epoch": 1.714746408677807, "grad_norm": 0.38645027886485367, "learning_rate": 0.0002989543221058318, "loss": 3.2303028106689453, "step": 2925, "token_acc": 0.27771741846889625 }, { "epoch": 1.7153327469950161, "grad_norm": 0.3485442911327244, "learning_rate": 0.0002989526077700266, "loss": 3.184638023376465, "step": 2926, "token_acc": 0.28187811168627697 }, { "epoch": 1.7159190853122253, "grad_norm": 0.3488914602193716, "learning_rate": 0.0002989508920350124, "loss": 3.186772584915161, "step": 2927, "token_acc": 0.27990037041767785 }, { "epoch": 1.7165054236294341, "grad_norm": 0.46298604829372964, "learning_rate": 0.0002989491749008053, "loss": 3.2689456939697266, "step": 2928, "token_acc": 0.26986889918436674 }, { "epoch": 1.7170917619466433, "grad_norm": 0.4730672590206892, "learning_rate": 0.0002989474563674213, "loss": 3.2138149738311768, "step": 2929, "token_acc": 0.2749291875472617 }, { "epoch": 1.7176781002638521, "grad_norm": 0.44781503470620443, "learning_rate": 0.00029894573643487674, "loss": 3.209279775619507, "step": 2930, "token_acc": 0.2783731963956184 }, { "epoch": 1.7182644385810613, "grad_norm": 0.4301862174608705, "learning_rate": 0.0002989440151031877, "loss": 3.203165054321289, "step": 2931, "token_acc": 0.2791456965884623 }, { "epoch": 1.7188507768982704, "grad_norm": 0.42165815586939914, "learning_rate": 0.00029894229237237036, "loss": 3.2482869625091553, "step": 2932, "token_acc": 0.2714959799792218 }, { "epoch": 1.7194371152154795, "grad_norm": 0.424637432731858, "learning_rate": 0.0002989405682424408, "loss": 3.2542104721069336, "step": 2933, "token_acc": 0.2710513070390559 }, { "epoch": 1.7200234535326884, "grad_norm": 0.45401365198124943, "learning_rate": 0.0002989388427134154, "loss": 3.2733314037323, "step": 2934, "token_acc": 0.26972491183071495 }, { "epoch": 1.7206097918498973, "grad_norm": 0.4222743477856575, "learning_rate": 0.0002989371157853102, "loss": 3.246250629425049, "step": 2935, "token_acc": 0.27324763697284327 }, { "epoch": 1.7211961301671064, "grad_norm": 0.41210246551814894, "learning_rate": 0.00029893538745814154, "loss": 3.1802730560302734, "step": 2936, "token_acc": 0.27988948775800115 }, { "epoch": 1.7217824684843155, "grad_norm": 0.4265238932587353, "learning_rate": 0.00029893365773192554, "loss": 3.176746129989624, "step": 2937, "token_acc": 0.2818218227921725 }, { "epoch": 1.7223688068015246, "grad_norm": 0.43244487621907757, "learning_rate": 0.0002989319266066786, "loss": 3.2159221172332764, "step": 2938, "token_acc": 0.27829120328973084 }, { "epoch": 1.7229551451187335, "grad_norm": 0.37385990892769094, "learning_rate": 0.00029893019408241684, "loss": 3.2271828651428223, "step": 2939, "token_acc": 0.27446952549274956 }, { "epoch": 1.7235414834359424, "grad_norm": 0.46642081374639366, "learning_rate": 0.00029892846015915666, "loss": 3.2303805351257324, "step": 2940, "token_acc": 0.2743567876371707 }, { "epoch": 1.7241278217531515, "grad_norm": 0.43244335862197014, "learning_rate": 0.0002989267248369142, "loss": 3.235325813293457, "step": 2941, "token_acc": 0.2727040623591539 }, { "epoch": 1.7247141600703606, "grad_norm": 0.4588509065278779, "learning_rate": 0.0002989249881157059, "loss": 3.1970572471618652, "step": 2942, "token_acc": 0.27793381819534607 }, { "epoch": 1.7253004983875697, "grad_norm": 0.5045604752131334, "learning_rate": 0.00029892324999554796, "loss": 3.275254249572754, "step": 2943, "token_acc": 0.2693328440632502 }, { "epoch": 1.7258868367047786, "grad_norm": 0.456024552990943, "learning_rate": 0.0002989215104764568, "loss": 3.2023587226867676, "step": 2944, "token_acc": 0.2777041561788561 }, { "epoch": 1.7264731750219877, "grad_norm": 0.4071224382283433, "learning_rate": 0.00029891976955844873, "loss": 3.1918816566467285, "step": 2945, "token_acc": 0.28053982312513964 }, { "epoch": 1.7270595133391966, "grad_norm": 0.43218059990150653, "learning_rate": 0.0002989180272415401, "loss": 3.1881520748138428, "step": 2946, "token_acc": 0.2790993683245183 }, { "epoch": 1.7276458516564057, "grad_norm": 0.47266101181725195, "learning_rate": 0.0002989162835257472, "loss": 3.2644615173339844, "step": 2947, "token_acc": 0.2716276140746766 }, { "epoch": 1.7282321899736148, "grad_norm": 0.5337284360939488, "learning_rate": 0.00029891453841108655, "loss": 3.255692958831787, "step": 2948, "token_acc": 0.2716893788305622 }, { "epoch": 1.728818528290824, "grad_norm": 0.5251778305953624, "learning_rate": 0.0002989127918975745, "loss": 3.244690418243408, "step": 2949, "token_acc": 0.27068144583248005 }, { "epoch": 1.7294048666080328, "grad_norm": 0.5010245638083478, "learning_rate": 0.0002989110439852274, "loss": 3.2158520221710205, "step": 2950, "token_acc": 0.2768921130738729 }, { "epoch": 1.7299912049252417, "grad_norm": 0.5927692292672526, "learning_rate": 0.0002989092946740617, "loss": 3.231207847595215, "step": 2951, "token_acc": 0.2736133851821531 }, { "epoch": 1.7305775432424508, "grad_norm": 0.4749809044495472, "learning_rate": 0.0002989075439640938, "loss": 3.2253758907318115, "step": 2952, "token_acc": 0.27382944807080734 }, { "epoch": 1.73116388155966, "grad_norm": 0.43492546237654195, "learning_rate": 0.0002989057918553402, "loss": 3.2406039237976074, "step": 2953, "token_acc": 0.27328072508950585 }, { "epoch": 1.731750219876869, "grad_norm": 0.4900718881293884, "learning_rate": 0.0002989040383478174, "loss": 3.1991477012634277, "step": 2954, "token_acc": 0.27923704050412657 }, { "epoch": 1.732336558194078, "grad_norm": 0.479484525788494, "learning_rate": 0.00029890228344154175, "loss": 3.2283072471618652, "step": 2955, "token_acc": 0.2752824818593782 }, { "epoch": 1.732922896511287, "grad_norm": 0.4375042735530878, "learning_rate": 0.0002989005271365298, "loss": 3.2165286540985107, "step": 2956, "token_acc": 0.2775749756112912 }, { "epoch": 1.733509234828496, "grad_norm": 0.4827776263469403, "learning_rate": 0.0002988987694327981, "loss": 3.210174083709717, "step": 2957, "token_acc": 0.27810936002555636 }, { "epoch": 1.734095573145705, "grad_norm": 0.4358646178546923, "learning_rate": 0.00029889701033036304, "loss": 3.191426992416382, "step": 2958, "token_acc": 0.27815224768965696 }, { "epoch": 1.7346819114629142, "grad_norm": 0.4602293276345902, "learning_rate": 0.0002988952498292412, "loss": 3.2631454467773438, "step": 2959, "token_acc": 0.27076818886472703 }, { "epoch": 1.7352682497801233, "grad_norm": 0.4666881351243424, "learning_rate": 0.0002988934879294492, "loss": 3.255311965942383, "step": 2960, "token_acc": 0.270743254410424 }, { "epoch": 1.7358545880973322, "grad_norm": 0.436412379374655, "learning_rate": 0.00029889172463100344, "loss": 3.2340545654296875, "step": 2961, "token_acc": 0.2738752672045908 }, { "epoch": 1.736440926414541, "grad_norm": 0.456885558621909, "learning_rate": 0.0002988899599339206, "loss": 3.228849411010742, "step": 2962, "token_acc": 0.27373197083399864 }, { "epoch": 1.7370272647317502, "grad_norm": 0.476080936728531, "learning_rate": 0.0002988881938382172, "loss": 3.198176860809326, "step": 2963, "token_acc": 0.2788898440378239 }, { "epoch": 1.7376136030489593, "grad_norm": 0.4288188813084186, "learning_rate": 0.0002988864263439099, "loss": 3.2011232376098633, "step": 2964, "token_acc": 0.27934262479407995 }, { "epoch": 1.7381999413661684, "grad_norm": 0.45945776994352666, "learning_rate": 0.0002988846574510152, "loss": 3.294987916946411, "step": 2965, "token_acc": 0.26492802736638504 }, { "epoch": 1.7387862796833773, "grad_norm": 0.48926988423464574, "learning_rate": 0.00029888288715954975, "loss": 3.279208183288574, "step": 2966, "token_acc": 0.2673917392648637 }, { "epoch": 1.7393726180005862, "grad_norm": 0.5209673038047091, "learning_rate": 0.00029888111546953023, "loss": 3.286663055419922, "step": 2967, "token_acc": 0.2667663194636676 }, { "epoch": 1.7399589563177953, "grad_norm": 0.4585282185867643, "learning_rate": 0.0002988793423809733, "loss": 3.192500114440918, "step": 2968, "token_acc": 0.28024420872632055 }, { "epoch": 1.7405452946350044, "grad_norm": 0.5561036413315082, "learning_rate": 0.0002988775678938955, "loss": 3.2186150550842285, "step": 2969, "token_acc": 0.27541355955394403 }, { "epoch": 1.7411316329522135, "grad_norm": 0.48782343135866413, "learning_rate": 0.0002988757920083136, "loss": 3.230574131011963, "step": 2970, "token_acc": 0.2729369981148386 }, { "epoch": 1.7417179712694224, "grad_norm": 0.4347209747158836, "learning_rate": 0.0002988740147242442, "loss": 3.2144205570220947, "step": 2971, "token_acc": 0.27357395619474045 }, { "epoch": 1.7423043095866315, "grad_norm": 0.5138337717978827, "learning_rate": 0.000298872236041704, "loss": 3.2159461975097656, "step": 2972, "token_acc": 0.2749179423889925 }, { "epoch": 1.7428906479038404, "grad_norm": 0.45446442986291996, "learning_rate": 0.00029887045596070985, "loss": 3.2635390758514404, "step": 2973, "token_acc": 0.2695824953224163 }, { "epoch": 1.7434769862210495, "grad_norm": 0.44277223115423175, "learning_rate": 0.0002988686744812783, "loss": 3.1536710262298584, "step": 2974, "token_acc": 0.28362740158625455 }, { "epoch": 1.7440633245382586, "grad_norm": 0.4477785635117505, "learning_rate": 0.00029886689160342624, "loss": 3.149986505508423, "step": 2975, "token_acc": 0.28682943764216984 }, { "epoch": 1.7446496628554677, "grad_norm": 0.45705595430088247, "learning_rate": 0.0002988651073271703, "loss": 3.223367929458618, "step": 2976, "token_acc": 0.276728867452036 }, { "epoch": 1.7452360011726766, "grad_norm": 0.41713350095968776, "learning_rate": 0.0002988633216525273, "loss": 3.211010694503784, "step": 2977, "token_acc": 0.27826329750104706 }, { "epoch": 1.7458223394898855, "grad_norm": 0.39944145899523875, "learning_rate": 0.0002988615345795139, "loss": 3.209463119506836, "step": 2978, "token_acc": 0.2767651924025843 }, { "epoch": 1.7464086778070946, "grad_norm": 0.45146553948988505, "learning_rate": 0.000298859746108147, "loss": 3.2298130989074707, "step": 2979, "token_acc": 0.2757337238391604 }, { "epoch": 1.7469950161243037, "grad_norm": 0.36290085317086257, "learning_rate": 0.00029885795623844344, "loss": 3.2437000274658203, "step": 2980, "token_acc": 0.2720106261705073 }, { "epoch": 1.7475813544415129, "grad_norm": 0.3860731857418535, "learning_rate": 0.00029885616497041993, "loss": 3.235504388809204, "step": 2981, "token_acc": 0.27485309832549076 }, { "epoch": 1.7481676927587217, "grad_norm": 0.4111734952727702, "learning_rate": 0.00029885437230409335, "loss": 3.174206495285034, "step": 2982, "token_acc": 0.27966847700187897 }, { "epoch": 1.7487540310759309, "grad_norm": 0.38123763080776796, "learning_rate": 0.0002988525782394805, "loss": 3.1991724967956543, "step": 2983, "token_acc": 0.2780540644376381 }, { "epoch": 1.7493403693931397, "grad_norm": 0.357392931881769, "learning_rate": 0.0002988507827765983, "loss": 3.228595018386841, "step": 2984, "token_acc": 0.2724567164099995 }, { "epoch": 1.7499267077103489, "grad_norm": 0.41246354276358044, "learning_rate": 0.0002988489859154635, "loss": 3.197805404663086, "step": 2985, "token_acc": 0.279079239565822 }, { "epoch": 1.750513046027558, "grad_norm": 0.39625737378171655, "learning_rate": 0.0002988471876560931, "loss": 3.2019221782684326, "step": 2986, "token_acc": 0.2785406239815266 }, { "epoch": 1.751099384344767, "grad_norm": 0.393408487160217, "learning_rate": 0.000298845387998504, "loss": 3.1612119674682617, "step": 2987, "token_acc": 0.2818130645441298 }, { "epoch": 1.751685722661976, "grad_norm": 0.47261885390619, "learning_rate": 0.000298843586942713, "loss": 3.2373523712158203, "step": 2988, "token_acc": 0.27260510552464284 }, { "epoch": 1.7522720609791849, "grad_norm": 0.4836075771794946, "learning_rate": 0.0002988417844887371, "loss": 3.203369140625, "step": 2989, "token_acc": 0.27700130344444873 }, { "epoch": 1.752858399296394, "grad_norm": 0.40921260232614215, "learning_rate": 0.0002988399806365931, "loss": 3.1472361087799072, "step": 2990, "token_acc": 0.28747061256761075 }, { "epoch": 1.753444737613603, "grad_norm": 0.374756966045756, "learning_rate": 0.00029883817538629815, "loss": 3.188462495803833, "step": 2991, "token_acc": 0.2792496741037529 }, { "epoch": 1.7540310759308122, "grad_norm": 0.4606709485931363, "learning_rate": 0.00029883636873786904, "loss": 3.1872105598449707, "step": 2992, "token_acc": 0.2783044378113592 }, { "epoch": 1.754617414248021, "grad_norm": 0.4391831791058478, "learning_rate": 0.00029883456069132284, "loss": 3.2252631187438965, "step": 2993, "token_acc": 0.27628494904798434 }, { "epoch": 1.75520375256523, "grad_norm": 0.4033532181645478, "learning_rate": 0.00029883275124667654, "loss": 3.237276077270508, "step": 2994, "token_acc": 0.2730648604485676 }, { "epoch": 1.755790090882439, "grad_norm": 0.3774001762006789, "learning_rate": 0.000298830940403947, "loss": 3.160149574279785, "step": 2995, "token_acc": 0.28180051949140117 }, { "epoch": 1.7563764291996482, "grad_norm": 0.36910819264381955, "learning_rate": 0.00029882912816315145, "loss": 3.2079246044158936, "step": 2996, "token_acc": 0.27753857653359 }, { "epoch": 1.7569627675168573, "grad_norm": 0.3962695436774894, "learning_rate": 0.0002988273145243067, "loss": 3.204803466796875, "step": 2997, "token_acc": 0.27699695509079575 }, { "epoch": 1.7575491058340662, "grad_norm": 0.4853136571049276, "learning_rate": 0.0002988254994874299, "loss": 3.2736048698425293, "step": 2998, "token_acc": 0.26733801717408273 }, { "epoch": 1.7581354441512753, "grad_norm": 0.48233065037883466, "learning_rate": 0.00029882368305253807, "loss": 3.1931209564208984, "step": 2999, "token_acc": 0.27754757190955975 }, { "epoch": 1.7587217824684842, "grad_norm": 0.5568223567602844, "learning_rate": 0.0002988218652196483, "loss": 3.2376389503479004, "step": 3000, "token_acc": 0.2734502353783574 }, { "epoch": 1.7593081207856933, "grad_norm": 0.568473019553501, "learning_rate": 0.0002988200459887776, "loss": 3.232337474822998, "step": 3001, "token_acc": 0.27418963036520355 }, { "epoch": 1.7598944591029024, "grad_norm": 0.48928417508193744, "learning_rate": 0.0002988182253599432, "loss": 3.216559410095215, "step": 3002, "token_acc": 0.2777156523879335 }, { "epoch": 1.7604807974201115, "grad_norm": 0.44999607097181255, "learning_rate": 0.000298816403333162, "loss": 3.209160804748535, "step": 3003, "token_acc": 0.27667326341302306 }, { "epoch": 1.7610671357373204, "grad_norm": 0.4076155523978033, "learning_rate": 0.00029881457990845123, "loss": 3.2078170776367188, "step": 3004, "token_acc": 0.27667922201882494 }, { "epoch": 1.7616534740545293, "grad_norm": 0.4504196985302263, "learning_rate": 0.0002988127550858281, "loss": 3.2098276615142822, "step": 3005, "token_acc": 0.27558754118031203 }, { "epoch": 1.7622398123717384, "grad_norm": 0.4019528533199475, "learning_rate": 0.0002988109288653096, "loss": 3.184138774871826, "step": 3006, "token_acc": 0.2793607561353662 }, { "epoch": 1.7628261506889475, "grad_norm": 0.33582318540561285, "learning_rate": 0.00029880910124691296, "loss": 3.2205710411071777, "step": 3007, "token_acc": 0.2741717958840845 }, { "epoch": 1.7634124890061567, "grad_norm": 0.41831368159712556, "learning_rate": 0.0002988072722306554, "loss": 3.1818723678588867, "step": 3008, "token_acc": 0.28001851379461784 }, { "epoch": 1.7639988273233655, "grad_norm": 0.38351976240962415, "learning_rate": 0.00029880544181655396, "loss": 3.217712163925171, "step": 3009, "token_acc": 0.27636742671009773 }, { "epoch": 1.7645851656405747, "grad_norm": 0.40290249676749684, "learning_rate": 0.0002988036100046259, "loss": 3.2289366722106934, "step": 3010, "token_acc": 0.2744454179723781 }, { "epoch": 1.7651715039577835, "grad_norm": 0.44901830164436773, "learning_rate": 0.00029880177679488846, "loss": 3.2276575565338135, "step": 3011, "token_acc": 0.27476908782860576 }, { "epoch": 1.7657578422749927, "grad_norm": 0.3842510012972663, "learning_rate": 0.0002987999421873589, "loss": 3.196608781814575, "step": 3012, "token_acc": 0.27771141717080705 }, { "epoch": 1.7663441805922018, "grad_norm": 0.36832728374962936, "learning_rate": 0.00029879810618205433, "loss": 3.2251601219177246, "step": 3013, "token_acc": 0.27583368182403983 }, { "epoch": 1.7669305189094109, "grad_norm": 0.48137554259166715, "learning_rate": 0.00029879626877899205, "loss": 3.229835033416748, "step": 3014, "token_acc": 0.2743034136651158 }, { "epoch": 1.7675168572266198, "grad_norm": 0.5057034826244041, "learning_rate": 0.00029879442997818935, "loss": 3.205227851867676, "step": 3015, "token_acc": 0.2788544038221311 }, { "epoch": 1.7681031955438287, "grad_norm": 0.5257169779801291, "learning_rate": 0.0002987925897796635, "loss": 3.2185215950012207, "step": 3016, "token_acc": 0.2769872201577087 }, { "epoch": 1.7686895338610378, "grad_norm": 0.47184697729205993, "learning_rate": 0.00029879074818343177, "loss": 3.227670192718506, "step": 3017, "token_acc": 0.2734811350695923 }, { "epoch": 1.7692758721782469, "grad_norm": 0.3801213722968637, "learning_rate": 0.0002987889051895114, "loss": 3.2056775093078613, "step": 3018, "token_acc": 0.2781037714528801 }, { "epoch": 1.769862210495456, "grad_norm": 0.454100575991093, "learning_rate": 0.0002987870607979198, "loss": 3.20422101020813, "step": 3019, "token_acc": 0.2795593654526557 }, { "epoch": 1.770448548812665, "grad_norm": 0.4718796378675752, "learning_rate": 0.00029878521500867426, "loss": 3.25378680229187, "step": 3020, "token_acc": 0.2707251617422838 }, { "epoch": 1.7710348871298738, "grad_norm": 0.4445528067020484, "learning_rate": 0.0002987833678217922, "loss": 3.2453157901763916, "step": 3021, "token_acc": 0.2721935817441633 }, { "epoch": 1.771621225447083, "grad_norm": 0.4086999590296976, "learning_rate": 0.0002987815192372907, "loss": 3.191281318664551, "step": 3022, "token_acc": 0.27838477379414434 }, { "epoch": 1.772207563764292, "grad_norm": 0.4110438884012705, "learning_rate": 0.00029877966925518745, "loss": 3.2246382236480713, "step": 3023, "token_acc": 0.273426226978502 }, { "epoch": 1.7727939020815011, "grad_norm": 0.381426325772649, "learning_rate": 0.00029877781787549966, "loss": 3.2010087966918945, "step": 3024, "token_acc": 0.27750102082482647 }, { "epoch": 1.77338024039871, "grad_norm": 0.4412252322976521, "learning_rate": 0.0002987759650982448, "loss": 3.1967265605926514, "step": 3025, "token_acc": 0.27849687793362243 }, { "epoch": 1.7739665787159191, "grad_norm": 0.43051256948339794, "learning_rate": 0.00029877411092344016, "loss": 3.250744104385376, "step": 3026, "token_acc": 0.2709132503932788 }, { "epoch": 1.774552917033128, "grad_norm": 0.4485402817527516, "learning_rate": 0.00029877225535110326, "loss": 3.2088661193847656, "step": 3027, "token_acc": 0.2769502320270681 }, { "epoch": 1.7751392553503371, "grad_norm": 0.4580295659152123, "learning_rate": 0.00029877039838125145, "loss": 3.2266652584075928, "step": 3028, "token_acc": 0.2749788370637938 }, { "epoch": 1.7757255936675462, "grad_norm": 0.4158067344416709, "learning_rate": 0.00029876854001390223, "loss": 3.1609230041503906, "step": 3029, "token_acc": 0.2848933664791312 }, { "epoch": 1.7763119319847553, "grad_norm": 0.48798610508802487, "learning_rate": 0.0002987666802490731, "loss": 3.217484474182129, "step": 3030, "token_acc": 0.27469376841559073 }, { "epoch": 1.7768982703019642, "grad_norm": 0.42652496634360815, "learning_rate": 0.0002987648190867814, "loss": 3.2117695808410645, "step": 3031, "token_acc": 0.27565490689494715 }, { "epoch": 1.7774846086191731, "grad_norm": 0.3844513614065584, "learning_rate": 0.0002987629565270447, "loss": 3.2262282371520996, "step": 3032, "token_acc": 0.27447388005705664 }, { "epoch": 1.7780709469363822, "grad_norm": 0.41181120203171784, "learning_rate": 0.00029876109256988056, "loss": 3.176386833190918, "step": 3033, "token_acc": 0.2803974298441894 }, { "epoch": 1.7786572852535913, "grad_norm": 0.4113338658326975, "learning_rate": 0.00029875922721530636, "loss": 3.200834035873413, "step": 3034, "token_acc": 0.27865419479576997 }, { "epoch": 1.7792436235708005, "grad_norm": 0.3977930974238967, "learning_rate": 0.00029875736046333965, "loss": 3.240610122680664, "step": 3035, "token_acc": 0.27340778734864213 }, { "epoch": 1.7798299618880093, "grad_norm": 0.39834230440344687, "learning_rate": 0.000298755492313998, "loss": 3.20261287689209, "step": 3036, "token_acc": 0.27678968373281115 }, { "epoch": 1.7804163002052185, "grad_norm": 0.41900610497518376, "learning_rate": 0.00029875362276729896, "loss": 3.193077564239502, "step": 3037, "token_acc": 0.2809007872862043 }, { "epoch": 1.7810026385224274, "grad_norm": 0.399033025506431, "learning_rate": 0.0002987517518232601, "loss": 3.227677345275879, "step": 3038, "token_acc": 0.273995111843703 }, { "epoch": 1.7815889768396365, "grad_norm": 0.3344428067050935, "learning_rate": 0.00029874987948189894, "loss": 3.1884307861328125, "step": 3039, "token_acc": 0.27842918879067613 }, { "epoch": 1.7821753151568456, "grad_norm": 0.4359989969882841, "learning_rate": 0.00029874800574323314, "loss": 3.2151083946228027, "step": 3040, "token_acc": 0.27566703024444067 }, { "epoch": 1.7827616534740547, "grad_norm": 0.4230103452265189, "learning_rate": 0.00029874613060728027, "loss": 3.217792510986328, "step": 3041, "token_acc": 0.27678232095712496 }, { "epoch": 1.7833479917912636, "grad_norm": 0.37760816007101156, "learning_rate": 0.00029874425407405795, "loss": 3.196533679962158, "step": 3042, "token_acc": 0.2790759892876156 }, { "epoch": 1.7839343301084725, "grad_norm": 0.4072870011474391, "learning_rate": 0.00029874237614358374, "loss": 3.2531285285949707, "step": 3043, "token_acc": 0.274150277896537 }, { "epoch": 1.7845206684256816, "grad_norm": 0.4815636661061652, "learning_rate": 0.00029874049681587536, "loss": 3.203864574432373, "step": 3044, "token_acc": 0.2765805045157272 }, { "epoch": 1.7851070067428907, "grad_norm": 0.5592637574956612, "learning_rate": 0.0002987386160909505, "loss": 3.1920690536499023, "step": 3045, "token_acc": 0.27948762639598135 }, { "epoch": 1.7856933450600998, "grad_norm": 0.5116728882443193, "learning_rate": 0.00029873673396882666, "loss": 3.2219960689544678, "step": 3046, "token_acc": 0.2767154165678613 }, { "epoch": 1.7862796833773087, "grad_norm": 0.4766312764029502, "learning_rate": 0.0002987348504495217, "loss": 3.2557475566864014, "step": 3047, "token_acc": 0.2717916698635102 }, { "epoch": 1.7868660216945176, "grad_norm": 0.43202190142552827, "learning_rate": 0.00029873296553305326, "loss": 3.2439732551574707, "step": 3048, "token_acc": 0.27214608965074516 }, { "epoch": 1.7874523600117267, "grad_norm": 0.4994665538617656, "learning_rate": 0.000298731079219439, "loss": 3.1739494800567627, "step": 3049, "token_acc": 0.2806197167314858 }, { "epoch": 1.7880386983289358, "grad_norm": 0.527347381215796, "learning_rate": 0.00029872919150869667, "loss": 3.2109100818634033, "step": 3050, "token_acc": 0.27657140469188296 }, { "epoch": 1.788625036646145, "grad_norm": 0.46072518418200253, "learning_rate": 0.00029872730240084405, "loss": 3.201361656188965, "step": 3051, "token_acc": 0.27859193470575677 }, { "epoch": 1.7892113749633538, "grad_norm": 0.38981800606865175, "learning_rate": 0.00029872541189589875, "loss": 3.1890811920166016, "step": 3052, "token_acc": 0.27871792914699434 }, { "epoch": 1.789797713280563, "grad_norm": 0.5780406996167121, "learning_rate": 0.00029872351999387866, "loss": 3.183228015899658, "step": 3053, "token_acc": 0.2805518983547949 }, { "epoch": 1.7903840515977718, "grad_norm": 0.43280506158346155, "learning_rate": 0.0002987216266948015, "loss": 3.2463531494140625, "step": 3054, "token_acc": 0.2711178725543592 }, { "epoch": 1.790970389914981, "grad_norm": 0.4297012456614373, "learning_rate": 0.0002987197319986851, "loss": 3.169093132019043, "step": 3055, "token_acc": 0.2816073422464311 }, { "epoch": 1.79155672823219, "grad_norm": 0.4123924087524073, "learning_rate": 0.0002987178359055472, "loss": 3.23175048828125, "step": 3056, "token_acc": 0.27464615431451994 }, { "epoch": 1.7921430665493991, "grad_norm": 0.4720911078423588, "learning_rate": 0.0002987159384154056, "loss": 3.1703243255615234, "step": 3057, "token_acc": 0.2829324747234779 }, { "epoch": 1.792729404866608, "grad_norm": 0.38726885822971013, "learning_rate": 0.00029871403952827817, "loss": 3.1636850833892822, "step": 3058, "token_acc": 0.2844534835489784 }, { "epoch": 1.793315743183817, "grad_norm": 0.43873521591992914, "learning_rate": 0.0002987121392441827, "loss": 3.1982152462005615, "step": 3059, "token_acc": 0.2795582042355231 }, { "epoch": 1.793902081501026, "grad_norm": 0.43347233583267847, "learning_rate": 0.0002987102375631371, "loss": 3.258458137512207, "step": 3060, "token_acc": 0.26988021052295474 }, { "epoch": 1.7944884198182351, "grad_norm": 0.39534045915902943, "learning_rate": 0.00029870833448515926, "loss": 3.2241101264953613, "step": 3061, "token_acc": 0.27467076428063153 }, { "epoch": 1.7950747581354443, "grad_norm": 0.4193269975284209, "learning_rate": 0.00029870643001026696, "loss": 3.208967685699463, "step": 3062, "token_acc": 0.27696220120128684 }, { "epoch": 1.7956610964526531, "grad_norm": 0.5125322557948351, "learning_rate": 0.0002987045241384782, "loss": 3.2821810245513916, "step": 3063, "token_acc": 0.26538648101121354 }, { "epoch": 1.7962474347698623, "grad_norm": 0.5024411216156999, "learning_rate": 0.0002987026168698107, "loss": 3.2589094638824463, "step": 3064, "token_acc": 0.27228363606476536 }, { "epoch": 1.7968337730870712, "grad_norm": 0.44702953811579343, "learning_rate": 0.0002987007082042826, "loss": 3.162526845932007, "step": 3065, "token_acc": 0.2839980640182087 }, { "epoch": 1.7974201114042803, "grad_norm": 0.38903732352117193, "learning_rate": 0.0002986987981419117, "loss": 3.2290239334106445, "step": 3066, "token_acc": 0.2735682559436958 }, { "epoch": 1.7980064497214894, "grad_norm": 0.4401312455932557, "learning_rate": 0.0002986968866827159, "loss": 3.2162981033325195, "step": 3067, "token_acc": 0.2752204465482574 }, { "epoch": 1.7985927880386985, "grad_norm": 0.4802073775247277, "learning_rate": 0.00029869497382671324, "loss": 3.254286766052246, "step": 3068, "token_acc": 0.2712889913985752 }, { "epoch": 1.7991791263559074, "grad_norm": 0.4204199252379988, "learning_rate": 0.0002986930595739217, "loss": 3.2674221992492676, "step": 3069, "token_acc": 0.2700633600098783 }, { "epoch": 1.7997654646731163, "grad_norm": 0.42891477123289806, "learning_rate": 0.0002986911439243592, "loss": 3.1940321922302246, "step": 3070, "token_acc": 0.2777882518394717 }, { "epoch": 1.8003518029903254, "grad_norm": 0.49901467444925296, "learning_rate": 0.0002986892268780438, "loss": 3.2779791355133057, "step": 3071, "token_acc": 0.26902503369219644 }, { "epoch": 1.8009381413075345, "grad_norm": 0.41877885587213687, "learning_rate": 0.00029868730843499343, "loss": 3.2141828536987305, "step": 3072, "token_acc": 0.275964811576404 }, { "epoch": 1.8015244796247436, "grad_norm": 0.40499434211130053, "learning_rate": 0.00029868538859522623, "loss": 3.199528217315674, "step": 3073, "token_acc": 0.27853907715952725 }, { "epoch": 1.8021108179419525, "grad_norm": 0.4441401078885119, "learning_rate": 0.0002986834673587601, "loss": 3.211149215698242, "step": 3074, "token_acc": 0.27742192821825135 }, { "epoch": 1.8026971562591614, "grad_norm": 0.4298235793235902, "learning_rate": 0.0002986815447256132, "loss": 3.195619583129883, "step": 3075, "token_acc": 0.2783943002442033 }, { "epoch": 1.8032834945763705, "grad_norm": 0.5212437172382742, "learning_rate": 0.00029867962069580345, "loss": 3.1940438747406006, "step": 3076, "token_acc": 0.2778802347380819 }, { "epoch": 1.8038698328935796, "grad_norm": 0.5184726183245811, "learning_rate": 0.0002986776952693491, "loss": 3.2543487548828125, "step": 3077, "token_acc": 0.2707527491185204 }, { "epoch": 1.8044561712107887, "grad_norm": 0.43210728160586004, "learning_rate": 0.0002986757684462681, "loss": 3.1954867839813232, "step": 3078, "token_acc": 0.2773328628485806 }, { "epoch": 1.8050425095279976, "grad_norm": 0.510658200430899, "learning_rate": 0.00029867384022657864, "loss": 3.2134909629821777, "step": 3079, "token_acc": 0.2769135234098136 }, { "epoch": 1.8056288478452067, "grad_norm": 0.4431334149184413, "learning_rate": 0.0002986719106102988, "loss": 3.2375941276550293, "step": 3080, "token_acc": 0.2735058741605516 }, { "epoch": 1.8062151861624156, "grad_norm": 0.48954184459414696, "learning_rate": 0.0002986699795974466, "loss": 3.2246596813201904, "step": 3081, "token_acc": 0.27461348834378235 }, { "epoch": 1.8068015244796247, "grad_norm": 0.4262138406975625, "learning_rate": 0.0002986680471880404, "loss": 3.2108099460601807, "step": 3082, "token_acc": 0.27553052710434883 }, { "epoch": 1.8073878627968338, "grad_norm": 0.4189868749650725, "learning_rate": 0.00029866611338209815, "loss": 3.2112135887145996, "step": 3083, "token_acc": 0.2770698003227915 }, { "epoch": 1.807974201114043, "grad_norm": 0.41203927046986744, "learning_rate": 0.0002986641781796381, "loss": 3.1484875679016113, "step": 3084, "token_acc": 0.284946854052248 }, { "epoch": 1.8085605394312518, "grad_norm": 0.4975151478604645, "learning_rate": 0.00029866224158067847, "loss": 3.2264223098754883, "step": 3085, "token_acc": 0.2753575959933222 }, { "epoch": 1.8091468777484607, "grad_norm": 0.46300821375660994, "learning_rate": 0.0002986603035852373, "loss": 3.1928622722625732, "step": 3086, "token_acc": 0.27933179360663485 }, { "epoch": 1.8097332160656698, "grad_norm": 0.45176389425886143, "learning_rate": 0.000298658364193333, "loss": 3.169503688812256, "step": 3087, "token_acc": 0.28127640677536003 }, { "epoch": 1.810319554382879, "grad_norm": 0.4350557653181423, "learning_rate": 0.0002986564234049837, "loss": 3.243781328201294, "step": 3088, "token_acc": 0.27342597294186144 }, { "epoch": 1.810905892700088, "grad_norm": 0.41248706938235047, "learning_rate": 0.00029865448122020754, "loss": 3.1504716873168945, "step": 3089, "token_acc": 0.2841933661179435 }, { "epoch": 1.811492231017297, "grad_norm": 0.49268484197778906, "learning_rate": 0.00029865253763902293, "loss": 3.193650484085083, "step": 3090, "token_acc": 0.2789217966883076 }, { "epoch": 1.812078569334506, "grad_norm": 0.3841039358593599, "learning_rate": 0.000298650592661448, "loss": 3.2557573318481445, "step": 3091, "token_acc": 0.27061570188977807 }, { "epoch": 1.812664907651715, "grad_norm": 0.4499488876826179, "learning_rate": 0.00029864864628750105, "loss": 3.198819637298584, "step": 3092, "token_acc": 0.2796761359450157 }, { "epoch": 1.813251245968924, "grad_norm": 0.40753208160656057, "learning_rate": 0.00029864669851720037, "loss": 3.147763729095459, "step": 3093, "token_acc": 0.2839922499320726 }, { "epoch": 1.8138375842861332, "grad_norm": 0.35178713276639517, "learning_rate": 0.0002986447493505643, "loss": 3.1944730281829834, "step": 3094, "token_acc": 0.2804610957250102 }, { "epoch": 1.8144239226033423, "grad_norm": 0.33907632871602367, "learning_rate": 0.00029864279878761104, "loss": 3.204986810684204, "step": 3095, "token_acc": 0.27684321778800486 }, { "epoch": 1.8150102609205512, "grad_norm": 0.33653443604374145, "learning_rate": 0.00029864084682835904, "loss": 3.206244468688965, "step": 3096, "token_acc": 0.27764046619649285 }, { "epoch": 1.81559659923776, "grad_norm": 0.3298522977495309, "learning_rate": 0.0002986388934728266, "loss": 3.1717872619628906, "step": 3097, "token_acc": 0.2836129450956071 }, { "epoch": 1.8161829375549692, "grad_norm": 0.3294007155370809, "learning_rate": 0.00029863693872103197, "loss": 3.1714537143707275, "step": 3098, "token_acc": 0.28205546707382884 }, { "epoch": 1.8167692758721783, "grad_norm": 0.3685643171166607, "learning_rate": 0.00029863498257299366, "loss": 3.238262176513672, "step": 3099, "token_acc": 0.27239314017445554 }, { "epoch": 1.8173556141893874, "grad_norm": 0.4234196138726498, "learning_rate": 0.00029863302502872993, "loss": 3.2294039726257324, "step": 3100, "token_acc": 0.2751639519297316 }, { "epoch": 1.8179419525065963, "grad_norm": 0.5167174813397577, "learning_rate": 0.00029863106608825926, "loss": 3.2505478858947754, "step": 3101, "token_acc": 0.27222478362009345 }, { "epoch": 1.8185282908238052, "grad_norm": 0.46935948210100714, "learning_rate": 0.0002986291057516, "loss": 3.2138917446136475, "step": 3102, "token_acc": 0.2756795561913976 }, { "epoch": 1.8191146291410143, "grad_norm": 0.44551284679005143, "learning_rate": 0.00029862714401877053, "loss": 3.211258888244629, "step": 3103, "token_acc": 0.2769806843880918 }, { "epoch": 1.8197009674582234, "grad_norm": 0.4537234968057431, "learning_rate": 0.0002986251808897894, "loss": 3.2034716606140137, "step": 3104, "token_acc": 0.2770397584377291 }, { "epoch": 1.8202873057754325, "grad_norm": 0.39869028198677336, "learning_rate": 0.00029862321636467485, "loss": 3.167607545852661, "step": 3105, "token_acc": 0.28300831722730135 }, { "epoch": 1.8208736440926414, "grad_norm": 0.3686062727039798, "learning_rate": 0.00029862125044344555, "loss": 3.20642352104187, "step": 3106, "token_acc": 0.27738731165403935 }, { "epoch": 1.8214599824098505, "grad_norm": 0.37085357467841207, "learning_rate": 0.00029861928312611985, "loss": 3.216221809387207, "step": 3107, "token_acc": 0.27567667126222584 }, { "epoch": 1.8220463207270594, "grad_norm": 0.3767866787276598, "learning_rate": 0.00029861731441271623, "loss": 3.155963897705078, "step": 3108, "token_acc": 0.28427144715205216 }, { "epoch": 1.8226326590442685, "grad_norm": 0.41341285109018266, "learning_rate": 0.00029861534430325324, "loss": 3.1870346069335938, "step": 3109, "token_acc": 0.2800279275031197 }, { "epoch": 1.8232189973614776, "grad_norm": 0.41418805797569425, "learning_rate": 0.00029861337279774936, "loss": 3.229611873626709, "step": 3110, "token_acc": 0.2764427270620523 }, { "epoch": 1.8238053356786867, "grad_norm": 0.49703068165890885, "learning_rate": 0.0002986113998962231, "loss": 3.180020332336426, "step": 3111, "token_acc": 0.27924554580614747 }, { "epoch": 1.8243916739958956, "grad_norm": 0.46791056259104963, "learning_rate": 0.0002986094255986929, "loss": 3.216400146484375, "step": 3112, "token_acc": 0.27519471260794287 }, { "epoch": 1.8249780123131045, "grad_norm": 0.42541383570304797, "learning_rate": 0.0002986074499051775, "loss": 3.213284969329834, "step": 3113, "token_acc": 0.279238840305554 }, { "epoch": 1.8255643506303136, "grad_norm": 0.3748929424448023, "learning_rate": 0.0002986054728156953, "loss": 3.182849884033203, "step": 3114, "token_acc": 0.2797672325002789 }, { "epoch": 1.8261506889475227, "grad_norm": 0.42727198737075833, "learning_rate": 0.000298603494330265, "loss": 3.2009809017181396, "step": 3115, "token_acc": 0.27764275895701757 }, { "epoch": 1.8267370272647319, "grad_norm": 0.47546506951376843, "learning_rate": 0.0002986015144489051, "loss": 3.2002198696136475, "step": 3116, "token_acc": 0.2797896954922943 }, { "epoch": 1.8273233655819408, "grad_norm": 0.47828658624460335, "learning_rate": 0.00029859953317163415, "loss": 3.212883949279785, "step": 3117, "token_acc": 0.27705251469695474 }, { "epoch": 1.8279097038991499, "grad_norm": 0.4058148664822124, "learning_rate": 0.00029859755049847087, "loss": 3.1685757637023926, "step": 3118, "token_acc": 0.28074656652304236 }, { "epoch": 1.8284960422163588, "grad_norm": 0.3903058587257659, "learning_rate": 0.00029859556642943387, "loss": 3.153568744659424, "step": 3119, "token_acc": 0.2823968765914106 }, { "epoch": 1.8290823805335679, "grad_norm": 0.38492404378103423, "learning_rate": 0.0002985935809645417, "loss": 3.2294020652770996, "step": 3120, "token_acc": 0.2738543831469541 }, { "epoch": 1.829668718850777, "grad_norm": 0.44808067750202546, "learning_rate": 0.0002985915941038131, "loss": 3.1805953979492188, "step": 3121, "token_acc": 0.28085333220737957 }, { "epoch": 1.830255057167986, "grad_norm": 0.42568268337021614, "learning_rate": 0.00029858960584726665, "loss": 3.2394044399261475, "step": 3122, "token_acc": 0.2709040631904844 }, { "epoch": 1.830841395485195, "grad_norm": 0.4268552598205282, "learning_rate": 0.0002985876161949212, "loss": 3.2019171714782715, "step": 3123, "token_acc": 0.2778036052883134 }, { "epoch": 1.8314277338024039, "grad_norm": 0.4004359952887598, "learning_rate": 0.00029858562514679525, "loss": 3.1293020248413086, "step": 3124, "token_acc": 0.28673597552955254 }, { "epoch": 1.832014072119613, "grad_norm": 0.44890998005873095, "learning_rate": 0.00029858363270290753, "loss": 3.206660509109497, "step": 3125, "token_acc": 0.2764361216769021 }, { "epoch": 1.832600410436822, "grad_norm": 0.43258716369510586, "learning_rate": 0.00029858163886327686, "loss": 3.217843532562256, "step": 3126, "token_acc": 0.2758903879556892 }, { "epoch": 1.8331867487540312, "grad_norm": 0.450978071018672, "learning_rate": 0.0002985796436279219, "loss": 3.2305612564086914, "step": 3127, "token_acc": 0.2743687172832503 }, { "epoch": 1.83377308707124, "grad_norm": 0.5089404042219443, "learning_rate": 0.00029857764699686137, "loss": 3.2540650367736816, "step": 3128, "token_acc": 0.2690460084766364 }, { "epoch": 1.834359425388449, "grad_norm": 0.4082736116010722, "learning_rate": 0.0002985756489701141, "loss": 3.18744158744812, "step": 3129, "token_acc": 0.2794995274553737 }, { "epoch": 1.834945763705658, "grad_norm": 0.35075497128023786, "learning_rate": 0.00029857364954769883, "loss": 3.1709580421447754, "step": 3130, "token_acc": 0.2793715934924603 }, { "epoch": 1.8355321020228672, "grad_norm": 0.37835861697190143, "learning_rate": 0.0002985716487296343, "loss": 3.208159923553467, "step": 3131, "token_acc": 0.27684298421545805 }, { "epoch": 1.8361184403400763, "grad_norm": 0.4000573020570646, "learning_rate": 0.0002985696465159393, "loss": 3.2082881927490234, "step": 3132, "token_acc": 0.279523993289912 }, { "epoch": 1.8367047786572852, "grad_norm": 0.40340472367935787, "learning_rate": 0.00029856764290663273, "loss": 3.227461576461792, "step": 3133, "token_acc": 0.2739957220367279 }, { "epoch": 1.8372911169744943, "grad_norm": 0.4194641914519934, "learning_rate": 0.0002985656379017333, "loss": 3.2242798805236816, "step": 3134, "token_acc": 0.2766155821394363 }, { "epoch": 1.8378774552917032, "grad_norm": 0.43292567839822604, "learning_rate": 0.00029856363150125993, "loss": 3.2192955017089844, "step": 3135, "token_acc": 0.2745324278009651 }, { "epoch": 1.8384637936089123, "grad_norm": 0.4558226350646853, "learning_rate": 0.0002985616237052314, "loss": 3.202052593231201, "step": 3136, "token_acc": 0.27658421030995645 }, { "epoch": 1.8390501319261214, "grad_norm": 0.45727269258474135, "learning_rate": 0.0002985596145136666, "loss": 3.2248802185058594, "step": 3137, "token_acc": 0.27515891702600853 }, { "epoch": 1.8396364702433305, "grad_norm": 0.43041455822369623, "learning_rate": 0.00029855760392658444, "loss": 3.190176486968994, "step": 3138, "token_acc": 0.2786948637260654 }, { "epoch": 1.8402228085605394, "grad_norm": 0.441329278628718, "learning_rate": 0.0002985555919440038, "loss": 3.1970410346984863, "step": 3139, "token_acc": 0.27860734043742996 }, { "epoch": 1.8408091468777483, "grad_norm": 0.46537009334174434, "learning_rate": 0.0002985535785659435, "loss": 3.1814727783203125, "step": 3140, "token_acc": 0.28188284485526066 }, { "epoch": 1.8413954851949574, "grad_norm": 0.4605879301198367, "learning_rate": 0.00029855156379242256, "loss": 3.226712703704834, "step": 3141, "token_acc": 0.2744886509578078 }, { "epoch": 1.8419818235121665, "grad_norm": 0.4076116366859706, "learning_rate": 0.0002985495476234598, "loss": 3.2322587966918945, "step": 3142, "token_acc": 0.2748466663178159 }, { "epoch": 1.8425681618293757, "grad_norm": 0.38425798073138234, "learning_rate": 0.0002985475300590743, "loss": 3.1664044857025146, "step": 3143, "token_acc": 0.28218780377162855 }, { "epoch": 1.8431545001465846, "grad_norm": 0.41519430838295224, "learning_rate": 0.00029854551109928485, "loss": 3.2139058113098145, "step": 3144, "token_acc": 0.2755124461610549 }, { "epoch": 1.8437408384637937, "grad_norm": 0.3868168126568655, "learning_rate": 0.0002985434907441105, "loss": 3.210146427154541, "step": 3145, "token_acc": 0.2760524777275385 }, { "epoch": 1.8443271767810026, "grad_norm": 0.3985831011018103, "learning_rate": 0.0002985414689935702, "loss": 3.194047689437866, "step": 3146, "token_acc": 0.2787377444636553 }, { "epoch": 1.8449135150982117, "grad_norm": 0.40317171119465345, "learning_rate": 0.000298539445847683, "loss": 3.2293734550476074, "step": 3147, "token_acc": 0.27307719723430035 }, { "epoch": 1.8454998534154208, "grad_norm": 0.37243362005012787, "learning_rate": 0.0002985374213064679, "loss": 3.182706832885742, "step": 3148, "token_acc": 0.2819988234181803 }, { "epoch": 1.8460861917326299, "grad_norm": 0.44328076855296367, "learning_rate": 0.0002985353953699438, "loss": 3.2005558013916016, "step": 3149, "token_acc": 0.27830500538946956 }, { "epoch": 1.8466725300498388, "grad_norm": 0.43004968188085785, "learning_rate": 0.00029853336803812983, "loss": 3.1745519638061523, "step": 3150, "token_acc": 0.2813922412543455 }, { "epoch": 1.8472588683670477, "grad_norm": 0.3852882331914369, "learning_rate": 0.0002985313393110451, "loss": 3.202648162841797, "step": 3151, "token_acc": 0.2785962777285414 }, { "epoch": 1.8478452066842568, "grad_norm": 0.4870368865653353, "learning_rate": 0.0002985293091887085, "loss": 3.2058701515197754, "step": 3152, "token_acc": 0.2773094317814291 }, { "epoch": 1.848431545001466, "grad_norm": 0.45651623027631416, "learning_rate": 0.0002985272776711392, "loss": 3.227034330368042, "step": 3153, "token_acc": 0.27455820749595544 }, { "epoch": 1.849017883318675, "grad_norm": 0.5259379715010543, "learning_rate": 0.0002985252447583563, "loss": 3.211610794067383, "step": 3154, "token_acc": 0.275925784971693 }, { "epoch": 1.849604221635884, "grad_norm": 0.4809338588034194, "learning_rate": 0.00029852321045037883, "loss": 3.217149257659912, "step": 3155, "token_acc": 0.2742873101907402 }, { "epoch": 1.8501905599530928, "grad_norm": 0.4108569049849537, "learning_rate": 0.0002985211747472259, "loss": 3.201446056365967, "step": 3156, "token_acc": 0.27515519732694815 }, { "epoch": 1.850776898270302, "grad_norm": 0.37086706909781375, "learning_rate": 0.00029851913764891675, "loss": 3.210491418838501, "step": 3157, "token_acc": 0.27733549158618614 }, { "epoch": 1.851363236587511, "grad_norm": 0.41262517789485464, "learning_rate": 0.00029851709915547044, "loss": 3.17791748046875, "step": 3158, "token_acc": 0.2787053851222138 }, { "epoch": 1.8519495749047201, "grad_norm": 0.45673646088590075, "learning_rate": 0.00029851505926690606, "loss": 3.2235374450683594, "step": 3159, "token_acc": 0.2746550156998843 }, { "epoch": 1.852535913221929, "grad_norm": 0.35702179950292057, "learning_rate": 0.00029851301798324286, "loss": 3.1815433502197266, "step": 3160, "token_acc": 0.28194149199810553 }, { "epoch": 1.8531222515391381, "grad_norm": 0.39987948080732244, "learning_rate": 0.0002985109753044999, "loss": 3.229124069213867, "step": 3161, "token_acc": 0.2747942843988072 }, { "epoch": 1.853708589856347, "grad_norm": 0.39850377029089884, "learning_rate": 0.00029850893123069657, "loss": 3.171560287475586, "step": 3162, "token_acc": 0.281475852393504 }, { "epoch": 1.8542949281735561, "grad_norm": 0.4218096560658906, "learning_rate": 0.00029850688576185186, "loss": 3.2326807975769043, "step": 3163, "token_acc": 0.27570190989239185 }, { "epoch": 1.8548812664907652, "grad_norm": 0.4021593184716227, "learning_rate": 0.0002985048388979851, "loss": 3.254361391067505, "step": 3164, "token_acc": 0.27140013446353006 }, { "epoch": 1.8554676048079743, "grad_norm": 0.38612231061113916, "learning_rate": 0.0002985027906391155, "loss": 3.2154479026794434, "step": 3165, "token_acc": 0.2759643840285919 }, { "epoch": 1.8560539431251832, "grad_norm": 0.35717847703912836, "learning_rate": 0.0002985007409852623, "loss": 3.171966552734375, "step": 3166, "token_acc": 0.2812637467421016 }, { "epoch": 1.8566402814423921, "grad_norm": 0.3769056647241825, "learning_rate": 0.0002984986899364447, "loss": 3.1887423992156982, "step": 3167, "token_acc": 0.2794621554888916 }, { "epoch": 1.8572266197596012, "grad_norm": 0.3648182072034886, "learning_rate": 0.00029849663749268205, "loss": 3.178748607635498, "step": 3168, "token_acc": 0.281371768099718 }, { "epoch": 1.8578129580768104, "grad_norm": 0.3542305046949214, "learning_rate": 0.0002984945836539936, "loss": 3.182419538497925, "step": 3169, "token_acc": 0.2802276250305948 }, { "epoch": 1.8583992963940195, "grad_norm": 0.3378534815473059, "learning_rate": 0.0002984925284203986, "loss": 3.200014114379883, "step": 3170, "token_acc": 0.2782660668449055 }, { "epoch": 1.8589856347112284, "grad_norm": 0.371313051338446, "learning_rate": 0.0002984904717919164, "loss": 3.1628899574279785, "step": 3171, "token_acc": 0.28373754104088483 }, { "epoch": 1.8595719730284375, "grad_norm": 0.39976028829886423, "learning_rate": 0.00029848841376856636, "loss": 3.2044270038604736, "step": 3172, "token_acc": 0.2770537030870951 }, { "epoch": 1.8601583113456464, "grad_norm": 0.4250598892164116, "learning_rate": 0.0002984863543503677, "loss": 3.2030868530273438, "step": 3173, "token_acc": 0.27621551148468887 }, { "epoch": 1.8607446496628555, "grad_norm": 0.40323851635304736, "learning_rate": 0.00029848429353733984, "loss": 3.202575922012329, "step": 3174, "token_acc": 0.2771224144797048 }, { "epoch": 1.8613309879800646, "grad_norm": 0.377348552669163, "learning_rate": 0.0002984822313295022, "loss": 3.2006585597991943, "step": 3175, "token_acc": 0.27729184122534317 }, { "epoch": 1.8619173262972737, "grad_norm": 0.4181877463261301, "learning_rate": 0.000298480167726874, "loss": 3.151139497756958, "step": 3176, "token_acc": 0.2858931753600154 }, { "epoch": 1.8625036646144826, "grad_norm": 0.43541420658597085, "learning_rate": 0.00029847810272947475, "loss": 3.2313599586486816, "step": 3177, "token_acc": 0.27430587870327283 }, { "epoch": 1.8630900029316915, "grad_norm": 0.4079072090005843, "learning_rate": 0.0002984760363373237, "loss": 3.202117919921875, "step": 3178, "token_acc": 0.2774894244145681 }, { "epoch": 1.8636763412489006, "grad_norm": 0.47212875656883946, "learning_rate": 0.0002984739685504405, "loss": 3.239431142807007, "step": 3179, "token_acc": 0.27287224479054023 }, { "epoch": 1.8642626795661097, "grad_norm": 0.4514300879350858, "learning_rate": 0.00029847189936884434, "loss": 3.1829934120178223, "step": 3180, "token_acc": 0.2795262232811818 }, { "epoch": 1.8648490178833188, "grad_norm": 0.522484620590958, "learning_rate": 0.0002984698287925548, "loss": 3.2194814682006836, "step": 3181, "token_acc": 0.27591910993666374 }, { "epoch": 1.8654353562005277, "grad_norm": 0.45634783663542133, "learning_rate": 0.0002984677568215913, "loss": 3.2230944633483887, "step": 3182, "token_acc": 0.27489761868648566 }, { "epoch": 1.8660216945177366, "grad_norm": 0.4655294040153684, "learning_rate": 0.0002984656834559733, "loss": 3.2047390937805176, "step": 3183, "token_acc": 0.27477459234246776 }, { "epoch": 1.8666080328349457, "grad_norm": 0.4788033130084151, "learning_rate": 0.0002984636086957202, "loss": 3.1858067512512207, "step": 3184, "token_acc": 0.2798367928805218 }, { "epoch": 1.8671943711521548, "grad_norm": 0.44564861390293875, "learning_rate": 0.00029846153254085156, "loss": 3.1930346488952637, "step": 3185, "token_acc": 0.27878234463426804 }, { "epoch": 1.867780709469364, "grad_norm": 0.4176694295595294, "learning_rate": 0.0002984594549913869, "loss": 3.233114719390869, "step": 3186, "token_acc": 0.27319412298760914 }, { "epoch": 1.8683670477865728, "grad_norm": 0.43749726833104186, "learning_rate": 0.00029845737604734573, "loss": 3.225269317626953, "step": 3187, "token_acc": 0.2757619886814299 }, { "epoch": 1.868953386103782, "grad_norm": 0.5181997279425303, "learning_rate": 0.0002984552957087475, "loss": 3.172684907913208, "step": 3188, "token_acc": 0.28107840206428875 }, { "epoch": 1.8695397244209908, "grad_norm": 0.4033264073286499, "learning_rate": 0.00029845321397561187, "loss": 3.1759142875671387, "step": 3189, "token_acc": 0.2808328365342949 }, { "epoch": 1.8701260627382, "grad_norm": 0.4398765656377838, "learning_rate": 0.0002984511308479583, "loss": 3.190573215484619, "step": 3190, "token_acc": 0.2777804347005298 }, { "epoch": 1.870712401055409, "grad_norm": 0.4102816450588917, "learning_rate": 0.0002984490463258064, "loss": 3.199730396270752, "step": 3191, "token_acc": 0.27973053844646517 }, { "epoch": 1.8712987393726181, "grad_norm": 0.4130506687215074, "learning_rate": 0.00029844696040917575, "loss": 3.176208972930908, "step": 3192, "token_acc": 0.2803861599425547 }, { "epoch": 1.871885077689827, "grad_norm": 0.4296636872450154, "learning_rate": 0.000298444873098086, "loss": 3.2095255851745605, "step": 3193, "token_acc": 0.27644879583988996 }, { "epoch": 1.872471416007036, "grad_norm": 0.3965659237224688, "learning_rate": 0.00029844278439255666, "loss": 3.184441089630127, "step": 3194, "token_acc": 0.2814311673730448 }, { "epoch": 1.873057754324245, "grad_norm": 0.3580081571413274, "learning_rate": 0.00029844069429260737, "loss": 3.1753878593444824, "step": 3195, "token_acc": 0.2802337458105739 }, { "epoch": 1.8736440926414542, "grad_norm": 0.3706597560374224, "learning_rate": 0.00029843860279825775, "loss": 3.1752281188964844, "step": 3196, "token_acc": 0.27982441618640647 }, { "epoch": 1.8742304309586633, "grad_norm": 0.39021112388741136, "learning_rate": 0.0002984365099095276, "loss": 3.203425645828247, "step": 3197, "token_acc": 0.27845959324222536 }, { "epoch": 1.8748167692758722, "grad_norm": 0.41017745983710135, "learning_rate": 0.00029843441562643635, "loss": 3.2024641036987305, "step": 3198, "token_acc": 0.27575206846115224 }, { "epoch": 1.875403107593081, "grad_norm": 0.4012340903681659, "learning_rate": 0.0002984323199490039, "loss": 3.1705002784729004, "step": 3199, "token_acc": 0.2818638900985787 }, { "epoch": 1.8759894459102902, "grad_norm": 0.41982304482250743, "learning_rate": 0.00029843022287724967, "loss": 3.1912131309509277, "step": 3200, "token_acc": 0.2806412206643096 }, { "epoch": 1.8765757842274993, "grad_norm": 0.45467515897719585, "learning_rate": 0.0002984281244111936, "loss": 3.1630783081054688, "step": 3201, "token_acc": 0.28236466978261715 }, { "epoch": 1.8771621225447084, "grad_norm": 0.4035403465148831, "learning_rate": 0.0002984260245508553, "loss": 3.13374662399292, "step": 3202, "token_acc": 0.286162007090855 }, { "epoch": 1.8777484608619175, "grad_norm": 0.38405944037625445, "learning_rate": 0.0002984239232962545, "loss": 3.209691286087036, "step": 3203, "token_acc": 0.2763281266099285 }, { "epoch": 1.8783347991791264, "grad_norm": 0.3477970900455854, "learning_rate": 0.0002984218206474109, "loss": 3.2011451721191406, "step": 3204, "token_acc": 0.2797791485447703 }, { "epoch": 1.8789211374963353, "grad_norm": 0.3872956566422683, "learning_rate": 0.00029841971660434435, "loss": 3.20654296875, "step": 3205, "token_acc": 0.2753751594941813 }, { "epoch": 1.8795074758135444, "grad_norm": 0.34121031345541986, "learning_rate": 0.00029841761116707456, "loss": 3.226215362548828, "step": 3206, "token_acc": 0.2751305457009607 }, { "epoch": 1.8800938141307535, "grad_norm": 0.4070360101527037, "learning_rate": 0.00029841550433562123, "loss": 3.1702065467834473, "step": 3207, "token_acc": 0.2804094070746127 }, { "epoch": 1.8806801524479626, "grad_norm": 0.40992383035305885, "learning_rate": 0.0002984133961100043, "loss": 3.194882392883301, "step": 3208, "token_acc": 0.2787433295516618 }, { "epoch": 1.8812664907651715, "grad_norm": 0.40138179079917286, "learning_rate": 0.00029841128649024353, "loss": 3.190107822418213, "step": 3209, "token_acc": 0.27804613525743455 }, { "epoch": 1.8818528290823804, "grad_norm": 0.459921014553069, "learning_rate": 0.00029840917547635867, "loss": 3.213904619216919, "step": 3210, "token_acc": 0.27567705321886293 }, { "epoch": 1.8824391673995895, "grad_norm": 0.3573585452830387, "learning_rate": 0.0002984070630683696, "loss": 3.195622444152832, "step": 3211, "token_acc": 0.27764944418665644 }, { "epoch": 1.8830255057167986, "grad_norm": 0.42240784141541315, "learning_rate": 0.00029840494926629615, "loss": 3.2173094749450684, "step": 3212, "token_acc": 0.27477803852018473 }, { "epoch": 1.8836118440340077, "grad_norm": 0.4443676416793418, "learning_rate": 0.0002984028340701582, "loss": 3.215662717819214, "step": 3213, "token_acc": 0.2753571484782825 }, { "epoch": 1.8841981823512166, "grad_norm": 0.4263192329542528, "learning_rate": 0.0002984007174799756, "loss": 3.1895689964294434, "step": 3214, "token_acc": 0.27935678033104644 }, { "epoch": 1.8847845206684257, "grad_norm": 0.4117901506083151, "learning_rate": 0.00029839859949576814, "loss": 3.2455568313598633, "step": 3215, "token_acc": 0.2738851312925044 }, { "epoch": 1.8853708589856346, "grad_norm": 0.4236778417235489, "learning_rate": 0.0002983964801175559, "loss": 3.192448377609253, "step": 3216, "token_acc": 0.2767620898803224 }, { "epoch": 1.8859571973028437, "grad_norm": 0.4432769365229791, "learning_rate": 0.0002983943593453587, "loss": 3.2194581031799316, "step": 3217, "token_acc": 0.2751429991578438 }, { "epoch": 1.8865435356200528, "grad_norm": 0.37060157269203853, "learning_rate": 0.0002983922371791964, "loss": 3.1897478103637695, "step": 3218, "token_acc": 0.28095427372588455 }, { "epoch": 1.887129873937262, "grad_norm": 0.3764482518877029, "learning_rate": 0.000298390113619089, "loss": 3.2313661575317383, "step": 3219, "token_acc": 0.2750642805850756 }, { "epoch": 1.8877162122544708, "grad_norm": 0.45964119030685785, "learning_rate": 0.0002983879886650565, "loss": 3.2060577869415283, "step": 3220, "token_acc": 0.2760493527849222 }, { "epoch": 1.8883025505716797, "grad_norm": 0.43845234300249014, "learning_rate": 0.0002983858623171188, "loss": 3.2169058322906494, "step": 3221, "token_acc": 0.27592220878142154 }, { "epoch": 1.8888888888888888, "grad_norm": 0.3839137522523228, "learning_rate": 0.0002983837345752958, "loss": 3.2282400131225586, "step": 3222, "token_acc": 0.27429584581652205 }, { "epoch": 1.889475227206098, "grad_norm": 0.41613139085523676, "learning_rate": 0.0002983816054396076, "loss": 3.2257776260375977, "step": 3223, "token_acc": 0.27400575193698623 }, { "epoch": 1.890061565523307, "grad_norm": 0.4291768558383752, "learning_rate": 0.0002983794749100742, "loss": 3.201963424682617, "step": 3224, "token_acc": 0.27672577192721737 }, { "epoch": 1.890647903840516, "grad_norm": 0.418901959353053, "learning_rate": 0.0002983773429867156, "loss": 3.200531482696533, "step": 3225, "token_acc": 0.2782756387042808 }, { "epoch": 1.8912342421577248, "grad_norm": 0.4224010659989229, "learning_rate": 0.0002983752096695517, "loss": 3.241856336593628, "step": 3226, "token_acc": 0.2717264969464637 }, { "epoch": 1.891820580474934, "grad_norm": 0.5014097727150821, "learning_rate": 0.0002983730749586027, "loss": 3.242023229598999, "step": 3227, "token_acc": 0.2722166714058754 }, { "epoch": 1.892406918792143, "grad_norm": 0.3852111713370862, "learning_rate": 0.00029837093885388857, "loss": 3.174863815307617, "step": 3228, "token_acc": 0.28121074401266405 }, { "epoch": 1.8929932571093522, "grad_norm": 0.41313256438738555, "learning_rate": 0.0002983688013554294, "loss": 3.169738292694092, "step": 3229, "token_acc": 0.2818856958346787 }, { "epoch": 1.8935795954265613, "grad_norm": 0.40940744932653717, "learning_rate": 0.00029836666246324533, "loss": 3.1308212280273438, "step": 3230, "token_acc": 0.2847391165172855 }, { "epoch": 1.8941659337437702, "grad_norm": 0.4274488445251595, "learning_rate": 0.00029836452217735633, "loss": 3.199251651763916, "step": 3231, "token_acc": 0.2780745544152852 }, { "epoch": 1.894752272060979, "grad_norm": 0.3986837271736455, "learning_rate": 0.0002983623804977826, "loss": 3.236386775970459, "step": 3232, "token_acc": 0.2736496271837268 }, { "epoch": 1.8953386103781882, "grad_norm": 0.4123211530595274, "learning_rate": 0.00029836023742454423, "loss": 3.230705738067627, "step": 3233, "token_acc": 0.27349479567033014 }, { "epoch": 1.8959249486953973, "grad_norm": 0.44933503811263664, "learning_rate": 0.0002983580929576613, "loss": 3.2067837715148926, "step": 3234, "token_acc": 0.2774796978006722 }, { "epoch": 1.8965112870126064, "grad_norm": 0.37673431041986927, "learning_rate": 0.0002983559470971541, "loss": 3.1614632606506348, "step": 3235, "token_acc": 0.2838031051042028 }, { "epoch": 1.8970976253298153, "grad_norm": 0.380946580472696, "learning_rate": 0.00029835379984304255, "loss": 3.2016046047210693, "step": 3236, "token_acc": 0.27655908772508137 }, { "epoch": 1.8976839636470242, "grad_norm": 0.352380810400323, "learning_rate": 0.0002983516511953471, "loss": 3.205845355987549, "step": 3237, "token_acc": 0.27607971904330697 }, { "epoch": 1.8982703019642333, "grad_norm": 0.41320997110120156, "learning_rate": 0.00029834950115408774, "loss": 3.1926727294921875, "step": 3238, "token_acc": 0.27720375429390604 }, { "epoch": 1.8988566402814424, "grad_norm": 0.42468964170473467, "learning_rate": 0.00029834734971928464, "loss": 3.156770706176758, "step": 3239, "token_acc": 0.2836733639791333 }, { "epoch": 1.8994429785986515, "grad_norm": 0.45615514219722897, "learning_rate": 0.00029834519689095817, "loss": 3.1967973709106445, "step": 3240, "token_acc": 0.27857696525370784 }, { "epoch": 1.9000293169158604, "grad_norm": 0.4887867250369316, "learning_rate": 0.0002983430426691285, "loss": 3.176856517791748, "step": 3241, "token_acc": 0.2808887638122647 }, { "epoch": 1.9006156552330695, "grad_norm": 0.4165573955246559, "learning_rate": 0.00029834088705381584, "loss": 3.18300724029541, "step": 3242, "token_acc": 0.2794832604564001 }, { "epoch": 1.9012019935502784, "grad_norm": 0.4015630854465249, "learning_rate": 0.00029833873004504036, "loss": 3.20864200592041, "step": 3243, "token_acc": 0.2760727373444691 }, { "epoch": 1.9017883318674875, "grad_norm": 0.3792470085882102, "learning_rate": 0.00029833657164282244, "loss": 3.2033562660217285, "step": 3244, "token_acc": 0.27489906760826394 }, { "epoch": 1.9023746701846966, "grad_norm": 0.43503849658118826, "learning_rate": 0.0002983344118471823, "loss": 3.229607582092285, "step": 3245, "token_acc": 0.27428279803323197 }, { "epoch": 1.9029610085019057, "grad_norm": 0.3846147936325712, "learning_rate": 0.0002983322506581403, "loss": 3.168962001800537, "step": 3246, "token_acc": 0.2839145118568745 }, { "epoch": 1.9035473468191146, "grad_norm": 0.3613047043056451, "learning_rate": 0.0002983300880757166, "loss": 3.160580635070801, "step": 3247, "token_acc": 0.2820882833017561 }, { "epoch": 1.9041336851363235, "grad_norm": 0.3745111184181826, "learning_rate": 0.00029832792409993165, "loss": 3.190911293029785, "step": 3248, "token_acc": 0.27724068155904225 }, { "epoch": 1.9047200234535326, "grad_norm": 0.34041692139170465, "learning_rate": 0.0002983257587308057, "loss": 3.220590829849243, "step": 3249, "token_acc": 0.27406374102089615 }, { "epoch": 1.9053063617707418, "grad_norm": 0.39170381131087156, "learning_rate": 0.0002983235919683592, "loss": 3.2262701988220215, "step": 3250, "token_acc": 0.27329793741788777 }, { "epoch": 1.9058927000879509, "grad_norm": 0.5122507035629825, "learning_rate": 0.00029832142381261233, "loss": 3.1575300693511963, "step": 3251, "token_acc": 0.2835954877661265 }, { "epoch": 1.9064790384051598, "grad_norm": 0.365203698763899, "learning_rate": 0.0002983192542635856, "loss": 3.2405402660369873, "step": 3252, "token_acc": 0.2734824960212339 }, { "epoch": 1.9070653767223686, "grad_norm": 0.43648747033508006, "learning_rate": 0.00029831708332129933, "loss": 3.2181406021118164, "step": 3253, "token_acc": 0.2738248025369914 }, { "epoch": 1.9076517150395778, "grad_norm": 0.43688086757076067, "learning_rate": 0.0002983149109857739, "loss": 3.197798490524292, "step": 3254, "token_acc": 0.2774641029359965 }, { "epoch": 1.9082380533567869, "grad_norm": 0.4340239944059772, "learning_rate": 0.00029831273725702974, "loss": 3.1855835914611816, "step": 3255, "token_acc": 0.277578808280375 }, { "epoch": 1.908824391673996, "grad_norm": 0.3912222676932408, "learning_rate": 0.0002983105621350873, "loss": 3.2201390266418457, "step": 3256, "token_acc": 0.27630883410157736 }, { "epoch": 1.909410729991205, "grad_norm": 0.3854329019934525, "learning_rate": 0.00029830838561996695, "loss": 3.1903791427612305, "step": 3257, "token_acc": 0.2782542537003735 }, { "epoch": 1.909997068308414, "grad_norm": 0.3862478016982444, "learning_rate": 0.0002983062077116892, "loss": 3.1459033489227295, "step": 3258, "token_acc": 0.2854167289813654 }, { "epoch": 1.9105834066256229, "grad_norm": 0.41511038361437846, "learning_rate": 0.0002983040284102744, "loss": 3.1794991493225098, "step": 3259, "token_acc": 0.27992423165679936 }, { "epoch": 1.911169744942832, "grad_norm": 0.381718857994355, "learning_rate": 0.00029830184771574314, "loss": 3.1398983001708984, "step": 3260, "token_acc": 0.2852127377429291 }, { "epoch": 1.911756083260041, "grad_norm": 0.47046937749274026, "learning_rate": 0.00029829966562811586, "loss": 3.1875643730163574, "step": 3261, "token_acc": 0.27873890776606364 }, { "epoch": 1.9123424215772502, "grad_norm": 0.4506070821078623, "learning_rate": 0.0002982974821474131, "loss": 3.2244222164154053, "step": 3262, "token_acc": 0.2755318262752897 }, { "epoch": 1.912928759894459, "grad_norm": 0.40635206044609135, "learning_rate": 0.00029829529727365525, "loss": 3.1653754711151123, "step": 3263, "token_acc": 0.28184894405285754 }, { "epoch": 1.913515098211668, "grad_norm": 0.4785599601434365, "learning_rate": 0.000298293111006863, "loss": 3.1700034141540527, "step": 3264, "token_acc": 0.28323772013969234 }, { "epoch": 1.914101436528877, "grad_norm": 0.401628713067513, "learning_rate": 0.0002982909233470567, "loss": 3.1526448726654053, "step": 3265, "token_acc": 0.2845154719336768 }, { "epoch": 1.9146877748460862, "grad_norm": 0.430457793619935, "learning_rate": 0.0002982887342942571, "loss": 3.205681800842285, "step": 3266, "token_acc": 0.2762036042984751 }, { "epoch": 1.9152741131632953, "grad_norm": 0.45014472090374535, "learning_rate": 0.00029828654384848457, "loss": 3.204684257507324, "step": 3267, "token_acc": 0.2771506610911025 }, { "epoch": 1.9158604514805042, "grad_norm": 0.414036650147306, "learning_rate": 0.00029828435200975983, "loss": 3.194882392883301, "step": 3268, "token_acc": 0.27805809137455273 }, { "epoch": 1.9164467897977133, "grad_norm": 0.49028250393990896, "learning_rate": 0.00029828215877810343, "loss": 3.1921753883361816, "step": 3269, "token_acc": 0.27922428393292514 }, { "epoch": 1.9170331281149222, "grad_norm": 0.42596667605325345, "learning_rate": 0.0002982799641535359, "loss": 3.180551052093506, "step": 3270, "token_acc": 0.2797045888733604 }, { "epoch": 1.9176194664321313, "grad_norm": 0.43981858528141105, "learning_rate": 0.00029827776813607797, "loss": 3.2114505767822266, "step": 3271, "token_acc": 0.27596880255327566 }, { "epoch": 1.9182058047493404, "grad_norm": 0.42509479073889406, "learning_rate": 0.0002982755707257502, "loss": 3.183439254760742, "step": 3272, "token_acc": 0.2810664509209556 }, { "epoch": 1.9187921430665495, "grad_norm": 0.37565389269065763, "learning_rate": 0.00029827337192257325, "loss": 3.1612391471862793, "step": 3273, "token_acc": 0.28248648906282686 }, { "epoch": 1.9193784813837584, "grad_norm": 0.3929822899700079, "learning_rate": 0.00029827117172656777, "loss": 3.150998592376709, "step": 3274, "token_acc": 0.2846092262709618 }, { "epoch": 1.9199648197009673, "grad_norm": 0.41602259410780307, "learning_rate": 0.0002982689701377544, "loss": 3.157850980758667, "step": 3275, "token_acc": 0.2827557158464191 }, { "epoch": 1.9205511580181764, "grad_norm": 0.4254472187762964, "learning_rate": 0.0002982667671561539, "loss": 3.2182846069335938, "step": 3276, "token_acc": 0.27647127495362744 }, { "epoch": 1.9211374963353856, "grad_norm": 0.41361663878700644, "learning_rate": 0.0002982645627817869, "loss": 3.1519813537597656, "step": 3277, "token_acc": 0.2839578130731237 }, { "epoch": 1.9217238346525947, "grad_norm": 0.5189396805664954, "learning_rate": 0.00029826235701467416, "loss": 3.195946455001831, "step": 3278, "token_acc": 0.2759253848633836 }, { "epoch": 1.9223101729698036, "grad_norm": 0.38126211327871623, "learning_rate": 0.0002982601498548363, "loss": 3.2020576000213623, "step": 3279, "token_acc": 0.2766937999857728 }, { "epoch": 1.9228965112870124, "grad_norm": 0.40458316481430173, "learning_rate": 0.0002982579413022941, "loss": 3.201653480529785, "step": 3280, "token_acc": 0.2793728290017085 }, { "epoch": 1.9234828496042216, "grad_norm": 0.378524823196167, "learning_rate": 0.00029825573135706837, "loss": 3.1889257431030273, "step": 3281, "token_acc": 0.2774900176456413 }, { "epoch": 1.9240691879214307, "grad_norm": 0.4303950731629008, "learning_rate": 0.00029825352001917985, "loss": 3.214787006378174, "step": 3282, "token_acc": 0.2771549985207498 }, { "epoch": 1.9246555262386398, "grad_norm": 0.39262207122526765, "learning_rate": 0.00029825130728864925, "loss": 3.172614097595215, "step": 3283, "token_acc": 0.2796875248933899 }, { "epoch": 1.9252418645558487, "grad_norm": 0.4732405221061119, "learning_rate": 0.00029824909316549734, "loss": 3.2121779918670654, "step": 3284, "token_acc": 0.2748695713943693 }, { "epoch": 1.9258282028730578, "grad_norm": 0.4579406080508642, "learning_rate": 0.00029824687764974504, "loss": 3.209717273712158, "step": 3285, "token_acc": 0.27702217708704413 }, { "epoch": 1.9264145411902667, "grad_norm": 0.42690220719481053, "learning_rate": 0.00029824466074141305, "loss": 3.192824363708496, "step": 3286, "token_acc": 0.2788978995939854 }, { "epoch": 1.9270008795074758, "grad_norm": 0.4105712300770551, "learning_rate": 0.0002982424424405222, "loss": 3.1622745990753174, "step": 3287, "token_acc": 0.28333783971796156 }, { "epoch": 1.927587217824685, "grad_norm": 0.33173920117456757, "learning_rate": 0.0002982402227470934, "loss": 3.164241313934326, "step": 3288, "token_acc": 0.2813170897063204 }, { "epoch": 1.928173556141894, "grad_norm": 0.36749657710773265, "learning_rate": 0.0002982380016611475, "loss": 3.133445978164673, "step": 3289, "token_acc": 0.28680531841661655 }, { "epoch": 1.928759894459103, "grad_norm": 0.4050672066283259, "learning_rate": 0.0002982357791827053, "loss": 3.232879638671875, "step": 3290, "token_acc": 0.27073077705451587 }, { "epoch": 1.9293462327763118, "grad_norm": 0.35628364754238656, "learning_rate": 0.00029823355531178767, "loss": 3.2116737365722656, "step": 3291, "token_acc": 0.2766563589369053 }, { "epoch": 1.929932571093521, "grad_norm": 0.3604384097230929, "learning_rate": 0.00029823133004841556, "loss": 3.2077221870422363, "step": 3292, "token_acc": 0.2761558487958786 }, { "epoch": 1.93051890941073, "grad_norm": 0.39439111226140755, "learning_rate": 0.00029822910339260986, "loss": 3.2032601833343506, "step": 3293, "token_acc": 0.27645328948820375 }, { "epoch": 1.9311052477279391, "grad_norm": 0.3750406128769394, "learning_rate": 0.0002982268753443915, "loss": 3.2258524894714355, "step": 3294, "token_acc": 0.2716833026456052 }, { "epoch": 1.931691586045148, "grad_norm": 0.412236860923718, "learning_rate": 0.00029822464590378134, "loss": 3.203927516937256, "step": 3295, "token_acc": 0.27702681059611134 }, { "epoch": 1.9322779243623571, "grad_norm": 0.49576705318650954, "learning_rate": 0.0002982224150708004, "loss": 3.2049429416656494, "step": 3296, "token_acc": 0.2765579618230108 }, { "epoch": 1.932864262679566, "grad_norm": 0.43590032710430804, "learning_rate": 0.00029822018284546953, "loss": 3.1903162002563477, "step": 3297, "token_acc": 0.2808540628854255 }, { "epoch": 1.9334506009967751, "grad_norm": 0.3856189410273088, "learning_rate": 0.00029821794922780983, "loss": 3.1884617805480957, "step": 3298, "token_acc": 0.27852309027823724 }, { "epoch": 1.9340369393139842, "grad_norm": 0.41304800711767214, "learning_rate": 0.00029821571421784226, "loss": 3.172560214996338, "step": 3299, "token_acc": 0.2810791453440056 }, { "epoch": 1.9346232776311933, "grad_norm": 0.4501092591489766, "learning_rate": 0.00029821347781558774, "loss": 3.182562828063965, "step": 3300, "token_acc": 0.280446725178703 }, { "epoch": 1.9352096159484022, "grad_norm": 0.3578776335923242, "learning_rate": 0.00029821124002106725, "loss": 3.213266134262085, "step": 3301, "token_acc": 0.27627538666034684 }, { "epoch": 1.9357959542656111, "grad_norm": 0.3337353788765908, "learning_rate": 0.000298209000834302, "loss": 3.1792633533477783, "step": 3302, "token_acc": 0.28133187481657373 }, { "epoch": 1.9363822925828202, "grad_norm": 0.34260121730734133, "learning_rate": 0.00029820676025531283, "loss": 3.2000203132629395, "step": 3303, "token_acc": 0.276902518307977 }, { "epoch": 1.9369686309000294, "grad_norm": 0.3356792226973069, "learning_rate": 0.00029820451828412085, "loss": 3.188465118408203, "step": 3304, "token_acc": 0.28093685957892356 }, { "epoch": 1.9375549692172385, "grad_norm": 0.33502387990846544, "learning_rate": 0.00029820227492074715, "loss": 3.1891541481018066, "step": 3305, "token_acc": 0.2787599008177714 }, { "epoch": 1.9381413075344474, "grad_norm": 0.3449171200822794, "learning_rate": 0.00029820003016521276, "loss": 3.185636043548584, "step": 3306, "token_acc": 0.2802411221333775 }, { "epoch": 1.9387276458516562, "grad_norm": 0.4029365558771742, "learning_rate": 0.00029819778401753887, "loss": 3.1883487701416016, "step": 3307, "token_acc": 0.2789084969547925 }, { "epoch": 1.9393139841688654, "grad_norm": 0.43275664429039784, "learning_rate": 0.0002981955364777464, "loss": 3.215132236480713, "step": 3308, "token_acc": 0.2744035546747999 }, { "epoch": 1.9399003224860745, "grad_norm": 0.4037274148022834, "learning_rate": 0.0002981932875458566, "loss": 3.2274036407470703, "step": 3309, "token_acc": 0.2732151339350046 }, { "epoch": 1.9404866608032836, "grad_norm": 0.381948067075118, "learning_rate": 0.00029819103722189056, "loss": 3.1984851360321045, "step": 3310, "token_acc": 0.2773605794588723 }, { "epoch": 1.9410729991204925, "grad_norm": 0.3524375031515061, "learning_rate": 0.0002981887855058694, "loss": 3.1581273078918457, "step": 3311, "token_acc": 0.28273383203352964 }, { "epoch": 1.9416593374377016, "grad_norm": 0.4071483241788557, "learning_rate": 0.0002981865323978143, "loss": 3.1918249130249023, "step": 3312, "token_acc": 0.27839754289911195 }, { "epoch": 1.9422456757549105, "grad_norm": 0.47921824901010524, "learning_rate": 0.00029818427789774643, "loss": 3.2381887435913086, "step": 3313, "token_acc": 0.2725154293709497 }, { "epoch": 1.9428320140721196, "grad_norm": 0.48081525741191644, "learning_rate": 0.0002981820220056869, "loss": 3.2318496704101562, "step": 3314, "token_acc": 0.2724779054071263 }, { "epoch": 1.9434183523893287, "grad_norm": 0.42311287483682924, "learning_rate": 0.00029817976472165696, "loss": 3.214770555496216, "step": 3315, "token_acc": 0.275542615841495 }, { "epoch": 1.9440046907065378, "grad_norm": 0.4478272748059361, "learning_rate": 0.00029817750604567786, "loss": 3.183302402496338, "step": 3316, "token_acc": 0.278842094018181 }, { "epoch": 1.9445910290237467, "grad_norm": 0.4786799140179171, "learning_rate": 0.0002981752459777707, "loss": 3.2055699825286865, "step": 3317, "token_acc": 0.2741945618196384 }, { "epoch": 1.9451773673409556, "grad_norm": 0.4286479259847441, "learning_rate": 0.00029817298451795683, "loss": 3.2112278938293457, "step": 3318, "token_acc": 0.27629442897303574 }, { "epoch": 1.9457637056581647, "grad_norm": 0.5145411571997006, "learning_rate": 0.0002981707216662574, "loss": 3.1898062229156494, "step": 3319, "token_acc": 0.28030057494900085 }, { "epoch": 1.9463500439753738, "grad_norm": 0.4235195803927868, "learning_rate": 0.0002981684574226937, "loss": 3.198345422744751, "step": 3320, "token_acc": 0.27767898798970925 }, { "epoch": 1.946936382292583, "grad_norm": 0.3775682745066809, "learning_rate": 0.000298166191787287, "loss": 3.1314826011657715, "step": 3321, "token_acc": 0.2866647184882135 }, { "epoch": 1.9475227206097918, "grad_norm": 0.36721432278091465, "learning_rate": 0.00029816392476005857, "loss": 3.194535493850708, "step": 3322, "token_acc": 0.2770400516899781 }, { "epoch": 1.948109058927001, "grad_norm": 0.38883516635987747, "learning_rate": 0.0002981616563410298, "loss": 3.1548545360565186, "step": 3323, "token_acc": 0.2839111224947173 }, { "epoch": 1.9486953972442098, "grad_norm": 0.37379266958737695, "learning_rate": 0.0002981593865302218, "loss": 3.2424850463867188, "step": 3324, "token_acc": 0.2710454380263664 }, { "epoch": 1.949281735561419, "grad_norm": 0.39466643921567207, "learning_rate": 0.00029815711532765613, "loss": 3.14503812789917, "step": 3325, "token_acc": 0.2857516894685202 }, { "epoch": 1.949868073878628, "grad_norm": 0.4410454586777076, "learning_rate": 0.00029815484273335393, "loss": 3.2125816345214844, "step": 3326, "token_acc": 0.2750875752525975 }, { "epoch": 1.9504544121958372, "grad_norm": 0.3818890148236516, "learning_rate": 0.0002981525687473366, "loss": 3.1598093509674072, "step": 3327, "token_acc": 0.28131851296166177 }, { "epoch": 1.951040750513046, "grad_norm": 0.37253793323580653, "learning_rate": 0.0002981502933696256, "loss": 3.196945905685425, "step": 3328, "token_acc": 0.27836811183144244 }, { "epoch": 1.951627088830255, "grad_norm": 0.4533250080495823, "learning_rate": 0.0002981480166002422, "loss": 3.200519561767578, "step": 3329, "token_acc": 0.27938840140563515 }, { "epoch": 1.952213427147464, "grad_norm": 0.41884550768654216, "learning_rate": 0.00029814573843920777, "loss": 3.2075729370117188, "step": 3330, "token_acc": 0.2754437620473002 }, { "epoch": 1.9527997654646732, "grad_norm": 0.4321080278107562, "learning_rate": 0.00029814345888654384, "loss": 3.2020134925842285, "step": 3331, "token_acc": 0.2760553389562849 }, { "epoch": 1.9533861037818823, "grad_norm": 0.44580260643052616, "learning_rate": 0.0002981411779422717, "loss": 3.144413709640503, "step": 3332, "token_acc": 0.28522076418474246 }, { "epoch": 1.9539724420990912, "grad_norm": 0.37774152281232387, "learning_rate": 0.0002981388956064128, "loss": 3.1753480434417725, "step": 3333, "token_acc": 0.28009258042990526 }, { "epoch": 1.9545587804163, "grad_norm": 0.4390593345767438, "learning_rate": 0.0002981366118789886, "loss": 3.1932919025421143, "step": 3334, "token_acc": 0.27945905074199767 }, { "epoch": 1.9551451187335092, "grad_norm": 0.44904778864841616, "learning_rate": 0.0002981343267600206, "loss": 3.2144229412078857, "step": 3335, "token_acc": 0.27534977680416634 }, { "epoch": 1.9557314570507183, "grad_norm": 0.38132758654801113, "learning_rate": 0.00029813204024953016, "loss": 3.2166664600372314, "step": 3336, "token_acc": 0.27556653145833065 }, { "epoch": 1.9563177953679274, "grad_norm": 0.3791002976992549, "learning_rate": 0.0002981297523475388, "loss": 3.217221736907959, "step": 3337, "token_acc": 0.27372611964489246 }, { "epoch": 1.9569041336851363, "grad_norm": 0.4458679749297398, "learning_rate": 0.00029812746305406804, "loss": 3.2028467655181885, "step": 3338, "token_acc": 0.27645650481219974 }, { "epoch": 1.9574904720023454, "grad_norm": 0.42027871517592674, "learning_rate": 0.0002981251723691394, "loss": 3.1748695373535156, "step": 3339, "token_acc": 0.2797402847867314 }, { "epoch": 1.9580768103195543, "grad_norm": 0.4033260705541706, "learning_rate": 0.00029812288029277433, "loss": 3.2126588821411133, "step": 3340, "token_acc": 0.2739816513761468 }, { "epoch": 1.9586631486367634, "grad_norm": 0.4118000596195956, "learning_rate": 0.00029812058682499444, "loss": 3.216691732406616, "step": 3341, "token_acc": 0.27443571379635634 }, { "epoch": 1.9592494869539725, "grad_norm": 0.38530467116516487, "learning_rate": 0.0002981182919658212, "loss": 3.2303221225738525, "step": 3342, "token_acc": 0.2746282109058134 }, { "epoch": 1.9598358252711816, "grad_norm": 0.3900969240197085, "learning_rate": 0.0002981159957152762, "loss": 3.206285238265991, "step": 3343, "token_acc": 0.27562706518474017 }, { "epoch": 1.9604221635883905, "grad_norm": 0.45661259972418955, "learning_rate": 0.000298113698073381, "loss": 3.2184529304504395, "step": 3344, "token_acc": 0.2744780586493138 }, { "epoch": 1.9610085019055994, "grad_norm": 0.44203255455665885, "learning_rate": 0.0002981113990401572, "loss": 3.217252731323242, "step": 3345, "token_acc": 0.2757837846330293 }, { "epoch": 1.9615948402228085, "grad_norm": 0.4051769473174531, "learning_rate": 0.0002981090986156264, "loss": 3.192514419555664, "step": 3346, "token_acc": 0.2765101901817991 }, { "epoch": 1.9621811785400176, "grad_norm": 0.3931895936801137, "learning_rate": 0.0002981067967998102, "loss": 3.1989264488220215, "step": 3347, "token_acc": 0.27841162161236166 }, { "epoch": 1.9627675168572267, "grad_norm": 0.394766761969608, "learning_rate": 0.0002981044935927302, "loss": 3.155118465423584, "step": 3348, "token_acc": 0.2828657712787094 }, { "epoch": 1.9633538551744356, "grad_norm": 0.3704907512293779, "learning_rate": 0.00029810218899440803, "loss": 3.176358222961426, "step": 3349, "token_acc": 0.28128635270214075 }, { "epoch": 1.9639401934916447, "grad_norm": 0.31508603859403145, "learning_rate": 0.0002980998830048654, "loss": 3.235736846923828, "step": 3350, "token_acc": 0.2717218084407978 }, { "epoch": 1.9645265318088536, "grad_norm": 0.38380912812083057, "learning_rate": 0.000298097575624124, "loss": 3.1732888221740723, "step": 3351, "token_acc": 0.2807628209185073 }, { "epoch": 1.9651128701260627, "grad_norm": 0.3662293069653066, "learning_rate": 0.00029809526685220533, "loss": 3.195889472961426, "step": 3352, "token_acc": 0.27930063121942655 }, { "epoch": 1.9656992084432718, "grad_norm": 0.38151433491931575, "learning_rate": 0.00029809295668913125, "loss": 3.1938557624816895, "step": 3353, "token_acc": 0.2784071738855384 }, { "epoch": 1.966285546760481, "grad_norm": 0.3427815865251826, "learning_rate": 0.0002980906451349234, "loss": 3.2070651054382324, "step": 3354, "token_acc": 0.27608612760394374 }, { "epoch": 1.9668718850776898, "grad_norm": 0.3800055243023331, "learning_rate": 0.00029808833218960347, "loss": 3.1367244720458984, "step": 3355, "token_acc": 0.28626813563312237 }, { "epoch": 1.9674582233948987, "grad_norm": 0.36934742439101065, "learning_rate": 0.00029808601785319324, "loss": 3.1776466369628906, "step": 3356, "token_acc": 0.28181682147094345 }, { "epoch": 1.9680445617121078, "grad_norm": 0.3369324288371547, "learning_rate": 0.0002980837021257144, "loss": 3.193371534347534, "step": 3357, "token_acc": 0.27857620231507474 }, { "epoch": 1.968630900029317, "grad_norm": 0.3857452494345077, "learning_rate": 0.00029808138500718874, "loss": 3.1710965633392334, "step": 3358, "token_acc": 0.2818963260884776 }, { "epoch": 1.969217238346526, "grad_norm": 0.3884919844665951, "learning_rate": 0.000298079066497638, "loss": 3.1391916275024414, "step": 3359, "token_acc": 0.2854416893704085 }, { "epoch": 1.969803576663735, "grad_norm": 0.3407201310779719, "learning_rate": 0.0002980767465970839, "loss": 3.1727020740509033, "step": 3360, "token_acc": 0.2810303719748796 }, { "epoch": 1.9703899149809438, "grad_norm": 0.38151157714908196, "learning_rate": 0.0002980744253055484, "loss": 3.2263922691345215, "step": 3361, "token_acc": 0.27155195908881435 }, { "epoch": 1.970976253298153, "grad_norm": 0.3834518965681551, "learning_rate": 0.0002980721026230532, "loss": 3.222538709640503, "step": 3362, "token_acc": 0.27440405680593083 }, { "epoch": 1.971562591615362, "grad_norm": 0.371486825197991, "learning_rate": 0.0002980697785496201, "loss": 3.2115392684936523, "step": 3363, "token_acc": 0.27605826360477786 }, { "epoch": 1.9721489299325712, "grad_norm": 0.38852705969961343, "learning_rate": 0.0002980674530852709, "loss": 3.1961581707000732, "step": 3364, "token_acc": 0.27628800733276226 }, { "epoch": 1.97273526824978, "grad_norm": 0.3547550976883598, "learning_rate": 0.0002980651262300276, "loss": 3.155595064163208, "step": 3365, "token_acc": 0.28347993827160495 }, { "epoch": 1.9733216065669892, "grad_norm": 0.38286935170102565, "learning_rate": 0.0002980627979839119, "loss": 3.1769320964813232, "step": 3366, "token_acc": 0.2826406381192275 }, { "epoch": 1.973907944884198, "grad_norm": 0.4275581205098955, "learning_rate": 0.00029806046834694575, "loss": 3.2086310386657715, "step": 3367, "token_acc": 0.2752268404311433 }, { "epoch": 1.9744942832014072, "grad_norm": 0.41196820554876673, "learning_rate": 0.00029805813731915103, "loss": 3.240220308303833, "step": 3368, "token_acc": 0.2713572482766086 }, { "epoch": 1.9750806215186163, "grad_norm": 0.410749692762219, "learning_rate": 0.00029805580490054956, "loss": 3.1351304054260254, "step": 3369, "token_acc": 0.28883281792764554 }, { "epoch": 1.9756669598358254, "grad_norm": 0.36825549015755243, "learning_rate": 0.00029805347109116337, "loss": 3.2013583183288574, "step": 3370, "token_acc": 0.2772010468839261 }, { "epoch": 1.9762532981530343, "grad_norm": 0.36624203603685684, "learning_rate": 0.0002980511358910143, "loss": 3.210986614227295, "step": 3371, "token_acc": 0.27531358071786016 }, { "epoch": 1.9768396364702432, "grad_norm": 0.3912545071492538, "learning_rate": 0.00029804879930012433, "loss": 3.1605324745178223, "step": 3372, "token_acc": 0.2837726091231095 }, { "epoch": 1.9774259747874523, "grad_norm": 0.4385274472201182, "learning_rate": 0.0002980464613185154, "loss": 3.217862606048584, "step": 3373, "token_acc": 0.2753342941058431 }, { "epoch": 1.9780123131046614, "grad_norm": 0.4710715067931232, "learning_rate": 0.0002980441219462094, "loss": 3.2124388217926025, "step": 3374, "token_acc": 0.27557038687568974 }, { "epoch": 1.9785986514218705, "grad_norm": 0.5229571340675347, "learning_rate": 0.00029804178118322843, "loss": 3.205665111541748, "step": 3375, "token_acc": 0.2775821269135077 }, { "epoch": 1.9791849897390794, "grad_norm": 0.47597492712186795, "learning_rate": 0.0002980394390295944, "loss": 3.217750310897827, "step": 3376, "token_acc": 0.27510263450522 }, { "epoch": 1.9797713280562885, "grad_norm": 0.3815455478629293, "learning_rate": 0.0002980370954853293, "loss": 3.226161241531372, "step": 3377, "token_acc": 0.2727554870432166 }, { "epoch": 1.9803576663734974, "grad_norm": 0.42996653121034245, "learning_rate": 0.00029803475055045515, "loss": 3.20371150970459, "step": 3378, "token_acc": 0.2760148559852479 }, { "epoch": 1.9809440046907065, "grad_norm": 0.4348522933684916, "learning_rate": 0.000298032404224994, "loss": 3.2051711082458496, "step": 3379, "token_acc": 0.2777285308537005 }, { "epoch": 1.9815303430079156, "grad_norm": 0.3832950545575956, "learning_rate": 0.000298030056508968, "loss": 3.1846365928649902, "step": 3380, "token_acc": 0.27835558753200584 }, { "epoch": 1.9821166813251248, "grad_norm": 0.4334540998076748, "learning_rate": 0.00029802770740239894, "loss": 3.17218017578125, "step": 3381, "token_acc": 0.2805139464150395 }, { "epoch": 1.9827030196423336, "grad_norm": 0.4369274971382508, "learning_rate": 0.0002980253569053091, "loss": 3.1959147453308105, "step": 3382, "token_acc": 0.2765149886842072 }, { "epoch": 1.9832893579595425, "grad_norm": 0.39196166238801156, "learning_rate": 0.0002980230050177206, "loss": 3.157712697982788, "step": 3383, "token_acc": 0.28273083462704224 }, { "epoch": 1.9838756962767516, "grad_norm": 0.3906006710629176, "learning_rate": 0.0002980206517396553, "loss": 3.207461357116699, "step": 3384, "token_acc": 0.27782520805167105 }, { "epoch": 1.9844620345939608, "grad_norm": 0.4663236834899523, "learning_rate": 0.0002980182970711355, "loss": 3.160243511199951, "step": 3385, "token_acc": 0.2823008713794796 }, { "epoch": 1.9850483729111699, "grad_norm": 0.4135120842634517, "learning_rate": 0.0002980159410121832, "loss": 3.204855442047119, "step": 3386, "token_acc": 0.2759180727239917 }, { "epoch": 1.9856347112283788, "grad_norm": 0.3723443810245823, "learning_rate": 0.0002980135835628206, "loss": 3.185324192047119, "step": 3387, "token_acc": 0.27974397014514474 }, { "epoch": 1.9862210495455876, "grad_norm": 0.3804048246392445, "learning_rate": 0.0002980112247230699, "loss": 3.16774845123291, "step": 3388, "token_acc": 0.2826880053713947 }, { "epoch": 1.9868073878627968, "grad_norm": 0.3726620750949641, "learning_rate": 0.00029800886449295313, "loss": 3.1935040950775146, "step": 3389, "token_acc": 0.2784573073233131 }, { "epoch": 1.9873937261800059, "grad_norm": 0.40202464782262426, "learning_rate": 0.00029800650287249253, "loss": 3.1757564544677734, "step": 3390, "token_acc": 0.2800574037834312 }, { "epoch": 1.987980064497215, "grad_norm": 0.3771190893082877, "learning_rate": 0.0002980041398617103, "loss": 3.186380386352539, "step": 3391, "token_acc": 0.27781333812957254 }, { "epoch": 1.9885664028144239, "grad_norm": 0.36412645512417874, "learning_rate": 0.0002980017754606286, "loss": 3.173776626586914, "step": 3392, "token_acc": 0.2796649521813379 }, { "epoch": 1.989152741131633, "grad_norm": 0.4199477875595848, "learning_rate": 0.0002979994096692696, "loss": 3.2383148670196533, "step": 3393, "token_acc": 0.2728222961241958 }, { "epoch": 1.9897390794488419, "grad_norm": 0.41224534750998254, "learning_rate": 0.0002979970424876557, "loss": 3.222076892852783, "step": 3394, "token_acc": 0.2737492030468776 }, { "epoch": 1.990325417766051, "grad_norm": 0.39633262701575256, "learning_rate": 0.0002979946739158089, "loss": 3.1951208114624023, "step": 3395, "token_acc": 0.27740279567303383 }, { "epoch": 1.99091175608326, "grad_norm": 0.38435878069984414, "learning_rate": 0.00029799230395375167, "loss": 3.2123942375183105, "step": 3396, "token_acc": 0.27582848045582137 }, { "epoch": 1.9914980944004692, "grad_norm": 0.36975804109065546, "learning_rate": 0.0002979899326015061, "loss": 3.1646666526794434, "step": 3397, "token_acc": 0.28235180768068996 }, { "epoch": 1.992084432717678, "grad_norm": 0.3654960051140184, "learning_rate": 0.0002979875598590945, "loss": 3.1928539276123047, "step": 3398, "token_acc": 0.28084030710775465 }, { "epoch": 1.992670771034887, "grad_norm": 0.3178800950094833, "learning_rate": 0.00029798518572653925, "loss": 3.229656457901001, "step": 3399, "token_acc": 0.2739833651086296 }, { "epoch": 1.993257109352096, "grad_norm": 0.4168841050810552, "learning_rate": 0.0002979828102038625, "loss": 3.230128765106201, "step": 3400, "token_acc": 0.27231527295976665 }, { "epoch": 1.9938434476693052, "grad_norm": 0.4060076991545318, "learning_rate": 0.0002979804332910868, "loss": 3.209616184234619, "step": 3401, "token_acc": 0.27532426918061975 }, { "epoch": 1.9944297859865143, "grad_norm": 0.3474952930150825, "learning_rate": 0.0002979780549882343, "loss": 3.225494861602783, "step": 3402, "token_acc": 0.2751629178873053 }, { "epoch": 1.9950161243037232, "grad_norm": 0.37390638891861583, "learning_rate": 0.00029797567529532727, "loss": 3.2129006385803223, "step": 3403, "token_acc": 0.273763058202273 }, { "epoch": 1.9956024626209323, "grad_norm": 0.39394712142879085, "learning_rate": 0.00029797329421238827, "loss": 3.193532943725586, "step": 3404, "token_acc": 0.27796589233879954 }, { "epoch": 1.9961888009381412, "grad_norm": 0.35348550773198245, "learning_rate": 0.00029797091173943953, "loss": 3.210447311401367, "step": 3405, "token_acc": 0.27487725174403116 }, { "epoch": 1.9967751392553503, "grad_norm": 0.3317223774491301, "learning_rate": 0.0002979685278765035, "loss": 3.192009687423706, "step": 3406, "token_acc": 0.2767474772497872 }, { "epoch": 1.9973614775725594, "grad_norm": 0.4037863277658947, "learning_rate": 0.0002979661426236025, "loss": 3.1939737796783447, "step": 3407, "token_acc": 0.27676198337982183 }, { "epoch": 1.9979478158897686, "grad_norm": 0.43014307105734145, "learning_rate": 0.000297963755980759, "loss": 3.196213960647583, "step": 3408, "token_acc": 0.27807366427959423 }, { "epoch": 1.9985341542069774, "grad_norm": 0.43607282692039373, "learning_rate": 0.0002979613679479954, "loss": 3.1638436317443848, "step": 3409, "token_acc": 0.28304777413816656 }, { "epoch": 1.9991204925241863, "grad_norm": 0.40025565446754036, "learning_rate": 0.00029795897852533413, "loss": 3.179595947265625, "step": 3410, "token_acc": 0.28001762029342087 }, { "epoch": 1.9997068308413954, "grad_norm": 0.38469475673173525, "learning_rate": 0.0002979565877127976, "loss": 3.179605484008789, "step": 3411, "token_acc": 0.2783551831672339 }, { "epoch": 2.0, "grad_norm": 0.3772237471439729, "learning_rate": 0.00029795419551040833, "loss": 3.1722822189331055, "step": 3412, "token_acc": 0.2822865944549339 }, { "epoch": 2.0, "eval_loss": 3.1689116954803467, "eval_runtime": 8.8282, "eval_samples_per_second": 28.998, "eval_steps_per_second": 3.625, "eval_token_acc": 0.2808737831853162, "step": 3412 }, { "epoch": 2.000586338317209, "grad_norm": 0.4693268402300224, "learning_rate": 0.0002979518019181888, "loss": 3.128814935684204, "step": 3413, "token_acc": 0.2850745210087439 }, { "epoch": 2.0011726766344182, "grad_norm": 0.4494293147080739, "learning_rate": 0.00029794940693616135, "loss": 3.1643800735473633, "step": 3414, "token_acc": 0.280364862164351 }, { "epoch": 2.001759014951627, "grad_norm": 0.4999891350411283, "learning_rate": 0.00029794701056434867, "loss": 3.212587356567383, "step": 3415, "token_acc": 0.2754552227610109 }, { "epoch": 2.002345353268836, "grad_norm": 0.42985230431226545, "learning_rate": 0.00029794461280277317, "loss": 3.1644279956817627, "step": 3416, "token_acc": 0.27951792664306757 }, { "epoch": 2.002931691586045, "grad_norm": 0.4936059455030206, "learning_rate": 0.0002979422136514574, "loss": 3.149993419647217, "step": 3417, "token_acc": 0.28243212911779597 }, { "epoch": 2.0035180299032542, "grad_norm": 0.4265801067684844, "learning_rate": 0.0002979398131104239, "loss": 3.1089510917663574, "step": 3418, "token_acc": 0.2888276955438235 }, { "epoch": 2.0041043682204633, "grad_norm": 0.4357110941176869, "learning_rate": 0.0002979374111796952, "loss": 3.1907286643981934, "step": 3419, "token_acc": 0.27735494094595753 }, { "epoch": 2.0046907065376725, "grad_norm": 0.39912122899714053, "learning_rate": 0.0002979350078592938, "loss": 3.149362802505493, "step": 3420, "token_acc": 0.2815598010441765 }, { "epoch": 2.005277044854881, "grad_norm": 0.36255386799156597, "learning_rate": 0.00029793260314924246, "loss": 3.1551661491394043, "step": 3421, "token_acc": 0.2817579707217503 }, { "epoch": 2.0058633831720902, "grad_norm": 0.36632928426957834, "learning_rate": 0.0002979301970495636, "loss": 3.165836811065674, "step": 3422, "token_acc": 0.2825132746155687 }, { "epoch": 2.0064497214892993, "grad_norm": 0.41413484301211717, "learning_rate": 0.00029792778956027986, "loss": 3.0863258838653564, "step": 3423, "token_acc": 0.29070250648135487 }, { "epoch": 2.0070360598065085, "grad_norm": 0.3964880212843554, "learning_rate": 0.00029792538068141385, "loss": 3.1333117485046387, "step": 3424, "token_acc": 0.2848141383755909 }, { "epoch": 2.0076223981237176, "grad_norm": 0.4106567877032098, "learning_rate": 0.00029792297041298825, "loss": 3.1283042430877686, "step": 3425, "token_acc": 0.28598961725149824 }, { "epoch": 2.0082087364409262, "grad_norm": 0.4678643064897086, "learning_rate": 0.0002979205587550257, "loss": 3.137134075164795, "step": 3426, "token_acc": 0.2844349198595474 }, { "epoch": 2.0087950747581353, "grad_norm": 0.4084393550899194, "learning_rate": 0.0002979181457075488, "loss": 3.150017738342285, "step": 3427, "token_acc": 0.2829810991537732 }, { "epoch": 2.0093814130753445, "grad_norm": 0.4209590751654001, "learning_rate": 0.0002979157312705803, "loss": 3.137946367263794, "step": 3428, "token_acc": 0.2841458576804726 }, { "epoch": 2.0099677513925536, "grad_norm": 0.4500839660224752, "learning_rate": 0.0002979133154441427, "loss": 3.1342806816101074, "step": 3429, "token_acc": 0.28405961149247816 }, { "epoch": 2.0105540897097627, "grad_norm": 0.40353803071576827, "learning_rate": 0.0002979108982282589, "loss": 3.104767322540283, "step": 3430, "token_acc": 0.28898804174838966 }, { "epoch": 2.0111404280269713, "grad_norm": 0.3597226941894105, "learning_rate": 0.00029790847962295154, "loss": 3.098072052001953, "step": 3431, "token_acc": 0.2889789353695289 }, { "epoch": 2.0117267663441805, "grad_norm": 0.3730829068510743, "learning_rate": 0.0002979060596282433, "loss": 3.1085596084594727, "step": 3432, "token_acc": 0.2870292568382306 }, { "epoch": 2.0123131046613896, "grad_norm": 0.38421643536287337, "learning_rate": 0.00029790363824415693, "loss": 3.1333045959472656, "step": 3433, "token_acc": 0.28487105971253973 }, { "epoch": 2.0128994429785987, "grad_norm": 0.4015381665042725, "learning_rate": 0.00029790121547071516, "loss": 3.1151533126831055, "step": 3434, "token_acc": 0.28771270951505284 }, { "epoch": 2.013485781295808, "grad_norm": 0.3908685556457136, "learning_rate": 0.0002978987913079408, "loss": 3.1296937465667725, "step": 3435, "token_acc": 0.2865242176290379 }, { "epoch": 2.014072119613017, "grad_norm": 0.362248717817106, "learning_rate": 0.00029789636575585656, "loss": 3.1299726963043213, "step": 3436, "token_acc": 0.2862445403203012 }, { "epoch": 2.0146584579302256, "grad_norm": 0.4121208915781683, "learning_rate": 0.00029789393881448533, "loss": 3.107898712158203, "step": 3437, "token_acc": 0.28749310300921016 }, { "epoch": 2.0152447962474347, "grad_norm": 0.3651512223553721, "learning_rate": 0.0002978915104838498, "loss": 3.1158671379089355, "step": 3438, "token_acc": 0.2862957242495729 }, { "epoch": 2.015831134564644, "grad_norm": 0.3667427697512673, "learning_rate": 0.0002978890807639728, "loss": 3.1370677947998047, "step": 3439, "token_acc": 0.282250969728476 }, { "epoch": 2.016417472881853, "grad_norm": 0.42623514841764887, "learning_rate": 0.0002978866496548771, "loss": 3.1452929973602295, "step": 3440, "token_acc": 0.28459368827702103 }, { "epoch": 2.017003811199062, "grad_norm": 0.36351875555823093, "learning_rate": 0.00029788421715658573, "loss": 3.132627010345459, "step": 3441, "token_acc": 0.2848018845451417 }, { "epoch": 2.0175901495162707, "grad_norm": 0.37883952206581556, "learning_rate": 0.00029788178326912133, "loss": 3.1542673110961914, "step": 3442, "token_acc": 0.2834036474504359 }, { "epoch": 2.01817648783348, "grad_norm": 0.39772390047638145, "learning_rate": 0.00029787934799250685, "loss": 3.2037856578826904, "step": 3443, "token_acc": 0.2734826375180308 }, { "epoch": 2.018762826150689, "grad_norm": 0.369383127275441, "learning_rate": 0.0002978769113267652, "loss": 3.159280776977539, "step": 3444, "token_acc": 0.28142865057898525 }, { "epoch": 2.019349164467898, "grad_norm": 0.3853129161257599, "learning_rate": 0.00029787447327191927, "loss": 3.1549551486968994, "step": 3445, "token_acc": 0.28148429513571116 }, { "epoch": 2.019935502785107, "grad_norm": 0.35432230549810184, "learning_rate": 0.0002978720338279919, "loss": 3.148341655731201, "step": 3446, "token_acc": 0.2831590088572573 }, { "epoch": 2.0205218411023163, "grad_norm": 0.3794908010657576, "learning_rate": 0.00029786959299500605, "loss": 3.121584892272949, "step": 3447, "token_acc": 0.28644777914064323 }, { "epoch": 2.021108179419525, "grad_norm": 0.38514720598065894, "learning_rate": 0.00029786715077298454, "loss": 3.09490966796875, "step": 3448, "token_acc": 0.2911864795625741 }, { "epoch": 2.021694517736734, "grad_norm": 0.33695839204147454, "learning_rate": 0.0002978647071619505, "loss": 3.096996545791626, "step": 3449, "token_acc": 0.2910687147342017 }, { "epoch": 2.022280856053943, "grad_norm": 0.34468055695152805, "learning_rate": 0.00029786226216192675, "loss": 3.132293462753296, "step": 3450, "token_acc": 0.28298106527484007 }, { "epoch": 2.0228671943711523, "grad_norm": 0.38124693863745074, "learning_rate": 0.00029785981577293627, "loss": 3.1362264156341553, "step": 3451, "token_acc": 0.2824507392337959 }, { "epoch": 2.0234535326883614, "grad_norm": 0.382586837702508, "learning_rate": 0.00029785736799500215, "loss": 3.1886556148529053, "step": 3452, "token_acc": 0.2787853441319425 }, { "epoch": 2.02403987100557, "grad_norm": 0.38038669886498655, "learning_rate": 0.0002978549188281472, "loss": 3.0679826736450195, "step": 3453, "token_acc": 0.29393952216480396 }, { "epoch": 2.024626209322779, "grad_norm": 0.38004811148090367, "learning_rate": 0.00029785246827239453, "loss": 3.194671154022217, "step": 3454, "token_acc": 0.27638870389312076 }, { "epoch": 2.0252125476399883, "grad_norm": 0.3711478235635344, "learning_rate": 0.0002978500163277672, "loss": 3.1409754753112793, "step": 3455, "token_acc": 0.28372774923452176 }, { "epoch": 2.0257988859571974, "grad_norm": 0.36493214590936185, "learning_rate": 0.0002978475629942882, "loss": 3.098780870437622, "step": 3456, "token_acc": 0.29087998555834005 }, { "epoch": 2.0263852242744065, "grad_norm": 0.36195177990017, "learning_rate": 0.00029784510827198055, "loss": 3.1051435470581055, "step": 3457, "token_acc": 0.2887861780565643 }, { "epoch": 2.026971562591615, "grad_norm": 0.34780890327243913, "learning_rate": 0.00029784265216086734, "loss": 3.148003339767456, "step": 3458, "token_acc": 0.2812387883422328 }, { "epoch": 2.0275579009088243, "grad_norm": 0.3727664772732618, "learning_rate": 0.0002978401946609716, "loss": 3.0691351890563965, "step": 3459, "token_acc": 0.29365884240222745 }, { "epoch": 2.0281442392260334, "grad_norm": 0.3855725913060223, "learning_rate": 0.0002978377357723165, "loss": 3.159719467163086, "step": 3460, "token_acc": 0.2826157324153522 }, { "epoch": 2.0287305775432425, "grad_norm": 0.36752659830781986, "learning_rate": 0.00029783527549492503, "loss": 3.137112855911255, "step": 3461, "token_acc": 0.2822810288137503 }, { "epoch": 2.0293169158604516, "grad_norm": 0.39319522138372287, "learning_rate": 0.0002978328138288204, "loss": 3.1231744289398193, "step": 3462, "token_acc": 0.2861779978009375 }, { "epoch": 2.0299032541776607, "grad_norm": 0.3241839013696892, "learning_rate": 0.0002978303507740257, "loss": 3.1348609924316406, "step": 3463, "token_acc": 0.28654076199146755 }, { "epoch": 2.0304895924948694, "grad_norm": 0.329587503257926, "learning_rate": 0.0002978278863305641, "loss": 3.122725009918213, "step": 3464, "token_acc": 0.2869753751611894 }, { "epoch": 2.0310759308120785, "grad_norm": 0.3552415486900838, "learning_rate": 0.0002978254204984586, "loss": 3.135404109954834, "step": 3465, "token_acc": 0.2851585716503981 }, { "epoch": 2.0316622691292876, "grad_norm": 0.3374796323262722, "learning_rate": 0.0002978229532777325, "loss": 3.1368470191955566, "step": 3466, "token_acc": 0.2850211399999471 }, { "epoch": 2.0322486074464967, "grad_norm": 0.42808961517240873, "learning_rate": 0.000297820484668409, "loss": 3.1344170570373535, "step": 3467, "token_acc": 0.2839436894625125 }, { "epoch": 2.032834945763706, "grad_norm": 0.4285950097165144, "learning_rate": 0.0002978180146705112, "loss": 3.162283420562744, "step": 3468, "token_acc": 0.27864230359341535 }, { "epoch": 2.0334212840809145, "grad_norm": 0.3942104733731069, "learning_rate": 0.00029781554328406237, "loss": 3.1488261222839355, "step": 3469, "token_acc": 0.2823741909588079 }, { "epoch": 2.0340076223981236, "grad_norm": 0.4275392574220518, "learning_rate": 0.0002978130705090857, "loss": 3.1416211128234863, "step": 3470, "token_acc": 0.28273339300307426 }, { "epoch": 2.0345939607153327, "grad_norm": 0.48542663274783737, "learning_rate": 0.0002978105963456043, "loss": 3.1221628189086914, "step": 3471, "token_acc": 0.28567592650524515 }, { "epoch": 2.035180299032542, "grad_norm": 0.4540814159435083, "learning_rate": 0.00029780812079364163, "loss": 3.1432619094848633, "step": 3472, "token_acc": 0.2830303683133356 }, { "epoch": 2.035766637349751, "grad_norm": 0.3712601559118844, "learning_rate": 0.00029780564385322085, "loss": 3.1556954383850098, "step": 3473, "token_acc": 0.2823031671396317 }, { "epoch": 2.03635297566696, "grad_norm": 0.33314191723912395, "learning_rate": 0.0002978031655243652, "loss": 3.1179394721984863, "step": 3474, "token_acc": 0.2865865855043005 }, { "epoch": 2.0369393139841687, "grad_norm": 0.32220550490807864, "learning_rate": 0.00029780068580709793, "loss": 3.1002423763275146, "step": 3475, "token_acc": 0.2900517581377644 }, { "epoch": 2.037525652301378, "grad_norm": 0.41199791774491223, "learning_rate": 0.0002977982047014424, "loss": 3.135874032974243, "step": 3476, "token_acc": 0.28354010423945053 }, { "epoch": 2.038111990618587, "grad_norm": 0.40860395631955454, "learning_rate": 0.0002977957222074219, "loss": 3.0974645614624023, "step": 3477, "token_acc": 0.2870893157585079 }, { "epoch": 2.038698328935796, "grad_norm": 0.2997445321027915, "learning_rate": 0.0002977932383250598, "loss": 3.133939027786255, "step": 3478, "token_acc": 0.285202293724116 }, { "epoch": 2.039284667253005, "grad_norm": 0.3522548715001856, "learning_rate": 0.00029779075305437936, "loss": 3.1448988914489746, "step": 3479, "token_acc": 0.2847898246931877 }, { "epoch": 2.039871005570214, "grad_norm": 0.35811174498889337, "learning_rate": 0.0002977882663954039, "loss": 3.164663076400757, "step": 3480, "token_acc": 0.2799471861765558 }, { "epoch": 2.040457343887423, "grad_norm": 0.3523736077082178, "learning_rate": 0.0002977857783481568, "loss": 3.1280064582824707, "step": 3481, "token_acc": 0.2871072249430628 }, { "epoch": 2.041043682204632, "grad_norm": 0.3389376874865217, "learning_rate": 0.0002977832889126615, "loss": 3.1250505447387695, "step": 3482, "token_acc": 0.2853470233041602 }, { "epoch": 2.041630020521841, "grad_norm": 0.39684627094864483, "learning_rate": 0.00029778079808894133, "loss": 3.101452350616455, "step": 3483, "token_acc": 0.28957974284321536 }, { "epoch": 2.0422163588390503, "grad_norm": 0.3557693879707045, "learning_rate": 0.00029777830587701974, "loss": 3.16340708732605, "step": 3484, "token_acc": 0.28090264409642446 }, { "epoch": 2.042802697156259, "grad_norm": 0.33116527751910174, "learning_rate": 0.0002977758122769201, "loss": 3.155015468597412, "step": 3485, "token_acc": 0.281952505003973 }, { "epoch": 2.043389035473468, "grad_norm": 0.3669881335006332, "learning_rate": 0.00029777331728866576, "loss": 3.126927375793457, "step": 3486, "token_acc": 0.2837334104566342 }, { "epoch": 2.043975373790677, "grad_norm": 0.3732859662559385, "learning_rate": 0.0002977708209122803, "loss": 3.1357572078704834, "step": 3487, "token_acc": 0.28539349843830186 }, { "epoch": 2.0445617121078863, "grad_norm": 0.4382736906957138, "learning_rate": 0.0002977683231477871, "loss": 3.1393203735351562, "step": 3488, "token_acc": 0.2842107145046278 }, { "epoch": 2.0451480504250954, "grad_norm": 0.43775367165838047, "learning_rate": 0.0002977658239952096, "loss": 3.1039083003997803, "step": 3489, "token_acc": 0.28923629968707165 }, { "epoch": 2.0457343887423045, "grad_norm": 0.3287333893533817, "learning_rate": 0.0002977633234545713, "loss": 3.0934486389160156, "step": 3490, "token_acc": 0.2911684104051372 }, { "epoch": 2.046320727059513, "grad_norm": 0.40452705999500393, "learning_rate": 0.0002977608215258957, "loss": 3.134270668029785, "step": 3491, "token_acc": 0.28572189097103917 }, { "epoch": 2.0469070653767223, "grad_norm": 0.43003384312600423, "learning_rate": 0.0002977583182092063, "loss": 3.1502599716186523, "step": 3492, "token_acc": 0.2832874867549496 }, { "epoch": 2.0474934036939314, "grad_norm": 0.474634651217974, "learning_rate": 0.00029775581350452657, "loss": 3.1211743354797363, "step": 3493, "token_acc": 0.2877612312228905 }, { "epoch": 2.0480797420111405, "grad_norm": 0.4547410178087421, "learning_rate": 0.0002977533074118801, "loss": 3.1612777709960938, "step": 3494, "token_acc": 0.2815385720958643 }, { "epoch": 2.0486660803283496, "grad_norm": 0.37815061197681005, "learning_rate": 0.0002977507999312904, "loss": 3.127793312072754, "step": 3495, "token_acc": 0.28566074700894023 }, { "epoch": 2.0492524186455583, "grad_norm": 0.33109502420966236, "learning_rate": 0.00029774829106278105, "loss": 3.1137139797210693, "step": 3496, "token_acc": 0.2864732358141778 }, { "epoch": 2.0498387569627674, "grad_norm": 0.3488454773249142, "learning_rate": 0.0002977457808063756, "loss": 3.157222270965576, "step": 3497, "token_acc": 0.2803369130778737 }, { "epoch": 2.0504250952799765, "grad_norm": 0.3572317651350675, "learning_rate": 0.0002977432691620976, "loss": 3.148104190826416, "step": 3498, "token_acc": 0.2833471869804882 }, { "epoch": 2.0510114335971856, "grad_norm": 0.31564968542581406, "learning_rate": 0.0002977407561299707, "loss": 3.116899013519287, "step": 3499, "token_acc": 0.28815661147057897 }, { "epoch": 2.0515977719143947, "grad_norm": 0.3664372042406818, "learning_rate": 0.00029773824171001846, "loss": 3.1276073455810547, "step": 3500, "token_acc": 0.2850510213481638 }, { "epoch": 2.052184110231604, "grad_norm": 0.3481291757459716, "learning_rate": 0.00029773572590226455, "loss": 3.124985694885254, "step": 3501, "token_acc": 0.2849069095685324 }, { "epoch": 2.0527704485488125, "grad_norm": 0.3865212812170118, "learning_rate": 0.00029773320870673256, "loss": 3.0939674377441406, "step": 3502, "token_acc": 0.2909795897746792 }, { "epoch": 2.0533567868660216, "grad_norm": 0.3337478467015942, "learning_rate": 0.0002977306901234461, "loss": 3.1493101119995117, "step": 3503, "token_acc": 0.2830482033119354 }, { "epoch": 2.0539431251832307, "grad_norm": 0.35569292053366425, "learning_rate": 0.0002977281701524289, "loss": 3.130641460418701, "step": 3504, "token_acc": 0.2848616270406106 }, { "epoch": 2.05452946350044, "grad_norm": 0.3883722275928381, "learning_rate": 0.0002977256487937046, "loss": 3.157576560974121, "step": 3505, "token_acc": 0.27892406366382333 }, { "epoch": 2.055115801817649, "grad_norm": 0.33984671228836855, "learning_rate": 0.00029772312604729696, "loss": 3.113192319869995, "step": 3506, "token_acc": 0.286729158007569 }, { "epoch": 2.0557021401348576, "grad_norm": 0.33295295626915616, "learning_rate": 0.00029772060191322956, "loss": 3.12691593170166, "step": 3507, "token_acc": 0.2872470360880705 }, { "epoch": 2.0562884784520667, "grad_norm": 0.3637716501314176, "learning_rate": 0.0002977180763915262, "loss": 3.150609016418457, "step": 3508, "token_acc": 0.2836260346671929 }, { "epoch": 2.056874816769276, "grad_norm": 0.4138224822856884, "learning_rate": 0.0002977155494822105, "loss": 3.1472744941711426, "step": 3509, "token_acc": 0.2803072253327591 }, { "epoch": 2.057461155086485, "grad_norm": 0.41272097146259584, "learning_rate": 0.00029771302118530624, "loss": 3.1490025520324707, "step": 3510, "token_acc": 0.2815108445299942 }, { "epoch": 2.058047493403694, "grad_norm": 0.38247622432235545, "learning_rate": 0.00029771049150083723, "loss": 3.1066107749938965, "step": 3511, "token_acc": 0.28804639728970416 }, { "epoch": 2.0586338317209028, "grad_norm": 0.44756299158841223, "learning_rate": 0.00029770796042882726, "loss": 3.1254422664642334, "step": 3512, "token_acc": 0.28580617661874685 }, { "epoch": 2.059220170038112, "grad_norm": 0.38781975131153684, "learning_rate": 0.00029770542796929997, "loss": 3.1050002574920654, "step": 3513, "token_acc": 0.2876175238215935 }, { "epoch": 2.059806508355321, "grad_norm": 0.38283329468430505, "learning_rate": 0.0002977028941222792, "loss": 3.1218655109405518, "step": 3514, "token_acc": 0.28573012763305894 }, { "epoch": 2.06039284667253, "grad_norm": 0.37080950960535186, "learning_rate": 0.00029770035888778887, "loss": 3.1293225288391113, "step": 3515, "token_acc": 0.28524457256103053 }, { "epoch": 2.060979184989739, "grad_norm": 0.3460738813636834, "learning_rate": 0.0002976978222658526, "loss": 3.156442642211914, "step": 3516, "token_acc": 0.280388287514918 }, { "epoch": 2.0615655233069483, "grad_norm": 0.38894253341154644, "learning_rate": 0.0002976952842564943, "loss": 3.1228482723236084, "step": 3517, "token_acc": 0.2874927142275018 }, { "epoch": 2.062151861624157, "grad_norm": 0.3918812413865541, "learning_rate": 0.0002976927448597379, "loss": 3.1407010555267334, "step": 3518, "token_acc": 0.28330341932343633 }, { "epoch": 2.062738199941366, "grad_norm": 0.3603479048629899, "learning_rate": 0.0002976902040756072, "loss": 3.1232118606567383, "step": 3519, "token_acc": 0.2859059458427339 }, { "epoch": 2.063324538258575, "grad_norm": 0.3959649666699924, "learning_rate": 0.000297687661904126, "loss": 3.1359477043151855, "step": 3520, "token_acc": 0.28413763530880143 }, { "epoch": 2.0639108765757843, "grad_norm": 0.3869707058558757, "learning_rate": 0.0002976851183453182, "loss": 3.131862163543701, "step": 3521, "token_acc": 0.2860237456094662 }, { "epoch": 2.0644972148929934, "grad_norm": 0.3714431022261122, "learning_rate": 0.00029768257339920774, "loss": 3.1441493034362793, "step": 3522, "token_acc": 0.2838706294164689 }, { "epoch": 2.065083553210202, "grad_norm": 0.3335036041680849, "learning_rate": 0.00029768002706581854, "loss": 3.153364419937134, "step": 3523, "token_acc": 0.2819435287521609 }, { "epoch": 2.065669891527411, "grad_norm": 0.3824269434467475, "learning_rate": 0.0002976774793451745, "loss": 3.1012821197509766, "step": 3524, "token_acc": 0.2883134212248136 }, { "epoch": 2.0662562298446203, "grad_norm": 0.35969115990196454, "learning_rate": 0.0002976749302372995, "loss": 3.1937437057495117, "step": 3525, "token_acc": 0.27583544752707534 }, { "epoch": 2.0668425681618294, "grad_norm": 0.36222974563201477, "learning_rate": 0.0002976723797422175, "loss": 3.146131992340088, "step": 3526, "token_acc": 0.2818005960161029 }, { "epoch": 2.0674289064790385, "grad_norm": 0.35350449822194085, "learning_rate": 0.00029766982785995255, "loss": 3.113847017288208, "step": 3527, "token_acc": 0.287634684994635 }, { "epoch": 2.068015244796247, "grad_norm": 0.3340319690663173, "learning_rate": 0.00029766727459052853, "loss": 3.1147704124450684, "step": 3528, "token_acc": 0.28748429129340874 }, { "epoch": 2.0686015831134563, "grad_norm": 0.3993938391747189, "learning_rate": 0.00029766471993396943, "loss": 3.145965576171875, "step": 3529, "token_acc": 0.28364981068530104 }, { "epoch": 2.0691879214306654, "grad_norm": 0.3615959001463227, "learning_rate": 0.00029766216389029925, "loss": 3.121049165725708, "step": 3530, "token_acc": 0.2873849523436305 }, { "epoch": 2.0697742597478745, "grad_norm": 0.364255840245574, "learning_rate": 0.00029765960645954207, "loss": 3.146820068359375, "step": 3531, "token_acc": 0.2837215838286996 }, { "epoch": 2.0703605980650837, "grad_norm": 0.36466553204277075, "learning_rate": 0.00029765704764172184, "loss": 3.1751701831817627, "step": 3532, "token_acc": 0.2791495648028582 }, { "epoch": 2.0709469363822928, "grad_norm": 0.37937564535364704, "learning_rate": 0.0002976544874368626, "loss": 3.100560188293457, "step": 3533, "token_acc": 0.288098886505261 }, { "epoch": 2.0715332746995014, "grad_norm": 0.38360413351165645, "learning_rate": 0.00029765192584498847, "loss": 3.1499667167663574, "step": 3534, "token_acc": 0.28279552371390876 }, { "epoch": 2.0721196130167105, "grad_norm": 0.3765750315115907, "learning_rate": 0.00029764936286612336, "loss": 3.128371477127075, "step": 3535, "token_acc": 0.28464078793437586 }, { "epoch": 2.0727059513339197, "grad_norm": 0.44904992265709776, "learning_rate": 0.00029764679850029154, "loss": 3.1282753944396973, "step": 3536, "token_acc": 0.2861553857162116 }, { "epoch": 2.0732922896511288, "grad_norm": 0.38263740998766227, "learning_rate": 0.00029764423274751696, "loss": 3.113499164581299, "step": 3537, "token_acc": 0.2890378946736946 }, { "epoch": 2.073878627968338, "grad_norm": 0.3281635353850766, "learning_rate": 0.0002976416656078238, "loss": 3.13016676902771, "step": 3538, "token_acc": 0.285801624803117 }, { "epoch": 2.0744649662855466, "grad_norm": 0.3433197943314979, "learning_rate": 0.0002976390970812361, "loss": 3.11934494972229, "step": 3539, "token_acc": 0.2851191679757776 }, { "epoch": 2.0750513046027557, "grad_norm": 0.3888432848829322, "learning_rate": 0.000297636527167778, "loss": 3.140639305114746, "step": 3540, "token_acc": 0.2839151551452493 }, { "epoch": 2.0756376429199648, "grad_norm": 0.41079850319974714, "learning_rate": 0.00029763395586747377, "loss": 3.1287364959716797, "step": 3541, "token_acc": 0.28452843383934745 }, { "epoch": 2.076223981237174, "grad_norm": 0.34197633708891684, "learning_rate": 0.00029763138318034745, "loss": 3.1278767585754395, "step": 3542, "token_acc": 0.2860553423135104 }, { "epoch": 2.076810319554383, "grad_norm": 0.3798433352248671, "learning_rate": 0.00029762880910642317, "loss": 3.1371684074401855, "step": 3543, "token_acc": 0.2858989351880347 }, { "epoch": 2.077396657871592, "grad_norm": 0.3841485055669031, "learning_rate": 0.00029762623364572516, "loss": 3.1701271533966064, "step": 3544, "token_acc": 0.27996077253652324 }, { "epoch": 2.077982996188801, "grad_norm": 0.38086084025145295, "learning_rate": 0.0002976236567982776, "loss": 3.1060070991516113, "step": 3545, "token_acc": 0.28773669995858625 }, { "epoch": 2.07856933450601, "grad_norm": 0.3255364386142253, "learning_rate": 0.00029762107856410474, "loss": 3.1586480140686035, "step": 3546, "token_acc": 0.280195677163609 }, { "epoch": 2.079155672823219, "grad_norm": 0.3557486075825875, "learning_rate": 0.0002976184989432308, "loss": 3.151869297027588, "step": 3547, "token_acc": 0.28266084279116394 }, { "epoch": 2.079742011140428, "grad_norm": 0.4177411514690902, "learning_rate": 0.00029761591793567993, "loss": 3.154684066772461, "step": 3548, "token_acc": 0.2822419283261836 }, { "epoch": 2.0803283494576372, "grad_norm": 0.4010392912952029, "learning_rate": 0.00029761333554147645, "loss": 3.168264150619507, "step": 3549, "token_acc": 0.27807405110035827 }, { "epoch": 2.080914687774846, "grad_norm": 0.3937215626325022, "learning_rate": 0.0002976107517606446, "loss": 3.133364677429199, "step": 3550, "token_acc": 0.2847749062911299 }, { "epoch": 2.081501026092055, "grad_norm": 0.3836105065696574, "learning_rate": 0.0002976081665932086, "loss": 3.112882614135742, "step": 3551, "token_acc": 0.2858181552838897 }, { "epoch": 2.082087364409264, "grad_norm": 0.3602429426971644, "learning_rate": 0.00029760558003919283, "loss": 3.14243221282959, "step": 3552, "token_acc": 0.284724178734434 }, { "epoch": 2.0826737027264732, "grad_norm": 0.3610106469486345, "learning_rate": 0.0002976029920986215, "loss": 3.1269149780273438, "step": 3553, "token_acc": 0.28660333666649124 }, { "epoch": 2.0832600410436823, "grad_norm": 0.41613010757234326, "learning_rate": 0.00029760040277151896, "loss": 3.1113481521606445, "step": 3554, "token_acc": 0.28706547171387986 }, { "epoch": 2.0838463793608915, "grad_norm": 0.3325729284686215, "learning_rate": 0.0002975978120579096, "loss": 3.1228528022766113, "step": 3555, "token_acc": 0.285179826105093 }, { "epoch": 2.0844327176781, "grad_norm": 0.3515770367479981, "learning_rate": 0.00029759521995781764, "loss": 3.131241798400879, "step": 3556, "token_acc": 0.28476430683130927 }, { "epoch": 2.0850190559953092, "grad_norm": 0.39192313867772643, "learning_rate": 0.00029759262647126745, "loss": 3.1419215202331543, "step": 3557, "token_acc": 0.2818572023192494 }, { "epoch": 2.0856053943125183, "grad_norm": 0.35513370214930834, "learning_rate": 0.0002975900315982834, "loss": 3.128060817718506, "step": 3558, "token_acc": 0.2859430495938014 }, { "epoch": 2.0861917326297275, "grad_norm": 0.3611433351560402, "learning_rate": 0.00029758743533889, "loss": 3.12465500831604, "step": 3559, "token_acc": 0.2854470622905293 }, { "epoch": 2.0867780709469366, "grad_norm": 0.3880295163940839, "learning_rate": 0.00029758483769311137, "loss": 3.143515110015869, "step": 3560, "token_acc": 0.28390329579502016 }, { "epoch": 2.0873644092641452, "grad_norm": 0.3972605105443471, "learning_rate": 0.0002975822386609722, "loss": 3.1253089904785156, "step": 3561, "token_acc": 0.2852068726200874 }, { "epoch": 2.0879507475813543, "grad_norm": 0.3317098536815664, "learning_rate": 0.00029757963824249663, "loss": 3.1522445678710938, "step": 3562, "token_acc": 0.2828404790909777 }, { "epoch": 2.0885370858985635, "grad_norm": 0.32468246299298087, "learning_rate": 0.0002975770364377093, "loss": 3.1402926445007324, "step": 3563, "token_acc": 0.28204549129105855 }, { "epoch": 2.0891234242157726, "grad_norm": 0.406059457917977, "learning_rate": 0.0002975744332466346, "loss": 3.1296956539154053, "step": 3564, "token_acc": 0.2857222428518381 }, { "epoch": 2.0897097625329817, "grad_norm": 0.3690906744241124, "learning_rate": 0.0002975718286692969, "loss": 3.2085421085357666, "step": 3565, "token_acc": 0.2740298239310097 }, { "epoch": 2.0902961008501904, "grad_norm": 0.3529771055414883, "learning_rate": 0.00029756922270572075, "loss": 3.0420050621032715, "step": 3566, "token_acc": 0.2981452663179039 }, { "epoch": 2.0908824391673995, "grad_norm": 0.39812031092471895, "learning_rate": 0.00029756661535593063, "loss": 3.150214195251465, "step": 3567, "token_acc": 0.28322985495980196 }, { "epoch": 2.0914687774846086, "grad_norm": 0.4796722219156357, "learning_rate": 0.000297564006619951, "loss": 3.1549415588378906, "step": 3568, "token_acc": 0.28169084218441115 }, { "epoch": 2.0920551158018177, "grad_norm": 0.49373391490108404, "learning_rate": 0.00029756139649780633, "loss": 3.11130428314209, "step": 3569, "token_acc": 0.2873060906380779 }, { "epoch": 2.092641454119027, "grad_norm": 0.35317175253818717, "learning_rate": 0.0002975587849895212, "loss": 3.1161997318267822, "step": 3570, "token_acc": 0.28728083678916694 }, { "epoch": 2.093227792436236, "grad_norm": 0.4683002052064412, "learning_rate": 0.00029755617209512015, "loss": 3.119175910949707, "step": 3571, "token_acc": 0.28698096206033047 }, { "epoch": 2.0938141307534446, "grad_norm": 0.6186168394449949, "learning_rate": 0.0002975535578146277, "loss": 3.1231987476348877, "step": 3572, "token_acc": 0.2875237426972732 }, { "epoch": 2.0944004690706537, "grad_norm": 0.4705795273417421, "learning_rate": 0.0002975509421480684, "loss": 3.107603073120117, "step": 3573, "token_acc": 0.28907461424187547 }, { "epoch": 2.094986807387863, "grad_norm": 0.48348225012270607, "learning_rate": 0.0002975483250954668, "loss": 3.131746768951416, "step": 3574, "token_acc": 0.28382917975715377 }, { "epoch": 2.095573145705072, "grad_norm": 0.515562667266918, "learning_rate": 0.00029754570665684754, "loss": 3.1096596717834473, "step": 3575, "token_acc": 0.2879959405899859 }, { "epoch": 2.096159484022281, "grad_norm": 0.48368499286266764, "learning_rate": 0.00029754308683223514, "loss": 3.1457672119140625, "step": 3576, "token_acc": 0.2827620890538602 }, { "epoch": 2.0967458223394897, "grad_norm": 0.4182187474375285, "learning_rate": 0.0002975404656216543, "loss": 3.1215929985046387, "step": 3577, "token_acc": 0.288066705551726 }, { "epoch": 2.097332160656699, "grad_norm": 0.4080862006835993, "learning_rate": 0.00029753784302512953, "loss": 3.098050832748413, "step": 3578, "token_acc": 0.2904650996496174 }, { "epoch": 2.097918498973908, "grad_norm": 0.4153475572668984, "learning_rate": 0.0002975352190426856, "loss": 3.196120262145996, "step": 3579, "token_acc": 0.2744038105150249 }, { "epoch": 2.098504837291117, "grad_norm": 0.40481276513796816, "learning_rate": 0.0002975325936743471, "loss": 3.076892614364624, "step": 3580, "token_acc": 0.2929127112579418 }, { "epoch": 2.099091175608326, "grad_norm": 0.37725165741294653, "learning_rate": 0.0002975299669201387, "loss": 3.1096906661987305, "step": 3581, "token_acc": 0.28758836746932487 }, { "epoch": 2.099677513925535, "grad_norm": 0.40330931200002346, "learning_rate": 0.000297527338780085, "loss": 3.145169973373413, "step": 3582, "token_acc": 0.28202253247803355 }, { "epoch": 2.100263852242744, "grad_norm": 0.37089091837489785, "learning_rate": 0.0002975247092542108, "loss": 3.1549324989318848, "step": 3583, "token_acc": 0.28181232405710455 }, { "epoch": 2.100850190559953, "grad_norm": 0.3572884964490882, "learning_rate": 0.00029752207834254067, "loss": 3.1102874279022217, "step": 3584, "token_acc": 0.28770217336365933 }, { "epoch": 2.101436528877162, "grad_norm": 0.33487077283584704, "learning_rate": 0.00029751944604509945, "loss": 3.071106195449829, "step": 3585, "token_acc": 0.2922787519616859 }, { "epoch": 2.1020228671943713, "grad_norm": 0.32623614118179456, "learning_rate": 0.00029751681236191185, "loss": 3.1419413089752197, "step": 3586, "token_acc": 0.28314423749847156 }, { "epoch": 2.1026092055115804, "grad_norm": 0.3352223218145237, "learning_rate": 0.00029751417729300257, "loss": 3.1588094234466553, "step": 3587, "token_acc": 0.2824614879412302 }, { "epoch": 2.103195543828789, "grad_norm": 0.35047350883467737, "learning_rate": 0.00029751154083839634, "loss": 3.160095691680908, "step": 3588, "token_acc": 0.2810374607274833 }, { "epoch": 2.103781882145998, "grad_norm": 0.33249898386672827, "learning_rate": 0.00029750890299811794, "loss": 3.1361794471740723, "step": 3589, "token_acc": 0.28384104176486735 }, { "epoch": 2.1043682204632073, "grad_norm": 0.3197873264980517, "learning_rate": 0.0002975062637721922, "loss": 3.146862506866455, "step": 3590, "token_acc": 0.2827296727994848 }, { "epoch": 2.1049545587804164, "grad_norm": 0.3635327029849855, "learning_rate": 0.00029750362316064387, "loss": 3.1684470176696777, "step": 3591, "token_acc": 0.28133028005895977 }, { "epoch": 2.1055408970976255, "grad_norm": 0.329738225742666, "learning_rate": 0.0002975009811634978, "loss": 3.1855082511901855, "step": 3592, "token_acc": 0.2780068159662828 }, { "epoch": 2.106127235414834, "grad_norm": 0.3367470358602128, "learning_rate": 0.0002974983377807787, "loss": 3.105827808380127, "step": 3593, "token_acc": 0.2894561419947261 }, { "epoch": 2.1067135737320433, "grad_norm": 0.35161801930545405, "learning_rate": 0.0002974956930125116, "loss": 3.1370208263397217, "step": 3594, "token_acc": 0.2831918151270352 }, { "epoch": 2.1072999120492524, "grad_norm": 0.3532362738760793, "learning_rate": 0.00029749304685872113, "loss": 3.1760120391845703, "step": 3595, "token_acc": 0.2798521375935876 }, { "epoch": 2.1078862503664615, "grad_norm": 0.3052014277711385, "learning_rate": 0.0002974903993194322, "loss": 3.149266481399536, "step": 3596, "token_acc": 0.2831579132065162 }, { "epoch": 2.1084725886836706, "grad_norm": 0.34982371711926846, "learning_rate": 0.0002974877503946698, "loss": 3.1322388648986816, "step": 3597, "token_acc": 0.2843834965551831 }, { "epoch": 2.1090589270008797, "grad_norm": 0.3784844717049778, "learning_rate": 0.0002974851000844586, "loss": 3.128718376159668, "step": 3598, "token_acc": 0.2859041148001775 }, { "epoch": 2.1096452653180884, "grad_norm": 0.37386227007663875, "learning_rate": 0.00029748244838882375, "loss": 3.182392120361328, "step": 3599, "token_acc": 0.2768258033742236 }, { "epoch": 2.1102316036352975, "grad_norm": 0.33080129152623244, "learning_rate": 0.00029747979530778996, "loss": 3.1474149227142334, "step": 3600, "token_acc": 0.28321249508778684 }, { "epoch": 2.1108179419525066, "grad_norm": 0.3753553148563084, "learning_rate": 0.00029747714084138227, "loss": 3.1178596019744873, "step": 3601, "token_acc": 0.28559688917692805 }, { "epoch": 2.1114042802697157, "grad_norm": 0.3731691525259236, "learning_rate": 0.00029747448498962555, "loss": 3.0865046977996826, "step": 3602, "token_acc": 0.29061540862508456 }, { "epoch": 2.111990618586925, "grad_norm": 0.3781002074743101, "learning_rate": 0.0002974718277525448, "loss": 3.1164581775665283, "step": 3603, "token_acc": 0.28721874263088204 }, { "epoch": 2.1125769569041335, "grad_norm": 0.3453019683483662, "learning_rate": 0.00029746916913016486, "loss": 3.070502519607544, "step": 3604, "token_acc": 0.29477374123645633 }, { "epoch": 2.1131632952213426, "grad_norm": 0.38588116351145907, "learning_rate": 0.0002974665091225109, "loss": 3.1674442291259766, "step": 3605, "token_acc": 0.28288178960057775 }, { "epoch": 2.1137496335385517, "grad_norm": 0.34391724128141, "learning_rate": 0.00029746384772960774, "loss": 3.1040329933166504, "step": 3606, "token_acc": 0.28940249147702085 }, { "epoch": 2.114335971855761, "grad_norm": 0.39691114757066587, "learning_rate": 0.00029746118495148046, "loss": 3.1164209842681885, "step": 3607, "token_acc": 0.2855855949407837 }, { "epoch": 2.11492231017297, "grad_norm": 0.3785613378593729, "learning_rate": 0.00029745852078815404, "loss": 3.1203668117523193, "step": 3608, "token_acc": 0.2866854267667885 }, { "epoch": 2.115508648490179, "grad_norm": 0.3535489550677346, "learning_rate": 0.00029745585523965353, "loss": 3.1209752559661865, "step": 3609, "token_acc": 0.2857183884038598 }, { "epoch": 2.1160949868073877, "grad_norm": 0.3497399312883876, "learning_rate": 0.0002974531883060039, "loss": 3.1599011421203613, "step": 3610, "token_acc": 0.2814807521906075 }, { "epoch": 2.116681325124597, "grad_norm": 0.3091547294333782, "learning_rate": 0.00029745051998723035, "loss": 3.1567039489746094, "step": 3611, "token_acc": 0.2806494305366563 }, { "epoch": 2.117267663441806, "grad_norm": 0.3351889044790218, "learning_rate": 0.00029744785028335783, "loss": 3.1179704666137695, "step": 3612, "token_acc": 0.2853116836098153 }, { "epoch": 2.117854001759015, "grad_norm": 0.31912916059871516, "learning_rate": 0.00029744517919441145, "loss": 3.087202548980713, "step": 3613, "token_acc": 0.29077147733439596 }, { "epoch": 2.118440340076224, "grad_norm": 0.3028282332306614, "learning_rate": 0.00029744250672041625, "loss": 3.143599033355713, "step": 3614, "token_acc": 0.28466444484905096 }, { "epoch": 2.119026678393433, "grad_norm": 0.31557314686677157, "learning_rate": 0.00029743983286139745, "loss": 3.0971221923828125, "step": 3615, "token_acc": 0.2904314588763716 }, { "epoch": 2.119613016710642, "grad_norm": 0.3553347787494989, "learning_rate": 0.00029743715761738004, "loss": 3.0950396060943604, "step": 3616, "token_acc": 0.28926570102945653 }, { "epoch": 2.120199355027851, "grad_norm": 0.40497298504576884, "learning_rate": 0.0002974344809883892, "loss": 3.1668670177459717, "step": 3617, "token_acc": 0.2792664213087144 }, { "epoch": 2.12078569334506, "grad_norm": 0.36363457777852265, "learning_rate": 0.00029743180297445013, "loss": 3.1283974647521973, "step": 3618, "token_acc": 0.2871843760344257 }, { "epoch": 2.1213720316622693, "grad_norm": 0.37108617335266614, "learning_rate": 0.00029742912357558796, "loss": 3.1360888481140137, "step": 3619, "token_acc": 0.28266875463179975 }, { "epoch": 2.121958369979478, "grad_norm": 0.3743372212156133, "learning_rate": 0.0002974264427918278, "loss": 3.1293435096740723, "step": 3620, "token_acc": 0.28594275382240364 }, { "epoch": 2.122544708296687, "grad_norm": 0.3686261478387375, "learning_rate": 0.00029742376062319486, "loss": 3.160668134689331, "step": 3621, "token_acc": 0.281427115837738 }, { "epoch": 2.123131046613896, "grad_norm": 0.34355449323084775, "learning_rate": 0.00029742107706971435, "loss": 3.103022575378418, "step": 3622, "token_acc": 0.288292641929655 }, { "epoch": 2.1237173849311053, "grad_norm": 0.3747066978294427, "learning_rate": 0.00029741839213141147, "loss": 3.133305072784424, "step": 3623, "token_acc": 0.2850711970230641 }, { "epoch": 2.1243037232483144, "grad_norm": 0.4094916292632341, "learning_rate": 0.0002974157058083114, "loss": 3.126133918762207, "step": 3624, "token_acc": 0.2851364600943327 }, { "epoch": 2.1248900615655235, "grad_norm": 0.3861210919909546, "learning_rate": 0.0002974130181004395, "loss": 3.1097095012664795, "step": 3625, "token_acc": 0.28936690374229995 }, { "epoch": 2.125476399882732, "grad_norm": 0.3866438357201555, "learning_rate": 0.0002974103290078209, "loss": 3.110128402709961, "step": 3626, "token_acc": 0.28882484051956037 }, { "epoch": 2.1260627381999413, "grad_norm": 0.4280062320951143, "learning_rate": 0.00029740763853048095, "loss": 3.067365884780884, "step": 3627, "token_acc": 0.29454783867124723 }, { "epoch": 2.1266490765171504, "grad_norm": 0.3756356268079855, "learning_rate": 0.0002974049466684448, "loss": 3.1206445693969727, "step": 3628, "token_acc": 0.2875451709790596 }, { "epoch": 2.1272354148343595, "grad_norm": 0.38587802126662996, "learning_rate": 0.00029740225342173786, "loss": 3.118781328201294, "step": 3629, "token_acc": 0.2865087603245281 }, { "epoch": 2.1278217531515686, "grad_norm": 0.3401795810505323, "learning_rate": 0.00029739955879038533, "loss": 3.142815113067627, "step": 3630, "token_acc": 0.2833537220434422 }, { "epoch": 2.1284080914687773, "grad_norm": 0.3824268935167012, "learning_rate": 0.0002973968627744125, "loss": 3.150843381881714, "step": 3631, "token_acc": 0.2818112990810359 }, { "epoch": 2.1289944297859864, "grad_norm": 0.36166420470483296, "learning_rate": 0.0002973941653738449, "loss": 3.124642848968506, "step": 3632, "token_acc": 0.2845077951918085 }, { "epoch": 2.1295807681031955, "grad_norm": 0.3768012167825204, "learning_rate": 0.0002973914665887077, "loss": 3.1119298934936523, "step": 3633, "token_acc": 0.2856994582552782 }, { "epoch": 2.1301671064204046, "grad_norm": 0.3178086023299626, "learning_rate": 0.00029738876641902627, "loss": 3.1113576889038086, "step": 3634, "token_acc": 0.28883917278016696 }, { "epoch": 2.1307534447376137, "grad_norm": 0.32007537882220766, "learning_rate": 0.000297386064864826, "loss": 3.1324214935302734, "step": 3635, "token_acc": 0.28540138509174856 }, { "epoch": 2.1313397830548224, "grad_norm": 0.35061623679753595, "learning_rate": 0.0002973833619261322, "loss": 3.1358771324157715, "step": 3636, "token_acc": 0.2850238470394126 }, { "epoch": 2.1319261213720315, "grad_norm": 0.3727979365920853, "learning_rate": 0.00029738065760297037, "loss": 3.1353282928466797, "step": 3637, "token_acc": 0.28409883168191324 }, { "epoch": 2.1325124596892406, "grad_norm": 0.33173411988510243, "learning_rate": 0.00029737795189536584, "loss": 3.1528213024139404, "step": 3638, "token_acc": 0.2827891809443663 }, { "epoch": 2.1330987980064497, "grad_norm": 0.34613418929036993, "learning_rate": 0.00029737524480334405, "loss": 3.145735740661621, "step": 3639, "token_acc": 0.2821236595445828 }, { "epoch": 2.133685136323659, "grad_norm": 0.37945271611362374, "learning_rate": 0.00029737253632693047, "loss": 3.177666187286377, "step": 3640, "token_acc": 0.2779716804872037 }, { "epoch": 2.134271474640868, "grad_norm": 0.38928774215867945, "learning_rate": 0.0002973698264661504, "loss": 3.143641948699951, "step": 3641, "token_acc": 0.28483405792664257 }, { "epoch": 2.1348578129580766, "grad_norm": 0.3525326574678476, "learning_rate": 0.0002973671152210295, "loss": 3.1402170658111572, "step": 3642, "token_acc": 0.2835931132172523 }, { "epoch": 2.1354441512752858, "grad_norm": 0.3690826914844778, "learning_rate": 0.00029736440259159303, "loss": 3.1585018634796143, "step": 3643, "token_acc": 0.2830827978849169 }, { "epoch": 2.136030489592495, "grad_norm": 0.4272356535361869, "learning_rate": 0.00029736168857786666, "loss": 3.161320209503174, "step": 3644, "token_acc": 0.2819222608975323 }, { "epoch": 2.136616827909704, "grad_norm": 0.37547532349016904, "learning_rate": 0.0002973589731798757, "loss": 3.1558077335357666, "step": 3645, "token_acc": 0.2822075186581045 }, { "epoch": 2.137203166226913, "grad_norm": 0.3848749721475837, "learning_rate": 0.0002973562563976459, "loss": 3.0737271308898926, "step": 3646, "token_acc": 0.2905421434375962 }, { "epoch": 2.1377895045441218, "grad_norm": 0.37523437087585715, "learning_rate": 0.00029735353823120254, "loss": 3.1658897399902344, "step": 3647, "token_acc": 0.28095983111601663 }, { "epoch": 2.138375842861331, "grad_norm": 0.3591564976510279, "learning_rate": 0.00029735081868057124, "loss": 3.091909646987915, "step": 3648, "token_acc": 0.2905892443292761 }, { "epoch": 2.13896218117854, "grad_norm": 0.39226765137752523, "learning_rate": 0.0002973480977457776, "loss": 3.172088384628296, "step": 3649, "token_acc": 0.2801198168695981 }, { "epoch": 2.139548519495749, "grad_norm": 0.38308088062879925, "learning_rate": 0.00029734537542684713, "loss": 3.1077122688293457, "step": 3650, "token_acc": 0.2891672103524625 }, { "epoch": 2.140134857812958, "grad_norm": 0.3679522544072703, "learning_rate": 0.0002973426517238054, "loss": 3.128175735473633, "step": 3651, "token_acc": 0.28663294493753416 }, { "epoch": 2.1407211961301673, "grad_norm": 0.3779372267794978, "learning_rate": 0.00029733992663667796, "loss": 3.1356520652770996, "step": 3652, "token_acc": 0.28427416172532266 }, { "epoch": 2.141307534447376, "grad_norm": 0.37868653216076603, "learning_rate": 0.0002973372001654905, "loss": 3.191227674484253, "step": 3653, "token_acc": 0.2760653738584097 }, { "epoch": 2.141893872764585, "grad_norm": 0.3907757891161318, "learning_rate": 0.0002973344723102686, "loss": 3.1168456077575684, "step": 3654, "token_acc": 0.2876745402599493 }, { "epoch": 2.142480211081794, "grad_norm": 0.35936585638240326, "learning_rate": 0.0002973317430710378, "loss": 3.1460936069488525, "step": 3655, "token_acc": 0.2831868843245196 }, { "epoch": 2.1430665493990033, "grad_norm": 0.4348895267124142, "learning_rate": 0.00029732901244782384, "loss": 3.0864439010620117, "step": 3656, "token_acc": 0.2912484606750144 }, { "epoch": 2.1436528877162124, "grad_norm": 0.41434188903484775, "learning_rate": 0.00029732628044065235, "loss": 3.0974295139312744, "step": 3657, "token_acc": 0.29001516807623834 }, { "epoch": 2.144239226033421, "grad_norm": 0.35540701313535933, "learning_rate": 0.000297323547049549, "loss": 3.1467647552490234, "step": 3658, "token_acc": 0.28208001242107444 }, { "epoch": 2.14482556435063, "grad_norm": 0.39805944269643206, "learning_rate": 0.0002973208122745394, "loss": 3.126558780670166, "step": 3659, "token_acc": 0.2856025520697101 }, { "epoch": 2.1454119026678393, "grad_norm": 0.39415621137394274, "learning_rate": 0.00029731807611564935, "loss": 3.1468863487243652, "step": 3660, "token_acc": 0.2817636849664858 }, { "epoch": 2.1459982409850484, "grad_norm": 0.39463116534085596, "learning_rate": 0.0002973153385729044, "loss": 3.1094295978546143, "step": 3661, "token_acc": 0.28611949157410166 }, { "epoch": 2.1465845793022575, "grad_norm": 0.4082472157834538, "learning_rate": 0.0002973125996463304, "loss": 3.1418848037719727, "step": 3662, "token_acc": 0.28449804429430414 }, { "epoch": 2.1471709176194667, "grad_norm": 0.39048125810586015, "learning_rate": 0.0002973098593359531, "loss": 3.158926486968994, "step": 3663, "token_acc": 0.28208582177905234 }, { "epoch": 2.1477572559366753, "grad_norm": 0.3622529469682711, "learning_rate": 0.00029730711764179807, "loss": 3.102562427520752, "step": 3664, "token_acc": 0.28952291489209264 }, { "epoch": 2.1483435942538844, "grad_norm": 0.36877861549427204, "learning_rate": 0.0002973043745638912, "loss": 3.1528053283691406, "step": 3665, "token_acc": 0.2829999352625105 }, { "epoch": 2.1489299325710935, "grad_norm": 0.40602504675755247, "learning_rate": 0.00029730163010225827, "loss": 3.09246826171875, "step": 3666, "token_acc": 0.2910243568448947 }, { "epoch": 2.1495162708883027, "grad_norm": 0.3625122608283235, "learning_rate": 0.00029729888425692494, "loss": 3.1218814849853516, "step": 3667, "token_acc": 0.28450962705713845 }, { "epoch": 2.1501026092055118, "grad_norm": 0.37925539397051744, "learning_rate": 0.0002972961370279171, "loss": 3.1087779998779297, "step": 3668, "token_acc": 0.2884590130543964 }, { "epoch": 2.1506889475227204, "grad_norm": 0.35095677442654594, "learning_rate": 0.0002972933884152606, "loss": 3.1027166843414307, "step": 3669, "token_acc": 0.28937091183765745 }, { "epoch": 2.1512752858399296, "grad_norm": 0.3144874769407481, "learning_rate": 0.00029729063841898117, "loss": 3.1827893257141113, "step": 3670, "token_acc": 0.2768148129018929 }, { "epoch": 2.1518616241571387, "grad_norm": 0.34357852056190324, "learning_rate": 0.00029728788703910463, "loss": 3.1004798412323, "step": 3671, "token_acc": 0.287819975094957 }, { "epoch": 2.1524479624743478, "grad_norm": 0.35489894241709186, "learning_rate": 0.0002972851342756569, "loss": 3.13132905960083, "step": 3672, "token_acc": 0.2841390413623201 }, { "epoch": 2.153034300791557, "grad_norm": 0.2890847081218417, "learning_rate": 0.0002972823801286638, "loss": 3.118220567703247, "step": 3673, "token_acc": 0.28704080214536826 }, { "epoch": 2.1536206391087656, "grad_norm": 0.3737304874616641, "learning_rate": 0.00029727962459815115, "loss": 3.1819117069244385, "step": 3674, "token_acc": 0.27841109620950094 }, { "epoch": 2.1542069774259747, "grad_norm": 0.34913409484649843, "learning_rate": 0.00029727686768414493, "loss": 3.0963265895843506, "step": 3675, "token_acc": 0.2915285936807653 }, { "epoch": 2.154793315743184, "grad_norm": 0.32281029421913626, "learning_rate": 0.000297274109386671, "loss": 3.119079113006592, "step": 3676, "token_acc": 0.2857142857142857 }, { "epoch": 2.155379654060393, "grad_norm": 0.3959722050397268, "learning_rate": 0.00029727134970575523, "loss": 3.1270384788513184, "step": 3677, "token_acc": 0.2850111632473174 }, { "epoch": 2.155965992377602, "grad_norm": 0.31602764833827146, "learning_rate": 0.00029726858864142364, "loss": 3.079014778137207, "step": 3678, "token_acc": 0.29039350058180613 }, { "epoch": 2.1565523306948107, "grad_norm": 0.32875718990960107, "learning_rate": 0.0002972658261937021, "loss": 3.1619462966918945, "step": 3679, "token_acc": 0.2820438783781636 }, { "epoch": 2.15713866901202, "grad_norm": 0.37718074180093003, "learning_rate": 0.0002972630623626165, "loss": 3.139472246170044, "step": 3680, "token_acc": 0.2851135743655937 }, { "epoch": 2.157725007329229, "grad_norm": 0.37040363958150374, "learning_rate": 0.0002972602971481929, "loss": 3.1133675575256348, "step": 3681, "token_acc": 0.28805701061758837 }, { "epoch": 2.158311345646438, "grad_norm": 0.33948286677427697, "learning_rate": 0.0002972575305504573, "loss": 3.1386313438415527, "step": 3682, "token_acc": 0.28480566666928625 }, { "epoch": 2.158897683963647, "grad_norm": 0.3612626653013946, "learning_rate": 0.00029725476256943555, "loss": 3.210702419281006, "step": 3683, "token_acc": 0.2749471183246283 }, { "epoch": 2.1594840222808562, "grad_norm": 0.3903099635280311, "learning_rate": 0.0002972519932051538, "loss": 3.138247013092041, "step": 3684, "token_acc": 0.28619558704496 }, { "epoch": 2.160070360598065, "grad_norm": 0.3615692010727741, "learning_rate": 0.000297249222457638, "loss": 3.1362054347991943, "step": 3685, "token_acc": 0.28323117856242247 }, { "epoch": 2.160656698915274, "grad_norm": 0.3606492339244674, "learning_rate": 0.00029724645032691414, "loss": 3.114980697631836, "step": 3686, "token_acc": 0.2869639012099697 }, { "epoch": 2.161243037232483, "grad_norm": 0.4016473151811356, "learning_rate": 0.0002972436768130083, "loss": 3.1188037395477295, "step": 3687, "token_acc": 0.2857533526442812 }, { "epoch": 2.1618293755496922, "grad_norm": 0.413287236823868, "learning_rate": 0.00029724090191594654, "loss": 3.1648106575012207, "step": 3688, "token_acc": 0.281669070898114 }, { "epoch": 2.1624157138669013, "grad_norm": 0.3948354021205157, "learning_rate": 0.0002972381256357549, "loss": 3.132230043411255, "step": 3689, "token_acc": 0.2838120414496001 }, { "epoch": 2.16300205218411, "grad_norm": 0.345112297981206, "learning_rate": 0.0002972353479724595, "loss": 3.0848844051361084, "step": 3690, "token_acc": 0.29048484880241876 }, { "epoch": 2.163588390501319, "grad_norm": 0.351916771334899, "learning_rate": 0.0002972325689260864, "loss": 3.125626564025879, "step": 3691, "token_acc": 0.2874450808969182 }, { "epoch": 2.1641747288185282, "grad_norm": 0.3164030938334927, "learning_rate": 0.0002972297884966617, "loss": 3.1505215167999268, "step": 3692, "token_acc": 0.28120564348299376 }, { "epoch": 2.1647610671357373, "grad_norm": 0.34315390849571514, "learning_rate": 0.0002972270066842115, "loss": 3.128664970397949, "step": 3693, "token_acc": 0.2848959903527284 }, { "epoch": 2.1653474054529465, "grad_norm": 0.3449984384364031, "learning_rate": 0.000297224223488762, "loss": 3.1447925567626953, "step": 3694, "token_acc": 0.2818267651357399 }, { "epoch": 2.1659337437701556, "grad_norm": 0.3681370129097049, "learning_rate": 0.00029722143891033935, "loss": 3.1524910926818848, "step": 3695, "token_acc": 0.28267057795403555 }, { "epoch": 2.1665200820873642, "grad_norm": 0.3977140577950616, "learning_rate": 0.0002972186529489696, "loss": 3.123647689819336, "step": 3696, "token_acc": 0.28472184559919217 }, { "epoch": 2.1671064204045734, "grad_norm": 0.33435729189688934, "learning_rate": 0.000297215865604679, "loss": 3.1324071884155273, "step": 3697, "token_acc": 0.28364051370177484 }, { "epoch": 2.1676927587217825, "grad_norm": 0.3519433101085958, "learning_rate": 0.00029721307687749374, "loss": 3.1004347801208496, "step": 3698, "token_acc": 0.2887127786727641 }, { "epoch": 2.1682790970389916, "grad_norm": 0.31005432810732253, "learning_rate": 0.00029721028676744, "loss": 3.0965301990509033, "step": 3699, "token_acc": 0.29104107676296315 }, { "epoch": 2.1688654353562007, "grad_norm": 0.37171401958548694, "learning_rate": 0.000297207495274544, "loss": 3.158102512359619, "step": 3700, "token_acc": 0.28180967567638804 }, { "epoch": 2.1694517736734094, "grad_norm": 0.38371287955406946, "learning_rate": 0.0002972047023988319, "loss": 3.093679428100586, "step": 3701, "token_acc": 0.29066419851491376 }, { "epoch": 2.1700381119906185, "grad_norm": 0.33021700239126456, "learning_rate": 0.00029720190814032995, "loss": 3.080838203430176, "step": 3702, "token_acc": 0.29000745712155107 }, { "epoch": 2.1706244503078276, "grad_norm": 0.3212098235047024, "learning_rate": 0.0002971991124990645, "loss": 3.1033544540405273, "step": 3703, "token_acc": 0.2890720926153306 }, { "epoch": 2.1712107886250367, "grad_norm": 0.37538442273914396, "learning_rate": 0.0002971963154750617, "loss": 3.1580235958099365, "step": 3704, "token_acc": 0.28001626477339 }, { "epoch": 2.171797126942246, "grad_norm": 0.40590662521371323, "learning_rate": 0.0002971935170683479, "loss": 3.148336887359619, "step": 3705, "token_acc": 0.2819193430493214 }, { "epoch": 2.172383465259455, "grad_norm": 0.33690911595167705, "learning_rate": 0.0002971907172789493, "loss": 3.1577134132385254, "step": 3706, "token_acc": 0.2818503103766038 }, { "epoch": 2.1729698035766636, "grad_norm": 0.3460556106496662, "learning_rate": 0.0002971879161068923, "loss": 3.108748435974121, "step": 3707, "token_acc": 0.2883463248083489 }, { "epoch": 2.1735561418938727, "grad_norm": 0.3864134249865432, "learning_rate": 0.00029718511355220317, "loss": 3.130164623260498, "step": 3708, "token_acc": 0.2834747295830476 }, { "epoch": 2.174142480211082, "grad_norm": 0.32881384185865764, "learning_rate": 0.0002971823096149082, "loss": 3.134215831756592, "step": 3709, "token_acc": 0.2856028797339242 }, { "epoch": 2.174728818528291, "grad_norm": 0.3351313090045613, "learning_rate": 0.0002971795042950338, "loss": 3.1081809997558594, "step": 3710, "token_acc": 0.2869916354196712 }, { "epoch": 2.1753151568455, "grad_norm": 0.3549544996527675, "learning_rate": 0.00029717669759260625, "loss": 3.1474461555480957, "step": 3711, "token_acc": 0.2831281243364676 }, { "epoch": 2.1759014951627087, "grad_norm": 0.35026042871518703, "learning_rate": 0.00029717388950765197, "loss": 3.177511215209961, "step": 3712, "token_acc": 0.2781876613673855 }, { "epoch": 2.176487833479918, "grad_norm": 0.2974110737850865, "learning_rate": 0.0002971710800401973, "loss": 3.1171035766601562, "step": 3713, "token_acc": 0.28907784707193074 }, { "epoch": 2.177074171797127, "grad_norm": 0.3738621478159767, "learning_rate": 0.0002971682691902687, "loss": 3.1230955123901367, "step": 3714, "token_acc": 0.2877539375745363 }, { "epoch": 2.177660510114336, "grad_norm": 0.3528096398357954, "learning_rate": 0.00029716545695789243, "loss": 3.124410629272461, "step": 3715, "token_acc": 0.2841609858682732 }, { "epoch": 2.178246848431545, "grad_norm": 0.33569778548569945, "learning_rate": 0.00029716264334309506, "loss": 3.1467747688293457, "step": 3716, "token_acc": 0.28410485850885125 }, { "epoch": 2.1788331867487543, "grad_norm": 0.38710390616394685, "learning_rate": 0.00029715982834590296, "loss": 3.132063865661621, "step": 3717, "token_acc": 0.28413639871708934 }, { "epoch": 2.179419525065963, "grad_norm": 0.3089674366816096, "learning_rate": 0.00029715701196634256, "loss": 3.0990710258483887, "step": 3718, "token_acc": 0.28976225630193236 }, { "epoch": 2.180005863383172, "grad_norm": 0.3150721522407789, "learning_rate": 0.00029715419420444034, "loss": 3.114352226257324, "step": 3719, "token_acc": 0.289362573652204 }, { "epoch": 2.180592201700381, "grad_norm": 0.34724289088430854, "learning_rate": 0.00029715137506022274, "loss": 3.130066394805908, "step": 3720, "token_acc": 0.2841681618624524 }, { "epoch": 2.1811785400175903, "grad_norm": 0.38270116759199097, "learning_rate": 0.00029714855453371626, "loss": 3.123567581176758, "step": 3721, "token_acc": 0.28573445350237364 }, { "epoch": 2.1817648783347994, "grad_norm": 0.3628778286849395, "learning_rate": 0.00029714573262494743, "loss": 3.157637357711792, "step": 3722, "token_acc": 0.28230468956058447 }, { "epoch": 2.182351216652008, "grad_norm": 0.3282955382901595, "learning_rate": 0.0002971429093339427, "loss": 3.196810245513916, "step": 3723, "token_acc": 0.2766472221389707 }, { "epoch": 2.182937554969217, "grad_norm": 0.31388366259867345, "learning_rate": 0.0002971400846607286, "loss": 3.1468496322631836, "step": 3724, "token_acc": 0.28312359756977706 }, { "epoch": 2.1835238932864263, "grad_norm": 0.3296758979553433, "learning_rate": 0.0002971372586053317, "loss": 3.12540602684021, "step": 3725, "token_acc": 0.28753877945785394 }, { "epoch": 2.1841102316036354, "grad_norm": 0.33613772749076, "learning_rate": 0.0002971344311677785, "loss": 3.158262252807617, "step": 3726, "token_acc": 0.2813853039676992 }, { "epoch": 2.1846965699208445, "grad_norm": 0.30841193010965795, "learning_rate": 0.0002971316023480956, "loss": 3.153303623199463, "step": 3727, "token_acc": 0.28097400329324884 }, { "epoch": 2.185282908238053, "grad_norm": 0.38920712240591765, "learning_rate": 0.0002971287721463096, "loss": 3.1404852867126465, "step": 3728, "token_acc": 0.2840865204062336 }, { "epoch": 2.1858692465552623, "grad_norm": 0.3116747108469472, "learning_rate": 0.00029712594056244696, "loss": 3.1646945476531982, "step": 3729, "token_acc": 0.2791000745444068 }, { "epoch": 2.1864555848724714, "grad_norm": 0.3579300577012172, "learning_rate": 0.0002971231075965345, "loss": 3.145068407058716, "step": 3730, "token_acc": 0.2828042334800524 }, { "epoch": 2.1870419231896805, "grad_norm": 0.31820979443196135, "learning_rate": 0.00029712027324859855, "loss": 3.1453914642333984, "step": 3731, "token_acc": 0.28512502870138534 }, { "epoch": 2.1876282615068896, "grad_norm": 0.3595393785855766, "learning_rate": 0.00029711743751866594, "loss": 3.1465158462524414, "step": 3732, "token_acc": 0.2823496005464187 }, { "epoch": 2.1882145998240983, "grad_norm": 0.30877011368973845, "learning_rate": 0.0002971146004067632, "loss": 3.129978656768799, "step": 3733, "token_acc": 0.2860482610666876 }, { "epoch": 2.1888009381413074, "grad_norm": 0.33223886640453354, "learning_rate": 0.0002971117619129171, "loss": 3.126527786254883, "step": 3734, "token_acc": 0.28645519237727657 }, { "epoch": 2.1893872764585165, "grad_norm": 0.3501422888772264, "learning_rate": 0.00029710892203715423, "loss": 3.1880886554718018, "step": 3735, "token_acc": 0.27684443280823495 }, { "epoch": 2.1899736147757256, "grad_norm": 0.3848066531155169, "learning_rate": 0.0002971060807795012, "loss": 3.159013271331787, "step": 3736, "token_acc": 0.2830311398762082 }, { "epoch": 2.1905599530929347, "grad_norm": 0.3941500679060397, "learning_rate": 0.00029710323813998484, "loss": 3.1417603492736816, "step": 3737, "token_acc": 0.2833038656584666 }, { "epoch": 2.191146291410144, "grad_norm": 0.4005480122039298, "learning_rate": 0.00029710039411863173, "loss": 3.1206064224243164, "step": 3738, "token_acc": 0.28731070052743846 }, { "epoch": 2.1917326297273525, "grad_norm": 0.35424637769856315, "learning_rate": 0.00029709754871546864, "loss": 3.186551332473755, "step": 3739, "token_acc": 0.2771676199164883 }, { "epoch": 2.1923189680445616, "grad_norm": 0.38891189503844975, "learning_rate": 0.00029709470193052236, "loss": 3.1638402938842773, "step": 3740, "token_acc": 0.2814092730961453 }, { "epoch": 2.1929053063617707, "grad_norm": 0.3017733795871617, "learning_rate": 0.0002970918537638195, "loss": 3.1486992835998535, "step": 3741, "token_acc": 0.2837440336712176 }, { "epoch": 2.19349164467898, "grad_norm": 0.34582891863757653, "learning_rate": 0.00029708900421538694, "loss": 3.141735553741455, "step": 3742, "token_acc": 0.2828959128523811 }, { "epoch": 2.194077982996189, "grad_norm": 0.3822139579348903, "learning_rate": 0.0002970861532852513, "loss": 3.1588408946990967, "step": 3743, "token_acc": 0.281739848305041 }, { "epoch": 2.1946643213133976, "grad_norm": 0.39104487183229963, "learning_rate": 0.00029708330097343955, "loss": 3.1367104053497314, "step": 3744, "token_acc": 0.28399802400729285 }, { "epoch": 2.1952506596306067, "grad_norm": 0.4008412132518758, "learning_rate": 0.0002970804472799784, "loss": 3.153278112411499, "step": 3745, "token_acc": 0.28172001340651054 }, { "epoch": 2.195836997947816, "grad_norm": 0.3837046494631987, "learning_rate": 0.0002970775922048945, "loss": 3.176415205001831, "step": 3746, "token_acc": 0.27976056482144684 }, { "epoch": 2.196423336265025, "grad_norm": 0.3167368828147193, "learning_rate": 0.0002970747357482149, "loss": 3.163569927215576, "step": 3747, "token_acc": 0.28121139452074706 }, { "epoch": 2.197009674582234, "grad_norm": 0.33338572636707026, "learning_rate": 0.00029707187790996634, "loss": 3.173992156982422, "step": 3748, "token_acc": 0.278925153425765 }, { "epoch": 2.197596012899443, "grad_norm": 0.33325046619417104, "learning_rate": 0.0002970690186901757, "loss": 3.1533126831054688, "step": 3749, "token_acc": 0.28241743548602755 }, { "epoch": 2.198182351216652, "grad_norm": 0.38337292217801866, "learning_rate": 0.00029706615808886976, "loss": 3.1665897369384766, "step": 3750, "token_acc": 0.2806721886204581 }, { "epoch": 2.198768689533861, "grad_norm": 0.4057576269254285, "learning_rate": 0.00029706329610607545, "loss": 3.1035351753234863, "step": 3751, "token_acc": 0.28971972532179735 }, { "epoch": 2.19935502785107, "grad_norm": 0.3286326306188834, "learning_rate": 0.0002970604327418196, "loss": 3.0740952491760254, "step": 3752, "token_acc": 0.29236206818718946 }, { "epoch": 2.199941366168279, "grad_norm": 0.4137232096991444, "learning_rate": 0.00029705756799612923, "loss": 3.168051242828369, "step": 3753, "token_acc": 0.2792734466324971 }, { "epoch": 2.2005277044854883, "grad_norm": 0.34514452079048275, "learning_rate": 0.00029705470186903115, "loss": 3.163088321685791, "step": 3754, "token_acc": 0.28117876248807927 }, { "epoch": 2.201114042802697, "grad_norm": 0.35387527189584134, "learning_rate": 0.0002970518343605523, "loss": 3.0887796878814697, "step": 3755, "token_acc": 0.29122049259795857 }, { "epoch": 2.201700381119906, "grad_norm": 0.3308009359952547, "learning_rate": 0.0002970489654707196, "loss": 3.1101760864257812, "step": 3756, "token_acc": 0.28565986061084 }, { "epoch": 2.202286719437115, "grad_norm": 0.38555611921801497, "learning_rate": 0.00029704609519956, "loss": 3.135230541229248, "step": 3757, "token_acc": 0.28450695623185235 }, { "epoch": 2.2028730577543243, "grad_norm": 0.3634144851836268, "learning_rate": 0.00029704322354710057, "loss": 3.1189966201782227, "step": 3758, "token_acc": 0.2863978334352279 }, { "epoch": 2.2034593960715334, "grad_norm": 0.3156287722560124, "learning_rate": 0.00029704035051336816, "loss": 3.1024208068847656, "step": 3759, "token_acc": 0.2903743518127591 }, { "epoch": 2.2040457343887425, "grad_norm": 0.3211094660933389, "learning_rate": 0.00029703747609838977, "loss": 3.126427412033081, "step": 3760, "token_acc": 0.28755061275171406 }, { "epoch": 2.204632072705951, "grad_norm": 0.3222561464711346, "learning_rate": 0.0002970346003021924, "loss": 3.099661350250244, "step": 3761, "token_acc": 0.2903913637459484 }, { "epoch": 2.2052184110231603, "grad_norm": 0.3678023808414285, "learning_rate": 0.00029703172312480316, "loss": 3.1137800216674805, "step": 3762, "token_acc": 0.2870190790378748 }, { "epoch": 2.2058047493403694, "grad_norm": 0.3787740942796549, "learning_rate": 0.000297028844566249, "loss": 3.1343183517456055, "step": 3763, "token_acc": 0.28731045490822027 }, { "epoch": 2.2063910876575785, "grad_norm": 0.34356797269212563, "learning_rate": 0.00029702596462655693, "loss": 3.1562204360961914, "step": 3764, "token_acc": 0.2820999463910506 }, { "epoch": 2.2069774259747876, "grad_norm": 0.356217598313379, "learning_rate": 0.0002970230833057541, "loss": 3.1309165954589844, "step": 3765, "token_acc": 0.2846722137717118 }, { "epoch": 2.2075637642919963, "grad_norm": 0.37165901593333567, "learning_rate": 0.00029702020060386745, "loss": 3.1627721786499023, "step": 3766, "token_acc": 0.2815375217079306 }, { "epoch": 2.2081501026092054, "grad_norm": 0.4192899048015179, "learning_rate": 0.00029701731652092417, "loss": 3.1466424465179443, "step": 3767, "token_acc": 0.28414412384737936 }, { "epoch": 2.2087364409264145, "grad_norm": 0.411215745526209, "learning_rate": 0.00029701443105695127, "loss": 3.1085567474365234, "step": 3768, "token_acc": 0.2883136988770717 }, { "epoch": 2.2093227792436236, "grad_norm": 0.319641749025169, "learning_rate": 0.0002970115442119759, "loss": 3.118044376373291, "step": 3769, "token_acc": 0.2889292667760777 }, { "epoch": 2.2099091175608327, "grad_norm": 0.3427863814687333, "learning_rate": 0.00029700865598602524, "loss": 3.1661291122436523, "step": 3770, "token_acc": 0.28011693457630843 }, { "epoch": 2.210495455878042, "grad_norm": 0.43331515616171407, "learning_rate": 0.0002970057663791263, "loss": 3.1126625537872314, "step": 3771, "token_acc": 0.28659843581516276 }, { "epoch": 2.2110817941952505, "grad_norm": 0.37408488330845346, "learning_rate": 0.0002970028753913063, "loss": 3.1556777954101562, "step": 3772, "token_acc": 0.2819533670570152 }, { "epoch": 2.2116681325124596, "grad_norm": 0.3325957105824189, "learning_rate": 0.0002969999830225923, "loss": 3.136723518371582, "step": 3773, "token_acc": 0.28456658061697887 }, { "epoch": 2.2122544708296688, "grad_norm": 0.34109516243354243, "learning_rate": 0.00029699708927301163, "loss": 3.1388840675354004, "step": 3774, "token_acc": 0.2860615560367158 }, { "epoch": 2.212840809146878, "grad_norm": 0.3702297725464668, "learning_rate": 0.00029699419414259135, "loss": 3.139317512512207, "step": 3775, "token_acc": 0.2832988599116062 }, { "epoch": 2.213427147464087, "grad_norm": 0.3508604838928925, "learning_rate": 0.0002969912976313587, "loss": 3.1461031436920166, "step": 3776, "token_acc": 0.2828179572365619 }, { "epoch": 2.2140134857812956, "grad_norm": 0.34468504295810776, "learning_rate": 0.0002969883997393409, "loss": 3.154512882232666, "step": 3777, "token_acc": 0.2808072670349814 }, { "epoch": 2.2145998240985048, "grad_norm": 0.352324993808345, "learning_rate": 0.00029698550046656515, "loss": 3.1246447563171387, "step": 3778, "token_acc": 0.2866449924106317 }, { "epoch": 2.215186162415714, "grad_norm": 0.3460664851058068, "learning_rate": 0.0002969825998130587, "loss": 3.1506452560424805, "step": 3779, "token_acc": 0.2831327539801774 }, { "epoch": 2.215772500732923, "grad_norm": 0.30803323480517325, "learning_rate": 0.00029697969777884876, "loss": 3.1673409938812256, "step": 3780, "token_acc": 0.27995166361150653 }, { "epoch": 2.216358839050132, "grad_norm": 0.32156367067864855, "learning_rate": 0.00029697679436396264, "loss": 3.164299964904785, "step": 3781, "token_acc": 0.27988870620968764 }, { "epoch": 2.2169451773673408, "grad_norm": 0.3179234427896199, "learning_rate": 0.00029697388956842756, "loss": 3.100285053253174, "step": 3782, "token_acc": 0.287210271802568 }, { "epoch": 2.21753151568455, "grad_norm": 0.32121304989483795, "learning_rate": 0.00029697098339227085, "loss": 3.1462411880493164, "step": 3783, "token_acc": 0.2836473048724547 }, { "epoch": 2.218117854001759, "grad_norm": 0.2971499383749068, "learning_rate": 0.00029696807583551977, "loss": 3.0867717266082764, "step": 3784, "token_acc": 0.2906005395420537 }, { "epoch": 2.218704192318968, "grad_norm": 0.30428698891306205, "learning_rate": 0.0002969651668982017, "loss": 3.116027355194092, "step": 3785, "token_acc": 0.2875592401926344 }, { "epoch": 2.219290530636177, "grad_norm": 0.3437803202371795, "learning_rate": 0.0002969622565803439, "loss": 3.1645591259002686, "step": 3786, "token_acc": 0.2798665201600026 }, { "epoch": 2.219876868953386, "grad_norm": 0.35301056642093914, "learning_rate": 0.00029695934488197374, "loss": 3.1671109199523926, "step": 3787, "token_acc": 0.2778507970332579 }, { "epoch": 2.220463207270595, "grad_norm": 0.3385506536449666, "learning_rate": 0.0002969564318031186, "loss": 3.124004364013672, "step": 3788, "token_acc": 0.28593985334016603 }, { "epoch": 2.221049545587804, "grad_norm": 0.35997547002333613, "learning_rate": 0.0002969535173438058, "loss": 3.0796189308166504, "step": 3789, "token_acc": 0.29089671957395025 }, { "epoch": 2.221635883905013, "grad_norm": 0.3529830675098698, "learning_rate": 0.0002969506015040627, "loss": 3.1322474479675293, "step": 3790, "token_acc": 0.28471278675898276 }, { "epoch": 2.2222222222222223, "grad_norm": 0.3258650669055917, "learning_rate": 0.0002969476842839167, "loss": 3.131068468093872, "step": 3791, "token_acc": 0.286188213054302 }, { "epoch": 2.2228085605394314, "grad_norm": 0.3515607586600924, "learning_rate": 0.0002969447656833952, "loss": 3.135874032974243, "step": 3792, "token_acc": 0.2841311746898877 }, { "epoch": 2.22339489885664, "grad_norm": 0.3731176472732044, "learning_rate": 0.00029694184570252575, "loss": 3.113550901412964, "step": 3793, "token_acc": 0.28656675777316537 }, { "epoch": 2.223981237173849, "grad_norm": 0.36658313486687794, "learning_rate": 0.0002969389243413356, "loss": 3.10867977142334, "step": 3794, "token_acc": 0.28809046104002667 }, { "epoch": 2.2245675754910583, "grad_norm": 0.33458899675408305, "learning_rate": 0.0002969360015998522, "loss": 3.146430492401123, "step": 3795, "token_acc": 0.2833598571594923 }, { "epoch": 2.2251539138082674, "grad_norm": 0.3363372338360508, "learning_rate": 0.00029693307747810313, "loss": 3.1588902473449707, "step": 3796, "token_acc": 0.2818576332664033 }, { "epoch": 2.2257402521254765, "grad_norm": 0.3807862388558553, "learning_rate": 0.0002969301519761158, "loss": 3.166027069091797, "step": 3797, "token_acc": 0.2793695011620186 }, { "epoch": 2.226326590442685, "grad_norm": 0.36408872665974407, "learning_rate": 0.0002969272250939177, "loss": 3.179474115371704, "step": 3798, "token_acc": 0.27683282595988795 }, { "epoch": 2.2269129287598943, "grad_norm": 0.35326380619169234, "learning_rate": 0.00029692429683153624, "loss": 3.157876968383789, "step": 3799, "token_acc": 0.28264938844128135 }, { "epoch": 2.2274992670771034, "grad_norm": 0.312563084899839, "learning_rate": 0.000296921367188999, "loss": 3.124584197998047, "step": 3800, "token_acc": 0.28520868113522535 }, { "epoch": 2.2280856053943126, "grad_norm": 0.368973298726103, "learning_rate": 0.00029691843616633354, "loss": 3.0984749794006348, "step": 3801, "token_acc": 0.28978142728862916 }, { "epoch": 2.2286719437115217, "grad_norm": 0.33291453259596576, "learning_rate": 0.00029691550376356735, "loss": 3.155500650405884, "step": 3802, "token_acc": 0.2828012883392936 }, { "epoch": 2.2292582820287308, "grad_norm": 0.31029813734226924, "learning_rate": 0.00029691256998072796, "loss": 3.1208577156066895, "step": 3803, "token_acc": 0.2851888616059767 }, { "epoch": 2.2298446203459394, "grad_norm": 0.35265120750819473, "learning_rate": 0.0002969096348178429, "loss": 3.13283109664917, "step": 3804, "token_acc": 0.28393237962869416 }, { "epoch": 2.2304309586631486, "grad_norm": 0.32799916943871077, "learning_rate": 0.0002969066982749398, "loss": 3.1414284706115723, "step": 3805, "token_acc": 0.2848699324198741 }, { "epoch": 2.2310172969803577, "grad_norm": 0.3497467205532327, "learning_rate": 0.00029690376035204624, "loss": 3.1115567684173584, "step": 3806, "token_acc": 0.28824159682482564 }, { "epoch": 2.231603635297567, "grad_norm": 0.41641875697546643, "learning_rate": 0.00029690082104918985, "loss": 3.1981654167175293, "step": 3807, "token_acc": 0.27552068616758607 }, { "epoch": 2.232189973614776, "grad_norm": 0.38435080507944286, "learning_rate": 0.0002968978803663981, "loss": 3.1274185180664062, "step": 3808, "token_acc": 0.28408991839290293 }, { "epoch": 2.2327763119319846, "grad_norm": 0.39330360153440047, "learning_rate": 0.0002968949383036988, "loss": 3.135956287384033, "step": 3809, "token_acc": 0.2835522588224286 }, { "epoch": 2.2333626502491937, "grad_norm": 0.3521579260985205, "learning_rate": 0.0002968919948611195, "loss": 3.1432793140411377, "step": 3810, "token_acc": 0.28275794996346354 }, { "epoch": 2.233948988566403, "grad_norm": 0.36667814471807175, "learning_rate": 0.0002968890500386878, "loss": 3.1187589168548584, "step": 3811, "token_acc": 0.28658694876204693 }, { "epoch": 2.234535326883612, "grad_norm": 0.3767321266451095, "learning_rate": 0.0002968861038364315, "loss": 3.109773635864258, "step": 3812, "token_acc": 0.28770082354529497 }, { "epoch": 2.235121665200821, "grad_norm": 0.3500032225913167, "learning_rate": 0.0002968831562543781, "loss": 3.176600217819214, "step": 3813, "token_acc": 0.2793028692661023 }, { "epoch": 2.23570800351803, "grad_norm": 0.3795521237061469, "learning_rate": 0.00029688020729255537, "loss": 3.1243607997894287, "step": 3814, "token_acc": 0.2853834461207068 }, { "epoch": 2.236294341835239, "grad_norm": 0.3417693445955888, "learning_rate": 0.00029687725695099105, "loss": 3.1730802059173584, "step": 3815, "token_acc": 0.2785408819281203 }, { "epoch": 2.236880680152448, "grad_norm": 0.34060567431239436, "learning_rate": 0.0002968743052297128, "loss": 3.100273609161377, "step": 3816, "token_acc": 0.28810480064097005 }, { "epoch": 2.237467018469657, "grad_norm": 0.33395715237760143, "learning_rate": 0.00029687135212874834, "loss": 3.0997109413146973, "step": 3817, "token_acc": 0.29117475916467317 }, { "epoch": 2.238053356786866, "grad_norm": 0.29554968053902053, "learning_rate": 0.00029686839764812546, "loss": 3.093292713165283, "step": 3818, "token_acc": 0.2920530448833404 }, { "epoch": 2.2386396951040752, "grad_norm": 0.29924613908688724, "learning_rate": 0.00029686544178787196, "loss": 3.110499382019043, "step": 3819, "token_acc": 0.28761358577526386 }, { "epoch": 2.239226033421284, "grad_norm": 0.33545279414875173, "learning_rate": 0.00029686248454801543, "loss": 3.107767105102539, "step": 3820, "token_acc": 0.28697280986337453 }, { "epoch": 2.239812371738493, "grad_norm": 0.3642427925226817, "learning_rate": 0.00029685952592858384, "loss": 3.127642869949341, "step": 3821, "token_acc": 0.2865531023003976 }, { "epoch": 2.240398710055702, "grad_norm": 0.3660368473803843, "learning_rate": 0.00029685656592960485, "loss": 3.144003391265869, "step": 3822, "token_acc": 0.2835657027963703 }, { "epoch": 2.2409850483729112, "grad_norm": 0.32814374538190083, "learning_rate": 0.00029685360455110636, "loss": 3.147284507751465, "step": 3823, "token_acc": 0.28307908411079147 }, { "epoch": 2.2415713866901203, "grad_norm": 0.30730484711641337, "learning_rate": 0.0002968506417931161, "loss": 3.1099624633789062, "step": 3824, "token_acc": 0.2894372652276281 }, { "epoch": 2.2421577250073295, "grad_norm": 0.3298981889905991, "learning_rate": 0.00029684767765566193, "loss": 3.1177191734313965, "step": 3825, "token_acc": 0.2851696311366097 }, { "epoch": 2.242744063324538, "grad_norm": 0.33551707266600006, "learning_rate": 0.00029684471213877177, "loss": 3.159877300262451, "step": 3826, "token_acc": 0.28150116723345475 }, { "epoch": 2.2433304016417472, "grad_norm": 0.37770551659273416, "learning_rate": 0.0002968417452424734, "loss": 3.1461005210876465, "step": 3827, "token_acc": 0.28053293856402667 }, { "epoch": 2.2439167399589564, "grad_norm": 0.40656106231741346, "learning_rate": 0.0002968387769667947, "loss": 3.159878730773926, "step": 3828, "token_acc": 0.2809157554844164 }, { "epoch": 2.2445030782761655, "grad_norm": 0.37258371286941255, "learning_rate": 0.0002968358073117635, "loss": 3.086667537689209, "step": 3829, "token_acc": 0.28974647311388263 }, { "epoch": 2.2450894165933746, "grad_norm": 0.3765656311318651, "learning_rate": 0.0002968328362774078, "loss": 3.1338484287261963, "step": 3830, "token_acc": 0.2845573751445667 }, { "epoch": 2.2456757549105832, "grad_norm": 0.36054739497645144, "learning_rate": 0.0002968298638637555, "loss": 3.1424970626831055, "step": 3831, "token_acc": 0.2831309891076901 }, { "epoch": 2.2462620932277924, "grad_norm": 0.3563008399280607, "learning_rate": 0.00029682689007083445, "loss": 3.1569464206695557, "step": 3832, "token_acc": 0.2807823799295909 }, { "epoch": 2.2468484315450015, "grad_norm": 0.34154786239696044, "learning_rate": 0.0002968239148986726, "loss": 3.126563549041748, "step": 3833, "token_acc": 0.28375958948828855 }, { "epoch": 2.2474347698622106, "grad_norm": 0.32308367313291103, "learning_rate": 0.0002968209383472979, "loss": 3.1239233016967773, "step": 3834, "token_acc": 0.284844520350946 }, { "epoch": 2.2480211081794197, "grad_norm": 0.3043028392150398, "learning_rate": 0.00029681796041673844, "loss": 3.1476945877075195, "step": 3835, "token_acc": 0.28281338113268345 }, { "epoch": 2.2486074464966284, "grad_norm": 0.3037170146744987, "learning_rate": 0.00029681498110702197, "loss": 3.10396146774292, "step": 3836, "token_acc": 0.28775192098316743 }, { "epoch": 2.2491937848138375, "grad_norm": 0.3233377813541696, "learning_rate": 0.00029681200041817665, "loss": 3.1522209644317627, "step": 3837, "token_acc": 0.2827375109779408 }, { "epoch": 2.2497801231310466, "grad_norm": 0.3346904642707688, "learning_rate": 0.0002968090183502304, "loss": 3.1349873542785645, "step": 3838, "token_acc": 0.28562865238413376 }, { "epoch": 2.2503664614482557, "grad_norm": 0.3604269112568013, "learning_rate": 0.0002968060349032112, "loss": 3.1142184734344482, "step": 3839, "token_acc": 0.2879528040387433 }, { "epoch": 2.250952799765465, "grad_norm": 0.41697501414188737, "learning_rate": 0.0002968030500771472, "loss": 3.1308512687683105, "step": 3840, "token_acc": 0.28416912242767856 }, { "epoch": 2.2515391380826735, "grad_norm": 0.3775215996045852, "learning_rate": 0.0002968000638720663, "loss": 3.114043712615967, "step": 3841, "token_acc": 0.28654369301498 }, { "epoch": 2.2521254763998826, "grad_norm": 0.3001607838943336, "learning_rate": 0.0002967970762879966, "loss": 3.1494038105010986, "step": 3842, "token_acc": 0.28356555719559096 }, { "epoch": 2.2527118147170917, "grad_norm": 0.37390369884088986, "learning_rate": 0.0002967940873249663, "loss": 3.146125316619873, "step": 3843, "token_acc": 0.2838406153253715 }, { "epoch": 2.253298153034301, "grad_norm": 0.3945815798769358, "learning_rate": 0.00029679109698300325, "loss": 3.1140263080596924, "step": 3844, "token_acc": 0.28696604034677364 }, { "epoch": 2.25388449135151, "grad_norm": 0.3398301393583601, "learning_rate": 0.0002967881052621357, "loss": 3.1278798580169678, "step": 3845, "token_acc": 0.2845089357549584 }, { "epoch": 2.254470829668719, "grad_norm": 0.33682761351978074, "learning_rate": 0.00029678511216239166, "loss": 3.144913673400879, "step": 3846, "token_acc": 0.2820162649954136 }, { "epoch": 2.2550571679859277, "grad_norm": 0.361349656022621, "learning_rate": 0.00029678211768379933, "loss": 3.1437714099884033, "step": 3847, "token_acc": 0.284574283188829 }, { "epoch": 2.255643506303137, "grad_norm": 0.32778601007311964, "learning_rate": 0.00029677912182638676, "loss": 3.1215953826904297, "step": 3848, "token_acc": 0.28636693897577103 }, { "epoch": 2.256229844620346, "grad_norm": 0.3364444704217451, "learning_rate": 0.00029677612459018214, "loss": 3.0988543033599854, "step": 3849, "token_acc": 0.2882610877776897 }, { "epoch": 2.256816182937555, "grad_norm": 0.3923369037646225, "learning_rate": 0.0002967731259752136, "loss": 3.161343574523926, "step": 3850, "token_acc": 0.2792298287241869 }, { "epoch": 2.257402521254764, "grad_norm": 0.3555615523116818, "learning_rate": 0.00029677012598150937, "loss": 3.1440279483795166, "step": 3851, "token_acc": 0.28371905103937806 }, { "epoch": 2.257988859571973, "grad_norm": 0.37126208302062824, "learning_rate": 0.00029676712460909754, "loss": 3.102146625518799, "step": 3852, "token_acc": 0.2896049057884909 }, { "epoch": 2.258575197889182, "grad_norm": 0.3266113859872727, "learning_rate": 0.00029676412185800636, "loss": 3.148850440979004, "step": 3853, "token_acc": 0.282865212255426 }, { "epoch": 2.259161536206391, "grad_norm": 0.3781781504771243, "learning_rate": 0.00029676111772826403, "loss": 3.1555047035217285, "step": 3854, "token_acc": 0.2828065203477729 }, { "epoch": 2.2597478745236, "grad_norm": 0.39110195545602594, "learning_rate": 0.00029675811221989873, "loss": 3.1286869049072266, "step": 3855, "token_acc": 0.28481809380685785 }, { "epoch": 2.2603342128408093, "grad_norm": 0.34498430255440204, "learning_rate": 0.0002967551053329387, "loss": 3.108625888824463, "step": 3856, "token_acc": 0.287273098651358 }, { "epoch": 2.2609205511580184, "grad_norm": 0.3265490740514863, "learning_rate": 0.0002967520970674123, "loss": 3.145751953125, "step": 3857, "token_acc": 0.28336042827262 }, { "epoch": 2.261506889475227, "grad_norm": 0.37687619857249566, "learning_rate": 0.0002967490874233476, "loss": 3.153994083404541, "step": 3858, "token_acc": 0.2805961651345176 }, { "epoch": 2.262093227792436, "grad_norm": 0.3659101633880088, "learning_rate": 0.00029674607640077305, "loss": 3.1383116245269775, "step": 3859, "token_acc": 0.2840793496291625 }, { "epoch": 2.2626795661096453, "grad_norm": 0.3649728108488499, "learning_rate": 0.0002967430639997168, "loss": 3.1229546070098877, "step": 3860, "token_acc": 0.2862786525052479 }, { "epoch": 2.2632659044268544, "grad_norm": 0.40516352434086467, "learning_rate": 0.00029674005022020726, "loss": 3.1451821327209473, "step": 3861, "token_acc": 0.2831056383668179 }, { "epoch": 2.2638522427440635, "grad_norm": 0.340402796275945, "learning_rate": 0.0002967370350622727, "loss": 3.109259843826294, "step": 3862, "token_acc": 0.2878759043211888 }, { "epoch": 2.264438581061272, "grad_norm": 0.33687622846561444, "learning_rate": 0.00029673401852594136, "loss": 3.0722784996032715, "step": 3863, "token_acc": 0.2931314709797911 }, { "epoch": 2.2650249193784813, "grad_norm": 0.3838536611274872, "learning_rate": 0.00029673100061124164, "loss": 3.1475141048431396, "step": 3864, "token_acc": 0.28426435692413715 }, { "epoch": 2.2656112576956904, "grad_norm": 0.3338870541478003, "learning_rate": 0.0002967279813182019, "loss": 3.1759462356567383, "step": 3865, "token_acc": 0.27812563496561826 }, { "epoch": 2.2661975960128995, "grad_norm": 0.35767772301268946, "learning_rate": 0.0002967249606468505, "loss": 3.1165928840637207, "step": 3866, "token_acc": 0.2856306857477257 }, { "epoch": 2.2667839343301086, "grad_norm": 0.3335793755320412, "learning_rate": 0.0002967219385972158, "loss": 3.124889850616455, "step": 3867, "token_acc": 0.2839560058161089 }, { "epoch": 2.2673702726473177, "grad_norm": 0.3425308685522111, "learning_rate": 0.00029671891516932624, "loss": 3.0921688079833984, "step": 3868, "token_acc": 0.2904580832364372 }, { "epoch": 2.2679566109645264, "grad_norm": 0.3734309048171176, "learning_rate": 0.00029671589036321016, "loss": 3.1452436447143555, "step": 3869, "token_acc": 0.2828461341672888 }, { "epoch": 2.2685429492817355, "grad_norm": 0.30028012289456135, "learning_rate": 0.00029671286417889595, "loss": 3.118638277053833, "step": 3870, "token_acc": 0.2879698124290348 }, { "epoch": 2.2691292875989446, "grad_norm": 0.34802881387688106, "learning_rate": 0.00029670983661641214, "loss": 3.1682145595550537, "step": 3871, "token_acc": 0.28056332871302664 }, { "epoch": 2.2697156259161537, "grad_norm": 0.3757465966308485, "learning_rate": 0.0002967068076757871, "loss": 3.088167428970337, "step": 3872, "token_acc": 0.2922698606802053 }, { "epoch": 2.270301964233363, "grad_norm": 0.33390297229917815, "learning_rate": 0.0002967037773570492, "loss": 3.1354007720947266, "step": 3873, "token_acc": 0.2854307371267025 }, { "epoch": 2.2708883025505715, "grad_norm": 0.31112408057579755, "learning_rate": 0.0002967007456602271, "loss": 3.1719579696655273, "step": 3874, "token_acc": 0.2808750491119551 }, { "epoch": 2.2714746408677806, "grad_norm": 0.31376901647324984, "learning_rate": 0.0002966977125853492, "loss": 3.1327662467956543, "step": 3875, "token_acc": 0.2848221258580295 }, { "epoch": 2.2720609791849897, "grad_norm": 0.36455449106456456, "learning_rate": 0.0002966946781324439, "loss": 3.098599910736084, "step": 3876, "token_acc": 0.2903051358958734 }, { "epoch": 2.272647317502199, "grad_norm": 0.3309040090128516, "learning_rate": 0.0002966916423015398, "loss": 3.148508071899414, "step": 3877, "token_acc": 0.2819160960906339 }, { "epoch": 2.273233655819408, "grad_norm": 0.3214519503023754, "learning_rate": 0.00029668860509266535, "loss": 3.167297840118408, "step": 3878, "token_acc": 0.2805810693347158 }, { "epoch": 2.273819994136617, "grad_norm": 0.3503842681760275, "learning_rate": 0.00029668556650584916, "loss": 3.1011996269226074, "step": 3879, "token_acc": 0.2896082449085622 }, { "epoch": 2.2744063324538257, "grad_norm": 0.3299744501590456, "learning_rate": 0.00029668252654111967, "loss": 3.1620965003967285, "step": 3880, "token_acc": 0.2817415831916971 }, { "epoch": 2.274992670771035, "grad_norm": 0.3519721356743776, "learning_rate": 0.00029667948519850556, "loss": 3.1272995471954346, "step": 3881, "token_acc": 0.28598799043305684 }, { "epoch": 2.275579009088244, "grad_norm": 0.3348353766366132, "learning_rate": 0.00029667644247803534, "loss": 3.106405258178711, "step": 3882, "token_acc": 0.2869021038900759 }, { "epoch": 2.276165347405453, "grad_norm": 0.3156452423075233, "learning_rate": 0.0002966733983797376, "loss": 3.13942813873291, "step": 3883, "token_acc": 0.2834587058513514 }, { "epoch": 2.2767516857226617, "grad_norm": 0.3529102264331451, "learning_rate": 0.0002966703529036409, "loss": 3.1225242614746094, "step": 3884, "token_acc": 0.2842691930363516 }, { "epoch": 2.277338024039871, "grad_norm": 0.33722665754907466, "learning_rate": 0.0002966673060497739, "loss": 3.1357922554016113, "step": 3885, "token_acc": 0.28500778216317574 }, { "epoch": 2.27792436235708, "grad_norm": 0.3475788528720892, "learning_rate": 0.0002966642578181652, "loss": 3.1662609577178955, "step": 3886, "token_acc": 0.2800709905566666 }, { "epoch": 2.278510700674289, "grad_norm": 0.35692902207972327, "learning_rate": 0.0002966612082088434, "loss": 3.1336910724639893, "step": 3887, "token_acc": 0.2837569968674575 }, { "epoch": 2.279097038991498, "grad_norm": 0.3301430970741716, "learning_rate": 0.00029665815722183716, "loss": 3.1702640056610107, "step": 3888, "token_acc": 0.2815069844366915 }, { "epoch": 2.2796833773087073, "grad_norm": 0.3613877821275972, "learning_rate": 0.0002966551048571752, "loss": 3.167794704437256, "step": 3889, "token_acc": 0.2811859742113042 }, { "epoch": 2.280269715625916, "grad_norm": 0.3299456115221439, "learning_rate": 0.00029665205111488615, "loss": 3.1297707557678223, "step": 3890, "token_acc": 0.2853902608939845 }, { "epoch": 2.280856053943125, "grad_norm": 0.3398061648929076, "learning_rate": 0.00029664899599499866, "loss": 3.0753061771392822, "step": 3891, "token_acc": 0.29305059538864664 }, { "epoch": 2.281442392260334, "grad_norm": 0.3752650198139334, "learning_rate": 0.00029664593949754145, "loss": 3.1909584999084473, "step": 3892, "token_acc": 0.2783515901910154 }, { "epoch": 2.2820287305775433, "grad_norm": 0.35936101703074697, "learning_rate": 0.0002966428816225433, "loss": 3.155496597290039, "step": 3893, "token_acc": 0.28021690718803666 }, { "epoch": 2.2826150688947524, "grad_norm": 0.3655586993002499, "learning_rate": 0.0002966398223700329, "loss": 3.1330766677856445, "step": 3894, "token_acc": 0.28580541343672644 }, { "epoch": 2.283201407211961, "grad_norm": 0.34027944033069607, "learning_rate": 0.0002966367617400389, "loss": 3.124612331390381, "step": 3895, "token_acc": 0.2859967946105995 }, { "epoch": 2.28378774552917, "grad_norm": 0.3706794538789568, "learning_rate": 0.00029663369973259015, "loss": 3.130789279937744, "step": 3896, "token_acc": 0.2844121418104852 }, { "epoch": 2.2843740838463793, "grad_norm": 0.34332320720651394, "learning_rate": 0.0002966306363477154, "loss": 3.1528263092041016, "step": 3897, "token_acc": 0.2824466217550635 }, { "epoch": 2.2849604221635884, "grad_norm": 0.302780726186809, "learning_rate": 0.0002966275715854434, "loss": 3.1284918785095215, "step": 3898, "token_acc": 0.28730334141664055 }, { "epoch": 2.2855467604807975, "grad_norm": 0.31777351851650415, "learning_rate": 0.00029662450544580294, "loss": 3.163057804107666, "step": 3899, "token_acc": 0.2820830229565924 }, { "epoch": 2.2861330987980066, "grad_norm": 0.33607005571603743, "learning_rate": 0.0002966214379288228, "loss": 3.143604278564453, "step": 3900, "token_acc": 0.28307982586514036 }, { "epoch": 2.2867194371152153, "grad_norm": 0.4175559245841742, "learning_rate": 0.00029661836903453184, "loss": 3.1550049781799316, "step": 3901, "token_acc": 0.281235959863931 }, { "epoch": 2.2873057754324244, "grad_norm": 0.3748280162975092, "learning_rate": 0.00029661529876295894, "loss": 3.141681671142578, "step": 3902, "token_acc": 0.2826647964802507 }, { "epoch": 2.2878921137496335, "grad_norm": 0.36143263743728443, "learning_rate": 0.0002966122271141328, "loss": 3.121706247329712, "step": 3903, "token_acc": 0.2855800214822771 }, { "epoch": 2.2884784520668426, "grad_norm": 0.38173752096554653, "learning_rate": 0.0002966091540880824, "loss": 3.09523344039917, "step": 3904, "token_acc": 0.29140673797881544 }, { "epoch": 2.2890647903840518, "grad_norm": 0.3375537807760514, "learning_rate": 0.0002966060796848365, "loss": 3.1792566776275635, "step": 3905, "token_acc": 0.2796097274694641 }, { "epoch": 2.2896511287012604, "grad_norm": 0.32067076985580467, "learning_rate": 0.0002966030039044241, "loss": 3.0969648361206055, "step": 3906, "token_acc": 0.2886835199081478 }, { "epoch": 2.2902374670184695, "grad_norm": 0.3334424061310324, "learning_rate": 0.000296599926746874, "loss": 3.123234272003174, "step": 3907, "token_acc": 0.284843666512652 }, { "epoch": 2.2908238053356786, "grad_norm": 0.3099521682328931, "learning_rate": 0.00029659684821221514, "loss": 3.116097927093506, "step": 3908, "token_acc": 0.28575720388824105 }, { "epoch": 2.2914101436528878, "grad_norm": 0.32783597070791204, "learning_rate": 0.0002965937683004764, "loss": 3.091261148452759, "step": 3909, "token_acc": 0.290263353115727 }, { "epoch": 2.291996481970097, "grad_norm": 0.32131269174333393, "learning_rate": 0.00029659068701168675, "loss": 3.1498451232910156, "step": 3910, "token_acc": 0.281163006495515 }, { "epoch": 2.292582820287306, "grad_norm": 0.3422824619005444, "learning_rate": 0.00029658760434587517, "loss": 3.135819911956787, "step": 3911, "token_acc": 0.2830876855031873 }, { "epoch": 2.2931691586045146, "grad_norm": 0.3769530906535803, "learning_rate": 0.00029658452030307056, "loss": 3.1156129837036133, "step": 3912, "token_acc": 0.2854136102441734 }, { "epoch": 2.2937554969217238, "grad_norm": 0.3468570513356614, "learning_rate": 0.00029658143488330187, "loss": 3.123091220855713, "step": 3913, "token_acc": 0.285465907032207 }, { "epoch": 2.294341835238933, "grad_norm": 0.3758397522396083, "learning_rate": 0.00029657834808659815, "loss": 3.1301941871643066, "step": 3914, "token_acc": 0.28541657475196003 }, { "epoch": 2.294928173556142, "grad_norm": 0.3955409525476425, "learning_rate": 0.00029657525991298836, "loss": 3.133077621459961, "step": 3915, "token_acc": 0.2851247563097777 }, { "epoch": 2.295514511873351, "grad_norm": 0.3792246447401132, "learning_rate": 0.00029657217036250155, "loss": 3.1052136421203613, "step": 3916, "token_acc": 0.287138235993905 }, { "epoch": 2.2961008501905598, "grad_norm": 0.3535137053883964, "learning_rate": 0.00029656907943516667, "loss": 3.1539230346679688, "step": 3917, "token_acc": 0.28208302471478414 }, { "epoch": 2.296687188507769, "grad_norm": 0.40949508696268716, "learning_rate": 0.00029656598713101277, "loss": 3.167966365814209, "step": 3918, "token_acc": 0.27944552695624636 }, { "epoch": 2.297273526824978, "grad_norm": 0.3689495820905759, "learning_rate": 0.00029656289345006897, "loss": 3.149592399597168, "step": 3919, "token_acc": 0.2803497498706227 }, { "epoch": 2.297859865142187, "grad_norm": 0.35365823333052937, "learning_rate": 0.0002965597983923642, "loss": 3.1518445014953613, "step": 3920, "token_acc": 0.2823548342604938 }, { "epoch": 2.298446203459396, "grad_norm": 0.37423928834599296, "learning_rate": 0.0002965567019579277, "loss": 3.152134895324707, "step": 3921, "token_acc": 0.28398140054134324 }, { "epoch": 2.2990325417766053, "grad_norm": 0.3618158156999347, "learning_rate": 0.0002965536041467885, "loss": 3.1430611610412598, "step": 3922, "token_acc": 0.2829186828244765 }, { "epoch": 2.299618880093814, "grad_norm": 0.2717137584957784, "learning_rate": 0.00029655050495897565, "loss": 3.1336140632629395, "step": 3923, "token_acc": 0.2853663197319643 }, { "epoch": 2.300205218411023, "grad_norm": 0.326539145877011, "learning_rate": 0.00029654740439451823, "loss": 3.1463160514831543, "step": 3924, "token_acc": 0.28246615247454954 }, { "epoch": 2.300791556728232, "grad_norm": 0.32903399491159996, "learning_rate": 0.0002965443024534454, "loss": 3.0967490673065186, "step": 3925, "token_acc": 0.2888586107588872 }, { "epoch": 2.3013778950454413, "grad_norm": 0.3218240281904549, "learning_rate": 0.0002965411991357864, "loss": 3.1402153968811035, "step": 3926, "token_acc": 0.28366721931129274 }, { "epoch": 2.3019642333626504, "grad_norm": 0.3337488523605423, "learning_rate": 0.0002965380944415703, "loss": 3.1310198307037354, "step": 3927, "token_acc": 0.28258745979430566 }, { "epoch": 2.302550571679859, "grad_norm": 0.3739003140582341, "learning_rate": 0.00029653498837082625, "loss": 3.0922772884368896, "step": 3928, "token_acc": 0.2896576442633179 }, { "epoch": 2.303136909997068, "grad_norm": 0.349797427692297, "learning_rate": 0.0002965318809235834, "loss": 3.1183156967163086, "step": 3929, "token_acc": 0.28653450891262366 }, { "epoch": 2.3037232483142773, "grad_norm": 0.3330890973584048, "learning_rate": 0.000296528772099871, "loss": 3.1265792846679688, "step": 3930, "token_acc": 0.28602233743454825 }, { "epoch": 2.3043095866314864, "grad_norm": 0.31747836969058285, "learning_rate": 0.00029652566189971826, "loss": 3.0859501361846924, "step": 3931, "token_acc": 0.29039291460535616 }, { "epoch": 2.3048959249486956, "grad_norm": 0.31927717232309866, "learning_rate": 0.00029652255032315436, "loss": 3.08859920501709, "step": 3932, "token_acc": 0.2898383822103449 }, { "epoch": 2.3054822632659047, "grad_norm": 0.30303922238932196, "learning_rate": 0.00029651943737020854, "loss": 3.1357712745666504, "step": 3933, "token_acc": 0.2841259175023261 }, { "epoch": 2.3060686015831133, "grad_norm": 0.31209786074099005, "learning_rate": 0.00029651632304091, "loss": 3.1018872261047363, "step": 3934, "token_acc": 0.28658283527051354 }, { "epoch": 2.3066549399003224, "grad_norm": 0.3140674826141942, "learning_rate": 0.00029651320733528814, "loss": 3.146984577178955, "step": 3935, "token_acc": 0.2828596480305483 }, { "epoch": 2.3072412782175316, "grad_norm": 0.31580586498134955, "learning_rate": 0.00029651009025337204, "loss": 3.102692127227783, "step": 3936, "token_acc": 0.2889240727956699 }, { "epoch": 2.3078276165347407, "grad_norm": 0.3893506483722437, "learning_rate": 0.0002965069717951911, "loss": 3.1709413528442383, "step": 3937, "token_acc": 0.279024983008259 }, { "epoch": 2.3084139548519493, "grad_norm": 0.4131410864702742, "learning_rate": 0.0002965038519607746, "loss": 3.1448557376861572, "step": 3938, "token_acc": 0.2823305163459752 }, { "epoch": 2.3090002931691584, "grad_norm": 0.4184412134362271, "learning_rate": 0.0002965007307501518, "loss": 3.135000467300415, "step": 3939, "token_acc": 0.2847566284131381 }, { "epoch": 2.3095866314863676, "grad_norm": 0.3527129246227712, "learning_rate": 0.00029649760816335204, "loss": 3.1537468433380127, "step": 3940, "token_acc": 0.28108774848684775 }, { "epoch": 2.3101729698035767, "grad_norm": 0.4130054447942725, "learning_rate": 0.0002964944842004047, "loss": 3.111358642578125, "step": 3941, "token_acc": 0.28666590099919603 }, { "epoch": 2.310759308120786, "grad_norm": 0.3351690768305104, "learning_rate": 0.000296491358861339, "loss": 3.093397855758667, "step": 3942, "token_acc": 0.2904525542885551 }, { "epoch": 2.311345646437995, "grad_norm": 0.3224020722044585, "learning_rate": 0.0002964882321461845, "loss": 3.134650230407715, "step": 3943, "token_acc": 0.2862757495465615 }, { "epoch": 2.3119319847552036, "grad_norm": 0.387140069543194, "learning_rate": 0.00029648510405497035, "loss": 3.0998148918151855, "step": 3944, "token_acc": 0.2879225416351742 }, { "epoch": 2.3125183230724127, "grad_norm": 0.40692566500967514, "learning_rate": 0.0002964819745877261, "loss": 3.1301662921905518, "step": 3945, "token_acc": 0.2839390885015218 }, { "epoch": 2.313104661389622, "grad_norm": 0.32819625967990196, "learning_rate": 0.00029647884374448105, "loss": 3.1174418926239014, "step": 3946, "token_acc": 0.28632289907780234 }, { "epoch": 2.313690999706831, "grad_norm": 0.3294581142727935, "learning_rate": 0.0002964757115252647, "loss": 3.187544822692871, "step": 3947, "token_acc": 0.2777391725089307 }, { "epoch": 2.31427733802404, "grad_norm": 0.36576806378099, "learning_rate": 0.0002964725779301063, "loss": 3.1446797847747803, "step": 3948, "token_acc": 0.28411444948055387 }, { "epoch": 2.3148636763412487, "grad_norm": 0.34682858093929086, "learning_rate": 0.00029646944295903555, "loss": 3.1067044734954834, "step": 3949, "token_acc": 0.2877818253421286 }, { "epoch": 2.315450014658458, "grad_norm": 0.30360641586581255, "learning_rate": 0.00029646630661208165, "loss": 3.171372413635254, "step": 3950, "token_acc": 0.27954881921252583 }, { "epoch": 2.316036352975667, "grad_norm": 0.32071206464679897, "learning_rate": 0.0002964631688892742, "loss": 3.1548526287078857, "step": 3951, "token_acc": 0.2818333748177648 }, { "epoch": 2.316622691292876, "grad_norm": 0.3011349284656512, "learning_rate": 0.0002964600297906427, "loss": 3.130326509475708, "step": 3952, "token_acc": 0.28512613647587215 }, { "epoch": 2.317209029610085, "grad_norm": 0.2842852131705738, "learning_rate": 0.0002964568893162165, "loss": 3.1081178188323975, "step": 3953, "token_acc": 0.28764650701968886 }, { "epoch": 2.3177953679272942, "grad_norm": 0.30492201892864884, "learning_rate": 0.0002964537474660252, "loss": 3.1088638305664062, "step": 3954, "token_acc": 0.28838705711182605 }, { "epoch": 2.318381706244503, "grad_norm": 0.29042600108079875, "learning_rate": 0.0002964506042400983, "loss": 3.1179306507110596, "step": 3955, "token_acc": 0.28552800451768573 }, { "epoch": 2.318968044561712, "grad_norm": 0.3452267393152285, "learning_rate": 0.0002964474596384653, "loss": 3.157042980194092, "step": 3956, "token_acc": 0.2826617974592864 }, { "epoch": 2.319554382878921, "grad_norm": 0.3092341425990623, "learning_rate": 0.0002964443136611558, "loss": 3.095261573791504, "step": 3957, "token_acc": 0.2905771909065206 }, { "epoch": 2.3201407211961302, "grad_norm": 0.33663624105604206, "learning_rate": 0.00029644116630819924, "loss": 3.171421527862549, "step": 3958, "token_acc": 0.27816660048108904 }, { "epoch": 2.3207270595133394, "grad_norm": 0.32444857746916417, "learning_rate": 0.0002964380175796253, "loss": 3.1454882621765137, "step": 3959, "token_acc": 0.28199545904469187 }, { "epoch": 2.321313397830548, "grad_norm": 0.3093550942552928, "learning_rate": 0.00029643486747546347, "loss": 3.143613815307617, "step": 3960, "token_acc": 0.2845564775072277 }, { "epoch": 2.321899736147757, "grad_norm": 0.3382588347576792, "learning_rate": 0.00029643171599574343, "loss": 3.1464314460754395, "step": 3961, "token_acc": 0.28252593708165996 }, { "epoch": 2.3224860744649662, "grad_norm": 0.35538502107573894, "learning_rate": 0.00029642856314049474, "loss": 3.132188320159912, "step": 3962, "token_acc": 0.2850337793085177 }, { "epoch": 2.3230724127821754, "grad_norm": 0.33200635455332145, "learning_rate": 0.000296425408909747, "loss": 3.0920443534851074, "step": 3963, "token_acc": 0.29052592266549293 }, { "epoch": 2.3236587510993845, "grad_norm": 0.3253505439891719, "learning_rate": 0.00029642225330352986, "loss": 3.151855707168579, "step": 3964, "token_acc": 0.282366732896616 }, { "epoch": 2.3242450894165936, "grad_norm": 0.409766579251743, "learning_rate": 0.0002964190963218729, "loss": 3.09792160987854, "step": 3965, "token_acc": 0.28958612887099744 }, { "epoch": 2.3248314277338022, "grad_norm": 0.33186332916078143, "learning_rate": 0.0002964159379648059, "loss": 3.1322860717773438, "step": 3966, "token_acc": 0.28488048326447485 }, { "epoch": 2.3254177660510114, "grad_norm": 0.3272413661718686, "learning_rate": 0.00029641277823235846, "loss": 3.157687187194824, "step": 3967, "token_acc": 0.2818728465997084 }, { "epoch": 2.3260041043682205, "grad_norm": 0.37484030417323033, "learning_rate": 0.0002964096171245602, "loss": 3.139496088027954, "step": 3968, "token_acc": 0.28392500074008886 }, { "epoch": 2.3265904426854296, "grad_norm": 0.34778044600332114, "learning_rate": 0.00029640645464144096, "loss": 3.156140089035034, "step": 3969, "token_acc": 0.2819651225165228 }, { "epoch": 2.3271767810026387, "grad_norm": 0.3027776066419856, "learning_rate": 0.0002964032907830303, "loss": 3.1464316844940186, "step": 3970, "token_acc": 0.28328766981792264 }, { "epoch": 2.3277631193198474, "grad_norm": 0.36096171851981074, "learning_rate": 0.00029640012554935807, "loss": 3.1473302841186523, "step": 3971, "token_acc": 0.28267223603936825 }, { "epoch": 2.3283494576370565, "grad_norm": 0.3258349418188849, "learning_rate": 0.00029639695894045383, "loss": 3.155453681945801, "step": 3972, "token_acc": 0.2803161645610625 }, { "epoch": 2.3289357959542656, "grad_norm": 0.34544446514383104, "learning_rate": 0.00029639379095634756, "loss": 3.1648499965667725, "step": 3973, "token_acc": 0.2802476246078269 }, { "epoch": 2.3295221342714747, "grad_norm": 0.3139305613152924, "learning_rate": 0.0002963906215970688, "loss": 3.1688733100891113, "step": 3974, "token_acc": 0.2774523323207084 }, { "epoch": 2.330108472588684, "grad_norm": 0.31801358213108577, "learning_rate": 0.00029638745086264746, "loss": 3.1264429092407227, "step": 3975, "token_acc": 0.2853276618370331 }, { "epoch": 2.330694810905893, "grad_norm": 0.32132890089752575, "learning_rate": 0.00029638427875311327, "loss": 3.1065244674682617, "step": 3976, "token_acc": 0.28859072182785156 }, { "epoch": 2.3312811492231016, "grad_norm": 0.31120303707540703, "learning_rate": 0.00029638110526849604, "loss": 3.0976758003234863, "step": 3977, "token_acc": 0.2894734770871102 }, { "epoch": 2.3318674875403107, "grad_norm": 0.3194433083350436, "learning_rate": 0.0002963779304088255, "loss": 3.1128406524658203, "step": 3978, "token_acc": 0.28683485321122365 }, { "epoch": 2.33245382585752, "grad_norm": 0.341211404222118, "learning_rate": 0.0002963747541741317, "loss": 3.114959955215454, "step": 3979, "token_acc": 0.28751626924351725 }, { "epoch": 2.333040164174729, "grad_norm": 0.3403736337168058, "learning_rate": 0.0002963715765644442, "loss": 3.086817741394043, "step": 3980, "token_acc": 0.2920673460911792 }, { "epoch": 2.333626502491938, "grad_norm": 0.3514878459147988, "learning_rate": 0.000296368397579793, "loss": 3.168426990509033, "step": 3981, "token_acc": 0.27941350019537753 }, { "epoch": 2.3342128408091467, "grad_norm": 0.36394326447013103, "learning_rate": 0.00029636521722020795, "loss": 3.1469502449035645, "step": 3982, "token_acc": 0.2828156877653171 }, { "epoch": 2.334799179126356, "grad_norm": 0.3236336101844483, "learning_rate": 0.00029636203548571896, "loss": 3.0912115573883057, "step": 3983, "token_acc": 0.28926650802469783 }, { "epoch": 2.335385517443565, "grad_norm": 0.33984706048480506, "learning_rate": 0.00029635885237635584, "loss": 3.1286025047302246, "step": 3984, "token_acc": 0.2839609014971456 }, { "epoch": 2.335971855760774, "grad_norm": 0.31787548191424597, "learning_rate": 0.0002963556678921485, "loss": 3.1059465408325195, "step": 3985, "token_acc": 0.29002777646509037 }, { "epoch": 2.336558194077983, "grad_norm": 0.32525891787625666, "learning_rate": 0.0002963524820331269, "loss": 3.122732639312744, "step": 3986, "token_acc": 0.28664019727554213 }, { "epoch": 2.3371445323951923, "grad_norm": 0.3939331440926721, "learning_rate": 0.0002963492947993209, "loss": 3.0824568271636963, "step": 3987, "token_acc": 0.2915181379744181 }, { "epoch": 2.337730870712401, "grad_norm": 0.3937511848206936, "learning_rate": 0.0002963461061907605, "loss": 3.140044927597046, "step": 3988, "token_acc": 0.2827458548304517 }, { "epoch": 2.33831720902961, "grad_norm": 0.3525153941912084, "learning_rate": 0.0002963429162074757, "loss": 3.12719464302063, "step": 3989, "token_acc": 0.2856854653828519 }, { "epoch": 2.338903547346819, "grad_norm": 0.34240452425242723, "learning_rate": 0.0002963397248494964, "loss": 3.1316399574279785, "step": 3990, "token_acc": 0.28416875166218686 }, { "epoch": 2.3394898856640283, "grad_norm": 0.3549401904942206, "learning_rate": 0.00029633653211685255, "loss": 3.1107828617095947, "step": 3991, "token_acc": 0.2874998366418798 }, { "epoch": 2.340076223981237, "grad_norm": 0.29849073705542006, "learning_rate": 0.00029633333800957413, "loss": 3.132326602935791, "step": 3992, "token_acc": 0.2827539591568191 }, { "epoch": 2.340662562298446, "grad_norm": 0.3459356721374136, "learning_rate": 0.00029633014252769123, "loss": 3.1048688888549805, "step": 3993, "token_acc": 0.2880155930572814 }, { "epoch": 2.341248900615655, "grad_norm": 0.3267966345195763, "learning_rate": 0.0002963269456712338, "loss": 3.143235445022583, "step": 3994, "token_acc": 0.2826626598754357 }, { "epoch": 2.3418352389328643, "grad_norm": 0.30339352674640824, "learning_rate": 0.0002963237474402319, "loss": 3.1140542030334473, "step": 3995, "token_acc": 0.28642132337667997 }, { "epoch": 2.3424215772500734, "grad_norm": 0.3102619538977227, "learning_rate": 0.00029632054783471556, "loss": 3.140777111053467, "step": 3996, "token_acc": 0.28516348998135943 }, { "epoch": 2.3430079155672825, "grad_norm": 0.32332447712786505, "learning_rate": 0.00029631734685471486, "loss": 3.112484931945801, "step": 3997, "token_acc": 0.2883135321930377 }, { "epoch": 2.343594253884491, "grad_norm": 0.3087385496165147, "learning_rate": 0.00029631414450025984, "loss": 3.1353821754455566, "step": 3998, "token_acc": 0.2840378474518065 }, { "epoch": 2.3441805922017003, "grad_norm": 0.3174757693173645, "learning_rate": 0.0002963109407713806, "loss": 3.106165885925293, "step": 3999, "token_acc": 0.2885664414864559 }, { "epoch": 2.3447669305189094, "grad_norm": 0.3502495059107925, "learning_rate": 0.0002963077356681072, "loss": 3.0772173404693604, "step": 4000, "token_acc": 0.29168265804046273 }, { "epoch": 2.3453532688361185, "grad_norm": 0.36539377137315054, "learning_rate": 0.0002963045291904699, "loss": 3.103294610977173, "step": 4001, "token_acc": 0.2899008055032453 }, { "epoch": 2.3459396071533276, "grad_norm": 0.4235753577062346, "learning_rate": 0.0002963013213384985, "loss": 3.167001485824585, "step": 4002, "token_acc": 0.27866895138329445 }, { "epoch": 2.3465259454705363, "grad_norm": 0.41216292895103995, "learning_rate": 0.0002962981121122235, "loss": 3.1391708850860596, "step": 4003, "token_acc": 0.2836572378921391 }, { "epoch": 2.3471122837877454, "grad_norm": 0.36287085215595655, "learning_rate": 0.0002962949015116748, "loss": 3.1735856533050537, "step": 4004, "token_acc": 0.2780420702699605 }, { "epoch": 2.3476986221049545, "grad_norm": 0.2866053570603616, "learning_rate": 0.0002962916895368826, "loss": 3.1510367393493652, "step": 4005, "token_acc": 0.28242361402970123 }, { "epoch": 2.3482849604221636, "grad_norm": 0.39624344703642184, "learning_rate": 0.0002962884761878772, "loss": 3.1754367351531982, "step": 4006, "token_acc": 0.2784526719487525 }, { "epoch": 2.3488712987393727, "grad_norm": 0.37577173799126873, "learning_rate": 0.0002962852614646886, "loss": 3.110475540161133, "step": 4007, "token_acc": 0.2874274416431515 }, { "epoch": 2.349457637056582, "grad_norm": 0.3120587502323707, "learning_rate": 0.0002962820453673471, "loss": 3.1042656898498535, "step": 4008, "token_acc": 0.2890577445408928 }, { "epoch": 2.3500439753737905, "grad_norm": 0.3339394466202423, "learning_rate": 0.00029627882789588295, "loss": 3.1650099754333496, "step": 4009, "token_acc": 0.28150531182530714 }, { "epoch": 2.3506303136909996, "grad_norm": 0.35143519282375235, "learning_rate": 0.00029627560905032626, "loss": 3.1488072872161865, "step": 4010, "token_acc": 0.28168383947939263 }, { "epoch": 2.3512166520082087, "grad_norm": 0.3462029532525808, "learning_rate": 0.0002962723888307074, "loss": 3.1110737323760986, "step": 4011, "token_acc": 0.28760575328339977 }, { "epoch": 2.351802990325418, "grad_norm": 0.33700075200261886, "learning_rate": 0.0002962691672370565, "loss": 3.1305384635925293, "step": 4012, "token_acc": 0.2852899208717084 }, { "epoch": 2.352389328642627, "grad_norm": 0.35343213532433165, "learning_rate": 0.00029626594426940397, "loss": 3.1542434692382812, "step": 4013, "token_acc": 0.27996121113138456 }, { "epoch": 2.3529756669598356, "grad_norm": 0.3290123110196839, "learning_rate": 0.00029626271992777987, "loss": 3.135974884033203, "step": 4014, "token_acc": 0.28392988577815337 }, { "epoch": 2.3535620052770447, "grad_norm": 0.3232403138813767, "learning_rate": 0.00029625949421221466, "loss": 3.127687931060791, "step": 4015, "token_acc": 0.2842832585679109 }, { "epoch": 2.354148343594254, "grad_norm": 0.37049770078288097, "learning_rate": 0.00029625626712273865, "loss": 3.1001381874084473, "step": 4016, "token_acc": 0.2899405262606256 }, { "epoch": 2.354734681911463, "grad_norm": 0.36671027763347663, "learning_rate": 0.00029625303865938197, "loss": 3.1361236572265625, "step": 4017, "token_acc": 0.2822531737943356 }, { "epoch": 2.355321020228672, "grad_norm": 0.33918883653612, "learning_rate": 0.00029624980882217515, "loss": 3.1558055877685547, "step": 4018, "token_acc": 0.28311769232612694 }, { "epoch": 2.355907358545881, "grad_norm": 0.3381591991155212, "learning_rate": 0.00029624657761114843, "loss": 3.099740505218506, "step": 4019, "token_acc": 0.28886623079025375 }, { "epoch": 2.35649369686309, "grad_norm": 0.34766829766852736, "learning_rate": 0.0002962433450263322, "loss": 3.131413459777832, "step": 4020, "token_acc": 0.285502096079316 }, { "epoch": 2.357080035180299, "grad_norm": 0.35838235456841555, "learning_rate": 0.0002962401110677568, "loss": 3.158010959625244, "step": 4021, "token_acc": 0.2793105837608395 }, { "epoch": 2.357666373497508, "grad_norm": 0.3443667883443588, "learning_rate": 0.0002962368757354526, "loss": 3.1425743103027344, "step": 4022, "token_acc": 0.28125927584608734 }, { "epoch": 2.358252711814717, "grad_norm": 0.3301598643512975, "learning_rate": 0.00029623363902945004, "loss": 3.1080713272094727, "step": 4023, "token_acc": 0.2889172395241 }, { "epoch": 2.3588390501319263, "grad_norm": 0.3576615253979242, "learning_rate": 0.00029623040094977943, "loss": 3.117068290710449, "step": 4024, "token_acc": 0.28600126876718124 }, { "epoch": 2.359425388449135, "grad_norm": 0.3345417991709525, "learning_rate": 0.0002962271614964713, "loss": 3.1294901371002197, "step": 4025, "token_acc": 0.287467301843857 }, { "epoch": 2.360011726766344, "grad_norm": 0.35664834864674017, "learning_rate": 0.00029622392066955603, "loss": 3.1373677253723145, "step": 4026, "token_acc": 0.28445016084020364 }, { "epoch": 2.360598065083553, "grad_norm": 0.32651405600435834, "learning_rate": 0.00029622067846906406, "loss": 3.122591018676758, "step": 4027, "token_acc": 0.28593844422139747 }, { "epoch": 2.3611844034007623, "grad_norm": 0.36632852545161243, "learning_rate": 0.00029621743489502585, "loss": 3.1384105682373047, "step": 4028, "token_acc": 0.2837051757777122 }, { "epoch": 2.3617707417179714, "grad_norm": 0.3410284343938234, "learning_rate": 0.00029621418994747186, "loss": 3.1472935676574707, "step": 4029, "token_acc": 0.2808508269398166 }, { "epoch": 2.3623570800351805, "grad_norm": 0.3183450443639991, "learning_rate": 0.00029621094362643257, "loss": 3.1052231788635254, "step": 4030, "token_acc": 0.28649753238741515 }, { "epoch": 2.362943418352389, "grad_norm": 0.302784952813276, "learning_rate": 0.00029620769593193853, "loss": 3.1508584022521973, "step": 4031, "token_acc": 0.2835671979124212 }, { "epoch": 2.3635297566695983, "grad_norm": 0.29871037637940984, "learning_rate": 0.00029620444686402023, "loss": 3.1142523288726807, "step": 4032, "token_acc": 0.28713735505209964 }, { "epoch": 2.3641160949868074, "grad_norm": 0.32005576723436063, "learning_rate": 0.0002962011964227081, "loss": 3.0794737339019775, "step": 4033, "token_acc": 0.29188106824373095 }, { "epoch": 2.3647024333040165, "grad_norm": 0.3287711271571033, "learning_rate": 0.00029619794460803274, "loss": 3.1443397998809814, "step": 4034, "token_acc": 0.2817958693438471 }, { "epoch": 2.3652887716212256, "grad_norm": 0.34891523167838623, "learning_rate": 0.0002961946914200247, "loss": 3.0749239921569824, "step": 4035, "token_acc": 0.29375930475403883 }, { "epoch": 2.3658751099384343, "grad_norm": 0.3575819862446973, "learning_rate": 0.0002961914368587145, "loss": 3.1467981338500977, "step": 4036, "token_acc": 0.28168531956424603 }, { "epoch": 2.3664614482556434, "grad_norm": 0.3060707502384176, "learning_rate": 0.00029618818092413284, "loss": 3.1116278171539307, "step": 4037, "token_acc": 0.2875863916421109 }, { "epoch": 2.3670477865728525, "grad_norm": 0.38860454815255263, "learning_rate": 0.0002961849236163102, "loss": 3.1438052654266357, "step": 4038, "token_acc": 0.2838916218914811 }, { "epoch": 2.3676341248900616, "grad_norm": 0.38122393721910874, "learning_rate": 0.0002961816649352771, "loss": 3.0928635597229004, "step": 4039, "token_acc": 0.2898877001794139 }, { "epoch": 2.3682204632072708, "grad_norm": 0.30310739867053454, "learning_rate": 0.0002961784048810643, "loss": 3.111734628677368, "step": 4040, "token_acc": 0.28747518983775755 }, { "epoch": 2.36880680152448, "grad_norm": 0.3610304676336057, "learning_rate": 0.00029617514345370234, "loss": 3.1203346252441406, "step": 4041, "token_acc": 0.2861496067205308 }, { "epoch": 2.3693931398416885, "grad_norm": 0.42574548618412045, "learning_rate": 0.0002961718806532219, "loss": 3.1274099349975586, "step": 4042, "token_acc": 0.28313789443711007 }, { "epoch": 2.3699794781588976, "grad_norm": 0.38230190329200986, "learning_rate": 0.00029616861647965365, "loss": 3.072632312774658, "step": 4043, "token_acc": 0.29222820373295577 }, { "epoch": 2.3705658164761068, "grad_norm": 0.3433491510755498, "learning_rate": 0.0002961653509330281, "loss": 3.060450315475464, "step": 4044, "token_acc": 0.2946065875768338 }, { "epoch": 2.371152154793316, "grad_norm": 0.3506460771442109, "learning_rate": 0.00029616208401337616, "loss": 3.1136536598205566, "step": 4045, "token_acc": 0.28747545219638243 }, { "epoch": 2.3717384931105245, "grad_norm": 0.3835965271805788, "learning_rate": 0.00029615881572072836, "loss": 3.1222023963928223, "step": 4046, "token_acc": 0.2857692116558079 }, { "epoch": 2.3723248314277336, "grad_norm": 0.39173783632215414, "learning_rate": 0.00029615554605511544, "loss": 3.1482996940612793, "step": 4047, "token_acc": 0.28218468852951895 }, { "epoch": 2.3729111697449428, "grad_norm": 0.38283616174466534, "learning_rate": 0.00029615227501656805, "loss": 3.105515956878662, "step": 4048, "token_acc": 0.289061929928803 }, { "epoch": 2.373497508062152, "grad_norm": 0.34234163076003277, "learning_rate": 0.000296149002605117, "loss": 3.1336746215820312, "step": 4049, "token_acc": 0.28492549673132017 }, { "epoch": 2.374083846379361, "grad_norm": 0.3406497260932234, "learning_rate": 0.00029614572882079304, "loss": 3.1649649143218994, "step": 4050, "token_acc": 0.28033965110596437 }, { "epoch": 2.37467018469657, "grad_norm": 0.37071580450177355, "learning_rate": 0.0002961424536636269, "loss": 3.1256635189056396, "step": 4051, "token_acc": 0.2870016406559391 }, { "epoch": 2.3752565230137788, "grad_norm": 0.3261009887266913, "learning_rate": 0.00029613917713364933, "loss": 3.112837791442871, "step": 4052, "token_acc": 0.2882105952612339 }, { "epoch": 2.375842861330988, "grad_norm": 0.32844758197651663, "learning_rate": 0.00029613589923089113, "loss": 3.1194145679473877, "step": 4053, "token_acc": 0.2866617577309187 }, { "epoch": 2.376429199648197, "grad_norm": 0.3076168975013088, "learning_rate": 0.000296132619955383, "loss": 3.144341230392456, "step": 4054, "token_acc": 0.28342546087151715 }, { "epoch": 2.377015537965406, "grad_norm": 0.3353014927479724, "learning_rate": 0.0002961293393071559, "loss": 3.1068387031555176, "step": 4055, "token_acc": 0.28893783816779617 }, { "epoch": 2.377601876282615, "grad_norm": 0.3400359099807351, "learning_rate": 0.00029612605728624055, "loss": 3.122575521469116, "step": 4056, "token_acc": 0.28587565253651304 }, { "epoch": 2.378188214599824, "grad_norm": 0.31526799643771264, "learning_rate": 0.0002961227738926678, "loss": 3.0796141624450684, "step": 4057, "token_acc": 0.2906491321567029 }, { "epoch": 2.378774552917033, "grad_norm": 0.3449717272074962, "learning_rate": 0.00029611948912646846, "loss": 3.1379218101501465, "step": 4058, "token_acc": 0.2851804302197978 }, { "epoch": 2.379360891234242, "grad_norm": 0.3144034080239278, "learning_rate": 0.00029611620298767346, "loss": 3.1387691497802734, "step": 4059, "token_acc": 0.28454155122159536 }, { "epoch": 2.379947229551451, "grad_norm": 0.3419398553620478, "learning_rate": 0.0002961129154763136, "loss": 3.1118006706237793, "step": 4060, "token_acc": 0.2872363250077186 }, { "epoch": 2.3805335678686603, "grad_norm": 0.3028329584812501, "learning_rate": 0.0002961096265924198, "loss": 3.1462883949279785, "step": 4061, "token_acc": 0.28254646154485613 }, { "epoch": 2.3811199061858694, "grad_norm": 0.30734348794617594, "learning_rate": 0.0002961063363360229, "loss": 3.1184375286102295, "step": 4062, "token_acc": 0.2858412337989612 }, { "epoch": 2.381706244503078, "grad_norm": 0.3089975517855078, "learning_rate": 0.0002961030447071539, "loss": 3.0917253494262695, "step": 4063, "token_acc": 0.2902494149542237 }, { "epoch": 2.382292582820287, "grad_norm": 0.3515939521765386, "learning_rate": 0.0002960997517058437, "loss": 3.074690818786621, "step": 4064, "token_acc": 0.2917661433589762 }, { "epoch": 2.3828789211374963, "grad_norm": 0.29647511080238154, "learning_rate": 0.00029609645733212316, "loss": 3.1222891807556152, "step": 4065, "token_acc": 0.28517263448304 }, { "epoch": 2.3834652594547054, "grad_norm": 0.34521614119582134, "learning_rate": 0.0002960931615860233, "loss": 3.1230716705322266, "step": 4066, "token_acc": 0.2854011099899092 }, { "epoch": 2.3840515977719146, "grad_norm": 0.3041334236621187, "learning_rate": 0.00029608986446757503, "loss": 3.16392183303833, "step": 4067, "token_acc": 0.2816469333644976 }, { "epoch": 2.3846379360891232, "grad_norm": 0.30059224044736854, "learning_rate": 0.00029608656597680935, "loss": 3.114427089691162, "step": 4068, "token_acc": 0.2863731377948837 }, { "epoch": 2.3852242744063323, "grad_norm": 0.3225804534347919, "learning_rate": 0.0002960832661137572, "loss": 3.1056575775146484, "step": 4069, "token_acc": 0.289475895609792 }, { "epoch": 2.3858106127235414, "grad_norm": 0.2777038315065759, "learning_rate": 0.0002960799648784497, "loss": 3.087029457092285, "step": 4070, "token_acc": 0.29098091897933664 }, { "epoch": 2.3863969510407506, "grad_norm": 0.3437772758278913, "learning_rate": 0.0002960766622709177, "loss": 3.125253677368164, "step": 4071, "token_acc": 0.2846819151620822 }, { "epoch": 2.3869832893579597, "grad_norm": 0.4025348878327826, "learning_rate": 0.0002960733582911923, "loss": 3.1176581382751465, "step": 4072, "token_acc": 0.284976229995287 }, { "epoch": 2.387569627675169, "grad_norm": 0.3975775316064328, "learning_rate": 0.00029607005293930453, "loss": 3.086373805999756, "step": 4073, "token_acc": 0.28947672819529446 }, { "epoch": 2.3881559659923774, "grad_norm": 0.336936223997053, "learning_rate": 0.00029606674621528547, "loss": 3.134627342224121, "step": 4074, "token_acc": 0.2858716467431601 }, { "epoch": 2.3887423043095866, "grad_norm": 0.3508290550956907, "learning_rate": 0.00029606343811916616, "loss": 3.1428351402282715, "step": 4075, "token_acc": 0.2839732000425396 }, { "epoch": 2.3893286426267957, "grad_norm": 0.38666413131651467, "learning_rate": 0.0002960601286509777, "loss": 3.1315760612487793, "step": 4076, "token_acc": 0.2842102577188058 }, { "epoch": 2.389914980944005, "grad_norm": 0.39542023347359023, "learning_rate": 0.0002960568178107511, "loss": 3.1438581943511963, "step": 4077, "token_acc": 0.28342988247738565 }, { "epoch": 2.390501319261214, "grad_norm": 0.3718882888342459, "learning_rate": 0.0002960535055985175, "loss": 3.1236462593078613, "step": 4078, "token_acc": 0.28655080606961436 }, { "epoch": 2.3910876575784226, "grad_norm": 0.33925481837890226, "learning_rate": 0.00029605019201430806, "loss": 3.1145498752593994, "step": 4079, "token_acc": 0.28739140468551094 }, { "epoch": 2.3916739958956317, "grad_norm": 0.33448650446974554, "learning_rate": 0.00029604687705815386, "loss": 3.0845329761505127, "step": 4080, "token_acc": 0.28863444367908714 }, { "epoch": 2.392260334212841, "grad_norm": 0.37587453914622954, "learning_rate": 0.00029604356073008607, "loss": 3.1235480308532715, "step": 4081, "token_acc": 0.2857210148246039 }, { "epoch": 2.39284667253005, "grad_norm": 0.36603099398672506, "learning_rate": 0.00029604024303013575, "loss": 3.1164331436157227, "step": 4082, "token_acc": 0.2866449167206013 }, { "epoch": 2.393433010847259, "grad_norm": 0.3221779260161908, "learning_rate": 0.0002960369239583342, "loss": 3.12549090385437, "step": 4083, "token_acc": 0.2851708070510063 }, { "epoch": 2.394019349164468, "grad_norm": 0.3880953580475413, "learning_rate": 0.00029603360351471256, "loss": 3.117244243621826, "step": 4084, "token_acc": 0.2858404635602847 }, { "epoch": 2.394605687481677, "grad_norm": 0.3736562404370773, "learning_rate": 0.000296030281699302, "loss": 3.1411666870117188, "step": 4085, "token_acc": 0.2826039682560476 }, { "epoch": 2.395192025798886, "grad_norm": 0.29477651727289295, "learning_rate": 0.00029602695851213367, "loss": 3.1056132316589355, "step": 4086, "token_acc": 0.28745504023964474 }, { "epoch": 2.395778364116095, "grad_norm": 0.3461508578510441, "learning_rate": 0.00029602363395323883, "loss": 3.16405987739563, "step": 4087, "token_acc": 0.27963235680849946 }, { "epoch": 2.396364702433304, "grad_norm": 0.287510992254556, "learning_rate": 0.00029602030802264876, "loss": 3.0820648670196533, "step": 4088, "token_acc": 0.29155795726978767 }, { "epoch": 2.3969510407505132, "grad_norm": 0.3568126469643849, "learning_rate": 0.00029601698072039467, "loss": 3.1050209999084473, "step": 4089, "token_acc": 0.288143938782429 }, { "epoch": 2.397537379067722, "grad_norm": 0.31551141678308997, "learning_rate": 0.00029601365204650774, "loss": 3.149054765701294, "step": 4090, "token_acc": 0.28167853753662936 }, { "epoch": 2.398123717384931, "grad_norm": 0.29643560180878287, "learning_rate": 0.00029601032200101936, "loss": 3.160217761993408, "step": 4091, "token_acc": 0.2808048465922728 }, { "epoch": 2.39871005570214, "grad_norm": 0.34984919943191994, "learning_rate": 0.00029600699058396075, "loss": 3.129704713821411, "step": 4092, "token_acc": 0.2846545691097856 }, { "epoch": 2.3992963940193492, "grad_norm": 0.322181122170411, "learning_rate": 0.0002960036577953632, "loss": 3.063467025756836, "step": 4093, "token_acc": 0.2933167576508652 }, { "epoch": 2.3998827323365584, "grad_norm": 0.3207150039030181, "learning_rate": 0.00029600032363525806, "loss": 3.1432952880859375, "step": 4094, "token_acc": 0.2822501724608153 }, { "epoch": 2.4004690706537675, "grad_norm": 0.299801657605114, "learning_rate": 0.00029599698810367655, "loss": 3.138869285583496, "step": 4095, "token_acc": 0.28362501828109005 }, { "epoch": 2.401055408970976, "grad_norm": 0.3576568958487254, "learning_rate": 0.0002959936512006501, "loss": 3.1874008178710938, "step": 4096, "token_acc": 0.2764936752774024 }, { "epoch": 2.4016417472881852, "grad_norm": 0.3757895284805148, "learning_rate": 0.00029599031292621005, "loss": 3.1302578449249268, "step": 4097, "token_acc": 0.2847600886200164 }, { "epoch": 2.4022280856053944, "grad_norm": 0.4290185213415451, "learning_rate": 0.00029598697328038774, "loss": 3.1395249366760254, "step": 4098, "token_acc": 0.28380919067815136 }, { "epoch": 2.4028144239226035, "grad_norm": 0.32681236661979357, "learning_rate": 0.0002959836322632145, "loss": 3.146493911743164, "step": 4099, "token_acc": 0.28295107013974446 }, { "epoch": 2.403400762239812, "grad_norm": 0.35674182153429684, "learning_rate": 0.00029598028987472177, "loss": 3.1419923305511475, "step": 4100, "token_acc": 0.28420438104424195 }, { "epoch": 2.4039871005570213, "grad_norm": 0.33575957290197384, "learning_rate": 0.00029597694611494094, "loss": 3.1567134857177734, "step": 4101, "token_acc": 0.28247825707255175 }, { "epoch": 2.4045734388742304, "grad_norm": 0.31078131968223355, "learning_rate": 0.00029597360098390337, "loss": 3.104005813598633, "step": 4102, "token_acc": 0.2887838858796563 }, { "epoch": 2.4051597771914395, "grad_norm": 0.43178801483634843, "learning_rate": 0.00029597025448164057, "loss": 3.125809669494629, "step": 4103, "token_acc": 0.2842391360299795 }, { "epoch": 2.4057461155086486, "grad_norm": 0.3594254704889613, "learning_rate": 0.00029596690660818386, "loss": 3.1811869144439697, "step": 4104, "token_acc": 0.27947864742940565 }, { "epoch": 2.4063324538258577, "grad_norm": 0.39445662325112135, "learning_rate": 0.0002959635573635648, "loss": 3.1588358879089355, "step": 4105, "token_acc": 0.2812822816369751 }, { "epoch": 2.4069187921430664, "grad_norm": 0.3231290976746693, "learning_rate": 0.00029596020674781484, "loss": 3.1087775230407715, "step": 4106, "token_acc": 0.28906112825143604 }, { "epoch": 2.4075051304602755, "grad_norm": 0.3867762431882229, "learning_rate": 0.00029595685476096535, "loss": 3.101858139038086, "step": 4107, "token_acc": 0.2887637087163549 }, { "epoch": 2.4080914687774846, "grad_norm": 0.34439915688176387, "learning_rate": 0.00029595350140304794, "loss": 3.091704845428467, "step": 4108, "token_acc": 0.2893276706056091 }, { "epoch": 2.4086778070946937, "grad_norm": 0.32005193007743693, "learning_rate": 0.00029595014667409405, "loss": 3.1325976848602295, "step": 4109, "token_acc": 0.28550594876399144 }, { "epoch": 2.409264145411903, "grad_norm": 0.29270829279910404, "learning_rate": 0.00029594679057413513, "loss": 3.1128897666931152, "step": 4110, "token_acc": 0.28656458429255266 }, { "epoch": 2.4098504837291115, "grad_norm": 0.3075684205270804, "learning_rate": 0.0002959434331032029, "loss": 3.10868501663208, "step": 4111, "token_acc": 0.28729127516778524 }, { "epoch": 2.4104368220463206, "grad_norm": 0.3056223033297288, "learning_rate": 0.0002959400742613287, "loss": 3.127923011779785, "step": 4112, "token_acc": 0.2841130188752374 }, { "epoch": 2.4110231603635297, "grad_norm": 0.3362550594282149, "learning_rate": 0.0002959367140485442, "loss": 3.0786402225494385, "step": 4113, "token_acc": 0.29261380200213444 }, { "epoch": 2.411609498680739, "grad_norm": 0.3472997632138236, "learning_rate": 0.00029593335246488086, "loss": 3.125199317932129, "step": 4114, "token_acc": 0.2868676800398704 }, { "epoch": 2.412195836997948, "grad_norm": 0.27041974597209306, "learning_rate": 0.0002959299895103704, "loss": 3.1392602920532227, "step": 4115, "token_acc": 0.2850552054607795 }, { "epoch": 2.412782175315157, "grad_norm": 0.3239632986517963, "learning_rate": 0.00029592662518504426, "loss": 3.155673027038574, "step": 4116, "token_acc": 0.28069993760727796 }, { "epoch": 2.4133685136323657, "grad_norm": 0.33626305069370355, "learning_rate": 0.00029592325948893416, "loss": 3.1176717281341553, "step": 4117, "token_acc": 0.28786474361351083 }, { "epoch": 2.413954851949575, "grad_norm": 0.31075485068079367, "learning_rate": 0.00029591989242207164, "loss": 3.1246867179870605, "step": 4118, "token_acc": 0.2846070105547303 }, { "epoch": 2.414541190266784, "grad_norm": 0.3088628753353623, "learning_rate": 0.0002959165239844884, "loss": 3.1521925926208496, "step": 4119, "token_acc": 0.2830860155279586 }, { "epoch": 2.415127528583993, "grad_norm": 0.4136812844884011, "learning_rate": 0.00029591315417621604, "loss": 3.1566102504730225, "step": 4120, "token_acc": 0.27947669498006505 }, { "epoch": 2.415713866901202, "grad_norm": 0.40448314318857753, "learning_rate": 0.0002959097829972862, "loss": 3.1274571418762207, "step": 4121, "token_acc": 0.2851610273134937 }, { "epoch": 2.416300205218411, "grad_norm": 0.41654616543111284, "learning_rate": 0.0002959064104477305, "loss": 3.1164355278015137, "step": 4122, "token_acc": 0.28624112579068317 }, { "epoch": 2.41688654353562, "grad_norm": 0.3475172403706704, "learning_rate": 0.0002959030365275808, "loss": 3.093113422393799, "step": 4123, "token_acc": 0.2882727811314108 }, { "epoch": 2.417472881852829, "grad_norm": 0.35205889002219554, "learning_rate": 0.0002958996612368686, "loss": 3.1584134101867676, "step": 4124, "token_acc": 0.27981491298406197 }, { "epoch": 2.418059220170038, "grad_norm": 0.3546336626181856, "learning_rate": 0.00029589628457562573, "loss": 3.1534931659698486, "step": 4125, "token_acc": 0.2797648784003778 }, { "epoch": 2.4186455584872473, "grad_norm": 0.3755933268778347, "learning_rate": 0.00029589290654388387, "loss": 3.133286476135254, "step": 4126, "token_acc": 0.28505585293609786 }, { "epoch": 2.4192318968044564, "grad_norm": 0.3835454247674965, "learning_rate": 0.00029588952714167474, "loss": 3.1292192935943604, "step": 4127, "token_acc": 0.28310922821882484 }, { "epoch": 2.419818235121665, "grad_norm": 0.31516398047155963, "learning_rate": 0.00029588614636903005, "loss": 3.1010119915008545, "step": 4128, "token_acc": 0.2893087448070603 }, { "epoch": 2.420404573438874, "grad_norm": 0.32727412237861264, "learning_rate": 0.0002958827642259816, "loss": 3.1238107681274414, "step": 4129, "token_acc": 0.285604711395504 }, { "epoch": 2.4209909117560833, "grad_norm": 0.3358974419579676, "learning_rate": 0.00029587938071256117, "loss": 3.11397123336792, "step": 4130, "token_acc": 0.28599735305333523 }, { "epoch": 2.4215772500732924, "grad_norm": 0.3249341829871764, "learning_rate": 0.0002958759958288006, "loss": 3.1062324047088623, "step": 4131, "token_acc": 0.28723074201232285 }, { "epoch": 2.4221635883905015, "grad_norm": 0.3176484182382069, "learning_rate": 0.00029587260957473154, "loss": 3.098982334136963, "step": 4132, "token_acc": 0.28952504018238967 }, { "epoch": 2.42274992670771, "grad_norm": 0.3103169039732458, "learning_rate": 0.0002958692219503859, "loss": 3.1370491981506348, "step": 4133, "token_acc": 0.2850324688259432 }, { "epoch": 2.4233362650249193, "grad_norm": 0.30764324934479886, "learning_rate": 0.0002958658329557955, "loss": 3.0795207023620605, "step": 4134, "token_acc": 0.2911532715789686 }, { "epoch": 2.4239226033421284, "grad_norm": 0.3174245833609015, "learning_rate": 0.00029586244259099216, "loss": 3.112072467803955, "step": 4135, "token_acc": 0.28790613019401307 }, { "epoch": 2.4245089416593375, "grad_norm": 0.3591540576913985, "learning_rate": 0.0002958590508560077, "loss": 3.0682766437530518, "step": 4136, "token_acc": 0.29346222959518836 }, { "epoch": 2.4250952799765466, "grad_norm": 0.33661657399529404, "learning_rate": 0.000295855657750874, "loss": 3.168714761734009, "step": 4137, "token_acc": 0.2796815787343068 }, { "epoch": 2.4256816182937557, "grad_norm": 0.29427638079772583, "learning_rate": 0.00029585226327562297, "loss": 3.1352345943450928, "step": 4138, "token_acc": 0.28466872525391634 }, { "epoch": 2.4262679566109644, "grad_norm": 0.3470082969552848, "learning_rate": 0.00029584886743028643, "loss": 3.1168084144592285, "step": 4139, "token_acc": 0.28691008728684264 }, { "epoch": 2.4268542949281735, "grad_norm": 0.3416335169278841, "learning_rate": 0.0002958454702148963, "loss": 3.1049888134002686, "step": 4140, "token_acc": 0.2884488655976402 }, { "epoch": 2.4274406332453826, "grad_norm": 0.27829244556515836, "learning_rate": 0.00029584207162948456, "loss": 3.1409943103790283, "step": 4141, "token_acc": 0.282409726096716 }, { "epoch": 2.4280269715625917, "grad_norm": 0.3242347918917184, "learning_rate": 0.00029583867167408303, "loss": 3.104142665863037, "step": 4142, "token_acc": 0.28715826500788866 }, { "epoch": 2.4286133098798004, "grad_norm": 0.30335562857582377, "learning_rate": 0.00029583527034872376, "loss": 3.1046061515808105, "step": 4143, "token_acc": 0.2888985780164989 }, { "epoch": 2.4291996481970095, "grad_norm": 0.29787614919578354, "learning_rate": 0.0002958318676534386, "loss": 3.1188926696777344, "step": 4144, "token_acc": 0.28509726368729016 }, { "epoch": 2.4297859865142186, "grad_norm": 0.3163667983951163, "learning_rate": 0.0002958284635882595, "loss": 3.1389687061309814, "step": 4145, "token_acc": 0.28454566048500546 }, { "epoch": 2.4303723248314277, "grad_norm": 0.3853796205601756, "learning_rate": 0.00029582505815321856, "loss": 3.1683313846588135, "step": 4146, "token_acc": 0.2784516181484662 }, { "epoch": 2.430958663148637, "grad_norm": 0.3592960418412085, "learning_rate": 0.00029582165134834775, "loss": 3.1298131942749023, "step": 4147, "token_acc": 0.28345887700125116 }, { "epoch": 2.431545001465846, "grad_norm": 0.30809064151861587, "learning_rate": 0.00029581824317367894, "loss": 3.1052701473236084, "step": 4148, "token_acc": 0.28943949794290025 }, { "epoch": 2.432131339783055, "grad_norm": 0.35784324141670515, "learning_rate": 0.0002958148336292443, "loss": 3.1838088035583496, "step": 4149, "token_acc": 0.2778973586298084 }, { "epoch": 2.4327176781002637, "grad_norm": 0.3307604483002467, "learning_rate": 0.0002958114227150757, "loss": 3.136889934539795, "step": 4150, "token_acc": 0.2853696690702047 }, { "epoch": 2.433304016417473, "grad_norm": 0.3252739757065157, "learning_rate": 0.0002958080104312053, "loss": 3.165478229522705, "step": 4151, "token_acc": 0.2797660812679283 }, { "epoch": 2.433890354734682, "grad_norm": 0.32482038551705, "learning_rate": 0.00029580459677766514, "loss": 3.123816967010498, "step": 4152, "token_acc": 0.2862042351651999 }, { "epoch": 2.434476693051891, "grad_norm": 0.3435108623904546, "learning_rate": 0.0002958011817544872, "loss": 3.073190689086914, "step": 4153, "token_acc": 0.2925355156599344 }, { "epoch": 2.4350630313690997, "grad_norm": 0.32125023117817675, "learning_rate": 0.00029579776536170374, "loss": 3.0986857414245605, "step": 4154, "token_acc": 0.2880927477855091 }, { "epoch": 2.435649369686309, "grad_norm": 0.3120668006854101, "learning_rate": 0.00029579434759934665, "loss": 3.1393611431121826, "step": 4155, "token_acc": 0.2843403086630424 }, { "epoch": 2.436235708003518, "grad_norm": 0.28873752260934354, "learning_rate": 0.00029579092846744815, "loss": 3.107593297958374, "step": 4156, "token_acc": 0.28766502000984384 }, { "epoch": 2.436822046320727, "grad_norm": 0.34062789305957064, "learning_rate": 0.00029578750796604037, "loss": 3.1653482913970947, "step": 4157, "token_acc": 0.2784347401344037 }, { "epoch": 2.437408384637936, "grad_norm": 0.34622514319925485, "learning_rate": 0.00029578408609515536, "loss": 3.1186602115631104, "step": 4158, "token_acc": 0.2868462547886638 }, { "epoch": 2.4379947229551453, "grad_norm": 0.28138463398848174, "learning_rate": 0.0002957806628548253, "loss": 3.163783550262451, "step": 4159, "token_acc": 0.28030947653714033 }, { "epoch": 2.438581061272354, "grad_norm": 0.3067602183936032, "learning_rate": 0.0002957772382450824, "loss": 3.124952793121338, "step": 4160, "token_acc": 0.2844958357523336 }, { "epoch": 2.439167399589563, "grad_norm": 0.3243698449715789, "learning_rate": 0.00029577381226595875, "loss": 3.110630750656128, "step": 4161, "token_acc": 0.28887556847056983 }, { "epoch": 2.439753737906772, "grad_norm": 0.29078139061430774, "learning_rate": 0.0002957703849174866, "loss": 3.127685785293579, "step": 4162, "token_acc": 0.2845871197576688 }, { "epoch": 2.4403400762239813, "grad_norm": 0.3152966473782495, "learning_rate": 0.000295766956199698, "loss": 3.128417730331421, "step": 4163, "token_acc": 0.28486416347270455 }, { "epoch": 2.4409264145411904, "grad_norm": 0.29299622687503285, "learning_rate": 0.0002957635261126254, "loss": 3.1237001419067383, "step": 4164, "token_acc": 0.28450736414423566 }, { "epoch": 2.441512752858399, "grad_norm": 0.27848354177585716, "learning_rate": 0.00029576009465630086, "loss": 3.1376285552978516, "step": 4165, "token_acc": 0.28463043618879236 }, { "epoch": 2.442099091175608, "grad_norm": 0.3083234842261065, "learning_rate": 0.00029575666183075664, "loss": 3.114762306213379, "step": 4166, "token_acc": 0.2863711539741627 }, { "epoch": 2.4426854294928173, "grad_norm": 0.29221217452966763, "learning_rate": 0.00029575322763602494, "loss": 3.1787891387939453, "step": 4167, "token_acc": 0.2788001186707784 }, { "epoch": 2.4432717678100264, "grad_norm": 0.28847867120031484, "learning_rate": 0.0002957497920721382, "loss": 3.122265338897705, "step": 4168, "token_acc": 0.2864476797818153 }, { "epoch": 2.4438581061272355, "grad_norm": 0.2843944230967726, "learning_rate": 0.00029574635513912844, "loss": 3.1669845581054688, "step": 4169, "token_acc": 0.279684713733333 }, { "epoch": 2.4444444444444446, "grad_norm": 0.3236609233308892, "learning_rate": 0.0002957429168370281, "loss": 3.1467251777648926, "step": 4170, "token_acc": 0.28437059180004787 }, { "epoch": 2.4450307827616533, "grad_norm": 0.3280662803360395, "learning_rate": 0.0002957394771658695, "loss": 3.132016897201538, "step": 4171, "token_acc": 0.2850108107548907 }, { "epoch": 2.4456171210788624, "grad_norm": 0.3181695316937913, "learning_rate": 0.0002957360361256848, "loss": 3.144162178039551, "step": 4172, "token_acc": 0.28185316826731494 }, { "epoch": 2.4462034593960715, "grad_norm": 0.3250763790010029, "learning_rate": 0.0002957325937165065, "loss": 3.1430256366729736, "step": 4173, "token_acc": 0.2826527076075656 }, { "epoch": 2.4467897977132806, "grad_norm": 0.31958522365760733, "learning_rate": 0.00029572914993836684, "loss": 3.1445913314819336, "step": 4174, "token_acc": 0.2827902411174186 }, { "epoch": 2.4473761360304898, "grad_norm": 0.3243449422808361, "learning_rate": 0.0002957257047912981, "loss": 3.0831727981567383, "step": 4175, "token_acc": 0.2929087118226796 }, { "epoch": 2.4479624743476984, "grad_norm": 0.34987616331853155, "learning_rate": 0.00029572225827533287, "loss": 3.166348934173584, "step": 4176, "token_acc": 0.27772515383181057 }, { "epoch": 2.4485488126649075, "grad_norm": 0.3374678325520746, "learning_rate": 0.00029571881039050334, "loss": 3.1206789016723633, "step": 4177, "token_acc": 0.2860181635561445 }, { "epoch": 2.4491351509821166, "grad_norm": 0.29682492344740724, "learning_rate": 0.0002957153611368419, "loss": 3.128352403640747, "step": 4178, "token_acc": 0.28411640934848004 }, { "epoch": 2.4497214892993258, "grad_norm": 0.34839576578136816, "learning_rate": 0.00029571191051438106, "loss": 3.1155009269714355, "step": 4179, "token_acc": 0.2866963650262277 }, { "epoch": 2.450307827616535, "grad_norm": 0.3560622198599429, "learning_rate": 0.00029570845852315314, "loss": 3.1029927730560303, "step": 4180, "token_acc": 0.2887865535764464 }, { "epoch": 2.450894165933744, "grad_norm": 0.32997705390420234, "learning_rate": 0.00029570500516319057, "loss": 3.121243953704834, "step": 4181, "token_acc": 0.2856549212593468 }, { "epoch": 2.4514805042509527, "grad_norm": 0.360099242951122, "learning_rate": 0.00029570155043452586, "loss": 3.1484968662261963, "step": 4182, "token_acc": 0.2834438609469389 }, { "epoch": 2.4520668425681618, "grad_norm": 0.356534687717105, "learning_rate": 0.00029569809433719143, "loss": 3.163255214691162, "step": 4183, "token_acc": 0.27985202242116175 }, { "epoch": 2.452653180885371, "grad_norm": 0.3086882308229312, "learning_rate": 0.0002956946368712197, "loss": 3.1430625915527344, "step": 4184, "token_acc": 0.2842363300621709 }, { "epoch": 2.45323951920258, "grad_norm": 0.3763624685664077, "learning_rate": 0.0002956911780366432, "loss": 3.107767105102539, "step": 4185, "token_acc": 0.285120833716597 }, { "epoch": 2.453825857519789, "grad_norm": 0.3669182500537939, "learning_rate": 0.00029568771783349435, "loss": 3.0617265701293945, "step": 4186, "token_acc": 0.2956620297705423 }, { "epoch": 2.4544121958369978, "grad_norm": 0.34354353619456457, "learning_rate": 0.00029568425626180577, "loss": 3.1100916862487793, "step": 4187, "token_acc": 0.28686218640670647 }, { "epoch": 2.454998534154207, "grad_norm": 0.40416474070091496, "learning_rate": 0.00029568079332160994, "loss": 3.177902936935425, "step": 4188, "token_acc": 0.2778932235509377 }, { "epoch": 2.455584872471416, "grad_norm": 0.33395624337927426, "learning_rate": 0.0002956773290129393, "loss": 3.084484100341797, "step": 4189, "token_acc": 0.2905265292671179 }, { "epoch": 2.456171210788625, "grad_norm": 0.3242802555969176, "learning_rate": 0.0002956738633358265, "loss": 3.149378776550293, "step": 4190, "token_acc": 0.28139552263807177 }, { "epoch": 2.456757549105834, "grad_norm": 0.3539448464617524, "learning_rate": 0.000295670396290304, "loss": 3.0530712604522705, "step": 4191, "token_acc": 0.2935181586561299 }, { "epoch": 2.4573438874230433, "grad_norm": 0.3411228719522272, "learning_rate": 0.0002956669278764045, "loss": 3.079084634780884, "step": 4192, "token_acc": 0.29214418521592067 }, { "epoch": 2.457930225740252, "grad_norm": 0.3277844080736803, "learning_rate": 0.0002956634580941604, "loss": 3.1283328533172607, "step": 4193, "token_acc": 0.28482219722229984 }, { "epoch": 2.458516564057461, "grad_norm": 0.33957628128592593, "learning_rate": 0.00029565998694360446, "loss": 3.1166884899139404, "step": 4194, "token_acc": 0.2857177948268662 }, { "epoch": 2.45910290237467, "grad_norm": 0.3484167500081368, "learning_rate": 0.0002956565144247692, "loss": 3.1315340995788574, "step": 4195, "token_acc": 0.287011826544021 }, { "epoch": 2.4596892406918793, "grad_norm": 0.34330509317864644, "learning_rate": 0.0002956530405376873, "loss": 3.150961399078369, "step": 4196, "token_acc": 0.2808462914892008 }, { "epoch": 2.460275579009088, "grad_norm": 0.41202732951419013, "learning_rate": 0.00029564956528239125, "loss": 3.1293752193450928, "step": 4197, "token_acc": 0.28497534499558436 }, { "epoch": 2.460861917326297, "grad_norm": 0.43297067789715016, "learning_rate": 0.0002956460886589139, "loss": 3.110107421875, "step": 4198, "token_acc": 0.28839363410099 }, { "epoch": 2.4614482556435062, "grad_norm": 0.36172252475118377, "learning_rate": 0.00029564261066728783, "loss": 3.1329855918884277, "step": 4199, "token_acc": 0.283375803307192 }, { "epoch": 2.4620345939607153, "grad_norm": 0.36295689629908084, "learning_rate": 0.0002956391313075456, "loss": 3.080261707305908, "step": 4200, "token_acc": 0.29044826615523445 }, { "epoch": 2.4626209322779244, "grad_norm": 0.36560847232008425, "learning_rate": 0.00029563565057972, "loss": 3.1188130378723145, "step": 4201, "token_acc": 0.2876926082848662 }, { "epoch": 2.4632072705951336, "grad_norm": 0.36342945049757625, "learning_rate": 0.00029563216848384373, "loss": 3.1749165058135986, "step": 4202, "token_acc": 0.27960442153960224 }, { "epoch": 2.4637936089123427, "grad_norm": 0.34904288908233316, "learning_rate": 0.0002956286850199495, "loss": 3.188642978668213, "step": 4203, "token_acc": 0.2780057345605467 }, { "epoch": 2.4643799472295513, "grad_norm": 0.3487022381206377, "learning_rate": 0.00029562520018807, "loss": 3.114593505859375, "step": 4204, "token_acc": 0.2870092885848881 }, { "epoch": 2.4649662855467604, "grad_norm": 0.32716583505502966, "learning_rate": 0.000295621713988238, "loss": 3.138054132461548, "step": 4205, "token_acc": 0.28321283118422536 }, { "epoch": 2.4655526238639696, "grad_norm": 0.35143450296374307, "learning_rate": 0.00029561822642048613, "loss": 3.1015212535858154, "step": 4206, "token_acc": 0.28827525409666044 }, { "epoch": 2.4661389621811787, "grad_norm": 0.33099857728937004, "learning_rate": 0.00029561473748484737, "loss": 3.117356777191162, "step": 4207, "token_acc": 0.2853810124069735 }, { "epoch": 2.4667253004983873, "grad_norm": 0.3614354394189754, "learning_rate": 0.0002956112471813543, "loss": 3.1570494174957275, "step": 4208, "token_acc": 0.28168092876219547 }, { "epoch": 2.4673116388155965, "grad_norm": 0.28021653797624285, "learning_rate": 0.00029560775551003976, "loss": 3.094374418258667, "step": 4209, "token_acc": 0.28876766764975187 }, { "epoch": 2.4678979771328056, "grad_norm": 0.3315945644434655, "learning_rate": 0.0002956042624709366, "loss": 3.121507167816162, "step": 4210, "token_acc": 0.2856035916366763 }, { "epoch": 2.4684843154500147, "grad_norm": 0.2872596819165396, "learning_rate": 0.0002956007680640776, "loss": 3.13769793510437, "step": 4211, "token_acc": 0.2836105192037836 }, { "epoch": 2.469070653767224, "grad_norm": 0.32233988174839634, "learning_rate": 0.00029559727228949557, "loss": 3.1804490089416504, "step": 4212, "token_acc": 0.2776913749815006 }, { "epoch": 2.469656992084433, "grad_norm": 0.29495945382982525, "learning_rate": 0.00029559377514722335, "loss": 3.1457433700561523, "step": 4213, "token_acc": 0.2807951485922437 }, { "epoch": 2.4702433304016416, "grad_norm": 0.32294445777366326, "learning_rate": 0.0002955902766372938, "loss": 3.1555449962615967, "step": 4214, "token_acc": 0.28004664058522327 }, { "epoch": 2.4708296687188507, "grad_norm": 0.3333422555111827, "learning_rate": 0.00029558677675973983, "loss": 3.1423935890197754, "step": 4215, "token_acc": 0.2835082075886914 }, { "epoch": 2.47141600703606, "grad_norm": 0.31116082835263803, "learning_rate": 0.00029558327551459424, "loss": 3.0912275314331055, "step": 4216, "token_acc": 0.28784021930683373 }, { "epoch": 2.472002345353269, "grad_norm": 0.32525185116017097, "learning_rate": 0.0002955797729018899, "loss": 3.1119723320007324, "step": 4217, "token_acc": 0.2880186869450072 }, { "epoch": 2.472588683670478, "grad_norm": 0.3061107417915048, "learning_rate": 0.0002955762689216599, "loss": 3.0874617099761963, "step": 4218, "token_acc": 0.29053891318193736 }, { "epoch": 2.4731750219876867, "grad_norm": 0.3524930550252353, "learning_rate": 0.0002955727635739369, "loss": 3.1435797214508057, "step": 4219, "token_acc": 0.28136873087755826 }, { "epoch": 2.473761360304896, "grad_norm": 0.3253360138174695, "learning_rate": 0.00029556925685875397, "loss": 3.1105964183807373, "step": 4220, "token_acc": 0.2891587646342109 }, { "epoch": 2.474347698622105, "grad_norm": 0.3519338953765347, "learning_rate": 0.00029556574877614414, "loss": 3.1222329139709473, "step": 4221, "token_acc": 0.2862146354733406 }, { "epoch": 2.474934036939314, "grad_norm": 0.31087372963778626, "learning_rate": 0.0002955622393261401, "loss": 3.107698917388916, "step": 4222, "token_acc": 0.2850278306957142 }, { "epoch": 2.475520375256523, "grad_norm": 0.32991025649227784, "learning_rate": 0.0002955587285087751, "loss": 3.135448455810547, "step": 4223, "token_acc": 0.28400683129854276 }, { "epoch": 2.4761067135737322, "grad_norm": 0.36558085572983107, "learning_rate": 0.0002955552163240819, "loss": 3.1406960487365723, "step": 4224, "token_acc": 0.28434973724837065 }, { "epoch": 2.476693051890941, "grad_norm": 0.3510819427215274, "learning_rate": 0.0002955517027720936, "loss": 3.135343074798584, "step": 4225, "token_acc": 0.2850491544200243 }, { "epoch": 2.47727939020815, "grad_norm": 0.30928077986996405, "learning_rate": 0.0002955481878528431, "loss": 3.0975117683410645, "step": 4226, "token_acc": 0.2896353814107506 }, { "epoch": 2.477865728525359, "grad_norm": 0.34303876384753146, "learning_rate": 0.0002955446715663636, "loss": 3.1281652450561523, "step": 4227, "token_acc": 0.2843875799000313 }, { "epoch": 2.4784520668425682, "grad_norm": 0.288192394799522, "learning_rate": 0.00029554115391268806, "loss": 3.1197304725646973, "step": 4228, "token_acc": 0.2858469970737145 }, { "epoch": 2.4790384051597774, "grad_norm": 0.3202206229172926, "learning_rate": 0.00029553763489184945, "loss": 3.145132303237915, "step": 4229, "token_acc": 0.2832567360074038 }, { "epoch": 2.479624743476986, "grad_norm": 0.34367550376943795, "learning_rate": 0.00029553411450388095, "loss": 3.036426305770874, "step": 4230, "token_acc": 0.29639427888143616 }, { "epoch": 2.480211081794195, "grad_norm": 0.33272174101248536, "learning_rate": 0.0002955305927488155, "loss": 3.1014623641967773, "step": 4231, "token_acc": 0.287030085893128 }, { "epoch": 2.4807974201114043, "grad_norm": 0.2935401257400599, "learning_rate": 0.0002955270696266862, "loss": 3.1379213333129883, "step": 4232, "token_acc": 0.284338440517109 }, { "epoch": 2.4813837584286134, "grad_norm": 0.3113287323525847, "learning_rate": 0.0002955235451375262, "loss": 3.109300136566162, "step": 4233, "token_acc": 0.2875254626534641 }, { "epoch": 2.4819700967458225, "grad_norm": 0.355477084486217, "learning_rate": 0.00029552001928136856, "loss": 3.1507697105407715, "step": 4234, "token_acc": 0.28038723835859497 }, { "epoch": 2.4825564350630316, "grad_norm": 0.3335133025940925, "learning_rate": 0.0002955164920582465, "loss": 3.1125264167785645, "step": 4235, "token_acc": 0.28860916893668365 }, { "epoch": 2.4831427733802403, "grad_norm": 0.29447035353746276, "learning_rate": 0.00029551296346819303, "loss": 3.146637439727783, "step": 4236, "token_acc": 0.28278621073820404 }, { "epoch": 2.4837291116974494, "grad_norm": 0.3200699913914711, "learning_rate": 0.00029550943351124134, "loss": 3.1082613468170166, "step": 4237, "token_acc": 0.28849520710417476 }, { "epoch": 2.4843154500146585, "grad_norm": 0.3356610793835404, "learning_rate": 0.00029550590218742465, "loss": 3.1045634746551514, "step": 4238, "token_acc": 0.28982759931778435 }, { "epoch": 2.4849017883318676, "grad_norm": 0.29371548077260956, "learning_rate": 0.0002955023694967761, "loss": 3.129265785217285, "step": 4239, "token_acc": 0.2829359402722526 }, { "epoch": 2.4854881266490767, "grad_norm": 0.3162517737406707, "learning_rate": 0.0002954988354393288, "loss": 3.174086332321167, "step": 4240, "token_acc": 0.2786354225924685 }, { "epoch": 2.4860744649662854, "grad_norm": 0.3508128545201341, "learning_rate": 0.000295495300015116, "loss": 3.128365993499756, "step": 4241, "token_acc": 0.283498046133179 }, { "epoch": 2.4866608032834945, "grad_norm": 0.2696902572173051, "learning_rate": 0.0002954917632241709, "loss": 3.097126007080078, "step": 4242, "token_acc": 0.2902416885065304 }, { "epoch": 2.4872471416007036, "grad_norm": 0.315232384988462, "learning_rate": 0.00029548822506652683, "loss": 3.114626884460449, "step": 4243, "token_acc": 0.2863126863525797 }, { "epoch": 2.4878334799179127, "grad_norm": 0.3145377426585128, "learning_rate": 0.00029548468554221684, "loss": 3.122138500213623, "step": 4244, "token_acc": 0.2859493749043632 }, { "epoch": 2.488419818235122, "grad_norm": 0.34108521453424073, "learning_rate": 0.0002954811446512743, "loss": 3.1309239864349365, "step": 4245, "token_acc": 0.2838607985159848 }, { "epoch": 2.489006156552331, "grad_norm": 0.30384387372361843, "learning_rate": 0.0002954776023937325, "loss": 3.104358434677124, "step": 4246, "token_acc": 0.28820151101750774 }, { "epoch": 2.4895924948695396, "grad_norm": 0.31865600447866366, "learning_rate": 0.0002954740587696246, "loss": 3.1377036571502686, "step": 4247, "token_acc": 0.2829648969062782 }, { "epoch": 2.4901788331867487, "grad_norm": 0.32152378678503973, "learning_rate": 0.0002954705137789839, "loss": 3.120666027069092, "step": 4248, "token_acc": 0.2863717732936955 }, { "epoch": 2.490765171503958, "grad_norm": 0.31906300216078254, "learning_rate": 0.00029546696742184384, "loss": 3.1133618354797363, "step": 4249, "token_acc": 0.28531728022638086 }, { "epoch": 2.491351509821167, "grad_norm": 0.31299503493825803, "learning_rate": 0.0002954634196982376, "loss": 3.124864101409912, "step": 4250, "token_acc": 0.28509427853409597 }, { "epoch": 2.4919378481383756, "grad_norm": 0.2897941975931226, "learning_rate": 0.0002954598706081985, "loss": 3.1135191917419434, "step": 4251, "token_acc": 0.28712027120576616 }, { "epoch": 2.4925241864555847, "grad_norm": 0.3128995613373998, "learning_rate": 0.00029545632015176, "loss": 3.1187100410461426, "step": 4252, "token_acc": 0.2847487420397716 }, { "epoch": 2.493110524772794, "grad_norm": 0.31561856367490715, "learning_rate": 0.00029545276832895534, "loss": 3.1421236991882324, "step": 4253, "token_acc": 0.28182060835839107 }, { "epoch": 2.493696863090003, "grad_norm": 0.27803548320666305, "learning_rate": 0.00029544921513981794, "loss": 3.1107163429260254, "step": 4254, "token_acc": 0.2889379748124876 }, { "epoch": 2.494283201407212, "grad_norm": 0.314744966893636, "learning_rate": 0.00029544566058438117, "loss": 3.1327579021453857, "step": 4255, "token_acc": 0.28558351043289704 }, { "epoch": 2.494869539724421, "grad_norm": 0.33870765981748074, "learning_rate": 0.0002954421046626784, "loss": 3.112725019454956, "step": 4256, "token_acc": 0.28596951160306827 }, { "epoch": 2.49545587804163, "grad_norm": 0.3553777146326446, "learning_rate": 0.00029543854737474305, "loss": 3.1192259788513184, "step": 4257, "token_acc": 0.2855761277597506 }, { "epoch": 2.496042216358839, "grad_norm": 0.30900343562019056, "learning_rate": 0.0002954349887206085, "loss": 3.1153345108032227, "step": 4258, "token_acc": 0.28593679053019505 }, { "epoch": 2.496628554676048, "grad_norm": 0.32860506662179256, "learning_rate": 0.0002954314287003082, "loss": 3.067028522491455, "step": 4259, "token_acc": 0.29180275610508105 }, { "epoch": 2.497214892993257, "grad_norm": 0.31094513519671807, "learning_rate": 0.00029542786731387563, "loss": 3.0821661949157715, "step": 4260, "token_acc": 0.2925090483676379 }, { "epoch": 2.4978012313104663, "grad_norm": 0.3201587033810314, "learning_rate": 0.00029542430456134416, "loss": 3.166276454925537, "step": 4261, "token_acc": 0.280776188723648 }, { "epoch": 2.498387569627675, "grad_norm": 0.3270154881297289, "learning_rate": 0.0002954207404427474, "loss": 3.091919422149658, "step": 4262, "token_acc": 0.289708317076 }, { "epoch": 2.498973907944884, "grad_norm": 0.33923472568021196, "learning_rate": 0.00029541717495811865, "loss": 3.122898578643799, "step": 4263, "token_acc": 0.2856605681503482 }, { "epoch": 2.499560246262093, "grad_norm": 0.308202116241944, "learning_rate": 0.00029541360810749157, "loss": 3.1659488677978516, "step": 4264, "token_acc": 0.28156263896871236 }, { "epoch": 2.5001465845793023, "grad_norm": 0.3614750298077898, "learning_rate": 0.0002954100398908995, "loss": 3.114520788192749, "step": 4265, "token_acc": 0.28730132965772687 }, { "epoch": 2.5007329228965114, "grad_norm": 0.316542868780964, "learning_rate": 0.00029540647030837613, "loss": 3.1104397773742676, "step": 4266, "token_acc": 0.2872000652990724 }, { "epoch": 2.5013192612137205, "grad_norm": 0.32958127722448066, "learning_rate": 0.0002954028993599549, "loss": 3.1017701625823975, "step": 4267, "token_acc": 0.28864066111072206 }, { "epoch": 2.5019055995309296, "grad_norm": 0.2699079753546184, "learning_rate": 0.00029539932704566936, "loss": 3.1212515830993652, "step": 4268, "token_acc": 0.2850652823319054 }, { "epoch": 2.5024919378481383, "grad_norm": 0.32981919795996034, "learning_rate": 0.00029539575336555305, "loss": 3.1233763694763184, "step": 4269, "token_acc": 0.28456277541132735 }, { "epoch": 2.5030782761653474, "grad_norm": 0.28232892417559213, "learning_rate": 0.00029539217831963955, "loss": 3.158236503601074, "step": 4270, "token_acc": 0.28058296519549153 }, { "epoch": 2.5036646144825565, "grad_norm": 0.3192177218110458, "learning_rate": 0.0002953886019079625, "loss": 3.143063545227051, "step": 4271, "token_acc": 0.2840551930575431 }, { "epoch": 2.5042509527997656, "grad_norm": 0.28279937988709414, "learning_rate": 0.00029538502413055536, "loss": 3.093001127243042, "step": 4272, "token_acc": 0.2891821231001556 }, { "epoch": 2.5048372911169743, "grad_norm": 0.31487389756325257, "learning_rate": 0.0002953814449874519, "loss": 3.128673791885376, "step": 4273, "token_acc": 0.2821220661037056 }, { "epoch": 2.5054236294341834, "grad_norm": 0.31742831419528206, "learning_rate": 0.0002953778644786856, "loss": 3.1556921005249023, "step": 4274, "token_acc": 0.28313281848353333 }, { "epoch": 2.5060099677513925, "grad_norm": 0.31815936401486744, "learning_rate": 0.0002953742826042903, "loss": 3.1049160957336426, "step": 4275, "token_acc": 0.28871999872758025 }, { "epoch": 2.5065963060686016, "grad_norm": 0.3503645021974508, "learning_rate": 0.00029537069936429937, "loss": 3.157719135284424, "step": 4276, "token_acc": 0.28207544399627343 }, { "epoch": 2.5071826443858107, "grad_norm": 0.3594402892783687, "learning_rate": 0.00029536711475874666, "loss": 3.1241660118103027, "step": 4277, "token_acc": 0.2852489383913784 }, { "epoch": 2.50776898270302, "grad_norm": 0.34849343226314317, "learning_rate": 0.0002953635287876658, "loss": 3.1454551219940186, "step": 4278, "token_acc": 0.28227923769095253 }, { "epoch": 2.5083553210202285, "grad_norm": 0.3585555438063583, "learning_rate": 0.00029535994145109046, "loss": 3.1272215843200684, "step": 4279, "token_acc": 0.28581487898855157 }, { "epoch": 2.5089416593374376, "grad_norm": 0.3351317717773386, "learning_rate": 0.00029535635274905434, "loss": 3.1277172565460205, "step": 4280, "token_acc": 0.2848978675410932 }, { "epoch": 2.5095279976546467, "grad_norm": 0.30607508534974665, "learning_rate": 0.0002953527626815912, "loss": 3.1315340995788574, "step": 4281, "token_acc": 0.28478914624250173 }, { "epoch": 2.510114335971856, "grad_norm": 0.2918488155099912, "learning_rate": 0.0002953491712487347, "loss": 3.1241111755371094, "step": 4282, "token_acc": 0.2853506817766011 }, { "epoch": 2.5107006742890645, "grad_norm": 0.2994312593529727, "learning_rate": 0.0002953455784505185, "loss": 3.1180672645568848, "step": 4283, "token_acc": 0.285654876787409 }, { "epoch": 2.5112870126062736, "grad_norm": 0.30416809637775405, "learning_rate": 0.00029534198428697654, "loss": 3.1356091499328613, "step": 4284, "token_acc": 0.28221616205439687 }, { "epoch": 2.5118733509234827, "grad_norm": 0.32496246652529037, "learning_rate": 0.0002953383887581425, "loss": 3.149949312210083, "step": 4285, "token_acc": 0.2832623024591683 }, { "epoch": 2.512459689240692, "grad_norm": 0.2956962743833167, "learning_rate": 0.0002953347918640501, "loss": 3.131580352783203, "step": 4286, "token_acc": 0.2859376464943388 }, { "epoch": 2.513046027557901, "grad_norm": 0.2784091394795092, "learning_rate": 0.00029533119360473323, "loss": 3.1316494941711426, "step": 4287, "token_acc": 0.2856677482653807 }, { "epoch": 2.51363236587511, "grad_norm": 0.34004882817463417, "learning_rate": 0.00029532759398022553, "loss": 3.1386194229125977, "step": 4288, "token_acc": 0.2831550623167646 }, { "epoch": 2.514218704192319, "grad_norm": 0.3702305124962639, "learning_rate": 0.000295323992990561, "loss": 3.1292080879211426, "step": 4289, "token_acc": 0.2844236817228404 }, { "epoch": 2.514805042509528, "grad_norm": 0.33489668508845444, "learning_rate": 0.00029532039063577336, "loss": 3.0976943969726562, "step": 4290, "token_acc": 0.28858561562522583 }, { "epoch": 2.515391380826737, "grad_norm": 0.300478838592618, "learning_rate": 0.00029531678691589643, "loss": 3.109555244445801, "step": 4291, "token_acc": 0.2864804367558189 }, { "epoch": 2.515977719143946, "grad_norm": 0.31555278485035515, "learning_rate": 0.0002953131818309642, "loss": 3.131467819213867, "step": 4292, "token_acc": 0.2852478495595795 }, { "epoch": 2.516564057461155, "grad_norm": 0.32770524376508636, "learning_rate": 0.00029530957538101036, "loss": 3.1313812732696533, "step": 4293, "token_acc": 0.28433728077215836 }, { "epoch": 2.517150395778364, "grad_norm": 0.3269669908293961, "learning_rate": 0.00029530596756606887, "loss": 3.1337761878967285, "step": 4294, "token_acc": 0.28435513657739203 }, { "epoch": 2.517736734095573, "grad_norm": 0.34938551979373583, "learning_rate": 0.00029530235838617366, "loss": 3.1000711917877197, "step": 4295, "token_acc": 0.28843630325915215 }, { "epoch": 2.518323072412782, "grad_norm": 0.2869628877369993, "learning_rate": 0.00029529874784135855, "loss": 3.114349365234375, "step": 4296, "token_acc": 0.28658154597579105 }, { "epoch": 2.518909410729991, "grad_norm": 0.3050272365898088, "learning_rate": 0.00029529513593165753, "loss": 3.1176042556762695, "step": 4297, "token_acc": 0.2866637739605401 }, { "epoch": 2.5194957490472003, "grad_norm": 0.30225819434537354, "learning_rate": 0.0002952915226571045, "loss": 3.159348964691162, "step": 4298, "token_acc": 0.2810411175675745 }, { "epoch": 2.5200820873644094, "grad_norm": 0.2856017547883984, "learning_rate": 0.00029528790801773336, "loss": 3.13706636428833, "step": 4299, "token_acc": 0.28298534217747157 }, { "epoch": 2.5206684256816185, "grad_norm": 0.31889563939603155, "learning_rate": 0.00029528429201357813, "loss": 3.126056671142578, "step": 4300, "token_acc": 0.28436832246339155 }, { "epoch": 2.521254763998827, "grad_norm": 0.29718437847148615, "learning_rate": 0.0002952806746446727, "loss": 3.1044211387634277, "step": 4301, "token_acc": 0.287364331491237 }, { "epoch": 2.5218411023160363, "grad_norm": 0.2812343284948562, "learning_rate": 0.0002952770559110511, "loss": 3.110229015350342, "step": 4302, "token_acc": 0.2855062331047399 }, { "epoch": 2.5224274406332454, "grad_norm": 0.29906768468566935, "learning_rate": 0.0002952734358127474, "loss": 3.117032527923584, "step": 4303, "token_acc": 0.28555354610657846 }, { "epoch": 2.5230137789504545, "grad_norm": 0.3017265818305121, "learning_rate": 0.00029526981434979546, "loss": 3.098659038543701, "step": 4304, "token_acc": 0.28831436756292356 }, { "epoch": 2.523600117267663, "grad_norm": 0.3150195391158688, "learning_rate": 0.0002952661915222294, "loss": 3.1114182472229004, "step": 4305, "token_acc": 0.28765731802027455 }, { "epoch": 2.5241864555848723, "grad_norm": 0.3178407889061973, "learning_rate": 0.0002952625673300832, "loss": 3.1247172355651855, "step": 4306, "token_acc": 0.28514862513791056 }, { "epoch": 2.5247727939020814, "grad_norm": 0.295426154146563, "learning_rate": 0.00029525894177339095, "loss": 3.1134142875671387, "step": 4307, "token_acc": 0.285136732603042 }, { "epoch": 2.5253591322192905, "grad_norm": 0.27540679341412566, "learning_rate": 0.0002952553148521867, "loss": 3.139166831970215, "step": 4308, "token_acc": 0.28229051997874627 }, { "epoch": 2.5259454705364996, "grad_norm": 0.3074991356813386, "learning_rate": 0.0002952516865665045, "loss": 3.1365439891815186, "step": 4309, "token_acc": 0.2835927002281179 }, { "epoch": 2.5265318088537088, "grad_norm": 0.3355890767173872, "learning_rate": 0.00029524805691637837, "loss": 3.131884813308716, "step": 4310, "token_acc": 0.2829848938746893 }, { "epoch": 2.527118147170918, "grad_norm": 0.32943775594173325, "learning_rate": 0.0002952444259018425, "loss": 3.0950441360473633, "step": 4311, "token_acc": 0.2897810374262723 }, { "epoch": 2.5277044854881265, "grad_norm": 0.29195384204391867, "learning_rate": 0.000295240793522931, "loss": 3.135244607925415, "step": 4312, "token_acc": 0.2836666639925338 }, { "epoch": 2.5282908238053357, "grad_norm": 0.32707942250435523, "learning_rate": 0.0002952371597796779, "loss": 3.110598564147949, "step": 4313, "token_acc": 0.2868764627569894 }, { "epoch": 2.5288771621225448, "grad_norm": 0.28930101968404526, "learning_rate": 0.00029523352467211744, "loss": 3.1911144256591797, "step": 4314, "token_acc": 0.27720302026228594 }, { "epoch": 2.529463500439754, "grad_norm": 0.29720059197993975, "learning_rate": 0.00029522988820028366, "loss": 3.142454147338867, "step": 4315, "token_acc": 0.28396613648024627 }, { "epoch": 2.5300498387569625, "grad_norm": 0.3176909678364232, "learning_rate": 0.00029522625036421086, "loss": 3.125840663909912, "step": 4316, "token_acc": 0.2853479948927743 }, { "epoch": 2.5306361770741717, "grad_norm": 0.323385488028823, "learning_rate": 0.0002952226111639331, "loss": 3.1766505241394043, "step": 4317, "token_acc": 0.28011322871417577 }, { "epoch": 2.5312225153913808, "grad_norm": 0.5711545762521465, "learning_rate": 0.0002952189705994846, "loss": 3.1467244625091553, "step": 4318, "token_acc": 0.2823093290865852 }, { "epoch": 2.53180885370859, "grad_norm": 0.42517476164799983, "learning_rate": 0.00029521532867089956, "loss": 3.148420810699463, "step": 4319, "token_acc": 0.2810075343025155 }, { "epoch": 2.532395192025799, "grad_norm": 0.4531341666926811, "learning_rate": 0.00029521168537821217, "loss": 3.0976853370666504, "step": 4320, "token_acc": 0.2895007854124222 }, { "epoch": 2.532981530343008, "grad_norm": 0.3601458314074524, "learning_rate": 0.0002952080407214567, "loss": 3.1023125648498535, "step": 4321, "token_acc": 0.2887278768129832 }, { "epoch": 2.533567868660217, "grad_norm": 0.3055958516578653, "learning_rate": 0.00029520439470066736, "loss": 3.102931499481201, "step": 4322, "token_acc": 0.2879292025876562 }, { "epoch": 2.534154206977426, "grad_norm": 0.34252233101167223, "learning_rate": 0.00029520074731587836, "loss": 3.08762788772583, "step": 4323, "token_acc": 0.29119557842140703 }, { "epoch": 2.534740545294635, "grad_norm": 0.31070446588425027, "learning_rate": 0.000295197098567124, "loss": 3.13425350189209, "step": 4324, "token_acc": 0.28301842390139054 }, { "epoch": 2.535326883611844, "grad_norm": 0.33073183951612883, "learning_rate": 0.0002951934484544386, "loss": 3.094320297241211, "step": 4325, "token_acc": 0.2906551310783343 }, { "epoch": 2.535913221929053, "grad_norm": 0.2871215998342306, "learning_rate": 0.00029518979697785633, "loss": 3.0834999084472656, "step": 4326, "token_acc": 0.2900523161316538 }, { "epoch": 2.536499560246262, "grad_norm": 0.32334953741881967, "learning_rate": 0.0002951861441374116, "loss": 3.1239235401153564, "step": 4327, "token_acc": 0.28674789274014995 }, { "epoch": 2.537085898563471, "grad_norm": 0.2962183376528778, "learning_rate": 0.0002951824899331387, "loss": 3.1008574962615967, "step": 4328, "token_acc": 0.2891705696900502 }, { "epoch": 2.53767223688068, "grad_norm": 0.28892001601227174, "learning_rate": 0.00029517883436507193, "loss": 3.1234235763549805, "step": 4329, "token_acc": 0.28476881600551357 }, { "epoch": 2.538258575197889, "grad_norm": 0.322772654724156, "learning_rate": 0.0002951751774332456, "loss": 3.1584339141845703, "step": 4330, "token_acc": 0.2800853961649642 }, { "epoch": 2.5388449135150983, "grad_norm": 0.26843708580399667, "learning_rate": 0.0002951715191376942, "loss": 3.148487091064453, "step": 4331, "token_acc": 0.2811636856424633 }, { "epoch": 2.5394312518323074, "grad_norm": 0.3221691230651787, "learning_rate": 0.00029516785947845194, "loss": 3.1144909858703613, "step": 4332, "token_acc": 0.2861130662275401 }, { "epoch": 2.540017590149516, "grad_norm": 0.32212569801984964, "learning_rate": 0.00029516419845555326, "loss": 3.081233024597168, "step": 4333, "token_acc": 0.29166847744115837 }, { "epoch": 2.5406039284667252, "grad_norm": 0.34875309863963444, "learning_rate": 0.0002951605360690326, "loss": 3.1339614391326904, "step": 4334, "token_acc": 0.2838891260692883 }, { "epoch": 2.5411902667839343, "grad_norm": 0.3005455802712372, "learning_rate": 0.00029515687231892427, "loss": 3.112187385559082, "step": 4335, "token_acc": 0.28716115101761586 }, { "epoch": 2.5417766051011434, "grad_norm": 0.2948602043787114, "learning_rate": 0.0002951532072052627, "loss": 3.1442360877990723, "step": 4336, "token_acc": 0.28142212673434025 }, { "epoch": 2.542362943418352, "grad_norm": 0.30669703887629524, "learning_rate": 0.00029514954072808235, "loss": 3.0923445224761963, "step": 4337, "token_acc": 0.2883934763983518 }, { "epoch": 2.5429492817355612, "grad_norm": 0.314359106506302, "learning_rate": 0.0002951458728874177, "loss": 3.1754751205444336, "step": 4338, "token_acc": 0.27958526614560164 }, { "epoch": 2.5435356200527703, "grad_norm": 0.3019288974426342, "learning_rate": 0.00029514220368330305, "loss": 3.116985559463501, "step": 4339, "token_acc": 0.2853228603019849 }, { "epoch": 2.5441219583699795, "grad_norm": 0.30992999749565403, "learning_rate": 0.0002951385331157731, "loss": 3.1041176319122314, "step": 4340, "token_acc": 0.28871285875515523 }, { "epoch": 2.5447082966871886, "grad_norm": 0.2951211858558363, "learning_rate": 0.00029513486118486215, "loss": 3.1062018871307373, "step": 4341, "token_acc": 0.28826703160146605 }, { "epoch": 2.5452946350043977, "grad_norm": 0.3043654689407853, "learning_rate": 0.0002951311878906048, "loss": 3.128478527069092, "step": 4342, "token_acc": 0.2852092350241292 }, { "epoch": 2.545880973321607, "grad_norm": 0.2949057042583338, "learning_rate": 0.00029512751323303545, "loss": 3.1382389068603516, "step": 4343, "token_acc": 0.28298161174873504 }, { "epoch": 2.5464673116388155, "grad_norm": 0.31525590252254637, "learning_rate": 0.0002951238372121887, "loss": 3.114595890045166, "step": 4344, "token_acc": 0.2859545366785558 }, { "epoch": 2.5470536499560246, "grad_norm": 0.26685994626090065, "learning_rate": 0.00029512015982809906, "loss": 3.1080551147460938, "step": 4345, "token_acc": 0.286880634280277 }, { "epoch": 2.5476399882732337, "grad_norm": 0.31901019579041623, "learning_rate": 0.00029511648108080106, "loss": 3.1524410247802734, "step": 4346, "token_acc": 0.28102772270503384 }, { "epoch": 2.548226326590443, "grad_norm": 0.32382884197008066, "learning_rate": 0.0002951128009703293, "loss": 3.185955286026001, "step": 4347, "token_acc": 0.2775910971879729 }, { "epoch": 2.5488126649076515, "grad_norm": 0.3107217353447862, "learning_rate": 0.00029510911949671824, "loss": 3.1457128524780273, "step": 4348, "token_acc": 0.28192732778592894 }, { "epoch": 2.5493990032248606, "grad_norm": 0.28768705683471113, "learning_rate": 0.00029510543666000263, "loss": 3.098903179168701, "step": 4349, "token_acc": 0.2870544628757129 }, { "epoch": 2.5499853415420697, "grad_norm": 0.346000343460998, "learning_rate": 0.00029510175246021694, "loss": 3.140838146209717, "step": 4350, "token_acc": 0.28419521924130386 }, { "epoch": 2.550571679859279, "grad_norm": 0.30412145309418237, "learning_rate": 0.0002950980668973958, "loss": 3.1386942863464355, "step": 4351, "token_acc": 0.2822545925172673 }, { "epoch": 2.551158018176488, "grad_norm": 0.2917771024965471, "learning_rate": 0.0002950943799715738, "loss": 3.127626419067383, "step": 4352, "token_acc": 0.28530725946619473 }, { "epoch": 2.551744356493697, "grad_norm": 0.3184346330605679, "learning_rate": 0.0002950906916827857, "loss": 3.1056160926818848, "step": 4353, "token_acc": 0.28769003902564966 }, { "epoch": 2.552330694810906, "grad_norm": 0.3366031887115573, "learning_rate": 0.000295087002031066, "loss": 3.10898494720459, "step": 4354, "token_acc": 0.287010877908339 }, { "epoch": 2.552917033128115, "grad_norm": 0.3900294131392648, "learning_rate": 0.0002950833110164495, "loss": 3.1653246879577637, "step": 4355, "token_acc": 0.2783011304417542 }, { "epoch": 2.553503371445324, "grad_norm": 0.39600210284819887, "learning_rate": 0.00029507961863897074, "loss": 3.1169683933258057, "step": 4356, "token_acc": 0.28607577422590713 }, { "epoch": 2.554089709762533, "grad_norm": 0.354412183900009, "learning_rate": 0.0002950759248986645, "loss": 3.1187214851379395, "step": 4357, "token_acc": 0.2864059706782161 }, { "epoch": 2.554676048079742, "grad_norm": 0.33466426404644734, "learning_rate": 0.0002950722297955654, "loss": 3.0871427059173584, "step": 4358, "token_acc": 0.28971643320806284 }, { "epoch": 2.555262386396951, "grad_norm": 0.35524089748517096, "learning_rate": 0.00029506853332970814, "loss": 3.147824287414551, "step": 4359, "token_acc": 0.28206004240680804 }, { "epoch": 2.55584872471416, "grad_norm": 0.3279110375968813, "learning_rate": 0.0002950648355011276, "loss": 3.1067605018615723, "step": 4360, "token_acc": 0.28561821373443513 }, { "epoch": 2.556435063031369, "grad_norm": 0.3370991569712934, "learning_rate": 0.0002950611363098583, "loss": 3.0919687747955322, "step": 4361, "token_acc": 0.2899748481999949 }, { "epoch": 2.557021401348578, "grad_norm": 0.3023634549399652, "learning_rate": 0.0002950574357559352, "loss": 3.170109272003174, "step": 4362, "token_acc": 0.27823921136804575 }, { "epoch": 2.5576077396657872, "grad_norm": 0.31447414054696804, "learning_rate": 0.0002950537338393929, "loss": 3.1639931201934814, "step": 4363, "token_acc": 0.2782095334120488 }, { "epoch": 2.5581940779829964, "grad_norm": 0.2977151656534409, "learning_rate": 0.0002950500305602662, "loss": 3.1461853981018066, "step": 4364, "token_acc": 0.2817006955111794 }, { "epoch": 2.5587804163002055, "grad_norm": 0.27639854561917093, "learning_rate": 0.00029504632591859, "loss": 3.1195225715637207, "step": 4365, "token_acc": 0.2858139642220092 }, { "epoch": 2.559366754617414, "grad_norm": 0.2717349340870264, "learning_rate": 0.000295042619914399, "loss": 3.1506540775299072, "step": 4366, "token_acc": 0.2804789012968571 }, { "epoch": 2.5599530929346233, "grad_norm": 0.28384034115080187, "learning_rate": 0.000295038912547728, "loss": 3.1091511249542236, "step": 4367, "token_acc": 0.2861622933026879 }, { "epoch": 2.5605394312518324, "grad_norm": 0.29981542517857024, "learning_rate": 0.00029503520381861186, "loss": 3.1431097984313965, "step": 4368, "token_acc": 0.2806266597448236 }, { "epoch": 2.5611257695690415, "grad_norm": 0.3211722355759345, "learning_rate": 0.00029503149372708543, "loss": 3.1561903953552246, "step": 4369, "token_acc": 0.2825811623246493 }, { "epoch": 2.56171210788625, "grad_norm": 0.3318296788898076, "learning_rate": 0.0002950277822731835, "loss": 3.1563777923583984, "step": 4370, "token_acc": 0.28234208552935985 }, { "epoch": 2.5622984462034593, "grad_norm": 0.28784693971253433, "learning_rate": 0.0002950240694569411, "loss": 3.125364303588867, "step": 4371, "token_acc": 0.2840758917697542 }, { "epoch": 2.5628847845206684, "grad_norm": 0.29734017209625707, "learning_rate": 0.0002950203552783929, "loss": 3.1248130798339844, "step": 4372, "token_acc": 0.2842547828602124 }, { "epoch": 2.5634711228378775, "grad_norm": 0.2602852303075442, "learning_rate": 0.0002950166397375739, "loss": 3.1469712257385254, "step": 4373, "token_acc": 0.2832765909301297 }, { "epoch": 2.5640574611550866, "grad_norm": 0.2692801805455761, "learning_rate": 0.000295012922834519, "loss": 3.1314332485198975, "step": 4374, "token_acc": 0.28464791848314097 }, { "epoch": 2.5646437994722957, "grad_norm": 0.3151564119152561, "learning_rate": 0.00029500920456926305, "loss": 3.105403184890747, "step": 4375, "token_acc": 0.2869601566110531 }, { "epoch": 2.565230137789505, "grad_norm": 0.33683323513831975, "learning_rate": 0.0002950054849418411, "loss": 3.165574550628662, "step": 4376, "token_acc": 0.2798517244633663 }, { "epoch": 2.5658164761067135, "grad_norm": 0.28715701717167186, "learning_rate": 0.00029500176395228796, "loss": 3.12180233001709, "step": 4377, "token_acc": 0.28493311429241963 }, { "epoch": 2.5664028144239226, "grad_norm": 0.30395840474823715, "learning_rate": 0.00029499804160063866, "loss": 3.117748975753784, "step": 4378, "token_acc": 0.28422642507335777 }, { "epoch": 2.5669891527411317, "grad_norm": 0.29727573089296977, "learning_rate": 0.0002949943178869281, "loss": 3.0939571857452393, "step": 4379, "token_acc": 0.28889755940047895 }, { "epoch": 2.567575491058341, "grad_norm": 0.36473946332686935, "learning_rate": 0.0002949905928111914, "loss": 3.110833168029785, "step": 4380, "token_acc": 0.28693505104984307 }, { "epoch": 2.5681618293755495, "grad_norm": 0.32293143268439933, "learning_rate": 0.0002949868663734634, "loss": 3.1611194610595703, "step": 4381, "token_acc": 0.2802599594953906 }, { "epoch": 2.5687481676927586, "grad_norm": 0.29880298500069874, "learning_rate": 0.00029498313857377915, "loss": 3.092435359954834, "step": 4382, "token_acc": 0.2878354451244477 }, { "epoch": 2.5693345060099677, "grad_norm": 0.3148009530606465, "learning_rate": 0.0002949794094121737, "loss": 3.1207785606384277, "step": 4383, "token_acc": 0.2864595771258519 }, { "epoch": 2.569920844327177, "grad_norm": 0.35304362688843116, "learning_rate": 0.0002949756788886821, "loss": 3.138930320739746, "step": 4384, "token_acc": 0.2838769967886166 }, { "epoch": 2.570507182644386, "grad_norm": 0.3799587311765748, "learning_rate": 0.0002949719470033393, "loss": 3.119027614593506, "step": 4385, "token_acc": 0.28666809973091195 }, { "epoch": 2.571093520961595, "grad_norm": 0.3515453375963114, "learning_rate": 0.0002949682137561804, "loss": 3.1399941444396973, "step": 4386, "token_acc": 0.2856207556131815 }, { "epoch": 2.5716798592788037, "grad_norm": 0.3072085172667316, "learning_rate": 0.0002949644791472405, "loss": 3.1417970657348633, "step": 4387, "token_acc": 0.2811423744232894 }, { "epoch": 2.572266197596013, "grad_norm": 0.3462903309993942, "learning_rate": 0.0002949607431765547, "loss": 3.1345481872558594, "step": 4388, "token_acc": 0.28480670591964746 }, { "epoch": 2.572852535913222, "grad_norm": 0.3250746652358085, "learning_rate": 0.000294957005844158, "loss": 3.113795042037964, "step": 4389, "token_acc": 0.28636478558566 }, { "epoch": 2.573438874230431, "grad_norm": 0.32276943273505265, "learning_rate": 0.00029495326715008556, "loss": 3.1472067832946777, "step": 4390, "token_acc": 0.2831517702726464 }, { "epoch": 2.5740252125476397, "grad_norm": 0.30551538617941953, "learning_rate": 0.0002949495270943725, "loss": 3.120004653930664, "step": 4391, "token_acc": 0.28606939064943976 }, { "epoch": 2.574611550864849, "grad_norm": 0.29785636449092207, "learning_rate": 0.000294945785677054, "loss": 3.1246328353881836, "step": 4392, "token_acc": 0.28412799501084773 }, { "epoch": 2.575197889182058, "grad_norm": 0.29342010828032744, "learning_rate": 0.00029494204289816513, "loss": 3.086207866668701, "step": 4393, "token_acc": 0.29039409759050483 }, { "epoch": 2.575784227499267, "grad_norm": 0.27444581647952726, "learning_rate": 0.0002949382987577411, "loss": 3.1350343227386475, "step": 4394, "token_acc": 0.28434525901826513 }, { "epoch": 2.576370565816476, "grad_norm": 0.29473224835262446, "learning_rate": 0.00029493455325581703, "loss": 3.120664596557617, "step": 4395, "token_acc": 0.2857770704635971 }, { "epoch": 2.5769569041336853, "grad_norm": 0.29736177203859415, "learning_rate": 0.00029493080639242814, "loss": 3.154418468475342, "step": 4396, "token_acc": 0.2791622644606058 }, { "epoch": 2.5775432424508944, "grad_norm": 0.29434797814593494, "learning_rate": 0.0002949270581676096, "loss": 3.202594757080078, "step": 4397, "token_acc": 0.2738442415260315 }, { "epoch": 2.578129580768103, "grad_norm": 0.3057331415442335, "learning_rate": 0.0002949233085813967, "loss": 3.1284661293029785, "step": 4398, "token_acc": 0.2839789315117761 }, { "epoch": 2.578715919085312, "grad_norm": 0.34712178785429587, "learning_rate": 0.0002949195576338246, "loss": 3.1357181072235107, "step": 4399, "token_acc": 0.2839457627118644 }, { "epoch": 2.5793022574025213, "grad_norm": 0.30680029040518214, "learning_rate": 0.0002949158053249285, "loss": 3.1408510208129883, "step": 4400, "token_acc": 0.28282620331864783 }, { "epoch": 2.5798885957197304, "grad_norm": 0.2839143343176179, "learning_rate": 0.00029491205165474367, "loss": 3.0932655334472656, "step": 4401, "token_acc": 0.28915264836470517 }, { "epoch": 2.580474934036939, "grad_norm": 0.351168746015683, "learning_rate": 0.00029490829662330543, "loss": 3.125319004058838, "step": 4402, "token_acc": 0.2865990348191583 }, { "epoch": 2.581061272354148, "grad_norm": 0.32370112940416346, "learning_rate": 0.000294904540230649, "loss": 3.1333746910095215, "step": 4403, "token_acc": 0.2832029194006807 }, { "epoch": 2.5816476106713573, "grad_norm": 0.3021806819239285, "learning_rate": 0.0002949007824768097, "loss": 3.127964973449707, "step": 4404, "token_acc": 0.28453553027597817 }, { "epoch": 2.5822339489885664, "grad_norm": 0.2892606214211373, "learning_rate": 0.00029489702336182275, "loss": 3.065701723098755, "step": 4405, "token_acc": 0.29287432489960047 }, { "epoch": 2.5828202873057755, "grad_norm": 0.3253737997067806, "learning_rate": 0.00029489326288572356, "loss": 3.085606098175049, "step": 4406, "token_acc": 0.29013771407885547 }, { "epoch": 2.5834066256229846, "grad_norm": 0.3208388678077559, "learning_rate": 0.0002948895010485473, "loss": 3.082479953765869, "step": 4407, "token_acc": 0.2925389328567781 }, { "epoch": 2.5839929639401937, "grad_norm": 0.40104695294976167, "learning_rate": 0.00029488573785032955, "loss": 3.125638008117676, "step": 4408, "token_acc": 0.2870836203261635 }, { "epoch": 2.5845793022574024, "grad_norm": 0.336878912008137, "learning_rate": 0.0002948819732911055, "loss": 3.0985116958618164, "step": 4409, "token_acc": 0.2889409385709314 }, { "epoch": 2.5851656405746115, "grad_norm": 0.3775450529158059, "learning_rate": 0.00029487820737091053, "loss": 3.1670422554016113, "step": 4410, "token_acc": 0.2793131373489429 }, { "epoch": 2.5857519788918206, "grad_norm": 0.363154698614877, "learning_rate": 0.00029487444008978003, "loss": 3.065610408782959, "step": 4411, "token_acc": 0.29208049587087187 }, { "epoch": 2.5863383172090297, "grad_norm": 0.3270768057389953, "learning_rate": 0.0002948706714477494, "loss": 3.159055471420288, "step": 4412, "token_acc": 0.28089671274863 }, { "epoch": 2.5869246555262384, "grad_norm": 0.3609294287854356, "learning_rate": 0.000294866901444854, "loss": 3.093126058578491, "step": 4413, "token_acc": 0.2893595756531516 }, { "epoch": 2.5875109938434475, "grad_norm": 0.3052366351885031, "learning_rate": 0.00029486313008112927, "loss": 3.1404781341552734, "step": 4414, "token_acc": 0.2833371942423317 }, { "epoch": 2.5880973321606566, "grad_norm": 0.30651872213848946, "learning_rate": 0.00029485935735661063, "loss": 3.103621482849121, "step": 4415, "token_acc": 0.2880036348572116 }, { "epoch": 2.5886836704778657, "grad_norm": 0.32659771474955773, "learning_rate": 0.0002948555832713336, "loss": 3.0994062423706055, "step": 4416, "token_acc": 0.28908911532117587 }, { "epoch": 2.589270008795075, "grad_norm": 0.3286291408074442, "learning_rate": 0.0002948518078253335, "loss": 3.1457695960998535, "step": 4417, "token_acc": 0.28193285221211173 }, { "epoch": 2.589856347112284, "grad_norm": 0.2847441453989785, "learning_rate": 0.00029484803101864583, "loss": 3.173375368118286, "step": 4418, "token_acc": 0.2788848465881678 }, { "epoch": 2.590442685429493, "grad_norm": 0.3270679224976089, "learning_rate": 0.00029484425285130613, "loss": 3.0986316204071045, "step": 4419, "token_acc": 0.28881507581661103 }, { "epoch": 2.5910290237467017, "grad_norm": 0.27752761878569976, "learning_rate": 0.00029484047332334985, "loss": 3.118795871734619, "step": 4420, "token_acc": 0.2868318628986941 }, { "epoch": 2.591615362063911, "grad_norm": 0.3178909534396568, "learning_rate": 0.00029483669243481254, "loss": 3.115995168685913, "step": 4421, "token_acc": 0.28604252667653246 }, { "epoch": 2.59220170038112, "grad_norm": 0.3080345373551721, "learning_rate": 0.0002948329101857296, "loss": 3.121906280517578, "step": 4422, "token_acc": 0.2859240930678054 }, { "epoch": 2.592788038698329, "grad_norm": 0.348775077062953, "learning_rate": 0.0002948291265761367, "loss": 3.074571132659912, "step": 4423, "token_acc": 0.29275791091761905 }, { "epoch": 2.5933743770155377, "grad_norm": 0.27911861216986267, "learning_rate": 0.0002948253416060693, "loss": 3.147961139678955, "step": 4424, "token_acc": 0.28228315979965446 }, { "epoch": 2.593960715332747, "grad_norm": 0.3384160526771754, "learning_rate": 0.00029482155527556296, "loss": 3.1292662620544434, "step": 4425, "token_acc": 0.2843295357991295 }, { "epoch": 2.594547053649956, "grad_norm": 0.3408272751847346, "learning_rate": 0.00029481776758465323, "loss": 3.1380972862243652, "step": 4426, "token_acc": 0.28467994501235155 }, { "epoch": 2.595133391967165, "grad_norm": 0.3126027704893727, "learning_rate": 0.00029481397853337575, "loss": 3.09269380569458, "step": 4427, "token_acc": 0.2888217349658504 }, { "epoch": 2.595719730284374, "grad_norm": 0.2881480346729404, "learning_rate": 0.00029481018812176605, "loss": 3.0983381271362305, "step": 4428, "token_acc": 0.2885320098234606 }, { "epoch": 2.5963060686015833, "grad_norm": 0.344508246663763, "learning_rate": 0.0002948063963498598, "loss": 3.105874538421631, "step": 4429, "token_acc": 0.28835819038261623 }, { "epoch": 2.5968924069187924, "grad_norm": 0.3583760620853102, "learning_rate": 0.0002948026032176926, "loss": 3.129619598388672, "step": 4430, "token_acc": 0.28583458011551943 }, { "epoch": 2.597478745236001, "grad_norm": 0.31829518365844917, "learning_rate": 0.0002947988087253, "loss": 3.1357240676879883, "step": 4431, "token_acc": 0.28329746321843136 }, { "epoch": 2.59806508355321, "grad_norm": 0.3003434767787419, "learning_rate": 0.00029479501287271774, "loss": 3.1525259017944336, "step": 4432, "token_acc": 0.2789810927358752 }, { "epoch": 2.5986514218704193, "grad_norm": 0.3385922992762416, "learning_rate": 0.0002947912156599815, "loss": 3.121633529663086, "step": 4433, "token_acc": 0.28573407792650696 }, { "epoch": 2.5992377601876284, "grad_norm": 0.3716887587928683, "learning_rate": 0.00029478741708712685, "loss": 3.115966558456421, "step": 4434, "token_acc": 0.2847943947346327 }, { "epoch": 2.599824098504837, "grad_norm": 0.3004205571408154, "learning_rate": 0.00029478361715418953, "loss": 3.1105775833129883, "step": 4435, "token_acc": 0.28791223738176946 }, { "epoch": 2.600410436822046, "grad_norm": 0.31006673035742477, "learning_rate": 0.0002947798158612052, "loss": 3.109954833984375, "step": 4436, "token_acc": 0.28654567446696766 }, { "epoch": 2.6009967751392553, "grad_norm": 0.30977658998328045, "learning_rate": 0.0002947760132082096, "loss": 3.082505941390991, "step": 4437, "token_acc": 0.2898430184810299 }, { "epoch": 2.6015831134564644, "grad_norm": 0.3059373897227026, "learning_rate": 0.0002947722091952385, "loss": 3.123225450515747, "step": 4438, "token_acc": 0.28598095803425677 }, { "epoch": 2.6021694517736735, "grad_norm": 0.30179314557423453, "learning_rate": 0.0002947684038223275, "loss": 3.104154348373413, "step": 4439, "token_acc": 0.2862873429747704 }, { "epoch": 2.6027557900908826, "grad_norm": 0.2845127059378352, "learning_rate": 0.0002947645970895125, "loss": 3.113457441329956, "step": 4440, "token_acc": 0.2861876554083447 }, { "epoch": 2.6033421284080913, "grad_norm": 0.3001924954573138, "learning_rate": 0.0002947607889968291, "loss": 3.130687713623047, "step": 4441, "token_acc": 0.28544855596861896 }, { "epoch": 2.6039284667253004, "grad_norm": 0.3334417464516731, "learning_rate": 0.0002947569795443132, "loss": 3.160916805267334, "step": 4442, "token_acc": 0.28023702705599535 }, { "epoch": 2.6045148050425095, "grad_norm": 0.3241429607638957, "learning_rate": 0.00029475316873200057, "loss": 3.135272979736328, "step": 4443, "token_acc": 0.2831368738257011 }, { "epoch": 2.6051011433597187, "grad_norm": 0.30111457204944936, "learning_rate": 0.00029474935655992695, "loss": 3.103388786315918, "step": 4444, "token_acc": 0.28825775451542324 }, { "epoch": 2.6056874816769273, "grad_norm": 0.3710035222902008, "learning_rate": 0.00029474554302812817, "loss": 3.15543270111084, "step": 4445, "token_acc": 0.2791620079167837 }, { "epoch": 2.6062738199941364, "grad_norm": 0.3334417052254881, "learning_rate": 0.00029474172813664007, "loss": 3.1264641284942627, "step": 4446, "token_acc": 0.28281221666709844 }, { "epoch": 2.6068601583113455, "grad_norm": 0.3298689803948548, "learning_rate": 0.0002947379118854984, "loss": 3.1219534873962402, "step": 4447, "token_acc": 0.2847492237910333 }, { "epoch": 2.6074464966285547, "grad_norm": 0.3077807031188826, "learning_rate": 0.0002947340942747392, "loss": 3.1514925956726074, "step": 4448, "token_acc": 0.2815697623571877 }, { "epoch": 2.6080328349457638, "grad_norm": 0.29836743433400387, "learning_rate": 0.00029473027530439814, "loss": 3.0916762351989746, "step": 4449, "token_acc": 0.29010443749401754 }, { "epoch": 2.608619173262973, "grad_norm": 0.3293911453983814, "learning_rate": 0.00029472645497451123, "loss": 3.0977602005004883, "step": 4450, "token_acc": 0.28738383956538127 }, { "epoch": 2.609205511580182, "grad_norm": 0.31931957019786134, "learning_rate": 0.00029472263328511426, "loss": 3.120115280151367, "step": 4451, "token_acc": 0.28677817613357254 }, { "epoch": 2.6097918498973907, "grad_norm": 0.26689231153298926, "learning_rate": 0.0002947188102362432, "loss": 3.1187400817871094, "step": 4452, "token_acc": 0.28688228711691877 }, { "epoch": 2.6103781882145998, "grad_norm": 0.3180540684089845, "learning_rate": 0.0002947149858279339, "loss": 3.1094088554382324, "step": 4453, "token_acc": 0.2863388846933503 }, { "epoch": 2.610964526531809, "grad_norm": 0.29389254654758384, "learning_rate": 0.0002947111600602223, "loss": 3.1249537467956543, "step": 4454, "token_acc": 0.28545080604314926 }, { "epoch": 2.611550864849018, "grad_norm": 0.28507612272920185, "learning_rate": 0.00029470733293314433, "loss": 3.097743511199951, "step": 4455, "token_acc": 0.2896060779681503 }, { "epoch": 2.6121372031662267, "grad_norm": 0.30219065686933205, "learning_rate": 0.000294703504446736, "loss": 3.0970048904418945, "step": 4456, "token_acc": 0.2881431963432649 }, { "epoch": 2.6127235414834358, "grad_norm": 0.2873429390782826, "learning_rate": 0.00029469967460103323, "loss": 3.1402952671051025, "step": 4457, "token_acc": 0.28191646528708464 }, { "epoch": 2.613309879800645, "grad_norm": 0.28657919296288137, "learning_rate": 0.00029469584339607204, "loss": 3.1217007637023926, "step": 4458, "token_acc": 0.2844849986734172 }, { "epoch": 2.613896218117854, "grad_norm": 0.2880679747077227, "learning_rate": 0.0002946920108318884, "loss": 3.134267807006836, "step": 4459, "token_acc": 0.2840379782770984 }, { "epoch": 2.614482556435063, "grad_norm": 0.3012431186024602, "learning_rate": 0.0002946881769085182, "loss": 3.173287868499756, "step": 4460, "token_acc": 0.27856562753795217 }, { "epoch": 2.615068894752272, "grad_norm": 0.27696098180741774, "learning_rate": 0.0002946843416259976, "loss": 3.1313071250915527, "step": 4461, "token_acc": 0.283089091717668 }, { "epoch": 2.6156552330694813, "grad_norm": 0.28783535693619433, "learning_rate": 0.00029468050498436256, "loss": 3.100653886795044, "step": 4462, "token_acc": 0.2871162687039043 }, { "epoch": 2.61624157138669, "grad_norm": 0.2986841496382395, "learning_rate": 0.00029467666698364915, "loss": 3.082475423812866, "step": 4463, "token_acc": 0.290224406625505 }, { "epoch": 2.616827909703899, "grad_norm": 0.3123607023323966, "learning_rate": 0.0002946728276238934, "loss": 3.0614943504333496, "step": 4464, "token_acc": 0.2940618170465189 }, { "epoch": 2.6174142480211082, "grad_norm": 0.3483738296595411, "learning_rate": 0.00029466898690513134, "loss": 3.1398444175720215, "step": 4465, "token_acc": 0.28255523145476197 }, { "epoch": 2.6180005863383173, "grad_norm": 0.34501401573811336, "learning_rate": 0.00029466514482739915, "loss": 3.130096435546875, "step": 4466, "token_acc": 0.28659926040596195 }, { "epoch": 2.618586924655526, "grad_norm": 0.34134563497421655, "learning_rate": 0.0002946613013907329, "loss": 3.142551898956299, "step": 4467, "token_acc": 0.2841445986364808 }, { "epoch": 2.619173262972735, "grad_norm": 0.28983829480261003, "learning_rate": 0.00029465745659516856, "loss": 3.0939371585845947, "step": 4468, "token_acc": 0.28951004604113284 }, { "epoch": 2.6197596012899442, "grad_norm": 0.26830473908993374, "learning_rate": 0.0002946536104407424, "loss": 3.0971415042877197, "step": 4469, "token_acc": 0.28951914182479777 }, { "epoch": 2.6203459396071533, "grad_norm": 0.2982651301678548, "learning_rate": 0.00029464976292749046, "loss": 3.1109986305236816, "step": 4470, "token_acc": 0.2855461937679911 }, { "epoch": 2.6209322779243625, "grad_norm": 0.30722198286480995, "learning_rate": 0.00029464591405544896, "loss": 3.0927956104278564, "step": 4471, "token_acc": 0.2898912792042563 }, { "epoch": 2.6215186162415716, "grad_norm": 0.30232189063020365, "learning_rate": 0.00029464206382465397, "loss": 3.0823330879211426, "step": 4472, "token_acc": 0.2922501769275899 }, { "epoch": 2.6221049545587807, "grad_norm": 0.27353034870617754, "learning_rate": 0.00029463821223514174, "loss": 3.1145758628845215, "step": 4473, "token_acc": 0.28739488671749736 }, { "epoch": 2.6226912928759893, "grad_norm": 0.2972198770220653, "learning_rate": 0.0002946343592869484, "loss": 3.168844699859619, "step": 4474, "token_acc": 0.27997429636293536 }, { "epoch": 2.6232776311931985, "grad_norm": 0.3669913922981606, "learning_rate": 0.00029463050498011007, "loss": 3.1001155376434326, "step": 4475, "token_acc": 0.2881258315757477 }, { "epoch": 2.6238639695104076, "grad_norm": 0.36798295599954267, "learning_rate": 0.00029462664931466316, "loss": 3.1225318908691406, "step": 4476, "token_acc": 0.2845547470087015 }, { "epoch": 2.6244503078276167, "grad_norm": 0.3046180335034568, "learning_rate": 0.00029462279229064365, "loss": 3.1383934020996094, "step": 4477, "token_acc": 0.2831128585744542 }, { "epoch": 2.6250366461448253, "grad_norm": 0.35624790695411007, "learning_rate": 0.000294618933908088, "loss": 3.0853304862976074, "step": 4478, "token_acc": 0.2901535511573708 }, { "epoch": 2.6256229844620345, "grad_norm": 0.3970435949496295, "learning_rate": 0.0002946150741670323, "loss": 3.115877151489258, "step": 4479, "token_acc": 0.2864246762280501 }, { "epoch": 2.6262093227792436, "grad_norm": 0.313722321332007, "learning_rate": 0.0002946112130675128, "loss": 3.1043930053710938, "step": 4480, "token_acc": 0.288219149840869 }, { "epoch": 2.6267956610964527, "grad_norm": 0.3308439121741794, "learning_rate": 0.00029460735060956586, "loss": 3.1465582847595215, "step": 4481, "token_acc": 0.2827420566766853 }, { "epoch": 2.627381999413662, "grad_norm": 0.32563490913710724, "learning_rate": 0.00029460348679322774, "loss": 3.106356143951416, "step": 4482, "token_acc": 0.28769485101831094 }, { "epoch": 2.627968337730871, "grad_norm": 0.329831870898015, "learning_rate": 0.0002945996216185347, "loss": 3.0992166996002197, "step": 4483, "token_acc": 0.28703242758301234 }, { "epoch": 2.62855467604808, "grad_norm": 0.31865717732894344, "learning_rate": 0.00029459575508552306, "loss": 3.1208362579345703, "step": 4484, "token_acc": 0.2859584060500291 }, { "epoch": 2.6291410143652887, "grad_norm": 0.3259990229745666, "learning_rate": 0.00029459188719422913, "loss": 3.15392804145813, "step": 4485, "token_acc": 0.2807935548016113 }, { "epoch": 2.629727352682498, "grad_norm": 0.31743075077054056, "learning_rate": 0.0002945880179446893, "loss": 3.113034725189209, "step": 4486, "token_acc": 0.2870712455896948 }, { "epoch": 2.630313690999707, "grad_norm": 0.26805614139764844, "learning_rate": 0.0002945841473369399, "loss": 3.109351396560669, "step": 4487, "token_acc": 0.2884074907008025 }, { "epoch": 2.630900029316916, "grad_norm": 0.2759313414608668, "learning_rate": 0.0002945802753710172, "loss": 3.1152775287628174, "step": 4488, "token_acc": 0.2862446420729621 }, { "epoch": 2.6314863676341247, "grad_norm": 0.28283616148595353, "learning_rate": 0.0002945764020469576, "loss": 3.146833658218384, "step": 4489, "token_acc": 0.28164509082028916 }, { "epoch": 2.632072705951334, "grad_norm": 0.2667966763143784, "learning_rate": 0.0002945725273647976, "loss": 3.077296495437622, "step": 4490, "token_acc": 0.2898549954146121 }, { "epoch": 2.632659044268543, "grad_norm": 0.29665867311703914, "learning_rate": 0.0002945686513245735, "loss": 3.1546359062194824, "step": 4491, "token_acc": 0.28165909336795414 }, { "epoch": 2.633245382585752, "grad_norm": 0.33504356040719213, "learning_rate": 0.00029456477392632177, "loss": 3.1974258422851562, "step": 4492, "token_acc": 0.27598467416993766 }, { "epoch": 2.633831720902961, "grad_norm": 0.33291768005297473, "learning_rate": 0.0002945608951700787, "loss": 3.099846601486206, "step": 4493, "token_acc": 0.2892484418789476 }, { "epoch": 2.6344180592201702, "grad_norm": 0.29610885311088064, "learning_rate": 0.0002945570150558809, "loss": 3.1082544326782227, "step": 4494, "token_acc": 0.28674563058727837 }, { "epoch": 2.635004397537379, "grad_norm": 0.2908536717429911, "learning_rate": 0.00029455313358376473, "loss": 3.132927656173706, "step": 4495, "token_acc": 0.28427612525683515 }, { "epoch": 2.635590735854588, "grad_norm": 0.28378918250306284, "learning_rate": 0.00029454925075376656, "loss": 3.116621971130371, "step": 4496, "token_acc": 0.2869566856308949 }, { "epoch": 2.636177074171797, "grad_norm": 0.3087509984504465, "learning_rate": 0.0002945453665659231, "loss": 3.1072750091552734, "step": 4497, "token_acc": 0.28671364544521843 }, { "epoch": 2.6367634124890063, "grad_norm": 0.3262741522741202, "learning_rate": 0.0002945414810202706, "loss": 3.1088879108428955, "step": 4498, "token_acc": 0.28843046386910604 }, { "epoch": 2.637349750806215, "grad_norm": 0.29049905294695677, "learning_rate": 0.00029453759411684566, "loss": 3.1208126544952393, "step": 4499, "token_acc": 0.2851344120270098 }, { "epoch": 2.637936089123424, "grad_norm": 0.26623810362851663, "learning_rate": 0.00029453370585568486, "loss": 3.104092597961426, "step": 4500, "token_acc": 0.2886932090190631 }, { "epoch": 2.638522427440633, "grad_norm": 0.315278410294714, "learning_rate": 0.00029452981623682463, "loss": 3.1607017517089844, "step": 4501, "token_acc": 0.28145554448245214 }, { "epoch": 2.6391087657578423, "grad_norm": 0.2944853279733132, "learning_rate": 0.0002945259252603015, "loss": 3.0929694175720215, "step": 4502, "token_acc": 0.2894278193899704 }, { "epoch": 2.6396951040750514, "grad_norm": 0.3269992913761978, "learning_rate": 0.00029452203292615206, "loss": 3.123652935028076, "step": 4503, "token_acc": 0.28667023347057774 }, { "epoch": 2.6402814423922605, "grad_norm": 0.32976511746561943, "learning_rate": 0.0002945181392344129, "loss": 3.0861825942993164, "step": 4504, "token_acc": 0.29020750232942827 }, { "epoch": 2.6408677807094696, "grad_norm": 0.29676788652328, "learning_rate": 0.00029451424418512053, "loss": 3.1230692863464355, "step": 4505, "token_acc": 0.28456207902301867 }, { "epoch": 2.6414541190266783, "grad_norm": 0.35589622171526314, "learning_rate": 0.00029451034777831157, "loss": 3.123164415359497, "step": 4506, "token_acc": 0.2836371358603467 }, { "epoch": 2.6420404573438874, "grad_norm": 0.3013563670644299, "learning_rate": 0.00029450645001402267, "loss": 3.1289210319519043, "step": 4507, "token_acc": 0.28232574437887126 }, { "epoch": 2.6426267956610965, "grad_norm": 0.29141965059391456, "learning_rate": 0.00029450255089229037, "loss": 3.106834888458252, "step": 4508, "token_acc": 0.28829949758241064 }, { "epoch": 2.6432131339783056, "grad_norm": 0.3355742036917519, "learning_rate": 0.0002944986504131513, "loss": 3.1411213874816895, "step": 4509, "token_acc": 0.28313926346818186 }, { "epoch": 2.6437994722955143, "grad_norm": 0.31581921994962364, "learning_rate": 0.00029449474857664215, "loss": 3.120835781097412, "step": 4510, "token_acc": 0.28425769020880604 }, { "epoch": 2.6443858106127234, "grad_norm": 0.3078142545872994, "learning_rate": 0.0002944908453827995, "loss": 3.143155097961426, "step": 4511, "token_acc": 0.2822982052251781 }, { "epoch": 2.6449721489299325, "grad_norm": 0.3020452815068232, "learning_rate": 0.00029448694083166014, "loss": 3.137824773788452, "step": 4512, "token_acc": 0.28304180368773274 }, { "epoch": 2.6455584872471416, "grad_norm": 0.31357739076310065, "learning_rate": 0.00029448303492326063, "loss": 3.1432530879974365, "step": 4513, "token_acc": 0.2817049622750285 }, { "epoch": 2.6461448255643507, "grad_norm": 0.3057451241488454, "learning_rate": 0.0002944791276576377, "loss": 3.1200733184814453, "step": 4514, "token_acc": 0.2836271147975634 }, { "epoch": 2.64673116388156, "grad_norm": 0.3048074265509225, "learning_rate": 0.000294475219034828, "loss": 3.127669334411621, "step": 4515, "token_acc": 0.2838378237939504 }, { "epoch": 2.647317502198769, "grad_norm": 0.29286276645956855, "learning_rate": 0.0002944713090548684, "loss": 3.141080379486084, "step": 4516, "token_acc": 0.2828118099636592 }, { "epoch": 2.6479038405159776, "grad_norm": 0.32012666472721935, "learning_rate": 0.00029446739771779546, "loss": 3.1143648624420166, "step": 4517, "token_acc": 0.28570622637606485 }, { "epoch": 2.6484901788331867, "grad_norm": 0.3098964459752691, "learning_rate": 0.000294463485023646, "loss": 3.1328814029693604, "step": 4518, "token_acc": 0.28421250759882283 }, { "epoch": 2.649076517150396, "grad_norm": 0.36122983490793154, "learning_rate": 0.00029445957097245677, "loss": 3.133930206298828, "step": 4519, "token_acc": 0.2832190942150153 }, { "epoch": 2.649662855467605, "grad_norm": 0.3248845972291869, "learning_rate": 0.00029445565556426455, "loss": 3.110642433166504, "step": 4520, "token_acc": 0.28643859417388917 }, { "epoch": 2.6502491937848136, "grad_norm": 0.3001539251248104, "learning_rate": 0.00029445173879910614, "loss": 3.1062188148498535, "step": 4521, "token_acc": 0.2881914808060001 }, { "epoch": 2.6508355321020227, "grad_norm": 0.32272285485525526, "learning_rate": 0.0002944478206770182, "loss": 3.125244140625, "step": 4522, "token_acc": 0.28406570716263135 }, { "epoch": 2.651421870419232, "grad_norm": 0.29863892966832806, "learning_rate": 0.0002944439011980377, "loss": 3.145256519317627, "step": 4523, "token_acc": 0.28213299288013854 }, { "epoch": 2.652008208736441, "grad_norm": 0.34975891055417735, "learning_rate": 0.0002944399803622014, "loss": 3.1016664505004883, "step": 4524, "token_acc": 0.28941927004023277 }, { "epoch": 2.65259454705365, "grad_norm": 0.34158336586814353, "learning_rate": 0.0002944360581695461, "loss": 3.12544584274292, "step": 4525, "token_acc": 0.283081654905672 }, { "epoch": 2.653180885370859, "grad_norm": 0.32150714508136635, "learning_rate": 0.0002944321346201086, "loss": 3.0926437377929688, "step": 4526, "token_acc": 0.2888411742084348 }, { "epoch": 2.6537672236880683, "grad_norm": 0.3014131286141054, "learning_rate": 0.00029442820971392587, "loss": 3.14554500579834, "step": 4527, "token_acc": 0.2824575775931772 }, { "epoch": 2.654353562005277, "grad_norm": 0.34735523697570025, "learning_rate": 0.0002944242834510347, "loss": 3.1762454509735107, "step": 4528, "token_acc": 0.2777713078783965 }, { "epoch": 2.654939900322486, "grad_norm": 0.29553390604927027, "learning_rate": 0.0002944203558314721, "loss": 3.112521171569824, "step": 4529, "token_acc": 0.2859794551637222 }, { "epoch": 2.655526238639695, "grad_norm": 0.2914088940190923, "learning_rate": 0.00029441642685527474, "loss": 3.120229721069336, "step": 4530, "token_acc": 0.28803966571102585 }, { "epoch": 2.6561125769569043, "grad_norm": 0.2824668571007022, "learning_rate": 0.00029441249652247973, "loss": 3.10982084274292, "step": 4531, "token_acc": 0.2857464670280986 }, { "epoch": 2.656698915274113, "grad_norm": 0.30158480374992164, "learning_rate": 0.00029440856483312387, "loss": 3.167297840118408, "step": 4532, "token_acc": 0.28153036634759465 }, { "epoch": 2.657285253591322, "grad_norm": 0.2824233444090316, "learning_rate": 0.00029440463178724417, "loss": 3.132747173309326, "step": 4533, "token_acc": 0.28391464191510757 }, { "epoch": 2.657871591908531, "grad_norm": 0.2933281838432, "learning_rate": 0.00029440069738487755, "loss": 3.1250252723693848, "step": 4534, "token_acc": 0.28506260467341893 }, { "epoch": 2.6584579302257403, "grad_norm": 0.3115483896501584, "learning_rate": 0.00029439676162606093, "loss": 3.159391164779663, "step": 4535, "token_acc": 0.28141971758993034 }, { "epoch": 2.6590442685429494, "grad_norm": 0.258356620037849, "learning_rate": 0.00029439282451083134, "loss": 3.081395149230957, "step": 4536, "token_acc": 0.29090324230687087 }, { "epoch": 2.6596306068601585, "grad_norm": 0.299475317586854, "learning_rate": 0.0002943888860392257, "loss": 3.1337337493896484, "step": 4537, "token_acc": 0.28276083860396023 }, { "epoch": 2.660216945177367, "grad_norm": 0.2966300883076466, "learning_rate": 0.00029438494621128106, "loss": 3.1177215576171875, "step": 4538, "token_acc": 0.2865350099286042 }, { "epoch": 2.6608032834945763, "grad_norm": 0.28548170475634843, "learning_rate": 0.00029438100502703437, "loss": 3.1744470596313477, "step": 4539, "token_acc": 0.27856233433884203 }, { "epoch": 2.6613896218117854, "grad_norm": 0.3181981594401044, "learning_rate": 0.0002943770624865228, "loss": 3.1670281887054443, "step": 4540, "token_acc": 0.27970499421160433 }, { "epoch": 2.6619759601289945, "grad_norm": 0.2919753612172915, "learning_rate": 0.0002943731185897832, "loss": 3.124089241027832, "step": 4541, "token_acc": 0.2854420072268309 }, { "epoch": 2.6625622984462036, "grad_norm": 0.28565784700829333, "learning_rate": 0.0002943691733368527, "loss": 3.1125121116638184, "step": 4542, "token_acc": 0.28676200953428677 }, { "epoch": 2.6631486367634123, "grad_norm": 0.2764752263679167, "learning_rate": 0.0002943652267277684, "loss": 3.0768280029296875, "step": 4543, "token_acc": 0.2912662667856624 }, { "epoch": 2.6637349750806214, "grad_norm": 0.2745505550541349, "learning_rate": 0.00029436127876256727, "loss": 3.1277854442596436, "step": 4544, "token_acc": 0.284402669935976 }, { "epoch": 2.6643213133978305, "grad_norm": 0.2898922636639875, "learning_rate": 0.0002943573294412865, "loss": 3.143904209136963, "step": 4545, "token_acc": 0.28221834967341997 }, { "epoch": 2.6649076517150396, "grad_norm": 0.3042838509865545, "learning_rate": 0.0002943533787639631, "loss": 3.1115176677703857, "step": 4546, "token_acc": 0.28923420078307366 }, { "epoch": 2.6654939900322487, "grad_norm": 0.29094634315956897, "learning_rate": 0.00029434942673063424, "loss": 3.14797306060791, "step": 4547, "token_acc": 0.2822168026944795 }, { "epoch": 2.666080328349458, "grad_norm": 0.31502645124894507, "learning_rate": 0.00029434547334133705, "loss": 3.147404670715332, "step": 4548, "token_acc": 0.2816955424377957 }, { "epoch": 2.6666666666666665, "grad_norm": 0.3160527127138627, "learning_rate": 0.00029434151859610865, "loss": 3.081361770629883, "step": 4549, "token_acc": 0.2909261903381999 }, { "epoch": 2.6672530049838756, "grad_norm": 0.2572537006245006, "learning_rate": 0.0002943375624949862, "loss": 3.132317066192627, "step": 4550, "token_acc": 0.2829082721944024 }, { "epoch": 2.6678393433010847, "grad_norm": 0.2918468640805991, "learning_rate": 0.00029433360503800676, "loss": 3.1263396739959717, "step": 4551, "token_acc": 0.2842974030968952 }, { "epoch": 2.668425681618294, "grad_norm": 0.29744403577202044, "learning_rate": 0.0002943296462252077, "loss": 3.1271743774414062, "step": 4552, "token_acc": 0.2835396825822292 }, { "epoch": 2.6690120199355025, "grad_norm": 0.38012510218807855, "learning_rate": 0.000294325686056626, "loss": 3.115926742553711, "step": 4553, "token_acc": 0.2847607948379371 }, { "epoch": 2.6695983582527116, "grad_norm": 0.35113155142498303, "learning_rate": 0.000294321724532299, "loss": 3.1405162811279297, "step": 4554, "token_acc": 0.28350620608063315 }, { "epoch": 2.6701846965699207, "grad_norm": 0.3201227436916076, "learning_rate": 0.00029431776165226393, "loss": 3.1165192127227783, "step": 4555, "token_acc": 0.2846724354169979 }, { "epoch": 2.67077103488713, "grad_norm": 0.31382199303138547, "learning_rate": 0.0002943137974165579, "loss": 3.1102471351623535, "step": 4556, "token_acc": 0.28637897879321705 }, { "epoch": 2.671357373204339, "grad_norm": 0.33368374697118924, "learning_rate": 0.0002943098318252182, "loss": 3.159630298614502, "step": 4557, "token_acc": 0.27996664474059035 }, { "epoch": 2.671943711521548, "grad_norm": 0.3111470864297513, "learning_rate": 0.0002943058648782822, "loss": 3.1030399799346924, "step": 4558, "token_acc": 0.2888672534005064 }, { "epoch": 2.672530049838757, "grad_norm": 0.3348002387553156, "learning_rate": 0.0002943018965757869, "loss": 3.1501152515411377, "step": 4559, "token_acc": 0.28112098731050206 }, { "epoch": 2.673116388155966, "grad_norm": 0.3128416889766457, "learning_rate": 0.0002942979269177698, "loss": 3.107067584991455, "step": 4560, "token_acc": 0.285710372443745 }, { "epoch": 2.673702726473175, "grad_norm": 0.29321016410978984, "learning_rate": 0.0002942939559042681, "loss": 3.141199827194214, "step": 4561, "token_acc": 0.2836139344688846 }, { "epoch": 2.674289064790384, "grad_norm": 0.3416250639555042, "learning_rate": 0.0002942899835353192, "loss": 3.159151077270508, "step": 4562, "token_acc": 0.28059380355684166 }, { "epoch": 2.674875403107593, "grad_norm": 0.30018955031664274, "learning_rate": 0.00029428600981096025, "loss": 3.1631321907043457, "step": 4563, "token_acc": 0.279160543600081 }, { "epoch": 2.675461741424802, "grad_norm": 0.31514239892808615, "learning_rate": 0.00029428203473122873, "loss": 3.1528992652893066, "step": 4564, "token_acc": 0.2829176559203813 }, { "epoch": 2.676048079742011, "grad_norm": 0.33376870192314295, "learning_rate": 0.0002942780582961619, "loss": 3.160259485244751, "step": 4565, "token_acc": 0.2799497980319985 }, { "epoch": 2.67663441805922, "grad_norm": 0.3470464281182145, "learning_rate": 0.00029427408050579704, "loss": 3.1318957805633545, "step": 4566, "token_acc": 0.28352849762122223 }, { "epoch": 2.677220756376429, "grad_norm": 0.3004837373864079, "learning_rate": 0.0002942701013601717, "loss": 3.1211414337158203, "step": 4567, "token_acc": 0.2859289719626168 }, { "epoch": 2.6778070946936383, "grad_norm": 0.3040355758432666, "learning_rate": 0.00029426612085932315, "loss": 3.180159568786621, "step": 4568, "token_acc": 0.27545773601239054 }, { "epoch": 2.6783934330108474, "grad_norm": 0.33718999541708533, "learning_rate": 0.00029426213900328875, "loss": 3.1247029304504395, "step": 4569, "token_acc": 0.286448449922859 }, { "epoch": 2.6789797713280565, "grad_norm": 0.3200281064176358, "learning_rate": 0.00029425815579210604, "loss": 3.114863395690918, "step": 4570, "token_acc": 0.2854968983481148 }, { "epoch": 2.679566109645265, "grad_norm": 0.35095037381926103, "learning_rate": 0.00029425417122581226, "loss": 3.1148738861083984, "step": 4571, "token_acc": 0.28560933313949866 }, { "epoch": 2.6801524479624743, "grad_norm": 0.3265773209290008, "learning_rate": 0.000294250185304445, "loss": 3.1360034942626953, "step": 4572, "token_acc": 0.283788254109655 }, { "epoch": 2.6807387862796834, "grad_norm": 0.31954779452074156, "learning_rate": 0.00029424619802804157, "loss": 3.0899851322174072, "step": 4573, "token_acc": 0.29093935893890344 }, { "epoch": 2.6813251245968925, "grad_norm": 0.2872310802996861, "learning_rate": 0.00029424220939663947, "loss": 3.0752291679382324, "step": 4574, "token_acc": 0.2909343805814149 }, { "epoch": 2.681911462914101, "grad_norm": 0.31919044819298287, "learning_rate": 0.0002942382194102762, "loss": 3.075417995452881, "step": 4575, "token_acc": 0.29182453862364033 }, { "epoch": 2.6824978012313103, "grad_norm": 0.29941200689216924, "learning_rate": 0.00029423422806898925, "loss": 3.1275949478149414, "step": 4576, "token_acc": 0.28610502997810794 }, { "epoch": 2.6830841395485194, "grad_norm": 0.29151606232241184, "learning_rate": 0.0002942302353728161, "loss": 3.1258411407470703, "step": 4577, "token_acc": 0.28495871803412914 }, { "epoch": 2.6836704778657285, "grad_norm": 0.24880192570872103, "learning_rate": 0.0002942262413217942, "loss": 3.0912928581237793, "step": 4578, "token_acc": 0.28886121095741685 }, { "epoch": 2.6842568161829377, "grad_norm": 0.2718167543627989, "learning_rate": 0.0002942222459159611, "loss": 3.135986804962158, "step": 4579, "token_acc": 0.28230148575606445 }, { "epoch": 2.6848431545001468, "grad_norm": 0.28227648718612036, "learning_rate": 0.0002942182491553544, "loss": 3.1465559005737305, "step": 4580, "token_acc": 0.2813216095192045 }, { "epoch": 2.685429492817356, "grad_norm": 0.284518310211154, "learning_rate": 0.00029421425104001153, "loss": 3.147820472717285, "step": 4581, "token_acc": 0.2810195191443211 }, { "epoch": 2.6860158311345645, "grad_norm": 0.3222536309073751, "learning_rate": 0.00029421025156997014, "loss": 3.194807767868042, "step": 4582, "token_acc": 0.2752409518921526 }, { "epoch": 2.6866021694517737, "grad_norm": 0.26185464885492393, "learning_rate": 0.00029420625074526774, "loss": 3.122466802597046, "step": 4583, "token_acc": 0.28468331592441365 }, { "epoch": 2.6871885077689828, "grad_norm": 0.33723361443121597, "learning_rate": 0.00029420224856594194, "loss": 3.1059765815734863, "step": 4584, "token_acc": 0.288374512074808 }, { "epoch": 2.687774846086192, "grad_norm": 0.3464168293876269, "learning_rate": 0.00029419824503203033, "loss": 3.1103920936584473, "step": 4585, "token_acc": 0.28679637249810236 }, { "epoch": 2.6883611844034006, "grad_norm": 0.2958503924369842, "learning_rate": 0.0002941942401435705, "loss": 3.151400566101074, "step": 4586, "token_acc": 0.28271026791190856 }, { "epoch": 2.6889475227206097, "grad_norm": 0.2728700541346181, "learning_rate": 0.0002941902339006001, "loss": 3.1337239742279053, "step": 4587, "token_acc": 0.2827778402182465 }, { "epoch": 2.6895338610378188, "grad_norm": 0.263208801100596, "learning_rate": 0.00029418622630315676, "loss": 3.1237175464630127, "step": 4588, "token_acc": 0.28492662321487877 }, { "epoch": 2.690120199355028, "grad_norm": 0.26093437139826986, "learning_rate": 0.0002941822173512781, "loss": 3.086075782775879, "step": 4589, "token_acc": 0.2904150103925594 }, { "epoch": 2.690706537672237, "grad_norm": 0.2898445953710013, "learning_rate": 0.00029417820704500183, "loss": 3.1390974521636963, "step": 4590, "token_acc": 0.28185563473144354 }, { "epoch": 2.691292875989446, "grad_norm": 0.2902540866849508, "learning_rate": 0.00029417419538436555, "loss": 3.1016061305999756, "step": 4591, "token_acc": 0.28946653101482595 }, { "epoch": 2.6918792143066548, "grad_norm": 0.31348994761380244, "learning_rate": 0.000294170182369407, "loss": 3.1089634895324707, "step": 4592, "token_acc": 0.2880087621206783 }, { "epoch": 2.692465552623864, "grad_norm": 0.27741190874366206, "learning_rate": 0.00029416616800016386, "loss": 3.13444447517395, "step": 4593, "token_acc": 0.2828385013788928 }, { "epoch": 2.693051890941073, "grad_norm": 0.33208293481334733, "learning_rate": 0.00029416215227667385, "loss": 3.058577537536621, "step": 4594, "token_acc": 0.2934651338475849 }, { "epoch": 2.693638229258282, "grad_norm": 0.30616947866275346, "learning_rate": 0.00029415813519897467, "loss": 3.0959835052490234, "step": 4595, "token_acc": 0.28847534065978814 }, { "epoch": 2.6942245675754912, "grad_norm": 0.286437077281594, "learning_rate": 0.00029415411676710405, "loss": 3.167523145675659, "step": 4596, "token_acc": 0.2796301770624153 }, { "epoch": 2.6948109058927, "grad_norm": 0.29924377581762973, "learning_rate": 0.00029415009698109977, "loss": 3.1396360397338867, "step": 4597, "token_acc": 0.2816969191802673 }, { "epoch": 2.695397244209909, "grad_norm": 0.2701862296498904, "learning_rate": 0.00029414607584099956, "loss": 3.129110813140869, "step": 4598, "token_acc": 0.2849161109916111 }, { "epoch": 2.695983582527118, "grad_norm": 0.2843923889168176, "learning_rate": 0.0002941420533468412, "loss": 3.1341147422790527, "step": 4599, "token_acc": 0.2832001532191992 }, { "epoch": 2.6965699208443272, "grad_norm": 0.30006524930281325, "learning_rate": 0.0002941380294986625, "loss": 3.079439401626587, "step": 4600, "token_acc": 0.2907850875107639 }, { "epoch": 2.6971562591615363, "grad_norm": 0.3025171730965367, "learning_rate": 0.0002941340042965013, "loss": 3.124406099319458, "step": 4601, "token_acc": 0.2837750176003218 }, { "epoch": 2.6977425974787455, "grad_norm": 0.33872771422576137, "learning_rate": 0.0002941299777403953, "loss": 3.1456832885742188, "step": 4602, "token_acc": 0.2810418654975834 }, { "epoch": 2.698328935795954, "grad_norm": 0.3042748401569349, "learning_rate": 0.0002941259498303823, "loss": 3.1666271686553955, "step": 4603, "token_acc": 0.27840925284422874 }, { "epoch": 2.6989152741131632, "grad_norm": 0.340930164597336, "learning_rate": 0.0002941219205665003, "loss": 3.0995116233825684, "step": 4604, "token_acc": 0.2895581002520422 }, { "epoch": 2.6995016124303723, "grad_norm": 0.34628017761157465, "learning_rate": 0.000294117889948787, "loss": 3.054107189178467, "step": 4605, "token_acc": 0.2951046765961879 }, { "epoch": 2.7000879507475815, "grad_norm": 0.2877401208491686, "learning_rate": 0.0002941138579772804, "loss": 3.091055393218994, "step": 4606, "token_acc": 0.2883067474514638 }, { "epoch": 2.70067428906479, "grad_norm": 0.3297390062514481, "learning_rate": 0.0002941098246520183, "loss": 3.0988073348999023, "step": 4607, "token_acc": 0.28947751226543594 }, { "epoch": 2.7012606273819992, "grad_norm": 0.33553873920979604, "learning_rate": 0.0002941057899730385, "loss": 3.1095736026763916, "step": 4608, "token_acc": 0.288848005268412 }, { "epoch": 2.7018469656992083, "grad_norm": 0.32228078392927617, "learning_rate": 0.00029410175394037905, "loss": 3.129594564437866, "step": 4609, "token_acc": 0.2851215445842745 }, { "epoch": 2.7024333040164175, "grad_norm": 0.3448477440355411, "learning_rate": 0.0002940977165540778, "loss": 3.077892780303955, "step": 4610, "token_acc": 0.29062265767212764 }, { "epoch": 2.7030196423336266, "grad_norm": 0.27954202740077044, "learning_rate": 0.00029409367781417273, "loss": 3.1068811416625977, "step": 4611, "token_acc": 0.28706848740891816 }, { "epoch": 2.7036059806508357, "grad_norm": 0.32141087691506653, "learning_rate": 0.0002940896377207016, "loss": 3.1368956565856934, "step": 4612, "token_acc": 0.2827881879569259 }, { "epoch": 2.704192318968045, "grad_norm": 0.2911450110690061, "learning_rate": 0.0002940855962737026, "loss": 3.0660271644592285, "step": 4613, "token_acc": 0.29300428735752376 }, { "epoch": 2.7047786572852535, "grad_norm": 0.31185629640258006, "learning_rate": 0.0002940815534732135, "loss": 3.116438388824463, "step": 4614, "token_acc": 0.2852406170849643 }, { "epoch": 2.7053649956024626, "grad_norm": 0.29122025550652547, "learning_rate": 0.0002940775093192724, "loss": 3.12277889251709, "step": 4615, "token_acc": 0.28492211576762616 }, { "epoch": 2.7059513339196717, "grad_norm": 0.2881624350459654, "learning_rate": 0.00029407346381191726, "loss": 3.1263279914855957, "step": 4616, "token_acc": 0.285623343282336 }, { "epoch": 2.706537672236881, "grad_norm": 0.3008507715935431, "learning_rate": 0.000294069416951186, "loss": 3.167377471923828, "step": 4617, "token_acc": 0.2780916980946489 }, { "epoch": 2.7071240105540895, "grad_norm": 0.32366992038099324, "learning_rate": 0.0002940653687371168, "loss": 3.1608505249023438, "step": 4618, "token_acc": 0.28029813297103034 }, { "epoch": 2.7077103488712986, "grad_norm": 0.32944104256304296, "learning_rate": 0.0002940613191697475, "loss": 3.153795003890991, "step": 4619, "token_acc": 0.2800048865565112 }, { "epoch": 2.7082966871885077, "grad_norm": 0.3132669147662173, "learning_rate": 0.00029405726824911635, "loss": 3.1011006832122803, "step": 4620, "token_acc": 0.2858072110839274 }, { "epoch": 2.708883025505717, "grad_norm": 0.3158063532826846, "learning_rate": 0.0002940532159752612, "loss": 3.1045751571655273, "step": 4621, "token_acc": 0.28744945595996574 }, { "epoch": 2.709469363822926, "grad_norm": 0.31561703360670684, "learning_rate": 0.00029404916234822016, "loss": 3.124743938446045, "step": 4622, "token_acc": 0.2840490877896088 }, { "epoch": 2.710055702140135, "grad_norm": 0.28716632716623874, "learning_rate": 0.0002940451073680314, "loss": 3.1051926612854004, "step": 4623, "token_acc": 0.2871723146805589 }, { "epoch": 2.710642040457344, "grad_norm": 0.27228519216772534, "learning_rate": 0.00029404105103473296, "loss": 3.1210622787475586, "step": 4624, "token_acc": 0.2863287616775989 }, { "epoch": 2.711228378774553, "grad_norm": 0.3155644597533601, "learning_rate": 0.00029403699334836294, "loss": 3.1130270957946777, "step": 4625, "token_acc": 0.28519649488531906 }, { "epoch": 2.711814717091762, "grad_norm": 0.3575816525196397, "learning_rate": 0.00029403293430895947, "loss": 3.1579549312591553, "step": 4626, "token_acc": 0.279542061741164 }, { "epoch": 2.712401055408971, "grad_norm": 0.3347961806023059, "learning_rate": 0.00029402887391656064, "loss": 3.160306215286255, "step": 4627, "token_acc": 0.279350511260523 }, { "epoch": 2.71298739372618, "grad_norm": 0.338563928218276, "learning_rate": 0.0002940248121712047, "loss": 3.1232380867004395, "step": 4628, "token_acc": 0.2859852748136589 }, { "epoch": 2.713573732043389, "grad_norm": 0.3000185767301935, "learning_rate": 0.00029402074907292964, "loss": 3.108776330947876, "step": 4629, "token_acc": 0.28754937054211105 }, { "epoch": 2.714160070360598, "grad_norm": 0.4008103726388896, "learning_rate": 0.00029401668462177374, "loss": 3.0986952781677246, "step": 4630, "token_acc": 0.28661991599434417 }, { "epoch": 2.714746408677807, "grad_norm": 0.3666210972814654, "learning_rate": 0.00029401261881777514, "loss": 3.1116297245025635, "step": 4631, "token_acc": 0.2866642986291199 }, { "epoch": 2.715332746995016, "grad_norm": 0.3323319142942056, "learning_rate": 0.00029400855166097207, "loss": 3.1436498165130615, "step": 4632, "token_acc": 0.28320200428334746 }, { "epoch": 2.7159190853122253, "grad_norm": 0.27891262522231886, "learning_rate": 0.0002940044831514027, "loss": 3.13948130607605, "step": 4633, "token_acc": 0.2839069748597127 }, { "epoch": 2.7165054236294344, "grad_norm": 0.3283168390709002, "learning_rate": 0.00029400041328910524, "loss": 3.1330699920654297, "step": 4634, "token_acc": 0.2830496060139244 }, { "epoch": 2.7170917619466435, "grad_norm": 0.32418274608104314, "learning_rate": 0.000293996342074118, "loss": 3.177962303161621, "step": 4635, "token_acc": 0.27783361338379475 }, { "epoch": 2.717678100263852, "grad_norm": 0.32545263476977715, "learning_rate": 0.0002939922695064791, "loss": 3.0984511375427246, "step": 4636, "token_acc": 0.2885143395913631 }, { "epoch": 2.7182644385810613, "grad_norm": 0.35986563853196196, "learning_rate": 0.00029398819558622687, "loss": 3.148623466491699, "step": 4637, "token_acc": 0.28201401478084526 }, { "epoch": 2.7188507768982704, "grad_norm": 0.3089624440242346, "learning_rate": 0.00029398412031339955, "loss": 3.0953633785247803, "step": 4638, "token_acc": 0.28773547000681754 }, { "epoch": 2.7194371152154795, "grad_norm": 0.32095235996202737, "learning_rate": 0.0002939800436880355, "loss": 3.1473608016967773, "step": 4639, "token_acc": 0.2838720527036514 }, { "epoch": 2.720023453532688, "grad_norm": 0.3260751610274036, "learning_rate": 0.00029397596571017294, "loss": 3.1489624977111816, "step": 4640, "token_acc": 0.27905301691696366 }, { "epoch": 2.7206097918498973, "grad_norm": 0.30844978599401995, "learning_rate": 0.0002939718863798502, "loss": 3.1121456623077393, "step": 4641, "token_acc": 0.28609820464225305 }, { "epoch": 2.7211961301671064, "grad_norm": 0.29474267611248495, "learning_rate": 0.00029396780569710556, "loss": 3.1310932636260986, "step": 4642, "token_acc": 0.2837264882320861 }, { "epoch": 2.7217824684843155, "grad_norm": 0.317081254279625, "learning_rate": 0.0002939637236619774, "loss": 3.131783962249756, "step": 4643, "token_acc": 0.2827566545783293 }, { "epoch": 2.7223688068015246, "grad_norm": 0.3121630638752819, "learning_rate": 0.00029395964027450404, "loss": 3.1202597618103027, "step": 4644, "token_acc": 0.28604182787882576 }, { "epoch": 2.7229551451187337, "grad_norm": 0.31638898176387137, "learning_rate": 0.00029395555553472384, "loss": 3.131974220275879, "step": 4645, "token_acc": 0.28377970016850795 }, { "epoch": 2.7235414834359424, "grad_norm": 0.28476223234764236, "learning_rate": 0.0002939514694426752, "loss": 3.129000663757324, "step": 4646, "token_acc": 0.2834111813269485 }, { "epoch": 2.7241278217531515, "grad_norm": 0.3022637605139887, "learning_rate": 0.0002939473819983965, "loss": 3.1290998458862305, "step": 4647, "token_acc": 0.2857560049645908 }, { "epoch": 2.7247141600703606, "grad_norm": 0.2750131357613476, "learning_rate": 0.0002939432932019261, "loss": 3.110006332397461, "step": 4648, "token_acc": 0.287471804316785 }, { "epoch": 2.7253004983875697, "grad_norm": 0.3036775922568293, "learning_rate": 0.00029393920305330237, "loss": 3.07769513130188, "step": 4649, "token_acc": 0.29264439583181595 }, { "epoch": 2.7258868367047784, "grad_norm": 0.3329766059425245, "learning_rate": 0.00029393511155256384, "loss": 3.0733118057250977, "step": 4650, "token_acc": 0.29306160784324015 }, { "epoch": 2.7264731750219875, "grad_norm": 0.3084486390969691, "learning_rate": 0.0002939310186997489, "loss": 3.072504997253418, "step": 4651, "token_acc": 0.2913754838984699 }, { "epoch": 2.7270595133391966, "grad_norm": 0.3091198226888238, "learning_rate": 0.00029392692449489597, "loss": 3.111243486404419, "step": 4652, "token_acc": 0.2863049000768136 }, { "epoch": 2.7276458516564057, "grad_norm": 0.3164573224658852, "learning_rate": 0.00029392282893804354, "loss": 3.0988736152648926, "step": 4653, "token_acc": 0.287824435534923 }, { "epoch": 2.728232189973615, "grad_norm": 0.26437116060976923, "learning_rate": 0.00029391873202923004, "loss": 3.1457619667053223, "step": 4654, "token_acc": 0.2821215959637617 }, { "epoch": 2.728818528290824, "grad_norm": 0.27401766139301315, "learning_rate": 0.000293914633768494, "loss": 3.1506428718566895, "step": 4655, "token_acc": 0.28271964909066855 }, { "epoch": 2.729404866608033, "grad_norm": 0.2596916474162231, "learning_rate": 0.0002939105341558739, "loss": 3.0618858337402344, "step": 4656, "token_acc": 0.2929765367254163 }, { "epoch": 2.7299912049252417, "grad_norm": 0.29477469416101254, "learning_rate": 0.0002939064331914083, "loss": 3.1261134147644043, "step": 4657, "token_acc": 0.2840787849626577 }, { "epoch": 2.730577543242451, "grad_norm": 0.28319840184156814, "learning_rate": 0.00029390233087513563, "loss": 3.1006863117218018, "step": 4658, "token_acc": 0.288687423398402 }, { "epoch": 2.73116388155966, "grad_norm": 0.2663723395472693, "learning_rate": 0.0002938982272070945, "loss": 3.0868067741394043, "step": 4659, "token_acc": 0.290051392512273 }, { "epoch": 2.731750219876869, "grad_norm": 0.2938101319897106, "learning_rate": 0.0002938941221873234, "loss": 3.1115784645080566, "step": 4660, "token_acc": 0.2865728321521849 }, { "epoch": 2.7323365581940777, "grad_norm": 0.3141015403145631, "learning_rate": 0.00029389001581586093, "loss": 3.110257625579834, "step": 4661, "token_acc": 0.28689001981235485 }, { "epoch": 2.732922896511287, "grad_norm": 0.2687667440427265, "learning_rate": 0.00029388590809274566, "loss": 3.0621681213378906, "step": 4662, "token_acc": 0.29392864527827706 }, { "epoch": 2.733509234828496, "grad_norm": 0.31748704360173763, "learning_rate": 0.0002938817990180162, "loss": 3.1268672943115234, "step": 4663, "token_acc": 0.28368625936671604 }, { "epoch": 2.734095573145705, "grad_norm": 0.31611398418617415, "learning_rate": 0.00029387768859171105, "loss": 3.0960335731506348, "step": 4664, "token_acc": 0.2881754589608089 }, { "epoch": 2.734681911462914, "grad_norm": 0.29942489696869135, "learning_rate": 0.000293873576813869, "loss": 3.1195664405822754, "step": 4665, "token_acc": 0.2861620548774987 }, { "epoch": 2.7352682497801233, "grad_norm": 0.28150922018175023, "learning_rate": 0.0002938694636845285, "loss": 3.125598907470703, "step": 4666, "token_acc": 0.2856954678736179 }, { "epoch": 2.7358545880973324, "grad_norm": 0.28428432362677086, "learning_rate": 0.00029386534920372825, "loss": 3.1247076988220215, "step": 4667, "token_acc": 0.28557349290849854 }, { "epoch": 2.736440926414541, "grad_norm": 0.2594797636814349, "learning_rate": 0.00029386123337150693, "loss": 3.057495594024658, "step": 4668, "token_acc": 0.2927059683185887 }, { "epoch": 2.73702726473175, "grad_norm": 0.3065962824255641, "learning_rate": 0.00029385711618790317, "loss": 3.156785011291504, "step": 4669, "token_acc": 0.27995013537670266 }, { "epoch": 2.7376136030489593, "grad_norm": 0.27372840091448825, "learning_rate": 0.00029385299765295563, "loss": 3.1196084022521973, "step": 4670, "token_acc": 0.28438188177382445 }, { "epoch": 2.7381999413661684, "grad_norm": 0.3288698636674791, "learning_rate": 0.00029384887776670305, "loss": 3.0815818309783936, "step": 4671, "token_acc": 0.2903944110318866 }, { "epoch": 2.738786279683377, "grad_norm": 0.3264973956483734, "learning_rate": 0.0002938447565291841, "loss": 3.1288208961486816, "step": 4672, "token_acc": 0.28316162101881504 }, { "epoch": 2.739372618000586, "grad_norm": 0.3503381530088445, "learning_rate": 0.0002938406339404375, "loss": 3.142727851867676, "step": 4673, "token_acc": 0.2821260637682379 }, { "epoch": 2.7399589563177953, "grad_norm": 0.34017410493154704, "learning_rate": 0.0002938365100005019, "loss": 3.1012144088745117, "step": 4674, "token_acc": 0.29087555720454045 }, { "epoch": 2.7405452946350044, "grad_norm": 0.29149865338024367, "learning_rate": 0.0002938323847094162, "loss": 3.14150333404541, "step": 4675, "token_acc": 0.28126029664892677 }, { "epoch": 2.7411316329522135, "grad_norm": 0.35213799633887566, "learning_rate": 0.000293828258067219, "loss": 3.1303629875183105, "step": 4676, "token_acc": 0.2847607238642509 }, { "epoch": 2.7417179712694226, "grad_norm": 0.34775927761089853, "learning_rate": 0.0002938241300739492, "loss": 3.109248638153076, "step": 4677, "token_acc": 0.28661050043390224 }, { "epoch": 2.7423043095866317, "grad_norm": 0.39523054656265644, "learning_rate": 0.0002938200007296455, "loss": 3.1547470092773438, "step": 4678, "token_acc": 0.28048252251876127 }, { "epoch": 2.7428906479038404, "grad_norm": 0.34660825206441775, "learning_rate": 0.0002938158700343466, "loss": 3.140289306640625, "step": 4679, "token_acc": 0.282660073261933 }, { "epoch": 2.7434769862210495, "grad_norm": 0.3330637214132319, "learning_rate": 0.0002938117379880915, "loss": 3.0921826362609863, "step": 4680, "token_acc": 0.2889187917716845 }, { "epoch": 2.7440633245382586, "grad_norm": 0.2655881374935521, "learning_rate": 0.0002938076045909188, "loss": 3.160306930541992, "step": 4681, "token_acc": 0.28060509611454043 }, { "epoch": 2.7446496628554677, "grad_norm": 0.31934861018589944, "learning_rate": 0.00029380346984286755, "loss": 3.131010055541992, "step": 4682, "token_acc": 0.2826089779719435 }, { "epoch": 2.7452360011726764, "grad_norm": 0.28366571160299364, "learning_rate": 0.00029379933374397644, "loss": 3.071028709411621, "step": 4683, "token_acc": 0.2892310308314204 }, { "epoch": 2.7458223394898855, "grad_norm": 0.3547433045237246, "learning_rate": 0.00029379519629428434, "loss": 3.091519355773926, "step": 4684, "token_acc": 0.29102558353622027 }, { "epoch": 2.7464086778070946, "grad_norm": 0.32588485524047744, "learning_rate": 0.0002937910574938302, "loss": 3.130218505859375, "step": 4685, "token_acc": 0.283876237324726 }, { "epoch": 2.7469950161243037, "grad_norm": 0.32484636975519876, "learning_rate": 0.0002937869173426527, "loss": 3.1131410598754883, "step": 4686, "token_acc": 0.2862504735617254 }, { "epoch": 2.747581354441513, "grad_norm": 0.27828495224118727, "learning_rate": 0.00029378277584079095, "loss": 3.0764341354370117, "step": 4687, "token_acc": 0.29348980090230203 }, { "epoch": 2.748167692758722, "grad_norm": 0.3088473060138778, "learning_rate": 0.00029377863298828377, "loss": 3.1043457984924316, "step": 4688, "token_acc": 0.28819144954188164 }, { "epoch": 2.748754031075931, "grad_norm": 0.28115386770389267, "learning_rate": 0.0002937744887851701, "loss": 3.134852170944214, "step": 4689, "token_acc": 0.28398032690201364 }, { "epoch": 2.7493403693931397, "grad_norm": 0.2819556622422523, "learning_rate": 0.0002937703432314888, "loss": 3.1036086082458496, "step": 4690, "token_acc": 0.28956922088952414 }, { "epoch": 2.749926707710349, "grad_norm": 0.27960691946809385, "learning_rate": 0.0002937661963272789, "loss": 3.097256660461426, "step": 4691, "token_acc": 0.28850732543051866 }, { "epoch": 2.750513046027558, "grad_norm": 0.26774711744024093, "learning_rate": 0.0002937620480725793, "loss": 3.1116981506347656, "step": 4692, "token_acc": 0.28684383637153116 }, { "epoch": 2.751099384344767, "grad_norm": 0.28687798158990896, "learning_rate": 0.00029375789846742894, "loss": 3.147639751434326, "step": 4693, "token_acc": 0.2801852447931092 }, { "epoch": 2.7516857226619758, "grad_norm": 0.2702603211912033, "learning_rate": 0.0002937537475118669, "loss": 3.162505626678467, "step": 4694, "token_acc": 0.27960436032210456 }, { "epoch": 2.752272060979185, "grad_norm": 0.2838100637330248, "learning_rate": 0.0002937495952059321, "loss": 3.112856149673462, "step": 4695, "token_acc": 0.28609278079254713 }, { "epoch": 2.752858399296394, "grad_norm": 0.2812387065732931, "learning_rate": 0.0002937454415496635, "loss": 3.0824429988861084, "step": 4696, "token_acc": 0.28975167610410435 }, { "epoch": 2.753444737613603, "grad_norm": 0.3048882371607763, "learning_rate": 0.00029374128654310026, "loss": 3.1206917762756348, "step": 4697, "token_acc": 0.28408926966229786 }, { "epoch": 2.754031075930812, "grad_norm": 0.2839292561526516, "learning_rate": 0.0002937371301862813, "loss": 3.1614980697631836, "step": 4698, "token_acc": 0.27967022483225035 }, { "epoch": 2.7546174142480213, "grad_norm": 0.3299972666673433, "learning_rate": 0.0002937329724792457, "loss": 3.1126513481140137, "step": 4699, "token_acc": 0.28618007769221676 }, { "epoch": 2.75520375256523, "grad_norm": 0.32858639445157345, "learning_rate": 0.00029372881342203247, "loss": 3.123180389404297, "step": 4700, "token_acc": 0.2840429417972238 }, { "epoch": 2.755790090882439, "grad_norm": 0.31993113931543704, "learning_rate": 0.0002937246530146807, "loss": 3.1160054206848145, "step": 4701, "token_acc": 0.286098999743524 }, { "epoch": 2.756376429199648, "grad_norm": 0.35570265038608545, "learning_rate": 0.0002937204912572296, "loss": 3.1265673637390137, "step": 4702, "token_acc": 0.28413944391584733 }, { "epoch": 2.7569627675168573, "grad_norm": 0.3135345615694619, "learning_rate": 0.00029371632814971803, "loss": 3.1235218048095703, "step": 4703, "token_acc": 0.2851160978987687 }, { "epoch": 2.757549105834066, "grad_norm": 0.26694159703073106, "learning_rate": 0.0002937121636921852, "loss": 3.1407878398895264, "step": 4704, "token_acc": 0.28156576127812005 }, { "epoch": 2.758135444151275, "grad_norm": 0.28208073015796237, "learning_rate": 0.00029370799788467033, "loss": 3.1072981357574463, "step": 4705, "token_acc": 0.2886720396123051 }, { "epoch": 2.758721782468484, "grad_norm": 0.2960137923653525, "learning_rate": 0.0002937038307272124, "loss": 3.1451096534729004, "step": 4706, "token_acc": 0.28318405904215443 }, { "epoch": 2.7593081207856933, "grad_norm": 0.31617252197368517, "learning_rate": 0.0002936996622198507, "loss": 3.101792335510254, "step": 4707, "token_acc": 0.28962171868090636 }, { "epoch": 2.7598944591029024, "grad_norm": 0.31785461636081586, "learning_rate": 0.0002936954923626243, "loss": 3.0967421531677246, "step": 4708, "token_acc": 0.28813238923905354 }, { "epoch": 2.7604807974201115, "grad_norm": 0.30820592681765696, "learning_rate": 0.00029369132115557235, "loss": 3.11812162399292, "step": 4709, "token_acc": 0.2870730017426358 }, { "epoch": 2.7610671357373207, "grad_norm": 0.3311623308280372, "learning_rate": 0.00029368714859873406, "loss": 3.122946262359619, "step": 4710, "token_acc": 0.28461078304392684 }, { "epoch": 2.7616534740545293, "grad_norm": 0.33037428075273734, "learning_rate": 0.00029368297469214863, "loss": 3.1733055114746094, "step": 4711, "token_acc": 0.2784378586376831 }, { "epoch": 2.7622398123717384, "grad_norm": 0.319498511795853, "learning_rate": 0.0002936787994358553, "loss": 3.1295394897460938, "step": 4712, "token_acc": 0.2837164147113336 }, { "epoch": 2.7628261506889475, "grad_norm": 0.2674550887606505, "learning_rate": 0.00029367462282989324, "loss": 3.122802257537842, "step": 4713, "token_acc": 0.28452427572220307 }, { "epoch": 2.7634124890061567, "grad_norm": 0.26906438881942324, "learning_rate": 0.0002936704448743017, "loss": 3.1374967098236084, "step": 4714, "token_acc": 0.2831088194939622 }, { "epoch": 2.7639988273233653, "grad_norm": 0.3127751302166687, "learning_rate": 0.0002936662655691199, "loss": 3.1238365173339844, "step": 4715, "token_acc": 0.2840411002616632 }, { "epoch": 2.7645851656405744, "grad_norm": 0.3025758184607278, "learning_rate": 0.0002936620849143872, "loss": 3.1010937690734863, "step": 4716, "token_acc": 0.287815059612283 }, { "epoch": 2.7651715039577835, "grad_norm": 0.30843863847090003, "learning_rate": 0.0002936579029101427, "loss": 3.132065773010254, "step": 4717, "token_acc": 0.28480036571812906 }, { "epoch": 2.7657578422749927, "grad_norm": 0.3256695391131964, "learning_rate": 0.0002936537195564259, "loss": 3.1091110706329346, "step": 4718, "token_acc": 0.2887694879229047 }, { "epoch": 2.7663441805922018, "grad_norm": 0.2895605700306016, "learning_rate": 0.00029364953485327587, "loss": 3.1116220951080322, "step": 4719, "token_acc": 0.2867558742083909 }, { "epoch": 2.766930518909411, "grad_norm": 0.29863988416421283, "learning_rate": 0.0002936453488007321, "loss": 3.1028687953948975, "step": 4720, "token_acc": 0.28861646139508595 }, { "epoch": 2.76751685722662, "grad_norm": 0.2720282939457676, "learning_rate": 0.00029364116139883384, "loss": 3.0520944595336914, "step": 4721, "token_acc": 0.2949303692347004 }, { "epoch": 2.7681031955438287, "grad_norm": 0.28722570662591207, "learning_rate": 0.0002936369726476204, "loss": 3.0994036197662354, "step": 4722, "token_acc": 0.2882835377715889 }, { "epoch": 2.7686895338610378, "grad_norm": 0.2872564167044778, "learning_rate": 0.00029363278254713115, "loss": 3.086535930633545, "step": 4723, "token_acc": 0.29169510984237307 }, { "epoch": 2.769275872178247, "grad_norm": 0.2935175474865802, "learning_rate": 0.0002936285910974055, "loss": 3.1063809394836426, "step": 4724, "token_acc": 0.28766629857495923 }, { "epoch": 2.769862210495456, "grad_norm": 0.3183247809477237, "learning_rate": 0.0002936243982984827, "loss": 3.1144137382507324, "step": 4725, "token_acc": 0.28615390039309896 }, { "epoch": 2.7704485488126647, "grad_norm": 0.27868157717593633, "learning_rate": 0.0002936202041504023, "loss": 3.1402106285095215, "step": 4726, "token_acc": 0.28274125978223885 }, { "epoch": 2.771034887129874, "grad_norm": 0.3002997259143058, "learning_rate": 0.00029361600865320355, "loss": 3.1407697200775146, "step": 4727, "token_acc": 0.2826597646667498 }, { "epoch": 2.771621225447083, "grad_norm": 0.35127357628587, "learning_rate": 0.000293611811806926, "loss": 3.1081745624542236, "step": 4728, "token_acc": 0.2871727064881066 }, { "epoch": 2.772207563764292, "grad_norm": 0.34780037211783155, "learning_rate": 0.00029360761361160893, "loss": 3.1781954765319824, "step": 4729, "token_acc": 0.2780793865629513 }, { "epoch": 2.772793902081501, "grad_norm": 0.3109802659415708, "learning_rate": 0.0002936034140672918, "loss": 3.1407785415649414, "step": 4730, "token_acc": 0.28216742668199385 }, { "epoch": 2.7733802403987102, "grad_norm": 0.2945505158065512, "learning_rate": 0.00029359921317401416, "loss": 3.0765645503997803, "step": 4731, "token_acc": 0.29188713894422313 }, { "epoch": 2.7739665787159193, "grad_norm": 0.2944579371613482, "learning_rate": 0.00029359501093181547, "loss": 3.100515842437744, "step": 4732, "token_acc": 0.2893097380107402 }, { "epoch": 2.774552917033128, "grad_norm": 0.30057316987428545, "learning_rate": 0.0002935908073407351, "loss": 3.1424355506896973, "step": 4733, "token_acc": 0.28182488831109875 }, { "epoch": 2.775139255350337, "grad_norm": 0.2758452216064872, "learning_rate": 0.00029358660240081253, "loss": 3.093410015106201, "step": 4734, "token_acc": 0.2886025613241068 }, { "epoch": 2.7757255936675462, "grad_norm": 0.27295858313573956, "learning_rate": 0.00029358239611208744, "loss": 3.1155290603637695, "step": 4735, "token_acc": 0.2855256120035591 }, { "epoch": 2.7763119319847553, "grad_norm": 0.3090229536327913, "learning_rate": 0.0002935781884745991, "loss": 3.1167690753936768, "step": 4736, "token_acc": 0.28563907620603424 }, { "epoch": 2.776898270301964, "grad_norm": 0.3131120023804289, "learning_rate": 0.00029357397948838725, "loss": 3.117898464202881, "step": 4737, "token_acc": 0.2858612347363679 }, { "epoch": 2.777484608619173, "grad_norm": 0.3249700613060513, "learning_rate": 0.00029356976915349126, "loss": 3.152148485183716, "step": 4738, "token_acc": 0.28047536017613073 }, { "epoch": 2.7780709469363822, "grad_norm": 0.2904561331351648, "learning_rate": 0.00029356555746995076, "loss": 3.0702338218688965, "step": 4739, "token_acc": 0.2927292264555936 }, { "epoch": 2.7786572852535913, "grad_norm": 0.2570788723343336, "learning_rate": 0.00029356134443780533, "loss": 3.0861129760742188, "step": 4740, "token_acc": 0.29051091618643665 }, { "epoch": 2.7792436235708005, "grad_norm": 0.3137862369768742, "learning_rate": 0.0002935571300570945, "loss": 3.151841163635254, "step": 4741, "token_acc": 0.2815171892867259 }, { "epoch": 2.7798299618880096, "grad_norm": 0.27598643946103335, "learning_rate": 0.0002935529143278579, "loss": 3.1331369876861572, "step": 4742, "token_acc": 0.2831885082321636 }, { "epoch": 2.7804163002052187, "grad_norm": 0.24798211082959642, "learning_rate": 0.0002935486972501351, "loss": 3.1088995933532715, "step": 4743, "token_acc": 0.28703560091406355 }, { "epoch": 2.7810026385224274, "grad_norm": 0.30227551263695224, "learning_rate": 0.00029354447882396574, "loss": 3.112123489379883, "step": 4744, "token_acc": 0.284384686878547 }, { "epoch": 2.7815889768396365, "grad_norm": 0.2636358798251168, "learning_rate": 0.0002935402590493894, "loss": 3.0898818969726562, "step": 4745, "token_acc": 0.29021375916803793 }, { "epoch": 2.7821753151568456, "grad_norm": 0.2796744194081281, "learning_rate": 0.00029353603792644573, "loss": 3.1446924209594727, "step": 4746, "token_acc": 0.28264945951908227 }, { "epoch": 2.7827616534740547, "grad_norm": 0.2943004441812503, "learning_rate": 0.00029353181545517445, "loss": 3.1216249465942383, "step": 4747, "token_acc": 0.2849114084084323 }, { "epoch": 2.7833479917912634, "grad_norm": 0.33636066193253317, "learning_rate": 0.00029352759163561514, "loss": 3.1289706230163574, "step": 4748, "token_acc": 0.28333978584587244 }, { "epoch": 2.7839343301084725, "grad_norm": 0.2893669039609805, "learning_rate": 0.00029352336646780756, "loss": 3.127286434173584, "step": 4749, "token_acc": 0.2824601788154205 }, { "epoch": 2.7845206684256816, "grad_norm": 0.3374730069499942, "learning_rate": 0.0002935191399517913, "loss": 3.092006206512451, "step": 4750, "token_acc": 0.2904185272882335 }, { "epoch": 2.7851070067428907, "grad_norm": 0.3057522873725431, "learning_rate": 0.00029351491208760616, "loss": 3.156360149383545, "step": 4751, "token_acc": 0.28291448413866166 }, { "epoch": 2.7856933450601, "grad_norm": 0.3643003397270055, "learning_rate": 0.0002935106828752917, "loss": 3.106804847717285, "step": 4752, "token_acc": 0.28813194141636955 }, { "epoch": 2.786279683377309, "grad_norm": 0.2996529395680403, "learning_rate": 0.00029350645231488793, "loss": 3.099357843399048, "step": 4753, "token_acc": 0.2861523795593194 }, { "epoch": 2.7868660216945176, "grad_norm": 0.3345417177081686, "learning_rate": 0.0002935022204064343, "loss": 3.0993685722351074, "step": 4754, "token_acc": 0.2868112464576929 }, { "epoch": 2.7874523600117267, "grad_norm": 0.3385383767789251, "learning_rate": 0.0002934979871499707, "loss": 3.1421968936920166, "step": 4755, "token_acc": 0.28173475794912967 }, { "epoch": 2.788038698328936, "grad_norm": 0.3466329116364996, "learning_rate": 0.0002934937525455369, "loss": 3.175919532775879, "step": 4756, "token_acc": 0.2766175439677685 }, { "epoch": 2.788625036646145, "grad_norm": 0.32674806265435374, "learning_rate": 0.00029348951659317267, "loss": 3.128875255584717, "step": 4757, "token_acc": 0.28340863858856435 }, { "epoch": 2.7892113749633536, "grad_norm": 0.3008083369200603, "learning_rate": 0.00029348527929291775, "loss": 3.107044219970703, "step": 4758, "token_acc": 0.28552801101205677 }, { "epoch": 2.7897977132805627, "grad_norm": 0.25271317538884214, "learning_rate": 0.00029348104064481196, "loss": 3.1241354942321777, "step": 4759, "token_acc": 0.28352744812962216 }, { "epoch": 2.790384051597772, "grad_norm": 0.3112187424202964, "learning_rate": 0.0002934768006488952, "loss": 3.0990638732910156, "step": 4760, "token_acc": 0.2885161603682938 }, { "epoch": 2.790970389914981, "grad_norm": 0.2902737715054049, "learning_rate": 0.0002934725593052072, "loss": 3.0783305168151855, "step": 4761, "token_acc": 0.2897023372812074 }, { "epoch": 2.79155672823219, "grad_norm": 0.25972198582048506, "learning_rate": 0.0002934683166137878, "loss": 3.1035685539245605, "step": 4762, "token_acc": 0.2892015099860235 }, { "epoch": 2.792143066549399, "grad_norm": 0.3057722291314643, "learning_rate": 0.0002934640725746769, "loss": 3.105126142501831, "step": 4763, "token_acc": 0.28782926816411086 }, { "epoch": 2.7927294048666083, "grad_norm": 0.3463992417814066, "learning_rate": 0.00029345982718791445, "loss": 3.0963521003723145, "step": 4764, "token_acc": 0.2886228188744163 }, { "epoch": 2.793315743183817, "grad_norm": 0.3477265486662584, "learning_rate": 0.0002934555804535402, "loss": 3.1557483673095703, "step": 4765, "token_acc": 0.28115957432674804 }, { "epoch": 2.793902081501026, "grad_norm": 0.3340291466908199, "learning_rate": 0.000293451332371594, "loss": 3.1081161499023438, "step": 4766, "token_acc": 0.2871892972932686 }, { "epoch": 2.794488419818235, "grad_norm": 0.2779540440360137, "learning_rate": 0.0002934470829421159, "loss": 3.1169052124023438, "step": 4767, "token_acc": 0.28599897630164284 }, { "epoch": 2.7950747581354443, "grad_norm": 0.3122197730224559, "learning_rate": 0.00029344283216514575, "loss": 3.1311275959014893, "step": 4768, "token_acc": 0.2826014770121008 }, { "epoch": 2.795661096452653, "grad_norm": 0.30956000606854917, "learning_rate": 0.0002934385800407235, "loss": 3.1180009841918945, "step": 4769, "token_acc": 0.28413777903718396 }, { "epoch": 2.796247434769862, "grad_norm": 0.3233400791004117, "learning_rate": 0.00029343432656888903, "loss": 3.1452269554138184, "step": 4770, "token_acc": 0.28249778293572153 }, { "epoch": 2.796833773087071, "grad_norm": 0.2903950503319801, "learning_rate": 0.0002934300717496824, "loss": 3.1585071086883545, "step": 4771, "token_acc": 0.27954393943785555 }, { "epoch": 2.7974201114042803, "grad_norm": 0.28406839651697097, "learning_rate": 0.0002934258155831435, "loss": 3.1356396675109863, "step": 4772, "token_acc": 0.2832308692734767 }, { "epoch": 2.7980064497214894, "grad_norm": 0.3049935977384799, "learning_rate": 0.00029342155806931226, "loss": 3.1156716346740723, "step": 4773, "token_acc": 0.286786067069376 }, { "epoch": 2.7985927880386985, "grad_norm": 0.29805189566895046, "learning_rate": 0.0002934172992082288, "loss": 3.1358954906463623, "step": 4774, "token_acc": 0.28252401427854723 }, { "epoch": 2.7991791263559076, "grad_norm": 0.2780179268006769, "learning_rate": 0.00029341303899993313, "loss": 3.1307790279388428, "step": 4775, "token_acc": 0.28324462258193395 }, { "epoch": 2.7997654646731163, "grad_norm": 0.25264320977355403, "learning_rate": 0.00029340877744446514, "loss": 3.109707832336426, "step": 4776, "token_acc": 0.2857253570999078 }, { "epoch": 2.8003518029903254, "grad_norm": 0.2633520661089611, "learning_rate": 0.0002934045145418649, "loss": 3.1307430267333984, "step": 4777, "token_acc": 0.283065705588884 }, { "epoch": 2.8009381413075345, "grad_norm": 0.30291166318612983, "learning_rate": 0.00029340025029217254, "loss": 3.1443891525268555, "step": 4778, "token_acc": 0.28039302540973704 }, { "epoch": 2.8015244796247436, "grad_norm": 0.27249199423193987, "learning_rate": 0.00029339598469542807, "loss": 3.100367546081543, "step": 4779, "token_acc": 0.28946103819826363 }, { "epoch": 2.8021108179419523, "grad_norm": 0.3247226014044503, "learning_rate": 0.0002933917177516715, "loss": 3.148219108581543, "step": 4780, "token_acc": 0.28245314892461926 }, { "epoch": 2.8026971562591614, "grad_norm": 0.32783183596603255, "learning_rate": 0.00029338744946094306, "loss": 3.1216397285461426, "step": 4781, "token_acc": 0.28523994568546046 }, { "epoch": 2.8032834945763705, "grad_norm": 0.27548878007703953, "learning_rate": 0.00029338317982328265, "loss": 3.0968270301818848, "step": 4782, "token_acc": 0.2869531156777397 }, { "epoch": 2.8038698328935796, "grad_norm": 0.31312100757531597, "learning_rate": 0.0002933789088387306, "loss": 3.1257262229919434, "step": 4783, "token_acc": 0.2835693752142838 }, { "epoch": 2.8044561712107887, "grad_norm": 0.2941055296799855, "learning_rate": 0.00029337463650732677, "loss": 3.1181437969207764, "step": 4784, "token_acc": 0.28493374896379303 }, { "epoch": 2.805042509527998, "grad_norm": 0.2901696878023032, "learning_rate": 0.0002933703628291115, "loss": 3.1176421642303467, "step": 4785, "token_acc": 0.2856336055203573 }, { "epoch": 2.805628847845207, "grad_norm": 0.3111190769110065, "learning_rate": 0.00029336608780412485, "loss": 3.090714454650879, "step": 4786, "token_acc": 0.2897794207255149 }, { "epoch": 2.8062151861624156, "grad_norm": 0.3001351590770675, "learning_rate": 0.00029336181143240696, "loss": 3.086665153503418, "step": 4787, "token_acc": 0.2895817036895571 }, { "epoch": 2.8068015244796247, "grad_norm": 0.2707266552105435, "learning_rate": 0.0002933575337139981, "loss": 3.123156785964966, "step": 4788, "token_acc": 0.2847980163348326 }, { "epoch": 2.807387862796834, "grad_norm": 0.30095325177062243, "learning_rate": 0.0002933532546489383, "loss": 3.124725341796875, "step": 4789, "token_acc": 0.28469981777954617 }, { "epoch": 2.807974201114043, "grad_norm": 0.3459558116493978, "learning_rate": 0.000293348974237268, "loss": 3.120318651199341, "step": 4790, "token_acc": 0.28479895492329105 }, { "epoch": 2.8085605394312516, "grad_norm": 0.3167117940720076, "learning_rate": 0.0002933446924790271, "loss": 3.1116929054260254, "step": 4791, "token_acc": 0.28677492521273684 }, { "epoch": 2.8091468777484607, "grad_norm": 0.2635443532595926, "learning_rate": 0.000293340409374256, "loss": 3.1197524070739746, "step": 4792, "token_acc": 0.2830235439900867 }, { "epoch": 2.80973321606567, "grad_norm": 0.26755544258369857, "learning_rate": 0.00029333612492299496, "loss": 3.120893716812134, "step": 4793, "token_acc": 0.2855528925686526 }, { "epoch": 2.810319554382879, "grad_norm": 0.2915483329710046, "learning_rate": 0.0002933318391252841, "loss": 3.1189095973968506, "step": 4794, "token_acc": 0.2845259044267203 }, { "epoch": 2.810905892700088, "grad_norm": 0.30296234677748024, "learning_rate": 0.0002933275519811638, "loss": 3.1456851959228516, "step": 4795, "token_acc": 0.28068083675356736 }, { "epoch": 2.811492231017297, "grad_norm": 0.27627042563219706, "learning_rate": 0.00029332326349067433, "loss": 3.095336437225342, "step": 4796, "token_acc": 0.28799177033443357 }, { "epoch": 2.8120785693345063, "grad_norm": 0.2902708852995741, "learning_rate": 0.0002933189736538559, "loss": 3.0547122955322266, "step": 4797, "token_acc": 0.2945795135364472 }, { "epoch": 2.812664907651715, "grad_norm": 0.3061769651257533, "learning_rate": 0.0002933146824707488, "loss": 3.1480655670166016, "step": 4798, "token_acc": 0.28096164514137933 }, { "epoch": 2.813251245968924, "grad_norm": 0.25348439848553617, "learning_rate": 0.0002933103899413934, "loss": 3.12548828125, "step": 4799, "token_acc": 0.28507425277273013 }, { "epoch": 2.813837584286133, "grad_norm": 0.27607788125427285, "learning_rate": 0.00029330609606583, "loss": 3.1160919666290283, "step": 4800, "token_acc": 0.28696695038225806 }, { "epoch": 2.8144239226033423, "grad_norm": 0.2884032026910037, "learning_rate": 0.0002933018008440989, "loss": 3.125800609588623, "step": 4801, "token_acc": 0.28430531841534074 }, { "epoch": 2.815010260920551, "grad_norm": 0.3033818339819906, "learning_rate": 0.00029329750427624054, "loss": 3.0998997688293457, "step": 4802, "token_acc": 0.2864337751157555 }, { "epoch": 2.81559659923776, "grad_norm": 0.2818880447259213, "learning_rate": 0.00029329320636229517, "loss": 3.116429328918457, "step": 4803, "token_acc": 0.2863871521777834 }, { "epoch": 2.816182937554969, "grad_norm": 0.3005505760779356, "learning_rate": 0.00029328890710230327, "loss": 3.072636604309082, "step": 4804, "token_acc": 0.29073063659580667 }, { "epoch": 2.8167692758721783, "grad_norm": 0.3096254094953349, "learning_rate": 0.00029328460649630516, "loss": 3.088721752166748, "step": 4805, "token_acc": 0.2900315601511565 }, { "epoch": 2.8173556141893874, "grad_norm": 0.3229874730198362, "learning_rate": 0.0002932803045443412, "loss": 3.1045455932617188, "step": 4806, "token_acc": 0.286118006886878 }, { "epoch": 2.8179419525065965, "grad_norm": 0.32704077078975075, "learning_rate": 0.0002932760012464519, "loss": 3.1075010299682617, "step": 4807, "token_acc": 0.2876784284526766 }, { "epoch": 2.818528290823805, "grad_norm": 0.2995854010589223, "learning_rate": 0.0002932716966026776, "loss": 3.160248279571533, "step": 4808, "token_acc": 0.27959627337537923 }, { "epoch": 2.8191146291410143, "grad_norm": 0.2938102615170772, "learning_rate": 0.0002932673906130588, "loss": 3.0863037109375, "step": 4809, "token_acc": 0.2891724337175678 }, { "epoch": 2.8197009674582234, "grad_norm": 0.3118335607623899, "learning_rate": 0.0002932630832776359, "loss": 3.0921542644500732, "step": 4810, "token_acc": 0.28992817631783746 }, { "epoch": 2.8202873057754325, "grad_norm": 0.3094380873333429, "learning_rate": 0.00029325877459644944, "loss": 3.1427063941955566, "step": 4811, "token_acc": 0.28129413438189005 }, { "epoch": 2.820873644092641, "grad_norm": 0.2870615448386523, "learning_rate": 0.0002932544645695398, "loss": 3.13639497756958, "step": 4812, "token_acc": 0.28281617872187675 }, { "epoch": 2.8214599824098503, "grad_norm": 0.34447847084531236, "learning_rate": 0.00029325015319694753, "loss": 3.123680591583252, "step": 4813, "token_acc": 0.28504291601681525 }, { "epoch": 2.8220463207270594, "grad_norm": 0.3116127655316353, "learning_rate": 0.000293245840478713, "loss": 3.1213512420654297, "step": 4814, "token_acc": 0.28631178804491025 }, { "epoch": 2.8226326590442685, "grad_norm": 0.25576737437872527, "learning_rate": 0.00029324152641487693, "loss": 3.138035297393799, "step": 4815, "token_acc": 0.2847776384496868 }, { "epoch": 2.8232189973614776, "grad_norm": 0.25787697324500075, "learning_rate": 0.0002932372110054797, "loss": 3.12288498878479, "step": 4816, "token_acc": 0.2847317122186495 }, { "epoch": 2.8238053356786867, "grad_norm": 0.2992994459623034, "learning_rate": 0.0002932328942505619, "loss": 3.1149678230285645, "step": 4817, "token_acc": 0.2863905972393057 }, { "epoch": 2.824391673995896, "grad_norm": 0.2731976209267566, "learning_rate": 0.00029322857615016407, "loss": 3.118638753890991, "step": 4818, "token_acc": 0.2851220271218267 }, { "epoch": 2.8249780123131045, "grad_norm": 0.2523618647440558, "learning_rate": 0.00029322425670432676, "loss": 3.1266844272613525, "step": 4819, "token_acc": 0.28406320305551197 }, { "epoch": 2.8255643506303136, "grad_norm": 0.2741901247567916, "learning_rate": 0.0002932199359130905, "loss": 3.121201992034912, "step": 4820, "token_acc": 0.2859980293522792 }, { "epoch": 2.8261506889475227, "grad_norm": 0.2512626865801087, "learning_rate": 0.00029321561377649604, "loss": 3.0935726165771484, "step": 4821, "token_acc": 0.2865146230211967 }, { "epoch": 2.826737027264732, "grad_norm": 0.27293136031161014, "learning_rate": 0.0002932112902945838, "loss": 3.071885585784912, "step": 4822, "token_acc": 0.2919371286632777 }, { "epoch": 2.8273233655819405, "grad_norm": 0.3274905247227663, "learning_rate": 0.0002932069654673945, "loss": 3.0848751068115234, "step": 4823, "token_acc": 0.2890657331818529 }, { "epoch": 2.8279097038991496, "grad_norm": 0.2800857854942314, "learning_rate": 0.00029320263929496874, "loss": 3.117919445037842, "step": 4824, "token_acc": 0.28667495596311265 }, { "epoch": 2.8284960422163588, "grad_norm": 0.29551091979071137, "learning_rate": 0.0002931983117773471, "loss": 3.14457106590271, "step": 4825, "token_acc": 0.2818612118657492 }, { "epoch": 2.829082380533568, "grad_norm": 0.2992764607681541, "learning_rate": 0.00029319398291457034, "loss": 3.126473903656006, "step": 4826, "token_acc": 0.28361635593555795 }, { "epoch": 2.829668718850777, "grad_norm": 0.27993474524943124, "learning_rate": 0.00029318965270667903, "loss": 3.081585645675659, "step": 4827, "token_acc": 0.2912369465640949 }, { "epoch": 2.830255057167986, "grad_norm": 0.28834165412562, "learning_rate": 0.0002931853211537139, "loss": 3.1001791954040527, "step": 4828, "token_acc": 0.28575067024128686 }, { "epoch": 2.830841395485195, "grad_norm": 0.30523317654819143, "learning_rate": 0.00029318098825571563, "loss": 3.1548798084259033, "step": 4829, "token_acc": 0.2802185968180981 }, { "epoch": 2.831427733802404, "grad_norm": 0.26778805689058516, "learning_rate": 0.0002931766540127249, "loss": 3.068540096282959, "step": 4830, "token_acc": 0.292444714336319 }, { "epoch": 2.832014072119613, "grad_norm": 0.26983028423714406, "learning_rate": 0.00029317231842478244, "loss": 3.1479101181030273, "step": 4831, "token_acc": 0.2824498268030677 }, { "epoch": 2.832600410436822, "grad_norm": 0.29022877944336695, "learning_rate": 0.00029316798149192896, "loss": 3.0813260078430176, "step": 4832, "token_acc": 0.29194514465437893 }, { "epoch": 2.833186748754031, "grad_norm": 0.2827107637912602, "learning_rate": 0.00029316364321420524, "loss": 3.094301223754883, "step": 4833, "token_acc": 0.28942929357174063 }, { "epoch": 2.83377308707124, "grad_norm": 0.2914796846949472, "learning_rate": 0.000293159303591652, "loss": 3.1143198013305664, "step": 4834, "token_acc": 0.28593511842347064 }, { "epoch": 2.834359425388449, "grad_norm": 0.3298816808976584, "learning_rate": 0.00029315496262431, "loss": 3.13633394241333, "step": 4835, "token_acc": 0.2843331502574184 }, { "epoch": 2.834945763705658, "grad_norm": 0.27119629383261534, "learning_rate": 0.0002931506203122201, "loss": 3.0696592330932617, "step": 4836, "token_acc": 0.29377861143539225 }, { "epoch": 2.835532102022867, "grad_norm": 0.30175032629848947, "learning_rate": 0.00029314627665542295, "loss": 3.1369216442108154, "step": 4837, "token_acc": 0.28312175486753405 }, { "epoch": 2.8361184403400763, "grad_norm": 0.302949311866685, "learning_rate": 0.00029314193165395946, "loss": 3.1368658542633057, "step": 4838, "token_acc": 0.28322248389016846 }, { "epoch": 2.8367047786572854, "grad_norm": 0.30883963838799494, "learning_rate": 0.0002931375853078703, "loss": 3.1040472984313965, "step": 4839, "token_acc": 0.2867527979782351 }, { "epoch": 2.8372911169744945, "grad_norm": 0.2827985693863901, "learning_rate": 0.00029313323761719654, "loss": 3.1218366622924805, "step": 4840, "token_acc": 0.2836967814379112 }, { "epoch": 2.837877455291703, "grad_norm": 0.2681569708886454, "learning_rate": 0.00029312888858197886, "loss": 3.1114094257354736, "step": 4841, "token_acc": 0.28511330755505787 }, { "epoch": 2.8384637936089123, "grad_norm": 0.3264772174141351, "learning_rate": 0.0002931245382022581, "loss": 3.1159815788269043, "step": 4842, "token_acc": 0.28625545539829506 }, { "epoch": 2.8390501319261214, "grad_norm": 0.319322879288859, "learning_rate": 0.0002931201864780752, "loss": 3.1065726280212402, "step": 4843, "token_acc": 0.285871300141679 }, { "epoch": 2.8396364702433305, "grad_norm": 0.2650891938940965, "learning_rate": 0.000293115833409471, "loss": 3.0474016666412354, "step": 4844, "token_acc": 0.295455126206745 }, { "epoch": 2.840222808560539, "grad_norm": 0.3119062335788396, "learning_rate": 0.00029311147899648633, "loss": 3.1173930168151855, "step": 4845, "token_acc": 0.28416118143890845 }, { "epoch": 2.8408091468777483, "grad_norm": 0.28850440755766965, "learning_rate": 0.0002931071232391623, "loss": 3.1064767837524414, "step": 4846, "token_acc": 0.28648632904554866 }, { "epoch": 2.8413954851949574, "grad_norm": 0.34849386978601515, "learning_rate": 0.00029310276613753953, "loss": 3.077314853668213, "step": 4847, "token_acc": 0.2921700015028788 }, { "epoch": 2.8419818235121665, "grad_norm": 0.31798818534025625, "learning_rate": 0.0002930984076916592, "loss": 3.1515605449676514, "step": 4848, "token_acc": 0.2810243967207225 }, { "epoch": 2.8425681618293757, "grad_norm": 0.29246374304269834, "learning_rate": 0.00029309404790156215, "loss": 3.1486268043518066, "step": 4849, "token_acc": 0.2804683665229301 }, { "epoch": 2.8431545001465848, "grad_norm": 0.3309723062237501, "learning_rate": 0.0002930896867672893, "loss": 3.158156394958496, "step": 4850, "token_acc": 0.27954230235783634 }, { "epoch": 2.843740838463794, "grad_norm": 0.3424735959826253, "learning_rate": 0.00029308532428888167, "loss": 3.109546184539795, "step": 4851, "token_acc": 0.28682783827206987 }, { "epoch": 2.8443271767810026, "grad_norm": 0.3242010601750907, "learning_rate": 0.0002930809604663803, "loss": 3.1344964504241943, "step": 4852, "token_acc": 0.28407193895159333 }, { "epoch": 2.8449135150982117, "grad_norm": 0.2900361692375067, "learning_rate": 0.000293076595299826, "loss": 3.083594560623169, "step": 4853, "token_acc": 0.29146301464143803 }, { "epoch": 2.8454998534154208, "grad_norm": 0.31932065749919647, "learning_rate": 0.00029307222878925996, "loss": 3.1174509525299072, "step": 4854, "token_acc": 0.28473392024594435 }, { "epoch": 2.84608619173263, "grad_norm": 0.31590827138962746, "learning_rate": 0.0002930678609347231, "loss": 3.141587972640991, "step": 4855, "token_acc": 0.2844984834387304 }, { "epoch": 2.8466725300498386, "grad_norm": 0.27427109296086205, "learning_rate": 0.00029306349173625646, "loss": 3.107347011566162, "step": 4856, "token_acc": 0.28620114738818214 }, { "epoch": 2.8472588683670477, "grad_norm": 0.3118988287136445, "learning_rate": 0.00029305912119390113, "loss": 3.106328248977661, "step": 4857, "token_acc": 0.286497204558962 }, { "epoch": 2.847845206684257, "grad_norm": 0.318315578909619, "learning_rate": 0.00029305474930769814, "loss": 3.148857593536377, "step": 4858, "token_acc": 0.2800166847672653 }, { "epoch": 2.848431545001466, "grad_norm": 0.298690396112282, "learning_rate": 0.00029305037607768846, "loss": 3.1197948455810547, "step": 4859, "token_acc": 0.28621684219726434 }, { "epoch": 2.849017883318675, "grad_norm": 0.29569161439209757, "learning_rate": 0.00029304600150391335, "loss": 3.128751277923584, "step": 4860, "token_acc": 0.286149257280512 }, { "epoch": 2.849604221635884, "grad_norm": 0.2940780428381767, "learning_rate": 0.00029304162558641374, "loss": 3.104583740234375, "step": 4861, "token_acc": 0.28515424587252514 }, { "epoch": 2.850190559953093, "grad_norm": 0.30763720723307464, "learning_rate": 0.0002930372483252309, "loss": 3.1395905017852783, "step": 4862, "token_acc": 0.2815394177975759 }, { "epoch": 2.850776898270302, "grad_norm": 0.2969169871594266, "learning_rate": 0.00029303286972040576, "loss": 3.1214871406555176, "step": 4863, "token_acc": 0.2862974108393171 }, { "epoch": 2.851363236587511, "grad_norm": 0.312502708456956, "learning_rate": 0.0002930284897719796, "loss": 3.07090163230896, "step": 4864, "token_acc": 0.29187167578669854 }, { "epoch": 2.85194957490472, "grad_norm": 0.29267313294868036, "learning_rate": 0.00029302410847999356, "loss": 3.1061668395996094, "step": 4865, "token_acc": 0.2863634445581425 }, { "epoch": 2.852535913221929, "grad_norm": 0.24357207731341954, "learning_rate": 0.0002930197258444887, "loss": 3.1159253120422363, "step": 4866, "token_acc": 0.28773236923753065 }, { "epoch": 2.853122251539138, "grad_norm": 0.2783362557811744, "learning_rate": 0.0002930153418655062, "loss": 3.059241533279419, "step": 4867, "token_acc": 0.2928322504190838 }, { "epoch": 2.853708589856347, "grad_norm": 0.2838960647409691, "learning_rate": 0.0002930109565430873, "loss": 3.144043445587158, "step": 4868, "token_acc": 0.2822934491125016 }, { "epoch": 2.854294928173556, "grad_norm": 0.2565551470454247, "learning_rate": 0.0002930065698772732, "loss": 3.130599021911621, "step": 4869, "token_acc": 0.283473805006662 }, { "epoch": 2.8548812664907652, "grad_norm": 0.2591378229642998, "learning_rate": 0.00029300218186810505, "loss": 3.1789751052856445, "step": 4870, "token_acc": 0.2761540380081097 }, { "epoch": 2.8554676048079743, "grad_norm": 0.2521603663592778, "learning_rate": 0.0002929977925156241, "loss": 3.1506099700927734, "step": 4871, "token_acc": 0.2807923634991304 }, { "epoch": 2.8560539431251835, "grad_norm": 0.2964112174558061, "learning_rate": 0.0002929934018198716, "loss": 3.0899569988250732, "step": 4872, "token_acc": 0.287917724771943 }, { "epoch": 2.856640281442392, "grad_norm": 0.3105701653327218, "learning_rate": 0.0002929890097808888, "loss": 3.1177895069122314, "step": 4873, "token_acc": 0.28626774354034573 }, { "epoch": 2.8572266197596012, "grad_norm": 0.2950936350932301, "learning_rate": 0.0002929846163987169, "loss": 3.09696102142334, "step": 4874, "token_acc": 0.2871042463205912 }, { "epoch": 2.8578129580768104, "grad_norm": 0.32728913303465995, "learning_rate": 0.00029298022167339717, "loss": 3.1100754737854004, "step": 4875, "token_acc": 0.2867621918182918 }, { "epoch": 2.8583992963940195, "grad_norm": 0.28055158717238665, "learning_rate": 0.000292975825604971, "loss": 3.1094837188720703, "step": 4876, "token_acc": 0.2866343581867925 }, { "epoch": 2.858985634711228, "grad_norm": 0.28321546417950155, "learning_rate": 0.0002929714281934796, "loss": 3.133733034133911, "step": 4877, "token_acc": 0.2830215885576091 }, { "epoch": 2.8595719730284372, "grad_norm": 0.3164221953206162, "learning_rate": 0.0002929670294389643, "loss": 3.140878200531006, "step": 4878, "token_acc": 0.2818786657806192 }, { "epoch": 2.8601583113456464, "grad_norm": 0.32065432651004266, "learning_rate": 0.00029296262934146633, "loss": 3.1077091693878174, "step": 4879, "token_acc": 0.28559167609557956 }, { "epoch": 2.8607446496628555, "grad_norm": 0.2732841849190749, "learning_rate": 0.0002929582279010271, "loss": 3.107861042022705, "step": 4880, "token_acc": 0.2858103313039256 }, { "epoch": 2.8613309879800646, "grad_norm": 0.3052649279221656, "learning_rate": 0.000292953825117688, "loss": 3.1658401489257812, "step": 4881, "token_acc": 0.2781068206727932 }, { "epoch": 2.8619173262972737, "grad_norm": 0.2970572470124204, "learning_rate": 0.0002929494209914904, "loss": 3.118779182434082, "step": 4882, "token_acc": 0.2856068330795206 }, { "epoch": 2.862503664614483, "grad_norm": 0.31198031160947043, "learning_rate": 0.0002929450155224756, "loss": 3.1081762313842773, "step": 4883, "token_acc": 0.28829772360410283 }, { "epoch": 2.8630900029316915, "grad_norm": 0.31601388176329565, "learning_rate": 0.0002929406087106849, "loss": 3.1330833435058594, "step": 4884, "token_acc": 0.28466973679439295 }, { "epoch": 2.8636763412489006, "grad_norm": 0.2830113195935163, "learning_rate": 0.0002929362005561599, "loss": 3.100548505783081, "step": 4885, "token_acc": 0.2873959428923845 }, { "epoch": 2.8642626795661097, "grad_norm": 0.29921316637741135, "learning_rate": 0.00029293179105894184, "loss": 3.125476837158203, "step": 4886, "token_acc": 0.2838808884679541 }, { "epoch": 2.864849017883319, "grad_norm": 0.3335859726263728, "learning_rate": 0.0002929273802190722, "loss": 3.0884318351745605, "step": 4887, "token_acc": 0.2890766528081912 }, { "epoch": 2.8654353562005275, "grad_norm": 0.3089060955370666, "learning_rate": 0.00029292296803659244, "loss": 3.101276397705078, "step": 4888, "token_acc": 0.28810263558303095 }, { "epoch": 2.8660216945177366, "grad_norm": 0.30047636215590856, "learning_rate": 0.000292918554511544, "loss": 3.0985074043273926, "step": 4889, "token_acc": 0.28903013283218726 }, { "epoch": 2.8666080328349457, "grad_norm": 0.3134857563913906, "learning_rate": 0.00029291413964396834, "loss": 3.1573214530944824, "step": 4890, "token_acc": 0.2812182511341575 }, { "epoch": 2.867194371152155, "grad_norm": 0.2629266766148198, "learning_rate": 0.0002929097234339069, "loss": 3.0910210609436035, "step": 4891, "token_acc": 0.29000242822527733 }, { "epoch": 2.867780709469364, "grad_norm": 0.2915704342228827, "learning_rate": 0.0002929053058814012, "loss": 3.0773186683654785, "step": 4892, "token_acc": 0.29068148750147466 }, { "epoch": 2.868367047786573, "grad_norm": 0.3076166870050994, "learning_rate": 0.0002929008869864927, "loss": 3.143329620361328, "step": 4893, "token_acc": 0.28330985753470883 }, { "epoch": 2.868953386103782, "grad_norm": 0.27639408230821655, "learning_rate": 0.0002928964667492229, "loss": 3.1065673828125, "step": 4894, "token_acc": 0.2855755184027919 }, { "epoch": 2.869539724420991, "grad_norm": 0.2946722825662996, "learning_rate": 0.0002928920451696334, "loss": 3.151127338409424, "step": 4895, "token_acc": 0.28159459273053317 }, { "epoch": 2.8701260627382, "grad_norm": 0.2798906964861311, "learning_rate": 0.00029288762224776566, "loss": 3.058100938796997, "step": 4896, "token_acc": 0.2925107516570819 }, { "epoch": 2.870712401055409, "grad_norm": 0.3006552208621793, "learning_rate": 0.0002928831979836612, "loss": 3.095743179321289, "step": 4897, "token_acc": 0.29008229617252523 }, { "epoch": 2.871298739372618, "grad_norm": 0.2940403268361965, "learning_rate": 0.00029287877237736177, "loss": 3.1036224365234375, "step": 4898, "token_acc": 0.2869403492618208 }, { "epoch": 2.871885077689827, "grad_norm": 0.32453748369769375, "learning_rate": 0.00029287434542890866, "loss": 3.117668867111206, "step": 4899, "token_acc": 0.28508433106306025 }, { "epoch": 2.872471416007036, "grad_norm": 0.31176107990279756, "learning_rate": 0.0002928699171383437, "loss": 3.1080455780029297, "step": 4900, "token_acc": 0.28614597007088477 }, { "epoch": 2.873057754324245, "grad_norm": 0.30313176922850993, "learning_rate": 0.00029286548750570834, "loss": 3.0947012901306152, "step": 4901, "token_acc": 0.289301459440749 }, { "epoch": 2.873644092641454, "grad_norm": 0.2965457736440195, "learning_rate": 0.0002928610565310442, "loss": 3.1441471576690674, "step": 4902, "token_acc": 0.28153570012101736 }, { "epoch": 2.8742304309586633, "grad_norm": 0.31130153828995155, "learning_rate": 0.00029285662421439304, "loss": 3.0831656455993652, "step": 4903, "token_acc": 0.28918876510277736 }, { "epoch": 2.8748167692758724, "grad_norm": 0.31569128621032455, "learning_rate": 0.00029285219055579637, "loss": 3.1388185024261475, "step": 4904, "token_acc": 0.2811086659383922 }, { "epoch": 2.875403107593081, "grad_norm": 0.30562116025444946, "learning_rate": 0.0002928477555552958, "loss": 3.1299424171447754, "step": 4905, "token_acc": 0.2831447828456722 }, { "epoch": 2.87598944591029, "grad_norm": 0.3040373356176108, "learning_rate": 0.00029284331921293315, "loss": 3.138782024383545, "step": 4906, "token_acc": 0.2822918825836695 }, { "epoch": 2.8765757842274993, "grad_norm": 0.27788813694937736, "learning_rate": 0.0002928388815287499, "loss": 3.1150121688842773, "step": 4907, "token_acc": 0.28705276437461796 }, { "epoch": 2.8771621225447084, "grad_norm": 0.2913365583270703, "learning_rate": 0.0002928344425027879, "loss": 3.093143939971924, "step": 4908, "token_acc": 0.2889180873095509 }, { "epoch": 2.8777484608619175, "grad_norm": 0.29617297110367347, "learning_rate": 0.00029283000213508876, "loss": 3.1062145233154297, "step": 4909, "token_acc": 0.2859172128492038 }, { "epoch": 2.878334799179126, "grad_norm": 0.27817404090336395, "learning_rate": 0.00029282556042569427, "loss": 3.0848326683044434, "step": 4910, "token_acc": 0.28965601502802485 }, { "epoch": 2.8789211374963353, "grad_norm": 0.2909815980452108, "learning_rate": 0.00029282111737464605, "loss": 3.1346516609191895, "step": 4911, "token_acc": 0.28332517668355883 }, { "epoch": 2.8795074758135444, "grad_norm": 0.30011979876552053, "learning_rate": 0.0002928166729819859, "loss": 3.1066339015960693, "step": 4912, "token_acc": 0.28799454261054014 }, { "epoch": 2.8800938141307535, "grad_norm": 0.30858597954703165, "learning_rate": 0.00029281222724775554, "loss": 3.1208858489990234, "step": 4913, "token_acc": 0.28421937001563974 }, { "epoch": 2.8806801524479626, "grad_norm": 0.3048810946956163, "learning_rate": 0.00029280778017199674, "loss": 3.1051111221313477, "step": 4914, "token_acc": 0.2843144238107352 }, { "epoch": 2.8812664907651717, "grad_norm": 0.2503148079871603, "learning_rate": 0.0002928033317547513, "loss": 3.0708112716674805, "step": 4915, "token_acc": 0.2932530108032425 }, { "epoch": 2.8818528290823804, "grad_norm": 0.3278497290402753, "learning_rate": 0.00029279888199606097, "loss": 3.116889238357544, "step": 4916, "token_acc": 0.2858832006207623 }, { "epoch": 2.8824391673995895, "grad_norm": 0.3178528102144469, "learning_rate": 0.0002927944308959676, "loss": 3.134352207183838, "step": 4917, "token_acc": 0.2837621180631753 }, { "epoch": 2.8830255057167986, "grad_norm": 0.26289329138763, "learning_rate": 0.0002927899784545129, "loss": 3.1432456970214844, "step": 4918, "token_acc": 0.28338165052924186 }, { "epoch": 2.8836118440340077, "grad_norm": 0.2735276987729094, "learning_rate": 0.00029278552467173883, "loss": 3.1087403297424316, "step": 4919, "token_acc": 0.28726494740485153 }, { "epoch": 2.8841981823512164, "grad_norm": 0.2562728647980269, "learning_rate": 0.00029278106954768715, "loss": 3.088632583618164, "step": 4920, "token_acc": 0.28892174418946004 }, { "epoch": 2.8847845206684255, "grad_norm": 0.2840800829442021, "learning_rate": 0.00029277661308239975, "loss": 3.1328256130218506, "step": 4921, "token_acc": 0.2862577270565858 }, { "epoch": 2.8853708589856346, "grad_norm": 0.24464763726085684, "learning_rate": 0.00029277215527591843, "loss": 3.0734317302703857, "step": 4922, "token_acc": 0.2924629215942844 }, { "epoch": 2.8859571973028437, "grad_norm": 0.2597953911110076, "learning_rate": 0.0002927676961282851, "loss": 3.1095104217529297, "step": 4923, "token_acc": 0.2863353310608518 }, { "epoch": 2.886543535620053, "grad_norm": 0.2426772338052482, "learning_rate": 0.0002927632356395416, "loss": 3.0974559783935547, "step": 4924, "token_acc": 0.28842860201241705 }, { "epoch": 2.887129873937262, "grad_norm": 0.26898672966739984, "learning_rate": 0.00029275877380972995, "loss": 3.0966944694519043, "step": 4925, "token_acc": 0.28738041862090247 }, { "epoch": 2.887716212254471, "grad_norm": 0.28125507556368123, "learning_rate": 0.00029275431063889194, "loss": 3.1200003623962402, "step": 4926, "token_acc": 0.2860851974365373 }, { "epoch": 2.8883025505716797, "grad_norm": 0.26108841549529993, "learning_rate": 0.0002927498461270696, "loss": 3.1195297241210938, "step": 4927, "token_acc": 0.28657675552504575 }, { "epoch": 2.888888888888889, "grad_norm": 0.26447303883420087, "learning_rate": 0.0002927453802743048, "loss": 3.100036382675171, "step": 4928, "token_acc": 0.28860276486705777 }, { "epoch": 2.889475227206098, "grad_norm": 0.30022872527426225, "learning_rate": 0.00029274091308063946, "loss": 3.0976741313934326, "step": 4929, "token_acc": 0.2867228712944708 }, { "epoch": 2.890061565523307, "grad_norm": 0.38994051818640874, "learning_rate": 0.0002927364445461156, "loss": 3.0919029712677, "step": 4930, "token_acc": 0.2888243114217998 }, { "epoch": 2.8906479038405157, "grad_norm": 0.33524114510752634, "learning_rate": 0.0002927319746707752, "loss": 3.1277573108673096, "step": 4931, "token_acc": 0.28400379177292084 }, { "epoch": 2.891234242157725, "grad_norm": 0.33577874867564966, "learning_rate": 0.00029272750345466024, "loss": 3.1251015663146973, "step": 4932, "token_acc": 0.283281615820988 }, { "epoch": 2.891820580474934, "grad_norm": 0.317200780972459, "learning_rate": 0.0002927230308978127, "loss": 3.088057518005371, "step": 4933, "token_acc": 0.29048793931599254 }, { "epoch": 2.892406918792143, "grad_norm": 0.26743418629055843, "learning_rate": 0.0002927185570002746, "loss": 3.1403372287750244, "step": 4934, "token_acc": 0.2835100154402545 }, { "epoch": 2.892993257109352, "grad_norm": 0.30650204487135085, "learning_rate": 0.000292714081762088, "loss": 3.112123966217041, "step": 4935, "token_acc": 0.28606729166331657 }, { "epoch": 2.8935795954265613, "grad_norm": 0.2642426748034353, "learning_rate": 0.0002927096051832949, "loss": 3.1048831939697266, "step": 4936, "token_acc": 0.28852212297933566 }, { "epoch": 2.8941659337437704, "grad_norm": 0.2721247602872461, "learning_rate": 0.00029270512726393733, "loss": 3.145318031311035, "step": 4937, "token_acc": 0.2815633919241387 }, { "epoch": 2.894752272060979, "grad_norm": 0.32184859082025835, "learning_rate": 0.00029270064800405744, "loss": 3.104903221130371, "step": 4938, "token_acc": 0.2861115642453761 }, { "epoch": 2.895338610378188, "grad_norm": 0.2910367872232558, "learning_rate": 0.00029269616740369725, "loss": 3.131070613861084, "step": 4939, "token_acc": 0.2843429870593367 }, { "epoch": 2.8959249486953973, "grad_norm": 0.2727329077051126, "learning_rate": 0.00029269168546289877, "loss": 3.078613758087158, "step": 4940, "token_acc": 0.29349566703911767 }, { "epoch": 2.8965112870126064, "grad_norm": 0.27612225416491676, "learning_rate": 0.0002926872021817043, "loss": 3.0929245948791504, "step": 4941, "token_acc": 0.28985038028565213 }, { "epoch": 2.897097625329815, "grad_norm": 0.26991637512739985, "learning_rate": 0.00029268271756015577, "loss": 3.121178388595581, "step": 4942, "token_acc": 0.285773841091929 }, { "epoch": 2.897683963647024, "grad_norm": 0.2866764291857337, "learning_rate": 0.0002926782315982954, "loss": 3.06008243560791, "step": 4943, "token_acc": 0.29359372775693177 }, { "epoch": 2.8982703019642333, "grad_norm": 0.25828128337205913, "learning_rate": 0.00029267374429616525, "loss": 3.1210179328918457, "step": 4944, "token_acc": 0.2872359198982266 }, { "epoch": 2.8988566402814424, "grad_norm": 0.2711878668036536, "learning_rate": 0.0002926692556538076, "loss": 3.124605655670166, "step": 4945, "token_acc": 0.2849804202282201 }, { "epoch": 2.8994429785986515, "grad_norm": 0.2801277060597001, "learning_rate": 0.0002926647656712645, "loss": 3.1145777702331543, "step": 4946, "token_acc": 0.2870258988515674 }, { "epoch": 2.9000293169158606, "grad_norm": 0.27688706534969365, "learning_rate": 0.0002926602743485782, "loss": 3.107100486755371, "step": 4947, "token_acc": 0.28582339104677096 }, { "epoch": 2.9006156552330697, "grad_norm": 0.3092188678568817, "learning_rate": 0.00029265578168579087, "loss": 3.0880823135375977, "step": 4948, "token_acc": 0.29001466968348294 }, { "epoch": 2.9012019935502784, "grad_norm": 0.30680723722334424, "learning_rate": 0.00029265128768294463, "loss": 3.162522315979004, "step": 4949, "token_acc": 0.27888485168791705 }, { "epoch": 2.9017883318674875, "grad_norm": 0.2786024667163245, "learning_rate": 0.0002926467923400818, "loss": 3.164005994796753, "step": 4950, "token_acc": 0.2801493339770458 }, { "epoch": 2.9023746701846966, "grad_norm": 0.29592536937037517, "learning_rate": 0.00029264229565724464, "loss": 3.1397287845611572, "step": 4951, "token_acc": 0.28402484680337126 }, { "epoch": 2.9029610085019057, "grad_norm": 0.2894062453717159, "learning_rate": 0.00029263779763447523, "loss": 3.1011850833892822, "step": 4952, "token_acc": 0.2874964825063315 }, { "epoch": 2.9035473468191144, "grad_norm": 0.2785809159898299, "learning_rate": 0.000292633298271816, "loss": 3.144193649291992, "step": 4953, "token_acc": 0.28225386417333237 }, { "epoch": 2.9041336851363235, "grad_norm": 0.29062543122106116, "learning_rate": 0.00029262879756930906, "loss": 3.0774881839752197, "step": 4954, "token_acc": 0.2922983673970577 }, { "epoch": 2.9047200234535326, "grad_norm": 0.33999937328734076, "learning_rate": 0.0002926242955269968, "loss": 3.132823944091797, "step": 4955, "token_acc": 0.2824032670784978 }, { "epoch": 2.9053063617707418, "grad_norm": 0.3348172070761862, "learning_rate": 0.0002926197921449215, "loss": 3.140523672103882, "step": 4956, "token_acc": 0.2835831732128086 }, { "epoch": 2.905892700087951, "grad_norm": 0.37377312262468865, "learning_rate": 0.00029261528742312537, "loss": 3.127758502960205, "step": 4957, "token_acc": 0.2835527008529009 }, { "epoch": 2.90647903840516, "grad_norm": 0.3119879299794423, "learning_rate": 0.00029261078136165084, "loss": 3.130155086517334, "step": 4958, "token_acc": 0.2832016281044804 }, { "epoch": 2.9070653767223686, "grad_norm": 0.27538770872274165, "learning_rate": 0.0002926062739605401, "loss": 3.1095874309539795, "step": 4959, "token_acc": 0.286663998282338 }, { "epoch": 2.9076517150395778, "grad_norm": 0.328417701159959, "learning_rate": 0.0002926017652198357, "loss": 3.1445393562316895, "step": 4960, "token_acc": 0.28257410691664553 }, { "epoch": 2.908238053356787, "grad_norm": 0.27324969970569907, "learning_rate": 0.00029259725513957984, "loss": 3.1246590614318848, "step": 4961, "token_acc": 0.2841856860561025 }, { "epoch": 2.908824391673996, "grad_norm": 0.3133821787387028, "learning_rate": 0.00029259274371981495, "loss": 3.1496641635894775, "step": 4962, "token_acc": 0.2809899605283491 }, { "epoch": 2.909410729991205, "grad_norm": 0.33152757421254747, "learning_rate": 0.0002925882309605833, "loss": 3.0999300479888916, "step": 4963, "token_acc": 0.2875515031707073 }, { "epoch": 2.9099970683084138, "grad_norm": 0.29249392061176943, "learning_rate": 0.0002925837168619274, "loss": 3.1073200702667236, "step": 4964, "token_acc": 0.2869334363561618 }, { "epoch": 2.910583406625623, "grad_norm": 0.34548501849537905, "learning_rate": 0.0002925792014238896, "loss": 3.110381603240967, "step": 4965, "token_acc": 0.2880349123831474 }, { "epoch": 2.911169744942832, "grad_norm": 0.2650746599302164, "learning_rate": 0.00029257468464651237, "loss": 3.109809398651123, "step": 4966, "token_acc": 0.28615309809006445 }, { "epoch": 2.911756083260041, "grad_norm": 0.29624180585907084, "learning_rate": 0.00029257016652983807, "loss": 3.0853514671325684, "step": 4967, "token_acc": 0.2868831126170857 }, { "epoch": 2.91234242157725, "grad_norm": 0.3126753364793521, "learning_rate": 0.00029256564707390916, "loss": 3.1212120056152344, "step": 4968, "token_acc": 0.2846807918198553 }, { "epoch": 2.9129287598944593, "grad_norm": 0.3143575070828216, "learning_rate": 0.0002925611262787681, "loss": 3.1368916034698486, "step": 4969, "token_acc": 0.28324516866477045 }, { "epoch": 2.913515098211668, "grad_norm": 0.29151622212375655, "learning_rate": 0.0002925566041444574, "loss": 3.078115463256836, "step": 4970, "token_acc": 0.29077776789870347 }, { "epoch": 2.914101436528877, "grad_norm": 0.3090904460134409, "learning_rate": 0.00029255208067101947, "loss": 3.0871689319610596, "step": 4971, "token_acc": 0.2905621194720199 }, { "epoch": 2.914687774846086, "grad_norm": 0.2683446758484812, "learning_rate": 0.00029254755585849686, "loss": 3.050039768218994, "step": 4972, "token_acc": 0.2954593043545698 }, { "epoch": 2.9152741131632953, "grad_norm": 0.3099663899704349, "learning_rate": 0.00029254302970693204, "loss": 3.0935826301574707, "step": 4973, "token_acc": 0.2887233305936049 }, { "epoch": 2.915860451480504, "grad_norm": 0.34269777317285155, "learning_rate": 0.00029253850221636757, "loss": 3.1380667686462402, "step": 4974, "token_acc": 0.28417702935411426 }, { "epoch": 2.916446789797713, "grad_norm": 0.3022665232041975, "learning_rate": 0.0002925339733868459, "loss": 3.1303019523620605, "step": 4975, "token_acc": 0.2833759126454073 }, { "epoch": 2.917033128114922, "grad_norm": 0.31008959576603373, "learning_rate": 0.00029252944321840954, "loss": 3.124453544616699, "step": 4976, "token_acc": 0.2833341752967921 }, { "epoch": 2.9176194664321313, "grad_norm": 0.31857098831353664, "learning_rate": 0.00029252491171110126, "loss": 3.108292579650879, "step": 4977, "token_acc": 0.28709394671837235 }, { "epoch": 2.9182058047493404, "grad_norm": 0.28319523898828003, "learning_rate": 0.0002925203788649634, "loss": 3.046318531036377, "step": 4978, "token_acc": 0.2957385453380327 }, { "epoch": 2.9187921430665495, "grad_norm": 0.2951248402759101, "learning_rate": 0.00029251584468003867, "loss": 3.0906453132629395, "step": 4979, "token_acc": 0.28703728494091063 }, { "epoch": 2.9193784813837587, "grad_norm": 0.2798656693356633, "learning_rate": 0.00029251130915636963, "loss": 3.0820045471191406, "step": 4980, "token_acc": 0.2912473430243686 }, { "epoch": 2.9199648197009673, "grad_norm": 0.33480404943122455, "learning_rate": 0.0002925067722939989, "loss": 3.1203885078430176, "step": 4981, "token_acc": 0.2862848715742903 }, { "epoch": 2.9205511580181764, "grad_norm": 0.2635372628344961, "learning_rate": 0.000292502234092969, "loss": 3.09853458404541, "step": 4982, "token_acc": 0.28715111557531203 }, { "epoch": 2.9211374963353856, "grad_norm": 0.29815940073322733, "learning_rate": 0.00029249769455332264, "loss": 3.0711936950683594, "step": 4983, "token_acc": 0.29259683787275076 }, { "epoch": 2.9217238346525947, "grad_norm": 0.33360922274189964, "learning_rate": 0.0002924931536751025, "loss": 3.112743854522705, "step": 4984, "token_acc": 0.2861688634947954 }, { "epoch": 2.9223101729698033, "grad_norm": 0.29455893712412884, "learning_rate": 0.00029248861145835116, "loss": 3.1136324405670166, "step": 4985, "token_acc": 0.2875326592909465 }, { "epoch": 2.9228965112870124, "grad_norm": 0.26459221464377275, "learning_rate": 0.0002924840679031114, "loss": 3.1287808418273926, "step": 4986, "token_acc": 0.285265152413555 }, { "epoch": 2.9234828496042216, "grad_norm": 0.3051166536420172, "learning_rate": 0.0002924795230094257, "loss": 3.1437859535217285, "step": 4987, "token_acc": 0.28179214742259484 }, { "epoch": 2.9240691879214307, "grad_norm": 0.32042602870288234, "learning_rate": 0.000292474976777337, "loss": 3.1082262992858887, "step": 4988, "token_acc": 0.28618188511536213 }, { "epoch": 2.92465552623864, "grad_norm": 0.3065393347162595, "learning_rate": 0.0002924704292068878, "loss": 3.1299118995666504, "step": 4989, "token_acc": 0.284432351068141 }, { "epoch": 2.925241864555849, "grad_norm": 0.33469868278724907, "learning_rate": 0.0002924658802981209, "loss": 3.1187663078308105, "step": 4990, "token_acc": 0.2827876358180947 }, { "epoch": 2.925828202873058, "grad_norm": 0.31055680710044126, "learning_rate": 0.00029246133005107907, "loss": 3.102005958557129, "step": 4991, "token_acc": 0.2883572534195369 }, { "epoch": 2.9264145411902667, "grad_norm": 0.2837105422550072, "learning_rate": 0.00029245677846580497, "loss": 3.1385719776153564, "step": 4992, "token_acc": 0.2826013687602887 }, { "epoch": 2.927000879507476, "grad_norm": 0.32902865942065007, "learning_rate": 0.00029245222554234143, "loss": 3.0618696212768555, "step": 4993, "token_acc": 0.29262519535685183 }, { "epoch": 2.927587217824685, "grad_norm": 0.30300505921728804, "learning_rate": 0.00029244767128073113, "loss": 3.0951180458068848, "step": 4994, "token_acc": 0.2880493842867215 }, { "epoch": 2.928173556141894, "grad_norm": 0.28681627713445695, "learning_rate": 0.000292443115681017, "loss": 3.1173095703125, "step": 4995, "token_acc": 0.28474962197803305 }, { "epoch": 2.9287598944591027, "grad_norm": 0.2910267767491318, "learning_rate": 0.0002924385587432417, "loss": 3.106464385986328, "step": 4996, "token_acc": 0.28874393660164205 }, { "epoch": 2.929346232776312, "grad_norm": 0.27645558923889724, "learning_rate": 0.0002924340004674481, "loss": 3.0502796173095703, "step": 4997, "token_acc": 0.2948054248266788 }, { "epoch": 2.929932571093521, "grad_norm": 0.31975705283015143, "learning_rate": 0.00029242944085367895, "loss": 3.101111888885498, "step": 4998, "token_acc": 0.2882396662324512 }, { "epoch": 2.93051890941073, "grad_norm": 0.28387466329300026, "learning_rate": 0.00029242487990197713, "loss": 3.1161346435546875, "step": 4999, "token_acc": 0.2851972000195515 }, { "epoch": 2.931105247727939, "grad_norm": 0.26470443898442736, "learning_rate": 0.00029242031761238555, "loss": 3.092757225036621, "step": 5000, "token_acc": 0.28801156081233703 }, { "epoch": 2.9316915860451482, "grad_norm": 0.2935624392966686, "learning_rate": 0.00029241575398494693, "loss": 3.112367630004883, "step": 5001, "token_acc": 0.286848041894107 }, { "epoch": 2.9322779243623573, "grad_norm": 0.2717630801157289, "learning_rate": 0.00029241118901970426, "loss": 3.11051082611084, "step": 5002, "token_acc": 0.2873603647442136 }, { "epoch": 2.932864262679566, "grad_norm": 0.2965877518109553, "learning_rate": 0.0002924066227167003, "loss": 3.133275270462036, "step": 5003, "token_acc": 0.2820839928496618 }, { "epoch": 2.933450600996775, "grad_norm": 0.2945892762268756, "learning_rate": 0.00029240205507597805, "loss": 3.107689380645752, "step": 5004, "token_acc": 0.28653478982127767 }, { "epoch": 2.9340369393139842, "grad_norm": 0.270451132271406, "learning_rate": 0.00029239748609758044, "loss": 3.1071999073028564, "step": 5005, "token_acc": 0.28731238449225566 }, { "epoch": 2.9346232776311933, "grad_norm": 0.25780841343004535, "learning_rate": 0.0002923929157815503, "loss": 3.0937864780426025, "step": 5006, "token_acc": 0.28983774690247777 }, { "epoch": 2.935209615948402, "grad_norm": 0.27703455323971504, "learning_rate": 0.00029238834412793056, "loss": 3.156954288482666, "step": 5007, "token_acc": 0.2806017820251207 }, { "epoch": 2.935795954265611, "grad_norm": 0.2798221079752183, "learning_rate": 0.0002923837711367642, "loss": 3.123319625854492, "step": 5008, "token_acc": 0.2844411184434315 }, { "epoch": 2.9363822925828202, "grad_norm": 0.2879098214791004, "learning_rate": 0.0002923791968080942, "loss": 3.1486434936523438, "step": 5009, "token_acc": 0.28070470650138757 }, { "epoch": 2.9369686309000294, "grad_norm": 0.2808736609517171, "learning_rate": 0.0002923746211419635, "loss": 3.109757661819458, "step": 5010, "token_acc": 0.28527400533038855 }, { "epoch": 2.9375549692172385, "grad_norm": 0.2533734691125129, "learning_rate": 0.00029237004413841506, "loss": 3.1228911876678467, "step": 5011, "token_acc": 0.2854579006597439 }, { "epoch": 2.9381413075344476, "grad_norm": 0.2969630460629132, "learning_rate": 0.00029236546579749194, "loss": 3.106259346008301, "step": 5012, "token_acc": 0.28632474191989776 }, { "epoch": 2.9387276458516562, "grad_norm": 0.2842042215648192, "learning_rate": 0.0002923608861192371, "loss": 3.112988233566284, "step": 5013, "token_acc": 0.28704147436632044 }, { "epoch": 2.9393139841688654, "grad_norm": 0.299648245266583, "learning_rate": 0.0002923563051036936, "loss": 3.117884874343872, "step": 5014, "token_acc": 0.2857277759770505 }, { "epoch": 2.9399003224860745, "grad_norm": 0.2954187035281139, "learning_rate": 0.00029235172275090437, "loss": 3.1189539432525635, "step": 5015, "token_acc": 0.28490329394679514 }, { "epoch": 2.9404866608032836, "grad_norm": 0.284827769950012, "learning_rate": 0.0002923471390609125, "loss": 3.149946928024292, "step": 5016, "token_acc": 0.27916704880111687 }, { "epoch": 2.9410729991204922, "grad_norm": 0.28029557699564717, "learning_rate": 0.00029234255403376116, "loss": 3.0718367099761963, "step": 5017, "token_acc": 0.2931087110247397 }, { "epoch": 2.9416593374377014, "grad_norm": 0.2898818814431397, "learning_rate": 0.0002923379676694933, "loss": 3.095771074295044, "step": 5018, "token_acc": 0.28719309602021986 }, { "epoch": 2.9422456757549105, "grad_norm": 0.30172115280909606, "learning_rate": 0.00029233337996815203, "loss": 3.1002144813537598, "step": 5019, "token_acc": 0.28762030680907 }, { "epoch": 2.9428320140721196, "grad_norm": 0.26771476175976017, "learning_rate": 0.0002923287909297805, "loss": 3.1250195503234863, "step": 5020, "token_acc": 0.28476341286668994 }, { "epoch": 2.9434183523893287, "grad_norm": 0.27568986992191624, "learning_rate": 0.0002923242005544217, "loss": 3.116955280303955, "step": 5021, "token_acc": 0.2868338807498634 }, { "epoch": 2.944004690706538, "grad_norm": 0.283915465374956, "learning_rate": 0.00029231960884211884, "loss": 3.095149040222168, "step": 5022, "token_acc": 0.2887512456798762 }, { "epoch": 2.944591029023747, "grad_norm": 0.3039138594946964, "learning_rate": 0.00029231501579291507, "loss": 3.133984088897705, "step": 5023, "token_acc": 0.28458802485602325 }, { "epoch": 2.9451773673409556, "grad_norm": 0.3267981077909956, "learning_rate": 0.0002923104214068535, "loss": 3.113675117492676, "step": 5024, "token_acc": 0.2880666222518321 }, { "epoch": 2.9457637056581647, "grad_norm": 0.26727105241606786, "learning_rate": 0.00029230582568397727, "loss": 3.1102640628814697, "step": 5025, "token_acc": 0.2853624773427761 }, { "epoch": 2.946350043975374, "grad_norm": 0.30394459750023173, "learning_rate": 0.00029230122862432956, "loss": 3.132255792617798, "step": 5026, "token_acc": 0.28444027348089 }, { "epoch": 2.946936382292583, "grad_norm": 0.31726410101999647, "learning_rate": 0.00029229663022795353, "loss": 3.1948556900024414, "step": 5027, "token_acc": 0.27442031538158684 }, { "epoch": 2.9475227206097916, "grad_norm": 0.2914379381217216, "learning_rate": 0.00029229203049489246, "loss": 3.1268067359924316, "step": 5028, "token_acc": 0.28450594536607005 }, { "epoch": 2.9481090589270007, "grad_norm": 0.26689470541833427, "learning_rate": 0.00029228742942518943, "loss": 3.0843310356140137, "step": 5029, "token_acc": 0.2893726158338455 }, { "epoch": 2.94869539724421, "grad_norm": 0.3141608036130159, "learning_rate": 0.0002922828270188878, "loss": 3.1054859161376953, "step": 5030, "token_acc": 0.28667206195055384 }, { "epoch": 2.949281735561419, "grad_norm": 0.2909470277842254, "learning_rate": 0.00029227822327603073, "loss": 3.0878162384033203, "step": 5031, "token_acc": 0.2888544265390061 }, { "epoch": 2.949868073878628, "grad_norm": 0.2754065362476325, "learning_rate": 0.00029227361819666146, "loss": 3.1084535121917725, "step": 5032, "token_acc": 0.2874672435696403 }, { "epoch": 2.950454412195837, "grad_norm": 0.33081130899280703, "learning_rate": 0.0002922690117808233, "loss": 3.1709115505218506, "step": 5033, "token_acc": 0.2792268895848166 }, { "epoch": 2.9510407505130463, "grad_norm": 0.3024505638941856, "learning_rate": 0.00029226440402855945, "loss": 3.084867000579834, "step": 5034, "token_acc": 0.2897134553477198 }, { "epoch": 2.951627088830255, "grad_norm": 0.3196573221832568, "learning_rate": 0.0002922597949399132, "loss": 3.124715805053711, "step": 5035, "token_acc": 0.28550095539867154 }, { "epoch": 2.952213427147464, "grad_norm": 0.3125207643151057, "learning_rate": 0.0002922551845149279, "loss": 3.11138916015625, "step": 5036, "token_acc": 0.28565198767604727 }, { "epoch": 2.952799765464673, "grad_norm": 0.2841398455259827, "learning_rate": 0.0002922505727536469, "loss": 3.1248779296875, "step": 5037, "token_acc": 0.2834396002982374 }, { "epoch": 2.9533861037818823, "grad_norm": 0.27615971932154265, "learning_rate": 0.00029224595965611337, "loss": 3.082575798034668, "step": 5038, "token_acc": 0.29088427728495164 }, { "epoch": 2.953972442099091, "grad_norm": 0.2727563166715515, "learning_rate": 0.0002922413452223707, "loss": 3.1098690032958984, "step": 5039, "token_acc": 0.28775291805886616 }, { "epoch": 2.9545587804163, "grad_norm": 0.30534677443510827, "learning_rate": 0.0002922367294524624, "loss": 3.1228795051574707, "step": 5040, "token_acc": 0.28585293000359063 }, { "epoch": 2.955145118733509, "grad_norm": 0.3164136291629472, "learning_rate": 0.00029223211234643155, "loss": 3.166372060775757, "step": 5041, "token_acc": 0.2790833258588214 }, { "epoch": 2.9557314570507183, "grad_norm": 0.2424803757181179, "learning_rate": 0.00029222749390432173, "loss": 3.1350111961364746, "step": 5042, "token_acc": 0.28367948644653346 }, { "epoch": 2.9563177953679274, "grad_norm": 0.29425809889211346, "learning_rate": 0.00029222287412617625, "loss": 3.1102657318115234, "step": 5043, "token_acc": 0.286075106808941 }, { "epoch": 2.9569041336851365, "grad_norm": 0.2686714713181295, "learning_rate": 0.0002922182530120385, "loss": 3.094804048538208, "step": 5044, "token_acc": 0.28902560683057194 }, { "epoch": 2.9574904720023456, "grad_norm": 0.2552977709804675, "learning_rate": 0.0002922136305619519, "loss": 3.1175918579101562, "step": 5045, "token_acc": 0.2844833977651856 }, { "epoch": 2.9580768103195543, "grad_norm": 0.26328315869907826, "learning_rate": 0.00029220900677595993, "loss": 3.119746208190918, "step": 5046, "token_acc": 0.2870929242158172 }, { "epoch": 2.9586631486367634, "grad_norm": 0.2660971791319296, "learning_rate": 0.00029220438165410595, "loss": 3.0971240997314453, "step": 5047, "token_acc": 0.28673313307410203 }, { "epoch": 2.9592494869539725, "grad_norm": 0.27713531753903836, "learning_rate": 0.0002921997551964334, "loss": 3.147752285003662, "step": 5048, "token_acc": 0.2824378183390302 }, { "epoch": 2.9598358252711816, "grad_norm": 0.2885836531869793, "learning_rate": 0.0002921951274029858, "loss": 3.13508939743042, "step": 5049, "token_acc": 0.28274861131268886 }, { "epoch": 2.9604221635883903, "grad_norm": 0.2486620316568557, "learning_rate": 0.0002921904982738066, "loss": 3.0889055728912354, "step": 5050, "token_acc": 0.2899389512409693 }, { "epoch": 2.9610085019055994, "grad_norm": 0.25213031562449134, "learning_rate": 0.0002921858678089392, "loss": 3.1143767833709717, "step": 5051, "token_acc": 0.28602060744831304 }, { "epoch": 2.9615948402228085, "grad_norm": 0.2823484359610412, "learning_rate": 0.00029218123600842724, "loss": 3.1275830268859863, "step": 5052, "token_acc": 0.2854069470991948 }, { "epoch": 2.9621811785400176, "grad_norm": 0.2681195545528729, "learning_rate": 0.00029217660287231414, "loss": 3.061953544616699, "step": 5053, "token_acc": 0.2934716827206343 }, { "epoch": 2.9627675168572267, "grad_norm": 0.2738560149521086, "learning_rate": 0.0002921719684006434, "loss": 3.1011276245117188, "step": 5054, "token_acc": 0.2873449538416601 }, { "epoch": 2.963353855174436, "grad_norm": 0.28788269346238926, "learning_rate": 0.00029216733259345866, "loss": 3.0788917541503906, "step": 5055, "token_acc": 0.2921459164865474 }, { "epoch": 2.963940193491645, "grad_norm": 0.2684540787524686, "learning_rate": 0.00029216269545080334, "loss": 3.1130213737487793, "step": 5056, "token_acc": 0.28596392017810657 }, { "epoch": 2.9645265318088536, "grad_norm": 0.29280314792644013, "learning_rate": 0.0002921580569727211, "loss": 3.08811354637146, "step": 5057, "token_acc": 0.2899706701977346 }, { "epoch": 2.9651128701260627, "grad_norm": 0.2736859729891114, "learning_rate": 0.00029215341715925546, "loss": 3.064713478088379, "step": 5058, "token_acc": 0.2930078083595379 }, { "epoch": 2.965699208443272, "grad_norm": 0.3119371502553631, "learning_rate": 0.00029214877601045007, "loss": 3.075654983520508, "step": 5059, "token_acc": 0.29036854419788266 }, { "epoch": 2.966285546760481, "grad_norm": 0.30838257067779073, "learning_rate": 0.00029214413352634844, "loss": 3.1679680347442627, "step": 5060, "token_acc": 0.27908365695281995 }, { "epoch": 2.9668718850776896, "grad_norm": 0.28086665075064576, "learning_rate": 0.0002921394897069942, "loss": 3.1128907203674316, "step": 5061, "token_acc": 0.2856346497299925 }, { "epoch": 2.9674582233948987, "grad_norm": 0.33922836876725826, "learning_rate": 0.00029213484455243097, "loss": 3.125619888305664, "step": 5062, "token_acc": 0.2834312597959061 }, { "epoch": 2.968044561712108, "grad_norm": 0.29732833116121965, "learning_rate": 0.0002921301980627025, "loss": 3.076028347015381, "step": 5063, "token_acc": 0.2908988888402705 }, { "epoch": 2.968630900029317, "grad_norm": 0.28312112236743636, "learning_rate": 0.00029212555023785226, "loss": 3.0619237422943115, "step": 5064, "token_acc": 0.2926577472663208 }, { "epoch": 2.969217238346526, "grad_norm": 0.2896989396851186, "learning_rate": 0.00029212090107792396, "loss": 3.0955214500427246, "step": 5065, "token_acc": 0.2894506555220841 }, { "epoch": 2.969803576663735, "grad_norm": 0.25909225964845883, "learning_rate": 0.0002921162505829614, "loss": 3.1067535877227783, "step": 5066, "token_acc": 0.2882758070520662 }, { "epoch": 2.970389914980944, "grad_norm": 0.2901746444673933, "learning_rate": 0.0002921115987530081, "loss": 3.096381664276123, "step": 5067, "token_acc": 0.2878427485959696 }, { "epoch": 2.970976253298153, "grad_norm": 0.2826122913154138, "learning_rate": 0.0002921069455881079, "loss": 3.0496273040771484, "step": 5068, "token_acc": 0.29456771028496864 }, { "epoch": 2.971562591615362, "grad_norm": 0.30532828055029776, "learning_rate": 0.00029210229108830437, "loss": 3.0849170684814453, "step": 5069, "token_acc": 0.2910914693957188 }, { "epoch": 2.972148929932571, "grad_norm": 0.30650939828861495, "learning_rate": 0.0002920976352536413, "loss": 3.1432456970214844, "step": 5070, "token_acc": 0.28116453203025066 }, { "epoch": 2.97273526824978, "grad_norm": 0.28854117470965823, "learning_rate": 0.00029209297808416247, "loss": 3.1159439086914062, "step": 5071, "token_acc": 0.2846584576546575 }, { "epoch": 2.973321606566989, "grad_norm": 0.2880506681741183, "learning_rate": 0.00029208831957991155, "loss": 3.112246036529541, "step": 5072, "token_acc": 0.28675354218454874 }, { "epoch": 2.973907944884198, "grad_norm": 0.3210323389304767, "learning_rate": 0.00029208365974093235, "loss": 3.1248300075531006, "step": 5073, "token_acc": 0.28411261349532957 }, { "epoch": 2.974494283201407, "grad_norm": 0.2823815269355905, "learning_rate": 0.0002920789985672686, "loss": 3.0823240280151367, "step": 5074, "token_acc": 0.28983230023844686 }, { "epoch": 2.9750806215186163, "grad_norm": 0.3019603247716682, "learning_rate": 0.0002920743360589642, "loss": 3.134385585784912, "step": 5075, "token_acc": 0.28310445927303346 }, { "epoch": 2.9756669598358254, "grad_norm": 0.277972235832804, "learning_rate": 0.0002920696722160628, "loss": 3.099012613296509, "step": 5076, "token_acc": 0.2894199174319656 }, { "epoch": 2.9762532981530345, "grad_norm": 0.31332694927412935, "learning_rate": 0.00029206500703860824, "loss": 3.102360248565674, "step": 5077, "token_acc": 0.28587114342256875 }, { "epoch": 2.976839636470243, "grad_norm": 0.3085274597774751, "learning_rate": 0.0002920603405266444, "loss": 3.0702714920043945, "step": 5078, "token_acc": 0.2932083927694195 }, { "epoch": 2.9774259747874523, "grad_norm": 0.2934084141542044, "learning_rate": 0.0002920556726802151, "loss": 3.0628905296325684, "step": 5079, "token_acc": 0.2943366621361363 }, { "epoch": 2.9780123131046614, "grad_norm": 0.3034667084742859, "learning_rate": 0.0002920510034993642, "loss": 3.1041674613952637, "step": 5080, "token_acc": 0.28816404501400184 }, { "epoch": 2.9785986514218705, "grad_norm": 0.30504339649800805, "learning_rate": 0.0002920463329841355, "loss": 3.1233677864074707, "step": 5081, "token_acc": 0.283686965375702 }, { "epoch": 2.979184989739079, "grad_norm": 0.28168778645511033, "learning_rate": 0.00029204166113457286, "loss": 3.1091670989990234, "step": 5082, "token_acc": 0.2861745376247329 }, { "epoch": 2.9797713280562883, "grad_norm": 0.3086575599636707, "learning_rate": 0.00029203698795072033, "loss": 3.160965919494629, "step": 5083, "token_acc": 0.2802046260985791 }, { "epoch": 2.9803576663734974, "grad_norm": 0.29098702910844904, "learning_rate": 0.0002920323134326216, "loss": 3.107667922973633, "step": 5084, "token_acc": 0.2874566968713065 }, { "epoch": 2.9809440046907065, "grad_norm": 0.2878858555278777, "learning_rate": 0.0002920276375803207, "loss": 3.1095995903015137, "step": 5085, "token_acc": 0.2880353247858701 }, { "epoch": 2.9815303430079156, "grad_norm": 0.26777331084812295, "learning_rate": 0.00029202296039386157, "loss": 3.110292673110962, "step": 5086, "token_acc": 0.28547494406440355 }, { "epoch": 2.9821166813251248, "grad_norm": 0.2777794674932488, "learning_rate": 0.00029201828187328807, "loss": 3.1438937187194824, "step": 5087, "token_acc": 0.28034792786082885 }, { "epoch": 2.982703019642334, "grad_norm": 0.2610162108795578, "learning_rate": 0.00029201360201864423, "loss": 3.151064157485962, "step": 5088, "token_acc": 0.28043263727154305 }, { "epoch": 2.9832893579595425, "grad_norm": 0.3000237250262898, "learning_rate": 0.0002920089208299739, "loss": 3.118253469467163, "step": 5089, "token_acc": 0.28660861926415876 }, { "epoch": 2.9838756962767516, "grad_norm": 0.25633572554202444, "learning_rate": 0.00029200423830732115, "loss": 3.116499900817871, "step": 5090, "token_acc": 0.28607967143188967 }, { "epoch": 2.9844620345939608, "grad_norm": 0.3028180024406386, "learning_rate": 0.00029199955445073, "loss": 3.076850414276123, "step": 5091, "token_acc": 0.29044654049277685 }, { "epoch": 2.98504837291117, "grad_norm": 0.27873791987289087, "learning_rate": 0.00029199486926024425, "loss": 3.119007110595703, "step": 5092, "token_acc": 0.28424345311075483 }, { "epoch": 2.9856347112283785, "grad_norm": 0.30101612615759105, "learning_rate": 0.0002919901827359081, "loss": 3.091470956802368, "step": 5093, "token_acc": 0.28922672723300147 }, { "epoch": 2.9862210495455876, "grad_norm": 0.2969763761309888, "learning_rate": 0.00029198549487776553, "loss": 3.104546308517456, "step": 5094, "token_acc": 0.2867430364725637 }, { "epoch": 2.9868073878627968, "grad_norm": 0.3147234461507524, "learning_rate": 0.0002919808056858606, "loss": 3.0515449047088623, "step": 5095, "token_acc": 0.2945025510546417 }, { "epoch": 2.987393726180006, "grad_norm": 0.3041313650394808, "learning_rate": 0.00029197611516023725, "loss": 3.0847854614257812, "step": 5096, "token_acc": 0.28839045632812604 }, { "epoch": 2.987980064497215, "grad_norm": 0.33422917465186386, "learning_rate": 0.0002919714233009397, "loss": 3.138659954071045, "step": 5097, "token_acc": 0.2823014371090749 }, { "epoch": 2.988566402814424, "grad_norm": 0.30287992374803485, "learning_rate": 0.00029196673010801187, "loss": 3.091615676879883, "step": 5098, "token_acc": 0.28828833501466516 }, { "epoch": 2.989152741131633, "grad_norm": 0.2666360946851808, "learning_rate": 0.00029196203558149787, "loss": 3.109976053237915, "step": 5099, "token_acc": 0.2875342915272035 }, { "epoch": 2.989739079448842, "grad_norm": 0.31778316560697006, "learning_rate": 0.000291957339721442, "loss": 3.0775437355041504, "step": 5100, "token_acc": 0.28994236837021614 }, { "epoch": 2.990325417766051, "grad_norm": 0.31512748334759194, "learning_rate": 0.00029195264252788804, "loss": 3.1121773719787598, "step": 5101, "token_acc": 0.2870045693980587 }, { "epoch": 2.99091175608326, "grad_norm": 0.2814887995647331, "learning_rate": 0.00029194794400088037, "loss": 3.119619846343994, "step": 5102, "token_acc": 0.2846271129561171 }, { "epoch": 2.991498094400469, "grad_norm": 0.3123520536000268, "learning_rate": 0.000291943244140463, "loss": 3.1075708866119385, "step": 5103, "token_acc": 0.2888442975487829 }, { "epoch": 2.992084432717678, "grad_norm": 0.2926123203573768, "learning_rate": 0.0002919385429466802, "loss": 3.129755735397339, "step": 5104, "token_acc": 0.2822806339389254 }, { "epoch": 2.992670771034887, "grad_norm": 0.3460480775163618, "learning_rate": 0.00029193384041957597, "loss": 3.1306610107421875, "step": 5105, "token_acc": 0.2846267210132901 }, { "epoch": 2.993257109352096, "grad_norm": 0.25453685638867674, "learning_rate": 0.00029192913655919463, "loss": 3.141019105911255, "step": 5106, "token_acc": 0.2818223104708392 }, { "epoch": 2.993843447669305, "grad_norm": 0.2919845637776699, "learning_rate": 0.0002919244313655803, "loss": 3.102055072784424, "step": 5107, "token_acc": 0.2882983138300825 }, { "epoch": 2.9944297859865143, "grad_norm": 0.2736581917853656, "learning_rate": 0.00029191972483877713, "loss": 3.109133720397949, "step": 5108, "token_acc": 0.2873338356385555 }, { "epoch": 2.9950161243037234, "grad_norm": 0.29908852519117934, "learning_rate": 0.00029191501697882943, "loss": 3.110525608062744, "step": 5109, "token_acc": 0.28685337080487566 }, { "epoch": 2.9956024626209325, "grad_norm": 0.2631826908241675, "learning_rate": 0.00029191030778578133, "loss": 3.1174607276916504, "step": 5110, "token_acc": 0.28643832580522605 }, { "epoch": 2.996188800938141, "grad_norm": 0.3026834790418186, "learning_rate": 0.00029190559725967717, "loss": 3.1329574584960938, "step": 5111, "token_acc": 0.2830717802580965 }, { "epoch": 2.9967751392553503, "grad_norm": 0.28455824840030103, "learning_rate": 0.00029190088540056113, "loss": 3.110084056854248, "step": 5112, "token_acc": 0.2869272159975831 }, { "epoch": 2.9973614775725594, "grad_norm": 0.29720405646404147, "learning_rate": 0.00029189617220847744, "loss": 3.104649305343628, "step": 5113, "token_acc": 0.28822385817145874 }, { "epoch": 2.9979478158897686, "grad_norm": 0.31455821757449737, "learning_rate": 0.00029189145768347046, "loss": 3.071871519088745, "step": 5114, "token_acc": 0.2922248326150431 }, { "epoch": 2.998534154206977, "grad_norm": 0.267812225104246, "learning_rate": 0.00029188674182558446, "loss": 3.153928279876709, "step": 5115, "token_acc": 0.2805709009281518 }, { "epoch": 2.9991204925241863, "grad_norm": 0.3374159442242459, "learning_rate": 0.0002918820246348637, "loss": 3.042898654937744, "step": 5116, "token_acc": 0.2956119249724393 }, { "epoch": 2.9997068308413954, "grad_norm": 0.3105184009673184, "learning_rate": 0.0002918773061113525, "loss": 3.1127207279205322, "step": 5117, "token_acc": 0.28413267548240184 }, { "epoch": 3.0, "grad_norm": 0.35649660795366334, "learning_rate": 0.00029187258625509513, "loss": 3.1052021980285645, "step": 5118, "token_acc": 0.28912331011880377 }, { "epoch": 3.0, "eval_loss": 3.106259822845459, "eval_runtime": 8.79, "eval_samples_per_second": 29.124, "eval_steps_per_second": 3.64, "eval_token_acc": 0.28690646098030675, "step": 5118 }, { "epoch": 3.000586338317209, "grad_norm": 0.3428987835507253, "learning_rate": 0.0002918678650661361, "loss": 3.0772767066955566, "step": 5119, "token_acc": 0.2890505350217869 }, { "epoch": 3.0011726766344182, "grad_norm": 0.38204917548378226, "learning_rate": 0.0002918631425445196, "loss": 3.009969711303711, "step": 5120, "token_acc": 0.29887828570279346 }, { "epoch": 3.001759014951627, "grad_norm": 0.325726653851143, "learning_rate": 0.00029185841869029005, "loss": 3.0065507888793945, "step": 5121, "token_acc": 0.30044527895572937 }, { "epoch": 3.002345353268836, "grad_norm": 0.32029728512926536, "learning_rate": 0.00029185369350349173, "loss": 3.053997278213501, "step": 5122, "token_acc": 0.29280360891745716 }, { "epoch": 3.002931691586045, "grad_norm": 0.3591779446246009, "learning_rate": 0.0002918489669841692, "loss": 3.019418954849243, "step": 5123, "token_acc": 0.2965564756197434 }, { "epoch": 3.0035180299032542, "grad_norm": 0.2876695986586287, "learning_rate": 0.0002918442391323667, "loss": 3.0437543392181396, "step": 5124, "token_acc": 0.2947273066368263 }, { "epoch": 3.0041043682204633, "grad_norm": 0.38647439184998883, "learning_rate": 0.0002918395099481288, "loss": 3.0338940620422363, "step": 5125, "token_acc": 0.29668929727025944 }, { "epoch": 3.0046907065376725, "grad_norm": 0.320748907096521, "learning_rate": 0.0002918347794314998, "loss": 3.040165901184082, "step": 5126, "token_acc": 0.2942833930652958 }, { "epoch": 3.005277044854881, "grad_norm": 0.3135189448896224, "learning_rate": 0.00029183004758252416, "loss": 3.0427966117858887, "step": 5127, "token_acc": 0.2937558193822025 }, { "epoch": 3.0058633831720902, "grad_norm": 0.35892905103197015, "learning_rate": 0.00029182531440124636, "loss": 3.0228395462036133, "step": 5128, "token_acc": 0.29631644082893877 }, { "epoch": 3.0064497214892993, "grad_norm": 0.2586295660541764, "learning_rate": 0.0002918205798877108, "loss": 3.024183750152588, "step": 5129, "token_acc": 0.29765954011026896 }, { "epoch": 3.0070360598065085, "grad_norm": 0.2976298905463469, "learning_rate": 0.00029181584404196204, "loss": 3.006627082824707, "step": 5130, "token_acc": 0.29926706247044604 }, { "epoch": 3.0076223981237176, "grad_norm": 0.27477264899715503, "learning_rate": 0.00029181110686404447, "loss": 3.019460678100586, "step": 5131, "token_acc": 0.2980581051158939 }, { "epoch": 3.0082087364409262, "grad_norm": 0.32687387686689073, "learning_rate": 0.00029180636835400266, "loss": 3.0451433658599854, "step": 5132, "token_acc": 0.2937275121682832 }, { "epoch": 3.0087950747581353, "grad_norm": 0.27219474441838104, "learning_rate": 0.00029180162851188116, "loss": 3.0167179107666016, "step": 5133, "token_acc": 0.2985784461989341 }, { "epoch": 3.0093814130753445, "grad_norm": 0.2992250666438191, "learning_rate": 0.00029179688733772444, "loss": 2.9692487716674805, "step": 5134, "token_acc": 0.3051114350734359 }, { "epoch": 3.0099677513925536, "grad_norm": 0.23907920148708325, "learning_rate": 0.000291792144831577, "loss": 3.0530848503112793, "step": 5135, "token_acc": 0.2912823978887682 }, { "epoch": 3.0105540897097627, "grad_norm": 0.2851943167981261, "learning_rate": 0.00029178740099348343, "loss": 2.986829996109009, "step": 5136, "token_acc": 0.30258522641490443 }, { "epoch": 3.0111404280269713, "grad_norm": 0.2609894454355711, "learning_rate": 0.00029178265582348827, "loss": 3.0341172218322754, "step": 5137, "token_acc": 0.2964714276966168 }, { "epoch": 3.0117267663441805, "grad_norm": 0.262371070548386, "learning_rate": 0.00029177790932163617, "loss": 3.008013963699341, "step": 5138, "token_acc": 0.300569405971068 }, { "epoch": 3.0123131046613896, "grad_norm": 0.2899917737805946, "learning_rate": 0.0002917731614879716, "loss": 2.9970388412475586, "step": 5139, "token_acc": 0.29985284266824197 }, { "epoch": 3.0128994429785987, "grad_norm": 0.31385162365275776, "learning_rate": 0.00029176841232253926, "loss": 3.005070209503174, "step": 5140, "token_acc": 0.29971071687089934 }, { "epoch": 3.013485781295808, "grad_norm": 0.28444976093957813, "learning_rate": 0.00029176366182538367, "loss": 3.01517391204834, "step": 5141, "token_acc": 0.29762677952252753 }, { "epoch": 3.014072119613017, "grad_norm": 0.2628632016505637, "learning_rate": 0.00029175890999654956, "loss": 3.0234503746032715, "step": 5142, "token_acc": 0.29601649572497646 }, { "epoch": 3.0146584579302256, "grad_norm": 0.2764732814873224, "learning_rate": 0.0002917541568360815, "loss": 3.0196003913879395, "step": 5143, "token_acc": 0.2971501463043987 }, { "epoch": 3.0152447962474347, "grad_norm": 0.26490755174116515, "learning_rate": 0.00029174940234402415, "loss": 3.019275665283203, "step": 5144, "token_acc": 0.2972331600777025 }, { "epoch": 3.015831134564644, "grad_norm": 0.31653988455340526, "learning_rate": 0.0002917446465204222, "loss": 3.069411516189575, "step": 5145, "token_acc": 0.2883319037287451 }, { "epoch": 3.016417472881853, "grad_norm": 0.29913382677093603, "learning_rate": 0.0002917398893653202, "loss": 3.0390093326568604, "step": 5146, "token_acc": 0.2941605265796441 }, { "epoch": 3.017003811199062, "grad_norm": 0.2550690008646368, "learning_rate": 0.000291735130878763, "loss": 3.0008602142333984, "step": 5147, "token_acc": 0.3000978681317543 }, { "epoch": 3.0175901495162707, "grad_norm": 0.25606412007552953, "learning_rate": 0.0002917303710607953, "loss": 2.9886553287506104, "step": 5148, "token_acc": 0.30346013160045776 }, { "epoch": 3.01817648783348, "grad_norm": 0.2496773079359478, "learning_rate": 0.00029172560991146167, "loss": 3.069382905960083, "step": 5149, "token_acc": 0.2914038602441986 }, { "epoch": 3.018762826150689, "grad_norm": 0.2616090078057608, "learning_rate": 0.00029172084743080693, "loss": 3.0578813552856445, "step": 5150, "token_acc": 0.2928235595936915 }, { "epoch": 3.019349164467898, "grad_norm": 0.27315518524246335, "learning_rate": 0.0002917160836188758, "loss": 3.0412020683288574, "step": 5151, "token_acc": 0.29490910109241625 }, { "epoch": 3.019935502785107, "grad_norm": 0.23878646080769525, "learning_rate": 0.00029171131847571303, "loss": 3.0304746627807617, "step": 5152, "token_acc": 0.2951688818932877 }, { "epoch": 3.0205218411023163, "grad_norm": 0.2876082097503468, "learning_rate": 0.0002917065520013634, "loss": 2.981696844100952, "step": 5153, "token_acc": 0.3020214885880395 }, { "epoch": 3.021108179419525, "grad_norm": 0.28207747030239555, "learning_rate": 0.00029170178419587166, "loss": 3.0600028038024902, "step": 5154, "token_acc": 0.29314351574484776 }, { "epoch": 3.021694517736734, "grad_norm": 0.2817992677196652, "learning_rate": 0.00029169701505928254, "loss": 3.0093984603881836, "step": 5155, "token_acc": 0.29840652288663994 }, { "epoch": 3.022280856053943, "grad_norm": 0.26918558903648926, "learning_rate": 0.00029169224459164097, "loss": 3.035729169845581, "step": 5156, "token_acc": 0.2949751625497862 }, { "epoch": 3.0228671943711523, "grad_norm": 0.2569778488638999, "learning_rate": 0.0002916874727929917, "loss": 3.046964168548584, "step": 5157, "token_acc": 0.29369545610447023 }, { "epoch": 3.0234535326883614, "grad_norm": 0.24400015281268123, "learning_rate": 0.00029168269966337956, "loss": 3.0505614280700684, "step": 5158, "token_acc": 0.2927835603959866 }, { "epoch": 3.02403987100557, "grad_norm": 0.2669409104658442, "learning_rate": 0.0002916779252028493, "loss": 3.012430191040039, "step": 5159, "token_acc": 0.298516996080007 }, { "epoch": 3.024626209322779, "grad_norm": 0.31446285633272164, "learning_rate": 0.0002916731494114459, "loss": 3.041745662689209, "step": 5160, "token_acc": 0.2932763768602614 }, { "epoch": 3.0252125476399883, "grad_norm": 0.3328351220803896, "learning_rate": 0.00029166837228921413, "loss": 3.0514984130859375, "step": 5161, "token_acc": 0.2918695738820728 }, { "epoch": 3.0257988859571974, "grad_norm": 0.266987277661329, "learning_rate": 0.00029166359383619897, "loss": 3.0127437114715576, "step": 5162, "token_acc": 0.2983614997558438 }, { "epoch": 3.0263852242744065, "grad_norm": 0.2816894640610194, "learning_rate": 0.0002916588140524452, "loss": 2.994537353515625, "step": 5163, "token_acc": 0.301362910838862 }, { "epoch": 3.026971562591615, "grad_norm": 0.2696814467394109, "learning_rate": 0.00029165403293799773, "loss": 3.0565550327301025, "step": 5164, "token_acc": 0.292368946714895 }, { "epoch": 3.0275579009088243, "grad_norm": 0.2804114869835539, "learning_rate": 0.0002916492504929016, "loss": 3.0026986598968506, "step": 5165, "token_acc": 0.30045732103416917 }, { "epoch": 3.0281442392260334, "grad_norm": 0.2556176234800986, "learning_rate": 0.00029164446671720154, "loss": 3.011725425720215, "step": 5166, "token_acc": 0.298342469226808 }, { "epoch": 3.0287305775432425, "grad_norm": 0.2677426948848304, "learning_rate": 0.00029163968161094255, "loss": 3.010127544403076, "step": 5167, "token_acc": 0.2979048952447622 }, { "epoch": 3.0293169158604516, "grad_norm": 0.28163923321410006, "learning_rate": 0.0002916348951741697, "loss": 3.051567792892456, "step": 5168, "token_acc": 0.2928791623143359 }, { "epoch": 3.0299032541776607, "grad_norm": 0.23808396751546573, "learning_rate": 0.00029163010740692783, "loss": 3.0435824394226074, "step": 5169, "token_acc": 0.29455579528387477 }, { "epoch": 3.0304895924948694, "grad_norm": 0.2833773570574727, "learning_rate": 0.00029162531830926203, "loss": 2.9934639930725098, "step": 5170, "token_acc": 0.3011256439064719 }, { "epoch": 3.0310759308120785, "grad_norm": 0.24085181271272585, "learning_rate": 0.0002916205278812171, "loss": 3.0210280418395996, "step": 5171, "token_acc": 0.295643903564126 }, { "epoch": 3.0316622691292876, "grad_norm": 0.25357470192780635, "learning_rate": 0.0002916157361228382, "loss": 3.0010781288146973, "step": 5172, "token_acc": 0.2988520627204396 }, { "epoch": 3.0322486074464967, "grad_norm": 0.25228637212539484, "learning_rate": 0.00029161094303417027, "loss": 3.048628330230713, "step": 5173, "token_acc": 0.293258409239119 }, { "epoch": 3.032834945763706, "grad_norm": 0.24885748925346615, "learning_rate": 0.00029160614861525836, "loss": 3.0450143814086914, "step": 5174, "token_acc": 0.2933806134115949 }, { "epoch": 3.0334212840809145, "grad_norm": 0.23571978509644365, "learning_rate": 0.0002916013528661475, "loss": 2.995908498764038, "step": 5175, "token_acc": 0.2998466958384519 }, { "epoch": 3.0340076223981236, "grad_norm": 0.2552105548561712, "learning_rate": 0.00029159655578688275, "loss": 3.0059757232666016, "step": 5176, "token_acc": 0.3004024242921667 }, { "epoch": 3.0345939607153327, "grad_norm": 0.24088933008471156, "learning_rate": 0.00029159175737750913, "loss": 3.0323877334594727, "step": 5177, "token_acc": 0.29646869942113674 }, { "epoch": 3.035180299032542, "grad_norm": 0.28513270486128606, "learning_rate": 0.0002915869576380718, "loss": 3.0714263916015625, "step": 5178, "token_acc": 0.2894802222948676 }, { "epoch": 3.035766637349751, "grad_norm": 0.30758702171737035, "learning_rate": 0.00029158215656861574, "loss": 3.0113954544067383, "step": 5179, "token_acc": 0.29699933341665624 }, { "epoch": 3.03635297566696, "grad_norm": 0.323033706133563, "learning_rate": 0.0002915773541691861, "loss": 3.0249106884002686, "step": 5180, "token_acc": 0.2972941289867116 }, { "epoch": 3.0369393139841687, "grad_norm": 0.28729023130252634, "learning_rate": 0.00029157255043982803, "loss": 3.077242136001587, "step": 5181, "token_acc": 0.2900023214006144 }, { "epoch": 3.037525652301378, "grad_norm": 0.3060869474858443, "learning_rate": 0.0002915677453805866, "loss": 3.0337257385253906, "step": 5182, "token_acc": 0.29562530501251366 }, { "epoch": 3.038111990618587, "grad_norm": 0.3344903339440123, "learning_rate": 0.000291562938991507, "loss": 3.0713930130004883, "step": 5183, "token_acc": 0.28970109492038265 }, { "epoch": 3.038698328935796, "grad_norm": 0.28792158311649885, "learning_rate": 0.00029155813127263434, "loss": 3.0177316665649414, "step": 5184, "token_acc": 0.2985738763674707 }, { "epoch": 3.039284667253005, "grad_norm": 0.2875059348409001, "learning_rate": 0.00029155332222401375, "loss": 2.99538516998291, "step": 5185, "token_acc": 0.30071428949920254 }, { "epoch": 3.039871005570214, "grad_norm": 0.246600797635135, "learning_rate": 0.00029154851184569043, "loss": 3.019031524658203, "step": 5186, "token_acc": 0.29730581416122437 }, { "epoch": 3.040457343887423, "grad_norm": 0.2584191600695276, "learning_rate": 0.00029154370013770965, "loss": 3.0655641555786133, "step": 5187, "token_acc": 0.2925974691965461 }, { "epoch": 3.041043682204632, "grad_norm": 0.27371519022451346, "learning_rate": 0.00029153888710011655, "loss": 3.0783467292785645, "step": 5188, "token_acc": 0.29066810747693467 }, { "epoch": 3.041630020521841, "grad_norm": 0.2441482242471838, "learning_rate": 0.00029153407273295625, "loss": 3.028428554534912, "step": 5189, "token_acc": 0.2975527092776646 }, { "epoch": 3.0422163588390503, "grad_norm": 0.27158319958110794, "learning_rate": 0.0002915292570362741, "loss": 3.0171897411346436, "step": 5190, "token_acc": 0.29848974529887745 }, { "epoch": 3.042802697156259, "grad_norm": 0.26310857194812015, "learning_rate": 0.0002915244400101153, "loss": 3.032041072845459, "step": 5191, "token_acc": 0.29355104894745365 }, { "epoch": 3.043389035473468, "grad_norm": 0.2601856517982112, "learning_rate": 0.00029151962165452507, "loss": 3.0440006256103516, "step": 5192, "token_acc": 0.2937117779191607 }, { "epoch": 3.043975373790677, "grad_norm": 0.24458705158506702, "learning_rate": 0.0002915148019695487, "loss": 3.008754253387451, "step": 5193, "token_acc": 0.2994218575158523 }, { "epoch": 3.0445617121078863, "grad_norm": 0.30684553523996044, "learning_rate": 0.00029150998095523145, "loss": 3.0330638885498047, "step": 5194, "token_acc": 0.294830181760904 }, { "epoch": 3.0451480504250954, "grad_norm": 0.28298543103930274, "learning_rate": 0.00029150515861161864, "loss": 3.0463099479675293, "step": 5195, "token_acc": 0.29323223870849235 }, { "epoch": 3.0457343887423045, "grad_norm": 0.28390459720668926, "learning_rate": 0.00029150033493875553, "loss": 2.9751009941101074, "step": 5196, "token_acc": 0.30497469340331956 }, { "epoch": 3.046320727059513, "grad_norm": 0.3215378811248207, "learning_rate": 0.0002914955099366874, "loss": 3.0451245307922363, "step": 5197, "token_acc": 0.29320234903226133 }, { "epoch": 3.0469070653767223, "grad_norm": 0.2798016452920836, "learning_rate": 0.0002914906836054597, "loss": 3.027647018432617, "step": 5198, "token_acc": 0.29544133727383654 }, { "epoch": 3.0474934036939314, "grad_norm": 0.2888236980541533, "learning_rate": 0.00029148585594511765, "loss": 3.015082359313965, "step": 5199, "token_acc": 0.29608200141198704 }, { "epoch": 3.0480797420111405, "grad_norm": 0.26177040561509235, "learning_rate": 0.00029148102695570667, "loss": 3.013658046722412, "step": 5200, "token_acc": 0.29756072628767843 }, { "epoch": 3.0486660803283496, "grad_norm": 0.27437900107557617, "learning_rate": 0.0002914761966372721, "loss": 3.0065486431121826, "step": 5201, "token_acc": 0.2994768684409191 }, { "epoch": 3.0492524186455583, "grad_norm": 0.2734905893590825, "learning_rate": 0.00029147136498985926, "loss": 3.011589527130127, "step": 5202, "token_acc": 0.29777078570054916 }, { "epoch": 3.0498387569627674, "grad_norm": 0.27222738910048155, "learning_rate": 0.0002914665320135136, "loss": 3.0344481468200684, "step": 5203, "token_acc": 0.29587463816161846 }, { "epoch": 3.0504250952799765, "grad_norm": 0.27026541942071214, "learning_rate": 0.0002914616977082805, "loss": 3.020756483078003, "step": 5204, "token_acc": 0.29807481616991355 }, { "epoch": 3.0510114335971856, "grad_norm": 0.28178055624345033, "learning_rate": 0.00029145686207420537, "loss": 3.0360846519470215, "step": 5205, "token_acc": 0.2954580074483005 }, { "epoch": 3.0515977719143947, "grad_norm": 0.2773927270643848, "learning_rate": 0.0002914520251113336, "loss": 3.049901247024536, "step": 5206, "token_acc": 0.2930996613424732 }, { "epoch": 3.052184110231604, "grad_norm": 0.3081820774148254, "learning_rate": 0.0002914471868197107, "loss": 3.057295083999634, "step": 5207, "token_acc": 0.29189094614204997 }, { "epoch": 3.0527704485488125, "grad_norm": 0.2827348820056371, "learning_rate": 0.0002914423471993821, "loss": 3.0319340229034424, "step": 5208, "token_acc": 0.2952012650733338 }, { "epoch": 3.0533567868660216, "grad_norm": 0.26348809936882417, "learning_rate": 0.0002914375062503932, "loss": 3.078206777572632, "step": 5209, "token_acc": 0.2915844021109129 }, { "epoch": 3.0539431251832307, "grad_norm": 0.3251125014485965, "learning_rate": 0.00029143266397278956, "loss": 3.0280237197875977, "step": 5210, "token_acc": 0.29541854781177646 }, { "epoch": 3.05452946350044, "grad_norm": 0.3262754941828055, "learning_rate": 0.0002914278203666166, "loss": 3.058741331100464, "step": 5211, "token_acc": 0.29180403818389467 }, { "epoch": 3.055115801817649, "grad_norm": 0.27571426230826795, "learning_rate": 0.0002914229754319198, "loss": 2.995025396347046, "step": 5212, "token_acc": 0.29915258526761096 }, { "epoch": 3.0557021401348576, "grad_norm": 0.3178771911569342, "learning_rate": 0.0002914181291687448, "loss": 3.016723394393921, "step": 5213, "token_acc": 0.2970398593200469 }, { "epoch": 3.0562884784520667, "grad_norm": 0.27403052071779477, "learning_rate": 0.000291413281577137, "loss": 3.040548801422119, "step": 5214, "token_acc": 0.2947568849053434 }, { "epoch": 3.056874816769276, "grad_norm": 0.2694084241339378, "learning_rate": 0.00029140843265714205, "loss": 3.008141040802002, "step": 5215, "token_acc": 0.2980059936949443 }, { "epoch": 3.057461155086485, "grad_norm": 0.2600415806676593, "learning_rate": 0.00029140358240880535, "loss": 3.0298898220062256, "step": 5216, "token_acc": 0.2959809264305177 }, { "epoch": 3.058047493403694, "grad_norm": 0.23645295401223948, "learning_rate": 0.0002913987308321725, "loss": 3.037425994873047, "step": 5217, "token_acc": 0.29331531247496506 }, { "epoch": 3.0586338317209028, "grad_norm": 0.2988990653137114, "learning_rate": 0.00029139387792728917, "loss": 3.02128005027771, "step": 5218, "token_acc": 0.2969729906260684 }, { "epoch": 3.059220170038112, "grad_norm": 0.2689150517806915, "learning_rate": 0.00029138902369420087, "loss": 3.015134334564209, "step": 5219, "token_acc": 0.29755498583187157 }, { "epoch": 3.059806508355321, "grad_norm": 0.2712522978119572, "learning_rate": 0.0002913841681329532, "loss": 2.978469133377075, "step": 5220, "token_acc": 0.3040158257022521 }, { "epoch": 3.06039284667253, "grad_norm": 0.2825418468138627, "learning_rate": 0.0002913793112435918, "loss": 3.059384822845459, "step": 5221, "token_acc": 0.29446063335430533 }, { "epoch": 3.060979184989739, "grad_norm": 0.288214756304783, "learning_rate": 0.0002913744530261623, "loss": 3.016928195953369, "step": 5222, "token_acc": 0.30014532288327195 }, { "epoch": 3.0615655233069483, "grad_norm": 0.2924105011258072, "learning_rate": 0.0002913695934807103, "loss": 3.0282325744628906, "step": 5223, "token_acc": 0.29591279787611746 }, { "epoch": 3.062151861624157, "grad_norm": 0.264473777104705, "learning_rate": 0.0002913647326072815, "loss": 3.0511207580566406, "step": 5224, "token_acc": 0.2938636938620616 }, { "epoch": 3.062738199941366, "grad_norm": 0.30071511551175406, "learning_rate": 0.0002913598704059215, "loss": 2.998683452606201, "step": 5225, "token_acc": 0.29979520593456505 }, { "epoch": 3.063324538258575, "grad_norm": 0.28618236494985083, "learning_rate": 0.00029135500687667596, "loss": 3.031513214111328, "step": 5226, "token_acc": 0.29687933369775665 }, { "epoch": 3.0639108765757843, "grad_norm": 0.29239835351191285, "learning_rate": 0.0002913501420195907, "loss": 3.032356023788452, "step": 5227, "token_acc": 0.29617430090924685 }, { "epoch": 3.0644972148929934, "grad_norm": 0.290236886561205, "learning_rate": 0.0002913452758347113, "loss": 3.036712646484375, "step": 5228, "token_acc": 0.2952040930338912 }, { "epoch": 3.065083553210202, "grad_norm": 0.2551781744286322, "learning_rate": 0.00029134040832208346, "loss": 3.0097084045410156, "step": 5229, "token_acc": 0.2986012970031567 }, { "epoch": 3.065669891527411, "grad_norm": 0.29453987930318065, "learning_rate": 0.000291335539481753, "loss": 3.0628161430358887, "step": 5230, "token_acc": 0.2914844453727758 }, { "epoch": 3.0662562298446203, "grad_norm": 0.2835681300026734, "learning_rate": 0.0002913306693137655, "loss": 3.051583766937256, "step": 5231, "token_acc": 0.2919014337469339 }, { "epoch": 3.0668425681618294, "grad_norm": 0.27537926074896424, "learning_rate": 0.0002913257978181669, "loss": 3.0277092456817627, "step": 5232, "token_acc": 0.2972079969001016 }, { "epoch": 3.0674289064790385, "grad_norm": 0.27716796763703283, "learning_rate": 0.0002913209249950028, "loss": 2.994946002960205, "step": 5233, "token_acc": 0.3012121707390156 }, { "epoch": 3.068015244796247, "grad_norm": 0.2788195107814617, "learning_rate": 0.0002913160508443191, "loss": 3.039104461669922, "step": 5234, "token_acc": 0.29692723116259684 }, { "epoch": 3.0686015831134563, "grad_norm": 0.2805907582201766, "learning_rate": 0.0002913111753661615, "loss": 3.0080413818359375, "step": 5235, "token_acc": 0.29870630636302115 }, { "epoch": 3.0691879214306654, "grad_norm": 0.2638732534983754, "learning_rate": 0.00029130629856057586, "loss": 3.0364460945129395, "step": 5236, "token_acc": 0.29493502112415865 }, { "epoch": 3.0697742597478745, "grad_norm": 0.2866275494992333, "learning_rate": 0.0002913014204276079, "loss": 3.0305275917053223, "step": 5237, "token_acc": 0.2953641710686169 }, { "epoch": 3.0703605980650837, "grad_norm": 0.3076917137824521, "learning_rate": 0.00029129654096730353, "loss": 3.06573486328125, "step": 5238, "token_acc": 0.29007896611219824 }, { "epoch": 3.0709469363822928, "grad_norm": 0.289842673824468, "learning_rate": 0.0002912916601797086, "loss": 3.041769027709961, "step": 5239, "token_acc": 0.2942454567595154 }, { "epoch": 3.0715332746995014, "grad_norm": 0.3215543516365585, "learning_rate": 0.00029128677806486886, "loss": 3.0553603172302246, "step": 5240, "token_acc": 0.29310713014592477 }, { "epoch": 3.0721196130167105, "grad_norm": 0.27710120859539134, "learning_rate": 0.00029128189462283025, "loss": 3.0448684692382812, "step": 5241, "token_acc": 0.2955300075240294 }, { "epoch": 3.0727059513339197, "grad_norm": 0.30535787732976594, "learning_rate": 0.00029127700985363857, "loss": 3.0393378734588623, "step": 5242, "token_acc": 0.2949774273624098 }, { "epoch": 3.0732922896511288, "grad_norm": 0.32328940982525484, "learning_rate": 0.0002912721237573398, "loss": 3.026825428009033, "step": 5243, "token_acc": 0.2962305732066103 }, { "epoch": 3.073878627968338, "grad_norm": 0.2694907499946927, "learning_rate": 0.0002912672363339798, "loss": 3.0590291023254395, "step": 5244, "token_acc": 0.29266625032302523 }, { "epoch": 3.0744649662855466, "grad_norm": 0.3130075470677598, "learning_rate": 0.00029126234758360445, "loss": 3.01023006439209, "step": 5245, "token_acc": 0.29972973895777466 }, { "epoch": 3.0750513046027557, "grad_norm": 0.30088267606481633, "learning_rate": 0.0002912574575062597, "loss": 3.0530476570129395, "step": 5246, "token_acc": 0.29454358333486486 }, { "epoch": 3.0756376429199648, "grad_norm": 0.2728116851746608, "learning_rate": 0.00029125256610199155, "loss": 3.008101463317871, "step": 5247, "token_acc": 0.29781640254144454 }, { "epoch": 3.076223981237174, "grad_norm": 0.25518764771562163, "learning_rate": 0.0002912476733708458, "loss": 3.0320041179656982, "step": 5248, "token_acc": 0.29377881977042736 }, { "epoch": 3.076810319554383, "grad_norm": 0.2559697073960544, "learning_rate": 0.0002912427793128685, "loss": 2.984266996383667, "step": 5249, "token_acc": 0.301332083517329 }, { "epoch": 3.077396657871592, "grad_norm": 0.2866211764693689, "learning_rate": 0.00029123788392810564, "loss": 3.0712506771087646, "step": 5250, "token_acc": 0.2891027880743031 }, { "epoch": 3.077982996188801, "grad_norm": 0.3019561852962493, "learning_rate": 0.0002912329872166032, "loss": 3.045159339904785, "step": 5251, "token_acc": 0.29561285428885825 }, { "epoch": 3.07856933450601, "grad_norm": 0.2787327847685759, "learning_rate": 0.0002912280891784071, "loss": 3.012904405593872, "step": 5252, "token_acc": 0.2982584519960475 }, { "epoch": 3.079155672823219, "grad_norm": 0.3018055043127577, "learning_rate": 0.0002912231898135635, "loss": 3.0492429733276367, "step": 5253, "token_acc": 0.2929207409780381 }, { "epoch": 3.079742011140428, "grad_norm": 0.2712480711430383, "learning_rate": 0.0002912182891221182, "loss": 3.022808313369751, "step": 5254, "token_acc": 0.2956509827372281 }, { "epoch": 3.0803283494576372, "grad_norm": 0.28303728685351587, "learning_rate": 0.0002912133871041175, "loss": 3.073479413986206, "step": 5255, "token_acc": 0.2903218991229576 }, { "epoch": 3.080914687774846, "grad_norm": 0.26367324493965505, "learning_rate": 0.0002912084837596073, "loss": 3.027346611022949, "step": 5256, "token_acc": 0.29459692581038366 }, { "epoch": 3.081501026092055, "grad_norm": 0.29592893911711987, "learning_rate": 0.0002912035790886336, "loss": 3.021733045578003, "step": 5257, "token_acc": 0.2973739174600542 }, { "epoch": 3.082087364409264, "grad_norm": 0.2728082061170481, "learning_rate": 0.0002911986730912426, "loss": 3.0271127223968506, "step": 5258, "token_acc": 0.2964456058722925 }, { "epoch": 3.0826737027264732, "grad_norm": 0.2637405929146654, "learning_rate": 0.0002911937657674803, "loss": 3.0387301445007324, "step": 5259, "token_acc": 0.2964996362586133 }, { "epoch": 3.0832600410436823, "grad_norm": 0.3012274951073473, "learning_rate": 0.00029118885711739285, "loss": 3.0389609336853027, "step": 5260, "token_acc": 0.2937640715262504 }, { "epoch": 3.0838463793608915, "grad_norm": 0.28951819611741003, "learning_rate": 0.0002911839471410264, "loss": 3.0748915672302246, "step": 5261, "token_acc": 0.28973866062573617 }, { "epoch": 3.0844327176781, "grad_norm": 0.27851112935665323, "learning_rate": 0.0002911790358384269, "loss": 3.0424728393554688, "step": 5262, "token_acc": 0.2955294857131081 }, { "epoch": 3.0850190559953092, "grad_norm": 0.3395531145577602, "learning_rate": 0.0002911741232096407, "loss": 3.101142168045044, "step": 5263, "token_acc": 0.2880601241923303 }, { "epoch": 3.0856053943125183, "grad_norm": 0.28791130132430853, "learning_rate": 0.00029116920925471374, "loss": 3.0443780422210693, "step": 5264, "token_acc": 0.2961017472025128 }, { "epoch": 3.0861917326297275, "grad_norm": 0.2624137148651111, "learning_rate": 0.00029116429397369235, "loss": 3.0252060890197754, "step": 5265, "token_acc": 0.2978307861998472 }, { "epoch": 3.0867780709469366, "grad_norm": 0.27412093286040606, "learning_rate": 0.00029115937736662263, "loss": 3.0254530906677246, "step": 5266, "token_acc": 0.2970956477745991 }, { "epoch": 3.0873644092641452, "grad_norm": 0.26802666604916614, "learning_rate": 0.00029115445943355084, "loss": 3.106532096862793, "step": 5267, "token_acc": 0.28511577964272805 }, { "epoch": 3.0879507475813543, "grad_norm": 0.2834502545045432, "learning_rate": 0.00029114954017452305, "loss": 3.0601377487182617, "step": 5268, "token_acc": 0.29177304414180216 }, { "epoch": 3.0885370858985635, "grad_norm": 0.2665283934430523, "learning_rate": 0.00029114461958958555, "loss": 3.115663528442383, "step": 5269, "token_acc": 0.2850559699535245 }, { "epoch": 3.0891234242157726, "grad_norm": 0.32352971403250863, "learning_rate": 0.0002911396976787845, "loss": 3.04937481880188, "step": 5270, "token_acc": 0.2925367943468707 }, { "epoch": 3.0897097625329817, "grad_norm": 0.29724575038427375, "learning_rate": 0.00029113477444216623, "loss": 2.998047351837158, "step": 5271, "token_acc": 0.2993163006378029 }, { "epoch": 3.0902961008501904, "grad_norm": 0.2792337655021091, "learning_rate": 0.0002911298498797769, "loss": 3.008059501647949, "step": 5272, "token_acc": 0.2987994047144763 }, { "epoch": 3.0908824391673995, "grad_norm": 0.28302037125129015, "learning_rate": 0.00029112492399166283, "loss": 3.004110336303711, "step": 5273, "token_acc": 0.29970166759475214 }, { "epoch": 3.0914687774846086, "grad_norm": 0.2861043248853114, "learning_rate": 0.00029111999677787026, "loss": 3.030120849609375, "step": 5274, "token_acc": 0.29603712064208676 }, { "epoch": 3.0920551158018177, "grad_norm": 0.2898510173222872, "learning_rate": 0.0002911150682384455, "loss": 3.0446724891662598, "step": 5275, "token_acc": 0.29339746165912595 }, { "epoch": 3.092641454119027, "grad_norm": 0.2616553193854271, "learning_rate": 0.0002911101383734348, "loss": 3.023768186569214, "step": 5276, "token_acc": 0.29770671681861555 }, { "epoch": 3.093227792436236, "grad_norm": 0.26604392068591975, "learning_rate": 0.0002911052071828845, "loss": 3.068734884262085, "step": 5277, "token_acc": 0.29075208582568723 }, { "epoch": 3.0938141307534446, "grad_norm": 0.28024541318550394, "learning_rate": 0.0002911002746668409, "loss": 3.006502628326416, "step": 5278, "token_acc": 0.30001069938300223 }, { "epoch": 3.0944004690706537, "grad_norm": 0.23853992026649376, "learning_rate": 0.0002910953408253504, "loss": 3.0279572010040283, "step": 5279, "token_acc": 0.2963340877889503 }, { "epoch": 3.094986807387863, "grad_norm": 0.2683110507176322, "learning_rate": 0.0002910904056584592, "loss": 3.064157485961914, "step": 5280, "token_acc": 0.2924125614086543 }, { "epoch": 3.095573145705072, "grad_norm": 0.2580392770864522, "learning_rate": 0.0002910854691662139, "loss": 3.02544903755188, "step": 5281, "token_acc": 0.2980517705852497 }, { "epoch": 3.096159484022281, "grad_norm": 0.25705252987444965, "learning_rate": 0.0002910805313486607, "loss": 3.047605037689209, "step": 5282, "token_acc": 0.2936258405464214 }, { "epoch": 3.0967458223394897, "grad_norm": 0.2686810867968365, "learning_rate": 0.000291075592205846, "loss": 3.029672622680664, "step": 5283, "token_acc": 0.2948142980965197 }, { "epoch": 3.097332160656699, "grad_norm": 0.25798674993215, "learning_rate": 0.0002910706517378162, "loss": 3.0323455333709717, "step": 5284, "token_acc": 0.2957712142993576 }, { "epoch": 3.097918498973908, "grad_norm": 0.26529552182636706, "learning_rate": 0.0002910657099446177, "loss": 3.0670180320739746, "step": 5285, "token_acc": 0.29174760107759645 }, { "epoch": 3.098504837291117, "grad_norm": 0.26072530967769364, "learning_rate": 0.000291060766826297, "loss": 3.051422595977783, "step": 5286, "token_acc": 0.2943143118737793 }, { "epoch": 3.099091175608326, "grad_norm": 0.2662723923015808, "learning_rate": 0.00029105582238290046, "loss": 2.9574272632598877, "step": 5287, "token_acc": 0.3073538383462097 }, { "epoch": 3.099677513925535, "grad_norm": 0.25998451113702425, "learning_rate": 0.0002910508766144745, "loss": 3.0511386394500732, "step": 5288, "token_acc": 0.2935157178351518 }, { "epoch": 3.100263852242744, "grad_norm": 0.2653944095854091, "learning_rate": 0.00029104592952106567, "loss": 3.054295063018799, "step": 5289, "token_acc": 0.29177011494252875 }, { "epoch": 3.100850190559953, "grad_norm": 0.25216494244240106, "learning_rate": 0.00029104098110272034, "loss": 3.0196685791015625, "step": 5290, "token_acc": 0.2969948857123508 }, { "epoch": 3.101436528877162, "grad_norm": 0.2670381508079933, "learning_rate": 0.0002910360313594851, "loss": 3.029458522796631, "step": 5291, "token_acc": 0.2967890237007583 }, { "epoch": 3.1020228671943713, "grad_norm": 0.2648730170671892, "learning_rate": 0.00029103108029140636, "loss": 3.054354429244995, "step": 5292, "token_acc": 0.2946121846661607 }, { "epoch": 3.1026092055115804, "grad_norm": 0.2635941475656157, "learning_rate": 0.0002910261278985307, "loss": 3.018207311630249, "step": 5293, "token_acc": 0.29725975567407315 }, { "epoch": 3.103195543828789, "grad_norm": 0.2500654426960767, "learning_rate": 0.0002910211741809046, "loss": 3.0683445930480957, "step": 5294, "token_acc": 0.2904388587232831 }, { "epoch": 3.103781882145998, "grad_norm": 0.2602962232334179, "learning_rate": 0.00029101621913857454, "loss": 3.0397167205810547, "step": 5295, "token_acc": 0.29478745005567564 }, { "epoch": 3.1043682204632073, "grad_norm": 0.2633219573744176, "learning_rate": 0.0002910112627715872, "loss": 3.0127696990966797, "step": 5296, "token_acc": 0.2986773426262554 }, { "epoch": 3.1049545587804164, "grad_norm": 0.24200877709083898, "learning_rate": 0.000291006305079989, "loss": 3.0220041275024414, "step": 5297, "token_acc": 0.29768044812218947 }, { "epoch": 3.1055408970976255, "grad_norm": 0.2534617035429079, "learning_rate": 0.0002910013460638266, "loss": 3.021932601928711, "step": 5298, "token_acc": 0.29764065335753176 }, { "epoch": 3.106127235414834, "grad_norm": 0.2659963699826802, "learning_rate": 0.0002909963857231466, "loss": 3.060288906097412, "step": 5299, "token_acc": 0.29008916495516707 }, { "epoch": 3.1067135737320433, "grad_norm": 0.29034979606634165, "learning_rate": 0.00029099142405799547, "loss": 3.0164363384246826, "step": 5300, "token_acc": 0.29796215544183086 }, { "epoch": 3.1072999120492524, "grad_norm": 0.2914038929001619, "learning_rate": 0.00029098646106841996, "loss": 3.094759464263916, "step": 5301, "token_acc": 0.2892385325247455 }, { "epoch": 3.1078862503664615, "grad_norm": 0.29332768010568017, "learning_rate": 0.0002909814967544666, "loss": 3.0512495040893555, "step": 5302, "token_acc": 0.29191861970321875 }, { "epoch": 3.1084725886836706, "grad_norm": 0.2653524462581572, "learning_rate": 0.00029097653111618204, "loss": 3.033874988555908, "step": 5303, "token_acc": 0.2954474218328495 }, { "epoch": 3.1090589270008797, "grad_norm": 0.3216990128984195, "learning_rate": 0.000290971564153613, "loss": 3.034513473510742, "step": 5304, "token_acc": 0.29418865820494344 }, { "epoch": 3.1096452653180884, "grad_norm": 0.29215720013142954, "learning_rate": 0.00029096659586680596, "loss": 3.055514335632324, "step": 5305, "token_acc": 0.29384500569543187 }, { "epoch": 3.1102316036352975, "grad_norm": 0.2661209457879693, "learning_rate": 0.0002909616262558078, "loss": 3.0203256607055664, "step": 5306, "token_acc": 0.2977842580130602 }, { "epoch": 3.1108179419525066, "grad_norm": 0.31991968140128063, "learning_rate": 0.00029095665532066507, "loss": 3.0420079231262207, "step": 5307, "token_acc": 0.2950226499984592 }, { "epoch": 3.1114042802697157, "grad_norm": 0.27564936949255975, "learning_rate": 0.00029095168306142455, "loss": 3.016251564025879, "step": 5308, "token_acc": 0.2988576704920195 }, { "epoch": 3.111990618586925, "grad_norm": 0.26903559222941487, "learning_rate": 0.00029094670947813286, "loss": 3.068302631378174, "step": 5309, "token_acc": 0.2918855142908841 }, { "epoch": 3.1125769569041335, "grad_norm": 0.2669738442698242, "learning_rate": 0.0002909417345708368, "loss": 3.0516302585601807, "step": 5310, "token_acc": 0.2929250729808628 }, { "epoch": 3.1131632952213426, "grad_norm": 0.23858634997733813, "learning_rate": 0.000290936758339583, "loss": 3.031249761581421, "step": 5311, "token_acc": 0.2978399611177066 }, { "epoch": 3.1137496335385517, "grad_norm": 0.26892259920206113, "learning_rate": 0.00029093178078441837, "loss": 2.981503963470459, "step": 5312, "token_acc": 0.30284342524703056 }, { "epoch": 3.114335971855761, "grad_norm": 0.2714404817560093, "learning_rate": 0.00029092680190538953, "loss": 3.0349693298339844, "step": 5313, "token_acc": 0.2954997051667666 }, { "epoch": 3.11492231017297, "grad_norm": 0.2846947492250074, "learning_rate": 0.0002909218217025433, "loss": 3.0137877464294434, "step": 5314, "token_acc": 0.2975225900219336 }, { "epoch": 3.115508648490179, "grad_norm": 0.28823721445773864, "learning_rate": 0.00029091684017592634, "loss": 3.0339722633361816, "step": 5315, "token_acc": 0.29658804957721824 }, { "epoch": 3.1160949868073877, "grad_norm": 0.31741437808371786, "learning_rate": 0.00029091185732558567, "loss": 3.025500774383545, "step": 5316, "token_acc": 0.29732495172529594 }, { "epoch": 3.116681325124597, "grad_norm": 0.2757561053419513, "learning_rate": 0.00029090687315156793, "loss": 3.0023460388183594, "step": 5317, "token_acc": 0.2985872099123052 }, { "epoch": 3.117267663441806, "grad_norm": 0.28865608109861973, "learning_rate": 0.00029090188765391997, "loss": 3.043158769607544, "step": 5318, "token_acc": 0.29525406057384673 }, { "epoch": 3.117854001759015, "grad_norm": 0.28475799256125384, "learning_rate": 0.0002908969008326887, "loss": 3.01466965675354, "step": 5319, "token_acc": 0.2975835688828807 }, { "epoch": 3.118440340076224, "grad_norm": 0.27167308381708766, "learning_rate": 0.0002908919126879209, "loss": 2.966885566711426, "step": 5320, "token_acc": 0.30620411875527836 }, { "epoch": 3.119026678393433, "grad_norm": 0.2877389023490949, "learning_rate": 0.0002908869232196634, "loss": 3.0306015014648438, "step": 5321, "token_acc": 0.2959798101495928 }, { "epoch": 3.119613016710642, "grad_norm": 0.28857422370392966, "learning_rate": 0.0002908819324279631, "loss": 3.001605987548828, "step": 5322, "token_acc": 0.2997729818412726 }, { "epoch": 3.120199355027851, "grad_norm": 0.28685639084763337, "learning_rate": 0.00029087694031286693, "loss": 3.020561695098877, "step": 5323, "token_acc": 0.29820995626246816 }, { "epoch": 3.12078569334506, "grad_norm": 0.26485751034770716, "learning_rate": 0.0002908719468744217, "loss": 2.9896717071533203, "step": 5324, "token_acc": 0.30070886220727266 }, { "epoch": 3.1213720316622693, "grad_norm": 0.25036735665863324, "learning_rate": 0.0002908669521126744, "loss": 2.986374616622925, "step": 5325, "token_acc": 0.30080679760995366 }, { "epoch": 3.121958369979478, "grad_norm": 0.2798110967066586, "learning_rate": 0.0002908619560276719, "loss": 3.0329840183258057, "step": 5326, "token_acc": 0.29525755181230645 }, { "epoch": 3.122544708296687, "grad_norm": 0.28123915862052506, "learning_rate": 0.0002908569586194611, "loss": 3.0159850120544434, "step": 5327, "token_acc": 0.297900702384756 }, { "epoch": 3.123131046613896, "grad_norm": 0.27559481323307433, "learning_rate": 0.000290851959888089, "loss": 3.0007190704345703, "step": 5328, "token_acc": 0.2997485753468777 }, { "epoch": 3.1237173849311053, "grad_norm": 0.2814679782227896, "learning_rate": 0.00029084695983360256, "loss": 3.042935371398926, "step": 5329, "token_acc": 0.29395242234630453 }, { "epoch": 3.1243037232483144, "grad_norm": 0.2765345249596503, "learning_rate": 0.00029084195845604865, "loss": 3.067530393600464, "step": 5330, "token_acc": 0.29082318147729047 }, { "epoch": 3.1248900615655235, "grad_norm": 0.3066915918381733, "learning_rate": 0.0002908369557554744, "loss": 3.0139434337615967, "step": 5331, "token_acc": 0.2991293405608086 }, { "epoch": 3.125476399882732, "grad_norm": 0.29974822109018, "learning_rate": 0.00029083195173192674, "loss": 2.991257667541504, "step": 5332, "token_acc": 0.3010812893283678 }, { "epoch": 3.1260627381999413, "grad_norm": 0.30507686688699515, "learning_rate": 0.00029082694638545264, "loss": 3.0258185863494873, "step": 5333, "token_acc": 0.29595493441460025 }, { "epoch": 3.1266490765171504, "grad_norm": 0.2899025326024655, "learning_rate": 0.0002908219397160991, "loss": 3.0256729125976562, "step": 5334, "token_acc": 0.29662606406955117 }, { "epoch": 3.1272354148343595, "grad_norm": 0.2621229607588633, "learning_rate": 0.00029081693172391325, "loss": 3.0674326419830322, "step": 5335, "token_acc": 0.29262342173825906 }, { "epoch": 3.1278217531515686, "grad_norm": 0.2716514394437105, "learning_rate": 0.00029081192240894207, "loss": 3.0238611698150635, "step": 5336, "token_acc": 0.29641827912721286 }, { "epoch": 3.1284080914687773, "grad_norm": 0.2909894649130366, "learning_rate": 0.00029080691177123263, "loss": 3.021062135696411, "step": 5337, "token_acc": 0.2970664539070797 }, { "epoch": 3.1289944297859864, "grad_norm": 0.28317424269579244, "learning_rate": 0.00029080189981083195, "loss": 3.0518617630004883, "step": 5338, "token_acc": 0.2933875592185718 }, { "epoch": 3.1295807681031955, "grad_norm": 0.2773546387978314, "learning_rate": 0.00029079688652778723, "loss": 3.0442910194396973, "step": 5339, "token_acc": 0.2930961020210436 }, { "epoch": 3.1301671064204046, "grad_norm": 0.2861116317668745, "learning_rate": 0.0002907918719221454, "loss": 3.005298614501953, "step": 5340, "token_acc": 0.30018897169671105 }, { "epoch": 3.1307534447376137, "grad_norm": 0.29064232662099465, "learning_rate": 0.00029078685599395374, "loss": 3.017728567123413, "step": 5341, "token_acc": 0.2976612708255292 }, { "epoch": 3.1313397830548224, "grad_norm": 0.26764454260736853, "learning_rate": 0.00029078183874325925, "loss": 3.065646171569824, "step": 5342, "token_acc": 0.291617418831002 }, { "epoch": 3.1319261213720315, "grad_norm": 0.2583612376154264, "learning_rate": 0.00029077682017010904, "loss": 3.0032882690429688, "step": 5343, "token_acc": 0.29911191546254734 }, { "epoch": 3.1325124596892406, "grad_norm": 0.24419268896215868, "learning_rate": 0.0002907718002745504, "loss": 3.0323257446289062, "step": 5344, "token_acc": 0.2963721050588692 }, { "epoch": 3.1330987980064497, "grad_norm": 0.2692959304495686, "learning_rate": 0.0002907667790566303, "loss": 3.0436573028564453, "step": 5345, "token_acc": 0.2934602085894925 }, { "epoch": 3.133685136323659, "grad_norm": 0.2879031454054452, "learning_rate": 0.0002907617565163961, "loss": 3.056293487548828, "step": 5346, "token_acc": 0.291785800823843 }, { "epoch": 3.134271474640868, "grad_norm": 0.26252872156172335, "learning_rate": 0.0002907567326538948, "loss": 3.0544304847717285, "step": 5347, "token_acc": 0.29200598664642247 }, { "epoch": 3.1348578129580766, "grad_norm": 0.3001944020911754, "learning_rate": 0.00029075170746917364, "loss": 3.0910696983337402, "step": 5348, "token_acc": 0.28824532313146173 }, { "epoch": 3.1354441512752858, "grad_norm": 0.26686152772090627, "learning_rate": 0.0002907466809622799, "loss": 3.033529758453369, "step": 5349, "token_acc": 0.29546787343467 }, { "epoch": 3.136030489592495, "grad_norm": 0.26608855188672215, "learning_rate": 0.00029074165313326076, "loss": 3.018584966659546, "step": 5350, "token_acc": 0.29791833551720664 }, { "epoch": 3.136616827909704, "grad_norm": 0.31266874620828916, "learning_rate": 0.0002907366239821634, "loss": 2.984137773513794, "step": 5351, "token_acc": 0.3014846072999794 }, { "epoch": 3.137203166226913, "grad_norm": 0.2853613582308391, "learning_rate": 0.0002907315935090351, "loss": 3.0114269256591797, "step": 5352, "token_acc": 0.29845401389875087 }, { "epoch": 3.1377895045441218, "grad_norm": 0.26666826878229777, "learning_rate": 0.00029072656171392315, "loss": 3.0533699989318848, "step": 5353, "token_acc": 0.2928744930606829 }, { "epoch": 3.138375842861331, "grad_norm": 0.2514837645318631, "learning_rate": 0.0002907215285968748, "loss": 3.0496621131896973, "step": 5354, "token_acc": 0.2920772891936726 }, { "epoch": 3.13896218117854, "grad_norm": 0.31350385719612367, "learning_rate": 0.0002907164941579373, "loss": 3.0495119094848633, "step": 5355, "token_acc": 0.29239103796096844 }, { "epoch": 3.139548519495749, "grad_norm": 0.28469027868181157, "learning_rate": 0.0002907114583971579, "loss": 3.0614070892333984, "step": 5356, "token_acc": 0.2933501997336884 }, { "epoch": 3.140134857812958, "grad_norm": 0.26943323416109544, "learning_rate": 0.00029070642131458403, "loss": 3.0570504665374756, "step": 5357, "token_acc": 0.2939087709699989 }, { "epoch": 3.1407211961301673, "grad_norm": 0.31653381839963846, "learning_rate": 0.0002907013829102629, "loss": 3.0413155555725098, "step": 5358, "token_acc": 0.2930761388280819 }, { "epoch": 3.141307534447376, "grad_norm": 0.24777844895424422, "learning_rate": 0.0002906963431842419, "loss": 3.0301499366760254, "step": 5359, "token_acc": 0.2943010461051783 }, { "epoch": 3.141893872764585, "grad_norm": 0.2584934993644365, "learning_rate": 0.0002906913021365683, "loss": 3.0104033946990967, "step": 5360, "token_acc": 0.29835608790004847 }, { "epoch": 3.142480211081794, "grad_norm": 0.27211892418997763, "learning_rate": 0.00029068625976728956, "loss": 3.04984450340271, "step": 5361, "token_acc": 0.2933301036036718 }, { "epoch": 3.1430665493990033, "grad_norm": 0.3064652902989057, "learning_rate": 0.00029068121607645294, "loss": 3.0345640182495117, "step": 5362, "token_acc": 0.2956272379448095 }, { "epoch": 3.1436528877162124, "grad_norm": 0.28620528785672883, "learning_rate": 0.00029067617106410593, "loss": 3.050757646560669, "step": 5363, "token_acc": 0.2953637671599308 }, { "epoch": 3.144239226033421, "grad_norm": 0.25219912609311596, "learning_rate": 0.00029067112473029575, "loss": 3.0174055099487305, "step": 5364, "token_acc": 0.3003174721051557 }, { "epoch": 3.14482556435063, "grad_norm": 0.26622879607773675, "learning_rate": 0.00029066607707507, "loss": 3.0639500617980957, "step": 5365, "token_acc": 0.28998510419058093 }, { "epoch": 3.1454119026678393, "grad_norm": 0.26544682544027964, "learning_rate": 0.00029066102809847597, "loss": 3.0324273109436035, "step": 5366, "token_acc": 0.2955882390471597 }, { "epoch": 3.1459982409850484, "grad_norm": 0.2614754878789053, "learning_rate": 0.0002906559778005612, "loss": 3.0420727729797363, "step": 5367, "token_acc": 0.2936059087727026 }, { "epoch": 3.1465845793022575, "grad_norm": 0.27349963247134323, "learning_rate": 0.00029065092618137296, "loss": 3.0477142333984375, "step": 5368, "token_acc": 0.29385631100022963 }, { "epoch": 3.1471709176194667, "grad_norm": 0.2943206293677598, "learning_rate": 0.0002906458732409588, "loss": 3.0173418521881104, "step": 5369, "token_acc": 0.29720723033798574 }, { "epoch": 3.1477572559366753, "grad_norm": 0.27339741580279997, "learning_rate": 0.0002906408189793662, "loss": 3.0467071533203125, "step": 5370, "token_acc": 0.2931114446216219 }, { "epoch": 3.1483435942538844, "grad_norm": 0.31017195464839714, "learning_rate": 0.0002906357633966426, "loss": 2.979586601257324, "step": 5371, "token_acc": 0.3030140313005936 }, { "epoch": 3.1489299325710935, "grad_norm": 0.27546916774469465, "learning_rate": 0.0002906307064928356, "loss": 3.0591816902160645, "step": 5372, "token_acc": 0.29251704221576136 }, { "epoch": 3.1495162708883027, "grad_norm": 0.28983717663959646, "learning_rate": 0.00029062564826799254, "loss": 3.0315098762512207, "step": 5373, "token_acc": 0.29758352662620463 }, { "epoch": 3.1501026092055118, "grad_norm": 0.27898695138237106, "learning_rate": 0.000290620588722161, "loss": 3.0246715545654297, "step": 5374, "token_acc": 0.2970512762112999 }, { "epoch": 3.1506889475227204, "grad_norm": 0.2996184907547014, "learning_rate": 0.00029061552785538856, "loss": 3.080068588256836, "step": 5375, "token_acc": 0.28924052097068964 }, { "epoch": 3.1512752858399296, "grad_norm": 0.2409592820298718, "learning_rate": 0.0002906104656677227, "loss": 3.025758743286133, "step": 5376, "token_acc": 0.29551797253903517 }, { "epoch": 3.1518616241571387, "grad_norm": 0.29808990099867594, "learning_rate": 0.000290605402159211, "loss": 3.0626211166381836, "step": 5377, "token_acc": 0.2912986619416453 }, { "epoch": 3.1524479624743478, "grad_norm": 0.28497633646817105, "learning_rate": 0.000290600337329901, "loss": 3.028475046157837, "step": 5378, "token_acc": 0.2958579881656805 }, { "epoch": 3.153034300791557, "grad_norm": 0.25883661850915174, "learning_rate": 0.0002905952711798403, "loss": 3.07020902633667, "step": 5379, "token_acc": 0.28953010316039046 }, { "epoch": 3.1536206391087656, "grad_norm": 0.284355941957499, "learning_rate": 0.00029059020370907643, "loss": 3.069153308868408, "step": 5380, "token_acc": 0.2911276728114763 }, { "epoch": 3.1542069774259747, "grad_norm": 0.27600335514483687, "learning_rate": 0.0002905851349176571, "loss": 3.044420003890991, "step": 5381, "token_acc": 0.2924093051639241 }, { "epoch": 3.154793315743184, "grad_norm": 0.26752599137582783, "learning_rate": 0.00029058006480562986, "loss": 3.0292701721191406, "step": 5382, "token_acc": 0.2969613188951181 }, { "epoch": 3.155379654060393, "grad_norm": 0.24568780454718595, "learning_rate": 0.00029057499337304234, "loss": 3.0193278789520264, "step": 5383, "token_acc": 0.2964344116228852 }, { "epoch": 3.155965992377602, "grad_norm": 0.3021540054641473, "learning_rate": 0.0002905699206199422, "loss": 3.0743165016174316, "step": 5384, "token_acc": 0.2901781703056538 }, { "epoch": 3.1565523306948107, "grad_norm": 0.2697057503939792, "learning_rate": 0.0002905648465463771, "loss": 3.041522264480591, "step": 5385, "token_acc": 0.2935100479904019 }, { "epoch": 3.15713866901202, "grad_norm": 0.2567815942260553, "learning_rate": 0.0002905597711523946, "loss": 3.0842158794403076, "step": 5386, "token_acc": 0.28950353870481665 }, { "epoch": 3.157725007329229, "grad_norm": 0.27295455417085845, "learning_rate": 0.0002905546944380425, "loss": 3.037230968475342, "step": 5387, "token_acc": 0.2934696672894184 }, { "epoch": 3.158311345646438, "grad_norm": 0.2759351507126541, "learning_rate": 0.0002905496164033685, "loss": 3.048790693283081, "step": 5388, "token_acc": 0.29569811244737215 }, { "epoch": 3.158897683963647, "grad_norm": 0.27960265665746, "learning_rate": 0.00029054453704842017, "loss": 3.076842784881592, "step": 5389, "token_acc": 0.2905825828302772 }, { "epoch": 3.1594840222808562, "grad_norm": 0.2719123286955516, "learning_rate": 0.0002905394563732453, "loss": 3.0441150665283203, "step": 5390, "token_acc": 0.2953043123555314 }, { "epoch": 3.160070360598065, "grad_norm": 0.2515209794493845, "learning_rate": 0.00029053437437789165, "loss": 3.0664401054382324, "step": 5391, "token_acc": 0.29121228421927725 }, { "epoch": 3.160656698915274, "grad_norm": 0.2598801143217189, "learning_rate": 0.00029052929106240696, "loss": 3.0633704662323, "step": 5392, "token_acc": 0.2923338071658049 }, { "epoch": 3.161243037232483, "grad_norm": 0.27793707679257335, "learning_rate": 0.0002905242064268389, "loss": 3.051159381866455, "step": 5393, "token_acc": 0.2919332587274288 }, { "epoch": 3.1618293755496922, "grad_norm": 0.29984634921823927, "learning_rate": 0.00029051912047123524, "loss": 3.047536849975586, "step": 5394, "token_acc": 0.29249922868913447 }, { "epoch": 3.1624157138669013, "grad_norm": 0.30388645272033243, "learning_rate": 0.0002905140331956439, "loss": 3.006138801574707, "step": 5395, "token_acc": 0.29989969401608946 }, { "epoch": 3.16300205218411, "grad_norm": 0.2651067217519387, "learning_rate": 0.00029050894460011246, "loss": 3.0379276275634766, "step": 5396, "token_acc": 0.29439573390834667 }, { "epoch": 3.163588390501319, "grad_norm": 0.23287997301780414, "learning_rate": 0.00029050385468468886, "loss": 3.059835910797119, "step": 5397, "token_acc": 0.2907920284523585 }, { "epoch": 3.1641747288185282, "grad_norm": 0.25642931909211886, "learning_rate": 0.0002904987634494209, "loss": 3.0495285987854004, "step": 5398, "token_acc": 0.29227074692234545 }, { "epoch": 3.1647610671357373, "grad_norm": 0.23633083670497035, "learning_rate": 0.00029049367089435636, "loss": 3.036954879760742, "step": 5399, "token_acc": 0.2954503284723548 }, { "epoch": 3.1653474054529465, "grad_norm": 0.23063904805409077, "learning_rate": 0.00029048857701954314, "loss": 3.043638229370117, "step": 5400, "token_acc": 0.2952816803746493 }, { "epoch": 3.1659337437701556, "grad_norm": 0.23850557502820416, "learning_rate": 0.000290483481825029, "loss": 3.064352035522461, "step": 5401, "token_acc": 0.2901427234856072 }, { "epoch": 3.1665200820873642, "grad_norm": 0.2399768774216624, "learning_rate": 0.0002904783853108619, "loss": 3.0597896575927734, "step": 5402, "token_acc": 0.29016302918748355 }, { "epoch": 3.1671064204045734, "grad_norm": 0.2673562749099399, "learning_rate": 0.0002904732874770896, "loss": 3.0342419147491455, "step": 5403, "token_acc": 0.2955557066962903 }, { "epoch": 3.1676927587217825, "grad_norm": 0.25975422664460696, "learning_rate": 0.00029046818832376007, "loss": 3.0449600219726562, "step": 5404, "token_acc": 0.2935151967849515 }, { "epoch": 3.1682790970389916, "grad_norm": 0.28275483028053316, "learning_rate": 0.0002904630878509212, "loss": 3.016857862472534, "step": 5405, "token_acc": 0.2983555227984339 }, { "epoch": 3.1688654353562007, "grad_norm": 0.28010196531031606, "learning_rate": 0.0002904579860586209, "loss": 3.0383315086364746, "step": 5406, "token_acc": 0.2967374595522771 }, { "epoch": 3.1694517736734094, "grad_norm": 0.29218044547206484, "learning_rate": 0.0002904528829469071, "loss": 3.0445313453674316, "step": 5407, "token_acc": 0.2930165890263696 }, { "epoch": 3.1700381119906185, "grad_norm": 0.27411002913094284, "learning_rate": 0.00029044777851582775, "loss": 3.0499987602233887, "step": 5408, "token_acc": 0.29396396745948605 }, { "epoch": 3.1706244503078276, "grad_norm": 0.2552897715355726, "learning_rate": 0.00029044267276543074, "loss": 3.037863254547119, "step": 5409, "token_acc": 0.29415527263795643 }, { "epoch": 3.1712107886250367, "grad_norm": 0.2627861453647873, "learning_rate": 0.0002904375656957641, "loss": 3.0614705085754395, "step": 5410, "token_acc": 0.2910051576913095 }, { "epoch": 3.171797126942246, "grad_norm": 0.2720362615129391, "learning_rate": 0.0002904324573068757, "loss": 3.0795886516571045, "step": 5411, "token_acc": 0.2877877661955273 }, { "epoch": 3.172383465259455, "grad_norm": 0.3035982086941271, "learning_rate": 0.00029042734759881367, "loss": 3.0455641746520996, "step": 5412, "token_acc": 0.2945114098203888 }, { "epoch": 3.1729698035766636, "grad_norm": 0.2615378869609572, "learning_rate": 0.00029042223657162593, "loss": 3.029334783554077, "step": 5413, "token_acc": 0.295146862804927 }, { "epoch": 3.1735561418938727, "grad_norm": 0.27318306972811945, "learning_rate": 0.0002904171242253605, "loss": 3.0196731090545654, "step": 5414, "token_acc": 0.29711179776993996 }, { "epoch": 3.174142480211082, "grad_norm": 0.31264625516040356, "learning_rate": 0.0002904120105600654, "loss": 3.062580108642578, "step": 5415, "token_acc": 0.29244153848902227 }, { "epoch": 3.174728818528291, "grad_norm": 0.30064617982550573, "learning_rate": 0.00029040689557578866, "loss": 3.0004348754882812, "step": 5416, "token_acc": 0.3005659283191981 }, { "epoch": 3.1753151568455, "grad_norm": 0.2450276221308275, "learning_rate": 0.0002904017792725783, "loss": 3.0122342109680176, "step": 5417, "token_acc": 0.2982122969593814 }, { "epoch": 3.1759014951627087, "grad_norm": 0.2697012010480371, "learning_rate": 0.00029039666165048245, "loss": 3.0764012336730957, "step": 5418, "token_acc": 0.29055975238694787 }, { "epoch": 3.176487833479918, "grad_norm": 0.29496868354150235, "learning_rate": 0.00029039154270954915, "loss": 2.9907400608062744, "step": 5419, "token_acc": 0.3018730969479114 }, { "epoch": 3.177074171797127, "grad_norm": 0.29589947118490356, "learning_rate": 0.0002903864224498265, "loss": 3.052560329437256, "step": 5420, "token_acc": 0.29297849137087323 }, { "epoch": 3.177660510114336, "grad_norm": 0.2890579235434179, "learning_rate": 0.0002903813008713626, "loss": 3.0558905601501465, "step": 5421, "token_acc": 0.29126228680212907 }, { "epoch": 3.178246848431545, "grad_norm": 0.30856421629343994, "learning_rate": 0.00029037617797420554, "loss": 3.02246356010437, "step": 5422, "token_acc": 0.2970672563868496 }, { "epoch": 3.1788331867487543, "grad_norm": 0.28135281418437047, "learning_rate": 0.00029037105375840337, "loss": 3.046564817428589, "step": 5423, "token_acc": 0.29374221421187946 }, { "epoch": 3.179419525065963, "grad_norm": 0.2935846657523766, "learning_rate": 0.0002903659282240044, "loss": 3.0378315448760986, "step": 5424, "token_acc": 0.2947460167860734 }, { "epoch": 3.180005863383172, "grad_norm": 0.29372605040584476, "learning_rate": 0.0002903608013710566, "loss": 3.066006660461426, "step": 5425, "token_acc": 0.28993622728553686 }, { "epoch": 3.180592201700381, "grad_norm": 0.25132738097446183, "learning_rate": 0.00029035567319960826, "loss": 3.0172410011291504, "step": 5426, "token_acc": 0.29805499523360596 }, { "epoch": 3.1811785400175903, "grad_norm": 0.2637122367677759, "learning_rate": 0.0002903505437097075, "loss": 3.0304558277130127, "step": 5427, "token_acc": 0.29510546213279343 }, { "epoch": 3.1817648783347994, "grad_norm": 0.28551598582098087, "learning_rate": 0.0002903454129014025, "loss": 3.064781427383423, "step": 5428, "token_acc": 0.29104862337556275 }, { "epoch": 3.182351216652008, "grad_norm": 0.31102106935712576, "learning_rate": 0.00029034028077474144, "loss": 3.0240018367767334, "step": 5429, "token_acc": 0.29745801616364353 }, { "epoch": 3.182937554969217, "grad_norm": 0.28845413054449426, "learning_rate": 0.00029033514732977253, "loss": 3.067995071411133, "step": 5430, "token_acc": 0.290314409407719 }, { "epoch": 3.1835238932864263, "grad_norm": 0.306178067736288, "learning_rate": 0.0002903300125665441, "loss": 3.012289524078369, "step": 5431, "token_acc": 0.2994647201946472 }, { "epoch": 3.1841102316036354, "grad_norm": 0.287390763312986, "learning_rate": 0.00029032487648510423, "loss": 3.053600311279297, "step": 5432, "token_acc": 0.2925086156332522 }, { "epoch": 3.1846965699208445, "grad_norm": 0.3108739635655479, "learning_rate": 0.0002903197390855013, "loss": 3.027193546295166, "step": 5433, "token_acc": 0.2958935040251895 }, { "epoch": 3.185282908238053, "grad_norm": 0.268665878807294, "learning_rate": 0.00029031460036778344, "loss": 3.0519514083862305, "step": 5434, "token_acc": 0.293075507105753 }, { "epoch": 3.1858692465552623, "grad_norm": 0.26836511236589805, "learning_rate": 0.000290309460331999, "loss": 3.021183967590332, "step": 5435, "token_acc": 0.29670797326375314 }, { "epoch": 3.1864555848724714, "grad_norm": 0.2763591767790281, "learning_rate": 0.00029030431897819625, "loss": 3.0452303886413574, "step": 5436, "token_acc": 0.29407998517884826 }, { "epoch": 3.1870419231896805, "grad_norm": 0.25012342390917064, "learning_rate": 0.0002902991763064235, "loss": 3.0219974517822266, "step": 5437, "token_acc": 0.29691974763678647 }, { "epoch": 3.1876282615068896, "grad_norm": 0.30717661013167247, "learning_rate": 0.00029029403231672907, "loss": 3.011460542678833, "step": 5438, "token_acc": 0.2992396965047956 }, { "epoch": 3.1882145998240983, "grad_norm": 0.23360642491833097, "learning_rate": 0.0002902888870091612, "loss": 3.031067371368408, "step": 5439, "token_acc": 0.29602434767803176 }, { "epoch": 3.1888009381413074, "grad_norm": 0.23975529012701033, "learning_rate": 0.0002902837403837683, "loss": 3.069235324859619, "step": 5440, "token_acc": 0.2912979569819972 }, { "epoch": 3.1893872764585165, "grad_norm": 0.23044013977346534, "learning_rate": 0.00029027859244059874, "loss": 3.056037425994873, "step": 5441, "token_acc": 0.29164073392600404 }, { "epoch": 3.1899736147757256, "grad_norm": 0.2672831892400742, "learning_rate": 0.00029027344317970075, "loss": 3.0441417694091797, "step": 5442, "token_acc": 0.2949424672196949 }, { "epoch": 3.1905599530929347, "grad_norm": 0.27655170021198183, "learning_rate": 0.00029026829260112285, "loss": 3.0650463104248047, "step": 5443, "token_acc": 0.29039545691340385 }, { "epoch": 3.191146291410144, "grad_norm": 0.2797173201953544, "learning_rate": 0.00029026314070491335, "loss": 3.0708415508270264, "step": 5444, "token_acc": 0.29222022439776185 }, { "epoch": 3.1917326297273525, "grad_norm": 0.26943236834101214, "learning_rate": 0.0002902579874911206, "loss": 3.0334792137145996, "step": 5445, "token_acc": 0.294558319969939 }, { "epoch": 3.1923189680445616, "grad_norm": 0.2597255027702492, "learning_rate": 0.00029025283295979306, "loss": 3.0589942932128906, "step": 5446, "token_acc": 0.2920280548200062 }, { "epoch": 3.1929053063617707, "grad_norm": 0.2537377800917062, "learning_rate": 0.0002902476771109792, "loss": 3.097214460372925, "step": 5447, "token_acc": 0.2865499248910522 }, { "epoch": 3.19349164467898, "grad_norm": 0.2777055243287423, "learning_rate": 0.0002902425199447273, "loss": 3.0852646827697754, "step": 5448, "token_acc": 0.28888888888888886 }, { "epoch": 3.194077982996189, "grad_norm": 0.2823010273558741, "learning_rate": 0.00029023736146108604, "loss": 3.0226335525512695, "step": 5449, "token_acc": 0.2968397406443049 }, { "epoch": 3.1946643213133976, "grad_norm": 0.25337305922484077, "learning_rate": 0.0002902322016601037, "loss": 3.025752544403076, "step": 5450, "token_acc": 0.2961723064925337 }, { "epoch": 3.1952506596306067, "grad_norm": 0.257736341736192, "learning_rate": 0.00029022704054182874, "loss": 3.093207836151123, "step": 5451, "token_acc": 0.28886961111444986 }, { "epoch": 3.195836997947816, "grad_norm": 0.2662523226579173, "learning_rate": 0.00029022187810630974, "loss": 3.0289978981018066, "step": 5452, "token_acc": 0.2967314233605952 }, { "epoch": 3.196423336265025, "grad_norm": 0.2610753546482197, "learning_rate": 0.0002902167143535951, "loss": 2.9915199279785156, "step": 5453, "token_acc": 0.3000393079673 }, { "epoch": 3.197009674582234, "grad_norm": 0.27860994031028113, "learning_rate": 0.00029021154928373337, "loss": 3.00730562210083, "step": 5454, "token_acc": 0.2992911254408967 }, { "epoch": 3.197596012899443, "grad_norm": 0.3059160783393794, "learning_rate": 0.0002902063828967731, "loss": 3.032785654067993, "step": 5455, "token_acc": 0.2969263627077723 }, { "epoch": 3.198182351216652, "grad_norm": 0.2895615831262765, "learning_rate": 0.00029020121519276283, "loss": 3.027360439300537, "step": 5456, "token_acc": 0.2975511336278184 }, { "epoch": 3.198768689533861, "grad_norm": 0.27258474032298347, "learning_rate": 0.000290196046171751, "loss": 3.0187621116638184, "step": 5457, "token_acc": 0.29648406166194996 }, { "epoch": 3.19935502785107, "grad_norm": 0.2994915424496726, "learning_rate": 0.00029019087583378626, "loss": 3.021726131439209, "step": 5458, "token_acc": 0.2963132861730124 }, { "epoch": 3.199941366168279, "grad_norm": 0.23911046501444816, "learning_rate": 0.0002901857041789172, "loss": 2.97152042388916, "step": 5459, "token_acc": 0.30634925170739563 }, { "epoch": 3.2005277044854883, "grad_norm": 0.26510796259876795, "learning_rate": 0.0002901805312071923, "loss": 3.0717005729675293, "step": 5460, "token_acc": 0.29043326244928314 }, { "epoch": 3.201114042802697, "grad_norm": 0.2628454817882755, "learning_rate": 0.0002901753569186602, "loss": 3.0305166244506836, "step": 5461, "token_acc": 0.2949253421008174 }, { "epoch": 3.201700381119906, "grad_norm": 0.2499596130301154, "learning_rate": 0.0002901701813133695, "loss": 3.029402256011963, "step": 5462, "token_acc": 0.29593944475153466 }, { "epoch": 3.202286719437115, "grad_norm": 0.25851004799773886, "learning_rate": 0.0002901650043913689, "loss": 3.0686545372009277, "step": 5463, "token_acc": 0.2911158703197696 }, { "epoch": 3.2028730577543243, "grad_norm": 0.24269600233604152, "learning_rate": 0.00029015982615270686, "loss": 2.9989731311798096, "step": 5464, "token_acc": 0.30219385449025604 }, { "epoch": 3.2034593960715334, "grad_norm": 0.27809831697534787, "learning_rate": 0.00029015464659743216, "loss": 3.047905445098877, "step": 5465, "token_acc": 0.2939025893113883 }, { "epoch": 3.2040457343887425, "grad_norm": 0.260624470803214, "learning_rate": 0.00029014946572559347, "loss": 3.066075325012207, "step": 5466, "token_acc": 0.2907085334968271 }, { "epoch": 3.204632072705951, "grad_norm": 0.3117444724017944, "learning_rate": 0.00029014428353723936, "loss": 3.0640780925750732, "step": 5467, "token_acc": 0.29116160313612216 }, { "epoch": 3.2052184110231603, "grad_norm": 0.28977588080943023, "learning_rate": 0.0002901391000324185, "loss": 3.0611000061035156, "step": 5468, "token_acc": 0.29149763536966333 }, { "epoch": 3.2058047493403694, "grad_norm": 0.256657011574282, "learning_rate": 0.0002901339152111797, "loss": 3.0344057083129883, "step": 5469, "token_acc": 0.2934286188785784 }, { "epoch": 3.2063910876575785, "grad_norm": 0.2521046874005232, "learning_rate": 0.0002901287290735716, "loss": 3.0613627433776855, "step": 5470, "token_acc": 0.291746213311208 }, { "epoch": 3.2069774259747876, "grad_norm": 0.2914426637231044, "learning_rate": 0.0002901235416196429, "loss": 3.0585129261016846, "step": 5471, "token_acc": 0.29366238433871295 }, { "epoch": 3.2075637642919963, "grad_norm": 0.26625924404058815, "learning_rate": 0.00029011835284944233, "loss": 3.0369839668273926, "step": 5472, "token_acc": 0.2958315352959131 }, { "epoch": 3.2081501026092054, "grad_norm": 0.24497887263317883, "learning_rate": 0.00029011316276301866, "loss": 3.039714813232422, "step": 5473, "token_acc": 0.294254735208495 }, { "epoch": 3.2087364409264145, "grad_norm": 0.28455192960044107, "learning_rate": 0.00029010797136042065, "loss": 3.0285723209381104, "step": 5474, "token_acc": 0.295212102267205 }, { "epoch": 3.2093227792436236, "grad_norm": 0.24803596456015764, "learning_rate": 0.00029010277864169705, "loss": 3.0835280418395996, "step": 5475, "token_acc": 0.2904488575746799 }, { "epoch": 3.2099091175608327, "grad_norm": 0.2714586783911179, "learning_rate": 0.0002900975846068966, "loss": 3.069371223449707, "step": 5476, "token_acc": 0.29053620045097706 }, { "epoch": 3.210495455878042, "grad_norm": 0.292598993112876, "learning_rate": 0.0002900923892560682, "loss": 3.039937973022461, "step": 5477, "token_acc": 0.2941823537655648 }, { "epoch": 3.2110817941952505, "grad_norm": 0.24353582760372317, "learning_rate": 0.0002900871925892605, "loss": 3.0645387172698975, "step": 5478, "token_acc": 0.294048816591178 }, { "epoch": 3.2116681325124596, "grad_norm": 0.26844071423364974, "learning_rate": 0.00029008199460652244, "loss": 3.0633792877197266, "step": 5479, "token_acc": 0.2911857414062841 }, { "epoch": 3.2122544708296688, "grad_norm": 0.2572926056219949, "learning_rate": 0.00029007679530790277, "loss": 3.0901787281036377, "step": 5480, "token_acc": 0.2859894814896913 }, { "epoch": 3.212840809146878, "grad_norm": 0.24949414812310355, "learning_rate": 0.00029007159469345034, "loss": 2.985351324081421, "step": 5481, "token_acc": 0.30219071815901666 }, { "epoch": 3.213427147464087, "grad_norm": 0.2637921694832076, "learning_rate": 0.00029006639276321405, "loss": 3.0297322273254395, "step": 5482, "token_acc": 0.29572955205704937 }, { "epoch": 3.2140134857812956, "grad_norm": 0.23107870206125952, "learning_rate": 0.00029006118951724276, "loss": 3.001009941101074, "step": 5483, "token_acc": 0.3009738023672367 }, { "epoch": 3.2145998240985048, "grad_norm": 0.28099929466254187, "learning_rate": 0.00029005598495558535, "loss": 3.0236196517944336, "step": 5484, "token_acc": 0.29652444870565675 }, { "epoch": 3.215186162415714, "grad_norm": 0.24376113620221973, "learning_rate": 0.0002900507790782906, "loss": 3.0315070152282715, "step": 5485, "token_acc": 0.2956924215271346 }, { "epoch": 3.215772500732923, "grad_norm": 0.27216546822972554, "learning_rate": 0.0002900455718854076, "loss": 3.0448055267333984, "step": 5486, "token_acc": 0.29287998315975183 }, { "epoch": 3.216358839050132, "grad_norm": 0.2722622024930363, "learning_rate": 0.00029004036337698517, "loss": 3.0293242931365967, "step": 5487, "token_acc": 0.2974669603524229 }, { "epoch": 3.2169451773673408, "grad_norm": 0.3047412288158636, "learning_rate": 0.0002900351535530722, "loss": 3.025803327560425, "step": 5488, "token_acc": 0.2983778335469446 }, { "epoch": 3.21753151568455, "grad_norm": 0.29046116637755764, "learning_rate": 0.0002900299424137176, "loss": 3.0587403774261475, "step": 5489, "token_acc": 0.2922288258800744 }, { "epoch": 3.218117854001759, "grad_norm": 0.31047108976155163, "learning_rate": 0.0002900247299589705, "loss": 3.032149314880371, "step": 5490, "token_acc": 0.29482897586374723 }, { "epoch": 3.218704192318968, "grad_norm": 0.2783179430004287, "learning_rate": 0.00029001951618887965, "loss": 3.0139455795288086, "step": 5491, "token_acc": 0.2985564744199761 }, { "epoch": 3.219290530636177, "grad_norm": 0.2880944611656997, "learning_rate": 0.0002900143011034942, "loss": 3.03299617767334, "step": 5492, "token_acc": 0.29511205589875145 }, { "epoch": 3.219876868953386, "grad_norm": 0.2628970147939955, "learning_rate": 0.000290009084702863, "loss": 3.0326945781707764, "step": 5493, "token_acc": 0.2954385532391357 }, { "epoch": 3.220463207270595, "grad_norm": 0.27945003544122965, "learning_rate": 0.0002900038669870351, "loss": 3.070260763168335, "step": 5494, "token_acc": 0.2910573232243124 }, { "epoch": 3.221049545587804, "grad_norm": 0.3093505490129998, "learning_rate": 0.0002899986479560596, "loss": 3.0959115028381348, "step": 5495, "token_acc": 0.28653825169265623 }, { "epoch": 3.221635883905013, "grad_norm": 0.31131706269182524, "learning_rate": 0.0002899934276099854, "loss": 3.0336928367614746, "step": 5496, "token_acc": 0.29768335646418276 }, { "epoch": 3.2222222222222223, "grad_norm": 0.2406284341023306, "learning_rate": 0.0002899882059488616, "loss": 3.0404491424560547, "step": 5497, "token_acc": 0.2937860835318438 }, { "epoch": 3.2228085605394314, "grad_norm": 0.30754568072435984, "learning_rate": 0.0002899829829727373, "loss": 3.0383620262145996, "step": 5498, "token_acc": 0.2954915519465415 }, { "epoch": 3.22339489885664, "grad_norm": 0.28308920880654004, "learning_rate": 0.0002899777586816614, "loss": 3.0325679779052734, "step": 5499, "token_acc": 0.29582267007890806 }, { "epoch": 3.223981237173849, "grad_norm": 0.2660949500121972, "learning_rate": 0.00028997253307568315, "loss": 3.052431344985962, "step": 5500, "token_acc": 0.2922578679049395 }, { "epoch": 3.2245675754910583, "grad_norm": 0.26247052812578126, "learning_rate": 0.00028996730615485155, "loss": 3.0427212715148926, "step": 5501, "token_acc": 0.2967759821524314 }, { "epoch": 3.2251539138082674, "grad_norm": 0.2831839948771978, "learning_rate": 0.00028996207791921573, "loss": 2.996189832687378, "step": 5502, "token_acc": 0.30126247144926516 }, { "epoch": 3.2257402521254765, "grad_norm": 0.2646212788919643, "learning_rate": 0.0002899568483688248, "loss": 3.0375609397888184, "step": 5503, "token_acc": 0.29427728643496476 }, { "epoch": 3.226326590442685, "grad_norm": 0.2601418899392246, "learning_rate": 0.00028995161750372783, "loss": 3.006164789199829, "step": 5504, "token_acc": 0.30018796543980997 }, { "epoch": 3.2269129287598943, "grad_norm": 0.2834035066983693, "learning_rate": 0.00028994638532397403, "loss": 3.057257890701294, "step": 5505, "token_acc": 0.2908360654009551 }, { "epoch": 3.2274992670771034, "grad_norm": 0.2655036459575535, "learning_rate": 0.0002899411518296125, "loss": 3.057617664337158, "step": 5506, "token_acc": 0.29160726287704436 }, { "epoch": 3.2280856053943126, "grad_norm": 0.26957184877541746, "learning_rate": 0.0002899359170206924, "loss": 2.9997286796569824, "step": 5507, "token_acc": 0.30154123171014746 }, { "epoch": 3.2286719437115217, "grad_norm": 0.2574211443116237, "learning_rate": 0.000289930680897263, "loss": 3.0693721771240234, "step": 5508, "token_acc": 0.2887575555452144 }, { "epoch": 3.2292582820287308, "grad_norm": 0.27231880374428336, "learning_rate": 0.00028992544345937335, "loss": 3.0254197120666504, "step": 5509, "token_acc": 0.29659363547681195 }, { "epoch": 3.2298446203459394, "grad_norm": 0.25761703798086216, "learning_rate": 0.0002899202047070728, "loss": 3.0041403770446777, "step": 5510, "token_acc": 0.3003474624278513 }, { "epoch": 3.2304309586631486, "grad_norm": 0.2710049634393407, "learning_rate": 0.00028991496464041036, "loss": 3.0407609939575195, "step": 5511, "token_acc": 0.29314395153252404 }, { "epoch": 3.2310172969803577, "grad_norm": 0.29333998688281687, "learning_rate": 0.00028990972325943545, "loss": 3.0918593406677246, "step": 5512, "token_acc": 0.2858426824748112 }, { "epoch": 3.231603635297567, "grad_norm": 0.2216722457733159, "learning_rate": 0.00028990448056419717, "loss": 3.0777058601379395, "step": 5513, "token_acc": 0.29001384199950075 }, { "epoch": 3.232189973614776, "grad_norm": 0.2759114810575503, "learning_rate": 0.0002898992365547448, "loss": 3.0397377014160156, "step": 5514, "token_acc": 0.29421461305122365 }, { "epoch": 3.2327763119319846, "grad_norm": 0.3048934127116464, "learning_rate": 0.00028989399123112767, "loss": 3.0330615043640137, "step": 5515, "token_acc": 0.2953765632658965 }, { "epoch": 3.2333626502491937, "grad_norm": 0.2706030388281219, "learning_rate": 0.00028988874459339494, "loss": 3.0417191982269287, "step": 5516, "token_acc": 0.29431646381201404 }, { "epoch": 3.233948988566403, "grad_norm": 0.2290258591512697, "learning_rate": 0.000289883496641596, "loss": 3.0254323482513428, "step": 5517, "token_acc": 0.2964872763335584 }, { "epoch": 3.234535326883612, "grad_norm": 0.24435024690518656, "learning_rate": 0.00028987824737578016, "loss": 3.0329482555389404, "step": 5518, "token_acc": 0.2959781621559375 }, { "epoch": 3.235121665200821, "grad_norm": 0.24804368325070508, "learning_rate": 0.0002898729967959966, "loss": 3.07688570022583, "step": 5519, "token_acc": 0.29049422728988505 }, { "epoch": 3.23570800351803, "grad_norm": 0.26034392240820975, "learning_rate": 0.0002898677449022947, "loss": 3.0242600440979004, "step": 5520, "token_acc": 0.295516451760309 }, { "epoch": 3.236294341835239, "grad_norm": 0.24695017229667232, "learning_rate": 0.00028986249169472383, "loss": 3.0386528968811035, "step": 5521, "token_acc": 0.29523492794863476 }, { "epoch": 3.236880680152448, "grad_norm": 0.271305842674611, "learning_rate": 0.00028985723717333335, "loss": 3.007998466491699, "step": 5522, "token_acc": 0.2995216234769178 }, { "epoch": 3.237467018469657, "grad_norm": 0.274615075010814, "learning_rate": 0.00028985198133817255, "loss": 3.0543458461761475, "step": 5523, "token_acc": 0.29289545472516176 }, { "epoch": 3.238053356786866, "grad_norm": 0.25477569345762213, "learning_rate": 0.00028984672418929085, "loss": 3.0879950523376465, "step": 5524, "token_acc": 0.2869741424354224 }, { "epoch": 3.2386396951040752, "grad_norm": 0.2529910242160972, "learning_rate": 0.00028984146572673766, "loss": 3.075887680053711, "step": 5525, "token_acc": 0.28980619528308427 }, { "epoch": 3.239226033421284, "grad_norm": 0.23379481591210036, "learning_rate": 0.0002898362059505623, "loss": 3.0306029319763184, "step": 5526, "token_acc": 0.29739750207475274 }, { "epoch": 3.239812371738493, "grad_norm": 0.2437474662186198, "learning_rate": 0.00028983094486081425, "loss": 3.070091724395752, "step": 5527, "token_acc": 0.2905064164841261 }, { "epoch": 3.240398710055702, "grad_norm": 0.2592720276562673, "learning_rate": 0.00028982568245754285, "loss": 3.0497727394104004, "step": 5528, "token_acc": 0.2925194424991707 }, { "epoch": 3.2409850483729112, "grad_norm": 0.2560044488766083, "learning_rate": 0.0002898204187407976, "loss": 3.033024787902832, "step": 5529, "token_acc": 0.29534485608997774 }, { "epoch": 3.2415713866901203, "grad_norm": 0.2775353210367765, "learning_rate": 0.0002898151537106279, "loss": 3.045614719390869, "step": 5530, "token_acc": 0.29309818660755876 }, { "epoch": 3.2421577250073295, "grad_norm": 0.2650649484386012, "learning_rate": 0.00028980988736708327, "loss": 2.987740993499756, "step": 5531, "token_acc": 0.3014742560943871 }, { "epoch": 3.242744063324538, "grad_norm": 0.281087962261792, "learning_rate": 0.00028980461971021316, "loss": 3.020624876022339, "step": 5532, "token_acc": 0.29760514469453375 }, { "epoch": 3.2433304016417472, "grad_norm": 0.22718387171114135, "learning_rate": 0.000289799350740067, "loss": 3.0307769775390625, "step": 5533, "token_acc": 0.2953111236984008 }, { "epoch": 3.2439167399589564, "grad_norm": 0.2743611903198156, "learning_rate": 0.0002897940804566943, "loss": 3.0270633697509766, "step": 5534, "token_acc": 0.2978946998738387 }, { "epoch": 3.2445030782761655, "grad_norm": 0.22540439950899302, "learning_rate": 0.00028978880886014463, "loss": 3.0013480186462402, "step": 5535, "token_acc": 0.30148696484866744 }, { "epoch": 3.2450894165933746, "grad_norm": 0.26447640159739955, "learning_rate": 0.00028978353595046744, "loss": 3.0681824684143066, "step": 5536, "token_acc": 0.29132485696776816 }, { "epoch": 3.2456757549105832, "grad_norm": 0.24351191133582292, "learning_rate": 0.00028977826172771234, "loss": 3.006533622741699, "step": 5537, "token_acc": 0.2985869881468925 }, { "epoch": 3.2462620932277924, "grad_norm": 0.28358950771167235, "learning_rate": 0.0002897729861919288, "loss": 3.02524995803833, "step": 5538, "token_acc": 0.29631564091809576 }, { "epoch": 3.2468484315450015, "grad_norm": 0.23711071388562208, "learning_rate": 0.0002897677093431664, "loss": 3.040681838989258, "step": 5539, "token_acc": 0.29385430768185405 }, { "epoch": 3.2474347698622106, "grad_norm": 0.29781931821894136, "learning_rate": 0.0002897624311814747, "loss": 3.041667938232422, "step": 5540, "token_acc": 0.29417273374670344 }, { "epoch": 3.2480211081794197, "grad_norm": 0.2459966849553532, "learning_rate": 0.0002897571517069033, "loss": 3.0244832038879395, "step": 5541, "token_acc": 0.29763461125248847 }, { "epoch": 3.2486074464966284, "grad_norm": 0.2511737806433179, "learning_rate": 0.0002897518709195018, "loss": 3.0318479537963867, "step": 5542, "token_acc": 0.2945540056932642 }, { "epoch": 3.2491937848138375, "grad_norm": 0.23754696734060896, "learning_rate": 0.00028974658881931976, "loss": 3.0464601516723633, "step": 5543, "token_acc": 0.29489593111368656 }, { "epoch": 3.2497801231310466, "grad_norm": 0.2585922764025102, "learning_rate": 0.00028974130540640686, "loss": 3.0354185104370117, "step": 5544, "token_acc": 0.29367609094746 }, { "epoch": 3.2503664614482557, "grad_norm": 0.2545077543639786, "learning_rate": 0.00028973602068081266, "loss": 3.0270252227783203, "step": 5545, "token_acc": 0.2954390108326485 }, { "epoch": 3.250952799765465, "grad_norm": 0.27002395286555086, "learning_rate": 0.00028973073464258687, "loss": 3.019052505493164, "step": 5546, "token_acc": 0.2970866500630819 }, { "epoch": 3.2515391380826735, "grad_norm": 0.24398926337427476, "learning_rate": 0.00028972544729177914, "loss": 3.0828499794006348, "step": 5547, "token_acc": 0.28769827047728275 }, { "epoch": 3.2521254763998826, "grad_norm": 0.2774191268910143, "learning_rate": 0.0002897201586284391, "loss": 3.0793557167053223, "step": 5548, "token_acc": 0.28889049938657296 }, { "epoch": 3.2527118147170917, "grad_norm": 0.2708361708467026, "learning_rate": 0.0002897148686526164, "loss": 3.0516977310180664, "step": 5549, "token_acc": 0.29326024795350614 }, { "epoch": 3.253298153034301, "grad_norm": 0.2741728670422611, "learning_rate": 0.00028970957736436083, "loss": 3.055417537689209, "step": 5550, "token_acc": 0.2926756195632178 }, { "epoch": 3.25388449135151, "grad_norm": 0.2424436953279273, "learning_rate": 0.000289704284763722, "loss": 3.04716420173645, "step": 5551, "token_acc": 0.29375208051851415 }, { "epoch": 3.254470829668719, "grad_norm": 0.27167684775524564, "learning_rate": 0.00028969899085074967, "loss": 3.092989683151245, "step": 5552, "token_acc": 0.28718912283524645 }, { "epoch": 3.2550571679859277, "grad_norm": 0.23928769940495204, "learning_rate": 0.0002896936956254936, "loss": 3.0554897785186768, "step": 5553, "token_acc": 0.29284548078429784 }, { "epoch": 3.255643506303137, "grad_norm": 0.257124270395592, "learning_rate": 0.0002896883990880035, "loss": 3.0346858501434326, "step": 5554, "token_acc": 0.2950895270807493 }, { "epoch": 3.256229844620346, "grad_norm": 0.25320612043741525, "learning_rate": 0.00028968310123832913, "loss": 3.0236854553222656, "step": 5555, "token_acc": 0.29637094695602867 }, { "epoch": 3.256816182937555, "grad_norm": 0.2656462721762266, "learning_rate": 0.00028967780207652023, "loss": 3.004427194595337, "step": 5556, "token_acc": 0.30014598156160816 }, { "epoch": 3.257402521254764, "grad_norm": 0.26252434158149857, "learning_rate": 0.00028967250160262656, "loss": 3.064389944076538, "step": 5557, "token_acc": 0.2903170495158793 }, { "epoch": 3.257988859571973, "grad_norm": 0.2348602538895089, "learning_rate": 0.00028966719981669804, "loss": 3.030764579772949, "step": 5558, "token_acc": 0.2958498852983374 }, { "epoch": 3.258575197889182, "grad_norm": 0.28693503524229624, "learning_rate": 0.00028966189671878427, "loss": 3.0904502868652344, "step": 5559, "token_acc": 0.286621665125168 }, { "epoch": 3.259161536206391, "grad_norm": 0.28046144791863115, "learning_rate": 0.00028965659230893525, "loss": 3.0459375381469727, "step": 5560, "token_acc": 0.2930299281408755 }, { "epoch": 3.2597478745236, "grad_norm": 0.2572314419433262, "learning_rate": 0.00028965128658720073, "loss": 3.0727224349975586, "step": 5561, "token_acc": 0.2887826743320374 }, { "epoch": 3.2603342128408093, "grad_norm": 0.30634122138218156, "learning_rate": 0.00028964597955363053, "loss": 3.035705089569092, "step": 5562, "token_acc": 0.2960697871720268 }, { "epoch": 3.2609205511580184, "grad_norm": 0.2624728149742603, "learning_rate": 0.00028964067120827453, "loss": 3.011590003967285, "step": 5563, "token_acc": 0.2971310870865394 }, { "epoch": 3.261506889475227, "grad_norm": 0.27718316656252945, "learning_rate": 0.0002896353615511826, "loss": 3.04506254196167, "step": 5564, "token_acc": 0.2934774866446197 }, { "epoch": 3.262093227792436, "grad_norm": 0.2637743918125357, "learning_rate": 0.00028963005058240467, "loss": 3.0343332290649414, "step": 5565, "token_acc": 0.297028400180455 }, { "epoch": 3.2626795661096453, "grad_norm": 0.3095602695595955, "learning_rate": 0.0002896247383019905, "loss": 3.054948091506958, "step": 5566, "token_acc": 0.2911274521344917 }, { "epoch": 3.2632659044268544, "grad_norm": 0.27837295114862903, "learning_rate": 0.00028961942470999007, "loss": 3.0525155067443848, "step": 5567, "token_acc": 0.29335435134634336 }, { "epoch": 3.2638522427440635, "grad_norm": 0.28051399073654887, "learning_rate": 0.00028961410980645326, "loss": 3.053393840789795, "step": 5568, "token_acc": 0.29285749710911746 }, { "epoch": 3.264438581061272, "grad_norm": 0.23268628511472414, "learning_rate": 0.0002896087935914301, "loss": 2.985903739929199, "step": 5569, "token_acc": 0.3038945289862849 }, { "epoch": 3.2650249193784813, "grad_norm": 0.2797353206589393, "learning_rate": 0.00028960347606497036, "loss": 3.046079397201538, "step": 5570, "token_acc": 0.29326629660844544 }, { "epoch": 3.2656112576956904, "grad_norm": 0.2646331614797058, "learning_rate": 0.0002895981572271241, "loss": 3.0570311546325684, "step": 5571, "token_acc": 0.29305243627272415 }, { "epoch": 3.2661975960128995, "grad_norm": 0.2640157528372407, "learning_rate": 0.0002895928370779413, "loss": 3.024941921234131, "step": 5572, "token_acc": 0.2965845523985059 }, { "epoch": 3.2667839343301086, "grad_norm": 0.2630308935397913, "learning_rate": 0.0002895875156174719, "loss": 3.022364616394043, "step": 5573, "token_acc": 0.2972293026101142 }, { "epoch": 3.2673702726473177, "grad_norm": 0.2723542224230714, "learning_rate": 0.0002895821928457658, "loss": 3.044461250305176, "step": 5574, "token_acc": 0.29613990886327934 }, { "epoch": 3.2679566109645264, "grad_norm": 0.2635002700485669, "learning_rate": 0.0002895768687628732, "loss": 3.0357108116149902, "step": 5575, "token_acc": 0.29512819366225895 }, { "epoch": 3.2685429492817355, "grad_norm": 0.2924223532968938, "learning_rate": 0.000289571543368844, "loss": 3.005058765411377, "step": 5576, "token_acc": 0.2988306364181874 }, { "epoch": 3.2691292875989446, "grad_norm": 0.25805775222937916, "learning_rate": 0.00028956621666372814, "loss": 3.038538694381714, "step": 5577, "token_acc": 0.294988928238396 }, { "epoch": 3.2697156259161537, "grad_norm": 0.27697048000392976, "learning_rate": 0.0002895608886475758, "loss": 3.1012697219848633, "step": 5578, "token_acc": 0.28552491837062766 }, { "epoch": 3.270301964233363, "grad_norm": 0.2689404664518939, "learning_rate": 0.0002895555593204369, "loss": 3.017056941986084, "step": 5579, "token_acc": 0.2976993091689755 }, { "epoch": 3.2708883025505715, "grad_norm": 0.31085951903930686, "learning_rate": 0.00028955022868236164, "loss": 3.0249881744384766, "step": 5580, "token_acc": 0.29720399001132597 }, { "epoch": 3.2714746408677806, "grad_norm": 0.2792005042723576, "learning_rate": 0.0002895448967334, "loss": 3.0462915897369385, "step": 5581, "token_acc": 0.29270653548912456 }, { "epoch": 3.2720609791849897, "grad_norm": 0.2501653093979064, "learning_rate": 0.00028953956347360215, "loss": 3.0420777797698975, "step": 5582, "token_acc": 0.29484552454036445 }, { "epoch": 3.272647317502199, "grad_norm": 0.27821523751966487, "learning_rate": 0.0002895342289030181, "loss": 3.036561965942383, "step": 5583, "token_acc": 0.29312528522861875 }, { "epoch": 3.273233655819408, "grad_norm": 0.2752180999282037, "learning_rate": 0.000289528893021698, "loss": 3.055999279022217, "step": 5584, "token_acc": 0.292750034039208 }, { "epoch": 3.273819994136617, "grad_norm": 0.2744202188018516, "learning_rate": 0.0002895235558296919, "loss": 3.059241771697998, "step": 5585, "token_acc": 0.2918350234453967 }, { "epoch": 3.2744063324538257, "grad_norm": 0.2970213841644865, "learning_rate": 0.0002895182173270501, "loss": 3.1046924591064453, "step": 5586, "token_acc": 0.2845886990801577 }, { "epoch": 3.274992670771035, "grad_norm": 0.24965215454831652, "learning_rate": 0.00028951287751382264, "loss": 3.068326711654663, "step": 5587, "token_acc": 0.2902764174699412 }, { "epoch": 3.275579009088244, "grad_norm": 0.2533446042709383, "learning_rate": 0.00028950753639005964, "loss": 3.054434299468994, "step": 5588, "token_acc": 0.29235562395600684 }, { "epoch": 3.276165347405453, "grad_norm": 0.2695192757253377, "learning_rate": 0.00028950219395581134, "loss": 3.0146186351776123, "step": 5589, "token_acc": 0.298695140252889 }, { "epoch": 3.2767516857226617, "grad_norm": 0.2691507038867357, "learning_rate": 0.0002894968502111279, "loss": 3.0028724670410156, "step": 5590, "token_acc": 0.2985304901607761 }, { "epoch": 3.277338024039871, "grad_norm": 0.22248586525319197, "learning_rate": 0.0002894915051560595, "loss": 3.0288336277008057, "step": 5591, "token_acc": 0.29625174906871216 }, { "epoch": 3.27792436235708, "grad_norm": 0.2708954300957032, "learning_rate": 0.00028948615879065645, "loss": 3.076141595840454, "step": 5592, "token_acc": 0.2899638169188918 }, { "epoch": 3.278510700674289, "grad_norm": 0.23599682238587283, "learning_rate": 0.00028948081111496886, "loss": 3.0233476161956787, "step": 5593, "token_acc": 0.2954615669797331 }, { "epoch": 3.279097038991498, "grad_norm": 0.23974637781131902, "learning_rate": 0.000289475462129047, "loss": 3.0459351539611816, "step": 5594, "token_acc": 0.29246231951781654 }, { "epoch": 3.2796833773087073, "grad_norm": 0.26834137358474613, "learning_rate": 0.00028947011183294113, "loss": 3.073460817337036, "step": 5595, "token_acc": 0.28783520050310935 }, { "epoch": 3.280269715625916, "grad_norm": 0.2737592203929691, "learning_rate": 0.0002894647602267015, "loss": 3.068303108215332, "step": 5596, "token_acc": 0.2910934723121481 }, { "epoch": 3.280856053943125, "grad_norm": 0.26018866374354915, "learning_rate": 0.0002894594073103784, "loss": 3.0626487731933594, "step": 5597, "token_acc": 0.29178409702650493 }, { "epoch": 3.281442392260334, "grad_norm": 0.27486993161148887, "learning_rate": 0.00028945405308402207, "loss": 3.0917370319366455, "step": 5598, "token_acc": 0.2868140591044543 }, { "epoch": 3.2820287305775433, "grad_norm": 0.2988141988285752, "learning_rate": 0.0002894486975476828, "loss": 3.022247314453125, "step": 5599, "token_acc": 0.297442706900565 }, { "epoch": 3.2826150688947524, "grad_norm": 0.27213499692012894, "learning_rate": 0.000289443340701411, "loss": 3.024195909500122, "step": 5600, "token_acc": 0.2967905771924984 }, { "epoch": 3.283201407211961, "grad_norm": 0.24738533843385893, "learning_rate": 0.0002894379825452568, "loss": 3.067525625228882, "step": 5601, "token_acc": 0.28963711902284417 }, { "epoch": 3.28378774552917, "grad_norm": 0.24452320180428466, "learning_rate": 0.00028943262307927074, "loss": 3.027327060699463, "step": 5602, "token_acc": 0.2965860136268969 }, { "epoch": 3.2843740838463793, "grad_norm": 0.24846095656656514, "learning_rate": 0.00028942726230350306, "loss": 3.02362322807312, "step": 5603, "token_acc": 0.29695964320678775 }, { "epoch": 3.2849604221635884, "grad_norm": 0.24155111210308483, "learning_rate": 0.0002894219002180041, "loss": 3.0494234561920166, "step": 5604, "token_acc": 0.2923557052393821 }, { "epoch": 3.2855467604807975, "grad_norm": 0.2623257375270167, "learning_rate": 0.00028941653682282433, "loss": 3.0666255950927734, "step": 5605, "token_acc": 0.29075131926821757 }, { "epoch": 3.2861330987980066, "grad_norm": 0.24793798874928888, "learning_rate": 0.000289411172118014, "loss": 3.025351047515869, "step": 5606, "token_acc": 0.296134512691555 }, { "epoch": 3.2867194371152153, "grad_norm": 0.2542816631691905, "learning_rate": 0.0002894058061036236, "loss": 3.0086538791656494, "step": 5607, "token_acc": 0.29916242701981954 }, { "epoch": 3.2873057754324244, "grad_norm": 0.2546294375795419, "learning_rate": 0.0002894004387797034, "loss": 3.0622305870056152, "step": 5608, "token_acc": 0.2917949474927249 }, { "epoch": 3.2878921137496335, "grad_norm": 0.2909815781212429, "learning_rate": 0.00028939507014630404, "loss": 3.0605907440185547, "step": 5609, "token_acc": 0.2931174659749648 }, { "epoch": 3.2884784520668426, "grad_norm": 0.26501348447786943, "learning_rate": 0.0002893897002034758, "loss": 3.0546793937683105, "step": 5610, "token_acc": 0.29260190666604186 }, { "epoch": 3.2890647903840518, "grad_norm": 0.28487987248399277, "learning_rate": 0.00028938432895126917, "loss": 3.069399118423462, "step": 5611, "token_acc": 0.2893468493370993 }, { "epoch": 3.2896511287012604, "grad_norm": 0.27624960784360575, "learning_rate": 0.0002893789563897345, "loss": 3.07187819480896, "step": 5612, "token_acc": 0.29043491698928625 }, { "epoch": 3.2902374670184695, "grad_norm": 0.2686755237792855, "learning_rate": 0.00028937358251892247, "loss": 3.06264066696167, "step": 5613, "token_acc": 0.2899755967158791 }, { "epoch": 3.2908238053356786, "grad_norm": 0.3241559374601863, "learning_rate": 0.00028936820733888345, "loss": 3.0519227981567383, "step": 5614, "token_acc": 0.2929717541317234 }, { "epoch": 3.2914101436528878, "grad_norm": 0.3024641843627616, "learning_rate": 0.0002893628308496678, "loss": 3.0414178371429443, "step": 5615, "token_acc": 0.29544063247228225 }, { "epoch": 3.291996481970097, "grad_norm": 0.2649525723763808, "learning_rate": 0.00028935745305132623, "loss": 3.053030014038086, "step": 5616, "token_acc": 0.29230753209967825 }, { "epoch": 3.292582820287306, "grad_norm": 0.29086022717146437, "learning_rate": 0.0002893520739439092, "loss": 3.0459671020507812, "step": 5617, "token_acc": 0.29495083976563696 }, { "epoch": 3.2931691586045146, "grad_norm": 0.3113350002552134, "learning_rate": 0.0002893466935274672, "loss": 3.0787854194641113, "step": 5618, "token_acc": 0.28710399381589224 }, { "epoch": 3.2937554969217238, "grad_norm": 0.276536547077667, "learning_rate": 0.00028934131180205074, "loss": 3.0457077026367188, "step": 5619, "token_acc": 0.2928958634882437 }, { "epoch": 3.294341835238933, "grad_norm": 0.2451697976073363, "learning_rate": 0.00028933592876771047, "loss": 3.0542550086975098, "step": 5620, "token_acc": 0.29330755518410573 }, { "epoch": 3.294928173556142, "grad_norm": 0.30348762755190195, "learning_rate": 0.0002893305444244969, "loss": 3.019636869430542, "step": 5621, "token_acc": 0.29669295226925513 }, { "epoch": 3.295514511873351, "grad_norm": 0.27094662936469793, "learning_rate": 0.00028932515877246056, "loss": 3.0415358543395996, "step": 5622, "token_acc": 0.2939202541945984 }, { "epoch": 3.2961008501905598, "grad_norm": 0.29545831273708617, "learning_rate": 0.00028931977181165215, "loss": 3.080528736114502, "step": 5623, "token_acc": 0.2893502395576099 }, { "epoch": 3.296687188507769, "grad_norm": 0.2616063765448077, "learning_rate": 0.00028931438354212215, "loss": 3.0078744888305664, "step": 5624, "token_acc": 0.29907510723023534 }, { "epoch": 3.297273526824978, "grad_norm": 0.26925258710688493, "learning_rate": 0.0002893089939639213, "loss": 3.0290021896362305, "step": 5625, "token_acc": 0.2978160830513409 }, { "epoch": 3.297859865142187, "grad_norm": 0.27235475531578, "learning_rate": 0.0002893036030771002, "loss": 3.0489304065704346, "step": 5626, "token_acc": 0.2934715989362586 }, { "epoch": 3.298446203459396, "grad_norm": 0.24303478753333185, "learning_rate": 0.00028929821088170945, "loss": 3.03454852104187, "step": 5627, "token_acc": 0.2958329386754228 }, { "epoch": 3.2990325417766053, "grad_norm": 0.25348481837335646, "learning_rate": 0.0002892928173777997, "loss": 3.0337109565734863, "step": 5628, "token_acc": 0.2961209218170694 }, { "epoch": 3.299618880093814, "grad_norm": 0.24501509421838993, "learning_rate": 0.0002892874225654216, "loss": 3.0312554836273193, "step": 5629, "token_acc": 0.29733483252898457 }, { "epoch": 3.300205218411023, "grad_norm": 0.2444204008317645, "learning_rate": 0.0002892820264446259, "loss": 3.012662410736084, "step": 5630, "token_acc": 0.29792457162852237 }, { "epoch": 3.300791556728232, "grad_norm": 0.2799154667017893, "learning_rate": 0.00028927662901546324, "loss": 3.071840286254883, "step": 5631, "token_acc": 0.2897522859175111 }, { "epoch": 3.3013778950454413, "grad_norm": 0.23898318591862108, "learning_rate": 0.00028927123027798436, "loss": 3.047630548477173, "step": 5632, "token_acc": 0.29232035562756736 }, { "epoch": 3.3019642333626504, "grad_norm": 0.25099394180945533, "learning_rate": 0.00028926583023223987, "loss": 3.055784225463867, "step": 5633, "token_acc": 0.2936365825820166 }, { "epoch": 3.302550571679859, "grad_norm": 0.23552959912147728, "learning_rate": 0.0002892604288782806, "loss": 3.0040926933288574, "step": 5634, "token_acc": 0.2983880943801128 }, { "epoch": 3.303136909997068, "grad_norm": 0.26549901656036967, "learning_rate": 0.00028925502621615726, "loss": 3.0339224338531494, "step": 5635, "token_acc": 0.29610382054089507 }, { "epoch": 3.3037232483142773, "grad_norm": 0.2628702517111344, "learning_rate": 0.0002892496222459206, "loss": 3.068821907043457, "step": 5636, "token_acc": 0.2902433463228588 }, { "epoch": 3.3043095866314864, "grad_norm": 0.24384994249774025, "learning_rate": 0.0002892442169676214, "loss": 3.072838306427002, "step": 5637, "token_acc": 0.29009729616322677 }, { "epoch": 3.3048959249486956, "grad_norm": 0.2460081987626046, "learning_rate": 0.0002892388103813104, "loss": 3.069751262664795, "step": 5638, "token_acc": 0.290347688027182 }, { "epoch": 3.3054822632659047, "grad_norm": 0.2391441256499383, "learning_rate": 0.0002892334024870384, "loss": 3.0618948936462402, "step": 5639, "token_acc": 0.29163372980127583 }, { "epoch": 3.3060686015831133, "grad_norm": 0.25227918271563365, "learning_rate": 0.0002892279932848562, "loss": 3.0628557205200195, "step": 5640, "token_acc": 0.29108158802864337 }, { "epoch": 3.3066549399003224, "grad_norm": 0.25959258926033146, "learning_rate": 0.0002892225827748146, "loss": 3.039865732192993, "step": 5641, "token_acc": 0.29323881746945885 }, { "epoch": 3.3072412782175316, "grad_norm": 0.24560392727195357, "learning_rate": 0.00028921717095696444, "loss": 3.0141172409057617, "step": 5642, "token_acc": 0.2986240414049926 }, { "epoch": 3.3078276165347407, "grad_norm": 0.2544576029115778, "learning_rate": 0.0002892117578313566, "loss": 3.070706605911255, "step": 5643, "token_acc": 0.2895937806787126 }, { "epoch": 3.3084139548519493, "grad_norm": 0.25629007695611444, "learning_rate": 0.0002892063433980418, "loss": 3.0716235637664795, "step": 5644, "token_acc": 0.2896479273436319 }, { "epoch": 3.3090002931691584, "grad_norm": 0.25628349181811394, "learning_rate": 0.00028920092765707104, "loss": 3.075153112411499, "step": 5645, "token_acc": 0.2899112237280868 }, { "epoch": 3.3095866314863676, "grad_norm": 0.26851441574071705, "learning_rate": 0.00028919551060849517, "loss": 3.03609037399292, "step": 5646, "token_acc": 0.2950730212215856 }, { "epoch": 3.3101729698035767, "grad_norm": 0.262111169435215, "learning_rate": 0.000289190092252365, "loss": 3.0301053524017334, "step": 5647, "token_acc": 0.29629315957463703 }, { "epoch": 3.310759308120786, "grad_norm": 0.23182079627180197, "learning_rate": 0.0002891846725887315, "loss": 3.0141682624816895, "step": 5648, "token_acc": 0.2998067816297355 }, { "epoch": 3.311345646437995, "grad_norm": 0.2713076981625062, "learning_rate": 0.00028917925161764553, "loss": 3.0790865421295166, "step": 5649, "token_acc": 0.2889047383260771 }, { "epoch": 3.3119319847552036, "grad_norm": 0.2599687456885167, "learning_rate": 0.00028917382933915805, "loss": 3.0578112602233887, "step": 5650, "token_acc": 0.29133181103763234 }, { "epoch": 3.3125183230724127, "grad_norm": 0.2717929792509535, "learning_rate": 0.00028916840575332, "loss": 3.0932698249816895, "step": 5651, "token_acc": 0.28786888295742 }, { "epoch": 3.313104661389622, "grad_norm": 0.2798270522480099, "learning_rate": 0.00028916298086018234, "loss": 3.0428555011749268, "step": 5652, "token_acc": 0.2941281385097004 }, { "epoch": 3.313690999706831, "grad_norm": 0.2514717966269451, "learning_rate": 0.0002891575546597959, "loss": 3.040719509124756, "step": 5653, "token_acc": 0.29470160851142957 }, { "epoch": 3.31427733802404, "grad_norm": 0.25076410118559816, "learning_rate": 0.0002891521271522118, "loss": 3.0356903076171875, "step": 5654, "token_acc": 0.2939879992257565 }, { "epoch": 3.3148636763412487, "grad_norm": 0.2658737319772316, "learning_rate": 0.000289146698337481, "loss": 3.0798327922821045, "step": 5655, "token_acc": 0.288920459608574 }, { "epoch": 3.315450014658458, "grad_norm": 0.25727412163460406, "learning_rate": 0.00028914126821565447, "loss": 3.0082955360412598, "step": 5656, "token_acc": 0.29924286753743684 }, { "epoch": 3.316036352975667, "grad_norm": 0.27566221820218606, "learning_rate": 0.0002891358367867832, "loss": 3.0909862518310547, "step": 5657, "token_acc": 0.2876140600218336 }, { "epoch": 3.316622691292876, "grad_norm": 0.27507499118158596, "learning_rate": 0.00028913040405091823, "loss": 3.0608484745025635, "step": 5658, "token_acc": 0.29135826766418177 }, { "epoch": 3.317209029610085, "grad_norm": 0.27047703744986173, "learning_rate": 0.0002891249700081106, "loss": 3.0465924739837646, "step": 5659, "token_acc": 0.2939666238767651 }, { "epoch": 3.3177953679272942, "grad_norm": 0.281899079549966, "learning_rate": 0.00028911953465841136, "loss": 3.070035457611084, "step": 5660, "token_acc": 0.2922704397058504 }, { "epoch": 3.318381706244503, "grad_norm": 0.2608369259985746, "learning_rate": 0.0002891140980018716, "loss": 3.029350996017456, "step": 5661, "token_acc": 0.2961996167497768 }, { "epoch": 3.318968044561712, "grad_norm": 0.2541751694335664, "learning_rate": 0.00028910866003854227, "loss": 3.0701494216918945, "step": 5662, "token_acc": 0.28874898271078364 }, { "epoch": 3.319554382878921, "grad_norm": 0.2408881196639801, "learning_rate": 0.00028910322076847455, "loss": 3.0196757316589355, "step": 5663, "token_acc": 0.2965763728057834 }, { "epoch": 3.3201407211961302, "grad_norm": 0.2834033881554853, "learning_rate": 0.00028909778019171954, "loss": 3.0354104042053223, "step": 5664, "token_acc": 0.29573684555402663 }, { "epoch": 3.3207270595133394, "grad_norm": 0.2473441893920157, "learning_rate": 0.00028909233830832825, "loss": 3.0485830307006836, "step": 5665, "token_acc": 0.2921580863234529 }, { "epoch": 3.321313397830548, "grad_norm": 0.24788250572949358, "learning_rate": 0.0002890868951183519, "loss": 3.041867971420288, "step": 5666, "token_acc": 0.2947608047766858 }, { "epoch": 3.321899736147757, "grad_norm": 0.2604208144989237, "learning_rate": 0.0002890814506218416, "loss": 3.0771420001983643, "step": 5667, "token_acc": 0.28914538551598123 }, { "epoch": 3.3224860744649662, "grad_norm": 0.24573400722510266, "learning_rate": 0.00028907600481884854, "loss": 3.045477867126465, "step": 5668, "token_acc": 0.2941581743923232 }, { "epoch": 3.3230724127821754, "grad_norm": 0.2592597887633075, "learning_rate": 0.0002890705577094238, "loss": 3.0070180892944336, "step": 5669, "token_acc": 0.2992412489027149 }, { "epoch": 3.3236587510993845, "grad_norm": 0.2689104687329127, "learning_rate": 0.00028906510929361856, "loss": 3.055882453918457, "step": 5670, "token_acc": 0.2924316934948277 }, { "epoch": 3.3242450894165936, "grad_norm": 0.2637215566167641, "learning_rate": 0.000289059659571484, "loss": 3.0357956886291504, "step": 5671, "token_acc": 0.29575579438507965 }, { "epoch": 3.3248314277338022, "grad_norm": 0.26116954068157694, "learning_rate": 0.00028905420854307134, "loss": 3.074798583984375, "step": 5672, "token_acc": 0.2903763175860006 }, { "epoch": 3.3254177660510114, "grad_norm": 0.2590549517089792, "learning_rate": 0.00028904875620843173, "loss": 3.053189277648926, "step": 5673, "token_acc": 0.29355535782382103 }, { "epoch": 3.3260041043682205, "grad_norm": 0.27491176400948103, "learning_rate": 0.0002890433025676164, "loss": 3.067682981491089, "step": 5674, "token_acc": 0.29062913572897786 }, { "epoch": 3.3265904426854296, "grad_norm": 0.29322490732737244, "learning_rate": 0.00028903784762067674, "loss": 3.0716772079467773, "step": 5675, "token_acc": 0.2900877269981257 }, { "epoch": 3.3271767810026387, "grad_norm": 0.24051997126987792, "learning_rate": 0.00028903239136766375, "loss": 3.0001792907714844, "step": 5676, "token_acc": 0.29868658373028717 }, { "epoch": 3.3277631193198474, "grad_norm": 0.25638381647041286, "learning_rate": 0.0002890269338086288, "loss": 3.0107851028442383, "step": 5677, "token_acc": 0.29869908494301234 }, { "epoch": 3.3283494576370565, "grad_norm": 0.23348084208780892, "learning_rate": 0.00028902147494362315, "loss": 3.042665958404541, "step": 5678, "token_acc": 0.2942836519794976 }, { "epoch": 3.3289357959542656, "grad_norm": 0.239186066657374, "learning_rate": 0.0002890160147726981, "loss": 3.0522398948669434, "step": 5679, "token_acc": 0.2925206044675303 }, { "epoch": 3.3295221342714747, "grad_norm": 0.2515515935525354, "learning_rate": 0.00028901055329590494, "loss": 3.0348756313323975, "step": 5680, "token_acc": 0.2948010618157455 }, { "epoch": 3.330108472588684, "grad_norm": 0.2459224676871044, "learning_rate": 0.0002890050905132949, "loss": 3.0534145832061768, "step": 5681, "token_acc": 0.2921477459485323 }, { "epoch": 3.330694810905893, "grad_norm": 0.23682384629040473, "learning_rate": 0.0002889996264249194, "loss": 3.0606369972229004, "step": 5682, "token_acc": 0.2915630585380491 }, { "epoch": 3.3312811492231016, "grad_norm": 0.270991647206072, "learning_rate": 0.00028899416103082967, "loss": 3.0742909908294678, "step": 5683, "token_acc": 0.29074362884549315 }, { "epoch": 3.3318674875403107, "grad_norm": 0.27407812012234445, "learning_rate": 0.00028898869433107707, "loss": 3.037078380584717, "step": 5684, "token_acc": 0.2941312804818785 }, { "epoch": 3.33245382585752, "grad_norm": 0.23529509773883864, "learning_rate": 0.00028898322632571303, "loss": 3.0711679458618164, "step": 5685, "token_acc": 0.2905800292346616 }, { "epoch": 3.333040164174729, "grad_norm": 0.2763313898446662, "learning_rate": 0.00028897775701478885, "loss": 3.028648853302002, "step": 5686, "token_acc": 0.29729147358063923 }, { "epoch": 3.333626502491938, "grad_norm": 0.25379415591560245, "learning_rate": 0.0002889722863983559, "loss": 3.0440781116485596, "step": 5687, "token_acc": 0.29392758517323797 }, { "epoch": 3.3342128408091467, "grad_norm": 0.2623555037503734, "learning_rate": 0.0002889668144764656, "loss": 3.0524182319641113, "step": 5688, "token_acc": 0.29334109831549704 }, { "epoch": 3.334799179126356, "grad_norm": 0.25130788950890903, "learning_rate": 0.00028896134124916934, "loss": 3.0114355087280273, "step": 5689, "token_acc": 0.2983103200617767 }, { "epoch": 3.335385517443565, "grad_norm": 0.2715045095980256, "learning_rate": 0.0002889558667165185, "loss": 3.0142765045166016, "step": 5690, "token_acc": 0.29873979373703036 }, { "epoch": 3.335971855760774, "grad_norm": 0.2638065604853873, "learning_rate": 0.0002889503908785646, "loss": 3.000058650970459, "step": 5691, "token_acc": 0.30046116554713337 }, { "epoch": 3.336558194077983, "grad_norm": 0.24247567550373464, "learning_rate": 0.000288944913735359, "loss": 3.0071098804473877, "step": 5692, "token_acc": 0.29852664271491647 }, { "epoch": 3.3371445323951923, "grad_norm": 0.25928555128734876, "learning_rate": 0.0002889394352869531, "loss": 3.0537776947021484, "step": 5693, "token_acc": 0.2923968015217618 }, { "epoch": 3.337730870712401, "grad_norm": 0.2554128030029239, "learning_rate": 0.0002889339555333985, "loss": 3.059546709060669, "step": 5694, "token_acc": 0.29318194048323737 }, { "epoch": 3.33831720902961, "grad_norm": 0.2575516442706426, "learning_rate": 0.00028892847447474653, "loss": 3.069221019744873, "step": 5695, "token_acc": 0.29034126195910515 }, { "epoch": 3.338903547346819, "grad_norm": 0.25672817949680926, "learning_rate": 0.00028892299211104886, "loss": 3.0990309715270996, "step": 5696, "token_acc": 0.28548023755887775 }, { "epoch": 3.3394898856640283, "grad_norm": 0.2610852745125035, "learning_rate": 0.0002889175084423568, "loss": 3.0478129386901855, "step": 5697, "token_acc": 0.29140556937578066 }, { "epoch": 3.340076223981237, "grad_norm": 0.2441749686262877, "learning_rate": 0.000288912023468722, "loss": 3.0370213985443115, "step": 5698, "token_acc": 0.294819236977632 }, { "epoch": 3.340662562298446, "grad_norm": 0.25211298722444914, "learning_rate": 0.0002889065371901958, "loss": 3.0255789756774902, "step": 5699, "token_acc": 0.29681823924180195 }, { "epoch": 3.341248900615655, "grad_norm": 0.24636868795176511, "learning_rate": 0.00028890104960683, "loss": 3.006986141204834, "step": 5700, "token_acc": 0.2995516924416636 }, { "epoch": 3.3418352389328643, "grad_norm": 0.2712470414314111, "learning_rate": 0.000288895560718676, "loss": 3.0725908279418945, "step": 5701, "token_acc": 0.29004314421569516 }, { "epoch": 3.3424215772500734, "grad_norm": 0.2653506625292416, "learning_rate": 0.0002888900705257853, "loss": 3.064542531967163, "step": 5702, "token_acc": 0.2921014777741263 }, { "epoch": 3.3430079155672825, "grad_norm": 0.26040624978626953, "learning_rate": 0.00028888457902820954, "loss": 3.052729606628418, "step": 5703, "token_acc": 0.29251485271865096 }, { "epoch": 3.343594253884491, "grad_norm": 0.30706659119730073, "learning_rate": 0.0002888790862260003, "loss": 3.0718531608581543, "step": 5704, "token_acc": 0.2898490568007978 }, { "epoch": 3.3441805922017003, "grad_norm": 0.2931635725672014, "learning_rate": 0.0002888735921192093, "loss": 3.039879560470581, "step": 5705, "token_acc": 0.2935851628602921 }, { "epoch": 3.3447669305189094, "grad_norm": 0.2769287748958181, "learning_rate": 0.00028886809670788797, "loss": 3.0811378955841064, "step": 5706, "token_acc": 0.28830422632234026 }, { "epoch": 3.3453532688361185, "grad_norm": 0.29139858484291564, "learning_rate": 0.00028886259999208794, "loss": 3.023240089416504, "step": 5707, "token_acc": 0.29806789264183325 }, { "epoch": 3.3459396071533276, "grad_norm": 0.24085872627378682, "learning_rate": 0.000288857101971861, "loss": 3.0622334480285645, "step": 5708, "token_acc": 0.2896504013024775 }, { "epoch": 3.3465259454705363, "grad_norm": 0.2905822487140089, "learning_rate": 0.00028885160264725866, "loss": 3.037686824798584, "step": 5709, "token_acc": 0.2949830679184588 }, { "epoch": 3.3471122837877454, "grad_norm": 0.26506598696880423, "learning_rate": 0.00028884610201833263, "loss": 3.0875120162963867, "step": 5710, "token_acc": 0.2876471402115839 }, { "epoch": 3.3476986221049545, "grad_norm": 0.3040369468891996, "learning_rate": 0.00028884060008513453, "loss": 3.019105911254883, "step": 5711, "token_acc": 0.29718421527761424 }, { "epoch": 3.3482849604221636, "grad_norm": 0.26828749695490156, "learning_rate": 0.00028883509684771613, "loss": 3.011282444000244, "step": 5712, "token_acc": 0.29847493980025525 }, { "epoch": 3.3488712987393727, "grad_norm": 0.29991367598088864, "learning_rate": 0.00028882959230612905, "loss": 3.1143527030944824, "step": 5713, "token_acc": 0.2864458105350714 }, { "epoch": 3.349457637056582, "grad_norm": 0.2825708864243587, "learning_rate": 0.000288824086460425, "loss": 3.019871234893799, "step": 5714, "token_acc": 0.29805985377101735 }, { "epoch": 3.3500439753737905, "grad_norm": 0.27661678682827556, "learning_rate": 0.0002888185793106558, "loss": 3.0867552757263184, "step": 5715, "token_acc": 0.28749610628991856 }, { "epoch": 3.3506303136909996, "grad_norm": 0.2458516586251347, "learning_rate": 0.00028881307085687306, "loss": 3.051924705505371, "step": 5716, "token_acc": 0.29273410276713563 }, { "epoch": 3.3512166520082087, "grad_norm": 0.26757666666863555, "learning_rate": 0.00028880756109912856, "loss": 3.0620033740997314, "step": 5717, "token_acc": 0.292191996932857 }, { "epoch": 3.351802990325418, "grad_norm": 0.25804743649465994, "learning_rate": 0.00028880205003747406, "loss": 3.0546751022338867, "step": 5718, "token_acc": 0.2909680318564486 }, { "epoch": 3.352389328642627, "grad_norm": 0.21231028787374762, "learning_rate": 0.0002887965376719614, "loss": 3.011420249938965, "step": 5719, "token_acc": 0.30028372774607925 }, { "epoch": 3.3529756669598356, "grad_norm": 0.2603236363899813, "learning_rate": 0.0002887910240026422, "loss": 3.0352206230163574, "step": 5720, "token_acc": 0.2959762013295847 }, { "epoch": 3.3535620052770447, "grad_norm": 0.24928058343906215, "learning_rate": 0.00028878550902956845, "loss": 3.0598530769348145, "step": 5721, "token_acc": 0.2924128075737954 }, { "epoch": 3.354148343594254, "grad_norm": 0.23846425006170233, "learning_rate": 0.00028877999275279183, "loss": 3.0860843658447266, "step": 5722, "token_acc": 0.2895748627632842 }, { "epoch": 3.354734681911463, "grad_norm": 0.2100157471983417, "learning_rate": 0.0002887744751723642, "loss": 3.0016491413116455, "step": 5723, "token_acc": 0.300344459748109 }, { "epoch": 3.355321020228672, "grad_norm": 0.24900957958125072, "learning_rate": 0.0002887689562883373, "loss": 3.077404260635376, "step": 5724, "token_acc": 0.2897764074928103 }, { "epoch": 3.355907358545881, "grad_norm": 0.23876661940971108, "learning_rate": 0.0002887634361007631, "loss": 3.0382790565490723, "step": 5725, "token_acc": 0.2933984553798448 }, { "epoch": 3.35649369686309, "grad_norm": 0.24471551857634855, "learning_rate": 0.00028875791460969343, "loss": 3.0530691146850586, "step": 5726, "token_acc": 0.29171793446982475 }, { "epoch": 3.357080035180299, "grad_norm": 0.2532109536154006, "learning_rate": 0.0002887523918151801, "loss": 3.0129566192626953, "step": 5727, "token_acc": 0.2981520080581624 }, { "epoch": 3.357666373497508, "grad_norm": 0.270450828809152, "learning_rate": 0.0002887468677172751, "loss": 3.0905673503875732, "step": 5728, "token_acc": 0.2877225700231717 }, { "epoch": 3.358252711814717, "grad_norm": 0.27102304697905344, "learning_rate": 0.00028874134231603014, "loss": 3.0597429275512695, "step": 5729, "token_acc": 0.2916221857522984 }, { "epoch": 3.3588390501319263, "grad_norm": 0.2587419296891728, "learning_rate": 0.00028873581561149726, "loss": 3.0456390380859375, "step": 5730, "token_acc": 0.29311443161317285 }, { "epoch": 3.359425388449135, "grad_norm": 0.24531870165269162, "learning_rate": 0.00028873028760372833, "loss": 3.0477406978607178, "step": 5731, "token_acc": 0.29254329900866255 }, { "epoch": 3.360011726766344, "grad_norm": 0.27333245817004337, "learning_rate": 0.0002887247582927753, "loss": 3.0554895401000977, "step": 5732, "token_acc": 0.29308647797936094 }, { "epoch": 3.360598065083553, "grad_norm": 0.27259197362058346, "learning_rate": 0.00028871922767869014, "loss": 3.075974464416504, "step": 5733, "token_acc": 0.2915246753246753 }, { "epoch": 3.3611844034007623, "grad_norm": 0.22623268570031646, "learning_rate": 0.0002887136957615247, "loss": 3.0111265182495117, "step": 5734, "token_acc": 0.2984246800969349 }, { "epoch": 3.3617707417179714, "grad_norm": 0.26712271876615806, "learning_rate": 0.000288708162541331, "loss": 3.0088930130004883, "step": 5735, "token_acc": 0.29995981784087866 }, { "epoch": 3.3623570800351805, "grad_norm": 0.2404744592843164, "learning_rate": 0.000288702628018161, "loss": 3.05560302734375, "step": 5736, "token_acc": 0.29184891484722414 }, { "epoch": 3.362943418352389, "grad_norm": 0.2577133893997576, "learning_rate": 0.00028869709219206684, "loss": 3.042890787124634, "step": 5737, "token_acc": 0.2938047493955981 }, { "epoch": 3.3635297566695983, "grad_norm": 0.2889791907655429, "learning_rate": 0.0002886915550631003, "loss": 3.0468735694885254, "step": 5738, "token_acc": 0.2925345878451241 }, { "epoch": 3.3641160949868074, "grad_norm": 0.2642595451123165, "learning_rate": 0.00028868601663131353, "loss": 3.048684597015381, "step": 5739, "token_acc": 0.2933533167565745 }, { "epoch": 3.3647024333040165, "grad_norm": 0.2859077329453965, "learning_rate": 0.0002886804768967585, "loss": 3.0520622730255127, "step": 5740, "token_acc": 0.2939728097066202 }, { "epoch": 3.3652887716212256, "grad_norm": 0.24416010302900212, "learning_rate": 0.00028867493585948723, "loss": 3.0231995582580566, "step": 5741, "token_acc": 0.29673291928781326 }, { "epoch": 3.3658751099384343, "grad_norm": 0.27897052200211175, "learning_rate": 0.0002886693935195518, "loss": 3.046410322189331, "step": 5742, "token_acc": 0.29265131404645267 }, { "epoch": 3.3664614482556434, "grad_norm": 0.26129614343303387, "learning_rate": 0.00028866384987700437, "loss": 3.0319457054138184, "step": 5743, "token_acc": 0.2967821000636837 }, { "epoch": 3.3670477865728525, "grad_norm": 0.2797400207957371, "learning_rate": 0.00028865830493189686, "loss": 3.0027589797973633, "step": 5744, "token_acc": 0.30050184541037145 }, { "epoch": 3.3676341248900616, "grad_norm": 0.25101595236652846, "learning_rate": 0.00028865275868428144, "loss": 3.0285730361938477, "step": 5745, "token_acc": 0.29406593406593406 }, { "epoch": 3.3682204632072708, "grad_norm": 0.28632711347437434, "learning_rate": 0.00028864721113421016, "loss": 3.045104503631592, "step": 5746, "token_acc": 0.2927444134991305 }, { "epoch": 3.36880680152448, "grad_norm": 0.2757253617868351, "learning_rate": 0.00028864166228173517, "loss": 3.0419559478759766, "step": 5747, "token_acc": 0.29466657421137415 }, { "epoch": 3.3693931398416885, "grad_norm": 0.24781005485638438, "learning_rate": 0.00028863611212690855, "loss": 3.0557100772857666, "step": 5748, "token_acc": 0.2933160716801667 }, { "epoch": 3.3699794781588976, "grad_norm": 0.2677493342617239, "learning_rate": 0.0002886305606697826, "loss": 3.023343563079834, "step": 5749, "token_acc": 0.29673538984596887 }, { "epoch": 3.3705658164761068, "grad_norm": 0.2442789107492446, "learning_rate": 0.0002886250079104092, "loss": 3.0109775066375732, "step": 5750, "token_acc": 0.29813723252720964 }, { "epoch": 3.371152154793316, "grad_norm": 0.2915176426549623, "learning_rate": 0.0002886194538488407, "loss": 3.0176544189453125, "step": 5751, "token_acc": 0.29819225906526564 }, { "epoch": 3.3717384931105245, "grad_norm": 0.2881594806516899, "learning_rate": 0.0002886138984851292, "loss": 3.042457342147827, "step": 5752, "token_acc": 0.2938888874495102 }, { "epoch": 3.3723248314277336, "grad_norm": 0.3055471866758318, "learning_rate": 0.00028860834181932695, "loss": 3.078770637512207, "step": 5753, "token_acc": 0.2892475832302683 }, { "epoch": 3.3729111697449428, "grad_norm": 0.28243846417569235, "learning_rate": 0.0002886027838514861, "loss": 3.024970531463623, "step": 5754, "token_acc": 0.2953969982334943 }, { "epoch": 3.373497508062152, "grad_norm": 0.2544545973054159, "learning_rate": 0.0002885972245816588, "loss": 3.0275511741638184, "step": 5755, "token_acc": 0.29666203251249157 }, { "epoch": 3.374083846379361, "grad_norm": 0.2779840945641556, "learning_rate": 0.00028859166400989746, "loss": 3.0503897666931152, "step": 5756, "token_acc": 0.2902596797248866 }, { "epoch": 3.37467018469657, "grad_norm": 0.2808407694469099, "learning_rate": 0.00028858610213625406, "loss": 3.035900592803955, "step": 5757, "token_acc": 0.29506116671721766 }, { "epoch": 3.3752565230137788, "grad_norm": 0.27280808235418263, "learning_rate": 0.00028858053896078104, "loss": 3.0691823959350586, "step": 5758, "token_acc": 0.2919291225766788 }, { "epoch": 3.375842861330988, "grad_norm": 0.26128314578785233, "learning_rate": 0.0002885749744835306, "loss": 3.0721797943115234, "step": 5759, "token_acc": 0.28831657646370146 }, { "epoch": 3.376429199648197, "grad_norm": 0.2658954375230314, "learning_rate": 0.00028856940870455503, "loss": 3.0480942726135254, "step": 5760, "token_acc": 0.2936971576666525 }, { "epoch": 3.377015537965406, "grad_norm": 0.2746358085127675, "learning_rate": 0.00028856384162390656, "loss": 3.059691905975342, "step": 5761, "token_acc": 0.2922623828647925 }, { "epoch": 3.377601876282615, "grad_norm": 0.2552751892859364, "learning_rate": 0.0002885582732416375, "loss": 3.0412750244140625, "step": 5762, "token_acc": 0.2935588271601625 }, { "epoch": 3.378188214599824, "grad_norm": 0.23261099126213788, "learning_rate": 0.0002885527035578002, "loss": 3.066521644592285, "step": 5763, "token_acc": 0.2908722120261701 }, { "epoch": 3.378774552917033, "grad_norm": 0.23794042019741388, "learning_rate": 0.0002885471325724469, "loss": 3.0494213104248047, "step": 5764, "token_acc": 0.2931613312538 }, { "epoch": 3.379360891234242, "grad_norm": 0.2531218723828242, "learning_rate": 0.0002885415602856301, "loss": 3.0331640243530273, "step": 5765, "token_acc": 0.29488001176767253 }, { "epoch": 3.379947229551451, "grad_norm": 0.25509266888777, "learning_rate": 0.0002885359866974019, "loss": 3.017084836959839, "step": 5766, "token_acc": 0.29875581027790876 }, { "epoch": 3.3805335678686603, "grad_norm": 0.26354033636075735, "learning_rate": 0.0002885304118078148, "loss": 3.048737049102783, "step": 5767, "token_acc": 0.2944639498766629 }, { "epoch": 3.3811199061858694, "grad_norm": 0.23052688969461935, "learning_rate": 0.0002885248356169212, "loss": 3.032196044921875, "step": 5768, "token_acc": 0.2961055408970976 }, { "epoch": 3.381706244503078, "grad_norm": 0.2706749118284473, "learning_rate": 0.00028851925812477345, "loss": 3.0332703590393066, "step": 5769, "token_acc": 0.295187743162889 }, { "epoch": 3.382292582820287, "grad_norm": 0.2569977525800442, "learning_rate": 0.0002885136793314239, "loss": 3.035550355911255, "step": 5770, "token_acc": 0.29464179032385174 }, { "epoch": 3.3828789211374963, "grad_norm": 0.2293144973750897, "learning_rate": 0.000288508099236925, "loss": 3.0439419746398926, "step": 5771, "token_acc": 0.2931473038119128 }, { "epoch": 3.3834652594547054, "grad_norm": 0.26031099769081184, "learning_rate": 0.0002885025178413291, "loss": 3.072713851928711, "step": 5772, "token_acc": 0.29015738460400176 }, { "epoch": 3.3840515977719146, "grad_norm": 0.20871474144763544, "learning_rate": 0.00028849693514468875, "loss": 3.019867420196533, "step": 5773, "token_acc": 0.29732814791338774 }, { "epoch": 3.3846379360891232, "grad_norm": 0.264087322815557, "learning_rate": 0.0002884913511470563, "loss": 3.019841432571411, "step": 5774, "token_acc": 0.29873463085055363 }, { "epoch": 3.3852242744063323, "grad_norm": 0.3077413436250943, "learning_rate": 0.0002884857658484842, "loss": 3.0371508598327637, "step": 5775, "token_acc": 0.2961688023592661 }, { "epoch": 3.3858106127235414, "grad_norm": 0.22072733136700523, "learning_rate": 0.00028848017924902494, "loss": 3.0144424438476562, "step": 5776, "token_acc": 0.2975036613644685 }, { "epoch": 3.3863969510407506, "grad_norm": 0.2794207359410877, "learning_rate": 0.000288474591348731, "loss": 3.0644044876098633, "step": 5777, "token_acc": 0.29108518086347723 }, { "epoch": 3.3869832893579597, "grad_norm": 0.27980478877498605, "learning_rate": 0.0002884690021476549, "loss": 3.0284857749938965, "step": 5778, "token_acc": 0.2966252220248668 }, { "epoch": 3.387569627675169, "grad_norm": 0.2719654973940896, "learning_rate": 0.00028846341164584906, "loss": 3.0000524520874023, "step": 5779, "token_acc": 0.30138855693391825 }, { "epoch": 3.3881559659923774, "grad_norm": 0.24644098573419446, "learning_rate": 0.0002884578198433661, "loss": 3.035294771194458, "step": 5780, "token_acc": 0.29602722470751064 }, { "epoch": 3.3887423043095866, "grad_norm": 0.2569235442861615, "learning_rate": 0.0002884522267402585, "loss": 3.0655293464660645, "step": 5781, "token_acc": 0.29038500739257544 }, { "epoch": 3.3893286426267957, "grad_norm": 0.2645668020700716, "learning_rate": 0.0002884466323365788, "loss": 3.0202529430389404, "step": 5782, "token_acc": 0.2963841213442824 }, { "epoch": 3.389914980944005, "grad_norm": 0.2596367354415436, "learning_rate": 0.0002884410366323795, "loss": 3.0303962230682373, "step": 5783, "token_acc": 0.2950342641295176 }, { "epoch": 3.390501319261214, "grad_norm": 0.22887668708480585, "learning_rate": 0.0002884354396277133, "loss": 3.0566201210021973, "step": 5784, "token_acc": 0.2918484970002909 }, { "epoch": 3.3910876575784226, "grad_norm": 0.2536883965852219, "learning_rate": 0.00028842984132263253, "loss": 3.064544916152954, "step": 5785, "token_acc": 0.29161752358867743 }, { "epoch": 3.3916739958956317, "grad_norm": 0.26638258046739627, "learning_rate": 0.00028842424171719006, "loss": 3.0846731662750244, "step": 5786, "token_acc": 0.28851901500649396 }, { "epoch": 3.392260334212841, "grad_norm": 0.27111280413427996, "learning_rate": 0.00028841864081143834, "loss": 3.056457996368408, "step": 5787, "token_acc": 0.29165368964408067 }, { "epoch": 3.39284667253005, "grad_norm": 0.2810631940203439, "learning_rate": 0.00028841303860543, "loss": 3.0368943214416504, "step": 5788, "token_acc": 0.29464607666379244 }, { "epoch": 3.393433010847259, "grad_norm": 0.2314594818423757, "learning_rate": 0.00028840743509921774, "loss": 3.055884838104248, "step": 5789, "token_acc": 0.29338258696344066 }, { "epoch": 3.394019349164468, "grad_norm": 0.27020331482132337, "learning_rate": 0.0002884018302928541, "loss": 3.0823497772216797, "step": 5790, "token_acc": 0.2886612739812863 }, { "epoch": 3.394605687481677, "grad_norm": 0.24976602056082572, "learning_rate": 0.00028839622418639174, "loss": 3.008296012878418, "step": 5791, "token_acc": 0.2977789663326202 }, { "epoch": 3.395192025798886, "grad_norm": 0.25841302598392085, "learning_rate": 0.0002883906167798833, "loss": 3.048793315887451, "step": 5792, "token_acc": 0.29384109198060226 }, { "epoch": 3.395778364116095, "grad_norm": 0.24988395905847802, "learning_rate": 0.0002883850080733816, "loss": 3.0127930641174316, "step": 5793, "token_acc": 0.3006722390566048 }, { "epoch": 3.396364702433304, "grad_norm": 0.2610463041770924, "learning_rate": 0.0002883793980669392, "loss": 3.037114381790161, "step": 5794, "token_acc": 0.29496723965361976 }, { "epoch": 3.3969510407505132, "grad_norm": 0.24260590449045194, "learning_rate": 0.00028837378676060873, "loss": 3.0361244678497314, "step": 5795, "token_acc": 0.29559348070832586 }, { "epoch": 3.397537379067722, "grad_norm": 0.25255175236827604, "learning_rate": 0.0002883681741544431, "loss": 3.054929256439209, "step": 5796, "token_acc": 0.29293808294762025 }, { "epoch": 3.398123717384931, "grad_norm": 0.26753736974825515, "learning_rate": 0.00028836256024849486, "loss": 3.046617031097412, "step": 5797, "token_acc": 0.2936092322522298 }, { "epoch": 3.39871005570214, "grad_norm": 0.25830671007931894, "learning_rate": 0.00028835694504281687, "loss": 3.080627202987671, "step": 5798, "token_acc": 0.2891387232875181 }, { "epoch": 3.3992963940193492, "grad_norm": 0.3094321489359569, "learning_rate": 0.0002883513285374618, "loss": 3.084319829940796, "step": 5799, "token_acc": 0.28655775859605737 }, { "epoch": 3.3998827323365584, "grad_norm": 0.2505391689398477, "learning_rate": 0.00028834571073248243, "loss": 3.019322156906128, "step": 5800, "token_acc": 0.2968487046547604 }, { "epoch": 3.4004690706537675, "grad_norm": 0.2633340607901542, "learning_rate": 0.00028834009162793153, "loss": 3.043074131011963, "step": 5801, "token_acc": 0.29328860318233785 }, { "epoch": 3.401055408970976, "grad_norm": 0.25705877239108366, "learning_rate": 0.00028833447122386186, "loss": 3.0755510330200195, "step": 5802, "token_acc": 0.2908960188352782 }, { "epoch": 3.4016417472881852, "grad_norm": 0.24310445465531763, "learning_rate": 0.0002883288495203263, "loss": 3.0069336891174316, "step": 5803, "token_acc": 0.30006954374614186 }, { "epoch": 3.4022280856053944, "grad_norm": 0.26036085263863024, "learning_rate": 0.00028832322651737755, "loss": 3.0563430786132812, "step": 5804, "token_acc": 0.2922666102902129 }, { "epoch": 3.4028144239226035, "grad_norm": 0.2539218647411148, "learning_rate": 0.00028831760221506846, "loss": 3.0674233436584473, "step": 5805, "token_acc": 0.2903041924976358 }, { "epoch": 3.403400762239812, "grad_norm": 0.2559192173872823, "learning_rate": 0.0002883119766134519, "loss": 3.011321544647217, "step": 5806, "token_acc": 0.29809091819210953 }, { "epoch": 3.4039871005570213, "grad_norm": 0.2485167035017231, "learning_rate": 0.0002883063497125807, "loss": 3.068293571472168, "step": 5807, "token_acc": 0.2907193594214129 }, { "epoch": 3.4045734388742304, "grad_norm": 0.273940327310567, "learning_rate": 0.00028830072151250774, "loss": 3.067835807800293, "step": 5808, "token_acc": 0.29086284659598055 }, { "epoch": 3.4051597771914395, "grad_norm": 0.2849778936192566, "learning_rate": 0.00028829509201328587, "loss": 3.0295190811157227, "step": 5809, "token_acc": 0.29622024035283534 }, { "epoch": 3.4057461155086486, "grad_norm": 0.23843773834340443, "learning_rate": 0.00028828946121496797, "loss": 3.016824722290039, "step": 5810, "token_acc": 0.29740385639991374 }, { "epoch": 3.4063324538258577, "grad_norm": 0.2613213078783441, "learning_rate": 0.00028828382911760684, "loss": 3.027890205383301, "step": 5811, "token_acc": 0.2944947905431921 }, { "epoch": 3.4069187921430664, "grad_norm": 0.242892891166697, "learning_rate": 0.00028827819572125555, "loss": 3.0233802795410156, "step": 5812, "token_acc": 0.29790433378707776 }, { "epoch": 3.4075051304602755, "grad_norm": 0.25230134000758875, "learning_rate": 0.00028827256102596696, "loss": 3.045851230621338, "step": 5813, "token_acc": 0.2941698609104528 }, { "epoch": 3.4080914687774846, "grad_norm": 0.22914362264866167, "learning_rate": 0.0002882669250317939, "loss": 3.0619325637817383, "step": 5814, "token_acc": 0.2926485768470581 }, { "epoch": 3.4086778070946937, "grad_norm": 0.22218817650477204, "learning_rate": 0.00028826128773878944, "loss": 3.0591695308685303, "step": 5815, "token_acc": 0.2914596677602458 }, { "epoch": 3.409264145411903, "grad_norm": 0.26955409295406296, "learning_rate": 0.00028825564914700656, "loss": 3.0590620040893555, "step": 5816, "token_acc": 0.2925702816590225 }, { "epoch": 3.4098504837291115, "grad_norm": 0.2487046297552632, "learning_rate": 0.0002882500092564981, "loss": 3.032207489013672, "step": 5817, "token_acc": 0.2959240394020214 }, { "epoch": 3.4104368220463206, "grad_norm": 0.23897004823638518, "learning_rate": 0.0002882443680673171, "loss": 3.050837755203247, "step": 5818, "token_acc": 0.29104751470541307 }, { "epoch": 3.4110231603635297, "grad_norm": 0.23104599012530924, "learning_rate": 0.0002882387255795165, "loss": 3.066831111907959, "step": 5819, "token_acc": 0.292207307382579 }, { "epoch": 3.411609498680739, "grad_norm": 0.24669436839663125, "learning_rate": 0.0002882330817931494, "loss": 3.068368434906006, "step": 5820, "token_acc": 0.2907287275140918 }, { "epoch": 3.412195836997948, "grad_norm": 0.26568606076028456, "learning_rate": 0.0002882274367082688, "loss": 3.0582644939422607, "step": 5821, "token_acc": 0.2910883895851363 }, { "epoch": 3.412782175315157, "grad_norm": 0.23184923362725787, "learning_rate": 0.00028822179032492764, "loss": 3.048902988433838, "step": 5822, "token_acc": 0.2922377836706955 }, { "epoch": 3.4133685136323657, "grad_norm": 0.28367765940184414, "learning_rate": 0.00028821614264317905, "loss": 3.071821689605713, "step": 5823, "token_acc": 0.2891183720670731 }, { "epoch": 3.413954851949575, "grad_norm": 0.25711330196030424, "learning_rate": 0.00028821049366307603, "loss": 3.051515579223633, "step": 5824, "token_acc": 0.29348250773503415 }, { "epoch": 3.414541190266784, "grad_norm": 0.26874177662288773, "learning_rate": 0.0002882048433846717, "loss": 2.99855375289917, "step": 5825, "token_acc": 0.29907275709033815 }, { "epoch": 3.415127528583993, "grad_norm": 0.2642349257327431, "learning_rate": 0.00028819919180801906, "loss": 3.0628440380096436, "step": 5826, "token_acc": 0.2898326705586295 }, { "epoch": 3.415713866901202, "grad_norm": 0.24727393470939749, "learning_rate": 0.00028819353893317127, "loss": 3.074979305267334, "step": 5827, "token_acc": 0.2889181754794615 }, { "epoch": 3.416300205218411, "grad_norm": 0.24692703652006645, "learning_rate": 0.0002881878847601814, "loss": 3.1171715259552, "step": 5828, "token_acc": 0.2854893244994049 }, { "epoch": 3.41688654353562, "grad_norm": 0.23445281015238778, "learning_rate": 0.0002881822292891025, "loss": 3.0061609745025635, "step": 5829, "token_acc": 0.29941642774615945 }, { "epoch": 3.417472881852829, "grad_norm": 0.27225845059984427, "learning_rate": 0.0002881765725199878, "loss": 3.057243824005127, "step": 5830, "token_acc": 0.2914988079696506 }, { "epoch": 3.418059220170038, "grad_norm": 0.2790428769658944, "learning_rate": 0.0002881709144528904, "loss": 3.092606782913208, "step": 5831, "token_acc": 0.28773975893436954 }, { "epoch": 3.4186455584872473, "grad_norm": 0.23227715263586404, "learning_rate": 0.00028816525508786343, "loss": 3.0575666427612305, "step": 5832, "token_acc": 0.2922832845506784 }, { "epoch": 3.4192318968044564, "grad_norm": 0.23472979976893169, "learning_rate": 0.00028815959442496006, "loss": 3.0768470764160156, "step": 5833, "token_acc": 0.29000573013595127 }, { "epoch": 3.419818235121665, "grad_norm": 0.23170259503761328, "learning_rate": 0.0002881539324642335, "loss": 3.1084694862365723, "step": 5834, "token_acc": 0.287280346869193 }, { "epoch": 3.420404573438874, "grad_norm": 0.26051720746459, "learning_rate": 0.0002881482692057369, "loss": 3.0714192390441895, "step": 5835, "token_acc": 0.29107842412595164 }, { "epoch": 3.4209909117560833, "grad_norm": 0.2896442498795606, "learning_rate": 0.0002881426046495235, "loss": 3.0418190956115723, "step": 5836, "token_acc": 0.2952998303942752 }, { "epoch": 3.4215772500732924, "grad_norm": 0.24052865986578006, "learning_rate": 0.00028813693879564645, "loss": 3.0656003952026367, "step": 5837, "token_acc": 0.29282590300645334 }, { "epoch": 3.4221635883905015, "grad_norm": 0.2573545566609396, "learning_rate": 0.000288131271644159, "loss": 3.0443947315216064, "step": 5838, "token_acc": 0.2955301688788596 }, { "epoch": 3.42274992670771, "grad_norm": 0.2575837960632782, "learning_rate": 0.0002881256031951144, "loss": 3.0550546646118164, "step": 5839, "token_acc": 0.29212435957511257 }, { "epoch": 3.4233362650249193, "grad_norm": 0.23076685553126816, "learning_rate": 0.0002881199334485659, "loss": 3.037919521331787, "step": 5840, "token_acc": 0.29452560999121696 }, { "epoch": 3.4239226033421284, "grad_norm": 0.22874785367810943, "learning_rate": 0.0002881142624045667, "loss": 3.039004325866699, "step": 5841, "token_acc": 0.294684937966596 }, { "epoch": 3.4245089416593375, "grad_norm": 0.2433164491185596, "learning_rate": 0.0002881085900631701, "loss": 3.052949905395508, "step": 5842, "token_acc": 0.293342659413427 }, { "epoch": 3.4250952799765466, "grad_norm": 0.24340437545935076, "learning_rate": 0.00028810291642442944, "loss": 3.073596954345703, "step": 5843, "token_acc": 0.28954056903681236 }, { "epoch": 3.4256816182937557, "grad_norm": 0.26978918489968845, "learning_rate": 0.000288097241488398, "loss": 3.0020980834960938, "step": 5844, "token_acc": 0.29992178816943155 }, { "epoch": 3.4262679566109644, "grad_norm": 0.2399314730757761, "learning_rate": 0.000288091565255129, "loss": 3.044473886489868, "step": 5845, "token_acc": 0.29330799676857183 }, { "epoch": 3.4268542949281735, "grad_norm": 0.2454815945638472, "learning_rate": 0.0002880858877246759, "loss": 3.0374042987823486, "step": 5846, "token_acc": 0.29471217379653153 }, { "epoch": 3.4274406332453826, "grad_norm": 0.26722477519949106, "learning_rate": 0.0002880802088970919, "loss": 3.073298692703247, "step": 5847, "token_acc": 0.29004139184366723 }, { "epoch": 3.4280269715625917, "grad_norm": 0.2691588911379231, "learning_rate": 0.00028807452877243044, "loss": 3.027499198913574, "step": 5848, "token_acc": 0.29598676676421937 }, { "epoch": 3.4286133098798004, "grad_norm": 0.28206833537797543, "learning_rate": 0.0002880688473507448, "loss": 3.055067777633667, "step": 5849, "token_acc": 0.2918902113139286 }, { "epoch": 3.4291996481970095, "grad_norm": 0.2694115146515316, "learning_rate": 0.0002880631646320884, "loss": 3.0189852714538574, "step": 5850, "token_acc": 0.298081181012304 }, { "epoch": 3.4297859865142186, "grad_norm": 0.23840540556512652, "learning_rate": 0.0002880574806165146, "loss": 3.0310802459716797, "step": 5851, "token_acc": 0.2966639088016614 }, { "epoch": 3.4303723248314277, "grad_norm": 0.2704564189955994, "learning_rate": 0.0002880517953040768, "loss": 3.0325770378112793, "step": 5852, "token_acc": 0.2945689368217828 }, { "epoch": 3.430958663148637, "grad_norm": 0.2749233441313519, "learning_rate": 0.00028804610869482845, "loss": 3.0563511848449707, "step": 5853, "token_acc": 0.2927565179153267 }, { "epoch": 3.431545001465846, "grad_norm": 0.2763503785829167, "learning_rate": 0.00028804042078882293, "loss": 2.9992480278015137, "step": 5854, "token_acc": 0.30080083941549524 }, { "epoch": 3.432131339783055, "grad_norm": 0.26868046563336107, "learning_rate": 0.0002880347315861137, "loss": 3.0626468658447266, "step": 5855, "token_acc": 0.28909923971602636 }, { "epoch": 3.4327176781002637, "grad_norm": 0.2791451853659684, "learning_rate": 0.0002880290410867541, "loss": 3.02744722366333, "step": 5856, "token_acc": 0.29619501784106445 }, { "epoch": 3.433304016417473, "grad_norm": 0.2500946776805958, "learning_rate": 0.00028802334929079766, "loss": 3.028989791870117, "step": 5857, "token_acc": 0.29604928295193966 }, { "epoch": 3.433890354734682, "grad_norm": 0.2559625729561729, "learning_rate": 0.00028801765619829785, "loss": 3.040222644805908, "step": 5858, "token_acc": 0.2959968505351712 }, { "epoch": 3.434476693051891, "grad_norm": 0.2512195670071527, "learning_rate": 0.00028801196180930816, "loss": 3.0709176063537598, "step": 5859, "token_acc": 0.2913766602080469 }, { "epoch": 3.4350630313690997, "grad_norm": 0.2748623426259876, "learning_rate": 0.0002880062661238821, "loss": 3.0267386436462402, "step": 5860, "token_acc": 0.29692791800823465 }, { "epoch": 3.435649369686309, "grad_norm": 0.2499096517692109, "learning_rate": 0.00028800056914207305, "loss": 3.0328783988952637, "step": 5861, "token_acc": 0.2947235063897148 }, { "epoch": 3.436235708003518, "grad_norm": 0.2592893364459033, "learning_rate": 0.00028799487086393464, "loss": 3.0702638626098633, "step": 5862, "token_acc": 0.29017857142857145 }, { "epoch": 3.436822046320727, "grad_norm": 0.24648233462216657, "learning_rate": 0.0002879891712895204, "loss": 3.018019676208496, "step": 5863, "token_acc": 0.2975871521071483 }, { "epoch": 3.437408384637936, "grad_norm": 0.2594739915318762, "learning_rate": 0.0002879834704188838, "loss": 3.0093398094177246, "step": 5864, "token_acc": 0.2979458510391926 }, { "epoch": 3.4379947229551453, "grad_norm": 0.2596504109653254, "learning_rate": 0.00028797776825207846, "loss": 3.0316262245178223, "step": 5865, "token_acc": 0.29510336757248734 }, { "epoch": 3.438581061272354, "grad_norm": 0.24154017143289508, "learning_rate": 0.0002879720647891579, "loss": 3.0468475818634033, "step": 5866, "token_acc": 0.29433976263093325 }, { "epoch": 3.439167399589563, "grad_norm": 0.2669644616070634, "learning_rate": 0.0002879663600301757, "loss": 3.0520689487457275, "step": 5867, "token_acc": 0.29263616234336537 }, { "epoch": 3.439753737906772, "grad_norm": 0.2691172509210264, "learning_rate": 0.0002879606539751855, "loss": 3.044283151626587, "step": 5868, "token_acc": 0.295510801225006 }, { "epoch": 3.4403400762239813, "grad_norm": 0.27784847936437657, "learning_rate": 0.00028795494662424084, "loss": 3.035041570663452, "step": 5869, "token_acc": 0.2961496653736306 }, { "epoch": 3.4409264145411904, "grad_norm": 0.30885190304983895, "learning_rate": 0.00028794923797739535, "loss": 3.063432216644287, "step": 5870, "token_acc": 0.2912897316041478 }, { "epoch": 3.441512752858399, "grad_norm": 0.32053917559752154, "learning_rate": 0.00028794352803470264, "loss": 3.0718631744384766, "step": 5871, "token_acc": 0.2894765630697742 }, { "epoch": 3.442099091175608, "grad_norm": 0.23420626035982176, "learning_rate": 0.0002879378167962164, "loss": 3.0366263389587402, "step": 5872, "token_acc": 0.2949868498181799 }, { "epoch": 3.4426854294928173, "grad_norm": 0.2615196442401533, "learning_rate": 0.00028793210426199023, "loss": 3.0659379959106445, "step": 5873, "token_acc": 0.2898499918900394 }, { "epoch": 3.4432717678100264, "grad_norm": 0.27826540370892555, "learning_rate": 0.00028792639043207776, "loss": 3.0123090744018555, "step": 5874, "token_acc": 0.29779938587512794 }, { "epoch": 3.4438581061272355, "grad_norm": 0.24310037189950007, "learning_rate": 0.00028792067530653275, "loss": 3.045793294906616, "step": 5875, "token_acc": 0.2932456546340315 }, { "epoch": 3.4444444444444446, "grad_norm": 0.25718413845866805, "learning_rate": 0.00028791495888540885, "loss": 3.011349678039551, "step": 5876, "token_acc": 0.29804031205900366 }, { "epoch": 3.4450307827616533, "grad_norm": 0.25408892969096114, "learning_rate": 0.00028790924116875975, "loss": 3.032029628753662, "step": 5877, "token_acc": 0.29645307178928915 }, { "epoch": 3.4456171210788624, "grad_norm": 0.27978118498543253, "learning_rate": 0.0002879035221566392, "loss": 3.0299038887023926, "step": 5878, "token_acc": 0.2972638233922371 }, { "epoch": 3.4462034593960715, "grad_norm": 0.2557515299841084, "learning_rate": 0.00028789780184910086, "loss": 3.052673578262329, "step": 5879, "token_acc": 0.29290658617997367 }, { "epoch": 3.4467897977132806, "grad_norm": 0.26183557995586265, "learning_rate": 0.00028789208024619845, "loss": 3.0412583351135254, "step": 5880, "token_acc": 0.2939765989285149 }, { "epoch": 3.4473761360304898, "grad_norm": 0.26491239250678933, "learning_rate": 0.00028788635734798584, "loss": 3.0487637519836426, "step": 5881, "token_acc": 0.2930065519327558 }, { "epoch": 3.4479624743476984, "grad_norm": 0.2678529327624325, "learning_rate": 0.00028788063315451657, "loss": 2.993980646133423, "step": 5882, "token_acc": 0.29996914135829456 }, { "epoch": 3.4485488126649075, "grad_norm": 0.2692288495555472, "learning_rate": 0.0002878749076658447, "loss": 3.031723737716675, "step": 5883, "token_acc": 0.295772705868886 }, { "epoch": 3.4491351509821166, "grad_norm": 0.2662074579299492, "learning_rate": 0.0002878691808820238, "loss": 3.0756757259368896, "step": 5884, "token_acc": 0.28954685137310376 }, { "epoch": 3.4497214892993258, "grad_norm": 0.2426766661685805, "learning_rate": 0.0002878634528031077, "loss": 3.0857162475585938, "step": 5885, "token_acc": 0.2879945220970616 }, { "epoch": 3.450307827616535, "grad_norm": 0.2884759063861678, "learning_rate": 0.0002878577234291503, "loss": 3.028648853302002, "step": 5886, "token_acc": 0.29562079986574824 }, { "epoch": 3.450894165933744, "grad_norm": 0.24191478936049615, "learning_rate": 0.0002878519927602053, "loss": 3.031815528869629, "step": 5887, "token_acc": 0.29497135212168807 }, { "epoch": 3.4514805042509527, "grad_norm": 0.25276509775927886, "learning_rate": 0.00028784626079632656, "loss": 3.034696102142334, "step": 5888, "token_acc": 0.2964767586704255 }, { "epoch": 3.4520668425681618, "grad_norm": 0.23114674582758835, "learning_rate": 0.000287840527537568, "loss": 3.085292339324951, "step": 5889, "token_acc": 0.28957636342728166 }, { "epoch": 3.452653180885371, "grad_norm": 0.2580610485049468, "learning_rate": 0.00028783479298398343, "loss": 3.0597164630889893, "step": 5890, "token_acc": 0.29291268337230847 }, { "epoch": 3.45323951920258, "grad_norm": 0.25223113378033046, "learning_rate": 0.00028782905713562666, "loss": 3.0435709953308105, "step": 5891, "token_acc": 0.2927430767236124 }, { "epoch": 3.453825857519789, "grad_norm": 0.24817996634538592, "learning_rate": 0.0002878233199925517, "loss": 3.077256679534912, "step": 5892, "token_acc": 0.2898922789488427 }, { "epoch": 3.4544121958369978, "grad_norm": 0.23887029876657756, "learning_rate": 0.00028781758155481234, "loss": 3.069082260131836, "step": 5893, "token_acc": 0.29109115481551445 }, { "epoch": 3.454998534154207, "grad_norm": 0.23187766522503486, "learning_rate": 0.00028781184182246245, "loss": 3.0224597454071045, "step": 5894, "token_acc": 0.296214113800002 }, { "epoch": 3.455584872471416, "grad_norm": 0.23618136789972474, "learning_rate": 0.00028780610079555613, "loss": 3.049370765686035, "step": 5895, "token_acc": 0.2944413184149588 }, { "epoch": 3.456171210788625, "grad_norm": 0.2623386269801366, "learning_rate": 0.00028780035847414707, "loss": 3.0724129676818848, "step": 5896, "token_acc": 0.29136701014181776 }, { "epoch": 3.456757549105834, "grad_norm": 0.268668723929693, "learning_rate": 0.0002877946148582894, "loss": 3.0217061042785645, "step": 5897, "token_acc": 0.29571284072930987 }, { "epoch": 3.4573438874230433, "grad_norm": 0.25448000215348926, "learning_rate": 0.000287788869948037, "loss": 3.0349197387695312, "step": 5898, "token_acc": 0.29477731755690173 }, { "epoch": 3.457930225740252, "grad_norm": 0.2477400223229597, "learning_rate": 0.00028778312374344383, "loss": 3.068495750427246, "step": 5899, "token_acc": 0.28919265603337696 }, { "epoch": 3.458516564057461, "grad_norm": 0.2507772145370304, "learning_rate": 0.0002877773762445639, "loss": 3.043917655944824, "step": 5900, "token_acc": 0.292791815546593 }, { "epoch": 3.45910290237467, "grad_norm": 0.2707494778436932, "learning_rate": 0.00028777162745145113, "loss": 3.101447820663452, "step": 5901, "token_acc": 0.28565676254472244 }, { "epoch": 3.4596892406918793, "grad_norm": 0.24496554003226576, "learning_rate": 0.00028776587736415956, "loss": 3.0539588928222656, "step": 5902, "token_acc": 0.29305885472185095 }, { "epoch": 3.460275579009088, "grad_norm": 0.27185883693454654, "learning_rate": 0.0002877601259827433, "loss": 3.05627179145813, "step": 5903, "token_acc": 0.29261279406201757 }, { "epoch": 3.460861917326297, "grad_norm": 0.26695093122312963, "learning_rate": 0.0002877543733072562, "loss": 3.0789635181427, "step": 5904, "token_acc": 0.288932958324222 }, { "epoch": 3.4614482556435062, "grad_norm": 0.2316119571777939, "learning_rate": 0.00028774861933775247, "loss": 3.05426025390625, "step": 5905, "token_acc": 0.29273637090160726 }, { "epoch": 3.4620345939607153, "grad_norm": 0.2595742366141812, "learning_rate": 0.000287742864074286, "loss": 3.063587188720703, "step": 5906, "token_acc": 0.292629691899447 }, { "epoch": 3.4626209322779244, "grad_norm": 0.25404864645851666, "learning_rate": 0.000287737107516911, "loss": 3.121598482131958, "step": 5907, "token_acc": 0.2835095330815572 }, { "epoch": 3.4632072705951336, "grad_norm": 0.2458007753746523, "learning_rate": 0.0002877313496656814, "loss": 3.0921287536621094, "step": 5908, "token_acc": 0.28712553335421565 }, { "epoch": 3.4637936089123427, "grad_norm": 0.2609605527895886, "learning_rate": 0.00028772559052065143, "loss": 3.0306708812713623, "step": 5909, "token_acc": 0.2935485128445016 }, { "epoch": 3.4643799472295513, "grad_norm": 0.23186701590313555, "learning_rate": 0.0002877198300818751, "loss": 3.020260810852051, "step": 5910, "token_acc": 0.29718920467120596 }, { "epoch": 3.4649662855467604, "grad_norm": 0.24107468527183554, "learning_rate": 0.00028771406834940654, "loss": 3.0536727905273438, "step": 5911, "token_acc": 0.2923175422936326 }, { "epoch": 3.4655526238639696, "grad_norm": 0.2414627106913437, "learning_rate": 0.0002877083053232999, "loss": 3.036865711212158, "step": 5912, "token_acc": 0.29618454114363907 }, { "epoch": 3.4661389621811787, "grad_norm": 0.2647117429720385, "learning_rate": 0.00028770254100360935, "loss": 3.0682530403137207, "step": 5913, "token_acc": 0.289561285434483 }, { "epoch": 3.4667253004983873, "grad_norm": 0.2646567119144457, "learning_rate": 0.0002876967753903889, "loss": 3.0637874603271484, "step": 5914, "token_acc": 0.29164232362318804 }, { "epoch": 3.4673116388155965, "grad_norm": 0.27277655166265846, "learning_rate": 0.00028769100848369283, "loss": 3.031582832336426, "step": 5915, "token_acc": 0.29534007282727714 }, { "epoch": 3.4678979771328056, "grad_norm": 0.26319274897690587, "learning_rate": 0.00028768524028357524, "loss": 3.096741199493408, "step": 5916, "token_acc": 0.2869297964354701 }, { "epoch": 3.4684843154500147, "grad_norm": 0.2648562447024931, "learning_rate": 0.0002876794707900904, "loss": 3.0464441776275635, "step": 5917, "token_acc": 0.2949394284530959 }, { "epoch": 3.469070653767224, "grad_norm": 0.27139623773464505, "learning_rate": 0.0002876737000032924, "loss": 3.0862877368927, "step": 5918, "token_acc": 0.28685045524423763 }, { "epoch": 3.469656992084433, "grad_norm": 0.25503525257966664, "learning_rate": 0.0002876679279232355, "loss": 3.0170810222625732, "step": 5919, "token_acc": 0.29789266507039075 }, { "epoch": 3.4702433304016416, "grad_norm": 0.2535248699632488, "learning_rate": 0.000287662154549974, "loss": 3.0863428115844727, "step": 5920, "token_acc": 0.2878763462519808 }, { "epoch": 3.4708296687188507, "grad_norm": 0.28622616738001055, "learning_rate": 0.000287656379883562, "loss": 3.088747978210449, "step": 5921, "token_acc": 0.28750070019979035 }, { "epoch": 3.47141600703606, "grad_norm": 0.29018433652843806, "learning_rate": 0.00028765060392405386, "loss": 3.0412755012512207, "step": 5922, "token_acc": 0.2930465934408518 }, { "epoch": 3.472002345353269, "grad_norm": 0.32979870973476266, "learning_rate": 0.00028764482667150375, "loss": 3.0308361053466797, "step": 5923, "token_acc": 0.29535160131364846 }, { "epoch": 3.472588683670478, "grad_norm": 0.2832147559924382, "learning_rate": 0.00028763904812596596, "loss": 3.0753393173217773, "step": 5924, "token_acc": 0.28956547650347936 }, { "epoch": 3.4731750219876867, "grad_norm": 0.2401684571670994, "learning_rate": 0.0002876332682874948, "loss": 3.0678439140319824, "step": 5925, "token_acc": 0.29052488107044555 }, { "epoch": 3.473761360304896, "grad_norm": 0.2997968624275349, "learning_rate": 0.00028762748715614457, "loss": 3.070957899093628, "step": 5926, "token_acc": 0.2897340367671054 }, { "epoch": 3.474347698622105, "grad_norm": 0.27538808915267676, "learning_rate": 0.0002876217047319695, "loss": 3.069878339767456, "step": 5927, "token_acc": 0.2901674851833927 }, { "epoch": 3.474934036939314, "grad_norm": 0.2471993620422374, "learning_rate": 0.000287615921015024, "loss": 3.0317611694335938, "step": 5928, "token_acc": 0.29653311464134074 }, { "epoch": 3.475520375256523, "grad_norm": 0.28196410551914386, "learning_rate": 0.00028761013600536235, "loss": 3.052868366241455, "step": 5929, "token_acc": 0.2924947789431502 }, { "epoch": 3.4761067135737322, "grad_norm": 0.2703426575137317, "learning_rate": 0.0002876043497030389, "loss": 3.0276036262512207, "step": 5930, "token_acc": 0.2970279007851341 }, { "epoch": 3.476693051890941, "grad_norm": 0.22786254412127804, "learning_rate": 0.000287598562108108, "loss": 3.0489706993103027, "step": 5931, "token_acc": 0.29315776546097244 }, { "epoch": 3.47727939020815, "grad_norm": 0.27985972476827925, "learning_rate": 0.00028759277322062406, "loss": 3.054405689239502, "step": 5932, "token_acc": 0.2930033433695034 }, { "epoch": 3.477865728525359, "grad_norm": 0.2956338028463759, "learning_rate": 0.0002875869830406414, "loss": 3.0708632469177246, "step": 5933, "token_acc": 0.2894749579044169 }, { "epoch": 3.4784520668425682, "grad_norm": 0.23804576792599316, "learning_rate": 0.00028758119156821444, "loss": 3.0508337020874023, "step": 5934, "token_acc": 0.29379493148152486 }, { "epoch": 3.4790384051597774, "grad_norm": 0.2628015264478636, "learning_rate": 0.00028757539880339757, "loss": 3.0871341228485107, "step": 5935, "token_acc": 0.2888905918716632 }, { "epoch": 3.479624743476986, "grad_norm": 0.25222154213478803, "learning_rate": 0.0002875696047462452, "loss": 3.0031325817108154, "step": 5936, "token_acc": 0.2994548953098176 }, { "epoch": 3.480211081794195, "grad_norm": 0.26734110740823697, "learning_rate": 0.00028756380939681185, "loss": 3.0571131706237793, "step": 5937, "token_acc": 0.291545955936047 }, { "epoch": 3.4807974201114043, "grad_norm": 0.2609232876097213, "learning_rate": 0.0002875580127551518, "loss": 3.032316207885742, "step": 5938, "token_acc": 0.29567498386977353 }, { "epoch": 3.4813837584286134, "grad_norm": 0.25265373833681276, "learning_rate": 0.00028755221482131964, "loss": 3.020779609680176, "step": 5939, "token_acc": 0.2980172204063931 }, { "epoch": 3.4819700967458225, "grad_norm": 0.2565409135030726, "learning_rate": 0.0002875464155953697, "loss": 3.0442113876342773, "step": 5940, "token_acc": 0.29485798148449593 }, { "epoch": 3.4825564350630316, "grad_norm": 0.22982556274379223, "learning_rate": 0.0002875406150773566, "loss": 3.0631606578826904, "step": 5941, "token_acc": 0.29148491565958734 }, { "epoch": 3.4831427733802403, "grad_norm": 0.2652567222373783, "learning_rate": 0.0002875348132673347, "loss": 3.082474708557129, "step": 5942, "token_acc": 0.28916430688179123 }, { "epoch": 3.4837291116974494, "grad_norm": 0.2392306436762464, "learning_rate": 0.0002875290101653586, "loss": 3.044640064239502, "step": 5943, "token_acc": 0.29358879205466243 }, { "epoch": 3.4843154500146585, "grad_norm": 0.2324049238880952, "learning_rate": 0.00028752320577148273, "loss": 3.027885913848877, "step": 5944, "token_acc": 0.2960120228307005 }, { "epoch": 3.4849017883318676, "grad_norm": 0.24977578022755825, "learning_rate": 0.0002875174000857617, "loss": 3.0555901527404785, "step": 5945, "token_acc": 0.2918689835036682 }, { "epoch": 3.4854881266490767, "grad_norm": 0.2500482244334264, "learning_rate": 0.00028751159310825, "loss": 3.0905888080596924, "step": 5946, "token_acc": 0.2882377170377255 }, { "epoch": 3.4860744649662854, "grad_norm": 0.24127351407901693, "learning_rate": 0.0002875057848390022, "loss": 3.0553536415100098, "step": 5947, "token_acc": 0.29299905901330825 }, { "epoch": 3.4866608032834945, "grad_norm": 0.23176934339203645, "learning_rate": 0.0002874999752780728, "loss": 3.0293819904327393, "step": 5948, "token_acc": 0.29485588481169744 }, { "epoch": 3.4872471416007036, "grad_norm": 0.23665604447584632, "learning_rate": 0.00028749416442551645, "loss": 3.0499768257141113, "step": 5949, "token_acc": 0.29260113022226675 }, { "epoch": 3.4878334799179127, "grad_norm": 0.24881035131384097, "learning_rate": 0.0002874883522813877, "loss": 2.9956798553466797, "step": 5950, "token_acc": 0.3005015218685032 }, { "epoch": 3.488419818235122, "grad_norm": 0.23880786715724822, "learning_rate": 0.0002874825388457411, "loss": 3.072726249694824, "step": 5951, "token_acc": 0.28940583477377385 }, { "epoch": 3.489006156552331, "grad_norm": 0.25496339314421185, "learning_rate": 0.00028747672411863137, "loss": 3.072484254837036, "step": 5952, "token_acc": 0.29069322098812717 }, { "epoch": 3.4895924948695396, "grad_norm": 0.2534952133766439, "learning_rate": 0.000287470908100113, "loss": 3.0263898372650146, "step": 5953, "token_acc": 0.2963672616596185 }, { "epoch": 3.4901788331867487, "grad_norm": 0.2523026006795684, "learning_rate": 0.00028746509079024076, "loss": 3.0400609970092773, "step": 5954, "token_acc": 0.2964876820488828 }, { "epoch": 3.490765171503958, "grad_norm": 0.24711188754709004, "learning_rate": 0.0002874592721890692, "loss": 3.0278077125549316, "step": 5955, "token_acc": 0.29633276974746064 }, { "epoch": 3.491351509821167, "grad_norm": 0.264814195201621, "learning_rate": 0.000287453452296653, "loss": 3.058867931365967, "step": 5956, "token_acc": 0.29068854356091917 }, { "epoch": 3.4919378481383756, "grad_norm": 0.2566668418488315, "learning_rate": 0.00028744763111304683, "loss": 3.0230700969696045, "step": 5957, "token_acc": 0.2970705458826037 }, { "epoch": 3.4925241864555847, "grad_norm": 0.2546422231036557, "learning_rate": 0.0002874418086383054, "loss": 3.0706512928009033, "step": 5958, "token_acc": 0.28941153188154084 }, { "epoch": 3.493110524772794, "grad_norm": 0.2514863669605506, "learning_rate": 0.0002874359848724834, "loss": 3.0505502223968506, "step": 5959, "token_acc": 0.29373918287809564 }, { "epoch": 3.493696863090003, "grad_norm": 0.21497206556694667, "learning_rate": 0.0002874301598156355, "loss": 3.043281316757202, "step": 5960, "token_acc": 0.29333075195128616 }, { "epoch": 3.494283201407212, "grad_norm": 0.23330689966134266, "learning_rate": 0.0002874243334678164, "loss": 3.052555799484253, "step": 5961, "token_acc": 0.2937873657139637 }, { "epoch": 3.494869539724421, "grad_norm": 0.2454403579297247, "learning_rate": 0.0002874185058290809, "loss": 3.0593371391296387, "step": 5962, "token_acc": 0.2910483934016105 }, { "epoch": 3.49545587804163, "grad_norm": 0.2588920655683688, "learning_rate": 0.0002874126768994837, "loss": 3.0430989265441895, "step": 5963, "token_acc": 0.29320786696844736 }, { "epoch": 3.496042216358839, "grad_norm": 0.24877860530105753, "learning_rate": 0.00028740684667907964, "loss": 3.0307462215423584, "step": 5964, "token_acc": 0.29447868984247116 }, { "epoch": 3.496628554676048, "grad_norm": 0.28213616361096755, "learning_rate": 0.0002874010151679233, "loss": 3.0079731941223145, "step": 5965, "token_acc": 0.2994772574343936 }, { "epoch": 3.497214892993257, "grad_norm": 0.2633146501155154, "learning_rate": 0.00028739518236606964, "loss": 3.0615336894989014, "step": 5966, "token_acc": 0.291834908055976 }, { "epoch": 3.4978012313104663, "grad_norm": 0.23207466409338978, "learning_rate": 0.0002873893482735734, "loss": 3.026992082595825, "step": 5967, "token_acc": 0.29580799776439853 }, { "epoch": 3.498387569627675, "grad_norm": 0.28519796952570675, "learning_rate": 0.00028738351289048935, "loss": 3.0473947525024414, "step": 5968, "token_acc": 0.29364685829394904 }, { "epoch": 3.498973907944884, "grad_norm": 0.2416825946200029, "learning_rate": 0.0002873776762168723, "loss": 3.033376693725586, "step": 5969, "token_acc": 0.29598809156077244 }, { "epoch": 3.499560246262093, "grad_norm": 0.23988346662051854, "learning_rate": 0.0002873718382527771, "loss": 3.0284740924835205, "step": 5970, "token_acc": 0.2956214465193161 }, { "epoch": 3.5001465845793023, "grad_norm": 0.3009415079266259, "learning_rate": 0.00028736599899825856, "loss": 3.0454025268554688, "step": 5971, "token_acc": 0.29310615517263566 }, { "epoch": 3.5007329228965114, "grad_norm": 0.25916301309346185, "learning_rate": 0.00028736015845337164, "loss": 3.0528554916381836, "step": 5972, "token_acc": 0.29270067795805677 }, { "epoch": 3.5013192612137205, "grad_norm": 0.2762354568617323, "learning_rate": 0.00028735431661817105, "loss": 3.0423150062561035, "step": 5973, "token_acc": 0.294897293306191 }, { "epoch": 3.5019055995309296, "grad_norm": 0.2533931639602894, "learning_rate": 0.0002873484734927118, "loss": 3.037900447845459, "step": 5974, "token_acc": 0.29495447948278136 }, { "epoch": 3.5024919378481383, "grad_norm": 0.23825511691320314, "learning_rate": 0.0002873426290770487, "loss": 3.0461227893829346, "step": 5975, "token_acc": 0.2925557949551168 }, { "epoch": 3.5030782761653474, "grad_norm": 0.25672770206650636, "learning_rate": 0.0002873367833712367, "loss": 3.057312488555908, "step": 5976, "token_acc": 0.2931695721869054 }, { "epoch": 3.5036646144825565, "grad_norm": 0.24854848213757877, "learning_rate": 0.00028733093637533066, "loss": 3.0448484420776367, "step": 5977, "token_acc": 0.2936710310721098 }, { "epoch": 3.5042509527997656, "grad_norm": 0.2697503639510263, "learning_rate": 0.0002873250880893855, "loss": 3.0893564224243164, "step": 5978, "token_acc": 0.286661320224134 }, { "epoch": 3.5048372911169743, "grad_norm": 0.2634690388470524, "learning_rate": 0.00028731923851345624, "loss": 3.083617925643921, "step": 5979, "token_acc": 0.28845488658756446 }, { "epoch": 3.5054236294341834, "grad_norm": 0.2367165095960873, "learning_rate": 0.00028731338764759776, "loss": 3.0532584190368652, "step": 5980, "token_acc": 0.294379652036442 }, { "epoch": 3.5060099677513925, "grad_norm": 0.2692838045846597, "learning_rate": 0.000287307535491865, "loss": 3.0589513778686523, "step": 5981, "token_acc": 0.2915144889392774 }, { "epoch": 3.5065963060686016, "grad_norm": 0.22541127090290633, "learning_rate": 0.000287301682046313, "loss": 3.093899726867676, "step": 5982, "token_acc": 0.28776043355652814 }, { "epoch": 3.5071826443858107, "grad_norm": 0.25787588483034934, "learning_rate": 0.0002872958273109967, "loss": 3.0605220794677734, "step": 5983, "token_acc": 0.29029443780414926 }, { "epoch": 3.50776898270302, "grad_norm": 0.23969022336345502, "learning_rate": 0.0002872899712859711, "loss": 3.021665096282959, "step": 5984, "token_acc": 0.296673400952828 }, { "epoch": 3.5083553210202285, "grad_norm": 0.23958011755211261, "learning_rate": 0.0002872841139712913, "loss": 3.026945114135742, "step": 5985, "token_acc": 0.29573985872695585 }, { "epoch": 3.5089416593374376, "grad_norm": 0.2466205026952344, "learning_rate": 0.0002872782553670121, "loss": 3.064274787902832, "step": 5986, "token_acc": 0.2920905968920475 }, { "epoch": 3.5095279976546467, "grad_norm": 0.24783259459695123, "learning_rate": 0.0002872723954731888, "loss": 3.061755418777466, "step": 5987, "token_acc": 0.2905616307463637 }, { "epoch": 3.510114335971856, "grad_norm": 0.25587437082208775, "learning_rate": 0.0002872665342898763, "loss": 3.084259510040283, "step": 5988, "token_acc": 0.2866721283761817 }, { "epoch": 3.5107006742890645, "grad_norm": 0.22463752917409496, "learning_rate": 0.0002872606718171296, "loss": 3.063847541809082, "step": 5989, "token_acc": 0.2905544093389837 }, { "epoch": 3.5112870126062736, "grad_norm": 0.25661038000596653, "learning_rate": 0.0002872548080550039, "loss": 3.0100908279418945, "step": 5990, "token_acc": 0.2986947933016001 }, { "epoch": 3.5118733509234827, "grad_norm": 0.2601587981092489, "learning_rate": 0.00028724894300355424, "loss": 3.0452399253845215, "step": 5991, "token_acc": 0.2945054888538594 }, { "epoch": 3.512459689240692, "grad_norm": 0.2711938790603412, "learning_rate": 0.0002872430766628357, "loss": 3.0633351802825928, "step": 5992, "token_acc": 0.29376806691678453 }, { "epoch": 3.513046027557901, "grad_norm": 0.27554327738938206, "learning_rate": 0.00028723720903290343, "loss": 3.0365192890167236, "step": 5993, "token_acc": 0.29459064707441696 }, { "epoch": 3.51363236587511, "grad_norm": 0.25100098365483037, "learning_rate": 0.00028723134011381243, "loss": 3.0345816612243652, "step": 5994, "token_acc": 0.2963847018250862 }, { "epoch": 3.514218704192319, "grad_norm": 0.3186556187753884, "learning_rate": 0.00028722546990561795, "loss": 3.0510783195495605, "step": 5995, "token_acc": 0.29308762807768773 }, { "epoch": 3.514805042509528, "grad_norm": 0.27078895616893256, "learning_rate": 0.0002872195984083751, "loss": 3.044239044189453, "step": 5996, "token_acc": 0.29327975371682186 }, { "epoch": 3.515391380826737, "grad_norm": 0.2532935944964148, "learning_rate": 0.00028721372562213907, "loss": 3.0705838203430176, "step": 5997, "token_acc": 0.28930548855938426 }, { "epoch": 3.515977719143946, "grad_norm": 0.24672301301806082, "learning_rate": 0.00028720785154696493, "loss": 3.0779964923858643, "step": 5998, "token_acc": 0.28990625601840525 }, { "epoch": 3.516564057461155, "grad_norm": 0.23989597511071573, "learning_rate": 0.0002872019761829079, "loss": 3.0370397567749023, "step": 5999, "token_acc": 0.2961519227563568 }, { "epoch": 3.517150395778364, "grad_norm": 0.24303240594690975, "learning_rate": 0.0002871960995300232, "loss": 3.011654853820801, "step": 6000, "token_acc": 0.2984375996428799 }, { "epoch": 3.517736734095573, "grad_norm": 0.24291930409664628, "learning_rate": 0.00028719022158836603, "loss": 3.0636425018310547, "step": 6001, "token_acc": 0.29041886045809717 }, { "epoch": 3.518323072412782, "grad_norm": 0.24100816837384106, "learning_rate": 0.0002871843423579916, "loss": 3.093930721282959, "step": 6002, "token_acc": 0.28690452826029095 }, { "epoch": 3.518909410729991, "grad_norm": 0.23597134266408792, "learning_rate": 0.0002871784618389552, "loss": 3.057577610015869, "step": 6003, "token_acc": 0.29090918932670784 }, { "epoch": 3.5194957490472003, "grad_norm": 0.23588166175593347, "learning_rate": 0.00028717258003131186, "loss": 3.039311408996582, "step": 6004, "token_acc": 0.29509297867613243 }, { "epoch": 3.5200820873644094, "grad_norm": 0.24187707503618427, "learning_rate": 0.00028716669693511703, "loss": 3.1045784950256348, "step": 6005, "token_acc": 0.2864376804967586 }, { "epoch": 3.5206684256816185, "grad_norm": 0.24842631827574996, "learning_rate": 0.00028716081255042593, "loss": 3.036945104598999, "step": 6006, "token_acc": 0.2949622433231046 }, { "epoch": 3.521254763998827, "grad_norm": 0.2548128476514135, "learning_rate": 0.00028715492687729385, "loss": 2.998538017272949, "step": 6007, "token_acc": 0.3003548660128435 }, { "epoch": 3.5218411023160363, "grad_norm": 0.2654747126519872, "learning_rate": 0.000287149039915776, "loss": 3.0890626907348633, "step": 6008, "token_acc": 0.28603268750213046 }, { "epoch": 3.5224274406332454, "grad_norm": 0.28659579790963585, "learning_rate": 0.00028714315166592777, "loss": 3.0761075019836426, "step": 6009, "token_acc": 0.288469387227931 }, { "epoch": 3.5230137789504545, "grad_norm": 0.23289984821241425, "learning_rate": 0.00028713726212780446, "loss": 2.9916152954101562, "step": 6010, "token_acc": 0.3015373910131817 }, { "epoch": 3.523600117267663, "grad_norm": 0.2790121683273793, "learning_rate": 0.0002871313713014613, "loss": 3.04083251953125, "step": 6011, "token_acc": 0.29379644832353957 }, { "epoch": 3.5241864555848723, "grad_norm": 0.2654451312559019, "learning_rate": 0.0002871254791869537, "loss": 3.047785758972168, "step": 6012, "token_acc": 0.29437451770994627 }, { "epoch": 3.5247727939020814, "grad_norm": 0.2824283926725798, "learning_rate": 0.000287119585784337, "loss": 3.0639586448669434, "step": 6013, "token_acc": 0.29246646898050926 }, { "epoch": 3.5253591322192905, "grad_norm": 0.2381524811316031, "learning_rate": 0.0002871136910936666, "loss": 3.0242481231689453, "step": 6014, "token_acc": 0.29778548416988027 }, { "epoch": 3.5259454705364996, "grad_norm": 0.2688116046157544, "learning_rate": 0.0002871077951149978, "loss": 3.1000301837921143, "step": 6015, "token_acc": 0.28830322835883176 }, { "epoch": 3.5265318088537088, "grad_norm": 0.23981603226348366, "learning_rate": 0.000287101897848386, "loss": 3.041025400161743, "step": 6016, "token_acc": 0.29560134858367204 }, { "epoch": 3.527118147170918, "grad_norm": 0.22777269529245514, "learning_rate": 0.0002870959992938867, "loss": 3.055591344833374, "step": 6017, "token_acc": 0.2923565161151799 }, { "epoch": 3.5277044854881265, "grad_norm": 0.2551678778333477, "learning_rate": 0.00028709009945155516, "loss": 3.0481300354003906, "step": 6018, "token_acc": 0.29506038610211277 }, { "epoch": 3.5282908238053357, "grad_norm": 0.22822836879274422, "learning_rate": 0.0002870841983214469, "loss": 3.0820505619049072, "step": 6019, "token_acc": 0.2909546463165997 }, { "epoch": 3.5288771621225448, "grad_norm": 0.24655492224619516, "learning_rate": 0.00028707829590361733, "loss": 3.0105717182159424, "step": 6020, "token_acc": 0.2991955068140739 }, { "epoch": 3.529463500439754, "grad_norm": 0.2351870446391531, "learning_rate": 0.00028707239219812183, "loss": 3.049309730529785, "step": 6021, "token_acc": 0.2944839953183219 }, { "epoch": 3.5300498387569625, "grad_norm": 0.22983536743438027, "learning_rate": 0.00028706648720501596, "loss": 3.02833890914917, "step": 6022, "token_acc": 0.29668536383577865 }, { "epoch": 3.5306361770741717, "grad_norm": 0.25556293714242784, "learning_rate": 0.00028706058092435507, "loss": 3.0777699947357178, "step": 6023, "token_acc": 0.2898716707842979 }, { "epoch": 3.5312225153913808, "grad_norm": 0.23467796094888332, "learning_rate": 0.0002870546733561948, "loss": 3.0052690505981445, "step": 6024, "token_acc": 0.30090249022389504 }, { "epoch": 3.53180885370859, "grad_norm": 0.27017651121211356, "learning_rate": 0.0002870487645005906, "loss": 3.047358751296997, "step": 6025, "token_acc": 0.29392738466662677 }, { "epoch": 3.532395192025799, "grad_norm": 0.2547905977159385, "learning_rate": 0.0002870428543575978, "loss": 3.027613401412964, "step": 6026, "token_acc": 0.2969415387741027 }, { "epoch": 3.532981530343008, "grad_norm": 0.2732187865672332, "learning_rate": 0.00028703694292727213, "loss": 3.0565667152404785, "step": 6027, "token_acc": 0.29239311555709635 }, { "epoch": 3.533567868660217, "grad_norm": 0.27659542728205433, "learning_rate": 0.00028703103020966895, "loss": 3.061187982559204, "step": 6028, "token_acc": 0.2907359302852205 }, { "epoch": 3.534154206977426, "grad_norm": 0.24159084928846825, "learning_rate": 0.00028702511620484403, "loss": 3.0141677856445312, "step": 6029, "token_acc": 0.2993864310248657 }, { "epoch": 3.534740545294635, "grad_norm": 0.2501779760292787, "learning_rate": 0.00028701920091285266, "loss": 3.0402355194091797, "step": 6030, "token_acc": 0.2939756248071583 }, { "epoch": 3.535326883611844, "grad_norm": 0.2846169352221804, "learning_rate": 0.00028701328433375063, "loss": 3.0625858306884766, "step": 6031, "token_acc": 0.2926795808916406 }, { "epoch": 3.535913221929053, "grad_norm": 0.27326361739346255, "learning_rate": 0.0002870073664675934, "loss": 3.0844950675964355, "step": 6032, "token_acc": 0.28777593176575167 }, { "epoch": 3.536499560246262, "grad_norm": 0.23564983800361874, "learning_rate": 0.0002870014473144366, "loss": 3.061190605163574, "step": 6033, "token_acc": 0.2933484177082898 }, { "epoch": 3.537085898563471, "grad_norm": 0.2847364731955953, "learning_rate": 0.00028699552687433573, "loss": 3.021754026412964, "step": 6034, "token_acc": 0.2983580840828279 }, { "epoch": 3.53767223688068, "grad_norm": 0.2478977196271876, "learning_rate": 0.0002869896051473466, "loss": 3.0920448303222656, "step": 6035, "token_acc": 0.2859795631189258 }, { "epoch": 3.538258575197889, "grad_norm": 0.278030690125152, "learning_rate": 0.0002869836821335246, "loss": 3.0065436363220215, "step": 6036, "token_acc": 0.2988968307484828 }, { "epoch": 3.5388449135150983, "grad_norm": 0.24327824051212757, "learning_rate": 0.0002869777578329256, "loss": 3.029778003692627, "step": 6037, "token_acc": 0.29564744111354807 }, { "epoch": 3.5394312518323074, "grad_norm": 0.25362697753369945, "learning_rate": 0.0002869718322456051, "loss": 3.0354065895080566, "step": 6038, "token_acc": 0.2948021245893586 }, { "epoch": 3.540017590149516, "grad_norm": 0.284496003103699, "learning_rate": 0.0002869659053716188, "loss": 3.073519468307495, "step": 6039, "token_acc": 0.288876724384041 }, { "epoch": 3.5406039284667252, "grad_norm": 0.2693785458083241, "learning_rate": 0.0002869599772110224, "loss": 3.084733486175537, "step": 6040, "token_acc": 0.2879630731791189 }, { "epoch": 3.5411902667839343, "grad_norm": 0.24779997373044924, "learning_rate": 0.00028695404776387154, "loss": 3.0398566722869873, "step": 6041, "token_acc": 0.2964581771251068 }, { "epoch": 3.5417766051011434, "grad_norm": 0.2623558548958517, "learning_rate": 0.00028694811703022197, "loss": 3.0617852210998535, "step": 6042, "token_acc": 0.28977031184361784 }, { "epoch": 3.542362943418352, "grad_norm": 0.2389656006869627, "learning_rate": 0.0002869421850101294, "loss": 3.04427170753479, "step": 6043, "token_acc": 0.2934448413706571 }, { "epoch": 3.5429492817355612, "grad_norm": 0.25001300866029547, "learning_rate": 0.0002869362517036495, "loss": 3.058241367340088, "step": 6044, "token_acc": 0.29194315563718987 }, { "epoch": 3.5435356200527703, "grad_norm": 0.24123573694740974, "learning_rate": 0.0002869303171108381, "loss": 3.064008951187134, "step": 6045, "token_acc": 0.29138885521477353 }, { "epoch": 3.5441219583699795, "grad_norm": 0.23725005013032696, "learning_rate": 0.0002869243812317508, "loss": 3.076772928237915, "step": 6046, "token_acc": 0.29008356545961 }, { "epoch": 3.5447082966871886, "grad_norm": 0.24737911708835764, "learning_rate": 0.0002869184440664435, "loss": 3.050940752029419, "step": 6047, "token_acc": 0.29333133189609434 }, { "epoch": 3.5452946350043977, "grad_norm": 0.2509768615861396, "learning_rate": 0.00028691250561497194, "loss": 3.07908296585083, "step": 6048, "token_acc": 0.2881753307581387 }, { "epoch": 3.545880973321607, "grad_norm": 0.23551892199905822, "learning_rate": 0.0002869065658773918, "loss": 3.0442733764648438, "step": 6049, "token_acc": 0.292755325895967 }, { "epoch": 3.5464673116388155, "grad_norm": 0.27235314538592653, "learning_rate": 0.000286900624853759, "loss": 2.9942917823791504, "step": 6050, "token_acc": 0.299810618428786 }, { "epoch": 3.5470536499560246, "grad_norm": 0.24035301987152533, "learning_rate": 0.00028689468254412934, "loss": 3.0822620391845703, "step": 6051, "token_acc": 0.2889418302448919 }, { "epoch": 3.5476399882732337, "grad_norm": 0.2562068620424487, "learning_rate": 0.0002868887389485586, "loss": 3.0526232719421387, "step": 6052, "token_acc": 0.29242596016546574 }, { "epoch": 3.548226326590443, "grad_norm": 0.2471107313329575, "learning_rate": 0.00028688279406710257, "loss": 3.0399651527404785, "step": 6053, "token_acc": 0.2952737015752531 }, { "epoch": 3.5488126649076515, "grad_norm": 0.2641187759393293, "learning_rate": 0.0002868768478998172, "loss": 3.083714008331299, "step": 6054, "token_acc": 0.28920240271343567 }, { "epoch": 3.5493990032248606, "grad_norm": 0.27193159278858786, "learning_rate": 0.0002868709004467582, "loss": 3.076934337615967, "step": 6055, "token_acc": 0.28843141752812534 }, { "epoch": 3.5499853415420697, "grad_norm": 0.2794570739372252, "learning_rate": 0.0002868649517079816, "loss": 3.0379161834716797, "step": 6056, "token_acc": 0.29329187840468196 }, { "epoch": 3.550571679859279, "grad_norm": 0.2761733784309781, "learning_rate": 0.0002868590016835432, "loss": 3.037870407104492, "step": 6057, "token_acc": 0.2956857371484757 }, { "epoch": 3.551158018176488, "grad_norm": 0.23998662260583714, "learning_rate": 0.0002868530503734989, "loss": 3.030988931655884, "step": 6058, "token_acc": 0.2947896823080556 }, { "epoch": 3.551744356493697, "grad_norm": 0.24578866462039128, "learning_rate": 0.0002868470977779046, "loss": 3.050600528717041, "step": 6059, "token_acc": 0.2931530310629262 }, { "epoch": 3.552330694810906, "grad_norm": 0.24425527088450125, "learning_rate": 0.0002868411438968162, "loss": 3.0394887924194336, "step": 6060, "token_acc": 0.2954149571349122 }, { "epoch": 3.552917033128115, "grad_norm": 0.24016920510971262, "learning_rate": 0.00028683518873028963, "loss": 3.053927421569824, "step": 6061, "token_acc": 0.29436359380423116 }, { "epoch": 3.553503371445324, "grad_norm": 0.25789641790447304, "learning_rate": 0.0002868292322783809, "loss": 3.0603485107421875, "step": 6062, "token_acc": 0.2920824878339019 }, { "epoch": 3.554089709762533, "grad_norm": 0.22453368015172573, "learning_rate": 0.0002868232745411459, "loss": 3.0688514709472656, "step": 6063, "token_acc": 0.29177623191690466 }, { "epoch": 3.554676048079742, "grad_norm": 0.24038225929216378, "learning_rate": 0.0002868173155186406, "loss": 3.030489444732666, "step": 6064, "token_acc": 0.29739601491056344 }, { "epoch": 3.555262386396951, "grad_norm": 0.22277537484992926, "learning_rate": 0.000286811355210921, "loss": 3.0206336975097656, "step": 6065, "token_acc": 0.29686855046245575 }, { "epoch": 3.55584872471416, "grad_norm": 0.2616435465807672, "learning_rate": 0.000286805393618043, "loss": 3.035215377807617, "step": 6066, "token_acc": 0.29298452484060483 }, { "epoch": 3.556435063031369, "grad_norm": 0.268883416098163, "learning_rate": 0.0002867994307400628, "loss": 3.1055145263671875, "step": 6067, "token_acc": 0.28469374111457657 }, { "epoch": 3.557021401348578, "grad_norm": 0.2525141378296953, "learning_rate": 0.0002867934665770362, "loss": 3.079225540161133, "step": 6068, "token_acc": 0.28841556801953766 }, { "epoch": 3.5576077396657872, "grad_norm": 0.25501280086287026, "learning_rate": 0.0002867875011290193, "loss": 3.0819454193115234, "step": 6069, "token_acc": 0.28886650083349347 }, { "epoch": 3.5581940779829964, "grad_norm": 0.2500841946708259, "learning_rate": 0.00028678153439606815, "loss": 3.036304473876953, "step": 6070, "token_acc": 0.2964214742837472 }, { "epoch": 3.5587804163002055, "grad_norm": 0.24953824216749074, "learning_rate": 0.00028677556637823887, "loss": 3.024610757827759, "step": 6071, "token_acc": 0.29557669547705845 }, { "epoch": 3.559366754617414, "grad_norm": 0.26064576866086037, "learning_rate": 0.0002867695970755874, "loss": 3.0633652210235596, "step": 6072, "token_acc": 0.2897616953155338 }, { "epoch": 3.5599530929346233, "grad_norm": 0.21267759734040712, "learning_rate": 0.00028676362648816987, "loss": 3.0441946983337402, "step": 6073, "token_acc": 0.29358587272640135 }, { "epoch": 3.5605394312518324, "grad_norm": 0.2368463417229675, "learning_rate": 0.00028675765461604237, "loss": 3.050931453704834, "step": 6074, "token_acc": 0.2955909827548147 }, { "epoch": 3.5611257695690415, "grad_norm": 0.22531807206989257, "learning_rate": 0.000286751681459261, "loss": 3.0889341831207275, "step": 6075, "token_acc": 0.28889776793011385 }, { "epoch": 3.56171210788625, "grad_norm": 0.24203215325624858, "learning_rate": 0.00028674570701788183, "loss": 3.07252836227417, "step": 6076, "token_acc": 0.29175244265635614 }, { "epoch": 3.5622984462034593, "grad_norm": 0.22156446649180378, "learning_rate": 0.00028673973129196106, "loss": 3.0235276222229004, "step": 6077, "token_acc": 0.2978394061413448 }, { "epoch": 3.5628847845206684, "grad_norm": 0.25741139089566956, "learning_rate": 0.00028673375428155476, "loss": 3.0375866889953613, "step": 6078, "token_acc": 0.29657648107548124 }, { "epoch": 3.5634711228378775, "grad_norm": 0.24396014771401753, "learning_rate": 0.000286727775986719, "loss": 3.029989719390869, "step": 6079, "token_acc": 0.29733488011185977 }, { "epoch": 3.5640574611550866, "grad_norm": 0.23238009686141486, "learning_rate": 0.0002867217964075101, "loss": 3.030158042907715, "step": 6080, "token_acc": 0.296097036854846 }, { "epoch": 3.5646437994722957, "grad_norm": 0.25394760881360384, "learning_rate": 0.0002867158155439842, "loss": 3.055544376373291, "step": 6081, "token_acc": 0.2924656025530132 }, { "epoch": 3.565230137789505, "grad_norm": 0.22446081904579027, "learning_rate": 0.00028670983339619733, "loss": 3.011784553527832, "step": 6082, "token_acc": 0.29798803125755935 }, { "epoch": 3.5658164761067135, "grad_norm": 0.2591285523065901, "learning_rate": 0.0002867038499642059, "loss": 3.059640645980835, "step": 6083, "token_acc": 0.2918689477248874 }, { "epoch": 3.5664028144239226, "grad_norm": 0.2636359209238713, "learning_rate": 0.00028669786524806594, "loss": 3.0491106510162354, "step": 6084, "token_acc": 0.29392017903767254 }, { "epoch": 3.5669891527411317, "grad_norm": 0.26069772537809915, "learning_rate": 0.00028669187924783377, "loss": 3.0741841793060303, "step": 6085, "token_acc": 0.28994687101300504 }, { "epoch": 3.567575491058341, "grad_norm": 0.2645561320942874, "learning_rate": 0.0002866858919635656, "loss": 3.075071334838867, "step": 6086, "token_acc": 0.290495011014643 }, { "epoch": 3.5681618293755495, "grad_norm": 0.27668740723339474, "learning_rate": 0.00028667990339531767, "loss": 3.080467700958252, "step": 6087, "token_acc": 0.2909800239163085 }, { "epoch": 3.5687481676927586, "grad_norm": 0.26476697429168394, "learning_rate": 0.00028667391354314617, "loss": 3.020512104034424, "step": 6088, "token_acc": 0.29754867414879405 }, { "epoch": 3.5693345060099677, "grad_norm": 0.2606152162559852, "learning_rate": 0.00028666792240710745, "loss": 3.06704044342041, "step": 6089, "token_acc": 0.2900319519629266 }, { "epoch": 3.569920844327177, "grad_norm": 0.2936229181469169, "learning_rate": 0.0002866619299872578, "loss": 3.0341434478759766, "step": 6090, "token_acc": 0.29625872818274496 }, { "epoch": 3.570507182644386, "grad_norm": 0.25546281797997994, "learning_rate": 0.00028665593628365344, "loss": 3.059028387069702, "step": 6091, "token_acc": 0.2918790504867958 }, { "epoch": 3.571093520961595, "grad_norm": 0.24321044711431264, "learning_rate": 0.0002866499412963507, "loss": 3.0412228107452393, "step": 6092, "token_acc": 0.2940167280026765 }, { "epoch": 3.5716798592788037, "grad_norm": 0.25321942082991794, "learning_rate": 0.0002866439450254059, "loss": 3.0155866146087646, "step": 6093, "token_acc": 0.2977933636329109 }, { "epoch": 3.572266197596013, "grad_norm": 0.26310823569314196, "learning_rate": 0.00028663794747087537, "loss": 3.032715320587158, "step": 6094, "token_acc": 0.29454439854433573 }, { "epoch": 3.572852535913222, "grad_norm": 0.24142818841379549, "learning_rate": 0.00028663194863281545, "loss": 3.0557267665863037, "step": 6095, "token_acc": 0.291492885741487 }, { "epoch": 3.573438874230431, "grad_norm": 0.2693607352939552, "learning_rate": 0.00028662594851128247, "loss": 3.009385108947754, "step": 6096, "token_acc": 0.2995410363644438 }, { "epoch": 3.5740252125476397, "grad_norm": 0.2694324834046713, "learning_rate": 0.0002866199471063328, "loss": 3.0033278465270996, "step": 6097, "token_acc": 0.29924112657925606 }, { "epoch": 3.574611550864849, "grad_norm": 0.22317934227313005, "learning_rate": 0.00028661394441802286, "loss": 3.000153064727783, "step": 6098, "token_acc": 0.30054169363778593 }, { "epoch": 3.575197889182058, "grad_norm": 0.2239676859098204, "learning_rate": 0.000286607940446409, "loss": 3.0329737663269043, "step": 6099, "token_acc": 0.2954887061625917 }, { "epoch": 3.575784227499267, "grad_norm": 0.27554666446966947, "learning_rate": 0.0002866019351915476, "loss": 3.062764883041382, "step": 6100, "token_acc": 0.29049137083755583 }, { "epoch": 3.576370565816476, "grad_norm": 0.23410070482984183, "learning_rate": 0.00028659592865349514, "loss": 3.0197744369506836, "step": 6101, "token_acc": 0.2965598138058602 }, { "epoch": 3.5769569041336853, "grad_norm": 0.2608647107877979, "learning_rate": 0.00028658992083230794, "loss": 3.0443286895751953, "step": 6102, "token_acc": 0.29283692463302174 }, { "epoch": 3.5775432424508944, "grad_norm": 0.26284075172269095, "learning_rate": 0.0002865839117280425, "loss": 3.0181045532226562, "step": 6103, "token_acc": 0.29859938260581165 }, { "epoch": 3.578129580768103, "grad_norm": 0.2523698722126072, "learning_rate": 0.0002865779013407553, "loss": 3.0449748039245605, "step": 6104, "token_acc": 0.29365795572263104 }, { "epoch": 3.578715919085312, "grad_norm": 0.25188272983563526, "learning_rate": 0.00028657188967050264, "loss": 3.0365653038024902, "step": 6105, "token_acc": 0.2952062601318563 }, { "epoch": 3.5793022574025213, "grad_norm": 0.23478233125600992, "learning_rate": 0.0002865658767173412, "loss": 3.015404224395752, "step": 6106, "token_acc": 0.2978809273241854 }, { "epoch": 3.5798885957197304, "grad_norm": 0.2444031837063552, "learning_rate": 0.0002865598624813274, "loss": 3.0871124267578125, "step": 6107, "token_acc": 0.28788604414713664 }, { "epoch": 3.580474934036939, "grad_norm": 0.2593354618685756, "learning_rate": 0.0002865538469625177, "loss": 3.1010775566101074, "step": 6108, "token_acc": 0.28649030779581197 }, { "epoch": 3.581061272354148, "grad_norm": 0.23591893185375268, "learning_rate": 0.00028654783016096857, "loss": 3.031008005142212, "step": 6109, "token_acc": 0.29463602341527273 }, { "epoch": 3.5816476106713573, "grad_norm": 0.27312903279446565, "learning_rate": 0.0002865418120767366, "loss": 3.0325865745544434, "step": 6110, "token_acc": 0.29547954924016534 }, { "epoch": 3.5822339489885664, "grad_norm": 0.2551680244753278, "learning_rate": 0.0002865357927098783, "loss": 3.01479434967041, "step": 6111, "token_acc": 0.29834968536611123 }, { "epoch": 3.5828202873057755, "grad_norm": 0.2605300215864193, "learning_rate": 0.0002865297720604502, "loss": 3.0945820808410645, "step": 6112, "token_acc": 0.2878335495771521 }, { "epoch": 3.5834066256229846, "grad_norm": 0.23536949717069902, "learning_rate": 0.00028652375012850885, "loss": 3.079036235809326, "step": 6113, "token_acc": 0.2891864264944997 }, { "epoch": 3.5839929639401937, "grad_norm": 0.23095378462270155, "learning_rate": 0.0002865177269141109, "loss": 3.0468597412109375, "step": 6114, "token_acc": 0.2940650752108824 }, { "epoch": 3.5845793022574024, "grad_norm": 0.24711036252549537, "learning_rate": 0.00028651170241731277, "loss": 3.0463035106658936, "step": 6115, "token_acc": 0.2942721105116887 }, { "epoch": 3.5851656405746115, "grad_norm": 0.25666749978510217, "learning_rate": 0.00028650567663817127, "loss": 3.055063486099243, "step": 6116, "token_acc": 0.29176996831792723 }, { "epoch": 3.5857519788918206, "grad_norm": 0.26404471635979787, "learning_rate": 0.0002864996495767428, "loss": 3.0587167739868164, "step": 6117, "token_acc": 0.29249874361890654 }, { "epoch": 3.5863383172090297, "grad_norm": 0.2573388827350888, "learning_rate": 0.00028649362123308404, "loss": 3.0235342979431152, "step": 6118, "token_acc": 0.29753811629595406 }, { "epoch": 3.5869246555262384, "grad_norm": 0.24789261571909826, "learning_rate": 0.0002864875916072517, "loss": 3.0351009368896484, "step": 6119, "token_acc": 0.2956268646234599 }, { "epoch": 3.5875109938434475, "grad_norm": 0.24865731019031992, "learning_rate": 0.0002864815606993023, "loss": 3.0768585205078125, "step": 6120, "token_acc": 0.2896048058446079 }, { "epoch": 3.5880973321606566, "grad_norm": 0.26027644726396876, "learning_rate": 0.00028647552850929255, "loss": 3.017876625061035, "step": 6121, "token_acc": 0.2976597107691469 }, { "epoch": 3.5886836704778657, "grad_norm": 0.2438327212325603, "learning_rate": 0.00028646949503727915, "loss": 2.997507333755493, "step": 6122, "token_acc": 0.30004384720538546 }, { "epoch": 3.589270008795075, "grad_norm": 0.27601691227613323, "learning_rate": 0.00028646346028331877, "loss": 3.0222673416137695, "step": 6123, "token_acc": 0.2954030533381328 }, { "epoch": 3.589856347112284, "grad_norm": 0.22713981941577088, "learning_rate": 0.000286457424247468, "loss": 3.0285282135009766, "step": 6124, "token_acc": 0.2966337824707872 }, { "epoch": 3.590442685429493, "grad_norm": 0.2762942344038067, "learning_rate": 0.00028645138692978364, "loss": 3.0475244522094727, "step": 6125, "token_acc": 0.2931074354971802 }, { "epoch": 3.5910290237467017, "grad_norm": 0.2655786520376476, "learning_rate": 0.00028644534833032235, "loss": 3.0623743534088135, "step": 6126, "token_acc": 0.29072293600230387 }, { "epoch": 3.591615362063911, "grad_norm": 0.261517011834492, "learning_rate": 0.0002864393084491409, "loss": 3.0372674465179443, "step": 6127, "token_acc": 0.2958513931888545 }, { "epoch": 3.59220170038112, "grad_norm": 0.26388563124712244, "learning_rate": 0.00028643326728629596, "loss": 3.0659213066101074, "step": 6128, "token_acc": 0.28964239022571786 }, { "epoch": 3.592788038698329, "grad_norm": 0.27849552554377105, "learning_rate": 0.0002864272248418444, "loss": 3.020321846008301, "step": 6129, "token_acc": 0.2976649634922209 }, { "epoch": 3.5933743770155377, "grad_norm": 0.2524102745096216, "learning_rate": 0.0002864211811158428, "loss": 3.0469841957092285, "step": 6130, "token_acc": 0.2921905634781975 }, { "epoch": 3.593960715332747, "grad_norm": 0.25052004241709375, "learning_rate": 0.0002864151361083481, "loss": 3.0399510860443115, "step": 6131, "token_acc": 0.2930737139264087 }, { "epoch": 3.594547053649956, "grad_norm": 0.24639107346179698, "learning_rate": 0.000286409089819417, "loss": 3.0320656299591064, "step": 6132, "token_acc": 0.2960135419746271 }, { "epoch": 3.595133391967165, "grad_norm": 0.27523393260497503, "learning_rate": 0.0002864030422491063, "loss": 3.050067663192749, "step": 6133, "token_acc": 0.29298050544778237 }, { "epoch": 3.595719730284374, "grad_norm": 0.24652021697663548, "learning_rate": 0.0002863969933974728, "loss": 3.0656063556671143, "step": 6134, "token_acc": 0.29237881532347454 }, { "epoch": 3.5963060686015833, "grad_norm": 0.24991398233729448, "learning_rate": 0.0002863909432645734, "loss": 3.041727066040039, "step": 6135, "token_acc": 0.29450936598122934 }, { "epoch": 3.5968924069187924, "grad_norm": 0.29375214489966295, "learning_rate": 0.0002863848918504649, "loss": 3.0826520919799805, "step": 6136, "token_acc": 0.28887339376431914 }, { "epoch": 3.597478745236001, "grad_norm": 0.24540861090554375, "learning_rate": 0.00028637883915520404, "loss": 3.054462194442749, "step": 6137, "token_acc": 0.2930816191584354 }, { "epoch": 3.59806508355321, "grad_norm": 0.24773845119885868, "learning_rate": 0.00028637278517884784, "loss": 3.0125513076782227, "step": 6138, "token_acc": 0.29829153963617405 }, { "epoch": 3.5986514218704193, "grad_norm": 0.23083250538585498, "learning_rate": 0.000286366729921453, "loss": 3.0563793182373047, "step": 6139, "token_acc": 0.2930490298529291 }, { "epoch": 3.5992377601876284, "grad_norm": 0.2241803537893363, "learning_rate": 0.00028636067338307656, "loss": 3.068307638168335, "step": 6140, "token_acc": 0.28960567985821484 }, { "epoch": 3.599824098504837, "grad_norm": 0.2542748654545773, "learning_rate": 0.0002863546155637753, "loss": 3.0647077560424805, "step": 6141, "token_acc": 0.2919481435752712 }, { "epoch": 3.600410436822046, "grad_norm": 0.2548939663083021, "learning_rate": 0.00028634855646360617, "loss": 3.0493435859680176, "step": 6142, "token_acc": 0.29346120852879126 }, { "epoch": 3.6009967751392553, "grad_norm": 0.2558406809939796, "learning_rate": 0.0002863424960826261, "loss": 3.0604329109191895, "step": 6143, "token_acc": 0.29214809293079885 }, { "epoch": 3.6015831134564644, "grad_norm": 0.25751371870667433, "learning_rate": 0.00028633643442089205, "loss": 3.0120139122009277, "step": 6144, "token_acc": 0.2995056060739825 }, { "epoch": 3.6021694517736735, "grad_norm": 0.22879888964459524, "learning_rate": 0.0002863303714784609, "loss": 3.0437865257263184, "step": 6145, "token_acc": 0.29189373740848756 }, { "epoch": 3.6027557900908826, "grad_norm": 0.300761005894166, "learning_rate": 0.00028632430725538955, "loss": 3.063821315765381, "step": 6146, "token_acc": 0.29028607110798893 }, { "epoch": 3.6033421284080913, "grad_norm": 0.25000948370816856, "learning_rate": 0.00028631824175173504, "loss": 3.009230136871338, "step": 6147, "token_acc": 0.298487106773066 }, { "epoch": 3.6039284667253004, "grad_norm": 0.2568777396495953, "learning_rate": 0.0002863121749675544, "loss": 3.047050952911377, "step": 6148, "token_acc": 0.29299937485268046 }, { "epoch": 3.6045148050425095, "grad_norm": 0.25302034187186356, "learning_rate": 0.0002863061069029045, "loss": 3.066626787185669, "step": 6149, "token_acc": 0.2910793809980091 }, { "epoch": 3.6051011433597187, "grad_norm": 0.2673466253115117, "learning_rate": 0.00028630003755784244, "loss": 3.100741147994995, "step": 6150, "token_acc": 0.285629177488725 }, { "epoch": 3.6056874816769273, "grad_norm": 0.26602340384763923, "learning_rate": 0.00028629396693242515, "loss": 3.067643642425537, "step": 6151, "token_acc": 0.29079610665483024 }, { "epoch": 3.6062738199941364, "grad_norm": 0.27691833789135867, "learning_rate": 0.00028628789502670973, "loss": 3.0707292556762695, "step": 6152, "token_acc": 0.29105057754674163 }, { "epoch": 3.6068601583113455, "grad_norm": 0.2618875803035751, "learning_rate": 0.0002862818218407532, "loss": 3.058117151260376, "step": 6153, "token_acc": 0.29186998635709116 }, { "epoch": 3.6074464966285547, "grad_norm": 0.26052487365354143, "learning_rate": 0.00028627574737461256, "loss": 3.1006875038146973, "step": 6154, "token_acc": 0.28395048755884333 }, { "epoch": 3.6080328349457638, "grad_norm": 0.24535185019424482, "learning_rate": 0.000286269671628345, "loss": 3.036726951599121, "step": 6155, "token_acc": 0.2941332154768818 }, { "epoch": 3.608619173262973, "grad_norm": 0.2500746567933048, "learning_rate": 0.00028626359460200737, "loss": 3.077418804168701, "step": 6156, "token_acc": 0.2877984598119268 }, { "epoch": 3.609205511580182, "grad_norm": 0.25028097189032594, "learning_rate": 0.00028625751629565694, "loss": 3.0473079681396484, "step": 6157, "token_acc": 0.2934210004101126 }, { "epoch": 3.6097918498973907, "grad_norm": 0.237385920363267, "learning_rate": 0.00028625143670935076, "loss": 3.0271854400634766, "step": 6158, "token_acc": 0.2986032689450223 }, { "epoch": 3.6103781882145998, "grad_norm": 0.223297051926556, "learning_rate": 0.00028624535584314593, "loss": 3.0162267684936523, "step": 6159, "token_acc": 0.2980500001314548 }, { "epoch": 3.610964526531809, "grad_norm": 0.22288475084897696, "learning_rate": 0.0002862392736970996, "loss": 3.0254554748535156, "step": 6160, "token_acc": 0.2964708199848631 }, { "epoch": 3.611550864849018, "grad_norm": 0.23650007737977352, "learning_rate": 0.00028623319027126884, "loss": 2.998199462890625, "step": 6161, "token_acc": 0.30141188479612424 }, { "epoch": 3.6121372031662267, "grad_norm": 0.23374753838193305, "learning_rate": 0.00028622710556571086, "loss": 3.0646297931671143, "step": 6162, "token_acc": 0.2923029592578251 }, { "epoch": 3.6127235414834358, "grad_norm": 0.2769463797624376, "learning_rate": 0.0002862210195804828, "loss": 3.0488460063934326, "step": 6163, "token_acc": 0.29308924317005847 }, { "epoch": 3.613309879800645, "grad_norm": 0.27044672170712525, "learning_rate": 0.0002862149323156418, "loss": 3.015994071960449, "step": 6164, "token_acc": 0.2983499403446214 }, { "epoch": 3.613896218117854, "grad_norm": 0.23618806645838863, "learning_rate": 0.0002862088437712451, "loss": 3.086371660232544, "step": 6165, "token_acc": 0.28756024575649164 }, { "epoch": 3.614482556435063, "grad_norm": 0.25547032799685593, "learning_rate": 0.00028620275394734975, "loss": 3.124640703201294, "step": 6166, "token_acc": 0.2813133485648934 }, { "epoch": 3.615068894752272, "grad_norm": 0.26448708533092424, "learning_rate": 0.00028619666284401314, "loss": 3.0205764770507812, "step": 6167, "token_acc": 0.2960181026394426 }, { "epoch": 3.6156552330694813, "grad_norm": 0.24467311555360677, "learning_rate": 0.00028619057046129243, "loss": 3.033078193664551, "step": 6168, "token_acc": 0.2952841709478267 }, { "epoch": 3.61624157138669, "grad_norm": 0.25565826836216243, "learning_rate": 0.0002861844767992448, "loss": 3.0700271129608154, "step": 6169, "token_acc": 0.2894167349672028 }, { "epoch": 3.616827909703899, "grad_norm": 0.2588942798494076, "learning_rate": 0.0002861783818579275, "loss": 3.0236010551452637, "step": 6170, "token_acc": 0.29824452460149464 }, { "epoch": 3.6174142480211082, "grad_norm": 0.24866091364896437, "learning_rate": 0.00028617228563739786, "loss": 3.0428285598754883, "step": 6171, "token_acc": 0.29582905402309073 }, { "epoch": 3.6180005863383173, "grad_norm": 0.2520687335949198, "learning_rate": 0.000286166188137713, "loss": 3.0607612133026123, "step": 6172, "token_acc": 0.291403233042765 }, { "epoch": 3.618586924655526, "grad_norm": 0.2524602510040712, "learning_rate": 0.0002861600893589304, "loss": 3.024083137512207, "step": 6173, "token_acc": 0.29615397829229556 }, { "epoch": 3.619173262972735, "grad_norm": 0.2861131738090535, "learning_rate": 0.00028615398930110716, "loss": 3.0351860523223877, "step": 6174, "token_acc": 0.2958076283393287 }, { "epoch": 3.6197596012899442, "grad_norm": 0.2836115668563803, "learning_rate": 0.0002861478879643007, "loss": 3.125187873840332, "step": 6175, "token_acc": 0.2831471538973646 }, { "epoch": 3.6203459396071533, "grad_norm": 0.2561066939859495, "learning_rate": 0.0002861417853485683, "loss": 3.0650320053100586, "step": 6176, "token_acc": 0.28968109268211367 }, { "epoch": 3.6209322779243625, "grad_norm": 0.2744763830322329, "learning_rate": 0.0002861356814539673, "loss": 3.0637879371643066, "step": 6177, "token_acc": 0.2922969814768325 }, { "epoch": 3.6215186162415716, "grad_norm": 0.2894212138777676, "learning_rate": 0.00028612957628055494, "loss": 3.0428876876831055, "step": 6178, "token_acc": 0.29388227699590647 }, { "epoch": 3.6221049545587807, "grad_norm": 0.245649964513638, "learning_rate": 0.0002861234698283887, "loss": 3.0352120399475098, "step": 6179, "token_acc": 0.2947702958159276 }, { "epoch": 3.6226912928759893, "grad_norm": 0.2507215256292912, "learning_rate": 0.00028611736209752586, "loss": 3.030254364013672, "step": 6180, "token_acc": 0.2958348439819728 }, { "epoch": 3.6232776311931985, "grad_norm": 0.23100764330826834, "learning_rate": 0.00028611125308802387, "loss": 3.0672810077667236, "step": 6181, "token_acc": 0.290608982577188 }, { "epoch": 3.6238639695104076, "grad_norm": 0.27873995391702655, "learning_rate": 0.00028610514279994, "loss": 3.0810956954956055, "step": 6182, "token_acc": 0.28780670507701994 }, { "epoch": 3.6244503078276167, "grad_norm": 0.25015337156171186, "learning_rate": 0.00028609903123333173, "loss": 3.0174880027770996, "step": 6183, "token_acc": 0.2970970241546885 }, { "epoch": 3.6250366461448253, "grad_norm": 0.23399044010413797, "learning_rate": 0.00028609291838825655, "loss": 3.0309877395629883, "step": 6184, "token_acc": 0.29638418501355995 }, { "epoch": 3.6256229844620345, "grad_norm": 0.23900558143245232, "learning_rate": 0.0002860868042647717, "loss": 2.9930672645568848, "step": 6185, "token_acc": 0.30055438993325123 }, { "epoch": 3.6262093227792436, "grad_norm": 0.22857876688954804, "learning_rate": 0.0002860806888629347, "loss": 3.0573232173919678, "step": 6186, "token_acc": 0.2914137193860202 }, { "epoch": 3.6267956610964527, "grad_norm": 0.2449892276346442, "learning_rate": 0.000286074572182803, "loss": 3.026909828186035, "step": 6187, "token_acc": 0.2976281236764083 }, { "epoch": 3.627381999413662, "grad_norm": 0.2238042058608255, "learning_rate": 0.0002860684542244341, "loss": 3.037747621536255, "step": 6188, "token_acc": 0.29374566874566876 }, { "epoch": 3.627968337730871, "grad_norm": 0.2274493480014728, "learning_rate": 0.0002860623349878854, "loss": 3.045605182647705, "step": 6189, "token_acc": 0.2931885568619403 }, { "epoch": 3.62855467604808, "grad_norm": 0.22909641035388437, "learning_rate": 0.0002860562144732145, "loss": 3.058323860168457, "step": 6190, "token_acc": 0.2923006865256688 }, { "epoch": 3.6291410143652887, "grad_norm": 0.2337598865604347, "learning_rate": 0.0002860500926804787, "loss": 3.056485652923584, "step": 6191, "token_acc": 0.2907898409869107 }, { "epoch": 3.629727352682498, "grad_norm": 0.252443491103507, "learning_rate": 0.00028604396960973564, "loss": 3.0622525215148926, "step": 6192, "token_acc": 0.29132857001401896 }, { "epoch": 3.630313690999707, "grad_norm": 0.24914410942169754, "learning_rate": 0.0002860378452610428, "loss": 3.0539793968200684, "step": 6193, "token_acc": 0.2925750362136371 }, { "epoch": 3.630900029316916, "grad_norm": 0.2186735183118364, "learning_rate": 0.00028603171963445767, "loss": 3.0599679946899414, "step": 6194, "token_acc": 0.29024346396836 }, { "epoch": 3.6314863676341247, "grad_norm": 0.2340515365580505, "learning_rate": 0.00028602559273003793, "loss": 3.0474188327789307, "step": 6195, "token_acc": 0.2932076845799642 }, { "epoch": 3.632072705951334, "grad_norm": 0.21326655044349402, "learning_rate": 0.000286019464547841, "loss": 3.047955274581909, "step": 6196, "token_acc": 0.29310788383835834 }, { "epoch": 3.632659044268543, "grad_norm": 0.23455710203427743, "learning_rate": 0.0002860133350879245, "loss": 3.0831246376037598, "step": 6197, "token_acc": 0.2889303894857968 }, { "epoch": 3.633245382585752, "grad_norm": 0.26456820958452226, "learning_rate": 0.00028600720435034596, "loss": 3.057317018508911, "step": 6198, "token_acc": 0.2919823521258261 }, { "epoch": 3.633831720902961, "grad_norm": 0.25347601348141063, "learning_rate": 0.000286001072335163, "loss": 3.0943918228149414, "step": 6199, "token_acc": 0.28618876638354385 }, { "epoch": 3.6344180592201702, "grad_norm": 0.2667852238027669, "learning_rate": 0.0002859949390424333, "loss": 3.040538787841797, "step": 6200, "token_acc": 0.2953380706849001 }, { "epoch": 3.635004397537379, "grad_norm": 0.2676599133263569, "learning_rate": 0.00028598880447221436, "loss": 3.0291266441345215, "step": 6201, "token_acc": 0.2975971636933873 }, { "epoch": 3.635590735854588, "grad_norm": 0.2363339305363119, "learning_rate": 0.00028598266862456386, "loss": 3.0860610008239746, "step": 6202, "token_acc": 0.2860995459243455 }, { "epoch": 3.636177074171797, "grad_norm": 0.2609171603385685, "learning_rate": 0.0002859765314995394, "loss": 3.0501694679260254, "step": 6203, "token_acc": 0.2935008888166416 }, { "epoch": 3.6367634124890063, "grad_norm": 0.24586799200170772, "learning_rate": 0.0002859703930971987, "loss": 3.070349931716919, "step": 6204, "token_acc": 0.29098656178302196 }, { "epoch": 3.637349750806215, "grad_norm": 0.2855009590773437, "learning_rate": 0.00028596425341759934, "loss": 3.0581700801849365, "step": 6205, "token_acc": 0.2931053366921965 }, { "epoch": 3.637936089123424, "grad_norm": 0.2562927867775573, "learning_rate": 0.00028595811246079903, "loss": 3.0518805980682373, "step": 6206, "token_acc": 0.29186901555219125 }, { "epoch": 3.638522427440633, "grad_norm": 0.25392730215652387, "learning_rate": 0.0002859519702268555, "loss": 3.072758674621582, "step": 6207, "token_acc": 0.2905806954910639 }, { "epoch": 3.6391087657578423, "grad_norm": 0.2455116659310737, "learning_rate": 0.0002859458267158264, "loss": 3.0208051204681396, "step": 6208, "token_acc": 0.2975772200519298 }, { "epoch": 3.6396951040750514, "grad_norm": 0.2592505806253727, "learning_rate": 0.0002859396819277694, "loss": 3.0785317420959473, "step": 6209, "token_acc": 0.2895008534949524 }, { "epoch": 3.6402814423922605, "grad_norm": 0.22332606609230557, "learning_rate": 0.00028593353586274235, "loss": 3.0526304244995117, "step": 6210, "token_acc": 0.292312018243543 }, { "epoch": 3.6408677807094696, "grad_norm": 0.27188332554210637, "learning_rate": 0.0002859273885208028, "loss": 3.056244373321533, "step": 6211, "token_acc": 0.2927382297551789 }, { "epoch": 3.6414541190266783, "grad_norm": 0.24207319500751798, "learning_rate": 0.00028592123990200865, "loss": 3.0491786003112793, "step": 6212, "token_acc": 0.2913420431259557 }, { "epoch": 3.6420404573438874, "grad_norm": 0.24832210699905521, "learning_rate": 0.00028591509000641766, "loss": 3.0091922283172607, "step": 6213, "token_acc": 0.299232985370599 }, { "epoch": 3.6426267956610965, "grad_norm": 0.2533741810577404, "learning_rate": 0.0002859089388340875, "loss": 3.053891658782959, "step": 6214, "token_acc": 0.29287441040057327 }, { "epoch": 3.6432131339783056, "grad_norm": 0.24452458245895556, "learning_rate": 0.000285902786385076, "loss": 3.0840253829956055, "step": 6215, "token_acc": 0.28888361853400496 }, { "epoch": 3.6437994722955143, "grad_norm": 0.2260826227990935, "learning_rate": 0.00028589663265944095, "loss": 3.0763978958129883, "step": 6216, "token_acc": 0.2893275067408716 }, { "epoch": 3.6443858106127234, "grad_norm": 0.2624485734753881, "learning_rate": 0.00028589047765724017, "loss": 3.0182533264160156, "step": 6217, "token_acc": 0.2966159834940898 }, { "epoch": 3.6449721489299325, "grad_norm": 0.2575537641801003, "learning_rate": 0.00028588432137853146, "loss": 3.0456085205078125, "step": 6218, "token_acc": 0.2950331841977033 }, { "epoch": 3.6455584872471416, "grad_norm": 0.23851621988862967, "learning_rate": 0.00028587816382337266, "loss": 3.0809545516967773, "step": 6219, "token_acc": 0.2894723668402083 }, { "epoch": 3.6461448255643507, "grad_norm": 0.27164660591724127, "learning_rate": 0.0002858720049918216, "loss": 3.0093164443969727, "step": 6220, "token_acc": 0.298966851510845 }, { "epoch": 3.64673116388156, "grad_norm": 0.31191312216162975, "learning_rate": 0.0002858658448839361, "loss": 3.0800726413726807, "step": 6221, "token_acc": 0.28877864656345076 }, { "epoch": 3.647317502198769, "grad_norm": 0.2893622206488456, "learning_rate": 0.00028585968349977416, "loss": 3.0336108207702637, "step": 6222, "token_acc": 0.2948400791624106 }, { "epoch": 3.6479038405159776, "grad_norm": 0.2723611348941157, "learning_rate": 0.0002858535208393935, "loss": 3.052973747253418, "step": 6223, "token_acc": 0.2920969446937878 }, { "epoch": 3.6484901788331867, "grad_norm": 0.24734548562079076, "learning_rate": 0.0002858473569028521, "loss": 3.0512983798980713, "step": 6224, "token_acc": 0.29333491180300697 }, { "epoch": 3.649076517150396, "grad_norm": 0.31121718894831507, "learning_rate": 0.00028584119169020787, "loss": 3.026674509048462, "step": 6225, "token_acc": 0.29627413221217164 }, { "epoch": 3.649662855467605, "grad_norm": 0.2438336269661788, "learning_rate": 0.0002858350252015186, "loss": 3.0373945236206055, "step": 6226, "token_acc": 0.2940394776926875 }, { "epoch": 3.6502491937848136, "grad_norm": 0.2968199929974087, "learning_rate": 0.0002858288574368424, "loss": 3.076096296310425, "step": 6227, "token_acc": 0.28928181915200984 }, { "epoch": 3.6508355321020227, "grad_norm": 0.25633311263911157, "learning_rate": 0.0002858226883962371, "loss": 3.0544490814208984, "step": 6228, "token_acc": 0.2936423559430151 }, { "epoch": 3.651421870419232, "grad_norm": 0.2495506295652773, "learning_rate": 0.00028581651807976067, "loss": 2.994093894958496, "step": 6229, "token_acc": 0.3019037762764099 }, { "epoch": 3.652008208736441, "grad_norm": 0.26531197448727034, "learning_rate": 0.00028581034648747104, "loss": 3.0873053073883057, "step": 6230, "token_acc": 0.2889724041662193 }, { "epoch": 3.65259454705365, "grad_norm": 0.3039457308766114, "learning_rate": 0.0002858041736194262, "loss": 3.062359094619751, "step": 6231, "token_acc": 0.29122916131474696 }, { "epoch": 3.653180885370859, "grad_norm": 0.3294123808697914, "learning_rate": 0.00028579799947568414, "loss": 3.0800154209136963, "step": 6232, "token_acc": 0.28829030991288457 }, { "epoch": 3.6537672236880683, "grad_norm": 0.26855439668391645, "learning_rate": 0.00028579182405630293, "loss": 3.0684237480163574, "step": 6233, "token_acc": 0.2926446032282892 }, { "epoch": 3.654353562005277, "grad_norm": 0.2744053118200358, "learning_rate": 0.00028578564736134047, "loss": 3.0578064918518066, "step": 6234, "token_acc": 0.2916688122431745 }, { "epoch": 3.654939900322486, "grad_norm": 0.2781075118299357, "learning_rate": 0.00028577946939085485, "loss": 3.070779800415039, "step": 6235, "token_acc": 0.29041626080957833 }, { "epoch": 3.655526238639695, "grad_norm": 0.28853290625949846, "learning_rate": 0.00028577329014490405, "loss": 3.114424467086792, "step": 6236, "token_acc": 0.28584262277121963 }, { "epoch": 3.6561125769569043, "grad_norm": 0.25789046307751595, "learning_rate": 0.00028576710962354617, "loss": 3.016322135925293, "step": 6237, "token_acc": 0.2966898807558528 }, { "epoch": 3.656698915274113, "grad_norm": 0.2686188031717648, "learning_rate": 0.00028576092782683926, "loss": 3.0384693145751953, "step": 6238, "token_acc": 0.2936802973977695 }, { "epoch": 3.657285253591322, "grad_norm": 0.30136181015688024, "learning_rate": 0.0002857547447548413, "loss": 3.026700973510742, "step": 6239, "token_acc": 0.2973675552062911 }, { "epoch": 3.657871591908531, "grad_norm": 0.24176732680901522, "learning_rate": 0.0002857485604076105, "loss": 3.014446258544922, "step": 6240, "token_acc": 0.298042604047223 }, { "epoch": 3.6584579302257403, "grad_norm": 0.2988757082956466, "learning_rate": 0.0002857423747852049, "loss": 3.0650525093078613, "step": 6241, "token_acc": 0.2927221157373204 }, { "epoch": 3.6590442685429494, "grad_norm": 0.2588918181012523, "learning_rate": 0.0002857361878876826, "loss": 3.05169677734375, "step": 6242, "token_acc": 0.2926155286564569 }, { "epoch": 3.6596306068601585, "grad_norm": 0.25534315713592126, "learning_rate": 0.0002857299997151017, "loss": 3.074030876159668, "step": 6243, "token_acc": 0.28976198122970415 }, { "epoch": 3.660216945177367, "grad_norm": 0.25186096541950054, "learning_rate": 0.0002857238102675204, "loss": 3.0542242527008057, "step": 6244, "token_acc": 0.2932612849748801 }, { "epoch": 3.6608032834945763, "grad_norm": 0.24200283496795183, "learning_rate": 0.00028571761954499674, "loss": 3.040381908416748, "step": 6245, "token_acc": 0.2949283973546764 }, { "epoch": 3.6613896218117854, "grad_norm": 0.27428321794677535, "learning_rate": 0.000285711427547589, "loss": 3.018749237060547, "step": 6246, "token_acc": 0.29770466090183556 }, { "epoch": 3.6619759601289945, "grad_norm": 0.2554086582709873, "learning_rate": 0.00028570523427535517, "loss": 3.0863633155822754, "step": 6247, "token_acc": 0.28892004494953777 }, { "epoch": 3.6625622984462036, "grad_norm": 0.24378981407741745, "learning_rate": 0.00028569903972835357, "loss": 3.0609540939331055, "step": 6248, "token_acc": 0.2918095956532598 }, { "epoch": 3.6631486367634123, "grad_norm": 0.25192608311835574, "learning_rate": 0.0002856928439066423, "loss": 3.0808660984039307, "step": 6249, "token_acc": 0.2868351530650158 }, { "epoch": 3.6637349750806214, "grad_norm": 0.22854578902432138, "learning_rate": 0.0002856866468102797, "loss": 3.0131139755249023, "step": 6250, "token_acc": 0.2966140073723012 }, { "epoch": 3.6643213133978305, "grad_norm": 0.23483310703814317, "learning_rate": 0.0002856804484393239, "loss": 3.0671486854553223, "step": 6251, "token_acc": 0.29119262142133606 }, { "epoch": 3.6649076517150396, "grad_norm": 0.2414880184589769, "learning_rate": 0.00028567424879383306, "loss": 3.033574104309082, "step": 6252, "token_acc": 0.2951343473223962 }, { "epoch": 3.6654939900322487, "grad_norm": 0.22063673089474994, "learning_rate": 0.0002856680478738655, "loss": 3.0770397186279297, "step": 6253, "token_acc": 0.29021020280091614 }, { "epoch": 3.666080328349458, "grad_norm": 0.24556187169892654, "learning_rate": 0.0002856618456794794, "loss": 3.039295196533203, "step": 6254, "token_acc": 0.2946949748651187 }, { "epoch": 3.6666666666666665, "grad_norm": 0.2231355415183732, "learning_rate": 0.0002856556422107331, "loss": 3.0602447986602783, "step": 6255, "token_acc": 0.2919747796925746 }, { "epoch": 3.6672530049838756, "grad_norm": 0.23473626656898686, "learning_rate": 0.00028564943746768487, "loss": 3.0048346519470215, "step": 6256, "token_acc": 0.2999019876483359 }, { "epoch": 3.6678393433010847, "grad_norm": 0.2298781700690723, "learning_rate": 0.00028564323145039295, "loss": 3.049794912338257, "step": 6257, "token_acc": 0.2926610376506824 }, { "epoch": 3.668425681618294, "grad_norm": 0.22078621206417584, "learning_rate": 0.0002856370241589156, "loss": 3.069427013397217, "step": 6258, "token_acc": 0.291476882514071 }, { "epoch": 3.6690120199355025, "grad_norm": 0.2496029972358602, "learning_rate": 0.00028563081559331125, "loss": 3.0456881523132324, "step": 6259, "token_acc": 0.29351389228212516 }, { "epoch": 3.6695983582527116, "grad_norm": 0.21231488870326154, "learning_rate": 0.0002856246057536381, "loss": 3.003896951675415, "step": 6260, "token_acc": 0.29941746970351196 }, { "epoch": 3.6701846965699207, "grad_norm": 0.24962407751473809, "learning_rate": 0.0002856183946399546, "loss": 3.067966938018799, "step": 6261, "token_acc": 0.29019375484517623 }, { "epoch": 3.67077103488713, "grad_norm": 0.23036958057995113, "learning_rate": 0.00028561218225231895, "loss": 3.085524797439575, "step": 6262, "token_acc": 0.2899528036506845 }, { "epoch": 3.671357373204339, "grad_norm": 0.24390954942617307, "learning_rate": 0.00028560596859078963, "loss": 3.068650722503662, "step": 6263, "token_acc": 0.2891924101322655 }, { "epoch": 3.671943711521548, "grad_norm": 0.22876964180172474, "learning_rate": 0.000285599753655425, "loss": 3.001950263977051, "step": 6264, "token_acc": 0.3002924565288801 }, { "epoch": 3.672530049838757, "grad_norm": 0.2226076018504195, "learning_rate": 0.00028559353744628333, "loss": 3.01513671875, "step": 6265, "token_acc": 0.29728039332457706 }, { "epoch": 3.673116388155966, "grad_norm": 0.23936517036321858, "learning_rate": 0.0002855873199634232, "loss": 3.0642800331115723, "step": 6266, "token_acc": 0.29035332025492727 }, { "epoch": 3.673702726473175, "grad_norm": 0.2295026491582116, "learning_rate": 0.0002855811012069028, "loss": 3.065189838409424, "step": 6267, "token_acc": 0.2913887888737821 }, { "epoch": 3.674289064790384, "grad_norm": 0.23481535624969016, "learning_rate": 0.00028557488117678066, "loss": 3.0329036712646484, "step": 6268, "token_acc": 0.29511549172029516 }, { "epoch": 3.674875403107593, "grad_norm": 0.24376107587618337, "learning_rate": 0.00028556865987311523, "loss": 3.007115602493286, "step": 6269, "token_acc": 0.30091812568833143 }, { "epoch": 3.675461741424802, "grad_norm": 0.2079329555934218, "learning_rate": 0.00028556243729596496, "loss": 3.0368456840515137, "step": 6270, "token_acc": 0.2947684725025873 }, { "epoch": 3.676048079742011, "grad_norm": 0.24340327717037627, "learning_rate": 0.0002855562134453882, "loss": 3.0828568935394287, "step": 6271, "token_acc": 0.28807240245680477 }, { "epoch": 3.67663441805922, "grad_norm": 0.2431885110915972, "learning_rate": 0.0002855499883214435, "loss": 3.066739082336426, "step": 6272, "token_acc": 0.2911608736742623 }, { "epoch": 3.677220756376429, "grad_norm": 0.22183506098538788, "learning_rate": 0.00028554376192418935, "loss": 3.066556453704834, "step": 6273, "token_acc": 0.29004406415057077 }, { "epoch": 3.6778070946936383, "grad_norm": 0.2349421391947139, "learning_rate": 0.0002855375342536842, "loss": 3.0259203910827637, "step": 6274, "token_acc": 0.29550064539922555 }, { "epoch": 3.6783934330108474, "grad_norm": 0.2391504109763641, "learning_rate": 0.0002855313053099865, "loss": 3.035552501678467, "step": 6275, "token_acc": 0.29476501234459157 }, { "epoch": 3.6789797713280565, "grad_norm": 0.2528250370642422, "learning_rate": 0.00028552507509315485, "loss": 3.041868209838867, "step": 6276, "token_acc": 0.2941566195648758 }, { "epoch": 3.679566109645265, "grad_norm": 0.2723296401651812, "learning_rate": 0.00028551884360324776, "loss": 3.0419082641601562, "step": 6277, "token_acc": 0.29392214313459675 }, { "epoch": 3.6801524479624743, "grad_norm": 0.2449935336146774, "learning_rate": 0.0002855126108403237, "loss": 3.024402379989624, "step": 6278, "token_acc": 0.2961206507843044 }, { "epoch": 3.6807387862796834, "grad_norm": 0.24396717339275592, "learning_rate": 0.0002855063768044413, "loss": 3.0179951190948486, "step": 6279, "token_acc": 0.29862773439790347 }, { "epoch": 3.6813251245968925, "grad_norm": 0.2718716812987276, "learning_rate": 0.00028550014149565905, "loss": 3.0523593425750732, "step": 6280, "token_acc": 0.2929203814570085 }, { "epoch": 3.681911462914101, "grad_norm": 0.23445317676148258, "learning_rate": 0.00028549390491403563, "loss": 3.0370612144470215, "step": 6281, "token_acc": 0.29510116621626176 }, { "epoch": 3.6824978012313103, "grad_norm": 0.26097352342735486, "learning_rate": 0.0002854876670596295, "loss": 3.021806240081787, "step": 6282, "token_acc": 0.2967319774053429 }, { "epoch": 3.6830841395485194, "grad_norm": 0.28311740173105376, "learning_rate": 0.0002854814279324993, "loss": 3.0518534183502197, "step": 6283, "token_acc": 0.2936101347242122 }, { "epoch": 3.6836704778657285, "grad_norm": 0.2570165347182792, "learning_rate": 0.0002854751875327037, "loss": 3.116191864013672, "step": 6284, "token_acc": 0.28238022245804345 }, { "epoch": 3.6842568161829377, "grad_norm": 0.2667994279990382, "learning_rate": 0.00028546894586030125, "loss": 3.0413732528686523, "step": 6285, "token_acc": 0.2934960319037588 }, { "epoch": 3.6848431545001468, "grad_norm": 0.24408015027958915, "learning_rate": 0.0002854627029153506, "loss": 3.0263116359710693, "step": 6286, "token_acc": 0.29689051109529013 }, { "epoch": 3.685429492817356, "grad_norm": 0.2382946833418734, "learning_rate": 0.00028545645869791034, "loss": 3.0616979598999023, "step": 6287, "token_acc": 0.2904464800004078 }, { "epoch": 3.6860158311345645, "grad_norm": 0.24487542150914396, "learning_rate": 0.00028545021320803925, "loss": 3.0452194213867188, "step": 6288, "token_acc": 0.2941673955885298 }, { "epoch": 3.6866021694517737, "grad_norm": 0.25132215773364, "learning_rate": 0.00028544396644579587, "loss": 3.0534563064575195, "step": 6289, "token_acc": 0.29234032512571956 }, { "epoch": 3.6871885077689828, "grad_norm": 0.26664778014370516, "learning_rate": 0.000285437718411239, "loss": 3.0950026512145996, "step": 6290, "token_acc": 0.2868106797754413 }, { "epoch": 3.687774846086192, "grad_norm": 0.26974034346403797, "learning_rate": 0.00028543146910442723, "loss": 3.020292043685913, "step": 6291, "token_acc": 0.2972572333696326 }, { "epoch": 3.6883611844034006, "grad_norm": 0.2645694897598869, "learning_rate": 0.0002854252185254193, "loss": 3.016523599624634, "step": 6292, "token_acc": 0.29942350792190797 }, { "epoch": 3.6889475227206097, "grad_norm": 0.26934204278972035, "learning_rate": 0.00028541896667427395, "loss": 3.0356132984161377, "step": 6293, "token_acc": 0.2952798444108295 }, { "epoch": 3.6895338610378188, "grad_norm": 0.26816659151293093, "learning_rate": 0.0002854127135510499, "loss": 3.0199217796325684, "step": 6294, "token_acc": 0.29616029333311666 }, { "epoch": 3.690120199355028, "grad_norm": 0.27938123406242554, "learning_rate": 0.00028540645915580586, "loss": 3.0653321743011475, "step": 6295, "token_acc": 0.2909547325525825 }, { "epoch": 3.690706537672237, "grad_norm": 0.2652623182298458, "learning_rate": 0.00028540020348860064, "loss": 3.029810905456543, "step": 6296, "token_acc": 0.29663038519194423 }, { "epoch": 3.691292875989446, "grad_norm": 0.2417985982007034, "learning_rate": 0.00028539394654949286, "loss": 3.0540506839752197, "step": 6297, "token_acc": 0.29160489535513817 }, { "epoch": 3.6918792143066548, "grad_norm": 0.24552760084472797, "learning_rate": 0.00028538768833854145, "loss": 3.002300262451172, "step": 6298, "token_acc": 0.29771533912611203 }, { "epoch": 3.692465552623864, "grad_norm": 0.27408660619844993, "learning_rate": 0.00028538142885580517, "loss": 3.0698652267456055, "step": 6299, "token_acc": 0.2936253150036925 }, { "epoch": 3.693051890941073, "grad_norm": 0.21819721342608012, "learning_rate": 0.0002853751681013428, "loss": 3.0275278091430664, "step": 6300, "token_acc": 0.2962553554820315 }, { "epoch": 3.693638229258282, "grad_norm": 0.2520690924503071, "learning_rate": 0.0002853689060752131, "loss": 3.0237326622009277, "step": 6301, "token_acc": 0.2977183692683303 }, { "epoch": 3.6942245675754912, "grad_norm": 0.2607935739738976, "learning_rate": 0.00028536264277747496, "loss": 3.064016342163086, "step": 6302, "token_acc": 0.2926070665758963 }, { "epoch": 3.6948109058927, "grad_norm": 0.2319666157670964, "learning_rate": 0.0002853563782081872, "loss": 3.045077323913574, "step": 6303, "token_acc": 0.2939700391122739 }, { "epoch": 3.695397244209909, "grad_norm": 0.252151786142626, "learning_rate": 0.00028535011236740864, "loss": 3.103978157043457, "step": 6304, "token_acc": 0.28707327524639337 }, { "epoch": 3.695983582527118, "grad_norm": 0.2446633525792926, "learning_rate": 0.0002853438452551982, "loss": 3.0283732414245605, "step": 6305, "token_acc": 0.2949966336846352 }, { "epoch": 3.6965699208443272, "grad_norm": 0.2569562495855419, "learning_rate": 0.0002853375768716147, "loss": 3.0257489681243896, "step": 6306, "token_acc": 0.29564465254074884 }, { "epoch": 3.6971562591615363, "grad_norm": 0.23603421873556632, "learning_rate": 0.000285331307216717, "loss": 3.0712578296661377, "step": 6307, "token_acc": 0.2899419215837297 }, { "epoch": 3.6977425974787455, "grad_norm": 0.24353115834599856, "learning_rate": 0.0002853250362905641, "loss": 3.09810209274292, "step": 6308, "token_acc": 0.2863721321493315 }, { "epoch": 3.698328935795954, "grad_norm": 0.23899595536552137, "learning_rate": 0.0002853187640932148, "loss": 3.030752658843994, "step": 6309, "token_acc": 0.29486384881121724 }, { "epoch": 3.6989152741131632, "grad_norm": 0.22489393718951928, "learning_rate": 0.00028531249062472804, "loss": 3.03317928314209, "step": 6310, "token_acc": 0.29577992574540296 }, { "epoch": 3.6995016124303723, "grad_norm": 0.23873722432503616, "learning_rate": 0.00028530621588516276, "loss": 3.0431041717529297, "step": 6311, "token_acc": 0.2951366577406165 }, { "epoch": 3.7000879507475815, "grad_norm": 0.2255437162308836, "learning_rate": 0.00028529993987457794, "loss": 2.994525671005249, "step": 6312, "token_acc": 0.2988028972863408 }, { "epoch": 3.70067428906479, "grad_norm": 0.22394343794989172, "learning_rate": 0.00028529366259303246, "loss": 3.036836862564087, "step": 6313, "token_acc": 0.29527252337190607 }, { "epoch": 3.7012606273819992, "grad_norm": 0.2560270260913191, "learning_rate": 0.00028528738404058537, "loss": 3.0621659755706787, "step": 6314, "token_acc": 0.2905362726210403 }, { "epoch": 3.7018469656992083, "grad_norm": 0.23908309431002314, "learning_rate": 0.0002852811042172956, "loss": 3.013627290725708, "step": 6315, "token_acc": 0.2973761163152451 }, { "epoch": 3.7024333040164175, "grad_norm": 0.2226503869397667, "learning_rate": 0.00028527482312322216, "loss": 3.0785021781921387, "step": 6316, "token_acc": 0.289618438792825 }, { "epoch": 3.7030196423336266, "grad_norm": 0.24512917350220673, "learning_rate": 0.000285268540758424, "loss": 3.0493693351745605, "step": 6317, "token_acc": 0.2942794709231362 }, { "epoch": 3.7036059806508357, "grad_norm": 0.23357419535857016, "learning_rate": 0.0002852622571229603, "loss": 3.0529932975769043, "step": 6318, "token_acc": 0.2942680721012911 }, { "epoch": 3.704192318968045, "grad_norm": 0.2504386521606627, "learning_rate": 0.00028525597221688983, "loss": 3.040954351425171, "step": 6319, "token_acc": 0.29651677488194106 }, { "epoch": 3.7047786572852535, "grad_norm": 0.2563645836142691, "learning_rate": 0.0002852496860402718, "loss": 3.036862373352051, "step": 6320, "token_acc": 0.29533590558329553 }, { "epoch": 3.7053649956024626, "grad_norm": 0.23995465300416452, "learning_rate": 0.0002852433985931652, "loss": 3.0738558769226074, "step": 6321, "token_acc": 0.2909190982956109 }, { "epoch": 3.7059513339196717, "grad_norm": 0.22603509487138204, "learning_rate": 0.00028523710987562914, "loss": 3.0628132820129395, "step": 6322, "token_acc": 0.29029722797920215 }, { "epoch": 3.706537672236881, "grad_norm": 0.25268831251955265, "learning_rate": 0.00028523081988772263, "loss": 3.0035760402679443, "step": 6323, "token_acc": 0.3014510891438464 }, { "epoch": 3.7071240105540895, "grad_norm": 0.23890538853445126, "learning_rate": 0.00028522452862950485, "loss": 3.036486864089966, "step": 6324, "token_acc": 0.29546627542933496 }, { "epoch": 3.7077103488712986, "grad_norm": 0.323565978030968, "learning_rate": 0.00028521823610103483, "loss": 3.0416018962860107, "step": 6325, "token_acc": 0.2940617951004564 }, { "epoch": 3.7082966871885077, "grad_norm": 0.24874471140667018, "learning_rate": 0.00028521194230237166, "loss": 3.0493903160095215, "step": 6326, "token_acc": 0.2933832742043971 }, { "epoch": 3.708883025505717, "grad_norm": 0.23721255375813155, "learning_rate": 0.00028520564723357453, "loss": 3.0542659759521484, "step": 6327, "token_acc": 0.2919549364863407 }, { "epoch": 3.709469363822926, "grad_norm": 0.26445135370276834, "learning_rate": 0.0002851993508947025, "loss": 3.080838918685913, "step": 6328, "token_acc": 0.2882543075760924 }, { "epoch": 3.710055702140135, "grad_norm": 0.24042224923126732, "learning_rate": 0.00028519305328581477, "loss": 3.061725616455078, "step": 6329, "token_acc": 0.29148789702868855 }, { "epoch": 3.710642040457344, "grad_norm": 0.2445990834729015, "learning_rate": 0.0002851867544069705, "loss": 3.0411431789398193, "step": 6330, "token_acc": 0.29518577759131553 }, { "epoch": 3.711228378774553, "grad_norm": 0.22385064252800183, "learning_rate": 0.0002851804542582288, "loss": 3.027780532836914, "step": 6331, "token_acc": 0.29649943104294413 }, { "epoch": 3.711814717091762, "grad_norm": 0.27817159024959176, "learning_rate": 0.0002851741528396489, "loss": 3.0399398803710938, "step": 6332, "token_acc": 0.29482829213368134 }, { "epoch": 3.712401055408971, "grad_norm": 0.25271228598506756, "learning_rate": 0.00028516785015128993, "loss": 3.0684733390808105, "step": 6333, "token_acc": 0.2900664740563268 }, { "epoch": 3.71298739372618, "grad_norm": 0.25852219919806274, "learning_rate": 0.0002851615461932113, "loss": 3.0366153717041016, "step": 6334, "token_acc": 0.2954201907958929 }, { "epoch": 3.713573732043389, "grad_norm": 0.2386639261627425, "learning_rate": 0.00028515524096547196, "loss": 3.0091781616210938, "step": 6335, "token_acc": 0.2968561755499233 }, { "epoch": 3.714160070360598, "grad_norm": 0.22826335240758994, "learning_rate": 0.0002851489344681313, "loss": 3.0571823120117188, "step": 6336, "token_acc": 0.2925041475242471 }, { "epoch": 3.714746408677807, "grad_norm": 0.2466755645272033, "learning_rate": 0.00028514262670124846, "loss": 3.0207712650299072, "step": 6337, "token_acc": 0.29824639266497666 }, { "epoch": 3.715332746995016, "grad_norm": 0.22324258184515208, "learning_rate": 0.0002851363176648828, "loss": 3.0651774406433105, "step": 6338, "token_acc": 0.28958385075673976 }, { "epoch": 3.7159190853122253, "grad_norm": 0.24490794121404746, "learning_rate": 0.0002851300073590935, "loss": 3.1067333221435547, "step": 6339, "token_acc": 0.28673464641400803 }, { "epoch": 3.7165054236294344, "grad_norm": 0.24905518737434362, "learning_rate": 0.0002851236957839399, "loss": 3.0595171451568604, "step": 6340, "token_acc": 0.291733398063888 }, { "epoch": 3.7170917619466435, "grad_norm": 0.2386830044564574, "learning_rate": 0.0002851173829394812, "loss": 3.034095287322998, "step": 6341, "token_acc": 0.2957442964798183 }, { "epoch": 3.717678100263852, "grad_norm": 0.21168080318368557, "learning_rate": 0.00028511106882577684, "loss": 3.0476434230804443, "step": 6342, "token_acc": 0.2944569451247783 }, { "epoch": 3.7182644385810613, "grad_norm": 0.2436826928759395, "learning_rate": 0.000285104753442886, "loss": 3.0797276496887207, "step": 6343, "token_acc": 0.2902818101530389 }, { "epoch": 3.7188507768982704, "grad_norm": 0.21375735911281327, "learning_rate": 0.0002850984367908681, "loss": 3.0940794944763184, "step": 6344, "token_acc": 0.28747019583679373 }, { "epoch": 3.7194371152154795, "grad_norm": 0.23570682805209767, "learning_rate": 0.00028509211886978237, "loss": 3.060107707977295, "step": 6345, "token_acc": 0.29070612474246116 }, { "epoch": 3.720023453532688, "grad_norm": 0.2386088563016908, "learning_rate": 0.00028508579967968827, "loss": 3.0342483520507812, "step": 6346, "token_acc": 0.2949751625497862 }, { "epoch": 3.7206097918498973, "grad_norm": 0.23327718435811648, "learning_rate": 0.0002850794792206451, "loss": 3.041989803314209, "step": 6347, "token_acc": 0.2940312556854287 }, { "epoch": 3.7211961301671064, "grad_norm": 0.24661049823514744, "learning_rate": 0.0002850731574927123, "loss": 3.012538433074951, "step": 6348, "token_acc": 0.29728749498921486 }, { "epoch": 3.7217824684843155, "grad_norm": 0.25265829111705396, "learning_rate": 0.0002850668344959491, "loss": 3.053062915802002, "step": 6349, "token_acc": 0.2930271388021937 }, { "epoch": 3.7223688068015246, "grad_norm": 0.2706767943312733, "learning_rate": 0.00028506051023041506, "loss": 3.068315029144287, "step": 6350, "token_acc": 0.29156016149355823 }, { "epoch": 3.7229551451187337, "grad_norm": 0.28424886803556937, "learning_rate": 0.0002850541846961695, "loss": 3.0167627334594727, "step": 6351, "token_acc": 0.29693101087383794 }, { "epoch": 3.7235414834359424, "grad_norm": 0.2873740574103147, "learning_rate": 0.0002850478578932719, "loss": 3.108415126800537, "step": 6352, "token_acc": 0.28665363154159923 }, { "epoch": 3.7241278217531515, "grad_norm": 0.2680451038955801, "learning_rate": 0.0002850415298217816, "loss": 3.094160556793213, "step": 6353, "token_acc": 0.28661077520126393 }, { "epoch": 3.7247141600703606, "grad_norm": 0.24137178719796246, "learning_rate": 0.00028503520048175815, "loss": 3.0768256187438965, "step": 6354, "token_acc": 0.2896692314449744 }, { "epoch": 3.7253004983875697, "grad_norm": 0.23462863299580317, "learning_rate": 0.0002850288698732609, "loss": 3.0688531398773193, "step": 6355, "token_acc": 0.2903470343682921 }, { "epoch": 3.7258868367047784, "grad_norm": 0.2454133515991619, "learning_rate": 0.0002850225379963494, "loss": 3.0415146350860596, "step": 6356, "token_acc": 0.29254136208151743 }, { "epoch": 3.7264731750219875, "grad_norm": 0.25109491492211666, "learning_rate": 0.0002850162048510831, "loss": 3.0239834785461426, "step": 6357, "token_acc": 0.2973652096590287 }, { "epoch": 3.7270595133391966, "grad_norm": 0.25680519445512295, "learning_rate": 0.0002850098704375215, "loss": 3.0739731788635254, "step": 6358, "token_acc": 0.288901028627056 }, { "epoch": 3.7276458516564057, "grad_norm": 0.24842415464280718, "learning_rate": 0.00028500353475572406, "loss": 3.051112651824951, "step": 6359, "token_acc": 0.29269594051185416 }, { "epoch": 3.728232189973615, "grad_norm": 0.25902259619998164, "learning_rate": 0.0002849971978057504, "loss": 3.069091796875, "step": 6360, "token_acc": 0.291835238818156 }, { "epoch": 3.728818528290824, "grad_norm": 0.2702791140852835, "learning_rate": 0.0002849908595876599, "loss": 3.053168296813965, "step": 6361, "token_acc": 0.2925977511121995 }, { "epoch": 3.729404866608033, "grad_norm": 0.24184871618343232, "learning_rate": 0.0002849845201015122, "loss": 3.051150321960449, "step": 6362, "token_acc": 0.2946499369077694 }, { "epoch": 3.7299912049252417, "grad_norm": 0.2600497621089124, "learning_rate": 0.00028497817934736684, "loss": 3.046060085296631, "step": 6363, "token_acc": 0.29459531077217854 }, { "epoch": 3.730577543242451, "grad_norm": 0.2530514816247341, "learning_rate": 0.0002849718373252834, "loss": 3.081827402114868, "step": 6364, "token_acc": 0.2881765902142732 }, { "epoch": 3.73116388155966, "grad_norm": 0.26290493113051316, "learning_rate": 0.00028496549403532137, "loss": 3.0431737899780273, "step": 6365, "token_acc": 0.29433937123485293 }, { "epoch": 3.731750219876869, "grad_norm": 0.22599488903163048, "learning_rate": 0.0002849591494775404, "loss": 3.0695924758911133, "step": 6366, "token_acc": 0.29004230460337516 }, { "epoch": 3.7323365581940777, "grad_norm": 0.25546874435239947, "learning_rate": 0.00028495280365200014, "loss": 3.0388965606689453, "step": 6367, "token_acc": 0.29544651561881535 }, { "epoch": 3.732922896511287, "grad_norm": 0.2554775953291527, "learning_rate": 0.0002849464565587601, "loss": 3.0486574172973633, "step": 6368, "token_acc": 0.29296297650321457 }, { "epoch": 3.733509234828496, "grad_norm": 0.25269194270285406, "learning_rate": 0.0002849401081978799, "loss": 3.0564098358154297, "step": 6369, "token_acc": 0.29282154208114913 }, { "epoch": 3.734095573145705, "grad_norm": 0.2628053181491472, "learning_rate": 0.00028493375856941927, "loss": 3.1092867851257324, "step": 6370, "token_acc": 0.2854127702257933 }, { "epoch": 3.734681911462914, "grad_norm": 0.23895046123958516, "learning_rate": 0.00028492740767343774, "loss": 3.0111608505249023, "step": 6371, "token_acc": 0.29776406272861494 }, { "epoch": 3.7352682497801233, "grad_norm": 0.2752433124044039, "learning_rate": 0.0002849210555099951, "loss": 3.046128273010254, "step": 6372, "token_acc": 0.294464993048451 }, { "epoch": 3.7358545880973324, "grad_norm": 0.23982772538034236, "learning_rate": 0.00028491470207915094, "loss": 2.9912474155426025, "step": 6373, "token_acc": 0.3004064020612423 }, { "epoch": 3.736440926414541, "grad_norm": 0.2507539485663484, "learning_rate": 0.0002849083473809649, "loss": 3.010636806488037, "step": 6374, "token_acc": 0.2973783735110242 }, { "epoch": 3.73702726473175, "grad_norm": 0.2090374503572626, "learning_rate": 0.0002849019914154968, "loss": 3.039000988006592, "step": 6375, "token_acc": 0.29455690000589996 }, { "epoch": 3.7376136030489593, "grad_norm": 0.2596178406343501, "learning_rate": 0.0002848956341828063, "loss": 3.0357675552368164, "step": 6376, "token_acc": 0.29370622075950953 }, { "epoch": 3.7381999413661684, "grad_norm": 0.24495939720971815, "learning_rate": 0.00028488927568295306, "loss": 3.1018295288085938, "step": 6377, "token_acc": 0.2845117713457473 }, { "epoch": 3.738786279683377, "grad_norm": 0.2424867944672574, "learning_rate": 0.0002848829159159968, "loss": 3.044104814529419, "step": 6378, "token_acc": 0.2938176201937749 }, { "epoch": 3.739372618000586, "grad_norm": 0.21686618941266966, "learning_rate": 0.0002848765548819973, "loss": 3.0173449516296387, "step": 6379, "token_acc": 0.2979773037721971 }, { "epoch": 3.7399589563177953, "grad_norm": 0.25439399360676873, "learning_rate": 0.00028487019258101434, "loss": 3.0611729621887207, "step": 6380, "token_acc": 0.29304729742102054 }, { "epoch": 3.7405452946350044, "grad_norm": 0.2514092173767438, "learning_rate": 0.0002848638290131077, "loss": 3.053497791290283, "step": 6381, "token_acc": 0.29288549247168627 }, { "epoch": 3.7411316329522135, "grad_norm": 0.2548914787929738, "learning_rate": 0.00028485746417833706, "loss": 3.1131839752197266, "step": 6382, "token_acc": 0.28481981392391875 }, { "epoch": 3.7417179712694226, "grad_norm": 0.24814612398835684, "learning_rate": 0.0002848510980767623, "loss": 3.052476644515991, "step": 6383, "token_acc": 0.2922432408174902 }, { "epoch": 3.7423043095866317, "grad_norm": 0.23686027209091398, "learning_rate": 0.00028484473070844316, "loss": 3.0030674934387207, "step": 6384, "token_acc": 0.30066860668391526 }, { "epoch": 3.7428906479038404, "grad_norm": 0.2736626989457586, "learning_rate": 0.00028483836207343947, "loss": 3.0640578269958496, "step": 6385, "token_acc": 0.29195518716798613 }, { "epoch": 3.7434769862210495, "grad_norm": 0.24992428664228156, "learning_rate": 0.0002848319921718111, "loss": 3.0661873817443848, "step": 6386, "token_acc": 0.2889865894183486 }, { "epoch": 3.7440633245382586, "grad_norm": 0.2752225351196584, "learning_rate": 0.00028482562100361783, "loss": 3.0281476974487305, "step": 6387, "token_acc": 0.29642614562001113 }, { "epoch": 3.7446496628554677, "grad_norm": 0.23917917465863353, "learning_rate": 0.00028481924856891953, "loss": 3.067188262939453, "step": 6388, "token_acc": 0.29242867167089154 }, { "epoch": 3.7452360011726764, "grad_norm": 0.25384496386963346, "learning_rate": 0.0002848128748677761, "loss": 3.054591655731201, "step": 6389, "token_acc": 0.2904961554365634 }, { "epoch": 3.7458223394898855, "grad_norm": 0.2709709902763423, "learning_rate": 0.0002848064999002473, "loss": 3.0426647663116455, "step": 6390, "token_acc": 0.2935410868440227 }, { "epoch": 3.7464086778070946, "grad_norm": 0.2366056656163999, "learning_rate": 0.00028480012366639314, "loss": 3.054283618927002, "step": 6391, "token_acc": 0.29268045981661955 }, { "epoch": 3.7469950161243037, "grad_norm": 0.2462531332293585, "learning_rate": 0.00028479374616627344, "loss": 3.09869384765625, "step": 6392, "token_acc": 0.2858542872101921 }, { "epoch": 3.747581354441513, "grad_norm": 0.23544219368219935, "learning_rate": 0.0002847873673999482, "loss": 3.0563108921051025, "step": 6393, "token_acc": 0.2947198436715474 }, { "epoch": 3.748167692758722, "grad_norm": 0.23223961505635574, "learning_rate": 0.00028478098736747716, "loss": 3.035799503326416, "step": 6394, "token_acc": 0.2962619938502575 }, { "epoch": 3.748754031075931, "grad_norm": 0.24297599340063644, "learning_rate": 0.0002847746060689204, "loss": 3.0335381031036377, "step": 6395, "token_acc": 0.29560658531579537 }, { "epoch": 3.7493403693931397, "grad_norm": 0.25120157199270393, "learning_rate": 0.0002847682235043379, "loss": 3.0461766719818115, "step": 6396, "token_acc": 0.29328470450543864 }, { "epoch": 3.749926707710349, "grad_norm": 0.25705045307561425, "learning_rate": 0.00028476183967378945, "loss": 3.061815023422241, "step": 6397, "token_acc": 0.2926278414779022 }, { "epoch": 3.750513046027558, "grad_norm": 0.2313712386816058, "learning_rate": 0.00028475545457733517, "loss": 3.0745768547058105, "step": 6398, "token_acc": 0.29098399419881665 }, { "epoch": 3.751099384344767, "grad_norm": 0.22846635013309133, "learning_rate": 0.00028474906821503496, "loss": 3.052258014678955, "step": 6399, "token_acc": 0.29240991038705166 }, { "epoch": 3.7516857226619758, "grad_norm": 0.26409885071022154, "learning_rate": 0.0002847426805869488, "loss": 3.018313407897949, "step": 6400, "token_acc": 0.298551751279024 }, { "epoch": 3.752272060979185, "grad_norm": 0.23608630858426222, "learning_rate": 0.0002847362916931368, "loss": 3.0369508266448975, "step": 6401, "token_acc": 0.29457809020697856 }, { "epoch": 3.752858399296394, "grad_norm": 0.24282613930187477, "learning_rate": 0.0002847299015336588, "loss": 3.061882495880127, "step": 6402, "token_acc": 0.2946495084409657 }, { "epoch": 3.753444737613603, "grad_norm": 0.2606626241276367, "learning_rate": 0.000284723510108575, "loss": 3.0644145011901855, "step": 6403, "token_acc": 0.289663515272646 }, { "epoch": 3.754031075930812, "grad_norm": 0.21666394049372992, "learning_rate": 0.00028471711741794535, "loss": 3.0359692573547363, "step": 6404, "token_acc": 0.29566102461642857 }, { "epoch": 3.7546174142480213, "grad_norm": 0.2204038364055347, "learning_rate": 0.0002847107234618299, "loss": 3.0340352058410645, "step": 6405, "token_acc": 0.2953189936867485 }, { "epoch": 3.75520375256523, "grad_norm": 0.24590685353033298, "learning_rate": 0.0002847043282402888, "loss": 3.0559425354003906, "step": 6406, "token_acc": 0.29301301477003455 }, { "epoch": 3.755790090882439, "grad_norm": 0.2453661244887871, "learning_rate": 0.00028469793175338196, "loss": 3.0458221435546875, "step": 6407, "token_acc": 0.2931477931085059 }, { "epoch": 3.756376429199648, "grad_norm": 0.22450336507868635, "learning_rate": 0.00028469153400116957, "loss": 3.0267984867095947, "step": 6408, "token_acc": 0.2961428480312952 }, { "epoch": 3.7569627675168573, "grad_norm": 0.2406599274072631, "learning_rate": 0.0002846851349837117, "loss": 3.0346498489379883, "step": 6409, "token_acc": 0.29565938508171963 }, { "epoch": 3.757549105834066, "grad_norm": 0.2266248558943796, "learning_rate": 0.00028467873470106855, "loss": 3.054716110229492, "step": 6410, "token_acc": 0.2938531237188099 }, { "epoch": 3.758135444151275, "grad_norm": 0.24346540358595822, "learning_rate": 0.00028467233315330017, "loss": 3.062631607055664, "step": 6411, "token_acc": 0.2923513038570567 }, { "epoch": 3.758721782468484, "grad_norm": 0.22536872848043504, "learning_rate": 0.00028466593034046664, "loss": 3.0225768089294434, "step": 6412, "token_acc": 0.29782877072106784 }, { "epoch": 3.7593081207856933, "grad_norm": 0.2772536681402159, "learning_rate": 0.00028465952626262813, "loss": 3.032212972640991, "step": 6413, "token_acc": 0.2963345896887421 }, { "epoch": 3.7598944591029024, "grad_norm": 0.29264129588827326, "learning_rate": 0.0002846531209198449, "loss": 3.0384366512298584, "step": 6414, "token_acc": 0.29443342548417484 }, { "epoch": 3.7604807974201115, "grad_norm": 0.22938356386841893, "learning_rate": 0.000284646714312177, "loss": 3.070744514465332, "step": 6415, "token_acc": 0.29095955590800954 }, { "epoch": 3.7610671357373207, "grad_norm": 0.2847067260309995, "learning_rate": 0.0002846403064396847, "loss": 3.0593061447143555, "step": 6416, "token_acc": 0.29246225660043446 }, { "epoch": 3.7616534740545293, "grad_norm": 0.28283883830815393, "learning_rate": 0.0002846338973024281, "loss": 3.0490171909332275, "step": 6417, "token_acc": 0.292628020357004 }, { "epoch": 3.7622398123717384, "grad_norm": 0.23334805599281574, "learning_rate": 0.0002846274869004675, "loss": 3.074446201324463, "step": 6418, "token_acc": 0.28952731609925586 }, { "epoch": 3.7628261506889475, "grad_norm": 0.240273699956498, "learning_rate": 0.00028462107523386307, "loss": 3.0408897399902344, "step": 6419, "token_acc": 0.2939683275314539 }, { "epoch": 3.7634124890061567, "grad_norm": 0.2818884646412402, "learning_rate": 0.000284614662302675, "loss": 3.09450364112854, "step": 6420, "token_acc": 0.2875641326665701 }, { "epoch": 3.7639988273233653, "grad_norm": 0.3033954108076701, "learning_rate": 0.0002846082481069636, "loss": 3.042027235031128, "step": 6421, "token_acc": 0.2939763852981156 }, { "epoch": 3.7645851656405744, "grad_norm": 0.24273642170049947, "learning_rate": 0.00028460183264678913, "loss": 3.060802936553955, "step": 6422, "token_acc": 0.29244712671899215 }, { "epoch": 3.7651715039577835, "grad_norm": 0.26299360229745156, "learning_rate": 0.0002845954159222118, "loss": 3.025076389312744, "step": 6423, "token_acc": 0.29623854217096274 }, { "epoch": 3.7657578422749927, "grad_norm": 0.24528267993543085, "learning_rate": 0.00028458899793329184, "loss": 3.011979103088379, "step": 6424, "token_acc": 0.298663008648267 }, { "epoch": 3.7663441805922018, "grad_norm": 0.25178545979686223, "learning_rate": 0.00028458257868008965, "loss": 3.0271973609924316, "step": 6425, "token_acc": 0.2956327965146579 }, { "epoch": 3.766930518909411, "grad_norm": 0.2637632497807345, "learning_rate": 0.00028457615816266556, "loss": 3.070158004760742, "step": 6426, "token_acc": 0.28954282573993145 }, { "epoch": 3.76751685722662, "grad_norm": 0.2544184252975495, "learning_rate": 0.00028456973638107974, "loss": 3.0141348838806152, "step": 6427, "token_acc": 0.29697565837792467 }, { "epoch": 3.7681031955438287, "grad_norm": 0.24115774032473214, "learning_rate": 0.00028456331333539256, "loss": 3.0248701572418213, "step": 6428, "token_acc": 0.2980353313698941 }, { "epoch": 3.7686895338610378, "grad_norm": 0.25593151109143103, "learning_rate": 0.0002845568890256644, "loss": 3.034853935241699, "step": 6429, "token_acc": 0.29537424696576425 }, { "epoch": 3.769275872178247, "grad_norm": 0.24726855518624943, "learning_rate": 0.00028455046345195564, "loss": 3.068155527114868, "step": 6430, "token_acc": 0.2902495721391974 }, { "epoch": 3.769862210495456, "grad_norm": 0.22265960546239366, "learning_rate": 0.0002845440366143265, "loss": 3.039623975753784, "step": 6431, "token_acc": 0.29595725948892937 }, { "epoch": 3.7704485488126647, "grad_norm": 0.2652900310559628, "learning_rate": 0.00028453760851283747, "loss": 3.075779914855957, "step": 6432, "token_acc": 0.2897386873938548 }, { "epoch": 3.771034887129874, "grad_norm": 0.23278231456836948, "learning_rate": 0.0002845311791475489, "loss": 3.0230231285095215, "step": 6433, "token_acc": 0.29580449403082726 }, { "epoch": 3.771621225447083, "grad_norm": 0.24816611134423577, "learning_rate": 0.0002845247485185212, "loss": 3.0687031745910645, "step": 6434, "token_acc": 0.29038213581276556 }, { "epoch": 3.772207563764292, "grad_norm": 0.21455168927030738, "learning_rate": 0.0002845183166258147, "loss": 3.035602331161499, "step": 6435, "token_acc": 0.29569096791977667 }, { "epoch": 3.772793902081501, "grad_norm": 0.24432139057861427, "learning_rate": 0.00028451188346948993, "loss": 3.0453948974609375, "step": 6436, "token_acc": 0.2946295136925438 }, { "epoch": 3.7733802403987102, "grad_norm": 0.2421936390638523, "learning_rate": 0.0002845054490496073, "loss": 3.0765252113342285, "step": 6437, "token_acc": 0.28885841458135125 }, { "epoch": 3.7739665787159193, "grad_norm": 0.25820734129475115, "learning_rate": 0.0002844990133662272, "loss": 3.046961784362793, "step": 6438, "token_acc": 0.29247162759805806 }, { "epoch": 3.774552917033128, "grad_norm": 0.23577313944541295, "learning_rate": 0.0002844925764194101, "loss": 3.047924041748047, "step": 6439, "token_acc": 0.2929797298540939 }, { "epoch": 3.775139255350337, "grad_norm": 0.2560432761791687, "learning_rate": 0.00028448613820921645, "loss": 3.0556278228759766, "step": 6440, "token_acc": 0.2931772153240942 }, { "epoch": 3.7757255936675462, "grad_norm": 0.2233623452701283, "learning_rate": 0.0002844796987357068, "loss": 3.036797046661377, "step": 6441, "token_acc": 0.2945163808792695 }, { "epoch": 3.7763119319847553, "grad_norm": 0.24501848887832836, "learning_rate": 0.0002844732579989416, "loss": 3.0581722259521484, "step": 6442, "token_acc": 0.290115338052144 }, { "epoch": 3.776898270301964, "grad_norm": 0.238232780501294, "learning_rate": 0.0002844668159989813, "loss": 3.066704750061035, "step": 6443, "token_acc": 0.2900033051127552 }, { "epoch": 3.777484608619173, "grad_norm": 0.22967718366115042, "learning_rate": 0.0002844603727358865, "loss": 3.0477566719055176, "step": 6444, "token_acc": 0.2925387640404926 }, { "epoch": 3.7780709469363822, "grad_norm": 0.21710124007849205, "learning_rate": 0.0002844539282097177, "loss": 3.077892780303955, "step": 6445, "token_acc": 0.28852728380992765 }, { "epoch": 3.7786572852535913, "grad_norm": 0.2162954666512737, "learning_rate": 0.00028444748242053533, "loss": 3.0255706310272217, "step": 6446, "token_acc": 0.29465866255701106 }, { "epoch": 3.7792436235708005, "grad_norm": 0.22442976405759818, "learning_rate": 0.0002844410353684001, "loss": 3.0664663314819336, "step": 6447, "token_acc": 0.29091161800263293 }, { "epoch": 3.7798299618880096, "grad_norm": 0.2244595182591214, "learning_rate": 0.0002844345870533725, "loss": 3.061717987060547, "step": 6448, "token_acc": 0.291406070820223 }, { "epoch": 3.7804163002052187, "grad_norm": 0.23517134813167523, "learning_rate": 0.0002844281374755131, "loss": 3.0814316272735596, "step": 6449, "token_acc": 0.28957951102956864 }, { "epoch": 3.7810026385224274, "grad_norm": 0.22336815453940145, "learning_rate": 0.0002844216866348825, "loss": 3.0562524795532227, "step": 6450, "token_acc": 0.2917234400274106 }, { "epoch": 3.7815889768396365, "grad_norm": 0.20703671576447452, "learning_rate": 0.0002844152345315413, "loss": 3.011404037475586, "step": 6451, "token_acc": 0.29775394782185644 }, { "epoch": 3.7821753151568456, "grad_norm": 0.2334966017862748, "learning_rate": 0.0002844087811655501, "loss": 3.0344905853271484, "step": 6452, "token_acc": 0.29646743880833787 }, { "epoch": 3.7827616534740547, "grad_norm": 0.21529533160813272, "learning_rate": 0.0002844023265369695, "loss": 3.0114846229553223, "step": 6453, "token_acc": 0.2979767471152388 }, { "epoch": 3.7833479917912634, "grad_norm": 0.23670448252195947, "learning_rate": 0.00028439587064586015, "loss": 3.0306639671325684, "step": 6454, "token_acc": 0.29540054685532224 }, { "epoch": 3.7839343301084725, "grad_norm": 0.2104277151462322, "learning_rate": 0.0002843894134922827, "loss": 3.0959527492523193, "step": 6455, "token_acc": 0.2862502476822159 }, { "epoch": 3.7845206684256816, "grad_norm": 0.2553915305388533, "learning_rate": 0.00028438295507629785, "loss": 3.0344364643096924, "step": 6456, "token_acc": 0.29503686976577603 }, { "epoch": 3.7851070067428907, "grad_norm": 0.2594435434190842, "learning_rate": 0.0002843764953979661, "loss": 3.0273489952087402, "step": 6457, "token_acc": 0.2947190003698589 }, { "epoch": 3.7856933450601, "grad_norm": 0.24300527670216832, "learning_rate": 0.0002843700344573483, "loss": 3.0300164222717285, "step": 6458, "token_acc": 0.2942813838550247 }, { "epoch": 3.786279683377309, "grad_norm": 0.22614606437977886, "learning_rate": 0.00028436357225450516, "loss": 3.039964437484741, "step": 6459, "token_acc": 0.294184095767643 }, { "epoch": 3.7868660216945176, "grad_norm": 0.23540313962612164, "learning_rate": 0.00028435710878949727, "loss": 3.035740375518799, "step": 6460, "token_acc": 0.2955252415646153 }, { "epoch": 3.7874523600117267, "grad_norm": 0.27215698184753095, "learning_rate": 0.0002843506440623854, "loss": 3.0343897342681885, "step": 6461, "token_acc": 0.2948392576900965 }, { "epoch": 3.788038698328936, "grad_norm": 0.23735971413185564, "learning_rate": 0.0002843441780732303, "loss": 3.0503010749816895, "step": 6462, "token_acc": 0.2909729592216943 }, { "epoch": 3.788625036646145, "grad_norm": 0.23159033072329363, "learning_rate": 0.00028433771082209266, "loss": 3.0826187133789062, "step": 6463, "token_acc": 0.2892745872172959 }, { "epoch": 3.7892113749633536, "grad_norm": 0.2488320411798707, "learning_rate": 0.0002843312423090332, "loss": 3.0095126628875732, "step": 6464, "token_acc": 0.2984580823036863 }, { "epoch": 3.7897977132805627, "grad_norm": 0.2580145832332817, "learning_rate": 0.00028432477253411277, "loss": 3.0778167247772217, "step": 6465, "token_acc": 0.2910244603442867 }, { "epoch": 3.790384051597772, "grad_norm": 0.23593527792498536, "learning_rate": 0.0002843183014973921, "loss": 3.0669336318969727, "step": 6466, "token_acc": 0.29168375531478585 }, { "epoch": 3.790970389914981, "grad_norm": 0.2144930487710654, "learning_rate": 0.00028431182919893194, "loss": 3.0286874771118164, "step": 6467, "token_acc": 0.2966722920900973 }, { "epoch": 3.79155672823219, "grad_norm": 0.2398808315952976, "learning_rate": 0.0002843053556387932, "loss": 3.003859519958496, "step": 6468, "token_acc": 0.2996412852672552 }, { "epoch": 3.792143066549399, "grad_norm": 0.22255860697861415, "learning_rate": 0.0002842988808170366, "loss": 2.9843082427978516, "step": 6469, "token_acc": 0.3018933523004735 }, { "epoch": 3.7927294048666083, "grad_norm": 0.24553254366153746, "learning_rate": 0.000284292404733723, "loss": 3.0547640323638916, "step": 6470, "token_acc": 0.29386423414563634 }, { "epoch": 3.793315743183817, "grad_norm": 0.22423763226762575, "learning_rate": 0.00028428592738891323, "loss": 3.035776376724243, "step": 6471, "token_acc": 0.2957616805969193 }, { "epoch": 3.793902081501026, "grad_norm": 0.25264490307938403, "learning_rate": 0.00028427944878266806, "loss": 3.0488390922546387, "step": 6472, "token_acc": 0.29412754049930045 }, { "epoch": 3.794488419818235, "grad_norm": 0.27483429483454197, "learning_rate": 0.00028427296891504847, "loss": 3.0293054580688477, "step": 6473, "token_acc": 0.296178912943845 }, { "epoch": 3.7950747581354443, "grad_norm": 0.2621509294896071, "learning_rate": 0.00028426648778611524, "loss": 3.0560498237609863, "step": 6474, "token_acc": 0.2920218964671246 }, { "epoch": 3.795661096452653, "grad_norm": 0.2843109639280927, "learning_rate": 0.0002842600053959293, "loss": 3.0406782627105713, "step": 6475, "token_acc": 0.29298797438379015 }, { "epoch": 3.796247434769862, "grad_norm": 0.2925644969396739, "learning_rate": 0.0002842535217445515, "loss": 3.070838212966919, "step": 6476, "token_acc": 0.2903781307112152 }, { "epoch": 3.796833773087071, "grad_norm": 0.25515216160340337, "learning_rate": 0.00028424703683204285, "loss": 3.05586838722229, "step": 6477, "token_acc": 0.29095701383774564 }, { "epoch": 3.7974201114042803, "grad_norm": 0.2410106031428619, "learning_rate": 0.0002842405506584641, "loss": 3.031773567199707, "step": 6478, "token_acc": 0.2963058891243492 }, { "epoch": 3.7980064497214894, "grad_norm": 0.24351233982252637, "learning_rate": 0.0002842340632238763, "loss": 3.045006275177002, "step": 6479, "token_acc": 0.29487010516421186 }, { "epoch": 3.7985927880386985, "grad_norm": 0.24132649031562983, "learning_rate": 0.00028422757452834034, "loss": 3.0639381408691406, "step": 6480, "token_acc": 0.29068289066201286 }, { "epoch": 3.7991791263559076, "grad_norm": 0.2514291527075244, "learning_rate": 0.00028422108457191726, "loss": 3.038318395614624, "step": 6481, "token_acc": 0.29253146361389465 }, { "epoch": 3.7997654646731163, "grad_norm": 0.26371432246007004, "learning_rate": 0.0002842145933546679, "loss": 3.021477460861206, "step": 6482, "token_acc": 0.2964851344922132 }, { "epoch": 3.8003518029903254, "grad_norm": 0.2359566190959969, "learning_rate": 0.00028420810087665326, "loss": 3.060835838317871, "step": 6483, "token_acc": 0.29099584463932254 }, { "epoch": 3.8009381413075345, "grad_norm": 0.2203606915876757, "learning_rate": 0.0002842016071379344, "loss": 3.0279574394226074, "step": 6484, "token_acc": 0.295239651193661 }, { "epoch": 3.8015244796247436, "grad_norm": 0.2585880866905589, "learning_rate": 0.0002841951121385723, "loss": 3.0840396881103516, "step": 6485, "token_acc": 0.2889995227838469 }, { "epoch": 3.8021108179419523, "grad_norm": 0.23776772573882637, "learning_rate": 0.00028418861587862793, "loss": 3.0965988636016846, "step": 6486, "token_acc": 0.28499383438201537 }, { "epoch": 3.8026971562591614, "grad_norm": 0.2265598711408663, "learning_rate": 0.0002841821183581623, "loss": 3.031135082244873, "step": 6487, "token_acc": 0.295494839732758 }, { "epoch": 3.8032834945763705, "grad_norm": 0.22902049968928967, "learning_rate": 0.00028417561957723653, "loss": 3.0620620250701904, "step": 6488, "token_acc": 0.2929412199786562 }, { "epoch": 3.8038698328935796, "grad_norm": 0.23826953501110648, "learning_rate": 0.00028416911953591163, "loss": 3.0391287803649902, "step": 6489, "token_acc": 0.2946972411322107 }, { "epoch": 3.8044561712107887, "grad_norm": 0.21449763283532503, "learning_rate": 0.00028416261823424865, "loss": 3.0535244941711426, "step": 6490, "token_acc": 0.29301729822959466 }, { "epoch": 3.805042509527998, "grad_norm": 0.2675763512311035, "learning_rate": 0.0002841561156723086, "loss": 3.0464394092559814, "step": 6491, "token_acc": 0.2936951130106537 }, { "epoch": 3.805628847845207, "grad_norm": 0.2755577913535871, "learning_rate": 0.00028414961185015266, "loss": 3.0616469383239746, "step": 6492, "token_acc": 0.29148671052492636 }, { "epoch": 3.8062151861624156, "grad_norm": 0.2548119208249405, "learning_rate": 0.0002841431067678419, "loss": 3.067382335662842, "step": 6493, "token_acc": 0.28984290378141986 }, { "epoch": 3.8068015244796247, "grad_norm": 0.25701191009551894, "learning_rate": 0.0002841366004254374, "loss": 3.02752685546875, "step": 6494, "token_acc": 0.2957345996542564 }, { "epoch": 3.807387862796834, "grad_norm": 0.2380749726948019, "learning_rate": 0.00028413009282300027, "loss": 3.0252604484558105, "step": 6495, "token_acc": 0.2964519116001991 }, { "epoch": 3.807974201114043, "grad_norm": 0.25607305813708314, "learning_rate": 0.00028412358396059164, "loss": 3.0116400718688965, "step": 6496, "token_acc": 0.29925378671443564 }, { "epoch": 3.8085605394312516, "grad_norm": 0.25046564668792964, "learning_rate": 0.0002841170738382727, "loss": 3.0679514408111572, "step": 6497, "token_acc": 0.2903453436862777 }, { "epoch": 3.8091468777484607, "grad_norm": 0.22118235754033555, "learning_rate": 0.0002841105624561046, "loss": 3.0552923679351807, "step": 6498, "token_acc": 0.29341797099965444 }, { "epoch": 3.80973321606567, "grad_norm": 0.23387149028809978, "learning_rate": 0.00028410404981414844, "loss": 3.027594566345215, "step": 6499, "token_acc": 0.29755377705009856 }, { "epoch": 3.810319554382879, "grad_norm": 0.23973350415852107, "learning_rate": 0.0002840975359124654, "loss": 3.052293300628662, "step": 6500, "token_acc": 0.29125434471478223 }, { "epoch": 3.810905892700088, "grad_norm": 0.23511353140805805, "learning_rate": 0.00028409102075111685, "loss": 3.0663585662841797, "step": 6501, "token_acc": 0.2901506931886679 }, { "epoch": 3.811492231017297, "grad_norm": 0.2375920723671012, "learning_rate": 0.0002840845043301637, "loss": 3.032205104827881, "step": 6502, "token_acc": 0.2949895610438436 }, { "epoch": 3.8120785693345063, "grad_norm": 0.26184012379987287, "learning_rate": 0.0002840779866496674, "loss": 3.060265064239502, "step": 6503, "token_acc": 0.2922478859278686 }, { "epoch": 3.812664907651715, "grad_norm": 0.23985899838496655, "learning_rate": 0.0002840714677096891, "loss": 3.0341007709503174, "step": 6504, "token_acc": 0.29550844813547456 }, { "epoch": 3.813251245968924, "grad_norm": 0.2813590564689873, "learning_rate": 0.00028406494751029, "loss": 3.06073260307312, "step": 6505, "token_acc": 0.2936139448951723 }, { "epoch": 3.813837584286133, "grad_norm": 0.2228921636201004, "learning_rate": 0.00028405842605153136, "loss": 3.0706372261047363, "step": 6506, "token_acc": 0.2906427271187876 }, { "epoch": 3.8144239226033423, "grad_norm": 0.24484077165948195, "learning_rate": 0.00028405190333347444, "loss": 3.0102782249450684, "step": 6507, "token_acc": 0.2960465502736724 }, { "epoch": 3.815010260920551, "grad_norm": 0.25411018658818735, "learning_rate": 0.00028404537935618055, "loss": 3.027517080307007, "step": 6508, "token_acc": 0.2979024928488132 }, { "epoch": 3.81559659923776, "grad_norm": 0.228530114895454, "learning_rate": 0.00028403885411971096, "loss": 3.075068950653076, "step": 6509, "token_acc": 0.2890450909314959 }, { "epoch": 3.816182937554969, "grad_norm": 0.24903481996537805, "learning_rate": 0.0002840323276241269, "loss": 3.0522079467773438, "step": 6510, "token_acc": 0.2933166053759727 }, { "epoch": 3.8167692758721783, "grad_norm": 0.22676293829231473, "learning_rate": 0.0002840257998694898, "loss": 3.0508840084075928, "step": 6511, "token_acc": 0.29262379427906204 }, { "epoch": 3.8173556141893874, "grad_norm": 0.26223626146474843, "learning_rate": 0.00028401927085586084, "loss": 3.0425639152526855, "step": 6512, "token_acc": 0.2934956948567413 }, { "epoch": 3.8179419525065965, "grad_norm": 0.2220260750220639, "learning_rate": 0.0002840127405833015, "loss": 3.0392003059387207, "step": 6513, "token_acc": 0.2936649517650744 }, { "epoch": 3.818528290823805, "grad_norm": 0.25706360985713084, "learning_rate": 0.00028400620905187304, "loss": 3.0403008460998535, "step": 6514, "token_acc": 0.29398352716603626 }, { "epoch": 3.8191146291410143, "grad_norm": 0.22380380442925485, "learning_rate": 0.0002839996762616368, "loss": 3.033729076385498, "step": 6515, "token_acc": 0.2963674100792127 }, { "epoch": 3.8197009674582234, "grad_norm": 0.21657871416713492, "learning_rate": 0.00028399314221265416, "loss": 3.0058112144470215, "step": 6516, "token_acc": 0.2987487276113746 }, { "epoch": 3.8202873057754325, "grad_norm": 0.2261427260937507, "learning_rate": 0.0002839866069049865, "loss": 3.080451488494873, "step": 6517, "token_acc": 0.28769606963048 }, { "epoch": 3.820873644092641, "grad_norm": 0.23740032818495918, "learning_rate": 0.0002839800703386952, "loss": 3.0359115600585938, "step": 6518, "token_acc": 0.2953185005519536 }, { "epoch": 3.8214599824098503, "grad_norm": 0.2291898773250533, "learning_rate": 0.0002839735325138417, "loss": 3.025585412979126, "step": 6519, "token_acc": 0.294110987317279 }, { "epoch": 3.8220463207270594, "grad_norm": 0.2364667564669498, "learning_rate": 0.0002839669934304875, "loss": 3.054555892944336, "step": 6520, "token_acc": 0.2926513571035829 }, { "epoch": 3.8226326590442685, "grad_norm": 0.22201012325243466, "learning_rate": 0.0002839604530886938, "loss": 3.0446062088012695, "step": 6521, "token_acc": 0.2935001607124658 }, { "epoch": 3.8232189973614776, "grad_norm": 0.2198711445737446, "learning_rate": 0.0002839539114885222, "loss": 3.0163462162017822, "step": 6522, "token_acc": 0.2970870650265452 }, { "epoch": 3.8238053356786867, "grad_norm": 0.24552332159739496, "learning_rate": 0.00028394736863003405, "loss": 3.0710837841033936, "step": 6523, "token_acc": 0.28817053073450394 }, { "epoch": 3.824391673995896, "grad_norm": 0.27632625025623053, "learning_rate": 0.00028394082451329086, "loss": 3.0504679679870605, "step": 6524, "token_acc": 0.2934688071324065 }, { "epoch": 3.8249780123131045, "grad_norm": 0.22437532149812653, "learning_rate": 0.0002839342791383542, "loss": 3.0849342346191406, "step": 6525, "token_acc": 0.2867604749699221 }, { "epoch": 3.8255643506303136, "grad_norm": 0.2299069603408196, "learning_rate": 0.00028392773250528544, "loss": 3.0610640048980713, "step": 6526, "token_acc": 0.29115947601565423 }, { "epoch": 3.8261506889475227, "grad_norm": 0.28249171469048323, "learning_rate": 0.00028392118461414604, "loss": 3.043231248855591, "step": 6527, "token_acc": 0.29332953173038384 }, { "epoch": 3.826737027264732, "grad_norm": 0.24041256744041373, "learning_rate": 0.0002839146354649976, "loss": 3.105743885040283, "step": 6528, "token_acc": 0.2856768962468987 }, { "epoch": 3.8273233655819405, "grad_norm": 0.26072902337251747, "learning_rate": 0.0002839080850579017, "loss": 3.0270261764526367, "step": 6529, "token_acc": 0.29712473995941957 }, { "epoch": 3.8279097038991496, "grad_norm": 0.28961981988578656, "learning_rate": 0.00028390153339291965, "loss": 3.0521256923675537, "step": 6530, "token_acc": 0.2930738972714096 }, { "epoch": 3.8284960422163588, "grad_norm": 0.27464138690630246, "learning_rate": 0.0002838949804701132, "loss": 3.0400891304016113, "step": 6531, "token_acc": 0.29284179744202865 }, { "epoch": 3.829082380533568, "grad_norm": 0.29897593806011746, "learning_rate": 0.0002838884262895438, "loss": 3.082455635070801, "step": 6532, "token_acc": 0.289261347705536 }, { "epoch": 3.829668718850777, "grad_norm": 0.24484068384177268, "learning_rate": 0.00028388187085127313, "loss": 3.011160373687744, "step": 6533, "token_acc": 0.2992803405474891 }, { "epoch": 3.830255057167986, "grad_norm": 0.24027257562765503, "learning_rate": 0.0002838753141553626, "loss": 2.9956626892089844, "step": 6534, "token_acc": 0.30118050226534493 }, { "epoch": 3.830841395485195, "grad_norm": 0.23625127813329788, "learning_rate": 0.0002838687562018739, "loss": 3.0871615409851074, "step": 6535, "token_acc": 0.28674924285383707 }, { "epoch": 3.831427733802404, "grad_norm": 0.2546060530380916, "learning_rate": 0.0002838621969908687, "loss": 3.0426220893859863, "step": 6536, "token_acc": 0.2954379418179907 }, { "epoch": 3.832014072119613, "grad_norm": 0.2338593927723718, "learning_rate": 0.0002838556365224085, "loss": 3.013202428817749, "step": 6537, "token_acc": 0.29727467498061017 }, { "epoch": 3.832600410436822, "grad_norm": 0.24328735372256707, "learning_rate": 0.000283849074796555, "loss": 3.0140700340270996, "step": 6538, "token_acc": 0.2944670814441689 }, { "epoch": 3.833186748754031, "grad_norm": 0.22363886437574604, "learning_rate": 0.0002838425118133697, "loss": 3.128633975982666, "step": 6539, "token_acc": 0.28217249129796657 }, { "epoch": 3.83377308707124, "grad_norm": 0.24082098391461015, "learning_rate": 0.00028383594757291447, "loss": 3.076694965362549, "step": 6540, "token_acc": 0.2886501223203549 }, { "epoch": 3.834359425388449, "grad_norm": 0.24792064969559943, "learning_rate": 0.0002838293820752508, "loss": 3.0116188526153564, "step": 6541, "token_acc": 0.2971055107646189 }, { "epoch": 3.834945763705658, "grad_norm": 0.22158772043492048, "learning_rate": 0.00028382281532044043, "loss": 3.062791109085083, "step": 6542, "token_acc": 0.2899432899968863 }, { "epoch": 3.835532102022867, "grad_norm": 0.26723617338002437, "learning_rate": 0.00028381624730854506, "loss": 3.088080406188965, "step": 6543, "token_acc": 0.28852527473689216 }, { "epoch": 3.8361184403400763, "grad_norm": 0.24723662733711946, "learning_rate": 0.00028380967803962634, "loss": 3.069936752319336, "step": 6544, "token_acc": 0.29049436475409834 }, { "epoch": 3.8367047786572854, "grad_norm": 0.25560667001572485, "learning_rate": 0.000283803107513746, "loss": 3.062473773956299, "step": 6545, "token_acc": 0.2924926140919662 }, { "epoch": 3.8372911169744945, "grad_norm": 0.2201425032186063, "learning_rate": 0.00028379653573096583, "loss": 3.0476179122924805, "step": 6546, "token_acc": 0.2943775035096563 }, { "epoch": 3.837877455291703, "grad_norm": 0.22879317999410065, "learning_rate": 0.0002837899626913475, "loss": 3.075305700302124, "step": 6547, "token_acc": 0.2887838385496555 }, { "epoch": 3.8384637936089123, "grad_norm": 0.231829566067993, "learning_rate": 0.00028378338839495266, "loss": 3.0726513862609863, "step": 6548, "token_acc": 0.29075507375585485 }, { "epoch": 3.8390501319261214, "grad_norm": 0.22922536546469777, "learning_rate": 0.0002837768128418432, "loss": 3.06248140335083, "step": 6549, "token_acc": 0.2908382215747166 }, { "epoch": 3.8396364702433305, "grad_norm": 0.2170888109301465, "learning_rate": 0.0002837702360320809, "loss": 3.0245556831359863, "step": 6550, "token_acc": 0.2957644140193173 }, { "epoch": 3.840222808560539, "grad_norm": 0.22510698391222092, "learning_rate": 0.0002837636579657274, "loss": 3.0730843544006348, "step": 6551, "token_acc": 0.29074954959625676 }, { "epoch": 3.8408091468777483, "grad_norm": 0.22832968319802974, "learning_rate": 0.00028375707864284466, "loss": 3.028977870941162, "step": 6552, "token_acc": 0.29615125021582506 }, { "epoch": 3.8413954851949574, "grad_norm": 0.2419614836297206, "learning_rate": 0.0002837504980634944, "loss": 3.0658440589904785, "step": 6553, "token_acc": 0.2919831799545376 }, { "epoch": 3.8419818235121665, "grad_norm": 0.2395143803848042, "learning_rate": 0.0002837439162277384, "loss": 3.073482036590576, "step": 6554, "token_acc": 0.29014902332714504 }, { "epoch": 3.8425681618293757, "grad_norm": 0.23555388501704738, "learning_rate": 0.00028373733313563854, "loss": 3.0484771728515625, "step": 6555, "token_acc": 0.2924699282736587 }, { "epoch": 3.8431545001465848, "grad_norm": 0.2602837275289091, "learning_rate": 0.00028373074878725664, "loss": 3.0312623977661133, "step": 6556, "token_acc": 0.29444929329772684 }, { "epoch": 3.843740838463794, "grad_norm": 0.24920855251526336, "learning_rate": 0.00028372416318265463, "loss": 3.004746437072754, "step": 6557, "token_acc": 0.29906336324808125 }, { "epoch": 3.8443271767810026, "grad_norm": 0.23820684883811902, "learning_rate": 0.00028371757632189424, "loss": 3.012692451477051, "step": 6558, "token_acc": 0.29896210691245206 }, { "epoch": 3.8449135150982117, "grad_norm": 0.23118051539468903, "learning_rate": 0.00028371098820503745, "loss": 3.0799238681793213, "step": 6559, "token_acc": 0.2893817772523972 }, { "epoch": 3.8454998534154208, "grad_norm": 0.24441139752035976, "learning_rate": 0.00028370439883214604, "loss": 3.059861660003662, "step": 6560, "token_acc": 0.29166084761239724 }, { "epoch": 3.84608619173263, "grad_norm": 0.26408986275883317, "learning_rate": 0.000283697808203282, "loss": 3.035032272338867, "step": 6561, "token_acc": 0.2957660644205461 }, { "epoch": 3.8466725300498386, "grad_norm": 0.23471125890296882, "learning_rate": 0.0002836912163185072, "loss": 3.061880111694336, "step": 6562, "token_acc": 0.290609470350439 }, { "epoch": 3.8472588683670477, "grad_norm": 0.23295793855579897, "learning_rate": 0.0002836846231778836, "loss": 3.0336246490478516, "step": 6563, "token_acc": 0.2975588977190205 }, { "epoch": 3.847845206684257, "grad_norm": 0.2270672510369036, "learning_rate": 0.0002836780287814731, "loss": 3.0157861709594727, "step": 6564, "token_acc": 0.29745962539484805 }, { "epoch": 3.848431545001466, "grad_norm": 0.23653561620008845, "learning_rate": 0.00028367143312933767, "loss": 3.043527841567993, "step": 6565, "token_acc": 0.2921853866582421 }, { "epoch": 3.849017883318675, "grad_norm": 0.2334127533213531, "learning_rate": 0.0002836648362215392, "loss": 3.072495937347412, "step": 6566, "token_acc": 0.28940172809799847 }, { "epoch": 3.849604221635884, "grad_norm": 0.2653310870410907, "learning_rate": 0.00028365823805813977, "loss": 3.0122547149658203, "step": 6567, "token_acc": 0.29722806625179116 }, { "epoch": 3.850190559953093, "grad_norm": 0.2196161804131879, "learning_rate": 0.00028365163863920125, "loss": 3.036485195159912, "step": 6568, "token_acc": 0.2938674009332802 }, { "epoch": 3.850776898270302, "grad_norm": 0.23293781740038608, "learning_rate": 0.00028364503796478573, "loss": 3.096240520477295, "step": 6569, "token_acc": 0.28593394751021806 }, { "epoch": 3.851363236587511, "grad_norm": 0.23721017324432295, "learning_rate": 0.0002836384360349551, "loss": 3.0066869258880615, "step": 6570, "token_acc": 0.2988688805491606 }, { "epoch": 3.85194957490472, "grad_norm": 0.24398114440684018, "learning_rate": 0.0002836318328497715, "loss": 3.0344302654266357, "step": 6571, "token_acc": 0.29558047526904657 }, { "epoch": 3.852535913221929, "grad_norm": 0.2785461609069765, "learning_rate": 0.00028362522840929687, "loss": 3.075298309326172, "step": 6572, "token_acc": 0.28743494105953893 }, { "epoch": 3.853122251539138, "grad_norm": 0.2441423321421813, "learning_rate": 0.00028361862271359333, "loss": 3.0563488006591797, "step": 6573, "token_acc": 0.2941047164313393 }, { "epoch": 3.853708589856347, "grad_norm": 0.24198112370701821, "learning_rate": 0.00028361201576272287, "loss": 3.0212860107421875, "step": 6574, "token_acc": 0.2979178230149563 }, { "epoch": 3.854294928173556, "grad_norm": 0.23408258799803067, "learning_rate": 0.00028360540755674755, "loss": 3.0577335357666016, "step": 6575, "token_acc": 0.2915138781611074 }, { "epoch": 3.8548812664907652, "grad_norm": 0.22085812956420903, "learning_rate": 0.00028359879809572946, "loss": 3.0607337951660156, "step": 6576, "token_acc": 0.291428508344626 }, { "epoch": 3.8554676048079743, "grad_norm": 0.23997824332040857, "learning_rate": 0.00028359218737973065, "loss": 3.0403435230255127, "step": 6577, "token_acc": 0.29587652118639723 }, { "epoch": 3.8560539431251835, "grad_norm": 0.22920300110721506, "learning_rate": 0.0002835855754088133, "loss": 3.029397487640381, "step": 6578, "token_acc": 0.29548002366642795 }, { "epoch": 3.856640281442392, "grad_norm": 0.24161968705865386, "learning_rate": 0.0002835789621830395, "loss": 3.0522141456604004, "step": 6579, "token_acc": 0.2932667122271358 }, { "epoch": 3.8572266197596012, "grad_norm": 0.2422886062200418, "learning_rate": 0.00028357234770247125, "loss": 3.025510787963867, "step": 6580, "token_acc": 0.2970521246488453 }, { "epoch": 3.8578129580768104, "grad_norm": 0.24668972253277688, "learning_rate": 0.0002835657319671709, "loss": 3.0520505905151367, "step": 6581, "token_acc": 0.29247619304540184 }, { "epoch": 3.8583992963940195, "grad_norm": 0.2281592570633168, "learning_rate": 0.00028355911497720046, "loss": 3.050171375274658, "step": 6582, "token_acc": 0.2925867722307111 }, { "epoch": 3.858985634711228, "grad_norm": 0.22129408477938184, "learning_rate": 0.00028355249673262206, "loss": 3.0273194313049316, "step": 6583, "token_acc": 0.2969939588425961 }, { "epoch": 3.8595719730284372, "grad_norm": 0.2563267316279477, "learning_rate": 0.00028354587723349793, "loss": 3.033951759338379, "step": 6584, "token_acc": 0.2962750219466993 }, { "epoch": 3.8601583113456464, "grad_norm": 0.25106010474037704, "learning_rate": 0.0002835392564798903, "loss": 3.064647912979126, "step": 6585, "token_acc": 0.29215564173216774 }, { "epoch": 3.8607446496628555, "grad_norm": 0.2642445226595879, "learning_rate": 0.0002835326344718612, "loss": 3.0457444190979004, "step": 6586, "token_acc": 0.2945502785609329 }, { "epoch": 3.8613309879800646, "grad_norm": 0.24833822176356746, "learning_rate": 0.000283526011209473, "loss": 3.0403056144714355, "step": 6587, "token_acc": 0.29512088813681 }, { "epoch": 3.8619173262972737, "grad_norm": 0.2442761448133746, "learning_rate": 0.00028351938669278785, "loss": 3.0538597106933594, "step": 6588, "token_acc": 0.2918890366295696 }, { "epoch": 3.862503664614483, "grad_norm": 0.25445983569170216, "learning_rate": 0.000283512760921868, "loss": 3.079535961151123, "step": 6589, "token_acc": 0.28856359728064673 }, { "epoch": 3.8630900029316915, "grad_norm": 0.26314473891964785, "learning_rate": 0.00028350613389677566, "loss": 3.050187587738037, "step": 6590, "token_acc": 0.2924566627070445 }, { "epoch": 3.8636763412489006, "grad_norm": 0.23840006166732203, "learning_rate": 0.0002834995056175731, "loss": 3.0651443004608154, "step": 6591, "token_acc": 0.2905859585840102 }, { "epoch": 3.8642626795661097, "grad_norm": 0.2666155385353076, "learning_rate": 0.0002834928760843225, "loss": 3.0429234504699707, "step": 6592, "token_acc": 0.2937864640973021 }, { "epoch": 3.864849017883319, "grad_norm": 0.27011910155036123, "learning_rate": 0.0002834862452970863, "loss": 3.0211944580078125, "step": 6593, "token_acc": 0.2956624166650979 }, { "epoch": 3.8654353562005275, "grad_norm": 0.24370965920831367, "learning_rate": 0.0002834796132559266, "loss": 3.0175890922546387, "step": 6594, "token_acc": 0.2987232791005468 }, { "epoch": 3.8660216945177366, "grad_norm": 0.25155780301573755, "learning_rate": 0.0002834729799609059, "loss": 3.0199384689331055, "step": 6595, "token_acc": 0.2954673419770993 }, { "epoch": 3.8666080328349457, "grad_norm": 0.23705558787808953, "learning_rate": 0.0002834663454120864, "loss": 3.058058977127075, "step": 6596, "token_acc": 0.29252557939335483 }, { "epoch": 3.867194371152155, "grad_norm": 0.24452065383412255, "learning_rate": 0.0002834597096095304, "loss": 3.0496368408203125, "step": 6597, "token_acc": 0.2914907599695066 }, { "epoch": 3.867780709469364, "grad_norm": 0.22064986875141399, "learning_rate": 0.0002834530725533003, "loss": 3.0335209369659424, "step": 6598, "token_acc": 0.2944046194149399 }, { "epoch": 3.868367047786573, "grad_norm": 0.23612306403429068, "learning_rate": 0.0002834464342434584, "loss": 2.99648380279541, "step": 6599, "token_acc": 0.3014428541055853 }, { "epoch": 3.868953386103782, "grad_norm": 0.2227464578256123, "learning_rate": 0.00028343979468006705, "loss": 3.0807061195373535, "step": 6600, "token_acc": 0.2886596029922338 }, { "epoch": 3.869539724420991, "grad_norm": 0.24062171324977696, "learning_rate": 0.00028343315386318866, "loss": 3.0487899780273438, "step": 6601, "token_acc": 0.2934997277764137 }, { "epoch": 3.8701260627382, "grad_norm": 0.22833682173483896, "learning_rate": 0.00028342651179288556, "loss": 3.0597872734069824, "step": 6602, "token_acc": 0.29292725686202226 }, { "epoch": 3.870712401055409, "grad_norm": 0.2460784792910518, "learning_rate": 0.00028341986846922024, "loss": 3.0221238136291504, "step": 6603, "token_acc": 0.2973992997224102 }, { "epoch": 3.871298739372618, "grad_norm": 0.2373824010213088, "learning_rate": 0.00028341322389225504, "loss": 3.001662015914917, "step": 6604, "token_acc": 0.2989401155995495 }, { "epoch": 3.871885077689827, "grad_norm": 0.2533704241208796, "learning_rate": 0.0002834065780620523, "loss": 3.0400359630584717, "step": 6605, "token_acc": 0.2952310975823099 }, { "epoch": 3.872471416007036, "grad_norm": 0.26152496031236205, "learning_rate": 0.00028339993097867456, "loss": 3.020984172821045, "step": 6606, "token_acc": 0.295472418829345 }, { "epoch": 3.873057754324245, "grad_norm": 0.2310290416501084, "learning_rate": 0.0002833932826421843, "loss": 3.012481212615967, "step": 6607, "token_acc": 0.29777241009125066 }, { "epoch": 3.873644092641454, "grad_norm": 0.24399349264956485, "learning_rate": 0.00028338663305264383, "loss": 3.0890214443206787, "step": 6608, "token_acc": 0.2859329061439879 }, { "epoch": 3.8742304309586633, "grad_norm": 0.24575260109750743, "learning_rate": 0.00028337998221011565, "loss": 3.0164601802825928, "step": 6609, "token_acc": 0.2988069530838732 }, { "epoch": 3.8748167692758724, "grad_norm": 0.2620952843350049, "learning_rate": 0.00028337333011466234, "loss": 3.037508964538574, "step": 6610, "token_acc": 0.29496751647508385 }, { "epoch": 3.875403107593081, "grad_norm": 0.25403054902000055, "learning_rate": 0.00028336667676634626, "loss": 3.0987722873687744, "step": 6611, "token_acc": 0.2855646543515521 }, { "epoch": 3.87598944591029, "grad_norm": 0.27660082799746244, "learning_rate": 0.00028336002216523, "loss": 3.0839157104492188, "step": 6612, "token_acc": 0.28824707707796016 }, { "epoch": 3.8765757842274993, "grad_norm": 0.2746152488750893, "learning_rate": 0.00028335336631137606, "loss": 3.0903825759887695, "step": 6613, "token_acc": 0.28755831284200783 }, { "epoch": 3.8771621225447084, "grad_norm": 0.26731093173228676, "learning_rate": 0.0002833467092048469, "loss": 3.054202079772949, "step": 6614, "token_acc": 0.29244808526804106 }, { "epoch": 3.8777484608619175, "grad_norm": 0.2356788986306146, "learning_rate": 0.00028334005084570507, "loss": 3.08854341506958, "step": 6615, "token_acc": 0.2858565916979574 }, { "epoch": 3.878334799179126, "grad_norm": 0.24847653646846418, "learning_rate": 0.0002833333912340132, "loss": 3.055333137512207, "step": 6616, "token_acc": 0.29032573331790185 }, { "epoch": 3.8789211374963353, "grad_norm": 0.2680996499132124, "learning_rate": 0.00028332673036983376, "loss": 3.033504009246826, "step": 6617, "token_acc": 0.2951832670794993 }, { "epoch": 3.8795074758135444, "grad_norm": 0.2373471970828621, "learning_rate": 0.00028332006825322934, "loss": 3.044600009918213, "step": 6618, "token_acc": 0.2925640502194549 }, { "epoch": 3.8800938141307535, "grad_norm": 0.2451909437184239, "learning_rate": 0.00028331340488426255, "loss": 3.0642850399017334, "step": 6619, "token_acc": 0.29007979263284456 }, { "epoch": 3.8806801524479626, "grad_norm": 0.2322820559028209, "learning_rate": 0.00028330674026299596, "loss": 3.0462963581085205, "step": 6620, "token_acc": 0.2942489288740281 }, { "epoch": 3.8812664907651717, "grad_norm": 0.2318134110493184, "learning_rate": 0.0002833000743894922, "loss": 3.0335474014282227, "step": 6621, "token_acc": 0.294932730367727 }, { "epoch": 3.8818528290823804, "grad_norm": 0.22933370111301155, "learning_rate": 0.0002832934072638138, "loss": 3.0827155113220215, "step": 6622, "token_acc": 0.2874256210055725 }, { "epoch": 3.8824391673995895, "grad_norm": 0.2459140686649064, "learning_rate": 0.0002832867388860235, "loss": 3.0600996017456055, "step": 6623, "token_acc": 0.29206578757082946 }, { "epoch": 3.8830255057167986, "grad_norm": 0.2552504096456068, "learning_rate": 0.00028328006925618386, "loss": 3.0486087799072266, "step": 6624, "token_acc": 0.29198817223596235 }, { "epoch": 3.8836118440340077, "grad_norm": 0.24279725935769475, "learning_rate": 0.0002832733983743576, "loss": 3.0308966636657715, "step": 6625, "token_acc": 0.2939305303498829 }, { "epoch": 3.8841981823512164, "grad_norm": 0.2330557417018749, "learning_rate": 0.0002832667262406074, "loss": 3.0677649974823, "step": 6626, "token_acc": 0.28943839104932473 }, { "epoch": 3.8847845206684255, "grad_norm": 0.2321188613082975, "learning_rate": 0.0002832600528549958, "loss": 3.0766537189483643, "step": 6627, "token_acc": 0.2888601946751986 }, { "epoch": 3.8853708589856346, "grad_norm": 0.24103151703933487, "learning_rate": 0.0002832533782175856, "loss": 3.073457717895508, "step": 6628, "token_acc": 0.2888510706724198 }, { "epoch": 3.8859571973028437, "grad_norm": 0.23142845261762923, "learning_rate": 0.00028324670232843946, "loss": 3.0297064781188965, "step": 6629, "token_acc": 0.2975073144078358 }, { "epoch": 3.886543535620053, "grad_norm": 0.2348195312800062, "learning_rate": 0.0002832400251876201, "loss": 3.0747756958007812, "step": 6630, "token_acc": 0.28996943944404907 }, { "epoch": 3.887129873937262, "grad_norm": 0.23133719353892598, "learning_rate": 0.00028323334679519025, "loss": 3.0564613342285156, "step": 6631, "token_acc": 0.2911819747783 }, { "epoch": 3.887716212254471, "grad_norm": 0.21218171517766488, "learning_rate": 0.00028322666715121267, "loss": 3.043491840362549, "step": 6632, "token_acc": 0.2940700398732921 }, { "epoch": 3.8883025505716797, "grad_norm": 0.2190640585921852, "learning_rate": 0.0002832199862557501, "loss": 3.0259041786193848, "step": 6633, "token_acc": 0.2967145502138798 }, { "epoch": 3.888888888888889, "grad_norm": 0.22122582908901495, "learning_rate": 0.0002832133041088652, "loss": 3.01729416847229, "step": 6634, "token_acc": 0.29782185836735564 }, { "epoch": 3.889475227206098, "grad_norm": 0.23598118996322903, "learning_rate": 0.00028320662071062085, "loss": 3.1118011474609375, "step": 6635, "token_acc": 0.28577138709779215 }, { "epoch": 3.890061565523307, "grad_norm": 0.21292860098561023, "learning_rate": 0.0002831999360610798, "loss": 3.0199241638183594, "step": 6636, "token_acc": 0.295727015569823 }, { "epoch": 3.8906479038405157, "grad_norm": 0.2377487729979134, "learning_rate": 0.00028319325016030485, "loss": 3.0562400817871094, "step": 6637, "token_acc": 0.2933976184043725 }, { "epoch": 3.891234242157725, "grad_norm": 0.23839287887917152, "learning_rate": 0.0002831865630083588, "loss": 3.0656983852386475, "step": 6638, "token_acc": 0.29149322090124025 }, { "epoch": 3.891820580474934, "grad_norm": 0.2344282854684225, "learning_rate": 0.00028317987460530446, "loss": 3.053492546081543, "step": 6639, "token_acc": 0.2933444956048556 }, { "epoch": 3.892406918792143, "grad_norm": 0.240011301026954, "learning_rate": 0.00028317318495120464, "loss": 3.0419912338256836, "step": 6640, "token_acc": 0.29301302656064965 }, { "epoch": 3.892993257109352, "grad_norm": 0.2574692756331428, "learning_rate": 0.0002831664940461222, "loss": 3.056633949279785, "step": 6641, "token_acc": 0.2919304437720757 }, { "epoch": 3.8935795954265613, "grad_norm": 0.23571932464319284, "learning_rate": 0.00028315980189012, "loss": 3.031526565551758, "step": 6642, "token_acc": 0.29528465157070166 }, { "epoch": 3.8941659337437704, "grad_norm": 0.25688858352677796, "learning_rate": 0.0002831531084832609, "loss": 3.0825862884521484, "step": 6643, "token_acc": 0.28788494565858647 }, { "epoch": 3.894752272060979, "grad_norm": 0.2430288280200242, "learning_rate": 0.0002831464138256078, "loss": 3.008242607116699, "step": 6644, "token_acc": 0.2990035356753757 }, { "epoch": 3.895338610378188, "grad_norm": 0.21856867612893754, "learning_rate": 0.0002831397179172235, "loss": 3.0225906372070312, "step": 6645, "token_acc": 0.2971281419405728 }, { "epoch": 3.8959249486953973, "grad_norm": 0.23243212292273516, "learning_rate": 0.000283133020758171, "loss": 3.030179977416992, "step": 6646, "token_acc": 0.2954980339382215 }, { "epoch": 3.8965112870126064, "grad_norm": 0.20196987227985078, "learning_rate": 0.00028312632234851314, "loss": 3.0433619022369385, "step": 6647, "token_acc": 0.29319429975827577 }, { "epoch": 3.897097625329815, "grad_norm": 0.21897602745318612, "learning_rate": 0.0002831196226883129, "loss": 3.0525031089782715, "step": 6648, "token_acc": 0.294011519871521 }, { "epoch": 3.897683963647024, "grad_norm": 0.2200529194092275, "learning_rate": 0.00028311292177763314, "loss": 3.0778188705444336, "step": 6649, "token_acc": 0.2871573528333422 }, { "epoch": 3.8982703019642333, "grad_norm": 0.2267538829482773, "learning_rate": 0.0002831062196165369, "loss": 3.0224833488464355, "step": 6650, "token_acc": 0.2988349577753551 }, { "epoch": 3.8988566402814424, "grad_norm": 0.24861182609681137, "learning_rate": 0.00028309951620508707, "loss": 3.1081113815307617, "step": 6651, "token_acc": 0.28512684490152074 }, { "epoch": 3.8994429785986515, "grad_norm": 0.2263817309059537, "learning_rate": 0.00028309281154334666, "loss": 3.0390381813049316, "step": 6652, "token_acc": 0.2940059439178993 }, { "epoch": 3.9000293169158606, "grad_norm": 0.24415035959988496, "learning_rate": 0.0002830861056313786, "loss": 3.051593780517578, "step": 6653, "token_acc": 0.2917268461170762 }, { "epoch": 3.9006156552330697, "grad_norm": 0.24121070679955597, "learning_rate": 0.0002830793984692459, "loss": 3.0274524688720703, "step": 6654, "token_acc": 0.2951560144786358 }, { "epoch": 3.9012019935502784, "grad_norm": 0.21522579831353206, "learning_rate": 0.00028307269005701163, "loss": 3.0161027908325195, "step": 6655, "token_acc": 0.298262869711356 }, { "epoch": 3.9017883318674875, "grad_norm": 0.25189247809608206, "learning_rate": 0.00028306598039473874, "loss": 3.039217710494995, "step": 6656, "token_acc": 0.2941456313203525 }, { "epoch": 3.9023746701846966, "grad_norm": 0.23040877993184575, "learning_rate": 0.00028305926948249025, "loss": 3.077394962310791, "step": 6657, "token_acc": 0.2898218357629248 }, { "epoch": 3.9029610085019057, "grad_norm": 0.24157620149121253, "learning_rate": 0.0002830525573203292, "loss": 3.0572948455810547, "step": 6658, "token_acc": 0.2920584980869472 }, { "epoch": 3.9035473468191144, "grad_norm": 0.2559401477063671, "learning_rate": 0.0002830458439083187, "loss": 2.98641300201416, "step": 6659, "token_acc": 0.30239962641817814 }, { "epoch": 3.9041336851363235, "grad_norm": 0.2402687713045518, "learning_rate": 0.0002830391292465218, "loss": 3.0067455768585205, "step": 6660, "token_acc": 0.2990709078355795 }, { "epoch": 3.9047200234535326, "grad_norm": 0.24293552929677187, "learning_rate": 0.00028303241333500154, "loss": 3.026005744934082, "step": 6661, "token_acc": 0.29507943236318945 }, { "epoch": 3.9053063617707418, "grad_norm": 0.25220635120742474, "learning_rate": 0.000283025696173821, "loss": 3.0515077114105225, "step": 6662, "token_acc": 0.29288520413624786 }, { "epoch": 3.905892700087951, "grad_norm": 0.2422910642741095, "learning_rate": 0.0002830189777630433, "loss": 3.047861337661743, "step": 6663, "token_acc": 0.29250611053426595 }, { "epoch": 3.90647903840516, "grad_norm": 0.2658601922689883, "learning_rate": 0.0002830122581027316, "loss": 3.049682140350342, "step": 6664, "token_acc": 0.29529421425496494 }, { "epoch": 3.9070653767223686, "grad_norm": 0.2533509313923523, "learning_rate": 0.0002830055371929489, "loss": 3.0606789588928223, "step": 6665, "token_acc": 0.29164674361638754 }, { "epoch": 3.9076517150395778, "grad_norm": 0.23658192961767557, "learning_rate": 0.00028299881503375844, "loss": 3.066725492477417, "step": 6666, "token_acc": 0.2918875125931499 }, { "epoch": 3.908238053356787, "grad_norm": 0.23952168878554228, "learning_rate": 0.00028299209162522334, "loss": 3.0528571605682373, "step": 6667, "token_acc": 0.2923870185016682 }, { "epoch": 3.908824391673996, "grad_norm": 0.2514848020353997, "learning_rate": 0.0002829853669674067, "loss": 3.046949863433838, "step": 6668, "token_acc": 0.29156380198224774 }, { "epoch": 3.909410729991205, "grad_norm": 0.23267417649085526, "learning_rate": 0.0002829786410603718, "loss": 3.048292398452759, "step": 6669, "token_acc": 0.2930078427626901 }, { "epoch": 3.9099970683084138, "grad_norm": 0.23497326802674273, "learning_rate": 0.00028297191390418174, "loss": 3.0763468742370605, "step": 6670, "token_acc": 0.28934701925307355 }, { "epoch": 3.910583406625623, "grad_norm": 0.220059342250436, "learning_rate": 0.0002829651854988997, "loss": 3.0775368213653564, "step": 6671, "token_acc": 0.2896742716706242 }, { "epoch": 3.911169744942832, "grad_norm": 0.23823364822274698, "learning_rate": 0.0002829584558445889, "loss": 3.0467519760131836, "step": 6672, "token_acc": 0.2921551505882474 }, { "epoch": 3.911756083260041, "grad_norm": 0.21311213306909843, "learning_rate": 0.00028295172494131264, "loss": 3.062983989715576, "step": 6673, "token_acc": 0.2925994728868556 }, { "epoch": 3.91234242157725, "grad_norm": 0.21365425497274132, "learning_rate": 0.00028294499278913403, "loss": 3.0269436836242676, "step": 6674, "token_acc": 0.29536568084603876 }, { "epoch": 3.9129287598944593, "grad_norm": 0.2424438649726307, "learning_rate": 0.0002829382593881164, "loss": 3.036242961883545, "step": 6675, "token_acc": 0.29537313243377045 }, { "epoch": 3.913515098211668, "grad_norm": 0.229441906280608, "learning_rate": 0.00028293152473832294, "loss": 3.0574142932891846, "step": 6676, "token_acc": 0.2928149593744818 }, { "epoch": 3.914101436528877, "grad_norm": 0.2151297514005966, "learning_rate": 0.00028292478883981686, "loss": 3.011045455932617, "step": 6677, "token_acc": 0.29947034535286926 }, { "epoch": 3.914687774846086, "grad_norm": 0.22124619801365442, "learning_rate": 0.0002829180516926616, "loss": 2.9960131645202637, "step": 6678, "token_acc": 0.30058884507283085 }, { "epoch": 3.9152741131632953, "grad_norm": 0.21602277680782794, "learning_rate": 0.00028291131329692036, "loss": 3.0507607460021973, "step": 6679, "token_acc": 0.2923447635714685 }, { "epoch": 3.915860451480504, "grad_norm": 0.2301622597546687, "learning_rate": 0.0002829045736526564, "loss": 3.059081554412842, "step": 6680, "token_acc": 0.2915253636759361 }, { "epoch": 3.916446789797713, "grad_norm": 0.21828523206645992, "learning_rate": 0.00028289783275993306, "loss": 3.0572023391723633, "step": 6681, "token_acc": 0.29058084607210866 }, { "epoch": 3.917033128114922, "grad_norm": 0.22377395322168317, "learning_rate": 0.0002828910906188137, "loss": 3.071209669113159, "step": 6682, "token_acc": 0.2894880138815431 }, { "epoch": 3.9176194664321313, "grad_norm": 0.2436337013254541, "learning_rate": 0.00028288434722936154, "loss": 3.084959030151367, "step": 6683, "token_acc": 0.28897769674355495 }, { "epoch": 3.9182058047493404, "grad_norm": 0.2153056830038619, "learning_rate": 0.00028287760259164, "loss": 3.0685982704162598, "step": 6684, "token_acc": 0.2904121484077443 }, { "epoch": 3.9187921430665495, "grad_norm": 0.247665457436989, "learning_rate": 0.0002828708567057125, "loss": 3.0498600006103516, "step": 6685, "token_acc": 0.29285000936271144 }, { "epoch": 3.9193784813837587, "grad_norm": 0.21787675831426945, "learning_rate": 0.00028286410957164236, "loss": 3.0233664512634277, "step": 6686, "token_acc": 0.2992248062015504 }, { "epoch": 3.9199648197009673, "grad_norm": 0.23429970808200723, "learning_rate": 0.0002828573611894929, "loss": 3.039573907852173, "step": 6687, "token_acc": 0.2936452677249227 }, { "epoch": 3.9205511580181764, "grad_norm": 0.24268162443909458, "learning_rate": 0.00028285061155932756, "loss": 3.0115113258361816, "step": 6688, "token_acc": 0.2984331315550459 }, { "epoch": 3.9211374963353856, "grad_norm": 0.2377186158223472, "learning_rate": 0.0002828438606812098, "loss": 3.03489089012146, "step": 6689, "token_acc": 0.2942436405227374 }, { "epoch": 3.9217238346525947, "grad_norm": 0.22840378749967277, "learning_rate": 0.0002828371085552029, "loss": 3.066713809967041, "step": 6690, "token_acc": 0.28850421889630984 }, { "epoch": 3.9223101729698033, "grad_norm": 0.22149131932269528, "learning_rate": 0.0002828303551813704, "loss": 3.077237844467163, "step": 6691, "token_acc": 0.2905670777923241 }, { "epoch": 3.9228965112870124, "grad_norm": 0.2619601614382145, "learning_rate": 0.0002828236005597757, "loss": 3.077540159225464, "step": 6692, "token_acc": 0.2898365925257431 }, { "epoch": 3.9234828496042216, "grad_norm": 0.24872294504738326, "learning_rate": 0.00028281684469048224, "loss": 3.041816234588623, "step": 6693, "token_acc": 0.29410757965523326 }, { "epoch": 3.9240691879214307, "grad_norm": 0.22309713252723834, "learning_rate": 0.00028281008757355354, "loss": 3.0555336475372314, "step": 6694, "token_acc": 0.29141607435757244 }, { "epoch": 3.92465552623864, "grad_norm": 0.23591044867040722, "learning_rate": 0.000282803329209053, "loss": 3.0738399028778076, "step": 6695, "token_acc": 0.2887602361454961 }, { "epoch": 3.925241864555849, "grad_norm": 0.24836022703718452, "learning_rate": 0.00028279656959704415, "loss": 3.0278615951538086, "step": 6696, "token_acc": 0.2959969600482199 }, { "epoch": 3.925828202873058, "grad_norm": 0.23225732879979902, "learning_rate": 0.0002827898087375905, "loss": 3.044938087463379, "step": 6697, "token_acc": 0.29305999351826806 }, { "epoch": 3.9264145411902667, "grad_norm": 0.23969872674106554, "learning_rate": 0.00028278304663075546, "loss": 3.036458730697632, "step": 6698, "token_acc": 0.2935539590219149 }, { "epoch": 3.927000879507476, "grad_norm": 0.22358460034022487, "learning_rate": 0.0002827762832766027, "loss": 3.0479438304901123, "step": 6699, "token_acc": 0.29341431304383603 }, { "epoch": 3.927587217824685, "grad_norm": 0.22974965522157517, "learning_rate": 0.0002827695186751956, "loss": 3.0245275497436523, "step": 6700, "token_acc": 0.29747486804063555 }, { "epoch": 3.928173556141894, "grad_norm": 0.23460071253228973, "learning_rate": 0.00028276275282659787, "loss": 3.077690839767456, "step": 6701, "token_acc": 0.28635902958130105 }, { "epoch": 3.9287598944591027, "grad_norm": 0.24000515527506108, "learning_rate": 0.0002827559857308729, "loss": 3.107961654663086, "step": 6702, "token_acc": 0.2855098693362941 }, { "epoch": 3.929346232776312, "grad_norm": 0.2673207884776005, "learning_rate": 0.00028274921738808437, "loss": 3.0990211963653564, "step": 6703, "token_acc": 0.2870714370080854 }, { "epoch": 3.929932571093521, "grad_norm": 0.2208192082560746, "learning_rate": 0.00028274244779829584, "loss": 3.0569448471069336, "step": 6704, "token_acc": 0.29163594127736586 }, { "epoch": 3.93051890941073, "grad_norm": 0.25270103199251015, "learning_rate": 0.00028273567696157083, "loss": 3.0339744091033936, "step": 6705, "token_acc": 0.2953227827250867 }, { "epoch": 3.931105247727939, "grad_norm": 0.21903473677944438, "learning_rate": 0.0002827289048779731, "loss": 3.049659252166748, "step": 6706, "token_acc": 0.29200966655526134 }, { "epoch": 3.9316915860451482, "grad_norm": 0.2224460949350582, "learning_rate": 0.0002827221315475661, "loss": 3.0640735626220703, "step": 6707, "token_acc": 0.2895073136970784 }, { "epoch": 3.9322779243623573, "grad_norm": 0.2659978445207278, "learning_rate": 0.00028271535697041347, "loss": 3.0741114616394043, "step": 6708, "token_acc": 0.28919554480808246 }, { "epoch": 3.932864262679566, "grad_norm": 0.21538648632539104, "learning_rate": 0.00028270858114657894, "loss": 3.0534486770629883, "step": 6709, "token_acc": 0.29199621766557426 }, { "epoch": 3.933450600996775, "grad_norm": 0.2643604392624956, "learning_rate": 0.00028270180407612616, "loss": 3.0857062339782715, "step": 6710, "token_acc": 0.2891350005526985 }, { "epoch": 3.9340369393139842, "grad_norm": 0.23725361249887983, "learning_rate": 0.0002826950257591187, "loss": 3.066263198852539, "step": 6711, "token_acc": 0.29015242773632033 }, { "epoch": 3.9346232776311933, "grad_norm": 0.24037034977374608, "learning_rate": 0.0002826882461956203, "loss": 3.0116324424743652, "step": 6712, "token_acc": 0.2982022425949882 }, { "epoch": 3.935209615948402, "grad_norm": 0.24112948827113753, "learning_rate": 0.0002826814653856946, "loss": 3.0219054222106934, "step": 6713, "token_acc": 0.2963651780522232 }, { "epoch": 3.935795954265611, "grad_norm": 0.24683971154497653, "learning_rate": 0.00028267468332940533, "loss": 3.0734739303588867, "step": 6714, "token_acc": 0.29043177413412924 }, { "epoch": 3.9363822925828202, "grad_norm": 0.2548656739647278, "learning_rate": 0.0002826679000268162, "loss": 3.035954236984253, "step": 6715, "token_acc": 0.29439086460745334 }, { "epoch": 3.9369686309000294, "grad_norm": 0.2530366248111408, "learning_rate": 0.0002826611154779909, "loss": 3.0658559799194336, "step": 6716, "token_acc": 0.28949949910880396 }, { "epoch": 3.9375549692172385, "grad_norm": 0.24643513463827965, "learning_rate": 0.0002826543296829933, "loss": 3.034306287765503, "step": 6717, "token_acc": 0.29474731105576796 }, { "epoch": 3.9381413075344476, "grad_norm": 0.24602544672348126, "learning_rate": 0.0002826475426418869, "loss": 3.092775821685791, "step": 6718, "token_acc": 0.2878916682142885 }, { "epoch": 3.9387276458516562, "grad_norm": 0.2636126528223883, "learning_rate": 0.0002826407543547356, "loss": 2.9968972206115723, "step": 6719, "token_acc": 0.2993412675283486 }, { "epoch": 3.9393139841688654, "grad_norm": 0.2540048650595831, "learning_rate": 0.00028263396482160316, "loss": 3.062431812286377, "step": 6720, "token_acc": 0.28976514474859455 }, { "epoch": 3.9399003224860745, "grad_norm": 0.2492990567503665, "learning_rate": 0.00028262717404255335, "loss": 3.090872049331665, "step": 6721, "token_acc": 0.2874532609885051 }, { "epoch": 3.9404866608032836, "grad_norm": 0.2571005349092623, "learning_rate": 0.00028262038201764996, "loss": 3.0898656845092773, "step": 6722, "token_acc": 0.28764725523545187 }, { "epoch": 3.9410729991204922, "grad_norm": 0.2683856831346269, "learning_rate": 0.0002826135887469568, "loss": 3.0476696491241455, "step": 6723, "token_acc": 0.2931884832415876 }, { "epoch": 3.9416593374377014, "grad_norm": 0.22763003596053677, "learning_rate": 0.00028260679423053764, "loss": 3.079838275909424, "step": 6724, "token_acc": 0.2879815719414467 }, { "epoch": 3.9422456757549105, "grad_norm": 0.24956691581811927, "learning_rate": 0.0002825999984684564, "loss": 3.0442395210266113, "step": 6725, "token_acc": 0.29456341436703165 }, { "epoch": 3.9428320140721196, "grad_norm": 0.2338324957094939, "learning_rate": 0.00028259320146077675, "loss": 3.0346221923828125, "step": 6726, "token_acc": 0.2945282817140875 }, { "epoch": 3.9434183523893287, "grad_norm": 0.2003148380291176, "learning_rate": 0.00028258640320756275, "loss": 3.077000617980957, "step": 6727, "token_acc": 0.28916574925444105 }, { "epoch": 3.944004690706538, "grad_norm": 0.22970754977012314, "learning_rate": 0.0002825796037088781, "loss": 3.0199272632598877, "step": 6728, "token_acc": 0.296481563021596 }, { "epoch": 3.944591029023747, "grad_norm": 0.20966191895594977, "learning_rate": 0.00028257280296478676, "loss": 3.0140631198883057, "step": 6729, "token_acc": 0.2978401722279798 }, { "epoch": 3.9451773673409556, "grad_norm": 0.24261714395519132, "learning_rate": 0.00028256600097535255, "loss": 3.0535202026367188, "step": 6730, "token_acc": 0.29231966182134106 }, { "epoch": 3.9457637056581647, "grad_norm": 0.24356005486577106, "learning_rate": 0.00028255919774063944, "loss": 3.0494778156280518, "step": 6731, "token_acc": 0.29256322945897734 }, { "epoch": 3.946350043975374, "grad_norm": 0.22007016055548814, "learning_rate": 0.00028255239326071126, "loss": 3.0662083625793457, "step": 6732, "token_acc": 0.28933919687545495 }, { "epoch": 3.946936382292583, "grad_norm": 0.24723471292230187, "learning_rate": 0.00028254558753563195, "loss": 3.0725972652435303, "step": 6733, "token_acc": 0.2899561651287845 }, { "epoch": 3.9475227206097916, "grad_norm": 0.22379044997419228, "learning_rate": 0.00028253878056546544, "loss": 3.030632972717285, "step": 6734, "token_acc": 0.2946472541765055 }, { "epoch": 3.9481090589270007, "grad_norm": 0.26448024299971945, "learning_rate": 0.0002825319723502757, "loss": 3.016324043273926, "step": 6735, "token_acc": 0.29764991573667376 }, { "epoch": 3.94869539724421, "grad_norm": 0.20742505329074623, "learning_rate": 0.0002825251628901267, "loss": 3.032114028930664, "step": 6736, "token_acc": 0.2954364323967613 }, { "epoch": 3.949281735561419, "grad_norm": 0.22612951347272905, "learning_rate": 0.0002825183521850823, "loss": 3.0563697814941406, "step": 6737, "token_acc": 0.29301349914773533 }, { "epoch": 3.949868073878628, "grad_norm": 0.21894697569714666, "learning_rate": 0.00028251154023520666, "loss": 3.0927939414978027, "step": 6738, "token_acc": 0.2872158259652694 }, { "epoch": 3.950454412195837, "grad_norm": 0.22694070792351537, "learning_rate": 0.0002825047270405636, "loss": 3.068131446838379, "step": 6739, "token_acc": 0.2902431478038964 }, { "epoch": 3.9510407505130463, "grad_norm": 0.22449641283921373, "learning_rate": 0.00028249791260121713, "loss": 3.0866427421569824, "step": 6740, "token_acc": 0.2868734296083856 }, { "epoch": 3.951627088830255, "grad_norm": 0.23892090333774155, "learning_rate": 0.0002824910969172314, "loss": 3.0920932292938232, "step": 6741, "token_acc": 0.2856788265880571 }, { "epoch": 3.952213427147464, "grad_norm": 0.2215545460394356, "learning_rate": 0.0002824842799886703, "loss": 3.056422233581543, "step": 6742, "token_acc": 0.2921752030638441 }, { "epoch": 3.952799765464673, "grad_norm": 0.22046291260055736, "learning_rate": 0.00028247746181559797, "loss": 3.0602200031280518, "step": 6743, "token_acc": 0.29340861262053586 }, { "epoch": 3.9533861037818823, "grad_norm": 0.2172021756251513, "learning_rate": 0.00028247064239807836, "loss": 3.085911750793457, "step": 6744, "token_acc": 0.28657611711545067 }, { "epoch": 3.953972442099091, "grad_norm": 0.2038759613470377, "learning_rate": 0.0002824638217361756, "loss": 3.0290327072143555, "step": 6745, "token_acc": 0.29515317437254335 }, { "epoch": 3.9545587804163, "grad_norm": 0.20382682934560034, "learning_rate": 0.0002824569998299537, "loss": 3.0598413944244385, "step": 6746, "token_acc": 0.2921572943507141 }, { "epoch": 3.955145118733509, "grad_norm": 0.23065389452544796, "learning_rate": 0.0002824501766794768, "loss": 3.057029962539673, "step": 6747, "token_acc": 0.2927866257366862 }, { "epoch": 3.9557314570507183, "grad_norm": 0.2292896347765202, "learning_rate": 0.0002824433522848089, "loss": 3.0636110305786133, "step": 6748, "token_acc": 0.29130186852679457 }, { "epoch": 3.9563177953679274, "grad_norm": 0.21588582234504008, "learning_rate": 0.00028243652664601424, "loss": 3.063899517059326, "step": 6749, "token_acc": 0.2915826544420954 }, { "epoch": 3.9569041336851365, "grad_norm": 0.22809188685895732, "learning_rate": 0.0002824296997631569, "loss": 3.066152811050415, "step": 6750, "token_acc": 0.2892397546567405 }, { "epoch": 3.9574904720023456, "grad_norm": 0.24281547053455135, "learning_rate": 0.0002824228716363009, "loss": 3.0969042778015137, "step": 6751, "token_acc": 0.2870656946868269 }, { "epoch": 3.9580768103195543, "grad_norm": 0.25536229483991557, "learning_rate": 0.0002824160422655105, "loss": 3.0408742427825928, "step": 6752, "token_acc": 0.2921956865777896 }, { "epoch": 3.9586631486367634, "grad_norm": 0.24482224174153644, "learning_rate": 0.0002824092116508498, "loss": 3.0348386764526367, "step": 6753, "token_acc": 0.2960987391302055 }, { "epoch": 3.9592494869539725, "grad_norm": 0.2209886531571347, "learning_rate": 0.000282402379792383, "loss": 3.0129623413085938, "step": 6754, "token_acc": 0.29878544019471304 }, { "epoch": 3.9598358252711816, "grad_norm": 0.24886883430813175, "learning_rate": 0.00028239554669017426, "loss": 3.0871403217315674, "step": 6755, "token_acc": 0.2859506002543268 }, { "epoch": 3.9604221635883903, "grad_norm": 0.2617009560290949, "learning_rate": 0.0002823887123442878, "loss": 3.058096408843994, "step": 6756, "token_acc": 0.293069045567124 }, { "epoch": 3.9610085019055994, "grad_norm": 0.26339141345354955, "learning_rate": 0.00028238187675478775, "loss": 3.0440804958343506, "step": 6757, "token_acc": 0.2928679319629605 }, { "epoch": 3.9615948402228085, "grad_norm": 0.2766444856924938, "learning_rate": 0.00028237503992173835, "loss": 3.019359827041626, "step": 6758, "token_acc": 0.296670774903163 }, { "epoch": 3.9621811785400176, "grad_norm": 0.2182704398338543, "learning_rate": 0.00028236820184520376, "loss": 3.038116455078125, "step": 6759, "token_acc": 0.2940398511960773 }, { "epoch": 3.9627675168572267, "grad_norm": 0.24775618947801742, "learning_rate": 0.0002823613625252484, "loss": 3.04744553565979, "step": 6760, "token_acc": 0.2922109727946809 }, { "epoch": 3.963353855174436, "grad_norm": 0.26219953207804036, "learning_rate": 0.00028235452196193636, "loss": 3.0423405170440674, "step": 6761, "token_acc": 0.29404705963559713 }, { "epoch": 3.963940193491645, "grad_norm": 0.2220418614641057, "learning_rate": 0.0002823476801553319, "loss": 3.0230350494384766, "step": 6762, "token_acc": 0.29711144919832305 }, { "epoch": 3.9645265318088536, "grad_norm": 0.24710636080384274, "learning_rate": 0.00028234083710549935, "loss": 3.052283525466919, "step": 6763, "token_acc": 0.2929190069602352 }, { "epoch": 3.9651128701260627, "grad_norm": 0.2550042049378017, "learning_rate": 0.00028233399281250295, "loss": 3.040428638458252, "step": 6764, "token_acc": 0.29244324456988585 }, { "epoch": 3.965699208443272, "grad_norm": 0.24840276683217738, "learning_rate": 0.000282327147276407, "loss": 3.0092415809631348, "step": 6765, "token_acc": 0.29776719481739994 }, { "epoch": 3.966285546760481, "grad_norm": 0.20819458405542243, "learning_rate": 0.00028232030049727585, "loss": 2.9837443828582764, "step": 6766, "token_acc": 0.3022064371763849 }, { "epoch": 3.9668718850776896, "grad_norm": 0.24306443732799674, "learning_rate": 0.00028231345247517383, "loss": 3.0735344886779785, "step": 6767, "token_acc": 0.28807418646018623 }, { "epoch": 3.9674582233948987, "grad_norm": 0.25195504799063106, "learning_rate": 0.0002823066032101651, "loss": 3.084482192993164, "step": 6768, "token_acc": 0.2886679849369734 }, { "epoch": 3.968044561712108, "grad_norm": 0.2447755461825122, "learning_rate": 0.0002822997527023142, "loss": 3.080294132232666, "step": 6769, "token_acc": 0.2895466390910437 }, { "epoch": 3.968630900029317, "grad_norm": 0.2707098064708686, "learning_rate": 0.0002822929009516854, "loss": 3.0475926399230957, "step": 6770, "token_acc": 0.2932736771746989 }, { "epoch": 3.969217238346526, "grad_norm": 0.2397874020903097, "learning_rate": 0.000282286047958343, "loss": 2.9995970726013184, "step": 6771, "token_acc": 0.29842250026965805 }, { "epoch": 3.969803576663735, "grad_norm": 0.2532080909041808, "learning_rate": 0.0002822791937223515, "loss": 3.0110816955566406, "step": 6772, "token_acc": 0.2982383849412358 }, { "epoch": 3.970389914980944, "grad_norm": 0.23527375119915844, "learning_rate": 0.0002822723382437752, "loss": 3.0472917556762695, "step": 6773, "token_acc": 0.2930018491049113 }, { "epoch": 3.970976253298153, "grad_norm": 0.27297847923919916, "learning_rate": 0.00028226548152267847, "loss": 3.00388765335083, "step": 6774, "token_acc": 0.2988401617583037 }, { "epoch": 3.971562591615362, "grad_norm": 0.23163793971605773, "learning_rate": 0.00028225862355912585, "loss": 2.9881153106689453, "step": 6775, "token_acc": 0.30237324336279714 }, { "epoch": 3.972148929932571, "grad_norm": 0.2750650026383408, "learning_rate": 0.0002822517643531817, "loss": 3.047593832015991, "step": 6776, "token_acc": 0.29272037010144597 }, { "epoch": 3.97273526824978, "grad_norm": 0.23138577226141568, "learning_rate": 0.00028224490390491036, "loss": 3.0961873531341553, "step": 6777, "token_acc": 0.2854853260883614 }, { "epoch": 3.973321606566989, "grad_norm": 0.2589282498922663, "learning_rate": 0.0002822380422143764, "loss": 3.0568552017211914, "step": 6778, "token_acc": 0.29347016951756766 }, { "epoch": 3.973907944884198, "grad_norm": 0.24069344142515253, "learning_rate": 0.0002822311792816442, "loss": 3.0322418212890625, "step": 6779, "token_acc": 0.2938101052263572 }, { "epoch": 3.974494283201407, "grad_norm": 0.23617331941176517, "learning_rate": 0.0002822243151067782, "loss": 3.077845573425293, "step": 6780, "token_acc": 0.2880276408827941 }, { "epoch": 3.9750806215186163, "grad_norm": 0.25616509608122656, "learning_rate": 0.000282217449689843, "loss": 3.0179669857025146, "step": 6781, "token_acc": 0.29785113830901705 }, { "epoch": 3.9756669598358254, "grad_norm": 0.25183341893354994, "learning_rate": 0.00028221058303090304, "loss": 3.127781867980957, "step": 6782, "token_acc": 0.2830237457158142 }, { "epoch": 3.9762532981530345, "grad_norm": 0.22894376745665704, "learning_rate": 0.0002822037151300228, "loss": 3.043041706085205, "step": 6783, "token_acc": 0.2932302450801726 }, { "epoch": 3.976839636470243, "grad_norm": 0.23249955084840923, "learning_rate": 0.0002821968459872668, "loss": 3.039473533630371, "step": 6784, "token_acc": 0.2930569047363446 }, { "epoch": 3.9774259747874523, "grad_norm": 0.23642861366278178, "learning_rate": 0.00028218997560269956, "loss": 3.0834927558898926, "step": 6785, "token_acc": 0.2877023920819716 }, { "epoch": 3.9780123131046614, "grad_norm": 0.21646606616616612, "learning_rate": 0.00028218310397638564, "loss": 3.063615560531616, "step": 6786, "token_acc": 0.29074605564854217 }, { "epoch": 3.9785986514218705, "grad_norm": 0.242648606032343, "learning_rate": 0.00028217623110838956, "loss": 3.03950834274292, "step": 6787, "token_acc": 0.29262699472981074 }, { "epoch": 3.979184989739079, "grad_norm": 0.23327309791884834, "learning_rate": 0.0002821693569987759, "loss": 3.057147979736328, "step": 6788, "token_acc": 0.2917884813607042 }, { "epoch": 3.9797713280562883, "grad_norm": 0.23442939592708562, "learning_rate": 0.0002821624816476092, "loss": 3.0209579467773438, "step": 6789, "token_acc": 0.29614456162313724 }, { "epoch": 3.9803576663734974, "grad_norm": 0.22232839904852242, "learning_rate": 0.00028215560505495414, "loss": 3.0328869819641113, "step": 6790, "token_acc": 0.2960408493945079 }, { "epoch": 3.9809440046907065, "grad_norm": 0.224847599001003, "learning_rate": 0.00028214872722087523, "loss": 3.0409202575683594, "step": 6791, "token_acc": 0.2930351789286394 }, { "epoch": 3.9815303430079156, "grad_norm": 0.22511505498637185, "learning_rate": 0.0002821418481454371, "loss": 3.0554141998291016, "step": 6792, "token_acc": 0.2912018460148241 }, { "epoch": 3.9821166813251248, "grad_norm": 0.2517873437894732, "learning_rate": 0.00028213496782870435, "loss": 3.0371201038360596, "step": 6793, "token_acc": 0.2940547251557694 }, { "epoch": 3.982703019642334, "grad_norm": 0.22287730256972133, "learning_rate": 0.00028212808627074167, "loss": 3.0753560066223145, "step": 6794, "token_acc": 0.29199729111562595 }, { "epoch": 3.9832893579595425, "grad_norm": 0.22673259262250867, "learning_rate": 0.00028212120347161367, "loss": 3.033461093902588, "step": 6795, "token_acc": 0.29428491621517566 }, { "epoch": 3.9838756962767516, "grad_norm": 0.23363708940968483, "learning_rate": 0.0002821143194313849, "loss": 3.0398786067962646, "step": 6796, "token_acc": 0.2942784756676871 }, { "epoch": 3.9844620345939608, "grad_norm": 0.23625510437521774, "learning_rate": 0.00028210743415012023, "loss": 3.0347304344177246, "step": 6797, "token_acc": 0.2959248940298993 }, { "epoch": 3.98504837291117, "grad_norm": 0.2608626052020712, "learning_rate": 0.0002821005476278842, "loss": 3.053231716156006, "step": 6798, "token_acc": 0.2930150246581637 }, { "epoch": 3.9856347112283785, "grad_norm": 0.2522376154282012, "learning_rate": 0.00028209365986474154, "loss": 3.0779242515563965, "step": 6799, "token_acc": 0.2882339406473726 }, { "epoch": 3.9862210495455876, "grad_norm": 0.2261273395977032, "learning_rate": 0.00028208677086075687, "loss": 3.038626194000244, "step": 6800, "token_acc": 0.29485817843516327 }, { "epoch": 3.9868073878627968, "grad_norm": 0.24019710677047218, "learning_rate": 0.0002820798806159951, "loss": 3.1072871685028076, "step": 6801, "token_acc": 0.28583235229244675 }, { "epoch": 3.987393726180006, "grad_norm": 0.2512887802242321, "learning_rate": 0.00028207298913052073, "loss": 3.0856523513793945, "step": 6802, "token_acc": 0.28872333988806537 }, { "epoch": 3.987980064497215, "grad_norm": 0.23261684995397142, "learning_rate": 0.00028206609640439866, "loss": 3.059176445007324, "step": 6803, "token_acc": 0.29180901970219475 }, { "epoch": 3.988566402814424, "grad_norm": 0.2328778392300914, "learning_rate": 0.00028205920243769354, "loss": 3.0244178771972656, "step": 6804, "token_acc": 0.29575560543901747 }, { "epoch": 3.989152741131633, "grad_norm": 0.22089351747239355, "learning_rate": 0.0002820523072304702, "loss": 2.9568567276000977, "step": 6805, "token_acc": 0.30743478687322867 }, { "epoch": 3.989739079448842, "grad_norm": 0.21493245419057083, "learning_rate": 0.0002820454107827934, "loss": 3.0491855144500732, "step": 6806, "token_acc": 0.2921758956990125 }, { "epoch": 3.990325417766051, "grad_norm": 0.21334340386383263, "learning_rate": 0.0002820385130947278, "loss": 3.050236701965332, "step": 6807, "token_acc": 0.2908767636306971 }, { "epoch": 3.99091175608326, "grad_norm": 0.2375055989795221, "learning_rate": 0.00028203161416633836, "loss": 3.1043057441711426, "step": 6808, "token_acc": 0.2866864353246428 }, { "epoch": 3.991498094400469, "grad_norm": 0.20744213953214477, "learning_rate": 0.0002820247139976898, "loss": 3.0365242958068848, "step": 6809, "token_acc": 0.2955002008838891 }, { "epoch": 3.992084432717678, "grad_norm": 0.2316783924806943, "learning_rate": 0.000282017812588847, "loss": 3.03525447845459, "step": 6810, "token_acc": 0.2949135826552979 }, { "epoch": 3.992670771034887, "grad_norm": 0.23384033697557163, "learning_rate": 0.00028201090993987466, "loss": 3.025934934616089, "step": 6811, "token_acc": 0.2963743030252664 }, { "epoch": 3.993257109352096, "grad_norm": 0.2369093605530815, "learning_rate": 0.0002820040060508378, "loss": 3.0263609886169434, "step": 6812, "token_acc": 0.2979733603620206 }, { "epoch": 3.993843447669305, "grad_norm": 0.23293367127104092, "learning_rate": 0.0002819971009218011, "loss": 3.031742811203003, "step": 6813, "token_acc": 0.29453537357087256 }, { "epoch": 3.9944297859865143, "grad_norm": 0.21007110523762135, "learning_rate": 0.0002819901945528296, "loss": 3.032787322998047, "step": 6814, "token_acc": 0.2954955480023631 }, { "epoch": 3.9950161243037234, "grad_norm": 0.23303161242081089, "learning_rate": 0.000281983286943988, "loss": 3.039156913757324, "step": 6815, "token_acc": 0.2967778374880732 }, { "epoch": 3.9956024626209325, "grad_norm": 0.22806358375156352, "learning_rate": 0.0002819763780953413, "loss": 3.0107882022857666, "step": 6816, "token_acc": 0.3007929802952424 }, { "epoch": 3.996188800938141, "grad_norm": 0.24446090428430606, "learning_rate": 0.0002819694680069544, "loss": 3.000490665435791, "step": 6817, "token_acc": 0.2986192968498469 }, { "epoch": 3.9967751392553503, "grad_norm": 0.235042736585659, "learning_rate": 0.00028196255667889213, "loss": 3.060365915298462, "step": 6818, "token_acc": 0.29064286807735795 }, { "epoch": 3.9973614775725594, "grad_norm": 0.25854175721791595, "learning_rate": 0.0002819556441112195, "loss": 3.085367441177368, "step": 6819, "token_acc": 0.28709749942531493 }, { "epoch": 3.9979478158897686, "grad_norm": 0.27743564912082924, "learning_rate": 0.0002819487303040014, "loss": 3.0660033226013184, "step": 6820, "token_acc": 0.28893453495334753 }, { "epoch": 3.998534154206977, "grad_norm": 0.2407415566890626, "learning_rate": 0.0002819418152573027, "loss": 3.048217296600342, "step": 6821, "token_acc": 0.292888224748988 }, { "epoch": 3.9991204925241863, "grad_norm": 0.24866435752706523, "learning_rate": 0.0002819348989711885, "loss": 3.043728828430176, "step": 6822, "token_acc": 0.29371006598931604 }, { "epoch": 3.9997068308413954, "grad_norm": 0.23171331540245313, "learning_rate": 0.00028192798144572375, "loss": 3.0476438999176025, "step": 6823, "token_acc": 0.29490519648710994 }, { "epoch": 4.0, "grad_norm": 0.28439850357277063, "learning_rate": 0.00028192106268097334, "loss": 3.066741466522217, "step": 6824, "token_acc": 0.28813739645869746 }, { "epoch": 4.0, "eval_loss": 3.0832579135894775, "eval_runtime": 8.6908, "eval_samples_per_second": 29.456, "eval_steps_per_second": 3.682, "eval_token_acc": 0.2887123355594045, "step": 6824 }, { "epoch": 4.000586338317209, "grad_norm": 0.28216336788056445, "learning_rate": 0.00028191414267700235, "loss": 2.944533348083496, "step": 6825, "token_acc": 0.3062234452842389 }, { "epoch": 4.001172676634418, "grad_norm": 0.27395479903507763, "learning_rate": 0.0002819072214338757, "loss": 2.967478036880493, "step": 6826, "token_acc": 0.30317066271160004 }, { "epoch": 4.001759014951627, "grad_norm": 0.27788378274825914, "learning_rate": 0.0002819002989516585, "loss": 2.9264707565307617, "step": 6827, "token_acc": 0.30926321163618753 }, { "epoch": 4.0023453532688364, "grad_norm": 0.2714292277794991, "learning_rate": 0.00028189337523041566, "loss": 2.9909849166870117, "step": 6828, "token_acc": 0.29895499045375146 }, { "epoch": 4.002931691586046, "grad_norm": 0.265384067148991, "learning_rate": 0.0002818864502702123, "loss": 2.9543075561523438, "step": 6829, "token_acc": 0.30500837891674837 }, { "epoch": 4.003518029903254, "grad_norm": 0.26683949037227045, "learning_rate": 0.00028187952407111356, "loss": 2.8907968997955322, "step": 6830, "token_acc": 0.3160368425123969 }, { "epoch": 4.004104368220463, "grad_norm": 0.23893140333650503, "learning_rate": 0.00028187259663318433, "loss": 2.916276454925537, "step": 6831, "token_acc": 0.3101819668247062 }, { "epoch": 4.004690706537672, "grad_norm": 0.2502459052824102, "learning_rate": 0.0002818656679564897, "loss": 2.937521457672119, "step": 6832, "token_acc": 0.30728913315319123 }, { "epoch": 4.005277044854881, "grad_norm": 0.26280534259129135, "learning_rate": 0.0002818587380410949, "loss": 2.925959587097168, "step": 6833, "token_acc": 0.30921082819930107 }, { "epoch": 4.00586338317209, "grad_norm": 0.26351394380053844, "learning_rate": 0.0002818518068870649, "loss": 2.9871985912323, "step": 6834, "token_acc": 0.3008091904541924 }, { "epoch": 4.006449721489299, "grad_norm": 0.2728771235082952, "learning_rate": 0.00028184487449446486, "loss": 3.002471446990967, "step": 6835, "token_acc": 0.3005390233454138 }, { "epoch": 4.0070360598065085, "grad_norm": 0.31299569159245527, "learning_rate": 0.00028183794086335983, "loss": 2.9059829711914062, "step": 6836, "token_acc": 0.3108463978809253 }, { "epoch": 4.007622398123718, "grad_norm": 0.23232042205554926, "learning_rate": 0.0002818310059938151, "loss": 2.955127239227295, "step": 6837, "token_acc": 0.30381741175512367 }, { "epoch": 4.008208736440927, "grad_norm": 0.25988279385537993, "learning_rate": 0.00028182406988589565, "loss": 2.9458322525024414, "step": 6838, "token_acc": 0.3072939456493202 }, { "epoch": 4.008795074758136, "grad_norm": 0.2503989358406708, "learning_rate": 0.0002818171325396667, "loss": 2.919551372528076, "step": 6839, "token_acc": 0.3103773938262158 }, { "epoch": 4.009381413075345, "grad_norm": 0.23452859189331524, "learning_rate": 0.00028181019395519345, "loss": 2.9691507816314697, "step": 6840, "token_acc": 0.30378627465297214 }, { "epoch": 4.009967751392553, "grad_norm": 0.2888972510298122, "learning_rate": 0.00028180325413254103, "loss": 2.9465975761413574, "step": 6841, "token_acc": 0.3060956688871564 }, { "epoch": 4.010554089709762, "grad_norm": 0.24449485094201281, "learning_rate": 0.00028179631307177457, "loss": 2.951639413833618, "step": 6842, "token_acc": 0.30501685361359027 }, { "epoch": 4.011140428026971, "grad_norm": 0.25895866770562065, "learning_rate": 0.00028178937077295944, "loss": 2.992367744445801, "step": 6843, "token_acc": 0.3001132561000352 }, { "epoch": 4.0117267663441805, "grad_norm": 0.2321962739273638, "learning_rate": 0.00028178242723616074, "loss": 2.8931946754455566, "step": 6844, "token_acc": 0.31423757579345696 }, { "epoch": 4.01231310466139, "grad_norm": 0.2343966391566824, "learning_rate": 0.0002817754824614437, "loss": 2.9754481315612793, "step": 6845, "token_acc": 0.30297965638703184 }, { "epoch": 4.012899442978599, "grad_norm": 0.23062670460327683, "learning_rate": 0.00028176853644887355, "loss": 2.92448091506958, "step": 6846, "token_acc": 0.30941318977119786 }, { "epoch": 4.013485781295808, "grad_norm": 0.24968046097314264, "learning_rate": 0.00028176158919851566, "loss": 2.961799144744873, "step": 6847, "token_acc": 0.3039912726178385 }, { "epoch": 4.014072119613017, "grad_norm": 0.24445860737859293, "learning_rate": 0.00028175464071043506, "loss": 2.9927375316619873, "step": 6848, "token_acc": 0.30192968303406864 }, { "epoch": 4.014658457930226, "grad_norm": 0.22608646061513976, "learning_rate": 0.00028174769098469725, "loss": 2.9448094367980957, "step": 6849, "token_acc": 0.30507045187249315 }, { "epoch": 4.015244796247435, "grad_norm": 0.22150582025533833, "learning_rate": 0.0002817407400213673, "loss": 2.9568686485290527, "step": 6850, "token_acc": 0.3041502725454569 }, { "epoch": 4.015831134564644, "grad_norm": 0.23244199406580396, "learning_rate": 0.00028173378782051075, "loss": 2.942444086074829, "step": 6851, "token_acc": 0.3057414582599507 }, { "epoch": 4.0164174728818525, "grad_norm": 0.23118338525190385, "learning_rate": 0.0002817268343821927, "loss": 2.9376115798950195, "step": 6852, "token_acc": 0.3093613222369053 }, { "epoch": 4.017003811199062, "grad_norm": 0.24938039788287913, "learning_rate": 0.0002817198797064786, "loss": 2.980619192123413, "step": 6853, "token_acc": 0.30216388995494525 }, { "epoch": 4.017590149516271, "grad_norm": 0.2098607385288621, "learning_rate": 0.00028171292379343367, "loss": 2.9677047729492188, "step": 6854, "token_acc": 0.3022785838506163 }, { "epoch": 4.01817648783348, "grad_norm": 0.23299093960293868, "learning_rate": 0.00028170596664312333, "loss": 2.9541516304016113, "step": 6855, "token_acc": 0.30500204937415265 }, { "epoch": 4.018762826150689, "grad_norm": 0.21243887508578263, "learning_rate": 0.00028169900825561294, "loss": 2.9389421939849854, "step": 6856, "token_acc": 0.30743810483255485 }, { "epoch": 4.019349164467898, "grad_norm": 0.23343911908070072, "learning_rate": 0.0002816920486309678, "loss": 2.966653823852539, "step": 6857, "token_acc": 0.30342511207118383 }, { "epoch": 4.019935502785107, "grad_norm": 0.23591169720119587, "learning_rate": 0.0002816850877692533, "loss": 2.9488651752471924, "step": 6858, "token_acc": 0.30680579056213575 }, { "epoch": 4.020521841102316, "grad_norm": 0.21111065589458275, "learning_rate": 0.0002816781256705349, "loss": 2.9734513759613037, "step": 6859, "token_acc": 0.3011205279898878 }, { "epoch": 4.021108179419525, "grad_norm": 0.24698705205671348, "learning_rate": 0.0002816711623348779, "loss": 2.923494815826416, "step": 6860, "token_acc": 0.31093772166264716 }, { "epoch": 4.0216945177367345, "grad_norm": 0.22884400357792228, "learning_rate": 0.0002816641977623478, "loss": 2.952188014984131, "step": 6861, "token_acc": 0.30517447895871597 }, { "epoch": 4.022280856053943, "grad_norm": 0.2261622138764315, "learning_rate": 0.00028165723195300996, "loss": 2.9609763622283936, "step": 6862, "token_acc": 0.3032411543662107 }, { "epoch": 4.022867194371152, "grad_norm": 0.24293061996220033, "learning_rate": 0.0002816502649069298, "loss": 2.9293220043182373, "step": 6863, "token_acc": 0.3087857093209147 }, { "epoch": 4.023453532688361, "grad_norm": 0.24767818354102958, "learning_rate": 0.00028164329662417286, "loss": 2.9248855113983154, "step": 6864, "token_acc": 0.3082075706586778 }, { "epoch": 4.02403987100557, "grad_norm": 0.22793501718735645, "learning_rate": 0.0002816363271048045, "loss": 2.9628825187683105, "step": 6865, "token_acc": 0.30342474999740393 }, { "epoch": 4.024626209322779, "grad_norm": 0.2357400941888537, "learning_rate": 0.00028162935634889027, "loss": 2.9487009048461914, "step": 6866, "token_acc": 0.3064385853574107 }, { "epoch": 4.025212547639988, "grad_norm": 0.25716743022408983, "learning_rate": 0.00028162238435649556, "loss": 2.9794740676879883, "step": 6867, "token_acc": 0.3018006855821253 }, { "epoch": 4.025798885957197, "grad_norm": 0.2541800291026612, "learning_rate": 0.00028161541112768597, "loss": 2.9488229751586914, "step": 6868, "token_acc": 0.30483346922186066 }, { "epoch": 4.0263852242744065, "grad_norm": 0.2627002261975489, "learning_rate": 0.0002816084366625269, "loss": 2.94063401222229, "step": 6869, "token_acc": 0.3051032542072119 }, { "epoch": 4.026971562591616, "grad_norm": 0.2390431291299368, "learning_rate": 0.0002816014609610839, "loss": 2.9164950847625732, "step": 6870, "token_acc": 0.3100041887740855 }, { "epoch": 4.027557900908825, "grad_norm": 0.25149967986428645, "learning_rate": 0.00028159448402342255, "loss": 2.9283103942871094, "step": 6871, "token_acc": 0.30773154335030145 }, { "epoch": 4.028144239226034, "grad_norm": 0.26899724204387904, "learning_rate": 0.0002815875058496084, "loss": 2.950249671936035, "step": 6872, "token_acc": 0.30503656105176574 }, { "epoch": 4.028730577543242, "grad_norm": 0.2514777683898177, "learning_rate": 0.00028158052643970685, "loss": 2.941936731338501, "step": 6873, "token_acc": 0.30690293628461146 }, { "epoch": 4.029316915860451, "grad_norm": 0.24725644212320996, "learning_rate": 0.0002815735457937836, "loss": 2.9809818267822266, "step": 6874, "token_acc": 0.3009461825242916 }, { "epoch": 4.02990325417766, "grad_norm": 0.2557611248726055, "learning_rate": 0.00028156656391190417, "loss": 2.931403875350952, "step": 6875, "token_acc": 0.3082515920363773 }, { "epoch": 4.030489592494869, "grad_norm": 0.26653458449535766, "learning_rate": 0.00028155958079413416, "loss": 2.978273868560791, "step": 6876, "token_acc": 0.3020928378148734 }, { "epoch": 4.0310759308120785, "grad_norm": 0.25368195208694644, "learning_rate": 0.0002815525964405392, "loss": 2.9217934608459473, "step": 6877, "token_acc": 0.308928412348228 }, { "epoch": 4.031662269129288, "grad_norm": 0.24439218556045106, "learning_rate": 0.0002815456108511848, "loss": 2.9629507064819336, "step": 6878, "token_acc": 0.305746858703107 }, { "epoch": 4.032248607446497, "grad_norm": 0.25185516113403245, "learning_rate": 0.0002815386240261367, "loss": 2.9473233222961426, "step": 6879, "token_acc": 0.3063880994578269 }, { "epoch": 4.032834945763706, "grad_norm": 0.2421124145847601, "learning_rate": 0.00028153163596546054, "loss": 2.9624361991882324, "step": 6880, "token_acc": 0.30239046730822333 }, { "epoch": 4.033421284080915, "grad_norm": 0.2539310943188892, "learning_rate": 0.00028152464666922176, "loss": 2.9481663703918457, "step": 6881, "token_acc": 0.30659783596704393 }, { "epoch": 4.034007622398124, "grad_norm": 0.2184570666864419, "learning_rate": 0.00028151765613748626, "loss": 2.9816980361938477, "step": 6882, "token_acc": 0.3012252797935473 }, { "epoch": 4.034593960715333, "grad_norm": 0.2389289269437876, "learning_rate": 0.00028151066437031956, "loss": 2.928229331970215, "step": 6883, "token_acc": 0.3085266827399777 }, { "epoch": 4.035180299032541, "grad_norm": 0.25975013857731755, "learning_rate": 0.0002815036713677874, "loss": 2.9651870727539062, "step": 6884, "token_acc": 0.3028297357021018 }, { "epoch": 4.0357666373497505, "grad_norm": 0.2542800726881421, "learning_rate": 0.0002814966771299554, "loss": 2.899287700653076, "step": 6885, "token_acc": 0.3132434702186693 }, { "epoch": 4.03635297566696, "grad_norm": 0.2466108736272474, "learning_rate": 0.00028148968165688936, "loss": 2.9474711418151855, "step": 6886, "token_acc": 0.3069161423481007 }, { "epoch": 4.036939313984169, "grad_norm": 0.26595522224939017, "learning_rate": 0.000281482684948655, "loss": 2.972527503967285, "step": 6887, "token_acc": 0.30060869497154813 }, { "epoch": 4.037525652301378, "grad_norm": 0.23235069213361323, "learning_rate": 0.00028147568700531787, "loss": 2.9345703125, "step": 6888, "token_acc": 0.3095714097322959 }, { "epoch": 4.038111990618587, "grad_norm": 0.253754163181534, "learning_rate": 0.0002814686878269439, "loss": 2.955718994140625, "step": 6889, "token_acc": 0.3040995795303046 }, { "epoch": 4.038698328935796, "grad_norm": 0.22127013980095941, "learning_rate": 0.00028146168741359875, "loss": 2.961019992828369, "step": 6890, "token_acc": 0.30375535193712744 }, { "epoch": 4.039284667253005, "grad_norm": 0.22898829945286564, "learning_rate": 0.0002814546857653482, "loss": 2.9535651206970215, "step": 6891, "token_acc": 0.3059546345532539 }, { "epoch": 4.039871005570214, "grad_norm": 0.22509580601947154, "learning_rate": 0.000281447682882258, "loss": 2.9435219764709473, "step": 6892, "token_acc": 0.3054716339114475 }, { "epoch": 4.040457343887423, "grad_norm": 0.22913555838447983, "learning_rate": 0.00028144067876439396, "loss": 2.956338405609131, "step": 6893, "token_acc": 0.3050677177412517 }, { "epoch": 4.0410436822046325, "grad_norm": 0.212019453446366, "learning_rate": 0.0002814336734118218, "loss": 2.951082706451416, "step": 6894, "token_acc": 0.3045080595077145 }, { "epoch": 4.041630020521841, "grad_norm": 0.2316710447880236, "learning_rate": 0.0002814266668246075, "loss": 2.9304442405700684, "step": 6895, "token_acc": 0.3088595771403543 }, { "epoch": 4.04221635883905, "grad_norm": 0.23815131880844087, "learning_rate": 0.00028141965900281666, "loss": 2.982321262359619, "step": 6896, "token_acc": 0.30016633824191674 }, { "epoch": 4.042802697156259, "grad_norm": 0.231105456707983, "learning_rate": 0.0002814126499465153, "loss": 2.967602252960205, "step": 6897, "token_acc": 0.3022644214266966 }, { "epoch": 4.043389035473468, "grad_norm": 0.23221661805069357, "learning_rate": 0.00028140563965576914, "loss": 2.9378325939178467, "step": 6898, "token_acc": 0.3071820897440015 }, { "epoch": 4.043975373790677, "grad_norm": 0.23631529893571196, "learning_rate": 0.00028139862813064405, "loss": 2.9074361324310303, "step": 6899, "token_acc": 0.31095913735835057 }, { "epoch": 4.044561712107886, "grad_norm": 0.21895844765962938, "learning_rate": 0.0002813916153712059, "loss": 2.9249377250671387, "step": 6900, "token_acc": 0.30870772655208256 }, { "epoch": 4.045148050425095, "grad_norm": 0.2308639660276988, "learning_rate": 0.0002813846013775206, "loss": 2.985396385192871, "step": 6901, "token_acc": 0.30179490014584553 }, { "epoch": 4.0457343887423045, "grad_norm": 0.240050473848098, "learning_rate": 0.000281377586149654, "loss": 2.9237637519836426, "step": 6902, "token_acc": 0.309829310716753 }, { "epoch": 4.046320727059514, "grad_norm": 0.23318000128119437, "learning_rate": 0.00028137056968767206, "loss": 2.941197395324707, "step": 6903, "token_acc": 0.30751803910218667 }, { "epoch": 4.046907065376723, "grad_norm": 0.22853206903610426, "learning_rate": 0.0002813635519916406, "loss": 2.9341156482696533, "step": 6904, "token_acc": 0.3063069499695076 }, { "epoch": 4.047493403693931, "grad_norm": 0.23423981529941945, "learning_rate": 0.00028135653306162557, "loss": 2.920598030090332, "step": 6905, "token_acc": 0.30918320258421783 }, { "epoch": 4.04807974201114, "grad_norm": 0.23447817882395006, "learning_rate": 0.0002813495128976929, "loss": 2.944331169128418, "step": 6906, "token_acc": 0.30536422193909923 }, { "epoch": 4.048666080328349, "grad_norm": 0.2368179296052367, "learning_rate": 0.00028134249149990866, "loss": 2.968672275543213, "step": 6907, "token_acc": 0.30193295179202095 }, { "epoch": 4.049252418645558, "grad_norm": 0.2220827766640887, "learning_rate": 0.00028133546886833865, "loss": 2.9314537048339844, "step": 6908, "token_acc": 0.3081834332667111 }, { "epoch": 4.049838756962767, "grad_norm": 0.22807748679717127, "learning_rate": 0.00028132844500304886, "loss": 2.9324493408203125, "step": 6909, "token_acc": 0.3074675445746035 }, { "epoch": 4.0504250952799765, "grad_norm": 0.23036939759035227, "learning_rate": 0.00028132141990410526, "loss": 2.939117670059204, "step": 6910, "token_acc": 0.3072159403772522 }, { "epoch": 4.051011433597186, "grad_norm": 0.21525497370072308, "learning_rate": 0.00028131439357157394, "loss": 2.9474310874938965, "step": 6911, "token_acc": 0.30482529960390276 }, { "epoch": 4.051597771914395, "grad_norm": 0.23171626008961732, "learning_rate": 0.0002813073660055208, "loss": 2.976034164428711, "step": 6912, "token_acc": 0.30085298279487416 }, { "epoch": 4.052184110231604, "grad_norm": 0.24059671071013447, "learning_rate": 0.0002813003372060119, "loss": 2.9680233001708984, "step": 6913, "token_acc": 0.3046745453109783 }, { "epoch": 4.052770448548813, "grad_norm": 0.250940495116202, "learning_rate": 0.0002812933071731133, "loss": 2.9491381645202637, "step": 6914, "token_acc": 0.30599131025595383 }, { "epoch": 4.053356786866022, "grad_norm": 0.2633041123693953, "learning_rate": 0.000281286275906891, "loss": 2.9330320358276367, "step": 6915, "token_acc": 0.3073229923693056 }, { "epoch": 4.05394312518323, "grad_norm": 0.2500502796933771, "learning_rate": 0.000281279243407411, "loss": 2.8771369457244873, "step": 6916, "token_acc": 0.3159533853018131 }, { "epoch": 4.054529463500439, "grad_norm": 0.22923756716949134, "learning_rate": 0.00028127220967473943, "loss": 2.9559926986694336, "step": 6917, "token_acc": 0.3051138087231931 }, { "epoch": 4.0551158018176485, "grad_norm": 0.24755360767433782, "learning_rate": 0.0002812651747089423, "loss": 2.9593422412872314, "step": 6918, "token_acc": 0.30339995899199357 }, { "epoch": 4.055702140134858, "grad_norm": 0.2428586202005314, "learning_rate": 0.00028125813851008583, "loss": 2.9512016773223877, "step": 6919, "token_acc": 0.3047375361456364 }, { "epoch": 4.056288478452067, "grad_norm": 0.22353498608021963, "learning_rate": 0.00028125110107823594, "loss": 2.9419007301330566, "step": 6920, "token_acc": 0.3068314504117864 }, { "epoch": 4.056874816769276, "grad_norm": 0.22876681894709675, "learning_rate": 0.0002812440624134589, "loss": 2.936403751373291, "step": 6921, "token_acc": 0.3079590766137681 }, { "epoch": 4.057461155086485, "grad_norm": 0.2113837598970633, "learning_rate": 0.0002812370225158207, "loss": 2.967212677001953, "step": 6922, "token_acc": 0.3027097238833803 }, { "epoch": 4.058047493403694, "grad_norm": 0.256385156589934, "learning_rate": 0.0002812299813853875, "loss": 2.9763388633728027, "step": 6923, "token_acc": 0.3010738055133314 }, { "epoch": 4.058633831720903, "grad_norm": 0.25883922401192666, "learning_rate": 0.00028122293902222545, "loss": 2.988863945007324, "step": 6924, "token_acc": 0.2997903992638413 }, { "epoch": 4.059220170038112, "grad_norm": 0.22083192845593885, "learning_rate": 0.00028121589542640075, "loss": 2.990067481994629, "step": 6925, "token_acc": 0.2986797881207819 }, { "epoch": 4.059806508355321, "grad_norm": 0.24633861199790094, "learning_rate": 0.0002812088505979795, "loss": 2.9325003623962402, "step": 6926, "token_acc": 0.3082074919147419 }, { "epoch": 4.06039284667253, "grad_norm": 0.21730724404120141, "learning_rate": 0.0002812018045370279, "loss": 2.909435749053955, "step": 6927, "token_acc": 0.31196311976466695 }, { "epoch": 4.060979184989739, "grad_norm": 0.23327928903727468, "learning_rate": 0.0002811947572436122, "loss": 2.9470596313476562, "step": 6928, "token_acc": 0.3061533169433737 }, { "epoch": 4.061565523306948, "grad_norm": 0.23329139830763307, "learning_rate": 0.0002811877087177985, "loss": 2.976271390914917, "step": 6929, "token_acc": 0.3022678171431601 }, { "epoch": 4.062151861624157, "grad_norm": 0.2579642625941111, "learning_rate": 0.0002811806589596531, "loss": 2.902055263519287, "step": 6930, "token_acc": 0.31295412474342343 }, { "epoch": 4.062738199941366, "grad_norm": 0.24602644035909682, "learning_rate": 0.0002811736079692421, "loss": 2.964625358581543, "step": 6931, "token_acc": 0.3030681436723733 }, { "epoch": 4.063324538258575, "grad_norm": 0.22567880039766103, "learning_rate": 0.00028116655574663183, "loss": 2.941694498062134, "step": 6932, "token_acc": 0.30744580200819965 }, { "epoch": 4.063910876575784, "grad_norm": 0.2576117346148285, "learning_rate": 0.00028115950229188854, "loss": 2.9105496406555176, "step": 6933, "token_acc": 0.3105480809642655 }, { "epoch": 4.064497214892993, "grad_norm": 0.23742791168201235, "learning_rate": 0.00028115244760507844, "loss": 2.997262954711914, "step": 6934, "token_acc": 0.29682367233510293 }, { "epoch": 4.0650835532102025, "grad_norm": 0.25004121322333744, "learning_rate": 0.0002811453916862679, "loss": 2.9160399436950684, "step": 6935, "token_acc": 0.3093443810118452 }, { "epoch": 4.065669891527412, "grad_norm": 0.24799324978322812, "learning_rate": 0.00028113833453552304, "loss": 2.944429397583008, "step": 6936, "token_acc": 0.30514306410720754 }, { "epoch": 4.066256229844621, "grad_norm": 0.26184406268904226, "learning_rate": 0.0002811312761529103, "loss": 2.9519710540771484, "step": 6937, "token_acc": 0.30582702530219036 }, { "epoch": 4.066842568161829, "grad_norm": 0.25173615768490015, "learning_rate": 0.0002811242165384959, "loss": 2.958865165710449, "step": 6938, "token_acc": 0.3057311147259032 }, { "epoch": 4.067428906479038, "grad_norm": 0.2132416606100034, "learning_rate": 0.00028111715569234617, "loss": 2.9657392501831055, "step": 6939, "token_acc": 0.30266488774828126 }, { "epoch": 4.068015244796247, "grad_norm": 0.2517744518025153, "learning_rate": 0.0002811100936145274, "loss": 2.9701757431030273, "step": 6940, "token_acc": 0.303782709852822 }, { "epoch": 4.068601583113456, "grad_norm": 0.22932701214736192, "learning_rate": 0.000281103030305106, "loss": 2.9627833366394043, "step": 6941, "token_acc": 0.30162417304690403 }, { "epoch": 4.069187921430665, "grad_norm": 0.2843282222511165, "learning_rate": 0.00028109596576414837, "loss": 2.9619956016540527, "step": 6942, "token_acc": 0.30449611743657773 }, { "epoch": 4.0697742597478745, "grad_norm": 0.2569670015757998, "learning_rate": 0.0002810888999917207, "loss": 2.9476637840270996, "step": 6943, "token_acc": 0.3055021063271914 }, { "epoch": 4.070360598065084, "grad_norm": 0.2772409250860447, "learning_rate": 0.0002810818329878895, "loss": 2.914224624633789, "step": 6944, "token_acc": 0.3102136493474775 }, { "epoch": 4.070946936382293, "grad_norm": 0.23562750983034342, "learning_rate": 0.00028107476475272114, "loss": 2.9132649898529053, "step": 6945, "token_acc": 0.3117679153636303 }, { "epoch": 4.071533274699502, "grad_norm": 0.23420760795161652, "learning_rate": 0.00028106769528628197, "loss": 2.9094314575195312, "step": 6946, "token_acc": 0.3108844700207016 }, { "epoch": 4.072119613016711, "grad_norm": 0.22399538568737767, "learning_rate": 0.00028106062458863843, "loss": 2.9470975399017334, "step": 6947, "token_acc": 0.30716677341581655 }, { "epoch": 4.07270595133392, "grad_norm": 0.22794376746347073, "learning_rate": 0.0002810535526598569, "loss": 2.9407453536987305, "step": 6948, "token_acc": 0.3066781800066423 }, { "epoch": 4.073292289651128, "grad_norm": 0.23367575859109102, "learning_rate": 0.00028104647950000385, "loss": 2.9658050537109375, "step": 6949, "token_acc": 0.3040869502856642 }, { "epoch": 4.073878627968337, "grad_norm": 0.21948915807040575, "learning_rate": 0.0002810394051091457, "loss": 2.9171390533447266, "step": 6950, "token_acc": 0.30975133210185546 }, { "epoch": 4.0744649662855466, "grad_norm": 0.24601820474275396, "learning_rate": 0.00028103232948734893, "loss": 2.965160846710205, "step": 6951, "token_acc": 0.3036856325909604 }, { "epoch": 4.075051304602756, "grad_norm": 0.24710536972874503, "learning_rate": 0.00028102525263467995, "loss": 2.94419527053833, "step": 6952, "token_acc": 0.3058493831176623 }, { "epoch": 4.075637642919965, "grad_norm": 0.2248494191552353, "learning_rate": 0.00028101817455120537, "loss": 2.988813877105713, "step": 6953, "token_acc": 0.30025053397818297 }, { "epoch": 4.076223981237174, "grad_norm": 0.24983539539000194, "learning_rate": 0.0002810110952369915, "loss": 2.9237418174743652, "step": 6954, "token_acc": 0.30906816925142844 }, { "epoch": 4.076810319554383, "grad_norm": 0.24027861714622115, "learning_rate": 0.000281004014692105, "loss": 2.930018901824951, "step": 6955, "token_acc": 0.3077422513075516 }, { "epoch": 4.077396657871592, "grad_norm": 0.23440704285176886, "learning_rate": 0.0002809969329166123, "loss": 2.9123759269714355, "step": 6956, "token_acc": 0.3108210079485067 }, { "epoch": 4.077982996188801, "grad_norm": 0.2574909275167172, "learning_rate": 0.0002809898499105799, "loss": 2.959304094314575, "step": 6957, "token_acc": 0.30463539714942334 }, { "epoch": 4.07856933450601, "grad_norm": 0.25477023014933414, "learning_rate": 0.00028098276567407437, "loss": 2.9802112579345703, "step": 6958, "token_acc": 0.3019641772455579 }, { "epoch": 4.0791556728232194, "grad_norm": 0.23018660726368353, "learning_rate": 0.0002809756802071623, "loss": 2.9313008785247803, "step": 6959, "token_acc": 0.30868448351239 }, { "epoch": 4.079742011140428, "grad_norm": 0.27146401439459206, "learning_rate": 0.0002809685935099102, "loss": 2.9311976432800293, "step": 6960, "token_acc": 0.308977992345828 }, { "epoch": 4.080328349457637, "grad_norm": 0.22274603415499342, "learning_rate": 0.0002809615055823846, "loss": 2.9252712726593018, "step": 6961, "token_acc": 0.30986631983595464 }, { "epoch": 4.080914687774846, "grad_norm": 0.23408572265134073, "learning_rate": 0.0002809544164246522, "loss": 2.955280065536499, "step": 6962, "token_acc": 0.3052462650317878 }, { "epoch": 4.081501026092055, "grad_norm": 0.2373170483958627, "learning_rate": 0.0002809473260367794, "loss": 2.941588878631592, "step": 6963, "token_acc": 0.30850422039919767 }, { "epoch": 4.082087364409264, "grad_norm": 0.25316730489776573, "learning_rate": 0.000280940234418833, "loss": 2.9353485107421875, "step": 6964, "token_acc": 0.3075964268402465 }, { "epoch": 4.082673702726473, "grad_norm": 0.23253274045248168, "learning_rate": 0.00028093314157087956, "loss": 2.950018882751465, "step": 6965, "token_acc": 0.3055967276479184 }, { "epoch": 4.083260041043682, "grad_norm": 0.2440045285991042, "learning_rate": 0.00028092604749298575, "loss": 2.911876678466797, "step": 6966, "token_acc": 0.3110312311822726 }, { "epoch": 4.0838463793608915, "grad_norm": 0.2618890281012526, "learning_rate": 0.00028091895218521805, "loss": 2.9440929889678955, "step": 6967, "token_acc": 0.30677873817464096 }, { "epoch": 4.084432717678101, "grad_norm": 0.23161350121640223, "learning_rate": 0.00028091185564764324, "loss": 2.9329781532287598, "step": 6968, "token_acc": 0.3074822359991199 }, { "epoch": 4.08501905599531, "grad_norm": 0.2655355416123265, "learning_rate": 0.00028090475788032795, "loss": 2.942594051361084, "step": 6969, "token_acc": 0.30590818084712373 }, { "epoch": 4.085605394312518, "grad_norm": 0.2214483964503748, "learning_rate": 0.0002808976588833388, "loss": 2.976132392883301, "step": 6970, "token_acc": 0.3022902464471299 }, { "epoch": 4.086191732629727, "grad_norm": 0.24649806550535966, "learning_rate": 0.0002808905586567426, "loss": 2.9484410285949707, "step": 6971, "token_acc": 0.3043380722577279 }, { "epoch": 4.086778070946936, "grad_norm": 0.21342176360141749, "learning_rate": 0.000280883457200606, "loss": 2.9396610260009766, "step": 6972, "token_acc": 0.30588673330434535 }, { "epoch": 4.087364409264145, "grad_norm": 0.24467962114483774, "learning_rate": 0.0002808763545149956, "loss": 2.9299819469451904, "step": 6973, "token_acc": 0.3079387967635985 }, { "epoch": 4.087950747581354, "grad_norm": 0.24134583800497117, "learning_rate": 0.00028086925059997827, "loss": 2.9529669284820557, "step": 6974, "token_acc": 0.3059884818116744 }, { "epoch": 4.0885370858985635, "grad_norm": 0.2197654080326428, "learning_rate": 0.0002808621454556207, "loss": 2.955392837524414, "step": 6975, "token_acc": 0.3049167893980836 }, { "epoch": 4.089123424215773, "grad_norm": 0.2308425818519057, "learning_rate": 0.00028085503908198954, "loss": 2.9469759464263916, "step": 6976, "token_acc": 0.30431890381397103 }, { "epoch": 4.089709762532982, "grad_norm": 0.23909669167255249, "learning_rate": 0.00028084793147915165, "loss": 2.918078660964966, "step": 6977, "token_acc": 0.309660649068767 }, { "epoch": 4.090296100850191, "grad_norm": 0.2311736844574702, "learning_rate": 0.0002808408226471738, "loss": 2.915369987487793, "step": 6978, "token_acc": 0.3104114556793403 }, { "epoch": 4.0908824391674, "grad_norm": 0.2394403381467421, "learning_rate": 0.0002808337125861227, "loss": 2.99251651763916, "step": 6979, "token_acc": 0.29774545904951627 }, { "epoch": 4.091468777484609, "grad_norm": 0.22923229086532657, "learning_rate": 0.00028082660129606516, "loss": 2.9468069076538086, "step": 6980, "token_acc": 0.30680760002593865 }, { "epoch": 4.092055115801817, "grad_norm": 0.22222499039945026, "learning_rate": 0.00028081948877706805, "loss": 2.968657970428467, "step": 6981, "token_acc": 0.3029411687046342 }, { "epoch": 4.092641454119026, "grad_norm": 0.21198660468617503, "learning_rate": 0.0002808123750291981, "loss": 2.932565689086914, "step": 6982, "token_acc": 0.308610524208208 }, { "epoch": 4.0932277924362355, "grad_norm": 0.21607844782255936, "learning_rate": 0.0002808052600525221, "loss": 2.9474844932556152, "step": 6983, "token_acc": 0.3069443300243028 }, { "epoch": 4.093814130753445, "grad_norm": 0.22468553144386327, "learning_rate": 0.000280798143847107, "loss": 2.9423489570617676, "step": 6984, "token_acc": 0.3075592949804654 }, { "epoch": 4.094400469070654, "grad_norm": 0.23463957823947734, "learning_rate": 0.0002807910264130195, "loss": 2.948122501373291, "step": 6985, "token_acc": 0.305525175138996 }, { "epoch": 4.094986807387863, "grad_norm": 0.2235503887760045, "learning_rate": 0.0002807839077503267, "loss": 2.94474458694458, "step": 6986, "token_acc": 0.30710059326310396 }, { "epoch": 4.095573145705072, "grad_norm": 0.2416409927231637, "learning_rate": 0.0002807767878590952, "loss": 2.901216745376587, "step": 6987, "token_acc": 0.3115882783252291 }, { "epoch": 4.096159484022281, "grad_norm": 0.23121359914777234, "learning_rate": 0.00028076966673939204, "loss": 2.9537413120269775, "step": 6988, "token_acc": 0.304296623103717 }, { "epoch": 4.09674582233949, "grad_norm": 0.24593833094922632, "learning_rate": 0.0002807625443912841, "loss": 2.9365625381469727, "step": 6989, "token_acc": 0.30787411336049847 }, { "epoch": 4.097332160656699, "grad_norm": 0.23073527671601043, "learning_rate": 0.00028075542081483826, "loss": 2.958406925201416, "step": 6990, "token_acc": 0.3029237544785389 }, { "epoch": 4.097918498973908, "grad_norm": 0.22255770005073805, "learning_rate": 0.00028074829601012135, "loss": 2.9231908321380615, "step": 6991, "token_acc": 0.3090659690234182 }, { "epoch": 4.098504837291117, "grad_norm": 0.22661711114172467, "learning_rate": 0.0002807411699772005, "loss": 2.9605636596679688, "step": 6992, "token_acc": 0.30387789497005135 }, { "epoch": 4.099091175608326, "grad_norm": 0.2349355074050412, "learning_rate": 0.00028073404271614246, "loss": 2.9717674255371094, "step": 6993, "token_acc": 0.30238029871119537 }, { "epoch": 4.099677513925535, "grad_norm": 0.2517513559080339, "learning_rate": 0.0002807269142270143, "loss": 2.959568738937378, "step": 6994, "token_acc": 0.30339439627044495 }, { "epoch": 4.100263852242744, "grad_norm": 0.24567950484387097, "learning_rate": 0.0002807197845098829, "loss": 2.954444169998169, "step": 6995, "token_acc": 0.3045302390218256 }, { "epoch": 4.100850190559953, "grad_norm": 0.21968298114852824, "learning_rate": 0.0002807126535648153, "loss": 2.9386672973632812, "step": 6996, "token_acc": 0.30573486249073 }, { "epoch": 4.101436528877162, "grad_norm": 0.22531659145905045, "learning_rate": 0.0002807055213918785, "loss": 2.9417734146118164, "step": 6997, "token_acc": 0.3070468150625838 }, { "epoch": 4.102022867194371, "grad_norm": 0.2394505185396973, "learning_rate": 0.0002806983879911394, "loss": 2.9634926319122314, "step": 6998, "token_acc": 0.30283758377855907 }, { "epoch": 4.10260920551158, "grad_norm": 0.2584810154417215, "learning_rate": 0.000280691253362665, "loss": 2.9657249450683594, "step": 6999, "token_acc": 0.3026017205419656 }, { "epoch": 4.1031955438287895, "grad_norm": 0.236165942874937, "learning_rate": 0.0002806841175065225, "loss": 2.9570493698120117, "step": 7000, "token_acc": 0.304639390460286 }, { "epoch": 4.103781882145999, "grad_norm": 0.24832873786933202, "learning_rate": 0.0002806769804227787, "loss": 2.969536304473877, "step": 7001, "token_acc": 0.3036872356287593 }, { "epoch": 4.104368220463208, "grad_norm": 0.2526444661160461, "learning_rate": 0.00028066984211150086, "loss": 2.958831548690796, "step": 7002, "token_acc": 0.304220326153561 }, { "epoch": 4.104954558780416, "grad_norm": 0.21207038322400587, "learning_rate": 0.0002806627025727559, "loss": 2.9533753395080566, "step": 7003, "token_acc": 0.30400468585105256 }, { "epoch": 4.105540897097625, "grad_norm": 0.26832401180344534, "learning_rate": 0.00028065556180661093, "loss": 2.9689018726348877, "step": 7004, "token_acc": 0.30206641220613295 }, { "epoch": 4.106127235414834, "grad_norm": 0.24374313216129817, "learning_rate": 0.000280648419813133, "loss": 2.997386932373047, "step": 7005, "token_acc": 0.3012756427156285 }, { "epoch": 4.106713573732043, "grad_norm": 0.24559914525244286, "learning_rate": 0.00028064127659238917, "loss": 2.938683032989502, "step": 7006, "token_acc": 0.30888912957510206 }, { "epoch": 4.107299912049252, "grad_norm": 0.2677457960943747, "learning_rate": 0.0002806341321444467, "loss": 2.9458703994750977, "step": 7007, "token_acc": 0.3058554483180654 }, { "epoch": 4.1078862503664615, "grad_norm": 0.2518637497901332, "learning_rate": 0.00028062698646937246, "loss": 2.9873874187469482, "step": 7008, "token_acc": 0.30060431921879555 }, { "epoch": 4.108472588683671, "grad_norm": 0.23117696542780328, "learning_rate": 0.0002806198395672338, "loss": 2.9272685050964355, "step": 7009, "token_acc": 0.30968000658084155 }, { "epoch": 4.10905892700088, "grad_norm": 0.2676533556202926, "learning_rate": 0.0002806126914380977, "loss": 2.9826345443725586, "step": 7010, "token_acc": 0.3010104373439201 }, { "epoch": 4.109645265318089, "grad_norm": 0.22230772657588319, "learning_rate": 0.0002806055420820314, "loss": 2.9499261379241943, "step": 7011, "token_acc": 0.30689628661489965 }, { "epoch": 4.110231603635298, "grad_norm": 0.24191875398136786, "learning_rate": 0.00028059839149910203, "loss": 2.9512939453125, "step": 7012, "token_acc": 0.3051099679176901 }, { "epoch": 4.110817941952506, "grad_norm": 0.22337166676077516, "learning_rate": 0.00028059123968937676, "loss": 2.9200072288513184, "step": 7013, "token_acc": 0.3106512366202621 }, { "epoch": 4.111404280269715, "grad_norm": 0.23946198837486707, "learning_rate": 0.00028058408665292275, "loss": 2.991720676422119, "step": 7014, "token_acc": 0.30046864330242085 }, { "epoch": 4.111990618586924, "grad_norm": 0.24714873620394268, "learning_rate": 0.0002805769323898072, "loss": 2.974813461303711, "step": 7015, "token_acc": 0.3014735078625161 }, { "epoch": 4.1125769569041335, "grad_norm": 0.2203441639150923, "learning_rate": 0.00028056977690009736, "loss": 2.9447214603424072, "step": 7016, "token_acc": 0.30750746414888075 }, { "epoch": 4.113163295221343, "grad_norm": 0.23990041870033801, "learning_rate": 0.0002805626201838604, "loss": 2.9701313972473145, "step": 7017, "token_acc": 0.30236100334831306 }, { "epoch": 4.113749633538552, "grad_norm": 0.21635971070710705, "learning_rate": 0.0002805554622411635, "loss": 2.9463934898376465, "step": 7018, "token_acc": 0.30310996399324475 }, { "epoch": 4.114335971855761, "grad_norm": 0.22763762534898171, "learning_rate": 0.00028054830307207404, "loss": 2.9523544311523438, "step": 7019, "token_acc": 0.3061413722070902 }, { "epoch": 4.11492231017297, "grad_norm": 0.22657027485409625, "learning_rate": 0.00028054114267665915, "loss": 2.92579984664917, "step": 7020, "token_acc": 0.30727782984403357 }, { "epoch": 4.115508648490179, "grad_norm": 0.23159538530595852, "learning_rate": 0.00028053398105498613, "loss": 2.9265389442443848, "step": 7021, "token_acc": 0.31014779888842975 }, { "epoch": 4.116094986807388, "grad_norm": 0.23168884475736576, "learning_rate": 0.0002805268182071223, "loss": 2.947672128677368, "step": 7022, "token_acc": 0.30478707034690605 }, { "epoch": 4.116681325124597, "grad_norm": 0.24236124560948394, "learning_rate": 0.00028051965413313483, "loss": 2.9204933643341064, "step": 7023, "token_acc": 0.31057923806826226 }, { "epoch": 4.1172676634418055, "grad_norm": 0.22627282958571734, "learning_rate": 0.00028051248883309115, "loss": 2.969742774963379, "step": 7024, "token_acc": 0.30365686589934926 }, { "epoch": 4.117854001759015, "grad_norm": 0.267871241652411, "learning_rate": 0.00028050532230705844, "loss": 2.881059169769287, "step": 7025, "token_acc": 0.3169502706225511 }, { "epoch": 4.118440340076224, "grad_norm": 0.25489030902860266, "learning_rate": 0.00028049815455510413, "loss": 2.9760515689849854, "step": 7026, "token_acc": 0.2999971148070724 }, { "epoch": 4.119026678393433, "grad_norm": 0.27679805834243426, "learning_rate": 0.0002804909855772955, "loss": 2.978717803955078, "step": 7027, "token_acc": 0.3010443553190644 }, { "epoch": 4.119613016710642, "grad_norm": 0.23507745164652938, "learning_rate": 0.0002804838153736999, "loss": 2.95672869682312, "step": 7028, "token_acc": 0.30455859951600767 }, { "epoch": 4.120199355027851, "grad_norm": 0.22614213004090142, "learning_rate": 0.0002804766439443847, "loss": 2.9913949966430664, "step": 7029, "token_acc": 0.2988339017226569 }, { "epoch": 4.12078569334506, "grad_norm": 0.2479979734389295, "learning_rate": 0.0002804694712894172, "loss": 2.9279022216796875, "step": 7030, "token_acc": 0.308490558730044 }, { "epoch": 4.121372031662269, "grad_norm": 0.21831398989775666, "learning_rate": 0.00028046229740886483, "loss": 2.9356937408447266, "step": 7031, "token_acc": 0.3068591017774321 }, { "epoch": 4.121958369979478, "grad_norm": 0.25508115668536735, "learning_rate": 0.00028045512230279505, "loss": 2.92872953414917, "step": 7032, "token_acc": 0.30877569726332327 }, { "epoch": 4.1225447082966875, "grad_norm": 0.23704200293915115, "learning_rate": 0.0002804479459712751, "loss": 2.9174094200134277, "step": 7033, "token_acc": 0.31116756191779493 }, { "epoch": 4.123131046613897, "grad_norm": 0.2605022210442283, "learning_rate": 0.0002804407684143725, "loss": 2.9697089195251465, "step": 7034, "token_acc": 0.3022893052643762 }, { "epoch": 4.123717384931105, "grad_norm": 0.22742753369859667, "learning_rate": 0.0002804335896321547, "loss": 2.9532570838928223, "step": 7035, "token_acc": 0.30286329187394223 }, { "epoch": 4.124303723248314, "grad_norm": 0.24214065157301334, "learning_rate": 0.00028042640962468906, "loss": 2.9967823028564453, "step": 7036, "token_acc": 0.2985971328037369 }, { "epoch": 4.124890061565523, "grad_norm": 0.2326218495025064, "learning_rate": 0.00028041922839204303, "loss": 2.941636562347412, "step": 7037, "token_acc": 0.3060871648257797 }, { "epoch": 4.125476399882732, "grad_norm": 0.2541204139686519, "learning_rate": 0.0002804120459342841, "loss": 2.964323043823242, "step": 7038, "token_acc": 0.3049063842075841 }, { "epoch": 4.126062738199941, "grad_norm": 0.26932132221580224, "learning_rate": 0.0002804048622514798, "loss": 2.937098503112793, "step": 7039, "token_acc": 0.30726133036834535 }, { "epoch": 4.12664907651715, "grad_norm": 0.26133225286895573, "learning_rate": 0.00028039767734369745, "loss": 2.9305028915405273, "step": 7040, "token_acc": 0.30853444958415466 }, { "epoch": 4.1272354148343595, "grad_norm": 0.25331248054799116, "learning_rate": 0.0002803904912110047, "loss": 2.936984062194824, "step": 7041, "token_acc": 0.3071333005822036 }, { "epoch": 4.127821753151569, "grad_norm": 0.26815808385911194, "learning_rate": 0.0002803833038534689, "loss": 2.999026298522949, "step": 7042, "token_acc": 0.2985480990978328 }, { "epoch": 4.128408091468778, "grad_norm": 0.2511223325206849, "learning_rate": 0.00028037611527115773, "loss": 2.954646587371826, "step": 7043, "token_acc": 0.3070369030390738 }, { "epoch": 4.128994429785987, "grad_norm": 0.2551933052948444, "learning_rate": 0.00028036892546413856, "loss": 2.9374358654022217, "step": 7044, "token_acc": 0.3073796172391087 }, { "epoch": 4.129580768103196, "grad_norm": 0.2574986024833367, "learning_rate": 0.0002803617344324791, "loss": 2.9563074111938477, "step": 7045, "token_acc": 0.3039713204741065 }, { "epoch": 4.130167106420404, "grad_norm": 0.261607554709604, "learning_rate": 0.0002803545421762468, "loss": 2.924598455429077, "step": 7046, "token_acc": 0.30951524523922674 }, { "epoch": 4.130753444737613, "grad_norm": 0.2594858690872754, "learning_rate": 0.00028034734869550917, "loss": 2.931480884552002, "step": 7047, "token_acc": 0.30810158310331304 }, { "epoch": 4.131339783054822, "grad_norm": 0.25950538188557626, "learning_rate": 0.0002803401539903339, "loss": 2.957671642303467, "step": 7048, "token_acc": 0.30473850567080135 }, { "epoch": 4.1319261213720315, "grad_norm": 0.26676664138606776, "learning_rate": 0.0002803329580607885, "loss": 3.025540828704834, "step": 7049, "token_acc": 0.29556255966938255 }, { "epoch": 4.132512459689241, "grad_norm": 0.2428995759440316, "learning_rate": 0.00028032576090694064, "loss": 2.9836220741271973, "step": 7050, "token_acc": 0.29984319843876783 }, { "epoch": 4.13309879800645, "grad_norm": 0.24274853593252, "learning_rate": 0.0002803185625288578, "loss": 2.9807164669036865, "step": 7051, "token_acc": 0.30284529552108186 }, { "epoch": 4.133685136323659, "grad_norm": 0.2232729070243147, "learning_rate": 0.0002803113629266077, "loss": 2.9615988731384277, "step": 7052, "token_acc": 0.3028200839428192 }, { "epoch": 4.134271474640868, "grad_norm": 0.22309885025648937, "learning_rate": 0.0002803041621002579, "loss": 3.0009429454803467, "step": 7053, "token_acc": 0.2987090803066536 }, { "epoch": 4.134857812958077, "grad_norm": 0.23118697000946808, "learning_rate": 0.0002802969600498761, "loss": 2.9297561645507812, "step": 7054, "token_acc": 0.3097233299562415 }, { "epoch": 4.135444151275286, "grad_norm": 0.24633332716304196, "learning_rate": 0.00028028975677552996, "loss": 2.9691290855407715, "step": 7055, "token_acc": 0.30230859070174243 }, { "epoch": 4.136030489592494, "grad_norm": 0.24210784554013462, "learning_rate": 0.00028028255227728713, "loss": 3.0154945850372314, "step": 7056, "token_acc": 0.295490711895075 }, { "epoch": 4.1366168279097035, "grad_norm": 0.25311856661514065, "learning_rate": 0.0002802753465552153, "loss": 2.91676664352417, "step": 7057, "token_acc": 0.3108535442233308 }, { "epoch": 4.137203166226913, "grad_norm": 0.22818253388629445, "learning_rate": 0.0002802681396093821, "loss": 2.961775779724121, "step": 7058, "token_acc": 0.3044568237704372 }, { "epoch": 4.137789504544122, "grad_norm": 0.2264347300078047, "learning_rate": 0.00028026093143985526, "loss": 3.0084028244018555, "step": 7059, "token_acc": 0.29910979694780043 }, { "epoch": 4.138375842861331, "grad_norm": 0.22188733061543833, "learning_rate": 0.00028025372204670254, "loss": 2.976555824279785, "step": 7060, "token_acc": 0.3017328374131419 }, { "epoch": 4.13896218117854, "grad_norm": 0.20972460257703235, "learning_rate": 0.00028024651142999156, "loss": 2.899305820465088, "step": 7061, "token_acc": 0.31322386154988086 }, { "epoch": 4.139548519495749, "grad_norm": 0.2284321127092416, "learning_rate": 0.00028023929958979015, "loss": 2.9407315254211426, "step": 7062, "token_acc": 0.30848145886235306 }, { "epoch": 4.140134857812958, "grad_norm": 0.2251948627095996, "learning_rate": 0.000280232086526166, "loss": 2.9364328384399414, "step": 7063, "token_acc": 0.307368871869804 }, { "epoch": 4.140721196130167, "grad_norm": 0.2214741640435235, "learning_rate": 0.00028022487223918694, "loss": 3.009774684906006, "step": 7064, "token_acc": 0.2979091022275976 }, { "epoch": 4.141307534447376, "grad_norm": 0.22189053718363874, "learning_rate": 0.0002802176567289206, "loss": 2.99110746383667, "step": 7065, "token_acc": 0.30059114339078985 }, { "epoch": 4.1418938727645855, "grad_norm": 0.222933484263725, "learning_rate": 0.0002802104399954349, "loss": 2.9572460651397705, "step": 7066, "token_acc": 0.30354219882187355 }, { "epoch": 4.142480211081795, "grad_norm": 0.2161261199293689, "learning_rate": 0.0002802032220387976, "loss": 2.931248188018799, "step": 7067, "token_acc": 0.3075161964742018 }, { "epoch": 4.143066549399003, "grad_norm": 0.23061846200269812, "learning_rate": 0.00028019600285907645, "loss": 2.9586434364318848, "step": 7068, "token_acc": 0.30535956032146405 }, { "epoch": 4.143652887716212, "grad_norm": 0.22502234555649936, "learning_rate": 0.00028018878245633926, "loss": 2.97121524810791, "step": 7069, "token_acc": 0.30230687135703144 }, { "epoch": 4.144239226033421, "grad_norm": 0.2204685403627289, "learning_rate": 0.00028018156083065395, "loss": 2.9642229080200195, "step": 7070, "token_acc": 0.30380702191103176 }, { "epoch": 4.14482556435063, "grad_norm": 0.22681193702318006, "learning_rate": 0.0002801743379820883, "loss": 2.9790120124816895, "step": 7071, "token_acc": 0.2999884299433067 }, { "epoch": 4.145411902667839, "grad_norm": 0.22753099396102439, "learning_rate": 0.00028016711391071013, "loss": 2.948507785797119, "step": 7072, "token_acc": 0.30713466547193913 }, { "epoch": 4.145998240985048, "grad_norm": 0.23762351548633373, "learning_rate": 0.0002801598886165873, "loss": 2.93526029586792, "step": 7073, "token_acc": 0.3076610662220512 }, { "epoch": 4.1465845793022575, "grad_norm": 0.23350424503284453, "learning_rate": 0.00028015266209978774, "loss": 2.965040683746338, "step": 7074, "token_acc": 0.3040210248280352 }, { "epoch": 4.147170917619467, "grad_norm": 0.24965724553199728, "learning_rate": 0.0002801454343603793, "loss": 2.9607763290405273, "step": 7075, "token_acc": 0.3040059833882248 }, { "epoch": 4.147757255936676, "grad_norm": 0.2217545354075916, "learning_rate": 0.0002801382053984299, "loss": 2.9728918075561523, "step": 7076, "token_acc": 0.30271535580524345 }, { "epoch": 4.148343594253885, "grad_norm": 0.22762392624710606, "learning_rate": 0.0002801309752140074, "loss": 2.9778435230255127, "step": 7077, "token_acc": 0.300032694797923 }, { "epoch": 4.148929932571093, "grad_norm": 0.22916105769679446, "learning_rate": 0.0002801237438071797, "loss": 2.9443044662475586, "step": 7078, "token_acc": 0.30466865590950676 }, { "epoch": 4.149516270888302, "grad_norm": 0.23011379946959531, "learning_rate": 0.0002801165111780148, "loss": 2.973602294921875, "step": 7079, "token_acc": 0.3030043830521982 }, { "epoch": 4.150102609205511, "grad_norm": 0.23387135508406948, "learning_rate": 0.00028010927732658066, "loss": 2.9712934494018555, "step": 7080, "token_acc": 0.302414244594123 }, { "epoch": 4.15068894752272, "grad_norm": 0.25445804985885206, "learning_rate": 0.00028010204225294513, "loss": 2.9957268238067627, "step": 7081, "token_acc": 0.29876220560509625 }, { "epoch": 4.1512752858399296, "grad_norm": 0.24829901991419118, "learning_rate": 0.00028009480595717626, "loss": 2.930509567260742, "step": 7082, "token_acc": 0.3104601387598959 }, { "epoch": 4.151861624157139, "grad_norm": 0.22691444173188832, "learning_rate": 0.000280087568439342, "loss": 2.972425699234009, "step": 7083, "token_acc": 0.3011443682451383 }, { "epoch": 4.152447962474348, "grad_norm": 0.25637480601914125, "learning_rate": 0.00028008032969951025, "loss": 2.952868938446045, "step": 7084, "token_acc": 0.304035063890253 }, { "epoch": 4.153034300791557, "grad_norm": 0.24183091062628748, "learning_rate": 0.0002800730897377492, "loss": 2.972439765930176, "step": 7085, "token_acc": 0.3035490733700452 }, { "epoch": 4.153620639108766, "grad_norm": 0.2392878230619902, "learning_rate": 0.0002800658485541267, "loss": 2.9905807971954346, "step": 7086, "token_acc": 0.3005035913950329 }, { "epoch": 4.154206977425975, "grad_norm": 0.2444217927559297, "learning_rate": 0.0002800586061487108, "loss": 2.925438642501831, "step": 7087, "token_acc": 0.3091620129800125 }, { "epoch": 4.154793315743184, "grad_norm": 0.21797320716174756, "learning_rate": 0.00028005136252156953, "loss": 2.952061891555786, "step": 7088, "token_acc": 0.30380722891566264 }, { "epoch": 4.1553796540603924, "grad_norm": 0.23439335581191156, "learning_rate": 0.000280044117672771, "loss": 2.9105563163757324, "step": 7089, "token_acc": 0.3107666005318924 }, { "epoch": 4.155965992377602, "grad_norm": 0.240632879032838, "learning_rate": 0.0002800368716023832, "loss": 2.947885513305664, "step": 7090, "token_acc": 0.304783145233106 }, { "epoch": 4.156552330694811, "grad_norm": 0.244042502175311, "learning_rate": 0.00028002962431047425, "loss": 2.937822103500366, "step": 7091, "token_acc": 0.3075243479294802 }, { "epoch": 4.15713866901202, "grad_norm": 0.26747605260300134, "learning_rate": 0.0002800223757971122, "loss": 2.9654717445373535, "step": 7092, "token_acc": 0.30359673409942817 }, { "epoch": 4.157725007329229, "grad_norm": 0.23634852609128923, "learning_rate": 0.0002800151260623651, "loss": 2.937488317489624, "step": 7093, "token_acc": 0.30765783825853166 }, { "epoch": 4.158311345646438, "grad_norm": 0.23847859466719065, "learning_rate": 0.0002800078751063011, "loss": 2.989058017730713, "step": 7094, "token_acc": 0.3001433581315918 }, { "epoch": 4.158897683963647, "grad_norm": 0.26843347481760865, "learning_rate": 0.0002800006229289883, "loss": 2.9836111068725586, "step": 7095, "token_acc": 0.30146643305576987 }, { "epoch": 4.159484022280856, "grad_norm": 0.2221002321510319, "learning_rate": 0.00027999336953049483, "loss": 2.9927072525024414, "step": 7096, "token_acc": 0.2994058913521783 }, { "epoch": 4.160070360598065, "grad_norm": 0.22148263504205806, "learning_rate": 0.00027998611491088883, "loss": 2.95967698097229, "step": 7097, "token_acc": 0.30393760878996806 }, { "epoch": 4.1606566989152745, "grad_norm": 0.2472598839608634, "learning_rate": 0.0002799788590702384, "loss": 2.9445595741271973, "step": 7098, "token_acc": 0.3066633844240312 }, { "epoch": 4.161243037232484, "grad_norm": 0.2306243136796015, "learning_rate": 0.00027997160200861175, "loss": 2.964796543121338, "step": 7099, "token_acc": 0.30244676145982136 }, { "epoch": 4.161829375549692, "grad_norm": 0.2547094045215424, "learning_rate": 0.00027996434372607707, "loss": 2.9427552223205566, "step": 7100, "token_acc": 0.30708439904299056 }, { "epoch": 4.162415713866901, "grad_norm": 0.23370596441884453, "learning_rate": 0.0002799570842227025, "loss": 2.956815481185913, "step": 7101, "token_acc": 0.3044716817506903 }, { "epoch": 4.16300205218411, "grad_norm": 0.2429097091197528, "learning_rate": 0.0002799498234985562, "loss": 2.949885606765747, "step": 7102, "token_acc": 0.3063343717549325 }, { "epoch": 4.163588390501319, "grad_norm": 0.2475838052855693, "learning_rate": 0.00027994256155370646, "loss": 2.9774158000946045, "step": 7103, "token_acc": 0.3022645376536852 }, { "epoch": 4.164174728818528, "grad_norm": 0.2547814826972486, "learning_rate": 0.0002799352983882215, "loss": 2.9701404571533203, "step": 7104, "token_acc": 0.303403080862684 }, { "epoch": 4.164761067135737, "grad_norm": 0.24521281986178634, "learning_rate": 0.00027992803400216944, "loss": 2.9859836101531982, "step": 7105, "token_acc": 0.29887775591955723 }, { "epoch": 4.1653474054529465, "grad_norm": 0.2647998910863689, "learning_rate": 0.0002799207683956186, "loss": 2.954998731613159, "step": 7106, "token_acc": 0.30600121014320025 }, { "epoch": 4.165933743770156, "grad_norm": 0.2629432528514209, "learning_rate": 0.00027991350156863717, "loss": 2.9690797328948975, "step": 7107, "token_acc": 0.3027115677261151 }, { "epoch": 4.166520082087365, "grad_norm": 0.24680571741192905, "learning_rate": 0.00027990623352129346, "loss": 2.9474008083343506, "step": 7108, "token_acc": 0.30641482427325434 }, { "epoch": 4.167106420404574, "grad_norm": 0.23644255257035737, "learning_rate": 0.00027989896425365576, "loss": 2.9576873779296875, "step": 7109, "token_acc": 0.30443801056815334 }, { "epoch": 4.167692758721783, "grad_norm": 0.2619561570670442, "learning_rate": 0.00027989169376579237, "loss": 2.9569711685180664, "step": 7110, "token_acc": 0.3042408353488225 }, { "epoch": 4.168279097038991, "grad_norm": 0.21634723956185892, "learning_rate": 0.0002798844220577715, "loss": 2.9592514038085938, "step": 7111, "token_acc": 0.30343157900176476 }, { "epoch": 4.1688654353562, "grad_norm": 0.26147269816837365, "learning_rate": 0.0002798771491296615, "loss": 2.969801425933838, "step": 7112, "token_acc": 0.30307761332603717 }, { "epoch": 4.169451773673409, "grad_norm": 0.21805415342833442, "learning_rate": 0.0002798698749815307, "loss": 2.9321141242980957, "step": 7113, "token_acc": 0.3085903164853442 }, { "epoch": 4.1700381119906185, "grad_norm": 0.25158614798935675, "learning_rate": 0.0002798625996134475, "loss": 2.9781296253204346, "step": 7114, "token_acc": 0.3012214455483732 }, { "epoch": 4.170624450307828, "grad_norm": 0.21055065657956562, "learning_rate": 0.0002798553230254801, "loss": 2.94368052482605, "step": 7115, "token_acc": 0.3075166462977791 }, { "epoch": 4.171210788625037, "grad_norm": 0.23576910755775563, "learning_rate": 0.0002798480452176969, "loss": 2.9640750885009766, "step": 7116, "token_acc": 0.30338828286781133 }, { "epoch": 4.171797126942246, "grad_norm": 0.235043132161141, "learning_rate": 0.00027984076619016633, "loss": 2.970208168029785, "step": 7117, "token_acc": 0.3022410455614306 }, { "epoch": 4.172383465259455, "grad_norm": 0.21938000103478963, "learning_rate": 0.0002798334859429567, "loss": 2.9788246154785156, "step": 7118, "token_acc": 0.3015351227140714 }, { "epoch": 4.172969803576664, "grad_norm": 0.2322315098196772, "learning_rate": 0.00027982620447613644, "loss": 2.9677741527557373, "step": 7119, "token_acc": 0.30360736076853173 }, { "epoch": 4.173556141893873, "grad_norm": 0.22000374386222343, "learning_rate": 0.00027981892178977394, "loss": 2.986588954925537, "step": 7120, "token_acc": 0.30002392929413785 }, { "epoch": 4.174142480211081, "grad_norm": 0.2260902705951297, "learning_rate": 0.0002798116378839376, "loss": 2.972031593322754, "step": 7121, "token_acc": 0.30150214867551794 }, { "epoch": 4.1747288185282905, "grad_norm": 0.2227465686895031, "learning_rate": 0.0002798043527586958, "loss": 2.927219867706299, "step": 7122, "token_acc": 0.3094250920566751 }, { "epoch": 4.1753151568455, "grad_norm": 0.21935979084719215, "learning_rate": 0.0002797970664141171, "loss": 2.9706227779388428, "step": 7123, "token_acc": 0.30268446553733735 }, { "epoch": 4.175901495162709, "grad_norm": 0.2204873715575863, "learning_rate": 0.00027978977885026983, "loss": 2.945462703704834, "step": 7124, "token_acc": 0.3064844391961385 }, { "epoch": 4.176487833479918, "grad_norm": 0.22956624948708773, "learning_rate": 0.00027978249006722244, "loss": 2.9830143451690674, "step": 7125, "token_acc": 0.30172838609614017 }, { "epoch": 4.177074171797127, "grad_norm": 0.21350312543426075, "learning_rate": 0.0002797752000650435, "loss": 2.9473366737365723, "step": 7126, "token_acc": 0.30502939255388634 }, { "epoch": 4.177660510114336, "grad_norm": 0.23924067253008593, "learning_rate": 0.0002797679088438014, "loss": 2.9383702278137207, "step": 7127, "token_acc": 0.3086238339848964 }, { "epoch": 4.178246848431545, "grad_norm": 0.22888709026859433, "learning_rate": 0.0002797606164035647, "loss": 2.967724323272705, "step": 7128, "token_acc": 0.30075341123567695 }, { "epoch": 4.178833186748754, "grad_norm": 0.22299672441223056, "learning_rate": 0.0002797533227444018, "loss": 2.9012012481689453, "step": 7129, "token_acc": 0.3134994489384267 }, { "epoch": 4.179419525065963, "grad_norm": 0.2267764326101299, "learning_rate": 0.0002797460278663813, "loss": 2.9398555755615234, "step": 7130, "token_acc": 0.30675164920241504 }, { "epoch": 4.1800058633831725, "grad_norm": 0.24350531200783265, "learning_rate": 0.00027973873176957167, "loss": 2.9520580768585205, "step": 7131, "token_acc": 0.30463218358119826 }, { "epoch": 4.180592201700381, "grad_norm": 0.24714987235594757, "learning_rate": 0.0002797314344540415, "loss": 2.9722585678100586, "step": 7132, "token_acc": 0.3023703838365475 }, { "epoch": 4.18117854001759, "grad_norm": 0.2527182244850833, "learning_rate": 0.00027972413591985937, "loss": 2.9668326377868652, "step": 7133, "token_acc": 0.3039479054197168 }, { "epoch": 4.181764878334799, "grad_norm": 0.22874539615960185, "learning_rate": 0.00027971683616709374, "loss": 2.9069981575012207, "step": 7134, "token_acc": 0.31072032639991737 }, { "epoch": 4.182351216652008, "grad_norm": 0.22820477298100342, "learning_rate": 0.0002797095351958133, "loss": 2.9717440605163574, "step": 7135, "token_acc": 0.3011552724780373 }, { "epoch": 4.182937554969217, "grad_norm": 0.2373630390894112, "learning_rate": 0.00027970223300608643, "loss": 2.9145684242248535, "step": 7136, "token_acc": 0.3091124373460199 }, { "epoch": 4.183523893286426, "grad_norm": 0.22602484445093982, "learning_rate": 0.00027969492959798196, "loss": 2.9773716926574707, "step": 7137, "token_acc": 0.3019175021672046 }, { "epoch": 4.184110231603635, "grad_norm": 0.22848192729257139, "learning_rate": 0.00027968762497156835, "loss": 2.9658541679382324, "step": 7138, "token_acc": 0.3032980463631827 }, { "epoch": 4.1846965699208445, "grad_norm": 0.24610876966895218, "learning_rate": 0.0002796803191269143, "loss": 2.932302474975586, "step": 7139, "token_acc": 0.3081606760838522 }, { "epoch": 4.185282908238054, "grad_norm": 0.23334521099448174, "learning_rate": 0.00027967301206408837, "loss": 2.9689481258392334, "step": 7140, "token_acc": 0.30098418101985464 }, { "epoch": 4.185869246555263, "grad_norm": 0.25264448409772233, "learning_rate": 0.00027966570378315926, "loss": 2.965956449508667, "step": 7141, "token_acc": 0.3024810880727472 }, { "epoch": 4.186455584872472, "grad_norm": 0.22368728813867814, "learning_rate": 0.00027965839428419553, "loss": 2.9376280307769775, "step": 7142, "token_acc": 0.30766104554854773 }, { "epoch": 4.18704192318968, "grad_norm": 0.23440779366134426, "learning_rate": 0.0002796510835672659, "loss": 2.963715076446533, "step": 7143, "token_acc": 0.3031145921018922 }, { "epoch": 4.187628261506889, "grad_norm": 0.2569461271408038, "learning_rate": 0.00027964377163243914, "loss": 2.9298667907714844, "step": 7144, "token_acc": 0.3081967213114754 }, { "epoch": 4.188214599824098, "grad_norm": 0.2146023937850612, "learning_rate": 0.00027963645847978375, "loss": 2.9771406650543213, "step": 7145, "token_acc": 0.30071813744281733 }, { "epoch": 4.188800938141307, "grad_norm": 0.23064107450841975, "learning_rate": 0.0002796291441093686, "loss": 2.93847393989563, "step": 7146, "token_acc": 0.30797406299517727 }, { "epoch": 4.1893872764585165, "grad_norm": 0.21873435236853192, "learning_rate": 0.0002796218285212622, "loss": 3.0000576972961426, "step": 7147, "token_acc": 0.2975899149083081 }, { "epoch": 4.189973614775726, "grad_norm": 0.23362462088238492, "learning_rate": 0.0002796145117155335, "loss": 2.945387363433838, "step": 7148, "token_acc": 0.3074763911532191 }, { "epoch": 4.190559953092935, "grad_norm": 0.21900884207080107, "learning_rate": 0.00027960719369225106, "loss": 2.9527676105499268, "step": 7149, "token_acc": 0.3034883254164715 }, { "epoch": 4.191146291410144, "grad_norm": 0.23252562681536362, "learning_rate": 0.0002795998744514837, "loss": 2.9716498851776123, "step": 7150, "token_acc": 0.30208920486571994 }, { "epoch": 4.191732629727353, "grad_norm": 0.2613067061748528, "learning_rate": 0.0002795925539933002, "loss": 2.989401340484619, "step": 7151, "token_acc": 0.29963326524535183 }, { "epoch": 4.192318968044562, "grad_norm": 0.2459518909128689, "learning_rate": 0.0002795852323177692, "loss": 2.9221770763397217, "step": 7152, "token_acc": 0.3112821097825298 }, { "epoch": 4.192905306361771, "grad_norm": 0.2205318046437699, "learning_rate": 0.00027957790942495964, "loss": 3.0039522647857666, "step": 7153, "token_acc": 0.296598592721575 }, { "epoch": 4.193491644678979, "grad_norm": 0.24371516259060283, "learning_rate": 0.0002795705853149402, "loss": 3.002087354660034, "step": 7154, "token_acc": 0.29926761143504393 }, { "epoch": 4.1940779829961885, "grad_norm": 0.23099993737736058, "learning_rate": 0.0002795632599877797, "loss": 2.9613800048828125, "step": 7155, "token_acc": 0.3060259817345438 }, { "epoch": 4.194664321313398, "grad_norm": 0.21666149844607271, "learning_rate": 0.000279555933443547, "loss": 2.9645919799804688, "step": 7156, "token_acc": 0.30355589824268997 }, { "epoch": 4.195250659630607, "grad_norm": 0.22560474757390725, "learning_rate": 0.0002795486056823108, "loss": 2.93660306930542, "step": 7157, "token_acc": 0.307644438195372 }, { "epoch": 4.195836997947816, "grad_norm": 0.2143182202227415, "learning_rate": 0.0002795412767041401, "loss": 2.9247260093688965, "step": 7158, "token_acc": 0.308627515462744 }, { "epoch": 4.196423336265025, "grad_norm": 0.24378449410246744, "learning_rate": 0.0002795339465091036, "loss": 2.9704389572143555, "step": 7159, "token_acc": 0.3022703013694482 }, { "epoch": 4.197009674582234, "grad_norm": 0.2393462724897528, "learning_rate": 0.00027952661509727026, "loss": 2.9679412841796875, "step": 7160, "token_acc": 0.30400949478681466 }, { "epoch": 4.197596012899443, "grad_norm": 0.25074212708065247, "learning_rate": 0.0002795192824687089, "loss": 2.940260410308838, "step": 7161, "token_acc": 0.3066548210955075 }, { "epoch": 4.198182351216652, "grad_norm": 0.2055295308303941, "learning_rate": 0.00027951194862348844, "loss": 2.965881109237671, "step": 7162, "token_acc": 0.30385203098632724 }, { "epoch": 4.198768689533861, "grad_norm": 0.2795800503669608, "learning_rate": 0.00027950461356167773, "loss": 2.928539276123047, "step": 7163, "token_acc": 0.3080695414419093 }, { "epoch": 4.19935502785107, "grad_norm": 0.2574245167648106, "learning_rate": 0.0002794972772833456, "loss": 2.9717133045196533, "step": 7164, "token_acc": 0.3018946823445076 }, { "epoch": 4.199941366168279, "grad_norm": 0.20455830980264006, "learning_rate": 0.0002794899397885612, "loss": 2.9435808658599854, "step": 7165, "token_acc": 0.30615077803143614 }, { "epoch": 4.200527704485488, "grad_norm": 0.26263796607918494, "learning_rate": 0.0002794826010773932, "loss": 2.929746150970459, "step": 7166, "token_acc": 0.30981280113741083 }, { "epoch": 4.201114042802697, "grad_norm": 0.2161661501522452, "learning_rate": 0.00027947526114991073, "loss": 2.9303746223449707, "step": 7167, "token_acc": 0.3091720624520303 }, { "epoch": 4.201700381119906, "grad_norm": 0.2522378803582865, "learning_rate": 0.0002794679200061825, "loss": 3.014087677001953, "step": 7168, "token_acc": 0.29651893819850417 }, { "epoch": 4.202286719437115, "grad_norm": 0.24439679799342348, "learning_rate": 0.00027946057764627775, "loss": 2.906731605529785, "step": 7169, "token_acc": 0.3118171523773575 }, { "epoch": 4.202873057754324, "grad_norm": 0.22583642152211197, "learning_rate": 0.0002794532340702653, "loss": 2.937023162841797, "step": 7170, "token_acc": 0.3073085429203312 }, { "epoch": 4.203459396071533, "grad_norm": 0.23110355674714725, "learning_rate": 0.00027944588927821413, "loss": 2.9656410217285156, "step": 7171, "token_acc": 0.30344160906945383 }, { "epoch": 4.2040457343887425, "grad_norm": 0.2502207298018072, "learning_rate": 0.0002794385432701933, "loss": 2.938009262084961, "step": 7172, "token_acc": 0.308593295770479 }, { "epoch": 4.204632072705952, "grad_norm": 0.24808591023320029, "learning_rate": 0.00027943119604627173, "loss": 2.981632709503174, "step": 7173, "token_acc": 0.3021054006016953 }, { "epoch": 4.205218411023161, "grad_norm": 0.23886907557550716, "learning_rate": 0.0002794238476065185, "loss": 2.975621461868286, "step": 7174, "token_acc": 0.3005077746724803 }, { "epoch": 4.205804749340369, "grad_norm": 0.22924291178181072, "learning_rate": 0.00027941649795100264, "loss": 2.9458160400390625, "step": 7175, "token_acc": 0.3059799574981215 }, { "epoch": 4.206391087657578, "grad_norm": 0.2227007321915543, "learning_rate": 0.00027940914707979316, "loss": 2.951711654663086, "step": 7176, "token_acc": 0.30477109006945124 }, { "epoch": 4.206977425974787, "grad_norm": 0.2300017522391354, "learning_rate": 0.00027940179499295914, "loss": 2.935321807861328, "step": 7177, "token_acc": 0.30842687477212605 }, { "epoch": 4.207563764291996, "grad_norm": 0.2342032710115526, "learning_rate": 0.0002793944416905696, "loss": 2.9410626888275146, "step": 7178, "token_acc": 0.3066074361048511 }, { "epoch": 4.208150102609205, "grad_norm": 0.23937518119630974, "learning_rate": 0.0002793870871726936, "loss": 2.9870901107788086, "step": 7179, "token_acc": 0.2996194736228443 }, { "epoch": 4.2087364409264145, "grad_norm": 0.2435238510899763, "learning_rate": 0.0002793797314394004, "loss": 2.9651312828063965, "step": 7180, "token_acc": 0.30357758286735614 }, { "epoch": 4.209322779243624, "grad_norm": 0.24063754098713017, "learning_rate": 0.0002793723744907589, "loss": 2.937845468521118, "step": 7181, "token_acc": 0.3066822166331961 }, { "epoch": 4.209909117560833, "grad_norm": 0.2336205600179901, "learning_rate": 0.0002793650163268382, "loss": 2.944732904434204, "step": 7182, "token_acc": 0.3071571702043063 }, { "epoch": 4.210495455878042, "grad_norm": 0.22337784377271147, "learning_rate": 0.00027935765694770754, "loss": 2.9650626182556152, "step": 7183, "token_acc": 0.30338061081578804 }, { "epoch": 4.211081794195251, "grad_norm": 0.22940496114069442, "learning_rate": 0.000279350296353436, "loss": 2.954596757888794, "step": 7184, "token_acc": 0.30548322627440133 }, { "epoch": 4.21166813251246, "grad_norm": 0.2368833138661059, "learning_rate": 0.0002793429345440928, "loss": 2.9842207431793213, "step": 7185, "token_acc": 0.2995793049385942 }, { "epoch": 4.212254470829668, "grad_norm": 0.2410989701531458, "learning_rate": 0.00027933557151974697, "loss": 2.9854331016540527, "step": 7186, "token_acc": 0.300787216242699 }, { "epoch": 4.212840809146877, "grad_norm": 0.2195840763549594, "learning_rate": 0.0002793282072804677, "loss": 2.977243423461914, "step": 7187, "token_acc": 0.30053291360727385 }, { "epoch": 4.2134271474640865, "grad_norm": 0.23243841516018374, "learning_rate": 0.00027932084182632425, "loss": 3.0346317291259766, "step": 7188, "token_acc": 0.2930134274582094 }, { "epoch": 4.214013485781296, "grad_norm": 0.2283962479897357, "learning_rate": 0.0002793134751573857, "loss": 2.9303455352783203, "step": 7189, "token_acc": 0.309084792464316 }, { "epoch": 4.214599824098505, "grad_norm": 0.23880408473174367, "learning_rate": 0.0002793061072737213, "loss": 2.9933369159698486, "step": 7190, "token_acc": 0.2982695983493462 }, { "epoch": 4.215186162415714, "grad_norm": 0.22206798767023753, "learning_rate": 0.0002792987381754003, "loss": 2.921048641204834, "step": 7191, "token_acc": 0.310069056367941 }, { "epoch": 4.215772500732923, "grad_norm": 0.2548641533782326, "learning_rate": 0.00027929136786249186, "loss": 2.933879852294922, "step": 7192, "token_acc": 0.30738909725072855 }, { "epoch": 4.216358839050132, "grad_norm": 0.22598315336124358, "learning_rate": 0.00027928399633506525, "loss": 2.967158317565918, "step": 7193, "token_acc": 0.303506335929188 }, { "epoch": 4.216945177367341, "grad_norm": 0.24078647759021363, "learning_rate": 0.00027927662359318974, "loss": 2.9204459190368652, "step": 7194, "token_acc": 0.3096900270940819 }, { "epoch": 4.21753151568455, "grad_norm": 0.24184321318659951, "learning_rate": 0.0002792692496369345, "loss": 3.000326633453369, "step": 7195, "token_acc": 0.2992154433777227 }, { "epoch": 4.218117854001759, "grad_norm": 0.23464220067362695, "learning_rate": 0.00027926187446636894, "loss": 2.9488885402679443, "step": 7196, "token_acc": 0.30547350398346457 }, { "epoch": 4.218704192318968, "grad_norm": 0.2607214459305649, "learning_rate": 0.00027925449808156215, "loss": 3.009669065475464, "step": 7197, "token_acc": 0.2956722827882392 }, { "epoch": 4.219290530636177, "grad_norm": 0.23777982058184158, "learning_rate": 0.00027924712048258354, "loss": 2.942624092102051, "step": 7198, "token_acc": 0.306084282806813 }, { "epoch": 4.219876868953386, "grad_norm": 0.23116310031305282, "learning_rate": 0.0002792397416695025, "loss": 3.0218467712402344, "step": 7199, "token_acc": 0.2979693547531944 }, { "epoch": 4.220463207270595, "grad_norm": 0.22967455758778085, "learning_rate": 0.0002792323616423881, "loss": 2.9594767093658447, "step": 7200, "token_acc": 0.3034323075563779 }, { "epoch": 4.221049545587804, "grad_norm": 0.22284893708568565, "learning_rate": 0.00027922498040130984, "loss": 2.9530961513519287, "step": 7201, "token_acc": 0.3041579244944278 }, { "epoch": 4.221635883905013, "grad_norm": 0.21244358462659857, "learning_rate": 0.0002792175979463371, "loss": 2.9933292865753174, "step": 7202, "token_acc": 0.30087169315981566 }, { "epoch": 4.222222222222222, "grad_norm": 0.23414613721805977, "learning_rate": 0.0002792102142775391, "loss": 3.0051686763763428, "step": 7203, "token_acc": 0.29778418437094506 }, { "epoch": 4.222808560539431, "grad_norm": 0.22866065510903766, "learning_rate": 0.00027920282939498524, "loss": 2.934781074523926, "step": 7204, "token_acc": 0.3078724684178865 }, { "epoch": 4.2233948988566405, "grad_norm": 0.21421220293720977, "learning_rate": 0.00027919544329874487, "loss": 2.9708244800567627, "step": 7205, "token_acc": 0.3035659154059895 }, { "epoch": 4.22398123717385, "grad_norm": 0.21253943761023864, "learning_rate": 0.00027918805598888745, "loss": 2.967909097671509, "step": 7206, "token_acc": 0.30134420280595947 }, { "epoch": 4.224567575491059, "grad_norm": 0.22766300462921712, "learning_rate": 0.0002791806674654823, "loss": 2.9439306259155273, "step": 7207, "token_acc": 0.3073599256295135 }, { "epoch": 4.225153913808267, "grad_norm": 0.25491453230249544, "learning_rate": 0.00027917327772859887, "loss": 2.9557056427001953, "step": 7208, "token_acc": 0.3049779122725648 }, { "epoch": 4.225740252125476, "grad_norm": 0.2573004461323963, "learning_rate": 0.0002791658867783066, "loss": 2.977468490600586, "step": 7209, "token_acc": 0.3020123432297224 }, { "epoch": 4.226326590442685, "grad_norm": 0.2215120293760408, "learning_rate": 0.0002791584946146748, "loss": 3.0005874633789062, "step": 7210, "token_acc": 0.29880392038076475 }, { "epoch": 4.226912928759894, "grad_norm": 0.24115393281733802, "learning_rate": 0.00027915110123777305, "loss": 2.941826343536377, "step": 7211, "token_acc": 0.3061504545988182 }, { "epoch": 4.227499267077103, "grad_norm": 0.23730474695811699, "learning_rate": 0.0002791437066476707, "loss": 2.9924192428588867, "step": 7212, "token_acc": 0.2998862739395954 }, { "epoch": 4.2280856053943126, "grad_norm": 0.22845052258412402, "learning_rate": 0.00027913631084443725, "loss": 3.0223019123077393, "step": 7213, "token_acc": 0.29430885224753484 }, { "epoch": 4.228671943711522, "grad_norm": 0.24559478199437917, "learning_rate": 0.00027912891382814224, "loss": 2.9465341567993164, "step": 7214, "token_acc": 0.305107394866758 }, { "epoch": 4.229258282028731, "grad_norm": 0.2209345299661073, "learning_rate": 0.00027912151559885497, "loss": 2.99446964263916, "step": 7215, "token_acc": 0.2982912379326657 }, { "epoch": 4.22984462034594, "grad_norm": 0.2436864376958213, "learning_rate": 0.00027911411615664513, "loss": 2.934150218963623, "step": 7216, "token_acc": 0.30776252456857206 }, { "epoch": 4.230430958663149, "grad_norm": 0.22593124510100204, "learning_rate": 0.00027910671550158213, "loss": 2.99104905128479, "step": 7217, "token_acc": 0.29994016169075055 }, { "epoch": 4.231017296980358, "grad_norm": 0.22568673634368636, "learning_rate": 0.0002790993136337355, "loss": 2.9484798908233643, "step": 7218, "token_acc": 0.3072376564076428 }, { "epoch": 4.231603635297566, "grad_norm": 0.2303993207447669, "learning_rate": 0.0002790919105531748, "loss": 2.9428210258483887, "step": 7219, "token_acc": 0.30737112087790025 }, { "epoch": 4.2321899736147754, "grad_norm": 0.23764369171518032, "learning_rate": 0.0002790845062599696, "loss": 2.9334921836853027, "step": 7220, "token_acc": 0.3069261854826334 }, { "epoch": 4.232776311931985, "grad_norm": 0.24173926010601354, "learning_rate": 0.0002790771007541893, "loss": 2.939941883087158, "step": 7221, "token_acc": 0.3068294808926423 }, { "epoch": 4.233362650249194, "grad_norm": 0.2181224970387667, "learning_rate": 0.0002790696940359037, "loss": 3.00750732421875, "step": 7222, "token_acc": 0.2968870361140061 }, { "epoch": 4.233948988566403, "grad_norm": 0.22004373046188294, "learning_rate": 0.00027906228610518214, "loss": 2.9417672157287598, "step": 7223, "token_acc": 0.30669800545696824 }, { "epoch": 4.234535326883612, "grad_norm": 0.2273454545502435, "learning_rate": 0.0002790548769620944, "loss": 2.986320972442627, "step": 7224, "token_acc": 0.3006915035351275 }, { "epoch": 4.235121665200821, "grad_norm": 0.223823389661935, "learning_rate": 0.0002790474666067099, "loss": 3.0056748390197754, "step": 7225, "token_acc": 0.2976590734859271 }, { "epoch": 4.23570800351803, "grad_norm": 0.24702666535264076, "learning_rate": 0.0002790400550390984, "loss": 2.9845900535583496, "step": 7226, "token_acc": 0.2994404467947563 }, { "epoch": 4.236294341835239, "grad_norm": 0.2642517644378435, "learning_rate": 0.0002790326422593295, "loss": 2.981661796569824, "step": 7227, "token_acc": 0.3007371138135661 }, { "epoch": 4.236880680152448, "grad_norm": 0.23498001984855038, "learning_rate": 0.0002790252282674727, "loss": 2.9488978385925293, "step": 7228, "token_acc": 0.3051641416870178 }, { "epoch": 4.237467018469657, "grad_norm": 0.2124376175250313, "learning_rate": 0.00027901781306359784, "loss": 2.955808639526367, "step": 7229, "token_acc": 0.3039293625494933 }, { "epoch": 4.238053356786866, "grad_norm": 0.2247504235429204, "learning_rate": 0.00027901039664777447, "loss": 2.962716817855835, "step": 7230, "token_acc": 0.30221665947802523 }, { "epoch": 4.238639695104075, "grad_norm": 0.22354858818037412, "learning_rate": 0.00027900297902007224, "loss": 2.996952533721924, "step": 7231, "token_acc": 0.30008518362245346 }, { "epoch": 4.239226033421284, "grad_norm": 0.23492827246659842, "learning_rate": 0.0002789955601805609, "loss": 2.987431526184082, "step": 7232, "token_acc": 0.3002526992878475 }, { "epoch": 4.239812371738493, "grad_norm": 0.23520890514516835, "learning_rate": 0.00027898814012931, "loss": 3.0119194984436035, "step": 7233, "token_acc": 0.29662859165010363 }, { "epoch": 4.240398710055702, "grad_norm": 0.23379002750282327, "learning_rate": 0.0002789807188663894, "loss": 2.95747447013855, "step": 7234, "token_acc": 0.30488665361578415 }, { "epoch": 4.240985048372911, "grad_norm": 0.23088098275481278, "learning_rate": 0.00027897329639186874, "loss": 3.0047826766967773, "step": 7235, "token_acc": 0.2974579828001486 }, { "epoch": 4.24157138669012, "grad_norm": 0.22960480116811235, "learning_rate": 0.00027896587270581776, "loss": 2.9464821815490723, "step": 7236, "token_acc": 0.30496864716104405 }, { "epoch": 4.2421577250073295, "grad_norm": 0.21414973851746352, "learning_rate": 0.00027895844780830616, "loss": 2.9867663383483887, "step": 7237, "token_acc": 0.29981873587978775 }, { "epoch": 4.242744063324539, "grad_norm": 0.2164449220096112, "learning_rate": 0.00027895102169940377, "loss": 2.9728453159332275, "step": 7238, "token_acc": 0.30284858364737266 }, { "epoch": 4.243330401641748, "grad_norm": 0.2225356668681225, "learning_rate": 0.00027894359437918024, "loss": 2.9414124488830566, "step": 7239, "token_acc": 0.30824104479213615 }, { "epoch": 4.243916739958956, "grad_norm": 0.2284371079919919, "learning_rate": 0.00027893616584770544, "loss": 3.0008625984191895, "step": 7240, "token_acc": 0.29724459425151384 }, { "epoch": 4.244503078276165, "grad_norm": 0.22869310209195204, "learning_rate": 0.00027892873610504905, "loss": 2.9359726905822754, "step": 7241, "token_acc": 0.30582687348291393 }, { "epoch": 4.245089416593374, "grad_norm": 0.23148401445140004, "learning_rate": 0.000278921305151281, "loss": 2.964212656021118, "step": 7242, "token_acc": 0.3033599232791892 }, { "epoch": 4.245675754910583, "grad_norm": 0.22815844596200105, "learning_rate": 0.00027891387298647097, "loss": 2.9286434650421143, "step": 7243, "token_acc": 0.3087061597720895 }, { "epoch": 4.246262093227792, "grad_norm": 0.22207631598123453, "learning_rate": 0.00027890643961068877, "loss": 2.952810287475586, "step": 7244, "token_acc": 0.3050997177411047 }, { "epoch": 4.2468484315450015, "grad_norm": 0.2661146352574905, "learning_rate": 0.00027889900502400437, "loss": 2.986868381500244, "step": 7245, "token_acc": 0.3014544516234521 }, { "epoch": 4.247434769862211, "grad_norm": 0.23686821287018786, "learning_rate": 0.0002788915692264875, "loss": 2.932485342025757, "step": 7246, "token_acc": 0.30744662968566144 }, { "epoch": 4.24802110817942, "grad_norm": 0.20720182121010983, "learning_rate": 0.0002788841322182079, "loss": 2.9421558380126953, "step": 7247, "token_acc": 0.30634722113655904 }, { "epoch": 4.248607446496629, "grad_norm": 0.23844848148599096, "learning_rate": 0.00027887669399923563, "loss": 2.964144706726074, "step": 7248, "token_acc": 0.30308403152306806 }, { "epoch": 4.249193784813838, "grad_norm": 0.23823319275182203, "learning_rate": 0.0002788692545696405, "loss": 2.984557628631592, "step": 7249, "token_acc": 0.30038957020330115 }, { "epoch": 4.249780123131047, "grad_norm": 0.21820367909968633, "learning_rate": 0.00027886181392949237, "loss": 2.9552369117736816, "step": 7250, "token_acc": 0.30631165800360666 }, { "epoch": 4.250366461448255, "grad_norm": 0.23392373874309083, "learning_rate": 0.00027885437207886114, "loss": 2.981729030609131, "step": 7251, "token_acc": 0.30055999457228 }, { "epoch": 4.250952799765464, "grad_norm": 0.24395941239362548, "learning_rate": 0.0002788469290178167, "loss": 2.9653823375701904, "step": 7252, "token_acc": 0.3043243330430014 }, { "epoch": 4.2515391380826735, "grad_norm": 0.20863186148278146, "learning_rate": 0.00027883948474642894, "loss": 2.9317467212677, "step": 7253, "token_acc": 0.30933163464800184 }, { "epoch": 4.252125476399883, "grad_norm": 0.2286143433917573, "learning_rate": 0.00027883203926476794, "loss": 2.953239917755127, "step": 7254, "token_acc": 0.30279217697509353 }, { "epoch": 4.252711814717092, "grad_norm": 0.22551397624086614, "learning_rate": 0.0002788245925729035, "loss": 2.9629886150360107, "step": 7255, "token_acc": 0.30401657474623883 }, { "epoch": 4.253298153034301, "grad_norm": 0.22584383182463144, "learning_rate": 0.00027881714467090557, "loss": 2.9899630546569824, "step": 7256, "token_acc": 0.29995232162095953 }, { "epoch": 4.25388449135151, "grad_norm": 0.2083256029583409, "learning_rate": 0.00027880969555884417, "loss": 2.937809467315674, "step": 7257, "token_acc": 0.3077331902040645 }, { "epoch": 4.254470829668719, "grad_norm": 0.20224647510583466, "learning_rate": 0.00027880224523678924, "loss": 2.9497933387756348, "step": 7258, "token_acc": 0.30434684458713757 }, { "epoch": 4.255057167985928, "grad_norm": 0.2148677563268644, "learning_rate": 0.0002787947937048108, "loss": 2.922703981399536, "step": 7259, "token_acc": 0.3098799733838566 }, { "epoch": 4.255643506303137, "grad_norm": 0.23026476398375495, "learning_rate": 0.00027878734096297884, "loss": 2.993525505065918, "step": 7260, "token_acc": 0.3003125417530108 }, { "epoch": 4.256229844620346, "grad_norm": 0.20487611822643104, "learning_rate": 0.00027877988701136333, "loss": 2.955353260040283, "step": 7261, "token_acc": 0.30489917629579855 }, { "epoch": 4.256816182937555, "grad_norm": 0.22828713198687642, "learning_rate": 0.0002787724318500343, "loss": 2.949578285217285, "step": 7262, "token_acc": 0.3047173797921336 }, { "epoch": 4.257402521254764, "grad_norm": 0.22663398999582224, "learning_rate": 0.0002787649754790618, "loss": 2.917224645614624, "step": 7263, "token_acc": 0.3088675227138123 }, { "epoch": 4.257988859571973, "grad_norm": 0.19943245199961812, "learning_rate": 0.0002787575178985159, "loss": 2.9855685234069824, "step": 7264, "token_acc": 0.29906549291957973 }, { "epoch": 4.258575197889182, "grad_norm": 0.22908655246498183, "learning_rate": 0.0002787500591084666, "loss": 3.0319266319274902, "step": 7265, "token_acc": 0.2923925356383243 }, { "epoch": 4.259161536206391, "grad_norm": 0.21915104013630266, "learning_rate": 0.000278742599108984, "loss": 2.9652533531188965, "step": 7266, "token_acc": 0.3042721274397338 }, { "epoch": 4.2597478745236, "grad_norm": 0.2412256988000664, "learning_rate": 0.00027873513790013815, "loss": 2.963923215866089, "step": 7267, "token_acc": 0.30165588410601957 }, { "epoch": 4.260334212840809, "grad_norm": 0.22216080646828737, "learning_rate": 0.00027872767548199915, "loss": 2.931894302368164, "step": 7268, "token_acc": 0.30872874136568446 }, { "epoch": 4.260920551158018, "grad_norm": 0.2477531655146537, "learning_rate": 0.0002787202118546371, "loss": 2.9958064556121826, "step": 7269, "token_acc": 0.29990436943764437 }, { "epoch": 4.2615068894752275, "grad_norm": 0.24460556786318674, "learning_rate": 0.0002787127470181222, "loss": 2.9284067153930664, "step": 7270, "token_acc": 0.3098318566918806 }, { "epoch": 4.262093227792437, "grad_norm": 0.2547147244752354, "learning_rate": 0.00027870528097252435, "loss": 2.9740731716156006, "step": 7271, "token_acc": 0.30190828301659534 }, { "epoch": 4.262679566109645, "grad_norm": 0.25850772824056945, "learning_rate": 0.00027869781371791386, "loss": 2.943971872329712, "step": 7272, "token_acc": 0.30662736551637637 }, { "epoch": 4.263265904426854, "grad_norm": 0.23606850652969488, "learning_rate": 0.00027869034525436086, "loss": 2.993342399597168, "step": 7273, "token_acc": 0.2990371072691021 }, { "epoch": 4.263852242744063, "grad_norm": 0.22186647149390218, "learning_rate": 0.00027868287558193545, "loss": 2.9822707176208496, "step": 7274, "token_acc": 0.30125332358581736 }, { "epoch": 4.264438581061272, "grad_norm": 0.23008549422646365, "learning_rate": 0.0002786754047007078, "loss": 2.9711735248565674, "step": 7275, "token_acc": 0.30232240041876335 }, { "epoch": 4.265024919378481, "grad_norm": 0.22419943437499343, "learning_rate": 0.0002786679326107482, "loss": 2.964339256286621, "step": 7276, "token_acc": 0.3022512960770707 }, { "epoch": 4.26561125769569, "grad_norm": 0.21518140203649397, "learning_rate": 0.0002786604593121267, "loss": 2.9791159629821777, "step": 7277, "token_acc": 0.30121050688327444 }, { "epoch": 4.2661975960128995, "grad_norm": 0.225399138157019, "learning_rate": 0.0002786529848049136, "loss": 2.9751439094543457, "step": 7278, "token_acc": 0.3029537244875206 }, { "epoch": 4.266783934330109, "grad_norm": 0.22864444401647632, "learning_rate": 0.000278645509089179, "loss": 2.9641313552856445, "step": 7279, "token_acc": 0.3045027526513189 }, { "epoch": 4.267370272647318, "grad_norm": 0.23008937451762368, "learning_rate": 0.00027863803216499327, "loss": 2.917933464050293, "step": 7280, "token_acc": 0.30946260614241233 }, { "epoch": 4.267956610964527, "grad_norm": 0.2318931699117132, "learning_rate": 0.0002786305540324265, "loss": 2.963479995727539, "step": 7281, "token_acc": 0.3040939977967562 }, { "epoch": 4.268542949281736, "grad_norm": 0.24804974801904991, "learning_rate": 0.00027862307469154904, "loss": 2.97214412689209, "step": 7282, "token_acc": 0.30295866653417086 }, { "epoch": 4.269129287598945, "grad_norm": 0.237380116889433, "learning_rate": 0.0002786155941424312, "loss": 2.9963736534118652, "step": 7283, "token_acc": 0.2986338783573182 }, { "epoch": 4.269715625916153, "grad_norm": 0.23486382594631494, "learning_rate": 0.00027860811238514303, "loss": 2.971968650817871, "step": 7284, "token_acc": 0.30230279928783255 }, { "epoch": 4.270301964233362, "grad_norm": 0.1976003320369681, "learning_rate": 0.00027860062941975497, "loss": 2.9394516944885254, "step": 7285, "token_acc": 0.30747536572174805 }, { "epoch": 4.2708883025505715, "grad_norm": 0.21471758595695353, "learning_rate": 0.0002785931452463373, "loss": 2.930746555328369, "step": 7286, "token_acc": 0.30949319097047884 }, { "epoch": 4.271474640867781, "grad_norm": 0.22125778815854652, "learning_rate": 0.00027858565986496034, "loss": 3.0177621841430664, "step": 7287, "token_acc": 0.2956276291303736 }, { "epoch": 4.27206097918499, "grad_norm": 0.22883807450084737, "learning_rate": 0.0002785781732756944, "loss": 2.9535980224609375, "step": 7288, "token_acc": 0.3053907066555652 }, { "epoch": 4.272647317502199, "grad_norm": 0.20984395787708307, "learning_rate": 0.0002785706854786097, "loss": 2.9449849128723145, "step": 7289, "token_acc": 0.3065855836787792 }, { "epoch": 4.273233655819408, "grad_norm": 0.19782005553295082, "learning_rate": 0.00027856319647377676, "loss": 2.9426798820495605, "step": 7290, "token_acc": 0.30734986011134674 }, { "epoch": 4.273819994136617, "grad_norm": 0.2227097223946743, "learning_rate": 0.0002785557062612657, "loss": 3.006328582763672, "step": 7291, "token_acc": 0.29785886160856884 }, { "epoch": 4.274406332453826, "grad_norm": 0.21645721491781605, "learning_rate": 0.00027854821484114714, "loss": 2.983450412750244, "step": 7292, "token_acc": 0.3009982144465579 }, { "epoch": 4.274992670771035, "grad_norm": 0.21341011490581083, "learning_rate": 0.00027854072221349124, "loss": 2.946321964263916, "step": 7293, "token_acc": 0.30640563941460425 }, { "epoch": 4.2755790090882435, "grad_norm": 0.2124940734635888, "learning_rate": 0.0002785332283783685, "loss": 2.9877591133117676, "step": 7294, "token_acc": 0.30071421637997536 }, { "epoch": 4.276165347405453, "grad_norm": 0.21017025760151842, "learning_rate": 0.0002785257333358493, "loss": 2.9772019386291504, "step": 7295, "token_acc": 0.3010421784892291 }, { "epoch": 4.276751685722662, "grad_norm": 0.2039433832680745, "learning_rate": 0.000278518237086004, "loss": 2.966921806335449, "step": 7296, "token_acc": 0.30404714548053496 }, { "epoch": 4.277338024039871, "grad_norm": 0.21323318688308443, "learning_rate": 0.000278510739628903, "loss": 2.979121685028076, "step": 7297, "token_acc": 0.3011762854357891 }, { "epoch": 4.27792436235708, "grad_norm": 0.22044668323003674, "learning_rate": 0.00027850324096461684, "loss": 2.981025457382202, "step": 7298, "token_acc": 0.3011268545545785 }, { "epoch": 4.278510700674289, "grad_norm": 0.22172839523627158, "learning_rate": 0.00027849574109321584, "loss": 2.9934585094451904, "step": 7299, "token_acc": 0.29860844590730595 }, { "epoch": 4.279097038991498, "grad_norm": 0.23812662664903442, "learning_rate": 0.00027848824001477056, "loss": 3.001340866088867, "step": 7300, "token_acc": 0.29800167548681067 }, { "epoch": 4.279683377308707, "grad_norm": 0.22277668044175938, "learning_rate": 0.00027848073772935135, "loss": 2.976656913757324, "step": 7301, "token_acc": 0.3005656102889608 }, { "epoch": 4.280269715625916, "grad_norm": 0.2705714537224366, "learning_rate": 0.0002784732342370288, "loss": 2.995959758758545, "step": 7302, "token_acc": 0.29987252276659626 }, { "epoch": 4.2808560539431255, "grad_norm": 0.2821550847432431, "learning_rate": 0.0002784657295378733, "loss": 2.9847006797790527, "step": 7303, "token_acc": 0.30008076464327166 }, { "epoch": 4.281442392260335, "grad_norm": 0.24712605674202043, "learning_rate": 0.0002784582236319554, "loss": 2.948005199432373, "step": 7304, "token_acc": 0.3051224773156948 }, { "epoch": 4.282028730577543, "grad_norm": 0.22568535406650034, "learning_rate": 0.00027845071651934556, "loss": 2.987281322479248, "step": 7305, "token_acc": 0.30122061608207706 }, { "epoch": 4.282615068894752, "grad_norm": 0.2239776251962893, "learning_rate": 0.00027844320820011434, "loss": 2.969425916671753, "step": 7306, "token_acc": 0.3028026140059335 }, { "epoch": 4.283201407211961, "grad_norm": 0.22080399525976876, "learning_rate": 0.00027843569867433226, "loss": 2.896232843399048, "step": 7307, "token_acc": 0.31288075926123193 }, { "epoch": 4.28378774552917, "grad_norm": 0.2411283814479587, "learning_rate": 0.00027842818794206984, "loss": 2.945919990539551, "step": 7308, "token_acc": 0.3070842161364597 }, { "epoch": 4.284374083846379, "grad_norm": 0.23859573016978153, "learning_rate": 0.0002784206760033976, "loss": 2.9758903980255127, "step": 7309, "token_acc": 0.30247424835760406 }, { "epoch": 4.284960422163588, "grad_norm": 0.2311983869189385, "learning_rate": 0.00027841316285838626, "loss": 2.987905979156494, "step": 7310, "token_acc": 0.29981592937180945 }, { "epoch": 4.2855467604807975, "grad_norm": 0.23898733621874915, "learning_rate": 0.00027840564850710627, "loss": 2.987511157989502, "step": 7311, "token_acc": 0.3014416090824508 }, { "epoch": 4.286133098798007, "grad_norm": 0.23849464929604225, "learning_rate": 0.00027839813294962826, "loss": 2.957730770111084, "step": 7312, "token_acc": 0.30425664510890504 }, { "epoch": 4.286719437115216, "grad_norm": 0.2387539661875173, "learning_rate": 0.0002783906161860228, "loss": 2.977391481399536, "step": 7313, "token_acc": 0.30073084904809166 }, { "epoch": 4.287305775432425, "grad_norm": 0.24129985214027794, "learning_rate": 0.00027838309821636044, "loss": 2.915719985961914, "step": 7314, "token_acc": 0.30988447809512765 }, { "epoch": 4.287892113749633, "grad_norm": 0.22217302071811537, "learning_rate": 0.0002783755790407119, "loss": 2.9591007232666016, "step": 7315, "token_acc": 0.30442186212578964 }, { "epoch": 4.288478452066842, "grad_norm": 0.22380145471505034, "learning_rate": 0.0002783680586591478, "loss": 2.9735379219055176, "step": 7316, "token_acc": 0.30108086560653646 }, { "epoch": 4.289064790384051, "grad_norm": 0.2046230388161708, "learning_rate": 0.00027836053707173876, "loss": 2.9954564571380615, "step": 7317, "token_acc": 0.30006228589224543 }, { "epoch": 4.28965112870126, "grad_norm": 0.21409488910118052, "learning_rate": 0.0002783530142785555, "loss": 2.9871108531951904, "step": 7318, "token_acc": 0.2998447123689215 }, { "epoch": 4.2902374670184695, "grad_norm": 0.20436121060337242, "learning_rate": 0.0002783454902796686, "loss": 2.946488380432129, "step": 7319, "token_acc": 0.3067118254532259 }, { "epoch": 4.290823805335679, "grad_norm": 0.23169151078990502, "learning_rate": 0.00027833796507514873, "loss": 2.9630203247070312, "step": 7320, "token_acc": 0.30242631383032725 }, { "epoch": 4.291410143652888, "grad_norm": 0.21334231936600517, "learning_rate": 0.0002783304386650666, "loss": 2.9706084728240967, "step": 7321, "token_acc": 0.3030002367417945 }, { "epoch": 4.291996481970097, "grad_norm": 0.2316141018543359, "learning_rate": 0.00027832291104949296, "loss": 2.9752144813537598, "step": 7322, "token_acc": 0.3008110250625038 }, { "epoch": 4.292582820287306, "grad_norm": 0.2615834246296336, "learning_rate": 0.0002783153822284985, "loss": 2.9859681129455566, "step": 7323, "token_acc": 0.2986965553450362 }, { "epoch": 4.293169158604515, "grad_norm": 0.22188157660718458, "learning_rate": 0.00027830785220215394, "loss": 2.9728899002075195, "step": 7324, "token_acc": 0.3009495345044804 }, { "epoch": 4.293755496921724, "grad_norm": 0.2416084010837058, "learning_rate": 0.00027830032097053, "loss": 2.9865312576293945, "step": 7325, "token_acc": 0.30182591342298154 }, { "epoch": 4.294341835238933, "grad_norm": 0.21036539713942493, "learning_rate": 0.0002782927885336974, "loss": 2.9353537559509277, "step": 7326, "token_acc": 0.3073467122813448 }, { "epoch": 4.2949281735561415, "grad_norm": 0.25259210961743245, "learning_rate": 0.00027828525489172694, "loss": 2.955595016479492, "step": 7327, "token_acc": 0.3042935244937878 }, { "epoch": 4.295514511873351, "grad_norm": 0.24078945864452275, "learning_rate": 0.00027827772004468935, "loss": 2.945195198059082, "step": 7328, "token_acc": 0.30498479728889133 }, { "epoch": 4.29610085019056, "grad_norm": 0.23694222141086502, "learning_rate": 0.0002782701839926555, "loss": 2.9610705375671387, "step": 7329, "token_acc": 0.3028518011038404 }, { "epoch": 4.296687188507769, "grad_norm": 0.2232397444615466, "learning_rate": 0.00027826264673569607, "loss": 2.9668021202087402, "step": 7330, "token_acc": 0.3037526501578789 }, { "epoch": 4.297273526824978, "grad_norm": 0.250861493532945, "learning_rate": 0.0002782551082738819, "loss": 2.9781816005706787, "step": 7331, "token_acc": 0.302098428924941 }, { "epoch": 4.297859865142187, "grad_norm": 0.2131331077271251, "learning_rate": 0.0002782475686072839, "loss": 3.0011372566223145, "step": 7332, "token_acc": 0.29978922929605795 }, { "epoch": 4.298446203459396, "grad_norm": 0.2251627245377774, "learning_rate": 0.0002782400277359727, "loss": 2.992305278778076, "step": 7333, "token_acc": 0.29960192859626633 }, { "epoch": 4.299032541776605, "grad_norm": 0.2121407170936576, "learning_rate": 0.0002782324856600193, "loss": 2.9771840572357178, "step": 7334, "token_acc": 0.3032398922691995 }, { "epoch": 4.299618880093814, "grad_norm": 0.23481351611464565, "learning_rate": 0.00027822494237949447, "loss": 2.947333812713623, "step": 7335, "token_acc": 0.3057150922575513 }, { "epoch": 4.3002052184110235, "grad_norm": 0.2100285194998613, "learning_rate": 0.00027821739789446915, "loss": 2.983891010284424, "step": 7336, "token_acc": 0.3008895345873477 }, { "epoch": 4.300791556728232, "grad_norm": 0.24065648264378237, "learning_rate": 0.00027820985220501404, "loss": 2.9675650596618652, "step": 7337, "token_acc": 0.30212896676995404 }, { "epoch": 4.301377895045441, "grad_norm": 0.21521295110547, "learning_rate": 0.0002782023053112002, "loss": 2.9489402770996094, "step": 7338, "token_acc": 0.3059314989663036 }, { "epoch": 4.30196423336265, "grad_norm": 0.22734593287269336, "learning_rate": 0.0002781947572130985, "loss": 2.9934773445129395, "step": 7339, "token_acc": 0.29964422708596794 }, { "epoch": 4.302550571679859, "grad_norm": 0.2348058353396603, "learning_rate": 0.00027818720791077974, "loss": 2.966704845428467, "step": 7340, "token_acc": 0.3029773835619652 }, { "epoch": 4.303136909997068, "grad_norm": 0.23283173819188377, "learning_rate": 0.00027817965740431494, "loss": 2.9984846115112305, "step": 7341, "token_acc": 0.300457831009196 }, { "epoch": 4.303723248314277, "grad_norm": 0.21888877581309213, "learning_rate": 0.00027817210569377495, "loss": 2.937060832977295, "step": 7342, "token_acc": 0.3052343242343718 }, { "epoch": 4.304309586631486, "grad_norm": 0.2348822215302708, "learning_rate": 0.0002781645527792307, "loss": 3.0079164505004883, "step": 7343, "token_acc": 0.2971665954844854 }, { "epoch": 4.3048959249486956, "grad_norm": 0.22196561702713719, "learning_rate": 0.00027815699866075327, "loss": 2.96844482421875, "step": 7344, "token_acc": 0.3014047989565411 }, { "epoch": 4.305482263265905, "grad_norm": 0.2188686153926745, "learning_rate": 0.0002781494433384135, "loss": 2.9802191257476807, "step": 7345, "token_acc": 0.3011067794897433 }, { "epoch": 4.306068601583114, "grad_norm": 0.22423033456171357, "learning_rate": 0.0002781418868122824, "loss": 2.942945718765259, "step": 7346, "token_acc": 0.3080456650084304 }, { "epoch": 4.306654939900323, "grad_norm": 0.22505982800605304, "learning_rate": 0.00027813432908243093, "loss": 2.987175464630127, "step": 7347, "token_acc": 0.2987289315968344 }, { "epoch": 4.307241278217531, "grad_norm": 0.23204469749502288, "learning_rate": 0.00027812677014893005, "loss": 2.9192981719970703, "step": 7348, "token_acc": 0.30930834370852067 }, { "epoch": 4.30782761653474, "grad_norm": 0.2403395445204015, "learning_rate": 0.0002781192100118509, "loss": 2.964359760284424, "step": 7349, "token_acc": 0.30255949195863746 }, { "epoch": 4.308413954851949, "grad_norm": 0.22107797698098264, "learning_rate": 0.0002781116486712644, "loss": 2.9808547496795654, "step": 7350, "token_acc": 0.30066191512033996 }, { "epoch": 4.3090002931691584, "grad_norm": 0.20901261991707945, "learning_rate": 0.00027810408612724156, "loss": 3.00819730758667, "step": 7351, "token_acc": 0.2967535104634807 }, { "epoch": 4.309586631486368, "grad_norm": 0.22095505895555012, "learning_rate": 0.0002780965223798535, "loss": 2.961474657058716, "step": 7352, "token_acc": 0.3033051107831632 }, { "epoch": 4.310172969803577, "grad_norm": 0.24821213232415576, "learning_rate": 0.00027808895742917117, "loss": 2.9418067932128906, "step": 7353, "token_acc": 0.30790142163198003 }, { "epoch": 4.310759308120786, "grad_norm": 0.2200656540801713, "learning_rate": 0.0002780813912752657, "loss": 2.9450721740722656, "step": 7354, "token_acc": 0.30597286341179813 }, { "epoch": 4.311345646437995, "grad_norm": 0.2693803743353312, "learning_rate": 0.0002780738239182082, "loss": 2.9657819271087646, "step": 7355, "token_acc": 0.30279379532613665 }, { "epoch": 4.311931984755204, "grad_norm": 0.25786773304675537, "learning_rate": 0.00027806625535806964, "loss": 2.9411869049072266, "step": 7356, "token_acc": 0.3075166060434362 }, { "epoch": 4.312518323072413, "grad_norm": 0.27531643325792154, "learning_rate": 0.0002780586855949212, "loss": 2.9410433769226074, "step": 7357, "token_acc": 0.3067419137556724 }, { "epoch": 4.313104661389621, "grad_norm": 0.2343223844376504, "learning_rate": 0.000278051114628834, "loss": 2.980503559112549, "step": 7358, "token_acc": 0.30229400685627567 }, { "epoch": 4.3136909997068305, "grad_norm": 0.2348211656729805, "learning_rate": 0.0002780435424598791, "loss": 2.992443561553955, "step": 7359, "token_acc": 0.2980131593921023 }, { "epoch": 4.31427733802404, "grad_norm": 0.26110453564782626, "learning_rate": 0.00027803596908812766, "loss": 2.976381778717041, "step": 7360, "token_acc": 0.2999567120887324 }, { "epoch": 4.314863676341249, "grad_norm": 0.2117826384776589, "learning_rate": 0.00027802839451365085, "loss": 2.993807554244995, "step": 7361, "token_acc": 0.29906718416100125 }, { "epoch": 4.315450014658458, "grad_norm": 0.23518032746324566, "learning_rate": 0.0002780208187365198, "loss": 2.9519076347351074, "step": 7362, "token_acc": 0.30601590033603804 }, { "epoch": 4.316036352975667, "grad_norm": 0.25441996874619666, "learning_rate": 0.00027801324175680557, "loss": 2.9919750690460205, "step": 7363, "token_acc": 0.29906050253471456 }, { "epoch": 4.316622691292876, "grad_norm": 0.24687620481151804, "learning_rate": 0.00027800566357457957, "loss": 2.943307876586914, "step": 7364, "token_acc": 0.3066570917981384 }, { "epoch": 4.317209029610085, "grad_norm": 0.2273454330674791, "learning_rate": 0.00027799808418991275, "loss": 2.946281671524048, "step": 7365, "token_acc": 0.3052848502009643 }, { "epoch": 4.317795367927294, "grad_norm": 0.23556441476814813, "learning_rate": 0.00027799050360287645, "loss": 2.9657628536224365, "step": 7366, "token_acc": 0.305032662660047 }, { "epoch": 4.318381706244503, "grad_norm": 0.22643915356520844, "learning_rate": 0.00027798292181354186, "loss": 2.999650239944458, "step": 7367, "token_acc": 0.2992400622026035 }, { "epoch": 4.3189680445617125, "grad_norm": 0.2338470667330291, "learning_rate": 0.00027797533882198015, "loss": 3.0167508125305176, "step": 7368, "token_acc": 0.29723444644694236 }, { "epoch": 4.319554382878922, "grad_norm": 0.21375324234429222, "learning_rate": 0.0002779677546282626, "loss": 2.9877071380615234, "step": 7369, "token_acc": 0.30012600839189907 }, { "epoch": 4.32014072119613, "grad_norm": 0.22903849293370163, "learning_rate": 0.0002779601692324604, "loss": 2.977485418319702, "step": 7370, "token_acc": 0.30106006889061915 }, { "epoch": 4.320727059513339, "grad_norm": 0.20910147767347473, "learning_rate": 0.00027795258263464483, "loss": 2.9834868907928467, "step": 7371, "token_acc": 0.30050895702484454 }, { "epoch": 4.321313397830548, "grad_norm": 0.23453350049545163, "learning_rate": 0.0002779449948348872, "loss": 2.9703733921051025, "step": 7372, "token_acc": 0.30355822390462284 }, { "epoch": 4.321899736147757, "grad_norm": 0.2193544860091229, "learning_rate": 0.00027793740583325873, "loss": 2.9763267040252686, "step": 7373, "token_acc": 0.303390925409172 }, { "epoch": 4.322486074464966, "grad_norm": 0.23228489021779758, "learning_rate": 0.00027792981562983077, "loss": 2.9279422760009766, "step": 7374, "token_acc": 0.30881881871538397 }, { "epoch": 4.323072412782175, "grad_norm": 0.20267274970124177, "learning_rate": 0.0002779222242246745, "loss": 2.954620361328125, "step": 7375, "token_acc": 0.30572977680098695 }, { "epoch": 4.3236587510993845, "grad_norm": 0.2321035668928613, "learning_rate": 0.0002779146316178614, "loss": 2.9720616340637207, "step": 7376, "token_acc": 0.30252231316845696 }, { "epoch": 4.324245089416594, "grad_norm": 0.22835241338947843, "learning_rate": 0.0002779070378094627, "loss": 2.9318339824676514, "step": 7377, "token_acc": 0.3092940731938383 }, { "epoch": 4.324831427733803, "grad_norm": 0.23255490398917625, "learning_rate": 0.0002778994427995497, "loss": 2.93733811378479, "step": 7378, "token_acc": 0.307819070500428 }, { "epoch": 4.325417766051012, "grad_norm": 0.24139845512536073, "learning_rate": 0.00027789184658819375, "loss": 2.964897632598877, "step": 7379, "token_acc": 0.30301182246915176 }, { "epoch": 4.32600410436822, "grad_norm": 0.21745012146193285, "learning_rate": 0.0002778842491754663, "loss": 2.9501442909240723, "step": 7380, "token_acc": 0.3059146553665377 }, { "epoch": 4.326590442685429, "grad_norm": 0.26590685634628747, "learning_rate": 0.00027787665056143863, "loss": 2.98331880569458, "step": 7381, "token_acc": 0.3008528977747945 }, { "epoch": 4.327176781002638, "grad_norm": 0.22630447677430207, "learning_rate": 0.00027786905074618214, "loss": 2.940601348876953, "step": 7382, "token_acc": 0.30701408181848666 }, { "epoch": 4.327763119319847, "grad_norm": 0.24180030021853766, "learning_rate": 0.0002778614497297682, "loss": 2.971580743789673, "step": 7383, "token_acc": 0.3031017198863976 }, { "epoch": 4.3283494576370565, "grad_norm": 0.256198705061953, "learning_rate": 0.00027785384751226826, "loss": 2.9886693954467773, "step": 7384, "token_acc": 0.30011776176667976 }, { "epoch": 4.328935795954266, "grad_norm": 0.23084185164371396, "learning_rate": 0.0002778462440937537, "loss": 2.943281650543213, "step": 7385, "token_acc": 0.3075881461805051 }, { "epoch": 4.329522134271475, "grad_norm": 0.259705052041424, "learning_rate": 0.000277838639474296, "loss": 2.9910545349121094, "step": 7386, "token_acc": 0.29857971492758956 }, { "epoch": 4.330108472588684, "grad_norm": 0.22980670248144583, "learning_rate": 0.00027783103365396647, "loss": 2.96281099319458, "step": 7387, "token_acc": 0.30309761370357646 }, { "epoch": 4.330694810905893, "grad_norm": 0.2538171105014315, "learning_rate": 0.0002778234266328367, "loss": 2.9270219802856445, "step": 7388, "token_acc": 0.30912486174856674 }, { "epoch": 4.331281149223102, "grad_norm": 0.22321323030159743, "learning_rate": 0.00027781581841097803, "loss": 2.990363359451294, "step": 7389, "token_acc": 0.30050385889853615 }, { "epoch": 4.331867487540311, "grad_norm": 0.2301215860981448, "learning_rate": 0.000277808208988462, "loss": 3.007373332977295, "step": 7390, "token_acc": 0.2982451211507469 }, { "epoch": 4.33245382585752, "grad_norm": 0.2540197235546708, "learning_rate": 0.00027780059836536006, "loss": 3.001340627670288, "step": 7391, "token_acc": 0.29750205859408857 }, { "epoch": 4.3330401641747285, "grad_norm": 0.25451663879962383, "learning_rate": 0.0002777929865417437, "loss": 2.9913687705993652, "step": 7392, "token_acc": 0.2992755859470171 }, { "epoch": 4.333626502491938, "grad_norm": 0.22696660805685323, "learning_rate": 0.0002777853735176845, "loss": 2.940615177154541, "step": 7393, "token_acc": 0.30760172046212164 }, { "epoch": 4.334212840809147, "grad_norm": 0.22745082429384703, "learning_rate": 0.00027777775929325386, "loss": 2.9625144004821777, "step": 7394, "token_acc": 0.30414949515763445 }, { "epoch": 4.334799179126356, "grad_norm": 0.24545190095920638, "learning_rate": 0.00027777014386852337, "loss": 2.9754865169525146, "step": 7395, "token_acc": 0.3020578035857749 }, { "epoch": 4.335385517443565, "grad_norm": 0.21689825822132533, "learning_rate": 0.00027776252724356454, "loss": 2.9564085006713867, "step": 7396, "token_acc": 0.3051753447249806 }, { "epoch": 4.335971855760774, "grad_norm": 0.2303311610623671, "learning_rate": 0.0002777549094184489, "loss": 2.9842045307159424, "step": 7397, "token_acc": 0.30146313197160657 }, { "epoch": 4.336558194077983, "grad_norm": 0.23134851953879887, "learning_rate": 0.0002777472903932481, "loss": 3.0256097316741943, "step": 7398, "token_acc": 0.29535930344914507 }, { "epoch": 4.337144532395192, "grad_norm": 0.21713772891746352, "learning_rate": 0.00027773967016803363, "loss": 3.0134687423706055, "step": 7399, "token_acc": 0.296860127986746 }, { "epoch": 4.337730870712401, "grad_norm": 0.23462640799459325, "learning_rate": 0.000277732048742877, "loss": 3.0174005031585693, "step": 7400, "token_acc": 0.2958578439338295 }, { "epoch": 4.3383172090296105, "grad_norm": 0.21605905590825009, "learning_rate": 0.00027772442611785, "loss": 2.9687488079071045, "step": 7401, "token_acc": 0.30237625037564936 }, { "epoch": 4.338903547346819, "grad_norm": 0.22110227960811124, "learning_rate": 0.0002777168022930241, "loss": 2.958282709121704, "step": 7402, "token_acc": 0.30418938930641826 }, { "epoch": 4.339489885664028, "grad_norm": 0.21255136898849908, "learning_rate": 0.00027770917726847096, "loss": 2.9841015338897705, "step": 7403, "token_acc": 0.3003835183824779 }, { "epoch": 4.340076223981237, "grad_norm": 0.2482624736248976, "learning_rate": 0.0002777015510442622, "loss": 2.9673478603363037, "step": 7404, "token_acc": 0.302219011249871 }, { "epoch": 4.340662562298446, "grad_norm": 0.2170379292591644, "learning_rate": 0.00027769392362046936, "loss": 2.954977035522461, "step": 7405, "token_acc": 0.3050576835777907 }, { "epoch": 4.341248900615655, "grad_norm": 0.2545965200666956, "learning_rate": 0.00027768629499716425, "loss": 3.034607410430908, "step": 7406, "token_acc": 0.29509697289497205 }, { "epoch": 4.341835238932864, "grad_norm": 0.23248874755300977, "learning_rate": 0.00027767866517441843, "loss": 2.96799898147583, "step": 7407, "token_acc": 0.30185934239637946 }, { "epoch": 4.342421577250073, "grad_norm": 0.24833632176455733, "learning_rate": 0.0002776710341523036, "loss": 2.943375587463379, "step": 7408, "token_acc": 0.3076517940450697 }, { "epoch": 4.3430079155672825, "grad_norm": 0.22263404272535434, "learning_rate": 0.0002776634019308915, "loss": 2.9831347465515137, "step": 7409, "token_acc": 0.29982119028556176 }, { "epoch": 4.343594253884492, "grad_norm": 0.2412797646195928, "learning_rate": 0.0002776557685102537, "loss": 2.980590343475342, "step": 7410, "token_acc": 0.30060337695208295 }, { "epoch": 4.344180592201701, "grad_norm": 0.22855144588357856, "learning_rate": 0.000277648133890462, "loss": 2.926447868347168, "step": 7411, "token_acc": 0.309918158514036 }, { "epoch": 4.34476693051891, "grad_norm": 0.23063405206132012, "learning_rate": 0.0002776404980715881, "loss": 2.9773662090301514, "step": 7412, "token_acc": 0.30156770481492695 }, { "epoch": 4.345353268836118, "grad_norm": 0.2194283262671779, "learning_rate": 0.00027763286105370375, "loss": 2.9674577713012695, "step": 7413, "token_acc": 0.3016774078096767 }, { "epoch": 4.345939607153327, "grad_norm": 0.22787634998482156, "learning_rate": 0.00027762522283688055, "loss": 2.9720420837402344, "step": 7414, "token_acc": 0.30222894625037566 }, { "epoch": 4.346525945470536, "grad_norm": 0.23791883417456025, "learning_rate": 0.00027761758342119045, "loss": 2.940985679626465, "step": 7415, "token_acc": 0.30664660824390766 }, { "epoch": 4.347112283787745, "grad_norm": 0.21748616358476272, "learning_rate": 0.00027760994280670505, "loss": 2.9615931510925293, "step": 7416, "token_acc": 0.3037087302102096 }, { "epoch": 4.3476986221049545, "grad_norm": 0.201574760546558, "learning_rate": 0.00027760230099349624, "loss": 2.9271907806396484, "step": 7417, "token_acc": 0.30948560568295114 }, { "epoch": 4.348284960422164, "grad_norm": 0.2287733560312696, "learning_rate": 0.0002775946579816358, "loss": 2.934257984161377, "step": 7418, "token_acc": 0.30816977468440215 }, { "epoch": 4.348871298739373, "grad_norm": 0.2256953707389003, "learning_rate": 0.00027758701377119543, "loss": 2.965078115463257, "step": 7419, "token_acc": 0.3042641385648486 }, { "epoch": 4.349457637056582, "grad_norm": 0.22809543565280077, "learning_rate": 0.000277579368362247, "loss": 2.9597039222717285, "step": 7420, "token_acc": 0.30339300280616355 }, { "epoch": 4.350043975373791, "grad_norm": 0.2382039064282838, "learning_rate": 0.0002775717217548623, "loss": 2.9811854362487793, "step": 7421, "token_acc": 0.30339154109121474 }, { "epoch": 4.350630313691, "grad_norm": 0.2111265142200861, "learning_rate": 0.0002775640739491132, "loss": 2.952059745788574, "step": 7422, "token_acc": 0.3043125769560128 }, { "epoch": 4.351216652008208, "grad_norm": 0.20072847261091104, "learning_rate": 0.0002775564249450715, "loss": 2.959197521209717, "step": 7423, "token_acc": 0.3046208074021039 }, { "epoch": 4.351802990325417, "grad_norm": 0.21593366982878148, "learning_rate": 0.0002775487747428091, "loss": 3.0046238899230957, "step": 7424, "token_acc": 0.2968203871823361 }, { "epoch": 4.3523893286426265, "grad_norm": 0.22169574168172437, "learning_rate": 0.00027754112334239784, "loss": 2.9432876110076904, "step": 7425, "token_acc": 0.30547081187589065 }, { "epoch": 4.352975666959836, "grad_norm": 0.234638050073075, "learning_rate": 0.00027753347074390957, "loss": 2.998220443725586, "step": 7426, "token_acc": 0.29888128875535386 }, { "epoch": 4.353562005277045, "grad_norm": 0.22459040539790787, "learning_rate": 0.0002775258169474162, "loss": 2.96234393119812, "step": 7427, "token_acc": 0.30433893761111297 }, { "epoch": 4.354148343594254, "grad_norm": 0.214610552416326, "learning_rate": 0.0002775181619529897, "loss": 2.9591174125671387, "step": 7428, "token_acc": 0.3064812540985155 }, { "epoch": 4.354734681911463, "grad_norm": 0.24878504339242488, "learning_rate": 0.0002775105057607018, "loss": 2.9945285320281982, "step": 7429, "token_acc": 0.298331882642663 }, { "epoch": 4.355321020228672, "grad_norm": 0.224617709020674, "learning_rate": 0.0002775028483706245, "loss": 2.933229923248291, "step": 7430, "token_acc": 0.30825603714236904 }, { "epoch": 4.355907358545881, "grad_norm": 0.22527142763040917, "learning_rate": 0.0002774951897828298, "loss": 2.9705281257629395, "step": 7431, "token_acc": 0.3038445549418809 }, { "epoch": 4.35649369686309, "grad_norm": 0.2318575799599823, "learning_rate": 0.0002774875299973896, "loss": 2.9466748237609863, "step": 7432, "token_acc": 0.306039786318581 }, { "epoch": 4.357080035180299, "grad_norm": 0.22725220371951266, "learning_rate": 0.00027747986901437583, "loss": 2.9579403400421143, "step": 7433, "token_acc": 0.3048014810671142 }, { "epoch": 4.3576663734975085, "grad_norm": 0.2214610566303656, "learning_rate": 0.0002774722068338605, "loss": 2.927346706390381, "step": 7434, "token_acc": 0.3083878973838054 }, { "epoch": 4.358252711814717, "grad_norm": 0.2190755845403344, "learning_rate": 0.0002774645434559156, "loss": 2.982912063598633, "step": 7435, "token_acc": 0.30004665039883527 }, { "epoch": 4.358839050131926, "grad_norm": 0.23995108663628126, "learning_rate": 0.0002774568788806129, "loss": 2.996185779571533, "step": 7436, "token_acc": 0.29866878092373417 }, { "epoch": 4.359425388449135, "grad_norm": 0.23559409635806414, "learning_rate": 0.00027744921310802475, "loss": 2.957252025604248, "step": 7437, "token_acc": 0.30422326692813173 }, { "epoch": 4.360011726766344, "grad_norm": 0.204677043305365, "learning_rate": 0.00027744154613822293, "loss": 2.9237399101257324, "step": 7438, "token_acc": 0.30876550498994726 }, { "epoch": 4.360598065083553, "grad_norm": 0.2344583095496871, "learning_rate": 0.0002774338779712795, "loss": 3.0163698196411133, "step": 7439, "token_acc": 0.2965866504076151 }, { "epoch": 4.361184403400762, "grad_norm": 0.24222907083088832, "learning_rate": 0.0002774262086072665, "loss": 2.9362940788269043, "step": 7440, "token_acc": 0.30632696390658176 }, { "epoch": 4.361770741717971, "grad_norm": 0.23261026021506248, "learning_rate": 0.00027741853804625597, "loss": 2.9780633449554443, "step": 7441, "token_acc": 0.3022428185707544 }, { "epoch": 4.3623570800351805, "grad_norm": 0.22953967202096529, "learning_rate": 0.00027741086628832, "loss": 2.964160442352295, "step": 7442, "token_acc": 0.3060982892103967 }, { "epoch": 4.36294341835239, "grad_norm": 0.2281486530179143, "learning_rate": 0.0002774031933335306, "loss": 2.9509661197662354, "step": 7443, "token_acc": 0.3050236734011309 }, { "epoch": 4.363529756669599, "grad_norm": 0.2318249755996522, "learning_rate": 0.0002773955191819599, "loss": 3.018233299255371, "step": 7444, "token_acc": 0.2970305431364126 }, { "epoch": 4.364116094986807, "grad_norm": 0.21770199107073115, "learning_rate": 0.00027738784383368, "loss": 2.954418897628784, "step": 7445, "token_acc": 0.30453840145245736 }, { "epoch": 4.364702433304016, "grad_norm": 0.2195995871176992, "learning_rate": 0.0002773801672887629, "loss": 2.9593656063079834, "step": 7446, "token_acc": 0.303345385512749 }, { "epoch": 4.365288771621225, "grad_norm": 0.22544094044324695, "learning_rate": 0.0002773724895472808, "loss": 2.960944175720215, "step": 7447, "token_acc": 0.3034861996316368 }, { "epoch": 4.365875109938434, "grad_norm": 0.24358085726752657, "learning_rate": 0.0002773648106093058, "loss": 2.972194194793701, "step": 7448, "token_acc": 0.3016396603430441 }, { "epoch": 4.366461448255643, "grad_norm": 0.2288713130325665, "learning_rate": 0.00027735713047491006, "loss": 2.95180606842041, "step": 7449, "token_acc": 0.30419559876081614 }, { "epoch": 4.3670477865728525, "grad_norm": 0.2423433430253848, "learning_rate": 0.0002773494491441657, "loss": 3.0050132274627686, "step": 7450, "token_acc": 0.29752068220070327 }, { "epoch": 4.367634124890062, "grad_norm": 0.23619046077048764, "learning_rate": 0.0002773417666171448, "loss": 2.94372296333313, "step": 7451, "token_acc": 0.30577219476171974 }, { "epoch": 4.368220463207271, "grad_norm": 0.2162855826483245, "learning_rate": 0.0002773340828939196, "loss": 2.990633964538574, "step": 7452, "token_acc": 0.2999063442099663 }, { "epoch": 4.36880680152448, "grad_norm": 0.22679060865010944, "learning_rate": 0.00027732639797456237, "loss": 2.9583752155303955, "step": 7453, "token_acc": 0.3064472633494351 }, { "epoch": 4.369393139841689, "grad_norm": 0.22268278117379317, "learning_rate": 0.00027731871185914507, "loss": 2.8930678367614746, "step": 7454, "token_acc": 0.3128569183213706 }, { "epoch": 4.369979478158898, "grad_norm": 0.20627290022283126, "learning_rate": 0.0002773110245477401, "loss": 2.9686548709869385, "step": 7455, "token_acc": 0.3019881277678319 }, { "epoch": 4.370565816476106, "grad_norm": 0.23534461078160235, "learning_rate": 0.0002773033360404197, "loss": 2.995039224624634, "step": 7456, "token_acc": 0.2989590179017376 }, { "epoch": 4.371152154793315, "grad_norm": 0.2142829635963942, "learning_rate": 0.0002772956463372559, "loss": 2.9501309394836426, "step": 7457, "token_acc": 0.3040427353609053 }, { "epoch": 4.3717384931105245, "grad_norm": 0.2281564945966076, "learning_rate": 0.00027728795543832105, "loss": 2.9776997566223145, "step": 7458, "token_acc": 0.3026790452350195 }, { "epoch": 4.372324831427734, "grad_norm": 0.204093352012456, "learning_rate": 0.0002772802633436874, "loss": 2.994493246078491, "step": 7459, "token_acc": 0.2988305146205238 }, { "epoch": 4.372911169744943, "grad_norm": 0.23269081759733368, "learning_rate": 0.00027727257005342716, "loss": 2.996680736541748, "step": 7460, "token_acc": 0.2991457458456698 }, { "epoch": 4.373497508062152, "grad_norm": 0.23568474628698902, "learning_rate": 0.00027726487556761266, "loss": 2.9784648418426514, "step": 7461, "token_acc": 0.3031310895048818 }, { "epoch": 4.374083846379361, "grad_norm": 0.2150090003628934, "learning_rate": 0.0002772571798863161, "loss": 2.940512180328369, "step": 7462, "token_acc": 0.3062648920551638 }, { "epoch": 4.37467018469657, "grad_norm": 0.26972538459129053, "learning_rate": 0.0002772494830096099, "loss": 2.985027551651001, "step": 7463, "token_acc": 0.300904702939021 }, { "epoch": 4.375256523013779, "grad_norm": 0.2663370815412667, "learning_rate": 0.0002772417849375662, "loss": 2.9784159660339355, "step": 7464, "token_acc": 0.30264356361376327 }, { "epoch": 4.375842861330988, "grad_norm": 0.2435779929801738, "learning_rate": 0.0002772340856702574, "loss": 3.0186824798583984, "step": 7465, "token_acc": 0.29527416789536015 }, { "epoch": 4.3764291996481965, "grad_norm": 0.26859645515761915, "learning_rate": 0.0002772263852077558, "loss": 2.932619571685791, "step": 7466, "token_acc": 0.3071234581724347 }, { "epoch": 4.377015537965406, "grad_norm": 0.28069880953968096, "learning_rate": 0.00027721868355013384, "loss": 2.9932851791381836, "step": 7467, "token_acc": 0.2988944842948627 }, { "epoch": 4.377601876282615, "grad_norm": 0.23215340541729915, "learning_rate": 0.0002772109806974637, "loss": 2.9450325965881348, "step": 7468, "token_acc": 0.30598153291034036 }, { "epoch": 4.378188214599824, "grad_norm": 0.25374207991843667, "learning_rate": 0.0002772032766498178, "loss": 2.953277826309204, "step": 7469, "token_acc": 0.3047969143655994 }, { "epoch": 4.378774552917033, "grad_norm": 0.24068130144488842, "learning_rate": 0.00027719557140726855, "loss": 2.99070405960083, "step": 7470, "token_acc": 0.3008341777838137 }, { "epoch": 4.379360891234242, "grad_norm": 0.232580553209548, "learning_rate": 0.00027718786496988833, "loss": 2.997485876083374, "step": 7471, "token_acc": 0.29904054520900725 }, { "epoch": 4.379947229551451, "grad_norm": 0.2544280132668076, "learning_rate": 0.0002771801573377495, "loss": 3.005141496658325, "step": 7472, "token_acc": 0.29849009195101084 }, { "epoch": 4.38053356786866, "grad_norm": 0.2537585247026658, "learning_rate": 0.0002771724485109244, "loss": 2.9789953231811523, "step": 7473, "token_acc": 0.3014262373295165 }, { "epoch": 4.381119906185869, "grad_norm": 0.22469715953060296, "learning_rate": 0.0002771647384894856, "loss": 2.956080198287964, "step": 7474, "token_acc": 0.30495235642544427 }, { "epoch": 4.3817062445030786, "grad_norm": 0.25561779959492037, "learning_rate": 0.00027715702727350544, "loss": 2.9696555137634277, "step": 7475, "token_acc": 0.30262395431653916 }, { "epoch": 4.382292582820288, "grad_norm": 0.22094482548707126, "learning_rate": 0.00027714931486305634, "loss": 2.9638609886169434, "step": 7476, "token_acc": 0.3045287308109564 }, { "epoch": 4.382878921137497, "grad_norm": 0.2533397314274439, "learning_rate": 0.0002771416012582107, "loss": 2.99871826171875, "step": 7477, "token_acc": 0.2983464296108157 }, { "epoch": 4.383465259454705, "grad_norm": 0.24903799324657266, "learning_rate": 0.00027713388645904115, "loss": 2.9663758277893066, "step": 7478, "token_acc": 0.30292466765140325 }, { "epoch": 4.384051597771914, "grad_norm": 0.23093484256519903, "learning_rate": 0.00027712617046561996, "loss": 2.9815616607666016, "step": 7479, "token_acc": 0.3024605479756684 }, { "epoch": 4.384637936089123, "grad_norm": 0.2445199063815977, "learning_rate": 0.00027711845327801975, "loss": 2.9700050354003906, "step": 7480, "token_acc": 0.3027628589951036 }, { "epoch": 4.385224274406332, "grad_norm": 0.24360959569548238, "learning_rate": 0.0002771107348963129, "loss": 2.9615426063537598, "step": 7481, "token_acc": 0.3042609888247472 }, { "epoch": 4.3858106127235414, "grad_norm": 0.20756497406590646, "learning_rate": 0.0002771030153205721, "loss": 2.9506173133850098, "step": 7482, "token_acc": 0.304636542340389 }, { "epoch": 4.386396951040751, "grad_norm": 0.26152720128034224, "learning_rate": 0.00027709529455086963, "loss": 3.003720760345459, "step": 7483, "token_acc": 0.29753391982993926 }, { "epoch": 4.38698328935796, "grad_norm": 0.2301045457204367, "learning_rate": 0.0002770875725872782, "loss": 2.9745092391967773, "step": 7484, "token_acc": 0.3021710119792505 }, { "epoch": 4.387569627675169, "grad_norm": 0.22519656736733146, "learning_rate": 0.00027707984942987025, "loss": 2.9724268913269043, "step": 7485, "token_acc": 0.30140923225110716 }, { "epoch": 4.388155965992378, "grad_norm": 0.21959180863653102, "learning_rate": 0.0002770721250787184, "loss": 2.972655773162842, "step": 7486, "token_acc": 0.3010668328258583 }, { "epoch": 4.388742304309587, "grad_norm": 0.2297185154631925, "learning_rate": 0.00027706439953389505, "loss": 2.936591625213623, "step": 7487, "token_acc": 0.30795136874307716 }, { "epoch": 4.389328642626795, "grad_norm": 0.21061868400224334, "learning_rate": 0.000277056672795473, "loss": 2.9652774333953857, "step": 7488, "token_acc": 0.3029329078727312 }, { "epoch": 4.389914980944004, "grad_norm": 0.23281434234550993, "learning_rate": 0.00027704894486352467, "loss": 2.997745990753174, "step": 7489, "token_acc": 0.29790875569524516 }, { "epoch": 4.3905013192612135, "grad_norm": 0.21758261932722223, "learning_rate": 0.00027704121573812274, "loss": 2.9508378505706787, "step": 7490, "token_acc": 0.30605061692262664 }, { "epoch": 4.391087657578423, "grad_norm": 0.21031241634625122, "learning_rate": 0.0002770334854193397, "loss": 2.9566893577575684, "step": 7491, "token_acc": 0.30436006861205256 }, { "epoch": 4.391673995895632, "grad_norm": 0.20961391120017078, "learning_rate": 0.0002770257539072483, "loss": 2.9746947288513184, "step": 7492, "token_acc": 0.3017601489854248 }, { "epoch": 4.392260334212841, "grad_norm": 0.21282074640042992, "learning_rate": 0.00027701802120192116, "loss": 3.0275769233703613, "step": 7493, "token_acc": 0.2958204629120298 }, { "epoch": 4.39284667253005, "grad_norm": 0.20269921259214754, "learning_rate": 0.00027701028730343083, "loss": 2.9803223609924316, "step": 7494, "token_acc": 0.3027306737346366 }, { "epoch": 4.393433010847259, "grad_norm": 0.22014904650462777, "learning_rate": 0.00027700255221184997, "loss": 2.987946033477783, "step": 7495, "token_acc": 0.2998152685157284 }, { "epoch": 4.394019349164468, "grad_norm": 0.21237112595335825, "learning_rate": 0.0002769948159272513, "loss": 2.949683666229248, "step": 7496, "token_acc": 0.3058696576139103 }, { "epoch": 4.394605687481677, "grad_norm": 0.22197503913255287, "learning_rate": 0.0002769870784497074, "loss": 3.0350022315979004, "step": 7497, "token_acc": 0.29369391797547273 }, { "epoch": 4.395192025798886, "grad_norm": 0.21981970771336845, "learning_rate": 0.00027697933977929113, "loss": 2.9578042030334473, "step": 7498, "token_acc": 0.304411668239319 }, { "epoch": 4.395778364116095, "grad_norm": 0.2187988767314944, "learning_rate": 0.00027697159991607503, "loss": 2.958164691925049, "step": 7499, "token_acc": 0.3048302804243249 }, { "epoch": 4.396364702433304, "grad_norm": 0.24020584678510976, "learning_rate": 0.00027696385886013175, "loss": 2.970421552658081, "step": 7500, "token_acc": 0.30253240723930264 }, { "epoch": 4.396951040750513, "grad_norm": 0.22652073223603636, "learning_rate": 0.0002769561166115342, "loss": 2.9632935523986816, "step": 7501, "token_acc": 0.3035878540114575 }, { "epoch": 4.397537379067722, "grad_norm": 0.23870226651248763, "learning_rate": 0.000276948373170355, "loss": 2.9731171131134033, "step": 7502, "token_acc": 0.30197230253370316 }, { "epoch": 4.398123717384931, "grad_norm": 0.22826463125813395, "learning_rate": 0.0002769406285366669, "loss": 3.00234317779541, "step": 7503, "token_acc": 0.29734018917031674 }, { "epoch": 4.39871005570214, "grad_norm": 0.25205782074025596, "learning_rate": 0.0002769328827105426, "loss": 2.9797816276550293, "step": 7504, "token_acc": 0.30147632564710763 }, { "epoch": 4.399296394019349, "grad_norm": 0.22199846075781993, "learning_rate": 0.00027692513569205495, "loss": 3.005668878555298, "step": 7505, "token_acc": 0.29716366875843364 }, { "epoch": 4.399882732336558, "grad_norm": 0.2178636745220201, "learning_rate": 0.00027691738748127667, "loss": 2.934154510498047, "step": 7506, "token_acc": 0.30812018335557584 }, { "epoch": 4.4004690706537675, "grad_norm": 0.2371901276913139, "learning_rate": 0.0002769096380782806, "loss": 2.987424373626709, "step": 7507, "token_acc": 0.30062316377481785 }, { "epoch": 4.401055408970977, "grad_norm": 0.20285393410668617, "learning_rate": 0.00027690188748313947, "loss": 2.9692342281341553, "step": 7508, "token_acc": 0.3020862783589295 }, { "epoch": 4.401641747288186, "grad_norm": 0.24882034699699654, "learning_rate": 0.00027689413569592604, "loss": 2.9444217681884766, "step": 7509, "token_acc": 0.3073822888057929 }, { "epoch": 4.402228085605394, "grad_norm": 0.21646145312417683, "learning_rate": 0.0002768863827167133, "loss": 3.0028581619262695, "step": 7510, "token_acc": 0.29734004889714233 }, { "epoch": 4.402814423922603, "grad_norm": 0.24082268430187542, "learning_rate": 0.0002768786285455739, "loss": 3.0078725814819336, "step": 7511, "token_acc": 0.2973279083916388 }, { "epoch": 4.403400762239812, "grad_norm": 0.24430083717649023, "learning_rate": 0.0002768708731825808, "loss": 3.0146965980529785, "step": 7512, "token_acc": 0.2969611118535347 }, { "epoch": 4.403987100557021, "grad_norm": 0.23385516543001894, "learning_rate": 0.00027686311662780677, "loss": 2.9984161853790283, "step": 7513, "token_acc": 0.29922065550775157 }, { "epoch": 4.40457343887423, "grad_norm": 0.2579148687184216, "learning_rate": 0.00027685535888132473, "loss": 2.9890799522399902, "step": 7514, "token_acc": 0.3000434157693625 }, { "epoch": 4.4051597771914395, "grad_norm": 0.23298779472306833, "learning_rate": 0.00027684759994320757, "loss": 2.9461140632629395, "step": 7515, "token_acc": 0.3055497946824837 }, { "epoch": 4.405746115508649, "grad_norm": 0.22719467724029815, "learning_rate": 0.0002768398398135281, "loss": 2.909564733505249, "step": 7516, "token_acc": 0.3131046474865634 }, { "epoch": 4.406332453825858, "grad_norm": 0.24622822454034474, "learning_rate": 0.0002768320784923593, "loss": 2.9649710655212402, "step": 7517, "token_acc": 0.303135324305619 }, { "epoch": 4.406918792143067, "grad_norm": 0.25064053504721323, "learning_rate": 0.0002768243159797739, "loss": 2.9926137924194336, "step": 7518, "token_acc": 0.3000078571917383 }, { "epoch": 4.407505130460276, "grad_norm": 0.23487473205149595, "learning_rate": 0.0002768165522758451, "loss": 2.984139919281006, "step": 7519, "token_acc": 0.2998250008400994 }, { "epoch": 4.408091468777485, "grad_norm": 0.22706378778188618, "learning_rate": 0.0002768087873806456, "loss": 2.9717254638671875, "step": 7520, "token_acc": 0.30067460601170476 }, { "epoch": 4.408677807094693, "grad_norm": 0.2519205363206708, "learning_rate": 0.00027680102129424845, "loss": 2.9655544757843018, "step": 7521, "token_acc": 0.30559878244979316 }, { "epoch": 4.409264145411902, "grad_norm": 0.23705315451401202, "learning_rate": 0.00027679325401672655, "loss": 3.027557849884033, "step": 7522, "token_acc": 0.294660130202156 }, { "epoch": 4.4098504837291115, "grad_norm": 0.23066611938084383, "learning_rate": 0.0002767854855481529, "loss": 2.989354133605957, "step": 7523, "token_acc": 0.2996040429570662 }, { "epoch": 4.410436822046321, "grad_norm": 0.21936123463566679, "learning_rate": 0.00027677771588860043, "loss": 2.992600202560425, "step": 7524, "token_acc": 0.2995475739808873 }, { "epoch": 4.41102316036353, "grad_norm": 0.23133323323445437, "learning_rate": 0.0002767699450381422, "loss": 2.973036766052246, "step": 7525, "token_acc": 0.30246245164979757 }, { "epoch": 4.411609498680739, "grad_norm": 0.228026485996696, "learning_rate": 0.0002767621729968511, "loss": 2.9853219985961914, "step": 7526, "token_acc": 0.3000444804939951 }, { "epoch": 4.412195836997948, "grad_norm": 0.22741291083716406, "learning_rate": 0.00027675439976480024, "loss": 2.975165367126465, "step": 7527, "token_acc": 0.3012296766562978 }, { "epoch": 4.412782175315157, "grad_norm": 0.22552353828133287, "learning_rate": 0.0002767466253420626, "loss": 2.9211790561676025, "step": 7528, "token_acc": 0.3113293590472288 }, { "epoch": 4.413368513632366, "grad_norm": 0.22311316953967944, "learning_rate": 0.00027673884972871123, "loss": 2.9724016189575195, "step": 7529, "token_acc": 0.3032933108295768 }, { "epoch": 4.413954851949575, "grad_norm": 0.22808560315325163, "learning_rate": 0.00027673107292481913, "loss": 3.0021939277648926, "step": 7530, "token_acc": 0.2990817141336172 }, { "epoch": 4.4145411902667835, "grad_norm": 0.22394169180736145, "learning_rate": 0.0002767232949304593, "loss": 2.9701952934265137, "step": 7531, "token_acc": 0.3030814013986238 }, { "epoch": 4.415127528583993, "grad_norm": 0.2254937808709969, "learning_rate": 0.000276715515745705, "loss": 2.953622341156006, "step": 7532, "token_acc": 0.30599432593302905 }, { "epoch": 4.415713866901202, "grad_norm": 0.22246490998683355, "learning_rate": 0.0002767077353706291, "loss": 2.941204071044922, "step": 7533, "token_acc": 0.3061899249508994 }, { "epoch": 4.416300205218411, "grad_norm": 0.2141833292792169, "learning_rate": 0.0002766999538053048, "loss": 2.9821739196777344, "step": 7534, "token_acc": 0.2998423056436737 }, { "epoch": 4.41688654353562, "grad_norm": 0.22482858779923007, "learning_rate": 0.00027669217104980517, "loss": 2.9544153213500977, "step": 7535, "token_acc": 0.30288021905603607 }, { "epoch": 4.417472881852829, "grad_norm": 0.2220578998000814, "learning_rate": 0.00027668438710420326, "loss": 2.988034248352051, "step": 7536, "token_acc": 0.3013090381983783 }, { "epoch": 4.418059220170038, "grad_norm": 0.22951222042752878, "learning_rate": 0.0002766766019685723, "loss": 3.009068250656128, "step": 7537, "token_acc": 0.29712393689541 }, { "epoch": 4.418645558487247, "grad_norm": 0.262990064336994, "learning_rate": 0.0002766688156429854, "loss": 2.993960380554199, "step": 7538, "token_acc": 0.30064779019956117 }, { "epoch": 4.419231896804456, "grad_norm": 0.2206217469011878, "learning_rate": 0.00027666102812751555, "loss": 2.9506053924560547, "step": 7539, "token_acc": 0.3073134092346616 }, { "epoch": 4.4198182351216655, "grad_norm": 0.2394109278379797, "learning_rate": 0.0002766532394222361, "loss": 2.9905591011047363, "step": 7540, "token_acc": 0.29900388404575523 }, { "epoch": 4.420404573438875, "grad_norm": 0.23826348684458346, "learning_rate": 0.0002766454495272201, "loss": 3.011991500854492, "step": 7541, "token_acc": 0.2964753075979874 }, { "epoch": 4.420990911756084, "grad_norm": 0.2478856679968787, "learning_rate": 0.00027663765844254077, "loss": 2.9614710807800293, "step": 7542, "token_acc": 0.30504336027814427 }, { "epoch": 4.421577250073292, "grad_norm": 0.2318547149069468, "learning_rate": 0.00027662986616827125, "loss": 3.008951187133789, "step": 7543, "token_acc": 0.2977561518429126 }, { "epoch": 4.422163588390501, "grad_norm": 0.2508149531151586, "learning_rate": 0.0002766220727044848, "loss": 2.9602606296539307, "step": 7544, "token_acc": 0.30259982320476364 }, { "epoch": 4.42274992670771, "grad_norm": 0.2074290607644501, "learning_rate": 0.0002766142780512546, "loss": 2.9678430557250977, "step": 7545, "token_acc": 0.30278704812371027 }, { "epoch": 4.423336265024919, "grad_norm": 0.2278039023036722, "learning_rate": 0.0002766064822086539, "loss": 2.977325916290283, "step": 7546, "token_acc": 0.3022880178308545 }, { "epoch": 4.423922603342128, "grad_norm": 0.2206481237285753, "learning_rate": 0.00027659868517675585, "loss": 3.0214197635650635, "step": 7547, "token_acc": 0.29503669887821066 }, { "epoch": 4.4245089416593375, "grad_norm": 0.2182695857307359, "learning_rate": 0.00027659088695563384, "loss": 2.9423303604125977, "step": 7548, "token_acc": 0.3084122828887227 }, { "epoch": 4.425095279976547, "grad_norm": 0.24116400950160918, "learning_rate": 0.00027658308754536094, "loss": 3.0361709594726562, "step": 7549, "token_acc": 0.29410926263587905 }, { "epoch": 4.425681618293756, "grad_norm": 0.2152699195496761, "learning_rate": 0.00027657528694601056, "loss": 2.9664125442504883, "step": 7550, "token_acc": 0.30368169683441665 }, { "epoch": 4.426267956610965, "grad_norm": 0.20811772389048305, "learning_rate": 0.0002765674851576559, "loss": 2.980156421661377, "step": 7551, "token_acc": 0.3004433433634576 }, { "epoch": 4.426854294928174, "grad_norm": 0.22490433518785619, "learning_rate": 0.00027655968218037025, "loss": 3.0214476585388184, "step": 7552, "token_acc": 0.2945921221150579 }, { "epoch": 4.427440633245382, "grad_norm": 0.21924294748443476, "learning_rate": 0.00027655187801422696, "loss": 2.981346607208252, "step": 7553, "token_acc": 0.298751614291864 }, { "epoch": 4.428026971562591, "grad_norm": 0.23186865860986766, "learning_rate": 0.00027654407265929925, "loss": 2.9444243907928467, "step": 7554, "token_acc": 0.30537961024667115 }, { "epoch": 4.4286133098798, "grad_norm": 0.2233740912221921, "learning_rate": 0.00027653626611566056, "loss": 2.943833351135254, "step": 7555, "token_acc": 0.3066100359143345 }, { "epoch": 4.4291996481970095, "grad_norm": 0.2226037170606491, "learning_rate": 0.00027652845838338415, "loss": 2.9573678970336914, "step": 7556, "token_acc": 0.3038769831381344 }, { "epoch": 4.429785986514219, "grad_norm": 0.21112469477535387, "learning_rate": 0.00027652064946254336, "loss": 2.985640048980713, "step": 7557, "token_acc": 0.29939305462508375 }, { "epoch": 4.430372324831428, "grad_norm": 0.21278645889773481, "learning_rate": 0.0002765128393532115, "loss": 2.9902169704437256, "step": 7558, "token_acc": 0.29927112788392585 }, { "epoch": 4.430958663148637, "grad_norm": 0.2335452047190607, "learning_rate": 0.0002765050280554621, "loss": 2.95001220703125, "step": 7559, "token_acc": 0.30507556671719493 }, { "epoch": 4.431545001465846, "grad_norm": 0.21208011085085432, "learning_rate": 0.00027649721556936835, "loss": 2.9829282760620117, "step": 7560, "token_acc": 0.29984166928001593 }, { "epoch": 4.432131339783055, "grad_norm": 0.22861987589734728, "learning_rate": 0.00027648940189500376, "loss": 2.9915008544921875, "step": 7561, "token_acc": 0.2986692886882913 }, { "epoch": 4.432717678100264, "grad_norm": 0.22474816828076577, "learning_rate": 0.0002764815870324417, "loss": 3.007002592086792, "step": 7562, "token_acc": 0.2974789077455117 }, { "epoch": 4.433304016417473, "grad_norm": 0.20449870048134206, "learning_rate": 0.00027647377098175555, "loss": 2.988819122314453, "step": 7563, "token_acc": 0.3004359848241271 }, { "epoch": 4.4338903547346815, "grad_norm": 0.20037998468759713, "learning_rate": 0.0002764659537430187, "loss": 2.9879345893859863, "step": 7564, "token_acc": 0.2997982794579056 }, { "epoch": 4.434476693051891, "grad_norm": 0.2136282549022213, "learning_rate": 0.00027645813531630464, "loss": 2.984762191772461, "step": 7565, "token_acc": 0.29992371386341604 }, { "epoch": 4.4350630313691, "grad_norm": 0.2163536331990884, "learning_rate": 0.0002764503157016869, "loss": 2.966360569000244, "step": 7566, "token_acc": 0.30226549716454015 }, { "epoch": 4.435649369686309, "grad_norm": 0.23056282637307565, "learning_rate": 0.00027644249489923873, "loss": 3.0036206245422363, "step": 7567, "token_acc": 0.2962743208159605 }, { "epoch": 4.436235708003518, "grad_norm": 0.2505185272934878, "learning_rate": 0.0002764346729090337, "loss": 2.934352397918701, "step": 7568, "token_acc": 0.30735287238631814 }, { "epoch": 4.436822046320727, "grad_norm": 0.23291100590036753, "learning_rate": 0.00027642684973114534, "loss": 2.984907865524292, "step": 7569, "token_acc": 0.30181783676069895 }, { "epoch": 4.437408384637936, "grad_norm": 0.2305082005398384, "learning_rate": 0.0002764190253656471, "loss": 2.985118865966797, "step": 7570, "token_acc": 0.29943780872997894 }, { "epoch": 4.437994722955145, "grad_norm": 0.25589584257536085, "learning_rate": 0.0002764111998126124, "loss": 2.975830554962158, "step": 7571, "token_acc": 0.3005234831988947 }, { "epoch": 4.438581061272354, "grad_norm": 0.21715621037107885, "learning_rate": 0.0002764033730721149, "loss": 3.004791736602783, "step": 7572, "token_acc": 0.29819572032236535 }, { "epoch": 4.4391673995895635, "grad_norm": 0.2332126010050994, "learning_rate": 0.000276395545144228, "loss": 2.9431028366088867, "step": 7573, "token_acc": 0.3060748083104383 }, { "epoch": 4.439753737906772, "grad_norm": 0.233698792595349, "learning_rate": 0.0002763877160290253, "loss": 3.0251412391662598, "step": 7574, "token_acc": 0.29486185888717237 }, { "epoch": 4.440340076223981, "grad_norm": 0.2292718913688117, "learning_rate": 0.00027637988572658034, "loss": 2.9524192810058594, "step": 7575, "token_acc": 0.30503595610675993 }, { "epoch": 4.44092641454119, "grad_norm": 0.22887528645919877, "learning_rate": 0.0002763720542369666, "loss": 2.982769012451172, "step": 7576, "token_acc": 0.2992277940890712 }, { "epoch": 4.441512752858399, "grad_norm": 0.2538809280446271, "learning_rate": 0.0002763642215602577, "loss": 2.9990055561065674, "step": 7577, "token_acc": 0.2971679165291434 }, { "epoch": 4.442099091175608, "grad_norm": 0.21638494791598495, "learning_rate": 0.00027635638769652723, "loss": 2.9935128688812256, "step": 7578, "token_acc": 0.3009446981850797 }, { "epoch": 4.442685429492817, "grad_norm": 0.23801170973998323, "learning_rate": 0.0002763485526458488, "loss": 2.9842634201049805, "step": 7579, "token_acc": 0.2998494957096176 }, { "epoch": 4.443271767810026, "grad_norm": 0.2378546425907843, "learning_rate": 0.0002763407164082959, "loss": 2.9668431282043457, "step": 7580, "token_acc": 0.3026356730338699 }, { "epoch": 4.4438581061272355, "grad_norm": 0.2501416334600253, "learning_rate": 0.00027633287898394223, "loss": 2.9581780433654785, "step": 7581, "token_acc": 0.30563516479111763 }, { "epoch": 4.444444444444445, "grad_norm": 0.25523598480605075, "learning_rate": 0.0002763250403728614, "loss": 2.9914441108703613, "step": 7582, "token_acc": 0.2994789904594357 }, { "epoch": 4.445030782761654, "grad_norm": 0.22751298932767122, "learning_rate": 0.0002763172005751271, "loss": 2.975606918334961, "step": 7583, "token_acc": 0.30274614330572264 }, { "epoch": 4.445617121078863, "grad_norm": 0.22322231490937136, "learning_rate": 0.00027630935959081283, "loss": 2.9687275886535645, "step": 7584, "token_acc": 0.3031522115899709 }, { "epoch": 4.446203459396072, "grad_norm": 0.2276494286734312, "learning_rate": 0.0002763015174199924, "loss": 2.9967830181121826, "step": 7585, "token_acc": 0.3000399446101406 }, { "epoch": 4.44678979771328, "grad_norm": 0.22290993602576076, "learning_rate": 0.0002762936740627394, "loss": 3.0188190937042236, "step": 7586, "token_acc": 0.29637081193722803 }, { "epoch": 4.447376136030489, "grad_norm": 0.22810839375775024, "learning_rate": 0.00027628582951912746, "loss": 3.0038933753967285, "step": 7587, "token_acc": 0.298422424753988 }, { "epoch": 4.447962474347698, "grad_norm": 0.215049185161377, "learning_rate": 0.0002762779837892304, "loss": 2.998764753341675, "step": 7588, "token_acc": 0.2984180236637074 }, { "epoch": 4.4485488126649075, "grad_norm": 0.21708986877967146, "learning_rate": 0.0002762701368731218, "loss": 3.014242172241211, "step": 7589, "token_acc": 0.29517731331969155 }, { "epoch": 4.449135150982117, "grad_norm": 0.21439655778186809, "learning_rate": 0.0002762622887708754, "loss": 2.9730865955352783, "step": 7590, "token_acc": 0.30331937315280105 }, { "epoch": 4.449721489299326, "grad_norm": 0.2241885067307578, "learning_rate": 0.00027625443948256495, "loss": 3.035583257675171, "step": 7591, "token_acc": 0.2959085578794088 }, { "epoch": 4.450307827616535, "grad_norm": 0.2387387559963078, "learning_rate": 0.0002762465890082642, "loss": 2.988215446472168, "step": 7592, "token_acc": 0.3015569834157638 }, { "epoch": 4.450894165933744, "grad_norm": 0.21776724465372296, "learning_rate": 0.00027623873734804687, "loss": 2.9739420413970947, "step": 7593, "token_acc": 0.30295207709801575 }, { "epoch": 4.451480504250953, "grad_norm": 0.2294945072124841, "learning_rate": 0.00027623088450198666, "loss": 2.960691213607788, "step": 7594, "token_acc": 0.30552533110471347 }, { "epoch": 4.452066842568162, "grad_norm": 0.20253151984582082, "learning_rate": 0.0002762230304701574, "loss": 2.9611666202545166, "step": 7595, "token_acc": 0.304358322131108 }, { "epoch": 4.45265318088537, "grad_norm": 0.23490758667523984, "learning_rate": 0.0002762151752526329, "loss": 2.9545183181762695, "step": 7596, "token_acc": 0.30677157113559 }, { "epoch": 4.4532395192025795, "grad_norm": 0.22097915643256888, "learning_rate": 0.00027620731884948685, "loss": 2.9702696800231934, "step": 7597, "token_acc": 0.3028869800099432 }, { "epoch": 4.453825857519789, "grad_norm": 0.23308072974715568, "learning_rate": 0.00027619946126079315, "loss": 2.972086191177368, "step": 7598, "token_acc": 0.30219612947925906 }, { "epoch": 4.454412195836998, "grad_norm": 0.21480250335527765, "learning_rate": 0.00027619160248662554, "loss": 2.979414463043213, "step": 7599, "token_acc": 0.30156305354482255 }, { "epoch": 4.454998534154207, "grad_norm": 0.22156480564248046, "learning_rate": 0.0002761837425270579, "loss": 2.991581916809082, "step": 7600, "token_acc": 0.29964662631128575 }, { "epoch": 4.455584872471416, "grad_norm": 0.22047502943256203, "learning_rate": 0.000276175881382164, "loss": 2.9772255420684814, "step": 7601, "token_acc": 0.30140528117593546 }, { "epoch": 4.456171210788625, "grad_norm": 0.23801751997385368, "learning_rate": 0.00027616801905201775, "loss": 2.9637534618377686, "step": 7602, "token_acc": 0.30447306452391554 }, { "epoch": 4.456757549105834, "grad_norm": 0.22740141214600645, "learning_rate": 0.00027616015553669297, "loss": 2.997546434402466, "step": 7603, "token_acc": 0.29920801364463157 }, { "epoch": 4.457343887423043, "grad_norm": 0.22241688038826007, "learning_rate": 0.0002761522908362635, "loss": 2.939868450164795, "step": 7604, "token_acc": 0.307261854455585 }, { "epoch": 4.457930225740252, "grad_norm": 0.21651883624015725, "learning_rate": 0.00027614442495080326, "loss": 2.940763235092163, "step": 7605, "token_acc": 0.3072985639794736 }, { "epoch": 4.4585165640574616, "grad_norm": 0.2217748665330733, "learning_rate": 0.0002761365578803861, "loss": 3.006091833114624, "step": 7606, "token_acc": 0.2982227583852922 }, { "epoch": 4.45910290237467, "grad_norm": 0.20924903109591278, "learning_rate": 0.00027612868962508604, "loss": 2.9682326316833496, "step": 7607, "token_acc": 0.30271689844786104 }, { "epoch": 4.459689240691879, "grad_norm": 0.20612388970463666, "learning_rate": 0.0002761208201849769, "loss": 3.0121848583221436, "step": 7608, "token_acc": 0.2959412571613007 }, { "epoch": 4.460275579009088, "grad_norm": 0.23106248329013215, "learning_rate": 0.0002761129495601325, "loss": 2.9680256843566895, "step": 7609, "token_acc": 0.30297332174554265 }, { "epoch": 4.460861917326297, "grad_norm": 0.23072071015130413, "learning_rate": 0.00027610507775062697, "loss": 2.9940145015716553, "step": 7610, "token_acc": 0.2994108369831639 }, { "epoch": 4.461448255643506, "grad_norm": 0.21866554705368085, "learning_rate": 0.0002760972047565341, "loss": 2.9687657356262207, "step": 7611, "token_acc": 0.3029277242993139 }, { "epoch": 4.462034593960715, "grad_norm": 0.2467556929032799, "learning_rate": 0.0002760893305779279, "loss": 2.961193084716797, "step": 7612, "token_acc": 0.30329074758464486 }, { "epoch": 4.4626209322779244, "grad_norm": 0.23338321289013325, "learning_rate": 0.0002760814552148824, "loss": 2.971200942993164, "step": 7613, "token_acc": 0.30271906988766595 }, { "epoch": 4.463207270595134, "grad_norm": 0.20954108118233988, "learning_rate": 0.0002760735786674715, "loss": 2.9147324562072754, "step": 7614, "token_acc": 0.3104312423212753 }, { "epoch": 4.463793608912343, "grad_norm": 0.2546827217856047, "learning_rate": 0.00027606570093576926, "loss": 2.9942164421081543, "step": 7615, "token_acc": 0.30038036578207705 }, { "epoch": 4.464379947229552, "grad_norm": 0.21659174618311733, "learning_rate": 0.00027605782201984956, "loss": 2.9776782989501953, "step": 7616, "token_acc": 0.301635572268506 }, { "epoch": 4.464966285546761, "grad_norm": 0.2528289550684048, "learning_rate": 0.0002760499419197865, "loss": 2.987570285797119, "step": 7617, "token_acc": 0.30017876677606126 }, { "epoch": 4.465552623863969, "grad_norm": 0.24352480342355978, "learning_rate": 0.00027604206063565416, "loss": 2.9570417404174805, "step": 7618, "token_acc": 0.3043757994075698 }, { "epoch": 4.466138962181178, "grad_norm": 0.240583112577343, "learning_rate": 0.00027603417816752645, "loss": 2.965134620666504, "step": 7619, "token_acc": 0.3039687178129726 }, { "epoch": 4.466725300498387, "grad_norm": 0.23073529259513478, "learning_rate": 0.00027602629451547745, "loss": 2.9953105449676514, "step": 7620, "token_acc": 0.29807001807138594 }, { "epoch": 4.4673116388155965, "grad_norm": 0.22379422980522054, "learning_rate": 0.0002760184096795813, "loss": 2.991844654083252, "step": 7621, "token_acc": 0.29971518256096835 }, { "epoch": 4.467897977132806, "grad_norm": 0.22404903190261807, "learning_rate": 0.00027601052365991196, "loss": 2.962212085723877, "step": 7622, "token_acc": 0.3051771411591502 }, { "epoch": 4.468484315450015, "grad_norm": 0.2173017192481348, "learning_rate": 0.0002760026364565436, "loss": 2.9794421195983887, "step": 7623, "token_acc": 0.30051131615999266 }, { "epoch": 4.469070653767224, "grad_norm": 0.2071021383158694, "learning_rate": 0.0002759947480695502, "loss": 2.946147918701172, "step": 7624, "token_acc": 0.3074731541966795 }, { "epoch": 4.469656992084433, "grad_norm": 0.22466662970887313, "learning_rate": 0.0002759868584990059, "loss": 2.9522435665130615, "step": 7625, "token_acc": 0.3064219800953277 }, { "epoch": 4.470243330401642, "grad_norm": 0.23232520385435237, "learning_rate": 0.0002759789677449849, "loss": 3.005612850189209, "step": 7626, "token_acc": 0.2979912718301832 }, { "epoch": 4.470829668718851, "grad_norm": 0.2173149435748064, "learning_rate": 0.0002759710758075612, "loss": 2.956317663192749, "step": 7627, "token_acc": 0.30477849506438504 }, { "epoch": 4.47141600703606, "grad_norm": 0.24921664699797585, "learning_rate": 0.00027596318268680904, "loss": 2.9827961921691895, "step": 7628, "token_acc": 0.3016147869801452 }, { "epoch": 4.4720023453532685, "grad_norm": 0.24722305291233757, "learning_rate": 0.0002759552883828025, "loss": 3.016123056411743, "step": 7629, "token_acc": 0.294645429581828 }, { "epoch": 4.472588683670478, "grad_norm": 0.21689633675608133, "learning_rate": 0.0002759473928956158, "loss": 2.9752273559570312, "step": 7630, "token_acc": 0.30037186025476165 }, { "epoch": 4.473175021987687, "grad_norm": 0.2434991820689063, "learning_rate": 0.000275939496225323, "loss": 2.9939610958099365, "step": 7631, "token_acc": 0.2992342728203414 }, { "epoch": 4.473761360304896, "grad_norm": 0.21344866813763846, "learning_rate": 0.0002759315983719983, "loss": 2.933187961578369, "step": 7632, "token_acc": 0.3084968984774344 }, { "epoch": 4.474347698622105, "grad_norm": 0.24320203118311193, "learning_rate": 0.000275923699335716, "loss": 2.9964942932128906, "step": 7633, "token_acc": 0.2981918010305851 }, { "epoch": 4.474934036939314, "grad_norm": 0.2495752518385743, "learning_rate": 0.00027591579911655017, "loss": 2.9665846824645996, "step": 7634, "token_acc": 0.3037158191184166 }, { "epoch": 4.475520375256523, "grad_norm": 0.2392050221832817, "learning_rate": 0.00027590789771457513, "loss": 2.974921703338623, "step": 7635, "token_acc": 0.30296626629581863 }, { "epoch": 4.476106713573732, "grad_norm": 0.22611602268956996, "learning_rate": 0.0002758999951298651, "loss": 2.9386470317840576, "step": 7636, "token_acc": 0.3061558644700659 }, { "epoch": 4.476693051890941, "grad_norm": 0.23762525569628992, "learning_rate": 0.0002758920913624942, "loss": 2.964472770690918, "step": 7637, "token_acc": 0.3049408625101289 }, { "epoch": 4.4772793902081505, "grad_norm": 0.22898732954784032, "learning_rate": 0.0002758841864125367, "loss": 2.943307399749756, "step": 7638, "token_acc": 0.3069276919539269 }, { "epoch": 4.477865728525359, "grad_norm": 0.21032174140116416, "learning_rate": 0.000275876280280067, "loss": 3.0144128799438477, "step": 7639, "token_acc": 0.2956134909711444 }, { "epoch": 4.478452066842568, "grad_norm": 0.23463843962194295, "learning_rate": 0.0002758683729651592, "loss": 3.0245981216430664, "step": 7640, "token_acc": 0.2951299411516772 }, { "epoch": 4.479038405159777, "grad_norm": 0.2007427514217131, "learning_rate": 0.00027586046446788766, "loss": 2.999563217163086, "step": 7641, "token_acc": 0.2982976484354753 }, { "epoch": 4.479624743476986, "grad_norm": 0.22053355903278282, "learning_rate": 0.00027585255478832665, "loss": 2.9731335639953613, "step": 7642, "token_acc": 0.30036141434005637 }, { "epoch": 4.480211081794195, "grad_norm": 0.22287474598000823, "learning_rate": 0.0002758446439265505, "loss": 3.020007610321045, "step": 7643, "token_acc": 0.2959017362735672 }, { "epoch": 4.480797420111404, "grad_norm": 0.20509875454697335, "learning_rate": 0.0002758367318826335, "loss": 2.965298891067505, "step": 7644, "token_acc": 0.3032176782105965 }, { "epoch": 4.481383758428613, "grad_norm": 0.23438821667069404, "learning_rate": 0.00027582881865664995, "loss": 3.009063720703125, "step": 7645, "token_acc": 0.2970216794775101 }, { "epoch": 4.4819700967458225, "grad_norm": 0.21664325536563464, "learning_rate": 0.00027582090424867423, "loss": 3.0108723640441895, "step": 7646, "token_acc": 0.2974431073391147 }, { "epoch": 4.482556435063032, "grad_norm": 0.21720614368903043, "learning_rate": 0.0002758129886587806, "loss": 3.022472858428955, "step": 7647, "token_acc": 0.29574099097276724 }, { "epoch": 4.483142773380241, "grad_norm": 0.23169805294547768, "learning_rate": 0.0002758050718870435, "loss": 2.9337656497955322, "step": 7648, "token_acc": 0.3070524087497594 }, { "epoch": 4.48372911169745, "grad_norm": 0.2052382307719247, "learning_rate": 0.00027579715393353735, "loss": 2.983374834060669, "step": 7649, "token_acc": 0.30114648653012893 }, { "epoch": 4.484315450014659, "grad_norm": 0.2451042547369214, "learning_rate": 0.0002757892347983364, "loss": 2.9674196243286133, "step": 7650, "token_acc": 0.30323449063463 }, { "epoch": 4.484901788331867, "grad_norm": 0.2552569946102098, "learning_rate": 0.00027578131448151506, "loss": 2.9782662391662598, "step": 7651, "token_acc": 0.30116845816839277 }, { "epoch": 4.485488126649076, "grad_norm": 0.21230519858773003, "learning_rate": 0.0002757733929831478, "loss": 2.987525463104248, "step": 7652, "token_acc": 0.29979934399947233 }, { "epoch": 4.486074464966285, "grad_norm": 0.22426306371953253, "learning_rate": 0.00027576547030330897, "loss": 3.003708600997925, "step": 7653, "token_acc": 0.2987595238281423 }, { "epoch": 4.4866608032834945, "grad_norm": 0.21456576904585917, "learning_rate": 0.0002757575464420731, "loss": 2.9577674865722656, "step": 7654, "token_acc": 0.3050982047460131 }, { "epoch": 4.487247141600704, "grad_norm": 0.2524770797821413, "learning_rate": 0.0002757496213995144, "loss": 3.0155861377716064, "step": 7655, "token_acc": 0.29672841489835905 }, { "epoch": 4.487833479917913, "grad_norm": 0.21861104063613618, "learning_rate": 0.00027574169517570756, "loss": 2.9387197494506836, "step": 7656, "token_acc": 0.308054603127056 }, { "epoch": 4.488419818235122, "grad_norm": 0.21333302968651946, "learning_rate": 0.00027573376777072694, "loss": 2.954826831817627, "step": 7657, "token_acc": 0.3049119414008665 }, { "epoch": 4.489006156552331, "grad_norm": 0.21572278987173338, "learning_rate": 0.00027572583918464695, "loss": 2.997603416442871, "step": 7658, "token_acc": 0.29893922756432145 }, { "epoch": 4.48959249486954, "grad_norm": 0.20767869334144137, "learning_rate": 0.0002757179094175421, "loss": 2.9891881942749023, "step": 7659, "token_acc": 0.2999237400959065 }, { "epoch": 4.490178833186749, "grad_norm": 0.23555214487189335, "learning_rate": 0.0002757099784694869, "loss": 3.017289161682129, "step": 7660, "token_acc": 0.29596309832463896 }, { "epoch": 4.490765171503957, "grad_norm": 0.23314860967716342, "learning_rate": 0.0002757020463405559, "loss": 3.021169900894165, "step": 7661, "token_acc": 0.294855836336743 }, { "epoch": 4.4913515098211665, "grad_norm": 0.2442421838244778, "learning_rate": 0.00027569411303082357, "loss": 3.0114598274230957, "step": 7662, "token_acc": 0.2957653841971681 }, { "epoch": 4.491937848138376, "grad_norm": 0.21306465471849811, "learning_rate": 0.0002756861785403644, "loss": 2.990102767944336, "step": 7663, "token_acc": 0.3031278088318982 }, { "epoch": 4.492524186455585, "grad_norm": 0.23942117873238872, "learning_rate": 0.00027567824286925293, "loss": 2.9808952808380127, "step": 7664, "token_acc": 0.3017831156679328 }, { "epoch": 4.493110524772794, "grad_norm": 0.2303095524770526, "learning_rate": 0.00027567030601756377, "loss": 2.938162088394165, "step": 7665, "token_acc": 0.30749313520957194 }, { "epoch": 4.493696863090003, "grad_norm": 0.21820544362503638, "learning_rate": 0.00027566236798537137, "loss": 2.992374897003174, "step": 7666, "token_acc": 0.29957593916848674 }, { "epoch": 4.494283201407212, "grad_norm": 0.21833541524736316, "learning_rate": 0.00027565442877275034, "loss": 2.9773597717285156, "step": 7667, "token_acc": 0.3020970801445104 }, { "epoch": 4.494869539724421, "grad_norm": 0.2340539262108697, "learning_rate": 0.0002756464883797753, "loss": 2.944711685180664, "step": 7668, "token_acc": 0.306485216195657 }, { "epoch": 4.49545587804163, "grad_norm": 0.20238945688202734, "learning_rate": 0.00027563854680652083, "loss": 2.931864023208618, "step": 7669, "token_acc": 0.308472712911868 }, { "epoch": 4.496042216358839, "grad_norm": 0.22481747787216352, "learning_rate": 0.0002756306040530615, "loss": 2.987959861755371, "step": 7670, "token_acc": 0.29988672114214526 }, { "epoch": 4.4966285546760485, "grad_norm": 0.21965780034339297, "learning_rate": 0.00027562266011947194, "loss": 2.9668350219726562, "step": 7671, "token_acc": 0.3038297122442386 }, { "epoch": 4.497214892993257, "grad_norm": 0.20342045825504645, "learning_rate": 0.00027561471500582677, "loss": 2.9384617805480957, "step": 7672, "token_acc": 0.30848261327713383 }, { "epoch": 4.497801231310466, "grad_norm": 0.21870435534201574, "learning_rate": 0.0002756067687122006, "loss": 3.0244853496551514, "step": 7673, "token_acc": 0.29324283395590484 }, { "epoch": 4.498387569627675, "grad_norm": 0.19906313659082162, "learning_rate": 0.0002755988212386681, "loss": 2.982006788253784, "step": 7674, "token_acc": 0.30186280920978337 }, { "epoch": 4.498973907944884, "grad_norm": 0.2339817020090608, "learning_rate": 0.00027559087258530396, "loss": 2.9881882667541504, "step": 7675, "token_acc": 0.2997226122034944 }, { "epoch": 4.499560246262093, "grad_norm": 0.2123836960063692, "learning_rate": 0.0002755829227521828, "loss": 2.975555896759033, "step": 7676, "token_acc": 0.3004407814089356 }, { "epoch": 4.500146584579302, "grad_norm": 0.2379519986023324, "learning_rate": 0.00027557497173937923, "loss": 2.9358553886413574, "step": 7677, "token_acc": 0.3073589110464751 }, { "epoch": 4.500732922896511, "grad_norm": 0.22020692528363245, "learning_rate": 0.0002755670195469681, "loss": 2.9977307319641113, "step": 7678, "token_acc": 0.2997275633041668 }, { "epoch": 4.5013192612137205, "grad_norm": 0.2464986715638401, "learning_rate": 0.000275559066175024, "loss": 2.9740078449249268, "step": 7679, "token_acc": 0.30189967458066946 }, { "epoch": 4.50190559953093, "grad_norm": 0.25501870207533445, "learning_rate": 0.00027555111162362166, "loss": 2.9926223754882812, "step": 7680, "token_acc": 0.29984496456141574 }, { "epoch": 4.502491937848139, "grad_norm": 0.21750999537434965, "learning_rate": 0.0002755431558928358, "loss": 2.9678614139556885, "step": 7681, "token_acc": 0.30328653227704133 }, { "epoch": 4.503078276165347, "grad_norm": 0.24988439546725022, "learning_rate": 0.0002755351989827412, "loss": 2.999833106994629, "step": 7682, "token_acc": 0.2983530942779406 }, { "epoch": 4.503664614482556, "grad_norm": 0.23753908934771287, "learning_rate": 0.00027552724089341255, "loss": 2.922658920288086, "step": 7683, "token_acc": 0.3106094747929972 }, { "epoch": 4.504250952799765, "grad_norm": 0.25837274385830045, "learning_rate": 0.00027551928162492456, "loss": 2.9310784339904785, "step": 7684, "token_acc": 0.30704154291866703 }, { "epoch": 4.504837291116974, "grad_norm": 0.2548669538027812, "learning_rate": 0.0002755113211773521, "loss": 2.9529385566711426, "step": 7685, "token_acc": 0.3055461170587728 }, { "epoch": 4.505423629434183, "grad_norm": 0.20457345674230723, "learning_rate": 0.00027550335955076993, "loss": 3.0096793174743652, "step": 7686, "token_acc": 0.2965724193118165 }, { "epoch": 4.5060099677513925, "grad_norm": 0.24858476435106425, "learning_rate": 0.0002754953967452528, "loss": 2.9415903091430664, "step": 7687, "token_acc": 0.30795046112203306 }, { "epoch": 4.506596306068602, "grad_norm": 0.20864641060189953, "learning_rate": 0.0002754874327608755, "loss": 2.950563430786133, "step": 7688, "token_acc": 0.30422527713825637 }, { "epoch": 4.507182644385811, "grad_norm": 0.2432139772116789, "learning_rate": 0.0002754794675977129, "loss": 2.960073709487915, "step": 7689, "token_acc": 0.30343683774071684 }, { "epoch": 4.50776898270302, "grad_norm": 0.21575817635543373, "learning_rate": 0.0002754715012558398, "loss": 2.9802823066711426, "step": 7690, "token_acc": 0.3013164863433457 }, { "epoch": 4.508355321020229, "grad_norm": 0.23980278291918009, "learning_rate": 0.000275463533735331, "loss": 2.940427303314209, "step": 7691, "token_acc": 0.30673909898905477 }, { "epoch": 4.508941659337438, "grad_norm": 0.2095006398751181, "learning_rate": 0.00027545556503626135, "loss": 2.9724385738372803, "step": 7692, "token_acc": 0.3037464255548658 }, { "epoch": 4.509527997654647, "grad_norm": 0.21680007942009089, "learning_rate": 0.00027544759515870575, "loss": 2.9706430435180664, "step": 7693, "token_acc": 0.3041073752256223 }, { "epoch": 4.510114335971855, "grad_norm": 0.2221932434942384, "learning_rate": 0.00027543962410273904, "loss": 3.0166704654693604, "step": 7694, "token_acc": 0.29656931427800437 }, { "epoch": 4.5107006742890645, "grad_norm": 0.2094927413246809, "learning_rate": 0.00027543165186843605, "loss": 2.997117519378662, "step": 7695, "token_acc": 0.29886809874409154 }, { "epoch": 4.511287012606274, "grad_norm": 0.2282586027664007, "learning_rate": 0.00027542367845587173, "loss": 2.9649057388305664, "step": 7696, "token_acc": 0.3039147865733826 }, { "epoch": 4.511873350923483, "grad_norm": 0.2300564114423717, "learning_rate": 0.00027541570386512096, "loss": 2.933500051498413, "step": 7697, "token_acc": 0.3071886994108573 }, { "epoch": 4.512459689240692, "grad_norm": 0.22107905856903817, "learning_rate": 0.00027540772809625866, "loss": 2.989394426345825, "step": 7698, "token_acc": 0.2995021024936432 }, { "epoch": 4.513046027557901, "grad_norm": 0.22665328582718158, "learning_rate": 0.00027539975114935974, "loss": 2.9659981727600098, "step": 7699, "token_acc": 0.30360185410559515 }, { "epoch": 4.51363236587511, "grad_norm": 0.23836709246069981, "learning_rate": 0.0002753917730244991, "loss": 2.978651762008667, "step": 7700, "token_acc": 0.30111827901384597 }, { "epoch": 4.514218704192319, "grad_norm": 0.210970200033658, "learning_rate": 0.0002753837937217518, "loss": 3.0234947204589844, "step": 7701, "token_acc": 0.2941148534913541 }, { "epoch": 4.514805042509528, "grad_norm": 0.23511376538809522, "learning_rate": 0.00027537581324119266, "loss": 3.001394748687744, "step": 7702, "token_acc": 0.29827057364745807 }, { "epoch": 4.515391380826737, "grad_norm": 0.21999748361684662, "learning_rate": 0.0002753678315828967, "loss": 2.961374282836914, "step": 7703, "token_acc": 0.30465367552657335 }, { "epoch": 4.515977719143946, "grad_norm": 0.21964445423063148, "learning_rate": 0.00027535984874693897, "loss": 2.9507014751434326, "step": 7704, "token_acc": 0.30641595859437687 }, { "epoch": 4.516564057461155, "grad_norm": 0.21770301530560826, "learning_rate": 0.0002753518647333943, "loss": 2.9924402236938477, "step": 7705, "token_acc": 0.29946383244424535 }, { "epoch": 4.517150395778364, "grad_norm": 0.23696318971832303, "learning_rate": 0.00027534387954233783, "loss": 2.9958348274230957, "step": 7706, "token_acc": 0.30000416170296884 }, { "epoch": 4.517736734095573, "grad_norm": 0.21288610660964188, "learning_rate": 0.00027533589317384443, "loss": 2.988255739212036, "step": 7707, "token_acc": 0.2978780316152773 }, { "epoch": 4.518323072412782, "grad_norm": 0.22393528496370987, "learning_rate": 0.0002753279056279893, "loss": 2.963256597518921, "step": 7708, "token_acc": 0.3038441866860628 }, { "epoch": 4.518909410729991, "grad_norm": 0.23258115272441177, "learning_rate": 0.0002753199169048473, "loss": 2.9800620079040527, "step": 7709, "token_acc": 0.30160334637920044 }, { "epoch": 4.5194957490472, "grad_norm": 0.21981515569993137, "learning_rate": 0.0002753119270044936, "loss": 2.9860894680023193, "step": 7710, "token_acc": 0.29900013591381525 }, { "epoch": 4.520082087364409, "grad_norm": 0.22771136776689815, "learning_rate": 0.00027530393592700323, "loss": 2.9410014152526855, "step": 7711, "token_acc": 0.30775343887222706 }, { "epoch": 4.5206684256816185, "grad_norm": 0.22074486147046318, "learning_rate": 0.00027529594367245116, "loss": 2.989107131958008, "step": 7712, "token_acc": 0.29935150414302625 }, { "epoch": 4.521254763998828, "grad_norm": 0.23017907428339843, "learning_rate": 0.0002752879502409126, "loss": 2.968398094177246, "step": 7713, "token_acc": 0.3029256875365711 }, { "epoch": 4.521841102316037, "grad_norm": 0.22432293327956712, "learning_rate": 0.0002752799556324625, "loss": 2.9508323669433594, "step": 7714, "token_acc": 0.3072731705765745 }, { "epoch": 4.522427440633246, "grad_norm": 0.20317947861364058, "learning_rate": 0.00027527195984717613, "loss": 2.995086908340454, "step": 7715, "token_acc": 0.2996233433844177 }, { "epoch": 4.523013778950454, "grad_norm": 0.22928105947962923, "learning_rate": 0.00027526396288512844, "loss": 2.993931770324707, "step": 7716, "token_acc": 0.2999773578149765 }, { "epoch": 4.523600117267663, "grad_norm": 0.21895660656556226, "learning_rate": 0.00027525596474639466, "loss": 2.9658145904541016, "step": 7717, "token_acc": 0.30359224048445205 }, { "epoch": 4.524186455584872, "grad_norm": 0.2282841703315982, "learning_rate": 0.00027524796543104983, "loss": 2.9695048332214355, "step": 7718, "token_acc": 0.30254743656998095 }, { "epoch": 4.524772793902081, "grad_norm": 0.23207873565315892, "learning_rate": 0.00027523996493916913, "loss": 2.966928482055664, "step": 7719, "token_acc": 0.30415452178306324 }, { "epoch": 4.5253591322192905, "grad_norm": 0.24644392226382714, "learning_rate": 0.00027523196327082776, "loss": 2.9880852699279785, "step": 7720, "token_acc": 0.3005302570329893 }, { "epoch": 4.5259454705365, "grad_norm": 0.2444212399368073, "learning_rate": 0.00027522396042610085, "loss": 2.9890787601470947, "step": 7721, "token_acc": 0.30163186945044396 }, { "epoch": 4.526531808853709, "grad_norm": 0.21986239219486692, "learning_rate": 0.00027521595640506353, "loss": 2.9860782623291016, "step": 7722, "token_acc": 0.3014009075559308 }, { "epoch": 4.527118147170918, "grad_norm": 0.23600814441317555, "learning_rate": 0.0002752079512077911, "loss": 3.024751663208008, "step": 7723, "token_acc": 0.2959117734596419 }, { "epoch": 4.527704485488127, "grad_norm": 0.24199749840692936, "learning_rate": 0.0002751999448343586, "loss": 2.9594221115112305, "step": 7724, "token_acc": 0.3044213102734409 }, { "epoch": 4.528290823805335, "grad_norm": 0.2267127731836551, "learning_rate": 0.0002751919372848414, "loss": 2.9607176780700684, "step": 7725, "token_acc": 0.3044495428697518 }, { "epoch": 4.528877162122544, "grad_norm": 0.22790637518306908, "learning_rate": 0.0002751839285593146, "loss": 2.963693380355835, "step": 7726, "token_acc": 0.3012129344594417 }, { "epoch": 4.529463500439753, "grad_norm": 0.2380676948524783, "learning_rate": 0.0002751759186578535, "loss": 2.9461140632629395, "step": 7727, "token_acc": 0.3066855497674997 }, { "epoch": 4.5300498387569625, "grad_norm": 0.22676762926695837, "learning_rate": 0.00027516790758053334, "loss": 3.0013465881347656, "step": 7728, "token_acc": 0.298534249900579 }, { "epoch": 4.530636177074172, "grad_norm": 0.21562627933791229, "learning_rate": 0.0002751598953274293, "loss": 3.014930248260498, "step": 7729, "token_acc": 0.2971793523724674 }, { "epoch": 4.531222515391381, "grad_norm": 0.20665000134286518, "learning_rate": 0.00027515188189861675, "loss": 2.9630062580108643, "step": 7730, "token_acc": 0.30416323217976843 }, { "epoch": 4.53180885370859, "grad_norm": 0.21451991595145756, "learning_rate": 0.0002751438672941709, "loss": 2.946467399597168, "step": 7731, "token_acc": 0.30592124959323136 }, { "epoch": 4.532395192025799, "grad_norm": 0.22673265597390274, "learning_rate": 0.00027513585151416704, "loss": 3.014478921890259, "step": 7732, "token_acc": 0.2957839856160133 }, { "epoch": 4.532981530343008, "grad_norm": 0.22382510372097542, "learning_rate": 0.0002751278345586805, "loss": 2.970081329345703, "step": 7733, "token_acc": 0.3036868260813577 }, { "epoch": 4.533567868660217, "grad_norm": 0.2098061907932322, "learning_rate": 0.00027511981642778653, "loss": 2.9808945655822754, "step": 7734, "token_acc": 0.30107383949748484 }, { "epoch": 4.534154206977426, "grad_norm": 0.22543071239948761, "learning_rate": 0.0002751117971215606, "loss": 2.9669575691223145, "step": 7735, "token_acc": 0.30284533809295716 }, { "epoch": 4.534740545294635, "grad_norm": 0.2211636838214423, "learning_rate": 0.0002751037766400778, "loss": 2.990981340408325, "step": 7736, "token_acc": 0.2997858211445115 }, { "epoch": 4.535326883611844, "grad_norm": 0.23426176027712597, "learning_rate": 0.0002750957549834136, "loss": 3.0035042762756348, "step": 7737, "token_acc": 0.29749086529733487 }, { "epoch": 4.535913221929053, "grad_norm": 0.22318727840354854, "learning_rate": 0.0002750877321516434, "loss": 2.9733495712280273, "step": 7738, "token_acc": 0.30123730049250746 }, { "epoch": 4.536499560246262, "grad_norm": 0.23396894850914174, "learning_rate": 0.0002750797081448425, "loss": 2.9857406616210938, "step": 7739, "token_acc": 0.3015606120550465 }, { "epoch": 4.537085898563471, "grad_norm": 0.20553700772355055, "learning_rate": 0.00027507168296308625, "loss": 2.931966781616211, "step": 7740, "token_acc": 0.3084519700423315 }, { "epoch": 4.53767223688068, "grad_norm": 0.24434709846180402, "learning_rate": 0.00027506365660645007, "loss": 2.9528136253356934, "step": 7741, "token_acc": 0.3074063909037444 }, { "epoch": 4.538258575197889, "grad_norm": 0.2422797190611248, "learning_rate": 0.00027505562907500944, "loss": 3.0136547088623047, "step": 7742, "token_acc": 0.2971237185491383 }, { "epoch": 4.538844913515098, "grad_norm": 0.22278820537780417, "learning_rate": 0.00027504760036883966, "loss": 2.9406051635742188, "step": 7743, "token_acc": 0.3065464739185401 }, { "epoch": 4.5394312518323074, "grad_norm": 0.2305967651309063, "learning_rate": 0.00027503957048801613, "loss": 3.0179290771484375, "step": 7744, "token_acc": 0.2971057833645902 }, { "epoch": 4.540017590149517, "grad_norm": 0.24872656815490563, "learning_rate": 0.0002750315394326144, "loss": 3.0021023750305176, "step": 7745, "token_acc": 0.2972013891068085 }, { "epoch": 4.540603928466726, "grad_norm": 0.20310344137934316, "learning_rate": 0.0002750235072027098, "loss": 2.9927072525024414, "step": 7746, "token_acc": 0.2999105996154992 }, { "epoch": 4.541190266783934, "grad_norm": 0.25291785059097727, "learning_rate": 0.00027501547379837785, "loss": 3.0082972049713135, "step": 7747, "token_acc": 0.29722700295590837 }, { "epoch": 4.541776605101143, "grad_norm": 0.24245550744996894, "learning_rate": 0.00027500743921969395, "loss": 3.0299525260925293, "step": 7748, "token_acc": 0.2947659151085473 }, { "epoch": 4.542362943418352, "grad_norm": 0.22409731597927587, "learning_rate": 0.0002749994034667336, "loss": 2.998142957687378, "step": 7749, "token_acc": 0.29922359307530383 }, { "epoch": 4.542949281735561, "grad_norm": 0.23226915357677802, "learning_rate": 0.00027499136653957233, "loss": 2.968794345855713, "step": 7750, "token_acc": 0.3026495576437181 }, { "epoch": 4.54353562005277, "grad_norm": 0.2436078436062602, "learning_rate": 0.0002749833284382856, "loss": 2.9623961448669434, "step": 7751, "token_acc": 0.30542321649136533 }, { "epoch": 4.5441219583699795, "grad_norm": 0.2295361359441344, "learning_rate": 0.0002749752891629489, "loss": 2.9902424812316895, "step": 7752, "token_acc": 0.30026955167368513 }, { "epoch": 4.544708296687189, "grad_norm": 0.20533540148340074, "learning_rate": 0.0002749672487136377, "loss": 2.9729013442993164, "step": 7753, "token_acc": 0.3029422121466814 }, { "epoch": 4.545294635004398, "grad_norm": 0.22786810106419103, "learning_rate": 0.00027495920709042773, "loss": 2.9648854732513428, "step": 7754, "token_acc": 0.3034954152235937 }, { "epoch": 4.545880973321607, "grad_norm": 0.21769783050452754, "learning_rate": 0.0002749511642933943, "loss": 2.9705238342285156, "step": 7755, "token_acc": 0.30420145080772854 }, { "epoch": 4.546467311638816, "grad_norm": 0.2210949243093908, "learning_rate": 0.0002749431203226131, "loss": 2.9676127433776855, "step": 7756, "token_acc": 0.302537440831846 }, { "epoch": 4.547053649956025, "grad_norm": 0.20198459361942897, "learning_rate": 0.0002749350751781596, "loss": 2.946559429168701, "step": 7757, "token_acc": 0.30534744592728946 }, { "epoch": 4.547639988273234, "grad_norm": 0.2163074124529526, "learning_rate": 0.0002749270288601094, "loss": 2.9771487712860107, "step": 7758, "token_acc": 0.30121539087406574 }, { "epoch": 4.548226326590442, "grad_norm": 0.20295629311021735, "learning_rate": 0.00027491898136853816, "loss": 2.948281764984131, "step": 7759, "token_acc": 0.30665099941879564 }, { "epoch": 4.5488126649076515, "grad_norm": 0.22402314450241137, "learning_rate": 0.0002749109327035214, "loss": 2.942823886871338, "step": 7760, "token_acc": 0.30771558807366484 }, { "epoch": 4.549399003224861, "grad_norm": 0.20186083429482596, "learning_rate": 0.0002749028828651348, "loss": 3.0131897926330566, "step": 7761, "token_acc": 0.29507023819911005 }, { "epoch": 4.54998534154207, "grad_norm": 0.21582200660148892, "learning_rate": 0.0002748948318534539, "loss": 2.982340097427368, "step": 7762, "token_acc": 0.30070976636420343 }, { "epoch": 4.550571679859279, "grad_norm": 0.21652509836385608, "learning_rate": 0.0002748867796685543, "loss": 2.9687228202819824, "step": 7763, "token_acc": 0.3027524906683853 }, { "epoch": 4.551158018176488, "grad_norm": 0.2352780849569731, "learning_rate": 0.0002748787263105117, "loss": 2.9721882343292236, "step": 7764, "token_acc": 0.3029697628516561 }, { "epoch": 4.551744356493697, "grad_norm": 0.24564396910040026, "learning_rate": 0.00027487067177940183, "loss": 2.981130361557007, "step": 7765, "token_acc": 0.30254945409758804 }, { "epoch": 4.552330694810906, "grad_norm": 0.2199831596729623, "learning_rate": 0.0002748626160753002, "loss": 3.0031003952026367, "step": 7766, "token_acc": 0.2989423363576164 }, { "epoch": 4.552917033128115, "grad_norm": 0.22875975250252137, "learning_rate": 0.0002748545591982825, "loss": 2.9655323028564453, "step": 7767, "token_acc": 0.3040906316793439 }, { "epoch": 4.5535033714453235, "grad_norm": 0.2474565807113134, "learning_rate": 0.00027484650114842455, "loss": 2.9587340354919434, "step": 7768, "token_acc": 0.30253816163004865 }, { "epoch": 4.554089709762533, "grad_norm": 0.22263978525699793, "learning_rate": 0.0002748384419258019, "loss": 2.975879192352295, "step": 7769, "token_acc": 0.3028028147055948 }, { "epoch": 4.554676048079742, "grad_norm": 0.2270013576657166, "learning_rate": 0.00027483038153049036, "loss": 3.033445358276367, "step": 7770, "token_acc": 0.2938889250602253 }, { "epoch": 4.555262386396951, "grad_norm": 0.23130207950435072, "learning_rate": 0.00027482231996256554, "loss": 2.991196870803833, "step": 7771, "token_acc": 0.29969369273385893 }, { "epoch": 4.55584872471416, "grad_norm": 0.22422879078213484, "learning_rate": 0.0002748142572221032, "loss": 2.9923620223999023, "step": 7772, "token_acc": 0.29871994164858273 }, { "epoch": 4.556435063031369, "grad_norm": 0.2043115435315682, "learning_rate": 0.0002748061933091792, "loss": 2.993715524673462, "step": 7773, "token_acc": 0.2984629132209556 }, { "epoch": 4.557021401348578, "grad_norm": 0.21793277441441922, "learning_rate": 0.0002747981282238691, "loss": 2.9579083919525146, "step": 7774, "token_acc": 0.3042524162045227 }, { "epoch": 4.557607739665787, "grad_norm": 0.20317010800838273, "learning_rate": 0.0002747900619662488, "loss": 2.9364981651306152, "step": 7775, "token_acc": 0.3080165027970621 }, { "epoch": 4.558194077982996, "grad_norm": 0.21498713478707232, "learning_rate": 0.000274781994536394, "loss": 2.983680486679077, "step": 7776, "token_acc": 0.3004606381874646 }, { "epoch": 4.5587804163002055, "grad_norm": 0.21118297123661633, "learning_rate": 0.00027477392593438057, "loss": 2.974483013153076, "step": 7777, "token_acc": 0.30197589523699153 }, { "epoch": 4.559366754617415, "grad_norm": 0.21325495347960452, "learning_rate": 0.0002747658561602842, "loss": 2.9670441150665283, "step": 7778, "token_acc": 0.304626014215563 }, { "epoch": 4.559953092934624, "grad_norm": 0.21489192864117126, "learning_rate": 0.0002747577852141807, "loss": 2.9906210899353027, "step": 7779, "token_acc": 0.29946914930509244 }, { "epoch": 4.560539431251832, "grad_norm": 0.20991353451988473, "learning_rate": 0.00027474971309614596, "loss": 2.950576066970825, "step": 7780, "token_acc": 0.3057473860001996 }, { "epoch": 4.561125769569041, "grad_norm": 0.24948168144597818, "learning_rate": 0.0002747416398062557, "loss": 3.002779722213745, "step": 7781, "token_acc": 0.2974535010940919 }, { "epoch": 4.56171210788625, "grad_norm": 0.2192479419685926, "learning_rate": 0.0002747335653445859, "loss": 3.002641201019287, "step": 7782, "token_acc": 0.2991641638879911 }, { "epoch": 4.562298446203459, "grad_norm": 0.22773612919754305, "learning_rate": 0.00027472548971121236, "loss": 2.986563205718994, "step": 7783, "token_acc": 0.3012086989226814 }, { "epoch": 4.562884784520668, "grad_norm": 0.2258180741571366, "learning_rate": 0.0002747174129062109, "loss": 2.981994867324829, "step": 7784, "token_acc": 0.3014281811559009 }, { "epoch": 4.5634711228378775, "grad_norm": 0.23262902462586132, "learning_rate": 0.00027470933492965735, "loss": 2.9737510681152344, "step": 7785, "token_acc": 0.3018738556030463 }, { "epoch": 4.564057461155087, "grad_norm": 0.22576386510162016, "learning_rate": 0.0002747012557816277, "loss": 2.987853765487671, "step": 7786, "token_acc": 0.29935785595959963 }, { "epoch": 4.564643799472296, "grad_norm": 0.20651870844602707, "learning_rate": 0.00027469317546219773, "loss": 3.0065269470214844, "step": 7787, "token_acc": 0.2967583873014594 }, { "epoch": 4.565230137789505, "grad_norm": 0.2298909664835203, "learning_rate": 0.0002746850939714434, "loss": 2.9651455879211426, "step": 7788, "token_acc": 0.30302198021966886 }, { "epoch": 4.565816476106714, "grad_norm": 0.20922893672470744, "learning_rate": 0.0002746770113094407, "loss": 3.0190906524658203, "step": 7789, "token_acc": 0.29530434970330055 }, { "epoch": 4.566402814423922, "grad_norm": 0.21965201644410878, "learning_rate": 0.00027466892747626543, "loss": 3.0048913955688477, "step": 7790, "token_acc": 0.29941258308857627 }, { "epoch": 4.566989152741131, "grad_norm": 0.21991177847115062, "learning_rate": 0.0002746608424719936, "loss": 2.977902889251709, "step": 7791, "token_acc": 0.3004478754335399 }, { "epoch": 4.56757549105834, "grad_norm": 0.23398431057206012, "learning_rate": 0.00027465275629670117, "loss": 3.042297840118408, "step": 7792, "token_acc": 0.2928192147768667 }, { "epoch": 4.5681618293755495, "grad_norm": 0.2516246091786564, "learning_rate": 0.00027464466895046403, "loss": 2.9790143966674805, "step": 7793, "token_acc": 0.3018321709716621 }, { "epoch": 4.568748167692759, "grad_norm": 0.22211053556914961, "learning_rate": 0.0002746365804333582, "loss": 2.976900339126587, "step": 7794, "token_acc": 0.3006594473799628 }, { "epoch": 4.569334506009968, "grad_norm": 0.2281844933762865, "learning_rate": 0.0002746284907454596, "loss": 2.976524829864502, "step": 7795, "token_acc": 0.30310081728370164 }, { "epoch": 4.569920844327177, "grad_norm": 0.26458863847915837, "learning_rate": 0.00027462039988684434, "loss": 2.9792592525482178, "step": 7796, "token_acc": 0.30117043148742895 }, { "epoch": 4.570507182644386, "grad_norm": 0.21930711607914374, "learning_rate": 0.00027461230785758825, "loss": 2.977689266204834, "step": 7797, "token_acc": 0.3009856958850247 }, { "epoch": 4.571093520961595, "grad_norm": 0.20544541006345346, "learning_rate": 0.00027460421465776754, "loss": 2.944749116897583, "step": 7798, "token_acc": 0.3055804315228257 }, { "epoch": 4.571679859278804, "grad_norm": 0.21353399368099651, "learning_rate": 0.0002745961202874581, "loss": 2.968803882598877, "step": 7799, "token_acc": 0.3019290236309312 }, { "epoch": 4.572266197596013, "grad_norm": 0.20554205299875458, "learning_rate": 0.000274588024746736, "loss": 2.999549627304077, "step": 7800, "token_acc": 0.2986383918062886 }, { "epoch": 4.572852535913222, "grad_norm": 0.22332571881470661, "learning_rate": 0.00027457992803567733, "loss": 2.9907350540161133, "step": 7801, "token_acc": 0.3002282982091513 }, { "epoch": 4.573438874230431, "grad_norm": 0.22779701637785044, "learning_rate": 0.00027457183015435805, "loss": 2.999965190887451, "step": 7802, "token_acc": 0.3001837693247399 }, { "epoch": 4.57402521254764, "grad_norm": 0.20393838001588763, "learning_rate": 0.00027456373110285433, "loss": 3.0043723583221436, "step": 7803, "token_acc": 0.297095678095376 }, { "epoch": 4.574611550864849, "grad_norm": 0.2302243853954627, "learning_rate": 0.0002745556308812422, "loss": 2.962055206298828, "step": 7804, "token_acc": 0.3049126164280602 }, { "epoch": 4.575197889182058, "grad_norm": 0.2170932482851595, "learning_rate": 0.00027454752948959777, "loss": 2.9923315048217773, "step": 7805, "token_acc": 0.29908891473897053 }, { "epoch": 4.575784227499267, "grad_norm": 0.23564919019023953, "learning_rate": 0.00027453942692799715, "loss": 2.9985883235931396, "step": 7806, "token_acc": 0.29944643215231825 }, { "epoch": 4.576370565816476, "grad_norm": 0.24257634449575938, "learning_rate": 0.0002745313231965163, "loss": 3.0147128105163574, "step": 7807, "token_acc": 0.2967742421125001 }, { "epoch": 4.576956904133685, "grad_norm": 0.23273475083922707, "learning_rate": 0.00027452321829523165, "loss": 2.9455080032348633, "step": 7808, "token_acc": 0.30826714061111493 }, { "epoch": 4.577543242450894, "grad_norm": 0.24325756814840865, "learning_rate": 0.0002745151122242191, "loss": 3.002279758453369, "step": 7809, "token_acc": 0.29774522521382396 }, { "epoch": 4.5781295807681035, "grad_norm": 0.22962941851202368, "learning_rate": 0.0002745070049835548, "loss": 2.9595866203308105, "step": 7810, "token_acc": 0.30415173015349595 }, { "epoch": 4.578715919085313, "grad_norm": 0.23537222181844633, "learning_rate": 0.000274498896573315, "loss": 2.9786906242370605, "step": 7811, "token_acc": 0.30180579990384565 }, { "epoch": 4.579302257402521, "grad_norm": 0.23127312028453179, "learning_rate": 0.0002744907869935759, "loss": 2.956346273422241, "step": 7812, "token_acc": 0.30532438847752746 }, { "epoch": 4.57988859571973, "grad_norm": 0.21239023140471866, "learning_rate": 0.00027448267624441354, "loss": 2.9552979469299316, "step": 7813, "token_acc": 0.30299301755048125 }, { "epoch": 4.580474934036939, "grad_norm": 0.20728185143972744, "learning_rate": 0.0002744745643259042, "loss": 2.947542667388916, "step": 7814, "token_acc": 0.3054927718299235 }, { "epoch": 4.581061272354148, "grad_norm": 0.20450771141946977, "learning_rate": 0.000274466451238124, "loss": 3.003728151321411, "step": 7815, "token_acc": 0.298947644891393 }, { "epoch": 4.581647610671357, "grad_norm": 0.2020427747403104, "learning_rate": 0.0002744583369811493, "loss": 2.9895215034484863, "step": 7816, "token_acc": 0.30023551039803076 }, { "epoch": 4.582233948988566, "grad_norm": 0.22090675935548498, "learning_rate": 0.0002744502215550562, "loss": 3.0046422481536865, "step": 7817, "token_acc": 0.29893823722901336 }, { "epoch": 4.5828202873057755, "grad_norm": 0.19714614916653708, "learning_rate": 0.000274442104959921, "loss": 2.9525949954986572, "step": 7818, "token_acc": 0.3044728848375575 }, { "epoch": 4.583406625622985, "grad_norm": 0.23385437967076847, "learning_rate": 0.00027443398719581986, "loss": 2.9863271713256836, "step": 7819, "token_acc": 0.30175533194302373 }, { "epoch": 4.583992963940194, "grad_norm": 0.2028404990582151, "learning_rate": 0.00027442586826282906, "loss": 2.977673292160034, "step": 7820, "token_acc": 0.3018352425927726 }, { "epoch": 4.584579302257403, "grad_norm": 0.21577757454060997, "learning_rate": 0.00027441774816102494, "loss": 2.9683423042297363, "step": 7821, "token_acc": 0.3039007417369687 }, { "epoch": 4.585165640574612, "grad_norm": 0.19794335142791233, "learning_rate": 0.00027440962689048373, "loss": 2.999927520751953, "step": 7822, "token_acc": 0.2981070087609512 }, { "epoch": 4.585751978891821, "grad_norm": 0.22134803873022552, "learning_rate": 0.0002744015044512817, "loss": 2.9816393852233887, "step": 7823, "token_acc": 0.3028717977337125 }, { "epoch": 4.586338317209029, "grad_norm": 0.19659378541643652, "learning_rate": 0.0002743933808434952, "loss": 2.963651180267334, "step": 7824, "token_acc": 0.3026955661810434 }, { "epoch": 4.586924655526238, "grad_norm": 0.209383267507122, "learning_rate": 0.00027438525606720047, "loss": 3.0111918449401855, "step": 7825, "token_acc": 0.2971862660990666 }, { "epoch": 4.5875109938434475, "grad_norm": 0.1950207916267565, "learning_rate": 0.0002743771301224739, "loss": 2.9721925258636475, "step": 7826, "token_acc": 0.3031967873188099 }, { "epoch": 4.588097332160657, "grad_norm": 0.20871373709529475, "learning_rate": 0.0002743690030093918, "loss": 2.946967840194702, "step": 7827, "token_acc": 0.3060509212362024 }, { "epoch": 4.588683670477866, "grad_norm": 0.2162716324751471, "learning_rate": 0.0002743608747280305, "loss": 2.990880012512207, "step": 7828, "token_acc": 0.3000012745510394 }, { "epoch": 4.589270008795075, "grad_norm": 0.19548575533432863, "learning_rate": 0.00027435274527846633, "loss": 2.9647111892700195, "step": 7829, "token_acc": 0.303836109214473 }, { "epoch": 4.589856347112284, "grad_norm": 0.2299900436009014, "learning_rate": 0.0002743446146607757, "loss": 3.017063617706299, "step": 7830, "token_acc": 0.2952265225616874 }, { "epoch": 4.590442685429493, "grad_norm": 0.23810486831795422, "learning_rate": 0.000274336482875035, "loss": 2.9440646171569824, "step": 7831, "token_acc": 0.30581723122863697 }, { "epoch": 4.591029023746702, "grad_norm": 0.22189724423697985, "learning_rate": 0.0002743283499213206, "loss": 3.0022687911987305, "step": 7832, "token_acc": 0.2969015661678969 }, { "epoch": 4.59161536206391, "grad_norm": 0.24395286891682708, "learning_rate": 0.0002743202157997088, "loss": 2.932993173599243, "step": 7833, "token_acc": 0.30905270811391666 }, { "epoch": 4.5922017003811195, "grad_norm": 0.21818236462779567, "learning_rate": 0.00027431208051027615, "loss": 2.9698147773742676, "step": 7834, "token_acc": 0.303368093623314 }, { "epoch": 4.592788038698329, "grad_norm": 0.2403800737000575, "learning_rate": 0.00027430394405309903, "loss": 3.0163397789001465, "step": 7835, "token_acc": 0.29629178227834974 }, { "epoch": 4.593374377015538, "grad_norm": 0.22888299323431083, "learning_rate": 0.0002742958064282539, "loss": 2.987128257751465, "step": 7836, "token_acc": 0.30058310432624075 }, { "epoch": 4.593960715332747, "grad_norm": 0.22980685300617568, "learning_rate": 0.00027428766763581703, "loss": 2.986037254333496, "step": 7837, "token_acc": 0.3008650742414461 }, { "epoch": 4.594547053649956, "grad_norm": 0.21490515447494501, "learning_rate": 0.00027427952767586513, "loss": 2.965238571166992, "step": 7838, "token_acc": 0.3030383226917098 }, { "epoch": 4.595133391967165, "grad_norm": 0.22003182675776053, "learning_rate": 0.00027427138654847447, "loss": 3.0021140575408936, "step": 7839, "token_acc": 0.29780187003151304 }, { "epoch": 4.595719730284374, "grad_norm": 0.22369193224733933, "learning_rate": 0.0002742632442537216, "loss": 2.9564208984375, "step": 7840, "token_acc": 0.3048269680773863 }, { "epoch": 4.596306068601583, "grad_norm": 0.22286984454284345, "learning_rate": 0.000274255100791683, "loss": 2.9980783462524414, "step": 7841, "token_acc": 0.2984844592858978 }, { "epoch": 4.596892406918792, "grad_norm": 0.22505341687528505, "learning_rate": 0.00027424695616243516, "loss": 2.9832797050476074, "step": 7842, "token_acc": 0.30115075667917307 }, { "epoch": 4.5974787452360015, "grad_norm": 0.22150295354680086, "learning_rate": 0.0002742388103660546, "loss": 2.9829509258270264, "step": 7843, "token_acc": 0.30089812363125806 }, { "epoch": 4.598065083553211, "grad_norm": 0.22994982850211107, "learning_rate": 0.0002742306634026178, "loss": 2.9777042865753174, "step": 7844, "token_acc": 0.30211073330339383 }, { "epoch": 4.598651421870419, "grad_norm": 0.21963795006056303, "learning_rate": 0.0002742225152722014, "loss": 2.991011619567871, "step": 7845, "token_acc": 0.2991498148138165 }, { "epoch": 4.599237760187628, "grad_norm": 0.22300525072524863, "learning_rate": 0.0002742143659748818, "loss": 2.991039752960205, "step": 7846, "token_acc": 0.2993824318030518 }, { "epoch": 4.599824098504837, "grad_norm": 0.21211461369094334, "learning_rate": 0.0002742062155107356, "loss": 2.9675590991973877, "step": 7847, "token_acc": 0.3024256405427743 }, { "epoch": 4.600410436822046, "grad_norm": 0.22508377535817234, "learning_rate": 0.0002741980638798394, "loss": 3.0370826721191406, "step": 7848, "token_acc": 0.29364020910853994 }, { "epoch": 4.600996775139255, "grad_norm": 0.23910878013870746, "learning_rate": 0.00027418991108226973, "loss": 2.947202205657959, "step": 7849, "token_acc": 0.3061959559815481 }, { "epoch": 4.601583113456464, "grad_norm": 0.21117266750087316, "learning_rate": 0.00027418175711810326, "loss": 2.983290195465088, "step": 7850, "token_acc": 0.30187040606663984 }, { "epoch": 4.6021694517736735, "grad_norm": 0.21699359328477968, "learning_rate": 0.0002741736019874164, "loss": 2.9855363368988037, "step": 7851, "token_acc": 0.30096359688075935 }, { "epoch": 4.602755790090883, "grad_norm": 0.2146938981295693, "learning_rate": 0.000274165445690286, "loss": 3.001988172531128, "step": 7852, "token_acc": 0.2992727075851365 }, { "epoch": 4.603342128408092, "grad_norm": 0.22883815058911136, "learning_rate": 0.0002741572882267885, "loss": 2.9758567810058594, "step": 7853, "token_acc": 0.30150825123407066 }, { "epoch": 4.603928466725301, "grad_norm": 0.24656383987698396, "learning_rate": 0.00027414912959700056, "loss": 3.012301445007324, "step": 7854, "token_acc": 0.29631137694262843 }, { "epoch": 4.604514805042509, "grad_norm": 0.23331935213824062, "learning_rate": 0.00027414096980099887, "loss": 3.013822078704834, "step": 7855, "token_acc": 0.29604402180276046 }, { "epoch": 4.605101143359718, "grad_norm": 0.22829206542609534, "learning_rate": 0.00027413280883886, "loss": 2.953033924102783, "step": 7856, "token_acc": 0.30484906140012946 }, { "epoch": 4.605687481676927, "grad_norm": 0.21345169469453384, "learning_rate": 0.0002741246467106607, "loss": 2.9741950035095215, "step": 7857, "token_acc": 0.30215075610251646 }, { "epoch": 4.606273819994136, "grad_norm": 0.2383022616883737, "learning_rate": 0.00027411648341647767, "loss": 3.0167148113250732, "step": 7858, "token_acc": 0.29593030717084257 }, { "epoch": 4.6068601583113455, "grad_norm": 0.23026527222383206, "learning_rate": 0.0002741083189563874, "loss": 2.9817028045654297, "step": 7859, "token_acc": 0.2992523235306916 }, { "epoch": 4.607446496628555, "grad_norm": 0.23633052001149207, "learning_rate": 0.0002741001533304668, "loss": 2.970470666885376, "step": 7860, "token_acc": 0.30451676073886613 }, { "epoch": 4.608032834945764, "grad_norm": 0.21282532546358718, "learning_rate": 0.0002740919865387924, "loss": 3.0054333209991455, "step": 7861, "token_acc": 0.29650967220439906 }, { "epoch": 4.608619173262973, "grad_norm": 0.22498247755838438, "learning_rate": 0.0002740838185814411, "loss": 2.9955697059631348, "step": 7862, "token_acc": 0.29896585852445856 }, { "epoch": 4.609205511580182, "grad_norm": 0.20499326123338033, "learning_rate": 0.0002740756494584895, "loss": 2.973649501800537, "step": 7863, "token_acc": 0.3024995540789134 }, { "epoch": 4.609791849897391, "grad_norm": 0.22900751991262022, "learning_rate": 0.00027406747917001434, "loss": 2.984330654144287, "step": 7864, "token_acc": 0.300936870240607 }, { "epoch": 4.6103781882146, "grad_norm": 0.20986312679480085, "learning_rate": 0.00027405930771609246, "loss": 2.9691944122314453, "step": 7865, "token_acc": 0.3019572376049348 }, { "epoch": 4.610964526531809, "grad_norm": 0.23222456593172516, "learning_rate": 0.0002740511350968005, "loss": 2.9644699096679688, "step": 7866, "token_acc": 0.3032042484031838 }, { "epoch": 4.6115508648490176, "grad_norm": 0.21228348282596368, "learning_rate": 0.00027404296131221527, "loss": 3.017974615097046, "step": 7867, "token_acc": 0.2977157536624314 }, { "epoch": 4.612137203166227, "grad_norm": 0.22945964433191268, "learning_rate": 0.0002740347863624136, "loss": 3.0059428215026855, "step": 7868, "token_acc": 0.2987433574001389 }, { "epoch": 4.612723541483436, "grad_norm": 0.22122796979398598, "learning_rate": 0.00027402661024747226, "loss": 3.040830612182617, "step": 7869, "token_acc": 0.29267261449207105 }, { "epoch": 4.613309879800645, "grad_norm": 0.23375406089898634, "learning_rate": 0.00027401843296746804, "loss": 3.0091423988342285, "step": 7870, "token_acc": 0.2988432716795047 }, { "epoch": 4.613896218117854, "grad_norm": 0.22120853512211594, "learning_rate": 0.00027401025452247773, "loss": 2.9589972496032715, "step": 7871, "token_acc": 0.30538537459814985 }, { "epoch": 4.614482556435063, "grad_norm": 0.22418732022182805, "learning_rate": 0.00027400207491257817, "loss": 3.016993522644043, "step": 7872, "token_acc": 0.29486604348370404 }, { "epoch": 4.615068894752272, "grad_norm": 0.2070957040775095, "learning_rate": 0.0002739938941378463, "loss": 2.969886302947998, "step": 7873, "token_acc": 0.30299891919544003 }, { "epoch": 4.615655233069481, "grad_norm": 0.21857763810218245, "learning_rate": 0.00027398571219835885, "loss": 2.961629629135132, "step": 7874, "token_acc": 0.3047949306854805 }, { "epoch": 4.6162415713866904, "grad_norm": 0.21318996984907124, "learning_rate": 0.0002739775290941927, "loss": 2.974924087524414, "step": 7875, "token_acc": 0.30210956168652 }, { "epoch": 4.616827909703899, "grad_norm": 0.22567384349998743, "learning_rate": 0.00027396934482542466, "loss": 3.0118229389190674, "step": 7876, "token_acc": 0.29692728099141874 }, { "epoch": 4.617414248021108, "grad_norm": 0.21527238662508338, "learning_rate": 0.00027396115939213174, "loss": 2.9188170433044434, "step": 7877, "token_acc": 0.3106964274546804 }, { "epoch": 4.618000586338317, "grad_norm": 0.23920514899739181, "learning_rate": 0.0002739529727943907, "loss": 2.9834771156311035, "step": 7878, "token_acc": 0.2996421094580921 }, { "epoch": 4.618586924655526, "grad_norm": 0.22459202430526826, "learning_rate": 0.0002739447850322786, "loss": 2.9495389461517334, "step": 7879, "token_acc": 0.3059905696838525 }, { "epoch": 4.619173262972735, "grad_norm": 0.22470841974248387, "learning_rate": 0.0002739365961058722, "loss": 2.9653334617614746, "step": 7880, "token_acc": 0.30326915253885955 }, { "epoch": 4.619759601289944, "grad_norm": 0.22028763771626472, "learning_rate": 0.00027392840601524855, "loss": 3.022495985031128, "step": 7881, "token_acc": 0.2943040178260567 }, { "epoch": 4.620345939607153, "grad_norm": 0.21960757515597665, "learning_rate": 0.00027392021476048444, "loss": 2.989363193511963, "step": 7882, "token_acc": 0.29975820111977447 }, { "epoch": 4.6209322779243625, "grad_norm": 0.2378688109572106, "learning_rate": 0.00027391202234165697, "loss": 3.0106472969055176, "step": 7883, "token_acc": 0.2968451130871985 }, { "epoch": 4.621518616241572, "grad_norm": 0.21493326076900965, "learning_rate": 0.00027390382875884295, "loss": 3.0061895847320557, "step": 7884, "token_acc": 0.29759498317212 }, { "epoch": 4.622104954558781, "grad_norm": 0.23176526345190887, "learning_rate": 0.0002738956340121195, "loss": 3.021026372909546, "step": 7885, "token_acc": 0.29582684932095926 }, { "epoch": 4.62269129287599, "grad_norm": 0.24239896785230347, "learning_rate": 0.00027388743810156344, "loss": 2.9831275939941406, "step": 7886, "token_acc": 0.3007713109622071 }, { "epoch": 4.623277631193199, "grad_norm": 0.21538246335314448, "learning_rate": 0.00027387924102725196, "loss": 3.01284122467041, "step": 7887, "token_acc": 0.2968251691721971 }, { "epoch": 4.623863969510407, "grad_norm": 0.24420999795030499, "learning_rate": 0.0002738710427892618, "loss": 2.9943602085113525, "step": 7888, "token_acc": 0.2996280479404877 }, { "epoch": 4.624450307827616, "grad_norm": 0.24687625489877257, "learning_rate": 0.0002738628433876702, "loss": 3.004110336303711, "step": 7889, "token_acc": 0.2974887642556018 }, { "epoch": 4.625036646144825, "grad_norm": 0.25262657511549774, "learning_rate": 0.00027385464282255405, "loss": 2.990384101867676, "step": 7890, "token_acc": 0.2998030751574747 }, { "epoch": 4.6256229844620345, "grad_norm": 0.22527591476755743, "learning_rate": 0.00027384644109399044, "loss": 2.985879898071289, "step": 7891, "token_acc": 0.3001405297946894 }, { "epoch": 4.626209322779244, "grad_norm": 0.24057581210767137, "learning_rate": 0.00027383823820205643, "loss": 2.9474940299987793, "step": 7892, "token_acc": 0.3074963470494558 }, { "epoch": 4.626795661096453, "grad_norm": 0.21314238849281605, "learning_rate": 0.000273830034146829, "loss": 2.9497787952423096, "step": 7893, "token_acc": 0.3065112751698627 }, { "epoch": 4.627381999413662, "grad_norm": 0.23193421693615607, "learning_rate": 0.0002738218289283853, "loss": 2.974104404449463, "step": 7894, "token_acc": 0.3021748645348882 }, { "epoch": 4.627968337730871, "grad_norm": 0.226901147187191, "learning_rate": 0.00027381362254680233, "loss": 2.9980640411376953, "step": 7895, "token_acc": 0.2986349437948254 }, { "epoch": 4.62855467604808, "grad_norm": 0.20780275887493488, "learning_rate": 0.00027380541500215727, "loss": 2.960134983062744, "step": 7896, "token_acc": 0.3037701631653457 }, { "epoch": 4.629141014365289, "grad_norm": 0.2253003167627265, "learning_rate": 0.00027379720629452714, "loss": 2.9830193519592285, "step": 7897, "token_acc": 0.3016951504606155 }, { "epoch": 4.629727352682497, "grad_norm": 0.1984753509504225, "learning_rate": 0.000273788996423989, "loss": 2.9896974563598633, "step": 7898, "token_acc": 0.29836876150619196 }, { "epoch": 4.6303136909997065, "grad_norm": 0.2248781754928955, "learning_rate": 0.00027378078539062016, "loss": 3.002549171447754, "step": 7899, "token_acc": 0.29749584886501107 }, { "epoch": 4.630900029316916, "grad_norm": 0.20558541366780608, "learning_rate": 0.0002737725731944976, "loss": 2.9890408515930176, "step": 7900, "token_acc": 0.30151504522270983 }, { "epoch": 4.631486367634125, "grad_norm": 0.22522114441090846, "learning_rate": 0.00027376435983569847, "loss": 3.009610652923584, "step": 7901, "token_acc": 0.29667160673490023 }, { "epoch": 4.632072705951334, "grad_norm": 0.2483450278872476, "learning_rate": 0.00027375614531430003, "loss": 2.9677791595458984, "step": 7902, "token_acc": 0.30406625075212973 }, { "epoch": 4.632659044268543, "grad_norm": 0.20770817554320636, "learning_rate": 0.0002737479296303793, "loss": 2.9650983810424805, "step": 7903, "token_acc": 0.30539615647049906 }, { "epoch": 4.633245382585752, "grad_norm": 0.22437358006782393, "learning_rate": 0.00027373971278401356, "loss": 2.9858570098876953, "step": 7904, "token_acc": 0.2999925552902639 }, { "epoch": 4.633831720902961, "grad_norm": 0.23581521024405372, "learning_rate": 0.0002737314947752799, "loss": 2.984874963760376, "step": 7905, "token_acc": 0.30129264062112826 }, { "epoch": 4.63441805922017, "grad_norm": 0.22119730797346063, "learning_rate": 0.00027372327560425564, "loss": 2.9724478721618652, "step": 7906, "token_acc": 0.30100478985846507 }, { "epoch": 4.635004397537379, "grad_norm": 0.21372731126876, "learning_rate": 0.0002737150552710179, "loss": 2.968052387237549, "step": 7907, "token_acc": 0.3032832130565398 }, { "epoch": 4.6355907358545885, "grad_norm": 0.19528028068683326, "learning_rate": 0.00027370683377564393, "loss": 2.9111578464508057, "step": 7908, "token_acc": 0.309517366039971 }, { "epoch": 4.636177074171798, "grad_norm": 0.1936590538996269, "learning_rate": 0.00027369861111821095, "loss": 3.0161054134368896, "step": 7909, "token_acc": 0.2974640957733135 }, { "epoch": 4.636763412489006, "grad_norm": 0.21097598415478577, "learning_rate": 0.0002736903872987962, "loss": 2.970736503601074, "step": 7910, "token_acc": 0.3037290270583306 }, { "epoch": 4.637349750806215, "grad_norm": 0.19020597660761807, "learning_rate": 0.0002736821623174769, "loss": 2.9615070819854736, "step": 7911, "token_acc": 0.30549114643580577 }, { "epoch": 4.637936089123424, "grad_norm": 0.2299758337699185, "learning_rate": 0.00027367393617433043, "loss": 2.943321704864502, "step": 7912, "token_acc": 0.30642324286852424 }, { "epoch": 4.638522427440633, "grad_norm": 0.2274961769825738, "learning_rate": 0.00027366570886943394, "loss": 3.015221118927002, "step": 7913, "token_acc": 0.2953714123126533 }, { "epoch": 4.639108765757842, "grad_norm": 0.23127847267667376, "learning_rate": 0.0002736574804028648, "loss": 2.962968111038208, "step": 7914, "token_acc": 0.3029647328294563 }, { "epoch": 4.639695104075051, "grad_norm": 0.2204878669443697, "learning_rate": 0.0002736492507747002, "loss": 2.976640224456787, "step": 7915, "token_acc": 0.3010581551007061 }, { "epoch": 4.6402814423922605, "grad_norm": 0.21261445088989553, "learning_rate": 0.0002736410199850175, "loss": 2.960146188735962, "step": 7916, "token_acc": 0.30429978704190075 }, { "epoch": 4.64086778070947, "grad_norm": 0.21122696563166804, "learning_rate": 0.0002736327880338941, "loss": 2.986783742904663, "step": 7917, "token_acc": 0.30071990408447447 }, { "epoch": 4.641454119026679, "grad_norm": 0.21122985068569677, "learning_rate": 0.0002736245549214072, "loss": 2.9786078929901123, "step": 7918, "token_acc": 0.3016138576014159 }, { "epoch": 4.642040457343887, "grad_norm": 0.20958136558663987, "learning_rate": 0.0002736163206476342, "loss": 2.9917502403259277, "step": 7919, "token_acc": 0.29752297441880543 }, { "epoch": 4.642626795661096, "grad_norm": 0.20527415382194056, "learning_rate": 0.0002736080852126524, "loss": 3.037907123565674, "step": 7920, "token_acc": 0.2923290196287523 }, { "epoch": 4.643213133978305, "grad_norm": 0.20112905435383452, "learning_rate": 0.0002735998486165393, "loss": 2.9846224784851074, "step": 7921, "token_acc": 0.30134834610106115 }, { "epoch": 4.643799472295514, "grad_norm": 0.2170908996669939, "learning_rate": 0.0002735916108593721, "loss": 3.0106139183044434, "step": 7922, "token_acc": 0.29814833785791117 }, { "epoch": 4.644385810612723, "grad_norm": 0.21355784849917878, "learning_rate": 0.0002735833719412283, "loss": 2.976480722427368, "step": 7923, "token_acc": 0.30046614314936987 }, { "epoch": 4.6449721489299325, "grad_norm": 0.2132970589056935, "learning_rate": 0.0002735751318621852, "loss": 2.979160785675049, "step": 7924, "token_acc": 0.30112525547292834 }, { "epoch": 4.645558487247142, "grad_norm": 0.21953148166551797, "learning_rate": 0.00027356689062232035, "loss": 3.025585651397705, "step": 7925, "token_acc": 0.2943515866173485 }, { "epoch": 4.646144825564351, "grad_norm": 0.21988834165050689, "learning_rate": 0.000273558648221711, "loss": 3.0060176849365234, "step": 7926, "token_acc": 0.2976501171345198 }, { "epoch": 4.64673116388156, "grad_norm": 0.2297652147532245, "learning_rate": 0.00027355040466043467, "loss": 3.019186019897461, "step": 7927, "token_acc": 0.2949126426575435 }, { "epoch": 4.647317502198769, "grad_norm": 0.20581447729772764, "learning_rate": 0.00027354215993856873, "loss": 2.9856696128845215, "step": 7928, "token_acc": 0.2995927496247008 }, { "epoch": 4.647903840515978, "grad_norm": 0.23359021341170738, "learning_rate": 0.0002735339140561907, "loss": 2.971479892730713, "step": 7929, "token_acc": 0.3018986868395465 }, { "epoch": 4.648490178833187, "grad_norm": 0.2509271772205493, "learning_rate": 0.000273525667013378, "loss": 3.0536880493164062, "step": 7930, "token_acc": 0.2915995235759826 }, { "epoch": 4.649076517150396, "grad_norm": 0.2156042270930878, "learning_rate": 0.0002735174188102081, "loss": 3.0490574836730957, "step": 7931, "token_acc": 0.2927147027032672 }, { "epoch": 4.6496628554676045, "grad_norm": 0.25694675450277366, "learning_rate": 0.00027350916944675856, "loss": 2.9955108165740967, "step": 7932, "token_acc": 0.29926049778633196 }, { "epoch": 4.650249193784814, "grad_norm": 0.2500522912641065, "learning_rate": 0.00027350091892310675, "loss": 2.9811477661132812, "step": 7933, "token_acc": 0.30134318351573 }, { "epoch": 4.650835532102023, "grad_norm": 0.21620605054380906, "learning_rate": 0.0002734926672393302, "loss": 2.9665353298187256, "step": 7934, "token_acc": 0.30272887023353917 }, { "epoch": 4.651421870419232, "grad_norm": 0.23371372627505071, "learning_rate": 0.0002734844143955065, "loss": 2.954139232635498, "step": 7935, "token_acc": 0.30548425493323833 }, { "epoch": 4.652008208736441, "grad_norm": 0.21608538247003312, "learning_rate": 0.00027347616039171313, "loss": 2.9832916259765625, "step": 7936, "token_acc": 0.29999949384259517 }, { "epoch": 4.65259454705365, "grad_norm": 0.2507746314893533, "learning_rate": 0.00027346790522802763, "loss": 3.0002052783966064, "step": 7937, "token_acc": 0.29810756683781287 }, { "epoch": 4.653180885370859, "grad_norm": 0.2508412783959582, "learning_rate": 0.0002734596489045275, "loss": 2.979111909866333, "step": 7938, "token_acc": 0.3020699270699271 }, { "epoch": 4.653767223688068, "grad_norm": 0.2129941666855129, "learning_rate": 0.00027345139142129037, "loss": 2.970345973968506, "step": 7939, "token_acc": 0.3032420577998242 }, { "epoch": 4.654353562005277, "grad_norm": 0.2618629602286676, "learning_rate": 0.0002734431327783937, "loss": 3.0187361240386963, "step": 7940, "token_acc": 0.2961208763800097 }, { "epoch": 4.654939900322486, "grad_norm": 0.22971736973230863, "learning_rate": 0.0002734348729759152, "loss": 2.982077121734619, "step": 7941, "token_acc": 0.3004714113544011 }, { "epoch": 4.655526238639695, "grad_norm": 0.25889329874658945, "learning_rate": 0.0002734266120139323, "loss": 3.020895004272461, "step": 7942, "token_acc": 0.2934063645679777 }, { "epoch": 4.656112576956904, "grad_norm": 0.2701735401434714, "learning_rate": 0.0002734183498925228, "loss": 2.9888343811035156, "step": 7943, "token_acc": 0.29952741616362744 }, { "epoch": 4.656698915274113, "grad_norm": 0.21434114132529647, "learning_rate": 0.00027341008661176423, "loss": 2.9973511695861816, "step": 7944, "token_acc": 0.30127821679312405 }, { "epoch": 4.657285253591322, "grad_norm": 0.22631337654248745, "learning_rate": 0.0002734018221717341, "loss": 3.0231449604034424, "step": 7945, "token_acc": 0.2957242603153477 }, { "epoch": 4.657871591908531, "grad_norm": 0.2161885111566927, "learning_rate": 0.0002733935565725102, "loss": 2.9814815521240234, "step": 7946, "token_acc": 0.3022498900792502 }, { "epoch": 4.65845793022574, "grad_norm": 0.22383341434018186, "learning_rate": 0.0002733852898141701, "loss": 2.96597957611084, "step": 7947, "token_acc": 0.30511503644333776 }, { "epoch": 4.659044268542949, "grad_norm": 0.2187987145010744, "learning_rate": 0.00027337702189679156, "loss": 2.993025302886963, "step": 7948, "token_acc": 0.2987186212459504 }, { "epoch": 4.6596306068601585, "grad_norm": 0.24051137408879936, "learning_rate": 0.000273368752820452, "loss": 2.995567798614502, "step": 7949, "token_acc": 0.2980003599195658 }, { "epoch": 4.660216945177368, "grad_norm": 0.20882775280634566, "learning_rate": 0.0002733604825852293, "loss": 2.988506555557251, "step": 7950, "token_acc": 0.3006173147464966 }, { "epoch": 4.660803283494577, "grad_norm": 0.22230268418020196, "learning_rate": 0.0002733522111912011, "loss": 3.039658546447754, "step": 7951, "token_acc": 0.29181438440529106 }, { "epoch": 4.661389621811786, "grad_norm": 0.2101843486862066, "learning_rate": 0.00027334393863844513, "loss": 2.9997262954711914, "step": 7952, "token_acc": 0.29881092594584246 }, { "epoch": 4.661975960128994, "grad_norm": 0.22514878326196197, "learning_rate": 0.00027333566492703903, "loss": 2.9887073040008545, "step": 7953, "token_acc": 0.30017749455846193 }, { "epoch": 4.662562298446203, "grad_norm": 0.19654375651760705, "learning_rate": 0.00027332739005706056, "loss": 2.9747958183288574, "step": 7954, "token_acc": 0.30117640845535326 }, { "epoch": 4.663148636763412, "grad_norm": 0.21416790770461058, "learning_rate": 0.0002733191140285874, "loss": 2.9822826385498047, "step": 7955, "token_acc": 0.30093858641569476 }, { "epoch": 4.663734975080621, "grad_norm": 0.20841534737225642, "learning_rate": 0.0002733108368416974, "loss": 2.995788097381592, "step": 7956, "token_acc": 0.2987968819256259 }, { "epoch": 4.6643213133978305, "grad_norm": 0.22714883304825007, "learning_rate": 0.00027330255849646826, "loss": 2.9606761932373047, "step": 7957, "token_acc": 0.30402009419152276 }, { "epoch": 4.66490765171504, "grad_norm": 0.21841250031513512, "learning_rate": 0.0002732942789929777, "loss": 3.0253326892852783, "step": 7958, "token_acc": 0.2954864765380767 }, { "epoch": 4.665493990032249, "grad_norm": 0.19845677528562855, "learning_rate": 0.0002732859983313035, "loss": 2.9499659538269043, "step": 7959, "token_acc": 0.3043559981803472 }, { "epoch": 4.666080328349458, "grad_norm": 0.21658290055779053, "learning_rate": 0.00027327771651152355, "loss": 2.961183547973633, "step": 7960, "token_acc": 0.305185543145309 }, { "epoch": 4.666666666666667, "grad_norm": 0.21055850794113065, "learning_rate": 0.0002732694335337155, "loss": 2.967160224914551, "step": 7961, "token_acc": 0.3036493274405804 }, { "epoch": 4.667253004983876, "grad_norm": 0.2172114874525312, "learning_rate": 0.0002732611493979573, "loss": 2.9934380054473877, "step": 7962, "token_acc": 0.298989679694574 }, { "epoch": 4.667839343301084, "grad_norm": 0.19295878333119915, "learning_rate": 0.00027325286410432664, "loss": 2.993499279022217, "step": 7963, "token_acc": 0.29988513911768705 }, { "epoch": 4.668425681618293, "grad_norm": 0.21053089314481063, "learning_rate": 0.00027324457765290144, "loss": 2.9784207344055176, "step": 7964, "token_acc": 0.30162871079919235 }, { "epoch": 4.6690120199355025, "grad_norm": 0.20696284240480398, "learning_rate": 0.0002732362900437595, "loss": 2.9707982540130615, "step": 7965, "token_acc": 0.3036265709156194 }, { "epoch": 4.669598358252712, "grad_norm": 0.20626097390912693, "learning_rate": 0.0002732280012769787, "loss": 3.0139966011047363, "step": 7966, "token_acc": 0.2987944415639413 }, { "epoch": 4.670184696569921, "grad_norm": 0.20826954749847715, "learning_rate": 0.00027321971135263684, "loss": 2.9702110290527344, "step": 7967, "token_acc": 0.3028619626548775 }, { "epoch": 4.67077103488713, "grad_norm": 0.21194127634520923, "learning_rate": 0.00027321142027081184, "loss": 2.9737303256988525, "step": 7968, "token_acc": 0.30234321639659895 }, { "epoch": 4.671357373204339, "grad_norm": 0.22638010308060727, "learning_rate": 0.0002732031280315816, "loss": 2.948225736618042, "step": 7969, "token_acc": 0.3060037892899339 }, { "epoch": 4.671943711521548, "grad_norm": 0.21506609732053453, "learning_rate": 0.000273194834635024, "loss": 2.9383599758148193, "step": 7970, "token_acc": 0.30870443487852356 }, { "epoch": 4.672530049838757, "grad_norm": 0.24901942576062572, "learning_rate": 0.00027318654008121686, "loss": 3.038952589035034, "step": 7971, "token_acc": 0.29367264326616027 }, { "epoch": 4.673116388155966, "grad_norm": 0.2702768988279559, "learning_rate": 0.00027317824437023824, "loss": 2.9810993671417236, "step": 7972, "token_acc": 0.30300101835110255 }, { "epoch": 4.673702726473175, "grad_norm": 0.21997267762676495, "learning_rate": 0.000273169947502166, "loss": 2.969866991043091, "step": 7973, "token_acc": 0.302663926675828 }, { "epoch": 4.6742890647903845, "grad_norm": 0.23652931365495775, "learning_rate": 0.0002731616494770781, "loss": 3.029613971710205, "step": 7974, "token_acc": 0.2952870394939917 }, { "epoch": 4.674875403107593, "grad_norm": 0.2628521175986634, "learning_rate": 0.0002731533502950524, "loss": 3.0189905166625977, "step": 7975, "token_acc": 0.2971794919821153 }, { "epoch": 4.675461741424802, "grad_norm": 0.2014823818164978, "learning_rate": 0.0002731450499561669, "loss": 3.0010294914245605, "step": 7976, "token_acc": 0.2973537242340412 }, { "epoch": 4.676048079742011, "grad_norm": 0.25947850335162304, "learning_rate": 0.0002731367484604997, "loss": 2.976067066192627, "step": 7977, "token_acc": 0.3007154213036566 }, { "epoch": 4.67663441805922, "grad_norm": 0.21624111047423913, "learning_rate": 0.0002731284458081286, "loss": 2.980353355407715, "step": 7978, "token_acc": 0.30337158256296953 }, { "epoch": 4.677220756376429, "grad_norm": 0.23859783833351592, "learning_rate": 0.00027312014199913165, "loss": 3.0396194458007812, "step": 7979, "token_acc": 0.2926389664247548 }, { "epoch": 4.677807094693638, "grad_norm": 0.21789768437574827, "learning_rate": 0.0002731118370335869, "loss": 2.959746837615967, "step": 7980, "token_acc": 0.30570397872688504 }, { "epoch": 4.678393433010847, "grad_norm": 0.21465867468670566, "learning_rate": 0.00027310353091157237, "loss": 2.9209461212158203, "step": 7981, "token_acc": 0.30912338583984533 }, { "epoch": 4.6789797713280565, "grad_norm": 0.21120978030354096, "learning_rate": 0.000273095223633166, "loss": 2.995389461517334, "step": 7982, "token_acc": 0.2997760778748432 }, { "epoch": 4.679566109645266, "grad_norm": 0.22340574434640098, "learning_rate": 0.00027308691519844597, "loss": 2.996685028076172, "step": 7983, "token_acc": 0.29921592945467157 }, { "epoch": 4.680152447962474, "grad_norm": 0.20751340054708936, "learning_rate": 0.00027307860560749016, "loss": 3.045945644378662, "step": 7984, "token_acc": 0.29261429461467564 }, { "epoch": 4.680738786279683, "grad_norm": 0.23364773205284, "learning_rate": 0.0002730702948603767, "loss": 2.9815478324890137, "step": 7985, "token_acc": 0.29951092479674796 }, { "epoch": 4.681325124596892, "grad_norm": 0.22445253211982355, "learning_rate": 0.00027306198295718365, "loss": 2.9784202575683594, "step": 7986, "token_acc": 0.30116526139156 }, { "epoch": 4.681911462914101, "grad_norm": 0.2193594392663409, "learning_rate": 0.0002730536698979891, "loss": 2.989856719970703, "step": 7987, "token_acc": 0.2990195053624067 }, { "epoch": 4.68249780123131, "grad_norm": 0.24382426202586993, "learning_rate": 0.0002730453556828712, "loss": 3.005772829055786, "step": 7988, "token_acc": 0.29937581994096424 }, { "epoch": 4.683084139548519, "grad_norm": 0.2666859932090609, "learning_rate": 0.00027303704031190795, "loss": 2.999368190765381, "step": 7989, "token_acc": 0.2990274867245834 }, { "epoch": 4.6836704778657285, "grad_norm": 0.2176027335373523, "learning_rate": 0.00027302872378517755, "loss": 2.9437031745910645, "step": 7990, "token_acc": 0.30454962348581566 }, { "epoch": 4.684256816182938, "grad_norm": 0.21290410372181298, "learning_rate": 0.000273020406102758, "loss": 2.9472849369049072, "step": 7991, "token_acc": 0.30590080376396783 }, { "epoch": 4.684843154500147, "grad_norm": 0.23209785580193537, "learning_rate": 0.0002730120872647275, "loss": 2.9363064765930176, "step": 7992, "token_acc": 0.30774810261623214 }, { "epoch": 4.685429492817356, "grad_norm": 0.24486586133460342, "learning_rate": 0.00027300376727116426, "loss": 2.9693081378936768, "step": 7993, "token_acc": 0.3037627750196539 }, { "epoch": 4.686015831134565, "grad_norm": 0.21502005844121452, "learning_rate": 0.00027299544612214633, "loss": 2.9695639610290527, "step": 7994, "token_acc": 0.30367876136828525 }, { "epoch": 4.686602169451774, "grad_norm": 0.24629947929931842, "learning_rate": 0.00027298712381775193, "loss": 3.0054609775543213, "step": 7995, "token_acc": 0.2982848919631728 }, { "epoch": 4.687188507768982, "grad_norm": 0.2630777632451376, "learning_rate": 0.00027297880035805923, "loss": 2.98844051361084, "step": 7996, "token_acc": 0.2987310172664864 }, { "epoch": 4.687774846086191, "grad_norm": 0.23916257726085585, "learning_rate": 0.0002729704757431464, "loss": 2.988590717315674, "step": 7997, "token_acc": 0.29893994262839385 }, { "epoch": 4.6883611844034006, "grad_norm": 0.21376240784310124, "learning_rate": 0.0002729621499730917, "loss": 2.962796449661255, "step": 7998, "token_acc": 0.304072253978698 }, { "epoch": 4.68894752272061, "grad_norm": 0.2557251449915677, "learning_rate": 0.0002729538230479733, "loss": 2.9704651832580566, "step": 7999, "token_acc": 0.30254340052057455 }, { "epoch": 4.689533861037819, "grad_norm": 0.22654198640257087, "learning_rate": 0.00027294549496786934, "loss": 3.0095033645629883, "step": 8000, "token_acc": 0.29787940396839696 }, { "epoch": 4.690120199355028, "grad_norm": 0.23369436041267638, "learning_rate": 0.00027293716573285816, "loss": 3.0713820457458496, "step": 8001, "token_acc": 0.2888325748697421 }, { "epoch": 4.690706537672237, "grad_norm": 0.2307123278198811, "learning_rate": 0.000272928835343018, "loss": 3.028798818588257, "step": 8002, "token_acc": 0.29379762018875794 }, { "epoch": 4.691292875989446, "grad_norm": 0.22054933620563386, "learning_rate": 0.0002729205037984271, "loss": 2.9625766277313232, "step": 8003, "token_acc": 0.3038897550405623 }, { "epoch": 4.691879214306655, "grad_norm": 0.23659716880942508, "learning_rate": 0.00027291217109916364, "loss": 2.9549880027770996, "step": 8004, "token_acc": 0.30579445159251484 }, { "epoch": 4.692465552623864, "grad_norm": 0.22905372649224817, "learning_rate": 0.000272903837245306, "loss": 2.971674680709839, "step": 8005, "token_acc": 0.30267043669494187 }, { "epoch": 4.693051890941073, "grad_norm": 0.23996159428642047, "learning_rate": 0.0002728955022369324, "loss": 3.0226359367370605, "step": 8006, "token_acc": 0.2969983787093265 }, { "epoch": 4.693638229258282, "grad_norm": 0.22981323716934784, "learning_rate": 0.0002728871660741211, "loss": 3.0161678791046143, "step": 8007, "token_acc": 0.2971675250731021 }, { "epoch": 4.694224567575491, "grad_norm": 0.21499079584913083, "learning_rate": 0.0002728788287569506, "loss": 2.9847888946533203, "step": 8008, "token_acc": 0.30068877757130164 }, { "epoch": 4.6948109058927, "grad_norm": 0.22724686993533613, "learning_rate": 0.00027287049028549903, "loss": 2.9753470420837402, "step": 8009, "token_acc": 0.30223073016369106 }, { "epoch": 4.695397244209909, "grad_norm": 0.20852963311093395, "learning_rate": 0.00027286215065984475, "loss": 2.974867105484009, "step": 8010, "token_acc": 0.30158437195959575 }, { "epoch": 4.695983582527118, "grad_norm": 0.22236619826920662, "learning_rate": 0.00027285380988006615, "loss": 3.038928508758545, "step": 8011, "token_acc": 0.2940437515908052 }, { "epoch": 4.696569920844327, "grad_norm": 0.24085254571339978, "learning_rate": 0.0002728454679462415, "loss": 3.0386176109313965, "step": 8012, "token_acc": 0.2920775564125489 }, { "epoch": 4.697156259161536, "grad_norm": 0.2330809368477201, "learning_rate": 0.00027283712485844927, "loss": 2.9869959354400635, "step": 8013, "token_acc": 0.2995997004209602 }, { "epoch": 4.6977425974787455, "grad_norm": 0.2272800090309307, "learning_rate": 0.00027282878061676776, "loss": 3.016906976699829, "step": 8014, "token_acc": 0.2957842962278611 }, { "epoch": 4.698328935795955, "grad_norm": 0.2451533334770203, "learning_rate": 0.00027282043522127537, "loss": 2.994156837463379, "step": 8015, "token_acc": 0.2986642489233487 }, { "epoch": 4.698915274113164, "grad_norm": 0.2321531741910977, "learning_rate": 0.00027281208867205054, "loss": 2.9755194187164307, "step": 8016, "token_acc": 0.30286852137544207 }, { "epoch": 4.699501612430373, "grad_norm": 0.22742654394833736, "learning_rate": 0.00027280374096917156, "loss": 3.000978469848633, "step": 8017, "token_acc": 0.2988068183300675 }, { "epoch": 4.700087950747581, "grad_norm": 0.2635632720363313, "learning_rate": 0.00027279539211271696, "loss": 2.9706058502197266, "step": 8018, "token_acc": 0.30589335317865973 }, { "epoch": 4.70067428906479, "grad_norm": 0.2249749411519614, "learning_rate": 0.0002727870421027651, "loss": 2.9328503608703613, "step": 8019, "token_acc": 0.30786230079681276 }, { "epoch": 4.701260627381999, "grad_norm": 0.25597774619735186, "learning_rate": 0.00027277869093939445, "loss": 2.9894392490386963, "step": 8020, "token_acc": 0.30166564177920463 }, { "epoch": 4.701846965699208, "grad_norm": 0.23737269335836553, "learning_rate": 0.0002727703386226834, "loss": 2.968196392059326, "step": 8021, "token_acc": 0.3024451524334965 }, { "epoch": 4.7024333040164175, "grad_norm": 0.24487329800361674, "learning_rate": 0.0002727619851527105, "loss": 3.0047922134399414, "step": 8022, "token_acc": 0.2982718564017695 }, { "epoch": 4.703019642333627, "grad_norm": 0.21445966686077036, "learning_rate": 0.00027275363052955417, "loss": 2.984370708465576, "step": 8023, "token_acc": 0.3003675492497914 }, { "epoch": 4.703605980650836, "grad_norm": 0.22382354594649123, "learning_rate": 0.0002727452747532929, "loss": 2.930237293243408, "step": 8024, "token_acc": 0.3080376000556087 }, { "epoch": 4.704192318968045, "grad_norm": 0.2112900591604549, "learning_rate": 0.0002727369178240051, "loss": 2.998049259185791, "step": 8025, "token_acc": 0.2992473817715395 }, { "epoch": 4.704778657285254, "grad_norm": 0.22146258098587585, "learning_rate": 0.00027272855974176945, "loss": 3.0344738960266113, "step": 8026, "token_acc": 0.2929223000488679 }, { "epoch": 4.705364995602462, "grad_norm": 0.2232619256832775, "learning_rate": 0.0002727202005066643, "loss": 2.9884607791900635, "step": 8027, "token_acc": 0.2995787767250887 }, { "epoch": 4.705951333919671, "grad_norm": 0.2203798126249059, "learning_rate": 0.0002727118401187682, "loss": 3.029386043548584, "step": 8028, "token_acc": 0.29271670543932404 }, { "epoch": 4.70653767223688, "grad_norm": 0.2424125213121547, "learning_rate": 0.0002727034785781598, "loss": 3.0184242725372314, "step": 8029, "token_acc": 0.2956051749221728 }, { "epoch": 4.7071240105540895, "grad_norm": 0.22413763688248656, "learning_rate": 0.00027269511588491754, "loss": 2.966338634490967, "step": 8030, "token_acc": 0.3038055087278253 }, { "epoch": 4.707710348871299, "grad_norm": 0.21755066898771866, "learning_rate": 0.00027268675203912, "loss": 2.9550273418426514, "step": 8031, "token_acc": 0.30404286719316154 }, { "epoch": 4.708296687188508, "grad_norm": 0.23536089490789863, "learning_rate": 0.0002726783870408457, "loss": 2.9618635177612305, "step": 8032, "token_acc": 0.304383621793699 }, { "epoch": 4.708883025505717, "grad_norm": 0.2319098542068753, "learning_rate": 0.0002726700208901733, "loss": 2.9721169471740723, "step": 8033, "token_acc": 0.30294178119684595 }, { "epoch": 4.709469363822926, "grad_norm": 0.2319185965026555, "learning_rate": 0.0002726616535871814, "loss": 2.994365692138672, "step": 8034, "token_acc": 0.30003728452322087 }, { "epoch": 4.710055702140135, "grad_norm": 0.24057991847969476, "learning_rate": 0.0002726532851319485, "loss": 2.973515272140503, "step": 8035, "token_acc": 0.30359241865327635 }, { "epoch": 4.710642040457344, "grad_norm": 0.2107305775809225, "learning_rate": 0.00027264491552455325, "loss": 2.96773624420166, "step": 8036, "token_acc": 0.30253070645453795 }, { "epoch": 4.711228378774553, "grad_norm": 0.24203236171735593, "learning_rate": 0.00027263654476507434, "loss": 3.0651187896728516, "step": 8037, "token_acc": 0.28943586199313576 }, { "epoch": 4.711814717091762, "grad_norm": 0.2124827569366108, "learning_rate": 0.0002726281728535903, "loss": 3.0008060932159424, "step": 8038, "token_acc": 0.2961468012636475 }, { "epoch": 4.7124010554089715, "grad_norm": 0.2516783809690654, "learning_rate": 0.0002726197997901798, "loss": 2.989532709121704, "step": 8039, "token_acc": 0.2999526666241198 }, { "epoch": 4.71298739372618, "grad_norm": 0.24149495261292253, "learning_rate": 0.00027261142557492157, "loss": 3.010761260986328, "step": 8040, "token_acc": 0.29638043039050116 }, { "epoch": 4.713573732043389, "grad_norm": 0.2416809913412415, "learning_rate": 0.0002726030502078942, "loss": 3.0658507347106934, "step": 8041, "token_acc": 0.28928612067984566 }, { "epoch": 4.714160070360598, "grad_norm": 0.23590198769124812, "learning_rate": 0.0002725946736891764, "loss": 2.9290599822998047, "step": 8042, "token_acc": 0.30786941238717386 }, { "epoch": 4.714746408677807, "grad_norm": 0.23480734618753724, "learning_rate": 0.0002725862960188468, "loss": 2.9555444717407227, "step": 8043, "token_acc": 0.3039568070623813 }, { "epoch": 4.715332746995016, "grad_norm": 0.2193194110996426, "learning_rate": 0.00027257791719698414, "loss": 2.9616951942443848, "step": 8044, "token_acc": 0.30490797053558766 }, { "epoch": 4.715919085312225, "grad_norm": 0.24527133346275176, "learning_rate": 0.00027256953722366715, "loss": 2.9864184856414795, "step": 8045, "token_acc": 0.30153216536775757 }, { "epoch": 4.716505423629434, "grad_norm": 0.20224920997425186, "learning_rate": 0.0002725611560989745, "loss": 3.042310953140259, "step": 8046, "token_acc": 0.2939957373752364 }, { "epoch": 4.7170917619466435, "grad_norm": 0.2462626820109491, "learning_rate": 0.000272552773822985, "loss": 2.96690034866333, "step": 8047, "token_acc": 0.3027801645593528 }, { "epoch": 4.717678100263853, "grad_norm": 0.22352432993628502, "learning_rate": 0.0002725443903957772, "loss": 3.0185132026672363, "step": 8048, "token_acc": 0.2964226030603625 }, { "epoch": 4.718264438581061, "grad_norm": 0.24612356666350665, "learning_rate": 0.0002725360058174301, "loss": 2.9798197746276855, "step": 8049, "token_acc": 0.3011730063199147 }, { "epoch": 4.71885077689827, "grad_norm": 0.24615218816816709, "learning_rate": 0.0002725276200880223, "loss": 3.0015792846679688, "step": 8050, "token_acc": 0.29817728738230725 }, { "epoch": 4.719437115215479, "grad_norm": 0.21594849687878387, "learning_rate": 0.00027251923320763266, "loss": 3.0045509338378906, "step": 8051, "token_acc": 0.2985994091826333 }, { "epoch": 4.720023453532688, "grad_norm": 0.23442586372261318, "learning_rate": 0.00027251084517633983, "loss": 2.9854891300201416, "step": 8052, "token_acc": 0.30055258795358264 }, { "epoch": 4.720609791849897, "grad_norm": 0.23892802843571193, "learning_rate": 0.0002725024559942228, "loss": 3.0064148902893066, "step": 8053, "token_acc": 0.2974782111180384 }, { "epoch": 4.721196130167106, "grad_norm": 0.2220125660499891, "learning_rate": 0.0002724940656613602, "loss": 2.9878458976745605, "step": 8054, "token_acc": 0.2991960274296524 }, { "epoch": 4.7217824684843155, "grad_norm": 0.23099055289930986, "learning_rate": 0.00027248567417783096, "loss": 2.9560627937316895, "step": 8055, "token_acc": 0.3056993457872745 }, { "epoch": 4.722368806801525, "grad_norm": 0.20849411239300442, "learning_rate": 0.0002724772815437138, "loss": 2.967928886413574, "step": 8056, "token_acc": 0.3035505121521061 }, { "epoch": 4.722955145118734, "grad_norm": 0.2180631513114469, "learning_rate": 0.0002724688877590877, "loss": 2.952347755432129, "step": 8057, "token_acc": 0.3060695761513072 }, { "epoch": 4.723541483435943, "grad_norm": 0.20721735193610177, "learning_rate": 0.0002724604928240314, "loss": 2.97845458984375, "step": 8058, "token_acc": 0.30116755527337724 }, { "epoch": 4.724127821753152, "grad_norm": 0.21686656030062104, "learning_rate": 0.00027245209673862374, "loss": 2.9854695796966553, "step": 8059, "token_acc": 0.29884981247743464 }, { "epoch": 4.724714160070361, "grad_norm": 0.24383148612551364, "learning_rate": 0.0002724436995029437, "loss": 3.012242317199707, "step": 8060, "token_acc": 0.2983414588264802 }, { "epoch": 4.725300498387569, "grad_norm": 0.21713807779667438, "learning_rate": 0.00027243530111707004, "loss": 2.961216926574707, "step": 8061, "token_acc": 0.3035239687641599 }, { "epoch": 4.725886836704778, "grad_norm": 0.21006561308870556, "learning_rate": 0.0002724269015810818, "loss": 2.9979164600372314, "step": 8062, "token_acc": 0.3003666692488445 }, { "epoch": 4.7264731750219875, "grad_norm": 0.2416855992976606, "learning_rate": 0.0002724185008950577, "loss": 3.023186206817627, "step": 8063, "token_acc": 0.2961096834799351 }, { "epoch": 4.727059513339197, "grad_norm": 0.2094687700245328, "learning_rate": 0.0002724100990590768, "loss": 2.9820547103881836, "step": 8064, "token_acc": 0.3020883020489891 }, { "epoch": 4.727645851656406, "grad_norm": 0.2094968697395024, "learning_rate": 0.00027240169607321797, "loss": 2.9976696968078613, "step": 8065, "token_acc": 0.29833065519247914 }, { "epoch": 4.728232189973615, "grad_norm": 0.21207830452132448, "learning_rate": 0.0002723932919375601, "loss": 3.010378837585449, "step": 8066, "token_acc": 0.2959176480723895 }, { "epoch": 4.728818528290824, "grad_norm": 0.21756340158423706, "learning_rate": 0.0002723848866521822, "loss": 3.0251855850219727, "step": 8067, "token_acc": 0.2950593200199518 }, { "epoch": 4.729404866608033, "grad_norm": 0.20257275390881424, "learning_rate": 0.0002723764802171632, "loss": 2.972163200378418, "step": 8068, "token_acc": 0.3028604556243322 }, { "epoch": 4.729991204925242, "grad_norm": 0.21146542771001683, "learning_rate": 0.00027236807263258207, "loss": 2.994256019592285, "step": 8069, "token_acc": 0.2998357675872186 }, { "epoch": 4.730577543242451, "grad_norm": 0.20894897888440442, "learning_rate": 0.0002723596638985178, "loss": 3.014697551727295, "step": 8070, "token_acc": 0.2971236561268474 }, { "epoch": 4.7311638815596595, "grad_norm": 0.2086270048059413, "learning_rate": 0.0002723512540150494, "loss": 2.959437847137451, "step": 8071, "token_acc": 0.30302420758643633 }, { "epoch": 4.731750219876869, "grad_norm": 0.2045100579126822, "learning_rate": 0.0002723428429822558, "loss": 2.97864031791687, "step": 8072, "token_acc": 0.30203557961604055 }, { "epoch": 4.732336558194078, "grad_norm": 0.21675172364814244, "learning_rate": 0.00027233443080021603, "loss": 2.98246431350708, "step": 8073, "token_acc": 0.3001004095653323 }, { "epoch": 4.732922896511287, "grad_norm": 0.23518894106907345, "learning_rate": 0.0002723260174690092, "loss": 3.003081798553467, "step": 8074, "token_acc": 0.29776394830314656 }, { "epoch": 4.733509234828496, "grad_norm": 0.19180934980549977, "learning_rate": 0.00027231760298871425, "loss": 3.024230480194092, "step": 8075, "token_acc": 0.2962506539243551 }, { "epoch": 4.734095573145705, "grad_norm": 0.24258637739096278, "learning_rate": 0.0002723091873594102, "loss": 2.9799065589904785, "step": 8076, "token_acc": 0.30042230959662153 }, { "epoch": 4.734681911462914, "grad_norm": 0.22082645500724632, "learning_rate": 0.0002723007705811762, "loss": 2.9562482833862305, "step": 8077, "token_acc": 0.3053178775026706 }, { "epoch": 4.735268249780123, "grad_norm": 0.20991252189127738, "learning_rate": 0.0002722923526540912, "loss": 3.039581775665283, "step": 8078, "token_acc": 0.2925579346013029 }, { "epoch": 4.735854588097332, "grad_norm": 0.21437221864604064, "learning_rate": 0.00027228393357823437, "loss": 2.9627161026000977, "step": 8079, "token_acc": 0.30497531438186326 }, { "epoch": 4.7364409264145415, "grad_norm": 0.21971334344857002, "learning_rate": 0.00027227551335368475, "loss": 3.0081300735473633, "step": 8080, "token_acc": 0.2966559967671268 }, { "epoch": 4.737027264731751, "grad_norm": 0.21075295148079304, "learning_rate": 0.0002722670919805215, "loss": 2.977956771850586, "step": 8081, "token_acc": 0.30253538848694556 }, { "epoch": 4.73761360304896, "grad_norm": 0.2097484888854789, "learning_rate": 0.00027225866945882366, "loss": 2.9872775077819824, "step": 8082, "token_acc": 0.30086087739228773 }, { "epoch": 4.738199941366168, "grad_norm": 0.2148846848056903, "learning_rate": 0.0002722502457886703, "loss": 2.9808614253997803, "step": 8083, "token_acc": 0.3015346340050482 }, { "epoch": 4.738786279683377, "grad_norm": 0.20979022357275806, "learning_rate": 0.0002722418209701407, "loss": 2.9707224369049072, "step": 8084, "token_acc": 0.30414855014969266 }, { "epoch": 4.739372618000586, "grad_norm": 0.19132545560111577, "learning_rate": 0.0002722333950033139, "loss": 2.989068031311035, "step": 8085, "token_acc": 0.3001100855279871 }, { "epoch": 4.739958956317795, "grad_norm": 0.21385066161289107, "learning_rate": 0.00027222496788826905, "loss": 2.998983144760132, "step": 8086, "token_acc": 0.29707095092614355 }, { "epoch": 4.740545294635004, "grad_norm": 0.19289893435702277, "learning_rate": 0.00027221653962508527, "loss": 2.979851245880127, "step": 8087, "token_acc": 0.30071025722468525 }, { "epoch": 4.7411316329522135, "grad_norm": 0.22666861844709615, "learning_rate": 0.00027220811021384187, "loss": 3.0059680938720703, "step": 8088, "token_acc": 0.2973458021739241 }, { "epoch": 4.741717971269423, "grad_norm": 0.20364376295513698, "learning_rate": 0.00027219967965461795, "loss": 3.0043649673461914, "step": 8089, "token_acc": 0.2981300163229816 }, { "epoch": 4.742304309586632, "grad_norm": 0.2035883904394352, "learning_rate": 0.00027219124794749264, "loss": 3.006263017654419, "step": 8090, "token_acc": 0.2976270484073624 }, { "epoch": 4.742890647903841, "grad_norm": 0.22064460794510354, "learning_rate": 0.00027218281509254526, "loss": 2.989020824432373, "step": 8091, "token_acc": 0.3001167811553513 }, { "epoch": 4.743476986221049, "grad_norm": 0.19365920966745645, "learning_rate": 0.000272174381089855, "loss": 2.98691987991333, "step": 8092, "token_acc": 0.3019087123782466 }, { "epoch": 4.744063324538258, "grad_norm": 0.240553898355905, "learning_rate": 0.000272165945939501, "loss": 3.01920747756958, "step": 8093, "token_acc": 0.2944937928082192 }, { "epoch": 4.744649662855467, "grad_norm": 0.24385977509853352, "learning_rate": 0.0002721575096415626, "loss": 2.976435661315918, "step": 8094, "token_acc": 0.302342078810637 }, { "epoch": 4.745236001172676, "grad_norm": 0.2155606724809469, "learning_rate": 0.000272149072196119, "loss": 2.934091567993164, "step": 8095, "token_acc": 0.30903334762744256 }, { "epoch": 4.7458223394898855, "grad_norm": 0.22586209540268945, "learning_rate": 0.00027214063360324944, "loss": 3.0193707942962646, "step": 8096, "token_acc": 0.2955047144927759 }, { "epoch": 4.746408677807095, "grad_norm": 0.21828795383280325, "learning_rate": 0.00027213219386303323, "loss": 3.018533706665039, "step": 8097, "token_acc": 0.29638760157110494 }, { "epoch": 4.746995016124304, "grad_norm": 0.23068282136102128, "learning_rate": 0.0002721237529755496, "loss": 2.9844436645507812, "step": 8098, "token_acc": 0.3004166877115006 }, { "epoch": 4.747581354441513, "grad_norm": 0.23005887375686124, "learning_rate": 0.000272115310940878, "loss": 3.012810707092285, "step": 8099, "token_acc": 0.29662058652358075 }, { "epoch": 4.748167692758722, "grad_norm": 0.21160865460004316, "learning_rate": 0.00027210686775909753, "loss": 2.9880380630493164, "step": 8100, "token_acc": 0.29978516744617134 }, { "epoch": 4.748754031075931, "grad_norm": 0.20306037280936265, "learning_rate": 0.00027209842343028755, "loss": 2.9798736572265625, "step": 8101, "token_acc": 0.30227312344384577 }, { "epoch": 4.74934036939314, "grad_norm": 0.22758907214598345, "learning_rate": 0.0002720899779545274, "loss": 3.0060172080993652, "step": 8102, "token_acc": 0.29648418197302157 }, { "epoch": 4.749926707710349, "grad_norm": 0.21858115525045563, "learning_rate": 0.0002720815313318965, "loss": 3.0004186630249023, "step": 8103, "token_acc": 0.2967534314864163 }, { "epoch": 4.7505130460275575, "grad_norm": 0.21179645845140155, "learning_rate": 0.000272073083562474, "loss": 2.9788644313812256, "step": 8104, "token_acc": 0.30281986922422455 }, { "epoch": 4.751099384344767, "grad_norm": 0.20735659827856265, "learning_rate": 0.00027206463464633947, "loss": 3.000202178955078, "step": 8105, "token_acc": 0.29807781644620357 }, { "epoch": 4.751685722661976, "grad_norm": 0.2224979698511867, "learning_rate": 0.0002720561845835722, "loss": 2.9806630611419678, "step": 8106, "token_acc": 0.3015019594722523 }, { "epoch": 4.752272060979185, "grad_norm": 0.2259317784876267, "learning_rate": 0.0002720477333742515, "loss": 2.9911599159240723, "step": 8107, "token_acc": 0.2999364236092343 }, { "epoch": 4.752858399296394, "grad_norm": 0.22649602706087044, "learning_rate": 0.00027203928101845684, "loss": 2.9713850021362305, "step": 8108, "token_acc": 0.30414369956794174 }, { "epoch": 4.753444737613603, "grad_norm": 0.23459639788005016, "learning_rate": 0.0002720308275162675, "loss": 2.991389274597168, "step": 8109, "token_acc": 0.30044359220866074 }, { "epoch": 4.754031075930812, "grad_norm": 0.23277513125107085, "learning_rate": 0.00027202237286776305, "loss": 2.9978554248809814, "step": 8110, "token_acc": 0.2991371579411362 }, { "epoch": 4.754617414248021, "grad_norm": 0.24178606349768897, "learning_rate": 0.0002720139170730228, "loss": 2.996026039123535, "step": 8111, "token_acc": 0.2987498283456748 }, { "epoch": 4.75520375256523, "grad_norm": 0.20406220790662327, "learning_rate": 0.00027200546013212627, "loss": 2.970203399658203, "step": 8112, "token_acc": 0.3033247812643905 }, { "epoch": 4.7557900908824395, "grad_norm": 0.25275325940899135, "learning_rate": 0.0002719970020451528, "loss": 3.013970136642456, "step": 8113, "token_acc": 0.29688934980239196 }, { "epoch": 4.756376429199648, "grad_norm": 0.2387717051905531, "learning_rate": 0.0002719885428121819, "loss": 3.034808874130249, "step": 8114, "token_acc": 0.2942142240322391 }, { "epoch": 4.756962767516857, "grad_norm": 0.22018240752881876, "learning_rate": 0.000271980082433293, "loss": 2.968825578689575, "step": 8115, "token_acc": 0.3022998481801281 }, { "epoch": 4.757549105834066, "grad_norm": 0.23807227169986403, "learning_rate": 0.00027197162090856564, "loss": 2.9648778438568115, "step": 8116, "token_acc": 0.3035104615796421 }, { "epoch": 4.758135444151275, "grad_norm": 0.21597600089676877, "learning_rate": 0.0002719631582380792, "loss": 2.9796981811523438, "step": 8117, "token_acc": 0.30361531941943004 }, { "epoch": 4.758721782468484, "grad_norm": 0.2200850466479177, "learning_rate": 0.0002719546944219133, "loss": 3.011141777038574, "step": 8118, "token_acc": 0.2981951063191953 }, { "epoch": 4.759308120785693, "grad_norm": 0.24007912504893117, "learning_rate": 0.00027194622946014735, "loss": 2.9703171253204346, "step": 8119, "token_acc": 0.3029364174377338 }, { "epoch": 4.759894459102902, "grad_norm": 0.23452691261476002, "learning_rate": 0.0002719377633528609, "loss": 2.9958205223083496, "step": 8120, "token_acc": 0.2997577206865629 }, { "epoch": 4.7604807974201115, "grad_norm": 0.20838469545846613, "learning_rate": 0.00027192929610013347, "loss": 3.018148422241211, "step": 8121, "token_acc": 0.2959528132565323 }, { "epoch": 4.761067135737321, "grad_norm": 0.20753609458500305, "learning_rate": 0.00027192082770204464, "loss": 2.954198122024536, "step": 8122, "token_acc": 0.30477757692193375 }, { "epoch": 4.76165347405453, "grad_norm": 0.2217914657574539, "learning_rate": 0.00027191235815867386, "loss": 2.9740500450134277, "step": 8123, "token_acc": 0.3037371185162647 }, { "epoch": 4.762239812371739, "grad_norm": 0.22738205129208827, "learning_rate": 0.0002719038874701008, "loss": 3.016878604888916, "step": 8124, "token_acc": 0.29575501824227707 }, { "epoch": 4.762826150688948, "grad_norm": 0.22558676494656488, "learning_rate": 0.000271895415636405, "loss": 2.962388277053833, "step": 8125, "token_acc": 0.3044442451468778 }, { "epoch": 4.763412489006156, "grad_norm": 0.21608433648273898, "learning_rate": 0.000271886942657666, "loss": 3.0116310119628906, "step": 8126, "token_acc": 0.2969151044751626 }, { "epoch": 4.763998827323365, "grad_norm": 0.20797664649932088, "learning_rate": 0.00027187846853396345, "loss": 2.9748778343200684, "step": 8127, "token_acc": 0.3033529368444309 }, { "epoch": 4.764585165640574, "grad_norm": 0.21519810695211816, "learning_rate": 0.0002718699932653769, "loss": 2.9693310260772705, "step": 8128, "token_acc": 0.3034183284643719 }, { "epoch": 4.7651715039577835, "grad_norm": 0.21442111456267965, "learning_rate": 0.00027186151685198594, "loss": 3.004716157913208, "step": 8129, "token_acc": 0.2998205453670066 }, { "epoch": 4.765757842274993, "grad_norm": 0.21447691442451744, "learning_rate": 0.0002718530392938703, "loss": 2.9433865547180176, "step": 8130, "token_acc": 0.30774889057140625 }, { "epoch": 4.766344180592202, "grad_norm": 0.20804921360823306, "learning_rate": 0.0002718445605911095, "loss": 2.9241719245910645, "step": 8131, "token_acc": 0.3114894478520894 }, { "epoch": 4.766930518909411, "grad_norm": 0.22946221575388911, "learning_rate": 0.00027183608074378326, "loss": 2.961427688598633, "step": 8132, "token_acc": 0.30399505528754234 }, { "epoch": 4.76751685722662, "grad_norm": 0.23151494826173108, "learning_rate": 0.00027182759975197127, "loss": 2.9625134468078613, "step": 8133, "token_acc": 0.3039790848026207 }, { "epoch": 4.768103195543829, "grad_norm": 0.24919134497492132, "learning_rate": 0.0002718191176157531, "loss": 3.0103061199188232, "step": 8134, "token_acc": 0.29623488863939007 }, { "epoch": 4.768689533861037, "grad_norm": 0.23899527028826328, "learning_rate": 0.00027181063433520853, "loss": 3.006040573120117, "step": 8135, "token_acc": 0.2983042388603791 }, { "epoch": 4.7692758721782464, "grad_norm": 0.23425494784984585, "learning_rate": 0.0002718021499104171, "loss": 2.9856362342834473, "step": 8136, "token_acc": 0.300987846526025 }, { "epoch": 4.769862210495456, "grad_norm": 0.23557434089062848, "learning_rate": 0.0002717936643414586, "loss": 3.052811622619629, "step": 8137, "token_acc": 0.2920363605295112 }, { "epoch": 4.770448548812665, "grad_norm": 0.21395881904809017, "learning_rate": 0.0002717851776284128, "loss": 3.035191535949707, "step": 8138, "token_acc": 0.29292435943333506 }, { "epoch": 4.771034887129874, "grad_norm": 0.22400448431410216, "learning_rate": 0.0002717766897713594, "loss": 2.959608554840088, "step": 8139, "token_acc": 0.3057954159505161 }, { "epoch": 4.771621225447083, "grad_norm": 0.21674650572344048, "learning_rate": 0.00027176820077037806, "loss": 2.987884998321533, "step": 8140, "token_acc": 0.3011820688092451 }, { "epoch": 4.772207563764292, "grad_norm": 0.21439150488067277, "learning_rate": 0.00027175971062554853, "loss": 2.947230339050293, "step": 8141, "token_acc": 0.3055041204073147 }, { "epoch": 4.772793902081501, "grad_norm": 0.2323999661663021, "learning_rate": 0.00027175121933695055, "loss": 2.991596221923828, "step": 8142, "token_acc": 0.29905055104258293 }, { "epoch": 4.77338024039871, "grad_norm": 0.219499360898104, "learning_rate": 0.000271742726904664, "loss": 2.954399824142456, "step": 8143, "token_acc": 0.3053051895300646 }, { "epoch": 4.773966578715919, "grad_norm": 0.23640631422816677, "learning_rate": 0.0002717342333287686, "loss": 2.9846839904785156, "step": 8144, "token_acc": 0.3016776098689685 }, { "epoch": 4.7745529170331285, "grad_norm": 0.23889392392579642, "learning_rate": 0.0002717257386093441, "loss": 2.983081102371216, "step": 8145, "token_acc": 0.3013150059418402 }, { "epoch": 4.775139255350338, "grad_norm": 0.21853203963137557, "learning_rate": 0.00027171724274647026, "loss": 2.9850759506225586, "step": 8146, "token_acc": 0.2997954311250778 }, { "epoch": 4.775725593667546, "grad_norm": 0.2406812465116604, "learning_rate": 0.000271708745740227, "loss": 2.9793460369110107, "step": 8147, "token_acc": 0.30380340562490443 }, { "epoch": 4.776311931984755, "grad_norm": 0.21811188168637588, "learning_rate": 0.00027170024759069403, "loss": 2.9855458736419678, "step": 8148, "token_acc": 0.30129878772986035 }, { "epoch": 4.776898270301964, "grad_norm": 0.2400791627311619, "learning_rate": 0.00027169174829795123, "loss": 3.007376194000244, "step": 8149, "token_acc": 0.2967872609353308 }, { "epoch": 4.777484608619173, "grad_norm": 0.2209573021018777, "learning_rate": 0.00027168324786207846, "loss": 2.989950180053711, "step": 8150, "token_acc": 0.2999348861335055 }, { "epoch": 4.778070946936382, "grad_norm": 0.20833590963821028, "learning_rate": 0.00027167474628315557, "loss": 2.946460723876953, "step": 8151, "token_acc": 0.3068991065813561 }, { "epoch": 4.778657285253591, "grad_norm": 0.23050445346755968, "learning_rate": 0.00027166624356126236, "loss": 3.026897430419922, "step": 8152, "token_acc": 0.2965768677150739 }, { "epoch": 4.7792436235708005, "grad_norm": 0.19564495143169774, "learning_rate": 0.00027165773969647873, "loss": 2.9401674270629883, "step": 8153, "token_acc": 0.3072501293306058 }, { "epoch": 4.77982996188801, "grad_norm": 0.22235335114798596, "learning_rate": 0.00027164923468888465, "loss": 3.0000414848327637, "step": 8154, "token_acc": 0.2985176426594476 }, { "epoch": 4.780416300205219, "grad_norm": 0.20555464826724013, "learning_rate": 0.00027164072853855985, "loss": 3.021088123321533, "step": 8155, "token_acc": 0.2961964268433978 }, { "epoch": 4.781002638522428, "grad_norm": 0.2155821723764754, "learning_rate": 0.00027163222124558436, "loss": 2.9869861602783203, "step": 8156, "token_acc": 0.30052691544584453 }, { "epoch": 4.781588976839636, "grad_norm": 0.23978157219585663, "learning_rate": 0.00027162371281003796, "loss": 3.0162949562072754, "step": 8157, "token_acc": 0.2951361437019702 }, { "epoch": 4.782175315156845, "grad_norm": 0.22655952906906807, "learning_rate": 0.0002716152032320008, "loss": 2.9829556941986084, "step": 8158, "token_acc": 0.30107967218350307 }, { "epoch": 4.782761653474054, "grad_norm": 0.21192069601356697, "learning_rate": 0.00027160669251155263, "loss": 3.0062928199768066, "step": 8159, "token_acc": 0.2980509330120168 }, { "epoch": 4.783347991791263, "grad_norm": 0.21789383608871188, "learning_rate": 0.00027159818064877346, "loss": 3.032963752746582, "step": 8160, "token_acc": 0.294328594076858 }, { "epoch": 4.7839343301084725, "grad_norm": 0.2233877327879554, "learning_rate": 0.00027158966764374317, "loss": 2.9992804527282715, "step": 8161, "token_acc": 0.29945699466516434 }, { "epoch": 4.784520668425682, "grad_norm": 0.2070123832777583, "learning_rate": 0.0002715811534965419, "loss": 2.9553630352020264, "step": 8162, "token_acc": 0.30494919486782057 }, { "epoch": 4.785107006742891, "grad_norm": 0.22302962149082725, "learning_rate": 0.00027157263820724945, "loss": 2.994645833969116, "step": 8163, "token_acc": 0.300642025521313 }, { "epoch": 4.7856933450601, "grad_norm": 0.21631335841293176, "learning_rate": 0.00027156412177594595, "loss": 2.996206521987915, "step": 8164, "token_acc": 0.29681981533412966 }, { "epoch": 4.786279683377309, "grad_norm": 0.19836453723948855, "learning_rate": 0.0002715556042027113, "loss": 2.956622838973999, "step": 8165, "token_acc": 0.3028201055037166 }, { "epoch": 4.786866021694518, "grad_norm": 0.20437559518155743, "learning_rate": 0.0002715470854876255, "loss": 2.997732162475586, "step": 8166, "token_acc": 0.300637151830686 }, { "epoch": 4.787452360011727, "grad_norm": 0.2046373876931285, "learning_rate": 0.0002715385656307687, "loss": 2.947683334350586, "step": 8167, "token_acc": 0.30695501525951907 }, { "epoch": 4.788038698328936, "grad_norm": 0.19805987984712534, "learning_rate": 0.00027153004463222085, "loss": 2.9641036987304688, "step": 8168, "token_acc": 0.30408580052662 }, { "epoch": 4.7886250366461445, "grad_norm": 0.20403526245266768, "learning_rate": 0.0002715215224920619, "loss": 3.001873731613159, "step": 8169, "token_acc": 0.29683457309184996 }, { "epoch": 4.789211374963354, "grad_norm": 0.19159637606105104, "learning_rate": 0.0002715129992103721, "loss": 2.93953275680542, "step": 8170, "token_acc": 0.3075050601182258 }, { "epoch": 4.789797713280563, "grad_norm": 0.21199502186767552, "learning_rate": 0.00027150447478723133, "loss": 2.9722204208374023, "step": 8171, "token_acc": 0.30234263440237735 }, { "epoch": 4.790384051597772, "grad_norm": 0.21512587753543583, "learning_rate": 0.00027149594922271986, "loss": 2.9400267601013184, "step": 8172, "token_acc": 0.3060803865131579 }, { "epoch": 4.790970389914981, "grad_norm": 0.21275268964744332, "learning_rate": 0.00027148742251691756, "loss": 2.9888229370117188, "step": 8173, "token_acc": 0.30022318737289977 }, { "epoch": 4.79155672823219, "grad_norm": 0.23277952531653753, "learning_rate": 0.0002714788946699047, "loss": 2.9840564727783203, "step": 8174, "token_acc": 0.29950300193131246 }, { "epoch": 4.792143066549399, "grad_norm": 0.22161840605118596, "learning_rate": 0.00027147036568176124, "loss": 2.9725589752197266, "step": 8175, "token_acc": 0.30141967634680245 }, { "epoch": 4.792729404866608, "grad_norm": 0.21173966062782776, "learning_rate": 0.0002714618355525675, "loss": 3.003312110900879, "step": 8176, "token_acc": 0.2992656964203674 }, { "epoch": 4.793315743183817, "grad_norm": 0.21077010005482394, "learning_rate": 0.00027145330428240337, "loss": 2.968500852584839, "step": 8177, "token_acc": 0.30230920830508873 }, { "epoch": 4.7939020815010265, "grad_norm": 0.20360883265347024, "learning_rate": 0.00027144477187134914, "loss": 2.97875714302063, "step": 8178, "token_acc": 0.3012231471328628 }, { "epoch": 4.794488419818235, "grad_norm": 0.22089973531024246, "learning_rate": 0.0002714362383194849, "loss": 2.98596453666687, "step": 8179, "token_acc": 0.3017382961770493 }, { "epoch": 4.795074758135444, "grad_norm": 0.2200155810708185, "learning_rate": 0.00027142770362689094, "loss": 2.9603524208068848, "step": 8180, "token_acc": 0.3019557346116579 }, { "epoch": 4.795661096452653, "grad_norm": 0.20658892056225175, "learning_rate": 0.0002714191677936472, "loss": 3.043041944503784, "step": 8181, "token_acc": 0.2922430665923969 }, { "epoch": 4.796247434769862, "grad_norm": 0.2267956562028409, "learning_rate": 0.0002714106308198341, "loss": 3.0422325134277344, "step": 8182, "token_acc": 0.29370239594479836 }, { "epoch": 4.796833773087071, "grad_norm": 0.21976351975746644, "learning_rate": 0.0002714020927055317, "loss": 3.040954351425171, "step": 8183, "token_acc": 0.29152869904070355 }, { "epoch": 4.79742011140428, "grad_norm": 0.21484613930388524, "learning_rate": 0.0002713935534508202, "loss": 2.982804298400879, "step": 8184, "token_acc": 0.3005432461431086 }, { "epoch": 4.798006449721489, "grad_norm": 0.24231842612008836, "learning_rate": 0.0002713850130557799, "loss": 3.0092010498046875, "step": 8185, "token_acc": 0.2977415565504063 }, { "epoch": 4.7985927880386985, "grad_norm": 0.2318808411487887, "learning_rate": 0.0002713764715204909, "loss": 3.0049400329589844, "step": 8186, "token_acc": 0.2977827436779922 }, { "epoch": 4.799179126355908, "grad_norm": 0.21779784137775013, "learning_rate": 0.00027136792884503355, "loss": 2.9662132263183594, "step": 8187, "token_acc": 0.3056335945586278 }, { "epoch": 4.799765464673117, "grad_norm": 0.2185379792459844, "learning_rate": 0.00027135938502948804, "loss": 2.9820468425750732, "step": 8188, "token_acc": 0.2992771548147628 }, { "epoch": 4.800351802990326, "grad_norm": 0.21227476231973072, "learning_rate": 0.00027135084007393463, "loss": 2.9727656841278076, "step": 8189, "token_acc": 0.3036980547082062 }, { "epoch": 4.800938141307535, "grad_norm": 0.21270226004050508, "learning_rate": 0.0002713422939784536, "loss": 2.944664478302002, "step": 8190, "token_acc": 0.3062899773114737 }, { "epoch": 4.801524479624743, "grad_norm": 0.201844602461128, "learning_rate": 0.00027133374674312525, "loss": 2.932436227798462, "step": 8191, "token_acc": 0.3071330845267683 }, { "epoch": 4.802110817941952, "grad_norm": 0.19659157653655712, "learning_rate": 0.00027132519836802984, "loss": 2.970036029815674, "step": 8192, "token_acc": 0.3028290301705469 }, { "epoch": 4.802697156259161, "grad_norm": 0.2085893591505014, "learning_rate": 0.00027131664885324773, "loss": 3.0007452964782715, "step": 8193, "token_acc": 0.2986780928541506 }, { "epoch": 4.8032834945763705, "grad_norm": 0.20289781459526415, "learning_rate": 0.0002713080981988591, "loss": 3.0071005821228027, "step": 8194, "token_acc": 0.2975272533900558 }, { "epoch": 4.80386983289358, "grad_norm": 0.20539441605345993, "learning_rate": 0.00027129954640494437, "loss": 3.0201001167297363, "step": 8195, "token_acc": 0.2957570554114547 }, { "epoch": 4.804456171210789, "grad_norm": 0.19541946898470167, "learning_rate": 0.00027129099347158385, "loss": 2.9966511726379395, "step": 8196, "token_acc": 0.2998181798386482 }, { "epoch": 4.805042509527998, "grad_norm": 0.19712611829820337, "learning_rate": 0.0002712824393988579, "loss": 2.946601390838623, "step": 8197, "token_acc": 0.3061989567850111 }, { "epoch": 4.805628847845207, "grad_norm": 0.19910101313602874, "learning_rate": 0.0002712738841868469, "loss": 3.009077310562134, "step": 8198, "token_acc": 0.29552940166722097 }, { "epoch": 4.806215186162416, "grad_norm": 0.19609388320612112, "learning_rate": 0.00027126532783563117, "loss": 2.982117176055908, "step": 8199, "token_acc": 0.301542337223793 }, { "epoch": 4.806801524479624, "grad_norm": 0.19698726744133707, "learning_rate": 0.000271256770345291, "loss": 3.031919002532959, "step": 8200, "token_acc": 0.295518697737595 }, { "epoch": 4.807387862796833, "grad_norm": 0.20186081842622225, "learning_rate": 0.000271248211715907, "loss": 2.9751083850860596, "step": 8201, "token_acc": 0.30165295763410266 }, { "epoch": 4.8079742011140425, "grad_norm": 0.19934424100404124, "learning_rate": 0.00027123965194755936, "loss": 3.0011074542999268, "step": 8202, "token_acc": 0.2984692994254879 }, { "epoch": 4.808560539431252, "grad_norm": 0.2084473765771158, "learning_rate": 0.0002712310910403286, "loss": 2.9742159843444824, "step": 8203, "token_acc": 0.30204830167839347 }, { "epoch": 4.809146877748461, "grad_norm": 0.21328753337270806, "learning_rate": 0.00027122252899429504, "loss": 2.990501642227173, "step": 8204, "token_acc": 0.2985040876211455 }, { "epoch": 4.80973321606567, "grad_norm": 0.2149387900462949, "learning_rate": 0.0002712139658095392, "loss": 2.981771945953369, "step": 8205, "token_acc": 0.30358772293654085 }, { "epoch": 4.810319554382879, "grad_norm": 0.2072341466467608, "learning_rate": 0.00027120540148614143, "loss": 3.0173938274383545, "step": 8206, "token_acc": 0.2976388770072313 }, { "epoch": 4.810905892700088, "grad_norm": 0.21906253851345914, "learning_rate": 0.00027119683602418236, "loss": 2.9812843799591064, "step": 8207, "token_acc": 0.30107715993430434 }, { "epoch": 4.811492231017297, "grad_norm": 0.21286303996702574, "learning_rate": 0.0002711882694237423, "loss": 2.980482339859009, "step": 8208, "token_acc": 0.3029348755291428 }, { "epoch": 4.812078569334506, "grad_norm": 0.2351808865580167, "learning_rate": 0.00027117970168490167, "loss": 2.9739975929260254, "step": 8209, "token_acc": 0.30425747420049676 }, { "epoch": 4.812664907651715, "grad_norm": 0.21734369655093558, "learning_rate": 0.0002711711328077411, "loss": 2.99165678024292, "step": 8210, "token_acc": 0.2984323905722173 }, { "epoch": 4.8132512459689245, "grad_norm": 0.22367612516852556, "learning_rate": 0.00027116256279234097, "loss": 2.998335123062134, "step": 8211, "token_acc": 0.2984088619290983 }, { "epoch": 4.813837584286133, "grad_norm": 0.21390273500727167, "learning_rate": 0.0002711539916387819, "loss": 2.983384609222412, "step": 8212, "token_acc": 0.3010908025040303 }, { "epoch": 4.814423922603342, "grad_norm": 0.21426138555267643, "learning_rate": 0.0002711454193471443, "loss": 2.9774389266967773, "step": 8213, "token_acc": 0.302832860454431 }, { "epoch": 4.815010260920551, "grad_norm": 0.22273444608217519, "learning_rate": 0.00027113684591750873, "loss": 3.0263800621032715, "step": 8214, "token_acc": 0.29457586064728924 }, { "epoch": 4.81559659923776, "grad_norm": 0.19905173597791648, "learning_rate": 0.0002711282713499557, "loss": 2.997547149658203, "step": 8215, "token_acc": 0.29717456071957826 }, { "epoch": 4.816182937554969, "grad_norm": 0.21811821493841554, "learning_rate": 0.0002711196956445658, "loss": 2.999246597290039, "step": 8216, "token_acc": 0.2986837031401445 }, { "epoch": 4.816769275872178, "grad_norm": 0.20459504764226327, "learning_rate": 0.0002711111188014196, "loss": 2.985507011413574, "step": 8217, "token_acc": 0.3006857958841708 }, { "epoch": 4.817355614189387, "grad_norm": 0.21544538904329882, "learning_rate": 0.0002711025408205976, "loss": 3.0114169120788574, "step": 8218, "token_acc": 0.29720481810016636 }, { "epoch": 4.8179419525065965, "grad_norm": 0.23131985320579745, "learning_rate": 0.0002710939617021805, "loss": 2.994990825653076, "step": 8219, "token_acc": 0.29902670300021295 }, { "epoch": 4.818528290823806, "grad_norm": 0.22415060949403243, "learning_rate": 0.00027108538144624873, "loss": 2.9914660453796387, "step": 8220, "token_acc": 0.2991685598277676 }, { "epoch": 4.819114629141015, "grad_norm": 0.22547287930031026, "learning_rate": 0.00027107680005288297, "loss": 2.9934816360473633, "step": 8221, "token_acc": 0.2993074923731793 }, { "epoch": 4.819700967458223, "grad_norm": 0.2055897786663909, "learning_rate": 0.0002710682175221638, "loss": 2.987076759338379, "step": 8222, "token_acc": 0.30093764882540164 }, { "epoch": 4.820287305775432, "grad_norm": 0.22406053592048655, "learning_rate": 0.00027105963385417193, "loss": 2.9683837890625, "step": 8223, "token_acc": 0.3032326593094184 }, { "epoch": 4.820873644092641, "grad_norm": 0.2212924106764679, "learning_rate": 0.0002710510490489879, "loss": 2.975367546081543, "step": 8224, "token_acc": 0.30239438359007775 }, { "epoch": 4.82145998240985, "grad_norm": 0.22303716093370132, "learning_rate": 0.00027104246310669236, "loss": 2.950197219848633, "step": 8225, "token_acc": 0.3062015605879179 }, { "epoch": 4.822046320727059, "grad_norm": 0.2220174555459367, "learning_rate": 0.00027103387602736605, "loss": 3.018613815307617, "step": 8226, "token_acc": 0.29574920240669766 }, { "epoch": 4.8226326590442685, "grad_norm": 0.22100232005838008, "learning_rate": 0.0002710252878110895, "loss": 3.0370845794677734, "step": 8227, "token_acc": 0.2938350928661447 }, { "epoch": 4.823218997361478, "grad_norm": 0.2140300602102675, "learning_rate": 0.0002710166984579435, "loss": 3.0071358680725098, "step": 8228, "token_acc": 0.29730518862636834 }, { "epoch": 4.823805335678687, "grad_norm": 0.22369306081674917, "learning_rate": 0.0002710081079680087, "loss": 2.9715211391448975, "step": 8229, "token_acc": 0.30218854563342185 }, { "epoch": 4.824391673995896, "grad_norm": 0.2049233559466746, "learning_rate": 0.0002709995163413658, "loss": 2.995271682739258, "step": 8230, "token_acc": 0.2993573797678275 }, { "epoch": 4.824978012313105, "grad_norm": 0.21839645189854812, "learning_rate": 0.0002709909235780954, "loss": 2.9493818283081055, "step": 8231, "token_acc": 0.30578885719838433 }, { "epoch": 4.825564350630314, "grad_norm": 0.20408100897243617, "learning_rate": 0.00027098232967827834, "loss": 2.981884717941284, "step": 8232, "token_acc": 0.3004746238632077 }, { "epoch": 4.826150688947523, "grad_norm": 0.21530520458024063, "learning_rate": 0.0002709737346419954, "loss": 2.9762909412384033, "step": 8233, "token_acc": 0.3014088820018426 }, { "epoch": 4.826737027264731, "grad_norm": 0.2172021562953356, "learning_rate": 0.00027096513846932717, "loss": 2.969449043273926, "step": 8234, "token_acc": 0.3031483749902265 }, { "epoch": 4.8273233655819405, "grad_norm": 0.23812863008341817, "learning_rate": 0.00027095654116035447, "loss": 3.0026800632476807, "step": 8235, "token_acc": 0.29722296134908066 }, { "epoch": 4.82790970389915, "grad_norm": 0.22151080109328217, "learning_rate": 0.000270947942715158, "loss": 2.9960665702819824, "step": 8236, "token_acc": 0.29833930886586885 }, { "epoch": 4.828496042216359, "grad_norm": 0.23800322924273773, "learning_rate": 0.0002709393431338187, "loss": 2.992833137512207, "step": 8237, "token_acc": 0.2982764609423593 }, { "epoch": 4.829082380533568, "grad_norm": 0.22825325827546616, "learning_rate": 0.0002709307424164172, "loss": 2.9995877742767334, "step": 8238, "token_acc": 0.29828417761872444 }, { "epoch": 4.829668718850777, "grad_norm": 0.21221241550834927, "learning_rate": 0.00027092214056303435, "loss": 2.9751338958740234, "step": 8239, "token_acc": 0.3038743105288025 }, { "epoch": 4.830255057167986, "grad_norm": 0.20241885570461254, "learning_rate": 0.0002709135375737508, "loss": 3.0011754035949707, "step": 8240, "token_acc": 0.29941396124828623 }, { "epoch": 4.830841395485195, "grad_norm": 0.2263023123222594, "learning_rate": 0.0002709049334486477, "loss": 3.011152744293213, "step": 8241, "token_acc": 0.29755369017205185 }, { "epoch": 4.831427733802404, "grad_norm": 0.21280120438115002, "learning_rate": 0.00027089632818780556, "loss": 3.030346155166626, "step": 8242, "token_acc": 0.2926867810037504 }, { "epoch": 4.8320140721196125, "grad_norm": 0.22158115126532177, "learning_rate": 0.0002708877217913053, "loss": 2.9899709224700928, "step": 8243, "token_acc": 0.30078662505066467 }, { "epoch": 4.832600410436822, "grad_norm": 0.21514492619127298, "learning_rate": 0.00027087911425922786, "loss": 3.009211778640747, "step": 8244, "token_acc": 0.29765799048716673 }, { "epoch": 4.833186748754031, "grad_norm": 0.19368398781172047, "learning_rate": 0.000270870505591654, "loss": 3.0132951736450195, "step": 8245, "token_acc": 0.29665190602508956 }, { "epoch": 4.83377308707124, "grad_norm": 0.21864186466879718, "learning_rate": 0.00027086189578866466, "loss": 2.986602306365967, "step": 8246, "token_acc": 0.30208327804463786 }, { "epoch": 4.834359425388449, "grad_norm": 0.22493255414681182, "learning_rate": 0.00027085328485034057, "loss": 2.9913735389709473, "step": 8247, "token_acc": 0.30096203848153924 }, { "epoch": 4.834945763705658, "grad_norm": 0.21598239675071565, "learning_rate": 0.0002708446727767628, "loss": 3.005964756011963, "step": 8248, "token_acc": 0.29787409764646067 }, { "epoch": 4.835532102022867, "grad_norm": 0.19631652455296356, "learning_rate": 0.00027083605956801214, "loss": 2.9520821571350098, "step": 8249, "token_acc": 0.30555667944529297 }, { "epoch": 4.836118440340076, "grad_norm": 0.21117866598709542, "learning_rate": 0.00027082744522416956, "loss": 2.9988796710968018, "step": 8250, "token_acc": 0.298918479619905 }, { "epoch": 4.836704778657285, "grad_norm": 0.21200048723371773, "learning_rate": 0.0002708188297453159, "loss": 3.023862600326538, "step": 8251, "token_acc": 0.2945858210513131 }, { "epoch": 4.8372911169744945, "grad_norm": 0.2420840966171339, "learning_rate": 0.00027081021313153213, "loss": 2.991298198699951, "step": 8252, "token_acc": 0.29910673197232884 }, { "epoch": 4.837877455291704, "grad_norm": 0.20225896962337814, "learning_rate": 0.0002708015953828993, "loss": 2.9621973037719727, "step": 8253, "token_acc": 0.30516010692376505 }, { "epoch": 4.838463793608913, "grad_norm": 0.22738052193645644, "learning_rate": 0.0002707929764994982, "loss": 3.000058650970459, "step": 8254, "token_acc": 0.2997276452222076 }, { "epoch": 4.839050131926121, "grad_norm": 0.20931269824707144, "learning_rate": 0.00027078435648140986, "loss": 2.960587978363037, "step": 8255, "token_acc": 0.3060943082363837 }, { "epoch": 4.83963647024333, "grad_norm": 0.204573918118151, "learning_rate": 0.00027077573532871524, "loss": 3.00370454788208, "step": 8256, "token_acc": 0.2986082979083446 }, { "epoch": 4.840222808560539, "grad_norm": 0.2147273382469844, "learning_rate": 0.0002707671130414953, "loss": 2.945596694946289, "step": 8257, "token_acc": 0.3042680566528976 }, { "epoch": 4.840809146877748, "grad_norm": 0.21555106436562346, "learning_rate": 0.0002707584896198312, "loss": 2.968683958053589, "step": 8258, "token_acc": 0.30197661498312894 }, { "epoch": 4.841395485194957, "grad_norm": 0.23213510728065895, "learning_rate": 0.00027074986506380366, "loss": 2.9743924140930176, "step": 8259, "token_acc": 0.30279447700192463 }, { "epoch": 4.8419818235121665, "grad_norm": 0.2084991570675108, "learning_rate": 0.0002707412393734939, "loss": 2.9817371368408203, "step": 8260, "token_acc": 0.3009927604523646 }, { "epoch": 4.842568161829376, "grad_norm": 0.2106397042983924, "learning_rate": 0.00027073261254898293, "loss": 2.984109401702881, "step": 8261, "token_acc": 0.3010085761507528 }, { "epoch": 4.843154500146585, "grad_norm": 0.23451372429900544, "learning_rate": 0.00027072398459035174, "loss": 2.994997262954712, "step": 8262, "token_acc": 0.2989970872680527 }, { "epoch": 4.843740838463794, "grad_norm": 0.24065187245771597, "learning_rate": 0.0002707153554976814, "loss": 2.9774169921875, "step": 8263, "token_acc": 0.3022227262417782 }, { "epoch": 4.844327176781003, "grad_norm": 0.2072442610765007, "learning_rate": 0.0002707067252710529, "loss": 2.968035936355591, "step": 8264, "token_acc": 0.3024128770776678 }, { "epoch": 4.844913515098211, "grad_norm": 0.22094257179755328, "learning_rate": 0.00027069809391054746, "loss": 3.0021698474884033, "step": 8265, "token_acc": 0.2983368208775601 }, { "epoch": 4.84549985341542, "grad_norm": 0.21890303209505382, "learning_rate": 0.00027068946141624604, "loss": 2.98079776763916, "step": 8266, "token_acc": 0.30096179466683065 }, { "epoch": 4.8460861917326294, "grad_norm": 0.21832868162886424, "learning_rate": 0.00027068082778822976, "loss": 2.9945108890533447, "step": 8267, "token_acc": 0.2989355944434422 }, { "epoch": 4.846672530049839, "grad_norm": 0.20786520690932794, "learning_rate": 0.0002706721930265797, "loss": 2.977018356323242, "step": 8268, "token_acc": 0.3000324367546147 }, { "epoch": 4.847258868367048, "grad_norm": 0.2305965951917622, "learning_rate": 0.000270663557131377, "loss": 3.026834487915039, "step": 8269, "token_acc": 0.2946109284825244 }, { "epoch": 4.847845206684257, "grad_norm": 0.2184734153248759, "learning_rate": 0.0002706549201027028, "loss": 2.970600128173828, "step": 8270, "token_acc": 0.30350279056258683 }, { "epoch": 4.848431545001466, "grad_norm": 0.2298998745977003, "learning_rate": 0.00027064628194063825, "loss": 2.9935102462768555, "step": 8271, "token_acc": 0.29928317184101244 }, { "epoch": 4.849017883318675, "grad_norm": 0.245548225775597, "learning_rate": 0.0002706376426452643, "loss": 3.0653042793273926, "step": 8272, "token_acc": 0.29028715952497425 }, { "epoch": 4.849604221635884, "grad_norm": 0.20621047769560788, "learning_rate": 0.0002706290022166624, "loss": 2.979208469390869, "step": 8273, "token_acc": 0.30176831599285026 }, { "epoch": 4.850190559953093, "grad_norm": 0.2306553403659329, "learning_rate": 0.00027062036065491355, "loss": 2.962268352508545, "step": 8274, "token_acc": 0.30426853098267403 }, { "epoch": 4.850776898270302, "grad_norm": 0.23109540804108505, "learning_rate": 0.00027061171796009895, "loss": 3.0084023475646973, "step": 8275, "token_acc": 0.297135769647427 }, { "epoch": 4.8513632365875115, "grad_norm": 0.22251421060882473, "learning_rate": 0.00027060307413229976, "loss": 2.9927330017089844, "step": 8276, "token_acc": 0.2992310272888339 }, { "epoch": 4.85194957490472, "grad_norm": 0.2224051389323478, "learning_rate": 0.00027059442917159716, "loss": 2.990953207015991, "step": 8277, "token_acc": 0.2982096651943232 }, { "epoch": 4.852535913221929, "grad_norm": 0.20377955719138155, "learning_rate": 0.0002705857830780725, "loss": 2.960538864135742, "step": 8278, "token_acc": 0.30470049755363 }, { "epoch": 4.853122251539138, "grad_norm": 0.21275551043603683, "learning_rate": 0.00027057713585180684, "loss": 2.9693145751953125, "step": 8279, "token_acc": 0.30256442345361945 }, { "epoch": 4.853708589856347, "grad_norm": 0.18886876465750535, "learning_rate": 0.00027056848749288146, "loss": 2.984286308288574, "step": 8280, "token_acc": 0.30131887606012375 }, { "epoch": 4.854294928173556, "grad_norm": 0.2113240148454506, "learning_rate": 0.0002705598380013776, "loss": 3.0266451835632324, "step": 8281, "token_acc": 0.29370867329182937 }, { "epoch": 4.854881266490765, "grad_norm": 0.21126946429065754, "learning_rate": 0.0002705511873773766, "loss": 3.0113062858581543, "step": 8282, "token_acc": 0.2974305036191765 }, { "epoch": 4.855467604807974, "grad_norm": 0.22110017260548004, "learning_rate": 0.0002705425356209596, "loss": 2.980210065841675, "step": 8283, "token_acc": 0.30142394873048545 }, { "epoch": 4.8560539431251835, "grad_norm": 0.18245448567354983, "learning_rate": 0.00027053388273220785, "loss": 2.9665417671203613, "step": 8284, "token_acc": 0.3038380116760129 }, { "epoch": 4.856640281442393, "grad_norm": 0.22451408348846177, "learning_rate": 0.0002705252287112028, "loss": 2.973139762878418, "step": 8285, "token_acc": 0.30140251662007755 }, { "epoch": 4.857226619759601, "grad_norm": 0.21268845985964765, "learning_rate": 0.00027051657355802556, "loss": 3.049710750579834, "step": 8286, "token_acc": 0.29100559580825 }, { "epoch": 4.85781295807681, "grad_norm": 0.20462371436907503, "learning_rate": 0.0002705079172727575, "loss": 3.0063636302948, "step": 8287, "token_acc": 0.2966566752447988 }, { "epoch": 4.858399296394019, "grad_norm": 0.20753750365678078, "learning_rate": 0.00027049925985547996, "loss": 2.9956183433532715, "step": 8288, "token_acc": 0.2993776025343255 }, { "epoch": 4.858985634711228, "grad_norm": 0.20977617614297875, "learning_rate": 0.00027049060130627427, "loss": 3.0029563903808594, "step": 8289, "token_acc": 0.29884217879691777 }, { "epoch": 4.859571973028437, "grad_norm": 0.21549819867105793, "learning_rate": 0.00027048194162522174, "loss": 3.0235514640808105, "step": 8290, "token_acc": 0.29508175554935806 }, { "epoch": 4.860158311345646, "grad_norm": 0.21163361187905572, "learning_rate": 0.00027047328081240374, "loss": 2.985368013381958, "step": 8291, "token_acc": 0.2999753793043197 }, { "epoch": 4.8607446496628555, "grad_norm": 0.2268461296799456, "learning_rate": 0.0002704646188679015, "loss": 2.991875171661377, "step": 8292, "token_acc": 0.29971449039673287 }, { "epoch": 4.861330987980065, "grad_norm": 0.22995644597671372, "learning_rate": 0.00027045595579179663, "loss": 2.974151849746704, "step": 8293, "token_acc": 0.30289845050718894 }, { "epoch": 4.861917326297274, "grad_norm": 0.2259457387208414, "learning_rate": 0.00027044729158417027, "loss": 2.9690887928009033, "step": 8294, "token_acc": 0.3042366653210363 }, { "epoch": 4.862503664614483, "grad_norm": 0.2057039044228907, "learning_rate": 0.000270438626245104, "loss": 2.943039655685425, "step": 8295, "token_acc": 0.308653000187586 }, { "epoch": 4.863090002931692, "grad_norm": 0.22562904220503474, "learning_rate": 0.00027042995977467904, "loss": 3.0035948753356934, "step": 8296, "token_acc": 0.2986725305123481 }, { "epoch": 4.863676341248901, "grad_norm": 0.19778530180446824, "learning_rate": 0.00027042129217297697, "loss": 3.009737968444824, "step": 8297, "token_acc": 0.2970891677589317 }, { "epoch": 4.86426267956611, "grad_norm": 0.20897457298158292, "learning_rate": 0.00027041262344007906, "loss": 2.993360996246338, "step": 8298, "token_acc": 0.2983904349487359 }, { "epoch": 4.864849017883318, "grad_norm": 0.1959273235638305, "learning_rate": 0.00027040395357606686, "loss": 2.9853620529174805, "step": 8299, "token_acc": 0.30009978291907496 }, { "epoch": 4.8654353562005275, "grad_norm": 0.20599792891736124, "learning_rate": 0.0002703952825810218, "loss": 2.9691696166992188, "step": 8300, "token_acc": 0.3048215495614276 }, { "epoch": 4.866021694517737, "grad_norm": 0.207600793725992, "learning_rate": 0.0002703866104550252, "loss": 3.0058791637420654, "step": 8301, "token_acc": 0.2978461683107033 }, { "epoch": 4.866608032834946, "grad_norm": 0.20320972403720206, "learning_rate": 0.00027037793719815863, "loss": 2.973942756652832, "step": 8302, "token_acc": 0.301723815278008 }, { "epoch": 4.867194371152155, "grad_norm": 0.21385604701228395, "learning_rate": 0.00027036926281050357, "loss": 3.0055935382843018, "step": 8303, "token_acc": 0.29723260371238625 }, { "epoch": 4.867780709469364, "grad_norm": 0.21743104364286145, "learning_rate": 0.00027036058729214155, "loss": 2.9955458641052246, "step": 8304, "token_acc": 0.29993386421193563 }, { "epoch": 4.868367047786573, "grad_norm": 0.2089473135263253, "learning_rate": 0.00027035191064315393, "loss": 2.988430976867676, "step": 8305, "token_acc": 0.2996390645429847 }, { "epoch": 4.868953386103782, "grad_norm": 0.2051701429427241, "learning_rate": 0.0002703432328636223, "loss": 3.013498544692993, "step": 8306, "token_acc": 0.29519866095781966 }, { "epoch": 4.869539724420991, "grad_norm": 0.20540858157012495, "learning_rate": 0.0002703345539536282, "loss": 3.0219955444335938, "step": 8307, "token_acc": 0.2935993281644456 }, { "epoch": 4.8701260627381995, "grad_norm": 0.22780299336992843, "learning_rate": 0.00027032587391325304, "loss": 2.9904799461364746, "step": 8308, "token_acc": 0.299680403957194 }, { "epoch": 4.870712401055409, "grad_norm": 0.22370764201963325, "learning_rate": 0.00027031719274257847, "loss": 2.9609920978546143, "step": 8309, "token_acc": 0.3043538628866165 }, { "epoch": 4.871298739372618, "grad_norm": 0.20870684594517774, "learning_rate": 0.000270308510441686, "loss": 2.9610705375671387, "step": 8310, "token_acc": 0.304167827114587 }, { "epoch": 4.871885077689827, "grad_norm": 0.24826345593862675, "learning_rate": 0.0002702998270106572, "loss": 2.986482858657837, "step": 8311, "token_acc": 0.3018383553275969 }, { "epoch": 4.872471416007036, "grad_norm": 0.19853341617748643, "learning_rate": 0.00027029114244957365, "loss": 3.026829957962036, "step": 8312, "token_acc": 0.29290353556234244 }, { "epoch": 4.873057754324245, "grad_norm": 0.2786651819651593, "learning_rate": 0.00027028245675851686, "loss": 3.0408358573913574, "step": 8313, "token_acc": 0.29253726756588566 }, { "epoch": 4.873644092641454, "grad_norm": 0.2269772697968515, "learning_rate": 0.00027027376993756853, "loss": 3.0026705265045166, "step": 8314, "token_acc": 0.29847208006067133 }, { "epoch": 4.874230430958663, "grad_norm": 0.25695568579294104, "learning_rate": 0.00027026508198681025, "loss": 3.007025957107544, "step": 8315, "token_acc": 0.2972591339829803 }, { "epoch": 4.874816769275872, "grad_norm": 0.2631847657840096, "learning_rate": 0.00027025639290632344, "loss": 2.943148374557495, "step": 8316, "token_acc": 0.30700712271294556 }, { "epoch": 4.8754031075930815, "grad_norm": 0.23661515736191657, "learning_rate": 0.00027024770269618996, "loss": 2.9593827724456787, "step": 8317, "token_acc": 0.30481104816744164 }, { "epoch": 4.875989445910291, "grad_norm": 0.25111721648964436, "learning_rate": 0.00027023901135649135, "loss": 3.0096023082733154, "step": 8318, "token_acc": 0.2975572551428995 }, { "epoch": 4.8765757842275, "grad_norm": 0.2357558755775501, "learning_rate": 0.00027023031888730924, "loss": 3.0190649032592773, "step": 8319, "token_acc": 0.2969311621460563 }, { "epoch": 4.877162122544708, "grad_norm": 0.2152152197424242, "learning_rate": 0.00027022162528872527, "loss": 3.0297937393188477, "step": 8320, "token_acc": 0.295582867267216 }, { "epoch": 4.877748460861917, "grad_norm": 0.24459521765895484, "learning_rate": 0.00027021293056082114, "loss": 2.99194073677063, "step": 8321, "token_acc": 0.2978930171720939 }, { "epoch": 4.878334799179126, "grad_norm": 0.2058645297594416, "learning_rate": 0.0002702042347036785, "loss": 2.942354679107666, "step": 8322, "token_acc": 0.30625731448462173 }, { "epoch": 4.878921137496335, "grad_norm": 0.23631278594597172, "learning_rate": 0.0002701955377173791, "loss": 2.993741512298584, "step": 8323, "token_acc": 0.300544375769899 }, { "epoch": 4.879507475813544, "grad_norm": 0.23075767503014266, "learning_rate": 0.00027018683960200457, "loss": 2.969167947769165, "step": 8324, "token_acc": 0.302992447748009 }, { "epoch": 4.8800938141307535, "grad_norm": 0.22332381557828496, "learning_rate": 0.00027017814035763663, "loss": 2.994985342025757, "step": 8325, "token_acc": 0.30034226524395746 }, { "epoch": 4.880680152447963, "grad_norm": 0.24463309465615365, "learning_rate": 0.000270169439984357, "loss": 3.049400806427002, "step": 8326, "token_acc": 0.2920832496821254 }, { "epoch": 4.881266490765172, "grad_norm": 0.2334382853092266, "learning_rate": 0.00027016073848224744, "loss": 2.9844210147857666, "step": 8327, "token_acc": 0.30257162010240574 }, { "epoch": 4.881852829082381, "grad_norm": 0.22393271407282483, "learning_rate": 0.0002701520358513896, "loss": 3.002007484436035, "step": 8328, "token_acc": 0.2982487270068126 }, { "epoch": 4.88243916739959, "grad_norm": 0.2271573288808901, "learning_rate": 0.0002701433320918653, "loss": 2.9973502159118652, "step": 8329, "token_acc": 0.29873357399612105 }, { "epoch": 4.883025505716798, "grad_norm": 0.20608651754746962, "learning_rate": 0.0002701346272037564, "loss": 2.96848201751709, "step": 8330, "token_acc": 0.30349997602391215 }, { "epoch": 4.883611844034007, "grad_norm": 0.22779613192528259, "learning_rate": 0.00027012592118714443, "loss": 2.9799840450286865, "step": 8331, "token_acc": 0.303440595510228 }, { "epoch": 4.884198182351216, "grad_norm": 0.19277712424960758, "learning_rate": 0.00027011721404211135, "loss": 3.00592041015625, "step": 8332, "token_acc": 0.297942335093454 }, { "epoch": 4.8847845206684255, "grad_norm": 0.23224211302648523, "learning_rate": 0.00027010850576873887, "loss": 2.938991069793701, "step": 8333, "token_acc": 0.30752372648753157 }, { "epoch": 4.885370858985635, "grad_norm": 0.2234232331299914, "learning_rate": 0.0002700997963671089, "loss": 2.9563779830932617, "step": 8334, "token_acc": 0.3043632734739515 }, { "epoch": 4.885957197302844, "grad_norm": 0.22176051902812696, "learning_rate": 0.00027009108583730317, "loss": 3.010709285736084, "step": 8335, "token_acc": 0.29798133614462535 }, { "epoch": 4.886543535620053, "grad_norm": 0.22805129246129396, "learning_rate": 0.00027008237417940344, "loss": 2.9923486709594727, "step": 8336, "token_acc": 0.2989377882994904 }, { "epoch": 4.887129873937262, "grad_norm": 0.2160863244366599, "learning_rate": 0.0002700736613934917, "loss": 3.003962993621826, "step": 8337, "token_acc": 0.2977441065690178 }, { "epoch": 4.887716212254471, "grad_norm": 0.2081190836802079, "learning_rate": 0.0002700649474796496, "loss": 2.9922757148742676, "step": 8338, "token_acc": 0.2981770372430533 }, { "epoch": 4.88830255057168, "grad_norm": 0.20960946312061488, "learning_rate": 0.00027005623243795925, "loss": 2.9644925594329834, "step": 8339, "token_acc": 0.30314812238341576 }, { "epoch": 4.888888888888889, "grad_norm": 0.2219174063424716, "learning_rate": 0.00027004751626850227, "loss": 2.954298257827759, "step": 8340, "token_acc": 0.305631932364439 }, { "epoch": 4.889475227206098, "grad_norm": 0.20114860864952394, "learning_rate": 0.00027003879897136065, "loss": 2.9308090209960938, "step": 8341, "token_acc": 0.30858802518933376 }, { "epoch": 4.890061565523307, "grad_norm": 0.19597048494203545, "learning_rate": 0.00027003008054661635, "loss": 3.0033202171325684, "step": 8342, "token_acc": 0.29898269083375406 }, { "epoch": 4.890647903840516, "grad_norm": 0.20652956121923874, "learning_rate": 0.0002700213609943511, "loss": 3.0004847049713135, "step": 8343, "token_acc": 0.2995970078491282 }, { "epoch": 4.891234242157725, "grad_norm": 0.199535098714059, "learning_rate": 0.00027001264031464693, "loss": 2.9969570636749268, "step": 8344, "token_acc": 0.29919315257388746 }, { "epoch": 4.891820580474934, "grad_norm": 0.2106188376121522, "learning_rate": 0.00027000391850758574, "loss": 3.0193095207214355, "step": 8345, "token_acc": 0.2973497417166156 }, { "epoch": 4.892406918792143, "grad_norm": 0.220587921191076, "learning_rate": 0.0002699951955732494, "loss": 2.9307024478912354, "step": 8346, "token_acc": 0.3094149357005887 }, { "epoch": 4.892993257109352, "grad_norm": 0.23580207513280918, "learning_rate": 0.0002699864715117199, "loss": 3.0445942878723145, "step": 8347, "token_acc": 0.29174571595582127 }, { "epoch": 4.893579595426561, "grad_norm": 0.23390306811699116, "learning_rate": 0.0002699777463230792, "loss": 3.0159993171691895, "step": 8348, "token_acc": 0.29759288970249814 }, { "epoch": 4.89416593374377, "grad_norm": 0.201915639197607, "learning_rate": 0.00026996902000740925, "loss": 3.046464443206787, "step": 8349, "token_acc": 0.29314712808006055 }, { "epoch": 4.8947522720609795, "grad_norm": 0.2315374185054265, "learning_rate": 0.000269960292564792, "loss": 2.9855873584747314, "step": 8350, "token_acc": 0.29996698831894364 }, { "epoch": 4.895338610378188, "grad_norm": 0.21431515928075984, "learning_rate": 0.0002699515639953095, "loss": 2.999284029006958, "step": 8351, "token_acc": 0.2980395545918314 }, { "epoch": 4.895924948695397, "grad_norm": 0.20033907740782358, "learning_rate": 0.0002699428342990436, "loss": 3.0185885429382324, "step": 8352, "token_acc": 0.2965744841489138 }, { "epoch": 4.896511287012606, "grad_norm": 0.22491898272875785, "learning_rate": 0.0002699341034760765, "loss": 3.009448766708374, "step": 8353, "token_acc": 0.29726778442568874 }, { "epoch": 4.897097625329815, "grad_norm": 0.2109926414410152, "learning_rate": 0.0002699253715264901, "loss": 3.0009264945983887, "step": 8354, "token_acc": 0.2988948922616668 }, { "epoch": 4.897683963647024, "grad_norm": 0.22789136467725807, "learning_rate": 0.00026991663845036634, "loss": 2.967909812927246, "step": 8355, "token_acc": 0.30253024726583566 }, { "epoch": 4.898270301964233, "grad_norm": 0.20359132702118812, "learning_rate": 0.00026990790424778744, "loss": 2.99458646774292, "step": 8356, "token_acc": 0.2989665903499951 }, { "epoch": 4.898856640281442, "grad_norm": 0.21796669226261495, "learning_rate": 0.0002698991689188353, "loss": 3.0155787467956543, "step": 8357, "token_acc": 0.2963291957290243 }, { "epoch": 4.8994429785986515, "grad_norm": 0.2018675695516116, "learning_rate": 0.00026989043246359207, "loss": 2.9886474609375, "step": 8358, "token_acc": 0.30043004883425767 }, { "epoch": 4.900029316915861, "grad_norm": 0.212024831789168, "learning_rate": 0.0002698816948821398, "loss": 2.927656412124634, "step": 8359, "token_acc": 0.3092044794734197 }, { "epoch": 4.90061565523307, "grad_norm": 0.21955611695040633, "learning_rate": 0.00026987295617456053, "loss": 2.958770751953125, "step": 8360, "token_acc": 0.30328605436993206 }, { "epoch": 4.901201993550279, "grad_norm": 0.1929131331019733, "learning_rate": 0.0002698642163409363, "loss": 2.9613914489746094, "step": 8361, "token_acc": 0.3043022908301823 }, { "epoch": 4.901788331867488, "grad_norm": 0.25042426940239154, "learning_rate": 0.00026985547538134935, "loss": 2.9870407581329346, "step": 8362, "token_acc": 0.30051919534026833 }, { "epoch": 4.902374670184696, "grad_norm": 0.22773550843637352, "learning_rate": 0.00026984673329588166, "loss": 3.0202860832214355, "step": 8363, "token_acc": 0.29658324671517283 }, { "epoch": 4.902961008501905, "grad_norm": 0.22044379674509915, "learning_rate": 0.0002698379900846155, "loss": 3.0652928352355957, "step": 8364, "token_acc": 0.2889725126629253 }, { "epoch": 4.903547346819114, "grad_norm": 0.2029750977073379, "learning_rate": 0.0002698292457476328, "loss": 3.005385398864746, "step": 8365, "token_acc": 0.2972461269050371 }, { "epoch": 4.9041336851363235, "grad_norm": 0.2048933439272638, "learning_rate": 0.00026982050028501593, "loss": 2.997194766998291, "step": 8366, "token_acc": 0.2990568584093764 }, { "epoch": 4.904720023453533, "grad_norm": 0.21303290208827466, "learning_rate": 0.00026981175369684686, "loss": 2.996439218521118, "step": 8367, "token_acc": 0.2976712122532745 }, { "epoch": 4.905306361770742, "grad_norm": 0.2126438781667676, "learning_rate": 0.0002698030059832078, "loss": 2.990018844604492, "step": 8368, "token_acc": 0.3001522513338541 }, { "epoch": 4.905892700087951, "grad_norm": 0.21031879554456198, "learning_rate": 0.0002697942571441809, "loss": 2.9971084594726562, "step": 8369, "token_acc": 0.30014850264208764 }, { "epoch": 4.90647903840516, "grad_norm": 0.23065621452013596, "learning_rate": 0.00026978550717984845, "loss": 2.9923644065856934, "step": 8370, "token_acc": 0.30014770659140666 }, { "epoch": 4.907065376722369, "grad_norm": 0.2308186422445125, "learning_rate": 0.00026977675609029253, "loss": 3.0064454078674316, "step": 8371, "token_acc": 0.29700669544694 }, { "epoch": 4.907651715039578, "grad_norm": 0.20070861798165995, "learning_rate": 0.0002697680038755954, "loss": 2.9877872467041016, "step": 8372, "token_acc": 0.299996883796782 }, { "epoch": 4.908238053356786, "grad_norm": 0.24113697071260487, "learning_rate": 0.00026975925053583926, "loss": 2.956625461578369, "step": 8373, "token_acc": 0.30445273605658413 }, { "epoch": 4.9088243916739955, "grad_norm": 0.1946465123500959, "learning_rate": 0.00026975049607110636, "loss": 3.0029759407043457, "step": 8374, "token_acc": 0.2986755613492966 }, { "epoch": 4.909410729991205, "grad_norm": 0.23342854008300248, "learning_rate": 0.00026974174048147886, "loss": 3.003960132598877, "step": 8375, "token_acc": 0.29725691217104344 }, { "epoch": 4.909997068308414, "grad_norm": 0.206435509363085, "learning_rate": 0.00026973298376703914, "loss": 3.0221593379974365, "step": 8376, "token_acc": 0.29548151829911545 }, { "epoch": 4.910583406625623, "grad_norm": 0.19098095810438953, "learning_rate": 0.0002697242259278693, "loss": 2.9960806369781494, "step": 8377, "token_acc": 0.29971028526359356 }, { "epoch": 4.911169744942832, "grad_norm": 0.2021270139908501, "learning_rate": 0.00026971546696405175, "loss": 2.9911251068115234, "step": 8378, "token_acc": 0.2984395875218886 }, { "epoch": 4.911756083260041, "grad_norm": 0.2058673375755389, "learning_rate": 0.0002697067068756687, "loss": 2.9774391651153564, "step": 8379, "token_acc": 0.3007433794434786 }, { "epoch": 4.91234242157725, "grad_norm": 0.21314327746217107, "learning_rate": 0.0002696979456628024, "loss": 2.985125780105591, "step": 8380, "token_acc": 0.30076058158152186 }, { "epoch": 4.912928759894459, "grad_norm": 0.20556273554346913, "learning_rate": 0.00026968918332553524, "loss": 2.9810850620269775, "step": 8381, "token_acc": 0.30111268276816716 }, { "epoch": 4.913515098211668, "grad_norm": 0.21560597605152523, "learning_rate": 0.00026968041986394944, "loss": 2.987072229385376, "step": 8382, "token_acc": 0.30056176213241353 }, { "epoch": 4.9141014365288775, "grad_norm": 0.221942776286736, "learning_rate": 0.0002696716552781274, "loss": 3.0008859634399414, "step": 8383, "token_acc": 0.30010557803976773 }, { "epoch": 4.914687774846087, "grad_norm": 0.18795833429439313, "learning_rate": 0.0002696628895681514, "loss": 2.9854702949523926, "step": 8384, "token_acc": 0.3020270546823838 }, { "epoch": 4.915274113163295, "grad_norm": 0.22173167719645814, "learning_rate": 0.00026965412273410384, "loss": 2.942479133605957, "step": 8385, "token_acc": 0.3066781903921374 }, { "epoch": 4.915860451480504, "grad_norm": 0.2289871099873596, "learning_rate": 0.00026964535477606703, "loss": 2.985018253326416, "step": 8386, "token_acc": 0.29989182490337757 }, { "epoch": 4.916446789797713, "grad_norm": 0.2138179200384933, "learning_rate": 0.0002696365856941233, "loss": 3.042973279953003, "step": 8387, "token_acc": 0.2925601503152205 }, { "epoch": 4.917033128114922, "grad_norm": 0.19969635755408183, "learning_rate": 0.0002696278154883551, "loss": 2.9995951652526855, "step": 8388, "token_acc": 0.2999229479938676 }, { "epoch": 4.917619466432131, "grad_norm": 0.2134099973756909, "learning_rate": 0.0002696190441588447, "loss": 2.9686405658721924, "step": 8389, "token_acc": 0.30155211809033994 }, { "epoch": 4.91820580474934, "grad_norm": 0.21013660157159572, "learning_rate": 0.0002696102717056746, "loss": 2.9872312545776367, "step": 8390, "token_acc": 0.3018252377500107 }, { "epoch": 4.9187921430665495, "grad_norm": 0.20304796541311146, "learning_rate": 0.0002696014981289272, "loss": 3.01591157913208, "step": 8391, "token_acc": 0.2967994324830936 }, { "epoch": 4.919378481383759, "grad_norm": 0.21070608137065513, "learning_rate": 0.0002695927234286849, "loss": 3.03450870513916, "step": 8392, "token_acc": 0.29282850892168394 }, { "epoch": 4.919964819700968, "grad_norm": 0.2171034532229004, "learning_rate": 0.00026958394760503014, "loss": 2.9867115020751953, "step": 8393, "token_acc": 0.3000039200825831 }, { "epoch": 4.920551158018176, "grad_norm": 0.20959457681160368, "learning_rate": 0.00026957517065804524, "loss": 2.987800121307373, "step": 8394, "token_acc": 0.2997534131232333 }, { "epoch": 4.921137496335385, "grad_norm": 0.21093424197436736, "learning_rate": 0.00026956639258781284, "loss": 2.936405658721924, "step": 8395, "token_acc": 0.3083024187752035 }, { "epoch": 4.921723834652594, "grad_norm": 0.20153722135438673, "learning_rate": 0.00026955761339441525, "loss": 2.9935460090637207, "step": 8396, "token_acc": 0.30088912365245857 }, { "epoch": 4.922310172969803, "grad_norm": 0.21499051780596798, "learning_rate": 0.00026954883307793497, "loss": 2.996640205383301, "step": 8397, "token_acc": 0.298638714806364 }, { "epoch": 4.9228965112870124, "grad_norm": 0.22118314826799587, "learning_rate": 0.0002695400516384545, "loss": 2.956268072128296, "step": 8398, "token_acc": 0.3065479556543353 }, { "epoch": 4.923482849604222, "grad_norm": 0.23391369790275318, "learning_rate": 0.00026953126907605635, "loss": 3.018904447555542, "step": 8399, "token_acc": 0.2945343618550592 }, { "epoch": 4.924069187921431, "grad_norm": 0.2165648295214148, "learning_rate": 0.000269522485390823, "loss": 2.942016124725342, "step": 8400, "token_acc": 0.305956547771339 }, { "epoch": 4.92465552623864, "grad_norm": 0.23227379973558326, "learning_rate": 0.00026951370058283695, "loss": 3.0330958366394043, "step": 8401, "token_acc": 0.29258421552750974 }, { "epoch": 4.925241864555849, "grad_norm": 0.27364698104265506, "learning_rate": 0.0002695049146521807, "loss": 2.997887134552002, "step": 8402, "token_acc": 0.29722415950308095 }, { "epoch": 4.925828202873058, "grad_norm": 0.2018351753700248, "learning_rate": 0.0002694961275989369, "loss": 2.988598346710205, "step": 8403, "token_acc": 0.3002343557805601 }, { "epoch": 4.926414541190267, "grad_norm": 0.24482478802891694, "learning_rate": 0.0002694873394231879, "loss": 2.9950923919677734, "step": 8404, "token_acc": 0.2999200333681361 }, { "epoch": 4.927000879507476, "grad_norm": 0.22260276034979332, "learning_rate": 0.0002694785501250164, "loss": 2.9766478538513184, "step": 8405, "token_acc": 0.30282172547773145 }, { "epoch": 4.927587217824685, "grad_norm": 0.2003946066684013, "learning_rate": 0.0002694697597045049, "loss": 2.9792351722717285, "step": 8406, "token_acc": 0.301676268525774 }, { "epoch": 4.928173556141894, "grad_norm": 0.21692017672320285, "learning_rate": 0.00026946096816173607, "loss": 2.974802255630493, "step": 8407, "token_acc": 0.30251938085940566 }, { "epoch": 4.928759894459103, "grad_norm": 0.19313556769733614, "learning_rate": 0.0002694521754967924, "loss": 2.968822956085205, "step": 8408, "token_acc": 0.30276796059872624 }, { "epoch": 4.929346232776312, "grad_norm": 0.22271084516522124, "learning_rate": 0.00026944338170975643, "loss": 3.0156126022338867, "step": 8409, "token_acc": 0.29636260154188443 }, { "epoch": 4.929932571093521, "grad_norm": 0.2190101779363888, "learning_rate": 0.00026943458680071087, "loss": 3.0207104682922363, "step": 8410, "token_acc": 0.2955366811817308 }, { "epoch": 4.93051890941073, "grad_norm": 0.20434589375052417, "learning_rate": 0.0002694257907697383, "loss": 2.9921469688415527, "step": 8411, "token_acc": 0.3005560101992216 }, { "epoch": 4.931105247727939, "grad_norm": 0.2240634989308571, "learning_rate": 0.0002694169936169214, "loss": 2.998490810394287, "step": 8412, "token_acc": 0.2985619709425901 }, { "epoch": 4.931691586045148, "grad_norm": 0.2070128268531123, "learning_rate": 0.0002694081953423428, "loss": 2.9799141883850098, "step": 8413, "token_acc": 0.3021201228266907 }, { "epoch": 4.932277924362357, "grad_norm": 0.22889648050386702, "learning_rate": 0.000269399395946085, "loss": 2.9714977741241455, "step": 8414, "token_acc": 0.3032450742100636 }, { "epoch": 4.9328642626795665, "grad_norm": 0.20199597724945453, "learning_rate": 0.0002693905954282308, "loss": 3.0099167823791504, "step": 8415, "token_acc": 0.2986618532268701 }, { "epoch": 4.933450600996775, "grad_norm": 0.20759498431735665, "learning_rate": 0.0002693817937888629, "loss": 3.017467498779297, "step": 8416, "token_acc": 0.2963571813030786 }, { "epoch": 4.934036939313984, "grad_norm": 0.19768901082411183, "learning_rate": 0.00026937299102806385, "loss": 2.999526023864746, "step": 8417, "token_acc": 0.29828122778471117 }, { "epoch": 4.934623277631193, "grad_norm": 0.2307507312159836, "learning_rate": 0.0002693641871459164, "loss": 2.9789161682128906, "step": 8418, "token_acc": 0.30155039800260414 }, { "epoch": 4.935209615948402, "grad_norm": 0.2175448400383013, "learning_rate": 0.0002693553821425033, "loss": 2.9536073207855225, "step": 8419, "token_acc": 0.3042566873079124 }, { "epoch": 4.935795954265611, "grad_norm": 0.20223555643901286, "learning_rate": 0.00026934657601790715, "loss": 2.9949638843536377, "step": 8420, "token_acc": 0.3004747843958531 }, { "epoch": 4.93638229258282, "grad_norm": 0.1956559284687212, "learning_rate": 0.00026933776877221085, "loss": 2.9929745197296143, "step": 8421, "token_acc": 0.2997855971749275 }, { "epoch": 4.936968630900029, "grad_norm": 0.22867970828366463, "learning_rate": 0.00026932896040549693, "loss": 3.0099945068359375, "step": 8422, "token_acc": 0.2975509226750177 }, { "epoch": 4.9375549692172385, "grad_norm": 0.21787166826352594, "learning_rate": 0.00026932015091784824, "loss": 3.006866216659546, "step": 8423, "token_acc": 0.29695328266348053 }, { "epoch": 4.938141307534448, "grad_norm": 0.1973800338440578, "learning_rate": 0.00026931134030934754, "loss": 3.0022268295288086, "step": 8424, "token_acc": 0.2988993577228871 }, { "epoch": 4.938727645851657, "grad_norm": 0.21314846864445536, "learning_rate": 0.00026930252858007757, "loss": 2.996411085128784, "step": 8425, "token_acc": 0.297467146126185 }, { "epoch": 4.939313984168866, "grad_norm": 0.20750597850772923, "learning_rate": 0.0002692937157301211, "loss": 2.9750428199768066, "step": 8426, "token_acc": 0.3034199264209025 }, { "epoch": 4.939900322486075, "grad_norm": 0.2199610803785113, "learning_rate": 0.00026928490175956096, "loss": 2.990264654159546, "step": 8427, "token_acc": 0.3009576140640633 }, { "epoch": 4.940486660803283, "grad_norm": 0.21524123817502536, "learning_rate": 0.00026927608666847984, "loss": 2.986807346343994, "step": 8428, "token_acc": 0.29978333145571395 }, { "epoch": 4.941072999120492, "grad_norm": 0.20568120707832382, "learning_rate": 0.00026926727045696066, "loss": 2.991528272628784, "step": 8429, "token_acc": 0.29982868712038624 }, { "epoch": 4.941659337437701, "grad_norm": 0.20347567623846854, "learning_rate": 0.00026925845312508615, "loss": 2.975145101547241, "step": 8430, "token_acc": 0.30244403533315084 }, { "epoch": 4.9422456757549105, "grad_norm": 0.23440120849188176, "learning_rate": 0.0002692496346729392, "loss": 2.952730417251587, "step": 8431, "token_acc": 0.3040475518038514 }, { "epoch": 4.94283201407212, "grad_norm": 0.22134977484793977, "learning_rate": 0.0002692408151006026, "loss": 3.020005226135254, "step": 8432, "token_acc": 0.29472090373291093 }, { "epoch": 4.943418352389329, "grad_norm": 0.23924042969055048, "learning_rate": 0.00026923199440815926, "loss": 3.0366058349609375, "step": 8433, "token_acc": 0.2942967488812146 }, { "epoch": 4.944004690706538, "grad_norm": 0.2649927303413495, "learning_rate": 0.000269223172595692, "loss": 2.9961647987365723, "step": 8434, "token_acc": 0.2989492962416224 }, { "epoch": 4.944591029023747, "grad_norm": 0.23854407432808508, "learning_rate": 0.00026921434966328367, "loss": 3.0154669284820557, "step": 8435, "token_acc": 0.29681241871719544 }, { "epoch": 4.945177367340956, "grad_norm": 0.2158491479406104, "learning_rate": 0.0002692055256110171, "loss": 2.978804111480713, "step": 8436, "token_acc": 0.3018929161949497 }, { "epoch": 4.945763705658165, "grad_norm": 0.2025285500797107, "learning_rate": 0.00026919670043897535, "loss": 2.9478654861450195, "step": 8437, "token_acc": 0.3051872959143974 }, { "epoch": 4.946350043975373, "grad_norm": 0.20950336186903448, "learning_rate": 0.0002691878741472412, "loss": 2.9977848529815674, "step": 8438, "token_acc": 0.2970232689444683 }, { "epoch": 4.9469363822925825, "grad_norm": 0.2018905349798462, "learning_rate": 0.0002691790467358976, "loss": 3.015772819519043, "step": 8439, "token_acc": 0.2954351272199683 }, { "epoch": 4.947522720609792, "grad_norm": 0.20918986965550712, "learning_rate": 0.00026917021820502734, "loss": 2.980820417404175, "step": 8440, "token_acc": 0.3016046914635738 }, { "epoch": 4.948109058927001, "grad_norm": 0.221952899334816, "learning_rate": 0.0002691613885547136, "loss": 3.004291534423828, "step": 8441, "token_acc": 0.29754385020983537 }, { "epoch": 4.94869539724421, "grad_norm": 0.19124200755718296, "learning_rate": 0.00026915255778503906, "loss": 2.9752614498138428, "step": 8442, "token_acc": 0.3008053138665609 }, { "epoch": 4.949281735561419, "grad_norm": 0.22770180341123622, "learning_rate": 0.0002691437258960869, "loss": 3.0074715614318848, "step": 8443, "token_acc": 0.29726466952473635 }, { "epoch": 4.949868073878628, "grad_norm": 0.21329574813881888, "learning_rate": 0.00026913489288793994, "loss": 2.9973888397216797, "step": 8444, "token_acc": 0.30099966135082246 }, { "epoch": 4.950454412195837, "grad_norm": 0.20505384439753171, "learning_rate": 0.00026912605876068123, "loss": 2.97145676612854, "step": 8445, "token_acc": 0.30133277645602285 }, { "epoch": 4.951040750513046, "grad_norm": 0.1948742082031514, "learning_rate": 0.0002691172235143937, "loss": 3.0285253524780273, "step": 8446, "token_acc": 0.29586629309346 }, { "epoch": 4.951627088830255, "grad_norm": 0.2112920860753504, "learning_rate": 0.0002691083871491604, "loss": 2.980862617492676, "step": 8447, "token_acc": 0.30051653393727545 }, { "epoch": 4.9522134271474645, "grad_norm": 0.19654947768045805, "learning_rate": 0.00026909954966506424, "loss": 3.0201306343078613, "step": 8448, "token_acc": 0.2949476185561827 }, { "epoch": 4.952799765464674, "grad_norm": 0.19769170493678753, "learning_rate": 0.00026909071106218834, "loss": 3.0439023971557617, "step": 8449, "token_acc": 0.29305726153643286 }, { "epoch": 4.953386103781882, "grad_norm": 0.20819369297640491, "learning_rate": 0.00026908187134061566, "loss": 2.9871439933776855, "step": 8450, "token_acc": 0.3008230726339859 }, { "epoch": 4.953972442099091, "grad_norm": 0.21233919880545474, "learning_rate": 0.00026907303050042927, "loss": 2.997241973876953, "step": 8451, "token_acc": 0.29816929490665217 }, { "epoch": 4.9545587804163, "grad_norm": 0.20627163432831816, "learning_rate": 0.0002690641885417122, "loss": 3.0203399658203125, "step": 8452, "token_acc": 0.29620003631364616 }, { "epoch": 4.955145118733509, "grad_norm": 0.19123315223970358, "learning_rate": 0.00026905534546454757, "loss": 2.993912696838379, "step": 8453, "token_acc": 0.2990920320399353 }, { "epoch": 4.955731457050718, "grad_norm": 0.2081198969617762, "learning_rate": 0.00026904650126901837, "loss": 2.9965202808380127, "step": 8454, "token_acc": 0.30055903549538626 }, { "epoch": 4.956317795367927, "grad_norm": 0.1952698248411993, "learning_rate": 0.0002690376559552077, "loss": 2.9936485290527344, "step": 8455, "token_acc": 0.297996685068452 }, { "epoch": 4.9569041336851365, "grad_norm": 0.20232938908006545, "learning_rate": 0.0002690288095231987, "loss": 3.019227981567383, "step": 8456, "token_acc": 0.2936725815274348 }, { "epoch": 4.957490472002346, "grad_norm": 0.20036035873785094, "learning_rate": 0.0002690199619730744, "loss": 2.952414035797119, "step": 8457, "token_acc": 0.3059009406044965 }, { "epoch": 4.958076810319555, "grad_norm": 0.20267484208594616, "learning_rate": 0.0002690111133049179, "loss": 2.9739437103271484, "step": 8458, "token_acc": 0.3013980092265492 }, { "epoch": 4.958663148636763, "grad_norm": 0.19394045548068875, "learning_rate": 0.00026900226351881236, "loss": 2.982703447341919, "step": 8459, "token_acc": 0.3016168830492769 }, { "epoch": 4.959249486953972, "grad_norm": 0.22145016718680308, "learning_rate": 0.000268993412614841, "loss": 2.9804892539978027, "step": 8460, "token_acc": 0.30077323105664205 }, { "epoch": 4.959835825271181, "grad_norm": 0.22650879298993257, "learning_rate": 0.00026898456059308676, "loss": 2.9736311435699463, "step": 8461, "token_acc": 0.3037633533090151 }, { "epoch": 4.96042216358839, "grad_norm": 0.25046212827401537, "learning_rate": 0.00026897570745363297, "loss": 2.9817047119140625, "step": 8462, "token_acc": 0.30017916573085324 }, { "epoch": 4.961008501905599, "grad_norm": 0.18876787692334743, "learning_rate": 0.00026896685319656276, "loss": 2.9657788276672363, "step": 8463, "token_acc": 0.30202046842597907 }, { "epoch": 4.9615948402228085, "grad_norm": 0.2145776393511897, "learning_rate": 0.00026895799782195923, "loss": 3.033235549926758, "step": 8464, "token_acc": 0.29612808240388117 }, { "epoch": 4.962181178540018, "grad_norm": 0.21159072824452757, "learning_rate": 0.0002689491413299056, "loss": 2.962999105453491, "step": 8465, "token_acc": 0.303319281082986 }, { "epoch": 4.962767516857227, "grad_norm": 0.20996798544046502, "learning_rate": 0.0002689402837204851, "loss": 3.029125928878784, "step": 8466, "token_acc": 0.2930430035471911 }, { "epoch": 4.963353855174436, "grad_norm": 0.19915183699198472, "learning_rate": 0.0002689314249937809, "loss": 2.9940290451049805, "step": 8467, "token_acc": 0.2995471772153206 }, { "epoch": 4.963940193491645, "grad_norm": 0.21439359307416384, "learning_rate": 0.0002689225651498762, "loss": 2.9982776641845703, "step": 8468, "token_acc": 0.2991146779955971 }, { "epoch": 4.964526531808854, "grad_norm": 0.2227121993941797, "learning_rate": 0.0002689137041888543, "loss": 2.987152576446533, "step": 8469, "token_acc": 0.3008716801148285 }, { "epoch": 4.965112870126063, "grad_norm": 0.20264125439393568, "learning_rate": 0.00026890484211079834, "loss": 2.965209484100342, "step": 8470, "token_acc": 0.3029685174555268 }, { "epoch": 4.965699208443271, "grad_norm": 0.21333189836358857, "learning_rate": 0.00026889597891579164, "loss": 2.974865436553955, "step": 8471, "token_acc": 0.30254201050162943 }, { "epoch": 4.9662855467604805, "grad_norm": 0.217476352584251, "learning_rate": 0.0002688871146039174, "loss": 2.9489784240722656, "step": 8472, "token_acc": 0.3052412652063079 }, { "epoch": 4.96687188507769, "grad_norm": 0.21975665916074055, "learning_rate": 0.00026887824917525894, "loss": 3.0387122631073, "step": 8473, "token_acc": 0.2941518743175061 }, { "epoch": 4.967458223394899, "grad_norm": 0.21780992866489557, "learning_rate": 0.0002688693826298996, "loss": 2.9538016319274902, "step": 8474, "token_acc": 0.3058410299618433 }, { "epoch": 4.968044561712108, "grad_norm": 0.22998742603428857, "learning_rate": 0.00026886051496792247, "loss": 2.9767539501190186, "step": 8475, "token_acc": 0.30383988454909777 }, { "epoch": 4.968630900029317, "grad_norm": 0.20713760888149024, "learning_rate": 0.000268851646189411, "loss": 2.9757001399993896, "step": 8476, "token_acc": 0.303928524574363 }, { "epoch": 4.969217238346526, "grad_norm": 0.21319637710003644, "learning_rate": 0.0002688427762944485, "loss": 2.9882278442382812, "step": 8477, "token_acc": 0.3001053876208102 }, { "epoch": 4.969803576663735, "grad_norm": 0.20508565859759853, "learning_rate": 0.0002688339052831182, "loss": 3.0037784576416016, "step": 8478, "token_acc": 0.2977761084244957 }, { "epoch": 4.970389914980944, "grad_norm": 0.23151396736611776, "learning_rate": 0.0002688250331555036, "loss": 3.0115020275115967, "step": 8479, "token_acc": 0.29728166570271836 }, { "epoch": 4.970976253298153, "grad_norm": 0.20139569422768752, "learning_rate": 0.00026881615991168785, "loss": 2.991868734359741, "step": 8480, "token_acc": 0.29873567433284615 }, { "epoch": 4.971562591615362, "grad_norm": 0.19226075973248716, "learning_rate": 0.0002688072855517544, "loss": 2.9649338722229004, "step": 8481, "token_acc": 0.30468180966849107 }, { "epoch": 4.972148929932571, "grad_norm": 0.22781695536934862, "learning_rate": 0.0002687984100757866, "loss": 2.9693360328674316, "step": 8482, "token_acc": 0.30332206630475606 }, { "epoch": 4.97273526824978, "grad_norm": 0.2094257698110659, "learning_rate": 0.00026878953348386783, "loss": 3.0008745193481445, "step": 8483, "token_acc": 0.297136942162911 }, { "epoch": 4.973321606566989, "grad_norm": 0.1906570165359877, "learning_rate": 0.0002687806557760815, "loss": 2.983187675476074, "step": 8484, "token_acc": 0.3008018673766588 }, { "epoch": 4.973907944884198, "grad_norm": 0.2528615748365615, "learning_rate": 0.00026877177695251085, "loss": 3.0421366691589355, "step": 8485, "token_acc": 0.2918308459225226 }, { "epoch": 4.974494283201407, "grad_norm": 0.21558673000252443, "learning_rate": 0.00026876289701323943, "loss": 3.0209274291992188, "step": 8486, "token_acc": 0.29600109126798385 }, { "epoch": 4.975080621518616, "grad_norm": 0.2195569043358475, "learning_rate": 0.0002687540159583507, "loss": 2.9999489784240723, "step": 8487, "token_acc": 0.2997634631328288 }, { "epoch": 4.975666959835825, "grad_norm": 0.22676693059740008, "learning_rate": 0.000268745133787928, "loss": 2.9698002338409424, "step": 8488, "token_acc": 0.3044100580270793 }, { "epoch": 4.9762532981530345, "grad_norm": 0.2076299132726171, "learning_rate": 0.00026873625050205475, "loss": 2.958136558532715, "step": 8489, "token_acc": 0.30499698161910116 }, { "epoch": 4.976839636470244, "grad_norm": 0.2222294543663013, "learning_rate": 0.00026872736610081445, "loss": 2.975961685180664, "step": 8490, "token_acc": 0.30270208671644533 }, { "epoch": 4.977425974787453, "grad_norm": 0.20067974200373415, "learning_rate": 0.0002687184805842905, "loss": 3.0124568939208984, "step": 8491, "token_acc": 0.29955250450489684 }, { "epoch": 4.978012313104662, "grad_norm": 0.2211391695843876, "learning_rate": 0.0002687095939525664, "loss": 2.9497406482696533, "step": 8492, "token_acc": 0.3059322945754705 }, { "epoch": 4.97859865142187, "grad_norm": 0.242848362485957, "learning_rate": 0.00026870070620572565, "loss": 2.9625041484832764, "step": 8493, "token_acc": 0.30511928402234145 }, { "epoch": 4.979184989739079, "grad_norm": 0.20145584357890522, "learning_rate": 0.00026869181734385177, "loss": 2.9758644104003906, "step": 8494, "token_acc": 0.3011712769215066 }, { "epoch": 4.979771328056288, "grad_norm": 0.21269066245735013, "learning_rate": 0.0002686829273670281, "loss": 2.9367735385894775, "step": 8495, "token_acc": 0.3068337432866343 }, { "epoch": 4.980357666373497, "grad_norm": 0.2067552948377258, "learning_rate": 0.0002686740362753383, "loss": 3.0068559646606445, "step": 8496, "token_acc": 0.29747539283251084 }, { "epoch": 4.9809440046907065, "grad_norm": 0.20468657401543952, "learning_rate": 0.00026866514406886585, "loss": 2.9954683780670166, "step": 8497, "token_acc": 0.29878859373104355 }, { "epoch": 4.981530343007916, "grad_norm": 0.21790574319168096, "learning_rate": 0.0002686562507476943, "loss": 3.014962911605835, "step": 8498, "token_acc": 0.2963923442978795 }, { "epoch": 4.982116681325125, "grad_norm": 0.19771411631356903, "learning_rate": 0.0002686473563119071, "loss": 2.980262517929077, "step": 8499, "token_acc": 0.301705067200935 }, { "epoch": 4.982703019642334, "grad_norm": 0.23090012021942957, "learning_rate": 0.0002686384607615879, "loss": 3.062157392501831, "step": 8500, "token_acc": 0.2896665596952349 }, { "epoch": 4.983289357959543, "grad_norm": 0.22546734852786815, "learning_rate": 0.0002686295640968203, "loss": 2.979034900665283, "step": 8501, "token_acc": 0.30035945438005196 }, { "epoch": 4.983875696276751, "grad_norm": 0.2111143415019514, "learning_rate": 0.0002686206663176877, "loss": 2.9808335304260254, "step": 8502, "token_acc": 0.30095291568060534 }, { "epoch": 4.98446203459396, "grad_norm": 0.1935376485788839, "learning_rate": 0.00026861176742427385, "loss": 3.055887222290039, "step": 8503, "token_acc": 0.2902880538698146 }, { "epoch": 4.985048372911169, "grad_norm": 0.21508832953377718, "learning_rate": 0.0002686028674166622, "loss": 3.050093650817871, "step": 8504, "token_acc": 0.29282662433561574 }, { "epoch": 4.9856347112283785, "grad_norm": 0.20413416276731422, "learning_rate": 0.0002685939662949365, "loss": 2.9800338745117188, "step": 8505, "token_acc": 0.30172705111503784 }, { "epoch": 4.986221049545588, "grad_norm": 0.19560517290676374, "learning_rate": 0.0002685850640591803, "loss": 2.972660779953003, "step": 8506, "token_acc": 0.3033042965013331 }, { "epoch": 4.986807387862797, "grad_norm": 0.227106455877966, "learning_rate": 0.00026857616070947716, "loss": 3.0394527912139893, "step": 8507, "token_acc": 0.2938980236364878 }, { "epoch": 4.987393726180006, "grad_norm": 0.21508854612138037, "learning_rate": 0.00026856725624591077, "loss": 2.954577684402466, "step": 8508, "token_acc": 0.3040899513923709 }, { "epoch": 4.987980064497215, "grad_norm": 0.23193811498604655, "learning_rate": 0.00026855835066856484, "loss": 3.043628692626953, "step": 8509, "token_acc": 0.29314740759883523 }, { "epoch": 4.988566402814424, "grad_norm": 0.24117976327701496, "learning_rate": 0.0002685494439775229, "loss": 2.993330478668213, "step": 8510, "token_acc": 0.30112268705065437 }, { "epoch": 4.989152741131633, "grad_norm": 0.24443736466924584, "learning_rate": 0.00026854053617286875, "loss": 2.9977176189422607, "step": 8511, "token_acc": 0.3018649044760815 }, { "epoch": 4.989739079448842, "grad_norm": 0.21939386391594667, "learning_rate": 0.000268531627254686, "loss": 2.9714248180389404, "step": 8512, "token_acc": 0.30218344195399205 }, { "epoch": 4.990325417766051, "grad_norm": 0.24162567612846325, "learning_rate": 0.0002685227172230583, "loss": 2.9805681705474854, "step": 8513, "token_acc": 0.2997437936213446 }, { "epoch": 4.99091175608326, "grad_norm": 0.22742279283167072, "learning_rate": 0.0002685138060780693, "loss": 2.9601595401763916, "step": 8514, "token_acc": 0.3030044317502697 }, { "epoch": 4.991498094400469, "grad_norm": 0.22258674173850046, "learning_rate": 0.0002685048938198029, "loss": 3.0183377265930176, "step": 8515, "token_acc": 0.29600136063401483 }, { "epoch": 4.992084432717678, "grad_norm": 0.2279149068103971, "learning_rate": 0.0002684959804483427, "loss": 2.96256685256958, "step": 8516, "token_acc": 0.30310142541574625 }, { "epoch": 4.992670771034887, "grad_norm": 0.22246681186620215, "learning_rate": 0.0002684870659637724, "loss": 3.0014727115631104, "step": 8517, "token_acc": 0.2997433531737932 }, { "epoch": 4.993257109352096, "grad_norm": 0.22669150093585383, "learning_rate": 0.00026847815036617584, "loss": 3.0169224739074707, "step": 8518, "token_acc": 0.29893758539327864 }, { "epoch": 4.993843447669305, "grad_norm": 0.23032630239969018, "learning_rate": 0.00026846923365563664, "loss": 3.028780221939087, "step": 8519, "token_acc": 0.2946393113659854 }, { "epoch": 4.994429785986514, "grad_norm": 0.21039961834699555, "learning_rate": 0.00026846031583223866, "loss": 3.0046772956848145, "step": 8520, "token_acc": 0.29804268129585826 }, { "epoch": 4.995016124303723, "grad_norm": 0.2172861001487408, "learning_rate": 0.00026845139689606566, "loss": 3.0222673416137695, "step": 8521, "token_acc": 0.29566145732844556 }, { "epoch": 4.9956024626209325, "grad_norm": 0.2344632773659439, "learning_rate": 0.00026844247684720135, "loss": 2.9567339420318604, "step": 8522, "token_acc": 0.30555903571689585 }, { "epoch": 4.996188800938142, "grad_norm": 0.19150579539406898, "learning_rate": 0.00026843355568572963, "loss": 2.9834752082824707, "step": 8523, "token_acc": 0.2995720147588075 }, { "epoch": 4.99677513925535, "grad_norm": 0.23290790577776418, "learning_rate": 0.0002684246334117342, "loss": 3.0050179958343506, "step": 8524, "token_acc": 0.2980218322175985 }, { "epoch": 4.997361477572559, "grad_norm": 0.22853961310330662, "learning_rate": 0.0002684157100252989, "loss": 3.019460439682007, "step": 8525, "token_acc": 0.295701180530556 }, { "epoch": 4.997947815889768, "grad_norm": 0.21390865040351112, "learning_rate": 0.0002684067855265076, "loss": 3.009934902191162, "step": 8526, "token_acc": 0.2981720213515963 }, { "epoch": 4.998534154206977, "grad_norm": 0.2266954669656876, "learning_rate": 0.00026839785991544416, "loss": 3.0203042030334473, "step": 8527, "token_acc": 0.2959147888279368 }, { "epoch": 4.999120492524186, "grad_norm": 0.22410167785565435, "learning_rate": 0.0002683889331921923, "loss": 3.0135276317596436, "step": 8528, "token_acc": 0.29794654180079994 }, { "epoch": 4.999706830841395, "grad_norm": 0.23718210248128158, "learning_rate": 0.000268380005356836, "loss": 3.0254733562469482, "step": 8529, "token_acc": 0.2943432656682148 }, { "epoch": 5.0, "grad_norm": 0.2427673885137112, "learning_rate": 0.00026837107640945905, "loss": 3.0045809745788574, "step": 8530, "token_acc": 0.2979218530143955 }, { "epoch": 5.0, "eval_loss": 3.078289031982422, "eval_runtime": 8.7208, "eval_samples_per_second": 29.355, "eval_steps_per_second": 3.669, "eval_token_acc": 0.28924068596139113, "step": 8530 } ], "logging_steps": 1, "max_steps": 34120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": -34120, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3859518101340160.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }