{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8676036786395974, "eval_steps": 1000.0, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017352073572791948, "grad_norm": 8.0, "learning_rate": 6.920415224913495e-08, "loss": 1.9091681241989136, "step": 1, "token_acc": 0.5288686692981869 }, { "epoch": 0.0008676036786395974, "grad_norm": 9.1875, "learning_rate": 3.460207612456748e-07, "loss": 1.9248077869415283, "step": 5, "token_acc": 0.5311277064784593 }, { "epoch": 0.0017352073572791948, "grad_norm": 8.0625, "learning_rate": 6.920415224913496e-07, "loss": 1.9248884201049805, "step": 10, "token_acc": 0.5290013381031705 }, { "epoch": 0.0026028110359187923, "grad_norm": 8.1875, "learning_rate": 1.0380622837370243e-06, "loss": 1.9355482101440429, "step": 15, "token_acc": 0.5267119264551204 }, { "epoch": 0.0034704147145583897, "grad_norm": 7.6875, "learning_rate": 1.3840830449826992e-06, "loss": 1.9109724044799805, "step": 20, "token_acc": 0.5344387591531053 }, { "epoch": 0.004338018393197987, "grad_norm": 7.5625, "learning_rate": 1.7301038062283736e-06, "loss": 1.9011001586914062, "step": 25, "token_acc": 0.5359598310957124 }, { "epoch": 0.0052056220718375845, "grad_norm": 7.09375, "learning_rate": 2.0761245674740485e-06, "loss": 1.8969675064086915, "step": 30, "token_acc": 0.535208283678928 }, { "epoch": 0.006073225750477182, "grad_norm": 6.375, "learning_rate": 2.4221453287197232e-06, "loss": 1.8716100692749023, "step": 35, "token_acc": 0.5402535722571752 }, { "epoch": 0.006940829429116779, "grad_norm": 5.71875, "learning_rate": 2.7681660899653983e-06, "loss": 1.845738410949707, "step": 40, "token_acc": 0.5421880925293895 }, { "epoch": 0.007808433107756377, "grad_norm": 5.34375, "learning_rate": 3.114186851211073e-06, "loss": 1.8337472915649413, "step": 45, "token_acc": 0.5421731912386869 }, { "epoch": 0.008676036786395974, "grad_norm": 4.875, "learning_rate": 3.4602076124567473e-06, "loss": 1.782250213623047, "step": 50, "token_acc": 0.5486540746507149 }, { "epoch": 0.009543640465035572, "grad_norm": 4.25, "learning_rate": 3.8062283737024224e-06, "loss": 1.7817264556884767, "step": 55, "token_acc": 0.553443922569435 }, { "epoch": 0.010411244143675169, "grad_norm": 4.0, "learning_rate": 4.152249134948097e-06, "loss": 1.7241941452026368, "step": 60, "token_acc": 0.562488997223915 }, { "epoch": 0.011278847822314766, "grad_norm": 3.46875, "learning_rate": 4.498269896193772e-06, "loss": 1.664463996887207, "step": 65, "token_acc": 0.5773684070111784 }, { "epoch": 0.012146451500954364, "grad_norm": 3.125, "learning_rate": 4.8442906574394464e-06, "loss": 1.6484092712402343, "step": 70, "token_acc": 0.5734682375674297 }, { "epoch": 0.013014055179593961, "grad_norm": 2.8125, "learning_rate": 5.190311418685121e-06, "loss": 1.6332122802734375, "step": 75, "token_acc": 0.5776690475867771 }, { "epoch": 0.013881658858233559, "grad_norm": 2.59375, "learning_rate": 5.536332179930797e-06, "loss": 1.6145336151123046, "step": 80, "token_acc": 0.5818942893134579 }, { "epoch": 0.014749262536873156, "grad_norm": 2.515625, "learning_rate": 5.882352941176471e-06, "loss": 1.5732461929321289, "step": 85, "token_acc": 0.5861122807970769 }, { "epoch": 0.015616866215512754, "grad_norm": 2.28125, "learning_rate": 6.228373702422146e-06, "loss": 1.5478083610534668, "step": 90, "token_acc": 0.5937751867175326 }, { "epoch": 0.01648446989415235, "grad_norm": 2.34375, "learning_rate": 6.57439446366782e-06, "loss": 1.5324504852294922, "step": 95, "token_acc": 0.5946228348671784 }, { "epoch": 0.01735207357279195, "grad_norm": 2.3125, "learning_rate": 6.9204152249134946e-06, "loss": 1.5145973205566405, "step": 100, "token_acc": 0.5947611081643478 }, { "epoch": 0.018219677251431546, "grad_norm": 2.25, "learning_rate": 7.2664359861591705e-06, "loss": 1.4743114471435548, "step": 105, "token_acc": 0.6041969950833631 }, { "epoch": 0.019087280930071143, "grad_norm": 2.34375, "learning_rate": 7.612456747404845e-06, "loss": 1.4888650894165039, "step": 110, "token_acc": 0.6016704100708781 }, { "epoch": 0.01995488460871074, "grad_norm": 2.3125, "learning_rate": 7.958477508650519e-06, "loss": 1.450081729888916, "step": 115, "token_acc": 0.6071094352086315 }, { "epoch": 0.020822488287350338, "grad_norm": 2.4375, "learning_rate": 8.304498269896194e-06, "loss": 1.453689956665039, "step": 120, "token_acc": 0.6067426531172755 }, { "epoch": 0.021690091965989935, "grad_norm": 2.25, "learning_rate": 8.65051903114187e-06, "loss": 1.4380900382995605, "step": 125, "token_acc": 0.6129145486928147 }, { "epoch": 0.022557695644629533, "grad_norm": 2.296875, "learning_rate": 8.996539792387544e-06, "loss": 1.4033380508422852, "step": 130, "token_acc": 0.6155833692194423 }, { "epoch": 0.02342529932326913, "grad_norm": 2.125, "learning_rate": 9.34256055363322e-06, "loss": 1.3930879592895509, "step": 135, "token_acc": 0.6180772126035058 }, { "epoch": 0.024292903001908728, "grad_norm": 1.96875, "learning_rate": 9.688581314878893e-06, "loss": 1.3432926177978515, "step": 140, "token_acc": 0.6306223488778284 }, { "epoch": 0.025160506680548325, "grad_norm": 2.234375, "learning_rate": 1.0034602076124568e-05, "loss": 1.39078369140625, "step": 145, "token_acc": 0.6173330823630644 }, { "epoch": 0.026028110359187923, "grad_norm": 2.28125, "learning_rate": 1.0380622837370241e-05, "loss": 1.3656013488769532, "step": 150, "token_acc": 0.6226175175695261 }, { "epoch": 0.02689571403782752, "grad_norm": 2.296875, "learning_rate": 1.0726643598615918e-05, "loss": 1.3753274917602538, "step": 155, "token_acc": 0.6213831896726154 }, { "epoch": 0.027763317716467117, "grad_norm": 2.046875, "learning_rate": 1.1072664359861593e-05, "loss": 1.3534158706665038, "step": 160, "token_acc": 0.6270216023570886 }, { "epoch": 0.028630921395106715, "grad_norm": 2.203125, "learning_rate": 1.1418685121107267e-05, "loss": 1.3482056617736817, "step": 165, "token_acc": 0.6246836055823732 }, { "epoch": 0.029498525073746312, "grad_norm": 2.265625, "learning_rate": 1.1764705882352942e-05, "loss": 1.3297002792358399, "step": 170, "token_acc": 0.6299395312649295 }, { "epoch": 0.03036612875238591, "grad_norm": 2.171875, "learning_rate": 1.2110726643598615e-05, "loss": 1.3250617980957031, "step": 175, "token_acc": 0.6287991301119186 }, { "epoch": 0.031233732431025507, "grad_norm": 2.140625, "learning_rate": 1.2456747404844292e-05, "loss": 1.32503080368042, "step": 180, "token_acc": 0.6283811790503951 }, { "epoch": 0.032101336109665105, "grad_norm": 1.984375, "learning_rate": 1.2802768166089967e-05, "loss": 1.3253366470336914, "step": 185, "token_acc": 0.6305334557323541 }, { "epoch": 0.0329689397883047, "grad_norm": 2.171875, "learning_rate": 1.314878892733564e-05, "loss": 1.3164624214172362, "step": 190, "token_acc": 0.6327720036837259 }, { "epoch": 0.0338365434669443, "grad_norm": 2.203125, "learning_rate": 1.3494809688581316e-05, "loss": 1.2994074821472168, "step": 195, "token_acc": 0.6317387471841726 }, { "epoch": 0.0347041471455839, "grad_norm": 2.203125, "learning_rate": 1.3840830449826989e-05, "loss": 1.3055123329162597, "step": 200, "token_acc": 0.6311878838826895 }, { "epoch": 0.035571750824223494, "grad_norm": 2.25, "learning_rate": 1.4186851211072666e-05, "loss": 1.2885337829589845, "step": 205, "token_acc": 0.6344415604742465 }, { "epoch": 0.03643935450286309, "grad_norm": 2.40625, "learning_rate": 1.4532871972318341e-05, "loss": 1.2472162246704102, "step": 210, "token_acc": 0.6459379308550766 }, { "epoch": 0.03730695818150269, "grad_norm": 2.21875, "learning_rate": 1.4878892733564014e-05, "loss": 1.2773432731628418, "step": 215, "token_acc": 0.6338010832102413 }, { "epoch": 0.038174561860142286, "grad_norm": 2.15625, "learning_rate": 1.522491349480969e-05, "loss": 1.272820281982422, "step": 220, "token_acc": 0.6386346675046153 }, { "epoch": 0.039042165538781884, "grad_norm": 2.171875, "learning_rate": 1.5570934256055366e-05, "loss": 1.2626455307006836, "step": 225, "token_acc": 0.6376424039839007 }, { "epoch": 0.03990976921742148, "grad_norm": 2.140625, "learning_rate": 1.5916955017301038e-05, "loss": 1.2593675613403321, "step": 230, "token_acc": 0.639645674135032 }, { "epoch": 0.04077737289606108, "grad_norm": 2.140625, "learning_rate": 1.6262975778546713e-05, "loss": 1.255127239227295, "step": 235, "token_acc": 0.6431021760799492 }, { "epoch": 0.041644976574700676, "grad_norm": 2.125, "learning_rate": 1.6608996539792388e-05, "loss": 1.2209264755249023, "step": 240, "token_acc": 0.6458485917837405 }, { "epoch": 0.042512580253340274, "grad_norm": 2.078125, "learning_rate": 1.6955017301038063e-05, "loss": 1.2057106018066406, "step": 245, "token_acc": 0.653398644744445 }, { "epoch": 0.04338018393197987, "grad_norm": 2.09375, "learning_rate": 1.730103806228374e-05, "loss": 1.254837989807129, "step": 250, "token_acc": 0.6412084543831126 }, { "epoch": 0.04424778761061947, "grad_norm": 2.09375, "learning_rate": 1.7647058823529414e-05, "loss": 1.2420223236083985, "step": 255, "token_acc": 0.6410300956861068 }, { "epoch": 0.045115391289259066, "grad_norm": 2.140625, "learning_rate": 1.799307958477509e-05, "loss": 1.2112503051757812, "step": 260, "token_acc": 0.6482978496344912 }, { "epoch": 0.04598299496789866, "grad_norm": 2.1875, "learning_rate": 1.833910034602076e-05, "loss": 1.2253754615783692, "step": 265, "token_acc": 0.6429915081156616 }, { "epoch": 0.04685059864653826, "grad_norm": 2.03125, "learning_rate": 1.868512110726644e-05, "loss": 1.2027314186096192, "step": 270, "token_acc": 0.6511740530352609 }, { "epoch": 0.04771820232517786, "grad_norm": 2.203125, "learning_rate": 1.9031141868512114e-05, "loss": 1.219920539855957, "step": 275, "token_acc": 0.6463804044677383 }, { "epoch": 0.048585806003817456, "grad_norm": 2.125, "learning_rate": 1.9377162629757786e-05, "loss": 1.1916674613952636, "step": 280, "token_acc": 0.6508475041894454 }, { "epoch": 0.04945340968245705, "grad_norm": 2.046875, "learning_rate": 1.972318339100346e-05, "loss": 1.2134785652160645, "step": 285, "token_acc": 0.6464441609025586 }, { "epoch": 0.05032101336109665, "grad_norm": 2.171875, "learning_rate": 1.9999998353126843e-05, "loss": 1.1799225807189941, "step": 290, "token_acc": 0.6547402065668172 }, { "epoch": 0.05118861703973625, "grad_norm": 2.140625, "learning_rate": 1.99999407126232e-05, "loss": 1.2018964767456055, "step": 295, "token_acc": 0.6471151511881963 }, { "epoch": 0.052056220718375845, "grad_norm": 2.203125, "learning_rate": 1.9999800729003996e-05, "loss": 1.1920422554016112, "step": 300, "token_acc": 0.6487302769689172 }, { "epoch": 0.05292382439701544, "grad_norm": 2.140625, "learning_rate": 1.9999578403421912e-05, "loss": 1.1693296432495117, "step": 305, "token_acc": 0.6556802430962789 }, { "epoch": 0.05379142807565504, "grad_norm": 2.15625, "learning_rate": 1.9999273737707648e-05, "loss": 1.1848974227905273, "step": 310, "token_acc": 0.6521950494915804 }, { "epoch": 0.05465903175429464, "grad_norm": 2.21875, "learning_rate": 1.9998886734369936e-05, "loss": 1.1778865814208985, "step": 315, "token_acc": 0.6545266496586913 }, { "epoch": 0.055526635432934235, "grad_norm": 2.0625, "learning_rate": 1.9998417396595508e-05, "loss": 1.1698062896728516, "step": 320, "token_acc": 0.6535174771198589 }, { "epoch": 0.05639423911157383, "grad_norm": 2.140625, "learning_rate": 1.9997865728249043e-05, "loss": 1.158426284790039, "step": 325, "token_acc": 0.6557887720539224 }, { "epoch": 0.05726184279021343, "grad_norm": 2.109375, "learning_rate": 1.999723173387319e-05, "loss": 1.1911964416503906, "step": 330, "token_acc": 0.6506533185984529 }, { "epoch": 0.05812944646885303, "grad_norm": 2.140625, "learning_rate": 1.9996515418688493e-05, "loss": 1.1536369323730469, "step": 335, "token_acc": 0.6576180488329568 }, { "epoch": 0.058997050147492625, "grad_norm": 2.078125, "learning_rate": 1.999571678859333e-05, "loss": 1.1710229873657227, "step": 340, "token_acc": 0.6535538729067681 }, { "epoch": 0.05986465382613222, "grad_norm": 2.15625, "learning_rate": 1.9994835850163926e-05, "loss": 1.1672002792358398, "step": 345, "token_acc": 0.6529600985558826 }, { "epoch": 0.06073225750477182, "grad_norm": 2.03125, "learning_rate": 1.9993872610654236e-05, "loss": 1.1647834777832031, "step": 350, "token_acc": 0.655277021628137 }, { "epoch": 0.06159986118341142, "grad_norm": 2.171875, "learning_rate": 1.9992827077995925e-05, "loss": 1.1796775817871095, "step": 355, "token_acc": 0.653560930884772 }, { "epoch": 0.062467464862051014, "grad_norm": 2.0625, "learning_rate": 1.9991699260798284e-05, "loss": 1.1580224990844727, "step": 360, "token_acc": 0.6587823913419223 }, { "epoch": 0.06333506854069061, "grad_norm": 2.1875, "learning_rate": 1.999048916834817e-05, "loss": 1.1684626579284667, "step": 365, "token_acc": 0.6539958690816651 }, { "epoch": 0.06420267221933021, "grad_norm": 2.25, "learning_rate": 1.9989196810609918e-05, "loss": 1.1673255920410157, "step": 370, "token_acc": 0.6526922661481962 }, { "epoch": 0.0650702758979698, "grad_norm": 2.09375, "learning_rate": 1.9987822198225265e-05, "loss": 1.1529643058776855, "step": 375, "token_acc": 0.6575958831216069 }, { "epoch": 0.0659378795766094, "grad_norm": 1.9921875, "learning_rate": 1.9986365342513266e-05, "loss": 1.1544547080993652, "step": 380, "token_acc": 0.6566502399001392 }, { "epoch": 0.066805483255249, "grad_norm": 2.125, "learning_rate": 1.99848262554702e-05, "loss": 1.169089126586914, "step": 385, "token_acc": 0.65285508142651 }, { "epoch": 0.0676730869338886, "grad_norm": 2.09375, "learning_rate": 1.9983204949769454e-05, "loss": 1.1696990966796874, "step": 390, "token_acc": 0.6517770916361757 }, { "epoch": 0.0685406906125282, "grad_norm": 1.9609375, "learning_rate": 1.998150143876146e-05, "loss": 1.1284924507141114, "step": 395, "token_acc": 0.6655096202085395 }, { "epoch": 0.0694082942911678, "grad_norm": 1.9765625, "learning_rate": 1.9979715736473527e-05, "loss": 1.1364903450012207, "step": 400, "token_acc": 0.6588324591008281 }, { "epoch": 0.07027589796980739, "grad_norm": 2.109375, "learning_rate": 1.9977847857609775e-05, "loss": 1.1590328216552734, "step": 405, "token_acc": 0.6557208069503755 }, { "epoch": 0.07114350164844699, "grad_norm": 2.09375, "learning_rate": 1.9975897817550995e-05, "loss": 1.1176044464111328, "step": 410, "token_acc": 0.665058862001308 }, { "epoch": 0.07201110532708659, "grad_norm": 2.140625, "learning_rate": 1.9973865632354516e-05, "loss": 1.1324227333068848, "step": 415, "token_acc": 0.6612732538710037 }, { "epoch": 0.07287870900572618, "grad_norm": 1.9921875, "learning_rate": 1.9971751318754087e-05, "loss": 1.126877784729004, "step": 420, "token_acc": 0.6609341202232488 }, { "epoch": 0.07374631268436578, "grad_norm": 2.171875, "learning_rate": 1.9969554894159723e-05, "loss": 1.14964017868042, "step": 425, "token_acc": 0.6562090443570724 }, { "epoch": 0.07461391636300538, "grad_norm": 2.03125, "learning_rate": 1.996727637665758e-05, "loss": 1.1191633224487305, "step": 430, "token_acc": 0.6615450810570731 }, { "epoch": 0.07548152004164498, "grad_norm": 2.015625, "learning_rate": 1.9964915785009793e-05, "loss": 1.1374661445617675, "step": 435, "token_acc": 0.660144832126399 }, { "epoch": 0.07634912372028457, "grad_norm": 2.078125, "learning_rate": 1.996247313865432e-05, "loss": 1.1567827224731446, "step": 440, "token_acc": 0.6550488426417499 }, { "epoch": 0.07721672739892417, "grad_norm": 2.078125, "learning_rate": 1.9959948457704793e-05, "loss": 1.1355746269226075, "step": 445, "token_acc": 0.6552852877530336 }, { "epoch": 0.07808433107756377, "grad_norm": 2.078125, "learning_rate": 1.9957341762950346e-05, "loss": 1.1385893821716309, "step": 450, "token_acc": 0.6600633591211301 }, { "epoch": 0.07895193475620337, "grad_norm": 2.03125, "learning_rate": 1.9954653075855445e-05, "loss": 1.1308669090270995, "step": 455, "token_acc": 0.6592446678440429 }, { "epoch": 0.07981953843484296, "grad_norm": 1.9140625, "learning_rate": 1.9951882418559703e-05, "loss": 1.1351963043212892, "step": 460, "token_acc": 0.6637463123076334 }, { "epoch": 0.08068714211348256, "grad_norm": 2.015625, "learning_rate": 1.994902981387771e-05, "loss": 1.1261632919311524, "step": 465, "token_acc": 0.6615251252132753 }, { "epoch": 0.08155474579212216, "grad_norm": 2.03125, "learning_rate": 1.994609528529885e-05, "loss": 1.1392766952514648, "step": 470, "token_acc": 0.6591416589510375 }, { "epoch": 0.08242234947076175, "grad_norm": 1.9609375, "learning_rate": 1.994307885698708e-05, "loss": 1.1046557426452637, "step": 475, "token_acc": 0.6653103722274739 }, { "epoch": 0.08328995314940135, "grad_norm": 2.078125, "learning_rate": 1.9939980553780763e-05, "loss": 1.1288423538208008, "step": 480, "token_acc": 0.6587156961405608 }, { "epoch": 0.08415755682804095, "grad_norm": 2.09375, "learning_rate": 1.993680040119244e-05, "loss": 1.1338699340820313, "step": 485, "token_acc": 0.6593820953616607 }, { "epoch": 0.08502516050668055, "grad_norm": 1.9453125, "learning_rate": 1.9933538425408636e-05, "loss": 1.1204511642456054, "step": 490, "token_acc": 0.6627913185082357 }, { "epoch": 0.08589276418532014, "grad_norm": 1.921875, "learning_rate": 1.9930194653289635e-05, "loss": 1.1193718910217285, "step": 495, "token_acc": 0.6627504181868472 }, { "epoch": 0.08676036786395974, "grad_norm": 1.9921875, "learning_rate": 1.9926769112369263e-05, "loss": 1.1357709884643554, "step": 500, "token_acc": 0.6581859131322919 }, { "epoch": 0.08762797154259934, "grad_norm": 2.078125, "learning_rate": 1.9923261830854655e-05, "loss": 1.1109633445739746, "step": 505, "token_acc": 0.6641862471522357 }, { "epoch": 0.08849557522123894, "grad_norm": 1.953125, "learning_rate": 1.991967283762603e-05, "loss": 1.0808055877685547, "step": 510, "token_acc": 0.6749422782577114 }, { "epoch": 0.08936317889987853, "grad_norm": 2.03125, "learning_rate": 1.9916002162236458e-05, "loss": 1.119293212890625, "step": 515, "token_acc": 0.6636471832848453 }, { "epoch": 0.09023078257851813, "grad_norm": 2.109375, "learning_rate": 1.99122498349116e-05, "loss": 1.1222724914550781, "step": 520, "token_acc": 0.6633997145381635 }, { "epoch": 0.09109838625715773, "grad_norm": 1.9609375, "learning_rate": 1.990841588654947e-05, "loss": 1.133096694946289, "step": 525, "token_acc": 0.660575962862001 }, { "epoch": 0.09196598993579733, "grad_norm": 2.03125, "learning_rate": 1.990450034872018e-05, "loss": 1.1255317687988282, "step": 530, "token_acc": 0.659338407094156 }, { "epoch": 0.09283359361443692, "grad_norm": 2.0, "learning_rate": 1.990050325366568e-05, "loss": 1.1009018898010254, "step": 535, "token_acc": 0.6676672499663481 }, { "epoch": 0.09370119729307652, "grad_norm": 2.0, "learning_rate": 1.9896424634299495e-05, "loss": 1.1132999420166017, "step": 540, "token_acc": 0.662574878385441 }, { "epoch": 0.09456880097171612, "grad_norm": 2.078125, "learning_rate": 1.9892264524206442e-05, "loss": 1.0917093276977539, "step": 545, "token_acc": 0.670899655371247 }, { "epoch": 0.09543640465035572, "grad_norm": 1.953125, "learning_rate": 1.9888022957642365e-05, "loss": 1.0798656463623046, "step": 550, "token_acc": 0.6730398457583547 }, { "epoch": 0.09630400832899531, "grad_norm": 1.9609375, "learning_rate": 1.988369996953386e-05, "loss": 1.1211360931396483, "step": 555, "token_acc": 0.660755798237552 }, { "epoch": 0.09717161200763491, "grad_norm": 1.9375, "learning_rate": 1.987929559547796e-05, "loss": 1.0922590255737306, "step": 560, "token_acc": 0.6691658981863865 }, { "epoch": 0.09803921568627451, "grad_norm": 2.015625, "learning_rate": 1.9874809871741877e-05, "loss": 1.109041404724121, "step": 565, "token_acc": 0.6648318872017354 }, { "epoch": 0.0989068193649141, "grad_norm": 2.078125, "learning_rate": 1.9870242835262665e-05, "loss": 1.1087127685546876, "step": 570, "token_acc": 0.6643103084814841 }, { "epoch": 0.0997744230435537, "grad_norm": 2.015625, "learning_rate": 1.986559452364696e-05, "loss": 1.1010761260986328, "step": 575, "token_acc": 0.6648252984798432 }, { "epoch": 0.1006420267221933, "grad_norm": 1.96875, "learning_rate": 1.986086497517063e-05, "loss": 1.107012939453125, "step": 580, "token_acc": 0.6654558712325808 }, { "epoch": 0.1015096304008329, "grad_norm": 2.109375, "learning_rate": 1.985605422877848e-05, "loss": 1.0979772567749024, "step": 585, "token_acc": 0.6680623147820886 }, { "epoch": 0.1023772340794725, "grad_norm": 2.234375, "learning_rate": 1.9851162324083933e-05, "loss": 1.0830554008483886, "step": 590, "token_acc": 0.6709642543415095 }, { "epoch": 0.10324483775811209, "grad_norm": 1.984375, "learning_rate": 1.984618930136869e-05, "loss": 1.0940834999084472, "step": 595, "token_acc": 0.6654719073768496 }, { "epoch": 0.10411244143675169, "grad_norm": 2.015625, "learning_rate": 1.9841135201582418e-05, "loss": 1.087096881866455, "step": 600, "token_acc": 0.6688288825090731 }, { "epoch": 0.10498004511539129, "grad_norm": 2.046875, "learning_rate": 1.9836000066342396e-05, "loss": 1.0840859413146973, "step": 605, "token_acc": 0.6716854817537411 }, { "epoch": 0.10584764879403089, "grad_norm": 2.140625, "learning_rate": 1.9830783937933172e-05, "loss": 1.1092602729797363, "step": 610, "token_acc": 0.6614420479795627 }, { "epoch": 0.10671525247267048, "grad_norm": 2.140625, "learning_rate": 1.982548685930623e-05, "loss": 1.067424201965332, "step": 615, "token_acc": 0.673647896123652 }, { "epoch": 0.10758285615131008, "grad_norm": 2.046875, "learning_rate": 1.9820108874079626e-05, "loss": 1.072523593902588, "step": 620, "token_acc": 0.670838285674334 }, { "epoch": 0.10845045982994968, "grad_norm": 2.265625, "learning_rate": 1.9814650026537632e-05, "loss": 1.1132768630981444, "step": 625, "token_acc": 0.6627720656963546 }, { "epoch": 0.10931806350858927, "grad_norm": 1.875, "learning_rate": 1.9809110361630356e-05, "loss": 1.081822395324707, "step": 630, "token_acc": 0.6701974000962927 }, { "epoch": 0.11018566718722887, "grad_norm": 2.03125, "learning_rate": 1.9803489924973403e-05, "loss": 1.0843083381652832, "step": 635, "token_acc": 0.670640893606908 }, { "epoch": 0.11105327086586847, "grad_norm": 1.875, "learning_rate": 1.9797788762847474e-05, "loss": 1.1068120002746582, "step": 640, "token_acc": 0.6664943545095905 }, { "epoch": 0.11192087454450807, "grad_norm": 2.15625, "learning_rate": 1.9792006922197983e-05, "loss": 1.090738296508789, "step": 645, "token_acc": 0.6678904842496042 }, { "epoch": 0.11278847822314766, "grad_norm": 2.21875, "learning_rate": 1.97861444506347e-05, "loss": 1.0894213676452638, "step": 650, "token_acc": 0.6677103350040985 }, { "epoch": 0.11365608190178726, "grad_norm": 1.9375, "learning_rate": 1.9780201396431328e-05, "loss": 1.1013753890991211, "step": 655, "token_acc": 0.6645008860011813 }, { "epoch": 0.11452368558042686, "grad_norm": 2.046875, "learning_rate": 1.9774177808525113e-05, "loss": 1.0939213752746582, "step": 660, "token_acc": 0.6666116111982823 }, { "epoch": 0.11539128925906646, "grad_norm": 2.046875, "learning_rate": 1.9768073736516446e-05, "loss": 1.0730672836303712, "step": 665, "token_acc": 0.674365815777946 }, { "epoch": 0.11625889293770605, "grad_norm": 1.9609375, "learning_rate": 1.9761889230668462e-05, "loss": 1.0676060676574708, "step": 670, "token_acc": 0.6705334815226451 }, { "epoch": 0.11712649661634565, "grad_norm": 2.078125, "learning_rate": 1.975562434190661e-05, "loss": 1.0712880134582519, "step": 675, "token_acc": 0.6705970273187744 }, { "epoch": 0.11799410029498525, "grad_norm": 2.15625, "learning_rate": 1.9749279121818235e-05, "loss": 1.1015710830688477, "step": 680, "token_acc": 0.6644264612144223 }, { "epoch": 0.11886170397362485, "grad_norm": 2.15625, "learning_rate": 1.9742853622652176e-05, "loss": 1.0666415214538574, "step": 685, "token_acc": 0.6735260146303254 }, { "epoch": 0.11972930765226444, "grad_norm": 2.015625, "learning_rate": 1.9736347897318303e-05, "loss": 1.1168096542358399, "step": 690, "token_acc": 0.6619707286530484 }, { "epoch": 0.12059691133090404, "grad_norm": 1.9140625, "learning_rate": 1.9729761999387102e-05, "loss": 1.061478042602539, "step": 695, "token_acc": 0.674633270806062 }, { "epoch": 0.12146451500954364, "grad_norm": 2.015625, "learning_rate": 1.9723095983089235e-05, "loss": 1.0845521926879882, "step": 700, "token_acc": 0.666814367237328 }, { "epoch": 0.12233211868818324, "grad_norm": 2.0625, "learning_rate": 1.9716349903315075e-05, "loss": 1.0705391883850097, "step": 705, "token_acc": 0.6704897791192207 }, { "epoch": 0.12319972236682283, "grad_norm": 2.0, "learning_rate": 1.970952381561428e-05, "loss": 1.0789600372314454, "step": 710, "token_acc": 0.6659224188949265 }, { "epoch": 0.12406732604546243, "grad_norm": 2.03125, "learning_rate": 1.9702617776195314e-05, "loss": 1.0921841621398927, "step": 715, "token_acc": 0.6679036012597562 }, { "epoch": 0.12493492972410203, "grad_norm": 2.0, "learning_rate": 1.9695631841924993e-05, "loss": 1.084920597076416, "step": 720, "token_acc": 0.6674108653000473 }, { "epoch": 0.12580253340274164, "grad_norm": 2.109375, "learning_rate": 1.9688566070328018e-05, "loss": 1.0615843772888183, "step": 725, "token_acc": 0.6733901515151515 }, { "epoch": 0.12667013708138122, "grad_norm": 2.046875, "learning_rate": 1.9681420519586502e-05, "loss": 1.0624969482421875, "step": 730, "token_acc": 0.6717009575388738 }, { "epoch": 0.12753774076002083, "grad_norm": 1.953125, "learning_rate": 1.9674195248539482e-05, "loss": 1.0610927581787108, "step": 735, "token_acc": 0.6758764832793959 }, { "epoch": 0.12840534443866042, "grad_norm": 1.984375, "learning_rate": 1.9666890316682443e-05, "loss": 1.0778383255004882, "step": 740, "token_acc": 0.6720665616068805 }, { "epoch": 0.12927294811730003, "grad_norm": 1.9453125, "learning_rate": 1.9659505784166827e-05, "loss": 1.078394317626953, "step": 745, "token_acc": 0.6708379109836813 }, { "epoch": 0.1301405517959396, "grad_norm": 2.125, "learning_rate": 1.965204171179954e-05, "loss": 1.088584041595459, "step": 750, "token_acc": 0.667237308961385 }, { "epoch": 0.13100815547457922, "grad_norm": 2.15625, "learning_rate": 1.9644498161042436e-05, "loss": 1.0937715530395509, "step": 755, "token_acc": 0.6666981577704298 }, { "epoch": 0.1318757591532188, "grad_norm": 1.84375, "learning_rate": 1.9636875194011836e-05, "loss": 1.0754453659057617, "step": 760, "token_acc": 0.6709822832582032 }, { "epoch": 0.13274336283185842, "grad_norm": 2.15625, "learning_rate": 1.9629172873477995e-05, "loss": 1.070410919189453, "step": 765, "token_acc": 0.6699807311459616 }, { "epoch": 0.133610966510498, "grad_norm": 2.015625, "learning_rate": 1.9621391262864597e-05, "loss": 1.0697467803955079, "step": 770, "token_acc": 0.672360857509975 }, { "epoch": 0.13447857018913761, "grad_norm": 2.0625, "learning_rate": 1.961353042624823e-05, "loss": 1.090577983856201, "step": 775, "token_acc": 0.6647531413321472 }, { "epoch": 0.1353461738677772, "grad_norm": 1.953125, "learning_rate": 1.9605590428357853e-05, "loss": 1.0771003723144532, "step": 780, "token_acc": 0.6692675159235669 }, { "epoch": 0.1362137775464168, "grad_norm": 1.9765625, "learning_rate": 1.959757133457427e-05, "loss": 1.0793813705444335, "step": 785, "token_acc": 0.6685078374160277 }, { "epoch": 0.1370813812250564, "grad_norm": 2.09375, "learning_rate": 1.958947321092959e-05, "loss": 1.0954531669616698, "step": 790, "token_acc": 0.666005196025954 }, { "epoch": 0.137948984903696, "grad_norm": 2.078125, "learning_rate": 1.9581296124106682e-05, "loss": 1.049675750732422, "step": 795, "token_acc": 0.6793725574174206 }, { "epoch": 0.1388165885823356, "grad_norm": 1.9921875, "learning_rate": 1.9573040141438625e-05, "loss": 1.0865850448608398, "step": 800, "token_acc": 0.6686926806866836 }, { "epoch": 0.1396841922609752, "grad_norm": 1.96875, "learning_rate": 1.9564705330908155e-05, "loss": 1.0714460372924806, "step": 805, "token_acc": 0.6687210017329085 }, { "epoch": 0.14055179593961478, "grad_norm": 2.109375, "learning_rate": 1.9556291761147106e-05, "loss": 1.0626968383789062, "step": 810, "token_acc": 0.6734635695958513 }, { "epoch": 0.1414193996182544, "grad_norm": 2.03125, "learning_rate": 1.9547799501435848e-05, "loss": 1.078728485107422, "step": 815, "token_acc": 0.6699388135142325 }, { "epoch": 0.14228700329689398, "grad_norm": 2.03125, "learning_rate": 1.9539228621702696e-05, "loss": 1.0764430999755858, "step": 820, "token_acc": 0.6692216671049172 }, { "epoch": 0.1431546069755336, "grad_norm": 2.03125, "learning_rate": 1.9530579192523374e-05, "loss": 1.0595266342163085, "step": 825, "token_acc": 0.6733650861607621 }, { "epoch": 0.14402221065417317, "grad_norm": 1.9609375, "learning_rate": 1.9521851285120393e-05, "loss": 1.0454116821289063, "step": 830, "token_acc": 0.6766406455817306 }, { "epoch": 0.14488981433281278, "grad_norm": 2.015625, "learning_rate": 1.9513044971362494e-05, "loss": 1.0634162902832032, "step": 835, "token_acc": 0.6721870895229326 }, { "epoch": 0.14575741801145237, "grad_norm": 2.015625, "learning_rate": 1.9504160323764032e-05, "loss": 1.0595422744750977, "step": 840, "token_acc": 0.6775388978821892 }, { "epoch": 0.14662502169009198, "grad_norm": 2.078125, "learning_rate": 1.9495197415484397e-05, "loss": 1.082723903656006, "step": 845, "token_acc": 0.668543901058705 }, { "epoch": 0.14749262536873156, "grad_norm": 2.0625, "learning_rate": 1.9486156320327406e-05, "loss": 1.0727534294128418, "step": 850, "token_acc": 0.6706723270354948 }, { "epoch": 0.14836022904737117, "grad_norm": 2.09375, "learning_rate": 1.9477037112740703e-05, "loss": 1.0933048248291015, "step": 855, "token_acc": 0.6638589138214922 }, { "epoch": 0.14922783272601076, "grad_norm": 1.8984375, "learning_rate": 1.9467839867815118e-05, "loss": 1.0769481658935547, "step": 860, "token_acc": 0.6706557839960199 }, { "epoch": 0.15009543640465037, "grad_norm": 1.953125, "learning_rate": 1.9458564661284085e-05, "loss": 1.062359619140625, "step": 865, "token_acc": 0.6725027997050067 }, { "epoch": 0.15096304008328995, "grad_norm": 2.015625, "learning_rate": 1.9449211569523002e-05, "loss": 1.0662097930908203, "step": 870, "token_acc": 0.6707562050881049 }, { "epoch": 0.15183064376192956, "grad_norm": 2.0625, "learning_rate": 1.9439780669548586e-05, "loss": 1.0621366500854492, "step": 875, "token_acc": 0.673269502864129 }, { "epoch": 0.15269824744056915, "grad_norm": 1.8984375, "learning_rate": 1.9430272039018277e-05, "loss": 1.0658045768737794, "step": 880, "token_acc": 0.6731692212416783 }, { "epoch": 0.15356585111920876, "grad_norm": 1.9609375, "learning_rate": 1.942068575622956e-05, "loss": 1.0896780967712403, "step": 885, "token_acc": 0.6671697313899149 }, { "epoch": 0.15443345479784834, "grad_norm": 1.953125, "learning_rate": 1.9411021900119343e-05, "loss": 1.0421188354492188, "step": 890, "token_acc": 0.6814261145654187 }, { "epoch": 0.15530105847648795, "grad_norm": 1.8671875, "learning_rate": 1.94012805502633e-05, "loss": 1.0770461082458496, "step": 895, "token_acc": 0.670378502031211 }, { "epoch": 0.15616866215512754, "grad_norm": 1.9765625, "learning_rate": 1.9391461786875216e-05, "loss": 1.0411422729492188, "step": 900, "token_acc": 0.6799084886073606 }, { "epoch": 0.15703626583376715, "grad_norm": 2.03125, "learning_rate": 1.9381565690806328e-05, "loss": 1.0435258865356445, "step": 905, "token_acc": 0.6792285176667363 }, { "epoch": 0.15790386951240673, "grad_norm": 1.90625, "learning_rate": 1.9371592343544655e-05, "loss": 1.0748100280761719, "step": 910, "token_acc": 0.6707071531575654 }, { "epoch": 0.15877147319104634, "grad_norm": 1.8828125, "learning_rate": 1.9361541827214338e-05, "loss": 1.0855265617370606, "step": 915, "token_acc": 0.667949364401157 }, { "epoch": 0.15963907686968593, "grad_norm": 1.9375, "learning_rate": 1.9351414224574944e-05, "loss": 1.0524426460266114, "step": 920, "token_acc": 0.6748159542907373 }, { "epoch": 0.16050668054832554, "grad_norm": 1.9921875, "learning_rate": 1.9341209619020804e-05, "loss": 1.0575942993164062, "step": 925, "token_acc": 0.6725628566510876 }, { "epoch": 0.16137428422696512, "grad_norm": 1.953125, "learning_rate": 1.9330928094580324e-05, "loss": 1.058868408203125, "step": 930, "token_acc": 0.6738103592539609 }, { "epoch": 0.16224188790560473, "grad_norm": 2.0625, "learning_rate": 1.9320569735915273e-05, "loss": 1.0528675079345704, "step": 935, "token_acc": 0.6737685311378745 }, { "epoch": 0.16310949158424431, "grad_norm": 2.171875, "learning_rate": 1.9310134628320116e-05, "loss": 1.0708015441894532, "step": 940, "token_acc": 0.6706150717308855 }, { "epoch": 0.16397709526288393, "grad_norm": 2.03125, "learning_rate": 1.929962285772128e-05, "loss": 1.0595834732055665, "step": 945, "token_acc": 0.6712688842219362 }, { "epoch": 0.1648446989415235, "grad_norm": 1.953125, "learning_rate": 1.9289034510676483e-05, "loss": 1.0492593765258789, "step": 950, "token_acc": 0.6747535596933187 }, { "epoch": 0.16571230262016312, "grad_norm": 1.9140625, "learning_rate": 1.9278369674373985e-05, "loss": 1.0697070121765138, "step": 955, "token_acc": 0.6718501687702754 }, { "epoch": 0.1665799062988027, "grad_norm": 1.9609375, "learning_rate": 1.9267628436631893e-05, "loss": 1.0314347267150878, "step": 960, "token_acc": 0.6785393180717892 }, { "epoch": 0.16744750997744232, "grad_norm": 2.140625, "learning_rate": 1.9256810885897434e-05, "loss": 1.0667208671569823, "step": 965, "token_acc": 0.6720282411646826 }, { "epoch": 0.1683151136560819, "grad_norm": 1.9140625, "learning_rate": 1.9245917111246205e-05, "loss": 1.0356231689453126, "step": 970, "token_acc": 0.6796255346195098 }, { "epoch": 0.1691827173347215, "grad_norm": 1.9921875, "learning_rate": 1.9234947202381487e-05, "loss": 1.0789193153381347, "step": 975, "token_acc": 0.6698977346968708 }, { "epoch": 0.1700503210133611, "grad_norm": 1.9453125, "learning_rate": 1.922390124963345e-05, "loss": 1.0505391120910645, "step": 980, "token_acc": 0.6759316831814153 }, { "epoch": 0.1709179246920007, "grad_norm": 1.953125, "learning_rate": 1.9212779343958466e-05, "loss": 1.0695667266845703, "step": 985, "token_acc": 0.6721886545823162 }, { "epoch": 0.1717855283706403, "grad_norm": 2.0, "learning_rate": 1.92015815769383e-05, "loss": 1.0540275573730469, "step": 990, "token_acc": 0.6755136400344937 }, { "epoch": 0.1726531320492799, "grad_norm": 1.9921875, "learning_rate": 1.919030804077941e-05, "loss": 1.0307014465332032, "step": 995, "token_acc": 0.6810972040253115 }, { "epoch": 0.17352073572791948, "grad_norm": 1.8828125, "learning_rate": 1.9178958828312146e-05, "loss": 1.067826271057129, "step": 1000, "token_acc": 0.6724737299518053 }, { "epoch": 0.1743883394065591, "grad_norm": 1.953125, "learning_rate": 1.9167534032990024e-05, "loss": 1.0573354721069337, "step": 1005, "token_acc": 0.6738269981618387 }, { "epoch": 0.17525594308519868, "grad_norm": 2.046875, "learning_rate": 1.9156033748888918e-05, "loss": 1.0550942420959473, "step": 1010, "token_acc": 0.6726485901683684 }, { "epoch": 0.1761235467638383, "grad_norm": 1.953125, "learning_rate": 1.9144458070706317e-05, "loss": 1.0487598419189452, "step": 1015, "token_acc": 0.6768756795940558 }, { "epoch": 0.17699115044247787, "grad_norm": 1.984375, "learning_rate": 1.9132807093760523e-05, "loss": 1.0621299743652344, "step": 1020, "token_acc": 0.6733473561667395 }, { "epoch": 0.17785875412111748, "grad_norm": 2.140625, "learning_rate": 1.912108091398988e-05, "loss": 1.052401065826416, "step": 1025, "token_acc": 0.6748749154834347 }, { "epoch": 0.17872635779975707, "grad_norm": 1.921875, "learning_rate": 1.9109279627951978e-05, "loss": 1.0468477249145507, "step": 1030, "token_acc": 0.6755857259832536 }, { "epoch": 0.17959396147839668, "grad_norm": 2.046875, "learning_rate": 1.9097403332822863e-05, "loss": 1.0689468383789062, "step": 1035, "token_acc": 0.670591049218667 }, { "epoch": 0.18046156515703626, "grad_norm": 1.921875, "learning_rate": 1.908545212639622e-05, "loss": 1.0497617721557617, "step": 1040, "token_acc": 0.6754224207406193 }, { "epoch": 0.18132916883567587, "grad_norm": 1.953125, "learning_rate": 1.90734261070826e-05, "loss": 1.0642064094543457, "step": 1045, "token_acc": 0.6719027275714755 }, { "epoch": 0.18219677251431546, "grad_norm": 1.828125, "learning_rate": 1.906132537390857e-05, "loss": 1.0482969284057617, "step": 1050, "token_acc": 0.6774891482197671 }, { "epoch": 0.18306437619295507, "grad_norm": 2.0625, "learning_rate": 1.9049150026515937e-05, "loss": 1.0419374465942384, "step": 1055, "token_acc": 0.6783982416374751 }, { "epoch": 0.18393197987159465, "grad_norm": 1.9296875, "learning_rate": 1.9036900165160895e-05, "loss": 1.047512149810791, "step": 1060, "token_acc": 0.673420406340701 }, { "epoch": 0.18479958355023426, "grad_norm": 2.015625, "learning_rate": 1.9024575890713216e-05, "loss": 1.0479446411132813, "step": 1065, "token_acc": 0.6774471529854157 }, { "epoch": 0.18566718722887385, "grad_norm": 2.109375, "learning_rate": 1.9012177304655418e-05, "loss": 1.0644286155700684, "step": 1070, "token_acc": 0.6712419897903769 }, { "epoch": 0.18653479090751346, "grad_norm": 2.03125, "learning_rate": 1.8999704509081927e-05, "loss": 1.0513483047485352, "step": 1075, "token_acc": 0.6753527477190749 }, { "epoch": 0.18740239458615304, "grad_norm": 1.890625, "learning_rate": 1.8987157606698234e-05, "loss": 1.025481605529785, "step": 1080, "token_acc": 0.6835672249886826 }, { "epoch": 0.18826999826479265, "grad_norm": 1.9453125, "learning_rate": 1.8974536700820062e-05, "loss": 1.0314741134643555, "step": 1085, "token_acc": 0.6798985689043553 }, { "epoch": 0.18913760194343224, "grad_norm": 2.046875, "learning_rate": 1.896184189537249e-05, "loss": 1.0473779678344726, "step": 1090, "token_acc": 0.6763754045307443 }, { "epoch": 0.19000520562207185, "grad_norm": 2.0625, "learning_rate": 1.8949073294889127e-05, "loss": 1.0450904846191407, "step": 1095, "token_acc": 0.6737394957983194 }, { "epoch": 0.19087280930071143, "grad_norm": 2.03125, "learning_rate": 1.8936231004511224e-05, "loss": 1.0552305221557616, "step": 1100, "token_acc": 0.6746180059360228 }, { "epoch": 0.19174041297935104, "grad_norm": 2.15625, "learning_rate": 1.8923315129986838e-05, "loss": 1.0332719802856445, "step": 1105, "token_acc": 0.6815246996363837 }, { "epoch": 0.19260801665799063, "grad_norm": 2.03125, "learning_rate": 1.8910325777669923e-05, "loss": 1.0561046600341797, "step": 1110, "token_acc": 0.6739904907684597 }, { "epoch": 0.19347562033663024, "grad_norm": 1.8828125, "learning_rate": 1.8897263054519498e-05, "loss": 1.0276466369628907, "step": 1115, "token_acc": 0.6814717548158276 }, { "epoch": 0.19434322401526982, "grad_norm": 2.03125, "learning_rate": 1.8884127068098726e-05, "loss": 1.0520359992980957, "step": 1120, "token_acc": 0.675560674842469 }, { "epoch": 0.19521082769390943, "grad_norm": 1.9921875, "learning_rate": 1.8870917926574056e-05, "loss": 1.0623506546020507, "step": 1125, "token_acc": 0.6724925733011843 }, { "epoch": 0.19607843137254902, "grad_norm": 1.921875, "learning_rate": 1.8857635738714316e-05, "loss": 1.050804901123047, "step": 1130, "token_acc": 0.6761054927622447 }, { "epoch": 0.19694603505118863, "grad_norm": 2.0, "learning_rate": 1.884428061388983e-05, "loss": 1.0528631210327148, "step": 1135, "token_acc": 0.6742988058872535 }, { "epoch": 0.1978136387298282, "grad_norm": 1.8671875, "learning_rate": 1.8830852662071507e-05, "loss": 1.0435836791992188, "step": 1140, "token_acc": 0.6766125320533894 }, { "epoch": 0.19868124240846782, "grad_norm": 1.90625, "learning_rate": 1.8817351993829947e-05, "loss": 1.058847713470459, "step": 1145, "token_acc": 0.6732232009828266 }, { "epoch": 0.1995488460871074, "grad_norm": 1.765625, "learning_rate": 1.8803778720334512e-05, "loss": 1.0335227966308593, "step": 1150, "token_acc": 0.6779969283000565 }, { "epoch": 0.20041644976574702, "grad_norm": 2.078125, "learning_rate": 1.8790132953352427e-05, "loss": 1.04959077835083, "step": 1155, "token_acc": 0.6770108354485658 }, { "epoch": 0.2012840534443866, "grad_norm": 1.84375, "learning_rate": 1.8776414805247857e-05, "loss": 1.0455670356750488, "step": 1160, "token_acc": 0.6780289627154183 }, { "epoch": 0.2021516571230262, "grad_norm": 1.9609375, "learning_rate": 1.8762624388980976e-05, "loss": 1.033797264099121, "step": 1165, "token_acc": 0.6787956767884714 }, { "epoch": 0.2030192608016658, "grad_norm": 1.859375, "learning_rate": 1.8748761818107046e-05, "loss": 1.0679737091064454, "step": 1170, "token_acc": 0.6713174689300571 }, { "epoch": 0.2038868644803054, "grad_norm": 1.890625, "learning_rate": 1.8734827206775463e-05, "loss": 1.0490418434143067, "step": 1175, "token_acc": 0.6751949483539963 }, { "epoch": 0.204754468158945, "grad_norm": 1.9296875, "learning_rate": 1.8720820669728846e-05, "loss": 1.0127446174621582, "step": 1180, "token_acc": 0.6857832294389704 }, { "epoch": 0.2056220718375846, "grad_norm": 1.96875, "learning_rate": 1.8706742322302064e-05, "loss": 1.0334016799926757, "step": 1185, "token_acc": 0.6802701904224747 }, { "epoch": 0.20648967551622419, "grad_norm": 1.90625, "learning_rate": 1.8692592280421305e-05, "loss": 1.043479824066162, "step": 1190, "token_acc": 0.6771668797706226 }, { "epoch": 0.2073572791948638, "grad_norm": 1.953125, "learning_rate": 1.8678370660603115e-05, "loss": 1.0523313522338866, "step": 1195, "token_acc": 0.6751031599887095 }, { "epoch": 0.20822488287350338, "grad_norm": 1.9765625, "learning_rate": 1.8664077579953434e-05, "loss": 1.05529727935791, "step": 1200, "token_acc": 0.6741786043282646 }, { "epoch": 0.209092486552143, "grad_norm": 2.140625, "learning_rate": 1.864971315616664e-05, "loss": 1.043968391418457, "step": 1205, "token_acc": 0.6766692503598716 }, { "epoch": 0.20996009023078258, "grad_norm": 2.015625, "learning_rate": 1.8635277507524573e-05, "loss": 1.0732519149780273, "step": 1210, "token_acc": 0.6701793283338767 }, { "epoch": 0.2108276939094222, "grad_norm": 1.8984375, "learning_rate": 1.8620770752895567e-05, "loss": 1.0491312980651855, "step": 1215, "token_acc": 0.6746863348120731 }, { "epoch": 0.21169529758806177, "grad_norm": 2.015625, "learning_rate": 1.860619301173347e-05, "loss": 1.0385177612304688, "step": 1220, "token_acc": 0.6771983724985469 }, { "epoch": 0.21256290126670138, "grad_norm": 2.09375, "learning_rate": 1.8591544404076654e-05, "loss": 1.0225757598876952, "step": 1225, "token_acc": 0.6851136908248575 }, { "epoch": 0.21343050494534097, "grad_norm": 1.890625, "learning_rate": 1.8576825050547033e-05, "loss": 1.0491232872009277, "step": 1230, "token_acc": 0.6755004153380315 }, { "epoch": 0.21429810862398058, "grad_norm": 1.90625, "learning_rate": 1.856203507234907e-05, "loss": 1.0523208618164062, "step": 1235, "token_acc": 0.675254080094805 }, { "epoch": 0.21516571230262016, "grad_norm": 2.0, "learning_rate": 1.8547174591268774e-05, "loss": 1.0285789489746093, "step": 1240, "token_acc": 0.6805194115460195 }, { "epoch": 0.21603331598125977, "grad_norm": 1.796875, "learning_rate": 1.8532243729672707e-05, "loss": 1.0230236053466797, "step": 1245, "token_acc": 0.6832664590042764 }, { "epoch": 0.21690091965989935, "grad_norm": 2.03125, "learning_rate": 1.8517242610506953e-05, "loss": 1.0365596771240235, "step": 1250, "token_acc": 0.678233046932105 }, { "epoch": 0.21776852333853897, "grad_norm": 1.9296875, "learning_rate": 1.8502171357296144e-05, "loss": 1.0360082626342773, "step": 1255, "token_acc": 0.6784880946067773 }, { "epoch": 0.21863612701717855, "grad_norm": 1.8984375, "learning_rate": 1.8487030094142403e-05, "loss": 1.044863796234131, "step": 1260, "token_acc": 0.6762245320026152 }, { "epoch": 0.21950373069581816, "grad_norm": 1.859375, "learning_rate": 1.8471818945724355e-05, "loss": 1.0216045379638672, "step": 1265, "token_acc": 0.6839718075188765 }, { "epoch": 0.22037133437445774, "grad_norm": 2.015625, "learning_rate": 1.845653803729607e-05, "loss": 1.0163522720336915, "step": 1270, "token_acc": 0.6835048168294121 }, { "epoch": 0.22123893805309736, "grad_norm": 1.8359375, "learning_rate": 1.8441187494686055e-05, "loss": 1.0463291168212892, "step": 1275, "token_acc": 0.6759099019331642 }, { "epoch": 0.22210654173173694, "grad_norm": 1.8046875, "learning_rate": 1.8425767444296213e-05, "loss": 1.0286881446838378, "step": 1280, "token_acc": 0.6834346103038309 }, { "epoch": 0.22297414541037655, "grad_norm": 1.9375, "learning_rate": 1.8410278013100803e-05, "loss": 1.0348123550415038, "step": 1285, "token_acc": 0.679287010183677 }, { "epoch": 0.22384174908901613, "grad_norm": 2.0625, "learning_rate": 1.839471932864537e-05, "loss": 1.0408474922180175, "step": 1290, "token_acc": 0.6770663593126929 }, { "epoch": 0.22470935276765575, "grad_norm": 2.0625, "learning_rate": 1.8379091519045737e-05, "loss": 1.0488122940063476, "step": 1295, "token_acc": 0.6739063026626222 }, { "epoch": 0.22557695644629533, "grad_norm": 1.921875, "learning_rate": 1.8363394712986915e-05, "loss": 1.0353066444396972, "step": 1300, "token_acc": 0.6792478688704328 }, { "epoch": 0.22644456012493494, "grad_norm": 2.0, "learning_rate": 1.834762903972207e-05, "loss": 1.0343815803527832, "step": 1305, "token_acc": 0.6786524515782157 }, { "epoch": 0.22731216380357452, "grad_norm": 1.9140625, "learning_rate": 1.8331794629071427e-05, "loss": 1.0241337776184083, "step": 1310, "token_acc": 0.6810138309840513 }, { "epoch": 0.22817976748221414, "grad_norm": 1.9921875, "learning_rate": 1.831589161142124e-05, "loss": 1.0487545013427735, "step": 1315, "token_acc": 0.6746494771055173 }, { "epoch": 0.22904737116085372, "grad_norm": 1.8984375, "learning_rate": 1.8299920117722677e-05, "loss": 1.0491311073303222, "step": 1320, "token_acc": 0.6740286726172584 }, { "epoch": 0.22991497483949333, "grad_norm": 1.7734375, "learning_rate": 1.828388027949078e-05, "loss": 1.0435140609741211, "step": 1325, "token_acc": 0.6763682837492424 }, { "epoch": 0.2307825785181329, "grad_norm": 2.015625, "learning_rate": 1.8267772228803357e-05, "loss": 1.023078155517578, "step": 1330, "token_acc": 0.6799355293097844 }, { "epoch": 0.23165018219677252, "grad_norm": 1.9296875, "learning_rate": 1.82515960982999e-05, "loss": 1.015854835510254, "step": 1335, "token_acc": 0.6842098118535009 }, { "epoch": 0.2325177858754121, "grad_norm": 1.9296875, "learning_rate": 1.8235352021180496e-05, "loss": 1.0593996047973633, "step": 1340, "token_acc": 0.6741832751181426 }, { "epoch": 0.23338538955405172, "grad_norm": 2.0, "learning_rate": 1.821904013120473e-05, "loss": 1.0396366119384766, "step": 1345, "token_acc": 0.6776407492466381 }, { "epoch": 0.2342529932326913, "grad_norm": 1.890625, "learning_rate": 1.8202660562690592e-05, "loss": 1.0485494613647461, "step": 1350, "token_acc": 0.6759969479137384 }, { "epoch": 0.23512059691133091, "grad_norm": 1.9765625, "learning_rate": 1.8186213450513336e-05, "loss": 1.026517391204834, "step": 1355, "token_acc": 0.6813391968138068 }, { "epoch": 0.2359882005899705, "grad_norm": 1.890625, "learning_rate": 1.816969893010442e-05, "loss": 1.041010570526123, "step": 1360, "token_acc": 0.6755975379040209 }, { "epoch": 0.2368558042686101, "grad_norm": 1.9765625, "learning_rate": 1.815311713745036e-05, "loss": 1.0168442726135254, "step": 1365, "token_acc": 0.6804629906694595 }, { "epoch": 0.2377234079472497, "grad_norm": 1.953125, "learning_rate": 1.81364682090916e-05, "loss": 1.025059700012207, "step": 1370, "token_acc": 0.680214399694494 }, { "epoch": 0.2385910116258893, "grad_norm": 1.890625, "learning_rate": 1.811975228212143e-05, "loss": 1.02586030960083, "step": 1375, "token_acc": 0.679387984579139 }, { "epoch": 0.2394586153045289, "grad_norm": 1.96875, "learning_rate": 1.810296949418481e-05, "loss": 1.0357915878295898, "step": 1380, "token_acc": 0.6767545616531072 }, { "epoch": 0.2403262189831685, "grad_norm": 1.8828125, "learning_rate": 1.8086119983477265e-05, "loss": 1.031496810913086, "step": 1385, "token_acc": 0.676317743132888 }, { "epoch": 0.24119382266180808, "grad_norm": 1.8671875, "learning_rate": 1.8069203888743734e-05, "loss": 1.0320685386657715, "step": 1390, "token_acc": 0.6808824724396653 }, { "epoch": 0.2420614263404477, "grad_norm": 1.9765625, "learning_rate": 1.8052221349277445e-05, "loss": 1.044478416442871, "step": 1395, "token_acc": 0.6767207412842042 }, { "epoch": 0.24292903001908728, "grad_norm": 1.875, "learning_rate": 1.803517250491874e-05, "loss": 1.037778091430664, "step": 1400, "token_acc": 0.6757977163281176 }, { "epoch": 0.2437966336977269, "grad_norm": 1.9140625, "learning_rate": 1.801805749605395e-05, "loss": 1.0458430290222167, "step": 1405, "token_acc": 0.6760411743080721 }, { "epoch": 0.24466423737636647, "grad_norm": 1.8984375, "learning_rate": 1.800087646361423e-05, "loss": 1.020294761657715, "step": 1410, "token_acc": 0.6817285303383098 }, { "epoch": 0.24553184105500608, "grad_norm": 1.875, "learning_rate": 1.798362954907439e-05, "loss": 1.0418660163879394, "step": 1415, "token_acc": 0.6780114226375908 }, { "epoch": 0.24639944473364567, "grad_norm": 2.015625, "learning_rate": 1.796631689445174e-05, "loss": 1.0439669609069824, "step": 1420, "token_acc": 0.6750978011601241 }, { "epoch": 0.24726704841228528, "grad_norm": 1.90625, "learning_rate": 1.7948938642304915e-05, "loss": 1.0315986633300782, "step": 1425, "token_acc": 0.6803868088271758 }, { "epoch": 0.24813465209092486, "grad_norm": 1.90625, "learning_rate": 1.793149493573271e-05, "loss": 1.0325140953063965, "step": 1430, "token_acc": 0.6792667142140159 }, { "epoch": 0.24900225576956447, "grad_norm": 1.921875, "learning_rate": 1.791398591837289e-05, "loss": 1.0254653930664062, "step": 1435, "token_acc": 0.6815645499333134 }, { "epoch": 0.24986985944820406, "grad_norm": 1.9453125, "learning_rate": 1.7896411734401008e-05, "loss": 1.042679786682129, "step": 1440, "token_acc": 0.6756525459991441 }, { "epoch": 0.25073746312684364, "grad_norm": 1.953125, "learning_rate": 1.7878772528529232e-05, "loss": 1.0409419059753418, "step": 1445, "token_acc": 0.6742666575920506 }, { "epoch": 0.2516050668054833, "grad_norm": 1.8828125, "learning_rate": 1.7861068446005144e-05, "loss": 1.0194078445434571, "step": 1450, "token_acc": 0.68190224912376 }, { "epoch": 0.25247267048412286, "grad_norm": 2.0, "learning_rate": 1.7843299632610537e-05, "loss": 1.031000518798828, "step": 1455, "token_acc": 0.6809780158582832 }, { "epoch": 0.25334027416276245, "grad_norm": 1.9296875, "learning_rate": 1.782546623466022e-05, "loss": 1.0219725608825683, "step": 1460, "token_acc": 0.6826487625065825 }, { "epoch": 0.25420787784140203, "grad_norm": 2.03125, "learning_rate": 1.7807568399000824e-05, "loss": 1.0241089820861817, "step": 1465, "token_acc": 0.6821418475993054 }, { "epoch": 0.25507548152004167, "grad_norm": 1.921875, "learning_rate": 1.7789606273009574e-05, "loss": 1.010830020904541, "step": 1470, "token_acc": 0.6835254004334725 }, { "epoch": 0.25594308519868125, "grad_norm": 1.921875, "learning_rate": 1.7771580004593093e-05, "loss": 1.045233917236328, "step": 1475, "token_acc": 0.6747018970189702 }, { "epoch": 0.25681068887732084, "grad_norm": 1.96875, "learning_rate": 1.7753489742186164e-05, "loss": 1.011804962158203, "step": 1480, "token_acc": 0.6846772177711121 }, { "epoch": 0.2576782925559604, "grad_norm": 1.921875, "learning_rate": 1.773533563475053e-05, "loss": 1.0484785079956054, "step": 1485, "token_acc": 0.6750959795243682 }, { "epoch": 0.25854589623460006, "grad_norm": 2.0, "learning_rate": 1.771711783177366e-05, "loss": 1.0313974380493165, "step": 1490, "token_acc": 0.6784674492495447 }, { "epoch": 0.25941349991323964, "grad_norm": 1.9921875, "learning_rate": 1.76988364832675e-05, "loss": 1.0448792457580567, "step": 1495, "token_acc": 0.6759750041845674 }, { "epoch": 0.2602811035918792, "grad_norm": 1.9375, "learning_rate": 1.768049173976727e-05, "loss": 1.030049991607666, "step": 1500, "token_acc": 0.6813090211643735 }, { "epoch": 0.2611487072705188, "grad_norm": 1.84375, "learning_rate": 1.7662083752330193e-05, "loss": 1.0194572448730468, "step": 1505, "token_acc": 0.6832258674993579 }, { "epoch": 0.26201631094915845, "grad_norm": 1.9375, "learning_rate": 1.7643612672534275e-05, "loss": 1.0071066856384276, "step": 1510, "token_acc": 0.684999272515641 }, { "epoch": 0.26288391462779803, "grad_norm": 1.9296875, "learning_rate": 1.7625078652477036e-05, "loss": 1.0143555641174316, "step": 1515, "token_acc": 0.683634143031619 }, { "epoch": 0.2637515183064376, "grad_norm": 1.9296875, "learning_rate": 1.760648184477429e-05, "loss": 1.0410999298095702, "step": 1520, "token_acc": 0.6760032102728732 }, { "epoch": 0.2646191219850772, "grad_norm": 1.9921875, "learning_rate": 1.7587822402558837e-05, "loss": 1.0309484481811524, "step": 1525, "token_acc": 0.6798862358621602 }, { "epoch": 0.26548672566371684, "grad_norm": 1.8984375, "learning_rate": 1.756910047947926e-05, "loss": 1.045750045776367, "step": 1530, "token_acc": 0.6779168647335341 }, { "epoch": 0.2663543293423564, "grad_norm": 2.078125, "learning_rate": 1.755031622969862e-05, "loss": 1.0056123733520508, "step": 1535, "token_acc": 0.6844262847741953 }, { "epoch": 0.267221933020996, "grad_norm": 1.875, "learning_rate": 1.7531469807893196e-05, "loss": 1.0222766876220704, "step": 1540, "token_acc": 0.6819466963244851 }, { "epoch": 0.2680895366996356, "grad_norm": 1.9375, "learning_rate": 1.751256136925122e-05, "loss": 1.0223438262939453, "step": 1545, "token_acc": 0.6796833846239153 }, { "epoch": 0.26895714037827523, "grad_norm": 1.9765625, "learning_rate": 1.749359106947158e-05, "loss": 1.0395459175109862, "step": 1550, "token_acc": 0.6780158536915294 }, { "epoch": 0.2698247440569148, "grad_norm": 2.09375, "learning_rate": 1.7474559064762575e-05, "loss": 1.0296743392944336, "step": 1555, "token_acc": 0.6756519151698767 }, { "epoch": 0.2706923477355544, "grad_norm": 1.9453125, "learning_rate": 1.745546551184058e-05, "loss": 1.016903781890869, "step": 1560, "token_acc": 0.6835704451583295 }, { "epoch": 0.271559951414194, "grad_norm": 2.046875, "learning_rate": 1.74363105679288e-05, "loss": 1.020066261291504, "step": 1565, "token_acc": 0.679187746898607 }, { "epoch": 0.2724275550928336, "grad_norm": 2.0, "learning_rate": 1.7417094390755936e-05, "loss": 1.0340109825134278, "step": 1570, "token_acc": 0.677778992239589 }, { "epoch": 0.2732951587714732, "grad_norm": 1.8203125, "learning_rate": 1.739781713855492e-05, "loss": 1.0160035133361816, "step": 1575, "token_acc": 0.681804898783274 }, { "epoch": 0.2741627624501128, "grad_norm": 1.9765625, "learning_rate": 1.7378478970061596e-05, "loss": 1.024774169921875, "step": 1580, "token_acc": 0.680820860552937 }, { "epoch": 0.27503036612875237, "grad_norm": 1.8671875, "learning_rate": 1.735908004451341e-05, "loss": 1.0384547233581543, "step": 1585, "token_acc": 0.6769406692778844 }, { "epoch": 0.275897969807392, "grad_norm": 1.8671875, "learning_rate": 1.7339620521648107e-05, "loss": 1.027394962310791, "step": 1590, "token_acc": 0.6820617131309908 }, { "epoch": 0.2767655734860316, "grad_norm": 1.90625, "learning_rate": 1.7320100561702408e-05, "loss": 1.0266061782836915, "step": 1595, "token_acc": 0.6778099499868386 }, { "epoch": 0.2776331771646712, "grad_norm": 2.046875, "learning_rate": 1.73005203254107e-05, "loss": 1.0057987213134765, "step": 1600, "token_acc": 0.6856035977459904 }, { "epoch": 0.27850078084331076, "grad_norm": 1.921875, "learning_rate": 1.728087997400371e-05, "loss": 1.0396166801452638, "step": 1605, "token_acc": 0.6768935264496704 }, { "epoch": 0.2793683845219504, "grad_norm": 1.765625, "learning_rate": 1.726117966920716e-05, "loss": 1.0311265945434571, "step": 1610, "token_acc": 0.6808383077444412 }, { "epoch": 0.28023598820059, "grad_norm": 1.8671875, "learning_rate": 1.7241419573240463e-05, "loss": 1.0097067832946778, "step": 1615, "token_acc": 0.683870040253019 }, { "epoch": 0.28110359187922956, "grad_norm": 1.96875, "learning_rate": 1.7221599848815374e-05, "loss": 1.0008836746215821, "step": 1620, "token_acc": 0.6870527000650618 }, { "epoch": 0.28197119555786915, "grad_norm": 1.8828125, "learning_rate": 1.7201720659134642e-05, "loss": 1.0405941009521484, "step": 1625, "token_acc": 0.6768849218838519 }, { "epoch": 0.2828387992365088, "grad_norm": 1.796875, "learning_rate": 1.7181782167890678e-05, "loss": 1.0066216468811036, "step": 1630, "token_acc": 0.6848891318550914 }, { "epoch": 0.28370640291514837, "grad_norm": 1.9375, "learning_rate": 1.716178453926421e-05, "loss": 1.046470832824707, "step": 1635, "token_acc": 0.6714027873902482 }, { "epoch": 0.28457400659378795, "grad_norm": 1.9140625, "learning_rate": 1.7141727937922912e-05, "loss": 1.0199688911437987, "step": 1640, "token_acc": 0.6823405115629932 }, { "epoch": 0.28544161027242754, "grad_norm": 1.8828125, "learning_rate": 1.712161252902007e-05, "loss": 1.044092559814453, "step": 1645, "token_acc": 0.6758528428093645 }, { "epoch": 0.2863092139510672, "grad_norm": 1.859375, "learning_rate": 1.7101438478193212e-05, "loss": 1.0233346939086914, "step": 1650, "token_acc": 0.6805489760838082 }, { "epoch": 0.28717681762970676, "grad_norm": 1.9375, "learning_rate": 1.708120595156274e-05, "loss": 1.0456744194030763, "step": 1655, "token_acc": 0.6750241212956581 }, { "epoch": 0.28804442130834634, "grad_norm": 2.03125, "learning_rate": 1.706091511573057e-05, "loss": 1.0319430351257324, "step": 1660, "token_acc": 0.6777862117640792 }, { "epoch": 0.2889120249869859, "grad_norm": 1.9296875, "learning_rate": 1.704056613777876e-05, "loss": 1.0204211235046388, "step": 1665, "token_acc": 0.6796524738028916 }, { "epoch": 0.28977962866562557, "grad_norm": 1.9375, "learning_rate": 1.7020159185268123e-05, "loss": 1.0458597183227538, "step": 1670, "token_acc": 0.6737207077953132 }, { "epoch": 0.29064723234426515, "grad_norm": 2.03125, "learning_rate": 1.6999694426236862e-05, "loss": 1.0375800132751465, "step": 1675, "token_acc": 0.6758920495200551 }, { "epoch": 0.29151483602290473, "grad_norm": 1.953125, "learning_rate": 1.697917202919918e-05, "loss": 1.0144439697265626, "step": 1680, "token_acc": 0.679975894834207 }, { "epoch": 0.2923824397015443, "grad_norm": 1.9609375, "learning_rate": 1.6958592163143884e-05, "loss": 1.0309619903564453, "step": 1685, "token_acc": 0.678642271573428 }, { "epoch": 0.29325004338018396, "grad_norm": 1.953125, "learning_rate": 1.6937954997533016e-05, "loss": 1.0367056846618652, "step": 1690, "token_acc": 0.6776543556428868 }, { "epoch": 0.29411764705882354, "grad_norm": 1.8203125, "learning_rate": 1.691726070230043e-05, "loss": 1.0386839866638184, "step": 1695, "token_acc": 0.6778316736701301 }, { "epoch": 0.2949852507374631, "grad_norm": 2.03125, "learning_rate": 1.689650944785041e-05, "loss": 1.0176087379455567, "step": 1700, "token_acc": 0.6798760737924237 }, { "epoch": 0.2958528544161027, "grad_norm": 1.796875, "learning_rate": 1.6875701405056262e-05, "loss": 1.006351852416992, "step": 1705, "token_acc": 0.6863717464315701 }, { "epoch": 0.29672045809474235, "grad_norm": 1.7890625, "learning_rate": 1.685483674525891e-05, "loss": 1.0238887786865234, "step": 1710, "token_acc": 0.6787703215736074 }, { "epoch": 0.29758806177338193, "grad_norm": 2.046875, "learning_rate": 1.6833915640265485e-05, "loss": 1.0253664016723634, "step": 1715, "token_acc": 0.6786322245940176 }, { "epoch": 0.2984556654520215, "grad_norm": 1.890625, "learning_rate": 1.6812938262347907e-05, "loss": 1.0375401496887207, "step": 1720, "token_acc": 0.677038246903498 }, { "epoch": 0.2993232691306611, "grad_norm": 1.8828125, "learning_rate": 1.6791904784241458e-05, "loss": 1.0252004623413087, "step": 1725, "token_acc": 0.6804137056166104 }, { "epoch": 0.30019087280930074, "grad_norm": 1.921875, "learning_rate": 1.6770815379143385e-05, "loss": 1.010302734375, "step": 1730, "token_acc": 0.6837099330986861 }, { "epoch": 0.3010584764879403, "grad_norm": 1.9609375, "learning_rate": 1.674967022071144e-05, "loss": 1.0301790237426758, "step": 1735, "token_acc": 0.6772626037659445 }, { "epoch": 0.3019260801665799, "grad_norm": 1.8515625, "learning_rate": 1.6728469483062486e-05, "loss": 0.9938658714294434, "step": 1740, "token_acc": 0.6898220909033759 }, { "epoch": 0.3027936838452195, "grad_norm": 1.9453125, "learning_rate": 1.6707213340771028e-05, "loss": 1.0314199447631835, "step": 1745, "token_acc": 0.6770383134840673 }, { "epoch": 0.3036612875238591, "grad_norm": 1.8046875, "learning_rate": 1.6685901968867813e-05, "loss": 1.0129788398742676, "step": 1750, "token_acc": 0.6820457843611499 }, { "epoch": 0.3045288912024987, "grad_norm": 1.9296875, "learning_rate": 1.6664535542838352e-05, "loss": 1.002908420562744, "step": 1755, "token_acc": 0.6864051119594943 }, { "epoch": 0.3053964948811383, "grad_norm": 1.8671875, "learning_rate": 1.6643114238621495e-05, "loss": 1.034525489807129, "step": 1760, "token_acc": 0.6801218196814923 }, { "epoch": 0.3062640985597779, "grad_norm": 2.03125, "learning_rate": 1.6621638232607984e-05, "loss": 1.025135612487793, "step": 1765, "token_acc": 0.6795281498360474 }, { "epoch": 0.3071317022384175, "grad_norm": 1.984375, "learning_rate": 1.6600107701638993e-05, "loss": 1.035383129119873, "step": 1770, "token_acc": 0.6749825634422447 }, { "epoch": 0.3079993059170571, "grad_norm": 1.9296875, "learning_rate": 1.6578522823004666e-05, "loss": 0.9947221755981446, "step": 1775, "token_acc": 0.6872965042273526 }, { "epoch": 0.3088669095956967, "grad_norm": 1.8671875, "learning_rate": 1.6556883774442675e-05, "loss": 1.0022805213928223, "step": 1780, "token_acc": 0.6862549392253107 }, { "epoch": 0.30973451327433627, "grad_norm": 1.9609375, "learning_rate": 1.653519073413675e-05, "loss": 1.0279296875, "step": 1785, "token_acc": 0.6769972826086956 }, { "epoch": 0.3106021169529759, "grad_norm": 1.9296875, "learning_rate": 1.65134438807152e-05, "loss": 1.0212496757507323, "step": 1790, "token_acc": 0.679449427274692 }, { "epoch": 0.3114697206316155, "grad_norm": 1.7265625, "learning_rate": 1.649164339324945e-05, "loss": 1.006572437286377, "step": 1795, "token_acc": 0.6861015265579256 }, { "epoch": 0.31233732431025507, "grad_norm": 1.859375, "learning_rate": 1.646978945125257e-05, "loss": 1.0250924110412598, "step": 1800, "token_acc": 0.6786020029623188 }, { "epoch": 0.31320492798889465, "grad_norm": 2.0, "learning_rate": 1.6447882234677796e-05, "loss": 1.0435279846191405, "step": 1805, "token_acc": 0.6758889509765172 }, { "epoch": 0.3140725316675343, "grad_norm": 1.890625, "learning_rate": 1.6425921923917042e-05, "loss": 1.0279791831970215, "step": 1810, "token_acc": 0.6805610242902337 }, { "epoch": 0.3149401353461739, "grad_norm": 1.90625, "learning_rate": 1.6403908699799423e-05, "loss": 1.02548828125, "step": 1815, "token_acc": 0.6779067427037907 }, { "epoch": 0.31580773902481346, "grad_norm": 2.015625, "learning_rate": 1.6381842743589765e-05, "loss": 1.0200424194335938, "step": 1820, "token_acc": 0.6822118412765064 }, { "epoch": 0.31667534270345304, "grad_norm": 1.8203125, "learning_rate": 1.635972423698709e-05, "loss": 1.0166802406311035, "step": 1825, "token_acc": 0.6827727138286145 }, { "epoch": 0.3175429463820927, "grad_norm": 1.921875, "learning_rate": 1.6337553362123165e-05, "loss": 1.0155767440795898, "step": 1830, "token_acc": 0.6837214270455031 }, { "epoch": 0.31841055006073227, "grad_norm": 1.8984375, "learning_rate": 1.6315330301560956e-05, "loss": 1.0089836120605469, "step": 1835, "token_acc": 0.6831295389068122 }, { "epoch": 0.31927815373937185, "grad_norm": 1.9375, "learning_rate": 1.6293055238293155e-05, "loss": 1.0108304977416993, "step": 1840, "token_acc": 0.6825737553161517 }, { "epoch": 0.32014575741801143, "grad_norm": 1.9453125, "learning_rate": 1.6270728355740658e-05, "loss": 1.0052438735961915, "step": 1845, "token_acc": 0.6869656992084433 }, { "epoch": 0.3210133610966511, "grad_norm": 1.984375, "learning_rate": 1.6248349837751064e-05, "loss": 1.0119807243347168, "step": 1850, "token_acc": 0.6814690154990364 }, { "epoch": 0.32188096477529066, "grad_norm": 1.9453125, "learning_rate": 1.6225919868597154e-05, "loss": 1.0213706970214844, "step": 1855, "token_acc": 0.6794819414937081 }, { "epoch": 0.32274856845393024, "grad_norm": 1.96875, "learning_rate": 1.620343863297538e-05, "loss": 0.9990407943725585, "step": 1860, "token_acc": 0.6861129568106312 }, { "epoch": 0.3236161721325698, "grad_norm": 2.0, "learning_rate": 1.6180906316004336e-05, "loss": 1.0262950897216796, "step": 1865, "token_acc": 0.6775860676697801 }, { "epoch": 0.32448377581120946, "grad_norm": 2.078125, "learning_rate": 1.615832310322324e-05, "loss": 1.036133098602295, "step": 1870, "token_acc": 0.6766465480728505 }, { "epoch": 0.32535137948984905, "grad_norm": 1.8203125, "learning_rate": 1.6135689180590404e-05, "loss": 1.020677089691162, "step": 1875, "token_acc": 0.6793250062916407 }, { "epoch": 0.32621898316848863, "grad_norm": 1.8515625, "learning_rate": 1.6113004734481704e-05, "loss": 1.0076414108276368, "step": 1880, "token_acc": 0.6839123609309945 }, { "epoch": 0.3270865868471282, "grad_norm": 1.9375, "learning_rate": 1.609026995168904e-05, "loss": 1.0311081886291504, "step": 1885, "token_acc": 0.6782416456600568 }, { "epoch": 0.32795419052576785, "grad_norm": 1.953125, "learning_rate": 1.6067485019418814e-05, "loss": 1.0099788665771485, "step": 1890, "token_acc": 0.6829244908301488 }, { "epoch": 0.32882179420440744, "grad_norm": 1.890625, "learning_rate": 1.6044650125290365e-05, "loss": 1.0263484001159668, "step": 1895, "token_acc": 0.6801680694975478 }, { "epoch": 0.329689397883047, "grad_norm": 1.8671875, "learning_rate": 1.6021765457334444e-05, "loss": 1.0163857460021972, "step": 1900, "token_acc": 0.6806223824561879 }, { "epoch": 0.3305570015616866, "grad_norm": 1.8125, "learning_rate": 1.5998831203991648e-05, "loss": 1.0088854789733888, "step": 1905, "token_acc": 0.6855646039732352 }, { "epoch": 0.33142460524032624, "grad_norm": 1.8046875, "learning_rate": 1.5975847554110888e-05, "loss": 0.9952527999877929, "step": 1910, "token_acc": 0.6883125788578638 }, { "epoch": 0.3322922089189658, "grad_norm": 1.8359375, "learning_rate": 1.595281469694782e-05, "loss": 1.0266911506652832, "step": 1915, "token_acc": 0.6797566371681416 }, { "epoch": 0.3331598125976054, "grad_norm": 1.90625, "learning_rate": 1.592973282216329e-05, "loss": 1.0018574714660644, "step": 1920, "token_acc": 0.6860606854970837 }, { "epoch": 0.334027416276245, "grad_norm": 1.9453125, "learning_rate": 1.590660211982177e-05, "loss": 1.0108092308044434, "step": 1925, "token_acc": 0.6822820656674948 }, { "epoch": 0.33489501995488463, "grad_norm": 2.015625, "learning_rate": 1.5883422780389806e-05, "loss": 1.0258635520935058, "step": 1930, "token_acc": 0.6778978538515823 }, { "epoch": 0.3357626236335242, "grad_norm": 1.9453125, "learning_rate": 1.5860194994734427e-05, "loss": 1.021854782104492, "step": 1935, "token_acc": 0.6808329178366179 }, { "epoch": 0.3366302273121638, "grad_norm": 1.9453125, "learning_rate": 1.5836918954121588e-05, "loss": 1.0331063270568848, "step": 1940, "token_acc": 0.6784242872199181 }, { "epoch": 0.3374978309908034, "grad_norm": 1.8828125, "learning_rate": 1.58135948502146e-05, "loss": 1.0210276603698731, "step": 1945, "token_acc": 0.6840329583182118 }, { "epoch": 0.338365434669443, "grad_norm": 1.8046875, "learning_rate": 1.579022287507254e-05, "loss": 1.0260606765747071, "step": 1950, "token_acc": 0.6783528979227396 }, { "epoch": 0.3392330383480826, "grad_norm": 1.9765625, "learning_rate": 1.5766803221148676e-05, "loss": 0.9952493667602539, "step": 1955, "token_acc": 0.6859501834760369 }, { "epoch": 0.3401006420267222, "grad_norm": 1.9609375, "learning_rate": 1.574333608128887e-05, "loss": 1.0229947090148925, "step": 1960, "token_acc": 0.6816778645360451 }, { "epoch": 0.34096824570536177, "grad_norm": 1.8984375, "learning_rate": 1.5719821648730014e-05, "loss": 1.0026690483093261, "step": 1965, "token_acc": 0.6833949856144678 }, { "epoch": 0.3418358493840014, "grad_norm": 1.7578125, "learning_rate": 1.5696260117098424e-05, "loss": 0.9994998931884765, "step": 1970, "token_acc": 0.6882951486903434 }, { "epoch": 0.342703453062641, "grad_norm": 1.953125, "learning_rate": 1.5672651680408237e-05, "loss": 1.0034085273742677, "step": 1975, "token_acc": 0.6842928918540483 }, { "epoch": 0.3435710567412806, "grad_norm": 1.8828125, "learning_rate": 1.5648996533059824e-05, "loss": 1.0039892196655273, "step": 1980, "token_acc": 0.6863158175442339 }, { "epoch": 0.34443866041992016, "grad_norm": 1.8125, "learning_rate": 1.5625294869838203e-05, "loss": 1.0203709602355957, "step": 1985, "token_acc": 0.6797555567287894 }, { "epoch": 0.3453062640985598, "grad_norm": 1.9765625, "learning_rate": 1.5601546885911406e-05, "loss": 1.021955966949463, "step": 1990, "token_acc": 0.6785332666062516 }, { "epoch": 0.3461738677771994, "grad_norm": 1.96875, "learning_rate": 1.5577752776828892e-05, "loss": 1.0178564071655274, "step": 1995, "token_acc": 0.6787890301656874 }, { "epoch": 0.34704147145583897, "grad_norm": 2.03125, "learning_rate": 1.555391273851993e-05, "loss": 0.9952051162719726, "step": 2000, "token_acc": 0.6859772527441359 }, { "epoch": 0.34790907513447855, "grad_norm": 2.046875, "learning_rate": 1.553002696729198e-05, "loss": 1.0093853950500489, "step": 2005, "token_acc": 0.6833726738760498 }, { "epoch": 0.3487766788131182, "grad_norm": 2.015625, "learning_rate": 1.55060956598291e-05, "loss": 1.0151101112365724, "step": 2010, "token_acc": 0.6831491047292776 }, { "epoch": 0.3496442824917578, "grad_norm": 1.9609375, "learning_rate": 1.5482119013190296e-05, "loss": 1.0173629760742187, "step": 2015, "token_acc": 0.6829174613265523 }, { "epoch": 0.35051188617039736, "grad_norm": 1.9296875, "learning_rate": 1.5458097224807916e-05, "loss": 1.019275188446045, "step": 2020, "token_acc": 0.6805337208534249 }, { "epoch": 0.35137948984903694, "grad_norm": 1.8671875, "learning_rate": 1.5434030492486023e-05, "loss": 1.0199106216430665, "step": 2025, "token_acc": 0.6799649276633055 }, { "epoch": 0.3522470935276766, "grad_norm": 1.84375, "learning_rate": 1.5409919014398762e-05, "loss": 1.0161332130432128, "step": 2030, "token_acc": 0.682195193046612 }, { "epoch": 0.35311469720631616, "grad_norm": 1.84375, "learning_rate": 1.5385762989088738e-05, "loss": 1.027943992614746, "step": 2035, "token_acc": 0.676602066311027 }, { "epoch": 0.35398230088495575, "grad_norm": 1.7890625, "learning_rate": 1.5361562615465366e-05, "loss": 1.0008016586303712, "step": 2040, "token_acc": 0.6849210596735349 }, { "epoch": 0.35484990456359533, "grad_norm": 1.859375, "learning_rate": 1.5337318092803243e-05, "loss": 1.0304694175720215, "step": 2045, "token_acc": 0.6774736297159127 }, { "epoch": 0.35571750824223497, "grad_norm": 1.8828125, "learning_rate": 1.5313029620740506e-05, "loss": 1.0220866203308105, "step": 2050, "token_acc": 0.6807486487213273 }, { "epoch": 0.35658511192087455, "grad_norm": 1.9453125, "learning_rate": 1.5288697399277182e-05, "loss": 1.019200611114502, "step": 2055, "token_acc": 0.6806197591915717 }, { "epoch": 0.35745271559951414, "grad_norm": 1.796875, "learning_rate": 1.526432162877356e-05, "loss": 1.013671875, "step": 2060, "token_acc": 0.6828907213817285 }, { "epoch": 0.3583203192781537, "grad_norm": 1.921875, "learning_rate": 1.5239902509948514e-05, "loss": 1.0091094017028808, "step": 2065, "token_acc": 0.6834054718392647 }, { "epoch": 0.35918792295679336, "grad_norm": 1.7578125, "learning_rate": 1.521544024387787e-05, "loss": 1.0055926322937012, "step": 2070, "token_acc": 0.6828146538012936 }, { "epoch": 0.36005552663543294, "grad_norm": 1.7578125, "learning_rate": 1.5190935031992742e-05, "loss": 1.0013408660888672, "step": 2075, "token_acc": 0.6865663839408236 }, { "epoch": 0.3609231303140725, "grad_norm": 1.96875, "learning_rate": 1.5166387076077876e-05, "loss": 1.014689826965332, "step": 2080, "token_acc": 0.6808145941313308 }, { "epoch": 0.3617907339927121, "grad_norm": 1.875, "learning_rate": 1.5141796578269986e-05, "loss": 1.0103944778442382, "step": 2085, "token_acc": 0.6806936577861687 }, { "epoch": 0.36265833767135175, "grad_norm": 1.90625, "learning_rate": 1.5117163741056092e-05, "loss": 1.0004392623901368, "step": 2090, "token_acc": 0.6851040904004753 }, { "epoch": 0.36352594134999133, "grad_norm": 1.8671875, "learning_rate": 1.5092488767271858e-05, "loss": 1.004606342315674, "step": 2095, "token_acc": 0.682853725269135 }, { "epoch": 0.3643935450286309, "grad_norm": 1.9375, "learning_rate": 1.5067771860099905e-05, "loss": 0.9848871231079102, "step": 2100, "token_acc": 0.6914043831501331 }, { "epoch": 0.3652611487072705, "grad_norm": 1.9140625, "learning_rate": 1.5043013223068155e-05, "loss": 1.0125656127929688, "step": 2105, "token_acc": 0.6832955602426212 }, { "epoch": 0.36612875238591014, "grad_norm": 1.8828125, "learning_rate": 1.501821306004815e-05, "loss": 1.0106427192687988, "step": 2110, "token_acc": 0.6838881419006099 }, { "epoch": 0.3669963560645497, "grad_norm": 1.8203125, "learning_rate": 1.4993371575253368e-05, "loss": 1.0103277206420898, "step": 2115, "token_acc": 0.6830820506764292 }, { "epoch": 0.3678639597431893, "grad_norm": 1.90625, "learning_rate": 1.496848897323755e-05, "loss": 0.9989145278930665, "step": 2120, "token_acc": 0.6848548395882129 }, { "epoch": 0.3687315634218289, "grad_norm": 1.9453125, "learning_rate": 1.4943565458892999e-05, "loss": 1.0156753540039063, "step": 2125, "token_acc": 0.683586704457614 }, { "epoch": 0.36959916710046853, "grad_norm": 1.828125, "learning_rate": 1.4918601237448925e-05, "loss": 1.0110110282897948, "step": 2130, "token_acc": 0.6824813659671195 }, { "epoch": 0.3704667707791081, "grad_norm": 1.90625, "learning_rate": 1.4893596514469718e-05, "loss": 1.0106982231140136, "step": 2135, "token_acc": 0.6820436574981416 }, { "epoch": 0.3713343744577477, "grad_norm": 1.859375, "learning_rate": 1.4868551495853278e-05, "loss": 1.0084819793701172, "step": 2140, "token_acc": 0.6837009642055211 }, { "epoch": 0.3722019781363873, "grad_norm": 1.9296875, "learning_rate": 1.4843466387829317e-05, "loss": 1.0337956428527832, "step": 2145, "token_acc": 0.6756529177470663 }, { "epoch": 0.3730695818150269, "grad_norm": 1.7890625, "learning_rate": 1.4818341396957651e-05, "loss": 1.010234260559082, "step": 2150, "token_acc": 0.6839618937946557 }, { "epoch": 0.3739371854936665, "grad_norm": 3.484375, "learning_rate": 1.4793176730126512e-05, "loss": 0.9982177734375, "step": 2155, "token_acc": 0.6882951820647545 }, { "epoch": 0.3748047891723061, "grad_norm": 1.8046875, "learning_rate": 1.4767972594550832e-05, "loss": 1.0000919342041015, "step": 2160, "token_acc": 0.685829937736179 }, { "epoch": 0.37567239285094567, "grad_norm": 1.796875, "learning_rate": 1.4742729197770551e-05, "loss": 1.0299704551696778, "step": 2165, "token_acc": 0.6772884904796179 }, { "epoch": 0.3765399965295853, "grad_norm": 1.9765625, "learning_rate": 1.4717446747648894e-05, "loss": 1.016530704498291, "step": 2170, "token_acc": 0.6815982696795492 }, { "epoch": 0.3774076002082249, "grad_norm": 2.03125, "learning_rate": 1.4692125452370664e-05, "loss": 1.0197928428649903, "step": 2175, "token_acc": 0.6793494519840083 }, { "epoch": 0.3782752038868645, "grad_norm": 1.8671875, "learning_rate": 1.4666765520440534e-05, "loss": 1.0177095413208008, "step": 2180, "token_acc": 0.6810032017075773 }, { "epoch": 0.37914280756550406, "grad_norm": 1.9609375, "learning_rate": 1.464136716068132e-05, "loss": 1.0126147270202637, "step": 2185, "token_acc": 0.683709293410274 }, { "epoch": 0.3800104112441437, "grad_norm": 1.84375, "learning_rate": 1.461593058223227e-05, "loss": 1.021070957183838, "step": 2190, "token_acc": 0.679652122955623 }, { "epoch": 0.3808780149227833, "grad_norm": 1.8671875, "learning_rate": 1.4590455994547337e-05, "loss": 1.001976203918457, "step": 2195, "token_acc": 0.6833014477415503 }, { "epoch": 0.38174561860142286, "grad_norm": 1.8671875, "learning_rate": 1.456494360739346e-05, "loss": 0.9893196105957032, "step": 2200, "token_acc": 0.6892311085988446 }, { "epoch": 0.38261322228006245, "grad_norm": 2.03125, "learning_rate": 1.4539393630848829e-05, "loss": 0.9814781188964844, "step": 2205, "token_acc": 0.6899440949405221 }, { "epoch": 0.3834808259587021, "grad_norm": 1.9921875, "learning_rate": 1.451380627530115e-05, "loss": 1.011701488494873, "step": 2210, "token_acc": 0.6809758515295867 }, { "epoch": 0.38434842963734167, "grad_norm": 1.8828125, "learning_rate": 1.4488181751445939e-05, "loss": 1.0211992263793945, "step": 2215, "token_acc": 0.6797978865156532 }, { "epoch": 0.38521603331598125, "grad_norm": 2.0, "learning_rate": 1.4462520270284756e-05, "loss": 0.9845295906066894, "step": 2220, "token_acc": 0.6868465406909026 }, { "epoch": 0.38608363699462084, "grad_norm": 1.859375, "learning_rate": 1.4436822043123485e-05, "loss": 1.0249157905578614, "step": 2225, "token_acc": 0.6786562283760498 }, { "epoch": 0.3869512406732605, "grad_norm": 1.765625, "learning_rate": 1.441108728157059e-05, "loss": 1.0030797004699707, "step": 2230, "token_acc": 0.684765917234319 }, { "epoch": 0.38781884435190006, "grad_norm": 1.96875, "learning_rate": 1.4385316197535373e-05, "loss": 1.0158026695251465, "step": 2235, "token_acc": 0.6832608666746447 }, { "epoch": 0.38868644803053964, "grad_norm": 1.84375, "learning_rate": 1.4359509003226221e-05, "loss": 1.0172318458557128, "step": 2240, "token_acc": 0.6808322441812877 }, { "epoch": 0.3895540517091792, "grad_norm": 1.9453125, "learning_rate": 1.4333665911148881e-05, "loss": 0.9851541519165039, "step": 2245, "token_acc": 0.6889603544215962 }, { "epoch": 0.39042165538781887, "grad_norm": 1.953125, "learning_rate": 1.4307787134104682e-05, "loss": 1.014187717437744, "step": 2250, "token_acc": 0.683114625160409 }, { "epoch": 0.39128925906645845, "grad_norm": 1.8203125, "learning_rate": 1.42818728851888e-05, "loss": 1.0081872940063477, "step": 2255, "token_acc": 0.6823870250820193 }, { "epoch": 0.39215686274509803, "grad_norm": 1.859375, "learning_rate": 1.4255923377788497e-05, "loss": 1.0085988998413087, "step": 2260, "token_acc": 0.6840598070654684 }, { "epoch": 0.3930244664237376, "grad_norm": 1.859375, "learning_rate": 1.4229938825581373e-05, "loss": 1.0013799667358398, "step": 2265, "token_acc": 0.6847899527045825 }, { "epoch": 0.39389207010237726, "grad_norm": 1.890625, "learning_rate": 1.4203919442533597e-05, "loss": 1.018793773651123, "step": 2270, "token_acc": 0.681686886192952 }, { "epoch": 0.39475967378101684, "grad_norm": 1.9921875, "learning_rate": 1.4177865442898137e-05, "loss": 1.0064517974853515, "step": 2275, "token_acc": 0.6819670370966876 }, { "epoch": 0.3956272774596564, "grad_norm": 1.9609375, "learning_rate": 1.4151777041213021e-05, "loss": 0.9828666687011719, "step": 2280, "token_acc": 0.6887780548628429 }, { "epoch": 0.396494881138296, "grad_norm": 1.96875, "learning_rate": 1.4125654452299553e-05, "loss": 1.0092188835144043, "step": 2285, "token_acc": 0.6844631486295059 }, { "epoch": 0.39736248481693565, "grad_norm": 1.8828125, "learning_rate": 1.4099497891260538e-05, "loss": 0.9924700736999512, "step": 2290, "token_acc": 0.6873599312908464 }, { "epoch": 0.39823008849557523, "grad_norm": 2.0, "learning_rate": 1.4073307573478528e-05, "loss": 1.0148592948913575, "step": 2295, "token_acc": 0.6811773236297232 }, { "epoch": 0.3990976921742148, "grad_norm": 1.9765625, "learning_rate": 1.4047083714614038e-05, "loss": 1.0003128051757812, "step": 2300, "token_acc": 0.6852241329539362 }, { "epoch": 0.3999652958528544, "grad_norm": 1.984375, "learning_rate": 1.4020826530603775e-05, "loss": 0.9960025787353516, "step": 2305, "token_acc": 0.6852598031645303 }, { "epoch": 0.40083289953149404, "grad_norm": 2.0, "learning_rate": 1.399453623765885e-05, "loss": 1.0148781776428222, "step": 2310, "token_acc": 0.684109947643979 }, { "epoch": 0.4017005032101336, "grad_norm": 1.90625, "learning_rate": 1.3968213052263014e-05, "loss": 1.012251091003418, "step": 2315, "token_acc": 0.6833315462148831 }, { "epoch": 0.4025681068887732, "grad_norm": 1.921875, "learning_rate": 1.3941857191170857e-05, "loss": 0.9941699028015136, "step": 2320, "token_acc": 0.6864559695983815 }, { "epoch": 0.4034357105674128, "grad_norm": 1.96875, "learning_rate": 1.3915468871406044e-05, "loss": 1.0085437774658204, "step": 2325, "token_acc": 0.6833488248572567 }, { "epoch": 0.4043033142460524, "grad_norm": 1.9296875, "learning_rate": 1.38890483102595e-05, "loss": 1.0144371032714843, "step": 2330, "token_acc": 0.68039780521262 }, { "epoch": 0.405170917924692, "grad_norm": 1.75, "learning_rate": 1.3862595725287653e-05, "loss": 0.9994147300720215, "step": 2335, "token_acc": 0.687611521794545 }, { "epoch": 0.4060385216033316, "grad_norm": 1.9140625, "learning_rate": 1.3836111334310622e-05, "loss": 0.9963122367858886, "step": 2340, "token_acc": 0.685745011351416 }, { "epoch": 0.4069061252819712, "grad_norm": 1.859375, "learning_rate": 1.3809595355410424e-05, "loss": 1.0122366905212403, "step": 2345, "token_acc": 0.683117204922772 }, { "epoch": 0.4077737289606108, "grad_norm": 1.890625, "learning_rate": 1.3783048006929185e-05, "loss": 1.0144343376159668, "step": 2350, "token_acc": 0.6814033279539999 }, { "epoch": 0.4086413326392504, "grad_norm": 1.8984375, "learning_rate": 1.375646950746734e-05, "loss": 1.0156232833862304, "step": 2355, "token_acc": 0.6830444078275435 }, { "epoch": 0.40950893631789, "grad_norm": 1.8984375, "learning_rate": 1.3729860075881827e-05, "loss": 1.034743595123291, "step": 2360, "token_acc": 0.6783182628209359 }, { "epoch": 0.41037653999652957, "grad_norm": 1.9375, "learning_rate": 1.3703219931284304e-05, "loss": 0.9984539031982422, "step": 2365, "token_acc": 0.6839781943890441 }, { "epoch": 0.4112441436751692, "grad_norm": 1.90625, "learning_rate": 1.3676549293039316e-05, "loss": 1.0032421112060548, "step": 2370, "token_acc": 0.6834760671844918 }, { "epoch": 0.4121117473538088, "grad_norm": 1.7890625, "learning_rate": 1.3649848380762513e-05, "loss": 0.9850346565246582, "step": 2375, "token_acc": 0.6905733974775712 }, { "epoch": 0.41297935103244837, "grad_norm": 1.859375, "learning_rate": 1.3623117414318827e-05, "loss": 1.0028590202331542, "step": 2380, "token_acc": 0.6836534850029511 }, { "epoch": 0.41384695471108796, "grad_norm": 1.859375, "learning_rate": 1.3596356613820669e-05, "loss": 1.013303279876709, "step": 2385, "token_acc": 0.6805802728792536 }, { "epoch": 0.4147145583897276, "grad_norm": 1.8828125, "learning_rate": 1.3569566199626114e-05, "loss": 1.0094331741333007, "step": 2390, "token_acc": 0.685405305236406 }, { "epoch": 0.4155821620683672, "grad_norm": 1.9140625, "learning_rate": 1.3542746392337087e-05, "loss": 1.005965805053711, "step": 2395, "token_acc": 0.682434716756596 }, { "epoch": 0.41644976574700676, "grad_norm": 1.75, "learning_rate": 1.3515897412797547e-05, "loss": 0.9940034866333007, "step": 2400, "token_acc": 0.6875033593120129 }, { "epoch": 0.41731736942564635, "grad_norm": 1.90625, "learning_rate": 1.348901948209167e-05, "loss": 0.9850317955017089, "step": 2405, "token_acc": 0.686592845447229 }, { "epoch": 0.418184973104286, "grad_norm": 1.953125, "learning_rate": 1.3462112821542016e-05, "loss": 1.0118427276611328, "step": 2410, "token_acc": 0.6816512666869937 }, { "epoch": 0.41905257678292557, "grad_norm": 1.875, "learning_rate": 1.3435177652707735e-05, "loss": 1.0028743743896484, "step": 2415, "token_acc": 0.6860446549751178 }, { "epoch": 0.41992018046156515, "grad_norm": 1.8828125, "learning_rate": 1.3408214197382705e-05, "loss": 0.9918471336364746, "step": 2420, "token_acc": 0.6874362288279708 }, { "epoch": 0.42078778414020473, "grad_norm": 1.84375, "learning_rate": 1.3381222677593737e-05, "loss": 1.0141358375549316, "step": 2425, "token_acc": 0.6807328527018983 }, { "epoch": 0.4216553878188444, "grad_norm": 1.84375, "learning_rate": 1.3354203315598733e-05, "loss": 1.0219820976257323, "step": 2430, "token_acc": 0.6813468119008437 }, { "epoch": 0.42252299149748396, "grad_norm": 1.96875, "learning_rate": 1.3327156333884856e-05, "loss": 1.0195876121520997, "step": 2435, "token_acc": 0.6805910377684181 }, { "epoch": 0.42339059517612354, "grad_norm": 1.8671875, "learning_rate": 1.33000819551667e-05, "loss": 1.0096649169921874, "step": 2440, "token_acc": 0.6815645521723036 }, { "epoch": 0.4242581988547631, "grad_norm": 1.9453125, "learning_rate": 1.3272980402384459e-05, "loss": 1.0119336128234864, "step": 2445, "token_acc": 0.68414329128903 }, { "epoch": 0.42512580253340276, "grad_norm": 1.90625, "learning_rate": 1.3245851898702083e-05, "loss": 1.004085636138916, "step": 2450, "token_acc": 0.6859707219637835 }, { "epoch": 0.42599340621204235, "grad_norm": 1.90625, "learning_rate": 1.3218696667505444e-05, "loss": 1.006967830657959, "step": 2455, "token_acc": 0.6833605995039316 }, { "epoch": 0.42686100989068193, "grad_norm": 1.984375, "learning_rate": 1.319151493240051e-05, "loss": 1.0127968788146973, "step": 2460, "token_acc": 0.6829119501118417 }, { "epoch": 0.4277286135693215, "grad_norm": 1.9453125, "learning_rate": 1.3164306917211475e-05, "loss": 1.0015942573547363, "step": 2465, "token_acc": 0.683394712251965 }, { "epoch": 0.42859621724796115, "grad_norm": 1.8359375, "learning_rate": 1.313707284597895e-05, "loss": 0.9921387672424317, "step": 2470, "token_acc": 0.6884706008353861 }, { "epoch": 0.42946382092660074, "grad_norm": 1.796875, "learning_rate": 1.3109812942958087e-05, "loss": 0.9937407493591308, "step": 2475, "token_acc": 0.6858287322723828 }, { "epoch": 0.4303314246052403, "grad_norm": 1.8125, "learning_rate": 1.308252743261675e-05, "loss": 1.0000595092773437, "step": 2480, "token_acc": 0.6844413945289899 }, { "epoch": 0.4311990282838799, "grad_norm": 1.890625, "learning_rate": 1.3055216539633668e-05, "loss": 0.9946840286254883, "step": 2485, "token_acc": 0.6857801388537539 }, { "epoch": 0.43206663196251954, "grad_norm": 1.7421875, "learning_rate": 1.302788048889657e-05, "loss": 0.9850924491882325, "step": 2490, "token_acc": 0.6912732362675458 }, { "epoch": 0.4329342356411591, "grad_norm": 1.8828125, "learning_rate": 1.3000519505500354e-05, "loss": 1.013066577911377, "step": 2495, "token_acc": 0.6841163491550963 }, { "epoch": 0.4338018393197987, "grad_norm": 1.90625, "learning_rate": 1.297313381474522e-05, "loss": 1.0059243202209474, "step": 2500, "token_acc": 0.682485376889968 }, { "epoch": 0.4346694429984383, "grad_norm": 1.8984375, "learning_rate": 1.2945723642134808e-05, "loss": 0.9933188438415528, "step": 2505, "token_acc": 0.6839858247063091 }, { "epoch": 0.43553704667707793, "grad_norm": 1.8359375, "learning_rate": 1.2918289213374362e-05, "loss": 1.0141347885131835, "step": 2510, "token_acc": 0.6845435525845792 }, { "epoch": 0.4364046503557175, "grad_norm": 1.8125, "learning_rate": 1.2890830754368855e-05, "loss": 1.0011796951293945, "step": 2515, "token_acc": 0.6849304174950298 }, { "epoch": 0.4372722540343571, "grad_norm": 1.859375, "learning_rate": 1.2863348491221129e-05, "loss": 1.004225254058838, "step": 2520, "token_acc": 0.6839080459770115 }, { "epoch": 0.4381398577129967, "grad_norm": 1.8828125, "learning_rate": 1.2835842650230046e-05, "loss": 1.005355167388916, "step": 2525, "token_acc": 0.6849887538762527 }, { "epoch": 0.4390074613916363, "grad_norm": 1.796875, "learning_rate": 1.2808313457888614e-05, "loss": 1.0048332214355469, "step": 2530, "token_acc": 0.6836279848033905 }, { "epoch": 0.4398750650702759, "grad_norm": 1.8515625, "learning_rate": 1.2780761140882123e-05, "loss": 1.0195894241333008, "step": 2535, "token_acc": 0.6795473179123936 }, { "epoch": 0.4407426687489155, "grad_norm": 1.9140625, "learning_rate": 1.2753185926086282e-05, "loss": 1.0192377090454101, "step": 2540, "token_acc": 0.6803299012123347 }, { "epoch": 0.4416102724275551, "grad_norm": 1.9921875, "learning_rate": 1.2725588040565344e-05, "loss": 1.005928134918213, "step": 2545, "token_acc": 0.6801644427607302 }, { "epoch": 0.4424778761061947, "grad_norm": 1.8984375, "learning_rate": 1.2697967711570243e-05, "loss": 1.003110980987549, "step": 2550, "token_acc": 0.6823795540443708 }, { "epoch": 0.4433454797848343, "grad_norm": 1.8203125, "learning_rate": 1.2670325166536726e-05, "loss": 1.000045108795166, "step": 2555, "token_acc": 0.6848798995377668 }, { "epoch": 0.4442130834634739, "grad_norm": 1.7890625, "learning_rate": 1.2642660633083467e-05, "loss": 0.9951872825622559, "step": 2560, "token_acc": 0.6862447171184515 }, { "epoch": 0.44508068714211346, "grad_norm": 1.859375, "learning_rate": 1.2614974339010208e-05, "loss": 1.0055727005004882, "step": 2565, "token_acc": 0.6850169715125947 }, { "epoch": 0.4459482908207531, "grad_norm": 1.8671875, "learning_rate": 1.2587266512295868e-05, "loss": 1.0195012092590332, "step": 2570, "token_acc": 0.6816625277741472 }, { "epoch": 0.4468158944993927, "grad_norm": 1.921875, "learning_rate": 1.2559537381096681e-05, "loss": 0.9964936256408692, "step": 2575, "token_acc": 0.6885902240435685 }, { "epoch": 0.44768349817803227, "grad_norm": 1.90625, "learning_rate": 1.2531787173744298e-05, "loss": 0.9999607086181641, "step": 2580, "token_acc": 0.6850149960102359 }, { "epoch": 0.44855110185667185, "grad_norm": 1.9609375, "learning_rate": 1.2504016118743936e-05, "loss": 1.0000761032104493, "step": 2585, "token_acc": 0.6829418781621488 }, { "epoch": 0.4494187055353115, "grad_norm": 1.9140625, "learning_rate": 1.2476224444772467e-05, "loss": 1.0015432357788085, "step": 2590, "token_acc": 0.6852074323242031 }, { "epoch": 0.4502863092139511, "grad_norm": 1.8984375, "learning_rate": 1.244841238067655e-05, "loss": 1.0161554336547851, "step": 2595, "token_acc": 0.68090608621095 }, { "epoch": 0.45115391289259066, "grad_norm": 1.953125, "learning_rate": 1.242058015547074e-05, "loss": 1.0064961433410644, "step": 2600, "token_acc": 0.6828215164844034 }, { "epoch": 0.45202151657123024, "grad_norm": 1.8359375, "learning_rate": 1.2392727998335617e-05, "loss": 1.0068798065185547, "step": 2605, "token_acc": 0.6836080829566604 }, { "epoch": 0.4528891202498699, "grad_norm": 1.875, "learning_rate": 1.2364856138615873e-05, "loss": 0.9954544067382812, "step": 2610, "token_acc": 0.6852013951546656 }, { "epoch": 0.45375672392850946, "grad_norm": 1.8671875, "learning_rate": 1.2336964805818445e-05, "loss": 1.012361431121826, "step": 2615, "token_acc": 0.6833901146441704 }, { "epoch": 0.45462432760714905, "grad_norm": 1.9375, "learning_rate": 1.2309054229610625e-05, "loss": 1.0001043319702148, "step": 2620, "token_acc": 0.6823560827524173 }, { "epoch": 0.45549193128578863, "grad_norm": 1.9453125, "learning_rate": 1.2281124639818152e-05, "loss": 1.0002737998962403, "step": 2625, "token_acc": 0.6835434270674609 }, { "epoch": 0.45635953496442827, "grad_norm": 1.8828125, "learning_rate": 1.2253176266423332e-05, "loss": 0.9922337532043457, "step": 2630, "token_acc": 0.6857845693124239 }, { "epoch": 0.45722713864306785, "grad_norm": 1.78125, "learning_rate": 1.2225209339563144e-05, "loss": 0.9958258628845215, "step": 2635, "token_acc": 0.6889344316136575 }, { "epoch": 0.45809474232170744, "grad_norm": 1.8125, "learning_rate": 1.2197224089527347e-05, "loss": 1.000858688354492, "step": 2640, "token_acc": 0.6852724153892232 }, { "epoch": 0.458962346000347, "grad_norm": 1.8984375, "learning_rate": 1.2169220746756567e-05, "loss": 1.01625337600708, "step": 2645, "token_acc": 0.6788307748873977 }, { "epoch": 0.45982994967898666, "grad_norm": 1.8359375, "learning_rate": 1.2141199541840428e-05, "loss": 1.0196890830993652, "step": 2650, "token_acc": 0.6794828350233297 }, { "epoch": 0.46069755335762624, "grad_norm": 1.84375, "learning_rate": 1.2113160705515626e-05, "loss": 1.0036340713500977, "step": 2655, "token_acc": 0.6851736637091539 }, { "epoch": 0.4615651570362658, "grad_norm": 1.9609375, "learning_rate": 1.2085104468664041e-05, "loss": 1.0029501914978027, "step": 2660, "token_acc": 0.6866774142396532 }, { "epoch": 0.4624327607149054, "grad_norm": 1.8203125, "learning_rate": 1.2057031062310845e-05, "loss": 1.0131060600280761, "step": 2665, "token_acc": 0.6806618788309262 }, { "epoch": 0.46330036439354505, "grad_norm": 1.8671875, "learning_rate": 1.2028940717622576e-05, "loss": 1.0073641777038573, "step": 2670, "token_acc": 0.6839914676655741 }, { "epoch": 0.46416796807218463, "grad_norm": 1.7734375, "learning_rate": 1.2000833665905255e-05, "loss": 1.0106398582458496, "step": 2675, "token_acc": 0.6813406569965871 }, { "epoch": 0.4650355717508242, "grad_norm": 1.8671875, "learning_rate": 1.1972710138602482e-05, "loss": 1.0050904273986816, "step": 2680, "token_acc": 0.6856562992838491 }, { "epoch": 0.4659031754294638, "grad_norm": 1.90625, "learning_rate": 1.194457036729351e-05, "loss": 1.015509033203125, "step": 2685, "token_acc": 0.6802187834233648 }, { "epoch": 0.46677077910810344, "grad_norm": 1.90625, "learning_rate": 1.1916414583691361e-05, "loss": 1.0169716835021974, "step": 2690, "token_acc": 0.6822935779816514 }, { "epoch": 0.467638382786743, "grad_norm": 1.890625, "learning_rate": 1.18882430196409e-05, "loss": 1.00482234954834, "step": 2695, "token_acc": 0.6824643916517463 }, { "epoch": 0.4685059864653826, "grad_norm": 1.8046875, "learning_rate": 1.1860055907116937e-05, "loss": 1.013214111328125, "step": 2700, "token_acc": 0.6813338959360743 }, { "epoch": 0.4693735901440222, "grad_norm": 2.015625, "learning_rate": 1.1831853478222318e-05, "loss": 1.0059806823730468, "step": 2705, "token_acc": 0.683576909519807 }, { "epoch": 0.47024119382266183, "grad_norm": 1.9296875, "learning_rate": 1.1803635965186002e-05, "loss": 0.9913622856140136, "step": 2710, "token_acc": 0.6861254522541567 }, { "epoch": 0.4711087975013014, "grad_norm": 1.734375, "learning_rate": 1.1775403600361167e-05, "loss": 1.0054823875427246, "step": 2715, "token_acc": 0.684439954609615 }, { "epoch": 0.471976401179941, "grad_norm": 1.8671875, "learning_rate": 1.1747156616223272e-05, "loss": 1.0157322883605957, "step": 2720, "token_acc": 0.680252464832309 }, { "epoch": 0.4728440048585806, "grad_norm": 1.890625, "learning_rate": 1.1718895245368167e-05, "loss": 1.0170634269714356, "step": 2725, "token_acc": 0.6813107028863409 }, { "epoch": 0.4737116085372202, "grad_norm": 1.984375, "learning_rate": 1.1690619720510165e-05, "loss": 0.9852043151855469, "step": 2730, "token_acc": 0.6872285921724955 }, { "epoch": 0.4745792122158598, "grad_norm": 1.7734375, "learning_rate": 1.1662330274480128e-05, "loss": 1.0059645652770997, "step": 2735, "token_acc": 0.6836207236712307 }, { "epoch": 0.4754468158944994, "grad_norm": 1.859375, "learning_rate": 1.1634027140223544e-05, "loss": 0.9841846466064453, "step": 2740, "token_acc": 0.6890565215615255 }, { "epoch": 0.47631441957313897, "grad_norm": 1.859375, "learning_rate": 1.1605710550798626e-05, "loss": 0.995844554901123, "step": 2745, "token_acc": 0.6851391782871187 }, { "epoch": 0.4771820232517786, "grad_norm": 1.7421875, "learning_rate": 1.1577380739374376e-05, "loss": 0.9913998603820801, "step": 2750, "token_acc": 0.6908336288532514 }, { "epoch": 0.4780496269304182, "grad_norm": 1.8984375, "learning_rate": 1.1549037939228667e-05, "loss": 0.9965376853942871, "step": 2755, "token_acc": 0.6868992547759661 }, { "epoch": 0.4789172306090578, "grad_norm": 1.8828125, "learning_rate": 1.1520682383746334e-05, "loss": 1.0012220382690429, "step": 2760, "token_acc": 0.6884490453429107 }, { "epoch": 0.47978483428769736, "grad_norm": 1.8984375, "learning_rate": 1.1492314306417233e-05, "loss": 0.9879722595214844, "step": 2765, "token_acc": 0.6881789911554461 }, { "epoch": 0.480652437966337, "grad_norm": 1.953125, "learning_rate": 1.1463933940834342e-05, "loss": 1.0053581237792968, "step": 2770, "token_acc": 0.6855757229040982 }, { "epoch": 0.4815200416449766, "grad_norm": 1.96875, "learning_rate": 1.1435541520691815e-05, "loss": 0.9921921730041504, "step": 2775, "token_acc": 0.6862049831504012 }, { "epoch": 0.48238764532361617, "grad_norm": 1.9453125, "learning_rate": 1.1407137279783074e-05, "loss": 1.0183118820190429, "step": 2780, "token_acc": 0.6817061841095448 }, { "epoch": 0.48325524900225575, "grad_norm": 1.9375, "learning_rate": 1.1378721451998874e-05, "loss": 0.9925461769104004, "step": 2785, "token_acc": 0.688545962485034 }, { "epoch": 0.4841228526808954, "grad_norm": 1.9609375, "learning_rate": 1.1350294271325379e-05, "loss": 1.0159781455993653, "step": 2790, "token_acc": 0.681974551332532 }, { "epoch": 0.48499045635953497, "grad_norm": 1.9140625, "learning_rate": 1.1321855971842243e-05, "loss": 1.0086934089660644, "step": 2795, "token_acc": 0.6831038631199038 }, { "epoch": 0.48585806003817456, "grad_norm": 1.875, "learning_rate": 1.129340678772067e-05, "loss": 1.0199012756347656, "step": 2800, "token_acc": 0.6786575483258884 }, { "epoch": 0.48672566371681414, "grad_norm": 1.953125, "learning_rate": 1.1264946953221496e-05, "loss": 1.0137310028076172, "step": 2805, "token_acc": 0.6824437730782141 }, { "epoch": 0.4875932673954538, "grad_norm": 1.8515625, "learning_rate": 1.123647670269325e-05, "loss": 1.0020729064941407, "step": 2810, "token_acc": 0.6836910759886811 }, { "epoch": 0.48846087107409336, "grad_norm": 1.90625, "learning_rate": 1.1207996270570242e-05, "loss": 0.9875768661499024, "step": 2815, "token_acc": 0.6884513431530621 }, { "epoch": 0.48932847475273294, "grad_norm": 1.9140625, "learning_rate": 1.117950589137061e-05, "loss": 1.0017758369445802, "step": 2820, "token_acc": 0.6836398649214367 }, { "epoch": 0.49019607843137253, "grad_norm": 1.875, "learning_rate": 1.1151005799694401e-05, "loss": 1.0143745422363282, "step": 2825, "token_acc": 0.6843956569062094 }, { "epoch": 0.49106368211001217, "grad_norm": 1.8984375, "learning_rate": 1.1122496230221644e-05, "loss": 1.0051603317260742, "step": 2830, "token_acc": 0.6826833612462451 }, { "epoch": 0.49193128578865175, "grad_norm": 1.8984375, "learning_rate": 1.1093977417710408e-05, "loss": 0.9880369186401368, "step": 2835, "token_acc": 0.6881879959200434 }, { "epoch": 0.49279888946729133, "grad_norm": 1.8515625, "learning_rate": 1.1065449596994876e-05, "loss": 0.9956092834472656, "step": 2840, "token_acc": 0.6856416772554003 }, { "epoch": 0.4936664931459309, "grad_norm": 1.8828125, "learning_rate": 1.1036913002983392e-05, "loss": 1.0082509994506836, "step": 2845, "token_acc": 0.6834558638400725 }, { "epoch": 0.49453409682457056, "grad_norm": 1.8125, "learning_rate": 1.1008367870656568e-05, "loss": 0.9957260131835938, "step": 2850, "token_acc": 0.6855102932343144 }, { "epoch": 0.49540170050321014, "grad_norm": 1.8515625, "learning_rate": 1.0979814435065308e-05, "loss": 0.9961285591125488, "step": 2855, "token_acc": 0.6854094100735335 }, { "epoch": 0.4962693041818497, "grad_norm": 1.9296875, "learning_rate": 1.0951252931328887e-05, "loss": 0.9894907951354981, "step": 2860, "token_acc": 0.69068332911443 }, { "epoch": 0.4971369078604893, "grad_norm": 1.9765625, "learning_rate": 1.092268359463302e-05, "loss": 1.0121468544006347, "step": 2865, "token_acc": 0.6819141923071749 }, { "epoch": 0.49800451153912895, "grad_norm": 1.8359375, "learning_rate": 1.0894106660227926e-05, "loss": 1.017982578277588, "step": 2870, "token_acc": 0.6814496708942045 }, { "epoch": 0.49887211521776853, "grad_norm": 1.8515625, "learning_rate": 1.0865522363426376e-05, "loss": 1.0043160438537597, "step": 2875, "token_acc": 0.6829500019793358 }, { "epoch": 0.4997397188964081, "grad_norm": 1.875, "learning_rate": 1.0836930939601768e-05, "loss": 1.008955478668213, "step": 2880, "token_acc": 0.6822375933533712 }, { "epoch": 0.5006073225750477, "grad_norm": 1.9140625, "learning_rate": 1.0808332624186197e-05, "loss": 1.0033825874328612, "step": 2885, "token_acc": 0.684949342881556 }, { "epoch": 0.5014749262536873, "grad_norm": 1.9140625, "learning_rate": 1.0779727652668496e-05, "loss": 1.001988697052002, "step": 2890, "token_acc": 0.6846615607534672 }, { "epoch": 0.5023425299323269, "grad_norm": 1.8515625, "learning_rate": 1.0751116260592312e-05, "loss": 0.9898590087890625, "step": 2895, "token_acc": 0.6871776024781131 }, { "epoch": 0.5032101336109666, "grad_norm": 1.9296875, "learning_rate": 1.072249868355415e-05, "loss": 0.9838379859924317, "step": 2900, "token_acc": 0.6897600586613799 }, { "epoch": 0.5040777372896061, "grad_norm": 1.84375, "learning_rate": 1.0693875157201459e-05, "loss": 0.9647768020629883, "step": 2905, "token_acc": 0.6952837795361677 }, { "epoch": 0.5049453409682457, "grad_norm": 1.859375, "learning_rate": 1.0665245917230666e-05, "loss": 1.0030086517333985, "step": 2910, "token_acc": 0.6839139614674057 }, { "epoch": 0.5058129446468853, "grad_norm": 1.796875, "learning_rate": 1.0636611199385251e-05, "loss": 1.0003300666809083, "step": 2915, "token_acc": 0.6843827426478509 }, { "epoch": 0.5066805483255249, "grad_norm": 1.796875, "learning_rate": 1.0607971239453805e-05, "loss": 0.9978496551513671, "step": 2920, "token_acc": 0.6854442053489087 }, { "epoch": 0.5075481520041645, "grad_norm": 1.96875, "learning_rate": 1.0579326273268074e-05, "loss": 0.9909579277038574, "step": 2925, "token_acc": 0.686803062770415 }, { "epoch": 0.5084157556828041, "grad_norm": 1.8984375, "learning_rate": 1.0550676536701034e-05, "loss": 0.9943648338317871, "step": 2930, "token_acc": 0.6851877207875784 }, { "epoch": 0.5092833593614436, "grad_norm": 1.859375, "learning_rate": 1.052202226566494e-05, "loss": 0.9951316833496093, "step": 2935, "token_acc": 0.6850922617852889 }, { "epoch": 0.5101509630400833, "grad_norm": 1.828125, "learning_rate": 1.0493363696109388e-05, "loss": 0.9918990135192871, "step": 2940, "token_acc": 0.6878556595377437 }, { "epoch": 0.5110185667187229, "grad_norm": 1.875, "learning_rate": 1.0464701064019364e-05, "loss": 1.0089019775390624, "step": 2945, "token_acc": 0.682502467917078 }, { "epoch": 0.5118861703973625, "grad_norm": 1.84375, "learning_rate": 1.0436034605413312e-05, "loss": 0.982180118560791, "step": 2950, "token_acc": 0.6869962643166984 }, { "epoch": 0.5127537740760021, "grad_norm": 1.8984375, "learning_rate": 1.0407364556341183e-05, "loss": 1.0064614295959473, "step": 2955, "token_acc": 0.6841499638737991 }, { "epoch": 0.5136213777546417, "grad_norm": 1.8203125, "learning_rate": 1.0378691152882496e-05, "loss": 1.0329419136047364, "step": 2960, "token_acc": 0.6754290852352366 }, { "epoch": 0.5144889814332813, "grad_norm": 1.8828125, "learning_rate": 1.0350014631144382e-05, "loss": 1.0033533096313476, "step": 2965, "token_acc": 0.6818894869228896 }, { "epoch": 0.5153565851119208, "grad_norm": 1.9140625, "learning_rate": 1.0321335227259661e-05, "loss": 0.9842534065246582, "step": 2970, "token_acc": 0.6909560794180386 }, { "epoch": 0.5162241887905604, "grad_norm": 1.921875, "learning_rate": 1.0292653177384878e-05, "loss": 1.0118374824523926, "step": 2975, "token_acc": 0.682726188540142 }, { "epoch": 0.5170917924692001, "grad_norm": 1.765625, "learning_rate": 1.0263968717698365e-05, "loss": 1.0183884620666503, "step": 2980, "token_acc": 0.6805970149253732 }, { "epoch": 0.5179593961478397, "grad_norm": 1.8203125, "learning_rate": 1.0235282084398301e-05, "loss": 0.9902758598327637, "step": 2985, "token_acc": 0.6868815227383335 }, { "epoch": 0.5188269998264793, "grad_norm": 1.8515625, "learning_rate": 1.0206593513700767e-05, "loss": 1.0007359504699707, "step": 2990, "token_acc": 0.683507329474766 }, { "epoch": 0.5196946035051189, "grad_norm": 1.875, "learning_rate": 1.0177903241837789e-05, "loss": 0.9968069076538086, "step": 2995, "token_acc": 0.6845976760975876 }, { "epoch": 0.5205622071837585, "grad_norm": 1.921875, "learning_rate": 1.0149211505055407e-05, "loss": 0.9842087745666503, "step": 3000, "token_acc": 0.6890838871678698 }, { "epoch": 0.521429810862398, "grad_norm": 1.8359375, "learning_rate": 1.012051853961172e-05, "loss": 0.9979012489318848, "step": 3005, "token_acc": 0.6855041583613994 }, { "epoch": 0.5222974145410376, "grad_norm": 1.8671875, "learning_rate": 1.0091824581774947e-05, "loss": 1.0025611877441407, "step": 3010, "token_acc": 0.6843610112039744 }, { "epoch": 0.5231650182196772, "grad_norm": 1.828125, "learning_rate": 1.0063129867821475e-05, "loss": 1.0025950431823731, "step": 3015, "token_acc": 0.6844177684199013 }, { "epoch": 0.5240326218983169, "grad_norm": 1.859375, "learning_rate": 1.0034434634033919e-05, "loss": 0.9844324111938476, "step": 3020, "token_acc": 0.6901680615091516 }, { "epoch": 0.5249002255769565, "grad_norm": 1.875, "learning_rate": 1.0005739116699178e-05, "loss": 1.0121084213256837, "step": 3025, "token_acc": 0.6821625441696113 }, { "epoch": 0.5257678292555961, "grad_norm": 1.8359375, "learning_rate": 9.977043552106484e-06, "loss": 0.9731731414794922, "step": 3030, "token_acc": 0.69295219319862 }, { "epoch": 0.5266354329342356, "grad_norm": 1.84375, "learning_rate": 9.94834817654545e-06, "loss": 0.9912844657897949, "step": 3035, "token_acc": 0.6854252683732452 }, { "epoch": 0.5275030366128752, "grad_norm": 1.9375, "learning_rate": 9.919653226304148e-06, "loss": 0.989024543762207, "step": 3040, "token_acc": 0.6876149180822745 }, { "epoch": 0.5283706402915148, "grad_norm": 1.7890625, "learning_rate": 9.890958937667135e-06, "loss": 1.012401008605957, "step": 3045, "token_acc": 0.6832023046685692 }, { "epoch": 0.5292382439701544, "grad_norm": 1.8671875, "learning_rate": 9.862265546913526e-06, "loss": 1.0105487823486328, "step": 3050, "token_acc": 0.6831975602049648 }, { "epoch": 0.530105847648794, "grad_norm": 1.9921875, "learning_rate": 9.83357329031504e-06, "loss": 0.9997787475585938, "step": 3055, "token_acc": 0.6836862959420685 }, { "epoch": 0.5309734513274337, "grad_norm": 1.890625, "learning_rate": 9.804882404134057e-06, "loss": 0.9793942451477051, "step": 3060, "token_acc": 0.6905264857446551 }, { "epoch": 0.5318410550060733, "grad_norm": 1.8828125, "learning_rate": 9.776193124621673e-06, "loss": 1.0060349464416505, "step": 3065, "token_acc": 0.6837666900913563 }, { "epoch": 0.5327086586847128, "grad_norm": 1.8046875, "learning_rate": 9.747505688015757e-06, "loss": 0.9506141662597656, "step": 3070, "token_acc": 0.696441489065717 }, { "epoch": 0.5335762623633524, "grad_norm": 1.8203125, "learning_rate": 9.718820330538999e-06, "loss": 1.0000137329101562, "step": 3075, "token_acc": 0.6840303318042609 }, { "epoch": 0.534443866041992, "grad_norm": 1.90625, "learning_rate": 9.690137288396967e-06, "loss": 0.9879467010498046, "step": 3080, "token_acc": 0.6884837459463735 }, { "epoch": 0.5353114697206316, "grad_norm": 1.828125, "learning_rate": 9.66145679777617e-06, "loss": 1.0037842750549317, "step": 3085, "token_acc": 0.683606172775142 }, { "epoch": 0.5361790733992712, "grad_norm": 1.90625, "learning_rate": 9.632779094842104e-06, "loss": 0.9850837707519531, "step": 3090, "token_acc": 0.6906727747296649 }, { "epoch": 0.5370466770779108, "grad_norm": 1.9765625, "learning_rate": 9.604104415737309e-06, "loss": 1.0082507133483887, "step": 3095, "token_acc": 0.6840075020949999 }, { "epoch": 0.5379142807565505, "grad_norm": 1.8828125, "learning_rate": 9.575432996579424e-06, "loss": 0.9955901145935059, "step": 3100, "token_acc": 0.6857669735637754 }, { "epoch": 0.53878188443519, "grad_norm": 1.765625, "learning_rate": 9.546765073459245e-06, "loss": 0.9778296470642089, "step": 3105, "token_acc": 0.6909004764286278 }, { "epoch": 0.5396494881138296, "grad_norm": 1.8515625, "learning_rate": 9.51810088243879e-06, "loss": 1.0080193519592284, "step": 3110, "token_acc": 0.6846056403760251 }, { "epoch": 0.5405170917924692, "grad_norm": 1.984375, "learning_rate": 9.489440659549333e-06, "loss": 1.0060848236083983, "step": 3115, "token_acc": 0.6859093319194062 }, { "epoch": 0.5413846954711088, "grad_norm": 1.8046875, "learning_rate": 9.46078464078948e-06, "loss": 0.9916322708129883, "step": 3120, "token_acc": 0.6852762549715146 }, { "epoch": 0.5422522991497484, "grad_norm": 1.8359375, "learning_rate": 9.432133062123215e-06, "loss": 0.9954086303710937, "step": 3125, "token_acc": 0.6874728114000975 }, { "epoch": 0.543119902828388, "grad_norm": 1.9609375, "learning_rate": 9.40348615947796e-06, "loss": 1.0074991226196288, "step": 3130, "token_acc": 0.681418392340236 }, { "epoch": 0.5439875065070275, "grad_norm": 1.8359375, "learning_rate": 9.374844168742637e-06, "loss": 1.0012994766235352, "step": 3135, "token_acc": 0.6855507942467278 }, { "epoch": 0.5448551101856672, "grad_norm": 1.8515625, "learning_rate": 9.34620732576572e-06, "loss": 1.0055302619934081, "step": 3140, "token_acc": 0.6849006828057107 }, { "epoch": 0.5457227138643068, "grad_norm": 1.9765625, "learning_rate": 9.317575866353293e-06, "loss": 0.9842160224914551, "step": 3145, "token_acc": 0.6883597598729373 }, { "epoch": 0.5465903175429464, "grad_norm": 1.8828125, "learning_rate": 9.28895002626711e-06, "loss": 1.0006650924682616, "step": 3150, "token_acc": 0.6856869530964238 }, { "epoch": 0.547457921221586, "grad_norm": 1.8359375, "learning_rate": 9.260330041222656e-06, "loss": 1.0168807983398438, "step": 3155, "token_acc": 0.6813513261486406 }, { "epoch": 0.5483255249002256, "grad_norm": 1.9453125, "learning_rate": 9.231716146887203e-06, "loss": 0.9734827041625976, "step": 3160, "token_acc": 0.693440864594789 }, { "epoch": 0.5491931285788652, "grad_norm": 1.90625, "learning_rate": 9.203108578877866e-06, "loss": 0.9954551696777344, "step": 3165, "token_acc": 0.685033919424062 }, { "epoch": 0.5500607322575047, "grad_norm": 1.859375, "learning_rate": 9.174507572759672e-06, "loss": 1.005191707611084, "step": 3170, "token_acc": 0.6829043026216833 }, { "epoch": 0.5509283359361443, "grad_norm": 1.859375, "learning_rate": 9.145913364043604e-06, "loss": 0.9932435035705567, "step": 3175, "token_acc": 0.6873352300905745 }, { "epoch": 0.551795939614784, "grad_norm": 1.90625, "learning_rate": 9.117326188184696e-06, "loss": 0.9784406661987305, "step": 3180, "token_acc": 0.691351665477983 }, { "epoch": 0.5526635432934236, "grad_norm": 1.8203125, "learning_rate": 9.088746280580046e-06, "loss": 1.0030339241027832, "step": 3185, "token_acc": 0.6849746393518213 }, { "epoch": 0.5535311469720632, "grad_norm": 1.9765625, "learning_rate": 9.060173876566916e-06, "loss": 1.0087509155273438, "step": 3190, "token_acc": 0.682428123685603 }, { "epoch": 0.5543987506507028, "grad_norm": 1.8125, "learning_rate": 9.031609211420775e-06, "loss": 1.0267830848693849, "step": 3195, "token_acc": 0.679297126313532 }, { "epoch": 0.5552663543293423, "grad_norm": 1.84375, "learning_rate": 9.003052520353372e-06, "loss": 1.0051657676696777, "step": 3200, "token_acc": 0.6841281932693093 }, { "epoch": 0.5561339580079819, "grad_norm": 1.8046875, "learning_rate": 8.974504038510793e-06, "loss": 1.005373477935791, "step": 3205, "token_acc": 0.6836050245944957 }, { "epoch": 0.5570015616866215, "grad_norm": 1.8515625, "learning_rate": 8.945964000971525e-06, "loss": 0.9805338859558106, "step": 3210, "token_acc": 0.6906489566678965 }, { "epoch": 0.5578691653652611, "grad_norm": 1.8359375, "learning_rate": 8.917432642744519e-06, "loss": 1.0035972595214844, "step": 3215, "token_acc": 0.6831262001280136 }, { "epoch": 0.5587367690439008, "grad_norm": 1.8515625, "learning_rate": 8.888910198767265e-06, "loss": 0.9910804748535156, "step": 3220, "token_acc": 0.6850102007945882 }, { "epoch": 0.5596043727225404, "grad_norm": 1.796875, "learning_rate": 8.860396903903844e-06, "loss": 0.9914836883544922, "step": 3225, "token_acc": 0.6887413708576804 }, { "epoch": 0.56047197640118, "grad_norm": 1.8203125, "learning_rate": 8.831892992943e-06, "loss": 1.0112311363220214, "step": 3230, "token_acc": 0.6815932803989763 }, { "epoch": 0.5613395800798195, "grad_norm": 1.9609375, "learning_rate": 8.803398700596208e-06, "loss": 1.007247543334961, "step": 3235, "token_acc": 0.683875897072066 }, { "epoch": 0.5622071837584591, "grad_norm": 1.90625, "learning_rate": 8.774914261495738e-06, "loss": 1.0004298210144043, "step": 3240, "token_acc": 0.6841970344985766 }, { "epoch": 0.5630747874370987, "grad_norm": 1.8359375, "learning_rate": 8.746439910192735e-06, "loss": 0.9888349533081054, "step": 3245, "token_acc": 0.6879607213774719 }, { "epoch": 0.5639423911157383, "grad_norm": 1.890625, "learning_rate": 8.717975881155261e-06, "loss": 1.0053036689758301, "step": 3250, "token_acc": 0.6843277773304346 }, { "epoch": 0.5648099947943779, "grad_norm": 1.8125, "learning_rate": 8.689522408766395e-06, "loss": 1.006988525390625, "step": 3255, "token_acc": 0.6830830648001983 }, { "epoch": 0.5656775984730176, "grad_norm": 1.90625, "learning_rate": 8.661079727322276e-06, "loss": 1.0136844635009765, "step": 3260, "token_acc": 0.6810806425442155 }, { "epoch": 0.5665452021516572, "grad_norm": 1.8671875, "learning_rate": 8.632648071030198e-06, "loss": 1.0038190841674806, "step": 3265, "token_acc": 0.6849343777015168 }, { "epoch": 0.5674128058302967, "grad_norm": 1.9296875, "learning_rate": 8.604227674006661e-06, "loss": 0.9864459991455078, "step": 3270, "token_acc": 0.6862783616540615 }, { "epoch": 0.5682804095089363, "grad_norm": 1.8515625, "learning_rate": 8.57581877027546e-06, "loss": 0.9800386428833008, "step": 3275, "token_acc": 0.6911001694197374 }, { "epoch": 0.5691480131875759, "grad_norm": 1.8203125, "learning_rate": 8.547421593765744e-06, "loss": 0.9790647506713868, "step": 3280, "token_acc": 0.6886432619731929 }, { "epoch": 0.5700156168662155, "grad_norm": 1.9609375, "learning_rate": 8.519036378310098e-06, "loss": 0.9918664932250977, "step": 3285, "token_acc": 0.6854125633826426 }, { "epoch": 0.5708832205448551, "grad_norm": 1.8984375, "learning_rate": 8.490663357642615e-06, "loss": 0.9926240921020508, "step": 3290, "token_acc": 0.687613955720063 }, { "epoch": 0.5717508242234947, "grad_norm": 1.875, "learning_rate": 8.462302765396975e-06, "loss": 0.9821521759033203, "step": 3295, "token_acc": 0.6893415493905228 }, { "epoch": 0.5726184279021344, "grad_norm": 1.828125, "learning_rate": 8.433954835104513e-06, "loss": 1.0029169082641602, "step": 3300, "token_acc": 0.6836323546782512 }, { "epoch": 0.5734860315807739, "grad_norm": 1.921875, "learning_rate": 8.4056198001923e-06, "loss": 0.9930968284606934, "step": 3305, "token_acc": 0.6868556180002426 }, { "epoch": 0.5743536352594135, "grad_norm": 1.921875, "learning_rate": 8.377297893981224e-06, "loss": 0.9897697448730469, "step": 3310, "token_acc": 0.6850893984441819 }, { "epoch": 0.5752212389380531, "grad_norm": 1.8359375, "learning_rate": 8.348989349684077e-06, "loss": 1.0004033088684081, "step": 3315, "token_acc": 0.6834115743155585 }, { "epoch": 0.5760888426166927, "grad_norm": 1.953125, "learning_rate": 8.320694400403608e-06, "loss": 1.0031415939331054, "step": 3320, "token_acc": 0.6850924472948079 }, { "epoch": 0.5769564462953323, "grad_norm": 1.9140625, "learning_rate": 8.292413279130625e-06, "loss": 0.9991157531738282, "step": 3325, "token_acc": 0.6833071420830172 }, { "epoch": 0.5778240499739719, "grad_norm": 1.828125, "learning_rate": 8.264146218742074e-06, "loss": 1.0167976379394532, "step": 3330, "token_acc": 0.6805396906454517 }, { "epoch": 0.5786916536526114, "grad_norm": 2.046875, "learning_rate": 8.235893451999118e-06, "loss": 1.0147868156433106, "step": 3335, "token_acc": 0.6792890262751159 }, { "epoch": 0.5795592573312511, "grad_norm": 1.8515625, "learning_rate": 8.207655211545218e-06, "loss": 1.0142845153808593, "step": 3340, "token_acc": 0.6800431959683763 }, { "epoch": 0.5804268610098907, "grad_norm": 1.8125, "learning_rate": 8.179431729904223e-06, "loss": 1.012403964996338, "step": 3345, "token_acc": 0.6800115019148074 }, { "epoch": 0.5812944646885303, "grad_norm": 1.9609375, "learning_rate": 8.151223239478453e-06, "loss": 0.9996941566467286, "step": 3350, "token_acc": 0.6826174967983586 }, { "epoch": 0.5821620683671699, "grad_norm": 1.90625, "learning_rate": 8.123029972546782e-06, "loss": 1.0093581199645996, "step": 3355, "token_acc": 0.6819460251429169 }, { "epoch": 0.5830296720458095, "grad_norm": 1.8671875, "learning_rate": 8.09485216126273e-06, "loss": 1.0049400329589844, "step": 3360, "token_acc": 0.6827514040478966 }, { "epoch": 0.583897275724449, "grad_norm": 1.8828125, "learning_rate": 8.066690037652552e-06, "loss": 0.9991744995117188, "step": 3365, "token_acc": 0.6828673913638729 }, { "epoch": 0.5847648794030886, "grad_norm": 1.921875, "learning_rate": 8.03854383361332e-06, "loss": 0.9949298858642578, "step": 3370, "token_acc": 0.6877322396851174 }, { "epoch": 0.5856324830817282, "grad_norm": 1.8515625, "learning_rate": 8.010413780911022e-06, "loss": 1.0077406883239746, "step": 3375, "token_acc": 0.6826122846664953 }, { "epoch": 0.5865000867603679, "grad_norm": 1.9140625, "learning_rate": 7.982300111178648e-06, "loss": 1.0013755798339843, "step": 3380, "token_acc": 0.6858611685344359 }, { "epoch": 0.5873676904390075, "grad_norm": 1.921875, "learning_rate": 7.954203055914289e-06, "loss": 0.9829542160034179, "step": 3385, "token_acc": 0.6888979370249728 }, { "epoch": 0.5882352941176471, "grad_norm": 1.8671875, "learning_rate": 7.926122846479224e-06, "loss": 0.993384838104248, "step": 3390, "token_acc": 0.6845540146288179 }, { "epoch": 0.5891028977962867, "grad_norm": 1.828125, "learning_rate": 7.898059714096016e-06, "loss": 0.956721305847168, "step": 3395, "token_acc": 0.698894211628116 }, { "epoch": 0.5899705014749262, "grad_norm": 1.84375, "learning_rate": 7.870013889846608e-06, "loss": 0.9920453071594239, "step": 3400, "token_acc": 0.686382917252157 }, { "epoch": 0.5908381051535658, "grad_norm": 1.84375, "learning_rate": 7.841985604670427e-06, "loss": 0.9913934707641602, "step": 3405, "token_acc": 0.6889823114142937 }, { "epoch": 0.5917057088322054, "grad_norm": 2.0, "learning_rate": 7.81397508936247e-06, "loss": 0.9880316734313965, "step": 3410, "token_acc": 0.6867218573075777 }, { "epoch": 0.592573312510845, "grad_norm": 1.9609375, "learning_rate": 7.78598257457142e-06, "loss": 0.9705442428588867, "step": 3415, "token_acc": 0.6910044977511245 }, { "epoch": 0.5934409161894847, "grad_norm": 1.8046875, "learning_rate": 7.758008290797727e-06, "loss": 0.9677356719970703, "step": 3420, "token_acc": 0.6959344774631571 }, { "epoch": 0.5943085198681243, "grad_norm": 1.8828125, "learning_rate": 7.730052468391726e-06, "loss": 0.9935931205749512, "step": 3425, "token_acc": 0.6881914107130855 }, { "epoch": 0.5951761235467639, "grad_norm": 1.921875, "learning_rate": 7.702115337551733e-06, "loss": 1.028738307952881, "step": 3430, "token_acc": 0.6780585491818397 }, { "epoch": 0.5960437272254034, "grad_norm": 1.921875, "learning_rate": 7.674197128322151e-06, "loss": 1.0033409118652343, "step": 3435, "token_acc": 0.6833868116036933 }, { "epoch": 0.596911330904043, "grad_norm": 1.8046875, "learning_rate": 7.646298070591578e-06, "loss": 1.0005316734313965, "step": 3440, "token_acc": 0.6868071389260162 }, { "epoch": 0.5977789345826826, "grad_norm": 1.7265625, "learning_rate": 7.618418394090907e-06, "loss": 0.9753083229064942, "step": 3445, "token_acc": 0.6916532970218273 }, { "epoch": 0.5986465382613222, "grad_norm": 1.8984375, "learning_rate": 7.59055832839144e-06, "loss": 0.9871037483215332, "step": 3450, "token_acc": 0.688853524302102 }, { "epoch": 0.5995141419399618, "grad_norm": 1.890625, "learning_rate": 7.562718102903002e-06, "loss": 0.9996206283569335, "step": 3455, "token_acc": 0.6837434616393171 }, { "epoch": 0.6003817456186015, "grad_norm": 1.875, "learning_rate": 7.534897946872042e-06, "loss": 1.0057412147521974, "step": 3460, "token_acc": 0.6834609861177597 }, { "epoch": 0.601249349297241, "grad_norm": 1.890625, "learning_rate": 7.507098089379749e-06, "loss": 0.990781593322754, "step": 3465, "token_acc": 0.6880597411570862 }, { "epoch": 0.6021169529758806, "grad_norm": 1.8125, "learning_rate": 7.479318759340171e-06, "loss": 0.9857464790344238, "step": 3470, "token_acc": 0.689186540346292 }, { "epoch": 0.6029845566545202, "grad_norm": 1.828125, "learning_rate": 7.451560185498318e-06, "loss": 0.9758604049682618, "step": 3475, "token_acc": 0.6898560948081264 }, { "epoch": 0.6038521603331598, "grad_norm": 1.8671875, "learning_rate": 7.423822596428291e-06, "loss": 0.9707001686096192, "step": 3480, "token_acc": 0.6923664838627496 }, { "epoch": 0.6047197640117994, "grad_norm": 1.796875, "learning_rate": 7.396106220531398e-06, "loss": 1.0107527732849122, "step": 3485, "token_acc": 0.6832125667742106 }, { "epoch": 0.605587367690439, "grad_norm": 1.9296875, "learning_rate": 7.368411286034265e-06, "loss": 1.011655330657959, "step": 3490, "token_acc": 0.6814160469354903 }, { "epoch": 0.6064549713690786, "grad_norm": 1.8515625, "learning_rate": 7.340738020986961e-06, "loss": 1.0010527610778808, "step": 3495, "token_acc": 0.6855910839856707 }, { "epoch": 0.6073225750477182, "grad_norm": 1.8359375, "learning_rate": 7.313086653261126e-06, "loss": 1.0003108024597167, "step": 3500, "token_acc": 0.6845823427706937 }, { "epoch": 0.6081901787263578, "grad_norm": 1.90625, "learning_rate": 7.285457410548084e-06, "loss": 1.0062461853027345, "step": 3505, "token_acc": 0.6841428111933098 }, { "epoch": 0.6090577824049974, "grad_norm": 1.8515625, "learning_rate": 7.2578505203569775e-06, "loss": 1.0086194038391114, "step": 3510, "token_acc": 0.6810051221539865 }, { "epoch": 0.609925386083637, "grad_norm": 1.8984375, "learning_rate": 7.230266210012886e-06, "loss": 0.9880249977111817, "step": 3515, "token_acc": 0.6883796750337954 }, { "epoch": 0.6107929897622766, "grad_norm": 1.828125, "learning_rate": 7.20270470665497e-06, "loss": 0.9953752517700195, "step": 3520, "token_acc": 0.6858969161328684 }, { "epoch": 0.6116605934409162, "grad_norm": 1.8359375, "learning_rate": 7.1751662372345745e-06, "loss": 1.0096102714538575, "step": 3525, "token_acc": 0.6835760199396563 }, { "epoch": 0.6125281971195558, "grad_norm": 1.9296875, "learning_rate": 7.1476510285133824e-06, "loss": 1.01358003616333, "step": 3530, "token_acc": 0.6808007033204843 }, { "epoch": 0.6133958007981953, "grad_norm": 1.8671875, "learning_rate": 7.1201593070615385e-06, "loss": 0.9893976211547851, "step": 3535, "token_acc": 0.6872642713325582 }, { "epoch": 0.614263404476835, "grad_norm": 1.859375, "learning_rate": 7.0926912992557825e-06, "loss": 1.0022952079772949, "step": 3540, "token_acc": 0.6836349718409973 }, { "epoch": 0.6151310081554746, "grad_norm": 1.875, "learning_rate": 7.065247231277592e-06, "loss": 0.9951557159423828, "step": 3545, "token_acc": 0.6851784290675207 }, { "epoch": 0.6159986118341142, "grad_norm": 1.828125, "learning_rate": 7.037827329111313e-06, "loss": 1.0185998916625976, "step": 3550, "token_acc": 0.6794846010484963 }, { "epoch": 0.6168662155127538, "grad_norm": 1.8671875, "learning_rate": 7.010431818542298e-06, "loss": 1.0109454154968263, "step": 3555, "token_acc": 0.682662396471839 }, { "epoch": 0.6177338191913934, "grad_norm": 1.78125, "learning_rate": 6.983060925155056e-06, "loss": 0.990286922454834, "step": 3560, "token_acc": 0.6862885957035297 }, { "epoch": 0.618601422870033, "grad_norm": 1.84375, "learning_rate": 6.955714874331388e-06, "loss": 0.9858268737792969, "step": 3565, "token_acc": 0.6888843703402467 }, { "epoch": 0.6194690265486725, "grad_norm": 1.8203125, "learning_rate": 6.928393891248529e-06, "loss": 1.0141701698303223, "step": 3570, "token_acc": 0.6834784012484361 }, { "epoch": 0.6203366302273121, "grad_norm": 1.8125, "learning_rate": 6.901098200877301e-06, "loss": 0.967597770690918, "step": 3575, "token_acc": 0.6944241377018675 }, { "epoch": 0.6212042339059518, "grad_norm": 2.015625, "learning_rate": 6.873828027980256e-06, "loss": 0.9855113983154297, "step": 3580, "token_acc": 0.6862524757342923 }, { "epoch": 0.6220718375845914, "grad_norm": 1.9140625, "learning_rate": 6.846583597109817e-06, "loss": 0.9973045349121094, "step": 3585, "token_acc": 0.684243293722762 }, { "epoch": 0.622939441263231, "grad_norm": 1.9140625, "learning_rate": 6.819365132606459e-06, "loss": 0.9799047470092773, "step": 3590, "token_acc": 0.6892774554748672 }, { "epoch": 0.6238070449418706, "grad_norm": 1.75, "learning_rate": 6.7921728585968215e-06, "loss": 1.0055973052978515, "step": 3595, "token_acc": 0.6808834120188846 }, { "epoch": 0.6246746486205101, "grad_norm": 1.7265625, "learning_rate": 6.765006998991889e-06, "loss": 0.9758973121643066, "step": 3600, "token_acc": 0.6907637655417407 }, { "epoch": 0.6255422522991497, "grad_norm": 1.890625, "learning_rate": 6.737867777485136e-06, "loss": 1.0146740913391112, "step": 3605, "token_acc": 0.6811284150100989 }, { "epoch": 0.6264098559777893, "grad_norm": 1.90625, "learning_rate": 6.710755417550698e-06, "loss": 0.9987593650817871, "step": 3610, "token_acc": 0.6847243880941081 }, { "epoch": 0.6272774596564289, "grad_norm": 1.953125, "learning_rate": 6.683670142441514e-06, "loss": 0.9979434013366699, "step": 3615, "token_acc": 0.6853054139387396 }, { "epoch": 0.6281450633350686, "grad_norm": 1.8203125, "learning_rate": 6.6566121751875e-06, "loss": 0.9827108383178711, "step": 3620, "token_acc": 0.6896419200084816 }, { "epoch": 0.6290126670137082, "grad_norm": 1.875, "learning_rate": 6.6295817385937104e-06, "loss": 0.9979496002197266, "step": 3625, "token_acc": 0.6842961073185775 }, { "epoch": 0.6298802706923478, "grad_norm": 1.921875, "learning_rate": 6.602579055238501e-06, "loss": 0.9886339187622071, "step": 3630, "token_acc": 0.6858520767782801 }, { "epoch": 0.6307478743709873, "grad_norm": 1.9375, "learning_rate": 6.575604347471696e-06, "loss": 1.0002639770507813, "step": 3635, "token_acc": 0.6843852893576651 }, { "epoch": 0.6316154780496269, "grad_norm": 1.8984375, "learning_rate": 6.548657837412764e-06, "loss": 0.9971570014953614, "step": 3640, "token_acc": 0.6848995111352526 }, { "epoch": 0.6324830817282665, "grad_norm": 1.90625, "learning_rate": 6.5217397469489765e-06, "loss": 0.9921416282653809, "step": 3645, "token_acc": 0.6852397462075014 }, { "epoch": 0.6333506854069061, "grad_norm": 1.7421875, "learning_rate": 6.494850297733591e-06, "loss": 1.0081979751586914, "step": 3650, "token_acc": 0.6827699225310147 }, { "epoch": 0.6342182890855457, "grad_norm": 1.953125, "learning_rate": 6.467989711184021e-06, "loss": 0.9944825172424316, "step": 3655, "token_acc": 0.6852892695976437 }, { "epoch": 0.6350858927641854, "grad_norm": 1.875, "learning_rate": 6.4411582084800215e-06, "loss": 0.9934005737304688, "step": 3660, "token_acc": 0.6851075268817204 }, { "epoch": 0.635953496442825, "grad_norm": 1.90625, "learning_rate": 6.414356010561853e-06, "loss": 0.9901107788085938, "step": 3665, "token_acc": 0.6862788024738656 }, { "epoch": 0.6368211001214645, "grad_norm": 1.8359375, "learning_rate": 6.387583338128471e-06, "loss": 1.0017055511474608, "step": 3670, "token_acc": 0.6829901814126799 }, { "epoch": 0.6376887038001041, "grad_norm": 1.84375, "learning_rate": 6.3608404116357096e-06, "loss": 1.0016436576843262, "step": 3675, "token_acc": 0.6823521311023893 }, { "epoch": 0.6385563074787437, "grad_norm": 1.8515625, "learning_rate": 6.334127451294461e-06, "loss": 0.995360279083252, "step": 3680, "token_acc": 0.6857756640635555 }, { "epoch": 0.6394239111573833, "grad_norm": 1.8984375, "learning_rate": 6.307444677068869e-06, "loss": 1.0071782112121581, "step": 3685, "token_acc": 0.6841917710589074 }, { "epoch": 0.6402915148360229, "grad_norm": 1.7890625, "learning_rate": 6.280792308674512e-06, "loss": 0.9938779830932617, "step": 3690, "token_acc": 0.6878277558523004 }, { "epoch": 0.6411591185146625, "grad_norm": 1.9140625, "learning_rate": 6.254170565576596e-06, "loss": 0.9867862701416016, "step": 3695, "token_acc": 0.6865127083902706 }, { "epoch": 0.6420267221933021, "grad_norm": 1.828125, "learning_rate": 6.227579666988149e-06, "loss": 0.9970829010009765, "step": 3700, "token_acc": 0.6850032654838358 }, { "epoch": 0.6428943258719417, "grad_norm": 1.84375, "learning_rate": 6.201019831868209e-06, "loss": 0.9995267868041993, "step": 3705, "token_acc": 0.6846874095894374 }, { "epoch": 0.6437619295505813, "grad_norm": 1.84375, "learning_rate": 6.174491278920034e-06, "loss": 0.9917936325073242, "step": 3710, "token_acc": 0.6880288247439375 }, { "epoch": 0.6446295332292209, "grad_norm": 1.8046875, "learning_rate": 6.147994226589287e-06, "loss": 0.9787176132202149, "step": 3715, "token_acc": 0.6913672458526614 }, { "epoch": 0.6454971369078605, "grad_norm": 1.8359375, "learning_rate": 6.121528893062246e-06, "loss": 1.0017691612243653, "step": 3720, "token_acc": 0.6832628692610052 }, { "epoch": 0.6463647405865001, "grad_norm": 1.859375, "learning_rate": 6.095095496264001e-06, "loss": 0.997169303894043, "step": 3725, "token_acc": 0.6856908315278095 }, { "epoch": 0.6472323442651396, "grad_norm": 1.828125, "learning_rate": 6.068694253856675e-06, "loss": 0.9935990333557129, "step": 3730, "token_acc": 0.6857996759957544 }, { "epoch": 0.6480999479437792, "grad_norm": 1.9296875, "learning_rate": 6.04232538323761e-06, "loss": 0.995047664642334, "step": 3735, "token_acc": 0.6855156587473002 }, { "epoch": 0.6489675516224189, "grad_norm": 1.8671875, "learning_rate": 6.015989101537586e-06, "loss": 0.9964488983154297, "step": 3740, "token_acc": 0.6852130600180629 }, { "epoch": 0.6498351553010585, "grad_norm": 1.8046875, "learning_rate": 5.989685625619039e-06, "loss": 1.001780128479004, "step": 3745, "token_acc": 0.6852903955410754 }, { "epoch": 0.6507027589796981, "grad_norm": 1.8359375, "learning_rate": 5.963415172074272e-06, "loss": 0.9760993003845215, "step": 3750, "token_acc": 0.6886531679352932 }, { "epoch": 0.6515703626583377, "grad_norm": 1.84375, "learning_rate": 5.937177957223661e-06, "loss": 0.9900795936584472, "step": 3755, "token_acc": 0.6872612410739596 }, { "epoch": 0.6524379663369773, "grad_norm": 1.84375, "learning_rate": 5.910974197113892e-06, "loss": 1.001762866973877, "step": 3760, "token_acc": 0.6836190449665084 }, { "epoch": 0.6533055700156168, "grad_norm": 1.765625, "learning_rate": 5.884804107516169e-06, "loss": 0.9720080375671387, "step": 3765, "token_acc": 0.6952360976377127 }, { "epoch": 0.6541731736942564, "grad_norm": 1.890625, "learning_rate": 5.858667903924439e-06, "loss": 0.984315013885498, "step": 3770, "token_acc": 0.6863298561396332 }, { "epoch": 0.655040777372896, "grad_norm": 1.796875, "learning_rate": 5.8325658015536205e-06, "loss": 1.002072525024414, "step": 3775, "token_acc": 0.6841665768774916 }, { "epoch": 0.6559083810515357, "grad_norm": 1.875, "learning_rate": 5.8064980153378335e-06, "loss": 0.9898612976074219, "step": 3780, "token_acc": 0.686063766347234 }, { "epoch": 0.6567759847301753, "grad_norm": 1.84375, "learning_rate": 5.780464759928623e-06, "loss": 1.0027915000915528, "step": 3785, "token_acc": 0.6840225269854513 }, { "epoch": 0.6576435884088149, "grad_norm": 1.8984375, "learning_rate": 5.7544662496931935e-06, "loss": 0.9923629760742188, "step": 3790, "token_acc": 0.6860443020793746 }, { "epoch": 0.6585111920874545, "grad_norm": 1.8828125, "learning_rate": 5.7285026987126526e-06, "loss": 1.0032987594604492, "step": 3795, "token_acc": 0.683890081813487 }, { "epoch": 0.659378795766094, "grad_norm": 1.8203125, "learning_rate": 5.7025743207802345e-06, "loss": 1.0057662963867187, "step": 3800, "token_acc": 0.6835548723113827 }, { "epoch": 0.6602463994447336, "grad_norm": 1.8125, "learning_rate": 5.676681329399543e-06, "loss": 0.9910049438476562, "step": 3805, "token_acc": 0.6876422267858134 }, { "epoch": 0.6611140031233732, "grad_norm": 1.8359375, "learning_rate": 5.650823937782803e-06, "loss": 1.0060483932495117, "step": 3810, "token_acc": 0.6838536439827497 }, { "epoch": 0.6619816068020128, "grad_norm": 1.7890625, "learning_rate": 5.625002358849096e-06, "loss": 0.9882902145385742, "step": 3815, "token_acc": 0.6876298080917173 }, { "epoch": 0.6628492104806525, "grad_norm": 1.7734375, "learning_rate": 5.599216805222609e-06, "loss": 0.9882322311401367, "step": 3820, "token_acc": 0.6853805976085054 }, { "epoch": 0.6637168141592921, "grad_norm": 1.8203125, "learning_rate": 5.573467489230879e-06, "loss": 1.0068046569824218, "step": 3825, "token_acc": 0.6833182949170152 }, { "epoch": 0.6645844178379317, "grad_norm": 1.796875, "learning_rate": 5.547754622903059e-06, "loss": 0.995240306854248, "step": 3830, "token_acc": 0.6859673775279661 }, { "epoch": 0.6654520215165712, "grad_norm": 1.84375, "learning_rate": 5.522078417968151e-06, "loss": 0.9991961479187011, "step": 3835, "token_acc": 0.6834129511677283 }, { "epoch": 0.6663196251952108, "grad_norm": 1.7421875, "learning_rate": 5.496439085853282e-06, "loss": 0.9904547691345215, "step": 3840, "token_acc": 0.6883501895504571 }, { "epoch": 0.6671872288738504, "grad_norm": 1.8515625, "learning_rate": 5.470836837681955e-06, "loss": 0.9769336700439453, "step": 3845, "token_acc": 0.6896110755886686 }, { "epoch": 0.66805483255249, "grad_norm": 1.8984375, "learning_rate": 5.445271884272303e-06, "loss": 1.0078944206237792, "step": 3850, "token_acc": 0.6813604508440128 }, { "epoch": 0.6689224362311296, "grad_norm": 1.8515625, "learning_rate": 5.4197444361353675e-06, "loss": 1.0107319831848145, "step": 3855, "token_acc": 0.681410079867805 }, { "epoch": 0.6697900399097693, "grad_norm": 1.828125, "learning_rate": 5.394254703473354e-06, "loss": 0.964967918395996, "step": 3860, "token_acc": 0.6943124165554072 }, { "epoch": 0.6706576435884088, "grad_norm": 1.890625, "learning_rate": 5.368802896177911e-06, "loss": 0.9789441108703614, "step": 3865, "token_acc": 0.6884867885627476 }, { "epoch": 0.6715252472670484, "grad_norm": 1.8828125, "learning_rate": 5.343389223828392e-06, "loss": 0.9796417236328125, "step": 3870, "token_acc": 0.6890180582340962 }, { "epoch": 0.672392850945688, "grad_norm": 1.8828125, "learning_rate": 5.318013895690131e-06, "loss": 0.9787491798400879, "step": 3875, "token_acc": 0.6898387987482578 }, { "epoch": 0.6732604546243276, "grad_norm": 1.9375, "learning_rate": 5.292677120712726e-06, "loss": 0.9852935791015625, "step": 3880, "token_acc": 0.6870102408889257 }, { "epoch": 0.6741280583029672, "grad_norm": 1.9921875, "learning_rate": 5.267379107528311e-06, "loss": 0.9924633026123046, "step": 3885, "token_acc": 0.6847909474491753 }, { "epoch": 0.6749956619816068, "grad_norm": 1.859375, "learning_rate": 5.242120064449845e-06, "loss": 0.9971447944641113, "step": 3890, "token_acc": 0.6849295083489171 }, { "epoch": 0.6758632656602463, "grad_norm": 1.7890625, "learning_rate": 5.216900199469391e-06, "loss": 0.9826061248779296, "step": 3895, "token_acc": 0.6904354672313623 }, { "epoch": 0.676730869338886, "grad_norm": 1.859375, "learning_rate": 5.191719720256407e-06, "loss": 0.9958490371704102, "step": 3900, "token_acc": 0.6858113156286083 }, { "epoch": 0.6775984730175256, "grad_norm": 1.796875, "learning_rate": 5.166578834156031e-06, "loss": 0.9950273513793946, "step": 3905, "token_acc": 0.6859179612865821 }, { "epoch": 0.6784660766961652, "grad_norm": 1.859375, "learning_rate": 5.14147774818738e-06, "loss": 0.990997314453125, "step": 3910, "token_acc": 0.6872399539201735 }, { "epoch": 0.6793336803748048, "grad_norm": 1.921875, "learning_rate": 5.1164166690418435e-06, "loss": 0.9976764678955078, "step": 3915, "token_acc": 0.6839109763660167 }, { "epoch": 0.6802012840534444, "grad_norm": 1.8125, "learning_rate": 5.091395803081376e-06, "loss": 0.980461311340332, "step": 3920, "token_acc": 0.6900414130464994 }, { "epoch": 0.681068887732084, "grad_norm": 1.8125, "learning_rate": 5.066415356336807e-06, "loss": 1.005615234375, "step": 3925, "token_acc": 0.681454565176126 }, { "epoch": 0.6819364914107235, "grad_norm": 2.015625, "learning_rate": 5.041475534506131e-06, "loss": 0.993968391418457, "step": 3930, "token_acc": 0.6851136910077625 }, { "epoch": 0.6828040950893631, "grad_norm": 1.9453125, "learning_rate": 5.01657654295284e-06, "loss": 1.0097810745239257, "step": 3935, "token_acc": 0.6827556629888105 }, { "epoch": 0.6836716987680028, "grad_norm": 1.6953125, "learning_rate": 4.991718586704196e-06, "loss": 0.9924948692321778, "step": 3940, "token_acc": 0.6904830287206266 }, { "epoch": 0.6845393024466424, "grad_norm": 1.9296875, "learning_rate": 4.9669018704495696e-06, "loss": 0.9993215560913086, "step": 3945, "token_acc": 0.6851340222617751 }, { "epoch": 0.685406906125282, "grad_norm": 1.796875, "learning_rate": 4.9421265985387475e-06, "loss": 0.9833191871643067, "step": 3950, "token_acc": 0.6883911507101707 }, { "epoch": 0.6862745098039216, "grad_norm": 1.9296875, "learning_rate": 4.9173929749802465e-06, "loss": 1.0078816413879395, "step": 3955, "token_acc": 0.6850704225352112 }, { "epoch": 0.6871421134825612, "grad_norm": 1.875, "learning_rate": 4.892701203439635e-06, "loss": 1.0204105377197266, "step": 3960, "token_acc": 0.6796032157676348 }, { "epoch": 0.6880097171612007, "grad_norm": 1.9453125, "learning_rate": 4.868051487237858e-06, "loss": 0.973170280456543, "step": 3965, "token_acc": 0.6899492217684071 }, { "epoch": 0.6888773208398403, "grad_norm": 1.7421875, "learning_rate": 4.843444029349564e-06, "loss": 0.9647638320922851, "step": 3970, "token_acc": 0.6957806900520547 }, { "epoch": 0.6897449245184799, "grad_norm": 1.890625, "learning_rate": 4.8188790324014274e-06, "loss": 0.9891746520996094, "step": 3975, "token_acc": 0.685054815133276 }, { "epoch": 0.6906125281971196, "grad_norm": 1.8828125, "learning_rate": 4.794356698670488e-06, "loss": 0.9468636512756348, "step": 3980, "token_acc": 0.6995367131713369 }, { "epoch": 0.6914801318757592, "grad_norm": 1.8359375, "learning_rate": 4.769877230082476e-06, "loss": 0.9977554321289063, "step": 3985, "token_acc": 0.6852840924340428 }, { "epoch": 0.6923477355543988, "grad_norm": 1.9140625, "learning_rate": 4.74544082821016e-06, "loss": 1.004736328125, "step": 3990, "token_acc": 0.6820171598669235 }, { "epoch": 0.6932153392330384, "grad_norm": 1.8828125, "learning_rate": 4.721047694271676e-06, "loss": 1.0017391204833985, "step": 3995, "token_acc": 0.684533952315144 }, { "epoch": 0.6940829429116779, "grad_norm": 1.8828125, "learning_rate": 4.69669802912888e-06, "loss": 0.9762969970703125, "step": 4000, "token_acc": 0.6934091245841135 }, { "epoch": 0.6949505465903175, "grad_norm": 1.7890625, "learning_rate": 4.672392033285695e-06, "loss": 1.0025498390197753, "step": 4005, "token_acc": 0.6851401316784188 }, { "epoch": 0.6958181502689571, "grad_norm": 1.875, "learning_rate": 4.648129906886445e-06, "loss": 1.0146098136901855, "step": 4010, "token_acc": 0.6792288989232372 }, { "epoch": 0.6966857539475967, "grad_norm": 1.78125, "learning_rate": 4.623911849714226e-06, "loss": 1.0010202407836915, "step": 4015, "token_acc": 0.6856064118699079 }, { "epoch": 0.6975533576262364, "grad_norm": 1.8671875, "learning_rate": 4.599738061189244e-06, "loss": 1.0105598449707032, "step": 4020, "token_acc": 0.682280948032655 }, { "epoch": 0.698420961304876, "grad_norm": 1.890625, "learning_rate": 4.575608740367189e-06, "loss": 0.9960094451904297, "step": 4025, "token_acc": 0.6854422794662214 }, { "epoch": 0.6992885649835155, "grad_norm": 1.875, "learning_rate": 4.551524085937582e-06, "loss": 0.9695888519287109, "step": 4030, "token_acc": 0.6933236382866208 }, { "epoch": 0.7001561686621551, "grad_norm": 1.796875, "learning_rate": 4.527484296222149e-06, "loss": 0.9828217506408692, "step": 4035, "token_acc": 0.6878763576059919 }, { "epoch": 0.7010237723407947, "grad_norm": 1.8984375, "learning_rate": 4.503489569173179e-06, "loss": 0.9933969497680664, "step": 4040, "token_acc": 0.6868701758147513 }, { "epoch": 0.7018913760194343, "grad_norm": 1.8203125, "learning_rate": 4.479540102371904e-06, "loss": 1.0078033447265624, "step": 4045, "token_acc": 0.6846505259554748 }, { "epoch": 0.7027589796980739, "grad_norm": 1.7578125, "learning_rate": 4.455636093026865e-06, "loss": 0.9774109840393066, "step": 4050, "token_acc": 0.69020612269789 }, { "epoch": 0.7036265833767135, "grad_norm": 1.8125, "learning_rate": 4.431777737972287e-06, "loss": 0.9925678253173829, "step": 4055, "token_acc": 0.6882775426446069 }, { "epoch": 0.7044941870553532, "grad_norm": 1.796875, "learning_rate": 4.4079652336664645e-06, "loss": 0.9903898239135742, "step": 4060, "token_acc": 0.6858611892801725 }, { "epoch": 0.7053617907339927, "grad_norm": 1.7734375, "learning_rate": 4.384198776190137e-06, "loss": 0.9989794731140137, "step": 4065, "token_acc": 0.6841588232951453 }, { "epoch": 0.7062293944126323, "grad_norm": 1.859375, "learning_rate": 4.360478561244885e-06, "loss": 0.982159423828125, "step": 4070, "token_acc": 0.6892994694174365 }, { "epoch": 0.7070969980912719, "grad_norm": 1.84375, "learning_rate": 4.336804784151505e-06, "loss": 0.9847228050231933, "step": 4075, "token_acc": 0.6867390010281942 }, { "epoch": 0.7079646017699115, "grad_norm": 1.8359375, "learning_rate": 4.313177639848408e-06, "loss": 1.0108787536621093, "step": 4080, "token_acc": 0.6838802388894016 }, { "epoch": 0.7088322054485511, "grad_norm": 1.90625, "learning_rate": 4.2895973228900154e-06, "loss": 0.9985545158386231, "step": 4085, "token_acc": 0.6844401828768361 }, { "epoch": 0.7096998091271907, "grad_norm": 1.859375, "learning_rate": 4.2660640274451545e-06, "loss": 0.9826979637145996, "step": 4090, "token_acc": 0.6863773965691221 }, { "epoch": 0.7105674128058302, "grad_norm": 1.84375, "learning_rate": 4.242577947295462e-06, "loss": 0.9989730834960937, "step": 4095, "token_acc": 0.6846098407914565 }, { "epoch": 0.7114350164844699, "grad_norm": 1.828125, "learning_rate": 4.219139275833783e-06, "loss": 1.000558090209961, "step": 4100, "token_acc": 0.6841488044823767 }, { "epoch": 0.7123026201631095, "grad_norm": 1.78125, "learning_rate": 4.1957482060625865e-06, "loss": 0.9966065406799316, "step": 4105, "token_acc": 0.6879207664422579 }, { "epoch": 0.7131702238417491, "grad_norm": 1.8203125, "learning_rate": 4.172404930592372e-06, "loss": 0.9852560997009278, "step": 4110, "token_acc": 0.6896971139227118 }, { "epoch": 0.7140378275203887, "grad_norm": 1.765625, "learning_rate": 4.149109641640079e-06, "loss": 1.001215362548828, "step": 4115, "token_acc": 0.6853088591189812 }, { "epoch": 0.7149054311990283, "grad_norm": 1.78125, "learning_rate": 4.1258625310275145e-06, "loss": 1.0101828575134277, "step": 4120, "token_acc": 0.681881495767706 }, { "epoch": 0.7157730348776679, "grad_norm": 1.8828125, "learning_rate": 4.102663790179764e-06, "loss": 0.9940977096557617, "step": 4125, "token_acc": 0.6862203534229258 }, { "epoch": 0.7166406385563074, "grad_norm": 1.8984375, "learning_rate": 4.079513610123619e-06, "loss": 0.9920468330383301, "step": 4130, "token_acc": 0.6872316721917288 }, { "epoch": 0.717508242234947, "grad_norm": 1.7734375, "learning_rate": 4.056412181486003e-06, "loss": 0.9854813575744629, "step": 4135, "token_acc": 0.6900252525252525 }, { "epoch": 0.7183758459135867, "grad_norm": 1.953125, "learning_rate": 4.033359694492411e-06, "loss": 0.9985934257507324, "step": 4140, "token_acc": 0.6840057676088909 }, { "epoch": 0.7192434495922263, "grad_norm": 1.8046875, "learning_rate": 4.010356338965323e-06, "loss": 0.9948851585388183, "step": 4145, "token_acc": 0.6854739461477084 }, { "epoch": 0.7201110532708659, "grad_norm": 1.96875, "learning_rate": 3.98740230432266e-06, "loss": 0.9762655258178711, "step": 4150, "token_acc": 0.6923338872694581 }, { "epoch": 0.7209786569495055, "grad_norm": 1.9140625, "learning_rate": 3.9644977795762175e-06, "loss": 0.988780403137207, "step": 4155, "token_acc": 0.6866318047733977 }, { "epoch": 0.721846260628145, "grad_norm": 1.953125, "learning_rate": 3.941642953330102e-06, "loss": 0.9889546394348144, "step": 4160, "token_acc": 0.6865713642503377 }, { "epoch": 0.7227138643067846, "grad_norm": 1.859375, "learning_rate": 3.9188380137791934e-06, "loss": 0.9839936256408691, "step": 4165, "token_acc": 0.6882079424724933 }, { "epoch": 0.7235814679854242, "grad_norm": 1.8984375, "learning_rate": 3.896083148707579e-06, "loss": 0.9844943046569824, "step": 4170, "token_acc": 0.6876788477073265 }, { "epoch": 0.7244490716640638, "grad_norm": 1.8515625, "learning_rate": 3.87337854548702e-06, "loss": 0.9963854789733887, "step": 4175, "token_acc": 0.6848597774936757 }, { "epoch": 0.7253166753427035, "grad_norm": 1.8515625, "learning_rate": 3.8507243910754015e-06, "loss": 1.0020368576049805, "step": 4180, "token_acc": 0.6843103494659816 }, { "epoch": 0.7261842790213431, "grad_norm": 1.8671875, "learning_rate": 3.828120872015193e-06, "loss": 1.0066667556762696, "step": 4185, "token_acc": 0.6836107139967091 }, { "epoch": 0.7270518826999827, "grad_norm": 1.9140625, "learning_rate": 3.8055681744319173e-06, "loss": 1.0011329650878906, "step": 4190, "token_acc": 0.6850806824639539 }, { "epoch": 0.7279194863786222, "grad_norm": 1.8828125, "learning_rate": 3.783066484032615e-06, "loss": 1.0011292457580567, "step": 4195, "token_acc": 0.6820659087561429 }, { "epoch": 0.7287870900572618, "grad_norm": 1.8203125, "learning_rate": 3.7606159861043123e-06, "loss": 1.0115188598632812, "step": 4200, "token_acc": 0.6830158518715865 }, { "epoch": 0.7296546937359014, "grad_norm": 1.8125, "learning_rate": 3.738216865512496e-06, "loss": 0.9878059387207031, "step": 4205, "token_acc": 0.6888757571280839 }, { "epoch": 0.730522297414541, "grad_norm": 1.8046875, "learning_rate": 3.7158693066996066e-06, "loss": 0.9820815086364746, "step": 4210, "token_acc": 0.6891283735961433 }, { "epoch": 0.7313899010931806, "grad_norm": 1.8515625, "learning_rate": 3.69357349368349e-06, "loss": 1.001258373260498, "step": 4215, "token_acc": 0.6836823676196354 }, { "epoch": 0.7322575047718203, "grad_norm": 1.796875, "learning_rate": 3.6713296100559084e-06, "loss": 1.0037827491760254, "step": 4220, "token_acc": 0.6847890011370023 }, { "epoch": 0.7331251084504599, "grad_norm": 1.9296875, "learning_rate": 3.649137838981014e-06, "loss": 0.9784846305847168, "step": 4225, "token_acc": 0.6925998220664197 }, { "epoch": 0.7339927121290994, "grad_norm": 1.9453125, "learning_rate": 3.6269983631938476e-06, "loss": 0.98970947265625, "step": 4230, "token_acc": 0.6865225040519761 }, { "epoch": 0.734860315807739, "grad_norm": 1.828125, "learning_rate": 3.604911364998832e-06, "loss": 1.0065629005432128, "step": 4235, "token_acc": 0.6821462879099767 }, { "epoch": 0.7357279194863786, "grad_norm": 1.8203125, "learning_rate": 3.582877026268269e-06, "loss": 1.0006741523742675, "step": 4240, "token_acc": 0.6835158700622043 }, { "epoch": 0.7365955231650182, "grad_norm": 1.875, "learning_rate": 3.560895528440844e-06, "loss": 0.9968295097351074, "step": 4245, "token_acc": 0.6860261131570137 }, { "epoch": 0.7374631268436578, "grad_norm": 1.8984375, "learning_rate": 3.5389670525201335e-06, "loss": 0.994806957244873, "step": 4250, "token_acc": 0.6844711335861778 }, { "epoch": 0.7383307305222974, "grad_norm": 1.9296875, "learning_rate": 3.5170917790731084e-06, "loss": 0.9853558540344238, "step": 4255, "token_acc": 0.690081677065686 }, { "epoch": 0.7391983342009371, "grad_norm": 1.796875, "learning_rate": 3.4952698882286564e-06, "loss": 1.002675437927246, "step": 4260, "token_acc": 0.6839060402684564 }, { "epoch": 0.7400659378795766, "grad_norm": 1.890625, "learning_rate": 3.473501559676088e-06, "loss": 1.006124496459961, "step": 4265, "token_acc": 0.6798628939749822 }, { "epoch": 0.7409335415582162, "grad_norm": 1.875, "learning_rate": 3.4517869726636667e-06, "loss": 0.9663874626159668, "step": 4270, "token_acc": 0.6933329657757991 }, { "epoch": 0.7418011452368558, "grad_norm": 1.90625, "learning_rate": 3.4301263059971234e-06, "loss": 0.9783464431762695, "step": 4275, "token_acc": 0.690311533509431 }, { "epoch": 0.7426687489154954, "grad_norm": 1.90625, "learning_rate": 3.408519738038202e-06, "loss": 0.9907986640930175, "step": 4280, "token_acc": 0.6858886450905102 }, { "epoch": 0.743536352594135, "grad_norm": 1.8515625, "learning_rate": 3.3869674467031633e-06, "loss": 0.9949624061584472, "step": 4285, "token_acc": 0.6854110544056531 }, { "epoch": 0.7444039562727746, "grad_norm": 1.859375, "learning_rate": 3.3654696094613424e-06, "loss": 1.0062894821166992, "step": 4290, "token_acc": 0.68370965995235 }, { "epoch": 0.7452715599514141, "grad_norm": 1.890625, "learning_rate": 3.3440264033336787e-06, "loss": 0.9806596755981445, "step": 4295, "token_acc": 0.6898993765722411 }, { "epoch": 0.7461391636300538, "grad_norm": 1.859375, "learning_rate": 3.3226380048912586e-06, "loss": 0.9737249374389648, "step": 4300, "token_acc": 0.6907929820819113 }, { "epoch": 0.7470067673086934, "grad_norm": 1.765625, "learning_rate": 3.3013045902538634e-06, "loss": 0.975331974029541, "step": 4305, "token_acc": 0.6905592319015476 }, { "epoch": 0.747874370987333, "grad_norm": 1.9609375, "learning_rate": 3.2800263350885165e-06, "loss": 0.9860298156738281, "step": 4310, "token_acc": 0.6891362690327527 }, { "epoch": 0.7487419746659726, "grad_norm": 1.859375, "learning_rate": 3.2588034146080404e-06, "loss": 0.9883022308349609, "step": 4315, "token_acc": 0.6857588710224575 }, { "epoch": 0.7496095783446122, "grad_norm": 1.828125, "learning_rate": 3.2376360035696085e-06, "loss": 1.0138681411743165, "step": 4320, "token_acc": 0.6818151051185206 }, { "epoch": 0.7504771820232518, "grad_norm": 1.8203125, "learning_rate": 3.216524276273313e-06, "loss": 1.0137529373168945, "step": 4325, "token_acc": 0.6836542657647829 }, { "epoch": 0.7513447857018913, "grad_norm": 1.859375, "learning_rate": 3.1954684065607232e-06, "loss": 0.9806119918823242, "step": 4330, "token_acc": 0.6892710892710893 }, { "epoch": 0.7522123893805309, "grad_norm": 1.8828125, "learning_rate": 3.174468567813461e-06, "loss": 1.0116167068481445, "step": 4335, "token_acc": 0.6821417273014869 }, { "epoch": 0.7530799930591706, "grad_norm": 1.921875, "learning_rate": 3.1535249329517603e-06, "loss": 1.0085960388183595, "step": 4340, "token_acc": 0.6825007871428767 }, { "epoch": 0.7539475967378102, "grad_norm": 1.8984375, "learning_rate": 3.1326376744330667e-06, "loss": 0.9790970802307128, "step": 4345, "token_acc": 0.689821249191562 }, { "epoch": 0.7548152004164498, "grad_norm": 1.8203125, "learning_rate": 3.1118069642505886e-06, "loss": 0.9997638702392578, "step": 4350, "token_acc": 0.6841588385994876 }, { "epoch": 0.7556828040950894, "grad_norm": 1.8125, "learning_rate": 3.0910329739319033e-06, "loss": 0.9993162155151367, "step": 4355, "token_acc": 0.6841426321221009 }, { "epoch": 0.756550407773729, "grad_norm": 1.9140625, "learning_rate": 3.0703158745375316e-06, "loss": 0.9740482330322265, "step": 4360, "token_acc": 0.6929492242406393 }, { "epoch": 0.7574180114523685, "grad_norm": 1.9140625, "learning_rate": 3.0496558366595364e-06, "loss": 0.9911387443542481, "step": 4365, "token_acc": 0.6877242474870587 }, { "epoch": 0.7582856151310081, "grad_norm": 1.8359375, "learning_rate": 3.029053030420115e-06, "loss": 1.001497173309326, "step": 4370, "token_acc": 0.6854735659622271 }, { "epoch": 0.7591532188096477, "grad_norm": 1.8828125, "learning_rate": 3.0085076254701983e-06, "loss": 0.9972357749938965, "step": 4375, "token_acc": 0.6851984268859492 }, { "epoch": 0.7600208224882874, "grad_norm": 1.8125, "learning_rate": 2.988019790988056e-06, "loss": 0.990943431854248, "step": 4380, "token_acc": 0.6880961127665075 }, { "epoch": 0.760888426166927, "grad_norm": 1.8359375, "learning_rate": 2.9675896956778984e-06, "loss": 0.9964810371398926, "step": 4385, "token_acc": 0.6846022286951177 }, { "epoch": 0.7617560298455666, "grad_norm": 1.8671875, "learning_rate": 2.947217507768495e-06, "loss": 0.9866546630859375, "step": 4390, "token_acc": 0.6860718843921116 }, { "epoch": 0.7626236335242061, "grad_norm": 1.796875, "learning_rate": 2.926903395011781e-06, "loss": 0.9983717918395996, "step": 4395, "token_acc": 0.6877645635960492 }, { "epoch": 0.7634912372028457, "grad_norm": 1.875, "learning_rate": 2.9066475246814828e-06, "loss": 1.0109498977661133, "step": 4400, "token_acc": 0.6828291696597227 }, { "epoch": 0.7643588408814853, "grad_norm": 1.8046875, "learning_rate": 2.886450063571735e-06, "loss": 0.9692567825317383, "step": 4405, "token_acc": 0.6919382320189973 }, { "epoch": 0.7652264445601249, "grad_norm": 1.890625, "learning_rate": 2.86631117799571e-06, "loss": 0.9896286964416504, "step": 4410, "token_acc": 0.6858808026192449 }, { "epoch": 0.7660940482387645, "grad_norm": 1.8515625, "learning_rate": 2.8462310337842523e-06, "loss": 0.9920248031616211, "step": 4415, "token_acc": 0.6869760785115518 }, { "epoch": 0.7669616519174042, "grad_norm": 1.921875, "learning_rate": 2.8262097962845058e-06, "loss": 1.0015531539916993, "step": 4420, "token_acc": 0.6852368826004658 }, { "epoch": 0.7678292555960438, "grad_norm": 1.8671875, "learning_rate": 2.806247630358554e-06, "loss": 1.0034663200378418, "step": 4425, "token_acc": 0.684781729446976 }, { "epoch": 0.7686968592746833, "grad_norm": 1.8828125, "learning_rate": 2.7863447003820642e-06, "loss": 0.9939127922058105, "step": 4430, "token_acc": 0.6864493951901668 }, { "epoch": 0.7695644629533229, "grad_norm": 1.84375, "learning_rate": 2.7665011702429357e-06, "loss": 0.9952418327331543, "step": 4435, "token_acc": 0.6857203881939902 }, { "epoch": 0.7704320666319625, "grad_norm": 1.890625, "learning_rate": 2.746717203339946e-06, "loss": 0.9777667045593261, "step": 4440, "token_acc": 0.6909912776054509 }, { "epoch": 0.7712996703106021, "grad_norm": 1.8671875, "learning_rate": 2.7269929625814085e-06, "loss": 1.0063211441040039, "step": 4445, "token_acc": 0.6833169581450187 }, { "epoch": 0.7721672739892417, "grad_norm": 1.859375, "learning_rate": 2.7073286103838293e-06, "loss": 1.0100595474243164, "step": 4450, "token_acc": 0.6820285638719915 }, { "epoch": 0.7730348776678813, "grad_norm": 1.9140625, "learning_rate": 2.6877243086705716e-06, "loss": 0.9833673477172852, "step": 4455, "token_acc": 0.6882294325611871 }, { "epoch": 0.773902481346521, "grad_norm": 1.859375, "learning_rate": 2.6681802188705196e-06, "loss": 0.9901654243469238, "step": 4460, "token_acc": 0.6868834587465766 }, { "epoch": 0.7747700850251605, "grad_norm": 1.921875, "learning_rate": 2.6486965019167544e-06, "loss": 0.9956707000732422, "step": 4465, "token_acc": 0.6848234865946864 }, { "epoch": 0.7756376887038001, "grad_norm": 1.8515625, "learning_rate": 2.629273318245219e-06, "loss": 0.9965853691101074, "step": 4470, "token_acc": 0.6839174626010737 }, { "epoch": 0.7765052923824397, "grad_norm": 1.890625, "learning_rate": 2.6099108277934105e-06, "loss": 0.9861255645751953, "step": 4475, "token_acc": 0.6883442417490601 }, { "epoch": 0.7773728960610793, "grad_norm": 1.9453125, "learning_rate": 2.590609189999049e-06, "loss": 1.0013001441955567, "step": 4480, "token_acc": 0.6833337796941535 }, { "epoch": 0.7782404997397189, "grad_norm": 1.9453125, "learning_rate": 2.5713685637987818e-06, "loss": 0.9976703643798828, "step": 4485, "token_acc": 0.6877284595300261 }, { "epoch": 0.7791081034183585, "grad_norm": 1.953125, "learning_rate": 2.5521891076268555e-06, "loss": 0.9790729522705078, "step": 4490, "token_acc": 0.6894264797255315 }, { "epoch": 0.779975707096998, "grad_norm": 1.765625, "learning_rate": 2.5330709794138254e-06, "loss": 0.9921565055847168, "step": 4495, "token_acc": 0.684914119045047 }, { "epoch": 0.7808433107756377, "grad_norm": 1.84375, "learning_rate": 2.5140143365852476e-06, "loss": 0.9999216079711915, "step": 4500, "token_acc": 0.6869082423624708 }, { "epoch": 0.7817109144542773, "grad_norm": 1.828125, "learning_rate": 2.4950193360603868e-06, "loss": 0.9970880508422851, "step": 4505, "token_acc": 0.6865171230142891 }, { "epoch": 0.7825785181329169, "grad_norm": 1.8828125, "learning_rate": 2.4760861342509235e-06, "loss": 0.9840543746948243, "step": 4510, "token_acc": 0.6897681822438032 }, { "epoch": 0.7834461218115565, "grad_norm": 1.8203125, "learning_rate": 2.4572148870596636e-06, "loss": 1.0138338088989258, "step": 4515, "token_acc": 0.6816099820996104 }, { "epoch": 0.7843137254901961, "grad_norm": 1.84375, "learning_rate": 2.438405749879258e-06, "loss": 1.0032525062561035, "step": 4520, "token_acc": 0.6844471121782046 }, { "epoch": 0.7851813291688357, "grad_norm": 1.8359375, "learning_rate": 2.4196588775909204e-06, "loss": 1.0106260299682617, "step": 4525, "token_acc": 0.6811335272101303 }, { "epoch": 0.7860489328474752, "grad_norm": 1.84375, "learning_rate": 2.4009744245631515e-06, "loss": 0.9920726776123047, "step": 4530, "token_acc": 0.6867518931683088 }, { "epoch": 0.7869165365261148, "grad_norm": 1.890625, "learning_rate": 2.3823525446504735e-06, "loss": 0.985197639465332, "step": 4535, "token_acc": 0.6879341219882787 }, { "epoch": 0.7877841402047545, "grad_norm": 1.9453125, "learning_rate": 2.363793391192155e-06, "loss": 0.9904392242431641, "step": 4540, "token_acc": 0.6878958715534904 }, { "epoch": 0.7886517438833941, "grad_norm": 1.8984375, "learning_rate": 2.345297117010954e-06, "loss": 0.9871200561523438, "step": 4545, "token_acc": 0.687506753106429 }, { "epoch": 0.7895193475620337, "grad_norm": 1.8359375, "learning_rate": 2.3268638744118555e-06, "loss": 0.9928851127624512, "step": 4550, "token_acc": 0.6844927026075655 }, { "epoch": 0.7903869512406733, "grad_norm": 1.8046875, "learning_rate": 2.308493815180827e-06, "loss": 0.974305534362793, "step": 4555, "token_acc": 0.6914228654424733 }, { "epoch": 0.7912545549193128, "grad_norm": 1.8359375, "learning_rate": 2.2901870905835533e-06, "loss": 1.0047635078430175, "step": 4560, "token_acc": 0.6855282218262464 }, { "epoch": 0.7921221585979524, "grad_norm": 1.8828125, "learning_rate": 2.2719438513642023e-06, "loss": 1.0162674903869628, "step": 4565, "token_acc": 0.6807637282560736 }, { "epoch": 0.792989762276592, "grad_norm": 1.859375, "learning_rate": 2.25376424774418e-06, "loss": 1.004638671875, "step": 4570, "token_acc": 0.6815402254920696 }, { "epoch": 0.7938573659552316, "grad_norm": 1.8515625, "learning_rate": 2.2356484294208945e-06, "loss": 0.9928275108337402, "step": 4575, "token_acc": 0.6833461637156787 }, { "epoch": 0.7947249696338713, "grad_norm": 1.8359375, "learning_rate": 2.2175965455665225e-06, "loss": 0.9946788787841797, "step": 4580, "token_acc": 0.6848246423935879 }, { "epoch": 0.7955925733125109, "grad_norm": 1.8515625, "learning_rate": 2.1996087448267813e-06, "loss": 0.9975082397460937, "step": 4585, "token_acc": 0.6866429591314421 }, { "epoch": 0.7964601769911505, "grad_norm": 1.8828125, "learning_rate": 2.1816851753197023e-06, "loss": 0.9974835395812989, "step": 4590, "token_acc": 0.6866237463087606 }, { "epoch": 0.79732778066979, "grad_norm": 1.890625, "learning_rate": 2.163825984634419e-06, "loss": 1.0059508323669433, "step": 4595, "token_acc": 0.681298324742268 }, { "epoch": 0.7981953843484296, "grad_norm": 1.859375, "learning_rate": 2.146031319829942e-06, "loss": 0.9988635063171387, "step": 4600, "token_acc": 0.6838742790766779 }, { "epoch": 0.7990629880270692, "grad_norm": 1.765625, "learning_rate": 2.1283013274339535e-06, "loss": 0.9845050811767578, "step": 4605, "token_acc": 0.6879293681268448 }, { "epoch": 0.7999305917057088, "grad_norm": 1.921875, "learning_rate": 2.110636153441602e-06, "loss": 0.9597654342651367, "step": 4610, "token_acc": 0.696718661601875 }, { "epoch": 0.8007981953843484, "grad_norm": 1.875, "learning_rate": 2.0930359433142934e-06, "loss": 1.0043936729431153, "step": 4615, "token_acc": 0.6844164224450837 }, { "epoch": 0.8016657990629881, "grad_norm": 1.8359375, "learning_rate": 2.0755008419785037e-06, "loss": 1.0047181129455567, "step": 4620, "token_acc": 0.6838641217477737 }, { "epoch": 0.8025334027416277, "grad_norm": 1.90625, "learning_rate": 2.058030993824577e-06, "loss": 0.9849211692810058, "step": 4625, "token_acc": 0.6874467883833129 }, { "epoch": 0.8034010064202672, "grad_norm": 1.953125, "learning_rate": 2.040626542705536e-06, "loss": 0.9540246963500977, "step": 4630, "token_acc": 0.6967229009113826 }, { "epoch": 0.8042686100989068, "grad_norm": 1.8046875, "learning_rate": 2.023287631935904e-06, "loss": 1.00880708694458, "step": 4635, "token_acc": 0.6815457263858633 }, { "epoch": 0.8051362137775464, "grad_norm": 1.6796875, "learning_rate": 2.0060144042905227e-06, "loss": 0.9796277999877929, "step": 4640, "token_acc": 0.690756012376665 }, { "epoch": 0.806003817456186, "grad_norm": 1.8984375, "learning_rate": 1.9888070020033713e-06, "loss": 0.9896170616149902, "step": 4645, "token_acc": 0.6856324413853025 }, { "epoch": 0.8068714211348256, "grad_norm": 1.828125, "learning_rate": 1.971665566766401e-06, "loss": 1.0029498100280763, "step": 4650, "token_acc": 0.6831084917137528 }, { "epoch": 0.8077390248134652, "grad_norm": 1.9921875, "learning_rate": 1.954590239728369e-06, "loss": 0.9786740303039551, "step": 4655, "token_acc": 0.6886030063097072 }, { "epoch": 0.8086066284921049, "grad_norm": 1.9296875, "learning_rate": 1.9375811614936703e-06, "loss": 1.0019638061523437, "step": 4660, "token_acc": 0.6835257720567924 }, { "epoch": 0.8094742321707444, "grad_norm": 1.8984375, "learning_rate": 1.9206384721211847e-06, "loss": 0.9825675010681152, "step": 4665, "token_acc": 0.6896243896500885 }, { "epoch": 0.810341835849384, "grad_norm": 1.875, "learning_rate": 1.9037623111231229e-06, "loss": 1.002269172668457, "step": 4670, "token_acc": 0.6822305407169595 }, { "epoch": 0.8112094395280236, "grad_norm": 1.921875, "learning_rate": 1.8869528174638752e-06, "loss": 0.9967728614807129, "step": 4675, "token_acc": 0.683384136015715 }, { "epoch": 0.8120770432066632, "grad_norm": 1.875, "learning_rate": 1.8702101295588714e-06, "loss": 0.9936102867126465, "step": 4680, "token_acc": 0.6876304142688162 }, { "epoch": 0.8129446468853028, "grad_norm": 1.8359375, "learning_rate": 1.8535343852734333e-06, "loss": 0.9896058082580567, "step": 4685, "token_acc": 0.6851116625310174 }, { "epoch": 0.8138122505639424, "grad_norm": 1.890625, "learning_rate": 1.8369257219216563e-06, "loss": 0.999512004852295, "step": 4690, "token_acc": 0.6847072393860351 }, { "epoch": 0.8146798542425819, "grad_norm": 1.90625, "learning_rate": 1.8203842762652546e-06, "loss": 0.9870369911193848, "step": 4695, "token_acc": 0.6873048561748488 }, { "epoch": 0.8155474579212216, "grad_norm": 1.8828125, "learning_rate": 1.8039101845124552e-06, "loss": 1.0059050559997558, "step": 4700, "token_acc": 0.6838551420357133 }, { "epoch": 0.8164150615998612, "grad_norm": 1.8515625, "learning_rate": 1.7875035823168641e-06, "loss": 1.0021234512329102, "step": 4705, "token_acc": 0.6836149967576725 }, { "epoch": 0.8172826652785008, "grad_norm": 1.8671875, "learning_rate": 1.7711646047763586e-06, "loss": 1.0115555763244628, "step": 4710, "token_acc": 0.6826202404154295 }, { "epoch": 0.8181502689571404, "grad_norm": 1.8828125, "learning_rate": 1.7548933864319661e-06, "loss": 0.9789422035217286, "step": 4715, "token_acc": 0.6882753373099447 }, { "epoch": 0.81901787263578, "grad_norm": 1.8046875, "learning_rate": 1.7386900612667635e-06, "loss": 0.992159366607666, "step": 4720, "token_acc": 0.6854386416259326 }, { "epoch": 0.8198854763144195, "grad_norm": 1.875, "learning_rate": 1.722554762704769e-06, "loss": 0.9974750518798828, "step": 4725, "token_acc": 0.6872423661616824 }, { "epoch": 0.8207530799930591, "grad_norm": 1.921875, "learning_rate": 1.706487623609846e-06, "loss": 1.0016369819641113, "step": 4730, "token_acc": 0.6850419346958717 }, { "epoch": 0.8216206836716987, "grad_norm": 1.7890625, "learning_rate": 1.6904887762846068e-06, "loss": 0.9975146293640137, "step": 4735, "token_acc": 0.6852138558677419 }, { "epoch": 0.8224882873503384, "grad_norm": 1.796875, "learning_rate": 1.6745583524693275e-06, "loss": 0.9930521965026855, "step": 4740, "token_acc": 0.6858060739712228 }, { "epoch": 0.823355891028978, "grad_norm": 1.828125, "learning_rate": 1.658696483340858e-06, "loss": 0.990367317199707, "step": 4745, "token_acc": 0.690088659520974 }, { "epoch": 0.8242234947076176, "grad_norm": 1.890625, "learning_rate": 1.6429032995115446e-06, "loss": 0.9985919952392578, "step": 4750, "token_acc": 0.6834251915580051 }, { "epoch": 0.8250910983862572, "grad_norm": 1.859375, "learning_rate": 1.6271789310281515e-06, "loss": 0.9976622581481933, "step": 4755, "token_acc": 0.6856444289207125 }, { "epoch": 0.8259587020648967, "grad_norm": 1.8984375, "learning_rate": 1.6115235073708024e-06, "loss": 0.9920053482055664, "step": 4760, "token_acc": 0.6863879817112998 }, { "epoch": 0.8268263057435363, "grad_norm": 1.8984375, "learning_rate": 1.5959371574518934e-06, "loss": 1.0084431648254395, "step": 4765, "token_acc": 0.6817347253306887 }, { "epoch": 0.8276939094221759, "grad_norm": 1.84375, "learning_rate": 1.580420009615048e-06, "loss": 0.9829930305480957, "step": 4770, "token_acc": 0.690848938000397 }, { "epoch": 0.8285615131008155, "grad_norm": 1.8359375, "learning_rate": 1.564972191634051e-06, "loss": 0.9950210571289062, "step": 4775, "token_acc": 0.6865175616096914 }, { "epoch": 0.8294291167794552, "grad_norm": 1.859375, "learning_rate": 1.5495938307118052e-06, "loss": 0.9901968002319336, "step": 4780, "token_acc": 0.686112085480694 }, { "epoch": 0.8302967204580948, "grad_norm": 1.8203125, "learning_rate": 1.5342850534792753e-06, "loss": 0.9941259384155273, "step": 4785, "token_acc": 0.6846874957604699 }, { "epoch": 0.8311643241367344, "grad_norm": 1.8203125, "learning_rate": 1.5190459859944506e-06, "loss": 0.9872735977172852, "step": 4790, "token_acc": 0.6862535758770308 }, { "epoch": 0.8320319278153739, "grad_norm": 1.921875, "learning_rate": 1.5038767537413035e-06, "loss": 0.9889012336730957, "step": 4795, "token_acc": 0.6895972043893206 }, { "epoch": 0.8328995314940135, "grad_norm": 1.7890625, "learning_rate": 1.4887774816287604e-06, "loss": 0.9911365509033203, "step": 4800, "token_acc": 0.6869375511317153 }, { "epoch": 0.8337671351726531, "grad_norm": 1.8515625, "learning_rate": 1.4737482939896675e-06, "loss": 1.0037782669067383, "step": 4805, "token_acc": 0.6832780223501523 }, { "epoch": 0.8346347388512927, "grad_norm": 1.796875, "learning_rate": 1.4587893145797738e-06, "loss": 0.9940081596374511, "step": 4810, "token_acc": 0.6838530744774403 }, { "epoch": 0.8355023425299323, "grad_norm": 1.8359375, "learning_rate": 1.4439006665767042e-06, "loss": 0.9779527664184571, "step": 4815, "token_acc": 0.6904385373836274 }, { "epoch": 0.836369946208572, "grad_norm": 1.9609375, "learning_rate": 1.4290824725789542e-06, "loss": 0.9910070419311523, "step": 4820, "token_acc": 0.6858737818147412 }, { "epoch": 0.8372375498872116, "grad_norm": 1.859375, "learning_rate": 1.4143348546048706e-06, "loss": 1.0061234474182128, "step": 4825, "token_acc": 0.6816261604255763 }, { "epoch": 0.8381051535658511, "grad_norm": 1.90625, "learning_rate": 1.3996579340916583e-06, "loss": 1.0056955337524414, "step": 4830, "token_acc": 0.6805094883366865 }, { "epoch": 0.8389727572444907, "grad_norm": 1.875, "learning_rate": 1.3850518318943685e-06, "loss": 0.9870254516601562, "step": 4835, "token_acc": 0.6884185128317614 }, { "epoch": 0.8398403609231303, "grad_norm": 1.78125, "learning_rate": 1.3705166682849103e-06, "loss": 0.9996889114379883, "step": 4840, "token_acc": 0.6827413811061348 }, { "epoch": 0.8407079646017699, "grad_norm": 1.796875, "learning_rate": 1.3560525629510567e-06, "loss": 1.0011041641235352, "step": 4845, "token_acc": 0.6841137322872151 }, { "epoch": 0.8415755682804095, "grad_norm": 1.8671875, "learning_rate": 1.341659634995467e-06, "loss": 0.9982816696166992, "step": 4850, "token_acc": 0.6866214715232295 }, { "epoch": 0.842443171959049, "grad_norm": 1.8671875, "learning_rate": 1.327338002934695e-06, "loss": 1.0017461776733398, "step": 4855, "token_acc": 0.6838751233417388 }, { "epoch": 0.8433107756376887, "grad_norm": 1.9453125, "learning_rate": 1.3130877846982204e-06, "loss": 0.9614505767822266, "step": 4860, "token_acc": 0.6950054819491962 }, { "epoch": 0.8441783793163283, "grad_norm": 1.8984375, "learning_rate": 1.2989090976274765e-06, "loss": 1.0008953094482422, "step": 4865, "token_acc": 0.6828094757789712 }, { "epoch": 0.8450459829949679, "grad_norm": 1.7890625, "learning_rate": 1.28480205847488e-06, "loss": 0.987119197845459, "step": 4870, "token_acc": 0.6882399580718422 }, { "epoch": 0.8459135866736075, "grad_norm": 1.8125, "learning_rate": 1.2707667834028782e-06, "loss": 1.0048983573913575, "step": 4875, "token_acc": 0.6831559340074508 }, { "epoch": 0.8467811903522471, "grad_norm": 1.8984375, "learning_rate": 1.256803387982981e-06, "loss": 0.9872228622436523, "step": 4880, "token_acc": 0.6871233979735624 }, { "epoch": 0.8476487940308867, "grad_norm": 1.921875, "learning_rate": 1.2429119871948203e-06, "loss": 0.9801000595092774, "step": 4885, "token_acc": 0.690729556130764 }, { "epoch": 0.8485163977095262, "grad_norm": 1.8203125, "learning_rate": 1.2290926954251937e-06, "loss": 0.9848250389099121, "step": 4890, "token_acc": 0.6876951737632853 }, { "epoch": 0.8493840013881658, "grad_norm": 1.921875, "learning_rate": 1.2153456264671337e-06, "loss": 0.986370849609375, "step": 4895, "token_acc": 0.6873767258382643 }, { "epoch": 0.8502516050668055, "grad_norm": 1.8828125, "learning_rate": 1.2016708935189591e-06, "loss": 0.9943758010864258, "step": 4900, "token_acc": 0.6850112466771181 }, { "epoch": 0.8511192087454451, "grad_norm": 1.828125, "learning_rate": 1.1880686091833482e-06, "loss": 1.000884437561035, "step": 4905, "token_acc": 0.683370710159701 }, { "epoch": 0.8519868124240847, "grad_norm": 1.8359375, "learning_rate": 1.174538885466412e-06, "loss": 0.9865160942077636, "step": 4910, "token_acc": 0.6890235069467308 }, { "epoch": 0.8528544161027243, "grad_norm": 1.828125, "learning_rate": 1.1610818337767716e-06, "loss": 0.9991436004638672, "step": 4915, "token_acc": 0.6858561584726297 }, { "epoch": 0.8537220197813639, "grad_norm": 1.765625, "learning_rate": 1.147697564924639e-06, "loss": 0.9727308273315429, "step": 4920, "token_acc": 0.6912715446298077 }, { "epoch": 0.8545896234600034, "grad_norm": 1.8515625, "learning_rate": 1.1343861891209106e-06, "loss": 1.0181291580200196, "step": 4925, "token_acc": 0.6820682501542784 }, { "epoch": 0.855457227138643, "grad_norm": 1.8359375, "learning_rate": 1.121147815976248e-06, "loss": 1.0026049613952637, "step": 4930, "token_acc": 0.683598010267869 }, { "epoch": 0.8563248308172826, "grad_norm": 1.7734375, "learning_rate": 1.1079825545001887e-06, "loss": 0.9865102767944336, "step": 4935, "token_acc": 0.6898536402969927 }, { "epoch": 0.8571924344959223, "grad_norm": 1.9140625, "learning_rate": 1.0948905131002407e-06, "loss": 1.0127381324768066, "step": 4940, "token_acc": 0.6825190010857763 }, { "epoch": 0.8580600381745619, "grad_norm": 1.84375, "learning_rate": 1.081871799580989e-06, "loss": 0.9863951683044434, "step": 4945, "token_acc": 0.6883122286792139 }, { "epoch": 0.8589276418532015, "grad_norm": 1.8828125, "learning_rate": 1.0689265211432132e-06, "loss": 0.9868002891540527, "step": 4950, "token_acc": 0.6885490091767522 }, { "epoch": 0.8597952455318411, "grad_norm": 1.75, "learning_rate": 1.0560547843830016e-06, "loss": 0.9947976112365723, "step": 4955, "token_acc": 0.6865387356336733 }, { "epoch": 0.8606628492104806, "grad_norm": 1.859375, "learning_rate": 1.0432566952908696e-06, "loss": 1.0024614334106445, "step": 4960, "token_acc": 0.683781453319746 }, { "epoch": 0.8615304528891202, "grad_norm": 1.8359375, "learning_rate": 1.030532359250901e-06, "loss": 0.9844224929809571, "step": 4965, "token_acc": 0.6905729592779288 }, { "epoch": 0.8623980565677598, "grad_norm": 1.921875, "learning_rate": 1.0178818810398616e-06, "loss": 1.004835605621338, "step": 4970, "token_acc": 0.6851532852387675 }, { "epoch": 0.8632656602463994, "grad_norm": 1.890625, "learning_rate": 1.0053053648263477e-06, "loss": 0.9801043510437012, "step": 4975, "token_acc": 0.6884155757432423 }, { "epoch": 0.8641332639250391, "grad_norm": 1.96875, "learning_rate": 9.92802914169927e-07, "loss": 0.9909211158752441, "step": 4980, "token_acc": 0.6887015132838522 }, { "epoch": 0.8650008676036787, "grad_norm": 1.9296875, "learning_rate": 9.803746320202812e-07, "loss": 1.0029238700866698, "step": 4985, "token_acc": 0.6833788400406263 }, { "epoch": 0.8658684712823183, "grad_norm": 1.8046875, "learning_rate": 9.680206207163666e-07, "loss": 0.990473747253418, "step": 4990, "token_acc": 0.6859917435513812 }, { "epoch": 0.8667360749609578, "grad_norm": 1.8671875, "learning_rate": 9.557409819855645e-07, "loss": 0.9845627784729004, "step": 4995, "token_acc": 0.6892265928567627 }, { "epoch": 0.8676036786395974, "grad_norm": 1.7890625, "learning_rate": 9.435358169428444e-07, "loss": 0.9981782913208008, "step": 5000, "token_acc": 0.6857057648919893 } ], "logging_steps": 5, "max_steps": 5763, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.988905759887589e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }