{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 29709, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.0913017302751542, "epoch": 0.003029614481557222, "grad_norm": 1.1931798458099365, "learning_rate": 3.634894991922456e-07, "loss": 1.7418, "mean_token_accuracy": 0.7027995571494102, "num_tokens": 676413.0, "step": 10 }, { "entropy": 1.0921823412179947, "epoch": 0.006059228963114444, "grad_norm": 1.1370843648910522, "learning_rate": 7.673667205169629e-07, "loss": 1.7408, "mean_token_accuracy": 0.7032344728708267, "num_tokens": 1360739.0, "step": 20 }, { "entropy": 1.0819210216403008, "epoch": 0.009088843444671665, "grad_norm": 0.9659048914909363, "learning_rate": 1.17124394184168e-06, "loss": 1.7275, "mean_token_accuracy": 0.7082073956727981, "num_tokens": 2039094.0, "step": 30 }, { "entropy": 1.0838943645358086, "epoch": 0.012118457926228888, "grad_norm": 1.0033633708953857, "learning_rate": 1.5751211631663977e-06, "loss": 1.6918, "mean_token_accuracy": 0.709006880223751, "num_tokens": 2724293.0, "step": 40 }, { "entropy": 1.0841404393315315, "epoch": 0.015148072407786109, "grad_norm": 0.976797342300415, "learning_rate": 1.978998384491115e-06, "loss": 1.628, "mean_token_accuracy": 0.7091029062867165, "num_tokens": 3410092.0, "step": 50 }, { "entropy": 1.0670536786317826, "epoch": 0.01817768688934333, "grad_norm": 0.8120208978652954, "learning_rate": 2.382875605815832e-06, "loss": 1.5408, "mean_token_accuracy": 0.7167265444993973, "num_tokens": 4097508.0, "step": 60 }, { "entropy": 1.0734323486685753, "epoch": 0.02120730137090055, "grad_norm": 0.4978794753551483, "learning_rate": 2.7867528271405497e-06, "loss": 1.4942, "mean_token_accuracy": 0.7221754133701325, "num_tokens": 4763424.0, "step": 70 }, { "entropy": 1.0327554315328598, "epoch": 0.024236915852457776, "grad_norm": 0.3606416583061218, "learning_rate": 3.1906300484652665e-06, "loss": 1.4376, "mean_token_accuracy": 0.7297119319438934, "num_tokens": 5435268.0, "step": 80 }, { "entropy": 1.024299743771553, "epoch": 0.027266530334014997, "grad_norm": 0.3630509674549103, "learning_rate": 3.5945072697899836e-06, "loss": 1.4181, "mean_token_accuracy": 0.7321271970868111, "num_tokens": 6107570.0, "step": 90 }, { "entropy": 1.0179613307118416, "epoch": 0.030296144815572218, "grad_norm": 0.31415602564811707, "learning_rate": 3.998384491114701e-06, "loss": 1.4065, "mean_token_accuracy": 0.7293723702430726, "num_tokens": 6767478.0, "step": 100 }, { "entropy": 0.9994221851229668, "epoch": 0.03332575929712944, "grad_norm": 0.34493952989578247, "learning_rate": 4.402261712439419e-06, "loss": 1.3978, "mean_token_accuracy": 0.7380139917135239, "num_tokens": 7444132.0, "step": 110 }, { "entropy": 0.9911895662546157, "epoch": 0.03635537377868666, "grad_norm": 0.33052828907966614, "learning_rate": 4.806138933764136e-06, "loss": 1.3869, "mean_token_accuracy": 0.7341301307082176, "num_tokens": 8101986.0, "step": 120 }, { "entropy": 0.9866934418678284, "epoch": 0.039384988260243885, "grad_norm": 0.4284345805644989, "learning_rate": 5.210016155088853e-06, "loss": 1.3799, "mean_token_accuracy": 0.7413432449102402, "num_tokens": 8777186.0, "step": 130 }, { "entropy": 0.9676428496837616, "epoch": 0.0424146027418011, "grad_norm": 0.35278987884521484, "learning_rate": 5.61389337641357e-06, "loss": 1.3681, "mean_token_accuracy": 0.7491588592529297, "num_tokens": 9463550.0, "step": 140 }, { "entropy": 0.9714192464947701, "epoch": 0.04544421722335833, "grad_norm": 0.4610103666782379, "learning_rate": 6.017770597738288e-06, "loss": 1.3639, "mean_token_accuracy": 0.7401145175099373, "num_tokens": 10130428.0, "step": 150 }, { "entropy": 0.95505460947752, "epoch": 0.04847383170491555, "grad_norm": 0.3843373656272888, "learning_rate": 6.421647819063005e-06, "loss": 1.3449, "mean_token_accuracy": 0.7504446610808373, "num_tokens": 10828958.0, "step": 160 }, { "entropy": 0.9737901806831359, "epoch": 0.05150344618647277, "grad_norm": 0.5503286123275757, "learning_rate": 6.825525040387723e-06, "loss": 1.3669, "mean_token_accuracy": 0.7403925940394401, "num_tokens": 11508760.0, "step": 170 }, { "entropy": 0.9536378592252731, "epoch": 0.054533060668029994, "grad_norm": 0.5051810145378113, "learning_rate": 7.22940226171244e-06, "loss": 1.3484, "mean_token_accuracy": 0.742230424284935, "num_tokens": 12165955.0, "step": 180 }, { "entropy": 0.9639706507325172, "epoch": 0.05756267514958722, "grad_norm": 0.40541717410087585, "learning_rate": 7.633279483037158e-06, "loss": 1.3539, "mean_token_accuracy": 0.7430707395076752, "num_tokens": 12853601.0, "step": 190 }, { "entropy": 0.9519797936081886, "epoch": 0.060592289631144436, "grad_norm": 0.535460352897644, "learning_rate": 8.037156704361875e-06, "loss": 1.35, "mean_token_accuracy": 0.7496364802122116, "num_tokens": 13539737.0, "step": 200 }, { "entropy": 0.9520228564739227, "epoch": 0.06362190411270166, "grad_norm": 0.6223520040512085, "learning_rate": 8.441033925686592e-06, "loss": 1.3474, "mean_token_accuracy": 0.7414789229631424, "num_tokens": 14212727.0, "step": 210 }, { "entropy": 0.9600193619728088, "epoch": 0.06665151859425889, "grad_norm": 0.342178612947464, "learning_rate": 8.844911147011308e-06, "loss": 1.3543, "mean_token_accuracy": 0.7420378759503364, "num_tokens": 14883203.0, "step": 220 }, { "entropy": 0.9429377406835556, "epoch": 0.0696811330758161, "grad_norm": 0.4176937937736511, "learning_rate": 9.248788368336027e-06, "loss": 1.3472, "mean_token_accuracy": 0.7506941795349121, "num_tokens": 15569666.0, "step": 230 }, { "entropy": 0.9486132815480233, "epoch": 0.07271074755737332, "grad_norm": 0.28106167912483215, "learning_rate": 9.652665589660744e-06, "loss": 1.3432, "mean_token_accuracy": 0.743465180695057, "num_tokens": 16248620.0, "step": 240 }, { "entropy": 0.9563860490918159, "epoch": 0.07574036203893054, "grad_norm": 0.5263408422470093, "learning_rate": 1.0056542810985462e-05, "loss": 1.3492, "mean_token_accuracy": 0.7460156157612801, "num_tokens": 16936825.0, "step": 250 }, { "entropy": 0.9662684842944145, "epoch": 0.07876997652048777, "grad_norm": 0.4372612535953522, "learning_rate": 1.0460420032310179e-05, "loss": 1.3627, "mean_token_accuracy": 0.7397747457027435, "num_tokens": 17614709.0, "step": 260 }, { "entropy": 0.9495426908135414, "epoch": 0.081799591002045, "grad_norm": 0.3195766508579254, "learning_rate": 1.0864297253634896e-05, "loss": 1.3494, "mean_token_accuracy": 0.7455736428499222, "num_tokens": 18296040.0, "step": 270 }, { "entropy": 0.946918123960495, "epoch": 0.0848292054836022, "grad_norm": 0.7261886596679688, "learning_rate": 1.1268174474959612e-05, "loss": 1.3465, "mean_token_accuracy": 0.7500029459595681, "num_tokens": 18979248.0, "step": 280 }, { "entropy": 0.951644915342331, "epoch": 0.08785881996515943, "grad_norm": 0.33003976941108704, "learning_rate": 1.1672051696284331e-05, "loss": 1.357, "mean_token_accuracy": 0.7410220310091973, "num_tokens": 19635598.0, "step": 290 }, { "entropy": 0.9348028033971787, "epoch": 0.09088843444671665, "grad_norm": 0.3807949423789978, "learning_rate": 1.2075928917609048e-05, "loss": 1.3454, "mean_token_accuracy": 0.7490146473050118, "num_tokens": 20307786.0, "step": 300 }, { "entropy": 0.9427580952644348, "epoch": 0.09391804892827388, "grad_norm": 0.3040551543235779, "learning_rate": 1.2479806138933766e-05, "loss": 1.3458, "mean_token_accuracy": 0.7427674695849419, "num_tokens": 20962399.0, "step": 310 }, { "entropy": 0.9372789472341537, "epoch": 0.0969476634098311, "grad_norm": 0.30593565106391907, "learning_rate": 1.2883683360258483e-05, "loss": 1.3477, "mean_token_accuracy": 0.744150060415268, "num_tokens": 21625782.0, "step": 320 }, { "entropy": 0.9325996667146683, "epoch": 0.09997727789138833, "grad_norm": 0.3430718779563904, "learning_rate": 1.32875605815832e-05, "loss": 1.3373, "mean_token_accuracy": 0.7520561441779137, "num_tokens": 22308600.0, "step": 330 }, { "entropy": 0.936019217967987, "epoch": 0.10300689237294554, "grad_norm": 0.31659775972366333, "learning_rate": 1.3691437802907916e-05, "loss": 1.3405, "mean_token_accuracy": 0.7451581284403801, "num_tokens": 22985019.0, "step": 340 }, { "entropy": 0.9315294235944748, "epoch": 0.10603650685450276, "grad_norm": 0.3390049636363983, "learning_rate": 1.4095315024232633e-05, "loss": 1.3404, "mean_token_accuracy": 0.7504669681191445, "num_tokens": 23663278.0, "step": 350 }, { "entropy": 0.9329710841178894, "epoch": 0.10906612133605999, "grad_norm": 0.27660778164863586, "learning_rate": 1.4499192245557353e-05, "loss": 1.3304, "mean_token_accuracy": 0.7510873705148697, "num_tokens": 24349520.0, "step": 360 }, { "entropy": 0.936093220114708, "epoch": 0.11209573581761721, "grad_norm": 0.37499693036079407, "learning_rate": 1.4903069466882068e-05, "loss": 1.3343, "mean_token_accuracy": 0.7447810202836991, "num_tokens": 25013352.0, "step": 370 }, { "entropy": 0.9364145249128342, "epoch": 0.11512535029917444, "grad_norm": 0.38497716188430786, "learning_rate": 1.5306946688206785e-05, "loss": 1.3386, "mean_token_accuracy": 0.7500496730208397, "num_tokens": 25701865.0, "step": 380 }, { "entropy": 0.9282684996724129, "epoch": 0.11815496478073165, "grad_norm": 0.3929134011268616, "learning_rate": 1.5710823909531502e-05, "loss": 1.3294, "mean_token_accuracy": 0.756112988293171, "num_tokens": 26409204.0, "step": 390 }, { "entropy": 0.9325812742114067, "epoch": 0.12118457926228887, "grad_norm": 0.2611294686794281, "learning_rate": 1.6114701130856222e-05, "loss": 1.3342, "mean_token_accuracy": 0.7490811735391617, "num_tokens": 27089751.0, "step": 400 }, { "entropy": 0.9268250733613967, "epoch": 0.1242141937438461, "grad_norm": 0.3077852427959442, "learning_rate": 1.651857835218094e-05, "loss": 1.3269, "mean_token_accuracy": 0.7516962170600892, "num_tokens": 27770172.0, "step": 410 }, { "entropy": 0.9232483997941017, "epoch": 0.12724380822540332, "grad_norm": 0.40509089827537537, "learning_rate": 1.6922455573505656e-05, "loss": 1.3298, "mean_token_accuracy": 0.7501018136739731, "num_tokens": 28446839.0, "step": 420 }, { "entropy": 0.9298500493168831, "epoch": 0.13027342270696055, "grad_norm": 0.7046210765838623, "learning_rate": 1.7326332794830372e-05, "loss": 1.3395, "mean_token_accuracy": 0.7426234304904937, "num_tokens": 29092374.0, "step": 430 }, { "entropy": 0.9365391865372658, "epoch": 0.13330303718851777, "grad_norm": 0.30491572618484497, "learning_rate": 1.7730210016155093e-05, "loss": 1.3322, "mean_token_accuracy": 0.7497914135456085, "num_tokens": 29782677.0, "step": 440 }, { "entropy": 0.9229448556900024, "epoch": 0.136332651670075, "grad_norm": 0.2625262141227722, "learning_rate": 1.8134087237479806e-05, "loss": 1.3253, "mean_token_accuracy": 0.7531854346394539, "num_tokens": 30465662.0, "step": 450 }, { "entropy": 0.9274683475494385, "epoch": 0.1393622661516322, "grad_norm": 0.28569865226745605, "learning_rate": 1.8537964458804523e-05, "loss": 1.3297, "mean_token_accuracy": 0.7466325148940086, "num_tokens": 31136752.0, "step": 460 }, { "entropy": 0.9268327504396439, "epoch": 0.14239188063318942, "grad_norm": 0.2649734616279602, "learning_rate": 1.894184168012924e-05, "loss": 1.326, "mean_token_accuracy": 0.7489441350102425, "num_tokens": 31817903.0, "step": 470 }, { "entropy": 0.9220950871706008, "epoch": 0.14542149511474664, "grad_norm": 0.29128533601760864, "learning_rate": 1.934571890145396e-05, "loss": 1.3292, "mean_token_accuracy": 0.7483928576111794, "num_tokens": 32484542.0, "step": 480 }, { "entropy": 0.918656262755394, "epoch": 0.14845110959630387, "grad_norm": 0.23605835437774658, "learning_rate": 1.9749596122778676e-05, "loss": 1.3282, "mean_token_accuracy": 0.7509550377726555, "num_tokens": 33167670.0, "step": 490 }, { "entropy": 0.9145197689533233, "epoch": 0.1514807240778611, "grad_norm": 0.23487012088298798, "learning_rate": 2.0153473344103393e-05, "loss": 1.3223, "mean_token_accuracy": 0.753958448767662, "num_tokens": 33848771.0, "step": 500 }, { "entropy": 0.9294292941689491, "epoch": 0.15451033855941831, "grad_norm": 0.24518702924251556, "learning_rate": 2.055735056542811e-05, "loss": 1.3245, "mean_token_accuracy": 0.7495481982827187, "num_tokens": 34530740.0, "step": 510 }, { "entropy": 0.9175413846969604, "epoch": 0.15753995304097554, "grad_norm": 0.2422870546579361, "learning_rate": 2.096122778675283e-05, "loss": 1.3182, "mean_token_accuracy": 0.754038056731224, "num_tokens": 35212668.0, "step": 520 }, { "entropy": 0.9198048174381256, "epoch": 0.16056956752253276, "grad_norm": 0.29682424664497375, "learning_rate": 2.1365105008077547e-05, "loss": 1.3243, "mean_token_accuracy": 0.7482954531908035, "num_tokens": 35884591.0, "step": 530 }, { "entropy": 0.9291597813367843, "epoch": 0.16359918200409, "grad_norm": 0.25015708804130554, "learning_rate": 2.1768982229402264e-05, "loss": 1.3351, "mean_token_accuracy": 0.7464247986674308, "num_tokens": 36549977.0, "step": 540 }, { "entropy": 0.9339838802814484, "epoch": 0.1666287964856472, "grad_norm": 0.23152834177017212, "learning_rate": 2.217285945072698e-05, "loss": 1.3308, "mean_token_accuracy": 0.7481212452054024, "num_tokens": 37225987.0, "step": 550 }, { "entropy": 0.9293303743004799, "epoch": 0.1696584109672044, "grad_norm": 0.24376530945301056, "learning_rate": 2.2576736672051697e-05, "loss": 1.329, "mean_token_accuracy": 0.750653937458992, "num_tokens": 37922786.0, "step": 560 }, { "entropy": 0.9323435664176941, "epoch": 0.17268802544876163, "grad_norm": 0.22466818988323212, "learning_rate": 2.2980613893376414e-05, "loss": 1.3318, "mean_token_accuracy": 0.7409193903207779, "num_tokens": 38583375.0, "step": 570 }, { "entropy": 0.9144039720296859, "epoch": 0.17571763993031886, "grad_norm": 0.2732308804988861, "learning_rate": 2.338449111470113e-05, "loss": 1.322, "mean_token_accuracy": 0.7507803827524185, "num_tokens": 39255897.0, "step": 580 }, { "entropy": 0.9145839855074882, "epoch": 0.17874725441187608, "grad_norm": 0.2554788589477539, "learning_rate": 2.3788368336025848e-05, "loss": 1.3256, "mean_token_accuracy": 0.7467795327305794, "num_tokens": 39920649.0, "step": 590 }, { "entropy": 0.9257472231984138, "epoch": 0.1817768688934333, "grad_norm": 0.23443762958049774, "learning_rate": 2.4192245557350568e-05, "loss": 1.3207, "mean_token_accuracy": 0.7508188039064407, "num_tokens": 40613016.0, "step": 600 }, { "entropy": 0.9184200003743171, "epoch": 0.18480648337499053, "grad_norm": 0.22434309124946594, "learning_rate": 2.4596122778675284e-05, "loss": 1.3257, "mean_token_accuracy": 0.7488593772053719, "num_tokens": 41277901.0, "step": 610 }, { "entropy": 0.9115656569600106, "epoch": 0.18783609785654776, "grad_norm": 0.27809539437294006, "learning_rate": 2.5e-05, "loss": 1.326, "mean_token_accuracy": 0.7556997299194336, "num_tokens": 41970791.0, "step": 620 }, { "entropy": 0.9088801115751266, "epoch": 0.19086571233810498, "grad_norm": 0.2008364498615265, "learning_rate": 2.5403877221324718e-05, "loss": 1.3117, "mean_token_accuracy": 0.7530135080218315, "num_tokens": 42665821.0, "step": 630 }, { "entropy": 0.9266581863164902, "epoch": 0.1938953268196622, "grad_norm": 0.22976058721542358, "learning_rate": 2.5807754442649435e-05, "loss": 1.3237, "mean_token_accuracy": 0.7488701492547989, "num_tokens": 43349418.0, "step": 640 }, { "entropy": 0.915707740187645, "epoch": 0.19692494130121943, "grad_norm": 0.22756750881671906, "learning_rate": 2.621163166397415e-05, "loss": 1.3191, "mean_token_accuracy": 0.7506552428007126, "num_tokens": 44030708.0, "step": 650 }, { "entropy": 0.9139649495482445, "epoch": 0.19995455578277666, "grad_norm": 0.2183780074119568, "learning_rate": 2.6615508885298868e-05, "loss": 1.3204, "mean_token_accuracy": 0.7497727707028389, "num_tokens": 44698948.0, "step": 660 }, { "entropy": 0.9156005099415779, "epoch": 0.20298417026433385, "grad_norm": 0.25522419810295105, "learning_rate": 2.701938610662359e-05, "loss": 1.3194, "mean_token_accuracy": 0.749407297372818, "num_tokens": 45375132.0, "step": 670 }, { "entropy": 0.9204802691936493, "epoch": 0.20601378474589108, "grad_norm": 0.2535501718521118, "learning_rate": 2.7423263327948305e-05, "loss": 1.316, "mean_token_accuracy": 0.7550273388624191, "num_tokens": 46068227.0, "step": 680 }, { "entropy": 0.9193328201770783, "epoch": 0.2090433992274483, "grad_norm": 0.2618839144706726, "learning_rate": 2.7827140549273022e-05, "loss": 1.3206, "mean_token_accuracy": 0.7545509234070777, "num_tokens": 46760521.0, "step": 690 }, { "entropy": 0.9152797266840935, "epoch": 0.21207301370900553, "grad_norm": 0.21720457077026367, "learning_rate": 2.823101777059774e-05, "loss": 1.3167, "mean_token_accuracy": 0.7576111331582069, "num_tokens": 47459871.0, "step": 700 }, { "entropy": 0.9146850362420083, "epoch": 0.21510262819056275, "grad_norm": 0.20628060400485992, "learning_rate": 2.8634894991922456e-05, "loss": 1.3168, "mean_token_accuracy": 0.7556830614805221, "num_tokens": 48155369.0, "step": 710 }, { "entropy": 0.9097549751400947, "epoch": 0.21813224267211997, "grad_norm": 0.24059303104877472, "learning_rate": 2.9038772213247172e-05, "loss": 1.316, "mean_token_accuracy": 0.756433616578579, "num_tokens": 48848798.0, "step": 720 }, { "entropy": 0.9219586238265037, "epoch": 0.2211618571536772, "grad_norm": 0.2317909449338913, "learning_rate": 2.944264943457189e-05, "loss": 1.3194, "mean_token_accuracy": 0.7525115668773651, "num_tokens": 49539969.0, "step": 730 }, { "entropy": 0.9171618938446044, "epoch": 0.22419147163523442, "grad_norm": 0.22604261338710785, "learning_rate": 2.9846526655896606e-05, "loss": 1.3294, "mean_token_accuracy": 0.7438126534223557, "num_tokens": 50202710.0, "step": 740 }, { "entropy": 0.9165790915489197, "epoch": 0.22722108611679165, "grad_norm": 0.22659213840961456, "learning_rate": 3.025040387722133e-05, "loss": 1.3258, "mean_token_accuracy": 0.7470232158899307, "num_tokens": 50865258.0, "step": 750 }, { "entropy": 0.9135280176997185, "epoch": 0.23025070059834887, "grad_norm": 0.20198464393615723, "learning_rate": 3.0654281098546046e-05, "loss": 1.3192, "mean_token_accuracy": 0.7495270982384682, "num_tokens": 51532155.0, "step": 760 }, { "entropy": 0.906398692727089, "epoch": 0.23328031507990607, "grad_norm": 0.2125513106584549, "learning_rate": 3.105815831987076e-05, "loss": 1.3137, "mean_token_accuracy": 0.7528043925762177, "num_tokens": 52213486.0, "step": 770 }, { "entropy": 0.924056950211525, "epoch": 0.2363099295614633, "grad_norm": 0.1975914090871811, "learning_rate": 3.146203554119548e-05, "loss": 1.33, "mean_token_accuracy": 0.74516611546278, "num_tokens": 52880609.0, "step": 780 }, { "entropy": 0.9093386635184288, "epoch": 0.23933954404302052, "grad_norm": 0.22010082006454468, "learning_rate": 3.1865912762520196e-05, "loss": 1.3199, "mean_token_accuracy": 0.7503703728318214, "num_tokens": 53553662.0, "step": 790 }, { "entropy": 0.9259981513023376, "epoch": 0.24236915852457774, "grad_norm": 0.22695289552211761, "learning_rate": 3.226978998384491e-05, "loss": 1.3281, "mean_token_accuracy": 0.7463536381721496, "num_tokens": 54218216.0, "step": 800 }, { "entropy": 0.9152119278907775, "epoch": 0.24539877300613497, "grad_norm": 0.18423648178577423, "learning_rate": 3.267366720516963e-05, "loss": 1.3301, "mean_token_accuracy": 0.7469505056738853, "num_tokens": 54877174.0, "step": 810 }, { "entropy": 0.9253642663359642, "epoch": 0.2484283874876922, "grad_norm": 0.20384164154529572, "learning_rate": 3.307754442649435e-05, "loss": 1.3242, "mean_token_accuracy": 0.7439268216490745, "num_tokens": 55544881.0, "step": 820 }, { "entropy": 0.9119655445218087, "epoch": 0.2514580019692494, "grad_norm": 0.18420374393463135, "learning_rate": 3.3481421647819064e-05, "loss": 1.3131, "mean_token_accuracy": 0.7535008266568184, "num_tokens": 56219809.0, "step": 830 }, { "entropy": 0.914957481622696, "epoch": 0.25448761645080664, "grad_norm": 0.19027648866176605, "learning_rate": 3.388529886914378e-05, "loss": 1.3193, "mean_token_accuracy": 0.7487210378050804, "num_tokens": 56900537.0, "step": 840 }, { "entropy": 0.9218035861849785, "epoch": 0.25751723093236384, "grad_norm": 0.16609440743923187, "learning_rate": 3.42891760904685e-05, "loss": 1.324, "mean_token_accuracy": 0.7453356191515923, "num_tokens": 57567272.0, "step": 850 }, { "entropy": 0.9182787030935288, "epoch": 0.2605468454139211, "grad_norm": 0.21082238852977753, "learning_rate": 3.4693053311793214e-05, "loss": 1.3195, "mean_token_accuracy": 0.7457280382514, "num_tokens": 58233643.0, "step": 860 }, { "entropy": 0.9092610657215119, "epoch": 0.2635764598954783, "grad_norm": 0.17639994621276855, "learning_rate": 3.509693053311793e-05, "loss": 1.3172, "mean_token_accuracy": 0.7527112409472465, "num_tokens": 58914246.0, "step": 870 }, { "entropy": 0.9199270486831665, "epoch": 0.26660607437703554, "grad_norm": 0.19245943427085876, "learning_rate": 3.550080775444265e-05, "loss": 1.3328, "mean_token_accuracy": 0.7467374846339225, "num_tokens": 59581373.0, "step": 880 }, { "entropy": 0.9137106329202652, "epoch": 0.26963568885859274, "grad_norm": 0.17330224812030792, "learning_rate": 3.5904684975767364e-05, "loss": 1.3232, "mean_token_accuracy": 0.7464903146028519, "num_tokens": 60243225.0, "step": 890 }, { "entropy": 0.9105766281485558, "epoch": 0.27266530334015, "grad_norm": 0.19774368405342102, "learning_rate": 3.630856219709208e-05, "loss": 1.3217, "mean_token_accuracy": 0.7488515123724937, "num_tokens": 60912466.0, "step": 900 }, { "entropy": 0.9119695752859116, "epoch": 0.2756949178217072, "grad_norm": 0.16941364109516144, "learning_rate": 3.6712439418416804e-05, "loss": 1.3223, "mean_token_accuracy": 0.7479856431484222, "num_tokens": 61564122.0, "step": 910 }, { "entropy": 0.914450266957283, "epoch": 0.2787245323032644, "grad_norm": 0.19449332356452942, "learning_rate": 3.711631663974152e-05, "loss": 1.3228, "mean_token_accuracy": 0.7504349738359452, "num_tokens": 62233788.0, "step": 920 }, { "entropy": 0.9095871090888977, "epoch": 0.28175414678482164, "grad_norm": 0.20824147760868073, "learning_rate": 3.752019386106624e-05, "loss": 1.3107, "mean_token_accuracy": 0.7514926999807358, "num_tokens": 62904530.0, "step": 930 }, { "entropy": 0.9049410030245781, "epoch": 0.28478376126637883, "grad_norm": 0.19165048003196716, "learning_rate": 3.7924071082390955e-05, "loss": 1.3164, "mean_token_accuracy": 0.7510851979255676, "num_tokens": 63568085.0, "step": 940 }, { "entropy": 0.9160818889737129, "epoch": 0.2878133757479361, "grad_norm": 0.1915004700422287, "learning_rate": 3.832794830371567e-05, "loss": 1.318, "mean_token_accuracy": 0.7500975534319878, "num_tokens": 64243907.0, "step": 950 }, { "entropy": 0.9147925272583961, "epoch": 0.2908429902294933, "grad_norm": 0.1861671656370163, "learning_rate": 3.873182552504039e-05, "loss": 1.3158, "mean_token_accuracy": 0.7435453131794929, "num_tokens": 64897465.0, "step": 960 }, { "entropy": 0.9032413572072983, "epoch": 0.29387260471105053, "grad_norm": 0.1919008046388626, "learning_rate": 3.9135702746365105e-05, "loss": 1.318, "mean_token_accuracy": 0.7475991874933243, "num_tokens": 65552452.0, "step": 970 }, { "entropy": 0.9074742197990417, "epoch": 0.29690221919260773, "grad_norm": 0.1770084798336029, "learning_rate": 3.953957996768982e-05, "loss": 1.3074, "mean_token_accuracy": 0.7538325354456902, "num_tokens": 66243290.0, "step": 980 }, { "entropy": 0.9117066383361816, "epoch": 0.299931833674165, "grad_norm": 0.20442838966846466, "learning_rate": 3.9943457189014545e-05, "loss": 1.3158, "mean_token_accuracy": 0.7498932734131813, "num_tokens": 66915792.0, "step": 990 }, { "entropy": 0.9175997883081436, "epoch": 0.3029614481557222, "grad_norm": 0.21906521916389465, "learning_rate": 4.034733441033926e-05, "loss": 1.3292, "mean_token_accuracy": 0.7450326055288314, "num_tokens": 67572749.0, "step": 1000 }, { "entropy": 0.9061859205365181, "epoch": 0.30599106263727943, "grad_norm": 0.18347729742527008, "learning_rate": 4.075121163166398e-05, "loss": 1.3186, "mean_token_accuracy": 0.751769931614399, "num_tokens": 68243021.0, "step": 1010 }, { "entropy": 0.9110336869955062, "epoch": 0.30902067711883663, "grad_norm": 0.1732109934091568, "learning_rate": 4.1155088852988696e-05, "loss": 1.3261, "mean_token_accuracy": 0.7522352248430252, "num_tokens": 68916281.0, "step": 1020 }, { "entropy": 0.9066652849316597, "epoch": 0.3120502916003938, "grad_norm": 0.16653181612491608, "learning_rate": 4.155896607431341e-05, "loss": 1.3163, "mean_token_accuracy": 0.7484516441822052, "num_tokens": 69579268.0, "step": 1030 }, { "entropy": 0.9179822281002998, "epoch": 0.3150799060819511, "grad_norm": 0.16437485814094543, "learning_rate": 4.196284329563813e-05, "loss": 1.3241, "mean_token_accuracy": 0.7510016962885857, "num_tokens": 70264079.0, "step": 1040 }, { "entropy": 0.9145135924220085, "epoch": 0.3181095205635083, "grad_norm": 0.15796828269958496, "learning_rate": 4.2366720516962846e-05, "loss": 1.3149, "mean_token_accuracy": 0.7525526866316795, "num_tokens": 70945929.0, "step": 1050 }, { "entropy": 0.9066309273242951, "epoch": 0.3211391350450655, "grad_norm": 0.15555834770202637, "learning_rate": 4.277059773828756e-05, "loss": 1.3164, "mean_token_accuracy": 0.7513958021998406, "num_tokens": 71617250.0, "step": 1060 }, { "entropy": 0.9203481048345565, "epoch": 0.3241687495266227, "grad_norm": 0.17885833978652954, "learning_rate": 4.317447495961228e-05, "loss": 1.3168, "mean_token_accuracy": 0.7503919199109077, "num_tokens": 72294905.0, "step": 1070 }, { "entropy": 0.908906027674675, "epoch": 0.32719836400818, "grad_norm": 0.15727676451206207, "learning_rate": 4.3578352180936996e-05, "loss": 1.3187, "mean_token_accuracy": 0.7530272096395493, "num_tokens": 72977657.0, "step": 1080 }, { "entropy": 0.9122103303670883, "epoch": 0.3302279784897372, "grad_norm": 0.1925109326839447, "learning_rate": 4.398222940226171e-05, "loss": 1.3205, "mean_token_accuracy": 0.7486942082643508, "num_tokens": 73649741.0, "step": 1090 }, { "entropy": 0.9175547957420349, "epoch": 0.3332575929712944, "grad_norm": 0.1666097491979599, "learning_rate": 4.438610662358643e-05, "loss": 1.3154, "mean_token_accuracy": 0.7506437614560127, "num_tokens": 74327124.0, "step": 1100 }, { "entropy": 0.9178244799375535, "epoch": 0.3362872074528516, "grad_norm": 0.19218212366104126, "learning_rate": 4.4789983844911147e-05, "loss": 1.3266, "mean_token_accuracy": 0.7483278617262841, "num_tokens": 74993713.0, "step": 1110 }, { "entropy": 0.9133123695850373, "epoch": 0.3393168219344088, "grad_norm": 0.21947059035301208, "learning_rate": 4.519386106623586e-05, "loss": 1.315, "mean_token_accuracy": 0.7546236589550972, "num_tokens": 75684557.0, "step": 1120 }, { "entropy": 0.9051602393388748, "epoch": 0.34234643641596607, "grad_norm": 0.17523469030857086, "learning_rate": 4.559773828756058e-05, "loss": 1.3137, "mean_token_accuracy": 0.7507795050740242, "num_tokens": 76352021.0, "step": 1130 }, { "entropy": 0.9085398092865944, "epoch": 0.34537605089752327, "grad_norm": 0.17349685728549957, "learning_rate": 4.60016155088853e-05, "loss": 1.3122, "mean_token_accuracy": 0.7487612262368202, "num_tokens": 77019617.0, "step": 1140 }, { "entropy": 0.9007386222481728, "epoch": 0.3484056653790805, "grad_norm": 0.16674205660820007, "learning_rate": 4.640549273021002e-05, "loss": 1.3166, "mean_token_accuracy": 0.7528904139995575, "num_tokens": 77687724.0, "step": 1150 }, { "entropy": 0.9056428268551826, "epoch": 0.3514352798606377, "grad_norm": 0.17023763060569763, "learning_rate": 4.680936995153474e-05, "loss": 1.3177, "mean_token_accuracy": 0.7535073205828666, "num_tokens": 78366654.0, "step": 1160 }, { "entropy": 0.9059616297483444, "epoch": 0.35446489434219497, "grad_norm": 0.1579790711402893, "learning_rate": 4.7213247172859454e-05, "loss": 1.3092, "mean_token_accuracy": 0.7533615916967392, "num_tokens": 79045395.0, "step": 1170 }, { "entropy": 0.9000560775399208, "epoch": 0.35749450882375217, "grad_norm": 0.17769810557365417, "learning_rate": 4.761712439418417e-05, "loss": 1.3064, "mean_token_accuracy": 0.7528070211410522, "num_tokens": 79724818.0, "step": 1180 }, { "entropy": 0.906897796690464, "epoch": 0.3605241233053094, "grad_norm": 0.1549178808927536, "learning_rate": 4.802100161550889e-05, "loss": 1.3141, "mean_token_accuracy": 0.7501399010419846, "num_tokens": 80392394.0, "step": 1190 }, { "entropy": 0.9162770986557007, "epoch": 0.3635537377868666, "grad_norm": 0.15900464355945587, "learning_rate": 4.8424878836833604e-05, "loss": 1.3179, "mean_token_accuracy": 0.7535986736416816, "num_tokens": 81080041.0, "step": 1200 }, { "entropy": 0.9049024254083633, "epoch": 0.36658335226842387, "grad_norm": 0.1732468456029892, "learning_rate": 4.882875605815832e-05, "loss": 1.3086, "mean_token_accuracy": 0.7500874444842338, "num_tokens": 81745731.0, "step": 1210 }, { "entropy": 0.8985488951206207, "epoch": 0.36961296674998106, "grad_norm": 0.1543564349412918, "learning_rate": 4.923263327948304e-05, "loss": 1.3099, "mean_token_accuracy": 0.7510283410549163, "num_tokens": 82410533.0, "step": 1220 }, { "entropy": 0.9009802848100662, "epoch": 0.37264258123153826, "grad_norm": 0.15668995678424835, "learning_rate": 4.963651050080776e-05, "loss": 1.3104, "mean_token_accuracy": 0.7525711953639984, "num_tokens": 83085040.0, "step": 1230 }, { "entropy": 0.9112498596310615, "epoch": 0.3756721957130955, "grad_norm": 0.19351772964000702, "learning_rate": 5.004038772213248e-05, "loss": 1.3121, "mean_token_accuracy": 0.753445765376091, "num_tokens": 83773911.0, "step": 1240 }, { "entropy": 0.9024754449725151, "epoch": 0.3787018101946527, "grad_norm": 0.15581312775611877, "learning_rate": 5.044426494345719e-05, "loss": 1.3113, "mean_token_accuracy": 0.7532795190811157, "num_tokens": 84454497.0, "step": 1250 }, { "entropy": 0.9097319796681405, "epoch": 0.38173142467620996, "grad_norm": 0.16212372481822968, "learning_rate": 5.084814216478191e-05, "loss": 1.3199, "mean_token_accuracy": 0.7565267696976662, "num_tokens": 85151452.0, "step": 1260 }, { "entropy": 0.9046913087368011, "epoch": 0.38476103915776716, "grad_norm": 0.16039691865444183, "learning_rate": 5.125201938610663e-05, "loss": 1.3075, "mean_token_accuracy": 0.755273811519146, "num_tokens": 85831124.0, "step": 1270 }, { "entropy": 0.9037176728248596, "epoch": 0.3877906536393244, "grad_norm": 0.18313613533973694, "learning_rate": 5.1655896607431345e-05, "loss": 1.3074, "mean_token_accuracy": 0.7516591116786003, "num_tokens": 86503499.0, "step": 1280 }, { "entropy": 0.8991178900003434, "epoch": 0.3908202681208816, "grad_norm": 0.15124350786209106, "learning_rate": 5.205977382875606e-05, "loss": 1.3132, "mean_token_accuracy": 0.7554905250668525, "num_tokens": 87183993.0, "step": 1290 }, { "entropy": 0.9112952843308448, "epoch": 0.39384988260243886, "grad_norm": 0.1569276601076126, "learning_rate": 5.246365105008078e-05, "loss": 1.3055, "mean_token_accuracy": 0.746527048945427, "num_tokens": 87860571.0, "step": 1300 }, { "entropy": 0.9121203318238258, "epoch": 0.39687949708399606, "grad_norm": 0.1368400901556015, "learning_rate": 5.2867528271405496e-05, "loss": 1.3232, "mean_token_accuracy": 0.7461899116635322, "num_tokens": 88517537.0, "step": 1310 }, { "entropy": 0.9051359668374062, "epoch": 0.3999091115655533, "grad_norm": 0.2260213941335678, "learning_rate": 5.3271405492730206e-05, "loss": 1.3183, "mean_token_accuracy": 0.7520864903926849, "num_tokens": 89195331.0, "step": 1320 }, { "entropy": 0.9018226325511932, "epoch": 0.4029387260471105, "grad_norm": 0.16223658621311188, "learning_rate": 5.367528271405493e-05, "loss": 1.3091, "mean_token_accuracy": 0.7538933858275414, "num_tokens": 89881354.0, "step": 1330 }, { "entropy": 0.9095069825649261, "epoch": 0.4059683405286677, "grad_norm": 0.17114722728729248, "learning_rate": 5.407915993537964e-05, "loss": 1.3234, "mean_token_accuracy": 0.7512053146958351, "num_tokens": 90556951.0, "step": 1340 }, { "entropy": 0.9174318969249725, "epoch": 0.40899795501022496, "grad_norm": 0.16955164074897766, "learning_rate": 5.448303715670436e-05, "loss": 1.3245, "mean_token_accuracy": 0.7475357472896575, "num_tokens": 91231305.0, "step": 1350 }, { "entropy": 0.9048666179180145, "epoch": 0.41202756949178215, "grad_norm": 0.13146966695785522, "learning_rate": 5.4886914378029086e-05, "loss": 1.3152, "mean_token_accuracy": 0.7531999439001084, "num_tokens": 91905496.0, "step": 1360 }, { "entropy": 0.8951819375157356, "epoch": 0.4150571839733394, "grad_norm": 0.17518994212150574, "learning_rate": 5.5290791599353796e-05, "loss": 1.3085, "mean_token_accuracy": 0.7539542600512504, "num_tokens": 92573582.0, "step": 1370 }, { "entropy": 0.8991360008716583, "epoch": 0.4180867984548966, "grad_norm": 0.16288205981254578, "learning_rate": 5.569466882067852e-05, "loss": 1.3105, "mean_token_accuracy": 0.749615055322647, "num_tokens": 93238965.0, "step": 1380 }, { "entropy": 0.9040171906352044, "epoch": 0.42111641293645385, "grad_norm": 0.15458066761493683, "learning_rate": 5.609854604200323e-05, "loss": 1.3143, "mean_token_accuracy": 0.74465721398592, "num_tokens": 93881702.0, "step": 1390 }, { "entropy": 0.9005857020616531, "epoch": 0.42414602741801105, "grad_norm": 0.1993013173341751, "learning_rate": 5.650242326332795e-05, "loss": 1.3075, "mean_token_accuracy": 0.7470027223229408, "num_tokens": 94547314.0, "step": 1400 }, { "entropy": 0.9084305763244629, "epoch": 0.4271756418995683, "grad_norm": 0.17728160321712494, "learning_rate": 5.690630048465266e-05, "loss": 1.3134, "mean_token_accuracy": 0.7524486899375915, "num_tokens": 95228068.0, "step": 1410 }, { "entropy": 0.906204080581665, "epoch": 0.4302052563811255, "grad_norm": 0.1374523639678955, "learning_rate": 5.731017770597739e-05, "loss": 1.3099, "mean_token_accuracy": 0.7536841064691544, "num_tokens": 95898188.0, "step": 1420 }, { "entropy": 0.8936747655272483, "epoch": 0.4332348708626827, "grad_norm": 0.22770892083644867, "learning_rate": 5.771405492730211e-05, "loss": 1.302, "mean_token_accuracy": 0.7515007808804512, "num_tokens": 96561074.0, "step": 1430 }, { "entropy": 0.9160967841744423, "epoch": 0.43626448534423995, "grad_norm": 0.1860688477754593, "learning_rate": 5.811793214862682e-05, "loss": 1.3231, "mean_token_accuracy": 0.7463960200548172, "num_tokens": 97228420.0, "step": 1440 }, { "entropy": 0.9105578064918518, "epoch": 0.43929409982579715, "grad_norm": 0.14435435831546783, "learning_rate": 5.8521809369951544e-05, "loss": 1.3169, "mean_token_accuracy": 0.7499482974410057, "num_tokens": 97906309.0, "step": 1450 }, { "entropy": 0.9005340814590455, "epoch": 0.4423237143073544, "grad_norm": 0.14899055659770966, "learning_rate": 5.8925686591276254e-05, "loss": 1.3051, "mean_token_accuracy": 0.7561104699969292, "num_tokens": 98587364.0, "step": 1460 }, { "entropy": 0.8977873668074607, "epoch": 0.4453533287889116, "grad_norm": 0.15377895534038544, "learning_rate": 5.932956381260098e-05, "loss": 1.3097, "mean_token_accuracy": 0.7519298970699311, "num_tokens": 99253227.0, "step": 1470 }, { "entropy": 0.9171545535326004, "epoch": 0.44838294327046885, "grad_norm": 0.25069257616996765, "learning_rate": 5.973344103392569e-05, "loss": 1.3189, "mean_token_accuracy": 0.7501741036772728, "num_tokens": 99936394.0, "step": 1480 }, { "entropy": 0.8939554750919342, "epoch": 0.45141255775202604, "grad_norm": 0.14128153026103973, "learning_rate": 6.013731825525041e-05, "loss": 1.3056, "mean_token_accuracy": 0.7544304430484772, "num_tokens": 100609703.0, "step": 1490 }, { "entropy": 0.8988944992423058, "epoch": 0.4544421722335833, "grad_norm": 0.1412612348794937, "learning_rate": 6.054119547657512e-05, "loss": 1.3037, "mean_token_accuracy": 0.7509754836559296, "num_tokens": 101281879.0, "step": 1500 }, { "entropy": 0.9070074841380119, "epoch": 0.4574717867151405, "grad_norm": 0.16410954296588898, "learning_rate": 6.094507269789984e-05, "loss": 1.3068, "mean_token_accuracy": 0.7516612082719802, "num_tokens": 101960642.0, "step": 1510 }, { "entropy": 0.9051597341895103, "epoch": 0.46050140119669775, "grad_norm": 0.14085623621940613, "learning_rate": 6.134894991922456e-05, "loss": 1.3162, "mean_token_accuracy": 0.753963851928711, "num_tokens": 102642682.0, "step": 1520 }, { "entropy": 0.9033859834074974, "epoch": 0.46353101567825494, "grad_norm": 0.15599344670772552, "learning_rate": 6.175282714054928e-05, "loss": 1.3099, "mean_token_accuracy": 0.7503072485327721, "num_tokens": 103309851.0, "step": 1530 }, { "entropy": 0.8905642539262771, "epoch": 0.46656063015981214, "grad_norm": 0.15819436311721802, "learning_rate": 6.2156704361874e-05, "loss": 1.3062, "mean_token_accuracy": 0.7543276980519295, "num_tokens": 103977447.0, "step": 1540 }, { "entropy": 0.9092970073223114, "epoch": 0.4695902446413694, "grad_norm": 0.14411026239395142, "learning_rate": 6.256058158319871e-05, "loss": 1.3011, "mean_token_accuracy": 0.7539940506219864, "num_tokens": 104673098.0, "step": 1550 }, { "entropy": 0.9055310070514679, "epoch": 0.4726198591229266, "grad_norm": 0.13895118236541748, "learning_rate": 6.296445880452343e-05, "loss": 1.3045, "mean_token_accuracy": 0.7529991418123245, "num_tokens": 105355415.0, "step": 1560 }, { "entropy": 0.9107632115483284, "epoch": 0.47564947360448384, "grad_norm": 0.1816311925649643, "learning_rate": 6.336833602584815e-05, "loss": 1.3155, "mean_token_accuracy": 0.7511478364467621, "num_tokens": 106022321.0, "step": 1570 }, { "entropy": 0.901921109855175, "epoch": 0.47867908808604104, "grad_norm": 0.12530627846717834, "learning_rate": 6.377221324717286e-05, "loss": 1.3099, "mean_token_accuracy": 0.7471093505620956, "num_tokens": 106678615.0, "step": 1580 }, { "entropy": 0.9076029598712921, "epoch": 0.4817087025675983, "grad_norm": 0.13566669821739197, "learning_rate": 6.417609046849758e-05, "loss": 1.3133, "mean_token_accuracy": 0.7520950257778167, "num_tokens": 107359476.0, "step": 1590 }, { "entropy": 0.9045285671949387, "epoch": 0.4847383170491555, "grad_norm": 0.13298770785331726, "learning_rate": 6.45799676898223e-05, "loss": 1.3057, "mean_token_accuracy": 0.7559781327843667, "num_tokens": 108039664.0, "step": 1600 }, { "entropy": 0.9136948719620704, "epoch": 0.48776793153071274, "grad_norm": 0.16770410537719727, "learning_rate": 6.498384491114701e-05, "loss": 1.3199, "mean_token_accuracy": 0.750070121884346, "num_tokens": 108709395.0, "step": 1610 }, { "entropy": 0.9032595708966256, "epoch": 0.49079754601226994, "grad_norm": 0.1383184939622879, "learning_rate": 6.538772213247173e-05, "loss": 1.3076, "mean_token_accuracy": 0.7500165730714798, "num_tokens": 109385993.0, "step": 1620 }, { "entropy": 0.8895248159766197, "epoch": 0.49382716049382713, "grad_norm": 0.14151626825332642, "learning_rate": 6.579159935379645e-05, "loss": 1.2997, "mean_token_accuracy": 0.75679482370615, "num_tokens": 110060837.0, "step": 1630 }, { "entropy": 0.9049818530678749, "epoch": 0.4968567749753844, "grad_norm": 0.13891007006168365, "learning_rate": 6.619547657512116e-05, "loss": 1.3137, "mean_token_accuracy": 0.7519566610455513, "num_tokens": 110739371.0, "step": 1640 }, { "entropy": 0.9043860971927643, "epoch": 0.4998863894569416, "grad_norm": 0.17376911640167236, "learning_rate": 6.659935379644588e-05, "loss": 1.3066, "mean_token_accuracy": 0.754023851454258, "num_tokens": 111429428.0, "step": 1650 }, { "entropy": 0.8931585773825645, "epoch": 0.5029160039384988, "grad_norm": 0.14392395317554474, "learning_rate": 6.70032310177706e-05, "loss": 1.3051, "mean_token_accuracy": 0.7539272055029869, "num_tokens": 112101071.0, "step": 1660 }, { "entropy": 0.8960565209388733, "epoch": 0.505945618420056, "grad_norm": 0.12862016260623932, "learning_rate": 6.740710823909531e-05, "loss": 1.3062, "mean_token_accuracy": 0.752163028717041, "num_tokens": 112776625.0, "step": 1670 }, { "entropy": 0.8961879700422287, "epoch": 0.5089752329016133, "grad_norm": 0.13162676990032196, "learning_rate": 6.781098546042004e-05, "loss": 1.304, "mean_token_accuracy": 0.7529030278325081, "num_tokens": 113445247.0, "step": 1680 }, { "entropy": 0.8980554938316345, "epoch": 0.5120048473831705, "grad_norm": 0.173239603638649, "learning_rate": 6.821486268174475e-05, "loss": 1.3073, "mean_token_accuracy": 0.7578601375222206, "num_tokens": 114139450.0, "step": 1690 }, { "entropy": 0.9074166044592857, "epoch": 0.5150344618647277, "grad_norm": 0.14682774245738983, "learning_rate": 6.861873990306948e-05, "loss": 1.3123, "mean_token_accuracy": 0.7464081138372421, "num_tokens": 114794323.0, "step": 1700 }, { "entropy": 0.910780593752861, "epoch": 0.5180640763462849, "grad_norm": 0.14194752275943756, "learning_rate": 6.902261712439418e-05, "loss": 1.3149, "mean_token_accuracy": 0.7536340922117233, "num_tokens": 115491317.0, "step": 1710 }, { "entropy": 0.9062231466174125, "epoch": 0.5210936908278422, "grad_norm": 0.15659962594509125, "learning_rate": 6.942649434571891e-05, "loss": 1.3211, "mean_token_accuracy": 0.7496401265263557, "num_tokens": 116154826.0, "step": 1720 }, { "entropy": 0.9074418857693672, "epoch": 0.5241233053093994, "grad_norm": 0.14031372964382172, "learning_rate": 6.983037156704361e-05, "loss": 1.3116, "mean_token_accuracy": 0.7487875491380691, "num_tokens": 116817268.0, "step": 1730 }, { "entropy": 0.8963802754878998, "epoch": 0.5271529197909566, "grad_norm": 0.12388686835765839, "learning_rate": 7.023424878836834e-05, "loss": 1.3083, "mean_token_accuracy": 0.7544260993599892, "num_tokens": 117498412.0, "step": 1740 }, { "entropy": 0.9051859691739083, "epoch": 0.5301825342725138, "grad_norm": 0.12408740818500519, "learning_rate": 7.063812600969306e-05, "loss": 1.3167, "mean_token_accuracy": 0.7449529379606247, "num_tokens": 118154932.0, "step": 1750 }, { "entropy": 0.9028876557946205, "epoch": 0.5332121487540711, "grad_norm": 0.12819243967533112, "learning_rate": 7.104200323101778e-05, "loss": 1.3099, "mean_token_accuracy": 0.7520097956061363, "num_tokens": 118825179.0, "step": 1760 }, { "entropy": 0.9011894449591636, "epoch": 0.5362417632356282, "grad_norm": 0.12917159497737885, "learning_rate": 7.14458804523425e-05, "loss": 1.3023, "mean_token_accuracy": 0.750635813176632, "num_tokens": 119503544.0, "step": 1770 }, { "entropy": 0.9058702141046524, "epoch": 0.5392713777171855, "grad_norm": 0.13154388964176178, "learning_rate": 7.184975767366721e-05, "loss": 1.3037, "mean_token_accuracy": 0.7499114170670509, "num_tokens": 120175476.0, "step": 1780 }, { "entropy": 0.8858574673533439, "epoch": 0.5423009921987427, "grad_norm": 0.12256129086017609, "learning_rate": 7.225363489499193e-05, "loss": 1.2895, "mean_token_accuracy": 0.7617484778165817, "num_tokens": 120871896.0, "step": 1790 }, { "entropy": 0.9071286633610726, "epoch": 0.5453306066803, "grad_norm": 0.12982648611068726, "learning_rate": 7.265751211631664e-05, "loss": 1.3137, "mean_token_accuracy": 0.7497906401753426, "num_tokens": 121548848.0, "step": 1800 }, { "entropy": 0.9043298572301864, "epoch": 0.5483602211618571, "grad_norm": 0.1469738930463791, "learning_rate": 7.306138933764136e-05, "loss": 1.3026, "mean_token_accuracy": 0.7541209653019905, "num_tokens": 122244302.0, "step": 1810 }, { "entropy": 0.9004949659109116, "epoch": 0.5513898356434144, "grad_norm": 0.1332157850265503, "learning_rate": 7.346526655896608e-05, "loss": 1.308, "mean_token_accuracy": 0.7553296402096749, "num_tokens": 122930654.0, "step": 1820 }, { "entropy": 0.8950658679008484, "epoch": 0.5544194501249716, "grad_norm": 0.14082078635692596, "learning_rate": 7.38691437802908e-05, "loss": 1.3041, "mean_token_accuracy": 0.751467551290989, "num_tokens": 123603708.0, "step": 1830 }, { "entropy": 0.8976402968168259, "epoch": 0.5574490646065288, "grad_norm": 0.13623163104057312, "learning_rate": 7.427302100161551e-05, "loss": 1.307, "mean_token_accuracy": 0.7543731585144997, "num_tokens": 124285203.0, "step": 1840 }, { "entropy": 0.9020175158977508, "epoch": 0.560478679088086, "grad_norm": 0.13262920081615448, "learning_rate": 7.467689822294023e-05, "loss": 1.3155, "mean_token_accuracy": 0.7512376889586448, "num_tokens": 124955188.0, "step": 1850 }, { "entropy": 0.8993414729833603, "epoch": 0.5635082935696433, "grad_norm": 0.12388888746500015, "learning_rate": 7.508077544426494e-05, "loss": 1.299, "mean_token_accuracy": 0.7550491631031037, "num_tokens": 125638409.0, "step": 1860 }, { "entropy": 0.8926667675375939, "epoch": 0.5665379080512005, "grad_norm": 0.10712610930204391, "learning_rate": 7.548465266558966e-05, "loss": 1.2996, "mean_token_accuracy": 0.7553495973348617, "num_tokens": 126318498.0, "step": 1870 }, { "entropy": 0.9032000929117203, "epoch": 0.5695675225327577, "grad_norm": 0.1535160094499588, "learning_rate": 7.588852988691438e-05, "loss": 1.3046, "mean_token_accuracy": 0.7536028608679771, "num_tokens": 126996280.0, "step": 1880 }, { "entropy": 0.8911816298961639, "epoch": 0.5725971370143149, "grad_norm": 0.13242319226264954, "learning_rate": 7.62924071082391e-05, "loss": 1.3041, "mean_token_accuracy": 0.755043214559555, "num_tokens": 127670506.0, "step": 1890 }, { "entropy": 0.8888354226946831, "epoch": 0.5756267514958722, "grad_norm": 0.12417840957641602, "learning_rate": 7.669628432956381e-05, "loss": 1.2999, "mean_token_accuracy": 0.7529826268553734, "num_tokens": 128339321.0, "step": 1900 }, { "entropy": 0.8958159863948822, "epoch": 0.5786563659774294, "grad_norm": 0.12293798476457596, "learning_rate": 7.710016155088854e-05, "loss": 1.3022, "mean_token_accuracy": 0.7521115452051162, "num_tokens": 129008376.0, "step": 1910 }, { "entropy": 0.8976372435688973, "epoch": 0.5816859804589866, "grad_norm": 0.12593239545822144, "learning_rate": 7.750403877221325e-05, "loss": 1.306, "mean_token_accuracy": 0.7565735295414925, "num_tokens": 129695657.0, "step": 1920 }, { "entropy": 0.8995207667350769, "epoch": 0.5847155949405438, "grad_norm": 0.1241973489522934, "learning_rate": 7.790791599353798e-05, "loss": 1.304, "mean_token_accuracy": 0.7579465642571449, "num_tokens": 130385609.0, "step": 1930 }, { "entropy": 0.8977545350790024, "epoch": 0.5877452094221011, "grad_norm": 0.12729386985301971, "learning_rate": 7.831179321486268e-05, "loss": 1.3057, "mean_token_accuracy": 0.7563092142343522, "num_tokens": 131069212.0, "step": 1940 }, { "entropy": 0.8979323357343674, "epoch": 0.5907748239036582, "grad_norm": 0.11168866604566574, "learning_rate": 7.871567043618741e-05, "loss": 1.2993, "mean_token_accuracy": 0.7524091944098472, "num_tokens": 131746856.0, "step": 1950 }, { "entropy": 0.8882772088050842, "epoch": 0.5938044383852155, "grad_norm": 0.1279069185256958, "learning_rate": 7.911954765751211e-05, "loss": 1.2985, "mean_token_accuracy": 0.7544113993644714, "num_tokens": 132423575.0, "step": 1960 }, { "entropy": 0.8982036307454109, "epoch": 0.5968340528667727, "grad_norm": 0.133626326918602, "learning_rate": 7.952342487883684e-05, "loss": 1.3017, "mean_token_accuracy": 0.7510138586163521, "num_tokens": 133085251.0, "step": 1970 }, { "entropy": 0.9008032843470574, "epoch": 0.59986366734833, "grad_norm": 0.129901722073555, "learning_rate": 7.992730210016155e-05, "loss": 1.3118, "mean_token_accuracy": 0.7510970681905746, "num_tokens": 133756971.0, "step": 1980 }, { "entropy": 0.8944846227765083, "epoch": 0.6028932818298871, "grad_norm": 0.12799471616744995, "learning_rate": 8.033117932148628e-05, "loss": 1.308, "mean_token_accuracy": 0.7527639657258988, "num_tokens": 134432445.0, "step": 1990 }, { "entropy": 0.9071986898779869, "epoch": 0.6059228963114444, "grad_norm": 0.12767554819583893, "learning_rate": 8.0735056542811e-05, "loss": 1.3109, "mean_token_accuracy": 0.7490269899368286, "num_tokens": 135107011.0, "step": 2000 }, { "entropy": 0.9076018154621124, "epoch": 0.6089525107930016, "grad_norm": 0.1324162483215332, "learning_rate": 8.113893376413571e-05, "loss": 1.3129, "mean_token_accuracy": 0.7502601996064187, "num_tokens": 135783615.0, "step": 2010 }, { "entropy": 0.9079612717032433, "epoch": 0.6119821252745589, "grad_norm": 0.13695330917835236, "learning_rate": 8.154281098546043e-05, "loss": 1.3148, "mean_token_accuracy": 0.7501401528716087, "num_tokens": 136461898.0, "step": 2020 }, { "entropy": 0.9103457570075989, "epoch": 0.615011739756116, "grad_norm": 0.14059525728225708, "learning_rate": 8.194668820678514e-05, "loss": 1.316, "mean_token_accuracy": 0.7511693567037583, "num_tokens": 137136732.0, "step": 2030 }, { "entropy": 0.895035968720913, "epoch": 0.6180413542376733, "grad_norm": 0.12641917169094086, "learning_rate": 8.235056542810986e-05, "loss": 1.3015, "mean_token_accuracy": 0.7535883888602257, "num_tokens": 137808825.0, "step": 2040 }, { "entropy": 0.8961443632841111, "epoch": 0.6210709687192305, "grad_norm": 0.14057329297065735, "learning_rate": 8.275444264943458e-05, "loss": 1.3004, "mean_token_accuracy": 0.7532713904976844, "num_tokens": 138491788.0, "step": 2050 }, { "entropy": 0.8971175700426102, "epoch": 0.6241005832007877, "grad_norm": 0.12069716304540634, "learning_rate": 8.31583198707593e-05, "loss": 1.2959, "mean_token_accuracy": 0.759382213652134, "num_tokens": 139184469.0, "step": 2060 }, { "entropy": 0.9055205196142196, "epoch": 0.6271301976823449, "grad_norm": 0.12682268023490906, "learning_rate": 8.356219709208401e-05, "loss": 1.3218, "mean_token_accuracy": 0.7479860588908196, "num_tokens": 139850956.0, "step": 2070 }, { "entropy": 0.9075116515159607, "epoch": 0.6301598121639022, "grad_norm": 0.1375780701637268, "learning_rate": 8.396607431340873e-05, "loss": 1.3146, "mean_token_accuracy": 0.7482119768857955, "num_tokens": 140521012.0, "step": 2080 }, { "entropy": 0.8973190426826477, "epoch": 0.6331894266454594, "grad_norm": 0.11789047718048096, "learning_rate": 8.436995153473344e-05, "loss": 1.2971, "mean_token_accuracy": 0.7563918709754944, "num_tokens": 141204334.0, "step": 2090 }, { "entropy": 0.9005673974752426, "epoch": 0.6362190411270165, "grad_norm": 0.12826324999332428, "learning_rate": 8.477382875605816e-05, "loss": 1.3046, "mean_token_accuracy": 0.7507783487439156, "num_tokens": 141873785.0, "step": 2100 }, { "entropy": 0.903111070394516, "epoch": 0.6392486556085738, "grad_norm": 0.1388614922761917, "learning_rate": 8.517770597738288e-05, "loss": 1.3102, "mean_token_accuracy": 0.7523079171776772, "num_tokens": 142549820.0, "step": 2110 }, { "entropy": 0.9010299742221832, "epoch": 0.642278270090131, "grad_norm": 0.12743902206420898, "learning_rate": 8.55815831987076e-05, "loss": 1.3158, "mean_token_accuracy": 0.7477226942777634, "num_tokens": 143205717.0, "step": 2120 }, { "entropy": 0.8998202800750732, "epoch": 0.6453078845716883, "grad_norm": 0.14212191104888916, "learning_rate": 8.598546042003231e-05, "loss": 1.3056, "mean_token_accuracy": 0.7463966920971871, "num_tokens": 143861299.0, "step": 2130 }, { "entropy": 0.8835368379950523, "epoch": 0.6483374990532454, "grad_norm": 0.1122928187251091, "learning_rate": 8.638933764135703e-05, "loss": 1.2886, "mean_token_accuracy": 0.7610997334122658, "num_tokens": 144557885.0, "step": 2140 }, { "entropy": 0.8946089386940003, "epoch": 0.6513671135348027, "grad_norm": 0.13596343994140625, "learning_rate": 8.679321486268174e-05, "loss": 1.2981, "mean_token_accuracy": 0.7543591871857643, "num_tokens": 145240703.0, "step": 2150 }, { "entropy": 0.9056951612234115, "epoch": 0.65439672801636, "grad_norm": 0.1226610541343689, "learning_rate": 8.719709208400648e-05, "loss": 1.3086, "mean_token_accuracy": 0.7488695859909058, "num_tokens": 145913481.0, "step": 2160 }, { "entropy": 0.906532245874405, "epoch": 0.6574263424979171, "grad_norm": 0.13818389177322388, "learning_rate": 8.760096930533118e-05, "loss": 1.3156, "mean_token_accuracy": 0.7476009607315064, "num_tokens": 146577442.0, "step": 2170 }, { "entropy": 0.8970832392573357, "epoch": 0.6604559569794743, "grad_norm": 0.11767213046550751, "learning_rate": 8.800484652665591e-05, "loss": 1.3108, "mean_token_accuracy": 0.7520548105239868, "num_tokens": 147246470.0, "step": 2180 }, { "entropy": 0.898816491663456, "epoch": 0.6634855714610316, "grad_norm": 0.12864157557487488, "learning_rate": 8.840872374798061e-05, "loss": 1.3081, "mean_token_accuracy": 0.7583793714642525, "num_tokens": 147928598.0, "step": 2190 }, { "entropy": 0.8935007572174072, "epoch": 0.6665151859425889, "grad_norm": 0.12095800042152405, "learning_rate": 8.881260096930534e-05, "loss": 1.2998, "mean_token_accuracy": 0.7564240217208862, "num_tokens": 148610120.0, "step": 2200 }, { "entropy": 0.9021251410245895, "epoch": 0.669544800424146, "grad_norm": 0.1266978681087494, "learning_rate": 8.921647819063005e-05, "loss": 1.311, "mean_token_accuracy": 0.7462113335728645, "num_tokens": 149256959.0, "step": 2210 }, { "entropy": 0.8901038557291031, "epoch": 0.6725744149057032, "grad_norm": 0.12045814841985703, "learning_rate": 8.962035541195478e-05, "loss": 1.2956, "mean_token_accuracy": 0.7490550830960274, "num_tokens": 149916961.0, "step": 2220 }, { "entropy": 0.8851861774921417, "epoch": 0.6756040293872605, "grad_norm": 0.1233411505818367, "learning_rate": 9.002423263327949e-05, "loss": 1.2895, "mean_token_accuracy": 0.757862888276577, "num_tokens": 150597520.0, "step": 2230 }, { "entropy": 0.8945758268237114, "epoch": 0.6786336438688176, "grad_norm": 0.11606448888778687, "learning_rate": 9.042810985460421e-05, "loss": 1.3073, "mean_token_accuracy": 0.751634182035923, "num_tokens": 151268277.0, "step": 2240 }, { "entropy": 0.9055110797286033, "epoch": 0.6816632583503749, "grad_norm": 0.1208593100309372, "learning_rate": 9.083198707592893e-05, "loss": 1.3085, "mean_token_accuracy": 0.7520596653223037, "num_tokens": 151937122.0, "step": 2250 }, { "entropy": 0.8921197906136513, "epoch": 0.6846928728319321, "grad_norm": 0.13769711554050446, "learning_rate": 9.123586429725364e-05, "loss": 1.2993, "mean_token_accuracy": 0.7518444448709488, "num_tokens": 152611330.0, "step": 2260 }, { "entropy": 0.9008217513561249, "epoch": 0.6877224873134894, "grad_norm": 0.13109052181243896, "learning_rate": 9.163974151857836e-05, "loss": 1.3106, "mean_token_accuracy": 0.7524908572435379, "num_tokens": 153288510.0, "step": 2270 }, { "entropy": 0.9004259362816811, "epoch": 0.6907521017950465, "grad_norm": 0.11402872204780579, "learning_rate": 9.204361873990308e-05, "loss": 1.2996, "mean_token_accuracy": 0.7515971258282661, "num_tokens": 153966417.0, "step": 2280 }, { "entropy": 0.8992137014865875, "epoch": 0.6937817162766038, "grad_norm": 0.13402849435806274, "learning_rate": 9.244749596122779e-05, "loss": 1.3007, "mean_token_accuracy": 0.7534859776496887, "num_tokens": 154654259.0, "step": 2290 }, { "entropy": 0.9095842137932777, "epoch": 0.696811330758161, "grad_norm": 0.12530536949634552, "learning_rate": 9.285137318255251e-05, "loss": 1.316, "mean_token_accuracy": 0.7539889082312584, "num_tokens": 155328311.0, "step": 2300 }, { "entropy": 0.8930700272321701, "epoch": 0.6998409452397183, "grad_norm": 0.12575875222682953, "learning_rate": 9.325525040387723e-05, "loss": 1.2997, "mean_token_accuracy": 0.7537848263978958, "num_tokens": 156003757.0, "step": 2310 }, { "entropy": 0.9096755936741829, "epoch": 0.7028705597212754, "grad_norm": 0.12259205430746078, "learning_rate": 9.365912762520194e-05, "loss": 1.3162, "mean_token_accuracy": 0.7465154975652695, "num_tokens": 156672017.0, "step": 2320 }, { "entropy": 0.8987697646021843, "epoch": 0.7059001742028327, "grad_norm": 0.14718621969223022, "learning_rate": 9.406300484652666e-05, "loss": 1.3027, "mean_token_accuracy": 0.757715892791748, "num_tokens": 157369100.0, "step": 2330 }, { "entropy": 0.8899315431714058, "epoch": 0.7089297886843899, "grad_norm": 0.12225979566574097, "learning_rate": 9.446688206785138e-05, "loss": 1.2981, "mean_token_accuracy": 0.7547729939222336, "num_tokens": 158036981.0, "step": 2340 }, { "entropy": 0.8941576257348061, "epoch": 0.7119594031659471, "grad_norm": 0.11984486132860184, "learning_rate": 9.48707592891761e-05, "loss": 1.295, "mean_token_accuracy": 0.7545301422476769, "num_tokens": 158723158.0, "step": 2350 }, { "entropy": 0.8968440085649491, "epoch": 0.7149890176475043, "grad_norm": 0.1378418505191803, "learning_rate": 9.527463651050081e-05, "loss": 1.2987, "mean_token_accuracy": 0.754326069355011, "num_tokens": 159407156.0, "step": 2360 }, { "entropy": 0.9001185387372971, "epoch": 0.7180186321290616, "grad_norm": 0.1220158115029335, "learning_rate": 9.567851373182553e-05, "loss": 1.2997, "mean_token_accuracy": 0.7525874629616738, "num_tokens": 160085714.0, "step": 2370 }, { "entropy": 0.898030799627304, "epoch": 0.7210482466106188, "grad_norm": 0.1130693331360817, "learning_rate": 9.608239095315024e-05, "loss": 1.298, "mean_token_accuracy": 0.7585792228579521, "num_tokens": 160784292.0, "step": 2380 }, { "entropy": 0.8831940025091172, "epoch": 0.724077861092176, "grad_norm": 0.12520240247249603, "learning_rate": 9.648626817447497e-05, "loss": 1.2882, "mean_token_accuracy": 0.7544678583741188, "num_tokens": 161464712.0, "step": 2390 }, { "entropy": 0.8926929473876953, "epoch": 0.7271074755737332, "grad_norm": 0.13716495037078857, "learning_rate": 9.689014539579968e-05, "loss": 1.2953, "mean_token_accuracy": 0.7539828538894653, "num_tokens": 162152296.0, "step": 2400 }, { "entropy": 0.8940726727247238, "epoch": 0.7301370900552905, "grad_norm": 0.11432312428951263, "learning_rate": 9.729402261712441e-05, "loss": 1.2963, "mean_token_accuracy": 0.7570353090763092, "num_tokens": 162838234.0, "step": 2410 }, { "entropy": 0.8960199370980263, "epoch": 0.7331667045368477, "grad_norm": 0.14145903289318085, "learning_rate": 9.769789983844911e-05, "loss": 1.3043, "mean_token_accuracy": 0.7553196415305138, "num_tokens": 163511723.0, "step": 2420 }, { "entropy": 0.890855559706688, "epoch": 0.7361963190184049, "grad_norm": 0.10976195335388184, "learning_rate": 9.810177705977384e-05, "loss": 1.305, "mean_token_accuracy": 0.7565831497311593, "num_tokens": 164191370.0, "step": 2430 }, { "entropy": 0.9013715043663979, "epoch": 0.7392259334999621, "grad_norm": 0.1118873581290245, "learning_rate": 9.850565428109854e-05, "loss": 1.304, "mean_token_accuracy": 0.7534702464938163, "num_tokens": 164872192.0, "step": 2440 }, { "entropy": 0.896777106821537, "epoch": 0.7422555479815194, "grad_norm": 0.1089012399315834, "learning_rate": 9.890953150242327e-05, "loss": 1.3014, "mean_token_accuracy": 0.7518857836723327, "num_tokens": 165552101.0, "step": 2450 }, { "entropy": 0.900914216041565, "epoch": 0.7452851624630765, "grad_norm": 0.11077170819044113, "learning_rate": 9.931340872374798e-05, "loss": 1.3123, "mean_token_accuracy": 0.7488637402653694, "num_tokens": 166209905.0, "step": 2460 }, { "entropy": 0.8996037840843201, "epoch": 0.7483147769446338, "grad_norm": 0.1135358139872551, "learning_rate": 9.971728594507271e-05, "loss": 1.3063, "mean_token_accuracy": 0.7565658450126648, "num_tokens": 166896674.0, "step": 2470 }, { "entropy": 0.8926998823881149, "epoch": 0.751344391426191, "grad_norm": 0.13807424902915955, "learning_rate": 9.99999989963874e-05, "loss": 1.2951, "mean_token_accuracy": 0.7562185510993004, "num_tokens": 167580000.0, "step": 2480 }, { "entropy": 0.895380625128746, "epoch": 0.7543740059077483, "grad_norm": 0.10803461819887161, "learning_rate": 9.999998115438692e-05, "loss": 1.3077, "mean_token_accuracy": 0.7538283810019493, "num_tokens": 168253653.0, "step": 2490 }, { "entropy": 0.8799598172307015, "epoch": 0.7574036203893054, "grad_norm": 0.11616635322570801, "learning_rate": 9.999994100989359e-05, "loss": 1.2896, "mean_token_accuracy": 0.756775687634945, "num_tokens": 168934649.0, "step": 2500 }, { "entropy": 0.8974808976054192, "epoch": 0.7604332348708627, "grad_norm": 0.10577427595853806, "learning_rate": 9.99998785629253e-05, "loss": 1.302, "mean_token_accuracy": 0.7530675888061523, "num_tokens": 169623256.0, "step": 2510 }, { "entropy": 0.8934375584125519, "epoch": 0.7634628493524199, "grad_norm": 0.11860733479261398, "learning_rate": 9.999979381350992e-05, "loss": 1.2971, "mean_token_accuracy": 0.7546120777726173, "num_tokens": 170305389.0, "step": 2520 }, { "entropy": 0.8967828616499901, "epoch": 0.7664924638339772, "grad_norm": 0.10974089056253433, "learning_rate": 9.999968676168526e-05, "loss": 1.308, "mean_token_accuracy": 0.7479175120592118, "num_tokens": 170971417.0, "step": 2530 }, { "entropy": 0.9001986250281334, "epoch": 0.7695220783155343, "grad_norm": 0.13815833628177643, "learning_rate": 9.999955740749904e-05, "loss": 1.3073, "mean_token_accuracy": 0.7483186662197113, "num_tokens": 171641890.0, "step": 2540 }, { "entropy": 0.8926223456859589, "epoch": 0.7725516927970916, "grad_norm": 0.128375843167305, "learning_rate": 9.999940575100901e-05, "loss": 1.2979, "mean_token_accuracy": 0.7589505150914192, "num_tokens": 172338796.0, "step": 2550 }, { "entropy": 0.8888041824102402, "epoch": 0.7755813072786488, "grad_norm": 0.12634579837322235, "learning_rate": 9.999923179228277e-05, "loss": 1.3083, "mean_token_accuracy": 0.7520693674683571, "num_tokens": 173007762.0, "step": 2560 }, { "entropy": 0.8981804877519608, "epoch": 0.778610921760206, "grad_norm": 0.11115466803312302, "learning_rate": 9.999903553139792e-05, "loss": 1.3026, "mean_token_accuracy": 0.7505076140165329, "num_tokens": 173673680.0, "step": 2570 }, { "entropy": 0.9000903144478798, "epoch": 0.7816405362417632, "grad_norm": 0.11153290420770645, "learning_rate": 9.999881696844202e-05, "loss": 1.3021, "mean_token_accuracy": 0.752273240685463, "num_tokens": 174344011.0, "step": 2580 }, { "entropy": 0.8882798403501511, "epoch": 0.7846701507233205, "grad_norm": 0.1012105643749237, "learning_rate": 9.999857610351258e-05, "loss": 1.2892, "mean_token_accuracy": 0.754874874651432, "num_tokens": 175027770.0, "step": 2590 }, { "entropy": 0.8896907702088356, "epoch": 0.7876997652048777, "grad_norm": 0.12526828050613403, "learning_rate": 9.999831293671697e-05, "loss": 1.2868, "mean_token_accuracy": 0.7556891471147538, "num_tokens": 175708636.0, "step": 2600 }, { "entropy": 0.8958791553974151, "epoch": 0.7907293796864349, "grad_norm": 0.1167873814702034, "learning_rate": 9.999802746817263e-05, "loss": 1.2985, "mean_token_accuracy": 0.7518065556883812, "num_tokens": 176389737.0, "step": 2610 }, { "entropy": 0.8975002259016037, "epoch": 0.7937589941679921, "grad_norm": 0.12051751464605331, "learning_rate": 9.99977196980069e-05, "loss": 1.2944, "mean_token_accuracy": 0.7527152106165886, "num_tokens": 177074799.0, "step": 2620 }, { "entropy": 0.892160353064537, "epoch": 0.7967886086495494, "grad_norm": 0.12530362606048584, "learning_rate": 9.999738962635703e-05, "loss": 1.2975, "mean_token_accuracy": 0.7520674377679825, "num_tokens": 177748277.0, "step": 2630 }, { "entropy": 0.8813617646694183, "epoch": 0.7998182231311066, "grad_norm": 0.1015031561255455, "learning_rate": 9.999703725337024e-05, "loss": 1.2999, "mean_token_accuracy": 0.7577098682522774, "num_tokens": 178415853.0, "step": 2640 }, { "entropy": 0.8927841350436211, "epoch": 0.8028478376126638, "grad_norm": 0.12152383476495743, "learning_rate": 9.999666257920375e-05, "loss": 1.2957, "mean_token_accuracy": 0.757025220990181, "num_tokens": 179100762.0, "step": 2650 }, { "entropy": 0.8956415086984635, "epoch": 0.805877452094221, "grad_norm": 0.11789049953222275, "learning_rate": 9.999626560402464e-05, "loss": 1.2948, "mean_token_accuracy": 0.7551279291510582, "num_tokens": 179787685.0, "step": 2660 }, { "entropy": 0.8890686973929405, "epoch": 0.8089070665757783, "grad_norm": 0.10717087239027023, "learning_rate": 9.999584632801001e-05, "loss": 1.2958, "mean_token_accuracy": 0.7556095629930496, "num_tokens": 180464759.0, "step": 2670 }, { "entropy": 0.8760408699512482, "epoch": 0.8119366810573354, "grad_norm": 0.12731553614139557, "learning_rate": 9.999540475134687e-05, "loss": 1.2881, "mean_token_accuracy": 0.7579726591706276, "num_tokens": 181146753.0, "step": 2680 }, { "entropy": 0.8964680597186089, "epoch": 0.8149662955388927, "grad_norm": 0.11903461068868637, "learning_rate": 9.999494087423218e-05, "loss": 1.2968, "mean_token_accuracy": 0.7534425109624863, "num_tokens": 181818721.0, "step": 2690 }, { "entropy": 0.899785116314888, "epoch": 0.8179959100204499, "grad_norm": 0.0975867435336113, "learning_rate": 9.999445469687285e-05, "loss": 1.3025, "mean_token_accuracy": 0.7537168964743615, "num_tokens": 182501311.0, "step": 2700 }, { "entropy": 0.8866352945566177, "epoch": 0.8210255245020072, "grad_norm": 0.1379888355731964, "learning_rate": 9.999394621948574e-05, "loss": 1.3041, "mean_token_accuracy": 0.7531851753592491, "num_tokens": 183176822.0, "step": 2710 }, { "entropy": 0.8942874193191528, "epoch": 0.8240551389835643, "grad_norm": 0.1034228503704071, "learning_rate": 9.999341544229767e-05, "loss": 1.2937, "mean_token_accuracy": 0.7550044193863868, "num_tokens": 183859190.0, "step": 2720 }, { "entropy": 0.8920839637517929, "epoch": 0.8270847534651216, "grad_norm": 0.10949212312698364, "learning_rate": 9.999286236554537e-05, "loss": 1.298, "mean_token_accuracy": 0.7496090158820152, "num_tokens": 184526568.0, "step": 2730 }, { "entropy": 0.8993422657251358, "epoch": 0.8301143679466788, "grad_norm": 0.11065369844436646, "learning_rate": 9.999228698947557e-05, "loss": 1.2983, "mean_token_accuracy": 0.749603633582592, "num_tokens": 185197242.0, "step": 2740 }, { "entropy": 0.8873072430491448, "epoch": 0.833143982428236, "grad_norm": 0.11458907276391983, "learning_rate": 9.99916893143449e-05, "loss": 1.3013, "mean_token_accuracy": 0.7548268765211106, "num_tokens": 185872647.0, "step": 2750 }, { "entropy": 0.9006134569644928, "epoch": 0.8361735969097932, "grad_norm": 0.12513191998004913, "learning_rate": 9.999106934041995e-05, "loss": 1.3071, "mean_token_accuracy": 0.7464882954955101, "num_tokens": 186533201.0, "step": 2760 }, { "entropy": 0.8941516593098641, "epoch": 0.8392032113913505, "grad_norm": 0.12465955317020416, "learning_rate": 9.999042706797725e-05, "loss": 1.3016, "mean_token_accuracy": 0.7457407161593437, "num_tokens": 187187516.0, "step": 2770 }, { "entropy": 0.8858958646655083, "epoch": 0.8422328258729077, "grad_norm": 0.13242392241954803, "learning_rate": 9.998976249730331e-05, "loss": 1.2983, "mean_token_accuracy": 0.7558173567056656, "num_tokens": 187861549.0, "step": 2780 }, { "entropy": 0.8848733186721802, "epoch": 0.8452624403544649, "grad_norm": 0.11963985860347748, "learning_rate": 9.998907562869456e-05, "loss": 1.2959, "mean_token_accuracy": 0.7539189040660859, "num_tokens": 188526055.0, "step": 2790 }, { "entropy": 0.8775170937180519, "epoch": 0.8482920548360221, "grad_norm": 0.11567117273807526, "learning_rate": 9.998836646245735e-05, "loss": 1.2749, "mean_token_accuracy": 0.7605938255786896, "num_tokens": 189222345.0, "step": 2800 }, { "entropy": 0.8879165351390839, "epoch": 0.8513216693175794, "grad_norm": 0.11390013247728348, "learning_rate": 9.998763499890805e-05, "loss": 1.293, "mean_token_accuracy": 0.7548198118805886, "num_tokens": 189896560.0, "step": 2810 }, { "entropy": 0.8948941051959991, "epoch": 0.8543512837991366, "grad_norm": 0.11404690891504288, "learning_rate": 9.998688123837287e-05, "loss": 1.303, "mean_token_accuracy": 0.7508691176772118, "num_tokens": 190566008.0, "step": 2820 }, { "entropy": 0.8890450417995452, "epoch": 0.8573808982806937, "grad_norm": 0.09506414085626602, "learning_rate": 9.998610518118806e-05, "loss": 1.3019, "mean_token_accuracy": 0.753497801721096, "num_tokens": 191232821.0, "step": 2830 }, { "entropy": 0.8918934434652328, "epoch": 0.860410512762251, "grad_norm": 0.10562010109424591, "learning_rate": 9.998530682769978e-05, "loss": 1.2944, "mean_token_accuracy": 0.7505758956074715, "num_tokens": 191906407.0, "step": 2840 }, { "entropy": 0.8885342419147492, "epoch": 0.8634401272438083, "grad_norm": 0.11452876776456833, "learning_rate": 9.998448617826413e-05, "loss": 1.2858, "mean_token_accuracy": 0.7563617274165153, "num_tokens": 192593029.0, "step": 2850 }, { "entropy": 0.8962948873639107, "epoch": 0.8664697417253654, "grad_norm": 0.10869462043046951, "learning_rate": 9.998364323324716e-05, "loss": 1.2989, "mean_token_accuracy": 0.7537289977073669, "num_tokens": 193271425.0, "step": 2860 }, { "entropy": 0.89390327334404, "epoch": 0.8694993562069226, "grad_norm": 0.12010079622268677, "learning_rate": 9.99827779930249e-05, "loss": 1.3072, "mean_token_accuracy": 0.7536683186888695, "num_tokens": 193948533.0, "step": 2870 }, { "entropy": 0.9021015837788582, "epoch": 0.8725289706884799, "grad_norm": 0.103371761739254, "learning_rate": 9.998189045798322e-05, "loss": 1.2982, "mean_token_accuracy": 0.7517461240291595, "num_tokens": 194625948.0, "step": 2880 }, { "entropy": 0.8892711877822876, "epoch": 0.8755585851700372, "grad_norm": 0.09570302069187164, "learning_rate": 9.998098062851805e-05, "loss": 1.3046, "mean_token_accuracy": 0.7510400608181953, "num_tokens": 195288381.0, "step": 2890 }, { "entropy": 0.8950867876410484, "epoch": 0.8785881996515943, "grad_norm": 0.11434589326381683, "learning_rate": 9.998004850503522e-05, "loss": 1.2964, "mean_token_accuracy": 0.7557195812463761, "num_tokens": 195976557.0, "step": 2900 }, { "entropy": 0.8812711849808693, "epoch": 0.8816178141331515, "grad_norm": 0.1001354306936264, "learning_rate": 9.99790940879505e-05, "loss": 1.2874, "mean_token_accuracy": 0.7547182634472847, "num_tokens": 196643437.0, "step": 2910 }, { "entropy": 0.8998318642377854, "epoch": 0.8846474286147088, "grad_norm": 0.10829000174999237, "learning_rate": 9.99781173776896e-05, "loss": 1.308, "mean_token_accuracy": 0.7503742709755897, "num_tokens": 197309401.0, "step": 2920 }, { "entropy": 0.888218542933464, "epoch": 0.887677043096266, "grad_norm": 0.1064077690243721, "learning_rate": 9.997711837468818e-05, "loss": 1.2828, "mean_token_accuracy": 0.7523164615035057, "num_tokens": 197975801.0, "step": 2930 }, { "entropy": 0.9008451148867607, "epoch": 0.8907066575778232, "grad_norm": 0.11014664173126221, "learning_rate": 9.997609707939187e-05, "loss": 1.2933, "mean_token_accuracy": 0.7497159570455552, "num_tokens": 198644684.0, "step": 2940 }, { "entropy": 0.8830799236893654, "epoch": 0.8937362720593804, "grad_norm": 0.12068645656108856, "learning_rate": 9.997505349225617e-05, "loss": 1.2909, "mean_token_accuracy": 0.7548075884580612, "num_tokens": 199317340.0, "step": 2950 }, { "entropy": 0.8823300585150718, "epoch": 0.8967658865409377, "grad_norm": 0.10538719594478607, "learning_rate": 9.997398761374663e-05, "loss": 1.2899, "mean_token_accuracy": 0.7563262611627579, "num_tokens": 199999874.0, "step": 2960 }, { "entropy": 0.8856275871396064, "epoch": 0.8997955010224948, "grad_norm": 0.1214800700545311, "learning_rate": 9.997289944433864e-05, "loss": 1.2885, "mean_token_accuracy": 0.7522132501006127, "num_tokens": 200669314.0, "step": 2970 }, { "entropy": 0.8952860370278358, "epoch": 0.9028251155040521, "grad_norm": 0.1363692432641983, "learning_rate": 9.997178898451761e-05, "loss": 1.3036, "mean_token_accuracy": 0.7530059188604354, "num_tokens": 201345860.0, "step": 2980 }, { "entropy": 0.8916691571474076, "epoch": 0.9058547299856093, "grad_norm": 0.11812197417020798, "learning_rate": 9.997065623477883e-05, "loss": 1.3088, "mean_token_accuracy": 0.7496755793690681, "num_tokens": 202008085.0, "step": 2990 }, { "entropy": 0.8959484532475471, "epoch": 0.9088843444671666, "grad_norm": 0.10616706311702728, "learning_rate": 9.996950119562758e-05, "loss": 1.301, "mean_token_accuracy": 0.7539889186620712, "num_tokens": 202686534.0, "step": 3000 }, { "entropy": 0.8973194777965545, "epoch": 0.9119139589487237, "grad_norm": 0.11548098176717758, "learning_rate": 9.996832386757908e-05, "loss": 1.2957, "mean_token_accuracy": 0.7508701875805854, "num_tokens": 203366790.0, "step": 3010 }, { "entropy": 0.8855000004172325, "epoch": 0.914943573430281, "grad_norm": 0.11258887499570847, "learning_rate": 9.996712425115845e-05, "loss": 1.2915, "mean_token_accuracy": 0.753988440334797, "num_tokens": 204037411.0, "step": 3020 }, { "entropy": 0.8910848885774613, "epoch": 0.9179731879118382, "grad_norm": 0.10067538172006607, "learning_rate": 9.99659023469008e-05, "loss": 1.2913, "mean_token_accuracy": 0.7553201824426651, "num_tokens": 204727212.0, "step": 3030 }, { "entropy": 0.8893559783697128, "epoch": 0.9210028023933955, "grad_norm": 0.1173923909664154, "learning_rate": 9.996465815535114e-05, "loss": 1.2951, "mean_token_accuracy": 0.7548139676451683, "num_tokens": 205410370.0, "step": 3040 }, { "entropy": 0.890807381272316, "epoch": 0.9240324168749526, "grad_norm": 0.10941410809755325, "learning_rate": 9.996339167706446e-05, "loss": 1.3003, "mean_token_accuracy": 0.7539852261543274, "num_tokens": 206082283.0, "step": 3050 }, { "entropy": 0.8826736882328987, "epoch": 0.9270620313565099, "grad_norm": 0.10980314761400223, "learning_rate": 9.996210291260565e-05, "loss": 1.2864, "mean_token_accuracy": 0.753743426501751, "num_tokens": 206761589.0, "step": 3060 }, { "entropy": 0.8879106298089028, "epoch": 0.9300916458380671, "grad_norm": 0.11397937685251236, "learning_rate": 9.996079186254958e-05, "loss": 1.3003, "mean_token_accuracy": 0.754043634235859, "num_tokens": 207439302.0, "step": 3070 }, { "entropy": 0.8939175412058831, "epoch": 0.9331212603196243, "grad_norm": 0.09899991005659103, "learning_rate": 9.995945852748106e-05, "loss": 1.3063, "mean_token_accuracy": 0.7534767419099808, "num_tokens": 208114788.0, "step": 3080 }, { "entropy": 0.8829623639583588, "epoch": 0.9361508748011815, "grad_norm": 0.11054675281047821, "learning_rate": 9.99581029079948e-05, "loss": 1.2891, "mean_token_accuracy": 0.7545877754688263, "num_tokens": 208789598.0, "step": 3090 }, { "entropy": 0.8849677100777626, "epoch": 0.9391804892827388, "grad_norm": 0.09774205088615417, "learning_rate": 9.995672500469548e-05, "loss": 1.2924, "mean_token_accuracy": 0.7535423532128334, "num_tokens": 209459851.0, "step": 3100 }, { "entropy": 0.8895751640200615, "epoch": 0.942210103764296, "grad_norm": 0.12433359771966934, "learning_rate": 9.995532481819772e-05, "loss": 1.2913, "mean_token_accuracy": 0.7532431796193123, "num_tokens": 210127795.0, "step": 3110 }, { "entropy": 0.881451603770256, "epoch": 0.9452397182458532, "grad_norm": 0.10106968879699707, "learning_rate": 9.995390234912605e-05, "loss": 1.2983, "mean_token_accuracy": 0.7505646288394928, "num_tokens": 210784502.0, "step": 3120 }, { "entropy": 0.8905109122395516, "epoch": 0.9482693327274104, "grad_norm": 0.10910733789205551, "learning_rate": 9.995245759811501e-05, "loss": 1.2959, "mean_token_accuracy": 0.7532315716147423, "num_tokens": 211464326.0, "step": 3130 }, { "entropy": 0.8880620300769806, "epoch": 0.9512989472089677, "grad_norm": 0.11476880311965942, "learning_rate": 9.995099056580896e-05, "loss": 1.2973, "mean_token_accuracy": 0.755242583155632, "num_tokens": 212137201.0, "step": 3140 }, { "entropy": 0.8845109969377518, "epoch": 0.9543285616905249, "grad_norm": 0.11044140160083771, "learning_rate": 9.994950125286234e-05, "loss": 1.2874, "mean_token_accuracy": 0.751310071349144, "num_tokens": 212796724.0, "step": 3150 }, { "entropy": 0.890080326795578, "epoch": 0.9573581761720821, "grad_norm": 0.1296996921300888, "learning_rate": 9.994798965993943e-05, "loss": 1.2968, "mean_token_accuracy": 0.7520108208060264, "num_tokens": 213457695.0, "step": 3160 }, { "entropy": 0.8923839345574379, "epoch": 0.9603877906536393, "grad_norm": 0.10951551795005798, "learning_rate": 9.994645578771446e-05, "loss": 1.2965, "mean_token_accuracy": 0.7527667090296746, "num_tokens": 214126561.0, "step": 3170 }, { "entropy": 0.8904188767075538, "epoch": 0.9634174051351966, "grad_norm": 0.10850483179092407, "learning_rate": 9.994489963687163e-05, "loss": 1.2898, "mean_token_accuracy": 0.7545693084597588, "num_tokens": 214813344.0, "step": 3180 }, { "entropy": 0.8910052701830864, "epoch": 0.9664470196167537, "grad_norm": 0.11159470677375793, "learning_rate": 9.994332120810508e-05, "loss": 1.2982, "mean_token_accuracy": 0.7537946432828904, "num_tokens": 215498268.0, "step": 3190 }, { "entropy": 0.896054194867611, "epoch": 0.969476634098311, "grad_norm": 0.11511316150426865, "learning_rate": 9.994172050211883e-05, "loss": 1.3002, "mean_token_accuracy": 0.7577290192246438, "num_tokens": 216189675.0, "step": 3200 }, { "entropy": 0.8780703529715538, "epoch": 0.9725062485798682, "grad_norm": 0.10506260395050049, "learning_rate": 9.99400975196269e-05, "loss": 1.2887, "mean_token_accuracy": 0.7529979765415191, "num_tokens": 216852550.0, "step": 3210 }, { "entropy": 0.8913513898849488, "epoch": 0.9755358630614255, "grad_norm": 0.10629456490278244, "learning_rate": 9.993845226135322e-05, "loss": 1.2926, "mean_token_accuracy": 0.7555703178048134, "num_tokens": 217539292.0, "step": 3220 }, { "entropy": 0.8858451247215271, "epoch": 0.9785654775429826, "grad_norm": 0.12247616052627563, "learning_rate": 9.993678472803165e-05, "loss": 1.2882, "mean_token_accuracy": 0.7532886505126953, "num_tokens": 218206220.0, "step": 3230 }, { "entropy": 0.894928352534771, "epoch": 0.9815950920245399, "grad_norm": 0.10783626139163971, "learning_rate": 9.993509492040598e-05, "loss": 1.2993, "mean_token_accuracy": 0.7458206444978714, "num_tokens": 218859599.0, "step": 3240 }, { "entropy": 0.8780856564640999, "epoch": 0.9846247065060971, "grad_norm": 0.11459079384803772, "learning_rate": 9.993338283922998e-05, "loss": 1.2793, "mean_token_accuracy": 0.7525880768895149, "num_tokens": 219529665.0, "step": 3250 }, { "entropy": 0.8861164033412934, "epoch": 0.9876543209876543, "grad_norm": 0.11810479313135147, "learning_rate": 9.99316484852673e-05, "loss": 1.2921, "mean_token_accuracy": 0.7580062180757523, "num_tokens": 220212376.0, "step": 3260 }, { "entropy": 0.8857352554798126, "epoch": 0.9906839354692115, "grad_norm": 0.12247594445943832, "learning_rate": 9.992989185929156e-05, "loss": 1.2957, "mean_token_accuracy": 0.7553387239575386, "num_tokens": 220892023.0, "step": 3270 }, { "entropy": 0.8915065169334412, "epoch": 0.9937135499507688, "grad_norm": 0.10942309349775314, "learning_rate": 9.992811296208631e-05, "loss": 1.2972, "mean_token_accuracy": 0.7555397480726243, "num_tokens": 221575451.0, "step": 3280 }, { "entropy": 0.8895631492137909, "epoch": 0.996743164432326, "grad_norm": 0.10511741042137146, "learning_rate": 9.9926311794445e-05, "loss": 1.3013, "mean_token_accuracy": 0.7511893466114998, "num_tokens": 222243998.0, "step": 3290 }, { "entropy": 0.8838838443160058, "epoch": 0.9997727789138832, "grad_norm": 0.11750248819589615, "learning_rate": 9.992448835717108e-05, "loss": 1.2903, "mean_token_accuracy": 0.7565528631210328, "num_tokens": 222926827.0, "step": 3300 }, { "entropy": 0.8911170256443512, "epoch": 1.0027266530334016, "grad_norm": 0.10560097545385361, "learning_rate": 9.992264265107784e-05, "loss": 1.3018, "mean_token_accuracy": 0.7522377738585839, "num_tokens": 223577278.0, "step": 3310 }, { "entropy": 0.8920167312026024, "epoch": 1.0057562675149587, "grad_norm": 0.1127898320555687, "learning_rate": 9.992077467698862e-05, "loss": 1.2945, "mean_token_accuracy": 0.7525437578558922, "num_tokens": 224251920.0, "step": 3320 }, { "entropy": 0.8868375703692436, "epoch": 1.0087858819965159, "grad_norm": 0.10572829842567444, "learning_rate": 9.991888443573658e-05, "loss": 1.2903, "mean_token_accuracy": 0.7567735984921455, "num_tokens": 224929501.0, "step": 3330 }, { "entropy": 0.8888847678899765, "epoch": 1.0118154964780732, "grad_norm": 0.11192059516906738, "learning_rate": 9.991697192816489e-05, "loss": 1.2928, "mean_token_accuracy": 0.7525388732552528, "num_tokens": 225603557.0, "step": 3340 }, { "entropy": 0.8859956607222557, "epoch": 1.0148451109596304, "grad_norm": 0.1040537878870964, "learning_rate": 9.99150371551266e-05, "loss": 1.2912, "mean_token_accuracy": 0.7513513550162315, "num_tokens": 226274502.0, "step": 3350 }, { "entropy": 0.8881052494049072, "epoch": 1.0178747254411875, "grad_norm": 0.10165461897850037, "learning_rate": 9.991308011748475e-05, "loss": 1.2961, "mean_token_accuracy": 0.7474104389548302, "num_tokens": 226935109.0, "step": 3360 }, { "entropy": 0.871713387966156, "epoch": 1.0209043399227449, "grad_norm": 0.11055523157119751, "learning_rate": 9.991110081611225e-05, "loss": 1.2756, "mean_token_accuracy": 0.7542689144611359, "num_tokens": 227608243.0, "step": 3370 }, { "entropy": 0.8877625584602356, "epoch": 1.023933954404302, "grad_norm": 0.10797322541475296, "learning_rate": 9.990909925189196e-05, "loss": 1.2931, "mean_token_accuracy": 0.7551879778504371, "num_tokens": 228289728.0, "step": 3380 }, { "entropy": 0.8882021844387055, "epoch": 1.0269635688858594, "grad_norm": 0.11925064772367477, "learning_rate": 9.990707542571672e-05, "loss": 1.2914, "mean_token_accuracy": 0.7551611572504043, "num_tokens": 228977061.0, "step": 3390 }, { "entropy": 0.8877963319420814, "epoch": 1.0299931833674165, "grad_norm": 0.0958029180765152, "learning_rate": 9.990502933848923e-05, "loss": 1.2821, "mean_token_accuracy": 0.7543014630675315, "num_tokens": 229661460.0, "step": 3400 }, { "entropy": 0.8862324357032776, "epoch": 1.0330227978489737, "grad_norm": 0.10870645195245743, "learning_rate": 9.990296099112213e-05, "loss": 1.2945, "mean_token_accuracy": 0.7560849383473396, "num_tokens": 230351187.0, "step": 3410 }, { "entropy": 0.8828345820307731, "epoch": 1.036052412330531, "grad_norm": 0.10174642503261566, "learning_rate": 9.990087038453802e-05, "loss": 1.2788, "mean_token_accuracy": 0.7653154522180557, "num_tokens": 231066749.0, "step": 3420 }, { "entropy": 0.8851967990398407, "epoch": 1.0390820268120882, "grad_norm": 0.11057489365339279, "learning_rate": 9.989875751966944e-05, "loss": 1.2966, "mean_token_accuracy": 0.754563856124878, "num_tokens": 231741660.0, "step": 3430 }, { "entropy": 0.8901623770594597, "epoch": 1.0421116412936453, "grad_norm": 0.1161167174577713, "learning_rate": 9.98966223974588e-05, "loss": 1.3022, "mean_token_accuracy": 0.7524377182126045, "num_tokens": 232405078.0, "step": 3440 }, { "entropy": 0.8907693952322007, "epoch": 1.0451412557752027, "grad_norm": 0.09815775603055954, "learning_rate": 9.989446501885848e-05, "loss": 1.298, "mean_token_accuracy": 0.7495367169380188, "num_tokens": 233070740.0, "step": 3450 }, { "entropy": 0.8830181643366813, "epoch": 1.0481708702567598, "grad_norm": 0.09803644567728043, "learning_rate": 9.989228538483078e-05, "loss": 1.2939, "mean_token_accuracy": 0.7502244040369987, "num_tokens": 233727328.0, "step": 3460 }, { "entropy": 0.8811929568648338, "epoch": 1.051200484738317, "grad_norm": 0.09551838785409927, "learning_rate": 9.989008349634795e-05, "loss": 1.2815, "mean_token_accuracy": 0.7551064103841781, "num_tokens": 234399755.0, "step": 3470 }, { "entropy": 0.8824715837836266, "epoch": 1.0542300992198743, "grad_norm": 0.10890055447816849, "learning_rate": 9.98878593543921e-05, "loss": 1.2858, "mean_token_accuracy": 0.7526574030518531, "num_tokens": 235078669.0, "step": 3480 }, { "entropy": 0.8932882875204087, "epoch": 1.0572597137014315, "grad_norm": 0.10177361220121384, "learning_rate": 9.988561295995535e-05, "loss": 1.2986, "mean_token_accuracy": 0.7547635450959206, "num_tokens": 235772791.0, "step": 3490 }, { "entropy": 0.8782318502664566, "epoch": 1.0602893281829888, "grad_norm": 0.10841168463230133, "learning_rate": 9.988334431403966e-05, "loss": 1.2894, "mean_token_accuracy": 0.7566312372684478, "num_tokens": 236441005.0, "step": 3500 }, { "entropy": 0.8738766640424729, "epoch": 1.063318942664546, "grad_norm": 0.13350026309490204, "learning_rate": 9.9881053417657e-05, "loss": 1.2844, "mean_token_accuracy": 0.7570654019713402, "num_tokens": 237117858.0, "step": 3510 }, { "entropy": 0.8770181387662888, "epoch": 1.066348557146103, "grad_norm": 0.10082123428583145, "learning_rate": 9.98787402718292e-05, "loss": 1.2854, "mean_token_accuracy": 0.7538925632834435, "num_tokens": 237790207.0, "step": 3520 }, { "entropy": 0.8860610723495483, "epoch": 1.0693781716276605, "grad_norm": 0.12682177126407623, "learning_rate": 9.987640487758804e-05, "loss": 1.2883, "mean_token_accuracy": 0.7573604539036751, "num_tokens": 238473396.0, "step": 3530 }, { "entropy": 0.8924138516187667, "epoch": 1.0724077861092176, "grad_norm": 0.10974256694316864, "learning_rate": 9.987404723597525e-05, "loss": 1.3031, "mean_token_accuracy": 0.7491044074296951, "num_tokens": 239126306.0, "step": 3540 }, { "entropy": 0.8796985596418381, "epoch": 1.0754374005907748, "grad_norm": 0.12989287078380585, "learning_rate": 9.987166734804243e-05, "loss": 1.2775, "mean_token_accuracy": 0.7557509571313858, "num_tokens": 239799162.0, "step": 3550 }, { "entropy": 0.8759955242276192, "epoch": 1.0784670150723321, "grad_norm": 0.10880783945322037, "learning_rate": 9.986926521485111e-05, "loss": 1.2795, "mean_token_accuracy": 0.7552510201931, "num_tokens": 240477949.0, "step": 3560 }, { "entropy": 0.8844213604927063, "epoch": 1.0814966295538893, "grad_norm": 0.10586912930011749, "learning_rate": 9.986684083747282e-05, "loss": 1.2892, "mean_token_accuracy": 0.749414810538292, "num_tokens": 241136134.0, "step": 3570 }, { "entropy": 0.8810238495469094, "epoch": 1.0845262440354464, "grad_norm": 0.11063455790281296, "learning_rate": 9.98643942169889e-05, "loss": 1.2898, "mean_token_accuracy": 0.7572943776845932, "num_tokens": 241814428.0, "step": 3580 }, { "entropy": 0.8795574530959129, "epoch": 1.0875558585170038, "grad_norm": 0.17099305987358093, "learning_rate": 9.986192535449068e-05, "loss": 1.2942, "mean_token_accuracy": 0.7528563812375069, "num_tokens": 242486061.0, "step": 3590 }, { "entropy": 0.8812819436192513, "epoch": 1.090585472998561, "grad_norm": 0.11202829331159592, "learning_rate": 9.985943425107943e-05, "loss": 1.297, "mean_token_accuracy": 0.7580265551805496, "num_tokens": 243170535.0, "step": 3600 }, { "entropy": 0.8764916747808457, "epoch": 1.093615087480118, "grad_norm": 0.1257563978433609, "learning_rate": 9.985692090786624e-05, "loss": 1.2758, "mean_token_accuracy": 0.7582903057336807, "num_tokens": 243865679.0, "step": 3610 }, { "entropy": 0.8862913087010383, "epoch": 1.0966447019616754, "grad_norm": 0.11346849799156189, "learning_rate": 9.985438532597224e-05, "loss": 1.2847, "mean_token_accuracy": 0.7537027999758721, "num_tokens": 244537231.0, "step": 3620 }, { "entropy": 0.8843289449810982, "epoch": 1.0996743164432325, "grad_norm": 0.10032348334789276, "learning_rate": 9.985182750652842e-05, "loss": 1.2861, "mean_token_accuracy": 0.755133393406868, "num_tokens": 245219847.0, "step": 3630 }, { "entropy": 0.887570683658123, "epoch": 1.10270393092479, "grad_norm": 0.11875443160533905, "learning_rate": 9.984924745067567e-05, "loss": 1.29, "mean_token_accuracy": 0.7504461675882339, "num_tokens": 245884554.0, "step": 3640 }, { "entropy": 0.8764506101608276, "epoch": 1.105733545406347, "grad_norm": 0.10695013403892517, "learning_rate": 9.984664515956486e-05, "loss": 1.2879, "mean_token_accuracy": 0.7540923699736595, "num_tokens": 246553745.0, "step": 3650 }, { "entropy": 0.889299499988556, "epoch": 1.1087631598879042, "grad_norm": 0.12081286311149597, "learning_rate": 9.98440206343567e-05, "loss": 1.2925, "mean_token_accuracy": 0.7518796786665917, "num_tokens": 247222648.0, "step": 3660 }, { "entropy": 0.874249455332756, "epoch": 1.1117927743694616, "grad_norm": 0.10921664535999298, "learning_rate": 9.98413738762219e-05, "loss": 1.2799, "mean_token_accuracy": 0.7637021541595459, "num_tokens": 247926515.0, "step": 3670 }, { "entropy": 0.8825855419039726, "epoch": 1.1148223888510187, "grad_norm": 0.10977496951818466, "learning_rate": 9.983870488634102e-05, "loss": 1.2902, "mean_token_accuracy": 0.7545716062188148, "num_tokens": 248605455.0, "step": 3680 }, { "entropy": 0.8877398714423179, "epoch": 1.1178520033325758, "grad_norm": 0.12345528602600098, "learning_rate": 9.983601366590457e-05, "loss": 1.2936, "mean_token_accuracy": 0.7516496270895004, "num_tokens": 249286066.0, "step": 3690 }, { "entropy": 0.878480176627636, "epoch": 1.1208816178141332, "grad_norm": 0.11568955332040787, "learning_rate": 9.983330021611297e-05, "loss": 1.2833, "mean_token_accuracy": 0.7629318952560424, "num_tokens": 249967062.0, "step": 3700 }, { "entropy": 0.8880110561847687, "epoch": 1.1239112322956903, "grad_norm": 0.125409334897995, "learning_rate": 9.983056453817657e-05, "loss": 1.2921, "mean_token_accuracy": 0.7574349761009216, "num_tokens": 250657458.0, "step": 3710 }, { "entropy": 0.8835234329104423, "epoch": 1.1269408467772477, "grad_norm": 0.10620824247598648, "learning_rate": 9.98278066333156e-05, "loss": 1.2856, "mean_token_accuracy": 0.7525221645832062, "num_tokens": 251325229.0, "step": 3720 }, { "entropy": 0.8757663741707802, "epoch": 1.1299704612588048, "grad_norm": 0.1087966337800026, "learning_rate": 9.98250265027602e-05, "loss": 1.2781, "mean_token_accuracy": 0.7562583640217782, "num_tokens": 252005769.0, "step": 3730 }, { "entropy": 0.8799458980560303, "epoch": 1.133000075740362, "grad_norm": 0.09769339114427567, "learning_rate": 9.982222414775048e-05, "loss": 1.2847, "mean_token_accuracy": 0.7568388402462005, "num_tokens": 252684613.0, "step": 3740 }, { "entropy": 0.8838448315858841, "epoch": 1.1360296902219194, "grad_norm": 0.12134331464767456, "learning_rate": 9.981939956953645e-05, "loss": 1.2864, "mean_token_accuracy": 0.7574134305119514, "num_tokens": 253374054.0, "step": 3750 }, { "entropy": 0.8901113018393516, "epoch": 1.1390593047034765, "grad_norm": 0.10419149696826935, "learning_rate": 9.981655276937798e-05, "loss": 1.2892, "mean_token_accuracy": 0.753422400355339, "num_tokens": 254052098.0, "step": 3760 }, { "entropy": 0.8802848294377327, "epoch": 1.1420889191850336, "grad_norm": 0.09308606386184692, "learning_rate": 9.98136837485449e-05, "loss": 1.2817, "mean_token_accuracy": 0.7608528643846512, "num_tokens": 254741940.0, "step": 3770 }, { "entropy": 0.8828628584742546, "epoch": 1.145118533666591, "grad_norm": 0.10295186936855316, "learning_rate": 9.981079250831691e-05, "loss": 1.2906, "mean_token_accuracy": 0.7555951252579689, "num_tokens": 255423370.0, "step": 3780 }, { "entropy": 0.8731147646903992, "epoch": 1.1481481481481481, "grad_norm": 0.11505525559186935, "learning_rate": 9.980787904998367e-05, "loss": 1.2839, "mean_token_accuracy": 0.7563813909888267, "num_tokens": 256098019.0, "step": 3790 }, { "entropy": 0.8898579612374306, "epoch": 1.1511777626297053, "grad_norm": 0.11030592024326324, "learning_rate": 9.980494337484474e-05, "loss": 1.2995, "mean_token_accuracy": 0.757056450843811, "num_tokens": 256790873.0, "step": 3800 }, { "entropy": 0.8818782925605774, "epoch": 1.1542073771112626, "grad_norm": 0.10960029810667038, "learning_rate": 9.980198548420957e-05, "loss": 1.2926, "mean_token_accuracy": 0.7521349757909774, "num_tokens": 257462265.0, "step": 3810 }, { "entropy": 0.881763118505478, "epoch": 1.1572369915928198, "grad_norm": 0.10717824846506119, "learning_rate": 9.97990053793975e-05, "loss": 1.2885, "mean_token_accuracy": 0.7590022325515747, "num_tokens": 258140456.0, "step": 3820 }, { "entropy": 0.872139398753643, "epoch": 1.160266606074377, "grad_norm": 0.10643764585256577, "learning_rate": 9.979600306173784e-05, "loss": 1.2766, "mean_token_accuracy": 0.7581851229071617, "num_tokens": 258824709.0, "step": 3830 }, { "entropy": 0.8823701068758965, "epoch": 1.1632962205559343, "grad_norm": 0.10628998279571533, "learning_rate": 9.979297853256976e-05, "loss": 1.2849, "mean_token_accuracy": 0.7554057717323304, "num_tokens": 259502340.0, "step": 3840 }, { "entropy": 0.8798398494720459, "epoch": 1.1663258350374914, "grad_norm": 0.10866077244281769, "learning_rate": 9.978993179324235e-05, "loss": 1.2877, "mean_token_accuracy": 0.7594596952199936, "num_tokens": 260184864.0, "step": 3850 }, { "entropy": 0.8884417340159416, "epoch": 1.1693554495190488, "grad_norm": 0.11306183785200119, "learning_rate": 9.978686284511461e-05, "loss": 1.2887, "mean_token_accuracy": 0.7554182320833206, "num_tokens": 260872291.0, "step": 3860 }, { "entropy": 0.8777225404977799, "epoch": 1.172385064000606, "grad_norm": 0.10495192557573318, "learning_rate": 9.978377168955545e-05, "loss": 1.2848, "mean_token_accuracy": 0.754628412425518, "num_tokens": 261526593.0, "step": 3870 }, { "entropy": 0.876430393755436, "epoch": 1.175414678482163, "grad_norm": 0.10036977380514145, "learning_rate": 9.978065832794368e-05, "loss": 1.2938, "mean_token_accuracy": 0.755861134827137, "num_tokens": 262189896.0, "step": 3880 }, { "entropy": 0.883771327137947, "epoch": 1.1784442929637204, "grad_norm": 0.10100783407688141, "learning_rate": 9.9777522761668e-05, "loss": 1.2998, "mean_token_accuracy": 0.7537850499153137, "num_tokens": 262854808.0, "step": 3890 }, { "entropy": 0.8863219231367111, "epoch": 1.1814739074452776, "grad_norm": 0.10015401989221573, "learning_rate": 9.977436499212704e-05, "loss": 1.2849, "mean_token_accuracy": 0.7554245010018349, "num_tokens": 263541282.0, "step": 3900 }, { "entropy": 0.8819337621331215, "epoch": 1.1845035219268347, "grad_norm": 0.1085055023431778, "learning_rate": 9.977118502072933e-05, "loss": 1.2887, "mean_token_accuracy": 0.7516910418868065, "num_tokens": 264209728.0, "step": 3910 }, { "entropy": 0.8842660367488862, "epoch": 1.187533136408392, "grad_norm": 0.13629150390625, "learning_rate": 9.976798284889328e-05, "loss": 1.2934, "mean_token_accuracy": 0.7530283167958259, "num_tokens": 264878445.0, "step": 3920 }, { "entropy": 0.8812041372060776, "epoch": 1.1905627508899492, "grad_norm": 0.12887629866600037, "learning_rate": 9.976475847804723e-05, "loss": 1.2819, "mean_token_accuracy": 0.7571926325559616, "num_tokens": 265569481.0, "step": 3930 }, { "entropy": 0.8909408152103424, "epoch": 1.1935923653715066, "grad_norm": 0.10945051908493042, "learning_rate": 9.976151190962941e-05, "loss": 1.3019, "mean_token_accuracy": 0.7494542822241783, "num_tokens": 266234478.0, "step": 3940 }, { "entropy": 0.8925550684332848, "epoch": 1.1966219798530637, "grad_norm": 0.10394002497196198, "learning_rate": 9.975824314508794e-05, "loss": 1.2984, "mean_token_accuracy": 0.7525490179657937, "num_tokens": 266902064.0, "step": 3950 }, { "entropy": 0.8868040114641189, "epoch": 1.1996515943346209, "grad_norm": 0.10101783275604248, "learning_rate": 9.975495218588089e-05, "loss": 1.2952, "mean_token_accuracy": 0.7574144750833511, "num_tokens": 267586908.0, "step": 3960 }, { "entropy": 0.8726276144385338, "epoch": 1.2026812088161782, "grad_norm": 0.1203441247344017, "learning_rate": 9.975163903347615e-05, "loss": 1.286, "mean_token_accuracy": 0.7547452345490455, "num_tokens": 268257457.0, "step": 3970 }, { "entropy": 0.8927853986620903, "epoch": 1.2057108232977354, "grad_norm": 0.11372381448745728, "learning_rate": 9.974830368935156e-05, "loss": 1.2943, "mean_token_accuracy": 0.7555569022893905, "num_tokens": 268943971.0, "step": 3980 }, { "entropy": 0.8818222165107727, "epoch": 1.2087404377792925, "grad_norm": 0.12832969427108765, "learning_rate": 9.974494615499487e-05, "loss": 1.2887, "mean_token_accuracy": 0.7539437264204025, "num_tokens": 269618169.0, "step": 3990 }, { "entropy": 0.8911607503890991, "epoch": 1.2117700522608499, "grad_norm": 0.10649841278791428, "learning_rate": 9.974156643190369e-05, "loss": 1.2929, "mean_token_accuracy": 0.7534996464848518, "num_tokens": 270296734.0, "step": 4000 }, { "entropy": 0.8867155745625496, "epoch": 1.214799666742407, "grad_norm": 0.09304763376712799, "learning_rate": 9.973816452158555e-05, "loss": 1.2863, "mean_token_accuracy": 0.7531761646270752, "num_tokens": 270979344.0, "step": 4010 }, { "entropy": 0.8814450964331627, "epoch": 1.2178292812239642, "grad_norm": 0.11045564711093903, "learning_rate": 9.973474042555787e-05, "loss": 1.289, "mean_token_accuracy": 0.7529393047094345, "num_tokens": 271652049.0, "step": 4020 }, { "entropy": 0.8878124937415123, "epoch": 1.2208588957055215, "grad_norm": 0.10780147463083267, "learning_rate": 9.973129414534797e-05, "loss": 1.2897, "mean_token_accuracy": 0.7520148739218712, "num_tokens": 272325049.0, "step": 4030 }, { "entropy": 0.8735245853662491, "epoch": 1.2238885101870787, "grad_norm": 0.12177599221467972, "learning_rate": 9.972782568249308e-05, "loss": 1.2847, "mean_token_accuracy": 0.7628583461046219, "num_tokens": 273024307.0, "step": 4040 }, { "entropy": 0.8731584176421165, "epoch": 1.2269181246686358, "grad_norm": 0.11406262218952179, "learning_rate": 9.972433503854028e-05, "loss": 1.2906, "mean_token_accuracy": 0.7569164022803306, "num_tokens": 273691121.0, "step": 4050 }, { "entropy": 0.8802512750029564, "epoch": 1.2299477391501932, "grad_norm": 0.12157581746578217, "learning_rate": 9.972082221504659e-05, "loss": 1.2819, "mean_token_accuracy": 0.754756960272789, "num_tokens": 274362828.0, "step": 4060 }, { "entropy": 0.8889210850000382, "epoch": 1.2329773536317503, "grad_norm": 0.10852083563804626, "learning_rate": 9.97172872135789e-05, "loss": 1.2944, "mean_token_accuracy": 0.749279560148716, "num_tokens": 275024532.0, "step": 4070 }, { "entropy": 0.8845911890268325, "epoch": 1.2360069681133075, "grad_norm": 0.11005109548568726, "learning_rate": 9.971373003571401e-05, "loss": 1.2904, "mean_token_accuracy": 0.753645335137844, "num_tokens": 275696277.0, "step": 4080 }, { "entropy": 0.8746917456388473, "epoch": 1.2390365825948648, "grad_norm": 0.12576720118522644, "learning_rate": 9.971015068303856e-05, "loss": 1.2795, "mean_token_accuracy": 0.7542210564017295, "num_tokens": 276361925.0, "step": 4090 }, { "entropy": 0.8848961919546128, "epoch": 1.242066197076422, "grad_norm": 0.10003882646560669, "learning_rate": 9.970654915714917e-05, "loss": 1.2927, "mean_token_accuracy": 0.7483493506908416, "num_tokens": 277026539.0, "step": 4100 }, { "entropy": 0.8785070508718491, "epoch": 1.2450958115579793, "grad_norm": 0.09673545509576797, "learning_rate": 9.970292545965225e-05, "loss": 1.2905, "mean_token_accuracy": 0.7515549525618553, "num_tokens": 277699565.0, "step": 4110 }, { "entropy": 0.8770564898848534, "epoch": 1.2481254260395365, "grad_norm": 0.10010041296482086, "learning_rate": 9.969927959216421e-05, "loss": 1.2867, "mean_token_accuracy": 0.7587835326790809, "num_tokens": 278376956.0, "step": 4120 }, { "entropy": 0.8782692402601242, "epoch": 1.2511550405210936, "grad_norm": 0.11118870228528976, "learning_rate": 9.969561155631123e-05, "loss": 1.2828, "mean_token_accuracy": 0.757876068353653, "num_tokens": 279052134.0, "step": 4130 }, { "entropy": 0.8728073760867119, "epoch": 1.254184655002651, "grad_norm": 0.1019531860947609, "learning_rate": 9.969192135372948e-05, "loss": 1.2771, "mean_token_accuracy": 0.7562713280320168, "num_tokens": 279720145.0, "step": 4140 }, { "entropy": 0.8769888460636139, "epoch": 1.2572142694842081, "grad_norm": 0.10588269680738449, "learning_rate": 9.968820898606496e-05, "loss": 1.2882, "mean_token_accuracy": 0.7562667936086654, "num_tokens": 280396688.0, "step": 4150 }, { "entropy": 0.881463372707367, "epoch": 1.2602438839657655, "grad_norm": 0.11158733814954758, "learning_rate": 9.968447445497356e-05, "loss": 1.2923, "mean_token_accuracy": 0.7556000202894211, "num_tokens": 281078455.0, "step": 4160 }, { "entropy": 0.884491217136383, "epoch": 1.2632734984473226, "grad_norm": 0.10962716490030289, "learning_rate": 9.96807177621211e-05, "loss": 1.2855, "mean_token_accuracy": 0.7545291304588317, "num_tokens": 281752436.0, "step": 4170 }, { "entropy": 0.876974818110466, "epoch": 1.2663031129288798, "grad_norm": 0.11437703669071198, "learning_rate": 9.967693890918322e-05, "loss": 1.2814, "mean_token_accuracy": 0.7614217966794967, "num_tokens": 282448782.0, "step": 4180 }, { "entropy": 0.8695633113384247, "epoch": 1.2693327274104371, "grad_norm": 0.11076894402503967, "learning_rate": 9.967313789784548e-05, "loss": 1.2777, "mean_token_accuracy": 0.7562600076198578, "num_tokens": 283113429.0, "step": 4190 }, { "entropy": 0.8868823811411858, "epoch": 1.2723623418919943, "grad_norm": 0.12090124189853668, "learning_rate": 9.966931472980335e-05, "loss": 1.2951, "mean_token_accuracy": 0.758067575097084, "num_tokens": 283803378.0, "step": 4200 }, { "entropy": 0.8870190247893334, "epoch": 1.2753919563735514, "grad_norm": 0.1082521453499794, "learning_rate": 9.966546940676212e-05, "loss": 1.2852, "mean_token_accuracy": 0.756389519572258, "num_tokens": 284487935.0, "step": 4210 }, { "entropy": 0.8746539890766144, "epoch": 1.2784215708551088, "grad_norm": 0.10147053748369217, "learning_rate": 9.9661601930437e-05, "loss": 1.2789, "mean_token_accuracy": 0.7614048615097999, "num_tokens": 285170657.0, "step": 4220 }, { "entropy": 0.8725147917866707, "epoch": 1.281451185336666, "grad_norm": 0.11726569384336472, "learning_rate": 9.965771230255312e-05, "loss": 1.2829, "mean_token_accuracy": 0.7540886506438256, "num_tokens": 285844852.0, "step": 4230 }, { "entropy": 0.8828619286417961, "epoch": 1.284480799818223, "grad_norm": 0.0989094227552414, "learning_rate": 9.96538005248454e-05, "loss": 1.2855, "mean_token_accuracy": 0.7537649929523468, "num_tokens": 286515351.0, "step": 4240 }, { "entropy": 0.8782873898744583, "epoch": 1.2875104142997804, "grad_norm": 0.1214325875043869, "learning_rate": 9.96498665990587e-05, "loss": 1.2769, "mean_token_accuracy": 0.7535983622074127, "num_tokens": 287195468.0, "step": 4250 }, { "entropy": 0.8750748768448829, "epoch": 1.2905400287813376, "grad_norm": 0.10766669362783432, "learning_rate": 9.964591052694776e-05, "loss": 1.2815, "mean_token_accuracy": 0.7573293700814248, "num_tokens": 287864110.0, "step": 4260 }, { "entropy": 0.8821496844291687, "epoch": 1.2935696432628947, "grad_norm": 0.12105648219585419, "learning_rate": 9.964193231027719e-05, "loss": 1.2869, "mean_token_accuracy": 0.7547226190567017, "num_tokens": 288540926.0, "step": 4270 }, { "entropy": 0.8781195849180221, "epoch": 1.296599257744452, "grad_norm": 0.1090843677520752, "learning_rate": 9.963793195082145e-05, "loss": 1.2869, "mean_token_accuracy": 0.7584155231714249, "num_tokens": 289231734.0, "step": 4280 }, { "entropy": 0.8835205435752869, "epoch": 1.2996288722260092, "grad_norm": 0.10426436364650726, "learning_rate": 9.963390945036492e-05, "loss": 1.2809, "mean_token_accuracy": 0.7557179808616639, "num_tokens": 289919399.0, "step": 4290 }, { "entropy": 0.8799811050295829, "epoch": 1.3026584867075663, "grad_norm": 0.1049380972981453, "learning_rate": 9.962986481070184e-05, "loss": 1.2834, "mean_token_accuracy": 0.757496677339077, "num_tokens": 290605907.0, "step": 4300 }, { "entropy": 0.8923942729830742, "epoch": 1.3056881011891237, "grad_norm": 0.10135234892368317, "learning_rate": 9.96257980336363e-05, "loss": 1.2983, "mean_token_accuracy": 0.7495782405138016, "num_tokens": 291271083.0, "step": 4310 }, { "entropy": 0.8736655041575432, "epoch": 1.3087177156706808, "grad_norm": 0.1003749743103981, "learning_rate": 9.96217091209823e-05, "loss": 1.2764, "mean_token_accuracy": 0.7608644172549248, "num_tokens": 291954576.0, "step": 4320 }, { "entropy": 0.877230028808117, "epoch": 1.311747330152238, "grad_norm": 0.10136937350034714, "learning_rate": 9.96175980745637e-05, "loss": 1.291, "mean_token_accuracy": 0.7531885996460914, "num_tokens": 292622310.0, "step": 4330 }, { "entropy": 0.8699756860733032, "epoch": 1.3147769446337954, "grad_norm": 0.1041715145111084, "learning_rate": 9.961346489621424e-05, "loss": 1.2745, "mean_token_accuracy": 0.7576002046465874, "num_tokens": 293298177.0, "step": 4340 }, { "entropy": 0.8843838930130005, "epoch": 1.3178065591153525, "grad_norm": 0.14830972254276276, "learning_rate": 9.96093095877775e-05, "loss": 1.2964, "mean_token_accuracy": 0.756398618221283, "num_tokens": 293969251.0, "step": 4350 }, { "entropy": 0.885055086016655, "epoch": 1.3208361735969099, "grad_norm": 0.10774227231740952, "learning_rate": 9.960513215110699e-05, "loss": 1.2935, "mean_token_accuracy": 0.7495698869228363, "num_tokens": 294628233.0, "step": 4360 }, { "entropy": 0.8791931942105293, "epoch": 1.323865788078467, "grad_norm": 0.12053106725215912, "learning_rate": 9.960093258806601e-05, "loss": 1.281, "mean_token_accuracy": 0.7571017786860466, "num_tokens": 295306528.0, "step": 4370 }, { "entropy": 0.8699711933732033, "epoch": 1.3268954025600244, "grad_norm": 0.09862420707941055, "learning_rate": 9.95967109005278e-05, "loss": 1.2909, "mean_token_accuracy": 0.7579729691147804, "num_tokens": 295972866.0, "step": 4380 }, { "entropy": 0.8723306894302368, "epoch": 1.3299250170415815, "grad_norm": 0.10990869998931885, "learning_rate": 9.959246709037546e-05, "loss": 1.2867, "mean_token_accuracy": 0.7555100724101067, "num_tokens": 296645546.0, "step": 4390 }, { "entropy": 0.8749506160616874, "epoch": 1.3329546315231386, "grad_norm": 0.10895519703626633, "learning_rate": 9.958820115950192e-05, "loss": 1.2835, "mean_token_accuracy": 0.7593660697340965, "num_tokens": 297319750.0, "step": 4400 }, { "entropy": 0.8869535252451897, "epoch": 1.335984246004696, "grad_norm": 0.09852419793605804, "learning_rate": 9.958391310981001e-05, "loss": 1.2935, "mean_token_accuracy": 0.7517649352550506, "num_tokens": 297983484.0, "step": 4410 }, { "entropy": 0.8746039196848869, "epoch": 1.3390138604862531, "grad_norm": 0.09832970052957535, "learning_rate": 9.957960294321241e-05, "loss": 1.2792, "mean_token_accuracy": 0.757124112546444, "num_tokens": 298666438.0, "step": 4420 }, { "entropy": 0.8672180339694023, "epoch": 1.3420434749678103, "grad_norm": 0.11954855918884277, "learning_rate": 9.957527066163167e-05, "loss": 1.2817, "mean_token_accuracy": 0.7571039840579032, "num_tokens": 299332287.0, "step": 4430 }, { "entropy": 0.8617986917495728, "epoch": 1.3450730894493677, "grad_norm": 0.13852055370807648, "learning_rate": 9.95709162670002e-05, "loss": 1.2695, "mean_token_accuracy": 0.7625701040029526, "num_tokens": 300033099.0, "step": 4440 }, { "entropy": 0.8819914147257805, "epoch": 1.3481027039309248, "grad_norm": 0.1199556440114975, "learning_rate": 9.956653976126028e-05, "loss": 1.2931, "mean_token_accuracy": 0.7540413573384285, "num_tokens": 300698440.0, "step": 4450 }, { "entropy": 0.8740648403763771, "epoch": 1.351132318412482, "grad_norm": 0.12838856875896454, "learning_rate": 9.956214114636407e-05, "loss": 1.2729, "mean_token_accuracy": 0.7602340117096901, "num_tokens": 301384029.0, "step": 4460 }, { "entropy": 0.8644291788339615, "epoch": 1.3541619328940393, "grad_norm": 0.14665523171424866, "learning_rate": 9.955772042427352e-05, "loss": 1.2688, "mean_token_accuracy": 0.7580505087971687, "num_tokens": 302053693.0, "step": 4470 }, { "entropy": 0.8759150877594948, "epoch": 1.3571915473755964, "grad_norm": 0.110257588326931, "learning_rate": 9.955327759696056e-05, "loss": 1.282, "mean_token_accuracy": 0.7561334684491158, "num_tokens": 302738677.0, "step": 4480 }, { "entropy": 0.8849847927689553, "epoch": 1.3602211618571536, "grad_norm": 0.09808912873268127, "learning_rate": 9.954881266640686e-05, "loss": 1.2843, "mean_token_accuracy": 0.754899476468563, "num_tokens": 303421514.0, "step": 4490 }, { "entropy": 0.8758756816387177, "epoch": 1.363250776338711, "grad_norm": 0.1278056800365448, "learning_rate": 9.954432563460403e-05, "loss": 1.2892, "mean_token_accuracy": 0.7498972356319428, "num_tokens": 304069800.0, "step": 4500 }, { "entropy": 0.8744087040424346, "epoch": 1.366280390820268, "grad_norm": 0.1052425354719162, "learning_rate": 9.95398165035535e-05, "loss": 1.2871, "mean_token_accuracy": 0.753123264014721, "num_tokens": 304726982.0, "step": 4510 }, { "entropy": 0.8700909554958344, "epoch": 1.3693100053018252, "grad_norm": 0.10656082630157471, "learning_rate": 9.953528527526659e-05, "loss": 1.2771, "mean_token_accuracy": 0.7586307719349861, "num_tokens": 305405130.0, "step": 4520 }, { "entropy": 0.8599863409996032, "epoch": 1.3723396197833826, "grad_norm": 0.1053885743021965, "learning_rate": 9.953073195176441e-05, "loss": 1.2689, "mean_token_accuracy": 0.7555717900395393, "num_tokens": 306073303.0, "step": 4530 }, { "entropy": 0.8755203872919083, "epoch": 1.3753692342649397, "grad_norm": 0.10587162524461746, "learning_rate": 9.952615653507801e-05, "loss": 1.2903, "mean_token_accuracy": 0.7531731277704239, "num_tokens": 306727301.0, "step": 4540 }, { "entropy": 0.8776930332183838, "epoch": 1.3783988487464969, "grad_norm": 0.10954715311527252, "learning_rate": 9.952155902724825e-05, "loss": 1.2838, "mean_token_accuracy": 0.7542294219136239, "num_tokens": 307402679.0, "step": 4550 }, { "entropy": 0.8587560087442399, "epoch": 1.3814284632280542, "grad_norm": 0.09825403243303299, "learning_rate": 9.951693943032584e-05, "loss": 1.2702, "mean_token_accuracy": 0.7581495806574822, "num_tokens": 308078088.0, "step": 4560 }, { "entropy": 0.878232404589653, "epoch": 1.3844580777096114, "grad_norm": 0.11433161795139313, "learning_rate": 9.951229774637133e-05, "loss": 1.2885, "mean_token_accuracy": 0.753068993985653, "num_tokens": 308738906.0, "step": 4570 }, { "entropy": 0.8618496775627136, "epoch": 1.3874876921911687, "grad_norm": 0.09896930307149887, "learning_rate": 9.950763397745518e-05, "loss": 1.2758, "mean_token_accuracy": 0.7582163274288177, "num_tokens": 309410743.0, "step": 4580 }, { "entropy": 0.8812419295310974, "epoch": 1.3905173066727259, "grad_norm": 0.10881632566452026, "learning_rate": 9.950294812565764e-05, "loss": 1.2879, "mean_token_accuracy": 0.7559777319431304, "num_tokens": 310083429.0, "step": 4590 }, { "entropy": 0.8856163591146469, "epoch": 1.3935469211542832, "grad_norm": 0.09903638064861298, "learning_rate": 9.949824019306883e-05, "loss": 1.2821, "mean_token_accuracy": 0.7560224905610085, "num_tokens": 310766017.0, "step": 4600 }, { "entropy": 0.8799282774329186, "epoch": 1.3965765356358404, "grad_norm": 0.1002153754234314, "learning_rate": 9.949351018178876e-05, "loss": 1.2837, "mean_token_accuracy": 0.7538231521844864, "num_tokens": 311438137.0, "step": 4610 }, { "entropy": 0.8874617710709571, "epoch": 1.3996061501173975, "grad_norm": 0.1033317819237709, "learning_rate": 9.948875809392724e-05, "loss": 1.2923, "mean_token_accuracy": 0.7539964064955711, "num_tokens": 312103399.0, "step": 4620 }, { "entropy": 0.882491098344326, "epoch": 1.402635764598955, "grad_norm": 0.1603907346725464, "learning_rate": 9.94839839316039e-05, "loss": 1.2886, "mean_token_accuracy": 0.7503088295459748, "num_tokens": 312761854.0, "step": 4630 }, { "entropy": 0.8864075720310212, "epoch": 1.405665379080512, "grad_norm": 0.13780377805233002, "learning_rate": 9.947918769694829e-05, "loss": 1.2861, "mean_token_accuracy": 0.7553250983357429, "num_tokens": 313446543.0, "step": 4640 }, { "entropy": 0.8821283102035522, "epoch": 1.4086949935620692, "grad_norm": 0.10473109781742096, "learning_rate": 9.947436939209976e-05, "loss": 1.2914, "mean_token_accuracy": 0.7523757264018058, "num_tokens": 314119299.0, "step": 4650 }, { "entropy": 0.8727343067526817, "epoch": 1.4117246080436265, "grad_norm": 0.12804046273231506, "learning_rate": 9.946952901920752e-05, "loss": 1.282, "mean_token_accuracy": 0.7526518985629082, "num_tokens": 314776989.0, "step": 4660 }, { "entropy": 0.8780082523822784, "epoch": 1.4147542225251837, "grad_norm": 0.09967619180679321, "learning_rate": 9.94646665804306e-05, "loss": 1.2964, "mean_token_accuracy": 0.7571787014603615, "num_tokens": 315452116.0, "step": 4670 }, { "entropy": 0.8719828292727471, "epoch": 1.4177838370067408, "grad_norm": 0.11002221703529358, "learning_rate": 9.945978207793793e-05, "loss": 1.2773, "mean_token_accuracy": 0.7546962603926659, "num_tokens": 316123195.0, "step": 4680 }, { "entropy": 0.8698706135153771, "epoch": 1.4208134514882982, "grad_norm": 0.11689720302820206, "learning_rate": 9.945487551390823e-05, "loss": 1.2822, "mean_token_accuracy": 0.7534810394048691, "num_tokens": 316793218.0, "step": 4690 }, { "entropy": 0.8843429014086723, "epoch": 1.4238430659698553, "grad_norm": 0.10962754487991333, "learning_rate": 9.944994689053004e-05, "loss": 1.2842, "mean_token_accuracy": 0.7533178478479385, "num_tokens": 317477092.0, "step": 4700 }, { "entropy": 0.8758864805102349, "epoch": 1.4268726804514125, "grad_norm": 0.11763538420200348, "learning_rate": 9.944499621000181e-05, "loss": 1.2844, "mean_token_accuracy": 0.7536002263426781, "num_tokens": 318143077.0, "step": 4710 }, { "entropy": 0.8750483646988869, "epoch": 1.4299022949329698, "grad_norm": 0.11927127838134766, "learning_rate": 9.944002347453176e-05, "loss": 1.2764, "mean_token_accuracy": 0.7574943244457245, "num_tokens": 318837687.0, "step": 4720 }, { "entropy": 0.8810449033975601, "epoch": 1.432931909414527, "grad_norm": 0.09647629410028458, "learning_rate": 9.9435028686338e-05, "loss": 1.2795, "mean_token_accuracy": 0.7602642744779586, "num_tokens": 319526456.0, "step": 4730 }, { "entropy": 0.8667797297239304, "epoch": 1.4359615238960841, "grad_norm": 0.08604594320058823, "learning_rate": 9.943001184764845e-05, "loss": 1.2777, "mean_token_accuracy": 0.7617032259702683, "num_tokens": 320213783.0, "step": 4740 }, { "entropy": 0.8809635192155838, "epoch": 1.4389911383776415, "grad_norm": 0.10920163244009018, "learning_rate": 9.942497296070087e-05, "loss": 1.2754, "mean_token_accuracy": 0.759989570081234, "num_tokens": 320914323.0, "step": 4750 }, { "entropy": 0.8780781880021096, "epoch": 1.4420207528591986, "grad_norm": 0.09842902421951294, "learning_rate": 9.941991202774287e-05, "loss": 1.2828, "mean_token_accuracy": 0.756574122607708, "num_tokens": 321586894.0, "step": 4760 }, { "entropy": 0.8765702039003372, "epoch": 1.4450503673407558, "grad_norm": 0.11967124789953232, "learning_rate": 9.941482905103185e-05, "loss": 1.2817, "mean_token_accuracy": 0.7565953239798546, "num_tokens": 322257869.0, "step": 4770 }, { "entropy": 0.8695339649915695, "epoch": 1.4480799818223131, "grad_norm": 0.09481111913919449, "learning_rate": 9.940972403283511e-05, "loss": 1.279, "mean_token_accuracy": 0.7593210890889168, "num_tokens": 322939452.0, "step": 4780 }, { "entropy": 0.8763134732842446, "epoch": 1.4511095963038703, "grad_norm": 0.09376729279756546, "learning_rate": 9.94045969754297e-05, "loss": 1.2791, "mean_token_accuracy": 0.7569221302866935, "num_tokens": 323614509.0, "step": 4790 }, { "entropy": 0.8825785338878631, "epoch": 1.4541392107854276, "grad_norm": 0.10248862951993942, "learning_rate": 9.939944788110258e-05, "loss": 1.2803, "mean_token_accuracy": 0.7533236041665077, "num_tokens": 324295414.0, "step": 4800 }, { "entropy": 0.8729474455118179, "epoch": 1.4571688252669848, "grad_norm": 0.09469485282897949, "learning_rate": 9.939427675215047e-05, "loss": 1.2836, "mean_token_accuracy": 0.7562346294522285, "num_tokens": 324964806.0, "step": 4810 }, { "entropy": 0.8822784170508384, "epoch": 1.4601984397485421, "grad_norm": 0.10746327042579651, "learning_rate": 9.938908359087999e-05, "loss": 1.286, "mean_token_accuracy": 0.7533738493919373, "num_tokens": 325633215.0, "step": 4820 }, { "entropy": 0.8711674973368645, "epoch": 1.4632280542300993, "grad_norm": 0.10797962546348572, "learning_rate": 9.938386839960753e-05, "loss": 1.2763, "mean_token_accuracy": 0.7570240288972855, "num_tokens": 326307140.0, "step": 4830 }, { "entropy": 0.8763237610459328, "epoch": 1.4662576687116564, "grad_norm": 0.09768716990947723, "learning_rate": 9.937863118065932e-05, "loss": 1.2865, "mean_token_accuracy": 0.7576685190200806, "num_tokens": 326997033.0, "step": 4840 }, { "entropy": 0.8790414869785309, "epoch": 1.4692872831932138, "grad_norm": 0.11268474161624908, "learning_rate": 9.937337193637144e-05, "loss": 1.2874, "mean_token_accuracy": 0.7569172218441963, "num_tokens": 327672611.0, "step": 4850 }, { "entropy": 0.873789718747139, "epoch": 1.472316897674771, "grad_norm": 0.1072414219379425, "learning_rate": 9.936809066908975e-05, "loss": 1.2877, "mean_token_accuracy": 0.7574461817741394, "num_tokens": 328348125.0, "step": 4860 }, { "entropy": 0.869526818394661, "epoch": 1.475346512156328, "grad_norm": 0.13900327682495117, "learning_rate": 9.936278738116999e-05, "loss": 1.2797, "mean_token_accuracy": 0.7563824996352195, "num_tokens": 329021041.0, "step": 4870 }, { "entropy": 0.8688310131430625, "epoch": 1.4783761266378854, "grad_norm": 0.10042646527290344, "learning_rate": 9.935746207497766e-05, "loss": 1.2685, "mean_token_accuracy": 0.764211705327034, "num_tokens": 329722877.0, "step": 4880 }, { "entropy": 0.8855464920401573, "epoch": 1.4814057411194426, "grad_norm": 0.1466985046863556, "learning_rate": 9.935211475288815e-05, "loss": 1.2944, "mean_token_accuracy": 0.7504862293601036, "num_tokens": 330382084.0, "step": 4890 }, { "entropy": 0.8752141341567039, "epoch": 1.4844353556009997, "grad_norm": 0.13028596341609955, "learning_rate": 9.934674541728659e-05, "loss": 1.2804, "mean_token_accuracy": 0.758796738088131, "num_tokens": 331071319.0, "step": 4900 }, { "entropy": 0.8620499595999718, "epoch": 1.487464970082557, "grad_norm": 0.0955832302570343, "learning_rate": 9.934135407056801e-05, "loss": 1.2652, "mean_token_accuracy": 0.7580614849925041, "num_tokens": 331749844.0, "step": 4910 }, { "entropy": 0.8735622838139534, "epoch": 1.4904945845641142, "grad_norm": 0.11174575239419937, "learning_rate": 9.933594071513721e-05, "loss": 1.2812, "mean_token_accuracy": 0.7544201269745827, "num_tokens": 332417067.0, "step": 4920 }, { "entropy": 0.8812348023056984, "epoch": 1.4935241990456714, "grad_norm": 0.10573475062847137, "learning_rate": 9.93305053534088e-05, "loss": 1.2843, "mean_token_accuracy": 0.7559713959693909, "num_tokens": 333094908.0, "step": 4930 }, { "entropy": 0.868513534963131, "epoch": 1.4965538135272287, "grad_norm": 0.11965668201446533, "learning_rate": 9.932504798780724e-05, "loss": 1.2743, "mean_token_accuracy": 0.7537699833512306, "num_tokens": 333758618.0, "step": 4940 }, { "entropy": 0.884345480799675, "epoch": 1.4995834280087859, "grad_norm": 0.11149779707193375, "learning_rate": 9.93195686207668e-05, "loss": 1.2941, "mean_token_accuracy": 0.7486284375190735, "num_tokens": 334415616.0, "step": 4950 }, { "entropy": 0.8693102568387985, "epoch": 1.502613042490343, "grad_norm": 0.09893301129341125, "learning_rate": 9.93140672547315e-05, "loss": 1.2728, "mean_token_accuracy": 0.7564949616789818, "num_tokens": 335083027.0, "step": 4960 }, { "entropy": 0.8698922634124756, "epoch": 1.5056426569719004, "grad_norm": 0.09813852608203888, "learning_rate": 9.930854389215528e-05, "loss": 1.2804, "mean_token_accuracy": 0.7566551297903061, "num_tokens": 335757782.0, "step": 4970 }, { "entropy": 0.8766652792692184, "epoch": 1.5086722714534575, "grad_norm": 0.10962329804897308, "learning_rate": 9.930299853550182e-05, "loss": 1.2879, "mean_token_accuracy": 0.7540410026907921, "num_tokens": 336413450.0, "step": 4980 }, { "entropy": 0.883516064286232, "epoch": 1.5117018859350146, "grad_norm": 0.12260321527719498, "learning_rate": 9.929743118724462e-05, "loss": 1.2956, "mean_token_accuracy": 0.7535830676555634, "num_tokens": 337090592.0, "step": 4990 }, { "entropy": 0.870100949704647, "epoch": 1.514731500416572, "grad_norm": 0.10302906483411789, "learning_rate": 9.9291841849867e-05, "loss": 1.2792, "mean_token_accuracy": 0.7576045885682106, "num_tokens": 337766068.0, "step": 5000 }, { "entropy": 0.8732549920678139, "epoch": 1.5177611148981294, "grad_norm": 0.10612724721431732, "learning_rate": 9.928623052586207e-05, "loss": 1.2834, "mean_token_accuracy": 0.7551342651247979, "num_tokens": 338434072.0, "step": 5010 }, { "entropy": 0.8764806777238846, "epoch": 1.5207907293796863, "grad_norm": 0.11097440123558044, "learning_rate": 9.928059721773277e-05, "loss": 1.2809, "mean_token_accuracy": 0.7545226275920868, "num_tokens": 339109284.0, "step": 5020 }, { "entropy": 0.8647286728024483, "epoch": 1.5238203438612437, "grad_norm": 0.1085539236664772, "learning_rate": 9.927494192799187e-05, "loss": 1.275, "mean_token_accuracy": 0.7621150195598603, "num_tokens": 339787535.0, "step": 5030 }, { "entropy": 0.8686896696686744, "epoch": 1.526849958342801, "grad_norm": 0.24225415289402008, "learning_rate": 9.926926465916187e-05, "loss": 1.2769, "mean_token_accuracy": 0.7608293399214745, "num_tokens": 340480025.0, "step": 5040 }, { "entropy": 0.8659367457032203, "epoch": 1.529879572824358, "grad_norm": 0.1000758707523346, "learning_rate": 9.926356541377511e-05, "loss": 1.2812, "mean_token_accuracy": 0.7570604413747788, "num_tokens": 341152337.0, "step": 5050 }, { "entropy": 0.8760500758886337, "epoch": 1.5329091873059153, "grad_norm": 0.11556119471788406, "learning_rate": 9.925784419437378e-05, "loss": 1.284, "mean_token_accuracy": 0.7534583285450935, "num_tokens": 341827001.0, "step": 5060 }, { "entropy": 0.8637175768613815, "epoch": 1.5359388017874727, "grad_norm": 0.10243014991283417, "learning_rate": 9.92521010035098e-05, "loss": 1.2713, "mean_token_accuracy": 0.7606059044599534, "num_tokens": 342514084.0, "step": 5070 }, { "entropy": 0.8739731296896934, "epoch": 1.5389684162690298, "grad_norm": 0.10937733948230743, "learning_rate": 9.924633584374492e-05, "loss": 1.2856, "mean_token_accuracy": 0.7547189772129059, "num_tokens": 343186510.0, "step": 5080 }, { "entropy": 0.8762591108679771, "epoch": 1.541998030750587, "grad_norm": 0.09622039645910263, "learning_rate": 9.924054871765072e-05, "loss": 1.2744, "mean_token_accuracy": 0.756362409889698, "num_tokens": 343857408.0, "step": 5090 }, { "entropy": 0.8718797892332077, "epoch": 1.5450276452321443, "grad_norm": 0.09628882259130478, "learning_rate": 9.923473962780851e-05, "loss": 1.2841, "mean_token_accuracy": 0.75397929251194, "num_tokens": 344514362.0, "step": 5100 }, { "entropy": 0.8766556069254875, "epoch": 1.5480572597137015, "grad_norm": 0.1028362438082695, "learning_rate": 9.922890857680944e-05, "loss": 1.2815, "mean_token_accuracy": 0.7548733547329902, "num_tokens": 345187744.0, "step": 5110 }, { "entropy": 0.8707106709480286, "epoch": 1.5510868741952586, "grad_norm": 0.1039656549692154, "learning_rate": 9.922305556725447e-05, "loss": 1.2733, "mean_token_accuracy": 0.7616032481193542, "num_tokens": 345870292.0, "step": 5120 }, { "entropy": 0.8699567258358002, "epoch": 1.554116488676816, "grad_norm": 0.11757346242666245, "learning_rate": 9.921718060175433e-05, "loss": 1.2763, "mean_token_accuracy": 0.7593500182032585, "num_tokens": 346551306.0, "step": 5130 }, { "entropy": 0.8759250968694687, "epoch": 1.557146103158373, "grad_norm": 0.10635928809642792, "learning_rate": 9.921128368292955e-05, "loss": 1.2822, "mean_token_accuracy": 0.7576028853654861, "num_tokens": 347232359.0, "step": 5140 }, { "entropy": 0.868228904902935, "epoch": 1.5601757176399302, "grad_norm": 0.10289059579372406, "learning_rate": 9.920536481341043e-05, "loss": 1.2826, "mean_token_accuracy": 0.7575011759996414, "num_tokens": 347904580.0, "step": 5150 }, { "entropy": 0.8698439791798591, "epoch": 1.5632053321214876, "grad_norm": 0.1073436439037323, "learning_rate": 9.919942399583711e-05, "loss": 1.2731, "mean_token_accuracy": 0.7596629023551941, "num_tokens": 348592926.0, "step": 5160 }, { "entropy": 0.8713696137070656, "epoch": 1.5662349466030447, "grad_norm": 0.09829632937908173, "learning_rate": 9.919346123285947e-05, "loss": 1.279, "mean_token_accuracy": 0.7549123540520668, "num_tokens": 349259226.0, "step": 5170 }, { "entropy": 0.87287005931139, "epoch": 1.5692645610846019, "grad_norm": 0.09961492568254471, "learning_rate": 9.91874765271372e-05, "loss": 1.2739, "mean_token_accuracy": 0.7540394902229309, "num_tokens": 349934237.0, "step": 5180 }, { "entropy": 0.8709146901965141, "epoch": 1.5722941755661592, "grad_norm": 0.09700673073530197, "learning_rate": 9.918146988133981e-05, "loss": 1.2676, "mean_token_accuracy": 0.7552365481853485, "num_tokens": 350599993.0, "step": 5190 }, { "entropy": 0.8723388150334358, "epoch": 1.5753237900477164, "grad_norm": 0.10038121044635773, "learning_rate": 9.917544129814653e-05, "loss": 1.285, "mean_token_accuracy": 0.7541535750031472, "num_tokens": 351265711.0, "step": 5200 }, { "entropy": 0.8757157295942306, "epoch": 1.5783534045292735, "grad_norm": 0.10745465010404587, "learning_rate": 9.916939078024642e-05, "loss": 1.2792, "mean_token_accuracy": 0.7533563077449799, "num_tokens": 351929991.0, "step": 5210 }, { "entropy": 0.863342608511448, "epoch": 1.581383019010831, "grad_norm": 0.1044878140091896, "learning_rate": 9.916331833033831e-05, "loss": 1.269, "mean_token_accuracy": 0.7579909726977349, "num_tokens": 352606117.0, "step": 5220 }, { "entropy": 0.8738278642296791, "epoch": 1.5844126334923883, "grad_norm": 0.1057114228606224, "learning_rate": 9.915722395113083e-05, "loss": 1.2857, "mean_token_accuracy": 0.749761326611042, "num_tokens": 353258900.0, "step": 5230 }, { "entropy": 0.8743241295218468, "epoch": 1.5874422479739452, "grad_norm": 0.1229625940322876, "learning_rate": 9.915110764534236e-05, "loss": 1.2848, "mean_token_accuracy": 0.7557677298784256, "num_tokens": 353926805.0, "step": 5240 }, { "entropy": 0.8771678328514099, "epoch": 1.5904718624555025, "grad_norm": 0.105762779712677, "learning_rate": 9.91449694157011e-05, "loss": 1.2931, "mean_token_accuracy": 0.7478478208184243, "num_tokens": 354596448.0, "step": 5250 }, { "entropy": 0.8795763000845909, "epoch": 1.59350147693706, "grad_norm": 0.11217822134494781, "learning_rate": 9.9138809264945e-05, "loss": 1.2869, "mean_token_accuracy": 0.7559655770659447, "num_tokens": 355274131.0, "step": 5260 }, { "entropy": 0.8697790130972862, "epoch": 1.5965310914186168, "grad_norm": 0.09957357496023178, "learning_rate": 9.913262719582177e-05, "loss": 1.2781, "mean_token_accuracy": 0.7564894899725914, "num_tokens": 355944901.0, "step": 5270 }, { "entropy": 0.8686492651700973, "epoch": 1.5995607059001742, "grad_norm": 0.12873774766921997, "learning_rate": 9.912642321108896e-05, "loss": 1.2842, "mean_token_accuracy": 0.753792816400528, "num_tokens": 356611500.0, "step": 5280 }, { "entropy": 0.8777525410056114, "epoch": 1.6025903203817315, "grad_norm": 0.09984857589006424, "learning_rate": 9.912019731351383e-05, "loss": 1.2792, "mean_token_accuracy": 0.7551235914230346, "num_tokens": 357287167.0, "step": 5290 }, { "entropy": 0.8676513060927391, "epoch": 1.6056199348632887, "grad_norm": 0.10700773447751999, "learning_rate": 9.911394950587347e-05, "loss": 1.2853, "mean_token_accuracy": 0.7546106904745102, "num_tokens": 357959149.0, "step": 5300 }, { "entropy": 0.8721698507666588, "epoch": 1.6086495493448458, "grad_norm": 0.13279834389686584, "learning_rate": 9.910767979095467e-05, "loss": 1.2741, "mean_token_accuracy": 0.7623576447367668, "num_tokens": 358645791.0, "step": 5310 }, { "entropy": 0.8680669054389, "epoch": 1.6116791638264032, "grad_norm": 0.11023905873298645, "learning_rate": 9.910138817155409e-05, "loss": 1.2647, "mean_token_accuracy": 0.7610991448163986, "num_tokens": 359338240.0, "step": 5320 }, { "entropy": 0.8698973953723907, "epoch": 1.6147087783079603, "grad_norm": 0.10102608799934387, "learning_rate": 9.909507465047807e-05, "loss": 1.2784, "mean_token_accuracy": 0.7560726568102837, "num_tokens": 360016274.0, "step": 5330 }, { "entropy": 0.873685722053051, "epoch": 1.6177383927895175, "grad_norm": 0.10017687827348709, "learning_rate": 9.908873923054276e-05, "loss": 1.2784, "mean_token_accuracy": 0.7573044985532761, "num_tokens": 360700736.0, "step": 5340 }, { "entropy": 0.866089777648449, "epoch": 1.6207680072710748, "grad_norm": 0.10463502258062363, "learning_rate": 9.908238191457409e-05, "loss": 1.2654, "mean_token_accuracy": 0.7583584994077682, "num_tokens": 361365977.0, "step": 5350 }, { "entropy": 0.8682324588298798, "epoch": 1.623797621752632, "grad_norm": 0.12103241682052612, "learning_rate": 9.907600270540773e-05, "loss": 1.2726, "mean_token_accuracy": 0.7610218763351441, "num_tokens": 362051727.0, "step": 5360 }, { "entropy": 0.8686078056693077, "epoch": 1.6268272362341891, "grad_norm": 0.10196422785520554, "learning_rate": 9.906960160588911e-05, "loss": 1.272, "mean_token_accuracy": 0.760060878098011, "num_tokens": 362738689.0, "step": 5370 }, { "entropy": 0.8809151217341423, "epoch": 1.6298568507157465, "grad_norm": 0.10597776621580124, "learning_rate": 9.906317861887349e-05, "loss": 1.2887, "mean_token_accuracy": 0.7559880495071412, "num_tokens": 363411332.0, "step": 5380 }, { "entropy": 0.870364136993885, "epoch": 1.6328864651973036, "grad_norm": 0.10422533750534058, "learning_rate": 9.905673374722579e-05, "loss": 1.2691, "mean_token_accuracy": 0.7573687374591828, "num_tokens": 364083141.0, "step": 5390 }, { "entropy": 0.8716929346323014, "epoch": 1.6359160796788608, "grad_norm": 0.09048692882061005, "learning_rate": 9.905026699382078e-05, "loss": 1.275, "mean_token_accuracy": 0.759492652118206, "num_tokens": 364770249.0, "step": 5400 }, { "entropy": 0.8717505663633347, "epoch": 1.6389456941604181, "grad_norm": 0.12246933579444885, "learning_rate": 9.904377836154294e-05, "loss": 1.2753, "mean_token_accuracy": 0.7598855897784234, "num_tokens": 365456562.0, "step": 5410 }, { "entropy": 0.8657186686992645, "epoch": 1.6419753086419753, "grad_norm": 0.11263216286897659, "learning_rate": 9.903726785328651e-05, "loss": 1.2785, "mean_token_accuracy": 0.757922874391079, "num_tokens": 366132737.0, "step": 5420 }, { "entropy": 0.8811621457338333, "epoch": 1.6450049231235324, "grad_norm": 0.10388202965259552, "learning_rate": 9.903073547195555e-05, "loss": 1.2846, "mean_token_accuracy": 0.7510637819766999, "num_tokens": 366807208.0, "step": 5430 }, { "entropy": 0.8589653491973877, "epoch": 1.6480345376050898, "grad_norm": 0.0990695059299469, "learning_rate": 9.902418122046377e-05, "loss": 1.258, "mean_token_accuracy": 0.7583812937140465, "num_tokens": 367495342.0, "step": 5440 }, { "entropy": 0.8624016270041466, "epoch": 1.651064152086647, "grad_norm": 0.11094211786985397, "learning_rate": 9.901760510173474e-05, "loss": 1.2738, "mean_token_accuracy": 0.759155260026455, "num_tokens": 368163766.0, "step": 5450 }, { "entropy": 0.8742574378848076, "epoch": 1.654093766568204, "grad_norm": 0.09800291806459427, "learning_rate": 9.901100711870172e-05, "loss": 1.2763, "mean_token_accuracy": 0.7553526028990746, "num_tokens": 368834733.0, "step": 5460 }, { "entropy": 0.8638382017612457, "epoch": 1.6571233810497614, "grad_norm": 0.11040802299976349, "learning_rate": 9.900438727430775e-05, "loss": 1.2682, "mean_token_accuracy": 0.7588236540555954, "num_tokens": 369510419.0, "step": 5470 }, { "entropy": 0.8756036445498466, "epoch": 1.6601529955313188, "grad_norm": 0.09599775075912476, "learning_rate": 9.899774557150559e-05, "loss": 1.2818, "mean_token_accuracy": 0.7558170795440674, "num_tokens": 370180297.0, "step": 5480 }, { "entropy": 0.8640083447098732, "epoch": 1.6631826100128757, "grad_norm": 0.09472623467445374, "learning_rate": 9.89910820132578e-05, "loss": 1.2703, "mean_token_accuracy": 0.7586445361375809, "num_tokens": 370858685.0, "step": 5490 }, { "entropy": 0.8590528950095176, "epoch": 1.666212224494433, "grad_norm": 0.09686709195375443, "learning_rate": 9.898439660253662e-05, "loss": 1.2683, "mean_token_accuracy": 0.7555449813604355, "num_tokens": 371520228.0, "step": 5500 }, { "entropy": 0.8739757299423218, "epoch": 1.6692418389759904, "grad_norm": 0.10918669402599335, "learning_rate": 9.897768934232412e-05, "loss": 1.2768, "mean_token_accuracy": 0.7539754524827004, "num_tokens": 372190366.0, "step": 5510 }, { "entropy": 0.8738907009363175, "epoch": 1.6722714534575476, "grad_norm": 0.10441508144140244, "learning_rate": 9.897096023561205e-05, "loss": 1.2876, "mean_token_accuracy": 0.7504386335611344, "num_tokens": 372838175.0, "step": 5520 }, { "entropy": 0.8645015090703965, "epoch": 1.6753010679391047, "grad_norm": 0.11962274461984634, "learning_rate": 9.896420928540193e-05, "loss": 1.262, "mean_token_accuracy": 0.7578808009624481, "num_tokens": 373513090.0, "step": 5530 }, { "entropy": 0.872953699529171, "epoch": 1.678330682420662, "grad_norm": 0.11019662022590637, "learning_rate": 9.895743649470504e-05, "loss": 1.2797, "mean_token_accuracy": 0.7557148829102516, "num_tokens": 374188731.0, "step": 5540 }, { "entropy": 0.8694672659039497, "epoch": 1.6813602969022192, "grad_norm": 0.10090140253305435, "learning_rate": 9.895064186654236e-05, "loss": 1.2718, "mean_token_accuracy": 0.7555843487381935, "num_tokens": 374866416.0, "step": 5550 }, { "entropy": 0.8638262122869491, "epoch": 1.6843899113837764, "grad_norm": 0.10722874104976654, "learning_rate": 9.894382540394465e-05, "loss": 1.2715, "mean_token_accuracy": 0.7578088521957398, "num_tokens": 375542152.0, "step": 5560 }, { "entropy": 0.8736162975430488, "epoch": 1.6874195258653337, "grad_norm": 0.09556689113378525, "learning_rate": 9.893698710995239e-05, "loss": 1.2794, "mean_token_accuracy": 0.7577393084764481, "num_tokens": 376215629.0, "step": 5570 }, { "entropy": 0.8686207786202431, "epoch": 1.6904491403468909, "grad_norm": 0.11578402668237686, "learning_rate": 9.893012698761578e-05, "loss": 1.2785, "mean_token_accuracy": 0.7590609088540077, "num_tokens": 376884318.0, "step": 5580 }, { "entropy": 0.8811026304960251, "epoch": 1.693478754828448, "grad_norm": 0.14174573123455048, "learning_rate": 9.892324503999481e-05, "loss": 1.2932, "mean_token_accuracy": 0.7513189151883125, "num_tokens": 377543265.0, "step": 5590 }, { "entropy": 0.8696400627493859, "epoch": 1.6965083693100054, "grad_norm": 0.11313126981258392, "learning_rate": 9.891634127015916e-05, "loss": 1.2786, "mean_token_accuracy": 0.7602464750409126, "num_tokens": 378234862.0, "step": 5600 }, { "entropy": 0.8803420409560203, "epoch": 1.6995379837915625, "grad_norm": 0.11448783427476883, "learning_rate": 9.890941568118824e-05, "loss": 1.2881, "mean_token_accuracy": 0.7523095175623894, "num_tokens": 378902936.0, "step": 5610 }, { "entropy": 0.8576599538326264, "epoch": 1.7025675982731197, "grad_norm": 0.12137230485677719, "learning_rate": 9.890246827617124e-05, "loss": 1.2718, "mean_token_accuracy": 0.7621455058455467, "num_tokens": 379586990.0, "step": 5620 }, { "entropy": 0.8703350991010665, "epoch": 1.705597212754677, "grad_norm": 0.09922225028276443, "learning_rate": 9.889549905820703e-05, "loss": 1.2741, "mean_token_accuracy": 0.7533906310796737, "num_tokens": 380262953.0, "step": 5630 }, { "entropy": 0.8703566938638687, "epoch": 1.7086268272362342, "grad_norm": 0.11044721305370331, "learning_rate": 9.888850803040424e-05, "loss": 1.2706, "mean_token_accuracy": 0.754063467681408, "num_tokens": 380923283.0, "step": 5640 }, { "entropy": 0.8689834162592888, "epoch": 1.7116564417177913, "grad_norm": 0.10516627877950668, "learning_rate": 9.88814951958812e-05, "loss": 1.2813, "mean_token_accuracy": 0.7555178359150887, "num_tokens": 381596056.0, "step": 5650 }, { "entropy": 0.8661510065197945, "epoch": 1.7146860561993487, "grad_norm": 0.10870422422885895, "learning_rate": 9.887446055776601e-05, "loss": 1.275, "mean_token_accuracy": 0.7583819463849067, "num_tokens": 382272509.0, "step": 5660 }, { "entropy": 0.8740237966179848, "epoch": 1.7177156706809058, "grad_norm": 0.11483550816774368, "learning_rate": 9.886740411919645e-05, "loss": 1.2796, "mean_token_accuracy": 0.7544118940830231, "num_tokens": 382931957.0, "step": 5670 }, { "entropy": 0.8701524794101715, "epoch": 1.720745285162463, "grad_norm": 0.1000395193696022, "learning_rate": 9.886032588332006e-05, "loss": 1.2738, "mean_token_accuracy": 0.7561038956046104, "num_tokens": 383601391.0, "step": 5680 }, { "entropy": 0.8631006270647049, "epoch": 1.7237748996440203, "grad_norm": 0.16434259712696075, "learning_rate": 9.885322585329409e-05, "loss": 1.2664, "mean_token_accuracy": 0.7609700664877892, "num_tokens": 384284931.0, "step": 5690 }, { "entropy": 0.8777032285928726, "epoch": 1.7268045141255777, "grad_norm": 0.10967931151390076, "learning_rate": 9.884610403228547e-05, "loss": 1.2756, "mean_token_accuracy": 0.7540204703807831, "num_tokens": 384974180.0, "step": 5700 }, { "entropy": 0.8755529925227166, "epoch": 1.7298341286071346, "grad_norm": 0.10465622693300247, "learning_rate": 9.883896042347094e-05, "loss": 1.2796, "mean_token_accuracy": 0.7556664749979973, "num_tokens": 385658132.0, "step": 5710 }, { "entropy": 0.877837872505188, "epoch": 1.732863743088692, "grad_norm": 0.10969074815511703, "learning_rate": 9.883179503003688e-05, "loss": 1.279, "mean_token_accuracy": 0.7578695207834244, "num_tokens": 386345994.0, "step": 5720 }, { "entropy": 0.8727489277720452, "epoch": 1.7358933575702493, "grad_norm": 0.09947185963392258, "learning_rate": 9.882460785517941e-05, "loss": 1.279, "mean_token_accuracy": 0.7556466355919838, "num_tokens": 387024607.0, "step": 5730 }, { "entropy": 0.8709679767489433, "epoch": 1.7389229720518065, "grad_norm": 0.11316635459661484, "learning_rate": 9.881739890210437e-05, "loss": 1.2806, "mean_token_accuracy": 0.7591634154319763, "num_tokens": 387700729.0, "step": 5740 }, { "entropy": 0.8856248810887337, "epoch": 1.7419525865333636, "grad_norm": 0.1003992035984993, "learning_rate": 9.881016817402735e-05, "loss": 1.2825, "mean_token_accuracy": 0.7522728234529495, "num_tokens": 388377310.0, "step": 5750 }, { "entropy": 0.867271040380001, "epoch": 1.744982201014921, "grad_norm": 0.10750974714756012, "learning_rate": 9.880291567417356e-05, "loss": 1.2743, "mean_token_accuracy": 0.7556522607803344, "num_tokens": 389049038.0, "step": 5760 }, { "entropy": 0.8712126925587654, "epoch": 1.748011815496478, "grad_norm": 0.11678169667720795, "learning_rate": 9.879564140577801e-05, "loss": 1.2777, "mean_token_accuracy": 0.7550342082977295, "num_tokens": 389720842.0, "step": 5770 }, { "entropy": 0.8805740848183632, "epoch": 1.7510414299780352, "grad_norm": 0.11123267561197281, "learning_rate": 9.878834537208541e-05, "loss": 1.2924, "mean_token_accuracy": 0.7502569913864136, "num_tokens": 390386713.0, "step": 5780 }, { "entropy": 0.8717425882816314, "epoch": 1.7540710444595926, "grad_norm": 0.10272000730037689, "learning_rate": 9.87810275763501e-05, "loss": 1.2851, "mean_token_accuracy": 0.7530350014567375, "num_tokens": 391053621.0, "step": 5790 }, { "entropy": 0.8708827048540115, "epoch": 1.7571006589411498, "grad_norm": 0.1100660189986229, "learning_rate": 9.877368802183622e-05, "loss": 1.2695, "mean_token_accuracy": 0.7565293610095978, "num_tokens": 391734227.0, "step": 5800 }, { "entropy": 0.867878869175911, "epoch": 1.760130273422707, "grad_norm": 0.124241404235363, "learning_rate": 9.876632671181758e-05, "loss": 1.2696, "mean_token_accuracy": 0.7570357009768486, "num_tokens": 392407776.0, "step": 5810 }, { "entropy": 0.8709599047899246, "epoch": 1.7631598879042643, "grad_norm": 0.11583121865987778, "learning_rate": 9.875894364957767e-05, "loss": 1.2754, "mean_token_accuracy": 0.7543368205428124, "num_tokens": 393079411.0, "step": 5820 }, { "entropy": 0.861686784029007, "epoch": 1.7661895023858214, "grad_norm": 0.10723171383142471, "learning_rate": 9.875153883840973e-05, "loss": 1.2806, "mean_token_accuracy": 0.7571526080369949, "num_tokens": 393748704.0, "step": 5830 }, { "entropy": 0.8687817886471748, "epoch": 1.7692191168673785, "grad_norm": 0.10481414943933487, "learning_rate": 9.874411228161665e-05, "loss": 1.2746, "mean_token_accuracy": 0.7538354933261872, "num_tokens": 394408888.0, "step": 5840 }, { "entropy": 0.8641815066337586, "epoch": 1.772248731348936, "grad_norm": 0.11235295236110687, "learning_rate": 9.873666398251107e-05, "loss": 1.2682, "mean_token_accuracy": 0.7587895512580871, "num_tokens": 395085340.0, "step": 5850 }, { "entropy": 0.8639056503772735, "epoch": 1.775278345830493, "grad_norm": 0.0975707545876503, "learning_rate": 9.872919394441529e-05, "loss": 1.2667, "mean_token_accuracy": 0.7623942896723748, "num_tokens": 395765695.0, "step": 5860 }, { "entropy": 0.8668558150529861, "epoch": 1.7783079603120502, "grad_norm": 0.11737079918384552, "learning_rate": 9.872170217066133e-05, "loss": 1.2751, "mean_token_accuracy": 0.7637094333767891, "num_tokens": 396457097.0, "step": 5870 }, { "entropy": 0.8787486582994462, "epoch": 1.7813375747936075, "grad_norm": 0.13197804987430573, "learning_rate": 9.871418866459088e-05, "loss": 1.277, "mean_token_accuracy": 0.7568523600697518, "num_tokens": 397144826.0, "step": 5880 }, { "entropy": 0.8611941158771514, "epoch": 1.7843671892751647, "grad_norm": 0.10854239761829376, "learning_rate": 9.870665342955536e-05, "loss": 1.2709, "mean_token_accuracy": 0.7587135076522827, "num_tokens": 397823258.0, "step": 5890 }, { "entropy": 0.8542108252644539, "epoch": 1.7873968037567218, "grad_norm": 0.10155311226844788, "learning_rate": 9.869909646891585e-05, "loss": 1.2561, "mean_token_accuracy": 0.7667166635394096, "num_tokens": 398526408.0, "step": 5900 }, { "entropy": 0.8701602831482887, "epoch": 1.7904264182382792, "grad_norm": 0.10298395156860352, "learning_rate": 9.869151778604313e-05, "loss": 1.268, "mean_token_accuracy": 0.7556657701730728, "num_tokens": 399200079.0, "step": 5910 }, { "entropy": 0.8665944993495941, "epoch": 1.7934560327198366, "grad_norm": 0.132856547832489, "learning_rate": 9.868391738431768e-05, "loss": 1.2739, "mean_token_accuracy": 0.7654621496796608, "num_tokens": 399900518.0, "step": 5920 }, { "entropy": 0.8721923619508744, "epoch": 1.7964856472013935, "grad_norm": 0.10510390251874924, "learning_rate": 9.867629526712966e-05, "loss": 1.2749, "mean_token_accuracy": 0.7579918444156647, "num_tokens": 400575779.0, "step": 5930 }, { "entropy": 0.8664195269346238, "epoch": 1.7995152616829508, "grad_norm": 0.11647294461727142, "learning_rate": 9.866865143787891e-05, "loss": 1.2722, "mean_token_accuracy": 0.7604745402932167, "num_tokens": 401263785.0, "step": 5940 }, { "entropy": 0.8546414092183113, "epoch": 1.8025448761645082, "grad_norm": 0.12280753999948502, "learning_rate": 9.866098589997496e-05, "loss": 1.2619, "mean_token_accuracy": 0.7601353496313095, "num_tokens": 401933892.0, "step": 5950 }, { "entropy": 0.8621708378195763, "epoch": 1.8055744906460651, "grad_norm": 0.10126500576734543, "learning_rate": 9.8653298656837e-05, "loss": 1.2738, "mean_token_accuracy": 0.760628679394722, "num_tokens": 402617173.0, "step": 5960 }, { "entropy": 0.861925731599331, "epoch": 1.8086041051276225, "grad_norm": 0.1042354628443718, "learning_rate": 9.8645589711894e-05, "loss": 1.2728, "mean_token_accuracy": 0.7555099457502366, "num_tokens": 403278315.0, "step": 5970 }, { "entropy": 0.8682982236146927, "epoch": 1.8116337196091798, "grad_norm": 0.10307774692773819, "learning_rate": 9.863785906858446e-05, "loss": 1.2789, "mean_token_accuracy": 0.7566445082426071, "num_tokens": 403941185.0, "step": 5980 }, { "entropy": 0.865261273086071, "epoch": 1.814663334090737, "grad_norm": 0.11925513297319412, "learning_rate": 9.863010673035664e-05, "loss": 1.2796, "mean_token_accuracy": 0.7551749140024185, "num_tokens": 404617320.0, "step": 5990 }, { "entropy": 0.8638197466731071, "epoch": 1.8176929485722941, "grad_norm": 0.1083996593952179, "learning_rate": 9.862233270066852e-05, "loss": 1.2717, "mean_token_accuracy": 0.7606314837932586, "num_tokens": 405289238.0, "step": 6000 }, { "entropy": 0.8722123250365257, "epoch": 1.8207225630538515, "grad_norm": 0.10418243706226349, "learning_rate": 9.861453698298765e-05, "loss": 1.2815, "mean_token_accuracy": 0.7577072277665138, "num_tokens": 405976804.0, "step": 6010 }, { "entropy": 0.8649901524186134, "epoch": 1.8237521775354086, "grad_norm": 0.09926159679889679, "learning_rate": 9.860671958079135e-05, "loss": 1.2711, "mean_token_accuracy": 0.7606795608997345, "num_tokens": 406667602.0, "step": 6020 }, { "entropy": 0.8635318517684937, "epoch": 1.8267817920169658, "grad_norm": 0.11913318932056427, "learning_rate": 9.859888049756656e-05, "loss": 1.2767, "mean_token_accuracy": 0.7582060411572457, "num_tokens": 407334010.0, "step": 6030 }, { "entropy": 0.8809746861457824, "epoch": 1.8298114064985231, "grad_norm": 0.10927748680114746, "learning_rate": 9.859101973680989e-05, "loss": 1.2828, "mean_token_accuracy": 0.7558536440134048, "num_tokens": 408019021.0, "step": 6040 }, { "entropy": 0.8621488586068153, "epoch": 1.8328410209800803, "grad_norm": 0.12048788368701935, "learning_rate": 9.858313730202765e-05, "loss": 1.2671, "mean_token_accuracy": 0.7627221181988716, "num_tokens": 408701807.0, "step": 6050 }, { "entropy": 0.8674041420221329, "epoch": 1.8358706354616374, "grad_norm": 0.09827356040477753, "learning_rate": 9.857523319673578e-05, "loss": 1.2668, "mean_token_accuracy": 0.7573729902505875, "num_tokens": 409377487.0, "step": 6060 }, { "entropy": 0.8690124318003655, "epoch": 1.8389002499431948, "grad_norm": 0.10867494344711304, "learning_rate": 9.856730742445995e-05, "loss": 1.2706, "mean_token_accuracy": 0.7620067074894905, "num_tokens": 410067582.0, "step": 6070 }, { "entropy": 0.8586040124297142, "epoch": 1.841929864424752, "grad_norm": 0.11834122985601425, "learning_rate": 9.85593599887354e-05, "loss": 1.2565, "mean_token_accuracy": 0.7621797949075699, "num_tokens": 410754726.0, "step": 6080 }, { "entropy": 0.8774945259094238, "epoch": 1.844959478906309, "grad_norm": 0.10356058925390244, "learning_rate": 9.85513908931071e-05, "loss": 1.2828, "mean_token_accuracy": 0.7524344474077225, "num_tokens": 411423839.0, "step": 6090 }, { "entropy": 0.876988162100315, "epoch": 1.8479890933878664, "grad_norm": 0.1258077472448349, "learning_rate": 9.854340014112968e-05, "loss": 1.2793, "mean_token_accuracy": 0.7554637297987938, "num_tokens": 412089967.0, "step": 6100 }, { "entropy": 0.8628374233841896, "epoch": 1.8510187078694236, "grad_norm": 0.1431972235441208, "learning_rate": 9.853538773636742e-05, "loss": 1.272, "mean_token_accuracy": 0.7536155939102173, "num_tokens": 412746947.0, "step": 6110 }, { "entropy": 0.8653267577290535, "epoch": 1.8540483223509807, "grad_norm": 0.10213130712509155, "learning_rate": 9.85273536823942e-05, "loss": 1.2693, "mean_token_accuracy": 0.763349962234497, "num_tokens": 413448245.0, "step": 6120 }, { "entropy": 0.8732479408383369, "epoch": 1.857077936832538, "grad_norm": 0.10812725126743317, "learning_rate": 9.85192979827937e-05, "loss": 1.2776, "mean_token_accuracy": 0.7534717291593551, "num_tokens": 414118443.0, "step": 6130 }, { "entropy": 0.8676200777292251, "epoch": 1.8601075513140954, "grad_norm": 0.12057381868362427, "learning_rate": 9.851122064115908e-05, "loss": 1.2706, "mean_token_accuracy": 0.7561385720968247, "num_tokens": 414785476.0, "step": 6140 }, { "entropy": 0.8682653442025184, "epoch": 1.8631371657956524, "grad_norm": 0.1044098436832428, "learning_rate": 9.850312166109327e-05, "loss": 1.2822, "mean_token_accuracy": 0.7545553788542747, "num_tokens": 415450110.0, "step": 6150 }, { "entropy": 0.8706517443060875, "epoch": 1.8661667802772097, "grad_norm": 0.10405302792787552, "learning_rate": 9.849500104620884e-05, "loss": 1.2768, "mean_token_accuracy": 0.7562116101384163, "num_tokens": 416129549.0, "step": 6160 }, { "entropy": 0.8670205220580101, "epoch": 1.869196394758767, "grad_norm": 0.11802846193313599, "learning_rate": 9.848685880012795e-05, "loss": 1.2727, "mean_token_accuracy": 0.7545148894190788, "num_tokens": 416791373.0, "step": 6170 }, { "entropy": 0.8728636398911476, "epoch": 1.872226009240324, "grad_norm": 0.12189928442239761, "learning_rate": 9.847869492648249e-05, "loss": 1.2808, "mean_token_accuracy": 0.7547622472047806, "num_tokens": 417470032.0, "step": 6180 }, { "entropy": 0.8675832554697991, "epoch": 1.8752556237218814, "grad_norm": 0.10060123354196548, "learning_rate": 9.847050942891394e-05, "loss": 1.2728, "mean_token_accuracy": 0.7568170398473739, "num_tokens": 418145871.0, "step": 6190 }, { "entropy": 0.871724471449852, "epoch": 1.8782852382034387, "grad_norm": 0.11063071340322495, "learning_rate": 9.846230231107343e-05, "loss": 1.281, "mean_token_accuracy": 0.7572342157363892, "num_tokens": 418822283.0, "step": 6200 }, { "entropy": 0.8718278899788856, "epoch": 1.8813148526849959, "grad_norm": 0.10236962884664536, "learning_rate": 9.845407357662175e-05, "loss": 1.2695, "mean_token_accuracy": 0.7589347824454308, "num_tokens": 419513609.0, "step": 6210 }, { "entropy": 0.8620535895228386, "epoch": 1.884344467166553, "grad_norm": 0.10928516834974289, "learning_rate": 9.844582322922936e-05, "loss": 1.2776, "mean_token_accuracy": 0.7593270123004914, "num_tokens": 420182206.0, "step": 6220 }, { "entropy": 0.8648419260978699, "epoch": 1.8873740816481104, "grad_norm": 0.09895563870668411, "learning_rate": 9.843755127257627e-05, "loss": 1.27, "mean_token_accuracy": 0.7572863906621933, "num_tokens": 420852032.0, "step": 6230 }, { "entropy": 0.8686920776963234, "epoch": 1.8904036961296675, "grad_norm": 0.10020595043897629, "learning_rate": 9.842925771035223e-05, "loss": 1.2659, "mean_token_accuracy": 0.7583027169108391, "num_tokens": 421539380.0, "step": 6240 }, { "entropy": 0.8602744072675705, "epoch": 1.8934333106112247, "grad_norm": 0.1016506552696228, "learning_rate": 9.842094254625656e-05, "loss": 1.2645, "mean_token_accuracy": 0.7595287203788758, "num_tokens": 422213057.0, "step": 6250 }, { "entropy": 0.8802652597427368, "epoch": 1.896462925092782, "grad_norm": 0.12578418850898743, "learning_rate": 9.841260578399825e-05, "loss": 1.2827, "mean_token_accuracy": 0.7529232487082481, "num_tokens": 422881960.0, "step": 6260 }, { "entropy": 0.8586903229355812, "epoch": 1.8994925395743392, "grad_norm": 0.09964416176080704, "learning_rate": 9.840424742729591e-05, "loss": 1.27, "mean_token_accuracy": 0.7611197888851166, "num_tokens": 423560249.0, "step": 6270 }, { "entropy": 0.8650608524680138, "epoch": 1.9025221540558963, "grad_norm": 0.12132469564676285, "learning_rate": 9.839586747987779e-05, "loss": 1.2701, "mean_token_accuracy": 0.7577430590987205, "num_tokens": 424229675.0, "step": 6280 }, { "entropy": 0.8667382508516311, "epoch": 1.9055517685374537, "grad_norm": 0.10416774451732635, "learning_rate": 9.838746594548177e-05, "loss": 1.2724, "mean_token_accuracy": 0.7566452905535698, "num_tokens": 424901997.0, "step": 6290 }, { "entropy": 0.8674384728074074, "epoch": 1.9085813830190108, "grad_norm": 0.11140474677085876, "learning_rate": 9.837904282785534e-05, "loss": 1.2666, "mean_token_accuracy": 0.7615894317626953, "num_tokens": 425595505.0, "step": 6300 }, { "entropy": 0.8667259678244591, "epoch": 1.911610997500568, "grad_norm": 0.11175062507390976, "learning_rate": 9.837059813075564e-05, "loss": 1.2755, "mean_token_accuracy": 0.7553184553980827, "num_tokens": 426270325.0, "step": 6310 }, { "entropy": 0.871880267560482, "epoch": 1.9146406119821253, "grad_norm": 0.10434798151254654, "learning_rate": 9.836213185794944e-05, "loss": 1.2819, "mean_token_accuracy": 0.75400460511446, "num_tokens": 426936170.0, "step": 6320 }, { "entropy": 0.8698974281549454, "epoch": 1.9176702264636825, "grad_norm": 0.11283425986766815, "learning_rate": 9.83536440132131e-05, "loss": 1.2738, "mean_token_accuracy": 0.7545256331562996, "num_tokens": 427599913.0, "step": 6330 }, { "entropy": 0.856814657151699, "epoch": 1.9206998409452396, "grad_norm": 0.10942177474498749, "learning_rate": 9.834513460033262e-05, "loss": 1.2643, "mean_token_accuracy": 0.7606557041406632, "num_tokens": 428288763.0, "step": 6340 }, { "entropy": 0.8598751842975616, "epoch": 1.923729455426797, "grad_norm": 0.1006433367729187, "learning_rate": 9.833660362310364e-05, "loss": 1.2758, "mean_token_accuracy": 0.7584156692028046, "num_tokens": 428972887.0, "step": 6350 }, { "entropy": 0.8560686483979225, "epoch": 1.9267590699083543, "grad_norm": 0.12851066887378693, "learning_rate": 9.832805108533141e-05, "loss": 1.2726, "mean_token_accuracy": 0.757812787592411, "num_tokens": 429641614.0, "step": 6360 }, { "entropy": 0.8634830087423324, "epoch": 1.9297886843899112, "grad_norm": 0.13254991173744202, "learning_rate": 9.831947699083076e-05, "loss": 1.2875, "mean_token_accuracy": 0.7525684475898743, "num_tokens": 430292249.0, "step": 6370 }, { "entropy": 0.868242746591568, "epoch": 1.9328182988714686, "grad_norm": 0.10359557718038559, "learning_rate": 9.831088134342619e-05, "loss": 1.272, "mean_token_accuracy": 0.759606520831585, "num_tokens": 430974530.0, "step": 6380 }, { "entropy": 0.8602611169219017, "epoch": 1.935847913353026, "grad_norm": 0.10770441591739655, "learning_rate": 9.830226414695178e-05, "loss": 1.2738, "mean_token_accuracy": 0.7602724403142929, "num_tokens": 431642824.0, "step": 6390 }, { "entropy": 0.8656735271215439, "epoch": 1.938877527834583, "grad_norm": 0.10923300683498383, "learning_rate": 9.829362540525123e-05, "loss": 1.2768, "mean_token_accuracy": 0.7595035210251808, "num_tokens": 432319865.0, "step": 6400 }, { "entropy": 0.8671105921268463, "epoch": 1.9419071423161403, "grad_norm": 0.10808205604553223, "learning_rate": 9.828496512217786e-05, "loss": 1.2689, "mean_token_accuracy": 0.7590477406978607, "num_tokens": 433004538.0, "step": 6410 }, { "entropy": 0.8676582247018814, "epoch": 1.9449367567976976, "grad_norm": 0.10780012607574463, "learning_rate": 9.827628330159458e-05, "loss": 1.2595, "mean_token_accuracy": 0.7648343667387962, "num_tokens": 433702296.0, "step": 6420 }, { "entropy": 0.8603417366743088, "epoch": 1.9479663712792548, "grad_norm": 0.1039799153804779, "learning_rate": 9.826757994737391e-05, "loss": 1.2676, "mean_token_accuracy": 0.7605415299534798, "num_tokens": 434391934.0, "step": 6430 }, { "entropy": 0.8720110803842545, "epoch": 1.950995985760812, "grad_norm": 0.1017688512802124, "learning_rate": 9.8258855063398e-05, "loss": 1.2795, "mean_token_accuracy": 0.7516930446028709, "num_tokens": 435055466.0, "step": 6440 }, { "entropy": 0.8747083127498627, "epoch": 1.9540256002423693, "grad_norm": 0.10512080043554306, "learning_rate": 9.825010865355857e-05, "loss": 1.2804, "mean_token_accuracy": 0.7554742082953453, "num_tokens": 435723328.0, "step": 6450 }, { "entropy": 0.8562737569212914, "epoch": 1.9570552147239264, "grad_norm": 0.11978275328874588, "learning_rate": 9.824134072175696e-05, "loss": 1.2598, "mean_token_accuracy": 0.7656181454658508, "num_tokens": 436423933.0, "step": 6460 }, { "entropy": 0.8638344198465348, "epoch": 1.9600848292054835, "grad_norm": 0.10439760982990265, "learning_rate": 9.82325512719041e-05, "loss": 1.2712, "mean_token_accuracy": 0.7559848457574845, "num_tokens": 437104622.0, "step": 6470 }, { "entropy": 0.8664592877030373, "epoch": 1.963114443687041, "grad_norm": 0.1122550442814827, "learning_rate": 9.822374030792054e-05, "loss": 1.2749, "mean_token_accuracy": 0.7568196907639504, "num_tokens": 437779579.0, "step": 6480 }, { "entropy": 0.8707536339759827, "epoch": 1.966144058168598, "grad_norm": 0.11339100450277328, "learning_rate": 9.82149078337364e-05, "loss": 1.2747, "mean_token_accuracy": 0.7549351662397384, "num_tokens": 438451449.0, "step": 6490 }, { "entropy": 0.861909331381321, "epoch": 1.9691736726501552, "grad_norm": 0.10671177506446838, "learning_rate": 9.820605385329142e-05, "loss": 1.2706, "mean_token_accuracy": 0.7554570451378823, "num_tokens": 439108629.0, "step": 6500 }, { "entropy": 0.867060911655426, "epoch": 1.9722032871317126, "grad_norm": 0.10488517582416534, "learning_rate": 9.819717837053488e-05, "loss": 1.2837, "mean_token_accuracy": 0.7523362159729003, "num_tokens": 439778874.0, "step": 6510 }, { "entropy": 0.8727175980806351, "epoch": 1.9752329016132697, "grad_norm": 0.10052948445081711, "learning_rate": 9.818828138942575e-05, "loss": 1.2708, "mean_token_accuracy": 0.7553378149867058, "num_tokens": 440458837.0, "step": 6520 }, { "entropy": 0.860368287563324, "epoch": 1.9782625160948268, "grad_norm": 0.09840691834688187, "learning_rate": 9.817936291393246e-05, "loss": 1.2791, "mean_token_accuracy": 0.7558258727192879, "num_tokens": 441114532.0, "step": 6530 }, { "entropy": 0.8804644227027894, "epoch": 1.9812921305763842, "grad_norm": 0.10521312057971954, "learning_rate": 9.817042294803314e-05, "loss": 1.2823, "mean_token_accuracy": 0.7543828547000885, "num_tokens": 441787728.0, "step": 6540 }, { "entropy": 0.8673185363411904, "epoch": 1.9843217450579413, "grad_norm": 0.11936893314123154, "learning_rate": 9.816146149571546e-05, "loss": 1.283, "mean_token_accuracy": 0.7550018146634102, "num_tokens": 442442350.0, "step": 6550 }, { "entropy": 0.8756032794713974, "epoch": 1.9873513595394985, "grad_norm": 0.10171148180961609, "learning_rate": 9.815247856097667e-05, "loss": 1.2727, "mean_token_accuracy": 0.7548216253519058, "num_tokens": 443113248.0, "step": 6560 }, { "entropy": 0.8717926159501076, "epoch": 1.9903809740210558, "grad_norm": 0.11027976870536804, "learning_rate": 9.814347414782358e-05, "loss": 1.272, "mean_token_accuracy": 0.7534134685993195, "num_tokens": 443778988.0, "step": 6570 }, { "entropy": 0.8772870868444442, "epoch": 1.993410588502613, "grad_norm": 0.10239473730325699, "learning_rate": 9.813444826027267e-05, "loss": 1.274, "mean_token_accuracy": 0.7549547031521797, "num_tokens": 444464540.0, "step": 6580 }, { "entropy": 0.866197295486927, "epoch": 1.9964402029841701, "grad_norm": 0.10646392405033112, "learning_rate": 9.812540090234988e-05, "loss": 1.2734, "mean_token_accuracy": 0.7553907632827759, "num_tokens": 445139655.0, "step": 6590 }, { "entropy": 0.8781222715973854, "epoch": 1.9994698174657275, "grad_norm": 0.10427211970090866, "learning_rate": 9.811633207809081e-05, "loss": 1.2805, "mean_token_accuracy": 0.7585296750068664, "num_tokens": 445824421.0, "step": 6600 }, { "entropy": 0.855379995627281, "epoch": 2.002423691585246, "grad_norm": 0.12197387963533401, "learning_rate": 9.810724179154061e-05, "loss": 1.2648, "mean_token_accuracy": 0.7599545411574535, "num_tokens": 446481716.0, "step": 6610 }, { "entropy": 0.8597342357039451, "epoch": 2.005453306066803, "grad_norm": 0.10399507731199265, "learning_rate": 9.809813004675399e-05, "loss": 1.2643, "mean_token_accuracy": 0.7580909013748169, "num_tokens": 447160728.0, "step": 6620 }, { "entropy": 0.8628667056560516, "epoch": 2.00848292054836, "grad_norm": 0.10651499032974243, "learning_rate": 9.808899684779527e-05, "loss": 1.2694, "mean_token_accuracy": 0.7572256475687027, "num_tokens": 447827882.0, "step": 6630 }, { "entropy": 0.8654381215572358, "epoch": 2.0115125350299174, "grad_norm": 0.11701236665248871, "learning_rate": 9.807984219873829e-05, "loss": 1.2749, "mean_token_accuracy": 0.7546640053391457, "num_tokens": 448495014.0, "step": 6640 }, { "entropy": 0.8621604233980179, "epoch": 2.014542149511475, "grad_norm": 0.11036840826272964, "learning_rate": 9.807066610366646e-05, "loss": 1.2636, "mean_token_accuracy": 0.7617029055953026, "num_tokens": 449183934.0, "step": 6650 }, { "entropy": 0.8624231189489364, "epoch": 2.0175717639930317, "grad_norm": 0.10649905353784561, "learning_rate": 9.806146856667284e-05, "loss": 1.2709, "mean_token_accuracy": 0.7637782678008079, "num_tokens": 449866563.0, "step": 6660 }, { "entropy": 0.8569630533456802, "epoch": 2.020601378474589, "grad_norm": 0.10633864998817444, "learning_rate": 9.805224959185995e-05, "loss": 1.2639, "mean_token_accuracy": 0.7612204819917678, "num_tokens": 450543198.0, "step": 6670 }, { "entropy": 0.8620423242449761, "epoch": 2.0236309929561465, "grad_norm": 0.11411301791667938, "learning_rate": 9.80430091833399e-05, "loss": 1.2553, "mean_token_accuracy": 0.7586001366376877, "num_tokens": 451220504.0, "step": 6680 }, { "entropy": 0.8453009083867074, "epoch": 2.0266606074377034, "grad_norm": 0.11585341393947601, "learning_rate": 9.80337473452344e-05, "loss": 1.2442, "mean_token_accuracy": 0.7682624146342277, "num_tokens": 451922244.0, "step": 6690 }, { "entropy": 0.8570357114076614, "epoch": 2.0296902219192607, "grad_norm": 0.11498195677995682, "learning_rate": 9.802446408167472e-05, "loss": 1.2632, "mean_token_accuracy": 0.7542983755469322, "num_tokens": 452583893.0, "step": 6700 }, { "entropy": 0.8643184080719948, "epoch": 2.032719836400818, "grad_norm": 0.11596842110157013, "learning_rate": 9.801515939680159e-05, "loss": 1.2712, "mean_token_accuracy": 0.7544256269931793, "num_tokens": 453260582.0, "step": 6710 }, { "entropy": 0.8602597936987877, "epoch": 2.035749450882375, "grad_norm": 0.10272014141082764, "learning_rate": 9.800583329476542e-05, "loss": 1.2588, "mean_token_accuracy": 0.7598227143287659, "num_tokens": 453952031.0, "step": 6720 }, { "entropy": 0.8591838702559471, "epoch": 2.0387790653639324, "grad_norm": 0.10272420197725296, "learning_rate": 9.79964857797261e-05, "loss": 1.2624, "mean_token_accuracy": 0.7543813779950141, "num_tokens": 454617441.0, "step": 6730 }, { "entropy": 0.8641746655106545, "epoch": 2.0418086798454897, "grad_norm": 0.10870486497879028, "learning_rate": 9.798711685585309e-05, "loss": 1.2716, "mean_token_accuracy": 0.7550192728638649, "num_tokens": 455278105.0, "step": 6740 }, { "entropy": 0.8625334724783897, "epoch": 2.0448382943270467, "grad_norm": 0.10910259187221527, "learning_rate": 9.79777265273254e-05, "loss": 1.27, "mean_token_accuracy": 0.7616015195846557, "num_tokens": 455961244.0, "step": 6750 }, { "entropy": 0.8660917222499848, "epoch": 2.047867908808604, "grad_norm": 0.09981189668178558, "learning_rate": 9.796831479833158e-05, "loss": 1.2661, "mean_token_accuracy": 0.7565690204501152, "num_tokens": 456640849.0, "step": 6760 }, { "entropy": 0.853822048008442, "epoch": 2.0508975232901614, "grad_norm": 0.10835167020559311, "learning_rate": 9.795888167306973e-05, "loss": 1.2565, "mean_token_accuracy": 0.7610779702663422, "num_tokens": 457326593.0, "step": 6770 }, { "entropy": 0.8495786920189857, "epoch": 2.0539271377717188, "grad_norm": 0.10651559382677078, "learning_rate": 9.794942715574751e-05, "loss": 1.2553, "mean_token_accuracy": 0.7608906149864196, "num_tokens": 458005097.0, "step": 6780 }, { "entropy": 0.8613103628158569, "epoch": 2.0569567522532757, "grad_norm": 0.11444416642189026, "learning_rate": 9.79399512505821e-05, "loss": 1.2674, "mean_token_accuracy": 0.7556070819497108, "num_tokens": 458676999.0, "step": 6790 }, { "entropy": 0.856567345559597, "epoch": 2.059986366734833, "grad_norm": 0.10138892382383347, "learning_rate": 9.793045396180025e-05, "loss": 1.2648, "mean_token_accuracy": 0.7604643791913986, "num_tokens": 459353383.0, "step": 6800 }, { "entropy": 0.8569768846035004, "epoch": 2.0630159812163904, "grad_norm": 0.10967062413692474, "learning_rate": 9.792093529363818e-05, "loss": 1.265, "mean_token_accuracy": 0.7586782589554787, "num_tokens": 460026096.0, "step": 6810 }, { "entropy": 0.8583046585321427, "epoch": 2.0660455956979473, "grad_norm": 0.1021110787987709, "learning_rate": 9.791139525034172e-05, "loss": 1.2648, "mean_token_accuracy": 0.7559386551380157, "num_tokens": 460696436.0, "step": 6820 }, { "entropy": 0.8555554494261741, "epoch": 2.0690752101795047, "grad_norm": 0.10578510165214539, "learning_rate": 9.790183383616621e-05, "loss": 1.2572, "mean_token_accuracy": 0.7565886348485946, "num_tokens": 461359802.0, "step": 6830 }, { "entropy": 0.8597806006669998, "epoch": 2.072104824661062, "grad_norm": 0.10171770304441452, "learning_rate": 9.78922510553765e-05, "loss": 1.2627, "mean_token_accuracy": 0.7535345032811165, "num_tokens": 462023955.0, "step": 6840 }, { "entropy": 0.8709043994545936, "epoch": 2.075134439142619, "grad_norm": 0.11991483718156815, "learning_rate": 9.7882646912247e-05, "loss": 1.2712, "mean_token_accuracy": 0.7572952851653099, "num_tokens": 462703705.0, "step": 6850 }, { "entropy": 0.8502332121133804, "epoch": 2.0781640536241763, "grad_norm": 0.10345316678285599, "learning_rate": 9.787302141106165e-05, "loss": 1.2562, "mean_token_accuracy": 0.7572643637657166, "num_tokens": 463373837.0, "step": 6860 }, { "entropy": 0.8540147677063942, "epoch": 2.0811936681057337, "grad_norm": 0.12561824917793274, "learning_rate": 9.78633745561139e-05, "loss": 1.2474, "mean_token_accuracy": 0.7603194698691368, "num_tokens": 464054524.0, "step": 6870 }, { "entropy": 0.8574964791536331, "epoch": 2.0842232825872906, "grad_norm": 0.11242875456809998, "learning_rate": 9.785370635170671e-05, "loss": 1.2562, "mean_token_accuracy": 0.7610817566514015, "num_tokens": 464740491.0, "step": 6880 }, { "entropy": 0.8604069128632545, "epoch": 2.087252897068848, "grad_norm": 0.1068437322974205, "learning_rate": 9.78440168021526e-05, "loss": 1.2665, "mean_token_accuracy": 0.7602398425340653, "num_tokens": 465423032.0, "step": 6890 }, { "entropy": 0.8592790767550469, "epoch": 2.0902825115504053, "grad_norm": 0.10740931332111359, "learning_rate": 9.78343059117736e-05, "loss": 1.2618, "mean_token_accuracy": 0.7602035805583001, "num_tokens": 466112075.0, "step": 6900 }, { "entropy": 0.8645462259650231, "epoch": 2.0933121260319623, "grad_norm": 0.10766159743070602, "learning_rate": 9.782457368490124e-05, "loss": 1.2592, "mean_token_accuracy": 0.757443630695343, "num_tokens": 466791246.0, "step": 6910 }, { "entropy": 0.8605925098061562, "epoch": 2.0963417405135196, "grad_norm": 0.09991318732500076, "learning_rate": 9.781482012587659e-05, "loss": 1.2693, "mean_token_accuracy": 0.7562457993626595, "num_tokens": 467461551.0, "step": 6920 }, { "entropy": 0.8558261439204216, "epoch": 2.099371354995077, "grad_norm": 0.10399764776229858, "learning_rate": 9.780504523905021e-05, "loss": 1.2578, "mean_token_accuracy": 0.7609396636486053, "num_tokens": 468148466.0, "step": 6930 }, { "entropy": 0.8557205960154534, "epoch": 2.102400969476634, "grad_norm": 0.10423752665519714, "learning_rate": 9.779524902878221e-05, "loss": 1.25, "mean_token_accuracy": 0.7585823208093643, "num_tokens": 468825740.0, "step": 6940 }, { "entropy": 0.8591925516724587, "epoch": 2.1054305839581913, "grad_norm": 0.10516276210546494, "learning_rate": 9.778543149944216e-05, "loss": 1.2563, "mean_token_accuracy": 0.7639168784022331, "num_tokens": 469519971.0, "step": 6950 }, { "entropy": 0.8553109049797059, "epoch": 2.1084601984397486, "grad_norm": 0.11408320814371109, "learning_rate": 9.77755926554092e-05, "loss": 1.2613, "mean_token_accuracy": 0.7561553657054901, "num_tokens": 470186545.0, "step": 6960 }, { "entropy": 0.8511288121342659, "epoch": 2.1114898129213056, "grad_norm": 0.10330954194068909, "learning_rate": 9.776573250107192e-05, "loss": 1.2591, "mean_token_accuracy": 0.757400181889534, "num_tokens": 470858012.0, "step": 6970 }, { "entropy": 0.8525807246565819, "epoch": 2.114519427402863, "grad_norm": 0.11110203713178635, "learning_rate": 9.775585104082847e-05, "loss": 1.2537, "mean_token_accuracy": 0.7565642282366752, "num_tokens": 471527673.0, "step": 6980 }, { "entropy": 0.8628541558980942, "epoch": 2.1175490418844203, "grad_norm": 0.10720645636320114, "learning_rate": 9.774594827908647e-05, "loss": 1.2731, "mean_token_accuracy": 0.7586317673325539, "num_tokens": 472201465.0, "step": 6990 }, { "entropy": 0.8592759609222412, "epoch": 2.1205786563659776, "grad_norm": 0.1143016666173935, "learning_rate": 9.773602422026302e-05, "loss": 1.2693, "mean_token_accuracy": 0.7563334733247757, "num_tokens": 472871361.0, "step": 7000 }, { "entropy": 0.8691433608531952, "epoch": 2.1236082708475346, "grad_norm": 0.12189134955406189, "learning_rate": 9.772607886878477e-05, "loss": 1.276, "mean_token_accuracy": 0.7545945599675179, "num_tokens": 473531479.0, "step": 7010 }, { "entropy": 0.8580952316522599, "epoch": 2.126637885329092, "grad_norm": 0.11679427325725555, "learning_rate": 9.771611222908785e-05, "loss": 1.2603, "mean_token_accuracy": 0.7634949997067452, "num_tokens": 474222414.0, "step": 7020 }, { "entropy": 0.8566856682300568, "epoch": 2.1296674998106493, "grad_norm": 0.10399682819843292, "learning_rate": 9.770612430561786e-05, "loss": 1.2588, "mean_token_accuracy": 0.7572376638650894, "num_tokens": 474895832.0, "step": 7030 }, { "entropy": 0.8566211298108101, "epoch": 2.132697114292206, "grad_norm": 0.1056089997291565, "learning_rate": 9.769611510282992e-05, "loss": 1.2697, "mean_token_accuracy": 0.7561067789793015, "num_tokens": 475556409.0, "step": 7040 }, { "entropy": 0.8562668204307556, "epoch": 2.1357267287737636, "grad_norm": 0.11222401261329651, "learning_rate": 9.768608462518865e-05, "loss": 1.2683, "mean_token_accuracy": 0.7573768883943558, "num_tokens": 476225073.0, "step": 7050 }, { "entropy": 0.8557048231363297, "epoch": 2.138756343255321, "grad_norm": 0.10089311748743057, "learning_rate": 9.767603287716813e-05, "loss": 1.2605, "mean_token_accuracy": 0.7541783332824707, "num_tokens": 476882152.0, "step": 7060 }, { "entropy": 0.8503266096115112, "epoch": 2.141785957736878, "grad_norm": 0.10705769062042236, "learning_rate": 9.766595986325193e-05, "loss": 1.2676, "mean_token_accuracy": 0.7550541222095489, "num_tokens": 477542573.0, "step": 7070 }, { "entropy": 0.8711753636598587, "epoch": 2.144815572218435, "grad_norm": 0.1165536567568779, "learning_rate": 9.765586558793316e-05, "loss": 1.2668, "mean_token_accuracy": 0.7530593708157539, "num_tokens": 478214118.0, "step": 7080 }, { "entropy": 0.8580114468932152, "epoch": 2.1478451866999926, "grad_norm": 0.1194147914648056, "learning_rate": 9.764575005571432e-05, "loss": 1.2658, "mean_token_accuracy": 0.7554805114865303, "num_tokens": 478881632.0, "step": 7090 }, { "entropy": 0.8596231237053871, "epoch": 2.1508748011815495, "grad_norm": 0.10714981704950333, "learning_rate": 9.763561327110749e-05, "loss": 1.2647, "mean_token_accuracy": 0.7552571997046471, "num_tokens": 479553212.0, "step": 7100 }, { "entropy": 0.8582180052995682, "epoch": 2.153904415663107, "grad_norm": 0.13480797410011292, "learning_rate": 9.762545523863413e-05, "loss": 1.2603, "mean_token_accuracy": 0.7590198293328285, "num_tokens": 480225432.0, "step": 7110 }, { "entropy": 0.8705548882484436, "epoch": 2.1569340301446642, "grad_norm": 0.11295425891876221, "learning_rate": 9.76152759628253e-05, "loss": 1.2793, "mean_token_accuracy": 0.7544082313776016, "num_tokens": 480893534.0, "step": 7120 }, { "entropy": 0.8477849125862121, "epoch": 2.159963644626221, "grad_norm": 0.12092622369527817, "learning_rate": 9.760507544822141e-05, "loss": 1.2578, "mean_token_accuracy": 0.7593964442610741, "num_tokens": 481557885.0, "step": 7130 }, { "entropy": 0.8525932401418685, "epoch": 2.1629932591077785, "grad_norm": 0.10847190767526627, "learning_rate": 9.759485369937241e-05, "loss": 1.2609, "mean_token_accuracy": 0.7636817306280136, "num_tokens": 482250376.0, "step": 7140 }, { "entropy": 0.8548767328262329, "epoch": 2.166022873589336, "grad_norm": 0.11364589631557465, "learning_rate": 9.758461072083773e-05, "loss": 1.2648, "mean_token_accuracy": 0.7583749040961265, "num_tokens": 482921431.0, "step": 7150 }, { "entropy": 0.8482321932911873, "epoch": 2.169052488070893, "grad_norm": 0.12237298488616943, "learning_rate": 9.757434651718624e-05, "loss": 1.2567, "mean_token_accuracy": 0.7610787898302078, "num_tokens": 483597366.0, "step": 7160 }, { "entropy": 0.8592027381062508, "epoch": 2.17208210255245, "grad_norm": 0.10675161331892014, "learning_rate": 9.756406109299629e-05, "loss": 1.2578, "mean_token_accuracy": 0.7558085158467293, "num_tokens": 484263747.0, "step": 7170 }, { "entropy": 0.8659064993262291, "epoch": 2.1751117170340075, "grad_norm": 0.11875585466623306, "learning_rate": 9.755375445285569e-05, "loss": 1.2714, "mean_token_accuracy": 0.7590313360095025, "num_tokens": 484947069.0, "step": 7180 }, { "entropy": 0.8590867668390274, "epoch": 2.1781413315155644, "grad_norm": 0.11141160875558853, "learning_rate": 9.754342660136174e-05, "loss": 1.2635, "mean_token_accuracy": 0.7573440134525299, "num_tokens": 485616362.0, "step": 7190 }, { "entropy": 0.8552271515130997, "epoch": 2.181170945997122, "grad_norm": 0.1052631139755249, "learning_rate": 9.753307754312114e-05, "loss": 1.2553, "mean_token_accuracy": 0.7584361836314202, "num_tokens": 486295062.0, "step": 7200 }, { "entropy": 0.8523262500762939, "epoch": 2.184200560478679, "grad_norm": 0.10578624904155731, "learning_rate": 9.752270728275009e-05, "loss": 1.2583, "mean_token_accuracy": 0.7606440037488937, "num_tokens": 486975723.0, "step": 7210 }, { "entropy": 0.8580429136753083, "epoch": 2.187230174960236, "grad_norm": 0.11437220126390457, "learning_rate": 9.751231582487428e-05, "loss": 1.264, "mean_token_accuracy": 0.7566904693841934, "num_tokens": 487648697.0, "step": 7220 }, { "entropy": 0.855644790828228, "epoch": 2.1902597894417934, "grad_norm": 0.11084471642971039, "learning_rate": 9.750190317412879e-05, "loss": 1.2592, "mean_token_accuracy": 0.7634907543659211, "num_tokens": 488338493.0, "step": 7230 }, { "entropy": 0.8540291801095009, "epoch": 2.193289403923351, "grad_norm": 0.11131398379802704, "learning_rate": 9.74914693351582e-05, "loss": 1.2527, "mean_token_accuracy": 0.7613129258155823, "num_tokens": 489019594.0, "step": 7240 }, { "entropy": 0.851766362786293, "epoch": 2.196319018404908, "grad_norm": 0.11028112471103668, "learning_rate": 9.748101431261652e-05, "loss": 1.2543, "mean_token_accuracy": 0.7593399941921234, "num_tokens": 489691617.0, "step": 7250 }, { "entropy": 0.8516391098499299, "epoch": 2.199348632886465, "grad_norm": 0.1113218367099762, "learning_rate": 9.747053811116721e-05, "loss": 1.2478, "mean_token_accuracy": 0.7576561823487282, "num_tokens": 490359650.0, "step": 7260 }, { "entropy": 0.8581887081265449, "epoch": 2.2023782473680225, "grad_norm": 0.11369206756353378, "learning_rate": 9.746004073548319e-05, "loss": 1.265, "mean_token_accuracy": 0.7545206531882286, "num_tokens": 491031446.0, "step": 7270 }, { "entropy": 0.8595120161771774, "epoch": 2.20540786184958, "grad_norm": 0.10878115147352219, "learning_rate": 9.74495221902468e-05, "loss": 1.2702, "mean_token_accuracy": 0.7577403575181961, "num_tokens": 491702192.0, "step": 7280 }, { "entropy": 0.8505108594894409, "epoch": 2.2084374763311367, "grad_norm": 0.11624005436897278, "learning_rate": 9.743898248014982e-05, "loss": 1.2443, "mean_token_accuracy": 0.7682089433073997, "num_tokens": 492403891.0, "step": 7290 }, { "entropy": 0.8472009196877479, "epoch": 2.211467090812694, "grad_norm": 0.10479124635457993, "learning_rate": 9.742842160989353e-05, "loss": 1.2549, "mean_token_accuracy": 0.7610199108719826, "num_tokens": 493070921.0, "step": 7300 }, { "entropy": 0.8508476316928864, "epoch": 2.2144967052942515, "grad_norm": 0.12949249148368835, "learning_rate": 9.741783958418858e-05, "loss": 1.2635, "mean_token_accuracy": 0.7580212265253067, "num_tokens": 493741382.0, "step": 7310 }, { "entropy": 0.8497532665729522, "epoch": 2.2175263197758084, "grad_norm": 0.11473718285560608, "learning_rate": 9.74072364077551e-05, "loss": 1.2611, "mean_token_accuracy": 0.7563960090279579, "num_tokens": 494399985.0, "step": 7320 }, { "entropy": 0.8576474860310555, "epoch": 2.2205559342573657, "grad_norm": 0.1286933571100235, "learning_rate": 9.739661208532263e-05, "loss": 1.2627, "mean_token_accuracy": 0.7604630246758461, "num_tokens": 495073813.0, "step": 7330 }, { "entropy": 0.8532460451126098, "epoch": 2.223585548738923, "grad_norm": 0.11478506028652191, "learning_rate": 9.738596662163013e-05, "loss": 1.2483, "mean_token_accuracy": 0.7595999017357826, "num_tokens": 495753255.0, "step": 7340 }, { "entropy": 0.8521913647651672, "epoch": 2.22661516322048, "grad_norm": 0.11295423656702042, "learning_rate": 9.737530002142605e-05, "loss": 1.2542, "mean_token_accuracy": 0.7628868624567986, "num_tokens": 496445396.0, "step": 7350 }, { "entropy": 0.8571484595537185, "epoch": 2.2296447777020374, "grad_norm": 0.10795578360557556, "learning_rate": 9.736461228946818e-05, "loss": 1.2608, "mean_token_accuracy": 0.7577721431851387, "num_tokens": 497119111.0, "step": 7360 }, { "entropy": 0.8618745282292366, "epoch": 2.2326743921835948, "grad_norm": 0.10621679574251175, "learning_rate": 9.735390343052382e-05, "loss": 1.2645, "mean_token_accuracy": 0.7595389366149903, "num_tokens": 497791432.0, "step": 7370 }, { "entropy": 0.8541598737239837, "epoch": 2.2357040066651517, "grad_norm": 0.10267043858766556, "learning_rate": 9.734317344936965e-05, "loss": 1.2558, "mean_token_accuracy": 0.7601015284657479, "num_tokens": 498466161.0, "step": 7380 }, { "entropy": 0.8523509263992309, "epoch": 2.238733621146709, "grad_norm": 0.11477231234312057, "learning_rate": 9.733242235079175e-05, "loss": 1.2554, "mean_token_accuracy": 0.7600444808602334, "num_tokens": 499142070.0, "step": 7390 }, { "entropy": 0.8656803876161575, "epoch": 2.2417632356282664, "grad_norm": 0.10855995863676071, "learning_rate": 9.732165013958568e-05, "loss": 1.2607, "mean_token_accuracy": 0.7546856954693795, "num_tokens": 499806749.0, "step": 7400 }, { "entropy": 0.8515672892332077, "epoch": 2.2447928501098233, "grad_norm": 0.10969103872776031, "learning_rate": 9.731085682055639e-05, "loss": 1.2594, "mean_token_accuracy": 0.7607250720262527, "num_tokens": 500488873.0, "step": 7410 }, { "entropy": 0.8555934444069863, "epoch": 2.2478224645913807, "grad_norm": 0.10826828330755234, "learning_rate": 9.73000423985182e-05, "loss": 1.2603, "mean_token_accuracy": 0.7563047707080841, "num_tokens": 501158774.0, "step": 7420 }, { "entropy": 0.8485673606395722, "epoch": 2.250852079072938, "grad_norm": 0.11583668738603592, "learning_rate": 9.728920687829491e-05, "loss": 1.2566, "mean_token_accuracy": 0.7643196985125542, "num_tokens": 501844165.0, "step": 7430 }, { "entropy": 0.8485406875610352, "epoch": 2.2538816935544954, "grad_norm": 0.11024606227874756, "learning_rate": 9.727835026471972e-05, "loss": 1.2562, "mean_token_accuracy": 0.7618746072053909, "num_tokens": 502525376.0, "step": 7440 }, { "entropy": 0.8646971046924591, "epoch": 2.2569113080360523, "grad_norm": 0.11305391043424606, "learning_rate": 9.726747256263518e-05, "loss": 1.2677, "mean_token_accuracy": 0.7597474843263626, "num_tokens": 503209294.0, "step": 7450 }, { "entropy": 0.864198413491249, "epoch": 2.2599409225176097, "grad_norm": 0.10352534800767899, "learning_rate": 9.725657377689334e-05, "loss": 1.2644, "mean_token_accuracy": 0.7597779706120491, "num_tokens": 503897732.0, "step": 7460 }, { "entropy": 0.8614189490675926, "epoch": 2.262970536999167, "grad_norm": 0.13066239655017853, "learning_rate": 9.724565391235556e-05, "loss": 1.2657, "mean_token_accuracy": 0.7616986453533172, "num_tokens": 504586957.0, "step": 7470 }, { "entropy": 0.8593979150056839, "epoch": 2.266000151480724, "grad_norm": 0.11965595930814743, "learning_rate": 9.723471297389268e-05, "loss": 1.2606, "mean_token_accuracy": 0.7532329395413399, "num_tokens": 505243072.0, "step": 7480 }, { "entropy": 0.8494557604193688, "epoch": 2.2690297659622813, "grad_norm": 0.11761012673377991, "learning_rate": 9.722375096638488e-05, "loss": 1.2519, "mean_token_accuracy": 0.7593458399176598, "num_tokens": 505915469.0, "step": 7490 }, { "entropy": 0.864263667166233, "epoch": 2.2720593804438387, "grad_norm": 0.10211925953626633, "learning_rate": 9.721276789472176e-05, "loss": 1.2625, "mean_token_accuracy": 0.7565105214715004, "num_tokens": 506590296.0, "step": 7500 }, { "entropy": 0.8466208800673485, "epoch": 2.2750889949253956, "grad_norm": 0.11636841297149658, "learning_rate": 9.720176376380235e-05, "loss": 1.245, "mean_token_accuracy": 0.7617765799164772, "num_tokens": 507272437.0, "step": 7510 }, { "entropy": 0.8553040236234665, "epoch": 2.278118609406953, "grad_norm": 0.12769252061843872, "learning_rate": 9.719073857853502e-05, "loss": 1.2574, "mean_token_accuracy": 0.7656926468014718, "num_tokens": 507970785.0, "step": 7520 }, { "entropy": 0.852819399535656, "epoch": 2.2811482238885104, "grad_norm": 0.1222531795501709, "learning_rate": 9.717969234383755e-05, "loss": 1.2604, "mean_token_accuracy": 0.7582620829343796, "num_tokens": 508647313.0, "step": 7530 }, { "entropy": 0.8474319830536843, "epoch": 2.2841778383700673, "grad_norm": 0.10286824405193329, "learning_rate": 9.716862506463714e-05, "loss": 1.2563, "mean_token_accuracy": 0.7589914947748184, "num_tokens": 509319096.0, "step": 7540 }, { "entropy": 0.8581209525465965, "epoch": 2.2872074528516246, "grad_norm": 0.10509907454252243, "learning_rate": 9.715753674587032e-05, "loss": 1.2632, "mean_token_accuracy": 0.7579801797866821, "num_tokens": 509986682.0, "step": 7550 }, { "entropy": 0.8578364208340645, "epoch": 2.290237067333182, "grad_norm": 0.1088315099477768, "learning_rate": 9.714642739248305e-05, "loss": 1.2674, "mean_token_accuracy": 0.756266288459301, "num_tokens": 510645010.0, "step": 7560 }, { "entropy": 0.8538104221224785, "epoch": 2.293266681814739, "grad_norm": 0.1265879124403, "learning_rate": 9.713529700943066e-05, "loss": 1.2618, "mean_token_accuracy": 0.7609230205416679, "num_tokens": 511324700.0, "step": 7570 }, { "entropy": 0.8532759755849838, "epoch": 2.2962962962962963, "grad_norm": 0.11841035634279251, "learning_rate": 9.712414560167785e-05, "loss": 1.2629, "mean_token_accuracy": 0.7549974635243416, "num_tokens": 511991352.0, "step": 7580 }, { "entropy": 0.8417635351419449, "epoch": 2.2993259107778536, "grad_norm": 0.13118776679039001, "learning_rate": 9.711297317419871e-05, "loss": 1.2454, "mean_token_accuracy": 0.7595033630728721, "num_tokens": 512663445.0, "step": 7590 }, { "entropy": 0.8628800466656685, "epoch": 2.3023555252594106, "grad_norm": 0.11126203089952469, "learning_rate": 9.710177973197669e-05, "loss": 1.2684, "mean_token_accuracy": 0.7575622022151947, "num_tokens": 513337375.0, "step": 7600 }, { "entropy": 0.8636928454041481, "epoch": 2.305385139740968, "grad_norm": 0.12146361172199249, "learning_rate": 9.709056528000466e-05, "loss": 1.2604, "mean_token_accuracy": 0.755362045764923, "num_tokens": 514023000.0, "step": 7610 }, { "entropy": 0.849440747499466, "epoch": 2.3084147542225253, "grad_norm": 0.12590289115905762, "learning_rate": 9.707932982328478e-05, "loss": 1.2552, "mean_token_accuracy": 0.7635484874248505, "num_tokens": 514706972.0, "step": 7620 }, { "entropy": 0.8544532179832458, "epoch": 2.311444368704082, "grad_norm": 0.10340185463428497, "learning_rate": 9.706807336682866e-05, "loss": 1.2625, "mean_token_accuracy": 0.7590579405426979, "num_tokens": 515388358.0, "step": 7630 }, { "entropy": 0.8562353760004043, "epoch": 2.3144739831856396, "grad_norm": 0.12309904396533966, "learning_rate": 9.705679591565722e-05, "loss": 1.2658, "mean_token_accuracy": 0.7605538964271545, "num_tokens": 516062529.0, "step": 7640 }, { "entropy": 0.8605162307620049, "epoch": 2.317503597667197, "grad_norm": 0.12778401374816895, "learning_rate": 9.704549747480078e-05, "loss": 1.2597, "mean_token_accuracy": 0.7594474971294403, "num_tokens": 516734700.0, "step": 7650 }, { "entropy": 0.8563567087054252, "epoch": 2.320533212148754, "grad_norm": 0.12819762527942657, "learning_rate": 9.703417804929901e-05, "loss": 1.265, "mean_token_accuracy": 0.755785447359085, "num_tokens": 517401137.0, "step": 7660 }, { "entropy": 0.8390669286251068, "epoch": 2.323562826630311, "grad_norm": 0.12742090225219727, "learning_rate": 9.702283764420093e-05, "loss": 1.2506, "mean_token_accuracy": 0.7612207159399986, "num_tokens": 518071824.0, "step": 7670 }, { "entropy": 0.8464131817221642, "epoch": 2.3265924411118686, "grad_norm": 0.11039908975362778, "learning_rate": 9.701147626456495e-05, "loss": 1.2511, "mean_token_accuracy": 0.7588544338941574, "num_tokens": 518731226.0, "step": 7680 }, { "entropy": 0.8588887944817543, "epoch": 2.3296220555934255, "grad_norm": 0.12836633622646332, "learning_rate": 9.700009391545878e-05, "loss": 1.264, "mean_token_accuracy": 0.7568954274058342, "num_tokens": 519409623.0, "step": 7690 }, { "entropy": 0.8526587426662445, "epoch": 2.332651670074983, "grad_norm": 0.10793481767177582, "learning_rate": 9.698869060195957e-05, "loss": 1.2673, "mean_token_accuracy": 0.7589220061898232, "num_tokens": 520080801.0, "step": 7700 }, { "entropy": 0.8565697595477104, "epoch": 2.3356812845565402, "grad_norm": 0.11040239036083221, "learning_rate": 9.69772663291537e-05, "loss": 1.2647, "mean_token_accuracy": 0.7597812503576279, "num_tokens": 520758394.0, "step": 7710 }, { "entropy": 0.8582417130470276, "epoch": 2.3387108990380976, "grad_norm": 0.11134511977434158, "learning_rate": 9.696582110213701e-05, "loss": 1.262, "mean_token_accuracy": 0.7580493733286857, "num_tokens": 521441114.0, "step": 7720 }, { "entropy": 0.8498915374279022, "epoch": 2.3417405135196545, "grad_norm": 0.11319853365421295, "learning_rate": 9.695435492601464e-05, "loss": 1.2523, "mean_token_accuracy": 0.7624587774276733, "num_tokens": 522135170.0, "step": 7730 }, { "entropy": 0.847995363175869, "epoch": 2.344770128001212, "grad_norm": 0.12444458156824112, "learning_rate": 9.694286780590107e-05, "loss": 1.2585, "mean_token_accuracy": 0.7576172351837158, "num_tokens": 522797514.0, "step": 7740 }, { "entropy": 0.8536500424146652, "epoch": 2.3477997424827692, "grad_norm": 0.12499086558818817, "learning_rate": 9.693135974692013e-05, "loss": 1.2678, "mean_token_accuracy": 0.7565039187669754, "num_tokens": 523456154.0, "step": 7750 }, { "entropy": 0.8630647584795952, "epoch": 2.350829356964326, "grad_norm": 0.11499357968568802, "learning_rate": 9.691983075420501e-05, "loss": 1.2616, "mean_token_accuracy": 0.7558874368667603, "num_tokens": 524125446.0, "step": 7760 }, { "entropy": 0.8613532796502114, "epoch": 2.3538589714458835, "grad_norm": 0.123953677713871, "learning_rate": 9.69082808328982e-05, "loss": 1.2654, "mean_token_accuracy": 0.7564760953187942, "num_tokens": 524801334.0, "step": 7770 }, { "entropy": 0.8634515941143036, "epoch": 2.356888585927441, "grad_norm": 0.12936441600322723, "learning_rate": 9.689670998815152e-05, "loss": 1.2663, "mean_token_accuracy": 0.757455189526081, "num_tokens": 525468969.0, "step": 7780 }, { "entropy": 0.848297119140625, "epoch": 2.359918200408998, "grad_norm": 0.11233241111040115, "learning_rate": 9.68851182251262e-05, "loss": 1.2582, "mean_token_accuracy": 0.7608260795474052, "num_tokens": 526146603.0, "step": 7790 }, { "entropy": 0.8517893686890602, "epoch": 2.362947814890555, "grad_norm": 0.11174257844686508, "learning_rate": 9.68735055489927e-05, "loss": 1.2597, "mean_token_accuracy": 0.7575798913836479, "num_tokens": 526811275.0, "step": 7800 }, { "entropy": 0.847201581299305, "epoch": 2.3659774293721125, "grad_norm": 0.10749486833810806, "learning_rate": 9.686187196493088e-05, "loss": 1.2547, "mean_token_accuracy": 0.7573108091950417, "num_tokens": 527476519.0, "step": 7810 }, { "entropy": 0.8587975278496742, "epoch": 2.3690070438536694, "grad_norm": 0.13182587921619415, "learning_rate": 9.685021747812987e-05, "loss": 1.2616, "mean_token_accuracy": 0.7562750145792961, "num_tokens": 528140460.0, "step": 7820 }, { "entropy": 0.858908225595951, "epoch": 2.372036658335227, "grad_norm": 0.10838868468999863, "learning_rate": 9.68385420937882e-05, "loss": 1.2605, "mean_token_accuracy": 0.7547510161995887, "num_tokens": 528809327.0, "step": 7830 }, { "entropy": 0.8573035791516304, "epoch": 2.375066272816784, "grad_norm": 0.10809095948934555, "learning_rate": 9.682684581711363e-05, "loss": 1.2669, "mean_token_accuracy": 0.7581486374139785, "num_tokens": 529484602.0, "step": 7840 }, { "entropy": 0.8556201368570328, "epoch": 2.378095887298341, "grad_norm": 0.11285743117332458, "learning_rate": 9.681512865332332e-05, "loss": 1.2614, "mean_token_accuracy": 0.7597505211830139, "num_tokens": 530147914.0, "step": 7850 }, { "entropy": 0.8480548933148384, "epoch": 2.3811255017798985, "grad_norm": 0.12628069519996643, "learning_rate": 9.680339060764368e-05, "loss": 1.265, "mean_token_accuracy": 0.7584131717681885, "num_tokens": 530804067.0, "step": 7860 }, { "entropy": 0.8499169021844863, "epoch": 2.384155116261456, "grad_norm": 0.10992208868265152, "learning_rate": 9.679163168531049e-05, "loss": 1.2519, "mean_token_accuracy": 0.7621417880058289, "num_tokens": 531485661.0, "step": 7870 }, { "entropy": 0.8616214722394944, "epoch": 2.387184730743013, "grad_norm": 0.13375818729400635, "learning_rate": 9.677985189156882e-05, "loss": 1.2666, "mean_token_accuracy": 0.7609955608844757, "num_tokens": 532167902.0, "step": 7880 }, { "entropy": 0.8562293291091919, "epoch": 2.39021434522457, "grad_norm": 0.11304410547018051, "learning_rate": 9.676805123167302e-05, "loss": 1.2552, "mean_token_accuracy": 0.7582985505461692, "num_tokens": 532847125.0, "step": 7890 }, { "entropy": 0.8536284953355789, "epoch": 2.3932439597061275, "grad_norm": 0.10804271697998047, "learning_rate": 9.675622971088681e-05, "loss": 1.2521, "mean_token_accuracy": 0.7590174600481987, "num_tokens": 533520028.0, "step": 7900 }, { "entropy": 0.8469849348068237, "epoch": 2.396273574187685, "grad_norm": 0.10462608188390732, "learning_rate": 9.674438733448314e-05, "loss": 1.2552, "mean_token_accuracy": 0.759534765779972, "num_tokens": 534190434.0, "step": 7910 }, { "entropy": 0.8400739654898643, "epoch": 2.3993031886692417, "grad_norm": 0.11971303075551987, "learning_rate": 9.673252410774433e-05, "loss": 1.2461, "mean_token_accuracy": 0.7663201510906219, "num_tokens": 534878727.0, "step": 7920 }, { "entropy": 0.8583844676613808, "epoch": 2.402332803150799, "grad_norm": 0.11179837584495544, "learning_rate": 9.672064003596197e-05, "loss": 1.2682, "mean_token_accuracy": 0.756516107916832, "num_tokens": 535542509.0, "step": 7930 }, { "entropy": 0.8607254073023796, "epoch": 2.4053624176323565, "grad_norm": 0.11369846761226654, "learning_rate": 9.670873512443695e-05, "loss": 1.2584, "mean_token_accuracy": 0.7623056411743164, "num_tokens": 536234786.0, "step": 7940 }, { "entropy": 0.8532637119293213, "epoch": 2.4083920321139134, "grad_norm": 0.10826956480741501, "learning_rate": 9.669680937847944e-05, "loss": 1.2541, "mean_token_accuracy": 0.7596175089478493, "num_tokens": 536918744.0, "step": 7950 }, { "entropy": 0.8515122517943382, "epoch": 2.4114216465954708, "grad_norm": 0.10441789776086807, "learning_rate": 9.668486280340894e-05, "loss": 1.2622, "mean_token_accuracy": 0.7563559800386429, "num_tokens": 537579555.0, "step": 7960 }, { "entropy": 0.848784476518631, "epoch": 2.414451261077028, "grad_norm": 0.10566443204879761, "learning_rate": 9.66728954045542e-05, "loss": 1.2506, "mean_token_accuracy": 0.7639730542898178, "num_tokens": 538265841.0, "step": 7970 }, { "entropy": 0.8581619426608086, "epoch": 2.417480875558585, "grad_norm": 0.1146743893623352, "learning_rate": 9.66609071872533e-05, "loss": 1.2595, "mean_token_accuracy": 0.7589652687311172, "num_tokens": 538943989.0, "step": 7980 }, { "entropy": 0.8564366966485977, "epoch": 2.4205104900401424, "grad_norm": 0.11719758808612823, "learning_rate": 9.664889815685358e-05, "loss": 1.2619, "mean_token_accuracy": 0.7580450057983399, "num_tokens": 539628292.0, "step": 7990 }, { "entropy": 0.8529757842421531, "epoch": 2.4235401045216998, "grad_norm": 0.10858716070652008, "learning_rate": 9.663686831871167e-05, "loss": 1.2606, "mean_token_accuracy": 0.7648607611656189, "num_tokens": 540314369.0, "step": 8000 }, { "entropy": 0.8409318551421165, "epoch": 2.4265697190032567, "grad_norm": 0.1144886389374733, "learning_rate": 9.662481767819345e-05, "loss": 1.2526, "mean_token_accuracy": 0.7646861433982849, "num_tokens": 540996019.0, "step": 8010 }, { "entropy": 0.8637088030576706, "epoch": 2.429599333484814, "grad_norm": 0.1376212239265442, "learning_rate": 9.661274624067413e-05, "loss": 1.2616, "mean_token_accuracy": 0.7563422918319702, "num_tokens": 541674215.0, "step": 8020 }, { "entropy": 0.8513024523854256, "epoch": 2.4326289479663714, "grad_norm": 0.10584916174411774, "learning_rate": 9.66006540115382e-05, "loss": 1.2619, "mean_token_accuracy": 0.7597479805350303, "num_tokens": 542352807.0, "step": 8030 }, { "entropy": 0.8591561034321785, "epoch": 2.4356585624479283, "grad_norm": 0.1118103638291359, "learning_rate": 9.658854099617935e-05, "loss": 1.2614, "mean_token_accuracy": 0.7565526068210602, "num_tokens": 543024165.0, "step": 8040 }, { "entropy": 0.8484128370881081, "epoch": 2.4386881769294857, "grad_norm": 0.10558687150478363, "learning_rate": 9.657640720000064e-05, "loss": 1.2472, "mean_token_accuracy": 0.7617892831563949, "num_tokens": 543703859.0, "step": 8050 }, { "entropy": 0.8600789204239845, "epoch": 2.441717791411043, "grad_norm": 0.1135752871632576, "learning_rate": 9.656425262841431e-05, "loss": 1.2675, "mean_token_accuracy": 0.7570716828107834, "num_tokens": 544375173.0, "step": 8060 }, { "entropy": 0.8509885743260384, "epoch": 2.4447474058926, "grad_norm": 0.11385232955217361, "learning_rate": 9.655207728684194e-05, "loss": 1.2536, "mean_token_accuracy": 0.76125747859478, "num_tokens": 545058161.0, "step": 8070 }, { "entropy": 0.8475042149424553, "epoch": 2.4477770203741573, "grad_norm": 0.12125519663095474, "learning_rate": 9.653988118071431e-05, "loss": 1.2549, "mean_token_accuracy": 0.7572567671537399, "num_tokens": 545726905.0, "step": 8080 }, { "entropy": 0.8501904651522636, "epoch": 2.4508066348557147, "grad_norm": 0.11919412761926651, "learning_rate": 9.652766431547151e-05, "loss": 1.2545, "mean_token_accuracy": 0.7620494902133942, "num_tokens": 546411973.0, "step": 8090 }, { "entropy": 0.8457695588469505, "epoch": 2.4538362493372716, "grad_norm": 0.12159312516450882, "learning_rate": 9.651542669656288e-05, "loss": 1.2488, "mean_token_accuracy": 0.7606399342417717, "num_tokens": 547079112.0, "step": 8100 }, { "entropy": 0.8524926468729973, "epoch": 2.456865863818829, "grad_norm": 0.1109381690621376, "learning_rate": 9.650316832944699e-05, "loss": 1.2619, "mean_token_accuracy": 0.7586096912622452, "num_tokens": 547747236.0, "step": 8110 }, { "entropy": 0.8604070246219635, "epoch": 2.4598954783003864, "grad_norm": 0.11541227251291275, "learning_rate": 9.649088921959171e-05, "loss": 1.2602, "mean_token_accuracy": 0.7589732840657234, "num_tokens": 548421661.0, "step": 8120 }, { "entropy": 0.849203696846962, "epoch": 2.4629250927819433, "grad_norm": 0.12876959145069122, "learning_rate": 9.647858937247412e-05, "loss": 1.2521, "mean_token_accuracy": 0.759013582766056, "num_tokens": 549098574.0, "step": 8130 }, { "entropy": 0.8548745185136795, "epoch": 2.4659547072635006, "grad_norm": 0.12017077207565308, "learning_rate": 9.646626879358055e-05, "loss": 1.2571, "mean_token_accuracy": 0.7566325277090072, "num_tokens": 549776319.0, "step": 8140 }, { "entropy": 0.8461618572473526, "epoch": 2.468984321745058, "grad_norm": 0.11218147724866867, "learning_rate": 9.645392748840665e-05, "loss": 1.255, "mean_token_accuracy": 0.7596992626786232, "num_tokens": 550452838.0, "step": 8150 }, { "entropy": 0.8496768981218338, "epoch": 2.472013936226615, "grad_norm": 0.11221098154783249, "learning_rate": 9.644156546245719e-05, "loss": 1.2525, "mean_token_accuracy": 0.7584793582558632, "num_tokens": 551129488.0, "step": 8160 }, { "entropy": 0.8606008782982826, "epoch": 2.4750435507081723, "grad_norm": 0.10581947863101959, "learning_rate": 9.642918272124632e-05, "loss": 1.2673, "mean_token_accuracy": 0.7597725406289101, "num_tokens": 551798387.0, "step": 8170 }, { "entropy": 0.8509682357311249, "epoch": 2.4780731651897296, "grad_norm": 0.1086905375123024, "learning_rate": 9.64167792702973e-05, "loss": 1.2501, "mean_token_accuracy": 0.7609071403741836, "num_tokens": 552473465.0, "step": 8180 }, { "entropy": 0.8588610589504242, "epoch": 2.481102779671287, "grad_norm": 0.14329372346401215, "learning_rate": 9.640435511514274e-05, "loss": 1.259, "mean_token_accuracy": 0.7589243084192276, "num_tokens": 553147564.0, "step": 8190 }, { "entropy": 0.8484311655163765, "epoch": 2.484132394152844, "grad_norm": 0.12002729624509811, "learning_rate": 9.63919102613244e-05, "loss": 1.2608, "mean_token_accuracy": 0.7560410052537918, "num_tokens": 553800562.0, "step": 8200 }, { "entropy": 0.8479263916611671, "epoch": 2.4871620086344013, "grad_norm": 0.11256726831197739, "learning_rate": 9.637944471439333e-05, "loss": 1.2631, "mean_token_accuracy": 0.7618035539984703, "num_tokens": 554477016.0, "step": 8210 }, { "entropy": 0.8599980130791665, "epoch": 2.4901916231159587, "grad_norm": 0.1089865192770958, "learning_rate": 9.636695847990977e-05, "loss": 1.2558, "mean_token_accuracy": 0.7554180040955544, "num_tokens": 555144622.0, "step": 8220 }, { "entropy": 0.8463609889149666, "epoch": 2.4932212375975156, "grad_norm": 0.1190873235464096, "learning_rate": 9.635445156344322e-05, "loss": 1.2517, "mean_token_accuracy": 0.7621619239449501, "num_tokens": 555832382.0, "step": 8230 }, { "entropy": 0.8366684496402741, "epoch": 2.496250852079073, "grad_norm": 0.11321038007736206, "learning_rate": 9.634192397057238e-05, "loss": 1.2486, "mean_token_accuracy": 0.7627046957612038, "num_tokens": 556513252.0, "step": 8240 }, { "entropy": 0.8561166599392891, "epoch": 2.4992804665606303, "grad_norm": 0.1392555832862854, "learning_rate": 9.632937570688518e-05, "loss": 1.2649, "mean_token_accuracy": 0.7513695076107979, "num_tokens": 557164301.0, "step": 8250 }, { "entropy": 0.8640503004193306, "epoch": 2.502310081042187, "grad_norm": 0.11030290275812149, "learning_rate": 9.631680677797879e-05, "loss": 1.2621, "mean_token_accuracy": 0.7571258053183556, "num_tokens": 557841790.0, "step": 8260 }, { "entropy": 0.8509059086441993, "epoch": 2.5053396955237446, "grad_norm": 0.11236338317394257, "learning_rate": 9.630421718945956e-05, "loss": 1.2554, "mean_token_accuracy": 0.756206126511097, "num_tokens": 558503800.0, "step": 8270 }, { "entropy": 0.8505407676100731, "epoch": 2.508369310005302, "grad_norm": 0.11434158682823181, "learning_rate": 9.629160694694311e-05, "loss": 1.2553, "mean_token_accuracy": 0.7556064292788506, "num_tokens": 559161417.0, "step": 8280 }, { "entropy": 0.8487449988722802, "epoch": 2.5113989244868593, "grad_norm": 0.1086057499051094, "learning_rate": 9.627897605605418e-05, "loss": 1.2597, "mean_token_accuracy": 0.7596222028136254, "num_tokens": 559849641.0, "step": 8290 }, { "entropy": 0.846830989420414, "epoch": 2.5144285389684162, "grad_norm": 0.10776392370462418, "learning_rate": 9.626632452242685e-05, "loss": 1.262, "mean_token_accuracy": 0.7554687961935997, "num_tokens": 560503057.0, "step": 8300 }, { "entropy": 0.8512991040945053, "epoch": 2.5174581534499736, "grad_norm": 0.10779240727424622, "learning_rate": 9.625365235170429e-05, "loss": 1.2531, "mean_token_accuracy": 0.7656571835279464, "num_tokens": 561207112.0, "step": 8310 }, { "entropy": 0.8476653531193733, "epoch": 2.520487767931531, "grad_norm": 0.11635958403348923, "learning_rate": 9.624095954953893e-05, "loss": 1.2461, "mean_token_accuracy": 0.7580608189105987, "num_tokens": 561887488.0, "step": 8320 }, { "entropy": 0.8512125059962272, "epoch": 2.523517382413088, "grad_norm": 0.11220627278089523, "learning_rate": 9.62282461215924e-05, "loss": 1.2607, "mean_token_accuracy": 0.7566897615790367, "num_tokens": 562557842.0, "step": 8330 }, { "entropy": 0.8469684407114982, "epoch": 2.5265469968946452, "grad_norm": 0.11146048456430435, "learning_rate": 9.621551207353552e-05, "loss": 1.256, "mean_token_accuracy": 0.7601208105683327, "num_tokens": 563235504.0, "step": 8340 }, { "entropy": 0.859553873538971, "epoch": 2.5295766113762026, "grad_norm": 0.13174964487552643, "learning_rate": 9.620275741104833e-05, "loss": 1.258, "mean_token_accuracy": 0.75794418156147, "num_tokens": 563910671.0, "step": 8350 }, { "entropy": 0.8397681131958962, "epoch": 2.5326062258577595, "grad_norm": 0.11584554612636566, "learning_rate": 9.618998213982003e-05, "loss": 1.2471, "mean_token_accuracy": 0.768119253218174, "num_tokens": 564608284.0, "step": 8360 }, { "entropy": 0.844032707810402, "epoch": 2.535635840339317, "grad_norm": 0.11399803310632706, "learning_rate": 9.617718626554903e-05, "loss": 1.2563, "mean_token_accuracy": 0.7574190989136695, "num_tokens": 565270666.0, "step": 8370 }, { "entropy": 0.8594373747706413, "epoch": 2.5386654548208742, "grad_norm": 0.11004584282636642, "learning_rate": 9.616436979394294e-05, "loss": 1.2591, "mean_token_accuracy": 0.7594205111265182, "num_tokens": 565951343.0, "step": 8380 }, { "entropy": 0.8535692304372787, "epoch": 2.541695069302431, "grad_norm": 0.12403662502765656, "learning_rate": 9.615153273071853e-05, "loss": 1.2528, "mean_token_accuracy": 0.7585661306977272, "num_tokens": 566630426.0, "step": 8390 }, { "entropy": 0.8541465491056442, "epoch": 2.5447246837839885, "grad_norm": 0.1108674630522728, "learning_rate": 9.613867508160179e-05, "loss": 1.2597, "mean_token_accuracy": 0.7620561316609382, "num_tokens": 567303716.0, "step": 8400 }, { "entropy": 0.8517271533608437, "epoch": 2.547754298265546, "grad_norm": 0.10456592589616776, "learning_rate": 9.612579685232788e-05, "loss": 1.2541, "mean_token_accuracy": 0.7625368073582649, "num_tokens": 567979668.0, "step": 8410 }, { "entropy": 0.8534714087843895, "epoch": 2.550783912747103, "grad_norm": 0.10534250736236572, "learning_rate": 9.611289804864112e-05, "loss": 1.2575, "mean_token_accuracy": 0.7609692618250847, "num_tokens": 568660485.0, "step": 8420 }, { "entropy": 0.8569789603352547, "epoch": 2.55381352722866, "grad_norm": 0.10718728601932526, "learning_rate": 9.609997867629501e-05, "loss": 1.2631, "mean_token_accuracy": 0.7570777237415314, "num_tokens": 569338084.0, "step": 8430 }, { "entropy": 0.8605900689959526, "epoch": 2.5568431417102175, "grad_norm": 0.1172102838754654, "learning_rate": 9.608703874105227e-05, "loss": 1.2597, "mean_token_accuracy": 0.7540593296289444, "num_tokens": 570007047.0, "step": 8440 }, { "entropy": 0.856265552341938, "epoch": 2.5598727561917745, "grad_norm": 0.11838158965110779, "learning_rate": 9.607407824868473e-05, "loss": 1.2646, "mean_token_accuracy": 0.7546383023262024, "num_tokens": 570668524.0, "step": 8450 }, { "entropy": 0.8531616821885109, "epoch": 2.562902370673332, "grad_norm": 0.1120709478855133, "learning_rate": 9.606109720497343e-05, "loss": 1.2518, "mean_token_accuracy": 0.7596362516283989, "num_tokens": 571342859.0, "step": 8460 }, { "entropy": 0.8628086686134339, "epoch": 2.565931985154889, "grad_norm": 0.11527913808822632, "learning_rate": 9.604809561570856e-05, "loss": 1.2621, "mean_token_accuracy": 0.7554846331477165, "num_tokens": 572016207.0, "step": 8470 }, { "entropy": 0.853224304318428, "epoch": 2.568961599636446, "grad_norm": 0.10552668571472168, "learning_rate": 9.603507348668947e-05, "loss": 1.2648, "mean_token_accuracy": 0.7635086283087731, "num_tokens": 572701837.0, "step": 8480 }, { "entropy": 0.8454021051526069, "epoch": 2.5719912141180035, "grad_norm": 0.09288834780454636, "learning_rate": 9.602203082372471e-05, "loss": 1.2427, "mean_token_accuracy": 0.7652703821659088, "num_tokens": 573398981.0, "step": 8490 }, { "entropy": 0.8564005300402642, "epoch": 2.575020828599561, "grad_norm": 0.10734734684228897, "learning_rate": 9.600896763263195e-05, "loss": 1.2621, "mean_token_accuracy": 0.7566014558076859, "num_tokens": 574067498.0, "step": 8500 }, { "entropy": 0.8504653915762901, "epoch": 2.5780504430811177, "grad_norm": 0.1403270959854126, "learning_rate": 9.5995883919238e-05, "loss": 1.2524, "mean_token_accuracy": 0.763724073767662, "num_tokens": 574746104.0, "step": 8510 }, { "entropy": 0.8560445606708527, "epoch": 2.581080057562675, "grad_norm": 0.11350507289171219, "learning_rate": 9.598277968937887e-05, "loss": 1.2587, "mean_token_accuracy": 0.759540781378746, "num_tokens": 575420436.0, "step": 8520 }, { "entropy": 0.8538668856024743, "epoch": 2.5841096720442325, "grad_norm": 0.12417567521333694, "learning_rate": 9.596965494889971e-05, "loss": 1.2568, "mean_token_accuracy": 0.7581377297639846, "num_tokens": 576096733.0, "step": 8530 }, { "entropy": 0.8500018835067749, "epoch": 2.5871392865257894, "grad_norm": 0.11514724791049957, "learning_rate": 9.595650970365481e-05, "loss": 1.2584, "mean_token_accuracy": 0.7608907133340835, "num_tokens": 576781076.0, "step": 8540 }, { "entropy": 0.8607743322849274, "epoch": 2.5901689010073468, "grad_norm": 0.10145779699087143, "learning_rate": 9.594334395950759e-05, "loss": 1.2569, "mean_token_accuracy": 0.7564357355237007, "num_tokens": 577459938.0, "step": 8550 }, { "entropy": 0.8551357820630073, "epoch": 2.593198515488904, "grad_norm": 0.11113659292459488, "learning_rate": 9.593015772233065e-05, "loss": 1.2564, "mean_token_accuracy": 0.761788259446621, "num_tokens": 578152991.0, "step": 8560 }, { "entropy": 0.8534735798835754, "epoch": 2.596228129970461, "grad_norm": 0.12159695476293564, "learning_rate": 9.591695099800571e-05, "loss": 1.2603, "mean_token_accuracy": 0.7570795536041259, "num_tokens": 578827823.0, "step": 8570 }, { "entropy": 0.8524635910987854, "epoch": 2.5992577444520184, "grad_norm": 0.15989543497562408, "learning_rate": 9.59037237924236e-05, "loss": 1.2525, "mean_token_accuracy": 0.7586214616894722, "num_tokens": 579496735.0, "step": 8580 }, { "entropy": 0.8524748459458351, "epoch": 2.6022873589335758, "grad_norm": 0.10653384774923325, "learning_rate": 9.589047611148434e-05, "loss": 1.2517, "mean_token_accuracy": 0.7634116768836975, "num_tokens": 580189477.0, "step": 8590 }, { "entropy": 0.8582893222570419, "epoch": 2.6053169734151327, "grad_norm": 0.12020713090896606, "learning_rate": 9.587720796109708e-05, "loss": 1.2625, "mean_token_accuracy": 0.7602693036198616, "num_tokens": 580870093.0, "step": 8600 }, { "entropy": 0.8512812659144402, "epoch": 2.60834658789669, "grad_norm": 0.11194371432065964, "learning_rate": 9.586391934718002e-05, "loss": 1.2476, "mean_token_accuracy": 0.7562606275081635, "num_tokens": 581540651.0, "step": 8610 }, { "entropy": 0.8489811271429062, "epoch": 2.6113762023782474, "grad_norm": 0.11133172363042831, "learning_rate": 9.58506102756606e-05, "loss": 1.2496, "mean_token_accuracy": 0.759826211631298, "num_tokens": 582208839.0, "step": 8620 }, { "entropy": 0.8494002401828766, "epoch": 2.6144058168598043, "grad_norm": 0.1221701055765152, "learning_rate": 9.583728075247532e-05, "loss": 1.2603, "mean_token_accuracy": 0.7597423046827316, "num_tokens": 582891996.0, "step": 8630 }, { "entropy": 0.8590531438589096, "epoch": 2.6174354313413617, "grad_norm": 0.10177098214626312, "learning_rate": 9.582393078356979e-05, "loss": 1.2667, "mean_token_accuracy": 0.7538132026791573, "num_tokens": 583541518.0, "step": 8640 }, { "entropy": 0.8468727216124534, "epoch": 2.620465045822919, "grad_norm": 0.10644321888685226, "learning_rate": 9.581056037489878e-05, "loss": 1.2517, "mean_token_accuracy": 0.7635338783264161, "num_tokens": 584221310.0, "step": 8650 }, { "entropy": 0.8520080730319023, "epoch": 2.623494660304476, "grad_norm": 0.1347191035747528, "learning_rate": 9.579716953242616e-05, "loss": 1.2493, "mean_token_accuracy": 0.7642081990838051, "num_tokens": 584914672.0, "step": 8660 }, { "entropy": 0.8513093635439872, "epoch": 2.6265242747860333, "grad_norm": 0.14387550950050354, "learning_rate": 9.578375826212494e-05, "loss": 1.2467, "mean_token_accuracy": 0.7608000755310058, "num_tokens": 585595603.0, "step": 8670 }, { "entropy": 0.8506295859813691, "epoch": 2.6295538892675907, "grad_norm": 0.11430154740810394, "learning_rate": 9.577032656997718e-05, "loss": 1.2562, "mean_token_accuracy": 0.7552939653396606, "num_tokens": 586260344.0, "step": 8680 }, { "entropy": 0.858209428191185, "epoch": 2.632583503749148, "grad_norm": 0.13120955228805542, "learning_rate": 9.57568744619741e-05, "loss": 1.2627, "mean_token_accuracy": 0.7558746591210366, "num_tokens": 586924039.0, "step": 8690 }, { "entropy": 0.8451697275042533, "epoch": 2.635613118230705, "grad_norm": 0.11614711582660675, "learning_rate": 9.574340194411602e-05, "loss": 1.2489, "mean_token_accuracy": 0.7614456757903099, "num_tokens": 587604761.0, "step": 8700 }, { "entropy": 0.8501728236675262, "epoch": 2.6386427327122624, "grad_norm": 0.1159096509218216, "learning_rate": 9.572990902241236e-05, "loss": 1.2565, "mean_token_accuracy": 0.76109050065279, "num_tokens": 588282551.0, "step": 8710 }, { "entropy": 0.8441739335656167, "epoch": 2.6416723471938197, "grad_norm": 0.1077175885438919, "learning_rate": 9.571639570288162e-05, "loss": 1.2532, "mean_token_accuracy": 0.7615225374698639, "num_tokens": 588960630.0, "step": 8720 }, { "entropy": 0.8558618754148484, "epoch": 2.644701961675377, "grad_norm": 0.10956843942403793, "learning_rate": 9.570286199155143e-05, "loss": 1.2664, "mean_token_accuracy": 0.7598358571529389, "num_tokens": 589633892.0, "step": 8730 }, { "entropy": 0.8522457152605056, "epoch": 2.647731576156934, "grad_norm": 0.11533006280660629, "learning_rate": 9.568930789445849e-05, "loss": 1.2527, "mean_token_accuracy": 0.7591142773628234, "num_tokens": 590310547.0, "step": 8740 }, { "entropy": 0.8420512557029725, "epoch": 2.6507611906384914, "grad_norm": 0.11325296014547348, "learning_rate": 9.567573341764862e-05, "loss": 1.2412, "mean_token_accuracy": 0.764337295293808, "num_tokens": 590990603.0, "step": 8750 }, { "entropy": 0.8466836139559746, "epoch": 2.6537908051200487, "grad_norm": 0.10968432575464249, "learning_rate": 9.56621385671767e-05, "loss": 1.2604, "mean_token_accuracy": 0.7618458330631256, "num_tokens": 591673884.0, "step": 8760 }, { "entropy": 0.8422190606594085, "epoch": 2.6568204196016056, "grad_norm": 0.12760990858078003, "learning_rate": 9.564852334910674e-05, "loss": 1.2533, "mean_token_accuracy": 0.7592846661806106, "num_tokens": 592329661.0, "step": 8770 }, { "entropy": 0.8569976061582565, "epoch": 2.659850034083163, "grad_norm": 0.10784024745225906, "learning_rate": 9.563488776951178e-05, "loss": 1.2519, "mean_token_accuracy": 0.7618673861026763, "num_tokens": 593019514.0, "step": 8780 }, { "entropy": 0.8481792345643043, "epoch": 2.6628796485647204, "grad_norm": 0.11991298943758011, "learning_rate": 9.562123183447399e-05, "loss": 1.2556, "mean_token_accuracy": 0.7591419890522957, "num_tokens": 593688540.0, "step": 8790 }, { "entropy": 0.8476454094052315, "epoch": 2.6659092630462773, "grad_norm": 0.12877626717090607, "learning_rate": 9.560755555008458e-05, "loss": 1.2492, "mean_token_accuracy": 0.7576417684555053, "num_tokens": 594355381.0, "step": 8800 }, { "entropy": 0.8531828731298446, "epoch": 2.6689388775278347, "grad_norm": 0.11032012104988098, "learning_rate": 9.559385892244388e-05, "loss": 1.2505, "mean_token_accuracy": 0.7598303601145744, "num_tokens": 595027868.0, "step": 8810 }, { "entropy": 0.8510458245873451, "epoch": 2.671968492009392, "grad_norm": 0.10477004945278168, "learning_rate": 9.558014195766126e-05, "loss": 1.2555, "mean_token_accuracy": 0.757819676399231, "num_tokens": 595698206.0, "step": 8820 }, { "entropy": 0.8613530620932579, "epoch": 2.674998106490949, "grad_norm": 0.10690850019454956, "learning_rate": 9.556640466185517e-05, "loss": 1.2528, "mean_token_accuracy": 0.7590271100401879, "num_tokens": 596384294.0, "step": 8830 }, { "entropy": 0.8479463741183281, "epoch": 2.6780277209725063, "grad_norm": 0.10583047568798065, "learning_rate": 9.555264704115313e-05, "loss": 1.258, "mean_token_accuracy": 0.7629940390586853, "num_tokens": 597068799.0, "step": 8840 }, { "entropy": 0.8475633636116982, "epoch": 2.6810573354540637, "grad_norm": 0.11271261423826218, "learning_rate": 9.553886910169174e-05, "loss": 1.2439, "mean_token_accuracy": 0.7668149173259735, "num_tokens": 597757095.0, "step": 8850 }, { "entropy": 0.8524195119738579, "epoch": 2.6840869499356206, "grad_norm": 0.11528690904378891, "learning_rate": 9.552507084961664e-05, "loss": 1.2622, "mean_token_accuracy": 0.7573121473193168, "num_tokens": 598434167.0, "step": 8860 }, { "entropy": 0.8472784489393235, "epoch": 2.687116564417178, "grad_norm": 0.11534834653139114, "learning_rate": 9.551125229108253e-05, "loss": 1.2424, "mean_token_accuracy": 0.7616955548524856, "num_tokens": 599119603.0, "step": 8870 }, { "entropy": 0.8592986464500427, "epoch": 2.6901461788987353, "grad_norm": 0.11634981632232666, "learning_rate": 9.54974134322532e-05, "loss": 1.2594, "mean_token_accuracy": 0.7608154565095901, "num_tokens": 599802386.0, "step": 8880 }, { "entropy": 0.8419868484139442, "epoch": 2.6931757933802922, "grad_norm": 0.11178780347108841, "learning_rate": 9.548355427930147e-05, "loss": 1.2455, "mean_token_accuracy": 0.7621794462203979, "num_tokens": 600476266.0, "step": 8890 }, { "entropy": 0.8469268649816513, "epoch": 2.6962054078618496, "grad_norm": 0.11938447505235672, "learning_rate": 9.54696748384092e-05, "loss": 1.262, "mean_token_accuracy": 0.7612688824534416, "num_tokens": 601144964.0, "step": 8900 }, { "entropy": 0.84125135242939, "epoch": 2.699235022343407, "grad_norm": 0.11642032116651535, "learning_rate": 9.545577511576732e-05, "loss": 1.2435, "mean_token_accuracy": 0.7643848404288291, "num_tokens": 601833405.0, "step": 8910 }, { "entropy": 0.853831721842289, "epoch": 2.702264636824964, "grad_norm": 0.11114302277565002, "learning_rate": 9.544185511757581e-05, "loss": 1.263, "mean_token_accuracy": 0.7591975644230843, "num_tokens": 602506466.0, "step": 8920 }, { "entropy": 0.8439517483115196, "epoch": 2.7052942513065212, "grad_norm": 0.11877685785293579, "learning_rate": 9.542791485004368e-05, "loss": 1.2515, "mean_token_accuracy": 0.7627226859331131, "num_tokens": 603183297.0, "step": 8930 }, { "entropy": 0.8474942713975906, "epoch": 2.7083238657880786, "grad_norm": 0.12535443902015686, "learning_rate": 9.5413954319389e-05, "loss": 1.2551, "mean_token_accuracy": 0.7631326824426651, "num_tokens": 603858049.0, "step": 8940 }, { "entropy": 0.8528895542025566, "epoch": 2.7113534802696355, "grad_norm": 0.10631478577852249, "learning_rate": 9.539997353183883e-05, "loss": 1.2533, "mean_token_accuracy": 0.7621230915188789, "num_tokens": 604549132.0, "step": 8950 }, { "entropy": 0.8584895536303521, "epoch": 2.714383094751193, "grad_norm": 0.10547883063554764, "learning_rate": 9.538597249362934e-05, "loss": 1.2642, "mean_token_accuracy": 0.7588069796562195, "num_tokens": 605225654.0, "step": 8960 }, { "entropy": 0.8447905778884888, "epoch": 2.7174127092327502, "grad_norm": 0.10729092359542847, "learning_rate": 9.537195121100568e-05, "loss": 1.2521, "mean_token_accuracy": 0.7592152699828147, "num_tokens": 605897599.0, "step": 8970 }, { "entropy": 0.8485624492168427, "epoch": 2.720442323714307, "grad_norm": 0.10752914845943451, "learning_rate": 9.535790969022202e-05, "loss": 1.2514, "mean_token_accuracy": 0.7590329334139824, "num_tokens": 606578210.0, "step": 8980 }, { "entropy": 0.842961086332798, "epoch": 2.7234719381958645, "grad_norm": 0.10626719146966934, "learning_rate": 9.534384793754163e-05, "loss": 1.2468, "mean_token_accuracy": 0.7642134800553322, "num_tokens": 607269313.0, "step": 8990 }, { "entropy": 0.8540323719382286, "epoch": 2.726501552677422, "grad_norm": 0.11208371818065643, "learning_rate": 9.53297659592367e-05, "loss": 1.2571, "mean_token_accuracy": 0.7591601192951203, "num_tokens": 607942514.0, "step": 9000 }, { "entropy": 0.846892024576664, "epoch": 2.729531167158979, "grad_norm": 0.11249063909053802, "learning_rate": 9.531566376158853e-05, "loss": 1.2543, "mean_token_accuracy": 0.7622483372688293, "num_tokens": 608624206.0, "step": 9010 }, { "entropy": 0.8537290051579476, "epoch": 2.732560781640536, "grad_norm": 0.10326066613197327, "learning_rate": 9.530154135088742e-05, "loss": 1.2407, "mean_token_accuracy": 0.7624550864100457, "num_tokens": 609324533.0, "step": 9020 }, { "entropy": 0.8529239282011986, "epoch": 2.7355903961220935, "grad_norm": 0.14154255390167236, "learning_rate": 9.528739873343264e-05, "loss": 1.2546, "mean_token_accuracy": 0.7607391595840454, "num_tokens": 610004801.0, "step": 9030 }, { "entropy": 0.8454217985272408, "epoch": 2.7386200106036505, "grad_norm": 0.11787353456020355, "learning_rate": 9.52732359155325e-05, "loss": 1.2589, "mean_token_accuracy": 0.7602300241589546, "num_tokens": 610675926.0, "step": 9040 }, { "entropy": 0.8582961186766624, "epoch": 2.741649625085208, "grad_norm": 0.11585194617509842, "learning_rate": 9.525905290350435e-05, "loss": 1.2578, "mean_token_accuracy": 0.7624749362468719, "num_tokens": 611368497.0, "step": 9050 }, { "entropy": 0.8577098563313484, "epoch": 2.744679239566765, "grad_norm": 0.10689109563827515, "learning_rate": 9.524484970367451e-05, "loss": 1.2568, "mean_token_accuracy": 0.757765942811966, "num_tokens": 612039252.0, "step": 9060 }, { "entropy": 0.8351210162043572, "epoch": 2.747708854048322, "grad_norm": 0.14586885273456573, "learning_rate": 9.523062632237834e-05, "loss": 1.2332, "mean_token_accuracy": 0.7655604347586632, "num_tokens": 612720216.0, "step": 9070 }, { "entropy": 0.8519888401031495, "epoch": 2.7507384685298795, "grad_norm": 0.11551733314990997, "learning_rate": 9.521638276596014e-05, "loss": 1.2524, "mean_token_accuracy": 0.7608656138181686, "num_tokens": 613399302.0, "step": 9080 }, { "entropy": 0.8438190400600434, "epoch": 2.753768083011437, "grad_norm": 0.10600708425045013, "learning_rate": 9.520211904077328e-05, "loss": 1.2488, "mean_token_accuracy": 0.7577302813529968, "num_tokens": 614054249.0, "step": 9090 }, { "entropy": 0.8623249784111977, "epoch": 2.7567976974929937, "grad_norm": 0.10966934263706207, "learning_rate": 9.518783515318007e-05, "loss": 1.2621, "mean_token_accuracy": 0.7588957846164703, "num_tokens": 614738179.0, "step": 9100 }, { "entropy": 0.8504249393939972, "epoch": 2.759827311974551, "grad_norm": 0.12102324515581131, "learning_rate": 9.517353110955186e-05, "loss": 1.2552, "mean_token_accuracy": 0.7596418187022209, "num_tokens": 615404091.0, "step": 9110 }, { "entropy": 0.85791544765234, "epoch": 2.7628569264561085, "grad_norm": 0.11166871339082718, "learning_rate": 9.515920691626896e-05, "loss": 1.2587, "mean_token_accuracy": 0.7599508956074714, "num_tokens": 616081723.0, "step": 9120 }, { "entropy": 0.8537204295396805, "epoch": 2.765886540937666, "grad_norm": 0.11071418970823288, "learning_rate": 9.514486257972069e-05, "loss": 1.2552, "mean_token_accuracy": 0.7543772354722023, "num_tokens": 616749032.0, "step": 9130 }, { "entropy": 0.8480722889304161, "epoch": 2.7689161554192228, "grad_norm": 0.1132015585899353, "learning_rate": 9.513049810630533e-05, "loss": 1.2527, "mean_token_accuracy": 0.7601557701826096, "num_tokens": 617429202.0, "step": 9140 }, { "entropy": 0.8600764691829681, "epoch": 2.77194576990078, "grad_norm": 0.11303647607564926, "learning_rate": 9.511611350243014e-05, "loss": 1.2642, "mean_token_accuracy": 0.7554978132247925, "num_tokens": 618089881.0, "step": 9150 }, { "entropy": 0.8386348068714142, "epoch": 2.7749753843823375, "grad_norm": 0.1146254763007164, "learning_rate": 9.51017087745114e-05, "loss": 1.2531, "mean_token_accuracy": 0.765274378657341, "num_tokens": 618768949.0, "step": 9160 }, { "entropy": 0.8436396852135658, "epoch": 2.7780049988638944, "grad_norm": 0.12573018670082092, "learning_rate": 9.508728392897432e-05, "loss": 1.2608, "mean_token_accuracy": 0.756932207942009, "num_tokens": 619426419.0, "step": 9170 }, { "entropy": 0.8480951756238937, "epoch": 2.7810346133454518, "grad_norm": 0.11268395185470581, "learning_rate": 9.50728389722531e-05, "loss": 1.2629, "mean_token_accuracy": 0.7584605038166046, "num_tokens": 620096506.0, "step": 9180 }, { "entropy": 0.8529573127627372, "epoch": 2.784064227827009, "grad_norm": 0.10895754396915436, "learning_rate": 9.505837391079093e-05, "loss": 1.2632, "mean_token_accuracy": 0.7552319064736366, "num_tokens": 620759536.0, "step": 9190 }, { "entropy": 0.8537846297025681, "epoch": 2.7870938423085665, "grad_norm": 0.11702003329992294, "learning_rate": 9.504388875103996e-05, "loss": 1.2607, "mean_token_accuracy": 0.7622180119156837, "num_tokens": 621453454.0, "step": 9200 }, { "entropy": 0.8559833288192749, "epoch": 2.7901234567901234, "grad_norm": 0.10382211953401566, "learning_rate": 9.502938349946125e-05, "loss": 1.2613, "mean_token_accuracy": 0.7598064169287682, "num_tokens": 622140751.0, "step": 9210 }, { "entropy": 0.8430117681622505, "epoch": 2.7931530712716808, "grad_norm": 0.11125016212463379, "learning_rate": 9.501485816252491e-05, "loss": 1.2472, "mean_token_accuracy": 0.7645782843232155, "num_tokens": 622829005.0, "step": 9220 }, { "entropy": 0.8464206829667091, "epoch": 2.796182685753238, "grad_norm": 0.12690483033657074, "learning_rate": 9.500031274670995e-05, "loss": 1.2519, "mean_token_accuracy": 0.7599757418036461, "num_tokens": 623504794.0, "step": 9230 }, { "entropy": 0.8461428165435791, "epoch": 2.799212300234795, "grad_norm": 0.11492403596639633, "learning_rate": 9.498574725850437e-05, "loss": 1.2616, "mean_token_accuracy": 0.7610984742641449, "num_tokens": 624174218.0, "step": 9240 }, { "entropy": 0.8441011562943459, "epoch": 2.8022419147163524, "grad_norm": 0.1064518615603447, "learning_rate": 9.497116170440507e-05, "loss": 1.2506, "mean_token_accuracy": 0.7619548350572586, "num_tokens": 624846524.0, "step": 9250 }, { "entropy": 0.8522069841623306, "epoch": 2.80527152919791, "grad_norm": 0.11196396499872208, "learning_rate": 9.495655609091799e-05, "loss": 1.245, "mean_token_accuracy": 0.7609677344560624, "num_tokens": 625534390.0, "step": 9260 }, { "entropy": 0.8514075130224228, "epoch": 2.8083011436794667, "grad_norm": 0.11803749948740005, "learning_rate": 9.494193042455791e-05, "loss": 1.2586, "mean_token_accuracy": 0.7552815482020379, "num_tokens": 626195246.0, "step": 9270 }, { "entropy": 0.8575049102306366, "epoch": 2.811330758161024, "grad_norm": 0.12270934879779816, "learning_rate": 9.492728471184864e-05, "loss": 1.2544, "mean_token_accuracy": 0.7574294328689575, "num_tokens": 626868897.0, "step": 9280 }, { "entropy": 0.8500569522380829, "epoch": 2.8143603726425814, "grad_norm": 0.12504854798316956, "learning_rate": 9.491261895932291e-05, "loss": 1.2463, "mean_token_accuracy": 0.7641521871089936, "num_tokens": 627559647.0, "step": 9290 }, { "entropy": 0.8494672313332557, "epoch": 2.8173899871241384, "grad_norm": 0.11502181738615036, "learning_rate": 9.489793317352234e-05, "loss": 1.2604, "mean_token_accuracy": 0.758509735763073, "num_tokens": 628243853.0, "step": 9300 }, { "entropy": 0.8530739128589631, "epoch": 2.8204196016056957, "grad_norm": 0.12078163027763367, "learning_rate": 9.488322736099755e-05, "loss": 1.2557, "mean_token_accuracy": 0.7597883760929107, "num_tokens": 628929142.0, "step": 9310 }, { "entropy": 0.8452306926250458, "epoch": 2.823449216087253, "grad_norm": 0.11570421606302261, "learning_rate": 9.486850152830809e-05, "loss": 1.2574, "mean_token_accuracy": 0.7557153657078743, "num_tokens": 629586684.0, "step": 9320 }, { "entropy": 0.8449615359306335, "epoch": 2.82647883056881, "grad_norm": 0.11672792583703995, "learning_rate": 9.485375568202238e-05, "loss": 1.2426, "mean_token_accuracy": 0.7659830927848816, "num_tokens": 630280739.0, "step": 9330 }, { "entropy": 0.8407104566693306, "epoch": 2.8295084450503674, "grad_norm": 0.11738342046737671, "learning_rate": 9.483898982871781e-05, "loss": 1.2384, "mean_token_accuracy": 0.7619338065385819, "num_tokens": 630964866.0, "step": 9340 }, { "entropy": 0.8456329941749573, "epoch": 2.8325380595319247, "grad_norm": 0.12471571564674377, "learning_rate": 9.482420397498069e-05, "loss": 1.2542, "mean_token_accuracy": 0.7682717412710189, "num_tokens": 631653706.0, "step": 9350 }, { "entropy": 0.8532849788665772, "epoch": 2.8355676740134816, "grad_norm": 0.12070607393980026, "learning_rate": 9.480939812740628e-05, "loss": 1.2589, "mean_token_accuracy": 0.7555012702941895, "num_tokens": 632313809.0, "step": 9360 }, { "entropy": 0.847382801771164, "epoch": 2.838597288495039, "grad_norm": 0.10841323435306549, "learning_rate": 9.47945722925987e-05, "loss": 1.2511, "mean_token_accuracy": 0.7587038695812225, "num_tokens": 632987731.0, "step": 9370 }, { "entropy": 0.8449444532394409, "epoch": 2.8416269029765964, "grad_norm": 0.12055893987417221, "learning_rate": 9.477972647717103e-05, "loss": 1.253, "mean_token_accuracy": 0.7607487499713897, "num_tokens": 633656711.0, "step": 9380 }, { "entropy": 0.8486421957612038, "epoch": 2.8446565174581533, "grad_norm": 0.11401059478521347, "learning_rate": 9.476486068774523e-05, "loss": 1.253, "mean_token_accuracy": 0.7612351104617119, "num_tokens": 634327363.0, "step": 9390 }, { "entropy": 0.8498527958989144, "epoch": 2.8476861319397107, "grad_norm": 0.11407098919153214, "learning_rate": 9.47499749309522e-05, "loss": 1.2492, "mean_token_accuracy": 0.7609426572918891, "num_tokens": 635004780.0, "step": 9400 }, { "entropy": 0.8565239489078522, "epoch": 2.850715746421268, "grad_norm": 0.10461205244064331, "learning_rate": 9.473506921343172e-05, "loss": 1.2484, "mean_token_accuracy": 0.7615654408931732, "num_tokens": 635700882.0, "step": 9410 }, { "entropy": 0.8410073146224022, "epoch": 2.853745360902825, "grad_norm": 0.11110935360193253, "learning_rate": 9.472014354183249e-05, "loss": 1.2372, "mean_token_accuracy": 0.7607096031308174, "num_tokens": 636375827.0, "step": 9420 }, { "entropy": 0.8488332957029343, "epoch": 2.8567749753843823, "grad_norm": 0.10545022040605545, "learning_rate": 9.47051979228121e-05, "loss": 1.2653, "mean_token_accuracy": 0.7549908071756363, "num_tokens": 637040252.0, "step": 9430 }, { "entropy": 0.8493894070386887, "epoch": 2.8598045898659397, "grad_norm": 0.1181030422449112, "learning_rate": 9.469023236303707e-05, "loss": 1.2487, "mean_token_accuracy": 0.7594626814126968, "num_tokens": 637705254.0, "step": 9440 }, { "entropy": 0.8443953573703766, "epoch": 2.8628342043474966, "grad_norm": 0.11179035156965256, "learning_rate": 9.467524686918277e-05, "loss": 1.248, "mean_token_accuracy": 0.7587132945656776, "num_tokens": 638373871.0, "step": 9450 }, { "entropy": 0.8514942198991775, "epoch": 2.865863818829054, "grad_norm": 0.11156836152076721, "learning_rate": 9.466024144793346e-05, "loss": 1.2563, "mean_token_accuracy": 0.7566749766469002, "num_tokens": 639043684.0, "step": 9460 }, { "entropy": 0.8520888537168503, "epoch": 2.8688934333106113, "grad_norm": 0.12392573058605194, "learning_rate": 9.464521610598234e-05, "loss": 1.255, "mean_token_accuracy": 0.7587416484951973, "num_tokens": 639718094.0, "step": 9470 }, { "entropy": 0.8556481033563614, "epoch": 2.8719230477921682, "grad_norm": 0.10400848090648651, "learning_rate": 9.463017085003145e-05, "loss": 1.2627, "mean_token_accuracy": 0.754160338640213, "num_tokens": 640383129.0, "step": 9480 }, { "entropy": 0.8500251278281212, "epoch": 2.8749526622737256, "grad_norm": 0.11797893047332764, "learning_rate": 9.461510568679173e-05, "loss": 1.2502, "mean_token_accuracy": 0.7616827592253685, "num_tokens": 641069429.0, "step": 9490 }, { "entropy": 0.8440890595316887, "epoch": 2.877982276755283, "grad_norm": 0.11008976399898529, "learning_rate": 9.460002062298301e-05, "loss": 1.2516, "mean_token_accuracy": 0.75860475897789, "num_tokens": 641729169.0, "step": 9500 }, { "entropy": 0.845183102786541, "epoch": 2.88101189123684, "grad_norm": 0.10795667767524719, "learning_rate": 9.458491566533395e-05, "loss": 1.252, "mean_token_accuracy": 0.7612995803356171, "num_tokens": 642404503.0, "step": 9510 }, { "entropy": 0.8522583171725273, "epoch": 2.8840415057183972, "grad_norm": 0.11366670578718185, "learning_rate": 9.456979082058214e-05, "loss": 1.2602, "mean_token_accuracy": 0.7658773332834243, "num_tokens": 643089662.0, "step": 9520 }, { "entropy": 0.8500687897205352, "epoch": 2.8870711201999546, "grad_norm": 0.1287437230348587, "learning_rate": 9.455464609547401e-05, "loss": 1.2587, "mean_token_accuracy": 0.7604455679655076, "num_tokens": 643768453.0, "step": 9530 }, { "entropy": 0.8454829633235932, "epoch": 2.8901007346815115, "grad_norm": 0.11401085555553436, "learning_rate": 9.453948149676489e-05, "loss": 1.2562, "mean_token_accuracy": 0.7634023189544678, "num_tokens": 644446987.0, "step": 9540 }, { "entropy": 0.855300472676754, "epoch": 2.893130349163069, "grad_norm": 0.11452749371528625, "learning_rate": 9.45242970312189e-05, "loss": 1.2562, "mean_token_accuracy": 0.7571460902690887, "num_tokens": 645111355.0, "step": 9550 }, { "entropy": 0.8505562752485275, "epoch": 2.8961599636446262, "grad_norm": 0.11944160610437393, "learning_rate": 9.45090927056091e-05, "loss": 1.2546, "mean_token_accuracy": 0.7573200240731239, "num_tokens": 645771986.0, "step": 9560 }, { "entropy": 0.8464237809181213, "epoch": 2.8991895781261836, "grad_norm": 0.12742820382118225, "learning_rate": 9.449386852671739e-05, "loss": 1.252, "mean_token_accuracy": 0.7640555202960968, "num_tokens": 646459124.0, "step": 9570 }, { "entropy": 0.8454401388764381, "epoch": 2.9022191926077405, "grad_norm": 0.11775665730237961, "learning_rate": 9.44786245013345e-05, "loss": 1.2455, "mean_token_accuracy": 0.7580542922019958, "num_tokens": 647138288.0, "step": 9580 }, { "entropy": 0.8477908670902252, "epoch": 2.905248807089298, "grad_norm": 0.11539655923843384, "learning_rate": 9.446336063626005e-05, "loss": 1.2587, "mean_token_accuracy": 0.760678480565548, "num_tokens": 647808908.0, "step": 9590 }, { "entropy": 0.8531346842646599, "epoch": 2.9082784215708553, "grad_norm": 0.10464304685592651, "learning_rate": 9.444807693830244e-05, "loss": 1.2516, "mean_token_accuracy": 0.7555674359202385, "num_tokens": 648486324.0, "step": 9600 }, { "entropy": 0.858202300965786, "epoch": 2.911308036052412, "grad_norm": 0.11530975997447968, "learning_rate": 9.443277341427901e-05, "loss": 1.2631, "mean_token_accuracy": 0.7608538568019867, "num_tokens": 649183303.0, "step": 9610 }, { "entropy": 0.8597147583961486, "epoch": 2.9143376505339695, "grad_norm": 0.11264371871948242, "learning_rate": 9.441745007101588e-05, "loss": 1.2619, "mean_token_accuracy": 0.7595389813184739, "num_tokens": 649870902.0, "step": 9620 }, { "entropy": 0.85275067538023, "epoch": 2.917367265015527, "grad_norm": 0.10551595687866211, "learning_rate": 9.440210691534802e-05, "loss": 1.2655, "mean_token_accuracy": 0.7552141278982163, "num_tokens": 650532897.0, "step": 9630 }, { "entropy": 0.839555224776268, "epoch": 2.9203968794970843, "grad_norm": 0.11519236117601395, "learning_rate": 9.438674395411926e-05, "loss": 1.2466, "mean_token_accuracy": 0.7642404139041901, "num_tokens": 651223401.0, "step": 9640 }, { "entropy": 0.8424392610788345, "epoch": 2.923426493978641, "grad_norm": 0.10668787360191345, "learning_rate": 9.437136119418222e-05, "loss": 1.2483, "mean_token_accuracy": 0.7609749466180802, "num_tokens": 651896297.0, "step": 9650 }, { "entropy": 0.8510879307985306, "epoch": 2.9264561084601985, "grad_norm": 0.1254030466079712, "learning_rate": 9.435595864239843e-05, "loss": 1.2548, "mean_token_accuracy": 0.7605242788791656, "num_tokens": 652575573.0, "step": 9660 }, { "entropy": 0.8578651532530784, "epoch": 2.929485722941756, "grad_norm": 0.10282514989376068, "learning_rate": 9.434053630563817e-05, "loss": 1.255, "mean_token_accuracy": 0.7591852471232414, "num_tokens": 653263336.0, "step": 9670 }, { "entropy": 0.8433485999703407, "epoch": 2.932515337423313, "grad_norm": 0.1159236878156662, "learning_rate": 9.432509419078057e-05, "loss": 1.248, "mean_token_accuracy": 0.7621194779872894, "num_tokens": 653942478.0, "step": 9680 }, { "entropy": 0.8420448079705238, "epoch": 2.93554495190487, "grad_norm": 0.11720075458288193, "learning_rate": 9.430963230471359e-05, "loss": 1.247, "mean_token_accuracy": 0.7566432520747185, "num_tokens": 654606479.0, "step": 9690 }, { "entropy": 0.8508445441722869, "epoch": 2.9385745663864276, "grad_norm": 0.11400197446346283, "learning_rate": 9.429415065433401e-05, "loss": 1.2606, "mean_token_accuracy": 0.751141057908535, "num_tokens": 655253275.0, "step": 9700 }, { "entropy": 0.8569239139556885, "epoch": 2.9416041808679845, "grad_norm": 0.10377223789691925, "learning_rate": 9.42786492465474e-05, "loss": 1.2569, "mean_token_accuracy": 0.7572843462228775, "num_tokens": 655934707.0, "step": 9710 }, { "entropy": 0.838527712225914, "epoch": 2.944633795349542, "grad_norm": 0.11711885035037994, "learning_rate": 9.42631280882682e-05, "loss": 1.2376, "mean_token_accuracy": 0.7575570166110992, "num_tokens": 656602773.0, "step": 9720 }, { "entropy": 0.8440342873334885, "epoch": 2.947663409831099, "grad_norm": 0.11179562658071518, "learning_rate": 9.424758718641958e-05, "loss": 1.2476, "mean_token_accuracy": 0.7630826935172081, "num_tokens": 657283167.0, "step": 9730 }, { "entropy": 0.8516422033309936, "epoch": 2.950693024312656, "grad_norm": 0.10201055556535721, "learning_rate": 9.42320265479336e-05, "loss": 1.254, "mean_token_accuracy": 0.7659321084618569, "num_tokens": 657968237.0, "step": 9740 }, { "entropy": 0.8578488811850548, "epoch": 2.9537226387942135, "grad_norm": 0.15761013329029083, "learning_rate": 9.421644617975104e-05, "loss": 1.2592, "mean_token_accuracy": 0.7623055756092072, "num_tokens": 658656196.0, "step": 9750 }, { "entropy": 0.8493423119187355, "epoch": 2.956752253275771, "grad_norm": 0.1358635574579239, "learning_rate": 9.420084608882158e-05, "loss": 1.2489, "mean_token_accuracy": 0.7584482207894325, "num_tokens": 659323514.0, "step": 9760 }, { "entropy": 0.8442327380180359, "epoch": 2.9597818677573278, "grad_norm": 0.1336761713027954, "learning_rate": 9.41852262821036e-05, "loss": 1.2525, "mean_token_accuracy": 0.76187454611063, "num_tokens": 659999460.0, "step": 9770 }, { "entropy": 0.8411857530474662, "epoch": 2.962811482238885, "grad_norm": 0.12662717700004578, "learning_rate": 9.416958676656433e-05, "loss": 1.2513, "mean_token_accuracy": 0.7660849794745446, "num_tokens": 660680894.0, "step": 9780 }, { "entropy": 0.8411580920219421, "epoch": 2.9658410967204425, "grad_norm": 0.11733353137969971, "learning_rate": 9.415392754917976e-05, "loss": 1.2424, "mean_token_accuracy": 0.7625678777694702, "num_tokens": 661357990.0, "step": 9790 }, { "entropy": 0.8468214213848114, "epoch": 2.9688707112019994, "grad_norm": 0.1152617484331131, "learning_rate": 9.41382486369347e-05, "loss": 1.2461, "mean_token_accuracy": 0.761156065762043, "num_tokens": 662039212.0, "step": 9800 }, { "entropy": 0.8458827570080757, "epoch": 2.9719003256835568, "grad_norm": 0.11981873214244843, "learning_rate": 9.412255003682273e-05, "loss": 1.2563, "mean_token_accuracy": 0.7581099763512611, "num_tokens": 662702316.0, "step": 9810 }, { "entropy": 0.8398707866668701, "epoch": 2.974929940165114, "grad_norm": 0.10634782165288925, "learning_rate": 9.41068317558462e-05, "loss": 1.2449, "mean_token_accuracy": 0.7624537348747253, "num_tokens": 663383844.0, "step": 9820 }, { "entropy": 0.8493513718247414, "epoch": 2.977959554646671, "grad_norm": 0.11004682630300522, "learning_rate": 9.409109380101626e-05, "loss": 1.2588, "mean_token_accuracy": 0.7634190067648887, "num_tokens": 664055624.0, "step": 9830 }, { "entropy": 0.844525645673275, "epoch": 2.9809891691282284, "grad_norm": 0.11285140365362167, "learning_rate": 9.407533617935281e-05, "loss": 1.2616, "mean_token_accuracy": 0.7566604182124138, "num_tokens": 664718688.0, "step": 9840 }, { "entropy": 0.8467271819710731, "epoch": 2.984018783609786, "grad_norm": 0.12681548297405243, "learning_rate": 9.405955889788456e-05, "loss": 1.2504, "mean_token_accuracy": 0.7627516657114028, "num_tokens": 665394806.0, "step": 9850 }, { "entropy": 0.8490105450153351, "epoch": 2.9870483980913427, "grad_norm": 0.12867045402526855, "learning_rate": 9.404376196364895e-05, "loss": 1.2515, "mean_token_accuracy": 0.7587300792336464, "num_tokens": 666065349.0, "step": 9860 }, { "entropy": 0.8448463037610054, "epoch": 2.9900780125729, "grad_norm": 0.13521568477153778, "learning_rate": 9.40279453836922e-05, "loss": 1.2541, "mean_token_accuracy": 0.7597836315631866, "num_tokens": 666731085.0, "step": 9870 }, { "entropy": 0.8395397663116455, "epoch": 2.9931076270544574, "grad_norm": 0.1719292253255844, "learning_rate": 9.401210916506931e-05, "loss": 1.2468, "mean_token_accuracy": 0.7587765008211136, "num_tokens": 667396946.0, "step": 9880 }, { "entropy": 0.8458210021257401, "epoch": 2.9961372415360144, "grad_norm": 0.13949954509735107, "learning_rate": 9.399625331484404e-05, "loss": 1.2556, "mean_token_accuracy": 0.7590783119201661, "num_tokens": 668060048.0, "step": 9890 }, { "entropy": 0.8394185081124306, "epoch": 2.9991668560175717, "grad_norm": 0.11261207610368729, "learning_rate": 9.398037784008884e-05, "loss": 1.2522, "mean_token_accuracy": 0.7638598397374153, "num_tokens": 668730199.0, "step": 9900 }, { "entropy": 0.8383808151269571, "epoch": 3.00212073013709, "grad_norm": 0.11749439686536789, "learning_rate": 9.396448274788501e-05, "loss": 1.2364, "mean_token_accuracy": 0.7646774450937907, "num_tokens": 669393471.0, "step": 9910 }, { "entropy": 0.8383400484919548, "epoch": 3.0051503446186474, "grad_norm": 0.1319848746061325, "learning_rate": 9.394856804532253e-05, "loss": 1.2414, "mean_token_accuracy": 0.7644588783383369, "num_tokens": 670088022.0, "step": 9920 }, { "entropy": 0.8331081718206406, "epoch": 3.0081799591002043, "grad_norm": 0.11742068082094193, "learning_rate": 9.393263373950019e-05, "loss": 1.2302, "mean_token_accuracy": 0.7647489890456199, "num_tokens": 670768593.0, "step": 9930 }, { "entropy": 0.8413022235035896, "epoch": 3.0112095735817617, "grad_norm": 0.1186077669262886, "learning_rate": 9.391667983752545e-05, "loss": 1.2459, "mean_token_accuracy": 0.7609393790364265, "num_tokens": 671443863.0, "step": 9940 }, { "entropy": 0.846348412334919, "epoch": 3.014239188063319, "grad_norm": 0.13650287687778473, "learning_rate": 9.390070634651458e-05, "loss": 1.2441, "mean_token_accuracy": 0.7602412641048432, "num_tokens": 672137443.0, "step": 9950 }, { "entropy": 0.8487726628780365, "epoch": 3.017268802544876, "grad_norm": 0.1162915825843811, "learning_rate": 9.388471327359253e-05, "loss": 1.2464, "mean_token_accuracy": 0.7582907408475876, "num_tokens": 672819109.0, "step": 9960 }, { "entropy": 0.8265037685632706, "epoch": 3.0202984170264333, "grad_norm": 0.12178266048431396, "learning_rate": 9.386870062589301e-05, "loss": 1.2318, "mean_token_accuracy": 0.76134063154459, "num_tokens": 673478301.0, "step": 9970 }, { "entropy": 0.8373795002698898, "epoch": 3.0233280315079907, "grad_norm": 0.11897668987512589, "learning_rate": 9.385266841055849e-05, "loss": 1.2412, "mean_token_accuracy": 0.7585442930459976, "num_tokens": 674148719.0, "step": 9980 }, { "entropy": 0.8317443907260895, "epoch": 3.026357645989548, "grad_norm": 0.11980799585580826, "learning_rate": 9.383661663474011e-05, "loss": 1.2352, "mean_token_accuracy": 0.7605805337429047, "num_tokens": 674825439.0, "step": 9990 }, { "entropy": 0.8385573863983155, "epoch": 3.029387260471105, "grad_norm": 0.12173496186733246, "learning_rate": 9.382054530559779e-05, "loss": 1.2393, "mean_token_accuracy": 0.7657988607883454, "num_tokens": 675505171.0, "step": 10000 }, { "entropy": 0.8310229733586312, "epoch": 3.0324168749526623, "grad_norm": 0.10542073100805283, "learning_rate": 9.380445443030011e-05, "loss": 1.2316, "mean_token_accuracy": 0.7675056278705596, "num_tokens": 676199738.0, "step": 10010 }, { "entropy": 0.8390552252531052, "epoch": 3.0354464894342197, "grad_norm": 0.12213842570781708, "learning_rate": 9.378834401602444e-05, "loss": 1.2343, "mean_token_accuracy": 0.761818116903305, "num_tokens": 676876944.0, "step": 10020 }, { "entropy": 0.835042978823185, "epoch": 3.0384761039157766, "grad_norm": 0.11653364449739456, "learning_rate": 9.377221406995683e-05, "loss": 1.2393, "mean_token_accuracy": 0.759946870803833, "num_tokens": 677544094.0, "step": 10030 }, { "entropy": 0.8409670516848564, "epoch": 3.041505718397334, "grad_norm": 0.12055815011262894, "learning_rate": 9.375606459929202e-05, "loss": 1.2363, "mean_token_accuracy": 0.7609785079956055, "num_tokens": 678233082.0, "step": 10040 }, { "entropy": 0.8427327066659928, "epoch": 3.0445353328788913, "grad_norm": 0.13013114035129547, "learning_rate": 9.373989561123351e-05, "loss": 1.2578, "mean_token_accuracy": 0.7567624032497406, "num_tokens": 678895505.0, "step": 10050 }, { "entropy": 0.8381952315568924, "epoch": 3.0475649473604483, "grad_norm": 0.13608799874782562, "learning_rate": 9.372370711299344e-05, "loss": 1.2403, "mean_token_accuracy": 0.7570142805576324, "num_tokens": 679566458.0, "step": 10060 }, { "entropy": 0.8234174102544785, "epoch": 3.0505945618420056, "grad_norm": 0.11896231770515442, "learning_rate": 9.370749911179272e-05, "loss": 1.2314, "mean_token_accuracy": 0.764320008456707, "num_tokens": 680241292.0, "step": 10070 }, { "entropy": 0.8354604631662369, "epoch": 3.053624176323563, "grad_norm": 0.1257018893957138, "learning_rate": 9.369127161486093e-05, "loss": 1.2323, "mean_token_accuracy": 0.7660845428705215, "num_tokens": 680927192.0, "step": 10080 }, { "entropy": 0.8369146749377251, "epoch": 3.05665379080512, "grad_norm": 0.1210678443312645, "learning_rate": 9.367502462943634e-05, "loss": 1.2363, "mean_token_accuracy": 0.7641615822911263, "num_tokens": 681606369.0, "step": 10090 }, { "entropy": 0.834869273006916, "epoch": 3.0596834052866773, "grad_norm": 0.12320520728826523, "learning_rate": 9.36587581627659e-05, "loss": 1.2454, "mean_token_accuracy": 0.7614041447639466, "num_tokens": 682271842.0, "step": 10100 }, { "entropy": 0.8204829946160317, "epoch": 3.0627130197682346, "grad_norm": 0.1113852709531784, "learning_rate": 9.364247222210529e-05, "loss": 1.2235, "mean_token_accuracy": 0.7688616901636124, "num_tokens": 682962488.0, "step": 10110 }, { "entropy": 0.8474239587783814, "epoch": 3.0657426342497915, "grad_norm": 0.13586878776550293, "learning_rate": 9.362616681471885e-05, "loss": 1.2513, "mean_token_accuracy": 0.7578161045908928, "num_tokens": 683631167.0, "step": 10120 }, { "entropy": 0.8334722653031349, "epoch": 3.068772248731349, "grad_norm": 0.11796333640813828, "learning_rate": 9.360984194787958e-05, "loss": 1.2295, "mean_token_accuracy": 0.7639172211289406, "num_tokens": 684308264.0, "step": 10130 }, { "entropy": 0.8372927248477936, "epoch": 3.0718018632129063, "grad_norm": 0.12656259536743164, "learning_rate": 9.359349762886925e-05, "loss": 1.2366, "mean_token_accuracy": 0.7626647621393203, "num_tokens": 684987150.0, "step": 10140 }, { "entropy": 0.8288713663816452, "epoch": 3.074831477694463, "grad_norm": 0.11318030208349228, "learning_rate": 9.357713386497819e-05, "loss": 1.2382, "mean_token_accuracy": 0.7620742693543434, "num_tokens": 685650353.0, "step": 10150 }, { "entropy": 0.8404918104410172, "epoch": 3.0778610921760206, "grad_norm": 0.12882491946220398, "learning_rate": 9.356075066350547e-05, "loss": 1.2387, "mean_token_accuracy": 0.7626294881105423, "num_tokens": 686337380.0, "step": 10160 }, { "entropy": 0.8249937057495117, "epoch": 3.080890706657578, "grad_norm": 0.11957137286663055, "learning_rate": 9.35443480317588e-05, "loss": 1.2305, "mean_token_accuracy": 0.758065702021122, "num_tokens": 686999032.0, "step": 10170 }, { "entropy": 0.8310855448246002, "epoch": 3.083920321139135, "grad_norm": 0.12973418831825256, "learning_rate": 9.352792597705462e-05, "loss": 1.2318, "mean_token_accuracy": 0.7651878580451011, "num_tokens": 687684037.0, "step": 10180 }, { "entropy": 0.8338297083973885, "epoch": 3.086949935620692, "grad_norm": 0.13006281852722168, "learning_rate": 9.351148450671796e-05, "loss": 1.2387, "mean_token_accuracy": 0.7593083173036576, "num_tokens": 688341905.0, "step": 10190 }, { "entropy": 0.8397548958659172, "epoch": 3.0899795501022496, "grad_norm": 0.12187941372394562, "learning_rate": 9.349502362808255e-05, "loss": 1.242, "mean_token_accuracy": 0.7532096341252327, "num_tokens": 688989343.0, "step": 10200 }, { "entropy": 0.8340567663311959, "epoch": 3.093009164583807, "grad_norm": 0.12024606764316559, "learning_rate": 9.347854334849073e-05, "loss": 1.2446, "mean_token_accuracy": 0.759178264439106, "num_tokens": 689644171.0, "step": 10210 }, { "entropy": 0.8291385605931282, "epoch": 3.096038779065364, "grad_norm": 0.11374744027853012, "learning_rate": 9.346204367529359e-05, "loss": 1.2291, "mean_token_accuracy": 0.7584012091159821, "num_tokens": 690303957.0, "step": 10220 }, { "entropy": 0.8290810093283654, "epoch": 3.099068393546921, "grad_norm": 0.1235131099820137, "learning_rate": 9.344552461585074e-05, "loss": 1.2315, "mean_token_accuracy": 0.7684821531176567, "num_tokens": 691005466.0, "step": 10230 }, { "entropy": 0.8295746877789497, "epoch": 3.1020980080284786, "grad_norm": 0.12789379060268402, "learning_rate": 9.342898617753056e-05, "loss": 1.2258, "mean_token_accuracy": 0.7688084349036217, "num_tokens": 691706441.0, "step": 10240 }, { "entropy": 0.8276756659150124, "epoch": 3.1051276225100355, "grad_norm": 0.11145539581775665, "learning_rate": 9.341242836771e-05, "loss": 1.2299, "mean_token_accuracy": 0.7633999362587929, "num_tokens": 692381849.0, "step": 10250 }, { "entropy": 0.8328027233481408, "epoch": 3.108157236991593, "grad_norm": 0.11050871014595032, "learning_rate": 9.339585119377468e-05, "loss": 1.2355, "mean_token_accuracy": 0.7645954817533493, "num_tokens": 693057008.0, "step": 10260 }, { "entropy": 0.8326576486229896, "epoch": 3.11118685147315, "grad_norm": 0.12428499013185501, "learning_rate": 9.337925466311883e-05, "loss": 1.2403, "mean_token_accuracy": 0.7647704347968102, "num_tokens": 693731362.0, "step": 10270 }, { "entropy": 0.8436383217573166, "epoch": 3.114216465954707, "grad_norm": 0.12997333705425262, "learning_rate": 9.336263878314536e-05, "loss": 1.2481, "mean_token_accuracy": 0.7621228620409966, "num_tokens": 694396353.0, "step": 10280 }, { "entropy": 0.8330873399972916, "epoch": 3.1172460804362645, "grad_norm": 0.11834542453289032, "learning_rate": 9.334600356126575e-05, "loss": 1.2338, "mean_token_accuracy": 0.7599046424031257, "num_tokens": 695064359.0, "step": 10290 }, { "entropy": 0.8297991320490837, "epoch": 3.120275694917822, "grad_norm": 0.1220044493675232, "learning_rate": 9.332934900490018e-05, "loss": 1.2328, "mean_token_accuracy": 0.7637186720967293, "num_tokens": 695740694.0, "step": 10300 }, { "entropy": 0.835815292596817, "epoch": 3.123305309399379, "grad_norm": 0.12407717108726501, "learning_rate": 9.331267512147739e-05, "loss": 1.2361, "mean_token_accuracy": 0.7623964294791221, "num_tokens": 696426667.0, "step": 10310 }, { "entropy": 0.833484211564064, "epoch": 3.126334923880936, "grad_norm": 0.14744287729263306, "learning_rate": 9.329598191843478e-05, "loss": 1.2382, "mean_token_accuracy": 0.7577978476881981, "num_tokens": 697086202.0, "step": 10320 }, { "entropy": 0.8317460626363754, "epoch": 3.1293645383624935, "grad_norm": 0.11609450727701187, "learning_rate": 9.327926940321833e-05, "loss": 1.2422, "mean_token_accuracy": 0.7573364853858948, "num_tokens": 697735782.0, "step": 10330 }, { "entropy": 0.8358004689216614, "epoch": 3.1323941528440504, "grad_norm": 0.11458564549684525, "learning_rate": 9.326253758328269e-05, "loss": 1.242, "mean_token_accuracy": 0.7641695529222489, "num_tokens": 698409023.0, "step": 10340 }, { "entropy": 0.8372511133551598, "epoch": 3.135423767325608, "grad_norm": 0.14164815843105316, "learning_rate": 9.324578646609106e-05, "loss": 1.2385, "mean_token_accuracy": 0.7637021109461785, "num_tokens": 699084224.0, "step": 10350 }, { "entropy": 0.8225207179784775, "epoch": 3.138453381807165, "grad_norm": 0.12220578640699387, "learning_rate": 9.32290160591153e-05, "loss": 1.2345, "mean_token_accuracy": 0.7701056480407715, "num_tokens": 699787268.0, "step": 10360 }, { "entropy": 0.8347537323832512, "epoch": 3.141482996288722, "grad_norm": 0.1229391098022461, "learning_rate": 9.321222636983581e-05, "loss": 1.2374, "mean_token_accuracy": 0.7667005762457848, "num_tokens": 700482735.0, "step": 10370 }, { "entropy": 0.8306485459208488, "epoch": 3.1445126107702794, "grad_norm": 0.11771932244300842, "learning_rate": 9.319541740574169e-05, "loss": 1.2281, "mean_token_accuracy": 0.765276812016964, "num_tokens": 701165632.0, "step": 10380 }, { "entropy": 0.8387805730104446, "epoch": 3.147542225251837, "grad_norm": 0.11932957917451859, "learning_rate": 9.317858917433053e-05, "loss": 1.2384, "mean_token_accuracy": 0.7616775602102279, "num_tokens": 701847430.0, "step": 10390 }, { "entropy": 0.8426155775785447, "epoch": 3.1505718397333937, "grad_norm": 0.10999251157045364, "learning_rate": 9.31617416831086e-05, "loss": 1.2543, "mean_token_accuracy": 0.7565399438142777, "num_tokens": 702502651.0, "step": 10400 }, { "entropy": 0.8212842464447021, "epoch": 3.153601454214951, "grad_norm": 0.1370965838432312, "learning_rate": 9.31448749395907e-05, "loss": 1.2322, "mean_token_accuracy": 0.7635385498404503, "num_tokens": 703176499.0, "step": 10410 }, { "entropy": 0.8400916963815689, "epoch": 3.1566310686965084, "grad_norm": 0.11962023377418518, "learning_rate": 9.312798895130024e-05, "loss": 1.2352, "mean_token_accuracy": 0.7685959905385971, "num_tokens": 703883910.0, "step": 10420 }, { "entropy": 0.8473451912403107, "epoch": 3.159660683178066, "grad_norm": 0.13253596425056458, "learning_rate": 9.311108372576923e-05, "loss": 1.2481, "mean_token_accuracy": 0.7600852161645889, "num_tokens": 704558968.0, "step": 10430 }, { "entropy": 0.8328047186136246, "epoch": 3.1626902976596227, "grad_norm": 0.12212762981653214, "learning_rate": 9.309415927053825e-05, "loss": 1.2324, "mean_token_accuracy": 0.7633449226617813, "num_tokens": 705240129.0, "step": 10440 }, { "entropy": 0.8332623779773712, "epoch": 3.16571991214118, "grad_norm": 0.11805460602045059, "learning_rate": 9.307721559315644e-05, "loss": 1.2407, "mean_token_accuracy": 0.7617170751094818, "num_tokens": 705910419.0, "step": 10450 }, { "entropy": 0.8405819192528725, "epoch": 3.1687495266227375, "grad_norm": 0.11516073346138, "learning_rate": 9.30602527011815e-05, "loss": 1.2472, "mean_token_accuracy": 0.7590625390410424, "num_tokens": 706572226.0, "step": 10460 }, { "entropy": 0.8321726381778717, "epoch": 3.1717791411042944, "grad_norm": 0.11908750236034393, "learning_rate": 9.30432706021798e-05, "loss": 1.229, "mean_token_accuracy": 0.7679396644234657, "num_tokens": 707273378.0, "step": 10470 }, { "entropy": 0.8422200918197632, "epoch": 3.1748087555858517, "grad_norm": 0.12345686554908752, "learning_rate": 9.302626930372613e-05, "loss": 1.2456, "mean_token_accuracy": 0.7596239730715751, "num_tokens": 707941328.0, "step": 10480 }, { "entropy": 0.8314088940620422, "epoch": 3.177838370067409, "grad_norm": 0.10889705270528793, "learning_rate": 9.300924881340397e-05, "loss": 1.2275, "mean_token_accuracy": 0.7671215176582337, "num_tokens": 708635710.0, "step": 10490 }, { "entropy": 0.8333190977573395, "epoch": 3.180867984548966, "grad_norm": 0.11417516320943832, "learning_rate": 9.29922091388053e-05, "loss": 1.2368, "mean_token_accuracy": 0.7640294909477234, "num_tokens": 709313698.0, "step": 10500 }, { "entropy": 0.8399480730295181, "epoch": 3.1838975990305234, "grad_norm": 0.12141840904951096, "learning_rate": 9.297515028753065e-05, "loss": 1.2379, "mean_token_accuracy": 0.7629139348864555, "num_tokens": 710004897.0, "step": 10510 }, { "entropy": 0.8384797543287277, "epoch": 3.1869272135120807, "grad_norm": 0.17831021547317505, "learning_rate": 9.295807226718913e-05, "loss": 1.2441, "mean_token_accuracy": 0.7584279000759124, "num_tokens": 710660224.0, "step": 10520 }, { "entropy": 0.8309674605727195, "epoch": 3.1899568279936377, "grad_norm": 0.12027305364608765, "learning_rate": 9.294097508539841e-05, "loss": 1.2309, "mean_token_accuracy": 0.7637091666460037, "num_tokens": 711350685.0, "step": 10530 }, { "entropy": 0.8398581445217133, "epoch": 3.192986442475195, "grad_norm": 0.12445776909589767, "learning_rate": 9.292385874978467e-05, "loss": 1.2428, "mean_token_accuracy": 0.7635554268956184, "num_tokens": 712029223.0, "step": 10540 }, { "entropy": 0.8296842262148857, "epoch": 3.1960160569567524, "grad_norm": 0.11100509762763977, "learning_rate": 9.290672326798264e-05, "loss": 1.2374, "mean_token_accuracy": 0.7651006951928139, "num_tokens": 712718620.0, "step": 10550 }, { "entropy": 0.8360298171639442, "epoch": 3.1990456714383093, "grad_norm": 0.12844832241535187, "learning_rate": 9.288956864763564e-05, "loss": 1.2378, "mean_token_accuracy": 0.7630566358566284, "num_tokens": 713401466.0, "step": 10560 }, { "entropy": 0.8393256574869156, "epoch": 3.2020752859198667, "grad_norm": 0.13267198204994202, "learning_rate": 9.287239489639545e-05, "loss": 1.2478, "mean_token_accuracy": 0.7581466615200043, "num_tokens": 714065572.0, "step": 10570 }, { "entropy": 0.8247277021408081, "epoch": 3.205104900401424, "grad_norm": 0.11070942878723145, "learning_rate": 9.285520202192244e-05, "loss": 1.2358, "mean_token_accuracy": 0.760840256512165, "num_tokens": 714725833.0, "step": 10580 }, { "entropy": 0.832079616189003, "epoch": 3.208134514882981, "grad_norm": 0.11494448035955429, "learning_rate": 9.283799003188549e-05, "loss": 1.2348, "mean_token_accuracy": 0.7642373278737068, "num_tokens": 715410879.0, "step": 10590 }, { "entropy": 0.832519993185997, "epoch": 3.2111641293645383, "grad_norm": 0.1271791309118271, "learning_rate": 9.2820758933962e-05, "loss": 1.2334, "mean_token_accuracy": 0.7576231256127357, "num_tokens": 716079706.0, "step": 10600 }, { "entropy": 0.8424416229128837, "epoch": 3.2141937438460957, "grad_norm": 0.1314493715763092, "learning_rate": 9.280350873583792e-05, "loss": 1.2455, "mean_token_accuracy": 0.7570266142487526, "num_tokens": 716742397.0, "step": 10610 }, { "entropy": 0.8290569439530373, "epoch": 3.2172233583276526, "grad_norm": 0.13425303995609283, "learning_rate": 9.27862394452077e-05, "loss": 1.2326, "mean_token_accuracy": 0.7638805478811264, "num_tokens": 717422160.0, "step": 10620 }, { "entropy": 0.8295670047402381, "epoch": 3.22025297280921, "grad_norm": 0.11815513670444489, "learning_rate": 9.276895106977428e-05, "loss": 1.2295, "mean_token_accuracy": 0.7663416728377342, "num_tokens": 718108091.0, "step": 10630 }, { "entropy": 0.8376719772815704, "epoch": 3.2232825872907673, "grad_norm": 0.13028369843959808, "learning_rate": 9.275164361724917e-05, "loss": 1.2336, "mean_token_accuracy": 0.7605687156319618, "num_tokens": 718791576.0, "step": 10640 }, { "entropy": 0.8381056413054466, "epoch": 3.2263122017723243, "grad_norm": 0.155443012714386, "learning_rate": 9.273431709535235e-05, "loss": 1.2366, "mean_token_accuracy": 0.7605232700705529, "num_tokens": 719472329.0, "step": 10650 }, { "entropy": 0.8404705911874771, "epoch": 3.2293418162538816, "grad_norm": 0.11871460825204849, "learning_rate": 9.271697151181231e-05, "loss": 1.2388, "mean_token_accuracy": 0.7651655808091163, "num_tokens": 720153666.0, "step": 10660 }, { "entropy": 0.8290793329477311, "epoch": 3.232371430735439, "grad_norm": 0.12072983384132385, "learning_rate": 9.269960687436606e-05, "loss": 1.234, "mean_token_accuracy": 0.7592705100774765, "num_tokens": 720811003.0, "step": 10670 }, { "entropy": 0.8383875161409378, "epoch": 3.2354010452169963, "grad_norm": 0.1183423399925232, "learning_rate": 9.268222319075906e-05, "loss": 1.2426, "mean_token_accuracy": 0.761049710214138, "num_tokens": 721495710.0, "step": 10680 }, { "entropy": 0.8362769678235054, "epoch": 3.2384306596985533, "grad_norm": 0.12930332124233246, "learning_rate": 9.266482046874536e-05, "loss": 1.239, "mean_token_accuracy": 0.7631333693861961, "num_tokens": 722178590.0, "step": 10690 }, { "entropy": 0.8385419920086861, "epoch": 3.2414602741801106, "grad_norm": 0.11784379929304123, "learning_rate": 9.26473987160874e-05, "loss": 1.2395, "mean_token_accuracy": 0.7591364905238152, "num_tokens": 722847005.0, "step": 10700 }, { "entropy": 0.832073763012886, "epoch": 3.244489888661668, "grad_norm": 0.1162286102771759, "learning_rate": 9.262995794055618e-05, "loss": 1.2252, "mean_token_accuracy": 0.768974381685257, "num_tokens": 723548894.0, "step": 10710 }, { "entropy": 0.8261068508028984, "epoch": 3.247519503143225, "grad_norm": 0.12522152066230774, "learning_rate": 9.261249814993115e-05, "loss": 1.2191, "mean_token_accuracy": 0.7679961457848549, "num_tokens": 724244841.0, "step": 10720 }, { "entropy": 0.8232755184173584, "epoch": 3.2505491176247823, "grad_norm": 0.1232825443148613, "learning_rate": 9.259501935200024e-05, "loss": 1.2253, "mean_token_accuracy": 0.7634338319301606, "num_tokens": 724914821.0, "step": 10730 }, { "entropy": 0.8415536910295487, "epoch": 3.2535787321063396, "grad_norm": 0.1149967685341835, "learning_rate": 9.25775215545599e-05, "loss": 1.2431, "mean_token_accuracy": 0.761506487429142, "num_tokens": 725590181.0, "step": 10740 }, { "entropy": 0.8311128839850426, "epoch": 3.2566083465878966, "grad_norm": 0.11484793573617935, "learning_rate": 9.256000476541496e-05, "loss": 1.2373, "mean_token_accuracy": 0.7669485747814179, "num_tokens": 726266920.0, "step": 10750 }, { "entropy": 0.8289522245526314, "epoch": 3.259637961069454, "grad_norm": 0.1118326410651207, "learning_rate": 9.254246899237885e-05, "loss": 1.2284, "mean_token_accuracy": 0.7667224913835525, "num_tokens": 726950506.0, "step": 10760 }, { "entropy": 0.8402706578373909, "epoch": 3.2626675755510113, "grad_norm": 0.12299591302871704, "learning_rate": 9.252491424327337e-05, "loss": 1.2511, "mean_token_accuracy": 0.7580578938126564, "num_tokens": 727616277.0, "step": 10770 }, { "entropy": 0.8413041412830353, "epoch": 3.265697190032568, "grad_norm": 0.1593431681394577, "learning_rate": 9.250734052592883e-05, "loss": 1.2417, "mean_token_accuracy": 0.7616387084126472, "num_tokens": 728301131.0, "step": 10780 }, { "entropy": 0.833856324851513, "epoch": 3.2687268045141256, "grad_norm": 0.11823420226573944, "learning_rate": 9.248974784818396e-05, "loss": 1.2339, "mean_token_accuracy": 0.7589184165000915, "num_tokens": 728961023.0, "step": 10790 }, { "entropy": 0.8375367939472198, "epoch": 3.271756418995683, "grad_norm": 0.1314859688282013, "learning_rate": 9.2472136217886e-05, "loss": 1.2383, "mean_token_accuracy": 0.7644320324063301, "num_tokens": 729645651.0, "step": 10800 }, { "entropy": 0.8438203558325768, "epoch": 3.27478603347724, "grad_norm": 0.11647716164588928, "learning_rate": 9.245450564289061e-05, "loss": 1.2434, "mean_token_accuracy": 0.7553828537464142, "num_tokens": 730314758.0, "step": 10810 }, { "entropy": 0.8331206291913986, "epoch": 3.277815647958797, "grad_norm": 0.11722481995820999, "learning_rate": 9.243685613106191e-05, "loss": 1.2336, "mean_token_accuracy": 0.7630361914634705, "num_tokens": 730998053.0, "step": 10820 }, { "entropy": 0.8310005441308022, "epoch": 3.2808452624403546, "grad_norm": 0.1268925815820694, "learning_rate": 9.241918769027247e-05, "loss": 1.2402, "mean_token_accuracy": 0.7594552874565125, "num_tokens": 731664781.0, "step": 10830 }, { "entropy": 0.8313428923487663, "epoch": 3.283874876921912, "grad_norm": 0.12375863641500473, "learning_rate": 9.240150032840329e-05, "loss": 1.2356, "mean_token_accuracy": 0.7621963039040566, "num_tokens": 732347797.0, "step": 10840 }, { "entropy": 0.8397682949900627, "epoch": 3.286904491403469, "grad_norm": 0.1231660544872284, "learning_rate": 9.238379405334382e-05, "loss": 1.2365, "mean_token_accuracy": 0.7627838492393494, "num_tokens": 733028509.0, "step": 10850 }, { "entropy": 0.8354284122586251, "epoch": 3.289934105885026, "grad_norm": 0.1508273482322693, "learning_rate": 9.236606887299195e-05, "loss": 1.2431, "mean_token_accuracy": 0.7645542830228805, "num_tokens": 733705677.0, "step": 10860 }, { "entropy": 0.8260786205530166, "epoch": 3.2929637203665836, "grad_norm": 0.12614572048187256, "learning_rate": 9.2348324795254e-05, "loss": 1.2337, "mean_token_accuracy": 0.7677811950445175, "num_tokens": 734388784.0, "step": 10870 }, { "entropy": 0.8300410374999047, "epoch": 3.2959933348481405, "grad_norm": 0.1307694911956787, "learning_rate": 9.233056182804469e-05, "loss": 1.2478, "mean_token_accuracy": 0.7653857231140136, "num_tokens": 735055958.0, "step": 10880 }, { "entropy": 0.8297205582261086, "epoch": 3.299022949329698, "grad_norm": 0.12933626770973206, "learning_rate": 9.231277997928722e-05, "loss": 1.2322, "mean_token_accuracy": 0.7599256366491318, "num_tokens": 735725145.0, "step": 10890 }, { "entropy": 0.8447857826948166, "epoch": 3.3020525638112552, "grad_norm": 0.11631935834884644, "learning_rate": 9.229497925691318e-05, "loss": 1.2409, "mean_token_accuracy": 0.7583971887826919, "num_tokens": 736402252.0, "step": 10900 }, { "entropy": 0.8415904492139816, "epoch": 3.305082178292812, "grad_norm": 0.12990765273571014, "learning_rate": 9.227715966886257e-05, "loss": 1.2405, "mean_token_accuracy": 0.7592979416251182, "num_tokens": 737086144.0, "step": 10910 }, { "entropy": 0.8309503868222237, "epoch": 3.3081117927743695, "grad_norm": 0.13030767440795898, "learning_rate": 9.225932122308384e-05, "loss": 1.2364, "mean_token_accuracy": 0.7657891899347306, "num_tokens": 737767932.0, "step": 10920 }, { "entropy": 0.8368337139487266, "epoch": 3.311141407255927, "grad_norm": 0.12232133001089096, "learning_rate": 9.224146392753381e-05, "loss": 1.2416, "mean_token_accuracy": 0.7590382680296898, "num_tokens": 738429360.0, "step": 10930 }, { "entropy": 0.8323225796222686, "epoch": 3.314171021737484, "grad_norm": 0.115901418030262, "learning_rate": 9.222358779017772e-05, "loss": 1.232, "mean_token_accuracy": 0.7627633005380631, "num_tokens": 739113087.0, "step": 10940 }, { "entropy": 0.8303186953067779, "epoch": 3.317200636219041, "grad_norm": 0.13112275302410126, "learning_rate": 9.220569281898925e-05, "loss": 1.2341, "mean_token_accuracy": 0.7631768271327019, "num_tokens": 739780446.0, "step": 10950 }, { "entropy": 0.8251298353075981, "epoch": 3.3202302507005985, "grad_norm": 0.14712879061698914, "learning_rate": 9.218777902195043e-05, "loss": 1.24, "mean_token_accuracy": 0.7636583000421524, "num_tokens": 740454648.0, "step": 10960 }, { "entropy": 0.8412736356258392, "epoch": 3.3232598651821554, "grad_norm": 0.11796010285615921, "learning_rate": 9.216984640705171e-05, "loss": 1.2435, "mean_token_accuracy": 0.7623559609055519, "num_tokens": 741138483.0, "step": 10970 }, { "entropy": 0.8381548568606376, "epoch": 3.326289479663713, "grad_norm": 0.12395946681499481, "learning_rate": 9.215189498229194e-05, "loss": 1.2378, "mean_token_accuracy": 0.7647039264440536, "num_tokens": 741815394.0, "step": 10980 }, { "entropy": 0.8362372472882271, "epoch": 3.32931909414527, "grad_norm": 0.1348508596420288, "learning_rate": 9.213392475567836e-05, "loss": 1.2352, "mean_token_accuracy": 0.7623049661517143, "num_tokens": 742492113.0, "step": 10990 }, { "entropy": 0.8433797970414162, "epoch": 3.332348708626827, "grad_norm": 0.12744256854057312, "learning_rate": 9.211593573522658e-05, "loss": 1.2502, "mean_token_accuracy": 0.7595667660236358, "num_tokens": 743163984.0, "step": 11000 }, { "entropy": 0.8397553741931916, "epoch": 3.3353783231083844, "grad_norm": 0.11812698841094971, "learning_rate": 9.20979279289606e-05, "loss": 1.2466, "mean_token_accuracy": 0.7593050569295883, "num_tokens": 743828395.0, "step": 11010 }, { "entropy": 0.8322840496897698, "epoch": 3.338407937589942, "grad_norm": 0.11905736476182938, "learning_rate": 9.207990134491281e-05, "loss": 1.2265, "mean_token_accuracy": 0.7691733837127686, "num_tokens": 744527699.0, "step": 11020 }, { "entropy": 0.8369256645441056, "epoch": 3.3414375520714987, "grad_norm": 0.11343682557344437, "learning_rate": 9.206185599112397e-05, "loss": 1.2382, "mean_token_accuracy": 0.7615094974637031, "num_tokens": 745205385.0, "step": 11030 }, { "entropy": 0.8328930526971817, "epoch": 3.344467166553056, "grad_norm": 0.12398137152194977, "learning_rate": 9.204379187564321e-05, "loss": 1.2451, "mean_token_accuracy": 0.7567608639597893, "num_tokens": 745856589.0, "step": 11040 }, { "entropy": 0.8329830259084702, "epoch": 3.3474967810346135, "grad_norm": 0.12614665925502777, "learning_rate": 9.202570900652801e-05, "loss": 1.234, "mean_token_accuracy": 0.7624053329229354, "num_tokens": 746527617.0, "step": 11050 }, { "entropy": 0.8424591690301895, "epoch": 3.3505263955161704, "grad_norm": 0.12217242270708084, "learning_rate": 9.200760739184426e-05, "loss": 1.2453, "mean_token_accuracy": 0.7612332418560982, "num_tokens": 747202230.0, "step": 11060 }, { "entropy": 0.8356316089630127, "epoch": 3.3535560099977277, "grad_norm": 0.13591527938842773, "learning_rate": 9.198948703966618e-05, "loss": 1.2388, "mean_token_accuracy": 0.7614769533276557, "num_tokens": 747886885.0, "step": 11070 }, { "entropy": 0.8369050204753876, "epoch": 3.356585624479285, "grad_norm": 0.1345531940460205, "learning_rate": 9.197134795807634e-05, "loss": 1.2425, "mean_token_accuracy": 0.7628663569688797, "num_tokens": 748562805.0, "step": 11080 }, { "entropy": 0.8288183137774467, "epoch": 3.359615238960842, "grad_norm": 0.1224965900182724, "learning_rate": 9.19531901551657e-05, "loss": 1.2309, "mean_token_accuracy": 0.7656287491321564, "num_tokens": 749244768.0, "step": 11090 }, { "entropy": 0.8335251197218895, "epoch": 3.3626448534423994, "grad_norm": 0.11962300539016724, "learning_rate": 9.193501363903351e-05, "loss": 1.24, "mean_token_accuracy": 0.7606292203068733, "num_tokens": 749918649.0, "step": 11100 }, { "entropy": 0.8271193683147431, "epoch": 3.3656744679239567, "grad_norm": 0.12110050022602081, "learning_rate": 9.191681841778746e-05, "loss": 1.2358, "mean_token_accuracy": 0.7678375959396362, "num_tokens": 750608272.0, "step": 11110 }, { "entropy": 0.8416226848959922, "epoch": 3.3687040824055137, "grad_norm": 0.13516546785831451, "learning_rate": 9.189860449954349e-05, "loss": 1.2448, "mean_token_accuracy": 0.7645494818687439, "num_tokens": 751304553.0, "step": 11120 }, { "entropy": 0.8248679772019386, "epoch": 3.371733696887071, "grad_norm": 0.12417922168970108, "learning_rate": 9.188037189242593e-05, "loss": 1.2361, "mean_token_accuracy": 0.7616899490356446, "num_tokens": 751967652.0, "step": 11130 }, { "entropy": 0.8328699260950089, "epoch": 3.3747633113686284, "grad_norm": 0.12202231585979462, "learning_rate": 9.186212060456743e-05, "loss": 1.2296, "mean_token_accuracy": 0.76222343146801, "num_tokens": 752645899.0, "step": 11140 }, { "entropy": 0.8371228188276291, "epoch": 3.3777929258501858, "grad_norm": 0.1218470111489296, "learning_rate": 9.184385064410898e-05, "loss": 1.2519, "mean_token_accuracy": 0.7579439014196396, "num_tokens": 753300276.0, "step": 11150 }, { "entropy": 0.8341500461101532, "epoch": 3.3808225403317427, "grad_norm": 0.1183963418006897, "learning_rate": 9.182556201919989e-05, "loss": 1.2268, "mean_token_accuracy": 0.764517468214035, "num_tokens": 753982832.0, "step": 11160 }, { "entropy": 0.8343546494841576, "epoch": 3.3838521548133, "grad_norm": 0.11850113421678543, "learning_rate": 9.180725473799782e-05, "loss": 1.2323, "mean_token_accuracy": 0.7607134401798248, "num_tokens": 754655077.0, "step": 11170 }, { "entropy": 0.8380969345569611, "epoch": 3.3868817692948574, "grad_norm": 0.12320259213447571, "learning_rate": 9.178892880866871e-05, "loss": 1.2452, "mean_token_accuracy": 0.7631853714585304, "num_tokens": 755330996.0, "step": 11180 }, { "entropy": 0.8282105028629303, "epoch": 3.3899113837764143, "grad_norm": 0.12607748806476593, "learning_rate": 9.177058423938687e-05, "loss": 1.23, "mean_token_accuracy": 0.7632102146744728, "num_tokens": 755999983.0, "step": 11190 }, { "entropy": 0.8313679173588753, "epoch": 3.3929409982579717, "grad_norm": 0.12297471612691879, "learning_rate": 9.175222103833488e-05, "loss": 1.2331, "mean_token_accuracy": 0.7653903663158417, "num_tokens": 756675895.0, "step": 11200 }, { "entropy": 0.8348143219947814, "epoch": 3.395970612739529, "grad_norm": 0.12022623419761658, "learning_rate": 9.173383921370363e-05, "loss": 1.2302, "mean_token_accuracy": 0.7649526372551918, "num_tokens": 757359109.0, "step": 11210 }, { "entropy": 0.8278435617685318, "epoch": 3.399000227221086, "grad_norm": 0.1261090785264969, "learning_rate": 9.171543877369236e-05, "loss": 1.2347, "mean_token_accuracy": 0.7597841456532478, "num_tokens": 758030443.0, "step": 11220 }, { "entropy": 0.834585751593113, "epoch": 3.4020298417026433, "grad_norm": 0.12466879189014435, "learning_rate": 9.169701972650857e-05, "loss": 1.2333, "mean_token_accuracy": 0.7644132405519486, "num_tokens": 758709443.0, "step": 11230 }, { "entropy": 0.8404792815446853, "epoch": 3.4050594561842007, "grad_norm": 0.1162450760602951, "learning_rate": 9.167858208036808e-05, "loss": 1.2398, "mean_token_accuracy": 0.7603865653276444, "num_tokens": 759385292.0, "step": 11240 }, { "entropy": 0.8283190071582794, "epoch": 3.4080890706657576, "grad_norm": 0.11930322647094727, "learning_rate": 9.166012584349502e-05, "loss": 1.2418, "mean_token_accuracy": 0.7585573822259903, "num_tokens": 760040634.0, "step": 11250 }, { "entropy": 0.8307083159685135, "epoch": 3.411118685147315, "grad_norm": 0.1354369968175888, "learning_rate": 9.164165102412175e-05, "loss": 1.2406, "mean_token_accuracy": 0.7601440161466598, "num_tokens": 760713312.0, "step": 11260 }, { "entropy": 0.8361027002334595, "epoch": 3.4141482996288723, "grad_norm": 0.1479794681072235, "learning_rate": 9.162315763048901e-05, "loss": 1.2421, "mean_token_accuracy": 0.7593563422560692, "num_tokens": 761374551.0, "step": 11270 }, { "entropy": 0.8356462597846985, "epoch": 3.4171779141104293, "grad_norm": 0.12720368802547455, "learning_rate": 9.160464567084577e-05, "loss": 1.2375, "mean_token_accuracy": 0.7608398467302322, "num_tokens": 762054075.0, "step": 11280 }, { "entropy": 0.8298819020390511, "epoch": 3.4202075285919866, "grad_norm": 0.1229054182767868, "learning_rate": 9.158611515344926e-05, "loss": 1.2251, "mean_token_accuracy": 0.7662895604968071, "num_tokens": 762742531.0, "step": 11290 }, { "entropy": 0.8287305623292923, "epoch": 3.423237143073544, "grad_norm": 0.12051542103290558, "learning_rate": 9.156756608656506e-05, "loss": 1.2386, "mean_token_accuracy": 0.7643614545464515, "num_tokens": 763413614.0, "step": 11300 }, { "entropy": 0.8374639958143234, "epoch": 3.4262667575551013, "grad_norm": 0.127227321267128, "learning_rate": 9.154899847846694e-05, "loss": 1.2393, "mean_token_accuracy": 0.7565767720341683, "num_tokens": 764071880.0, "step": 11310 }, { "entropy": 0.827917568385601, "epoch": 3.4292963720366583, "grad_norm": 0.1351393312215805, "learning_rate": 9.153041233743702e-05, "loss": 1.235, "mean_token_accuracy": 0.7639175683259964, "num_tokens": 764743387.0, "step": 11320 }, { "entropy": 0.8264069110155106, "epoch": 3.4323259865182156, "grad_norm": 0.13566623628139496, "learning_rate": 9.151180767176563e-05, "loss": 1.2372, "mean_token_accuracy": 0.7655463546514512, "num_tokens": 765419689.0, "step": 11330 }, { "entropy": 0.8286457657814026, "epoch": 3.435355600999773, "grad_norm": 0.12399102747440338, "learning_rate": 9.149318448975138e-05, "loss": 1.2383, "mean_token_accuracy": 0.7649675861001015, "num_tokens": 766100907.0, "step": 11340 }, { "entropy": 0.8278289854526519, "epoch": 3.43838521548133, "grad_norm": 0.12072256207466125, "learning_rate": 9.147454279970114e-05, "loss": 1.2217, "mean_token_accuracy": 0.7657185092568397, "num_tokens": 766783645.0, "step": 11350 }, { "entropy": 0.8294686049222946, "epoch": 3.4414148299628873, "grad_norm": 0.1228259727358818, "learning_rate": 9.145588260993006e-05, "loss": 1.2303, "mean_token_accuracy": 0.7650302588939667, "num_tokens": 767461938.0, "step": 11360 }, { "entropy": 0.8382543668150901, "epoch": 3.4444444444444446, "grad_norm": 0.1262565404176712, "learning_rate": 9.143720392876148e-05, "loss": 1.2309, "mean_token_accuracy": 0.7626958236098289, "num_tokens": 768143985.0, "step": 11370 }, { "entropy": 0.8315334230661392, "epoch": 3.4474740589260016, "grad_norm": 0.12471026182174683, "learning_rate": 9.141850676452704e-05, "loss": 1.2347, "mean_token_accuracy": 0.7572854548692703, "num_tokens": 768805960.0, "step": 11380 }, { "entropy": 0.8320801615715027, "epoch": 3.450503673407559, "grad_norm": 0.11635059863328934, "learning_rate": 9.139979112556662e-05, "loss": 1.2444, "mean_token_accuracy": 0.7593884602189064, "num_tokens": 769470007.0, "step": 11390 }, { "entropy": 0.8333582326769828, "epoch": 3.4535332878891163, "grad_norm": 0.11514585465192795, "learning_rate": 9.138105702022834e-05, "loss": 1.2366, "mean_token_accuracy": 0.7615718305110931, "num_tokens": 770134661.0, "step": 11400 }, { "entropy": 0.8287696212530136, "epoch": 3.456562902370673, "grad_norm": 0.11926062405109406, "learning_rate": 9.136230445686853e-05, "loss": 1.2302, "mean_token_accuracy": 0.7648595631122589, "num_tokens": 770811961.0, "step": 11410 }, { "entropy": 0.8429672375321389, "epoch": 3.4595925168522306, "grad_norm": 0.12452052533626556, "learning_rate": 9.134353344385176e-05, "loss": 1.2458, "mean_token_accuracy": 0.7575023502111435, "num_tokens": 771468667.0, "step": 11420 }, { "entropy": 0.844814071059227, "epoch": 3.462622131333788, "grad_norm": 0.13977554440498352, "learning_rate": 9.132474398955087e-05, "loss": 1.2402, "mean_token_accuracy": 0.7594343468546867, "num_tokens": 772150584.0, "step": 11430 }, { "entropy": 0.8297743394970893, "epoch": 3.465651745815345, "grad_norm": 0.12803715467453003, "learning_rate": 9.130593610234689e-05, "loss": 1.2274, "mean_token_accuracy": 0.7670684143900871, "num_tokens": 772834313.0, "step": 11440 }, { "entropy": 0.8327987894415856, "epoch": 3.468681360296902, "grad_norm": 0.1271650195121765, "learning_rate": 9.128710979062907e-05, "loss": 1.2362, "mean_token_accuracy": 0.7619610667228699, "num_tokens": 773511436.0, "step": 11450 }, { "entropy": 0.8360430836677551, "epoch": 3.4717109747784596, "grad_norm": 0.12318103760480881, "learning_rate": 9.126826506279487e-05, "loss": 1.2331, "mean_token_accuracy": 0.7648727923631669, "num_tokens": 774192858.0, "step": 11460 }, { "entropy": 0.8295892536640167, "epoch": 3.4747405892600165, "grad_norm": 0.11842337995767593, "learning_rate": 9.124940192725002e-05, "loss": 1.2365, "mean_token_accuracy": 0.7600327342748642, "num_tokens": 774868822.0, "step": 11470 }, { "entropy": 0.8435277476906776, "epoch": 3.477770203741574, "grad_norm": 0.1237192377448082, "learning_rate": 9.12305203924084e-05, "loss": 1.2435, "mean_token_accuracy": 0.7555145010352134, "num_tokens": 775527906.0, "step": 11480 }, { "entropy": 0.8364113673567772, "epoch": 3.4807998182231312, "grad_norm": 0.1254260092973709, "learning_rate": 9.121162046669212e-05, "loss": 1.2405, "mean_token_accuracy": 0.7616367593407631, "num_tokens": 776210142.0, "step": 11490 }, { "entropy": 0.8302595242857933, "epoch": 3.483829432704688, "grad_norm": 0.122586190700531, "learning_rate": 9.119270215853149e-05, "loss": 1.2376, "mean_token_accuracy": 0.7600247159600257, "num_tokens": 776873062.0, "step": 11500 }, { "entropy": 0.839820210635662, "epoch": 3.4868590471862455, "grad_norm": 0.13961449265480042, "learning_rate": 9.117376547636502e-05, "loss": 1.2391, "mean_token_accuracy": 0.7607545420527458, "num_tokens": 777541322.0, "step": 11510 }, { "entropy": 0.8306924447417259, "epoch": 3.489888661667803, "grad_norm": 0.1306026726961136, "learning_rate": 9.115481042863943e-05, "loss": 1.2321, "mean_token_accuracy": 0.7651106625795364, "num_tokens": 778221429.0, "step": 11520 }, { "entropy": 0.8247424677014351, "epoch": 3.49291827614936, "grad_norm": 0.1263517290353775, "learning_rate": 9.113583702380961e-05, "loss": 1.2284, "mean_token_accuracy": 0.7655428737401963, "num_tokens": 778898343.0, "step": 11530 }, { "entropy": 0.8408169224858284, "epoch": 3.495947890630917, "grad_norm": 0.1276242733001709, "learning_rate": 9.111684527033865e-05, "loss": 1.2423, "mean_token_accuracy": 0.7609571158885956, "num_tokens": 779576822.0, "step": 11540 }, { "entropy": 0.8287448465824128, "epoch": 3.4989775051124745, "grad_norm": 0.1321870982646942, "learning_rate": 9.109783517669783e-05, "loss": 1.2261, "mean_token_accuracy": 0.7656113281846046, "num_tokens": 780263497.0, "step": 11550 }, { "entropy": 0.828518845140934, "epoch": 3.5020071195940314, "grad_norm": 0.12340894341468811, "learning_rate": 9.107880675136658e-05, "loss": 1.2318, "mean_token_accuracy": 0.7637648627161979, "num_tokens": 780945082.0, "step": 11560 }, { "entropy": 0.8328303188085556, "epoch": 3.505036734075589, "grad_norm": 0.11579950153827667, "learning_rate": 9.105976000283256e-05, "loss": 1.2408, "mean_token_accuracy": 0.7612169280648231, "num_tokens": 781613662.0, "step": 11570 }, { "entropy": 0.8312623202800751, "epoch": 3.508066348557146, "grad_norm": 0.1133250892162323, "learning_rate": 9.104069493959156e-05, "loss": 1.2367, "mean_token_accuracy": 0.7617637991905213, "num_tokens": 782288520.0, "step": 11580 }, { "entropy": 0.8360737174749374, "epoch": 3.511095963038703, "grad_norm": 0.1433398276567459, "learning_rate": 9.102161157014753e-05, "loss": 1.2361, "mean_token_accuracy": 0.7610009625554085, "num_tokens": 782964782.0, "step": 11590 }, { "entropy": 0.8245732337236404, "epoch": 3.5141255775202604, "grad_norm": 0.12671203911304474, "learning_rate": 9.100250990301264e-05, "loss": 1.2356, "mean_token_accuracy": 0.7634012997150421, "num_tokens": 783630482.0, "step": 11600 }, { "entropy": 0.8308870613574981, "epoch": 3.517155192001818, "grad_norm": 0.12995724380016327, "learning_rate": 9.098338994670719e-05, "loss": 1.2299, "mean_token_accuracy": 0.7653521984815598, "num_tokens": 784312380.0, "step": 11610 }, { "entropy": 0.8430249497294426, "epoch": 3.5201848064833747, "grad_norm": 0.1239878460764885, "learning_rate": 9.09642517097596e-05, "loss": 1.2396, "mean_token_accuracy": 0.7610897243022918, "num_tokens": 784995872.0, "step": 11620 }, { "entropy": 0.8321187064051628, "epoch": 3.523214420964932, "grad_norm": 0.1331980973482132, "learning_rate": 9.094509520070651e-05, "loss": 1.2354, "mean_token_accuracy": 0.763390064239502, "num_tokens": 785668728.0, "step": 11630 }, { "entropy": 0.8272807270288467, "epoch": 3.5262440354464895, "grad_norm": 0.12257704138755798, "learning_rate": 9.092592042809267e-05, "loss": 1.2327, "mean_token_accuracy": 0.7632969573140145, "num_tokens": 786344132.0, "step": 11640 }, { "entropy": 0.8298324644565582, "epoch": 3.529273649928047, "grad_norm": 0.14654377102851868, "learning_rate": 9.090672740047102e-05, "loss": 1.2352, "mean_token_accuracy": 0.7644375175237655, "num_tokens": 787024814.0, "step": 11650 }, { "entropy": 0.8290773898363113, "epoch": 3.5323032644096037, "grad_norm": 0.1257285624742508, "learning_rate": 9.088751612640255e-05, "loss": 1.2287, "mean_token_accuracy": 0.7638806805014611, "num_tokens": 787705829.0, "step": 11660 }, { "entropy": 0.8311083659529686, "epoch": 3.535332878891161, "grad_norm": 0.1146702840924263, "learning_rate": 9.08682866144565e-05, "loss": 1.2356, "mean_token_accuracy": 0.759133493900299, "num_tokens": 788369869.0, "step": 11670 }, { "entropy": 0.834751708805561, "epoch": 3.5383624933727185, "grad_norm": 0.12474443763494492, "learning_rate": 9.084903887321016e-05, "loss": 1.2401, "mean_token_accuracy": 0.7602774262428283, "num_tokens": 789042854.0, "step": 11680 }, { "entropy": 0.8291317448019981, "epoch": 3.5413921078542754, "grad_norm": 0.12132880091667175, "learning_rate": 9.082977291124903e-05, "loss": 1.2371, "mean_token_accuracy": 0.7625534549355507, "num_tokens": 789707404.0, "step": 11690 }, { "entropy": 0.8243643850088119, "epoch": 3.5444217223358327, "grad_norm": 0.13487163186073303, "learning_rate": 9.081048873716666e-05, "loss": 1.2281, "mean_token_accuracy": 0.763888205587864, "num_tokens": 790373929.0, "step": 11700 }, { "entropy": 0.8182727739214897, "epoch": 3.54745133681739, "grad_norm": 0.11959255486726761, "learning_rate": 9.079118635956473e-05, "loss": 1.2266, "mean_token_accuracy": 0.7675241082906723, "num_tokens": 791052543.0, "step": 11710 }, { "entropy": 0.8281921803951263, "epoch": 3.5504809512989475, "grad_norm": 0.12217836081981659, "learning_rate": 9.077186578705314e-05, "loss": 1.2443, "mean_token_accuracy": 0.7592179551720619, "num_tokens": 791705086.0, "step": 11720 }, { "entropy": 0.838127113878727, "epoch": 3.5535105657805044, "grad_norm": 0.1235826388001442, "learning_rate": 9.075252702824976e-05, "loss": 1.2424, "mean_token_accuracy": 0.7577555432915688, "num_tokens": 792372054.0, "step": 11730 }, { "entropy": 0.8330118611454964, "epoch": 3.5565401802620618, "grad_norm": 0.11890354752540588, "learning_rate": 9.073317009178068e-05, "loss": 1.237, "mean_token_accuracy": 0.7636782974004745, "num_tokens": 793049582.0, "step": 11740 }, { "entropy": 0.8319677591323853, "epoch": 3.559569794743619, "grad_norm": 0.12073475867509842, "learning_rate": 9.071379498628008e-05, "loss": 1.2451, "mean_token_accuracy": 0.7606442883610726, "num_tokens": 793718001.0, "step": 11750 }, { "entropy": 0.8443213135004044, "epoch": 3.562599409225176, "grad_norm": 0.11481815576553345, "learning_rate": 9.069440172039017e-05, "loss": 1.2456, "mean_token_accuracy": 0.7587682247161865, "num_tokens": 794388471.0, "step": 11760 }, { "entropy": 0.82033039778471, "epoch": 3.5656290237067334, "grad_norm": 0.14372728765010834, "learning_rate": 9.067499030276136e-05, "loss": 1.2299, "mean_token_accuracy": 0.7666828289628029, "num_tokens": 795067679.0, "step": 11770 }, { "entropy": 0.8215655073523521, "epoch": 3.5686586381882908, "grad_norm": 0.12320514023303986, "learning_rate": 9.065556074205212e-05, "loss": 1.2276, "mean_token_accuracy": 0.7630557641386986, "num_tokens": 795746543.0, "step": 11780 }, { "entropy": 0.8349901750683785, "epoch": 3.5716882526698477, "grad_norm": 0.13253360986709595, "learning_rate": 9.063611304692896e-05, "loss": 1.2411, "mean_token_accuracy": 0.7601036667823792, "num_tokens": 796412165.0, "step": 11790 }, { "entropy": 0.8316451072692871, "epoch": 3.574717867151405, "grad_norm": 0.11468390375375748, "learning_rate": 9.061664722606657e-05, "loss": 1.2405, "mean_token_accuracy": 0.7620562642812729, "num_tokens": 797077751.0, "step": 11800 }, { "entropy": 0.824966624379158, "epoch": 3.5777474816329624, "grad_norm": 0.12100493907928467, "learning_rate": 9.059716328814765e-05, "loss": 1.2261, "mean_token_accuracy": 0.7641262948513031, "num_tokens": 797751465.0, "step": 11810 }, { "entropy": 0.8425392359495163, "epoch": 3.5807770961145193, "grad_norm": 0.12134084850549698, "learning_rate": 9.057766124186303e-05, "loss": 1.2399, "mean_token_accuracy": 0.7614474728703499, "num_tokens": 798435722.0, "step": 11820 }, { "entropy": 0.8275352597236634, "epoch": 3.5838067105960767, "grad_norm": 0.11163262277841568, "learning_rate": 9.055814109591159e-05, "loss": 1.2239, "mean_token_accuracy": 0.7658421337604523, "num_tokens": 799115112.0, "step": 11830 }, { "entropy": 0.8259432733058929, "epoch": 3.586836325077634, "grad_norm": 0.1207873523235321, "learning_rate": 9.05386028590003e-05, "loss": 1.2251, "mean_token_accuracy": 0.7633491337299347, "num_tokens": 799790970.0, "step": 11840 }, { "entropy": 0.8228763297200203, "epoch": 3.589865939559191, "grad_norm": 0.11874350905418396, "learning_rate": 9.051904653984418e-05, "loss": 1.2213, "mean_token_accuracy": 0.7662205755710602, "num_tokens": 800475946.0, "step": 11850 }, { "entropy": 0.8243829488754273, "epoch": 3.5928955540407483, "grad_norm": 0.12391141802072525, "learning_rate": 9.049947214716634e-05, "loss": 1.2273, "mean_token_accuracy": 0.7658755227923393, "num_tokens": 801149538.0, "step": 11860 }, { "entropy": 0.8380412116646767, "epoch": 3.5959251685223057, "grad_norm": 0.14538021385669708, "learning_rate": 9.047987968969792e-05, "loss": 1.2373, "mean_token_accuracy": 0.7669436022639274, "num_tokens": 801842586.0, "step": 11870 }, { "entropy": 0.8286140769720077, "epoch": 3.5989547830038626, "grad_norm": 0.1327095776796341, "learning_rate": 9.046026917617813e-05, "loss": 1.2296, "mean_token_accuracy": 0.7633321568369865, "num_tokens": 802512963.0, "step": 11880 }, { "entropy": 0.8307457745075226, "epoch": 3.60198439748542, "grad_norm": 0.12284592539072037, "learning_rate": 9.044064061535428e-05, "loss": 1.2318, "mean_token_accuracy": 0.7620765492320061, "num_tokens": 803191402.0, "step": 11890 }, { "entropy": 0.8326386421918869, "epoch": 3.6050140119669773, "grad_norm": 0.1168675646185875, "learning_rate": 9.042099401598166e-05, "loss": 1.2353, "mean_token_accuracy": 0.7611799791455269, "num_tokens": 803859566.0, "step": 11900 }, { "entropy": 0.8209079593420029, "epoch": 3.6080436264485343, "grad_norm": 0.12039514631032944, "learning_rate": 9.040132938682365e-05, "loss": 1.2339, "mean_token_accuracy": 0.7661359086632729, "num_tokens": 804531546.0, "step": 11910 }, { "entropy": 0.8414869412779808, "epoch": 3.6110732409300916, "grad_norm": 0.12295954674482346, "learning_rate": 9.038164673665162e-05, "loss": 1.2396, "mean_token_accuracy": 0.7620622083544731, "num_tokens": 805216610.0, "step": 11920 }, { "entropy": 0.8272681817412376, "epoch": 3.614102855411649, "grad_norm": 0.12621666491031647, "learning_rate": 9.036194607424508e-05, "loss": 1.2334, "mean_token_accuracy": 0.7656963005661964, "num_tokens": 805900069.0, "step": 11930 }, { "entropy": 0.8419021785259246, "epoch": 3.617132469893206, "grad_norm": 0.1228092685341835, "learning_rate": 9.034222740839147e-05, "loss": 1.2459, "mean_token_accuracy": 0.760851913690567, "num_tokens": 806568925.0, "step": 11940 }, { "entropy": 0.8313126280903816, "epoch": 3.6201620843747633, "grad_norm": 0.13267569243907928, "learning_rate": 9.03224907478863e-05, "loss": 1.2332, "mean_token_accuracy": 0.7593785226345062, "num_tokens": 807240671.0, "step": 11950 }, { "entropy": 0.8313692197203636, "epoch": 3.6231916988563206, "grad_norm": 0.12366776168346405, "learning_rate": 9.030273610153312e-05, "loss": 1.2361, "mean_token_accuracy": 0.7617954403162003, "num_tokens": 807917683.0, "step": 11960 }, { "entropy": 0.8314715504646302, "epoch": 3.6262213133378776, "grad_norm": 0.11919554322957993, "learning_rate": 9.028296347814349e-05, "loss": 1.2322, "mean_token_accuracy": 0.7616859465837479, "num_tokens": 808602683.0, "step": 11970 }, { "entropy": 0.8367446169257164, "epoch": 3.629250927819435, "grad_norm": 0.13895639777183533, "learning_rate": 9.026317288653698e-05, "loss": 1.2404, "mean_token_accuracy": 0.7621679157018661, "num_tokens": 809277786.0, "step": 11980 }, { "entropy": 0.8247834354639053, "epoch": 3.6322805423009923, "grad_norm": 0.12393562495708466, "learning_rate": 9.02433643355412e-05, "loss": 1.2367, "mean_token_accuracy": 0.7608552008867264, "num_tokens": 809943651.0, "step": 11990 }, { "entropy": 0.8331888973712921, "epoch": 3.635310156782549, "grad_norm": 0.12172278016805649, "learning_rate": 9.022353783399175e-05, "loss": 1.2348, "mean_token_accuracy": 0.7604128733277321, "num_tokens": 810615890.0, "step": 12000 }, { "entropy": 0.8270653799176216, "epoch": 3.6383397712641066, "grad_norm": 0.1276981234550476, "learning_rate": 9.020369339073222e-05, "loss": 1.232, "mean_token_accuracy": 0.7668903693556786, "num_tokens": 811299502.0, "step": 12010 }, { "entropy": 0.8299973547458649, "epoch": 3.641369385745664, "grad_norm": 0.12148939073085785, "learning_rate": 9.018383101461425e-05, "loss": 1.2367, "mean_token_accuracy": 0.7597500324249268, "num_tokens": 811963216.0, "step": 12020 }, { "entropy": 0.8284254223108292, "epoch": 3.644399000227221, "grad_norm": 0.1537405252456665, "learning_rate": 9.016395071449745e-05, "loss": 1.221, "mean_token_accuracy": 0.771775534749031, "num_tokens": 812666893.0, "step": 12030 }, { "entropy": 0.8277289435267449, "epoch": 3.647428614708778, "grad_norm": 0.1311744749546051, "learning_rate": 9.014405249924942e-05, "loss": 1.242, "mean_token_accuracy": 0.7604265868663788, "num_tokens": 813324365.0, "step": 12040 }, { "entropy": 0.8355601668357849, "epoch": 3.6504582291903356, "grad_norm": 0.12726181745529175, "learning_rate": 9.012413637774576e-05, "loss": 1.2402, "mean_token_accuracy": 0.7586885496973992, "num_tokens": 813995063.0, "step": 12050 }, { "entropy": 0.8477053657174111, "epoch": 3.6534878436718925, "grad_norm": 0.12102725356817245, "learning_rate": 9.010420235887007e-05, "loss": 1.2542, "mean_token_accuracy": 0.7584789529442787, "num_tokens": 814657398.0, "step": 12060 }, { "entropy": 0.8242482304573059, "epoch": 3.65651745815345, "grad_norm": 0.1336752474308014, "learning_rate": 9.00842504515139e-05, "loss": 1.2279, "mean_token_accuracy": 0.7599899381399154, "num_tokens": 815318538.0, "step": 12070 }, { "entropy": 0.8387343779206275, "epoch": 3.6595470726350072, "grad_norm": 0.13991276919841766, "learning_rate": 9.00642806645768e-05, "loss": 1.2416, "mean_token_accuracy": 0.7603589996695519, "num_tokens": 815999407.0, "step": 12080 }, { "entropy": 0.8195283800363541, "epoch": 3.662576687116564, "grad_norm": 0.13147521018981934, "learning_rate": 9.004429300696631e-05, "loss": 1.2229, "mean_token_accuracy": 0.7663108944892884, "num_tokens": 816682090.0, "step": 12090 }, { "entropy": 0.8434464514255524, "epoch": 3.6656063015981215, "grad_norm": 0.11861646920442581, "learning_rate": 9.002428748759791e-05, "loss": 1.2406, "mean_token_accuracy": 0.7598227173089981, "num_tokens": 817354308.0, "step": 12100 }, { "entropy": 0.8313905954360962, "epoch": 3.668635916079679, "grad_norm": 0.12398044764995575, "learning_rate": 9.000426411539507e-05, "loss": 1.2373, "mean_token_accuracy": 0.7607077971100807, "num_tokens": 818022898.0, "step": 12110 }, { "entropy": 0.8373366162180901, "epoch": 3.6716655305612362, "grad_norm": 0.11641517281532288, "learning_rate": 8.998422289928923e-05, "loss": 1.2387, "mean_token_accuracy": 0.7564888164401055, "num_tokens": 818682512.0, "step": 12120 }, { "entropy": 0.8366742789745331, "epoch": 3.674695145042793, "grad_norm": 0.12284574657678604, "learning_rate": 8.996416384821974e-05, "loss": 1.2304, "mean_token_accuracy": 0.7645779192447663, "num_tokens": 819367189.0, "step": 12130 }, { "entropy": 0.8274043366312981, "epoch": 3.6777247595243505, "grad_norm": 0.11731072515249252, "learning_rate": 8.994408697113397e-05, "loss": 1.2267, "mean_token_accuracy": 0.7654698178172111, "num_tokens": 820052012.0, "step": 12140 }, { "entropy": 0.8292063072323799, "epoch": 3.680754374005908, "grad_norm": 0.12219222635030746, "learning_rate": 8.992399227698721e-05, "loss": 1.2339, "mean_token_accuracy": 0.7619503021240235, "num_tokens": 820720978.0, "step": 12150 }, { "entropy": 0.8339642301201821, "epoch": 3.6837839884874652, "grad_norm": 0.12715142965316772, "learning_rate": 8.990387977474266e-05, "loss": 1.2385, "mean_token_accuracy": 0.7595003217458725, "num_tokens": 821383669.0, "step": 12160 }, { "entropy": 0.8279847532510758, "epoch": 3.686813602969022, "grad_norm": 0.13311992585659027, "learning_rate": 8.988374947337155e-05, "loss": 1.2384, "mean_token_accuracy": 0.7625073760747909, "num_tokens": 822064780.0, "step": 12170 }, { "entropy": 0.8269133359193802, "epoch": 3.6898432174505795, "grad_norm": 0.12409292161464691, "learning_rate": 8.9863601381853e-05, "loss": 1.2282, "mean_token_accuracy": 0.7661284163594246, "num_tokens": 822751650.0, "step": 12180 }, { "entropy": 0.8387887254357338, "epoch": 3.692872831932137, "grad_norm": 0.12265481799840927, "learning_rate": 8.984343550917403e-05, "loss": 1.2363, "mean_token_accuracy": 0.7628649190068245, "num_tokens": 823433050.0, "step": 12190 }, { "entropy": 0.8265530884265899, "epoch": 3.695902446413694, "grad_norm": 0.12023020535707474, "learning_rate": 8.982325186432966e-05, "loss": 1.2224, "mean_token_accuracy": 0.7683528974652291, "num_tokens": 824119564.0, "step": 12200 }, { "entropy": 0.8196702033281327, "epoch": 3.698932060895251, "grad_norm": 0.11864407360553741, "learning_rate": 8.980305045632278e-05, "loss": 1.2316, "mean_token_accuracy": 0.7656280145049095, "num_tokens": 824794803.0, "step": 12210 }, { "entropy": 0.8307268977165222, "epoch": 3.7019616753768085, "grad_norm": 0.11842304468154907, "learning_rate": 8.978283129416424e-05, "loss": 1.2327, "mean_token_accuracy": 0.7590238392353058, "num_tokens": 825466266.0, "step": 12220 }, { "entropy": 0.8390016943216324, "epoch": 3.7049912898583655, "grad_norm": 0.12841185927391052, "learning_rate": 8.976259438687282e-05, "loss": 1.229, "mean_token_accuracy": 0.7644271805882454, "num_tokens": 826153789.0, "step": 12230 }, { "entropy": 0.8306706994771957, "epoch": 3.708020904339923, "grad_norm": 0.14335404336452484, "learning_rate": 8.974233974347517e-05, "loss": 1.2374, "mean_token_accuracy": 0.7622088611125946, "num_tokens": 826818162.0, "step": 12240 }, { "entropy": 0.826574458181858, "epoch": 3.71105051882148, "grad_norm": 0.1161520928144455, "learning_rate": 8.972206737300586e-05, "loss": 1.2267, "mean_token_accuracy": 0.7656951203942299, "num_tokens": 827503722.0, "step": 12250 }, { "entropy": 0.8206147626042366, "epoch": 3.714080133303037, "grad_norm": 0.13360336422920227, "learning_rate": 8.970177728450742e-05, "loss": 1.2275, "mean_token_accuracy": 0.7604197725653649, "num_tokens": 828166082.0, "step": 12260 }, { "entropy": 0.8373993262648582, "epoch": 3.7171097477845945, "grad_norm": 0.13123397529125214, "learning_rate": 8.96814694870302e-05, "loss": 1.2429, "mean_token_accuracy": 0.7625438809394837, "num_tokens": 828844434.0, "step": 12270 }, { "entropy": 0.8399862021207809, "epoch": 3.720139362266152, "grad_norm": 0.1167200431227684, "learning_rate": 8.966114398963252e-05, "loss": 1.2449, "mean_token_accuracy": 0.7576163247227669, "num_tokens": 829509473.0, "step": 12280 }, { "entropy": 0.8254438489675522, "epoch": 3.7231689767477087, "grad_norm": 0.16813643276691437, "learning_rate": 8.96408008013806e-05, "loss": 1.226, "mean_token_accuracy": 0.7664303198456764, "num_tokens": 830186083.0, "step": 12290 }, { "entropy": 0.8378492087125778, "epoch": 3.726198591229266, "grad_norm": 0.12639133632183075, "learning_rate": 8.962043993134846e-05, "loss": 1.2413, "mean_token_accuracy": 0.7567730829119682, "num_tokens": 830853542.0, "step": 12300 }, { "entropy": 0.8167424246668815, "epoch": 3.7292282057108235, "grad_norm": 0.11713540554046631, "learning_rate": 8.960006138861808e-05, "loss": 1.2216, "mean_token_accuracy": 0.7664415761828423, "num_tokens": 831533067.0, "step": 12310 }, { "entropy": 0.8392841696739197, "epoch": 3.7322578201923804, "grad_norm": 0.12706278264522552, "learning_rate": 8.957966518227934e-05, "loss": 1.244, "mean_token_accuracy": 0.759884363412857, "num_tokens": 832205180.0, "step": 12320 }, { "entropy": 0.8391022115945816, "epoch": 3.7352874346739378, "grad_norm": 0.12109837681055069, "learning_rate": 8.955925132142992e-05, "loss": 1.2275, "mean_token_accuracy": 0.7620567560195923, "num_tokens": 832885884.0, "step": 12330 }, { "entropy": 0.8387953415513039, "epoch": 3.738317049155495, "grad_norm": 0.12044090032577515, "learning_rate": 8.953881981517549e-05, "loss": 1.2328, "mean_token_accuracy": 0.765028627216816, "num_tokens": 833571123.0, "step": 12340 }, { "entropy": 0.8334729716181755, "epoch": 3.741346663637052, "grad_norm": 0.126737579703331, "learning_rate": 8.951837067262946e-05, "loss": 1.2366, "mean_token_accuracy": 0.7619820713996888, "num_tokens": 834242306.0, "step": 12350 }, { "entropy": 0.8303900763392449, "epoch": 3.7443762781186094, "grad_norm": 0.12160580605268478, "learning_rate": 8.949790390291321e-05, "loss": 1.2314, "mean_token_accuracy": 0.7624058470129966, "num_tokens": 834922541.0, "step": 12360 }, { "entropy": 0.8399101763963699, "epoch": 3.7474058926001668, "grad_norm": 0.12068485468626022, "learning_rate": 8.947741951515594e-05, "loss": 1.2405, "mean_token_accuracy": 0.7593312174081802, "num_tokens": 835591469.0, "step": 12370 }, { "entropy": 0.8358460009098053, "epoch": 3.7504355070817237, "grad_norm": 0.12638629972934723, "learning_rate": 8.945691751849469e-05, "loss": 1.2305, "mean_token_accuracy": 0.7640237674117089, "num_tokens": 836271264.0, "step": 12380 }, { "entropy": 0.835583382844925, "epoch": 3.753465121563281, "grad_norm": 0.1262970119714737, "learning_rate": 8.943639792207438e-05, "loss": 1.2406, "mean_token_accuracy": 0.7630352824926376, "num_tokens": 836951359.0, "step": 12390 }, { "entropy": 0.8302667006850243, "epoch": 3.7564947360448384, "grad_norm": 0.13091841340065002, "learning_rate": 8.94158607350478e-05, "loss": 1.2264, "mean_token_accuracy": 0.7653634011745453, "num_tokens": 837639885.0, "step": 12400 }, { "entropy": 0.8293650209903717, "epoch": 3.7595243505263953, "grad_norm": 0.11191177368164062, "learning_rate": 8.939530596657553e-05, "loss": 1.2303, "mean_token_accuracy": 0.7612865820527077, "num_tokens": 838305937.0, "step": 12410 }, { "entropy": 0.8327704191207885, "epoch": 3.7625539650079527, "grad_norm": 0.11614000797271729, "learning_rate": 8.937473362582605e-05, "loss": 1.2357, "mean_token_accuracy": 0.7675615847110748, "num_tokens": 839009703.0, "step": 12420 }, { "entropy": 0.825287601351738, "epoch": 3.76558357948951, "grad_norm": 0.12107538431882858, "learning_rate": 8.935414372197566e-05, "loss": 1.2268, "mean_token_accuracy": 0.7634308502078057, "num_tokens": 839688040.0, "step": 12430 }, { "entropy": 0.8294574156403541, "epoch": 3.768613193971067, "grad_norm": 0.13719813525676727, "learning_rate": 8.933353626420845e-05, "loss": 1.2403, "mean_token_accuracy": 0.759545375406742, "num_tokens": 840359150.0, "step": 12440 }, { "entropy": 0.8285234406590462, "epoch": 3.7716428084526243, "grad_norm": 0.1382272094488144, "learning_rate": 8.931291126171641e-05, "loss": 1.234, "mean_token_accuracy": 0.7611021846532822, "num_tokens": 841021983.0, "step": 12450 }, { "entropy": 0.836207677423954, "epoch": 3.7746724229341817, "grad_norm": 0.12078831344842911, "learning_rate": 8.929226872369931e-05, "loss": 1.2335, "mean_token_accuracy": 0.7623356714844703, "num_tokens": 841706968.0, "step": 12460 }, { "entropy": 0.8264741778373719, "epoch": 3.7777020374157386, "grad_norm": 0.1228814572095871, "learning_rate": 8.927160865936476e-05, "loss": 1.2339, "mean_token_accuracy": 0.7623338222503662, "num_tokens": 842372400.0, "step": 12470 }, { "entropy": 0.827699501812458, "epoch": 3.780731651897296, "grad_norm": 0.12411075830459595, "learning_rate": 8.925093107792818e-05, "loss": 1.2319, "mean_token_accuracy": 0.7617390379309654, "num_tokens": 843040839.0, "step": 12480 }, { "entropy": 0.8308931544423104, "epoch": 3.7837612663788533, "grad_norm": 0.12857046723365784, "learning_rate": 8.92302359886128e-05, "loss": 1.2352, "mean_token_accuracy": 0.7628135606646538, "num_tokens": 843709430.0, "step": 12490 }, { "entropy": 0.8338014483451843, "epoch": 3.7867908808604103, "grad_norm": 0.12557992339134216, "learning_rate": 8.920952340064968e-05, "loss": 1.2323, "mean_token_accuracy": 0.7621446952223778, "num_tokens": 844385709.0, "step": 12500 }, { "entropy": 0.8305648371577263, "epoch": 3.7898204953419676, "grad_norm": 0.14182014763355255, "learning_rate": 8.918879332327765e-05, "loss": 1.2302, "mean_token_accuracy": 0.763672611117363, "num_tokens": 845077281.0, "step": 12510 }, { "entropy": 0.8348019689321518, "epoch": 3.792850109823525, "grad_norm": 0.11439071595668793, "learning_rate": 8.916804576574337e-05, "loss": 1.2339, "mean_token_accuracy": 0.7650744035840035, "num_tokens": 845760517.0, "step": 12520 }, { "entropy": 0.8238743111491204, "epoch": 3.795879724305082, "grad_norm": 0.12323829531669617, "learning_rate": 8.91472807373013e-05, "loss": 1.2279, "mean_token_accuracy": 0.7617947995662689, "num_tokens": 846422146.0, "step": 12530 }, { "entropy": 0.8199110209941864, "epoch": 3.7989093387866393, "grad_norm": 0.11645542085170746, "learning_rate": 8.912649824721367e-05, "loss": 1.2203, "mean_token_accuracy": 0.7677612766623497, "num_tokens": 847107414.0, "step": 12540 }, { "entropy": 0.8264547348022461, "epoch": 3.8019389532681966, "grad_norm": 0.12416919320821762, "learning_rate": 8.910569830475051e-05, "loss": 1.2431, "mean_token_accuracy": 0.7657420426607132, "num_tokens": 847782719.0, "step": 12550 }, { "entropy": 0.8251286134123802, "epoch": 3.804968567749754, "grad_norm": 0.13622988760471344, "learning_rate": 8.908488091918963e-05, "loss": 1.2271, "mean_token_accuracy": 0.7634443789720535, "num_tokens": 848454904.0, "step": 12560 }, { "entropy": 0.8287327572703361, "epoch": 3.807998182231311, "grad_norm": 0.1126769632101059, "learning_rate": 8.906404609981664e-05, "loss": 1.2322, "mean_token_accuracy": 0.763195650279522, "num_tokens": 849125354.0, "step": 12570 }, { "entropy": 0.8304981291294098, "epoch": 3.8110277967128683, "grad_norm": 0.13094691932201385, "learning_rate": 8.90431938559249e-05, "loss": 1.2344, "mean_token_accuracy": 0.7624080702662468, "num_tokens": 849790788.0, "step": 12580 }, { "entropy": 0.8332023620605469, "epoch": 3.8140574111944257, "grad_norm": 0.1280962973833084, "learning_rate": 8.902232419681557e-05, "loss": 1.2293, "mean_token_accuracy": 0.7648158460855484, "num_tokens": 850474638.0, "step": 12590 }, { "entropy": 0.8346199035644531, "epoch": 3.8170870256759826, "grad_norm": 0.1164981797337532, "learning_rate": 8.900143713179754e-05, "loss": 1.2371, "mean_token_accuracy": 0.7623358771204949, "num_tokens": 851145955.0, "step": 12600 }, { "entropy": 0.8156718030571938, "epoch": 3.82011664015754, "grad_norm": 0.11042694747447968, "learning_rate": 8.898053267018749e-05, "loss": 1.2215, "mean_token_accuracy": 0.7646094113588333, "num_tokens": 851816436.0, "step": 12610 }, { "entropy": 0.8390977829694748, "epoch": 3.8231462546390973, "grad_norm": 0.12022904306650162, "learning_rate": 8.895961082130989e-05, "loss": 1.2421, "mean_token_accuracy": 0.7592789888381958, "num_tokens": 852484169.0, "step": 12620 }, { "entropy": 0.830880680680275, "epoch": 3.8261758691206547, "grad_norm": 0.12470162659883499, "learning_rate": 8.893867159449687e-05, "loss": 1.2287, "mean_token_accuracy": 0.7626701295375824, "num_tokens": 853162486.0, "step": 12630 }, { "entropy": 0.8348073527216912, "epoch": 3.8292054836022116, "grad_norm": 0.12988479435443878, "learning_rate": 8.891771499908841e-05, "loss": 1.2395, "mean_token_accuracy": 0.762461656332016, "num_tokens": 853836077.0, "step": 12640 }, { "entropy": 0.8428922668099403, "epoch": 3.832235098083769, "grad_norm": 0.13711707293987274, "learning_rate": 8.889674104443221e-05, "loss": 1.2467, "mean_token_accuracy": 0.761611770093441, "num_tokens": 854518670.0, "step": 12650 }, { "entropy": 0.8193681225180626, "epoch": 3.8352647125653263, "grad_norm": 0.12601418793201447, "learning_rate": 8.887574973988368e-05, "loss": 1.2221, "mean_token_accuracy": 0.763744181394577, "num_tokens": 855196632.0, "step": 12660 }, { "entropy": 0.8222943872213364, "epoch": 3.8382943270468832, "grad_norm": 0.12922759354114532, "learning_rate": 8.8854741094806e-05, "loss": 1.2326, "mean_token_accuracy": 0.7638288870453834, "num_tokens": 855871139.0, "step": 12670 }, { "entropy": 0.8280269578099251, "epoch": 3.8413239415284406, "grad_norm": 0.1370943933725357, "learning_rate": 8.883371511857008e-05, "loss": 1.2299, "mean_token_accuracy": 0.7631358623504638, "num_tokens": 856534323.0, "step": 12680 }, { "entropy": 0.8340017095208168, "epoch": 3.844353556009998, "grad_norm": 0.1301402598619461, "learning_rate": 8.881267182055455e-05, "loss": 1.2324, "mean_token_accuracy": 0.7608328521251678, "num_tokens": 857207021.0, "step": 12690 }, { "entropy": 0.8315331548452377, "epoch": 3.847383170491555, "grad_norm": 0.11876879632472992, "learning_rate": 8.879161121014578e-05, "loss": 1.232, "mean_token_accuracy": 0.7626194566488266, "num_tokens": 857894691.0, "step": 12700 }, { "entropy": 0.8218623787164688, "epoch": 3.8504127849731122, "grad_norm": 0.12570799887180328, "learning_rate": 8.877053329673786e-05, "loss": 1.2282, "mean_token_accuracy": 0.7642956376075745, "num_tokens": 858566188.0, "step": 12710 }, { "entropy": 0.8263124644756317, "epoch": 3.8534423994546696, "grad_norm": 0.12733717262744904, "learning_rate": 8.874943808973257e-05, "loss": 1.2305, "mean_token_accuracy": 0.7641133055090904, "num_tokens": 859242879.0, "step": 12720 }, { "entropy": 0.837696586549282, "epoch": 3.8564720139362265, "grad_norm": 0.12091173976659775, "learning_rate": 8.872832559853946e-05, "loss": 1.2409, "mean_token_accuracy": 0.7607804715633393, "num_tokens": 859915787.0, "step": 12730 }, { "entropy": 0.8271465063095093, "epoch": 3.859501628417784, "grad_norm": 0.14471746981143951, "learning_rate": 8.870719583257574e-05, "loss": 1.2297, "mean_token_accuracy": 0.7651708543300628, "num_tokens": 860594248.0, "step": 12740 }, { "entropy": 0.812814648449421, "epoch": 3.8625312428993412, "grad_norm": 0.12557920813560486, "learning_rate": 8.868604880126634e-05, "loss": 1.2209, "mean_token_accuracy": 0.7714944124221802, "num_tokens": 861289719.0, "step": 12750 }, { "entropy": 0.8259367436170578, "epoch": 3.865560857380898, "grad_norm": 0.12413983792066574, "learning_rate": 8.86648845140439e-05, "loss": 1.2268, "mean_token_accuracy": 0.7617395550012589, "num_tokens": 861959403.0, "step": 12760 }, { "entropy": 0.8290757730603218, "epoch": 3.8685904718624555, "grad_norm": 0.117474764585495, "learning_rate": 8.864370298034875e-05, "loss": 1.2258, "mean_token_accuracy": 0.7611317902803421, "num_tokens": 862633647.0, "step": 12770 }, { "entropy": 0.8299058496952056, "epoch": 3.871620086344013, "grad_norm": 0.1349320411682129, "learning_rate": 8.862250420962892e-05, "loss": 1.2396, "mean_token_accuracy": 0.7624454960227013, "num_tokens": 863312912.0, "step": 12780 }, { "entropy": 0.8241622790694236, "epoch": 3.87464970082557, "grad_norm": 0.11762060225009918, "learning_rate": 8.86012882113401e-05, "loss": 1.2339, "mean_token_accuracy": 0.7630821943283081, "num_tokens": 863985023.0, "step": 12790 }, { "entropy": 0.8294262751936913, "epoch": 3.877679315307127, "grad_norm": 0.13306935131549835, "learning_rate": 8.858005499494573e-05, "loss": 1.2319, "mean_token_accuracy": 0.7615495726466179, "num_tokens": 864658888.0, "step": 12800 }, { "entropy": 0.8245569050312043, "epoch": 3.8807089297886845, "grad_norm": 0.1198149248957634, "learning_rate": 8.855880456991684e-05, "loss": 1.2298, "mean_token_accuracy": 0.7609682634472847, "num_tokens": 865318098.0, "step": 12810 }, { "entropy": 0.8243506833910942, "epoch": 3.8837385442702415, "grad_norm": 0.13753031194210052, "learning_rate": 8.853753694573219e-05, "loss": 1.223, "mean_token_accuracy": 0.7650005280971527, "num_tokens": 865995887.0, "step": 12820 }, { "entropy": 0.8278594672679901, "epoch": 3.886768158751799, "grad_norm": 0.11619915813207626, "learning_rate": 8.851625213187823e-05, "loss": 1.233, "mean_token_accuracy": 0.7637771308422089, "num_tokens": 866660519.0, "step": 12830 }, { "entropy": 0.8225967958569527, "epoch": 3.889797773233356, "grad_norm": 0.1255115568637848, "learning_rate": 8.849495013784904e-05, "loss": 1.2309, "mean_token_accuracy": 0.7614174097776413, "num_tokens": 867321427.0, "step": 12840 }, { "entropy": 0.8249985828995705, "epoch": 3.892827387714913, "grad_norm": 0.12730218470096588, "learning_rate": 8.847363097314636e-05, "loss": 1.2214, "mean_token_accuracy": 0.766840037703514, "num_tokens": 868019621.0, "step": 12850 }, { "entropy": 0.8264155417680741, "epoch": 3.8958570021964705, "grad_norm": 0.1336270421743393, "learning_rate": 8.845229464727962e-05, "loss": 1.2239, "mean_token_accuracy": 0.7623827084898949, "num_tokens": 868687725.0, "step": 12860 }, { "entropy": 0.8309557393193245, "epoch": 3.898886616678028, "grad_norm": 0.11706342548131943, "learning_rate": 8.84309411697659e-05, "loss": 1.233, "mean_token_accuracy": 0.7596857607364654, "num_tokens": 869360519.0, "step": 12870 }, { "entropy": 0.8345236212015152, "epoch": 3.9019162311595847, "grad_norm": 0.12713146209716797, "learning_rate": 8.840957055012989e-05, "loss": 1.2398, "mean_token_accuracy": 0.7650113448500633, "num_tokens": 870041084.0, "step": 12880 }, { "entropy": 0.8312649458646775, "epoch": 3.904945845641142, "grad_norm": 0.13826613128185272, "learning_rate": 8.838818279790396e-05, "loss": 1.2412, "mean_token_accuracy": 0.765237282216549, "num_tokens": 870714772.0, "step": 12890 }, { "entropy": 0.8348280221223832, "epoch": 3.9079754601226995, "grad_norm": 0.12400850653648376, "learning_rate": 8.836677792262814e-05, "loss": 1.2468, "mean_token_accuracy": 0.7586354508996009, "num_tokens": 871377777.0, "step": 12900 }, { "entropy": 0.8311003044247627, "epoch": 3.9110050746042564, "grad_norm": 0.12937553226947784, "learning_rate": 8.834535593385007e-05, "loss": 1.2447, "mean_token_accuracy": 0.7613857537508011, "num_tokens": 872042117.0, "step": 12910 }, { "entropy": 0.8208565428853035, "epoch": 3.9140346890858138, "grad_norm": 0.11717119812965393, "learning_rate": 8.8323916841125e-05, "loss": 1.2245, "mean_token_accuracy": 0.7649449229240417, "num_tokens": 872711042.0, "step": 12920 }, { "entropy": 0.8317764848470688, "epoch": 3.917064303567371, "grad_norm": 0.11867055296897888, "learning_rate": 8.830246065401588e-05, "loss": 1.2318, "mean_token_accuracy": 0.762212210893631, "num_tokens": 873383179.0, "step": 12930 }, { "entropy": 0.832485431432724, "epoch": 3.920093918048928, "grad_norm": 0.12630002200603485, "learning_rate": 8.82809873820932e-05, "loss": 1.2376, "mean_token_accuracy": 0.7598984807729721, "num_tokens": 874040069.0, "step": 12940 }, { "entropy": 0.839852300286293, "epoch": 3.9231235325304854, "grad_norm": 0.15134549140930176, "learning_rate": 8.825949703493515e-05, "loss": 1.2456, "mean_token_accuracy": 0.757933346927166, "num_tokens": 874695045.0, "step": 12950 }, { "entropy": 0.8406209647655487, "epoch": 3.9261531470120428, "grad_norm": 0.12075337767601013, "learning_rate": 8.823798962212748e-05, "loss": 1.2412, "mean_token_accuracy": 0.7612475737929344, "num_tokens": 875381984.0, "step": 12960 }, { "entropy": 0.8319113612174988, "epoch": 3.9291827614935997, "grad_norm": 0.1266404390335083, "learning_rate": 8.821646515326359e-05, "loss": 1.2391, "mean_token_accuracy": 0.7626997500658035, "num_tokens": 876044980.0, "step": 12970 }, { "entropy": 0.8321159169077873, "epoch": 3.932212375975157, "grad_norm": 0.12632417678833008, "learning_rate": 8.819492363794444e-05, "loss": 1.2379, "mean_token_accuracy": 0.7624675497412682, "num_tokens": 876720301.0, "step": 12980 }, { "entropy": 0.8342352926731109, "epoch": 3.9352419904567144, "grad_norm": 0.1273949295282364, "learning_rate": 8.817336508577865e-05, "loss": 1.2323, "mean_token_accuracy": 0.764403061568737, "num_tokens": 877405746.0, "step": 12990 }, { "entropy": 0.8261652573943138, "epoch": 3.9382716049382713, "grad_norm": 0.11959680169820786, "learning_rate": 8.815178950638239e-05, "loss": 1.2306, "mean_token_accuracy": 0.7622801020741463, "num_tokens": 878069456.0, "step": 13000 }, { "entropy": 0.837689746916294, "epoch": 3.9413012194198287, "grad_norm": 0.1293964833021164, "learning_rate": 8.813019690937947e-05, "loss": 1.2296, "mean_token_accuracy": 0.7628279864788056, "num_tokens": 878761245.0, "step": 13010 }, { "entropy": 0.8373796567320824, "epoch": 3.944330833901386, "grad_norm": 0.14157019555568695, "learning_rate": 8.810858730440126e-05, "loss": 1.2406, "mean_token_accuracy": 0.759574468433857, "num_tokens": 879432362.0, "step": 13020 }, { "entropy": 0.8271543443202972, "epoch": 3.9473604483829434, "grad_norm": 0.12580905854701996, "learning_rate": 8.808696070108671e-05, "loss": 1.243, "mean_token_accuracy": 0.7620757684111595, "num_tokens": 880097240.0, "step": 13030 }, { "entropy": 0.826493576169014, "epoch": 3.9503900628645003, "grad_norm": 0.1348680853843689, "learning_rate": 8.806531710908239e-05, "loss": 1.2325, "mean_token_accuracy": 0.7682598948478698, "num_tokens": 880783720.0, "step": 13040 }, { "entropy": 0.821631669998169, "epoch": 3.9534196773460577, "grad_norm": 0.13794854283332825, "learning_rate": 8.804365653804243e-05, "loss": 1.2168, "mean_token_accuracy": 0.7667650580406189, "num_tokens": 881478582.0, "step": 13050 }, { "entropy": 0.828932423889637, "epoch": 3.956449291827615, "grad_norm": 0.11824832856655121, "learning_rate": 8.802197899762849e-05, "loss": 1.2323, "mean_token_accuracy": 0.7659588649868965, "num_tokens": 882159416.0, "step": 13060 }, { "entropy": 0.8260334685444832, "epoch": 3.9594789063091724, "grad_norm": 0.12015210092067719, "learning_rate": 8.800028449750987e-05, "loss": 1.2287, "mean_token_accuracy": 0.7666187763214112, "num_tokens": 882839711.0, "step": 13070 }, { "entropy": 0.830867451429367, "epoch": 3.9625085207907293, "grad_norm": 0.11991837620735168, "learning_rate": 8.797857304736342e-05, "loss": 1.239, "mean_token_accuracy": 0.7593985632061958, "num_tokens": 883510673.0, "step": 13080 }, { "entropy": 0.8303544789552688, "epoch": 3.9655381352722867, "grad_norm": 0.11978612095117569, "learning_rate": 8.795684465687347e-05, "loss": 1.2344, "mean_token_accuracy": 0.7616124227643013, "num_tokens": 884194771.0, "step": 13090 }, { "entropy": 0.8189367249608039, "epoch": 3.968567749753844, "grad_norm": 0.11828684061765671, "learning_rate": 8.793509933573201e-05, "loss": 1.2246, "mean_token_accuracy": 0.7643954247236252, "num_tokens": 884865730.0, "step": 13100 }, { "entropy": 0.8249358102679253, "epoch": 3.971597364235401, "grad_norm": 0.12646810710430145, "learning_rate": 8.791333709363856e-05, "loss": 1.2308, "mean_token_accuracy": 0.7633047968149185, "num_tokens": 885537265.0, "step": 13110 }, { "entropy": 0.8147301137447357, "epoch": 3.9746269787169584, "grad_norm": 0.11637123674154282, "learning_rate": 8.789155794030012e-05, "loss": 1.2162, "mean_token_accuracy": 0.7662988126277923, "num_tokens": 886220738.0, "step": 13120 }, { "entropy": 0.8361765071749687, "epoch": 3.9776565931985157, "grad_norm": 0.13231906294822693, "learning_rate": 8.786976188543133e-05, "loss": 1.2292, "mean_token_accuracy": 0.7626230180263519, "num_tokens": 886920302.0, "step": 13130 }, { "entropy": 0.8213987976312638, "epoch": 3.9806862076800726, "grad_norm": 0.12882764637470245, "learning_rate": 8.784794893875428e-05, "loss": 1.223, "mean_token_accuracy": 0.7662776485085487, "num_tokens": 887604571.0, "step": 13140 }, { "entropy": 0.8245305463671684, "epoch": 3.98371582216163, "grad_norm": 0.12455525249242783, "learning_rate": 8.782611910999865e-05, "loss": 1.2324, "mean_token_accuracy": 0.7616021037101746, "num_tokens": 888262413.0, "step": 13150 }, { "entropy": 0.8323712050914764, "epoch": 3.9867454366431874, "grad_norm": 0.1199771836400032, "learning_rate": 8.780427240890166e-05, "loss": 1.2349, "mean_token_accuracy": 0.7597421824932098, "num_tokens": 888933393.0, "step": 13160 }, { "entropy": 0.8318541258573532, "epoch": 3.9897750511247443, "grad_norm": 0.11837221682071686, "learning_rate": 8.778240884520798e-05, "loss": 1.2325, "mean_token_accuracy": 0.7667552843689919, "num_tokens": 889621517.0, "step": 13170 }, { "entropy": 0.8278790339827538, "epoch": 3.9928046656063017, "grad_norm": 0.13204719126224518, "learning_rate": 8.776052842866992e-05, "loss": 1.227, "mean_token_accuracy": 0.7613449260592461, "num_tokens": 890300645.0, "step": 13180 }, { "entropy": 0.8214327931404114, "epoch": 3.995834280087859, "grad_norm": 0.1523841768503189, "learning_rate": 8.773863116904718e-05, "loss": 1.2325, "mean_token_accuracy": 0.7664753451943398, "num_tokens": 890970218.0, "step": 13190 }, { "entropy": 0.8335215345025062, "epoch": 3.998863894569416, "grad_norm": 0.13725320994853973, "learning_rate": 8.771671707610707e-05, "loss": 1.2467, "mean_token_accuracy": 0.7620050013065338, "num_tokens": 891640496.0, "step": 13200 }, { "entropy": 0.8312990482036884, "epoch": 4.001817768688935, "grad_norm": 0.1220497339963913, "learning_rate": 8.769478615962436e-05, "loss": 1.2284, "mean_token_accuracy": 0.7663144346995231, "num_tokens": 892314225.0, "step": 13210 }, { "entropy": 0.811907809972763, "epoch": 4.004847383170492, "grad_norm": 0.1275583654642105, "learning_rate": 8.767283842938133e-05, "loss": 1.2171, "mean_token_accuracy": 0.7655479878187179, "num_tokens": 892990894.0, "step": 13220 }, { "entropy": 0.8137328565120697, "epoch": 4.0078769976520485, "grad_norm": 0.1291702687740326, "learning_rate": 8.765087389516775e-05, "loss": 1.2217, "mean_token_accuracy": 0.765879887342453, "num_tokens": 893667329.0, "step": 13230 }, { "entropy": 0.8127585962414742, "epoch": 4.010906612133606, "grad_norm": 0.1276501566171646, "learning_rate": 8.762889256678095e-05, "loss": 1.2071, "mean_token_accuracy": 0.7639842689037323, "num_tokens": 894343030.0, "step": 13240 }, { "entropy": 0.8192038983106613, "epoch": 4.013936226615163, "grad_norm": 0.13279663026332855, "learning_rate": 8.760689445402567e-05, "loss": 1.2175, "mean_token_accuracy": 0.7616088435053825, "num_tokens": 895016670.0, "step": 13250 }, { "entropy": 0.8052625715732574, "epoch": 4.01696584109672, "grad_norm": 0.14576607942581177, "learning_rate": 8.758487956671415e-05, "loss": 1.2088, "mean_token_accuracy": 0.770484383404255, "num_tokens": 895710879.0, "step": 13260 }, { "entropy": 0.8075692936778068, "epoch": 4.019995455578278, "grad_norm": 0.13980166614055634, "learning_rate": 8.756284791466617e-05, "loss": 1.2171, "mean_token_accuracy": 0.7611551076173783, "num_tokens": 896362662.0, "step": 13270 }, { "entropy": 0.7997980833053588, "epoch": 4.023025070059835, "grad_norm": 0.13313572108745575, "learning_rate": 8.754079950770894e-05, "loss": 1.2079, "mean_token_accuracy": 0.7700257301330566, "num_tokens": 897039094.0, "step": 13280 }, { "entropy": 0.8125384911894799, "epoch": 4.026054684541392, "grad_norm": 0.1359756737947464, "learning_rate": 8.751873435567715e-05, "loss": 1.2113, "mean_token_accuracy": 0.7646631121635437, "num_tokens": 897717124.0, "step": 13290 }, { "entropy": 0.8125770598649978, "epoch": 4.02908429902295, "grad_norm": 0.14367371797561646, "learning_rate": 8.749665246841293e-05, "loss": 1.2127, "mean_token_accuracy": 0.7631704553961753, "num_tokens": 898378571.0, "step": 13300 }, { "entropy": 0.8129245296120644, "epoch": 4.0321139135045065, "grad_norm": 0.14231349527835846, "learning_rate": 8.747455385576595e-05, "loss": 1.2146, "mean_token_accuracy": 0.7658406496047974, "num_tokens": 899052172.0, "step": 13310 }, { "entropy": 0.8133532971143722, "epoch": 4.0351435279860635, "grad_norm": 0.15407222509384155, "learning_rate": 8.745243852759328e-05, "loss": 1.2186, "mean_token_accuracy": 0.7606793612241745, "num_tokens": 899704453.0, "step": 13320 }, { "entropy": 0.8136651709675788, "epoch": 4.038173142467621, "grad_norm": 0.1306125670671463, "learning_rate": 8.743030649375944e-05, "loss": 1.2049, "mean_token_accuracy": 0.7665054619312286, "num_tokens": 900388361.0, "step": 13330 }, { "entropy": 0.8115026831626893, "epoch": 4.041202756949178, "grad_norm": 0.12938164174556732, "learning_rate": 8.740815776413649e-05, "loss": 1.2175, "mean_token_accuracy": 0.7635725408792495, "num_tokens": 901057525.0, "step": 13340 }, { "entropy": 0.8118620291352272, "epoch": 4.044232371430735, "grad_norm": 0.1395217478275299, "learning_rate": 8.73859923486038e-05, "loss": 1.2151, "mean_token_accuracy": 0.7668686181306839, "num_tokens": 901734076.0, "step": 13350 }, { "entropy": 0.813816773891449, "epoch": 4.047261985912293, "grad_norm": 0.14438989758491516, "learning_rate": 8.736381025704827e-05, "loss": 1.2091, "mean_token_accuracy": 0.7655443996191025, "num_tokens": 902418339.0, "step": 13360 }, { "entropy": 0.8176364481449128, "epoch": 4.05029160039385, "grad_norm": 0.13676360249519348, "learning_rate": 8.734161149936426e-05, "loss": 1.2147, "mean_token_accuracy": 0.7684245362877846, "num_tokens": 903101313.0, "step": 13370 }, { "entropy": 0.8075049206614494, "epoch": 4.053321214875407, "grad_norm": 0.13936209678649902, "learning_rate": 8.73193960854535e-05, "loss": 1.2114, "mean_token_accuracy": 0.7677946135401725, "num_tokens": 903786652.0, "step": 13380 }, { "entropy": 0.815261285007, "epoch": 4.056350829356965, "grad_norm": 0.12821030616760254, "learning_rate": 8.729716402522517e-05, "loss": 1.215, "mean_token_accuracy": 0.7635524079203606, "num_tokens": 904449904.0, "step": 13390 }, { "entropy": 0.8139819651842117, "epoch": 4.0593804438385215, "grad_norm": 0.12646350264549255, "learning_rate": 8.727491532859587e-05, "loss": 1.2166, "mean_token_accuracy": 0.7698590472340584, "num_tokens": 905133903.0, "step": 13400 }, { "entropy": 0.8102973580360413, "epoch": 4.062410058320078, "grad_norm": 0.13327458500862122, "learning_rate": 8.725265000548967e-05, "loss": 1.2118, "mean_token_accuracy": 0.7633210942149162, "num_tokens": 905805761.0, "step": 13410 }, { "entropy": 0.8091414451599122, "epoch": 4.065439672801636, "grad_norm": 0.1316269487142563, "learning_rate": 8.7230368065838e-05, "loss": 1.2106, "mean_token_accuracy": 0.7654315799474716, "num_tokens": 906479368.0, "step": 13420 }, { "entropy": 0.8207312270998954, "epoch": 4.068469287283193, "grad_norm": 0.1382802426815033, "learning_rate": 8.72080695195797e-05, "loss": 1.2193, "mean_token_accuracy": 0.7628284618258476, "num_tokens": 907148763.0, "step": 13430 }, { "entropy": 0.8118870124220848, "epoch": 4.07149890176475, "grad_norm": 0.13254445791244507, "learning_rate": 8.718575437666107e-05, "loss": 1.2173, "mean_token_accuracy": 0.7619264394044876, "num_tokens": 907810001.0, "step": 13440 }, { "entropy": 0.8125631526112557, "epoch": 4.074528516246308, "grad_norm": 0.1385975182056427, "learning_rate": 8.716342264703577e-05, "loss": 1.2106, "mean_token_accuracy": 0.7686882153153419, "num_tokens": 908492593.0, "step": 13450 }, { "entropy": 0.8135557323694229, "epoch": 4.077558130727865, "grad_norm": 0.13102348148822784, "learning_rate": 8.714107434066485e-05, "loss": 1.2093, "mean_token_accuracy": 0.77032929956913, "num_tokens": 909188260.0, "step": 13460 }, { "entropy": 0.8188177153468132, "epoch": 4.080587745209422, "grad_norm": 0.1288897544145584, "learning_rate": 8.711870946751677e-05, "loss": 1.2183, "mean_token_accuracy": 0.7633365780115128, "num_tokens": 909867008.0, "step": 13470 }, { "entropy": 0.8076186761260032, "epoch": 4.0836173596909795, "grad_norm": 0.13218490779399872, "learning_rate": 8.709632803756743e-05, "loss": 1.213, "mean_token_accuracy": 0.7662195295095444, "num_tokens": 910534417.0, "step": 13480 }, { "entropy": 0.8244075238704681, "epoch": 4.086646974172536, "grad_norm": 0.13766668736934662, "learning_rate": 8.70739300608e-05, "loss": 1.2176, "mean_token_accuracy": 0.7656984314322471, "num_tokens": 911198736.0, "step": 13490 }, { "entropy": 0.8187069833278656, "epoch": 4.089676588654093, "grad_norm": 0.13429325819015503, "learning_rate": 8.705151554720516e-05, "loss": 1.2158, "mean_token_accuracy": 0.7615954145789147, "num_tokens": 911867620.0, "step": 13500 }, { "entropy": 0.8119468748569488, "epoch": 4.092706203135651, "grad_norm": 0.12528271973133087, "learning_rate": 8.702908450678088e-05, "loss": 1.2082, "mean_token_accuracy": 0.7680399090051651, "num_tokens": 912557718.0, "step": 13510 }, { "entropy": 0.8165273770689965, "epoch": 4.095735817617208, "grad_norm": 0.13239304721355438, "learning_rate": 8.700663694953253e-05, "loss": 1.2087, "mean_token_accuracy": 0.7682377830147743, "num_tokens": 913259735.0, "step": 13520 }, { "entropy": 0.8079084277153015, "epoch": 4.098765432098766, "grad_norm": 0.1349460929632187, "learning_rate": 8.698417288547282e-05, "loss": 1.2039, "mean_token_accuracy": 0.7641630545258522, "num_tokens": 913931114.0, "step": 13530 }, { "entropy": 0.8251795873045922, "epoch": 4.101795046580323, "grad_norm": 0.13713224232196808, "learning_rate": 8.696169232462186e-05, "loss": 1.2312, "mean_token_accuracy": 0.7643203541636467, "num_tokens": 914605485.0, "step": 13540 }, { "entropy": 0.8222374960780143, "epoch": 4.10482466106188, "grad_norm": 0.1352066695690155, "learning_rate": 8.693919527700712e-05, "loss": 1.228, "mean_token_accuracy": 0.7634329333901405, "num_tokens": 915283506.0, "step": 13550 }, { "entropy": 0.8097446709871292, "epoch": 4.1078542755434375, "grad_norm": 0.13599705696105957, "learning_rate": 8.691668175266339e-05, "loss": 1.2089, "mean_token_accuracy": 0.7686758816242218, "num_tokens": 915964558.0, "step": 13560 }, { "entropy": 0.8166528180241585, "epoch": 4.110883890024994, "grad_norm": 0.1307644248008728, "learning_rate": 8.689415176163284e-05, "loss": 1.2236, "mean_token_accuracy": 0.7618932396173477, "num_tokens": 916628512.0, "step": 13570 }, { "entropy": 0.8150522410869598, "epoch": 4.113913504506551, "grad_norm": 0.14148427546024323, "learning_rate": 8.687160531396497e-05, "loss": 1.2116, "mean_token_accuracy": 0.766238908469677, "num_tokens": 917296021.0, "step": 13580 }, { "entropy": 0.8116999998688698, "epoch": 4.116943118988109, "grad_norm": 0.13924165070056915, "learning_rate": 8.68490424197166e-05, "loss": 1.2225, "mean_token_accuracy": 0.760797917842865, "num_tokens": 917948662.0, "step": 13590 }, { "entropy": 0.8144552662968636, "epoch": 4.119972733469666, "grad_norm": 0.13732901215553284, "learning_rate": 8.682646308895198e-05, "loss": 1.225, "mean_token_accuracy": 0.7634315997362137, "num_tokens": 918612165.0, "step": 13600 }, { "entropy": 0.8196040391921997, "epoch": 4.123002347951223, "grad_norm": 0.1463479995727539, "learning_rate": 8.680386733174252e-05, "loss": 1.2255, "mean_token_accuracy": 0.7612081736326217, "num_tokens": 919268989.0, "step": 13610 }, { "entropy": 0.8091532364487648, "epoch": 4.126031962432781, "grad_norm": 0.1418394297361374, "learning_rate": 8.678125515816715e-05, "loss": 1.2138, "mean_token_accuracy": 0.7636659547686577, "num_tokens": 919930574.0, "step": 13620 }, { "entropy": 0.8077966541051864, "epoch": 4.129061576914338, "grad_norm": 0.13525235652923584, "learning_rate": 8.675862657831198e-05, "loss": 1.2105, "mean_token_accuracy": 0.7673310711979866, "num_tokens": 920608705.0, "step": 13630 }, { "entropy": 0.8101565122604371, "epoch": 4.132091191395895, "grad_norm": 0.14173512160778046, "learning_rate": 8.673598160227048e-05, "loss": 1.2029, "mean_token_accuracy": 0.7678299769759178, "num_tokens": 921299297.0, "step": 13640 }, { "entropy": 0.81236642152071, "epoch": 4.1351208058774525, "grad_norm": 0.13491219282150269, "learning_rate": 8.671332024014347e-05, "loss": 1.2141, "mean_token_accuracy": 0.7643201470375061, "num_tokens": 921969151.0, "step": 13650 }, { "entropy": 0.814178629219532, "epoch": 4.138150420359009, "grad_norm": 0.13452528417110443, "learning_rate": 8.669064250203905e-05, "loss": 1.209, "mean_token_accuracy": 0.7639429673552514, "num_tokens": 922644973.0, "step": 13660 }, { "entropy": 0.8204580396413803, "epoch": 4.141180034840566, "grad_norm": 0.13058091700077057, "learning_rate": 8.666794839807262e-05, "loss": 1.2231, "mean_token_accuracy": 0.7653258740901947, "num_tokens": 923327982.0, "step": 13670 }, { "entropy": 0.8129162982106208, "epoch": 4.144209649322124, "grad_norm": 0.13659673929214478, "learning_rate": 8.664523793836688e-05, "loss": 1.2108, "mean_token_accuracy": 0.7651533886790276, "num_tokens": 924003653.0, "step": 13680 }, { "entropy": 0.8184856981039047, "epoch": 4.147239263803681, "grad_norm": 0.13943171501159668, "learning_rate": 8.662251113305184e-05, "loss": 1.2201, "mean_token_accuracy": 0.7643186420202255, "num_tokens": 924675200.0, "step": 13690 }, { "entropy": 0.810869137942791, "epoch": 4.150268878285238, "grad_norm": 0.13399697840213776, "learning_rate": 8.65997679922648e-05, "loss": 1.2063, "mean_token_accuracy": 0.7663172572851181, "num_tokens": 925345355.0, "step": 13700 }, { "entropy": 0.8122163027524948, "epoch": 4.153298492766796, "grad_norm": 0.17282022535800934, "learning_rate": 8.65770085261503e-05, "loss": 1.2168, "mean_token_accuracy": 0.7658040776848793, "num_tokens": 926015775.0, "step": 13710 }, { "entropy": 0.8136539295315742, "epoch": 4.156328107248353, "grad_norm": 0.1364956945180893, "learning_rate": 8.655423274486023e-05, "loss": 1.2206, "mean_token_accuracy": 0.7697982728481293, "num_tokens": 926704437.0, "step": 13720 }, { "entropy": 0.8150509908795357, "epoch": 4.15935772172991, "grad_norm": 0.13956472277641296, "learning_rate": 8.653144065855374e-05, "loss": 1.219, "mean_token_accuracy": 0.7586035177111625, "num_tokens": 927351346.0, "step": 13730 }, { "entropy": 0.8135612204670906, "epoch": 4.162387336211467, "grad_norm": 0.1430983990430832, "learning_rate": 8.650863227739722e-05, "loss": 1.2204, "mean_token_accuracy": 0.766981391608715, "num_tokens": 928026910.0, "step": 13740 }, { "entropy": 0.795094721019268, "epoch": 4.165416950693024, "grad_norm": 0.13425539433956146, "learning_rate": 8.648580761156434e-05, "loss": 1.206, "mean_token_accuracy": 0.7714309111237526, "num_tokens": 928702950.0, "step": 13750 }, { "entropy": 0.8066852271556855, "epoch": 4.168446565174581, "grad_norm": 0.13715395331382751, "learning_rate": 8.646296667123607e-05, "loss": 1.2129, "mean_token_accuracy": 0.7653296887874603, "num_tokens": 929371855.0, "step": 13760 }, { "entropy": 0.8244137406349182, "epoch": 4.171476179656139, "grad_norm": 0.13420824706554413, "learning_rate": 8.64401094666006e-05, "loss": 1.2228, "mean_token_accuracy": 0.7606052219867706, "num_tokens": 930047999.0, "step": 13770 }, { "entropy": 0.8052446603775024, "epoch": 4.174505794137696, "grad_norm": 0.13088949024677277, "learning_rate": 8.641723600785338e-05, "loss": 1.208, "mean_token_accuracy": 0.7691586732864379, "num_tokens": 930730923.0, "step": 13780 }, { "entropy": 0.80958491563797, "epoch": 4.177535408619253, "grad_norm": 0.1393127143383026, "learning_rate": 8.639434630519712e-05, "loss": 1.2196, "mean_token_accuracy": 0.7635791033506394, "num_tokens": 931400971.0, "step": 13790 }, { "entropy": 0.8069665089249611, "epoch": 4.180565023100811, "grad_norm": 0.16349659860134125, "learning_rate": 8.637144036884178e-05, "loss": 1.2022, "mean_token_accuracy": 0.7647842794656754, "num_tokens": 932073176.0, "step": 13800 }, { "entropy": 0.8113209322094918, "epoch": 4.183594637582368, "grad_norm": 0.1333882361650467, "learning_rate": 8.634851820900455e-05, "loss": 1.224, "mean_token_accuracy": 0.7642734766006469, "num_tokens": 932735046.0, "step": 13810 }, { "entropy": 0.8123429358005524, "epoch": 4.1866242520639245, "grad_norm": 0.13888338208198547, "learning_rate": 8.632557983590985e-05, "loss": 1.2188, "mean_token_accuracy": 0.7666823834180831, "num_tokens": 933411711.0, "step": 13820 }, { "entropy": 0.8132064625620842, "epoch": 4.189653866545482, "grad_norm": 0.14748670160770416, "learning_rate": 8.630262525978937e-05, "loss": 1.2112, "mean_token_accuracy": 0.7644979268312454, "num_tokens": 934080196.0, "step": 13830 }, { "entropy": 0.8083377107977867, "epoch": 4.192683481027039, "grad_norm": 0.13556091487407684, "learning_rate": 8.627965449088196e-05, "loss": 1.2118, "mean_token_accuracy": 0.7660816133022308, "num_tokens": 934758164.0, "step": 13840 }, { "entropy": 0.8108568266034126, "epoch": 4.195713095508596, "grad_norm": 0.1261628419160843, "learning_rate": 8.625666753943375e-05, "loss": 1.2123, "mean_token_accuracy": 0.7633062645792961, "num_tokens": 935419773.0, "step": 13850 }, { "entropy": 0.8159422129392624, "epoch": 4.198742709990154, "grad_norm": 0.13253046572208405, "learning_rate": 8.623366441569806e-05, "loss": 1.2145, "mean_token_accuracy": 0.7646999746561051, "num_tokens": 936101595.0, "step": 13860 }, { "entropy": 0.8183444306254387, "epoch": 4.201772324471711, "grad_norm": 0.14163215458393097, "learning_rate": 8.621064512993548e-05, "loss": 1.2193, "mean_token_accuracy": 0.7625939324498177, "num_tokens": 936769221.0, "step": 13870 }, { "entropy": 0.811541984975338, "epoch": 4.204801938953268, "grad_norm": 0.13868501782417297, "learning_rate": 8.618760969241371e-05, "loss": 1.2109, "mean_token_accuracy": 0.7672978028655052, "num_tokens": 937458407.0, "step": 13880 }, { "entropy": 0.8121904909610749, "epoch": 4.207831553434826, "grad_norm": 0.13706368207931519, "learning_rate": 8.616455811340774e-05, "loss": 1.2108, "mean_token_accuracy": 0.7664934456348419, "num_tokens": 938139899.0, "step": 13890 }, { "entropy": 0.8236121952533721, "epoch": 4.2108611679163825, "grad_norm": 0.13133464753627777, "learning_rate": 8.614149040319968e-05, "loss": 1.2219, "mean_token_accuracy": 0.7693839401006699, "num_tokens": 938837457.0, "step": 13900 }, { "entropy": 0.8124981418251991, "epoch": 4.2138907823979395, "grad_norm": 0.136515811085701, "learning_rate": 8.611840657207895e-05, "loss": 1.2257, "mean_token_accuracy": 0.7673719182610512, "num_tokens": 939510878.0, "step": 13910 }, { "entropy": 0.8288216248154641, "epoch": 4.216920396879497, "grad_norm": 0.1298895627260208, "learning_rate": 8.609530663034204e-05, "loss": 1.2249, "mean_token_accuracy": 0.7594271808862686, "num_tokens": 940182253.0, "step": 13920 }, { "entropy": 0.8282486319541931, "epoch": 4.219950011361054, "grad_norm": 0.1499754935503006, "learning_rate": 8.60721905882927e-05, "loss": 1.2236, "mean_token_accuracy": 0.7609874993562699, "num_tokens": 940851472.0, "step": 13930 }, { "entropy": 0.8192692086100578, "epoch": 4.222979625842611, "grad_norm": 0.13645020127296448, "learning_rate": 8.604905845624184e-05, "loss": 1.2211, "mean_token_accuracy": 0.7661446630954742, "num_tokens": 941531104.0, "step": 13940 }, { "entropy": 0.8111198768019676, "epoch": 4.226009240324169, "grad_norm": 0.1310085952281952, "learning_rate": 8.602591024450757e-05, "loss": 1.2083, "mean_token_accuracy": 0.7674522623419762, "num_tokens": 942213304.0, "step": 13950 }, { "entropy": 0.8215175598859787, "epoch": 4.229038854805726, "grad_norm": 0.15496326982975006, "learning_rate": 8.600274596341511e-05, "loss": 1.2154, "mean_token_accuracy": 0.7666410595178604, "num_tokens": 942905829.0, "step": 13960 }, { "entropy": 0.8067178651690483, "epoch": 4.232068469287283, "grad_norm": 0.13284319639205933, "learning_rate": 8.597956562329691e-05, "loss": 1.207, "mean_token_accuracy": 0.7688114762306213, "num_tokens": 943582951.0, "step": 13970 }, { "entropy": 0.8132088467478752, "epoch": 4.235098083768841, "grad_norm": 0.1390438824892044, "learning_rate": 8.595636923449256e-05, "loss": 1.2129, "mean_token_accuracy": 0.7628922238945961, "num_tokens": 944258139.0, "step": 13980 }, { "entropy": 0.8245077699422836, "epoch": 4.2381276982503975, "grad_norm": 0.1278022974729538, "learning_rate": 8.593315680734881e-05, "loss": 1.2203, "mean_token_accuracy": 0.7631716653704643, "num_tokens": 944938300.0, "step": 13990 }, { "entropy": 0.8103908881545067, "epoch": 4.241157312731955, "grad_norm": 0.15149471163749695, "learning_rate": 8.590992835221955e-05, "loss": 1.2129, "mean_token_accuracy": 0.7649084851145744, "num_tokens": 945609140.0, "step": 14000 }, { "entropy": 0.8094377368688583, "epoch": 4.244186927213512, "grad_norm": 0.1317652463912964, "learning_rate": 8.588668387946586e-05, "loss": 1.2131, "mean_token_accuracy": 0.7669806465506553, "num_tokens": 946282228.0, "step": 14010 }, { "entropy": 0.8111464530229568, "epoch": 4.247216541695069, "grad_norm": 0.1401730626821518, "learning_rate": 8.58634233994559e-05, "loss": 1.2114, "mean_token_accuracy": 0.7699093639850616, "num_tokens": 946973660.0, "step": 14020 }, { "entropy": 0.8090970531105995, "epoch": 4.250246156176627, "grad_norm": 0.13968342542648315, "learning_rate": 8.584014692256506e-05, "loss": 1.2121, "mean_token_accuracy": 0.7649696469306946, "num_tokens": 947645356.0, "step": 14030 }, { "entropy": 0.8118878945708274, "epoch": 4.253275770658184, "grad_norm": 0.14246892929077148, "learning_rate": 8.581685445917578e-05, "loss": 1.2202, "mean_token_accuracy": 0.7633164018392563, "num_tokens": 948311021.0, "step": 14040 }, { "entropy": 0.8142295688390732, "epoch": 4.256305385139741, "grad_norm": 0.12931780517101288, "learning_rate": 8.579354601967766e-05, "loss": 1.212, "mean_token_accuracy": 0.7635192066431046, "num_tokens": 948980277.0, "step": 14050 }, { "entropy": 0.8129866749048233, "epoch": 4.259334999621299, "grad_norm": 0.13530075550079346, "learning_rate": 8.577022161446742e-05, "loss": 1.2063, "mean_token_accuracy": 0.7671345919370651, "num_tokens": 949665068.0, "step": 14060 }, { "entropy": 0.811836202442646, "epoch": 4.2623646141028555, "grad_norm": 0.1332283765077591, "learning_rate": 8.574688125394894e-05, "loss": 1.213, "mean_token_accuracy": 0.7662887141108513, "num_tokens": 950344526.0, "step": 14070 }, { "entropy": 0.817075201869011, "epoch": 4.265394228584412, "grad_norm": 0.13864247500896454, "learning_rate": 8.572352494853319e-05, "loss": 1.2227, "mean_token_accuracy": 0.7603160515427589, "num_tokens": 950998511.0, "step": 14080 }, { "entropy": 0.8112716108560563, "epoch": 4.26842384306597, "grad_norm": 0.14149050414562225, "learning_rate": 8.570015270863822e-05, "loss": 1.2079, "mean_token_accuracy": 0.7637853011488914, "num_tokens": 951673719.0, "step": 14090 }, { "entropy": 0.8229402303695679, "epoch": 4.271453457547527, "grad_norm": 0.1343902349472046, "learning_rate": 8.567676454468925e-05, "loss": 1.226, "mean_token_accuracy": 0.7641852721571922, "num_tokens": 952351935.0, "step": 14100 }, { "entropy": 0.8148226022720337, "epoch": 4.274483072029084, "grad_norm": 0.1378401517868042, "learning_rate": 8.565336046711854e-05, "loss": 1.2222, "mean_token_accuracy": 0.7640312537550926, "num_tokens": 953032820.0, "step": 14110 }, { "entropy": 0.8163686230778694, "epoch": 4.277512686510642, "grad_norm": 0.13915479183197021, "learning_rate": 8.56299404863655e-05, "loss": 1.2077, "mean_token_accuracy": 0.7669281914830208, "num_tokens": 953725056.0, "step": 14120 }, { "entropy": 0.8072247564792633, "epoch": 4.280542300992199, "grad_norm": 0.15108776092529297, "learning_rate": 8.560650461287662e-05, "loss": 1.2135, "mean_token_accuracy": 0.7661883860826493, "num_tokens": 954392636.0, "step": 14130 }, { "entropy": 0.8094939455389977, "epoch": 4.283571915473756, "grad_norm": 0.13692191243171692, "learning_rate": 8.558305285710546e-05, "loss": 1.2155, "mean_token_accuracy": 0.7612668544054031, "num_tokens": 955055043.0, "step": 14140 }, { "entropy": 0.816562020778656, "epoch": 4.2866015299553135, "grad_norm": 0.1471427083015442, "learning_rate": 8.555958522951269e-05, "loss": 1.2206, "mean_token_accuracy": 0.7659133851528168, "num_tokens": 955731236.0, "step": 14150 }, { "entropy": 0.81419358253479, "epoch": 4.28963114443687, "grad_norm": 0.1354820430278778, "learning_rate": 8.553610174056601e-05, "loss": 1.2191, "mean_token_accuracy": 0.7625333324074746, "num_tokens": 956391286.0, "step": 14160 }, { "entropy": 0.8200589090585708, "epoch": 4.292660758918427, "grad_norm": 0.1293133646249771, "learning_rate": 8.551260240074026e-05, "loss": 1.2203, "mean_token_accuracy": 0.7590763792395592, "num_tokens": 957043185.0, "step": 14170 }, { "entropy": 0.8099162101745605, "epoch": 4.295690373399985, "grad_norm": 0.14451120793819427, "learning_rate": 8.548908722051733e-05, "loss": 1.215, "mean_token_accuracy": 0.7641231268644333, "num_tokens": 957713860.0, "step": 14180 }, { "entropy": 0.8195161297917366, "epoch": 4.298719987881542, "grad_norm": 0.12840956449508667, "learning_rate": 8.546555621038613e-05, "loss": 1.2212, "mean_token_accuracy": 0.762025710940361, "num_tokens": 958374608.0, "step": 14190 }, { "entropy": 0.8093711003661156, "epoch": 4.301749602363099, "grad_norm": 0.14335192739963531, "learning_rate": 8.54420093808427e-05, "loss": 1.2084, "mean_token_accuracy": 0.7724227145314216, "num_tokens": 959069213.0, "step": 14200 }, { "entropy": 0.8138993307948112, "epoch": 4.304779216844657, "grad_norm": 0.14377017319202423, "learning_rate": 8.54184467423901e-05, "loss": 1.2102, "mean_token_accuracy": 0.7692980572581292, "num_tokens": 959757398.0, "step": 14210 }, { "entropy": 0.8112579181790351, "epoch": 4.307808831326214, "grad_norm": 0.1374746412038803, "learning_rate": 8.539486830553845e-05, "loss": 1.2086, "mean_token_accuracy": 0.7667512550950051, "num_tokens": 960439284.0, "step": 14220 }, { "entropy": 0.8109935387969017, "epoch": 4.310838445807771, "grad_norm": 0.1405801922082901, "learning_rate": 8.537127408080488e-05, "loss": 1.2115, "mean_token_accuracy": 0.7662670150399208, "num_tokens": 961120781.0, "step": 14230 }, { "entropy": 0.8141834795475006, "epoch": 4.3138680602893285, "grad_norm": 0.13471923768520355, "learning_rate": 8.534766407871362e-05, "loss": 1.2173, "mean_token_accuracy": 0.7663844376802444, "num_tokens": 961788829.0, "step": 14240 }, { "entropy": 0.8285132646560669, "epoch": 4.316897674770885, "grad_norm": 0.1346365064382553, "learning_rate": 8.532403830979591e-05, "loss": 1.2243, "mean_token_accuracy": 0.766338886320591, "num_tokens": 962482284.0, "step": 14250 }, { "entropy": 0.8150933369994163, "epoch": 4.319927289252442, "grad_norm": 0.1380923092365265, "learning_rate": 8.530039678459001e-05, "loss": 1.2102, "mean_token_accuracy": 0.7657512426376343, "num_tokens": 963166506.0, "step": 14260 }, { "entropy": 0.812044833600521, "epoch": 4.322956903734, "grad_norm": 0.12815997004508972, "learning_rate": 8.527673951364126e-05, "loss": 1.2138, "mean_token_accuracy": 0.7688316002488136, "num_tokens": 963851087.0, "step": 14270 }, { "entropy": 0.8042856082320213, "epoch": 4.325986518215557, "grad_norm": 0.13710647821426392, "learning_rate": 8.525306650750196e-05, "loss": 1.2128, "mean_token_accuracy": 0.7665573492646217, "num_tokens": 964516982.0, "step": 14280 }, { "entropy": 0.8157733410596848, "epoch": 4.329016132697114, "grad_norm": 0.13905055820941925, "learning_rate": 8.522937777673144e-05, "loss": 1.2233, "mean_token_accuracy": 0.7639014184474945, "num_tokens": 965184940.0, "step": 14290 }, { "entropy": 0.8091638401150704, "epoch": 4.332045747178672, "grad_norm": 0.1665315479040146, "learning_rate": 8.520567333189608e-05, "loss": 1.2096, "mean_token_accuracy": 0.7671777755022049, "num_tokens": 965856379.0, "step": 14300 }, { "entropy": 0.827835176885128, "epoch": 4.335075361660229, "grad_norm": 0.13923880457878113, "learning_rate": 8.518195318356925e-05, "loss": 1.2216, "mean_token_accuracy": 0.7618249371647835, "num_tokens": 966539254.0, "step": 14310 }, { "entropy": 0.8134009554982186, "epoch": 4.338104976141786, "grad_norm": 0.137948676943779, "learning_rate": 8.515821734233132e-05, "loss": 1.2145, "mean_token_accuracy": 0.7653644904494286, "num_tokens": 967221257.0, "step": 14320 }, { "entropy": 0.8160528019070625, "epoch": 4.341134590623343, "grad_norm": 0.14116792380809784, "learning_rate": 8.513446581876965e-05, "loss": 1.2152, "mean_token_accuracy": 0.7628973737359047, "num_tokens": 967891935.0, "step": 14330 }, { "entropy": 0.8142945170402527, "epoch": 4.3441642051049, "grad_norm": 0.1404132843017578, "learning_rate": 8.511069862347862e-05, "loss": 1.2119, "mean_token_accuracy": 0.7677595824003219, "num_tokens": 968574266.0, "step": 14340 }, { "entropy": 0.822813107073307, "epoch": 4.347193819586457, "grad_norm": 0.1303926408290863, "learning_rate": 8.508691576705959e-05, "loss": 1.2249, "mean_token_accuracy": 0.7640576988458634, "num_tokens": 969257136.0, "step": 14350 }, { "entropy": 0.819758141040802, "epoch": 4.350223434068015, "grad_norm": 0.13424734771251678, "learning_rate": 8.50631172601209e-05, "loss": 1.2139, "mean_token_accuracy": 0.7648958772420883, "num_tokens": 969940681.0, "step": 14360 }, { "entropy": 0.8146367132663727, "epoch": 4.353253048549572, "grad_norm": 0.15804411470890045, "learning_rate": 8.503930311327788e-05, "loss": 1.2141, "mean_token_accuracy": 0.7659523740410805, "num_tokens": 970617776.0, "step": 14370 }, { "entropy": 0.8044301435351372, "epoch": 4.356282663031129, "grad_norm": 0.1360851377248764, "learning_rate": 8.501547333715281e-05, "loss": 1.2156, "mean_token_accuracy": 0.7675289556384086, "num_tokens": 971284578.0, "step": 14380 }, { "entropy": 0.81045912951231, "epoch": 4.359312277512687, "grad_norm": 0.1404189020395279, "learning_rate": 8.499162794237499e-05, "loss": 1.2129, "mean_token_accuracy": 0.7676175624132157, "num_tokens": 971969518.0, "step": 14390 }, { "entropy": 0.8229447811841964, "epoch": 4.362341891994244, "grad_norm": 0.14740420877933502, "learning_rate": 8.496776693958062e-05, "loss": 1.2234, "mean_token_accuracy": 0.7622767046093941, "num_tokens": 972637831.0, "step": 14400 }, { "entropy": 0.8168444901704788, "epoch": 4.365371506475801, "grad_norm": 0.1315646916627884, "learning_rate": 8.494389033941296e-05, "loss": 1.2167, "mean_token_accuracy": 0.7641868308186531, "num_tokens": 973320160.0, "step": 14410 }, { "entropy": 0.810432693362236, "epoch": 4.368401120957358, "grad_norm": 0.1290990114212036, "learning_rate": 8.491999815252212e-05, "loss": 1.2153, "mean_token_accuracy": 0.7621484085917473, "num_tokens": 973987722.0, "step": 14420 }, { "entropy": 0.8143949195742607, "epoch": 4.371430735438915, "grad_norm": 0.1370246559381485, "learning_rate": 8.489609038956523e-05, "loss": 1.2159, "mean_token_accuracy": 0.7636998429894447, "num_tokens": 974650043.0, "step": 14430 }, { "entropy": 0.8091020226478577, "epoch": 4.374460349920472, "grad_norm": 0.14625504612922668, "learning_rate": 8.487216706120634e-05, "loss": 1.2203, "mean_token_accuracy": 0.7658385097980499, "num_tokens": 975320079.0, "step": 14440 }, { "entropy": 0.8171315059065819, "epoch": 4.37748996440203, "grad_norm": 0.1365855187177658, "learning_rate": 8.484822817811644e-05, "loss": 1.2143, "mean_token_accuracy": 0.7674818530678749, "num_tokens": 976001958.0, "step": 14450 }, { "entropy": 0.8118990361690521, "epoch": 4.380519578883587, "grad_norm": 0.1415693610906601, "learning_rate": 8.482427375097351e-05, "loss": 1.2136, "mean_token_accuracy": 0.7660479754209518, "num_tokens": 976689225.0, "step": 14460 }, { "entropy": 0.814107233285904, "epoch": 4.383549193365145, "grad_norm": 0.14036540687084198, "learning_rate": 8.480030379046238e-05, "loss": 1.211, "mean_token_accuracy": 0.7664266467094422, "num_tokens": 977366619.0, "step": 14470 }, { "entropy": 0.8128906980156898, "epoch": 4.386578807846702, "grad_norm": 0.14060281217098236, "learning_rate": 8.477631830727486e-05, "loss": 1.2106, "mean_token_accuracy": 0.762708805501461, "num_tokens": 978036800.0, "step": 14480 }, { "entropy": 0.8185534238815307, "epoch": 4.3896084223282585, "grad_norm": 0.142329141497612, "learning_rate": 8.475231731210971e-05, "loss": 1.2251, "mean_token_accuracy": 0.7630772069096565, "num_tokens": 978705526.0, "step": 14490 }, { "entropy": 0.813590244948864, "epoch": 4.392638036809816, "grad_norm": 0.13582834601402283, "learning_rate": 8.472830081567253e-05, "loss": 1.2102, "mean_token_accuracy": 0.7681605830788613, "num_tokens": 979384960.0, "step": 14500 }, { "entropy": 0.8131711512804032, "epoch": 4.395667651291373, "grad_norm": 0.1515778750181198, "learning_rate": 8.470426882867591e-05, "loss": 1.2193, "mean_token_accuracy": 0.7632424727082252, "num_tokens": 980053631.0, "step": 14510 }, { "entropy": 0.8127683132886887, "epoch": 4.39869726577293, "grad_norm": 0.14411474764347076, "learning_rate": 8.46802213618393e-05, "loss": 1.209, "mean_token_accuracy": 0.7665164932608605, "num_tokens": 980731299.0, "step": 14520 }, { "entropy": 0.8086910650134087, "epoch": 4.401726880254488, "grad_norm": 0.13578078150749207, "learning_rate": 8.465615842588908e-05, "loss": 1.2092, "mean_token_accuracy": 0.7675674170255661, "num_tokens": 981411298.0, "step": 14530 }, { "entropy": 0.8203605756163597, "epoch": 4.404756494736045, "grad_norm": 0.1365104466676712, "learning_rate": 8.463208003155851e-05, "loss": 1.2169, "mean_token_accuracy": 0.7646425098180771, "num_tokens": 982087097.0, "step": 14540 }, { "entropy": 0.8082945555448532, "epoch": 4.407786109217602, "grad_norm": 0.13865475356578827, "learning_rate": 8.460798618958778e-05, "loss": 1.2056, "mean_token_accuracy": 0.7731250032782555, "num_tokens": 982779112.0, "step": 14550 }, { "entropy": 0.8139575839042663, "epoch": 4.41081572369916, "grad_norm": 0.14405213296413422, "learning_rate": 8.458387691072395e-05, "loss": 1.2133, "mean_token_accuracy": 0.7653507739305496, "num_tokens": 983446998.0, "step": 14560 }, { "entropy": 0.8129200100898742, "epoch": 4.413845338180717, "grad_norm": 0.1305195540189743, "learning_rate": 8.455975220572094e-05, "loss": 1.2234, "mean_token_accuracy": 0.7651848569512367, "num_tokens": 984119489.0, "step": 14570 }, { "entropy": 0.8138880968093872, "epoch": 4.4168749526622735, "grad_norm": 0.13817481696605682, "learning_rate": 8.453561208533958e-05, "loss": 1.2174, "mean_token_accuracy": 0.7658104941248893, "num_tokens": 984799393.0, "step": 14580 }, { "entropy": 0.811996553838253, "epoch": 4.419904567143831, "grad_norm": 0.14861634373664856, "learning_rate": 8.45114565603476e-05, "loss": 1.2149, "mean_token_accuracy": 0.7660542353987694, "num_tokens": 985483715.0, "step": 14590 }, { "entropy": 0.8117525488138199, "epoch": 4.422934181625388, "grad_norm": 0.13194222748279572, "learning_rate": 8.448728564151954e-05, "loss": 1.2192, "mean_token_accuracy": 0.7686347112059593, "num_tokens": 986165074.0, "step": 14600 }, { "entropy": 0.8027610033750534, "epoch": 4.425963796106945, "grad_norm": 0.14185863733291626, "learning_rate": 8.446309933963687e-05, "loss": 1.2086, "mean_token_accuracy": 0.7731486231088638, "num_tokens": 986851050.0, "step": 14610 }, { "entropy": 0.8131787851452827, "epoch": 4.428993410588503, "grad_norm": 0.1457170844078064, "learning_rate": 8.443889766548785e-05, "loss": 1.2172, "mean_token_accuracy": 0.7673168465495109, "num_tokens": 987532410.0, "step": 14620 }, { "entropy": 0.8106110647320748, "epoch": 4.43202302507006, "grad_norm": 0.1411815732717514, "learning_rate": 8.441468062986767e-05, "loss": 1.207, "mean_token_accuracy": 0.7676076322793961, "num_tokens": 988204759.0, "step": 14630 }, { "entropy": 0.8080695107579231, "epoch": 4.435052639551617, "grad_norm": 0.14832152426242828, "learning_rate": 8.439044824357833e-05, "loss": 1.2152, "mean_token_accuracy": 0.7709108591079712, "num_tokens": 988895306.0, "step": 14640 }, { "entropy": 0.8206983223557472, "epoch": 4.438082254033175, "grad_norm": 0.13751220703125, "learning_rate": 8.436620051742867e-05, "loss": 1.2135, "mean_token_accuracy": 0.7643748372793198, "num_tokens": 989588875.0, "step": 14650 }, { "entropy": 0.8055402368307114, "epoch": 4.4411118685147315, "grad_norm": 0.13688570261001587, "learning_rate": 8.434193746223443e-05, "loss": 1.2069, "mean_token_accuracy": 0.7714090630412102, "num_tokens": 990272859.0, "step": 14660 }, { "entropy": 0.8120644554495812, "epoch": 4.444141482996288, "grad_norm": 0.1294250637292862, "learning_rate": 8.431765908881811e-05, "loss": 1.2107, "mean_token_accuracy": 0.7651125490665436, "num_tokens": 990949476.0, "step": 14670 }, { "entropy": 0.8165026217699051, "epoch": 4.447171097477846, "grad_norm": 0.14319740235805511, "learning_rate": 8.429336540800908e-05, "loss": 1.2131, "mean_token_accuracy": 0.763539296388626, "num_tokens": 991613763.0, "step": 14680 }, { "entropy": 0.7998925268650054, "epoch": 4.450200711959403, "grad_norm": 0.1286431849002838, "learning_rate": 8.426905643064355e-05, "loss": 1.2086, "mean_token_accuracy": 0.7659187391400337, "num_tokens": 992281176.0, "step": 14690 }, { "entropy": 0.8212928548455238, "epoch": 4.45323032644096, "grad_norm": 0.15050755441188812, "learning_rate": 8.424473216756456e-05, "loss": 1.2217, "mean_token_accuracy": 0.7637400344014168, "num_tokens": 992957794.0, "step": 14700 }, { "entropy": 0.8027153119444848, "epoch": 4.456259940922518, "grad_norm": 0.13645991683006287, "learning_rate": 8.42203926296219e-05, "loss": 1.2167, "mean_token_accuracy": 0.768967580795288, "num_tokens": 993632473.0, "step": 14710 }, { "entropy": 0.8125939905643463, "epoch": 4.459289555404075, "grad_norm": 0.13304193317890167, "learning_rate": 8.419603782767225e-05, "loss": 1.2154, "mean_token_accuracy": 0.7635219246149063, "num_tokens": 994303273.0, "step": 14720 }, { "entropy": 0.8155429273843765, "epoch": 4.462319169885632, "grad_norm": 0.1383727341890335, "learning_rate": 8.417166777257907e-05, "loss": 1.2102, "mean_token_accuracy": 0.7683589532971382, "num_tokens": 994983943.0, "step": 14730 }, { "entropy": 0.8064889863133431, "epoch": 4.4653487843671895, "grad_norm": 0.16005361080169678, "learning_rate": 8.414728247521262e-05, "loss": 1.2068, "mean_token_accuracy": 0.7632562965154648, "num_tokens": 995649623.0, "step": 14740 }, { "entropy": 0.8158274561166763, "epoch": 4.468378398848746, "grad_norm": 0.14108243584632874, "learning_rate": 8.412288194644997e-05, "loss": 1.2134, "mean_token_accuracy": 0.7660295411944389, "num_tokens": 996323907.0, "step": 14750 }, { "entropy": 0.8111421972513199, "epoch": 4.471408013330303, "grad_norm": 0.13207165896892548, "learning_rate": 8.409846619717496e-05, "loss": 1.2112, "mean_token_accuracy": 0.7684741467237473, "num_tokens": 997013381.0, "step": 14760 }, { "entropy": 0.8017989575862885, "epoch": 4.474437627811861, "grad_norm": 0.1371523141860962, "learning_rate": 8.407403523827823e-05, "loss": 1.216, "mean_token_accuracy": 0.7698475867509842, "num_tokens": 997686186.0, "step": 14770 }, { "entropy": 0.8003453806042671, "epoch": 4.477467242293418, "grad_norm": 0.13464106619358063, "learning_rate": 8.404958908065725e-05, "loss": 1.1972, "mean_token_accuracy": 0.7718183264136315, "num_tokens": 998382987.0, "step": 14780 }, { "entropy": 0.8182407349348069, "epoch": 4.480496856774975, "grad_norm": 0.13492155075073242, "learning_rate": 8.40251277352162e-05, "loss": 1.22, "mean_token_accuracy": 0.7610447928309441, "num_tokens": 999061110.0, "step": 14790 }, { "entropy": 0.8050194546580315, "epoch": 4.483526471256533, "grad_norm": 0.13588498532772064, "learning_rate": 8.400065121286607e-05, "loss": 1.2069, "mean_token_accuracy": 0.7692304775118828, "num_tokens": 999740484.0, "step": 14800 }, { "entropy": 0.8066660225391388, "epoch": 4.48655608573809, "grad_norm": 0.15822049975395203, "learning_rate": 8.39761595245246e-05, "loss": 1.2098, "mean_token_accuracy": 0.7670860573649406, "num_tokens": 1000415961.0, "step": 14810 }, { "entropy": 0.810329258441925, "epoch": 4.489585700219647, "grad_norm": 0.14247414469718933, "learning_rate": 8.395165268111632e-05, "loss": 1.2161, "mean_token_accuracy": 0.7617546677589416, "num_tokens": 1001081174.0, "step": 14820 }, { "entropy": 0.8143522471189499, "epoch": 4.4926153147012045, "grad_norm": 0.13341271877288818, "learning_rate": 8.39271306935725e-05, "loss": 1.2172, "mean_token_accuracy": 0.766852393746376, "num_tokens": 1001764471.0, "step": 14830 }, { "entropy": 0.8150116130709648, "epoch": 4.495644929182761, "grad_norm": 0.140299990773201, "learning_rate": 8.390259357283118e-05, "loss": 1.2196, "mean_token_accuracy": 0.7648457616567612, "num_tokens": 1002436289.0, "step": 14840 }, { "entropy": 0.8112609446048736, "epoch": 4.498674543664318, "grad_norm": 0.13958598673343658, "learning_rate": 8.387804132983716e-05, "loss": 1.2127, "mean_token_accuracy": 0.7658768385648728, "num_tokens": 1003110419.0, "step": 14850 }, { "entropy": 0.8080875903367997, "epoch": 4.501704158145876, "grad_norm": 0.13154947757720947, "learning_rate": 8.385347397554195e-05, "loss": 1.2195, "mean_token_accuracy": 0.7668353110551834, "num_tokens": 1003781756.0, "step": 14860 }, { "entropy": 0.8050442636013031, "epoch": 4.504733772627433, "grad_norm": 0.1507265269756317, "learning_rate": 8.382889152090382e-05, "loss": 1.2001, "mean_token_accuracy": 0.7674769267439843, "num_tokens": 1004456980.0, "step": 14870 }, { "entropy": 0.802332204580307, "epoch": 4.507763387108991, "grad_norm": 0.13116209208965302, "learning_rate": 8.380429397688777e-05, "loss": 1.2075, "mean_token_accuracy": 0.7701631054282189, "num_tokens": 1005139514.0, "step": 14880 }, { "entropy": 0.8175762817263603, "epoch": 4.510793001590548, "grad_norm": 0.14305265247821808, "learning_rate": 8.377968135446555e-05, "loss": 1.2113, "mean_token_accuracy": 0.7696104750037194, "num_tokens": 1005837509.0, "step": 14890 }, { "entropy": 0.8158002674579621, "epoch": 4.513822616072105, "grad_norm": 0.14322514832019806, "learning_rate": 8.37550536646156e-05, "loss": 1.2165, "mean_token_accuracy": 0.7621502950787544, "num_tokens": 1006504325.0, "step": 14900 }, { "entropy": 0.8165830463171005, "epoch": 4.516852230553662, "grad_norm": 0.13803119957447052, "learning_rate": 8.373041091832312e-05, "loss": 1.2082, "mean_token_accuracy": 0.7644147738814354, "num_tokens": 1007183989.0, "step": 14910 }, { "entropy": 0.8164145991206169, "epoch": 4.519881845035219, "grad_norm": 0.13508456945419312, "learning_rate": 8.370575312658e-05, "loss": 1.2197, "mean_token_accuracy": 0.7663424417376519, "num_tokens": 1007865080.0, "step": 14920 }, { "entropy": 0.817217543721199, "epoch": 4.522911459516776, "grad_norm": 0.13762493431568146, "learning_rate": 8.368108030038485e-05, "loss": 1.2156, "mean_token_accuracy": 0.7620110109448432, "num_tokens": 1008535404.0, "step": 14930 }, { "entropy": 0.8187590450048446, "epoch": 4.525941073998334, "grad_norm": 0.1280880570411682, "learning_rate": 8.365639245074298e-05, "loss": 1.2058, "mean_token_accuracy": 0.7672760635614395, "num_tokens": 1009224753.0, "step": 14940 }, { "entropy": 0.8178510829806328, "epoch": 4.528970688479891, "grad_norm": 0.1395222544670105, "learning_rate": 8.363168958866642e-05, "loss": 1.2156, "mean_token_accuracy": 0.7642698660492897, "num_tokens": 1009898856.0, "step": 14950 }, { "entropy": 0.8200342208147049, "epoch": 4.532000302961448, "grad_norm": 0.14817102253437042, "learning_rate": 8.360697172517386e-05, "loss": 1.2233, "mean_token_accuracy": 0.7670670941472053, "num_tokens": 1010580410.0, "step": 14960 }, { "entropy": 0.8099397957324982, "epoch": 4.535029917443006, "grad_norm": 0.13535571098327637, "learning_rate": 8.358223887129074e-05, "loss": 1.2117, "mean_token_accuracy": 0.7699789434671402, "num_tokens": 1011272061.0, "step": 14970 }, { "entropy": 0.8149744793772697, "epoch": 4.538059531924563, "grad_norm": 0.13294681906700134, "learning_rate": 8.35574910380491e-05, "loss": 1.2198, "mean_token_accuracy": 0.7701617524027824, "num_tokens": 1011972676.0, "step": 14980 }, { "entropy": 0.8180561020970345, "epoch": 4.54108914640612, "grad_norm": 0.1370512843132019, "learning_rate": 8.353272823648774e-05, "loss": 1.2169, "mean_token_accuracy": 0.7560781911015511, "num_tokens": 1012625462.0, "step": 14990 }, { "entropy": 0.8148312240839004, "epoch": 4.544118760887677, "grad_norm": 0.13839320838451385, "learning_rate": 8.350795047765212e-05, "loss": 1.2109, "mean_token_accuracy": 0.7686563178896904, "num_tokens": 1013318928.0, "step": 15000 }, { "entropy": 0.8100945860147476, "epoch": 4.547148375369234, "grad_norm": 0.14533695578575134, "learning_rate": 8.348315777259433e-05, "loss": 1.2244, "mean_token_accuracy": 0.762176875770092, "num_tokens": 1013973728.0, "step": 15010 }, { "entropy": 0.8118842095136642, "epoch": 4.550177989850791, "grad_norm": 0.138160839676857, "learning_rate": 8.345835013237318e-05, "loss": 1.214, "mean_token_accuracy": 0.7652218982577323, "num_tokens": 1014652356.0, "step": 15020 }, { "entropy": 0.8077614337205887, "epoch": 4.553207604332349, "grad_norm": 0.13375826179981232, "learning_rate": 8.343352756805413e-05, "loss": 1.2085, "mean_token_accuracy": 0.7711880415678024, "num_tokens": 1015334978.0, "step": 15030 }, { "entropy": 0.8138082325458527, "epoch": 4.556237218813906, "grad_norm": 0.14000007510185242, "learning_rate": 8.340869009070924e-05, "loss": 1.224, "mean_token_accuracy": 0.7669972136616707, "num_tokens": 1016006651.0, "step": 15040 }, { "entropy": 0.8117908909916878, "epoch": 4.559266833295463, "grad_norm": 0.13255485892295837, "learning_rate": 8.338383771141731e-05, "loss": 1.2188, "mean_token_accuracy": 0.762118911743164, "num_tokens": 1016668648.0, "step": 15050 }, { "entropy": 0.8168422803282738, "epoch": 4.562296447777021, "grad_norm": 0.13548730313777924, "learning_rate": 8.335897044126372e-05, "loss": 1.2194, "mean_token_accuracy": 0.7666918858885765, "num_tokens": 1017347166.0, "step": 15060 }, { "entropy": 0.814100868999958, "epoch": 4.565326062258578, "grad_norm": 0.14705607295036316, "learning_rate": 8.333408829134055e-05, "loss": 1.2156, "mean_token_accuracy": 0.7637822508811951, "num_tokens": 1018016924.0, "step": 15070 }, { "entropy": 0.8101621180772781, "epoch": 4.5683556767401345, "grad_norm": 0.139455646276474, "learning_rate": 8.330919127274643e-05, "loss": 1.2143, "mean_token_accuracy": 0.7600699231028557, "num_tokens": 1018678644.0, "step": 15080 }, { "entropy": 0.8048439994454384, "epoch": 4.571385291221692, "grad_norm": 0.14077451825141907, "learning_rate": 8.328427939658672e-05, "loss": 1.2094, "mean_token_accuracy": 0.7688601195812226, "num_tokens": 1019365627.0, "step": 15090 }, { "entropy": 0.8014249309897423, "epoch": 4.574414905703249, "grad_norm": 0.13245782256126404, "learning_rate": 8.325935267397333e-05, "loss": 1.1945, "mean_token_accuracy": 0.7756657838821411, "num_tokens": 1020066102.0, "step": 15100 }, { "entropy": 0.8136430740356445, "epoch": 4.577444520184806, "grad_norm": 0.14787311851978302, "learning_rate": 8.323441111602486e-05, "loss": 1.2117, "mean_token_accuracy": 0.7652172610163689, "num_tokens": 1020741192.0, "step": 15110 }, { "entropy": 0.8075628504157066, "epoch": 4.580474134666364, "grad_norm": 0.13490545749664307, "learning_rate": 8.320945473386647e-05, "loss": 1.2085, "mean_token_accuracy": 0.7699533253908157, "num_tokens": 1021434348.0, "step": 15120 }, { "entropy": 0.8200042188167572, "epoch": 4.583503749147921, "grad_norm": 0.1354069709777832, "learning_rate": 8.318448353862995e-05, "loss": 1.2175, "mean_token_accuracy": 0.7627965018153191, "num_tokens": 1022095966.0, "step": 15130 }, { "entropy": 0.8047011941671371, "epoch": 4.586533363629478, "grad_norm": 0.13225625455379486, "learning_rate": 8.315949754145372e-05, "loss": 1.2077, "mean_token_accuracy": 0.7679173469543457, "num_tokens": 1022772442.0, "step": 15140 }, { "entropy": 0.8104792013764381, "epoch": 4.589562978111036, "grad_norm": 0.13480982184410095, "learning_rate": 8.313449675348278e-05, "loss": 1.2048, "mean_token_accuracy": 0.76620062738657, "num_tokens": 1023443677.0, "step": 15150 }, { "entropy": 0.8011610880494118, "epoch": 4.592592592592593, "grad_norm": 0.13761554658412933, "learning_rate": 8.310948118586872e-05, "loss": 1.2054, "mean_token_accuracy": 0.7683380633592606, "num_tokens": 1024113965.0, "step": 15160 }, { "entropy": 0.8223711282014847, "epoch": 4.5956222070741495, "grad_norm": 0.1307128667831421, "learning_rate": 8.308445084976977e-05, "loss": 1.2176, "mean_token_accuracy": 0.7671125277876853, "num_tokens": 1024789279.0, "step": 15170 }, { "entropy": 0.7965401828289032, "epoch": 4.598651821555707, "grad_norm": 0.13591811060905457, "learning_rate": 8.305940575635064e-05, "loss": 1.2049, "mean_token_accuracy": 0.7685336083173752, "num_tokens": 1025462406.0, "step": 15180 }, { "entropy": 0.8169444009661675, "epoch": 4.601681436037264, "grad_norm": 0.13895291090011597, "learning_rate": 8.303434591678279e-05, "loss": 1.2171, "mean_token_accuracy": 0.7644126906991004, "num_tokens": 1026134747.0, "step": 15190 }, { "entropy": 0.8088120698928833, "epoch": 4.604711050518821, "grad_norm": 0.13839735090732574, "learning_rate": 8.300927134224409e-05, "loss": 1.2119, "mean_token_accuracy": 0.7721304342150688, "num_tokens": 1026824488.0, "step": 15200 }, { "entropy": 0.8069801807403565, "epoch": 4.607740665000379, "grad_norm": 0.14297841489315033, "learning_rate": 8.298418204391907e-05, "loss": 1.2093, "mean_token_accuracy": 0.7674991518259049, "num_tokens": 1027506067.0, "step": 15210 }, { "entropy": 0.8225396156311036, "epoch": 4.610770279481936, "grad_norm": 0.1426960974931717, "learning_rate": 8.295907803299883e-05, "loss": 1.2168, "mean_token_accuracy": 0.7645534157752991, "num_tokens": 1028192598.0, "step": 15220 }, { "entropy": 0.8128435805439949, "epoch": 4.613799893963493, "grad_norm": 0.1445625275373459, "learning_rate": 8.293395932068101e-05, "loss": 1.2113, "mean_token_accuracy": 0.7692129746079445, "num_tokens": 1028883590.0, "step": 15230 }, { "entropy": 0.8124251589179039, "epoch": 4.616829508445051, "grad_norm": 0.14045129716396332, "learning_rate": 8.290882591816979e-05, "loss": 1.2158, "mean_token_accuracy": 0.7653728276491165, "num_tokens": 1029554945.0, "step": 15240 }, { "entropy": 0.8150558784604073, "epoch": 4.6198591229266075, "grad_norm": 0.15508978068828583, "learning_rate": 8.288367783667596e-05, "loss": 1.2166, "mean_token_accuracy": 0.7606960788369179, "num_tokens": 1030212555.0, "step": 15250 }, { "entropy": 0.8126764863729476, "epoch": 4.622888737408164, "grad_norm": 0.14180020987987518, "learning_rate": 8.285851508741678e-05, "loss": 1.2126, "mean_token_accuracy": 0.7690615206956863, "num_tokens": 1030891731.0, "step": 15260 }, { "entropy": 0.8112331315875053, "epoch": 4.625918351889722, "grad_norm": 0.13965746760368347, "learning_rate": 8.283333768161615e-05, "loss": 1.2261, "mean_token_accuracy": 0.762207692861557, "num_tokens": 1031547425.0, "step": 15270 }, { "entropy": 0.8134624063968658, "epoch": 4.628947966371279, "grad_norm": 0.13768364489078522, "learning_rate": 8.280814563050438e-05, "loss": 1.2249, "mean_token_accuracy": 0.7601321175694465, "num_tokens": 1032206076.0, "step": 15280 }, { "entropy": 0.8125778660178185, "epoch": 4.631977580852837, "grad_norm": 0.14565123617649078, "learning_rate": 8.278293894531845e-05, "loss": 1.2119, "mean_token_accuracy": 0.7681752011179924, "num_tokens": 1032888534.0, "step": 15290 }, { "entropy": 0.8162658482789993, "epoch": 4.635007195334394, "grad_norm": 0.14811156690120697, "learning_rate": 8.275771763730178e-05, "loss": 1.2153, "mean_token_accuracy": 0.7628666043281556, "num_tokens": 1033563510.0, "step": 15300 }, { "entropy": 0.8310571193695069, "epoch": 4.638036809815951, "grad_norm": 0.14319728314876556, "learning_rate": 8.273248171770433e-05, "loss": 1.2266, "mean_token_accuracy": 0.7628302797675133, "num_tokens": 1034248909.0, "step": 15310 }, { "entropy": 0.8047871500253677, "epoch": 4.641066424297508, "grad_norm": 0.13362020254135132, "learning_rate": 8.270723119778259e-05, "loss": 1.2134, "mean_token_accuracy": 0.7651651829481125, "num_tokens": 1034914348.0, "step": 15320 }, { "entropy": 0.8066971078515053, "epoch": 4.6440960387790655, "grad_norm": 0.1409444361925125, "learning_rate": 8.268196608879954e-05, "loss": 1.1975, "mean_token_accuracy": 0.7695615276694298, "num_tokens": 1035599061.0, "step": 15330 }, { "entropy": 0.8144468575716018, "epoch": 4.647125653260622, "grad_norm": 0.13992279767990112, "learning_rate": 8.265668640202471e-05, "loss": 1.2152, "mean_token_accuracy": 0.7671654686331749, "num_tokens": 1036283892.0, "step": 15340 }, { "entropy": 0.8210369318723678, "epoch": 4.65015526774218, "grad_norm": 0.1452576220035553, "learning_rate": 8.263139214873406e-05, "loss": 1.2228, "mean_token_accuracy": 0.7624813586473465, "num_tokens": 1036952450.0, "step": 15350 }, { "entropy": 0.8076359361410141, "epoch": 4.653184882223737, "grad_norm": 0.15487810969352722, "learning_rate": 8.260608334021016e-05, "loss": 1.2064, "mean_token_accuracy": 0.768352934718132, "num_tokens": 1037632946.0, "step": 15360 }, { "entropy": 0.8118151858448982, "epoch": 4.656214496705294, "grad_norm": 0.13978509604930878, "learning_rate": 8.258075998774193e-05, "loss": 1.2127, "mean_token_accuracy": 0.7646442472934722, "num_tokens": 1038295444.0, "step": 15370 }, { "entropy": 0.8052182763814926, "epoch": 4.659244111186851, "grad_norm": 0.14350928366184235, "learning_rate": 8.25554221026249e-05, "loss": 1.2111, "mean_token_accuracy": 0.7669170662760735, "num_tokens": 1038968930.0, "step": 15380 }, { "entropy": 0.8079338103532792, "epoch": 4.662273725668409, "grad_norm": 0.14178936183452606, "learning_rate": 8.253006969616104e-05, "loss": 1.2074, "mean_token_accuracy": 0.7662130638957023, "num_tokens": 1039646283.0, "step": 15390 }, { "entropy": 0.8065927296876907, "epoch": 4.665303340149966, "grad_norm": 0.14469890296459198, "learning_rate": 8.250470277965876e-05, "loss": 1.2163, "mean_token_accuracy": 0.7637876242399215, "num_tokens": 1040309806.0, "step": 15400 }, { "entropy": 0.8208244100213051, "epoch": 4.6683329546315235, "grad_norm": 0.1422407031059265, "learning_rate": 8.247932136443298e-05, "loss": 1.2144, "mean_token_accuracy": 0.763920234143734, "num_tokens": 1040980401.0, "step": 15410 }, { "entropy": 0.8125808432698249, "epoch": 4.6713625691130805, "grad_norm": 0.14303629100322723, "learning_rate": 8.24539254618051e-05, "loss": 1.2171, "mean_token_accuracy": 0.7659008145332337, "num_tokens": 1041651725.0, "step": 15420 }, { "entropy": 0.8139560624957085, "epoch": 4.674392183594637, "grad_norm": 0.13927346467971802, "learning_rate": 8.242851508310294e-05, "loss": 1.2167, "mean_token_accuracy": 0.7657284542918206, "num_tokens": 1042328003.0, "step": 15430 }, { "entropy": 0.8144581004977226, "epoch": 4.677421798076195, "grad_norm": 0.14626960456371307, "learning_rate": 8.240309023966081e-05, "loss": 1.2151, "mean_token_accuracy": 0.7684321507811547, "num_tokens": 1043023342.0, "step": 15440 }, { "entropy": 0.8181347772479057, "epoch": 4.680451412557752, "grad_norm": 0.13941973447799683, "learning_rate": 8.237765094281948e-05, "loss": 1.2173, "mean_token_accuracy": 0.7630537569522857, "num_tokens": 1043689394.0, "step": 15450 }, { "entropy": 0.809430742263794, "epoch": 4.683481027039309, "grad_norm": 0.14009128510951996, "learning_rate": 8.235219720392611e-05, "loss": 1.22, "mean_token_accuracy": 0.7659080073237419, "num_tokens": 1044355961.0, "step": 15460 }, { "entropy": 0.8158665135502815, "epoch": 4.686510641520867, "grad_norm": 0.1422731578350067, "learning_rate": 8.232672903433438e-05, "loss": 1.2192, "mean_token_accuracy": 0.7604943737387657, "num_tokens": 1045016913.0, "step": 15470 }, { "entropy": 0.816311040520668, "epoch": 4.689540256002424, "grad_norm": 0.12189248949289322, "learning_rate": 8.230124644540434e-05, "loss": 1.2149, "mean_token_accuracy": 0.7640485346317292, "num_tokens": 1045695457.0, "step": 15480 }, { "entropy": 0.8113370686769485, "epoch": 4.692569870483981, "grad_norm": 0.13859044015407562, "learning_rate": 8.227574944850252e-05, "loss": 1.2117, "mean_token_accuracy": 0.7680921629071236, "num_tokens": 1046381557.0, "step": 15490 }, { "entropy": 0.8088567674160003, "epoch": 4.6955994849655385, "grad_norm": 0.14636880159378052, "learning_rate": 8.225023805500184e-05, "loss": 1.2158, "mean_token_accuracy": 0.765949147939682, "num_tokens": 1047053551.0, "step": 15500 }, { "entropy": 0.8066520035266876, "epoch": 4.698629099447095, "grad_norm": 0.13433587551116943, "learning_rate": 8.222471227628166e-05, "loss": 1.2054, "mean_token_accuracy": 0.767923741042614, "num_tokens": 1047729798.0, "step": 15510 }, { "entropy": 0.8008198201656341, "epoch": 4.701658713928652, "grad_norm": 0.13879390060901642, "learning_rate": 8.219917212372776e-05, "loss": 1.2056, "mean_token_accuracy": 0.7654800340533257, "num_tokens": 1048386762.0, "step": 15520 }, { "entropy": 0.8036904603242874, "epoch": 4.70468832841021, "grad_norm": 0.14773288369178772, "learning_rate": 8.217361760873233e-05, "loss": 1.2092, "mean_token_accuracy": 0.7680824026465416, "num_tokens": 1049058279.0, "step": 15530 }, { "entropy": 0.8194151535630226, "epoch": 4.707717942891767, "grad_norm": 0.14728638529777527, "learning_rate": 8.214804874269395e-05, "loss": 1.2201, "mean_token_accuracy": 0.7612240836024284, "num_tokens": 1049721734.0, "step": 15540 }, { "entropy": 0.8121907562017441, "epoch": 4.710747557373324, "grad_norm": 0.14110548794269562, "learning_rate": 8.212246553701764e-05, "loss": 1.2169, "mean_token_accuracy": 0.7651133015751839, "num_tokens": 1050385605.0, "step": 15550 }, { "entropy": 0.814926028251648, "epoch": 4.713777171854882, "grad_norm": 0.13291309773921967, "learning_rate": 8.209686800311473e-05, "loss": 1.2149, "mean_token_accuracy": 0.763114994764328, "num_tokens": 1051072736.0, "step": 15560 }, { "entropy": 0.8138626217842102, "epoch": 4.716806786336439, "grad_norm": 0.12934871017932892, "learning_rate": 8.207125615240307e-05, "loss": 1.2142, "mean_token_accuracy": 0.769535356760025, "num_tokens": 1051764345.0, "step": 15570 }, { "entropy": 0.8107445687055588, "epoch": 4.719836400817996, "grad_norm": 0.13547451794147491, "learning_rate": 8.20456299963068e-05, "loss": 1.2107, "mean_token_accuracy": 0.764206412434578, "num_tokens": 1052431484.0, "step": 15580 }, { "entropy": 0.8193677708506584, "epoch": 4.722866015299553, "grad_norm": 0.1325022578239441, "learning_rate": 8.201998954625645e-05, "loss": 1.2149, "mean_token_accuracy": 0.7634567454457283, "num_tokens": 1053100447.0, "step": 15590 }, { "entropy": 0.8121628552675247, "epoch": 4.72589562978111, "grad_norm": 0.1375740021467209, "learning_rate": 8.199433481368895e-05, "loss": 1.2052, "mean_token_accuracy": 0.769801427423954, "num_tokens": 1053790415.0, "step": 15600 }, { "entropy": 0.8133607849478721, "epoch": 4.728925244262667, "grad_norm": 0.14360255002975464, "learning_rate": 8.196866581004761e-05, "loss": 1.2173, "mean_token_accuracy": 0.7652547299861908, "num_tokens": 1054454432.0, "step": 15610 }, { "entropy": 0.8189873456954956, "epoch": 4.731954858744225, "grad_norm": 0.1521763801574707, "learning_rate": 8.194298254678208e-05, "loss": 1.2289, "mean_token_accuracy": 0.761813010275364, "num_tokens": 1055127197.0, "step": 15620 }, { "entropy": 0.8223283037543296, "epoch": 4.734984473225782, "grad_norm": 0.13551150262355804, "learning_rate": 8.191728503534839e-05, "loss": 1.2177, "mean_token_accuracy": 0.7663288027048111, "num_tokens": 1055815660.0, "step": 15630 }, { "entropy": 0.8093197181820869, "epoch": 4.738014087707339, "grad_norm": 0.13915053009986877, "learning_rate": 8.189157328720889e-05, "loss": 1.2076, "mean_token_accuracy": 0.7658779978752136, "num_tokens": 1056486953.0, "step": 15640 }, { "entropy": 0.8062524825334549, "epoch": 4.741043702188897, "grad_norm": 0.13573576509952545, "learning_rate": 8.186584731383233e-05, "loss": 1.208, "mean_token_accuracy": 0.770762488245964, "num_tokens": 1057173002.0, "step": 15650 }, { "entropy": 0.8034259334206582, "epoch": 4.744073316670454, "grad_norm": 0.14011842012405396, "learning_rate": 8.184010712669378e-05, "loss": 1.2152, "mean_token_accuracy": 0.7674999088048935, "num_tokens": 1057840865.0, "step": 15660 }, { "entropy": 0.8159258872270584, "epoch": 4.7471029311520105, "grad_norm": 0.15081876516342163, "learning_rate": 8.181435273727464e-05, "loss": 1.2176, "mean_token_accuracy": 0.7630300164222718, "num_tokens": 1058499702.0, "step": 15670 }, { "entropy": 0.8117739945650101, "epoch": 4.750132545633568, "grad_norm": 0.1396917849779129, "learning_rate": 8.178858415706266e-05, "loss": 1.2143, "mean_token_accuracy": 0.7646714493632316, "num_tokens": 1059172595.0, "step": 15680 }, { "entropy": 0.8117744654417038, "epoch": 4.753162160115125, "grad_norm": 0.13941623270511627, "learning_rate": 8.176280139755191e-05, "loss": 1.2087, "mean_token_accuracy": 0.764184258878231, "num_tokens": 1059845063.0, "step": 15690 }, { "entropy": 0.8059625402092934, "epoch": 4.756191774596682, "grad_norm": 0.140048086643219, "learning_rate": 8.17370044702428e-05, "loss": 1.2122, "mean_token_accuracy": 0.7721580326557159, "num_tokens": 1060530286.0, "step": 15700 }, { "entropy": 0.8040213257074356, "epoch": 4.75922138907824, "grad_norm": 0.1433405876159668, "learning_rate": 8.171119338664206e-05, "loss": 1.2035, "mean_token_accuracy": 0.7708293154835701, "num_tokens": 1061220258.0, "step": 15710 }, { "entropy": 0.8147042289376258, "epoch": 4.762251003559797, "grad_norm": 0.1303349882364273, "learning_rate": 8.168536815826271e-05, "loss": 1.2179, "mean_token_accuracy": 0.7646678447723388, "num_tokens": 1061898600.0, "step": 15720 }, { "entropy": 0.8121464133262635, "epoch": 4.765280618041354, "grad_norm": 0.13437813520431519, "learning_rate": 8.165952879662408e-05, "loss": 1.2082, "mean_token_accuracy": 0.7666034877300263, "num_tokens": 1062586608.0, "step": 15730 }, { "entropy": 0.8070258751511574, "epoch": 4.768310232522912, "grad_norm": 0.13249161839485168, "learning_rate": 8.163367531325186e-05, "loss": 1.2061, "mean_token_accuracy": 0.7662536963820458, "num_tokens": 1063278022.0, "step": 15740 }, { "entropy": 0.8094275057315826, "epoch": 4.771339847004469, "grad_norm": 0.1256585270166397, "learning_rate": 8.160780771967796e-05, "loss": 1.2113, "mean_token_accuracy": 0.7595454663038254, "num_tokens": 1063935349.0, "step": 15750 }, { "entropy": 0.8062768653035164, "epoch": 4.774369461486026, "grad_norm": 0.13668043911457062, "learning_rate": 8.158192602744062e-05, "loss": 1.2145, "mean_token_accuracy": 0.7655957818031311, "num_tokens": 1064602766.0, "step": 15760 }, { "entropy": 0.8141536340117455, "epoch": 4.777399075967583, "grad_norm": 0.1468982696533203, "learning_rate": 8.155603024808438e-05, "loss": 1.2121, "mean_token_accuracy": 0.7617022007703781, "num_tokens": 1065259235.0, "step": 15770 }, { "entropy": 0.8113908216357231, "epoch": 4.78042869044914, "grad_norm": 0.13862980902194977, "learning_rate": 8.153012039316005e-05, "loss": 1.2126, "mean_token_accuracy": 0.7633839502930642, "num_tokens": 1065922337.0, "step": 15780 }, { "entropy": 0.8185302555561066, "epoch": 4.783458304930697, "grad_norm": 0.14436276257038116, "learning_rate": 8.150419647422473e-05, "loss": 1.2222, "mean_token_accuracy": 0.7613570883870124, "num_tokens": 1066581990.0, "step": 15790 }, { "entropy": 0.8092867791652679, "epoch": 4.786487919412255, "grad_norm": 0.13496606051921844, "learning_rate": 8.14782585028418e-05, "loss": 1.2158, "mean_token_accuracy": 0.7661177188158035, "num_tokens": 1067256000.0, "step": 15800 }, { "entropy": 0.7992815613746643, "epoch": 4.789517533893812, "grad_norm": 0.14180412888526917, "learning_rate": 8.145230649058085e-05, "loss": 1.2011, "mean_token_accuracy": 0.7731818363070488, "num_tokens": 1067944433.0, "step": 15810 }, { "entropy": 0.8109717398881913, "epoch": 4.79254714837537, "grad_norm": 0.13364098966121674, "learning_rate": 8.14263404490178e-05, "loss": 1.2052, "mean_token_accuracy": 0.7660579293966293, "num_tokens": 1068615734.0, "step": 15820 }, { "entropy": 0.8127626776695251, "epoch": 4.795576762856927, "grad_norm": 0.14590229094028473, "learning_rate": 8.140036038973482e-05, "loss": 1.2074, "mean_token_accuracy": 0.7680396258831024, "num_tokens": 1069300896.0, "step": 15830 }, { "entropy": 0.8128359496593476, "epoch": 4.7986063773384835, "grad_norm": 0.15021905303001404, "learning_rate": 8.137436632432027e-05, "loss": 1.2202, "mean_token_accuracy": 0.7599052727222443, "num_tokens": 1069962948.0, "step": 15840 }, { "entropy": 0.8056706175208092, "epoch": 4.80163599182004, "grad_norm": 0.1618979126214981, "learning_rate": 8.134835826436886e-05, "loss": 1.2154, "mean_token_accuracy": 0.7665325850248337, "num_tokens": 1070628681.0, "step": 15850 }, { "entropy": 0.8111644566059113, "epoch": 4.804665606301598, "grad_norm": 0.13437557220458984, "learning_rate": 8.132233622148145e-05, "loss": 1.2176, "mean_token_accuracy": 0.7641709834337235, "num_tokens": 1071291345.0, "step": 15860 }, { "entropy": 0.8157501295208931, "epoch": 4.807695220783155, "grad_norm": 0.13300754129886627, "learning_rate": 8.129630020726516e-05, "loss": 1.2182, "mean_token_accuracy": 0.7630707338452339, "num_tokens": 1071959301.0, "step": 15870 }, { "entropy": 0.8204719394445419, "epoch": 4.810724835264713, "grad_norm": 0.1367742419242859, "learning_rate": 8.127025023333339e-05, "loss": 1.2218, "mean_token_accuracy": 0.7607513844966889, "num_tokens": 1072630676.0, "step": 15880 }, { "entropy": 0.8045290946960449, "epoch": 4.81375444974627, "grad_norm": 0.1460738480091095, "learning_rate": 8.124418631130572e-05, "loss": 1.202, "mean_token_accuracy": 0.7666138365864754, "num_tokens": 1073309078.0, "step": 15890 }, { "entropy": 0.8093014433979988, "epoch": 4.816784064227827, "grad_norm": 0.14502641558647156, "learning_rate": 8.121810845280795e-05, "loss": 1.2075, "mean_token_accuracy": 0.7641166388988495, "num_tokens": 1073976360.0, "step": 15900 }, { "entropy": 0.8139350295066834, "epoch": 4.819813678709385, "grad_norm": 0.13717105984687805, "learning_rate": 8.119201666947212e-05, "loss": 1.2153, "mean_token_accuracy": 0.7647068202495575, "num_tokens": 1074654333.0, "step": 15910 }, { "entropy": 0.800288000702858, "epoch": 4.8228432931909415, "grad_norm": 0.13237711787223816, "learning_rate": 8.116591097293649e-05, "loss": 1.2013, "mean_token_accuracy": 0.7709699377417565, "num_tokens": 1075345241.0, "step": 15920 }, { "entropy": 0.8054830610752106, "epoch": 4.825872907672498, "grad_norm": 0.1496029943227768, "learning_rate": 8.113979137484547e-05, "loss": 1.2128, "mean_token_accuracy": 0.7653148591518402, "num_tokens": 1076010481.0, "step": 15930 }, { "entropy": 0.8229808688163758, "epoch": 4.828902522154056, "grad_norm": 0.13943567872047424, "learning_rate": 8.111365788684973e-05, "loss": 1.2253, "mean_token_accuracy": 0.7621565163135529, "num_tokens": 1076684694.0, "step": 15940 }, { "entropy": 0.8060909911990166, "epoch": 4.831932136635613, "grad_norm": 0.14224740862846375, "learning_rate": 8.108751052060609e-05, "loss": 1.1991, "mean_token_accuracy": 0.7679936498403549, "num_tokens": 1077364793.0, "step": 15950 }, { "entropy": 0.8114812031388283, "epoch": 4.83496175111717, "grad_norm": 0.14297319948673248, "learning_rate": 8.106134928777762e-05, "loss": 1.2151, "mean_token_accuracy": 0.7653910368680954, "num_tokens": 1078046079.0, "step": 15960 }, { "entropy": 0.8102113500237464, "epoch": 4.837991365598728, "grad_norm": 0.13165238499641418, "learning_rate": 8.103517420003351e-05, "loss": 1.2126, "mean_token_accuracy": 0.7717357322573661, "num_tokens": 1078737750.0, "step": 15970 }, { "entropy": 0.8052035465836525, "epoch": 4.841020980080285, "grad_norm": 0.14232152700424194, "learning_rate": 8.100898526904916e-05, "loss": 1.1977, "mean_token_accuracy": 0.7688471049070358, "num_tokens": 1079427959.0, "step": 15980 }, { "entropy": 0.8029644057154656, "epoch": 4.844050594561842, "grad_norm": 0.13276511430740356, "learning_rate": 8.098278250650616e-05, "loss": 1.2066, "mean_token_accuracy": 0.7717874437570572, "num_tokens": 1080122748.0, "step": 15990 }, { "entropy": 0.8129354000091553, "epoch": 4.8470802090433995, "grad_norm": 0.13995599746704102, "learning_rate": 8.095656592409224e-05, "loss": 1.2177, "mean_token_accuracy": 0.7612034097313881, "num_tokens": 1080781107.0, "step": 16000 }, { "entropy": 0.8003242433071136, "epoch": 4.8501098235249565, "grad_norm": 0.13097521662712097, "learning_rate": 8.09303355335013e-05, "loss": 1.2095, "mean_token_accuracy": 0.7696071028709411, "num_tokens": 1081464992.0, "step": 16010 }, { "entropy": 0.8115729466080666, "epoch": 4.853139438006513, "grad_norm": 0.14724335074424744, "learning_rate": 8.090409134643344e-05, "loss": 1.2132, "mean_token_accuracy": 0.7653060346841812, "num_tokens": 1082140319.0, "step": 16020 }, { "entropy": 0.8205862417817116, "epoch": 4.856169052488071, "grad_norm": 0.15980929136276245, "learning_rate": 8.087783337459483e-05, "loss": 1.2209, "mean_token_accuracy": 0.763490405678749, "num_tokens": 1082820861.0, "step": 16030 }, { "entropy": 0.8085527703166008, "epoch": 4.859198666969628, "grad_norm": 0.13519975543022156, "learning_rate": 8.08515616296979e-05, "loss": 1.2044, "mean_token_accuracy": 0.7669231325387955, "num_tokens": 1083495114.0, "step": 16040 }, { "entropy": 0.8118745133280754, "epoch": 4.862228281451185, "grad_norm": 0.1396489143371582, "learning_rate": 8.082527612346109e-05, "loss": 1.2027, "mean_token_accuracy": 0.7672496333718299, "num_tokens": 1084181821.0, "step": 16050 }, { "entropy": 0.8091390267014503, "epoch": 4.865257895932743, "grad_norm": 0.14365805685520172, "learning_rate": 8.079897686760911e-05, "loss": 1.2156, "mean_token_accuracy": 0.7642460703849793, "num_tokens": 1084848903.0, "step": 16060 }, { "entropy": 0.8168342247605324, "epoch": 4.8682875104143, "grad_norm": 0.1340799331665039, "learning_rate": 8.077266387387274e-05, "loss": 1.2185, "mean_token_accuracy": 0.7625942230224609, "num_tokens": 1085517417.0, "step": 16070 }, { "entropy": 0.8122792810201644, "epoch": 4.871317124895857, "grad_norm": 0.13644464313983917, "learning_rate": 8.074633715398885e-05, "loss": 1.208, "mean_token_accuracy": 0.7683076694607734, "num_tokens": 1086199795.0, "step": 16080 }, { "entropy": 0.8055481418967247, "epoch": 4.8743467393774145, "grad_norm": 0.14579367637634277, "learning_rate": 8.07199967197005e-05, "loss": 1.2053, "mean_token_accuracy": 0.7688647583127022, "num_tokens": 1086882422.0, "step": 16090 }, { "entropy": 0.8040731713175774, "epoch": 4.877376353858971, "grad_norm": 0.14209434390068054, "learning_rate": 8.069364258275686e-05, "loss": 1.2103, "mean_token_accuracy": 0.7682266667485237, "num_tokens": 1087554619.0, "step": 16100 }, { "entropy": 0.813388267159462, "epoch": 4.880405968340528, "grad_norm": 0.14394550025463104, "learning_rate": 8.066727475491316e-05, "loss": 1.2162, "mean_token_accuracy": 0.7615847617387772, "num_tokens": 1088225692.0, "step": 16110 }, { "entropy": 0.8020743995904922, "epoch": 4.883435582822086, "grad_norm": 0.13503175973892212, "learning_rate": 8.064089324793078e-05, "loss": 1.209, "mean_token_accuracy": 0.7675877928733825, "num_tokens": 1088893457.0, "step": 16120 }, { "entropy": 0.8262182638049126, "epoch": 4.886465197303643, "grad_norm": 0.13238561153411865, "learning_rate": 8.06144980735772e-05, "loss": 1.2207, "mean_token_accuracy": 0.7621331796050071, "num_tokens": 1089560857.0, "step": 16130 }, { "entropy": 0.8153786823153496, "epoch": 4.8894948117852, "grad_norm": 0.14539356529712677, "learning_rate": 8.058808924362598e-05, "loss": 1.2161, "mean_token_accuracy": 0.7684778615832328, "num_tokens": 1090242445.0, "step": 16140 }, { "entropy": 0.8062364771962166, "epoch": 4.892524426266758, "grad_norm": 0.147408589720726, "learning_rate": 8.056166676985679e-05, "loss": 1.21, "mean_token_accuracy": 0.7645332485437393, "num_tokens": 1090914161.0, "step": 16150 }, { "entropy": 0.8069047465920448, "epoch": 4.895554040748315, "grad_norm": 0.14002974331378937, "learning_rate": 8.053523066405536e-05, "loss": 1.2112, "mean_token_accuracy": 0.7636183395981788, "num_tokens": 1091582994.0, "step": 16160 }, { "entropy": 0.807197442650795, "epoch": 4.8985836552298725, "grad_norm": 0.13712963461875916, "learning_rate": 8.050878093801352e-05, "loss": 1.2124, "mean_token_accuracy": 0.7733067601919175, "num_tokens": 1092281317.0, "step": 16170 }, { "entropy": 0.8109682485461235, "epoch": 4.901613269711429, "grad_norm": 0.1413911134004593, "learning_rate": 8.048231760352919e-05, "loss": 1.2185, "mean_token_accuracy": 0.7638120874762535, "num_tokens": 1092950999.0, "step": 16180 }, { "entropy": 0.8190348640084266, "epoch": 4.904642884192986, "grad_norm": 0.13088011741638184, "learning_rate": 8.045584067240632e-05, "loss": 1.2139, "mean_token_accuracy": 0.7646927267313004, "num_tokens": 1093622838.0, "step": 16190 }, { "entropy": 0.8030320763587951, "epoch": 4.907672498674543, "grad_norm": 0.1281590461730957, "learning_rate": 8.042935015645494e-05, "loss": 1.2027, "mean_token_accuracy": 0.7652181640267373, "num_tokens": 1094284012.0, "step": 16200 }, { "entropy": 0.8119194015860558, "epoch": 4.910702113156101, "grad_norm": 0.14856310188770294, "learning_rate": 8.040284606749118e-05, "loss": 1.2199, "mean_token_accuracy": 0.76348657310009, "num_tokens": 1094955929.0, "step": 16210 }, { "entropy": 0.8177849322557449, "epoch": 4.913731727637658, "grad_norm": 0.14055438339710236, "learning_rate": 8.037632841733715e-05, "loss": 1.2181, "mean_token_accuracy": 0.76286730915308, "num_tokens": 1095619214.0, "step": 16220 }, { "entropy": 0.8127135917544365, "epoch": 4.916761342119216, "grad_norm": 0.13536129891872406, "learning_rate": 8.034979721782108e-05, "loss": 1.2073, "mean_token_accuracy": 0.765118145942688, "num_tokens": 1096299356.0, "step": 16230 }, { "entropy": 0.8101259261369705, "epoch": 4.919790956600773, "grad_norm": 0.13673269748687744, "learning_rate": 8.032325248077718e-05, "loss": 1.2087, "mean_token_accuracy": 0.7702684447169303, "num_tokens": 1096992769.0, "step": 16240 }, { "entropy": 0.8072752684354783, "epoch": 4.92282057108233, "grad_norm": 0.14365045726299286, "learning_rate": 8.029669421804575e-05, "loss": 1.2088, "mean_token_accuracy": 0.7633398428559304, "num_tokens": 1097654101.0, "step": 16250 }, { "entropy": 0.809220640361309, "epoch": 4.9258501855638865, "grad_norm": 0.14049658179283142, "learning_rate": 8.02701224414731e-05, "loss": 1.2244, "mean_token_accuracy": 0.7636054441332817, "num_tokens": 1098319432.0, "step": 16260 }, { "entropy": 0.8204178407788276, "epoch": 4.928879800045444, "grad_norm": 0.1522514373064041, "learning_rate": 8.02435371629116e-05, "loss": 1.221, "mean_token_accuracy": 0.7643356367945671, "num_tokens": 1098999276.0, "step": 16270 }, { "entropy": 0.8060835391283036, "epoch": 4.931909414527001, "grad_norm": 0.14118434488773346, "learning_rate": 8.021693839421955e-05, "loss": 1.2148, "mean_token_accuracy": 0.7658647254109383, "num_tokens": 1099665803.0, "step": 16280 }, { "entropy": 0.813490042090416, "epoch": 4.934939029008559, "grad_norm": 0.1457880437374115, "learning_rate": 8.019032614726138e-05, "loss": 1.2166, "mean_token_accuracy": 0.7657451406121254, "num_tokens": 1100336693.0, "step": 16290 }, { "entropy": 0.8225024595856667, "epoch": 4.937968643490116, "grad_norm": 0.14237776398658752, "learning_rate": 8.016370043390749e-05, "loss": 1.2242, "mean_token_accuracy": 0.7616039767861367, "num_tokens": 1101004222.0, "step": 16300 }, { "entropy": 0.8141745552420616, "epoch": 4.940998257971673, "grad_norm": 0.17437851428985596, "learning_rate": 8.013706126603425e-05, "loss": 1.2038, "mean_token_accuracy": 0.7661175921559333, "num_tokens": 1101689786.0, "step": 16310 }, { "entropy": 0.8207089826464653, "epoch": 4.94402787245323, "grad_norm": 0.14756397902965546, "learning_rate": 8.011040865552407e-05, "loss": 1.2189, "mean_token_accuracy": 0.7631851613521576, "num_tokens": 1102375768.0, "step": 16320 }, { "entropy": 0.8151626020669938, "epoch": 4.947057486934788, "grad_norm": 0.1360650360584259, "learning_rate": 8.008374261426537e-05, "loss": 1.2185, "mean_token_accuracy": 0.7666550606489182, "num_tokens": 1103050669.0, "step": 16330 }, { "entropy": 0.809863132238388, "epoch": 4.950087101416345, "grad_norm": 0.14102232456207275, "learning_rate": 8.005706315415249e-05, "loss": 1.216, "mean_token_accuracy": 0.7645842403173446, "num_tokens": 1103713172.0, "step": 16340 }, { "entropy": 0.8130029693245888, "epoch": 4.953116715897902, "grad_norm": 0.14143683016300201, "learning_rate": 8.003037028708586e-05, "loss": 1.2159, "mean_token_accuracy": 0.7645550161600113, "num_tokens": 1104386892.0, "step": 16350 }, { "entropy": 0.819441930949688, "epoch": 4.956146330379459, "grad_norm": 0.13079939782619476, "learning_rate": 8.00036640249718e-05, "loss": 1.219, "mean_token_accuracy": 0.760621365904808, "num_tokens": 1105058085.0, "step": 16360 }, { "entropy": 0.8078571721911431, "epoch": 4.959175944861016, "grad_norm": 0.13994356989860535, "learning_rate": 7.997694437972262e-05, "loss": 1.203, "mean_token_accuracy": 0.7705216273665428, "num_tokens": 1105744822.0, "step": 16370 }, { "entropy": 0.8017561569809913, "epoch": 4.962205559342574, "grad_norm": 0.13538730144500732, "learning_rate": 7.995021136325668e-05, "loss": 1.2093, "mean_token_accuracy": 0.7655615821480751, "num_tokens": 1106418696.0, "step": 16380 }, { "entropy": 0.8173870503902435, "epoch": 4.965235173824131, "grad_norm": 0.12956133484840393, "learning_rate": 7.992346498749819e-05, "loss": 1.2046, "mean_token_accuracy": 0.7658839270472526, "num_tokens": 1107102625.0, "step": 16390 }, { "entropy": 0.8095331564545631, "epoch": 4.968264788305688, "grad_norm": 0.15149347484111786, "learning_rate": 7.98967052643774e-05, "loss": 1.2192, "mean_token_accuracy": 0.7668271765112877, "num_tokens": 1107769722.0, "step": 16400 }, { "entropy": 0.7979603424668312, "epoch": 4.971294402787246, "grad_norm": 0.1289897859096527, "learning_rate": 7.986993220583048e-05, "loss": 1.1936, "mean_token_accuracy": 0.7726369246840477, "num_tokens": 1108454674.0, "step": 16410 }, { "entropy": 0.816437041759491, "epoch": 4.974324017268803, "grad_norm": 0.14153149724006653, "learning_rate": 7.984314582379953e-05, "loss": 1.2168, "mean_token_accuracy": 0.7692676842212677, "num_tokens": 1109140993.0, "step": 16420 }, { "entropy": 0.8106490090489388, "epoch": 4.9773536317503595, "grad_norm": 0.13549388945102692, "learning_rate": 7.981634613023264e-05, "loss": 1.2113, "mean_token_accuracy": 0.7671712547540664, "num_tokens": 1109824322.0, "step": 16430 }, { "entropy": 0.7994436040520668, "epoch": 4.980383246231917, "grad_norm": 0.12522515654563904, "learning_rate": 7.978953313708381e-05, "loss": 1.2035, "mean_token_accuracy": 0.772608882188797, "num_tokens": 1110507272.0, "step": 16440 }, { "entropy": 0.8076819390058517, "epoch": 4.983412860713474, "grad_norm": 0.14474385976791382, "learning_rate": 7.976270685631298e-05, "loss": 1.2131, "mean_token_accuracy": 0.7663069173693657, "num_tokens": 1111179622.0, "step": 16450 }, { "entropy": 0.8016024693846703, "epoch": 4.986442475195031, "grad_norm": 0.1426592469215393, "learning_rate": 7.9735867299886e-05, "loss": 1.2004, "mean_token_accuracy": 0.7635600075125695, "num_tokens": 1111846400.0, "step": 16460 }, { "entropy": 0.8050459831953048, "epoch": 4.989472089676589, "grad_norm": 0.13626575469970703, "learning_rate": 7.970901447977468e-05, "loss": 1.2052, "mean_token_accuracy": 0.7665823057293892, "num_tokens": 1112520449.0, "step": 16470 }, { "entropy": 0.7991900339722633, "epoch": 4.992501704158146, "grad_norm": 0.1464741975069046, "learning_rate": 7.968214840795669e-05, "loss": 1.1984, "mean_token_accuracy": 0.7744982793927193, "num_tokens": 1113220416.0, "step": 16480 }, { "entropy": 0.8126252934336662, "epoch": 4.995531318639703, "grad_norm": 0.1428397297859192, "learning_rate": 7.965526909641565e-05, "loss": 1.2223, "mean_token_accuracy": 0.7577746540307999, "num_tokens": 1113869987.0, "step": 16490 }, { "entropy": 0.8123869523406029, "epoch": 4.998560933121261, "grad_norm": 0.14128269255161285, "learning_rate": 7.962837655714108e-05, "loss": 1.2153, "mean_token_accuracy": 0.7669962048530579, "num_tokens": 1114536654.0, "step": 16500 }, { "entropy": 0.8037411188467954, "epoch": 5.0015148072407785, "grad_norm": 0.1507730633020401, "learning_rate": 7.96014708021284e-05, "loss": 1.198, "mean_token_accuracy": 0.7682625345694714, "num_tokens": 1115195349.0, "step": 16510 }, { "entropy": 0.780905881524086, "epoch": 5.004544421722336, "grad_norm": 0.14938247203826904, "learning_rate": 7.957455184337894e-05, "loss": 1.1795, "mean_token_accuracy": 0.7759376361966133, "num_tokens": 1115881351.0, "step": 16520 }, { "entropy": 0.7980219900608063, "epoch": 5.007574036203893, "grad_norm": 0.1593325436115265, "learning_rate": 7.954761969289988e-05, "loss": 1.1897, "mean_token_accuracy": 0.7703300774097442, "num_tokens": 1116566528.0, "step": 16530 }, { "entropy": 0.7913754537701607, "epoch": 5.01060365068545, "grad_norm": 0.1513516902923584, "learning_rate": 7.952067436270432e-05, "loss": 1.194, "mean_token_accuracy": 0.7686186075210572, "num_tokens": 1117232615.0, "step": 16540 }, { "entropy": 0.7906406939029693, "epoch": 5.013633265167008, "grad_norm": 0.15083470940589905, "learning_rate": 7.94937158648112e-05, "loss": 1.1853, "mean_token_accuracy": 0.7708072856068611, "num_tokens": 1117914393.0, "step": 16550 }, { "entropy": 0.7962068796157837, "epoch": 5.016662879648565, "grad_norm": 0.15922969579696655, "learning_rate": 7.946674421124538e-05, "loss": 1.1984, "mean_token_accuracy": 0.7660602360963822, "num_tokens": 1118575694.0, "step": 16560 }, { "entropy": 0.7868657410144806, "epoch": 5.019692494130122, "grad_norm": 0.1561596542596817, "learning_rate": 7.943975941403758e-05, "loss": 1.1842, "mean_token_accuracy": 0.7732081234455108, "num_tokens": 1119264774.0, "step": 16570 }, { "entropy": 0.7847541496157646, "epoch": 5.02272210861168, "grad_norm": 0.16248828172683716, "learning_rate": 7.941276148522434e-05, "loss": 1.187, "mean_token_accuracy": 0.762403430044651, "num_tokens": 1119909359.0, "step": 16580 }, { "entropy": 0.7921791404485703, "epoch": 5.0257517230932365, "grad_norm": 0.15669487416744232, "learning_rate": 7.93857504368481e-05, "loss": 1.1931, "mean_token_accuracy": 0.7709335729479789, "num_tokens": 1120583297.0, "step": 16590 }, { "entropy": 0.796550589799881, "epoch": 5.028781337574793, "grad_norm": 0.15571054816246033, "learning_rate": 7.935872628095714e-05, "loss": 1.1912, "mean_token_accuracy": 0.7698184594511985, "num_tokens": 1121261775.0, "step": 16600 }, { "entropy": 0.7889431893825531, "epoch": 5.031810952056351, "grad_norm": 0.1492854803800583, "learning_rate": 7.933168902960558e-05, "loss": 1.1963, "mean_token_accuracy": 0.7704354137182235, "num_tokens": 1121934145.0, "step": 16610 }, { "entropy": 0.7852740064263344, "epoch": 5.034840566537908, "grad_norm": 0.1636449098587036, "learning_rate": 7.93046386948534e-05, "loss": 1.1812, "mean_token_accuracy": 0.7721859395503998, "num_tokens": 1122616894.0, "step": 16620 }, { "entropy": 0.7963797673583031, "epoch": 5.037870181019465, "grad_norm": 0.1561834067106247, "learning_rate": 7.927757528876638e-05, "loss": 1.1985, "mean_token_accuracy": 0.7642228111624718, "num_tokens": 1123279115.0, "step": 16630 }, { "entropy": 0.7851715862751008, "epoch": 5.040899795501023, "grad_norm": 0.15005038678646088, "learning_rate": 7.925049882341617e-05, "loss": 1.1736, "mean_token_accuracy": 0.7784592941403389, "num_tokens": 1123975413.0, "step": 16640 }, { "entropy": 0.7886789575219154, "epoch": 5.04392940998258, "grad_norm": 0.14715303480625153, "learning_rate": 7.922340931088023e-05, "loss": 1.1867, "mean_token_accuracy": 0.7671677350997925, "num_tokens": 1124645664.0, "step": 16650 }, { "entropy": 0.7879483878612519, "epoch": 5.046959024464137, "grad_norm": 0.15014539659023285, "learning_rate": 7.919630676324182e-05, "loss": 1.1893, "mean_token_accuracy": 0.77218779027462, "num_tokens": 1125325049.0, "step": 16660 }, { "entropy": 0.8027291014790535, "epoch": 5.0499886389456945, "grad_norm": 0.16073153913021088, "learning_rate": 7.916919119259005e-05, "loss": 1.1953, "mean_token_accuracy": 0.7679773375391961, "num_tokens": 1126003373.0, "step": 16670 }, { "entropy": 0.7961043208837509, "epoch": 5.053018253427251, "grad_norm": 0.1571631133556366, "learning_rate": 7.914206261101981e-05, "loss": 1.1946, "mean_token_accuracy": 0.7720628321170807, "num_tokens": 1126693716.0, "step": 16680 }, { "entropy": 0.7894376143813133, "epoch": 5.056047867908808, "grad_norm": 0.15614300966262817, "learning_rate": 7.911492103063182e-05, "loss": 1.1897, "mean_token_accuracy": 0.7688526049256325, "num_tokens": 1127378061.0, "step": 16690 }, { "entropy": 0.7944885432720185, "epoch": 5.059077482390366, "grad_norm": 0.1512015461921692, "learning_rate": 7.908776646353256e-05, "loss": 1.1956, "mean_token_accuracy": 0.7652768597006798, "num_tokens": 1128032509.0, "step": 16700 }, { "entropy": 0.8022823825478553, "epoch": 5.062107096871923, "grad_norm": 0.1586894690990448, "learning_rate": 7.906059892183435e-05, "loss": 1.2, "mean_token_accuracy": 0.7627685695886612, "num_tokens": 1128694417.0, "step": 16710 }, { "entropy": 0.7928654015064239, "epoch": 5.06513671135348, "grad_norm": 0.16155175864696503, "learning_rate": 7.903341841765525e-05, "loss": 1.1953, "mean_token_accuracy": 0.7680094838142395, "num_tokens": 1129362984.0, "step": 16720 }, { "entropy": 0.7847378894686698, "epoch": 5.068166325835038, "grad_norm": 0.15931692719459534, "learning_rate": 7.900622496311913e-05, "loss": 1.1856, "mean_token_accuracy": 0.7690101683139801, "num_tokens": 1130024492.0, "step": 16730 }, { "entropy": 0.7901332467794419, "epoch": 5.071195940316595, "grad_norm": 0.15880520641803741, "learning_rate": 7.897901857035564e-05, "loss": 1.1917, "mean_token_accuracy": 0.7675976231694221, "num_tokens": 1130685618.0, "step": 16740 }, { "entropy": 0.786475557088852, "epoch": 5.074225554798152, "grad_norm": 0.15323227643966675, "learning_rate": 7.895179925150018e-05, "loss": 1.1852, "mean_token_accuracy": 0.7693305298686027, "num_tokens": 1131353060.0, "step": 16750 }, { "entropy": 0.7913914695382118, "epoch": 5.077255169279709, "grad_norm": 0.16168981790542603, "learning_rate": 7.892456701869395e-05, "loss": 1.1898, "mean_token_accuracy": 0.7700254335999489, "num_tokens": 1132024439.0, "step": 16760 }, { "entropy": 0.7973674595355987, "epoch": 5.080284783761266, "grad_norm": 0.16982251405715942, "learning_rate": 7.889732188408388e-05, "loss": 1.1911, "mean_token_accuracy": 0.7697751298546791, "num_tokens": 1132709850.0, "step": 16770 }, { "entropy": 0.7975167855620384, "epoch": 5.083314398242823, "grad_norm": 0.15392808616161346, "learning_rate": 7.887006385982263e-05, "loss": 1.196, "mean_token_accuracy": 0.7692336022853852, "num_tokens": 1133385764.0, "step": 16780 }, { "entropy": 0.7845405787229538, "epoch": 5.086344012724381, "grad_norm": 0.15405374765396118, "learning_rate": 7.884279295806869e-05, "loss": 1.1895, "mean_token_accuracy": 0.7696777954697609, "num_tokens": 1134061937.0, "step": 16790 }, { "entropy": 0.7827245593070984, "epoch": 5.089373627205938, "grad_norm": 0.15893754363059998, "learning_rate": 7.881550919098622e-05, "loss": 1.187, "mean_token_accuracy": 0.7711850360035897, "num_tokens": 1134733794.0, "step": 16800 }, { "entropy": 0.7979673892259598, "epoch": 5.092403241687495, "grad_norm": 0.15813829004764557, "learning_rate": 7.878821257074515e-05, "loss": 1.1941, "mean_token_accuracy": 0.7615123555064202, "num_tokens": 1135390702.0, "step": 16810 }, { "entropy": 0.7882887244224548, "epoch": 5.095432856169053, "grad_norm": 0.1594170778989792, "learning_rate": 7.876090310952114e-05, "loss": 1.1883, "mean_token_accuracy": 0.7671463191509247, "num_tokens": 1136048360.0, "step": 16820 }, { "entropy": 0.7984920248389245, "epoch": 5.09846247065061, "grad_norm": 0.16381298005580902, "learning_rate": 7.873358081949556e-05, "loss": 1.1943, "mean_token_accuracy": 0.765522375702858, "num_tokens": 1136729881.0, "step": 16830 }, { "entropy": 0.7995185390114784, "epoch": 5.101492085132167, "grad_norm": 0.1667272001504898, "learning_rate": 7.870624571285554e-05, "loss": 1.2002, "mean_token_accuracy": 0.7661367550492286, "num_tokens": 1137397887.0, "step": 16840 }, { "entropy": 0.7874711185693741, "epoch": 5.104521699613724, "grad_norm": 0.1503990888595581, "learning_rate": 7.867889780179391e-05, "loss": 1.1856, "mean_token_accuracy": 0.7681826263666153, "num_tokens": 1138069343.0, "step": 16850 }, { "entropy": 0.8023771598935128, "epoch": 5.107551314095281, "grad_norm": 0.1579052209854126, "learning_rate": 7.865153709850918e-05, "loss": 1.202, "mean_token_accuracy": 0.7645575031638145, "num_tokens": 1138735463.0, "step": 16860 }, { "entropy": 0.7982661604881287, "epoch": 5.110580928576838, "grad_norm": 0.16254378855228424, "learning_rate": 7.862416361520561e-05, "loss": 1.1893, "mean_token_accuracy": 0.7708167925477027, "num_tokens": 1139414205.0, "step": 16870 }, { "entropy": 0.7918319016695022, "epoch": 5.113610543058396, "grad_norm": 0.15832503139972687, "learning_rate": 7.859677736409314e-05, "loss": 1.1854, "mean_token_accuracy": 0.7706602737307549, "num_tokens": 1140087064.0, "step": 16880 }, { "entropy": 0.7866656050086022, "epoch": 5.116640157539953, "grad_norm": 0.1559450775384903, "learning_rate": 7.85693783573874e-05, "loss": 1.1823, "mean_token_accuracy": 0.7663189440965652, "num_tokens": 1140758625.0, "step": 16890 }, { "entropy": 0.786269998550415, "epoch": 5.11966977202151, "grad_norm": 0.16720271110534668, "learning_rate": 7.854196660730973e-05, "loss": 1.1854, "mean_token_accuracy": 0.7719661965966225, "num_tokens": 1141450650.0, "step": 16900 }, { "entropy": 0.793235857784748, "epoch": 5.122699386503068, "grad_norm": 0.16150441765785217, "learning_rate": 7.851454212608715e-05, "loss": 1.2012, "mean_token_accuracy": 0.7687240347266198, "num_tokens": 1142130031.0, "step": 16910 }, { "entropy": 0.7887409865856171, "epoch": 5.125729000984625, "grad_norm": 0.15608227252960205, "learning_rate": 7.848710492595232e-05, "loss": 1.1968, "mean_token_accuracy": 0.7706323504447937, "num_tokens": 1142801822.0, "step": 16920 }, { "entropy": 0.791064091026783, "epoch": 5.1287586154661815, "grad_norm": 0.16037173569202423, "learning_rate": 7.845965501914362e-05, "loss": 1.1873, "mean_token_accuracy": 0.7689804404973983, "num_tokens": 1143480608.0, "step": 16930 }, { "entropy": 0.7976777598261833, "epoch": 5.131788229947739, "grad_norm": 0.15852181613445282, "learning_rate": 7.843219241790509e-05, "loss": 1.1988, "mean_token_accuracy": 0.7673050552606583, "num_tokens": 1144153187.0, "step": 16940 }, { "entropy": 0.7921642765402794, "epoch": 5.134817844429296, "grad_norm": 0.1522085815668106, "learning_rate": 7.84047171344864e-05, "loss": 1.193, "mean_token_accuracy": 0.7688619747757912, "num_tokens": 1144831260.0, "step": 16950 }, { "entropy": 0.7973412096500396, "epoch": 5.137847458910853, "grad_norm": 0.15903477370738983, "learning_rate": 7.837722918114295e-05, "loss": 1.1951, "mean_token_accuracy": 0.7696291863918304, "num_tokens": 1145512417.0, "step": 16960 }, { "entropy": 0.791239495575428, "epoch": 5.140877073392411, "grad_norm": 0.1490985006093979, "learning_rate": 7.834972857013569e-05, "loss": 1.1892, "mean_token_accuracy": 0.7675310984253884, "num_tokens": 1146196717.0, "step": 16970 }, { "entropy": 0.8050056949257851, "epoch": 5.143906687873968, "grad_norm": 0.16099320352077484, "learning_rate": 7.83222153137313e-05, "loss": 1.1949, "mean_token_accuracy": 0.7670167252421379, "num_tokens": 1146879833.0, "step": 16980 }, { "entropy": 0.8021310731768608, "epoch": 5.146936302355526, "grad_norm": 0.16402247548103333, "learning_rate": 7.829468942420203e-05, "loss": 1.2004, "mean_token_accuracy": 0.7693236470222473, "num_tokens": 1147557609.0, "step": 16990 }, { "entropy": 0.7945751905441284, "epoch": 5.149965916837083, "grad_norm": 0.16784335672855377, "learning_rate": 7.826715091382586e-05, "loss": 1.1976, "mean_token_accuracy": 0.7710944831371307, "num_tokens": 1148231438.0, "step": 17000 }, { "entropy": 0.7846817806363106, "epoch": 5.1529955313186395, "grad_norm": 0.16117709875106812, "learning_rate": 7.823959979488629e-05, "loss": 1.181, "mean_token_accuracy": 0.7752747610211372, "num_tokens": 1148930952.0, "step": 17010 }, { "entropy": 0.7957236215472221, "epoch": 5.156025145800197, "grad_norm": 0.15562129020690918, "learning_rate": 7.821203607967254e-05, "loss": 1.1963, "mean_token_accuracy": 0.7648106575012207, "num_tokens": 1149603103.0, "step": 17020 }, { "entropy": 0.7977365210652352, "epoch": 5.159054760281754, "grad_norm": 0.1677064448595047, "learning_rate": 7.818445978047936e-05, "loss": 1.1947, "mean_token_accuracy": 0.7639513969421386, "num_tokens": 1150263365.0, "step": 17030 }, { "entropy": 0.7925516247749329, "epoch": 5.162084374763311, "grad_norm": 0.15624701976776123, "learning_rate": 7.815687090960721e-05, "loss": 1.1923, "mean_token_accuracy": 0.7755320727825165, "num_tokens": 1150953297.0, "step": 17040 }, { "entropy": 0.7901217445731163, "epoch": 5.165113989244869, "grad_norm": 0.15500527620315552, "learning_rate": 7.812926947936208e-05, "loss": 1.1905, "mean_token_accuracy": 0.7713487282395363, "num_tokens": 1151631635.0, "step": 17050 }, { "entropy": 0.7947961136698722, "epoch": 5.168143603726426, "grad_norm": 0.1531098335981369, "learning_rate": 7.810165550205559e-05, "loss": 1.1933, "mean_token_accuracy": 0.7699369430541992, "num_tokens": 1152309119.0, "step": 17060 }, { "entropy": 0.7894000634551048, "epoch": 5.171173218207983, "grad_norm": 0.15637709200382233, "learning_rate": 7.807402899000496e-05, "loss": 1.1934, "mean_token_accuracy": 0.7707509502768517, "num_tokens": 1152979896.0, "step": 17070 }, { "entropy": 0.7942910522222519, "epoch": 5.174202832689541, "grad_norm": 0.14667530357837677, "learning_rate": 7.804638995553297e-05, "loss": 1.1959, "mean_token_accuracy": 0.7722572177648545, "num_tokens": 1153663084.0, "step": 17080 }, { "entropy": 0.7933478116989136, "epoch": 5.1772324471710975, "grad_norm": 0.15234503149986267, "learning_rate": 7.801873841096806e-05, "loss": 1.1929, "mean_token_accuracy": 0.7653992846608162, "num_tokens": 1154331840.0, "step": 17090 }, { "entropy": 0.789708323776722, "epoch": 5.1802620616526545, "grad_norm": 0.1696467250585556, "learning_rate": 7.799107436864417e-05, "loss": 1.1968, "mean_token_accuracy": 0.7668629556894302, "num_tokens": 1154986076.0, "step": 17100 }, { "entropy": 0.7896785274147987, "epoch": 5.183291676134212, "grad_norm": 0.16039060056209564, "learning_rate": 7.796339784090085e-05, "loss": 1.1843, "mean_token_accuracy": 0.7684125617146492, "num_tokens": 1155666259.0, "step": 17110 }, { "entropy": 0.7867568179965019, "epoch": 5.186321290615769, "grad_norm": 0.15491805970668793, "learning_rate": 7.79357088400832e-05, "loss": 1.1882, "mean_token_accuracy": 0.7716071099042893, "num_tokens": 1156355994.0, "step": 17120 }, { "entropy": 0.7851361095905304, "epoch": 5.189350905097326, "grad_norm": 0.1565706729888916, "learning_rate": 7.790800737854195e-05, "loss": 1.1745, "mean_token_accuracy": 0.771054470539093, "num_tokens": 1157043126.0, "step": 17130 }, { "entropy": 0.7874387934803962, "epoch": 5.192380519578884, "grad_norm": 0.1554880291223526, "learning_rate": 7.78802934686333e-05, "loss": 1.1883, "mean_token_accuracy": 0.7711294487118721, "num_tokens": 1157721720.0, "step": 17140 }, { "entropy": 0.7959549397230148, "epoch": 5.195410134060441, "grad_norm": 0.15542958676815033, "learning_rate": 7.785256712271904e-05, "loss": 1.1973, "mean_token_accuracy": 0.767873203754425, "num_tokens": 1158404116.0, "step": 17150 }, { "entropy": 0.796953085064888, "epoch": 5.198439748541998, "grad_norm": 0.15236327052116394, "learning_rate": 7.782482835316652e-05, "loss": 1.195, "mean_token_accuracy": 0.7699533119797707, "num_tokens": 1159095642.0, "step": 17160 }, { "entropy": 0.8015800729393959, "epoch": 5.201469363023556, "grad_norm": 0.15539121627807617, "learning_rate": 7.77970771723486e-05, "loss": 1.2046, "mean_token_accuracy": 0.7635770738124847, "num_tokens": 1159750670.0, "step": 17170 }, { "entropy": 0.7865219786763191, "epoch": 5.2044989775051125, "grad_norm": 0.1567796766757965, "learning_rate": 7.776931359264372e-05, "loss": 1.1858, "mean_token_accuracy": 0.7690562218427658, "num_tokens": 1160420566.0, "step": 17180 }, { "entropy": 0.7902747079730034, "epoch": 5.207528591986669, "grad_norm": 0.15823183953762054, "learning_rate": 7.774153762643582e-05, "loss": 1.1898, "mean_token_accuracy": 0.7678332209587098, "num_tokens": 1161094600.0, "step": 17190 }, { "entropy": 0.7934893280267715, "epoch": 5.210558206468227, "grad_norm": 0.15117739140987396, "learning_rate": 7.771374928611435e-05, "loss": 1.1961, "mean_token_accuracy": 0.7713102355599404, "num_tokens": 1161775732.0, "step": 17200 }, { "entropy": 0.7978516027331353, "epoch": 5.213587820949784, "grad_norm": 0.15321889519691467, "learning_rate": 7.768594858407431e-05, "loss": 1.1883, "mean_token_accuracy": 0.7674748927354813, "num_tokens": 1162474209.0, "step": 17210 }, { "entropy": 0.7908621087670327, "epoch": 5.216617435431341, "grad_norm": 0.15823061764240265, "learning_rate": 7.765813553271622e-05, "loss": 1.1942, "mean_token_accuracy": 0.7713371187448501, "num_tokens": 1163156017.0, "step": 17220 }, { "entropy": 0.7892576485872269, "epoch": 5.219647049912899, "grad_norm": 0.15625379979610443, "learning_rate": 7.763031014444605e-05, "loss": 1.1863, "mean_token_accuracy": 0.7694304808974266, "num_tokens": 1163828041.0, "step": 17230 }, { "entropy": 0.7902972042560578, "epoch": 5.222676664394456, "grad_norm": 0.1579941213130951, "learning_rate": 7.760247243167537e-05, "loss": 1.1918, "mean_token_accuracy": 0.7715951129794121, "num_tokens": 1164522773.0, "step": 17240 }, { "entropy": 0.7882058292627334, "epoch": 5.225706278876013, "grad_norm": 0.1585836261510849, "learning_rate": 7.757462240682119e-05, "loss": 1.2006, "mean_token_accuracy": 0.769963701069355, "num_tokens": 1165188335.0, "step": 17250 }, { "entropy": 0.7864675641059875, "epoch": 5.2287358933575705, "grad_norm": 0.15874925255775452, "learning_rate": 7.754676008230595e-05, "loss": 1.1889, "mean_token_accuracy": 0.7740319341421127, "num_tokens": 1165870965.0, "step": 17260 }, { "entropy": 0.7968963533639908, "epoch": 5.231765507839127, "grad_norm": 0.15225261449813843, "learning_rate": 7.75188854705577e-05, "loss": 1.2002, "mean_token_accuracy": 0.769596466422081, "num_tokens": 1166544318.0, "step": 17270 }, { "entropy": 0.7892345637083054, "epoch": 5.234795122320684, "grad_norm": 0.1502452939748764, "learning_rate": 7.749099858400989e-05, "loss": 1.1929, "mean_token_accuracy": 0.7687133431434632, "num_tokens": 1167211314.0, "step": 17280 }, { "entropy": 0.7954817578196526, "epoch": 5.237824736802242, "grad_norm": 0.15724745392799377, "learning_rate": 7.746309943510148e-05, "loss": 1.193, "mean_token_accuracy": 0.7699337929487229, "num_tokens": 1167905279.0, "step": 17290 }, { "entropy": 0.7950238794088363, "epoch": 5.240854351283799, "grad_norm": 0.15903352200984955, "learning_rate": 7.743518803627688e-05, "loss": 1.1932, "mean_token_accuracy": 0.76488067060709, "num_tokens": 1168568163.0, "step": 17300 }, { "entropy": 0.792098093032837, "epoch": 5.243883965765356, "grad_norm": 0.15061922371387482, "learning_rate": 7.740726439998594e-05, "loss": 1.196, "mean_token_accuracy": 0.7639679074287414, "num_tokens": 1169228184.0, "step": 17310 }, { "entropy": 0.7873492568731308, "epoch": 5.246913580246914, "grad_norm": 0.15553319454193115, "learning_rate": 7.737932853868404e-05, "loss": 1.1919, "mean_token_accuracy": 0.7729442462325096, "num_tokens": 1169911211.0, "step": 17320 }, { "entropy": 0.7864971786737442, "epoch": 5.249943194728471, "grad_norm": 0.17369091510772705, "learning_rate": 7.735138046483196e-05, "loss": 1.1872, "mean_token_accuracy": 0.7715881854295731, "num_tokens": 1170586535.0, "step": 17330 }, { "entropy": 0.795674116909504, "epoch": 5.252972809210028, "grad_norm": 0.1560736447572708, "learning_rate": 7.732342019089593e-05, "loss": 1.1922, "mean_token_accuracy": 0.7709483101963996, "num_tokens": 1171271147.0, "step": 17340 }, { "entropy": 0.7958288848400116, "epoch": 5.256002423691585, "grad_norm": 0.15873171389102936, "learning_rate": 7.729544772934764e-05, "loss": 1.1962, "mean_token_accuracy": 0.7691018924117088, "num_tokens": 1171944249.0, "step": 17350 }, { "entropy": 0.7819776698946953, "epoch": 5.259032038173142, "grad_norm": 0.15958859026432037, "learning_rate": 7.72674630926642e-05, "loss": 1.187, "mean_token_accuracy": 0.7761292487382889, "num_tokens": 1172625656.0, "step": 17360 }, { "entropy": 0.7825765237212181, "epoch": 5.262061652654699, "grad_norm": 0.15552274882793427, "learning_rate": 7.723946629332817e-05, "loss": 1.1863, "mean_token_accuracy": 0.7742067262530327, "num_tokens": 1173310261.0, "step": 17370 }, { "entropy": 0.7916419252753257, "epoch": 5.265091267136257, "grad_norm": 0.15146741271018982, "learning_rate": 7.721145734382749e-05, "loss": 1.1892, "mean_token_accuracy": 0.7693522796034813, "num_tokens": 1173977891.0, "step": 17380 }, { "entropy": 0.7943963199853897, "epoch": 5.268120881617814, "grad_norm": 0.1591690331697464, "learning_rate": 7.718343625665559e-05, "loss": 1.1894, "mean_token_accuracy": 0.7696996122598648, "num_tokens": 1174663181.0, "step": 17390 }, { "entropy": 0.7926700994372368, "epoch": 5.271150496099372, "grad_norm": 0.160016730427742, "learning_rate": 7.715540304431128e-05, "loss": 1.1914, "mean_token_accuracy": 0.7728733107447624, "num_tokens": 1175359104.0, "step": 17400 }, { "entropy": 0.8013308346271515, "epoch": 5.274180110580929, "grad_norm": 0.1549455225467682, "learning_rate": 7.712735771929873e-05, "loss": 1.2046, "mean_token_accuracy": 0.7645180448889732, "num_tokens": 1176030111.0, "step": 17410 }, { "entropy": 0.8020660355687141, "epoch": 5.277209725062486, "grad_norm": 0.15448881685733795, "learning_rate": 7.709930029412762e-05, "loss": 1.1964, "mean_token_accuracy": 0.7696937382221222, "num_tokens": 1176709324.0, "step": 17420 }, { "entropy": 0.7843293473124504, "epoch": 5.280239339544043, "grad_norm": 0.15545623004436493, "learning_rate": 7.70712307813129e-05, "loss": 1.1921, "mean_token_accuracy": 0.7684095501899719, "num_tokens": 1177375159.0, "step": 17430 }, { "entropy": 0.8017087817192078, "epoch": 5.2832689540256, "grad_norm": 0.15782956779003143, "learning_rate": 7.704314919337504e-05, "loss": 1.1971, "mean_token_accuracy": 0.7661067008972168, "num_tokens": 1178046557.0, "step": 17440 }, { "entropy": 0.7860934764146805, "epoch": 5.286298568507157, "grad_norm": 0.15507152676582336, "learning_rate": 7.701505554283977e-05, "loss": 1.1891, "mean_token_accuracy": 0.7704472541809082, "num_tokens": 1178720516.0, "step": 17450 }, { "entropy": 0.7910739570856095, "epoch": 5.289328182988715, "grad_norm": 0.1577184647321701, "learning_rate": 7.698694984223831e-05, "loss": 1.1902, "mean_token_accuracy": 0.7696097001433373, "num_tokens": 1179391162.0, "step": 17460 }, { "entropy": 0.78514154702425, "epoch": 5.292357797470272, "grad_norm": 0.15412352979183197, "learning_rate": 7.69588321041072e-05, "loss": 1.1819, "mean_token_accuracy": 0.7715278282761574, "num_tokens": 1180072891.0, "step": 17470 }, { "entropy": 0.7782529965043068, "epoch": 5.295387411951829, "grad_norm": 0.1585690826177597, "learning_rate": 7.693070234098834e-05, "loss": 1.1831, "mean_token_accuracy": 0.7694088965654373, "num_tokens": 1180742973.0, "step": 17480 }, { "entropy": 0.8020209014415741, "epoch": 5.298417026433387, "grad_norm": 0.15189771354198456, "learning_rate": 7.690256056542903e-05, "loss": 1.1988, "mean_token_accuracy": 0.7645947188138962, "num_tokens": 1181415326.0, "step": 17490 }, { "entropy": 0.8022048667073249, "epoch": 5.301446640914944, "grad_norm": 0.15041834115982056, "learning_rate": 7.68744067899819e-05, "loss": 1.1947, "mean_token_accuracy": 0.7714553833007812, "num_tokens": 1182104379.0, "step": 17500 }, { "entropy": 0.8034254133701324, "epoch": 5.304476255396501, "grad_norm": 0.15402550995349884, "learning_rate": 7.684624102720495e-05, "loss": 1.1918, "mean_token_accuracy": 0.7663976415991783, "num_tokens": 1182791512.0, "step": 17510 }, { "entropy": 0.7869031742215157, "epoch": 5.307505869878058, "grad_norm": 0.15747013688087463, "learning_rate": 7.68180632896615e-05, "loss": 1.1935, "mean_token_accuracy": 0.7666245847940445, "num_tokens": 1183458053.0, "step": 17520 }, { "entropy": 0.7984980285167694, "epoch": 5.310535484359615, "grad_norm": 0.1575598120689392, "learning_rate": 7.678987358992026e-05, "loss": 1.1934, "mean_token_accuracy": 0.7688501477241516, "num_tokens": 1184122101.0, "step": 17530 }, { "entropy": 0.7883581951260566, "epoch": 5.313565098841172, "grad_norm": 0.17190022766590118, "learning_rate": 7.676167194055523e-05, "loss": 1.1914, "mean_token_accuracy": 0.7717085897922515, "num_tokens": 1184800988.0, "step": 17540 }, { "entropy": 0.7959327608346939, "epoch": 5.31659471332273, "grad_norm": 0.15310914814472198, "learning_rate": 7.673345835414575e-05, "loss": 1.1903, "mean_token_accuracy": 0.7683573260903358, "num_tokens": 1185475110.0, "step": 17550 }, { "entropy": 0.7981788322329522, "epoch": 5.319624327804287, "grad_norm": 0.1599741131067276, "learning_rate": 7.67052328432765e-05, "loss": 1.2044, "mean_token_accuracy": 0.7693395286798477, "num_tokens": 1186148233.0, "step": 17560 }, { "entropy": 0.7871917247772217, "epoch": 5.322653942285844, "grad_norm": 0.16612312197685242, "learning_rate": 7.667699542053748e-05, "loss": 1.1906, "mean_token_accuracy": 0.7699298709630966, "num_tokens": 1186822751.0, "step": 17570 }, { "entropy": 0.7859062150120735, "epoch": 5.325683556767402, "grad_norm": 0.17001472413539886, "learning_rate": 7.664874609852397e-05, "loss": 1.1978, "mean_token_accuracy": 0.7713186040520668, "num_tokens": 1187493996.0, "step": 17580 }, { "entropy": 0.7932150304317475, "epoch": 5.328713171248959, "grad_norm": 0.1645856350660324, "learning_rate": 7.662048488983658e-05, "loss": 1.1869, "mean_token_accuracy": 0.7701080903410912, "num_tokens": 1188171986.0, "step": 17590 }, { "entropy": 0.7961829259991646, "epoch": 5.3317427857305155, "grad_norm": 0.14714856445789337, "learning_rate": 7.659221180708126e-05, "loss": 1.1883, "mean_token_accuracy": 0.7689448133111, "num_tokens": 1188859031.0, "step": 17600 }, { "entropy": 0.7916832432150841, "epoch": 5.334772400212073, "grad_norm": 0.16263554990291595, "learning_rate": 7.656392686286918e-05, "loss": 1.1831, "mean_token_accuracy": 0.7754878297448158, "num_tokens": 1189551929.0, "step": 17610 }, { "entropy": 0.7899981632828712, "epoch": 5.33780201469363, "grad_norm": 0.1638210117816925, "learning_rate": 7.653563006981685e-05, "loss": 1.1937, "mean_token_accuracy": 0.7634357333183288, "num_tokens": 1190204767.0, "step": 17620 }, { "entropy": 0.7990321606397629, "epoch": 5.340831629175187, "grad_norm": 0.1562741994857788, "learning_rate": 7.650732144054607e-05, "loss": 1.2078, "mean_token_accuracy": 0.7641814783215523, "num_tokens": 1190859135.0, "step": 17630 }, { "entropy": 0.795019020140171, "epoch": 5.343861243656745, "grad_norm": 0.14475341141223907, "learning_rate": 7.64790009876839e-05, "loss": 1.1927, "mean_token_accuracy": 0.7689029216766358, "num_tokens": 1191530293.0, "step": 17640 }, { "entropy": 0.7977340087294579, "epoch": 5.346890858138302, "grad_norm": 0.1600002646446228, "learning_rate": 7.645066872386266e-05, "loss": 1.2017, "mean_token_accuracy": 0.7684462234377861, "num_tokens": 1192217113.0, "step": 17650 }, { "entropy": 0.7936551615595817, "epoch": 5.349920472619859, "grad_norm": 0.1655941754579544, "learning_rate": 7.642232466171997e-05, "loss": 1.1848, "mean_token_accuracy": 0.7676477208733559, "num_tokens": 1192884086.0, "step": 17660 }, { "entropy": 0.7909827798604965, "epoch": 5.352950087101417, "grad_norm": 0.14532136917114258, "learning_rate": 7.639396881389872e-05, "loss": 1.1917, "mean_token_accuracy": 0.7680937454104424, "num_tokens": 1193553087.0, "step": 17670 }, { "entropy": 0.7936312824487686, "epoch": 5.3559797015829735, "grad_norm": 0.18110795319080353, "learning_rate": 7.6365601193047e-05, "loss": 1.1932, "mean_token_accuracy": 0.7639016881585121, "num_tokens": 1194221589.0, "step": 17680 }, { "entropy": 0.7801448628306389, "epoch": 5.3590093160645305, "grad_norm": 0.1572643667459488, "learning_rate": 7.633722181181819e-05, "loss": 1.1811, "mean_token_accuracy": 0.7772280246019363, "num_tokens": 1194915590.0, "step": 17690 }, { "entropy": 0.79637009203434, "epoch": 5.362038930546088, "grad_norm": 0.15378724038600922, "learning_rate": 7.630883068287096e-05, "loss": 1.1969, "mean_token_accuracy": 0.7703924238681793, "num_tokens": 1195587875.0, "step": 17700 }, { "entropy": 0.7883513554930687, "epoch": 5.365068545027645, "grad_norm": 0.15346989035606384, "learning_rate": 7.628042781886911e-05, "loss": 1.1852, "mean_token_accuracy": 0.7732666581869125, "num_tokens": 1196274595.0, "step": 17710 }, { "entropy": 0.7764868423342705, "epoch": 5.368098159509202, "grad_norm": 0.14909599721431732, "learning_rate": 7.625201323248179e-05, "loss": 1.1815, "mean_token_accuracy": 0.7732973173260689, "num_tokens": 1196948431.0, "step": 17720 }, { "entropy": 0.7900894731283188, "epoch": 5.37112777399076, "grad_norm": 0.1467720866203308, "learning_rate": 7.622358693638329e-05, "loss": 1.1905, "mean_token_accuracy": 0.769812461733818, "num_tokens": 1197620161.0, "step": 17730 }, { "entropy": 0.7899725899100304, "epoch": 5.374157388472317, "grad_norm": 0.15409912168979645, "learning_rate": 7.61951489432532e-05, "loss": 1.1898, "mean_token_accuracy": 0.7687654465436935, "num_tokens": 1198284040.0, "step": 17740 }, { "entropy": 0.7853469118475914, "epoch": 5.377187002953874, "grad_norm": 0.15078231692314148, "learning_rate": 7.616669926577625e-05, "loss": 1.1803, "mean_token_accuracy": 0.7739398464560509, "num_tokens": 1198967676.0, "step": 17750 }, { "entropy": 0.7953891828656197, "epoch": 5.380216617435432, "grad_norm": 0.15821775794029236, "learning_rate": 7.613823791664244e-05, "loss": 1.1971, "mean_token_accuracy": 0.7657200857996941, "num_tokens": 1199635094.0, "step": 17760 }, { "entropy": 0.8010059207677841, "epoch": 5.3832462319169885, "grad_norm": 0.16155438125133514, "learning_rate": 7.610976490854696e-05, "loss": 1.1946, "mean_token_accuracy": 0.7681468084454537, "num_tokens": 1200322029.0, "step": 17770 }, { "entropy": 0.7836634263396263, "epoch": 5.386275846398545, "grad_norm": 0.15151795744895935, "learning_rate": 7.608128025419017e-05, "loss": 1.1793, "mean_token_accuracy": 0.7685976520180702, "num_tokens": 1201000866.0, "step": 17780 }, { "entropy": 0.7943032741546631, "epoch": 5.389305460880103, "grad_norm": 0.16315065324306488, "learning_rate": 7.605278396627766e-05, "loss": 1.192, "mean_token_accuracy": 0.7686750799417496, "num_tokens": 1201685014.0, "step": 17790 }, { "entropy": 0.7829157680273056, "epoch": 5.39233507536166, "grad_norm": 0.1587747037410736, "learning_rate": 7.602427605752023e-05, "loss": 1.1857, "mean_token_accuracy": 0.7704502210021019, "num_tokens": 1202350608.0, "step": 17800 }, { "entropy": 0.8006536051630974, "epoch": 5.395364689843217, "grad_norm": 0.1580352634191513, "learning_rate": 7.599575654063378e-05, "loss": 1.2079, "mean_token_accuracy": 0.7639207169413567, "num_tokens": 1203010524.0, "step": 17810 }, { "entropy": 0.7890890315175056, "epoch": 5.398394304324775, "grad_norm": 0.15163546800613403, "learning_rate": 7.596722542833949e-05, "loss": 1.1899, "mean_token_accuracy": 0.76937026232481, "num_tokens": 1203682275.0, "step": 17820 }, { "entropy": 0.8010433107614517, "epoch": 5.401423918806332, "grad_norm": 0.15345145761966705, "learning_rate": 7.593868273336364e-05, "loss": 1.1963, "mean_token_accuracy": 0.7656665444374084, "num_tokens": 1204349962.0, "step": 17830 }, { "entropy": 0.7970649287104606, "epoch": 5.404453533287889, "grad_norm": 0.16714106500148773, "learning_rate": 7.591012846843771e-05, "loss": 1.1999, "mean_token_accuracy": 0.7701074466109276, "num_tokens": 1205021424.0, "step": 17840 }, { "entropy": 0.7867384389042854, "epoch": 5.4074831477694465, "grad_norm": 0.14899560809135437, "learning_rate": 7.588156264629831e-05, "loss": 1.183, "mean_token_accuracy": 0.7700198113918304, "num_tokens": 1205698314.0, "step": 17850 }, { "entropy": 0.8040710180997849, "epoch": 5.410512762251003, "grad_norm": 0.15596075356006622, "learning_rate": 7.585298527968726e-05, "loss": 1.1995, "mean_token_accuracy": 0.7669637724757195, "num_tokens": 1206372716.0, "step": 17860 }, { "entropy": 0.7969091936945916, "epoch": 5.413542376732561, "grad_norm": 0.15836764872074127, "learning_rate": 7.582439638135146e-05, "loss": 1.1986, "mean_token_accuracy": 0.770765969157219, "num_tokens": 1207050369.0, "step": 17870 }, { "entropy": 0.7875916823744774, "epoch": 5.416571991214118, "grad_norm": 0.14866401255130768, "learning_rate": 7.579579596404301e-05, "loss": 1.1873, "mean_token_accuracy": 0.7716606304049491, "num_tokens": 1207729643.0, "step": 17880 }, { "entropy": 0.8014676854014396, "epoch": 5.419601605695675, "grad_norm": 0.15639302134513855, "learning_rate": 7.576718404051912e-05, "loss": 1.1979, "mean_token_accuracy": 0.7659138143062592, "num_tokens": 1208406975.0, "step": 17890 }, { "entropy": 0.7919231161475182, "epoch": 5.422631220177232, "grad_norm": 0.15693379938602448, "learning_rate": 7.573856062354214e-05, "loss": 1.1901, "mean_token_accuracy": 0.7700849264860153, "num_tokens": 1209086200.0, "step": 17900 }, { "entropy": 0.7928600683808327, "epoch": 5.42566083465879, "grad_norm": 0.15709640085697174, "learning_rate": 7.570992572587955e-05, "loss": 1.1882, "mean_token_accuracy": 0.7692574381828308, "num_tokens": 1209781026.0, "step": 17910 }, { "entropy": 0.7872502312064171, "epoch": 5.428690449140347, "grad_norm": 0.16345106065273285, "learning_rate": 7.568127936030394e-05, "loss": 1.1912, "mean_token_accuracy": 0.7697390720248223, "num_tokens": 1210464753.0, "step": 17920 }, { "entropy": 0.797100330889225, "epoch": 5.4317200636219045, "grad_norm": 0.16022591292858124, "learning_rate": 7.565262153959301e-05, "loss": 1.194, "mean_token_accuracy": 0.7657869562506676, "num_tokens": 1211130813.0, "step": 17930 }, { "entropy": 0.7876862704753875, "epoch": 5.434749678103461, "grad_norm": 0.14714138209819794, "learning_rate": 7.562395227652962e-05, "loss": 1.1903, "mean_token_accuracy": 0.7710764214396477, "num_tokens": 1211815871.0, "step": 17940 }, { "entropy": 0.7902646377682686, "epoch": 5.437779292585018, "grad_norm": 0.15407568216323853, "learning_rate": 7.559527158390166e-05, "loss": 1.1894, "mean_token_accuracy": 0.7710242703557014, "num_tokens": 1212497763.0, "step": 17950 }, { "entropy": 0.7920958489179611, "epoch": 5.440808907066576, "grad_norm": 0.15582390129566193, "learning_rate": 7.556657947450217e-05, "loss": 1.1944, "mean_token_accuracy": 0.7680147767066956, "num_tokens": 1213167488.0, "step": 17960 }, { "entropy": 0.7920024797320366, "epoch": 5.443838521548133, "grad_norm": 0.16489140689373016, "learning_rate": 7.553787596112927e-05, "loss": 1.1871, "mean_token_accuracy": 0.7655975177884102, "num_tokens": 1213829908.0, "step": 17970 }, { "entropy": 0.7848840445280075, "epoch": 5.44686813602969, "grad_norm": 0.16326844692230225, "learning_rate": 7.550916105658614e-05, "loss": 1.1861, "mean_token_accuracy": 0.7712792336940766, "num_tokens": 1214503562.0, "step": 17980 }, { "entropy": 0.7959401324391365, "epoch": 5.449897750511248, "grad_norm": 0.15023329854011536, "learning_rate": 7.548043477368108e-05, "loss": 1.1896, "mean_token_accuracy": 0.7691247791051865, "num_tokens": 1215183259.0, "step": 17990 }, { "entropy": 0.7946044862270355, "epoch": 5.452927364992805, "grad_norm": 0.1672666370868683, "learning_rate": 7.545169712522745e-05, "loss": 1.1974, "mean_token_accuracy": 0.7653155639767647, "num_tokens": 1215843813.0, "step": 18000 }, { "entropy": 0.7864162981510162, "epoch": 5.455956979474362, "grad_norm": 0.15071184933185577, "learning_rate": 7.54229481240437e-05, "loss": 1.1825, "mean_token_accuracy": 0.7732902497053147, "num_tokens": 1216541863.0, "step": 18010 }, { "entropy": 0.797180961072445, "epoch": 5.4589865939559195, "grad_norm": 0.16377383470535278, "learning_rate": 7.539418778295328e-05, "loss": 1.1943, "mean_token_accuracy": 0.7646159008145332, "num_tokens": 1217206095.0, "step": 18020 }, { "entropy": 0.7854379042983055, "epoch": 5.462016208437476, "grad_norm": 0.15638822317123413, "learning_rate": 7.536541611478475e-05, "loss": 1.1871, "mean_token_accuracy": 0.7728175222873688, "num_tokens": 1217885808.0, "step": 18030 }, { "entropy": 0.7983357831835747, "epoch": 5.465045822919033, "grad_norm": 0.15034106373786926, "learning_rate": 7.533663313237174e-05, "loss": 1.197, "mean_token_accuracy": 0.7691913187503815, "num_tokens": 1218549867.0, "step": 18040 }, { "entropy": 0.7936286747455596, "epoch": 5.468075437400591, "grad_norm": 0.1538112610578537, "learning_rate": 7.530783884855288e-05, "loss": 1.1996, "mean_token_accuracy": 0.7685976907610893, "num_tokens": 1219220339.0, "step": 18050 }, { "entropy": 0.7856300622224808, "epoch": 5.471105051882148, "grad_norm": 0.1578034907579422, "learning_rate": 7.527903327617185e-05, "loss": 1.1933, "mean_token_accuracy": 0.766740171611309, "num_tokens": 1219885319.0, "step": 18060 }, { "entropy": 0.7912115275859832, "epoch": 5.474134666363705, "grad_norm": 0.15818874537944794, "learning_rate": 7.52502164280774e-05, "loss": 1.1858, "mean_token_accuracy": 0.7702404156327247, "num_tokens": 1220555918.0, "step": 18070 }, { "entropy": 0.7877009674906731, "epoch": 5.477164280845263, "grad_norm": 0.15725462138652802, "learning_rate": 7.522138831712329e-05, "loss": 1.1929, "mean_token_accuracy": 0.7679716780781746, "num_tokens": 1221224817.0, "step": 18080 }, { "entropy": 0.7976912975311279, "epoch": 5.48019389532682, "grad_norm": 0.15438312292099, "learning_rate": 7.519254895616828e-05, "loss": 1.1928, "mean_token_accuracy": 0.7668454617261886, "num_tokens": 1221898011.0, "step": 18090 }, { "entropy": 0.7954000815749168, "epoch": 5.483223509808377, "grad_norm": 0.15981487929821014, "learning_rate": 7.516369835807615e-05, "loss": 1.1948, "mean_token_accuracy": 0.7648048520088195, "num_tokens": 1222570179.0, "step": 18100 }, { "entropy": 0.7921616017818451, "epoch": 5.486253124289934, "grad_norm": 0.16003654897212982, "learning_rate": 7.513483653571577e-05, "loss": 1.1958, "mean_token_accuracy": 0.7714850351214408, "num_tokens": 1223238713.0, "step": 18110 }, { "entropy": 0.7942561611533165, "epoch": 5.489282738771491, "grad_norm": 0.15539222955703735, "learning_rate": 7.510596350196091e-05, "loss": 1.2015, "mean_token_accuracy": 0.7683041110634804, "num_tokens": 1223908044.0, "step": 18120 }, { "entropy": 0.7851544454693794, "epoch": 5.492312353253048, "grad_norm": 0.1554105132818222, "learning_rate": 7.50770792696904e-05, "loss": 1.1918, "mean_token_accuracy": 0.7712665885686875, "num_tokens": 1224574438.0, "step": 18130 }, { "entropy": 0.7835942476987838, "epoch": 5.495341967734606, "grad_norm": 0.16624827682971954, "learning_rate": 7.504818385178803e-05, "loss": 1.1864, "mean_token_accuracy": 0.7709278538823128, "num_tokens": 1225244698.0, "step": 18140 }, { "entropy": 0.797513335943222, "epoch": 5.498371582216163, "grad_norm": 0.16395531594753265, "learning_rate": 7.501927726114264e-05, "loss": 1.2023, "mean_token_accuracy": 0.7699491426348686, "num_tokens": 1225935554.0, "step": 18150 }, { "entropy": 0.7859024435281754, "epoch": 5.50140119669772, "grad_norm": 0.15835483372211456, "learning_rate": 7.499035951064799e-05, "loss": 1.1841, "mean_token_accuracy": 0.7728272244334221, "num_tokens": 1226609336.0, "step": 18160 }, { "entropy": 0.7876161232590675, "epoch": 5.504430811179278, "grad_norm": 0.1516963094472885, "learning_rate": 7.496143061320284e-05, "loss": 1.1965, "mean_token_accuracy": 0.7686561167240142, "num_tokens": 1227276724.0, "step": 18170 }, { "entropy": 0.7917749524116516, "epoch": 5.507460425660835, "grad_norm": 0.15597166121006012, "learning_rate": 7.493249058171096e-05, "loss": 1.1836, "mean_token_accuracy": 0.7711313754320145, "num_tokens": 1227967930.0, "step": 18180 }, { "entropy": 0.7929075747728348, "epoch": 5.5104900401423915, "grad_norm": 0.14477355778217316, "learning_rate": 7.490353942908103e-05, "loss": 1.1947, "mean_token_accuracy": 0.7674643844366074, "num_tokens": 1228634174.0, "step": 18190 }, { "entropy": 0.796661950647831, "epoch": 5.513519654623949, "grad_norm": 0.1461552232503891, "learning_rate": 7.48745771682267e-05, "loss": 1.189, "mean_token_accuracy": 0.7670310020446778, "num_tokens": 1229307940.0, "step": 18200 }, { "entropy": 0.7936533480882645, "epoch": 5.516549269105506, "grad_norm": 0.16306835412979126, "learning_rate": 7.48456038120666e-05, "loss": 1.1884, "mean_token_accuracy": 0.7639216050505638, "num_tokens": 1229976902.0, "step": 18210 }, { "entropy": 0.7884961009025574, "epoch": 5.519578883587063, "grad_norm": 0.1640898585319519, "learning_rate": 7.481661937352431e-05, "loss": 1.1738, "mean_token_accuracy": 0.7729783669114113, "num_tokens": 1230657835.0, "step": 18220 }, { "entropy": 0.7897786647081375, "epoch": 5.522608498068621, "grad_norm": 0.15668991208076477, "learning_rate": 7.47876238655283e-05, "loss": 1.1922, "mean_token_accuracy": 0.770969046652317, "num_tokens": 1231340515.0, "step": 18230 }, { "entropy": 0.8003821015357971, "epoch": 5.525638112550178, "grad_norm": 0.16316929459571838, "learning_rate": 7.475861730101204e-05, "loss": 1.196, "mean_token_accuracy": 0.7662308365106583, "num_tokens": 1232007819.0, "step": 18240 }, { "entropy": 0.7810446426272393, "epoch": 5.528667727031735, "grad_norm": 0.1480761468410492, "learning_rate": 7.472959969291393e-05, "loss": 1.1869, "mean_token_accuracy": 0.7710848569869995, "num_tokens": 1232678244.0, "step": 18250 }, { "entropy": 0.7922813922166825, "epoch": 5.531697341513293, "grad_norm": 0.15463969111442566, "learning_rate": 7.470057105417725e-05, "loss": 1.1856, "mean_token_accuracy": 0.7781815335154534, "num_tokens": 1233385639.0, "step": 18260 }, { "entropy": 0.793801961839199, "epoch": 5.5347269559948495, "grad_norm": 0.1465470790863037, "learning_rate": 7.467153139775022e-05, "loss": 1.188, "mean_token_accuracy": 0.7720064729452133, "num_tokens": 1234076432.0, "step": 18270 }, { "entropy": 0.7815561160445214, "epoch": 5.537756570476407, "grad_norm": 0.15440933406352997, "learning_rate": 7.464248073658599e-05, "loss": 1.1862, "mean_token_accuracy": 0.7724433094263077, "num_tokens": 1234754653.0, "step": 18280 }, { "entropy": 0.7791904523968697, "epoch": 5.540786184957964, "grad_norm": 0.16058407723903656, "learning_rate": 7.461341908364261e-05, "loss": 1.1848, "mean_token_accuracy": 0.7732041746377945, "num_tokens": 1235437436.0, "step": 18290 }, { "entropy": 0.7996877983212471, "epoch": 5.543815799439521, "grad_norm": 0.15439097583293915, "learning_rate": 7.458434645188304e-05, "loss": 1.1997, "mean_token_accuracy": 0.7694524973630905, "num_tokens": 1236110412.0, "step": 18300 }, { "entropy": 0.7916013330221177, "epoch": 5.546845413921078, "grad_norm": 0.15042059123516083, "learning_rate": 7.45552628542751e-05, "loss": 1.183, "mean_token_accuracy": 0.7714708939194679, "num_tokens": 1236801923.0, "step": 18310 }, { "entropy": 0.7963166251778603, "epoch": 5.549875028402636, "grad_norm": 0.14816337823867798, "learning_rate": 7.452616830379156e-05, "loss": 1.1963, "mean_token_accuracy": 0.7706051111221314, "num_tokens": 1237476287.0, "step": 18320 }, { "entropy": 0.7852500066161155, "epoch": 5.552904642884193, "grad_norm": 0.14444084465503693, "learning_rate": 7.449706281341e-05, "loss": 1.1888, "mean_token_accuracy": 0.7696308091282844, "num_tokens": 1238147258.0, "step": 18330 }, { "entropy": 0.796066838502884, "epoch": 5.555934257365751, "grad_norm": 0.1868371218442917, "learning_rate": 7.446794639611298e-05, "loss": 1.1928, "mean_token_accuracy": 0.7696374446153641, "num_tokens": 1238820762.0, "step": 18340 }, { "entropy": 0.7891002267599105, "epoch": 5.558963871847308, "grad_norm": 0.1523510366678238, "learning_rate": 7.443881906488786e-05, "loss": 1.1798, "mean_token_accuracy": 0.7739343196153641, "num_tokens": 1239507757.0, "step": 18350 }, { "entropy": 0.8047911122441291, "epoch": 5.5619934863288645, "grad_norm": 0.15410935878753662, "learning_rate": 7.440968083272688e-05, "loss": 1.1976, "mean_token_accuracy": 0.7676858559250832, "num_tokens": 1240189486.0, "step": 18360 }, { "entropy": 0.7843504250049591, "epoch": 5.565023100810421, "grad_norm": 0.15792490541934967, "learning_rate": 7.438053171262715e-05, "loss": 1.1952, "mean_token_accuracy": 0.7682094275951385, "num_tokens": 1240846048.0, "step": 18370 }, { "entropy": 0.7906922951340676, "epoch": 5.568052715291979, "grad_norm": 0.1568026840686798, "learning_rate": 7.435137171759062e-05, "loss": 1.1846, "mean_token_accuracy": 0.7710306391119957, "num_tokens": 1241524774.0, "step": 18380 }, { "entropy": 0.7865373685956001, "epoch": 5.571082329773536, "grad_norm": 0.15185314416885376, "learning_rate": 7.432220086062414e-05, "loss": 1.1851, "mean_token_accuracy": 0.7710539147257804, "num_tokens": 1242203296.0, "step": 18390 }, { "entropy": 0.7973109602928161, "epoch": 5.574111944255094, "grad_norm": 0.14071327447891235, "learning_rate": 7.429301915473935e-05, "loss": 1.1965, "mean_token_accuracy": 0.7697199746966362, "num_tokens": 1242890670.0, "step": 18400 }, { "entropy": 0.7858362942934036, "epoch": 5.577141558736651, "grad_norm": 0.16116997599601746, "learning_rate": 7.426382661295275e-05, "loss": 1.1804, "mean_token_accuracy": 0.7727782040834427, "num_tokens": 1243573883.0, "step": 18410 }, { "entropy": 0.7910760179162025, "epoch": 5.580171173218208, "grad_norm": 0.16359494626522064, "learning_rate": 7.423462324828567e-05, "loss": 1.1888, "mean_token_accuracy": 0.77058724462986, "num_tokens": 1244257292.0, "step": 18420 }, { "entropy": 0.7906682461500167, "epoch": 5.583200787699766, "grad_norm": 0.15936478972434998, "learning_rate": 7.42054090737643e-05, "loss": 1.1946, "mean_token_accuracy": 0.7710719019174576, "num_tokens": 1244940279.0, "step": 18430 }, { "entropy": 0.782247719168663, "epoch": 5.5862304021813225, "grad_norm": 0.16135825216770172, "learning_rate": 7.417618410241959e-05, "loss": 1.1838, "mean_token_accuracy": 0.7702401608228684, "num_tokens": 1245610404.0, "step": 18440 }, { "entropy": 0.7918521538376808, "epoch": 5.589260016662879, "grad_norm": 0.1581607162952423, "learning_rate": 7.414694834728736e-05, "loss": 1.1946, "mean_token_accuracy": 0.7683800473809242, "num_tokens": 1246285274.0, "step": 18450 }, { "entropy": 0.7975309431552887, "epoch": 5.592289631144437, "grad_norm": 0.17319001257419586, "learning_rate": 7.411770182140821e-05, "loss": 1.1892, "mean_token_accuracy": 0.7679103955626487, "num_tokens": 1246967242.0, "step": 18460 }, { "entropy": 0.7915191993117332, "epoch": 5.595319245625994, "grad_norm": 0.15570372343063354, "learning_rate": 7.408844453782755e-05, "loss": 1.192, "mean_token_accuracy": 0.7694740876555443, "num_tokens": 1247643658.0, "step": 18470 }, { "entropy": 0.7896543279290199, "epoch": 5.598348860107551, "grad_norm": 0.1625194251537323, "learning_rate": 7.405917650959561e-05, "loss": 1.1951, "mean_token_accuracy": 0.769255644083023, "num_tokens": 1248312462.0, "step": 18480 }, { "entropy": 0.7988270029425621, "epoch": 5.601378474589109, "grad_norm": 0.16404829919338226, "learning_rate": 7.402989774976737e-05, "loss": 1.1969, "mean_token_accuracy": 0.761144632101059, "num_tokens": 1248965124.0, "step": 18490 }, { "entropy": 0.7940766930580139, "epoch": 5.604408089070666, "grad_norm": 0.15311947464942932, "learning_rate": 7.400060827140263e-05, "loss": 1.1903, "mean_token_accuracy": 0.7688000202178955, "num_tokens": 1249647917.0, "step": 18500 }, { "entropy": 0.7972461000084877, "epoch": 5.607437703552223, "grad_norm": 0.16149654984474182, "learning_rate": 7.397130808756597e-05, "loss": 1.1975, "mean_token_accuracy": 0.7670831084251404, "num_tokens": 1250314808.0, "step": 18510 }, { "entropy": 0.7870167657732964, "epoch": 5.6104673180337805, "grad_norm": 0.1804661601781845, "learning_rate": 7.394199721132675e-05, "loss": 1.1917, "mean_token_accuracy": 0.773274627327919, "num_tokens": 1251001863.0, "step": 18520 }, { "entropy": 0.8022281333804131, "epoch": 5.613496932515337, "grad_norm": 0.153724804520607, "learning_rate": 7.391267565575905e-05, "loss": 1.2029, "mean_token_accuracy": 0.7655744567513466, "num_tokens": 1251667408.0, "step": 18530 }, { "entropy": 0.7879312083125114, "epoch": 5.616526546996894, "grad_norm": 0.16561438143253326, "learning_rate": 7.388334343394179e-05, "loss": 1.193, "mean_token_accuracy": 0.7718574240803718, "num_tokens": 1252351615.0, "step": 18540 }, { "entropy": 0.7981021985411644, "epoch": 5.619556161478452, "grad_norm": 0.16246850788593292, "learning_rate": 7.385400055895857e-05, "loss": 1.1871, "mean_token_accuracy": 0.7688254058361054, "num_tokens": 1253036617.0, "step": 18550 }, { "entropy": 0.7941867128014565, "epoch": 5.622585775960009, "grad_norm": 0.15055938065052032, "learning_rate": 7.382464704389783e-05, "loss": 1.1978, "mean_token_accuracy": 0.7644386947154999, "num_tokens": 1253693899.0, "step": 18560 }, { "entropy": 0.8000770881772041, "epoch": 5.625615390441566, "grad_norm": 0.161276713013649, "learning_rate": 7.379528290185265e-05, "loss": 1.1939, "mean_token_accuracy": 0.7682380676269531, "num_tokens": 1254383135.0, "step": 18570 }, { "entropy": 0.7930928155779838, "epoch": 5.628645004923124, "grad_norm": 0.16263698041439056, "learning_rate": 7.376590814592095e-05, "loss": 1.1872, "mean_token_accuracy": 0.7758932799100876, "num_tokens": 1255091820.0, "step": 18580 }, { "entropy": 0.792005829513073, "epoch": 5.631674619404681, "grad_norm": 0.15446345508098602, "learning_rate": 7.373652278920532e-05, "loss": 1.1912, "mean_token_accuracy": 0.7698688164353371, "num_tokens": 1255764662.0, "step": 18590 }, { "entropy": 0.7881263554096222, "epoch": 5.634704233886238, "grad_norm": 0.1543988734483719, "learning_rate": 7.37071268448131e-05, "loss": 1.1978, "mean_token_accuracy": 0.7717945784330368, "num_tokens": 1256437468.0, "step": 18600 }, { "entropy": 0.7846297726035119, "epoch": 5.6377338483677955, "grad_norm": 0.15434779226779938, "learning_rate": 7.367772032585634e-05, "loss": 1.1856, "mean_token_accuracy": 0.7687615990638733, "num_tokens": 1257111812.0, "step": 18610 }, { "entropy": 0.7753724783658982, "epoch": 5.640763462849352, "grad_norm": 0.15832781791687012, "learning_rate": 7.364830324545187e-05, "loss": 1.1737, "mean_token_accuracy": 0.7739689543843269, "num_tokens": 1257785256.0, "step": 18620 }, { "entropy": 0.7910309210419655, "epoch": 5.643793077330909, "grad_norm": 0.15043175220489502, "learning_rate": 7.361887561672111e-05, "loss": 1.1846, "mean_token_accuracy": 0.7723511561751366, "num_tokens": 1258477035.0, "step": 18630 }, { "entropy": 0.7858791396021843, "epoch": 5.646822691812467, "grad_norm": 0.15828905999660492, "learning_rate": 7.358943745279029e-05, "loss": 1.1948, "mean_token_accuracy": 0.7697947084903717, "num_tokens": 1259147551.0, "step": 18640 }, { "entropy": 0.8024312183260918, "epoch": 5.649852306294024, "grad_norm": 0.1596386730670929, "learning_rate": 7.35599887667903e-05, "loss": 1.1964, "mean_token_accuracy": 0.7663578435778617, "num_tokens": 1259809721.0, "step": 18650 }, { "entropy": 0.7919725030660629, "epoch": 5.652881920775581, "grad_norm": 0.16339971125125885, "learning_rate": 7.353052957185673e-05, "loss": 1.1914, "mean_token_accuracy": 0.7671084478497505, "num_tokens": 1260475216.0, "step": 18660 }, { "entropy": 0.7944097578525543, "epoch": 5.655911535257139, "grad_norm": 0.15540051460266113, "learning_rate": 7.350105988112987e-05, "loss": 1.1922, "mean_token_accuracy": 0.7699928566813469, "num_tokens": 1261157376.0, "step": 18670 }, { "entropy": 0.7971886783838272, "epoch": 5.658941149738696, "grad_norm": 0.1517374962568283, "learning_rate": 7.347157970775463e-05, "loss": 1.1972, "mean_token_accuracy": 0.7699651926755905, "num_tokens": 1261834200.0, "step": 18680 }, { "entropy": 0.80126001983881, "epoch": 5.6619707642202535, "grad_norm": 0.1589142233133316, "learning_rate": 7.34420890648807e-05, "loss": 1.193, "mean_token_accuracy": 0.7652816787362099, "num_tokens": 1262514719.0, "step": 18690 }, { "entropy": 0.7946578770875931, "epoch": 5.66500037870181, "grad_norm": 0.15374290943145752, "learning_rate": 7.341258796566233e-05, "loss": 1.1853, "mean_token_accuracy": 0.7693043813109398, "num_tokens": 1263195002.0, "step": 18700 }, { "entropy": 0.7874152943491936, "epoch": 5.668029993183367, "grad_norm": 0.15570077300071716, "learning_rate": 7.338307642325852e-05, "loss": 1.189, "mean_token_accuracy": 0.7674408495426178, "num_tokens": 1263863468.0, "step": 18710 }, { "entropy": 0.7889522552490235, "epoch": 5.671059607664924, "grad_norm": 0.15601657330989838, "learning_rate": 7.335355445083288e-05, "loss": 1.1973, "mean_token_accuracy": 0.7666130721569061, "num_tokens": 1264526777.0, "step": 18720 }, { "entropy": 0.7890931233763695, "epoch": 5.674089222146482, "grad_norm": 0.16730496287345886, "learning_rate": 7.332402206155369e-05, "loss": 1.1919, "mean_token_accuracy": 0.7724406734108925, "num_tokens": 1265198857.0, "step": 18730 }, { "entropy": 0.7928927630186081, "epoch": 5.677118836628039, "grad_norm": 0.16443376243114471, "learning_rate": 7.329447926859388e-05, "loss": 1.188, "mean_token_accuracy": 0.7705048725008965, "num_tokens": 1265886577.0, "step": 18740 }, { "entropy": 0.7997891709208489, "epoch": 5.680148451109597, "grad_norm": 0.16134659945964813, "learning_rate": 7.326492608513098e-05, "loss": 1.1975, "mean_token_accuracy": 0.7630728602409362, "num_tokens": 1266559041.0, "step": 18750 }, { "entropy": 0.79414232224226, "epoch": 5.683178065591154, "grad_norm": 0.16661733388900757, "learning_rate": 7.323536252434724e-05, "loss": 1.1932, "mean_token_accuracy": 0.7700477302074432, "num_tokens": 1267238094.0, "step": 18760 }, { "entropy": 0.7935743287205697, "epoch": 5.686207680072711, "grad_norm": 0.15668271481990814, "learning_rate": 7.320578859942945e-05, "loss": 1.1891, "mean_token_accuracy": 0.7664597243070602, "num_tokens": 1267909228.0, "step": 18770 }, { "entropy": 0.7928174495697021, "epoch": 5.6892372945542675, "grad_norm": 0.16351042687892914, "learning_rate": 7.317620432356907e-05, "loss": 1.1862, "mean_token_accuracy": 0.7701626896858216, "num_tokens": 1268588648.0, "step": 18780 }, { "entropy": 0.8047268077731132, "epoch": 5.692266909035825, "grad_norm": 0.15795977413654327, "learning_rate": 7.314660970996217e-05, "loss": 1.2056, "mean_token_accuracy": 0.7643048569560051, "num_tokens": 1269254322.0, "step": 18790 }, { "entropy": 0.8028568893671035, "epoch": 5.695296523517382, "grad_norm": 0.15192359685897827, "learning_rate": 7.311700477180944e-05, "loss": 1.2016, "mean_token_accuracy": 0.7660802662372589, "num_tokens": 1269931439.0, "step": 18800 }, { "entropy": 0.7914088234305382, "epoch": 5.69832613799894, "grad_norm": 0.1599586009979248, "learning_rate": 7.308738952231614e-05, "loss": 1.1873, "mean_token_accuracy": 0.7692271962761879, "num_tokens": 1270601240.0, "step": 18810 }, { "entropy": 0.7939092203974724, "epoch": 5.701355752480497, "grad_norm": 0.14729230105876923, "learning_rate": 7.305776397469214e-05, "loss": 1.1926, "mean_token_accuracy": 0.7719800055027009, "num_tokens": 1271291215.0, "step": 18820 }, { "entropy": 0.7896327704191208, "epoch": 5.704385366962054, "grad_norm": 0.1626734584569931, "learning_rate": 7.302812814215198e-05, "loss": 1.1893, "mean_token_accuracy": 0.767254875600338, "num_tokens": 1271959921.0, "step": 18830 }, { "entropy": 0.7918659880757332, "epoch": 5.707414981443611, "grad_norm": 0.14825868606567383, "learning_rate": 7.299848203791467e-05, "loss": 1.1853, "mean_token_accuracy": 0.7726846173405647, "num_tokens": 1272651250.0, "step": 18840 }, { "entropy": 0.789652743935585, "epoch": 5.710444595925169, "grad_norm": 0.16363964974880219, "learning_rate": 7.296882567520385e-05, "loss": 1.1801, "mean_token_accuracy": 0.7706850856542588, "num_tokens": 1273330270.0, "step": 18850 }, { "entropy": 0.7887715801596642, "epoch": 5.7134742104067255, "grad_norm": 0.15937940776348114, "learning_rate": 7.293915906724778e-05, "loss": 1.1944, "mean_token_accuracy": 0.7677953779697418, "num_tokens": 1273998084.0, "step": 18860 }, { "entropy": 0.7939240783452988, "epoch": 5.716503824888283, "grad_norm": 0.15611006319522858, "learning_rate": 7.29094822272792e-05, "loss": 1.1955, "mean_token_accuracy": 0.7675396710634231, "num_tokens": 1274670896.0, "step": 18870 }, { "entropy": 0.781504075229168, "epoch": 5.71953343936984, "grad_norm": 0.17100565135478973, "learning_rate": 7.287979516853553e-05, "loss": 1.1761, "mean_token_accuracy": 0.7743938907980918, "num_tokens": 1275342835.0, "step": 18880 }, { "entropy": 0.7951151594519615, "epoch": 5.722563053851397, "grad_norm": 0.16005516052246094, "learning_rate": 7.285009790425863e-05, "loss": 1.1931, "mean_token_accuracy": 0.7707011327147484, "num_tokens": 1276026509.0, "step": 18890 }, { "entropy": 0.7936810344457627, "epoch": 5.725592668332955, "grad_norm": 0.166903555393219, "learning_rate": 7.282039044769499e-05, "loss": 1.1979, "mean_token_accuracy": 0.769514313340187, "num_tokens": 1276706121.0, "step": 18900 }, { "entropy": 0.7938599124550819, "epoch": 5.728622282814512, "grad_norm": 0.1707628220319748, "learning_rate": 7.279067281209563e-05, "loss": 1.1941, "mean_token_accuracy": 0.7680787861347198, "num_tokens": 1277378551.0, "step": 18910 }, { "entropy": 0.7916633740067482, "epoch": 5.731651897296069, "grad_norm": 0.15222841501235962, "learning_rate": 7.276094501071607e-05, "loss": 1.1871, "mean_token_accuracy": 0.772497744858265, "num_tokens": 1278072838.0, "step": 18920 }, { "entropy": 0.7980836063623429, "epoch": 5.734681511777627, "grad_norm": 0.16312545537948608, "learning_rate": 7.273120705681643e-05, "loss": 1.1988, "mean_token_accuracy": 0.7672946929931641, "num_tokens": 1278736216.0, "step": 18930 }, { "entropy": 0.7914466798305512, "epoch": 5.737711126259184, "grad_norm": 0.15629099309444427, "learning_rate": 7.27014589636613e-05, "loss": 1.1896, "mean_token_accuracy": 0.7754410624504089, "num_tokens": 1279423182.0, "step": 18940 }, { "entropy": 0.7761142745614051, "epoch": 5.7407407407407405, "grad_norm": 0.16202078759670258, "learning_rate": 7.267170074451983e-05, "loss": 1.1829, "mean_token_accuracy": 0.7714239031076431, "num_tokens": 1280103864.0, "step": 18950 }, { "entropy": 0.8018604129552841, "epoch": 5.743770355222298, "grad_norm": 0.15807020664215088, "learning_rate": 7.264193241266568e-05, "loss": 1.1949, "mean_token_accuracy": 0.7640351712703705, "num_tokens": 1280776689.0, "step": 18960 }, { "entropy": 0.7881122648715972, "epoch": 5.746799969703855, "grad_norm": 0.15949994325637817, "learning_rate": 7.2612153981377e-05, "loss": 1.185, "mean_token_accuracy": 0.7737415760755539, "num_tokens": 1281465727.0, "step": 18970 }, { "entropy": 0.797203715145588, "epoch": 5.749829584185412, "grad_norm": 0.15690580010414124, "learning_rate": 7.258236546393645e-05, "loss": 1.1887, "mean_token_accuracy": 0.7698037967085838, "num_tokens": 1282148924.0, "step": 18980 }, { "entropy": 0.8052161574363709, "epoch": 5.75285919866697, "grad_norm": 0.16531206667423248, "learning_rate": 7.255256687363124e-05, "loss": 1.1977, "mean_token_accuracy": 0.7619235455989838, "num_tokens": 1282816960.0, "step": 18990 }, { "entropy": 0.7997000426054001, "epoch": 5.755888813148527, "grad_norm": 0.152301624417305, "learning_rate": 7.252275822375301e-05, "loss": 1.2022, "mean_token_accuracy": 0.7650304600596428, "num_tokens": 1283487423.0, "step": 19000 }, { "entropy": 0.8115334004163742, "epoch": 5.758918427630084, "grad_norm": 0.16885791718959808, "learning_rate": 7.249293952759789e-05, "loss": 1.2024, "mean_token_accuracy": 0.7626567095518112, "num_tokens": 1284162524.0, "step": 19010 }, { "entropy": 0.8028330817818642, "epoch": 5.761948042111642, "grad_norm": 0.15730434656143188, "learning_rate": 7.246311079846654e-05, "loss": 1.2063, "mean_token_accuracy": 0.7625677794218063, "num_tokens": 1284825914.0, "step": 19020 }, { "entropy": 0.7898641049861908, "epoch": 5.7649776565931985, "grad_norm": 0.16985639929771423, "learning_rate": 7.243327204966406e-05, "loss": 1.1799, "mean_token_accuracy": 0.7730851233005523, "num_tokens": 1285507408.0, "step": 19030 }, { "entropy": 0.7899454146623611, "epoch": 5.768007271074755, "grad_norm": 0.16257216036319733, "learning_rate": 7.24034232945e-05, "loss": 1.1978, "mean_token_accuracy": 0.7655003413558006, "num_tokens": 1286160339.0, "step": 19040 }, { "entropy": 0.7982629612088203, "epoch": 5.771036885556313, "grad_norm": 0.15875552594661713, "learning_rate": 7.237356454628841e-05, "loss": 1.2007, "mean_token_accuracy": 0.7625041589140892, "num_tokens": 1286820905.0, "step": 19050 }, { "entropy": 0.8031689330935479, "epoch": 5.77406650003787, "grad_norm": 0.1723564863204956, "learning_rate": 7.234369581834783e-05, "loss": 1.199, "mean_token_accuracy": 0.7648579478263855, "num_tokens": 1287494716.0, "step": 19060 }, { "entropy": 0.7959000512957572, "epoch": 5.777096114519427, "grad_norm": 0.15248917043209076, "learning_rate": 7.231381712400115e-05, "loss": 1.1898, "mean_token_accuracy": 0.7684626832604409, "num_tokens": 1288172111.0, "step": 19070 }, { "entropy": 0.796762265264988, "epoch": 5.780125729000985, "grad_norm": 0.1766333132982254, "learning_rate": 7.228392847657577e-05, "loss": 1.1982, "mean_token_accuracy": 0.7703196331858635, "num_tokens": 1288845492.0, "step": 19080 }, { "entropy": 0.7944855257868767, "epoch": 5.783155343482542, "grad_norm": 0.1556379795074463, "learning_rate": 7.225402988940352e-05, "loss": 1.1894, "mean_token_accuracy": 0.7687153145670891, "num_tokens": 1289525200.0, "step": 19090 }, { "entropy": 0.7888134881854058, "epoch": 5.786184957964099, "grad_norm": 0.15383945405483246, "learning_rate": 7.222412137582071e-05, "loss": 1.187, "mean_token_accuracy": 0.7703951612114907, "num_tokens": 1290195608.0, "step": 19100 }, { "entropy": 0.7984809562563896, "epoch": 5.7892145724456565, "grad_norm": 0.2070225477218628, "learning_rate": 7.219420294916798e-05, "loss": 1.1874, "mean_token_accuracy": 0.7712823674082756, "num_tokens": 1290892584.0, "step": 19110 }, { "entropy": 0.784035250544548, "epoch": 5.792244186927213, "grad_norm": 0.16227585077285767, "learning_rate": 7.216427462279047e-05, "loss": 1.1972, "mean_token_accuracy": 0.7716500088572502, "num_tokens": 1291561531.0, "step": 19120 }, { "entropy": 0.7916143849492073, "epoch": 5.79527380140877, "grad_norm": 0.15463599562644958, "learning_rate": 7.213433641003771e-05, "loss": 1.1966, "mean_token_accuracy": 0.7703131452202797, "num_tokens": 1292238265.0, "step": 19130 }, { "entropy": 0.795188945531845, "epoch": 5.798303415890328, "grad_norm": 0.15650375187397003, "learning_rate": 7.210438832426363e-05, "loss": 1.194, "mean_token_accuracy": 0.76949143409729, "num_tokens": 1292912695.0, "step": 19140 }, { "entropy": 0.7866457670927047, "epoch": 5.801333030371885, "grad_norm": 0.15094313025474548, "learning_rate": 7.207443037882659e-05, "loss": 1.1753, "mean_token_accuracy": 0.7699485033750534, "num_tokens": 1293607617.0, "step": 19150 }, { "entropy": 0.7934285506606102, "epoch": 5.804362644853443, "grad_norm": 0.159365713596344, "learning_rate": 7.204446258708931e-05, "loss": 1.1979, "mean_token_accuracy": 0.7682175755500793, "num_tokens": 1294279292.0, "step": 19160 }, { "entropy": 0.7908381313085556, "epoch": 5.807392259335, "grad_norm": 0.14878462255001068, "learning_rate": 7.201448496241896e-05, "loss": 1.1952, "mean_token_accuracy": 0.767137984931469, "num_tokens": 1294950994.0, "step": 19170 }, { "entropy": 0.7872956618666649, "epoch": 5.810421873816557, "grad_norm": 0.1709751933813095, "learning_rate": 7.198449751818702e-05, "loss": 1.1906, "mean_token_accuracy": 0.7710516393184662, "num_tokens": 1295627328.0, "step": 19180 }, { "entropy": 0.7985885605216027, "epoch": 5.813451488298114, "grad_norm": 0.15704365074634552, "learning_rate": 7.195450026776943e-05, "loss": 1.1973, "mean_token_accuracy": 0.7730544298887253, "num_tokens": 1296327150.0, "step": 19190 }, { "entropy": 0.795363686978817, "epoch": 5.8164811027796715, "grad_norm": 0.15271684527397156, "learning_rate": 7.192449322454642e-05, "loss": 1.1979, "mean_token_accuracy": 0.7606057360768318, "num_tokens": 1296982357.0, "step": 19200 }, { "entropy": 0.78994672447443, "epoch": 5.819510717261228, "grad_norm": 0.16777105629444122, "learning_rate": 7.189447640190267e-05, "loss": 1.1892, "mean_token_accuracy": 0.7716968283057213, "num_tokens": 1297670726.0, "step": 19210 }, { "entropy": 0.7922337487339973, "epoch": 5.822540331742786, "grad_norm": 0.15544579923152924, "learning_rate": 7.186444981322715e-05, "loss": 1.1848, "mean_token_accuracy": 0.7683053568005562, "num_tokens": 1298347347.0, "step": 19220 }, { "entropy": 0.791400583088398, "epoch": 5.825569946224343, "grad_norm": 0.1687878668308258, "learning_rate": 7.183441347191326e-05, "loss": 1.1846, "mean_token_accuracy": 0.7777029946446419, "num_tokens": 1299056544.0, "step": 19230 }, { "entropy": 0.7915820553898811, "epoch": 5.8285995607059, "grad_norm": 0.14781229197978973, "learning_rate": 7.18043673913587e-05, "loss": 1.1927, "mean_token_accuracy": 0.7701451942324639, "num_tokens": 1299743640.0, "step": 19240 }, { "entropy": 0.7932293877005577, "epoch": 5.831629175187457, "grad_norm": 0.1546233743429184, "learning_rate": 7.17743115849655e-05, "loss": 1.1925, "mean_token_accuracy": 0.7677518740296364, "num_tokens": 1300418189.0, "step": 19250 }, { "entropy": 0.7910405933856964, "epoch": 5.834658789669015, "grad_norm": 0.16984455287456512, "learning_rate": 7.174424606614008e-05, "loss": 1.1863, "mean_token_accuracy": 0.7770651251077652, "num_tokens": 1301116441.0, "step": 19260 }, { "entropy": 0.8009702622890472, "epoch": 5.837688404150572, "grad_norm": 0.15600861608982086, "learning_rate": 7.171417084829315e-05, "loss": 1.1978, "mean_token_accuracy": 0.7650102362036705, "num_tokens": 1301784912.0, "step": 19270 }, { "entropy": 0.7929332584142685, "epoch": 5.8407180186321295, "grad_norm": 0.15729230642318726, "learning_rate": 7.168408594483975e-05, "loss": 1.1938, "mean_token_accuracy": 0.7671513259410858, "num_tokens": 1302460911.0, "step": 19280 }, { "entropy": 0.7850655108690262, "epoch": 5.843747633113686, "grad_norm": 0.15478479862213135, "learning_rate": 7.16539913691993e-05, "loss": 1.1817, "mean_token_accuracy": 0.7702582538127899, "num_tokens": 1303139014.0, "step": 19290 }, { "entropy": 0.7898176908493042, "epoch": 5.846777247595243, "grad_norm": 0.15592904388904572, "learning_rate": 7.162388713479544e-05, "loss": 1.1866, "mean_token_accuracy": 0.7677637532353401, "num_tokens": 1303808926.0, "step": 19300 }, { "entropy": 0.790964238345623, "epoch": 5.8498068620768, "grad_norm": 0.1592186987400055, "learning_rate": 7.159377325505617e-05, "loss": 1.1929, "mean_token_accuracy": 0.7658483535051346, "num_tokens": 1304468962.0, "step": 19310 }, { "entropy": 0.7826980590820313, "epoch": 5.852836476558358, "grad_norm": 0.15509718656539917, "learning_rate": 7.15636497434138e-05, "loss": 1.1879, "mean_token_accuracy": 0.7703955799341202, "num_tokens": 1305147093.0, "step": 19320 }, { "entropy": 0.7993157789111137, "epoch": 5.855866091039915, "grad_norm": 0.1570509821176529, "learning_rate": 7.153351661330492e-05, "loss": 1.2041, "mean_token_accuracy": 0.7632969990372658, "num_tokens": 1305810051.0, "step": 19330 }, { "entropy": 0.7967530325055122, "epoch": 5.858895705521473, "grad_norm": 0.16697965562343597, "learning_rate": 7.150337387817042e-05, "loss": 1.1946, "mean_token_accuracy": 0.7688148751854896, "num_tokens": 1306484715.0, "step": 19340 }, { "entropy": 0.7971322774887085, "epoch": 5.86192532000303, "grad_norm": 0.16007335484027863, "learning_rate": 7.147322155145544e-05, "loss": 1.1977, "mean_token_accuracy": 0.7678506687283516, "num_tokens": 1307157150.0, "step": 19350 }, { "entropy": 0.7920721471309662, "epoch": 5.864954934484587, "grad_norm": 0.15300531685352325, "learning_rate": 7.144305964660947e-05, "loss": 1.1928, "mean_token_accuracy": 0.7665317371487618, "num_tokens": 1307823563.0, "step": 19360 }, { "entropy": 0.7872507840394973, "epoch": 5.867984548966144, "grad_norm": 0.15557889640331268, "learning_rate": 7.14128881770862e-05, "loss": 1.1988, "mean_token_accuracy": 0.7663601219654084, "num_tokens": 1308485053.0, "step": 19370 }, { "entropy": 0.8012245669960976, "epoch": 5.871014163447701, "grad_norm": 0.15355554223060608, "learning_rate": 7.138270715634362e-05, "loss": 1.1988, "mean_token_accuracy": 0.7701532348990441, "num_tokens": 1309169126.0, "step": 19380 }, { "entropy": 0.7840667784214019, "epoch": 5.874043777929258, "grad_norm": 0.15598563849925995, "learning_rate": 7.135251659784396e-05, "loss": 1.1861, "mean_token_accuracy": 0.769740016758442, "num_tokens": 1309849883.0, "step": 19390 }, { "entropy": 0.7877208113670349, "epoch": 5.877073392410816, "grad_norm": 0.15426141023635864, "learning_rate": 7.132231651505377e-05, "loss": 1.2014, "mean_token_accuracy": 0.7657011389732361, "num_tokens": 1310511116.0, "step": 19400 }, { "entropy": 0.7917147055268288, "epoch": 5.880103006892373, "grad_norm": 0.15134041011333466, "learning_rate": 7.129210692144374e-05, "loss": 1.1986, "mean_token_accuracy": 0.7654175668954849, "num_tokens": 1311168820.0, "step": 19410 }, { "entropy": 0.7980439901351929, "epoch": 5.88313262137393, "grad_norm": 0.1536293923854828, "learning_rate": 7.126188783048888e-05, "loss": 1.2001, "mean_token_accuracy": 0.7672934398055077, "num_tokens": 1311843261.0, "step": 19420 }, { "entropy": 0.7802516609430313, "epoch": 5.886162235855488, "grad_norm": 0.1546633094549179, "learning_rate": 7.123165925566842e-05, "loss": 1.1839, "mean_token_accuracy": 0.7746330320835113, "num_tokens": 1312533249.0, "step": 19430 }, { "entropy": 0.7927233129739761, "epoch": 5.889191850337045, "grad_norm": 0.15313683450222015, "learning_rate": 7.120142121046581e-05, "loss": 1.1906, "mean_token_accuracy": 0.7654723390936852, "num_tokens": 1313197685.0, "step": 19440 }, { "entropy": 0.7918976411223412, "epoch": 5.8922214648186015, "grad_norm": 0.1636413335800171, "learning_rate": 7.117117370836873e-05, "loss": 1.1846, "mean_token_accuracy": 0.7683885022997856, "num_tokens": 1313872546.0, "step": 19450 }, { "entropy": 0.7887762054800987, "epoch": 5.895251079300159, "grad_norm": 0.163103386759758, "learning_rate": 7.11409167628691e-05, "loss": 1.195, "mean_token_accuracy": 0.7676061347126961, "num_tokens": 1314542011.0, "step": 19460 }, { "entropy": 0.7964710831642151, "epoch": 5.898280693781716, "grad_norm": 0.1580525040626526, "learning_rate": 7.1110650387463e-05, "loss": 1.19, "mean_token_accuracy": 0.7661490544676781, "num_tokens": 1315215987.0, "step": 19470 }, { "entropy": 0.7858775854110718, "epoch": 5.901310308263273, "grad_norm": 0.16609440743923187, "learning_rate": 7.108037459565078e-05, "loss": 1.1866, "mean_token_accuracy": 0.7727140128612519, "num_tokens": 1315903511.0, "step": 19480 }, { "entropy": 0.8043234646320343, "epoch": 5.904339922744831, "grad_norm": 0.16437289118766785, "learning_rate": 7.105008940093694e-05, "loss": 1.2015, "mean_token_accuracy": 0.7644197762012481, "num_tokens": 1316581139.0, "step": 19490 }, { "entropy": 0.7888402491807938, "epoch": 5.907369537226388, "grad_norm": 0.15607315301895142, "learning_rate": 7.101979481683018e-05, "loss": 1.1898, "mean_token_accuracy": 0.7767038270831108, "num_tokens": 1317282784.0, "step": 19500 }, { "entropy": 0.7878912091255188, "epoch": 5.910399151707945, "grad_norm": 0.16913576424121857, "learning_rate": 7.098949085684342e-05, "loss": 1.19, "mean_token_accuracy": 0.7655957534909248, "num_tokens": 1317939963.0, "step": 19510 }, { "entropy": 0.785400713980198, "epoch": 5.913428766189503, "grad_norm": 0.1648530811071396, "learning_rate": 7.095917753449373e-05, "loss": 1.1815, "mean_token_accuracy": 0.7728199392557145, "num_tokens": 1318624430.0, "step": 19520 }, { "entropy": 0.7962307214736939, "epoch": 5.91645838067106, "grad_norm": 0.16219139099121094, "learning_rate": 7.092885486330239e-05, "loss": 1.1963, "mean_token_accuracy": 0.7642073258757591, "num_tokens": 1319285362.0, "step": 19530 }, { "entropy": 0.7846979156136513, "epoch": 5.9194879951526165, "grad_norm": 0.1602168083190918, "learning_rate": 7.089852285679479e-05, "loss": 1.181, "mean_token_accuracy": 0.766859495639801, "num_tokens": 1319945502.0, "step": 19540 }, { "entropy": 0.786933034658432, "epoch": 5.922517609634174, "grad_norm": 0.16247202455997467, "learning_rate": 7.086818152850055e-05, "loss": 1.1898, "mean_token_accuracy": 0.770433597266674, "num_tokens": 1320615059.0, "step": 19550 }, { "entropy": 0.794632551074028, "epoch": 5.925547224115731, "grad_norm": 0.16537994146347046, "learning_rate": 7.083783089195341e-05, "loss": 1.1899, "mean_token_accuracy": 0.767238911986351, "num_tokens": 1321296288.0, "step": 19560 }, { "entropy": 0.7873178437352181, "epoch": 5.928576838597288, "grad_norm": 0.15839438140392303, "learning_rate": 7.08074709606913e-05, "loss": 1.1878, "mean_token_accuracy": 0.7728889375925064, "num_tokens": 1321973030.0, "step": 19570 }, { "entropy": 0.7870238736271858, "epoch": 5.931606453078846, "grad_norm": 0.16366255283355713, "learning_rate": 7.077710174825623e-05, "loss": 1.19, "mean_token_accuracy": 0.7699470698833466, "num_tokens": 1322645930.0, "step": 19580 }, { "entropy": 0.7962046086788177, "epoch": 5.934636067560403, "grad_norm": 0.17779672145843506, "learning_rate": 7.07467232681944e-05, "loss": 1.2058, "mean_token_accuracy": 0.7600207984447479, "num_tokens": 1323287817.0, "step": 19590 }, { "entropy": 0.7926531061530113, "epoch": 5.93766568204196, "grad_norm": 0.15745460987091064, "learning_rate": 7.071633553405614e-05, "loss": 1.1926, "mean_token_accuracy": 0.7657125771045685, "num_tokens": 1323957898.0, "step": 19600 }, { "entropy": 0.7850239261984825, "epoch": 5.940695296523518, "grad_norm": 0.15340839326381683, "learning_rate": 7.06859385593959e-05, "loss": 1.1834, "mean_token_accuracy": 0.771136499941349, "num_tokens": 1324638511.0, "step": 19610 }, { "entropy": 0.7995823636651039, "epoch": 5.9437249110050745, "grad_norm": 0.14871494472026825, "learning_rate": 7.065553235777222e-05, "loss": 1.2005, "mean_token_accuracy": 0.7643187955021858, "num_tokens": 1325312190.0, "step": 19620 }, { "entropy": 0.7911097630858421, "epoch": 5.946754525486632, "grad_norm": 0.15272296965122223, "learning_rate": 7.062511694274783e-05, "loss": 1.1918, "mean_token_accuracy": 0.7669488146901131, "num_tokens": 1325992657.0, "step": 19630 }, { "entropy": 0.7895109668374062, "epoch": 5.949784139968189, "grad_norm": 0.16099727153778076, "learning_rate": 7.05946923278895e-05, "loss": 1.1893, "mean_token_accuracy": 0.7747374445199966, "num_tokens": 1326677656.0, "step": 19640 }, { "entropy": 0.7924715891480446, "epoch": 5.952813754449746, "grad_norm": 0.16094346344470978, "learning_rate": 7.056425852676815e-05, "loss": 1.1934, "mean_token_accuracy": 0.7675824925303459, "num_tokens": 1327354617.0, "step": 19650 }, { "entropy": 0.7775761127471924, "epoch": 5.955843368931303, "grad_norm": 0.15954801440238953, "learning_rate": 7.053381555295875e-05, "loss": 1.1722, "mean_token_accuracy": 0.777365431189537, "num_tokens": 1328049564.0, "step": 19660 }, { "entropy": 0.7921777665615082, "epoch": 5.958872983412861, "grad_norm": 0.1612180471420288, "learning_rate": 7.050336342004041e-05, "loss": 1.1975, "mean_token_accuracy": 0.7655911773443222, "num_tokens": 1328712963.0, "step": 19670 }, { "entropy": 0.8030157566070557, "epoch": 5.961902597894418, "grad_norm": 0.1663007289171219, "learning_rate": 7.047290214159632e-05, "loss": 1.2032, "mean_token_accuracy": 0.7662871927022934, "num_tokens": 1329379221.0, "step": 19680 }, { "entropy": 0.784817336499691, "epoch": 5.964932212375976, "grad_norm": 0.15503285825252533, "learning_rate": 7.04424317312137e-05, "loss": 1.1892, "mean_token_accuracy": 0.7714136093854904, "num_tokens": 1330057020.0, "step": 19690 }, { "entropy": 0.7827223688364029, "epoch": 5.9679618268575325, "grad_norm": 0.15545709431171417, "learning_rate": 7.04119522024839e-05, "loss": 1.1907, "mean_token_accuracy": 0.7702628016471863, "num_tokens": 1330722574.0, "step": 19700 }, { "entropy": 0.7823386698961258, "epoch": 5.970991441339089, "grad_norm": 0.154511496424675, "learning_rate": 7.038146356900229e-05, "loss": 1.1786, "mean_token_accuracy": 0.7735481590032578, "num_tokens": 1331397459.0, "step": 19710 }, { "entropy": 0.7853929430246354, "epoch": 5.974021055820646, "grad_norm": 0.16265028715133667, "learning_rate": 7.035096584436835e-05, "loss": 1.1951, "mean_token_accuracy": 0.7678526639938354, "num_tokens": 1332060462.0, "step": 19720 }, { "entropy": 0.7929315254092216, "epoch": 5.977050670302204, "grad_norm": 0.15164761245250702, "learning_rate": 7.032045904218559e-05, "loss": 1.1865, "mean_token_accuracy": 0.7695974588394165, "num_tokens": 1332737535.0, "step": 19730 }, { "entropy": 0.7914792761206627, "epoch": 5.980080284783761, "grad_norm": 0.1563020944595337, "learning_rate": 7.028994317606156e-05, "loss": 1.195, "mean_token_accuracy": 0.7703168600797653, "num_tokens": 1333424842.0, "step": 19740 }, { "entropy": 0.7891993552446366, "epoch": 5.983109899265319, "grad_norm": 0.15779948234558105, "learning_rate": 7.025941825960786e-05, "loss": 1.1873, "mean_token_accuracy": 0.7709450766444206, "num_tokens": 1334100599.0, "step": 19750 }, { "entropy": 0.7896355286240577, "epoch": 5.986139513746876, "grad_norm": 0.1670207530260086, "learning_rate": 7.022888430644015e-05, "loss": 1.1927, "mean_token_accuracy": 0.765450245141983, "num_tokens": 1334753309.0, "step": 19760 }, { "entropy": 0.7893132537603378, "epoch": 5.989169128228433, "grad_norm": 0.14903050661087036, "learning_rate": 7.019834133017806e-05, "loss": 1.1859, "mean_token_accuracy": 0.7656871825456619, "num_tokens": 1335410874.0, "step": 19770 }, { "entropy": 0.7974526420235634, "epoch": 5.9921987427099905, "grad_norm": 0.1609318107366562, "learning_rate": 7.016778934444534e-05, "loss": 1.1953, "mean_token_accuracy": 0.7665823966264724, "num_tokens": 1336087207.0, "step": 19780 }, { "entropy": 0.781538225710392, "epoch": 5.9952283571915475, "grad_norm": 0.1576438695192337, "learning_rate": 7.013722836286964e-05, "loss": 1.18, "mean_token_accuracy": 0.7733356311917305, "num_tokens": 1336780452.0, "step": 19790 }, { "entropy": 0.7825202971696854, "epoch": 5.998257971673104, "grad_norm": 0.16125264763832092, "learning_rate": 7.010665839908275e-05, "loss": 1.1815, "mean_token_accuracy": 0.771214810013771, "num_tokens": 1337455601.0, "step": 19800 }, { "entropy": 0.7791619728773068, "epoch": 6.001211845792623, "grad_norm": 0.15400418639183044, "learning_rate": 7.007607946672037e-05, "loss": 1.1656, "mean_token_accuracy": 0.7763047447571387, "num_tokens": 1338128078.0, "step": 19810 }, { "entropy": 0.7685077264904976, "epoch": 6.00424146027418, "grad_norm": 0.1843770295381546, "learning_rate": 7.004549157942224e-05, "loss": 1.1758, "mean_token_accuracy": 0.7730411633849144, "num_tokens": 1338802408.0, "step": 19820 }, { "entropy": 0.764088423550129, "epoch": 6.007271074755737, "grad_norm": 0.1830776333808899, "learning_rate": 7.001489475083208e-05, "loss": 1.1551, "mean_token_accuracy": 0.7786645635962486, "num_tokens": 1339493060.0, "step": 19830 }, { "entropy": 0.7704333201050758, "epoch": 6.010300689237295, "grad_norm": 0.1900692731142044, "learning_rate": 6.998428899459761e-05, "loss": 1.1623, "mean_token_accuracy": 0.771507452428341, "num_tokens": 1340164718.0, "step": 19840 }, { "entropy": 0.7672426775097847, "epoch": 6.013330303718852, "grad_norm": 0.17119291424751282, "learning_rate": 6.995367432437054e-05, "loss": 1.1562, "mean_token_accuracy": 0.7723920196294785, "num_tokens": 1340836623.0, "step": 19850 }, { "entropy": 0.7724898129701614, "epoch": 6.016359918200409, "grad_norm": 0.1718260645866394, "learning_rate": 6.992305075380654e-05, "loss": 1.1679, "mean_token_accuracy": 0.7711360037326813, "num_tokens": 1341513959.0, "step": 19860 }, { "entropy": 0.7720463991165161, "epoch": 6.019389532681966, "grad_norm": 0.1606324315071106, "learning_rate": 6.989241829656523e-05, "loss": 1.1679, "mean_token_accuracy": 0.7754394978284835, "num_tokens": 1342201281.0, "step": 19870 }, { "entropy": 0.7681100949645042, "epoch": 6.022419147163523, "grad_norm": 0.1752772033214569, "learning_rate": 6.986177696631026e-05, "loss": 1.1645, "mean_token_accuracy": 0.7750775516033173, "num_tokens": 1342876140.0, "step": 19880 }, { "entropy": 0.7687909170985222, "epoch": 6.02544876164508, "grad_norm": 0.19374622404575348, "learning_rate": 6.983112677670917e-05, "loss": 1.1653, "mean_token_accuracy": 0.7729068502783776, "num_tokens": 1343553676.0, "step": 19890 }, { "entropy": 0.7666820168495179, "epoch": 6.028478376126638, "grad_norm": 0.17111173272132874, "learning_rate": 6.980046774143348e-05, "loss": 1.1635, "mean_token_accuracy": 0.7752456784248352, "num_tokens": 1344228850.0, "step": 19900 }, { "entropy": 0.767454381287098, "epoch": 6.031507990608195, "grad_norm": 0.17312178015708923, "learning_rate": 6.976979987415865e-05, "loss": 1.1711, "mean_token_accuracy": 0.7684289693832398, "num_tokens": 1344890022.0, "step": 19910 }, { "entropy": 0.768279604613781, "epoch": 6.034537605089752, "grad_norm": 0.18300095200538635, "learning_rate": 6.973912318856409e-05, "loss": 1.1626, "mean_token_accuracy": 0.7727115780115128, "num_tokens": 1345562713.0, "step": 19920 }, { "entropy": 0.7703651890158654, "epoch": 6.03756721957131, "grad_norm": 0.181358203291893, "learning_rate": 6.970843769833313e-05, "loss": 1.1622, "mean_token_accuracy": 0.7731382727622986, "num_tokens": 1346241555.0, "step": 19930 }, { "entropy": 0.7753687739372254, "epoch": 6.040596834052867, "grad_norm": 0.17933306097984314, "learning_rate": 6.967774341715305e-05, "loss": 1.1737, "mean_token_accuracy": 0.7695334941148758, "num_tokens": 1346918603.0, "step": 19940 }, { "entropy": 0.763907541334629, "epoch": 6.043626448534424, "grad_norm": 0.17744694650173187, "learning_rate": 6.964704035871503e-05, "loss": 1.1644, "mean_token_accuracy": 0.7717218473553658, "num_tokens": 1347587724.0, "step": 19950 }, { "entropy": 0.7658626720309257, "epoch": 6.046656063015981, "grad_norm": 0.18007683753967285, "learning_rate": 6.961632853671414e-05, "loss": 1.1621, "mean_token_accuracy": 0.7754075735807419, "num_tokens": 1348267404.0, "step": 19960 }, { "entropy": 0.7742058008909225, "epoch": 6.049685677497538, "grad_norm": 0.1880507916212082, "learning_rate": 6.958560796484944e-05, "loss": 1.1697, "mean_token_accuracy": 0.7779480457305908, "num_tokens": 1348959197.0, "step": 19970 }, { "entropy": 0.778060233592987, "epoch": 6.052715291979096, "grad_norm": 0.18712382018566132, "learning_rate": 6.955487865682379e-05, "loss": 1.1759, "mean_token_accuracy": 0.7741430401802063, "num_tokens": 1349639599.0, "step": 19980 }, { "entropy": 0.7732942149043083, "epoch": 6.055744906460653, "grad_norm": 0.18797504901885986, "learning_rate": 6.952414062634403e-05, "loss": 1.1719, "mean_token_accuracy": 0.7757289350032807, "num_tokens": 1350327928.0, "step": 19990 }, { "entropy": 0.7683482483029366, "epoch": 6.05877452094221, "grad_norm": 0.18267159163951874, "learning_rate": 6.949339388712086e-05, "loss": 1.1685, "mean_token_accuracy": 0.7765683248639107, "num_tokens": 1351015808.0, "step": 20000 }, { "entropy": 0.7739752158522606, "epoch": 6.061804135423768, "grad_norm": 0.18127068877220154, "learning_rate": 6.946263845286884e-05, "loss": 1.1667, "mean_token_accuracy": 0.774583350121975, "num_tokens": 1351703087.0, "step": 20010 }, { "entropy": 0.7762703031301499, "epoch": 6.064833749905325, "grad_norm": 0.2021704465150833, "learning_rate": 6.943187433730647e-05, "loss": 1.1623, "mean_token_accuracy": 0.7790561065077781, "num_tokens": 1352405058.0, "step": 20020 }, { "entropy": 0.7643448635935783, "epoch": 6.067863364386882, "grad_norm": 0.1878141462802887, "learning_rate": 6.940110155415606e-05, "loss": 1.1676, "mean_token_accuracy": 0.7734850108623504, "num_tokens": 1353074092.0, "step": 20030 }, { "entropy": 0.7669717848300934, "epoch": 6.070892978868439, "grad_norm": 0.1895095407962799, "learning_rate": 6.937032011714379e-05, "loss": 1.166, "mean_token_accuracy": 0.7723661869764328, "num_tokens": 1353752868.0, "step": 20040 }, { "entropy": 0.756782203912735, "epoch": 6.073922593349996, "grad_norm": 0.1780415028333664, "learning_rate": 6.933953003999977e-05, "loss": 1.1609, "mean_token_accuracy": 0.7720997422933579, "num_tokens": 1354412022.0, "step": 20050 }, { "entropy": 0.7747316166758538, "epoch": 6.076952207831553, "grad_norm": 0.17755159735679626, "learning_rate": 6.930873133645787e-05, "loss": 1.1664, "mean_token_accuracy": 0.7734985128045082, "num_tokens": 1355089984.0, "step": 20060 }, { "entropy": 0.7698041707277298, "epoch": 6.079981822313111, "grad_norm": 0.18307293951511383, "learning_rate": 6.927792402025588e-05, "loss": 1.1701, "mean_token_accuracy": 0.768767488002777, "num_tokens": 1355750467.0, "step": 20070 }, { "entropy": 0.7640743359923363, "epoch": 6.083011436794668, "grad_norm": 0.18154790997505188, "learning_rate": 6.924710810513542e-05, "loss": 1.1586, "mean_token_accuracy": 0.7760217905044555, "num_tokens": 1356431932.0, "step": 20080 }, { "entropy": 0.7717065319418908, "epoch": 6.086041051276225, "grad_norm": 0.16322270035743713, "learning_rate": 6.921628360484186e-05, "loss": 1.1709, "mean_token_accuracy": 0.769880224764347, "num_tokens": 1357108067.0, "step": 20090 }, { "entropy": 0.7686224833130837, "epoch": 6.089070665757783, "grad_norm": 0.18724380433559418, "learning_rate": 6.918545053312454e-05, "loss": 1.1674, "mean_token_accuracy": 0.7708944946527481, "num_tokens": 1357783406.0, "step": 20100 }, { "entropy": 0.7616812288761139, "epoch": 6.09210028023934, "grad_norm": 0.18448813259601593, "learning_rate": 6.915460890373652e-05, "loss": 1.168, "mean_token_accuracy": 0.7714104011654854, "num_tokens": 1358442411.0, "step": 20110 }, { "entropy": 0.7682848289608956, "epoch": 6.0951298947208965, "grad_norm": 0.1796640306711197, "learning_rate": 6.912375873043474e-05, "loss": 1.1689, "mean_token_accuracy": 0.7742606103420258, "num_tokens": 1359125270.0, "step": 20120 }, { "entropy": 0.7673468515276909, "epoch": 6.098159509202454, "grad_norm": 0.17251326143741608, "learning_rate": 6.909290002697987e-05, "loss": 1.1717, "mean_token_accuracy": 0.7708658576011658, "num_tokens": 1359796282.0, "step": 20130 }, { "entropy": 0.7758640006184578, "epoch": 6.101189123684011, "grad_norm": 0.18424230813980103, "learning_rate": 6.906203280713647e-05, "loss": 1.1686, "mean_token_accuracy": 0.7709204509854317, "num_tokens": 1360462142.0, "step": 20140 }, { "entropy": 0.7815222010016442, "epoch": 6.104218738165568, "grad_norm": 0.17566242814064026, "learning_rate": 6.903115708467285e-05, "loss": 1.1779, "mean_token_accuracy": 0.7686811447143554, "num_tokens": 1361144650.0, "step": 20150 }, { "entropy": 0.7736230954527855, "epoch": 6.107248352647126, "grad_norm": 0.18372832238674164, "learning_rate": 6.900027287336115e-05, "loss": 1.1684, "mean_token_accuracy": 0.7732756614685059, "num_tokens": 1361815906.0, "step": 20160 }, { "entropy": 0.7611715704202652, "epoch": 6.110277967128683, "grad_norm": 0.18187449872493744, "learning_rate": 6.896938018697723e-05, "loss": 1.1686, "mean_token_accuracy": 0.7724242389202118, "num_tokens": 1362480459.0, "step": 20170 }, { "entropy": 0.7746902257204056, "epoch": 6.11330758161024, "grad_norm": 0.19529913365840912, "learning_rate": 6.893847903930082e-05, "loss": 1.1659, "mean_token_accuracy": 0.7734479159116745, "num_tokens": 1363166426.0, "step": 20180 }, { "entropy": 0.7706331163644791, "epoch": 6.116337196091798, "grad_norm": 0.18860577046871185, "learning_rate": 6.890756944411537e-05, "loss": 1.1766, "mean_token_accuracy": 0.7709536388516426, "num_tokens": 1363825369.0, "step": 20190 }, { "entropy": 0.7658568829298019, "epoch": 6.1193668105733545, "grad_norm": 0.178918719291687, "learning_rate": 6.887665141520807e-05, "loss": 1.169, "mean_token_accuracy": 0.7709693178534508, "num_tokens": 1364492287.0, "step": 20200 }, { "entropy": 0.7668952539563179, "epoch": 6.122396425054911, "grad_norm": 0.18230096995830536, "learning_rate": 6.884572496636993e-05, "loss": 1.1635, "mean_token_accuracy": 0.772887147963047, "num_tokens": 1365157281.0, "step": 20210 }, { "entropy": 0.7698351189494133, "epoch": 6.125426039536469, "grad_norm": 0.18724210560321808, "learning_rate": 6.881479011139571e-05, "loss": 1.1717, "mean_token_accuracy": 0.7695181295275688, "num_tokens": 1365808415.0, "step": 20220 }, { "entropy": 0.7700842469930649, "epoch": 6.128455654018026, "grad_norm": 0.17615756392478943, "learning_rate": 6.878384686408387e-05, "loss": 1.1652, "mean_token_accuracy": 0.7756535485386848, "num_tokens": 1366489041.0, "step": 20230 }, { "entropy": 0.7667704582214355, "epoch": 6.131485268499583, "grad_norm": 0.18842682242393494, "learning_rate": 6.875289523823669e-05, "loss": 1.1744, "mean_token_accuracy": 0.7754338160157204, "num_tokens": 1367160173.0, "step": 20240 }, { "entropy": 0.7679364934563637, "epoch": 6.134514882981141, "grad_norm": 0.1914799064397812, "learning_rate": 6.87219352476601e-05, "loss": 1.1636, "mean_token_accuracy": 0.7715009048581123, "num_tokens": 1367823763.0, "step": 20250 }, { "entropy": 0.7675938904285431, "epoch": 6.137544497462698, "grad_norm": 0.18526522815227509, "learning_rate": 6.869096690616383e-05, "loss": 1.1677, "mean_token_accuracy": 0.770045630633831, "num_tokens": 1368483957.0, "step": 20260 }, { "entropy": 0.7649978309869766, "epoch": 6.140574111944255, "grad_norm": 0.17641954123973846, "learning_rate": 6.865999022756128e-05, "loss": 1.1583, "mean_token_accuracy": 0.7767908945679665, "num_tokens": 1369165404.0, "step": 20270 }, { "entropy": 0.7735571846365928, "epoch": 6.1436037264258125, "grad_norm": 0.18176063895225525, "learning_rate": 6.862900522566962e-05, "loss": 1.1677, "mean_token_accuracy": 0.7714340478181839, "num_tokens": 1369831429.0, "step": 20280 }, { "entropy": 0.7654300466179847, "epoch": 6.1466333409073695, "grad_norm": 0.17600718140602112, "learning_rate": 6.859801191430976e-05, "loss": 1.167, "mean_token_accuracy": 0.7764873564243316, "num_tokens": 1370511767.0, "step": 20290 }, { "entropy": 0.7687219202518463, "epoch": 6.149662955388926, "grad_norm": 0.19362908601760864, "learning_rate": 6.856701030730617e-05, "loss": 1.1697, "mean_token_accuracy": 0.7721666499972344, "num_tokens": 1371180860.0, "step": 20300 }, { "entropy": 0.7682798579335213, "epoch": 6.152692569870484, "grad_norm": 0.18298585712909698, "learning_rate": 6.853600041848718e-05, "loss": 1.1635, "mean_token_accuracy": 0.7707045659422874, "num_tokens": 1371841453.0, "step": 20310 }, { "entropy": 0.7731047704815864, "epoch": 6.155722184352041, "grad_norm": 0.17189756035804749, "learning_rate": 6.850498226168473e-05, "loss": 1.1631, "mean_token_accuracy": 0.7727273076772689, "num_tokens": 1372517132.0, "step": 20320 }, { "entropy": 0.7688058093190193, "epoch": 6.158751798833598, "grad_norm": 0.17937079071998596, "learning_rate": 6.847395585073448e-05, "loss": 1.1647, "mean_token_accuracy": 0.7755904942750931, "num_tokens": 1373200818.0, "step": 20330 }, { "entropy": 0.766862353682518, "epoch": 6.161781413315156, "grad_norm": 0.18698036670684814, "learning_rate": 6.844292119947576e-05, "loss": 1.1649, "mean_token_accuracy": 0.7730498358607292, "num_tokens": 1373870605.0, "step": 20340 }, { "entropy": 0.7802311062812806, "epoch": 6.164811027796713, "grad_norm": 0.1781839281320572, "learning_rate": 6.841187832175157e-05, "loss": 1.1644, "mean_token_accuracy": 0.7746206551790238, "num_tokens": 1374569387.0, "step": 20350 }, { "entropy": 0.7692409440875053, "epoch": 6.16784064227827, "grad_norm": 0.19574400782585144, "learning_rate": 6.83808272314086e-05, "loss": 1.1728, "mean_token_accuracy": 0.7712500303983688, "num_tokens": 1375233750.0, "step": 20360 }, { "entropy": 0.7716340467333793, "epoch": 6.1708702567598275, "grad_norm": 0.1821366846561432, "learning_rate": 6.834976794229717e-05, "loss": 1.1673, "mean_token_accuracy": 0.7683703422546386, "num_tokens": 1375887021.0, "step": 20370 }, { "entropy": 0.7655805602669716, "epoch": 6.173899871241384, "grad_norm": 0.1861383318901062, "learning_rate": 6.83187004682713e-05, "loss": 1.157, "mean_token_accuracy": 0.773718175292015, "num_tokens": 1376564944.0, "step": 20380 }, { "entropy": 0.7723828539252281, "epoch": 6.176929485722942, "grad_norm": 0.18086256086826324, "learning_rate": 6.828762482318861e-05, "loss": 1.1684, "mean_token_accuracy": 0.7714751392602921, "num_tokens": 1377250732.0, "step": 20390 }, { "entropy": 0.777621379494667, "epoch": 6.179959100204499, "grad_norm": 0.19169875979423523, "learning_rate": 6.825654102091041e-05, "loss": 1.1687, "mean_token_accuracy": 0.769960243999958, "num_tokens": 1377927823.0, "step": 20400 }, { "entropy": 0.7638231918215752, "epoch": 6.182988714686056, "grad_norm": 0.18605810403823853, "learning_rate": 6.822544907530164e-05, "loss": 1.1667, "mean_token_accuracy": 0.7757900342345238, "num_tokens": 1378605440.0, "step": 20410 }, { "entropy": 0.7629158541560173, "epoch": 6.186018329167614, "grad_norm": 0.19657094776630402, "learning_rate": 6.819434900023083e-05, "loss": 1.1624, "mean_token_accuracy": 0.776650607585907, "num_tokens": 1379286782.0, "step": 20420 }, { "entropy": 0.7685953602194786, "epoch": 6.189047943649171, "grad_norm": 0.18294546008110046, "learning_rate": 6.816324080957022e-05, "loss": 1.1691, "mean_token_accuracy": 0.7680969446897506, "num_tokens": 1379940653.0, "step": 20430 }, { "entropy": 0.7718799218535424, "epoch": 6.192077558130728, "grad_norm": 0.17661738395690918, "learning_rate": 6.813212451719558e-05, "loss": 1.1652, "mean_token_accuracy": 0.7731961011886597, "num_tokens": 1380615271.0, "step": 20440 }, { "entropy": 0.7688711792230606, "epoch": 6.1951071726122855, "grad_norm": 0.17443495988845825, "learning_rate": 6.810100013698632e-05, "loss": 1.1673, "mean_token_accuracy": 0.7736601695418358, "num_tokens": 1381291690.0, "step": 20450 }, { "entropy": 0.7773081868886947, "epoch": 6.198136787093842, "grad_norm": 0.18575076758861542, "learning_rate": 6.806986768282553e-05, "loss": 1.1596, "mean_token_accuracy": 0.7757630035281181, "num_tokens": 1381987252.0, "step": 20460 }, { "entropy": 0.7711373716592789, "epoch": 6.201166401575399, "grad_norm": 0.18245543539524078, "learning_rate": 6.803872716859979e-05, "loss": 1.1621, "mean_token_accuracy": 0.7761328637599945, "num_tokens": 1382683321.0, "step": 20470 }, { "entropy": 0.7689271911978721, "epoch": 6.204196016056957, "grad_norm": 0.18027953803539276, "learning_rate": 6.800757860819933e-05, "loss": 1.1639, "mean_token_accuracy": 0.7739423215389252, "num_tokens": 1383357143.0, "step": 20480 }, { "entropy": 0.7688937187194824, "epoch": 6.207225630538514, "grad_norm": 0.17201583087444305, "learning_rate": 6.797642201551799e-05, "loss": 1.171, "mean_token_accuracy": 0.7725284457206726, "num_tokens": 1384029840.0, "step": 20490 }, { "entropy": 0.7655343785881996, "epoch": 6.210255245020071, "grad_norm": 0.17956753075122833, "learning_rate": 6.794525740445317e-05, "loss": 1.1594, "mean_token_accuracy": 0.7772940382361412, "num_tokens": 1384709893.0, "step": 20500 }, { "entropy": 0.7810137525200844, "epoch": 6.213284859501629, "grad_norm": 0.18092070519924164, "learning_rate": 6.791408478890581e-05, "loss": 1.1818, "mean_token_accuracy": 0.7734193980693818, "num_tokens": 1385390270.0, "step": 20510 }, { "entropy": 0.7619176179170608, "epoch": 6.216314473983186, "grad_norm": 0.18442845344543457, "learning_rate": 6.788290418278049e-05, "loss": 1.156, "mean_token_accuracy": 0.7772478967905044, "num_tokens": 1386072852.0, "step": 20520 }, { "entropy": 0.7646384924650192, "epoch": 6.219344088464743, "grad_norm": 0.1901431381702423, "learning_rate": 6.785171559998531e-05, "loss": 1.1661, "mean_token_accuracy": 0.77126996666193, "num_tokens": 1386738809.0, "step": 20530 }, { "entropy": 0.7642429247498512, "epoch": 6.2223737029463, "grad_norm": 0.1791820526123047, "learning_rate": 6.782051905443193e-05, "loss": 1.1675, "mean_token_accuracy": 0.7767746433615684, "num_tokens": 1387419720.0, "step": 20540 }, { "entropy": 0.766360518336296, "epoch": 6.225403317427857, "grad_norm": 0.19383500516414642, "learning_rate": 6.778931456003558e-05, "loss": 1.1683, "mean_token_accuracy": 0.7729840204119682, "num_tokens": 1388085492.0, "step": 20550 }, { "entropy": 0.7726920738816261, "epoch": 6.228432931909414, "grad_norm": 0.18173034489154816, "learning_rate": 6.775810213071503e-05, "loss": 1.1706, "mean_token_accuracy": 0.7684736415743828, "num_tokens": 1388753867.0, "step": 20560 }, { "entropy": 0.7729493036866188, "epoch": 6.231462546390972, "grad_norm": 0.18822112679481506, "learning_rate": 6.772688178039257e-05, "loss": 1.1659, "mean_token_accuracy": 0.7745878711342812, "num_tokens": 1389437894.0, "step": 20570 }, { "entropy": 0.7687354937195778, "epoch": 6.234492160872529, "grad_norm": 0.1805078536272049, "learning_rate": 6.769565352299404e-05, "loss": 1.1648, "mean_token_accuracy": 0.7734712541103363, "num_tokens": 1390119855.0, "step": 20580 }, { "entropy": 0.7803248658776283, "epoch": 6.237521775354086, "grad_norm": 0.1833615005016327, "learning_rate": 6.766441737244881e-05, "loss": 1.1665, "mean_token_accuracy": 0.7708469524979591, "num_tokens": 1390794339.0, "step": 20590 }, { "entropy": 0.7634388208389282, "epoch": 6.240551389835644, "grad_norm": 0.18130077421665192, "learning_rate": 6.763317334268979e-05, "loss": 1.164, "mean_token_accuracy": 0.7763359144330024, "num_tokens": 1391467689.0, "step": 20600 }, { "entropy": 0.7681302428245544, "epoch": 6.243581004317201, "grad_norm": 0.1837979257106781, "learning_rate": 6.760192144765333e-05, "loss": 1.1684, "mean_token_accuracy": 0.7731573298573494, "num_tokens": 1392138378.0, "step": 20610 }, { "entropy": 0.7732110306620598, "epoch": 6.246610618798758, "grad_norm": 0.18030866980552673, "learning_rate": 6.757066170127939e-05, "loss": 1.1698, "mean_token_accuracy": 0.7718074098229408, "num_tokens": 1392807259.0, "step": 20620 }, { "entropy": 0.7699881076812745, "epoch": 6.249640233280315, "grad_norm": 0.1618461310863495, "learning_rate": 6.753939411751135e-05, "loss": 1.1691, "mean_token_accuracy": 0.772416403889656, "num_tokens": 1393470765.0, "step": 20630 }, { "entropy": 0.7750715002417564, "epoch": 6.252669847761872, "grad_norm": 0.18438762426376343, "learning_rate": 6.750811871029611e-05, "loss": 1.1698, "mean_token_accuracy": 0.7706983745098114, "num_tokens": 1394148049.0, "step": 20640 }, { "entropy": 0.7704644456505776, "epoch": 6.255699462243429, "grad_norm": 0.1812216341495514, "learning_rate": 6.74768354935841e-05, "loss": 1.1674, "mean_token_accuracy": 0.7730281010270119, "num_tokens": 1394821669.0, "step": 20650 }, { "entropy": 0.7773768618702889, "epoch": 6.258729076724987, "grad_norm": 0.1881628930568695, "learning_rate": 6.744554448132915e-05, "loss": 1.1767, "mean_token_accuracy": 0.7692205518484115, "num_tokens": 1395489466.0, "step": 20660 }, { "entropy": 0.7700340136885643, "epoch": 6.261758691206544, "grad_norm": 0.18863986432552338, "learning_rate": 6.741424568748867e-05, "loss": 1.1694, "mean_token_accuracy": 0.7716132119297981, "num_tokens": 1396150850.0, "step": 20670 }, { "entropy": 0.7674183547496796, "epoch": 6.264788305688101, "grad_norm": 0.17054618895053864, "learning_rate": 6.738293912602343e-05, "loss": 1.1559, "mean_token_accuracy": 0.7718262419104576, "num_tokens": 1396833321.0, "step": 20680 }, { "entropy": 0.7717158362269402, "epoch": 6.267817920169659, "grad_norm": 0.17838239669799805, "learning_rate": 6.735162481089779e-05, "loss": 1.1586, "mean_token_accuracy": 0.7769655585289001, "num_tokens": 1397526053.0, "step": 20690 }, { "entropy": 0.7677412182092667, "epoch": 6.270847534651216, "grad_norm": 0.1865473985671997, "learning_rate": 6.732030275607946e-05, "loss": 1.1654, "mean_token_accuracy": 0.774252912402153, "num_tokens": 1398206362.0, "step": 20700 }, { "entropy": 0.7620617344975471, "epoch": 6.2738771491327725, "grad_norm": 0.1808382123708725, "learning_rate": 6.728897297553964e-05, "loss": 1.1644, "mean_token_accuracy": 0.7775876253843308, "num_tokens": 1398882277.0, "step": 20710 }, { "entropy": 0.7642869144678116, "epoch": 6.27690676361433, "grad_norm": 0.17975032329559326, "learning_rate": 6.725763548325298e-05, "loss": 1.1677, "mean_token_accuracy": 0.7721994742751122, "num_tokens": 1399549887.0, "step": 20720 }, { "entropy": 0.7756190747022629, "epoch": 6.279936378095887, "grad_norm": 0.192529559135437, "learning_rate": 6.72262902931976e-05, "loss": 1.1738, "mean_token_accuracy": 0.7731177315115929, "num_tokens": 1400227892.0, "step": 20730 }, { "entropy": 0.7728633806109428, "epoch": 6.282965992577444, "grad_norm": 0.18479959666728973, "learning_rate": 6.719493741935498e-05, "loss": 1.1683, "mean_token_accuracy": 0.7704318210482597, "num_tokens": 1400897290.0, "step": 20740 }, { "entropy": 0.7752651602029801, "epoch": 6.285995607059002, "grad_norm": 0.18089070916175842, "learning_rate": 6.71635768757101e-05, "loss": 1.1775, "mean_token_accuracy": 0.7714816778898239, "num_tokens": 1401565432.0, "step": 20750 }, { "entropy": 0.7818360552191734, "epoch": 6.289025221540559, "grad_norm": 0.18499885499477386, "learning_rate": 6.713220867625133e-05, "loss": 1.1665, "mean_token_accuracy": 0.7688517034053802, "num_tokens": 1402235614.0, "step": 20760 }, { "entropy": 0.7780620649456977, "epoch": 6.292054836022116, "grad_norm": 0.17752474546432495, "learning_rate": 6.710083283497045e-05, "loss": 1.1742, "mean_token_accuracy": 0.7699872747063636, "num_tokens": 1402905570.0, "step": 20770 }, { "entropy": 0.7712685331702233, "epoch": 6.295084450503674, "grad_norm": 0.17574584484100342, "learning_rate": 6.706944936586264e-05, "loss": 1.1677, "mean_token_accuracy": 0.7757701098918914, "num_tokens": 1403590577.0, "step": 20780 }, { "entropy": 0.7641827404499054, "epoch": 6.2981140649852305, "grad_norm": 0.18991263210773468, "learning_rate": 6.703805828292652e-05, "loss": 1.1651, "mean_token_accuracy": 0.7717865869402886, "num_tokens": 1404257085.0, "step": 20790 }, { "entropy": 0.7810153186321258, "epoch": 6.301143679466787, "grad_norm": 0.19716088473796844, "learning_rate": 6.700665960016408e-05, "loss": 1.1733, "mean_token_accuracy": 0.7687037140130997, "num_tokens": 1404934702.0, "step": 20800 }, { "entropy": 0.7753670707345008, "epoch": 6.304173293948345, "grad_norm": 0.1863342970609665, "learning_rate": 6.69752533315807e-05, "loss": 1.1625, "mean_token_accuracy": 0.7753713056445122, "num_tokens": 1405622781.0, "step": 20810 }, { "entropy": 0.7637102693319321, "epoch": 6.307202908429902, "grad_norm": 0.1791115254163742, "learning_rate": 6.694383949118512e-05, "loss": 1.1606, "mean_token_accuracy": 0.7726149007678031, "num_tokens": 1406300935.0, "step": 20820 }, { "entropy": 0.7656391814351082, "epoch": 6.310232522911459, "grad_norm": 0.18493247032165527, "learning_rate": 6.691241809298952e-05, "loss": 1.1603, "mean_token_accuracy": 0.7783811420202256, "num_tokens": 1406980777.0, "step": 20830 }, { "entropy": 0.7651975587010383, "epoch": 6.313262137393017, "grad_norm": 0.20166723430156708, "learning_rate": 6.688098915100942e-05, "loss": 1.164, "mean_token_accuracy": 0.7746549114584923, "num_tokens": 1407651515.0, "step": 20840 }, { "entropy": 0.7801913648843766, "epoch": 6.316291751874574, "grad_norm": 0.17842784523963928, "learning_rate": 6.684955267926367e-05, "loss": 1.1705, "mean_token_accuracy": 0.7728280574083328, "num_tokens": 1408333492.0, "step": 20850 }, { "entropy": 0.7690182015299797, "epoch": 6.319321366356132, "grad_norm": 0.18181131780147552, "learning_rate": 6.681810869177452e-05, "loss": 1.17, "mean_token_accuracy": 0.7699664264917374, "num_tokens": 1408993748.0, "step": 20860 }, { "entropy": 0.775431276857853, "epoch": 6.3223509808376885, "grad_norm": 0.18599024415016174, "learning_rate": 6.678665720256759e-05, "loss": 1.172, "mean_token_accuracy": 0.7708151072263718, "num_tokens": 1409669977.0, "step": 20870 }, { "entropy": 0.7725912168622017, "epoch": 6.3253805953192455, "grad_norm": 0.17442113161087036, "learning_rate": 6.675519822567175e-05, "loss": 1.1683, "mean_token_accuracy": 0.7700779899954796, "num_tokens": 1410334412.0, "step": 20880 }, { "entropy": 0.7692646980285645, "epoch": 6.328410209800802, "grad_norm": 0.19657650589942932, "learning_rate": 6.672373177511935e-05, "loss": 1.1602, "mean_token_accuracy": 0.7722544282674789, "num_tokens": 1411019522.0, "step": 20890 }, { "entropy": 0.7827627912163735, "epoch": 6.33143982428236, "grad_norm": 0.18095193803310394, "learning_rate": 6.669225786494597e-05, "loss": 1.1814, "mean_token_accuracy": 0.7697557792067528, "num_tokens": 1411691157.0, "step": 20900 }, { "entropy": 0.7719816759228706, "epoch": 6.334469438763917, "grad_norm": 0.18132972717285156, "learning_rate": 6.666077650919054e-05, "loss": 1.1691, "mean_token_accuracy": 0.770168337225914, "num_tokens": 1412368112.0, "step": 20910 }, { "entropy": 0.7707783684134484, "epoch": 6.337499053245475, "grad_norm": 0.19714146852493286, "learning_rate": 6.662928772189532e-05, "loss": 1.1656, "mean_token_accuracy": 0.7698728039860725, "num_tokens": 1413049734.0, "step": 20920 }, { "entropy": 0.7757603093981743, "epoch": 6.340528667727032, "grad_norm": 0.17612393200397491, "learning_rate": 6.65977915171059e-05, "loss": 1.1727, "mean_token_accuracy": 0.7731116905808448, "num_tokens": 1413729512.0, "step": 20930 }, { "entropy": 0.7686492502689362, "epoch": 6.343558282208589, "grad_norm": 0.17846208810806274, "learning_rate": 6.656628790887117e-05, "loss": 1.1647, "mean_token_accuracy": 0.7792172431945801, "num_tokens": 1414427392.0, "step": 20940 }, { "entropy": 0.7680420055985451, "epoch": 6.346587896690147, "grad_norm": 0.18803076446056366, "learning_rate": 6.653477691124329e-05, "loss": 1.1667, "mean_token_accuracy": 0.7723503828048706, "num_tokens": 1415093152.0, "step": 20950 }, { "entropy": 0.7752744823694229, "epoch": 6.3496175111717035, "grad_norm": 0.1949007511138916, "learning_rate": 6.650325853827775e-05, "loss": 1.1696, "mean_token_accuracy": 0.7694061130285264, "num_tokens": 1415769889.0, "step": 20960 }, { "entropy": 0.7765705659985542, "epoch": 6.35264712565326, "grad_norm": 0.18722619116306305, "learning_rate": 6.647173280403334e-05, "loss": 1.1713, "mean_token_accuracy": 0.7727076575160027, "num_tokens": 1416450068.0, "step": 20970 }, { "entropy": 0.7655983179807663, "epoch": 6.355676740134818, "grad_norm": 0.17996463179588318, "learning_rate": 6.644019972257207e-05, "loss": 1.1561, "mean_token_accuracy": 0.7806936472654342, "num_tokens": 1417142308.0, "step": 20980 }, { "entropy": 0.7769755586981774, "epoch": 6.358706354616375, "grad_norm": 0.17622792720794678, "learning_rate": 6.640865930795933e-05, "loss": 1.1705, "mean_token_accuracy": 0.7706779524683952, "num_tokens": 1417815837.0, "step": 20990 }, { "entropy": 0.7751562669873238, "epoch": 6.361735969097932, "grad_norm": 0.18360324203968048, "learning_rate": 6.637711157426367e-05, "loss": 1.1658, "mean_token_accuracy": 0.7699324071407319, "num_tokens": 1418493989.0, "step": 21000 }, { "entropy": 0.7727232158184052, "epoch": 6.36476558357949, "grad_norm": 0.18577717244625092, "learning_rate": 6.634555653555701e-05, "loss": 1.1738, "mean_token_accuracy": 0.7678659647703171, "num_tokens": 1419152661.0, "step": 21010 }, { "entropy": 0.7622781082987785, "epoch": 6.367795198061047, "grad_norm": 0.18314194679260254, "learning_rate": 6.631399420591443e-05, "loss": 1.1643, "mean_token_accuracy": 0.7772304236888885, "num_tokens": 1419836556.0, "step": 21020 }, { "entropy": 0.7705505311489105, "epoch": 6.370824812542604, "grad_norm": 0.16682995855808258, "learning_rate": 6.628242459941429e-05, "loss": 1.1711, "mean_token_accuracy": 0.7730928897857666, "num_tokens": 1420505280.0, "step": 21030 }, { "entropy": 0.7660258039832115, "epoch": 6.3738544270241615, "grad_norm": 0.17763105034828186, "learning_rate": 6.62508477301383e-05, "loss": 1.1752, "mean_token_accuracy": 0.7703719526529312, "num_tokens": 1421168671.0, "step": 21040 }, { "entropy": 0.7788975641131402, "epoch": 6.376884041505718, "grad_norm": 0.18896135687828064, "learning_rate": 6.621926361217123e-05, "loss": 1.1765, "mean_token_accuracy": 0.7684742927551269, "num_tokens": 1421833344.0, "step": 21050 }, { "entropy": 0.7638883426785469, "epoch": 6.379913655987275, "grad_norm": 0.18676747381687164, "learning_rate": 6.618767225960124e-05, "loss": 1.1669, "mean_token_accuracy": 0.7727542161941529, "num_tokens": 1422507130.0, "step": 21060 }, { "entropy": 0.761621555685997, "epoch": 6.382943270468833, "grad_norm": 0.19346609711647034, "learning_rate": 6.615607368651964e-05, "loss": 1.1664, "mean_token_accuracy": 0.7779986679553985, "num_tokens": 1423180466.0, "step": 21070 }, { "entropy": 0.7653658419847489, "epoch": 6.38597288495039, "grad_norm": 0.1793552190065384, "learning_rate": 6.612446790702094e-05, "loss": 1.1616, "mean_token_accuracy": 0.7750341057777405, "num_tokens": 1423855718.0, "step": 21080 }, { "entropy": 0.7796183124184608, "epoch": 6.389002499431947, "grad_norm": 0.18394005298614502, "learning_rate": 6.609285493520294e-05, "loss": 1.1765, "mean_token_accuracy": 0.774328587949276, "num_tokens": 1424547336.0, "step": 21090 }, { "entropy": 0.7839424893260002, "epoch": 6.392032113913505, "grad_norm": 0.17285147309303284, "learning_rate": 6.606123478516659e-05, "loss": 1.177, "mean_token_accuracy": 0.7707555308938027, "num_tokens": 1425231517.0, "step": 21100 }, { "entropy": 0.7662839666008949, "epoch": 6.395061728395062, "grad_norm": 0.17895035445690155, "learning_rate": 6.602960747101605e-05, "loss": 1.1701, "mean_token_accuracy": 0.7697966247797012, "num_tokens": 1425893988.0, "step": 21110 }, { "entropy": 0.7741492956876754, "epoch": 6.398091342876619, "grad_norm": 0.1894502490758896, "learning_rate": 6.59979730068587e-05, "loss": 1.1728, "mean_token_accuracy": 0.7667291045188904, "num_tokens": 1426561243.0, "step": 21120 }, { "entropy": 0.765935517847538, "epoch": 6.401120957358176, "grad_norm": 0.19292519986629486, "learning_rate": 6.596633140680509e-05, "loss": 1.1682, "mean_token_accuracy": 0.7751199170947075, "num_tokens": 1427241194.0, "step": 21130 }, { "entropy": 0.776461872458458, "epoch": 6.404150571839733, "grad_norm": 0.191630020737648, "learning_rate": 6.593468268496896e-05, "loss": 1.1734, "mean_token_accuracy": 0.7709326341748237, "num_tokens": 1427908234.0, "step": 21140 }, { "entropy": 0.7621702805161477, "epoch": 6.40718018632129, "grad_norm": 0.18412964046001434, "learning_rate": 6.590302685546721e-05, "loss": 1.1583, "mean_token_accuracy": 0.7765338391065597, "num_tokens": 1428588885.0, "step": 21150 }, { "entropy": 0.782624889910221, "epoch": 6.410209800802848, "grad_norm": 0.16957661509513855, "learning_rate": 6.587136393241993e-05, "loss": 1.1744, "mean_token_accuracy": 0.7702025204896927, "num_tokens": 1429271107.0, "step": 21160 }, { "entropy": 0.7680342867970467, "epoch": 6.413239415284405, "grad_norm": 0.18677100539207458, "learning_rate": 6.583969392995037e-05, "loss": 1.1658, "mean_token_accuracy": 0.7732661232352257, "num_tokens": 1429942334.0, "step": 21170 }, { "entropy": 0.7686312094330787, "epoch": 6.416269029765962, "grad_norm": 0.17361260950565338, "learning_rate": 6.580801686218495e-05, "loss": 1.1585, "mean_token_accuracy": 0.7795374110341072, "num_tokens": 1430641178.0, "step": 21180 }, { "entropy": 0.7684334322810173, "epoch": 6.41929864424752, "grad_norm": 0.1868567019701004, "learning_rate": 6.577633274325319e-05, "loss": 1.1649, "mean_token_accuracy": 0.7755624249577522, "num_tokens": 1431329956.0, "step": 21190 }, { "entropy": 0.7772552326321602, "epoch": 6.422328258729077, "grad_norm": 0.18956871330738068, "learning_rate": 6.574464158728781e-05, "loss": 1.1731, "mean_token_accuracy": 0.7735658958554268, "num_tokens": 1432010736.0, "step": 21200 }, { "entropy": 0.7731815323233604, "epoch": 6.425357873210634, "grad_norm": 0.18785738945007324, "learning_rate": 6.571294340842467e-05, "loss": 1.17, "mean_token_accuracy": 0.772030559182167, "num_tokens": 1432696627.0, "step": 21210 }, { "entropy": 0.7707874432206154, "epoch": 6.428387487692191, "grad_norm": 0.19727292656898499, "learning_rate": 6.568123822080273e-05, "loss": 1.1673, "mean_token_accuracy": 0.7771949291229248, "num_tokens": 1433401192.0, "step": 21220 }, { "entropy": 0.7744519487023354, "epoch": 6.431417102173748, "grad_norm": 0.18756203353405, "learning_rate": 6.564952603856408e-05, "loss": 1.1636, "mean_token_accuracy": 0.7736545264720917, "num_tokens": 1434080468.0, "step": 21230 }, { "entropy": 0.7654246866703034, "epoch": 6.434446716655305, "grad_norm": 0.18440191447734833, "learning_rate": 6.561780687585392e-05, "loss": 1.162, "mean_token_accuracy": 0.7743609964847564, "num_tokens": 1434756409.0, "step": 21240 }, { "entropy": 0.7709704726934433, "epoch": 6.437476331136863, "grad_norm": 0.17374394834041595, "learning_rate": 6.558608074682064e-05, "loss": 1.1689, "mean_token_accuracy": 0.7725660175085067, "num_tokens": 1435434873.0, "step": 21250 }, { "entropy": 0.7626835972070694, "epoch": 6.44050594561842, "grad_norm": 0.18436864018440247, "learning_rate": 6.555434766561565e-05, "loss": 1.1665, "mean_token_accuracy": 0.7708254888653755, "num_tokens": 1436098544.0, "step": 21260 }, { "entropy": 0.7821949139237404, "epoch": 6.443535560099978, "grad_norm": 0.19370079040527344, "learning_rate": 6.552260764639348e-05, "loss": 1.1765, "mean_token_accuracy": 0.7679042026400567, "num_tokens": 1436766331.0, "step": 21270 }, { "entropy": 0.7772647470235825, "epoch": 6.446565174581535, "grad_norm": 0.1867731362581253, "learning_rate": 6.549086070331179e-05, "loss": 1.1747, "mean_token_accuracy": 0.7668262258172035, "num_tokens": 1437434949.0, "step": 21280 }, { "entropy": 0.765082560479641, "epoch": 6.449594789063092, "grad_norm": 0.18199868500232697, "learning_rate": 6.545910685053128e-05, "loss": 1.1681, "mean_token_accuracy": 0.7766094893217087, "num_tokens": 1438118585.0, "step": 21290 }, { "entropy": 0.7694566279649735, "epoch": 6.4526244035446485, "grad_norm": 0.18930061161518097, "learning_rate": 6.542734610221578e-05, "loss": 1.1651, "mean_token_accuracy": 0.7690699368715286, "num_tokens": 1438778345.0, "step": 21300 }, { "entropy": 0.7629023566842079, "epoch": 6.455654018026206, "grad_norm": 0.19168712198734283, "learning_rate": 6.539557847253217e-05, "loss": 1.1659, "mean_token_accuracy": 0.7722378998994828, "num_tokens": 1439443575.0, "step": 21310 }, { "entropy": 0.7745316356420517, "epoch": 6.458683632507763, "grad_norm": 0.18028612434864044, "learning_rate": 6.536380397565038e-05, "loss": 1.1763, "mean_token_accuracy": 0.7670167997479439, "num_tokens": 1440106438.0, "step": 21320 }, { "entropy": 0.7686283111572265, "epoch": 6.461713246989321, "grad_norm": 0.18088003993034363, "learning_rate": 6.533202262574343e-05, "loss": 1.1654, "mean_token_accuracy": 0.7731160148978233, "num_tokens": 1440788951.0, "step": 21330 }, { "entropy": 0.7695881143212319, "epoch": 6.464742861470878, "grad_norm": 0.1931450366973877, "learning_rate": 6.530023443698744e-05, "loss": 1.1624, "mean_token_accuracy": 0.7759535938501358, "num_tokens": 1441472780.0, "step": 21340 }, { "entropy": 0.7654684498906136, "epoch": 6.467772475952435, "grad_norm": 0.1829850971698761, "learning_rate": 6.526843942356147e-05, "loss": 1.1685, "mean_token_accuracy": 0.7688734218478203, "num_tokens": 1442131107.0, "step": 21350 }, { "entropy": 0.7659239575266839, "epoch": 6.470802090433993, "grad_norm": 0.19256149232387543, "learning_rate": 6.523663759964772e-05, "loss": 1.1669, "mean_token_accuracy": 0.7726069211959838, "num_tokens": 1442814003.0, "step": 21360 }, { "entropy": 0.774470879137516, "epoch": 6.47383170491555, "grad_norm": 0.18819394707679749, "learning_rate": 6.520482897943137e-05, "loss": 1.1654, "mean_token_accuracy": 0.7749659284949303, "num_tokens": 1443499409.0, "step": 21370 }, { "entropy": 0.7676126733422279, "epoch": 6.4768613193971065, "grad_norm": 0.18283072113990784, "learning_rate": 6.517301357710069e-05, "loss": 1.1585, "mean_token_accuracy": 0.7723554998636246, "num_tokens": 1444177570.0, "step": 21380 }, { "entropy": 0.7656297534704208, "epoch": 6.479890933878664, "grad_norm": 0.18021374940872192, "learning_rate": 6.514119140684691e-05, "loss": 1.1671, "mean_token_accuracy": 0.7718273043632508, "num_tokens": 1444847150.0, "step": 21390 }, { "entropy": 0.7661767914891243, "epoch": 6.482920548360221, "grad_norm": 0.1733720302581787, "learning_rate": 6.510936248286432e-05, "loss": 1.1592, "mean_token_accuracy": 0.7731059432029724, "num_tokens": 1445527955.0, "step": 21400 }, { "entropy": 0.763685031235218, "epoch": 6.485950162841778, "grad_norm": 0.18219871819019318, "learning_rate": 6.507752681935022e-05, "loss": 1.1588, "mean_token_accuracy": 0.7770874068140984, "num_tokens": 1446200835.0, "step": 21410 }, { "entropy": 0.771403020620346, "epoch": 6.488979777323336, "grad_norm": 0.18652617931365967, "learning_rate": 6.50456844305049e-05, "loss": 1.1791, "mean_token_accuracy": 0.7701421588659286, "num_tokens": 1446862912.0, "step": 21420 }, { "entropy": 0.7709251582622528, "epoch": 6.492009391804893, "grad_norm": 0.1999097764492035, "learning_rate": 6.501383533053166e-05, "loss": 1.1639, "mean_token_accuracy": 0.7720854237675667, "num_tokens": 1447540393.0, "step": 21430 }, { "entropy": 0.7736376330256463, "epoch": 6.49503900628645, "grad_norm": 0.18020248413085938, "learning_rate": 6.49819795336368e-05, "loss": 1.1608, "mean_token_accuracy": 0.770463714003563, "num_tokens": 1448214715.0, "step": 21440 }, { "entropy": 0.7666349977254867, "epoch": 6.498068620768008, "grad_norm": 0.19957131147384644, "learning_rate": 6.495011705402958e-05, "loss": 1.167, "mean_token_accuracy": 0.7720817238092422, "num_tokens": 1448887278.0, "step": 21450 }, { "entropy": 0.757916721701622, "epoch": 6.5010982352495645, "grad_norm": 0.23013772070407867, "learning_rate": 6.491824790592228e-05, "loss": 1.1546, "mean_token_accuracy": 0.7753512367606163, "num_tokens": 1449568819.0, "step": 21460 }, { "entropy": 0.7611304610967636, "epoch": 6.5041278497311215, "grad_norm": 0.1955873966217041, "learning_rate": 6.488637210353012e-05, "loss": 1.1546, "mean_token_accuracy": 0.7735495448112488, "num_tokens": 1450237286.0, "step": 21470 }, { "entropy": 0.7752103328704834, "epoch": 6.507157464212679, "grad_norm": 0.17470267415046692, "learning_rate": 6.485448966107133e-05, "loss": 1.1663, "mean_token_accuracy": 0.7741127356886863, "num_tokens": 1450927403.0, "step": 21480 }, { "entropy": 0.7749665439128876, "epoch": 6.510187078694236, "grad_norm": 0.18244533240795135, "learning_rate": 6.482260059276703e-05, "loss": 1.1717, "mean_token_accuracy": 0.7713807061314583, "num_tokens": 1451597746.0, "step": 21490 }, { "entropy": 0.7803113788366318, "epoch": 6.513216693175793, "grad_norm": 0.1868499368429184, "learning_rate": 6.479070491284136e-05, "loss": 1.1799, "mean_token_accuracy": 0.7682018563151359, "num_tokens": 1452264988.0, "step": 21500 }, { "entropy": 0.7720252394676208, "epoch": 6.516246307657351, "grad_norm": 0.17205385863780975, "learning_rate": 6.47588026355214e-05, "loss": 1.162, "mean_token_accuracy": 0.7716523960232735, "num_tokens": 1452941641.0, "step": 21510 }, { "entropy": 0.7706853568553924, "epoch": 6.519275922138908, "grad_norm": 0.1778043806552887, "learning_rate": 6.472689377503718e-05, "loss": 1.1717, "mean_token_accuracy": 0.7744766980409622, "num_tokens": 1453617733.0, "step": 21520 }, { "entropy": 0.7690259456634522, "epoch": 6.522305536620465, "grad_norm": 0.18089909851551056, "learning_rate": 6.469497834562159e-05, "loss": 1.1657, "mean_token_accuracy": 0.7723587095737457, "num_tokens": 1454285958.0, "step": 21530 }, { "entropy": 0.7676387712359428, "epoch": 6.525335151102023, "grad_norm": 0.18544796109199524, "learning_rate": 6.466305636151054e-05, "loss": 1.1669, "mean_token_accuracy": 0.771427059173584, "num_tokens": 1454949315.0, "step": 21540 }, { "entropy": 0.7739736899733544, "epoch": 6.5283647655835795, "grad_norm": 0.17433473467826843, "learning_rate": 6.463112783694284e-05, "loss": 1.1695, "mean_token_accuracy": 0.7705166786909103, "num_tokens": 1455617190.0, "step": 21550 }, { "entropy": 0.7755619525909424, "epoch": 6.531394380065136, "grad_norm": 0.16753840446472168, "learning_rate": 6.459919278616019e-05, "loss": 1.1691, "mean_token_accuracy": 0.7722525209188461, "num_tokens": 1456295247.0, "step": 21560 }, { "entropy": 0.7638898327946663, "epoch": 6.534423994546694, "grad_norm": 0.1864500343799591, "learning_rate": 6.456725122340724e-05, "loss": 1.1574, "mean_token_accuracy": 0.7794457793235778, "num_tokens": 1456982654.0, "step": 21570 }, { "entropy": 0.7689558789134026, "epoch": 6.537453609028251, "grad_norm": 0.18878714740276337, "learning_rate": 6.45353031629315e-05, "loss": 1.1719, "mean_token_accuracy": 0.7743179693818092, "num_tokens": 1457656508.0, "step": 21580 }, { "entropy": 0.7655672878026962, "epoch": 6.540483223509808, "grad_norm": 0.17761436104774475, "learning_rate": 6.450334861898344e-05, "loss": 1.1623, "mean_token_accuracy": 0.7732078984379769, "num_tokens": 1458336182.0, "step": 21590 }, { "entropy": 0.7692855253815651, "epoch": 6.543512837991366, "grad_norm": 0.1718793660402298, "learning_rate": 6.447138760581635e-05, "loss": 1.1692, "mean_token_accuracy": 0.7737704396247864, "num_tokens": 1459007287.0, "step": 21600 }, { "entropy": 0.7747060254216194, "epoch": 6.546542452472923, "grad_norm": 0.1755637675523758, "learning_rate": 6.443942013768645e-05, "loss": 1.171, "mean_token_accuracy": 0.7702451661229134, "num_tokens": 1459679061.0, "step": 21610 }, { "entropy": 0.7761596962809563, "epoch": 6.54957206695448, "grad_norm": 0.19001683592796326, "learning_rate": 6.440744622885284e-05, "loss": 1.1686, "mean_token_accuracy": 0.7771530330181122, "num_tokens": 1460374429.0, "step": 21620 }, { "entropy": 0.764761796593666, "epoch": 6.5526016814360375, "grad_norm": 0.1785161793231964, "learning_rate": 6.437546589357748e-05, "loss": 1.164, "mean_token_accuracy": 0.7729075953364373, "num_tokens": 1461049779.0, "step": 21630 }, { "entropy": 0.7704018160700798, "epoch": 6.555631295917594, "grad_norm": 0.19004888832569122, "learning_rate": 6.434347914612519e-05, "loss": 1.1635, "mean_token_accuracy": 0.7719914078712463, "num_tokens": 1461727645.0, "step": 21640 }, { "entropy": 0.7683401018381119, "epoch": 6.558660910399151, "grad_norm": 0.171677827835083, "learning_rate": 6.431148600076368e-05, "loss": 1.1649, "mean_token_accuracy": 0.7729918599128723, "num_tokens": 1462404520.0, "step": 21650 }, { "entropy": 0.7704039484262466, "epoch": 6.561690524880709, "grad_norm": 0.1909465491771698, "learning_rate": 6.427948647176347e-05, "loss": 1.1679, "mean_token_accuracy": 0.7740996435284615, "num_tokens": 1463080948.0, "step": 21660 }, { "entropy": 0.7697620555758476, "epoch": 6.564720139362266, "grad_norm": 0.18859635293483734, "learning_rate": 6.424748057339795e-05, "loss": 1.1644, "mean_token_accuracy": 0.7722744092345237, "num_tokens": 1463750087.0, "step": 21670 }, { "entropy": 0.7678390488028526, "epoch": 6.567749753843824, "grad_norm": 0.18070949614048004, "learning_rate": 6.421546831994338e-05, "loss": 1.1629, "mean_token_accuracy": 0.7758280858397484, "num_tokens": 1464428117.0, "step": 21680 }, { "entropy": 0.7706129759550094, "epoch": 6.570779368325381, "grad_norm": 0.18724402785301208, "learning_rate": 6.418344972567881e-05, "loss": 1.1759, "mean_token_accuracy": 0.7726528882980347, "num_tokens": 1465103570.0, "step": 21690 }, { "entropy": 0.7660466030240058, "epoch": 6.573808982806938, "grad_norm": 0.19266277551651, "learning_rate": 6.415142480488612e-05, "loss": 1.1709, "mean_token_accuracy": 0.7720398455858231, "num_tokens": 1465771173.0, "step": 21700 }, { "entropy": 0.7690909415483475, "epoch": 6.576838597288495, "grad_norm": 0.18735866248607635, "learning_rate": 6.411939357185004e-05, "loss": 1.1704, "mean_token_accuracy": 0.7696846678853035, "num_tokens": 1466440331.0, "step": 21710 }, { "entropy": 0.7741300716996193, "epoch": 6.579868211770052, "grad_norm": 0.18521477282047272, "learning_rate": 6.40873560408581e-05, "loss": 1.17, "mean_token_accuracy": 0.7761170193552971, "num_tokens": 1467129411.0, "step": 21720 }, { "entropy": 0.7730172485113144, "epoch": 6.582897826251609, "grad_norm": 0.18900567293167114, "learning_rate": 6.405531222620065e-05, "loss": 1.1762, "mean_token_accuracy": 0.7701623916625977, "num_tokens": 1467795570.0, "step": 21730 }, { "entropy": 0.7690958172082901, "epoch": 6.585927440733167, "grad_norm": 0.1802007108926773, "learning_rate": 6.402326214217082e-05, "loss": 1.161, "mean_token_accuracy": 0.7755455136299133, "num_tokens": 1468470270.0, "step": 21740 }, { "entropy": 0.7712338462471962, "epoch": 6.588957055214724, "grad_norm": 0.1835826337337494, "learning_rate": 6.399120580306456e-05, "loss": 1.1656, "mean_token_accuracy": 0.7762703418731689, "num_tokens": 1469154893.0, "step": 21750 }, { "entropy": 0.7653421729803085, "epoch": 6.591986669696281, "grad_norm": 0.18978463113307953, "learning_rate": 6.395914322318062e-05, "loss": 1.1663, "mean_token_accuracy": 0.7751046478748321, "num_tokens": 1469823552.0, "step": 21760 }, { "entropy": 0.7720669090747834, "epoch": 6.595016284177838, "grad_norm": 0.18262676894664764, "learning_rate": 6.392707441682047e-05, "loss": 1.1645, "mean_token_accuracy": 0.7753222897648812, "num_tokens": 1470519341.0, "step": 21770 }, { "entropy": 0.7722769737243652, "epoch": 6.598045898659396, "grad_norm": 0.1838468313217163, "learning_rate": 6.389499939828842e-05, "loss": 1.1625, "mean_token_accuracy": 0.7714739501476288, "num_tokens": 1471192707.0, "step": 21780 }, { "entropy": 0.7652555599808692, "epoch": 6.601075513140953, "grad_norm": 0.183171346783638, "learning_rate": 6.386291818189157e-05, "loss": 1.169, "mean_token_accuracy": 0.7731975227594375, "num_tokens": 1471864045.0, "step": 21790 }, { "entropy": 0.7602854669094086, "epoch": 6.6041051276225105, "grad_norm": 0.18502184748649597, "learning_rate": 6.383083078193969e-05, "loss": 1.1561, "mean_token_accuracy": 0.774936942756176, "num_tokens": 1472549992.0, "step": 21800 }, { "entropy": 0.7682442143559456, "epoch": 6.607134742104067, "grad_norm": 0.18116509914398193, "learning_rate": 6.37987372127454e-05, "loss": 1.1615, "mean_token_accuracy": 0.770703761279583, "num_tokens": 1473221478.0, "step": 21810 }, { "entropy": 0.7739316254854203, "epoch": 6.610164356585624, "grad_norm": 0.18188564479351044, "learning_rate": 6.376663748862404e-05, "loss": 1.1745, "mean_token_accuracy": 0.7728255540132523, "num_tokens": 1473897881.0, "step": 21820 }, { "entropy": 0.7783873111009598, "epoch": 6.613193971067181, "grad_norm": 0.1905154585838318, "learning_rate": 6.373453162389365e-05, "loss": 1.1727, "mean_token_accuracy": 0.7709947720170021, "num_tokens": 1474562263.0, "step": 21830 }, { "entropy": 0.7556312188506127, "epoch": 6.616223585548739, "grad_norm": 0.19350853562355042, "learning_rate": 6.370241963287511e-05, "loss": 1.1541, "mean_token_accuracy": 0.774929678440094, "num_tokens": 1475234089.0, "step": 21840 }, { "entropy": 0.7703248292207718, "epoch": 6.619253200030296, "grad_norm": 0.18423984944820404, "learning_rate": 6.367030152989195e-05, "loss": 1.1741, "mean_token_accuracy": 0.7735764056444168, "num_tokens": 1475911688.0, "step": 21850 }, { "entropy": 0.7765189647674561, "epoch": 6.622282814511854, "grad_norm": 0.18914060294628143, "learning_rate": 6.363817732927044e-05, "loss": 1.1727, "mean_token_accuracy": 0.7717511236667634, "num_tokens": 1476587303.0, "step": 21860 }, { "entropy": 0.768643818795681, "epoch": 6.625312428993411, "grad_norm": 0.18642932176589966, "learning_rate": 6.360604704533959e-05, "loss": 1.1701, "mean_token_accuracy": 0.7708021387457847, "num_tokens": 1477255880.0, "step": 21870 }, { "entropy": 0.772607932984829, "epoch": 6.628342043474968, "grad_norm": 0.18266133964061737, "learning_rate": 6.357391069243112e-05, "loss": 1.1767, "mean_token_accuracy": 0.7690922453999519, "num_tokens": 1477933555.0, "step": 21880 }, { "entropy": 0.7673264116048812, "epoch": 6.631371657956525, "grad_norm": 0.18531456589698792, "learning_rate": 6.354176828487943e-05, "loss": 1.1597, "mean_token_accuracy": 0.7732823312282562, "num_tokens": 1478602707.0, "step": 21890 }, { "entropy": 0.7652122467756272, "epoch": 6.634401272438082, "grad_norm": 0.17667856812477112, "learning_rate": 6.350961983702167e-05, "loss": 1.1634, "mean_token_accuracy": 0.7752939403057099, "num_tokens": 1479286804.0, "step": 21900 }, { "entropy": 0.7755970239639283, "epoch": 6.637430886919639, "grad_norm": 0.19186754524707794, "learning_rate": 6.347746536319763e-05, "loss": 1.1706, "mean_token_accuracy": 0.7728856489062309, "num_tokens": 1479958048.0, "step": 21910 }, { "entropy": 0.7646700069308281, "epoch": 6.640460501401197, "grad_norm": 0.1853509545326233, "learning_rate": 6.344530487774982e-05, "loss": 1.1619, "mean_token_accuracy": 0.7750240325927734, "num_tokens": 1480637890.0, "step": 21920 }, { "entropy": 0.7710616961121559, "epoch": 6.643490115882754, "grad_norm": 0.1920197606086731, "learning_rate": 6.341313839502345e-05, "loss": 1.1705, "mean_token_accuracy": 0.7739571809768677, "num_tokens": 1481311429.0, "step": 21930 }, { "entropy": 0.7739026889204978, "epoch": 6.646519730364311, "grad_norm": 0.1981394737958908, "learning_rate": 6.338096592936635e-05, "loss": 1.174, "mean_token_accuracy": 0.7715424120426178, "num_tokens": 1481981945.0, "step": 21940 }, { "entropy": 0.7698251903057098, "epoch": 6.649549344845869, "grad_norm": 0.18606361746788025, "learning_rate": 6.334878749512906e-05, "loss": 1.1716, "mean_token_accuracy": 0.7694026336073876, "num_tokens": 1482652948.0, "step": 21950 }, { "entropy": 0.7713384702801704, "epoch": 6.652578959327426, "grad_norm": 0.19235661625862122, "learning_rate": 6.331660310666479e-05, "loss": 1.1645, "mean_token_accuracy": 0.7726158857345581, "num_tokens": 1483326600.0, "step": 21960 }, { "entropy": 0.7762814402580261, "epoch": 6.6556085738089825, "grad_norm": 0.1893935203552246, "learning_rate": 6.328441277832934e-05, "loss": 1.1677, "mean_token_accuracy": 0.7695004731416702, "num_tokens": 1484003122.0, "step": 21970 }, { "entropy": 0.7783913135528564, "epoch": 6.65863818829054, "grad_norm": 0.18274828791618347, "learning_rate": 6.325221652448124e-05, "loss": 1.1739, "mean_token_accuracy": 0.7683811485767365, "num_tokens": 1484668823.0, "step": 21980 }, { "entropy": 0.7588885337114334, "epoch": 6.661667802772097, "grad_norm": 0.18802721798419952, "learning_rate": 6.322001435948166e-05, "loss": 1.1531, "mean_token_accuracy": 0.7772417977452278, "num_tokens": 1485344835.0, "step": 21990 }, { "entropy": 0.7634912580251694, "epoch": 6.664697417253654, "grad_norm": 0.18112128973007202, "learning_rate": 6.31878062976943e-05, "loss": 1.1654, "mean_token_accuracy": 0.773204405605793, "num_tokens": 1486008505.0, "step": 22000 }, { "entropy": 0.7671458825469017, "epoch": 6.667727031735212, "grad_norm": 0.19336499273777008, "learning_rate": 6.315559235348563e-05, "loss": 1.1686, "mean_token_accuracy": 0.7707120925188065, "num_tokens": 1486676616.0, "step": 22010 }, { "entropy": 0.7553473100066185, "epoch": 6.670756646216769, "grad_norm": 0.1731991022825241, "learning_rate": 6.312337254122465e-05, "loss": 1.1478, "mean_token_accuracy": 0.7773711651563644, "num_tokens": 1487356232.0, "step": 22020 }, { "entropy": 0.7643198490142822, "epoch": 6.673786260698326, "grad_norm": 0.18662288784980774, "learning_rate": 6.309114687528305e-05, "loss": 1.1653, "mean_token_accuracy": 0.7735546737909317, "num_tokens": 1488033523.0, "step": 22030 }, { "entropy": 0.7643371403217316, "epoch": 6.676815875179884, "grad_norm": 0.18357449769973755, "learning_rate": 6.305891537003502e-05, "loss": 1.1726, "mean_token_accuracy": 0.7715471386909485, "num_tokens": 1488705079.0, "step": 22040 }, { "entropy": 0.77202457934618, "epoch": 6.6798454896614405, "grad_norm": 0.18090148270130157, "learning_rate": 6.302667803985747e-05, "loss": 1.1682, "mean_token_accuracy": 0.7699681699275971, "num_tokens": 1489367689.0, "step": 22050 }, { "entropy": 0.774673941731453, "epoch": 6.6828751041429975, "grad_norm": 0.1801070272922516, "learning_rate": 6.299443489912985e-05, "loss": 1.1738, "mean_token_accuracy": 0.7740351021289825, "num_tokens": 1490052150.0, "step": 22060 }, { "entropy": 0.7639446377754211, "epoch": 6.685904718624555, "grad_norm": 0.18547028303146362, "learning_rate": 6.296218596223422e-05, "loss": 1.1599, "mean_token_accuracy": 0.7719153627753258, "num_tokens": 1490717649.0, "step": 22070 }, { "entropy": 0.7652400135993958, "epoch": 6.688934333106112, "grad_norm": 0.18799661099910736, "learning_rate": 6.292993124355522e-05, "loss": 1.1591, "mean_token_accuracy": 0.7754050731658936, "num_tokens": 1491397320.0, "step": 22080 }, { "entropy": 0.770381124317646, "epoch": 6.691963947587669, "grad_norm": 0.17602533102035522, "learning_rate": 6.289767075748005e-05, "loss": 1.1703, "mean_token_accuracy": 0.7748016655445099, "num_tokens": 1492081050.0, "step": 22090 }, { "entropy": 0.7705921873450279, "epoch": 6.694993562069227, "grad_norm": 0.1815701276063919, "learning_rate": 6.286540451839853e-05, "loss": 1.165, "mean_token_accuracy": 0.7728049546480179, "num_tokens": 1492769410.0, "step": 22100 }, { "entropy": 0.7681502133607865, "epoch": 6.698023176550784, "grad_norm": 0.18458382785320282, "learning_rate": 6.2833132540703e-05, "loss": 1.1657, "mean_token_accuracy": 0.768888546526432, "num_tokens": 1493441838.0, "step": 22110 }, { "entropy": 0.774550287425518, "epoch": 6.701052791032341, "grad_norm": 0.19067157804965973, "learning_rate": 6.280085483878837e-05, "loss": 1.1717, "mean_token_accuracy": 0.772263289988041, "num_tokens": 1494120881.0, "step": 22120 }, { "entropy": 0.7727632358670234, "epoch": 6.704082405513899, "grad_norm": 0.1965942680835724, "learning_rate": 6.276857142705213e-05, "loss": 1.1788, "mean_token_accuracy": 0.7671290412545204, "num_tokens": 1494778778.0, "step": 22130 }, { "entropy": 0.7777154386043549, "epoch": 6.7071120199954555, "grad_norm": 0.19251009821891785, "learning_rate": 6.273628231989426e-05, "loss": 1.1733, "mean_token_accuracy": 0.7677260801196099, "num_tokens": 1495445881.0, "step": 22140 }, { "entropy": 0.7793846324086189, "epoch": 6.710141634477013, "grad_norm": 0.1896645426750183, "learning_rate": 6.270398753171734e-05, "loss": 1.1696, "mean_token_accuracy": 0.7731392189860344, "num_tokens": 1496131151.0, "step": 22150 }, { "entropy": 0.7714336290955544, "epoch": 6.71317124895857, "grad_norm": 0.18072839081287384, "learning_rate": 6.267168707692649e-05, "loss": 1.1653, "mean_token_accuracy": 0.771985012292862, "num_tokens": 1496805690.0, "step": 22160 }, { "entropy": 0.7614537060260773, "epoch": 6.716200863440127, "grad_norm": 0.2017190158367157, "learning_rate": 6.263938096992929e-05, "loss": 1.1659, "mean_token_accuracy": 0.7736597269773483, "num_tokens": 1497480078.0, "step": 22170 }, { "entropy": 0.7740855038166046, "epoch": 6.719230477921684, "grad_norm": 0.1913452297449112, "learning_rate": 6.260706922513587e-05, "loss": 1.1662, "mean_token_accuracy": 0.7732095837593078, "num_tokens": 1498160528.0, "step": 22180 }, { "entropy": 0.7760282531380653, "epoch": 6.722260092403242, "grad_norm": 0.18119335174560547, "learning_rate": 6.257475185695891e-05, "loss": 1.1722, "mean_token_accuracy": 0.7678147166967392, "num_tokens": 1498825082.0, "step": 22190 }, { "entropy": 0.7587352946400643, "epoch": 6.725289706884799, "grad_norm": 0.18721026182174683, "learning_rate": 6.254242887981359e-05, "loss": 1.1595, "mean_token_accuracy": 0.7760735020041466, "num_tokens": 1499499710.0, "step": 22200 }, { "entropy": 0.77638940513134, "epoch": 6.728319321366357, "grad_norm": 0.19338718056678772, "learning_rate": 6.251010030811755e-05, "loss": 1.1744, "mean_token_accuracy": 0.7720439538359642, "num_tokens": 1500183912.0, "step": 22210 }, { "entropy": 0.7665472030639648, "epoch": 6.7313489358479135, "grad_norm": 0.18072675168514252, "learning_rate": 6.247776615629094e-05, "loss": 1.1615, "mean_token_accuracy": 0.7728681981563568, "num_tokens": 1500854050.0, "step": 22220 }, { "entropy": 0.7659976661205292, "epoch": 6.73437855032947, "grad_norm": 0.1857847422361374, "learning_rate": 6.244542643875642e-05, "loss": 1.1614, "mean_token_accuracy": 0.7746493652462959, "num_tokens": 1501533385.0, "step": 22230 }, { "entropy": 0.7666662365198136, "epoch": 6.737408164811027, "grad_norm": 0.19038133323192596, "learning_rate": 6.241308116993914e-05, "loss": 1.1625, "mean_token_accuracy": 0.7739652186632157, "num_tokens": 1502208304.0, "step": 22240 }, { "entropy": 0.7609266832470893, "epoch": 6.740437779292585, "grad_norm": 0.17761822044849396, "learning_rate": 6.238073036426669e-05, "loss": 1.1628, "mean_token_accuracy": 0.7767347857356072, "num_tokens": 1502885874.0, "step": 22250 }, { "entropy": 0.77239308655262, "epoch": 6.743467393774142, "grad_norm": 0.18619075417518616, "learning_rate": 6.234837403616912e-05, "loss": 1.1704, "mean_token_accuracy": 0.7716382130980491, "num_tokens": 1503555544.0, "step": 22260 }, { "entropy": 0.7584697559475899, "epoch": 6.7464970082557, "grad_norm": 0.18827976286411285, "learning_rate": 6.231601220007903e-05, "loss": 1.1565, "mean_token_accuracy": 0.7730544328689575, "num_tokens": 1504216227.0, "step": 22270 }, { "entropy": 0.7744659379124641, "epoch": 6.749526622737257, "grad_norm": 0.1921151727437973, "learning_rate": 6.228364487043137e-05, "loss": 1.1652, "mean_token_accuracy": 0.769395387172699, "num_tokens": 1504883190.0, "step": 22280 }, { "entropy": 0.7673099309206008, "epoch": 6.752556237218814, "grad_norm": 0.2096191793680191, "learning_rate": 6.225127206166363e-05, "loss": 1.1722, "mean_token_accuracy": 0.772365789115429, "num_tokens": 1505552306.0, "step": 22290 }, { "entropy": 0.7724889099597931, "epoch": 6.7555858517003715, "grad_norm": 0.17974375188350677, "learning_rate": 6.221889378821566e-05, "loss": 1.1698, "mean_token_accuracy": 0.7738417014479637, "num_tokens": 1506235893.0, "step": 22300 }, { "entropy": 0.7580396920442581, "epoch": 6.758615466181928, "grad_norm": 0.18559345602989197, "learning_rate": 6.218651006452981e-05, "loss": 1.1575, "mean_token_accuracy": 0.774227486550808, "num_tokens": 1506920520.0, "step": 22310 }, { "entropy": 0.7742979377508163, "epoch": 6.761645080663485, "grad_norm": 0.18375980854034424, "learning_rate": 6.215412090505084e-05, "loss": 1.1659, "mean_token_accuracy": 0.7725578680634498, "num_tokens": 1507599440.0, "step": 22320 }, { "entropy": 0.7670708894729614, "epoch": 6.764674695145043, "grad_norm": 0.18639779090881348, "learning_rate": 6.212172632422594e-05, "loss": 1.1623, "mean_token_accuracy": 0.7718268305063247, "num_tokens": 1508286070.0, "step": 22330 }, { "entropy": 0.7699150651693344, "epoch": 6.7677043096266, "grad_norm": 0.1727694272994995, "learning_rate": 6.20893263365047e-05, "loss": 1.1667, "mean_token_accuracy": 0.7740846604108811, "num_tokens": 1508967305.0, "step": 22340 }, { "entropy": 0.7649823412299156, "epoch": 6.770733924108157, "grad_norm": 0.1860731542110443, "learning_rate": 6.205692095633915e-05, "loss": 1.1636, "mean_token_accuracy": 0.7753878831863403, "num_tokens": 1509649444.0, "step": 22350 }, { "entropy": 0.7677910998463631, "epoch": 6.773763538589715, "grad_norm": 0.1788383275270462, "learning_rate": 6.20245101981837e-05, "loss": 1.1583, "mean_token_accuracy": 0.778513939678669, "num_tokens": 1510344184.0, "step": 22360 }, { "entropy": 0.7659306794404983, "epoch": 6.776793153071272, "grad_norm": 0.18011821806430817, "learning_rate": 6.199209407649518e-05, "loss": 1.1654, "mean_token_accuracy": 0.7731028303503991, "num_tokens": 1511023896.0, "step": 22370 }, { "entropy": 0.7609267979860306, "epoch": 6.779822767552829, "grad_norm": 0.1777167022228241, "learning_rate": 6.195967260573278e-05, "loss": 1.1689, "mean_token_accuracy": 0.7709998160600662, "num_tokens": 1511678266.0, "step": 22380 }, { "entropy": 0.7727775529026986, "epoch": 6.7828523820343865, "grad_norm": 0.18532463908195496, "learning_rate": 6.192724580035814e-05, "loss": 1.1707, "mean_token_accuracy": 0.7709300830960274, "num_tokens": 1512354459.0, "step": 22390 }, { "entropy": 0.7810803890228272, "epoch": 6.785881996515943, "grad_norm": 0.18865905702114105, "learning_rate": 6.189481367483518e-05, "loss": 1.1774, "mean_token_accuracy": 0.7638928830623627, "num_tokens": 1513012403.0, "step": 22400 }, { "entropy": 0.7617676466703415, "epoch": 6.7889116109975, "grad_norm": 0.18379583954811096, "learning_rate": 6.18623762436303e-05, "loss": 1.1525, "mean_token_accuracy": 0.7848607137799263, "num_tokens": 1513730338.0, "step": 22410 }, { "entropy": 0.7729958280920982, "epoch": 6.791941225479058, "grad_norm": 0.18827588856220245, "learning_rate": 6.182993352121219e-05, "loss": 1.1645, "mean_token_accuracy": 0.7752539649605751, "num_tokens": 1514425220.0, "step": 22420 }, { "entropy": 0.7729710429906845, "epoch": 6.794970839960615, "grad_norm": 0.18430310487747192, "learning_rate": 6.179748552205193e-05, "loss": 1.168, "mean_token_accuracy": 0.772411173582077, "num_tokens": 1515098424.0, "step": 22430 }, { "entropy": 0.7681291326880455, "epoch": 6.798000454442172, "grad_norm": 0.17213250696659088, "learning_rate": 6.176503226062298e-05, "loss": 1.1613, "mean_token_accuracy": 0.7747751772403717, "num_tokens": 1515772411.0, "step": 22440 }, { "entropy": 0.775424613058567, "epoch": 6.80103006892373, "grad_norm": 0.19226394593715668, "learning_rate": 6.173257375140107e-05, "loss": 1.1722, "mean_token_accuracy": 0.7726172283291817, "num_tokens": 1516450283.0, "step": 22450 }, { "entropy": 0.7723731324076653, "epoch": 6.804059683405287, "grad_norm": 0.18939033150672913, "learning_rate": 6.170011000886436e-05, "loss": 1.1627, "mean_token_accuracy": 0.7734440341591835, "num_tokens": 1517135844.0, "step": 22460 }, { "entropy": 0.7599155440926552, "epoch": 6.807089297886844, "grad_norm": 0.1787664294242859, "learning_rate": 6.166764104749328e-05, "loss": 1.1578, "mean_token_accuracy": 0.7775285467505455, "num_tokens": 1517815583.0, "step": 22470 }, { "entropy": 0.7809656620025635, "epoch": 6.810118912368401, "grad_norm": 0.1927752047777176, "learning_rate": 6.163516688177061e-05, "loss": 1.1796, "mean_token_accuracy": 0.7675548031926155, "num_tokens": 1518483753.0, "step": 22480 }, { "entropy": 0.7554050832986832, "epoch": 6.813148526849958, "grad_norm": 0.18380290269851685, "learning_rate": 6.160268752618145e-05, "loss": 1.1625, "mean_token_accuracy": 0.7777298867702485, "num_tokens": 1519169091.0, "step": 22490 }, { "entropy": 0.7745677649974823, "epoch": 6.816178141331515, "grad_norm": 0.17235508561134338, "learning_rate": 6.157020299521326e-05, "loss": 1.1675, "mean_token_accuracy": 0.7761607438325882, "num_tokens": 1519864001.0, "step": 22500 }, { "entropy": 0.7699503168463707, "epoch": 6.819207755813073, "grad_norm": 0.18010473251342773, "learning_rate": 6.15377133033557e-05, "loss": 1.1685, "mean_token_accuracy": 0.7711123511195183, "num_tokens": 1520532427.0, "step": 22510 }, { "entropy": 0.7693112701177597, "epoch": 6.82223737029463, "grad_norm": 0.1972823143005371, "learning_rate": 6.150521846510084e-05, "loss": 1.1651, "mean_token_accuracy": 0.7719914734363555, "num_tokens": 1521204222.0, "step": 22520 }, { "entropy": 0.766348859667778, "epoch": 6.825266984776187, "grad_norm": 0.18678975105285645, "learning_rate": 6.147271849494301e-05, "loss": 1.1738, "mean_token_accuracy": 0.7723152801394463, "num_tokens": 1521879719.0, "step": 22530 }, { "entropy": 0.7674392148852348, "epoch": 6.828296599257745, "grad_norm": 0.17786964774131775, "learning_rate": 6.14402134073788e-05, "loss": 1.1724, "mean_token_accuracy": 0.776619590818882, "num_tokens": 1522563588.0, "step": 22540 }, { "entropy": 0.7676349014043808, "epoch": 6.831326213739302, "grad_norm": 0.18887832760810852, "learning_rate": 6.14077032169071e-05, "loss": 1.1651, "mean_token_accuracy": 0.7697415858507156, "num_tokens": 1523221949.0, "step": 22550 }, { "entropy": 0.7809972450137138, "epoch": 6.8343558282208585, "grad_norm": 0.1912042498588562, "learning_rate": 6.137518793802911e-05, "loss": 1.1802, "mean_token_accuracy": 0.7686642140150071, "num_tokens": 1523895779.0, "step": 22560 }, { "entropy": 0.7668987795710563, "epoch": 6.837385442702416, "grad_norm": 0.17426718771457672, "learning_rate": 6.134266758524824e-05, "loss": 1.1566, "mean_token_accuracy": 0.772175969183445, "num_tokens": 1524587967.0, "step": 22570 }, { "entropy": 0.768090882897377, "epoch": 6.840415057183973, "grad_norm": 0.2009420245885849, "learning_rate": 6.131014217307021e-05, "loss": 1.165, "mean_token_accuracy": 0.7689484387636185, "num_tokens": 1525255486.0, "step": 22580 }, { "entropy": 0.7724968254566192, "epoch": 6.84344467166553, "grad_norm": 0.18546654284000397, "learning_rate": 6.127761171600298e-05, "loss": 1.1728, "mean_token_accuracy": 0.7720789045095444, "num_tokens": 1525928126.0, "step": 22590 }, { "entropy": 0.7581162869930267, "epoch": 6.846474286147088, "grad_norm": 0.18833152949810028, "learning_rate": 6.124507622855677e-05, "loss": 1.1618, "mean_token_accuracy": 0.7740279600024224, "num_tokens": 1526603842.0, "step": 22600 }, { "entropy": 0.766860842704773, "epoch": 6.849503900628645, "grad_norm": 0.1974152773618698, "learning_rate": 6.121253572524402e-05, "loss": 1.1679, "mean_token_accuracy": 0.7746059313416481, "num_tokens": 1527282804.0, "step": 22610 }, { "entropy": 0.7733130246400833, "epoch": 6.852533515110203, "grad_norm": 0.18934597074985504, "learning_rate": 6.117999022057942e-05, "loss": 1.1663, "mean_token_accuracy": 0.7716334849596024, "num_tokens": 1527951591.0, "step": 22620 }, { "entropy": 0.774915198981762, "epoch": 6.85556312959176, "grad_norm": 0.18406635522842407, "learning_rate": 6.11474397290799e-05, "loss": 1.1696, "mean_token_accuracy": 0.7750470638275146, "num_tokens": 1528639116.0, "step": 22630 }, { "entropy": 0.7649851545691491, "epoch": 6.8585927440733165, "grad_norm": 0.1802431344985962, "learning_rate": 6.111488426526462e-05, "loss": 1.1686, "mean_token_accuracy": 0.769770884513855, "num_tokens": 1529308239.0, "step": 22640 }, { "entropy": 0.7749467492103577, "epoch": 6.8616223585548735, "grad_norm": 0.16982093453407288, "learning_rate": 6.108232384365492e-05, "loss": 1.1702, "mean_token_accuracy": 0.7691055357456207, "num_tokens": 1529975463.0, "step": 22650 }, { "entropy": 0.7667987331748009, "epoch": 6.864651973036431, "grad_norm": 0.18780744075775146, "learning_rate": 6.10497584787744e-05, "loss": 1.1663, "mean_token_accuracy": 0.7754819333553314, "num_tokens": 1530655183.0, "step": 22660 }, { "entropy": 0.7683945238590241, "epoch": 6.867681587517988, "grad_norm": 0.18448594212532043, "learning_rate": 6.1017188185148835e-05, "loss": 1.1593, "mean_token_accuracy": 0.78026642203331, "num_tokens": 1531356516.0, "step": 22670 }, { "entropy": 0.7647533610463142, "epoch": 6.870711201999546, "grad_norm": 0.17418862879276276, "learning_rate": 6.09846129773062e-05, "loss": 1.1617, "mean_token_accuracy": 0.7716727629303932, "num_tokens": 1532025501.0, "step": 22680 }, { "entropy": 0.771768918633461, "epoch": 6.873740816481103, "grad_norm": 0.21458394825458527, "learning_rate": 6.0952032869776666e-05, "loss": 1.1736, "mean_token_accuracy": 0.7672025233507156, "num_tokens": 1532679284.0, "step": 22690 }, { "entropy": 0.7655633240938187, "epoch": 6.87677043096266, "grad_norm": 0.18572165071964264, "learning_rate": 6.091944787709258e-05, "loss": 1.161, "mean_token_accuracy": 0.776415741443634, "num_tokens": 1533366501.0, "step": 22700 }, { "entropy": 0.7642249599099159, "epoch": 6.879800045444217, "grad_norm": 0.1808936893939972, "learning_rate": 6.088685801378853e-05, "loss": 1.1658, "mean_token_accuracy": 0.7749256417155266, "num_tokens": 1534042272.0, "step": 22710 }, { "entropy": 0.7605438247323036, "epoch": 6.882829659925775, "grad_norm": 0.17921391129493713, "learning_rate": 6.085426329440116e-05, "loss": 1.1635, "mean_token_accuracy": 0.7730612322688103, "num_tokens": 1534703535.0, "step": 22720 }, { "entropy": 0.7646952569484711, "epoch": 6.8858592744073315, "grad_norm": 0.20235541462898254, "learning_rate": 6.082166373346939e-05, "loss": 1.1583, "mean_token_accuracy": 0.7779818847775459, "num_tokens": 1535396020.0, "step": 22730 }, { "entropy": 0.7687417015433311, "epoch": 6.888888888888889, "grad_norm": 0.18874038755893707, "learning_rate": 6.078905934553424e-05, "loss": 1.1653, "mean_token_accuracy": 0.7701189145445824, "num_tokens": 1536063943.0, "step": 22740 }, { "entropy": 0.7666957467794419, "epoch": 6.891918503370446, "grad_norm": 0.18202944099903107, "learning_rate": 6.075645014513889e-05, "loss": 1.1683, "mean_token_accuracy": 0.768748952448368, "num_tokens": 1536722118.0, "step": 22750 }, { "entropy": 0.7854179322719574, "epoch": 6.894948117852003, "grad_norm": 0.18317753076553345, "learning_rate": 6.0723836146828684e-05, "loss": 1.1766, "mean_token_accuracy": 0.7681457713246346, "num_tokens": 1537395544.0, "step": 22760 }, { "entropy": 0.764841052889824, "epoch": 6.897977732333561, "grad_norm": 0.19204257428646088, "learning_rate": 6.0691217365151085e-05, "loss": 1.1629, "mean_token_accuracy": 0.7735884115099907, "num_tokens": 1538066040.0, "step": 22770 }, { "entropy": 0.7716961309313775, "epoch": 6.901007346815118, "grad_norm": 0.18792079389095306, "learning_rate": 6.0658593814655726e-05, "loss": 1.1708, "mean_token_accuracy": 0.7709859684109688, "num_tokens": 1538731061.0, "step": 22780 }, { "entropy": 0.7698103070259095, "epoch": 6.904036961296675, "grad_norm": 0.16728675365447998, "learning_rate": 6.062596550989432e-05, "loss": 1.1686, "mean_token_accuracy": 0.7722794011235237, "num_tokens": 1539407709.0, "step": 22790 }, { "entropy": 0.7710490822792053, "epoch": 6.907066575778233, "grad_norm": 0.17876802384853363, "learning_rate": 6.059333246542074e-05, "loss": 1.1706, "mean_token_accuracy": 0.770564828813076, "num_tokens": 1540080998.0, "step": 22800 }, { "entropy": 0.7533868879079819, "epoch": 6.9100961902597895, "grad_norm": 0.1808808594942093, "learning_rate": 6.056069469579094e-05, "loss": 1.1546, "mean_token_accuracy": 0.77374277561903, "num_tokens": 1540747136.0, "step": 22810 }, { "entropy": 0.7582154646515846, "epoch": 6.913125804741346, "grad_norm": 0.19275827705860138, "learning_rate": 6.0528052215563e-05, "loss": 1.1647, "mean_token_accuracy": 0.7769809141755104, "num_tokens": 1541421610.0, "step": 22820 }, { "entropy": 0.7601933673024177, "epoch": 6.916155419222904, "grad_norm": 0.1873077005147934, "learning_rate": 6.04954050392971e-05, "loss": 1.1526, "mean_token_accuracy": 0.7743327051401139, "num_tokens": 1542100777.0, "step": 22830 }, { "entropy": 0.7621347069740295, "epoch": 6.919185033704461, "grad_norm": 0.19351033866405487, "learning_rate": 6.0462753181555534e-05, "loss": 1.1683, "mean_token_accuracy": 0.7717160657048225, "num_tokens": 1542765304.0, "step": 22840 }, { "entropy": 0.7661462962627411, "epoch": 6.922214648186018, "grad_norm": 0.185185506939888, "learning_rate": 6.0430096656902645e-05, "loss": 1.1634, "mean_token_accuracy": 0.7741788566112519, "num_tokens": 1543439607.0, "step": 22850 }, { "entropy": 0.7748598620295525, "epoch": 6.925244262667576, "grad_norm": 0.18341249227523804, "learning_rate": 6.0397435479904874e-05, "loss": 1.165, "mean_token_accuracy": 0.7725229263305664, "num_tokens": 1544125563.0, "step": 22860 }, { "entropy": 0.7673856854438782, "epoch": 6.928273877149133, "grad_norm": 0.181553915143013, "learning_rate": 6.0364769665130735e-05, "loss": 1.1672, "mean_token_accuracy": 0.7745458260178566, "num_tokens": 1544806082.0, "step": 22870 }, { "entropy": 0.7706713765859604, "epoch": 6.93130349163069, "grad_norm": 0.18595345318317413, "learning_rate": 6.0332099227150853e-05, "loss": 1.1586, "mean_token_accuracy": 0.7737122029066086, "num_tokens": 1545493338.0, "step": 22880 }, { "entropy": 0.766918708384037, "epoch": 6.9343331061122475, "grad_norm": 0.1871090978384018, "learning_rate": 6.0299424180537835e-05, "loss": 1.1558, "mean_token_accuracy": 0.776808249950409, "num_tokens": 1546180215.0, "step": 22890 }, { "entropy": 0.7650902613997459, "epoch": 6.937362720593804, "grad_norm": 0.17750787734985352, "learning_rate": 6.026674453986641e-05, "loss": 1.1636, "mean_token_accuracy": 0.7759310081601143, "num_tokens": 1546861315.0, "step": 22900 }, { "entropy": 0.7809863060712814, "epoch": 6.940392335075361, "grad_norm": 0.17797423899173737, "learning_rate": 6.0234060319713315e-05, "loss": 1.1713, "mean_token_accuracy": 0.7697636038064957, "num_tokens": 1547530157.0, "step": 22910 }, { "entropy": 0.7729805439710618, "epoch": 6.943421949556919, "grad_norm": 0.17471666634082794, "learning_rate": 6.020137153465737e-05, "loss": 1.1718, "mean_token_accuracy": 0.7714817345142364, "num_tokens": 1548212298.0, "step": 22920 }, { "entropy": 0.7698503375053406, "epoch": 6.946451564038476, "grad_norm": 0.17625947296619415, "learning_rate": 6.016867819927938e-05, "loss": 1.1691, "mean_token_accuracy": 0.7711612269282341, "num_tokens": 1548881160.0, "step": 22930 }, { "entropy": 0.7707154870033264, "epoch": 6.949481178520033, "grad_norm": 0.19445252418518066, "learning_rate": 6.013598032816222e-05, "loss": 1.1689, "mean_token_accuracy": 0.773430486023426, "num_tokens": 1549560622.0, "step": 22940 }, { "entropy": 0.7669893369078636, "epoch": 6.952510793001591, "grad_norm": 0.18977509438991547, "learning_rate": 6.01032779358908e-05, "loss": 1.1634, "mean_token_accuracy": 0.7739262640476227, "num_tokens": 1550239370.0, "step": 22950 }, { "entropy": 0.7711462885141372, "epoch": 6.955540407483148, "grad_norm": 0.1888669729232788, "learning_rate": 6.0070571037051994e-05, "loss": 1.1709, "mean_token_accuracy": 0.7699010357260704, "num_tokens": 1550903445.0, "step": 22960 }, { "entropy": 0.7590415224432945, "epoch": 6.958570021964705, "grad_norm": 0.1832433044910431, "learning_rate": 6.003785964623472e-05, "loss": 1.1532, "mean_token_accuracy": 0.7786305457353592, "num_tokens": 1551588450.0, "step": 22970 }, { "entropy": 0.7689649447798729, "epoch": 6.9615996364462625, "grad_norm": 0.1829010248184204, "learning_rate": 6.0005143778029906e-05, "loss": 1.1672, "mean_token_accuracy": 0.7758778482675552, "num_tokens": 1552276431.0, "step": 22980 }, { "entropy": 0.7748028218746186, "epoch": 6.964629250927819, "grad_norm": 0.1904628574848175, "learning_rate": 5.997242344703043e-05, "loss": 1.1729, "mean_token_accuracy": 0.7732190832495689, "num_tokens": 1552958727.0, "step": 22990 }, { "entropy": 0.7566230967640877, "epoch": 6.967658865409376, "grad_norm": 0.19712401926517487, "learning_rate": 5.993969866783125e-05, "loss": 1.1655, "mean_token_accuracy": 0.7763057157397271, "num_tokens": 1553626742.0, "step": 23000 }, { "entropy": 0.7784351959824563, "epoch": 6.970688479890934, "grad_norm": 0.1927061527967453, "learning_rate": 5.9906969455029226e-05, "loss": 1.1751, "mean_token_accuracy": 0.7713754266500473, "num_tokens": 1554302878.0, "step": 23010 }, { "entropy": 0.7713057413697243, "epoch": 6.973718094372491, "grad_norm": 0.19877652823925018, "learning_rate": 5.987423582322322e-05, "loss": 1.1678, "mean_token_accuracy": 0.7730776354670524, "num_tokens": 1554970858.0, "step": 23020 }, { "entropy": 0.7634402811527252, "epoch": 6.976747708854048, "grad_norm": 0.1868930608034134, "learning_rate": 5.984149778701409e-05, "loss": 1.1733, "mean_token_accuracy": 0.770755261182785, "num_tokens": 1555625363.0, "step": 23030 }, { "entropy": 0.772444897890091, "epoch": 6.979777323335606, "grad_norm": 0.1826222687959671, "learning_rate": 5.980875536100461e-05, "loss": 1.172, "mean_token_accuracy": 0.7747440591454506, "num_tokens": 1556310882.0, "step": 23040 }, { "entropy": 0.7708359241485596, "epoch": 6.982806937817163, "grad_norm": 0.18035846948623657, "learning_rate": 5.977600855979957e-05, "loss": 1.1605, "mean_token_accuracy": 0.7712452054023743, "num_tokens": 1556987180.0, "step": 23050 }, { "entropy": 0.7600368797779083, "epoch": 6.98583655229872, "grad_norm": 0.19632886350154877, "learning_rate": 5.974325739800565e-05, "loss": 1.1673, "mean_token_accuracy": 0.772319607436657, "num_tokens": 1557651327.0, "step": 23060 }, { "entropy": 0.7694717109203338, "epoch": 6.988866166780277, "grad_norm": 0.18624110519886017, "learning_rate": 5.971050189023153e-05, "loss": 1.1644, "mean_token_accuracy": 0.76870878636837, "num_tokens": 1558312900.0, "step": 23070 }, { "entropy": 0.7642167627811431, "epoch": 6.991895781261834, "grad_norm": 0.1855657994747162, "learning_rate": 5.9677742051087804e-05, "loss": 1.1592, "mean_token_accuracy": 0.7784095257520676, "num_tokens": 1559002299.0, "step": 23080 }, { "entropy": 0.7630846992135047, "epoch": 6.994925395743392, "grad_norm": 0.17961439490318298, "learning_rate": 5.964497789518699e-05, "loss": 1.1668, "mean_token_accuracy": 0.7715464368462562, "num_tokens": 1559668049.0, "step": 23090 }, { "entropy": 0.7648632317781449, "epoch": 6.997955010224949, "grad_norm": 0.19289350509643555, "learning_rate": 5.961220943714354e-05, "loss": 1.1593, "mean_token_accuracy": 0.7767577812075614, "num_tokens": 1560354052.0, "step": 23100 }, { "entropy": 0.769464543232551, "epoch": 7.000908884344467, "grad_norm": 0.17458492517471313, "learning_rate": 5.957943669157382e-05, "loss": 1.1615, "mean_token_accuracy": 0.7763572549208616, "num_tokens": 1561022967.0, "step": 23110 }, { "entropy": 0.7415925338864326, "epoch": 7.003938498826025, "grad_norm": 0.20691928267478943, "learning_rate": 5.954665967309614e-05, "loss": 1.1336, "mean_token_accuracy": 0.783345764875412, "num_tokens": 1561709761.0, "step": 23120 }, { "entropy": 0.7527311459183693, "epoch": 7.006968113307582, "grad_norm": 0.2043900191783905, "learning_rate": 5.951387839633066e-05, "loss": 1.1407, "mean_token_accuracy": 0.7745445773005486, "num_tokens": 1562383663.0, "step": 23130 }, { "entropy": 0.7451196193695069, "epoch": 7.0099977277891385, "grad_norm": 0.2298509031534195, "learning_rate": 5.948109287589947e-05, "loss": 1.1344, "mean_token_accuracy": 0.7788399383425713, "num_tokens": 1563061168.0, "step": 23140 }, { "entropy": 0.7414814531803131, "epoch": 7.013027342270696, "grad_norm": 0.20013704895973206, "learning_rate": 5.94483031264266e-05, "loss": 1.1284, "mean_token_accuracy": 0.7796229213476181, "num_tokens": 1563742282.0, "step": 23150 }, { "entropy": 0.7403618216514587, "epoch": 7.016056956752253, "grad_norm": 0.21095553040504456, "learning_rate": 5.9415509162537855e-05, "loss": 1.1289, "mean_token_accuracy": 0.7779444932937623, "num_tokens": 1564417247.0, "step": 23160 }, { "entropy": 0.7454135611653327, "epoch": 7.01908657123381, "grad_norm": 0.22342520952224731, "learning_rate": 5.9382710998861005e-05, "loss": 1.139, "mean_token_accuracy": 0.7780433669686317, "num_tokens": 1565093792.0, "step": 23170 }, { "entropy": 0.7413820624351501, "epoch": 7.022116185715368, "grad_norm": 0.20998717844486237, "learning_rate": 5.934990865002569e-05, "loss": 1.1314, "mean_token_accuracy": 0.780006904900074, "num_tokens": 1565772423.0, "step": 23180 }, { "entropy": 0.7410301849246025, "epoch": 7.025145800196925, "grad_norm": 0.20335505902767181, "learning_rate": 5.931710213066339e-05, "loss": 1.1336, "mean_token_accuracy": 0.778225240111351, "num_tokens": 1566443073.0, "step": 23190 }, { "entropy": 0.73619065284729, "epoch": 7.028175414678482, "grad_norm": 0.2237924337387085, "learning_rate": 5.928429145540745e-05, "loss": 1.1259, "mean_token_accuracy": 0.7830476120114327, "num_tokens": 1567123051.0, "step": 23200 }, { "entropy": 0.7490300342440606, "epoch": 7.03120502916004, "grad_norm": 0.21476173400878906, "learning_rate": 5.9251476638893053e-05, "loss": 1.1298, "mean_token_accuracy": 0.7808086097240448, "num_tokens": 1567811235.0, "step": 23210 }, { "entropy": 0.7409378379583359, "epoch": 7.034234643641597, "grad_norm": 0.21300707757472992, "learning_rate": 5.921865769575729e-05, "loss": 1.1393, "mean_token_accuracy": 0.7780482247471809, "num_tokens": 1568488704.0, "step": 23220 }, { "entropy": 0.7460506349802017, "epoch": 7.0372642581231535, "grad_norm": 0.21356716752052307, "learning_rate": 5.918583464063903e-05, "loss": 1.1453, "mean_token_accuracy": 0.7756352841854095, "num_tokens": 1569155307.0, "step": 23230 }, { "entropy": 0.7379998996853828, "epoch": 7.040293872604711, "grad_norm": 0.200457364320755, "learning_rate": 5.915300748817899e-05, "loss": 1.1286, "mean_token_accuracy": 0.7798128291964531, "num_tokens": 1569831441.0, "step": 23240 }, { "entropy": 0.7388947442173958, "epoch": 7.043323487086268, "grad_norm": 0.21624140441417694, "learning_rate": 5.912017625301973e-05, "loss": 1.1412, "mean_token_accuracy": 0.7798016265034675, "num_tokens": 1570507044.0, "step": 23250 }, { "entropy": 0.7562513992190361, "epoch": 7.046353101567825, "grad_norm": 0.20515398681163788, "learning_rate": 5.908734094980564e-05, "loss": 1.1519, "mean_token_accuracy": 0.7743081346154213, "num_tokens": 1571175446.0, "step": 23260 }, { "entropy": 0.7395011693239212, "epoch": 7.049382716049383, "grad_norm": 0.21537697315216064, "learning_rate": 5.905450159318286e-05, "loss": 1.1381, "mean_token_accuracy": 0.7777394846081733, "num_tokens": 1571847808.0, "step": 23270 }, { "entropy": 0.7445904403924942, "epoch": 7.05241233053094, "grad_norm": 0.23160496354103088, "learning_rate": 5.9021658197799435e-05, "loss": 1.139, "mean_token_accuracy": 0.7750459596514702, "num_tokens": 1572508289.0, "step": 23280 }, { "entropy": 0.7437122985720634, "epoch": 7.055441945012497, "grad_norm": 0.2150079309940338, "learning_rate": 5.898881077830515e-05, "loss": 1.1339, "mean_token_accuracy": 0.7771567583084107, "num_tokens": 1573176400.0, "step": 23290 }, { "entropy": 0.742127226293087, "epoch": 7.058471559494055, "grad_norm": 0.21064236760139465, "learning_rate": 5.8955959349351574e-05, "loss": 1.1383, "mean_token_accuracy": 0.7798569083213807, "num_tokens": 1573854994.0, "step": 23300 }, { "entropy": 0.7435739010572433, "epoch": 7.0615011739756115, "grad_norm": 0.2325943112373352, "learning_rate": 5.8923103925592116e-05, "loss": 1.1404, "mean_token_accuracy": 0.7728467658162117, "num_tokens": 1574513069.0, "step": 23310 }, { "entropy": 0.7404571071267128, "epoch": 7.064530788457168, "grad_norm": 0.20979952812194824, "learning_rate": 5.889024452168193e-05, "loss": 1.1353, "mean_token_accuracy": 0.7756794095039368, "num_tokens": 1575178513.0, "step": 23320 }, { "entropy": 0.7502257347106933, "epoch": 7.067560402938726, "grad_norm": 0.23219376802444458, "learning_rate": 5.885738115227795e-05, "loss": 1.1325, "mean_token_accuracy": 0.7795778349041939, "num_tokens": 1575872216.0, "step": 23330 }, { "entropy": 0.7455775111913681, "epoch": 7.070590017420283, "grad_norm": 0.2104475498199463, "learning_rate": 5.8824513832038866e-05, "loss": 1.1447, "mean_token_accuracy": 0.7723892733454705, "num_tokens": 1576528943.0, "step": 23340 }, { "entropy": 0.7544861778616905, "epoch": 7.07361963190184, "grad_norm": 0.21834543347358704, "learning_rate": 5.879164257562517e-05, "loss": 1.1477, "mean_token_accuracy": 0.7774828374385834, "num_tokens": 1577202197.0, "step": 23350 }, { "entropy": 0.7377363726496696, "epoch": 7.076649246383398, "grad_norm": 0.21699722111225128, "learning_rate": 5.87587673976991e-05, "loss": 1.1412, "mean_token_accuracy": 0.7775470077991485, "num_tokens": 1577869101.0, "step": 23360 }, { "entropy": 0.7471830129623414, "epoch": 7.079678860864955, "grad_norm": 0.21366539597511292, "learning_rate": 5.8725888312924606e-05, "loss": 1.1414, "mean_token_accuracy": 0.7751694902777672, "num_tokens": 1578534353.0, "step": 23370 }, { "entropy": 0.7416027471423149, "epoch": 7.082708475346513, "grad_norm": 0.23158206045627594, "learning_rate": 5.86930053359674e-05, "loss": 1.133, "mean_token_accuracy": 0.7785997346043587, "num_tokens": 1579208524.0, "step": 23380 }, { "entropy": 0.7411579832434654, "epoch": 7.0857380898280695, "grad_norm": 0.20406202971935272, "learning_rate": 5.866011848149495e-05, "loss": 1.1386, "mean_token_accuracy": 0.780168367922306, "num_tokens": 1579885128.0, "step": 23390 }, { "entropy": 0.7510242238640785, "epoch": 7.088767704309626, "grad_norm": 0.25734636187553406, "learning_rate": 5.862722776417644e-05, "loss": 1.1439, "mean_token_accuracy": 0.771857887506485, "num_tokens": 1580550798.0, "step": 23400 }, { "entropy": 0.7522947192192078, "epoch": 7.091797318791184, "grad_norm": 0.20757640898227692, "learning_rate": 5.859433319868276e-05, "loss": 1.1448, "mean_token_accuracy": 0.7728193268179894, "num_tokens": 1581218703.0, "step": 23410 }, { "entropy": 0.7494997382164001, "epoch": 7.094826933272741, "grad_norm": 0.22695372998714447, "learning_rate": 5.8561434799686554e-05, "loss": 1.1475, "mean_token_accuracy": 0.7778515160083771, "num_tokens": 1581903891.0, "step": 23420 }, { "entropy": 0.739686281979084, "epoch": 7.097856547754298, "grad_norm": 0.21500404179096222, "learning_rate": 5.852853258186213e-05, "loss": 1.132, "mean_token_accuracy": 0.7795198231935501, "num_tokens": 1582582728.0, "step": 23430 }, { "entropy": 0.7484030231833458, "epoch": 7.100886162235856, "grad_norm": 0.21093358099460602, "learning_rate": 5.849562655988553e-05, "loss": 1.1403, "mean_token_accuracy": 0.7773430526256562, "num_tokens": 1583265598.0, "step": 23440 }, { "entropy": 0.7469753220677375, "epoch": 7.103915776717413, "grad_norm": 0.22998684644699097, "learning_rate": 5.8462716748434486e-05, "loss": 1.1405, "mean_token_accuracy": 0.7785072311758995, "num_tokens": 1583937787.0, "step": 23450 }, { "entropy": 0.7508120566606522, "epoch": 7.10694539119897, "grad_norm": 0.20284292101860046, "learning_rate": 5.842980316218845e-05, "loss": 1.1398, "mean_token_accuracy": 0.7788093894720077, "num_tokens": 1584629630.0, "step": 23460 }, { "entropy": 0.7422115087509156, "epoch": 7.1099750056805275, "grad_norm": 0.21174338459968567, "learning_rate": 5.839688581582847e-05, "loss": 1.1455, "mean_token_accuracy": 0.7801809579133987, "num_tokens": 1585313405.0, "step": 23470 }, { "entropy": 0.7515713751316071, "epoch": 7.1130046201620845, "grad_norm": 0.21566711366176605, "learning_rate": 5.836396472403737e-05, "loss": 1.1402, "mean_token_accuracy": 0.7787953034043312, "num_tokens": 1586005561.0, "step": 23480 }, { "entropy": 0.7425371959805489, "epoch": 7.116034234643641, "grad_norm": 0.2266668826341629, "learning_rate": 5.8331039901499606e-05, "loss": 1.1306, "mean_token_accuracy": 0.7758061677217484, "num_tokens": 1586664727.0, "step": 23490 }, { "entropy": 0.7493927583098412, "epoch": 7.119063849125199, "grad_norm": 0.2102845460176468, "learning_rate": 5.829811136290128e-05, "loss": 1.1434, "mean_token_accuracy": 0.777026678621769, "num_tokens": 1587340222.0, "step": 23500 }, { "entropy": 0.7541937798261642, "epoch": 7.122093463606756, "grad_norm": 0.20236948132514954, "learning_rate": 5.826517912293016e-05, "loss": 1.1355, "mean_token_accuracy": 0.7758743047714234, "num_tokens": 1588020769.0, "step": 23510 }, { "entropy": 0.7430880099534989, "epoch": 7.125123078088313, "grad_norm": 0.20082427561283112, "learning_rate": 5.823224319627568e-05, "loss": 1.1441, "mean_token_accuracy": 0.7754659965634346, "num_tokens": 1588677061.0, "step": 23520 }, { "entropy": 0.7427562147378921, "epoch": 7.128152692569871, "grad_norm": 0.2160859853029251, "learning_rate": 5.8199303597628926e-05, "loss": 1.1391, "mean_token_accuracy": 0.7824327185750007, "num_tokens": 1589364715.0, "step": 23530 }, { "entropy": 0.7464813143014908, "epoch": 7.131182307051428, "grad_norm": 0.21505455672740936, "learning_rate": 5.8166360341682605e-05, "loss": 1.1356, "mean_token_accuracy": 0.7731814235448837, "num_tokens": 1590038292.0, "step": 23540 }, { "entropy": 0.7509728401899338, "epoch": 7.134211921532985, "grad_norm": 0.21698719263076782, "learning_rate": 5.8133413443131036e-05, "loss": 1.1448, "mean_token_accuracy": 0.7785868659615517, "num_tokens": 1590728880.0, "step": 23550 }, { "entropy": 0.7430469363927841, "epoch": 7.1372415360145425, "grad_norm": 0.22430694103240967, "learning_rate": 5.8100462916670196e-05, "loss": 1.1321, "mean_token_accuracy": 0.7757880195975304, "num_tokens": 1591393754.0, "step": 23560 }, { "entropy": 0.7454712346196175, "epoch": 7.140271150496099, "grad_norm": 0.20653754472732544, "learning_rate": 5.806750877699767e-05, "loss": 1.1348, "mean_token_accuracy": 0.7810803964734078, "num_tokens": 1592073212.0, "step": 23570 }, { "entropy": 0.7382087767124176, "epoch": 7.143300764977656, "grad_norm": 0.2208268642425537, "learning_rate": 5.8034551038812657e-05, "loss": 1.1319, "mean_token_accuracy": 0.782666739821434, "num_tokens": 1592767591.0, "step": 23580 }, { "entropy": 0.7456942811608315, "epoch": 7.146330379459214, "grad_norm": 0.21309810876846313, "learning_rate": 5.800158971681595e-05, "loss": 1.1378, "mean_token_accuracy": 0.7783115655183792, "num_tokens": 1593459153.0, "step": 23590 }, { "entropy": 0.7472904816269874, "epoch": 7.149359993940771, "grad_norm": 0.21896205842494965, "learning_rate": 5.796862482570994e-05, "loss": 1.1422, "mean_token_accuracy": 0.7750360250473023, "num_tokens": 1594132093.0, "step": 23600 }, { "entropy": 0.7441201984882355, "epoch": 7.152389608422328, "grad_norm": 0.2231408655643463, "learning_rate": 5.7935656380198634e-05, "loss": 1.1366, "mean_token_accuracy": 0.7772513866424561, "num_tokens": 1594815093.0, "step": 23610 }, { "entropy": 0.7458626598119735, "epoch": 7.155419222903886, "grad_norm": 0.2119058519601822, "learning_rate": 5.790268439498759e-05, "loss": 1.149, "mean_token_accuracy": 0.7717437729239464, "num_tokens": 1595480749.0, "step": 23620 }, { "entropy": 0.7433493629097938, "epoch": 7.158448837385443, "grad_norm": 0.21936173737049103, "learning_rate": 5.7869708884783994e-05, "loss": 1.1385, "mean_token_accuracy": 0.7798545464873314, "num_tokens": 1596162626.0, "step": 23630 }, { "entropy": 0.7416732639074326, "epoch": 7.161478451867, "grad_norm": 0.21237511932849884, "learning_rate": 5.7836729864296537e-05, "loss": 1.1306, "mean_token_accuracy": 0.7766799658536911, "num_tokens": 1596833948.0, "step": 23640 }, { "entropy": 0.7414098903536797, "epoch": 7.164508066348557, "grad_norm": 0.21430625021457672, "learning_rate": 5.780374734823552e-05, "loss": 1.14, "mean_token_accuracy": 0.776218494772911, "num_tokens": 1597498836.0, "step": 23650 }, { "entropy": 0.7381272122263909, "epoch": 7.167537680830114, "grad_norm": 0.1998218148946762, "learning_rate": 5.777076135131281e-05, "loss": 1.1324, "mean_token_accuracy": 0.7800790205597877, "num_tokens": 1598178573.0, "step": 23660 }, { "entropy": 0.7380612477660179, "epoch": 7.170567295311671, "grad_norm": 0.22392015159130096, "learning_rate": 5.773777188824181e-05, "loss": 1.1368, "mean_token_accuracy": 0.7783250853419303, "num_tokens": 1598846556.0, "step": 23670 }, { "entropy": 0.7345643967390061, "epoch": 7.173596909793229, "grad_norm": 0.20985360443592072, "learning_rate": 5.770477897373745e-05, "loss": 1.1308, "mean_token_accuracy": 0.7828897207975387, "num_tokens": 1599542618.0, "step": 23680 }, { "entropy": 0.7467793002724648, "epoch": 7.176626524274786, "grad_norm": 0.2099127322435379, "learning_rate": 5.7671782622516236e-05, "loss": 1.142, "mean_token_accuracy": 0.7768888935446739, "num_tokens": 1600217753.0, "step": 23690 }, { "entropy": 0.7502148687839508, "epoch": 7.179656138756343, "grad_norm": 0.21838758885860443, "learning_rate": 5.7638782849296204e-05, "loss": 1.1347, "mean_token_accuracy": 0.7802770152688027, "num_tokens": 1600911236.0, "step": 23700 }, { "entropy": 0.7430243715643883, "epoch": 7.182685753237901, "grad_norm": 0.21046076714992523, "learning_rate": 5.760577966879688e-05, "loss": 1.1327, "mean_token_accuracy": 0.7815440043807029, "num_tokens": 1601595935.0, "step": 23710 }, { "entropy": 0.7310260012745857, "epoch": 7.185715367719458, "grad_norm": 0.2190067321062088, "learning_rate": 5.757277309573934e-05, "loss": 1.1319, "mean_token_accuracy": 0.7807996392250061, "num_tokens": 1602264612.0, "step": 23720 }, { "entropy": 0.7429593563079834, "epoch": 7.1887449822010145, "grad_norm": 0.21921633183956146, "learning_rate": 5.7539763144846184e-05, "loss": 1.1383, "mean_token_accuracy": 0.7799703419208527, "num_tokens": 1602944973.0, "step": 23730 }, { "entropy": 0.7431642904877662, "epoch": 7.191774596682572, "grad_norm": 0.2143329381942749, "learning_rate": 5.7506749830841476e-05, "loss": 1.1395, "mean_token_accuracy": 0.7765309974551201, "num_tokens": 1603607959.0, "step": 23740 }, { "entropy": 0.7459607213735581, "epoch": 7.194804211164129, "grad_norm": 0.22598759829998016, "learning_rate": 5.747373316845081e-05, "loss": 1.1363, "mean_token_accuracy": 0.7783181667327881, "num_tokens": 1604291864.0, "step": 23750 }, { "entropy": 0.7523634403944015, "epoch": 7.197833825645686, "grad_norm": 0.22408194839954376, "learning_rate": 5.7440713172401294e-05, "loss": 1.1523, "mean_token_accuracy": 0.7770254418253899, "num_tokens": 1604973582.0, "step": 23760 }, { "entropy": 0.7411210224032402, "epoch": 7.200863440127244, "grad_norm": 0.2086329311132431, "learning_rate": 5.7407689857421473e-05, "loss": 1.1343, "mean_token_accuracy": 0.7793959960341453, "num_tokens": 1605646230.0, "step": 23770 }, { "entropy": 0.7558016300201416, "epoch": 7.203893054608801, "grad_norm": 0.21734054386615753, "learning_rate": 5.737466323824141e-05, "loss": 1.1492, "mean_token_accuracy": 0.7727850556373597, "num_tokens": 1606312586.0, "step": 23780 }, { "entropy": 0.7510229885578156, "epoch": 7.206922669090358, "grad_norm": 0.21224647760391235, "learning_rate": 5.734163332959261e-05, "loss": 1.1469, "mean_token_accuracy": 0.7727331802248955, "num_tokens": 1606971412.0, "step": 23790 }, { "entropy": 0.733087569475174, "epoch": 7.209952283571916, "grad_norm": 0.22325651347637177, "learning_rate": 5.73086001462081e-05, "loss": 1.1305, "mean_token_accuracy": 0.7817760735750199, "num_tokens": 1607644861.0, "step": 23800 }, { "entropy": 0.7554741337895393, "epoch": 7.212981898053473, "grad_norm": 0.20936958491802216, "learning_rate": 5.727556370282229e-05, "loss": 1.1434, "mean_token_accuracy": 0.7714540019631386, "num_tokens": 1608311747.0, "step": 23810 }, { "entropy": 0.7569979503750801, "epoch": 7.2160115125350295, "grad_norm": 0.20916040241718292, "learning_rate": 5.724252401417112e-05, "loss": 1.1484, "mean_token_accuracy": 0.774019081890583, "num_tokens": 1608988099.0, "step": 23820 }, { "entropy": 0.7400409236550332, "epoch": 7.219041127016587, "grad_norm": 0.21031858026981354, "learning_rate": 5.7209481094991916e-05, "loss": 1.1435, "mean_token_accuracy": 0.7749984413385391, "num_tokens": 1609651043.0, "step": 23830 }, { "entropy": 0.7454067185521126, "epoch": 7.222070741498144, "grad_norm": 0.21775247156620026, "learning_rate": 5.717643496002351e-05, "loss": 1.1393, "mean_token_accuracy": 0.7788102000951767, "num_tokens": 1610331166.0, "step": 23840 }, { "entropy": 0.7444580152630806, "epoch": 7.225100355979702, "grad_norm": 0.21025405824184418, "learning_rate": 5.714338562400609e-05, "loss": 1.1372, "mean_token_accuracy": 0.7797574743628501, "num_tokens": 1611013078.0, "step": 23850 }, { "entropy": 0.7428898483514785, "epoch": 7.228129970461259, "grad_norm": 0.2147832065820694, "learning_rate": 5.7110333101681336e-05, "loss": 1.1413, "mean_token_accuracy": 0.7777466505765915, "num_tokens": 1611688453.0, "step": 23860 }, { "entropy": 0.7444768413901329, "epoch": 7.231159584942816, "grad_norm": 0.22270500659942627, "learning_rate": 5.707727740779234e-05, "loss": 1.1405, "mean_token_accuracy": 0.7768704071640968, "num_tokens": 1612364201.0, "step": 23870 }, { "entropy": 0.7423681631684304, "epoch": 7.234189199424374, "grad_norm": 0.21367275714874268, "learning_rate": 5.704421855708355e-05, "loss": 1.1326, "mean_token_accuracy": 0.7826312854886055, "num_tokens": 1613059538.0, "step": 23880 }, { "entropy": 0.7551495105028152, "epoch": 7.237218813905931, "grad_norm": 0.20957709848880768, "learning_rate": 5.701115656430092e-05, "loss": 1.1541, "mean_token_accuracy": 0.769644808769226, "num_tokens": 1613718151.0, "step": 23890 }, { "entropy": 0.7407628953456878, "epoch": 7.2402484283874875, "grad_norm": 0.2054181545972824, "learning_rate": 5.697809144419172e-05, "loss": 1.1366, "mean_token_accuracy": 0.7815528899431229, "num_tokens": 1614402887.0, "step": 23900 }, { "entropy": 0.7427027866244316, "epoch": 7.243278042869045, "grad_norm": 0.21740637719631195, "learning_rate": 5.6945023211504656e-05, "loss": 1.1404, "mean_token_accuracy": 0.7759135112166404, "num_tokens": 1615066380.0, "step": 23910 }, { "entropy": 0.7589648768305779, "epoch": 7.246307657350602, "grad_norm": 0.22193023562431335, "learning_rate": 5.69119518809898e-05, "loss": 1.154, "mean_token_accuracy": 0.7683237046003342, "num_tokens": 1615723150.0, "step": 23920 }, { "entropy": 0.7384623020887375, "epoch": 7.249337271832159, "grad_norm": 0.21717776358127594, "learning_rate": 5.687887746739866e-05, "loss": 1.1329, "mean_token_accuracy": 0.7788059458136558, "num_tokens": 1616397325.0, "step": 23930 }, { "entropy": 0.7552555456757546, "epoch": 7.252366886313717, "grad_norm": 0.2296213060617447, "learning_rate": 5.684579998548403e-05, "loss": 1.1396, "mean_token_accuracy": 0.772842176258564, "num_tokens": 1617072088.0, "step": 23940 }, { "entropy": 0.7496678099036217, "epoch": 7.255396500795274, "grad_norm": 0.2228216975927353, "learning_rate": 5.681271945000015e-05, "loss": 1.1468, "mean_token_accuracy": 0.7734100058674812, "num_tokens": 1617732992.0, "step": 23950 }, { "entropy": 0.7452170476317406, "epoch": 7.258426115276831, "grad_norm": 0.22176329791545868, "learning_rate": 5.67796358757026e-05, "loss": 1.1453, "mean_token_accuracy": 0.7765678241848946, "num_tokens": 1618406533.0, "step": 23960 }, { "entropy": 0.7457974344491959, "epoch": 7.261455729758389, "grad_norm": 0.21788766980171204, "learning_rate": 5.674654927734829e-05, "loss": 1.1403, "mean_token_accuracy": 0.7738040804862976, "num_tokens": 1619072744.0, "step": 23970 }, { "entropy": 0.7414055898785591, "epoch": 7.2644853442399455, "grad_norm": 0.2042253017425537, "learning_rate": 5.67134596696955e-05, "loss": 1.1428, "mean_token_accuracy": 0.7790959507226944, "num_tokens": 1619753563.0, "step": 23980 }, { "entropy": 0.7385856539011002, "epoch": 7.267514958721502, "grad_norm": 0.22292554378509521, "learning_rate": 5.668036706750386e-05, "loss": 1.1343, "mean_token_accuracy": 0.7773075819015502, "num_tokens": 1620419569.0, "step": 23990 }, { "entropy": 0.7456323817372322, "epoch": 7.27054457320306, "grad_norm": 0.2110702246427536, "learning_rate": 5.664727148553431e-05, "loss": 1.1402, "mean_token_accuracy": 0.7801220253109932, "num_tokens": 1621111439.0, "step": 24000 }, { "entropy": 0.7472523480653763, "epoch": 7.273574187684617, "grad_norm": 0.2149803340435028, "learning_rate": 5.661417293854917e-05, "loss": 1.1441, "mean_token_accuracy": 0.7775317147374153, "num_tokens": 1621787136.0, "step": 24010 }, { "entropy": 0.7425885871052742, "epoch": 7.276603802166174, "grad_norm": 0.21277956664562225, "learning_rate": 5.6581071441312006e-05, "loss": 1.1353, "mean_token_accuracy": 0.7793334111571312, "num_tokens": 1622470050.0, "step": 24020 }, { "entropy": 0.7456920474767685, "epoch": 7.279633416647732, "grad_norm": 0.2155601978302002, "learning_rate": 5.654796700858775e-05, "loss": 1.1425, "mean_token_accuracy": 0.7784823909401893, "num_tokens": 1623148353.0, "step": 24030 }, { "entropy": 0.7436433017253876, "epoch": 7.282663031129289, "grad_norm": 0.21758146584033966, "learning_rate": 5.651485965514267e-05, "loss": 1.1361, "mean_token_accuracy": 0.7788098677992821, "num_tokens": 1623814664.0, "step": 24040 }, { "entropy": 0.7383101120591163, "epoch": 7.285692645610846, "grad_norm": 0.22921079397201538, "learning_rate": 5.648174939574427e-05, "loss": 1.1392, "mean_token_accuracy": 0.7783874228596688, "num_tokens": 1624492342.0, "step": 24050 }, { "entropy": 0.7421487629413605, "epoch": 7.2887222600924035, "grad_norm": 0.21617472171783447, "learning_rate": 5.644863624516138e-05, "loss": 1.1454, "mean_token_accuracy": 0.7777203261852265, "num_tokens": 1625167670.0, "step": 24060 }, { "entropy": 0.7490338414907456, "epoch": 7.2917518745739605, "grad_norm": 0.20987454056739807, "learning_rate": 5.641552021816414e-05, "loss": 1.1464, "mean_token_accuracy": 0.777875579893589, "num_tokens": 1625851661.0, "step": 24070 }, { "entropy": 0.7433881223201751, "epoch": 7.294781489055517, "grad_norm": 0.21918809413909912, "learning_rate": 5.638240132952394e-05, "loss": 1.1417, "mean_token_accuracy": 0.7791951164603234, "num_tokens": 1626523825.0, "step": 24080 }, { "entropy": 0.7431725174188614, "epoch": 7.297811103537075, "grad_norm": 0.20071259140968323, "learning_rate": 5.634927959401346e-05, "loss": 1.1412, "mean_token_accuracy": 0.7779849901795387, "num_tokens": 1627191800.0, "step": 24090 }, { "entropy": 0.745476596057415, "epoch": 7.300840718018632, "grad_norm": 0.20968084037303925, "learning_rate": 5.6316155026406684e-05, "loss": 1.1435, "mean_token_accuracy": 0.7744548842310905, "num_tokens": 1627861146.0, "step": 24100 }, { "entropy": 0.7318308338522911, "epoch": 7.303870332500189, "grad_norm": 0.20997008681297302, "learning_rate": 5.628302764147877e-05, "loss": 1.1299, "mean_token_accuracy": 0.7816808238625527, "num_tokens": 1628548768.0, "step": 24110 }, { "entropy": 0.7426049247384071, "epoch": 7.306899946981747, "grad_norm": 0.2112835943698883, "learning_rate": 5.624989745400623e-05, "loss": 1.1378, "mean_token_accuracy": 0.779009498655796, "num_tokens": 1629220912.0, "step": 24120 }, { "entropy": 0.7388949811458587, "epoch": 7.309929561463304, "grad_norm": 0.21682319045066833, "learning_rate": 5.621676447876677e-05, "loss": 1.1365, "mean_token_accuracy": 0.7800767734646797, "num_tokens": 1629893582.0, "step": 24130 }, { "entropy": 0.7498345792293548, "epoch": 7.312959175944861, "grad_norm": 0.2141590118408203, "learning_rate": 5.618362873053937e-05, "loss": 1.1514, "mean_token_accuracy": 0.7738681003451348, "num_tokens": 1630561143.0, "step": 24140 }, { "entropy": 0.7501821503043175, "epoch": 7.3159887904264185, "grad_norm": 0.2280568927526474, "learning_rate": 5.61504902241042e-05, "loss": 1.1415, "mean_token_accuracy": 0.7810229748487473, "num_tokens": 1631233693.0, "step": 24150 }, { "entropy": 0.7449058368802071, "epoch": 7.319018404907975, "grad_norm": 0.22089658677577972, "learning_rate": 5.6117348974242724e-05, "loss": 1.1342, "mean_token_accuracy": 0.7829345911741257, "num_tokens": 1631927017.0, "step": 24160 }, { "entropy": 0.735488374531269, "epoch": 7.322048019389532, "grad_norm": 0.21309734880924225, "learning_rate": 5.6084204995737576e-05, "loss": 1.1332, "mean_token_accuracy": 0.7801588609814644, "num_tokens": 1632604646.0, "step": 24170 }, { "entropy": 0.745568074285984, "epoch": 7.32507763387109, "grad_norm": 0.21478989720344543, "learning_rate": 5.605105830337265e-05, "loss": 1.145, "mean_token_accuracy": 0.7778879448771476, "num_tokens": 1633286932.0, "step": 24180 }, { "entropy": 0.7399929359555244, "epoch": 7.328107248352647, "grad_norm": 0.20924386382102966, "learning_rate": 5.6017908911933004e-05, "loss": 1.1418, "mean_token_accuracy": 0.7734668895602226, "num_tokens": 1633944821.0, "step": 24190 }, { "entropy": 0.7454836443066597, "epoch": 7.331136862834204, "grad_norm": 0.21767070889472961, "learning_rate": 5.598475683620493e-05, "loss": 1.14, "mean_token_accuracy": 0.7767185181379318, "num_tokens": 1634613657.0, "step": 24200 }, { "entropy": 0.745055228471756, "epoch": 7.334166477315762, "grad_norm": 0.2201317697763443, "learning_rate": 5.595160209097593e-05, "loss": 1.1354, "mean_token_accuracy": 0.7779637157917023, "num_tokens": 1635293860.0, "step": 24210 }, { "entropy": 0.7456524521112442, "epoch": 7.337196091797319, "grad_norm": 0.202079176902771, "learning_rate": 5.591844469103467e-05, "loss": 1.1395, "mean_token_accuracy": 0.7798618689179421, "num_tokens": 1635992432.0, "step": 24220 }, { "entropy": 0.7374287351965905, "epoch": 7.340225706278876, "grad_norm": 0.22293561697006226, "learning_rate": 5.5885284651170986e-05, "loss": 1.139, "mean_token_accuracy": 0.7847616389393807, "num_tokens": 1636675788.0, "step": 24230 }, { "entropy": 0.7431089982390404, "epoch": 7.343255320760433, "grad_norm": 0.20412328839302063, "learning_rate": 5.585212198617596e-05, "loss": 1.1362, "mean_token_accuracy": 0.7805899128317833, "num_tokens": 1637367574.0, "step": 24240 }, { "entropy": 0.7460298150777817, "epoch": 7.34628493524199, "grad_norm": 0.21624484658241272, "learning_rate": 5.581895671084176e-05, "loss": 1.1359, "mean_token_accuracy": 0.7783966347575187, "num_tokens": 1638050741.0, "step": 24250 }, { "entropy": 0.7557017922401428, "epoch": 7.349314549723548, "grad_norm": 0.22091956436634064, "learning_rate": 5.5785788839961785e-05, "loss": 1.1445, "mean_token_accuracy": 0.7714565888047218, "num_tokens": 1638717200.0, "step": 24260 }, { "entropy": 0.7368078112602234, "epoch": 7.352344164205105, "grad_norm": 0.2095811516046524, "learning_rate": 5.575261838833054e-05, "loss": 1.1378, "mean_token_accuracy": 0.7802141949534416, "num_tokens": 1639401631.0, "step": 24270 }, { "entropy": 0.7564533114433288, "epoch": 7.355373778686662, "grad_norm": 0.2242221087217331, "learning_rate": 5.571944537074373e-05, "loss": 1.1382, "mean_token_accuracy": 0.7788906678557396, "num_tokens": 1640106395.0, "step": 24280 }, { "entropy": 0.737377966940403, "epoch": 7.358403393168219, "grad_norm": 0.20979861915111542, "learning_rate": 5.568626980199816e-05, "loss": 1.1301, "mean_token_accuracy": 0.7788542896509171, "num_tokens": 1640788308.0, "step": 24290 }, { "entropy": 0.7406774923205376, "epoch": 7.361433007649777, "grad_norm": 0.2215997725725174, "learning_rate": 5.565309169689179e-05, "loss": 1.1349, "mean_token_accuracy": 0.7774257019162178, "num_tokens": 1641460222.0, "step": 24300 }, { "entropy": 0.7524708792567253, "epoch": 7.364462622131334, "grad_norm": 0.21218158304691315, "learning_rate": 5.5619911070223764e-05, "loss": 1.144, "mean_token_accuracy": 0.7777711153030396, "num_tokens": 1642143261.0, "step": 24310 }, { "entropy": 0.7503455430269241, "epoch": 7.367492236612891, "grad_norm": 0.2106654793024063, "learning_rate": 5.5586727936794236e-05, "loss": 1.1411, "mean_token_accuracy": 0.776572035253048, "num_tokens": 1642817220.0, "step": 24320 }, { "entropy": 0.7529546201229096, "epoch": 7.370521851094448, "grad_norm": 0.20978443324565887, "learning_rate": 5.555354231140457e-05, "loss": 1.139, "mean_token_accuracy": 0.7750160679221153, "num_tokens": 1643490100.0, "step": 24330 }, { "entropy": 0.7439987063407898, "epoch": 7.373551465576005, "grad_norm": 0.21684180200099945, "learning_rate": 5.5520354208857215e-05, "loss": 1.1334, "mean_token_accuracy": 0.7768491759896279, "num_tokens": 1644171664.0, "step": 24340 }, { "entropy": 0.7514504194259644, "epoch": 7.376581080057563, "grad_norm": 0.2160317301750183, "learning_rate": 5.548716364395574e-05, "loss": 1.1439, "mean_token_accuracy": 0.7775610223412514, "num_tokens": 1644847717.0, "step": 24350 }, { "entropy": 0.7286875173449516, "epoch": 7.37961069453912, "grad_norm": 0.22281469404697418, "learning_rate": 5.545397063150477e-05, "loss": 1.1289, "mean_token_accuracy": 0.7795726001262665, "num_tokens": 1645514078.0, "step": 24360 }, { "entropy": 0.7473196431994438, "epoch": 7.382640309020677, "grad_norm": 0.22722819447517395, "learning_rate": 5.542077518631007e-05, "loss": 1.1409, "mean_token_accuracy": 0.7789425924420357, "num_tokens": 1646191594.0, "step": 24370 }, { "entropy": 0.744561393558979, "epoch": 7.385669923502235, "grad_norm": 0.2064792662858963, "learning_rate": 5.5387577323178464e-05, "loss": 1.1439, "mean_token_accuracy": 0.7743083328008652, "num_tokens": 1646855313.0, "step": 24380 }, { "entropy": 0.7324327319860459, "epoch": 7.388699537983792, "grad_norm": 0.21427488327026367, "learning_rate": 5.535437705691785e-05, "loss": 1.1281, "mean_token_accuracy": 0.7798612684011459, "num_tokens": 1647528294.0, "step": 24390 }, { "entropy": 0.7458065509796142, "epoch": 7.391729152465349, "grad_norm": 0.21999485790729523, "learning_rate": 5.532117440233722e-05, "loss": 1.1442, "mean_token_accuracy": 0.7761651277542114, "num_tokens": 1648194479.0, "step": 24400 }, { "entropy": 0.7495456725358963, "epoch": 7.394758766946906, "grad_norm": 0.2247866690158844, "learning_rate": 5.528796937424662e-05, "loss": 1.1481, "mean_token_accuracy": 0.7760565713047981, "num_tokens": 1648869091.0, "step": 24410 }, { "entropy": 0.7413565754890442, "epoch": 7.397788381428463, "grad_norm": 0.20716004073619843, "learning_rate": 5.525476198745714e-05, "loss": 1.1358, "mean_token_accuracy": 0.7809042319655418, "num_tokens": 1649549800.0, "step": 24420 }, { "entropy": 0.7377701357007027, "epoch": 7.40081799591002, "grad_norm": 0.22581028938293457, "learning_rate": 5.522155225678093e-05, "loss": 1.1299, "mean_token_accuracy": 0.7825725868344307, "num_tokens": 1650228436.0, "step": 24430 }, { "entropy": 0.7420076668262482, "epoch": 7.403847610391578, "grad_norm": 0.2277398556470871, "learning_rate": 5.518834019703122e-05, "loss": 1.1381, "mean_token_accuracy": 0.7727554067969322, "num_tokens": 1650880381.0, "step": 24440 }, { "entropy": 0.7543826818466186, "epoch": 7.406877224873135, "grad_norm": 0.21878106892108917, "learning_rate": 5.515512582302224e-05, "loss": 1.1432, "mean_token_accuracy": 0.7753280624747276, "num_tokens": 1651559161.0, "step": 24450 }, { "entropy": 0.7435263082385063, "epoch": 7.409906839354692, "grad_norm": 0.22242344915866852, "learning_rate": 5.5121909149569237e-05, "loss": 1.1346, "mean_token_accuracy": 0.7759518295526504, "num_tokens": 1652237914.0, "step": 24460 }, { "entropy": 0.7520292147994041, "epoch": 7.41293645383625, "grad_norm": 0.2094714492559433, "learning_rate": 5.508869019148855e-05, "loss": 1.138, "mean_token_accuracy": 0.7758851468563079, "num_tokens": 1652908954.0, "step": 24470 }, { "entropy": 0.7410292357206345, "epoch": 7.415966068317807, "grad_norm": 0.2083895057439804, "learning_rate": 5.505546896359747e-05, "loss": 1.1304, "mean_token_accuracy": 0.786678695678711, "num_tokens": 1653619377.0, "step": 24480 }, { "entropy": 0.7511360585689545, "epoch": 7.4189956827993635, "grad_norm": 0.21662305295467377, "learning_rate": 5.502224548071433e-05, "loss": 1.1489, "mean_token_accuracy": 0.7749097287654877, "num_tokens": 1654289661.0, "step": 24490 }, { "entropy": 0.7482529670000077, "epoch": 7.422025297280921, "grad_norm": 0.21172094345092773, "learning_rate": 5.4989019757658466e-05, "loss": 1.1416, "mean_token_accuracy": 0.7741121739149094, "num_tokens": 1654963488.0, "step": 24500 }, { "entropy": 0.7441295340657235, "epoch": 7.425054911762478, "grad_norm": 0.21113701164722443, "learning_rate": 5.4955791809250204e-05, "loss": 1.1447, "mean_token_accuracy": 0.7777657687664032, "num_tokens": 1655627965.0, "step": 24510 }, { "entropy": 0.7464437022805214, "epoch": 7.428084526244035, "grad_norm": 0.21303771436214447, "learning_rate": 5.49225616503109e-05, "loss": 1.1352, "mean_token_accuracy": 0.7807260543107987, "num_tokens": 1656316403.0, "step": 24520 }, { "entropy": 0.7457913666963577, "epoch": 7.431114140725593, "grad_norm": 0.2176608294248581, "learning_rate": 5.488932929566283e-05, "loss": 1.1508, "mean_token_accuracy": 0.7762917503714561, "num_tokens": 1656984317.0, "step": 24530 }, { "entropy": 0.7491842567920685, "epoch": 7.43414375520715, "grad_norm": 0.21883437037467957, "learning_rate": 5.485609476012931e-05, "loss": 1.1454, "mean_token_accuracy": 0.7750205948948861, "num_tokens": 1657648557.0, "step": 24540 }, { "entropy": 0.7478434219956398, "epoch": 7.437173369688707, "grad_norm": 0.22971990704536438, "learning_rate": 5.482285805853461e-05, "loss": 1.1351, "mean_token_accuracy": 0.7778780668973923, "num_tokens": 1658330995.0, "step": 24550 }, { "entropy": 0.7507547929883003, "epoch": 7.440202984170265, "grad_norm": 0.22184528410434723, "learning_rate": 5.478961920570394e-05, "loss": 1.1445, "mean_token_accuracy": 0.7745205998420716, "num_tokens": 1658996530.0, "step": 24560 }, { "entropy": 0.7461107209324837, "epoch": 7.4432325986518215, "grad_norm": 0.2146303355693817, "learning_rate": 5.47563782164635e-05, "loss": 1.1437, "mean_token_accuracy": 0.777275325357914, "num_tokens": 1659671191.0, "step": 24570 }, { "entropy": 0.7430723607540131, "epoch": 7.446262213133378, "grad_norm": 0.20267100632190704, "learning_rate": 5.4723135105640445e-05, "loss": 1.1381, "mean_token_accuracy": 0.7795214995741844, "num_tokens": 1660352543.0, "step": 24580 }, { "entropy": 0.7431056842207908, "epoch": 7.449291827614936, "grad_norm": 0.21331390738487244, "learning_rate": 5.4689889888062853e-05, "loss": 1.1363, "mean_token_accuracy": 0.7799687758088112, "num_tokens": 1661034375.0, "step": 24590 }, { "entropy": 0.7397371128201484, "epoch": 7.452321442096493, "grad_norm": 0.20924900472164154, "learning_rate": 5.465664257855976e-05, "loss": 1.1274, "mean_token_accuracy": 0.7805151045322418, "num_tokens": 1661726134.0, "step": 24600 }, { "entropy": 0.7526766210794449, "epoch": 7.45535105657805, "grad_norm": 0.21683505177497864, "learning_rate": 5.462339319196112e-05, "loss": 1.1512, "mean_token_accuracy": 0.7739051789045334, "num_tokens": 1662399680.0, "step": 24610 }, { "entropy": 0.7580562621355057, "epoch": 7.458380671059608, "grad_norm": 0.2254616767168045, "learning_rate": 5.459014174309785e-05, "loss": 1.1496, "mean_token_accuracy": 0.7714251548051834, "num_tokens": 1663063774.0, "step": 24620 }, { "entropy": 0.7435199916362762, "epoch": 7.461410285541165, "grad_norm": 0.20744958519935608, "learning_rate": 5.455688824680173e-05, "loss": 1.1344, "mean_token_accuracy": 0.7797993645071983, "num_tokens": 1663755581.0, "step": 24630 }, { "entropy": 0.7406926706433297, "epoch": 7.464439900022722, "grad_norm": 0.21881410479545593, "learning_rate": 5.452363271790548e-05, "loss": 1.1365, "mean_token_accuracy": 0.7793450295925141, "num_tokens": 1664433891.0, "step": 24640 }, { "entropy": 0.7477216809988022, "epoch": 7.4674695145042795, "grad_norm": 0.21395400166511536, "learning_rate": 5.449037517124276e-05, "loss": 1.1405, "mean_token_accuracy": 0.7774820044636727, "num_tokens": 1665111657.0, "step": 24650 }, { "entropy": 0.747513872385025, "epoch": 7.4704991289858365, "grad_norm": 0.22176103293895721, "learning_rate": 5.445711562164809e-05, "loss": 1.1404, "mean_token_accuracy": 0.7792335256934166, "num_tokens": 1665799335.0, "step": 24660 }, { "entropy": 0.7383706942200661, "epoch": 7.473528743467393, "grad_norm": 0.210130512714386, "learning_rate": 5.442385408395686e-05, "loss": 1.1351, "mean_token_accuracy": 0.7739451065659523, "num_tokens": 1666463124.0, "step": 24670 }, { "entropy": 0.7512979134917259, "epoch": 7.476558357948951, "grad_norm": 0.201966792345047, "learning_rate": 5.4390590573005415e-05, "loss": 1.1382, "mean_token_accuracy": 0.7743869826197625, "num_tokens": 1667141456.0, "step": 24680 }, { "entropy": 0.7560889542102813, "epoch": 7.479587972430508, "grad_norm": 0.22573871910572052, "learning_rate": 5.435732510363094e-05, "loss": 1.1475, "mean_token_accuracy": 0.7724868059158325, "num_tokens": 1667821834.0, "step": 24690 }, { "entropy": 0.7346357598900795, "epoch": 7.482617586912065, "grad_norm": 0.2244092971086502, "learning_rate": 5.4324057690671485e-05, "loss": 1.14, "mean_token_accuracy": 0.7774963542819023, "num_tokens": 1668492166.0, "step": 24700 }, { "entropy": 0.7364307418465614, "epoch": 7.485647201393623, "grad_norm": 0.22128066420555115, "learning_rate": 5.429078834896598e-05, "loss": 1.1353, "mean_token_accuracy": 0.7787325322628021, "num_tokens": 1669155086.0, "step": 24710 }, { "entropy": 0.7441545367240906, "epoch": 7.48867681587518, "grad_norm": 0.2105528563261032, "learning_rate": 5.4257517093354246e-05, "loss": 1.1442, "mean_token_accuracy": 0.7805730924010277, "num_tokens": 1669837761.0, "step": 24720 }, { "entropy": 0.7354769557714462, "epoch": 7.491706430356738, "grad_norm": 0.21838191151618958, "learning_rate": 5.422424393867689e-05, "loss": 1.1341, "mean_token_accuracy": 0.7799891889095306, "num_tokens": 1670510937.0, "step": 24730 }, { "entropy": 0.746819120645523, "epoch": 7.4947360448382945, "grad_norm": 0.20795127749443054, "learning_rate": 5.419096889977542e-05, "loss": 1.1453, "mean_token_accuracy": 0.7764046773314476, "num_tokens": 1671187171.0, "step": 24740 }, { "entropy": 0.7433664634823799, "epoch": 7.497765659319851, "grad_norm": 0.21214525401592255, "learning_rate": 5.4157691991492174e-05, "loss": 1.1427, "mean_token_accuracy": 0.7786703392863273, "num_tokens": 1671866647.0, "step": 24750 }, { "entropy": 0.7340948358178139, "epoch": 7.500795273801408, "grad_norm": 0.22363007068634033, "learning_rate": 5.412441322867031e-05, "loss": 1.1377, "mean_token_accuracy": 0.7721888646483421, "num_tokens": 1672519297.0, "step": 24760 }, { "entropy": 0.7401010930538178, "epoch": 7.503824888282966, "grad_norm": 0.21117638051509857, "learning_rate": 5.409113262615381e-05, "loss": 1.1263, "mean_token_accuracy": 0.7808028668165207, "num_tokens": 1673211238.0, "step": 24770 }, { "entropy": 0.7396194770932197, "epoch": 7.506854502764523, "grad_norm": 0.22600512206554413, "learning_rate": 5.4057850198787506e-05, "loss": 1.1395, "mean_token_accuracy": 0.7758064150810242, "num_tokens": 1673869540.0, "step": 24780 }, { "entropy": 0.7452031940221786, "epoch": 7.509884117246081, "grad_norm": 0.21701331436634064, "learning_rate": 5.402456596141703e-05, "loss": 1.1528, "mean_token_accuracy": 0.7733472928404808, "num_tokens": 1674527406.0, "step": 24790 }, { "entropy": 0.734237490594387, "epoch": 7.512913731727638, "grad_norm": 0.22200846672058105, "learning_rate": 5.399127992888878e-05, "loss": 1.1346, "mean_token_accuracy": 0.7834853321313858, "num_tokens": 1675208503.0, "step": 24800 }, { "entropy": 0.7524815320968627, "epoch": 7.515943346209195, "grad_norm": 0.21497096121311188, "learning_rate": 5.395799211605004e-05, "loss": 1.1439, "mean_token_accuracy": 0.7742198213934899, "num_tokens": 1675893075.0, "step": 24810 }, { "entropy": 0.7457500502467156, "epoch": 7.518972960690752, "grad_norm": 0.2262214720249176, "learning_rate": 5.3924702537748815e-05, "loss": 1.1449, "mean_token_accuracy": 0.7760716915130615, "num_tokens": 1676569241.0, "step": 24820 }, { "entropy": 0.740559920668602, "epoch": 7.522002575172309, "grad_norm": 0.20782777667045593, "learning_rate": 5.389141120883392e-05, "loss": 1.1446, "mean_token_accuracy": 0.7764463156461716, "num_tokens": 1677244238.0, "step": 24830 }, { "entropy": 0.7468500062823296, "epoch": 7.525032189653866, "grad_norm": 0.21614263951778412, "learning_rate": 5.385811814415496e-05, "loss": 1.1408, "mean_token_accuracy": 0.7774775564670563, "num_tokens": 1677921825.0, "step": 24840 }, { "entropy": 0.7555354133248329, "epoch": 7.528061804135424, "grad_norm": 0.23248232901096344, "learning_rate": 5.382482335856229e-05, "loss": 1.1413, "mean_token_accuracy": 0.7761192858219147, "num_tokens": 1678606445.0, "step": 24850 }, { "entropy": 0.7485510841012001, "epoch": 7.531091418616981, "grad_norm": 0.22634170949459076, "learning_rate": 5.3791526866907084e-05, "loss": 1.1472, "mean_token_accuracy": 0.7722590655088425, "num_tokens": 1679271499.0, "step": 24860 }, { "entropy": 0.7513169005513192, "epoch": 7.534121033098538, "grad_norm": 0.21921472251415253, "learning_rate": 5.375822868404121e-05, "loss": 1.1413, "mean_token_accuracy": 0.7759198114275933, "num_tokens": 1679950085.0, "step": 24870 }, { "entropy": 0.7495874270796776, "epoch": 7.537150647580096, "grad_norm": 0.2265593707561493, "learning_rate": 5.372492882481734e-05, "loss": 1.1484, "mean_token_accuracy": 0.7736220329999923, "num_tokens": 1680618916.0, "step": 24880 }, { "entropy": 0.7537253230810166, "epoch": 7.540180262061653, "grad_norm": 0.20332907140254974, "learning_rate": 5.369162730408889e-05, "loss": 1.1435, "mean_token_accuracy": 0.7734320342540741, "num_tokens": 1681285210.0, "step": 24890 }, { "entropy": 0.7369566798210144, "epoch": 7.54320987654321, "grad_norm": 0.2314373254776001, "learning_rate": 5.365832413670997e-05, "loss": 1.1266, "mean_token_accuracy": 0.7839697256684304, "num_tokens": 1681979698.0, "step": 24900 }, { "entropy": 0.7459864601492882, "epoch": 7.546239491024767, "grad_norm": 0.22535762190818787, "learning_rate": 5.3625019337535475e-05, "loss": 1.1399, "mean_token_accuracy": 0.7763787895441056, "num_tokens": 1682648758.0, "step": 24910 }, { "entropy": 0.7416107371449471, "epoch": 7.549269105506324, "grad_norm": 0.21356399357318878, "learning_rate": 5.3591712921421024e-05, "loss": 1.1379, "mean_token_accuracy": 0.7756614923477173, "num_tokens": 1683314524.0, "step": 24920 }, { "entropy": 0.7331911876797677, "epoch": 7.552298719987881, "grad_norm": 0.2103649228811264, "learning_rate": 5.355840490322293e-05, "loss": 1.1323, "mean_token_accuracy": 0.7800414964556694, "num_tokens": 1683995587.0, "step": 24930 }, { "entropy": 0.7561028689146042, "epoch": 7.555328334469439, "grad_norm": 0.21475647389888763, "learning_rate": 5.352509529779823e-05, "loss": 1.1496, "mean_token_accuracy": 0.7750357866287232, "num_tokens": 1684671456.0, "step": 24940 }, { "entropy": 0.7422101736068726, "epoch": 7.558357948950996, "grad_norm": 0.21627213060855865, "learning_rate": 5.3491784120004685e-05, "loss": 1.1407, "mean_token_accuracy": 0.776326359808445, "num_tokens": 1685338459.0, "step": 24950 }, { "entropy": 0.7497922003269195, "epoch": 7.561387563432553, "grad_norm": 0.22639530897140503, "learning_rate": 5.345847138470077e-05, "loss": 1.1428, "mean_token_accuracy": 0.7732251092791558, "num_tokens": 1685995806.0, "step": 24960 }, { "entropy": 0.7480155780911446, "epoch": 7.564417177914111, "grad_norm": 0.22280606627464294, "learning_rate": 5.342515710674558e-05, "loss": 1.1517, "mean_token_accuracy": 0.7730533495545387, "num_tokens": 1686646088.0, "step": 24970 }, { "entropy": 0.7508288726210595, "epoch": 7.567446792395668, "grad_norm": 0.21140004694461823, "learning_rate": 5.339184130099898e-05, "loss": 1.1407, "mean_token_accuracy": 0.7772667378187179, "num_tokens": 1687332806.0, "step": 24980 }, { "entropy": 0.7477628767490387, "epoch": 7.570476406877225, "grad_norm": 0.24616877734661102, "learning_rate": 5.3358523982321494e-05, "loss": 1.1394, "mean_token_accuracy": 0.7770170167088508, "num_tokens": 1688018557.0, "step": 24990 }, { "entropy": 0.7574859589338303, "epoch": 7.573506021358782, "grad_norm": 0.21998058259487152, "learning_rate": 5.3325205165574297e-05, "loss": 1.153, "mean_token_accuracy": 0.7750434413552284, "num_tokens": 1688703140.0, "step": 25000 }, { "entropy": 0.7477376744151115, "epoch": 7.576535635840339, "grad_norm": 0.21323354542255402, "learning_rate": 5.3291884865619256e-05, "loss": 1.1402, "mean_token_accuracy": 0.7790637165307999, "num_tokens": 1689382179.0, "step": 25010 }, { "entropy": 0.7436079770326615, "epoch": 7.579565250321896, "grad_norm": 0.20804129540920258, "learning_rate": 5.325856309731888e-05, "loss": 1.1383, "mean_token_accuracy": 0.7806669667363166, "num_tokens": 1690076304.0, "step": 25020 }, { "entropy": 0.7412419065833091, "epoch": 7.582594864803454, "grad_norm": 0.22202837467193604, "learning_rate": 5.322523987553636e-05, "loss": 1.1364, "mean_token_accuracy": 0.7770675376057625, "num_tokens": 1690744702.0, "step": 25030 }, { "entropy": 0.746268455684185, "epoch": 7.585624479285011, "grad_norm": 0.21350513398647308, "learning_rate": 5.31919152151355e-05, "loss": 1.1254, "mean_token_accuracy": 0.7802624836564064, "num_tokens": 1691441533.0, "step": 25040 }, { "entropy": 0.7400405585765839, "epoch": 7.588654093766568, "grad_norm": 0.21588623523712158, "learning_rate": 5.3158589130980783e-05, "loss": 1.1372, "mean_token_accuracy": 0.778198529779911, "num_tokens": 1692124994.0, "step": 25050 }, { "entropy": 0.7503207892179489, "epoch": 7.591683708248126, "grad_norm": 0.21895559132099152, "learning_rate": 5.312526163793732e-05, "loss": 1.1448, "mean_token_accuracy": 0.7747712373733521, "num_tokens": 1692796826.0, "step": 25060 }, { "entropy": 0.7457394242286682, "epoch": 7.594713322729683, "grad_norm": 0.21419158577919006, "learning_rate": 5.309193275087081e-05, "loss": 1.1391, "mean_token_accuracy": 0.7781403362751007, "num_tokens": 1693479871.0, "step": 25070 }, { "entropy": 0.7462506055831909, "epoch": 7.5977429372112395, "grad_norm": 0.21640583872795105, "learning_rate": 5.305860248464761e-05, "loss": 1.1383, "mean_token_accuracy": 0.777347669005394, "num_tokens": 1694158507.0, "step": 25080 }, { "entropy": 0.7491141736507416, "epoch": 7.600772551692797, "grad_norm": 0.21499212086200714, "learning_rate": 5.30252708541347e-05, "loss": 1.1479, "mean_token_accuracy": 0.7724004551768303, "num_tokens": 1694816905.0, "step": 25090 }, { "entropy": 0.7444032818078995, "epoch": 7.603802166174354, "grad_norm": 0.23837362229824066, "learning_rate": 5.2991937874199657e-05, "loss": 1.1406, "mean_token_accuracy": 0.7754931151866913, "num_tokens": 1695488226.0, "step": 25100 }, { "entropy": 0.7473259896039963, "epoch": 7.606831780655911, "grad_norm": 0.21419358253479004, "learning_rate": 5.295860355971065e-05, "loss": 1.136, "mean_token_accuracy": 0.7758742675185204, "num_tokens": 1696152897.0, "step": 25110 }, { "entropy": 0.7429569244384766, "epoch": 7.609861395137469, "grad_norm": 0.2154773473739624, "learning_rate": 5.292526792553645e-05, "loss": 1.1444, "mean_token_accuracy": 0.77743329256773, "num_tokens": 1696824855.0, "step": 25120 }, { "entropy": 0.7521555483341217, "epoch": 7.612891009619026, "grad_norm": 0.21548032760620117, "learning_rate": 5.2891930986546426e-05, "loss": 1.1434, "mean_token_accuracy": 0.7764998629689217, "num_tokens": 1697505906.0, "step": 25130 }, { "entropy": 0.746573394536972, "epoch": 7.615920624100584, "grad_norm": 0.20502008497714996, "learning_rate": 5.2858592757610494e-05, "loss": 1.1446, "mean_token_accuracy": 0.773410850763321, "num_tokens": 1698165815.0, "step": 25140 }, { "entropy": 0.7367995575070381, "epoch": 7.618950238582141, "grad_norm": 0.2106497585773468, "learning_rate": 5.282525325359919e-05, "loss": 1.138, "mean_token_accuracy": 0.7751655489206314, "num_tokens": 1698825451.0, "step": 25150 }, { "entropy": 0.7473216980695725, "epoch": 7.6219798530636975, "grad_norm": 0.2234392911195755, "learning_rate": 5.2791912489383624e-05, "loss": 1.1377, "mean_token_accuracy": 0.7727016642689705, "num_tokens": 1699495216.0, "step": 25160 }, { "entropy": 0.7424630865454673, "epoch": 7.625009467545254, "grad_norm": 0.22373464703559875, "learning_rate": 5.27585704798354e-05, "loss": 1.1395, "mean_token_accuracy": 0.7784733980894089, "num_tokens": 1700167107.0, "step": 25170 }, { "entropy": 0.7530706122517585, "epoch": 7.628039082026812, "grad_norm": 0.22579248249530792, "learning_rate": 5.2725227239826756e-05, "loss": 1.1412, "mean_token_accuracy": 0.776701420545578, "num_tokens": 1700853133.0, "step": 25180 }, { "entropy": 0.7394076824188233, "epoch": 7.631068696508369, "grad_norm": 0.23015308380126953, "learning_rate": 5.269188278423043e-05, "loss": 1.1262, "mean_token_accuracy": 0.7838101804256439, "num_tokens": 1701546542.0, "step": 25190 }, { "entropy": 0.739519964158535, "epoch": 7.634098310989927, "grad_norm": 0.2184506356716156, "learning_rate": 5.265853712791973e-05, "loss": 1.1408, "mean_token_accuracy": 0.7789064064621926, "num_tokens": 1702224022.0, "step": 25200 }, { "entropy": 0.7475892812013626, "epoch": 7.637127925471484, "grad_norm": 0.2101866602897644, "learning_rate": 5.2625190285768464e-05, "loss": 1.1338, "mean_token_accuracy": 0.7773853868246079, "num_tokens": 1702904070.0, "step": 25210 }, { "entropy": 0.7507322892546654, "epoch": 7.640157539953041, "grad_norm": 0.20114384591579437, "learning_rate": 5.2591842272651003e-05, "loss": 1.1473, "mean_token_accuracy": 0.7686016082763671, "num_tokens": 1703561907.0, "step": 25220 }, { "entropy": 0.7429238423705101, "epoch": 7.643187154434598, "grad_norm": 0.22367319464683533, "learning_rate": 5.255849310344224e-05, "loss": 1.1411, "mean_token_accuracy": 0.7787930741906166, "num_tokens": 1704247823.0, "step": 25230 }, { "entropy": 0.7425561651587487, "epoch": 7.6462167689161555, "grad_norm": 0.2082562893629074, "learning_rate": 5.252514279301756e-05, "loss": 1.1359, "mean_token_accuracy": 0.7764569118618965, "num_tokens": 1704917401.0, "step": 25240 }, { "entropy": 0.7391936540603637, "epoch": 7.6492463833977125, "grad_norm": 0.21968869864940643, "learning_rate": 5.2491791356252876e-05, "loss": 1.1344, "mean_token_accuracy": 0.7789005756378173, "num_tokens": 1705586357.0, "step": 25250 }, { "entropy": 0.7499315455555916, "epoch": 7.65227599787927, "grad_norm": 0.2282896488904953, "learning_rate": 5.2458438808024576e-05, "loss": 1.141, "mean_token_accuracy": 0.7771964773535729, "num_tokens": 1706262520.0, "step": 25260 }, { "entropy": 0.7449300393462182, "epoch": 7.655305612360827, "grad_norm": 0.20787255465984344, "learning_rate": 5.242508516320962e-05, "loss": 1.1449, "mean_token_accuracy": 0.7738001808524132, "num_tokens": 1706932425.0, "step": 25270 }, { "entropy": 0.734488981962204, "epoch": 7.658335226842384, "grad_norm": 0.21595390141010284, "learning_rate": 5.239173043668534e-05, "loss": 1.1319, "mean_token_accuracy": 0.7872363120317459, "num_tokens": 1707629153.0, "step": 25280 }, { "entropy": 0.7480002358555794, "epoch": 7.661364841323942, "grad_norm": 0.22453685104846954, "learning_rate": 5.2358374643329633e-05, "loss": 1.15, "mean_token_accuracy": 0.7736384928226471, "num_tokens": 1708291373.0, "step": 25290 }, { "entropy": 0.7501042276620865, "epoch": 7.664394455805499, "grad_norm": 0.21583005785942078, "learning_rate": 5.232501779802088e-05, "loss": 1.1425, "mean_token_accuracy": 0.7768845707178116, "num_tokens": 1708973385.0, "step": 25300 }, { "entropy": 0.742583692073822, "epoch": 7.667424070287056, "grad_norm": 0.2266470193862915, "learning_rate": 5.2291659915637866e-05, "loss": 1.1414, "mean_token_accuracy": 0.7782695710659027, "num_tokens": 1709653473.0, "step": 25310 }, { "entropy": 0.7440670862793922, "epoch": 7.6704536847686136, "grad_norm": 0.21403197944164276, "learning_rate": 5.225830101105988e-05, "loss": 1.14, "mean_token_accuracy": 0.7751417070627212, "num_tokens": 1710319434.0, "step": 25320 }, { "entropy": 0.753742316365242, "epoch": 7.6734832992501705, "grad_norm": 0.2077597677707672, "learning_rate": 5.22249410991667e-05, "loss": 1.1462, "mean_token_accuracy": 0.7745224148035049, "num_tokens": 1711012533.0, "step": 25330 }, { "entropy": 0.743423281610012, "epoch": 7.676512913731727, "grad_norm": 0.2180161029100418, "learning_rate": 5.2191580194838464e-05, "loss": 1.1358, "mean_token_accuracy": 0.7788239121437073, "num_tokens": 1711691176.0, "step": 25340 }, { "entropy": 0.7364968597888947, "epoch": 7.679542528213285, "grad_norm": 0.222105473279953, "learning_rate": 5.2158218312955845e-05, "loss": 1.1376, "mean_token_accuracy": 0.7743668854236603, "num_tokens": 1712351284.0, "step": 25350 }, { "entropy": 0.7402329221367836, "epoch": 7.682572142694842, "grad_norm": 0.2200395166873932, "learning_rate": 5.212485546839987e-05, "loss": 1.144, "mean_token_accuracy": 0.7772669464349746, "num_tokens": 1713015717.0, "step": 25360 }, { "entropy": 0.736244784295559, "epoch": 7.685601757176399, "grad_norm": 0.22047896683216095, "learning_rate": 5.209149167605207e-05, "loss": 1.1338, "mean_token_accuracy": 0.7799585655331611, "num_tokens": 1713692961.0, "step": 25370 }, { "entropy": 0.7455885216593743, "epoch": 7.688631371657957, "grad_norm": 0.21278539299964905, "learning_rate": 5.205812695079436e-05, "loss": 1.1394, "mean_token_accuracy": 0.7775056421756744, "num_tokens": 1714366935.0, "step": 25380 }, { "entropy": 0.743017515540123, "epoch": 7.691660986139514, "grad_norm": 0.20847824215888977, "learning_rate": 5.202476130750906e-05, "loss": 1.1231, "mean_token_accuracy": 0.7809359863400459, "num_tokens": 1715048623.0, "step": 25390 }, { "entropy": 0.7346621245145798, "epoch": 7.694690600621071, "grad_norm": 0.20856250822544098, "learning_rate": 5.1991394761078946e-05, "loss": 1.1331, "mean_token_accuracy": 0.7845747739076614, "num_tokens": 1715741602.0, "step": 25400 }, { "entropy": 0.7453661963343621, "epoch": 7.6977202151026285, "grad_norm": 0.2149330973625183, "learning_rate": 5.195802732638714e-05, "loss": 1.1363, "mean_token_accuracy": 0.7745748803019523, "num_tokens": 1716413572.0, "step": 25410 }, { "entropy": 0.7401582688093186, "epoch": 7.700749829584185, "grad_norm": 0.2142152488231659, "learning_rate": 5.192465901831718e-05, "loss": 1.1314, "mean_token_accuracy": 0.7795200228691102, "num_tokens": 1717095588.0, "step": 25420 }, { "entropy": 0.7450745359063149, "epoch": 7.703779444065742, "grad_norm": 0.22123929858207703, "learning_rate": 5.1891289851753034e-05, "loss": 1.1394, "mean_token_accuracy": 0.7832079187035561, "num_tokens": 1717803940.0, "step": 25430 }, { "entropy": 0.7458594724535942, "epoch": 7.7068090585473, "grad_norm": 0.21973532438278198, "learning_rate": 5.1857919841579006e-05, "loss": 1.1419, "mean_token_accuracy": 0.7753015145659446, "num_tokens": 1718474653.0, "step": 25440 }, { "entropy": 0.7485863000154496, "epoch": 7.709838673028857, "grad_norm": 0.22180785238742828, "learning_rate": 5.182454900267977e-05, "loss": 1.1425, "mean_token_accuracy": 0.7791336089372635, "num_tokens": 1719157344.0, "step": 25450 }, { "entropy": 0.7507815167307854, "epoch": 7.712868287510414, "grad_norm": 0.2153090238571167, "learning_rate": 5.179117734994041e-05, "loss": 1.1458, "mean_token_accuracy": 0.7759419962763786, "num_tokens": 1719836087.0, "step": 25460 }, { "entropy": 0.7442035615444184, "epoch": 7.715897901991972, "grad_norm": 0.2220461368560791, "learning_rate": 5.1757804898246354e-05, "loss": 1.1401, "mean_token_accuracy": 0.7786268338561058, "num_tokens": 1720520273.0, "step": 25470 }, { "entropy": 0.7401345342397689, "epoch": 7.718927516473529, "grad_norm": 0.2081901878118515, "learning_rate": 5.1724431662483374e-05, "loss": 1.1348, "mean_token_accuracy": 0.7731791481375694, "num_tokens": 1721182612.0, "step": 25480 }, { "entropy": 0.7447274580597878, "epoch": 7.721957130955086, "grad_norm": 0.217488631606102, "learning_rate": 5.169105765753761e-05, "loss": 1.1467, "mean_token_accuracy": 0.776178726553917, "num_tokens": 1721857650.0, "step": 25490 }, { "entropy": 0.7468136623501778, "epoch": 7.724986745436643, "grad_norm": 0.2096599042415619, "learning_rate": 5.1657682898295546e-05, "loss": 1.1353, "mean_token_accuracy": 0.7728841468691826, "num_tokens": 1722529175.0, "step": 25500 }, { "entropy": 0.7344526037573814, "epoch": 7.7280163599182, "grad_norm": 0.2137104570865631, "learning_rate": 5.162430739964397e-05, "loss": 1.1354, "mean_token_accuracy": 0.7796164557337761, "num_tokens": 1723209381.0, "step": 25510 }, { "entropy": 0.7481473803520202, "epoch": 7.731045974399757, "grad_norm": 0.22086377441883087, "learning_rate": 5.159093117647004e-05, "loss": 1.1525, "mean_token_accuracy": 0.7749302208423614, "num_tokens": 1723880355.0, "step": 25520 }, { "entropy": 0.7484655126929283, "epoch": 7.734075588881315, "grad_norm": 0.22618655860424042, "learning_rate": 5.155755424366122e-05, "loss": 1.139, "mean_token_accuracy": 0.7727179780602456, "num_tokens": 1724542211.0, "step": 25530 }, { "entropy": 0.7443579941987991, "epoch": 7.737105203362872, "grad_norm": 0.21905077993869781, "learning_rate": 5.152417661610529e-05, "loss": 1.1343, "mean_token_accuracy": 0.7727830931544304, "num_tokens": 1725198559.0, "step": 25540 }, { "entropy": 0.7451524212956429, "epoch": 7.740134817844429, "grad_norm": 0.2269979566335678, "learning_rate": 5.149079830869035e-05, "loss": 1.1447, "mean_token_accuracy": 0.7765819162130356, "num_tokens": 1725871918.0, "step": 25550 }, { "entropy": 0.7532629147171974, "epoch": 7.743164432325987, "grad_norm": 0.2097410261631012, "learning_rate": 5.145741933630477e-05, "loss": 1.1443, "mean_token_accuracy": 0.7739479586482048, "num_tokens": 1726548865.0, "step": 25560 }, { "entropy": 0.7375313773751259, "epoch": 7.746194046807544, "grad_norm": 0.22446924448013306, "learning_rate": 5.142403971383727e-05, "loss": 1.138, "mean_token_accuracy": 0.7751024857163429, "num_tokens": 1727207165.0, "step": 25570 }, { "entropy": 0.7458680495619774, "epoch": 7.749223661289101, "grad_norm": 0.22164654731750488, "learning_rate": 5.1390659456176814e-05, "loss": 1.1455, "mean_token_accuracy": 0.7725724175572395, "num_tokens": 1727860610.0, "step": 25580 }, { "entropy": 0.7698509722948075, "epoch": 7.752253275770658, "grad_norm": 0.22158177196979523, "learning_rate": 5.135727857821267e-05, "loss": 1.1567, "mean_token_accuracy": 0.7673335209488868, "num_tokens": 1728532118.0, "step": 25590 }, { "entropy": 0.7348514199256897, "epoch": 7.755282890252215, "grad_norm": 0.1981198638677597, "learning_rate": 5.132389709483438e-05, "loss": 1.1375, "mean_token_accuracy": 0.780041366815567, "num_tokens": 1729205410.0, "step": 25600 }, { "entropy": 0.7483593672513962, "epoch": 7.758312504733773, "grad_norm": 0.21139384806156158, "learning_rate": 5.129051502093177e-05, "loss": 1.1462, "mean_token_accuracy": 0.7759598404169082, "num_tokens": 1729875927.0, "step": 25610 }, { "entropy": 0.7465812742710114, "epoch": 7.76134211921533, "grad_norm": 0.2231806516647339, "learning_rate": 5.12571323713949e-05, "loss": 1.1429, "mean_token_accuracy": 0.7714732691645623, "num_tokens": 1730528703.0, "step": 25620 }, { "entropy": 0.7381371155381202, "epoch": 7.764371733696887, "grad_norm": 0.21462588012218475, "learning_rate": 5.12237491611141e-05, "loss": 1.1367, "mean_token_accuracy": 0.7785015001893043, "num_tokens": 1731206259.0, "step": 25630 }, { "entropy": 0.759087823331356, "epoch": 7.767401348178444, "grad_norm": 0.21570007503032684, "learning_rate": 5.119036540497996e-05, "loss": 1.1529, "mean_token_accuracy": 0.7716951355338096, "num_tokens": 1731882044.0, "step": 25640 }, { "entropy": 0.7455698773264885, "epoch": 7.770430962660002, "grad_norm": 0.21832765638828278, "learning_rate": 5.1156981117883296e-05, "loss": 1.1406, "mean_token_accuracy": 0.7762769013643265, "num_tokens": 1732567092.0, "step": 25650 }, { "entropy": 0.7408061742782592, "epoch": 7.773460577141559, "grad_norm": 0.21853670477867126, "learning_rate": 5.1123596314715175e-05, "loss": 1.1315, "mean_token_accuracy": 0.7758067131042481, "num_tokens": 1733240134.0, "step": 25660 }, { "entropy": 0.7455203846096993, "epoch": 7.776490191623116, "grad_norm": 0.21512551605701447, "learning_rate": 5.1090211010366895e-05, "loss": 1.1435, "mean_token_accuracy": 0.7802461013197899, "num_tokens": 1733928793.0, "step": 25670 }, { "entropy": 0.7461658477783203, "epoch": 7.779519806104673, "grad_norm": 0.2122987061738968, "learning_rate": 5.1056825219729966e-05, "loss": 1.1327, "mean_token_accuracy": 0.7792137682437896, "num_tokens": 1734616105.0, "step": 25680 }, { "entropy": 0.7498957872390747, "epoch": 7.78254942058623, "grad_norm": 0.21380706131458282, "learning_rate": 5.1023438957696115e-05, "loss": 1.1388, "mean_token_accuracy": 0.7798877775669097, "num_tokens": 1735305057.0, "step": 25690 }, { "entropy": 0.7470949575304985, "epoch": 7.785579035067787, "grad_norm": 0.21742387115955353, "learning_rate": 5.099005223915729e-05, "loss": 1.1437, "mean_token_accuracy": 0.7777863383293152, "num_tokens": 1735983727.0, "step": 25700 }, { "entropy": 0.7559949979186058, "epoch": 7.788608649549345, "grad_norm": 0.22177299857139587, "learning_rate": 5.095666507900566e-05, "loss": 1.1466, "mean_token_accuracy": 0.7746515914797782, "num_tokens": 1736668849.0, "step": 25710 }, { "entropy": 0.756106062233448, "epoch": 7.791638264030902, "grad_norm": 0.21292948722839355, "learning_rate": 5.0923277492133526e-05, "loss": 1.1407, "mean_token_accuracy": 0.7747223898768425, "num_tokens": 1737340633.0, "step": 25720 }, { "entropy": 0.7445455402135849, "epoch": 7.79466787851246, "grad_norm": 0.22191624343395233, "learning_rate": 5.088988949343344e-05, "loss": 1.1506, "mean_token_accuracy": 0.7734212204813957, "num_tokens": 1737997480.0, "step": 25730 }, { "entropy": 0.7500823542475701, "epoch": 7.797697492994017, "grad_norm": 0.22217002511024475, "learning_rate": 5.0856501097798146e-05, "loss": 1.1516, "mean_token_accuracy": 0.7724595025181771, "num_tokens": 1738656996.0, "step": 25740 }, { "entropy": 0.742697848379612, "epoch": 7.8007271074755735, "grad_norm": 0.22848986089229584, "learning_rate": 5.0823112320120494e-05, "loss": 1.1402, "mean_token_accuracy": 0.7776058688759804, "num_tokens": 1739338729.0, "step": 25750 }, { "entropy": 0.7325052246451378, "epoch": 7.803756721957131, "grad_norm": 0.22159868478775024, "learning_rate": 5.078972317529358e-05, "loss": 1.1337, "mean_token_accuracy": 0.7774195194244384, "num_tokens": 1739995077.0, "step": 25760 }, { "entropy": 0.7375677943229675, "epoch": 7.806786336438688, "grad_norm": 0.20608189702033997, "learning_rate": 5.075633367821061e-05, "loss": 1.1387, "mean_token_accuracy": 0.7787287041544915, "num_tokens": 1740667260.0, "step": 25770 }, { "entropy": 0.748692835867405, "epoch": 7.809815950920245, "grad_norm": 0.220370352268219, "learning_rate": 5.0722943843765006e-05, "loss": 1.1296, "mean_token_accuracy": 0.7749065220355987, "num_tokens": 1741345071.0, "step": 25780 }, { "entropy": 0.7556084752082824, "epoch": 7.812845565401803, "grad_norm": 0.2178974449634552, "learning_rate": 5.0689553686850263e-05, "loss": 1.1404, "mean_token_accuracy": 0.7793405070900917, "num_tokens": 1742039572.0, "step": 25790 }, { "entropy": 0.7367696225643158, "epoch": 7.81587517988336, "grad_norm": 0.22308918833732605, "learning_rate": 5.0656163222360084e-05, "loss": 1.1419, "mean_token_accuracy": 0.7798249363899231, "num_tokens": 1742713423.0, "step": 25800 }, { "entropy": 0.742949104309082, "epoch": 7.818904794364917, "grad_norm": 0.2218913733959198, "learning_rate": 5.062277246518831e-05, "loss": 1.135, "mean_token_accuracy": 0.7802236065268516, "num_tokens": 1743400089.0, "step": 25810 }, { "entropy": 0.7405900165438652, "epoch": 7.821934408846475, "grad_norm": 0.23014149069786072, "learning_rate": 5.058938143022883e-05, "loss": 1.1359, "mean_token_accuracy": 0.7767721772193908, "num_tokens": 1744064439.0, "step": 25820 }, { "entropy": 0.7466139867901802, "epoch": 7.8249640233280315, "grad_norm": 0.22135604918003082, "learning_rate": 5.0555990132375753e-05, "loss": 1.1443, "mean_token_accuracy": 0.7720306783914566, "num_tokens": 1744728361.0, "step": 25830 }, { "entropy": 0.748503664135933, "epoch": 7.8279936378095885, "grad_norm": 0.21388953924179077, "learning_rate": 5.0522598586523286e-05, "loss": 1.1426, "mean_token_accuracy": 0.77419193983078, "num_tokens": 1745399338.0, "step": 25840 }, { "entropy": 0.749726516008377, "epoch": 7.831023252291146, "grad_norm": 0.2057487815618515, "learning_rate": 5.048920680756568e-05, "loss": 1.15, "mean_token_accuracy": 0.7743424639105797, "num_tokens": 1746070743.0, "step": 25850 }, { "entropy": 0.7458449557423592, "epoch": 7.834052866772703, "grad_norm": 0.2177976369857788, "learning_rate": 5.0455814810397374e-05, "loss": 1.1371, "mean_token_accuracy": 0.7758996948599816, "num_tokens": 1746741146.0, "step": 25860 }, { "entropy": 0.7548872157931328, "epoch": 7.83708248125426, "grad_norm": 0.2201891839504242, "learning_rate": 5.042242260991286e-05, "loss": 1.1364, "mean_token_accuracy": 0.7745819926261902, "num_tokens": 1747423056.0, "step": 25870 }, { "entropy": 0.7475612044334412, "epoch": 7.840112095735818, "grad_norm": 0.20265506207942963, "learning_rate": 5.038903022100675e-05, "loss": 1.1384, "mean_token_accuracy": 0.7801030427217484, "num_tokens": 1748110164.0, "step": 25880 }, { "entropy": 0.7405172199010849, "epoch": 7.843141710217375, "grad_norm": 0.2164795845746994, "learning_rate": 5.035563765857367e-05, "loss": 1.1318, "mean_token_accuracy": 0.7839020133018494, "num_tokens": 1748805998.0, "step": 25890 }, { "entropy": 0.7411181926727295, "epoch": 7.846171324698932, "grad_norm": 0.21809406578540802, "learning_rate": 5.032224493750841e-05, "loss": 1.1345, "mean_token_accuracy": 0.7742871686816215, "num_tokens": 1749459915.0, "step": 25900 }, { "entropy": 0.7525318443775177, "epoch": 7.8492009391804896, "grad_norm": 0.21590840816497803, "learning_rate": 5.028885207270579e-05, "loss": 1.1493, "mean_token_accuracy": 0.7758223533630371, "num_tokens": 1750138323.0, "step": 25910 }, { "entropy": 0.7444149792194367, "epoch": 7.8522305536620465, "grad_norm": 0.22707167267799377, "learning_rate": 5.025545907906071e-05, "loss": 1.14, "mean_token_accuracy": 0.7741804406046867, "num_tokens": 1750812267.0, "step": 25920 }, { "entropy": 0.7396515145897865, "epoch": 7.855260168143603, "grad_norm": 0.23188693821430206, "learning_rate": 5.022206597146809e-05, "loss": 1.1429, "mean_token_accuracy": 0.7752804264426232, "num_tokens": 1751466833.0, "step": 25930 }, { "entropy": 0.7539496392011642, "epoch": 7.858289782625161, "grad_norm": 0.222253680229187, "learning_rate": 5.018867276482293e-05, "loss": 1.1531, "mean_token_accuracy": 0.7719980478286743, "num_tokens": 1752123862.0, "step": 25940 }, { "entropy": 0.7511607229709625, "epoch": 7.861319397106718, "grad_norm": 0.21358038485050201, "learning_rate": 5.01552794740203e-05, "loss": 1.1465, "mean_token_accuracy": 0.7767821297049522, "num_tokens": 1752804083.0, "step": 25950 }, { "entropy": 0.7481107085943222, "epoch": 7.864349011588275, "grad_norm": 0.2152298241853714, "learning_rate": 5.012188611395523e-05, "loss": 1.1423, "mean_token_accuracy": 0.77596565335989, "num_tokens": 1753488779.0, "step": 25960 }, { "entropy": 0.7437504604458809, "epoch": 7.867378626069833, "grad_norm": 0.2288159728050232, "learning_rate": 5.008849269952287e-05, "loss": 1.1383, "mean_token_accuracy": 0.7771565794944764, "num_tokens": 1754166460.0, "step": 25970 }, { "entropy": 0.7486496567726135, "epoch": 7.87040824055139, "grad_norm": 0.22141054272651672, "learning_rate": 5.0055099245618343e-05, "loss": 1.1408, "mean_token_accuracy": 0.7729766875505447, "num_tokens": 1754839682.0, "step": 25980 }, { "entropy": 0.7523567467927933, "epoch": 7.873437855032947, "grad_norm": 0.2228238731622696, "learning_rate": 5.0021705767136784e-05, "loss": 1.151, "mean_token_accuracy": 0.7777088150382042, "num_tokens": 1755529541.0, "step": 25990 }, { "entropy": 0.7315017133951187, "epoch": 7.8764674695145045, "grad_norm": 0.2139190435409546, "learning_rate": 4.9988312278973374e-05, "loss": 1.1339, "mean_token_accuracy": 0.7816120103001595, "num_tokens": 1756207843.0, "step": 26000 }, { "entropy": 0.7476119130849839, "epoch": 7.879497083996061, "grad_norm": 0.21859972178936005, "learning_rate": 4.995491879602328e-05, "loss": 1.1514, "mean_token_accuracy": 0.7729114070534706, "num_tokens": 1756868766.0, "step": 26010 }, { "entropy": 0.7385255351662636, "epoch": 7.882526698477619, "grad_norm": 0.2217828333377838, "learning_rate": 4.9921525333181645e-05, "loss": 1.1259, "mean_token_accuracy": 0.7782105281949043, "num_tokens": 1757545544.0, "step": 26020 }, { "entropy": 0.7463892117142678, "epoch": 7.885556312959176, "grad_norm": 0.22420364618301392, "learning_rate": 4.988813190534364e-05, "loss": 1.1373, "mean_token_accuracy": 0.7758491992950439, "num_tokens": 1758214530.0, "step": 26030 }, { "entropy": 0.7418407365679741, "epoch": 7.888585927440733, "grad_norm": 0.22543227672576904, "learning_rate": 4.98547385274044e-05, "loss": 1.1343, "mean_token_accuracy": 0.7768672183156013, "num_tokens": 1758890862.0, "step": 26040 }, { "entropy": 0.7480473071336746, "epoch": 7.89161554192229, "grad_norm": 0.2153388112783432, "learning_rate": 4.982134521425906e-05, "loss": 1.1438, "mean_token_accuracy": 0.7791155099868774, "num_tokens": 1759585957.0, "step": 26050 }, { "entropy": 0.7421821311116219, "epoch": 7.894645156403848, "grad_norm": 0.2179073989391327, "learning_rate": 4.978795198080267e-05, "loss": 1.1399, "mean_token_accuracy": 0.773826913535595, "num_tokens": 1760254722.0, "step": 26060 }, { "entropy": 0.7383495569229126, "epoch": 7.897674770885405, "grad_norm": 0.22895166277885437, "learning_rate": 4.975455884193031e-05, "loss": 1.1325, "mean_token_accuracy": 0.7804876893758774, "num_tokens": 1760929886.0, "step": 26070 }, { "entropy": 0.7413220927119255, "epoch": 7.9007043853669625, "grad_norm": 0.2241257280111313, "learning_rate": 4.9721165812536994e-05, "loss": 1.1428, "mean_token_accuracy": 0.7776457533240319, "num_tokens": 1761608920.0, "step": 26080 }, { "entropy": 0.744784663617611, "epoch": 7.903733999848519, "grad_norm": 0.2179223597049713, "learning_rate": 4.9687772907517665e-05, "loss": 1.142, "mean_token_accuracy": 0.7793117061257362, "num_tokens": 1762298896.0, "step": 26090 }, { "entropy": 0.7535127177834511, "epoch": 7.906763614330076, "grad_norm": 0.21491022408008575, "learning_rate": 4.965438014176724e-05, "loss": 1.1477, "mean_token_accuracy": 0.7741543173789978, "num_tokens": 1762977870.0, "step": 26100 }, { "entropy": 0.748827189207077, "epoch": 7.909793228811633, "grad_norm": 0.216589093208313, "learning_rate": 4.962098753018057e-05, "loss": 1.1432, "mean_token_accuracy": 0.773657874763012, "num_tokens": 1763647714.0, "step": 26110 }, { "entropy": 0.7505015954375267, "epoch": 7.912822843293191, "grad_norm": 0.22123666107654572, "learning_rate": 4.958759508765241e-05, "loss": 1.1455, "mean_token_accuracy": 0.7739342629909516, "num_tokens": 1764314179.0, "step": 26120 }, { "entropy": 0.7430455759167671, "epoch": 7.915852457774748, "grad_norm": 0.24435074627399445, "learning_rate": 4.955420282907748e-05, "loss": 1.1447, "mean_token_accuracy": 0.7748620107769966, "num_tokens": 1764971979.0, "step": 26130 }, { "entropy": 0.7427011296153069, "epoch": 7.918882072256306, "grad_norm": 0.21677272021770477, "learning_rate": 4.95208107693504e-05, "loss": 1.1355, "mean_token_accuracy": 0.7760151118040085, "num_tokens": 1765651991.0, "step": 26140 }, { "entropy": 0.7467780768871307, "epoch": 7.921911686737863, "grad_norm": 0.21139970421791077, "learning_rate": 4.948741892336567e-05, "loss": 1.135, "mean_token_accuracy": 0.7797481656074524, "num_tokens": 1766335249.0, "step": 26150 }, { "entropy": 0.7491904139518738, "epoch": 7.92494130121942, "grad_norm": 0.21966759860515594, "learning_rate": 4.9454027306017756e-05, "loss": 1.148, "mean_token_accuracy": 0.774878802895546, "num_tokens": 1767002053.0, "step": 26160 }, { "entropy": 0.7397206977009774, "epoch": 7.927970915700977, "grad_norm": 0.2051488608121872, "learning_rate": 4.942063593220096e-05, "loss": 1.13, "mean_token_accuracy": 0.7776805475354195, "num_tokens": 1767669297.0, "step": 26170 }, { "entropy": 0.7347766533493996, "epoch": 7.931000530182534, "grad_norm": 0.20992016792297363, "learning_rate": 4.9387244816809525e-05, "loss": 1.1346, "mean_token_accuracy": 0.7761421769857406, "num_tokens": 1768332143.0, "step": 26180 }, { "entropy": 0.7496741846203804, "epoch": 7.934030144664091, "grad_norm": 0.21435385942459106, "learning_rate": 4.9353853974737564e-05, "loss": 1.1375, "mean_token_accuracy": 0.7734736561775207, "num_tokens": 1769001909.0, "step": 26190 }, { "entropy": 0.7377337127923965, "epoch": 7.937059759145649, "grad_norm": 0.21795615553855896, "learning_rate": 4.9320463420879045e-05, "loss": 1.1331, "mean_token_accuracy": 0.78009432554245, "num_tokens": 1769677823.0, "step": 26200 }, { "entropy": 0.734045286476612, "epoch": 7.940089373627206, "grad_norm": 0.2170575112104416, "learning_rate": 4.928707317012783e-05, "loss": 1.1313, "mean_token_accuracy": 0.7810531422495842, "num_tokens": 1770358913.0, "step": 26210 }, { "entropy": 0.7450289964675904, "epoch": 7.943118988108763, "grad_norm": 0.2235105037689209, "learning_rate": 4.925368323737766e-05, "loss": 1.1424, "mean_token_accuracy": 0.7792221143841743, "num_tokens": 1771048989.0, "step": 26220 }, { "entropy": 0.7492415189743042, "epoch": 7.946148602590321, "grad_norm": 0.2182435542345047, "learning_rate": 4.922029363752209e-05, "loss": 1.1375, "mean_token_accuracy": 0.7748518586158752, "num_tokens": 1771723596.0, "step": 26230 }, { "entropy": 0.7439008742570877, "epoch": 7.949178217071878, "grad_norm": 0.2185424119234085, "learning_rate": 4.9186904385454566e-05, "loss": 1.1383, "mean_token_accuracy": 0.7725673720240593, "num_tokens": 1772387819.0, "step": 26240 }, { "entropy": 0.7428147852420807, "epoch": 7.952207831553435, "grad_norm": 0.21208150684833527, "learning_rate": 4.915351549606835e-05, "loss": 1.139, "mean_token_accuracy": 0.7782357901334762, "num_tokens": 1773068984.0, "step": 26250 }, { "entropy": 0.7407799273729324, "epoch": 7.955237446034992, "grad_norm": 0.2177264541387558, "learning_rate": 4.912012698425659e-05, "loss": 1.1366, "mean_token_accuracy": 0.7793653145432472, "num_tokens": 1773751389.0, "step": 26260 }, { "entropy": 0.7367604196071624, "epoch": 7.958267060516549, "grad_norm": 0.2107536643743515, "learning_rate": 4.9086738864912196e-05, "loss": 1.1379, "mean_token_accuracy": 0.7817483022809029, "num_tokens": 1774426363.0, "step": 26270 }, { "entropy": 0.7470024287700653, "epoch": 7.961296674998106, "grad_norm": 0.2174767702817917, "learning_rate": 4.905335115292795e-05, "loss": 1.1386, "mean_token_accuracy": 0.7773818820714951, "num_tokens": 1775098932.0, "step": 26280 }, { "entropy": 0.7420658662915229, "epoch": 7.964326289479664, "grad_norm": 0.21864546835422516, "learning_rate": 4.9019963863196464e-05, "loss": 1.1315, "mean_token_accuracy": 0.7792339041829109, "num_tokens": 1775776540.0, "step": 26290 }, { "entropy": 0.7492674008011818, "epoch": 7.967355903961221, "grad_norm": 0.2162260264158249, "learning_rate": 4.8986577010610104e-05, "loss": 1.1404, "mean_token_accuracy": 0.7754534050822258, "num_tokens": 1776466246.0, "step": 26300 }, { "entropy": 0.7473296016454697, "epoch": 7.970385518442778, "grad_norm": 0.22251366078853607, "learning_rate": 4.89531906100611e-05, "loss": 1.1405, "mean_token_accuracy": 0.7775650814175605, "num_tokens": 1777150534.0, "step": 26310 }, { "entropy": 0.7539983540773392, "epoch": 7.973415132924336, "grad_norm": 0.21550723910331726, "learning_rate": 4.8919804676441463e-05, "loss": 1.1428, "mean_token_accuracy": 0.7771080404520034, "num_tokens": 1777829444.0, "step": 26320 }, { "entropy": 0.7489592969417572, "epoch": 7.976444747405893, "grad_norm": 0.23051001131534576, "learning_rate": 4.888641922464296e-05, "loss": 1.1414, "mean_token_accuracy": 0.7754841029644013, "num_tokens": 1778496537.0, "step": 26330 }, { "entropy": 0.7378235667943954, "epoch": 7.9794743618874495, "grad_norm": 0.22835935652256012, "learning_rate": 4.885303426955719e-05, "loss": 1.1253, "mean_token_accuracy": 0.7837060406804085, "num_tokens": 1779184947.0, "step": 26340 }, { "entropy": 0.7488358020782471, "epoch": 7.982503976369007, "grad_norm": 0.21689769625663757, "learning_rate": 4.881964982607553e-05, "loss": 1.1429, "mean_token_accuracy": 0.7760648384690285, "num_tokens": 1779864840.0, "step": 26350 }, { "entropy": 0.7419907003641129, "epoch": 7.985533590850564, "grad_norm": 0.22363406419754028, "learning_rate": 4.8786265909089076e-05, "loss": 1.1388, "mean_token_accuracy": 0.780245891213417, "num_tokens": 1780544252.0, "step": 26360 }, { "entropy": 0.7440352529287338, "epoch": 7.988563205332121, "grad_norm": 0.20977413654327393, "learning_rate": 4.8752882533488756e-05, "loss": 1.148, "mean_token_accuracy": 0.7776442632079125, "num_tokens": 1781215187.0, "step": 26370 }, { "entropy": 0.7335471183061599, "epoch": 7.991592819813679, "grad_norm": 0.22370311617851257, "learning_rate": 4.87194997141652e-05, "loss": 1.1253, "mean_token_accuracy": 0.7811989828944206, "num_tokens": 1781901541.0, "step": 26380 }, { "entropy": 0.7404125243425369, "epoch": 7.994622434295236, "grad_norm": 0.22028976678848267, "learning_rate": 4.8686117466008845e-05, "loss": 1.141, "mean_token_accuracy": 0.7835704386234283, "num_tokens": 1782591873.0, "step": 26390 }, { "entropy": 0.7476471602916718, "epoch": 7.997652048776793, "grad_norm": 0.22728370130062103, "learning_rate": 4.8652735803909814e-05, "loss": 1.1371, "mean_token_accuracy": 0.7757069900631904, "num_tokens": 1783270137.0, "step": 26400 }, { "entropy": 0.7424362485225384, "epoch": 8.00060592289631, "grad_norm": 0.19484201073646545, "learning_rate": 4.861935474275802e-05, "loss": 1.1397, "mean_token_accuracy": 0.7754941108899239, "num_tokens": 1783925499.0, "step": 26410 }, { "entropy": 0.7199281647801399, "epoch": 8.00363553737787, "grad_norm": 0.25212445855140686, "learning_rate": 4.8585974297443074e-05, "loss": 1.1066, "mean_token_accuracy": 0.780900226533413, "num_tokens": 1784596583.0, "step": 26420 }, { "entropy": 0.722083055973053, "epoch": 8.006665151859426, "grad_norm": 0.23797836899757385, "learning_rate": 4.855259448285433e-05, "loss": 1.1062, "mean_token_accuracy": 0.7874858692288399, "num_tokens": 1785290769.0, "step": 26430 }, { "entropy": 0.7245304256677627, "epoch": 8.009694766340983, "grad_norm": 0.24838151037693024, "learning_rate": 4.851921531388085e-05, "loss": 1.1132, "mean_token_accuracy": 0.7804965898394585, "num_tokens": 1785970378.0, "step": 26440 }, { "entropy": 0.7161515861749649, "epoch": 8.01272438082254, "grad_norm": 0.2590703070163727, "learning_rate": 4.848583680541143e-05, "loss": 1.1095, "mean_token_accuracy": 0.7841158762574196, "num_tokens": 1786648781.0, "step": 26450 }, { "entropy": 0.7293979570269584, "epoch": 8.015753995304097, "grad_norm": 0.24743109941482544, "learning_rate": 4.845245897233453e-05, "loss": 1.1126, "mean_token_accuracy": 0.7821021333336831, "num_tokens": 1787344983.0, "step": 26460 }, { "entropy": 0.7267181724309921, "epoch": 8.018783609785654, "grad_norm": 0.23520056903362274, "learning_rate": 4.841908182953833e-05, "loss": 1.1109, "mean_token_accuracy": 0.7799164101481437, "num_tokens": 1788032482.0, "step": 26470 }, { "entropy": 0.7187842220067978, "epoch": 8.021813224267213, "grad_norm": 0.23745307326316833, "learning_rate": 4.8385705391910734e-05, "loss": 1.0985, "mean_token_accuracy": 0.781644044816494, "num_tokens": 1788711125.0, "step": 26480 }, { "entropy": 0.7077472984790802, "epoch": 8.02484283874877, "grad_norm": 0.24203577637672424, "learning_rate": 4.835232967433931e-05, "loss": 1.0988, "mean_token_accuracy": 0.783315247297287, "num_tokens": 1789364497.0, "step": 26490 }, { "entropy": 0.7164745584130288, "epoch": 8.027872453230326, "grad_norm": 0.2467387318611145, "learning_rate": 4.831895469171126e-05, "loss": 1.0961, "mean_token_accuracy": 0.7835220918059349, "num_tokens": 1790045867.0, "step": 26500 }, { "entropy": 0.7072773531079293, "epoch": 8.030902067711883, "grad_norm": 0.25449636578559875, "learning_rate": 4.82855804589135e-05, "loss": 1.0966, "mean_token_accuracy": 0.7852317169308662, "num_tokens": 1790726659.0, "step": 26510 }, { "entropy": 0.7197290822863579, "epoch": 8.03393168219344, "grad_norm": 0.2365366667509079, "learning_rate": 4.825220699083263e-05, "loss": 1.1083, "mean_token_accuracy": 0.7822935700416564, "num_tokens": 1791407340.0, "step": 26520 }, { "entropy": 0.7254417568445206, "epoch": 8.036961296674997, "grad_norm": 0.26874080300331116, "learning_rate": 4.8218834302354875e-05, "loss": 1.1101, "mean_token_accuracy": 0.7791984334588051, "num_tokens": 1792081333.0, "step": 26530 }, { "entropy": 0.7225031182169914, "epoch": 8.039990911156556, "grad_norm": 0.24366725981235504, "learning_rate": 4.818546240836612e-05, "loss": 1.1045, "mean_token_accuracy": 0.7792498484253884, "num_tokens": 1792751027.0, "step": 26540 }, { "entropy": 0.7129725962877274, "epoch": 8.043020525638113, "grad_norm": 0.2428189367055893, "learning_rate": 4.81520913237519e-05, "loss": 1.1037, "mean_token_accuracy": 0.7835505723953247, "num_tokens": 1793417824.0, "step": 26550 }, { "entropy": 0.7184087797999382, "epoch": 8.04605014011967, "grad_norm": 0.24311953783035278, "learning_rate": 4.81187210633974e-05, "loss": 1.1099, "mean_token_accuracy": 0.7814700752496719, "num_tokens": 1794089530.0, "step": 26560 }, { "entropy": 0.718797305226326, "epoch": 8.049079754601227, "grad_norm": 0.2504687011241913, "learning_rate": 4.808535164218741e-05, "loss": 1.1038, "mean_token_accuracy": 0.7837594002485275, "num_tokens": 1794773932.0, "step": 26570 }, { "entropy": 0.7104886934161186, "epoch": 8.052109369082784, "grad_norm": 0.25315359234809875, "learning_rate": 4.8051983075006374e-05, "loss": 1.1109, "mean_token_accuracy": 0.78170804977417, "num_tokens": 1795441765.0, "step": 26580 }, { "entropy": 0.7183498606085778, "epoch": 8.05513898356434, "grad_norm": 0.2640795111656189, "learning_rate": 4.801861537673833e-05, "loss": 1.111, "mean_token_accuracy": 0.7804919451475143, "num_tokens": 1796113390.0, "step": 26590 }, { "entropy": 0.7245974063873291, "epoch": 8.0581685980459, "grad_norm": 0.24883559346199036, "learning_rate": 4.798524856226696e-05, "loss": 1.1092, "mean_token_accuracy": 0.7838584095239639, "num_tokens": 1796806113.0, "step": 26600 }, { "entropy": 0.7104981392621994, "epoch": 8.061198212527456, "grad_norm": 0.25070619583129883, "learning_rate": 4.79518826464755e-05, "loss": 1.1059, "mean_token_accuracy": 0.7818232834339142, "num_tokens": 1797475384.0, "step": 26610 }, { "entropy": 0.716577960550785, "epoch": 8.064227827009013, "grad_norm": 0.25383082032203674, "learning_rate": 4.7918517644246855e-05, "loss": 1.1033, "mean_token_accuracy": 0.782039375603199, "num_tokens": 1798149303.0, "step": 26620 }, { "entropy": 0.7199601382017136, "epoch": 8.06725744149057, "grad_norm": 0.24829378724098206, "learning_rate": 4.788515357046347e-05, "loss": 1.1012, "mean_token_accuracy": 0.7868308603763581, "num_tokens": 1798846822.0, "step": 26630 }, { "entropy": 0.7276430949568748, "epoch": 8.070287055972127, "grad_norm": 0.24197372794151306, "learning_rate": 4.7851790440007366e-05, "loss": 1.1147, "mean_token_accuracy": 0.7819822028279304, "num_tokens": 1799537476.0, "step": 26640 }, { "entropy": 0.7098560944199562, "epoch": 8.073316670453686, "grad_norm": 0.256600022315979, "learning_rate": 4.78184282677602e-05, "loss": 1.1021, "mean_token_accuracy": 0.7854956075549125, "num_tokens": 1800219877.0, "step": 26650 }, { "entropy": 0.7111981973052025, "epoch": 8.076346284935243, "grad_norm": 0.2502190172672272, "learning_rate": 4.778506706860317e-05, "loss": 1.1075, "mean_token_accuracy": 0.7847366347908974, "num_tokens": 1800897818.0, "step": 26660 }, { "entropy": 0.7186481073498726, "epoch": 8.0793758994168, "grad_norm": 0.2546496093273163, "learning_rate": 4.775170685741702e-05, "loss": 1.1116, "mean_token_accuracy": 0.7852939248085022, "num_tokens": 1801578318.0, "step": 26670 }, { "entropy": 0.7206821799278259, "epoch": 8.082405513898356, "grad_norm": 0.24004603922367096, "learning_rate": 4.771834764908208e-05, "loss": 1.1161, "mean_token_accuracy": 0.7796136900782585, "num_tokens": 1802240373.0, "step": 26680 }, { "entropy": 0.7167475149035454, "epoch": 8.085435128379913, "grad_norm": 0.25139474868774414, "learning_rate": 4.768498945847823e-05, "loss": 1.1089, "mean_token_accuracy": 0.7803749486804008, "num_tokens": 1802899947.0, "step": 26690 }, { "entropy": 0.720377242565155, "epoch": 8.08846474286147, "grad_norm": 0.25136709213256836, "learning_rate": 4.76516323004849e-05, "loss": 1.1038, "mean_token_accuracy": 0.7825200736522675, "num_tokens": 1803572374.0, "step": 26700 }, { "entropy": 0.7177918121218682, "epoch": 8.091494357343029, "grad_norm": 0.2568497955799103, "learning_rate": 4.761827618998103e-05, "loss": 1.1117, "mean_token_accuracy": 0.7785768941044807, "num_tokens": 1804241280.0, "step": 26710 }, { "entropy": 0.7282487601041794, "epoch": 8.094523971824586, "grad_norm": 0.2670879364013672, "learning_rate": 4.758492114184513e-05, "loss": 1.1279, "mean_token_accuracy": 0.7746492862701416, "num_tokens": 1804897799.0, "step": 26720 }, { "entropy": 0.7177425503730774, "epoch": 8.097553586306143, "grad_norm": 0.2556954622268677, "learning_rate": 4.755156717095523e-05, "loss": 1.1101, "mean_token_accuracy": 0.7844574332237244, "num_tokens": 1805583806.0, "step": 26730 }, { "entropy": 0.7086010038852691, "epoch": 8.1005832007877, "grad_norm": 0.2505972385406494, "learning_rate": 4.751821429218883e-05, "loss": 1.1118, "mean_token_accuracy": 0.7858968749642372, "num_tokens": 1806261822.0, "step": 26740 }, { "entropy": 0.7242101460695267, "epoch": 8.103612815269257, "grad_norm": 0.24488812685012817, "learning_rate": 4.748486252042302e-05, "loss": 1.1112, "mean_token_accuracy": 0.777028651535511, "num_tokens": 1806926942.0, "step": 26750 }, { "entropy": 0.7144681587815285, "epoch": 8.106642429750814, "grad_norm": 0.23839133977890015, "learning_rate": 4.7451511870534345e-05, "loss": 1.1051, "mean_token_accuracy": 0.7829489514231682, "num_tokens": 1807600572.0, "step": 26760 }, { "entropy": 0.7107941642403602, "epoch": 8.109672044232372, "grad_norm": 0.2450329214334488, "learning_rate": 4.7418162357398866e-05, "loss": 1.1042, "mean_token_accuracy": 0.7836286336183548, "num_tokens": 1808275883.0, "step": 26770 }, { "entropy": 0.7249545127153396, "epoch": 8.11270165871393, "grad_norm": 0.25030460953712463, "learning_rate": 4.7384813995892126e-05, "loss": 1.1171, "mean_token_accuracy": 0.7797255769371987, "num_tokens": 1808949615.0, "step": 26780 }, { "entropy": 0.7185983180999755, "epoch": 8.115731273195486, "grad_norm": 0.2522261142730713, "learning_rate": 4.7351466800889183e-05, "loss": 1.1041, "mean_token_accuracy": 0.7817625656723977, "num_tokens": 1809619652.0, "step": 26790 }, { "entropy": 0.7124221190810204, "epoch": 8.118760887677043, "grad_norm": 0.2514772415161133, "learning_rate": 4.7318120787264525e-05, "loss": 1.1108, "mean_token_accuracy": 0.7823670014739037, "num_tokens": 1810285799.0, "step": 26800 }, { "entropy": 0.7199279174208641, "epoch": 8.1217905021586, "grad_norm": 0.2518217861652374, "learning_rate": 4.728477596989216e-05, "loss": 1.1162, "mean_token_accuracy": 0.7802311480045319, "num_tokens": 1810962658.0, "step": 26810 }, { "entropy": 0.7195024460554122, "epoch": 8.124820116640157, "grad_norm": 0.23763404786586761, "learning_rate": 4.725143236364554e-05, "loss": 1.1097, "mean_token_accuracy": 0.7863524854183197, "num_tokens": 1811660178.0, "step": 26820 }, { "entropy": 0.7217857331037522, "epoch": 8.127849731121715, "grad_norm": 0.26382240653038025, "learning_rate": 4.721808998339759e-05, "loss": 1.1144, "mean_token_accuracy": 0.7774293676018715, "num_tokens": 1812315947.0, "step": 26830 }, { "entropy": 0.708995920419693, "epoch": 8.130879345603272, "grad_norm": 0.25705721974372864, "learning_rate": 4.7184748844020695e-05, "loss": 1.1026, "mean_token_accuracy": 0.7843075603246689, "num_tokens": 1812985285.0, "step": 26840 }, { "entropy": 0.7199554100632668, "epoch": 8.13390896008483, "grad_norm": 0.24791404604911804, "learning_rate": 4.715140896038662e-05, "loss": 1.1093, "mean_token_accuracy": 0.7802799060940743, "num_tokens": 1813646439.0, "step": 26850 }, { "entropy": 0.7239656805992126, "epoch": 8.136938574566386, "grad_norm": 0.2485019564628601, "learning_rate": 4.7118070347366657e-05, "loss": 1.1183, "mean_token_accuracy": 0.7817316979169846, "num_tokens": 1814315361.0, "step": 26860 }, { "entropy": 0.7147956863045692, "epoch": 8.139968189047943, "grad_norm": 0.24146954715251923, "learning_rate": 4.708473301983149e-05, "loss": 1.1049, "mean_token_accuracy": 0.7799753293395042, "num_tokens": 1814981719.0, "step": 26870 }, { "entropy": 0.7090666785836219, "epoch": 8.1429978035295, "grad_norm": 0.2604823112487793, "learning_rate": 4.7051396992651223e-05, "loss": 1.1059, "mean_token_accuracy": 0.7828262433409691, "num_tokens": 1815642105.0, "step": 26880 }, { "entropy": 0.7162450239062309, "epoch": 8.146027418011059, "grad_norm": 0.24658261239528656, "learning_rate": 4.701806228069541e-05, "loss": 1.1113, "mean_token_accuracy": 0.7840412139892579, "num_tokens": 1816311172.0, "step": 26890 }, { "entropy": 0.7098590478301048, "epoch": 8.149057032492616, "grad_norm": 0.26275911927223206, "learning_rate": 4.698472889883298e-05, "loss": 1.1027, "mean_token_accuracy": 0.7864038065075875, "num_tokens": 1816980224.0, "step": 26900 }, { "entropy": 0.7219566449522972, "epoch": 8.152086646974173, "grad_norm": 0.25178688764572144, "learning_rate": 4.695139686193232e-05, "loss": 1.1089, "mean_token_accuracy": 0.781039121747017, "num_tokens": 1817652354.0, "step": 26910 }, { "entropy": 0.7122976735234261, "epoch": 8.15511626145573, "grad_norm": 0.2587715983390808, "learning_rate": 4.6918066184861154e-05, "loss": 1.102, "mean_token_accuracy": 0.7840895235538483, "num_tokens": 1818332978.0, "step": 26920 }, { "entropy": 0.7142306208610535, "epoch": 8.158145875937286, "grad_norm": 0.23534339666366577, "learning_rate": 4.688473688248664e-05, "loss": 1.1136, "mean_token_accuracy": 0.7824066042900085, "num_tokens": 1819012246.0, "step": 26930 }, { "entropy": 0.7209535852074623, "epoch": 8.161175490418843, "grad_norm": 0.24771210551261902, "learning_rate": 4.6851408969675334e-05, "loss": 1.1182, "mean_token_accuracy": 0.77869703322649, "num_tokens": 1819664571.0, "step": 26940 }, { "entropy": 0.7205091133713722, "epoch": 8.164205104900402, "grad_norm": 0.24920061230659485, "learning_rate": 4.681808246129312e-05, "loss": 1.1105, "mean_token_accuracy": 0.7805688068270683, "num_tokens": 1820336391.0, "step": 26950 }, { "entropy": 0.7151263654232025, "epoch": 8.167234719381959, "grad_norm": 0.253296822309494, "learning_rate": 4.678475737220531e-05, "loss": 1.1101, "mean_token_accuracy": 0.782950097322464, "num_tokens": 1821014808.0, "step": 26960 }, { "entropy": 0.7242428854107856, "epoch": 8.170264333863516, "grad_norm": 0.25193753838539124, "learning_rate": 4.675143371727656e-05, "loss": 1.1129, "mean_token_accuracy": 0.7782095596194267, "num_tokens": 1821669158.0, "step": 26970 }, { "entropy": 0.719010303914547, "epoch": 8.173293948345073, "grad_norm": 0.2513599991798401, "learning_rate": 4.671811151137088e-05, "loss": 1.1105, "mean_token_accuracy": 0.7793019488453865, "num_tokens": 1822330924.0, "step": 26980 }, { "entropy": 0.7160741940140725, "epoch": 8.17632356282663, "grad_norm": 0.23966023325920105, "learning_rate": 4.6684790769351634e-05, "loss": 1.1098, "mean_token_accuracy": 0.7804221719503402, "num_tokens": 1823001974.0, "step": 26990 }, { "entropy": 0.719944280385971, "epoch": 8.179353177308187, "grad_norm": 0.2498185932636261, "learning_rate": 4.6651471506081565e-05, "loss": 1.1136, "mean_token_accuracy": 0.7820601284503936, "num_tokens": 1823675112.0, "step": 27000 }, { "entropy": 0.7151422023773193, "epoch": 8.182382791789745, "grad_norm": 0.2516774535179138, "learning_rate": 4.66181537364227e-05, "loss": 1.1009, "mean_token_accuracy": 0.7837372854351997, "num_tokens": 1824351926.0, "step": 27010 }, { "entropy": 0.7109734356403351, "epoch": 8.185412406271302, "grad_norm": 0.25796839594841003, "learning_rate": 4.6584837475236444e-05, "loss": 1.1034, "mean_token_accuracy": 0.7837534859776497, "num_tokens": 1825025318.0, "step": 27020 }, { "entropy": 0.7213475614786148, "epoch": 8.18844202075286, "grad_norm": 0.2408466339111328, "learning_rate": 4.6551522737383526e-05, "loss": 1.116, "mean_token_accuracy": 0.7806976407766342, "num_tokens": 1825704868.0, "step": 27030 }, { "entropy": 0.723088102042675, "epoch": 8.191471635234416, "grad_norm": 0.27157673239707947, "learning_rate": 4.651820953772398e-05, "loss": 1.1085, "mean_token_accuracy": 0.7855571120977402, "num_tokens": 1826396242.0, "step": 27040 }, { "entropy": 0.7191458314657211, "epoch": 8.194501249715973, "grad_norm": 0.2600638270378113, "learning_rate": 4.6484897891117154e-05, "loss": 1.1076, "mean_token_accuracy": 0.7821206986904145, "num_tokens": 1827074091.0, "step": 27050 }, { "entropy": 0.7229193404316903, "epoch": 8.197530864197532, "grad_norm": 0.26254802942276, "learning_rate": 4.645158781242171e-05, "loss": 1.118, "mean_token_accuracy": 0.7763147711753845, "num_tokens": 1827728690.0, "step": 27060 }, { "entropy": 0.7128867775201797, "epoch": 8.200560478679089, "grad_norm": 0.26198434829711914, "learning_rate": 4.641827931649562e-05, "loss": 1.1094, "mean_token_accuracy": 0.7814601376652718, "num_tokens": 1828391383.0, "step": 27070 }, { "entropy": 0.7261982277035713, "epoch": 8.203590093160646, "grad_norm": 0.24718758463859558, "learning_rate": 4.638497241819612e-05, "loss": 1.1202, "mean_token_accuracy": 0.7774030700325966, "num_tokens": 1829056240.0, "step": 27080 }, { "entropy": 0.720742504298687, "epoch": 8.206619707642202, "grad_norm": 0.25525447726249695, "learning_rate": 4.6351667132379766e-05, "loss": 1.1086, "mean_token_accuracy": 0.7852376803755761, "num_tokens": 1829744599.0, "step": 27090 }, { "entropy": 0.7264063850045204, "epoch": 8.20964932212376, "grad_norm": 0.2477932572364807, "learning_rate": 4.631836347390239e-05, "loss": 1.1134, "mean_token_accuracy": 0.7852577731013298, "num_tokens": 1830441169.0, "step": 27100 }, { "entropy": 0.7133047297596932, "epoch": 8.212678936605316, "grad_norm": 0.25836342573165894, "learning_rate": 4.628506145761907e-05, "loss": 1.1009, "mean_token_accuracy": 0.7800086751580239, "num_tokens": 1831106159.0, "step": 27110 }, { "entropy": 0.7223998859524727, "epoch": 8.215708551086875, "grad_norm": 0.2607588768005371, "learning_rate": 4.6251761098384174e-05, "loss": 1.1208, "mean_token_accuracy": 0.7797861859202385, "num_tokens": 1831777748.0, "step": 27120 }, { "entropy": 0.7217176571488381, "epoch": 8.218738165568432, "grad_norm": 0.24700438976287842, "learning_rate": 4.6218462411051355e-05, "loss": 1.1151, "mean_token_accuracy": 0.7837042525410652, "num_tokens": 1832456356.0, "step": 27130 }, { "entropy": 0.7273529902100563, "epoch": 8.221767780049989, "grad_norm": 0.2521395683288574, "learning_rate": 4.618516541047345e-05, "loss": 1.1188, "mean_token_accuracy": 0.7798873633146286, "num_tokens": 1833133661.0, "step": 27140 }, { "entropy": 0.7186020672321319, "epoch": 8.224797394531546, "grad_norm": 0.24985063076019287, "learning_rate": 4.615187011150262e-05, "loss": 1.1138, "mean_token_accuracy": 0.7808549642562866, "num_tokens": 1833796802.0, "step": 27150 }, { "entropy": 0.712014751136303, "epoch": 8.227827009013103, "grad_norm": 0.2520310878753662, "learning_rate": 4.611857652899022e-05, "loss": 1.1043, "mean_token_accuracy": 0.7841321885585785, "num_tokens": 1834466999.0, "step": 27160 }, { "entropy": 0.7179373130202293, "epoch": 8.23085662349466, "grad_norm": 0.25570186972618103, "learning_rate": 4.608528467778687e-05, "loss": 1.1151, "mean_token_accuracy": 0.7850959330797196, "num_tokens": 1835139694.0, "step": 27170 }, { "entropy": 0.7286332011222839, "epoch": 8.233886237976218, "grad_norm": 0.2549498379230499, "learning_rate": 4.605199457274238e-05, "loss": 1.1162, "mean_token_accuracy": 0.7740892961621284, "num_tokens": 1835802648.0, "step": 27180 }, { "entropy": 0.721484349668026, "epoch": 8.236915852457775, "grad_norm": 0.24085596203804016, "learning_rate": 4.6018706228705825e-05, "loss": 1.1192, "mean_token_accuracy": 0.7827752307057381, "num_tokens": 1836477469.0, "step": 27190 }, { "entropy": 0.7150719866156579, "epoch": 8.239945466939332, "grad_norm": 0.2526509463787079, "learning_rate": 4.5985419660525434e-05, "loss": 1.1049, "mean_token_accuracy": 0.7816643178462982, "num_tokens": 1837145950.0, "step": 27200 }, { "entropy": 0.7226071417331695, "epoch": 8.242975081420889, "grad_norm": 0.2403123676776886, "learning_rate": 4.595213488304872e-05, "loss": 1.1092, "mean_token_accuracy": 0.7827942579984665, "num_tokens": 1837827447.0, "step": 27210 }, { "entropy": 0.7206570655107498, "epoch": 8.246004695902446, "grad_norm": 0.26418620347976685, "learning_rate": 4.591885191112233e-05, "loss": 1.1075, "mean_token_accuracy": 0.7818023473024368, "num_tokens": 1838509638.0, "step": 27220 }, { "entropy": 0.723764368891716, "epoch": 8.249034310384003, "grad_norm": 0.24933916330337524, "learning_rate": 4.588557075959214e-05, "loss": 1.1025, "mean_token_accuracy": 0.7819537103176117, "num_tokens": 1839193441.0, "step": 27230 }, { "entropy": 0.7162736654281616, "epoch": 8.252063924865562, "grad_norm": 0.23663337528705597, "learning_rate": 4.585229144330323e-05, "loss": 1.1178, "mean_token_accuracy": 0.778569583594799, "num_tokens": 1839852616.0, "step": 27240 }, { "entropy": 0.7260021716356277, "epoch": 8.255093539347119, "grad_norm": 0.2518142759799957, "learning_rate": 4.581901397709983e-05, "loss": 1.114, "mean_token_accuracy": 0.7796929970383644, "num_tokens": 1840528113.0, "step": 27250 }, { "entropy": 0.7233222261071205, "epoch": 8.258123153828675, "grad_norm": 0.2555762827396393, "learning_rate": 4.578573837582534e-05, "loss": 1.1125, "mean_token_accuracy": 0.7773929163813591, "num_tokens": 1841183735.0, "step": 27260 }, { "entropy": 0.723837748169899, "epoch": 8.261152768310232, "grad_norm": 0.2641092538833618, "learning_rate": 4.5752464654322355e-05, "loss": 1.1105, "mean_token_accuracy": 0.7805815920233726, "num_tokens": 1841862928.0, "step": 27270 }, { "entropy": 0.715344388782978, "epoch": 8.26418238279179, "grad_norm": 0.25379928946495056, "learning_rate": 4.5719192827432624e-05, "loss": 1.1122, "mean_token_accuracy": 0.7848491683602333, "num_tokens": 1842528590.0, "step": 27280 }, { "entropy": 0.716085834801197, "epoch": 8.267211997273346, "grad_norm": 0.256211519241333, "learning_rate": 4.5685922909997036e-05, "loss": 1.1039, "mean_token_accuracy": 0.7850614383816719, "num_tokens": 1843213297.0, "step": 27290 }, { "entropy": 0.7137120842933655, "epoch": 8.270241611754905, "grad_norm": 0.24229760468006134, "learning_rate": 4.565265491685564e-05, "loss": 1.1116, "mean_token_accuracy": 0.7800800368189812, "num_tokens": 1843875109.0, "step": 27300 }, { "entropy": 0.7221919119358062, "epoch": 8.273271226236462, "grad_norm": 0.25407901406288147, "learning_rate": 4.561938886284765e-05, "loss": 1.1129, "mean_token_accuracy": 0.7793138191103935, "num_tokens": 1844545036.0, "step": 27310 }, { "entropy": 0.7136831447482109, "epoch": 8.276300840718019, "grad_norm": 0.24182777106761932, "learning_rate": 4.5586124762811336e-05, "loss": 1.1104, "mean_token_accuracy": 0.7838802829384803, "num_tokens": 1845219829.0, "step": 27320 }, { "entropy": 0.7143359944224358, "epoch": 8.279330455199576, "grad_norm": 0.2557224929332733, "learning_rate": 4.555286263158419e-05, "loss": 1.1084, "mean_token_accuracy": 0.7849928557872772, "num_tokens": 1845892685.0, "step": 27330 }, { "entropy": 0.7223512455821037, "epoch": 8.282360069681133, "grad_norm": 0.24389885365962982, "learning_rate": 4.55196024840028e-05, "loss": 1.1102, "mean_token_accuracy": 0.7804371014237403, "num_tokens": 1846564946.0, "step": 27340 }, { "entropy": 0.7124507904052735, "epoch": 8.28538968416269, "grad_norm": 0.247615784406662, "learning_rate": 4.548634433490281e-05, "loss": 1.1105, "mean_token_accuracy": 0.7814389169216156, "num_tokens": 1847230526.0, "step": 27350 }, { "entropy": 0.7159087106585502, "epoch": 8.288419298644248, "grad_norm": 0.2521499991416931, "learning_rate": 4.5453088199119056e-05, "loss": 1.1024, "mean_token_accuracy": 0.7813684791326523, "num_tokens": 1847906579.0, "step": 27360 }, { "entropy": 0.7156479343771934, "epoch": 8.291448913125805, "grad_norm": 0.24656377732753754, "learning_rate": 4.5419834091485414e-05, "loss": 1.1032, "mean_token_accuracy": 0.7859241157770157, "num_tokens": 1848596034.0, "step": 27370 }, { "entropy": 0.7168033167719841, "epoch": 8.294478527607362, "grad_norm": 0.24311594665050507, "learning_rate": 4.5386582026834906e-05, "loss": 1.1113, "mean_token_accuracy": 0.7866822898387908, "num_tokens": 1849278634.0, "step": 27380 }, { "entropy": 0.7253716468811036, "epoch": 8.297508142088919, "grad_norm": 0.2668185830116272, "learning_rate": 4.535333201999959e-05, "loss": 1.1239, "mean_token_accuracy": 0.7788155362010002, "num_tokens": 1849954261.0, "step": 27390 }, { "entropy": 0.7032744482159614, "epoch": 8.300537756570476, "grad_norm": 0.24929794669151306, "learning_rate": 4.532008408581064e-05, "loss": 1.0972, "mean_token_accuracy": 0.7904573962092399, "num_tokens": 1850653969.0, "step": 27400 }, { "entropy": 0.7162431120872498, "epoch": 8.303567371052033, "grad_norm": 0.2612493336200714, "learning_rate": 4.5286838239098325e-05, "loss": 1.1107, "mean_token_accuracy": 0.7826158821582794, "num_tokens": 1851324108.0, "step": 27410 }, { "entropy": 0.7205453425645828, "epoch": 8.306596985533591, "grad_norm": 0.25268685817718506, "learning_rate": 4.525359449469191e-05, "loss": 1.1149, "mean_token_accuracy": 0.7815184578299522, "num_tokens": 1852000998.0, "step": 27420 }, { "entropy": 0.7282377079129219, "epoch": 8.309626600015148, "grad_norm": 0.2584766447544098, "learning_rate": 4.522035286741979e-05, "loss": 1.1175, "mean_token_accuracy": 0.7803824990987778, "num_tokens": 1852679929.0, "step": 27430 }, { "entropy": 0.7233649790287018, "epoch": 8.312656214496705, "grad_norm": 0.253195196390152, "learning_rate": 4.518711337210941e-05, "loss": 1.1215, "mean_token_accuracy": 0.7783728897571563, "num_tokens": 1853351086.0, "step": 27440 }, { "entropy": 0.7166319891810418, "epoch": 8.315685828978262, "grad_norm": 0.25387871265411377, "learning_rate": 4.5153876023587214e-05, "loss": 1.1096, "mean_token_accuracy": 0.7807391300797463, "num_tokens": 1854019055.0, "step": 27450 }, { "entropy": 0.7199093952775002, "epoch": 8.31871544345982, "grad_norm": 0.2522037625312805, "learning_rate": 4.512064083667875e-05, "loss": 1.1129, "mean_token_accuracy": 0.7819029286503791, "num_tokens": 1854690897.0, "step": 27460 }, { "entropy": 0.7243819639086724, "epoch": 8.321745057941378, "grad_norm": 0.23504793643951416, "learning_rate": 4.508740782620855e-05, "loss": 1.1181, "mean_token_accuracy": 0.7792690947651864, "num_tokens": 1855371240.0, "step": 27470 }, { "entropy": 0.7095865100622177, "epoch": 8.324774672422935, "grad_norm": 0.24862954020500183, "learning_rate": 4.505417700700023e-05, "loss": 1.1072, "mean_token_accuracy": 0.7814011991024017, "num_tokens": 1856033287.0, "step": 27480 }, { "entropy": 0.7154641449451447, "epoch": 8.327804286904492, "grad_norm": 0.2469140589237213, "learning_rate": 4.502094839387637e-05, "loss": 1.1043, "mean_token_accuracy": 0.7874842047691345, "num_tokens": 1856721284.0, "step": 27490 }, { "entropy": 0.7299807772040368, "epoch": 8.330833901386049, "grad_norm": 0.2515232563018799, "learning_rate": 4.49877220016586e-05, "loss": 1.1233, "mean_token_accuracy": 0.7815038755536079, "num_tokens": 1857414157.0, "step": 27500 }, { "entropy": 0.7260418713092804, "epoch": 8.333863515867606, "grad_norm": 0.25561216473579407, "learning_rate": 4.4954497845167574e-05, "loss": 1.1148, "mean_token_accuracy": 0.7742961138486862, "num_tokens": 1858082085.0, "step": 27510 }, { "entropy": 0.7206007733941078, "epoch": 8.336893130349162, "grad_norm": 0.2394639402627945, "learning_rate": 4.49212759392229e-05, "loss": 1.1089, "mean_token_accuracy": 0.7829590395092965, "num_tokens": 1858766844.0, "step": 27520 }, { "entropy": 0.7164310291409492, "epoch": 8.33992274483072, "grad_norm": 0.2688215970993042, "learning_rate": 4.4888056298643225e-05, "loss": 1.1143, "mean_token_accuracy": 0.779485197365284, "num_tokens": 1859431672.0, "step": 27530 }, { "entropy": 0.7138465449213982, "epoch": 8.342952359312278, "grad_norm": 0.25172656774520874, "learning_rate": 4.485483893824618e-05, "loss": 1.104, "mean_token_accuracy": 0.7824663892388344, "num_tokens": 1860110045.0, "step": 27540 }, { "entropy": 0.7216217398643494, "epoch": 8.345981973793835, "grad_norm": 0.25880149006843567, "learning_rate": 4.482162387284835e-05, "loss": 1.1136, "mean_token_accuracy": 0.7828172624111176, "num_tokens": 1860800566.0, "step": 27550 }, { "entropy": 0.7184495881199837, "epoch": 8.349011588275392, "grad_norm": 0.23833420872688293, "learning_rate": 4.4788411117265304e-05, "loss": 1.1101, "mean_token_accuracy": 0.7817671433091163, "num_tokens": 1861474775.0, "step": 27560 }, { "entropy": 0.7274298757314682, "epoch": 8.352041202756949, "grad_norm": 0.2440783977508545, "learning_rate": 4.475520068631162e-05, "loss": 1.1198, "mean_token_accuracy": 0.7835265845060349, "num_tokens": 1862156443.0, "step": 27570 }, { "entropy": 0.7170529827475548, "epoch": 8.355070817238506, "grad_norm": 0.26175349950790405, "learning_rate": 4.4721992594800794e-05, "loss": 1.1095, "mean_token_accuracy": 0.778462964296341, "num_tokens": 1862828400.0, "step": 27580 }, { "entropy": 0.7256954088807106, "epoch": 8.358100431720064, "grad_norm": 0.2389829009771347, "learning_rate": 4.468878685754532e-05, "loss": 1.1188, "mean_token_accuracy": 0.782408845424652, "num_tokens": 1863513803.0, "step": 27590 }, { "entropy": 0.717227679491043, "epoch": 8.361130046201621, "grad_norm": 0.2596455216407776, "learning_rate": 4.465558348935659e-05, "loss": 1.1059, "mean_token_accuracy": 0.7873851031064987, "num_tokens": 1864210837.0, "step": 27600 }, { "entropy": 0.7271547868847847, "epoch": 8.364159660683178, "grad_norm": 0.24676461517810822, "learning_rate": 4.462238250504498e-05, "loss": 1.1126, "mean_token_accuracy": 0.7838033780455589, "num_tokens": 1864900256.0, "step": 27610 }, { "entropy": 0.7318004697561264, "epoch": 8.367189275164735, "grad_norm": 0.25214236974716187, "learning_rate": 4.45891839194198e-05, "loss": 1.119, "mean_token_accuracy": 0.7798203274607658, "num_tokens": 1865577911.0, "step": 27620 }, { "entropy": 0.7154213026165962, "epoch": 8.370218889646292, "grad_norm": 0.25352394580841064, "learning_rate": 4.455598774728925e-05, "loss": 1.1134, "mean_token_accuracy": 0.7794664621353149, "num_tokens": 1866233536.0, "step": 27630 }, { "entropy": 0.7112863630056381, "epoch": 8.373248504127849, "grad_norm": 0.25486281514167786, "learning_rate": 4.452279400346051e-05, "loss": 1.1094, "mean_token_accuracy": 0.7859494358301162, "num_tokens": 1866918882.0, "step": 27640 }, { "entropy": 0.7169707760214805, "epoch": 8.376278118609408, "grad_norm": 0.2506760358810425, "learning_rate": 4.448960270273965e-05, "loss": 1.1064, "mean_token_accuracy": 0.7848715350031853, "num_tokens": 1867605528.0, "step": 27650 }, { "entropy": 0.7194483205676079, "epoch": 8.379307733090965, "grad_norm": 0.25176456570625305, "learning_rate": 4.445641385993163e-05, "loss": 1.1156, "mean_token_accuracy": 0.7766473367810249, "num_tokens": 1868263380.0, "step": 27660 }, { "entropy": 0.7132114320993423, "epoch": 8.382337347572522, "grad_norm": 0.26137015223503113, "learning_rate": 4.442322748984036e-05, "loss": 1.1134, "mean_token_accuracy": 0.7818466663360596, "num_tokens": 1868927424.0, "step": 27670 }, { "entropy": 0.7198499768972397, "epoch": 8.385366962054078, "grad_norm": 0.2476302832365036, "learning_rate": 4.4390043607268594e-05, "loss": 1.114, "mean_token_accuracy": 0.7790758609771729, "num_tokens": 1869578969.0, "step": 27680 }, { "entropy": 0.7200073435902595, "epoch": 8.388396576535635, "grad_norm": 0.25056976079940796, "learning_rate": 4.4356862227018035e-05, "loss": 1.1149, "mean_token_accuracy": 0.782424296438694, "num_tokens": 1870258507.0, "step": 27690 }, { "entropy": 0.7121904641389847, "epoch": 8.391426191017192, "grad_norm": 0.24387040734291077, "learning_rate": 4.4323683363889214e-05, "loss": 1.1058, "mean_token_accuracy": 0.7839852780103683, "num_tokens": 1870927719.0, "step": 27700 }, { "entropy": 0.7183660760521888, "epoch": 8.394455805498751, "grad_norm": 0.2422250360250473, "learning_rate": 4.4290507032681565e-05, "loss": 1.1123, "mean_token_accuracy": 0.7803048029541969, "num_tokens": 1871592721.0, "step": 27710 }, { "entropy": 0.7177755072712898, "epoch": 8.397485419980308, "grad_norm": 0.260509192943573, "learning_rate": 4.425733324819341e-05, "loss": 1.1144, "mean_token_accuracy": 0.7785208135843277, "num_tokens": 1872258357.0, "step": 27720 }, { "entropy": 0.7150320529937744, "epoch": 8.400515034461865, "grad_norm": 0.25201788544654846, "learning_rate": 4.42241620252219e-05, "loss": 1.1022, "mean_token_accuracy": 0.7828321620821953, "num_tokens": 1872930446.0, "step": 27730 }, { "entropy": 0.7162398502230645, "epoch": 8.403544648943422, "grad_norm": 0.25627806782722473, "learning_rate": 4.419099337856306e-05, "loss": 1.0997, "mean_token_accuracy": 0.7854035153985024, "num_tokens": 1873625344.0, "step": 27740 }, { "entropy": 0.7124519675970078, "epoch": 8.406574263424979, "grad_norm": 0.2569003105163574, "learning_rate": 4.415782732301177e-05, "loss": 1.106, "mean_token_accuracy": 0.7857031732797622, "num_tokens": 1874312978.0, "step": 27750 }, { "entropy": 0.721549491584301, "epoch": 8.409603877906536, "grad_norm": 0.24334709346294403, "learning_rate": 4.4124663873361753e-05, "loss": 1.1144, "mean_token_accuracy": 0.7795088529586792, "num_tokens": 1874979206.0, "step": 27760 }, { "entropy": 0.7219868063926697, "epoch": 8.412633492388094, "grad_norm": 0.23988959193229675, "learning_rate": 4.409150304440556e-05, "loss": 1.1108, "mean_token_accuracy": 0.7811971604824066, "num_tokens": 1875650612.0, "step": 27770 }, { "entropy": 0.7256921246647835, "epoch": 8.415663106869651, "grad_norm": 0.24760156869888306, "learning_rate": 4.405834485093459e-05, "loss": 1.107, "mean_token_accuracy": 0.7802922621369361, "num_tokens": 1876327876.0, "step": 27780 }, { "entropy": 0.7178578466176987, "epoch": 8.418692721351208, "grad_norm": 0.26577064394950867, "learning_rate": 4.402518930773904e-05, "loss": 1.1177, "mean_token_accuracy": 0.7766996204853058, "num_tokens": 1876992197.0, "step": 27790 }, { "entropy": 0.7079323068261146, "epoch": 8.421722335832765, "grad_norm": 0.26500049233436584, "learning_rate": 4.3992036429607955e-05, "loss": 1.1013, "mean_token_accuracy": 0.787224443256855, "num_tokens": 1877672478.0, "step": 27800 }, { "entropy": 0.7205834656953811, "epoch": 8.424751950314322, "grad_norm": 0.2477947175502777, "learning_rate": 4.395888623132916e-05, "loss": 1.1102, "mean_token_accuracy": 0.7847822561860085, "num_tokens": 1878354158.0, "step": 27810 }, { "entropy": 0.7100614115595818, "epoch": 8.427781564795879, "grad_norm": 0.23827891051769257, "learning_rate": 4.392573872768933e-05, "loss": 1.1092, "mean_token_accuracy": 0.7817847982048989, "num_tokens": 1879020492.0, "step": 27820 }, { "entropy": 0.7129092320799828, "epoch": 8.430811179277438, "grad_norm": 0.26287147402763367, "learning_rate": 4.389259393347389e-05, "loss": 1.1055, "mean_token_accuracy": 0.7855137750506401, "num_tokens": 1879707040.0, "step": 27830 }, { "entropy": 0.7174157917499542, "epoch": 8.433840793758995, "grad_norm": 0.26285824179649353, "learning_rate": 4.385945186346707e-05, "loss": 1.1052, "mean_token_accuracy": 0.7869385465979576, "num_tokens": 1880397214.0, "step": 27840 }, { "entropy": 0.7163793385028839, "epoch": 8.436870408240551, "grad_norm": 0.2453935742378235, "learning_rate": 4.3826312532451914e-05, "loss": 1.1104, "mean_token_accuracy": 0.7838813096284867, "num_tokens": 1881079728.0, "step": 27850 }, { "entropy": 0.7231308802962303, "epoch": 8.439900022722108, "grad_norm": 0.25110098719596863, "learning_rate": 4.37931759552102e-05, "loss": 1.1121, "mean_token_accuracy": 0.781943641602993, "num_tokens": 1881758803.0, "step": 27860 }, { "entropy": 0.716950623691082, "epoch": 8.442929637203665, "grad_norm": 0.2400575876235962, "learning_rate": 4.37600421465225e-05, "loss": 1.1116, "mean_token_accuracy": 0.7835470572113991, "num_tokens": 1882441320.0, "step": 27870 }, { "entropy": 0.7130150616168975, "epoch": 8.445959251685222, "grad_norm": 0.2530190348625183, "learning_rate": 4.372691112116817e-05, "loss": 1.109, "mean_token_accuracy": 0.7860501155257225, "num_tokens": 1883125135.0, "step": 27880 }, { "entropy": 0.7143324956297874, "epoch": 8.448988866166781, "grad_norm": 0.2560359537601471, "learning_rate": 4.3693782893925304e-05, "loss": 1.1038, "mean_token_accuracy": 0.7861661210656166, "num_tokens": 1883817683.0, "step": 27890 }, { "entropy": 0.7231035277247428, "epoch": 8.452018480648338, "grad_norm": 0.2547987997531891, "learning_rate": 4.366065747957072e-05, "loss": 1.1156, "mean_token_accuracy": 0.781479986011982, "num_tokens": 1884489814.0, "step": 27900 }, { "entropy": 0.7147853136062622, "epoch": 8.455048095129895, "grad_norm": 0.2504449784755707, "learning_rate": 4.362753489288003e-05, "loss": 1.1115, "mean_token_accuracy": 0.782762411236763, "num_tokens": 1885172135.0, "step": 27910 }, { "entropy": 0.7212734669446945, "epoch": 8.458077709611452, "grad_norm": 0.2554200291633606, "learning_rate": 4.3594415148627555e-05, "loss": 1.1169, "mean_token_accuracy": 0.7784308612346649, "num_tokens": 1885831076.0, "step": 27920 }, { "entropy": 0.7203407719731331, "epoch": 8.461107324093009, "grad_norm": 0.2583375573158264, "learning_rate": 4.356129826158637e-05, "loss": 1.1164, "mean_token_accuracy": 0.778656542301178, "num_tokens": 1886491048.0, "step": 27930 }, { "entropy": 0.7222388982772827, "epoch": 8.464136938574566, "grad_norm": 0.24358335137367249, "learning_rate": 4.352818424652826e-05, "loss": 1.1128, "mean_token_accuracy": 0.7837407380342484, "num_tokens": 1887175296.0, "step": 27940 }, { "entropy": 0.719977830350399, "epoch": 8.467166553056124, "grad_norm": 0.2525087594985962, "learning_rate": 4.3495073118223725e-05, "loss": 1.1109, "mean_token_accuracy": 0.7815982520580291, "num_tokens": 1887854011.0, "step": 27950 }, { "entropy": 0.7202391773462296, "epoch": 8.470196167537681, "grad_norm": 0.2515532374382019, "learning_rate": 4.346196489144199e-05, "loss": 1.1081, "mean_token_accuracy": 0.7765378817915917, "num_tokens": 1888523269.0, "step": 27960 }, { "entropy": 0.7178052544593811, "epoch": 8.473225782019238, "grad_norm": 0.25526100397109985, "learning_rate": 4.342885958095098e-05, "loss": 1.1044, "mean_token_accuracy": 0.7809532567858696, "num_tokens": 1889197502.0, "step": 27970 }, { "entropy": 0.7176271915435791, "epoch": 8.476255396500795, "grad_norm": 0.2526986598968506, "learning_rate": 4.3395757201517315e-05, "loss": 1.1087, "mean_token_accuracy": 0.7788459807634354, "num_tokens": 1889859446.0, "step": 27980 }, { "entropy": 0.7217741340398789, "epoch": 8.479285010982352, "grad_norm": 0.261422336101532, "learning_rate": 4.336265776790633e-05, "loss": 1.1097, "mean_token_accuracy": 0.7857506960630417, "num_tokens": 1890554079.0, "step": 27990 }, { "entropy": 0.7163657307624817, "epoch": 8.48231462546391, "grad_norm": 0.25897666811943054, "learning_rate": 4.3329561294882e-05, "loss": 1.1137, "mean_token_accuracy": 0.7787162497639656, "num_tokens": 1891209107.0, "step": 28000 }, { "entropy": 0.7168700203299523, "epoch": 8.485344239945467, "grad_norm": 0.24963456392288208, "learning_rate": 4.3296467797207025e-05, "loss": 1.1172, "mean_token_accuracy": 0.7804996103048325, "num_tokens": 1891877021.0, "step": 28010 }, { "entropy": 0.7277984783053398, "epoch": 8.488373854427024, "grad_norm": 0.2574210464954376, "learning_rate": 4.3263377289642754e-05, "loss": 1.1115, "mean_token_accuracy": 0.7805963233113289, "num_tokens": 1892555296.0, "step": 28020 }, { "entropy": 0.7244296565651893, "epoch": 8.491403468908581, "grad_norm": 0.25558575987815857, "learning_rate": 4.323028978694923e-05, "loss": 1.1129, "mean_token_accuracy": 0.7798798620700836, "num_tokens": 1893233523.0, "step": 28030 }, { "entropy": 0.7282684370875359, "epoch": 8.494433083390138, "grad_norm": 0.2511744201183319, "learning_rate": 4.3197205303885105e-05, "loss": 1.109, "mean_token_accuracy": 0.781240402162075, "num_tokens": 1893928697.0, "step": 28040 }, { "entropy": 0.716360405087471, "epoch": 8.497462697871695, "grad_norm": 0.26084938645362854, "learning_rate": 4.3164123855207715e-05, "loss": 1.1107, "mean_token_accuracy": 0.7831664323806763, "num_tokens": 1894606541.0, "step": 28050 }, { "entropy": 0.7177587166428566, "epoch": 8.500492312353254, "grad_norm": 0.2502937912940979, "learning_rate": 4.313104545567307e-05, "loss": 1.1035, "mean_token_accuracy": 0.7813873678445816, "num_tokens": 1895284845.0, "step": 28060 }, { "entropy": 0.7103653758764267, "epoch": 8.50352192683481, "grad_norm": 0.2538270950317383, "learning_rate": 4.309797012003576e-05, "loss": 1.1075, "mean_token_accuracy": 0.7838470160961151, "num_tokens": 1895957404.0, "step": 28070 }, { "entropy": 0.7125907152891159, "epoch": 8.506551541316368, "grad_norm": 0.2410464733839035, "learning_rate": 4.306489786304906e-05, "loss": 1.107, "mean_token_accuracy": 0.7825109049677849, "num_tokens": 1896636351.0, "step": 28080 }, { "entropy": 0.7232971996068954, "epoch": 8.509581155797925, "grad_norm": 0.24507884681224823, "learning_rate": 4.3031828699464846e-05, "loss": 1.1247, "mean_token_accuracy": 0.778888575732708, "num_tokens": 1897302092.0, "step": 28090 }, { "entropy": 0.7203190252184868, "epoch": 8.512610770279482, "grad_norm": 0.277683824300766, "learning_rate": 4.29987626440336e-05, "loss": 1.1031, "mean_token_accuracy": 0.7816737964749336, "num_tokens": 1897983133.0, "step": 28100 }, { "entropy": 0.7236964136362076, "epoch": 8.515640384761038, "grad_norm": 0.2470213621854782, "learning_rate": 4.296569971150446e-05, "loss": 1.1197, "mean_token_accuracy": 0.7798929691314698, "num_tokens": 1898656412.0, "step": 28110 }, { "entropy": 0.7217274710536004, "epoch": 8.518669999242597, "grad_norm": 0.2536276578903198, "learning_rate": 4.293263991662515e-05, "loss": 1.1039, "mean_token_accuracy": 0.7806472942233086, "num_tokens": 1899327501.0, "step": 28120 }, { "entropy": 0.7342067047953605, "epoch": 8.521699613724154, "grad_norm": 0.2557133734226227, "learning_rate": 4.2899583274141975e-05, "loss": 1.1191, "mean_token_accuracy": 0.7798523887991905, "num_tokens": 1900009318.0, "step": 28130 }, { "entropy": 0.7135517805814743, "epoch": 8.524729228205711, "grad_norm": 0.26016944646835327, "learning_rate": 4.2866529798799855e-05, "loss": 1.1108, "mean_token_accuracy": 0.781361098587513, "num_tokens": 1900671691.0, "step": 28140 }, { "entropy": 0.7154786333441734, "epoch": 8.527758842687268, "grad_norm": 0.2544073164463043, "learning_rate": 4.28334795053423e-05, "loss": 1.1145, "mean_token_accuracy": 0.7790338054299355, "num_tokens": 1901329773.0, "step": 28150 }, { "entropy": 0.7150747537612915, "epoch": 8.530788457168825, "grad_norm": 0.2526049017906189, "learning_rate": 4.28004324085114e-05, "loss": 1.1056, "mean_token_accuracy": 0.7860410287976265, "num_tokens": 1902008559.0, "step": 28160 }, { "entropy": 0.7090863332152366, "epoch": 8.533818071650382, "grad_norm": 0.2356344610452652, "learning_rate": 4.27673885230478e-05, "loss": 1.1094, "mean_token_accuracy": 0.7842254042625427, "num_tokens": 1902682622.0, "step": 28170 }, { "entropy": 0.7112705111503601, "epoch": 8.53684768613194, "grad_norm": 0.25435927510261536, "learning_rate": 4.273434786369072e-05, "loss": 1.1155, "mean_token_accuracy": 0.7843185245990754, "num_tokens": 1903355345.0, "step": 28180 }, { "entropy": 0.7159225881099701, "epoch": 8.539877300613497, "grad_norm": 0.25240522623062134, "learning_rate": 4.270131044517798e-05, "loss": 1.1053, "mean_token_accuracy": 0.7823803201317787, "num_tokens": 1904041698.0, "step": 28190 }, { "entropy": 0.7105033457279205, "epoch": 8.542906915095054, "grad_norm": 0.2581464946269989, "learning_rate": 4.2668276282245886e-05, "loss": 1.1008, "mean_token_accuracy": 0.7827621147036552, "num_tokens": 1904714535.0, "step": 28200 }, { "entropy": 0.7183787137269974, "epoch": 8.545936529576611, "grad_norm": 0.26322516798973083, "learning_rate": 4.263524538962934e-05, "loss": 1.1104, "mean_token_accuracy": 0.7877506449818611, "num_tokens": 1905415985.0, "step": 28210 }, { "entropy": 0.7210176229476929, "epoch": 8.548966144058168, "grad_norm": 0.24380862712860107, "learning_rate": 4.260221778206178e-05, "loss": 1.1111, "mean_token_accuracy": 0.7773053452372551, "num_tokens": 1906066787.0, "step": 28220 }, { "entropy": 0.7159445375204087, "epoch": 8.551995758539725, "grad_norm": 0.25712108612060547, "learning_rate": 4.256919347427516e-05, "loss": 1.1096, "mean_token_accuracy": 0.7849787518382072, "num_tokens": 1906746985.0, "step": 28230 }, { "entropy": 0.7213902294635772, "epoch": 8.555025373021284, "grad_norm": 0.2573804259300232, "learning_rate": 4.2536172481e-05, "loss": 1.1148, "mean_token_accuracy": 0.7856921151280403, "num_tokens": 1907443686.0, "step": 28240 }, { "entropy": 0.7158431977033615, "epoch": 8.55805498750284, "grad_norm": 0.25303325057029724, "learning_rate": 4.250315481696527e-05, "loss": 1.1027, "mean_token_accuracy": 0.7845643639564515, "num_tokens": 1908114289.0, "step": 28250 }, { "entropy": 0.7229256302118301, "epoch": 8.561084601984398, "grad_norm": 0.24933502078056335, "learning_rate": 4.247014049689852e-05, "loss": 1.1147, "mean_token_accuracy": 0.782072140276432, "num_tokens": 1908798709.0, "step": 28260 }, { "entropy": 0.7287473857402802, "epoch": 8.564114216465954, "grad_norm": 0.27063044905662537, "learning_rate": 4.24371295355258e-05, "loss": 1.1163, "mean_token_accuracy": 0.7813744112849236, "num_tokens": 1909477472.0, "step": 28270 }, { "entropy": 0.7227299347519874, "epoch": 8.567143830947511, "grad_norm": 0.2610662579536438, "learning_rate": 4.240412194757163e-05, "loss": 1.1064, "mean_token_accuracy": 0.7828714400529861, "num_tokens": 1910158452.0, "step": 28280 }, { "entropy": 0.7248746410012246, "epoch": 8.570173445429068, "grad_norm": 0.2513570487499237, "learning_rate": 4.2371117747759064e-05, "loss": 1.12, "mean_token_accuracy": 0.7782956436276436, "num_tokens": 1910826710.0, "step": 28290 }, { "entropy": 0.7234019681811332, "epoch": 8.573203059910627, "grad_norm": 0.26494210958480835, "learning_rate": 4.2338116950809626e-05, "loss": 1.1143, "mean_token_accuracy": 0.7800129473209381, "num_tokens": 1911500695.0, "step": 28300 }, { "entropy": 0.7204536572098732, "epoch": 8.576232674392184, "grad_norm": 0.24218662083148956, "learning_rate": 4.2305119571443305e-05, "loss": 1.1164, "mean_token_accuracy": 0.7839529857039451, "num_tokens": 1912176330.0, "step": 28310 }, { "entropy": 0.7227112546563148, "epoch": 8.57926228887374, "grad_norm": 0.23897382616996765, "learning_rate": 4.227212562437859e-05, "loss": 1.1228, "mean_token_accuracy": 0.7834416672587394, "num_tokens": 1912856100.0, "step": 28320 }, { "entropy": 0.7184045344591141, "epoch": 8.582291903355298, "grad_norm": 0.2519665062427521, "learning_rate": 4.223913512433244e-05, "loss": 1.1095, "mean_token_accuracy": 0.780766113102436, "num_tokens": 1913540086.0, "step": 28330 }, { "entropy": 0.7270558908581733, "epoch": 8.585321517836855, "grad_norm": 0.23440684378147125, "learning_rate": 4.220614808602026e-05, "loss": 1.1126, "mean_token_accuracy": 0.7794644325971604, "num_tokens": 1914209628.0, "step": 28340 }, { "entropy": 0.71775231808424, "epoch": 8.588351132318412, "grad_norm": 0.24744759500026703, "learning_rate": 4.217316452415592e-05, "loss": 1.1136, "mean_token_accuracy": 0.7830313116312027, "num_tokens": 1914886075.0, "step": 28350 }, { "entropy": 0.7131304040551185, "epoch": 8.59138074679997, "grad_norm": 0.2522117793560028, "learning_rate": 4.2140184453451746e-05, "loss": 1.1029, "mean_token_accuracy": 0.7822527140378952, "num_tokens": 1915571029.0, "step": 28360 }, { "entropy": 0.7166482031345367, "epoch": 8.594410361281527, "grad_norm": 0.2604322135448456, "learning_rate": 4.21072078886185e-05, "loss": 1.1192, "mean_token_accuracy": 0.779690945148468, "num_tokens": 1916233717.0, "step": 28370 }, { "entropy": 0.7124838411808014, "epoch": 8.597439975763084, "grad_norm": 0.2668404281139374, "learning_rate": 4.207423484436537e-05, "loss": 1.1071, "mean_token_accuracy": 0.785042567551136, "num_tokens": 1916913507.0, "step": 28380 }, { "entropy": 0.7189269706606864, "epoch": 8.600469590244641, "grad_norm": 0.2488768994808197, "learning_rate": 4.204126533539998e-05, "loss": 1.1125, "mean_token_accuracy": 0.7832905948162079, "num_tokens": 1917595164.0, "step": 28390 }, { "entropy": 0.7153389841318131, "epoch": 8.603499204726198, "grad_norm": 0.2504398822784424, "learning_rate": 4.2008299376428394e-05, "loss": 1.1093, "mean_token_accuracy": 0.7843625247478485, "num_tokens": 1918272014.0, "step": 28400 }, { "entropy": 0.7196563646197319, "epoch": 8.606528819207757, "grad_norm": 0.26472902297973633, "learning_rate": 4.197533698215507e-05, "loss": 1.0974, "mean_token_accuracy": 0.7859700217843055, "num_tokens": 1918963097.0, "step": 28410 }, { "entropy": 0.7287015676498413, "epoch": 8.609558433689314, "grad_norm": 0.2562050521373749, "learning_rate": 4.194237816728288e-05, "loss": 1.1163, "mean_token_accuracy": 0.7776473611593246, "num_tokens": 1919639991.0, "step": 28420 }, { "entropy": 0.7181836724281311, "epoch": 8.61258804817087, "grad_norm": 0.24757793545722961, "learning_rate": 4.190942294651313e-05, "loss": 1.1161, "mean_token_accuracy": 0.7788678079843521, "num_tokens": 1920304695.0, "step": 28430 }, { "entropy": 0.7180621802806855, "epoch": 8.615617662652427, "grad_norm": 0.24904799461364746, "learning_rate": 4.1876471334545466e-05, "loss": 1.1102, "mean_token_accuracy": 0.7853743806481361, "num_tokens": 1920986144.0, "step": 28440 }, { "entropy": 0.716050174832344, "epoch": 8.618647277133984, "grad_norm": 0.254031777381897, "learning_rate": 4.184352334607796e-05, "loss": 1.1081, "mean_token_accuracy": 0.7810869053006172, "num_tokens": 1921662755.0, "step": 28450 }, { "entropy": 0.7207084417343139, "epoch": 8.621676891615541, "grad_norm": 0.24686495959758759, "learning_rate": 4.181057899580708e-05, "loss": 1.108, "mean_token_accuracy": 0.7840207055211067, "num_tokens": 1922342411.0, "step": 28460 }, { "entropy": 0.7214000925421715, "epoch": 8.624706506097098, "grad_norm": 0.25418463349342346, "learning_rate": 4.1777638298427646e-05, "loss": 1.1091, "mean_token_accuracy": 0.7834576144814491, "num_tokens": 1923019703.0, "step": 28470 }, { "entropy": 0.7185854330658913, "epoch": 8.627736120578657, "grad_norm": 0.2534089982509613, "learning_rate": 4.174470126863285e-05, "loss": 1.1003, "mean_token_accuracy": 0.7833646684885025, "num_tokens": 1923702536.0, "step": 28480 }, { "entropy": 0.7247257903218269, "epoch": 8.630765735060214, "grad_norm": 0.2549518346786499, "learning_rate": 4.1711767921114265e-05, "loss": 1.1112, "mean_token_accuracy": 0.7785336881875992, "num_tokens": 1924376486.0, "step": 28490 }, { "entropy": 0.7220595791935921, "epoch": 8.63379534954177, "grad_norm": 0.2647266983985901, "learning_rate": 4.1678838270561814e-05, "loss": 1.1148, "mean_token_accuracy": 0.7850570365786552, "num_tokens": 1925068760.0, "step": 28500 }, { "entropy": 0.7314556479454041, "epoch": 8.636824964023328, "grad_norm": 0.2623205780982971, "learning_rate": 4.164591233166375e-05, "loss": 1.1291, "mean_token_accuracy": 0.7768009513616562, "num_tokens": 1925747773.0, "step": 28510 }, { "entropy": 0.7182144120335578, "epoch": 8.639854578504885, "grad_norm": 0.27247917652130127, "learning_rate": 4.1612990119106706e-05, "loss": 1.1182, "mean_token_accuracy": 0.7795385822653771, "num_tokens": 1926409866.0, "step": 28520 }, { "entropy": 0.7145122721791267, "epoch": 8.642884192986443, "grad_norm": 0.2504322826862335, "learning_rate": 4.1580071647575634e-05, "loss": 1.1101, "mean_token_accuracy": 0.7811422646045685, "num_tokens": 1927074050.0, "step": 28530 }, { "entropy": 0.7201222479343414, "epoch": 8.645913807468, "grad_norm": 0.24874699115753174, "learning_rate": 4.1547156931753813e-05, "loss": 1.1099, "mean_token_accuracy": 0.7810144543647766, "num_tokens": 1927739565.0, "step": 28540 }, { "entropy": 0.7202480390667916, "epoch": 8.648943421949557, "grad_norm": 0.24992485344409943, "learning_rate": 4.1514245986322846e-05, "loss": 1.1239, "mean_token_accuracy": 0.7761563330888748, "num_tokens": 1928391157.0, "step": 28550 }, { "entropy": 0.7083740383386612, "epoch": 8.651973036431114, "grad_norm": 0.25214624404907227, "learning_rate": 4.148133882596269e-05, "loss": 1.0964, "mean_token_accuracy": 0.7914482444524765, "num_tokens": 1929094996.0, "step": 28560 }, { "entropy": 0.7194327369332314, "epoch": 8.655002650912671, "grad_norm": 0.23992250859737396, "learning_rate": 4.144843546535155e-05, "loss": 1.1143, "mean_token_accuracy": 0.7817048177123069, "num_tokens": 1929772714.0, "step": 28570 }, { "entropy": 0.7236621171236038, "epoch": 8.658032265394228, "grad_norm": 0.25560665130615234, "learning_rate": 4.141553591916598e-05, "loss": 1.1075, "mean_token_accuracy": 0.7771933913230896, "num_tokens": 1930445435.0, "step": 28580 }, { "entropy": 0.725893759727478, "epoch": 8.661061879875787, "grad_norm": 0.25523504614830017, "learning_rate": 4.138264020208086e-05, "loss": 1.124, "mean_token_accuracy": 0.7764753505587578, "num_tokens": 1931098014.0, "step": 28590 }, { "entropy": 0.7127537101507186, "epoch": 8.664091494357343, "grad_norm": 0.25567346811294556, "learning_rate": 4.1349748328769265e-05, "loss": 1.1195, "mean_token_accuracy": 0.7771034657955169, "num_tokens": 1931748298.0, "step": 28600 }, { "entropy": 0.7272965505719184, "epoch": 8.6671211088389, "grad_norm": 0.25819697976112366, "learning_rate": 4.1316860313902666e-05, "loss": 1.1166, "mean_token_accuracy": 0.7850852489471436, "num_tokens": 1932437385.0, "step": 28610 }, { "entropy": 0.7176499485969543, "epoch": 8.670150723320457, "grad_norm": 0.24638673663139343, "learning_rate": 4.1283976172150726e-05, "loss": 1.1035, "mean_token_accuracy": 0.7825480386614799, "num_tokens": 1933116742.0, "step": 28620 }, { "entropy": 0.7143126979470253, "epoch": 8.673180337802014, "grad_norm": 0.2555699646472931, "learning_rate": 4.125109591818144e-05, "loss": 1.1018, "mean_token_accuracy": 0.7826764941215515, "num_tokens": 1933789342.0, "step": 28630 }, { "entropy": 0.7244675323367119, "epoch": 8.676209952283571, "grad_norm": 0.249857097864151, "learning_rate": 4.121821956666104e-05, "loss": 1.1089, "mean_token_accuracy": 0.783166480064392, "num_tokens": 1934482228.0, "step": 28640 }, { "entropy": 0.7126829192042351, "epoch": 8.67923956676513, "grad_norm": 0.2583950161933899, "learning_rate": 4.118534713225402e-05, "loss": 1.1144, "mean_token_accuracy": 0.7875151291489602, "num_tokens": 1935168965.0, "step": 28650 }, { "entropy": 0.7243151098489762, "epoch": 8.682269181246687, "grad_norm": 0.2539556920528412, "learning_rate": 4.115247862962313e-05, "loss": 1.1068, "mean_token_accuracy": 0.782958970963955, "num_tokens": 1935853331.0, "step": 28660 }, { "entropy": 0.721580271422863, "epoch": 8.685298795728244, "grad_norm": 0.2541027367115021, "learning_rate": 4.111961407342937e-05, "loss": 1.1118, "mean_token_accuracy": 0.7814381510019303, "num_tokens": 1936529254.0, "step": 28670 }, { "entropy": 0.7292007640004158, "epoch": 8.6883284102098, "grad_norm": 0.25219234824180603, "learning_rate": 4.108675347833199e-05, "loss": 1.1168, "mean_token_accuracy": 0.7778084501624107, "num_tokens": 1937198608.0, "step": 28680 }, { "entropy": 0.7166239753365516, "epoch": 8.691358024691358, "grad_norm": 0.25991955399513245, "learning_rate": 4.105389685898843e-05, "loss": 1.1103, "mean_token_accuracy": 0.7806353375315667, "num_tokens": 1937865862.0, "step": 28690 }, { "entropy": 0.7189975202083587, "epoch": 8.694387639172914, "grad_norm": 0.24662035703659058, "learning_rate": 4.1021044230054405e-05, "loss": 1.1086, "mean_token_accuracy": 0.7831170216202736, "num_tokens": 1938547499.0, "step": 28700 }, { "entropy": 0.7202324852347374, "epoch": 8.697417253654473, "grad_norm": 0.2525968849658966, "learning_rate": 4.098819560618384e-05, "loss": 1.1186, "mean_token_accuracy": 0.7782834306359291, "num_tokens": 1939204955.0, "step": 28710 }, { "entropy": 0.721531181037426, "epoch": 8.70044686813603, "grad_norm": 0.25534752011299133, "learning_rate": 4.095535100202884e-05, "loss": 1.112, "mean_token_accuracy": 0.7807690426707268, "num_tokens": 1939887443.0, "step": 28720 }, { "entropy": 0.7301719605922699, "epoch": 8.703476482617587, "grad_norm": 0.2842459976673126, "learning_rate": 4.092251043223975e-05, "loss": 1.1146, "mean_token_accuracy": 0.7789567887783051, "num_tokens": 1940559581.0, "step": 28730 }, { "entropy": 0.7073962211608886, "epoch": 8.706506097099144, "grad_norm": 0.24807953834533691, "learning_rate": 4.088967391146512e-05, "loss": 1.101, "mean_token_accuracy": 0.7865076690912247, "num_tokens": 1941240652.0, "step": 28740 }, { "entropy": 0.7230818122625351, "epoch": 8.7095357115807, "grad_norm": 0.2505377531051636, "learning_rate": 4.0856841454351655e-05, "loss": 1.1198, "mean_token_accuracy": 0.7763510674238205, "num_tokens": 1941902577.0, "step": 28750 }, { "entropy": 0.7187742874026298, "epoch": 8.712565326062258, "grad_norm": 0.2498081773519516, "learning_rate": 4.08240130755443e-05, "loss": 1.1179, "mean_token_accuracy": 0.7771988451480866, "num_tokens": 1942566897.0, "step": 28760 }, { "entropy": 0.7182139918208122, "epoch": 8.715594940543816, "grad_norm": 0.2587888240814209, "learning_rate": 4.079118878968615e-05, "loss": 1.1091, "mean_token_accuracy": 0.7775295615196228, "num_tokens": 1943229094.0, "step": 28770 }, { "entropy": 0.7200123488903045, "epoch": 8.718624555025373, "grad_norm": 0.25699582695961, "learning_rate": 4.075836861141846e-05, "loss": 1.1148, "mean_token_accuracy": 0.783364151418209, "num_tokens": 1943903293.0, "step": 28780 }, { "entropy": 0.7022015497088432, "epoch": 8.72165416950693, "grad_norm": 0.25240638852119446, "learning_rate": 4.072555255538068e-05, "loss": 1.0967, "mean_token_accuracy": 0.7873583644628525, "num_tokens": 1944576805.0, "step": 28790 }, { "entropy": 0.7154752492904664, "epoch": 8.724683783988487, "grad_norm": 0.2615509331226349, "learning_rate": 4.069274063621043e-05, "loss": 1.1072, "mean_token_accuracy": 0.7806541219353675, "num_tokens": 1945241096.0, "step": 28800 }, { "entropy": 0.7336331263184548, "epoch": 8.727713398470044, "grad_norm": 0.2593686580657959, "learning_rate": 4.065993286854345e-05, "loss": 1.1183, "mean_token_accuracy": 0.776524794101715, "num_tokens": 1945918249.0, "step": 28810 }, { "entropy": 0.7094599232077599, "epoch": 8.730743012951603, "grad_norm": 0.2444300800561905, "learning_rate": 4.0627129267013654e-05, "loss": 1.106, "mean_token_accuracy": 0.7816817224025726, "num_tokens": 1946588886.0, "step": 28820 }, { "entropy": 0.7200517654418945, "epoch": 8.73377262743316, "grad_norm": 0.2554880380630493, "learning_rate": 4.059432984625307e-05, "loss": 1.1104, "mean_token_accuracy": 0.7789775893092156, "num_tokens": 1947255963.0, "step": 28830 }, { "entropy": 0.7202544391155243, "epoch": 8.736802241914717, "grad_norm": 0.2426423281431198, "learning_rate": 4.056153462089191e-05, "loss": 1.1111, "mean_token_accuracy": 0.7811928883194923, "num_tokens": 1947924276.0, "step": 28840 }, { "entropy": 0.7059272572398185, "epoch": 8.739831856396274, "grad_norm": 0.2617579698562622, "learning_rate": 4.052874360555846e-05, "loss": 1.1139, "mean_token_accuracy": 0.7811008810997009, "num_tokens": 1948586366.0, "step": 28850 }, { "entropy": 0.7253514006733894, "epoch": 8.74286147087783, "grad_norm": 0.25759056210517883, "learning_rate": 4.049595681487916e-05, "loss": 1.1206, "mean_token_accuracy": 0.7825950235128403, "num_tokens": 1949261209.0, "step": 28860 }, { "entropy": 0.7255978435277939, "epoch": 8.745891085359387, "grad_norm": 0.25632065534591675, "learning_rate": 4.0463174263478575e-05, "loss": 1.1028, "mean_token_accuracy": 0.7838640242815018, "num_tokens": 1949951570.0, "step": 28870 }, { "entropy": 0.718338181078434, "epoch": 8.748920699840944, "grad_norm": 0.25563666224479675, "learning_rate": 4.043039596597934e-05, "loss": 1.1131, "mean_token_accuracy": 0.7802659586071968, "num_tokens": 1950624759.0, "step": 28880 }, { "entropy": 0.7295897051692009, "epoch": 8.751950314322503, "grad_norm": 0.24798616766929626, "learning_rate": 4.0397621937002224e-05, "loss": 1.1177, "mean_token_accuracy": 0.7814691454172135, "num_tokens": 1951308501.0, "step": 28890 }, { "entropy": 0.7243193000555038, "epoch": 8.75497992880406, "grad_norm": 0.258666068315506, "learning_rate": 4.036485219116608e-05, "loss": 1.1161, "mean_token_accuracy": 0.7811876088380814, "num_tokens": 1951982623.0, "step": 28900 }, { "entropy": 0.7230897203087807, "epoch": 8.758009543285617, "grad_norm": 0.24920624494552612, "learning_rate": 4.033208674308788e-05, "loss": 1.1081, "mean_token_accuracy": 0.781388807296753, "num_tokens": 1952666389.0, "step": 28910 }, { "entropy": 0.7174295037984848, "epoch": 8.761039157767174, "grad_norm": 0.2559826076030731, "learning_rate": 4.029932560738261e-05, "loss": 1.117, "mean_token_accuracy": 0.7765072166919709, "num_tokens": 1953338904.0, "step": 28920 }, { "entropy": 0.711271233856678, "epoch": 8.76406877224873, "grad_norm": 0.2394898384809494, "learning_rate": 4.02665687986634e-05, "loss": 1.1017, "mean_token_accuracy": 0.7824870631098747, "num_tokens": 1954013318.0, "step": 28930 }, { "entropy": 0.7250132650136948, "epoch": 8.76709838673029, "grad_norm": 0.24374224245548248, "learning_rate": 4.0233816331541446e-05, "loss": 1.1114, "mean_token_accuracy": 0.7829572439193726, "num_tokens": 1954701010.0, "step": 28940 }, { "entropy": 0.7257913932204246, "epoch": 8.770128001211846, "grad_norm": 0.2666158080101013, "learning_rate": 4.0201068220625956e-05, "loss": 1.1189, "mean_token_accuracy": 0.7795502886176109, "num_tokens": 1955374388.0, "step": 28950 }, { "entropy": 0.7138613164424896, "epoch": 8.773157615693403, "grad_norm": 0.2544364631175995, "learning_rate": 4.0168324480524215e-05, "loss": 1.112, "mean_token_accuracy": 0.7827801123261452, "num_tokens": 1956053327.0, "step": 28960 }, { "entropy": 0.7104690358042717, "epoch": 8.77618723017496, "grad_norm": 0.24729299545288086, "learning_rate": 4.013558512584159e-05, "loss": 1.0972, "mean_token_accuracy": 0.7920410320162773, "num_tokens": 1956759614.0, "step": 28970 }, { "entropy": 0.715251824259758, "epoch": 8.779216844656517, "grad_norm": 0.2622358500957489, "learning_rate": 4.010285017118148e-05, "loss": 1.1088, "mean_token_accuracy": 0.780117604136467, "num_tokens": 1957431280.0, "step": 28980 }, { "entropy": 0.7235038727521896, "epoch": 8.782246459138074, "grad_norm": 0.26019060611724854, "learning_rate": 4.007011963114529e-05, "loss": 1.1152, "mean_token_accuracy": 0.777077080309391, "num_tokens": 1958090531.0, "step": 28990 }, { "entropy": 0.7245069786906242, "epoch": 8.785276073619633, "grad_norm": 0.25815483927726746, "learning_rate": 4.003739352033248e-05, "loss": 1.1167, "mean_token_accuracy": 0.7816456466913223, "num_tokens": 1958761785.0, "step": 29000 }, { "entropy": 0.7195040881633759, "epoch": 8.78830568810119, "grad_norm": 0.26050928235054016, "learning_rate": 4.0004671853340545e-05, "loss": 1.1165, "mean_token_accuracy": 0.7832333400845528, "num_tokens": 1959443739.0, "step": 29010 }, { "entropy": 0.7293911203742027, "epoch": 8.791335302582747, "grad_norm": 0.2726181447505951, "learning_rate": 3.9971954644765e-05, "loss": 1.1218, "mean_token_accuracy": 0.7771417945623398, "num_tokens": 1960108512.0, "step": 29020 }, { "entropy": 0.715778224170208, "epoch": 8.794364917064303, "grad_norm": 0.24542336165905, "learning_rate": 3.993924190919932e-05, "loss": 1.1144, "mean_token_accuracy": 0.7793296068906784, "num_tokens": 1960768263.0, "step": 29030 }, { "entropy": 0.7286868259310723, "epoch": 8.79739453154586, "grad_norm": 0.2558838129043579, "learning_rate": 3.990653366123504e-05, "loss": 1.1209, "mean_token_accuracy": 0.7779481574892998, "num_tokens": 1961431378.0, "step": 29040 }, { "entropy": 0.7266527563333511, "epoch": 8.800424146027417, "grad_norm": 0.2615821361541748, "learning_rate": 3.987382991546169e-05, "loss": 1.1234, "mean_token_accuracy": 0.7768598183989525, "num_tokens": 1962099362.0, "step": 29050 }, { "entropy": 0.7164703905582428, "epoch": 8.803453760508976, "grad_norm": 0.2523133158683777, "learning_rate": 3.984113068646674e-05, "loss": 1.1067, "mean_token_accuracy": 0.7812863737344742, "num_tokens": 1962763998.0, "step": 29060 }, { "entropy": 0.7180733740329742, "epoch": 8.806483374990533, "grad_norm": 0.26194852590560913, "learning_rate": 3.9808435988835714e-05, "loss": 1.1159, "mean_token_accuracy": 0.7819589123129844, "num_tokens": 1963446007.0, "step": 29070 }, { "entropy": 0.715312410891056, "epoch": 8.80951298947209, "grad_norm": 0.25656309723854065, "learning_rate": 3.977574583715207e-05, "loss": 1.1066, "mean_token_accuracy": 0.7843555957078934, "num_tokens": 1964130571.0, "step": 29080 }, { "entropy": 0.7154703140258789, "epoch": 8.812542603953647, "grad_norm": 0.25038787722587585, "learning_rate": 3.974306024599725e-05, "loss": 1.1073, "mean_token_accuracy": 0.7852437317371368, "num_tokens": 1964806701.0, "step": 29090 }, { "entropy": 0.7199769169092178, "epoch": 8.815572218435204, "grad_norm": 0.25432780385017395, "learning_rate": 3.971037922995067e-05, "loss": 1.1098, "mean_token_accuracy": 0.7845522180199623, "num_tokens": 1965487561.0, "step": 29100 }, { "entropy": 0.7070397034287452, "epoch": 8.81860183291676, "grad_norm": 0.2585289776325226, "learning_rate": 3.967770280358969e-05, "loss": 1.101, "mean_token_accuracy": 0.7847616329789162, "num_tokens": 1966164914.0, "step": 29110 }, { "entropy": 0.7252490550279618, "epoch": 8.82163144739832, "grad_norm": 0.261148065328598, "learning_rate": 3.964503098148964e-05, "loss": 1.1164, "mean_token_accuracy": 0.779693067073822, "num_tokens": 1966838245.0, "step": 29120 }, { "entropy": 0.719429862499237, "epoch": 8.824661061879876, "grad_norm": 0.24912382662296295, "learning_rate": 3.961236377822377e-05, "loss": 1.1062, "mean_token_accuracy": 0.7832588732242585, "num_tokens": 1967537263.0, "step": 29130 }, { "entropy": 0.7216413915157318, "epoch": 8.827690676361433, "grad_norm": 0.25173404812812805, "learning_rate": 3.957970120836331e-05, "loss": 1.107, "mean_token_accuracy": 0.779204361140728, "num_tokens": 1968218852.0, "step": 29140 }, { "entropy": 0.7214172005653381, "epoch": 8.83072029084299, "grad_norm": 0.24195538461208344, "learning_rate": 3.954704328647739e-05, "loss": 1.1186, "mean_token_accuracy": 0.7817006081342697, "num_tokens": 1968893708.0, "step": 29150 }, { "entropy": 0.7169260442256927, "epoch": 8.833749905324547, "grad_norm": 0.25657886266708374, "learning_rate": 3.951439002713306e-05, "loss": 1.115, "mean_token_accuracy": 0.7818932577967643, "num_tokens": 1969562790.0, "step": 29160 }, { "entropy": 0.7181063741445541, "epoch": 8.836779519806104, "grad_norm": 0.2619813084602356, "learning_rate": 3.948174144489533e-05, "loss": 1.1079, "mean_token_accuracy": 0.7846121177077293, "num_tokens": 1970247901.0, "step": 29170 }, { "entropy": 0.7212286561727523, "epoch": 8.839809134287663, "grad_norm": 0.25744473934173584, "learning_rate": 3.9449097554327106e-05, "loss": 1.115, "mean_token_accuracy": 0.7834999904036521, "num_tokens": 1970934230.0, "step": 29180 }, { "entropy": 0.7198623836040496, "epoch": 8.84283874876922, "grad_norm": 0.24997738003730774, "learning_rate": 3.9416458369989165e-05, "loss": 1.1109, "mean_token_accuracy": 0.7817680180072785, "num_tokens": 1971611091.0, "step": 29190 }, { "entropy": 0.7187659323215485, "epoch": 8.845868363250776, "grad_norm": 0.2572028338909149, "learning_rate": 3.938382390644024e-05, "loss": 1.109, "mean_token_accuracy": 0.7830177575349808, "num_tokens": 1972301989.0, "step": 29200 }, { "entropy": 0.7165235981345177, "epoch": 8.848897977732333, "grad_norm": 0.26468753814697266, "learning_rate": 3.9351194178236936e-05, "loss": 1.1123, "mean_token_accuracy": 0.779882937669754, "num_tokens": 1972967238.0, "step": 29210 }, { "entropy": 0.7153317481279373, "epoch": 8.85192759221389, "grad_norm": 0.2593344748020172, "learning_rate": 3.9318569199933726e-05, "loss": 1.1063, "mean_token_accuracy": 0.7848672181367874, "num_tokens": 1973656440.0, "step": 29220 }, { "entropy": 0.7206306457519531, "epoch": 8.854957206695449, "grad_norm": 0.2500818967819214, "learning_rate": 3.9285948986082995e-05, "loss": 1.1131, "mean_token_accuracy": 0.7811333194375039, "num_tokens": 1974347297.0, "step": 29230 }, { "entropy": 0.7156893268227578, "epoch": 8.857986821177006, "grad_norm": 0.2541957497596741, "learning_rate": 3.9253333551234984e-05, "loss": 1.1027, "mean_token_accuracy": 0.7849383920431137, "num_tokens": 1975034819.0, "step": 29240 }, { "entropy": 0.7273165121674537, "epoch": 8.861016435658563, "grad_norm": 0.24332493543624878, "learning_rate": 3.922072290993783e-05, "loss": 1.1038, "mean_token_accuracy": 0.788136437535286, "num_tokens": 1975749273.0, "step": 29250 }, { "entropy": 0.7169123306870461, "epoch": 8.86404605014012, "grad_norm": 0.25201770663261414, "learning_rate": 3.918811707673748e-05, "loss": 1.1127, "mean_token_accuracy": 0.7837028563022613, "num_tokens": 1976421424.0, "step": 29260 }, { "entropy": 0.7232697606086731, "epoch": 8.867075664621677, "grad_norm": 0.25215113162994385, "learning_rate": 3.9155516066177775e-05, "loss": 1.1129, "mean_token_accuracy": 0.7839630246162415, "num_tokens": 1977109932.0, "step": 29270 }, { "entropy": 0.7059458583593369, "epoch": 8.870105279103234, "grad_norm": 0.27492555975914, "learning_rate": 3.912291989280042e-05, "loss": 1.1135, "mean_token_accuracy": 0.7810848966240883, "num_tokens": 1977765241.0, "step": 29280 }, { "entropy": 0.7126860484480858, "epoch": 8.87313489358479, "grad_norm": 0.2462565302848816, "learning_rate": 3.909032857114493e-05, "loss": 1.1066, "mean_token_accuracy": 0.78338822722435, "num_tokens": 1978438667.0, "step": 29290 }, { "entropy": 0.7065578386187553, "epoch": 8.87616450806635, "grad_norm": 0.24767723679542542, "learning_rate": 3.905774211574863e-05, "loss": 1.1116, "mean_token_accuracy": 0.7801328271627426, "num_tokens": 1979090499.0, "step": 29300 }, { "entropy": 0.7230974361300468, "epoch": 8.879194122547906, "grad_norm": 0.24369339644908905, "learning_rate": 3.9025160541146735e-05, "loss": 1.11, "mean_token_accuracy": 0.7799566656351089, "num_tokens": 1979775527.0, "step": 29310 }, { "entropy": 0.7286474034190178, "epoch": 8.882223737029463, "grad_norm": 0.2690872550010681, "learning_rate": 3.899258386187226e-05, "loss": 1.1186, "mean_token_accuracy": 0.7749822065234184, "num_tokens": 1980438431.0, "step": 29320 }, { "entropy": 0.7080101639032363, "epoch": 8.88525335151102, "grad_norm": 0.24646250903606415, "learning_rate": 3.896001209245604e-05, "loss": 1.1005, "mean_token_accuracy": 0.7858548179268837, "num_tokens": 1981117410.0, "step": 29330 }, { "entropy": 0.7161659374833107, "epoch": 8.888282965992577, "grad_norm": 0.24269527196884155, "learning_rate": 3.892744524742669e-05, "loss": 1.1195, "mean_token_accuracy": 0.7821093693375587, "num_tokens": 1981780762.0, "step": 29340 }, { "entropy": 0.7193952903151513, "epoch": 8.891312580474136, "grad_norm": 0.26987215876579285, "learning_rate": 3.889488334131067e-05, "loss": 1.1111, "mean_token_accuracy": 0.7841506749391556, "num_tokens": 1982456617.0, "step": 29350 }, { "entropy": 0.7211821421980857, "epoch": 8.894342194955692, "grad_norm": 0.25645774602890015, "learning_rate": 3.886232638863223e-05, "loss": 1.1109, "mean_token_accuracy": 0.7815490037202835, "num_tokens": 1983133499.0, "step": 29360 }, { "entropy": 0.7222009599208832, "epoch": 8.89737180943725, "grad_norm": 0.24122609198093414, "learning_rate": 3.882977440391337e-05, "loss": 1.1088, "mean_token_accuracy": 0.7885529518127441, "num_tokens": 1983839999.0, "step": 29370 }, { "entropy": 0.7147975280880928, "epoch": 8.900401423918806, "grad_norm": 0.2429054230451584, "learning_rate": 3.8797227401673915e-05, "loss": 1.1139, "mean_token_accuracy": 0.7818574577569961, "num_tokens": 1984510703.0, "step": 29380 }, { "entropy": 0.713473479449749, "epoch": 8.903431038400363, "grad_norm": 0.2651282250881195, "learning_rate": 3.876468539643147e-05, "loss": 1.1028, "mean_token_accuracy": 0.7835205689072609, "num_tokens": 1985180169.0, "step": 29390 }, { "entropy": 0.716995793581009, "epoch": 8.90646065288192, "grad_norm": 0.2531137764453888, "learning_rate": 3.873214840270138e-05, "loss": 1.1066, "mean_token_accuracy": 0.7841816321015358, "num_tokens": 1985861492.0, "step": 29400 }, { "entropy": 0.718168930709362, "epoch": 8.909490267363479, "grad_norm": 0.25928670167922974, "learning_rate": 3.8699616434996774e-05, "loss": 1.1031, "mean_token_accuracy": 0.7842026695609092, "num_tokens": 1986548929.0, "step": 29410 }, { "entropy": 0.7155626058578491, "epoch": 8.912519881845036, "grad_norm": 0.23230260610580444, "learning_rate": 3.866708950782856e-05, "loss": 1.1102, "mean_token_accuracy": 0.7817296728491783, "num_tokens": 1987225729.0, "step": 29420 }, { "entropy": 0.7169981315732002, "epoch": 8.915549496326593, "grad_norm": 0.25931882858276367, "learning_rate": 3.863456763570533e-05, "loss": 1.1133, "mean_token_accuracy": 0.7813719809055328, "num_tokens": 1987893133.0, "step": 29430 }, { "entropy": 0.711993083357811, "epoch": 8.91857911080815, "grad_norm": 0.26571208238601685, "learning_rate": 3.860205083313351e-05, "loss": 1.1114, "mean_token_accuracy": 0.7813589677214623, "num_tokens": 1988548951.0, "step": 29440 }, { "entropy": 0.7180174738168716, "epoch": 8.921608725289706, "grad_norm": 0.25587186217308044, "learning_rate": 3.8569539114617196e-05, "loss": 1.1186, "mean_token_accuracy": 0.7839012235403061, "num_tokens": 1989231270.0, "step": 29450 }, { "entropy": 0.7281316161155701, "epoch": 8.924638339771263, "grad_norm": 0.2636795938014984, "learning_rate": 3.853703249465826e-05, "loss": 1.1129, "mean_token_accuracy": 0.780683021247387, "num_tokens": 1989916503.0, "step": 29460 }, { "entropy": 0.710832704603672, "epoch": 8.927667954252822, "grad_norm": 0.2544217109680176, "learning_rate": 3.850453098775625e-05, "loss": 1.1019, "mean_token_accuracy": 0.7840294167399406, "num_tokens": 1990598033.0, "step": 29470 }, { "entropy": 0.7293656095862389, "epoch": 8.930697568734379, "grad_norm": 0.25565722584724426, "learning_rate": 3.8472034608408485e-05, "loss": 1.1162, "mean_token_accuracy": 0.7786854088306427, "num_tokens": 1991274377.0, "step": 29480 }, { "entropy": 0.7208972543478012, "epoch": 8.933727183215936, "grad_norm": 0.25705480575561523, "learning_rate": 3.8439543371109984e-05, "loss": 1.1143, "mean_token_accuracy": 0.7823522612452507, "num_tokens": 1991950348.0, "step": 29490 }, { "entropy": 0.7095813632011414, "epoch": 8.936756797697493, "grad_norm": 0.2603360116481781, "learning_rate": 3.8407057290353444e-05, "loss": 1.1124, "mean_token_accuracy": 0.7848794236779213, "num_tokens": 1992628427.0, "step": 29500 }, { "entropy": 0.7260840758681297, "epoch": 8.93978641217905, "grad_norm": 0.2520385682582855, "learning_rate": 3.8374576380629296e-05, "loss": 1.115, "mean_token_accuracy": 0.7772787541151047, "num_tokens": 1993300669.0, "step": 29510 }, { "entropy": 0.7156735628843307, "epoch": 8.942816026660607, "grad_norm": 0.26142042875289917, "learning_rate": 3.834210065642565e-05, "loss": 1.1086, "mean_token_accuracy": 0.7815072506666183, "num_tokens": 1993973719.0, "step": 29520 }, { "entropy": 0.7224139705300331, "epoch": 8.945845641142165, "grad_norm": 0.24675850570201874, "learning_rate": 3.830963013222829e-05, "loss": 1.1106, "mean_token_accuracy": 0.7821033999323845, "num_tokens": 1994657764.0, "step": 29530 }, { "entropy": 0.7191166371107102, "epoch": 8.948875255623722, "grad_norm": 0.2539442181587219, "learning_rate": 3.82771648225207e-05, "loss": 1.1128, "mean_token_accuracy": 0.7840816512703895, "num_tokens": 1995345280.0, "step": 29540 }, { "entropy": 0.719819089770317, "epoch": 8.95190487010528, "grad_norm": 0.2582361102104187, "learning_rate": 3.824470474178406e-05, "loss": 1.1065, "mean_token_accuracy": 0.782945702970028, "num_tokens": 1996036948.0, "step": 29550 }, { "entropy": 0.7191162839531898, "epoch": 8.954934484586836, "grad_norm": 0.26400452852249146, "learning_rate": 3.821224990449715e-05, "loss": 1.114, "mean_token_accuracy": 0.7831235095858574, "num_tokens": 1996705772.0, "step": 29560 }, { "entropy": 0.7220586121082306, "epoch": 8.957964099068393, "grad_norm": 0.25238141417503357, "learning_rate": 3.817980032513646e-05, "loss": 1.1215, "mean_token_accuracy": 0.7829408764839172, "num_tokens": 1997386912.0, "step": 29570 }, { "entropy": 0.7142419680953026, "epoch": 8.96099371354995, "grad_norm": 0.2720177471637726, "learning_rate": 3.8147356018176134e-05, "loss": 1.1155, "mean_token_accuracy": 0.7802116602659226, "num_tokens": 1998049925.0, "step": 29580 }, { "entropy": 0.7250123754143715, "epoch": 8.964023328031509, "grad_norm": 0.24264773726463318, "learning_rate": 3.8114916998087965e-05, "loss": 1.1136, "mean_token_accuracy": 0.7808478653430939, "num_tokens": 1998732906.0, "step": 29590 }, { "entropy": 0.718470261991024, "epoch": 8.967052942513066, "grad_norm": 0.24063296616077423, "learning_rate": 3.808248327934136e-05, "loss": 1.1078, "mean_token_accuracy": 0.7862830191850663, "num_tokens": 1999415635.0, "step": 29600 }, { "entropy": 0.7226502120494842, "epoch": 8.970082556994623, "grad_norm": 0.25834742188453674, "learning_rate": 3.8050054876403385e-05, "loss": 1.1172, "mean_token_accuracy": 0.7758548200130463, "num_tokens": 2000071312.0, "step": 29610 }, { "entropy": 0.7235815927386284, "epoch": 8.97311217147618, "grad_norm": 0.2576269805431366, "learning_rate": 3.8017631803738754e-05, "loss": 1.1174, "mean_token_accuracy": 0.7818986803293229, "num_tokens": 2000747914.0, "step": 29620 }, { "entropy": 0.7264309778809548, "epoch": 8.976141785957736, "grad_norm": 0.24839350581169128, "learning_rate": 3.7985214075809735e-05, "loss": 1.1238, "mean_token_accuracy": 0.7805746138095856, "num_tokens": 2001416072.0, "step": 29630 }, { "entropy": 0.7113376304507255, "epoch": 8.979171400439293, "grad_norm": 0.2609247863292694, "learning_rate": 3.795280170707631e-05, "loss": 1.1137, "mean_token_accuracy": 0.7840653508901596, "num_tokens": 2002086596.0, "step": 29640 }, { "entropy": 0.7157400205731392, "epoch": 8.982201014920852, "grad_norm": 0.2429143339395523, "learning_rate": 3.7920394711995954e-05, "loss": 1.1089, "mean_token_accuracy": 0.7816368266940117, "num_tokens": 2002763076.0, "step": 29650 }, { "entropy": 0.7212949648499489, "epoch": 8.985230629402409, "grad_norm": 0.26175034046173096, "learning_rate": 3.788799310502385e-05, "loss": 1.1073, "mean_token_accuracy": 0.7816547930240632, "num_tokens": 2003442438.0, "step": 29660 }, { "entropy": 0.7140222698450088, "epoch": 8.988260243883966, "grad_norm": 0.25381457805633545, "learning_rate": 3.785559690061274e-05, "loss": 1.1104, "mean_token_accuracy": 0.7850309312343597, "num_tokens": 2004125910.0, "step": 29670 }, { "entropy": 0.7134387657046318, "epoch": 8.991289858365523, "grad_norm": 0.24920444190502167, "learning_rate": 3.7823206113212916e-05, "loss": 1.1067, "mean_token_accuracy": 0.7890672579407692, "num_tokens": 2004813988.0, "step": 29680 }, { "entropy": 0.7175428092479705, "epoch": 8.99431947284708, "grad_norm": 0.26466912031173706, "learning_rate": 3.779082075727232e-05, "loss": 1.1148, "mean_token_accuracy": 0.7806145489215851, "num_tokens": 2005488022.0, "step": 29690 }, { "entropy": 0.7156146258115769, "epoch": 8.997349087328637, "grad_norm": 0.2508610188961029, "learning_rate": 3.775844084723644e-05, "loss": 1.1094, "mean_token_accuracy": 0.7850867241621018, "num_tokens": 2006180506.0, "step": 29700 } ], "logging_steps": 10, "max_steps": 49515, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.86333686365883e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }