{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 645, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004662004662004662, "grad_norm": 206.61297042982778, "learning_rate": 0.0, "loss": 6.0047, "mean_token_accuracy": 0.26166820526123047, "num_tokens": 9481.0, "step": 1 }, { "epoch": 0.009324009324009324, "grad_norm": 237.90030179764935, "learning_rate": 1.5384615384615387e-07, "loss": 5.9896, "mean_token_accuracy": 0.26549550890922546, "num_tokens": 18486.0, "step": 2 }, { "epoch": 0.013986013986013986, "grad_norm": 191.565778458644, "learning_rate": 3.0769230769230774e-07, "loss": 5.8697, "mean_token_accuracy": 0.26646704971790314, "num_tokens": 28054.0, "step": 3 }, { "epoch": 0.018648018648018648, "grad_norm": 173.73423360043478, "learning_rate": 4.615384615384616e-07, "loss": 5.7664, "mean_token_accuracy": 0.28080685436725616, "num_tokens": 38100.0, "step": 4 }, { "epoch": 0.023310023310023312, "grad_norm": 188.53311568741194, "learning_rate": 6.153846153846155e-07, "loss": 6.1116, "mean_token_accuracy": 0.24714654684066772, "num_tokens": 46674.0, "step": 5 }, { "epoch": 0.027972027972027972, "grad_norm": 138.50157116139866, "learning_rate": 7.692307692307694e-07, "loss": 5.7375, "mean_token_accuracy": 0.2682064026594162, "num_tokens": 55892.0, "step": 6 }, { "epoch": 0.03263403263403263, "grad_norm": 137.331644930486, "learning_rate": 9.230769230769232e-07, "loss": 5.7865, "mean_token_accuracy": 0.2567756175994873, "num_tokens": 64557.0, "step": 7 }, { "epoch": 0.037296037296037296, "grad_norm": 113.93237748074404, "learning_rate": 1.076923076923077e-06, "loss": 5.4186, "mean_token_accuracy": 0.2943734973669052, "num_tokens": 74534.0, "step": 8 }, { "epoch": 0.04195804195804196, "grad_norm": 97.42189347532377, "learning_rate": 1.230769230769231e-06, "loss": 5.1123, "mean_token_accuracy": 0.29780860245227814, "num_tokens": 84839.0, "step": 9 }, { "epoch": 0.046620046620046623, "grad_norm": 115.13017773253503, "learning_rate": 1.3846153846153848e-06, "loss": 5.0274, "mean_token_accuracy": 0.30082090198993683, "num_tokens": 94486.0, "step": 10 }, { "epoch": 0.05128205128205128, "grad_norm": 92.86208928347652, "learning_rate": 1.5384615384615387e-06, "loss": 4.8261, "mean_token_accuracy": 0.31195709109306335, "num_tokens": 103901.0, "step": 11 }, { "epoch": 0.055944055944055944, "grad_norm": 74.98678911328179, "learning_rate": 1.6923076923076926e-06, "loss": 4.3226, "mean_token_accuracy": 0.332173153758049, "num_tokens": 113421.0, "step": 12 }, { "epoch": 0.06060606060606061, "grad_norm": 65.42071488824216, "learning_rate": 1.8461538461538465e-06, "loss": 4.1213, "mean_token_accuracy": 0.34372538328170776, "num_tokens": 122888.0, "step": 13 }, { "epoch": 0.06526806526806526, "grad_norm": 51.76685372566078, "learning_rate": 2.0000000000000003e-06, "loss": 3.8684, "mean_token_accuracy": 0.36909155547618866, "num_tokens": 132046.0, "step": 14 }, { "epoch": 0.06993006993006994, "grad_norm": 47.330915768289, "learning_rate": 2.153846153846154e-06, "loss": 3.5595, "mean_token_accuracy": 0.4020439237356186, "num_tokens": 141586.0, "step": 15 }, { "epoch": 0.07459207459207459, "grad_norm": 39.592533375514385, "learning_rate": 2.307692307692308e-06, "loss": 3.0525, "mean_token_accuracy": 0.46130137145519257, "num_tokens": 151366.0, "step": 16 }, { "epoch": 0.07925407925407925, "grad_norm": 38.681099075404326, "learning_rate": 2.461538461538462e-06, "loss": 2.8418, "mean_token_accuracy": 0.47952745854854584, "num_tokens": 160361.0, "step": 17 }, { "epoch": 0.08391608391608392, "grad_norm": 26.957564440869795, "learning_rate": 2.615384615384616e-06, "loss": 2.4668, "mean_token_accuracy": 0.5341931283473969, "num_tokens": 170716.0, "step": 18 }, { "epoch": 0.08857808857808858, "grad_norm": 29.725894005554363, "learning_rate": 2.7692307692307697e-06, "loss": 2.4476, "mean_token_accuracy": 0.530603438615799, "num_tokens": 179378.0, "step": 19 }, { "epoch": 0.09324009324009325, "grad_norm": 22.45654494245414, "learning_rate": 2.9230769230769236e-06, "loss": 1.9608, "mean_token_accuracy": 0.6144967377185822, "num_tokens": 189155.0, "step": 20 }, { "epoch": 0.0979020979020979, "grad_norm": 22.420677559887096, "learning_rate": 3.0769230769230774e-06, "loss": 1.8408, "mean_token_accuracy": 0.6224100291728973, "num_tokens": 198128.0, "step": 21 }, { "epoch": 0.10256410256410256, "grad_norm": 20.045412685294725, "learning_rate": 3.2307692307692313e-06, "loss": 1.5358, "mean_token_accuracy": 0.668096661567688, "num_tokens": 207526.0, "step": 22 }, { "epoch": 0.10722610722610723, "grad_norm": 17.826429643351467, "learning_rate": 3.384615384615385e-06, "loss": 1.3385, "mean_token_accuracy": 0.7064461410045624, "num_tokens": 217437.0, "step": 23 }, { "epoch": 0.11188811188811189, "grad_norm": 14.795248732817978, "learning_rate": 3.538461538461539e-06, "loss": 1.1874, "mean_token_accuracy": 0.728604257106781, "num_tokens": 226725.0, "step": 24 }, { "epoch": 0.11655011655011654, "grad_norm": 16.367290617286876, "learning_rate": 3.692307692307693e-06, "loss": 0.9682, "mean_token_accuracy": 0.7707604169845581, "num_tokens": 235721.0, "step": 25 }, { "epoch": 0.12121212121212122, "grad_norm": 9.588756303110177, "learning_rate": 3.846153846153847e-06, "loss": 0.8884, "mean_token_accuracy": 0.7929337918758392, "num_tokens": 244662.0, "step": 26 }, { "epoch": 0.1258741258741259, "grad_norm": 7.015049125629419, "learning_rate": 4.000000000000001e-06, "loss": 0.7978, "mean_token_accuracy": 0.8093675374984741, "num_tokens": 254047.0, "step": 27 }, { "epoch": 0.13053613053613053, "grad_norm": 5.597049269802009, "learning_rate": 4.1538461538461545e-06, "loss": 0.7933, "mean_token_accuracy": 0.8108646273612976, "num_tokens": 262770.0, "step": 28 }, { "epoch": 0.1351981351981352, "grad_norm": 12.495340171422823, "learning_rate": 4.307692307692308e-06, "loss": 0.7206, "mean_token_accuracy": 0.8183692693710327, "num_tokens": 272936.0, "step": 29 }, { "epoch": 0.13986013986013987, "grad_norm": 4.8146353297051325, "learning_rate": 4.461538461538462e-06, "loss": 0.7378, "mean_token_accuracy": 0.8188252151012421, "num_tokens": 282677.0, "step": 30 }, { "epoch": 0.1445221445221445, "grad_norm": 4.397488614777519, "learning_rate": 4.615384615384616e-06, "loss": 0.7168, "mean_token_accuracy": 0.8155234456062317, "num_tokens": 291851.0, "step": 31 }, { "epoch": 0.14918414918414918, "grad_norm": 3.8407813105595707, "learning_rate": 4.76923076923077e-06, "loss": 0.653, "mean_token_accuracy": 0.8314312100410461, "num_tokens": 300775.0, "step": 32 }, { "epoch": 0.15384615384615385, "grad_norm": 3.9490135740465506, "learning_rate": 4.923076923076924e-06, "loss": 0.6982, "mean_token_accuracy": 0.8216440379619598, "num_tokens": 309003.0, "step": 33 }, { "epoch": 0.1585081585081585, "grad_norm": 3.1111593206661423, "learning_rate": 5.076923076923077e-06, "loss": 0.6367, "mean_token_accuracy": 0.8357088267803192, "num_tokens": 318722.0, "step": 34 }, { "epoch": 0.16317016317016317, "grad_norm": 3.8540200089616787, "learning_rate": 5.230769230769232e-06, "loss": 0.6725, "mean_token_accuracy": 0.8249399065971375, "num_tokens": 328363.0, "step": 35 }, { "epoch": 0.16783216783216784, "grad_norm": 3.2279851487302533, "learning_rate": 5.384615384615385e-06, "loss": 0.6361, "mean_token_accuracy": 0.8352257907390594, "num_tokens": 337298.0, "step": 36 }, { "epoch": 0.17249417249417248, "grad_norm": 3.2846323967634277, "learning_rate": 5.538461538461539e-06, "loss": 0.585, "mean_token_accuracy": 0.8430881202220917, "num_tokens": 346068.0, "step": 37 }, { "epoch": 0.17715617715617715, "grad_norm": 3.201411224226897, "learning_rate": 5.692307692307692e-06, "loss": 0.6067, "mean_token_accuracy": 0.8457746207714081, "num_tokens": 354566.0, "step": 38 }, { "epoch": 0.18181818181818182, "grad_norm": 3.603688713367759, "learning_rate": 5.846153846153847e-06, "loss": 0.5355, "mean_token_accuracy": 0.860386848449707, "num_tokens": 363662.0, "step": 39 }, { "epoch": 0.1864801864801865, "grad_norm": 3.095744451514778, "learning_rate": 6e-06, "loss": 0.6101, "mean_token_accuracy": 0.8404548466205597, "num_tokens": 372778.0, "step": 40 }, { "epoch": 0.19114219114219114, "grad_norm": 2.851157082704997, "learning_rate": 6.153846153846155e-06, "loss": 0.5961, "mean_token_accuracy": 0.8459599018096924, "num_tokens": 381990.0, "step": 41 }, { "epoch": 0.1958041958041958, "grad_norm": 2.920481788097288, "learning_rate": 6.307692307692308e-06, "loss": 0.6033, "mean_token_accuracy": 0.8439716398715973, "num_tokens": 390764.0, "step": 42 }, { "epoch": 0.20046620046620048, "grad_norm": 2.6590805207702877, "learning_rate": 6.461538461538463e-06, "loss": 0.5411, "mean_token_accuracy": 0.8539808988571167, "num_tokens": 399544.0, "step": 43 }, { "epoch": 0.20512820512820512, "grad_norm": 2.869045415372655, "learning_rate": 6.615384615384616e-06, "loss": 0.5994, "mean_token_accuracy": 0.8429957032203674, "num_tokens": 408895.0, "step": 44 }, { "epoch": 0.2097902097902098, "grad_norm": 2.5632574285139693, "learning_rate": 6.76923076923077e-06, "loss": 0.5474, "mean_token_accuracy": 0.8559859097003937, "num_tokens": 417528.0, "step": 45 }, { "epoch": 0.21445221445221446, "grad_norm": 3.0716143926843213, "learning_rate": 6.923076923076923e-06, "loss": 0.5659, "mean_token_accuracy": 0.8473467230796814, "num_tokens": 426632.0, "step": 46 }, { "epoch": 0.2191142191142191, "grad_norm": 2.816132536269355, "learning_rate": 7.076923076923078e-06, "loss": 0.6149, "mean_token_accuracy": 0.8401601612567902, "num_tokens": 435698.0, "step": 47 }, { "epoch": 0.22377622377622378, "grad_norm": 2.4694488487146544, "learning_rate": 7.230769230769231e-06, "loss": 0.5604, "mean_token_accuracy": 0.8553016185760498, "num_tokens": 444865.0, "step": 48 }, { "epoch": 0.22843822843822845, "grad_norm": 2.534416309991065, "learning_rate": 7.384615384615386e-06, "loss": 0.5465, "mean_token_accuracy": 0.8501911461353302, "num_tokens": 454293.0, "step": 49 }, { "epoch": 0.2331002331002331, "grad_norm": 2.5987473992591994, "learning_rate": 7.538461538461539e-06, "loss": 0.5779, "mean_token_accuracy": 0.846989244222641, "num_tokens": 463669.0, "step": 50 }, { "epoch": 0.23776223776223776, "grad_norm": 2.9304334416656035, "learning_rate": 7.692307692307694e-06, "loss": 0.5951, "mean_token_accuracy": 0.842944860458374, "num_tokens": 472913.0, "step": 51 }, { "epoch": 0.24242424242424243, "grad_norm": 2.557007651858603, "learning_rate": 7.846153846153847e-06, "loss": 0.5522, "mean_token_accuracy": 0.8499407768249512, "num_tokens": 482966.0, "step": 52 }, { "epoch": 0.24708624708624707, "grad_norm": 2.413067290036643, "learning_rate": 8.000000000000001e-06, "loss": 0.556, "mean_token_accuracy": 0.8527302443981171, "num_tokens": 491361.0, "step": 53 }, { "epoch": 0.2517482517482518, "grad_norm": 2.3167133059321765, "learning_rate": 8.153846153846154e-06, "loss": 0.5474, "mean_token_accuracy": 0.8524684011936188, "num_tokens": 500582.0, "step": 54 }, { "epoch": 0.2564102564102564, "grad_norm": 2.459059208267421, "learning_rate": 8.307692307692309e-06, "loss": 0.5474, "mean_token_accuracy": 0.8470076322555542, "num_tokens": 510444.0, "step": 55 }, { "epoch": 0.26107226107226106, "grad_norm": 2.3850596138441955, "learning_rate": 8.461538461538462e-06, "loss": 0.5477, "mean_token_accuracy": 0.8558304607868195, "num_tokens": 519595.0, "step": 56 }, { "epoch": 0.26573426573426573, "grad_norm": 2.425014600765099, "learning_rate": 8.615384615384617e-06, "loss": 0.5312, "mean_token_accuracy": 0.856484979391098, "num_tokens": 529256.0, "step": 57 }, { "epoch": 0.2703962703962704, "grad_norm": 2.41544921845608, "learning_rate": 8.76923076923077e-06, "loss": 0.5581, "mean_token_accuracy": 0.8501502573490143, "num_tokens": 538883.0, "step": 58 }, { "epoch": 0.27505827505827507, "grad_norm": 2.3601513682778537, "learning_rate": 8.923076923076925e-06, "loss": 0.5381, "mean_token_accuracy": 0.8557923436164856, "num_tokens": 547820.0, "step": 59 }, { "epoch": 0.27972027972027974, "grad_norm": 2.5228638625713815, "learning_rate": 9.076923076923078e-06, "loss": 0.5478, "mean_token_accuracy": 0.8510215282440186, "num_tokens": 556572.0, "step": 60 }, { "epoch": 0.28438228438228436, "grad_norm": 2.4788861740314925, "learning_rate": 9.230769230769232e-06, "loss": 0.5702, "mean_token_accuracy": 0.8453376293182373, "num_tokens": 565988.0, "step": 61 }, { "epoch": 0.289044289044289, "grad_norm": 2.443670735215224, "learning_rate": 9.384615384615385e-06, "loss": 0.5701, "mean_token_accuracy": 0.8493904769420624, "num_tokens": 575388.0, "step": 62 }, { "epoch": 0.2937062937062937, "grad_norm": 2.51188327450779, "learning_rate": 9.53846153846154e-06, "loss": 0.6161, "mean_token_accuracy": 0.8381120562553406, "num_tokens": 584991.0, "step": 63 }, { "epoch": 0.29836829836829837, "grad_norm": 2.4568944814979763, "learning_rate": 9.692307692307693e-06, "loss": 0.5544, "mean_token_accuracy": 0.8492381870746613, "num_tokens": 594666.0, "step": 64 }, { "epoch": 0.30303030303030304, "grad_norm": 2.3543409090851872, "learning_rate": 9.846153846153848e-06, "loss": 0.5627, "mean_token_accuracy": 0.8481525778770447, "num_tokens": 603192.0, "step": 65 }, { "epoch": 0.3076923076923077, "grad_norm": 2.516015882056836, "learning_rate": 1e-05, "loss": 0.5518, "mean_token_accuracy": 0.8509586453437805, "num_tokens": 612501.0, "step": 66 }, { "epoch": 0.3123543123543124, "grad_norm": 2.5702229906928626, "learning_rate": 9.999933987646821e-06, "loss": 0.5652, "mean_token_accuracy": 0.8515127599239349, "num_tokens": 621564.0, "step": 67 }, { "epoch": 0.317016317016317, "grad_norm": 2.311235457272914, "learning_rate": 9.99973595252401e-06, "loss": 0.5007, "mean_token_accuracy": 0.8656518757343292, "num_tokens": 630726.0, "step": 68 }, { "epoch": 0.32167832167832167, "grad_norm": 2.3626760252822736, "learning_rate": 9.999405900441683e-06, "loss": 0.5291, "mean_token_accuracy": 0.8573567271232605, "num_tokens": 639950.0, "step": 69 }, { "epoch": 0.32634032634032634, "grad_norm": 2.5106829719874733, "learning_rate": 9.998943841083179e-06, "loss": 0.5333, "mean_token_accuracy": 0.8539510667324066, "num_tokens": 649402.0, "step": 70 }, { "epoch": 0.331002331002331, "grad_norm": 2.374126097719514, "learning_rate": 9.99834978800478e-06, "loss": 0.5136, "mean_token_accuracy": 0.8584230840206146, "num_tokens": 658474.0, "step": 71 }, { "epoch": 0.3356643356643357, "grad_norm": 2.2734777943143945, "learning_rate": 9.997623758635298e-06, "loss": 0.5469, "mean_token_accuracy": 0.8496910333633423, "num_tokens": 667908.0, "step": 72 }, { "epoch": 0.34032634032634035, "grad_norm": 2.5255087017576043, "learning_rate": 9.996765774275587e-06, "loss": 0.527, "mean_token_accuracy": 0.8526964783668518, "num_tokens": 677532.0, "step": 73 }, { "epoch": 0.34498834498834496, "grad_norm": 2.2143213966162216, "learning_rate": 9.995775860097897e-06, "loss": 0.5219, "mean_token_accuracy": 0.8588562309741974, "num_tokens": 686796.0, "step": 74 }, { "epoch": 0.34965034965034963, "grad_norm": 2.1934169241509665, "learning_rate": 9.994654045145142e-06, "loss": 0.5401, "mean_token_accuracy": 0.8564260303974152, "num_tokens": 695776.0, "step": 75 }, { "epoch": 0.3543123543123543, "grad_norm": 2.153560775154425, "learning_rate": 9.993400362330058e-06, "loss": 0.5446, "mean_token_accuracy": 0.8583995997905731, "num_tokens": 704530.0, "step": 76 }, { "epoch": 0.358974358974359, "grad_norm": 2.128801081198065, "learning_rate": 9.992014848434221e-06, "loss": 0.5363, "mean_token_accuracy": 0.8531051576137543, "num_tokens": 714123.0, "step": 77 }, { "epoch": 0.36363636363636365, "grad_norm": 2.312071712839466, "learning_rate": 9.990497544106981e-06, "loss": 0.5344, "mean_token_accuracy": 0.8557191491127014, "num_tokens": 723719.0, "step": 78 }, { "epoch": 0.3682983682983683, "grad_norm": 2.4795067817061573, "learning_rate": 9.988848493864259e-06, "loss": 0.5597, "mean_token_accuracy": 0.8507181704044342, "num_tokens": 733447.0, "step": 79 }, { "epoch": 0.372960372960373, "grad_norm": 2.2998442504007452, "learning_rate": 9.987067746087251e-06, "loss": 0.5467, "mean_token_accuracy": 0.8498886525630951, "num_tokens": 742567.0, "step": 80 }, { "epoch": 0.3776223776223776, "grad_norm": 2.220780163026348, "learning_rate": 9.985155353021004e-06, "loss": 0.538, "mean_token_accuracy": 0.8577711582183838, "num_tokens": 751488.0, "step": 81 }, { "epoch": 0.3822843822843823, "grad_norm": 2.23769737502521, "learning_rate": 9.983111370772877e-06, "loss": 0.5375, "mean_token_accuracy": 0.8561404347419739, "num_tokens": 760518.0, "step": 82 }, { "epoch": 0.38694638694638694, "grad_norm": 2.3468431520556243, "learning_rate": 9.980935859310907e-06, "loss": 0.5071, "mean_token_accuracy": 0.8601345419883728, "num_tokens": 769855.0, "step": 83 }, { "epoch": 0.3916083916083916, "grad_norm": 2.420088401775177, "learning_rate": 9.97862888246204e-06, "loss": 0.5517, "mean_token_accuracy": 0.8494808971881866, "num_tokens": 780309.0, "step": 84 }, { "epoch": 0.3962703962703963, "grad_norm": 2.1832026417273864, "learning_rate": 9.976190507910265e-06, "loss": 0.5007, "mean_token_accuracy": 0.8591891229152679, "num_tokens": 790560.0, "step": 85 }, { "epoch": 0.40093240093240096, "grad_norm": 2.217081826161623, "learning_rate": 9.97362080719462e-06, "loss": 0.4945, "mean_token_accuracy": 0.8628792762756348, "num_tokens": 799972.0, "step": 86 }, { "epoch": 0.40559440559440557, "grad_norm": 2.1021603924715877, "learning_rate": 9.970919855707103e-06, "loss": 0.5149, "mean_token_accuracy": 0.8582651615142822, "num_tokens": 808998.0, "step": 87 }, { "epoch": 0.41025641025641024, "grad_norm": 2.2137525851668163, "learning_rate": 9.968087732690452e-06, "loss": 0.5643, "mean_token_accuracy": 0.8490354120731354, "num_tokens": 818322.0, "step": 88 }, { "epoch": 0.4149184149184149, "grad_norm": 2.1357303202881193, "learning_rate": 9.965124521235827e-06, "loss": 0.5465, "mean_token_accuracy": 0.8538801968097687, "num_tokens": 828468.0, "step": 89 }, { "epoch": 0.4195804195804196, "grad_norm": 2.144386382606401, "learning_rate": 9.962030308280363e-06, "loss": 0.5598, "mean_token_accuracy": 0.8531892895698547, "num_tokens": 836909.0, "step": 90 }, { "epoch": 0.42424242424242425, "grad_norm": 2.120216517402183, "learning_rate": 9.958805184604631e-06, "loss": 0.508, "mean_token_accuracy": 0.8650859892368317, "num_tokens": 845651.0, "step": 91 }, { "epoch": 0.4289044289044289, "grad_norm": 2.3227262446712014, "learning_rate": 9.955449244829966e-06, "loss": 0.5533, "mean_token_accuracy": 0.850572019815445, "num_tokens": 854858.0, "step": 92 }, { "epoch": 0.43356643356643354, "grad_norm": 2.0524529217213985, "learning_rate": 9.95196258741569e-06, "loss": 0.5097, "mean_token_accuracy": 0.8616639971733093, "num_tokens": 863666.0, "step": 93 }, { "epoch": 0.4382284382284382, "grad_norm": 2.178830324772212, "learning_rate": 9.948345314656234e-06, "loss": 0.513, "mean_token_accuracy": 0.8623040318489075, "num_tokens": 873307.0, "step": 94 }, { "epoch": 0.4428904428904429, "grad_norm": 2.178878661646046, "learning_rate": 9.94459753267812e-06, "loss": 0.5377, "mean_token_accuracy": 0.851812869310379, "num_tokens": 882337.0, "step": 95 }, { "epoch": 0.44755244755244755, "grad_norm": 2.2151123798224366, "learning_rate": 9.94071935143687e-06, "loss": 0.527, "mean_token_accuracy": 0.8585948050022125, "num_tokens": 891528.0, "step": 96 }, { "epoch": 0.4522144522144522, "grad_norm": 2.153917534671565, "learning_rate": 9.936710884713752e-06, "loss": 0.5136, "mean_token_accuracy": 0.8649525940418243, "num_tokens": 900782.0, "step": 97 }, { "epoch": 0.4568764568764569, "grad_norm": 2.0747029950901057, "learning_rate": 9.932572250112469e-06, "loss": 0.5389, "mean_token_accuracy": 0.8555485904216766, "num_tokens": 909811.0, "step": 98 }, { "epoch": 0.46153846153846156, "grad_norm": 1.983982143931433, "learning_rate": 9.92830356905569e-06, "loss": 0.4814, "mean_token_accuracy": 0.8693816661834717, "num_tokens": 919377.0, "step": 99 }, { "epoch": 0.4662004662004662, "grad_norm": 2.1327543713500603, "learning_rate": 9.923904966781496e-06, "loss": 0.5515, "mean_token_accuracy": 0.8478606641292572, "num_tokens": 929132.0, "step": 100 }, { "epoch": 0.47086247086247085, "grad_norm": 2.396154142849225, "learning_rate": 9.919376572339703e-06, "loss": 0.5521, "mean_token_accuracy": 0.8526241481304169, "num_tokens": 938942.0, "step": 101 }, { "epoch": 0.4755244755244755, "grad_norm": 2.1649063337799603, "learning_rate": 9.914718518588076e-06, "loss": 0.5689, "mean_token_accuracy": 0.8482916951179504, "num_tokens": 948271.0, "step": 102 }, { "epoch": 0.4801864801864802, "grad_norm": 2.0645529438999506, "learning_rate": 9.909930942188436e-06, "loss": 0.5219, "mean_token_accuracy": 0.8553557991981506, "num_tokens": 957852.0, "step": 103 }, { "epoch": 0.48484848484848486, "grad_norm": 2.221579905396912, "learning_rate": 9.905013983602639e-06, "loss": 0.5092, "mean_token_accuracy": 0.8628838658332825, "num_tokens": 967455.0, "step": 104 }, { "epoch": 0.48951048951048953, "grad_norm": 2.1265754888438533, "learning_rate": 9.899967787088468e-06, "loss": 0.5494, "mean_token_accuracy": 0.849203497171402, "num_tokens": 977978.0, "step": 105 }, { "epoch": 0.49417249417249415, "grad_norm": 2.1206984788420145, "learning_rate": 9.89479250069539e-06, "loss": 0.513, "mean_token_accuracy": 0.8621029257774353, "num_tokens": 987423.0, "step": 106 }, { "epoch": 0.4988344988344988, "grad_norm": 2.2958737279282917, "learning_rate": 9.889488276260222e-06, "loss": 0.5562, "mean_token_accuracy": 0.8555387854576111, "num_tokens": 996656.0, "step": 107 }, { "epoch": 0.5034965034965035, "grad_norm": 2.2321368229532443, "learning_rate": 9.88405526940267e-06, "loss": 0.5579, "mean_token_accuracy": 0.854218989610672, "num_tokens": 1005952.0, "step": 108 }, { "epoch": 0.5081585081585082, "grad_norm": 2.145064834641496, "learning_rate": 9.87849363952076e-06, "loss": 0.525, "mean_token_accuracy": 0.8584134578704834, "num_tokens": 1015020.0, "step": 109 }, { "epoch": 0.5128205128205128, "grad_norm": 2.2441739750519316, "learning_rate": 9.872803549786177e-06, "loss": 0.542, "mean_token_accuracy": 0.8544652462005615, "num_tokens": 1023815.0, "step": 110 }, { "epoch": 0.5174825174825175, "grad_norm": 2.259110803475594, "learning_rate": 9.866985167139453e-06, "loss": 0.5356, "mean_token_accuracy": 0.8533321619033813, "num_tokens": 1032375.0, "step": 111 }, { "epoch": 0.5221445221445221, "grad_norm": 2.1512979801268095, "learning_rate": 9.861038662285093e-06, "loss": 0.5272, "mean_token_accuracy": 0.8559562265872955, "num_tokens": 1041287.0, "step": 112 }, { "epoch": 0.5268065268065268, "grad_norm": 2.0895533173369616, "learning_rate": 9.854964209686555e-06, "loss": 0.5578, "mean_token_accuracy": 0.8457550704479218, "num_tokens": 1052277.0, "step": 113 }, { "epoch": 0.5314685314685315, "grad_norm": 2.07804701176361, "learning_rate": 9.848761987561132e-06, "loss": 0.5587, "mean_token_accuracy": 0.8497881889343262, "num_tokens": 1061624.0, "step": 114 }, { "epoch": 0.5361305361305362, "grad_norm": 2.029504980400925, "learning_rate": 9.842432177874725e-06, "loss": 0.5222, "mean_token_accuracy": 0.8595547080039978, "num_tokens": 1070577.0, "step": 115 }, { "epoch": 0.5407925407925408, "grad_norm": 1.9605500161506988, "learning_rate": 9.835974966336504e-06, "loss": 0.49, "mean_token_accuracy": 0.8654608428478241, "num_tokens": 1080819.0, "step": 116 }, { "epoch": 0.5454545454545454, "grad_norm": 2.2122252988560223, "learning_rate": 9.82939054239346e-06, "loss": 0.5679, "mean_token_accuracy": 0.8448387086391449, "num_tokens": 1089899.0, "step": 117 }, { "epoch": 0.5501165501165501, "grad_norm": 2.072420046717628, "learning_rate": 9.822679099224844e-06, "loss": 0.5611, "mean_token_accuracy": 0.8533997237682343, "num_tokens": 1099422.0, "step": 118 }, { "epoch": 0.5547785547785548, "grad_norm": 2.2414814409383754, "learning_rate": 9.815840833736508e-06, "loss": 0.5222, "mean_token_accuracy": 0.8619909286499023, "num_tokens": 1108059.0, "step": 119 }, { "epoch": 0.5594405594405595, "grad_norm": 1.945252737941495, "learning_rate": 9.808875946555109e-06, "loss": 0.5034, "mean_token_accuracy": 0.8639355599880219, "num_tokens": 1117344.0, "step": 120 }, { "epoch": 0.5641025641025641, "grad_norm": 1.8638080342388037, "learning_rate": 9.801784642022254e-06, "loss": 0.4565, "mean_token_accuracy": 0.8745285272598267, "num_tokens": 1126493.0, "step": 121 }, { "epoch": 0.5687645687645687, "grad_norm": 2.1910077292347006, "learning_rate": 9.794567128188466e-06, "loss": 0.5313, "mean_token_accuracy": 0.8547643721103668, "num_tokens": 1135454.0, "step": 122 }, { "epoch": 0.5734265734265734, "grad_norm": 2.320759590932711, "learning_rate": 9.787223616807118e-06, "loss": 0.5518, "mean_token_accuracy": 0.8485196530818939, "num_tokens": 1145120.0, "step": 123 }, { "epoch": 0.578088578088578, "grad_norm": 2.205905846852017, "learning_rate": 9.779754323328192e-06, "loss": 0.5135, "mean_token_accuracy": 0.8605582118034363, "num_tokens": 1154569.0, "step": 124 }, { "epoch": 0.5827505827505828, "grad_norm": 1.9377848593966973, "learning_rate": 9.772159466891971e-06, "loss": 0.4979, "mean_token_accuracy": 0.8665166199207306, "num_tokens": 1163964.0, "step": 125 }, { "epoch": 0.5874125874125874, "grad_norm": 2.143716998667602, "learning_rate": 9.764439270322612e-06, "loss": 0.5332, "mean_token_accuracy": 0.8567988276481628, "num_tokens": 1172705.0, "step": 126 }, { "epoch": 0.5920745920745921, "grad_norm": 1.9575201068560006, "learning_rate": 9.756593960121598e-06, "loss": 0.4744, "mean_token_accuracy": 0.8642941415309906, "num_tokens": 1182428.0, "step": 127 }, { "epoch": 0.5967365967365967, "grad_norm": 2.057073316572943, "learning_rate": 9.748623766461101e-06, "loss": 0.5236, "mean_token_accuracy": 0.8576280772686005, "num_tokens": 1191692.0, "step": 128 }, { "epoch": 0.6013986013986014, "grad_norm": 2.315109150829485, "learning_rate": 9.740528923177227e-06, "loss": 0.5901, "mean_token_accuracy": 0.8435404002666473, "num_tokens": 1200804.0, "step": 129 }, { "epoch": 0.6060606060606061, "grad_norm": 2.160355676496633, "learning_rate": 9.732309667763158e-06, "loss": 0.5131, "mean_token_accuracy": 0.8617720901966095, "num_tokens": 1209500.0, "step": 130 }, { "epoch": 0.6107226107226107, "grad_norm": 2.1245956205330776, "learning_rate": 9.723966241362178e-06, "loss": 0.5508, "mean_token_accuracy": 0.8526682257652283, "num_tokens": 1218360.0, "step": 131 }, { "epoch": 0.6153846153846154, "grad_norm": 1.9474554252940732, "learning_rate": 9.7154988887606e-06, "loss": 0.5172, "mean_token_accuracy": 0.8601753413677216, "num_tokens": 1227995.0, "step": 132 }, { "epoch": 0.62004662004662, "grad_norm": 2.0598106310347886, "learning_rate": 9.706907858380593e-06, "loss": 0.5292, "mean_token_accuracy": 0.853904515504837, "num_tokens": 1237369.0, "step": 133 }, { "epoch": 0.6247086247086248, "grad_norm": 2.167359943385511, "learning_rate": 9.69819340227288e-06, "loss": 0.5337, "mean_token_accuracy": 0.8580853343009949, "num_tokens": 1246811.0, "step": 134 }, { "epoch": 0.6293706293706294, "grad_norm": 2.0419581939057703, "learning_rate": 9.68935577610935e-06, "loss": 0.5584, "mean_token_accuracy": 0.8511963188648224, "num_tokens": 1257728.0, "step": 135 }, { "epoch": 0.634032634032634, "grad_norm": 2.179167067571999, "learning_rate": 9.680395239175563e-06, "loss": 0.547, "mean_token_accuracy": 0.8516505658626556, "num_tokens": 1267082.0, "step": 136 }, { "epoch": 0.6386946386946387, "grad_norm": 2.1523271385107843, "learning_rate": 9.671312054363126e-06, "loss": 0.5097, "mean_token_accuracy": 0.8639609515666962, "num_tokens": 1276457.0, "step": 137 }, { "epoch": 0.6433566433566433, "grad_norm": 2.2140190778794717, "learning_rate": 9.662106488162001e-06, "loss": 0.5353, "mean_token_accuracy": 0.8548583686351776, "num_tokens": 1285816.0, "step": 138 }, { "epoch": 0.6480186480186481, "grad_norm": 1.9892790673234197, "learning_rate": 9.652778810652669e-06, "loss": 0.5141, "mean_token_accuracy": 0.85847008228302, "num_tokens": 1294833.0, "step": 139 }, { "epoch": 0.6526806526806527, "grad_norm": 2.138516925288519, "learning_rate": 9.643329295498215e-06, "loss": 0.5171, "mean_token_accuracy": 0.8615297675132751, "num_tokens": 1304626.0, "step": 140 }, { "epoch": 0.6573426573426573, "grad_norm": 2.2563369591379376, "learning_rate": 9.633758219936299e-06, "loss": 0.5306, "mean_token_accuracy": 0.8586675524711609, "num_tokens": 1314352.0, "step": 141 }, { "epoch": 0.662004662004662, "grad_norm": 2.1291835975325446, "learning_rate": 9.624065864771017e-06, "loss": 0.5387, "mean_token_accuracy": 0.854125052690506, "num_tokens": 1323815.0, "step": 142 }, { "epoch": 0.6666666666666666, "grad_norm": 1.9289636352361732, "learning_rate": 9.614252514364671e-06, "loss": 0.5302, "mean_token_accuracy": 0.8544224202632904, "num_tokens": 1333085.0, "step": 143 }, { "epoch": 0.6713286713286714, "grad_norm": 2.142108471781227, "learning_rate": 9.604318456629415e-06, "loss": 0.5841, "mean_token_accuracy": 0.8435969054698944, "num_tokens": 1342490.0, "step": 144 }, { "epoch": 0.675990675990676, "grad_norm": 2.256848010848965, "learning_rate": 9.594263983018818e-06, "loss": 0.5644, "mean_token_accuracy": 0.8460557162761688, "num_tokens": 1351579.0, "step": 145 }, { "epoch": 0.6806526806526807, "grad_norm": 2.0607158153594343, "learning_rate": 9.584089388519307e-06, "loss": 0.525, "mean_token_accuracy": 0.8550526797771454, "num_tokens": 1363017.0, "step": 146 }, { "epoch": 0.6853146853146853, "grad_norm": 2.0404326207258223, "learning_rate": 9.573794971641518e-06, "loss": 0.5067, "mean_token_accuracy": 0.8611325323581696, "num_tokens": 1372276.0, "step": 147 }, { "epoch": 0.6899766899766899, "grad_norm": 2.017662742079808, "learning_rate": 9.563381034411529e-06, "loss": 0.5173, "mean_token_accuracy": 0.8577908575534821, "num_tokens": 1381474.0, "step": 148 }, { "epoch": 0.6946386946386947, "grad_norm": 2.2063297261619668, "learning_rate": 9.55284788236201e-06, "loss": 0.5575, "mean_token_accuracy": 0.8511577248573303, "num_tokens": 1390844.0, "step": 149 }, { "epoch": 0.6993006993006993, "grad_norm": 2.2322560935923295, "learning_rate": 9.542195824523251e-06, "loss": 0.4993, "mean_token_accuracy": 0.8618028461933136, "num_tokens": 1399649.0, "step": 150 }, { "epoch": 0.703962703962704, "grad_norm": 2.1940354248783667, "learning_rate": 9.531425173414095e-06, "loss": 0.5766, "mean_token_accuracy": 0.8469588458538055, "num_tokens": 1409096.0, "step": 151 }, { "epoch": 0.7086247086247086, "grad_norm": 2.276091425733425, "learning_rate": 9.520536245032783e-06, "loss": 0.6052, "mean_token_accuracy": 0.8366052806377411, "num_tokens": 1417944.0, "step": 152 }, { "epoch": 0.7132867132867133, "grad_norm": 2.199611556073951, "learning_rate": 9.509529358847655e-06, "loss": 0.5454, "mean_token_accuracy": 0.853013813495636, "num_tokens": 1427079.0, "step": 153 }, { "epoch": 0.717948717948718, "grad_norm": 2.080422260259384, "learning_rate": 9.498404837787811e-06, "loss": 0.5353, "mean_token_accuracy": 0.8578924536705017, "num_tokens": 1436521.0, "step": 154 }, { "epoch": 0.7226107226107226, "grad_norm": 1.9852264975254732, "learning_rate": 9.48716300823361e-06, "loss": 0.5259, "mean_token_accuracy": 0.862900048494339, "num_tokens": 1445553.0, "step": 155 }, { "epoch": 0.7272727272727273, "grad_norm": 2.0020640140351738, "learning_rate": 9.475804200007104e-06, "loss": 0.5327, "mean_token_accuracy": 0.8615141212940216, "num_tokens": 1453774.0, "step": 156 }, { "epoch": 0.7319347319347319, "grad_norm": 2.1863487215175392, "learning_rate": 9.464328746362367e-06, "loss": 0.5588, "mean_token_accuracy": 0.8489395678043365, "num_tokens": 1462611.0, "step": 157 }, { "epoch": 0.7365967365967366, "grad_norm": 2.4020487040671084, "learning_rate": 9.452736983975708e-06, "loss": 0.4923, "mean_token_accuracy": 0.8653963208198547, "num_tokens": 1471736.0, "step": 158 }, { "epoch": 0.7412587412587412, "grad_norm": 2.1281438451825987, "learning_rate": 9.441029252935804e-06, "loss": 0.5397, "mean_token_accuracy": 0.8503198325634003, "num_tokens": 1482117.0, "step": 159 }, { "epoch": 0.745920745920746, "grad_norm": 2.1763048717368756, "learning_rate": 9.429205896733705e-06, "loss": 0.5566, "mean_token_accuracy": 0.8503492772579193, "num_tokens": 1491292.0, "step": 160 }, { "epoch": 0.7505827505827506, "grad_norm": 1.9354492116167334, "learning_rate": 9.417267262252775e-06, "loss": 0.503, "mean_token_accuracy": 0.8627839088439941, "num_tokens": 1500735.0, "step": 161 }, { "epoch": 0.7552447552447552, "grad_norm": 2.0708959320246887, "learning_rate": 9.405213699758507e-06, "loss": 0.5067, "mean_token_accuracy": 0.8632858693599701, "num_tokens": 1510190.0, "step": 162 }, { "epoch": 0.7599067599067599, "grad_norm": 2.11618902292271, "learning_rate": 9.393045562888245e-06, "loss": 0.5424, "mean_token_accuracy": 0.8534725308418274, "num_tokens": 1519207.0, "step": 163 }, { "epoch": 0.7645687645687645, "grad_norm": 2.027410237379273, "learning_rate": 9.380763208640809e-06, "loss": 0.5197, "mean_token_accuracy": 0.8590981066226959, "num_tokens": 1528808.0, "step": 164 }, { "epoch": 0.7692307692307693, "grad_norm": 2.008086929554116, "learning_rate": 9.368366997366027e-06, "loss": 0.5071, "mean_token_accuracy": 0.8610014617443085, "num_tokens": 1537765.0, "step": 165 }, { "epoch": 0.7738927738927739, "grad_norm": 2.1365369175139763, "learning_rate": 9.355857292754152e-06, "loss": 0.5299, "mean_token_accuracy": 0.8569334447383881, "num_tokens": 1547822.0, "step": 166 }, { "epoch": 0.7785547785547785, "grad_norm": 1.8545592775377633, "learning_rate": 9.343234461825204e-06, "loss": 0.4791, "mean_token_accuracy": 0.869120866060257, "num_tokens": 1557620.0, "step": 167 }, { "epoch": 0.7832167832167832, "grad_norm": 2.028039178942425, "learning_rate": 9.330498874918191e-06, "loss": 0.5106, "mean_token_accuracy": 0.8565393388271332, "num_tokens": 1567111.0, "step": 168 }, { "epoch": 0.7878787878787878, "grad_norm": 2.0701117186586817, "learning_rate": 9.317650905680254e-06, "loss": 0.4913, "mean_token_accuracy": 0.8696520030498505, "num_tokens": 1575836.0, "step": 169 }, { "epoch": 0.7925407925407926, "grad_norm": 2.1194332133731235, "learning_rate": 9.304690931055694e-06, "loss": 0.5311, "mean_token_accuracy": 0.8621737062931061, "num_tokens": 1584822.0, "step": 170 }, { "epoch": 0.7972027972027972, "grad_norm": 2.1282940280365747, "learning_rate": 9.29161933127492e-06, "loss": 0.5601, "mean_token_accuracy": 0.8475571274757385, "num_tokens": 1594428.0, "step": 171 }, { "epoch": 0.8018648018648019, "grad_norm": 2.0641332133798937, "learning_rate": 9.278436489843298e-06, "loss": 0.5373, "mean_token_accuracy": 0.8547400534152985, "num_tokens": 1603786.0, "step": 172 }, { "epoch": 0.8065268065268065, "grad_norm": 2.0160796623607804, "learning_rate": 9.265142793529883e-06, "loss": 0.4971, "mean_token_accuracy": 0.8660332560539246, "num_tokens": 1612747.0, "step": 173 }, { "epoch": 0.8111888111888111, "grad_norm": 1.9129699635418105, "learning_rate": 9.251738632356086e-06, "loss": 0.4926, "mean_token_accuracy": 0.869436115026474, "num_tokens": 1621819.0, "step": 174 }, { "epoch": 0.8158508158508159, "grad_norm": 2.3884591059212537, "learning_rate": 9.238224399584232e-06, "loss": 0.5476, "mean_token_accuracy": 0.8577151894569397, "num_tokens": 1631685.0, "step": 175 }, { "epoch": 0.8205128205128205, "grad_norm": 1.8831561540253134, "learning_rate": 9.224600491706009e-06, "loss": 0.4786, "mean_token_accuracy": 0.8686897456645966, "num_tokens": 1640794.0, "step": 176 }, { "epoch": 0.8251748251748252, "grad_norm": 2.0432546539992247, "learning_rate": 9.210867308430847e-06, "loss": 0.5173, "mean_token_accuracy": 0.8568342328071594, "num_tokens": 1650186.0, "step": 177 }, { "epoch": 0.8298368298368298, "grad_norm": 2.0098051546686357, "learning_rate": 9.197025252674192e-06, "loss": 0.5181, "mean_token_accuracy": 0.8594348132610321, "num_tokens": 1659577.0, "step": 178 }, { "epoch": 0.8344988344988346, "grad_norm": 2.0448629279973214, "learning_rate": 9.183074730545674e-06, "loss": 0.521, "mean_token_accuracy": 0.8540050983428955, "num_tokens": 1669821.0, "step": 179 }, { "epoch": 0.8391608391608392, "grad_norm": 2.139730175214238, "learning_rate": 9.169016151337202e-06, "loss": 0.5651, "mean_token_accuracy": 0.8448547720909119, "num_tokens": 1679579.0, "step": 180 }, { "epoch": 0.8438228438228438, "grad_norm": 2.0464895767510742, "learning_rate": 9.15484992751095e-06, "loss": 0.5221, "mean_token_accuracy": 0.8626176416873932, "num_tokens": 1688922.0, "step": 181 }, { "epoch": 0.8484848484848485, "grad_norm": 1.9954837428115382, "learning_rate": 9.140576474687263e-06, "loss": 0.5279, "mean_token_accuracy": 0.8613512217998505, "num_tokens": 1697949.0, "step": 182 }, { "epoch": 0.8531468531468531, "grad_norm": 1.9567685319451251, "learning_rate": 9.126196211632456e-06, "loss": 0.5274, "mean_token_accuracy": 0.8570147156715393, "num_tokens": 1708365.0, "step": 183 }, { "epoch": 0.8578088578088578, "grad_norm": 1.9210555989780966, "learning_rate": 9.11170956024653e-06, "loss": 0.5269, "mean_token_accuracy": 0.8548833727836609, "num_tokens": 1717340.0, "step": 184 }, { "epoch": 0.8624708624708625, "grad_norm": 1.9318021919862647, "learning_rate": 9.097116945550794e-06, "loss": 0.5359, "mean_token_accuracy": 0.8578689694404602, "num_tokens": 1727305.0, "step": 185 }, { "epoch": 0.8671328671328671, "grad_norm": 2.026417344807131, "learning_rate": 9.082418795675397e-06, "loss": 0.5617, "mean_token_accuracy": 0.8492590188980103, "num_tokens": 1737093.0, "step": 186 }, { "epoch": 0.8717948717948718, "grad_norm": 1.8991450738896973, "learning_rate": 9.067615541846768e-06, "loss": 0.5235, "mean_token_accuracy": 0.859586089849472, "num_tokens": 1746772.0, "step": 187 }, { "epoch": 0.8764568764568764, "grad_norm": 1.9522407091267568, "learning_rate": 9.052707618374958e-06, "loss": 0.4765, "mean_token_accuracy": 0.8663023710250854, "num_tokens": 1756307.0, "step": 188 }, { "epoch": 0.8811188811188811, "grad_norm": 2.048727988149695, "learning_rate": 9.037695462640908e-06, "loss": 0.5331, "mean_token_accuracy": 0.8549124300479889, "num_tokens": 1766010.0, "step": 189 }, { "epoch": 0.8857808857808858, "grad_norm": 1.9934806378380259, "learning_rate": 9.022579515083601e-06, "loss": 0.5421, "mean_token_accuracy": 0.8553925156593323, "num_tokens": 1774885.0, "step": 190 }, { "epoch": 0.8904428904428905, "grad_norm": 2.0689376168576286, "learning_rate": 9.007360219187163e-06, "loss": 0.514, "mean_token_accuracy": 0.8535875976085663, "num_tokens": 1785248.0, "step": 191 }, { "epoch": 0.8951048951048951, "grad_norm": 2.009916861001166, "learning_rate": 8.99203802146783e-06, "loss": 0.497, "mean_token_accuracy": 0.8642706871032715, "num_tokens": 1794525.0, "step": 192 }, { "epoch": 0.8997668997668997, "grad_norm": 2.0465375766970277, "learning_rate": 8.976613371460856e-06, "loss": 0.5271, "mean_token_accuracy": 0.8549227714538574, "num_tokens": 1804413.0, "step": 193 }, { "epoch": 0.9044289044289044, "grad_norm": 1.917310132189426, "learning_rate": 8.961086721707331e-06, "loss": 0.4938, "mean_token_accuracy": 0.8640461564064026, "num_tokens": 1813695.0, "step": 194 }, { "epoch": 0.9090909090909091, "grad_norm": 1.915744851468885, "learning_rate": 8.945458527740892e-06, "loss": 0.4707, "mean_token_accuracy": 0.8734670579433441, "num_tokens": 1823851.0, "step": 195 }, { "epoch": 0.9137529137529138, "grad_norm": 1.9274655587588858, "learning_rate": 8.929729248074364e-06, "loss": 0.5021, "mean_token_accuracy": 0.8575229346752167, "num_tokens": 1833828.0, "step": 196 }, { "epoch": 0.9184149184149184, "grad_norm": 2.026284467899399, "learning_rate": 8.913899344186312e-06, "loss": 0.5287, "mean_token_accuracy": 0.8602744936943054, "num_tokens": 1842998.0, "step": 197 }, { "epoch": 0.9230769230769231, "grad_norm": 2.05400953187219, "learning_rate": 8.897969280507494e-06, "loss": 0.5324, "mean_token_accuracy": 0.862190306186676, "num_tokens": 1851919.0, "step": 198 }, { "epoch": 0.9277389277389277, "grad_norm": 2.0766187536409078, "learning_rate": 8.881939524407238e-06, "loss": 0.5412, "mean_token_accuracy": 0.8545754849910736, "num_tokens": 1860906.0, "step": 199 }, { "epoch": 0.9324009324009324, "grad_norm": 1.989815369304169, "learning_rate": 8.86581054617973e-06, "loss": 0.5453, "mean_token_accuracy": 0.8528104722499847, "num_tokens": 1870470.0, "step": 200 }, { "epoch": 0.9370629370629371, "grad_norm": 2.088847720398196, "learning_rate": 8.849582819030217e-06, "loss": 0.5432, "mean_token_accuracy": 0.8563026189804077, "num_tokens": 1879984.0, "step": 201 }, { "epoch": 0.9417249417249417, "grad_norm": 2.078290662100905, "learning_rate": 8.833256819061126e-06, "loss": 0.4983, "mean_token_accuracy": 0.8666514456272125, "num_tokens": 1889348.0, "step": 202 }, { "epoch": 0.9463869463869464, "grad_norm": 1.9223822960306767, "learning_rate": 8.81683302525809e-06, "loss": 0.5411, "mean_token_accuracy": 0.8597702980041504, "num_tokens": 1898543.0, "step": 203 }, { "epoch": 0.951048951048951, "grad_norm": 1.8855235649147306, "learning_rate": 8.800311919475902e-06, "loss": 0.5014, "mean_token_accuracy": 0.8621995449066162, "num_tokens": 1908052.0, "step": 204 }, { "epoch": 0.9557109557109557, "grad_norm": 1.9276050868545773, "learning_rate": 8.783693986424365e-06, "loss": 0.504, "mean_token_accuracy": 0.8629100322723389, "num_tokens": 1917592.0, "step": 205 }, { "epoch": 0.9603729603729604, "grad_norm": 2.143001225129588, "learning_rate": 8.76697971365409e-06, "loss": 0.5387, "mean_token_accuracy": 0.8589153587818146, "num_tokens": 1927404.0, "step": 206 }, { "epoch": 0.965034965034965, "grad_norm": 2.1777112033624557, "learning_rate": 8.750169591542177e-06, "loss": 0.5252, "mean_token_accuracy": 0.8522741794586182, "num_tokens": 1936273.0, "step": 207 }, { "epoch": 0.9696969696969697, "grad_norm": 1.91067041727946, "learning_rate": 8.733264113277832e-06, "loss": 0.4991, "mean_token_accuracy": 0.8641117215156555, "num_tokens": 1944830.0, "step": 208 }, { "epoch": 0.9743589743589743, "grad_norm": 2.004004655149868, "learning_rate": 8.716263774847902e-06, "loss": 0.5276, "mean_token_accuracy": 0.8581711649894714, "num_tokens": 1954306.0, "step": 209 }, { "epoch": 0.9790209790209791, "grad_norm": 1.968917380031634, "learning_rate": 8.69916907502232e-06, "loss": 0.5138, "mean_token_accuracy": 0.8624255359172821, "num_tokens": 1964061.0, "step": 210 }, { "epoch": 0.9836829836829837, "grad_norm": 1.9939460274690515, "learning_rate": 8.681980515339464e-06, "loss": 0.4816, "mean_token_accuracy": 0.8704831600189209, "num_tokens": 1973286.0, "step": 211 }, { "epoch": 0.9883449883449883, "grad_norm": 2.057525517897735, "learning_rate": 8.664698600091462e-06, "loss": 0.5221, "mean_token_accuracy": 0.8580233752727509, "num_tokens": 1983373.0, "step": 212 }, { "epoch": 0.993006993006993, "grad_norm": 1.79381112298063, "learning_rate": 8.64732383630937e-06, "loss": 0.4376, "mean_token_accuracy": 0.8793376386165619, "num_tokens": 1992207.0, "step": 213 }, { "epoch": 0.9976689976689976, "grad_norm": 1.933384153410609, "learning_rate": 8.629856733748325e-06, "loss": 0.505, "mean_token_accuracy": 0.85846146941185, "num_tokens": 2001400.0, "step": 214 }, { "epoch": 1.0, "grad_norm": 1.933384153410609, "learning_rate": 8.612297804872562e-06, "loss": 0.4925, "mean_token_accuracy": 0.8816215991973877, "num_tokens": 2003256.0, "step": 215 }, { "epoch": 1.0046620046620047, "grad_norm": 2.819257871473016, "learning_rate": 8.594647564840407e-06, "loss": 0.3933, "mean_token_accuracy": 0.8927328288555145, "num_tokens": 2012460.0, "step": 216 }, { "epoch": 1.0093240093240092, "grad_norm": 1.7635713882513158, "learning_rate": 8.57690653148913e-06, "loss": 0.392, "mean_token_accuracy": 0.887998104095459, "num_tokens": 2022109.0, "step": 217 }, { "epoch": 1.013986013986014, "grad_norm": 1.7892473419907957, "learning_rate": 8.559075225319786e-06, "loss": 0.3855, "mean_token_accuracy": 0.8922514021396637, "num_tokens": 2030710.0, "step": 218 }, { "epoch": 1.0186480186480187, "grad_norm": 1.6823650842914313, "learning_rate": 8.54115416948192e-06, "loss": 0.356, "mean_token_accuracy": 0.9053717851638794, "num_tokens": 2039777.0, "step": 219 }, { "epoch": 1.0233100233100234, "grad_norm": 1.939934426049098, "learning_rate": 8.523143889758228e-06, "loss": 0.3694, "mean_token_accuracy": 0.8991726338863373, "num_tokens": 2049184.0, "step": 220 }, { "epoch": 1.027972027972028, "grad_norm": 1.752148060977119, "learning_rate": 8.505044914549131e-06, "loss": 0.39, "mean_token_accuracy": 0.8928222954273224, "num_tokens": 2058845.0, "step": 221 }, { "epoch": 1.0326340326340326, "grad_norm": 2.1158285136058033, "learning_rate": 8.48685777485727e-06, "loss": 0.3596, "mean_token_accuracy": 0.8957322537899017, "num_tokens": 2067868.0, "step": 222 }, { "epoch": 1.0372960372960374, "grad_norm": 2.1547545690668026, "learning_rate": 8.46858300427193e-06, "loss": 0.3831, "mean_token_accuracy": 0.8963182866573334, "num_tokens": 2077258.0, "step": 223 }, { "epoch": 1.0419580419580419, "grad_norm": 1.9477915170139626, "learning_rate": 8.450221138953383e-06, "loss": 0.3642, "mean_token_accuracy": 0.8969131708145142, "num_tokens": 2086219.0, "step": 224 }, { "epoch": 1.0466200466200466, "grad_norm": 1.9674246406742486, "learning_rate": 8.431772717617154e-06, "loss": 0.348, "mean_token_accuracy": 0.9007111489772797, "num_tokens": 2095430.0, "step": 225 }, { "epoch": 1.0512820512820513, "grad_norm": 2.0055164195358235, "learning_rate": 8.413238281518225e-06, "loss": 0.3575, "mean_token_accuracy": 0.8980507254600525, "num_tokens": 2104162.0, "step": 226 }, { "epoch": 1.055944055944056, "grad_norm": 1.989341084975444, "learning_rate": 8.394618374435148e-06, "loss": 0.3634, "mean_token_accuracy": 0.8973170518875122, "num_tokens": 2114619.0, "step": 227 }, { "epoch": 1.0606060606060606, "grad_norm": 2.1674333688477074, "learning_rate": 8.375913542654093e-06, "loss": 0.3665, "mean_token_accuracy": 0.8995259404182434, "num_tokens": 2124605.0, "step": 228 }, { "epoch": 1.0652680652680653, "grad_norm": 2.0014110756016703, "learning_rate": 8.357124334952818e-06, "loss": 0.3884, "mean_token_accuracy": 0.889716625213623, "num_tokens": 2133888.0, "step": 229 }, { "epoch": 1.06993006993007, "grad_norm": 1.9633860039478925, "learning_rate": 8.33825130258458e-06, "loss": 0.3578, "mean_token_accuracy": 0.8981423377990723, "num_tokens": 2142444.0, "step": 230 }, { "epoch": 1.0745920745920745, "grad_norm": 2.013885901701478, "learning_rate": 8.319294999261941e-06, "loss": 0.335, "mean_token_accuracy": 0.9031325578689575, "num_tokens": 2151447.0, "step": 231 }, { "epoch": 1.0792540792540792, "grad_norm": 1.8717843659595645, "learning_rate": 8.300255981140544e-06, "loss": 0.3334, "mean_token_accuracy": 0.905355840921402, "num_tokens": 2161278.0, "step": 232 }, { "epoch": 1.083916083916084, "grad_norm": 1.8282559453848013, "learning_rate": 8.281134806802783e-06, "loss": 0.3789, "mean_token_accuracy": 0.8943277895450592, "num_tokens": 2170704.0, "step": 233 }, { "epoch": 1.0885780885780885, "grad_norm": 2.0761431582961705, "learning_rate": 8.261932037241418e-06, "loss": 0.3726, "mean_token_accuracy": 0.8970981240272522, "num_tokens": 2180907.0, "step": 234 }, { "epoch": 1.0932400932400932, "grad_norm": 1.8858855419887754, "learning_rate": 8.242648235843123e-06, "loss": 0.3135, "mean_token_accuracy": 0.9130119383335114, "num_tokens": 2190790.0, "step": 235 }, { "epoch": 1.097902097902098, "grad_norm": 1.8790292725099809, "learning_rate": 8.223283968371945e-06, "loss": 0.3485, "mean_token_accuracy": 0.904576301574707, "num_tokens": 2199657.0, "step": 236 }, { "epoch": 1.1025641025641026, "grad_norm": 1.9939775129861892, "learning_rate": 8.203839802952708e-06, "loss": 0.3518, "mean_token_accuracy": 0.9006724953651428, "num_tokens": 2209030.0, "step": 237 }, { "epoch": 1.1072261072261071, "grad_norm": 2.318895943210041, "learning_rate": 8.184316310054355e-06, "loss": 0.3702, "mean_token_accuracy": 0.8964027166366577, "num_tokens": 2218265.0, "step": 238 }, { "epoch": 1.1118881118881119, "grad_norm": 2.1196825455595003, "learning_rate": 8.164714062473201e-06, "loss": 0.3995, "mean_token_accuracy": 0.889014482498169, "num_tokens": 2227845.0, "step": 239 }, { "epoch": 1.1165501165501166, "grad_norm": 2.1570886496790247, "learning_rate": 8.14503363531613e-06, "loss": 0.367, "mean_token_accuracy": 0.900328516960144, "num_tokens": 2237505.0, "step": 240 }, { "epoch": 1.121212121212121, "grad_norm": 1.892665235627306, "learning_rate": 8.125275605983725e-06, "loss": 0.3458, "mean_token_accuracy": 0.9030775427818298, "num_tokens": 2246730.0, "step": 241 }, { "epoch": 1.1258741258741258, "grad_norm": 1.848983532276759, "learning_rate": 8.10544055415332e-06, "loss": 0.3527, "mean_token_accuracy": 0.9045140743255615, "num_tokens": 2255845.0, "step": 242 }, { "epoch": 1.1305361305361306, "grad_norm": 1.919023337141047, "learning_rate": 8.085529061762007e-06, "loss": 0.3663, "mean_token_accuracy": 0.8982928693294525, "num_tokens": 2264305.0, "step": 243 }, { "epoch": 1.1351981351981353, "grad_norm": 1.8321927781965666, "learning_rate": 8.065541712989546e-06, "loss": 0.337, "mean_token_accuracy": 0.9019491672515869, "num_tokens": 2273361.0, "step": 244 }, { "epoch": 1.1398601398601398, "grad_norm": 2.0269533829700013, "learning_rate": 8.04547909424124e-06, "loss": 0.3614, "mean_token_accuracy": 0.9004016220569611, "num_tokens": 2283178.0, "step": 245 }, { "epoch": 1.1445221445221445, "grad_norm": 1.9130074634196965, "learning_rate": 8.025341794130722e-06, "loss": 0.3482, "mean_token_accuracy": 0.9014346897602081, "num_tokens": 2292865.0, "step": 246 }, { "epoch": 1.1491841491841492, "grad_norm": 1.8547690304895703, "learning_rate": 8.005130403462687e-06, "loss": 0.3427, "mean_token_accuracy": 0.9018321931362152, "num_tokens": 2302310.0, "step": 247 }, { "epoch": 1.1538461538461537, "grad_norm": 1.8535777462460932, "learning_rate": 7.98484551521556e-06, "loss": 0.3529, "mean_token_accuracy": 0.9022264182567596, "num_tokens": 2312189.0, "step": 248 }, { "epoch": 1.1585081585081585, "grad_norm": 2.435521274717378, "learning_rate": 7.964487724524105e-06, "loss": 0.3926, "mean_token_accuracy": 0.8963950574398041, "num_tokens": 2321143.0, "step": 249 }, { "epoch": 1.1631701631701632, "grad_norm": 2.3200358499575375, "learning_rate": 7.944057628661948e-06, "loss": 0.3675, "mean_token_accuracy": 0.8951332271099091, "num_tokens": 2330428.0, "step": 250 }, { "epoch": 1.167832167832168, "grad_norm": 2.033123158008929, "learning_rate": 7.923555827024069e-06, "loss": 0.3621, "mean_token_accuracy": 0.8988972008228302, "num_tokens": 2339659.0, "step": 251 }, { "epoch": 1.1724941724941724, "grad_norm": 1.9890967367444383, "learning_rate": 7.902982921109215e-06, "loss": 0.3734, "mean_token_accuracy": 0.8945567905902863, "num_tokens": 2348827.0, "step": 252 }, { "epoch": 1.1771561771561772, "grad_norm": 1.9463270812100881, "learning_rate": 7.882339514502236e-06, "loss": 0.3334, "mean_token_accuracy": 0.9068220555782318, "num_tokens": 2359744.0, "step": 253 }, { "epoch": 1.1818181818181819, "grad_norm": 1.8550101013313145, "learning_rate": 7.861626212856404e-06, "loss": 0.3528, "mean_token_accuracy": 0.9038570523262024, "num_tokens": 2368857.0, "step": 254 }, { "epoch": 1.1864801864801864, "grad_norm": 1.8873717371173877, "learning_rate": 7.840843623875621e-06, "loss": 0.3496, "mean_token_accuracy": 0.9028986990451813, "num_tokens": 2378219.0, "step": 255 }, { "epoch": 1.191142191142191, "grad_norm": 1.9979141390235062, "learning_rate": 7.8199923572966e-06, "loss": 0.3789, "mean_token_accuracy": 0.8938669860363007, "num_tokens": 2387549.0, "step": 256 }, { "epoch": 1.1958041958041958, "grad_norm": 2.016685234805154, "learning_rate": 7.799073024870972e-06, "loss": 0.3611, "mean_token_accuracy": 0.8983322978019714, "num_tokens": 2397564.0, "step": 257 }, { "epoch": 1.2004662004662006, "grad_norm": 1.9415097190450272, "learning_rate": 7.778086240347343e-06, "loss": 0.3468, "mean_token_accuracy": 0.9009680449962616, "num_tokens": 2406482.0, "step": 258 }, { "epoch": 1.205128205128205, "grad_norm": 2.2166846370519435, "learning_rate": 7.757032619453285e-06, "loss": 0.3436, "mean_token_accuracy": 0.9036185145378113, "num_tokens": 2415453.0, "step": 259 }, { "epoch": 1.2097902097902098, "grad_norm": 1.7953859731976027, "learning_rate": 7.735912779877266e-06, "loss": 0.3557, "mean_token_accuracy": 0.9016382992267609, "num_tokens": 2424373.0, "step": 260 }, { "epoch": 1.2144522144522145, "grad_norm": 2.0224193055579796, "learning_rate": 7.714727341250533e-06, "loss": 0.3206, "mean_token_accuracy": 0.910454124212265, "num_tokens": 2433081.0, "step": 261 }, { "epoch": 1.219114219114219, "grad_norm": 2.105326719368714, "learning_rate": 7.693476925128937e-06, "loss": 0.4086, "mean_token_accuracy": 0.8892490267753601, "num_tokens": 2442248.0, "step": 262 }, { "epoch": 1.2237762237762237, "grad_norm": 2.054961145659142, "learning_rate": 7.672162154974686e-06, "loss": 0.3334, "mean_token_accuracy": 0.9058608114719391, "num_tokens": 2451308.0, "step": 263 }, { "epoch": 1.2284382284382285, "grad_norm": 2.0190722916864448, "learning_rate": 7.650783656138065e-06, "loss": 0.3842, "mean_token_accuracy": 0.8919758200645447, "num_tokens": 2461071.0, "step": 264 }, { "epoch": 1.2331002331002332, "grad_norm": 2.0459573081488664, "learning_rate": 7.629342055839077e-06, "loss": 0.3466, "mean_token_accuracy": 0.9024505317211151, "num_tokens": 2471649.0, "step": 265 }, { "epoch": 1.2377622377622377, "grad_norm": 2.045255678828029, "learning_rate": 7.607837983149057e-06, "loss": 0.3902, "mean_token_accuracy": 0.8912324607372284, "num_tokens": 2481584.0, "step": 266 }, { "epoch": 1.2424242424242424, "grad_norm": 2.0728524474968553, "learning_rate": 7.586272068972196e-06, "loss": 0.4005, "mean_token_accuracy": 0.8886791169643402, "num_tokens": 2491498.0, "step": 267 }, { "epoch": 1.2470862470862472, "grad_norm": 1.9682634266288053, "learning_rate": 7.564644946027049e-06, "loss": 0.3578, "mean_token_accuracy": 0.9002366364002228, "num_tokens": 2501066.0, "step": 268 }, { "epoch": 1.2517482517482517, "grad_norm": 1.7696539616448115, "learning_rate": 7.5429572488279615e-06, "loss": 0.3167, "mean_token_accuracy": 0.9112659692764282, "num_tokens": 2510213.0, "step": 269 }, { "epoch": 1.2564102564102564, "grad_norm": 1.869201624827092, "learning_rate": 7.521209613666457e-06, "loss": 0.3298, "mean_token_accuracy": 0.9096185564994812, "num_tokens": 2518850.0, "step": 270 }, { "epoch": 1.2610722610722611, "grad_norm": 1.7896035852004597, "learning_rate": 7.499402678592568e-06, "loss": 0.3452, "mean_token_accuracy": 0.902194082736969, "num_tokens": 2528729.0, "step": 271 }, { "epoch": 1.2657342657342658, "grad_norm": 1.8578212430775984, "learning_rate": 7.477537083396114e-06, "loss": 0.3377, "mean_token_accuracy": 0.9030162990093231, "num_tokens": 2538576.0, "step": 272 }, { "epoch": 1.2703962703962703, "grad_norm": 1.8583558341062996, "learning_rate": 7.45561346958794e-06, "loss": 0.3327, "mean_token_accuracy": 0.9063755571842194, "num_tokens": 2548020.0, "step": 273 }, { "epoch": 1.275058275058275, "grad_norm": 1.945988010631059, "learning_rate": 7.433632480381083e-06, "loss": 0.3535, "mean_token_accuracy": 0.9046582877635956, "num_tokens": 2556984.0, "step": 274 }, { "epoch": 1.2797202797202798, "grad_norm": 2.0099202078958958, "learning_rate": 7.4115947606719105e-06, "loss": 0.3612, "mean_token_accuracy": 0.899190753698349, "num_tokens": 2566255.0, "step": 275 }, { "epoch": 1.2843822843822843, "grad_norm": 2.1032377215815, "learning_rate": 7.389500957021192e-06, "loss": 0.351, "mean_token_accuracy": 0.9027476012706757, "num_tokens": 2575331.0, "step": 276 }, { "epoch": 1.289044289044289, "grad_norm": 2.0973543646568227, "learning_rate": 7.367351717635136e-06, "loss": 0.3561, "mean_token_accuracy": 0.899603396654129, "num_tokens": 2584609.0, "step": 277 }, { "epoch": 1.2937062937062938, "grad_norm": 1.9524574422408152, "learning_rate": 7.345147692346373e-06, "loss": 0.3621, "mean_token_accuracy": 0.8965962827205658, "num_tokens": 2593822.0, "step": 278 }, { "epoch": 1.2983682983682985, "grad_norm": 1.8684420517312623, "learning_rate": 7.3228895325948835e-06, "loss": 0.3404, "mean_token_accuracy": 0.9057947397232056, "num_tokens": 2602990.0, "step": 279 }, { "epoch": 1.303030303030303, "grad_norm": 1.8423583661355911, "learning_rate": 7.3005778914088895e-06, "loss": 0.3181, "mean_token_accuracy": 0.9117348790168762, "num_tokens": 2612564.0, "step": 280 }, { "epoch": 1.3076923076923077, "grad_norm": 1.8719988018492713, "learning_rate": 7.278213423385701e-06, "loss": 0.3633, "mean_token_accuracy": 0.8996273577213287, "num_tokens": 2621342.0, "step": 281 }, { "epoch": 1.3123543123543124, "grad_norm": 2.097992632063848, "learning_rate": 7.255796784672496e-06, "loss": 0.3772, "mean_token_accuracy": 0.8940203785896301, "num_tokens": 2631008.0, "step": 282 }, { "epoch": 1.317016317016317, "grad_norm": 2.1962950013308147, "learning_rate": 7.233328632947087e-06, "loss": 0.3176, "mean_token_accuracy": 0.9080378413200378, "num_tokens": 2640269.0, "step": 283 }, { "epoch": 1.3216783216783217, "grad_norm": 1.8870826341467308, "learning_rate": 7.210809627398615e-06, "loss": 0.4153, "mean_token_accuracy": 0.8845455944538116, "num_tokens": 2650339.0, "step": 284 }, { "epoch": 1.3263403263403264, "grad_norm": 2.4502915111296306, "learning_rate": 7.188240428708211e-06, "loss": 0.3789, "mean_token_accuracy": 0.8997355103492737, "num_tokens": 2659574.0, "step": 285 }, { "epoch": 1.3310023310023311, "grad_norm": 1.8548118165118153, "learning_rate": 7.165621699029615e-06, "loss": 0.3484, "mean_token_accuracy": 0.9010252058506012, "num_tokens": 2670228.0, "step": 286 }, { "epoch": 1.3356643356643356, "grad_norm": 1.9980570026677085, "learning_rate": 7.1429541019697505e-06, "loss": 0.3404, "mean_token_accuracy": 0.9027169346809387, "num_tokens": 2679552.0, "step": 287 }, { "epoch": 1.3403263403263403, "grad_norm": 2.0041119431302143, "learning_rate": 7.120238302569245e-06, "loss": 0.3582, "mean_token_accuracy": 0.8998216986656189, "num_tokens": 2689039.0, "step": 288 }, { "epoch": 1.3449883449883449, "grad_norm": 2.1117609014334198, "learning_rate": 7.097474967282936e-06, "loss": 0.3538, "mean_token_accuracy": 0.9006753861904144, "num_tokens": 2698186.0, "step": 289 }, { "epoch": 1.3496503496503496, "grad_norm": 2.0486984888660893, "learning_rate": 7.0746647639602994e-06, "loss": 0.3735, "mean_token_accuracy": 0.8939989805221558, "num_tokens": 2707633.0, "step": 290 }, { "epoch": 1.3543123543123543, "grad_norm": 2.0849324489543966, "learning_rate": 7.051808361825867e-06, "loss": 0.3615, "mean_token_accuracy": 0.8953780829906464, "num_tokens": 2717211.0, "step": 291 }, { "epoch": 1.358974358974359, "grad_norm": 2.0186929363318846, "learning_rate": 7.028906431459593e-06, "loss": 0.3721, "mean_token_accuracy": 0.8980013132095337, "num_tokens": 2726028.0, "step": 292 }, { "epoch": 1.3636363636363638, "grad_norm": 2.1845476421129746, "learning_rate": 7.0059596447771714e-06, "loss": 0.3582, "mean_token_accuracy": 0.9010344445705414, "num_tokens": 2735300.0, "step": 293 }, { "epoch": 1.3682983682983683, "grad_norm": 1.895694935116521, "learning_rate": 6.982968675010332e-06, "loss": 0.3425, "mean_token_accuracy": 0.904049277305603, "num_tokens": 2745014.0, "step": 294 }, { "epoch": 1.372960372960373, "grad_norm": 2.1465561267774618, "learning_rate": 6.959934196687079e-06, "loss": 0.3973, "mean_token_accuracy": 0.8906912803649902, "num_tokens": 2754679.0, "step": 295 }, { "epoch": 1.3776223776223775, "grad_norm": 2.1936247573101584, "learning_rate": 6.93685688561191e-06, "loss": 0.3958, "mean_token_accuracy": 0.8900346755981445, "num_tokens": 2763778.0, "step": 296 }, { "epoch": 1.3822843822843822, "grad_norm": 2.2014931147614707, "learning_rate": 6.913737418845985e-06, "loss": 0.3484, "mean_token_accuracy": 0.8970995843410492, "num_tokens": 2772924.0, "step": 297 }, { "epoch": 1.386946386946387, "grad_norm": 1.9248709270571454, "learning_rate": 6.890576474687264e-06, "loss": 0.374, "mean_token_accuracy": 0.8932155966758728, "num_tokens": 2782527.0, "step": 298 }, { "epoch": 1.3916083916083917, "grad_norm": 2.127937137873515, "learning_rate": 6.8673747326506e-06, "loss": 0.4019, "mean_token_accuracy": 0.8896147608757019, "num_tokens": 2792249.0, "step": 299 }, { "epoch": 1.3962703962703964, "grad_norm": 2.2064832449632656, "learning_rate": 6.8441328734478115e-06, "loss": 0.3501, "mean_token_accuracy": 0.8990257680416107, "num_tokens": 2801881.0, "step": 300 }, { "epoch": 1.400932400932401, "grad_norm": 1.8573619457430928, "learning_rate": 6.820851578967708e-06, "loss": 0.3872, "mean_token_accuracy": 0.8920771181583405, "num_tokens": 2811478.0, "step": 301 }, { "epoch": 1.4055944055944056, "grad_norm": 2.0254753288605225, "learning_rate": 6.797531532256079e-06, "loss": 0.3734, "mean_token_accuracy": 0.8915270268917084, "num_tokens": 2821393.0, "step": 302 }, { "epoch": 1.4102564102564101, "grad_norm": 2.0305791181245927, "learning_rate": 6.774173417495667e-06, "loss": 0.3913, "mean_token_accuracy": 0.8924152255058289, "num_tokens": 2830925.0, "step": 303 }, { "epoch": 1.4149184149184149, "grad_norm": 2.004426882378005, "learning_rate": 6.750777919986075e-06, "loss": 0.3446, "mean_token_accuracy": 0.900405764579773, "num_tokens": 2840991.0, "step": 304 }, { "epoch": 1.4195804195804196, "grad_norm": 1.888471422341293, "learning_rate": 6.727345726123684e-06, "loss": 0.4219, "mean_token_accuracy": 0.8845990300178528, "num_tokens": 2850900.0, "step": 305 }, { "epoch": 1.4242424242424243, "grad_norm": 2.3906699897573067, "learning_rate": 6.703877523381495e-06, "loss": 0.339, "mean_token_accuracy": 0.90447598695755, "num_tokens": 2860353.0, "step": 306 }, { "epoch": 1.428904428904429, "grad_norm": 1.84404450023143, "learning_rate": 6.680374000288968e-06, "loss": 0.3976, "mean_token_accuracy": 0.8878736197948456, "num_tokens": 2868792.0, "step": 307 }, { "epoch": 1.4335664335664335, "grad_norm": 2.2669491579710908, "learning_rate": 6.656835846411824e-06, "loss": 0.3815, "mean_token_accuracy": 0.892831951379776, "num_tokens": 2877774.0, "step": 308 }, { "epoch": 1.4382284382284383, "grad_norm": 2.049722778481883, "learning_rate": 6.633263752331808e-06, "loss": 0.341, "mean_token_accuracy": 0.9073670506477356, "num_tokens": 2886388.0, "step": 309 }, { "epoch": 1.4428904428904428, "grad_norm": 1.9762730207593977, "learning_rate": 6.609658409626431e-06, "loss": 0.3336, "mean_token_accuracy": 0.9044366478919983, "num_tokens": 2896615.0, "step": 310 }, { "epoch": 1.4475524475524475, "grad_norm": 2.0368692579640504, "learning_rate": 6.586020510848676e-06, "loss": 0.3983, "mean_token_accuracy": 0.8900512158870697, "num_tokens": 2905800.0, "step": 311 }, { "epoch": 1.4522144522144522, "grad_norm": 2.2134931405904728, "learning_rate": 6.562350749506691e-06, "loss": 0.3625, "mean_token_accuracy": 0.8963052034378052, "num_tokens": 2914643.0, "step": 312 }, { "epoch": 1.456876456876457, "grad_norm": 2.0611362055815206, "learning_rate": 6.538649820043427e-06, "loss": 0.3273, "mean_token_accuracy": 0.9081335067749023, "num_tokens": 2924023.0, "step": 313 }, { "epoch": 1.4615384615384617, "grad_norm": 2.054315048122934, "learning_rate": 6.514918417816275e-06, "loss": 0.3644, "mean_token_accuracy": 0.8971601724624634, "num_tokens": 2933568.0, "step": 314 }, { "epoch": 1.4662004662004662, "grad_norm": 1.8809804843640063, "learning_rate": 6.4911572390766575e-06, "loss": 0.3577, "mean_token_accuracy": 0.9007624089717865, "num_tokens": 2943220.0, "step": 315 }, { "epoch": 1.470862470862471, "grad_norm": 2.2331671138955405, "learning_rate": 6.46736698094961e-06, "loss": 0.3637, "mean_token_accuracy": 0.8960618078708649, "num_tokens": 2953179.0, "step": 316 }, { "epoch": 1.4755244755244754, "grad_norm": 1.9954505430707832, "learning_rate": 6.443548341413316e-06, "loss": 0.3644, "mean_token_accuracy": 0.8977023661136627, "num_tokens": 2961793.0, "step": 317 }, { "epoch": 1.4801864801864801, "grad_norm": 1.9724699365395055, "learning_rate": 6.419702019278643e-06, "loss": 0.3758, "mean_token_accuracy": 0.8944825232028961, "num_tokens": 2971416.0, "step": 318 }, { "epoch": 1.4848484848484849, "grad_norm": 2.0154500749099387, "learning_rate": 6.3958287141686294e-06, "loss": 0.3699, "mean_token_accuracy": 0.8967875242233276, "num_tokens": 2980488.0, "step": 319 }, { "epoch": 1.4895104895104896, "grad_norm": 1.9968714801195095, "learning_rate": 6.371929126497963e-06, "loss": 0.3482, "mean_token_accuracy": 0.8994722068309784, "num_tokens": 2989801.0, "step": 320 }, { "epoch": 1.494172494172494, "grad_norm": 1.8686828557788273, "learning_rate": 6.348003957452433e-06, "loss": 0.3408, "mean_token_accuracy": 0.9057431817054749, "num_tokens": 2998497.0, "step": 321 }, { "epoch": 1.4988344988344988, "grad_norm": 1.961491939994152, "learning_rate": 6.324053908968353e-06, "loss": 0.3708, "mean_token_accuracy": 0.898443877696991, "num_tokens": 3008134.0, "step": 322 }, { "epoch": 1.5034965034965035, "grad_norm": 2.074784243247548, "learning_rate": 6.300079683711973e-06, "loss": 0.3431, "mean_token_accuracy": 0.9046167731285095, "num_tokens": 3017363.0, "step": 323 }, { "epoch": 1.508158508158508, "grad_norm": 2.0249158717108338, "learning_rate": 6.276081985058857e-06, "loss": 0.3313, "mean_token_accuracy": 0.9048315584659576, "num_tokens": 3026359.0, "step": 324 }, { "epoch": 1.5128205128205128, "grad_norm": 2.0275965811268506, "learning_rate": 6.2520615170732555e-06, "loss": 0.364, "mean_token_accuracy": 0.8976201117038727, "num_tokens": 3035205.0, "step": 325 }, { "epoch": 1.5174825174825175, "grad_norm": 2.1956087559799706, "learning_rate": 6.228018984487443e-06, "loss": 0.4091, "mean_token_accuracy": 0.8842986524105072, "num_tokens": 3044415.0, "step": 326 }, { "epoch": 1.5221445221445222, "grad_norm": 2.454246418757256, "learning_rate": 6.20395509268104e-06, "loss": 0.3733, "mean_token_accuracy": 0.8995526134967804, "num_tokens": 3053666.0, "step": 327 }, { "epoch": 1.526806526806527, "grad_norm": 2.1884276223847343, "learning_rate": 6.179870547660326e-06, "loss": 0.3684, "mean_token_accuracy": 0.8975951671600342, "num_tokens": 3063098.0, "step": 328 }, { "epoch": 1.5314685314685315, "grad_norm": 2.1597054844225747, "learning_rate": 6.15576605603752e-06, "loss": 0.3949, "mean_token_accuracy": 0.8913069069385529, "num_tokens": 3072433.0, "step": 329 }, { "epoch": 1.5361305361305362, "grad_norm": 2.113291542241382, "learning_rate": 6.13164232501005e-06, "loss": 0.3543, "mean_token_accuracy": 0.9012714624404907, "num_tokens": 3081854.0, "step": 330 }, { "epoch": 1.5407925407925407, "grad_norm": 1.9680970926751453, "learning_rate": 6.107500062339806e-06, "loss": 0.3338, "mean_token_accuracy": 0.906892865896225, "num_tokens": 3090890.0, "step": 331 }, { "epoch": 1.5454545454545454, "grad_norm": 1.9286059557341297, "learning_rate": 6.083339976332375e-06, "loss": 0.3927, "mean_token_accuracy": 0.8934414088726044, "num_tokens": 3100652.0, "step": 332 }, { "epoch": 1.5501165501165501, "grad_norm": 2.086637851483338, "learning_rate": 6.05916277581626e-06, "loss": 0.3607, "mean_token_accuracy": 0.8994470834732056, "num_tokens": 3109678.0, "step": 333 }, { "epoch": 1.5547785547785549, "grad_norm": 2.0191393522647414, "learning_rate": 6.034969170122079e-06, "loss": 0.3506, "mean_token_accuracy": 0.9034914076328278, "num_tokens": 3118426.0, "step": 334 }, { "epoch": 1.5594405594405596, "grad_norm": 2.0591466593650005, "learning_rate": 6.010759869061768e-06, "loss": 0.3887, "mean_token_accuracy": 0.8912419378757477, "num_tokens": 3128508.0, "step": 335 }, { "epoch": 1.564102564102564, "grad_norm": 1.9767624630895566, "learning_rate": 5.986535582907739e-06, "loss": 0.3392, "mean_token_accuracy": 0.9024395048618317, "num_tokens": 3138063.0, "step": 336 }, { "epoch": 1.5687645687645686, "grad_norm": 1.968183719788235, "learning_rate": 5.96229702237205e-06, "loss": 0.3617, "mean_token_accuracy": 0.8971932530403137, "num_tokens": 3147501.0, "step": 337 }, { "epoch": 1.5734265734265733, "grad_norm": 1.9372262604926125, "learning_rate": 5.938044898585555e-06, "loss": 0.3583, "mean_token_accuracy": 0.9022301137447357, "num_tokens": 3156896.0, "step": 338 }, { "epoch": 1.578088578088578, "grad_norm": 2.007232850048927, "learning_rate": 5.913779923077035e-06, "loss": 0.3512, "mean_token_accuracy": 0.8955269753932953, "num_tokens": 3166931.0, "step": 339 }, { "epoch": 1.5827505827505828, "grad_norm": 1.906142795024627, "learning_rate": 5.889502807752329e-06, "loss": 0.3772, "mean_token_accuracy": 0.8949233889579773, "num_tokens": 3176414.0, "step": 340 }, { "epoch": 1.5874125874125875, "grad_norm": 2.1876602349847154, "learning_rate": 5.865214264873441e-06, "loss": 0.4217, "mean_token_accuracy": 0.8859592080116272, "num_tokens": 3185570.0, "step": 341 }, { "epoch": 1.5920745920745922, "grad_norm": 2.036501182255171, "learning_rate": 5.840915007037648e-06, "loss": 0.3967, "mean_token_accuracy": 0.891197919845581, "num_tokens": 3194296.0, "step": 342 }, { "epoch": 1.5967365967365967, "grad_norm": 2.165455182761244, "learning_rate": 5.816605747156588e-06, "loss": 0.3858, "mean_token_accuracy": 0.8901144564151764, "num_tokens": 3204108.0, "step": 343 }, { "epoch": 1.6013986013986012, "grad_norm": 2.020010750738948, "learning_rate": 5.792287198435349e-06, "loss": 0.3321, "mean_token_accuracy": 0.9072842001914978, "num_tokens": 3213706.0, "step": 344 }, { "epoch": 1.606060606060606, "grad_norm": 1.8267767685145804, "learning_rate": 5.767960074351545e-06, "loss": 0.3166, "mean_token_accuracy": 0.9123204052448273, "num_tokens": 3223445.0, "step": 345 }, { "epoch": 1.6107226107226107, "grad_norm": 1.8249584358478421, "learning_rate": 5.74362508863438e-06, "loss": 0.3557, "mean_token_accuracy": 0.9038134515285492, "num_tokens": 3232428.0, "step": 346 }, { "epoch": 1.6153846153846154, "grad_norm": 2.031208096971931, "learning_rate": 5.719282955243705e-06, "loss": 0.3729, "mean_token_accuracy": 0.8925433158874512, "num_tokens": 3241575.0, "step": 347 }, { "epoch": 1.6200466200466201, "grad_norm": 2.1078269837893817, "learning_rate": 5.69493438834908e-06, "loss": 0.3927, "mean_token_accuracy": 0.89081871509552, "num_tokens": 3250577.0, "step": 348 }, { "epoch": 1.6247086247086249, "grad_norm": 2.053266844522503, "learning_rate": 5.670580102308816e-06, "loss": 0.3708, "mean_token_accuracy": 0.8976758718490601, "num_tokens": 3259305.0, "step": 349 }, { "epoch": 1.6293706293706294, "grad_norm": 2.2217560492827486, "learning_rate": 5.646220811649013e-06, "loss": 0.3631, "mean_token_accuracy": 0.9028047621250153, "num_tokens": 3268502.0, "step": 350 }, { "epoch": 1.6340326340326339, "grad_norm": 1.9993214416997105, "learning_rate": 5.6218572310426065e-06, "loss": 0.3751, "mean_token_accuracy": 0.8933521211147308, "num_tokens": 3276568.0, "step": 351 }, { "epoch": 1.6386946386946386, "grad_norm": 2.1734690923322115, "learning_rate": 5.59749007528839e-06, "loss": 0.3498, "mean_token_accuracy": 0.9007490277290344, "num_tokens": 3285881.0, "step": 352 }, { "epoch": 1.6433566433566433, "grad_norm": 1.98059234929291, "learning_rate": 5.573120059290047e-06, "loss": 0.3484, "mean_token_accuracy": 0.9028733670711517, "num_tokens": 3295490.0, "step": 353 }, { "epoch": 1.648018648018648, "grad_norm": 1.8606757870005566, "learning_rate": 5.5487478980351805e-06, "loss": 0.342, "mean_token_accuracy": 0.9035466313362122, "num_tokens": 3304645.0, "step": 354 }, { "epoch": 1.6526806526806528, "grad_norm": 2.0090495432050726, "learning_rate": 5.524374306574331e-06, "loss": 0.3516, "mean_token_accuracy": 0.9037186503410339, "num_tokens": 3313188.0, "step": 355 }, { "epoch": 1.6573426573426573, "grad_norm": 1.9688667212238595, "learning_rate": 5.500000000000001e-06, "loss": 0.3673, "mean_token_accuracy": 0.9001396596431732, "num_tokens": 3322514.0, "step": 356 }, { "epoch": 1.662004662004662, "grad_norm": 2.1361286046991568, "learning_rate": 5.47562569342567e-06, "loss": 0.3553, "mean_token_accuracy": 0.9002968966960907, "num_tokens": 3332207.0, "step": 357 }, { "epoch": 1.6666666666666665, "grad_norm": 1.9983765145158914, "learning_rate": 5.451252101964821e-06, "loss": 0.345, "mean_token_accuracy": 0.9027390778064728, "num_tokens": 3342114.0, "step": 358 }, { "epoch": 1.6713286713286712, "grad_norm": 2.221522630829983, "learning_rate": 5.426879940709956e-06, "loss": 0.4167, "mean_token_accuracy": 0.8860217034816742, "num_tokens": 3351663.0, "step": 359 }, { "epoch": 1.675990675990676, "grad_norm": 2.351737145138835, "learning_rate": 5.402509924711612e-06, "loss": 0.3787, "mean_token_accuracy": 0.8979602456092834, "num_tokens": 3360915.0, "step": 360 }, { "epoch": 1.6806526806526807, "grad_norm": 2.2067080103402366, "learning_rate": 5.378142768957396e-06, "loss": 0.3694, "mean_token_accuracy": 0.8943982720375061, "num_tokens": 3370401.0, "step": 361 }, { "epoch": 1.6853146853146854, "grad_norm": 2.2270263309806366, "learning_rate": 5.353779188350989e-06, "loss": 0.3982, "mean_token_accuracy": 0.8878339529037476, "num_tokens": 3380711.0, "step": 362 }, { "epoch": 1.68997668997669, "grad_norm": 1.8254660836200485, "learning_rate": 5.329419897691187e-06, "loss": 0.3084, "mean_token_accuracy": 0.9157176613807678, "num_tokens": 3390032.0, "step": 363 }, { "epoch": 1.6946386946386947, "grad_norm": 1.8835789154592308, "learning_rate": 5.305065611650921e-06, "loss": 0.3653, "mean_token_accuracy": 0.8960134088993073, "num_tokens": 3399395.0, "step": 364 }, { "epoch": 1.6993006993006992, "grad_norm": 1.9906870579116909, "learning_rate": 5.280717044756298e-06, "loss": 0.3656, "mean_token_accuracy": 0.8999285995960236, "num_tokens": 3408677.0, "step": 365 }, { "epoch": 1.7039627039627039, "grad_norm": 2.0749533505721245, "learning_rate": 5.256374911365621e-06, "loss": 0.3439, "mean_token_accuracy": 0.9051547646522522, "num_tokens": 3418478.0, "step": 366 }, { "epoch": 1.7086247086247086, "grad_norm": 1.8643996151955922, "learning_rate": 5.232039925648457e-06, "loss": 0.3449, "mean_token_accuracy": 0.9024885594844818, "num_tokens": 3427641.0, "step": 367 }, { "epoch": 1.7132867132867133, "grad_norm": 1.8625024767097098, "learning_rate": 5.207712801564652e-06, "loss": 0.3371, "mean_token_accuracy": 0.9041755497455597, "num_tokens": 3437250.0, "step": 368 }, { "epoch": 1.717948717948718, "grad_norm": 2.0436576954469823, "learning_rate": 5.1833942528434145e-06, "loss": 0.3721, "mean_token_accuracy": 0.8964725732803345, "num_tokens": 3446807.0, "step": 369 }, { "epoch": 1.7226107226107226, "grad_norm": 1.9839081705779613, "learning_rate": 5.159084992962354e-06, "loss": 0.3303, "mean_token_accuracy": 0.9086009562015533, "num_tokens": 3456306.0, "step": 370 }, { "epoch": 1.7272727272727273, "grad_norm": 2.0000086910401667, "learning_rate": 5.13478573512656e-06, "loss": 0.3667, "mean_token_accuracy": 0.8962165117263794, "num_tokens": 3466027.0, "step": 371 }, { "epoch": 1.7319347319347318, "grad_norm": 2.038609761919514, "learning_rate": 5.110497192247671e-06, "loss": 0.3434, "mean_token_accuracy": 0.9010106325149536, "num_tokens": 3475253.0, "step": 372 }, { "epoch": 1.7365967365967365, "grad_norm": 2.1715737014463894, "learning_rate": 5.086220076922965e-06, "loss": 0.3448, "mean_token_accuracy": 0.9069474339485168, "num_tokens": 3484178.0, "step": 373 }, { "epoch": 1.7412587412587412, "grad_norm": 1.867996028633842, "learning_rate": 5.061955101414448e-06, "loss": 0.3381, "mean_token_accuracy": 0.9080156981945038, "num_tokens": 3494001.0, "step": 374 }, { "epoch": 1.745920745920746, "grad_norm": 2.1685787201422837, "learning_rate": 5.0377029776279514e-06, "loss": 0.3693, "mean_token_accuracy": 0.902137279510498, "num_tokens": 3503472.0, "step": 375 }, { "epoch": 1.7505827505827507, "grad_norm": 2.054942165163828, "learning_rate": 5.013464417092263e-06, "loss": 0.3739, "mean_token_accuracy": 0.8999918103218079, "num_tokens": 3512786.0, "step": 376 }, { "epoch": 1.7552447552447552, "grad_norm": 2.0866190139943246, "learning_rate": 4.989240130938232e-06, "loss": 0.3298, "mean_token_accuracy": 0.9082687795162201, "num_tokens": 3522266.0, "step": 377 }, { "epoch": 1.75990675990676, "grad_norm": 1.9164366875851901, "learning_rate": 4.9650308298779215e-06, "loss": 0.3728, "mean_token_accuracy": 0.8957255184650421, "num_tokens": 3531741.0, "step": 378 }, { "epoch": 1.7645687645687644, "grad_norm": 2.1286294755388386, "learning_rate": 4.940837224183742e-06, "loss": 0.3469, "mean_token_accuracy": 0.9021869897842407, "num_tokens": 3541026.0, "step": 379 }, { "epoch": 1.7692307692307692, "grad_norm": 1.9795732637258567, "learning_rate": 4.916660023667627e-06, "loss": 0.389, "mean_token_accuracy": 0.8947827517986298, "num_tokens": 3550336.0, "step": 380 }, { "epoch": 1.7738927738927739, "grad_norm": 2.2291523062610845, "learning_rate": 4.892499937660195e-06, "loss": 0.3698, "mean_token_accuracy": 0.9012640714645386, "num_tokens": 3558944.0, "step": 381 }, { "epoch": 1.7785547785547786, "grad_norm": 2.142983628191579, "learning_rate": 4.8683576749899505e-06, "loss": 0.3583, "mean_token_accuracy": 0.896765947341919, "num_tokens": 3567747.0, "step": 382 }, { "epoch": 1.7832167832167833, "grad_norm": 2.1501200195656134, "learning_rate": 4.844233943962481e-06, "loss": 0.3476, "mean_token_accuracy": 0.9035030007362366, "num_tokens": 3576606.0, "step": 383 }, { "epoch": 1.7878787878787878, "grad_norm": 1.8885760684090536, "learning_rate": 4.820129452339676e-06, "loss": 0.3913, "mean_token_accuracy": 0.8892778158187866, "num_tokens": 3586451.0, "step": 384 }, { "epoch": 1.7925407925407926, "grad_norm": 2.3749625105377423, "learning_rate": 4.796044907318961e-06, "loss": 0.3493, "mean_token_accuracy": 0.9036368727684021, "num_tokens": 3595348.0, "step": 385 }, { "epoch": 1.797202797202797, "grad_norm": 2.1616552910281044, "learning_rate": 4.771981015512559e-06, "loss": 0.404, "mean_token_accuracy": 0.888810396194458, "num_tokens": 3605507.0, "step": 386 }, { "epoch": 1.8018648018648018, "grad_norm": 1.859001299709581, "learning_rate": 4.747938482926746e-06, "loss": 0.3422, "mean_token_accuracy": 0.9032579958438873, "num_tokens": 3615183.0, "step": 387 }, { "epoch": 1.8065268065268065, "grad_norm": 2.1152225374726337, "learning_rate": 4.723918014941144e-06, "loss": 0.4379, "mean_token_accuracy": 0.8827772438526154, "num_tokens": 3624158.0, "step": 388 }, { "epoch": 1.8111888111888113, "grad_norm": 2.149586435718605, "learning_rate": 4.69992031628803e-06, "loss": 0.3702, "mean_token_accuracy": 0.8978259861469269, "num_tokens": 3633033.0, "step": 389 }, { "epoch": 1.815850815850816, "grad_norm": 2.0006781263840843, "learning_rate": 4.675946091031648e-06, "loss": 0.3604, "mean_token_accuracy": 0.901467889547348, "num_tokens": 3642167.0, "step": 390 }, { "epoch": 1.8205128205128205, "grad_norm": 1.9865318354604984, "learning_rate": 4.65199604254757e-06, "loss": 0.3266, "mean_token_accuracy": 0.902495414018631, "num_tokens": 3651178.0, "step": 391 }, { "epoch": 1.8251748251748252, "grad_norm": 1.8174794166059944, "learning_rate": 4.628070873502038e-06, "loss": 0.3595, "mean_token_accuracy": 0.8980917930603027, "num_tokens": 3660853.0, "step": 392 }, { "epoch": 1.8298368298368297, "grad_norm": 1.8237936767860705, "learning_rate": 4.604171285831373e-06, "loss": 0.3247, "mean_token_accuracy": 0.9076565206050873, "num_tokens": 3671265.0, "step": 393 }, { "epoch": 1.8344988344988344, "grad_norm": 2.967734088138488, "learning_rate": 4.5802979807213585e-06, "loss": 0.3284, "mean_token_accuracy": 0.9076884090900421, "num_tokens": 3681935.0, "step": 394 }, { "epoch": 1.8391608391608392, "grad_norm": 1.7848473602660753, "learning_rate": 4.556451658586687e-06, "loss": 0.3327, "mean_token_accuracy": 0.9050241410732269, "num_tokens": 3690809.0, "step": 395 }, { "epoch": 1.843822843822844, "grad_norm": 2.132215578463288, "learning_rate": 4.532633019050392e-06, "loss": 0.3344, "mean_token_accuracy": 0.9007371068000793, "num_tokens": 3700254.0, "step": 396 }, { "epoch": 1.8484848484848486, "grad_norm": 1.802687602683309, "learning_rate": 4.508842760923344e-06, "loss": 0.3319, "mean_token_accuracy": 0.9053416550159454, "num_tokens": 3709572.0, "step": 397 }, { "epoch": 1.8531468531468531, "grad_norm": 2.141026355729347, "learning_rate": 4.4850815821837265e-06, "loss": 0.3686, "mean_token_accuracy": 0.8985418379306793, "num_tokens": 3718477.0, "step": 398 }, { "epoch": 1.8578088578088578, "grad_norm": 2.064794666005344, "learning_rate": 4.4613501799565755e-06, "loss": 0.3776, "mean_token_accuracy": 0.8916601240634918, "num_tokens": 3728055.0, "step": 399 }, { "epoch": 1.8624708624708624, "grad_norm": 2.3103287859074477, "learning_rate": 4.43764925049331e-06, "loss": 0.3689, "mean_token_accuracy": 0.8986863493919373, "num_tokens": 3736756.0, "step": 400 }, { "epoch": 1.867132867132867, "grad_norm": 2.1582853507409765, "learning_rate": 4.413979489151326e-06, "loss": 0.3664, "mean_token_accuracy": 0.8990582525730133, "num_tokens": 3746049.0, "step": 401 }, { "epoch": 1.8717948717948718, "grad_norm": 2.346758495250227, "learning_rate": 4.3903415903735725e-06, "loss": 0.3933, "mean_token_accuracy": 0.893925815820694, "num_tokens": 3755161.0, "step": 402 }, { "epoch": 1.8764568764568765, "grad_norm": 2.2276831775466626, "learning_rate": 4.366736247668194e-06, "loss": 0.3578, "mean_token_accuracy": 0.9008547365665436, "num_tokens": 3763950.0, "step": 403 }, { "epoch": 1.8811188811188813, "grad_norm": 2.005267203414508, "learning_rate": 4.343164153588176e-06, "loss": 0.3592, "mean_token_accuracy": 0.8986201584339142, "num_tokens": 3773509.0, "step": 404 }, { "epoch": 1.8857808857808858, "grad_norm": 2.07305174671755, "learning_rate": 4.3196259997110326e-06, "loss": 0.374, "mean_token_accuracy": 0.8963338732719421, "num_tokens": 3782734.0, "step": 405 }, { "epoch": 1.8904428904428905, "grad_norm": 1.8657634956210976, "learning_rate": 4.296122476618507e-06, "loss": 0.2938, "mean_token_accuracy": 0.9148504436016083, "num_tokens": 3792797.0, "step": 406 }, { "epoch": 1.895104895104895, "grad_norm": 1.7854646245845165, "learning_rate": 4.2726542738763185e-06, "loss": 0.3506, "mean_token_accuracy": 0.9033430516719818, "num_tokens": 3801566.0, "step": 407 }, { "epoch": 1.8997668997668997, "grad_norm": 2.1277604879204297, "learning_rate": 4.249222080013927e-06, "loss": 0.4041, "mean_token_accuracy": 0.8863241672515869, "num_tokens": 3810506.0, "step": 408 }, { "epoch": 1.9044289044289044, "grad_norm": 2.203068756546148, "learning_rate": 4.2258265825043365e-06, "loss": 0.3564, "mean_token_accuracy": 0.8995753824710846, "num_tokens": 3819472.0, "step": 409 }, { "epoch": 1.9090909090909092, "grad_norm": 1.9056766783093182, "learning_rate": 4.202468467743922e-06, "loss": 0.3437, "mean_token_accuracy": 0.9032955765724182, "num_tokens": 3828386.0, "step": 410 }, { "epoch": 1.913752913752914, "grad_norm": 2.0477422656146715, "learning_rate": 4.1791484210322945e-06, "loss": 0.3963, "mean_token_accuracy": 0.8910067081451416, "num_tokens": 3837876.0, "step": 411 }, { "epoch": 1.9184149184149184, "grad_norm": 2.003989704673306, "learning_rate": 4.15586712655219e-06, "loss": 0.3766, "mean_token_accuracy": 0.894858181476593, "num_tokens": 3847317.0, "step": 412 }, { "epoch": 1.9230769230769231, "grad_norm": 2.040339338795703, "learning_rate": 4.1326252673494006e-06, "loss": 0.3611, "mean_token_accuracy": 0.9015864133834839, "num_tokens": 3856516.0, "step": 413 }, { "epoch": 1.9277389277389276, "grad_norm": 1.9997790559031594, "learning_rate": 4.109423525312738e-06, "loss": 0.3524, "mean_token_accuracy": 0.9032379388809204, "num_tokens": 3866154.0, "step": 414 }, { "epoch": 1.9324009324009324, "grad_norm": 1.8004052846393808, "learning_rate": 4.086262581154015e-06, "loss": 0.3496, "mean_token_accuracy": 0.9037461578845978, "num_tokens": 3875591.0, "step": 415 }, { "epoch": 1.937062937062937, "grad_norm": 2.0740585166328422, "learning_rate": 4.0631431143880915e-06, "loss": 0.3397, "mean_token_accuracy": 0.9040902256965637, "num_tokens": 3885411.0, "step": 416 }, { "epoch": 1.9417249417249418, "grad_norm": 2.0199474623334037, "learning_rate": 4.040065803312921e-06, "loss": 0.3619, "mean_token_accuracy": 0.8977847695350647, "num_tokens": 3893582.0, "step": 417 }, { "epoch": 1.9463869463869465, "grad_norm": 1.9355091980822123, "learning_rate": 4.017031324989669e-06, "loss": 0.3382, "mean_token_accuracy": 0.9016455113887787, "num_tokens": 3903096.0, "step": 418 }, { "epoch": 1.951048951048951, "grad_norm": 2.0385295249371067, "learning_rate": 3.994040355222828e-06, "loss": 0.3558, "mean_token_accuracy": 0.899684876203537, "num_tokens": 3912289.0, "step": 419 }, { "epoch": 1.9557109557109555, "grad_norm": 2.1721644040047225, "learning_rate": 3.971093568540408e-06, "loss": 0.3711, "mean_token_accuracy": 0.893602579832077, "num_tokens": 3921342.0, "step": 420 }, { "epoch": 1.9603729603729603, "grad_norm": 2.173567052204643, "learning_rate": 3.948191638174135e-06, "loss": 0.3468, "mean_token_accuracy": 0.9026708602905273, "num_tokens": 3930927.0, "step": 421 }, { "epoch": 1.965034965034965, "grad_norm": 2.183705481115713, "learning_rate": 3.925335236039702e-06, "loss": 0.3842, "mean_token_accuracy": 0.8991383612155914, "num_tokens": 3939964.0, "step": 422 }, { "epoch": 1.9696969696969697, "grad_norm": 1.8559395167069694, "learning_rate": 3.902525032717067e-06, "loss": 0.3593, "mean_token_accuracy": 0.8971259593963623, "num_tokens": 3949279.0, "step": 423 }, { "epoch": 1.9743589743589745, "grad_norm": 1.9573191314090432, "learning_rate": 3.879761697430756e-06, "loss": 0.3379, "mean_token_accuracy": 0.9067609906196594, "num_tokens": 3957777.0, "step": 424 }, { "epoch": 1.9790209790209792, "grad_norm": 2.0757659987817423, "learning_rate": 3.8570458980302526e-06, "loss": 0.3598, "mean_token_accuracy": 0.8982405662536621, "num_tokens": 3967234.0, "step": 425 }, { "epoch": 1.9836829836829837, "grad_norm": 2.1061841538720314, "learning_rate": 3.834378300970385e-06, "loss": 0.3595, "mean_token_accuracy": 0.9008378386497498, "num_tokens": 3976342.0, "step": 426 }, { "epoch": 1.9883449883449882, "grad_norm": 2.012444035670508, "learning_rate": 3.811759571291792e-06, "loss": 0.3734, "mean_token_accuracy": 0.8925457000732422, "num_tokens": 3985787.0, "step": 427 }, { "epoch": 1.993006993006993, "grad_norm": 2.0766910903000557, "learning_rate": 3.789190372601387e-06, "loss": 0.3746, "mean_token_accuracy": 0.8980415463447571, "num_tokens": 3995313.0, "step": 428 }, { "epoch": 1.9976689976689976, "grad_norm": 2.3515299611822744, "learning_rate": 3.7666713670529153e-06, "loss": 0.3756, "mean_token_accuracy": 0.8946518898010254, "num_tokens": 4004696.0, "step": 429 }, { "epoch": 2.0, "grad_norm": 2.8965666457221966, "learning_rate": 3.7442032153275053e-06, "loss": 0.2816, "mean_token_accuracy": 0.9159619212150574, "num_tokens": 4006594.0, "step": 430 }, { "epoch": 2.0046620046620047, "grad_norm": 1.9140541750140387, "learning_rate": 3.7217865766143014e-06, "loss": 0.2339, "mean_token_accuracy": 0.9410516619682312, "num_tokens": 4016177.0, "step": 431 }, { "epoch": 2.0093240093240095, "grad_norm": 1.9667546418678483, "learning_rate": 3.6994221085911107e-06, "loss": 0.2227, "mean_token_accuracy": 0.9423635900020599, "num_tokens": 4025325.0, "step": 432 }, { "epoch": 2.013986013986014, "grad_norm": 1.9133395961990365, "learning_rate": 3.6771104674051184e-06, "loss": 0.2103, "mean_token_accuracy": 0.9473540186882019, "num_tokens": 4034580.0, "step": 433 }, { "epoch": 2.0186480186480185, "grad_norm": 2.010682976434144, "learning_rate": 3.654852307653628e-06, "loss": 0.2033, "mean_token_accuracy": 0.9448116421699524, "num_tokens": 4043927.0, "step": 434 }, { "epoch": 2.023310023310023, "grad_norm": 2.264710867597396, "learning_rate": 3.6326482823648656e-06, "loss": 0.1945, "mean_token_accuracy": 0.9458509981632233, "num_tokens": 4052748.0, "step": 435 }, { "epoch": 2.027972027972028, "grad_norm": 3.3055471689921445, "learning_rate": 3.6104990429788102e-06, "loss": 0.1809, "mean_token_accuracy": 0.9497792422771454, "num_tokens": 4062547.0, "step": 436 }, { "epoch": 2.0326340326340326, "grad_norm": 3.545040453235985, "learning_rate": 3.588405239328091e-06, "loss": 0.1825, "mean_token_accuracy": 0.9520241916179657, "num_tokens": 4072151.0, "step": 437 }, { "epoch": 2.0372960372960374, "grad_norm": 4.174805075882892, "learning_rate": 3.5663675196189184e-06, "loss": 0.2058, "mean_token_accuracy": 0.9453835189342499, "num_tokens": 4081740.0, "step": 438 }, { "epoch": 2.041958041958042, "grad_norm": 2.8755369630384786, "learning_rate": 3.5443865304120607e-06, "loss": 0.1838, "mean_token_accuracy": 0.9492034912109375, "num_tokens": 4090794.0, "step": 439 }, { "epoch": 2.046620046620047, "grad_norm": 2.7985718626593763, "learning_rate": 3.522462916603887e-06, "loss": 0.2009, "mean_token_accuracy": 0.9445820152759552, "num_tokens": 4099715.0, "step": 440 }, { "epoch": 2.051282051282051, "grad_norm": 2.307748489794371, "learning_rate": 3.500597321407435e-06, "loss": 0.1825, "mean_token_accuracy": 0.9495159089565277, "num_tokens": 4109312.0, "step": 441 }, { "epoch": 2.055944055944056, "grad_norm": 2.167026623774839, "learning_rate": 3.478790386333546e-06, "loss": 0.2046, "mean_token_accuracy": 0.9436551332473755, "num_tokens": 4118384.0, "step": 442 }, { "epoch": 2.0606060606060606, "grad_norm": 2.197047391901218, "learning_rate": 3.45704275117204e-06, "loss": 0.2117, "mean_token_accuracy": 0.9415621161460876, "num_tokens": 4127809.0, "step": 443 }, { "epoch": 2.0652680652680653, "grad_norm": 1.944924615630873, "learning_rate": 3.435355053972953e-06, "loss": 0.1827, "mean_token_accuracy": 0.9510793387889862, "num_tokens": 4137712.0, "step": 444 }, { "epoch": 2.06993006993007, "grad_norm": 2.3278118252307975, "learning_rate": 3.4137279310278054e-06, "loss": 0.2021, "mean_token_accuracy": 0.9442847967147827, "num_tokens": 4148072.0, "step": 445 }, { "epoch": 2.0745920745920747, "grad_norm": 2.094002783532905, "learning_rate": 3.392162016850945e-06, "loss": 0.1972, "mean_token_accuracy": 0.9459817409515381, "num_tokens": 4158042.0, "step": 446 }, { "epoch": 2.0792540792540795, "grad_norm": 2.142035620280267, "learning_rate": 3.3706579441609224e-06, "loss": 0.1887, "mean_token_accuracy": 0.9461734592914581, "num_tokens": 4167033.0, "step": 447 }, { "epoch": 2.0839160839160837, "grad_norm": 2.4332724683825884, "learning_rate": 3.3492163438619362e-06, "loss": 0.2023, "mean_token_accuracy": 0.9443434476852417, "num_tokens": 4176760.0, "step": 448 }, { "epoch": 2.0885780885780885, "grad_norm": 2.228646127092191, "learning_rate": 3.327837845025315e-06, "loss": 0.1917, "mean_token_accuracy": 0.9497533142566681, "num_tokens": 4186226.0, "step": 449 }, { "epoch": 2.093240093240093, "grad_norm": 2.459774250577539, "learning_rate": 3.3065230748710646e-06, "loss": 0.2124, "mean_token_accuracy": 0.9428280293941498, "num_tokens": 4195499.0, "step": 450 }, { "epoch": 2.097902097902098, "grad_norm": 2.4038605044962686, "learning_rate": 3.2852726587494673e-06, "loss": 0.1825, "mean_token_accuracy": 0.9510272741317749, "num_tokens": 4204395.0, "step": 451 }, { "epoch": 2.1025641025641026, "grad_norm": 2.4928140282417517, "learning_rate": 3.2640872201227364e-06, "loss": 0.1967, "mean_token_accuracy": 0.9442079365253448, "num_tokens": 4213737.0, "step": 452 }, { "epoch": 2.1072261072261074, "grad_norm": 2.256378296799341, "learning_rate": 3.242967380546717e-06, "loss": 0.1826, "mean_token_accuracy": 0.947964072227478, "num_tokens": 4223235.0, "step": 453 }, { "epoch": 2.111888111888112, "grad_norm": 2.1809559104100487, "learning_rate": 3.221913759652657e-06, "loss": 0.1738, "mean_token_accuracy": 0.9498437345027924, "num_tokens": 4232422.0, "step": 454 }, { "epoch": 2.1165501165501164, "grad_norm": 2.2886117964470643, "learning_rate": 3.20092697512903e-06, "loss": 0.2023, "mean_token_accuracy": 0.9447461664676666, "num_tokens": 4241879.0, "step": 455 }, { "epoch": 2.121212121212121, "grad_norm": 2.212820158000472, "learning_rate": 3.180007642703402e-06, "loss": 0.1865, "mean_token_accuracy": 0.9497852921485901, "num_tokens": 4251371.0, "step": 456 }, { "epoch": 2.125874125874126, "grad_norm": 2.2290617079488553, "learning_rate": 3.1591563761243816e-06, "loss": 0.1904, "mean_token_accuracy": 0.9462641477584839, "num_tokens": 4260491.0, "step": 457 }, { "epoch": 2.1305361305361306, "grad_norm": 2.143157870337979, "learning_rate": 3.138373787143598e-06, "loss": 0.1825, "mean_token_accuracy": 0.9492957592010498, "num_tokens": 4269810.0, "step": 458 }, { "epoch": 2.1351981351981353, "grad_norm": 2.38741737774062, "learning_rate": 3.1176604854977665e-06, "loss": 0.1902, "mean_token_accuracy": 0.9479398429393768, "num_tokens": 4279116.0, "step": 459 }, { "epoch": 2.13986013986014, "grad_norm": 2.3435622778544203, "learning_rate": 3.0970170788907878e-06, "loss": 0.2016, "mean_token_accuracy": 0.9475513100624084, "num_tokens": 4288073.0, "step": 460 }, { "epoch": 2.1445221445221447, "grad_norm": 2.338474318951203, "learning_rate": 3.076444172975932e-06, "loss": 0.1734, "mean_token_accuracy": 0.952549546957016, "num_tokens": 4296974.0, "step": 461 }, { "epoch": 2.149184149184149, "grad_norm": 2.5215652212493653, "learning_rate": 3.055942371338052e-06, "loss": 0.199, "mean_token_accuracy": 0.9409568309783936, "num_tokens": 4306323.0, "step": 462 }, { "epoch": 2.1538461538461537, "grad_norm": 2.327317690460305, "learning_rate": 3.035512275475896e-06, "loss": 0.1827, "mean_token_accuracy": 0.9496185481548309, "num_tokens": 4315250.0, "step": 463 }, { "epoch": 2.1585081585081585, "grad_norm": 2.210597087672196, "learning_rate": 3.0151544847844385e-06, "loss": 0.1742, "mean_token_accuracy": 0.9511962532997131, "num_tokens": 4324195.0, "step": 464 }, { "epoch": 2.163170163170163, "grad_norm": 2.3071386492578045, "learning_rate": 2.994869596537316e-06, "loss": 0.1983, "mean_token_accuracy": 0.9448690712451935, "num_tokens": 4333101.0, "step": 465 }, { "epoch": 2.167832167832168, "grad_norm": 2.2596793138798104, "learning_rate": 2.9746582058692803e-06, "loss": 0.1803, "mean_token_accuracy": 0.9491128325462341, "num_tokens": 4341626.0, "step": 466 }, { "epoch": 2.1724941724941726, "grad_norm": 2.2305243738775964, "learning_rate": 2.954520905758762e-06, "loss": 0.1935, "mean_token_accuracy": 0.9493462145328522, "num_tokens": 4350773.0, "step": 467 }, { "epoch": 2.177156177156177, "grad_norm": 1.8784409884434285, "learning_rate": 2.934458287010455e-06, "loss": 0.1608, "mean_token_accuracy": 0.9552096724510193, "num_tokens": 4360531.0, "step": 468 }, { "epoch": 2.1818181818181817, "grad_norm": 2.419757102541458, "learning_rate": 2.9144709382379955e-06, "loss": 0.1929, "mean_token_accuracy": 0.9458503425121307, "num_tokens": 4370583.0, "step": 469 }, { "epoch": 2.1864801864801864, "grad_norm": 2.1959363632762354, "learning_rate": 2.894559445846682e-06, "loss": 0.1891, "mean_token_accuracy": 0.9474013149738312, "num_tokens": 4380422.0, "step": 470 }, { "epoch": 2.191142191142191, "grad_norm": 2.4899530178247877, "learning_rate": 2.8747243940162774e-06, "loss": 0.2115, "mean_token_accuracy": 0.9412341713905334, "num_tokens": 4389278.0, "step": 471 }, { "epoch": 2.195804195804196, "grad_norm": 2.4399237002135146, "learning_rate": 2.854966364683872e-06, "loss": 0.1853, "mean_token_accuracy": 0.9496332406997681, "num_tokens": 4397770.0, "step": 472 }, { "epoch": 2.2004662004662006, "grad_norm": 2.215224089050249, "learning_rate": 2.835285937526801e-06, "loss": 0.1783, "mean_token_accuracy": 0.9523276388645172, "num_tokens": 4407968.0, "step": 473 }, { "epoch": 2.2051282051282053, "grad_norm": 2.2820905304008647, "learning_rate": 2.8156836899456475e-06, "loss": 0.1972, "mean_token_accuracy": 0.9460384547710419, "num_tokens": 4417210.0, "step": 474 }, { "epoch": 2.20979020979021, "grad_norm": 2.261724927462342, "learning_rate": 2.796160197047294e-06, "loss": 0.1754, "mean_token_accuracy": 0.9524807631969452, "num_tokens": 4427199.0, "step": 475 }, { "epoch": 2.2144522144522143, "grad_norm": 2.2910100819309966, "learning_rate": 2.7767160316280583e-06, "loss": 0.187, "mean_token_accuracy": 0.9480907320976257, "num_tokens": 4435729.0, "step": 476 }, { "epoch": 2.219114219114219, "grad_norm": 2.5356260307985727, "learning_rate": 2.757351764156877e-06, "loss": 0.1898, "mean_token_accuracy": 0.9483682215213776, "num_tokens": 4444444.0, "step": 477 }, { "epoch": 2.2237762237762237, "grad_norm": 2.185337315098767, "learning_rate": 2.7380679627585817e-06, "loss": 0.1746, "mean_token_accuracy": 0.9521575570106506, "num_tokens": 4453166.0, "step": 478 }, { "epoch": 2.2284382284382285, "grad_norm": 2.379831452455389, "learning_rate": 2.718865193197218e-06, "loss": 0.1947, "mean_token_accuracy": 0.9449348151683807, "num_tokens": 4462042.0, "step": 479 }, { "epoch": 2.233100233100233, "grad_norm": 2.171897352199924, "learning_rate": 2.699744018859457e-06, "loss": 0.1717, "mean_token_accuracy": 0.9506651163101196, "num_tokens": 4471400.0, "step": 480 }, { "epoch": 2.237762237762238, "grad_norm": 2.216542641182696, "learning_rate": 2.680705000738061e-06, "loss": 0.1691, "mean_token_accuracy": 0.9513165950775146, "num_tokens": 4481107.0, "step": 481 }, { "epoch": 2.242424242424242, "grad_norm": 2.1820463594567134, "learning_rate": 2.661748697415423e-06, "loss": 0.1753, "mean_token_accuracy": 0.94921013712883, "num_tokens": 4490865.0, "step": 482 }, { "epoch": 2.247086247086247, "grad_norm": 2.3975273161711868, "learning_rate": 2.642875665047182e-06, "loss": 0.2074, "mean_token_accuracy": 0.9420484900474548, "num_tokens": 4500700.0, "step": 483 }, { "epoch": 2.2517482517482517, "grad_norm": 2.3576310035425885, "learning_rate": 2.6240864573459095e-06, "loss": 0.1933, "mean_token_accuracy": 0.948281466960907, "num_tokens": 4509781.0, "step": 484 }, { "epoch": 2.2564102564102564, "grad_norm": 2.1072685233889783, "learning_rate": 2.6053816255648543e-06, "loss": 0.1791, "mean_token_accuracy": 0.9498388171195984, "num_tokens": 4519055.0, "step": 485 }, { "epoch": 2.261072261072261, "grad_norm": 2.3694467761113365, "learning_rate": 2.586761718481776e-06, "loss": 0.2016, "mean_token_accuracy": 0.9420836865901947, "num_tokens": 4528368.0, "step": 486 }, { "epoch": 2.265734265734266, "grad_norm": 2.354865943769181, "learning_rate": 2.5682272823828474e-06, "loss": 0.195, "mean_token_accuracy": 0.9475519955158234, "num_tokens": 4537216.0, "step": 487 }, { "epoch": 2.2703962703962706, "grad_norm": 2.0798315233476146, "learning_rate": 2.5497788610466177e-06, "loss": 0.1775, "mean_token_accuracy": 0.9497499167919159, "num_tokens": 4546595.0, "step": 488 }, { "epoch": 2.2750582750582753, "grad_norm": 2.245906905354928, "learning_rate": 2.53141699572807e-06, "loss": 0.1873, "mean_token_accuracy": 0.9471332430839539, "num_tokens": 4555647.0, "step": 489 }, { "epoch": 2.2797202797202796, "grad_norm": 2.3166573667243973, "learning_rate": 2.5131422251427313e-06, "loss": 0.179, "mean_token_accuracy": 0.9487544298171997, "num_tokens": 4565487.0, "step": 490 }, { "epoch": 2.2843822843822843, "grad_norm": 2.356746974573123, "learning_rate": 2.4949550854508713e-06, "loss": 0.1977, "mean_token_accuracy": 0.9462016522884369, "num_tokens": 4574193.0, "step": 491 }, { "epoch": 2.289044289044289, "grad_norm": 2.3601704690415213, "learning_rate": 2.476856110241773e-06, "loss": 0.1963, "mean_token_accuracy": 0.9484397768974304, "num_tokens": 4583652.0, "step": 492 }, { "epoch": 2.2937062937062938, "grad_norm": 2.194739252262633, "learning_rate": 2.458845830518082e-06, "loss": 0.1808, "mean_token_accuracy": 0.948834627866745, "num_tokens": 4593314.0, "step": 493 }, { "epoch": 2.2983682983682985, "grad_norm": 2.262428951601218, "learning_rate": 2.440924774680215e-06, "loss": 0.196, "mean_token_accuracy": 0.9466191530227661, "num_tokens": 4602528.0, "step": 494 }, { "epoch": 2.303030303030303, "grad_norm": 2.2737186057132246, "learning_rate": 2.4230934685108707e-06, "loss": 0.1959, "mean_token_accuracy": 0.948832631111145, "num_tokens": 4611387.0, "step": 495 }, { "epoch": 2.3076923076923075, "grad_norm": 2.19558751291413, "learning_rate": 2.405352435159595e-06, "loss": 0.1832, "mean_token_accuracy": 0.9490224421024323, "num_tokens": 4620809.0, "step": 496 }, { "epoch": 2.312354312354312, "grad_norm": 2.222802090174067, "learning_rate": 2.3877021951274374e-06, "loss": 0.1911, "mean_token_accuracy": 0.9486294388771057, "num_tokens": 4629397.0, "step": 497 }, { "epoch": 2.317016317016317, "grad_norm": 2.206389027273452, "learning_rate": 2.3701432662516772e-06, "loss": 0.1727, "mean_token_accuracy": 0.9500547051429749, "num_tokens": 4638953.0, "step": 498 }, { "epoch": 2.3216783216783217, "grad_norm": 2.5956601776793056, "learning_rate": 2.3526761636906313e-06, "loss": 0.1849, "mean_token_accuracy": 0.9497621655464172, "num_tokens": 4648328.0, "step": 499 }, { "epoch": 2.3263403263403264, "grad_norm": 2.222447522693371, "learning_rate": 2.3353013999085402e-06, "loss": 0.1878, "mean_token_accuracy": 0.9493353068828583, "num_tokens": 4658253.0, "step": 500 }, { "epoch": 2.331002331002331, "grad_norm": 2.1725903860996265, "learning_rate": 2.3180194846605367e-06, "loss": 0.1731, "mean_token_accuracy": 0.9540884494781494, "num_tokens": 4667108.0, "step": 501 }, { "epoch": 2.335664335664336, "grad_norm": 2.2106617960868546, "learning_rate": 2.300830924977683e-06, "loss": 0.1827, "mean_token_accuracy": 0.9487708210945129, "num_tokens": 4675537.0, "step": 502 }, { "epoch": 2.3403263403263406, "grad_norm": 2.26357210261458, "learning_rate": 2.283736225152099e-06, "loss": 0.1781, "mean_token_accuracy": 0.9498123228549957, "num_tokens": 4684963.0, "step": 503 }, { "epoch": 2.344988344988345, "grad_norm": 2.2970053926748366, "learning_rate": 2.26673588672217e-06, "loss": 0.1947, "mean_token_accuracy": 0.9470812678337097, "num_tokens": 4694167.0, "step": 504 }, { "epoch": 2.3496503496503496, "grad_norm": 2.180761219218081, "learning_rate": 2.249830408457826e-06, "loss": 0.1734, "mean_token_accuracy": 0.9532337486743927, "num_tokens": 4704455.0, "step": 505 }, { "epoch": 2.3543123543123543, "grad_norm": 2.1592931519499854, "learning_rate": 2.2330202863459123e-06, "loss": 0.18, "mean_token_accuracy": 0.9502027928829193, "num_tokens": 4714149.0, "step": 506 }, { "epoch": 2.358974358974359, "grad_norm": 2.3049252234685382, "learning_rate": 2.2163060135756364e-06, "loss": 0.193, "mean_token_accuracy": 0.9451474845409393, "num_tokens": 4724018.0, "step": 507 }, { "epoch": 2.3636363636363638, "grad_norm": 2.047360241682356, "learning_rate": 2.1996880805241e-06, "loss": 0.1603, "mean_token_accuracy": 0.9543373584747314, "num_tokens": 4733728.0, "step": 508 }, { "epoch": 2.3682983682983685, "grad_norm": 2.243883991122542, "learning_rate": 2.1831669747419093e-06, "loss": 0.1895, "mean_token_accuracy": 0.9504224061965942, "num_tokens": 4743424.0, "step": 509 }, { "epoch": 2.3729603729603728, "grad_norm": 2.0125158383022383, "learning_rate": 2.166743180938875e-06, "loss": 0.1592, "mean_token_accuracy": 0.9573898613452911, "num_tokens": 4753172.0, "step": 510 }, { "epoch": 2.3776223776223775, "grad_norm": 1.9899532151234143, "learning_rate": 2.150417180969784e-06, "loss": 0.1689, "mean_token_accuracy": 0.9538862705230713, "num_tokens": 4763449.0, "step": 511 }, { "epoch": 2.382284382284382, "grad_norm": 2.1855347374072602, "learning_rate": 2.1341894538202726e-06, "loss": 0.1933, "mean_token_accuracy": 0.9471964538097382, "num_tokens": 4773131.0, "step": 512 }, { "epoch": 2.386946386946387, "grad_norm": 2.1191232295552638, "learning_rate": 2.1180604755927646e-06, "loss": 0.1705, "mean_token_accuracy": 0.9518732130527496, "num_tokens": 4782766.0, "step": 513 }, { "epoch": 2.3916083916083917, "grad_norm": 2.1956924392039268, "learning_rate": 2.102030719492508e-06, "loss": 0.1917, "mean_token_accuracy": 0.9490616321563721, "num_tokens": 4792428.0, "step": 514 }, { "epoch": 2.3962703962703964, "grad_norm": 2.2810533470677705, "learning_rate": 2.086100655813688e-06, "loss": 0.1862, "mean_token_accuracy": 0.9471859931945801, "num_tokens": 4801662.0, "step": 515 }, { "epoch": 2.400932400932401, "grad_norm": 2.244633860144498, "learning_rate": 2.0702707519256365e-06, "loss": 0.18, "mean_token_accuracy": 0.9517810344696045, "num_tokens": 4810704.0, "step": 516 }, { "epoch": 2.4055944055944054, "grad_norm": 2.494051955620123, "learning_rate": 2.0545414722591096e-06, "loss": 0.1864, "mean_token_accuracy": 0.9491457939147949, "num_tokens": 4819473.0, "step": 517 }, { "epoch": 2.41025641025641, "grad_norm": 2.0714876270735867, "learning_rate": 2.03891327829267e-06, "loss": 0.1611, "mean_token_accuracy": 0.9556960761547089, "num_tokens": 4829237.0, "step": 518 }, { "epoch": 2.414918414918415, "grad_norm": 2.1368173991326387, "learning_rate": 2.0233866285391455e-06, "loss": 0.175, "mean_token_accuracy": 0.9533757269382477, "num_tokens": 4838770.0, "step": 519 }, { "epoch": 2.4195804195804196, "grad_norm": 2.4559899729586157, "learning_rate": 2.0079619785321713e-06, "loss": 0.178, "mean_token_accuracy": 0.9503377377986908, "num_tokens": 4848313.0, "step": 520 }, { "epoch": 2.4242424242424243, "grad_norm": 2.408883608780403, "learning_rate": 1.992639780812838e-06, "loss": 0.1797, "mean_token_accuracy": 0.9508785903453827, "num_tokens": 4856905.0, "step": 521 }, { "epoch": 2.428904428904429, "grad_norm": 2.4178249725991776, "learning_rate": 1.9774204849164004e-06, "loss": 0.1909, "mean_token_accuracy": 0.9491060078144073, "num_tokens": 4866602.0, "step": 522 }, { "epoch": 2.4335664335664333, "grad_norm": 2.1757038919566085, "learning_rate": 1.9623045373590955e-06, "loss": 0.1734, "mean_token_accuracy": 0.9516046643257141, "num_tokens": 4875823.0, "step": 523 }, { "epoch": 2.438228438228438, "grad_norm": 2.27555860775744, "learning_rate": 1.9472923816250427e-06, "loss": 0.1869, "mean_token_accuracy": 0.9469164311885834, "num_tokens": 4885891.0, "step": 524 }, { "epoch": 2.4428904428904428, "grad_norm": 2.3720687067635517, "learning_rate": 1.9323844581532334e-06, "loss": 0.1901, "mean_token_accuracy": 0.9472060799598694, "num_tokens": 4895721.0, "step": 525 }, { "epoch": 2.4475524475524475, "grad_norm": 2.3467414380479483, "learning_rate": 1.9175812043246034e-06, "loss": 0.1904, "mean_token_accuracy": 0.9455529749393463, "num_tokens": 4905678.0, "step": 526 }, { "epoch": 2.4522144522144522, "grad_norm": 2.1506573419595045, "learning_rate": 1.9028830544492074e-06, "loss": 0.1903, "mean_token_accuracy": 0.948146402835846, "num_tokens": 4915087.0, "step": 527 }, { "epoch": 2.456876456876457, "grad_norm": 2.379645454246707, "learning_rate": 1.8882904397534705e-06, "loss": 0.2152, "mean_token_accuracy": 0.9423483908176422, "num_tokens": 4924020.0, "step": 528 }, { "epoch": 2.4615384615384617, "grad_norm": 2.264126564788235, "learning_rate": 1.8738037883675445e-06, "loss": 0.1844, "mean_token_accuracy": 0.9492262303829193, "num_tokens": 4933023.0, "step": 529 }, { "epoch": 2.4662004662004664, "grad_norm": 2.190357275615204, "learning_rate": 1.8594235253127373e-06, "loss": 0.1996, "mean_token_accuracy": 0.9468533098697662, "num_tokens": 4941720.0, "step": 530 }, { "epoch": 2.4708624708624707, "grad_norm": 2.0683527870115483, "learning_rate": 1.8451500724890509e-06, "loss": 0.1852, "mean_token_accuracy": 0.9456667006015778, "num_tokens": 4951562.0, "step": 531 }, { "epoch": 2.4755244755244754, "grad_norm": 2.3584045584131617, "learning_rate": 1.8309838486627995e-06, "loss": 0.2036, "mean_token_accuracy": 0.94382044672966, "num_tokens": 4960478.0, "step": 532 }, { "epoch": 2.48018648018648, "grad_norm": 2.1234920339277044, "learning_rate": 1.816925269454327e-06, "loss": 0.1666, "mean_token_accuracy": 0.9552336037158966, "num_tokens": 4970160.0, "step": 533 }, { "epoch": 2.484848484848485, "grad_norm": 2.3345270471564303, "learning_rate": 1.8029747473258092e-06, "loss": 0.1859, "mean_token_accuracy": 0.9483939707279205, "num_tokens": 4979864.0, "step": 534 }, { "epoch": 2.4895104895104896, "grad_norm": 2.407762000225371, "learning_rate": 1.789132691569153e-06, "loss": 0.195, "mean_token_accuracy": 0.9456151127815247, "num_tokens": 4988667.0, "step": 535 }, { "epoch": 2.4941724941724943, "grad_norm": 2.347393889553957, "learning_rate": 1.7753995082939932e-06, "loss": 0.1732, "mean_token_accuracy": 0.9526363909244537, "num_tokens": 4998687.0, "step": 536 }, { "epoch": 2.4988344988344986, "grad_norm": 2.5390123853320445, "learning_rate": 1.7617756004157693e-06, "loss": 0.1987, "mean_token_accuracy": 0.945299506187439, "num_tokens": 5007734.0, "step": 537 }, { "epoch": 2.5034965034965033, "grad_norm": 2.438556510312228, "learning_rate": 1.7482613676439153e-06, "loss": 0.1943, "mean_token_accuracy": 0.9447510838508606, "num_tokens": 5016545.0, "step": 538 }, { "epoch": 2.508158508158508, "grad_norm": 2.3154480135315194, "learning_rate": 1.7348572064701188e-06, "loss": 0.1947, "mean_token_accuracy": 0.9469634890556335, "num_tokens": 5025652.0, "step": 539 }, { "epoch": 2.5128205128205128, "grad_norm": 2.149785741270873, "learning_rate": 1.721563510156704e-06, "loss": 0.1718, "mean_token_accuracy": 0.9520467817783356, "num_tokens": 5035567.0, "step": 540 }, { "epoch": 2.5174825174825175, "grad_norm": 2.4390965002580463, "learning_rate": 1.7083806687250795e-06, "loss": 0.1999, "mean_token_accuracy": 0.947026789188385, "num_tokens": 5044442.0, "step": 541 }, { "epoch": 2.5221445221445222, "grad_norm": 2.294881333986716, "learning_rate": 1.6953090689443074e-06, "loss": 0.1868, "mean_token_accuracy": 0.9499901831150055, "num_tokens": 5053663.0, "step": 542 }, { "epoch": 2.526806526806527, "grad_norm": 2.2057605088088197, "learning_rate": 1.6823490943197473e-06, "loss": 0.1719, "mean_token_accuracy": 0.9531058371067047, "num_tokens": 5063344.0, "step": 543 }, { "epoch": 2.5314685314685317, "grad_norm": 2.4507085825396007, "learning_rate": 1.6695011250818094e-06, "loss": 0.1717, "mean_token_accuracy": 0.9547788798809052, "num_tokens": 5073038.0, "step": 544 }, { "epoch": 2.5361305361305364, "grad_norm": 2.3638435763054493, "learning_rate": 1.6567655381747976e-06, "loss": 0.1927, "mean_token_accuracy": 0.9466958940029144, "num_tokens": 5081603.0, "step": 545 }, { "epoch": 2.5407925407925407, "grad_norm": 2.32752114137372, "learning_rate": 1.6441427072458493e-06, "loss": 0.1958, "mean_token_accuracy": 0.9444697499275208, "num_tokens": 5091091.0, "step": 546 }, { "epoch": 2.5454545454545454, "grad_norm": 2.509857709791671, "learning_rate": 1.6316330026339743e-06, "loss": 0.213, "mean_token_accuracy": 0.941786378622055, "num_tokens": 5100278.0, "step": 547 }, { "epoch": 2.55011655011655, "grad_norm": 2.0952838106121914, "learning_rate": 1.6192367913591916e-06, "loss": 0.1653, "mean_token_accuracy": 0.9539439380168915, "num_tokens": 5110291.0, "step": 548 }, { "epoch": 2.554778554778555, "grad_norm": 2.4251986988481704, "learning_rate": 1.6069544371117556e-06, "loss": 0.1737, "mean_token_accuracy": 0.9515743851661682, "num_tokens": 5119413.0, "step": 549 }, { "epoch": 2.5594405594405596, "grad_norm": 2.2675536817510276, "learning_rate": 1.5947863002414938e-06, "loss": 0.1816, "mean_token_accuracy": 0.9483968019485474, "num_tokens": 5128968.0, "step": 550 }, { "epoch": 2.564102564102564, "grad_norm": 2.225047159227379, "learning_rate": 1.5827327377472262e-06, "loss": 0.1872, "mean_token_accuracy": 0.9502107799053192, "num_tokens": 5138040.0, "step": 551 }, { "epoch": 2.5687645687645686, "grad_norm": 2.5646265404291033, "learning_rate": 1.5707941032662967e-06, "loss": 0.1941, "mean_token_accuracy": 0.9476030170917511, "num_tokens": 5147278.0, "step": 552 }, { "epoch": 2.5734265734265733, "grad_norm": 2.512848592730855, "learning_rate": 1.558970747064198e-06, "loss": 0.184, "mean_token_accuracy": 0.9474705457687378, "num_tokens": 5156257.0, "step": 553 }, { "epoch": 2.578088578088578, "grad_norm": 2.092641334488186, "learning_rate": 1.5472630160242921e-06, "loss": 0.1692, "mean_token_accuracy": 0.9498989582061768, "num_tokens": 5166290.0, "step": 554 }, { "epoch": 2.582750582750583, "grad_norm": 2.232715743448255, "learning_rate": 1.5356712536376345e-06, "loss": 0.1803, "mean_token_accuracy": 0.9500272274017334, "num_tokens": 5176118.0, "step": 555 }, { "epoch": 2.5874125874125875, "grad_norm": 2.5230454773039828, "learning_rate": 1.5241957999928974e-06, "loss": 0.1689, "mean_token_accuracy": 0.952000617980957, "num_tokens": 5185921.0, "step": 556 }, { "epoch": 2.5920745920745922, "grad_norm": 2.6026889894908978, "learning_rate": 1.5128369917663924e-06, "loss": 0.1873, "mean_token_accuracy": 0.9518662393093109, "num_tokens": 5194883.0, "step": 557 }, { "epoch": 2.596736596736597, "grad_norm": 2.4331824963283375, "learning_rate": 1.5015951622121896e-06, "loss": 0.1782, "mean_token_accuracy": 0.9511874914169312, "num_tokens": 5203915.0, "step": 558 }, { "epoch": 2.6013986013986012, "grad_norm": 2.3494840072703598, "learning_rate": 1.490470641152345e-06, "loss": 0.1832, "mean_token_accuracy": 0.9490853250026703, "num_tokens": 5212979.0, "step": 559 }, { "epoch": 2.606060606060606, "grad_norm": 2.37225992368595, "learning_rate": 1.4794637549672182e-06, "loss": 0.2049, "mean_token_accuracy": 0.9459311068058014, "num_tokens": 5222060.0, "step": 560 }, { "epoch": 2.6107226107226107, "grad_norm": 2.2581667401414354, "learning_rate": 1.4685748265859043e-06, "loss": 0.1853, "mean_token_accuracy": 0.949056476354599, "num_tokens": 5230955.0, "step": 561 }, { "epoch": 2.6153846153846154, "grad_norm": 2.049312050186223, "learning_rate": 1.457804175476751e-06, "loss": 0.1627, "mean_token_accuracy": 0.9556883871555328, "num_tokens": 5241027.0, "step": 562 }, { "epoch": 2.62004662004662, "grad_norm": 2.2972322506270095, "learning_rate": 1.447152117637992e-06, "loss": 0.185, "mean_token_accuracy": 0.9484710693359375, "num_tokens": 5250218.0, "step": 563 }, { "epoch": 2.624708624708625, "grad_norm": 2.1577151240414443, "learning_rate": 1.436618965588472e-06, "loss": 0.1715, "mean_token_accuracy": 0.9517091810703278, "num_tokens": 5259812.0, "step": 564 }, { "epoch": 2.629370629370629, "grad_norm": 2.2654942496472836, "learning_rate": 1.4262050283584836e-06, "loss": 0.1708, "mean_token_accuracy": 0.952095627784729, "num_tokens": 5268750.0, "step": 565 }, { "epoch": 2.634032634032634, "grad_norm": 2.0539513008080132, "learning_rate": 1.4159106114806943e-06, "loss": 0.1703, "mean_token_accuracy": 0.9545489847660065, "num_tokens": 5277570.0, "step": 566 }, { "epoch": 2.6386946386946386, "grad_norm": 2.2352237949709814, "learning_rate": 1.4057360169811832e-06, "loss": 0.1856, "mean_token_accuracy": 0.9448749423027039, "num_tokens": 5287563.0, "step": 567 }, { "epoch": 2.6433566433566433, "grad_norm": 2.274026900774616, "learning_rate": 1.3956815433705861e-06, "loss": 0.1854, "mean_token_accuracy": 0.9487999975681305, "num_tokens": 5296479.0, "step": 568 }, { "epoch": 2.648018648018648, "grad_norm": 2.271359052605945, "learning_rate": 1.3857474856353299e-06, "loss": 0.1895, "mean_token_accuracy": 0.9485887885093689, "num_tokens": 5305636.0, "step": 569 }, { "epoch": 2.652680652680653, "grad_norm": 2.2832232390511935, "learning_rate": 1.3759341352289832e-06, "loss": 0.1919, "mean_token_accuracy": 0.9442925155162811, "num_tokens": 5315157.0, "step": 570 }, { "epoch": 2.6573426573426575, "grad_norm": 2.4224807634049874, "learning_rate": 1.3662417800637023e-06, "loss": 0.196, "mean_token_accuracy": 0.9467974901199341, "num_tokens": 5324117.0, "step": 571 }, { "epoch": 2.6620046620046622, "grad_norm": 2.383821092419705, "learning_rate": 1.3566707045017867e-06, "loss": 0.181, "mean_token_accuracy": 0.9486918449401855, "num_tokens": 5332491.0, "step": 572 }, { "epoch": 2.6666666666666665, "grad_norm": 2.451846629070992, "learning_rate": 1.3472211893473327e-06, "loss": 0.183, "mean_token_accuracy": 0.9497961103916168, "num_tokens": 5341476.0, "step": 573 }, { "epoch": 2.6713286713286712, "grad_norm": 2.2050228122309297, "learning_rate": 1.3378935118380004e-06, "loss": 0.1664, "mean_token_accuracy": 0.9522497951984406, "num_tokens": 5351031.0, "step": 574 }, { "epoch": 2.675990675990676, "grad_norm": 2.221685948440498, "learning_rate": 1.3286879456368746e-06, "loss": 0.1868, "mean_token_accuracy": 0.9497689306735992, "num_tokens": 5360953.0, "step": 575 }, { "epoch": 2.6806526806526807, "grad_norm": 2.3611824277077895, "learning_rate": 1.319604760824439e-06, "loss": 0.1885, "mean_token_accuracy": 0.9504337906837463, "num_tokens": 5370450.0, "step": 576 }, { "epoch": 2.6853146853146854, "grad_norm": 2.524561240638075, "learning_rate": 1.31064422389065e-06, "loss": 0.1852, "mean_token_accuracy": 0.947800487279892, "num_tokens": 5379656.0, "step": 577 }, { "epoch": 2.6899766899766897, "grad_norm": 2.4377644762322848, "learning_rate": 1.3018065977271215e-06, "loss": 0.2048, "mean_token_accuracy": 0.9430462419986725, "num_tokens": 5388537.0, "step": 578 }, { "epoch": 2.6946386946386944, "grad_norm": 2.399526282941045, "learning_rate": 1.293092141619407e-06, "loss": 0.1904, "mean_token_accuracy": 0.9497084021568298, "num_tokens": 5398026.0, "step": 579 }, { "epoch": 2.699300699300699, "grad_norm": 2.461708260111587, "learning_rate": 1.2845011112394e-06, "loss": 0.1979, "mean_token_accuracy": 0.9429396092891693, "num_tokens": 5407274.0, "step": 580 }, { "epoch": 2.703962703962704, "grad_norm": 2.1140139302951506, "learning_rate": 1.276033758637823e-06, "loss": 0.1711, "mean_token_accuracy": 0.954795241355896, "num_tokens": 5416685.0, "step": 581 }, { "epoch": 2.7086247086247086, "grad_norm": 2.256627061683475, "learning_rate": 1.2676903322368423e-06, "loss": 0.1914, "mean_token_accuracy": 0.9461691081523895, "num_tokens": 5426216.0, "step": 582 }, { "epoch": 2.7132867132867133, "grad_norm": 2.224029918327043, "learning_rate": 1.2594710768227734e-06, "loss": 0.1902, "mean_token_accuracy": 0.9479357004165649, "num_tokens": 5435218.0, "step": 583 }, { "epoch": 2.717948717948718, "grad_norm": 2.3441213495740616, "learning_rate": 1.2513762335389004e-06, "loss": 0.1952, "mean_token_accuracy": 0.9469590187072754, "num_tokens": 5444848.0, "step": 584 }, { "epoch": 2.722610722610723, "grad_norm": 2.2818966905915525, "learning_rate": 1.2434060398784039e-06, "loss": 0.1861, "mean_token_accuracy": 0.9511770606040955, "num_tokens": 5453935.0, "step": 585 }, { "epoch": 2.7272727272727275, "grad_norm": 2.527822525615972, "learning_rate": 1.2355607296773896e-06, "loss": 0.1826, "mean_token_accuracy": 0.9509838223457336, "num_tokens": 5463825.0, "step": 586 }, { "epoch": 2.731934731934732, "grad_norm": 2.3014422706991193, "learning_rate": 1.2278405331080296e-06, "loss": 0.2017, "mean_token_accuracy": 0.9433080554008484, "num_tokens": 5472633.0, "step": 587 }, { "epoch": 2.7365967365967365, "grad_norm": 2.481469694015223, "learning_rate": 1.2202456766718092e-06, "loss": 0.1929, "mean_token_accuracy": 0.9468889534473419, "num_tokens": 5481508.0, "step": 588 }, { "epoch": 2.7412587412587412, "grad_norm": 2.2243902331538803, "learning_rate": 1.212776383192883e-06, "loss": 0.1921, "mean_token_accuracy": 0.9495699405670166, "num_tokens": 5490583.0, "step": 589 }, { "epoch": 2.745920745920746, "grad_norm": 2.079107821673962, "learning_rate": 1.2054328718115336e-06, "loss": 0.171, "mean_token_accuracy": 0.9525066316127777, "num_tokens": 5500701.0, "step": 590 }, { "epoch": 2.7505827505827507, "grad_norm": 2.612790704517144, "learning_rate": 1.1982153579777483e-06, "loss": 0.1945, "mean_token_accuracy": 0.9466931521892548, "num_tokens": 5509719.0, "step": 591 }, { "epoch": 2.755244755244755, "grad_norm": 2.279498702941535, "learning_rate": 1.1911240534448899e-06, "loss": 0.2023, "mean_token_accuracy": 0.9443814754486084, "num_tokens": 5518911.0, "step": 592 }, { "epoch": 2.7599067599067597, "grad_norm": 2.302875853828702, "learning_rate": 1.1841591662634943e-06, "loss": 0.1782, "mean_token_accuracy": 0.9504655301570892, "num_tokens": 5528230.0, "step": 593 }, { "epoch": 2.7645687645687644, "grad_norm": 2.391231643668634, "learning_rate": 1.1773209007751562e-06, "loss": 0.1973, "mean_token_accuracy": 0.9472830295562744, "num_tokens": 5537899.0, "step": 594 }, { "epoch": 2.769230769230769, "grad_norm": 2.195727031813818, "learning_rate": 1.1706094576065416e-06, "loss": 0.1797, "mean_token_accuracy": 0.9503377079963684, "num_tokens": 5547675.0, "step": 595 }, { "epoch": 2.773892773892774, "grad_norm": 2.3904334187953777, "learning_rate": 1.164025033663497e-06, "loss": 0.2021, "mean_token_accuracy": 0.9435946643352509, "num_tokens": 5557022.0, "step": 596 }, { "epoch": 2.7785547785547786, "grad_norm": 2.2336433005731924, "learning_rate": 1.1575678221252763e-06, "loss": 0.178, "mean_token_accuracy": 0.9506051242351532, "num_tokens": 5566788.0, "step": 597 }, { "epoch": 2.7832167832167833, "grad_norm": 2.45559515222341, "learning_rate": 1.1512380124388695e-06, "loss": 0.1885, "mean_token_accuracy": 0.948212593793869, "num_tokens": 5575464.0, "step": 598 }, { "epoch": 2.787878787878788, "grad_norm": 2.3652897419571213, "learning_rate": 1.1450357903134463e-06, "loss": 0.1838, "mean_token_accuracy": 0.9474283754825592, "num_tokens": 5584904.0, "step": 599 }, { "epoch": 2.792540792540793, "grad_norm": 2.3554447950041304, "learning_rate": 1.1389613377149086e-06, "loss": 0.1903, "mean_token_accuracy": 0.9471111297607422, "num_tokens": 5594389.0, "step": 600 }, { "epoch": 2.797202797202797, "grad_norm": 2.2047518432891633, "learning_rate": 1.1330148328605484e-06, "loss": 0.1763, "mean_token_accuracy": 0.951262503862381, "num_tokens": 5603819.0, "step": 601 }, { "epoch": 2.801864801864802, "grad_norm": 2.291063530837646, "learning_rate": 1.127196450213825e-06, "loss": 0.1773, "mean_token_accuracy": 0.9493178725242615, "num_tokens": 5614459.0, "step": 602 }, { "epoch": 2.8065268065268065, "grad_norm": 2.11284252635657, "learning_rate": 1.1215063604792396e-06, "loss": 0.1694, "mean_token_accuracy": 0.9510103464126587, "num_tokens": 5623744.0, "step": 603 }, { "epoch": 2.8111888111888113, "grad_norm": 2.1857533437327925, "learning_rate": 1.1159447305973313e-06, "loss": 0.1835, "mean_token_accuracy": 0.9497886300086975, "num_tokens": 5632743.0, "step": 604 }, { "epoch": 2.815850815850816, "grad_norm": 2.3920794052563665, "learning_rate": 1.1105117237397777e-06, "loss": 0.1772, "mean_token_accuracy": 0.95287024974823, "num_tokens": 5641669.0, "step": 605 }, { "epoch": 2.8205128205128203, "grad_norm": 2.3412796580645745, "learning_rate": 1.1052074993046102e-06, "loss": 0.1808, "mean_token_accuracy": 0.9482883512973785, "num_tokens": 5650735.0, "step": 606 }, { "epoch": 2.825174825174825, "grad_norm": 2.3440493241719147, "learning_rate": 1.100032212911533e-06, "loss": 0.2039, "mean_token_accuracy": 0.9425942301750183, "num_tokens": 5659501.0, "step": 607 }, { "epoch": 2.8298368298368297, "grad_norm": 2.0320304598963213, "learning_rate": 1.0949860163973616e-06, "loss": 0.1738, "mean_token_accuracy": 0.9530138373374939, "num_tokens": 5669759.0, "step": 608 }, { "epoch": 2.8344988344988344, "grad_norm": 2.325052766726883, "learning_rate": 1.0900690578115643e-06, "loss": 0.1905, "mean_token_accuracy": 0.9488844573497772, "num_tokens": 5678984.0, "step": 609 }, { "epoch": 2.839160839160839, "grad_norm": 2.4642634644073396, "learning_rate": 1.0852814814119238e-06, "loss": 0.2002, "mean_token_accuracy": 0.9488008916378021, "num_tokens": 5688220.0, "step": 610 }, { "epoch": 2.843822843822844, "grad_norm": 2.345629463948856, "learning_rate": 1.0806234276602984e-06, "loss": 0.1949, "mean_token_accuracy": 0.9474165737628937, "num_tokens": 5697038.0, "step": 611 }, { "epoch": 2.8484848484848486, "grad_norm": 2.0761774146720398, "learning_rate": 1.0760950332185055e-06, "loss": 0.1623, "mean_token_accuracy": 0.9569342732429504, "num_tokens": 5706946.0, "step": 612 }, { "epoch": 2.8531468531468533, "grad_norm": 2.1184654015758357, "learning_rate": 1.071696430944311e-06, "loss": 0.1657, "mean_token_accuracy": 0.9557340145111084, "num_tokens": 5716519.0, "step": 613 }, { "epoch": 2.857808857808858, "grad_norm": 2.7525481756348427, "learning_rate": 1.0674277498875325e-06, "loss": 0.192, "mean_token_accuracy": 0.9446141123771667, "num_tokens": 5725936.0, "step": 614 }, { "epoch": 2.8624708624708624, "grad_norm": 2.415698399491206, "learning_rate": 1.0632891152862493e-06, "loss": 0.1881, "mean_token_accuracy": 0.9490151107311249, "num_tokens": 5735600.0, "step": 615 }, { "epoch": 2.867132867132867, "grad_norm": 2.0904164041799644, "learning_rate": 1.0592806485631326e-06, "loss": 0.1651, "mean_token_accuracy": 0.9557408690452576, "num_tokens": 5745302.0, "step": 616 }, { "epoch": 2.871794871794872, "grad_norm": 2.373611778744568, "learning_rate": 1.0554024673218808e-06, "loss": 0.1925, "mean_token_accuracy": 0.9461594521999359, "num_tokens": 5754314.0, "step": 617 }, { "epoch": 2.8764568764568765, "grad_norm": 2.3770918318336687, "learning_rate": 1.0516546853437686e-06, "loss": 0.1888, "mean_token_accuracy": 0.9470888376235962, "num_tokens": 5763507.0, "step": 618 }, { "epoch": 2.8811188811188813, "grad_norm": 2.615459038788364, "learning_rate": 1.0480374125843114e-06, "loss": 0.1873, "mean_token_accuracy": 0.9462830722332001, "num_tokens": 5772748.0, "step": 619 }, { "epoch": 2.8857808857808855, "grad_norm": 2.412878313629322, "learning_rate": 1.0445507551700356e-06, "loss": 0.1896, "mean_token_accuracy": 0.9492884576320648, "num_tokens": 5782160.0, "step": 620 }, { "epoch": 2.8904428904428903, "grad_norm": 2.349334019362764, "learning_rate": 1.0411948153953696e-06, "loss": 0.194, "mean_token_accuracy": 0.9473778307437897, "num_tokens": 5790728.0, "step": 621 }, { "epoch": 2.895104895104895, "grad_norm": 2.342666618452988, "learning_rate": 1.0379696917196378e-06, "loss": 0.1764, "mean_token_accuracy": 0.9477843642234802, "num_tokens": 5800771.0, "step": 622 }, { "epoch": 2.8997668997668997, "grad_norm": 2.369332709510537, "learning_rate": 1.0348754787641751e-06, "loss": 0.1775, "mean_token_accuracy": 0.949904203414917, "num_tokens": 5809633.0, "step": 623 }, { "epoch": 2.9044289044289044, "grad_norm": 2.2013635420452173, "learning_rate": 1.031912267309549e-06, "loss": 0.1624, "mean_token_accuracy": 0.9559362530708313, "num_tokens": 5819766.0, "step": 624 }, { "epoch": 2.909090909090909, "grad_norm": 2.208698138845042, "learning_rate": 1.029080144292899e-06, "loss": 0.1829, "mean_token_accuracy": 0.9497508108615875, "num_tokens": 5829413.0, "step": 625 }, { "epoch": 2.913752913752914, "grad_norm": 2.332053491912036, "learning_rate": 1.026379192805382e-06, "loss": 0.1777, "mean_token_accuracy": 0.9528988003730774, "num_tokens": 5839124.0, "step": 626 }, { "epoch": 2.9184149184149186, "grad_norm": 2.4789581729644077, "learning_rate": 1.0238094920897374e-06, "loss": 0.173, "mean_token_accuracy": 0.953327864408493, "num_tokens": 5848406.0, "step": 627 }, { "epoch": 2.9230769230769234, "grad_norm": 2.2923048279485667, "learning_rate": 1.0213711175379614e-06, "loss": 0.171, "mean_token_accuracy": 0.956305056810379, "num_tokens": 5857592.0, "step": 628 }, { "epoch": 2.9277389277389276, "grad_norm": 2.34320318426194, "learning_rate": 1.0190641406890946e-06, "loss": 0.1831, "mean_token_accuracy": 0.9514197111129761, "num_tokens": 5866687.0, "step": 629 }, { "epoch": 2.9324009324009324, "grad_norm": 2.4904828242049293, "learning_rate": 1.0168886292271246e-06, "loss": 0.189, "mean_token_accuracy": 0.9476959109306335, "num_tokens": 5875890.0, "step": 630 }, { "epoch": 2.937062937062937, "grad_norm": 2.216909312747613, "learning_rate": 1.0148446469789979e-06, "loss": 0.179, "mean_token_accuracy": 0.9502739012241364, "num_tokens": 5884491.0, "step": 631 }, { "epoch": 2.941724941724942, "grad_norm": 2.393678749036967, "learning_rate": 1.0129322539127494e-06, "loss": 0.1815, "mean_token_accuracy": 0.9475315511226654, "num_tokens": 5894473.0, "step": 632 }, { "epoch": 2.9463869463869465, "grad_norm": 2.3025122993118443, "learning_rate": 1.011151506135742e-06, "loss": 0.1755, "mean_token_accuracy": 0.9525851905345917, "num_tokens": 5903874.0, "step": 633 }, { "epoch": 2.951048951048951, "grad_norm": 2.149623052195717, "learning_rate": 1.0095024558930204e-06, "loss": 0.1827, "mean_token_accuracy": 0.9500269293785095, "num_tokens": 5913132.0, "step": 634 }, { "epoch": 2.9557109557109555, "grad_norm": 2.347852575129903, "learning_rate": 1.0079851515657794e-06, "loss": 0.1992, "mean_token_accuracy": 0.9474380910396576, "num_tokens": 5923130.0, "step": 635 }, { "epoch": 2.9603729603729603, "grad_norm": 2.4799488075118408, "learning_rate": 1.006599637669943e-06, "loss": 0.192, "mean_token_accuracy": 0.946626216173172, "num_tokens": 5931948.0, "step": 636 }, { "epoch": 2.965034965034965, "grad_norm": 2.1839881945919055, "learning_rate": 1.0053459548548582e-06, "loss": 0.1699, "mean_token_accuracy": 0.9527485072612762, "num_tokens": 5941672.0, "step": 637 }, { "epoch": 2.9696969696969697, "grad_norm": 2.3405062342525884, "learning_rate": 1.004224139902105e-06, "loss": 0.192, "mean_token_accuracy": 0.9456218183040619, "num_tokens": 5951095.0, "step": 638 }, { "epoch": 2.9743589743589745, "grad_norm": 2.1209481683555906, "learning_rate": 1.0032342257244139e-06, "loss": 0.17, "mean_token_accuracy": 0.9527087509632111, "num_tokens": 5961196.0, "step": 639 }, { "epoch": 2.979020979020979, "grad_norm": 2.3906839410006957, "learning_rate": 1.0023762413647023e-06, "loss": 0.19, "mean_token_accuracy": 0.9449608623981476, "num_tokens": 5970352.0, "step": 640 }, { "epoch": 2.983682983682984, "grad_norm": 2.2744118662012736, "learning_rate": 1.0016502119952224e-06, "loss": 0.1824, "mean_token_accuracy": 0.9502459466457367, "num_tokens": 5979759.0, "step": 641 }, { "epoch": 2.988344988344988, "grad_norm": 2.3978292522263933, "learning_rate": 1.0010561589168217e-06, "loss": 0.1858, "mean_token_accuracy": 0.9475610256195068, "num_tokens": 5989632.0, "step": 642 }, { "epoch": 2.993006993006993, "grad_norm": 2.220217029439386, "learning_rate": 1.0005940995583183e-06, "loss": 0.1843, "mean_token_accuracy": 0.9482160210609436, "num_tokens": 5999031.0, "step": 643 }, { "epoch": 2.9976689976689976, "grad_norm": 2.303701651736636, "learning_rate": 1.0002640474759911e-06, "loss": 0.1822, "mean_token_accuracy": 0.950013667345047, "num_tokens": 6008269.0, "step": 644 }, { "epoch": 3.0, "grad_norm": 2.303701651736636, "learning_rate": 1.0000660123531788e-06, "loss": 0.1305, "mean_token_accuracy": 0.9722093343734741, "num_tokens": 6010434.0, "step": 645 }, { "epoch": 3.0, "step": 645, "total_flos": 194968109678592.0, "train_loss": 0.4934643579314845, "train_runtime": 19223.4417, "train_samples_per_second": 1.069, "train_steps_per_second": 0.034 } ], "logging_steps": 1, "max_steps": 645, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 194968109678592.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }