Qwen2.5-1.5B-Open-R1-SFT / trainer_state.json
od2961's picture
Model save
5e8f96f verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994879672299027,
"eval_steps": 500,
"global_step": 1464,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000682710360129715,
"grad_norm": 11.289746284484863,
"learning_rate": 3.4013605442176873e-08,
"loss": 1.1263,
"mean_token_accuracy": 0.723930612206459,
"num_tokens": 65264.0,
"step": 1
},
{
"epoch": 0.00136542072025943,
"grad_norm": 11.28925609588623,
"learning_rate": 6.802721088435375e-08,
"loss": 1.1255,
"mean_token_accuracy": 0.7249358594417572,
"num_tokens": 130800.0,
"step": 2
},
{
"epoch": 0.002048131080389145,
"grad_norm": 11.120320320129395,
"learning_rate": 1.0204081632653061e-07,
"loss": 1.1779,
"mean_token_accuracy": 0.7114491611719131,
"num_tokens": 196336.0,
"step": 3
},
{
"epoch": 0.00273084144051886,
"grad_norm": 11.073629379272461,
"learning_rate": 1.360544217687075e-07,
"loss": 1.1357,
"mean_token_accuracy": 0.722400426864624,
"num_tokens": 261872.0,
"step": 4
},
{
"epoch": 0.0034135518006485747,
"grad_norm": 11.142949104309082,
"learning_rate": 1.700680272108844e-07,
"loss": 1.1198,
"mean_token_accuracy": 0.7241874635219574,
"num_tokens": 327408.0,
"step": 5
},
{
"epoch": 0.00409626216077829,
"grad_norm": 10.996609687805176,
"learning_rate": 2.0408163265306121e-07,
"loss": 1.1234,
"mean_token_accuracy": 0.7269978076219559,
"num_tokens": 392944.0,
"step": 6
},
{
"epoch": 0.0047789725209080045,
"grad_norm": 10.851889610290527,
"learning_rate": 2.3809523809523811e-07,
"loss": 1.135,
"mean_token_accuracy": 0.7213312536478043,
"num_tokens": 458480.0,
"step": 7
},
{
"epoch": 0.00546168288103772,
"grad_norm": 10.883561134338379,
"learning_rate": 2.72108843537415e-07,
"loss": 1.0987,
"mean_token_accuracy": 0.7315308004617691,
"num_tokens": 523678.0,
"step": 8
},
{
"epoch": 0.006144393241167435,
"grad_norm": 10.859150886535645,
"learning_rate": 3.0612244897959183e-07,
"loss": 1.1308,
"mean_token_accuracy": 0.7226448059082031,
"num_tokens": 589214.0,
"step": 9
},
{
"epoch": 0.006827103601297149,
"grad_norm": 10.717151641845703,
"learning_rate": 3.401360544217688e-07,
"loss": 1.1209,
"mean_token_accuracy": 0.7246762067079544,
"num_tokens": 654750.0,
"step": 10
},
{
"epoch": 0.007509813961426865,
"grad_norm": 10.239886283874512,
"learning_rate": 3.7414965986394563e-07,
"loss": 1.0971,
"mean_token_accuracy": 0.7295017838478088,
"num_tokens": 720282.0,
"step": 11
},
{
"epoch": 0.00819252432155658,
"grad_norm": 10.038299560546875,
"learning_rate": 4.0816326530612243e-07,
"loss": 1.1444,
"mean_token_accuracy": 0.7180626839399338,
"num_tokens": 785818.0,
"step": 12
},
{
"epoch": 0.008875234681686295,
"grad_norm": 8.35667896270752,
"learning_rate": 4.421768707482994e-07,
"loss": 1.0937,
"mean_token_accuracy": 0.732512354850769,
"num_tokens": 850288.0,
"step": 13
},
{
"epoch": 0.009557945041816009,
"grad_norm": 8.884153366088867,
"learning_rate": 4.7619047619047623e-07,
"loss": 1.1475,
"mean_token_accuracy": 0.7174967974424362,
"num_tokens": 914746.0,
"step": 14
},
{
"epoch": 0.010240655401945725,
"grad_norm": 8.479328155517578,
"learning_rate": 5.102040816326531e-07,
"loss": 1.1403,
"mean_token_accuracy": 0.7170087844133377,
"num_tokens": 980282.0,
"step": 15
},
{
"epoch": 0.01092336576207544,
"grad_norm": 8.230568885803223,
"learning_rate": 5.4421768707483e-07,
"loss": 1.1216,
"mean_token_accuracy": 0.7216520011425018,
"num_tokens": 1045818.0,
"step": 16
},
{
"epoch": 0.011606076122205154,
"grad_norm": 7.903264999389648,
"learning_rate": 5.782312925170068e-07,
"loss": 1.1004,
"mean_token_accuracy": 0.7233932018280029,
"num_tokens": 1111354.0,
"step": 17
},
{
"epoch": 0.01228878648233487,
"grad_norm": 8.133345603942871,
"learning_rate": 6.122448979591837e-07,
"loss": 1.1465,
"mean_token_accuracy": 0.7130529135465622,
"num_tokens": 1176890.0,
"step": 18
},
{
"epoch": 0.012971496842464585,
"grad_norm": 7.991074562072754,
"learning_rate": 6.462585034013606e-07,
"loss": 1.0987,
"mean_token_accuracy": 0.723148837685585,
"num_tokens": 1242426.0,
"step": 19
},
{
"epoch": 0.013654207202594299,
"grad_norm": 7.945312023162842,
"learning_rate": 6.802721088435376e-07,
"loss": 1.0762,
"mean_token_accuracy": 0.7300067245960236,
"num_tokens": 1307962.0,
"step": 20
},
{
"epoch": 0.014336917562724014,
"grad_norm": 7.9221367835998535,
"learning_rate": 7.142857142857143e-07,
"loss": 1.1439,
"mean_token_accuracy": 0.7103036493062973,
"num_tokens": 1373498.0,
"step": 21
},
{
"epoch": 0.01501962792285373,
"grad_norm": 7.6807026863098145,
"learning_rate": 7.482993197278913e-07,
"loss": 1.0533,
"mean_token_accuracy": 0.732389435172081,
"num_tokens": 1439034.0,
"step": 22
},
{
"epoch": 0.015702338282983445,
"grad_norm": 6.164129734039307,
"learning_rate": 7.823129251700681e-07,
"loss": 1.0217,
"mean_token_accuracy": 0.7375824898481369,
"num_tokens": 1504570.0,
"step": 23
},
{
"epoch": 0.01638504864311316,
"grad_norm": 6.058833599090576,
"learning_rate": 8.163265306122449e-07,
"loss": 1.0362,
"mean_token_accuracy": 0.7366355210542679,
"num_tokens": 1570106.0,
"step": 24
},
{
"epoch": 0.017067759003242873,
"grad_norm": 6.059955596923828,
"learning_rate": 8.503401360544218e-07,
"loss": 1.0173,
"mean_token_accuracy": 0.7385695725679398,
"num_tokens": 1635048.0,
"step": 25
},
{
"epoch": 0.01775046936337259,
"grad_norm": 6.09025764465332,
"learning_rate": 8.843537414965988e-07,
"loss": 1.0653,
"mean_token_accuracy": 0.727337121963501,
"num_tokens": 1698944.0,
"step": 26
},
{
"epoch": 0.018433179723502304,
"grad_norm": 5.8138747215271,
"learning_rate": 9.183673469387756e-07,
"loss": 1.0863,
"mean_token_accuracy": 0.7196179032325745,
"num_tokens": 1764382.0,
"step": 27
},
{
"epoch": 0.019115890083632018,
"grad_norm": 5.593854904174805,
"learning_rate": 9.523809523809525e-07,
"loss": 1.0164,
"mean_token_accuracy": 0.7361662089824677,
"num_tokens": 1829763.0,
"step": 28
},
{
"epoch": 0.019798600443761735,
"grad_norm": 5.434593677520752,
"learning_rate": 9.863945578231293e-07,
"loss": 1.0043,
"mean_token_accuracy": 0.7424242496490479,
"num_tokens": 1895299.0,
"step": 29
},
{
"epoch": 0.02048131080389145,
"grad_norm": 5.440600395202637,
"learning_rate": 1.0204081632653063e-06,
"loss": 1.0275,
"mean_token_accuracy": 0.7329698354005814,
"num_tokens": 1960835.0,
"step": 30
},
{
"epoch": 0.021164021164021163,
"grad_norm": 5.293028354644775,
"learning_rate": 1.0544217687074832e-06,
"loss": 0.9885,
"mean_token_accuracy": 0.7405608594417572,
"num_tokens": 2026371.0,
"step": 31
},
{
"epoch": 0.02184673152415088,
"grad_norm": 5.160416603088379,
"learning_rate": 1.08843537414966e-06,
"loss": 0.9793,
"mean_token_accuracy": 0.7408407628536224,
"num_tokens": 2091861.0,
"step": 32
},
{
"epoch": 0.022529441884280594,
"grad_norm": 5.16143798828125,
"learning_rate": 1.122448979591837e-06,
"loss": 0.9774,
"mean_token_accuracy": 0.7396446615457535,
"num_tokens": 2157349.0,
"step": 33
},
{
"epoch": 0.023212152244410308,
"grad_norm": 5.192001819610596,
"learning_rate": 1.1564625850340136e-06,
"loss": 1.0125,
"mean_token_accuracy": 0.7285452634096146,
"num_tokens": 2222852.0,
"step": 34
},
{
"epoch": 0.023894862604540025,
"grad_norm": 4.953834056854248,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.948,
"mean_token_accuracy": 0.7469452619552612,
"num_tokens": 2288388.0,
"step": 35
},
{
"epoch": 0.02457757296466974,
"grad_norm": 4.8762407302856445,
"learning_rate": 1.2244897959183673e-06,
"loss": 0.9768,
"mean_token_accuracy": 0.7382392585277557,
"num_tokens": 2353924.0,
"step": 36
},
{
"epoch": 0.025260283324799453,
"grad_norm": 4.632943630218506,
"learning_rate": 1.2585034013605443e-06,
"loss": 0.9532,
"mean_token_accuracy": 0.7446745932102203,
"num_tokens": 2419362.0,
"step": 37
},
{
"epoch": 0.02594299368492917,
"grad_norm": 4.591414928436279,
"learning_rate": 1.2925170068027212e-06,
"loss": 0.9509,
"mean_token_accuracy": 0.7436308562755585,
"num_tokens": 2484898.0,
"step": 38
},
{
"epoch": 0.026625704045058884,
"grad_norm": 4.45168924331665,
"learning_rate": 1.3265306122448982e-06,
"loss": 0.9328,
"mean_token_accuracy": 0.7455859035253525,
"num_tokens": 2550434.0,
"step": 39
},
{
"epoch": 0.027308414405188598,
"grad_norm": 4.439093112945557,
"learning_rate": 1.3605442176870751e-06,
"loss": 0.926,
"mean_token_accuracy": 0.7475508600473404,
"num_tokens": 2615867.0,
"step": 40
},
{
"epoch": 0.027991124765318315,
"grad_norm": 4.357616424560547,
"learning_rate": 1.3945578231292517e-06,
"loss": 0.9152,
"mean_token_accuracy": 0.7535587698221207,
"num_tokens": 2681403.0,
"step": 41
},
{
"epoch": 0.02867383512544803,
"grad_norm": 4.222606658935547,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.9855,
"mean_token_accuracy": 0.7323101311922073,
"num_tokens": 2746765.0,
"step": 42
},
{
"epoch": 0.029356545485577742,
"grad_norm": 3.9405136108398438,
"learning_rate": 1.4625850340136056e-06,
"loss": 0.9199,
"mean_token_accuracy": 0.7474187463521957,
"num_tokens": 2812301.0,
"step": 43
},
{
"epoch": 0.03003925584570746,
"grad_norm": 3.81093430519104,
"learning_rate": 1.4965986394557825e-06,
"loss": 0.8939,
"mean_token_accuracy": 0.7549030482769012,
"num_tokens": 2877395.0,
"step": 44
},
{
"epoch": 0.030721966205837174,
"grad_norm": 3.7143189907073975,
"learning_rate": 1.5306122448979593e-06,
"loss": 0.8639,
"mean_token_accuracy": 0.7581400275230408,
"num_tokens": 2942730.0,
"step": 45
},
{
"epoch": 0.03140467656596689,
"grad_norm": 3.47798490524292,
"learning_rate": 1.5646258503401362e-06,
"loss": 0.8984,
"mean_token_accuracy": 0.7516056448221207,
"num_tokens": 3007476.0,
"step": 46
},
{
"epoch": 0.032087386926096605,
"grad_norm": 3.2375733852386475,
"learning_rate": 1.5986394557823132e-06,
"loss": 0.8831,
"mean_token_accuracy": 0.7556970864534378,
"num_tokens": 3073012.0,
"step": 47
},
{
"epoch": 0.03277009728622632,
"grad_norm": 2.973647356033325,
"learning_rate": 1.6326530612244897e-06,
"loss": 0.8794,
"mean_token_accuracy": 0.7556320279836655,
"num_tokens": 3138364.0,
"step": 48
},
{
"epoch": 0.03345280764635603,
"grad_norm": 2.5924108028411865,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.8928,
"mean_token_accuracy": 0.7511672079563141,
"num_tokens": 3203484.0,
"step": 49
},
{
"epoch": 0.034135518006485746,
"grad_norm": 1.7866108417510986,
"learning_rate": 1.7006802721088436e-06,
"loss": 0.8587,
"mean_token_accuracy": 0.7600806355476379,
"num_tokens": 3269020.0,
"step": 50
},
{
"epoch": 0.03481822836661547,
"grad_norm": 1.5599899291992188,
"learning_rate": 1.7346938775510206e-06,
"loss": 0.8653,
"mean_token_accuracy": 0.7569495290517807,
"num_tokens": 3334556.0,
"step": 51
},
{
"epoch": 0.03550093872674518,
"grad_norm": 1.4173204898834229,
"learning_rate": 1.7687074829931975e-06,
"loss": 0.8721,
"mean_token_accuracy": 0.7553055435419083,
"num_tokens": 3399816.0,
"step": 52
},
{
"epoch": 0.036183649086874894,
"grad_norm": 1.3384040594100952,
"learning_rate": 1.8027210884353743e-06,
"loss": 0.8411,
"mean_token_accuracy": 0.761073425412178,
"num_tokens": 3465352.0,
"step": 53
},
{
"epoch": 0.03686635944700461,
"grad_norm": 1.294304370880127,
"learning_rate": 1.8367346938775512e-06,
"loss": 0.7959,
"mean_token_accuracy": 0.7744806259870529,
"num_tokens": 3530741.0,
"step": 54
},
{
"epoch": 0.03754906980713432,
"grad_norm": 1.186849594116211,
"learning_rate": 1.8707482993197282e-06,
"loss": 0.7824,
"mean_token_accuracy": 0.7765350043773651,
"num_tokens": 3596088.0,
"step": 55
},
{
"epoch": 0.038231780167264036,
"grad_norm": 1.154654860496521,
"learning_rate": 1.904761904761905e-06,
"loss": 0.8091,
"mean_token_accuracy": 0.7686949968338013,
"num_tokens": 3661624.0,
"step": 56
},
{
"epoch": 0.03891449052739376,
"grad_norm": 1.1420106887817383,
"learning_rate": 1.938775510204082e-06,
"loss": 0.8476,
"mean_token_accuracy": 0.7588370442390442,
"num_tokens": 3726929.0,
"step": 57
},
{
"epoch": 0.03959720088752347,
"grad_norm": 1.085784673690796,
"learning_rate": 1.9727891156462586e-06,
"loss": 0.7694,
"mean_token_accuracy": 0.7776759564876556,
"num_tokens": 3792465.0,
"step": 58
},
{
"epoch": 0.040279911247653184,
"grad_norm": 1.073722243309021,
"learning_rate": 2.0068027210884353e-06,
"loss": 0.8398,
"mean_token_accuracy": 0.7589962035417557,
"num_tokens": 3858001.0,
"step": 59
},
{
"epoch": 0.0409626216077829,
"grad_norm": 1.0647130012512207,
"learning_rate": 2.0408163265306125e-06,
"loss": 0.8283,
"mean_token_accuracy": 0.7600959092378616,
"num_tokens": 3923537.0,
"step": 60
},
{
"epoch": 0.04164533196791261,
"grad_norm": 0.9948044419288635,
"learning_rate": 2.0748299319727892e-06,
"loss": 0.8076,
"mean_token_accuracy": 0.7665566951036453,
"num_tokens": 3989073.0,
"step": 61
},
{
"epoch": 0.042328042328042326,
"grad_norm": 1.0430023670196533,
"learning_rate": 2.1088435374149664e-06,
"loss": 0.7738,
"mean_token_accuracy": 0.7728800028562546,
"num_tokens": 4054609.0,
"step": 62
},
{
"epoch": 0.043010752688172046,
"grad_norm": 0.9742470383644104,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.7987,
"mean_token_accuracy": 0.7675991207361221,
"num_tokens": 4119840.0,
"step": 63
},
{
"epoch": 0.04369346304830176,
"grad_norm": 0.9585213661193848,
"learning_rate": 2.17687074829932e-06,
"loss": 0.7975,
"mean_token_accuracy": 0.7663673758506775,
"num_tokens": 4185284.0,
"step": 64
},
{
"epoch": 0.044376173408431474,
"grad_norm": 0.9440982341766357,
"learning_rate": 2.2108843537414966e-06,
"loss": 0.7675,
"mean_token_accuracy": 0.776309072971344,
"num_tokens": 4250735.0,
"step": 65
},
{
"epoch": 0.04505888376856119,
"grad_norm": 1.0054755210876465,
"learning_rate": 2.244897959183674e-06,
"loss": 0.8071,
"mean_token_accuracy": 0.7647958248853683,
"num_tokens": 4316205.0,
"step": 66
},
{
"epoch": 0.0457415941286909,
"grad_norm": 0.9801008105278015,
"learning_rate": 2.2789115646258505e-06,
"loss": 0.7575,
"mean_token_accuracy": 0.7767521739006042,
"num_tokens": 4381221.0,
"step": 67
},
{
"epoch": 0.046424304488820615,
"grad_norm": 0.9017577767372131,
"learning_rate": 2.3129251700680273e-06,
"loss": 0.7807,
"mean_token_accuracy": 0.7724828869104385,
"num_tokens": 4446757.0,
"step": 68
},
{
"epoch": 0.047107014848950336,
"grad_norm": 0.8664342761039734,
"learning_rate": 2.3469387755102044e-06,
"loss": 0.7467,
"mean_token_accuracy": 0.780700147151947,
"num_tokens": 4512293.0,
"step": 69
},
{
"epoch": 0.04778972520908005,
"grad_norm": 0.9046992659568787,
"learning_rate": 2.380952380952381e-06,
"loss": 0.7342,
"mean_token_accuracy": 0.7841294556856155,
"num_tokens": 4577297.0,
"step": 70
},
{
"epoch": 0.048472435569209764,
"grad_norm": 0.8165750503540039,
"learning_rate": 2.414965986394558e-06,
"loss": 0.7255,
"mean_token_accuracy": 0.7828498333692551,
"num_tokens": 4642721.0,
"step": 71
},
{
"epoch": 0.04915514592933948,
"grad_norm": 0.841571033000946,
"learning_rate": 2.4489795918367347e-06,
"loss": 0.7587,
"mean_token_accuracy": 0.7761638462543488,
"num_tokens": 4708257.0,
"step": 72
},
{
"epoch": 0.04983785628946919,
"grad_norm": 0.8995031118392944,
"learning_rate": 2.482993197278912e-06,
"loss": 0.7815,
"mean_token_accuracy": 0.7689618766307831,
"num_tokens": 4773557.0,
"step": 73
},
{
"epoch": 0.050520566649598905,
"grad_norm": 0.8214359879493713,
"learning_rate": 2.5170068027210886e-06,
"loss": 0.7447,
"mean_token_accuracy": 0.7801923453807831,
"num_tokens": 4838977.0,
"step": 74
},
{
"epoch": 0.051203277009728626,
"grad_norm": 0.7673158049583435,
"learning_rate": 2.5510204081632657e-06,
"loss": 0.726,
"mean_token_accuracy": 0.7852364182472229,
"num_tokens": 4904513.0,
"step": 75
},
{
"epoch": 0.05188598736985834,
"grad_norm": 0.7837581038475037,
"learning_rate": 2.5850340136054425e-06,
"loss": 0.7536,
"mean_token_accuracy": 0.7792949676513672,
"num_tokens": 4970049.0,
"step": 76
},
{
"epoch": 0.052568697729988054,
"grad_norm": 0.7806072235107422,
"learning_rate": 2.6190476190476192e-06,
"loss": 0.7094,
"mean_token_accuracy": 0.7869623750448227,
"num_tokens": 5035585.0,
"step": 77
},
{
"epoch": 0.05325140809011777,
"grad_norm": 0.8338956832885742,
"learning_rate": 2.6530612244897964e-06,
"loss": 0.7431,
"mean_token_accuracy": 0.7790174335241318,
"num_tokens": 5100407.0,
"step": 78
},
{
"epoch": 0.05393411845024748,
"grad_norm": 0.857094407081604,
"learning_rate": 2.687074829931973e-06,
"loss": 0.7612,
"mean_token_accuracy": 0.7757337838411331,
"num_tokens": 5165420.0,
"step": 79
},
{
"epoch": 0.054616828810377195,
"grad_norm": 0.8075940012931824,
"learning_rate": 2.7210884353741503e-06,
"loss": 0.71,
"mean_token_accuracy": 0.7867943644523621,
"num_tokens": 5230956.0,
"step": 80
},
{
"epoch": 0.055299539170506916,
"grad_norm": 0.8337777853012085,
"learning_rate": 2.7551020408163266e-06,
"loss": 0.7418,
"mean_token_accuracy": 0.7804368883371353,
"num_tokens": 5296230.0,
"step": 81
},
{
"epoch": 0.05598224953063663,
"grad_norm": 0.8036956787109375,
"learning_rate": 2.7891156462585034e-06,
"loss": 0.7116,
"mean_token_accuracy": 0.7864073514938354,
"num_tokens": 5361298.0,
"step": 82
},
{
"epoch": 0.056664959890766343,
"grad_norm": 0.8216161131858826,
"learning_rate": 2.8231292517006805e-06,
"loss": 0.6875,
"mean_token_accuracy": 0.7953281402587891,
"num_tokens": 5426054.0,
"step": 83
},
{
"epoch": 0.05734767025089606,
"grad_norm": 0.7968023419380188,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.7463,
"mean_token_accuracy": 0.7788520306348801,
"num_tokens": 5491590.0,
"step": 84
},
{
"epoch": 0.05803038061102577,
"grad_norm": 0.8076518177986145,
"learning_rate": 2.891156462585034e-06,
"loss": 0.7363,
"mean_token_accuracy": 0.7817998677492142,
"num_tokens": 5557126.0,
"step": 85
},
{
"epoch": 0.058713090971155485,
"grad_norm": 0.8246069550514221,
"learning_rate": 2.925170068027211e-06,
"loss": 0.7219,
"mean_token_accuracy": 0.7867027223110199,
"num_tokens": 5622662.0,
"step": 86
},
{
"epoch": 0.059395801331285206,
"grad_norm": 0.7641127705574036,
"learning_rate": 2.959183673469388e-06,
"loss": 0.7189,
"mean_token_accuracy": 0.7851627767086029,
"num_tokens": 5688140.0,
"step": 87
},
{
"epoch": 0.06007851169141492,
"grad_norm": 0.8021376729011536,
"learning_rate": 2.993197278911565e-06,
"loss": 0.7412,
"mean_token_accuracy": 0.7815249264240265,
"num_tokens": 5753676.0,
"step": 88
},
{
"epoch": 0.06076122205154463,
"grad_norm": 0.7843667268753052,
"learning_rate": 3.027210884353742e-06,
"loss": 0.7453,
"mean_token_accuracy": 0.7783620357513428,
"num_tokens": 5818498.0,
"step": 89
},
{
"epoch": 0.06144393241167435,
"grad_norm": 0.8114190101623535,
"learning_rate": 3.0612244897959185e-06,
"loss": 0.6977,
"mean_token_accuracy": 0.7898796498775482,
"num_tokens": 5884034.0,
"step": 90
},
{
"epoch": 0.06212664277180406,
"grad_norm": 0.8369579315185547,
"learning_rate": 3.0952380952380957e-06,
"loss": 0.7216,
"mean_token_accuracy": 0.7870012819766998,
"num_tokens": 5949370.0,
"step": 91
},
{
"epoch": 0.06280935313193378,
"grad_norm": 0.7947114706039429,
"learning_rate": 3.1292517006802725e-06,
"loss": 0.7464,
"mean_token_accuracy": 0.7776340842247009,
"num_tokens": 6014764.0,
"step": 92
},
{
"epoch": 0.06349206349206349,
"grad_norm": 0.8602317571640015,
"learning_rate": 3.1632653061224496e-06,
"loss": 0.6994,
"mean_token_accuracy": 0.7892228811979294,
"num_tokens": 6080300.0,
"step": 93
},
{
"epoch": 0.06417477385219321,
"grad_norm": 0.7978633046150208,
"learning_rate": 3.1972789115646264e-06,
"loss": 0.7218,
"mean_token_accuracy": 0.7852825820446014,
"num_tokens": 6145359.0,
"step": 94
},
{
"epoch": 0.06485748421232292,
"grad_norm": 0.8335587978363037,
"learning_rate": 3.231292517006803e-06,
"loss": 0.7087,
"mean_token_accuracy": 0.7889785021543503,
"num_tokens": 6210895.0,
"step": 95
},
{
"epoch": 0.06554019457245264,
"grad_norm": 0.7953450083732605,
"learning_rate": 3.2653061224489794e-06,
"loss": 0.7208,
"mean_token_accuracy": 0.7854044586420059,
"num_tokens": 6276431.0,
"step": 96
},
{
"epoch": 0.06622290493258236,
"grad_norm": 0.7711222767829895,
"learning_rate": 3.2993197278911566e-06,
"loss": 0.7419,
"mean_token_accuracy": 0.7802113890647888,
"num_tokens": 6341967.0,
"step": 97
},
{
"epoch": 0.06690561529271206,
"grad_norm": 0.8267924189567566,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.7365,
"mean_token_accuracy": 0.7791422307491302,
"num_tokens": 6407503.0,
"step": 98
},
{
"epoch": 0.06758832565284179,
"grad_norm": 0.7593364715576172,
"learning_rate": 3.3673469387755105e-06,
"loss": 0.6944,
"mean_token_accuracy": 0.7914222925901413,
"num_tokens": 6473039.0,
"step": 99
},
{
"epoch": 0.06827103601297149,
"grad_norm": 0.7679820656776428,
"learning_rate": 3.4013605442176872e-06,
"loss": 0.701,
"mean_token_accuracy": 0.7901545912027359,
"num_tokens": 6538575.0,
"step": 100
},
{
"epoch": 0.06895374637310121,
"grad_norm": 0.788735568523407,
"learning_rate": 3.435374149659864e-06,
"loss": 0.7016,
"mean_token_accuracy": 0.7907655388116837,
"num_tokens": 6604111.0,
"step": 101
},
{
"epoch": 0.06963645673323093,
"grad_norm": 0.8061610460281372,
"learning_rate": 3.469387755102041e-06,
"loss": 0.7805,
"mean_token_accuracy": 0.7686565071344376,
"num_tokens": 6669582.0,
"step": 102
},
{
"epoch": 0.07031916709336064,
"grad_norm": 0.7788532972335815,
"learning_rate": 3.503401360544218e-06,
"loss": 0.706,
"mean_token_accuracy": 0.7864888906478882,
"num_tokens": 6735118.0,
"step": 103
},
{
"epoch": 0.07100187745349036,
"grad_norm": 0.7763977646827698,
"learning_rate": 3.537414965986395e-06,
"loss": 0.7035,
"mean_token_accuracy": 0.7874220460653305,
"num_tokens": 6800487.0,
"step": 104
},
{
"epoch": 0.07168458781362007,
"grad_norm": 0.8377651572227478,
"learning_rate": 3.5714285714285718e-06,
"loss": 0.7497,
"mean_token_accuracy": 0.7772635817527771,
"num_tokens": 6866023.0,
"step": 105
},
{
"epoch": 0.07236729817374979,
"grad_norm": 0.8014208674430847,
"learning_rate": 3.6054421768707485e-06,
"loss": 0.726,
"mean_token_accuracy": 0.7824260890483856,
"num_tokens": 6931559.0,
"step": 106
},
{
"epoch": 0.0730500085338795,
"grad_norm": 0.7863408923149109,
"learning_rate": 3.6394557823129257e-06,
"loss": 0.6997,
"mean_token_accuracy": 0.7897269278764725,
"num_tokens": 6997095.0,
"step": 107
},
{
"epoch": 0.07373271889400922,
"grad_norm": 0.7708235383033752,
"learning_rate": 3.6734693877551024e-06,
"loss": 0.7234,
"mean_token_accuracy": 0.7831897437572479,
"num_tokens": 7062631.0,
"step": 108
},
{
"epoch": 0.07441542925413894,
"grad_norm": 0.8085599541664124,
"learning_rate": 3.7074829931972796e-06,
"loss": 0.7218,
"mean_token_accuracy": 0.7822886258363724,
"num_tokens": 7128167.0,
"step": 109
},
{
"epoch": 0.07509813961426864,
"grad_norm": 0.7934694886207581,
"learning_rate": 3.7414965986394563e-06,
"loss": 0.7195,
"mean_token_accuracy": 0.7839381843805313,
"num_tokens": 7193703.0,
"step": 110
},
{
"epoch": 0.07578084997439836,
"grad_norm": 0.7413143515586853,
"learning_rate": 3.7755102040816327e-06,
"loss": 0.6853,
"mean_token_accuracy": 0.7936522662639618,
"num_tokens": 7259239.0,
"step": 111
},
{
"epoch": 0.07646356033452807,
"grad_norm": 0.7616100311279297,
"learning_rate": 3.80952380952381e-06,
"loss": 0.686,
"mean_token_accuracy": 0.7947530299425125,
"num_tokens": 7323989.0,
"step": 112
},
{
"epoch": 0.07714627069465779,
"grad_norm": 0.8442330956459045,
"learning_rate": 3.843537414965986e-06,
"loss": 0.7003,
"mean_token_accuracy": 0.7893252372741699,
"num_tokens": 7389177.0,
"step": 113
},
{
"epoch": 0.07782898105478751,
"grad_norm": 0.7328389286994934,
"learning_rate": 3.877551020408164e-06,
"loss": 0.6991,
"mean_token_accuracy": 0.7894214391708374,
"num_tokens": 7454713.0,
"step": 114
},
{
"epoch": 0.07851169141491722,
"grad_norm": 0.7462553977966309,
"learning_rate": 3.9115646258503405e-06,
"loss": 0.7192,
"mean_token_accuracy": 0.7846713215112686,
"num_tokens": 7520249.0,
"step": 115
},
{
"epoch": 0.07919440177504694,
"grad_norm": 0.7286996245384216,
"learning_rate": 3.945578231292517e-06,
"loss": 0.7022,
"mean_token_accuracy": 0.788993775844574,
"num_tokens": 7585785.0,
"step": 116
},
{
"epoch": 0.07987711213517665,
"grad_norm": 0.8258713483810425,
"learning_rate": 3.979591836734694e-06,
"loss": 0.7303,
"mean_token_accuracy": 0.7783785462379456,
"num_tokens": 7651321.0,
"step": 117
},
{
"epoch": 0.08055982249530637,
"grad_norm": 0.7999934554100037,
"learning_rate": 4.013605442176871e-06,
"loss": 0.6826,
"mean_token_accuracy": 0.7930682301521301,
"num_tokens": 7716448.0,
"step": 118
},
{
"epoch": 0.08124253285543608,
"grad_norm": 0.7555844187736511,
"learning_rate": 4.047619047619048e-06,
"loss": 0.6219,
"mean_token_accuracy": 0.8124694526195526,
"num_tokens": 7781984.0,
"step": 119
},
{
"epoch": 0.0819252432155658,
"grad_norm": 0.7775622606277466,
"learning_rate": 4.081632653061225e-06,
"loss": 0.6954,
"mean_token_accuracy": 0.7905822545289993,
"num_tokens": 7847520.0,
"step": 120
},
{
"epoch": 0.08260795357569552,
"grad_norm": 0.8294740915298462,
"learning_rate": 4.115646258503402e-06,
"loss": 0.7175,
"mean_token_accuracy": 0.7847935110330582,
"num_tokens": 7913056.0,
"step": 121
},
{
"epoch": 0.08329066393582522,
"grad_norm": 0.7734805941581726,
"learning_rate": 4.1496598639455785e-06,
"loss": 0.6987,
"mean_token_accuracy": 0.7903053909540176,
"num_tokens": 7978127.0,
"step": 122
},
{
"epoch": 0.08397337429595494,
"grad_norm": 0.7841943502426147,
"learning_rate": 4.183673469387755e-06,
"loss": 0.7017,
"mean_token_accuracy": 0.7904753237962723,
"num_tokens": 8043663.0,
"step": 123
},
{
"epoch": 0.08465608465608465,
"grad_norm": 0.7548030614852905,
"learning_rate": 4.217687074829933e-06,
"loss": 0.6716,
"mean_token_accuracy": 0.7981427162885666,
"num_tokens": 8109199.0,
"step": 124
},
{
"epoch": 0.08533879501621437,
"grad_norm": 0.7701455950737,
"learning_rate": 4.251700680272109e-06,
"loss": 0.7019,
"mean_token_accuracy": 0.7921554446220398,
"num_tokens": 8174735.0,
"step": 125
},
{
"epoch": 0.08602150537634409,
"grad_norm": 0.7813974022865295,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.716,
"mean_token_accuracy": 0.7849920690059662,
"num_tokens": 8240271.0,
"step": 126
},
{
"epoch": 0.0867042157364738,
"grad_norm": 0.7576754689216614,
"learning_rate": 4.319727891156463e-06,
"loss": 0.6691,
"mean_token_accuracy": 0.7991696149110794,
"num_tokens": 8305531.0,
"step": 127
},
{
"epoch": 0.08738692609660352,
"grad_norm": 0.8087888360023499,
"learning_rate": 4.35374149659864e-06,
"loss": 0.7109,
"mean_token_accuracy": 0.7871221750974655,
"num_tokens": 8370791.0,
"step": 128
},
{
"epoch": 0.08806963645673323,
"grad_norm": 0.7525134682655334,
"learning_rate": 4.3877551020408165e-06,
"loss": 0.6671,
"mean_token_accuracy": 0.79823437333107,
"num_tokens": 8436327.0,
"step": 129
},
{
"epoch": 0.08875234681686295,
"grad_norm": 0.7696347832679749,
"learning_rate": 4.421768707482993e-06,
"loss": 0.7001,
"mean_token_accuracy": 0.7891006916761398,
"num_tokens": 8501863.0,
"step": 130
},
{
"epoch": 0.08943505717699266,
"grad_norm": 0.7710400223731995,
"learning_rate": 4.45578231292517e-06,
"loss": 0.7287,
"mean_token_accuracy": 0.7811278104782104,
"num_tokens": 8567399.0,
"step": 131
},
{
"epoch": 0.09011776753712238,
"grad_norm": 0.7712966203689575,
"learning_rate": 4.489795918367348e-06,
"loss": 0.6576,
"mean_token_accuracy": 0.7996701002120972,
"num_tokens": 8632935.0,
"step": 132
},
{
"epoch": 0.0908004778972521,
"grad_norm": 0.7195627689361572,
"learning_rate": 4.523809523809524e-06,
"loss": 0.6521,
"mean_token_accuracy": 0.8024804592132568,
"num_tokens": 8698471.0,
"step": 133
},
{
"epoch": 0.0914831882573818,
"grad_norm": 0.8118554949760437,
"learning_rate": 4.557823129251701e-06,
"loss": 0.6879,
"mean_token_accuracy": 0.7921166270971298,
"num_tokens": 8763993.0,
"step": 134
},
{
"epoch": 0.09216589861751152,
"grad_norm": 0.786746621131897,
"learning_rate": 4.591836734693878e-06,
"loss": 0.6991,
"mean_token_accuracy": 0.789543628692627,
"num_tokens": 8829529.0,
"step": 135
},
{
"epoch": 0.09284860897764123,
"grad_norm": 0.7629210948944092,
"learning_rate": 4.6258503401360546e-06,
"loss": 0.6929,
"mean_token_accuracy": 0.7892262637615204,
"num_tokens": 8894503.0,
"step": 136
},
{
"epoch": 0.09353131933777095,
"grad_norm": 0.7598711252212524,
"learning_rate": 4.659863945578232e-06,
"loss": 0.6607,
"mean_token_accuracy": 0.7976081520318985,
"num_tokens": 8960039.0,
"step": 137
},
{
"epoch": 0.09421402969790067,
"grad_norm": 0.7624816298484802,
"learning_rate": 4.693877551020409e-06,
"loss": 0.6648,
"mean_token_accuracy": 0.7974095940589905,
"num_tokens": 9025575.0,
"step": 138
},
{
"epoch": 0.09489674005803038,
"grad_norm": 0.7908929586410522,
"learning_rate": 4.727891156462586e-06,
"loss": 0.6839,
"mean_token_accuracy": 0.79278165102005,
"num_tokens": 9091111.0,
"step": 139
},
{
"epoch": 0.0955794504181601,
"grad_norm": 0.7983325123786926,
"learning_rate": 4.761904761904762e-06,
"loss": 0.7252,
"mean_token_accuracy": 0.7833272218704224,
"num_tokens": 9156647.0,
"step": 140
},
{
"epoch": 0.09626216077828981,
"grad_norm": 0.787777841091156,
"learning_rate": 4.795918367346939e-06,
"loss": 0.6854,
"mean_token_accuracy": 0.7925372868776321,
"num_tokens": 9222183.0,
"step": 141
},
{
"epoch": 0.09694487113841953,
"grad_norm": 0.7467668056488037,
"learning_rate": 4.829931972789116e-06,
"loss": 0.6994,
"mean_token_accuracy": 0.7906127870082855,
"num_tokens": 9287719.0,
"step": 142
},
{
"epoch": 0.09762758149854923,
"grad_norm": 0.777410089969635,
"learning_rate": 4.863945578231293e-06,
"loss": 0.6915,
"mean_token_accuracy": 0.7887188494205475,
"num_tokens": 9353255.0,
"step": 143
},
{
"epoch": 0.09831029185867896,
"grad_norm": 0.7552134394645691,
"learning_rate": 4.897959183673469e-06,
"loss": 0.6787,
"mean_token_accuracy": 0.7940340936183929,
"num_tokens": 9418791.0,
"step": 144
},
{
"epoch": 0.09899300221880868,
"grad_norm": 0.7737309336662292,
"learning_rate": 4.931972789115647e-06,
"loss": 0.696,
"mean_token_accuracy": 0.7886081039905548,
"num_tokens": 9484311.0,
"step": 145
},
{
"epoch": 0.09967571257893838,
"grad_norm": 0.7639101147651672,
"learning_rate": 4.965986394557824e-06,
"loss": 0.6617,
"mean_token_accuracy": 0.7969208359718323,
"num_tokens": 9549847.0,
"step": 146
},
{
"epoch": 0.1003584229390681,
"grad_norm": 0.7466997504234314,
"learning_rate": 5e-06,
"loss": 0.7199,
"mean_token_accuracy": 0.7817477583885193,
"num_tokens": 9615193.0,
"step": 147
},
{
"epoch": 0.10104113329919781,
"grad_norm": 0.8260450959205627,
"learning_rate": 4.999992887242496e-06,
"loss": 0.6805,
"mean_token_accuracy": 0.7912542819976807,
"num_tokens": 9680729.0,
"step": 148
},
{
"epoch": 0.10172384365932753,
"grad_norm": 0.7522003054618835,
"learning_rate": 4.999971549010455e-06,
"loss": 0.6651,
"mean_token_accuracy": 0.7988911420106888,
"num_tokens": 9746265.0,
"step": 149
},
{
"epoch": 0.10240655401945725,
"grad_norm": 0.7995320558547974,
"learning_rate": 4.9999359854252975e-06,
"loss": 0.7125,
"mean_token_accuracy": 0.7878176867961884,
"num_tokens": 9811801.0,
"step": 150
},
{
"epoch": 0.10308926437958696,
"grad_norm": 0.7423191666603088,
"learning_rate": 4.999886196689386e-06,
"loss": 0.6646,
"mean_token_accuracy": 0.7983260154724121,
"num_tokens": 9877337.0,
"step": 151
},
{
"epoch": 0.10377197473971668,
"grad_norm": 0.7659249901771545,
"learning_rate": 4.99982218308603e-06,
"loss": 0.6188,
"mean_token_accuracy": 0.811354473233223,
"num_tokens": 9942873.0,
"step": 152
},
{
"epoch": 0.10445468509984639,
"grad_norm": 0.7772722840309143,
"learning_rate": 4.999743944979481e-06,
"loss": 0.6389,
"mean_token_accuracy": 0.8042980134487152,
"num_tokens": 10008409.0,
"step": 153
},
{
"epoch": 0.10513739545997611,
"grad_norm": 0.7917481064796448,
"learning_rate": 4.999651482814928e-06,
"loss": 0.6516,
"mean_token_accuracy": 0.8001801371574402,
"num_tokens": 10073870.0,
"step": 154
},
{
"epoch": 0.10582010582010581,
"grad_norm": 0.7320284843444824,
"learning_rate": 4.9995447971185e-06,
"loss": 0.66,
"mean_token_accuracy": 0.7984074205160141,
"num_tokens": 10139280.0,
"step": 155
},
{
"epoch": 0.10650281618023553,
"grad_norm": 0.7924143671989441,
"learning_rate": 4.999423888497261e-06,
"loss": 0.677,
"mean_token_accuracy": 0.7951185405254364,
"num_tokens": 10204816.0,
"step": 156
},
{
"epoch": 0.10718552654036526,
"grad_norm": 0.8169876933097839,
"learning_rate": 4.999288757639206e-06,
"loss": 0.662,
"mean_token_accuracy": 0.7976234257221222,
"num_tokens": 10270352.0,
"step": 157
},
{
"epoch": 0.10786823690049496,
"grad_norm": 0.7444480657577515,
"learning_rate": 4.999139405313257e-06,
"loss": 0.6446,
"mean_token_accuracy": 0.8019917011260986,
"num_tokens": 10335888.0,
"step": 158
},
{
"epoch": 0.10855094726062468,
"grad_norm": 0.8082272410392761,
"learning_rate": 4.998975832369261e-06,
"loss": 0.722,
"mean_token_accuracy": 0.78294537961483,
"num_tokens": 10401424.0,
"step": 159
},
{
"epoch": 0.10923365762075439,
"grad_norm": 0.7980340123176575,
"learning_rate": 4.99879803973798e-06,
"loss": 0.7303,
"mean_token_accuracy": 0.7807306796312332,
"num_tokens": 10466960.0,
"step": 160
},
{
"epoch": 0.10991636798088411,
"grad_norm": 0.7741237878799438,
"learning_rate": 4.998606028431091e-06,
"loss": 0.6135,
"mean_token_accuracy": 0.811751589179039,
"num_tokens": 10532496.0,
"step": 161
},
{
"epoch": 0.11059907834101383,
"grad_norm": 0.8673920631408691,
"learning_rate": 4.998399799541179e-06,
"loss": 0.6504,
"mean_token_accuracy": 0.80196113884449,
"num_tokens": 10598032.0,
"step": 162
},
{
"epoch": 0.11128178870114354,
"grad_norm": 0.7713350653648376,
"learning_rate": 4.998179354241728e-06,
"loss": 0.6319,
"mean_token_accuracy": 0.8055504709482193,
"num_tokens": 10663568.0,
"step": 163
},
{
"epoch": 0.11196449906127326,
"grad_norm": 0.7200196385383606,
"learning_rate": 4.997944693787117e-06,
"loss": 0.6266,
"mean_token_accuracy": 0.8064363449811935,
"num_tokens": 10729104.0,
"step": 164
},
{
"epoch": 0.11264720942140297,
"grad_norm": 0.8603804111480713,
"learning_rate": 4.997695819512612e-06,
"loss": 0.6469,
"mean_token_accuracy": 0.8034121543169022,
"num_tokens": 10794640.0,
"step": 165
},
{
"epoch": 0.11332991978153269,
"grad_norm": 0.7777139544487,
"learning_rate": 4.99743273283436e-06,
"loss": 0.6875,
"mean_token_accuracy": 0.7916819453239441,
"num_tokens": 10860176.0,
"step": 166
},
{
"epoch": 0.1140126301416624,
"grad_norm": 0.7675216794013977,
"learning_rate": 4.997155435249379e-06,
"loss": 0.6617,
"mean_token_accuracy": 0.7968956828117371,
"num_tokens": 10925650.0,
"step": 167
},
{
"epoch": 0.11469534050179211,
"grad_norm": 0.7635762691497803,
"learning_rate": 4.996863928335547e-06,
"loss": 0.6355,
"mean_token_accuracy": 0.8052615821361542,
"num_tokens": 10990622.0,
"step": 168
},
{
"epoch": 0.11537805086192184,
"grad_norm": 0.7841809988021851,
"learning_rate": 4.9965582137516e-06,
"loss": 0.6615,
"mean_token_accuracy": 0.7980477660894394,
"num_tokens": 11056068.0,
"step": 169
},
{
"epoch": 0.11606076122205154,
"grad_norm": 0.8226203322410583,
"learning_rate": 4.996238293237116e-06,
"loss": 0.675,
"mean_token_accuracy": 0.7942021191120148,
"num_tokens": 11121604.0,
"step": 170
},
{
"epoch": 0.11674347158218126,
"grad_norm": 0.7752227783203125,
"learning_rate": 4.99590416861251e-06,
"loss": 0.6452,
"mean_token_accuracy": 0.802770659327507,
"num_tokens": 11187140.0,
"step": 171
},
{
"epoch": 0.11742618194231097,
"grad_norm": 0.7512950897216797,
"learning_rate": 4.99555584177902e-06,
"loss": 0.6394,
"mean_token_accuracy": 0.802770659327507,
"num_tokens": 11252676.0,
"step": 172
},
{
"epoch": 0.11810889230244069,
"grad_norm": 0.7564979791641235,
"learning_rate": 4.995193314718695e-06,
"loss": 0.6577,
"mean_token_accuracy": 0.7998839169740677,
"num_tokens": 11318212.0,
"step": 173
},
{
"epoch": 0.11879160266257041,
"grad_norm": 0.7963838577270508,
"learning_rate": 4.994816589494391e-06,
"loss": 0.7155,
"mean_token_accuracy": 0.7836958467960358,
"num_tokens": 11383385.0,
"step": 174
},
{
"epoch": 0.11947431302270012,
"grad_norm": 0.8138285875320435,
"learning_rate": 4.994425668249751e-06,
"loss": 0.6714,
"mean_token_accuracy": 0.7966306358575821,
"num_tokens": 11448921.0,
"step": 175
},
{
"epoch": 0.12015702338282984,
"grad_norm": 0.8292612433433533,
"learning_rate": 4.994020553209199e-06,
"loss": 0.641,
"mean_token_accuracy": 0.804618775844574,
"num_tokens": 11514457.0,
"step": 176
},
{
"epoch": 0.12083973374295955,
"grad_norm": 0.7372015118598938,
"learning_rate": 4.993601246677921e-06,
"loss": 0.6556,
"mean_token_accuracy": 0.7988605946302414,
"num_tokens": 11579993.0,
"step": 177
},
{
"epoch": 0.12152244410308927,
"grad_norm": 0.8773394227027893,
"learning_rate": 4.993167751041858e-06,
"loss": 0.6926,
"mean_token_accuracy": 0.790887713432312,
"num_tokens": 11645529.0,
"step": 178
},
{
"epoch": 0.12220515446321897,
"grad_norm": 0.7941958904266357,
"learning_rate": 4.9927200687676905e-06,
"loss": 0.6788,
"mean_token_accuracy": 0.7920080274343491,
"num_tokens": 11711045.0,
"step": 179
},
{
"epoch": 0.1228878648233487,
"grad_norm": 0.7528053522109985,
"learning_rate": 4.992258202402822e-06,
"loss": 0.6856,
"mean_token_accuracy": 0.791987419128418,
"num_tokens": 11776581.0,
"step": 180
},
{
"epoch": 0.12357057518347841,
"grad_norm": 0.8133561611175537,
"learning_rate": 4.991782154575368e-06,
"loss": 0.7112,
"mean_token_accuracy": 0.7850594073534012,
"num_tokens": 11841587.0,
"step": 181
},
{
"epoch": 0.12425328554360812,
"grad_norm": 0.7941370010375977,
"learning_rate": 4.991291927994138e-06,
"loss": 0.6595,
"mean_token_accuracy": 0.7981490045785904,
"num_tokens": 11906559.0,
"step": 182
},
{
"epoch": 0.12493599590373784,
"grad_norm": 0.7561349868774414,
"learning_rate": 4.990787525448623e-06,
"loss": 0.6884,
"mean_token_accuracy": 0.7897268980741501,
"num_tokens": 11972095.0,
"step": 183
},
{
"epoch": 0.12561870626386756,
"grad_norm": 0.7951211333274841,
"learning_rate": 4.990268949808976e-06,
"loss": 0.645,
"mean_token_accuracy": 0.8034732490777969,
"num_tokens": 12037631.0,
"step": 184
},
{
"epoch": 0.12630141662399727,
"grad_norm": 0.7793656587600708,
"learning_rate": 4.989736204026e-06,
"loss": 0.6483,
"mean_token_accuracy": 0.8013349175453186,
"num_tokens": 12103167.0,
"step": 185
},
{
"epoch": 0.12698412698412698,
"grad_norm": 0.8032715916633606,
"learning_rate": 4.989189291131129e-06,
"loss": 0.6866,
"mean_token_accuracy": 0.7917736023664474,
"num_tokens": 12168703.0,
"step": 186
},
{
"epoch": 0.1276668373442567,
"grad_norm": 0.7852079272270203,
"learning_rate": 4.988628214236409e-06,
"loss": 0.6442,
"mean_token_accuracy": 0.8026293814182281,
"num_tokens": 12234100.0,
"step": 187
},
{
"epoch": 0.12834954770438642,
"grad_norm": 0.7962309122085571,
"learning_rate": 4.988052976534483e-06,
"loss": 0.6989,
"mean_token_accuracy": 0.7885660976171494,
"num_tokens": 12299636.0,
"step": 188
},
{
"epoch": 0.12903225806451613,
"grad_norm": 0.774936854839325,
"learning_rate": 4.987463581298573e-06,
"loss": 0.6561,
"mean_token_accuracy": 0.7997016906738281,
"num_tokens": 12365042.0,
"step": 189
},
{
"epoch": 0.12971496842464583,
"grad_norm": 0.7444131374359131,
"learning_rate": 4.986860031882459e-06,
"loss": 0.6164,
"mean_token_accuracy": 0.8107282519340515,
"num_tokens": 12430578.0,
"step": 190
},
{
"epoch": 0.13039767878477557,
"grad_norm": 0.7779427766799927,
"learning_rate": 4.986242331720461e-06,
"loss": 0.6538,
"mean_token_accuracy": 0.7991146445274353,
"num_tokens": 12496036.0,
"step": 191
},
{
"epoch": 0.13108038914490527,
"grad_norm": 0.7790391445159912,
"learning_rate": 4.985610484327421e-06,
"loss": 0.6263,
"mean_token_accuracy": 0.8059475868940353,
"num_tokens": 12561572.0,
"step": 192
},
{
"epoch": 0.13176309950503498,
"grad_norm": 0.7643638849258423,
"learning_rate": 4.9849644932986795e-06,
"loss": 0.6947,
"mean_token_accuracy": 0.7892792373895645,
"num_tokens": 12627073.0,
"step": 193
},
{
"epoch": 0.13244580986516472,
"grad_norm": 0.8144778609275818,
"learning_rate": 4.98430436231006e-06,
"loss": 0.6745,
"mean_token_accuracy": 0.793398305773735,
"num_tokens": 12692601.0,
"step": 194
},
{
"epoch": 0.13312852022529442,
"grad_norm": 0.7241678833961487,
"learning_rate": 4.983630095117843e-06,
"loss": 0.6126,
"mean_token_accuracy": 0.8117210417985916,
"num_tokens": 12758137.0,
"step": 195
},
{
"epoch": 0.13381123058542413,
"grad_norm": 0.730594277381897,
"learning_rate": 4.982941695558748e-06,
"loss": 0.6867,
"mean_token_accuracy": 0.7909946292638779,
"num_tokens": 12823673.0,
"step": 196
},
{
"epoch": 0.13449394094555384,
"grad_norm": 0.753174901008606,
"learning_rate": 4.98223916754991e-06,
"loss": 0.6757,
"mean_token_accuracy": 0.7946603149175644,
"num_tokens": 12889209.0,
"step": 197
},
{
"epoch": 0.13517665130568357,
"grad_norm": 0.7512592673301697,
"learning_rate": 4.981522515088858e-06,
"loss": 0.6531,
"mean_token_accuracy": 0.801472395658493,
"num_tokens": 12954745.0,
"step": 198
},
{
"epoch": 0.13585936166581328,
"grad_norm": 0.7973717451095581,
"learning_rate": 4.980791742253493e-06,
"loss": 0.7005,
"mean_token_accuracy": 0.7849462330341339,
"num_tokens": 13020281.0,
"step": 199
},
{
"epoch": 0.13654207202594298,
"grad_norm": 0.7488005757331848,
"learning_rate": 4.980046853202062e-06,
"loss": 0.6187,
"mean_token_accuracy": 0.8101655095815659,
"num_tokens": 13085725.0,
"step": 200
},
{
"epoch": 0.13722478238607272,
"grad_norm": 0.7275497913360596,
"learning_rate": 4.979287852173138e-06,
"loss": 0.6091,
"mean_token_accuracy": 0.8102089464664459,
"num_tokens": 13151261.0,
"step": 201
},
{
"epoch": 0.13790749274620243,
"grad_norm": 0.7557955384254456,
"learning_rate": 4.978514743485593e-06,
"loss": 0.6613,
"mean_token_accuracy": 0.799455463886261,
"num_tokens": 13216711.0,
"step": 202
},
{
"epoch": 0.13859020310633213,
"grad_norm": 0.7728015184402466,
"learning_rate": 4.977727531538573e-06,
"loss": 0.6621,
"mean_token_accuracy": 0.7968718856573105,
"num_tokens": 13281491.0,
"step": 203
},
{
"epoch": 0.13927291346646187,
"grad_norm": 0.7593042254447937,
"learning_rate": 4.976926220811479e-06,
"loss": 0.694,
"mean_token_accuracy": 0.7916208654642105,
"num_tokens": 13347027.0,
"step": 204
},
{
"epoch": 0.13995562382659157,
"grad_norm": 0.7546338438987732,
"learning_rate": 4.976110815863932e-06,
"loss": 0.6704,
"mean_token_accuracy": 0.796303778886795,
"num_tokens": 13412524.0,
"step": 205
},
{
"epoch": 0.14063833418672128,
"grad_norm": 0.7690262794494629,
"learning_rate": 4.975281321335755e-06,
"loss": 0.6753,
"mean_token_accuracy": 0.7962145358324051,
"num_tokens": 13477914.0,
"step": 206
},
{
"epoch": 0.141321044546851,
"grad_norm": 0.7461578845977783,
"learning_rate": 4.974437741946943e-06,
"loss": 0.6543,
"mean_token_accuracy": 0.7989522367715836,
"num_tokens": 13543450.0,
"step": 207
},
{
"epoch": 0.14200375490698072,
"grad_norm": 0.7672309279441833,
"learning_rate": 4.973580082497636e-06,
"loss": 0.6676,
"mean_token_accuracy": 0.7950268983840942,
"num_tokens": 13608986.0,
"step": 208
},
{
"epoch": 0.14268646526711043,
"grad_norm": 0.7266797423362732,
"learning_rate": 4.9727083478680925e-06,
"loss": 0.6327,
"mean_token_accuracy": 0.8044965714216232,
"num_tokens": 13674522.0,
"step": 209
},
{
"epoch": 0.14336917562724014,
"grad_norm": 0.7649458050727844,
"learning_rate": 4.971822543018663e-06,
"loss": 0.6644,
"mean_token_accuracy": 0.7988300323486328,
"num_tokens": 13740058.0,
"step": 210
},
{
"epoch": 0.14405188598736987,
"grad_norm": 0.7558519244194031,
"learning_rate": 4.970922672989759e-06,
"loss": 0.6387,
"mean_token_accuracy": 0.8038550913333893,
"num_tokens": 13805594.0,
"step": 211
},
{
"epoch": 0.14473459634749958,
"grad_norm": 0.7533179521560669,
"learning_rate": 4.970008742901827e-06,
"loss": 0.6512,
"mean_token_accuracy": 0.7999603003263474,
"num_tokens": 13871130.0,
"step": 212
},
{
"epoch": 0.14541730670762928,
"grad_norm": 0.7587620615959167,
"learning_rate": 4.969080757955317e-06,
"loss": 0.6682,
"mean_token_accuracy": 0.7954698204994202,
"num_tokens": 13936666.0,
"step": 213
},
{
"epoch": 0.146100017067759,
"grad_norm": 0.7344343066215515,
"learning_rate": 4.968138723430654e-06,
"loss": 0.6331,
"mean_token_accuracy": 0.8062831312417984,
"num_tokens": 14001627.0,
"step": 214
},
{
"epoch": 0.14678272742788873,
"grad_norm": 0.8171228170394897,
"learning_rate": 4.96718264468821e-06,
"loss": 0.6709,
"mean_token_accuracy": 0.7964931726455688,
"num_tokens": 14067163.0,
"step": 215
},
{
"epoch": 0.14746543778801843,
"grad_norm": 0.7533463835716248,
"learning_rate": 4.966212527168268e-06,
"loss": 0.6755,
"mean_token_accuracy": 0.7955246716737747,
"num_tokens": 14132389.0,
"step": 216
},
{
"epoch": 0.14814814814814814,
"grad_norm": 0.7684210538864136,
"learning_rate": 4.965228376390998e-06,
"loss": 0.6898,
"mean_token_accuracy": 0.7908294945955276,
"num_tokens": 14197709.0,
"step": 217
},
{
"epoch": 0.14883085850827787,
"grad_norm": 0.7119394540786743,
"learning_rate": 4.964230197956421e-06,
"loss": 0.6306,
"mean_token_accuracy": 0.8053364753723145,
"num_tokens": 14263205.0,
"step": 218
},
{
"epoch": 0.14951356886840758,
"grad_norm": 0.7983673810958862,
"learning_rate": 4.963217997544376e-06,
"loss": 0.6943,
"mean_token_accuracy": 0.79004767537117,
"num_tokens": 14328741.0,
"step": 219
},
{
"epoch": 0.1501962792285373,
"grad_norm": 0.7425892353057861,
"learning_rate": 4.962191780914494e-06,
"loss": 0.6252,
"mean_token_accuracy": 0.8074288666248322,
"num_tokens": 14394170.0,
"step": 220
},
{
"epoch": 0.150878989588667,
"grad_norm": 0.7484636902809143,
"learning_rate": 4.961151553906158e-06,
"loss": 0.6192,
"mean_token_accuracy": 0.810231164097786,
"num_tokens": 14459375.0,
"step": 221
},
{
"epoch": 0.15156169994879673,
"grad_norm": 0.795856237411499,
"learning_rate": 4.960097322438474e-06,
"loss": 0.6494,
"mean_token_accuracy": 0.8015000522136688,
"num_tokens": 14524534.0,
"step": 222
},
{
"epoch": 0.15224441030892644,
"grad_norm": 0.7697437405586243,
"learning_rate": 4.959029092510236e-06,
"loss": 0.615,
"mean_token_accuracy": 0.8114613890647888,
"num_tokens": 14590070.0,
"step": 223
},
{
"epoch": 0.15292712066905614,
"grad_norm": 0.7738355994224548,
"learning_rate": 4.9579468701998926e-06,
"loss": 0.6611,
"mean_token_accuracy": 0.7977761626243591,
"num_tokens": 14655606.0,
"step": 224
},
{
"epoch": 0.15360983102918588,
"grad_norm": 0.759848952293396,
"learning_rate": 4.9568506616655125e-06,
"loss": 0.6472,
"mean_token_accuracy": 0.8012253940105438,
"num_tokens": 14721099.0,
"step": 225
},
{
"epoch": 0.15429254138931559,
"grad_norm": 0.7471370697021484,
"learning_rate": 4.955740473144746e-06,
"loss": 0.6849,
"mean_token_accuracy": 0.7934689670801163,
"num_tokens": 14786635.0,
"step": 226
},
{
"epoch": 0.1549752517494453,
"grad_norm": 0.7679874897003174,
"learning_rate": 4.954616310954796e-06,
"loss": 0.6626,
"mean_token_accuracy": 0.7951722294092178,
"num_tokens": 14851243.0,
"step": 227
},
{
"epoch": 0.15565796210957503,
"grad_norm": 0.7903568744659424,
"learning_rate": 4.953478181492377e-06,
"loss": 0.6891,
"mean_token_accuracy": 0.7914992719888687,
"num_tokens": 14916708.0,
"step": 228
},
{
"epoch": 0.15634067246970473,
"grad_norm": 0.8032446503639221,
"learning_rate": 4.95232609123368e-06,
"loss": 0.6801,
"mean_token_accuracy": 0.7916972190141678,
"num_tokens": 14982244.0,
"step": 229
},
{
"epoch": 0.15702338282983444,
"grad_norm": 0.7658280730247498,
"learning_rate": 4.9511600467343355e-06,
"loss": 0.6485,
"mean_token_accuracy": 0.8023097962141037,
"num_tokens": 15047779.0,
"step": 230
},
{
"epoch": 0.15770609318996415,
"grad_norm": 0.7308084964752197,
"learning_rate": 4.9499800546293776e-06,
"loss": 0.6263,
"mean_token_accuracy": 0.8065585345029831,
"num_tokens": 15113315.0,
"step": 231
},
{
"epoch": 0.15838880355009388,
"grad_norm": 0.7760301828384399,
"learning_rate": 4.948786121633204e-06,
"loss": 0.6715,
"mean_token_accuracy": 0.7964503765106201,
"num_tokens": 15178784.0,
"step": 232
},
{
"epoch": 0.1590715139102236,
"grad_norm": 0.7470239400863647,
"learning_rate": 4.947578254539539e-06,
"loss": 0.6479,
"mean_token_accuracy": 0.8013196587562561,
"num_tokens": 15244320.0,
"step": 233
},
{
"epoch": 0.1597542242703533,
"grad_norm": 0.8011210560798645,
"learning_rate": 4.946356460221396e-06,
"loss": 0.6536,
"mean_token_accuracy": 0.7990744262933731,
"num_tokens": 15309856.0,
"step": 234
},
{
"epoch": 0.16043693463048303,
"grad_norm": 0.8322118520736694,
"learning_rate": 4.945120745631036e-06,
"loss": 0.6862,
"mean_token_accuracy": 0.7918499410152435,
"num_tokens": 15375392.0,
"step": 235
},
{
"epoch": 0.16111964499061274,
"grad_norm": 0.7466151118278503,
"learning_rate": 4.943871117799929e-06,
"loss": 0.654,
"mean_token_accuracy": 0.7988605946302414,
"num_tokens": 15440928.0,
"step": 236
},
{
"epoch": 0.16180235535074244,
"grad_norm": 0.7664824724197388,
"learning_rate": 4.942607583838715e-06,
"loss": 0.6263,
"mean_token_accuracy": 0.807658240199089,
"num_tokens": 15506464.0,
"step": 237
},
{
"epoch": 0.16248506571087215,
"grad_norm": 0.7403832674026489,
"learning_rate": 4.941330150937164e-06,
"loss": 0.6117,
"mean_token_accuracy": 0.8102048337459564,
"num_tokens": 15571701.0,
"step": 238
},
{
"epoch": 0.16316777607100189,
"grad_norm": 0.8409820199012756,
"learning_rate": 4.94003882636413e-06,
"loss": 0.6636,
"mean_token_accuracy": 0.7948121428489685,
"num_tokens": 15636714.0,
"step": 239
},
{
"epoch": 0.1638504864311316,
"grad_norm": 0.7570234537124634,
"learning_rate": 4.938733617467517e-06,
"loss": 0.6386,
"mean_token_accuracy": 0.8039925545454025,
"num_tokens": 15702250.0,
"step": 240
},
{
"epoch": 0.1645331967912613,
"grad_norm": 0.7913652062416077,
"learning_rate": 4.937414531674234e-06,
"loss": 0.6241,
"mean_token_accuracy": 0.8082087188959122,
"num_tokens": 15767776.0,
"step": 241
},
{
"epoch": 0.16521590715139103,
"grad_norm": 0.7465484738349915,
"learning_rate": 4.9360815764901485e-06,
"loss": 0.6072,
"mean_token_accuracy": 0.8118190169334412,
"num_tokens": 15832920.0,
"step": 242
},
{
"epoch": 0.16589861751152074,
"grad_norm": 0.7312461733818054,
"learning_rate": 4.934734759500052e-06,
"loss": 0.6311,
"mean_token_accuracy": 0.8050464242696762,
"num_tokens": 15898456.0,
"step": 243
},
{
"epoch": 0.16658132787165045,
"grad_norm": 0.7343611717224121,
"learning_rate": 4.933374088367608e-06,
"loss": 0.6303,
"mean_token_accuracy": 0.8073833137750626,
"num_tokens": 15963992.0,
"step": 244
},
{
"epoch": 0.16726403823178015,
"grad_norm": 0.7481185793876648,
"learning_rate": 4.931999570835319e-06,
"loss": 0.6477,
"mean_token_accuracy": 0.8016556799411774,
"num_tokens": 16029528.0,
"step": 245
},
{
"epoch": 0.1679467485919099,
"grad_norm": 0.7350367307662964,
"learning_rate": 4.93061121472447e-06,
"loss": 0.6316,
"mean_token_accuracy": 0.8050617128610611,
"num_tokens": 16095064.0,
"step": 246
},
{
"epoch": 0.1686294589520396,
"grad_norm": 0.7721445560455322,
"learning_rate": 4.929209027935095e-06,
"loss": 0.649,
"mean_token_accuracy": 0.7996423244476318,
"num_tokens": 16160499.0,
"step": 247
},
{
"epoch": 0.1693121693121693,
"grad_norm": 0.7480275630950928,
"learning_rate": 4.927793018445924e-06,
"loss": 0.6655,
"mean_token_accuracy": 0.7958669364452362,
"num_tokens": 16226035.0,
"step": 248
},
{
"epoch": 0.16999487967229904,
"grad_norm": 0.7658473253250122,
"learning_rate": 4.926363194314345e-06,
"loss": 0.6314,
"mean_token_accuracy": 0.8055810183286667,
"num_tokens": 16291571.0,
"step": 249
},
{
"epoch": 0.17067759003242874,
"grad_norm": 0.7760869264602661,
"learning_rate": 4.92491956367635e-06,
"loss": 0.6814,
"mean_token_accuracy": 0.7943701148033142,
"num_tokens": 16357107.0,
"step": 250
},
{
"epoch": 0.17136030039255845,
"grad_norm": 0.7353349328041077,
"learning_rate": 4.9234621347464965e-06,
"loss": 0.6221,
"mean_token_accuracy": 0.8080248087644577,
"num_tokens": 16422643.0,
"step": 251
},
{
"epoch": 0.17204301075268819,
"grad_norm": 0.775214433670044,
"learning_rate": 4.9219909158178525e-06,
"loss": 0.6353,
"mean_token_accuracy": 0.8042980283498764,
"num_tokens": 16488179.0,
"step": 252
},
{
"epoch": 0.1727257211128179,
"grad_norm": 0.7820099592208862,
"learning_rate": 4.9205059152619595e-06,
"loss": 0.6736,
"mean_token_accuracy": 0.7939413487911224,
"num_tokens": 16552913.0,
"step": 253
},
{
"epoch": 0.1734084314729476,
"grad_norm": 0.7453340291976929,
"learning_rate": 4.919007141528776e-06,
"loss": 0.6617,
"mean_token_accuracy": 0.7975623160600662,
"num_tokens": 16618449.0,
"step": 254
},
{
"epoch": 0.1740911418330773,
"grad_norm": 0.7431179285049438,
"learning_rate": 4.917494603146632e-06,
"loss": 0.6419,
"mean_token_accuracy": 0.8054893761873245,
"num_tokens": 16683985.0,
"step": 255
},
{
"epoch": 0.17477385219320704,
"grad_norm": 0.76502525806427,
"learning_rate": 4.9159683087221835e-06,
"loss": 0.6393,
"mean_token_accuracy": 0.8057032078504562,
"num_tokens": 16749521.0,
"step": 256
},
{
"epoch": 0.17545656255333675,
"grad_norm": 0.793250322341919,
"learning_rate": 4.91442826694036e-06,
"loss": 0.7001,
"mean_token_accuracy": 0.7883522659540176,
"num_tokens": 16815057.0,
"step": 257
},
{
"epoch": 0.17613927291346645,
"grad_norm": 0.8034051060676575,
"learning_rate": 4.912874486564317e-06,
"loss": 0.655,
"mean_token_accuracy": 0.7997875660657883,
"num_tokens": 16880373.0,
"step": 258
},
{
"epoch": 0.1768219832735962,
"grad_norm": 0.7254968285560608,
"learning_rate": 4.911306976435384e-06,
"loss": 0.6268,
"mean_token_accuracy": 0.8077804297208786,
"num_tokens": 16945909.0,
"step": 259
},
{
"epoch": 0.1775046936337259,
"grad_norm": 0.7864726781845093,
"learning_rate": 4.909725745473017e-06,
"loss": 0.6524,
"mean_token_accuracy": 0.7994562685489655,
"num_tokens": 17011445.0,
"step": 260
},
{
"epoch": 0.1781874039938556,
"grad_norm": 0.7491886019706726,
"learning_rate": 4.908130802674747e-06,
"loss": 0.6405,
"mean_token_accuracy": 0.803121954202652,
"num_tokens": 17076981.0,
"step": 261
},
{
"epoch": 0.1788701143539853,
"grad_norm": 0.7460026144981384,
"learning_rate": 4.906522157116125e-06,
"loss": 0.6518,
"mean_token_accuracy": 0.799975574016571,
"num_tokens": 17142517.0,
"step": 262
},
{
"epoch": 0.17955282471411504,
"grad_norm": 0.7651973962783813,
"learning_rate": 4.904899817950677e-06,
"loss": 0.6911,
"mean_token_accuracy": 0.7881384491920471,
"num_tokens": 17208053.0,
"step": 263
},
{
"epoch": 0.18023553507424475,
"grad_norm": 0.773990273475647,
"learning_rate": 4.903263794409846e-06,
"loss": 0.6426,
"mean_token_accuracy": 0.8015334904193878,
"num_tokens": 17273589.0,
"step": 264
},
{
"epoch": 0.18091824543437446,
"grad_norm": 0.7365688681602478,
"learning_rate": 4.901614095802945e-06,
"loss": 0.6416,
"mean_token_accuracy": 0.8022055327892303,
"num_tokens": 17339125.0,
"step": 265
},
{
"epoch": 0.1816009557945042,
"grad_norm": 0.7673572301864624,
"learning_rate": 4.8999507315170965e-06,
"loss": 0.6567,
"mean_token_accuracy": 0.7990744113922119,
"num_tokens": 17404661.0,
"step": 266
},
{
"epoch": 0.1822836661546339,
"grad_norm": 0.751385509967804,
"learning_rate": 4.898273711017187e-06,
"loss": 0.6364,
"mean_token_accuracy": 0.8052991777658463,
"num_tokens": 17469831.0,
"step": 267
},
{
"epoch": 0.1829663765147636,
"grad_norm": 0.744659423828125,
"learning_rate": 4.896583043845809e-06,
"loss": 0.6295,
"mean_token_accuracy": 0.8060995042324066,
"num_tokens": 17535046.0,
"step": 268
},
{
"epoch": 0.1836490868748933,
"grad_norm": 0.708869993686676,
"learning_rate": 4.894878739623207e-06,
"loss": 0.5935,
"mean_token_accuracy": 0.8142259269952774,
"num_tokens": 17600582.0,
"step": 269
},
{
"epoch": 0.18433179723502305,
"grad_norm": 0.7676243185997009,
"learning_rate": 4.893160808047222e-06,
"loss": 0.6736,
"mean_token_accuracy": 0.7934078723192215,
"num_tokens": 17666118.0,
"step": 270
},
{
"epoch": 0.18501450759515276,
"grad_norm": 0.7718145251274109,
"learning_rate": 4.89142925889324e-06,
"loss": 0.6561,
"mean_token_accuracy": 0.7996843755245209,
"num_tokens": 17730784.0,
"step": 271
},
{
"epoch": 0.18569721795528246,
"grad_norm": 0.7711499929428101,
"learning_rate": 4.889684102014132e-06,
"loss": 0.649,
"mean_token_accuracy": 0.800556480884552,
"num_tokens": 17796029.0,
"step": 272
},
{
"epoch": 0.1863799283154122,
"grad_norm": 0.7592529654502869,
"learning_rate": 4.887925347340199e-06,
"loss": 0.6602,
"mean_token_accuracy": 0.797866091132164,
"num_tokens": 17860903.0,
"step": 273
},
{
"epoch": 0.1870626386755419,
"grad_norm": 0.7690698504447937,
"learning_rate": 4.886153004879119e-06,
"loss": 0.6666,
"mean_token_accuracy": 0.7956989407539368,
"num_tokens": 17926439.0,
"step": 274
},
{
"epoch": 0.1877453490356716,
"grad_norm": 0.7630760669708252,
"learning_rate": 4.884367084715884e-06,
"loss": 0.6189,
"mean_token_accuracy": 0.8093841671943665,
"num_tokens": 17991975.0,
"step": 275
},
{
"epoch": 0.18842805939580134,
"grad_norm": 0.7848920822143555,
"learning_rate": 4.882567597012749e-06,
"loss": 0.6744,
"mean_token_accuracy": 0.7962527424097061,
"num_tokens": 18057034.0,
"step": 276
},
{
"epoch": 0.18911076975593105,
"grad_norm": 0.7455816268920898,
"learning_rate": 4.88075455200917e-06,
"loss": 0.6357,
"mean_token_accuracy": 0.8023249208927155,
"num_tokens": 18122157.0,
"step": 277
},
{
"epoch": 0.18979348011606076,
"grad_norm": 0.7486319541931152,
"learning_rate": 4.878927960021746e-06,
"loss": 0.6457,
"mean_token_accuracy": 0.8028607368469238,
"num_tokens": 18187425.0,
"step": 278
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.7425917983055115,
"learning_rate": 4.87708783144416e-06,
"loss": 0.6426,
"mean_token_accuracy": 0.8033974319696426,
"num_tokens": 18252951.0,
"step": 279
},
{
"epoch": 0.1911589008363202,
"grad_norm": 0.7807708978652954,
"learning_rate": 4.875234176747125e-06,
"loss": 0.7113,
"mean_token_accuracy": 0.7846617698669434,
"num_tokens": 18317646.0,
"step": 280
},
{
"epoch": 0.1918416111964499,
"grad_norm": 0.7756418585777283,
"learning_rate": 4.873367006478319e-06,
"loss": 0.6106,
"mean_token_accuracy": 0.8120723366737366,
"num_tokens": 18383182.0,
"step": 281
},
{
"epoch": 0.19252432155657961,
"grad_norm": 0.7727638483047485,
"learning_rate": 4.871486331262322e-06,
"loss": 0.6201,
"mean_token_accuracy": 0.8101401478052139,
"num_tokens": 18448389.0,
"step": 282
},
{
"epoch": 0.19320703191670935,
"grad_norm": 0.7593509554862976,
"learning_rate": 4.869592161800566e-06,
"loss": 0.6288,
"mean_token_accuracy": 0.8057032078504562,
"num_tokens": 18513925.0,
"step": 283
},
{
"epoch": 0.19388974227683906,
"grad_norm": 0.7642320394515991,
"learning_rate": 4.867684508871264e-06,
"loss": 0.6609,
"mean_token_accuracy": 0.7985596507787704,
"num_tokens": 18579436.0,
"step": 284
},
{
"epoch": 0.19457245263696876,
"grad_norm": 0.761231541633606,
"learning_rate": 4.865763383329356e-06,
"loss": 0.6115,
"mean_token_accuracy": 0.8105635046958923,
"num_tokens": 18644911.0,
"step": 285
},
{
"epoch": 0.19525516299709847,
"grad_norm": 0.7838804125785828,
"learning_rate": 4.8638287961064405e-06,
"loss": 0.6728,
"mean_token_accuracy": 0.7939742207527161,
"num_tokens": 18710341.0,
"step": 286
},
{
"epoch": 0.1959378733572282,
"grad_norm": 0.8204277157783508,
"learning_rate": 4.861880758210717e-06,
"loss": 0.6403,
"mean_token_accuracy": 0.8030597120523453,
"num_tokens": 18775099.0,
"step": 287
},
{
"epoch": 0.1966205837173579,
"grad_norm": 0.7632973194122314,
"learning_rate": 4.859919280726925e-06,
"loss": 0.6266,
"mean_token_accuracy": 0.807135596871376,
"num_tokens": 18840451.0,
"step": 288
},
{
"epoch": 0.19730329407748762,
"grad_norm": 0.7931602001190186,
"learning_rate": 4.857944374816272e-06,
"loss": 0.6502,
"mean_token_accuracy": 0.7989926338195801,
"num_tokens": 18905826.0,
"step": 289
},
{
"epoch": 0.19798600443761735,
"grad_norm": 0.7971415519714355,
"learning_rate": 4.855956051716382e-06,
"loss": 0.662,
"mean_token_accuracy": 0.7989369630813599,
"num_tokens": 18971362.0,
"step": 290
},
{
"epoch": 0.19866871479774706,
"grad_norm": 0.7802919149398804,
"learning_rate": 4.853954322741221e-06,
"loss": 0.6415,
"mean_token_accuracy": 0.8015654236078262,
"num_tokens": 19036550.0,
"step": 291
},
{
"epoch": 0.19935142515787677,
"grad_norm": 0.7734397649765015,
"learning_rate": 4.851939199281042e-06,
"loss": 0.6578,
"mean_token_accuracy": 0.7977914214134216,
"num_tokens": 19102086.0,
"step": 292
},
{
"epoch": 0.20003413551800647,
"grad_norm": 0.7715845704078674,
"learning_rate": 4.84991069280231e-06,
"loss": 0.6628,
"mean_token_accuracy": 0.7946704924106598,
"num_tokens": 19167541.0,
"step": 293
},
{
"epoch": 0.2007168458781362,
"grad_norm": 0.7626240253448486,
"learning_rate": 4.847868814847646e-06,
"loss": 0.6318,
"mean_token_accuracy": 0.8061958402395248,
"num_tokens": 19232955.0,
"step": 294
},
{
"epoch": 0.20139955623826591,
"grad_norm": 0.7462788820266724,
"learning_rate": 4.845813577035756e-06,
"loss": 0.6455,
"mean_token_accuracy": 0.8009988963603973,
"num_tokens": 19298491.0,
"step": 295
},
{
"epoch": 0.20208226659839562,
"grad_norm": 0.7987794280052185,
"learning_rate": 4.843744991061366e-06,
"loss": 0.674,
"mean_token_accuracy": 0.7931847423315048,
"num_tokens": 19363418.0,
"step": 296
},
{
"epoch": 0.20276497695852536,
"grad_norm": 0.7509877681732178,
"learning_rate": 4.841663068695157e-06,
"loss": 0.6494,
"mean_token_accuracy": 0.8006734549999237,
"num_tokens": 19428911.0,
"step": 297
},
{
"epoch": 0.20344768731865506,
"grad_norm": 0.762054979801178,
"learning_rate": 4.839567821783696e-06,
"loss": 0.6296,
"mean_token_accuracy": 0.8052349090576172,
"num_tokens": 19494165.0,
"step": 298
},
{
"epoch": 0.20413039767878477,
"grad_norm": 0.7115033268928528,
"learning_rate": 4.8374592622493696e-06,
"loss": 0.6046,
"mean_token_accuracy": 0.8131680637598038,
"num_tokens": 19559490.0,
"step": 299
},
{
"epoch": 0.2048131080389145,
"grad_norm": 0.7701613306999207,
"learning_rate": 4.835337402090317e-06,
"loss": 0.6404,
"mean_token_accuracy": 0.8019764274358749,
"num_tokens": 19625026.0,
"step": 300
},
{
"epoch": 0.2054958183990442,
"grad_norm": 0.8328090310096741,
"learning_rate": 4.833202253380357e-06,
"loss": 0.6707,
"mean_token_accuracy": 0.7956531047821045,
"num_tokens": 19690562.0,
"step": 301
},
{
"epoch": 0.20617852875917392,
"grad_norm": 0.7679016590118408,
"learning_rate": 4.831053828268927e-06,
"loss": 0.6237,
"mean_token_accuracy": 0.809225007891655,
"num_tokens": 19755943.0,
"step": 302
},
{
"epoch": 0.20686123911930362,
"grad_norm": 0.7081552743911743,
"learning_rate": 4.828892138981009e-06,
"loss": 0.5685,
"mean_token_accuracy": 0.8225348144769669,
"num_tokens": 19821479.0,
"step": 303
},
{
"epoch": 0.20754394947943336,
"grad_norm": 0.7495241761207581,
"learning_rate": 4.82671719781706e-06,
"loss": 0.6344,
"mean_token_accuracy": 0.8030302971601486,
"num_tokens": 19887015.0,
"step": 304
},
{
"epoch": 0.20822665983956307,
"grad_norm": 0.7506179213523865,
"learning_rate": 4.824529017152943e-06,
"loss": 0.6156,
"mean_token_accuracy": 0.8103922307491302,
"num_tokens": 19952551.0,
"step": 305
},
{
"epoch": 0.20890937019969277,
"grad_norm": 0.8004708886146545,
"learning_rate": 4.822327609439857e-06,
"loss": 0.6525,
"mean_token_accuracy": 0.7992118746042252,
"num_tokens": 20018087.0,
"step": 306
},
{
"epoch": 0.2095920805598225,
"grad_norm": 0.7550822496414185,
"learning_rate": 4.820112987204265e-06,
"loss": 0.6152,
"mean_token_accuracy": 0.8115045875310898,
"num_tokens": 20083376.0,
"step": 307
},
{
"epoch": 0.21027479091995221,
"grad_norm": 0.7593732476234436,
"learning_rate": 4.817885163047824e-06,
"loss": 0.6825,
"mean_token_accuracy": 0.7943395674228668,
"num_tokens": 20148912.0,
"step": 308
},
{
"epoch": 0.21095750128008192,
"grad_norm": 0.7294323444366455,
"learning_rate": 4.815644149647313e-06,
"loss": 0.6264,
"mean_token_accuracy": 0.8054499328136444,
"num_tokens": 20214180.0,
"step": 309
},
{
"epoch": 0.21164021164021163,
"grad_norm": 0.8109260201454163,
"learning_rate": 4.813389959754559e-06,
"loss": 0.6476,
"mean_token_accuracy": 0.7988300323486328,
"num_tokens": 20279716.0,
"step": 310
},
{
"epoch": 0.21232292200034136,
"grad_norm": 0.735640823841095,
"learning_rate": 4.811122606196367e-06,
"loss": 0.6329,
"mean_token_accuracy": 0.805917039513588,
"num_tokens": 20345252.0,
"step": 311
},
{
"epoch": 0.21300563236047107,
"grad_norm": 0.7742691040039062,
"learning_rate": 4.808842101874447e-06,
"loss": 0.654,
"mean_token_accuracy": 0.80037821829319,
"num_tokens": 20410686.0,
"step": 312
},
{
"epoch": 0.21368834272060078,
"grad_norm": 0.8276035785675049,
"learning_rate": 4.806548459765337e-06,
"loss": 0.6349,
"mean_token_accuracy": 0.8064994513988495,
"num_tokens": 20475985.0,
"step": 313
},
{
"epoch": 0.2143710530807305,
"grad_norm": 0.8679972290992737,
"learning_rate": 4.804241692920333e-06,
"loss": 0.656,
"mean_token_accuracy": 0.799239456653595,
"num_tokens": 20541000.0,
"step": 314
},
{
"epoch": 0.21505376344086022,
"grad_norm": 0.781409502029419,
"learning_rate": 4.8019218144654135e-06,
"loss": 0.7053,
"mean_token_accuracy": 0.7867943644523621,
"num_tokens": 20606536.0,
"step": 315
},
{
"epoch": 0.21573647380098993,
"grad_norm": 0.7729482650756836,
"learning_rate": 4.799588837601167e-06,
"loss": 0.6525,
"mean_token_accuracy": 0.8019153326749802,
"num_tokens": 20672072.0,
"step": 316
},
{
"epoch": 0.21641918416111963,
"grad_norm": 0.8480416536331177,
"learning_rate": 4.79724277560271e-06,
"loss": 0.6216,
"mean_token_accuracy": 0.8093619346618652,
"num_tokens": 20736796.0,
"step": 317
},
{
"epoch": 0.21710189452124937,
"grad_norm": 0.7638158202171326,
"learning_rate": 4.794883641819619e-06,
"loss": 0.6586,
"mean_token_accuracy": 0.7982496321201324,
"num_tokens": 20802332.0,
"step": 318
},
{
"epoch": 0.21778460488137907,
"grad_norm": 0.7327334880828857,
"learning_rate": 4.792511449675852e-06,
"loss": 0.6338,
"mean_token_accuracy": 0.8040231019258499,
"num_tokens": 20867868.0,
"step": 319
},
{
"epoch": 0.21846731524150878,
"grad_norm": 0.8290518522262573,
"learning_rate": 4.79012621266967e-06,
"loss": 0.6651,
"mean_token_accuracy": 0.7965084463357925,
"num_tokens": 20933404.0,
"step": 320
},
{
"epoch": 0.21915002560163852,
"grad_norm": 0.7668699026107788,
"learning_rate": 4.787727944373565e-06,
"loss": 0.6644,
"mean_token_accuracy": 0.7934842258691788,
"num_tokens": 20998940.0,
"step": 321
},
{
"epoch": 0.21983273596176822,
"grad_norm": 0.7874463796615601,
"learning_rate": 4.7853166584341745e-06,
"loss": 0.6177,
"mean_token_accuracy": 0.8053746223449707,
"num_tokens": 21064190.0,
"step": 322
},
{
"epoch": 0.22051544632189793,
"grad_norm": 0.7763647437095642,
"learning_rate": 4.7828923685722155e-06,
"loss": 0.6288,
"mean_token_accuracy": 0.808009535074234,
"num_tokens": 21129726.0,
"step": 323
},
{
"epoch": 0.22119815668202766,
"grad_norm": 0.8179218173027039,
"learning_rate": 4.780455088582394e-06,
"loss": 0.645,
"mean_token_accuracy": 0.8021404594182968,
"num_tokens": 21195177.0,
"step": 324
},
{
"epoch": 0.22188086704215737,
"grad_norm": 0.7938908338546753,
"learning_rate": 4.778004832333337e-06,
"loss": 0.6616,
"mean_token_accuracy": 0.7988453060388565,
"num_tokens": 21260713.0,
"step": 325
},
{
"epoch": 0.22256357740228708,
"grad_norm": 0.7682486176490784,
"learning_rate": 4.775541613767506e-06,
"loss": 0.6328,
"mean_token_accuracy": 0.8051348775625229,
"num_tokens": 21326248.0,
"step": 326
},
{
"epoch": 0.22324628776241678,
"grad_norm": 0.766139805316925,
"learning_rate": 4.773065446901123e-06,
"loss": 0.617,
"mean_token_accuracy": 0.8100256621837616,
"num_tokens": 21391784.0,
"step": 327
},
{
"epoch": 0.22392899812254652,
"grad_norm": 0.8150837421417236,
"learning_rate": 4.770576345824087e-06,
"loss": 0.6582,
"mean_token_accuracy": 0.7959434241056442,
"num_tokens": 21456337.0,
"step": 328
},
{
"epoch": 0.22461170848267623,
"grad_norm": 0.8115120530128479,
"learning_rate": 4.768074324699897e-06,
"loss": 0.6373,
"mean_token_accuracy": 0.8020833283662796,
"num_tokens": 21521873.0,
"step": 329
},
{
"epoch": 0.22529441884280593,
"grad_norm": 0.7324831485748291,
"learning_rate": 4.765559397765568e-06,
"loss": 0.618,
"mean_token_accuracy": 0.8072916716337204,
"num_tokens": 21587409.0,
"step": 330
},
{
"epoch": 0.22597712920293567,
"grad_norm": 0.8572807908058167,
"learning_rate": 4.763031579331552e-06,
"loss": 0.6274,
"mean_token_accuracy": 0.8070014715194702,
"num_tokens": 21652945.0,
"step": 331
},
{
"epoch": 0.22665983956306537,
"grad_norm": 0.7536141276359558,
"learning_rate": 4.760490883781657e-06,
"loss": 0.6069,
"mean_token_accuracy": 0.8116141259670258,
"num_tokens": 21718481.0,
"step": 332
},
{
"epoch": 0.22734254992319508,
"grad_norm": 0.7357029318809509,
"learning_rate": 4.757937325572963e-06,
"loss": 0.6398,
"mean_token_accuracy": 0.8047867864370346,
"num_tokens": 21784017.0,
"step": 333
},
{
"epoch": 0.2280252602833248,
"grad_norm": 0.7707255482673645,
"learning_rate": 4.755370919235743e-06,
"loss": 0.6288,
"mean_token_accuracy": 0.8075971454381943,
"num_tokens": 21849553.0,
"step": 334
},
{
"epoch": 0.22870797064345452,
"grad_norm": 0.7792888879776001,
"learning_rate": 4.752791679373379e-06,
"loss": 0.6313,
"mean_token_accuracy": 0.8058997839689255,
"num_tokens": 21914714.0,
"step": 335
},
{
"epoch": 0.22939068100358423,
"grad_norm": 0.8160707354545593,
"learning_rate": 4.750199620662276e-06,
"loss": 0.6464,
"mean_token_accuracy": 0.800752580165863,
"num_tokens": 21979602.0,
"step": 336
},
{
"epoch": 0.23007339136371394,
"grad_norm": 0.7280910015106201,
"learning_rate": 4.747594757851781e-06,
"loss": 0.6274,
"mean_token_accuracy": 0.8049395233392715,
"num_tokens": 22045138.0,
"step": 337
},
{
"epoch": 0.23075610172384367,
"grad_norm": 0.7372390627861023,
"learning_rate": 4.744977105764102e-06,
"loss": 0.6241,
"mean_token_accuracy": 0.8091856092214584,
"num_tokens": 22110674.0,
"step": 338
},
{
"epoch": 0.23143881208397338,
"grad_norm": 0.757652997970581,
"learning_rate": 4.742346679294218e-06,
"loss": 0.6618,
"mean_token_accuracy": 0.7985092997550964,
"num_tokens": 22176210.0,
"step": 339
},
{
"epoch": 0.23212152244410308,
"grad_norm": 0.7790650129318237,
"learning_rate": 4.739703493409797e-06,
"loss": 0.6201,
"mean_token_accuracy": 0.80793197453022,
"num_tokens": 22241702.0,
"step": 340
},
{
"epoch": 0.2328042328042328,
"grad_norm": 0.7788594961166382,
"learning_rate": 4.73704756315111e-06,
"loss": 0.6437,
"mean_token_accuracy": 0.8009341210126877,
"num_tokens": 22307095.0,
"step": 341
},
{
"epoch": 0.23348694316436253,
"grad_norm": 0.7534693479537964,
"learning_rate": 4.73437890363095e-06,
"loss": 0.6437,
"mean_token_accuracy": 0.802969217300415,
"num_tokens": 22372631.0,
"step": 342
},
{
"epoch": 0.23416965352449223,
"grad_norm": 0.7882516384124756,
"learning_rate": 4.731697530034538e-06,
"loss": 0.655,
"mean_token_accuracy": 0.7998992055654526,
"num_tokens": 22438167.0,
"step": 343
},
{
"epoch": 0.23485236388462194,
"grad_norm": 0.7213677763938904,
"learning_rate": 4.729003457619441e-06,
"loss": 0.6046,
"mean_token_accuracy": 0.8131537586450577,
"num_tokens": 22503611.0,
"step": 344
},
{
"epoch": 0.23553507424475167,
"grad_norm": 0.767708957195282,
"learning_rate": 4.726296701715489e-06,
"loss": 0.6736,
"mean_token_accuracy": 0.794715091586113,
"num_tokens": 22569080.0,
"step": 345
},
{
"epoch": 0.23621778460488138,
"grad_norm": 0.7923814058303833,
"learning_rate": 4.723577277724678e-06,
"loss": 0.6895,
"mean_token_accuracy": 0.7892992496490479,
"num_tokens": 22634616.0,
"step": 346
},
{
"epoch": 0.2369004949650111,
"grad_norm": 0.7433699369430542,
"learning_rate": 4.720845201121092e-06,
"loss": 0.5982,
"mean_token_accuracy": 0.8131873160600662,
"num_tokens": 22700152.0,
"step": 347
},
{
"epoch": 0.23758320532514082,
"grad_norm": 0.7465718984603882,
"learning_rate": 4.71810048745081e-06,
"loss": 0.6782,
"mean_token_accuracy": 0.7941065728664398,
"num_tokens": 22764843.0,
"step": 348
},
{
"epoch": 0.23826591568527053,
"grad_norm": 0.7546844482421875,
"learning_rate": 4.715343152331816e-06,
"loss": 0.6164,
"mean_token_accuracy": 0.8110382258892059,
"num_tokens": 22830057.0,
"step": 349
},
{
"epoch": 0.23894862604540024,
"grad_norm": 0.7519754767417908,
"learning_rate": 4.712573211453918e-06,
"loss": 0.6608,
"mean_token_accuracy": 0.7954239994287491,
"num_tokens": 22895593.0,
"step": 350
},
{
"epoch": 0.23963133640552994,
"grad_norm": 0.7927505373954773,
"learning_rate": 4.7097906805786474e-06,
"loss": 0.6448,
"mean_token_accuracy": 0.7999225705862045,
"num_tokens": 22960640.0,
"step": 351
},
{
"epoch": 0.24031404676565968,
"grad_norm": 0.7735369205474854,
"learning_rate": 4.70699557553918e-06,
"loss": 0.6456,
"mean_token_accuracy": 0.8013502061367035,
"num_tokens": 23026176.0,
"step": 352
},
{
"epoch": 0.24099675712578938,
"grad_norm": 0.7586895823478699,
"learning_rate": 4.704187912240239e-06,
"loss": 0.6038,
"mean_token_accuracy": 0.8122097998857498,
"num_tokens": 23091712.0,
"step": 353
},
{
"epoch": 0.2416794674859191,
"grad_norm": 0.7558093667030334,
"learning_rate": 4.701367706658006e-06,
"loss": 0.6612,
"mean_token_accuracy": 0.7979543209075928,
"num_tokens": 23157067.0,
"step": 354
},
{
"epoch": 0.24236217784604883,
"grad_norm": 0.7904984951019287,
"learning_rate": 4.698534974840033e-06,
"loss": 0.6475,
"mean_token_accuracy": 0.8018562942743301,
"num_tokens": 23222084.0,
"step": 355
},
{
"epoch": 0.24304488820617853,
"grad_norm": 0.7433775067329407,
"learning_rate": 4.6956897329051456e-06,
"loss": 0.6104,
"mean_token_accuracy": 0.8106366097927094,
"num_tokens": 23287620.0,
"step": 356
},
{
"epoch": 0.24372759856630824,
"grad_norm": 0.766858696937561,
"learning_rate": 4.692831997043359e-06,
"loss": 0.6628,
"mean_token_accuracy": 0.7968552559614182,
"num_tokens": 23352620.0,
"step": 357
},
{
"epoch": 0.24441030892643795,
"grad_norm": 0.7720522880554199,
"learning_rate": 4.689961783515777e-06,
"loss": 0.6159,
"mean_token_accuracy": 0.8069179058074951,
"num_tokens": 23418065.0,
"step": 358
},
{
"epoch": 0.24509301928656768,
"grad_norm": 0.7662636637687683,
"learning_rate": 4.687079108654508e-06,
"loss": 0.6449,
"mean_token_accuracy": 0.8010782450437546,
"num_tokens": 23483557.0,
"step": 359
},
{
"epoch": 0.2457757296466974,
"grad_norm": 0.7301419377326965,
"learning_rate": 4.684183988862563e-06,
"loss": 0.6264,
"mean_token_accuracy": 0.8084524720907211,
"num_tokens": 23549093.0,
"step": 360
},
{
"epoch": 0.2464584400068271,
"grad_norm": 0.746433675289154,
"learning_rate": 4.681276440613772e-06,
"loss": 0.6566,
"mean_token_accuracy": 0.7988911420106888,
"num_tokens": 23614629.0,
"step": 361
},
{
"epoch": 0.24714115036695683,
"grad_norm": 0.7949883341789246,
"learning_rate": 4.678356480452683e-06,
"loss": 0.6575,
"mean_token_accuracy": 0.7978372424840927,
"num_tokens": 23680165.0,
"step": 362
},
{
"epoch": 0.24782386072708654,
"grad_norm": 0.7822641730308533,
"learning_rate": 4.675424124994471e-06,
"loss": 0.6526,
"mean_token_accuracy": 0.7977532297372818,
"num_tokens": 23744937.0,
"step": 363
},
{
"epoch": 0.24850657108721624,
"grad_norm": 0.7230205535888672,
"learning_rate": 4.672479390924842e-06,
"loss": 0.6103,
"mean_token_accuracy": 0.8111226707696915,
"num_tokens": 23810143.0,
"step": 364
},
{
"epoch": 0.24918928144734595,
"grad_norm": 0.7075436115264893,
"learning_rate": 4.669522294999941e-06,
"loss": 0.6161,
"mean_token_accuracy": 0.8099951148033142,
"num_tokens": 23875679.0,
"step": 365
},
{
"epoch": 0.24987199180747569,
"grad_norm": 0.7330030798912048,
"learning_rate": 4.66655285404625e-06,
"loss": 0.6507,
"mean_token_accuracy": 0.801278367638588,
"num_tokens": 23941169.0,
"step": 366
},
{
"epoch": 0.2505547021676054,
"grad_norm": 0.7502052783966064,
"learning_rate": 4.6635710849605034e-06,
"loss": 0.6205,
"mean_token_accuracy": 0.8079535663127899,
"num_tokens": 24006214.0,
"step": 367
},
{
"epoch": 0.2512374125277351,
"grad_norm": 0.755878210067749,
"learning_rate": 4.660577004709579e-06,
"loss": 0.6197,
"mean_token_accuracy": 0.8080706298351288,
"num_tokens": 24071750.0,
"step": 368
},
{
"epoch": 0.2519201228878648,
"grad_norm": 0.7299730777740479,
"learning_rate": 4.657570630330411e-06,
"loss": 0.6366,
"mean_token_accuracy": 0.8038407862186432,
"num_tokens": 24137117.0,
"step": 369
},
{
"epoch": 0.25260283324799454,
"grad_norm": 0.7870468497276306,
"learning_rate": 4.65455197892989e-06,
"loss": 0.6363,
"mean_token_accuracy": 0.8036412596702576,
"num_tokens": 24202653.0,
"step": 370
},
{
"epoch": 0.2532855436081243,
"grad_norm": 0.7470530271530151,
"learning_rate": 4.651521067684762e-06,
"loss": 0.668,
"mean_token_accuracy": 0.7951490730047226,
"num_tokens": 24268189.0,
"step": 371
},
{
"epoch": 0.25396825396825395,
"grad_norm": 0.7217208743095398,
"learning_rate": 4.6484779138415385e-06,
"loss": 0.5903,
"mean_token_accuracy": 0.8160587698221207,
"num_tokens": 24333725.0,
"step": 372
},
{
"epoch": 0.2546509643283837,
"grad_norm": 0.7956851720809937,
"learning_rate": 4.64542253471639e-06,
"loss": 0.6924,
"mean_token_accuracy": 0.7903948575258255,
"num_tokens": 24398857.0,
"step": 373
},
{
"epoch": 0.2553336746885134,
"grad_norm": 0.7602137923240662,
"learning_rate": 4.642354947695055e-06,
"loss": 0.6302,
"mean_token_accuracy": 0.8039003759622574,
"num_tokens": 24464251.0,
"step": 374
},
{
"epoch": 0.2560163850486431,
"grad_norm": 0.7951780557632446,
"learning_rate": 4.639275170232734e-06,
"loss": 0.6699,
"mean_token_accuracy": 0.7974366098642349,
"num_tokens": 24529439.0,
"step": 375
},
{
"epoch": 0.25669909540877284,
"grad_norm": 0.8132842183113098,
"learning_rate": 4.636183219853996e-06,
"loss": 0.6646,
"mean_token_accuracy": 0.7951338142156601,
"num_tokens": 24594975.0,
"step": 376
},
{
"epoch": 0.2573818057689025,
"grad_norm": 0.7737603187561035,
"learning_rate": 4.633079114152676e-06,
"loss": 0.608,
"mean_token_accuracy": 0.811751589179039,
"num_tokens": 24660511.0,
"step": 377
},
{
"epoch": 0.25806451612903225,
"grad_norm": 0.804283082485199,
"learning_rate": 4.629962870791774e-06,
"loss": 0.6921,
"mean_token_accuracy": 0.7884904444217682,
"num_tokens": 24725671.0,
"step": 378
},
{
"epoch": 0.258747226489162,
"grad_norm": 0.7124019861221313,
"learning_rate": 4.626834507503357e-06,
"loss": 0.5663,
"mean_token_accuracy": 0.8226875513792038,
"num_tokens": 24791207.0,
"step": 379
},
{
"epoch": 0.25942993684929166,
"grad_norm": 0.7700932025909424,
"learning_rate": 4.623694042088457e-06,
"loss": 0.6398,
"mean_token_accuracy": 0.8026179224252701,
"num_tokens": 24856743.0,
"step": 380
},
{
"epoch": 0.2601126472094214,
"grad_norm": 0.792400598526001,
"learning_rate": 4.620541492416968e-06,
"loss": 0.6382,
"mean_token_accuracy": 0.8020680695772171,
"num_tokens": 24922279.0,
"step": 381
},
{
"epoch": 0.26079535756955113,
"grad_norm": 0.7475351691246033,
"learning_rate": 4.617376876427549e-06,
"loss": 0.6306,
"mean_token_accuracy": 0.8047256767749786,
"num_tokens": 24987815.0,
"step": 382
},
{
"epoch": 0.2614780679296808,
"grad_norm": 0.7587133049964905,
"learning_rate": 4.614200212127514e-06,
"loss": 0.6442,
"mean_token_accuracy": 0.8020219951868057,
"num_tokens": 25053170.0,
"step": 383
},
{
"epoch": 0.26216077828981055,
"grad_norm": 0.7837956547737122,
"learning_rate": 4.611011517592741e-06,
"loss": 0.598,
"mean_token_accuracy": 0.8139603137969971,
"num_tokens": 25117886.0,
"step": 384
},
{
"epoch": 0.2628434886499403,
"grad_norm": 0.7706968188285828,
"learning_rate": 4.607810810967556e-06,
"loss": 0.6207,
"mean_token_accuracy": 0.8087273985147476,
"num_tokens": 25183422.0,
"step": 385
},
{
"epoch": 0.26352619901006996,
"grad_norm": 0.752069354057312,
"learning_rate": 4.604598110464639e-06,
"loss": 0.6318,
"mean_token_accuracy": 0.8054282814264297,
"num_tokens": 25248958.0,
"step": 386
},
{
"epoch": 0.2642089093701997,
"grad_norm": 0.7261558175086975,
"learning_rate": 4.60137343436492e-06,
"loss": 0.6412,
"mean_token_accuracy": 0.8022666275501251,
"num_tokens": 25314494.0,
"step": 387
},
{
"epoch": 0.26489161973032943,
"grad_norm": 0.7284137606620789,
"learning_rate": 4.5981368010174676e-06,
"loss": 0.606,
"mean_token_accuracy": 0.8140856623649597,
"num_tokens": 25379983.0,
"step": 388
},
{
"epoch": 0.2655743300904591,
"grad_norm": 0.7730989456176758,
"learning_rate": 4.5948882288393935e-06,
"loss": 0.6313,
"mean_token_accuracy": 0.8056421130895615,
"num_tokens": 25445519.0,
"step": 389
},
{
"epoch": 0.26625704045058884,
"grad_norm": 0.7879030108451843,
"learning_rate": 4.5916277363157434e-06,
"loss": 0.6762,
"mean_token_accuracy": 0.790292039513588,
"num_tokens": 25511055.0,
"step": 390
},
{
"epoch": 0.2669397508107186,
"grad_norm": 0.7832986116409302,
"learning_rate": 4.588355341999391e-06,
"loss": 0.663,
"mean_token_accuracy": 0.7950224727392197,
"num_tokens": 25576264.0,
"step": 391
},
{
"epoch": 0.26762246117084826,
"grad_norm": 0.7901932597160339,
"learning_rate": 4.585071064510933e-06,
"loss": 0.6107,
"mean_token_accuracy": 0.8095369040966034,
"num_tokens": 25641800.0,
"step": 392
},
{
"epoch": 0.268305171530978,
"grad_norm": 0.7595347762107849,
"learning_rate": 4.581774922538586e-06,
"loss": 0.642,
"mean_token_accuracy": 0.8009682595729828,
"num_tokens": 25707162.0,
"step": 393
},
{
"epoch": 0.26898788189110767,
"grad_norm": 0.7476745843887329,
"learning_rate": 4.578466934838076e-06,
"loss": 0.6396,
"mean_token_accuracy": 0.8006323426961899,
"num_tokens": 25772698.0,
"step": 394
},
{
"epoch": 0.2696705922512374,
"grad_norm": 0.7542417049407959,
"learning_rate": 4.575147120232536e-06,
"loss": 0.6125,
"mean_token_accuracy": 0.8105543851852417,
"num_tokens": 25838038.0,
"step": 395
},
{
"epoch": 0.27035330261136714,
"grad_norm": 0.7183694243431091,
"learning_rate": 4.571815497612393e-06,
"loss": 0.6075,
"mean_token_accuracy": 0.8125783056020737,
"num_tokens": 25903529.0,
"step": 396
},
{
"epoch": 0.2710360129714968,
"grad_norm": 0.747693657875061,
"learning_rate": 4.568472085935267e-06,
"loss": 0.6624,
"mean_token_accuracy": 0.8001435697078705,
"num_tokens": 25969065.0,
"step": 397
},
{
"epoch": 0.27171872333162655,
"grad_norm": 0.8016436696052551,
"learning_rate": 4.5651169042258605e-06,
"loss": 0.6261,
"mean_token_accuracy": 0.8060239553451538,
"num_tokens": 26034601.0,
"step": 398
},
{
"epoch": 0.2724014336917563,
"grad_norm": 0.7097094655036926,
"learning_rate": 4.561749971575846e-06,
"loss": 0.6236,
"mean_token_accuracy": 0.808208093047142,
"num_tokens": 26100137.0,
"step": 399
},
{
"epoch": 0.27308414405188597,
"grad_norm": 0.7615159749984741,
"learning_rate": 4.558371307143766e-06,
"loss": 0.6452,
"mean_token_accuracy": 0.7992061525583267,
"num_tokens": 26165490.0,
"step": 400
},
{
"epoch": 0.2737668544120157,
"grad_norm": 0.7655863761901855,
"learning_rate": 4.554980930154916e-06,
"loss": 0.5753,
"mean_token_accuracy": 0.8202753812074661,
"num_tokens": 26230866.0,
"step": 401
},
{
"epoch": 0.27444956477214544,
"grad_norm": 0.7693037986755371,
"learning_rate": 4.55157885990124e-06,
"loss": 0.6185,
"mean_token_accuracy": 0.8076163828372955,
"num_tokens": 26295675.0,
"step": 402
},
{
"epoch": 0.2751322751322751,
"grad_norm": 0.7439252734184265,
"learning_rate": 4.548165115741218e-06,
"loss": 0.6128,
"mean_token_accuracy": 0.8100256621837616,
"num_tokens": 26361211.0,
"step": 403
},
{
"epoch": 0.27581498549240485,
"grad_norm": 0.7341496348381042,
"learning_rate": 4.544739717099758e-06,
"loss": 0.6449,
"mean_token_accuracy": 0.8028012067079544,
"num_tokens": 26426747.0,
"step": 404
},
{
"epoch": 0.2764976958525346,
"grad_norm": 0.7716757655143738,
"learning_rate": 4.541302683468084e-06,
"loss": 0.6469,
"mean_token_accuracy": 0.8010788112878799,
"num_tokens": 26491980.0,
"step": 405
},
{
"epoch": 0.27718040621266427,
"grad_norm": 0.7929121255874634,
"learning_rate": 4.537854034403626e-06,
"loss": 0.6995,
"mean_token_accuracy": 0.7865194380283356,
"num_tokens": 26557516.0,
"step": 406
},
{
"epoch": 0.277863116572794,
"grad_norm": 0.7356821894645691,
"learning_rate": 4.534393789529905e-06,
"loss": 0.6291,
"mean_token_accuracy": 0.8049547970294952,
"num_tokens": 26623052.0,
"step": 407
},
{
"epoch": 0.27854582693292373,
"grad_norm": 0.7607781887054443,
"learning_rate": 4.53092196853643e-06,
"loss": 0.6226,
"mean_token_accuracy": 0.8083969801664352,
"num_tokens": 26688234.0,
"step": 408
},
{
"epoch": 0.2792285372930534,
"grad_norm": 0.7681825757026672,
"learning_rate": 4.5274385911785765e-06,
"loss": 0.6866,
"mean_token_accuracy": 0.7905820459127426,
"num_tokens": 26753765.0,
"step": 409
},
{
"epoch": 0.27991124765318315,
"grad_norm": 0.7501204013824463,
"learning_rate": 4.52394367727748e-06,
"loss": 0.6388,
"mean_token_accuracy": 0.8037634491920471,
"num_tokens": 26819301.0,
"step": 410
},
{
"epoch": 0.2805939580133128,
"grad_norm": 0.7248852849006653,
"learning_rate": 4.52043724671992e-06,
"loss": 0.6373,
"mean_token_accuracy": 0.8037328869104385,
"num_tokens": 26884837.0,
"step": 411
},
{
"epoch": 0.28127666837344256,
"grad_norm": 0.7588721513748169,
"learning_rate": 4.516919319458209e-06,
"loss": 0.6697,
"mean_token_accuracy": 0.7954139858484268,
"num_tokens": 26949905.0,
"step": 412
},
{
"epoch": 0.2819593787335723,
"grad_norm": 0.772433876991272,
"learning_rate": 4.513389915510075e-06,
"loss": 0.6095,
"mean_token_accuracy": 0.8109726309776306,
"num_tokens": 27015441.0,
"step": 413
},
{
"epoch": 0.282642089093702,
"grad_norm": 0.741692841053009,
"learning_rate": 4.509849054958559e-06,
"loss": 0.5835,
"mean_token_accuracy": 0.8177823275327682,
"num_tokens": 27080627.0,
"step": 414
},
{
"epoch": 0.2833247994538317,
"grad_norm": 0.743645966053009,
"learning_rate": 4.506296757951883e-06,
"loss": 0.6028,
"mean_token_accuracy": 0.8136608004570007,
"num_tokens": 27146163.0,
"step": 415
},
{
"epoch": 0.28400750981396145,
"grad_norm": 0.7747307419776917,
"learning_rate": 4.50273304470335e-06,
"loss": 0.6248,
"mean_token_accuracy": 0.8071649372577667,
"num_tokens": 27211646.0,
"step": 416
},
{
"epoch": 0.2846902201740911,
"grad_norm": 0.7713055610656738,
"learning_rate": 4.499157935491222e-06,
"loss": 0.6555,
"mean_token_accuracy": 0.7991966158151627,
"num_tokens": 27277182.0,
"step": 417
},
{
"epoch": 0.28537293053422086,
"grad_norm": 0.7445299029350281,
"learning_rate": 4.4955714506586064e-06,
"loss": 0.6093,
"mean_token_accuracy": 0.8092619180679321,
"num_tokens": 27342578.0,
"step": 418
},
{
"epoch": 0.2860556408943506,
"grad_norm": 0.740449845790863,
"learning_rate": 4.491973610613343e-06,
"loss": 0.631,
"mean_token_accuracy": 0.8066043555736542,
"num_tokens": 27408114.0,
"step": 419
},
{
"epoch": 0.2867383512544803,
"grad_norm": 0.7795570492744446,
"learning_rate": 4.4883644358278815e-06,
"loss": 0.6263,
"mean_token_accuracy": 0.8064363300800323,
"num_tokens": 27473650.0,
"step": 420
},
{
"epoch": 0.28742106161461,
"grad_norm": 0.7142336964607239,
"learning_rate": 4.484743946839169e-06,
"loss": 0.6184,
"mean_token_accuracy": 0.8090627640485764,
"num_tokens": 27539146.0,
"step": 421
},
{
"epoch": 0.28810377197473974,
"grad_norm": 0.7614313364028931,
"learning_rate": 4.481112164248534e-06,
"loss": 0.6241,
"mean_token_accuracy": 0.8096743673086166,
"num_tokens": 27604682.0,
"step": 422
},
{
"epoch": 0.2887864823348694,
"grad_norm": 0.7186126708984375,
"learning_rate": 4.477469108721568e-06,
"loss": 0.6347,
"mean_token_accuracy": 0.8030977845191956,
"num_tokens": 27670067.0,
"step": 423
},
{
"epoch": 0.28946919269499916,
"grad_norm": 0.71300208568573,
"learning_rate": 4.473814800988009e-06,
"loss": 0.6057,
"mean_token_accuracy": 0.8111559152603149,
"num_tokens": 27735603.0,
"step": 424
},
{
"epoch": 0.29015190305512883,
"grad_norm": 0.7614585757255554,
"learning_rate": 4.4701492618416175e-06,
"loss": 0.6431,
"mean_token_accuracy": 0.8026790171861649,
"num_tokens": 27801139.0,
"step": 425
},
{
"epoch": 0.29083461341525857,
"grad_norm": 0.8069416880607605,
"learning_rate": 4.466472512140069e-06,
"loss": 0.6811,
"mean_token_accuracy": 0.7914680987596512,
"num_tokens": 27866675.0,
"step": 426
},
{
"epoch": 0.2915173237753883,
"grad_norm": 0.7372321486473083,
"learning_rate": 4.4627845728048255e-06,
"loss": 0.6423,
"mean_token_accuracy": 0.8003201633691788,
"num_tokens": 27932051.0,
"step": 427
},
{
"epoch": 0.292200034135518,
"grad_norm": 0.7879287004470825,
"learning_rate": 4.459085464821024e-06,
"loss": 0.6453,
"mean_token_accuracy": 0.8002199530601501,
"num_tokens": 27997587.0,
"step": 428
},
{
"epoch": 0.2928827444956477,
"grad_norm": 0.7693133354187012,
"learning_rate": 4.455375209237346e-06,
"loss": 0.6208,
"mean_token_accuracy": 0.8095216304063797,
"num_tokens": 28063123.0,
"step": 429
},
{
"epoch": 0.29356545485577745,
"grad_norm": 0.7321552038192749,
"learning_rate": 4.451653827165915e-06,
"loss": 0.6313,
"mean_token_accuracy": 0.8039006143808365,
"num_tokens": 28128572.0,
"step": 430
},
{
"epoch": 0.29424816521590713,
"grad_norm": 0.7376238107681274,
"learning_rate": 4.4479213397821585e-06,
"loss": 0.6408,
"mean_token_accuracy": 0.8035496175289154,
"num_tokens": 28194108.0,
"step": 431
},
{
"epoch": 0.29493087557603687,
"grad_norm": 0.7184596061706543,
"learning_rate": 4.4441777683247e-06,
"loss": 0.6243,
"mean_token_accuracy": 0.8056421130895615,
"num_tokens": 28259644.0,
"step": 432
},
{
"epoch": 0.2956135859361666,
"grad_norm": 0.7423664927482605,
"learning_rate": 4.440423134095232e-06,
"loss": 0.6035,
"mean_token_accuracy": 0.8111253678798676,
"num_tokens": 28325180.0,
"step": 433
},
{
"epoch": 0.2962962962962963,
"grad_norm": 0.7306990027427673,
"learning_rate": 4.436657458458396e-06,
"loss": 0.65,
"mean_token_accuracy": 0.8002391308546066,
"num_tokens": 28390585.0,
"step": 434
},
{
"epoch": 0.296979006656426,
"grad_norm": 0.7384706139564514,
"learning_rate": 4.432880762841665e-06,
"loss": 0.602,
"mean_token_accuracy": 0.8149714320898056,
"num_tokens": 28455904.0,
"step": 435
},
{
"epoch": 0.29766171701655575,
"grad_norm": 0.7444391250610352,
"learning_rate": 4.4290930687352105e-06,
"loss": 0.6271,
"mean_token_accuracy": 0.8050769865512848,
"num_tokens": 28521440.0,
"step": 436
},
{
"epoch": 0.29834442737668543,
"grad_norm": 0.7371619939804077,
"learning_rate": 4.425294397691796e-06,
"loss": 0.6099,
"mean_token_accuracy": 0.8098118305206299,
"num_tokens": 28586976.0,
"step": 437
},
{
"epoch": 0.29902713773681516,
"grad_norm": 0.7588141560554504,
"learning_rate": 4.42148477132664e-06,
"loss": 0.6119,
"mean_token_accuracy": 0.810527041554451,
"num_tokens": 28652489.0,
"step": 438
},
{
"epoch": 0.2997098480969449,
"grad_norm": 0.774115264415741,
"learning_rate": 4.417664211317304e-06,
"loss": 0.627,
"mean_token_accuracy": 0.8073985874652863,
"num_tokens": 28718025.0,
"step": 439
},
{
"epoch": 0.3003925584570746,
"grad_norm": 0.7610304951667786,
"learning_rate": 4.413832739403558e-06,
"loss": 0.6646,
"mean_token_accuracy": 0.798673689365387,
"num_tokens": 28783437.0,
"step": 440
},
{
"epoch": 0.3010752688172043,
"grad_norm": 0.7736408114433289,
"learning_rate": 4.40999037738727e-06,
"loss": 0.6713,
"mean_token_accuracy": 0.794889435172081,
"num_tokens": 28848973.0,
"step": 441
},
{
"epoch": 0.301757979177334,
"grad_norm": 0.7538256049156189,
"learning_rate": 4.406137147132268e-06,
"loss": 0.601,
"mean_token_accuracy": 0.8116599470376968,
"num_tokens": 28914509.0,
"step": 442
},
{
"epoch": 0.3024406895374637,
"grad_norm": 0.7843046188354492,
"learning_rate": 4.402273070564228e-06,
"loss": 0.6335,
"mean_token_accuracy": 0.8031372278928757,
"num_tokens": 28980045.0,
"step": 443
},
{
"epoch": 0.30312339989759346,
"grad_norm": 0.7894677519798279,
"learning_rate": 4.3983981696705415e-06,
"loss": 0.6715,
"mean_token_accuracy": 0.7911015450954437,
"num_tokens": 29045581.0,
"step": 444
},
{
"epoch": 0.30380611025772314,
"grad_norm": 0.7658438682556152,
"learning_rate": 4.3945124665001926e-06,
"loss": 0.6393,
"mean_token_accuracy": 0.8040514290332794,
"num_tokens": 29110647.0,
"step": 445
},
{
"epoch": 0.3044888206178529,
"grad_norm": 0.7538372278213501,
"learning_rate": 4.390615983163633e-06,
"loss": 0.639,
"mean_token_accuracy": 0.8023277074098587,
"num_tokens": 29176183.0,
"step": 446
},
{
"epoch": 0.3051715309779826,
"grad_norm": 0.7676253914833069,
"learning_rate": 4.386708741832655e-06,
"loss": 0.6047,
"mean_token_accuracy": 0.8126374632120132,
"num_tokens": 29241719.0,
"step": 447
},
{
"epoch": 0.3058542413381123,
"grad_norm": 0.8105066418647766,
"learning_rate": 4.382790764740267e-06,
"loss": 0.6009,
"mean_token_accuracy": 0.8113532811403275,
"num_tokens": 29306858.0,
"step": 448
},
{
"epoch": 0.306536951698242,
"grad_norm": 0.7600321769714355,
"learning_rate": 4.378862074180566e-06,
"loss": 0.5914,
"mean_token_accuracy": 0.814546674489975,
"num_tokens": 29372394.0,
"step": 449
},
{
"epoch": 0.30721966205837176,
"grad_norm": 0.7705000638961792,
"learning_rate": 4.374922692508611e-06,
"loss": 0.6188,
"mean_token_accuracy": 0.8112035095691681,
"num_tokens": 29437883.0,
"step": 450
},
{
"epoch": 0.30790237241850144,
"grad_norm": 0.780503511428833,
"learning_rate": 4.370972642140294e-06,
"loss": 0.6245,
"mean_token_accuracy": 0.8074749559164047,
"num_tokens": 29503419.0,
"step": 451
},
{
"epoch": 0.30858508277863117,
"grad_norm": 0.7811777591705322,
"learning_rate": 4.367011945552217e-06,
"loss": 0.6088,
"mean_token_accuracy": 0.8096781969070435,
"num_tokens": 29568784.0,
"step": 452
},
{
"epoch": 0.3092677931387609,
"grad_norm": 0.8117642402648926,
"learning_rate": 4.363040625281557e-06,
"loss": 0.6262,
"mean_token_accuracy": 0.8069403767585754,
"num_tokens": 29634320.0,
"step": 453
},
{
"epoch": 0.3099505034988906,
"grad_norm": 0.7232481241226196,
"learning_rate": 4.359058703925947e-06,
"loss": 0.6107,
"mean_token_accuracy": 0.8092174828052521,
"num_tokens": 29699343.0,
"step": 454
},
{
"epoch": 0.3106332138590203,
"grad_norm": 0.7087581157684326,
"learning_rate": 4.355066204143338e-06,
"loss": 0.5939,
"mean_token_accuracy": 0.8157000690698624,
"num_tokens": 29764058.0,
"step": 455
},
{
"epoch": 0.31131592421915005,
"grad_norm": 0.8165489435195923,
"learning_rate": 4.351063148651878e-06,
"loss": 0.6408,
"mean_token_accuracy": 0.8040536493062973,
"num_tokens": 29829594.0,
"step": 456
},
{
"epoch": 0.31199863457927973,
"grad_norm": 0.7757338881492615,
"learning_rate": 4.347049560229776e-06,
"loss": 0.6387,
"mean_token_accuracy": 0.8039254248142242,
"num_tokens": 29894568.0,
"step": 457
},
{
"epoch": 0.31268134493940947,
"grad_norm": 0.7120219469070435,
"learning_rate": 4.343025461715177e-06,
"loss": 0.6376,
"mean_token_accuracy": 0.8034885227680206,
"num_tokens": 29960104.0,
"step": 458
},
{
"epoch": 0.31336405529953915,
"grad_norm": 0.7201838493347168,
"learning_rate": 4.338990876006031e-06,
"loss": 0.614,
"mean_token_accuracy": 0.8114766627550125,
"num_tokens": 30025640.0,
"step": 459
},
{
"epoch": 0.3140467656596689,
"grad_norm": 0.8032012581825256,
"learning_rate": 4.334945826059961e-06,
"loss": 0.6094,
"mean_token_accuracy": 0.8113402128219604,
"num_tokens": 30090352.0,
"step": 460
},
{
"epoch": 0.3147294760197986,
"grad_norm": 0.7260733842849731,
"learning_rate": 4.330890334894136e-06,
"loss": 0.6028,
"mean_token_accuracy": 0.8130651265382767,
"num_tokens": 30155888.0,
"step": 461
},
{
"epoch": 0.3154121863799283,
"grad_norm": 0.6967595219612122,
"learning_rate": 4.326824425585135e-06,
"loss": 0.6017,
"mean_token_accuracy": 0.8125849515199661,
"num_tokens": 30221258.0,
"step": 462
},
{
"epoch": 0.31609489674005803,
"grad_norm": 0.7427617907524109,
"learning_rate": 4.322748121268821e-06,
"loss": 0.5934,
"mean_token_accuracy": 0.8141037374734879,
"num_tokens": 30286794.0,
"step": 463
},
{
"epoch": 0.31677760710018776,
"grad_norm": 0.7407168745994568,
"learning_rate": 4.318661445140202e-06,
"loss": 0.6337,
"mean_token_accuracy": 0.8028928488492966,
"num_tokens": 30352330.0,
"step": 464
},
{
"epoch": 0.31746031746031744,
"grad_norm": 0.714897096157074,
"learning_rate": 4.314564420453311e-06,
"loss": 0.5786,
"mean_token_accuracy": 0.817768931388855,
"num_tokens": 30417646.0,
"step": 465
},
{
"epoch": 0.3181430278204472,
"grad_norm": 0.7537848353385925,
"learning_rate": 4.31045707052106e-06,
"loss": 0.6363,
"mean_token_accuracy": 0.8019506186246872,
"num_tokens": 30482883.0,
"step": 466
},
{
"epoch": 0.3188257381805769,
"grad_norm": 0.7029747366905212,
"learning_rate": 4.306339418715117e-06,
"loss": 0.5894,
"mean_token_accuracy": 0.8145619481801987,
"num_tokens": 30548127.0,
"step": 467
},
{
"epoch": 0.3195084485407066,
"grad_norm": 0.710258424282074,
"learning_rate": 4.302211488465769e-06,
"loss": 0.6181,
"mean_token_accuracy": 0.8077957034111023,
"num_tokens": 30613663.0,
"step": 468
},
{
"epoch": 0.3201911589008363,
"grad_norm": 0.7728216052055359,
"learning_rate": 4.298073303261791e-06,
"loss": 0.6565,
"mean_token_accuracy": 0.7991966009140015,
"num_tokens": 30679199.0,
"step": 469
},
{
"epoch": 0.32087386926096606,
"grad_norm": 0.7947602868080139,
"learning_rate": 4.293924886650306e-06,
"loss": 0.6152,
"mean_token_accuracy": 0.8107740730047226,
"num_tokens": 30744735.0,
"step": 470
},
{
"epoch": 0.32155657962109574,
"grad_norm": 0.7346971035003662,
"learning_rate": 4.289766262236661e-06,
"loss": 0.6387,
"mean_token_accuracy": 0.8023582696914673,
"num_tokens": 30810271.0,
"step": 471
},
{
"epoch": 0.3222392899812255,
"grad_norm": 0.7462263107299805,
"learning_rate": 4.285597453684286e-06,
"loss": 0.6056,
"mean_token_accuracy": 0.8118889033794403,
"num_tokens": 30875569.0,
"step": 472
},
{
"epoch": 0.32292200034135515,
"grad_norm": 0.7579359412193298,
"learning_rate": 4.2814184847145595e-06,
"loss": 0.628,
"mean_token_accuracy": 0.805214449763298,
"num_tokens": 30941105.0,
"step": 473
},
{
"epoch": 0.3236047107014849,
"grad_norm": 0.7670093178749084,
"learning_rate": 4.2772293791066764e-06,
"loss": 0.639,
"mean_token_accuracy": 0.8046798706054688,
"num_tokens": 31006641.0,
"step": 474
},
{
"epoch": 0.3242874210616146,
"grad_norm": 0.8125339150428772,
"learning_rate": 4.27303016069751e-06,
"loss": 0.6028,
"mean_token_accuracy": 0.8125305473804474,
"num_tokens": 31072177.0,
"step": 475
},
{
"epoch": 0.3249701314217443,
"grad_norm": 0.7287182807922363,
"learning_rate": 4.26882085338148e-06,
"loss": 0.5951,
"mean_token_accuracy": 0.8134927898645401,
"num_tokens": 31137713.0,
"step": 476
},
{
"epoch": 0.32565284178187404,
"grad_norm": 0.764122486114502,
"learning_rate": 4.2646014811104095e-06,
"loss": 0.6298,
"mean_token_accuracy": 0.8057503998279572,
"num_tokens": 31202855.0,
"step": 477
},
{
"epoch": 0.32633555214200377,
"grad_norm": 0.7873974442481995,
"learning_rate": 4.2603720678933965e-06,
"loss": 0.6214,
"mean_token_accuracy": 0.8090328723192215,
"num_tokens": 31268391.0,
"step": 478
},
{
"epoch": 0.32701826250213345,
"grad_norm": 0.7522021532058716,
"learning_rate": 4.2561326377966755e-06,
"loss": 0.6279,
"mean_token_accuracy": 0.8060545027256012,
"num_tokens": 31333927.0,
"step": 479
},
{
"epoch": 0.3277009728622632,
"grad_norm": 0.7264294624328613,
"learning_rate": 4.2518832149434755e-06,
"loss": 0.637,
"mean_token_accuracy": 0.8003726750612259,
"num_tokens": 31399463.0,
"step": 480
},
{
"epoch": 0.3283836832223929,
"grad_norm": 0.7738657593727112,
"learning_rate": 4.247623823513888e-06,
"loss": 0.6039,
"mean_token_accuracy": 0.8129608482122421,
"num_tokens": 31464886.0,
"step": 481
},
{
"epoch": 0.3290663935825226,
"grad_norm": 0.7558001279830933,
"learning_rate": 4.243354487744727e-06,
"loss": 0.6228,
"mean_token_accuracy": 0.80671127140522,
"num_tokens": 31530422.0,
"step": 482
},
{
"epoch": 0.32974910394265233,
"grad_norm": 0.758977472782135,
"learning_rate": 4.239075231929394e-06,
"loss": 0.6334,
"mean_token_accuracy": 0.8040841966867447,
"num_tokens": 31595958.0,
"step": 483
},
{
"epoch": 0.33043181430278207,
"grad_norm": 0.7687391042709351,
"learning_rate": 4.234786080417735e-06,
"loss": 0.6465,
"mean_token_accuracy": 0.8003726899623871,
"num_tokens": 31661494.0,
"step": 484
},
{
"epoch": 0.33111452466291175,
"grad_norm": 0.7958798408508301,
"learning_rate": 4.230487057615906e-06,
"loss": 0.6701,
"mean_token_accuracy": 0.7937389463186264,
"num_tokens": 31726959.0,
"step": 485
},
{
"epoch": 0.3317972350230415,
"grad_norm": 0.812972903251648,
"learning_rate": 4.226178187986233e-06,
"loss": 0.6752,
"mean_token_accuracy": 0.7940188199281693,
"num_tokens": 31792495.0,
"step": 486
},
{
"epoch": 0.3324799453831712,
"grad_norm": 0.7672164440155029,
"learning_rate": 4.221859496047072e-06,
"loss": 0.6325,
"mean_token_accuracy": 0.8042980283498764,
"num_tokens": 31858031.0,
"step": 487
},
{
"epoch": 0.3331626557433009,
"grad_norm": 0.7787556052207947,
"learning_rate": 4.217531006372668e-06,
"loss": 0.6196,
"mean_token_accuracy": 0.8083626925945282,
"num_tokens": 31923483.0,
"step": 488
},
{
"epoch": 0.33384536610343063,
"grad_norm": 0.7686751484870911,
"learning_rate": 4.21319274359302e-06,
"loss": 0.6394,
"mean_token_accuracy": 0.803551197052002,
"num_tokens": 31989015.0,
"step": 489
},
{
"epoch": 0.3345280764635603,
"grad_norm": 0.7874401211738586,
"learning_rate": 4.208844732393738e-06,
"loss": 0.5972,
"mean_token_accuracy": 0.8128054738044739,
"num_tokens": 32054551.0,
"step": 490
},
{
"epoch": 0.33521078682369004,
"grad_norm": 0.7712216973304749,
"learning_rate": 4.2044869975159e-06,
"loss": 0.6245,
"mean_token_accuracy": 0.8059378415346146,
"num_tokens": 32119517.0,
"step": 491
},
{
"epoch": 0.3358934971838198,
"grad_norm": 0.7174788117408752,
"learning_rate": 4.200119563755915e-06,
"loss": 0.6025,
"mean_token_accuracy": 0.81402987241745,
"num_tokens": 32185039.0,
"step": 492
},
{
"epoch": 0.33657620754394946,
"grad_norm": 0.7734994292259216,
"learning_rate": 4.195742455965381e-06,
"loss": 0.6263,
"mean_token_accuracy": 0.8075665980577469,
"num_tokens": 32250575.0,
"step": 493
},
{
"epoch": 0.3372589179040792,
"grad_norm": 0.776196300983429,
"learning_rate": 4.191355699050945e-06,
"loss": 0.6427,
"mean_token_accuracy": 0.8039925545454025,
"num_tokens": 32316111.0,
"step": 494
},
{
"epoch": 0.3379416282642089,
"grad_norm": 0.7637385129928589,
"learning_rate": 4.186959317974155e-06,
"loss": 0.6087,
"mean_token_accuracy": 0.8097383975982666,
"num_tokens": 32381556.0,
"step": 495
},
{
"epoch": 0.3386243386243386,
"grad_norm": 0.7658755779266357,
"learning_rate": 4.182553337751326e-06,
"loss": 0.6456,
"mean_token_accuracy": 0.8000977635383606,
"num_tokens": 32447092.0,
"step": 496
},
{
"epoch": 0.33930704898446834,
"grad_norm": 0.7636200189590454,
"learning_rate": 4.178137783453393e-06,
"loss": 0.6444,
"mean_token_accuracy": 0.7969666421413422,
"num_tokens": 32512628.0,
"step": 497
},
{
"epoch": 0.3399897593445981,
"grad_norm": 0.7598303556442261,
"learning_rate": 4.17371268020577e-06,
"loss": 0.6172,
"mean_token_accuracy": 0.8083913624286652,
"num_tokens": 32578164.0,
"step": 498
},
{
"epoch": 0.34067246970472775,
"grad_norm": 0.7488183379173279,
"learning_rate": 4.169278053188206e-06,
"loss": 0.6004,
"mean_token_accuracy": 0.8118432313203812,
"num_tokens": 32643700.0,
"step": 499
},
{
"epoch": 0.3413551800648575,
"grad_norm": 0.7670736312866211,
"learning_rate": 4.164833927634641e-06,
"loss": 0.6166,
"mean_token_accuracy": 0.809506356716156,
"num_tokens": 32709236.0,
"step": 500
},
{
"epoch": 0.3420378904249872,
"grad_norm": 0.7257670760154724,
"learning_rate": 4.160380328833066e-06,
"loss": 0.5888,
"mean_token_accuracy": 0.8163184225559235,
"num_tokens": 32774772.0,
"step": 501
},
{
"epoch": 0.3427206007851169,
"grad_norm": 0.7861335873603821,
"learning_rate": 4.155917282125376e-06,
"loss": 0.6194,
"mean_token_accuracy": 0.8079789876937866,
"num_tokens": 32840308.0,
"step": 502
},
{
"epoch": 0.34340331114524664,
"grad_norm": 0.8057557344436646,
"learning_rate": 4.151444812907226e-06,
"loss": 0.5966,
"mean_token_accuracy": 0.8141276389360428,
"num_tokens": 32905558.0,
"step": 503
},
{
"epoch": 0.34408602150537637,
"grad_norm": 0.7587814331054688,
"learning_rate": 4.146962946627886e-06,
"loss": 0.6238,
"mean_token_accuracy": 0.8071389347314835,
"num_tokens": 32971094.0,
"step": 504
},
{
"epoch": 0.34476873186550605,
"grad_norm": 0.7340732216835022,
"learning_rate": 4.1424717087901005e-06,
"loss": 0.6343,
"mean_token_accuracy": 0.8036870807409286,
"num_tokens": 33036630.0,
"step": 505
},
{
"epoch": 0.3454514422256358,
"grad_norm": 0.7753547430038452,
"learning_rate": 4.1379711249499355e-06,
"loss": 0.6344,
"mean_token_accuracy": 0.8057337552309036,
"num_tokens": 33102166.0,
"step": 506
},
{
"epoch": 0.34613415258576546,
"grad_norm": 0.7536847591400146,
"learning_rate": 4.133461220716642e-06,
"loss": 0.6072,
"mean_token_accuracy": 0.8107985109090805,
"num_tokens": 33167684.0,
"step": 507
},
{
"epoch": 0.3468168629458952,
"grad_norm": 0.7741045951843262,
"learning_rate": 4.1289420217525035e-06,
"loss": 0.6165,
"mean_token_accuracy": 0.808781310915947,
"num_tokens": 33232862.0,
"step": 508
},
{
"epoch": 0.34749957330602493,
"grad_norm": 0.7509851455688477,
"learning_rate": 4.124413553772693e-06,
"loss": 0.6507,
"mean_token_accuracy": 0.8005150854587555,
"num_tokens": 33298347.0,
"step": 509
},
{
"epoch": 0.3481822836661546,
"grad_norm": 0.741675078868866,
"learning_rate": 4.119875842545127e-06,
"loss": 0.5784,
"mean_token_accuracy": 0.8181970864534378,
"num_tokens": 33363883.0,
"step": 510
},
{
"epoch": 0.34886499402628435,
"grad_norm": 0.7717655301094055,
"learning_rate": 4.115328913890317e-06,
"loss": 0.6133,
"mean_token_accuracy": 0.8071389347314835,
"num_tokens": 33429419.0,
"step": 511
},
{
"epoch": 0.3495477043864141,
"grad_norm": 0.7376033067703247,
"learning_rate": 4.1107727936812216e-06,
"loss": 0.6235,
"mean_token_accuracy": 0.8058039098978043,
"num_tokens": 33494724.0,
"step": 512
},
{
"epoch": 0.35023041474654376,
"grad_norm": 0.7243679761886597,
"learning_rate": 4.106207507843106e-06,
"loss": 0.5884,
"mean_token_accuracy": 0.815142348408699,
"num_tokens": 33560260.0,
"step": 513
},
{
"epoch": 0.3509131251066735,
"grad_norm": 0.7643143534660339,
"learning_rate": 4.1016330823533866e-06,
"loss": 0.5928,
"mean_token_accuracy": 0.8167460858821869,
"num_tokens": 33625796.0,
"step": 514
},
{
"epoch": 0.35159583546680323,
"grad_norm": 0.7645459771156311,
"learning_rate": 4.0970495432414854e-06,
"loss": 0.589,
"mean_token_accuracy": 0.8166697174310684,
"num_tokens": 33691332.0,
"step": 515
},
{
"epoch": 0.3522785458269329,
"grad_norm": 0.7253304123878479,
"learning_rate": 4.092456916588686e-06,
"loss": 0.6383,
"mean_token_accuracy": 0.8029126226902008,
"num_tokens": 33756568.0,
"step": 516
},
{
"epoch": 0.35296125618706264,
"grad_norm": 0.744203507900238,
"learning_rate": 4.0878552285279794e-06,
"loss": 0.616,
"mean_token_accuracy": 0.8080986589193344,
"num_tokens": 33821830.0,
"step": 517
},
{
"epoch": 0.3536439665471924,
"grad_norm": 0.7576060891151428,
"learning_rate": 4.083244505243918e-06,
"loss": 0.601,
"mean_token_accuracy": 0.8153256475925446,
"num_tokens": 33887366.0,
"step": 518
},
{
"epoch": 0.35432667690732206,
"grad_norm": 0.7077663540840149,
"learning_rate": 4.078624772972467e-06,
"loss": 0.5658,
"mean_token_accuracy": 0.8255590051412582,
"num_tokens": 33952902.0,
"step": 519
},
{
"epoch": 0.3550093872674518,
"grad_norm": 0.7638600468635559,
"learning_rate": 4.0739960580008565e-06,
"loss": 0.6068,
"mean_token_accuracy": 0.8101020306348801,
"num_tokens": 34018438.0,
"step": 520
},
{
"epoch": 0.35569209762758147,
"grad_norm": 0.7264181971549988,
"learning_rate": 4.0693583866674255e-06,
"loss": 0.6234,
"mean_token_accuracy": 0.8054409176111221,
"num_tokens": 34083345.0,
"step": 521
},
{
"epoch": 0.3563748079877112,
"grad_norm": 0.7131197452545166,
"learning_rate": 4.0647117853614824e-06,
"loss": 0.5843,
"mean_token_accuracy": 0.8186552971601486,
"num_tokens": 34148881.0,
"step": 522
},
{
"epoch": 0.35705751834784094,
"grad_norm": 0.7427366375923157,
"learning_rate": 4.060056280523144e-06,
"loss": 0.638,
"mean_token_accuracy": 0.8032098114490509,
"num_tokens": 34213574.0,
"step": 523
},
{
"epoch": 0.3577402287079706,
"grad_norm": 0.7695744037628174,
"learning_rate": 4.0553918986431904e-06,
"loss": 0.6793,
"mean_token_accuracy": 0.7921401709318161,
"num_tokens": 34279110.0,
"step": 524
},
{
"epoch": 0.35842293906810035,
"grad_norm": 0.7793335318565369,
"learning_rate": 4.050718666262919e-06,
"loss": 0.6295,
"mean_token_accuracy": 0.8063169717788696,
"num_tokens": 34343656.0,
"step": 525
},
{
"epoch": 0.3591056494282301,
"grad_norm": 0.708317220211029,
"learning_rate": 4.046036609973982e-06,
"loss": 0.6001,
"mean_token_accuracy": 0.8142264485359192,
"num_tokens": 34409018.0,
"step": 526
},
{
"epoch": 0.35978835978835977,
"grad_norm": 0.7289883494377136,
"learning_rate": 4.0413457564182455e-06,
"loss": 0.627,
"mean_token_accuracy": 0.8050616979598999,
"num_tokens": 34474554.0,
"step": 527
},
{
"epoch": 0.3604710701484895,
"grad_norm": 0.7371439933776855,
"learning_rate": 4.036646132287632e-06,
"loss": 0.6438,
"mean_token_accuracy": 0.8033890724182129,
"num_tokens": 34540077.0,
"step": 528
},
{
"epoch": 0.36115378050861924,
"grad_norm": 0.7393643260002136,
"learning_rate": 4.03193776432397e-06,
"loss": 0.5997,
"mean_token_accuracy": 0.8115717619657516,
"num_tokens": 34605589.0,
"step": 529
},
{
"epoch": 0.3618364908687489,
"grad_norm": 0.7248691916465759,
"learning_rate": 4.027220679318846e-06,
"loss": 0.6102,
"mean_token_accuracy": 0.8101173043251038,
"num_tokens": 34671125.0,
"step": 530
},
{
"epoch": 0.36251920122887865,
"grad_norm": 0.7262829542160034,
"learning_rate": 4.0224949041134425e-06,
"loss": 0.5782,
"mean_token_accuracy": 0.8194034397602081,
"num_tokens": 34736394.0,
"step": 531
},
{
"epoch": 0.3632019115890084,
"grad_norm": 0.7357496619224548,
"learning_rate": 4.017760465598395e-06,
"loss": 0.6161,
"mean_token_accuracy": 0.8090364784002304,
"num_tokens": 34801868.0,
"step": 532
},
{
"epoch": 0.36388462194913807,
"grad_norm": 0.7482576966285706,
"learning_rate": 4.013017390713635e-06,
"loss": 0.6069,
"mean_token_accuracy": 0.8091856092214584,
"num_tokens": 34867404.0,
"step": 533
},
{
"epoch": 0.3645673323092678,
"grad_norm": 0.7310959100723267,
"learning_rate": 4.008265706448234e-06,
"loss": 0.6071,
"mean_token_accuracy": 0.8111569434404373,
"num_tokens": 34932885.0,
"step": 534
},
{
"epoch": 0.36525004266939753,
"grad_norm": 0.7276792526245117,
"learning_rate": 4.003505439840255e-06,
"loss": 0.6192,
"mean_token_accuracy": 0.8077193349599838,
"num_tokens": 34998421.0,
"step": 535
},
{
"epoch": 0.3659327530295272,
"grad_norm": 0.7438013553619385,
"learning_rate": 3.998736617976596e-06,
"loss": 0.6409,
"mean_token_accuracy": 0.802770659327507,
"num_tokens": 35063957.0,
"step": 536
},
{
"epoch": 0.36661546338965695,
"grad_norm": 0.763702392578125,
"learning_rate": 3.993959267992835e-06,
"loss": 0.5908,
"mean_token_accuracy": 0.8138718605041504,
"num_tokens": 35129311.0,
"step": 537
},
{
"epoch": 0.3672981737497866,
"grad_norm": 0.757030189037323,
"learning_rate": 3.989173417073078e-06,
"loss": 0.6427,
"mean_token_accuracy": 0.799578458070755,
"num_tokens": 35194847.0,
"step": 538
},
{
"epoch": 0.36798088410991636,
"grad_norm": 0.7634031176567078,
"learning_rate": 3.984379092449804e-06,
"loss": 0.643,
"mean_token_accuracy": 0.8004353195428848,
"num_tokens": 35260042.0,
"step": 539
},
{
"epoch": 0.3686635944700461,
"grad_norm": 0.7350435853004456,
"learning_rate": 3.979576321403705e-06,
"loss": 0.6184,
"mean_token_accuracy": 0.8084860742092133,
"num_tokens": 35325490.0,
"step": 540
},
{
"epoch": 0.3693463048301758,
"grad_norm": 0.7435086369514465,
"learning_rate": 3.974765131263539e-06,
"loss": 0.6271,
"mean_token_accuracy": 0.8046798706054688,
"num_tokens": 35391026.0,
"step": 541
},
{
"epoch": 0.3700290151903055,
"grad_norm": 0.8303893804550171,
"learning_rate": 3.96994554940597e-06,
"loss": 0.6375,
"mean_token_accuracy": 0.8043476492166519,
"num_tokens": 35456464.0,
"step": 542
},
{
"epoch": 0.37071172555043524,
"grad_norm": 0.7540069818496704,
"learning_rate": 3.965117603255411e-06,
"loss": 0.6357,
"mean_token_accuracy": 0.8061919659376144,
"num_tokens": 35522000.0,
"step": 543
},
{
"epoch": 0.3713944359105649,
"grad_norm": 0.7430548667907715,
"learning_rate": 3.960281320283869e-06,
"loss": 0.6124,
"mean_token_accuracy": 0.8097812831401825,
"num_tokens": 35587536.0,
"step": 544
},
{
"epoch": 0.37207714627069466,
"grad_norm": 0.7310859560966492,
"learning_rate": 3.955436728010792e-06,
"loss": 0.6242,
"mean_token_accuracy": 0.8084578067064285,
"num_tokens": 35653036.0,
"step": 545
},
{
"epoch": 0.3727598566308244,
"grad_norm": 0.7654127478599548,
"learning_rate": 3.950583854002906e-06,
"loss": 0.6393,
"mean_token_accuracy": 0.8009989112615585,
"num_tokens": 35718572.0,
"step": 546
},
{
"epoch": 0.3734425669909541,
"grad_norm": 0.7217867374420166,
"learning_rate": 3.945722725874066e-06,
"loss": 0.6247,
"mean_token_accuracy": 0.8056681156158447,
"num_tokens": 35784039.0,
"step": 547
},
{
"epoch": 0.3741252773510838,
"grad_norm": 0.7655815482139587,
"learning_rate": 3.940853371285092e-06,
"loss": 0.6094,
"mean_token_accuracy": 0.8104991465806961,
"num_tokens": 35849575.0,
"step": 548
},
{
"epoch": 0.37480798771121354,
"grad_norm": 0.7061954140663147,
"learning_rate": 3.935975817943613e-06,
"loss": 0.5749,
"mean_token_accuracy": 0.8176930546760559,
"num_tokens": 35915111.0,
"step": 549
},
{
"epoch": 0.3754906980713432,
"grad_norm": 0.7559870481491089,
"learning_rate": 3.931090093603916e-06,
"loss": 0.634,
"mean_token_accuracy": 0.8024040907621384,
"num_tokens": 35980647.0,
"step": 550
},
{
"epoch": 0.37617340843147296,
"grad_norm": 0.7588599920272827,
"learning_rate": 3.9261962260667744e-06,
"loss": 0.6144,
"mean_token_accuracy": 0.8079178929328918,
"num_tokens": 36046183.0,
"step": 551
},
{
"epoch": 0.3768561187916027,
"grad_norm": 0.7603986263275146,
"learning_rate": 3.9212942431793055e-06,
"loss": 0.6403,
"mean_token_accuracy": 0.8029824495315552,
"num_tokens": 36110803.0,
"step": 552
},
{
"epoch": 0.37753882915173237,
"grad_norm": 0.7612850666046143,
"learning_rate": 3.916384172834802e-06,
"loss": 0.6011,
"mean_token_accuracy": 0.8126069158315659,
"num_tokens": 36176339.0,
"step": 553
},
{
"epoch": 0.3782215395118621,
"grad_norm": 0.7219383716583252,
"learning_rate": 3.911466042972573e-06,
"loss": 0.5851,
"mean_token_accuracy": 0.8179527074098587,
"num_tokens": 36241875.0,
"step": 554
},
{
"epoch": 0.3789042498719918,
"grad_norm": 0.7435865998268127,
"learning_rate": 3.906539881577793e-06,
"loss": 0.6009,
"mean_token_accuracy": 0.8127596527338028,
"num_tokens": 36307411.0,
"step": 555
},
{
"epoch": 0.3795869602321215,
"grad_norm": 0.708329975605011,
"learning_rate": 3.9016057166813355e-06,
"loss": 0.6139,
"mean_token_accuracy": 0.8103158622980118,
"num_tokens": 36372947.0,
"step": 556
},
{
"epoch": 0.38026967059225125,
"grad_norm": 0.7403861880302429,
"learning_rate": 3.896663576359614e-06,
"loss": 0.6063,
"mean_token_accuracy": 0.8107587993144989,
"num_tokens": 36438483.0,
"step": 557
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.7762757539749146,
"learning_rate": 3.8917134887344235e-06,
"loss": 0.6399,
"mean_token_accuracy": 0.8010013103485107,
"num_tokens": 36503880.0,
"step": 558
},
{
"epoch": 0.38163509131251067,
"grad_norm": 0.8195488452911377,
"learning_rate": 3.8867554819727855e-06,
"loss": 0.6557,
"mean_token_accuracy": 0.7968902885913849,
"num_tokens": 36569416.0,
"step": 559
},
{
"epoch": 0.3823178016726404,
"grad_norm": 0.7722012996673584,
"learning_rate": 3.881789584286778e-06,
"loss": 0.6515,
"mean_token_accuracy": 0.7978678047657013,
"num_tokens": 36634952.0,
"step": 560
},
{
"epoch": 0.3830005120327701,
"grad_norm": 0.7423476576805115,
"learning_rate": 3.876815823933382e-06,
"loss": 0.5918,
"mean_token_accuracy": 0.814546674489975,
"num_tokens": 36700488.0,
"step": 561
},
{
"epoch": 0.3836832223928998,
"grad_norm": 0.7171317934989929,
"learning_rate": 3.87183422921432e-06,
"loss": 0.6324,
"mean_token_accuracy": 0.8058253973722458,
"num_tokens": 36766024.0,
"step": 562
},
{
"epoch": 0.38436593275302955,
"grad_norm": 0.7465117573738098,
"learning_rate": 3.866844828475889e-06,
"loss": 0.63,
"mean_token_accuracy": 0.8047417551279068,
"num_tokens": 36831180.0,
"step": 563
},
{
"epoch": 0.38504864311315923,
"grad_norm": 0.7372057437896729,
"learning_rate": 3.86184765010881e-06,
"loss": 0.6464,
"mean_token_accuracy": 0.8016995638608932,
"num_tokens": 36896700.0,
"step": 564
},
{
"epoch": 0.38573135347328896,
"grad_norm": 0.7480089068412781,
"learning_rate": 3.8568427225480556e-06,
"loss": 0.6215,
"mean_token_accuracy": 0.8073798418045044,
"num_tokens": 36961476.0,
"step": 565
},
{
"epoch": 0.3864140638334187,
"grad_norm": 0.7431397438049316,
"learning_rate": 3.851830074272697e-06,
"loss": 0.6476,
"mean_token_accuracy": 0.8006934374570847,
"num_tokens": 37027012.0,
"step": 566
},
{
"epoch": 0.3870967741935484,
"grad_norm": 0.7774817943572998,
"learning_rate": 3.846809733805732e-06,
"loss": 0.665,
"mean_token_accuracy": 0.7960631251335144,
"num_tokens": 37091896.0,
"step": 567
},
{
"epoch": 0.3877794845536781,
"grad_norm": 0.7300996780395508,
"learning_rate": 3.841781729713935e-06,
"loss": 0.6205,
"mean_token_accuracy": 0.8074519038200378,
"num_tokens": 37157354.0,
"step": 568
},
{
"epoch": 0.3884621949138078,
"grad_norm": 0.7174612879753113,
"learning_rate": 3.836746090607683e-06,
"loss": 0.565,
"mean_token_accuracy": 0.8222929537296295,
"num_tokens": 37222875.0,
"step": 569
},
{
"epoch": 0.3891449052739375,
"grad_norm": 0.7718711495399475,
"learning_rate": 3.831702845140801e-06,
"loss": 0.6529,
"mean_token_accuracy": 0.7992597669363022,
"num_tokens": 37288236.0,
"step": 570
},
{
"epoch": 0.38982761563406726,
"grad_norm": 0.7533946633338928,
"learning_rate": 3.826652022010396e-06,
"loss": 0.6283,
"mean_token_accuracy": 0.8052937835454941,
"num_tokens": 37353484.0,
"step": 571
},
{
"epoch": 0.39051032599419694,
"grad_norm": 0.7376362085342407,
"learning_rate": 3.821593649956688e-06,
"loss": 0.6261,
"mean_token_accuracy": 0.8082539141178131,
"num_tokens": 37419020.0,
"step": 572
},
{
"epoch": 0.3911930363543267,
"grad_norm": 0.7462616562843323,
"learning_rate": 3.81652775776286e-06,
"loss": 0.6104,
"mean_token_accuracy": 0.8107587993144989,
"num_tokens": 37484556.0,
"step": 573
},
{
"epoch": 0.3918757467144564,
"grad_norm": 0.7472273707389832,
"learning_rate": 3.8114543742548817e-06,
"loss": 0.6238,
"mean_token_accuracy": 0.8062225133180618,
"num_tokens": 37550092.0,
"step": 574
},
{
"epoch": 0.3925584570745861,
"grad_norm": 0.7769070863723755,
"learning_rate": 3.8063735283013483e-06,
"loss": 0.6022,
"mean_token_accuracy": 0.8123604208230972,
"num_tokens": 37615338.0,
"step": 575
},
{
"epoch": 0.3932411674347158,
"grad_norm": 0.7517635226249695,
"learning_rate": 3.8012852488133212e-06,
"loss": 0.5952,
"mean_token_accuracy": 0.816242054104805,
"num_tokens": 37680874.0,
"step": 576
},
{
"epoch": 0.39392387779484556,
"grad_norm": 0.7434868216514587,
"learning_rate": 3.7961895647441595e-06,
"loss": 0.5827,
"mean_token_accuracy": 0.8170668333768845,
"num_tokens": 37746410.0,
"step": 577
},
{
"epoch": 0.39460658815497524,
"grad_norm": 0.7710351943969727,
"learning_rate": 3.791086505089354e-06,
"loss": 0.647,
"mean_token_accuracy": 0.797424852848053,
"num_tokens": 37811946.0,
"step": 578
},
{
"epoch": 0.39528929851510497,
"grad_norm": 0.7557309865951538,
"learning_rate": 3.7859760988863664e-06,
"loss": 0.704,
"mean_token_accuracy": 0.7859237641096115,
"num_tokens": 37877482.0,
"step": 579
},
{
"epoch": 0.3959720088752347,
"grad_norm": 0.723376452922821,
"learning_rate": 3.7808583752144602e-06,
"loss": 0.6146,
"mean_token_accuracy": 0.8090023249387741,
"num_tokens": 37943018.0,
"step": 580
},
{
"epoch": 0.3966547192353644,
"grad_norm": 0.7643930912017822,
"learning_rate": 3.775733363194537e-06,
"loss": 0.6384,
"mean_token_accuracy": 0.801774263381958,
"num_tokens": 38008548.0,
"step": 581
},
{
"epoch": 0.3973374295954941,
"grad_norm": 0.7285202145576477,
"learning_rate": 3.7706010919889726e-06,
"loss": 0.6471,
"mean_token_accuracy": 0.7999582588672638,
"num_tokens": 38073998.0,
"step": 582
},
{
"epoch": 0.39802013995562385,
"grad_norm": 0.7560527324676514,
"learning_rate": 3.7654615908014456e-06,
"loss": 0.6032,
"mean_token_accuracy": 0.8126820176839828,
"num_tokens": 38139245.0,
"step": 583
},
{
"epoch": 0.39870285031575353,
"grad_norm": 0.7738918662071228,
"learning_rate": 3.760314888876777e-06,
"loss": 0.6294,
"mean_token_accuracy": 0.8050972521305084,
"num_tokens": 38204491.0,
"step": 584
},
{
"epoch": 0.39938556067588327,
"grad_norm": 0.8183801770210266,
"learning_rate": 3.755161015500762e-06,
"loss": 0.5619,
"mean_token_accuracy": 0.823215052485466,
"num_tokens": 38270002.0,
"step": 585
},
{
"epoch": 0.40006827103601295,
"grad_norm": 0.7287594676017761,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.6482,
"mean_token_accuracy": 0.799517348408699,
"num_tokens": 38335538.0,
"step": 586
},
{
"epoch": 0.4007509813961427,
"grad_norm": 0.7398510575294495,
"learning_rate": 3.7448318717417343e-06,
"loss": 0.614,
"mean_token_accuracy": 0.8087639510631561,
"num_tokens": 38400582.0,
"step": 587
},
{
"epoch": 0.4014336917562724,
"grad_norm": 0.71683269739151,
"learning_rate": 3.739656660133678e-06,
"loss": 0.5832,
"mean_token_accuracy": 0.818490669131279,
"num_tokens": 38466075.0,
"step": 588
},
{
"epoch": 0.4021164021164021,
"grad_norm": 0.7904890179634094,
"learning_rate": 3.734474394623852e-06,
"loss": 0.6087,
"mean_token_accuracy": 0.812148705124855,
"num_tokens": 38531611.0,
"step": 589
},
{
"epoch": 0.40279911247653183,
"grad_norm": 0.762642502784729,
"learning_rate": 3.7292851047004143e-06,
"loss": 0.6222,
"mean_token_accuracy": 0.8086510300636292,
"num_tokens": 38597147.0,
"step": 590
},
{
"epoch": 0.40348182283666156,
"grad_norm": 0.7368521690368652,
"learning_rate": 3.7240888198914935e-06,
"loss": 0.5977,
"mean_token_accuracy": 0.8123778104782104,
"num_tokens": 38662683.0,
"step": 591
},
{
"epoch": 0.40416453319679124,
"grad_norm": 0.7672507762908936,
"learning_rate": 3.7188855697650212e-06,
"loss": 0.5934,
"mean_token_accuracy": 0.8154783695936203,
"num_tokens": 38728219.0,
"step": 592
},
{
"epoch": 0.404847243556921,
"grad_norm": 0.7662923336029053,
"learning_rate": 3.713675383928561e-06,
"loss": 0.6617,
"mean_token_accuracy": 0.7965695410966873,
"num_tokens": 38793755.0,
"step": 593
},
{
"epoch": 0.4055299539170507,
"grad_norm": 0.7628138661384583,
"learning_rate": 3.7084582920291456e-06,
"loss": 0.6167,
"mean_token_accuracy": 0.8086204826831818,
"num_tokens": 38859291.0,
"step": 594
},
{
"epoch": 0.4062126642771804,
"grad_norm": 0.7820776104927063,
"learning_rate": 3.7032343237531017e-06,
"loss": 0.5786,
"mean_token_accuracy": 0.8179832547903061,
"num_tokens": 38924827.0,
"step": 595
},
{
"epoch": 0.4068953746373101,
"grad_norm": 0.7740116119384766,
"learning_rate": 3.6980035088258842e-06,
"loss": 0.5694,
"mean_token_accuracy": 0.81998410820961,
"num_tokens": 38990363.0,
"step": 596
},
{
"epoch": 0.40757808499743986,
"grad_norm": 0.7644342184066772,
"learning_rate": 3.692765877011909e-06,
"loss": 0.6167,
"mean_token_accuracy": 0.805490642786026,
"num_tokens": 39055369.0,
"step": 597
},
{
"epoch": 0.40826079535756954,
"grad_norm": 0.7698699831962585,
"learning_rate": 3.68752145811438e-06,
"loss": 0.616,
"mean_token_accuracy": 0.8080400824546814,
"num_tokens": 39120905.0,
"step": 598
},
{
"epoch": 0.4089435057176993,
"grad_norm": 0.7477402687072754,
"learning_rate": 3.6822702819751195e-06,
"loss": 0.6072,
"mean_token_accuracy": 0.8107282519340515,
"num_tokens": 39186441.0,
"step": 599
},
{
"epoch": 0.409626216077829,
"grad_norm": 0.7572869658470154,
"learning_rate": 3.6770123784744027e-06,
"loss": 0.6225,
"mean_token_accuracy": 0.8081317245960236,
"num_tokens": 39251977.0,
"step": 600
},
{
"epoch": 0.4103089264379587,
"grad_norm": 0.7412436604499817,
"learning_rate": 3.671747777530784e-06,
"loss": 0.5927,
"mean_token_accuracy": 0.8155547380447388,
"num_tokens": 39317513.0,
"step": 601
},
{
"epoch": 0.4109916367980884,
"grad_norm": 0.7406189441680908,
"learning_rate": 3.6664765091009267e-06,
"loss": 0.6025,
"mean_token_accuracy": 0.8125419914722443,
"num_tokens": 39382777.0,
"step": 602
},
{
"epoch": 0.4116743471582181,
"grad_norm": 0.7247329950332642,
"learning_rate": 3.6611986031794345e-06,
"loss": 0.618,
"mean_token_accuracy": 0.8083761036396027,
"num_tokens": 39448313.0,
"step": 603
},
{
"epoch": 0.41235705751834784,
"grad_norm": 0.7504489421844482,
"learning_rate": 3.6559140897986777e-06,
"loss": 0.6565,
"mean_token_accuracy": 0.7968750149011612,
"num_tokens": 39513849.0,
"step": 604
},
{
"epoch": 0.41303976787847757,
"grad_norm": 0.7441123723983765,
"learning_rate": 3.6506229990286292e-06,
"loss": 0.6122,
"mean_token_accuracy": 0.8100409358739853,
"num_tokens": 39579385.0,
"step": 605
},
{
"epoch": 0.41372247823860725,
"grad_norm": 0.7135398387908936,
"learning_rate": 3.6453253609766825e-06,
"loss": 0.5994,
"mean_token_accuracy": 0.8148216009140015,
"num_tokens": 39644921.0,
"step": 606
},
{
"epoch": 0.414405188598737,
"grad_norm": 0.7364180088043213,
"learning_rate": 3.6400212057874912e-06,
"loss": 0.6371,
"mean_token_accuracy": 0.8020986020565033,
"num_tokens": 39710457.0,
"step": 607
},
{
"epoch": 0.4150878989588667,
"grad_norm": 0.7383872270584106,
"learning_rate": 3.63471056364279e-06,
"loss": 0.6188,
"mean_token_accuracy": 0.8076735138893127,
"num_tokens": 39775993.0,
"step": 608
},
{
"epoch": 0.4157706093189964,
"grad_norm": 0.7400882244110107,
"learning_rate": 3.629393464761227e-06,
"loss": 0.6417,
"mean_token_accuracy": 0.8041910529136658,
"num_tokens": 39841183.0,
"step": 609
},
{
"epoch": 0.41645331967912613,
"grad_norm": 0.7408536076545715,
"learning_rate": 3.6240699393981915e-06,
"loss": 0.6219,
"mean_token_accuracy": 0.8063905239105225,
"num_tokens": 39906719.0,
"step": 610
},
{
"epoch": 0.41713603003925587,
"grad_norm": 0.7284941673278809,
"learning_rate": 3.618740017845638e-06,
"loss": 0.6009,
"mean_token_accuracy": 0.8142106533050537,
"num_tokens": 39972255.0,
"step": 611
},
{
"epoch": 0.41781874039938555,
"grad_norm": 0.7574661374092102,
"learning_rate": 3.613403730431917e-06,
"loss": 0.642,
"mean_token_accuracy": 0.8028387278318405,
"num_tokens": 40037702.0,
"step": 612
},
{
"epoch": 0.4185014507595153,
"grad_norm": 0.742321252822876,
"learning_rate": 3.6080611075216053e-06,
"loss": 0.6214,
"mean_token_accuracy": 0.8056885898113251,
"num_tokens": 40103160.0,
"step": 613
},
{
"epoch": 0.419184161119645,
"grad_norm": 0.7570779919624329,
"learning_rate": 3.602712179515328e-06,
"loss": 0.6328,
"mean_token_accuracy": 0.8036107122898102,
"num_tokens": 40168696.0,
"step": 614
},
{
"epoch": 0.4198668714797747,
"grad_norm": 0.7323223948478699,
"learning_rate": 3.5973569768495858e-06,
"loss": 0.6051,
"mean_token_accuracy": 0.8121181577444077,
"num_tokens": 40234232.0,
"step": 615
},
{
"epoch": 0.42054958183990443,
"grad_norm": 0.7437307834625244,
"learning_rate": 3.5919955299965858e-06,
"loss": 0.6317,
"mean_token_accuracy": 0.802324965596199,
"num_tokens": 40298970.0,
"step": 616
},
{
"epoch": 0.4212322922000341,
"grad_norm": 0.7594529390335083,
"learning_rate": 3.586627869464065e-06,
"loss": 0.6508,
"mean_token_accuracy": 0.8005407005548477,
"num_tokens": 40364506.0,
"step": 617
},
{
"epoch": 0.42191500256016384,
"grad_norm": 0.7406197786331177,
"learning_rate": 3.5812540257951178e-06,
"loss": 0.615,
"mean_token_accuracy": 0.812698557972908,
"num_tokens": 40430042.0,
"step": 618
},
{
"epoch": 0.4225977129202936,
"grad_norm": 0.755382239818573,
"learning_rate": 3.575874029568021e-06,
"loss": 0.613,
"mean_token_accuracy": 0.8104227781295776,
"num_tokens": 40495578.0,
"step": 619
},
{
"epoch": 0.42328042328042326,
"grad_norm": 0.7402629256248474,
"learning_rate": 3.5704879113960627e-06,
"loss": 0.6589,
"mean_token_accuracy": 0.7991355061531067,
"num_tokens": 40561114.0,
"step": 620
},
{
"epoch": 0.423963133640553,
"grad_norm": 0.7542107105255127,
"learning_rate": 3.5650957019273642e-06,
"loss": 0.5923,
"mean_token_accuracy": 0.8151881694793701,
"num_tokens": 40626650.0,
"step": 621
},
{
"epoch": 0.4246458440006827,
"grad_norm": 0.772157609462738,
"learning_rate": 3.5596974318447075e-06,
"loss": 0.6473,
"mean_token_accuracy": 0.7991202473640442,
"num_tokens": 40692186.0,
"step": 622
},
{
"epoch": 0.4253285543608124,
"grad_norm": 0.782319188117981,
"learning_rate": 3.5542931318653625e-06,
"loss": 0.6498,
"mean_token_accuracy": 0.799028605222702,
"num_tokens": 40757722.0,
"step": 623
},
{
"epoch": 0.42601126472094214,
"grad_norm": 0.7283681631088257,
"learning_rate": 3.5488828327409086e-06,
"loss": 0.6361,
"mean_token_accuracy": 0.8019764274358749,
"num_tokens": 40823258.0,
"step": 624
},
{
"epoch": 0.4266939750810719,
"grad_norm": 0.7392454743385315,
"learning_rate": 3.543466565257063e-06,
"loss": 0.6063,
"mean_token_accuracy": 0.8119959682226181,
"num_tokens": 40888794.0,
"step": 625
},
{
"epoch": 0.42737668544120155,
"grad_norm": 0.7856171131134033,
"learning_rate": 3.538044360233503e-06,
"loss": 0.6085,
"mean_token_accuracy": 0.8111406415700912,
"num_tokens": 40954330.0,
"step": 626
},
{
"epoch": 0.4280593958013313,
"grad_norm": 0.7746567130088806,
"learning_rate": 3.532616248523692e-06,
"loss": 0.6443,
"mean_token_accuracy": 0.8002853393554688,
"num_tokens": 41019315.0,
"step": 627
},
{
"epoch": 0.428742106161461,
"grad_norm": 0.7823646068572998,
"learning_rate": 3.527182261014705e-06,
"loss": 0.6094,
"mean_token_accuracy": 0.8089713901281357,
"num_tokens": 41084672.0,
"step": 628
},
{
"epoch": 0.4294248165215907,
"grad_norm": 0.7409360408782959,
"learning_rate": 3.521742428627049e-06,
"loss": 0.6239,
"mean_token_accuracy": 0.8043133020401001,
"num_tokens": 41150208.0,
"step": 629
},
{
"epoch": 0.43010752688172044,
"grad_norm": 0.7446600198745728,
"learning_rate": 3.516296782314491e-06,
"loss": 0.6312,
"mean_token_accuracy": 0.8042682111263275,
"num_tokens": 41215719.0,
"step": 630
},
{
"epoch": 0.43079023724185017,
"grad_norm": 0.7764535546302795,
"learning_rate": 3.5108453530638815e-06,
"loss": 0.611,
"mean_token_accuracy": 0.8111705332994461,
"num_tokens": 41280701.0,
"step": 631
},
{
"epoch": 0.43147294760197985,
"grad_norm": 0.7295299172401428,
"learning_rate": 3.5053881718949758e-06,
"loss": 0.609,
"mean_token_accuracy": 0.8096132725477219,
"num_tokens": 41346237.0,
"step": 632
},
{
"epoch": 0.4321556579621096,
"grad_norm": 0.7986657619476318,
"learning_rate": 3.499925269860257e-06,
"loss": 0.6505,
"mean_token_accuracy": 0.799974262714386,
"num_tokens": 41411547.0,
"step": 633
},
{
"epoch": 0.43283836832223926,
"grad_norm": 0.7602563500404358,
"learning_rate": 3.4944566780447648e-06,
"loss": 0.6407,
"mean_token_accuracy": 0.802831619977951,
"num_tokens": 41477017.0,
"step": 634
},
{
"epoch": 0.433521078682369,
"grad_norm": 0.750855565071106,
"learning_rate": 3.4889824275659136e-06,
"loss": 0.5869,
"mean_token_accuracy": 0.8175708651542664,
"num_tokens": 41542553.0,
"step": 635
},
{
"epoch": 0.43420378904249873,
"grad_norm": 0.7245985865592957,
"learning_rate": 3.4835025495733143e-06,
"loss": 0.6471,
"mean_token_accuracy": 0.8033660650253296,
"num_tokens": 41607382.0,
"step": 636
},
{
"epoch": 0.4348864994026284,
"grad_norm": 0.7158551812171936,
"learning_rate": 3.4780170752486035e-06,
"loss": 0.5953,
"mean_token_accuracy": 0.8136302530765533,
"num_tokens": 41672918.0,
"step": 637
},
{
"epoch": 0.43556920976275815,
"grad_norm": 0.7428006529808044,
"learning_rate": 3.4725260358052597e-06,
"loss": 0.5941,
"mean_token_accuracy": 0.8138288110494614,
"num_tokens": 41738454.0,
"step": 638
},
{
"epoch": 0.4362519201228879,
"grad_norm": 0.7119945287704468,
"learning_rate": 3.4670294624884275e-06,
"loss": 0.617,
"mean_token_accuracy": 0.8088801354169846,
"num_tokens": 41803990.0,
"step": 639
},
{
"epoch": 0.43693463048301756,
"grad_norm": 0.6839730739593506,
"learning_rate": 3.461527386574743e-06,
"loss": 0.5456,
"mean_token_accuracy": 0.8276820480823517,
"num_tokens": 41869526.0,
"step": 640
},
{
"epoch": 0.4376173408431473,
"grad_norm": 0.7335575222969055,
"learning_rate": 3.45601983937215e-06,
"loss": 0.6375,
"mean_token_accuracy": 0.8022129386663437,
"num_tokens": 41935054.0,
"step": 641
},
{
"epoch": 0.43830005120327703,
"grad_norm": 0.7447086572647095,
"learning_rate": 3.4505068522197277e-06,
"loss": 0.6568,
"mean_token_accuracy": 0.7966000735759735,
"num_tokens": 42000590.0,
"step": 642
},
{
"epoch": 0.4389827615634067,
"grad_norm": 0.7332596778869629,
"learning_rate": 3.4449884564875086e-06,
"loss": 0.6065,
"mean_token_accuracy": 0.8104380518198013,
"num_tokens": 42066126.0,
"step": 643
},
{
"epoch": 0.43966547192353644,
"grad_norm": 0.7137930989265442,
"learning_rate": 3.439464683576301e-06,
"loss": 0.6694,
"mean_token_accuracy": 0.7953858822584152,
"num_tokens": 42131485.0,
"step": 644
},
{
"epoch": 0.4403481822836662,
"grad_norm": 0.7591450214385986,
"learning_rate": 3.43393556491751e-06,
"loss": 0.649,
"mean_token_accuracy": 0.8000366687774658,
"num_tokens": 42197021.0,
"step": 645
},
{
"epoch": 0.44103089264379586,
"grad_norm": 0.7286630868911743,
"learning_rate": 3.428401131972961e-06,
"loss": 0.5651,
"mean_token_accuracy": 0.8224584460258484,
"num_tokens": 42262557.0,
"step": 646
},
{
"epoch": 0.4417136030039256,
"grad_norm": 0.7138211131095886,
"learning_rate": 3.4228614162347167e-06,
"loss": 0.5986,
"mean_token_accuracy": 0.8115658462047577,
"num_tokens": 42328086.0,
"step": 647
},
{
"epoch": 0.4423963133640553,
"grad_norm": 0.691111147403717,
"learning_rate": 3.417316449224902e-06,
"loss": 0.575,
"mean_token_accuracy": 0.8209549337625504,
"num_tokens": 42393344.0,
"step": 648
},
{
"epoch": 0.443079023724185,
"grad_norm": 0.7229531407356262,
"learning_rate": 3.41176626249552e-06,
"loss": 0.6104,
"mean_token_accuracy": 0.8075550347566605,
"num_tokens": 42458741.0,
"step": 649
},
{
"epoch": 0.44376173408431474,
"grad_norm": 0.8127530813217163,
"learning_rate": 3.4062108876282773e-06,
"loss": 0.6424,
"mean_token_accuracy": 0.8024535477161407,
"num_tokens": 42523740.0,
"step": 650
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.7284678220748901,
"learning_rate": 3.4006503562344014e-06,
"loss": 0.5886,
"mean_token_accuracy": 0.81828872859478,
"num_tokens": 42589276.0,
"step": 651
},
{
"epoch": 0.44512715480457415,
"grad_norm": 0.7327984571456909,
"learning_rate": 3.3950846999544613e-06,
"loss": 0.5961,
"mean_token_accuracy": 0.8132331371307373,
"num_tokens": 42654812.0,
"step": 652
},
{
"epoch": 0.4458098651647039,
"grad_norm": 0.7355505228042603,
"learning_rate": 3.389513950458187e-06,
"loss": 0.6422,
"mean_token_accuracy": 0.800984725356102,
"num_tokens": 42720132.0,
"step": 653
},
{
"epoch": 0.44649257552483357,
"grad_norm": 0.7620731592178345,
"learning_rate": 3.383938139444293e-06,
"loss": 0.6518,
"mean_token_accuracy": 0.7987159639596939,
"num_tokens": 42785539.0,
"step": 654
},
{
"epoch": 0.4471752858849633,
"grad_norm": 0.7601433992385864,
"learning_rate": 3.3783572986402896e-06,
"loss": 0.6462,
"mean_token_accuracy": 0.8014137893915176,
"num_tokens": 42850974.0,
"step": 655
},
{
"epoch": 0.44785799624509304,
"grad_norm": 0.7175619602203369,
"learning_rate": 3.372771459802313e-06,
"loss": 0.5875,
"mean_token_accuracy": 0.8182276338338852,
"num_tokens": 42916510.0,
"step": 656
},
{
"epoch": 0.4485407066052227,
"grad_norm": 0.7211723327636719,
"learning_rate": 3.3671806547149357e-06,
"loss": 0.6365,
"mean_token_accuracy": 0.8023043274879456,
"num_tokens": 42981896.0,
"step": 657
},
{
"epoch": 0.44922341696535245,
"grad_norm": 0.7135072350502014,
"learning_rate": 3.36158491519099e-06,
"loss": 0.6132,
"mean_token_accuracy": 0.8097965568304062,
"num_tokens": 43047432.0,
"step": 658
},
{
"epoch": 0.4499061273254822,
"grad_norm": 0.7017936110496521,
"learning_rate": 3.355984273071389e-06,
"loss": 0.5707,
"mean_token_accuracy": 0.8174466788768768,
"num_tokens": 43112715.0,
"step": 659
},
{
"epoch": 0.45058883768561186,
"grad_norm": 0.7288024425506592,
"learning_rate": 3.3503787602249366e-06,
"loss": 0.6104,
"mean_token_accuracy": 0.8088495880365372,
"num_tokens": 43178251.0,
"step": 660
},
{
"epoch": 0.4512715480457416,
"grad_norm": 0.7068003416061401,
"learning_rate": 3.344768408548158e-06,
"loss": 0.624,
"mean_token_accuracy": 0.8051533550024033,
"num_tokens": 43243787.0,
"step": 661
},
{
"epoch": 0.45195425840587133,
"grad_norm": 0.7492927312850952,
"learning_rate": 3.339153249965109e-06,
"loss": 0.6621,
"mean_token_accuracy": 0.7967247366905212,
"num_tokens": 43308554.0,
"step": 662
},
{
"epoch": 0.452636968766001,
"grad_norm": 0.7419738173484802,
"learning_rate": 3.3335333164272e-06,
"loss": 0.6254,
"mean_token_accuracy": 0.8067606091499329,
"num_tokens": 43373512.0,
"step": 663
},
{
"epoch": 0.45331967912613075,
"grad_norm": 0.7345742583274841,
"learning_rate": 3.327908639913009e-06,
"loss": 0.572,
"mean_token_accuracy": 0.8185178339481354,
"num_tokens": 43439048.0,
"step": 664
},
{
"epoch": 0.4540023894862604,
"grad_norm": 0.7236369848251343,
"learning_rate": 3.3222792524281045e-06,
"loss": 0.5821,
"mean_token_accuracy": 0.8172501176595688,
"num_tokens": 43504584.0,
"step": 665
},
{
"epoch": 0.45468509984639016,
"grad_norm": 0.778313934803009,
"learning_rate": 3.3166451860048615e-06,
"loss": 0.6343,
"mean_token_accuracy": 0.8037634491920471,
"num_tokens": 43570120.0,
"step": 666
},
{
"epoch": 0.4553678102065199,
"grad_norm": 0.7267420887947083,
"learning_rate": 3.3110064727022783e-06,
"loss": 0.6388,
"mean_token_accuracy": 0.8018084168434143,
"num_tokens": 43635656.0,
"step": 667
},
{
"epoch": 0.4560505205666496,
"grad_norm": 0.7459083199501038,
"learning_rate": 3.3053631446057944e-06,
"loss": 0.633,
"mean_token_accuracy": 0.803519070148468,
"num_tokens": 43701192.0,
"step": 668
},
{
"epoch": 0.4567332309267793,
"grad_norm": 0.7337201833724976,
"learning_rate": 3.299715233827111e-06,
"loss": 0.6728,
"mean_token_accuracy": 0.7914834022521973,
"num_tokens": 43766728.0,
"step": 669
},
{
"epoch": 0.45741594128690904,
"grad_norm": 0.75600665807724,
"learning_rate": 3.294062772504002e-06,
"loss": 0.61,
"mean_token_accuracy": 0.8095979988574982,
"num_tokens": 43832264.0,
"step": 670
},
{
"epoch": 0.4580986516470387,
"grad_norm": 0.7811246514320374,
"learning_rate": 3.288405792800138e-06,
"loss": 0.6318,
"mean_token_accuracy": 0.8027248382568359,
"num_tokens": 43897800.0,
"step": 671
},
{
"epoch": 0.45878136200716846,
"grad_norm": 0.7400442957878113,
"learning_rate": 3.282744326904899e-06,
"loss": 0.5896,
"mean_token_accuracy": 0.8151118010282516,
"num_tokens": 43963336.0,
"step": 672
},
{
"epoch": 0.4594640723672982,
"grad_norm": 0.7188034653663635,
"learning_rate": 3.27707840703319e-06,
"loss": 0.6026,
"mean_token_accuracy": 0.8125610947608948,
"num_tokens": 44028872.0,
"step": 673
},
{
"epoch": 0.46014678272742787,
"grad_norm": 0.7437601685523987,
"learning_rate": 3.2714080654252657e-06,
"loss": 0.6495,
"mean_token_accuracy": 0.7988808453083038,
"num_tokens": 44094131.0,
"step": 674
},
{
"epoch": 0.4608294930875576,
"grad_norm": 0.7074049711227417,
"learning_rate": 3.265733334346536e-06,
"loss": 0.6018,
"mean_token_accuracy": 0.8113830238580704,
"num_tokens": 44159596.0,
"step": 675
},
{
"epoch": 0.46151220344768734,
"grad_norm": 0.7336816787719727,
"learning_rate": 3.260054246087389e-06,
"loss": 0.5852,
"mean_token_accuracy": 0.8168530017137527,
"num_tokens": 44225132.0,
"step": 676
},
{
"epoch": 0.462194913807817,
"grad_norm": 0.7625792026519775,
"learning_rate": 3.2543708329630085e-06,
"loss": 0.6105,
"mean_token_accuracy": 0.8130651265382767,
"num_tokens": 44290668.0,
"step": 677
},
{
"epoch": 0.46287762416794676,
"grad_norm": 0.7495646476745605,
"learning_rate": 3.248683127313186e-06,
"loss": 0.6233,
"mean_token_accuracy": 0.8054354339838028,
"num_tokens": 44355496.0,
"step": 678
},
{
"epoch": 0.4635603345280765,
"grad_norm": 0.7479454278945923,
"learning_rate": 3.242991161502136e-06,
"loss": 0.6531,
"mean_token_accuracy": 0.7986889332532883,
"num_tokens": 44420943.0,
"step": 679
},
{
"epoch": 0.46424304488820617,
"grad_norm": 0.7727522850036621,
"learning_rate": 3.2372949679183196e-06,
"loss": 0.6671,
"mean_token_accuracy": 0.7948741465806961,
"num_tokens": 44486479.0,
"step": 680
},
{
"epoch": 0.4649257552483359,
"grad_norm": 0.7375922799110413,
"learning_rate": 3.23159457897425e-06,
"loss": 0.6078,
"mean_token_accuracy": 0.8109726309776306,
"num_tokens": 44552015.0,
"step": 681
},
{
"epoch": 0.4656084656084656,
"grad_norm": 0.7293082475662231,
"learning_rate": 3.2258900271063143e-06,
"loss": 0.6135,
"mean_token_accuracy": 0.810254767537117,
"num_tokens": 44617551.0,
"step": 682
},
{
"epoch": 0.4662911759685953,
"grad_norm": 0.7409456372261047,
"learning_rate": 3.2201813447745885e-06,
"loss": 0.6045,
"mean_token_accuracy": 0.810606062412262,
"num_tokens": 44683087.0,
"step": 683
},
{
"epoch": 0.46697388632872505,
"grad_norm": 0.6928256154060364,
"learning_rate": 3.2144685644626513e-06,
"loss": 0.5872,
"mean_token_accuracy": 0.8173173069953918,
"num_tokens": 44748500.0,
"step": 684
},
{
"epoch": 0.46765659668885473,
"grad_norm": 0.7540627121925354,
"learning_rate": 3.2087517186773986e-06,
"loss": 0.6253,
"mean_token_accuracy": 0.8058706372976303,
"num_tokens": 44813885.0,
"step": 685
},
{
"epoch": 0.46833930704898447,
"grad_norm": 0.7306386828422546,
"learning_rate": 3.203030839948862e-06,
"loss": 0.6371,
"mean_token_accuracy": 0.8030995428562164,
"num_tokens": 44879386.0,
"step": 686
},
{
"epoch": 0.4690220174091142,
"grad_norm": 0.7555832266807556,
"learning_rate": 3.1973059608300185e-06,
"loss": 0.6404,
"mean_token_accuracy": 0.8015610724687576,
"num_tokens": 44944541.0,
"step": 687
},
{
"epoch": 0.4697047277692439,
"grad_norm": 0.7078608870506287,
"learning_rate": 3.19157711389661e-06,
"loss": 0.5954,
"mean_token_accuracy": 0.8156158328056335,
"num_tokens": 45010077.0,
"step": 688
},
{
"epoch": 0.4703874381293736,
"grad_norm": 0.7409559488296509,
"learning_rate": 3.185844331746957e-06,
"loss": 0.6411,
"mean_token_accuracy": 0.8022622019052505,
"num_tokens": 45074969.0,
"step": 689
},
{
"epoch": 0.47107014848950335,
"grad_norm": 0.7214213013648987,
"learning_rate": 3.1801076470017696e-06,
"loss": 0.6545,
"mean_token_accuracy": 0.7991660535335541,
"num_tokens": 45140505.0,
"step": 690
},
{
"epoch": 0.471752858849633,
"grad_norm": 0.733475387096405,
"learning_rate": 3.1743670923039655e-06,
"loss": 0.6209,
"mean_token_accuracy": 0.8057470470666885,
"num_tokens": 45205945.0,
"step": 691
},
{
"epoch": 0.47243556920976276,
"grad_norm": 0.7223991751670837,
"learning_rate": 3.168622700318485e-06,
"loss": 0.6138,
"mean_token_accuracy": 0.8067930787801743,
"num_tokens": 45271110.0,
"step": 692
},
{
"epoch": 0.4731182795698925,
"grad_norm": 0.7543123364448547,
"learning_rate": 3.1628745037321005e-06,
"loss": 0.6375,
"mean_token_accuracy": 0.8020515888929367,
"num_tokens": 45336637.0,
"step": 693
},
{
"epoch": 0.4738009899300222,
"grad_norm": 0.7674548625946045,
"learning_rate": 3.157122535253235e-06,
"loss": 0.6376,
"mean_token_accuracy": 0.802969217300415,
"num_tokens": 45402173.0,
"step": 694
},
{
"epoch": 0.4744837002901519,
"grad_norm": 0.740697979927063,
"learning_rate": 3.1513668276117747e-06,
"loss": 0.6336,
"mean_token_accuracy": 0.803000196814537,
"num_tokens": 45467557.0,
"step": 695
},
{
"epoch": 0.47516641065028165,
"grad_norm": 0.7490605115890503,
"learning_rate": 3.1456074135588805e-06,
"loss": 0.65,
"mean_token_accuracy": 0.7991335093975067,
"num_tokens": 45533034.0,
"step": 696
},
{
"epoch": 0.4758491210104113,
"grad_norm": 0.7376020550727844,
"learning_rate": 3.1398443258668067e-06,
"loss": 0.594,
"mean_token_accuracy": 0.8152694553136826,
"num_tokens": 45598284.0,
"step": 697
},
{
"epoch": 0.47653183137054106,
"grad_norm": 0.7506970167160034,
"learning_rate": 3.134077597328708e-06,
"loss": 0.6136,
"mean_token_accuracy": 0.8080400824546814,
"num_tokens": 45663820.0,
"step": 698
},
{
"epoch": 0.47721454173067074,
"grad_norm": 0.7497142553329468,
"learning_rate": 3.1283072607584573e-06,
"loss": 0.613,
"mean_token_accuracy": 0.8081164509057999,
"num_tokens": 45729356.0,
"step": 699
},
{
"epoch": 0.4778972520908005,
"grad_norm": 0.7283821105957031,
"learning_rate": 3.1225333489904606e-06,
"loss": 0.6041,
"mean_token_accuracy": 0.8139815479516983,
"num_tokens": 45794892.0,
"step": 700
},
{
"epoch": 0.4785799624509302,
"grad_norm": 0.7512271404266357,
"learning_rate": 3.116755894879464e-06,
"loss": 0.6081,
"mean_token_accuracy": 0.8116752207279205,
"num_tokens": 45860428.0,
"step": 701
},
{
"epoch": 0.4792626728110599,
"grad_norm": 0.7062849998474121,
"learning_rate": 3.1109749313003708e-06,
"loss": 0.6007,
"mean_token_accuracy": 0.8138440847396851,
"num_tokens": 45925964.0,
"step": 702
},
{
"epoch": 0.4799453831711896,
"grad_norm": 0.8034562468528748,
"learning_rate": 3.1051904911480557e-06,
"loss": 0.6304,
"mean_token_accuracy": 0.8042827546596527,
"num_tokens": 45991500.0,
"step": 703
},
{
"epoch": 0.48062809353131936,
"grad_norm": 0.7626417875289917,
"learning_rate": 3.099402607337175e-06,
"loss": 0.6265,
"mean_token_accuracy": 0.8054893612861633,
"num_tokens": 46057036.0,
"step": 704
},
{
"epoch": 0.48131080389144903,
"grad_norm": 0.7254495024681091,
"learning_rate": 3.093611312801979e-06,
"loss": 0.647,
"mean_token_accuracy": 0.8013196587562561,
"num_tokens": 46122572.0,
"step": 705
},
{
"epoch": 0.48199351425157877,
"grad_norm": 0.7576347589492798,
"learning_rate": 3.087816640496127e-06,
"loss": 0.6537,
"mean_token_accuracy": 0.7981427162885666,
"num_tokens": 46188108.0,
"step": 706
},
{
"epoch": 0.4826762246117085,
"grad_norm": 0.7223957777023315,
"learning_rate": 3.0820186233924983e-06,
"loss": 0.5885,
"mean_token_accuracy": 0.8181630969047546,
"num_tokens": 46252922.0,
"step": 707
},
{
"epoch": 0.4833589349718382,
"grad_norm": 0.7446067929267883,
"learning_rate": 3.076217294483005e-06,
"loss": 0.6751,
"mean_token_accuracy": 0.7922012507915497,
"num_tokens": 46318458.0,
"step": 708
},
{
"epoch": 0.4840416453319679,
"grad_norm": 0.7198394536972046,
"learning_rate": 3.070412686778403e-06,
"loss": 0.6288,
"mean_token_accuracy": 0.8034121543169022,
"num_tokens": 46383994.0,
"step": 709
},
{
"epoch": 0.48472435569209765,
"grad_norm": 0.7317565679550171,
"learning_rate": 3.064604833308105e-06,
"loss": 0.6018,
"mean_token_accuracy": 0.8128818422555923,
"num_tokens": 46449530.0,
"step": 710
},
{
"epoch": 0.48540706605222733,
"grad_norm": 0.7785357236862183,
"learning_rate": 3.058793767119996e-06,
"loss": 0.6415,
"mean_token_accuracy": 0.8014406114816666,
"num_tokens": 46514874.0,
"step": 711
},
{
"epoch": 0.48608977641235707,
"grad_norm": 0.7061479687690735,
"learning_rate": 3.0529795212802383e-06,
"loss": 0.5641,
"mean_token_accuracy": 0.8236192464828491,
"num_tokens": 46580410.0,
"step": 712
},
{
"epoch": 0.48677248677248675,
"grad_norm": 0.735151469707489,
"learning_rate": 3.047162128873089e-06,
"loss": 0.6134,
"mean_token_accuracy": 0.8087884932756424,
"num_tokens": 46645946.0,
"step": 713
},
{
"epoch": 0.4874551971326165,
"grad_norm": 0.7411344051361084,
"learning_rate": 3.041341623000708e-06,
"loss": 0.6257,
"mean_token_accuracy": 0.8060804158449173,
"num_tokens": 46711201.0,
"step": 714
},
{
"epoch": 0.4881379074927462,
"grad_norm": 0.7967774271965027,
"learning_rate": 3.0355180367829746e-06,
"loss": 0.6345,
"mean_token_accuracy": 0.8028616309165955,
"num_tokens": 46776659.0,
"step": 715
},
{
"epoch": 0.4888206178528759,
"grad_norm": 0.7407638430595398,
"learning_rate": 3.029691403357293e-06,
"loss": 0.5917,
"mean_token_accuracy": 0.815096527338028,
"num_tokens": 46842195.0,
"step": 716
},
{
"epoch": 0.48950332821300563,
"grad_norm": 0.7118982076644897,
"learning_rate": 3.0238617558784077e-06,
"loss": 0.5633,
"mean_token_accuracy": 0.8250522017478943,
"num_tokens": 46907443.0,
"step": 717
},
{
"epoch": 0.49018603857313536,
"grad_norm": 0.7390048503875732,
"learning_rate": 3.0180291275182144e-06,
"loss": 0.6331,
"mean_token_accuracy": 0.8052017390727997,
"num_tokens": 46972873.0,
"step": 718
},
{
"epoch": 0.49086874893326504,
"grad_norm": 0.7418379783630371,
"learning_rate": 3.0121935514655697e-06,
"loss": 0.5866,
"mean_token_accuracy": 0.8170940279960632,
"num_tokens": 47038259.0,
"step": 719
},
{
"epoch": 0.4915514592933948,
"grad_norm": 0.7906901240348816,
"learning_rate": 3.006355060926103e-06,
"loss": 0.6644,
"mean_token_accuracy": 0.7941715717315674,
"num_tokens": 47103795.0,
"step": 720
},
{
"epoch": 0.4922341696535245,
"grad_norm": 0.7372046709060669,
"learning_rate": 3.000513689122029e-06,
"loss": 0.6327,
"mean_token_accuracy": 0.8058201223611832,
"num_tokens": 47168745.0,
"step": 721
},
{
"epoch": 0.4929168800136542,
"grad_norm": 0.7171697616577148,
"learning_rate": 2.9946694692919553e-06,
"loss": 0.5739,
"mean_token_accuracy": 0.820540577173233,
"num_tokens": 47233820.0,
"step": 722
},
{
"epoch": 0.4935995903737839,
"grad_norm": 0.7130782604217529,
"learning_rate": 2.988822434690699e-06,
"loss": 0.6228,
"mean_token_accuracy": 0.8077346086502075,
"num_tokens": 47299356.0,
"step": 723
},
{
"epoch": 0.49428230073391366,
"grad_norm": 0.7585386633872986,
"learning_rate": 2.9829726185890894e-06,
"loss": 0.6007,
"mean_token_accuracy": 0.8131654858589172,
"num_tokens": 47364281.0,
"step": 724
},
{
"epoch": 0.49496501109404334,
"grad_norm": 0.7572980523109436,
"learning_rate": 2.9771200542737856e-06,
"loss": 0.6105,
"mean_token_accuracy": 0.80910924077034,
"num_tokens": 47429817.0,
"step": 725
},
{
"epoch": 0.4956477214541731,
"grad_norm": 0.748471200466156,
"learning_rate": 2.9712647750470853e-06,
"loss": 0.6478,
"mean_token_accuracy": 0.8009799122810364,
"num_tokens": 47495145.0,
"step": 726
},
{
"epoch": 0.4963304318143028,
"grad_norm": 0.7291464805603027,
"learning_rate": 2.9654068142267325e-06,
"loss": 0.6266,
"mean_token_accuracy": 0.8058406710624695,
"num_tokens": 47560681.0,
"step": 727
},
{
"epoch": 0.4970131421744325,
"grad_norm": 0.7313188910484314,
"learning_rate": 2.9595462051457307e-06,
"loss": 0.5765,
"mean_token_accuracy": 0.8204728662967682,
"num_tokens": 47626217.0,
"step": 728
},
{
"epoch": 0.4976958525345622,
"grad_norm": 0.7730467915534973,
"learning_rate": 2.953682981152154e-06,
"loss": 0.6191,
"mean_token_accuracy": 0.8103005886077881,
"num_tokens": 47691753.0,
"step": 729
},
{
"epoch": 0.4983785628946919,
"grad_norm": 0.7231931686401367,
"learning_rate": 2.947817175608954e-06,
"loss": 0.6072,
"mean_token_accuracy": 0.8107830137014389,
"num_tokens": 47757123.0,
"step": 730
},
{
"epoch": 0.49906127325482164,
"grad_norm": 0.747162401676178,
"learning_rate": 2.9419488218937723e-06,
"loss": 0.6292,
"mean_token_accuracy": 0.804267480969429,
"num_tokens": 47822659.0,
"step": 731
},
{
"epoch": 0.49974398361495137,
"grad_norm": 0.7242068648338318,
"learning_rate": 2.9360779533987515e-06,
"loss": 0.5868,
"mean_token_accuracy": 0.815844938158989,
"num_tokens": 47888195.0,
"step": 732
},
{
"epoch": 0.5004266939750811,
"grad_norm": 0.7154374122619629,
"learning_rate": 2.9302046035303424e-06,
"loss": 0.5967,
"mean_token_accuracy": 0.8129582107067108,
"num_tokens": 47953731.0,
"step": 733
},
{
"epoch": 0.5011094043352108,
"grad_norm": 0.721357524394989,
"learning_rate": 2.9243288057091147e-06,
"loss": 0.6025,
"mean_token_accuracy": 0.813941165804863,
"num_tokens": 48018967.0,
"step": 734
},
{
"epoch": 0.5017921146953405,
"grad_norm": 0.7229841947555542,
"learning_rate": 2.91845059336957e-06,
"loss": 0.6009,
"mean_token_accuracy": 0.8107102662324905,
"num_tokens": 48084313.0,
"step": 735
},
{
"epoch": 0.5024748250554703,
"grad_norm": 0.7424589991569519,
"learning_rate": 2.9125699999599467e-06,
"loss": 0.6195,
"mean_token_accuracy": 0.8080553561449051,
"num_tokens": 48149849.0,
"step": 736
},
{
"epoch": 0.5031575354155999,
"grad_norm": 0.7185443043708801,
"learning_rate": 2.9066870589420323e-06,
"loss": 0.6142,
"mean_token_accuracy": 0.808559387922287,
"num_tokens": 48215385.0,
"step": 737
},
{
"epoch": 0.5038402457757296,
"grad_norm": 0.7181544303894043,
"learning_rate": 2.9008018037909735e-06,
"loss": 0.6381,
"mean_token_accuracy": 0.8024193644523621,
"num_tokens": 48280921.0,
"step": 738
},
{
"epoch": 0.5045229561358594,
"grad_norm": 0.6995269656181335,
"learning_rate": 2.8949142679950848e-06,
"loss": 0.5899,
"mean_token_accuracy": 0.816242054104805,
"num_tokens": 48346457.0,
"step": 739
},
{
"epoch": 0.5052056664959891,
"grad_norm": 0.7697423100471497,
"learning_rate": 2.889024485055657e-06,
"loss": 0.601,
"mean_token_accuracy": 0.813996821641922,
"num_tokens": 48411993.0,
"step": 740
},
{
"epoch": 0.5058883768561188,
"grad_norm": 0.7361392974853516,
"learning_rate": 2.883132488486769e-06,
"loss": 0.5846,
"mean_token_accuracy": 0.8176256269216537,
"num_tokens": 48477020.0,
"step": 741
},
{
"epoch": 0.5065710872162486,
"grad_norm": 0.7436454892158508,
"learning_rate": 2.8772383118150946e-06,
"loss": 0.6126,
"mean_token_accuracy": 0.8058253973722458,
"num_tokens": 48542556.0,
"step": 742
},
{
"epoch": 0.5072537975763782,
"grad_norm": 0.7757138013839722,
"learning_rate": 2.871341988579714e-06,
"loss": 0.6233,
"mean_token_accuracy": 0.8082896620035172,
"num_tokens": 48607806.0,
"step": 743
},
{
"epoch": 0.5079365079365079,
"grad_norm": 0.7233085632324219,
"learning_rate": 2.86544355233192e-06,
"loss": 0.6005,
"mean_token_accuracy": 0.8126832842826843,
"num_tokens": 48673342.0,
"step": 744
},
{
"epoch": 0.5086192182966377,
"grad_norm": 0.722743570804596,
"learning_rate": 2.859543036635031e-06,
"loss": 0.6606,
"mean_token_accuracy": 0.7941257357597351,
"num_tokens": 48738878.0,
"step": 745
},
{
"epoch": 0.5093019286567674,
"grad_norm": 0.7590633034706116,
"learning_rate": 2.8536404750641963e-06,
"loss": 0.6346,
"mean_token_accuracy": 0.8029753863811493,
"num_tokens": 48804385.0,
"step": 746
},
{
"epoch": 0.5099846390168971,
"grad_norm": 0.7025012373924255,
"learning_rate": 2.8477359012062067e-06,
"loss": 0.6003,
"mean_token_accuracy": 0.8105480968952179,
"num_tokens": 48869610.0,
"step": 747
},
{
"epoch": 0.5106673493770268,
"grad_norm": 0.7078215479850769,
"learning_rate": 2.8418293486593044e-06,
"loss": 0.5977,
"mean_token_accuracy": 0.8118890523910522,
"num_tokens": 48935146.0,
"step": 748
},
{
"epoch": 0.5113500597371565,
"grad_norm": 0.7257028818130493,
"learning_rate": 2.8359208510329913e-06,
"loss": 0.6179,
"mean_token_accuracy": 0.8075818568468094,
"num_tokens": 49000682.0,
"step": 749
},
{
"epoch": 0.5120327700972862,
"grad_norm": 0.7726735472679138,
"learning_rate": 2.830010441947834e-06,
"loss": 0.6213,
"mean_token_accuracy": 0.8048478811979294,
"num_tokens": 49066218.0,
"step": 750
},
{
"epoch": 0.512715480457416,
"grad_norm": 0.7362603545188904,
"learning_rate": 2.8240981550352785e-06,
"loss": 0.6196,
"mean_token_accuracy": 0.8084271550178528,
"num_tokens": 49131451.0,
"step": 751
},
{
"epoch": 0.5133981908175457,
"grad_norm": 0.7677423357963562,
"learning_rate": 2.818184023937456e-06,
"loss": 0.6294,
"mean_token_accuracy": 0.8077346086502075,
"num_tokens": 49196987.0,
"step": 752
},
{
"epoch": 0.5140809011776754,
"grad_norm": 0.7333040833473206,
"learning_rate": 2.81226808230699e-06,
"loss": 0.6175,
"mean_token_accuracy": 0.8068181872367859,
"num_tokens": 49262523.0,
"step": 753
},
{
"epoch": 0.514763611537805,
"grad_norm": 0.7260233163833618,
"learning_rate": 2.8063503638068073e-06,
"loss": 0.5838,
"mean_token_accuracy": 0.8187592774629593,
"num_tokens": 49327753.0,
"step": 754
},
{
"epoch": 0.5154463218979348,
"grad_norm": 0.7574732899665833,
"learning_rate": 2.8004309021099444e-06,
"loss": 0.617,
"mean_token_accuracy": 0.807459682226181,
"num_tokens": 49393289.0,
"step": 755
},
{
"epoch": 0.5161290322580645,
"grad_norm": 0.7057220935821533,
"learning_rate": 2.79450973089936e-06,
"loss": 0.596,
"mean_token_accuracy": 0.8131316304206848,
"num_tokens": 49458588.0,
"step": 756
},
{
"epoch": 0.5168117426181942,
"grad_norm": 0.7108963131904602,
"learning_rate": 2.7885868838677364e-06,
"loss": 0.5688,
"mean_token_accuracy": 0.8220307976007462,
"num_tokens": 49524124.0,
"step": 757
},
{
"epoch": 0.517494452978324,
"grad_norm": 0.7996737957000732,
"learning_rate": 2.782662394717293e-06,
"loss": 0.5885,
"mean_token_accuracy": 0.8194495290517807,
"num_tokens": 49589660.0,
"step": 758
},
{
"epoch": 0.5181771633384537,
"grad_norm": 0.773198664188385,
"learning_rate": 2.7767362971595944e-06,
"loss": 0.6388,
"mean_token_accuracy": 0.8019306063652039,
"num_tokens": 49655196.0,
"step": 759
},
{
"epoch": 0.5188598736985833,
"grad_norm": 0.7262683510780334,
"learning_rate": 2.7708086249153565e-06,
"loss": 0.5985,
"mean_token_accuracy": 0.811354473233223,
"num_tokens": 49720732.0,
"step": 760
},
{
"epoch": 0.5195425840587131,
"grad_norm": 0.7870587706565857,
"learning_rate": 2.764879411714256e-06,
"loss": 0.6536,
"mean_token_accuracy": 0.7973643690347672,
"num_tokens": 49785745.0,
"step": 761
},
{
"epoch": 0.5202252944188428,
"grad_norm": 0.7811471223831177,
"learning_rate": 2.7589486912947354e-06,
"loss": 0.6228,
"mean_token_accuracy": 0.8065677434206009,
"num_tokens": 49851242.0,
"step": 762
},
{
"epoch": 0.5209080047789725,
"grad_norm": 0.7129400372505188,
"learning_rate": 2.7530164974038176e-06,
"loss": 0.5993,
"mean_token_accuracy": 0.813798263669014,
"num_tokens": 49916778.0,
"step": 763
},
{
"epoch": 0.5215907151391023,
"grad_norm": 0.7337347269058228,
"learning_rate": 2.747082863796907e-06,
"loss": 0.6037,
"mean_token_accuracy": 0.8119270205497742,
"num_tokens": 49982301.0,
"step": 764
},
{
"epoch": 0.522273425499232,
"grad_norm": 0.768386960029602,
"learning_rate": 2.741147824237602e-06,
"loss": 0.6086,
"mean_token_accuracy": 0.809155061841011,
"num_tokens": 50047837.0,
"step": 765
},
{
"epoch": 0.5229561358593616,
"grad_norm": 0.7903435230255127,
"learning_rate": 2.735211412497499e-06,
"loss": 0.6246,
"mean_token_accuracy": 0.805413007736206,
"num_tokens": 50113373.0,
"step": 766
},
{
"epoch": 0.5236388462194914,
"grad_norm": 0.7200343608856201,
"learning_rate": 2.7292736623560044e-06,
"loss": 0.582,
"mean_token_accuracy": 0.8190562874078751,
"num_tokens": 50178582.0,
"step": 767
},
{
"epoch": 0.5243215565796211,
"grad_norm": 0.7194651365280151,
"learning_rate": 2.7233346076001403e-06,
"loss": 0.593,
"mean_token_accuracy": 0.8154478222131729,
"num_tokens": 50244118.0,
"step": 768
},
{
"epoch": 0.5250042669397508,
"grad_norm": 0.7576190829277039,
"learning_rate": 2.717394282024351e-06,
"loss": 0.6074,
"mean_token_accuracy": 0.8109115362167358,
"num_tokens": 50309654.0,
"step": 769
},
{
"epoch": 0.5256869772998806,
"grad_norm": 0.7116931080818176,
"learning_rate": 2.711452719430313e-06,
"loss": 0.6229,
"mean_token_accuracy": 0.8080746233463287,
"num_tokens": 50375028.0,
"step": 770
},
{
"epoch": 0.5263696876600102,
"grad_norm": 0.7312449216842651,
"learning_rate": 2.705509953626741e-06,
"loss": 0.6039,
"mean_token_accuracy": 0.8116446733474731,
"num_tokens": 50440564.0,
"step": 771
},
{
"epoch": 0.5270523980201399,
"grad_norm": 0.7251054048538208,
"learning_rate": 2.6995660184291977e-06,
"loss": 0.6074,
"mean_token_accuracy": 0.8126221895217896,
"num_tokens": 50506100.0,
"step": 772
},
{
"epoch": 0.5277351083802697,
"grad_norm": 0.7374172806739807,
"learning_rate": 2.6936209476598977e-06,
"loss": 0.639,
"mean_token_accuracy": 0.8034860640764236,
"num_tokens": 50571534.0,
"step": 773
},
{
"epoch": 0.5284178187403994,
"grad_norm": 0.7465745210647583,
"learning_rate": 2.687674775147519e-06,
"loss": 0.6428,
"mean_token_accuracy": 0.8013654798269272,
"num_tokens": 50637070.0,
"step": 774
},
{
"epoch": 0.5291005291005291,
"grad_norm": 0.7335869073867798,
"learning_rate": 2.681727534727008e-06,
"loss": 0.649,
"mean_token_accuracy": 0.7997464686632156,
"num_tokens": 50702606.0,
"step": 775
},
{
"epoch": 0.5297832394606589,
"grad_norm": 0.7344288229942322,
"learning_rate": 2.6757792602393885e-06,
"loss": 0.6295,
"mean_token_accuracy": 0.8041147440671921,
"num_tokens": 50768142.0,
"step": 776
},
{
"epoch": 0.5304659498207885,
"grad_norm": 0.7773926854133606,
"learning_rate": 2.669829985531566e-06,
"loss": 0.637,
"mean_token_accuracy": 0.8031677752733231,
"num_tokens": 50833678.0,
"step": 777
},
{
"epoch": 0.5311486601809182,
"grad_norm": 0.7365777492523193,
"learning_rate": 2.6638797444561415e-06,
"loss": 0.5796,
"mean_token_accuracy": 0.8170362859964371,
"num_tokens": 50899214.0,
"step": 778
},
{
"epoch": 0.531831370541048,
"grad_norm": 0.7678808569908142,
"learning_rate": 2.6579285708712103e-06,
"loss": 0.6381,
"mean_token_accuracy": 0.8041147440671921,
"num_tokens": 50964750.0,
"step": 779
},
{
"epoch": 0.5325140809011777,
"grad_norm": 0.7613728642463684,
"learning_rate": 2.6519764986401776e-06,
"loss": 0.598,
"mean_token_accuracy": 0.8142717480659485,
"num_tokens": 51030286.0,
"step": 780
},
{
"epoch": 0.5331967912613074,
"grad_norm": 0.6854228973388672,
"learning_rate": 2.646023561631559e-06,
"loss": 0.5874,
"mean_token_accuracy": 0.815096527338028,
"num_tokens": 51095822.0,
"step": 781
},
{
"epoch": 0.5338795016214372,
"grad_norm": 0.7385619282722473,
"learning_rate": 2.6400697937187946e-06,
"loss": 0.6224,
"mean_token_accuracy": 0.8047562390565872,
"num_tokens": 51161358.0,
"step": 782
},
{
"epoch": 0.5345622119815668,
"grad_norm": 0.7568676471710205,
"learning_rate": 2.6341152287800475e-06,
"loss": 0.6509,
"mean_token_accuracy": 0.7998993992805481,
"num_tokens": 51226841.0,
"step": 783
},
{
"epoch": 0.5352449223416965,
"grad_norm": 0.7271560430526733,
"learning_rate": 2.628159900698022e-06,
"loss": 0.5857,
"mean_token_accuracy": 0.817341759800911,
"num_tokens": 51292377.0,
"step": 784
},
{
"epoch": 0.5359276327018262,
"grad_norm": 0.7313284277915955,
"learning_rate": 2.622203843359759e-06,
"loss": 0.5898,
"mean_token_accuracy": 0.8143208026885986,
"num_tokens": 51357356.0,
"step": 785
},
{
"epoch": 0.536610343061956,
"grad_norm": 0.7274580597877502,
"learning_rate": 2.616247090656453e-06,
"loss": 0.612,
"mean_token_accuracy": 0.8110948204994202,
"num_tokens": 51422892.0,
"step": 786
},
{
"epoch": 0.5372930534220857,
"grad_norm": 0.7195629477500916,
"learning_rate": 2.610289676483254e-06,
"loss": 0.5761,
"mean_token_accuracy": 0.8174325376749039,
"num_tokens": 51488267.0,
"step": 787
},
{
"epoch": 0.5379757637822153,
"grad_norm": 0.7511928081512451,
"learning_rate": 2.6043316347390765e-06,
"loss": 0.6126,
"mean_token_accuracy": 0.8073833137750626,
"num_tokens": 51553803.0,
"step": 788
},
{
"epoch": 0.5386584741423451,
"grad_norm": 0.6966540217399597,
"learning_rate": 2.5983729993264033e-06,
"loss": 0.604,
"mean_token_accuracy": 0.811553031206131,
"num_tokens": 51619339.0,
"step": 789
},
{
"epoch": 0.5393411845024748,
"grad_norm": 0.7874122262001038,
"learning_rate": 2.5924138041510993e-06,
"loss": 0.633,
"mean_token_accuracy": 0.8046645969152451,
"num_tokens": 51684875.0,
"step": 790
},
{
"epoch": 0.5400238948626045,
"grad_norm": 0.7472220659255981,
"learning_rate": 2.586454083122212e-06,
"loss": 0.6023,
"mean_token_accuracy": 0.8128818422555923,
"num_tokens": 51750411.0,
"step": 791
},
{
"epoch": 0.5407066052227343,
"grad_norm": 0.747721791267395,
"learning_rate": 2.5804938701517825e-06,
"loss": 0.6402,
"mean_token_accuracy": 0.8023495376110077,
"num_tokens": 51815702.0,
"step": 792
},
{
"epoch": 0.541389315582864,
"grad_norm": 0.7737058401107788,
"learning_rate": 2.574533199154649e-06,
"loss": 0.673,
"mean_token_accuracy": 0.7901087552309036,
"num_tokens": 51881238.0,
"step": 793
},
{
"epoch": 0.5420720259429936,
"grad_norm": 0.7369558811187744,
"learning_rate": 2.5685721040482587e-06,
"loss": 0.6308,
"mean_token_accuracy": 0.8035927265882492,
"num_tokens": 51945928.0,
"step": 794
},
{
"epoch": 0.5427547363031234,
"grad_norm": 0.7183763384819031,
"learning_rate": 2.56261061875247e-06,
"loss": 0.5752,
"mean_token_accuracy": 0.8207653313875198,
"num_tokens": 52011454.0,
"step": 795
},
{
"epoch": 0.5434374466632531,
"grad_norm": 0.7445209622383118,
"learning_rate": 2.5566487771893627e-06,
"loss": 0.6272,
"mean_token_accuracy": 0.805764302611351,
"num_tokens": 52076990.0,
"step": 796
},
{
"epoch": 0.5441201570233828,
"grad_norm": 0.7469929456710815,
"learning_rate": 2.5506866132830433e-06,
"loss": 0.6253,
"mean_token_accuracy": 0.8070320188999176,
"num_tokens": 52142526.0,
"step": 797
},
{
"epoch": 0.5448028673835126,
"grad_norm": 0.7637821435928345,
"learning_rate": 2.5447241609594524e-06,
"loss": 0.613,
"mean_token_accuracy": 0.8110882490873337,
"num_tokens": 52207756.0,
"step": 798
},
{
"epoch": 0.5454855777436423,
"grad_norm": 0.722944974899292,
"learning_rate": 2.538761454146173e-06,
"loss": 0.6254,
"mean_token_accuracy": 0.8025415539741516,
"num_tokens": 52273292.0,
"step": 799
},
{
"epoch": 0.5461682881037719,
"grad_norm": 0.7083418369293213,
"learning_rate": 2.5327985267722337e-06,
"loss": 0.5797,
"mean_token_accuracy": 0.8187222331762314,
"num_tokens": 52338411.0,
"step": 800
},
{
"epoch": 0.5468509984639017,
"grad_norm": 0.6983156204223633,
"learning_rate": 2.526835412767921e-06,
"loss": 0.6182,
"mean_token_accuracy": 0.8081211447715759,
"num_tokens": 52403361.0,
"step": 801
},
{
"epoch": 0.5475337088240314,
"grad_norm": 0.7576866149902344,
"learning_rate": 2.520872146064582e-06,
"loss": 0.6434,
"mean_token_accuracy": 0.8006476163864136,
"num_tokens": 52468897.0,
"step": 802
},
{
"epoch": 0.5482164191841611,
"grad_norm": 0.7381872534751892,
"learning_rate": 2.514908760594431e-06,
"loss": 0.6478,
"mean_token_accuracy": 0.8003574162721634,
"num_tokens": 52534433.0,
"step": 803
},
{
"epoch": 0.5488991295442909,
"grad_norm": 0.6863521337509155,
"learning_rate": 2.5089452902903616e-06,
"loss": 0.6015,
"mean_token_accuracy": 0.8113850206136703,
"num_tokens": 52599969.0,
"step": 804
},
{
"epoch": 0.5495818399044206,
"grad_norm": 0.7224480509757996,
"learning_rate": 2.502981769085748e-06,
"loss": 0.5932,
"mean_token_accuracy": 0.815065935254097,
"num_tokens": 52665021.0,
"step": 805
},
{
"epoch": 0.5502645502645502,
"grad_norm": 0.7919376492500305,
"learning_rate": 2.4970182309142533e-06,
"loss": 0.6559,
"mean_token_accuracy": 0.7980355471372604,
"num_tokens": 52730429.0,
"step": 806
},
{
"epoch": 0.55094726062468,
"grad_norm": 0.7454491853713989,
"learning_rate": 2.491054709709639e-06,
"loss": 0.6195,
"mean_token_accuracy": 0.8066349029541016,
"num_tokens": 52795965.0,
"step": 807
},
{
"epoch": 0.5516299709848097,
"grad_norm": 0.7252407073974609,
"learning_rate": 2.4850912394055693e-06,
"loss": 0.6077,
"mean_token_accuracy": 0.8098803013563156,
"num_tokens": 52861340.0,
"step": 808
},
{
"epoch": 0.5523126813449394,
"grad_norm": 0.7148882746696472,
"learning_rate": 2.479127853935419e-06,
"loss": 0.6179,
"mean_token_accuracy": 0.8085928410291672,
"num_tokens": 52926258.0,
"step": 809
},
{
"epoch": 0.5529953917050692,
"grad_norm": 0.7061576247215271,
"learning_rate": 2.473164587232079e-06,
"loss": 0.5861,
"mean_token_accuracy": 0.8157074749469757,
"num_tokens": 52991794.0,
"step": 810
},
{
"epoch": 0.5536781020651989,
"grad_norm": 0.7092587947845459,
"learning_rate": 2.4672014732277667e-06,
"loss": 0.5682,
"mean_token_accuracy": 0.8220918774604797,
"num_tokens": 53057330.0,
"step": 811
},
{
"epoch": 0.5543608124253285,
"grad_norm": 0.7507848739624023,
"learning_rate": 2.4612385458538276e-06,
"loss": 0.6013,
"mean_token_accuracy": 0.811751589179039,
"num_tokens": 53122866.0,
"step": 812
},
{
"epoch": 0.5550435227854583,
"grad_norm": 0.7575944662094116,
"learning_rate": 2.455275839040547e-06,
"loss": 0.6226,
"mean_token_accuracy": 0.8050311654806137,
"num_tokens": 53188402.0,
"step": 813
},
{
"epoch": 0.555726233145588,
"grad_norm": 0.7243123650550842,
"learning_rate": 2.4493133867169575e-06,
"loss": 0.6513,
"mean_token_accuracy": 0.8003964424133301,
"num_tokens": 53253425.0,
"step": 814
},
{
"epoch": 0.5564089435057177,
"grad_norm": 0.6894735097885132,
"learning_rate": 2.443351222810638e-06,
"loss": 0.5822,
"mean_token_accuracy": 0.8188735246658325,
"num_tokens": 53318734.0,
"step": 815
},
{
"epoch": 0.5570916538658475,
"grad_norm": 0.7273776531219482,
"learning_rate": 2.43738938124753e-06,
"loss": 0.6333,
"mean_token_accuracy": 0.8024957329034805,
"num_tokens": 53384270.0,
"step": 816
},
{
"epoch": 0.5577743642259771,
"grad_norm": 0.7011674046516418,
"learning_rate": 2.4314278959517425e-06,
"loss": 0.6148,
"mean_token_accuracy": 0.8078524768352509,
"num_tokens": 53448239.0,
"step": 817
},
{
"epoch": 0.5584570745861068,
"grad_norm": 0.7421677708625793,
"learning_rate": 2.4254668008453513e-06,
"loss": 0.6289,
"mean_token_accuracy": 0.806909829378128,
"num_tokens": 53513775.0,
"step": 818
},
{
"epoch": 0.5591397849462365,
"grad_norm": 0.7313380241394043,
"learning_rate": 2.419506129848218e-06,
"loss": 0.5929,
"mean_token_accuracy": 0.8127902001142502,
"num_tokens": 53579311.0,
"step": 819
},
{
"epoch": 0.5598224953063663,
"grad_norm": 0.7254424691200256,
"learning_rate": 2.4135459168777887e-06,
"loss": 0.6211,
"mean_token_accuracy": 0.8071694821119308,
"num_tokens": 53644847.0,
"step": 820
},
{
"epoch": 0.560505205666496,
"grad_norm": 0.7302435636520386,
"learning_rate": 2.407586195848901e-06,
"loss": 0.5977,
"mean_token_accuracy": 0.8155899941921234,
"num_tokens": 53710151.0,
"step": 821
},
{
"epoch": 0.5611879160266257,
"grad_norm": 0.7332073450088501,
"learning_rate": 2.4016270006735967e-06,
"loss": 0.6131,
"mean_token_accuracy": 0.8101572394371033,
"num_tokens": 53775637.0,
"step": 822
},
{
"epoch": 0.5618706263867554,
"grad_norm": 0.7568655014038086,
"learning_rate": 2.395668365260925e-06,
"loss": 0.6091,
"mean_token_accuracy": 0.8092314302921295,
"num_tokens": 53841173.0,
"step": 823
},
{
"epoch": 0.5625533367468851,
"grad_norm": 0.7010401487350464,
"learning_rate": 2.3897103235167465e-06,
"loss": 0.5522,
"mean_token_accuracy": 0.8247800469398499,
"num_tokens": 53906709.0,
"step": 824
},
{
"epoch": 0.5632360471070148,
"grad_norm": 0.7365289330482483,
"learning_rate": 2.383752909343547e-06,
"loss": 0.594,
"mean_token_accuracy": 0.8127911686897278,
"num_tokens": 53972052.0,
"step": 825
},
{
"epoch": 0.5639187574671446,
"grad_norm": 0.7445287108421326,
"learning_rate": 2.377796156640242e-06,
"loss": 0.6276,
"mean_token_accuracy": 0.8042827546596527,
"num_tokens": 54037588.0,
"step": 826
},
{
"epoch": 0.5646014678272743,
"grad_norm": 0.7611055970191956,
"learning_rate": 2.3718400993019793e-06,
"loss": 0.6262,
"mean_token_accuracy": 0.8061387240886688,
"num_tokens": 54102947.0,
"step": 827
},
{
"epoch": 0.565284178187404,
"grad_norm": 0.7284524440765381,
"learning_rate": 2.3658847712199524e-06,
"loss": 0.6047,
"mean_token_accuracy": 0.8119959682226181,
"num_tokens": 54168483.0,
"step": 828
},
{
"epoch": 0.5659668885475337,
"grad_norm": 0.7890021800994873,
"learning_rate": 2.359930206281207e-06,
"loss": 0.5932,
"mean_token_accuracy": 0.8162310272455215,
"num_tokens": 54233578.0,
"step": 829
},
{
"epoch": 0.5666495989076634,
"grad_norm": 0.7527608871459961,
"learning_rate": 2.3539764383684412e-06,
"loss": 0.5985,
"mean_token_accuracy": 0.8142717480659485,
"num_tokens": 54299114.0,
"step": 830
},
{
"epoch": 0.5673323092677931,
"grad_norm": 0.7443063855171204,
"learning_rate": 2.348023501359823e-06,
"loss": 0.6502,
"mean_token_accuracy": 0.7986772954463959,
"num_tokens": 54364650.0,
"step": 831
},
{
"epoch": 0.5680150196279229,
"grad_norm": 0.6920120716094971,
"learning_rate": 2.3420714291287905e-06,
"loss": 0.5764,
"mean_token_accuracy": 0.8207630664110184,
"num_tokens": 54430186.0,
"step": 832
},
{
"epoch": 0.5686977299880526,
"grad_norm": 0.7068764567375183,
"learning_rate": 2.3361202555438594e-06,
"loss": 0.5915,
"mean_token_accuracy": 0.8150812536478043,
"num_tokens": 54495722.0,
"step": 833
},
{
"epoch": 0.5693804403481822,
"grad_norm": 0.8468170762062073,
"learning_rate": 2.330170014468434e-06,
"loss": 0.6343,
"mean_token_accuracy": 0.8036412596702576,
"num_tokens": 54561258.0,
"step": 834
},
{
"epoch": 0.570063150708312,
"grad_norm": 0.6984657049179077,
"learning_rate": 2.3242207397606124e-06,
"loss": 0.5634,
"mean_token_accuracy": 0.8241946548223495,
"num_tokens": 54626564.0,
"step": 835
},
{
"epoch": 0.5707458610684417,
"grad_norm": 0.7546541690826416,
"learning_rate": 2.3182724652729922e-06,
"loss": 0.6029,
"mean_token_accuracy": 0.8130804002285004,
"num_tokens": 54692100.0,
"step": 836
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.7233748435974121,
"learning_rate": 2.312325224852481e-06,
"loss": 0.5796,
"mean_token_accuracy": 0.8184720128774643,
"num_tokens": 54757636.0,
"step": 837
},
{
"epoch": 0.5721112817887012,
"grad_norm": 0.7522431015968323,
"learning_rate": 2.3063790523401035e-06,
"loss": 0.6447,
"mean_token_accuracy": 0.7997826039791107,
"num_tokens": 54822970.0,
"step": 838
},
{
"epoch": 0.5727939921488309,
"grad_norm": 0.7364733815193176,
"learning_rate": 2.300433981570803e-06,
"loss": 0.5975,
"mean_token_accuracy": 0.8118279576301575,
"num_tokens": 54888506.0,
"step": 839
},
{
"epoch": 0.5734767025089605,
"grad_norm": 0.7409289479255676,
"learning_rate": 2.2944900463732594e-06,
"loss": 0.5998,
"mean_token_accuracy": 0.8123625367879868,
"num_tokens": 54954042.0,
"step": 840
},
{
"epoch": 0.5741594128690903,
"grad_norm": 0.7288930416107178,
"learning_rate": 2.2885472805696883e-06,
"loss": 0.5874,
"mean_token_accuracy": 0.8148674219846725,
"num_tokens": 55019578.0,
"step": 841
},
{
"epoch": 0.57484212322922,
"grad_norm": 0.7401166558265686,
"learning_rate": 2.28260571797565e-06,
"loss": 0.6255,
"mean_token_accuracy": 0.8048631548881531,
"num_tokens": 55085114.0,
"step": 842
},
{
"epoch": 0.5755248335893497,
"grad_norm": 0.7475305795669556,
"learning_rate": 2.2766653923998605e-06,
"loss": 0.6149,
"mean_token_accuracy": 0.8096132725477219,
"num_tokens": 55150650.0,
"step": 843
},
{
"epoch": 0.5762075439494795,
"grad_norm": 0.6970760226249695,
"learning_rate": 2.270726337643997e-06,
"loss": 0.6288,
"mean_token_accuracy": 0.8047256916761398,
"num_tokens": 55216186.0,
"step": 844
},
{
"epoch": 0.5768902543096092,
"grad_norm": 0.7151328325271606,
"learning_rate": 2.264788587502502e-06,
"loss": 0.5999,
"mean_token_accuracy": 0.8139662742614746,
"num_tokens": 55281722.0,
"step": 845
},
{
"epoch": 0.5775729646697388,
"grad_norm": 0.6835965514183044,
"learning_rate": 2.258852175762399e-06,
"loss": 0.6157,
"mean_token_accuracy": 0.8080445528030396,
"num_tokens": 55347183.0,
"step": 846
},
{
"epoch": 0.5782556750298686,
"grad_norm": 0.7275755405426025,
"learning_rate": 2.2529171362030943e-06,
"loss": 0.6363,
"mean_token_accuracy": 0.799471452832222,
"num_tokens": 55412714.0,
"step": 847
},
{
"epoch": 0.5789383853899983,
"grad_norm": 0.7379560470581055,
"learning_rate": 2.246983502596183e-06,
"loss": 0.6503,
"mean_token_accuracy": 0.7994582206010818,
"num_tokens": 55478058.0,
"step": 848
},
{
"epoch": 0.579621095750128,
"grad_norm": 0.7486806511878967,
"learning_rate": 2.241051308705265e-06,
"loss": 0.6229,
"mean_token_accuracy": 0.8074291348457336,
"num_tokens": 55543594.0,
"step": 849
},
{
"epoch": 0.5803038061102577,
"grad_norm": 0.6972367763519287,
"learning_rate": 2.235120588285746e-06,
"loss": 0.5471,
"mean_token_accuracy": 0.8273751586675644,
"num_tokens": 55608322.0,
"step": 850
},
{
"epoch": 0.5809865164703875,
"grad_norm": 0.7290424704551697,
"learning_rate": 2.229191375084644e-06,
"loss": 0.5765,
"mean_token_accuracy": 0.8169446438550949,
"num_tokens": 55673858.0,
"step": 851
},
{
"epoch": 0.5816692268305171,
"grad_norm": 0.7283189296722412,
"learning_rate": 2.223263702840406e-06,
"loss": 0.5646,
"mean_token_accuracy": 0.8223196864128113,
"num_tokens": 55739371.0,
"step": 852
},
{
"epoch": 0.5823519371906468,
"grad_norm": 0.7880806922912598,
"learning_rate": 2.2173376052827077e-06,
"loss": 0.6166,
"mean_token_accuracy": 0.807247519493103,
"num_tokens": 55804876.0,
"step": 853
},
{
"epoch": 0.5830346475507766,
"grad_norm": 0.7274592518806458,
"learning_rate": 2.2114131161322645e-06,
"loss": 0.6043,
"mean_token_accuracy": 0.8126132190227509,
"num_tokens": 55870223.0,
"step": 854
},
{
"epoch": 0.5837173579109063,
"grad_norm": 0.7590665221214294,
"learning_rate": 2.2054902691006407e-06,
"loss": 0.6013,
"mean_token_accuracy": 0.8119831830263138,
"num_tokens": 55935728.0,
"step": 855
},
{
"epoch": 0.584400068271036,
"grad_norm": 0.7268674373626709,
"learning_rate": 2.199569097890055e-06,
"loss": 0.5879,
"mean_token_accuracy": 0.8171060085296631,
"num_tokens": 56000505.0,
"step": 856
},
{
"epoch": 0.5850827786311658,
"grad_norm": 0.7419127821922302,
"learning_rate": 2.1936496361931935e-06,
"loss": 0.662,
"mean_token_accuracy": 0.7967375367879868,
"num_tokens": 56066041.0,
"step": 857
},
{
"epoch": 0.5857654889912954,
"grad_norm": 0.7568214535713196,
"learning_rate": 2.187731917693011e-06,
"loss": 0.5984,
"mean_token_accuracy": 0.8127596527338028,
"num_tokens": 56131577.0,
"step": 858
},
{
"epoch": 0.5864481993514251,
"grad_norm": 0.6971532702445984,
"learning_rate": 2.1818159760625444e-06,
"loss": 0.5897,
"mean_token_accuracy": 0.8156934976577759,
"num_tokens": 56197064.0,
"step": 859
},
{
"epoch": 0.5871309097115549,
"grad_norm": 0.7272664308547974,
"learning_rate": 2.1759018449647224e-06,
"loss": 0.5836,
"mean_token_accuracy": 0.8184851855039597,
"num_tokens": 56262485.0,
"step": 860
},
{
"epoch": 0.5878136200716846,
"grad_norm": 0.725836992263794,
"learning_rate": 2.1699895580521666e-06,
"loss": 0.6278,
"mean_token_accuracy": 0.804779976606369,
"num_tokens": 56327459.0,
"step": 861
},
{
"epoch": 0.5884963304318143,
"grad_norm": 0.7364003658294678,
"learning_rate": 2.164079148967009e-06,
"loss": 0.6447,
"mean_token_accuracy": 0.8013349324464798,
"num_tokens": 56392995.0,
"step": 862
},
{
"epoch": 0.589179040791944,
"grad_norm": 0.732744574546814,
"learning_rate": 2.158170651340696e-06,
"loss": 0.598,
"mean_token_accuracy": 0.8127138316631317,
"num_tokens": 56458531.0,
"step": 863
},
{
"epoch": 0.5898617511520737,
"grad_norm": 0.7075684070587158,
"learning_rate": 2.1522640987937937e-06,
"loss": 0.5823,
"mean_token_accuracy": 0.8167233616113663,
"num_tokens": 56523016.0,
"step": 864
},
{
"epoch": 0.5905444615122034,
"grad_norm": 0.7445662021636963,
"learning_rate": 2.146359524935804e-06,
"loss": 0.6338,
"mean_token_accuracy": 0.8040997236967087,
"num_tokens": 56588299.0,
"step": 865
},
{
"epoch": 0.5912271718723332,
"grad_norm": 0.7430678009986877,
"learning_rate": 2.1404569633649703e-06,
"loss": 0.58,
"mean_token_accuracy": 0.8178457915782928,
"num_tokens": 56653835.0,
"step": 866
},
{
"epoch": 0.5919098822324629,
"grad_norm": 0.7150656580924988,
"learning_rate": 2.1345564476680807e-06,
"loss": 0.5607,
"mean_token_accuracy": 0.8213276714086533,
"num_tokens": 56718903.0,
"step": 867
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.7253732085227966,
"learning_rate": 2.1286580114202866e-06,
"loss": 0.5935,
"mean_token_accuracy": 0.815096527338028,
"num_tokens": 56784439.0,
"step": 868
},
{
"epoch": 0.5932753029527223,
"grad_norm": 0.7164980173110962,
"learning_rate": 2.122761688184906e-06,
"loss": 0.5905,
"mean_token_accuracy": 0.815692201256752,
"num_tokens": 56849975.0,
"step": 869
},
{
"epoch": 0.593958013312852,
"grad_norm": 0.7268425822257996,
"learning_rate": 2.1168675115132317e-06,
"loss": 0.6602,
"mean_token_accuracy": 0.796783372759819,
"num_tokens": 56915511.0,
"step": 870
},
{
"epoch": 0.5946407236729817,
"grad_norm": 0.7295462489128113,
"learning_rate": 2.1109755149443436e-06,
"loss": 0.5928,
"mean_token_accuracy": 0.8162573277950287,
"num_tokens": 56981047.0,
"step": 871
},
{
"epoch": 0.5953234340331115,
"grad_norm": 0.7488856315612793,
"learning_rate": 2.1050857320049165e-06,
"loss": 0.6352,
"mean_token_accuracy": 0.8048478811979294,
"num_tokens": 57046583.0,
"step": 872
},
{
"epoch": 0.5960061443932412,
"grad_norm": 0.7265530824661255,
"learning_rate": 2.099198196209027e-06,
"loss": 0.627,
"mean_token_accuracy": 0.8041452914476395,
"num_tokens": 57112119.0,
"step": 873
},
{
"epoch": 0.5966888547533709,
"grad_norm": 0.7198655009269714,
"learning_rate": 2.093312941057968e-06,
"loss": 0.6357,
"mean_token_accuracy": 0.8020598292350769,
"num_tokens": 57177223.0,
"step": 874
},
{
"epoch": 0.5973715651135006,
"grad_norm": 0.7067784070968628,
"learning_rate": 2.0874300000400546e-06,
"loss": 0.6042,
"mean_token_accuracy": 0.8123319894075394,
"num_tokens": 57242759.0,
"step": 875
},
{
"epoch": 0.5980542754736303,
"grad_norm": 0.7130993604660034,
"learning_rate": 2.0815494066304307e-06,
"loss": 0.5653,
"mean_token_accuracy": 0.8243982046842575,
"num_tokens": 57308295.0,
"step": 876
},
{
"epoch": 0.59873698583376,
"grad_norm": 0.7170623540878296,
"learning_rate": 2.0756711942908853e-06,
"loss": 0.5961,
"mean_token_accuracy": 0.8153766393661499,
"num_tokens": 57373249.0,
"step": 877
},
{
"epoch": 0.5994196961938898,
"grad_norm": 0.744687020778656,
"learning_rate": 2.069795396469659e-06,
"loss": 0.5967,
"mean_token_accuracy": 0.8129734843969345,
"num_tokens": 57438785.0,
"step": 878
},
{
"epoch": 0.6001024065540195,
"grad_norm": 0.7068538665771484,
"learning_rate": 2.063922046601249e-06,
"loss": 0.5869,
"mean_token_accuracy": 0.8159824013710022,
"num_tokens": 57504321.0,
"step": 879
},
{
"epoch": 0.6007851169141492,
"grad_norm": 0.6912579536437988,
"learning_rate": 2.058051178106228e-06,
"loss": 0.5619,
"mean_token_accuracy": 0.8215725719928741,
"num_tokens": 57569857.0,
"step": 880
},
{
"epoch": 0.6014678272742788,
"grad_norm": 0.7329758405685425,
"learning_rate": 2.0521828243910476e-06,
"loss": 0.5746,
"mean_token_accuracy": 0.8185483813285828,
"num_tokens": 57635393.0,
"step": 881
},
{
"epoch": 0.6021505376344086,
"grad_norm": 0.7120118141174316,
"learning_rate": 2.0463170188478473e-06,
"loss": 0.5934,
"mean_token_accuracy": 0.8147895932197571,
"num_tokens": 57700789.0,
"step": 882
},
{
"epoch": 0.6028332479945383,
"grad_norm": 0.7233956456184387,
"learning_rate": 2.0404537948542697e-06,
"loss": 0.5776,
"mean_token_accuracy": 0.8192051500082016,
"num_tokens": 57766325.0,
"step": 883
},
{
"epoch": 0.603515958354668,
"grad_norm": 0.7008931040763855,
"learning_rate": 2.0345931857732688e-06,
"loss": 0.6225,
"mean_token_accuracy": 0.8070472925901413,
"num_tokens": 57831861.0,
"step": 884
},
{
"epoch": 0.6041986687147978,
"grad_norm": 0.7301531434059143,
"learning_rate": 2.0287352249529155e-06,
"loss": 0.6238,
"mean_token_accuracy": 0.8049700707197189,
"num_tokens": 57897397.0,
"step": 885
},
{
"epoch": 0.6048813790749274,
"grad_norm": 0.7681100964546204,
"learning_rate": 2.0228799457262144e-06,
"loss": 0.6269,
"mean_token_accuracy": 0.8048326075077057,
"num_tokens": 57962933.0,
"step": 886
},
{
"epoch": 0.6055640894350571,
"grad_norm": 0.7019287347793579,
"learning_rate": 2.017027381410912e-06,
"loss": 0.5653,
"mean_token_accuracy": 0.8218242228031158,
"num_tokens": 58028420.0,
"step": 887
},
{
"epoch": 0.6062467997951869,
"grad_norm": 0.6995770931243896,
"learning_rate": 2.011177565309302e-06,
"loss": 0.6069,
"mean_token_accuracy": 0.8123671561479568,
"num_tokens": 58093581.0,
"step": 888
},
{
"epoch": 0.6069295101553166,
"grad_norm": 0.7418209314346313,
"learning_rate": 2.0053305307080447e-06,
"loss": 0.6059,
"mean_token_accuracy": 0.8123618960380554,
"num_tokens": 58159091.0,
"step": 889
},
{
"epoch": 0.6076122205154463,
"grad_norm": 0.8020544052124023,
"learning_rate": 1.9994863108779723e-06,
"loss": 0.6168,
"mean_token_accuracy": 0.8074444085359573,
"num_tokens": 58224627.0,
"step": 890
},
{
"epoch": 0.6082949308755761,
"grad_norm": 0.7087436318397522,
"learning_rate": 1.9936449390738976e-06,
"loss": 0.6054,
"mean_token_accuracy": 0.8124083578586578,
"num_tokens": 58290163.0,
"step": 891
},
{
"epoch": 0.6089776412357057,
"grad_norm": 0.7432106733322144,
"learning_rate": 1.9878064485344307e-06,
"loss": 0.6097,
"mean_token_accuracy": 0.811003178358078,
"num_tokens": 58355699.0,
"step": 892
},
{
"epoch": 0.6096603515958354,
"grad_norm": 0.7401469349861145,
"learning_rate": 1.981970872481787e-06,
"loss": 0.6295,
"mean_token_accuracy": 0.803007110953331,
"num_tokens": 58421022.0,
"step": 893
},
{
"epoch": 0.6103430619559652,
"grad_norm": 0.704863429069519,
"learning_rate": 1.9761382441215927e-06,
"loss": 0.593,
"mean_token_accuracy": 0.8138948082923889,
"num_tokens": 58485866.0,
"step": 894
},
{
"epoch": 0.6110257723160949,
"grad_norm": 0.678066611289978,
"learning_rate": 1.9703085966427077e-06,
"loss": 0.5684,
"mean_token_accuracy": 0.821878045797348,
"num_tokens": 58551402.0,
"step": 895
},
{
"epoch": 0.6117084826762246,
"grad_norm": 0.7411065697669983,
"learning_rate": 1.9644819632170267e-06,
"loss": 0.6253,
"mean_token_accuracy": 0.8053397685289383,
"num_tokens": 58616897.0,
"step": 896
},
{
"epoch": 0.6123911930363544,
"grad_norm": 0.7031525373458862,
"learning_rate": 1.9586583769992924e-06,
"loss": 0.5742,
"mean_token_accuracy": 0.8187011182308197,
"num_tokens": 58682433.0,
"step": 897
},
{
"epoch": 0.613073903396484,
"grad_norm": 0.749374508857727,
"learning_rate": 1.952837871126912e-06,
"loss": 0.6343,
"mean_token_accuracy": 0.8025873750448227,
"num_tokens": 58747969.0,
"step": 898
},
{
"epoch": 0.6137566137566137,
"grad_norm": 0.7202054262161255,
"learning_rate": 1.947020478719763e-06,
"loss": 0.6213,
"mean_token_accuracy": 0.8076076060533524,
"num_tokens": 58812802.0,
"step": 899
},
{
"epoch": 0.6144393241167435,
"grad_norm": 0.7742454409599304,
"learning_rate": 1.9412062328800044e-06,
"loss": 0.6569,
"mean_token_accuracy": 0.7975776046514511,
"num_tokens": 58878338.0,
"step": 900
},
{
"epoch": 0.6151220344768732,
"grad_norm": 0.7440277338027954,
"learning_rate": 1.9353951666918957e-06,
"loss": 0.6077,
"mean_token_accuracy": 0.808162271976471,
"num_tokens": 58943874.0,
"step": 901
},
{
"epoch": 0.6158047448370029,
"grad_norm": 0.7421770095825195,
"learning_rate": 1.929587313221599e-06,
"loss": 0.603,
"mean_token_accuracy": 0.8112628310918808,
"num_tokens": 59009410.0,
"step": 902
},
{
"epoch": 0.6164874551971327,
"grad_norm": 0.7264229655265808,
"learning_rate": 1.9237827055169963e-06,
"loss": 0.6386,
"mean_token_accuracy": 0.8024193644523621,
"num_tokens": 59074946.0,
"step": 903
},
{
"epoch": 0.6171701655572623,
"grad_norm": 0.7455774545669556,
"learning_rate": 1.917981376607502e-06,
"loss": 0.6415,
"mean_token_accuracy": 0.8024804592132568,
"num_tokens": 59140482.0,
"step": 904
},
{
"epoch": 0.617852875917392,
"grad_norm": 0.7394263744354248,
"learning_rate": 1.912183359503873e-06,
"loss": 0.5846,
"mean_token_accuracy": 0.8150625228881836,
"num_tokens": 59205527.0,
"step": 905
},
{
"epoch": 0.6185355862775218,
"grad_norm": 0.7652600407600403,
"learning_rate": 1.9063886871980215e-06,
"loss": 0.6415,
"mean_token_accuracy": 0.8018143624067307,
"num_tokens": 59270685.0,
"step": 906
},
{
"epoch": 0.6192182966376515,
"grad_norm": 0.7579291462898254,
"learning_rate": 1.9005973926628256e-06,
"loss": 0.6439,
"mean_token_accuracy": 0.8011413216590881,
"num_tokens": 59336075.0,
"step": 907
},
{
"epoch": 0.6199010069977812,
"grad_norm": 0.6949485540390015,
"learning_rate": 1.894809508851944e-06,
"loss": 0.5629,
"mean_token_accuracy": 0.823924720287323,
"num_tokens": 59401611.0,
"step": 908
},
{
"epoch": 0.620583717357911,
"grad_norm": 0.7305551767349243,
"learning_rate": 1.8890250686996299e-06,
"loss": 0.6245,
"mean_token_accuracy": 0.806849792599678,
"num_tokens": 59466961.0,
"step": 909
},
{
"epoch": 0.6212664277180406,
"grad_norm": 0.7133076190948486,
"learning_rate": 1.8832441051205366e-06,
"loss": 0.6064,
"mean_token_accuracy": 0.8111100941896439,
"num_tokens": 59532497.0,
"step": 910
},
{
"epoch": 0.6219491380781703,
"grad_norm": 0.7591602802276611,
"learning_rate": 1.8774666510095394e-06,
"loss": 0.6046,
"mean_token_accuracy": 0.8109726309776306,
"num_tokens": 59598033.0,
"step": 911
},
{
"epoch": 0.6226318484383001,
"grad_norm": 0.7252746820449829,
"learning_rate": 1.871692739241543e-06,
"loss": 0.6375,
"mean_token_accuracy": 0.8006934374570847,
"num_tokens": 59663569.0,
"step": 912
},
{
"epoch": 0.6233145587984298,
"grad_norm": 0.7542442083358765,
"learning_rate": 1.8659224026712926e-06,
"loss": 0.6207,
"mean_token_accuracy": 0.8062988817691803,
"num_tokens": 59729105.0,
"step": 913
},
{
"epoch": 0.6239972691585595,
"grad_norm": 0.6936948299407959,
"learning_rate": 1.860155674133194e-06,
"loss": 0.5966,
"mean_token_accuracy": 0.815292090177536,
"num_tokens": 59794521.0,
"step": 914
},
{
"epoch": 0.6246799795186891,
"grad_norm": 0.7454006671905518,
"learning_rate": 1.8543925864411203e-06,
"loss": 0.6305,
"mean_token_accuracy": 0.8039772808551788,
"num_tokens": 59860057.0,
"step": 915
},
{
"epoch": 0.6253626898788189,
"grad_norm": 0.7165136933326721,
"learning_rate": 1.8486331723882261e-06,
"loss": 0.5786,
"mean_token_accuracy": 0.8195000439882278,
"num_tokens": 59925480.0,
"step": 916
},
{
"epoch": 0.6260454002389486,
"grad_norm": 0.7344151139259338,
"learning_rate": 1.8428774647467654e-06,
"loss": 0.6229,
"mean_token_accuracy": 0.804817333817482,
"num_tokens": 59991016.0,
"step": 917
},
{
"epoch": 0.6267281105990783,
"grad_norm": 0.7066066861152649,
"learning_rate": 1.8371254962679008e-06,
"loss": 0.6018,
"mean_token_accuracy": 0.8117974102497101,
"num_tokens": 60056552.0,
"step": 918
},
{
"epoch": 0.6274108209592081,
"grad_norm": 0.7304503321647644,
"learning_rate": 1.831377299681516e-06,
"loss": 0.599,
"mean_token_accuracy": 0.8132178634405136,
"num_tokens": 60122088.0,
"step": 919
},
{
"epoch": 0.6280935313193378,
"grad_norm": 0.7016748785972595,
"learning_rate": 1.8256329076960345e-06,
"loss": 0.5527,
"mean_token_accuracy": 0.8235010355710983,
"num_tokens": 60187603.0,
"step": 920
},
{
"epoch": 0.6287762416794674,
"grad_norm": 0.7145335674285889,
"learning_rate": 1.8198923529982314e-06,
"loss": 0.5926,
"mean_token_accuracy": 0.8130956739187241,
"num_tokens": 60253139.0,
"step": 921
},
{
"epoch": 0.6294589520395972,
"grad_norm": 0.7018191814422607,
"learning_rate": 1.8141556682530437e-06,
"loss": 0.5884,
"mean_token_accuracy": 0.8140273690223694,
"num_tokens": 60318675.0,
"step": 922
},
{
"epoch": 0.6301416623997269,
"grad_norm": 0.7363874316215515,
"learning_rate": 1.8084228861033898e-06,
"loss": 0.5798,
"mean_token_accuracy": 0.8173789381980896,
"num_tokens": 60383787.0,
"step": 923
},
{
"epoch": 0.6308243727598566,
"grad_norm": 0.7196556925773621,
"learning_rate": 1.8026940391699826e-06,
"loss": 0.6073,
"mean_token_accuracy": 0.8098729252815247,
"num_tokens": 60449323.0,
"step": 924
},
{
"epoch": 0.6315070831199864,
"grad_norm": 0.7288717031478882,
"learning_rate": 1.796969160051139e-06,
"loss": 0.6005,
"mean_token_accuracy": 0.811950147151947,
"num_tokens": 60514859.0,
"step": 925
},
{
"epoch": 0.6321897934801161,
"grad_norm": 0.686795711517334,
"learning_rate": 1.7912482813226018e-06,
"loss": 0.6087,
"mean_token_accuracy": 0.8124847114086151,
"num_tokens": 60580395.0,
"step": 926
},
{
"epoch": 0.6328725038402457,
"grad_norm": 0.7237222194671631,
"learning_rate": 1.78553143553735e-06,
"loss": 0.5973,
"mean_token_accuracy": 0.8137066215276718,
"num_tokens": 60645931.0,
"step": 927
},
{
"epoch": 0.6335552142003755,
"grad_norm": 0.7238149642944336,
"learning_rate": 1.779818655225412e-06,
"loss": 0.5694,
"mean_token_accuracy": 0.8201062977313995,
"num_tokens": 60711467.0,
"step": 928
},
{
"epoch": 0.6342379245605052,
"grad_norm": 0.7108457088470459,
"learning_rate": 1.774109972893686e-06,
"loss": 0.6072,
"mean_token_accuracy": 0.8110642731189728,
"num_tokens": 60777003.0,
"step": 929
},
{
"epoch": 0.6349206349206349,
"grad_norm": 0.7974938750267029,
"learning_rate": 1.7684054210257517e-06,
"loss": 0.6173,
"mean_token_accuracy": 0.806986540555954,
"num_tokens": 60842344.0,
"step": 930
},
{
"epoch": 0.6356033452807647,
"grad_norm": 0.7198942303657532,
"learning_rate": 1.7627050320816814e-06,
"loss": 0.5821,
"mean_token_accuracy": 0.8179832547903061,
"num_tokens": 60907880.0,
"step": 931
},
{
"epoch": 0.6362860556408944,
"grad_norm": 0.7319303154945374,
"learning_rate": 1.7570088384978639e-06,
"loss": 0.6081,
"mean_token_accuracy": 0.8093383461236954,
"num_tokens": 60973416.0,
"step": 932
},
{
"epoch": 0.636968766001024,
"grad_norm": 0.7228114008903503,
"learning_rate": 1.7513168726868157e-06,
"loss": 0.6258,
"mean_token_accuracy": 0.808112844824791,
"num_tokens": 61038123.0,
"step": 933
},
{
"epoch": 0.6376514763611538,
"grad_norm": 0.7491247057914734,
"learning_rate": 1.7456291670369917e-06,
"loss": 0.5901,
"mean_token_accuracy": 0.8173221349716187,
"num_tokens": 61103617.0,
"step": 934
},
{
"epoch": 0.6383341867212835,
"grad_norm": 0.7460877299308777,
"learning_rate": 1.7399457539126114e-06,
"loss": 0.6724,
"mean_token_accuracy": 0.7928316593170166,
"num_tokens": 61168856.0,
"step": 935
},
{
"epoch": 0.6390168970814132,
"grad_norm": 0.7103235125541687,
"learning_rate": 1.7342666656534658e-06,
"loss": 0.6028,
"mean_token_accuracy": 0.8120417892932892,
"num_tokens": 61234392.0,
"step": 936
},
{
"epoch": 0.639699607441543,
"grad_norm": 0.6912817358970642,
"learning_rate": 1.7285919345747352e-06,
"loss": 0.5863,
"mean_token_accuracy": 0.8152818083763123,
"num_tokens": 61299918.0,
"step": 937
},
{
"epoch": 0.6403823178016727,
"grad_norm": 0.7359888553619385,
"learning_rate": 1.7229215929668098e-06,
"loss": 0.6297,
"mean_token_accuracy": 0.8056879341602325,
"num_tokens": 61365454.0,
"step": 938
},
{
"epoch": 0.6410650281618023,
"grad_norm": 0.7430459260940552,
"learning_rate": 1.7172556730951028e-06,
"loss": 0.6426,
"mean_token_accuracy": 0.7991660684347153,
"num_tokens": 61430990.0,
"step": 939
},
{
"epoch": 0.6417477385219321,
"grad_norm": 0.7271755337715149,
"learning_rate": 1.7115942071998628e-06,
"loss": 0.6198,
"mean_token_accuracy": 0.807865634560585,
"num_tokens": 61496420.0,
"step": 940
},
{
"epoch": 0.6424304488820618,
"grad_norm": 0.73670893907547,
"learning_rate": 1.7059372274959984e-06,
"loss": 0.6409,
"mean_token_accuracy": 0.8009095340967178,
"num_tokens": 61561952.0,
"step": 941
},
{
"epoch": 0.6431131592421915,
"grad_norm": 0.6874427199363708,
"learning_rate": 1.7002847661728905e-06,
"loss": 0.5671,
"mean_token_accuracy": 0.8194037079811096,
"num_tokens": 61627488.0,
"step": 942
},
{
"epoch": 0.6437958696023213,
"grad_norm": 0.7189149260520935,
"learning_rate": 1.6946368553942062e-06,
"loss": 0.5826,
"mean_token_accuracy": 0.8184414654970169,
"num_tokens": 61693024.0,
"step": 943
},
{
"epoch": 0.644478579962451,
"grad_norm": 0.7161331176757812,
"learning_rate": 1.6889935272977225e-06,
"loss": 0.601,
"mean_token_accuracy": 0.8121945261955261,
"num_tokens": 61758560.0,
"step": 944
},
{
"epoch": 0.6451612903225806,
"grad_norm": 0.7162284851074219,
"learning_rate": 1.6833548139951398e-06,
"loss": 0.6452,
"mean_token_accuracy": 0.7995644211769104,
"num_tokens": 61823760.0,
"step": 945
},
{
"epoch": 0.6458440006827103,
"grad_norm": 0.7503809928894043,
"learning_rate": 1.6777207475718961e-06,
"loss": 0.5762,
"mean_token_accuracy": 0.8186247497797012,
"num_tokens": 61889296.0,
"step": 946
},
{
"epoch": 0.6465267110428401,
"grad_norm": 0.7211571335792542,
"learning_rate": 1.6720913600869914e-06,
"loss": 0.6211,
"mean_token_accuracy": 0.8063294291496277,
"num_tokens": 61954832.0,
"step": 947
},
{
"epoch": 0.6472094214029698,
"grad_norm": 0.7424342632293701,
"learning_rate": 1.6664666835728014e-06,
"loss": 0.5955,
"mean_token_accuracy": 0.8143786638975143,
"num_tokens": 62020368.0,
"step": 948
},
{
"epoch": 0.6478921317630995,
"grad_norm": 0.7574367523193359,
"learning_rate": 1.6608467500348912e-06,
"loss": 0.581,
"mean_token_accuracy": 0.8168988227844238,
"num_tokens": 62085904.0,
"step": 949
},
{
"epoch": 0.6485748421232292,
"grad_norm": 0.7286249399185181,
"learning_rate": 1.655231591451843e-06,
"loss": 0.6212,
"mean_token_accuracy": 0.8064210712909698,
"num_tokens": 62151440.0,
"step": 950
},
{
"epoch": 0.6492575524833589,
"grad_norm": 0.7372191548347473,
"learning_rate": 1.6496212397750643e-06,
"loss": 0.6129,
"mean_token_accuracy": 0.8075648546218872,
"num_tokens": 62216902.0,
"step": 951
},
{
"epoch": 0.6499402628434886,
"grad_norm": 0.6884051561355591,
"learning_rate": 1.6440157269286123e-06,
"loss": 0.5711,
"mean_token_accuracy": 0.8208394348621368,
"num_tokens": 62282438.0,
"step": 952
},
{
"epoch": 0.6506229732036184,
"grad_norm": 0.6881023645401001,
"learning_rate": 1.6384150848090102e-06,
"loss": 0.5868,
"mean_token_accuracy": 0.8153561800718307,
"num_tokens": 62347974.0,
"step": 953
},
{
"epoch": 0.6513056835637481,
"grad_norm": 0.7258963584899902,
"learning_rate": 1.632819345285065e-06,
"loss": 0.6304,
"mean_token_accuracy": 0.8034427016973495,
"num_tokens": 62413510.0,
"step": 954
},
{
"epoch": 0.6519883939238778,
"grad_norm": 0.7467467188835144,
"learning_rate": 1.6272285401976878e-06,
"loss": 0.5642,
"mean_token_accuracy": 0.8224889934062958,
"num_tokens": 62479046.0,
"step": 955
},
{
"epoch": 0.6526711042840075,
"grad_norm": 0.7235984206199646,
"learning_rate": 1.6216427013597108e-06,
"loss": 0.6214,
"mean_token_accuracy": 0.8049242496490479,
"num_tokens": 62544582.0,
"step": 956
},
{
"epoch": 0.6533538146441372,
"grad_norm": 0.7176678776741028,
"learning_rate": 1.6160618605557076e-06,
"loss": 0.594,
"mean_token_accuracy": 0.8146688640117645,
"num_tokens": 62610118.0,
"step": 957
},
{
"epoch": 0.6540365250042669,
"grad_norm": 0.6972813606262207,
"learning_rate": 1.6104860495418135e-06,
"loss": 0.6246,
"mean_token_accuracy": 0.806063175201416,
"num_tokens": 62674835.0,
"step": 958
},
{
"epoch": 0.6547192353643967,
"grad_norm": 0.7483172416687012,
"learning_rate": 1.6049153000455396e-06,
"loss": 0.5686,
"mean_token_accuracy": 0.8201521188020706,
"num_tokens": 62740371.0,
"step": 959
},
{
"epoch": 0.6554019457245264,
"grad_norm": 0.7193106412887573,
"learning_rate": 1.5993496437655988e-06,
"loss": 0.5832,
"mean_token_accuracy": 0.8177759349346161,
"num_tokens": 62805898.0,
"step": 960
},
{
"epoch": 0.656084656084656,
"grad_norm": 0.7257005572319031,
"learning_rate": 1.5937891123717236e-06,
"loss": 0.6143,
"mean_token_accuracy": 0.8085288405418396,
"num_tokens": 62871434.0,
"step": 961
},
{
"epoch": 0.6567673664447858,
"grad_norm": 0.7278813719749451,
"learning_rate": 1.5882337375044803e-06,
"loss": 0.6099,
"mean_token_accuracy": 0.8087999373674393,
"num_tokens": 62936156.0,
"step": 962
},
{
"epoch": 0.6574500768049155,
"grad_norm": 0.6961346864700317,
"learning_rate": 1.5826835507750984e-06,
"loss": 0.6202,
"mean_token_accuracy": 0.8076887875795364,
"num_tokens": 63001692.0,
"step": 963
},
{
"epoch": 0.6581327871650452,
"grad_norm": 0.7237011790275574,
"learning_rate": 1.5771385837652839e-06,
"loss": 0.6107,
"mean_token_accuracy": 0.8089922368526459,
"num_tokens": 63066839.0,
"step": 964
},
{
"epoch": 0.658815497525175,
"grad_norm": 0.7264211177825928,
"learning_rate": 1.5715988680270394e-06,
"loss": 0.6245,
"mean_token_accuracy": 0.8053977340459824,
"num_tokens": 63132375.0,
"step": 965
},
{
"epoch": 0.6594982078853047,
"grad_norm": 0.7212889194488525,
"learning_rate": 1.56606443508249e-06,
"loss": 0.5914,
"mean_token_accuracy": 0.8170210123062134,
"num_tokens": 63197911.0,
"step": 966
},
{
"epoch": 0.6601809182454343,
"grad_norm": 0.7629103660583496,
"learning_rate": 1.5605353164237002e-06,
"loss": 0.6252,
"mean_token_accuracy": 0.8060850501060486,
"num_tokens": 63263447.0,
"step": 967
},
{
"epoch": 0.6608636286055641,
"grad_norm": 0.7188578844070435,
"learning_rate": 1.5550115435124922e-06,
"loss": 0.5945,
"mean_token_accuracy": 0.8150165379047394,
"num_tokens": 63327993.0,
"step": 968
},
{
"epoch": 0.6615463389656938,
"grad_norm": 0.7305589318275452,
"learning_rate": 1.5494931477802725e-06,
"loss": 0.6028,
"mean_token_accuracy": 0.8119195997714996,
"num_tokens": 63393529.0,
"step": 969
},
{
"epoch": 0.6622290493258235,
"grad_norm": 0.720365047454834,
"learning_rate": 1.5439801606278509e-06,
"loss": 0.6045,
"mean_token_accuracy": 0.810316875576973,
"num_tokens": 63458319.0,
"step": 970
},
{
"epoch": 0.6629117596859533,
"grad_norm": 0.713546097278595,
"learning_rate": 1.5384726134252578e-06,
"loss": 0.5899,
"mean_token_accuracy": 0.8163031488656998,
"num_tokens": 63523855.0,
"step": 971
},
{
"epoch": 0.663594470046083,
"grad_norm": 0.6926174759864807,
"learning_rate": 1.5329705375115727e-06,
"loss": 0.5968,
"mean_token_accuracy": 0.8157380223274231,
"num_tokens": 63589391.0,
"step": 972
},
{
"epoch": 0.6642771804062126,
"grad_norm": 0.7352408766746521,
"learning_rate": 1.5274739641947418e-06,
"loss": 0.6154,
"mean_token_accuracy": 0.8105907887220383,
"num_tokens": 63654927.0,
"step": 973
},
{
"epoch": 0.6649598907663424,
"grad_norm": 0.6985450983047485,
"learning_rate": 1.521982924751397e-06,
"loss": 0.6176,
"mean_token_accuracy": 0.8080248087644577,
"num_tokens": 63720463.0,
"step": 974
},
{
"epoch": 0.6656426011264721,
"grad_norm": 0.6925053000450134,
"learning_rate": 1.5164974504266861e-06,
"loss": 0.6056,
"mean_token_accuracy": 0.8100867569446564,
"num_tokens": 63785999.0,
"step": 975
},
{
"epoch": 0.6663253114866018,
"grad_norm": 0.7355689406394958,
"learning_rate": 1.511017572434088e-06,
"loss": 0.6187,
"mean_token_accuracy": 0.8089259564876556,
"num_tokens": 63851535.0,
"step": 976
},
{
"epoch": 0.6670080218467315,
"grad_norm": 0.7681476473808289,
"learning_rate": 1.5055433219552356e-06,
"loss": 0.6592,
"mean_token_accuracy": 0.7974248677492142,
"num_tokens": 63917071.0,
"step": 977
},
{
"epoch": 0.6676907322068613,
"grad_norm": 0.7094239592552185,
"learning_rate": 1.5000747301397434e-06,
"loss": 0.5903,
"mean_token_accuracy": 0.8155722469091415,
"num_tokens": 63981986.0,
"step": 978
},
{
"epoch": 0.6683734425669909,
"grad_norm": 0.7052043676376343,
"learning_rate": 1.494611828105026e-06,
"loss": 0.5933,
"mean_token_accuracy": 0.8148674219846725,
"num_tokens": 64047522.0,
"step": 979
},
{
"epoch": 0.6690561529271206,
"grad_norm": 0.7070150375366211,
"learning_rate": 1.489154646936119e-06,
"loss": 0.6201,
"mean_token_accuracy": 0.8075665831565857,
"num_tokens": 64113058.0,
"step": 980
},
{
"epoch": 0.6697388632872504,
"grad_norm": 0.7390539050102234,
"learning_rate": 1.483703217685509e-06,
"loss": 0.6184,
"mean_token_accuracy": 0.8068029135465622,
"num_tokens": 64178594.0,
"step": 981
},
{
"epoch": 0.6704215736473801,
"grad_norm": 0.7140552997589111,
"learning_rate": 1.4782575713729522e-06,
"loss": 0.6125,
"mean_token_accuracy": 0.8070817142724991,
"num_tokens": 64243854.0,
"step": 982
},
{
"epoch": 0.6711042840075098,
"grad_norm": 0.6927697658538818,
"learning_rate": 1.472817738985296e-06,
"loss": 0.5814,
"mean_token_accuracy": 0.8188707530498505,
"num_tokens": 64308929.0,
"step": 983
},
{
"epoch": 0.6717869943676396,
"grad_norm": 0.7148230075836182,
"learning_rate": 1.4673837514763082e-06,
"loss": 0.6,
"mean_token_accuracy": 0.8128818422555923,
"num_tokens": 64374465.0,
"step": 984
},
{
"epoch": 0.6724697047277692,
"grad_norm": 0.7072917819023132,
"learning_rate": 1.461955639766498e-06,
"loss": 0.5879,
"mean_token_accuracy": 0.8144537210464478,
"num_tokens": 64439849.0,
"step": 985
},
{
"epoch": 0.6731524150878989,
"grad_norm": 0.7203112840652466,
"learning_rate": 1.4565334347429378e-06,
"loss": 0.5921,
"mean_token_accuracy": 0.8115013837814331,
"num_tokens": 64505313.0,
"step": 986
},
{
"epoch": 0.6738351254480287,
"grad_norm": 0.7090385556221008,
"learning_rate": 1.4511171672590924e-06,
"loss": 0.5915,
"mean_token_accuracy": 0.8125305473804474,
"num_tokens": 64570849.0,
"step": 987
},
{
"epoch": 0.6745178358081584,
"grad_norm": 0.73131263256073,
"learning_rate": 1.4457068681346388e-06,
"loss": 0.61,
"mean_token_accuracy": 0.8091856092214584,
"num_tokens": 64636385.0,
"step": 988
},
{
"epoch": 0.6752005461682881,
"grad_norm": 0.7277146577835083,
"learning_rate": 1.4403025681552937e-06,
"loss": 0.6139,
"mean_token_accuracy": 0.8073527663946152,
"num_tokens": 64701921.0,
"step": 989
},
{
"epoch": 0.6758832565284179,
"grad_norm": 0.7310377955436707,
"learning_rate": 1.4349042980726364e-06,
"loss": 0.6019,
"mean_token_accuracy": 0.811369001865387,
"num_tokens": 64767230.0,
"step": 990
},
{
"epoch": 0.6765659668885475,
"grad_norm": 0.7303698658943176,
"learning_rate": 1.4295120886039388e-06,
"loss": 0.5969,
"mean_token_accuracy": 0.8156616538763046,
"num_tokens": 64832766.0,
"step": 991
},
{
"epoch": 0.6772486772486772,
"grad_norm": 0.7154099941253662,
"learning_rate": 1.4241259704319792e-06,
"loss": 0.5901,
"mean_token_accuracy": 0.8163703680038452,
"num_tokens": 64898166.0,
"step": 992
},
{
"epoch": 0.677931387608807,
"grad_norm": 0.7075607180595398,
"learning_rate": 1.418745974204883e-06,
"loss": 0.5843,
"mean_token_accuracy": 0.8173753768205643,
"num_tokens": 64963348.0,
"step": 993
},
{
"epoch": 0.6786140979689367,
"grad_norm": 0.7342114448547363,
"learning_rate": 1.413372130535936e-06,
"loss": 0.5549,
"mean_token_accuracy": 0.8245428204536438,
"num_tokens": 65028831.0,
"step": 994
},
{
"epoch": 0.6792968083290664,
"grad_norm": 0.7262970805168152,
"learning_rate": 1.4080044700034157e-06,
"loss": 0.6546,
"mean_token_accuracy": 0.7953476309776306,
"num_tokens": 65094367.0,
"step": 995
},
{
"epoch": 0.6799795186891961,
"grad_norm": 0.7255319952964783,
"learning_rate": 1.4026430231504149e-06,
"loss": 0.6022,
"mean_token_accuracy": 0.8116804212331772,
"num_tokens": 65159591.0,
"step": 996
},
{
"epoch": 0.6806622290493258,
"grad_norm": 0.7310954928398132,
"learning_rate": 1.3972878204846738e-06,
"loss": 0.6267,
"mean_token_accuracy": 0.8056421130895615,
"num_tokens": 65225127.0,
"step": 997
},
{
"epoch": 0.6813449394094555,
"grad_norm": 0.7071923017501831,
"learning_rate": 1.391938892478395e-06,
"loss": 0.5951,
"mean_token_accuracy": 0.8134410679340363,
"num_tokens": 65290355.0,
"step": 998
},
{
"epoch": 0.6820276497695853,
"grad_norm": 0.710976779460907,
"learning_rate": 1.3865962695680837e-06,
"loss": 0.5996,
"mean_token_accuracy": 0.8111506998538971,
"num_tokens": 65355655.0,
"step": 999
},
{
"epoch": 0.682710360129715,
"grad_norm": 0.7076782584190369,
"learning_rate": 1.3812599821543638e-06,
"loss": 0.6007,
"mean_token_accuracy": 0.8128324896097183,
"num_tokens": 65420897.0,
"step": 1000
},
{
"epoch": 0.6833930704898447,
"grad_norm": 0.7270994186401367,
"learning_rate": 1.37593006060181e-06,
"loss": 0.6098,
"mean_token_accuracy": 0.8112322837114334,
"num_tokens": 65486433.0,
"step": 1001
},
{
"epoch": 0.6840757808499744,
"grad_norm": 0.6897610425949097,
"learning_rate": 1.3706065352387726e-06,
"loss": 0.5808,
"mean_token_accuracy": 0.8184567391872406,
"num_tokens": 65551969.0,
"step": 1002
},
{
"epoch": 0.6847584912101041,
"grad_norm": 0.7167555689811707,
"learning_rate": 1.3652894363572102e-06,
"loss": 0.6175,
"mean_token_accuracy": 0.8073680400848389,
"num_tokens": 65617505.0,
"step": 1003
},
{
"epoch": 0.6854412015702338,
"grad_norm": 0.7112638354301453,
"learning_rate": 1.3599787942125092e-06,
"loss": 0.5994,
"mean_token_accuracy": 0.8103158622980118,
"num_tokens": 65683041.0,
"step": 1004
},
{
"epoch": 0.6861239119303636,
"grad_norm": 0.7199926972389221,
"learning_rate": 1.354674639023318e-06,
"loss": 0.6189,
"mean_token_accuracy": 0.8093230724334717,
"num_tokens": 65748577.0,
"step": 1005
},
{
"epoch": 0.6868066222904933,
"grad_norm": 0.7374444007873535,
"learning_rate": 1.3493770009713708e-06,
"loss": 0.5905,
"mean_token_accuracy": 0.8165017068386078,
"num_tokens": 65814113.0,
"step": 1006
},
{
"epoch": 0.687489332650623,
"grad_norm": 0.7460854053497314,
"learning_rate": 1.3440859102013227e-06,
"loss": 0.6313,
"mean_token_accuracy": 0.8055046498775482,
"num_tokens": 65879649.0,
"step": 1007
},
{
"epoch": 0.6881720430107527,
"grad_norm": 0.746315598487854,
"learning_rate": 1.338801396820566e-06,
"loss": 0.6245,
"mean_token_accuracy": 0.8058559447526932,
"num_tokens": 65945185.0,
"step": 1008
},
{
"epoch": 0.6888547533708824,
"grad_norm": 0.6928518414497375,
"learning_rate": 1.3335234908990735e-06,
"loss": 0.5857,
"mean_token_accuracy": 0.8161656856536865,
"num_tokens": 66010721.0,
"step": 1009
},
{
"epoch": 0.6895374637310121,
"grad_norm": 0.7147393822669983,
"learning_rate": 1.3282522224692162e-06,
"loss": 0.6104,
"mean_token_accuracy": 0.8116415292024612,
"num_tokens": 66076050.0,
"step": 1010
},
{
"epoch": 0.6902201740911418,
"grad_norm": 0.697171688079834,
"learning_rate": 1.3229876215255977e-06,
"loss": 0.5854,
"mean_token_accuracy": 0.8165714740753174,
"num_tokens": 66141217.0,
"step": 1011
},
{
"epoch": 0.6909028844512716,
"grad_norm": 0.7116130590438843,
"learning_rate": 1.3177297180248804e-06,
"loss": 0.6027,
"mean_token_accuracy": 0.8118819445371628,
"num_tokens": 66206629.0,
"step": 1012
},
{
"epoch": 0.6915855948114012,
"grad_norm": 0.7741715908050537,
"learning_rate": 1.3124785418856216e-06,
"loss": 0.6525,
"mean_token_accuracy": 0.7982190996408463,
"num_tokens": 66272165.0,
"step": 1013
},
{
"epoch": 0.6922683051715309,
"grad_norm": 0.7506465911865234,
"learning_rate": 1.3072341229880909e-06,
"loss": 0.6395,
"mean_token_accuracy": 0.8002963215112686,
"num_tokens": 66337701.0,
"step": 1014
},
{
"epoch": 0.6929510155316607,
"grad_norm": 0.7145585417747498,
"learning_rate": 1.3019964911741157e-06,
"loss": 0.5892,
"mean_token_accuracy": 0.8156403005123138,
"num_tokens": 66402662.0,
"step": 1015
},
{
"epoch": 0.6936337258917904,
"grad_norm": 0.6993339657783508,
"learning_rate": 1.296765676246899e-06,
"loss": 0.6007,
"mean_token_accuracy": 0.8119682371616364,
"num_tokens": 66467955.0,
"step": 1016
},
{
"epoch": 0.6943164362519201,
"grad_norm": 0.7100650668144226,
"learning_rate": 1.291541707970855e-06,
"loss": 0.6129,
"mean_token_accuracy": 0.8092161566019058,
"num_tokens": 66533491.0,
"step": 1017
},
{
"epoch": 0.6949991466120499,
"grad_norm": 0.7469538450241089,
"learning_rate": 1.2863246160714394e-06,
"loss": 0.6017,
"mean_token_accuracy": 0.8118737787008286,
"num_tokens": 66599027.0,
"step": 1018
},
{
"epoch": 0.6956818569721795,
"grad_norm": 0.706939160823822,
"learning_rate": 1.28111443023498e-06,
"loss": 0.5585,
"mean_token_accuracy": 0.8248105943202972,
"num_tokens": 66664563.0,
"step": 1019
},
{
"epoch": 0.6963645673323092,
"grad_norm": 0.7209792137145996,
"learning_rate": 1.2759111801085067e-06,
"loss": 0.6063,
"mean_token_accuracy": 0.8109879046678543,
"num_tokens": 66730099.0,
"step": 1020
},
{
"epoch": 0.697047277692439,
"grad_norm": 0.7196431756019592,
"learning_rate": 1.270714895299586e-06,
"loss": 0.5876,
"mean_token_accuracy": 0.8137677162885666,
"num_tokens": 66795635.0,
"step": 1021
},
{
"epoch": 0.6977299880525687,
"grad_norm": 0.7060737013816833,
"learning_rate": 1.2655256053761483e-06,
"loss": 0.5657,
"mean_token_accuracy": 0.8216947615146637,
"num_tokens": 66861171.0,
"step": 1022
},
{
"epoch": 0.6984126984126984,
"grad_norm": 0.7312152981758118,
"learning_rate": 1.2603433398663222e-06,
"loss": 0.6333,
"mean_token_accuracy": 0.8031066805124283,
"num_tokens": 66926707.0,
"step": 1023
},
{
"epoch": 0.6990954087728282,
"grad_norm": 0.7349741458892822,
"learning_rate": 1.2551681282582663e-06,
"loss": 0.6243,
"mean_token_accuracy": 0.8045498430728912,
"num_tokens": 66991463.0,
"step": 1024
},
{
"epoch": 0.6997781191329578,
"grad_norm": 0.7243534326553345,
"learning_rate": 1.2500000000000007e-06,
"loss": 0.6324,
"mean_token_accuracy": 0.8045996129512787,
"num_tokens": 67056104.0,
"step": 1025
},
{
"epoch": 0.7004608294930875,
"grad_norm": 0.6944549679756165,
"learning_rate": 1.2448389844992392e-06,
"loss": 0.5886,
"mean_token_accuracy": 0.8170895129442215,
"num_tokens": 67121350.0,
"step": 1026
},
{
"epoch": 0.7011435398532173,
"grad_norm": 0.7031959295272827,
"learning_rate": 1.239685111123223e-06,
"loss": 0.5969,
"mean_token_accuracy": 0.8143786638975143,
"num_tokens": 67186886.0,
"step": 1027
},
{
"epoch": 0.701826250213347,
"grad_norm": 0.7678669691085815,
"learning_rate": 1.234538409198555e-06,
"loss": 0.6496,
"mean_token_accuracy": 0.7978525310754776,
"num_tokens": 67252422.0,
"step": 1028
},
{
"epoch": 0.7025089605734767,
"grad_norm": 0.7110222578048706,
"learning_rate": 1.2293989080110283e-06,
"loss": 0.5936,
"mean_token_accuracy": 0.815337523818016,
"num_tokens": 67317916.0,
"step": 1029
},
{
"epoch": 0.7031916709336065,
"grad_norm": 0.7030754089355469,
"learning_rate": 1.2242666368054635e-06,
"loss": 0.5808,
"mean_token_accuracy": 0.8198858350515366,
"num_tokens": 67383424.0,
"step": 1030
},
{
"epoch": 0.7038743812937361,
"grad_norm": 0.748938798904419,
"learning_rate": 1.2191416247855408e-06,
"loss": 0.6279,
"mean_token_accuracy": 0.8053995966911316,
"num_tokens": 67448892.0,
"step": 1031
},
{
"epoch": 0.7045570916538658,
"grad_norm": 0.7475568652153015,
"learning_rate": 1.214023901113635e-06,
"loss": 0.6515,
"mean_token_accuracy": 0.7992729842662811,
"num_tokens": 67514428.0,
"step": 1032
},
{
"epoch": 0.7052398020139956,
"grad_norm": 0.714144229888916,
"learning_rate": 1.2089134949106462e-06,
"loss": 0.6015,
"mean_token_accuracy": 0.8113239258527756,
"num_tokens": 67579964.0,
"step": 1033
},
{
"epoch": 0.7059225123741253,
"grad_norm": 0.7368439435958862,
"learning_rate": 1.203810435255842e-06,
"loss": 0.6169,
"mean_token_accuracy": 0.8093841671943665,
"num_tokens": 67645500.0,
"step": 1034
},
{
"epoch": 0.706605222734255,
"grad_norm": 0.7233380079269409,
"learning_rate": 1.198714751186679e-06,
"loss": 0.587,
"mean_token_accuracy": 0.8154478222131729,
"num_tokens": 67711036.0,
"step": 1035
},
{
"epoch": 0.7072879330943848,
"grad_norm": 0.7264472842216492,
"learning_rate": 1.1936264716986523e-06,
"loss": 0.623,
"mean_token_accuracy": 0.8043133020401001,
"num_tokens": 67776572.0,
"step": 1036
},
{
"epoch": 0.7079706434545144,
"grad_norm": 0.7293205261230469,
"learning_rate": 1.1885456257451194e-06,
"loss": 0.6148,
"mean_token_accuracy": 0.8091706782579422,
"num_tokens": 67842091.0,
"step": 1037
},
{
"epoch": 0.7086533538146441,
"grad_norm": 0.7417516708374023,
"learning_rate": 1.1834722422371405e-06,
"loss": 0.5954,
"mean_token_accuracy": 0.8143175691366196,
"num_tokens": 67907627.0,
"step": 1038
},
{
"epoch": 0.7093360641747739,
"grad_norm": 0.7399863004684448,
"learning_rate": 1.1784063500433117e-06,
"loss": 0.625,
"mean_token_accuracy": 0.8072239905595779,
"num_tokens": 67971992.0,
"step": 1039
},
{
"epoch": 0.7100187745349036,
"grad_norm": 0.736422598361969,
"learning_rate": 1.1733479779896065e-06,
"loss": 0.6255,
"mean_token_accuracy": 0.803932249546051,
"num_tokens": 68037343.0,
"step": 1040
},
{
"epoch": 0.7107014848950333,
"grad_norm": 0.7014506459236145,
"learning_rate": 1.1682971548591995e-06,
"loss": 0.6101,
"mean_token_accuracy": 0.8105144202709198,
"num_tokens": 68102879.0,
"step": 1041
},
{
"epoch": 0.7113841952551629,
"grad_norm": 0.7273626923561096,
"learning_rate": 1.163253909392318e-06,
"loss": 0.5935,
"mean_token_accuracy": 0.815775141119957,
"num_tokens": 68167984.0,
"step": 1042
},
{
"epoch": 0.7120669056152927,
"grad_norm": 0.7792891263961792,
"learning_rate": 1.1582182702860667e-06,
"loss": 0.6134,
"mean_token_accuracy": 0.808910682797432,
"num_tokens": 68233520.0,
"step": 1043
},
{
"epoch": 0.7127496159754224,
"grad_norm": 0.7218680381774902,
"learning_rate": 1.1531902661942692e-06,
"loss": 0.5552,
"mean_token_accuracy": 0.8247953355312347,
"num_tokens": 68299056.0,
"step": 1044
},
{
"epoch": 0.7134323263355521,
"grad_norm": 0.763369619846344,
"learning_rate": 1.1481699257273041e-06,
"loss": 0.6369,
"mean_token_accuracy": 0.8021597117185593,
"num_tokens": 68364592.0,
"step": 1045
},
{
"epoch": 0.7141150366956819,
"grad_norm": 0.712095320224762,
"learning_rate": 1.1431572774519457e-06,
"loss": 0.5888,
"mean_token_accuracy": 0.8145788908004761,
"num_tokens": 68429133.0,
"step": 1046
},
{
"epoch": 0.7147977470558116,
"grad_norm": 0.6819875240325928,
"learning_rate": 1.1381523498911907e-06,
"loss": 0.5616,
"mean_token_accuracy": 0.8223264962434769,
"num_tokens": 68494642.0,
"step": 1047
},
{
"epoch": 0.7154804574159412,
"grad_norm": 0.720863938331604,
"learning_rate": 1.1331551715241115e-06,
"loss": 0.6048,
"mean_token_accuracy": 0.8117210417985916,
"num_tokens": 68560178.0,
"step": 1048
},
{
"epoch": 0.716163167776071,
"grad_norm": 0.7568435072898865,
"learning_rate": 1.1281657707856817e-06,
"loss": 0.6984,
"mean_token_accuracy": 0.7855392396450043,
"num_tokens": 68625232.0,
"step": 1049
},
{
"epoch": 0.7168458781362007,
"grad_norm": 0.7198119163513184,
"learning_rate": 1.1231841760666188e-06,
"loss": 0.5967,
"mean_token_accuracy": 0.8142653554677963,
"num_tokens": 68690063.0,
"step": 1050
},
{
"epoch": 0.7175285884963304,
"grad_norm": 0.7616945505142212,
"learning_rate": 1.118210415713222e-06,
"loss": 0.6256,
"mean_token_accuracy": 0.8061919659376144,
"num_tokens": 68755599.0,
"step": 1051
},
{
"epoch": 0.7182112988564602,
"grad_norm": 0.7210955023765564,
"learning_rate": 1.1132445180272147e-06,
"loss": 0.5825,
"mean_token_accuracy": 0.8181665390729904,
"num_tokens": 68821135.0,
"step": 1052
},
{
"epoch": 0.7188940092165899,
"grad_norm": 0.7416355013847351,
"learning_rate": 1.1082865112655767e-06,
"loss": 0.5861,
"mean_token_accuracy": 0.8164656460285187,
"num_tokens": 68885990.0,
"step": 1053
},
{
"epoch": 0.7195767195767195,
"grad_norm": 0.6842755675315857,
"learning_rate": 1.1033364236403874e-06,
"loss": 0.5815,
"mean_token_accuracy": 0.8173264861106873,
"num_tokens": 68951526.0,
"step": 1054
},
{
"epoch": 0.7202594299368493,
"grad_norm": 0.712204098701477,
"learning_rate": 1.0983942833186644e-06,
"loss": 0.6026,
"mean_token_accuracy": 0.8113086521625519,
"num_tokens": 69017062.0,
"step": 1055
},
{
"epoch": 0.720942140296979,
"grad_norm": 0.7125012278556824,
"learning_rate": 1.0934601184222073e-06,
"loss": 0.6239,
"mean_token_accuracy": 0.8067265450954437,
"num_tokens": 69082598.0,
"step": 1056
},
{
"epoch": 0.7216248506571087,
"grad_norm": 0.6971497535705566,
"learning_rate": 1.0885339570274268e-06,
"loss": 0.6034,
"mean_token_accuracy": 0.8114766627550125,
"num_tokens": 69148134.0,
"step": 1057
},
{
"epoch": 0.7223075610172385,
"grad_norm": 0.7023418545722961,
"learning_rate": 1.083615827165199e-06,
"loss": 0.5973,
"mean_token_accuracy": 0.811950147151947,
"num_tokens": 69213670.0,
"step": 1058
},
{
"epoch": 0.7229902713773682,
"grad_norm": 0.7571163177490234,
"learning_rate": 1.0787057568206945e-06,
"loss": 0.636,
"mean_token_accuracy": 0.8033449351787567,
"num_tokens": 69279108.0,
"step": 1059
},
{
"epoch": 0.7236729817374978,
"grad_norm": 0.7366777658462524,
"learning_rate": 1.073803773933226e-06,
"loss": 0.5972,
"mean_token_accuracy": 0.8136913478374481,
"num_tokens": 69344644.0,
"step": 1060
},
{
"epoch": 0.7243556920976276,
"grad_norm": 0.727080762386322,
"learning_rate": 1.0689099063960844e-06,
"loss": 0.6161,
"mean_token_accuracy": 0.8062903136014938,
"num_tokens": 69409956.0,
"step": 1061
},
{
"epoch": 0.7250384024577573,
"grad_norm": 0.7016240954399109,
"learning_rate": 1.0640241820563871e-06,
"loss": 0.5584,
"mean_token_accuracy": 0.8244874775409698,
"num_tokens": 69475444.0,
"step": 1062
},
{
"epoch": 0.725721112817887,
"grad_norm": 0.7436378598213196,
"learning_rate": 1.0591466287149082e-06,
"loss": 0.6115,
"mean_token_accuracy": 0.8092294335365295,
"num_tokens": 69540509.0,
"step": 1063
},
{
"epoch": 0.7264038231780168,
"grad_norm": 0.7381221652030945,
"learning_rate": 1.054277274125934e-06,
"loss": 0.5855,
"mean_token_accuracy": 0.8153375536203384,
"num_tokens": 69605192.0,
"step": 1064
},
{
"epoch": 0.7270865335381465,
"grad_norm": 0.7373767495155334,
"learning_rate": 1.0494161459970942e-06,
"loss": 0.6438,
"mean_token_accuracy": 0.8019611537456512,
"num_tokens": 69670728.0,
"step": 1065
},
{
"epoch": 0.7277692438982761,
"grad_norm": 0.6937271356582642,
"learning_rate": 1.0445632719892093e-06,
"loss": 0.5947,
"mean_token_accuracy": 0.8145161271095276,
"num_tokens": 69736264.0,
"step": 1066
},
{
"epoch": 0.7284519542584059,
"grad_norm": 0.7547881603240967,
"learning_rate": 1.039718679716132e-06,
"loss": 0.6021,
"mean_token_accuracy": 0.8118002116680145,
"num_tokens": 69801715.0,
"step": 1067
},
{
"epoch": 0.7291346646185356,
"grad_norm": 0.743807852268219,
"learning_rate": 1.0348823967445904e-06,
"loss": 0.6157,
"mean_token_accuracy": 0.8069556504487991,
"num_tokens": 69867251.0,
"step": 1068
},
{
"epoch": 0.7298173749786653,
"grad_norm": 0.734443724155426,
"learning_rate": 1.03005445059403e-06,
"loss": 0.5992,
"mean_token_accuracy": 0.8127291053533554,
"num_tokens": 69932787.0,
"step": 1069
},
{
"epoch": 0.7305000853387951,
"grad_norm": 0.7220298051834106,
"learning_rate": 1.0252348687364608e-06,
"loss": 0.6139,
"mean_token_accuracy": 0.8077517449855804,
"num_tokens": 69998191.0,
"step": 1070
},
{
"epoch": 0.7311827956989247,
"grad_norm": 0.6889765858650208,
"learning_rate": 1.0204236785962954e-06,
"loss": 0.5883,
"mean_token_accuracy": 0.8164100646972656,
"num_tokens": 70063727.0,
"step": 1071
},
{
"epoch": 0.7318655060590544,
"grad_norm": 0.7063385248184204,
"learning_rate": 1.0156209075501972e-06,
"loss": 0.608,
"mean_token_accuracy": 0.8102242201566696,
"num_tokens": 70129263.0,
"step": 1072
},
{
"epoch": 0.7325482164191841,
"grad_norm": 0.7358285188674927,
"learning_rate": 1.0108265829269223e-06,
"loss": 0.6236,
"mean_token_accuracy": 0.807369664311409,
"num_tokens": 70193946.0,
"step": 1073
},
{
"epoch": 0.7332309267793139,
"grad_norm": 0.7315536141395569,
"learning_rate": 1.0060407320071658e-06,
"loss": 0.6119,
"mean_token_accuracy": 0.8106366097927094,
"num_tokens": 70259482.0,
"step": 1074
},
{
"epoch": 0.7339136371394436,
"grad_norm": 0.7272475361824036,
"learning_rate": 1.0012633820234052e-06,
"loss": 0.6137,
"mean_token_accuracy": 0.8086815774440765,
"num_tokens": 70325018.0,
"step": 1075
},
{
"epoch": 0.7345963474995733,
"grad_norm": 0.6909750699996948,
"learning_rate": 9.964945601597454e-07,
"loss": 0.5922,
"mean_token_accuracy": 0.8150201588869095,
"num_tokens": 70390554.0,
"step": 1076
},
{
"epoch": 0.735279057859703,
"grad_norm": 0.6908272504806519,
"learning_rate": 9.917342935517665e-07,
"loss": 0.5954,
"mean_token_accuracy": 0.8137371689081192,
"num_tokens": 70456090.0,
"step": 1077
},
{
"epoch": 0.7359617682198327,
"grad_norm": 0.7175625562667847,
"learning_rate": 9.869826092863661e-07,
"loss": 0.6074,
"mean_token_accuracy": 0.8106231540441513,
"num_tokens": 70521316.0,
"step": 1078
},
{
"epoch": 0.7366444785799624,
"grad_norm": 0.7069457769393921,
"learning_rate": 9.822395344016054e-07,
"loss": 0.6087,
"mean_token_accuracy": 0.8105449676513672,
"num_tokens": 70586852.0,
"step": 1079
},
{
"epoch": 0.7373271889400922,
"grad_norm": 0.7554196715354919,
"learning_rate": 9.775050958865584e-07,
"loss": 0.5844,
"mean_token_accuracy": 0.8151825070381165,
"num_tokens": 70652261.0,
"step": 1080
},
{
"epoch": 0.7380098993002219,
"grad_norm": 0.749987006187439,
"learning_rate": 9.727793206811553e-07,
"loss": 0.639,
"mean_token_accuracy": 0.8026099056005478,
"num_tokens": 70717628.0,
"step": 1081
},
{
"epoch": 0.7386926096603516,
"grad_norm": 0.7476643323898315,
"learning_rate": 9.680622356760297e-07,
"loss": 0.6094,
"mean_token_accuracy": 0.8092154115438461,
"num_tokens": 70782933.0,
"step": 1082
},
{
"epoch": 0.7393753200204813,
"grad_norm": 0.707511305809021,
"learning_rate": 9.633538677123697e-07,
"loss": 0.6022,
"mean_token_accuracy": 0.812897115945816,
"num_tokens": 70848469.0,
"step": 1083
},
{
"epoch": 0.740058030380611,
"grad_norm": 0.722371518611908,
"learning_rate": 9.586542435817553e-07,
"loss": 0.6375,
"mean_token_accuracy": 0.8035801649093628,
"num_tokens": 70914005.0,
"step": 1084
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.7201296091079712,
"learning_rate": 9.539633900260186e-07,
"loss": 0.6109,
"mean_token_accuracy": 0.8105449676513672,
"num_tokens": 70979541.0,
"step": 1085
},
{
"epoch": 0.7414234511008705,
"grad_norm": 0.7072751522064209,
"learning_rate": 9.49281333737082e-07,
"loss": 0.6166,
"mean_token_accuracy": 0.8087597638368607,
"num_tokens": 71045050.0,
"step": 1086
},
{
"epoch": 0.7421061614610002,
"grad_norm": 0.7147124409675598,
"learning_rate": 9.446081013568101e-07,
"loss": 0.5877,
"mean_token_accuracy": 0.8150329142808914,
"num_tokens": 71110580.0,
"step": 1087
},
{
"epoch": 0.7427888718211298,
"grad_norm": 0.7225379347801208,
"learning_rate": 9.399437194768571e-07,
"loss": 0.5851,
"mean_token_accuracy": 0.8183732032775879,
"num_tokens": 71176002.0,
"step": 1088
},
{
"epoch": 0.7434715821812596,
"grad_norm": 0.7688735723495483,
"learning_rate": 9.352882146385193e-07,
"loss": 0.6727,
"mean_token_accuracy": 0.7923766225576401,
"num_tokens": 71241504.0,
"step": 1089
},
{
"epoch": 0.7441542925413893,
"grad_norm": 0.7088306546211243,
"learning_rate": 9.306416133325747e-07,
"loss": 0.5761,
"mean_token_accuracy": 0.8190524131059647,
"num_tokens": 71307040.0,
"step": 1090
},
{
"epoch": 0.744837002901519,
"grad_norm": 0.7349225878715515,
"learning_rate": 9.260039419991448e-07,
"loss": 0.6314,
"mean_token_accuracy": 0.8064516186714172,
"num_tokens": 71372576.0,
"step": 1091
},
{
"epoch": 0.7455197132616488,
"grad_norm": 0.7324482202529907,
"learning_rate": 9.213752270275339e-07,
"loss": 0.6315,
"mean_token_accuracy": 0.8049242496490479,
"num_tokens": 71438112.0,
"step": 1092
},
{
"epoch": 0.7462024236217785,
"grad_norm": 0.7754062414169312,
"learning_rate": 9.167554947560836e-07,
"loss": 0.587,
"mean_token_accuracy": 0.8168682754039764,
"num_tokens": 71503648.0,
"step": 1093
},
{
"epoch": 0.7468851339819081,
"grad_norm": 0.7258090376853943,
"learning_rate": 9.121447714720214e-07,
"loss": 0.5716,
"mean_token_accuracy": 0.8204730749130249,
"num_tokens": 71569058.0,
"step": 1094
},
{
"epoch": 0.7475678443420379,
"grad_norm": 0.7216829657554626,
"learning_rate": 9.075430834113153e-07,
"loss": 0.6208,
"mean_token_accuracy": 0.8046814501285553,
"num_tokens": 71634344.0,
"step": 1095
},
{
"epoch": 0.7482505547021676,
"grad_norm": 0.7033687829971313,
"learning_rate": 9.029504567585149e-07,
"loss": 0.5581,
"mean_token_accuracy": 0.8244858235120773,
"num_tokens": 71699830.0,
"step": 1096
},
{
"epoch": 0.7489332650622973,
"grad_norm": 0.7144305109977722,
"learning_rate": 8.983669176466143e-07,
"loss": 0.5762,
"mean_token_accuracy": 0.818400427699089,
"num_tokens": 71764790.0,
"step": 1097
},
{
"epoch": 0.7496159754224271,
"grad_norm": 0.7208042740821838,
"learning_rate": 8.937924921568946e-07,
"loss": 0.5997,
"mean_token_accuracy": 0.8123532086610794,
"num_tokens": 71829855.0,
"step": 1098
},
{
"epoch": 0.7502986857825568,
"grad_norm": 0.7652632594108582,
"learning_rate": 8.892272063187793e-07,
"loss": 0.634,
"mean_token_accuracy": 0.8046972900629044,
"num_tokens": 71894984.0,
"step": 1099
},
{
"epoch": 0.7509813961426864,
"grad_norm": 0.7493232488632202,
"learning_rate": 8.846710861096841e-07,
"loss": 0.6421,
"mean_token_accuracy": 0.8010455518960953,
"num_tokens": 71960164.0,
"step": 1100
},
{
"epoch": 0.7516641065028162,
"grad_norm": 0.7200855016708374,
"learning_rate": 8.801241574548735e-07,
"loss": 0.5745,
"mean_token_accuracy": 0.8203964978456497,
"num_tokens": 72025700.0,
"step": 1101
},
{
"epoch": 0.7523468168629459,
"grad_norm": 0.7226145267486572,
"learning_rate": 8.755864462273072e-07,
"loss": 0.5948,
"mean_token_accuracy": 0.8125305473804474,
"num_tokens": 72091236.0,
"step": 1102
},
{
"epoch": 0.7530295272230756,
"grad_norm": 0.7232508659362793,
"learning_rate": 8.710579782474974e-07,
"loss": 0.5999,
"mean_token_accuracy": 0.813049852848053,
"num_tokens": 72156772.0,
"step": 1103
},
{
"epoch": 0.7537122375832054,
"grad_norm": 0.6991187930107117,
"learning_rate": 8.665387792833582e-07,
"loss": 0.5508,
"mean_token_accuracy": 0.8265517950057983,
"num_tokens": 72222308.0,
"step": 1104
},
{
"epoch": 0.7543949479433351,
"grad_norm": 0.7070404887199402,
"learning_rate": 8.620288750500658e-07,
"loss": 0.6137,
"mean_token_accuracy": 0.8111559152603149,
"num_tokens": 72287844.0,
"step": 1105
},
{
"epoch": 0.7550776583034647,
"grad_norm": 0.7090805768966675,
"learning_rate": 8.575282912099003e-07,
"loss": 0.6246,
"mean_token_accuracy": 0.8064394891262054,
"num_tokens": 72353148.0,
"step": 1106
},
{
"epoch": 0.7557603686635944,
"grad_norm": 0.7003458142280579,
"learning_rate": 8.530370533721144e-07,
"loss": 0.597,
"mean_token_accuracy": 0.8132027834653854,
"num_tokens": 72418651.0,
"step": 1107
},
{
"epoch": 0.7564430790237242,
"grad_norm": 0.7059638500213623,
"learning_rate": 8.485551870927747e-07,
"loss": 0.602,
"mean_token_accuracy": 0.8118747770786285,
"num_tokens": 72483869.0,
"step": 1108
},
{
"epoch": 0.7571257893838539,
"grad_norm": 0.7228066921234131,
"learning_rate": 8.440827178746244e-07,
"loss": 0.5637,
"mean_token_accuracy": 0.8214890360832214,
"num_tokens": 72549379.0,
"step": 1109
},
{
"epoch": 0.7578084997439836,
"grad_norm": 0.6997213959693909,
"learning_rate": 8.396196711669335e-07,
"loss": 0.5883,
"mean_token_accuracy": 0.8160992115736008,
"num_tokens": 72614862.0,
"step": 1110
},
{
"epoch": 0.7584912101041134,
"grad_norm": 0.7410682439804077,
"learning_rate": 8.351660723653599e-07,
"loss": 0.6115,
"mean_token_accuracy": 0.8090939670801163,
"num_tokens": 72680398.0,
"step": 1111
},
{
"epoch": 0.759173920464243,
"grad_norm": 0.7432024478912354,
"learning_rate": 8.307219468117947e-07,
"loss": 0.6055,
"mean_token_accuracy": 0.8117057681083679,
"num_tokens": 72745934.0,
"step": 1112
},
{
"epoch": 0.7598566308243727,
"grad_norm": 0.7230374217033386,
"learning_rate": 8.262873197942303e-07,
"loss": 0.571,
"mean_token_accuracy": 0.820182666182518,
"num_tokens": 72811470.0,
"step": 1113
},
{
"epoch": 0.7605393411845025,
"grad_norm": 0.7165111899375916,
"learning_rate": 8.21862216546607e-07,
"loss": 0.5683,
"mean_token_accuracy": 0.8194189816713333,
"num_tokens": 72877006.0,
"step": 1114
},
{
"epoch": 0.7612220515446322,
"grad_norm": 0.7650198340415955,
"learning_rate": 8.174466622486743e-07,
"loss": 0.6179,
"mean_token_accuracy": 0.8073527663946152,
"num_tokens": 72942542.0,
"step": 1115
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.7104616165161133,
"learning_rate": 8.130406820258455e-07,
"loss": 0.6032,
"mean_token_accuracy": 0.8136285096406937,
"num_tokens": 73007960.0,
"step": 1116
},
{
"epoch": 0.7625874722648917,
"grad_norm": 0.7432583570480347,
"learning_rate": 8.086443009490558e-07,
"loss": 0.6147,
"mean_token_accuracy": 0.8103925734758377,
"num_tokens": 73073463.0,
"step": 1117
},
{
"epoch": 0.7632701826250213,
"grad_norm": 0.7258316874504089,
"learning_rate": 8.042575440346185e-07,
"loss": 0.6106,
"mean_token_accuracy": 0.806356206536293,
"num_tokens": 73138807.0,
"step": 1118
},
{
"epoch": 0.763952892985151,
"grad_norm": 0.7336967587471008,
"learning_rate": 7.998804362440854e-07,
"loss": 0.628,
"mean_token_accuracy": 0.8075513243675232,
"num_tokens": 73204343.0,
"step": 1119
},
{
"epoch": 0.7646356033452808,
"grad_norm": 0.7090906500816345,
"learning_rate": 7.955130024841009e-07,
"loss": 0.5771,
"mean_token_accuracy": 0.8172195702791214,
"num_tokens": 73269879.0,
"step": 1120
},
{
"epoch": 0.7653183137054105,
"grad_norm": 0.6913905739784241,
"learning_rate": 7.911552676062629e-07,
"loss": 0.5969,
"mean_token_accuracy": 0.8146230429410934,
"num_tokens": 73335415.0,
"step": 1121
},
{
"epoch": 0.7660010240655402,
"grad_norm": 0.7096146941184998,
"learning_rate": 7.868072564069807e-07,
"loss": 0.5892,
"mean_token_accuracy": 0.8174639493227005,
"num_tokens": 73400951.0,
"step": 1122
},
{
"epoch": 0.76668373442567,
"grad_norm": 0.7153047323226929,
"learning_rate": 7.82468993627333e-07,
"loss": 0.6304,
"mean_token_accuracy": 0.8055810183286667,
"num_tokens": 73466487.0,
"step": 1123
},
{
"epoch": 0.7673664447857996,
"grad_norm": 0.7417140603065491,
"learning_rate": 7.781405039529296e-07,
"loss": 0.5973,
"mean_token_accuracy": 0.8134699463844299,
"num_tokens": 73531758.0,
"step": 1124
},
{
"epoch": 0.7680491551459293,
"grad_norm": 0.7037280797958374,
"learning_rate": 7.738218120137672e-07,
"loss": 0.5728,
"mean_token_accuracy": 0.8211449086666107,
"num_tokens": 73597294.0,
"step": 1125
},
{
"epoch": 0.7687318655060591,
"grad_norm": 0.7011156678199768,
"learning_rate": 7.695129423840944e-07,
"loss": 0.6029,
"mean_token_accuracy": 0.8113086521625519,
"num_tokens": 73662830.0,
"step": 1126
},
{
"epoch": 0.7694145758661888,
"grad_norm": 0.7726342678070068,
"learning_rate": 7.652139195822658e-07,
"loss": 0.6001,
"mean_token_accuracy": 0.8128054738044739,
"num_tokens": 73728366.0,
"step": 1127
},
{
"epoch": 0.7700972862263185,
"grad_norm": 0.7018040418624878,
"learning_rate": 7.609247680706072e-07,
"loss": 0.5608,
"mean_token_accuracy": 0.8241385519504547,
"num_tokens": 73793902.0,
"step": 1128
},
{
"epoch": 0.7707799965864482,
"grad_norm": 0.7123659253120422,
"learning_rate": 7.566455122552744e-07,
"loss": 0.5959,
"mean_token_accuracy": 0.8140884637832642,
"num_tokens": 73859438.0,
"step": 1129
},
{
"epoch": 0.7714627069465779,
"grad_norm": 0.6594699025154114,
"learning_rate": 7.523761764861138e-07,
"loss": 0.5628,
"mean_token_accuracy": 0.8215267658233643,
"num_tokens": 73924974.0,
"step": 1130
},
{
"epoch": 0.7721454173067076,
"grad_norm": 0.7053015232086182,
"learning_rate": 7.481167850565255e-07,
"loss": 0.5569,
"mean_token_accuracy": 0.8225195407867432,
"num_tokens": 73990510.0,
"step": 1131
},
{
"epoch": 0.7728281276668374,
"grad_norm": 0.7094056606292725,
"learning_rate": 7.43867362203326e-07,
"loss": 0.5928,
"mean_token_accuracy": 0.8135844320058823,
"num_tokens": 74056046.0,
"step": 1132
},
{
"epoch": 0.7735108380269671,
"grad_norm": 0.7524219751358032,
"learning_rate": 7.396279321066035e-07,
"loss": 0.6431,
"mean_token_accuracy": 0.8012891113758087,
"num_tokens": 74121582.0,
"step": 1133
},
{
"epoch": 0.7741935483870968,
"grad_norm": 0.7120317816734314,
"learning_rate": 7.353985188895915e-07,
"loss": 0.6297,
"mean_token_accuracy": 0.8051542043685913,
"num_tokens": 74185990.0,
"step": 1134
},
{
"epoch": 0.7748762587472265,
"grad_norm": 0.7252651453018188,
"learning_rate": 7.311791466185214e-07,
"loss": 0.6045,
"mean_token_accuracy": 0.8110489994287491,
"num_tokens": 74251526.0,
"step": 1135
},
{
"epoch": 0.7755589691073562,
"grad_norm": 0.7242283225059509,
"learning_rate": 7.269698393024904e-07,
"loss": 0.6094,
"mean_token_accuracy": 0.8091860562562943,
"num_tokens": 74316251.0,
"step": 1136
},
{
"epoch": 0.7762416794674859,
"grad_norm": 0.7422311305999756,
"learning_rate": 7.227706208933239e-07,
"loss": 0.6407,
"mean_token_accuracy": 0.8013196587562561,
"num_tokens": 74381787.0,
"step": 1137
},
{
"epoch": 0.7769243898276156,
"grad_norm": 0.6992833018302917,
"learning_rate": 7.185815152854417e-07,
"loss": 0.5788,
"mean_token_accuracy": 0.8192576766014099,
"num_tokens": 74447315.0,
"step": 1138
},
{
"epoch": 0.7776071001877454,
"grad_norm": 0.7244083881378174,
"learning_rate": 7.144025463157147e-07,
"loss": 0.5659,
"mean_token_accuracy": 0.8207778781652451,
"num_tokens": 74512781.0,
"step": 1139
},
{
"epoch": 0.778289810547875,
"grad_norm": 0.7246714234352112,
"learning_rate": 7.102337377633395e-07,
"loss": 0.6041,
"mean_token_accuracy": 0.8115072101354599,
"num_tokens": 74578317.0,
"step": 1140
},
{
"epoch": 0.7789725209080047,
"grad_norm": 0.6905441284179688,
"learning_rate": 7.060751133496948e-07,
"loss": 0.561,
"mean_token_accuracy": 0.8240596801042557,
"num_tokens": 74643608.0,
"step": 1141
},
{
"epoch": 0.7796552312681345,
"grad_norm": 0.7125344276428223,
"learning_rate": 7.019266967382105e-07,
"loss": 0.6195,
"mean_token_accuracy": 0.8087732195854187,
"num_tokens": 74709144.0,
"step": 1142
},
{
"epoch": 0.7803379416282642,
"grad_norm": 0.7289828658103943,
"learning_rate": 6.977885115342306e-07,
"loss": 0.5832,
"mean_token_accuracy": 0.8175250440835953,
"num_tokens": 74774680.0,
"step": 1143
},
{
"epoch": 0.7810206519883939,
"grad_norm": 0.752156674861908,
"learning_rate": 6.936605812848837e-07,
"loss": 0.6223,
"mean_token_accuracy": 0.8056115657091141,
"num_tokens": 74840216.0,
"step": 1144
},
{
"epoch": 0.7817033623485237,
"grad_norm": 0.7085697650909424,
"learning_rate": 6.895429294789402e-07,
"loss": 0.6034,
"mean_token_accuracy": 0.8111406415700912,
"num_tokens": 74905752.0,
"step": 1145
},
{
"epoch": 0.7823860727086533,
"grad_norm": 0.7335535287857056,
"learning_rate": 6.854355795466897e-07,
"loss": 0.5903,
"mean_token_accuracy": 0.8162474036216736,
"num_tokens": 74971229.0,
"step": 1146
},
{
"epoch": 0.783068783068783,
"grad_norm": 0.7267248034477234,
"learning_rate": 6.813385548597976e-07,
"loss": 0.6087,
"mean_token_accuracy": 0.8100256621837616,
"num_tokens": 75036765.0,
"step": 1147
},
{
"epoch": 0.7837514934289128,
"grad_norm": 0.7313956618309021,
"learning_rate": 6.772518787311804e-07,
"loss": 0.6383,
"mean_token_accuracy": 0.8019611537456512,
"num_tokens": 75102301.0,
"step": 1148
},
{
"epoch": 0.7844342037890425,
"grad_norm": 0.7354364395141602,
"learning_rate": 6.731755744148652e-07,
"loss": 0.582,
"mean_token_accuracy": 0.8179615139961243,
"num_tokens": 75167672.0,
"step": 1149
},
{
"epoch": 0.7851169141491722,
"grad_norm": 0.7073311805725098,
"learning_rate": 6.691096651058643e-07,
"loss": 0.6127,
"mean_token_accuracy": 0.8103054463863373,
"num_tokens": 75233122.0,
"step": 1150
},
{
"epoch": 0.785799624509302,
"grad_norm": 0.748970091342926,
"learning_rate": 6.650541739400393e-07,
"loss": 0.6182,
"mean_token_accuracy": 0.8087337911128998,
"num_tokens": 75298544.0,
"step": 1151
},
{
"epoch": 0.7864823348694316,
"grad_norm": 0.7270997166633606,
"learning_rate": 6.610091239939704e-07,
"loss": 0.5972,
"mean_token_accuracy": 0.8138899058103561,
"num_tokens": 75364080.0,
"step": 1152
},
{
"epoch": 0.7871650452295613,
"grad_norm": 0.7352398633956909,
"learning_rate": 6.569745382848236e-07,
"loss": 0.6171,
"mean_token_accuracy": 0.8078258484601974,
"num_tokens": 75429577.0,
"step": 1153
},
{
"epoch": 0.7878477555896911,
"grad_norm": 0.7277079224586487,
"learning_rate": 6.529504397702255e-07,
"loss": 0.5929,
"mean_token_accuracy": 0.8138288110494614,
"num_tokens": 75495113.0,
"step": 1154
},
{
"epoch": 0.7885304659498208,
"grad_norm": 0.7266401648521423,
"learning_rate": 6.489368513481228e-07,
"loss": 0.618,
"mean_token_accuracy": 0.8080567568540573,
"num_tokens": 75560634.0,
"step": 1155
},
{
"epoch": 0.7892131763099505,
"grad_norm": 0.7263786196708679,
"learning_rate": 6.449337958566623e-07,
"loss": 0.6235,
"mean_token_accuracy": 0.8064727634191513,
"num_tokens": 75626081.0,
"step": 1156
},
{
"epoch": 0.7898958866700803,
"grad_norm": 0.7240126132965088,
"learning_rate": 6.40941296074054e-07,
"loss": 0.6134,
"mean_token_accuracy": 0.8087421804666519,
"num_tokens": 75691529.0,
"step": 1157
},
{
"epoch": 0.7905785970302099,
"grad_norm": 0.6938052773475647,
"learning_rate": 6.369593747184438e-07,
"loss": 0.5664,
"mean_token_accuracy": 0.8221682459115982,
"num_tokens": 75757065.0,
"step": 1158
},
{
"epoch": 0.7912613073903396,
"grad_norm": 0.7303853631019592,
"learning_rate": 6.32988054447784e-07,
"loss": 0.5906,
"mean_token_accuracy": 0.8149492591619492,
"num_tokens": 75822541.0,
"step": 1159
},
{
"epoch": 0.7919440177504694,
"grad_norm": 0.6966801285743713,
"learning_rate": 6.29027357859707e-07,
"loss": 0.5667,
"mean_token_accuracy": 0.8210838139057159,
"num_tokens": 75888077.0,
"step": 1160
},
{
"epoch": 0.7926267281105991,
"grad_norm": 0.6900127530097961,
"learning_rate": 6.250773074913897e-07,
"loss": 0.5997,
"mean_token_accuracy": 0.8122250735759735,
"num_tokens": 75953613.0,
"step": 1161
},
{
"epoch": 0.7933094384707288,
"grad_norm": 0.7056220173835754,
"learning_rate": 6.211379258194342e-07,
"loss": 0.6128,
"mean_token_accuracy": 0.8070778399705887,
"num_tokens": 76019149.0,
"step": 1162
},
{
"epoch": 0.7939921488308586,
"grad_norm": 0.7348355650901794,
"learning_rate": 6.172092352597334e-07,
"loss": 0.6386,
"mean_token_accuracy": 0.8025385290384293,
"num_tokens": 76084565.0,
"step": 1163
},
{
"epoch": 0.7946748591909882,
"grad_norm": 0.7304251790046692,
"learning_rate": 6.132912581673456e-07,
"loss": 0.6003,
"mean_token_accuracy": 0.8130903542041779,
"num_tokens": 76149571.0,
"step": 1164
},
{
"epoch": 0.7953575695511179,
"grad_norm": 0.7153202295303345,
"learning_rate": 6.093840168363679e-07,
"loss": 0.6151,
"mean_token_accuracy": 0.8080400824546814,
"num_tokens": 76215107.0,
"step": 1165
},
{
"epoch": 0.7960402799112477,
"grad_norm": 0.724839985370636,
"learning_rate": 6.054875334998084e-07,
"loss": 0.6474,
"mean_token_accuracy": 0.799018919467926,
"num_tokens": 76279104.0,
"step": 1166
},
{
"epoch": 0.7967229902713774,
"grad_norm": 0.716566801071167,
"learning_rate": 6.016018303294588e-07,
"loss": 0.591,
"mean_token_accuracy": 0.814546674489975,
"num_tokens": 76344640.0,
"step": 1167
},
{
"epoch": 0.7974057006315071,
"grad_norm": 0.759793758392334,
"learning_rate": 5.977269294357724e-07,
"loss": 0.6073,
"mean_token_accuracy": 0.8116446733474731,
"num_tokens": 76410176.0,
"step": 1168
},
{
"epoch": 0.7980884109916367,
"grad_norm": 0.7540044188499451,
"learning_rate": 5.938628528677326e-07,
"loss": 0.569,
"mean_token_accuracy": 0.821023091673851,
"num_tokens": 76475010.0,
"step": 1169
},
{
"epoch": 0.7987711213517665,
"grad_norm": 0.6950900554656982,
"learning_rate": 5.900096226127314e-07,
"loss": 0.597,
"mean_token_accuracy": 0.8137371689081192,
"num_tokens": 76540546.0,
"step": 1170
},
{
"epoch": 0.7994538317118962,
"grad_norm": 0.681171178817749,
"learning_rate": 5.861672605964422e-07,
"loss": 0.5647,
"mean_token_accuracy": 0.8229624778032303,
"num_tokens": 76606082.0,
"step": 1171
},
{
"epoch": 0.8001365420720259,
"grad_norm": 0.6856206655502319,
"learning_rate": 5.823357886826972e-07,
"loss": 0.5742,
"mean_token_accuracy": 0.819632813334465,
"num_tokens": 76671618.0,
"step": 1172
},
{
"epoch": 0.8008192524321557,
"grad_norm": 0.7125565409660339,
"learning_rate": 5.7851522867336e-07,
"loss": 0.6056,
"mean_token_accuracy": 0.8092008829116821,
"num_tokens": 76737154.0,
"step": 1173
},
{
"epoch": 0.8015019627922854,
"grad_norm": 0.7257224917411804,
"learning_rate": 5.747056023082042e-07,
"loss": 0.6366,
"mean_token_accuracy": 0.8041041642427444,
"num_tokens": 76802088.0,
"step": 1174
},
{
"epoch": 0.802184673152415,
"grad_norm": 0.6963748931884766,
"learning_rate": 5.709069312647894e-07,
"loss": 0.6104,
"mean_token_accuracy": 0.8100867569446564,
"num_tokens": 76867624.0,
"step": 1175
},
{
"epoch": 0.8028673835125448,
"grad_norm": 0.764582633972168,
"learning_rate": 5.671192371583361e-07,
"loss": 0.6299,
"mean_token_accuracy": 0.8033950179815292,
"num_tokens": 76932121.0,
"step": 1176
},
{
"epoch": 0.8035500938726745,
"grad_norm": 0.7009971141815186,
"learning_rate": 5.633425415416038e-07,
"loss": 0.6341,
"mean_token_accuracy": 0.8024957329034805,
"num_tokens": 76997657.0,
"step": 1177
},
{
"epoch": 0.8042328042328042,
"grad_norm": 0.7152801156044006,
"learning_rate": 5.595768659047688e-07,
"loss": 0.619,
"mean_token_accuracy": 0.8068641424179077,
"num_tokens": 77062948.0,
"step": 1178
},
{
"epoch": 0.804915514592934,
"grad_norm": 0.7051480412483215,
"learning_rate": 5.558222316753009e-07,
"loss": 0.6329,
"mean_token_accuracy": 0.8018084168434143,
"num_tokens": 77128484.0,
"step": 1179
},
{
"epoch": 0.8055982249530637,
"grad_norm": 0.7400657534599304,
"learning_rate": 5.520786602178418e-07,
"loss": 0.6173,
"mean_token_accuracy": 0.8071723580360413,
"num_tokens": 77193405.0,
"step": 1180
},
{
"epoch": 0.8062809353131933,
"grad_norm": 0.7040783762931824,
"learning_rate": 5.483461728340867e-07,
"loss": 0.5981,
"mean_token_accuracy": 0.8125204145908356,
"num_tokens": 77258112.0,
"step": 1181
},
{
"epoch": 0.8069636456733231,
"grad_norm": 0.6948963403701782,
"learning_rate": 5.446247907626543e-07,
"loss": 0.6115,
"mean_token_accuracy": 0.8102853149175644,
"num_tokens": 77323648.0,
"step": 1182
},
{
"epoch": 0.8076463560334528,
"grad_norm": 0.7158141732215881,
"learning_rate": 5.409145351789777e-07,
"loss": 0.6102,
"mean_token_accuracy": 0.8111711889505386,
"num_tokens": 77389184.0,
"step": 1183
},
{
"epoch": 0.8083290663935825,
"grad_norm": 0.7212420701980591,
"learning_rate": 5.372154271951746e-07,
"loss": 0.6006,
"mean_token_accuracy": 0.8151035904884338,
"num_tokens": 77454660.0,
"step": 1184
},
{
"epoch": 0.8090117767537123,
"grad_norm": 0.6939152479171753,
"learning_rate": 5.335274878599317e-07,
"loss": 0.5746,
"mean_token_accuracy": 0.8195730149745941,
"num_tokens": 77520141.0,
"step": 1185
},
{
"epoch": 0.809694487113842,
"grad_norm": 0.7088807225227356,
"learning_rate": 5.298507381583826e-07,
"loss": 0.6292,
"mean_token_accuracy": 0.805413007736206,
"num_tokens": 77585677.0,
"step": 1186
},
{
"epoch": 0.8103771974739716,
"grad_norm": 0.728660523891449,
"learning_rate": 5.261851990119926e-07,
"loss": 0.6153,
"mean_token_accuracy": 0.8086510300636292,
"num_tokens": 77651213.0,
"step": 1187
},
{
"epoch": 0.8110599078341014,
"grad_norm": 0.6933480501174927,
"learning_rate": 5.225308912784321e-07,
"loss": 0.5616,
"mean_token_accuracy": 0.8231625705957413,
"num_tokens": 77716478.0,
"step": 1188
},
{
"epoch": 0.8117426181942311,
"grad_norm": 0.7177363634109497,
"learning_rate": 5.188878357514668e-07,
"loss": 0.6168,
"mean_token_accuracy": 0.8091703355312347,
"num_tokens": 77782014.0,
"step": 1189
},
{
"epoch": 0.8124253285543608,
"grad_norm": 0.7240033149719238,
"learning_rate": 5.152560531608325e-07,
"loss": 0.608,
"mean_token_accuracy": 0.8108504414558411,
"num_tokens": 77847550.0,
"step": 1190
},
{
"epoch": 0.8131080389144906,
"grad_norm": 0.7297149896621704,
"learning_rate": 5.116355641721202e-07,
"loss": 0.5873,
"mean_token_accuracy": 0.8166238963603973,
"num_tokens": 77913086.0,
"step": 1191
},
{
"epoch": 0.8137907492746203,
"grad_norm": 0.7229415774345398,
"learning_rate": 5.080263893866572e-07,
"loss": 0.587,
"mean_token_accuracy": 0.8156934976577759,
"num_tokens": 77978469.0,
"step": 1192
},
{
"epoch": 0.8144734596347499,
"grad_norm": 0.6919569373130798,
"learning_rate": 5.044285493413942e-07,
"loss": 0.5878,
"mean_token_accuracy": 0.8175217360258102,
"num_tokens": 78043943.0,
"step": 1193
},
{
"epoch": 0.8151561699948797,
"grad_norm": 0.7613787055015564,
"learning_rate": 5.008420645087785e-07,
"loss": 0.6068,
"mean_token_accuracy": 0.8115917444229126,
"num_tokens": 78109340.0,
"step": 1194
},
{
"epoch": 0.8158388803550094,
"grad_norm": 0.7041038870811462,
"learning_rate": 4.972669552966508e-07,
"loss": 0.5735,
"mean_token_accuracy": 0.8211907297372818,
"num_tokens": 78174876.0,
"step": 1195
},
{
"epoch": 0.8165215907151391,
"grad_norm": 0.7106341123580933,
"learning_rate": 4.937032420481169e-07,
"loss": 0.5736,
"mean_token_accuracy": 0.8204770088195801,
"num_tokens": 78240208.0,
"step": 1196
},
{
"epoch": 0.8172043010752689,
"grad_norm": 0.7360470294952393,
"learning_rate": 4.901509450414418e-07,
"loss": 0.5837,
"mean_token_accuracy": 0.8159824013710022,
"num_tokens": 78305744.0,
"step": 1197
},
{
"epoch": 0.8178870114353985,
"grad_norm": 0.6911918520927429,
"learning_rate": 4.86610084489924e-07,
"loss": 0.6177,
"mean_token_accuracy": 0.8082218617200851,
"num_tokens": 78371143.0,
"step": 1198
},
{
"epoch": 0.8185697217955282,
"grad_norm": 0.7414296269416809,
"learning_rate": 4.83080680541792e-07,
"loss": 0.5975,
"mean_token_accuracy": 0.8125152736902237,
"num_tokens": 78436679.0,
"step": 1199
},
{
"epoch": 0.819252432155658,
"grad_norm": 0.7289243936538696,
"learning_rate": 4.795627532800806e-07,
"loss": 0.6296,
"mean_token_accuracy": 0.8031218498945236,
"num_tokens": 78502009.0,
"step": 1200
},
{
"epoch": 0.8199351425157877,
"grad_norm": 0.7345913052558899,
"learning_rate": 4.760563227225204e-07,
"loss": 0.6461,
"mean_token_accuracy": 0.7988996803760529,
"num_tokens": 78567526.0,
"step": 1201
},
{
"epoch": 0.8206178528759174,
"grad_norm": 0.7304643988609314,
"learning_rate": 4.725614088214231e-07,
"loss": 0.6372,
"mean_token_accuracy": 0.802532970905304,
"num_tokens": 78632602.0,
"step": 1202
},
{
"epoch": 0.821300563236047,
"grad_norm": 0.711274266242981,
"learning_rate": 4.690780314635704e-07,
"loss": 0.598,
"mean_token_accuracy": 0.8129887580871582,
"num_tokens": 78698138.0,
"step": 1203
},
{
"epoch": 0.8219832735961768,
"grad_norm": 0.7121052145957947,
"learning_rate": 4.656062104700951e-07,
"loss": 0.5784,
"mean_token_accuracy": 0.8166697174310684,
"num_tokens": 78763674.0,
"step": 1204
},
{
"epoch": 0.8226659839563065,
"grad_norm": 0.7592654228210449,
"learning_rate": 4.621459655963753e-07,
"loss": 0.6325,
"mean_token_accuracy": 0.8049637675285339,
"num_tokens": 78828816.0,
"step": 1205
},
{
"epoch": 0.8233486943164362,
"grad_norm": 0.7198703289031982,
"learning_rate": 4.586973165319164e-07,
"loss": 0.5935,
"mean_token_accuracy": 0.8148216009140015,
"num_tokens": 78894352.0,
"step": 1206
},
{
"epoch": 0.824031404676566,
"grad_norm": 0.7096201181411743,
"learning_rate": 4.5526028290024253e-07,
"loss": 0.5818,
"mean_token_accuracy": 0.8182827532291412,
"num_tokens": 78959669.0,
"step": 1207
},
{
"epoch": 0.8247141150366957,
"grad_norm": 0.7286295890808105,
"learning_rate": 4.518348842587822e-07,
"loss": 0.5924,
"mean_token_accuracy": 0.8151398599147797,
"num_tokens": 79024817.0,
"step": 1208
},
{
"epoch": 0.8253968253968254,
"grad_norm": 0.7021476030349731,
"learning_rate": 4.484211400987612e-07,
"loss": 0.5974,
"mean_token_accuracy": 0.812148705124855,
"num_tokens": 79090353.0,
"step": 1209
},
{
"epoch": 0.8260795357569551,
"grad_norm": 0.7140706181526184,
"learning_rate": 4.450190698450843e-07,
"loss": 0.567,
"mean_token_accuracy": 0.8210151493549347,
"num_tokens": 79155623.0,
"step": 1210
},
{
"epoch": 0.8267622461170848,
"grad_norm": 0.736964762210846,
"learning_rate": 4.416286928562344e-07,
"loss": 0.6204,
"mean_token_accuracy": 0.8086510300636292,
"num_tokens": 79221159.0,
"step": 1211
},
{
"epoch": 0.8274449564772145,
"grad_norm": 0.6984681487083435,
"learning_rate": 4.3825002842415423e-07,
"loss": 0.6231,
"mean_token_accuracy": 0.8068502992391586,
"num_tokens": 79286680.0,
"step": 1212
},
{
"epoch": 0.8281276668373443,
"grad_norm": 0.7289581298828125,
"learning_rate": 4.3488309577414014e-07,
"loss": 0.6099,
"mean_token_accuracy": 0.8104861825704575,
"num_tokens": 79352167.0,
"step": 1213
},
{
"epoch": 0.828810377197474,
"grad_norm": 0.7081075310707092,
"learning_rate": 4.31527914064733e-07,
"loss": 0.568,
"mean_token_accuracy": 0.8197198361158371,
"num_tokens": 79417627.0,
"step": 1214
},
{
"epoch": 0.8294930875576036,
"grad_norm": 0.6993555426597595,
"learning_rate": 4.2818450238760745e-07,
"loss": 0.5891,
"mean_token_accuracy": 0.8168835490942001,
"num_tokens": 79483163.0,
"step": 1215
},
{
"epoch": 0.8301757979177334,
"grad_norm": 0.725541889667511,
"learning_rate": 4.248528797674645e-07,
"loss": 0.5958,
"mean_token_accuracy": 0.8150507062673569,
"num_tokens": 79548699.0,
"step": 1216
},
{
"epoch": 0.8308585082778631,
"grad_norm": 0.7068691849708557,
"learning_rate": 4.21533065161924e-07,
"loss": 0.5988,
"mean_token_accuracy": 0.8137481957674026,
"num_tokens": 79613306.0,
"step": 1217
},
{
"epoch": 0.8315412186379928,
"grad_norm": 0.709589958190918,
"learning_rate": 4.182250774614144e-07,
"loss": 0.587,
"mean_token_accuracy": 0.815237820148468,
"num_tokens": 79678725.0,
"step": 1218
},
{
"epoch": 0.8322239289981226,
"grad_norm": 0.7335296273231506,
"learning_rate": 4.149289354890676e-07,
"loss": 0.6533,
"mean_token_accuracy": 0.7973637729883194,
"num_tokens": 79744261.0,
"step": 1219
},
{
"epoch": 0.8329066393582523,
"grad_norm": 0.713219165802002,
"learning_rate": 4.1164465800060977e-07,
"loss": 0.6041,
"mean_token_accuracy": 0.8116752207279205,
"num_tokens": 79809797.0,
"step": 1220
},
{
"epoch": 0.833589349718382,
"grad_norm": 0.6817585825920105,
"learning_rate": 4.0837226368425714e-07,
"loss": 0.5986,
"mean_token_accuracy": 0.813984677195549,
"num_tokens": 79874634.0,
"step": 1221
},
{
"epoch": 0.8342720600785117,
"grad_norm": 0.7115651369094849,
"learning_rate": 4.0511177116060674e-07,
"loss": 0.6543,
"mean_token_accuracy": 0.7982038259506226,
"num_tokens": 79940170.0,
"step": 1222
},
{
"epoch": 0.8349547704386414,
"grad_norm": 0.7281413674354553,
"learning_rate": 4.018631989825328e-07,
"loss": 0.6427,
"mean_token_accuracy": 0.7999297529459,
"num_tokens": 80005706.0,
"step": 1223
},
{
"epoch": 0.8356374807987711,
"grad_norm": 0.7227478623390198,
"learning_rate": 3.98626565635081e-07,
"loss": 0.5918,
"mean_token_accuracy": 0.8124083578586578,
"num_tokens": 80071242.0,
"step": 1224
},
{
"epoch": 0.8363201911589009,
"grad_norm": 0.6979718804359436,
"learning_rate": 3.954018895353615e-07,
"loss": 0.5896,
"mean_token_accuracy": 0.8178916126489639,
"num_tokens": 80136778.0,
"step": 1225
},
{
"epoch": 0.8370029015190306,
"grad_norm": 0.6697226166725159,
"learning_rate": 3.92189189032445e-07,
"loss": 0.5544,
"mean_token_accuracy": 0.8262005001306534,
"num_tokens": 80202314.0,
"step": 1226
},
{
"epoch": 0.8376856118791602,
"grad_norm": 0.6812226176261902,
"learning_rate": 3.889884824072601e-07,
"loss": 0.568,
"mean_token_accuracy": 0.8224816024303436,
"num_tokens": 80267756.0,
"step": 1227
},
{
"epoch": 0.83836832223929,
"grad_norm": 0.7076123356819153,
"learning_rate": 3.857997878724862e-07,
"loss": 0.593,
"mean_token_accuracy": 0.8150422126054764,
"num_tokens": 80332455.0,
"step": 1228
},
{
"epoch": 0.8390510325994197,
"grad_norm": 0.7348125576972961,
"learning_rate": 3.8262312357245173e-07,
"loss": 0.6193,
"mean_token_accuracy": 0.8065432608127594,
"num_tokens": 80397991.0,
"step": 1229
},
{
"epoch": 0.8397337429595494,
"grad_norm": 0.7341455817222595,
"learning_rate": 3.7945850758303286e-07,
"loss": 0.6064,
"mean_token_accuracy": 0.8103394955396652,
"num_tokens": 80463054.0,
"step": 1230
},
{
"epoch": 0.8404164533196792,
"grad_norm": 0.7043530344963074,
"learning_rate": 3.7630595791154383e-07,
"loss": 0.5764,
"mean_token_accuracy": 0.8203535825014114,
"num_tokens": 80528495.0,
"step": 1231
},
{
"epoch": 0.8410991636798089,
"grad_norm": 0.7355886697769165,
"learning_rate": 3.7316549249664353e-07,
"loss": 0.5983,
"mean_token_accuracy": 0.811448335647583,
"num_tokens": 80593906.0,
"step": 1232
},
{
"epoch": 0.8417818740399385,
"grad_norm": 0.70097815990448,
"learning_rate": 3.7003712920822665e-07,
"loss": 0.6176,
"mean_token_accuracy": 0.8075818717479706,
"num_tokens": 80659442.0,
"step": 1233
},
{
"epoch": 0.8424645844000682,
"grad_norm": 0.7031533718109131,
"learning_rate": 3.6692088584732474e-07,
"loss": 0.5966,
"mean_token_accuracy": 0.8145619481801987,
"num_tokens": 80724978.0,
"step": 1234
},
{
"epoch": 0.843147294760198,
"grad_norm": 0.7055192589759827,
"learning_rate": 3.638167801460041e-07,
"loss": 0.6101,
"mean_token_accuracy": 0.810369685292244,
"num_tokens": 80790332.0,
"step": 1235
},
{
"epoch": 0.8438300051203277,
"grad_norm": 0.7216753363609314,
"learning_rate": 3.607248297672669e-07,
"loss": 0.6273,
"mean_token_accuracy": 0.8048436045646667,
"num_tokens": 80855669.0,
"step": 1236
},
{
"epoch": 0.8445127154804574,
"grad_norm": 0.7097287178039551,
"learning_rate": 3.576450523049457e-07,
"loss": 0.5449,
"mean_token_accuracy": 0.8278195112943649,
"num_tokens": 80921205.0,
"step": 1237
},
{
"epoch": 0.8451954258405872,
"grad_norm": 0.7152010202407837,
"learning_rate": 3.5457746528361035e-07,
"loss": 0.6267,
"mean_token_accuracy": 0.8040536344051361,
"num_tokens": 80986741.0,
"step": 1238
},
{
"epoch": 0.8458781362007168,
"grad_norm": 0.715424120426178,
"learning_rate": 3.5152208615846256e-07,
"loss": 0.6069,
"mean_token_accuracy": 0.8103158622980118,
"num_tokens": 81052277.0,
"step": 1239
},
{
"epoch": 0.8465608465608465,
"grad_norm": 0.7013223171234131,
"learning_rate": 3.4847893231523877e-07,
"loss": 0.5815,
"mean_token_accuracy": 0.8173014968633652,
"num_tokens": 81117665.0,
"step": 1240
},
{
"epoch": 0.8472435569209763,
"grad_norm": 0.7214623689651489,
"learning_rate": 3.454480210701108e-07,
"loss": 0.5866,
"mean_token_accuracy": 0.8163795173168182,
"num_tokens": 81183201.0,
"step": 1241
},
{
"epoch": 0.847926267281106,
"grad_norm": 0.7023662328720093,
"learning_rate": 3.424293696695896e-07,
"loss": 0.6043,
"mean_token_accuracy": 0.8114422112703323,
"num_tokens": 81248226.0,
"step": 1242
},
{
"epoch": 0.8486089776412357,
"grad_norm": 0.6930263042449951,
"learning_rate": 3.3942299529042157e-07,
"loss": 0.6071,
"mean_token_accuracy": 0.8101478517055511,
"num_tokens": 81313762.0,
"step": 1243
},
{
"epoch": 0.8492916880013655,
"grad_norm": 0.7055590152740479,
"learning_rate": 3.3642891503949724e-07,
"loss": 0.5873,
"mean_token_accuracy": 0.8138440847396851,
"num_tokens": 81379298.0,
"step": 1244
},
{
"epoch": 0.8499743983614951,
"grad_norm": 0.7280580997467041,
"learning_rate": 3.334471459537497e-07,
"loss": 0.6059,
"mean_token_accuracy": 0.8118687719106674,
"num_tokens": 81444583.0,
"step": 1245
},
{
"epoch": 0.8506571087216248,
"grad_norm": 0.6961165070533752,
"learning_rate": 3.304777050000604e-07,
"loss": 0.6349,
"mean_token_accuracy": 0.8040469288825989,
"num_tokens": 81510106.0,
"step": 1246
},
{
"epoch": 0.8513398190817546,
"grad_norm": 0.7137936949729919,
"learning_rate": 3.2752060907515813e-07,
"loss": 0.5937,
"mean_token_accuracy": 0.8142594546079636,
"num_tokens": 81575560.0,
"step": 1247
},
{
"epoch": 0.8520225294418843,
"grad_norm": 0.6947458982467651,
"learning_rate": 3.2457587500552946e-07,
"loss": 0.5696,
"mean_token_accuracy": 0.8206253200769424,
"num_tokens": 81640613.0,
"step": 1248
},
{
"epoch": 0.852705239802014,
"grad_norm": 0.7190272808074951,
"learning_rate": 3.216435195473175e-07,
"loss": 0.599,
"mean_token_accuracy": 0.8117821365594864,
"num_tokens": 81706149.0,
"step": 1249
},
{
"epoch": 0.8533879501621437,
"grad_norm": 0.7290658354759216,
"learning_rate": 3.187235593862284e-07,
"loss": 0.6001,
"mean_token_accuracy": 0.8131567686796188,
"num_tokens": 81771685.0,
"step": 1250
},
{
"epoch": 0.8540706605222734,
"grad_norm": 0.7395808696746826,
"learning_rate": 3.15816011137437e-07,
"loss": 0.6373,
"mean_token_accuracy": 0.8025502562522888,
"num_tokens": 81837208.0,
"step": 1251
},
{
"epoch": 0.8547533708824031,
"grad_norm": 0.7159065008163452,
"learning_rate": 3.129208913454931e-07,
"loss": 0.6143,
"mean_token_accuracy": 0.8089412301778793,
"num_tokens": 81902744.0,
"step": 1252
},
{
"epoch": 0.8554360812425329,
"grad_norm": 0.6992605328559875,
"learning_rate": 3.1003821648422277e-07,
"loss": 0.5936,
"mean_token_accuracy": 0.8155394643545151,
"num_tokens": 81968280.0,
"step": 1253
},
{
"epoch": 0.8561187916026626,
"grad_norm": 0.7298917770385742,
"learning_rate": 3.071680029566415e-07,
"loss": 0.5907,
"mean_token_accuracy": 0.8136913478374481,
"num_tokens": 82033816.0,
"step": 1254
},
{
"epoch": 0.8568015019627923,
"grad_norm": 0.7134488224983215,
"learning_rate": 3.043102670948545e-07,
"loss": 0.6234,
"mean_token_accuracy": 0.8073708415031433,
"num_tokens": 82099329.0,
"step": 1255
},
{
"epoch": 0.857484212322922,
"grad_norm": 0.7187513113021851,
"learning_rate": 3.0146502515996796e-07,
"loss": 0.6533,
"mean_token_accuracy": 0.7979899793863297,
"num_tokens": 82164865.0,
"step": 1256
},
{
"epoch": 0.8581669226830517,
"grad_norm": 0.6844106316566467,
"learning_rate": 2.9863229334199413e-07,
"loss": 0.5732,
"mean_token_accuracy": 0.8199816644191742,
"num_tokens": 82230373.0,
"step": 1257
},
{
"epoch": 0.8588496330431814,
"grad_norm": 0.7100004553794861,
"learning_rate": 2.958120877597617e-07,
"loss": 0.6157,
"mean_token_accuracy": 0.8082807064056396,
"num_tokens": 82295451.0,
"step": 1258
},
{
"epoch": 0.8595323434033112,
"grad_norm": 0.7535333633422852,
"learning_rate": 2.930044244608199e-07,
"loss": 0.6065,
"mean_token_accuracy": 0.8120112419128418,
"num_tokens": 82360987.0,
"step": 1259
},
{
"epoch": 0.8602150537634409,
"grad_norm": 0.7360159158706665,
"learning_rate": 2.902093194213526e-07,
"loss": 0.6027,
"mean_token_accuracy": 0.8140120953321457,
"num_tokens": 82426523.0,
"step": 1260
},
{
"epoch": 0.8608977641235706,
"grad_norm": 0.7043842673301697,
"learning_rate": 2.874267885460827e-07,
"loss": 0.6022,
"mean_token_accuracy": 0.812301442027092,
"num_tokens": 82492059.0,
"step": 1261
},
{
"epoch": 0.8615804744837003,
"grad_norm": 0.692662239074707,
"learning_rate": 2.8465684766818406e-07,
"loss": 0.5733,
"mean_token_accuracy": 0.8192416429519653,
"num_tokens": 82556988.0,
"step": 1262
},
{
"epoch": 0.86226318484383,
"grad_norm": 0.6765474081039429,
"learning_rate": 2.8189951254919105e-07,
"loss": 0.5842,
"mean_token_accuracy": 0.8190467804670334,
"num_tokens": 82622241.0,
"step": 1263
},
{
"epoch": 0.8629458952039597,
"grad_norm": 0.7088792324066162,
"learning_rate": 2.791547988789087e-07,
"loss": 0.6155,
"mean_token_accuracy": 0.8090481460094452,
"num_tokens": 82687777.0,
"step": 1264
},
{
"epoch": 0.8636286055640894,
"grad_norm": 0.6913022398948669,
"learning_rate": 2.7642272227532214e-07,
"loss": 0.5895,
"mean_token_accuracy": 0.8141342848539352,
"num_tokens": 82753313.0,
"step": 1265
},
{
"epoch": 0.8643113159242192,
"grad_norm": 0.7497997283935547,
"learning_rate": 2.737032982845114e-07,
"loss": 0.6165,
"mean_token_accuracy": 0.8086052089929581,
"num_tokens": 82818849.0,
"step": 1266
},
{
"epoch": 0.8649940262843488,
"grad_norm": 0.7286543250083923,
"learning_rate": 2.7099654238055886e-07,
"loss": 0.6268,
"mean_token_accuracy": 0.8044813126325607,
"num_tokens": 82884385.0,
"step": 1267
},
{
"epoch": 0.8656767366444785,
"grad_norm": 0.7021951079368591,
"learning_rate": 2.683024699654629e-07,
"loss": 0.5923,
"mean_token_accuracy": 0.8144855797290802,
"num_tokens": 82949921.0,
"step": 1268
},
{
"epoch": 0.8663594470046083,
"grad_norm": 0.7155722975730896,
"learning_rate": 2.6562109636905085e-07,
"loss": 0.6121,
"mean_token_accuracy": 0.8079178929328918,
"num_tokens": 83015457.0,
"step": 1269
},
{
"epoch": 0.867042157364738,
"grad_norm": 0.7293785810470581,
"learning_rate": 2.629524368488906e-07,
"loss": 0.6081,
"mean_token_accuracy": 0.8101783990859985,
"num_tokens": 83080993.0,
"step": 1270
},
{
"epoch": 0.8677248677248677,
"grad_norm": 0.7298372983932495,
"learning_rate": 2.6029650659020467e-07,
"loss": 0.6082,
"mean_token_accuracy": 0.8083854168653488,
"num_tokens": 83146351.0,
"step": 1271
},
{
"epoch": 0.8684075780849975,
"grad_norm": 0.7006279826164246,
"learning_rate": 2.5765332070578296e-07,
"loss": 0.6112,
"mean_token_accuracy": 0.8127480298280716,
"num_tokens": 83211735.0,
"step": 1272
},
{
"epoch": 0.8690902884451271,
"grad_norm": 0.7038744688034058,
"learning_rate": 2.5502289423589844e-07,
"loss": 0.591,
"mean_token_accuracy": 0.8158559948205948,
"num_tokens": 83277014.0,
"step": 1273
},
{
"epoch": 0.8697729988052568,
"grad_norm": 0.7346209287643433,
"learning_rate": 2.52405242148219e-07,
"loss": 0.6247,
"mean_token_accuracy": 0.8049071878194809,
"num_tokens": 83342354.0,
"step": 1274
},
{
"epoch": 0.8704557091653866,
"grad_norm": 0.7262876629829407,
"learning_rate": 2.4980037933772487e-07,
"loss": 0.5764,
"mean_token_accuracy": 0.8189149498939514,
"num_tokens": 83407890.0,
"step": 1275
},
{
"epoch": 0.8711384195255163,
"grad_norm": 0.7252150177955627,
"learning_rate": 2.472083206266215e-07,
"loss": 0.6541,
"mean_token_accuracy": 0.7969246655702591,
"num_tokens": 83473156.0,
"step": 1276
},
{
"epoch": 0.871821129885646,
"grad_norm": 0.7076960206031799,
"learning_rate": 2.4462908076425706e-07,
"loss": 0.5983,
"mean_token_accuracy": 0.8119195997714996,
"num_tokens": 83538692.0,
"step": 1277
},
{
"epoch": 0.8725038402457758,
"grad_norm": 0.741290807723999,
"learning_rate": 2.4206267442703743e-07,
"loss": 0.5495,
"mean_token_accuracy": 0.8270227015018463,
"num_tokens": 83604067.0,
"step": 1278
},
{
"epoch": 0.8731865506059054,
"grad_norm": 0.6800980567932129,
"learning_rate": 2.3950911621834437e-07,
"loss": 0.5726,
"mean_token_accuracy": 0.8190676867961884,
"num_tokens": 83669603.0,
"step": 1279
},
{
"epoch": 0.8738692609660351,
"grad_norm": 0.6879988312721252,
"learning_rate": 2.3696842066844862e-07,
"loss": 0.5828,
"mean_token_accuracy": 0.8156311064958572,
"num_tokens": 83735139.0,
"step": 1280
},
{
"epoch": 0.8745519713261649,
"grad_norm": 0.7209987044334412,
"learning_rate": 2.3444060223443226e-07,
"loss": 0.5755,
"mean_token_accuracy": 0.818487286567688,
"num_tokens": 83800675.0,
"step": 1281
},
{
"epoch": 0.8752346816862946,
"grad_norm": 0.7542129755020142,
"learning_rate": 2.3192567530010313e-07,
"loss": 0.6229,
"mean_token_accuracy": 0.8044636845588684,
"num_tokens": 83866196.0,
"step": 1282
},
{
"epoch": 0.8759173920464243,
"grad_norm": 0.7246004939079285,
"learning_rate": 2.2942365417591288e-07,
"loss": 0.6183,
"mean_token_accuracy": 0.8091397881507874,
"num_tokens": 83931732.0,
"step": 1283
},
{
"epoch": 0.8766001024065541,
"grad_norm": 0.6989543437957764,
"learning_rate": 2.2693455309887702e-07,
"loss": 0.5791,
"mean_token_accuracy": 0.8160740435123444,
"num_tokens": 83997268.0,
"step": 1284
},
{
"epoch": 0.8772828127666837,
"grad_norm": 0.7213975787162781,
"learning_rate": 2.2445838623249478e-07,
"loss": 0.5927,
"mean_token_accuracy": 0.8150552809238434,
"num_tokens": 84062653.0,
"step": 1285
},
{
"epoch": 0.8779655231268134,
"grad_norm": 0.7489351630210876,
"learning_rate": 2.2199516766666373e-07,
"loss": 0.6014,
"mean_token_accuracy": 0.812301442027092,
"num_tokens": 84128189.0,
"step": 1286
},
{
"epoch": 0.8786482334869432,
"grad_norm": 0.7406776547431946,
"learning_rate": 2.1954491141760653e-07,
"loss": 0.5933,
"mean_token_accuracy": 0.8135691583156586,
"num_tokens": 84193725.0,
"step": 1287
},
{
"epoch": 0.8793309438470729,
"grad_norm": 0.7083876729011536,
"learning_rate": 2.1710763142778562e-07,
"loss": 0.5823,
"mean_token_accuracy": 0.8167155385017395,
"num_tokens": 84259261.0,
"step": 1288
},
{
"epoch": 0.8800136542072026,
"grad_norm": 0.7141042947769165,
"learning_rate": 2.1468334156582588e-07,
"loss": 0.6013,
"mean_token_accuracy": 0.8128054738044739,
"num_tokens": 84324797.0,
"step": 1289
},
{
"epoch": 0.8806963645673324,
"grad_norm": 0.6838583946228027,
"learning_rate": 2.122720556264357e-07,
"loss": 0.5893,
"mean_token_accuracy": 0.811652198433876,
"num_tokens": 84390053.0,
"step": 1290
},
{
"epoch": 0.881379074927462,
"grad_norm": 0.7378459572792053,
"learning_rate": 2.0987378733033053e-07,
"loss": 0.6419,
"mean_token_accuracy": 0.8003421276807785,
"num_tokens": 84455589.0,
"step": 1291
},
{
"epoch": 0.8820617852875917,
"grad_norm": 0.7026259899139404,
"learning_rate": 2.074885503241486e-07,
"loss": 0.5948,
"mean_token_accuracy": 0.8153750449419022,
"num_tokens": 84521076.0,
"step": 1292
},
{
"epoch": 0.8827444956477215,
"grad_norm": 0.7418661713600159,
"learning_rate": 2.0511635818038167e-07,
"loss": 0.5942,
"mean_token_accuracy": 0.8132331371307373,
"num_tokens": 84586612.0,
"step": 1293
},
{
"epoch": 0.8834272060078512,
"grad_norm": 0.7378196120262146,
"learning_rate": 2.0275722439729084e-07,
"loss": 0.6204,
"mean_token_accuracy": 0.8063240945339203,
"num_tokens": 84651365.0,
"step": 1294
},
{
"epoch": 0.8841099163679809,
"grad_norm": 0.7377257943153381,
"learning_rate": 2.0041116239883418e-07,
"loss": 0.6248,
"mean_token_accuracy": 0.8058953881263733,
"num_tokens": 84716615.0,
"step": 1295
},
{
"epoch": 0.8847926267281107,
"grad_norm": 0.7164666056632996,
"learning_rate": 1.9807818553458647e-07,
"loss": 0.6007,
"mean_token_accuracy": 0.8099205642938614,
"num_tokens": 84781744.0,
"step": 1296
},
{
"epoch": 0.8854753370882403,
"grad_norm": 0.6761177182197571,
"learning_rate": 1.9575830707966787e-07,
"loss": 0.5384,
"mean_token_accuracy": 0.8291252255439758,
"num_tokens": 84846213.0,
"step": 1297
},
{
"epoch": 0.88615804744837,
"grad_norm": 0.6863158941268921,
"learning_rate": 1.93451540234664e-07,
"loss": 0.5811,
"mean_token_accuracy": 0.8185331076383591,
"num_tokens": 84911749.0,
"step": 1298
},
{
"epoch": 0.8868407578084997,
"grad_norm": 0.7062719464302063,
"learning_rate": 1.9115789812555379e-07,
"loss": 0.5811,
"mean_token_accuracy": 0.8158602118492126,
"num_tokens": 84977285.0,
"step": 1299
},
{
"epoch": 0.8875234681686295,
"grad_norm": 0.7036576867103577,
"learning_rate": 1.8887739380363286e-07,
"loss": 0.5893,
"mean_token_accuracy": 0.8168071806430817,
"num_tokens": 85042821.0,
"step": 1300
},
{
"epoch": 0.8882061785287592,
"grad_norm": 0.6974568963050842,
"learning_rate": 1.8661004024544155e-07,
"loss": 0.584,
"mean_token_accuracy": 0.8170604258775711,
"num_tokens": 85107913.0,
"step": 1301
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.7532427310943604,
"learning_rate": 1.843558503526871e-07,
"loss": 0.6289,
"mean_token_accuracy": 0.803327277302742,
"num_tokens": 85173223.0,
"step": 1302
},
{
"epoch": 0.8895715992490186,
"grad_norm": 0.693387508392334,
"learning_rate": 1.8211483695217607e-07,
"loss": 0.5964,
"mean_token_accuracy": 0.8128963261842728,
"num_tokens": 85238459.0,
"step": 1303
},
{
"epoch": 0.8902543096091483,
"grad_norm": 0.722933828830719,
"learning_rate": 1.7988701279573527e-07,
"loss": 0.6283,
"mean_token_accuracy": 0.8051991760730743,
"num_tokens": 85303995.0,
"step": 1304
},
{
"epoch": 0.890937019969278,
"grad_norm": 0.6916481852531433,
"learning_rate": 1.776723905601438e-07,
"loss": 0.5839,
"mean_token_accuracy": 0.8173570334911346,
"num_tokens": 85369531.0,
"step": 1305
},
{
"epoch": 0.8916197303294078,
"grad_norm": 0.7146339416503906,
"learning_rate": 1.7547098284705715e-07,
"loss": 0.5917,
"mean_token_accuracy": 0.813369631767273,
"num_tokens": 85434804.0,
"step": 1306
},
{
"epoch": 0.8923024406895375,
"grad_norm": 0.7200230956077576,
"learning_rate": 1.732828021829408e-07,
"loss": 0.5957,
"mean_token_accuracy": 0.8136608004570007,
"num_tokens": 85500340.0,
"step": 1307
},
{
"epoch": 0.8929851510496671,
"grad_norm": 0.6903286576271057,
"learning_rate": 1.711078610189912e-07,
"loss": 0.6013,
"mean_token_accuracy": 0.8124754130840302,
"num_tokens": 85565806.0,
"step": 1308
},
{
"epoch": 0.8936678614097969,
"grad_norm": 0.7025361061096191,
"learning_rate": 1.6894617173107336e-07,
"loss": 0.6147,
"mean_token_accuracy": 0.8069481402635574,
"num_tokens": 85630679.0,
"step": 1309
},
{
"epoch": 0.8943505717699266,
"grad_norm": 0.6796531081199646,
"learning_rate": 1.6679774661964382e-07,
"loss": 0.5591,
"mean_token_accuracy": 0.825024425983429,
"num_tokens": 85696215.0,
"step": 1310
},
{
"epoch": 0.8950332821300563,
"grad_norm": 0.7060521841049194,
"learning_rate": 1.6466259790968415e-07,
"loss": 0.6564,
"mean_token_accuracy": 0.7979288846254349,
"num_tokens": 85761751.0,
"step": 1311
},
{
"epoch": 0.8957159924901861,
"grad_norm": 0.7059783935546875,
"learning_rate": 1.6254073775063078e-07,
"loss": 0.571,
"mean_token_accuracy": 0.8190450817346573,
"num_tokens": 85827263.0,
"step": 1312
},
{
"epoch": 0.8963987028503158,
"grad_norm": 0.7131505012512207,
"learning_rate": 1.604321782163043e-07,
"loss": 0.5895,
"mean_token_accuracy": 0.8140321224927902,
"num_tokens": 85891330.0,
"step": 1313
},
{
"epoch": 0.8970814132104454,
"grad_norm": 0.701805591583252,
"learning_rate": 1.5833693130484328e-07,
"loss": 0.5899,
"mean_token_accuracy": 0.8150812536478043,
"num_tokens": 85956866.0,
"step": 1314
},
{
"epoch": 0.8977641235705752,
"grad_norm": 0.7065866589546204,
"learning_rate": 1.5625500893863445e-07,
"loss": 0.6296,
"mean_token_accuracy": 0.8030914068222046,
"num_tokens": 86022402.0,
"step": 1315
},
{
"epoch": 0.8984468339307049,
"grad_norm": 0.7108662724494934,
"learning_rate": 1.541864229642448e-07,
"loss": 0.6028,
"mean_token_accuracy": 0.8082844614982605,
"num_tokens": 86087938.0,
"step": 1316
},
{
"epoch": 0.8991295442908346,
"grad_norm": 0.7196572422981262,
"learning_rate": 1.5213118515235493e-07,
"loss": 0.5941,
"mean_token_accuracy": 0.8129324316978455,
"num_tokens": 86153367.0,
"step": 1317
},
{
"epoch": 0.8998122546509644,
"grad_norm": 0.7143006920814514,
"learning_rate": 1.5008930719769084e-07,
"loss": 0.5996,
"mean_token_accuracy": 0.8113086521625519,
"num_tokens": 86218903.0,
"step": 1318
},
{
"epoch": 0.900494965011094,
"grad_norm": 0.6986265778541565,
"learning_rate": 1.4806080071895906e-07,
"loss": 0.5964,
"mean_token_accuracy": 0.8124541789293289,
"num_tokens": 86284439.0,
"step": 1319
},
{
"epoch": 0.9011776753712237,
"grad_norm": 0.6960071325302124,
"learning_rate": 1.4604567725877926e-07,
"loss": 0.6196,
"mean_token_accuracy": 0.8103516399860382,
"num_tokens": 86349442.0,
"step": 1320
},
{
"epoch": 0.9018603857313535,
"grad_norm": 0.6868788599967957,
"learning_rate": 1.4404394828361896e-07,
"loss": 0.5626,
"mean_token_accuracy": 0.8217864036560059,
"num_tokens": 86414978.0,
"step": 1321
},
{
"epoch": 0.9025430960914832,
"grad_norm": 0.6804670095443726,
"learning_rate": 1.4205562518372851e-07,
"loss": 0.5924,
"mean_token_accuracy": 0.8156616538763046,
"num_tokens": 86480514.0,
"step": 1322
},
{
"epoch": 0.9032258064516129,
"grad_norm": 0.7235873937606812,
"learning_rate": 1.4008071927307605e-07,
"loss": 0.6339,
"mean_token_accuracy": 0.8052700459957123,
"num_tokens": 86545580.0,
"step": 1323
},
{
"epoch": 0.9039085168117427,
"grad_norm": 0.6959832310676575,
"learning_rate": 1.3811924178928292e-07,
"loss": 0.5681,
"mean_token_accuracy": 0.8211601823568344,
"num_tokens": 86611116.0,
"step": 1324
},
{
"epoch": 0.9045912271718723,
"grad_norm": 0.6909657120704651,
"learning_rate": 1.3617120389356002e-07,
"loss": 0.6008,
"mean_token_accuracy": 0.8118375539779663,
"num_tokens": 86676025.0,
"step": 1325
},
{
"epoch": 0.905273937532002,
"grad_norm": 0.7252708077430725,
"learning_rate": 1.3423661667064463e-07,
"loss": 0.6057,
"mean_token_accuracy": 0.8103173822164536,
"num_tokens": 86741105.0,
"step": 1326
},
{
"epoch": 0.9059566478921318,
"grad_norm": 0.6941865086555481,
"learning_rate": 1.323154911287358e-07,
"loss": 0.5833,
"mean_token_accuracy": 0.8163434863090515,
"num_tokens": 86806449.0,
"step": 1327
},
{
"epoch": 0.9066393582522615,
"grad_norm": 0.6878736019134521,
"learning_rate": 1.3040783819943476e-07,
"loss": 0.5342,
"mean_token_accuracy": 0.8290414214134216,
"num_tokens": 86871985.0,
"step": 1328
},
{
"epoch": 0.9073220686123912,
"grad_norm": 0.7258753776550293,
"learning_rate": 1.285136687376784e-07,
"loss": 0.5901,
"mean_token_accuracy": 0.812301442027092,
"num_tokens": 86937521.0,
"step": 1329
},
{
"epoch": 0.9080047789725209,
"grad_norm": 0.7099855542182922,
"learning_rate": 1.26632993521682e-07,
"loss": 0.5998,
"mean_token_accuracy": 0.8137090355157852,
"num_tokens": 87002994.0,
"step": 1330
},
{
"epoch": 0.9086874893326506,
"grad_norm": 0.6997334957122803,
"learning_rate": 1.247658232528748e-07,
"loss": 0.5838,
"mean_token_accuracy": 0.8170560598373413,
"num_tokens": 87068266.0,
"step": 1331
},
{
"epoch": 0.9093701996927803,
"grad_norm": 0.7097135186195374,
"learning_rate": 1.2291216855584016e-07,
"loss": 0.6261,
"mean_token_accuracy": 0.804741770029068,
"num_tokens": 87132857.0,
"step": 1332
},
{
"epoch": 0.91005291005291,
"grad_norm": 0.730303168296814,
"learning_rate": 1.21072039978255e-07,
"loss": 0.5975,
"mean_token_accuracy": 0.8127291053533554,
"num_tokens": 87198393.0,
"step": 1333
},
{
"epoch": 0.9107356204130398,
"grad_norm": 0.7141602635383606,
"learning_rate": 1.1924544799083089e-07,
"loss": 0.6183,
"mean_token_accuracy": 0.8092756867408752,
"num_tokens": 87263900.0,
"step": 1334
},
{
"epoch": 0.9114183307731695,
"grad_norm": 0.7135557532310486,
"learning_rate": 1.1743240298725117e-07,
"loss": 0.6232,
"mean_token_accuracy": 0.8066501766443253,
"num_tokens": 87329436.0,
"step": 1335
},
{
"epoch": 0.9121010411332992,
"grad_norm": 0.6904719471931458,
"learning_rate": 1.1563291528411653e-07,
"loss": 0.5862,
"mean_token_accuracy": 0.8155700117349625,
"num_tokens": 87394972.0,
"step": 1336
},
{
"epoch": 0.9127837514934289,
"grad_norm": 0.6796468496322632,
"learning_rate": 1.1384699512088204e-07,
"loss": 0.5445,
"mean_token_accuracy": 0.8271016478538513,
"num_tokens": 87460508.0,
"step": 1337
},
{
"epoch": 0.9134664618535586,
"grad_norm": 0.7061638236045837,
"learning_rate": 1.1207465265980183e-07,
"loss": 0.6197,
"mean_token_accuracy": 0.8061693012714386,
"num_tokens": 87525967.0,
"step": 1338
},
{
"epoch": 0.9141491722136883,
"grad_norm": 0.7025821208953857,
"learning_rate": 1.1031589798586873e-07,
"loss": 0.6177,
"mean_token_accuracy": 0.8084349930286407,
"num_tokens": 87591261.0,
"step": 1339
},
{
"epoch": 0.9148318825738181,
"grad_norm": 0.7310721278190613,
"learning_rate": 1.0857074110676075e-07,
"loss": 0.6389,
"mean_token_accuracy": 0.8023429960012436,
"num_tokens": 87656797.0,
"step": 1340
},
{
"epoch": 0.9155145929339478,
"grad_norm": 0.7138649821281433,
"learning_rate": 1.0683919195277809e-07,
"loss": 0.5841,
"mean_token_accuracy": 0.8164864331483841,
"num_tokens": 87722333.0,
"step": 1341
},
{
"epoch": 0.9161973032940774,
"grad_norm": 0.7323006391525269,
"learning_rate": 1.0512126037679371e-07,
"loss": 0.6125,
"mean_token_accuracy": 0.8062072396278381,
"num_tokens": 87787869.0,
"step": 1342
},
{
"epoch": 0.9168800136542072,
"grad_norm": 0.7108177542686462,
"learning_rate": 1.0341695615419089e-07,
"loss": 0.5828,
"mean_token_accuracy": 0.8172561675310135,
"num_tokens": 87853258.0,
"step": 1343
},
{
"epoch": 0.9175627240143369,
"grad_norm": 0.6755034923553467,
"learning_rate": 1.0172628898281329e-07,
"loss": 0.5654,
"mean_token_accuracy": 0.8198619186878204,
"num_tokens": 87918794.0,
"step": 1344
},
{
"epoch": 0.9182454343744666,
"grad_norm": 0.7065688967704773,
"learning_rate": 1.0004926848290409e-07,
"loss": 0.5673,
"mean_token_accuracy": 0.8215725719928741,
"num_tokens": 87984330.0,
"step": 1345
},
{
"epoch": 0.9189281447345964,
"grad_norm": 0.7121526598930359,
"learning_rate": 9.838590419705585e-08,
"loss": 0.556,
"mean_token_accuracy": 0.8245990574359894,
"num_tokens": 88049588.0,
"step": 1346
},
{
"epoch": 0.9196108550947261,
"grad_norm": 0.7254539132118225,
"learning_rate": 9.673620559015411e-08,
"loss": 0.638,
"mean_token_accuracy": 0.8036533892154694,
"num_tokens": 88114794.0,
"step": 1347
},
{
"epoch": 0.9202935654548557,
"grad_norm": 0.7252166271209717,
"learning_rate": 9.510018204932386e-08,
"loss": 0.5902,
"mean_token_accuracy": 0.8169325292110443,
"num_tokens": 88179712.0,
"step": 1348
},
{
"epoch": 0.9209762758149855,
"grad_norm": 0.7186472415924072,
"learning_rate": 9.347784288387534e-08,
"loss": 0.6058,
"mean_token_accuracy": 0.812102884054184,
"num_tokens": 88245248.0,
"step": 1349
},
{
"epoch": 0.9216589861751152,
"grad_norm": 0.7491932511329651,
"learning_rate": 9.18691973252539e-08,
"loss": 0.6458,
"mean_token_accuracy": 0.8007392585277557,
"num_tokens": 88310784.0,
"step": 1350
},
{
"epoch": 0.9223416965352449,
"grad_norm": 0.6888880729675293,
"learning_rate": 9.027425452698302e-08,
"loss": 0.5626,
"mean_token_accuracy": 0.8191746026277542,
"num_tokens": 88376320.0,
"step": 1351
},
{
"epoch": 0.9230244068953747,
"grad_norm": 0.739963948726654,
"learning_rate": 8.869302356461634e-08,
"loss": 0.6184,
"mean_token_accuracy": 0.8079484403133392,
"num_tokens": 88441856.0,
"step": 1352
},
{
"epoch": 0.9237071172555044,
"grad_norm": 0.7058925032615662,
"learning_rate": 8.712551343568354e-08,
"loss": 0.6479,
"mean_token_accuracy": 0.8010905534029007,
"num_tokens": 88507392.0,
"step": 1353
},
{
"epoch": 0.924389827615634,
"grad_norm": 0.7416124939918518,
"learning_rate": 8.557173305964034e-08,
"loss": 0.6333,
"mean_token_accuracy": 0.8034361302852631,
"num_tokens": 88572667.0,
"step": 1354
},
{
"epoch": 0.9250725379757638,
"grad_norm": 0.6951413750648499,
"learning_rate": 8.40316912778169e-08,
"loss": 0.5833,
"mean_token_accuracy": 0.8171948045492172,
"num_tokens": 88638102.0,
"step": 1355
},
{
"epoch": 0.9257552483358935,
"grad_norm": 0.7167767882347107,
"learning_rate": 8.2505396853369e-08,
"loss": 0.6298,
"mean_token_accuracy": 0.8051463514566422,
"num_tokens": 88703485.0,
"step": 1356
},
{
"epoch": 0.9264379586960232,
"grad_norm": 0.6876281499862671,
"learning_rate": 8.099285847122496e-08,
"loss": 0.6061,
"mean_token_accuracy": 0.8101783990859985,
"num_tokens": 88769021.0,
"step": 1357
},
{
"epoch": 0.927120669056153,
"grad_norm": 0.7173566222190857,
"learning_rate": 7.949408473804099e-08,
"loss": 0.573,
"mean_token_accuracy": 0.8186247497797012,
"num_tokens": 88834557.0,
"step": 1358
},
{
"epoch": 0.9278033794162827,
"grad_norm": 0.73436439037323,
"learning_rate": 7.800908418214792e-08,
"loss": 0.6062,
"mean_token_accuracy": 0.8138167709112167,
"num_tokens": 88900047.0,
"step": 1359
},
{
"epoch": 0.9284860897764123,
"grad_norm": 0.7449289560317993,
"learning_rate": 7.653786525350482e-08,
"loss": 0.6025,
"mean_token_accuracy": 0.8100714832544327,
"num_tokens": 88965583.0,
"step": 1360
},
{
"epoch": 0.929168800136542,
"grad_norm": 0.709634006023407,
"learning_rate": 7.508043632365042e-08,
"loss": 0.5799,
"mean_token_accuracy": 0.8167552798986435,
"num_tokens": 89030784.0,
"step": 1361
},
{
"epoch": 0.9298515104966718,
"grad_norm": 0.7063040137290955,
"learning_rate": 7.363680568565568e-08,
"loss": 0.6098,
"mean_token_accuracy": 0.809704914689064,
"num_tokens": 89096320.0,
"step": 1362
},
{
"epoch": 0.9305342208568015,
"grad_norm": 0.7060918211936951,
"learning_rate": 7.220698155407602e-08,
"loss": 0.5828,
"mean_token_accuracy": 0.8185582906007767,
"num_tokens": 89161825.0,
"step": 1363
},
{
"epoch": 0.9312169312169312,
"grad_norm": 0.6989777088165283,
"learning_rate": 7.079097206490581e-08,
"loss": 0.6244,
"mean_token_accuracy": 0.8075470179319382,
"num_tokens": 89226535.0,
"step": 1364
},
{
"epoch": 0.931899641577061,
"grad_norm": 0.6618038415908813,
"learning_rate": 6.938878527553067e-08,
"loss": 0.5753,
"mean_token_accuracy": 0.8200604766607285,
"num_tokens": 89292071.0,
"step": 1365
},
{
"epoch": 0.9325823519371906,
"grad_norm": 0.6828494668006897,
"learning_rate": 6.800042916468186e-08,
"loss": 0.5724,
"mean_token_accuracy": 0.8199077546596527,
"num_tokens": 89357607.0,
"step": 1366
},
{
"epoch": 0.9332650622973203,
"grad_norm": 0.7094539999961853,
"learning_rate": 6.662591163239224e-08,
"loss": 0.6011,
"mean_token_accuracy": 0.8109215497970581,
"num_tokens": 89423043.0,
"step": 1367
},
{
"epoch": 0.9339477726574501,
"grad_norm": 0.6817854046821594,
"learning_rate": 6.526524049994904e-08,
"loss": 0.5704,
"mean_token_accuracy": 0.8201979398727417,
"num_tokens": 89488579.0,
"step": 1368
},
{
"epoch": 0.9346304830175798,
"grad_norm": 0.7198058366775513,
"learning_rate": 6.391842350985195e-08,
"loss": 0.6235,
"mean_token_accuracy": 0.8059934079647064,
"num_tokens": 89554115.0,
"step": 1369
},
{
"epoch": 0.9353131933777095,
"grad_norm": 0.7299594283103943,
"learning_rate": 6.258546832576651e-08,
"loss": 0.6566,
"mean_token_accuracy": 0.7970735728740692,
"num_tokens": 89619651.0,
"step": 1370
},
{
"epoch": 0.9359959037378393,
"grad_norm": 0.689508318901062,
"learning_rate": 6.126638253248273e-08,
"loss": 0.5713,
"mean_token_accuracy": 0.8205289244651794,
"num_tokens": 89684778.0,
"step": 1371
},
{
"epoch": 0.9366786140979689,
"grad_norm": 0.6871770024299622,
"learning_rate": 5.996117363587045e-08,
"loss": 0.5902,
"mean_token_accuracy": 0.8142588883638382,
"num_tokens": 89750254.0,
"step": 1372
},
{
"epoch": 0.9373613244580986,
"grad_norm": 0.6985852122306824,
"learning_rate": 5.866984906283707e-08,
"loss": 0.6062,
"mean_token_accuracy": 0.8118279576301575,
"num_tokens": 89815790.0,
"step": 1373
},
{
"epoch": 0.9380440348182284,
"grad_norm": 0.7541195154190063,
"learning_rate": 5.739241616128544e-08,
"loss": 0.618,
"mean_token_accuracy": 0.8069403767585754,
"num_tokens": 89881326.0,
"step": 1374
},
{
"epoch": 0.9387267451783581,
"grad_norm": 0.7442590594291687,
"learning_rate": 5.6128882200071897e-08,
"loss": 0.5782,
"mean_token_accuracy": 0.8197620958089828,
"num_tokens": 89946482.0,
"step": 1375
},
{
"epoch": 0.9394094555384878,
"grad_norm": 0.6985146403312683,
"learning_rate": 5.4879254368964964e-08,
"loss": 0.6205,
"mean_token_accuracy": 0.8052755445241928,
"num_tokens": 90012018.0,
"step": 1376
},
{
"epoch": 0.9400921658986175,
"grad_norm": 0.7548423409461975,
"learning_rate": 5.3643539778605036e-08,
"loss": 0.6038,
"mean_token_accuracy": 0.8120417892932892,
"num_tokens": 90077554.0,
"step": 1377
},
{
"epoch": 0.9407748762587472,
"grad_norm": 0.6929084658622742,
"learning_rate": 5.2421745460461416e-08,
"loss": 0.6033,
"mean_token_accuracy": 0.8120417892932892,
"num_tokens": 90143090.0,
"step": 1378
},
{
"epoch": 0.9414575866188769,
"grad_norm": 0.7029266357421875,
"learning_rate": 5.121387836679676e-08,
"loss": 0.6018,
"mean_token_accuracy": 0.8125683218240738,
"num_tokens": 90208547.0,
"step": 1379
},
{
"epoch": 0.9421402969790067,
"grad_norm": 0.7251154780387878,
"learning_rate": 5.001994537062266e-08,
"loss": 0.6186,
"mean_token_accuracy": 0.8107893466949463,
"num_tokens": 90274083.0,
"step": 1380
},
{
"epoch": 0.9428230073391364,
"grad_norm": 0.7269313931465149,
"learning_rate": 4.8839953265664705e-08,
"loss": 0.6383,
"mean_token_accuracy": 0.8016709536314011,
"num_tokens": 90339619.0,
"step": 1381
},
{
"epoch": 0.943505717699266,
"grad_norm": 0.7003495693206787,
"learning_rate": 4.7673908766319996e-08,
"loss": 0.5779,
"mean_token_accuracy": 0.8185178339481354,
"num_tokens": 90405155.0,
"step": 1382
},
{
"epoch": 0.9441884280593958,
"grad_norm": 0.7337906956672668,
"learning_rate": 4.652181850762327e-08,
"loss": 0.6347,
"mean_token_accuracy": 0.8032441437244415,
"num_tokens": 90470691.0,
"step": 1383
},
{
"epoch": 0.9448711384195255,
"grad_norm": 0.7205424308776855,
"learning_rate": 4.5383689045204184e-08,
"loss": 0.595,
"mean_token_accuracy": 0.8148368746042252,
"num_tokens": 90536227.0,
"step": 1384
},
{
"epoch": 0.9455538487796552,
"grad_norm": 0.7154881954193115,
"learning_rate": 4.425952685525453e-08,
"loss": 0.5951,
"mean_token_accuracy": 0.8150924146175385,
"num_tokens": 90600758.0,
"step": 1385
},
{
"epoch": 0.946236559139785,
"grad_norm": 0.7424868941307068,
"learning_rate": 4.3149338334488864e-08,
"loss": 0.617,
"mean_token_accuracy": 0.8103158473968506,
"num_tokens": 90666294.0,
"step": 1386
},
{
"epoch": 0.9469192694999147,
"grad_norm": 0.7165055274963379,
"learning_rate": 4.2053129800108114e-08,
"loss": 0.5895,
"mean_token_accuracy": 0.8167934268712997,
"num_tokens": 90731240.0,
"step": 1387
},
{
"epoch": 0.9476019798600444,
"grad_norm": 0.7434617877006531,
"learning_rate": 4.0970907489764625e-08,
"loss": 0.6281,
"mean_token_accuracy": 0.8033971935510635,
"num_tokens": 90796695.0,
"step": 1388
},
{
"epoch": 0.9482846902201741,
"grad_norm": 0.7286208868026733,
"learning_rate": 3.990267756152688e-08,
"loss": 0.6078,
"mean_token_accuracy": 0.8082691878080368,
"num_tokens": 90862231.0,
"step": 1389
},
{
"epoch": 0.9489674005803038,
"grad_norm": 0.7145051956176758,
"learning_rate": 3.8848446093842365e-08,
"loss": 0.6077,
"mean_token_accuracy": 0.8082844614982605,
"num_tokens": 90927767.0,
"step": 1390
},
{
"epoch": 0.9496501109404335,
"grad_norm": 0.7143110036849976,
"learning_rate": 3.780821908550614e-08,
"loss": 0.6146,
"mean_token_accuracy": 0.8080993145704269,
"num_tokens": 90993192.0,
"step": 1391
},
{
"epoch": 0.9503328213005633,
"grad_norm": 0.7215762138366699,
"learning_rate": 3.6782002455623686e-08,
"loss": 0.5857,
"mean_token_accuracy": 0.8155962228775024,
"num_tokens": 91058644.0,
"step": 1392
},
{
"epoch": 0.951015531660693,
"grad_norm": 0.7443726062774658,
"learning_rate": 3.5769802043579546e-08,
"loss": 0.6452,
"mean_token_accuracy": 0.7994347214698792,
"num_tokens": 91123763.0,
"step": 1393
},
{
"epoch": 0.9516982420208226,
"grad_norm": 0.71949702501297,
"learning_rate": 3.477162360900177e-08,
"loss": 0.601,
"mean_token_accuracy": 0.8104380518198013,
"num_tokens": 91189299.0,
"step": 1394
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.7069823741912842,
"learning_rate": 3.3787472831732225e-08,
"loss": 0.5817,
"mean_token_accuracy": 0.8176014125347137,
"num_tokens": 91254835.0,
"step": 1395
},
{
"epoch": 0.9530636627410821,
"grad_norm": 0.7261773943901062,
"learning_rate": 3.281735531179053e-08,
"loss": 0.6213,
"mean_token_accuracy": 0.8069199919700623,
"num_tokens": 91320310.0,
"step": 1396
},
{
"epoch": 0.9537463731012118,
"grad_norm": 0.6818092465400696,
"learning_rate": 3.186127656934629e-08,
"loss": 0.5901,
"mean_token_accuracy": 0.8161265105009079,
"num_tokens": 91385722.0,
"step": 1397
},
{
"epoch": 0.9544290834613415,
"grad_norm": 0.7226613759994507,
"learning_rate": 3.0919242044683554e-08,
"loss": 0.6151,
"mean_token_accuracy": 0.8083223253488541,
"num_tokens": 91451157.0,
"step": 1398
},
{
"epoch": 0.9551117938214713,
"grad_norm": 0.7244173884391785,
"learning_rate": 2.999125709817363e-08,
"loss": 0.6046,
"mean_token_accuracy": 0.8112988919019699,
"num_tokens": 91516667.0,
"step": 1399
},
{
"epoch": 0.955794504181601,
"grad_norm": 0.722210705280304,
"learning_rate": 2.9077327010241242e-08,
"loss": 0.6025,
"mean_token_accuracy": 0.8122861683368683,
"num_tokens": 91582203.0,
"step": 1400
},
{
"epoch": 0.9564772145417306,
"grad_norm": 0.7271984815597534,
"learning_rate": 2.817745698133728e-08,
"loss": 0.6074,
"mean_token_accuracy": 0.80910924077034,
"num_tokens": 91647739.0,
"step": 1401
},
{
"epoch": 0.9571599249018604,
"grad_norm": 0.7084903120994568,
"learning_rate": 2.7291652131908043e-08,
"loss": 0.6065,
"mean_token_accuracy": 0.808757945895195,
"num_tokens": 91713275.0,
"step": 1402
},
{
"epoch": 0.9578426352619901,
"grad_norm": 0.7087424993515015,
"learning_rate": 2.6419917502364667e-08,
"loss": 0.5937,
"mean_token_accuracy": 0.8132331371307373,
"num_tokens": 91778811.0,
"step": 1403
},
{
"epoch": 0.9585253456221198,
"grad_norm": 0.7017878293991089,
"learning_rate": 2.5562258053057343e-08,
"loss": 0.61,
"mean_token_accuracy": 0.8106268793344498,
"num_tokens": 91844236.0,
"step": 1404
},
{
"epoch": 0.9592080559822496,
"grad_norm": 0.7056769132614136,
"learning_rate": 2.4718678664245323e-08,
"loss": 0.5793,
"mean_token_accuracy": 0.8190218657255173,
"num_tokens": 91909772.0,
"step": 1405
},
{
"epoch": 0.9598907663423792,
"grad_norm": 0.6610713005065918,
"learning_rate": 2.3889184136068334e-08,
"loss": 0.5798,
"mean_token_accuracy": 0.8191746026277542,
"num_tokens": 91975308.0,
"step": 1406
},
{
"epoch": 0.9605734767025089,
"grad_norm": 0.7304586172103882,
"learning_rate": 2.3073779188521606e-08,
"loss": 0.6338,
"mean_token_accuracy": 0.8037023395299911,
"num_tokens": 92040844.0,
"step": 1407
},
{
"epoch": 0.9612561870626387,
"grad_norm": 0.7191253304481506,
"learning_rate": 2.2272468461427276e-08,
"loss": 0.6016,
"mean_token_accuracy": 0.8119097203016281,
"num_tokens": 92106227.0,
"step": 1408
},
{
"epoch": 0.9619388974227684,
"grad_norm": 0.7064103484153748,
"learning_rate": 2.1485256514408025e-08,
"loss": 0.5488,
"mean_token_accuracy": 0.8251618891954422,
"num_tokens": 92171763.0,
"step": 1409
},
{
"epoch": 0.9626216077828981,
"grad_norm": 0.7105903029441833,
"learning_rate": 2.071214782686265e-08,
"loss": 0.5767,
"mean_token_accuracy": 0.81931933760643,
"num_tokens": 92236780.0,
"step": 1410
},
{
"epoch": 0.9633043181430279,
"grad_norm": 0.71761554479599,
"learning_rate": 1.9953146797938306e-08,
"loss": 0.6162,
"mean_token_accuracy": 0.8084098994731903,
"num_tokens": 92302202.0,
"step": 1411
},
{
"epoch": 0.9639870285031575,
"grad_norm": 0.7019249200820923,
"learning_rate": 1.9208257746507476e-08,
"loss": 0.5907,
"mean_token_accuracy": 0.8163642436265945,
"num_tokens": 92367738.0,
"step": 1412
},
{
"epoch": 0.9646697388632872,
"grad_norm": 0.7232638597488403,
"learning_rate": 1.847748491114215e-08,
"loss": 0.6128,
"mean_token_accuracy": 0.8088495880365372,
"num_tokens": 92433274.0,
"step": 1413
},
{
"epoch": 0.965352449223417,
"grad_norm": 0.7209507822990417,
"learning_rate": 1.7760832450090526e-08,
"loss": 0.6016,
"mean_token_accuracy": 0.8138899058103561,
"num_tokens": 92498810.0,
"step": 1414
},
{
"epoch": 0.9660351595835467,
"grad_norm": 0.6921422481536865,
"learning_rate": 1.705830444125256e-08,
"loss": 0.6244,
"mean_token_accuracy": 0.8077894598245621,
"num_tokens": 92564344.0,
"step": 1415
},
{
"epoch": 0.9667178699436764,
"grad_norm": 0.7221968770027161,
"learning_rate": 1.6369904882157507e-08,
"loss": 0.6278,
"mean_token_accuracy": 0.804711103439331,
"num_tokens": 92629875.0,
"step": 1416
},
{
"epoch": 0.9674005803038062,
"grad_norm": 0.6897109150886536,
"learning_rate": 1.569563768994031e-08,
"loss": 0.5864,
"mean_token_accuracy": 0.8165169805288315,
"num_tokens": 92695411.0,
"step": 1417
},
{
"epoch": 0.9680832906639358,
"grad_norm": 0.7170175909996033,
"learning_rate": 1.5035506701320812e-08,
"loss": 0.5601,
"mean_token_accuracy": 0.8227028250694275,
"num_tokens": 92760947.0,
"step": 1418
},
{
"epoch": 0.9687660010240655,
"grad_norm": 0.6924940943717957,
"learning_rate": 1.4389515672579568e-08,
"loss": 0.5804,
"mean_token_accuracy": 0.8154478222131729,
"num_tokens": 92826483.0,
"step": 1419
},
{
"epoch": 0.9694487113841953,
"grad_norm": 0.6951374411582947,
"learning_rate": 1.3757668279539282e-08,
"loss": 0.5984,
"mean_token_accuracy": 0.811620831489563,
"num_tokens": 92892016.0,
"step": 1420
},
{
"epoch": 0.970131421744325,
"grad_norm": 0.7141850590705872,
"learning_rate": 1.313996811754148e-08,
"loss": 0.5796,
"mean_token_accuracy": 0.8167347013950348,
"num_tokens": 92957477.0,
"step": 1421
},
{
"epoch": 0.9708141321044547,
"grad_norm": 0.7237462401390076,
"learning_rate": 1.2536418701427078e-08,
"loss": 0.6088,
"mean_token_accuracy": 0.8100256621837616,
"num_tokens": 93023013.0,
"step": 1422
},
{
"epoch": 0.9714968424645845,
"grad_norm": 0.7390242218971252,
"learning_rate": 1.1947023465517238e-08,
"loss": 0.6607,
"mean_token_accuracy": 0.7965389937162399,
"num_tokens": 93088549.0,
"step": 1423
},
{
"epoch": 0.9721795528247141,
"grad_norm": 0.693906307220459,
"learning_rate": 1.1371785763591714e-08,
"loss": 0.5722,
"mean_token_accuracy": 0.8215420246124268,
"num_tokens": 93154085.0,
"step": 1424
},
{
"epoch": 0.9728622631848438,
"grad_norm": 0.7362616062164307,
"learning_rate": 1.0810708868871645e-08,
"loss": 0.6348,
"mean_token_accuracy": 0.802773505449295,
"num_tokens": 93219447.0,
"step": 1425
},
{
"epoch": 0.9735449735449735,
"grad_norm": 0.6923553347587585,
"learning_rate": 1.0263795974000401e-08,
"loss": 0.5766,
"mean_token_accuracy": 0.8195104598999023,
"num_tokens": 93284829.0,
"step": 1426
},
{
"epoch": 0.9742276839051033,
"grad_norm": 0.7165243625640869,
"learning_rate": 9.731050191024716e-09,
"loss": 0.6419,
"mean_token_accuracy": 0.8003726899623871,
"num_tokens": 93350365.0,
"step": 1427
},
{
"epoch": 0.974910394265233,
"grad_norm": 0.6946384906768799,
"learning_rate": 9.212474551378025e-09,
"loss": 0.5609,
"mean_token_accuracy": 0.8229713141918182,
"num_tokens": 93415789.0,
"step": 1428
},
{
"epoch": 0.9755931046253626,
"grad_norm": 0.7266075611114502,
"learning_rate": 8.708072005862433e-09,
"loss": 0.6061,
"mean_token_accuracy": 0.8118890523910522,
"num_tokens": 93481325.0,
"step": 1429
},
{
"epoch": 0.9762758149854924,
"grad_norm": 0.7116613984107971,
"learning_rate": 8.217845424632332e-09,
"loss": 0.6266,
"mean_token_accuracy": 0.8050356209278107,
"num_tokens": 93546853.0,
"step": 1430
},
{
"epoch": 0.9769585253456221,
"grad_norm": 0.7236589789390564,
"learning_rate": 7.741797597178024e-09,
"loss": 0.5874,
"mean_token_accuracy": 0.8152187168598175,
"num_tokens": 93612389.0,
"step": 1431
},
{
"epoch": 0.9776412357057518,
"grad_norm": 0.7012879252433777,
"learning_rate": 7.279931232309911e-09,
"loss": 0.5846,
"mean_token_accuracy": 0.8153409063816071,
"num_tokens": 93677925.0,
"step": 1432
},
{
"epoch": 0.9783239460658816,
"grad_norm": 0.7387993335723877,
"learning_rate": 6.832248958142107e-09,
"loss": 0.5972,
"mean_token_accuracy": 0.811590164899826,
"num_tokens": 93742181.0,
"step": 1433
},
{
"epoch": 0.9790066564260113,
"grad_norm": 0.8164238929748535,
"learning_rate": 6.398753322079676e-09,
"loss": 0.6047,
"mean_token_accuracy": 0.8127580732107162,
"num_tokens": 93807621.0,
"step": 1434
},
{
"epoch": 0.9796893667861409,
"grad_norm": 0.7341460585594177,
"learning_rate": 5.979446790801979e-09,
"loss": 0.657,
"mean_token_accuracy": 0.7979288995265961,
"num_tokens": 93873157.0,
"step": 1435
},
{
"epoch": 0.9803720771462707,
"grad_norm": 0.728808581829071,
"learning_rate": 5.574331750249074e-09,
"loss": 0.6026,
"mean_token_accuracy": 0.8145161271095276,
"num_tokens": 93938693.0,
"step": 1436
},
{
"epoch": 0.9810547875064004,
"grad_norm": 0.7273023724555969,
"learning_rate": 5.183410505609498e-09,
"loss": 0.6069,
"mean_token_accuracy": 0.8126724660396576,
"num_tokens": 94003911.0,
"step": 1437
},
{
"epoch": 0.9817374978665301,
"grad_norm": 0.7281927466392517,
"learning_rate": 4.806685281305568e-09,
"loss": 0.5996,
"mean_token_accuracy": 0.8104685992002487,
"num_tokens": 94069447.0,
"step": 1438
},
{
"epoch": 0.9824202082266599,
"grad_norm": 0.7687538862228394,
"learning_rate": 4.444158220981154e-09,
"loss": 0.6093,
"mean_token_accuracy": 0.8106977045536041,
"num_tokens": 94134983.0,
"step": 1439
},
{
"epoch": 0.9831029185867896,
"grad_norm": 0.7035177946090698,
"learning_rate": 4.095831387490312e-09,
"loss": 0.6002,
"mean_token_accuracy": 0.810405820608139,
"num_tokens": 94200496.0,
"step": 1440
},
{
"epoch": 0.9837856289469192,
"grad_norm": 0.7233470678329468,
"learning_rate": 3.761706762884232e-09,
"loss": 0.6151,
"mean_token_accuracy": 0.8110263049602509,
"num_tokens": 94265839.0,
"step": 1441
},
{
"epoch": 0.984468339307049,
"grad_norm": 0.7216858267784119,
"learning_rate": 3.4417862484006914e-09,
"loss": 0.6047,
"mean_token_accuracy": 0.8115992546081543,
"num_tokens": 94331298.0,
"step": 1442
},
{
"epoch": 0.9851510496671787,
"grad_norm": 0.7692837715148926,
"learning_rate": 3.136071664453788e-09,
"loss": 0.607,
"mean_token_accuracy": 0.8103876262903214,
"num_tokens": 94396631.0,
"step": 1443
},
{
"epoch": 0.9858337600273084,
"grad_norm": 0.7010142803192139,
"learning_rate": 2.8445647506220032e-09,
"loss": 0.6288,
"mean_token_accuracy": 0.8041263669729233,
"num_tokens": 94462025.0,
"step": 1444
},
{
"epoch": 0.9865164703874382,
"grad_norm": 0.7143568396568298,
"learning_rate": 2.5672671656401526e-09,
"loss": 0.5985,
"mean_token_accuracy": 0.814523309469223,
"num_tokens": 94527487.0,
"step": 1445
},
{
"epoch": 0.9871991807475679,
"grad_norm": 0.6615064144134521,
"learning_rate": 2.3041804873882857e-09,
"loss": 0.5596,
"mean_token_accuracy": 0.8252646028995514,
"num_tokens": 94592979.0,
"step": 1446
},
{
"epoch": 0.9878818911076975,
"grad_norm": 0.7245396971702576,
"learning_rate": 2.0553062128839117e-09,
"loss": 0.5973,
"mean_token_accuracy": 0.8139510005712509,
"num_tokens": 94658515.0,
"step": 1447
},
{
"epoch": 0.9885646014678273,
"grad_norm": 0.7064764499664307,
"learning_rate": 1.8206457582728432e-09,
"loss": 0.6182,
"mean_token_accuracy": 0.8098118305206299,
"num_tokens": 94724051.0,
"step": 1448
},
{
"epoch": 0.989247311827957,
"grad_norm": 0.7237589955329895,
"learning_rate": 1.600200458821699e-09,
"loss": 0.619,
"mean_token_accuracy": 0.8067723661661148,
"num_tokens": 94789587.0,
"step": 1449
},
{
"epoch": 0.9899300221880867,
"grad_norm": 0.7340627312660217,
"learning_rate": 1.3939715689093025e-09,
"loss": 0.5694,
"mean_token_accuracy": 0.8215572983026505,
"num_tokens": 94855123.0,
"step": 1450
},
{
"epoch": 0.9906127325482165,
"grad_norm": 0.7079196572303772,
"learning_rate": 1.201960262020574e-09,
"loss": 0.5989,
"mean_token_accuracy": 0.815142348408699,
"num_tokens": 94920659.0,
"step": 1451
},
{
"epoch": 0.9912954429083461,
"grad_norm": 0.7087104916572571,
"learning_rate": 1.0241676307398696e-09,
"loss": 0.559,
"mean_token_accuracy": 0.8240730315446854,
"num_tokens": 94985980.0,
"step": 1452
},
{
"epoch": 0.9919781532684758,
"grad_norm": 0.7006968855857849,
"learning_rate": 8.605946867432103e-10,
"loss": 0.6028,
"mean_token_accuracy": 0.8109879046678543,
"num_tokens": 95051516.0,
"step": 1453
},
{
"epoch": 0.9926608636286056,
"grad_norm": 0.7328653931617737,
"learning_rate": 7.112423607946728e-10,
"loss": 0.6228,
"mean_token_accuracy": 0.8052449971437454,
"num_tokens": 95117052.0,
"step": 1454
},
{
"epoch": 0.9933435739887353,
"grad_norm": 0.7530898451805115,
"learning_rate": 5.76111502739729e-10,
"loss": 0.631,
"mean_token_accuracy": 0.8047104179859161,
"num_tokens": 95182588.0,
"step": 1455
},
{
"epoch": 0.994026284348865,
"grad_norm": 0.7755634784698486,
"learning_rate": 4.552028815008047e-10,
"loss": 0.647,
"mean_token_accuracy": 0.7990311533212662,
"num_tokens": 95247829.0,
"step": 1456
},
{
"epoch": 0.9947089947089947,
"grad_norm": 0.6960027813911438,
"learning_rate": 3.4851718507311617e-10,
"loss": 0.5842,
"mean_token_accuracy": 0.8175555914640427,
"num_tokens": 95313365.0,
"step": 1457
},
{
"epoch": 0.9953917050691244,
"grad_norm": 0.6915958523750305,
"learning_rate": 2.5605502051967435e-10,
"loss": 0.5701,
"mean_token_accuracy": 0.8211907297372818,
"num_tokens": 95378901.0,
"step": 1458
},
{
"epoch": 0.9960744154292541,
"grad_norm": 0.7065199017524719,
"learning_rate": 1.7781691396961952e-10,
"loss": 0.6341,
"mean_token_accuracy": 0.8043896704912186,
"num_tokens": 95444437.0,
"step": 1459
},
{
"epoch": 0.9967571257893838,
"grad_norm": 0.6968325972557068,
"learning_rate": 1.1380331061405791e-10,
"loss": 0.5858,
"mean_token_accuracy": 0.8169599175453186,
"num_tokens": 95509973.0,
"step": 1460
},
{
"epoch": 0.9974398361495136,
"grad_norm": 0.697854220867157,
"learning_rate": 6.401457470300853e-11,
"loss": 0.5955,
"mean_token_accuracy": 0.8130066990852356,
"num_tokens": 95575408.0,
"step": 1461
},
{
"epoch": 0.9981225465096433,
"grad_norm": 0.7216105461120605,
"learning_rate": 2.8450989545125706e-11,
"loss": 0.6084,
"mean_token_accuracy": 0.8100103884935379,
"num_tokens": 95640944.0,
"step": 1462
},
{
"epoch": 0.998805256869773,
"grad_norm": 0.6907703876495361,
"learning_rate": 7.112757504645907e-12,
"loss": 0.5858,
"mean_token_accuracy": 0.8143328428268433,
"num_tokens": 95706480.0,
"step": 1463
},
{
"epoch": 0.9994879672299027,
"grad_norm": 0.7113831043243408,
"learning_rate": 0.0,
"loss": 0.5998,
"mean_token_accuracy": 0.811400294303894,
"num_tokens": 95772016.0,
"step": 1464
},
{
"epoch": 0.9994879672299027,
"step": 1464,
"total_flos": 83397779128320.0,
"train_loss": 0.6384449617458823,
"train_runtime": 24106.2969,
"train_samples_per_second": 3.888,
"train_steps_per_second": 0.061
}
],
"logging_steps": 1,
"max_steps": 1464,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 83397779128320.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}