412k_raw_e1 / trainer_state.json
vuhaian's picture
Add files using upload-large-folder tool
33b6c80 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1611,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006207324643078833,
"grad_norm": 4.0,
"learning_rate": 0.0,
"loss": 1.179443359375,
"num_tokens": 79611.0,
"step": 1
},
{
"epoch": 0.0012414649286157666,
"grad_norm": 4.59375,
"learning_rate": 6.122448979591837e-07,
"loss": 1.44921875,
"num_tokens": 151724.0,
"step": 2
},
{
"epoch": 0.00186219739292365,
"grad_norm": 4.5,
"learning_rate": 1.2244897959183673e-06,
"loss": 1.38330078125,
"num_tokens": 236392.0,
"step": 3
},
{
"epoch": 0.002482929857231533,
"grad_norm": 4.375,
"learning_rate": 1.836734693877551e-06,
"loss": 1.268798828125,
"num_tokens": 318898.0,
"step": 4
},
{
"epoch": 0.0031036623215394167,
"grad_norm": 4.875,
"learning_rate": 2.4489795918367347e-06,
"loss": 1.47998046875,
"num_tokens": 397311.0,
"step": 5
},
{
"epoch": 0.0037243947858473,
"grad_norm": 4.0625,
"learning_rate": 3.0612244897959185e-06,
"loss": 1.22265625,
"num_tokens": 475395.0,
"step": 6
},
{
"epoch": 0.004345127250155183,
"grad_norm": 4.25,
"learning_rate": 3.673469387755102e-06,
"loss": 1.25286865234375,
"num_tokens": 552206.0,
"step": 7
},
{
"epoch": 0.004965859714463066,
"grad_norm": 3.78125,
"learning_rate": 4.2857142857142855e-06,
"loss": 1.16796875,
"num_tokens": 627230.0,
"step": 8
},
{
"epoch": 0.00558659217877095,
"grad_norm": 3.390625,
"learning_rate": 4.897959183673469e-06,
"loss": 1.222900390625,
"num_tokens": 707653.0,
"step": 9
},
{
"epoch": 0.006207324643078833,
"grad_norm": 3.28125,
"learning_rate": 5.510204081632653e-06,
"loss": 1.28173828125,
"num_tokens": 791267.0,
"step": 10
},
{
"epoch": 0.006828057107386716,
"grad_norm": 2.734375,
"learning_rate": 6.122448979591837e-06,
"loss": 1.248046875,
"num_tokens": 867206.0,
"step": 11
},
{
"epoch": 0.0074487895716946,
"grad_norm": 2.265625,
"learning_rate": 6.734693877551021e-06,
"loss": 1.0118408203125,
"num_tokens": 938194.0,
"step": 12
},
{
"epoch": 0.008069522036002483,
"grad_norm": 2.078125,
"learning_rate": 7.346938775510204e-06,
"loss": 1.130615234375,
"num_tokens": 1015387.0,
"step": 13
},
{
"epoch": 0.008690254500310366,
"grad_norm": 1.8359375,
"learning_rate": 7.959183673469388e-06,
"loss": 1.085205078125,
"num_tokens": 1100738.0,
"step": 14
},
{
"epoch": 0.00931098696461825,
"grad_norm": 1.7421875,
"learning_rate": 8.571428571428571e-06,
"loss": 1.151123046875,
"num_tokens": 1179837.0,
"step": 15
},
{
"epoch": 0.009931719428926133,
"grad_norm": 1.5859375,
"learning_rate": 9.183673469387756e-06,
"loss": 1.054931640625,
"num_tokens": 1267114.0,
"step": 16
},
{
"epoch": 0.010552451893234015,
"grad_norm": 1.5390625,
"learning_rate": 9.795918367346939e-06,
"loss": 0.9664306640625,
"num_tokens": 1343079.0,
"step": 17
},
{
"epoch": 0.0111731843575419,
"grad_norm": 1.3984375,
"learning_rate": 1.0408163265306123e-05,
"loss": 1.0679931640625,
"num_tokens": 1432157.0,
"step": 18
},
{
"epoch": 0.011793916821849782,
"grad_norm": 1.3671875,
"learning_rate": 1.1020408163265306e-05,
"loss": 1.0408935546875,
"num_tokens": 1505923.0,
"step": 19
},
{
"epoch": 0.012414649286157667,
"grad_norm": 1.2578125,
"learning_rate": 1.163265306122449e-05,
"loss": 1.0843505859375,
"num_tokens": 1588504.0,
"step": 20
},
{
"epoch": 0.01303538175046555,
"grad_norm": 1.078125,
"learning_rate": 1.2244897959183674e-05,
"loss": 0.90338134765625,
"num_tokens": 1668359.0,
"step": 21
},
{
"epoch": 0.013656114214773432,
"grad_norm": 1.0,
"learning_rate": 1.2857142857142857e-05,
"loss": 0.9005126953125,
"num_tokens": 1745555.0,
"step": 22
},
{
"epoch": 0.014276846679081317,
"grad_norm": 0.9453125,
"learning_rate": 1.3469387755102042e-05,
"loss": 0.880615234375,
"num_tokens": 1826162.0,
"step": 23
},
{
"epoch": 0.0148975791433892,
"grad_norm": 1.0,
"learning_rate": 1.4081632653061225e-05,
"loss": 0.9219970703125,
"num_tokens": 1907592.0,
"step": 24
},
{
"epoch": 0.015518311607697082,
"grad_norm": 0.98046875,
"learning_rate": 1.4693877551020408e-05,
"loss": 0.845458984375,
"num_tokens": 1984816.0,
"step": 25
},
{
"epoch": 0.016139044072004966,
"grad_norm": 0.98828125,
"learning_rate": 1.530612244897959e-05,
"loss": 0.8896484375,
"num_tokens": 2065689.0,
"step": 26
},
{
"epoch": 0.01675977653631285,
"grad_norm": 0.93359375,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.86383056640625,
"num_tokens": 2145214.0,
"step": 27
},
{
"epoch": 0.01738050900062073,
"grad_norm": 0.92578125,
"learning_rate": 1.6530612244897957e-05,
"loss": 0.79644775390625,
"num_tokens": 2227084.0,
"step": 28
},
{
"epoch": 0.018001241464928614,
"grad_norm": 0.87109375,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.81396484375,
"num_tokens": 2309170.0,
"step": 29
},
{
"epoch": 0.0186219739292365,
"grad_norm": 0.890625,
"learning_rate": 1.7755102040816327e-05,
"loss": 0.732086181640625,
"num_tokens": 2386568.0,
"step": 30
},
{
"epoch": 0.019242706393544383,
"grad_norm": 0.85546875,
"learning_rate": 1.836734693877551e-05,
"loss": 0.6558837890625,
"num_tokens": 2465738.0,
"step": 31
},
{
"epoch": 0.019863438857852266,
"grad_norm": 0.9375,
"learning_rate": 1.8979591836734696e-05,
"loss": 0.671142578125,
"num_tokens": 2544797.0,
"step": 32
},
{
"epoch": 0.020484171322160148,
"grad_norm": 0.82421875,
"learning_rate": 1.9591836734693877e-05,
"loss": 0.74334716796875,
"num_tokens": 2629296.0,
"step": 33
},
{
"epoch": 0.02110490378646803,
"grad_norm": 0.79296875,
"learning_rate": 2.0204081632653062e-05,
"loss": 0.687713623046875,
"num_tokens": 2717226.0,
"step": 34
},
{
"epoch": 0.021725636250775917,
"grad_norm": 0.90625,
"learning_rate": 2.0816326530612247e-05,
"loss": 0.6878662109375,
"num_tokens": 2796213.0,
"step": 35
},
{
"epoch": 0.0223463687150838,
"grad_norm": 0.8359375,
"learning_rate": 2.1428571428571428e-05,
"loss": 0.61114501953125,
"num_tokens": 2881296.0,
"step": 36
},
{
"epoch": 0.022967101179391682,
"grad_norm": 0.79296875,
"learning_rate": 2.2040816326530613e-05,
"loss": 0.55914306640625,
"num_tokens": 2960034.0,
"step": 37
},
{
"epoch": 0.023587833643699565,
"grad_norm": 0.77734375,
"learning_rate": 2.2653061224489794e-05,
"loss": 0.5269775390625,
"num_tokens": 3038779.0,
"step": 38
},
{
"epoch": 0.024208566108007448,
"grad_norm": 0.73828125,
"learning_rate": 2.326530612244898e-05,
"loss": 0.63275146484375,
"num_tokens": 3115602.0,
"step": 39
},
{
"epoch": 0.024829298572315334,
"grad_norm": 0.828125,
"learning_rate": 2.3877551020408164e-05,
"loss": 0.6881103515625,
"num_tokens": 3189503.0,
"step": 40
},
{
"epoch": 0.025450031036623216,
"grad_norm": 0.859375,
"learning_rate": 2.448979591836735e-05,
"loss": 0.78192138671875,
"num_tokens": 3269301.0,
"step": 41
},
{
"epoch": 0.0260707635009311,
"grad_norm": 0.6875,
"learning_rate": 2.5102040816326533e-05,
"loss": 0.49462890625,
"num_tokens": 3346752.0,
"step": 42
},
{
"epoch": 0.02669149596523898,
"grad_norm": 0.79296875,
"learning_rate": 2.5714285714285714e-05,
"loss": 0.63861083984375,
"num_tokens": 3429216.0,
"step": 43
},
{
"epoch": 0.027312228429546864,
"grad_norm": 0.76171875,
"learning_rate": 2.63265306122449e-05,
"loss": 0.61676025390625,
"num_tokens": 3508782.0,
"step": 44
},
{
"epoch": 0.027932960893854747,
"grad_norm": 0.7890625,
"learning_rate": 2.6938775510204084e-05,
"loss": 0.59075927734375,
"num_tokens": 3597953.0,
"step": 45
},
{
"epoch": 0.028553693358162633,
"grad_norm": 0.8671875,
"learning_rate": 2.7551020408163265e-05,
"loss": 0.5614013671875,
"num_tokens": 3672684.0,
"step": 46
},
{
"epoch": 0.029174425822470516,
"grad_norm": 0.87890625,
"learning_rate": 2.816326530612245e-05,
"loss": 0.49676513671875,
"num_tokens": 3749794.0,
"step": 47
},
{
"epoch": 0.0297951582867784,
"grad_norm": 0.73046875,
"learning_rate": 2.877551020408163e-05,
"loss": 0.454132080078125,
"num_tokens": 3827513.0,
"step": 48
},
{
"epoch": 0.03041589075108628,
"grad_norm": 0.859375,
"learning_rate": 2.9387755102040816e-05,
"loss": 0.4815673828125,
"num_tokens": 3905091.0,
"step": 49
},
{
"epoch": 0.031036623215394164,
"grad_norm": 0.9375,
"learning_rate": 3e-05,
"loss": 0.5006103515625,
"num_tokens": 3984283.0,
"step": 50
},
{
"epoch": 0.03165735567970205,
"grad_norm": 0.74609375,
"learning_rate": 2.9999969661171756e-05,
"loss": 0.46038818359375,
"num_tokens": 4068007.0,
"step": 51
},
{
"epoch": 0.03227808814400993,
"grad_norm": 0.76953125,
"learning_rate": 2.9999878644809757e-05,
"loss": 0.421356201171875,
"num_tokens": 4149338.0,
"step": 52
},
{
"epoch": 0.032898820608317815,
"grad_norm": 0.94921875,
"learning_rate": 2.9999726951282172e-05,
"loss": 0.4410400390625,
"num_tokens": 4224195.0,
"step": 53
},
{
"epoch": 0.0335195530726257,
"grad_norm": 0.72265625,
"learning_rate": 2.999951458120263e-05,
"loss": 0.359619140625,
"num_tokens": 4298713.0,
"step": 54
},
{
"epoch": 0.03414028553693358,
"grad_norm": 0.7421875,
"learning_rate": 2.999924153543021e-05,
"loss": 0.36236572265625,
"num_tokens": 4375052.0,
"step": 55
},
{
"epoch": 0.03476101800124146,
"grad_norm": 0.7421875,
"learning_rate": 2.9998907815069425e-05,
"loss": 0.424468994140625,
"num_tokens": 4452913.0,
"step": 56
},
{
"epoch": 0.035381750465549346,
"grad_norm": 1.046875,
"learning_rate": 2.9998513421470235e-05,
"loss": 0.473388671875,
"num_tokens": 4538717.0,
"step": 57
},
{
"epoch": 0.03600248292985723,
"grad_norm": 0.71484375,
"learning_rate": 2.9998058356228036e-05,
"loss": 0.37030029296875,
"num_tokens": 4608998.0,
"step": 58
},
{
"epoch": 0.03662321539416512,
"grad_norm": 0.76171875,
"learning_rate": 2.999754262118364e-05,
"loss": 0.47113037109375,
"num_tokens": 4686972.0,
"step": 59
},
{
"epoch": 0.037243947858473,
"grad_norm": 0.7578125,
"learning_rate": 2.9996966218423296e-05,
"loss": 0.3825836181640625,
"num_tokens": 4773118.0,
"step": 60
},
{
"epoch": 0.03786468032278088,
"grad_norm": 0.640625,
"learning_rate": 2.9996329150278646e-05,
"loss": 0.30316162109375,
"num_tokens": 4841124.0,
"step": 61
},
{
"epoch": 0.038485412787088766,
"grad_norm": 0.796875,
"learning_rate": 2.999563141932675e-05,
"loss": 0.401214599609375,
"num_tokens": 4926293.0,
"step": 62
},
{
"epoch": 0.03910614525139665,
"grad_norm": 0.72265625,
"learning_rate": 2.999487302839005e-05,
"loss": 0.38299560546875,
"num_tokens": 5003517.0,
"step": 63
},
{
"epoch": 0.03972687771570453,
"grad_norm": 1.2890625,
"learning_rate": 2.9994053980536368e-05,
"loss": 0.44708251953125,
"num_tokens": 5090773.0,
"step": 64
},
{
"epoch": 0.040347610180012414,
"grad_norm": 0.671875,
"learning_rate": 2.9993174279078906e-05,
"loss": 0.3594970703125,
"num_tokens": 5179656.0,
"step": 65
},
{
"epoch": 0.040968342644320296,
"grad_norm": 0.67578125,
"learning_rate": 2.9992233927576207e-05,
"loss": 0.31390380859375,
"num_tokens": 5258523.0,
"step": 66
},
{
"epoch": 0.04158907510862818,
"grad_norm": 0.84375,
"learning_rate": 2.9991232929832165e-05,
"loss": 0.340301513671875,
"num_tokens": 5334871.0,
"step": 67
},
{
"epoch": 0.04220980757293606,
"grad_norm": 0.66015625,
"learning_rate": 2.9990171289895983e-05,
"loss": 0.305419921875,
"num_tokens": 5414795.0,
"step": 68
},
{
"epoch": 0.04283054003724395,
"grad_norm": 0.8359375,
"learning_rate": 2.998904901206219e-05,
"loss": 0.321746826171875,
"num_tokens": 5493669.0,
"step": 69
},
{
"epoch": 0.043451272501551834,
"grad_norm": 0.734375,
"learning_rate": 2.9987866100870597e-05,
"loss": 0.335052490234375,
"num_tokens": 5570527.0,
"step": 70
},
{
"epoch": 0.04407200496585972,
"grad_norm": 0.6171875,
"learning_rate": 2.998662256110629e-05,
"loss": 0.316741943359375,
"num_tokens": 5648297.0,
"step": 71
},
{
"epoch": 0.0446927374301676,
"grad_norm": 0.62890625,
"learning_rate": 2.9985318397799606e-05,
"loss": 0.27838134765625,
"num_tokens": 5725943.0,
"step": 72
},
{
"epoch": 0.04531346989447548,
"grad_norm": 0.6796875,
"learning_rate": 2.9983953616226113e-05,
"loss": 0.3489532470703125,
"num_tokens": 5801236.0,
"step": 73
},
{
"epoch": 0.045934202358783364,
"grad_norm": 0.70703125,
"learning_rate": 2.9982528221906606e-05,
"loss": 0.322601318359375,
"num_tokens": 5879666.0,
"step": 74
},
{
"epoch": 0.04655493482309125,
"grad_norm": 0.69140625,
"learning_rate": 2.9981042220607046e-05,
"loss": 0.2409820556640625,
"num_tokens": 5959047.0,
"step": 75
},
{
"epoch": 0.04717566728739913,
"grad_norm": 0.66015625,
"learning_rate": 2.9979495618338574e-05,
"loss": 0.29443359375,
"num_tokens": 6039447.0,
"step": 76
},
{
"epoch": 0.04779639975170701,
"grad_norm": 0.625,
"learning_rate": 2.997788842135747e-05,
"loss": 0.278045654296875,
"num_tokens": 6118541.0,
"step": 77
},
{
"epoch": 0.048417132216014895,
"grad_norm": 0.81640625,
"learning_rate": 2.9976220636165136e-05,
"loss": 0.3525390625,
"num_tokens": 6203755.0,
"step": 78
},
{
"epoch": 0.04903786468032278,
"grad_norm": 0.640625,
"learning_rate": 2.9974492269508053e-05,
"loss": 0.29705810546875,
"num_tokens": 6279655.0,
"step": 79
},
{
"epoch": 0.04965859714463067,
"grad_norm": 0.89453125,
"learning_rate": 2.997270332837777e-05,
"loss": 0.302825927734375,
"num_tokens": 6363657.0,
"step": 80
},
{
"epoch": 0.05027932960893855,
"grad_norm": 0.6640625,
"learning_rate": 2.9970853820010878e-05,
"loss": 0.325653076171875,
"num_tokens": 6445428.0,
"step": 81
},
{
"epoch": 0.05090006207324643,
"grad_norm": 0.640625,
"learning_rate": 2.9968943751888953e-05,
"loss": 0.306793212890625,
"num_tokens": 6521708.0,
"step": 82
},
{
"epoch": 0.051520794537554315,
"grad_norm": 0.55859375,
"learning_rate": 2.996697313173857e-05,
"loss": 0.2458343505859375,
"num_tokens": 6599847.0,
"step": 83
},
{
"epoch": 0.0521415270018622,
"grad_norm": 0.58984375,
"learning_rate": 2.9964941967531228e-05,
"loss": 0.2946014404296875,
"num_tokens": 6683876.0,
"step": 84
},
{
"epoch": 0.05276225946617008,
"grad_norm": 0.71484375,
"learning_rate": 2.9962850267483353e-05,
"loss": 0.323455810546875,
"num_tokens": 6757931.0,
"step": 85
},
{
"epoch": 0.05338299193047796,
"grad_norm": 0.65234375,
"learning_rate": 2.9960698040056236e-05,
"loss": 0.272979736328125,
"num_tokens": 6835781.0,
"step": 86
},
{
"epoch": 0.054003724394785846,
"grad_norm": 0.54296875,
"learning_rate": 2.9958485293956025e-05,
"loss": 0.280120849609375,
"num_tokens": 6916505.0,
"step": 87
},
{
"epoch": 0.05462445685909373,
"grad_norm": 0.65625,
"learning_rate": 2.9956212038133666e-05,
"loss": 0.31390380859375,
"num_tokens": 6996864.0,
"step": 88
},
{
"epoch": 0.05524518932340161,
"grad_norm": 0.57421875,
"learning_rate": 2.995387828178488e-05,
"loss": 0.2998046875,
"num_tokens": 7076621.0,
"step": 89
},
{
"epoch": 0.055865921787709494,
"grad_norm": 0.54296875,
"learning_rate": 2.995148403435013e-05,
"loss": 0.2631988525390625,
"num_tokens": 7155470.0,
"step": 90
},
{
"epoch": 0.05648665425201738,
"grad_norm": 0.5546875,
"learning_rate": 2.994902930551457e-05,
"loss": 0.212493896484375,
"num_tokens": 7231794.0,
"step": 91
},
{
"epoch": 0.057107386716325266,
"grad_norm": 0.6875,
"learning_rate": 2.9946514105208e-05,
"loss": 0.25518798828125,
"num_tokens": 7311756.0,
"step": 92
},
{
"epoch": 0.05772811918063315,
"grad_norm": 0.486328125,
"learning_rate": 2.994393844360487e-05,
"loss": 0.1932830810546875,
"num_tokens": 7389961.0,
"step": 93
},
{
"epoch": 0.05834885164494103,
"grad_norm": 0.62109375,
"learning_rate": 2.9941302331124173e-05,
"loss": 0.2710113525390625,
"num_tokens": 7468362.0,
"step": 94
},
{
"epoch": 0.058969584109248914,
"grad_norm": 0.486328125,
"learning_rate": 2.993860577842946e-05,
"loss": 0.245635986328125,
"num_tokens": 7556759.0,
"step": 95
},
{
"epoch": 0.0595903165735568,
"grad_norm": 0.69140625,
"learning_rate": 2.9935848796428756e-05,
"loss": 0.31903076171875,
"num_tokens": 7640993.0,
"step": 96
},
{
"epoch": 0.06021104903786468,
"grad_norm": 0.482421875,
"learning_rate": 2.993303139627455e-05,
"loss": 0.293212890625,
"num_tokens": 7723016.0,
"step": 97
},
{
"epoch": 0.06083178150217256,
"grad_norm": 0.5859375,
"learning_rate": 2.9930153589363717e-05,
"loss": 0.2859039306640625,
"num_tokens": 7800241.0,
"step": 98
},
{
"epoch": 0.061452513966480445,
"grad_norm": 0.5,
"learning_rate": 2.9927215387337502e-05,
"loss": 0.294921875,
"num_tokens": 7888881.0,
"step": 99
},
{
"epoch": 0.06207324643078833,
"grad_norm": 0.484375,
"learning_rate": 2.9924216802081445e-05,
"loss": 0.22564697265625,
"num_tokens": 7980394.0,
"step": 100
},
{
"epoch": 0.06269397889509622,
"grad_norm": 0.54296875,
"learning_rate": 2.9921157845725362e-05,
"loss": 0.2675628662109375,
"num_tokens": 8057881.0,
"step": 101
},
{
"epoch": 0.0633147113594041,
"grad_norm": 0.5,
"learning_rate": 2.9918038530643273e-05,
"loss": 0.2433929443359375,
"num_tokens": 8140601.0,
"step": 102
},
{
"epoch": 0.06393544382371198,
"grad_norm": 0.56640625,
"learning_rate": 2.991485886945335e-05,
"loss": 0.22802734375,
"num_tokens": 8225138.0,
"step": 103
},
{
"epoch": 0.06455617628801986,
"grad_norm": 0.4921875,
"learning_rate": 2.9911618875017895e-05,
"loss": 0.263763427734375,
"num_tokens": 8304312.0,
"step": 104
},
{
"epoch": 0.06517690875232775,
"grad_norm": 0.494140625,
"learning_rate": 2.990831856044326e-05,
"loss": 0.23895263671875,
"num_tokens": 8391772.0,
"step": 105
},
{
"epoch": 0.06579764121663563,
"grad_norm": 0.458984375,
"learning_rate": 2.99049579390798e-05,
"loss": 0.2308502197265625,
"num_tokens": 8470529.0,
"step": 106
},
{
"epoch": 0.06641837368094351,
"grad_norm": 0.498046875,
"learning_rate": 2.9901537024521817e-05,
"loss": 0.2149200439453125,
"num_tokens": 8566396.0,
"step": 107
},
{
"epoch": 0.0670391061452514,
"grad_norm": 0.5,
"learning_rate": 2.9898055830607526e-05,
"loss": 0.265045166015625,
"num_tokens": 8643246.0,
"step": 108
},
{
"epoch": 0.06765983860955928,
"grad_norm": 0.5546875,
"learning_rate": 2.9894514371418968e-05,
"loss": 0.3111572265625,
"num_tokens": 8720654.0,
"step": 109
},
{
"epoch": 0.06828057107386716,
"grad_norm": 0.5078125,
"learning_rate": 2.989091266128197e-05,
"loss": 0.23321533203125,
"num_tokens": 8793996.0,
"step": 110
},
{
"epoch": 0.06890130353817504,
"grad_norm": 0.46484375,
"learning_rate": 2.9887250714766094e-05,
"loss": 0.2656402587890625,
"num_tokens": 8871414.0,
"step": 111
},
{
"epoch": 0.06952203600248293,
"grad_norm": 0.462890625,
"learning_rate": 2.988352854668456e-05,
"loss": 0.234954833984375,
"num_tokens": 8961178.0,
"step": 112
},
{
"epoch": 0.07014276846679081,
"grad_norm": 0.54296875,
"learning_rate": 2.9879746172094195e-05,
"loss": 0.2712860107421875,
"num_tokens": 9042073.0,
"step": 113
},
{
"epoch": 0.07076350093109869,
"grad_norm": 0.482421875,
"learning_rate": 2.987590360629537e-05,
"loss": 0.248870849609375,
"num_tokens": 9113971.0,
"step": 114
},
{
"epoch": 0.07138423339540657,
"grad_norm": 0.4765625,
"learning_rate": 2.9872000864831953e-05,
"loss": 0.2508392333984375,
"num_tokens": 9200614.0,
"step": 115
},
{
"epoch": 0.07200496585971446,
"grad_norm": 0.390625,
"learning_rate": 2.986803796349122e-05,
"loss": 0.17407989501953125,
"num_tokens": 9288686.0,
"step": 116
},
{
"epoch": 0.07262569832402235,
"grad_norm": 0.494140625,
"learning_rate": 2.986401491830381e-05,
"loss": 0.272369384765625,
"num_tokens": 9366942.0,
"step": 117
},
{
"epoch": 0.07324643078833024,
"grad_norm": 0.466796875,
"learning_rate": 2.9859931745543648e-05,
"loss": 0.23114013671875,
"num_tokens": 9438305.0,
"step": 118
},
{
"epoch": 0.07386716325263812,
"grad_norm": 0.44140625,
"learning_rate": 2.9855788461727905e-05,
"loss": 0.2272491455078125,
"num_tokens": 9514539.0,
"step": 119
},
{
"epoch": 0.074487895716946,
"grad_norm": 0.474609375,
"learning_rate": 2.985158508361688e-05,
"loss": 0.2364654541015625,
"num_tokens": 9591929.0,
"step": 120
},
{
"epoch": 0.07510862818125388,
"grad_norm": 0.419921875,
"learning_rate": 2.984732162821399e-05,
"loss": 0.2118682861328125,
"num_tokens": 9677202.0,
"step": 121
},
{
"epoch": 0.07572936064556177,
"grad_norm": 0.423828125,
"learning_rate": 2.984299811276567e-05,
"loss": 0.1953582763671875,
"num_tokens": 9756886.0,
"step": 122
},
{
"epoch": 0.07635009310986965,
"grad_norm": 0.400390625,
"learning_rate": 2.9838614554761306e-05,
"loss": 0.204193115234375,
"num_tokens": 9835849.0,
"step": 123
},
{
"epoch": 0.07697082557417753,
"grad_norm": 0.470703125,
"learning_rate": 2.9834170971933157e-05,
"loss": 0.2611083984375,
"num_tokens": 9920661.0,
"step": 124
},
{
"epoch": 0.07759155803848541,
"grad_norm": 0.384765625,
"learning_rate": 2.9829667382256313e-05,
"loss": 0.1618194580078125,
"num_tokens": 10007676.0,
"step": 125
},
{
"epoch": 0.0782122905027933,
"grad_norm": 0.439453125,
"learning_rate": 2.9825103803948588e-05,
"loss": 0.2403717041015625,
"num_tokens": 10093591.0,
"step": 126
},
{
"epoch": 0.07883302296710118,
"grad_norm": 0.48828125,
"learning_rate": 2.982048025547046e-05,
"loss": 0.23187255859375,
"num_tokens": 10166333.0,
"step": 127
},
{
"epoch": 0.07945375543140906,
"grad_norm": 0.53125,
"learning_rate": 2.9815796755525012e-05,
"loss": 0.3082122802734375,
"num_tokens": 10243848.0,
"step": 128
},
{
"epoch": 0.08007448789571694,
"grad_norm": 0.5,
"learning_rate": 2.981105332305782e-05,
"loss": 0.309478759765625,
"num_tokens": 10323097.0,
"step": 129
},
{
"epoch": 0.08069522036002483,
"grad_norm": 0.466796875,
"learning_rate": 2.9806249977256914e-05,
"loss": 0.2358245849609375,
"num_tokens": 10402310.0,
"step": 130
},
{
"epoch": 0.08131595282433271,
"grad_norm": 0.44921875,
"learning_rate": 2.9801386737552676e-05,
"loss": 0.2324066162109375,
"num_tokens": 10480727.0,
"step": 131
},
{
"epoch": 0.08193668528864059,
"grad_norm": 0.48828125,
"learning_rate": 2.9796463623617772e-05,
"loss": 0.2565460205078125,
"num_tokens": 10559472.0,
"step": 132
},
{
"epoch": 0.08255741775294848,
"grad_norm": 0.423828125,
"learning_rate": 2.979148065536707e-05,
"loss": 0.1997222900390625,
"num_tokens": 10635365.0,
"step": 133
},
{
"epoch": 0.08317815021725636,
"grad_norm": 0.48046875,
"learning_rate": 2.9786437852957564e-05,
"loss": 0.25286865234375,
"num_tokens": 10719764.0,
"step": 134
},
{
"epoch": 0.08379888268156424,
"grad_norm": 0.546875,
"learning_rate": 2.978133523678828e-05,
"loss": 0.28729248046875,
"num_tokens": 10798969.0,
"step": 135
},
{
"epoch": 0.08441961514587212,
"grad_norm": 0.41796875,
"learning_rate": 2.9776172827500196e-05,
"loss": 0.1951141357421875,
"num_tokens": 10872762.0,
"step": 136
},
{
"epoch": 0.08504034761018,
"grad_norm": 0.494140625,
"learning_rate": 2.9770950645976186e-05,
"loss": 0.23980712890625,
"num_tokens": 10955939.0,
"step": 137
},
{
"epoch": 0.0856610800744879,
"grad_norm": 0.486328125,
"learning_rate": 2.9765668713340883e-05,
"loss": 0.2687225341796875,
"num_tokens": 11039455.0,
"step": 138
},
{
"epoch": 0.08628181253879579,
"grad_norm": 0.46875,
"learning_rate": 2.9760327050960654e-05,
"loss": 0.20829010009765625,
"num_tokens": 11112387.0,
"step": 139
},
{
"epoch": 0.08690254500310367,
"grad_norm": 0.5234375,
"learning_rate": 2.9754925680443464e-05,
"loss": 0.289093017578125,
"num_tokens": 11192385.0,
"step": 140
},
{
"epoch": 0.08752327746741155,
"grad_norm": 0.48828125,
"learning_rate": 2.974946462363881e-05,
"loss": 0.253509521484375,
"num_tokens": 11272622.0,
"step": 141
},
{
"epoch": 0.08814400993171943,
"grad_norm": 0.474609375,
"learning_rate": 2.974394390263764e-05,
"loss": 0.2116241455078125,
"num_tokens": 11351393.0,
"step": 142
},
{
"epoch": 0.08876474239602732,
"grad_norm": 0.4765625,
"learning_rate": 2.973836353977225e-05,
"loss": 0.257659912109375,
"num_tokens": 11436612.0,
"step": 143
},
{
"epoch": 0.0893854748603352,
"grad_norm": 0.486328125,
"learning_rate": 2.9732723557616182e-05,
"loss": 0.28607177734375,
"num_tokens": 11516523.0,
"step": 144
},
{
"epoch": 0.09000620732464308,
"grad_norm": 0.44140625,
"learning_rate": 2.9727023978984175e-05,
"loss": 0.235870361328125,
"num_tokens": 11602468.0,
"step": 145
},
{
"epoch": 0.09062693978895096,
"grad_norm": 0.494140625,
"learning_rate": 2.9721264826932037e-05,
"loss": 0.295379638671875,
"num_tokens": 11678165.0,
"step": 146
},
{
"epoch": 0.09124767225325885,
"grad_norm": 0.4921875,
"learning_rate": 2.9715446124756545e-05,
"loss": 0.28414154052734375,
"num_tokens": 11757826.0,
"step": 147
},
{
"epoch": 0.09186840471756673,
"grad_norm": 0.4296875,
"learning_rate": 2.970956789599539e-05,
"loss": 0.1726837158203125,
"num_tokens": 11835725.0,
"step": 148
},
{
"epoch": 0.09248913718187461,
"grad_norm": 0.53125,
"learning_rate": 2.9703630164427042e-05,
"loss": 0.2672882080078125,
"num_tokens": 11909671.0,
"step": 149
},
{
"epoch": 0.0931098696461825,
"grad_norm": 0.4765625,
"learning_rate": 2.9697632954070684e-05,
"loss": 0.24822998046875,
"num_tokens": 11985432.0,
"step": 150
},
{
"epoch": 0.09373060211049038,
"grad_norm": 0.466796875,
"learning_rate": 2.9691576289186088e-05,
"loss": 0.2350006103515625,
"num_tokens": 12057130.0,
"step": 151
},
{
"epoch": 0.09435133457479826,
"grad_norm": 0.435546875,
"learning_rate": 2.9685460194273538e-05,
"loss": 0.235595703125,
"num_tokens": 12148835.0,
"step": 152
},
{
"epoch": 0.09497206703910614,
"grad_norm": 0.494140625,
"learning_rate": 2.9679284694073717e-05,
"loss": 0.27794647216796875,
"num_tokens": 12232513.0,
"step": 153
},
{
"epoch": 0.09559279950341402,
"grad_norm": 0.421875,
"learning_rate": 2.9673049813567627e-05,
"loss": 0.204833984375,
"num_tokens": 12315268.0,
"step": 154
},
{
"epoch": 0.09621353196772191,
"grad_norm": 0.443359375,
"learning_rate": 2.9666755577976457e-05,
"loss": 0.2125244140625,
"num_tokens": 12392742.0,
"step": 155
},
{
"epoch": 0.09683426443202979,
"grad_norm": 0.458984375,
"learning_rate": 2.9660402012761504e-05,
"loss": 0.219146728515625,
"num_tokens": 12467919.0,
"step": 156
},
{
"epoch": 0.09745499689633767,
"grad_norm": 0.421875,
"learning_rate": 2.9653989143624066e-05,
"loss": 0.216033935546875,
"num_tokens": 12548976.0,
"step": 157
},
{
"epoch": 0.09807572936064556,
"grad_norm": 0.478515625,
"learning_rate": 2.9647516996505335e-05,
"loss": 0.1933441162109375,
"num_tokens": 12621858.0,
"step": 158
},
{
"epoch": 0.09869646182495345,
"grad_norm": 0.4921875,
"learning_rate": 2.9640985597586293e-05,
"loss": 0.2196807861328125,
"num_tokens": 12699962.0,
"step": 159
},
{
"epoch": 0.09931719428926133,
"grad_norm": 0.4765625,
"learning_rate": 2.9634394973287605e-05,
"loss": 0.2627105712890625,
"num_tokens": 12779843.0,
"step": 160
},
{
"epoch": 0.09993792675356922,
"grad_norm": 0.5078125,
"learning_rate": 2.962774515026951e-05,
"loss": 0.2523651123046875,
"num_tokens": 12855499.0,
"step": 161
},
{
"epoch": 0.1005586592178771,
"grad_norm": 0.455078125,
"learning_rate": 2.962103615543172e-05,
"loss": 0.22833251953125,
"num_tokens": 12938026.0,
"step": 162
},
{
"epoch": 0.10117939168218498,
"grad_norm": 0.416015625,
"learning_rate": 2.9614268015913314e-05,
"loss": 0.20711517333984375,
"num_tokens": 13017379.0,
"step": 163
},
{
"epoch": 0.10180012414649287,
"grad_norm": 0.474609375,
"learning_rate": 2.9607440759092608e-05,
"loss": 0.26580047607421875,
"num_tokens": 13092012.0,
"step": 164
},
{
"epoch": 0.10242085661080075,
"grad_norm": 0.4765625,
"learning_rate": 2.960055441258707e-05,
"loss": 0.28448486328125,
"num_tokens": 13171154.0,
"step": 165
},
{
"epoch": 0.10304158907510863,
"grad_norm": 0.447265625,
"learning_rate": 2.9593609004253185e-05,
"loss": 0.1904296875,
"num_tokens": 13244589.0,
"step": 166
},
{
"epoch": 0.10366232153941651,
"grad_norm": 0.46484375,
"learning_rate": 2.9586604562186365e-05,
"loss": 0.2339019775390625,
"num_tokens": 13320711.0,
"step": 167
},
{
"epoch": 0.1042830540037244,
"grad_norm": 0.41796875,
"learning_rate": 2.9579541114720817e-05,
"loss": 0.2139434814453125,
"num_tokens": 13397936.0,
"step": 168
},
{
"epoch": 0.10490378646803228,
"grad_norm": 0.5078125,
"learning_rate": 2.957241869042944e-05,
"loss": 0.27524566650390625,
"num_tokens": 13476414.0,
"step": 169
},
{
"epoch": 0.10552451893234016,
"grad_norm": 0.490234375,
"learning_rate": 2.9565237318123695e-05,
"loss": 0.24920654296875,
"num_tokens": 13550890.0,
"step": 170
},
{
"epoch": 0.10614525139664804,
"grad_norm": 0.49609375,
"learning_rate": 2.955799702685351e-05,
"loss": 0.29901123046875,
"num_tokens": 13630283.0,
"step": 171
},
{
"epoch": 0.10676598386095593,
"grad_norm": 0.453125,
"learning_rate": 2.9550697845907146e-05,
"loss": 0.225738525390625,
"num_tokens": 13710519.0,
"step": 172
},
{
"epoch": 0.10738671632526381,
"grad_norm": 0.46484375,
"learning_rate": 2.9543339804811078e-05,
"loss": 0.25389862060546875,
"num_tokens": 13789045.0,
"step": 173
},
{
"epoch": 0.10800744878957169,
"grad_norm": 0.421875,
"learning_rate": 2.9535922933329887e-05,
"loss": 0.2156219482421875,
"num_tokens": 13871966.0,
"step": 174
},
{
"epoch": 0.10862818125387957,
"grad_norm": 0.50390625,
"learning_rate": 2.9528447261466134e-05,
"loss": 0.30413818359375,
"num_tokens": 13961486.0,
"step": 175
},
{
"epoch": 0.10924891371818746,
"grad_norm": 0.412109375,
"learning_rate": 2.9520912819460233e-05,
"loss": 0.178863525390625,
"num_tokens": 14039379.0,
"step": 176
},
{
"epoch": 0.10986964618249534,
"grad_norm": 0.443359375,
"learning_rate": 2.9513319637790337e-05,
"loss": 0.20584869384765625,
"num_tokens": 14119943.0,
"step": 177
},
{
"epoch": 0.11049037864680322,
"grad_norm": 0.45703125,
"learning_rate": 2.9505667747172212e-05,
"loss": 0.25897216796875,
"num_tokens": 14203058.0,
"step": 178
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.486328125,
"learning_rate": 2.949795717855911e-05,
"loss": 0.25653839111328125,
"num_tokens": 14286819.0,
"step": 179
},
{
"epoch": 0.11173184357541899,
"grad_norm": 0.4453125,
"learning_rate": 2.9490187963141644e-05,
"loss": 0.213531494140625,
"num_tokens": 14367159.0,
"step": 180
},
{
"epoch": 0.11235257603972688,
"grad_norm": 0.423828125,
"learning_rate": 2.948236013234767e-05,
"loss": 0.1999053955078125,
"num_tokens": 14445138.0,
"step": 181
},
{
"epoch": 0.11297330850403477,
"grad_norm": 0.421875,
"learning_rate": 2.947447371784215e-05,
"loss": 0.1924285888671875,
"num_tokens": 14524617.0,
"step": 182
},
{
"epoch": 0.11359404096834265,
"grad_norm": 0.515625,
"learning_rate": 2.946652875152702e-05,
"loss": 0.277069091796875,
"num_tokens": 14608040.0,
"step": 183
},
{
"epoch": 0.11421477343265053,
"grad_norm": 0.458984375,
"learning_rate": 2.945852526554109e-05,
"loss": 0.2434234619140625,
"num_tokens": 14689595.0,
"step": 184
},
{
"epoch": 0.11483550589695841,
"grad_norm": 0.47265625,
"learning_rate": 2.9450463292259863e-05,
"loss": 0.240081787109375,
"num_tokens": 14771203.0,
"step": 185
},
{
"epoch": 0.1154562383612663,
"grad_norm": 0.41796875,
"learning_rate": 2.944234286429546e-05,
"loss": 0.19640350341796875,
"num_tokens": 14854070.0,
"step": 186
},
{
"epoch": 0.11607697082557418,
"grad_norm": 0.451171875,
"learning_rate": 2.9434164014496445e-05,
"loss": 0.183197021484375,
"num_tokens": 14928923.0,
"step": 187
},
{
"epoch": 0.11669770328988206,
"grad_norm": 0.453125,
"learning_rate": 2.9425926775947713e-05,
"loss": 0.26019287109375,
"num_tokens": 15010774.0,
"step": 188
},
{
"epoch": 0.11731843575418995,
"grad_norm": 2.171875,
"learning_rate": 2.9417631181970354e-05,
"loss": 0.225250244140625,
"num_tokens": 15093644.0,
"step": 189
},
{
"epoch": 0.11793916821849783,
"grad_norm": 0.451171875,
"learning_rate": 2.9409277266121516e-05,
"loss": 0.221954345703125,
"num_tokens": 15170822.0,
"step": 190
},
{
"epoch": 0.11855990068280571,
"grad_norm": 0.439453125,
"learning_rate": 2.9400865062194268e-05,
"loss": 0.2058868408203125,
"num_tokens": 15243534.0,
"step": 191
},
{
"epoch": 0.1191806331471136,
"grad_norm": 0.396484375,
"learning_rate": 2.939239460421746e-05,
"loss": 0.15682220458984375,
"num_tokens": 15323269.0,
"step": 192
},
{
"epoch": 0.11980136561142148,
"grad_norm": 0.44921875,
"learning_rate": 2.93838659264556e-05,
"loss": 0.2241363525390625,
"num_tokens": 15403461.0,
"step": 193
},
{
"epoch": 0.12042209807572936,
"grad_norm": 0.4609375,
"learning_rate": 2.9375279063408706e-05,
"loss": 0.2346649169921875,
"num_tokens": 15479872.0,
"step": 194
},
{
"epoch": 0.12104283054003724,
"grad_norm": 0.42578125,
"learning_rate": 2.9366634049812145e-05,
"loss": 0.183074951171875,
"num_tokens": 15562255.0,
"step": 195
},
{
"epoch": 0.12166356300434512,
"grad_norm": 0.48046875,
"learning_rate": 2.9357930920636537e-05,
"loss": 0.245849609375,
"num_tokens": 15638673.0,
"step": 196
},
{
"epoch": 0.122284295468653,
"grad_norm": 0.4375,
"learning_rate": 2.9349169711087577e-05,
"loss": 0.1927642822265625,
"num_tokens": 15711371.0,
"step": 197
},
{
"epoch": 0.12290502793296089,
"grad_norm": 0.474609375,
"learning_rate": 2.9340350456605908e-05,
"loss": 0.2207489013671875,
"num_tokens": 15783275.0,
"step": 198
},
{
"epoch": 0.12352576039726877,
"grad_norm": 0.470703125,
"learning_rate": 2.9331473192866984e-05,
"loss": 0.251373291015625,
"num_tokens": 15863857.0,
"step": 199
},
{
"epoch": 0.12414649286157665,
"grad_norm": 0.419921875,
"learning_rate": 2.9322537955780902e-05,
"loss": 0.19305038452148438,
"num_tokens": 15947385.0,
"step": 200
},
{
"epoch": 0.12476722532588454,
"grad_norm": 0.498046875,
"learning_rate": 2.931354478149229e-05,
"loss": 0.2515716552734375,
"num_tokens": 16020854.0,
"step": 201
},
{
"epoch": 0.12538795779019243,
"grad_norm": 0.435546875,
"learning_rate": 2.9304493706380112e-05,
"loss": 0.2241973876953125,
"num_tokens": 16107063.0,
"step": 202
},
{
"epoch": 0.12600869025450032,
"grad_norm": 0.388671875,
"learning_rate": 2.929538476705758e-05,
"loss": 0.1594696044921875,
"num_tokens": 16184032.0,
"step": 203
},
{
"epoch": 0.1266294227188082,
"grad_norm": 0.416015625,
"learning_rate": 2.9286218000371973e-05,
"loss": 0.189910888671875,
"num_tokens": 16267360.0,
"step": 204
},
{
"epoch": 0.12725015518311608,
"grad_norm": 0.494140625,
"learning_rate": 2.9276993443404474e-05,
"loss": 0.310882568359375,
"num_tokens": 16351226.0,
"step": 205
},
{
"epoch": 0.12787088764742396,
"grad_norm": 0.431640625,
"learning_rate": 2.9267711133470052e-05,
"loss": 0.218902587890625,
"num_tokens": 16428433.0,
"step": 206
},
{
"epoch": 0.12849162011173185,
"grad_norm": 0.474609375,
"learning_rate": 2.92583711081173e-05,
"loss": 0.245269775390625,
"num_tokens": 16504509.0,
"step": 207
},
{
"epoch": 0.12911235257603973,
"grad_norm": 0.408203125,
"learning_rate": 2.9248973405128273e-05,
"loss": 0.204315185546875,
"num_tokens": 16588839.0,
"step": 208
},
{
"epoch": 0.1297330850403476,
"grad_norm": 0.4765625,
"learning_rate": 2.9239518062518342e-05,
"loss": 0.2843780517578125,
"num_tokens": 16665978.0,
"step": 209
},
{
"epoch": 0.1303538175046555,
"grad_norm": 0.51953125,
"learning_rate": 2.923000511853604e-05,
"loss": 0.2311859130859375,
"num_tokens": 16740112.0,
"step": 210
},
{
"epoch": 0.13097454996896338,
"grad_norm": 0.466796875,
"learning_rate": 2.9220434611662913e-05,
"loss": 0.241546630859375,
"num_tokens": 16818959.0,
"step": 211
},
{
"epoch": 0.13159528243327126,
"grad_norm": 0.45703125,
"learning_rate": 2.921080658061336e-05,
"loss": 0.2093353271484375,
"num_tokens": 16891523.0,
"step": 212
},
{
"epoch": 0.13221601489757914,
"grad_norm": 0.47265625,
"learning_rate": 2.9201121064334465e-05,
"loss": 0.2313995361328125,
"num_tokens": 16973790.0,
"step": 213
},
{
"epoch": 0.13283674736188703,
"grad_norm": 0.416015625,
"learning_rate": 2.919137810200586e-05,
"loss": 0.1971282958984375,
"num_tokens": 17054316.0,
"step": 214
},
{
"epoch": 0.1334574798261949,
"grad_norm": 0.486328125,
"learning_rate": 2.9181577733039554e-05,
"loss": 0.2489013671875,
"num_tokens": 17136683.0,
"step": 215
},
{
"epoch": 0.1340782122905028,
"grad_norm": 0.431640625,
"learning_rate": 2.9171719997079775e-05,
"loss": 0.2093505859375,
"num_tokens": 17219420.0,
"step": 216
},
{
"epoch": 0.13469894475481067,
"grad_norm": 0.53125,
"learning_rate": 2.9161804934002807e-05,
"loss": 0.2738037109375,
"num_tokens": 17296903.0,
"step": 217
},
{
"epoch": 0.13531967721911856,
"grad_norm": 0.400390625,
"learning_rate": 2.915183258391684e-05,
"loss": 0.1625823974609375,
"num_tokens": 17376132.0,
"step": 218
},
{
"epoch": 0.13594040968342644,
"grad_norm": 0.380859375,
"learning_rate": 2.9141802987161794e-05,
"loss": 0.184234619140625,
"num_tokens": 17463223.0,
"step": 219
},
{
"epoch": 0.13656114214773432,
"grad_norm": 0.392578125,
"learning_rate": 2.9131716184309167e-05,
"loss": 0.14013671875,
"num_tokens": 17536271.0,
"step": 220
},
{
"epoch": 0.1371818746120422,
"grad_norm": 0.423828125,
"learning_rate": 2.912157221616186e-05,
"loss": 0.217742919921875,
"num_tokens": 17620143.0,
"step": 221
},
{
"epoch": 0.1378026070763501,
"grad_norm": 0.46484375,
"learning_rate": 2.9111371123754022e-05,
"loss": 0.265472412109375,
"num_tokens": 17699602.0,
"step": 222
},
{
"epoch": 0.13842333954065797,
"grad_norm": 0.4453125,
"learning_rate": 2.9101112948350876e-05,
"loss": 0.18377685546875,
"num_tokens": 17775105.0,
"step": 223
},
{
"epoch": 0.13904407200496585,
"grad_norm": 0.451171875,
"learning_rate": 2.909079773144856e-05,
"loss": 0.186309814453125,
"num_tokens": 17846816.0,
"step": 224
},
{
"epoch": 0.13966480446927373,
"grad_norm": 0.49609375,
"learning_rate": 2.9080425514773955e-05,
"loss": 0.27982330322265625,
"num_tokens": 17925471.0,
"step": 225
},
{
"epoch": 0.14028553693358162,
"grad_norm": 0.4453125,
"learning_rate": 2.9069996340284513e-05,
"loss": 0.22800445556640625,
"num_tokens": 18001683.0,
"step": 226
},
{
"epoch": 0.1409062693978895,
"grad_norm": 0.490234375,
"learning_rate": 2.905951025016809e-05,
"loss": 0.27423095703125,
"num_tokens": 18085662.0,
"step": 227
},
{
"epoch": 0.14152700186219738,
"grad_norm": 0.48828125,
"learning_rate": 2.9048967286842784e-05,
"loss": 0.23690032958984375,
"num_tokens": 18163019.0,
"step": 228
},
{
"epoch": 0.14214773432650527,
"grad_norm": 0.474609375,
"learning_rate": 2.9038367492956735e-05,
"loss": 0.289642333984375,
"num_tokens": 18244487.0,
"step": 229
},
{
"epoch": 0.14276846679081315,
"grad_norm": 0.447265625,
"learning_rate": 2.9027710911388e-05,
"loss": 0.211639404296875,
"num_tokens": 18324110.0,
"step": 230
},
{
"epoch": 0.14338919925512103,
"grad_norm": 0.40625,
"learning_rate": 2.9016997585244335e-05,
"loss": 0.1941986083984375,
"num_tokens": 18410072.0,
"step": 231
},
{
"epoch": 0.1440099317194289,
"grad_norm": 0.4375,
"learning_rate": 2.900622755786304e-05,
"loss": 0.21749114990234375,
"num_tokens": 18488841.0,
"step": 232
},
{
"epoch": 0.1446306641837368,
"grad_norm": 0.46875,
"learning_rate": 2.8995400872810786e-05,
"loss": 0.266448974609375,
"num_tokens": 18580198.0,
"step": 233
},
{
"epoch": 0.1452513966480447,
"grad_norm": 0.447265625,
"learning_rate": 2.8984517573883426e-05,
"loss": 0.225830078125,
"num_tokens": 18661317.0,
"step": 234
},
{
"epoch": 0.1458721291123526,
"grad_norm": 0.48828125,
"learning_rate": 2.8973577705105835e-05,
"loss": 0.221343994140625,
"num_tokens": 18736060.0,
"step": 235
},
{
"epoch": 0.14649286157666047,
"grad_norm": 0.462890625,
"learning_rate": 2.896258131073172e-05,
"loss": 0.20946502685546875,
"num_tokens": 18813565.0,
"step": 236
},
{
"epoch": 0.14711359404096835,
"grad_norm": 0.42578125,
"learning_rate": 2.8951528435243447e-05,
"loss": 0.1993255615234375,
"num_tokens": 18892569.0,
"step": 237
},
{
"epoch": 0.14773432650527624,
"grad_norm": 0.462890625,
"learning_rate": 2.8940419123351843e-05,
"loss": 0.246368408203125,
"num_tokens": 18973433.0,
"step": 238
},
{
"epoch": 0.14835505896958412,
"grad_norm": 0.439453125,
"learning_rate": 2.8929253419996055e-05,
"loss": 0.23016357421875,
"num_tokens": 19058301.0,
"step": 239
},
{
"epoch": 0.148975791433892,
"grad_norm": 0.51171875,
"learning_rate": 2.8918031370343328e-05,
"loss": 0.31201171875,
"num_tokens": 19140189.0,
"step": 240
},
{
"epoch": 0.14959652389819988,
"grad_norm": 0.453125,
"learning_rate": 2.890675301978883e-05,
"loss": 0.2154998779296875,
"num_tokens": 19215946.0,
"step": 241
},
{
"epoch": 0.15021725636250777,
"grad_norm": 0.451171875,
"learning_rate": 2.8895418413955498e-05,
"loss": 0.22357177734375,
"num_tokens": 19295232.0,
"step": 242
},
{
"epoch": 0.15083798882681565,
"grad_norm": 0.46484375,
"learning_rate": 2.888402759869382e-05,
"loss": 0.2400360107421875,
"num_tokens": 19371531.0,
"step": 243
},
{
"epoch": 0.15145872129112353,
"grad_norm": 0.4296875,
"learning_rate": 2.8872580620081654e-05,
"loss": 0.22708892822265625,
"num_tokens": 19456098.0,
"step": 244
},
{
"epoch": 0.15207945375543142,
"grad_norm": 0.470703125,
"learning_rate": 2.886107752442406e-05,
"loss": 0.2177734375,
"num_tokens": 19524671.0,
"step": 245
},
{
"epoch": 0.1527001862197393,
"grad_norm": 0.423828125,
"learning_rate": 2.8849518358253095e-05,
"loss": 0.19512939453125,
"num_tokens": 19598978.0,
"step": 246
},
{
"epoch": 0.15332091868404718,
"grad_norm": 0.42578125,
"learning_rate": 2.8837903168327634e-05,
"loss": 0.204193115234375,
"num_tokens": 19686637.0,
"step": 247
},
{
"epoch": 0.15394165114835506,
"grad_norm": 0.408203125,
"learning_rate": 2.8826232001633174e-05,
"loss": 0.19580841064453125,
"num_tokens": 19769433.0,
"step": 248
},
{
"epoch": 0.15456238361266295,
"grad_norm": 0.47265625,
"learning_rate": 2.8814504905381663e-05,
"loss": 0.2563629150390625,
"num_tokens": 19849719.0,
"step": 249
},
{
"epoch": 0.15518311607697083,
"grad_norm": 0.37109375,
"learning_rate": 2.8802721927011267e-05,
"loss": 0.13668441772460938,
"num_tokens": 19927576.0,
"step": 250
},
{
"epoch": 0.1558038485412787,
"grad_norm": 0.470703125,
"learning_rate": 2.879088311418623e-05,
"loss": 0.23772048950195312,
"num_tokens": 20001718.0,
"step": 251
},
{
"epoch": 0.1564245810055866,
"grad_norm": 0.4375,
"learning_rate": 2.8778988514796644e-05,
"loss": 0.227874755859375,
"num_tokens": 20088169.0,
"step": 252
},
{
"epoch": 0.15704531346989448,
"grad_norm": 0.5,
"learning_rate": 2.876703817695827e-05,
"loss": 0.272552490234375,
"num_tokens": 20166172.0,
"step": 253
},
{
"epoch": 0.15766604593420236,
"grad_norm": 0.453125,
"learning_rate": 2.8755032149012336e-05,
"loss": 0.27325439453125,
"num_tokens": 20246825.0,
"step": 254
},
{
"epoch": 0.15828677839851024,
"grad_norm": 0.40234375,
"learning_rate": 2.8742970479525362e-05,
"loss": 0.19056320190429688,
"num_tokens": 20330694.0,
"step": 255
},
{
"epoch": 0.15890751086281812,
"grad_norm": 0.4765625,
"learning_rate": 2.8730853217288933e-05,
"loss": 0.2268218994140625,
"num_tokens": 20406311.0,
"step": 256
},
{
"epoch": 0.159528243327126,
"grad_norm": 0.44921875,
"learning_rate": 2.8718680411319516e-05,
"loss": 0.2295379638671875,
"num_tokens": 20484812.0,
"step": 257
},
{
"epoch": 0.1601489757914339,
"grad_norm": 0.408203125,
"learning_rate": 2.870645211085827e-05,
"loss": 0.17767333984375,
"num_tokens": 20560209.0,
"step": 258
},
{
"epoch": 0.16076970825574177,
"grad_norm": 0.4765625,
"learning_rate": 2.869416836537083e-05,
"loss": 0.2900238037109375,
"num_tokens": 20640291.0,
"step": 259
},
{
"epoch": 0.16139044072004965,
"grad_norm": 0.376953125,
"learning_rate": 2.8681829224547133e-05,
"loss": 0.184112548828125,
"num_tokens": 20726081.0,
"step": 260
},
{
"epoch": 0.16201117318435754,
"grad_norm": 0.458984375,
"learning_rate": 2.8669434738301184e-05,
"loss": 0.23430633544921875,
"num_tokens": 20808045.0,
"step": 261
},
{
"epoch": 0.16263190564866542,
"grad_norm": 0.458984375,
"learning_rate": 2.8656984956770873e-05,
"loss": 0.1941375732421875,
"num_tokens": 20890073.0,
"step": 262
},
{
"epoch": 0.1632526381129733,
"grad_norm": 0.455078125,
"learning_rate": 2.8644479930317776e-05,
"loss": 0.19208526611328125,
"num_tokens": 20962225.0,
"step": 263
},
{
"epoch": 0.16387337057728119,
"grad_norm": 0.44140625,
"learning_rate": 2.8631919709526935e-05,
"loss": 0.20654296875,
"num_tokens": 21043747.0,
"step": 264
},
{
"epoch": 0.16449410304158907,
"grad_norm": 0.48828125,
"learning_rate": 2.8619304345206668e-05,
"loss": 0.2682342529296875,
"num_tokens": 21120671.0,
"step": 265
},
{
"epoch": 0.16511483550589695,
"grad_norm": 0.4609375,
"learning_rate": 2.860663388838836e-05,
"loss": 0.2409515380859375,
"num_tokens": 21204173.0,
"step": 266
},
{
"epoch": 0.16573556797020483,
"grad_norm": 0.482421875,
"learning_rate": 2.859390839032625e-05,
"loss": 0.2242584228515625,
"num_tokens": 21281345.0,
"step": 267
},
{
"epoch": 0.16635630043451272,
"grad_norm": 0.447265625,
"learning_rate": 2.858112790249723e-05,
"loss": 0.2214813232421875,
"num_tokens": 21357815.0,
"step": 268
},
{
"epoch": 0.1669770328988206,
"grad_norm": 0.462890625,
"learning_rate": 2.8568292476600642e-05,
"loss": 0.2177734375,
"num_tokens": 21431126.0,
"step": 269
},
{
"epoch": 0.16759776536312848,
"grad_norm": 0.515625,
"learning_rate": 2.8555402164558058e-05,
"loss": 0.2936553955078125,
"num_tokens": 21510733.0,
"step": 270
},
{
"epoch": 0.16821849782743636,
"grad_norm": 0.462890625,
"learning_rate": 2.854245701851307e-05,
"loss": 0.2332000732421875,
"num_tokens": 21592082.0,
"step": 271
},
{
"epoch": 0.16883923029174425,
"grad_norm": 0.455078125,
"learning_rate": 2.852945709083108e-05,
"loss": 0.2206573486328125,
"num_tokens": 21661617.0,
"step": 272
},
{
"epoch": 0.16945996275605213,
"grad_norm": 0.4140625,
"learning_rate": 2.8516402434099106e-05,
"loss": 0.2027130126953125,
"num_tokens": 21738094.0,
"step": 273
},
{
"epoch": 0.17008069522036,
"grad_norm": 0.45703125,
"learning_rate": 2.8503293101125542e-05,
"loss": 0.231048583984375,
"num_tokens": 21814944.0,
"step": 274
},
{
"epoch": 0.1707014276846679,
"grad_norm": 0.419921875,
"learning_rate": 2.8490129144939968e-05,
"loss": 0.217987060546875,
"num_tokens": 21901526.0,
"step": 275
},
{
"epoch": 0.1713221601489758,
"grad_norm": 0.4140625,
"learning_rate": 2.847691061879291e-05,
"loss": 0.1964874267578125,
"num_tokens": 21972946.0,
"step": 276
},
{
"epoch": 0.1719428926132837,
"grad_norm": 0.4140625,
"learning_rate": 2.8463637576155654e-05,
"loss": 0.21360015869140625,
"num_tokens": 22058419.0,
"step": 277
},
{
"epoch": 0.17256362507759157,
"grad_norm": 0.484375,
"learning_rate": 2.8450310070720002e-05,
"loss": 0.252197265625,
"num_tokens": 22136372.0,
"step": 278
},
{
"epoch": 0.17318435754189945,
"grad_norm": 0.466796875,
"learning_rate": 2.843692815639808e-05,
"loss": 0.24320220947265625,
"num_tokens": 22214837.0,
"step": 279
},
{
"epoch": 0.17380509000620734,
"grad_norm": 0.486328125,
"learning_rate": 2.842349188732209e-05,
"loss": 0.2666015625,
"num_tokens": 22292215.0,
"step": 280
},
{
"epoch": 0.17442582247051522,
"grad_norm": 0.40625,
"learning_rate": 2.8410001317844136e-05,
"loss": 0.19649505615234375,
"num_tokens": 22374478.0,
"step": 281
},
{
"epoch": 0.1750465549348231,
"grad_norm": 0.40625,
"learning_rate": 2.839645650253595e-05,
"loss": 0.2139434814453125,
"num_tokens": 22455908.0,
"step": 282
},
{
"epoch": 0.17566728739913098,
"grad_norm": 0.470703125,
"learning_rate": 2.8382857496188714e-05,
"loss": 0.2554473876953125,
"num_tokens": 22531972.0,
"step": 283
},
{
"epoch": 0.17628801986343887,
"grad_norm": 0.4453125,
"learning_rate": 2.836920435381281e-05,
"loss": 0.21221923828125,
"num_tokens": 22607411.0,
"step": 284
},
{
"epoch": 0.17690875232774675,
"grad_norm": 0.470703125,
"learning_rate": 2.8355497130637625e-05,
"loss": 0.257293701171875,
"num_tokens": 22687098.0,
"step": 285
},
{
"epoch": 0.17752948479205463,
"grad_norm": 0.46484375,
"learning_rate": 2.83417358821113e-05,
"loss": 0.2622222900390625,
"num_tokens": 22768796.0,
"step": 286
},
{
"epoch": 0.17815021725636251,
"grad_norm": 0.46875,
"learning_rate": 2.8327920663900523e-05,
"loss": 0.220947265625,
"num_tokens": 22841705.0,
"step": 287
},
{
"epoch": 0.1787709497206704,
"grad_norm": 0.45703125,
"learning_rate": 2.8314051531890297e-05,
"loss": 0.2252655029296875,
"num_tokens": 22922884.0,
"step": 288
},
{
"epoch": 0.17939168218497828,
"grad_norm": 0.48828125,
"learning_rate": 2.830012854218372e-05,
"loss": 0.23340606689453125,
"num_tokens": 22996151.0,
"step": 289
},
{
"epoch": 0.18001241464928616,
"grad_norm": 0.51171875,
"learning_rate": 2.828615175110175e-05,
"loss": 0.3091583251953125,
"num_tokens": 23079834.0,
"step": 290
},
{
"epoch": 0.18063314711359404,
"grad_norm": 0.423828125,
"learning_rate": 2.827212121518298e-05,
"loss": 0.19625091552734375,
"num_tokens": 23160510.0,
"step": 291
},
{
"epoch": 0.18125387957790193,
"grad_norm": 0.48046875,
"learning_rate": 2.8258036991183414e-05,
"loss": 0.2607421875,
"num_tokens": 23240819.0,
"step": 292
},
{
"epoch": 0.1818746120422098,
"grad_norm": 0.3984375,
"learning_rate": 2.824389913607624e-05,
"loss": 0.1927032470703125,
"num_tokens": 23323433.0,
"step": 293
},
{
"epoch": 0.1824953445065177,
"grad_norm": 0.396484375,
"learning_rate": 2.8229707707051572e-05,
"loss": 0.1893768310546875,
"num_tokens": 23399330.0,
"step": 294
},
{
"epoch": 0.18311607697082558,
"grad_norm": 0.478515625,
"learning_rate": 2.821546276151626e-05,
"loss": 0.2425689697265625,
"num_tokens": 23472334.0,
"step": 295
},
{
"epoch": 0.18373680943513346,
"grad_norm": 0.423828125,
"learning_rate": 2.8201164357093632e-05,
"loss": 0.206390380859375,
"num_tokens": 23550900.0,
"step": 296
},
{
"epoch": 0.18435754189944134,
"grad_norm": 0.3671875,
"learning_rate": 2.8186812551623267e-05,
"loss": 0.1666717529296875,
"num_tokens": 23630296.0,
"step": 297
},
{
"epoch": 0.18497827436374922,
"grad_norm": 0.466796875,
"learning_rate": 2.817240740316076e-05,
"loss": 0.26280975341796875,
"num_tokens": 23704573.0,
"step": 298
},
{
"epoch": 0.1855990068280571,
"grad_norm": 0.470703125,
"learning_rate": 2.8157948969977484e-05,
"loss": 0.2276763916015625,
"num_tokens": 23784762.0,
"step": 299
},
{
"epoch": 0.186219739292365,
"grad_norm": 0.44140625,
"learning_rate": 2.8143437310560365e-05,
"loss": 0.23040008544921875,
"num_tokens": 23864416.0,
"step": 300
},
{
"epoch": 0.18684047175667287,
"grad_norm": 0.478515625,
"learning_rate": 2.812887248361164e-05,
"loss": 0.2584381103515625,
"num_tokens": 23945170.0,
"step": 301
},
{
"epoch": 0.18746120422098075,
"grad_norm": 0.482421875,
"learning_rate": 2.81142545480486e-05,
"loss": 0.2735137939453125,
"num_tokens": 24026647.0,
"step": 302
},
{
"epoch": 0.18808193668528864,
"grad_norm": 0.46875,
"learning_rate": 2.809958356300339e-05,
"loss": 0.2475738525390625,
"num_tokens": 24114143.0,
"step": 303
},
{
"epoch": 0.18870266914959652,
"grad_norm": 0.451171875,
"learning_rate": 2.808485958782275e-05,
"loss": 0.224700927734375,
"num_tokens": 24187227.0,
"step": 304
},
{
"epoch": 0.1893234016139044,
"grad_norm": 0.474609375,
"learning_rate": 2.807008268206776e-05,
"loss": 0.259368896484375,
"num_tokens": 24260031.0,
"step": 305
},
{
"epoch": 0.18994413407821228,
"grad_norm": 0.376953125,
"learning_rate": 2.805525290551362e-05,
"loss": 0.15789794921875,
"num_tokens": 24337315.0,
"step": 306
},
{
"epoch": 0.19056486654252017,
"grad_norm": 0.44921875,
"learning_rate": 2.80403703181494e-05,
"loss": 0.215179443359375,
"num_tokens": 24412328.0,
"step": 307
},
{
"epoch": 0.19118559900682805,
"grad_norm": 0.48046875,
"learning_rate": 2.8025434980177813e-05,
"loss": 0.233306884765625,
"num_tokens": 24490780.0,
"step": 308
},
{
"epoch": 0.19180633147113593,
"grad_norm": 0.4375,
"learning_rate": 2.8010446952014937e-05,
"loss": 0.2044830322265625,
"num_tokens": 24565732.0,
"step": 309
},
{
"epoch": 0.19242706393544382,
"grad_norm": 0.46875,
"learning_rate": 2.799540629429e-05,
"loss": 0.19797515869140625,
"num_tokens": 24639970.0,
"step": 310
},
{
"epoch": 0.1930477963997517,
"grad_norm": 0.416015625,
"learning_rate": 2.7980313067845133e-05,
"loss": 0.1732025146484375,
"num_tokens": 24723023.0,
"step": 311
},
{
"epoch": 0.19366852886405958,
"grad_norm": 0.478515625,
"learning_rate": 2.79651673337351e-05,
"loss": 0.2372283935546875,
"num_tokens": 24801917.0,
"step": 312
},
{
"epoch": 0.19428926132836746,
"grad_norm": 0.47265625,
"learning_rate": 2.7949969153227085e-05,
"loss": 0.2459564208984375,
"num_tokens": 24886212.0,
"step": 313
},
{
"epoch": 0.19490999379267535,
"grad_norm": 0.42578125,
"learning_rate": 2.793471858780042e-05,
"loss": 0.21136474609375,
"num_tokens": 24973533.0,
"step": 314
},
{
"epoch": 0.19553072625698323,
"grad_norm": 0.4609375,
"learning_rate": 2.7919415699146334e-05,
"loss": 0.216400146484375,
"num_tokens": 25053324.0,
"step": 315
},
{
"epoch": 0.1961514587212911,
"grad_norm": 0.4296875,
"learning_rate": 2.7904060549167736e-05,
"loss": 0.1983642578125,
"num_tokens": 25129276.0,
"step": 316
},
{
"epoch": 0.196772191185599,
"grad_norm": 0.390625,
"learning_rate": 2.788865319997892e-05,
"loss": 0.1563720703125,
"num_tokens": 25205592.0,
"step": 317
},
{
"epoch": 0.1973929236499069,
"grad_norm": 0.4765625,
"learning_rate": 2.7873193713905338e-05,
"loss": 0.24542999267578125,
"num_tokens": 25290666.0,
"step": 318
},
{
"epoch": 0.1980136561142148,
"grad_norm": 0.51953125,
"learning_rate": 2.7857682153483353e-05,
"loss": 0.27899169921875,
"num_tokens": 25375158.0,
"step": 319
},
{
"epoch": 0.19863438857852267,
"grad_norm": 0.390625,
"learning_rate": 2.7842118581459978e-05,
"loss": 0.167083740234375,
"num_tokens": 25457530.0,
"step": 320
},
{
"epoch": 0.19925512104283055,
"grad_norm": 0.453125,
"learning_rate": 2.782650306079261e-05,
"loss": 0.2344512939453125,
"num_tokens": 25536451.0,
"step": 321
},
{
"epoch": 0.19987585350713843,
"grad_norm": 0.416015625,
"learning_rate": 2.78108356546488e-05,
"loss": 0.184478759765625,
"num_tokens": 25613175.0,
"step": 322
},
{
"epoch": 0.20049658597144632,
"grad_norm": 0.419921875,
"learning_rate": 2.779511642640598e-05,
"loss": 0.196533203125,
"num_tokens": 25692610.0,
"step": 323
},
{
"epoch": 0.2011173184357542,
"grad_norm": 0.4375,
"learning_rate": 2.777934543965121e-05,
"loss": 0.21210479736328125,
"num_tokens": 25771170.0,
"step": 324
},
{
"epoch": 0.20173805090006208,
"grad_norm": 0.388671875,
"learning_rate": 2.776352275818093e-05,
"loss": 0.18120574951171875,
"num_tokens": 25853303.0,
"step": 325
},
{
"epoch": 0.20235878336436997,
"grad_norm": 0.486328125,
"learning_rate": 2.774764844600069e-05,
"loss": 0.243988037109375,
"num_tokens": 25932872.0,
"step": 326
},
{
"epoch": 0.20297951582867785,
"grad_norm": 0.4140625,
"learning_rate": 2.7731722567324885e-05,
"loss": 0.18221282958984375,
"num_tokens": 26012918.0,
"step": 327
},
{
"epoch": 0.20360024829298573,
"grad_norm": 0.462890625,
"learning_rate": 2.7715745186576516e-05,
"loss": 0.23598480224609375,
"num_tokens": 26093519.0,
"step": 328
},
{
"epoch": 0.2042209807572936,
"grad_norm": 0.435546875,
"learning_rate": 2.7699716368386923e-05,
"loss": 0.20574951171875,
"num_tokens": 26172889.0,
"step": 329
},
{
"epoch": 0.2048417132216015,
"grad_norm": 0.423828125,
"learning_rate": 2.7683636177595513e-05,
"loss": 0.208740234375,
"num_tokens": 26255983.0,
"step": 330
},
{
"epoch": 0.20546244568590938,
"grad_norm": 0.431640625,
"learning_rate": 2.7667504679249503e-05,
"loss": 0.189605712890625,
"num_tokens": 26334304.0,
"step": 331
},
{
"epoch": 0.20608317815021726,
"grad_norm": 0.4609375,
"learning_rate": 2.765132193860366e-05,
"loss": 0.20249176025390625,
"num_tokens": 26409949.0,
"step": 332
},
{
"epoch": 0.20670391061452514,
"grad_norm": 0.474609375,
"learning_rate": 2.7635088021120044e-05,
"loss": 0.2257232666015625,
"num_tokens": 26482852.0,
"step": 333
},
{
"epoch": 0.20732464307883303,
"grad_norm": 0.53125,
"learning_rate": 2.7618802992467718e-05,
"loss": 0.2966766357421875,
"num_tokens": 26559417.0,
"step": 334
},
{
"epoch": 0.2079453755431409,
"grad_norm": 0.44921875,
"learning_rate": 2.760246691852251e-05,
"loss": 0.226654052734375,
"num_tokens": 26642833.0,
"step": 335
},
{
"epoch": 0.2085661080074488,
"grad_norm": 0.484375,
"learning_rate": 2.758607986536673e-05,
"loss": 0.28375244140625,
"num_tokens": 26719865.0,
"step": 336
},
{
"epoch": 0.20918684047175667,
"grad_norm": 0.412109375,
"learning_rate": 2.7569641899288914e-05,
"loss": 0.2262420654296875,
"num_tokens": 26804222.0,
"step": 337
},
{
"epoch": 0.20980757293606456,
"grad_norm": 0.42578125,
"learning_rate": 2.755315308678354e-05,
"loss": 0.17522430419921875,
"num_tokens": 26886407.0,
"step": 338
},
{
"epoch": 0.21042830540037244,
"grad_norm": 0.419921875,
"learning_rate": 2.7536613494550783e-05,
"loss": 0.1876220703125,
"num_tokens": 26962878.0,
"step": 339
},
{
"epoch": 0.21104903786468032,
"grad_norm": 0.396484375,
"learning_rate": 2.7520023189496216e-05,
"loss": 0.17600250244140625,
"num_tokens": 27041188.0,
"step": 340
},
{
"epoch": 0.2116697703289882,
"grad_norm": 0.43359375,
"learning_rate": 2.7503382238730563e-05,
"loss": 0.19580078125,
"num_tokens": 27112851.0,
"step": 341
},
{
"epoch": 0.2122905027932961,
"grad_norm": 0.396484375,
"learning_rate": 2.748669070956942e-05,
"loss": 0.17499542236328125,
"num_tokens": 27192259.0,
"step": 342
},
{
"epoch": 0.21291123525760397,
"grad_norm": 0.43359375,
"learning_rate": 2.746994866953297e-05,
"loss": 0.210540771484375,
"num_tokens": 27276402.0,
"step": 343
},
{
"epoch": 0.21353196772191185,
"grad_norm": 0.416015625,
"learning_rate": 2.7453156186345737e-05,
"loss": 0.195098876953125,
"num_tokens": 27358171.0,
"step": 344
},
{
"epoch": 0.21415270018621974,
"grad_norm": 0.451171875,
"learning_rate": 2.7436313327936292e-05,
"loss": 0.2425689697265625,
"num_tokens": 27435691.0,
"step": 345
},
{
"epoch": 0.21477343265052762,
"grad_norm": 0.41796875,
"learning_rate": 2.7419420162436974e-05,
"loss": 0.18865966796875,
"num_tokens": 27512760.0,
"step": 346
},
{
"epoch": 0.2153941651148355,
"grad_norm": 0.466796875,
"learning_rate": 2.740247675818363e-05,
"loss": 0.218170166015625,
"num_tokens": 27591082.0,
"step": 347
},
{
"epoch": 0.21601489757914338,
"grad_norm": 0.4296875,
"learning_rate": 2.7385483183715335e-05,
"loss": 0.2278900146484375,
"num_tokens": 27673367.0,
"step": 348
},
{
"epoch": 0.21663563004345127,
"grad_norm": 0.408203125,
"learning_rate": 2.7368439507774104e-05,
"loss": 0.200927734375,
"num_tokens": 27759491.0,
"step": 349
},
{
"epoch": 0.21725636250775915,
"grad_norm": 0.45703125,
"learning_rate": 2.7351345799304623e-05,
"loss": 0.24560546875,
"num_tokens": 27841661.0,
"step": 350
},
{
"epoch": 0.21787709497206703,
"grad_norm": 0.5078125,
"learning_rate": 2.7334202127453975e-05,
"loss": 0.2792816162109375,
"num_tokens": 27923609.0,
"step": 351
},
{
"epoch": 0.21849782743637491,
"grad_norm": 0.419921875,
"learning_rate": 2.7317008561571343e-05,
"loss": 0.2261962890625,
"num_tokens": 28008160.0,
"step": 352
},
{
"epoch": 0.2191185599006828,
"grad_norm": 0.439453125,
"learning_rate": 2.729976517120775e-05,
"loss": 0.194000244140625,
"num_tokens": 28092708.0,
"step": 353
},
{
"epoch": 0.21973929236499068,
"grad_norm": 0.419921875,
"learning_rate": 2.7282472026115762e-05,
"loss": 0.1892242431640625,
"num_tokens": 28169079.0,
"step": 354
},
{
"epoch": 0.22036002482929856,
"grad_norm": 0.439453125,
"learning_rate": 2.7265129196249213e-05,
"loss": 0.22521209716796875,
"num_tokens": 28247713.0,
"step": 355
},
{
"epoch": 0.22098075729360644,
"grad_norm": 0.498046875,
"learning_rate": 2.724773675176293e-05,
"loss": 0.2608642578125,
"num_tokens": 28323224.0,
"step": 356
},
{
"epoch": 0.22160148975791433,
"grad_norm": 0.44140625,
"learning_rate": 2.7230294763012418e-05,
"loss": 0.2362060546875,
"num_tokens": 28407032.0,
"step": 357
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.392578125,
"learning_rate": 2.721280330055362e-05,
"loss": 0.219757080078125,
"num_tokens": 28493250.0,
"step": 358
},
{
"epoch": 0.2228429546865301,
"grad_norm": 0.49609375,
"learning_rate": 2.7195262435142587e-05,
"loss": 0.30975341796875,
"num_tokens": 28574535.0,
"step": 359
},
{
"epoch": 0.22346368715083798,
"grad_norm": 0.388671875,
"learning_rate": 2.7177672237735235e-05,
"loss": 0.206451416015625,
"num_tokens": 28658332.0,
"step": 360
},
{
"epoch": 0.22408441961514589,
"grad_norm": 0.427734375,
"learning_rate": 2.716003277948703e-05,
"loss": 0.21539306640625,
"num_tokens": 28742370.0,
"step": 361
},
{
"epoch": 0.22470515207945377,
"grad_norm": 0.3984375,
"learning_rate": 2.71423441317527e-05,
"loss": 0.1778411865234375,
"num_tokens": 28823115.0,
"step": 362
},
{
"epoch": 0.22532588454376165,
"grad_norm": 0.40234375,
"learning_rate": 2.7124606366085967e-05,
"loss": 0.17374038696289062,
"num_tokens": 28905963.0,
"step": 363
},
{
"epoch": 0.22594661700806953,
"grad_norm": 0.388671875,
"learning_rate": 2.7106819554239222e-05,
"loss": 0.16725921630859375,
"num_tokens": 28986427.0,
"step": 364
},
{
"epoch": 0.22656734947237742,
"grad_norm": 0.42578125,
"learning_rate": 2.7088983768163275e-05,
"loss": 0.23508071899414062,
"num_tokens": 29072582.0,
"step": 365
},
{
"epoch": 0.2271880819366853,
"grad_norm": 0.447265625,
"learning_rate": 2.7071099080007035e-05,
"loss": 0.240570068359375,
"num_tokens": 29149869.0,
"step": 366
},
{
"epoch": 0.22780881440099318,
"grad_norm": 0.447265625,
"learning_rate": 2.705316556211724e-05,
"loss": 0.23856353759765625,
"num_tokens": 29227206.0,
"step": 367
},
{
"epoch": 0.22842954686530106,
"grad_norm": 0.43359375,
"learning_rate": 2.703518328703814e-05,
"loss": 0.237396240234375,
"num_tokens": 29312112.0,
"step": 368
},
{
"epoch": 0.22905027932960895,
"grad_norm": 0.37890625,
"learning_rate": 2.701715232751122e-05,
"loss": 0.17934417724609375,
"num_tokens": 29394833.0,
"step": 369
},
{
"epoch": 0.22967101179391683,
"grad_norm": 0.474609375,
"learning_rate": 2.6999072756474917e-05,
"loss": 0.2477874755859375,
"num_tokens": 29471415.0,
"step": 370
},
{
"epoch": 0.2302917442582247,
"grad_norm": 0.451171875,
"learning_rate": 2.6980944647064286e-05,
"loss": 0.2259368896484375,
"num_tokens": 29551931.0,
"step": 371
},
{
"epoch": 0.2309124767225326,
"grad_norm": 0.48046875,
"learning_rate": 2.6962768072610742e-05,
"loss": 0.18096923828125,
"num_tokens": 29623327.0,
"step": 372
},
{
"epoch": 0.23153320918684048,
"grad_norm": 0.384765625,
"learning_rate": 2.694454310664175e-05,
"loss": 0.17133331298828125,
"num_tokens": 29700283.0,
"step": 373
},
{
"epoch": 0.23215394165114836,
"grad_norm": 0.400390625,
"learning_rate": 2.6926269822880526e-05,
"loss": 0.192291259765625,
"num_tokens": 29781555.0,
"step": 374
},
{
"epoch": 0.23277467411545624,
"grad_norm": 0.419921875,
"learning_rate": 2.6907948295245736e-05,
"loss": 0.1882476806640625,
"num_tokens": 29856292.0,
"step": 375
},
{
"epoch": 0.23339540657976413,
"grad_norm": 0.43359375,
"learning_rate": 2.6889578597851206e-05,
"loss": 0.225677490234375,
"num_tokens": 29934472.0,
"step": 376
},
{
"epoch": 0.234016139044072,
"grad_norm": 0.39453125,
"learning_rate": 2.6871160805005617e-05,
"loss": 0.19443511962890625,
"num_tokens": 30012351.0,
"step": 377
},
{
"epoch": 0.2346368715083799,
"grad_norm": 0.404296875,
"learning_rate": 2.6852694991212197e-05,
"loss": 0.1674346923828125,
"num_tokens": 30093512.0,
"step": 378
},
{
"epoch": 0.23525760397268777,
"grad_norm": 0.3828125,
"learning_rate": 2.6834181231168437e-05,
"loss": 0.20513153076171875,
"num_tokens": 30175960.0,
"step": 379
},
{
"epoch": 0.23587833643699566,
"grad_norm": 0.447265625,
"learning_rate": 2.6815619599765775e-05,
"loss": 0.248565673828125,
"num_tokens": 30260818.0,
"step": 380
},
{
"epoch": 0.23649906890130354,
"grad_norm": 0.38671875,
"learning_rate": 2.6797010172089297e-05,
"loss": 0.21155548095703125,
"num_tokens": 30345711.0,
"step": 381
},
{
"epoch": 0.23711980136561142,
"grad_norm": 0.439453125,
"learning_rate": 2.6778353023417434e-05,
"loss": 0.21074676513671875,
"num_tokens": 30423097.0,
"step": 382
},
{
"epoch": 0.2377405338299193,
"grad_norm": 0.455078125,
"learning_rate": 2.6759648229221656e-05,
"loss": 0.212890625,
"num_tokens": 30500698.0,
"step": 383
},
{
"epoch": 0.2383612662942272,
"grad_norm": 0.375,
"learning_rate": 2.6740895865166167e-05,
"loss": 0.1587066650390625,
"num_tokens": 30579854.0,
"step": 384
},
{
"epoch": 0.23898199875853507,
"grad_norm": 0.48828125,
"learning_rate": 2.67220960071076e-05,
"loss": 0.253936767578125,
"num_tokens": 30654761.0,
"step": 385
},
{
"epoch": 0.23960273122284295,
"grad_norm": 0.4140625,
"learning_rate": 2.670324873109472e-05,
"loss": 0.2159576416015625,
"num_tokens": 30738386.0,
"step": 386
},
{
"epoch": 0.24022346368715083,
"grad_norm": 0.4921875,
"learning_rate": 2.668435411336808e-05,
"loss": 0.2732086181640625,
"num_tokens": 30816440.0,
"step": 387
},
{
"epoch": 0.24084419615145872,
"grad_norm": 0.427734375,
"learning_rate": 2.6665412230359768e-05,
"loss": 0.204620361328125,
"num_tokens": 30895695.0,
"step": 388
},
{
"epoch": 0.2414649286157666,
"grad_norm": 0.443359375,
"learning_rate": 2.6646423158693048e-05,
"loss": 0.21826171875,
"num_tokens": 30975880.0,
"step": 389
},
{
"epoch": 0.24208566108007448,
"grad_norm": 0.421875,
"learning_rate": 2.6627386975182083e-05,
"loss": 0.18476104736328125,
"num_tokens": 31053925.0,
"step": 390
},
{
"epoch": 0.24270639354438237,
"grad_norm": 0.3671875,
"learning_rate": 2.66083037568316e-05,
"loss": 0.17284393310546875,
"num_tokens": 31134437.0,
"step": 391
},
{
"epoch": 0.24332712600869025,
"grad_norm": 0.400390625,
"learning_rate": 2.6589173580836607e-05,
"loss": 0.19146728515625,
"num_tokens": 31219283.0,
"step": 392
},
{
"epoch": 0.24394785847299813,
"grad_norm": 0.46484375,
"learning_rate": 2.6569996524582047e-05,
"loss": 0.25022125244140625,
"num_tokens": 31294567.0,
"step": 393
},
{
"epoch": 0.244568590937306,
"grad_norm": 0.40625,
"learning_rate": 2.6550772665642505e-05,
"loss": 0.2071380615234375,
"num_tokens": 31377068.0,
"step": 394
},
{
"epoch": 0.2451893234016139,
"grad_norm": 0.419921875,
"learning_rate": 2.6531502081781902e-05,
"loss": 0.17620849609375,
"num_tokens": 31457437.0,
"step": 395
},
{
"epoch": 0.24581005586592178,
"grad_norm": 0.439453125,
"learning_rate": 2.6512184850953164e-05,
"loss": 0.2180328369140625,
"num_tokens": 31536927.0,
"step": 396
},
{
"epoch": 0.24643078833022966,
"grad_norm": 0.455078125,
"learning_rate": 2.64928210512979e-05,
"loss": 0.2259521484375,
"num_tokens": 31615885.0,
"step": 397
},
{
"epoch": 0.24705152079453754,
"grad_norm": 0.384765625,
"learning_rate": 2.647341076114612e-05,
"loss": 0.1813812255859375,
"num_tokens": 31693647.0,
"step": 398
},
{
"epoch": 0.24767225325884543,
"grad_norm": 0.384765625,
"learning_rate": 2.645395405901588e-05,
"loss": 0.16904449462890625,
"num_tokens": 31768454.0,
"step": 399
},
{
"epoch": 0.2482929857231533,
"grad_norm": 0.427734375,
"learning_rate": 2.6434451023612983e-05,
"loss": 0.2210235595703125,
"num_tokens": 31843631.0,
"step": 400
},
{
"epoch": 0.2489137181874612,
"grad_norm": 0.478515625,
"learning_rate": 2.641490173383067e-05,
"loss": 0.2405548095703125,
"num_tokens": 31920118.0,
"step": 401
},
{
"epoch": 0.24953445065176907,
"grad_norm": 0.4609375,
"learning_rate": 2.6395306268749274e-05,
"loss": 0.242462158203125,
"num_tokens": 32001556.0,
"step": 402
},
{
"epoch": 0.250155183116077,
"grad_norm": 0.439453125,
"learning_rate": 2.6375664707635922e-05,
"loss": 0.2298736572265625,
"num_tokens": 32082266.0,
"step": 403
},
{
"epoch": 0.25077591558038487,
"grad_norm": 0.3828125,
"learning_rate": 2.6355977129944212e-05,
"loss": 0.1910858154296875,
"num_tokens": 32165058.0,
"step": 404
},
{
"epoch": 0.25139664804469275,
"grad_norm": 0.396484375,
"learning_rate": 2.6336243615313876e-05,
"loss": 0.18426513671875,
"num_tokens": 32244505.0,
"step": 405
},
{
"epoch": 0.25201738050900063,
"grad_norm": 0.447265625,
"learning_rate": 2.6316464243570476e-05,
"loss": 0.260040283203125,
"num_tokens": 32329605.0,
"step": 406
},
{
"epoch": 0.2526381129733085,
"grad_norm": 0.4296875,
"learning_rate": 2.6296639094725075e-05,
"loss": 0.19622802734375,
"num_tokens": 32406959.0,
"step": 407
},
{
"epoch": 0.2532588454376164,
"grad_norm": 0.4375,
"learning_rate": 2.6276768248973918e-05,
"loss": 0.2441864013671875,
"num_tokens": 32491486.0,
"step": 408
},
{
"epoch": 0.2538795779019243,
"grad_norm": 0.419921875,
"learning_rate": 2.6256851786698084e-05,
"loss": 0.208648681640625,
"num_tokens": 32572553.0,
"step": 409
},
{
"epoch": 0.25450031036623216,
"grad_norm": 0.42578125,
"learning_rate": 2.62368897884632e-05,
"loss": 0.2089385986328125,
"num_tokens": 32655823.0,
"step": 410
},
{
"epoch": 0.25512104283054005,
"grad_norm": 0.455078125,
"learning_rate": 2.621688233501907e-05,
"loss": 0.22441864013671875,
"num_tokens": 32736633.0,
"step": 411
},
{
"epoch": 0.25574177529484793,
"grad_norm": 0.423828125,
"learning_rate": 2.61968295072994e-05,
"loss": 0.204986572265625,
"num_tokens": 32816818.0,
"step": 412
},
{
"epoch": 0.2563625077591558,
"grad_norm": 0.435546875,
"learning_rate": 2.617673138642143e-05,
"loss": 0.23944854736328125,
"num_tokens": 32899253.0,
"step": 413
},
{
"epoch": 0.2569832402234637,
"grad_norm": 0.466796875,
"learning_rate": 2.6156588053685606e-05,
"loss": 0.21295166015625,
"num_tokens": 32974912.0,
"step": 414
},
{
"epoch": 0.2576039726877716,
"grad_norm": 0.412109375,
"learning_rate": 2.6136399590575288e-05,
"loss": 0.202301025390625,
"num_tokens": 33052181.0,
"step": 415
},
{
"epoch": 0.25822470515207946,
"grad_norm": 0.3984375,
"learning_rate": 2.611616607875638e-05,
"loss": 0.21979904174804688,
"num_tokens": 33145296.0,
"step": 416
},
{
"epoch": 0.25884543761638734,
"grad_norm": 0.50390625,
"learning_rate": 2.6095887600077022e-05,
"loss": 0.2843780517578125,
"num_tokens": 33223399.0,
"step": 417
},
{
"epoch": 0.2594661700806952,
"grad_norm": 0.451171875,
"learning_rate": 2.607556423656725e-05,
"loss": 0.2333831787109375,
"num_tokens": 33301036.0,
"step": 418
},
{
"epoch": 0.2600869025450031,
"grad_norm": 0.33984375,
"learning_rate": 2.6055196070438663e-05,
"loss": 0.1499481201171875,
"num_tokens": 33385892.0,
"step": 419
},
{
"epoch": 0.260707635009311,
"grad_norm": 0.37890625,
"learning_rate": 2.603478318408411e-05,
"loss": 0.17242431640625,
"num_tokens": 33470896.0,
"step": 420
},
{
"epoch": 0.26132836747361887,
"grad_norm": 0.462890625,
"learning_rate": 2.601432566007733e-05,
"loss": 0.24725341796875,
"num_tokens": 33544783.0,
"step": 421
},
{
"epoch": 0.26194909993792675,
"grad_norm": 0.400390625,
"learning_rate": 2.599382358117263e-05,
"loss": 0.20053482055664062,
"num_tokens": 33627586.0,
"step": 422
},
{
"epoch": 0.26256983240223464,
"grad_norm": 0.4453125,
"learning_rate": 2.5973277030304543e-05,
"loss": 0.220703125,
"num_tokens": 33705999.0,
"step": 423
},
{
"epoch": 0.2631905648665425,
"grad_norm": 0.439453125,
"learning_rate": 2.5952686090587515e-05,
"loss": 0.2160186767578125,
"num_tokens": 33788984.0,
"step": 424
},
{
"epoch": 0.2638112973308504,
"grad_norm": 0.435546875,
"learning_rate": 2.593205084531554e-05,
"loss": 0.19208526611328125,
"num_tokens": 33861102.0,
"step": 425
},
{
"epoch": 0.2644320297951583,
"grad_norm": 0.408203125,
"learning_rate": 2.5911371377961837e-05,
"loss": 0.20062255859375,
"num_tokens": 33937816.0,
"step": 426
},
{
"epoch": 0.26505276225946617,
"grad_norm": 0.388671875,
"learning_rate": 2.589064777217852e-05,
"loss": 0.189849853515625,
"num_tokens": 34020322.0,
"step": 427
},
{
"epoch": 0.26567349472377405,
"grad_norm": 0.435546875,
"learning_rate": 2.586988011179624e-05,
"loss": 0.19635772705078125,
"num_tokens": 34091086.0,
"step": 428
},
{
"epoch": 0.26629422718808193,
"grad_norm": 0.404296875,
"learning_rate": 2.5849068480823862e-05,
"loss": 0.1734619140625,
"num_tokens": 34164143.0,
"step": 429
},
{
"epoch": 0.2669149596523898,
"grad_norm": 0.451171875,
"learning_rate": 2.5828212963448116e-05,
"loss": 0.22176361083984375,
"num_tokens": 34244415.0,
"step": 430
},
{
"epoch": 0.2675356921166977,
"grad_norm": 0.408203125,
"learning_rate": 2.5807313644033273e-05,
"loss": 0.1698150634765625,
"num_tokens": 34323399.0,
"step": 431
},
{
"epoch": 0.2681564245810056,
"grad_norm": 0.404296875,
"learning_rate": 2.578637060712077e-05,
"loss": 0.209869384765625,
"num_tokens": 34408888.0,
"step": 432
},
{
"epoch": 0.26877715704531346,
"grad_norm": 0.41796875,
"learning_rate": 2.576538393742891e-05,
"loss": 0.18206024169921875,
"num_tokens": 34488933.0,
"step": 433
},
{
"epoch": 0.26939788950962135,
"grad_norm": 0.35546875,
"learning_rate": 2.5744353719852477e-05,
"loss": 0.15802001953125,
"num_tokens": 34574178.0,
"step": 434
},
{
"epoch": 0.27001862197392923,
"grad_norm": 0.39453125,
"learning_rate": 2.572328003946244e-05,
"loss": 0.1738433837890625,
"num_tokens": 34655487.0,
"step": 435
},
{
"epoch": 0.2706393544382371,
"grad_norm": 0.41015625,
"learning_rate": 2.5702162981505555e-05,
"loss": 0.21429443359375,
"num_tokens": 34738038.0,
"step": 436
},
{
"epoch": 0.271260086902545,
"grad_norm": 0.390625,
"learning_rate": 2.5681002631404067e-05,
"loss": 0.178192138671875,
"num_tokens": 34827030.0,
"step": 437
},
{
"epoch": 0.2718808193668529,
"grad_norm": 0.380859375,
"learning_rate": 2.5659799074755342e-05,
"loss": 0.16982269287109375,
"num_tokens": 34905718.0,
"step": 438
},
{
"epoch": 0.27250155183116076,
"grad_norm": 0.345703125,
"learning_rate": 2.5638552397331518e-05,
"loss": 0.14931488037109375,
"num_tokens": 34991598.0,
"step": 439
},
{
"epoch": 0.27312228429546864,
"grad_norm": 0.35546875,
"learning_rate": 2.5617262685079173e-05,
"loss": 0.15102386474609375,
"num_tokens": 35072735.0,
"step": 440
},
{
"epoch": 0.2737430167597765,
"grad_norm": 0.41015625,
"learning_rate": 2.5595930024118957e-05,
"loss": 0.17125701904296875,
"num_tokens": 35146236.0,
"step": 441
},
{
"epoch": 0.2743637492240844,
"grad_norm": 0.400390625,
"learning_rate": 2.5574554500745263e-05,
"loss": 0.1802215576171875,
"num_tokens": 35223566.0,
"step": 442
},
{
"epoch": 0.2749844816883923,
"grad_norm": 0.4296875,
"learning_rate": 2.5553136201425868e-05,
"loss": 0.1890106201171875,
"num_tokens": 35299788.0,
"step": 443
},
{
"epoch": 0.2756052141527002,
"grad_norm": 0.40625,
"learning_rate": 2.553167521280159e-05,
"loss": 0.18357086181640625,
"num_tokens": 35379215.0,
"step": 444
},
{
"epoch": 0.27622594661700806,
"grad_norm": 0.42578125,
"learning_rate": 2.5510171621685926e-05,
"loss": 0.209228515625,
"num_tokens": 35458045.0,
"step": 445
},
{
"epoch": 0.27684667908131594,
"grad_norm": 0.4140625,
"learning_rate": 2.5488625515064713e-05,
"loss": 0.19693756103515625,
"num_tokens": 35536918.0,
"step": 446
},
{
"epoch": 0.2774674115456238,
"grad_norm": 0.416015625,
"learning_rate": 2.5467036980095766e-05,
"loss": 0.2014617919921875,
"num_tokens": 35622887.0,
"step": 447
},
{
"epoch": 0.2780881440099317,
"grad_norm": 0.43359375,
"learning_rate": 2.5445406104108527e-05,
"loss": 0.20465087890625,
"num_tokens": 35702044.0,
"step": 448
},
{
"epoch": 0.2787088764742396,
"grad_norm": 0.462890625,
"learning_rate": 2.5423732974603732e-05,
"loss": 0.219146728515625,
"num_tokens": 35783189.0,
"step": 449
},
{
"epoch": 0.27932960893854747,
"grad_norm": 0.400390625,
"learning_rate": 2.5402017679253015e-05,
"loss": 0.2011260986328125,
"num_tokens": 35867998.0,
"step": 450
},
{
"epoch": 0.27995034140285535,
"grad_norm": 0.458984375,
"learning_rate": 2.5380260305898602e-05,
"loss": 0.222808837890625,
"num_tokens": 35942035.0,
"step": 451
},
{
"epoch": 0.28057107386716323,
"grad_norm": 0.435546875,
"learning_rate": 2.535846094255292e-05,
"loss": 0.1995086669921875,
"num_tokens": 36020075.0,
"step": 452
},
{
"epoch": 0.2811918063314711,
"grad_norm": 0.44140625,
"learning_rate": 2.5336619677398248e-05,
"loss": 0.2311248779296875,
"num_tokens": 36102949.0,
"step": 453
},
{
"epoch": 0.281812538795779,
"grad_norm": 0.453125,
"learning_rate": 2.5314736598786376e-05,
"loss": 0.2327728271484375,
"num_tokens": 36188479.0,
"step": 454
},
{
"epoch": 0.2824332712600869,
"grad_norm": 0.43359375,
"learning_rate": 2.529281179523823e-05,
"loss": 0.2134552001953125,
"num_tokens": 36266869.0,
"step": 455
},
{
"epoch": 0.28305400372439476,
"grad_norm": 0.42578125,
"learning_rate": 2.5270845355443524e-05,
"loss": 0.191680908203125,
"num_tokens": 36341221.0,
"step": 456
},
{
"epoch": 0.28367473618870265,
"grad_norm": 0.43359375,
"learning_rate": 2.5248837368260404e-05,
"loss": 0.1881561279296875,
"num_tokens": 36418826.0,
"step": 457
},
{
"epoch": 0.28429546865301053,
"grad_norm": 0.359375,
"learning_rate": 2.5226787922715068e-05,
"loss": 0.14521026611328125,
"num_tokens": 36494976.0,
"step": 458
},
{
"epoch": 0.2849162011173184,
"grad_norm": 0.4140625,
"learning_rate": 2.5204697108001425e-05,
"loss": 0.19922637939453125,
"num_tokens": 36575143.0,
"step": 459
},
{
"epoch": 0.2855369335816263,
"grad_norm": 0.443359375,
"learning_rate": 2.5182565013480746e-05,
"loss": 0.25048828125,
"num_tokens": 36654947.0,
"step": 460
},
{
"epoch": 0.2861576660459342,
"grad_norm": 0.392578125,
"learning_rate": 2.516039172868126e-05,
"loss": 0.157745361328125,
"num_tokens": 36729518.0,
"step": 461
},
{
"epoch": 0.28677839851024206,
"grad_norm": 0.41015625,
"learning_rate": 2.5138177343297835e-05,
"loss": 0.2284088134765625,
"num_tokens": 36816918.0,
"step": 462
},
{
"epoch": 0.28739913097454994,
"grad_norm": 0.435546875,
"learning_rate": 2.5115921947191597e-05,
"loss": 0.20748138427734375,
"num_tokens": 36894753.0,
"step": 463
},
{
"epoch": 0.2880198634388578,
"grad_norm": 0.4296875,
"learning_rate": 2.509362563038956e-05,
"loss": 0.21819496154785156,
"num_tokens": 36968113.0,
"step": 464
},
{
"epoch": 0.2886405959031657,
"grad_norm": 0.44921875,
"learning_rate": 2.507128848308428e-05,
"loss": 0.220123291015625,
"num_tokens": 37047321.0,
"step": 465
},
{
"epoch": 0.2892613283674736,
"grad_norm": 0.4453125,
"learning_rate": 2.504891059563347e-05,
"loss": 0.223358154296875,
"num_tokens": 37123423.0,
"step": 466
},
{
"epoch": 0.28988206083178153,
"grad_norm": 0.40234375,
"learning_rate": 2.5026492058559643e-05,
"loss": 0.1943206787109375,
"num_tokens": 37202375.0,
"step": 467
},
{
"epoch": 0.2905027932960894,
"grad_norm": 0.470703125,
"learning_rate": 2.500403296254976e-05,
"loss": 0.239013671875,
"num_tokens": 37283800.0,
"step": 468
},
{
"epoch": 0.2911235257603973,
"grad_norm": 0.4609375,
"learning_rate": 2.4981533398454844e-05,
"loss": 0.24169921875,
"num_tokens": 37362390.0,
"step": 469
},
{
"epoch": 0.2917442582247052,
"grad_norm": 0.4375,
"learning_rate": 2.4958993457289602e-05,
"loss": 0.210205078125,
"num_tokens": 37441284.0,
"step": 470
},
{
"epoch": 0.29236499068901306,
"grad_norm": 0.427734375,
"learning_rate": 2.4936413230232103e-05,
"loss": 0.18692779541015625,
"num_tokens": 37516234.0,
"step": 471
},
{
"epoch": 0.29298572315332094,
"grad_norm": 0.4375,
"learning_rate": 2.491379280862336e-05,
"loss": 0.2349700927734375,
"num_tokens": 37596771.0,
"step": 472
},
{
"epoch": 0.2936064556176288,
"grad_norm": 0.423828125,
"learning_rate": 2.4891132283966976e-05,
"loss": 0.1943359375,
"num_tokens": 37678320.0,
"step": 473
},
{
"epoch": 0.2942271880819367,
"grad_norm": 0.435546875,
"learning_rate": 2.4868431747928794e-05,
"loss": 0.23413848876953125,
"num_tokens": 37760030.0,
"step": 474
},
{
"epoch": 0.2948479205462446,
"grad_norm": 0.400390625,
"learning_rate": 2.4845691292336505e-05,
"loss": 0.16797637939453125,
"num_tokens": 37834231.0,
"step": 475
},
{
"epoch": 0.2954686530105525,
"grad_norm": 0.365234375,
"learning_rate": 2.482291100917928e-05,
"loss": 0.154815673828125,
"num_tokens": 37913213.0,
"step": 476
},
{
"epoch": 0.29608938547486036,
"grad_norm": 0.3984375,
"learning_rate": 2.480009099060739e-05,
"loss": 0.1909942626953125,
"num_tokens": 37992954.0,
"step": 477
},
{
"epoch": 0.29671011793916824,
"grad_norm": 0.451171875,
"learning_rate": 2.4777231328931854e-05,
"loss": 0.257049560546875,
"num_tokens": 38081649.0,
"step": 478
},
{
"epoch": 0.2973308504034761,
"grad_norm": 0.53515625,
"learning_rate": 2.4754332116624055e-05,
"loss": 0.35211181640625,
"num_tokens": 38167712.0,
"step": 479
},
{
"epoch": 0.297951582867784,
"grad_norm": 0.439453125,
"learning_rate": 2.473139344631536e-05,
"loss": 0.237213134765625,
"num_tokens": 38250426.0,
"step": 480
},
{
"epoch": 0.2985723153320919,
"grad_norm": 0.4609375,
"learning_rate": 2.470841541079676e-05,
"loss": 0.22540283203125,
"num_tokens": 38330121.0,
"step": 481
},
{
"epoch": 0.29919304779639977,
"grad_norm": 0.39453125,
"learning_rate": 2.4685398103018465e-05,
"loss": 0.161529541015625,
"num_tokens": 38413637.0,
"step": 482
},
{
"epoch": 0.29981378026070765,
"grad_norm": 0.396484375,
"learning_rate": 2.466234161608957e-05,
"loss": 0.1733856201171875,
"num_tokens": 38488030.0,
"step": 483
},
{
"epoch": 0.30043451272501553,
"grad_norm": 0.408203125,
"learning_rate": 2.463924604327765e-05,
"loss": 0.2153167724609375,
"num_tokens": 38566674.0,
"step": 484
},
{
"epoch": 0.3010552451893234,
"grad_norm": 0.4453125,
"learning_rate": 2.4616111478008386e-05,
"loss": 0.2422332763671875,
"num_tokens": 38648975.0,
"step": 485
},
{
"epoch": 0.3016759776536313,
"grad_norm": 0.3984375,
"learning_rate": 2.4592938013865184e-05,
"loss": 0.1868896484375,
"num_tokens": 38726777.0,
"step": 486
},
{
"epoch": 0.3022967101179392,
"grad_norm": 0.44140625,
"learning_rate": 2.4569725744588813e-05,
"loss": 0.2122344970703125,
"num_tokens": 38808863.0,
"step": 487
},
{
"epoch": 0.30291744258224707,
"grad_norm": 0.392578125,
"learning_rate": 2.4546474764077022e-05,
"loss": 0.2018280029296875,
"num_tokens": 38896069.0,
"step": 488
},
{
"epoch": 0.30353817504655495,
"grad_norm": 0.49609375,
"learning_rate": 2.4523185166384137e-05,
"loss": 0.23865509033203125,
"num_tokens": 38969806.0,
"step": 489
},
{
"epoch": 0.30415890751086283,
"grad_norm": 0.41015625,
"learning_rate": 2.4499857045720705e-05,
"loss": 0.202606201171875,
"num_tokens": 39049210.0,
"step": 490
},
{
"epoch": 0.3047796399751707,
"grad_norm": 0.41796875,
"learning_rate": 2.447649049645311e-05,
"loss": 0.19609832763671875,
"num_tokens": 39125183.0,
"step": 491
},
{
"epoch": 0.3054003724394786,
"grad_norm": 0.408203125,
"learning_rate": 2.445308561310318e-05,
"loss": 0.1680145263671875,
"num_tokens": 39206329.0,
"step": 492
},
{
"epoch": 0.3060211049037865,
"grad_norm": 0.455078125,
"learning_rate": 2.4429642490347806e-05,
"loss": 0.2135009765625,
"num_tokens": 39283309.0,
"step": 493
},
{
"epoch": 0.30664183736809436,
"grad_norm": 0.453125,
"learning_rate": 2.440616122301858e-05,
"loss": 0.248931884765625,
"num_tokens": 39367306.0,
"step": 494
},
{
"epoch": 0.30726256983240224,
"grad_norm": 0.408203125,
"learning_rate": 2.4382641906101394e-05,
"loss": 0.16819000244140625,
"num_tokens": 39441358.0,
"step": 495
},
{
"epoch": 0.3078833022967101,
"grad_norm": 0.412109375,
"learning_rate": 2.435908463473603e-05,
"loss": 0.1839447021484375,
"num_tokens": 39517505.0,
"step": 496
},
{
"epoch": 0.308504034761018,
"grad_norm": 0.400390625,
"learning_rate": 2.4335489504215834e-05,
"loss": 0.177398681640625,
"num_tokens": 39591498.0,
"step": 497
},
{
"epoch": 0.3091247672253259,
"grad_norm": 0.423828125,
"learning_rate": 2.431185660998729e-05,
"loss": 0.21129608154296875,
"num_tokens": 39672995.0,
"step": 498
},
{
"epoch": 0.3097454996896338,
"grad_norm": 0.365234375,
"learning_rate": 2.428818604764964e-05,
"loss": 0.1651458740234375,
"num_tokens": 39754665.0,
"step": 499
},
{
"epoch": 0.31036623215394166,
"grad_norm": 0.59765625,
"learning_rate": 2.4264477912954498e-05,
"loss": 0.19366455078125,
"num_tokens": 39834074.0,
"step": 500
},
{
"epoch": 0.31098696461824954,
"grad_norm": 0.498046875,
"learning_rate": 2.4240732301805463e-05,
"loss": 0.22674560546875,
"num_tokens": 39901227.0,
"step": 501
},
{
"epoch": 0.3116076970825574,
"grad_norm": 0.384765625,
"learning_rate": 2.421694931025775e-05,
"loss": 0.1508941650390625,
"num_tokens": 39975272.0,
"step": 502
},
{
"epoch": 0.3122284295468653,
"grad_norm": 0.453125,
"learning_rate": 2.4193129034517758e-05,
"loss": 0.24749755859375,
"num_tokens": 40058538.0,
"step": 503
},
{
"epoch": 0.3128491620111732,
"grad_norm": 0.4453125,
"learning_rate": 2.4169271570942724e-05,
"loss": 0.198455810546875,
"num_tokens": 40135374.0,
"step": 504
},
{
"epoch": 0.31346989447548107,
"grad_norm": 0.443359375,
"learning_rate": 2.414537701604032e-05,
"loss": 0.2157745361328125,
"num_tokens": 40213920.0,
"step": 505
},
{
"epoch": 0.31409062693978895,
"grad_norm": 0.416015625,
"learning_rate": 2.412144546646825e-05,
"loss": 0.18807220458984375,
"num_tokens": 40291930.0,
"step": 506
},
{
"epoch": 0.31471135940409684,
"grad_norm": 0.380859375,
"learning_rate": 2.409747701903387e-05,
"loss": 0.15155029296875,
"num_tokens": 40368748.0,
"step": 507
},
{
"epoch": 0.3153320918684047,
"grad_norm": 0.44140625,
"learning_rate": 2.4073471770693788e-05,
"loss": 0.22333526611328125,
"num_tokens": 40446867.0,
"step": 508
},
{
"epoch": 0.3159528243327126,
"grad_norm": 0.404296875,
"learning_rate": 2.4049429818553494e-05,
"loss": 0.18402099609375,
"num_tokens": 40531129.0,
"step": 509
},
{
"epoch": 0.3165735567970205,
"grad_norm": 0.376953125,
"learning_rate": 2.4025351259866935e-05,
"loss": 0.164276123046875,
"num_tokens": 40610648.0,
"step": 510
},
{
"epoch": 0.31719428926132837,
"grad_norm": 0.419921875,
"learning_rate": 2.4001236192036154e-05,
"loss": 0.11762237548828125,
"num_tokens": 40696900.0,
"step": 511
},
{
"epoch": 0.31781502172563625,
"grad_norm": 0.384765625,
"learning_rate": 2.3977084712610862e-05,
"loss": 0.17205810546875,
"num_tokens": 40775511.0,
"step": 512
},
{
"epoch": 0.31843575418994413,
"grad_norm": 0.34765625,
"learning_rate": 2.3952896919288074e-05,
"loss": 0.13307952880859375,
"num_tokens": 40855259.0,
"step": 513
},
{
"epoch": 0.319056486654252,
"grad_norm": 0.39453125,
"learning_rate": 2.3928672909911703e-05,
"loss": 0.1673126220703125,
"num_tokens": 40930806.0,
"step": 514
},
{
"epoch": 0.3196772191185599,
"grad_norm": 0.447265625,
"learning_rate": 2.3904412782472147e-05,
"loss": 0.2042236328125,
"num_tokens": 41007744.0,
"step": 515
},
{
"epoch": 0.3202979515828678,
"grad_norm": 0.435546875,
"learning_rate": 2.3880116635105923e-05,
"loss": 0.2387542724609375,
"num_tokens": 41086843.0,
"step": 516
},
{
"epoch": 0.32091868404717566,
"grad_norm": 0.36328125,
"learning_rate": 2.385578456609525e-05,
"loss": 0.16551589965820312,
"num_tokens": 41169468.0,
"step": 517
},
{
"epoch": 0.32153941651148354,
"grad_norm": 0.45703125,
"learning_rate": 2.3831416673867657e-05,
"loss": 0.2154693603515625,
"num_tokens": 41247676.0,
"step": 518
},
{
"epoch": 0.3221601489757914,
"grad_norm": 0.466796875,
"learning_rate": 2.3807013056995583e-05,
"loss": 0.23715972900390625,
"num_tokens": 41325006.0,
"step": 519
},
{
"epoch": 0.3227808814400993,
"grad_norm": 0.466796875,
"learning_rate": 2.3782573814195978e-05,
"loss": 0.23017120361328125,
"num_tokens": 41402991.0,
"step": 520
},
{
"epoch": 0.3234016139044072,
"grad_norm": 0.51171875,
"learning_rate": 2.3758099044329912e-05,
"loss": 0.1800079345703125,
"num_tokens": 41479499.0,
"step": 521
},
{
"epoch": 0.3240223463687151,
"grad_norm": 0.439453125,
"learning_rate": 2.3733588846402158e-05,
"loss": 0.2174835205078125,
"num_tokens": 41559656.0,
"step": 522
},
{
"epoch": 0.32464307883302296,
"grad_norm": 0.4296875,
"learning_rate": 2.370904331956081e-05,
"loss": 0.198455810546875,
"num_tokens": 41636299.0,
"step": 523
},
{
"epoch": 0.32526381129733084,
"grad_norm": 0.4453125,
"learning_rate": 2.3684462563096878e-05,
"loss": 0.21435546875,
"num_tokens": 41716272.0,
"step": 524
},
{
"epoch": 0.3258845437616387,
"grad_norm": 0.33203125,
"learning_rate": 2.365984667644386e-05,
"loss": 0.11435699462890625,
"num_tokens": 41798664.0,
"step": 525
},
{
"epoch": 0.3265052762259466,
"grad_norm": 0.41796875,
"learning_rate": 2.3635195759177382e-05,
"loss": 0.1849365234375,
"num_tokens": 41871946.0,
"step": 526
},
{
"epoch": 0.3271260086902545,
"grad_norm": 0.412109375,
"learning_rate": 2.3610509911014785e-05,
"loss": 0.181732177734375,
"num_tokens": 41954277.0,
"step": 527
},
{
"epoch": 0.32774674115456237,
"grad_norm": 0.431640625,
"learning_rate": 2.3585789231814676e-05,
"loss": 0.1625213623046875,
"num_tokens": 42027831.0,
"step": 528
},
{
"epoch": 0.32836747361887025,
"grad_norm": 0.50390625,
"learning_rate": 2.356103382157659e-05,
"loss": 0.2745208740234375,
"num_tokens": 42108413.0,
"step": 529
},
{
"epoch": 0.32898820608317814,
"grad_norm": 0.419921875,
"learning_rate": 2.3536243780440546e-05,
"loss": 0.21575927734375,
"num_tokens": 42192168.0,
"step": 530
},
{
"epoch": 0.329608938547486,
"grad_norm": 0.419921875,
"learning_rate": 2.351141920868665e-05,
"loss": 0.140472412109375,
"num_tokens": 42270694.0,
"step": 531
},
{
"epoch": 0.3302296710117939,
"grad_norm": 0.48828125,
"learning_rate": 2.3486560206734688e-05,
"loss": 0.2412261962890625,
"num_tokens": 42347981.0,
"step": 532
},
{
"epoch": 0.3308504034761018,
"grad_norm": 0.388671875,
"learning_rate": 2.346166687514373e-05,
"loss": 0.1951904296875,
"num_tokens": 42436572.0,
"step": 533
},
{
"epoch": 0.33147113594040967,
"grad_norm": 0.478515625,
"learning_rate": 2.343673931461171e-05,
"loss": 0.2357025146484375,
"num_tokens": 42509995.0,
"step": 534
},
{
"epoch": 0.33209186840471755,
"grad_norm": 0.4453125,
"learning_rate": 2.3411777625975026e-05,
"loss": 0.23712158203125,
"num_tokens": 42593004.0,
"step": 535
},
{
"epoch": 0.33271260086902543,
"grad_norm": 0.455078125,
"learning_rate": 2.338678191020812e-05,
"loss": 0.21124267578125,
"num_tokens": 42668679.0,
"step": 536
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.443359375,
"learning_rate": 2.33617522684231e-05,
"loss": 0.2097625732421875,
"num_tokens": 42748351.0,
"step": 537
},
{
"epoch": 0.3339540657976412,
"grad_norm": 0.404296875,
"learning_rate": 2.3336688801869296e-05,
"loss": 0.1845245361328125,
"num_tokens": 42831403.0,
"step": 538
},
{
"epoch": 0.3345747982619491,
"grad_norm": 0.41796875,
"learning_rate": 2.331159161193287e-05,
"loss": 0.1764068603515625,
"num_tokens": 42909716.0,
"step": 539
},
{
"epoch": 0.33519553072625696,
"grad_norm": 0.4453125,
"learning_rate": 2.3286460800136394e-05,
"loss": 0.20522308349609375,
"num_tokens": 42991405.0,
"step": 540
},
{
"epoch": 0.33581626319056485,
"grad_norm": 0.4296875,
"learning_rate": 2.3261296468138463e-05,
"loss": 0.2218170166015625,
"num_tokens": 43073517.0,
"step": 541
},
{
"epoch": 0.33643699565487273,
"grad_norm": 0.451171875,
"learning_rate": 2.3236098717733246e-05,
"loss": 0.2127838134765625,
"num_tokens": 43151441.0,
"step": 542
},
{
"epoch": 0.3370577281191806,
"grad_norm": 0.400390625,
"learning_rate": 2.3210867650850116e-05,
"loss": 0.1876220703125,
"num_tokens": 43242315.0,
"step": 543
},
{
"epoch": 0.3376784605834885,
"grad_norm": 0.47265625,
"learning_rate": 2.3185603369553196e-05,
"loss": 0.24231719970703125,
"num_tokens": 43319732.0,
"step": 544
},
{
"epoch": 0.3382991930477964,
"grad_norm": 0.404296875,
"learning_rate": 2.3160305976040984e-05,
"loss": 0.185272216796875,
"num_tokens": 43396973.0,
"step": 545
},
{
"epoch": 0.33891992551210426,
"grad_norm": 0.369140625,
"learning_rate": 2.3134975572645917e-05,
"loss": 0.1720733642578125,
"num_tokens": 43484963.0,
"step": 546
},
{
"epoch": 0.33954065797641214,
"grad_norm": 0.484375,
"learning_rate": 2.3109612261833967e-05,
"loss": 0.1727294921875,
"num_tokens": 43563362.0,
"step": 547
},
{
"epoch": 0.34016139044072,
"grad_norm": 0.40234375,
"learning_rate": 2.3084216146204204e-05,
"loss": 0.19522857666015625,
"num_tokens": 43645331.0,
"step": 548
},
{
"epoch": 0.3407821229050279,
"grad_norm": 0.40234375,
"learning_rate": 2.305878732848843e-05,
"loss": 0.1963348388671875,
"num_tokens": 43723853.0,
"step": 549
},
{
"epoch": 0.3414028553693358,
"grad_norm": 0.4375,
"learning_rate": 2.303332591155071e-05,
"loss": 0.20317840576171875,
"num_tokens": 43795201.0,
"step": 550
},
{
"epoch": 0.34202358783364367,
"grad_norm": 0.416015625,
"learning_rate": 2.3007831998386976e-05,
"loss": 0.1800994873046875,
"num_tokens": 43885422.0,
"step": 551
},
{
"epoch": 0.3426443202979516,
"grad_norm": 0.365234375,
"learning_rate": 2.2982305692124633e-05,
"loss": 0.19091796875,
"num_tokens": 43975863.0,
"step": 552
},
{
"epoch": 0.3432650527622595,
"grad_norm": 0.39453125,
"learning_rate": 2.2956747096022103e-05,
"loss": 0.2070465087890625,
"num_tokens": 44063234.0,
"step": 553
},
{
"epoch": 0.3438857852265674,
"grad_norm": 0.400390625,
"learning_rate": 2.2931156313468447e-05,
"loss": 0.17583465576171875,
"num_tokens": 44140881.0,
"step": 554
},
{
"epoch": 0.34450651769087526,
"grad_norm": 0.408203125,
"learning_rate": 2.2905533447982895e-05,
"loss": 0.180572509765625,
"num_tokens": 44227161.0,
"step": 555
},
{
"epoch": 0.34512725015518314,
"grad_norm": 0.404296875,
"learning_rate": 2.287987860321448e-05,
"loss": 0.18878173828125,
"num_tokens": 44307341.0,
"step": 556
},
{
"epoch": 0.345747982619491,
"grad_norm": 0.408203125,
"learning_rate": 2.2854191882941606e-05,
"loss": 0.22259521484375,
"num_tokens": 44391210.0,
"step": 557
},
{
"epoch": 0.3463687150837989,
"grad_norm": 0.392578125,
"learning_rate": 2.2828473391071588e-05,
"loss": 0.1643524169921875,
"num_tokens": 44475341.0,
"step": 558
},
{
"epoch": 0.3469894475481068,
"grad_norm": 0.427734375,
"learning_rate": 2.280272323164029e-05,
"loss": 0.20947265625,
"num_tokens": 44552267.0,
"step": 559
},
{
"epoch": 0.34761018001241467,
"grad_norm": 0.419921875,
"learning_rate": 2.2776941508811662e-05,
"loss": 0.2020416259765625,
"num_tokens": 44633095.0,
"step": 560
},
{
"epoch": 0.34823091247672255,
"grad_norm": 0.435546875,
"learning_rate": 2.275112832687735e-05,
"loss": 0.23046112060546875,
"num_tokens": 44716095.0,
"step": 561
},
{
"epoch": 0.34885164494103044,
"grad_norm": 0.462890625,
"learning_rate": 2.2725283790256235e-05,
"loss": 0.2403411865234375,
"num_tokens": 44792169.0,
"step": 562
},
{
"epoch": 0.3494723774053383,
"grad_norm": 0.427734375,
"learning_rate": 2.269940800349404e-05,
"loss": 0.22369384765625,
"num_tokens": 44873057.0,
"step": 563
},
{
"epoch": 0.3500931098696462,
"grad_norm": 0.392578125,
"learning_rate": 2.2673501071262923e-05,
"loss": 0.185455322265625,
"num_tokens": 44958191.0,
"step": 564
},
{
"epoch": 0.3507138423339541,
"grad_norm": 0.447265625,
"learning_rate": 2.2647563098361e-05,
"loss": 0.2142486572265625,
"num_tokens": 45035635.0,
"step": 565
},
{
"epoch": 0.35133457479826197,
"grad_norm": 0.515625,
"learning_rate": 2.2621594189711965e-05,
"loss": 0.13890838623046875,
"num_tokens": 45113009.0,
"step": 566
},
{
"epoch": 0.35195530726256985,
"grad_norm": 0.408203125,
"learning_rate": 2.2595594450364658e-05,
"loss": 0.16967010498046875,
"num_tokens": 45185701.0,
"step": 567
},
{
"epoch": 0.35257603972687773,
"grad_norm": 0.423828125,
"learning_rate": 2.2569563985492626e-05,
"loss": 0.20782470703125,
"num_tokens": 45267032.0,
"step": 568
},
{
"epoch": 0.3531967721911856,
"grad_norm": 0.42578125,
"learning_rate": 2.254350290039371e-05,
"loss": 0.20135498046875,
"num_tokens": 45344905.0,
"step": 569
},
{
"epoch": 0.3538175046554935,
"grad_norm": 0.376953125,
"learning_rate": 2.251741130048962e-05,
"loss": 0.18006134033203125,
"num_tokens": 45431946.0,
"step": 570
},
{
"epoch": 0.3544382371198014,
"grad_norm": 0.46875,
"learning_rate": 2.2491289291325485e-05,
"loss": 0.25028228759765625,
"num_tokens": 45509704.0,
"step": 571
},
{
"epoch": 0.35505896958410926,
"grad_norm": 0.41015625,
"learning_rate": 2.246513697856947e-05,
"loss": 0.1885833740234375,
"num_tokens": 45585908.0,
"step": 572
},
{
"epoch": 0.35567970204841715,
"grad_norm": 0.388671875,
"learning_rate": 2.2438954468012308e-05,
"loss": 0.1893157958984375,
"num_tokens": 45670191.0,
"step": 573
},
{
"epoch": 0.35630043451272503,
"grad_norm": 0.4453125,
"learning_rate": 2.2412741865566887e-05,
"loss": 0.22332763671875,
"num_tokens": 45751129.0,
"step": 574
},
{
"epoch": 0.3569211669770329,
"grad_norm": 0.390625,
"learning_rate": 2.2386499277267837e-05,
"loss": 0.18932342529296875,
"num_tokens": 45829269.0,
"step": 575
},
{
"epoch": 0.3575418994413408,
"grad_norm": 0.37890625,
"learning_rate": 2.2360226809271064e-05,
"loss": 0.1678466796875,
"num_tokens": 45911335.0,
"step": 576
},
{
"epoch": 0.3581626319056487,
"grad_norm": 0.4453125,
"learning_rate": 2.2333924567853363e-05,
"loss": 0.22794342041015625,
"num_tokens": 45990480.0,
"step": 577
},
{
"epoch": 0.35878336436995656,
"grad_norm": 0.3828125,
"learning_rate": 2.230759265941195e-05,
"loss": 0.17134857177734375,
"num_tokens": 46074975.0,
"step": 578
},
{
"epoch": 0.35940409683426444,
"grad_norm": 0.412109375,
"learning_rate": 2.228123119046406e-05,
"loss": 0.1915130615234375,
"num_tokens": 46152517.0,
"step": 579
},
{
"epoch": 0.3600248292985723,
"grad_norm": 0.458984375,
"learning_rate": 2.2254840267646506e-05,
"loss": 0.24593353271484375,
"num_tokens": 46230369.0,
"step": 580
},
{
"epoch": 0.3606455617628802,
"grad_norm": 0.404296875,
"learning_rate": 2.222841999771525e-05,
"loss": 0.172882080078125,
"num_tokens": 46308236.0,
"step": 581
},
{
"epoch": 0.3612662942271881,
"grad_norm": 0.42578125,
"learning_rate": 2.2201970487544954e-05,
"loss": 0.202362060546875,
"num_tokens": 46389254.0,
"step": 582
},
{
"epoch": 0.36188702669149597,
"grad_norm": 0.44140625,
"learning_rate": 2.2175491844128573e-05,
"loss": 0.18841552734375,
"num_tokens": 46463330.0,
"step": 583
},
{
"epoch": 0.36250775915580385,
"grad_norm": 0.451171875,
"learning_rate": 2.2148984174576906e-05,
"loss": 0.2029876708984375,
"num_tokens": 46543207.0,
"step": 584
},
{
"epoch": 0.36312849162011174,
"grad_norm": 0.384765625,
"learning_rate": 2.212244758611818e-05,
"loss": 0.1617889404296875,
"num_tokens": 46624716.0,
"step": 585
},
{
"epoch": 0.3637492240844196,
"grad_norm": 0.328125,
"learning_rate": 2.2095882186097584e-05,
"loss": 0.1309967041015625,
"num_tokens": 46707564.0,
"step": 586
},
{
"epoch": 0.3643699565487275,
"grad_norm": 0.466796875,
"learning_rate": 2.2069288081976875e-05,
"loss": 0.2613372802734375,
"num_tokens": 46790238.0,
"step": 587
},
{
"epoch": 0.3649906890130354,
"grad_norm": 0.380859375,
"learning_rate": 2.204266538133391e-05,
"loss": 0.2042388916015625,
"num_tokens": 46879381.0,
"step": 588
},
{
"epoch": 0.36561142147734327,
"grad_norm": 0.400390625,
"learning_rate": 2.2016014191862225e-05,
"loss": 0.18834686279296875,
"num_tokens": 46956936.0,
"step": 589
},
{
"epoch": 0.36623215394165115,
"grad_norm": 0.4140625,
"learning_rate": 2.198933462137061e-05,
"loss": 0.159576416015625,
"num_tokens": 47033368.0,
"step": 590
},
{
"epoch": 0.36685288640595903,
"grad_norm": 0.400390625,
"learning_rate": 2.196262677778264e-05,
"loss": 0.1726226806640625,
"num_tokens": 47111585.0,
"step": 591
},
{
"epoch": 0.3674736188702669,
"grad_norm": 0.431640625,
"learning_rate": 2.1935890769136284e-05,
"loss": 0.222747802734375,
"num_tokens": 47194450.0,
"step": 592
},
{
"epoch": 0.3680943513345748,
"grad_norm": 0.431640625,
"learning_rate": 2.190912670358343e-05,
"loss": 0.2170867919921875,
"num_tokens": 47275636.0,
"step": 593
},
{
"epoch": 0.3687150837988827,
"grad_norm": 0.40625,
"learning_rate": 2.188233468938946e-05,
"loss": 0.16595458984375,
"num_tokens": 47353178.0,
"step": 594
},
{
"epoch": 0.36933581626319056,
"grad_norm": 0.423828125,
"learning_rate": 2.1855514834932806e-05,
"loss": 0.20269775390625,
"num_tokens": 47430597.0,
"step": 595
},
{
"epoch": 0.36995654872749845,
"grad_norm": 0.390625,
"learning_rate": 2.182866724870455e-05,
"loss": 0.19757080078125,
"num_tokens": 47522254.0,
"step": 596
},
{
"epoch": 0.37057728119180633,
"grad_norm": 0.47265625,
"learning_rate": 2.180179203930792e-05,
"loss": 0.2463836669921875,
"num_tokens": 47605790.0,
"step": 597
},
{
"epoch": 0.3711980136561142,
"grad_norm": 0.427734375,
"learning_rate": 2.17748893154579e-05,
"loss": 0.19834136962890625,
"num_tokens": 47682588.0,
"step": 598
},
{
"epoch": 0.3718187461204221,
"grad_norm": 0.423828125,
"learning_rate": 2.174795918598077e-05,
"loss": 0.228607177734375,
"num_tokens": 47762530.0,
"step": 599
},
{
"epoch": 0.37243947858473,
"grad_norm": 0.40234375,
"learning_rate": 2.1721001759813677e-05,
"loss": 0.1551666259765625,
"num_tokens": 47837223.0,
"step": 600
},
{
"epoch": 0.37306021104903786,
"grad_norm": 0.3984375,
"learning_rate": 2.1694017146004186e-05,
"loss": 0.16622161865234375,
"num_tokens": 47913845.0,
"step": 601
},
{
"epoch": 0.37368094351334574,
"grad_norm": 0.375,
"learning_rate": 2.166700545370983e-05,
"loss": 0.189453125,
"num_tokens": 48000697.0,
"step": 602
},
{
"epoch": 0.3743016759776536,
"grad_norm": 0.48046875,
"learning_rate": 2.1639966792197694e-05,
"loss": 0.24367523193359375,
"num_tokens": 48082887.0,
"step": 603
},
{
"epoch": 0.3749224084419615,
"grad_norm": 0.359375,
"learning_rate": 2.161290127084396e-05,
"loss": 0.16951751708984375,
"num_tokens": 48167173.0,
"step": 604
},
{
"epoch": 0.3755431409062694,
"grad_norm": 0.4140625,
"learning_rate": 2.1585808999133435e-05,
"loss": 0.1868438720703125,
"num_tokens": 48247447.0,
"step": 605
},
{
"epoch": 0.3761638733705773,
"grad_norm": 0.3828125,
"learning_rate": 2.1558690086659172e-05,
"loss": 0.1612396240234375,
"num_tokens": 48323444.0,
"step": 606
},
{
"epoch": 0.37678460583488516,
"grad_norm": 0.482421875,
"learning_rate": 2.153154464312197e-05,
"loss": 0.21346282958984375,
"num_tokens": 48391728.0,
"step": 607
},
{
"epoch": 0.37740533829919304,
"grad_norm": 0.38671875,
"learning_rate": 2.1504372778329963e-05,
"loss": 0.171112060546875,
"num_tokens": 48476706.0,
"step": 608
},
{
"epoch": 0.3780260707635009,
"grad_norm": 0.4140625,
"learning_rate": 2.1477174602198142e-05,
"loss": 0.179595947265625,
"num_tokens": 48554257.0,
"step": 609
},
{
"epoch": 0.3786468032278088,
"grad_norm": 0.3984375,
"learning_rate": 2.1449950224747954e-05,
"loss": 0.15386962890625,
"num_tokens": 48630367.0,
"step": 610
},
{
"epoch": 0.3792675356921167,
"grad_norm": 0.400390625,
"learning_rate": 2.1422699756106828e-05,
"loss": 0.199737548828125,
"num_tokens": 48715094.0,
"step": 611
},
{
"epoch": 0.37988826815642457,
"grad_norm": 0.380859375,
"learning_rate": 2.139542330650774e-05,
"loss": 0.19342041015625,
"num_tokens": 48800641.0,
"step": 612
},
{
"epoch": 0.38050900062073245,
"grad_norm": 0.416015625,
"learning_rate": 2.1368120986288746e-05,
"loss": 0.173858642578125,
"num_tokens": 48884711.0,
"step": 613
},
{
"epoch": 0.38112973308504033,
"grad_norm": 0.431640625,
"learning_rate": 2.1340792905892578e-05,
"loss": 0.2032928466796875,
"num_tokens": 48964753.0,
"step": 614
},
{
"epoch": 0.3817504655493482,
"grad_norm": 0.4609375,
"learning_rate": 2.1313439175866156e-05,
"loss": 0.2122802734375,
"num_tokens": 49039111.0,
"step": 615
},
{
"epoch": 0.3823711980136561,
"grad_norm": 0.45703125,
"learning_rate": 2.1286059906860162e-05,
"loss": 0.22325897216796875,
"num_tokens": 49120270.0,
"step": 616
},
{
"epoch": 0.382991930477964,
"grad_norm": 0.427734375,
"learning_rate": 2.1258655209628593e-05,
"loss": 0.21783447265625,
"num_tokens": 49196980.0,
"step": 617
},
{
"epoch": 0.38361266294227186,
"grad_norm": 0.384765625,
"learning_rate": 2.12312251950283e-05,
"loss": 0.1694793701171875,
"num_tokens": 49285898.0,
"step": 618
},
{
"epoch": 0.38423339540657975,
"grad_norm": 0.416015625,
"learning_rate": 2.1203769974018545e-05,
"loss": 0.2267608642578125,
"num_tokens": 49365023.0,
"step": 619
},
{
"epoch": 0.38485412787088763,
"grad_norm": 0.4140625,
"learning_rate": 2.1176289657660564e-05,
"loss": 0.1989288330078125,
"num_tokens": 49447858.0,
"step": 620
},
{
"epoch": 0.3854748603351955,
"grad_norm": 0.43359375,
"learning_rate": 2.1148784357117103e-05,
"loss": 0.231170654296875,
"num_tokens": 49527337.0,
"step": 621
},
{
"epoch": 0.3860955927995034,
"grad_norm": 0.47265625,
"learning_rate": 2.1121254183651974e-05,
"loss": 0.209716796875,
"num_tokens": 49602193.0,
"step": 622
},
{
"epoch": 0.3867163252638113,
"grad_norm": 0.42578125,
"learning_rate": 2.1093699248629603e-05,
"loss": 0.23325347900390625,
"num_tokens": 49688375.0,
"step": 623
},
{
"epoch": 0.38733705772811916,
"grad_norm": 0.396484375,
"learning_rate": 2.106611966351459e-05,
"loss": 0.16530609130859375,
"num_tokens": 49761687.0,
"step": 624
},
{
"epoch": 0.38795779019242704,
"grad_norm": 0.419921875,
"learning_rate": 2.1038515539871224e-05,
"loss": 0.2009124755859375,
"num_tokens": 49841425.0,
"step": 625
},
{
"epoch": 0.3885785226567349,
"grad_norm": 0.412109375,
"learning_rate": 2.1010886989363086e-05,
"loss": 0.18260955810546875,
"num_tokens": 49917147.0,
"step": 626
},
{
"epoch": 0.3891992551210428,
"grad_norm": 0.455078125,
"learning_rate": 2.0983234123752553e-05,
"loss": 0.23314666748046875,
"num_tokens": 49995888.0,
"step": 627
},
{
"epoch": 0.3898199875853507,
"grad_norm": 0.453125,
"learning_rate": 2.095555705490037e-05,
"loss": 0.23809814453125,
"num_tokens": 50073478.0,
"step": 628
},
{
"epoch": 0.3904407200496586,
"grad_norm": 0.51953125,
"learning_rate": 2.0927855894765175e-05,
"loss": 0.2695159912109375,
"num_tokens": 50149261.0,
"step": 629
},
{
"epoch": 0.39106145251396646,
"grad_norm": 0.39453125,
"learning_rate": 2.0900130755403066e-05,
"loss": 0.17889404296875,
"num_tokens": 50228053.0,
"step": 630
},
{
"epoch": 0.39168218497827434,
"grad_norm": 0.43359375,
"learning_rate": 2.0872381748967144e-05,
"loss": 0.218994140625,
"num_tokens": 50312331.0,
"step": 631
},
{
"epoch": 0.3923029174425822,
"grad_norm": 0.412109375,
"learning_rate": 2.0844608987707053e-05,
"loss": 0.218292236328125,
"num_tokens": 50399935.0,
"step": 632
},
{
"epoch": 0.3929236499068901,
"grad_norm": 0.43359375,
"learning_rate": 2.0816812583968532e-05,
"loss": 0.2323150634765625,
"num_tokens": 50482609.0,
"step": 633
},
{
"epoch": 0.393544382371198,
"grad_norm": 0.36328125,
"learning_rate": 2.0788992650192958e-05,
"loss": 0.1590576171875,
"num_tokens": 50561251.0,
"step": 634
},
{
"epoch": 0.39416511483550587,
"grad_norm": 0.36328125,
"learning_rate": 2.076114929891689e-05,
"loss": 0.16410064697265625,
"num_tokens": 50639648.0,
"step": 635
},
{
"epoch": 0.3947858472998138,
"grad_norm": 0.431640625,
"learning_rate": 2.0733282642771614e-05,
"loss": 0.2053680419921875,
"num_tokens": 50715555.0,
"step": 636
},
{
"epoch": 0.3954065797641217,
"grad_norm": 0.443359375,
"learning_rate": 2.0705392794482686e-05,
"loss": 0.2141571044921875,
"num_tokens": 50793206.0,
"step": 637
},
{
"epoch": 0.3960273122284296,
"grad_norm": 0.404296875,
"learning_rate": 2.0677479866869486e-05,
"loss": 0.17827606201171875,
"num_tokens": 50870476.0,
"step": 638
},
{
"epoch": 0.39664804469273746,
"grad_norm": 0.439453125,
"learning_rate": 2.064954397284475e-05,
"loss": 0.2004547119140625,
"num_tokens": 50949724.0,
"step": 639
},
{
"epoch": 0.39726877715704534,
"grad_norm": 0.419921875,
"learning_rate": 2.0621585225414114e-05,
"loss": 0.195831298828125,
"num_tokens": 51030554.0,
"step": 640
},
{
"epoch": 0.3978895096213532,
"grad_norm": 0.44921875,
"learning_rate": 2.0593603737675665e-05,
"loss": 0.22705841064453125,
"num_tokens": 51107694.0,
"step": 641
},
{
"epoch": 0.3985102420856611,
"grad_norm": 0.42578125,
"learning_rate": 2.0565599622819466e-05,
"loss": 0.1757965087890625,
"num_tokens": 51181258.0,
"step": 642
},
{
"epoch": 0.399130974549969,
"grad_norm": 0.4296875,
"learning_rate": 2.0537572994127142e-05,
"loss": 0.20317840576171875,
"num_tokens": 51262914.0,
"step": 643
},
{
"epoch": 0.39975170701427687,
"grad_norm": 0.41015625,
"learning_rate": 2.0509523964971355e-05,
"loss": 0.15679168701171875,
"num_tokens": 51339790.0,
"step": 644
},
{
"epoch": 0.40037243947858475,
"grad_norm": 0.4140625,
"learning_rate": 2.0481452648815395e-05,
"loss": 0.221588134765625,
"num_tokens": 51421532.0,
"step": 645
},
{
"epoch": 0.40099317194289263,
"grad_norm": 0.470703125,
"learning_rate": 2.0453359159212715e-05,
"loss": 0.235626220703125,
"num_tokens": 51498904.0,
"step": 646
},
{
"epoch": 0.4016139044072005,
"grad_norm": 0.369140625,
"learning_rate": 2.0425243609806445e-05,
"loss": 0.15847015380859375,
"num_tokens": 51579825.0,
"step": 647
},
{
"epoch": 0.4022346368715084,
"grad_norm": 0.412109375,
"learning_rate": 2.039710611432897e-05,
"loss": 0.185516357421875,
"num_tokens": 51662095.0,
"step": 648
},
{
"epoch": 0.4028553693358163,
"grad_norm": 0.384765625,
"learning_rate": 2.0368946786601443e-05,
"loss": 0.1752471923828125,
"num_tokens": 51739679.0,
"step": 649
},
{
"epoch": 0.40347610180012417,
"grad_norm": 0.416015625,
"learning_rate": 2.0340765740533327e-05,
"loss": 0.18506622314453125,
"num_tokens": 51817801.0,
"step": 650
},
{
"epoch": 0.40409683426443205,
"grad_norm": 0.408203125,
"learning_rate": 2.031256309012195e-05,
"loss": 0.1865386962890625,
"num_tokens": 51896683.0,
"step": 651
},
{
"epoch": 0.40471756672873993,
"grad_norm": 0.46875,
"learning_rate": 2.0284338949452016e-05,
"loss": 0.216888427734375,
"num_tokens": 51977826.0,
"step": 652
},
{
"epoch": 0.4053382991930478,
"grad_norm": 0.435546875,
"learning_rate": 2.0256093432695182e-05,
"loss": 0.21588134765625,
"num_tokens": 52058594.0,
"step": 653
},
{
"epoch": 0.4059590316573557,
"grad_norm": 0.435546875,
"learning_rate": 2.0227826654109566e-05,
"loss": 0.215362548828125,
"num_tokens": 52136915.0,
"step": 654
},
{
"epoch": 0.4065797641216636,
"grad_norm": 0.36328125,
"learning_rate": 2.019953872803929e-05,
"loss": 0.146881103515625,
"num_tokens": 52220786.0,
"step": 655
},
{
"epoch": 0.40720049658597146,
"grad_norm": 0.384765625,
"learning_rate": 2.017122976891403e-05,
"loss": 0.147857666015625,
"num_tokens": 52296180.0,
"step": 656
},
{
"epoch": 0.40782122905027934,
"grad_norm": 0.435546875,
"learning_rate": 2.0142899891248525e-05,
"loss": 0.18096160888671875,
"num_tokens": 52376517.0,
"step": 657
},
{
"epoch": 0.4084419615145872,
"grad_norm": 0.36328125,
"learning_rate": 2.0114549209642165e-05,
"loss": 0.1657867431640625,
"num_tokens": 52462404.0,
"step": 658
},
{
"epoch": 0.4090626939788951,
"grad_norm": 0.396484375,
"learning_rate": 2.008617783877847e-05,
"loss": 0.1858978271484375,
"num_tokens": 52546991.0,
"step": 659
},
{
"epoch": 0.409683426443203,
"grad_norm": 0.392578125,
"learning_rate": 2.0057785893424656e-05,
"loss": 0.2008209228515625,
"num_tokens": 52638809.0,
"step": 660
},
{
"epoch": 0.4103041589075109,
"grad_norm": 0.33984375,
"learning_rate": 2.002937348843118e-05,
"loss": 0.14853668212890625,
"num_tokens": 52727289.0,
"step": 661
},
{
"epoch": 0.41092489137181876,
"grad_norm": 0.384765625,
"learning_rate": 2.000094073873124e-05,
"loss": 0.19116973876953125,
"num_tokens": 52815668.0,
"step": 662
},
{
"epoch": 0.41154562383612664,
"grad_norm": 0.39453125,
"learning_rate": 1.9972487759340355e-05,
"loss": 0.2099609375,
"num_tokens": 52900680.0,
"step": 663
},
{
"epoch": 0.4121663563004345,
"grad_norm": 0.357421875,
"learning_rate": 1.9944014665355855e-05,
"loss": 0.1764984130859375,
"num_tokens": 52980938.0,
"step": 664
},
{
"epoch": 0.4127870887647424,
"grad_norm": 0.384765625,
"learning_rate": 1.9915521571956457e-05,
"loss": 0.2012176513671875,
"num_tokens": 53067584.0,
"step": 665
},
{
"epoch": 0.4134078212290503,
"grad_norm": 0.384765625,
"learning_rate": 1.9887008594401765e-05,
"loss": 0.16516876220703125,
"num_tokens": 53147013.0,
"step": 666
},
{
"epoch": 0.41402855369335817,
"grad_norm": 0.421875,
"learning_rate": 1.9858475848031824e-05,
"loss": 0.20810699462890625,
"num_tokens": 53225332.0,
"step": 667
},
{
"epoch": 0.41464928615766605,
"grad_norm": 0.458984375,
"learning_rate": 1.9829923448266642e-05,
"loss": 0.221160888671875,
"num_tokens": 53300690.0,
"step": 668
},
{
"epoch": 0.41527001862197394,
"grad_norm": 0.357421875,
"learning_rate": 1.9801351510605744e-05,
"loss": 0.137420654296875,
"num_tokens": 53382326.0,
"step": 669
},
{
"epoch": 0.4158907510862818,
"grad_norm": 0.373046875,
"learning_rate": 1.977276015062767e-05,
"loss": 0.1817779541015625,
"num_tokens": 53462021.0,
"step": 670
},
{
"epoch": 0.4165114835505897,
"grad_norm": 0.412109375,
"learning_rate": 1.9744149483989534e-05,
"loss": 0.19124603271484375,
"num_tokens": 53535953.0,
"step": 671
},
{
"epoch": 0.4171322160148976,
"grad_norm": 0.4140625,
"learning_rate": 1.971551962642655e-05,
"loss": 0.19427490234375,
"num_tokens": 53610177.0,
"step": 672
},
{
"epoch": 0.41775294847920547,
"grad_norm": 0.408203125,
"learning_rate": 1.9686870693751562e-05,
"loss": 0.18967437744140625,
"num_tokens": 53687501.0,
"step": 673
},
{
"epoch": 0.41837368094351335,
"grad_norm": 0.388671875,
"learning_rate": 1.965820280185458e-05,
"loss": 0.1967315673828125,
"num_tokens": 53769286.0,
"step": 674
},
{
"epoch": 0.41899441340782123,
"grad_norm": 0.455078125,
"learning_rate": 1.96295160667023e-05,
"loss": 0.218597412109375,
"num_tokens": 53841088.0,
"step": 675
},
{
"epoch": 0.4196151458721291,
"grad_norm": 0.44140625,
"learning_rate": 1.9600810604337646e-05,
"loss": 0.235260009765625,
"num_tokens": 53921346.0,
"step": 676
},
{
"epoch": 0.420235878336437,
"grad_norm": 0.369140625,
"learning_rate": 1.95720865308793e-05,
"loss": 0.16656494140625,
"num_tokens": 54000408.0,
"step": 677
},
{
"epoch": 0.4208566108007449,
"grad_norm": 0.396484375,
"learning_rate": 1.9543343962521225e-05,
"loss": 0.18169403076171875,
"num_tokens": 54080150.0,
"step": 678
},
{
"epoch": 0.42147734326505276,
"grad_norm": 0.421875,
"learning_rate": 1.9514583015532197e-05,
"loss": 0.1837158203125,
"num_tokens": 54159480.0,
"step": 679
},
{
"epoch": 0.42209807572936064,
"grad_norm": 0.46484375,
"learning_rate": 1.9485803806255345e-05,
"loss": 0.220855712890625,
"num_tokens": 54232989.0,
"step": 680
},
{
"epoch": 0.4227188081936685,
"grad_norm": 0.396484375,
"learning_rate": 1.9457006451107664e-05,
"loss": 0.1890869140625,
"num_tokens": 54315143.0,
"step": 681
},
{
"epoch": 0.4233395406579764,
"grad_norm": 0.419921875,
"learning_rate": 1.942819106657956e-05,
"loss": 0.186798095703125,
"num_tokens": 54401624.0,
"step": 682
},
{
"epoch": 0.4239602731222843,
"grad_norm": 0.44140625,
"learning_rate": 1.939935776923436e-05,
"loss": 0.2145843505859375,
"num_tokens": 54480488.0,
"step": 683
},
{
"epoch": 0.4245810055865922,
"grad_norm": 0.3671875,
"learning_rate": 1.937050667570786e-05,
"loss": 0.149749755859375,
"num_tokens": 54557090.0,
"step": 684
},
{
"epoch": 0.42520173805090006,
"grad_norm": 0.443359375,
"learning_rate": 1.9341637902707846e-05,
"loss": 0.27191162109375,
"num_tokens": 54646069.0,
"step": 685
},
{
"epoch": 0.42582247051520794,
"grad_norm": 0.34375,
"learning_rate": 1.9312751567013615e-05,
"loss": 0.14463043212890625,
"num_tokens": 54735450.0,
"step": 686
},
{
"epoch": 0.4264432029795158,
"grad_norm": 0.396484375,
"learning_rate": 1.9283847785475514e-05,
"loss": 0.18885040283203125,
"num_tokens": 54816470.0,
"step": 687
},
{
"epoch": 0.4270639354438237,
"grad_norm": 0.392578125,
"learning_rate": 1.9254926675014452e-05,
"loss": 0.20259857177734375,
"num_tokens": 54896296.0,
"step": 688
},
{
"epoch": 0.4276846679081316,
"grad_norm": 0.443359375,
"learning_rate": 1.9225988352621445e-05,
"loss": 0.233428955078125,
"num_tokens": 54981627.0,
"step": 689
},
{
"epoch": 0.42830540037243947,
"grad_norm": 0.48046875,
"learning_rate": 1.919703293535714e-05,
"loss": 0.229583740234375,
"num_tokens": 55055866.0,
"step": 690
},
{
"epoch": 0.42892613283674735,
"grad_norm": 0.45703125,
"learning_rate": 1.9168060540351314e-05,
"loss": 0.20269775390625,
"num_tokens": 55132535.0,
"step": 691
},
{
"epoch": 0.42954686530105524,
"grad_norm": 0.404296875,
"learning_rate": 1.9139071284802447e-05,
"loss": 0.1852874755859375,
"num_tokens": 55215409.0,
"step": 692
},
{
"epoch": 0.4301675977653631,
"grad_norm": 0.3671875,
"learning_rate": 1.91100652859772e-05,
"loss": 0.17153167724609375,
"num_tokens": 55294673.0,
"step": 693
},
{
"epoch": 0.430788330229671,
"grad_norm": 0.38671875,
"learning_rate": 1.908104266120999e-05,
"loss": 0.2008056640625,
"num_tokens": 55377822.0,
"step": 694
},
{
"epoch": 0.4314090626939789,
"grad_norm": 0.3828125,
"learning_rate": 1.9052003527902464e-05,
"loss": 0.17771148681640625,
"num_tokens": 55455943.0,
"step": 695
},
{
"epoch": 0.43202979515828677,
"grad_norm": 0.412109375,
"learning_rate": 1.9022948003523063e-05,
"loss": 0.195098876953125,
"num_tokens": 55534587.0,
"step": 696
},
{
"epoch": 0.43265052762259465,
"grad_norm": 0.400390625,
"learning_rate": 1.8993876205606527e-05,
"loss": 0.20026016235351562,
"num_tokens": 55617815.0,
"step": 697
},
{
"epoch": 0.43327126008690253,
"grad_norm": 0.427734375,
"learning_rate": 1.8964788251753427e-05,
"loss": 0.198516845703125,
"num_tokens": 55704237.0,
"step": 698
},
{
"epoch": 0.4338919925512104,
"grad_norm": 0.373046875,
"learning_rate": 1.8935684259629688e-05,
"loss": 0.1548919677734375,
"num_tokens": 55786934.0,
"step": 699
},
{
"epoch": 0.4345127250155183,
"grad_norm": 0.392578125,
"learning_rate": 1.8906564346966113e-05,
"loss": 0.19970703125,
"num_tokens": 55872060.0,
"step": 700
},
{
"epoch": 0.4351334574798262,
"grad_norm": 0.435546875,
"learning_rate": 1.8877428631557906e-05,
"loss": 0.17505645751953125,
"num_tokens": 55948342.0,
"step": 701
},
{
"epoch": 0.43575418994413406,
"grad_norm": 0.431640625,
"learning_rate": 1.8848277231264197e-05,
"loss": 0.1900177001953125,
"num_tokens": 56023677.0,
"step": 702
},
{
"epoch": 0.43637492240844195,
"grad_norm": 0.390625,
"learning_rate": 1.881911026400756e-05,
"loss": 0.16182708740234375,
"num_tokens": 56101447.0,
"step": 703
},
{
"epoch": 0.43699565487274983,
"grad_norm": 0.419921875,
"learning_rate": 1.878992784777354e-05,
"loss": 0.189727783203125,
"num_tokens": 56175901.0,
"step": 704
},
{
"epoch": 0.4376163873370577,
"grad_norm": 0.4140625,
"learning_rate": 1.876073010061019e-05,
"loss": 0.179351806640625,
"num_tokens": 56252404.0,
"step": 705
},
{
"epoch": 0.4382371198013656,
"grad_norm": 0.41015625,
"learning_rate": 1.873151714062756e-05,
"loss": 0.20854949951171875,
"num_tokens": 56338172.0,
"step": 706
},
{
"epoch": 0.4388578522656735,
"grad_norm": 0.392578125,
"learning_rate": 1.8702289085997245e-05,
"loss": 0.16644287109375,
"num_tokens": 56417116.0,
"step": 707
},
{
"epoch": 0.43947858472998136,
"grad_norm": 0.470703125,
"learning_rate": 1.8673046054951908e-05,
"loss": 0.227203369140625,
"num_tokens": 56493308.0,
"step": 708
},
{
"epoch": 0.44009931719428924,
"grad_norm": 0.390625,
"learning_rate": 1.864378816578478e-05,
"loss": 0.1591949462890625,
"num_tokens": 56568455.0,
"step": 709
},
{
"epoch": 0.4407200496585971,
"grad_norm": 0.3125,
"learning_rate": 1.8614515536849215e-05,
"loss": 0.109405517578125,
"num_tokens": 56653227.0,
"step": 710
},
{
"epoch": 0.441340782122905,
"grad_norm": 0.390625,
"learning_rate": 1.8585228286558174e-05,
"loss": 0.19554901123046875,
"num_tokens": 56739014.0,
"step": 711
},
{
"epoch": 0.4419615145872129,
"grad_norm": 0.412109375,
"learning_rate": 1.8555926533383776e-05,
"loss": 0.18121337890625,
"num_tokens": 56815246.0,
"step": 712
},
{
"epoch": 0.44258224705152077,
"grad_norm": 0.380859375,
"learning_rate": 1.8526610395856803e-05,
"loss": 0.172515869140625,
"num_tokens": 56894715.0,
"step": 713
},
{
"epoch": 0.44320297951582865,
"grad_norm": 0.3984375,
"learning_rate": 1.849727999256621e-05,
"loss": 0.18987274169921875,
"num_tokens": 56979213.0,
"step": 714
},
{
"epoch": 0.44382371198013654,
"grad_norm": 0.373046875,
"learning_rate": 1.846793544215869e-05,
"loss": 0.1981353759765625,
"num_tokens": 57070660.0,
"step": 715
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.40625,
"learning_rate": 1.8438576863338133e-05,
"loss": 0.21075439453125,
"num_tokens": 57157609.0,
"step": 716
},
{
"epoch": 0.4450651769087523,
"grad_norm": 0.416015625,
"learning_rate": 1.84092043748652e-05,
"loss": 0.2082672119140625,
"num_tokens": 57234608.0,
"step": 717
},
{
"epoch": 0.4456859093730602,
"grad_norm": 0.41015625,
"learning_rate": 1.83798180955568e-05,
"loss": 0.1847076416015625,
"num_tokens": 57318773.0,
"step": 718
},
{
"epoch": 0.44630664183736807,
"grad_norm": 0.353515625,
"learning_rate": 1.835041814428564e-05,
"loss": 0.1503753662109375,
"num_tokens": 57406853.0,
"step": 719
},
{
"epoch": 0.44692737430167595,
"grad_norm": 0.396484375,
"learning_rate": 1.832100463997973e-05,
"loss": 0.19110107421875,
"num_tokens": 57488789.0,
"step": 720
},
{
"epoch": 0.4475481067659839,
"grad_norm": 0.478515625,
"learning_rate": 1.829157770162191e-05,
"loss": 0.2366485595703125,
"num_tokens": 57563698.0,
"step": 721
},
{
"epoch": 0.44816883923029177,
"grad_norm": 0.353515625,
"learning_rate": 1.8262137448249348e-05,
"loss": 0.1322479248046875,
"num_tokens": 57644710.0,
"step": 722
},
{
"epoch": 0.44878957169459965,
"grad_norm": 0.349609375,
"learning_rate": 1.823268399895309e-05,
"loss": 0.1372528076171875,
"num_tokens": 57731064.0,
"step": 723
},
{
"epoch": 0.44941030415890754,
"grad_norm": 0.453125,
"learning_rate": 1.8203217472877544e-05,
"loss": 0.21416473388671875,
"num_tokens": 57810130.0,
"step": 724
},
{
"epoch": 0.4500310366232154,
"grad_norm": 0.4375,
"learning_rate": 1.8173737989220038e-05,
"loss": 0.24169921875,
"num_tokens": 57890792.0,
"step": 725
},
{
"epoch": 0.4506517690875233,
"grad_norm": 0.47265625,
"learning_rate": 1.81442456672303e-05,
"loss": 0.239410400390625,
"num_tokens": 57969745.0,
"step": 726
},
{
"epoch": 0.4512725015518312,
"grad_norm": 0.37109375,
"learning_rate": 1.811474062620999e-05,
"loss": 0.13714599609375,
"num_tokens": 58044387.0,
"step": 727
},
{
"epoch": 0.45189323401613907,
"grad_norm": 0.5078125,
"learning_rate": 1.8085222985512234e-05,
"loss": 0.25836181640625,
"num_tokens": 58124420.0,
"step": 728
},
{
"epoch": 0.45251396648044695,
"grad_norm": 0.412109375,
"learning_rate": 1.8055692864541114e-05,
"loss": 0.18988037109375,
"num_tokens": 58198686.0,
"step": 729
},
{
"epoch": 0.45313469894475483,
"grad_norm": 0.416015625,
"learning_rate": 1.802615038275119e-05,
"loss": 0.201568603515625,
"num_tokens": 58276539.0,
"step": 730
},
{
"epoch": 0.4537554314090627,
"grad_norm": 0.380859375,
"learning_rate": 1.7996595659647043e-05,
"loss": 0.1974639892578125,
"num_tokens": 58361867.0,
"step": 731
},
{
"epoch": 0.4543761638733706,
"grad_norm": 0.40234375,
"learning_rate": 1.796702881478276e-05,
"loss": 0.17498779296875,
"num_tokens": 58436536.0,
"step": 732
},
{
"epoch": 0.4549968963376785,
"grad_norm": 0.412109375,
"learning_rate": 1.793744996776146e-05,
"loss": 0.1988677978515625,
"num_tokens": 58519083.0,
"step": 733
},
{
"epoch": 0.45561762880198636,
"grad_norm": 0.39453125,
"learning_rate": 1.7907859238234826e-05,
"loss": 0.182403564453125,
"num_tokens": 58595835.0,
"step": 734
},
{
"epoch": 0.45623836126629425,
"grad_norm": 0.369140625,
"learning_rate": 1.7878256745902588e-05,
"loss": 0.1378021240234375,
"num_tokens": 58673443.0,
"step": 735
},
{
"epoch": 0.45685909373060213,
"grad_norm": 0.443359375,
"learning_rate": 1.784864261051208e-05,
"loss": 0.239715576171875,
"num_tokens": 58757115.0,
"step": 736
},
{
"epoch": 0.45747982619491,
"grad_norm": 0.423828125,
"learning_rate": 1.781901695185772e-05,
"loss": 0.1883392333984375,
"num_tokens": 58834293.0,
"step": 737
},
{
"epoch": 0.4581005586592179,
"grad_norm": 0.396484375,
"learning_rate": 1.7789379889780544e-05,
"loss": 0.177215576171875,
"num_tokens": 58909500.0,
"step": 738
},
{
"epoch": 0.4587212911235258,
"grad_norm": 0.4140625,
"learning_rate": 1.7759731544167715e-05,
"loss": 0.22180938720703125,
"num_tokens": 58995888.0,
"step": 739
},
{
"epoch": 0.45934202358783366,
"grad_norm": 0.4296875,
"learning_rate": 1.7730072034952047e-05,
"loss": 0.198516845703125,
"num_tokens": 59075355.0,
"step": 740
},
{
"epoch": 0.45996275605214154,
"grad_norm": 0.3984375,
"learning_rate": 1.7700401482111503e-05,
"loss": 0.205902099609375,
"num_tokens": 59165083.0,
"step": 741
},
{
"epoch": 0.4605834885164494,
"grad_norm": 0.37109375,
"learning_rate": 1.7670720005668725e-05,
"loss": 0.167877197265625,
"num_tokens": 59249069.0,
"step": 742
},
{
"epoch": 0.4612042209807573,
"grad_norm": 0.44140625,
"learning_rate": 1.7641027725690544e-05,
"loss": 0.2045135498046875,
"num_tokens": 59327815.0,
"step": 743
},
{
"epoch": 0.4618249534450652,
"grad_norm": 0.4453125,
"learning_rate": 1.761132476228749e-05,
"loss": 0.2032623291015625,
"num_tokens": 59399727.0,
"step": 744
},
{
"epoch": 0.46244568590937307,
"grad_norm": 0.40625,
"learning_rate": 1.75816112356133e-05,
"loss": 0.1790771484375,
"num_tokens": 59475443.0,
"step": 745
},
{
"epoch": 0.46306641837368095,
"grad_norm": 0.408203125,
"learning_rate": 1.7551887265864474e-05,
"loss": 0.19443511962890625,
"num_tokens": 59555681.0,
"step": 746
},
{
"epoch": 0.46368715083798884,
"grad_norm": 0.40625,
"learning_rate": 1.7522152973279713e-05,
"loss": 0.1773529052734375,
"num_tokens": 59634719.0,
"step": 747
},
{
"epoch": 0.4643078833022967,
"grad_norm": 0.3828125,
"learning_rate": 1.7492408478139508e-05,
"loss": 0.14281463623046875,
"num_tokens": 59715727.0,
"step": 748
},
{
"epoch": 0.4649286157666046,
"grad_norm": 0.4375,
"learning_rate": 1.7462653900765607e-05,
"loss": 0.20855712890625,
"num_tokens": 59794308.0,
"step": 749
},
{
"epoch": 0.4655493482309125,
"grad_norm": 0.3984375,
"learning_rate": 1.743288936152054e-05,
"loss": 0.18010711669921875,
"num_tokens": 59872812.0,
"step": 750
},
{
"epoch": 0.46617008069522037,
"grad_norm": 0.431640625,
"learning_rate": 1.7403114980807142e-05,
"loss": 0.210784912109375,
"num_tokens": 59953561.0,
"step": 751
},
{
"epoch": 0.46679081315952825,
"grad_norm": 0.380859375,
"learning_rate": 1.737333087906806e-05,
"loss": 0.17118072509765625,
"num_tokens": 60032312.0,
"step": 752
},
{
"epoch": 0.46741154562383613,
"grad_norm": 0.416015625,
"learning_rate": 1.7343537176785255e-05,
"loss": 0.18750762939453125,
"num_tokens": 60112734.0,
"step": 753
},
{
"epoch": 0.468032278088144,
"grad_norm": 0.474609375,
"learning_rate": 1.7313733994479534e-05,
"loss": 0.2533111572265625,
"num_tokens": 60189052.0,
"step": 754
},
{
"epoch": 0.4686530105524519,
"grad_norm": 0.4609375,
"learning_rate": 1.7283921452710047e-05,
"loss": 0.209014892578125,
"num_tokens": 60271917.0,
"step": 755
},
{
"epoch": 0.4692737430167598,
"grad_norm": 0.41796875,
"learning_rate": 1.72540996720738e-05,
"loss": 0.1859893798828125,
"num_tokens": 60356438.0,
"step": 756
},
{
"epoch": 0.46989447548106766,
"grad_norm": 0.37890625,
"learning_rate": 1.722426877320518e-05,
"loss": 0.14642333984375,
"num_tokens": 60435647.0,
"step": 757
},
{
"epoch": 0.47051520794537555,
"grad_norm": 0.41796875,
"learning_rate": 1.7194428876775458e-05,
"loss": 0.21857452392578125,
"num_tokens": 60519950.0,
"step": 758
},
{
"epoch": 0.47113594040968343,
"grad_norm": 0.39453125,
"learning_rate": 1.7164580103492302e-05,
"loss": 0.1731414794921875,
"num_tokens": 60603961.0,
"step": 759
},
{
"epoch": 0.4717566728739913,
"grad_norm": 0.408203125,
"learning_rate": 1.713472257409928e-05,
"loss": 0.18233489990234375,
"num_tokens": 60681412.0,
"step": 760
},
{
"epoch": 0.4723774053382992,
"grad_norm": 0.43359375,
"learning_rate": 1.7104856409375383e-05,
"loss": 0.19803619384765625,
"num_tokens": 60758500.0,
"step": 761
},
{
"epoch": 0.4729981378026071,
"grad_norm": 0.416015625,
"learning_rate": 1.7074981730134547e-05,
"loss": 0.1900634765625,
"num_tokens": 60839969.0,
"step": 762
},
{
"epoch": 0.47361887026691496,
"grad_norm": 0.4140625,
"learning_rate": 1.7045098657225134e-05,
"loss": 0.195281982421875,
"num_tokens": 60923306.0,
"step": 763
},
{
"epoch": 0.47423960273122284,
"grad_norm": 0.373046875,
"learning_rate": 1.701520731152947e-05,
"loss": 0.155914306640625,
"num_tokens": 61003078.0,
"step": 764
},
{
"epoch": 0.4748603351955307,
"grad_norm": 0.404296875,
"learning_rate": 1.6985307813963338e-05,
"loss": 0.199554443359375,
"num_tokens": 61090253.0,
"step": 765
},
{
"epoch": 0.4754810676598386,
"grad_norm": 0.45703125,
"learning_rate": 1.6955400285475504e-05,
"loss": 0.22792816162109375,
"num_tokens": 61168492.0,
"step": 766
},
{
"epoch": 0.4761018001241465,
"grad_norm": 0.4765625,
"learning_rate": 1.6925484847047213e-05,
"loss": 0.2374267578125,
"num_tokens": 61242755.0,
"step": 767
},
{
"epoch": 0.4767225325884544,
"grad_norm": 0.421875,
"learning_rate": 1.6895561619691714e-05,
"loss": 0.2057952880859375,
"num_tokens": 61322276.0,
"step": 768
},
{
"epoch": 0.47734326505276226,
"grad_norm": 0.37890625,
"learning_rate": 1.686563072445376e-05,
"loss": 0.1525726318359375,
"num_tokens": 61403828.0,
"step": 769
},
{
"epoch": 0.47796399751707014,
"grad_norm": 0.435546875,
"learning_rate": 1.6835692282409126e-05,
"loss": 0.23336029052734375,
"num_tokens": 61488635.0,
"step": 770
},
{
"epoch": 0.478584729981378,
"grad_norm": 0.490234375,
"learning_rate": 1.6805746414664112e-05,
"loss": 0.2491455078125,
"num_tokens": 61568661.0,
"step": 771
},
{
"epoch": 0.4792054624456859,
"grad_norm": 0.44140625,
"learning_rate": 1.6775793242355053e-05,
"loss": 0.2314605712890625,
"num_tokens": 61647420.0,
"step": 772
},
{
"epoch": 0.4798261949099938,
"grad_norm": 0.37109375,
"learning_rate": 1.6745832886647834e-05,
"loss": 0.1607513427734375,
"num_tokens": 61727741.0,
"step": 773
},
{
"epoch": 0.48044692737430167,
"grad_norm": 0.400390625,
"learning_rate": 1.671586546873741e-05,
"loss": 0.1855926513671875,
"num_tokens": 61805443.0,
"step": 774
},
{
"epoch": 0.48106765983860955,
"grad_norm": 0.396484375,
"learning_rate": 1.6685891109847286e-05,
"loss": 0.1652050018310547,
"num_tokens": 61882548.0,
"step": 775
},
{
"epoch": 0.48168839230291743,
"grad_norm": 0.4296875,
"learning_rate": 1.665590993122905e-05,
"loss": 0.194183349609375,
"num_tokens": 61959694.0,
"step": 776
},
{
"epoch": 0.4823091247672253,
"grad_norm": 0.369140625,
"learning_rate": 1.6625922054161878e-05,
"loss": 0.154144287109375,
"num_tokens": 62039175.0,
"step": 777
},
{
"epoch": 0.4829298572315332,
"grad_norm": 0.40625,
"learning_rate": 1.659592759995205e-05,
"loss": 0.1820526123046875,
"num_tokens": 62117984.0,
"step": 778
},
{
"epoch": 0.4835505896958411,
"grad_norm": 0.36328125,
"learning_rate": 1.656592668993245e-05,
"loss": 0.13739776611328125,
"num_tokens": 62194530.0,
"step": 779
},
{
"epoch": 0.48417132216014896,
"grad_norm": 0.36328125,
"learning_rate": 1.653591944546206e-05,
"loss": 0.15596771240234375,
"num_tokens": 62280038.0,
"step": 780
},
{
"epoch": 0.48479205462445685,
"grad_norm": 0.365234375,
"learning_rate": 1.65059059879255e-05,
"loss": 0.154205322265625,
"num_tokens": 62356807.0,
"step": 781
},
{
"epoch": 0.48541278708876473,
"grad_norm": 0.3984375,
"learning_rate": 1.6475886438732527e-05,
"loss": 0.1698455810546875,
"num_tokens": 62435316.0,
"step": 782
},
{
"epoch": 0.4860335195530726,
"grad_norm": 0.41796875,
"learning_rate": 1.6445860919317538e-05,
"loss": 0.2103271484375,
"num_tokens": 62514373.0,
"step": 783
},
{
"epoch": 0.4866542520173805,
"grad_norm": 0.43359375,
"learning_rate": 1.6415829551139062e-05,
"loss": 0.22314453125,
"num_tokens": 62591991.0,
"step": 784
},
{
"epoch": 0.4872749844816884,
"grad_norm": 0.373046875,
"learning_rate": 1.638579245567931e-05,
"loss": 0.1541900634765625,
"num_tokens": 62667694.0,
"step": 785
},
{
"epoch": 0.48789571694599626,
"grad_norm": 0.439453125,
"learning_rate": 1.6355749754443654e-05,
"loss": 0.20981597900390625,
"num_tokens": 62747024.0,
"step": 786
},
{
"epoch": 0.48851644941030414,
"grad_norm": 0.41015625,
"learning_rate": 1.6325701568960136e-05,
"loss": 0.1633758544921875,
"num_tokens": 62822599.0,
"step": 787
},
{
"epoch": 0.489137181874612,
"grad_norm": 0.423828125,
"learning_rate": 1.6295648020778994e-05,
"loss": 0.2032928466796875,
"num_tokens": 62901868.0,
"step": 788
},
{
"epoch": 0.4897579143389199,
"grad_norm": 0.40625,
"learning_rate": 1.626558923147215e-05,
"loss": 0.1837158203125,
"num_tokens": 62978585.0,
"step": 789
},
{
"epoch": 0.4903786468032278,
"grad_norm": 0.369140625,
"learning_rate": 1.623552532263273e-05,
"loss": 0.17771148681640625,
"num_tokens": 63060469.0,
"step": 790
},
{
"epoch": 0.4909993792675357,
"grad_norm": 0.435546875,
"learning_rate": 1.6205456415874572e-05,
"loss": 0.18357086181640625,
"num_tokens": 63137122.0,
"step": 791
},
{
"epoch": 0.49162011173184356,
"grad_norm": 0.380859375,
"learning_rate": 1.6175382632831724e-05,
"loss": 0.16837310791015625,
"num_tokens": 63213580.0,
"step": 792
},
{
"epoch": 0.49224084419615144,
"grad_norm": 0.3828125,
"learning_rate": 1.6145304095157965e-05,
"loss": 0.17380523681640625,
"num_tokens": 63297148.0,
"step": 793
},
{
"epoch": 0.4928615766604593,
"grad_norm": 0.380859375,
"learning_rate": 1.611522092452631e-05,
"loss": 0.1910247802734375,
"num_tokens": 63381047.0,
"step": 794
},
{
"epoch": 0.4934823091247672,
"grad_norm": 0.4453125,
"learning_rate": 1.6085133242628515e-05,
"loss": 0.2176666259765625,
"num_tokens": 63459966.0,
"step": 795
},
{
"epoch": 0.4941030415890751,
"grad_norm": 0.375,
"learning_rate": 1.6055041171174574e-05,
"loss": 0.158843994140625,
"num_tokens": 63537593.0,
"step": 796
},
{
"epoch": 0.49472377405338297,
"grad_norm": 0.408203125,
"learning_rate": 1.602494483189225e-05,
"loss": 0.1724090576171875,
"num_tokens": 63615445.0,
"step": 797
},
{
"epoch": 0.49534450651769085,
"grad_norm": 0.4140625,
"learning_rate": 1.599484434652656e-05,
"loss": 0.1734771728515625,
"num_tokens": 63690727.0,
"step": 798
},
{
"epoch": 0.49596523898199874,
"grad_norm": 0.39453125,
"learning_rate": 1.5964739836839305e-05,
"loss": 0.17597198486328125,
"num_tokens": 63776339.0,
"step": 799
},
{
"epoch": 0.4965859714463066,
"grad_norm": 0.431640625,
"learning_rate": 1.5934631424608556e-05,
"loss": 0.2207794189453125,
"num_tokens": 63855669.0,
"step": 800
},
{
"epoch": 0.4972067039106145,
"grad_norm": 0.443359375,
"learning_rate": 1.5904519231628175e-05,
"loss": 0.22064208984375,
"num_tokens": 63934046.0,
"step": 801
},
{
"epoch": 0.4978274363749224,
"grad_norm": 0.380859375,
"learning_rate": 1.5874403379707306e-05,
"loss": 0.162017822265625,
"num_tokens": 64011864.0,
"step": 802
},
{
"epoch": 0.49844816883923027,
"grad_norm": 0.37109375,
"learning_rate": 1.5844283990669915e-05,
"loss": 0.158050537109375,
"num_tokens": 64094920.0,
"step": 803
},
{
"epoch": 0.49906890130353815,
"grad_norm": 0.4921875,
"learning_rate": 1.5814161186354257e-05,
"loss": 0.2811279296875,
"num_tokens": 64168919.0,
"step": 804
},
{
"epoch": 0.49968963376784603,
"grad_norm": 0.3984375,
"learning_rate": 1.5784035088612415e-05,
"loss": 0.19830322265625,
"num_tokens": 64256050.0,
"step": 805
},
{
"epoch": 0.500310366232154,
"grad_norm": 0.37890625,
"learning_rate": 1.575390581930979e-05,
"loss": 0.16234588623046875,
"num_tokens": 64346466.0,
"step": 806
},
{
"epoch": 0.5009310986964618,
"grad_norm": 0.380859375,
"learning_rate": 1.5723773500324604e-05,
"loss": 0.151947021484375,
"num_tokens": 64422808.0,
"step": 807
},
{
"epoch": 0.5015518311607697,
"grad_norm": 0.412109375,
"learning_rate": 1.569363825354743e-05,
"loss": 0.17218780517578125,
"num_tokens": 64500504.0,
"step": 808
},
{
"epoch": 0.5021725636250776,
"grad_norm": 0.435546875,
"learning_rate": 1.5663500200880684e-05,
"loss": 0.216949462890625,
"num_tokens": 64578821.0,
"step": 809
},
{
"epoch": 0.5027932960893855,
"grad_norm": 0.453125,
"learning_rate": 1.563335946423812e-05,
"loss": 0.222808837890625,
"num_tokens": 64663165.0,
"step": 810
},
{
"epoch": 0.5034140285536933,
"grad_norm": 0.408203125,
"learning_rate": 1.560321616554435e-05,
"loss": 0.1801300048828125,
"num_tokens": 64740844.0,
"step": 811
},
{
"epoch": 0.5040347610180013,
"grad_norm": 0.40234375,
"learning_rate": 1.5573070426734365e-05,
"loss": 0.19293975830078125,
"num_tokens": 64818416.0,
"step": 812
},
{
"epoch": 0.5046554934823091,
"grad_norm": 0.3359375,
"learning_rate": 1.554292236975301e-05,
"loss": 0.1099090576171875,
"num_tokens": 64890722.0,
"step": 813
},
{
"epoch": 0.505276225946617,
"grad_norm": 0.341796875,
"learning_rate": 1.551277211655452e-05,
"loss": 0.13115692138671875,
"num_tokens": 64966495.0,
"step": 814
},
{
"epoch": 0.5058969584109249,
"grad_norm": 0.45703125,
"learning_rate": 1.5482619789102008e-05,
"loss": 0.2239532470703125,
"num_tokens": 65053323.0,
"step": 815
},
{
"epoch": 0.5065176908752328,
"grad_norm": 0.4375,
"learning_rate": 1.545246550936698e-05,
"loss": 0.2304840087890625,
"num_tokens": 65131992.0,
"step": 816
},
{
"epoch": 0.5071384233395406,
"grad_norm": 0.390625,
"learning_rate": 1.5422309399328832e-05,
"loss": 0.18270111083984375,
"num_tokens": 65219013.0,
"step": 817
},
{
"epoch": 0.5077591558038486,
"grad_norm": 0.40625,
"learning_rate": 1.5392151580974373e-05,
"loss": 0.2208099365234375,
"num_tokens": 65309422.0,
"step": 818
},
{
"epoch": 0.5083798882681564,
"grad_norm": 0.44140625,
"learning_rate": 1.536199217629732e-05,
"loss": 0.225372314453125,
"num_tokens": 65387728.0,
"step": 819
},
{
"epoch": 0.5090006207324643,
"grad_norm": 0.369140625,
"learning_rate": 1.5331831307297803e-05,
"loss": 0.12811279296875,
"num_tokens": 65465547.0,
"step": 820
},
{
"epoch": 0.5096213531967722,
"grad_norm": 0.40234375,
"learning_rate": 1.5301669095981885e-05,
"loss": 0.1576385498046875,
"num_tokens": 65545665.0,
"step": 821
},
{
"epoch": 0.5102420856610801,
"grad_norm": 0.392578125,
"learning_rate": 1.527150566436105e-05,
"loss": 0.1743316650390625,
"num_tokens": 65629842.0,
"step": 822
},
{
"epoch": 0.5108628181253879,
"grad_norm": 0.44140625,
"learning_rate": 1.5241341134451715e-05,
"loss": 0.22735595703125,
"num_tokens": 65707379.0,
"step": 823
},
{
"epoch": 0.5114835505896959,
"grad_norm": 0.4296875,
"learning_rate": 1.5211175628274746e-05,
"loss": 0.20652008056640625,
"num_tokens": 65781668.0,
"step": 824
},
{
"epoch": 0.5121042830540037,
"grad_norm": 0.373046875,
"learning_rate": 1.5181009267854968e-05,
"loss": 0.17317962646484375,
"num_tokens": 65864091.0,
"step": 825
},
{
"epoch": 0.5127250155183116,
"grad_norm": 0.431640625,
"learning_rate": 1.5150842175220648e-05,
"loss": 0.211456298828125,
"num_tokens": 65945102.0,
"step": 826
},
{
"epoch": 0.5133457479826194,
"grad_norm": 0.447265625,
"learning_rate": 1.5120674472403009e-05,
"loss": 0.20635986328125,
"num_tokens": 66021221.0,
"step": 827
},
{
"epoch": 0.5139664804469274,
"grad_norm": 0.3984375,
"learning_rate": 1.5090506281435759e-05,
"loss": 0.16302490234375,
"num_tokens": 66103317.0,
"step": 828
},
{
"epoch": 0.5145872129112352,
"grad_norm": 0.3984375,
"learning_rate": 1.5060337724354569e-05,
"loss": 0.2002105712890625,
"num_tokens": 66181326.0,
"step": 829
},
{
"epoch": 0.5152079453755432,
"grad_norm": 0.380859375,
"learning_rate": 1.5030168923196605e-05,
"loss": 0.1685028076171875,
"num_tokens": 66259538.0,
"step": 830
},
{
"epoch": 0.515828677839851,
"grad_norm": 0.46875,
"learning_rate": 1.5e-05,
"loss": 0.2446746826171875,
"num_tokens": 66339990.0,
"step": 831
},
{
"epoch": 0.5164494103041589,
"grad_norm": 0.486328125,
"learning_rate": 1.49698310768034e-05,
"loss": 0.246551513671875,
"num_tokens": 66416214.0,
"step": 832
},
{
"epoch": 0.5170701427684667,
"grad_norm": 0.375,
"learning_rate": 1.493966227564543e-05,
"loss": 0.14859771728515625,
"num_tokens": 66500017.0,
"step": 833
},
{
"epoch": 0.5176908752327747,
"grad_norm": 0.421875,
"learning_rate": 1.4909493718564242e-05,
"loss": 0.2099151611328125,
"num_tokens": 66576672.0,
"step": 834
},
{
"epoch": 0.5183116076970825,
"grad_norm": 0.38671875,
"learning_rate": 1.4879325527596997e-05,
"loss": 0.1600341796875,
"num_tokens": 66653128.0,
"step": 835
},
{
"epoch": 0.5189323401613904,
"grad_norm": 0.44140625,
"learning_rate": 1.4849157824779356e-05,
"loss": 0.22028350830078125,
"num_tokens": 66734373.0,
"step": 836
},
{
"epoch": 0.5195530726256983,
"grad_norm": 0.427734375,
"learning_rate": 1.4818990732145035e-05,
"loss": 0.198974609375,
"num_tokens": 66809502.0,
"step": 837
},
{
"epoch": 0.5201738050900062,
"grad_norm": 0.39453125,
"learning_rate": 1.4788824371725253e-05,
"loss": 0.1495361328125,
"num_tokens": 66878792.0,
"step": 838
},
{
"epoch": 0.520794537554314,
"grad_norm": 0.421875,
"learning_rate": 1.4758658865548286e-05,
"loss": 0.2187652587890625,
"num_tokens": 66962677.0,
"step": 839
},
{
"epoch": 0.521415270018622,
"grad_norm": 0.44921875,
"learning_rate": 1.4728494335638957e-05,
"loss": 0.2018280029296875,
"num_tokens": 67041474.0,
"step": 840
},
{
"epoch": 0.5220360024829298,
"grad_norm": 0.455078125,
"learning_rate": 1.4698330904018115e-05,
"loss": 0.2111968994140625,
"num_tokens": 67115058.0,
"step": 841
},
{
"epoch": 0.5226567349472377,
"grad_norm": 0.41015625,
"learning_rate": 1.46681686927022e-05,
"loss": 0.1927642822265625,
"num_tokens": 67197272.0,
"step": 842
},
{
"epoch": 0.5232774674115456,
"grad_norm": 0.39453125,
"learning_rate": 1.4638007823702684e-05,
"loss": 0.1621246337890625,
"num_tokens": 67278020.0,
"step": 843
},
{
"epoch": 0.5238981998758535,
"grad_norm": 0.416015625,
"learning_rate": 1.4607848419025631e-05,
"loss": 0.22991943359375,
"num_tokens": 67362699.0,
"step": 844
},
{
"epoch": 0.5245189323401613,
"grad_norm": 0.435546875,
"learning_rate": 1.4577690600671174e-05,
"loss": 0.22564697265625,
"num_tokens": 67443724.0,
"step": 845
},
{
"epoch": 0.5251396648044693,
"grad_norm": 0.423828125,
"learning_rate": 1.4547534490633022e-05,
"loss": 0.1840057373046875,
"num_tokens": 67522083.0,
"step": 846
},
{
"epoch": 0.5257603972687771,
"grad_norm": 0.38671875,
"learning_rate": 1.4517380210897995e-05,
"loss": 0.18781280517578125,
"num_tokens": 67604006.0,
"step": 847
},
{
"epoch": 0.526381129733085,
"grad_norm": 0.3671875,
"learning_rate": 1.4487227883445484e-05,
"loss": 0.14209747314453125,
"num_tokens": 67680358.0,
"step": 848
},
{
"epoch": 0.527001862197393,
"grad_norm": 0.431640625,
"learning_rate": 1.4457077630246992e-05,
"loss": 0.19575119018554688,
"num_tokens": 67761222.0,
"step": 849
},
{
"epoch": 0.5276225946617008,
"grad_norm": 0.423828125,
"learning_rate": 1.442692957326564e-05,
"loss": 0.1881866455078125,
"num_tokens": 67844083.0,
"step": 850
},
{
"epoch": 0.5282433271260087,
"grad_norm": 0.40234375,
"learning_rate": 1.4396783834455656e-05,
"loss": 0.18814468383789062,
"num_tokens": 67928085.0,
"step": 851
},
{
"epoch": 0.5288640595903166,
"grad_norm": 0.384765625,
"learning_rate": 1.4366640535761885e-05,
"loss": 0.1851654052734375,
"num_tokens": 68010126.0,
"step": 852
},
{
"epoch": 0.5294847920546245,
"grad_norm": 0.37109375,
"learning_rate": 1.433649979911932e-05,
"loss": 0.1590118408203125,
"num_tokens": 68090900.0,
"step": 853
},
{
"epoch": 0.5301055245189323,
"grad_norm": 0.373046875,
"learning_rate": 1.4306361746452566e-05,
"loss": 0.15969085693359375,
"num_tokens": 68168826.0,
"step": 854
},
{
"epoch": 0.5307262569832403,
"grad_norm": 0.35546875,
"learning_rate": 1.4276226499675395e-05,
"loss": 0.1632537841796875,
"num_tokens": 68255065.0,
"step": 855
},
{
"epoch": 0.5313469894475481,
"grad_norm": 0.427734375,
"learning_rate": 1.4246094180690214e-05,
"loss": 0.20761871337890625,
"num_tokens": 68331792.0,
"step": 856
},
{
"epoch": 0.531967721911856,
"grad_norm": 0.41015625,
"learning_rate": 1.4215964911387584e-05,
"loss": 0.2359619140625,
"num_tokens": 68419168.0,
"step": 857
},
{
"epoch": 0.5325884543761639,
"grad_norm": 0.369140625,
"learning_rate": 1.4185838813645744e-05,
"loss": 0.1770172119140625,
"num_tokens": 68506536.0,
"step": 858
},
{
"epoch": 0.5332091868404718,
"grad_norm": 0.39453125,
"learning_rate": 1.4155716009330088e-05,
"loss": 0.178009033203125,
"num_tokens": 68590568.0,
"step": 859
},
{
"epoch": 0.5338299193047796,
"grad_norm": 0.380859375,
"learning_rate": 1.4125596620292695e-05,
"loss": 0.1668243408203125,
"num_tokens": 68667413.0,
"step": 860
},
{
"epoch": 0.5344506517690876,
"grad_norm": 0.421875,
"learning_rate": 1.409548076837183e-05,
"loss": 0.1732025146484375,
"num_tokens": 68738326.0,
"step": 861
},
{
"epoch": 0.5350713842333954,
"grad_norm": 0.376953125,
"learning_rate": 1.4065368575391443e-05,
"loss": 0.145050048828125,
"num_tokens": 68823637.0,
"step": 862
},
{
"epoch": 0.5356921166977033,
"grad_norm": 0.4453125,
"learning_rate": 1.4035260163160697e-05,
"loss": 0.2113037109375,
"num_tokens": 68900932.0,
"step": 863
},
{
"epoch": 0.5363128491620112,
"grad_norm": 0.423828125,
"learning_rate": 1.4005155653473445e-05,
"loss": 0.2074127197265625,
"num_tokens": 68977006.0,
"step": 864
},
{
"epoch": 0.5369335816263191,
"grad_norm": 0.43359375,
"learning_rate": 1.3975055168107754e-05,
"loss": 0.2095184326171875,
"num_tokens": 69055188.0,
"step": 865
},
{
"epoch": 0.5375543140906269,
"grad_norm": 0.392578125,
"learning_rate": 1.3944958828825428e-05,
"loss": 0.173980712890625,
"num_tokens": 69134818.0,
"step": 866
},
{
"epoch": 0.5381750465549349,
"grad_norm": 0.404296875,
"learning_rate": 1.391486675737149e-05,
"loss": 0.17784881591796875,
"num_tokens": 69213114.0,
"step": 867
},
{
"epoch": 0.5387957790192427,
"grad_norm": 0.380859375,
"learning_rate": 1.3884779075473689e-05,
"loss": 0.1586456298828125,
"num_tokens": 69290426.0,
"step": 868
},
{
"epoch": 0.5394165114835506,
"grad_norm": 0.408203125,
"learning_rate": 1.3854695904842039e-05,
"loss": 0.20092010498046875,
"num_tokens": 69370532.0,
"step": 869
},
{
"epoch": 0.5400372439478585,
"grad_norm": 0.365234375,
"learning_rate": 1.382461736716828e-05,
"loss": 0.16647720336914062,
"num_tokens": 69461040.0,
"step": 870
},
{
"epoch": 0.5406579764121664,
"grad_norm": 0.41015625,
"learning_rate": 1.3794543584125429e-05,
"loss": 0.19617462158203125,
"num_tokens": 69541184.0,
"step": 871
},
{
"epoch": 0.5412787088764742,
"grad_norm": 0.376953125,
"learning_rate": 1.3764474677367273e-05,
"loss": 0.165863037109375,
"num_tokens": 69622477.0,
"step": 872
},
{
"epoch": 0.5418994413407822,
"grad_norm": 0.34765625,
"learning_rate": 1.373441076852785e-05,
"loss": 0.12197494506835938,
"num_tokens": 69700254.0,
"step": 873
},
{
"epoch": 0.54252017380509,
"grad_norm": 0.38671875,
"learning_rate": 1.3704351979221007e-05,
"loss": 0.1822509765625,
"num_tokens": 69783067.0,
"step": 874
},
{
"epoch": 0.5431409062693979,
"grad_norm": 0.396484375,
"learning_rate": 1.3674298431039864e-05,
"loss": 0.18769073486328125,
"num_tokens": 69864793.0,
"step": 875
},
{
"epoch": 0.5437616387337058,
"grad_norm": 0.333984375,
"learning_rate": 1.3644250245556345e-05,
"loss": 0.13982391357421875,
"num_tokens": 69952289.0,
"step": 876
},
{
"epoch": 0.5443823711980137,
"grad_norm": 0.4140625,
"learning_rate": 1.3614207544320692e-05,
"loss": 0.189849853515625,
"num_tokens": 70026926.0,
"step": 877
},
{
"epoch": 0.5450031036623215,
"grad_norm": 0.3984375,
"learning_rate": 1.3584170448860945e-05,
"loss": 0.168121337890625,
"num_tokens": 70111349.0,
"step": 878
},
{
"epoch": 0.5456238361266295,
"grad_norm": 0.423828125,
"learning_rate": 1.3554139080682468e-05,
"loss": 0.1836090087890625,
"num_tokens": 70187805.0,
"step": 879
},
{
"epoch": 0.5462445685909373,
"grad_norm": 0.42578125,
"learning_rate": 1.3524113561267474e-05,
"loss": 0.2201385498046875,
"num_tokens": 70263202.0,
"step": 880
},
{
"epoch": 0.5468653010552452,
"grad_norm": 0.412109375,
"learning_rate": 1.3494094012074497e-05,
"loss": 0.17308807373046875,
"num_tokens": 70342583.0,
"step": 881
},
{
"epoch": 0.547486033519553,
"grad_norm": 0.4609375,
"learning_rate": 1.3464080554537943e-05,
"loss": 0.2161407470703125,
"num_tokens": 70412434.0,
"step": 882
},
{
"epoch": 0.548106765983861,
"grad_norm": 0.4375,
"learning_rate": 1.3434073310067556e-05,
"loss": 0.217529296875,
"num_tokens": 70495209.0,
"step": 883
},
{
"epoch": 0.5487274984481688,
"grad_norm": 0.443359375,
"learning_rate": 1.3404072400047946e-05,
"loss": 0.214080810546875,
"num_tokens": 70573462.0,
"step": 884
},
{
"epoch": 0.5493482309124768,
"grad_norm": 0.41015625,
"learning_rate": 1.3374077945838124e-05,
"loss": 0.17427825927734375,
"num_tokens": 70657878.0,
"step": 885
},
{
"epoch": 0.5499689633767846,
"grad_norm": 0.337890625,
"learning_rate": 1.3344090068770957e-05,
"loss": 0.14078521728515625,
"num_tokens": 70742198.0,
"step": 886
},
{
"epoch": 0.5505896958410925,
"grad_norm": 0.40625,
"learning_rate": 1.3314108890152717e-05,
"loss": 0.17542266845703125,
"num_tokens": 70822176.0,
"step": 887
},
{
"epoch": 0.5512104283054003,
"grad_norm": 0.37890625,
"learning_rate": 1.3284134531262595e-05,
"loss": 0.19176483154296875,
"num_tokens": 70909111.0,
"step": 888
},
{
"epoch": 0.5518311607697083,
"grad_norm": 0.39453125,
"learning_rate": 1.3254167113352162e-05,
"loss": 0.163787841796875,
"num_tokens": 70989773.0,
"step": 889
},
{
"epoch": 0.5524518932340161,
"grad_norm": 0.466796875,
"learning_rate": 1.3224206757644951e-05,
"loss": 0.22904205322265625,
"num_tokens": 71063942.0,
"step": 890
},
{
"epoch": 0.553072625698324,
"grad_norm": 0.423828125,
"learning_rate": 1.319425358533589e-05,
"loss": 0.196685791015625,
"num_tokens": 71141174.0,
"step": 891
},
{
"epoch": 0.5536933581626319,
"grad_norm": 0.3984375,
"learning_rate": 1.3164307717590873e-05,
"loss": 0.18463897705078125,
"num_tokens": 71222244.0,
"step": 892
},
{
"epoch": 0.5543140906269398,
"grad_norm": 0.455078125,
"learning_rate": 1.3134369275546241e-05,
"loss": 0.2183685302734375,
"num_tokens": 71297532.0,
"step": 893
},
{
"epoch": 0.5549348230912476,
"grad_norm": 0.453125,
"learning_rate": 1.3104438380308293e-05,
"loss": 0.1897430419921875,
"num_tokens": 71373291.0,
"step": 894
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.470703125,
"learning_rate": 1.307451515295279e-05,
"loss": 0.2385406494140625,
"num_tokens": 71449653.0,
"step": 895
},
{
"epoch": 0.5561762880198634,
"grad_norm": 0.341796875,
"learning_rate": 1.30445997145245e-05,
"loss": 0.1535797119140625,
"num_tokens": 71533929.0,
"step": 896
},
{
"epoch": 0.5567970204841713,
"grad_norm": 0.392578125,
"learning_rate": 1.3014692186036664e-05,
"loss": 0.179718017578125,
"num_tokens": 71612633.0,
"step": 897
},
{
"epoch": 0.5574177529484792,
"grad_norm": 0.408203125,
"learning_rate": 1.298479268847053e-05,
"loss": 0.189605712890625,
"num_tokens": 71692519.0,
"step": 898
},
{
"epoch": 0.5580384854127871,
"grad_norm": 0.43359375,
"learning_rate": 1.2954901342774869e-05,
"loss": 0.2273101806640625,
"num_tokens": 71770072.0,
"step": 899
},
{
"epoch": 0.5586592178770949,
"grad_norm": 0.3828125,
"learning_rate": 1.2925018269865453e-05,
"loss": 0.175201416015625,
"num_tokens": 71851096.0,
"step": 900
},
{
"epoch": 0.5592799503414029,
"grad_norm": 0.373046875,
"learning_rate": 1.2895143590624618e-05,
"loss": 0.167205810546875,
"num_tokens": 71929139.0,
"step": 901
},
{
"epoch": 0.5599006828057107,
"grad_norm": 0.38671875,
"learning_rate": 1.2865277425900725e-05,
"loss": 0.18930816650390625,
"num_tokens": 72010353.0,
"step": 902
},
{
"epoch": 0.5605214152700186,
"grad_norm": 0.455078125,
"learning_rate": 1.2835419896507699e-05,
"loss": 0.220123291015625,
"num_tokens": 72089982.0,
"step": 903
},
{
"epoch": 0.5611421477343265,
"grad_norm": 0.37109375,
"learning_rate": 1.2805571123224543e-05,
"loss": 0.16143035888671875,
"num_tokens": 72168193.0,
"step": 904
},
{
"epoch": 0.5617628801986344,
"grad_norm": 0.390625,
"learning_rate": 1.2775731226794823e-05,
"loss": 0.1923980712890625,
"num_tokens": 72255592.0,
"step": 905
},
{
"epoch": 0.5623836126629422,
"grad_norm": 0.42578125,
"learning_rate": 1.2745900327926201e-05,
"loss": 0.2190704345703125,
"num_tokens": 72327996.0,
"step": 906
},
{
"epoch": 0.5630043451272502,
"grad_norm": 0.390625,
"learning_rate": 1.2716078547289956e-05,
"loss": 0.1983642578125,
"num_tokens": 72412086.0,
"step": 907
},
{
"epoch": 0.563625077591558,
"grad_norm": 0.427734375,
"learning_rate": 1.2686266005520462e-05,
"loss": 0.2094268798828125,
"num_tokens": 72487655.0,
"step": 908
},
{
"epoch": 0.5642458100558659,
"grad_norm": 0.5,
"learning_rate": 1.2656462823214744e-05,
"loss": 0.27734375,
"num_tokens": 72565211.0,
"step": 909
},
{
"epoch": 0.5648665425201738,
"grad_norm": 0.41015625,
"learning_rate": 1.2626669120931943e-05,
"loss": 0.1863555908203125,
"num_tokens": 72647060.0,
"step": 910
},
{
"epoch": 0.5654872749844817,
"grad_norm": 0.443359375,
"learning_rate": 1.259688501919286e-05,
"loss": 0.192108154296875,
"num_tokens": 72725747.0,
"step": 911
},
{
"epoch": 0.5661080074487895,
"grad_norm": 0.3984375,
"learning_rate": 1.2567110638479462e-05,
"loss": 0.17256927490234375,
"num_tokens": 72804807.0,
"step": 912
},
{
"epoch": 0.5667287399130975,
"grad_norm": 0.474609375,
"learning_rate": 1.2537346099234401e-05,
"loss": 0.210479736328125,
"num_tokens": 72881700.0,
"step": 913
},
{
"epoch": 0.5673494723774053,
"grad_norm": 0.427734375,
"learning_rate": 1.2507591521860493e-05,
"loss": 0.2077178955078125,
"num_tokens": 72963037.0,
"step": 914
},
{
"epoch": 0.5679702048417132,
"grad_norm": 0.404296875,
"learning_rate": 1.2477847026720287e-05,
"loss": 0.1495361328125,
"num_tokens": 73042459.0,
"step": 915
},
{
"epoch": 0.5685909373060211,
"grad_norm": 0.404296875,
"learning_rate": 1.2448112734135527e-05,
"loss": 0.16144561767578125,
"num_tokens": 73118566.0,
"step": 916
},
{
"epoch": 0.569211669770329,
"grad_norm": 0.44140625,
"learning_rate": 1.2418388764386698e-05,
"loss": 0.182769775390625,
"num_tokens": 73191457.0,
"step": 917
},
{
"epoch": 0.5698324022346368,
"grad_norm": 0.451171875,
"learning_rate": 1.2388675237712516e-05,
"loss": 0.21152496337890625,
"num_tokens": 73274378.0,
"step": 918
},
{
"epoch": 0.5704531346989448,
"grad_norm": 0.486328125,
"learning_rate": 1.2358972274309456e-05,
"loss": 0.2570343017578125,
"num_tokens": 73353511.0,
"step": 919
},
{
"epoch": 0.5710738671632526,
"grad_norm": 0.400390625,
"learning_rate": 1.2329279994331277e-05,
"loss": 0.1658477783203125,
"num_tokens": 73432026.0,
"step": 920
},
{
"epoch": 0.5716945996275605,
"grad_norm": 0.431640625,
"learning_rate": 1.2299598517888503e-05,
"loss": 0.2160491943359375,
"num_tokens": 73518039.0,
"step": 921
},
{
"epoch": 0.5723153320918684,
"grad_norm": 0.392578125,
"learning_rate": 1.2269927965047957e-05,
"loss": 0.15822601318359375,
"num_tokens": 73599283.0,
"step": 922
},
{
"epoch": 0.5729360645561763,
"grad_norm": 0.435546875,
"learning_rate": 1.2240268455832288e-05,
"loss": 0.230224609375,
"num_tokens": 73679927.0,
"step": 923
},
{
"epoch": 0.5735567970204841,
"grad_norm": 0.380859375,
"learning_rate": 1.2210620110219457e-05,
"loss": 0.1621551513671875,
"num_tokens": 73763227.0,
"step": 924
},
{
"epoch": 0.5741775294847921,
"grad_norm": 0.37890625,
"learning_rate": 1.2180983048142284e-05,
"loss": 0.1544647216796875,
"num_tokens": 73836676.0,
"step": 925
},
{
"epoch": 0.5747982619490999,
"grad_norm": 0.458984375,
"learning_rate": 1.2151357389487927e-05,
"loss": 0.2332763671875,
"num_tokens": 73920361.0,
"step": 926
},
{
"epoch": 0.5754189944134078,
"grad_norm": 0.4765625,
"learning_rate": 1.2121743254097413e-05,
"loss": 0.23199462890625,
"num_tokens": 73992322.0,
"step": 927
},
{
"epoch": 0.5760397268777157,
"grad_norm": 0.326171875,
"learning_rate": 1.2092140761765178e-05,
"loss": 0.123077392578125,
"num_tokens": 74075990.0,
"step": 928
},
{
"epoch": 0.5766604593420236,
"grad_norm": 0.380859375,
"learning_rate": 1.2062550032238543e-05,
"loss": 0.1793670654296875,
"num_tokens": 74158981.0,
"step": 929
},
{
"epoch": 0.5772811918063314,
"grad_norm": 0.37890625,
"learning_rate": 1.2032971185217241e-05,
"loss": 0.1597747802734375,
"num_tokens": 74237004.0,
"step": 930
},
{
"epoch": 0.5779019242706394,
"grad_norm": 0.41796875,
"learning_rate": 1.200340434035296e-05,
"loss": 0.1903839111328125,
"num_tokens": 74315297.0,
"step": 931
},
{
"epoch": 0.5785226567349472,
"grad_norm": 0.419921875,
"learning_rate": 1.1973849617248809e-05,
"loss": 0.18768310546875,
"num_tokens": 74398632.0,
"step": 932
},
{
"epoch": 0.5791433891992551,
"grad_norm": 0.375,
"learning_rate": 1.1944307135458887e-05,
"loss": 0.1831207275390625,
"num_tokens": 74488238.0,
"step": 933
},
{
"epoch": 0.5797641216635631,
"grad_norm": 0.392578125,
"learning_rate": 1.1914777014487767e-05,
"loss": 0.180328369140625,
"num_tokens": 74571216.0,
"step": 934
},
{
"epoch": 0.5803848541278709,
"grad_norm": 0.388671875,
"learning_rate": 1.1885259373790008e-05,
"loss": 0.1994476318359375,
"num_tokens": 74653026.0,
"step": 935
},
{
"epoch": 0.5810055865921788,
"grad_norm": 0.416015625,
"learning_rate": 1.1855754332769706e-05,
"loss": 0.195098876953125,
"num_tokens": 74735242.0,
"step": 936
},
{
"epoch": 0.5816263190564867,
"grad_norm": 0.375,
"learning_rate": 1.1826262010779966e-05,
"loss": 0.15753936767578125,
"num_tokens": 74822007.0,
"step": 937
},
{
"epoch": 0.5822470515207946,
"grad_norm": 0.37890625,
"learning_rate": 1.1796782527122457e-05,
"loss": 0.1595611572265625,
"num_tokens": 74898040.0,
"step": 938
},
{
"epoch": 0.5828677839851024,
"grad_norm": 0.412109375,
"learning_rate": 1.1767316001046916e-05,
"loss": 0.1820526123046875,
"num_tokens": 74981375.0,
"step": 939
},
{
"epoch": 0.5834885164494104,
"grad_norm": 0.388671875,
"learning_rate": 1.1737862551750658e-05,
"loss": 0.17943572998046875,
"num_tokens": 75060177.0,
"step": 940
},
{
"epoch": 0.5841092489137182,
"grad_norm": 0.453125,
"learning_rate": 1.1708422298378092e-05,
"loss": 0.20145416259765625,
"num_tokens": 75135145.0,
"step": 941
},
{
"epoch": 0.5847299813780261,
"grad_norm": 0.625,
"learning_rate": 1.1678995360020272e-05,
"loss": 0.1782684326171875,
"num_tokens": 75216188.0,
"step": 942
},
{
"epoch": 0.585350713842334,
"grad_norm": 0.392578125,
"learning_rate": 1.1649581855714361e-05,
"loss": 0.1988677978515625,
"num_tokens": 75304938.0,
"step": 943
},
{
"epoch": 0.5859714463066419,
"grad_norm": 0.42578125,
"learning_rate": 1.1620181904443203e-05,
"loss": 0.184051513671875,
"num_tokens": 75378257.0,
"step": 944
},
{
"epoch": 0.5865921787709497,
"grad_norm": 0.4453125,
"learning_rate": 1.1590795625134807e-05,
"loss": 0.2012481689453125,
"num_tokens": 75449830.0,
"step": 945
},
{
"epoch": 0.5872129112352577,
"grad_norm": 0.396484375,
"learning_rate": 1.1561423136661866e-05,
"loss": 0.1639251708984375,
"num_tokens": 75532371.0,
"step": 946
},
{
"epoch": 0.5878336436995655,
"grad_norm": 0.43359375,
"learning_rate": 1.1532064557841316e-05,
"loss": 0.1612091064453125,
"num_tokens": 75604484.0,
"step": 947
},
{
"epoch": 0.5884543761638734,
"grad_norm": 0.3515625,
"learning_rate": 1.1502720007433792e-05,
"loss": 0.1376495361328125,
"num_tokens": 75688669.0,
"step": 948
},
{
"epoch": 0.5890751086281812,
"grad_norm": 0.412109375,
"learning_rate": 1.14733896041432e-05,
"loss": 0.19152069091796875,
"num_tokens": 75766075.0,
"step": 949
},
{
"epoch": 0.5896958410924892,
"grad_norm": 0.376953125,
"learning_rate": 1.1444073466616224e-05,
"loss": 0.1862640380859375,
"num_tokens": 75848988.0,
"step": 950
},
{
"epoch": 0.590316573556797,
"grad_norm": 0.388671875,
"learning_rate": 1.1414771713441821e-05,
"loss": 0.185943603515625,
"num_tokens": 75933789.0,
"step": 951
},
{
"epoch": 0.590937306021105,
"grad_norm": 0.462890625,
"learning_rate": 1.1385484463150784e-05,
"loss": 0.257293701171875,
"num_tokens": 76014391.0,
"step": 952
},
{
"epoch": 0.5915580384854128,
"grad_norm": 0.40234375,
"learning_rate": 1.135621183421522e-05,
"loss": 0.19158172607421875,
"num_tokens": 76101364.0,
"step": 953
},
{
"epoch": 0.5921787709497207,
"grad_norm": 0.412109375,
"learning_rate": 1.1326953945048096e-05,
"loss": 0.162933349609375,
"num_tokens": 76171217.0,
"step": 954
},
{
"epoch": 0.5927995034140285,
"grad_norm": 0.4296875,
"learning_rate": 1.1297710914002758e-05,
"loss": 0.1987457275390625,
"num_tokens": 76245504.0,
"step": 955
},
{
"epoch": 0.5934202358783365,
"grad_norm": 0.3671875,
"learning_rate": 1.1268482859372448e-05,
"loss": 0.14699554443359375,
"num_tokens": 76327814.0,
"step": 956
},
{
"epoch": 0.5940409683426443,
"grad_norm": 0.39453125,
"learning_rate": 1.1239269899389812e-05,
"loss": 0.17163848876953125,
"num_tokens": 76407754.0,
"step": 957
},
{
"epoch": 0.5946617008069522,
"grad_norm": 0.419921875,
"learning_rate": 1.1210072152226462e-05,
"loss": 0.2128143310546875,
"num_tokens": 76487788.0,
"step": 958
},
{
"epoch": 0.5952824332712601,
"grad_norm": 0.466796875,
"learning_rate": 1.1180889735992444e-05,
"loss": 0.21141815185546875,
"num_tokens": 76559394.0,
"step": 959
},
{
"epoch": 0.595903165735568,
"grad_norm": 0.421875,
"learning_rate": 1.1151722768735806e-05,
"loss": 0.20459747314453125,
"num_tokens": 76636321.0,
"step": 960
},
{
"epoch": 0.5965238981998758,
"grad_norm": 0.40234375,
"learning_rate": 1.1122571368442096e-05,
"loss": 0.1572418212890625,
"num_tokens": 76713008.0,
"step": 961
},
{
"epoch": 0.5971446306641838,
"grad_norm": 0.408203125,
"learning_rate": 1.1093435653033888e-05,
"loss": 0.187225341796875,
"num_tokens": 76790334.0,
"step": 962
},
{
"epoch": 0.5977653631284916,
"grad_norm": 0.369140625,
"learning_rate": 1.1064315740370316e-05,
"loss": 0.1584625244140625,
"num_tokens": 76870560.0,
"step": 963
},
{
"epoch": 0.5983860955927995,
"grad_norm": 0.35546875,
"learning_rate": 1.1035211748246577e-05,
"loss": 0.14681243896484375,
"num_tokens": 76946226.0,
"step": 964
},
{
"epoch": 0.5990068280571074,
"grad_norm": 0.466796875,
"learning_rate": 1.1006123794393474e-05,
"loss": 0.21826171875,
"num_tokens": 77017326.0,
"step": 965
},
{
"epoch": 0.5996275605214153,
"grad_norm": 0.392578125,
"learning_rate": 1.0977051996476942e-05,
"loss": 0.1714324951171875,
"num_tokens": 77102455.0,
"step": 966
},
{
"epoch": 0.6002482929857231,
"grad_norm": 0.333984375,
"learning_rate": 1.0947996472097542e-05,
"loss": 0.13805389404296875,
"num_tokens": 77187110.0,
"step": 967
},
{
"epoch": 0.6008690254500311,
"grad_norm": 0.41796875,
"learning_rate": 1.0918957338790012e-05,
"loss": 0.20819091796875,
"num_tokens": 77268567.0,
"step": 968
},
{
"epoch": 0.6014897579143389,
"grad_norm": 0.40234375,
"learning_rate": 1.0889934714022801e-05,
"loss": 0.1937255859375,
"num_tokens": 77355659.0,
"step": 969
},
{
"epoch": 0.6021104903786468,
"grad_norm": 0.353515625,
"learning_rate": 1.0860928715197556e-05,
"loss": 0.1458587646484375,
"num_tokens": 77433425.0,
"step": 970
},
{
"epoch": 0.6027312228429547,
"grad_norm": 0.359375,
"learning_rate": 1.0831939459648689e-05,
"loss": 0.146820068359375,
"num_tokens": 77513317.0,
"step": 971
},
{
"epoch": 0.6033519553072626,
"grad_norm": 0.3984375,
"learning_rate": 1.0802967064642867e-05,
"loss": 0.17783355712890625,
"num_tokens": 77594050.0,
"step": 972
},
{
"epoch": 0.6039726877715704,
"grad_norm": 0.3671875,
"learning_rate": 1.0774011647378554e-05,
"loss": 0.1338653564453125,
"num_tokens": 77677696.0,
"step": 973
},
{
"epoch": 0.6045934202358784,
"grad_norm": 0.4140625,
"learning_rate": 1.074507332498555e-05,
"loss": 0.1887969970703125,
"num_tokens": 77765990.0,
"step": 974
},
{
"epoch": 0.6052141527001862,
"grad_norm": 0.419921875,
"learning_rate": 1.0716152214524492e-05,
"loss": 0.18674468994140625,
"num_tokens": 77847283.0,
"step": 975
},
{
"epoch": 0.6058348851644941,
"grad_norm": 0.4375,
"learning_rate": 1.0687248432986385e-05,
"loss": 0.215911865234375,
"num_tokens": 77924250.0,
"step": 976
},
{
"epoch": 0.606455617628802,
"grad_norm": 0.455078125,
"learning_rate": 1.0658362097292158e-05,
"loss": 0.2231903076171875,
"num_tokens": 78004107.0,
"step": 977
},
{
"epoch": 0.6070763500931099,
"grad_norm": 0.46875,
"learning_rate": 1.062949332429214e-05,
"loss": 0.2095947265625,
"num_tokens": 78081586.0,
"step": 978
},
{
"epoch": 0.6076970825574177,
"grad_norm": 0.419921875,
"learning_rate": 1.0600642230765646e-05,
"loss": 0.19371795654296875,
"num_tokens": 78158824.0,
"step": 979
},
{
"epoch": 0.6083178150217257,
"grad_norm": 0.390625,
"learning_rate": 1.0571808933420446e-05,
"loss": 0.183502197265625,
"num_tokens": 78237451.0,
"step": 980
},
{
"epoch": 0.6089385474860335,
"grad_norm": 0.419921875,
"learning_rate": 1.0542993548892335e-05,
"loss": 0.17093658447265625,
"num_tokens": 78310371.0,
"step": 981
},
{
"epoch": 0.6095592799503414,
"grad_norm": 0.439453125,
"learning_rate": 1.0514196193744659e-05,
"loss": 0.22356414794921875,
"num_tokens": 78390169.0,
"step": 982
},
{
"epoch": 0.6101800124146493,
"grad_norm": 0.38671875,
"learning_rate": 1.0485416984467807e-05,
"loss": 0.181396484375,
"num_tokens": 78474698.0,
"step": 983
},
{
"epoch": 0.6108007448789572,
"grad_norm": 0.416015625,
"learning_rate": 1.045665603747878e-05,
"loss": 0.1837158203125,
"num_tokens": 78546427.0,
"step": 984
},
{
"epoch": 0.611421477343265,
"grad_norm": 0.421875,
"learning_rate": 1.0427913469120702e-05,
"loss": 0.2014923095703125,
"num_tokens": 78625136.0,
"step": 985
},
{
"epoch": 0.612042209807573,
"grad_norm": 0.38671875,
"learning_rate": 1.0399189395662353e-05,
"loss": 0.163421630859375,
"num_tokens": 78709674.0,
"step": 986
},
{
"epoch": 0.6126629422718808,
"grad_norm": 0.408203125,
"learning_rate": 1.0370483933297702e-05,
"loss": 0.18042755126953125,
"num_tokens": 78784917.0,
"step": 987
},
{
"epoch": 0.6132836747361887,
"grad_norm": 0.40234375,
"learning_rate": 1.0341797198145426e-05,
"loss": 0.197662353515625,
"num_tokens": 78869506.0,
"step": 988
},
{
"epoch": 0.6139044072004965,
"grad_norm": 0.419921875,
"learning_rate": 1.0313129306248439e-05,
"loss": 0.215667724609375,
"num_tokens": 78951695.0,
"step": 989
},
{
"epoch": 0.6145251396648045,
"grad_norm": 0.412109375,
"learning_rate": 1.0284480373573453e-05,
"loss": 0.17279052734375,
"num_tokens": 79029222.0,
"step": 990
},
{
"epoch": 0.6151458721291123,
"grad_norm": 0.396484375,
"learning_rate": 1.0255850516010472e-05,
"loss": 0.17056655883789062,
"num_tokens": 79105801.0,
"step": 991
},
{
"epoch": 0.6157666045934203,
"grad_norm": 0.365234375,
"learning_rate": 1.0227239849372333e-05,
"loss": 0.16425323486328125,
"num_tokens": 79189037.0,
"step": 992
},
{
"epoch": 0.6163873370577281,
"grad_norm": 0.443359375,
"learning_rate": 1.019864848939426e-05,
"loss": 0.210418701171875,
"num_tokens": 79264685.0,
"step": 993
},
{
"epoch": 0.617008069522036,
"grad_norm": 0.416015625,
"learning_rate": 1.0170076551733356e-05,
"loss": 0.19185638427734375,
"num_tokens": 79345699.0,
"step": 994
},
{
"epoch": 0.6176288019863438,
"grad_norm": 0.361328125,
"learning_rate": 1.0141524151968182e-05,
"loss": 0.1581878662109375,
"num_tokens": 79431819.0,
"step": 995
},
{
"epoch": 0.6182495344506518,
"grad_norm": 0.400390625,
"learning_rate": 1.0112991405598239e-05,
"loss": 0.189056396484375,
"num_tokens": 79512214.0,
"step": 996
},
{
"epoch": 0.6188702669149596,
"grad_norm": 0.3984375,
"learning_rate": 1.0084478428043544e-05,
"loss": 0.175384521484375,
"num_tokens": 79594543.0,
"step": 997
},
{
"epoch": 0.6194909993792675,
"grad_norm": 0.447265625,
"learning_rate": 1.005598533464415e-05,
"loss": 0.18316650390625,
"num_tokens": 79668677.0,
"step": 998
},
{
"epoch": 0.6201117318435754,
"grad_norm": 0.42578125,
"learning_rate": 1.0027512240659654e-05,
"loss": 0.19012451171875,
"num_tokens": 79746193.0,
"step": 999
},
{
"epoch": 0.6207324643078833,
"grad_norm": 0.435546875,
"learning_rate": 9.999059261268763e-06,
"loss": 0.188385009765625,
"num_tokens": 79822782.0,
"step": 1000
},
{
"epoch": 0.6213531967721911,
"grad_norm": 0.421875,
"learning_rate": 9.970626511568823e-06,
"loss": 0.16900634765625,
"num_tokens": 79903506.0,
"step": 1001
},
{
"epoch": 0.6219739292364991,
"grad_norm": 0.431640625,
"learning_rate": 9.942214106575347e-06,
"loss": 0.178558349609375,
"num_tokens": 79980294.0,
"step": 1002
},
{
"epoch": 0.6225946617008069,
"grad_norm": 0.388671875,
"learning_rate": 9.913822161221532e-06,
"loss": 0.14640045166015625,
"num_tokens": 80058311.0,
"step": 1003
},
{
"epoch": 0.6232153941651148,
"grad_norm": 0.375,
"learning_rate": 9.885450790357838e-06,
"loss": 0.1551666259765625,
"num_tokens": 80144031.0,
"step": 1004
},
{
"epoch": 0.6238361266294227,
"grad_norm": 0.4765625,
"learning_rate": 9.857100108751472e-06,
"loss": 0.23431396484375,
"num_tokens": 80218374.0,
"step": 1005
},
{
"epoch": 0.6244568590937306,
"grad_norm": 0.388671875,
"learning_rate": 9.828770231085973e-06,
"loss": 0.16182708740234375,
"num_tokens": 80294466.0,
"step": 1006
},
{
"epoch": 0.6250775915580384,
"grad_norm": 0.44921875,
"learning_rate": 9.800461271960713e-06,
"loss": 0.2367095947265625,
"num_tokens": 80367504.0,
"step": 1007
},
{
"epoch": 0.6256983240223464,
"grad_norm": 0.4140625,
"learning_rate": 9.772173345890433e-06,
"loss": 0.17333984375,
"num_tokens": 80444754.0,
"step": 1008
},
{
"epoch": 0.6263190564866542,
"grad_norm": 0.37890625,
"learning_rate": 9.743906567304819e-06,
"loss": 0.163726806640625,
"num_tokens": 80530419.0,
"step": 1009
},
{
"epoch": 0.6269397889509621,
"grad_norm": 0.408203125,
"learning_rate": 9.715661050547986e-06,
"loss": 0.1769561767578125,
"num_tokens": 80607413.0,
"step": 1010
},
{
"epoch": 0.62756052141527,
"grad_norm": 0.357421875,
"learning_rate": 9.687436909878054e-06,
"loss": 0.11574554443359375,
"num_tokens": 80690008.0,
"step": 1011
},
{
"epoch": 0.6281812538795779,
"grad_norm": 0.384765625,
"learning_rate": 9.659234259466675e-06,
"loss": 0.1707763671875,
"num_tokens": 80768202.0,
"step": 1012
},
{
"epoch": 0.6288019863438857,
"grad_norm": 0.44921875,
"learning_rate": 9.631053213398557e-06,
"loss": 0.2403106689453125,
"num_tokens": 80848546.0,
"step": 1013
},
{
"epoch": 0.6294227188081937,
"grad_norm": 0.396484375,
"learning_rate": 9.602893885671031e-06,
"loss": 0.181182861328125,
"num_tokens": 80928690.0,
"step": 1014
},
{
"epoch": 0.6300434512725015,
"grad_norm": 0.37890625,
"learning_rate": 9.57475639019356e-06,
"loss": 0.1595458984375,
"num_tokens": 81010382.0,
"step": 1015
},
{
"epoch": 0.6306641837368094,
"grad_norm": 0.423828125,
"learning_rate": 9.546640840787291e-06,
"loss": 0.1982421875,
"num_tokens": 81089515.0,
"step": 1016
},
{
"epoch": 0.6312849162011173,
"grad_norm": 0.4140625,
"learning_rate": 9.518547351184607e-06,
"loss": 0.1847686767578125,
"num_tokens": 81172315.0,
"step": 1017
},
{
"epoch": 0.6319056486654252,
"grad_norm": 0.4296875,
"learning_rate": 9.490476035028652e-06,
"loss": 0.21869659423828125,
"num_tokens": 81257038.0,
"step": 1018
},
{
"epoch": 0.6325263811297331,
"grad_norm": 0.400390625,
"learning_rate": 9.462427005872859e-06,
"loss": 0.14666748046875,
"num_tokens": 81329471.0,
"step": 1019
},
{
"epoch": 0.633147113594041,
"grad_norm": 0.416015625,
"learning_rate": 9.434400377180535e-06,
"loss": 0.1641082763671875,
"num_tokens": 81403335.0,
"step": 1020
},
{
"epoch": 0.6337678460583489,
"grad_norm": 0.4140625,
"learning_rate": 9.406396262324341e-06,
"loss": 0.183349609375,
"num_tokens": 81489845.0,
"step": 1021
},
{
"epoch": 0.6343885785226567,
"grad_norm": 0.40625,
"learning_rate": 9.378414774585889e-06,
"loss": 0.19657135009765625,
"num_tokens": 81568499.0,
"step": 1022
},
{
"epoch": 0.6350093109869647,
"grad_norm": 0.3984375,
"learning_rate": 9.350456027155254e-06,
"loss": 0.16814422607421875,
"num_tokens": 81643811.0,
"step": 1023
},
{
"epoch": 0.6356300434512725,
"grad_norm": 0.416015625,
"learning_rate": 9.322520133130515e-06,
"loss": 0.202880859375,
"num_tokens": 81728916.0,
"step": 1024
},
{
"epoch": 0.6362507759155804,
"grad_norm": 0.4296875,
"learning_rate": 9.294607205517318e-06,
"loss": 0.243621826171875,
"num_tokens": 81819717.0,
"step": 1025
},
{
"epoch": 0.6368715083798883,
"grad_norm": 0.373046875,
"learning_rate": 9.266717357228392e-06,
"loss": 0.13022613525390625,
"num_tokens": 81898465.0,
"step": 1026
},
{
"epoch": 0.6374922408441962,
"grad_norm": 0.37890625,
"learning_rate": 9.23885070108311e-06,
"loss": 0.15389251708984375,
"num_tokens": 81983540.0,
"step": 1027
},
{
"epoch": 0.638112973308504,
"grad_norm": 0.4375,
"learning_rate": 9.211007349807044e-06,
"loss": 0.19677734375,
"num_tokens": 82061511.0,
"step": 1028
},
{
"epoch": 0.638733705772812,
"grad_norm": 0.462890625,
"learning_rate": 9.183187416031465e-06,
"loss": 0.2108306884765625,
"num_tokens": 82135277.0,
"step": 1029
},
{
"epoch": 0.6393544382371198,
"grad_norm": 0.498046875,
"learning_rate": 9.155391012292948e-06,
"loss": 0.2243804931640625,
"num_tokens": 82211020.0,
"step": 1030
},
{
"epoch": 0.6399751707014277,
"grad_norm": 0.470703125,
"learning_rate": 9.12761825103286e-06,
"loss": 0.23211669921875,
"num_tokens": 82285716.0,
"step": 1031
},
{
"epoch": 0.6405959031657356,
"grad_norm": 0.408203125,
"learning_rate": 9.099869244596938e-06,
"loss": 0.1846923828125,
"num_tokens": 82361940.0,
"step": 1032
},
{
"epoch": 0.6412166356300435,
"grad_norm": 0.44140625,
"learning_rate": 9.072144105234829e-06,
"loss": 0.219390869140625,
"num_tokens": 82440261.0,
"step": 1033
},
{
"epoch": 0.6418373680943513,
"grad_norm": 0.3359375,
"learning_rate": 9.044442945099634e-06,
"loss": 0.13309478759765625,
"num_tokens": 82518198.0,
"step": 1034
},
{
"epoch": 0.6424581005586593,
"grad_norm": 0.423828125,
"learning_rate": 9.016765876247445e-06,
"loss": 0.200439453125,
"num_tokens": 82602917.0,
"step": 1035
},
{
"epoch": 0.6430788330229671,
"grad_norm": 0.322265625,
"learning_rate": 8.989113010636918e-06,
"loss": 0.1237640380859375,
"num_tokens": 82689370.0,
"step": 1036
},
{
"epoch": 0.643699565487275,
"grad_norm": 0.490234375,
"learning_rate": 8.96148446012878e-06,
"loss": 0.240997314453125,
"num_tokens": 82762908.0,
"step": 1037
},
{
"epoch": 0.6443202979515829,
"grad_norm": 0.359375,
"learning_rate": 8.933880336485415e-06,
"loss": 0.1269073486328125,
"num_tokens": 82837460.0,
"step": 1038
},
{
"epoch": 0.6449410304158908,
"grad_norm": 0.388671875,
"learning_rate": 8.9063007513704e-06,
"loss": 0.16131591796875,
"num_tokens": 82916638.0,
"step": 1039
},
{
"epoch": 0.6455617628801986,
"grad_norm": 0.462890625,
"learning_rate": 8.878745816348025e-06,
"loss": 0.2454986572265625,
"num_tokens": 83001996.0,
"step": 1040
},
{
"epoch": 0.6461824953445066,
"grad_norm": 0.404296875,
"learning_rate": 8.8512156428829e-06,
"loss": 0.17291259765625,
"num_tokens": 83078821.0,
"step": 1041
},
{
"epoch": 0.6468032278088144,
"grad_norm": 0.4453125,
"learning_rate": 8.823710342339439e-06,
"loss": 0.22509765625,
"num_tokens": 83158631.0,
"step": 1042
},
{
"epoch": 0.6474239602731223,
"grad_norm": 0.36328125,
"learning_rate": 8.796230025981456e-06,
"loss": 0.15559005737304688,
"num_tokens": 83246743.0,
"step": 1043
},
{
"epoch": 0.6480446927374302,
"grad_norm": 0.39453125,
"learning_rate": 8.768774804971705e-06,
"loss": 0.16439056396484375,
"num_tokens": 83324797.0,
"step": 1044
},
{
"epoch": 0.6486654252017381,
"grad_norm": 0.478515625,
"learning_rate": 8.741344790371411e-06,
"loss": 0.2326507568359375,
"num_tokens": 83404222.0,
"step": 1045
},
{
"epoch": 0.6492861576660459,
"grad_norm": 0.41796875,
"learning_rate": 8.713940093139835e-06,
"loss": 0.201171875,
"num_tokens": 83481438.0,
"step": 1046
},
{
"epoch": 0.6499068901303539,
"grad_norm": 0.375,
"learning_rate": 8.686560824133845e-06,
"loss": 0.16651153564453125,
"num_tokens": 83567009.0,
"step": 1047
},
{
"epoch": 0.6505276225946617,
"grad_norm": 0.42578125,
"learning_rate": 8.659207094107421e-06,
"loss": 0.20192718505859375,
"num_tokens": 83644082.0,
"step": 1048
},
{
"epoch": 0.6511483550589696,
"grad_norm": 0.39453125,
"learning_rate": 8.631879013711255e-06,
"loss": 0.170074462890625,
"num_tokens": 83720729.0,
"step": 1049
},
{
"epoch": 0.6517690875232774,
"grad_norm": 0.451171875,
"learning_rate": 8.604576693492269e-06,
"loss": 0.23583984375,
"num_tokens": 83800561.0,
"step": 1050
},
{
"epoch": 0.6523898199875854,
"grad_norm": 0.427734375,
"learning_rate": 8.577300243893173e-06,
"loss": 0.18898773193359375,
"num_tokens": 83873584.0,
"step": 1051
},
{
"epoch": 0.6530105524518932,
"grad_norm": 0.3671875,
"learning_rate": 8.550049775252048e-06,
"loss": 0.15362548828125,
"num_tokens": 83955767.0,
"step": 1052
},
{
"epoch": 0.6536312849162011,
"grad_norm": 0.44921875,
"learning_rate": 8.522825397801864e-06,
"loss": 0.19380950927734375,
"num_tokens": 84032226.0,
"step": 1053
},
{
"epoch": 0.654252017380509,
"grad_norm": 0.41015625,
"learning_rate": 8.495627221670043e-06,
"loss": 0.19615936279296875,
"num_tokens": 84110483.0,
"step": 1054
},
{
"epoch": 0.6548727498448169,
"grad_norm": 0.3515625,
"learning_rate": 8.468455356878027e-06,
"loss": 0.1392669677734375,
"num_tokens": 84188975.0,
"step": 1055
},
{
"epoch": 0.6554934823091247,
"grad_norm": 0.4140625,
"learning_rate": 8.441309913340826e-06,
"loss": 0.211944580078125,
"num_tokens": 84274820.0,
"step": 1056
},
{
"epoch": 0.6561142147734327,
"grad_norm": 0.44140625,
"learning_rate": 8.414191000866566e-06,
"loss": 0.23101806640625,
"num_tokens": 84355077.0,
"step": 1057
},
{
"epoch": 0.6567349472377405,
"grad_norm": 0.408203125,
"learning_rate": 8.387098729156049e-06,
"loss": 0.169464111328125,
"num_tokens": 84433760.0,
"step": 1058
},
{
"epoch": 0.6573556797020484,
"grad_norm": 0.484375,
"learning_rate": 8.360033207802303e-06,
"loss": 0.256561279296875,
"num_tokens": 84513581.0,
"step": 1059
},
{
"epoch": 0.6579764121663563,
"grad_norm": 0.392578125,
"learning_rate": 8.332994546290172e-06,
"loss": 0.1609954833984375,
"num_tokens": 84590465.0,
"step": 1060
},
{
"epoch": 0.6585971446306642,
"grad_norm": 0.435546875,
"learning_rate": 8.305982853995821e-06,
"loss": 0.188507080078125,
"num_tokens": 84669091.0,
"step": 1061
},
{
"epoch": 0.659217877094972,
"grad_norm": 0.416015625,
"learning_rate": 8.278998240186322e-06,
"loss": 0.1766510009765625,
"num_tokens": 84742945.0,
"step": 1062
},
{
"epoch": 0.65983860955928,
"grad_norm": 0.412109375,
"learning_rate": 8.252040814019234e-06,
"loss": 0.1820068359375,
"num_tokens": 84821324.0,
"step": 1063
},
{
"epoch": 0.6604593420235878,
"grad_norm": 0.3984375,
"learning_rate": 8.225110684542102e-06,
"loss": 0.194549560546875,
"num_tokens": 84906422.0,
"step": 1064
},
{
"epoch": 0.6610800744878957,
"grad_norm": 0.38671875,
"learning_rate": 8.198207960692083e-06,
"loss": 0.14788818359375,
"num_tokens": 84983658.0,
"step": 1065
},
{
"epoch": 0.6617008069522036,
"grad_norm": 0.392578125,
"learning_rate": 8.171332751295451e-06,
"loss": 0.170166015625,
"num_tokens": 85066430.0,
"step": 1066
},
{
"epoch": 0.6623215394165115,
"grad_norm": 0.333984375,
"learning_rate": 8.144485165067187e-06,
"loss": 0.13824462890625,
"num_tokens": 85149566.0,
"step": 1067
},
{
"epoch": 0.6629422718808193,
"grad_norm": 0.41015625,
"learning_rate": 8.117665310610544e-06,
"loss": 0.17536163330078125,
"num_tokens": 85232817.0,
"step": 1068
},
{
"epoch": 0.6635630043451273,
"grad_norm": 0.384765625,
"learning_rate": 8.090873296416573e-06,
"loss": 0.177276611328125,
"num_tokens": 85319206.0,
"step": 1069
},
{
"epoch": 0.6641837368094351,
"grad_norm": 0.4140625,
"learning_rate": 8.064109230863711e-06,
"loss": 0.2116546630859375,
"num_tokens": 85403774.0,
"step": 1070
},
{
"epoch": 0.664804469273743,
"grad_norm": 0.408203125,
"learning_rate": 8.037373222217359e-06,
"loss": 0.2097015380859375,
"num_tokens": 85489559.0,
"step": 1071
},
{
"epoch": 0.6654252017380509,
"grad_norm": 0.416015625,
"learning_rate": 8.010665378629394e-06,
"loss": 0.1838836669921875,
"num_tokens": 85573410.0,
"step": 1072
},
{
"epoch": 0.6660459342023588,
"grad_norm": 0.44140625,
"learning_rate": 7.983985808137774e-06,
"loss": 0.2295379638671875,
"num_tokens": 85647620.0,
"step": 1073
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.423828125,
"learning_rate": 7.957334618666096e-06,
"loss": 0.16323089599609375,
"num_tokens": 85728277.0,
"step": 1074
},
{
"epoch": 0.6672873991309746,
"grad_norm": 0.41796875,
"learning_rate": 7.930711918023126e-06,
"loss": 0.1671905517578125,
"num_tokens": 85802882.0,
"step": 1075
},
{
"epoch": 0.6679081315952824,
"grad_norm": 0.41015625,
"learning_rate": 7.904117813902413e-06,
"loss": 0.17621612548828125,
"num_tokens": 85882884.0,
"step": 1076
},
{
"epoch": 0.6685288640595903,
"grad_norm": 0.38671875,
"learning_rate": 7.877552413881824e-06,
"loss": 0.18146896362304688,
"num_tokens": 85962973.0,
"step": 1077
},
{
"epoch": 0.6691495965238982,
"grad_norm": 0.412109375,
"learning_rate": 7.851015825423093e-06,
"loss": 0.18597412109375,
"num_tokens": 86043933.0,
"step": 1078
},
{
"epoch": 0.6697703289882061,
"grad_norm": 0.38671875,
"learning_rate": 7.824508155871431e-06,
"loss": 0.15042877197265625,
"num_tokens": 86118306.0,
"step": 1079
},
{
"epoch": 0.6703910614525139,
"grad_norm": 0.439453125,
"learning_rate": 7.798029512455053e-06,
"loss": 0.19622802734375,
"num_tokens": 86191715.0,
"step": 1080
},
{
"epoch": 0.6710117939168219,
"grad_norm": 0.427734375,
"learning_rate": 7.771580002284752e-06,
"loss": 0.1968994140625,
"num_tokens": 86277394.0,
"step": 1081
},
{
"epoch": 0.6716325263811297,
"grad_norm": 0.44921875,
"learning_rate": 7.745159732353493e-06,
"loss": 0.2355499267578125,
"num_tokens": 86358424.0,
"step": 1082
},
{
"epoch": 0.6722532588454376,
"grad_norm": 0.45703125,
"learning_rate": 7.71876880953594e-06,
"loss": 0.211517333984375,
"num_tokens": 86434095.0,
"step": 1083
},
{
"epoch": 0.6728739913097455,
"grad_norm": 0.4375,
"learning_rate": 7.692407340588055e-06,
"loss": 0.2068023681640625,
"num_tokens": 86510150.0,
"step": 1084
},
{
"epoch": 0.6734947237740534,
"grad_norm": 0.408203125,
"learning_rate": 7.666075432146644e-06,
"loss": 0.1804656982421875,
"num_tokens": 86595432.0,
"step": 1085
},
{
"epoch": 0.6741154562383612,
"grad_norm": 0.40234375,
"learning_rate": 7.639773190728937e-06,
"loss": 0.1710968017578125,
"num_tokens": 86669354.0,
"step": 1086
},
{
"epoch": 0.6747361887026692,
"grad_norm": 0.4296875,
"learning_rate": 7.613500722732162e-06,
"loss": 0.2032012939453125,
"num_tokens": 86749701.0,
"step": 1087
},
{
"epoch": 0.675356921166977,
"grad_norm": 0.357421875,
"learning_rate": 7.587258134433113e-06,
"loss": 0.183990478515625,
"num_tokens": 86833740.0,
"step": 1088
},
{
"epoch": 0.6759776536312849,
"grad_norm": 0.388671875,
"learning_rate": 7.561045531987692e-06,
"loss": 0.158416748046875,
"num_tokens": 86912900.0,
"step": 1089
},
{
"epoch": 0.6765983860955928,
"grad_norm": 0.373046875,
"learning_rate": 7.5348630214305325e-06,
"loss": 0.16168212890625,
"num_tokens": 86997532.0,
"step": 1090
},
{
"epoch": 0.6772191185599007,
"grad_norm": 0.4140625,
"learning_rate": 7.5087107086745144e-06,
"loss": 0.18426513671875,
"num_tokens": 87072421.0,
"step": 1091
},
{
"epoch": 0.6778398510242085,
"grad_norm": 0.357421875,
"learning_rate": 7.4825886995103856e-06,
"loss": 0.15811920166015625,
"num_tokens": 87152464.0,
"step": 1092
},
{
"epoch": 0.6784605834885165,
"grad_norm": 0.44921875,
"learning_rate": 7.456497099606293e-06,
"loss": 0.2179718017578125,
"num_tokens": 87234284.0,
"step": 1093
},
{
"epoch": 0.6790813159528243,
"grad_norm": 0.39453125,
"learning_rate": 7.430436014507373e-06,
"loss": 0.1925048828125,
"num_tokens": 87321567.0,
"step": 1094
},
{
"epoch": 0.6797020484171322,
"grad_norm": 0.3984375,
"learning_rate": 7.4044055496353445e-06,
"loss": 0.19403839111328125,
"num_tokens": 87398585.0,
"step": 1095
},
{
"epoch": 0.68032278088144,
"grad_norm": 0.4140625,
"learning_rate": 7.378405810288035e-06,
"loss": 0.15915679931640625,
"num_tokens": 87478434.0,
"step": 1096
},
{
"epoch": 0.680943513345748,
"grad_norm": 0.390625,
"learning_rate": 7.352436901639005e-06,
"loss": 0.147705078125,
"num_tokens": 87557281.0,
"step": 1097
},
{
"epoch": 0.6815642458100558,
"grad_norm": 0.41796875,
"learning_rate": 7.3264989287370775e-06,
"loss": 0.19734954833984375,
"num_tokens": 87639054.0,
"step": 1098
},
{
"epoch": 0.6821849782743638,
"grad_norm": 0.43359375,
"learning_rate": 7.300591996505955e-06,
"loss": 0.2100677490234375,
"num_tokens": 87721938.0,
"step": 1099
},
{
"epoch": 0.6828057107386716,
"grad_norm": 0.3984375,
"learning_rate": 7.2747162097437684e-06,
"loss": 0.1786956787109375,
"num_tokens": 87808007.0,
"step": 1100
},
{
"epoch": 0.6834264432029795,
"grad_norm": 0.447265625,
"learning_rate": 7.248871673122655e-06,
"loss": 0.1780242919921875,
"num_tokens": 87878005.0,
"step": 1101
},
{
"epoch": 0.6840471756672873,
"grad_norm": 0.419921875,
"learning_rate": 7.223058491188335e-06,
"loss": 0.18431854248046875,
"num_tokens": 87955080.0,
"step": 1102
},
{
"epoch": 0.6846679081315953,
"grad_norm": 0.3828125,
"learning_rate": 7.197276768359712e-06,
"loss": 0.16007232666015625,
"num_tokens": 88032406.0,
"step": 1103
},
{
"epoch": 0.6852886405959032,
"grad_norm": 0.416015625,
"learning_rate": 7.171526608928417e-06,
"loss": 0.1771697998046875,
"num_tokens": 88110736.0,
"step": 1104
},
{
"epoch": 0.685909373060211,
"grad_norm": 0.40625,
"learning_rate": 7.145808117058399e-06,
"loss": 0.1876220703125,
"num_tokens": 88190107.0,
"step": 1105
},
{
"epoch": 0.686530105524519,
"grad_norm": 0.42578125,
"learning_rate": 7.120121396785521e-06,
"loss": 0.20516204833984375,
"num_tokens": 88266901.0,
"step": 1106
},
{
"epoch": 0.6871508379888268,
"grad_norm": 0.41015625,
"learning_rate": 7.094466552017108e-06,
"loss": 0.1861114501953125,
"num_tokens": 88352106.0,
"step": 1107
},
{
"epoch": 0.6877715704531348,
"grad_norm": 0.421875,
"learning_rate": 7.068843686531555e-06,
"loss": 0.18357086181640625,
"num_tokens": 88426358.0,
"step": 1108
},
{
"epoch": 0.6883923029174426,
"grad_norm": 0.4375,
"learning_rate": 7.043252903977895e-06,
"loss": 0.1861419677734375,
"num_tokens": 88503481.0,
"step": 1109
},
{
"epoch": 0.6890130353817505,
"grad_norm": 0.439453125,
"learning_rate": 7.017694307875365e-06,
"loss": 0.1988372802734375,
"num_tokens": 88578451.0,
"step": 1110
},
{
"epoch": 0.6896337678460583,
"grad_norm": 0.419921875,
"learning_rate": 6.992168001613024e-06,
"loss": 0.15110015869140625,
"num_tokens": 88650293.0,
"step": 1111
},
{
"epoch": 0.6902545003103663,
"grad_norm": 0.40234375,
"learning_rate": 6.9666740884492965e-06,
"loss": 0.1735382080078125,
"num_tokens": 88726764.0,
"step": 1112
},
{
"epoch": 0.6908752327746741,
"grad_norm": 0.42578125,
"learning_rate": 6.941212671511569e-06,
"loss": 0.1799774169921875,
"num_tokens": 88806096.0,
"step": 1113
},
{
"epoch": 0.691495965238982,
"grad_norm": 0.431640625,
"learning_rate": 6.9157838537957965e-06,
"loss": 0.18682098388671875,
"num_tokens": 88877547.0,
"step": 1114
},
{
"epoch": 0.6921166977032899,
"grad_norm": 0.427734375,
"learning_rate": 6.890387738166042e-06,
"loss": 0.2119140625,
"num_tokens": 88956193.0,
"step": 1115
},
{
"epoch": 0.6927374301675978,
"grad_norm": 0.404296875,
"learning_rate": 6.8650244273540845e-06,
"loss": 0.1871337890625,
"num_tokens": 89040341.0,
"step": 1116
},
{
"epoch": 0.6933581626319056,
"grad_norm": 0.427734375,
"learning_rate": 6.839694023959016e-06,
"loss": 0.2122344970703125,
"num_tokens": 89117484.0,
"step": 1117
},
{
"epoch": 0.6939788950962136,
"grad_norm": 0.392578125,
"learning_rate": 6.814396630446807e-06,
"loss": 0.183502197265625,
"num_tokens": 89195644.0,
"step": 1118
},
{
"epoch": 0.6945996275605214,
"grad_norm": 0.353515625,
"learning_rate": 6.789132349149886e-06,
"loss": 0.13417816162109375,
"num_tokens": 89273466.0,
"step": 1119
},
{
"epoch": 0.6952203600248293,
"grad_norm": 0.376953125,
"learning_rate": 6.763901282266755e-06,
"loss": 0.1656036376953125,
"num_tokens": 89358256.0,
"step": 1120
},
{
"epoch": 0.6958410924891372,
"grad_norm": 0.392578125,
"learning_rate": 6.738703531861537e-06,
"loss": 0.201568603515625,
"num_tokens": 89441520.0,
"step": 1121
},
{
"epoch": 0.6964618249534451,
"grad_norm": 0.38671875,
"learning_rate": 6.713539199863605e-06,
"loss": 0.15439605712890625,
"num_tokens": 89521371.0,
"step": 1122
},
{
"epoch": 0.6970825574177529,
"grad_norm": 0.412109375,
"learning_rate": 6.688408388067135e-06,
"loss": 0.2021331787109375,
"num_tokens": 89606207.0,
"step": 1123
},
{
"epoch": 0.6977032898820609,
"grad_norm": 0.439453125,
"learning_rate": 6.663311198130705e-06,
"loss": 0.21379852294921875,
"num_tokens": 89686029.0,
"step": 1124
},
{
"epoch": 0.6983240223463687,
"grad_norm": 0.439453125,
"learning_rate": 6.638247731576902e-06,
"loss": 0.2183380126953125,
"num_tokens": 89772289.0,
"step": 1125
},
{
"epoch": 0.6989447548106766,
"grad_norm": 0.419921875,
"learning_rate": 6.6132180897918804e-06,
"loss": 0.1571502685546875,
"num_tokens": 89842683.0,
"step": 1126
},
{
"epoch": 0.6995654872749845,
"grad_norm": 0.408203125,
"learning_rate": 6.5882223740249815e-06,
"loss": 0.19301605224609375,
"num_tokens": 89921966.0,
"step": 1127
},
{
"epoch": 0.7001862197392924,
"grad_norm": 0.3671875,
"learning_rate": 6.563260685388291e-06,
"loss": 0.1480255126953125,
"num_tokens": 89998974.0,
"step": 1128
},
{
"epoch": 0.7008069522036002,
"grad_norm": 0.392578125,
"learning_rate": 6.5383331248562665e-06,
"loss": 0.1881103515625,
"num_tokens": 90085169.0,
"step": 1129
},
{
"epoch": 0.7014276846679082,
"grad_norm": 0.4375,
"learning_rate": 6.513439793265311e-06,
"loss": 0.22406005859375,
"num_tokens": 90166745.0,
"step": 1130
},
{
"epoch": 0.702048417132216,
"grad_norm": 0.375,
"learning_rate": 6.488580791313354e-06,
"loss": 0.122772216796875,
"num_tokens": 90249785.0,
"step": 1131
},
{
"epoch": 0.7026691495965239,
"grad_norm": 0.396484375,
"learning_rate": 6.4637562195594555e-06,
"loss": 0.19120025634765625,
"num_tokens": 90336741.0,
"step": 1132
},
{
"epoch": 0.7032898820608318,
"grad_norm": 0.4765625,
"learning_rate": 6.438966178423413e-06,
"loss": 0.23110198974609375,
"num_tokens": 90416540.0,
"step": 1133
},
{
"epoch": 0.7039106145251397,
"grad_norm": 0.40234375,
"learning_rate": 6.414210768185326e-06,
"loss": 0.18560791015625,
"num_tokens": 90498123.0,
"step": 1134
},
{
"epoch": 0.7045313469894475,
"grad_norm": 0.455078125,
"learning_rate": 6.389490088985222e-06,
"loss": 0.1893463134765625,
"num_tokens": 90576624.0,
"step": 1135
},
{
"epoch": 0.7051520794537555,
"grad_norm": 0.41796875,
"learning_rate": 6.36480424082262e-06,
"loss": 0.190521240234375,
"num_tokens": 90654821.0,
"step": 1136
},
{
"epoch": 0.7057728119180633,
"grad_norm": 0.4296875,
"learning_rate": 6.340153323556144e-06,
"loss": 0.18609619140625,
"num_tokens": 90727103.0,
"step": 1137
},
{
"epoch": 0.7063935443823712,
"grad_norm": 0.3984375,
"learning_rate": 6.315537436903132e-06,
"loss": 0.16522216796875,
"num_tokens": 90805695.0,
"step": 1138
},
{
"epoch": 0.7070142768466791,
"grad_norm": 0.400390625,
"learning_rate": 6.29095668043919e-06,
"loss": 0.164306640625,
"num_tokens": 90882306.0,
"step": 1139
},
{
"epoch": 0.707635009310987,
"grad_norm": 0.376953125,
"learning_rate": 6.266411153597841e-06,
"loss": 0.1566009521484375,
"num_tokens": 90956916.0,
"step": 1140
},
{
"epoch": 0.7082557417752948,
"grad_norm": 0.345703125,
"learning_rate": 6.24190095567009e-06,
"loss": 0.15454864501953125,
"num_tokens": 91047860.0,
"step": 1141
},
{
"epoch": 0.7088764742396028,
"grad_norm": 0.33203125,
"learning_rate": 6.217426185804024e-06,
"loss": 0.1237030029296875,
"num_tokens": 91131173.0,
"step": 1142
},
{
"epoch": 0.7094972067039106,
"grad_norm": 0.42578125,
"learning_rate": 6.192986943004418e-06,
"loss": 0.1826324462890625,
"num_tokens": 91209571.0,
"step": 1143
},
{
"epoch": 0.7101179391682185,
"grad_norm": 0.404296875,
"learning_rate": 6.168583326132346e-06,
"loss": 0.1811981201171875,
"num_tokens": 91285031.0,
"step": 1144
},
{
"epoch": 0.7107386716325264,
"grad_norm": 0.451171875,
"learning_rate": 6.1442154339047506e-06,
"loss": 0.231231689453125,
"num_tokens": 91371948.0,
"step": 1145
},
{
"epoch": 0.7113594040968343,
"grad_norm": 0.443359375,
"learning_rate": 6.11988336489408e-06,
"loss": 0.2124786376953125,
"num_tokens": 91448538.0,
"step": 1146
},
{
"epoch": 0.7119801365611421,
"grad_norm": 0.330078125,
"learning_rate": 6.09558721752786e-06,
"loss": 0.1241607666015625,
"num_tokens": 91525226.0,
"step": 1147
},
{
"epoch": 0.7126008690254501,
"grad_norm": 0.451171875,
"learning_rate": 6.071327090088302e-06,
"loss": 0.22678375244140625,
"num_tokens": 91601221.0,
"step": 1148
},
{
"epoch": 0.7132216014897579,
"grad_norm": 0.453125,
"learning_rate": 6.047103080711925e-06,
"loss": 0.224884033203125,
"num_tokens": 91682422.0,
"step": 1149
},
{
"epoch": 0.7138423339540658,
"grad_norm": 0.42578125,
"learning_rate": 6.02291528738914e-06,
"loss": 0.17754364013671875,
"num_tokens": 91755328.0,
"step": 1150
},
{
"epoch": 0.7144630664183736,
"grad_norm": 0.412109375,
"learning_rate": 5.998763807963846e-06,
"loss": 0.1867523193359375,
"num_tokens": 91833868.0,
"step": 1151
},
{
"epoch": 0.7150837988826816,
"grad_norm": 0.416015625,
"learning_rate": 5.974648740133065e-06,
"loss": 0.208709716796875,
"num_tokens": 91916714.0,
"step": 1152
},
{
"epoch": 0.7157045313469894,
"grad_norm": 0.408203125,
"learning_rate": 5.950570181446507e-06,
"loss": 0.1851043701171875,
"num_tokens": 91991942.0,
"step": 1153
},
{
"epoch": 0.7163252638112974,
"grad_norm": 0.439453125,
"learning_rate": 5.926528229306215e-06,
"loss": 0.248809814453125,
"num_tokens": 92077182.0,
"step": 1154
},
{
"epoch": 0.7169459962756052,
"grad_norm": 0.408203125,
"learning_rate": 5.902522980966138e-06,
"loss": 0.18105316162109375,
"num_tokens": 92153100.0,
"step": 1155
},
{
"epoch": 0.7175667287399131,
"grad_norm": 0.39453125,
"learning_rate": 5.878554533531753e-06,
"loss": 0.16986083984375,
"num_tokens": 92237435.0,
"step": 1156
},
{
"epoch": 0.718187461204221,
"grad_norm": 0.396484375,
"learning_rate": 5.854622983959682e-06,
"loss": 0.16733551025390625,
"num_tokens": 92317774.0,
"step": 1157
},
{
"epoch": 0.7188081936685289,
"grad_norm": 0.419921875,
"learning_rate": 5.830728429057282e-06,
"loss": 0.208831787109375,
"num_tokens": 92405245.0,
"step": 1158
},
{
"epoch": 0.7194289261328367,
"grad_norm": 0.443359375,
"learning_rate": 5.80687096548225e-06,
"loss": 0.21135711669921875,
"num_tokens": 92488878.0,
"step": 1159
},
{
"epoch": 0.7200496585971446,
"grad_norm": 0.46875,
"learning_rate": 5.783050689742255e-06,
"loss": 0.2313690185546875,
"num_tokens": 92565304.0,
"step": 1160
},
{
"epoch": 0.7206703910614525,
"grad_norm": 0.412109375,
"learning_rate": 5.759267698194535e-06,
"loss": 0.18932342529296875,
"num_tokens": 92648605.0,
"step": 1161
},
{
"epoch": 0.7212911235257604,
"grad_norm": 0.47265625,
"learning_rate": 5.735522087045506e-06,
"loss": 0.212432861328125,
"num_tokens": 92722662.0,
"step": 1162
},
{
"epoch": 0.7219118559900682,
"grad_norm": 0.3828125,
"learning_rate": 5.711813952350362e-06,
"loss": 0.15892791748046875,
"num_tokens": 92798917.0,
"step": 1163
},
{
"epoch": 0.7225325884543762,
"grad_norm": 0.43359375,
"learning_rate": 5.688143390012709e-06,
"loss": 0.225738525390625,
"num_tokens": 92884483.0,
"step": 1164
},
{
"epoch": 0.723153320918684,
"grad_norm": 0.5,
"learning_rate": 5.6645104957841655e-06,
"loss": 0.2500152587890625,
"num_tokens": 92968061.0,
"step": 1165
},
{
"epoch": 0.7237740533829919,
"grad_norm": 0.349609375,
"learning_rate": 5.6409153652639755e-06,
"loss": 0.11112594604492188,
"num_tokens": 93041596.0,
"step": 1166
},
{
"epoch": 0.7243947858472998,
"grad_norm": 0.404296875,
"learning_rate": 5.617358093898613e-06,
"loss": 0.192626953125,
"num_tokens": 93118305.0,
"step": 1167
},
{
"epoch": 0.7250155183116077,
"grad_norm": 0.388671875,
"learning_rate": 5.593838776981421e-06,
"loss": 0.15001678466796875,
"num_tokens": 93195446.0,
"step": 1168
},
{
"epoch": 0.7256362507759155,
"grad_norm": 0.392578125,
"learning_rate": 5.570357509652195e-06,
"loss": 0.188720703125,
"num_tokens": 93278206.0,
"step": 1169
},
{
"epoch": 0.7262569832402235,
"grad_norm": 0.4140625,
"learning_rate": 5.546914386896822e-06,
"loss": 0.193389892578125,
"num_tokens": 93357263.0,
"step": 1170
},
{
"epoch": 0.7268777157045313,
"grad_norm": 0.400390625,
"learning_rate": 5.523509503546893e-06,
"loss": 0.1725616455078125,
"num_tokens": 93431226.0,
"step": 1171
},
{
"epoch": 0.7274984481688392,
"grad_norm": 0.4296875,
"learning_rate": 5.500142954279293e-06,
"loss": 0.1910552978515625,
"num_tokens": 93504707.0,
"step": 1172
},
{
"epoch": 0.7281191806331471,
"grad_norm": 0.421875,
"learning_rate": 5.4768148336158655e-06,
"loss": 0.1892547607421875,
"num_tokens": 93582811.0,
"step": 1173
},
{
"epoch": 0.728739913097455,
"grad_norm": 0.431640625,
"learning_rate": 5.453525235922982e-06,
"loss": 0.20801544189453125,
"num_tokens": 93666160.0,
"step": 1174
},
{
"epoch": 0.7293606455617628,
"grad_norm": 0.484375,
"learning_rate": 5.430274255411186e-06,
"loss": 0.249267578125,
"num_tokens": 93742826.0,
"step": 1175
},
{
"epoch": 0.7299813780260708,
"grad_norm": 0.412109375,
"learning_rate": 5.407061986134821e-06,
"loss": 0.199371337890625,
"num_tokens": 93826373.0,
"step": 1176
},
{
"epoch": 0.7306021104903786,
"grad_norm": 0.416015625,
"learning_rate": 5.383888521991622e-06,
"loss": 0.200042724609375,
"num_tokens": 93908597.0,
"step": 1177
},
{
"epoch": 0.7312228429546865,
"grad_norm": 0.416015625,
"learning_rate": 5.36075395672235e-06,
"loss": 0.214691162109375,
"num_tokens": 93991198.0,
"step": 1178
},
{
"epoch": 0.7318435754189944,
"grad_norm": 0.427734375,
"learning_rate": 5.337658383910432e-06,
"loss": 0.1941070556640625,
"num_tokens": 94069378.0,
"step": 1179
},
{
"epoch": 0.7324643078833023,
"grad_norm": 0.43359375,
"learning_rate": 5.314601896981535e-06,
"loss": 0.1955413818359375,
"num_tokens": 94146061.0,
"step": 1180
},
{
"epoch": 0.7330850403476101,
"grad_norm": 0.39453125,
"learning_rate": 5.291584589203242e-06,
"loss": 0.1679840087890625,
"num_tokens": 94226271.0,
"step": 1181
},
{
"epoch": 0.7337057728119181,
"grad_norm": 0.4609375,
"learning_rate": 5.26860655368464e-06,
"loss": 0.20273590087890625,
"num_tokens": 94302823.0,
"step": 1182
},
{
"epoch": 0.7343265052762259,
"grad_norm": 0.3984375,
"learning_rate": 5.245667883375945e-06,
"loss": 0.18761444091796875,
"num_tokens": 94385790.0,
"step": 1183
},
{
"epoch": 0.7349472377405338,
"grad_norm": 0.39453125,
"learning_rate": 5.2227686710681495e-06,
"loss": 0.186309814453125,
"num_tokens": 94468345.0,
"step": 1184
},
{
"epoch": 0.7355679702048417,
"grad_norm": 0.369140625,
"learning_rate": 5.199909009392618e-06,
"loss": 0.17969894409179688,
"num_tokens": 94560952.0,
"step": 1185
},
{
"epoch": 0.7361887026691496,
"grad_norm": 0.404296875,
"learning_rate": 5.177088990820725e-06,
"loss": 0.17291259765625,
"num_tokens": 94642925.0,
"step": 1186
},
{
"epoch": 0.7368094351334574,
"grad_norm": 0.404296875,
"learning_rate": 5.154308707663497e-06,
"loss": 0.1746978759765625,
"num_tokens": 94725017.0,
"step": 1187
},
{
"epoch": 0.7374301675977654,
"grad_norm": 0.353515625,
"learning_rate": 5.131568252071204e-06,
"loss": 0.1286163330078125,
"num_tokens": 94803603.0,
"step": 1188
},
{
"epoch": 0.7380509000620733,
"grad_norm": 0.4453125,
"learning_rate": 5.108867716033028e-06,
"loss": 0.1925506591796875,
"num_tokens": 94876440.0,
"step": 1189
},
{
"epoch": 0.7386716325263811,
"grad_norm": 0.447265625,
"learning_rate": 5.086207191376645e-06,
"loss": 0.223907470703125,
"num_tokens": 94952624.0,
"step": 1190
},
{
"epoch": 0.7392923649906891,
"grad_norm": 0.421875,
"learning_rate": 5.063586769767896e-06,
"loss": 0.18677520751953125,
"num_tokens": 95028937.0,
"step": 1191
},
{
"epoch": 0.7399130974549969,
"grad_norm": 0.3515625,
"learning_rate": 5.041006542710396e-06,
"loss": 0.12854766845703125,
"num_tokens": 95109395.0,
"step": 1192
},
{
"epoch": 0.7405338299193048,
"grad_norm": 0.369140625,
"learning_rate": 5.018466601545162e-06,
"loss": 0.145904541015625,
"num_tokens": 95186310.0,
"step": 1193
},
{
"epoch": 0.7411545623836127,
"grad_norm": 0.423828125,
"learning_rate": 4.995967037450238e-06,
"loss": 0.22533035278320312,
"num_tokens": 95271733.0,
"step": 1194
},
{
"epoch": 0.7417752948479206,
"grad_norm": 0.4609375,
"learning_rate": 4.973507941440357e-06,
"loss": 0.2373199462890625,
"num_tokens": 95349099.0,
"step": 1195
},
{
"epoch": 0.7423960273122284,
"grad_norm": 0.380859375,
"learning_rate": 4.9510894043665315e-06,
"loss": 0.140869140625,
"num_tokens": 95426779.0,
"step": 1196
},
{
"epoch": 0.7430167597765364,
"grad_norm": 0.4140625,
"learning_rate": 4.928711516915723e-06,
"loss": 0.19592666625976562,
"num_tokens": 95511284.0,
"step": 1197
},
{
"epoch": 0.7436374922408442,
"grad_norm": 0.421875,
"learning_rate": 4.906374369610443e-06,
"loss": 0.18447113037109375,
"num_tokens": 95587613.0,
"step": 1198
},
{
"epoch": 0.7442582247051521,
"grad_norm": 0.4140625,
"learning_rate": 4.884078052808405e-06,
"loss": 0.1688995361328125,
"num_tokens": 95664193.0,
"step": 1199
},
{
"epoch": 0.74487895716946,
"grad_norm": 0.353515625,
"learning_rate": 4.861822656702169e-06,
"loss": 0.13661956787109375,
"num_tokens": 95747007.0,
"step": 1200
},
{
"epoch": 0.7454996896337679,
"grad_norm": 0.47265625,
"learning_rate": 4.839608271318742e-06,
"loss": 0.2079925537109375,
"num_tokens": 95822933.0,
"step": 1201
},
{
"epoch": 0.7461204220980757,
"grad_norm": 0.36328125,
"learning_rate": 4.817434986519255e-06,
"loss": 0.14505767822265625,
"num_tokens": 95904324.0,
"step": 1202
},
{
"epoch": 0.7467411545623837,
"grad_norm": 0.4375,
"learning_rate": 4.795302891998574e-06,
"loss": 0.18990325927734375,
"num_tokens": 95982974.0,
"step": 1203
},
{
"epoch": 0.7473618870266915,
"grad_norm": 0.357421875,
"learning_rate": 4.7732120772849325e-06,
"loss": 0.1411285400390625,
"num_tokens": 96065625.0,
"step": 1204
},
{
"epoch": 0.7479826194909994,
"grad_norm": 0.390625,
"learning_rate": 4.751162631739599e-06,
"loss": 0.17435455322265625,
"num_tokens": 96141267.0,
"step": 1205
},
{
"epoch": 0.7486033519553073,
"grad_norm": 0.44140625,
"learning_rate": 4.7291546445564775e-06,
"loss": 0.238250732421875,
"num_tokens": 96222390.0,
"step": 1206
},
{
"epoch": 0.7492240844196152,
"grad_norm": 0.458984375,
"learning_rate": 4.707188204761772e-06,
"loss": 0.21509170532226562,
"num_tokens": 96302319.0,
"step": 1207
},
{
"epoch": 0.749844816883923,
"grad_norm": 0.42578125,
"learning_rate": 4.685263401213629e-06,
"loss": 0.217193603515625,
"num_tokens": 96383023.0,
"step": 1208
},
{
"epoch": 0.750465549348231,
"grad_norm": 0.458984375,
"learning_rate": 4.66338032260176e-06,
"loss": 0.2308349609375,
"num_tokens": 96462459.0,
"step": 1209
},
{
"epoch": 0.7510862818125388,
"grad_norm": 0.384765625,
"learning_rate": 4.641539057447085e-06,
"loss": 0.18218994140625,
"num_tokens": 96541548.0,
"step": 1210
},
{
"epoch": 0.7517070142768467,
"grad_norm": 0.408203125,
"learning_rate": 4.619739694101398e-06,
"loss": 0.1999664306640625,
"num_tokens": 96622820.0,
"step": 1211
},
{
"epoch": 0.7523277467411545,
"grad_norm": 0.365234375,
"learning_rate": 4.597982320746985e-06,
"loss": 0.16184234619140625,
"num_tokens": 96709512.0,
"step": 1212
},
{
"epoch": 0.7529484792054625,
"grad_norm": 0.3984375,
"learning_rate": 4.57626702539627e-06,
"loss": 0.168853759765625,
"num_tokens": 96787786.0,
"step": 1213
},
{
"epoch": 0.7535692116697703,
"grad_norm": 0.380859375,
"learning_rate": 4.554593895891473e-06,
"loss": 0.1801605224609375,
"num_tokens": 96877877.0,
"step": 1214
},
{
"epoch": 0.7541899441340782,
"grad_norm": 0.46875,
"learning_rate": 4.532963019904237e-06,
"loss": 0.2055816650390625,
"num_tokens": 96950419.0,
"step": 1215
},
{
"epoch": 0.7548106765983861,
"grad_norm": 0.412109375,
"learning_rate": 4.5113744849352894e-06,
"loss": 0.1885986328125,
"num_tokens": 97033679.0,
"step": 1216
},
{
"epoch": 0.755431409062694,
"grad_norm": 0.431640625,
"learning_rate": 4.489828378314077e-06,
"loss": 0.2092742919921875,
"num_tokens": 97117388.0,
"step": 1217
},
{
"epoch": 0.7560521415270018,
"grad_norm": 0.365234375,
"learning_rate": 4.468324787198412e-06,
"loss": 0.146820068359375,
"num_tokens": 97197805.0,
"step": 1218
},
{
"epoch": 0.7566728739913098,
"grad_norm": 0.3984375,
"learning_rate": 4.446863798574136e-06,
"loss": 0.18268585205078125,
"num_tokens": 97274498.0,
"step": 1219
},
{
"epoch": 0.7572936064556176,
"grad_norm": 0.328125,
"learning_rate": 4.425445499254745e-06,
"loss": 0.11944580078125,
"num_tokens": 97353682.0,
"step": 1220
},
{
"epoch": 0.7579143389199255,
"grad_norm": 0.4375,
"learning_rate": 4.40406997588105e-06,
"loss": 0.2086029052734375,
"num_tokens": 97437612.0,
"step": 1221
},
{
"epoch": 0.7585350713842334,
"grad_norm": 0.447265625,
"learning_rate": 4.38273731492083e-06,
"loss": 0.20543289184570312,
"num_tokens": 97511523.0,
"step": 1222
},
{
"epoch": 0.7591558038485413,
"grad_norm": 0.396484375,
"learning_rate": 4.361447602668479e-06,
"loss": 0.19820404052734375,
"num_tokens": 97592227.0,
"step": 1223
},
{
"epoch": 0.7597765363128491,
"grad_norm": 0.4140625,
"learning_rate": 4.340200925244659e-06,
"loss": 0.1703033447265625,
"num_tokens": 97666467.0,
"step": 1224
},
{
"epoch": 0.7603972687771571,
"grad_norm": 0.44921875,
"learning_rate": 4.3189973685959345e-06,
"loss": 0.2415924072265625,
"num_tokens": 97744282.0,
"step": 1225
},
{
"epoch": 0.7610180012414649,
"grad_norm": 0.369140625,
"learning_rate": 4.297837018494445e-06,
"loss": 0.1400146484375,
"num_tokens": 97821398.0,
"step": 1226
},
{
"epoch": 0.7616387337057728,
"grad_norm": 0.41796875,
"learning_rate": 4.276719960537565e-06,
"loss": 0.1937255859375,
"num_tokens": 97900872.0,
"step": 1227
},
{
"epoch": 0.7622594661700807,
"grad_norm": 0.353515625,
"learning_rate": 4.255646280147526e-06,
"loss": 0.15023040771484375,
"num_tokens": 97982087.0,
"step": 1228
},
{
"epoch": 0.7628801986343886,
"grad_norm": 0.419921875,
"learning_rate": 4.234616062571094e-06,
"loss": 0.2020111083984375,
"num_tokens": 98064001.0,
"step": 1229
},
{
"epoch": 0.7635009310986964,
"grad_norm": 0.42578125,
"learning_rate": 4.213629392879233e-06,
"loss": 0.19549560546875,
"num_tokens": 98144451.0,
"step": 1230
},
{
"epoch": 0.7641216635630044,
"grad_norm": 0.40234375,
"learning_rate": 4.192686355966729e-06,
"loss": 0.1761016845703125,
"num_tokens": 98226114.0,
"step": 1231
},
{
"epoch": 0.7647423960273122,
"grad_norm": 0.421875,
"learning_rate": 4.171787036551886e-06,
"loss": 0.19824981689453125,
"num_tokens": 98302072.0,
"step": 1232
},
{
"epoch": 0.7653631284916201,
"grad_norm": 0.408203125,
"learning_rate": 4.150931519176141e-06,
"loss": 0.205413818359375,
"num_tokens": 98386088.0,
"step": 1233
},
{
"epoch": 0.765983860955928,
"grad_norm": 0.400390625,
"learning_rate": 4.1301198882037595e-06,
"loss": 0.1674957275390625,
"num_tokens": 98470647.0,
"step": 1234
},
{
"epoch": 0.7666045934202359,
"grad_norm": 0.400390625,
"learning_rate": 4.109352227821482e-06,
"loss": 0.1761627197265625,
"num_tokens": 98547612.0,
"step": 1235
},
{
"epoch": 0.7672253258845437,
"grad_norm": 0.408203125,
"learning_rate": 4.088628622038165e-06,
"loss": 0.18512725830078125,
"num_tokens": 98627105.0,
"step": 1236
},
{
"epoch": 0.7678460583488517,
"grad_norm": 0.439453125,
"learning_rate": 4.067949154684462e-06,
"loss": 0.2196044921875,
"num_tokens": 98706232.0,
"step": 1237
},
{
"epoch": 0.7684667908131595,
"grad_norm": 0.384765625,
"learning_rate": 4.047313909412488e-06,
"loss": 0.1661376953125,
"num_tokens": 98791923.0,
"step": 1238
},
{
"epoch": 0.7690875232774674,
"grad_norm": 0.431640625,
"learning_rate": 4.026722969695462e-06,
"loss": 0.2099761962890625,
"num_tokens": 98872645.0,
"step": 1239
},
{
"epoch": 0.7697082557417753,
"grad_norm": 0.3828125,
"learning_rate": 4.0061764188273755e-06,
"loss": 0.1783294677734375,
"num_tokens": 98954387.0,
"step": 1240
},
{
"epoch": 0.7703289882060832,
"grad_norm": 0.451171875,
"learning_rate": 3.985674339922674e-06,
"loss": 0.2251434326171875,
"num_tokens": 99033442.0,
"step": 1241
},
{
"epoch": 0.770949720670391,
"grad_norm": 0.421875,
"learning_rate": 3.965216815915891e-06,
"loss": 0.18013763427734375,
"num_tokens": 99111922.0,
"step": 1242
},
{
"epoch": 0.771570453134699,
"grad_norm": 0.44140625,
"learning_rate": 3.944803929561336e-06,
"loss": 0.1980133056640625,
"num_tokens": 99195722.0,
"step": 1243
},
{
"epoch": 0.7721911855990068,
"grad_norm": 0.396484375,
"learning_rate": 3.924435763432755e-06,
"loss": 0.174835205078125,
"num_tokens": 99281344.0,
"step": 1244
},
{
"epoch": 0.7728119180633147,
"grad_norm": 0.43359375,
"learning_rate": 3.904112399922981e-06,
"loss": 0.19427490234375,
"num_tokens": 99364276.0,
"step": 1245
},
{
"epoch": 0.7734326505276226,
"grad_norm": 0.3984375,
"learning_rate": 3.883833921243622e-06,
"loss": 0.16762542724609375,
"num_tokens": 99448144.0,
"step": 1246
},
{
"epoch": 0.7740533829919305,
"grad_norm": 0.39453125,
"learning_rate": 3.863600409424716e-06,
"loss": 0.1667022705078125,
"num_tokens": 99532641.0,
"step": 1247
},
{
"epoch": 0.7746741154562383,
"grad_norm": 0.416015625,
"learning_rate": 3.8434119463143935e-06,
"loss": 0.21457672119140625,
"num_tokens": 99617767.0,
"step": 1248
},
{
"epoch": 0.7752948479205463,
"grad_norm": 0.390625,
"learning_rate": 3.823268613578576e-06,
"loss": 0.17430877685546875,
"num_tokens": 99695368.0,
"step": 1249
},
{
"epoch": 0.7759155803848541,
"grad_norm": 0.36328125,
"learning_rate": 3.8031704927005983e-06,
"loss": 0.17668914794921875,
"num_tokens": 99778699.0,
"step": 1250
},
{
"epoch": 0.776536312849162,
"grad_norm": 0.4296875,
"learning_rate": 3.783117664980932e-06,
"loss": 0.18178558349609375,
"num_tokens": 99859725.0,
"step": 1251
},
{
"epoch": 0.7771570453134699,
"grad_norm": 0.3828125,
"learning_rate": 3.7631102115368087e-06,
"loss": 0.16890716552734375,
"num_tokens": 99944743.0,
"step": 1252
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.384765625,
"learning_rate": 3.743148213301919e-06,
"loss": 0.18173980712890625,
"num_tokens": 100033423.0,
"step": 1253
},
{
"epoch": 0.7783985102420856,
"grad_norm": 0.466796875,
"learning_rate": 3.7232317510260825e-06,
"loss": 0.24291229248046875,
"num_tokens": 100113246.0,
"step": 1254
},
{
"epoch": 0.7790192427063936,
"grad_norm": 0.421875,
"learning_rate": 3.703360905274924e-06,
"loss": 0.16143798828125,
"num_tokens": 100186995.0,
"step": 1255
},
{
"epoch": 0.7796399751707014,
"grad_norm": 0.443359375,
"learning_rate": 3.683535756429523e-06,
"loss": 0.221038818359375,
"num_tokens": 100271754.0,
"step": 1256
},
{
"epoch": 0.7802607076350093,
"grad_norm": 0.455078125,
"learning_rate": 3.6637563846861278e-06,
"loss": 0.1905517578125,
"num_tokens": 100345576.0,
"step": 1257
},
{
"epoch": 0.7808814400993171,
"grad_norm": 0.431640625,
"learning_rate": 3.6440228700557893e-06,
"loss": 0.206390380859375,
"num_tokens": 100426619.0,
"step": 1258
},
{
"epoch": 0.7815021725636251,
"grad_norm": 0.3828125,
"learning_rate": 3.624335292364077e-06,
"loss": 0.1775665283203125,
"num_tokens": 100514871.0,
"step": 1259
},
{
"epoch": 0.7821229050279329,
"grad_norm": 0.486328125,
"learning_rate": 3.6046937312507296e-06,
"loss": 0.2408599853515625,
"num_tokens": 100596087.0,
"step": 1260
},
{
"epoch": 0.7827436374922409,
"grad_norm": 0.41015625,
"learning_rate": 3.58509826616933e-06,
"loss": 0.19145965576171875,
"num_tokens": 100679129.0,
"step": 1261
},
{
"epoch": 0.7833643699565487,
"grad_norm": 0.34765625,
"learning_rate": 3.565548976387018e-06,
"loss": 0.15478515625,
"num_tokens": 100765813.0,
"step": 1262
},
{
"epoch": 0.7839851024208566,
"grad_norm": 0.41015625,
"learning_rate": 3.546045940984123e-06,
"loss": 0.1922149658203125,
"num_tokens": 100850232.0,
"step": 1263
},
{
"epoch": 0.7846058348851644,
"grad_norm": 0.40625,
"learning_rate": 3.5265892388538795e-06,
"loss": 0.17694091796875,
"num_tokens": 100921417.0,
"step": 1264
},
{
"epoch": 0.7852265673494724,
"grad_norm": 0.39453125,
"learning_rate": 3.5071789487021017e-06,
"loss": 0.17737579345703125,
"num_tokens": 101003910.0,
"step": 1265
},
{
"epoch": 0.7858472998137802,
"grad_norm": 0.45703125,
"learning_rate": 3.487815149046838e-06,
"loss": 0.22576904296875,
"num_tokens": 101087743.0,
"step": 1266
},
{
"epoch": 0.7864680322780881,
"grad_norm": 0.40625,
"learning_rate": 3.4684979182180976e-06,
"loss": 0.1746368408203125,
"num_tokens": 101172824.0,
"step": 1267
},
{
"epoch": 0.787088764742396,
"grad_norm": 0.462890625,
"learning_rate": 3.4492273343574972e-06,
"loss": 0.2132110595703125,
"num_tokens": 101251894.0,
"step": 1268
},
{
"epoch": 0.7877094972067039,
"grad_norm": 0.42578125,
"learning_rate": 3.4300034754179555e-06,
"loss": 0.2190704345703125,
"num_tokens": 101331559.0,
"step": 1269
},
{
"epoch": 0.7883302296710117,
"grad_norm": 0.4140625,
"learning_rate": 3.4108264191633946e-06,
"loss": 0.194610595703125,
"num_tokens": 101410420.0,
"step": 1270
},
{
"epoch": 0.7889509621353197,
"grad_norm": 0.490234375,
"learning_rate": 3.3916962431684017e-06,
"loss": 0.271148681640625,
"num_tokens": 101492994.0,
"step": 1271
},
{
"epoch": 0.7895716945996276,
"grad_norm": 0.43359375,
"learning_rate": 3.37261302481792e-06,
"loss": 0.18902587890625,
"num_tokens": 101570639.0,
"step": 1272
},
{
"epoch": 0.7901924270639354,
"grad_norm": 0.4609375,
"learning_rate": 3.3535768413069547e-06,
"loss": 0.198089599609375,
"num_tokens": 101642573.0,
"step": 1273
},
{
"epoch": 0.7908131595282434,
"grad_norm": 0.392578125,
"learning_rate": 3.3345877696402347e-06,
"loss": 0.165008544921875,
"num_tokens": 101723678.0,
"step": 1274
},
{
"epoch": 0.7914338919925512,
"grad_norm": 0.380859375,
"learning_rate": 3.3156458866319195e-06,
"loss": 0.1746673583984375,
"num_tokens": 101808849.0,
"step": 1275
},
{
"epoch": 0.7920546244568591,
"grad_norm": 0.39453125,
"learning_rate": 3.2967512689052844e-06,
"loss": 0.16193389892578125,
"num_tokens": 101882091.0,
"step": 1276
},
{
"epoch": 0.792675356921167,
"grad_norm": 0.41796875,
"learning_rate": 3.2779039928923967e-06,
"loss": 0.20468902587890625,
"num_tokens": 101961594.0,
"step": 1277
},
{
"epoch": 0.7932960893854749,
"grad_norm": 0.38671875,
"learning_rate": 3.2591041348338334e-06,
"loss": 0.1653900146484375,
"num_tokens": 102043847.0,
"step": 1278
},
{
"epoch": 0.7939168218497827,
"grad_norm": 0.375,
"learning_rate": 3.2403517707783476e-06,
"loss": 0.14620208740234375,
"num_tokens": 102120706.0,
"step": 1279
},
{
"epoch": 0.7945375543140907,
"grad_norm": 0.361328125,
"learning_rate": 3.221646976582568e-06,
"loss": 0.14786529541015625,
"num_tokens": 102207470.0,
"step": 1280
},
{
"epoch": 0.7951582867783985,
"grad_norm": 0.427734375,
"learning_rate": 3.2029898279107062e-06,
"loss": 0.20794677734375,
"num_tokens": 102291117.0,
"step": 1281
},
{
"epoch": 0.7957790192427064,
"grad_norm": 0.3828125,
"learning_rate": 3.1843804002342296e-06,
"loss": 0.1543731689453125,
"num_tokens": 102374864.0,
"step": 1282
},
{
"epoch": 0.7963997517070143,
"grad_norm": 0.3671875,
"learning_rate": 3.1658187688315654e-06,
"loss": 0.12654876708984375,
"num_tokens": 102451005.0,
"step": 1283
},
{
"epoch": 0.7970204841713222,
"grad_norm": 0.388671875,
"learning_rate": 3.147305008787805e-06,
"loss": 0.153411865234375,
"num_tokens": 102529847.0,
"step": 1284
},
{
"epoch": 0.79764121663563,
"grad_norm": 0.41015625,
"learning_rate": 3.1288391949943825e-06,
"loss": 0.1969146728515625,
"num_tokens": 102606936.0,
"step": 1285
},
{
"epoch": 0.798261949099938,
"grad_norm": 0.44921875,
"learning_rate": 3.110421402148793e-06,
"loss": 0.2253570556640625,
"num_tokens": 102693887.0,
"step": 1286
},
{
"epoch": 0.7988826815642458,
"grad_norm": 0.353515625,
"learning_rate": 3.0920517047542656e-06,
"loss": 0.14318084716796875,
"num_tokens": 102784300.0,
"step": 1287
},
{
"epoch": 0.7995034140285537,
"grad_norm": 0.4140625,
"learning_rate": 3.0737301771194744e-06,
"loss": 0.1566314697265625,
"num_tokens": 102855191.0,
"step": 1288
},
{
"epoch": 0.8001241464928616,
"grad_norm": 0.455078125,
"learning_rate": 3.0554568933582505e-06,
"loss": 0.226654052734375,
"num_tokens": 102934610.0,
"step": 1289
},
{
"epoch": 0.8007448789571695,
"grad_norm": 0.416015625,
"learning_rate": 3.0372319273892606e-06,
"loss": 0.20951080322265625,
"num_tokens": 103017237.0,
"step": 1290
},
{
"epoch": 0.8013656114214773,
"grad_norm": 0.416015625,
"learning_rate": 3.019055352935717e-06,
"loss": 0.206024169921875,
"num_tokens": 103102408.0,
"step": 1291
},
{
"epoch": 0.8019863438857853,
"grad_norm": 0.404296875,
"learning_rate": 3.0009272435250878e-06,
"loss": 0.1588134765625,
"num_tokens": 103174813.0,
"step": 1292
},
{
"epoch": 0.8026070763500931,
"grad_norm": 0.443359375,
"learning_rate": 2.982847672488778e-06,
"loss": 0.19913482666015625,
"num_tokens": 103253612.0,
"step": 1293
},
{
"epoch": 0.803227808814401,
"grad_norm": 0.41015625,
"learning_rate": 2.964816712961865e-06,
"loss": 0.18546295166015625,
"num_tokens": 103333110.0,
"step": 1294
},
{
"epoch": 0.8038485412787089,
"grad_norm": 0.40625,
"learning_rate": 2.9468344378827627e-06,
"loss": 0.19812774658203125,
"num_tokens": 103416087.0,
"step": 1295
},
{
"epoch": 0.8044692737430168,
"grad_norm": 0.384765625,
"learning_rate": 2.9289009199929633e-06,
"loss": 0.1905517578125,
"num_tokens": 103499651.0,
"step": 1296
},
{
"epoch": 0.8050900062073246,
"grad_norm": 0.443359375,
"learning_rate": 2.9110162318367274e-06,
"loss": 0.190582275390625,
"num_tokens": 103570923.0,
"step": 1297
},
{
"epoch": 0.8057107386716326,
"grad_norm": 0.427734375,
"learning_rate": 2.8931804457607815e-06,
"loss": 0.2168731689453125,
"num_tokens": 103652798.0,
"step": 1298
},
{
"epoch": 0.8063314711359404,
"grad_norm": 0.400390625,
"learning_rate": 2.8753936339140342e-06,
"loss": 0.1739959716796875,
"num_tokens": 103729881.0,
"step": 1299
},
{
"epoch": 0.8069522036002483,
"grad_norm": 0.380859375,
"learning_rate": 2.8576558682472974e-06,
"loss": 0.15575408935546875,
"num_tokens": 103807492.0,
"step": 1300
},
{
"epoch": 0.8075729360645562,
"grad_norm": 0.392578125,
"learning_rate": 2.8399672205129674e-06,
"loss": 0.1865997314453125,
"num_tokens": 103884164.0,
"step": 1301
},
{
"epoch": 0.8081936685288641,
"grad_norm": 0.474609375,
"learning_rate": 2.822327762264765e-06,
"loss": 0.2444305419921875,
"num_tokens": 103963985.0,
"step": 1302
},
{
"epoch": 0.8088144009931719,
"grad_norm": 0.390625,
"learning_rate": 2.8047375648574187e-06,
"loss": 0.17287445068359375,
"num_tokens": 104042041.0,
"step": 1303
},
{
"epoch": 0.8094351334574799,
"grad_norm": 0.494140625,
"learning_rate": 2.7871966994463887e-06,
"loss": 0.257781982421875,
"num_tokens": 104121681.0,
"step": 1304
},
{
"epoch": 0.8100558659217877,
"grad_norm": 0.38671875,
"learning_rate": 2.769705236987585e-06,
"loss": 0.15924835205078125,
"num_tokens": 104201431.0,
"step": 1305
},
{
"epoch": 0.8106765983860956,
"grad_norm": 0.423828125,
"learning_rate": 2.752263248237074e-06,
"loss": 0.21198272705078125,
"num_tokens": 104281573.0,
"step": 1306
},
{
"epoch": 0.8112973308504035,
"grad_norm": 0.38671875,
"learning_rate": 2.734870803750783e-06,
"loss": 0.17617034912109375,
"num_tokens": 104360318.0,
"step": 1307
},
{
"epoch": 0.8119180633147114,
"grad_norm": 0.44140625,
"learning_rate": 2.717527973884238e-06,
"loss": 0.2028656005859375,
"num_tokens": 104447102.0,
"step": 1308
},
{
"epoch": 0.8125387957790192,
"grad_norm": 0.416015625,
"learning_rate": 2.7002348287922525e-06,
"loss": 0.2050933837890625,
"num_tokens": 104533303.0,
"step": 1309
},
{
"epoch": 0.8131595282433272,
"grad_norm": 0.439453125,
"learning_rate": 2.6829914384286576e-06,
"loss": 0.2105712890625,
"num_tokens": 104612178.0,
"step": 1310
},
{
"epoch": 0.813780260707635,
"grad_norm": 0.359375,
"learning_rate": 2.6657978725460286e-06,
"loss": 0.1677093505859375,
"num_tokens": 104707343.0,
"step": 1311
},
{
"epoch": 0.8144009931719429,
"grad_norm": 0.43359375,
"learning_rate": 2.6486542006953772e-06,
"loss": 0.25018310546875,
"num_tokens": 104795928.0,
"step": 1312
},
{
"epoch": 0.8150217256362507,
"grad_norm": 0.45703125,
"learning_rate": 2.6315604922259e-06,
"loss": 0.2257232666015625,
"num_tokens": 104880807.0,
"step": 1313
},
{
"epoch": 0.8156424581005587,
"grad_norm": 0.4453125,
"learning_rate": 2.61451681628467e-06,
"loss": 0.236785888671875,
"num_tokens": 104962359.0,
"step": 1314
},
{
"epoch": 0.8162631905648665,
"grad_norm": 0.41796875,
"learning_rate": 2.5975232418163704e-06,
"loss": 0.19732666015625,
"num_tokens": 105045342.0,
"step": 1315
},
{
"epoch": 0.8168839230291745,
"grad_norm": 0.458984375,
"learning_rate": 2.580579837563026e-06,
"loss": 0.2263641357421875,
"num_tokens": 105126223.0,
"step": 1316
},
{
"epoch": 0.8175046554934823,
"grad_norm": 0.451171875,
"learning_rate": 2.563686672063711e-06,
"loss": 0.204345703125,
"num_tokens": 105196201.0,
"step": 1317
},
{
"epoch": 0.8181253879577902,
"grad_norm": 0.478515625,
"learning_rate": 2.546843813654261e-06,
"loss": 0.2430877685546875,
"num_tokens": 105272611.0,
"step": 1318
},
{
"epoch": 0.818746120422098,
"grad_norm": 0.443359375,
"learning_rate": 2.5300513304670315e-06,
"loss": 0.2425537109375,
"num_tokens": 105359767.0,
"step": 1319
},
{
"epoch": 0.819366852886406,
"grad_norm": 0.3671875,
"learning_rate": 2.513309290430583e-06,
"loss": 0.15370941162109375,
"num_tokens": 105445403.0,
"step": 1320
},
{
"epoch": 0.8199875853507138,
"grad_norm": 0.400390625,
"learning_rate": 2.496617761269439e-06,
"loss": 0.20330810546875,
"num_tokens": 105532166.0,
"step": 1321
},
{
"epoch": 0.8206083178150217,
"grad_norm": 0.44140625,
"learning_rate": 2.4799768105037867e-06,
"loss": 0.222412109375,
"num_tokens": 105611571.0,
"step": 1322
},
{
"epoch": 0.8212290502793296,
"grad_norm": 0.375,
"learning_rate": 2.4633865054492193e-06,
"loss": 0.14919281005859375,
"num_tokens": 105691792.0,
"step": 1323
},
{
"epoch": 0.8218497827436375,
"grad_norm": 0.3984375,
"learning_rate": 2.446846913216461e-06,
"loss": 0.1780853271484375,
"num_tokens": 105774938.0,
"step": 1324
},
{
"epoch": 0.8224705152079453,
"grad_norm": 0.4140625,
"learning_rate": 2.430358100711088e-06,
"loss": 0.14537811279296875,
"num_tokens": 105851722.0,
"step": 1325
},
{
"epoch": 0.8230912476722533,
"grad_norm": 0.427734375,
"learning_rate": 2.413920134633272e-06,
"loss": 0.207763671875,
"num_tokens": 105934661.0,
"step": 1326
},
{
"epoch": 0.8237119801365611,
"grad_norm": 0.390625,
"learning_rate": 2.3975330814774915e-06,
"loss": 0.1488037109375,
"num_tokens": 106007825.0,
"step": 1327
},
{
"epoch": 0.824332712600869,
"grad_norm": 0.412109375,
"learning_rate": 2.38119700753228e-06,
"loss": 0.19580078125,
"num_tokens": 106089985.0,
"step": 1328
},
{
"epoch": 0.8249534450651769,
"grad_norm": 0.416015625,
"learning_rate": 2.3649119788799567e-06,
"loss": 0.1855010986328125,
"num_tokens": 106170017.0,
"step": 1329
},
{
"epoch": 0.8255741775294848,
"grad_norm": 0.353515625,
"learning_rate": 2.3486780613963403e-06,
"loss": 0.1274261474609375,
"num_tokens": 106244244.0,
"step": 1330
},
{
"epoch": 0.8261949099937926,
"grad_norm": 0.376953125,
"learning_rate": 2.3324953207504984e-06,
"loss": 0.15641021728515625,
"num_tokens": 106318196.0,
"step": 1331
},
{
"epoch": 0.8268156424581006,
"grad_norm": 0.447265625,
"learning_rate": 2.3163638224044915e-06,
"loss": 0.2002410888671875,
"num_tokens": 106396636.0,
"step": 1332
},
{
"epoch": 0.8274363749224084,
"grad_norm": 0.3671875,
"learning_rate": 2.300283631613081e-06,
"loss": 0.15753936767578125,
"num_tokens": 106481491.0,
"step": 1333
},
{
"epoch": 0.8280571073867163,
"grad_norm": 0.412109375,
"learning_rate": 2.2842548134234866e-06,
"loss": 0.1749725341796875,
"num_tokens": 106560667.0,
"step": 1334
},
{
"epoch": 0.8286778398510242,
"grad_norm": 0.3828125,
"learning_rate": 2.2682774326751215e-06,
"loss": 0.178192138671875,
"num_tokens": 106640126.0,
"step": 1335
},
{
"epoch": 0.8292985723153321,
"grad_norm": 0.408203125,
"learning_rate": 2.2523515539993152e-06,
"loss": 0.1955413818359375,
"num_tokens": 106726158.0,
"step": 1336
},
{
"epoch": 0.8299193047796399,
"grad_norm": 0.3828125,
"learning_rate": 2.236477241819067e-06,
"loss": 0.17472076416015625,
"num_tokens": 106814251.0,
"step": 1337
},
{
"epoch": 0.8305400372439479,
"grad_norm": 0.45703125,
"learning_rate": 2.2206545603487884e-06,
"loss": 0.2115325927734375,
"num_tokens": 106888763.0,
"step": 1338
},
{
"epoch": 0.8311607697082557,
"grad_norm": 0.3984375,
"learning_rate": 2.2048835735940194e-06,
"loss": 0.14669036865234375,
"num_tokens": 106963719.0,
"step": 1339
},
{
"epoch": 0.8317815021725636,
"grad_norm": 0.37890625,
"learning_rate": 2.1891643453512018e-06,
"loss": 0.15201568603515625,
"num_tokens": 107047767.0,
"step": 1340
},
{
"epoch": 0.8324022346368715,
"grad_norm": 0.38671875,
"learning_rate": 2.1734969392073944e-06,
"loss": 0.15398788452148438,
"num_tokens": 107123697.0,
"step": 1341
},
{
"epoch": 0.8330229671011794,
"grad_norm": 0.412109375,
"learning_rate": 2.157881418540024e-06,
"loss": 0.172332763671875,
"num_tokens": 107197505.0,
"step": 1342
},
{
"epoch": 0.8336436995654872,
"grad_norm": 0.408203125,
"learning_rate": 2.1423178465166485e-06,
"loss": 0.16192626953125,
"num_tokens": 107276412.0,
"step": 1343
},
{
"epoch": 0.8342644320297952,
"grad_norm": 0.396484375,
"learning_rate": 2.126806286094666e-06,
"loss": 0.18810272216796875,
"num_tokens": 107356981.0,
"step": 1344
},
{
"epoch": 0.834885164494103,
"grad_norm": 0.4140625,
"learning_rate": 2.1113468000210843e-06,
"loss": 0.17772674560546875,
"num_tokens": 107434241.0,
"step": 1345
},
{
"epoch": 0.8355058969584109,
"grad_norm": 0.369140625,
"learning_rate": 2.0959394508322644e-06,
"loss": 0.1571807861328125,
"num_tokens": 107518350.0,
"step": 1346
},
{
"epoch": 0.8361266294227188,
"grad_norm": 0.408203125,
"learning_rate": 2.080584300853665e-06,
"loss": 0.1822357177734375,
"num_tokens": 107596504.0,
"step": 1347
},
{
"epoch": 0.8367473618870267,
"grad_norm": 0.419921875,
"learning_rate": 2.0652814121995824e-06,
"loss": 0.1939239501953125,
"num_tokens": 107673001.0,
"step": 1348
},
{
"epoch": 0.8373680943513345,
"grad_norm": 0.44921875,
"learning_rate": 2.0500308467729186e-06,
"loss": 0.20574951171875,
"num_tokens": 107746256.0,
"step": 1349
},
{
"epoch": 0.8379888268156425,
"grad_norm": 0.408203125,
"learning_rate": 2.0348326662649008e-06,
"loss": 0.175872802734375,
"num_tokens": 107827993.0,
"step": 1350
},
{
"epoch": 0.8386095592799503,
"grad_norm": 0.46875,
"learning_rate": 2.0196869321548727e-06,
"loss": 0.22869873046875,
"num_tokens": 107907923.0,
"step": 1351
},
{
"epoch": 0.8392302917442582,
"grad_norm": 0.451171875,
"learning_rate": 2.004593705710004e-06,
"loss": 0.20977783203125,
"num_tokens": 107987178.0,
"step": 1352
},
{
"epoch": 0.839851024208566,
"grad_norm": 0.416015625,
"learning_rate": 1.9895530479850654e-06,
"loss": 0.2040557861328125,
"num_tokens": 108068616.0,
"step": 1353
},
{
"epoch": 0.840471756672874,
"grad_norm": 0.462890625,
"learning_rate": 1.9745650198221903e-06,
"loss": 0.2293243408203125,
"num_tokens": 108152086.0,
"step": 1354
},
{
"epoch": 0.8410924891371818,
"grad_norm": 0.408203125,
"learning_rate": 1.9596296818505988e-06,
"loss": 0.194061279296875,
"num_tokens": 108241091.0,
"step": 1355
},
{
"epoch": 0.8417132216014898,
"grad_norm": 0.43359375,
"learning_rate": 1.944747094486386e-06,
"loss": 0.22503662109375,
"num_tokens": 108320387.0,
"step": 1356
},
{
"epoch": 0.8423339540657977,
"grad_norm": 0.42578125,
"learning_rate": 1.929917317932243e-06,
"loss": 0.1920166015625,
"num_tokens": 108405576.0,
"step": 1357
},
{
"epoch": 0.8429546865301055,
"grad_norm": 0.388671875,
"learning_rate": 1.9151404121772507e-06,
"loss": 0.1645660400390625,
"num_tokens": 108485839.0,
"step": 1358
},
{
"epoch": 0.8435754189944135,
"grad_norm": 0.361328125,
"learning_rate": 1.9004164369966088e-06,
"loss": 0.12830352783203125,
"num_tokens": 108563228.0,
"step": 1359
},
{
"epoch": 0.8441961514587213,
"grad_norm": 0.4296875,
"learning_rate": 1.8857454519514044e-06,
"loss": 0.2244873046875,
"num_tokens": 108645284.0,
"step": 1360
},
{
"epoch": 0.8448168839230292,
"grad_norm": 0.3984375,
"learning_rate": 1.871127516388364e-06,
"loss": 0.1541900634765625,
"num_tokens": 108725903.0,
"step": 1361
},
{
"epoch": 0.845437616387337,
"grad_norm": 0.462890625,
"learning_rate": 1.8565626894396366e-06,
"loss": 0.2313995361328125,
"num_tokens": 108803257.0,
"step": 1362
},
{
"epoch": 0.846058348851645,
"grad_norm": 0.353515625,
"learning_rate": 1.8420510300225147e-06,
"loss": 0.13166046142578125,
"num_tokens": 108884045.0,
"step": 1363
},
{
"epoch": 0.8466790813159528,
"grad_norm": 0.345703125,
"learning_rate": 1.8275925968392414e-06,
"loss": 0.14595794677734375,
"num_tokens": 108969431.0,
"step": 1364
},
{
"epoch": 0.8472998137802608,
"grad_norm": 0.359375,
"learning_rate": 1.813187448376734e-06,
"loss": 0.12152862548828125,
"num_tokens": 109049313.0,
"step": 1365
},
{
"epoch": 0.8479205462445686,
"grad_norm": 0.419921875,
"learning_rate": 1.7988356429063684e-06,
"loss": 0.1735076904296875,
"num_tokens": 109122274.0,
"step": 1366
},
{
"epoch": 0.8485412787088765,
"grad_norm": 0.408203125,
"learning_rate": 1.7845372384837433e-06,
"loss": 0.1820220947265625,
"num_tokens": 109203209.0,
"step": 1367
},
{
"epoch": 0.8491620111731844,
"grad_norm": 0.4140625,
"learning_rate": 1.77029229294843e-06,
"loss": 0.19683837890625,
"num_tokens": 109290660.0,
"step": 1368
},
{
"epoch": 0.8497827436374923,
"grad_norm": 0.345703125,
"learning_rate": 1.7561008639237613e-06,
"loss": 0.15297698974609375,
"num_tokens": 109375535.0,
"step": 1369
},
{
"epoch": 0.8504034761018001,
"grad_norm": 0.38671875,
"learning_rate": 1.7419630088165832e-06,
"loss": 0.174041748046875,
"num_tokens": 109459125.0,
"step": 1370
},
{
"epoch": 0.851024208566108,
"grad_norm": 0.3828125,
"learning_rate": 1.7278787848170185e-06,
"loss": 0.1533050537109375,
"num_tokens": 109538042.0,
"step": 1371
},
{
"epoch": 0.8516449410304159,
"grad_norm": 0.376953125,
"learning_rate": 1.71384824889825e-06,
"loss": 0.15036773681640625,
"num_tokens": 109621213.0,
"step": 1372
},
{
"epoch": 0.8522656734947238,
"grad_norm": 0.37890625,
"learning_rate": 1.6998714578162822e-06,
"loss": 0.17192840576171875,
"num_tokens": 109700895.0,
"step": 1373
},
{
"epoch": 0.8528864059590316,
"grad_norm": 0.435546875,
"learning_rate": 1.6859484681097025e-06,
"loss": 0.1910400390625,
"num_tokens": 109779614.0,
"step": 1374
},
{
"epoch": 0.8535071384233396,
"grad_norm": 0.421875,
"learning_rate": 1.672079336099479e-06,
"loss": 0.20361328125,
"num_tokens": 109867597.0,
"step": 1375
},
{
"epoch": 0.8541278708876474,
"grad_norm": 0.361328125,
"learning_rate": 1.6582641178887036e-06,
"loss": 0.13359832763671875,
"num_tokens": 109947167.0,
"step": 1376
},
{
"epoch": 0.8547486033519553,
"grad_norm": 0.412109375,
"learning_rate": 1.6445028693623764e-06,
"loss": 0.16276931762695312,
"num_tokens": 110019199.0,
"step": 1377
},
{
"epoch": 0.8553693358162632,
"grad_norm": 0.44921875,
"learning_rate": 1.6307956461871888e-06,
"loss": 0.2106475830078125,
"num_tokens": 110098896.0,
"step": 1378
},
{
"epoch": 0.8559900682805711,
"grad_norm": 0.390625,
"learning_rate": 1.617142503811288e-06,
"loss": 0.1520233154296875,
"num_tokens": 110171551.0,
"step": 1379
},
{
"epoch": 0.8566108007448789,
"grad_norm": 0.45703125,
"learning_rate": 1.603543497464049e-06,
"loss": 0.230438232421875,
"num_tokens": 110249019.0,
"step": 1380
},
{
"epoch": 0.8572315332091869,
"grad_norm": 0.400390625,
"learning_rate": 1.589998682155866e-06,
"loss": 0.174896240234375,
"num_tokens": 110330713.0,
"step": 1381
},
{
"epoch": 0.8578522656734947,
"grad_norm": 0.439453125,
"learning_rate": 1.5765081126779074e-06,
"loss": 0.20772552490234375,
"num_tokens": 110410310.0,
"step": 1382
},
{
"epoch": 0.8584729981378026,
"grad_norm": 0.390625,
"learning_rate": 1.5630718436019253e-06,
"loss": 0.15108489990234375,
"num_tokens": 110484902.0,
"step": 1383
},
{
"epoch": 0.8590937306021105,
"grad_norm": 0.400390625,
"learning_rate": 1.5496899292800009e-06,
"loss": 0.17327880859375,
"num_tokens": 110565591.0,
"step": 1384
},
{
"epoch": 0.8597144630664184,
"grad_norm": 0.37109375,
"learning_rate": 1.5363624238443497e-06,
"loss": 0.1597137451171875,
"num_tokens": 110653753.0,
"step": 1385
},
{
"epoch": 0.8603351955307262,
"grad_norm": 0.47265625,
"learning_rate": 1.5230893812070928e-06,
"loss": 0.2253265380859375,
"num_tokens": 110732240.0,
"step": 1386
},
{
"epoch": 0.8609559279950342,
"grad_norm": 0.36328125,
"learning_rate": 1.5098708550600366e-06,
"loss": 0.13427734375,
"num_tokens": 110815081.0,
"step": 1387
},
{
"epoch": 0.861576660459342,
"grad_norm": 0.4453125,
"learning_rate": 1.496706898874458e-06,
"loss": 0.213287353515625,
"num_tokens": 110894665.0,
"step": 1388
},
{
"epoch": 0.8621973929236499,
"grad_norm": 0.365234375,
"learning_rate": 1.483597565900895e-06,
"loss": 0.15041351318359375,
"num_tokens": 110974767.0,
"step": 1389
},
{
"epoch": 0.8628181253879578,
"grad_norm": 0.4453125,
"learning_rate": 1.4705429091689194e-06,
"loss": 0.197418212890625,
"num_tokens": 111049554.0,
"step": 1390
},
{
"epoch": 0.8634388578522657,
"grad_norm": 0.41015625,
"learning_rate": 1.457542981486934e-06,
"loss": 0.181884765625,
"num_tokens": 111129832.0,
"step": 1391
},
{
"epoch": 0.8640595903165735,
"grad_norm": 0.384765625,
"learning_rate": 1.4445978354419437e-06,
"loss": 0.15564727783203125,
"num_tokens": 111209554.0,
"step": 1392
},
{
"epoch": 0.8646803227808815,
"grad_norm": 0.3515625,
"learning_rate": 1.4317075233993553e-06,
"loss": 0.126068115234375,
"num_tokens": 111289762.0,
"step": 1393
},
{
"epoch": 0.8653010552451893,
"grad_norm": 0.4453125,
"learning_rate": 1.4188720975027687e-06,
"loss": 0.1878814697265625,
"num_tokens": 111368478.0,
"step": 1394
},
{
"epoch": 0.8659217877094972,
"grad_norm": 0.380859375,
"learning_rate": 1.4060916096737536e-06,
"loss": 0.1748046875,
"num_tokens": 111449441.0,
"step": 1395
},
{
"epoch": 0.8665425201738051,
"grad_norm": 0.37890625,
"learning_rate": 1.393366111611642e-06,
"loss": 0.15207672119140625,
"num_tokens": 111524565.0,
"step": 1396
},
{
"epoch": 0.867163252638113,
"grad_norm": 0.431640625,
"learning_rate": 1.3806956547933347e-06,
"loss": 0.1956939697265625,
"num_tokens": 111604313.0,
"step": 1397
},
{
"epoch": 0.8677839851024208,
"grad_norm": 0.37890625,
"learning_rate": 1.368080290473066e-06,
"loss": 0.16059112548828125,
"num_tokens": 111683673.0,
"step": 1398
},
{
"epoch": 0.8684047175667288,
"grad_norm": 0.462890625,
"learning_rate": 1.3555200696822235e-06,
"loss": 0.20253753662109375,
"num_tokens": 111762330.0,
"step": 1399
},
{
"epoch": 0.8690254500310366,
"grad_norm": 0.423828125,
"learning_rate": 1.3430150432291262e-06,
"loss": 0.1954803466796875,
"num_tokens": 111840676.0,
"step": 1400
},
{
"epoch": 0.8696461824953445,
"grad_norm": 0.3828125,
"learning_rate": 1.330565261698815e-06,
"loss": 0.1727294921875,
"num_tokens": 111919229.0,
"step": 1401
},
{
"epoch": 0.8702669149596524,
"grad_norm": 0.43359375,
"learning_rate": 1.3181707754528671e-06,
"loss": 0.194854736328125,
"num_tokens": 111995077.0,
"step": 1402
},
{
"epoch": 0.8708876474239603,
"grad_norm": 0.4296875,
"learning_rate": 1.305831634629171e-06,
"loss": 0.21188735961914062,
"num_tokens": 112071865.0,
"step": 1403
},
{
"epoch": 0.8715083798882681,
"grad_norm": 0.44921875,
"learning_rate": 1.2935478891417346e-06,
"loss": 0.1915283203125,
"num_tokens": 112143523.0,
"step": 1404
},
{
"epoch": 0.8721291123525761,
"grad_norm": 0.41796875,
"learning_rate": 1.2813195886804885e-06,
"loss": 0.19721221923828125,
"num_tokens": 112228377.0,
"step": 1405
},
{
"epoch": 0.8727498448168839,
"grad_norm": 0.41796875,
"learning_rate": 1.2691467827110693e-06,
"loss": 0.1800537109375,
"num_tokens": 112303546.0,
"step": 1406
},
{
"epoch": 0.8733705772811918,
"grad_norm": 0.498046875,
"learning_rate": 1.257029520474638e-06,
"loss": 0.27400970458984375,
"num_tokens": 112377192.0,
"step": 1407
},
{
"epoch": 0.8739913097454997,
"grad_norm": 0.423828125,
"learning_rate": 1.244967850987664e-06,
"loss": 0.2031402587890625,
"num_tokens": 112456630.0,
"step": 1408
},
{
"epoch": 0.8746120422098076,
"grad_norm": 0.353515625,
"learning_rate": 1.2329618230417327e-06,
"loss": 0.12925338745117188,
"num_tokens": 112536338.0,
"step": 1409
},
{
"epoch": 0.8752327746741154,
"grad_norm": 0.388671875,
"learning_rate": 1.2210114852033572e-06,
"loss": 0.15638351440429688,
"num_tokens": 112617701.0,
"step": 1410
},
{
"epoch": 0.8758535071384234,
"grad_norm": 0.404296875,
"learning_rate": 1.2091168858137714e-06,
"loss": 0.19182205200195312,
"num_tokens": 112698716.0,
"step": 1411
},
{
"epoch": 0.8764742396027312,
"grad_norm": 0.400390625,
"learning_rate": 1.1972780729887333e-06,
"loss": 0.19015884399414062,
"num_tokens": 112784460.0,
"step": 1412
},
{
"epoch": 0.8770949720670391,
"grad_norm": 0.462890625,
"learning_rate": 1.18549509461834e-06,
"loss": 0.2064971923828125,
"num_tokens": 112862225.0,
"step": 1413
},
{
"epoch": 0.877715704531347,
"grad_norm": 0.41015625,
"learning_rate": 1.1737679983668259e-06,
"loss": 0.15874481201171875,
"num_tokens": 112938792.0,
"step": 1414
},
{
"epoch": 0.8783364369956549,
"grad_norm": 0.3984375,
"learning_rate": 1.162096831672369e-06,
"loss": 0.1752777099609375,
"num_tokens": 113023192.0,
"step": 1415
},
{
"epoch": 0.8789571694599627,
"grad_norm": 0.37890625,
"learning_rate": 1.1504816417469088e-06,
"loss": 0.14960479736328125,
"num_tokens": 113106520.0,
"step": 1416
},
{
"epoch": 0.8795779019242707,
"grad_norm": 0.416015625,
"learning_rate": 1.1389224755759425e-06,
"loss": 0.1729583740234375,
"num_tokens": 113190980.0,
"step": 1417
},
{
"epoch": 0.8801986343885785,
"grad_norm": 0.396484375,
"learning_rate": 1.1274193799183486e-06,
"loss": 0.177276611328125,
"num_tokens": 113267794.0,
"step": 1418
},
{
"epoch": 0.8808193668528864,
"grad_norm": 0.3671875,
"learning_rate": 1.1159724013061818e-06,
"loss": 0.1397705078125,
"num_tokens": 113342885.0,
"step": 1419
},
{
"epoch": 0.8814400993171942,
"grad_norm": 0.353515625,
"learning_rate": 1.104581586044502e-06,
"loss": 0.129364013671875,
"num_tokens": 113423145.0,
"step": 1420
},
{
"epoch": 0.8820608317815022,
"grad_norm": 0.45703125,
"learning_rate": 1.0932469802111688e-06,
"loss": 0.226531982421875,
"num_tokens": 113505500.0,
"step": 1421
},
{
"epoch": 0.88268156424581,
"grad_norm": 0.50390625,
"learning_rate": 1.081968629656676e-06,
"loss": 0.253570556640625,
"num_tokens": 113581240.0,
"step": 1422
},
{
"epoch": 0.883302296710118,
"grad_norm": 0.421875,
"learning_rate": 1.0707465800039456e-06,
"loss": 0.19501495361328125,
"num_tokens": 113666314.0,
"step": 1423
},
{
"epoch": 0.8839230291744258,
"grad_norm": 0.54296875,
"learning_rate": 1.0595808766481569e-06,
"loss": 0.252655029296875,
"num_tokens": 113743878.0,
"step": 1424
},
{
"epoch": 0.8845437616387337,
"grad_norm": 0.390625,
"learning_rate": 1.0484715647565562e-06,
"loss": 0.1773681640625,
"num_tokens": 113820248.0,
"step": 1425
},
{
"epoch": 0.8851644941030415,
"grad_norm": 0.443359375,
"learning_rate": 1.0374186892682813e-06,
"loss": 0.2051239013671875,
"num_tokens": 113895970.0,
"step": 1426
},
{
"epoch": 0.8857852265673495,
"grad_norm": 0.44140625,
"learning_rate": 1.026422294894167e-06,
"loss": 0.1966552734375,
"num_tokens": 113975890.0,
"step": 1427
},
{
"epoch": 0.8864059590316573,
"grad_norm": 0.408203125,
"learning_rate": 1.0154824261165763e-06,
"loss": 0.1671295166015625,
"num_tokens": 114047999.0,
"step": 1428
},
{
"epoch": 0.8870266914959652,
"grad_norm": 0.390625,
"learning_rate": 1.004599127189218e-06,
"loss": 0.156585693359375,
"num_tokens": 114128129.0,
"step": 1429
},
{
"epoch": 0.8876474239602731,
"grad_norm": 0.388671875,
"learning_rate": 9.937724421369609e-07,
"loss": 0.1729278564453125,
"num_tokens": 114207622.0,
"step": 1430
},
{
"epoch": 0.888268156424581,
"grad_norm": 0.39453125,
"learning_rate": 9.830024147556637e-07,
"loss": 0.1910552978515625,
"num_tokens": 114294842.0,
"step": 1431
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.39453125,
"learning_rate": 9.722890886120002e-07,
"loss": 0.15358734130859375,
"num_tokens": 114369904.0,
"step": 1432
},
{
"epoch": 0.8895096213531968,
"grad_norm": 0.365234375,
"learning_rate": 9.616325070432636e-07,
"loss": 0.1573638916015625,
"num_tokens": 114451390.0,
"step": 1433
},
{
"epoch": 0.8901303538175046,
"grad_norm": 0.4453125,
"learning_rate": 9.5103271315722e-07,
"loss": 0.22116851806640625,
"num_tokens": 114527301.0,
"step": 1434
},
{
"epoch": 0.8907510862818125,
"grad_norm": 0.384765625,
"learning_rate": 9.40489749831912e-07,
"loss": 0.154052734375,
"num_tokens": 114600377.0,
"step": 1435
},
{
"epoch": 0.8913718187461204,
"grad_norm": 0.37109375,
"learning_rate": 9.300036597154881e-07,
"loss": 0.165008544921875,
"num_tokens": 114684385.0,
"step": 1436
},
{
"epoch": 0.8919925512104283,
"grad_norm": 0.431640625,
"learning_rate": 9.195744852260457e-07,
"loss": 0.1820068359375,
"num_tokens": 114762718.0,
"step": 1437
},
{
"epoch": 0.8926132836747361,
"grad_norm": 0.369140625,
"learning_rate": 9.092022685514429e-07,
"loss": 0.1756591796875,
"num_tokens": 114850757.0,
"step": 1438
},
{
"epoch": 0.8932340161390441,
"grad_norm": 0.455078125,
"learning_rate": 8.988870516491254e-07,
"loss": 0.18399810791015625,
"num_tokens": 114924794.0,
"step": 1439
},
{
"epoch": 0.8938547486033519,
"grad_norm": 0.4921875,
"learning_rate": 8.886288762459805e-07,
"loss": 0.24407958984375,
"num_tokens": 114995720.0,
"step": 1440
},
{
"epoch": 0.8944754810676598,
"grad_norm": 0.361328125,
"learning_rate": 8.784277838381427e-07,
"loss": 0.1392669677734375,
"num_tokens": 115074423.0,
"step": 1441
},
{
"epoch": 0.8950962135319678,
"grad_norm": 0.408203125,
"learning_rate": 8.682838156908335e-07,
"loss": 0.1748046875,
"num_tokens": 115157851.0,
"step": 1442
},
{
"epoch": 0.8957169459962756,
"grad_norm": 0.40625,
"learning_rate": 8.581970128382055e-07,
"loss": 0.19258880615234375,
"num_tokens": 115239612.0,
"step": 1443
},
{
"epoch": 0.8963376784605835,
"grad_norm": 0.439453125,
"learning_rate": 8.481674160831598e-07,
"loss": 0.1905975341796875,
"num_tokens": 115318775.0,
"step": 1444
},
{
"epoch": 0.8969584109248914,
"grad_norm": 0.388671875,
"learning_rate": 8.381950659971943e-07,
"loss": 0.17224884033203125,
"num_tokens": 115403229.0,
"step": 1445
},
{
"epoch": 0.8975791433891993,
"grad_norm": 0.41015625,
"learning_rate": 8.28280002920227e-07,
"loss": 0.18399810791015625,
"num_tokens": 115484047.0,
"step": 1446
},
{
"epoch": 0.8981998758535071,
"grad_norm": 0.451171875,
"learning_rate": 8.184222669604463e-07,
"loss": 0.215240478515625,
"num_tokens": 115565971.0,
"step": 1447
},
{
"epoch": 0.8988206083178151,
"grad_norm": 0.39453125,
"learning_rate": 8.086218979941412e-07,
"loss": 0.155853271484375,
"num_tokens": 115644591.0,
"step": 1448
},
{
"epoch": 0.8994413407821229,
"grad_norm": 0.443359375,
"learning_rate": 7.988789356655379e-07,
"loss": 0.21747589111328125,
"num_tokens": 115726768.0,
"step": 1449
},
{
"epoch": 0.9000620732464308,
"grad_norm": 0.37890625,
"learning_rate": 7.891934193866418e-07,
"loss": 0.15863037109375,
"num_tokens": 115806911.0,
"step": 1450
},
{
"epoch": 0.9006828057107387,
"grad_norm": 0.451171875,
"learning_rate": 7.795653883370857e-07,
"loss": 0.2614288330078125,
"num_tokens": 115888858.0,
"step": 1451
},
{
"epoch": 0.9013035381750466,
"grad_norm": 0.376953125,
"learning_rate": 7.699948814639602e-07,
"loss": 0.14519500732421875,
"num_tokens": 115964929.0,
"step": 1452
},
{
"epoch": 0.9019242706393544,
"grad_norm": 0.40234375,
"learning_rate": 7.604819374816602e-07,
"loss": 0.14528656005859375,
"num_tokens": 116036534.0,
"step": 1453
},
{
"epoch": 0.9025450031036624,
"grad_norm": 0.357421875,
"learning_rate": 7.510265948717304e-07,
"loss": 0.16790771484375,
"num_tokens": 116121277.0,
"step": 1454
},
{
"epoch": 0.9031657355679702,
"grad_norm": 0.384765625,
"learning_rate": 7.416288918827002e-07,
"loss": 0.1535186767578125,
"num_tokens": 116202371.0,
"step": 1455
},
{
"epoch": 0.9037864680322781,
"grad_norm": 0.40234375,
"learning_rate": 7.322888665299487e-07,
"loss": 0.1700439453125,
"num_tokens": 116276724.0,
"step": 1456
},
{
"epoch": 0.904407200496586,
"grad_norm": 0.419921875,
"learning_rate": 7.230065565955302e-07,
"loss": 0.18024444580078125,
"num_tokens": 116353533.0,
"step": 1457
},
{
"epoch": 0.9050279329608939,
"grad_norm": 0.365234375,
"learning_rate": 7.137819996280303e-07,
"loss": 0.1444854736328125,
"num_tokens": 116440136.0,
"step": 1458
},
{
"epoch": 0.9056486654252017,
"grad_norm": 0.349609375,
"learning_rate": 7.046152329424205e-07,
"loss": 0.145660400390625,
"num_tokens": 116521838.0,
"step": 1459
},
{
"epoch": 0.9062693978895097,
"grad_norm": 0.3828125,
"learning_rate": 6.955062936198886e-07,
"loss": 0.18415451049804688,
"num_tokens": 116603356.0,
"step": 1460
},
{
"epoch": 0.9068901303538175,
"grad_norm": 0.4140625,
"learning_rate": 6.86455218507715e-07,
"loss": 0.22351837158203125,
"num_tokens": 116690757.0,
"step": 1461
},
{
"epoch": 0.9075108628181254,
"grad_norm": 0.40625,
"learning_rate": 6.774620442190954e-07,
"loss": 0.1795196533203125,
"num_tokens": 116776859.0,
"step": 1462
},
{
"epoch": 0.9081315952824333,
"grad_norm": 0.4140625,
"learning_rate": 6.685268071330147e-07,
"loss": 0.15985107421875,
"num_tokens": 116853475.0,
"step": 1463
},
{
"epoch": 0.9087523277467412,
"grad_norm": 0.416015625,
"learning_rate": 6.596495433940919e-07,
"loss": 0.2061004638671875,
"num_tokens": 116939042.0,
"step": 1464
},
{
"epoch": 0.909373060211049,
"grad_norm": 0.404296875,
"learning_rate": 6.508302889124268e-07,
"loss": 0.1634521484375,
"num_tokens": 117024209.0,
"step": 1465
},
{
"epoch": 0.909993792675357,
"grad_norm": 0.400390625,
"learning_rate": 6.42069079363467e-07,
"loss": 0.1707916259765625,
"num_tokens": 117102733.0,
"step": 1466
},
{
"epoch": 0.9106145251396648,
"grad_norm": 0.3515625,
"learning_rate": 6.333659501878597e-07,
"loss": 0.1476593017578125,
"num_tokens": 117187854.0,
"step": 1467
},
{
"epoch": 0.9112352576039727,
"grad_norm": 0.43359375,
"learning_rate": 6.247209365912982e-07,
"loss": 0.18548583984375,
"num_tokens": 117269571.0,
"step": 1468
},
{
"epoch": 0.9118559900682806,
"grad_norm": 0.44921875,
"learning_rate": 6.161340735443987e-07,
"loss": 0.24283599853515625,
"num_tokens": 117354410.0,
"step": 1469
},
{
"epoch": 0.9124767225325885,
"grad_norm": 0.373046875,
"learning_rate": 6.076053957825411e-07,
"loss": 0.17169189453125,
"num_tokens": 117435287.0,
"step": 1470
},
{
"epoch": 0.9130974549968963,
"grad_norm": 0.447265625,
"learning_rate": 5.991349378057348e-07,
"loss": 0.2221527099609375,
"num_tokens": 117513198.0,
"step": 1471
},
{
"epoch": 0.9137181874612043,
"grad_norm": 0.345703125,
"learning_rate": 5.907227338784843e-07,
"loss": 0.147308349609375,
"num_tokens": 117596986.0,
"step": 1472
},
{
"epoch": 0.9143389199255121,
"grad_norm": 0.37890625,
"learning_rate": 5.823688180296477e-07,
"loss": 0.1605072021484375,
"num_tokens": 117675745.0,
"step": 1473
},
{
"epoch": 0.91495965238982,
"grad_norm": 0.43359375,
"learning_rate": 5.740732240522883e-07,
"loss": 0.2085418701171875,
"num_tokens": 117756575.0,
"step": 1474
},
{
"epoch": 0.9155803848541278,
"grad_norm": 0.373046875,
"learning_rate": 5.658359855035567e-07,
"loss": 0.1626129150390625,
"num_tokens": 117840473.0,
"step": 1475
},
{
"epoch": 0.9162011173184358,
"grad_norm": 0.43359375,
"learning_rate": 5.576571357045401e-07,
"loss": 0.190399169921875,
"num_tokens": 117916766.0,
"step": 1476
},
{
"epoch": 0.9168218497827436,
"grad_norm": 0.375,
"learning_rate": 5.495367077401353e-07,
"loss": 0.1690216064453125,
"num_tokens": 118002624.0,
"step": 1477
},
{
"epoch": 0.9174425822470516,
"grad_norm": 0.388671875,
"learning_rate": 5.414747344589111e-07,
"loss": 0.16597747802734375,
"num_tokens": 118083268.0,
"step": 1478
},
{
"epoch": 0.9180633147113594,
"grad_norm": 0.37109375,
"learning_rate": 5.33471248472977e-07,
"loss": 0.14627838134765625,
"num_tokens": 118160614.0,
"step": 1479
},
{
"epoch": 0.9186840471756673,
"grad_norm": 0.390625,
"learning_rate": 5.255262821578521e-07,
"loss": 0.17824935913085938,
"num_tokens": 118243952.0,
"step": 1480
},
{
"epoch": 0.9193047796399751,
"grad_norm": 0.455078125,
"learning_rate": 5.176398676523314e-07,
"loss": 0.231048583984375,
"num_tokens": 118322380.0,
"step": 1481
},
{
"epoch": 0.9199255121042831,
"grad_norm": 0.39453125,
"learning_rate": 5.098120368583559e-07,
"loss": 0.18029022216796875,
"num_tokens": 118403710.0,
"step": 1482
},
{
"epoch": 0.9205462445685909,
"grad_norm": 0.38671875,
"learning_rate": 5.020428214408912e-07,
"loss": 0.12561798095703125,
"num_tokens": 118476644.0,
"step": 1483
},
{
"epoch": 0.9211669770328988,
"grad_norm": 0.419921875,
"learning_rate": 4.943322528277894e-07,
"loss": 0.19466400146484375,
"num_tokens": 118559697.0,
"step": 1484
},
{
"epoch": 0.9217877094972067,
"grad_norm": 0.42578125,
"learning_rate": 4.866803622096638e-07,
"loss": 0.18054962158203125,
"num_tokens": 118633723.0,
"step": 1485
},
{
"epoch": 0.9224084419615146,
"grad_norm": 0.478515625,
"learning_rate": 4.790871805397695e-07,
"loss": 0.247344970703125,
"num_tokens": 118711044.0,
"step": 1486
},
{
"epoch": 0.9230291744258224,
"grad_norm": 0.361328125,
"learning_rate": 4.71552738533868e-07,
"loss": 0.167816162109375,
"num_tokens": 118799563.0,
"step": 1487
},
{
"epoch": 0.9236499068901304,
"grad_norm": 0.396484375,
"learning_rate": 4.6407706667011604e-07,
"loss": 0.18695068359375,
"num_tokens": 118881029.0,
"step": 1488
},
{
"epoch": 0.9242706393544382,
"grad_norm": 0.416015625,
"learning_rate": 4.5666019518892544e-07,
"loss": 0.218505859375,
"num_tokens": 118966788.0,
"step": 1489
},
{
"epoch": 0.9248913718187461,
"grad_norm": 0.4296875,
"learning_rate": 4.493021540928566e-07,
"loss": 0.1919097900390625,
"num_tokens": 119048862.0,
"step": 1490
},
{
"epoch": 0.925512104283054,
"grad_norm": 0.421875,
"learning_rate": 4.4200297314649196e-07,
"loss": 0.1617279052734375,
"num_tokens": 119120018.0,
"step": 1491
},
{
"epoch": 0.9261328367473619,
"grad_norm": 0.408203125,
"learning_rate": 4.347626818763062e-07,
"loss": 0.184967041015625,
"num_tokens": 119199971.0,
"step": 1492
},
{
"epoch": 0.9267535692116697,
"grad_norm": 0.4609375,
"learning_rate": 4.275813095705611e-07,
"loss": 0.2259674072265625,
"num_tokens": 119274715.0,
"step": 1493
},
{
"epoch": 0.9273743016759777,
"grad_norm": 0.3359375,
"learning_rate": 4.204588852791824e-07,
"loss": 0.131103515625,
"num_tokens": 119348041.0,
"step": 1494
},
{
"epoch": 0.9279950341402855,
"grad_norm": 0.40234375,
"learning_rate": 4.1339543781363343e-07,
"loss": 0.167022705078125,
"num_tokens": 119430405.0,
"step": 1495
},
{
"epoch": 0.9286157666045934,
"grad_norm": 0.421875,
"learning_rate": 4.063909957468148e-07,
"loss": 0.179901123046875,
"num_tokens": 119512610.0,
"step": 1496
},
{
"epoch": 0.9292364990689013,
"grad_norm": 0.419921875,
"learning_rate": 3.9944558741293325e-07,
"loss": 0.191131591796875,
"num_tokens": 119588713.0,
"step": 1497
},
{
"epoch": 0.9298572315332092,
"grad_norm": 0.412109375,
"learning_rate": 3.9255924090739306e-07,
"loss": 0.171417236328125,
"num_tokens": 119669482.0,
"step": 1498
},
{
"epoch": 0.930477963997517,
"grad_norm": 0.326171875,
"learning_rate": 3.85731984086688e-07,
"loss": 0.12468338012695312,
"num_tokens": 119749076.0,
"step": 1499
},
{
"epoch": 0.931098696461825,
"grad_norm": 0.41015625,
"learning_rate": 3.7896384456828136e-07,
"loss": 0.19616317749023438,
"num_tokens": 119828562.0,
"step": 1500
},
{
"epoch": 0.9317194289261328,
"grad_norm": 0.447265625,
"learning_rate": 3.7225484973049284e-07,
"loss": 0.203277587890625,
"num_tokens": 119904627.0,
"step": 1501
},
{
"epoch": 0.9323401613904407,
"grad_norm": 0.423828125,
"learning_rate": 3.656050267123984e-07,
"loss": 0.17144012451171875,
"num_tokens": 119980341.0,
"step": 1502
},
{
"epoch": 0.9329608938547486,
"grad_norm": 0.4375,
"learning_rate": 3.5901440241370875e-07,
"loss": 0.19316864013671875,
"num_tokens": 120053204.0,
"step": 1503
},
{
"epoch": 0.9335816263190565,
"grad_norm": 0.373046875,
"learning_rate": 3.524830034946647e-07,
"loss": 0.15491485595703125,
"num_tokens": 120135505.0,
"step": 1504
},
{
"epoch": 0.9342023587833643,
"grad_norm": 0.376953125,
"learning_rate": 3.4601085637593344e-07,
"loss": 0.132049560546875,
"num_tokens": 120215908.0,
"step": 1505
},
{
"epoch": 0.9348230912476723,
"grad_norm": 0.3359375,
"learning_rate": 3.395979872384958e-07,
"loss": 0.1197662353515625,
"num_tokens": 120303178.0,
"step": 1506
},
{
"epoch": 0.9354438237119801,
"grad_norm": 0.361328125,
"learning_rate": 3.332444220235442e-07,
"loss": 0.13343048095703125,
"num_tokens": 120384660.0,
"step": 1507
},
{
"epoch": 0.936064556176288,
"grad_norm": 0.427734375,
"learning_rate": 3.269501864323732e-07,
"loss": 0.1978912353515625,
"num_tokens": 120461966.0,
"step": 1508
},
{
"epoch": 0.9366852886405959,
"grad_norm": 0.380859375,
"learning_rate": 3.2071530592628076e-07,
"loss": 0.14560699462890625,
"num_tokens": 120538489.0,
"step": 1509
},
{
"epoch": 0.9373060211049038,
"grad_norm": 0.75390625,
"learning_rate": 3.145398057264637e-07,
"loss": 0.180206298828125,
"num_tokens": 120617202.0,
"step": 1510
},
{
"epoch": 0.9379267535692116,
"grad_norm": 0.453125,
"learning_rate": 3.084237108139143e-07,
"loss": 0.2194061279296875,
"num_tokens": 120698032.0,
"step": 1511
},
{
"epoch": 0.9385474860335196,
"grad_norm": 0.451171875,
"learning_rate": 3.0236704592931876e-07,
"loss": 0.20873260498046875,
"num_tokens": 120775036.0,
"step": 1512
},
{
"epoch": 0.9391682184978274,
"grad_norm": 0.408203125,
"learning_rate": 2.9636983557295716e-07,
"loss": 0.18021392822265625,
"num_tokens": 120854378.0,
"step": 1513
},
{
"epoch": 0.9397889509621353,
"grad_norm": 0.431640625,
"learning_rate": 2.904321040046104e-07,
"loss": 0.233978271484375,
"num_tokens": 120936978.0,
"step": 1514
},
{
"epoch": 0.9404096834264432,
"grad_norm": 0.37890625,
"learning_rate": 2.8455387524345513e-07,
"loss": 0.153228759765625,
"num_tokens": 121014313.0,
"step": 1515
},
{
"epoch": 0.9410304158907511,
"grad_norm": 0.404296875,
"learning_rate": 2.787351730679671e-07,
"loss": 0.200439453125,
"num_tokens": 121094212.0,
"step": 1516
},
{
"epoch": 0.9416511483550589,
"grad_norm": 0.46875,
"learning_rate": 2.729760210158233e-07,
"loss": 0.2059478759765625,
"num_tokens": 121168325.0,
"step": 1517
},
{
"epoch": 0.9422718808193669,
"grad_norm": 0.384765625,
"learning_rate": 2.672764423838181e-07,
"loss": 0.18526458740234375,
"num_tokens": 121255353.0,
"step": 1518
},
{
"epoch": 0.9428926132836747,
"grad_norm": 0.44921875,
"learning_rate": 2.6163646022775544e-07,
"loss": 0.188323974609375,
"num_tokens": 121328380.0,
"step": 1519
},
{
"epoch": 0.9435133457479826,
"grad_norm": 0.44140625,
"learning_rate": 2.560560973623588e-07,
"loss": 0.19647216796875,
"num_tokens": 121407339.0,
"step": 1520
},
{
"epoch": 0.9441340782122905,
"grad_norm": 0.40234375,
"learning_rate": 2.505353763611895e-07,
"loss": 0.17388153076171875,
"num_tokens": 121491153.0,
"step": 1521
},
{
"epoch": 0.9447548106765984,
"grad_norm": 0.427734375,
"learning_rate": 2.450743195565369e-07,
"loss": 0.218963623046875,
"num_tokens": 121572779.0,
"step": 1522
},
{
"epoch": 0.9453755431409062,
"grad_norm": 0.40234375,
"learning_rate": 2.396729490393468e-07,
"loss": 0.17543411254882812,
"num_tokens": 121649615.0,
"step": 1523
},
{
"epoch": 0.9459962756052142,
"grad_norm": 0.390625,
"learning_rate": 2.343312866591163e-07,
"loss": 0.175079345703125,
"num_tokens": 121731434.0,
"step": 1524
},
{
"epoch": 0.946617008069522,
"grad_norm": 0.400390625,
"learning_rate": 2.2904935402381755e-07,
"loss": 0.15227508544921875,
"num_tokens": 121807521.0,
"step": 1525
},
{
"epoch": 0.9472377405338299,
"grad_norm": 0.4609375,
"learning_rate": 2.2382717249980423e-07,
"loss": 0.2166595458984375,
"num_tokens": 121882228.0,
"step": 1526
},
{
"epoch": 0.9478584729981379,
"grad_norm": 0.49609375,
"learning_rate": 2.1866476321172334e-07,
"loss": 0.211517333984375,
"num_tokens": 121957461.0,
"step": 1527
},
{
"epoch": 0.9484792054624457,
"grad_norm": 0.4921875,
"learning_rate": 2.1356214704243537e-07,
"loss": 0.22247314453125,
"num_tokens": 122029914.0,
"step": 1528
},
{
"epoch": 0.9490999379267536,
"grad_norm": 0.3984375,
"learning_rate": 2.0851934463292922e-07,
"loss": 0.197418212890625,
"num_tokens": 122116350.0,
"step": 1529
},
{
"epoch": 0.9497206703910615,
"grad_norm": 0.3671875,
"learning_rate": 2.0353637638222898e-07,
"loss": 0.1572265625,
"num_tokens": 122196440.0,
"step": 1530
},
{
"epoch": 0.9503414028553694,
"grad_norm": 0.388671875,
"learning_rate": 1.986132624473258e-07,
"loss": 0.179290771484375,
"num_tokens": 122275699.0,
"step": 1531
},
{
"epoch": 0.9509621353196772,
"grad_norm": 0.42578125,
"learning_rate": 1.937500227430894e-07,
"loss": 0.190521240234375,
"num_tokens": 122360522.0,
"step": 1532
},
{
"epoch": 0.9515828677839852,
"grad_norm": 0.421875,
"learning_rate": 1.889466769421816e-07,
"loss": 0.1977996826171875,
"num_tokens": 122439298.0,
"step": 1533
},
{
"epoch": 0.952203600248293,
"grad_norm": 0.486328125,
"learning_rate": 1.842032444749897e-07,
"loss": 0.2313079833984375,
"num_tokens": 122509830.0,
"step": 1534
},
{
"epoch": 0.9528243327126009,
"grad_norm": 0.4375,
"learning_rate": 1.7951974452953989e-07,
"loss": 0.2130889892578125,
"num_tokens": 122589044.0,
"step": 1535
},
{
"epoch": 0.9534450651769087,
"grad_norm": 0.40234375,
"learning_rate": 1.7489619605141395e-07,
"loss": 0.19554901123046875,
"num_tokens": 122680765.0,
"step": 1536
},
{
"epoch": 0.9540657976412167,
"grad_norm": 0.33984375,
"learning_rate": 1.7033261774368758e-07,
"loss": 0.1403350830078125,
"num_tokens": 122760966.0,
"step": 1537
},
{
"epoch": 0.9546865301055245,
"grad_norm": 0.39453125,
"learning_rate": 1.658290280668423e-07,
"loss": 0.1837005615234375,
"num_tokens": 122847761.0,
"step": 1538
},
{
"epoch": 0.9553072625698324,
"grad_norm": 0.451171875,
"learning_rate": 1.6138544523869702e-07,
"loss": 0.20369720458984375,
"num_tokens": 122920265.0,
"step": 1539
},
{
"epoch": 0.9559279950341403,
"grad_norm": 0.384765625,
"learning_rate": 1.5700188723432984e-07,
"loss": 0.15799713134765625,
"num_tokens": 122999008.0,
"step": 1540
},
{
"epoch": 0.9565487274984482,
"grad_norm": 0.373046875,
"learning_rate": 1.5267837178600974e-07,
"loss": 0.16912841796875,
"num_tokens": 123091404.0,
"step": 1541
},
{
"epoch": 0.957169459962756,
"grad_norm": 0.357421875,
"learning_rate": 1.4841491638312167e-07,
"loss": 0.13803863525390625,
"num_tokens": 123171022.0,
"step": 1542
},
{
"epoch": 0.957790192427064,
"grad_norm": 0.42578125,
"learning_rate": 1.4421153827209987e-07,
"loss": 0.20819091796875,
"num_tokens": 123249917.0,
"step": 1543
},
{
"epoch": 0.9584109248913718,
"grad_norm": 0.419921875,
"learning_rate": 1.4006825445634975e-07,
"loss": 0.1942291259765625,
"num_tokens": 123334537.0,
"step": 1544
},
{
"epoch": 0.9590316573556797,
"grad_norm": 0.35546875,
"learning_rate": 1.3598508169619106e-07,
"loss": 0.13500213623046875,
"num_tokens": 123414558.0,
"step": 1545
},
{
"epoch": 0.9596523898199876,
"grad_norm": 0.431640625,
"learning_rate": 1.3196203650878148e-07,
"loss": 0.17513275146484375,
"num_tokens": 123487049.0,
"step": 1546
},
{
"epoch": 0.9602731222842955,
"grad_norm": 0.41796875,
"learning_rate": 1.2799913516804818e-07,
"loss": 0.208465576171875,
"num_tokens": 123577657.0,
"step": 1547
},
{
"epoch": 0.9608938547486033,
"grad_norm": 0.326171875,
"learning_rate": 1.2409639370463133e-07,
"loss": 0.12164306640625,
"num_tokens": 123656267.0,
"step": 1548
},
{
"epoch": 0.9615145872129113,
"grad_norm": 0.3984375,
"learning_rate": 1.2025382790580909e-07,
"loss": 0.1643524169921875,
"num_tokens": 123731282.0,
"step": 1549
},
{
"epoch": 0.9621353196772191,
"grad_norm": 0.408203125,
"learning_rate": 1.1647145331544263e-07,
"loss": 0.16950225830078125,
"num_tokens": 123803813.0,
"step": 1550
},
{
"epoch": 0.962756052141527,
"grad_norm": 0.375,
"learning_rate": 1.1274928523390626e-07,
"loss": 0.17138671875,
"num_tokens": 123886681.0,
"step": 1551
},
{
"epoch": 0.9633767846058349,
"grad_norm": 0.369140625,
"learning_rate": 1.0908733871802911e-07,
"loss": 0.12857818603515625,
"num_tokens": 123962326.0,
"step": 1552
},
{
"epoch": 0.9639975170701428,
"grad_norm": 0.427734375,
"learning_rate": 1.054856285810335e-07,
"loss": 0.19605255126953125,
"num_tokens": 124041304.0,
"step": 1553
},
{
"epoch": 0.9646182495344506,
"grad_norm": 0.458984375,
"learning_rate": 1.0194416939247497e-07,
"loss": 0.1844940185546875,
"num_tokens": 124114734.0,
"step": 1554
},
{
"epoch": 0.9652389819987586,
"grad_norm": 0.4375,
"learning_rate": 9.84629754781824e-08,
"loss": 0.227142333984375,
"num_tokens": 124198151.0,
"step": 1555
},
{
"epoch": 0.9658597144630664,
"grad_norm": 0.4140625,
"learning_rate": 9.504206092020129e-08,
"loss": 0.18299102783203125,
"num_tokens": 124277118.0,
"step": 1556
},
{
"epoch": 0.9664804469273743,
"grad_norm": 0.443359375,
"learning_rate": 9.168143955673892e-08,
"loss": 0.1861114501953125,
"num_tokens": 124351071.0,
"step": 1557
},
{
"epoch": 0.9671011793916822,
"grad_norm": 0.435546875,
"learning_rate": 8.83811249821026e-08,
"loss": 0.1986236572265625,
"num_tokens": 124429523.0,
"step": 1558
},
{
"epoch": 0.9677219118559901,
"grad_norm": 0.38671875,
"learning_rate": 8.514113054664984e-08,
"loss": 0.15753173828125,
"num_tokens": 124508850.0,
"step": 1559
},
{
"epoch": 0.9683426443202979,
"grad_norm": 0.421875,
"learning_rate": 8.196146935672999e-08,
"loss": 0.1880035400390625,
"num_tokens": 124583097.0,
"step": 1560
},
{
"epoch": 0.9689633767846059,
"grad_norm": 0.4765625,
"learning_rate": 7.884215427463758e-08,
"loss": 0.22265625,
"num_tokens": 124668189.0,
"step": 1561
},
{
"epoch": 0.9695841092489137,
"grad_norm": 0.416015625,
"learning_rate": 7.578319791855581e-08,
"loss": 0.17105865478515625,
"num_tokens": 124739966.0,
"step": 1562
},
{
"epoch": 0.9702048417132216,
"grad_norm": 0.443359375,
"learning_rate": 7.278461266250147e-08,
"loss": 0.209686279296875,
"num_tokens": 124822047.0,
"step": 1563
},
{
"epoch": 0.9708255741775295,
"grad_norm": 0.451171875,
"learning_rate": 6.984641063628506e-08,
"loss": 0.2391815185546875,
"num_tokens": 124904083.0,
"step": 1564
},
{
"epoch": 0.9714463066418374,
"grad_norm": 0.439453125,
"learning_rate": 6.696860372545244e-08,
"loss": 0.21942901611328125,
"num_tokens": 124984580.0,
"step": 1565
},
{
"epoch": 0.9720670391061452,
"grad_norm": 0.427734375,
"learning_rate": 6.415120357124493e-08,
"loss": 0.2217254638671875,
"num_tokens": 125063992.0,
"step": 1566
},
{
"epoch": 0.9726877715704532,
"grad_norm": 0.41015625,
"learning_rate": 6.139422157054265e-08,
"loss": 0.1790771484375,
"num_tokens": 125141703.0,
"step": 1567
},
{
"epoch": 0.973308504034761,
"grad_norm": 0.41796875,
"learning_rate": 5.869766887582784e-08,
"loss": 0.193359375,
"num_tokens": 125221017.0,
"step": 1568
},
{
"epoch": 0.9739292364990689,
"grad_norm": 0.369140625,
"learning_rate": 5.6061556395131665e-08,
"loss": 0.15718841552734375,
"num_tokens": 125304963.0,
"step": 1569
},
{
"epoch": 0.9745499689633768,
"grad_norm": 0.416015625,
"learning_rate": 5.348589479199917e-08,
"loss": 0.17581939697265625,
"num_tokens": 125382152.0,
"step": 1570
},
{
"epoch": 0.9751707014276847,
"grad_norm": 0.43359375,
"learning_rate": 5.0970694485434346e-08,
"loss": 0.209014892578125,
"num_tokens": 125466570.0,
"step": 1571
},
{
"epoch": 0.9757914338919925,
"grad_norm": 0.361328125,
"learning_rate": 4.8515965649870155e-08,
"loss": 0.1434173583984375,
"num_tokens": 125546768.0,
"step": 1572
},
{
"epoch": 0.9764121663563005,
"grad_norm": 0.419921875,
"learning_rate": 4.6121718215118566e-08,
"loss": 0.2110137939453125,
"num_tokens": 125631483.0,
"step": 1573
},
{
"epoch": 0.9770328988206083,
"grad_norm": 0.431640625,
"learning_rate": 4.3787961866333935e-08,
"loss": 0.19290924072265625,
"num_tokens": 125711555.0,
"step": 1574
},
{
"epoch": 0.9776536312849162,
"grad_norm": 0.41015625,
"learning_rate": 4.151470604397467e-08,
"loss": 0.168670654296875,
"num_tokens": 125786649.0,
"step": 1575
},
{
"epoch": 0.978274363749224,
"grad_norm": 0.419921875,
"learning_rate": 3.930195994376329e-08,
"loss": 0.190093994140625,
"num_tokens": 125869743.0,
"step": 1576
},
{
"epoch": 0.978895096213532,
"grad_norm": 0.38671875,
"learning_rate": 3.714973251664977e-08,
"loss": 0.1556854248046875,
"num_tokens": 125951316.0,
"step": 1577
},
{
"epoch": 0.9795158286778398,
"grad_norm": 0.365234375,
"learning_rate": 3.505803246877326e-08,
"loss": 0.13401031494140625,
"num_tokens": 126032193.0,
"step": 1578
},
{
"epoch": 0.9801365611421478,
"grad_norm": 0.458984375,
"learning_rate": 3.3026868261433754e-08,
"loss": 0.23333740234375,
"num_tokens": 126111506.0,
"step": 1579
},
{
"epoch": 0.9807572936064556,
"grad_norm": 0.431640625,
"learning_rate": 3.105624811104879e-08,
"loss": 0.2361297607421875,
"num_tokens": 126197466.0,
"step": 1580
},
{
"epoch": 0.9813780260707635,
"grad_norm": 0.490234375,
"learning_rate": 2.914617998912683e-08,
"loss": 0.19991302490234375,
"num_tokens": 126267247.0,
"step": 1581
},
{
"epoch": 0.9819987585350713,
"grad_norm": 0.44140625,
"learning_rate": 2.729667162222893e-08,
"loss": 0.200286865234375,
"num_tokens": 126347281.0,
"step": 1582
},
{
"epoch": 0.9826194909993793,
"grad_norm": 0.48046875,
"learning_rate": 2.55077304919471e-08,
"loss": 0.237945556640625,
"num_tokens": 126428027.0,
"step": 1583
},
{
"epoch": 0.9832402234636871,
"grad_norm": 0.396484375,
"learning_rate": 2.3779363834864344e-08,
"loss": 0.1669158935546875,
"num_tokens": 126510422.0,
"step": 1584
},
{
"epoch": 0.983860955927995,
"grad_norm": 0.390625,
"learning_rate": 2.2111578642527997e-08,
"loss": 0.164306640625,
"num_tokens": 126585863.0,
"step": 1585
},
{
"epoch": 0.9844816883923029,
"grad_norm": 0.39453125,
"learning_rate": 2.050438166142643e-08,
"loss": 0.180206298828125,
"num_tokens": 126667421.0,
"step": 1586
},
{
"epoch": 0.9851024208566108,
"grad_norm": 0.46875,
"learning_rate": 1.8957779392955732e-08,
"loss": 0.2297210693359375,
"num_tokens": 126751726.0,
"step": 1587
},
{
"epoch": 0.9857231533209186,
"grad_norm": 0.3203125,
"learning_rate": 1.7471778093396395e-08,
"loss": 0.10770416259765625,
"num_tokens": 126833265.0,
"step": 1588
},
{
"epoch": 0.9863438857852266,
"grad_norm": 0.4296875,
"learning_rate": 1.6046383773885008e-08,
"loss": 0.1649017333984375,
"num_tokens": 126907564.0,
"step": 1589
},
{
"epoch": 0.9869646182495344,
"grad_norm": 0.39453125,
"learning_rate": 1.4681602200395938e-08,
"loss": 0.20687103271484375,
"num_tokens": 126990184.0,
"step": 1590
},
{
"epoch": 0.9875853507138423,
"grad_norm": 0.421875,
"learning_rate": 1.3377438893711347e-08,
"loss": 0.194854736328125,
"num_tokens": 127065596.0,
"step": 1591
},
{
"epoch": 0.9882060831781502,
"grad_norm": 0.39453125,
"learning_rate": 1.2133899129402882e-08,
"loss": 0.17165374755859375,
"num_tokens": 127141734.0,
"step": 1592
},
{
"epoch": 0.9888268156424581,
"grad_norm": 0.41015625,
"learning_rate": 1.095098793781002e-08,
"loss": 0.1893463134765625,
"num_tokens": 127222708.0,
"step": 1593
},
{
"epoch": 0.9894475481067659,
"grad_norm": 0.36328125,
"learning_rate": 9.828710104018424e-09,
"loss": 0.1423492431640625,
"num_tokens": 127303578.0,
"step": 1594
},
{
"epoch": 0.9900682805710739,
"grad_norm": 0.451171875,
"learning_rate": 8.767070167838287e-09,
"loss": 0.2303009033203125,
"num_tokens": 127384163.0,
"step": 1595
},
{
"epoch": 0.9906890130353817,
"grad_norm": 0.404296875,
"learning_rate": 7.76607242379268e-09,
"loss": 0.16613006591796875,
"num_tokens": 127468046.0,
"step": 1596
},
{
"epoch": 0.9913097454996896,
"grad_norm": 0.43359375,
"learning_rate": 6.825720921094236e-09,
"loss": 0.210845947265625,
"num_tokens": 127551436.0,
"step": 1597
},
{
"epoch": 0.9919304779639975,
"grad_norm": 0.4375,
"learning_rate": 5.946019463631824e-09,
"loss": 0.176361083984375,
"num_tokens": 127628295.0,
"step": 1598
},
{
"epoch": 0.9925512104283054,
"grad_norm": 0.40625,
"learning_rate": 5.126971609952235e-09,
"loss": 0.1775970458984375,
"num_tokens": 127709813.0,
"step": 1599
},
{
"epoch": 0.9931719428926132,
"grad_norm": 0.404296875,
"learning_rate": 4.368580673251854e-09,
"loss": 0.1641998291015625,
"num_tokens": 127788013.0,
"step": 1600
},
{
"epoch": 0.9937926753569212,
"grad_norm": 0.451171875,
"learning_rate": 3.6708497213550074e-09,
"loss": 0.22100830078125,
"num_tokens": 127864301.0,
"step": 1601
},
{
"epoch": 0.994413407821229,
"grad_norm": 0.482421875,
"learning_rate": 3.033781576705641e-09,
"loss": 0.238128662109375,
"num_tokens": 127941392.0,
"step": 1602
},
{
"epoch": 0.9950341402855369,
"grad_norm": 0.46875,
"learning_rate": 2.4573788163589906e-09,
"loss": 0.24317169189453125,
"num_tokens": 128021185.0,
"step": 1603
},
{
"epoch": 0.9956548727498448,
"grad_norm": 0.431640625,
"learning_rate": 1.9416437719665946e-09,
"loss": 0.1907958984375,
"num_tokens": 128099875.0,
"step": 1604
},
{
"epoch": 0.9962756052141527,
"grad_norm": 0.43359375,
"learning_rate": 1.4865785297646373e-09,
"loss": 0.205413818359375,
"num_tokens": 128183898.0,
"step": 1605
},
{
"epoch": 0.9968963376784605,
"grad_norm": 0.388671875,
"learning_rate": 1.092184930577278e-09,
"loss": 0.161468505859375,
"num_tokens": 128262383.0,
"step": 1606
},
{
"epoch": 0.9975170701427685,
"grad_norm": 0.396484375,
"learning_rate": 7.584645697933379e-10,
"loss": 0.18444061279296875,
"num_tokens": 128344933.0,
"step": 1607
},
{
"epoch": 0.9981378026070763,
"grad_norm": 0.416015625,
"learning_rate": 4.854187973712954e-10,
"loss": 0.203826904296875,
"num_tokens": 128429729.0,
"step": 1608
},
{
"epoch": 0.9987585350713842,
"grad_norm": 0.48828125,
"learning_rate": 2.730487178309593e-10,
"loss": 0.22216796875,
"num_tokens": 128506793.0,
"step": 1609
},
{
"epoch": 0.9993792675356921,
"grad_norm": 0.416015625,
"learning_rate": 1.2135519024514264e-10,
"loss": 0.16802978515625,
"num_tokens": 128584548.0,
"step": 1610
},
{
"epoch": 1.0,
"grad_norm": 0.33203125,
"learning_rate": 3.033882824299283e-11,
"loss": 0.10080718994140625,
"num_tokens": 128666899.0,
"step": 1611
}
],
"logging_steps": 1,
"max_steps": 1611,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.206218535574333e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}