xyaoNV's picture
Upload folder using huggingface_hub
5e5002d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 68.4931506849315,
"eval_steps": 500,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03424657534246575,
"grad_norm": 6.8509087562561035,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.9799,
"step": 10
},
{
"epoch": 0.0684931506849315,
"grad_norm": 6.680306911468506,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.9562,
"step": 20
},
{
"epoch": 0.10273972602739725,
"grad_norm": 4.74023962020874,
"learning_rate": 3e-06,
"loss": 1.6955,
"step": 30
},
{
"epoch": 0.136986301369863,
"grad_norm": 5.3383073806762695,
"learning_rate": 4.000000000000001e-06,
"loss": 1.7233,
"step": 40
},
{
"epoch": 0.17123287671232876,
"grad_norm": 4.325743675231934,
"learning_rate": 5e-06,
"loss": 1.4227,
"step": 50
},
{
"epoch": 0.2054794520547945,
"grad_norm": 4.414618968963623,
"learning_rate": 6e-06,
"loss": 1.0638,
"step": 60
},
{
"epoch": 0.23972602739726026,
"grad_norm": 2.4248571395874023,
"learning_rate": 7.000000000000001e-06,
"loss": 0.7888,
"step": 70
},
{
"epoch": 0.273972602739726,
"grad_norm": 1.6021969318389893,
"learning_rate": 8.000000000000001e-06,
"loss": 0.535,
"step": 80
},
{
"epoch": 0.3082191780821918,
"grad_norm": 0.989554762840271,
"learning_rate": 9e-06,
"loss": 0.3166,
"step": 90
},
{
"epoch": 0.3424657534246575,
"grad_norm": 0.5975518822669983,
"learning_rate": 1e-05,
"loss": 0.2056,
"step": 100
},
{
"epoch": 0.3767123287671233,
"grad_norm": 0.40622058510780334,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.1534,
"step": 110
},
{
"epoch": 0.410958904109589,
"grad_norm": 0.2416389435529709,
"learning_rate": 1.2e-05,
"loss": 0.1101,
"step": 120
},
{
"epoch": 0.4452054794520548,
"grad_norm": 0.21470960974693298,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0981,
"step": 130
},
{
"epoch": 0.4794520547945205,
"grad_norm": 0.1605301946401596,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0844,
"step": 140
},
{
"epoch": 0.5136986301369864,
"grad_norm": 0.16127008199691772,
"learning_rate": 1.5e-05,
"loss": 0.0787,
"step": 150
},
{
"epoch": 0.547945205479452,
"grad_norm": 0.14680036902427673,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0747,
"step": 160
},
{
"epoch": 0.5821917808219178,
"grad_norm": 0.1104154884815216,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.0646,
"step": 170
},
{
"epoch": 0.6164383561643836,
"grad_norm": 0.12858060002326965,
"learning_rate": 1.8e-05,
"loss": 0.0585,
"step": 180
},
{
"epoch": 0.6506849315068494,
"grad_norm": 0.10955977439880371,
"learning_rate": 1.9e-05,
"loss": 0.0518,
"step": 190
},
{
"epoch": 0.684931506849315,
"grad_norm": 0.1255006641149521,
"learning_rate": 2e-05,
"loss": 0.0532,
"step": 200
},
{
"epoch": 0.7191780821917808,
"grad_norm": 0.10987284034490585,
"learning_rate": 2.1e-05,
"loss": 0.0489,
"step": 210
},
{
"epoch": 0.7534246575342466,
"grad_norm": 0.12291987985372543,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0457,
"step": 220
},
{
"epoch": 0.7876712328767124,
"grad_norm": 0.11342420428991318,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0487,
"step": 230
},
{
"epoch": 0.821917808219178,
"grad_norm": 0.09843714535236359,
"learning_rate": 2.4e-05,
"loss": 0.0452,
"step": 240
},
{
"epoch": 0.8561643835616438,
"grad_norm": 0.10258728265762329,
"learning_rate": 2.5e-05,
"loss": 0.0391,
"step": 250
},
{
"epoch": 0.8904109589041096,
"grad_norm": 0.159352108836174,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0354,
"step": 260
},
{
"epoch": 0.9246575342465754,
"grad_norm": 0.13461743295192719,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0373,
"step": 270
},
{
"epoch": 0.958904109589041,
"grad_norm": 0.11037391424179077,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0348,
"step": 280
},
{
"epoch": 0.9931506849315068,
"grad_norm": 0.1120462641119957,
"learning_rate": 2.9e-05,
"loss": 0.0361,
"step": 290
},
{
"epoch": 1.0273972602739727,
"grad_norm": 0.11409150063991547,
"learning_rate": 3e-05,
"loss": 0.0317,
"step": 300
},
{
"epoch": 1.0616438356164384,
"grad_norm": 0.11472675949335098,
"learning_rate": 3.1e-05,
"loss": 0.0286,
"step": 310
},
{
"epoch": 1.095890410958904,
"grad_norm": 0.1504867672920227,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0285,
"step": 320
},
{
"epoch": 1.13013698630137,
"grad_norm": 0.14106976985931396,
"learning_rate": 3.3e-05,
"loss": 0.0293,
"step": 330
},
{
"epoch": 1.1643835616438356,
"grad_norm": 0.09240074455738068,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.026,
"step": 340
},
{
"epoch": 1.1986301369863013,
"grad_norm": 0.09657058119773865,
"learning_rate": 3.5e-05,
"loss": 0.0254,
"step": 350
},
{
"epoch": 1.2328767123287672,
"grad_norm": 0.11131025850772858,
"learning_rate": 3.6e-05,
"loss": 0.0298,
"step": 360
},
{
"epoch": 1.2671232876712328,
"grad_norm": 0.09079006314277649,
"learning_rate": 3.7e-05,
"loss": 0.0231,
"step": 370
},
{
"epoch": 1.3013698630136985,
"grad_norm": 0.10090917348861694,
"learning_rate": 3.8e-05,
"loss": 0.0279,
"step": 380
},
{
"epoch": 1.3356164383561644,
"grad_norm": 0.10513892769813538,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0245,
"step": 390
},
{
"epoch": 1.36986301369863,
"grad_norm": 0.10360024124383926,
"learning_rate": 4e-05,
"loss": 0.025,
"step": 400
},
{
"epoch": 1.404109589041096,
"grad_norm": 0.09798359870910645,
"learning_rate": 4.1e-05,
"loss": 0.0238,
"step": 410
},
{
"epoch": 1.4383561643835616,
"grad_norm": 0.099854975938797,
"learning_rate": 4.2e-05,
"loss": 0.0229,
"step": 420
},
{
"epoch": 1.4726027397260273,
"grad_norm": 0.14537930488586426,
"learning_rate": 4.3e-05,
"loss": 0.0251,
"step": 430
},
{
"epoch": 1.5068493150684932,
"grad_norm": 0.13373461365699768,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.0229,
"step": 440
},
{
"epoch": 1.541095890410959,
"grad_norm": 0.14300961792469025,
"learning_rate": 4.5e-05,
"loss": 0.0203,
"step": 450
},
{
"epoch": 1.5753424657534247,
"grad_norm": 0.10283118486404419,
"learning_rate": 4.600000000000001e-05,
"loss": 0.0186,
"step": 460
},
{
"epoch": 1.6095890410958904,
"grad_norm": 0.1506546288728714,
"learning_rate": 4.7e-05,
"loss": 0.0189,
"step": 470
},
{
"epoch": 1.643835616438356,
"grad_norm": 0.12016734480857849,
"learning_rate": 4.8e-05,
"loss": 0.0211,
"step": 480
},
{
"epoch": 1.678082191780822,
"grad_norm": 0.1319936364889145,
"learning_rate": 4.9e-05,
"loss": 0.0208,
"step": 490
},
{
"epoch": 1.7123287671232876,
"grad_norm": 0.11739251017570496,
"learning_rate": 5e-05,
"loss": 0.0234,
"step": 500
},
{
"epoch": 1.7465753424657535,
"grad_norm": 0.1280950903892517,
"learning_rate": 5.1000000000000006e-05,
"loss": 0.0196,
"step": 510
},
{
"epoch": 1.7808219178082192,
"grad_norm": 0.1243286281824112,
"learning_rate": 5.2000000000000004e-05,
"loss": 0.022,
"step": 520
},
{
"epoch": 1.8150684931506849,
"grad_norm": 0.08275436609983444,
"learning_rate": 5.300000000000001e-05,
"loss": 0.0185,
"step": 530
},
{
"epoch": 1.8493150684931505,
"grad_norm": 0.1354629397392273,
"learning_rate": 5.4000000000000005e-05,
"loss": 0.0167,
"step": 540
},
{
"epoch": 1.8835616438356164,
"grad_norm": 0.14976277947425842,
"learning_rate": 5.500000000000001e-05,
"loss": 0.0187,
"step": 550
},
{
"epoch": 1.9178082191780823,
"grad_norm": 0.15114262700080872,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.0174,
"step": 560
},
{
"epoch": 1.952054794520548,
"grad_norm": 0.24440935254096985,
"learning_rate": 5.6999999999999996e-05,
"loss": 0.0189,
"step": 570
},
{
"epoch": 1.9863013698630136,
"grad_norm": 0.11309883743524551,
"learning_rate": 5.8e-05,
"loss": 0.0189,
"step": 580
},
{
"epoch": 2.0205479452054793,
"grad_norm": 0.14310362935066223,
"learning_rate": 5.9e-05,
"loss": 0.0202,
"step": 590
},
{
"epoch": 2.0547945205479454,
"grad_norm": 0.15003430843353271,
"learning_rate": 6e-05,
"loss": 0.0195,
"step": 600
},
{
"epoch": 2.089041095890411,
"grad_norm": 0.0980050340294838,
"learning_rate": 6.1e-05,
"loss": 0.0153,
"step": 610
},
{
"epoch": 2.1232876712328768,
"grad_norm": 0.1242208257317543,
"learning_rate": 6.2e-05,
"loss": 0.0167,
"step": 620
},
{
"epoch": 2.1575342465753424,
"grad_norm": 0.11008936911821365,
"learning_rate": 6.3e-05,
"loss": 0.0156,
"step": 630
},
{
"epoch": 2.191780821917808,
"grad_norm": 0.16221505403518677,
"learning_rate": 6.400000000000001e-05,
"loss": 0.0189,
"step": 640
},
{
"epoch": 2.2260273972602738,
"grad_norm": 0.12388405948877335,
"learning_rate": 6.500000000000001e-05,
"loss": 0.0168,
"step": 650
},
{
"epoch": 2.26027397260274,
"grad_norm": 0.125056654214859,
"learning_rate": 6.6e-05,
"loss": 0.0181,
"step": 660
},
{
"epoch": 2.2945205479452055,
"grad_norm": 0.13683508336544037,
"learning_rate": 6.7e-05,
"loss": 0.0134,
"step": 670
},
{
"epoch": 2.328767123287671,
"grad_norm": 0.11894214898347855,
"learning_rate": 6.800000000000001e-05,
"loss": 0.0186,
"step": 680
},
{
"epoch": 2.363013698630137,
"grad_norm": 0.09277641028165817,
"learning_rate": 6.9e-05,
"loss": 0.0146,
"step": 690
},
{
"epoch": 2.3972602739726026,
"grad_norm": 0.12442376464605331,
"learning_rate": 7e-05,
"loss": 0.013,
"step": 700
},
{
"epoch": 2.4315068493150687,
"grad_norm": 0.11001459509134293,
"learning_rate": 7.1e-05,
"loss": 0.0135,
"step": 710
},
{
"epoch": 2.4657534246575343,
"grad_norm": 0.1414063721895218,
"learning_rate": 7.2e-05,
"loss": 0.0164,
"step": 720
},
{
"epoch": 2.5,
"grad_norm": 0.1333468109369278,
"learning_rate": 7.3e-05,
"loss": 0.0142,
"step": 730
},
{
"epoch": 2.5342465753424657,
"grad_norm": 0.11644615232944489,
"learning_rate": 7.4e-05,
"loss": 0.0129,
"step": 740
},
{
"epoch": 2.5684931506849313,
"grad_norm": 0.14134648442268372,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0145,
"step": 750
},
{
"epoch": 2.602739726027397,
"grad_norm": 0.1172047108411789,
"learning_rate": 7.6e-05,
"loss": 0.0145,
"step": 760
},
{
"epoch": 2.636986301369863,
"grad_norm": 0.12389740347862244,
"learning_rate": 7.7e-05,
"loss": 0.0147,
"step": 770
},
{
"epoch": 2.671232876712329,
"grad_norm": 0.1197102963924408,
"learning_rate": 7.800000000000001e-05,
"loss": 0.0152,
"step": 780
},
{
"epoch": 2.7054794520547945,
"grad_norm": 0.14396944642066956,
"learning_rate": 7.900000000000001e-05,
"loss": 0.0148,
"step": 790
},
{
"epoch": 2.73972602739726,
"grad_norm": 0.19851979613304138,
"learning_rate": 8e-05,
"loss": 0.0145,
"step": 800
},
{
"epoch": 2.7739726027397262,
"grad_norm": 0.11773121356964111,
"learning_rate": 8.1e-05,
"loss": 0.0122,
"step": 810
},
{
"epoch": 2.808219178082192,
"grad_norm": 0.1518307477235794,
"learning_rate": 8.2e-05,
"loss": 0.0143,
"step": 820
},
{
"epoch": 2.8424657534246576,
"grad_norm": 0.12196581810712814,
"learning_rate": 8.3e-05,
"loss": 0.0119,
"step": 830
},
{
"epoch": 2.8767123287671232,
"grad_norm": 0.13978245854377747,
"learning_rate": 8.4e-05,
"loss": 0.0142,
"step": 840
},
{
"epoch": 2.910958904109589,
"grad_norm": 0.1492251455783844,
"learning_rate": 8.5e-05,
"loss": 0.0145,
"step": 850
},
{
"epoch": 2.9452054794520546,
"grad_norm": 0.14013636112213135,
"learning_rate": 8.6e-05,
"loss": 0.0136,
"step": 860
},
{
"epoch": 2.9794520547945207,
"grad_norm": 0.10790041089057922,
"learning_rate": 8.7e-05,
"loss": 0.0122,
"step": 870
},
{
"epoch": 3.0136986301369864,
"grad_norm": 0.10169006884098053,
"learning_rate": 8.800000000000001e-05,
"loss": 0.0111,
"step": 880
},
{
"epoch": 3.047945205479452,
"grad_norm": 0.10675999522209167,
"learning_rate": 8.900000000000001e-05,
"loss": 0.0163,
"step": 890
},
{
"epoch": 3.0821917808219177,
"grad_norm": 0.14085350930690765,
"learning_rate": 9e-05,
"loss": 0.0145,
"step": 900
},
{
"epoch": 3.1164383561643834,
"grad_norm": 0.14714281260967255,
"learning_rate": 9.1e-05,
"loss": 0.0127,
"step": 910
},
{
"epoch": 3.1506849315068495,
"grad_norm": 0.17444485425949097,
"learning_rate": 9.200000000000001e-05,
"loss": 0.0118,
"step": 920
},
{
"epoch": 3.184931506849315,
"grad_norm": 0.11419258266687393,
"learning_rate": 9.300000000000001e-05,
"loss": 0.0116,
"step": 930
},
{
"epoch": 3.219178082191781,
"grad_norm": 0.112928606569767,
"learning_rate": 9.4e-05,
"loss": 0.0118,
"step": 940
},
{
"epoch": 3.2534246575342465,
"grad_norm": 0.09506986290216446,
"learning_rate": 9.5e-05,
"loss": 0.0121,
"step": 950
},
{
"epoch": 3.287671232876712,
"grad_norm": 0.10540028661489487,
"learning_rate": 9.6e-05,
"loss": 0.0108,
"step": 960
},
{
"epoch": 3.3219178082191783,
"grad_norm": 0.09384490549564362,
"learning_rate": 9.7e-05,
"loss": 0.0138,
"step": 970
},
{
"epoch": 3.356164383561644,
"grad_norm": 0.09561273455619812,
"learning_rate": 9.8e-05,
"loss": 0.0111,
"step": 980
},
{
"epoch": 3.3904109589041096,
"grad_norm": 0.10197819769382477,
"learning_rate": 9.900000000000001e-05,
"loss": 0.0129,
"step": 990
},
{
"epoch": 3.4246575342465753,
"grad_norm": 0.11337673664093018,
"learning_rate": 0.0001,
"loss": 0.0105,
"step": 1000
},
{
"epoch": 3.458904109589041,
"grad_norm": 0.09718764573335648,
"learning_rate": 9.999993165095463e-05,
"loss": 0.0122,
"step": 1010
},
{
"epoch": 3.493150684931507,
"grad_norm": 0.08433857560157776,
"learning_rate": 9.999972660400536e-05,
"loss": 0.0116,
"step": 1020
},
{
"epoch": 3.5273972602739727,
"grad_norm": 0.09182614833116531,
"learning_rate": 9.999938485971279e-05,
"loss": 0.0122,
"step": 1030
},
{
"epoch": 3.5616438356164384,
"grad_norm": 0.10014256834983826,
"learning_rate": 9.999890641901125e-05,
"loss": 0.0122,
"step": 1040
},
{
"epoch": 3.595890410958904,
"grad_norm": 0.1171124204993248,
"learning_rate": 9.999829128320874e-05,
"loss": 0.0122,
"step": 1050
},
{
"epoch": 3.6301369863013697,
"grad_norm": 0.12046120315790176,
"learning_rate": 9.999753945398704e-05,
"loss": 0.0113,
"step": 1060
},
{
"epoch": 3.6643835616438354,
"grad_norm": 0.1236460953950882,
"learning_rate": 9.999665093340165e-05,
"loss": 0.0129,
"step": 1070
},
{
"epoch": 3.6986301369863015,
"grad_norm": 0.0972442626953125,
"learning_rate": 9.99956257238817e-05,
"loss": 0.0096,
"step": 1080
},
{
"epoch": 3.732876712328767,
"grad_norm": 0.12523873150348663,
"learning_rate": 9.999446382823013e-05,
"loss": 0.0124,
"step": 1090
},
{
"epoch": 3.767123287671233,
"grad_norm": 0.12162143737077713,
"learning_rate": 9.999316524962345e-05,
"loss": 0.0098,
"step": 1100
},
{
"epoch": 3.8013698630136985,
"grad_norm": 0.12172838300466537,
"learning_rate": 9.999172999161198e-05,
"loss": 0.0101,
"step": 1110
},
{
"epoch": 3.8356164383561646,
"grad_norm": 0.10995998233556747,
"learning_rate": 9.999015805811965e-05,
"loss": 0.0112,
"step": 1120
},
{
"epoch": 3.8698630136986303,
"grad_norm": 0.13435988128185272,
"learning_rate": 9.998844945344405e-05,
"loss": 0.0147,
"step": 1130
},
{
"epoch": 3.904109589041096,
"grad_norm": 0.1312176138162613,
"learning_rate": 9.998660418225645e-05,
"loss": 0.0125,
"step": 1140
},
{
"epoch": 3.9383561643835616,
"grad_norm": 0.11387414485216141,
"learning_rate": 9.998462224960175e-05,
"loss": 0.0104,
"step": 1150
},
{
"epoch": 3.9726027397260273,
"grad_norm": 0.1328994333744049,
"learning_rate": 9.998250366089848e-05,
"loss": 0.0116,
"step": 1160
},
{
"epoch": 4.006849315068493,
"grad_norm": 0.0746927261352539,
"learning_rate": 9.998024842193876e-05,
"loss": 0.0125,
"step": 1170
},
{
"epoch": 4.041095890410959,
"grad_norm": 0.09642993658781052,
"learning_rate": 9.997785653888835e-05,
"loss": 0.0116,
"step": 1180
},
{
"epoch": 4.075342465753424,
"grad_norm": 0.10804189741611481,
"learning_rate": 9.997532801828658e-05,
"loss": 0.0112,
"step": 1190
},
{
"epoch": 4.109589041095891,
"grad_norm": 0.096079520881176,
"learning_rate": 9.997266286704631e-05,
"loss": 0.0114,
"step": 1200
},
{
"epoch": 4.1438356164383565,
"grad_norm": 0.10320870578289032,
"learning_rate": 9.996986109245395e-05,
"loss": 0.0096,
"step": 1210
},
{
"epoch": 4.178082191780822,
"grad_norm": 0.11493821442127228,
"learning_rate": 9.996692270216947e-05,
"loss": 0.011,
"step": 1220
},
{
"epoch": 4.212328767123288,
"grad_norm": 0.11878206580877304,
"learning_rate": 9.996384770422629e-05,
"loss": 0.0118,
"step": 1230
},
{
"epoch": 4.2465753424657535,
"grad_norm": 0.07943862676620483,
"learning_rate": 9.996063610703137e-05,
"loss": 0.0119,
"step": 1240
},
{
"epoch": 4.280821917808219,
"grad_norm": 0.09945517033338547,
"learning_rate": 9.995728791936504e-05,
"loss": 0.0112,
"step": 1250
},
{
"epoch": 4.315068493150685,
"grad_norm": 0.10141076892614365,
"learning_rate": 9.995380315038119e-05,
"loss": 0.0115,
"step": 1260
},
{
"epoch": 4.3493150684931505,
"grad_norm": 0.08885636180639267,
"learning_rate": 9.9950181809607e-05,
"loss": 0.0096,
"step": 1270
},
{
"epoch": 4.383561643835616,
"grad_norm": 0.08274652808904648,
"learning_rate": 9.994642390694308e-05,
"loss": 0.0114,
"step": 1280
},
{
"epoch": 4.417808219178082,
"grad_norm": 0.14571622014045715,
"learning_rate": 9.99425294526634e-05,
"loss": 0.0107,
"step": 1290
},
{
"epoch": 4.4520547945205475,
"grad_norm": 0.08620978891849518,
"learning_rate": 9.993849845741524e-05,
"loss": 0.0097,
"step": 1300
},
{
"epoch": 4.486301369863014,
"grad_norm": 0.10268136858940125,
"learning_rate": 9.99343309322192e-05,
"loss": 0.0095,
"step": 1310
},
{
"epoch": 4.52054794520548,
"grad_norm": 0.08594616502523422,
"learning_rate": 9.993002688846913e-05,
"loss": 0.0103,
"step": 1320
},
{
"epoch": 4.554794520547945,
"grad_norm": 0.09446701407432556,
"learning_rate": 9.992558633793212e-05,
"loss": 0.0096,
"step": 1330
},
{
"epoch": 4.589041095890411,
"grad_norm": 0.1126297116279602,
"learning_rate": 9.992100929274846e-05,
"loss": 0.0096,
"step": 1340
},
{
"epoch": 4.623287671232877,
"grad_norm": 0.09884592145681381,
"learning_rate": 9.991629576543163e-05,
"loss": 0.0102,
"step": 1350
},
{
"epoch": 4.657534246575342,
"grad_norm": 0.10322685539722443,
"learning_rate": 9.991144576886823e-05,
"loss": 0.0108,
"step": 1360
},
{
"epoch": 4.691780821917808,
"grad_norm": 0.14780113101005554,
"learning_rate": 9.990645931631796e-05,
"loss": 0.0104,
"step": 1370
},
{
"epoch": 4.726027397260274,
"grad_norm": 0.11470142006874084,
"learning_rate": 9.990133642141359e-05,
"loss": 0.0103,
"step": 1380
},
{
"epoch": 4.760273972602739,
"grad_norm": 0.1362563967704773,
"learning_rate": 9.989607709816091e-05,
"loss": 0.0105,
"step": 1390
},
{
"epoch": 4.794520547945205,
"grad_norm": 0.12485076487064362,
"learning_rate": 9.989068136093873e-05,
"loss": 0.0108,
"step": 1400
},
{
"epoch": 4.828767123287671,
"grad_norm": 0.13635548949241638,
"learning_rate": 9.988514922449879e-05,
"loss": 0.0079,
"step": 1410
},
{
"epoch": 4.863013698630137,
"grad_norm": 0.11690958589315414,
"learning_rate": 9.987948070396571e-05,
"loss": 0.0115,
"step": 1420
},
{
"epoch": 4.897260273972603,
"grad_norm": 0.09252661466598511,
"learning_rate": 9.987367581483705e-05,
"loss": 0.009,
"step": 1430
},
{
"epoch": 4.931506849315069,
"grad_norm": 0.08612260967493057,
"learning_rate": 9.986773457298311e-05,
"loss": 0.0089,
"step": 1440
},
{
"epoch": 4.965753424657534,
"grad_norm": 0.12948228418827057,
"learning_rate": 9.986165699464705e-05,
"loss": 0.0082,
"step": 1450
},
{
"epoch": 5.0,
"grad_norm": 0.12411495298147202,
"learning_rate": 9.985544309644475e-05,
"loss": 0.008,
"step": 1460
},
{
"epoch": 5.034246575342466,
"grad_norm": 0.13053423166275024,
"learning_rate": 9.984909289536473e-05,
"loss": 0.0118,
"step": 1470
},
{
"epoch": 5.068493150684931,
"grad_norm": 0.12200204282999039,
"learning_rate": 9.984260640876821e-05,
"loss": 0.0128,
"step": 1480
},
{
"epoch": 5.102739726027397,
"grad_norm": 0.09661053121089935,
"learning_rate": 9.983598365438902e-05,
"loss": 0.0087,
"step": 1490
},
{
"epoch": 5.136986301369863,
"grad_norm": 0.12682437896728516,
"learning_rate": 9.98292246503335e-05,
"loss": 0.0097,
"step": 1500
},
{
"epoch": 5.171232876712328,
"grad_norm": 0.13681413233280182,
"learning_rate": 9.98223294150805e-05,
"loss": 0.0103,
"step": 1510
},
{
"epoch": 5.205479452054795,
"grad_norm": 0.08635826408863068,
"learning_rate": 9.981529796748134e-05,
"loss": 0.0077,
"step": 1520
},
{
"epoch": 5.239726027397261,
"grad_norm": 0.09628362208604813,
"learning_rate": 9.980813032675974e-05,
"loss": 0.0088,
"step": 1530
},
{
"epoch": 5.273972602739726,
"grad_norm": 0.06951496005058289,
"learning_rate": 9.980082651251175e-05,
"loss": 0.0096,
"step": 1540
},
{
"epoch": 5.308219178082192,
"grad_norm": 0.10647785663604736,
"learning_rate": 9.979338654470569e-05,
"loss": 0.01,
"step": 1550
},
{
"epoch": 5.342465753424658,
"grad_norm": 0.08224749565124512,
"learning_rate": 9.97858104436822e-05,
"loss": 0.01,
"step": 1560
},
{
"epoch": 5.376712328767123,
"grad_norm": 0.09614630788564682,
"learning_rate": 9.977809823015401e-05,
"loss": 0.0112,
"step": 1570
},
{
"epoch": 5.410958904109589,
"grad_norm": 0.08124130219221115,
"learning_rate": 9.977024992520602e-05,
"loss": 0.0087,
"step": 1580
},
{
"epoch": 5.445205479452055,
"grad_norm": 0.09138698130846024,
"learning_rate": 9.976226555029522e-05,
"loss": 0.009,
"step": 1590
},
{
"epoch": 5.47945205479452,
"grad_norm": 0.1088441014289856,
"learning_rate": 9.975414512725057e-05,
"loss": 0.01,
"step": 1600
},
{
"epoch": 5.513698630136986,
"grad_norm": 0.12124570459127426,
"learning_rate": 9.974588867827301e-05,
"loss": 0.0088,
"step": 1610
},
{
"epoch": 5.5479452054794525,
"grad_norm": 0.12032249569892883,
"learning_rate": 9.973749622593534e-05,
"loss": 0.0129,
"step": 1620
},
{
"epoch": 5.582191780821918,
"grad_norm": 0.12702365219593048,
"learning_rate": 9.972896779318219e-05,
"loss": 0.0101,
"step": 1630
},
{
"epoch": 5.616438356164384,
"grad_norm": 0.0776495635509491,
"learning_rate": 9.972030340333001e-05,
"loss": 0.0082,
"step": 1640
},
{
"epoch": 5.6506849315068495,
"grad_norm": 0.09928776323795319,
"learning_rate": 9.97115030800669e-05,
"loss": 0.0094,
"step": 1650
},
{
"epoch": 5.684931506849315,
"grad_norm": 0.08945798128843307,
"learning_rate": 9.970256684745258e-05,
"loss": 0.0112,
"step": 1660
},
{
"epoch": 5.719178082191781,
"grad_norm": 0.1274113953113556,
"learning_rate": 9.969349472991838e-05,
"loss": 0.0094,
"step": 1670
},
{
"epoch": 5.7534246575342465,
"grad_norm": 0.1030043512582779,
"learning_rate": 9.968428675226714e-05,
"loss": 0.0093,
"step": 1680
},
{
"epoch": 5.787671232876712,
"grad_norm": 0.07978050410747528,
"learning_rate": 9.967494293967312e-05,
"loss": 0.0088,
"step": 1690
},
{
"epoch": 5.821917808219178,
"grad_norm": 0.11394272744655609,
"learning_rate": 9.966546331768191e-05,
"loss": 0.0102,
"step": 1700
},
{
"epoch": 5.8561643835616435,
"grad_norm": 0.08235814422369003,
"learning_rate": 9.965584791221048e-05,
"loss": 0.0102,
"step": 1710
},
{
"epoch": 5.890410958904109,
"grad_norm": 0.10711020976305008,
"learning_rate": 9.964609674954696e-05,
"loss": 0.0084,
"step": 1720
},
{
"epoch": 5.924657534246576,
"grad_norm": 0.1037852019071579,
"learning_rate": 9.963620985635065e-05,
"loss": 0.0084,
"step": 1730
},
{
"epoch": 5.958904109589041,
"grad_norm": 0.08300093561410904,
"learning_rate": 9.962618725965196e-05,
"loss": 0.0098,
"step": 1740
},
{
"epoch": 5.993150684931507,
"grad_norm": 0.10054755955934525,
"learning_rate": 9.961602898685226e-05,
"loss": 0.0076,
"step": 1750
},
{
"epoch": 6.027397260273973,
"grad_norm": 0.07662080228328705,
"learning_rate": 9.96057350657239e-05,
"loss": 0.0092,
"step": 1760
},
{
"epoch": 6.061643835616438,
"grad_norm": 0.06177311763167381,
"learning_rate": 9.959530552441005e-05,
"loss": 0.0085,
"step": 1770
},
{
"epoch": 6.095890410958904,
"grad_norm": 0.08199790120124817,
"learning_rate": 9.95847403914247e-05,
"loss": 0.009,
"step": 1780
},
{
"epoch": 6.13013698630137,
"grad_norm": 0.08528486639261246,
"learning_rate": 9.95740396956525e-05,
"loss": 0.0092,
"step": 1790
},
{
"epoch": 6.164383561643835,
"grad_norm": 0.0814923569560051,
"learning_rate": 9.956320346634876e-05,
"loss": 0.0078,
"step": 1800
},
{
"epoch": 6.198630136986301,
"grad_norm": 0.12103394418954849,
"learning_rate": 9.955223173313931e-05,
"loss": 0.0079,
"step": 1810
},
{
"epoch": 6.232876712328767,
"grad_norm": 0.1314200758934021,
"learning_rate": 9.954112452602045e-05,
"loss": 0.0095,
"step": 1820
},
{
"epoch": 6.267123287671233,
"grad_norm": 0.0955151692032814,
"learning_rate": 9.952988187535886e-05,
"loss": 0.0076,
"step": 1830
},
{
"epoch": 6.301369863013699,
"grad_norm": 0.07099077850580215,
"learning_rate": 9.95185038118915e-05,
"loss": 0.0078,
"step": 1840
},
{
"epoch": 6.335616438356165,
"grad_norm": 0.06817654520273209,
"learning_rate": 9.950699036672559e-05,
"loss": 0.0093,
"step": 1850
},
{
"epoch": 6.36986301369863,
"grad_norm": 0.10626586526632309,
"learning_rate": 9.949534157133844e-05,
"loss": 0.0077,
"step": 1860
},
{
"epoch": 6.404109589041096,
"grad_norm": 0.08003693073987961,
"learning_rate": 9.948355745757741e-05,
"loss": 0.0097,
"step": 1870
},
{
"epoch": 6.438356164383562,
"grad_norm": 0.08806777745485306,
"learning_rate": 9.94716380576598e-05,
"loss": 0.0102,
"step": 1880
},
{
"epoch": 6.472602739726027,
"grad_norm": 0.07476870715618134,
"learning_rate": 9.945958340417283e-05,
"loss": 0.0073,
"step": 1890
},
{
"epoch": 6.506849315068493,
"grad_norm": 0.08984264731407166,
"learning_rate": 9.944739353007344e-05,
"loss": 0.0081,
"step": 1900
},
{
"epoch": 6.541095890410959,
"grad_norm": 0.08520924299955368,
"learning_rate": 9.943506846868826e-05,
"loss": 0.0111,
"step": 1910
},
{
"epoch": 6.575342465753424,
"grad_norm": 0.07232493162155151,
"learning_rate": 9.942260825371358e-05,
"loss": 0.0075,
"step": 1920
},
{
"epoch": 6.609589041095891,
"grad_norm": 0.07491834461688995,
"learning_rate": 9.941001291921512e-05,
"loss": 0.0094,
"step": 1930
},
{
"epoch": 6.6438356164383565,
"grad_norm": 0.10011128336191177,
"learning_rate": 9.939728249962807e-05,
"loss": 0.0072,
"step": 1940
},
{
"epoch": 6.678082191780822,
"grad_norm": 0.0929722785949707,
"learning_rate": 9.938441702975689e-05,
"loss": 0.011,
"step": 1950
},
{
"epoch": 6.712328767123288,
"grad_norm": 0.1011221706867218,
"learning_rate": 9.937141654477528e-05,
"loss": 0.0099,
"step": 1960
},
{
"epoch": 6.7465753424657535,
"grad_norm": 0.09542585164308548,
"learning_rate": 9.93582810802261e-05,
"loss": 0.0085,
"step": 1970
},
{
"epoch": 6.780821917808219,
"grad_norm": 0.08605185896158218,
"learning_rate": 9.934501067202117e-05,
"loss": 0.0078,
"step": 1980
},
{
"epoch": 6.815068493150685,
"grad_norm": 0.062511146068573,
"learning_rate": 9.93316053564413e-05,
"loss": 0.0065,
"step": 1990
},
{
"epoch": 6.8493150684931505,
"grad_norm": 0.08524167537689209,
"learning_rate": 9.931806517013612e-05,
"loss": 0.0081,
"step": 2000
},
{
"epoch": 6.883561643835616,
"grad_norm": 0.0872335210442543,
"learning_rate": 9.930439015012396e-05,
"loss": 0.0066,
"step": 2010
},
{
"epoch": 6.917808219178082,
"grad_norm": 0.08723822236061096,
"learning_rate": 9.929058033379181e-05,
"loss": 0.0076,
"step": 2020
},
{
"epoch": 6.9520547945205475,
"grad_norm": 0.07593391835689545,
"learning_rate": 9.927663575889521e-05,
"loss": 0.0081,
"step": 2030
},
{
"epoch": 6.986301369863014,
"grad_norm": 0.07648999243974686,
"learning_rate": 9.926255646355804e-05,
"loss": 0.0069,
"step": 2040
},
{
"epoch": 7.02054794520548,
"grad_norm": 0.08651500195264816,
"learning_rate": 9.92483424862726e-05,
"loss": 0.0067,
"step": 2050
},
{
"epoch": 7.054794520547945,
"grad_norm": 0.09083620458841324,
"learning_rate": 9.923399386589933e-05,
"loss": 0.009,
"step": 2060
},
{
"epoch": 7.089041095890411,
"grad_norm": 0.0654049888253212,
"learning_rate": 9.921951064166684e-05,
"loss": 0.008,
"step": 2070
},
{
"epoch": 7.123287671232877,
"grad_norm": 0.06859409064054489,
"learning_rate": 9.92048928531717e-05,
"loss": 0.0077,
"step": 2080
},
{
"epoch": 7.157534246575342,
"grad_norm": 0.07323700934648514,
"learning_rate": 9.919014054037836e-05,
"loss": 0.009,
"step": 2090
},
{
"epoch": 7.191780821917808,
"grad_norm": 0.06128271296620369,
"learning_rate": 9.917525374361912e-05,
"loss": 0.0079,
"step": 2100
},
{
"epoch": 7.226027397260274,
"grad_norm": 0.07024730741977692,
"learning_rate": 9.91602325035939e-05,
"loss": 0.0066,
"step": 2110
},
{
"epoch": 7.260273972602739,
"grad_norm": 0.08250781893730164,
"learning_rate": 9.914507686137019e-05,
"loss": 0.007,
"step": 2120
},
{
"epoch": 7.294520547945205,
"grad_norm": 0.08373738825321198,
"learning_rate": 9.912978685838294e-05,
"loss": 0.0084,
"step": 2130
},
{
"epoch": 7.328767123287671,
"grad_norm": 0.0988110899925232,
"learning_rate": 9.911436253643445e-05,
"loss": 0.0079,
"step": 2140
},
{
"epoch": 7.363013698630137,
"grad_norm": 0.08949548006057739,
"learning_rate": 9.90988039376942e-05,
"loss": 0.0062,
"step": 2150
},
{
"epoch": 7.397260273972603,
"grad_norm": 0.1103079542517662,
"learning_rate": 9.90831111046988e-05,
"loss": 0.0078,
"step": 2160
},
{
"epoch": 7.431506849315069,
"grad_norm": 0.08410000801086426,
"learning_rate": 9.90672840803519e-05,
"loss": 0.0075,
"step": 2170
},
{
"epoch": 7.465753424657534,
"grad_norm": 0.08818292617797852,
"learning_rate": 9.905132290792394e-05,
"loss": 0.009,
"step": 2180
},
{
"epoch": 7.5,
"grad_norm": 0.08308695256710052,
"learning_rate": 9.903522763105218e-05,
"loss": 0.0089,
"step": 2190
},
{
"epoch": 7.534246575342466,
"grad_norm": 0.09962280839681625,
"learning_rate": 9.901899829374047e-05,
"loss": 0.009,
"step": 2200
},
{
"epoch": 7.568493150684931,
"grad_norm": 0.08678163588047028,
"learning_rate": 9.900263494035921e-05,
"loss": 0.008,
"step": 2210
},
{
"epoch": 7.602739726027397,
"grad_norm": 0.0837637260556221,
"learning_rate": 9.89861376156452e-05,
"loss": 0.0072,
"step": 2220
},
{
"epoch": 7.636986301369863,
"grad_norm": 0.09580914676189423,
"learning_rate": 9.896950636470147e-05,
"loss": 0.0077,
"step": 2230
},
{
"epoch": 7.671232876712329,
"grad_norm": 0.11661717295646667,
"learning_rate": 9.895274123299723e-05,
"loss": 0.0071,
"step": 2240
},
{
"epoch": 7.705479452054795,
"grad_norm": 0.10122444480657578,
"learning_rate": 9.893584226636772e-05,
"loss": 0.0086,
"step": 2250
},
{
"epoch": 7.739726027397261,
"grad_norm": 0.111696757376194,
"learning_rate": 9.891880951101407e-05,
"loss": 0.0064,
"step": 2260
},
{
"epoch": 7.773972602739726,
"grad_norm": 0.0847308561205864,
"learning_rate": 9.890164301350318e-05,
"loss": 0.0078,
"step": 2270
},
{
"epoch": 7.808219178082192,
"grad_norm": 0.09149212390184402,
"learning_rate": 9.888434282076758e-05,
"loss": 0.0066,
"step": 2280
},
{
"epoch": 7.842465753424658,
"grad_norm": 0.07390099763870239,
"learning_rate": 9.886690898010535e-05,
"loss": 0.008,
"step": 2290
},
{
"epoch": 7.876712328767123,
"grad_norm": 0.09433721750974655,
"learning_rate": 9.884934153917997e-05,
"loss": 0.0087,
"step": 2300
},
{
"epoch": 7.910958904109589,
"grad_norm": 0.1136075034737587,
"learning_rate": 9.883164054602012e-05,
"loss": 0.0085,
"step": 2310
},
{
"epoch": 7.945205479452055,
"grad_norm": 0.08907122164964676,
"learning_rate": 9.881380604901964e-05,
"loss": 0.0072,
"step": 2320
},
{
"epoch": 7.97945205479452,
"grad_norm": 0.0950908437371254,
"learning_rate": 9.879583809693738e-05,
"loss": 0.0082,
"step": 2330
},
{
"epoch": 8.013698630136986,
"grad_norm": 0.07622794061899185,
"learning_rate": 9.877773673889701e-05,
"loss": 0.0062,
"step": 2340
},
{
"epoch": 8.047945205479452,
"grad_norm": 0.10459341108798981,
"learning_rate": 9.8759502024387e-05,
"loss": 0.0097,
"step": 2350
},
{
"epoch": 8.082191780821917,
"grad_norm": 0.08677548170089722,
"learning_rate": 9.87411340032603e-05,
"loss": 0.0068,
"step": 2360
},
{
"epoch": 8.116438356164384,
"grad_norm": 0.09159575402736664,
"learning_rate": 9.872263272573443e-05,
"loss": 0.0068,
"step": 2370
},
{
"epoch": 8.150684931506849,
"grad_norm": 0.13817910850048065,
"learning_rate": 9.870399824239117e-05,
"loss": 0.0075,
"step": 2380
},
{
"epoch": 8.184931506849315,
"grad_norm": 0.10300517082214355,
"learning_rate": 9.868523060417646e-05,
"loss": 0.0086,
"step": 2390
},
{
"epoch": 8.219178082191782,
"grad_norm": 0.06537918746471405,
"learning_rate": 9.86663298624003e-05,
"loss": 0.0082,
"step": 2400
},
{
"epoch": 8.253424657534246,
"grad_norm": 0.0721374899148941,
"learning_rate": 9.864729606873663e-05,
"loss": 0.0066,
"step": 2410
},
{
"epoch": 8.287671232876713,
"grad_norm": 0.1256812959909439,
"learning_rate": 9.862812927522309e-05,
"loss": 0.0068,
"step": 2420
},
{
"epoch": 8.321917808219178,
"grad_norm": 0.08400053530931473,
"learning_rate": 9.860882953426099e-05,
"loss": 0.0066,
"step": 2430
},
{
"epoch": 8.356164383561644,
"grad_norm": 0.08019398152828217,
"learning_rate": 9.858939689861506e-05,
"loss": 0.0085,
"step": 2440
},
{
"epoch": 8.39041095890411,
"grad_norm": 0.0836905762553215,
"learning_rate": 9.856983142141339e-05,
"loss": 0.0075,
"step": 2450
},
{
"epoch": 8.424657534246576,
"grad_norm": 0.0993325412273407,
"learning_rate": 9.855013315614725e-05,
"loss": 0.007,
"step": 2460
},
{
"epoch": 8.45890410958904,
"grad_norm": 0.08629734069108963,
"learning_rate": 9.853030215667093e-05,
"loss": 0.0073,
"step": 2470
},
{
"epoch": 8.493150684931507,
"grad_norm": 0.08529617637395859,
"learning_rate": 9.851033847720166e-05,
"loss": 0.0083,
"step": 2480
},
{
"epoch": 8.527397260273972,
"grad_norm": 0.10456524789333344,
"learning_rate": 9.849024217231935e-05,
"loss": 0.0076,
"step": 2490
},
{
"epoch": 8.561643835616438,
"grad_norm": 0.09966843575239182,
"learning_rate": 9.847001329696653e-05,
"loss": 0.0079,
"step": 2500
},
{
"epoch": 8.595890410958905,
"grad_norm": 0.1018424928188324,
"learning_rate": 9.844965190644817e-05,
"loss": 0.0075,
"step": 2510
},
{
"epoch": 8.63013698630137,
"grad_norm": 0.09728335589170456,
"learning_rate": 9.842915805643155e-05,
"loss": 0.0057,
"step": 2520
},
{
"epoch": 8.664383561643836,
"grad_norm": 0.12338245660066605,
"learning_rate": 9.840853180294608e-05,
"loss": 0.0081,
"step": 2530
},
{
"epoch": 8.698630136986301,
"grad_norm": 0.10218657553195953,
"learning_rate": 9.838777320238312e-05,
"loss": 0.0067,
"step": 2540
},
{
"epoch": 8.732876712328768,
"grad_norm": 0.10925062745809555,
"learning_rate": 9.836688231149592e-05,
"loss": 0.0074,
"step": 2550
},
{
"epoch": 8.767123287671232,
"grad_norm": 0.08330442756414413,
"learning_rate": 9.834585918739936e-05,
"loss": 0.0067,
"step": 2560
},
{
"epoch": 8.801369863013699,
"grad_norm": 0.11952322721481323,
"learning_rate": 9.832470388756987e-05,
"loss": 0.0069,
"step": 2570
},
{
"epoch": 8.835616438356164,
"grad_norm": 0.0997798889875412,
"learning_rate": 9.830341646984521e-05,
"loss": 0.0071,
"step": 2580
},
{
"epoch": 8.86986301369863,
"grad_norm": 0.07497061789035797,
"learning_rate": 9.82819969924244e-05,
"loss": 0.0058,
"step": 2590
},
{
"epoch": 8.904109589041095,
"grad_norm": 0.08156754821538925,
"learning_rate": 9.826044551386744e-05,
"loss": 0.0067,
"step": 2600
},
{
"epoch": 8.938356164383562,
"grad_norm": 0.08598846197128296,
"learning_rate": 9.823876209309527e-05,
"loss": 0.0103,
"step": 2610
},
{
"epoch": 8.972602739726028,
"grad_norm": 0.09113951027393341,
"learning_rate": 9.821694678938953e-05,
"loss": 0.0062,
"step": 2620
},
{
"epoch": 9.006849315068493,
"grad_norm": 0.10338687896728516,
"learning_rate": 9.819499966239243e-05,
"loss": 0.0064,
"step": 2630
},
{
"epoch": 9.04109589041096,
"grad_norm": 0.09837481379508972,
"learning_rate": 9.817292077210659e-05,
"loss": 0.0071,
"step": 2640
},
{
"epoch": 9.075342465753424,
"grad_norm": 0.0811222493648529,
"learning_rate": 9.815071017889482e-05,
"loss": 0.0087,
"step": 2650
},
{
"epoch": 9.10958904109589,
"grad_norm": 0.10089726746082306,
"learning_rate": 9.812836794348004e-05,
"loss": 0.0112,
"step": 2660
},
{
"epoch": 9.143835616438356,
"grad_norm": 0.12093760073184967,
"learning_rate": 9.81058941269451e-05,
"loss": 0.0077,
"step": 2670
},
{
"epoch": 9.178082191780822,
"grad_norm": 0.09144003689289093,
"learning_rate": 9.808328879073251e-05,
"loss": 0.0066,
"step": 2680
},
{
"epoch": 9.212328767123287,
"grad_norm": 0.05546188727021217,
"learning_rate": 9.806055199664446e-05,
"loss": 0.006,
"step": 2690
},
{
"epoch": 9.246575342465754,
"grad_norm": 0.08728483319282532,
"learning_rate": 9.803768380684242e-05,
"loss": 0.0057,
"step": 2700
},
{
"epoch": 9.280821917808218,
"grad_norm": 0.07582209259271622,
"learning_rate": 9.801468428384716e-05,
"loss": 0.0064,
"step": 2710
},
{
"epoch": 9.315068493150685,
"grad_norm": 0.07939019799232483,
"learning_rate": 9.799155349053851e-05,
"loss": 0.007,
"step": 2720
},
{
"epoch": 9.349315068493151,
"grad_norm": 0.07291562110185623,
"learning_rate": 9.796829149015517e-05,
"loss": 0.007,
"step": 2730
},
{
"epoch": 9.383561643835616,
"grad_norm": 0.06804176419973373,
"learning_rate": 9.794489834629455e-05,
"loss": 0.0073,
"step": 2740
},
{
"epoch": 9.417808219178083,
"grad_norm": 0.0737365186214447,
"learning_rate": 9.792137412291265e-05,
"loss": 0.0062,
"step": 2750
},
{
"epoch": 9.452054794520548,
"grad_norm": 0.08482401072978973,
"learning_rate": 9.789771888432375e-05,
"loss": 0.0083,
"step": 2760
},
{
"epoch": 9.486301369863014,
"grad_norm": 0.08719678968191147,
"learning_rate": 9.787393269520039e-05,
"loss": 0.0073,
"step": 2770
},
{
"epoch": 9.520547945205479,
"grad_norm": 0.0734448954463005,
"learning_rate": 9.785001562057309e-05,
"loss": 0.0067,
"step": 2780
},
{
"epoch": 9.554794520547945,
"grad_norm": 0.08811881393194199,
"learning_rate": 9.782596772583026e-05,
"loss": 0.0062,
"step": 2790
},
{
"epoch": 9.58904109589041,
"grad_norm": 0.07836030423641205,
"learning_rate": 9.780178907671789e-05,
"loss": 0.0067,
"step": 2800
},
{
"epoch": 9.623287671232877,
"grad_norm": 0.066135935485363,
"learning_rate": 9.777747973933948e-05,
"loss": 0.0061,
"step": 2810
},
{
"epoch": 9.657534246575342,
"grad_norm": 0.08274685591459274,
"learning_rate": 9.775303978015585e-05,
"loss": 0.0054,
"step": 2820
},
{
"epoch": 9.691780821917808,
"grad_norm": 0.07670920342206955,
"learning_rate": 9.772846926598491e-05,
"loss": 0.0083,
"step": 2830
},
{
"epoch": 9.726027397260275,
"grad_norm": 0.0781722441315651,
"learning_rate": 9.77037682640015e-05,
"loss": 0.0058,
"step": 2840
},
{
"epoch": 9.76027397260274,
"grad_norm": 0.06222458556294441,
"learning_rate": 9.767893684173721e-05,
"loss": 0.0054,
"step": 2850
},
{
"epoch": 9.794520547945206,
"grad_norm": 0.0780424028635025,
"learning_rate": 9.765397506708023e-05,
"loss": 0.0062,
"step": 2860
},
{
"epoch": 9.82876712328767,
"grad_norm": 0.09629786759614944,
"learning_rate": 9.762888300827507e-05,
"loss": 0.0067,
"step": 2870
},
{
"epoch": 9.863013698630137,
"grad_norm": 0.06657546758651733,
"learning_rate": 9.760366073392246e-05,
"loss": 0.0062,
"step": 2880
},
{
"epoch": 9.897260273972602,
"grad_norm": 0.07197002321481705,
"learning_rate": 9.757830831297914e-05,
"loss": 0.0071,
"step": 2890
},
{
"epoch": 9.931506849315069,
"grad_norm": 0.06577511131763458,
"learning_rate": 9.755282581475769e-05,
"loss": 0.0072,
"step": 2900
},
{
"epoch": 9.965753424657533,
"grad_norm": 0.0619685985147953,
"learning_rate": 9.752721330892624e-05,
"loss": 0.0056,
"step": 2910
},
{
"epoch": 10.0,
"grad_norm": 0.08390691131353378,
"learning_rate": 9.750147086550844e-05,
"loss": 0.0067,
"step": 2920
},
{
"epoch": 10.034246575342467,
"grad_norm": 0.09258890151977539,
"learning_rate": 9.747559855488313e-05,
"loss": 0.0071,
"step": 2930
},
{
"epoch": 10.068493150684931,
"grad_norm": 0.0918927937746048,
"learning_rate": 9.744959644778422e-05,
"loss": 0.0079,
"step": 2940
},
{
"epoch": 10.102739726027398,
"grad_norm": 0.08980443328619003,
"learning_rate": 9.742346461530048e-05,
"loss": 0.0062,
"step": 2950
},
{
"epoch": 10.136986301369863,
"grad_norm": 0.0940912663936615,
"learning_rate": 9.739720312887535e-05,
"loss": 0.0066,
"step": 2960
},
{
"epoch": 10.17123287671233,
"grad_norm": 0.1010262668132782,
"learning_rate": 9.73708120603067e-05,
"loss": 0.0066,
"step": 2970
},
{
"epoch": 10.205479452054794,
"grad_norm": 0.07731979340314865,
"learning_rate": 9.734429148174675e-05,
"loss": 0.0056,
"step": 2980
},
{
"epoch": 10.23972602739726,
"grad_norm": 0.07678744941949844,
"learning_rate": 9.731764146570173e-05,
"loss": 0.0071,
"step": 2990
},
{
"epoch": 10.273972602739725,
"grad_norm": 0.07159540057182312,
"learning_rate": 9.729086208503174e-05,
"loss": 0.0085,
"step": 3000
},
{
"epoch": 10.308219178082192,
"grad_norm": 0.06736049056053162,
"learning_rate": 9.726395341295062e-05,
"loss": 0.0057,
"step": 3010
},
{
"epoch": 10.342465753424657,
"grad_norm": 0.05375010892748833,
"learning_rate": 9.723691552302562e-05,
"loss": 0.0062,
"step": 3020
},
{
"epoch": 10.376712328767123,
"grad_norm": 0.0694858655333519,
"learning_rate": 9.720974848917735e-05,
"loss": 0.0064,
"step": 3030
},
{
"epoch": 10.41095890410959,
"grad_norm": 0.11771047860383987,
"learning_rate": 9.718245238567939e-05,
"loss": 0.007,
"step": 3040
},
{
"epoch": 10.445205479452055,
"grad_norm": 0.08670341968536377,
"learning_rate": 9.715502728715826e-05,
"loss": 0.0065,
"step": 3050
},
{
"epoch": 10.479452054794521,
"grad_norm": 0.08733764290809631,
"learning_rate": 9.712747326859315e-05,
"loss": 0.007,
"step": 3060
},
{
"epoch": 10.513698630136986,
"grad_norm": 0.06813944876194,
"learning_rate": 9.709979040531569e-05,
"loss": 0.0063,
"step": 3070
},
{
"epoch": 10.547945205479452,
"grad_norm": 0.0684538260102272,
"learning_rate": 9.707197877300974e-05,
"loss": 0.0073,
"step": 3080
},
{
"epoch": 10.582191780821917,
"grad_norm": 0.09879663586616516,
"learning_rate": 9.704403844771128e-05,
"loss": 0.0069,
"step": 3090
},
{
"epoch": 10.616438356164384,
"grad_norm": 0.07726743072271347,
"learning_rate": 9.701596950580806e-05,
"loss": 0.0064,
"step": 3100
},
{
"epoch": 10.650684931506849,
"grad_norm": 0.1269214004278183,
"learning_rate": 9.698777202403953e-05,
"loss": 0.0068,
"step": 3110
},
{
"epoch": 10.684931506849315,
"grad_norm": 0.08792764693498611,
"learning_rate": 9.695944607949649e-05,
"loss": 0.0065,
"step": 3120
},
{
"epoch": 10.719178082191782,
"grad_norm": 0.07960551977157593,
"learning_rate": 9.693099174962103e-05,
"loss": 0.0086,
"step": 3130
},
{
"epoch": 10.753424657534246,
"grad_norm": 0.0761343464255333,
"learning_rate": 9.690240911220618e-05,
"loss": 0.0057,
"step": 3140
},
{
"epoch": 10.787671232876713,
"grad_norm": 0.09499238431453705,
"learning_rate": 9.687369824539577e-05,
"loss": 0.0068,
"step": 3150
},
{
"epoch": 10.821917808219178,
"grad_norm": 0.0680365189909935,
"learning_rate": 9.684485922768422e-05,
"loss": 0.0064,
"step": 3160
},
{
"epoch": 10.856164383561644,
"grad_norm": 0.0675700306892395,
"learning_rate": 9.681589213791633e-05,
"loss": 0.0075,
"step": 3170
},
{
"epoch": 10.89041095890411,
"grad_norm": 0.08872543275356293,
"learning_rate": 9.6786797055287e-05,
"loss": 0.0081,
"step": 3180
},
{
"epoch": 10.924657534246576,
"grad_norm": 0.09527339786291122,
"learning_rate": 9.675757405934103e-05,
"loss": 0.0076,
"step": 3190
},
{
"epoch": 10.95890410958904,
"grad_norm": 0.08772981911897659,
"learning_rate": 9.672822322997305e-05,
"loss": 0.0066,
"step": 3200
},
{
"epoch": 10.993150684931507,
"grad_norm": 0.08488666266202927,
"learning_rate": 9.669874464742705e-05,
"loss": 0.0081,
"step": 3210
},
{
"epoch": 11.027397260273972,
"grad_norm": 0.0668686255812645,
"learning_rate": 9.66691383922964e-05,
"loss": 0.0064,
"step": 3220
},
{
"epoch": 11.061643835616438,
"grad_norm": 0.08201269805431366,
"learning_rate": 9.663940454552342e-05,
"loss": 0.0063,
"step": 3230
},
{
"epoch": 11.095890410958905,
"grad_norm": 0.10006823390722275,
"learning_rate": 9.660954318839933e-05,
"loss": 0.0067,
"step": 3240
},
{
"epoch": 11.13013698630137,
"grad_norm": 0.06139950081706047,
"learning_rate": 9.657955440256395e-05,
"loss": 0.0062,
"step": 3250
},
{
"epoch": 11.164383561643836,
"grad_norm": 0.05843405798077583,
"learning_rate": 9.654943827000548e-05,
"loss": 0.0057,
"step": 3260
},
{
"epoch": 11.198630136986301,
"grad_norm": 0.12055237591266632,
"learning_rate": 9.651919487306025e-05,
"loss": 0.007,
"step": 3270
},
{
"epoch": 11.232876712328768,
"grad_norm": 0.09484998136758804,
"learning_rate": 9.648882429441257e-05,
"loss": 0.0077,
"step": 3280
},
{
"epoch": 11.267123287671232,
"grad_norm": 0.10352499783039093,
"learning_rate": 9.645832661709444e-05,
"loss": 0.0079,
"step": 3290
},
{
"epoch": 11.301369863013699,
"grad_norm": 0.08123885840177536,
"learning_rate": 9.642770192448536e-05,
"loss": 0.0086,
"step": 3300
},
{
"epoch": 11.335616438356164,
"grad_norm": 0.07840955257415771,
"learning_rate": 9.639695030031204e-05,
"loss": 0.0073,
"step": 3310
},
{
"epoch": 11.36986301369863,
"grad_norm": 0.11473023891448975,
"learning_rate": 9.636607182864827e-05,
"loss": 0.008,
"step": 3320
},
{
"epoch": 11.404109589041095,
"grad_norm": 0.09389324486255646,
"learning_rate": 9.63350665939146e-05,
"loss": 0.0067,
"step": 3330
},
{
"epoch": 11.438356164383562,
"grad_norm": 0.0731404572725296,
"learning_rate": 9.630393468087818e-05,
"loss": 0.0077,
"step": 3340
},
{
"epoch": 11.472602739726028,
"grad_norm": 0.06645851582288742,
"learning_rate": 9.627267617465243e-05,
"loss": 0.0069,
"step": 3350
},
{
"epoch": 11.506849315068493,
"grad_norm": 0.07165537774562836,
"learning_rate": 9.624129116069694e-05,
"loss": 0.0072,
"step": 3360
},
{
"epoch": 11.54109589041096,
"grad_norm": 0.05754755809903145,
"learning_rate": 9.620977972481716e-05,
"loss": 0.0063,
"step": 3370
},
{
"epoch": 11.575342465753424,
"grad_norm": 0.07463119179010391,
"learning_rate": 9.617814195316411e-05,
"loss": 0.0058,
"step": 3380
},
{
"epoch": 11.60958904109589,
"grad_norm": 0.05406510457396507,
"learning_rate": 9.614637793223425e-05,
"loss": 0.006,
"step": 3390
},
{
"epoch": 11.643835616438356,
"grad_norm": 0.07864365726709366,
"learning_rate": 9.611448774886924e-05,
"loss": 0.006,
"step": 3400
},
{
"epoch": 11.678082191780822,
"grad_norm": 0.05467298626899719,
"learning_rate": 9.60824714902556e-05,
"loss": 0.0069,
"step": 3410
},
{
"epoch": 11.712328767123287,
"grad_norm": 0.06272034347057343,
"learning_rate": 9.605032924392457e-05,
"loss": 0.0062,
"step": 3420
},
{
"epoch": 11.746575342465754,
"grad_norm": 0.1005667969584465,
"learning_rate": 9.601806109775179e-05,
"loss": 0.0077,
"step": 3430
},
{
"epoch": 11.780821917808218,
"grad_norm": 0.07537668198347092,
"learning_rate": 9.598566713995718e-05,
"loss": 0.0069,
"step": 3440
},
{
"epoch": 11.815068493150685,
"grad_norm": 0.05782156065106392,
"learning_rate": 9.595314745910456e-05,
"loss": 0.0051,
"step": 3450
},
{
"epoch": 11.849315068493151,
"grad_norm": 0.09299279749393463,
"learning_rate": 9.59205021441015e-05,
"loss": 0.0056,
"step": 3460
},
{
"epoch": 11.883561643835616,
"grad_norm": 0.05052751302719116,
"learning_rate": 9.588773128419906e-05,
"loss": 0.0061,
"step": 3470
},
{
"epoch": 11.917808219178083,
"grad_norm": 0.07338104397058487,
"learning_rate": 9.58548349689915e-05,
"loss": 0.0063,
"step": 3480
},
{
"epoch": 11.952054794520548,
"grad_norm": 0.0899667963385582,
"learning_rate": 9.582181328841611e-05,
"loss": 0.0061,
"step": 3490
},
{
"epoch": 11.986301369863014,
"grad_norm": 0.06916534900665283,
"learning_rate": 9.578866633275288e-05,
"loss": 0.0062,
"step": 3500
},
{
"epoch": 12.020547945205479,
"grad_norm": 0.08347468823194504,
"learning_rate": 9.575539419262434e-05,
"loss": 0.0081,
"step": 3510
},
{
"epoch": 12.054794520547945,
"grad_norm": 0.0927470400929451,
"learning_rate": 9.572199695899522e-05,
"loss": 0.0054,
"step": 3520
},
{
"epoch": 12.08904109589041,
"grad_norm": 0.07318168878555298,
"learning_rate": 9.568847472317232e-05,
"loss": 0.0056,
"step": 3530
},
{
"epoch": 12.123287671232877,
"grad_norm": 0.05342930555343628,
"learning_rate": 9.565482757680415e-05,
"loss": 0.0054,
"step": 3540
},
{
"epoch": 12.157534246575343,
"grad_norm": 0.05967690050601959,
"learning_rate": 9.562105561188069e-05,
"loss": 0.0054,
"step": 3550
},
{
"epoch": 12.191780821917808,
"grad_norm": 0.06279722601175308,
"learning_rate": 9.558715892073323e-05,
"loss": 0.007,
"step": 3560
},
{
"epoch": 12.226027397260275,
"grad_norm": 0.06517907232046127,
"learning_rate": 9.555313759603402e-05,
"loss": 0.0059,
"step": 3570
},
{
"epoch": 12.26027397260274,
"grad_norm": 0.06889758259057999,
"learning_rate": 9.551899173079607e-05,
"loss": 0.0053,
"step": 3580
},
{
"epoch": 12.294520547945206,
"grad_norm": 0.06394177675247192,
"learning_rate": 9.548472141837286e-05,
"loss": 0.0055,
"step": 3590
},
{
"epoch": 12.32876712328767,
"grad_norm": 0.06316519528627396,
"learning_rate": 9.545032675245813e-05,
"loss": 0.0062,
"step": 3600
},
{
"epoch": 12.363013698630137,
"grad_norm": 0.10206209123134613,
"learning_rate": 9.541580782708557e-05,
"loss": 0.0064,
"step": 3610
},
{
"epoch": 12.397260273972602,
"grad_norm": 0.09707853198051453,
"learning_rate": 9.538116473662861e-05,
"loss": 0.0061,
"step": 3620
},
{
"epoch": 12.431506849315069,
"grad_norm": 0.09929731488227844,
"learning_rate": 9.534639757580013e-05,
"loss": 0.0055,
"step": 3630
},
{
"epoch": 12.465753424657533,
"grad_norm": 0.08357489854097366,
"learning_rate": 9.531150643965223e-05,
"loss": 0.0049,
"step": 3640
},
{
"epoch": 12.5,
"grad_norm": 0.08069298416376114,
"learning_rate": 9.527649142357596e-05,
"loss": 0.0061,
"step": 3650
},
{
"epoch": 12.534246575342467,
"grad_norm": 0.05960075184702873,
"learning_rate": 9.524135262330098e-05,
"loss": 0.0053,
"step": 3660
},
{
"epoch": 12.568493150684931,
"grad_norm": 0.06529633700847626,
"learning_rate": 9.520609013489547e-05,
"loss": 0.0069,
"step": 3670
},
{
"epoch": 12.602739726027398,
"grad_norm": 0.07700713723897934,
"learning_rate": 9.517070405476575e-05,
"loss": 0.0057,
"step": 3680
},
{
"epoch": 12.636986301369863,
"grad_norm": 0.07295393943786621,
"learning_rate": 9.513519447965595e-05,
"loss": 0.0057,
"step": 3690
},
{
"epoch": 12.67123287671233,
"grad_norm": 0.08430688083171844,
"learning_rate": 9.509956150664796e-05,
"loss": 0.0061,
"step": 3700
},
{
"epoch": 12.705479452054794,
"grad_norm": 0.051374781876802444,
"learning_rate": 9.50638052331609e-05,
"loss": 0.0062,
"step": 3710
},
{
"epoch": 12.73972602739726,
"grad_norm": 0.0681716650724411,
"learning_rate": 9.502792575695112e-05,
"loss": 0.0097,
"step": 3720
},
{
"epoch": 12.773972602739725,
"grad_norm": 0.09235887974500656,
"learning_rate": 9.499192317611167e-05,
"loss": 0.0071,
"step": 3730
},
{
"epoch": 12.808219178082192,
"grad_norm": 0.07686050236225128,
"learning_rate": 9.49557975890723e-05,
"loss": 0.008,
"step": 3740
},
{
"epoch": 12.842465753424658,
"grad_norm": 0.08247129619121552,
"learning_rate": 9.491954909459895e-05,
"loss": 0.0072,
"step": 3750
},
{
"epoch": 12.876712328767123,
"grad_norm": 0.06982657313346863,
"learning_rate": 9.488317779179361e-05,
"loss": 0.0052,
"step": 3760
},
{
"epoch": 12.91095890410959,
"grad_norm": 0.07579068094491959,
"learning_rate": 9.484668378009408e-05,
"loss": 0.0063,
"step": 3770
},
{
"epoch": 12.945205479452055,
"grad_norm": 0.09651331603527069,
"learning_rate": 9.481006715927351e-05,
"loss": 0.0066,
"step": 3780
},
{
"epoch": 12.979452054794521,
"grad_norm": 0.07850632816553116,
"learning_rate": 9.477332802944044e-05,
"loss": 0.0067,
"step": 3790
},
{
"epoch": 13.013698630136986,
"grad_norm": 0.07941552251577377,
"learning_rate": 9.473646649103818e-05,
"loss": 0.0047,
"step": 3800
},
{
"epoch": 13.047945205479452,
"grad_norm": 0.06961599737405777,
"learning_rate": 9.46994826448448e-05,
"loss": 0.0064,
"step": 3810
},
{
"epoch": 13.082191780821917,
"grad_norm": 0.09865850955247879,
"learning_rate": 9.46623765919727e-05,
"loss": 0.0074,
"step": 3820
},
{
"epoch": 13.116438356164384,
"grad_norm": 0.06311442703008652,
"learning_rate": 9.462514843386845e-05,
"loss": 0.0058,
"step": 3830
},
{
"epoch": 13.150684931506849,
"grad_norm": 0.06989779323339462,
"learning_rate": 9.458779827231237e-05,
"loss": 0.0071,
"step": 3840
},
{
"epoch": 13.184931506849315,
"grad_norm": 0.07357992976903915,
"learning_rate": 9.45503262094184e-05,
"loss": 0.0062,
"step": 3850
},
{
"epoch": 13.219178082191782,
"grad_norm": 0.07631329447031021,
"learning_rate": 9.451273234763371e-05,
"loss": 0.0054,
"step": 3860
},
{
"epoch": 13.253424657534246,
"grad_norm": 0.07587280869483948,
"learning_rate": 9.447501678973852e-05,
"loss": 0.0062,
"step": 3870
},
{
"epoch": 13.287671232876713,
"grad_norm": 0.07038256525993347,
"learning_rate": 9.443717963884569e-05,
"loss": 0.0067,
"step": 3880
},
{
"epoch": 13.321917808219178,
"grad_norm": 0.06246664375066757,
"learning_rate": 9.439922099840054e-05,
"loss": 0.0055,
"step": 3890
},
{
"epoch": 13.356164383561644,
"grad_norm": 0.07346326857805252,
"learning_rate": 9.43611409721806e-05,
"loss": 0.0059,
"step": 3900
},
{
"epoch": 13.39041095890411,
"grad_norm": 0.08977990597486496,
"learning_rate": 9.432293966429514e-05,
"loss": 0.0063,
"step": 3910
},
{
"epoch": 13.424657534246576,
"grad_norm": 0.06775444746017456,
"learning_rate": 9.428461717918511e-05,
"loss": 0.0057,
"step": 3920
},
{
"epoch": 13.45890410958904,
"grad_norm": 0.08324276655912399,
"learning_rate": 9.424617362162271e-05,
"loss": 0.0047,
"step": 3930
},
{
"epoch": 13.493150684931507,
"grad_norm": 0.09544079750776291,
"learning_rate": 9.420760909671118e-05,
"loss": 0.0066,
"step": 3940
},
{
"epoch": 13.527397260273972,
"grad_norm": 0.09019865840673447,
"learning_rate": 9.416892370988444e-05,
"loss": 0.0071,
"step": 3950
},
{
"epoch": 13.561643835616438,
"grad_norm": 0.07820634543895721,
"learning_rate": 9.413011756690685e-05,
"loss": 0.0078,
"step": 3960
},
{
"epoch": 13.595890410958905,
"grad_norm": 0.07065964490175247,
"learning_rate": 9.409119077387294e-05,
"loss": 0.0079,
"step": 3970
},
{
"epoch": 13.63013698630137,
"grad_norm": 0.07494448125362396,
"learning_rate": 9.405214343720707e-05,
"loss": 0.0054,
"step": 3980
},
{
"epoch": 13.664383561643836,
"grad_norm": 0.07330919057130814,
"learning_rate": 9.401297566366318e-05,
"loss": 0.0073,
"step": 3990
},
{
"epoch": 13.698630136986301,
"grad_norm": 0.07755449414253235,
"learning_rate": 9.397368756032445e-05,
"loss": 0.0053,
"step": 4000
},
{
"epoch": 13.732876712328768,
"grad_norm": 0.0835888609290123,
"learning_rate": 9.393427923460308e-05,
"loss": 0.0069,
"step": 4010
},
{
"epoch": 13.767123287671232,
"grad_norm": 0.09608691185712814,
"learning_rate": 9.389475079423988e-05,
"loss": 0.0062,
"step": 4020
},
{
"epoch": 13.801369863013699,
"grad_norm": 0.06515517085790634,
"learning_rate": 9.385510234730415e-05,
"loss": 0.0049,
"step": 4030
},
{
"epoch": 13.835616438356164,
"grad_norm": 0.07440678030252457,
"learning_rate": 9.381533400219318e-05,
"loss": 0.0062,
"step": 4040
},
{
"epoch": 13.86986301369863,
"grad_norm": 0.0674986019730568,
"learning_rate": 9.377544586763215e-05,
"loss": 0.0057,
"step": 4050
},
{
"epoch": 13.904109589041095,
"grad_norm": 0.09010426700115204,
"learning_rate": 9.373543805267368e-05,
"loss": 0.0084,
"step": 4060
},
{
"epoch": 13.938356164383562,
"grad_norm": 0.09228496253490448,
"learning_rate": 9.369531066669758e-05,
"loss": 0.0065,
"step": 4070
},
{
"epoch": 13.972602739726028,
"grad_norm": 0.11372701078653336,
"learning_rate": 9.365506381941066e-05,
"loss": 0.0067,
"step": 4080
},
{
"epoch": 14.006849315068493,
"grad_norm": 0.08348413556814194,
"learning_rate": 9.36146976208462e-05,
"loss": 0.0065,
"step": 4090
},
{
"epoch": 14.04109589041096,
"grad_norm": 0.0798589214682579,
"learning_rate": 9.357421218136386e-05,
"loss": 0.0056,
"step": 4100
},
{
"epoch": 14.075342465753424,
"grad_norm": 0.0991537868976593,
"learning_rate": 9.353360761164931e-05,
"loss": 0.0051,
"step": 4110
},
{
"epoch": 14.10958904109589,
"grad_norm": 0.0866604894399643,
"learning_rate": 9.349288402271388e-05,
"loss": 0.0049,
"step": 4120
},
{
"epoch": 14.143835616438356,
"grad_norm": 0.07060110569000244,
"learning_rate": 9.345204152589428e-05,
"loss": 0.0059,
"step": 4130
},
{
"epoch": 14.178082191780822,
"grad_norm": 0.12644457817077637,
"learning_rate": 9.341108023285238e-05,
"loss": 0.0064,
"step": 4140
},
{
"epoch": 14.212328767123287,
"grad_norm": 0.10066480189561844,
"learning_rate": 9.337000025557476e-05,
"loss": 0.0053,
"step": 4150
},
{
"epoch": 14.246575342465754,
"grad_norm": 0.06575191020965576,
"learning_rate": 9.332880170637252e-05,
"loss": 0.0065,
"step": 4160
},
{
"epoch": 14.280821917808218,
"grad_norm": 0.06894955784082413,
"learning_rate": 9.328748469788093e-05,
"loss": 0.0053,
"step": 4170
},
{
"epoch": 14.315068493150685,
"grad_norm": 0.08669496327638626,
"learning_rate": 9.32460493430591e-05,
"loss": 0.005,
"step": 4180
},
{
"epoch": 14.349315068493151,
"grad_norm": 0.08158606290817261,
"learning_rate": 9.320449575518972e-05,
"loss": 0.006,
"step": 4190
},
{
"epoch": 14.383561643835616,
"grad_norm": 0.09702680259943008,
"learning_rate": 9.316282404787871e-05,
"loss": 0.005,
"step": 4200
},
{
"epoch": 14.417808219178083,
"grad_norm": 0.08489725738763809,
"learning_rate": 9.31210343350549e-05,
"loss": 0.0059,
"step": 4210
},
{
"epoch": 14.452054794520548,
"grad_norm": 0.08486241847276688,
"learning_rate": 9.30791267309698e-05,
"loss": 0.0056,
"step": 4220
},
{
"epoch": 14.486301369863014,
"grad_norm": 0.11101726442575455,
"learning_rate": 9.30371013501972e-05,
"loss": 0.0063,
"step": 4230
},
{
"epoch": 14.520547945205479,
"grad_norm": 0.10619505494832993,
"learning_rate": 9.299495830763286e-05,
"loss": 0.0063,
"step": 4240
},
{
"epoch": 14.554794520547945,
"grad_norm": 0.08418061584234238,
"learning_rate": 9.295269771849427e-05,
"loss": 0.0054,
"step": 4250
},
{
"epoch": 14.58904109589041,
"grad_norm": 0.07915527373552322,
"learning_rate": 9.291031969832026e-05,
"loss": 0.0068,
"step": 4260
},
{
"epoch": 14.623287671232877,
"grad_norm": 0.0827726274728775,
"learning_rate": 9.286782436297073e-05,
"loss": 0.0077,
"step": 4270
},
{
"epoch": 14.657534246575342,
"grad_norm": 0.07672649621963501,
"learning_rate": 9.282521182862629e-05,
"loss": 0.0059,
"step": 4280
},
{
"epoch": 14.691780821917808,
"grad_norm": 0.08115788549184799,
"learning_rate": 9.278248221178798e-05,
"loss": 0.0051,
"step": 4290
},
{
"epoch": 14.726027397260275,
"grad_norm": 0.06016877293586731,
"learning_rate": 9.273963562927695e-05,
"loss": 0.0053,
"step": 4300
},
{
"epoch": 14.76027397260274,
"grad_norm": 0.06568919122219086,
"learning_rate": 9.269667219823412e-05,
"loss": 0.0053,
"step": 4310
},
{
"epoch": 14.794520547945206,
"grad_norm": 0.06511034816503525,
"learning_rate": 9.265359203611987e-05,
"loss": 0.0045,
"step": 4320
},
{
"epoch": 14.82876712328767,
"grad_norm": 0.0742722898721695,
"learning_rate": 9.261039526071374e-05,
"loss": 0.0067,
"step": 4330
},
{
"epoch": 14.863013698630137,
"grad_norm": 0.0778801292181015,
"learning_rate": 9.256708199011401e-05,
"loss": 0.007,
"step": 4340
},
{
"epoch": 14.897260273972602,
"grad_norm": 0.08015526086091995,
"learning_rate": 9.252365234273755e-05,
"loss": 0.0058,
"step": 4350
},
{
"epoch": 14.931506849315069,
"grad_norm": 0.08591359853744507,
"learning_rate": 9.248010643731935e-05,
"loss": 0.0048,
"step": 4360
},
{
"epoch": 14.965753424657533,
"grad_norm": 0.09805981069803238,
"learning_rate": 9.243644439291223e-05,
"loss": 0.006,
"step": 4370
},
{
"epoch": 15.0,
"grad_norm": 0.06617870181798935,
"learning_rate": 9.239266632888659e-05,
"loss": 0.0068,
"step": 4380
},
{
"epoch": 15.034246575342467,
"grad_norm": 0.08026549965143204,
"learning_rate": 9.234877236492997e-05,
"loss": 0.0049,
"step": 4390
},
{
"epoch": 15.068493150684931,
"grad_norm": 0.081352598965168,
"learning_rate": 9.230476262104677e-05,
"loss": 0.0062,
"step": 4400
},
{
"epoch": 15.102739726027398,
"grad_norm": 0.07488571107387543,
"learning_rate": 9.226063721755799e-05,
"loss": 0.0059,
"step": 4410
},
{
"epoch": 15.136986301369863,
"grad_norm": 0.09087924659252167,
"learning_rate": 9.221639627510076e-05,
"loss": 0.0081,
"step": 4420
},
{
"epoch": 15.17123287671233,
"grad_norm": 0.08181889355182648,
"learning_rate": 9.217203991462815e-05,
"loss": 0.0072,
"step": 4430
},
{
"epoch": 15.205479452054794,
"grad_norm": 0.0886894091963768,
"learning_rate": 9.212756825740873e-05,
"loss": 0.0062,
"step": 4440
},
{
"epoch": 15.23972602739726,
"grad_norm": 0.06616362184286118,
"learning_rate": 9.208298142502636e-05,
"loss": 0.0055,
"step": 4450
},
{
"epoch": 15.273972602739725,
"grad_norm": 0.07078687101602554,
"learning_rate": 9.20382795393797e-05,
"loss": 0.0058,
"step": 4460
},
{
"epoch": 15.308219178082192,
"grad_norm": 0.09435564279556274,
"learning_rate": 9.199346272268199e-05,
"loss": 0.0056,
"step": 4470
},
{
"epoch": 15.342465753424657,
"grad_norm": 0.10773882269859314,
"learning_rate": 9.194853109746074e-05,
"loss": 0.0077,
"step": 4480
},
{
"epoch": 15.376712328767123,
"grad_norm": 0.08089710026979446,
"learning_rate": 9.190348478655724e-05,
"loss": 0.0069,
"step": 4490
},
{
"epoch": 15.41095890410959,
"grad_norm": 0.0753050446510315,
"learning_rate": 9.185832391312644e-05,
"loss": 0.0083,
"step": 4500
},
{
"epoch": 15.445205479452055,
"grad_norm": 0.07557237148284912,
"learning_rate": 9.18130486006364e-05,
"loss": 0.0064,
"step": 4510
},
{
"epoch": 15.479452054794521,
"grad_norm": 0.07062719762325287,
"learning_rate": 9.176765897286813e-05,
"loss": 0.0079,
"step": 4520
},
{
"epoch": 15.513698630136986,
"grad_norm": 0.05990029498934746,
"learning_rate": 9.17221551539151e-05,
"loss": 0.006,
"step": 4530
},
{
"epoch": 15.547945205479452,
"grad_norm": 0.08006462454795837,
"learning_rate": 9.167653726818305e-05,
"loss": 0.0063,
"step": 4540
},
{
"epoch": 15.582191780821917,
"grad_norm": 0.0626312643289566,
"learning_rate": 9.163080544038952e-05,
"loss": 0.0059,
"step": 4550
},
{
"epoch": 15.616438356164384,
"grad_norm": 0.07849642634391785,
"learning_rate": 9.158495979556358e-05,
"loss": 0.0062,
"step": 4560
},
{
"epoch": 15.650684931506849,
"grad_norm": 0.06645455211400986,
"learning_rate": 9.153900045904549e-05,
"loss": 0.0054,
"step": 4570
},
{
"epoch": 15.684931506849315,
"grad_norm": 0.09787298738956451,
"learning_rate": 9.14929275564863e-05,
"loss": 0.005,
"step": 4580
},
{
"epoch": 15.719178082191782,
"grad_norm": 0.09240013360977173,
"learning_rate": 9.144674121384757e-05,
"loss": 0.0062,
"step": 4590
},
{
"epoch": 15.753424657534246,
"grad_norm": 0.06534786522388458,
"learning_rate": 9.140044155740101e-05,
"loss": 0.0059,
"step": 4600
},
{
"epoch": 15.787671232876713,
"grad_norm": 0.0793117806315422,
"learning_rate": 9.135402871372808e-05,
"loss": 0.0068,
"step": 4610
},
{
"epoch": 15.821917808219178,
"grad_norm": 0.07911541312932968,
"learning_rate": 9.130750280971978e-05,
"loss": 0.0062,
"step": 4620
},
{
"epoch": 15.856164383561644,
"grad_norm": 0.11696206033229828,
"learning_rate": 9.126086397257612e-05,
"loss": 0.0051,
"step": 4630
},
{
"epoch": 15.89041095890411,
"grad_norm": 0.09704367071390152,
"learning_rate": 9.121411232980588e-05,
"loss": 0.0061,
"step": 4640
},
{
"epoch": 15.924657534246576,
"grad_norm": 0.09515467286109924,
"learning_rate": 9.116724800922629e-05,
"loss": 0.0063,
"step": 4650
},
{
"epoch": 15.95890410958904,
"grad_norm": 0.0956050381064415,
"learning_rate": 9.112027113896262e-05,
"loss": 0.0051,
"step": 4660
},
{
"epoch": 15.993150684931507,
"grad_norm": 0.06389576941728592,
"learning_rate": 9.107318184744781e-05,
"loss": 0.0054,
"step": 4670
},
{
"epoch": 16.027397260273972,
"grad_norm": 0.08562270551919937,
"learning_rate": 9.102598026342222e-05,
"loss": 0.0057,
"step": 4680
},
{
"epoch": 16.061643835616437,
"grad_norm": 0.07711412012577057,
"learning_rate": 9.097866651593317e-05,
"loss": 0.005,
"step": 4690
},
{
"epoch": 16.095890410958905,
"grad_norm": 0.09755413234233856,
"learning_rate": 9.093124073433463e-05,
"loss": 0.0052,
"step": 4700
},
{
"epoch": 16.13013698630137,
"grad_norm": 0.09233304858207703,
"learning_rate": 9.088370304828685e-05,
"loss": 0.0051,
"step": 4710
},
{
"epoch": 16.164383561643834,
"grad_norm": 0.08450305461883545,
"learning_rate": 9.083605358775612e-05,
"loss": 0.0062,
"step": 4720
},
{
"epoch": 16.198630136986303,
"grad_norm": 0.057008977979421616,
"learning_rate": 9.078829248301417e-05,
"loss": 0.0061,
"step": 4730
},
{
"epoch": 16.232876712328768,
"grad_norm": 0.06467018276453018,
"learning_rate": 9.074041986463808e-05,
"loss": 0.006,
"step": 4740
},
{
"epoch": 16.267123287671232,
"grad_norm": 0.08674854785203934,
"learning_rate": 9.069243586350975e-05,
"loss": 0.0053,
"step": 4750
},
{
"epoch": 16.301369863013697,
"grad_norm": 0.06117716431617737,
"learning_rate": 9.064434061081562e-05,
"loss": 0.0056,
"step": 4760
},
{
"epoch": 16.335616438356166,
"grad_norm": 0.07046956568956375,
"learning_rate": 9.059613423804623e-05,
"loss": 0.0049,
"step": 4770
},
{
"epoch": 16.36986301369863,
"grad_norm": 0.11593141406774521,
"learning_rate": 9.0547816876996e-05,
"loss": 0.0073,
"step": 4780
},
{
"epoch": 16.404109589041095,
"grad_norm": 0.07755231112241745,
"learning_rate": 9.049938865976275e-05,
"loss": 0.007,
"step": 4790
},
{
"epoch": 16.438356164383563,
"grad_norm": 0.10014522075653076,
"learning_rate": 9.045084971874738e-05,
"loss": 0.0066,
"step": 4800
},
{
"epoch": 16.472602739726028,
"grad_norm": 0.07113610208034515,
"learning_rate": 9.040220018665347e-05,
"loss": 0.0056,
"step": 4810
},
{
"epoch": 16.506849315068493,
"grad_norm": 0.08371955156326294,
"learning_rate": 9.035344019648702e-05,
"loss": 0.0064,
"step": 4820
},
{
"epoch": 16.541095890410958,
"grad_norm": 0.08940327167510986,
"learning_rate": 9.030456988155596e-05,
"loss": 0.0075,
"step": 4830
},
{
"epoch": 16.575342465753426,
"grad_norm": 0.0665537565946579,
"learning_rate": 9.025558937546988e-05,
"loss": 0.0043,
"step": 4840
},
{
"epoch": 16.60958904109589,
"grad_norm": 0.07203962653875351,
"learning_rate": 9.020649881213958e-05,
"loss": 0.0064,
"step": 4850
},
{
"epoch": 16.643835616438356,
"grad_norm": 0.06501701474189758,
"learning_rate": 9.015729832577681e-05,
"loss": 0.0056,
"step": 4860
},
{
"epoch": 16.67808219178082,
"grad_norm": 0.08230343461036682,
"learning_rate": 9.010798805089384e-05,
"loss": 0.0054,
"step": 4870
},
{
"epoch": 16.71232876712329,
"grad_norm": 0.08290053904056549,
"learning_rate": 9.005856812230304e-05,
"loss": 0.0064,
"step": 4880
},
{
"epoch": 16.746575342465754,
"grad_norm": 0.05811435729265213,
"learning_rate": 9.000903867511666e-05,
"loss": 0.0044,
"step": 4890
},
{
"epoch": 16.78082191780822,
"grad_norm": 0.08605169504880905,
"learning_rate": 8.995939984474624e-05,
"loss": 0.0052,
"step": 4900
},
{
"epoch": 16.815068493150687,
"grad_norm": 0.08715452998876572,
"learning_rate": 8.990965176690252e-05,
"loss": 0.0095,
"step": 4910
},
{
"epoch": 16.84931506849315,
"grad_norm": 0.08186474442481995,
"learning_rate": 8.98597945775948e-05,
"loss": 0.0076,
"step": 4920
},
{
"epoch": 16.883561643835616,
"grad_norm": 0.07201925665140152,
"learning_rate": 8.980982841313074e-05,
"loss": 0.0063,
"step": 4930
},
{
"epoch": 16.91780821917808,
"grad_norm": 0.08393460512161255,
"learning_rate": 8.975975341011596e-05,
"loss": 0.0043,
"step": 4940
},
{
"epoch": 16.95205479452055,
"grad_norm": 0.07584802061319351,
"learning_rate": 8.970956970545355e-05,
"loss": 0.0049,
"step": 4950
},
{
"epoch": 16.986301369863014,
"grad_norm": 0.09148117899894714,
"learning_rate": 8.965927743634391e-05,
"loss": 0.0051,
"step": 4960
},
{
"epoch": 17.02054794520548,
"grad_norm": 0.07554657012224197,
"learning_rate": 8.96088767402841e-05,
"loss": 0.0048,
"step": 4970
},
{
"epoch": 17.054794520547944,
"grad_norm": 0.06879697740077972,
"learning_rate": 8.955836775506776e-05,
"loss": 0.0067,
"step": 4980
},
{
"epoch": 17.089041095890412,
"grad_norm": 0.06374925374984741,
"learning_rate": 8.950775061878453e-05,
"loss": 0.0045,
"step": 4990
},
{
"epoch": 17.123287671232877,
"grad_norm": 0.07682766765356064,
"learning_rate": 8.945702546981969e-05,
"loss": 0.0053,
"step": 5000
},
{
"epoch": 17.15753424657534,
"grad_norm": 0.057352907955646515,
"learning_rate": 8.940619244685388e-05,
"loss": 0.0052,
"step": 5010
},
{
"epoch": 17.19178082191781,
"grad_norm": 0.07238159328699112,
"learning_rate": 8.935525168886262e-05,
"loss": 0.0058,
"step": 5020
},
{
"epoch": 17.226027397260275,
"grad_norm": 0.06098822504281998,
"learning_rate": 8.930420333511606e-05,
"loss": 0.0051,
"step": 5030
},
{
"epoch": 17.26027397260274,
"grad_norm": 0.0835380032658577,
"learning_rate": 8.92530475251784e-05,
"loss": 0.0069,
"step": 5040
},
{
"epoch": 17.294520547945204,
"grad_norm": 0.04726843908429146,
"learning_rate": 8.920178439890765e-05,
"loss": 0.0049,
"step": 5050
},
{
"epoch": 17.328767123287673,
"grad_norm": 0.09047394245862961,
"learning_rate": 8.91504140964553e-05,
"loss": 0.0045,
"step": 5060
},
{
"epoch": 17.363013698630137,
"grad_norm": 0.05919186770915985,
"learning_rate": 8.909893675826574e-05,
"loss": 0.0057,
"step": 5070
},
{
"epoch": 17.397260273972602,
"grad_norm": 0.08238182961940765,
"learning_rate": 8.90473525250761e-05,
"loss": 0.0053,
"step": 5080
},
{
"epoch": 17.431506849315067,
"grad_norm": 0.0925091803073883,
"learning_rate": 8.899566153791566e-05,
"loss": 0.0064,
"step": 5090
},
{
"epoch": 17.465753424657535,
"grad_norm": 0.0697120800614357,
"learning_rate": 8.894386393810563e-05,
"loss": 0.0055,
"step": 5100
},
{
"epoch": 17.5,
"grad_norm": 0.0903661847114563,
"learning_rate": 8.889195986725865e-05,
"loss": 0.0071,
"step": 5110
},
{
"epoch": 17.534246575342465,
"grad_norm": 0.07673303037881851,
"learning_rate": 8.883994946727849e-05,
"loss": 0.0049,
"step": 5120
},
{
"epoch": 17.568493150684933,
"grad_norm": 0.07298251241445541,
"learning_rate": 8.878783288035957e-05,
"loss": 0.006,
"step": 5130
},
{
"epoch": 17.602739726027398,
"grad_norm": 0.08428546786308289,
"learning_rate": 8.873561024898668e-05,
"loss": 0.0068,
"step": 5140
},
{
"epoch": 17.636986301369863,
"grad_norm": 0.06158934161067009,
"learning_rate": 8.868328171593448e-05,
"loss": 0.005,
"step": 5150
},
{
"epoch": 17.671232876712327,
"grad_norm": 0.07968197017908096,
"learning_rate": 8.863084742426719e-05,
"loss": 0.0048,
"step": 5160
},
{
"epoch": 17.705479452054796,
"grad_norm": 0.09236017614603043,
"learning_rate": 8.857830751733815e-05,
"loss": 0.0055,
"step": 5170
},
{
"epoch": 17.73972602739726,
"grad_norm": 0.0854865089058876,
"learning_rate": 8.852566213878947e-05,
"loss": 0.008,
"step": 5180
},
{
"epoch": 17.773972602739725,
"grad_norm": 0.06545963138341904,
"learning_rate": 8.84729114325516e-05,
"loss": 0.0052,
"step": 5190
},
{
"epoch": 17.80821917808219,
"grad_norm": 0.05856500193476677,
"learning_rate": 8.842005554284296e-05,
"loss": 0.005,
"step": 5200
},
{
"epoch": 17.84246575342466,
"grad_norm": 0.06909248977899551,
"learning_rate": 8.836709461416952e-05,
"loss": 0.0046,
"step": 5210
},
{
"epoch": 17.876712328767123,
"grad_norm": 0.07687606662511826,
"learning_rate": 8.831402879132446e-05,
"loss": 0.0064,
"step": 5220
},
{
"epoch": 17.910958904109588,
"grad_norm": 0.08718976378440857,
"learning_rate": 8.82608582193877e-05,
"loss": 0.0052,
"step": 5230
},
{
"epoch": 17.945205479452056,
"grad_norm": 0.05759764462709427,
"learning_rate": 8.820758304372557e-05,
"loss": 0.0053,
"step": 5240
},
{
"epoch": 17.97945205479452,
"grad_norm": 0.051869217306375504,
"learning_rate": 8.815420340999033e-05,
"loss": 0.0048,
"step": 5250
},
{
"epoch": 18.013698630136986,
"grad_norm": 0.10209794342517853,
"learning_rate": 8.810071946411989e-05,
"loss": 0.0057,
"step": 5260
},
{
"epoch": 18.04794520547945,
"grad_norm": 0.08743973076343536,
"learning_rate": 8.804713135233731e-05,
"loss": 0.0052,
"step": 5270
},
{
"epoch": 18.08219178082192,
"grad_norm": 0.07851141691207886,
"learning_rate": 8.799343922115044e-05,
"loss": 0.0055,
"step": 5280
},
{
"epoch": 18.116438356164384,
"grad_norm": 0.05638684332370758,
"learning_rate": 8.79396432173515e-05,
"loss": 0.0055,
"step": 5290
},
{
"epoch": 18.15068493150685,
"grad_norm": 0.08562038093805313,
"learning_rate": 8.788574348801675e-05,
"loss": 0.0067,
"step": 5300
},
{
"epoch": 18.184931506849313,
"grad_norm": 0.09454862028360367,
"learning_rate": 8.783174018050594e-05,
"loss": 0.008,
"step": 5310
},
{
"epoch": 18.21917808219178,
"grad_norm": 0.08692476898431778,
"learning_rate": 8.77776334424621e-05,
"loss": 0.006,
"step": 5320
},
{
"epoch": 18.253424657534246,
"grad_norm": 0.09937483817338943,
"learning_rate": 8.772342342181095e-05,
"loss": 0.0052,
"step": 5330
},
{
"epoch": 18.28767123287671,
"grad_norm": 0.10558980703353882,
"learning_rate": 8.766911026676064e-05,
"loss": 0.0041,
"step": 5340
},
{
"epoch": 18.32191780821918,
"grad_norm": 0.062475964426994324,
"learning_rate": 8.761469412580125e-05,
"loss": 0.0052,
"step": 5350
},
{
"epoch": 18.356164383561644,
"grad_norm": 0.0724620670080185,
"learning_rate": 8.756017514770443e-05,
"loss": 0.0052,
"step": 5360
},
{
"epoch": 18.39041095890411,
"grad_norm": 0.057776160538196564,
"learning_rate": 8.750555348152298e-05,
"loss": 0.0055,
"step": 5370
},
{
"epoch": 18.424657534246574,
"grad_norm": 0.07162385433912277,
"learning_rate": 8.745082927659047e-05,
"loss": 0.0051,
"step": 5380
},
{
"epoch": 18.458904109589042,
"grad_norm": 0.08095235377550125,
"learning_rate": 8.739600268252078e-05,
"loss": 0.0061,
"step": 5390
},
{
"epoch": 18.493150684931507,
"grad_norm": 0.06584731489419937,
"learning_rate": 8.73410738492077e-05,
"loss": 0.0069,
"step": 5400
},
{
"epoch": 18.527397260273972,
"grad_norm": 0.09591003507375717,
"learning_rate": 8.728604292682459e-05,
"loss": 0.0051,
"step": 5410
},
{
"epoch": 18.561643835616437,
"grad_norm": 0.10172990709543228,
"learning_rate": 8.723091006582389e-05,
"loss": 0.0056,
"step": 5420
},
{
"epoch": 18.595890410958905,
"grad_norm": 0.06665683537721634,
"learning_rate": 8.717567541693673e-05,
"loss": 0.0055,
"step": 5430
},
{
"epoch": 18.63013698630137,
"grad_norm": 0.055505841970443726,
"learning_rate": 8.71203391311725e-05,
"loss": 0.0053,
"step": 5440
},
{
"epoch": 18.664383561643834,
"grad_norm": 0.09754104167222977,
"learning_rate": 8.706490135981855e-05,
"loss": 0.0053,
"step": 5450
},
{
"epoch": 18.698630136986303,
"grad_norm": 0.08901935815811157,
"learning_rate": 8.700936225443959e-05,
"loss": 0.006,
"step": 5460
},
{
"epoch": 18.732876712328768,
"grad_norm": 0.057462576776742935,
"learning_rate": 8.695372196687743e-05,
"loss": 0.0058,
"step": 5470
},
{
"epoch": 18.767123287671232,
"grad_norm": 0.09456628561019897,
"learning_rate": 8.689798064925049e-05,
"loss": 0.0042,
"step": 5480
},
{
"epoch": 18.801369863013697,
"grad_norm": 0.11748138815164566,
"learning_rate": 8.684213845395339e-05,
"loss": 0.0051,
"step": 5490
},
{
"epoch": 18.835616438356166,
"grad_norm": 0.07330834865570068,
"learning_rate": 8.678619553365659e-05,
"loss": 0.0062,
"step": 5500
},
{
"epoch": 18.86986301369863,
"grad_norm": 0.09065715223550797,
"learning_rate": 8.673015204130586e-05,
"loss": 0.0046,
"step": 5510
},
{
"epoch": 18.904109589041095,
"grad_norm": 0.07880376279354095,
"learning_rate": 8.6674008130122e-05,
"loss": 0.0052,
"step": 5520
},
{
"epoch": 18.938356164383563,
"grad_norm": 0.07799544185400009,
"learning_rate": 8.661776395360029e-05,
"loss": 0.0053,
"step": 5530
},
{
"epoch": 18.972602739726028,
"grad_norm": 0.09239964932203293,
"learning_rate": 8.656141966551019e-05,
"loss": 0.0058,
"step": 5540
},
{
"epoch": 19.006849315068493,
"grad_norm": 0.06592954695224762,
"learning_rate": 8.650497541989482e-05,
"loss": 0.0049,
"step": 5550
},
{
"epoch": 19.041095890410958,
"grad_norm": 0.05416293814778328,
"learning_rate": 8.644843137107059e-05,
"loss": 0.0045,
"step": 5560
},
{
"epoch": 19.075342465753426,
"grad_norm": 0.0726301446557045,
"learning_rate": 8.639178767362676e-05,
"loss": 0.0056,
"step": 5570
},
{
"epoch": 19.10958904109589,
"grad_norm": 0.05670231953263283,
"learning_rate": 8.633504448242505e-05,
"loss": 0.0052,
"step": 5580
},
{
"epoch": 19.143835616438356,
"grad_norm": 0.062235865741968155,
"learning_rate": 8.627820195259918e-05,
"loss": 0.0052,
"step": 5590
},
{
"epoch": 19.17808219178082,
"grad_norm": 0.05244023725390434,
"learning_rate": 8.622126023955446e-05,
"loss": 0.0052,
"step": 5600
},
{
"epoch": 19.21232876712329,
"grad_norm": 0.08390739560127258,
"learning_rate": 8.616421949896734e-05,
"loss": 0.0058,
"step": 5610
},
{
"epoch": 19.246575342465754,
"grad_norm": 0.05916329845786095,
"learning_rate": 8.610707988678503e-05,
"loss": 0.0046,
"step": 5620
},
{
"epoch": 19.28082191780822,
"grad_norm": 0.06701207906007767,
"learning_rate": 8.604984155922506e-05,
"loss": 0.0065,
"step": 5630
},
{
"epoch": 19.315068493150687,
"grad_norm": 0.06987061351537704,
"learning_rate": 8.599250467277483e-05,
"loss": 0.0064,
"step": 5640
},
{
"epoch": 19.34931506849315,
"grad_norm": 0.060252390801906586,
"learning_rate": 8.59350693841912e-05,
"loss": 0.0046,
"step": 5650
},
{
"epoch": 19.383561643835616,
"grad_norm": 0.06767981499433517,
"learning_rate": 8.587753585050004e-05,
"loss": 0.006,
"step": 5660
},
{
"epoch": 19.41780821917808,
"grad_norm": 0.06105669587850571,
"learning_rate": 8.581990422899585e-05,
"loss": 0.0063,
"step": 5670
},
{
"epoch": 19.45205479452055,
"grad_norm": 0.08388019353151321,
"learning_rate": 8.576217467724128e-05,
"loss": 0.0052,
"step": 5680
},
{
"epoch": 19.486301369863014,
"grad_norm": 0.10021601617336273,
"learning_rate": 8.570434735306671e-05,
"loss": 0.0055,
"step": 5690
},
{
"epoch": 19.52054794520548,
"grad_norm": 0.07756322622299194,
"learning_rate": 8.564642241456986e-05,
"loss": 0.0051,
"step": 5700
},
{
"epoch": 19.554794520547944,
"grad_norm": 0.08748367428779602,
"learning_rate": 8.558840002011528e-05,
"loss": 0.0067,
"step": 5710
},
{
"epoch": 19.589041095890412,
"grad_norm": 0.09222008287906647,
"learning_rate": 8.553028032833397e-05,
"loss": 0.0063,
"step": 5720
},
{
"epoch": 19.623287671232877,
"grad_norm": 0.07356042414903641,
"learning_rate": 8.547206349812298e-05,
"loss": 0.0047,
"step": 5730
},
{
"epoch": 19.65753424657534,
"grad_norm": 0.0655691996216774,
"learning_rate": 8.541374968864487e-05,
"loss": 0.0062,
"step": 5740
},
{
"epoch": 19.69178082191781,
"grad_norm": 0.09560838341712952,
"learning_rate": 8.535533905932738e-05,
"loss": 0.006,
"step": 5750
},
{
"epoch": 19.726027397260275,
"grad_norm": 0.11108992248773575,
"learning_rate": 8.529683176986295e-05,
"loss": 0.0059,
"step": 5760
},
{
"epoch": 19.76027397260274,
"grad_norm": 0.06656841933727264,
"learning_rate": 8.523822798020827e-05,
"loss": 0.0065,
"step": 5770
},
{
"epoch": 19.794520547945204,
"grad_norm": 0.056710727512836456,
"learning_rate": 8.517952785058385e-05,
"loss": 0.0051,
"step": 5780
},
{
"epoch": 19.828767123287673,
"grad_norm": 0.0718056783080101,
"learning_rate": 8.512073154147362e-05,
"loss": 0.0055,
"step": 5790
},
{
"epoch": 19.863013698630137,
"grad_norm": 0.08344350010156631,
"learning_rate": 8.506183921362443e-05,
"loss": 0.0042,
"step": 5800
},
{
"epoch": 19.897260273972602,
"grad_norm": 0.0831236019730568,
"learning_rate": 8.500285102804568e-05,
"loss": 0.0063,
"step": 5810
},
{
"epoch": 19.931506849315067,
"grad_norm": 0.08485836535692215,
"learning_rate": 8.494376714600878e-05,
"loss": 0.0044,
"step": 5820
},
{
"epoch": 19.965753424657535,
"grad_norm": 0.09129001945257187,
"learning_rate": 8.488458772904684e-05,
"loss": 0.005,
"step": 5830
},
{
"epoch": 20.0,
"grad_norm": 0.060976918786764145,
"learning_rate": 8.482531293895412e-05,
"loss": 0.0039,
"step": 5840
},
{
"epoch": 20.034246575342465,
"grad_norm": 0.07683663815259933,
"learning_rate": 8.476594293778561e-05,
"loss": 0.0066,
"step": 5850
},
{
"epoch": 20.068493150684933,
"grad_norm": 0.09113059937953949,
"learning_rate": 8.470647788785665e-05,
"loss": 0.0057,
"step": 5860
},
{
"epoch": 20.102739726027398,
"grad_norm": 0.060158830136060715,
"learning_rate": 8.46469179517424e-05,
"loss": 0.0041,
"step": 5870
},
{
"epoch": 20.136986301369863,
"grad_norm": 0.05656911060214043,
"learning_rate": 8.458726329227747e-05,
"loss": 0.005,
"step": 5880
},
{
"epoch": 20.171232876712327,
"grad_norm": 0.06181691959500313,
"learning_rate": 8.452751407255541e-05,
"loss": 0.0061,
"step": 5890
},
{
"epoch": 20.205479452054796,
"grad_norm": 0.08877629786729813,
"learning_rate": 8.44676704559283e-05,
"loss": 0.0056,
"step": 5900
},
{
"epoch": 20.23972602739726,
"grad_norm": 0.08738001435995102,
"learning_rate": 8.44077326060063e-05,
"loss": 0.0057,
"step": 5910
},
{
"epoch": 20.273972602739725,
"grad_norm": 0.0723235234618187,
"learning_rate": 8.434770068665723e-05,
"loss": 0.0051,
"step": 5920
},
{
"epoch": 20.30821917808219,
"grad_norm": 0.08437537401914597,
"learning_rate": 8.428757486200603e-05,
"loss": 0.0062,
"step": 5930
},
{
"epoch": 20.34246575342466,
"grad_norm": 0.08028864115476608,
"learning_rate": 8.422735529643444e-05,
"loss": 0.0058,
"step": 5940
},
{
"epoch": 20.376712328767123,
"grad_norm": 0.06959905475378036,
"learning_rate": 8.416704215458043e-05,
"loss": 0.0082,
"step": 5950
},
{
"epoch": 20.410958904109588,
"grad_norm": 0.06425705552101135,
"learning_rate": 8.410663560133784e-05,
"loss": 0.005,
"step": 5960
},
{
"epoch": 20.445205479452056,
"grad_norm": 0.06436647474765778,
"learning_rate": 8.404613580185585e-05,
"loss": 0.0049,
"step": 5970
},
{
"epoch": 20.47945205479452,
"grad_norm": 0.057509083300828934,
"learning_rate": 8.398554292153866e-05,
"loss": 0.0071,
"step": 5980
},
{
"epoch": 20.513698630136986,
"grad_norm": 0.04900655895471573,
"learning_rate": 8.392485712604483e-05,
"loss": 0.0067,
"step": 5990
},
{
"epoch": 20.54794520547945,
"grad_norm": 0.05448565259575844,
"learning_rate": 8.386407858128706e-05,
"loss": 0.0058,
"step": 6000
},
{
"epoch": 20.58219178082192,
"grad_norm": 0.06079663708806038,
"learning_rate": 8.380320745343153e-05,
"loss": 0.0072,
"step": 6010
},
{
"epoch": 20.616438356164384,
"grad_norm": 0.053850773721933365,
"learning_rate": 8.37422439088976e-05,
"loss": 0.0072,
"step": 6020
},
{
"epoch": 20.65068493150685,
"grad_norm": 0.09747336804866791,
"learning_rate": 8.368118811435726e-05,
"loss": 0.0046,
"step": 6030
},
{
"epoch": 20.684931506849313,
"grad_norm": 0.09227459877729416,
"learning_rate": 8.362004023673474e-05,
"loss": 0.0053,
"step": 6040
},
{
"epoch": 20.71917808219178,
"grad_norm": 0.061509717255830765,
"learning_rate": 8.355880044320598e-05,
"loss": 0.0047,
"step": 6050
},
{
"epoch": 20.753424657534246,
"grad_norm": 0.06945177167654037,
"learning_rate": 8.349746890119826e-05,
"loss": 0.0058,
"step": 6060
},
{
"epoch": 20.78767123287671,
"grad_norm": 0.05989925190806389,
"learning_rate": 8.343604577838964e-05,
"loss": 0.0043,
"step": 6070
},
{
"epoch": 20.82191780821918,
"grad_norm": 0.049721136689186096,
"learning_rate": 8.337453124270863e-05,
"loss": 0.0055,
"step": 6080
},
{
"epoch": 20.856164383561644,
"grad_norm": 0.05695385858416557,
"learning_rate": 8.331292546233362e-05,
"loss": 0.0051,
"step": 6090
},
{
"epoch": 20.89041095890411,
"grad_norm": 0.06528819352388382,
"learning_rate": 8.32512286056924e-05,
"loss": 0.0055,
"step": 6100
},
{
"epoch": 20.924657534246574,
"grad_norm": 0.09410160034894943,
"learning_rate": 8.318944084146192e-05,
"loss": 0.0048,
"step": 6110
},
{
"epoch": 20.958904109589042,
"grad_norm": 0.05946965143084526,
"learning_rate": 8.31275623385675e-05,
"loss": 0.0047,
"step": 6120
},
{
"epoch": 20.993150684931507,
"grad_norm": 0.07674143463373184,
"learning_rate": 8.306559326618259e-05,
"loss": 0.0054,
"step": 6130
},
{
"epoch": 21.027397260273972,
"grad_norm": 0.079231858253479,
"learning_rate": 8.300353379372834e-05,
"loss": 0.0066,
"step": 6140
},
{
"epoch": 21.061643835616437,
"grad_norm": 0.07016433775424957,
"learning_rate": 8.29413840908729e-05,
"loss": 0.005,
"step": 6150
},
{
"epoch": 21.095890410958905,
"grad_norm": 0.07434679567813873,
"learning_rate": 8.287914432753123e-05,
"loss": 0.0049,
"step": 6160
},
{
"epoch": 21.13013698630137,
"grad_norm": 0.06633684784173965,
"learning_rate": 8.281681467386446e-05,
"loss": 0.0066,
"step": 6170
},
{
"epoch": 21.164383561643834,
"grad_norm": 0.0985659658908844,
"learning_rate": 8.275439530027948e-05,
"loss": 0.0072,
"step": 6180
},
{
"epoch": 21.198630136986303,
"grad_norm": 0.07376008480787277,
"learning_rate": 8.269188637742846e-05,
"loss": 0.0061,
"step": 6190
},
{
"epoch": 21.232876712328768,
"grad_norm": 0.08632157742977142,
"learning_rate": 8.262928807620843e-05,
"loss": 0.0056,
"step": 6200
},
{
"epoch": 21.267123287671232,
"grad_norm": 0.05637221783399582,
"learning_rate": 8.256660056776076e-05,
"loss": 0.0052,
"step": 6210
},
{
"epoch": 21.301369863013697,
"grad_norm": 0.07299555838108063,
"learning_rate": 8.250382402347065e-05,
"loss": 0.0044,
"step": 6220
},
{
"epoch": 21.335616438356166,
"grad_norm": 0.07100588828325272,
"learning_rate": 8.244095861496686e-05,
"loss": 0.008,
"step": 6230
},
{
"epoch": 21.36986301369863,
"grad_norm": 0.05591908469796181,
"learning_rate": 8.237800451412095e-05,
"loss": 0.0059,
"step": 6240
},
{
"epoch": 21.404109589041095,
"grad_norm": 0.06881851702928543,
"learning_rate": 8.231496189304704e-05,
"loss": 0.0044,
"step": 6250
},
{
"epoch": 21.438356164383563,
"grad_norm": 0.07660767436027527,
"learning_rate": 8.225183092410128e-05,
"loss": 0.0047,
"step": 6260
},
{
"epoch": 21.472602739726028,
"grad_norm": 0.07003331184387207,
"learning_rate": 8.218861177988129e-05,
"loss": 0.0057,
"step": 6270
},
{
"epoch": 21.506849315068493,
"grad_norm": 0.06258975714445114,
"learning_rate": 8.212530463322583e-05,
"loss": 0.0045,
"step": 6280
},
{
"epoch": 21.541095890410958,
"grad_norm": 0.07067704945802689,
"learning_rate": 8.206190965721419e-05,
"loss": 0.0045,
"step": 6290
},
{
"epoch": 21.575342465753426,
"grad_norm": 0.08280424028635025,
"learning_rate": 8.199842702516583e-05,
"loss": 0.0053,
"step": 6300
},
{
"epoch": 21.60958904109589,
"grad_norm": 0.0606718584895134,
"learning_rate": 8.193485691063985e-05,
"loss": 0.0054,
"step": 6310
},
{
"epoch": 21.643835616438356,
"grad_norm": 0.06702598184347153,
"learning_rate": 8.18711994874345e-05,
"loss": 0.0045,
"step": 6320
},
{
"epoch": 21.67808219178082,
"grad_norm": 0.06098796799778938,
"learning_rate": 8.180745492958674e-05,
"loss": 0.0039,
"step": 6330
},
{
"epoch": 21.71232876712329,
"grad_norm": 0.06085795536637306,
"learning_rate": 8.174362341137177e-05,
"loss": 0.0046,
"step": 6340
},
{
"epoch": 21.746575342465754,
"grad_norm": 0.06964936852455139,
"learning_rate": 8.167970510730253e-05,
"loss": 0.0047,
"step": 6350
},
{
"epoch": 21.78082191780822,
"grad_norm": 0.05623121187090874,
"learning_rate": 8.161570019212921e-05,
"loss": 0.0045,
"step": 6360
},
{
"epoch": 21.815068493150687,
"grad_norm": 0.049630485475063324,
"learning_rate": 8.155160884083881e-05,
"loss": 0.0041,
"step": 6370
},
{
"epoch": 21.84931506849315,
"grad_norm": 0.06835552304983139,
"learning_rate": 8.148743122865463e-05,
"loss": 0.0052,
"step": 6380
},
{
"epoch": 21.883561643835616,
"grad_norm": 0.07761190831661224,
"learning_rate": 8.14231675310358e-05,
"loss": 0.0061,
"step": 6390
},
{
"epoch": 21.91780821917808,
"grad_norm": 0.05890411511063576,
"learning_rate": 8.135881792367686e-05,
"loss": 0.0044,
"step": 6400
},
{
"epoch": 21.95205479452055,
"grad_norm": 0.06293132156133652,
"learning_rate": 8.129438258250712e-05,
"loss": 0.0046,
"step": 6410
},
{
"epoch": 21.986301369863014,
"grad_norm": 0.06598832458257675,
"learning_rate": 8.12298616836904e-05,
"loss": 0.0047,
"step": 6420
},
{
"epoch": 22.02054794520548,
"grad_norm": 0.0738854929804802,
"learning_rate": 8.116525540362434e-05,
"loss": 0.0049,
"step": 6430
},
{
"epoch": 22.054794520547944,
"grad_norm": 0.0818733349442482,
"learning_rate": 8.110056391894005e-05,
"loss": 0.0039,
"step": 6440
},
{
"epoch": 22.089041095890412,
"grad_norm": 0.07615378499031067,
"learning_rate": 8.103578740650156e-05,
"loss": 0.0055,
"step": 6450
},
{
"epoch": 22.123287671232877,
"grad_norm": 0.0781155526638031,
"learning_rate": 8.097092604340542e-05,
"loss": 0.0045,
"step": 6460
},
{
"epoch": 22.15753424657534,
"grad_norm": 0.065615713596344,
"learning_rate": 8.090598000698009e-05,
"loss": 0.0055,
"step": 6470
},
{
"epoch": 22.19178082191781,
"grad_norm": 0.06982345879077911,
"learning_rate": 8.084094947478556e-05,
"loss": 0.0046,
"step": 6480
},
{
"epoch": 22.226027397260275,
"grad_norm": 0.04483773186802864,
"learning_rate": 8.077583462461283e-05,
"loss": 0.0045,
"step": 6490
},
{
"epoch": 22.26027397260274,
"grad_norm": 0.08611573278903961,
"learning_rate": 8.07106356344834e-05,
"loss": 0.0069,
"step": 6500
},
{
"epoch": 22.294520547945204,
"grad_norm": 0.08743693679571152,
"learning_rate": 8.064535268264883e-05,
"loss": 0.0052,
"step": 6510
},
{
"epoch": 22.328767123287673,
"grad_norm": 0.07372572273015976,
"learning_rate": 8.057998594759022e-05,
"loss": 0.0067,
"step": 6520
},
{
"epoch": 22.363013698630137,
"grad_norm": 0.07083258777856827,
"learning_rate": 8.051453560801772e-05,
"loss": 0.0057,
"step": 6530
},
{
"epoch": 22.397260273972602,
"grad_norm": 0.06519380211830139,
"learning_rate": 8.044900184287007e-05,
"loss": 0.0041,
"step": 6540
},
{
"epoch": 22.431506849315067,
"grad_norm": 0.07042599469423294,
"learning_rate": 8.038338483131407e-05,
"loss": 0.0038,
"step": 6550
},
{
"epoch": 22.465753424657535,
"grad_norm": 0.07242155075073242,
"learning_rate": 8.031768475274413e-05,
"loss": 0.0053,
"step": 6560
},
{
"epoch": 22.5,
"grad_norm": 0.06891541928052902,
"learning_rate": 8.025190178678175e-05,
"loss": 0.0064,
"step": 6570
},
{
"epoch": 22.534246575342465,
"grad_norm": 0.06691320240497589,
"learning_rate": 8.018603611327504e-05,
"loss": 0.0057,
"step": 6580
},
{
"epoch": 22.568493150684933,
"grad_norm": 0.06496941298246384,
"learning_rate": 8.012008791229826e-05,
"loss": 0.0054,
"step": 6590
},
{
"epoch": 22.602739726027398,
"grad_norm": 0.06711320579051971,
"learning_rate": 8.005405736415126e-05,
"loss": 0.0061,
"step": 6600
},
{
"epoch": 22.636986301369863,
"grad_norm": 0.05645699426531792,
"learning_rate": 7.998794464935904e-05,
"loss": 0.0056,
"step": 6610
},
{
"epoch": 22.671232876712327,
"grad_norm": 0.11775130033493042,
"learning_rate": 7.992174994867123e-05,
"loss": 0.0075,
"step": 6620
},
{
"epoch": 22.705479452054796,
"grad_norm": 0.05640696734189987,
"learning_rate": 7.985547344306161e-05,
"loss": 0.005,
"step": 6630
},
{
"epoch": 22.73972602739726,
"grad_norm": 0.08137935400009155,
"learning_rate": 7.978911531372765e-05,
"loss": 0.0046,
"step": 6640
},
{
"epoch": 22.773972602739725,
"grad_norm": 0.07216529548168182,
"learning_rate": 7.972267574208991e-05,
"loss": 0.0045,
"step": 6650
},
{
"epoch": 22.80821917808219,
"grad_norm": 0.07081134617328644,
"learning_rate": 7.965615490979163e-05,
"loss": 0.0047,
"step": 6660
},
{
"epoch": 22.84246575342466,
"grad_norm": 0.08713933825492859,
"learning_rate": 7.958955299869825e-05,
"loss": 0.0049,
"step": 6670
},
{
"epoch": 22.876712328767123,
"grad_norm": 0.06303700804710388,
"learning_rate": 7.952287019089685e-05,
"loss": 0.0045,
"step": 6680
},
{
"epoch": 22.910958904109588,
"grad_norm": 0.09050445258617401,
"learning_rate": 7.945610666869568e-05,
"loss": 0.005,
"step": 6690
},
{
"epoch": 22.945205479452056,
"grad_norm": 0.07567279040813446,
"learning_rate": 7.938926261462366e-05,
"loss": 0.0043,
"step": 6700
},
{
"epoch": 22.97945205479452,
"grad_norm": 0.06795069575309753,
"learning_rate": 7.932233821142987e-05,
"loss": 0.0041,
"step": 6710
},
{
"epoch": 23.013698630136986,
"grad_norm": 0.07449833303689957,
"learning_rate": 7.925533364208309e-05,
"loss": 0.0063,
"step": 6720
},
{
"epoch": 23.04794520547945,
"grad_norm": 0.07487854361534119,
"learning_rate": 7.918824908977123e-05,
"loss": 0.0055,
"step": 6730
},
{
"epoch": 23.08219178082192,
"grad_norm": 0.07336029410362244,
"learning_rate": 7.912108473790092e-05,
"loss": 0.0063,
"step": 6740
},
{
"epoch": 23.116438356164384,
"grad_norm": 0.05453534796833992,
"learning_rate": 7.905384077009693e-05,
"loss": 0.0038,
"step": 6750
},
{
"epoch": 23.15068493150685,
"grad_norm": 0.08057594299316406,
"learning_rate": 7.898651737020166e-05,
"loss": 0.0046,
"step": 6760
},
{
"epoch": 23.184931506849313,
"grad_norm": 0.07083872705698013,
"learning_rate": 7.891911472227478e-05,
"loss": 0.0038,
"step": 6770
},
{
"epoch": 23.21917808219178,
"grad_norm": 0.07410766929388046,
"learning_rate": 7.88516330105925e-05,
"loss": 0.0057,
"step": 6780
},
{
"epoch": 23.253424657534246,
"grad_norm": 0.0587015375494957,
"learning_rate": 7.878407241964729e-05,
"loss": 0.0039,
"step": 6790
},
{
"epoch": 23.28767123287671,
"grad_norm": 0.05226041004061699,
"learning_rate": 7.871643313414718e-05,
"loss": 0.0041,
"step": 6800
},
{
"epoch": 23.32191780821918,
"grad_norm": 0.06701230257749557,
"learning_rate": 7.864871533901544e-05,
"loss": 0.0048,
"step": 6810
},
{
"epoch": 23.356164383561644,
"grad_norm": 0.046900056302547455,
"learning_rate": 7.858091921938988e-05,
"loss": 0.0051,
"step": 6820
},
{
"epoch": 23.39041095890411,
"grad_norm": 0.07751727104187012,
"learning_rate": 7.851304496062254e-05,
"loss": 0.0048,
"step": 6830
},
{
"epoch": 23.424657534246574,
"grad_norm": 0.06238679215312004,
"learning_rate": 7.844509274827907e-05,
"loss": 0.0051,
"step": 6840
},
{
"epoch": 23.458904109589042,
"grad_norm": 0.060652438551187515,
"learning_rate": 7.837706276813819e-05,
"loss": 0.0048,
"step": 6850
},
{
"epoch": 23.493150684931507,
"grad_norm": 0.0687926784157753,
"learning_rate": 7.830895520619128e-05,
"loss": 0.0059,
"step": 6860
},
{
"epoch": 23.527397260273972,
"grad_norm": 0.08176784217357635,
"learning_rate": 7.824077024864179e-05,
"loss": 0.0053,
"step": 6870
},
{
"epoch": 23.561643835616437,
"grad_norm": 0.05597813054919243,
"learning_rate": 7.817250808190483e-05,
"loss": 0.0035,
"step": 6880
},
{
"epoch": 23.595890410958905,
"grad_norm": 0.06768350303173065,
"learning_rate": 7.810416889260653e-05,
"loss": 0.0056,
"step": 6890
},
{
"epoch": 23.63013698630137,
"grad_norm": 0.08265560865402222,
"learning_rate": 7.803575286758364e-05,
"loss": 0.0045,
"step": 6900
},
{
"epoch": 23.664383561643834,
"grad_norm": 0.06400995701551437,
"learning_rate": 7.796726019388295e-05,
"loss": 0.0047,
"step": 6910
},
{
"epoch": 23.698630136986303,
"grad_norm": 0.06078376621007919,
"learning_rate": 7.789869105876083e-05,
"loss": 0.0039,
"step": 6920
},
{
"epoch": 23.732876712328768,
"grad_norm": 0.08402138203382492,
"learning_rate": 7.783004564968263e-05,
"loss": 0.0054,
"step": 6930
},
{
"epoch": 23.767123287671232,
"grad_norm": 0.05891257897019386,
"learning_rate": 7.776132415432234e-05,
"loss": 0.0052,
"step": 6940
},
{
"epoch": 23.801369863013697,
"grad_norm": 0.04951402544975281,
"learning_rate": 7.769252676056187e-05,
"loss": 0.0046,
"step": 6950
},
{
"epoch": 23.835616438356166,
"grad_norm": 0.06751031428575516,
"learning_rate": 7.762365365649067e-05,
"loss": 0.0036,
"step": 6960
},
{
"epoch": 23.86986301369863,
"grad_norm": 0.052420638501644135,
"learning_rate": 7.755470503040516e-05,
"loss": 0.0046,
"step": 6970
},
{
"epoch": 23.904109589041095,
"grad_norm": 0.05422347038984299,
"learning_rate": 7.748568107080832e-05,
"loss": 0.0049,
"step": 6980
},
{
"epoch": 23.938356164383563,
"grad_norm": 0.08441081643104553,
"learning_rate": 7.741658196640892e-05,
"loss": 0.0067,
"step": 6990
},
{
"epoch": 23.972602739726028,
"grad_norm": 0.08261924237012863,
"learning_rate": 7.734740790612136e-05,
"loss": 0.0078,
"step": 7000
},
{
"epoch": 24.006849315068493,
"grad_norm": 0.06718011945486069,
"learning_rate": 7.727815907906481e-05,
"loss": 0.0049,
"step": 7010
},
{
"epoch": 24.041095890410958,
"grad_norm": 0.07961238920688629,
"learning_rate": 7.720883567456298e-05,
"loss": 0.0051,
"step": 7020
},
{
"epoch": 24.075342465753426,
"grad_norm": 0.06769448518753052,
"learning_rate": 7.713943788214337e-05,
"loss": 0.0061,
"step": 7030
},
{
"epoch": 24.10958904109589,
"grad_norm": 0.06722130626440048,
"learning_rate": 7.70699658915369e-05,
"loss": 0.005,
"step": 7040
},
{
"epoch": 24.143835616438356,
"grad_norm": 0.07311046123504639,
"learning_rate": 7.700041989267736e-05,
"loss": 0.0052,
"step": 7050
},
{
"epoch": 24.17808219178082,
"grad_norm": 0.0782022699713707,
"learning_rate": 7.693080007570084e-05,
"loss": 0.0045,
"step": 7060
},
{
"epoch": 24.21232876712329,
"grad_norm": 0.06442761421203613,
"learning_rate": 7.686110663094525e-05,
"loss": 0.0044,
"step": 7070
},
{
"epoch": 24.246575342465754,
"grad_norm": 0.05376974493265152,
"learning_rate": 7.679133974894983e-05,
"loss": 0.0051,
"step": 7080
},
{
"epoch": 24.28082191780822,
"grad_norm": 0.06022098660469055,
"learning_rate": 7.672149962045457e-05,
"loss": 0.0063,
"step": 7090
},
{
"epoch": 24.315068493150687,
"grad_norm": 0.07531668990850449,
"learning_rate": 7.66515864363997e-05,
"loss": 0.0057,
"step": 7100
},
{
"epoch": 24.34931506849315,
"grad_norm": 0.06881547719240189,
"learning_rate": 7.658160038792518e-05,
"loss": 0.0041,
"step": 7110
},
{
"epoch": 24.383561643835616,
"grad_norm": 0.06569792330265045,
"learning_rate": 7.651154166637025e-05,
"loss": 0.0053,
"step": 7120
},
{
"epoch": 24.41780821917808,
"grad_norm": 0.09814415872097015,
"learning_rate": 7.644141046327271e-05,
"loss": 0.0049,
"step": 7130
},
{
"epoch": 24.45205479452055,
"grad_norm": 0.04979328811168671,
"learning_rate": 7.637120697036866e-05,
"loss": 0.0051,
"step": 7140
},
{
"epoch": 24.486301369863014,
"grad_norm": 0.09558571875095367,
"learning_rate": 7.630093137959171e-05,
"loss": 0.0065,
"step": 7150
},
{
"epoch": 24.52054794520548,
"grad_norm": 0.07612846791744232,
"learning_rate": 7.623058388307269e-05,
"loss": 0.0052,
"step": 7160
},
{
"epoch": 24.554794520547944,
"grad_norm": 0.08981167525053024,
"learning_rate": 7.616016467313891e-05,
"loss": 0.0049,
"step": 7170
},
{
"epoch": 24.589041095890412,
"grad_norm": 0.08244021236896515,
"learning_rate": 7.608967394231387e-05,
"loss": 0.0049,
"step": 7180
},
{
"epoch": 24.623287671232877,
"grad_norm": 0.049214284867048264,
"learning_rate": 7.60191118833165e-05,
"loss": 0.0037,
"step": 7190
},
{
"epoch": 24.65753424657534,
"grad_norm": 0.061441682279109955,
"learning_rate": 7.594847868906076e-05,
"loss": 0.0054,
"step": 7200
},
{
"epoch": 24.69178082191781,
"grad_norm": 0.07628575712442398,
"learning_rate": 7.587777455265515e-05,
"loss": 0.0056,
"step": 7210
},
{
"epoch": 24.726027397260275,
"grad_norm": 0.06337640434503555,
"learning_rate": 7.580699966740201e-05,
"loss": 0.0038,
"step": 7220
},
{
"epoch": 24.76027397260274,
"grad_norm": 0.05385832488536835,
"learning_rate": 7.573615422679726e-05,
"loss": 0.0043,
"step": 7230
},
{
"epoch": 24.794520547945204,
"grad_norm": 0.06283440440893173,
"learning_rate": 7.566523842452958e-05,
"loss": 0.0048,
"step": 7240
},
{
"epoch": 24.828767123287673,
"grad_norm": 0.04975597560405731,
"learning_rate": 7.559425245448006e-05,
"loss": 0.0049,
"step": 7250
},
{
"epoch": 24.863013698630137,
"grad_norm": 0.07069192081689835,
"learning_rate": 7.552319651072164e-05,
"loss": 0.0049,
"step": 7260
},
{
"epoch": 24.897260273972602,
"grad_norm": 0.07121792435646057,
"learning_rate": 7.545207078751857e-05,
"loss": 0.0052,
"step": 7270
},
{
"epoch": 24.931506849315067,
"grad_norm": 0.09325384348630905,
"learning_rate": 7.538087547932585e-05,
"loss": 0.0045,
"step": 7280
},
{
"epoch": 24.965753424657535,
"grad_norm": 0.08620842546224594,
"learning_rate": 7.530961078078873e-05,
"loss": 0.0046,
"step": 7290
},
{
"epoch": 25.0,
"grad_norm": 0.06410706043243408,
"learning_rate": 7.52382768867422e-05,
"loss": 0.0042,
"step": 7300
},
{
"epoch": 25.034246575342465,
"grad_norm": 0.07789020985364914,
"learning_rate": 7.516687399221037e-05,
"loss": 0.0046,
"step": 7310
},
{
"epoch": 25.068493150684933,
"grad_norm": 0.059597231447696686,
"learning_rate": 7.509540229240601e-05,
"loss": 0.0048,
"step": 7320
},
{
"epoch": 25.102739726027398,
"grad_norm": 0.05253349244594574,
"learning_rate": 7.50238619827301e-05,
"loss": 0.0055,
"step": 7330
},
{
"epoch": 25.136986301369863,
"grad_norm": 0.06278624385595322,
"learning_rate": 7.495225325877103e-05,
"loss": 0.0046,
"step": 7340
},
{
"epoch": 25.171232876712327,
"grad_norm": 0.07764960080385208,
"learning_rate": 7.488057631630437e-05,
"loss": 0.0046,
"step": 7350
},
{
"epoch": 25.205479452054796,
"grad_norm": 0.09635750204324722,
"learning_rate": 7.480883135129211e-05,
"loss": 0.0039,
"step": 7360
},
{
"epoch": 25.23972602739726,
"grad_norm": 0.08985763788223267,
"learning_rate": 7.473701855988227e-05,
"loss": 0.0057,
"step": 7370
},
{
"epoch": 25.273972602739725,
"grad_norm": 0.05667169764637947,
"learning_rate": 7.466513813840825e-05,
"loss": 0.0042,
"step": 7380
},
{
"epoch": 25.30821917808219,
"grad_norm": 0.050024062395095825,
"learning_rate": 7.45931902833884e-05,
"loss": 0.0044,
"step": 7390
},
{
"epoch": 25.34246575342466,
"grad_norm": 0.05679883807897568,
"learning_rate": 7.452117519152542e-05,
"loss": 0.0045,
"step": 7400
},
{
"epoch": 25.376712328767123,
"grad_norm": 0.07050922513008118,
"learning_rate": 7.444909305970578e-05,
"loss": 0.0047,
"step": 7410
},
{
"epoch": 25.410958904109588,
"grad_norm": 0.059567615389823914,
"learning_rate": 7.437694408499933e-05,
"loss": 0.0054,
"step": 7420
},
{
"epoch": 25.445205479452056,
"grad_norm": 0.10416945815086365,
"learning_rate": 7.430472846465856e-05,
"loss": 0.0062,
"step": 7430
},
{
"epoch": 25.47945205479452,
"grad_norm": 0.09317982196807861,
"learning_rate": 7.423244639611826e-05,
"loss": 0.0061,
"step": 7440
},
{
"epoch": 25.513698630136986,
"grad_norm": 0.06658624857664108,
"learning_rate": 7.416009807699482e-05,
"loss": 0.0064,
"step": 7450
},
{
"epoch": 25.54794520547945,
"grad_norm": 0.07275442034006119,
"learning_rate": 7.408768370508576e-05,
"loss": 0.007,
"step": 7460
},
{
"epoch": 25.58219178082192,
"grad_norm": 0.09432009607553482,
"learning_rate": 7.401520347836926e-05,
"loss": 0.0077,
"step": 7470
},
{
"epoch": 25.616438356164384,
"grad_norm": 0.06725318729877472,
"learning_rate": 7.394265759500348e-05,
"loss": 0.0049,
"step": 7480
},
{
"epoch": 25.65068493150685,
"grad_norm": 0.07666585594415665,
"learning_rate": 7.387004625332608e-05,
"loss": 0.0078,
"step": 7490
},
{
"epoch": 25.684931506849313,
"grad_norm": 0.07260438054800034,
"learning_rate": 7.379736965185368e-05,
"loss": 0.0052,
"step": 7500
},
{
"epoch": 25.71917808219178,
"grad_norm": 0.060822173953056335,
"learning_rate": 7.372462798928137e-05,
"loss": 0.0049,
"step": 7510
},
{
"epoch": 25.753424657534246,
"grad_norm": 0.049856215715408325,
"learning_rate": 7.365182146448205e-05,
"loss": 0.0036,
"step": 7520
},
{
"epoch": 25.78767123287671,
"grad_norm": 0.04413225129246712,
"learning_rate": 7.357895027650598e-05,
"loss": 0.004,
"step": 7530
},
{
"epoch": 25.82191780821918,
"grad_norm": 0.07596855610609055,
"learning_rate": 7.350601462458024e-05,
"loss": 0.0045,
"step": 7540
},
{
"epoch": 25.856164383561644,
"grad_norm": 0.05063284933567047,
"learning_rate": 7.343301470810808e-05,
"loss": 0.0042,
"step": 7550
},
{
"epoch": 25.89041095890411,
"grad_norm": 0.05205608904361725,
"learning_rate": 7.335995072666848e-05,
"loss": 0.0052,
"step": 7560
},
{
"epoch": 25.924657534246574,
"grad_norm": 0.0644666850566864,
"learning_rate": 7.328682288001561e-05,
"loss": 0.0054,
"step": 7570
},
{
"epoch": 25.958904109589042,
"grad_norm": 0.042358387261629105,
"learning_rate": 7.32136313680782e-05,
"loss": 0.0055,
"step": 7580
},
{
"epoch": 25.993150684931507,
"grad_norm": 0.0498831570148468,
"learning_rate": 7.3140376390959e-05,
"loss": 0.005,
"step": 7590
},
{
"epoch": 26.027397260273972,
"grad_norm": 0.059835679829120636,
"learning_rate": 7.30670581489344e-05,
"loss": 0.005,
"step": 7600
},
{
"epoch": 26.061643835616437,
"grad_norm": 0.052039116621017456,
"learning_rate": 7.299367684245362e-05,
"loss": 0.005,
"step": 7610
},
{
"epoch": 26.095890410958905,
"grad_norm": 0.06602521985769272,
"learning_rate": 7.292023267213835e-05,
"loss": 0.0044,
"step": 7620
},
{
"epoch": 26.13013698630137,
"grad_norm": 0.05572016164660454,
"learning_rate": 7.284672583878219e-05,
"loss": 0.0046,
"step": 7630
},
{
"epoch": 26.164383561643834,
"grad_norm": 0.06919296830892563,
"learning_rate": 7.277315654334997e-05,
"loss": 0.005,
"step": 7640
},
{
"epoch": 26.198630136986303,
"grad_norm": 0.056638121604919434,
"learning_rate": 7.269952498697734e-05,
"loss": 0.004,
"step": 7650
},
{
"epoch": 26.232876712328768,
"grad_norm": 0.060026634484529495,
"learning_rate": 7.262583137097018e-05,
"loss": 0.0041,
"step": 7660
},
{
"epoch": 26.267123287671232,
"grad_norm": 0.07666108757257462,
"learning_rate": 7.255207589680402e-05,
"loss": 0.0044,
"step": 7670
},
{
"epoch": 26.301369863013697,
"grad_norm": 0.05837415158748627,
"learning_rate": 7.247825876612353e-05,
"loss": 0.0048,
"step": 7680
},
{
"epoch": 26.335616438356166,
"grad_norm": 0.04758793115615845,
"learning_rate": 7.240438018074189e-05,
"loss": 0.0066,
"step": 7690
},
{
"epoch": 26.36986301369863,
"grad_norm": 0.0517563596367836,
"learning_rate": 7.233044034264034e-05,
"loss": 0.0054,
"step": 7700
},
{
"epoch": 26.404109589041095,
"grad_norm": 0.06696631014347076,
"learning_rate": 7.225643945396757e-05,
"loss": 0.0046,
"step": 7710
},
{
"epoch": 26.438356164383563,
"grad_norm": 0.06236398220062256,
"learning_rate": 7.218237771703921e-05,
"loss": 0.0059,
"step": 7720
},
{
"epoch": 26.472602739726028,
"grad_norm": 0.04172263294458389,
"learning_rate": 7.210825533433719e-05,
"loss": 0.0049,
"step": 7730
},
{
"epoch": 26.506849315068493,
"grad_norm": 0.07122749090194702,
"learning_rate": 7.203407250850928e-05,
"loss": 0.006,
"step": 7740
},
{
"epoch": 26.541095890410958,
"grad_norm": 0.06881573796272278,
"learning_rate": 7.195982944236851e-05,
"loss": 0.0078,
"step": 7750
},
{
"epoch": 26.575342465753426,
"grad_norm": 0.10515818744897842,
"learning_rate": 7.188552633889259e-05,
"loss": 0.0055,
"step": 7760
},
{
"epoch": 26.60958904109589,
"grad_norm": 0.06753715127706528,
"learning_rate": 7.181116340122336e-05,
"loss": 0.0046,
"step": 7770
},
{
"epoch": 26.643835616438356,
"grad_norm": 0.08554303646087646,
"learning_rate": 7.173674083266624e-05,
"loss": 0.0048,
"step": 7780
},
{
"epoch": 26.67808219178082,
"grad_norm": 0.062416162341833115,
"learning_rate": 7.166225883668969e-05,
"loss": 0.0041,
"step": 7790
},
{
"epoch": 26.71232876712329,
"grad_norm": 0.06461095809936523,
"learning_rate": 7.158771761692464e-05,
"loss": 0.0054,
"step": 7800
},
{
"epoch": 26.746575342465754,
"grad_norm": 0.05914429947733879,
"learning_rate": 7.151311737716397e-05,
"loss": 0.0044,
"step": 7810
},
{
"epoch": 26.78082191780822,
"grad_norm": 0.06104607135057449,
"learning_rate": 7.143845832136188e-05,
"loss": 0.0051,
"step": 7820
},
{
"epoch": 26.815068493150687,
"grad_norm": 0.08471526950597763,
"learning_rate": 7.136374065363334e-05,
"loss": 0.0051,
"step": 7830
},
{
"epoch": 26.84931506849315,
"grad_norm": 0.05899347364902496,
"learning_rate": 7.128896457825364e-05,
"loss": 0.0046,
"step": 7840
},
{
"epoch": 26.883561643835616,
"grad_norm": 0.05316589027643204,
"learning_rate": 7.121413029965769e-05,
"loss": 0.0036,
"step": 7850
},
{
"epoch": 26.91780821917808,
"grad_norm": 0.06834858655929565,
"learning_rate": 7.113923802243957e-05,
"loss": 0.0047,
"step": 7860
},
{
"epoch": 26.95205479452055,
"grad_norm": 0.06230799853801727,
"learning_rate": 7.10642879513519e-05,
"loss": 0.0048,
"step": 7870
},
{
"epoch": 26.986301369863014,
"grad_norm": 0.06642285734415054,
"learning_rate": 7.09892802913053e-05,
"loss": 0.0052,
"step": 7880
},
{
"epoch": 27.02054794520548,
"grad_norm": 0.053017787635326385,
"learning_rate": 7.091421524736784e-05,
"loss": 0.0051,
"step": 7890
},
{
"epoch": 27.054794520547944,
"grad_norm": 0.07587441056966782,
"learning_rate": 7.083909302476453e-05,
"loss": 0.0042,
"step": 7900
},
{
"epoch": 27.089041095890412,
"grad_norm": 0.042078837752342224,
"learning_rate": 7.076391382887661e-05,
"loss": 0.006,
"step": 7910
},
{
"epoch": 27.123287671232877,
"grad_norm": 0.07141551375389099,
"learning_rate": 7.068867786524116e-05,
"loss": 0.0048,
"step": 7920
},
{
"epoch": 27.15753424657534,
"grad_norm": 0.06363295763731003,
"learning_rate": 7.061338533955043e-05,
"loss": 0.0048,
"step": 7930
},
{
"epoch": 27.19178082191781,
"grad_norm": 0.06564045697450638,
"learning_rate": 7.053803645765128e-05,
"loss": 0.0048,
"step": 7940
},
{
"epoch": 27.226027397260275,
"grad_norm": 0.06791261583566666,
"learning_rate": 7.04626314255447e-05,
"loss": 0.0063,
"step": 7950
},
{
"epoch": 27.26027397260274,
"grad_norm": 0.1112477108836174,
"learning_rate": 7.038717044938519e-05,
"loss": 0.0065,
"step": 7960
},
{
"epoch": 27.294520547945204,
"grad_norm": 0.09073132276535034,
"learning_rate": 7.031165373548014e-05,
"loss": 0.005,
"step": 7970
},
{
"epoch": 27.328767123287673,
"grad_norm": 0.06326626986265182,
"learning_rate": 7.023608149028937e-05,
"loss": 0.0058,
"step": 7980
},
{
"epoch": 27.363013698630137,
"grad_norm": 0.06269578635692596,
"learning_rate": 7.016045392042452e-05,
"loss": 0.0047,
"step": 7990
},
{
"epoch": 27.397260273972602,
"grad_norm": 0.07107697427272797,
"learning_rate": 7.008477123264848e-05,
"loss": 0.0055,
"step": 8000
},
{
"epoch": 27.431506849315067,
"grad_norm": 0.05553178861737251,
"learning_rate": 7.000903363387482e-05,
"loss": 0.006,
"step": 8010
},
{
"epoch": 27.465753424657535,
"grad_norm": 0.054099246859550476,
"learning_rate": 6.993324133116726e-05,
"loss": 0.0044,
"step": 8020
},
{
"epoch": 27.5,
"grad_norm": 0.0510900542140007,
"learning_rate": 6.985739453173903e-05,
"loss": 0.005,
"step": 8030
},
{
"epoch": 27.534246575342465,
"grad_norm": 0.061171576380729675,
"learning_rate": 6.978149344295242e-05,
"loss": 0.0043,
"step": 8040
},
{
"epoch": 27.568493150684933,
"grad_norm": 0.06363870948553085,
"learning_rate": 6.97055382723181e-05,
"loss": 0.0039,
"step": 8050
},
{
"epoch": 27.602739726027398,
"grad_norm": 0.061849091202020645,
"learning_rate": 6.962952922749457e-05,
"loss": 0.0037,
"step": 8060
},
{
"epoch": 27.636986301369863,
"grad_norm": 0.0676080584526062,
"learning_rate": 6.955346651628771e-05,
"loss": 0.0044,
"step": 8070
},
{
"epoch": 27.671232876712327,
"grad_norm": 0.0678730309009552,
"learning_rate": 6.947735034665002e-05,
"loss": 0.0045,
"step": 8080
},
{
"epoch": 27.705479452054796,
"grad_norm": 0.05755266547203064,
"learning_rate": 6.940118092668022e-05,
"loss": 0.0045,
"step": 8090
},
{
"epoch": 27.73972602739726,
"grad_norm": 0.05419217795133591,
"learning_rate": 6.932495846462261e-05,
"loss": 0.0043,
"step": 8100
},
{
"epoch": 27.773972602739725,
"grad_norm": 0.04824934899806976,
"learning_rate": 6.924868316886649e-05,
"loss": 0.0046,
"step": 8110
},
{
"epoch": 27.80821917808219,
"grad_norm": 0.06754368543624878,
"learning_rate": 6.917235524794558e-05,
"loss": 0.0046,
"step": 8120
},
{
"epoch": 27.84246575342466,
"grad_norm": 0.07079575955867767,
"learning_rate": 6.909597491053751e-05,
"loss": 0.005,
"step": 8130
},
{
"epoch": 27.876712328767123,
"grad_norm": 0.07714605331420898,
"learning_rate": 6.901954236546323e-05,
"loss": 0.0053,
"step": 8140
},
{
"epoch": 27.910958904109588,
"grad_norm": 0.07691439241170883,
"learning_rate": 6.894305782168638e-05,
"loss": 0.0039,
"step": 8150
},
{
"epoch": 27.945205479452056,
"grad_norm": 0.05573904141783714,
"learning_rate": 6.886652148831279e-05,
"loss": 0.0051,
"step": 8160
},
{
"epoch": 27.97945205479452,
"grad_norm": 0.042645107954740524,
"learning_rate": 6.878993357458986e-05,
"loss": 0.0036,
"step": 8170
},
{
"epoch": 28.013698630136986,
"grad_norm": 0.0758211612701416,
"learning_rate": 6.871329428990602e-05,
"loss": 0.0054,
"step": 8180
},
{
"epoch": 28.04794520547945,
"grad_norm": 0.06774193048477173,
"learning_rate": 6.863660384379017e-05,
"loss": 0.0033,
"step": 8190
},
{
"epoch": 28.08219178082192,
"grad_norm": 0.06408020853996277,
"learning_rate": 6.855986244591104e-05,
"loss": 0.0051,
"step": 8200
},
{
"epoch": 28.116438356164384,
"grad_norm": 0.06723714619874954,
"learning_rate": 6.84830703060767e-05,
"loss": 0.0049,
"step": 8210
},
{
"epoch": 28.15068493150685,
"grad_norm": 0.06539978086948395,
"learning_rate": 6.840622763423391e-05,
"loss": 0.0034,
"step": 8220
},
{
"epoch": 28.184931506849313,
"grad_norm": 0.06268458813428879,
"learning_rate": 6.83293346404676e-05,
"loss": 0.0049,
"step": 8230
},
{
"epoch": 28.21917808219178,
"grad_norm": 0.060107748955488205,
"learning_rate": 6.825239153500029e-05,
"loss": 0.0045,
"step": 8240
},
{
"epoch": 28.253424657534246,
"grad_norm": 0.08829207718372345,
"learning_rate": 6.817539852819149e-05,
"loss": 0.0041,
"step": 8250
},
{
"epoch": 28.28767123287671,
"grad_norm": 0.07106557488441467,
"learning_rate": 6.809835583053715e-05,
"loss": 0.0038,
"step": 8260
},
{
"epoch": 28.32191780821918,
"grad_norm": 0.04838179796934128,
"learning_rate": 6.802126365266905e-05,
"loss": 0.0044,
"step": 8270
},
{
"epoch": 28.356164383561644,
"grad_norm": 0.07035278528928757,
"learning_rate": 6.794412220535426e-05,
"loss": 0.004,
"step": 8280
},
{
"epoch": 28.39041095890411,
"grad_norm": 0.06344835460186005,
"learning_rate": 6.786693169949455e-05,
"loss": 0.0048,
"step": 8290
},
{
"epoch": 28.424657534246574,
"grad_norm": 0.07208611816167831,
"learning_rate": 6.778969234612584e-05,
"loss": 0.0042,
"step": 8300
},
{
"epoch": 28.458904109589042,
"grad_norm": 0.06950279325246811,
"learning_rate": 6.771240435641754e-05,
"loss": 0.0036,
"step": 8310
},
{
"epoch": 28.493150684931507,
"grad_norm": 0.06592731922864914,
"learning_rate": 6.763506794167208e-05,
"loss": 0.0051,
"step": 8320
},
{
"epoch": 28.527397260273972,
"grad_norm": 0.0695786327123642,
"learning_rate": 6.755768331332424e-05,
"loss": 0.0061,
"step": 8330
},
{
"epoch": 28.561643835616437,
"grad_norm": 0.05047084018588066,
"learning_rate": 6.748025068294067e-05,
"loss": 0.0053,
"step": 8340
},
{
"epoch": 28.595890410958905,
"grad_norm": 0.07383386045694351,
"learning_rate": 6.740277026221923e-05,
"loss": 0.0038,
"step": 8350
},
{
"epoch": 28.63013698630137,
"grad_norm": 0.05435343086719513,
"learning_rate": 6.732524226298841e-05,
"loss": 0.0042,
"step": 8360
},
{
"epoch": 28.664383561643834,
"grad_norm": 0.04976963996887207,
"learning_rate": 6.72476668972068e-05,
"loss": 0.005,
"step": 8370
},
{
"epoch": 28.698630136986303,
"grad_norm": 0.0554593987762928,
"learning_rate": 6.71700443769625e-05,
"loss": 0.005,
"step": 8380
},
{
"epoch": 28.732876712328768,
"grad_norm": 0.055551644414663315,
"learning_rate": 6.709237491447249e-05,
"loss": 0.0053,
"step": 8390
},
{
"epoch": 28.767123287671232,
"grad_norm": 0.05606789514422417,
"learning_rate": 6.701465872208216e-05,
"loss": 0.004,
"step": 8400
},
{
"epoch": 28.801369863013697,
"grad_norm": 0.05831046402454376,
"learning_rate": 6.693689601226458e-05,
"loss": 0.0032,
"step": 8410
},
{
"epoch": 28.835616438356166,
"grad_norm": 0.05428626015782356,
"learning_rate": 6.685908699762002e-05,
"loss": 0.0043,
"step": 8420
},
{
"epoch": 28.86986301369863,
"grad_norm": 0.05782606080174446,
"learning_rate": 6.67812318908754e-05,
"loss": 0.0045,
"step": 8430
},
{
"epoch": 28.904109589041095,
"grad_norm": 0.05124809592962265,
"learning_rate": 6.670333090488356e-05,
"loss": 0.0044,
"step": 8440
},
{
"epoch": 28.938356164383563,
"grad_norm": 0.06171569600701332,
"learning_rate": 6.662538425262285e-05,
"loss": 0.0046,
"step": 8450
},
{
"epoch": 28.972602739726028,
"grad_norm": 0.07845284789800644,
"learning_rate": 6.654739214719641e-05,
"loss": 0.0055,
"step": 8460
},
{
"epoch": 29.006849315068493,
"grad_norm": 0.06284799426794052,
"learning_rate": 6.646935480183173e-05,
"loss": 0.0038,
"step": 8470
},
{
"epoch": 29.041095890410958,
"grad_norm": 0.045351553708314896,
"learning_rate": 6.639127242987988e-05,
"loss": 0.006,
"step": 8480
},
{
"epoch": 29.075342465753426,
"grad_norm": 0.05155348405241966,
"learning_rate": 6.631314524481513e-05,
"loss": 0.0049,
"step": 8490
},
{
"epoch": 29.10958904109589,
"grad_norm": 0.04898626729846001,
"learning_rate": 6.623497346023418e-05,
"loss": 0.0042,
"step": 8500
},
{
"epoch": 29.143835616438356,
"grad_norm": 0.06170529127120972,
"learning_rate": 6.615675728985572e-05,
"loss": 0.0063,
"step": 8510
},
{
"epoch": 29.17808219178082,
"grad_norm": 0.0662521943449974,
"learning_rate": 6.607849694751977e-05,
"loss": 0.0043,
"step": 8520
},
{
"epoch": 29.21232876712329,
"grad_norm": 0.057435885071754456,
"learning_rate": 6.600019264718713e-05,
"loss": 0.0053,
"step": 8530
},
{
"epoch": 29.246575342465754,
"grad_norm": 0.05537862703204155,
"learning_rate": 6.592184460293877e-05,
"loss": 0.0039,
"step": 8540
},
{
"epoch": 29.28082191780822,
"grad_norm": 0.05145610123872757,
"learning_rate": 6.584345302897523e-05,
"loss": 0.005,
"step": 8550
},
{
"epoch": 29.315068493150687,
"grad_norm": 0.06142713874578476,
"learning_rate": 6.576501813961609e-05,
"loss": 0.0045,
"step": 8560
},
{
"epoch": 29.34931506849315,
"grad_norm": 0.05496953800320625,
"learning_rate": 6.568654014929932e-05,
"loss": 0.0049,
"step": 8570
},
{
"epoch": 29.383561643835616,
"grad_norm": 0.057442113757133484,
"learning_rate": 6.56080192725808e-05,
"loss": 0.0049,
"step": 8580
},
{
"epoch": 29.41780821917808,
"grad_norm": 0.07670370489358902,
"learning_rate": 6.552945572413358e-05,
"loss": 0.004,
"step": 8590
},
{
"epoch": 29.45205479452055,
"grad_norm": 0.06951688230037689,
"learning_rate": 6.545084971874738e-05,
"loss": 0.0046,
"step": 8600
},
{
"epoch": 29.486301369863014,
"grad_norm": 0.0623633898794651,
"learning_rate": 6.537220147132805e-05,
"loss": 0.0038,
"step": 8610
},
{
"epoch": 29.52054794520548,
"grad_norm": 0.06844084709882736,
"learning_rate": 6.529351119689688e-05,
"loss": 0.0055,
"step": 8620
},
{
"epoch": 29.554794520547944,
"grad_norm": 0.0466805100440979,
"learning_rate": 6.521477911059008e-05,
"loss": 0.0043,
"step": 8630
},
{
"epoch": 29.589041095890412,
"grad_norm": 0.0462033748626709,
"learning_rate": 6.513600542765817e-05,
"loss": 0.0041,
"step": 8640
},
{
"epoch": 29.623287671232877,
"grad_norm": 0.04172181710600853,
"learning_rate": 6.505719036346539e-05,
"loss": 0.0039,
"step": 8650
},
{
"epoch": 29.65753424657534,
"grad_norm": 0.059904795140028,
"learning_rate": 6.497833413348909e-05,
"loss": 0.004,
"step": 8660
},
{
"epoch": 29.69178082191781,
"grad_norm": 0.04163758084177971,
"learning_rate": 6.489943695331923e-05,
"loss": 0.0048,
"step": 8670
},
{
"epoch": 29.726027397260275,
"grad_norm": 0.06971383094787598,
"learning_rate": 6.48204990386577e-05,
"loss": 0.0046,
"step": 8680
},
{
"epoch": 29.76027397260274,
"grad_norm": 0.04700847715139389,
"learning_rate": 6.474152060531768e-05,
"loss": 0.0043,
"step": 8690
},
{
"epoch": 29.794520547945204,
"grad_norm": 0.06670918315649033,
"learning_rate": 6.466250186922325e-05,
"loss": 0.0058,
"step": 8700
},
{
"epoch": 29.828767123287673,
"grad_norm": 0.05440834164619446,
"learning_rate": 6.458344304640858e-05,
"loss": 0.0042,
"step": 8710
},
{
"epoch": 29.863013698630137,
"grad_norm": 0.0671214759349823,
"learning_rate": 6.450434435301751e-05,
"loss": 0.0043,
"step": 8720
},
{
"epoch": 29.897260273972602,
"grad_norm": 0.05730283632874489,
"learning_rate": 6.44252060053028e-05,
"loss": 0.0045,
"step": 8730
},
{
"epoch": 29.931506849315067,
"grad_norm": 0.06602773815393448,
"learning_rate": 6.43460282196257e-05,
"loss": 0.0048,
"step": 8740
},
{
"epoch": 29.965753424657535,
"grad_norm": 0.07988374680280685,
"learning_rate": 6.426681121245527e-05,
"loss": 0.0046,
"step": 8750
},
{
"epoch": 30.0,
"grad_norm": 0.06527750939130783,
"learning_rate": 6.418755520036775e-05,
"loss": 0.0038,
"step": 8760
},
{
"epoch": 30.034246575342465,
"grad_norm": 0.04482458159327507,
"learning_rate": 6.410826040004607e-05,
"loss": 0.0043,
"step": 8770
},
{
"epoch": 30.068493150684933,
"grad_norm": 0.06171128898859024,
"learning_rate": 6.402892702827916e-05,
"loss": 0.0038,
"step": 8780
},
{
"epoch": 30.102739726027398,
"grad_norm": 0.06300436705350876,
"learning_rate": 6.394955530196147e-05,
"loss": 0.0042,
"step": 8790
},
{
"epoch": 30.136986301369863,
"grad_norm": 0.07563607394695282,
"learning_rate": 6.387014543809223e-05,
"loss": 0.0043,
"step": 8800
},
{
"epoch": 30.171232876712327,
"grad_norm": 0.05031699687242508,
"learning_rate": 6.3790697653775e-05,
"loss": 0.0042,
"step": 8810
},
{
"epoch": 30.205479452054796,
"grad_norm": 0.05471353605389595,
"learning_rate": 6.371121216621698e-05,
"loss": 0.0051,
"step": 8820
},
{
"epoch": 30.23972602739726,
"grad_norm": 0.04786813631653786,
"learning_rate": 6.363168919272846e-05,
"loss": 0.003,
"step": 8830
},
{
"epoch": 30.273972602739725,
"grad_norm": 0.0538649708032608,
"learning_rate": 6.355212895072223e-05,
"loss": 0.005,
"step": 8840
},
{
"epoch": 30.30821917808219,
"grad_norm": 0.07921342551708221,
"learning_rate": 6.34725316577129e-05,
"loss": 0.0046,
"step": 8850
},
{
"epoch": 30.34246575342466,
"grad_norm": 0.058982837945222855,
"learning_rate": 6.339289753131649e-05,
"loss": 0.0045,
"step": 8860
},
{
"epoch": 30.376712328767123,
"grad_norm": 0.07950620353221893,
"learning_rate": 6.331322678924962e-05,
"loss": 0.0053,
"step": 8870
},
{
"epoch": 30.410958904109588,
"grad_norm": 0.10326464474201202,
"learning_rate": 6.323351964932908e-05,
"loss": 0.006,
"step": 8880
},
{
"epoch": 30.445205479452056,
"grad_norm": 0.05070188269019127,
"learning_rate": 6.315377632947115e-05,
"loss": 0.0048,
"step": 8890
},
{
"epoch": 30.47945205479452,
"grad_norm": 0.07777089625597,
"learning_rate": 6.307399704769099e-05,
"loss": 0.0045,
"step": 8900
},
{
"epoch": 30.513698630136986,
"grad_norm": 0.07219505310058594,
"learning_rate": 6.299418202210214e-05,
"loss": 0.0054,
"step": 8910
},
{
"epoch": 30.54794520547945,
"grad_norm": 0.06435932964086533,
"learning_rate": 6.291433147091583e-05,
"loss": 0.0041,
"step": 8920
},
{
"epoch": 30.58219178082192,
"grad_norm": 0.05930699408054352,
"learning_rate": 6.283444561244042e-05,
"loss": 0.0042,
"step": 8930
},
{
"epoch": 30.616438356164384,
"grad_norm": 0.07478418946266174,
"learning_rate": 6.275452466508077e-05,
"loss": 0.005,
"step": 8940
},
{
"epoch": 30.65068493150685,
"grad_norm": 0.06302210688591003,
"learning_rate": 6.26745688473377e-05,
"loss": 0.0048,
"step": 8950
},
{
"epoch": 30.684931506849313,
"grad_norm": 0.057475507259368896,
"learning_rate": 6.259457837780742e-05,
"loss": 0.0059,
"step": 8960
},
{
"epoch": 30.71917808219178,
"grad_norm": 0.06078030914068222,
"learning_rate": 6.251455347518073e-05,
"loss": 0.0046,
"step": 8970
},
{
"epoch": 30.753424657534246,
"grad_norm": 0.059874407947063446,
"learning_rate": 6.243449435824276e-05,
"loss": 0.005,
"step": 8980
},
{
"epoch": 30.78767123287671,
"grad_norm": 0.06151744723320007,
"learning_rate": 6.235440124587198e-05,
"loss": 0.0043,
"step": 8990
},
{
"epoch": 30.82191780821918,
"grad_norm": 0.07911258190870285,
"learning_rate": 6.227427435703997e-05,
"loss": 0.0049,
"step": 9000
},
{
"epoch": 30.856164383561644,
"grad_norm": 0.059228286147117615,
"learning_rate": 6.219411391081055e-05,
"loss": 0.0042,
"step": 9010
},
{
"epoch": 30.89041095890411,
"grad_norm": 0.05565868318080902,
"learning_rate": 6.211392012633932e-05,
"loss": 0.0036,
"step": 9020
},
{
"epoch": 30.924657534246574,
"grad_norm": 0.053873926401138306,
"learning_rate": 6.203369322287306e-05,
"loss": 0.0054,
"step": 9030
},
{
"epoch": 30.958904109589042,
"grad_norm": 0.04394320026040077,
"learning_rate": 6.195343341974899e-05,
"loss": 0.0048,
"step": 9040
},
{
"epoch": 30.993150684931507,
"grad_norm": 0.04236266762018204,
"learning_rate": 6.187314093639444e-05,
"loss": 0.0046,
"step": 9050
},
{
"epoch": 31.027397260273972,
"grad_norm": 0.06337432563304901,
"learning_rate": 6.179281599232591e-05,
"loss": 0.0055,
"step": 9060
},
{
"epoch": 31.061643835616437,
"grad_norm": 0.07716334611177444,
"learning_rate": 6.17124588071488e-05,
"loss": 0.0042,
"step": 9070
},
{
"epoch": 31.095890410958905,
"grad_norm": 0.06364544481039047,
"learning_rate": 6.163206960055651e-05,
"loss": 0.0035,
"step": 9080
},
{
"epoch": 31.13013698630137,
"grad_norm": 0.05644696578383446,
"learning_rate": 6.155164859233012e-05,
"loss": 0.0047,
"step": 9090
},
{
"epoch": 31.164383561643834,
"grad_norm": 0.08092032372951508,
"learning_rate": 6.147119600233758e-05,
"loss": 0.005,
"step": 9100
},
{
"epoch": 31.198630136986303,
"grad_norm": 0.06652740389108658,
"learning_rate": 6.13907120505332e-05,
"loss": 0.0043,
"step": 9110
},
{
"epoch": 31.232876712328768,
"grad_norm": 0.07216228544712067,
"learning_rate": 6.131019695695702e-05,
"loss": 0.0056,
"step": 9120
},
{
"epoch": 31.267123287671232,
"grad_norm": 0.05830219388008118,
"learning_rate": 6.122965094173424e-05,
"loss": 0.0046,
"step": 9130
},
{
"epoch": 31.301369863013697,
"grad_norm": 0.06598392874002457,
"learning_rate": 6.11490742250746e-05,
"loss": 0.0051,
"step": 9140
},
{
"epoch": 31.335616438356166,
"grad_norm": 0.05769110471010208,
"learning_rate": 6.106846702727172e-05,
"loss": 0.0038,
"step": 9150
},
{
"epoch": 31.36986301369863,
"grad_norm": 0.05641612783074379,
"learning_rate": 6.0987829568702656e-05,
"loss": 0.0057,
"step": 9160
},
{
"epoch": 31.404109589041095,
"grad_norm": 0.06108755245804787,
"learning_rate": 6.090716206982714e-05,
"loss": 0.0052,
"step": 9170
},
{
"epoch": 31.438356164383563,
"grad_norm": 0.05891823023557663,
"learning_rate": 6.0826464751186994e-05,
"loss": 0.004,
"step": 9180
},
{
"epoch": 31.472602739726028,
"grad_norm": 0.05389763042330742,
"learning_rate": 6.074573783340562e-05,
"loss": 0.0038,
"step": 9190
},
{
"epoch": 31.506849315068493,
"grad_norm": 0.04580220952630043,
"learning_rate": 6.066498153718735e-05,
"loss": 0.0033,
"step": 9200
},
{
"epoch": 31.541095890410958,
"grad_norm": 0.06206485256552696,
"learning_rate": 6.0584196083316794e-05,
"loss": 0.0038,
"step": 9210
},
{
"epoch": 31.575342465753426,
"grad_norm": 0.05921825021505356,
"learning_rate": 6.05033816926583e-05,
"loss": 0.0053,
"step": 9220
},
{
"epoch": 31.60958904109589,
"grad_norm": 0.08074521273374557,
"learning_rate": 6.042253858615532e-05,
"loss": 0.004,
"step": 9230
},
{
"epoch": 31.643835616438356,
"grad_norm": 0.06720732897520065,
"learning_rate": 6.034166698482984e-05,
"loss": 0.0036,
"step": 9240
},
{
"epoch": 31.67808219178082,
"grad_norm": 0.04913540184497833,
"learning_rate": 6.026076710978171e-05,
"loss": 0.0056,
"step": 9250
},
{
"epoch": 31.71232876712329,
"grad_norm": 0.03931824490427971,
"learning_rate": 6.017983918218812e-05,
"loss": 0.0041,
"step": 9260
},
{
"epoch": 31.746575342465754,
"grad_norm": 0.04003632441163063,
"learning_rate": 6.009888342330292e-05,
"loss": 0.0041,
"step": 9270
},
{
"epoch": 31.78082191780822,
"grad_norm": 0.05170729383826256,
"learning_rate": 6.001790005445607e-05,
"loss": 0.0043,
"step": 9280
},
{
"epoch": 31.815068493150687,
"grad_norm": 0.05929422751069069,
"learning_rate": 5.9936889297052986e-05,
"loss": 0.0035,
"step": 9290
},
{
"epoch": 31.84931506849315,
"grad_norm": 0.05444969981908798,
"learning_rate": 5.985585137257401e-05,
"loss": 0.004,
"step": 9300
},
{
"epoch": 31.883561643835616,
"grad_norm": 0.05639302730560303,
"learning_rate": 5.977478650257374e-05,
"loss": 0.0057,
"step": 9310
},
{
"epoch": 31.91780821917808,
"grad_norm": 0.06555838137865067,
"learning_rate": 5.969369490868042e-05,
"loss": 0.0046,
"step": 9320
},
{
"epoch": 31.95205479452055,
"grad_norm": 0.04445594176650047,
"learning_rate": 5.961257681259535e-05,
"loss": 0.0038,
"step": 9330
},
{
"epoch": 31.986301369863014,
"grad_norm": 0.06617361307144165,
"learning_rate": 5.953143243609235e-05,
"loss": 0.0047,
"step": 9340
},
{
"epoch": 32.02054794520548,
"grad_norm": 0.04996689781546593,
"learning_rate": 5.945026200101702e-05,
"loss": 0.0043,
"step": 9350
},
{
"epoch": 32.054794520547944,
"grad_norm": 0.0414830707013607,
"learning_rate": 5.9369065729286245e-05,
"loss": 0.0034,
"step": 9360
},
{
"epoch": 32.08904109589041,
"grad_norm": 0.04811393842101097,
"learning_rate": 5.92878438428875e-05,
"loss": 0.0044,
"step": 9370
},
{
"epoch": 32.12328767123287,
"grad_norm": 0.04682043194770813,
"learning_rate": 5.9206596563878357e-05,
"loss": 0.0029,
"step": 9380
},
{
"epoch": 32.157534246575345,
"grad_norm": 0.056545063853263855,
"learning_rate": 5.912532411438576e-05,
"loss": 0.0035,
"step": 9390
},
{
"epoch": 32.19178082191781,
"grad_norm": 0.047463856637477875,
"learning_rate": 5.90440267166055e-05,
"loss": 0.0038,
"step": 9400
},
{
"epoch": 32.226027397260275,
"grad_norm": 0.05764775723218918,
"learning_rate": 5.896270459280153e-05,
"loss": 0.0046,
"step": 9410
},
{
"epoch": 32.26027397260274,
"grad_norm": 0.057075630873441696,
"learning_rate": 5.888135796530544e-05,
"loss": 0.0039,
"step": 9420
},
{
"epoch": 32.294520547945204,
"grad_norm": 0.05559380352497101,
"learning_rate": 5.8799987056515804e-05,
"loss": 0.0035,
"step": 9430
},
{
"epoch": 32.32876712328767,
"grad_norm": 0.05065063014626503,
"learning_rate": 5.871859208889759e-05,
"loss": 0.0038,
"step": 9440
},
{
"epoch": 32.363013698630134,
"grad_norm": 0.051984649151563644,
"learning_rate": 5.8637173284981526e-05,
"loss": 0.0048,
"step": 9450
},
{
"epoch": 32.397260273972606,
"grad_norm": 0.055756259709596634,
"learning_rate": 5.85557308673635e-05,
"loss": 0.0043,
"step": 9460
},
{
"epoch": 32.43150684931507,
"grad_norm": 0.05831045284867287,
"learning_rate": 5.847426505870399e-05,
"loss": 0.0045,
"step": 9470
},
{
"epoch": 32.465753424657535,
"grad_norm": 0.05842358618974686,
"learning_rate": 5.8392776081727385e-05,
"loss": 0.0047,
"step": 9480
},
{
"epoch": 32.5,
"grad_norm": 0.057939786463975906,
"learning_rate": 5.831126415922148e-05,
"loss": 0.0047,
"step": 9490
},
{
"epoch": 32.534246575342465,
"grad_norm": 0.05761862173676491,
"learning_rate": 5.8229729514036705e-05,
"loss": 0.0044,
"step": 9500
},
{
"epoch": 32.56849315068493,
"grad_norm": 0.03868666663765907,
"learning_rate": 5.8148172369085686e-05,
"loss": 0.0037,
"step": 9510
},
{
"epoch": 32.602739726027394,
"grad_norm": 0.04971576854586601,
"learning_rate": 5.8066592947342555e-05,
"loss": 0.0039,
"step": 9520
},
{
"epoch": 32.636986301369866,
"grad_norm": 0.039059728384017944,
"learning_rate": 5.798499147184233e-05,
"loss": 0.0034,
"step": 9530
},
{
"epoch": 32.67123287671233,
"grad_norm": 0.04403064772486687,
"learning_rate": 5.7903368165680327e-05,
"loss": 0.0057,
"step": 9540
},
{
"epoch": 32.705479452054796,
"grad_norm": 0.05422419682145119,
"learning_rate": 5.782172325201155e-05,
"loss": 0.0041,
"step": 9550
},
{
"epoch": 32.73972602739726,
"grad_norm": 0.04113056883215904,
"learning_rate": 5.7740056954050084e-05,
"loss": 0.0041,
"step": 9560
},
{
"epoch": 32.773972602739725,
"grad_norm": 0.061150237917900085,
"learning_rate": 5.765836949506843e-05,
"loss": 0.0033,
"step": 9570
},
{
"epoch": 32.80821917808219,
"grad_norm": 0.04960055649280548,
"learning_rate": 5.757666109839702e-05,
"loss": 0.0034,
"step": 9580
},
{
"epoch": 32.842465753424655,
"grad_norm": 0.07494784146547318,
"learning_rate": 5.74949319874235e-05,
"loss": 0.0043,
"step": 9590
},
{
"epoch": 32.87671232876713,
"grad_norm": 0.054274559020996094,
"learning_rate": 5.74131823855921e-05,
"loss": 0.0039,
"step": 9600
},
{
"epoch": 32.91095890410959,
"grad_norm": 0.05537914112210274,
"learning_rate": 5.733141251640315e-05,
"loss": 0.0028,
"step": 9610
},
{
"epoch": 32.945205479452056,
"grad_norm": 0.06061186641454697,
"learning_rate": 5.72496226034123e-05,
"loss": 0.004,
"step": 9620
},
{
"epoch": 32.97945205479452,
"grad_norm": 0.07560919225215912,
"learning_rate": 5.7167812870230094e-05,
"loss": 0.0043,
"step": 9630
},
{
"epoch": 33.013698630136986,
"grad_norm": 0.0694555938243866,
"learning_rate": 5.7085983540521216e-05,
"loss": 0.0044,
"step": 9640
},
{
"epoch": 33.04794520547945,
"grad_norm": 0.05044952780008316,
"learning_rate": 5.70041348380039e-05,
"loss": 0.0037,
"step": 9650
},
{
"epoch": 33.082191780821915,
"grad_norm": 0.043574009090662,
"learning_rate": 5.692226698644938e-05,
"loss": 0.0036,
"step": 9660
},
{
"epoch": 33.11643835616438,
"grad_norm": 0.055326227098703384,
"learning_rate": 5.6840380209681255e-05,
"loss": 0.0051,
"step": 9670
},
{
"epoch": 33.15068493150685,
"grad_norm": 0.05255505442619324,
"learning_rate": 5.675847473157485e-05,
"loss": 0.0034,
"step": 9680
},
{
"epoch": 33.18493150684932,
"grad_norm": 0.04825739935040474,
"learning_rate": 5.667655077605659e-05,
"loss": 0.0037,
"step": 9690
},
{
"epoch": 33.21917808219178,
"grad_norm": 0.054841298609972,
"learning_rate": 5.6594608567103456e-05,
"loss": 0.0045,
"step": 9700
},
{
"epoch": 33.25342465753425,
"grad_norm": 0.04731395095586777,
"learning_rate": 5.65126483287423e-05,
"loss": 0.0043,
"step": 9710
},
{
"epoch": 33.28767123287671,
"grad_norm": 0.05347118899226189,
"learning_rate": 5.6430670285049314e-05,
"loss": 0.0037,
"step": 9720
},
{
"epoch": 33.321917808219176,
"grad_norm": 0.053511351346969604,
"learning_rate": 5.634867466014932e-05,
"loss": 0.0037,
"step": 9730
},
{
"epoch": 33.35616438356164,
"grad_norm": 0.056357644498348236,
"learning_rate": 5.6266661678215216e-05,
"loss": 0.005,
"step": 9740
},
{
"epoch": 33.39041095890411,
"grad_norm": 0.04963238537311554,
"learning_rate": 5.618463156346739e-05,
"loss": 0.0032,
"step": 9750
},
{
"epoch": 33.42465753424658,
"grad_norm": 0.0459924079477787,
"learning_rate": 5.6102584540173006e-05,
"loss": 0.0043,
"step": 9760
},
{
"epoch": 33.45890410958904,
"grad_norm": 0.04573008045554161,
"learning_rate": 5.602052083264555e-05,
"loss": 0.0039,
"step": 9770
},
{
"epoch": 33.49315068493151,
"grad_norm": 0.056432124227285385,
"learning_rate": 5.5938440665244006e-05,
"loss": 0.004,
"step": 9780
},
{
"epoch": 33.52739726027397,
"grad_norm": 0.03997287154197693,
"learning_rate": 5.585634426237246e-05,
"loss": 0.0035,
"step": 9790
},
{
"epoch": 33.56164383561644,
"grad_norm": 0.036699339747428894,
"learning_rate": 5.577423184847932e-05,
"loss": 0.0048,
"step": 9800
},
{
"epoch": 33.5958904109589,
"grad_norm": 0.07555674761533737,
"learning_rate": 5.569210364805677e-05,
"loss": 0.0037,
"step": 9810
},
{
"epoch": 33.63013698630137,
"grad_norm": 0.07923568040132523,
"learning_rate": 5.560995988564023e-05,
"loss": 0.0041,
"step": 9820
},
{
"epoch": 33.66438356164384,
"grad_norm": 0.05765343829989433,
"learning_rate": 5.552780078580756e-05,
"loss": 0.0038,
"step": 9830
},
{
"epoch": 33.6986301369863,
"grad_norm": 0.04562293365597725,
"learning_rate": 5.544562657317863e-05,
"loss": 0.0048,
"step": 9840
},
{
"epoch": 33.73287671232877,
"grad_norm": 0.040983159095048904,
"learning_rate": 5.5363437472414595e-05,
"loss": 0.0031,
"step": 9850
},
{
"epoch": 33.76712328767123,
"grad_norm": 0.04989850893616676,
"learning_rate": 5.52812337082173e-05,
"loss": 0.004,
"step": 9860
},
{
"epoch": 33.8013698630137,
"grad_norm": 0.05472975969314575,
"learning_rate": 5.519901550532871e-05,
"loss": 0.005,
"step": 9870
},
{
"epoch": 33.83561643835616,
"grad_norm": 0.04672018066048622,
"learning_rate": 5.511678308853026e-05,
"loss": 0.0032,
"step": 9880
},
{
"epoch": 33.86986301369863,
"grad_norm": 0.055184490978717804,
"learning_rate": 5.5034536682642224e-05,
"loss": 0.004,
"step": 9890
},
{
"epoch": 33.9041095890411,
"grad_norm": 0.04532390460371971,
"learning_rate": 5.495227651252315e-05,
"loss": 0.0034,
"step": 9900
},
{
"epoch": 33.93835616438356,
"grad_norm": 0.04610401391983032,
"learning_rate": 5.487000280306917e-05,
"loss": 0.0038,
"step": 9910
},
{
"epoch": 33.97260273972603,
"grad_norm": 0.06480266153812408,
"learning_rate": 5.478771577921351e-05,
"loss": 0.0043,
"step": 9920
},
{
"epoch": 34.00684931506849,
"grad_norm": 0.055622607469558716,
"learning_rate": 5.470541566592573e-05,
"loss": 0.0042,
"step": 9930
},
{
"epoch": 34.04109589041096,
"grad_norm": 0.05858009308576584,
"learning_rate": 5.462310268821118e-05,
"loss": 0.0047,
"step": 9940
},
{
"epoch": 34.07534246575342,
"grad_norm": 0.05842429772019386,
"learning_rate": 5.454077707111042e-05,
"loss": 0.0047,
"step": 9950
},
{
"epoch": 34.10958904109589,
"grad_norm": 0.055152349174022675,
"learning_rate": 5.445843903969854e-05,
"loss": 0.005,
"step": 9960
},
{
"epoch": 34.14383561643836,
"grad_norm": 0.05233810469508171,
"learning_rate": 5.4376088819084556e-05,
"loss": 0.004,
"step": 9970
},
{
"epoch": 34.178082191780824,
"grad_norm": 0.04537783935666084,
"learning_rate": 5.4293726634410855e-05,
"loss": 0.0037,
"step": 9980
},
{
"epoch": 34.21232876712329,
"grad_norm": 0.0472206212580204,
"learning_rate": 5.4211352710852495e-05,
"loss": 0.004,
"step": 9990
},
{
"epoch": 34.24657534246575,
"grad_norm": 0.04411351680755615,
"learning_rate": 5.4128967273616625e-05,
"loss": 0.0037,
"step": 10000
},
{
"epoch": 34.28082191780822,
"grad_norm": 0.05850313976407051,
"learning_rate": 5.404657054794189e-05,
"loss": 0.0054,
"step": 10010
},
{
"epoch": 34.31506849315068,
"grad_norm": 0.05117764323949814,
"learning_rate": 5.396416275909779e-05,
"loss": 0.0038,
"step": 10020
},
{
"epoch": 34.34931506849315,
"grad_norm": 0.05348801612854004,
"learning_rate": 5.3881744132384104e-05,
"loss": 0.0038,
"step": 10030
},
{
"epoch": 34.38356164383562,
"grad_norm": 0.05136050656437874,
"learning_rate": 5.379931489313016e-05,
"loss": 0.0043,
"step": 10040
},
{
"epoch": 34.417808219178085,
"grad_norm": 0.06988473981618881,
"learning_rate": 5.371687526669439e-05,
"loss": 0.0063,
"step": 10050
},
{
"epoch": 34.45205479452055,
"grad_norm": 0.06275316327810287,
"learning_rate": 5.363442547846356e-05,
"loss": 0.0059,
"step": 10060
},
{
"epoch": 34.486301369863014,
"grad_norm": 0.06286466866731644,
"learning_rate": 5.355196575385225e-05,
"loss": 0.0047,
"step": 10070
},
{
"epoch": 34.52054794520548,
"grad_norm": 0.06469148397445679,
"learning_rate": 5.3469496318302204e-05,
"loss": 0.0048,
"step": 10080
},
{
"epoch": 34.554794520547944,
"grad_norm": 0.04653120040893555,
"learning_rate": 5.3387017397281704e-05,
"loss": 0.0035,
"step": 10090
},
{
"epoch": 34.58904109589041,
"grad_norm": 0.04517116770148277,
"learning_rate": 5.330452921628497e-05,
"loss": 0.0038,
"step": 10100
},
{
"epoch": 34.62328767123287,
"grad_norm": 0.05882929638028145,
"learning_rate": 5.322203200083154e-05,
"loss": 0.0046,
"step": 10110
},
{
"epoch": 34.657534246575345,
"grad_norm": 0.06491271406412125,
"learning_rate": 5.313952597646568e-05,
"loss": 0.003,
"step": 10120
},
{
"epoch": 34.69178082191781,
"grad_norm": 0.05783591791987419,
"learning_rate": 5.305701136875566e-05,
"loss": 0.0051,
"step": 10130
},
{
"epoch": 34.726027397260275,
"grad_norm": 0.06943771988153458,
"learning_rate": 5.297448840329329e-05,
"loss": 0.0035,
"step": 10140
},
{
"epoch": 34.76027397260274,
"grad_norm": 0.07877009361982346,
"learning_rate": 5.2891957305693205e-05,
"loss": 0.0034,
"step": 10150
},
{
"epoch": 34.794520547945204,
"grad_norm": 0.06451980769634247,
"learning_rate": 5.280941830159227e-05,
"loss": 0.0054,
"step": 10160
},
{
"epoch": 34.82876712328767,
"grad_norm": 0.051155924797058105,
"learning_rate": 5.2726871616649e-05,
"loss": 0.0042,
"step": 10170
},
{
"epoch": 34.863013698630134,
"grad_norm": 0.08243589103221893,
"learning_rate": 5.264431747654284e-05,
"loss": 0.0049,
"step": 10180
},
{
"epoch": 34.897260273972606,
"grad_norm": 0.0775737464427948,
"learning_rate": 5.2561756106973656e-05,
"loss": 0.0049,
"step": 10190
},
{
"epoch": 34.93150684931507,
"grad_norm": 0.06622209399938583,
"learning_rate": 5.247918773366112e-05,
"loss": 0.0057,
"step": 10200
},
{
"epoch": 34.965753424657535,
"grad_norm": 0.0489589087665081,
"learning_rate": 5.2396612582343986e-05,
"loss": 0.0034,
"step": 10210
},
{
"epoch": 35.0,
"grad_norm": 0.03715719282627106,
"learning_rate": 5.231403087877955e-05,
"loss": 0.0037,
"step": 10220
},
{
"epoch": 35.034246575342465,
"grad_norm": 0.06789140403270721,
"learning_rate": 5.2231442848743064e-05,
"loss": 0.0037,
"step": 10230
},
{
"epoch": 35.06849315068493,
"grad_norm": 0.054343558847904205,
"learning_rate": 5.214884871802703e-05,
"loss": 0.0045,
"step": 10240
},
{
"epoch": 35.102739726027394,
"grad_norm": 0.051918212324380875,
"learning_rate": 5.2066248712440656e-05,
"loss": 0.005,
"step": 10250
},
{
"epoch": 35.136986301369866,
"grad_norm": 0.06282070279121399,
"learning_rate": 5.198364305780922e-05,
"loss": 0.0046,
"step": 10260
},
{
"epoch": 35.17123287671233,
"grad_norm": 0.048972342163324356,
"learning_rate": 5.1901031979973394e-05,
"loss": 0.0042,
"step": 10270
},
{
"epoch": 35.205479452054796,
"grad_norm": 0.04695377126336098,
"learning_rate": 5.1818415704788725e-05,
"loss": 0.0052,
"step": 10280
},
{
"epoch": 35.23972602739726,
"grad_norm": 0.059636637568473816,
"learning_rate": 5.1735794458124956e-05,
"loss": 0.0045,
"step": 10290
},
{
"epoch": 35.273972602739725,
"grad_norm": 0.049566976726055145,
"learning_rate": 5.165316846586541e-05,
"loss": 0.0034,
"step": 10300
},
{
"epoch": 35.30821917808219,
"grad_norm": 0.0541643388569355,
"learning_rate": 5.157053795390642e-05,
"loss": 0.0031,
"step": 10310
},
{
"epoch": 35.342465753424655,
"grad_norm": 0.06319770216941833,
"learning_rate": 5.148790314815663e-05,
"loss": 0.0048,
"step": 10320
},
{
"epoch": 35.37671232876713,
"grad_norm": 0.0485026016831398,
"learning_rate": 5.1405264274536445e-05,
"loss": 0.0053,
"step": 10330
},
{
"epoch": 35.41095890410959,
"grad_norm": 0.04801278933882713,
"learning_rate": 5.132262155897739e-05,
"loss": 0.0042,
"step": 10340
},
{
"epoch": 35.445205479452056,
"grad_norm": 0.05489344894886017,
"learning_rate": 5.123997522742151e-05,
"loss": 0.0044,
"step": 10350
},
{
"epoch": 35.47945205479452,
"grad_norm": 0.06271504610776901,
"learning_rate": 5.1157325505820694e-05,
"loss": 0.0044,
"step": 10360
},
{
"epoch": 35.513698630136986,
"grad_norm": 0.0559224933385849,
"learning_rate": 5.107467262013614e-05,
"loss": 0.0059,
"step": 10370
},
{
"epoch": 35.54794520547945,
"grad_norm": 0.04511038959026337,
"learning_rate": 5.0992016796337686e-05,
"loss": 0.004,
"step": 10380
},
{
"epoch": 35.582191780821915,
"grad_norm": 0.04959840700030327,
"learning_rate": 5.0909358260403186e-05,
"loss": 0.0049,
"step": 10390
},
{
"epoch": 35.61643835616438,
"grad_norm": 0.04910969361662865,
"learning_rate": 5.0826697238317935e-05,
"loss": 0.0036,
"step": 10400
},
{
"epoch": 35.65068493150685,
"grad_norm": 0.04376056417822838,
"learning_rate": 5.074403395607399e-05,
"loss": 0.0032,
"step": 10410
},
{
"epoch": 35.68493150684932,
"grad_norm": 0.03833574429154396,
"learning_rate": 5.066136863966963e-05,
"loss": 0.0038,
"step": 10420
},
{
"epoch": 35.71917808219178,
"grad_norm": 0.05588557571172714,
"learning_rate": 5.057870151510864e-05,
"loss": 0.0038,
"step": 10430
},
{
"epoch": 35.75342465753425,
"grad_norm": 0.05649462342262268,
"learning_rate": 5.0496032808399815e-05,
"loss": 0.0037,
"step": 10440
},
{
"epoch": 35.78767123287671,
"grad_norm": 0.05454027280211449,
"learning_rate": 5.041336274555625e-05,
"loss": 0.0029,
"step": 10450
},
{
"epoch": 35.821917808219176,
"grad_norm": 0.05826808884739876,
"learning_rate": 5.033069155259471e-05,
"loss": 0.004,
"step": 10460
},
{
"epoch": 35.85616438356164,
"grad_norm": 0.05111277848482132,
"learning_rate": 5.02480194555351e-05,
"loss": 0.0045,
"step": 10470
},
{
"epoch": 35.89041095890411,
"grad_norm": 0.045382965356111526,
"learning_rate": 5.016534668039976e-05,
"loss": 0.0035,
"step": 10480
},
{
"epoch": 35.92465753424658,
"grad_norm": 0.04012225195765495,
"learning_rate": 5.0082673453212914e-05,
"loss": 0.0049,
"step": 10490
},
{
"epoch": 35.95890410958904,
"grad_norm": 0.05032500624656677,
"learning_rate": 5e-05,
"loss": 0.0045,
"step": 10500
},
{
"epoch": 35.99315068493151,
"grad_norm": 0.054377779364585876,
"learning_rate": 4.991732654678709e-05,
"loss": 0.0046,
"step": 10510
},
{
"epoch": 36.02739726027397,
"grad_norm": 0.07059525698423386,
"learning_rate": 4.9834653319600246e-05,
"loss": 0.0037,
"step": 10520
},
{
"epoch": 36.06164383561644,
"grad_norm": 0.07613108307123184,
"learning_rate": 4.975198054446492e-05,
"loss": 0.0043,
"step": 10530
},
{
"epoch": 36.0958904109589,
"grad_norm": 0.050086334347724915,
"learning_rate": 4.96693084474053e-05,
"loss": 0.0049,
"step": 10540
},
{
"epoch": 36.13013698630137,
"grad_norm": 0.054679181426763535,
"learning_rate": 4.9586637254443756e-05,
"loss": 0.0041,
"step": 10550
},
{
"epoch": 36.16438356164384,
"grad_norm": 0.04521835222840309,
"learning_rate": 4.950396719160018e-05,
"loss": 0.0042,
"step": 10560
},
{
"epoch": 36.1986301369863,
"grad_norm": 0.054949574172496796,
"learning_rate": 4.942129848489137e-05,
"loss": 0.0046,
"step": 10570
},
{
"epoch": 36.23287671232877,
"grad_norm": 0.05560026690363884,
"learning_rate": 4.93386313603304e-05,
"loss": 0.0041,
"step": 10580
},
{
"epoch": 36.26712328767123,
"grad_norm": 0.04978262633085251,
"learning_rate": 4.925596604392603e-05,
"loss": 0.0043,
"step": 10590
},
{
"epoch": 36.3013698630137,
"grad_norm": 0.05357494577765465,
"learning_rate": 4.917330276168208e-05,
"loss": 0.005,
"step": 10600
},
{
"epoch": 36.33561643835616,
"grad_norm": 0.06006361544132233,
"learning_rate": 4.909064173959681e-05,
"loss": 0.0034,
"step": 10610
},
{
"epoch": 36.36986301369863,
"grad_norm": 0.06323786079883575,
"learning_rate": 4.9007983203662326e-05,
"loss": 0.0043,
"step": 10620
},
{
"epoch": 36.4041095890411,
"grad_norm": 0.054876018315553665,
"learning_rate": 4.892532737986387e-05,
"loss": 0.0033,
"step": 10630
},
{
"epoch": 36.43835616438356,
"grad_norm": 0.05244192108511925,
"learning_rate": 4.884267449417931e-05,
"loss": 0.004,
"step": 10640
},
{
"epoch": 36.47260273972603,
"grad_norm": 0.03981318697333336,
"learning_rate": 4.87600247725785e-05,
"loss": 0.0038,
"step": 10650
},
{
"epoch": 36.50684931506849,
"grad_norm": 0.07409898191690445,
"learning_rate": 4.867737844102261e-05,
"loss": 0.005,
"step": 10660
},
{
"epoch": 36.54109589041096,
"grad_norm": 0.052267491817474365,
"learning_rate": 4.8594735725463567e-05,
"loss": 0.0037,
"step": 10670
},
{
"epoch": 36.57534246575342,
"grad_norm": 0.06407027691602707,
"learning_rate": 4.851209685184338e-05,
"loss": 0.004,
"step": 10680
},
{
"epoch": 36.60958904109589,
"grad_norm": 0.07134547829627991,
"learning_rate": 4.8429462046093585e-05,
"loss": 0.0033,
"step": 10690
},
{
"epoch": 36.64383561643836,
"grad_norm": 0.07874727249145508,
"learning_rate": 4.834683153413459e-05,
"loss": 0.0041,
"step": 10700
},
{
"epoch": 36.678082191780824,
"grad_norm": 0.05885026976466179,
"learning_rate": 4.826420554187506e-05,
"loss": 0.0042,
"step": 10710
},
{
"epoch": 36.71232876712329,
"grad_norm": 0.04963897913694382,
"learning_rate": 4.818158429521129e-05,
"loss": 0.0042,
"step": 10720
},
{
"epoch": 36.74657534246575,
"grad_norm": 0.062294941395521164,
"learning_rate": 4.809896802002662e-05,
"loss": 0.0048,
"step": 10730
},
{
"epoch": 36.78082191780822,
"grad_norm": 0.04608921334147453,
"learning_rate": 4.801635694219079e-05,
"loss": 0.0032,
"step": 10740
},
{
"epoch": 36.81506849315068,
"grad_norm": 0.04494161158800125,
"learning_rate": 4.7933751287559335e-05,
"loss": 0.0046,
"step": 10750
},
{
"epoch": 36.84931506849315,
"grad_norm": 0.0627615675330162,
"learning_rate": 4.785115128197298e-05,
"loss": 0.0035,
"step": 10760
},
{
"epoch": 36.88356164383562,
"grad_norm": 0.0486239530146122,
"learning_rate": 4.776855715125694e-05,
"loss": 0.0043,
"step": 10770
},
{
"epoch": 36.917808219178085,
"grad_norm": 0.04762973263859749,
"learning_rate": 4.7685969121220456e-05,
"loss": 0.0034,
"step": 10780
},
{
"epoch": 36.95205479452055,
"grad_norm": 0.04642920196056366,
"learning_rate": 4.7603387417656026e-05,
"loss": 0.0041,
"step": 10790
},
{
"epoch": 36.986301369863014,
"grad_norm": 0.04814445227384567,
"learning_rate": 4.7520812266338885e-05,
"loss": 0.0049,
"step": 10800
},
{
"epoch": 37.02054794520548,
"grad_norm": 0.04671894758939743,
"learning_rate": 4.743824389302635e-05,
"loss": 0.0039,
"step": 10810
},
{
"epoch": 37.054794520547944,
"grad_norm": 0.03668942302465439,
"learning_rate": 4.735568252345718e-05,
"loss": 0.0042,
"step": 10820
},
{
"epoch": 37.08904109589041,
"grad_norm": 0.07752338796854019,
"learning_rate": 4.7273128383351015e-05,
"loss": 0.0037,
"step": 10830
},
{
"epoch": 37.12328767123287,
"grad_norm": 0.05838659033179283,
"learning_rate": 4.7190581698407725e-05,
"loss": 0.0042,
"step": 10840
},
{
"epoch": 37.157534246575345,
"grad_norm": 0.054939839988946915,
"learning_rate": 4.710804269430681e-05,
"loss": 0.0046,
"step": 10850
},
{
"epoch": 37.19178082191781,
"grad_norm": 0.06352076679468155,
"learning_rate": 4.702551159670672e-05,
"loss": 0.0059,
"step": 10860
},
{
"epoch": 37.226027397260275,
"grad_norm": 0.060442935675382614,
"learning_rate": 4.694298863124435e-05,
"loss": 0.0049,
"step": 10870
},
{
"epoch": 37.26027397260274,
"grad_norm": 0.0606268011033535,
"learning_rate": 4.6860474023534335e-05,
"loss": 0.0043,
"step": 10880
},
{
"epoch": 37.294520547945204,
"grad_norm": 0.10119619220495224,
"learning_rate": 4.677796799916845e-05,
"loss": 0.0051,
"step": 10890
},
{
"epoch": 37.32876712328767,
"grad_norm": 0.04673849418759346,
"learning_rate": 4.669547078371504e-05,
"loss": 0.0033,
"step": 10900
},
{
"epoch": 37.363013698630134,
"grad_norm": 0.04612864926457405,
"learning_rate": 4.66129826027183e-05,
"loss": 0.0034,
"step": 10910
},
{
"epoch": 37.397260273972606,
"grad_norm": 0.05315816029906273,
"learning_rate": 4.65305036816978e-05,
"loss": 0.0038,
"step": 10920
},
{
"epoch": 37.43150684931507,
"grad_norm": 0.04591621458530426,
"learning_rate": 4.6448034246147754e-05,
"loss": 0.005,
"step": 10930
},
{
"epoch": 37.465753424657535,
"grad_norm": 0.05279077589511871,
"learning_rate": 4.6365574521536445e-05,
"loss": 0.0038,
"step": 10940
},
{
"epoch": 37.5,
"grad_norm": 0.05146599933505058,
"learning_rate": 4.6283124733305624e-05,
"loss": 0.0043,
"step": 10950
},
{
"epoch": 37.534246575342465,
"grad_norm": 0.05392171069979668,
"learning_rate": 4.620068510686985e-05,
"loss": 0.0034,
"step": 10960
},
{
"epoch": 37.56849315068493,
"grad_norm": 0.05198705196380615,
"learning_rate": 4.611825586761591e-05,
"loss": 0.0041,
"step": 10970
},
{
"epoch": 37.602739726027394,
"grad_norm": 0.046402864158153534,
"learning_rate": 4.60358372409022e-05,
"loss": 0.003,
"step": 10980
},
{
"epoch": 37.636986301369866,
"grad_norm": 0.06605874747037888,
"learning_rate": 4.5953429452058135e-05,
"loss": 0.0053,
"step": 10990
},
{
"epoch": 37.67123287671233,
"grad_norm": 0.04339564964175224,
"learning_rate": 4.5871032726383386e-05,
"loss": 0.0034,
"step": 11000
},
{
"epoch": 37.705479452054796,
"grad_norm": 0.0516190268099308,
"learning_rate": 4.5788647289147516e-05,
"loss": 0.0036,
"step": 11010
},
{
"epoch": 37.73972602739726,
"grad_norm": 0.07312241941690445,
"learning_rate": 4.570627336558915e-05,
"loss": 0.0035,
"step": 11020
},
{
"epoch": 37.773972602739725,
"grad_norm": 0.03444405645132065,
"learning_rate": 4.562391118091544e-05,
"loss": 0.0029,
"step": 11030
},
{
"epoch": 37.80821917808219,
"grad_norm": 0.03673717379570007,
"learning_rate": 4.554156096030149e-05,
"loss": 0.0035,
"step": 11040
},
{
"epoch": 37.842465753424655,
"grad_norm": 0.04546872526407242,
"learning_rate": 4.545922292888959e-05,
"loss": 0.0045,
"step": 11050
},
{
"epoch": 37.87671232876713,
"grad_norm": 0.041294973343610764,
"learning_rate": 4.537689731178883e-05,
"loss": 0.0036,
"step": 11060
},
{
"epoch": 37.91095890410959,
"grad_norm": 0.04516911506652832,
"learning_rate": 4.529458433407429e-05,
"loss": 0.0054,
"step": 11070
},
{
"epoch": 37.945205479452056,
"grad_norm": 0.05957014858722687,
"learning_rate": 4.5212284220786494e-05,
"loss": 0.0032,
"step": 11080
},
{
"epoch": 37.97945205479452,
"grad_norm": 0.05336372181773186,
"learning_rate": 4.5129997196930845e-05,
"loss": 0.0047,
"step": 11090
},
{
"epoch": 38.013698630136986,
"grad_norm": 0.05886836349964142,
"learning_rate": 4.504772348747687e-05,
"loss": 0.0039,
"step": 11100
},
{
"epoch": 38.04794520547945,
"grad_norm": 0.05502059683203697,
"learning_rate": 4.496546331735778e-05,
"loss": 0.0043,
"step": 11110
},
{
"epoch": 38.082191780821915,
"grad_norm": 0.04685702919960022,
"learning_rate": 4.488321691146975e-05,
"loss": 0.0038,
"step": 11120
},
{
"epoch": 38.11643835616438,
"grad_norm": 0.0464017279446125,
"learning_rate": 4.480098449467132e-05,
"loss": 0.0062,
"step": 11130
},
{
"epoch": 38.15068493150685,
"grad_norm": 0.043533407151699066,
"learning_rate": 4.471876629178273e-05,
"loss": 0.0034,
"step": 11140
},
{
"epoch": 38.18493150684932,
"grad_norm": 0.04644559323787689,
"learning_rate": 4.463656252758542e-05,
"loss": 0.0035,
"step": 11150
},
{
"epoch": 38.21917808219178,
"grad_norm": 0.05499809607863426,
"learning_rate": 4.4554373426821374e-05,
"loss": 0.0036,
"step": 11160
},
{
"epoch": 38.25342465753425,
"grad_norm": 0.07476824522018433,
"learning_rate": 4.447219921419244e-05,
"loss": 0.005,
"step": 11170
},
{
"epoch": 38.28767123287671,
"grad_norm": 0.07308005541563034,
"learning_rate": 4.439004011435979e-05,
"loss": 0.0035,
"step": 11180
},
{
"epoch": 38.321917808219176,
"grad_norm": 0.045294955372810364,
"learning_rate": 4.430789635194324e-05,
"loss": 0.0031,
"step": 11190
},
{
"epoch": 38.35616438356164,
"grad_norm": 0.05321976915001869,
"learning_rate": 4.4225768151520694e-05,
"loss": 0.0049,
"step": 11200
},
{
"epoch": 38.39041095890411,
"grad_norm": 0.05076253041625023,
"learning_rate": 4.414365573762755e-05,
"loss": 0.0051,
"step": 11210
},
{
"epoch": 38.42465753424658,
"grad_norm": 0.054397087544202805,
"learning_rate": 4.406155933475599e-05,
"loss": 0.0053,
"step": 11220
},
{
"epoch": 38.45890410958904,
"grad_norm": 0.05751054361462593,
"learning_rate": 4.3979479167354477e-05,
"loss": 0.0035,
"step": 11230
},
{
"epoch": 38.49315068493151,
"grad_norm": 0.050182171165943146,
"learning_rate": 4.3897415459827e-05,
"loss": 0.0031,
"step": 11240
},
{
"epoch": 38.52739726027397,
"grad_norm": 0.04209424555301666,
"learning_rate": 4.381536843653262e-05,
"loss": 0.004,
"step": 11250
},
{
"epoch": 38.56164383561644,
"grad_norm": 0.04507875442504883,
"learning_rate": 4.373333832178478e-05,
"loss": 0.0036,
"step": 11260
},
{
"epoch": 38.5958904109589,
"grad_norm": 0.052286434918642044,
"learning_rate": 4.365132533985071e-05,
"loss": 0.0041,
"step": 11270
},
{
"epoch": 38.63013698630137,
"grad_norm": 0.04619704186916351,
"learning_rate": 4.3569329714950704e-05,
"loss": 0.0036,
"step": 11280
},
{
"epoch": 38.66438356164384,
"grad_norm": 0.0394359715282917,
"learning_rate": 4.348735167125771e-05,
"loss": 0.0031,
"step": 11290
},
{
"epoch": 38.6986301369863,
"grad_norm": 0.06122252345085144,
"learning_rate": 4.3405391432896555e-05,
"loss": 0.0032,
"step": 11300
},
{
"epoch": 38.73287671232877,
"grad_norm": 0.043462276458740234,
"learning_rate": 4.3323449223943416e-05,
"loss": 0.0031,
"step": 11310
},
{
"epoch": 38.76712328767123,
"grad_norm": 0.04255270957946777,
"learning_rate": 4.324152526842517e-05,
"loss": 0.0034,
"step": 11320
},
{
"epoch": 38.8013698630137,
"grad_norm": 0.042190827429294586,
"learning_rate": 4.315961979031875e-05,
"loss": 0.0043,
"step": 11330
},
{
"epoch": 38.83561643835616,
"grad_norm": 0.06325828284025192,
"learning_rate": 4.307773301355062e-05,
"loss": 0.0051,
"step": 11340
},
{
"epoch": 38.86986301369863,
"grad_norm": 0.04807128384709358,
"learning_rate": 4.2995865161996105e-05,
"loss": 0.0039,
"step": 11350
},
{
"epoch": 38.9041095890411,
"grad_norm": 0.05032104253768921,
"learning_rate": 4.291401645947879e-05,
"loss": 0.0038,
"step": 11360
},
{
"epoch": 38.93835616438356,
"grad_norm": 0.060034602880477905,
"learning_rate": 4.283218712976992e-05,
"loss": 0.0035,
"step": 11370
},
{
"epoch": 38.97260273972603,
"grad_norm": 0.04731849208474159,
"learning_rate": 4.275037739658771e-05,
"loss": 0.0036,
"step": 11380
},
{
"epoch": 39.00684931506849,
"grad_norm": 0.054858967661857605,
"learning_rate": 4.2668587483596864e-05,
"loss": 0.004,
"step": 11390
},
{
"epoch": 39.04109589041096,
"grad_norm": 0.06338762491941452,
"learning_rate": 4.2586817614407895e-05,
"loss": 0.0036,
"step": 11400
},
{
"epoch": 39.07534246575342,
"grad_norm": 0.03571184724569321,
"learning_rate": 4.250506801257653e-05,
"loss": 0.0036,
"step": 11410
},
{
"epoch": 39.10958904109589,
"grad_norm": 0.03335060179233551,
"learning_rate": 4.2423338901602985e-05,
"loss": 0.0033,
"step": 11420
},
{
"epoch": 39.14383561643836,
"grad_norm": 0.04027834162116051,
"learning_rate": 4.234163050493158e-05,
"loss": 0.0042,
"step": 11430
},
{
"epoch": 39.178082191780824,
"grad_norm": 0.049982279539108276,
"learning_rate": 4.2259943045949934e-05,
"loss": 0.0041,
"step": 11440
},
{
"epoch": 39.21232876712329,
"grad_norm": 0.06824607402086258,
"learning_rate": 4.2178276747988446e-05,
"loss": 0.004,
"step": 11450
},
{
"epoch": 39.24657534246575,
"grad_norm": 0.04790511727333069,
"learning_rate": 4.209663183431969e-05,
"loss": 0.0037,
"step": 11460
},
{
"epoch": 39.28082191780822,
"grad_norm": 0.07713089883327484,
"learning_rate": 4.201500852815768e-05,
"loss": 0.0037,
"step": 11470
},
{
"epoch": 39.31506849315068,
"grad_norm": 0.0667371153831482,
"learning_rate": 4.1933407052657456e-05,
"loss": 0.0048,
"step": 11480
},
{
"epoch": 39.34931506849315,
"grad_norm": 0.05451853573322296,
"learning_rate": 4.1851827630914305e-05,
"loss": 0.0034,
"step": 11490
},
{
"epoch": 39.38356164383562,
"grad_norm": 0.05602137744426727,
"learning_rate": 4.17702704859633e-05,
"loss": 0.0035,
"step": 11500
},
{
"epoch": 39.417808219178085,
"grad_norm": 0.07803839445114136,
"learning_rate": 4.1688735840778546e-05,
"loss": 0.0033,
"step": 11510
},
{
"epoch": 39.45205479452055,
"grad_norm": 0.04363902285695076,
"learning_rate": 4.160722391827262e-05,
"loss": 0.0034,
"step": 11520
},
{
"epoch": 39.486301369863014,
"grad_norm": 0.06316430121660233,
"learning_rate": 4.1525734941296026e-05,
"loss": 0.0037,
"step": 11530
},
{
"epoch": 39.52054794520548,
"grad_norm": 0.09381528198719025,
"learning_rate": 4.14442691326365e-05,
"loss": 0.005,
"step": 11540
},
{
"epoch": 39.554794520547944,
"grad_norm": 0.0672030970454216,
"learning_rate": 4.13628267150185e-05,
"loss": 0.0039,
"step": 11550
},
{
"epoch": 39.58904109589041,
"grad_norm": 0.05352475121617317,
"learning_rate": 4.1281407911102425e-05,
"loss": 0.0032,
"step": 11560
},
{
"epoch": 39.62328767123287,
"grad_norm": 0.05327161028981209,
"learning_rate": 4.120001294348421e-05,
"loss": 0.0033,
"step": 11570
},
{
"epoch": 39.657534246575345,
"grad_norm": 0.04990739747881889,
"learning_rate": 4.111864203469457e-05,
"loss": 0.0041,
"step": 11580
},
{
"epoch": 39.69178082191781,
"grad_norm": 0.04503406584262848,
"learning_rate": 4.103729540719847e-05,
"loss": 0.0034,
"step": 11590
},
{
"epoch": 39.726027397260275,
"grad_norm": 0.05482426658272743,
"learning_rate": 4.095597328339452e-05,
"loss": 0.0063,
"step": 11600
},
{
"epoch": 39.76027397260274,
"grad_norm": 0.047005269676446915,
"learning_rate": 4.087467588561424e-05,
"loss": 0.0028,
"step": 11610
},
{
"epoch": 39.794520547945204,
"grad_norm": 0.05959664657711983,
"learning_rate": 4.079340343612165e-05,
"loss": 0.0039,
"step": 11620
},
{
"epoch": 39.82876712328767,
"grad_norm": 0.06386947631835938,
"learning_rate": 4.07121561571125e-05,
"loss": 0.0027,
"step": 11630
},
{
"epoch": 39.863013698630134,
"grad_norm": 0.07454657554626465,
"learning_rate": 4.063093427071376e-05,
"loss": 0.004,
"step": 11640
},
{
"epoch": 39.897260273972606,
"grad_norm": 0.04651208966970444,
"learning_rate": 4.0549737998983e-05,
"loss": 0.0036,
"step": 11650
},
{
"epoch": 39.93150684931507,
"grad_norm": 0.04951368644833565,
"learning_rate": 4.046856756390767e-05,
"loss": 0.0042,
"step": 11660
},
{
"epoch": 39.965753424657535,
"grad_norm": 0.047568511217832565,
"learning_rate": 4.038742318740465e-05,
"loss": 0.003,
"step": 11670
},
{
"epoch": 40.0,
"grad_norm": 0.04516076669096947,
"learning_rate": 4.0306305091319595e-05,
"loss": 0.0043,
"step": 11680
},
{
"epoch": 40.034246575342465,
"grad_norm": 0.05222661420702934,
"learning_rate": 4.0225213497426276e-05,
"loss": 0.0049,
"step": 11690
},
{
"epoch": 40.06849315068493,
"grad_norm": 0.04708956927061081,
"learning_rate": 4.0144148627425993e-05,
"loss": 0.0036,
"step": 11700
},
{
"epoch": 40.102739726027394,
"grad_norm": 0.05811784416437149,
"learning_rate": 4.006311070294702e-05,
"loss": 0.0047,
"step": 11710
},
{
"epoch": 40.136986301369866,
"grad_norm": 0.05184715986251831,
"learning_rate": 3.9982099945543945e-05,
"loss": 0.0044,
"step": 11720
},
{
"epoch": 40.17123287671233,
"grad_norm": 0.05277574062347412,
"learning_rate": 3.9901116576697083e-05,
"loss": 0.0045,
"step": 11730
},
{
"epoch": 40.205479452054796,
"grad_norm": 0.038952380418777466,
"learning_rate": 3.982016081781189e-05,
"loss": 0.0032,
"step": 11740
},
{
"epoch": 40.23972602739726,
"grad_norm": 0.059717144817113876,
"learning_rate": 3.973923289021829e-05,
"loss": 0.0031,
"step": 11750
},
{
"epoch": 40.273972602739725,
"grad_norm": 0.06439585983753204,
"learning_rate": 3.965833301517017e-05,
"loss": 0.0047,
"step": 11760
},
{
"epoch": 40.30821917808219,
"grad_norm": 0.05810396373271942,
"learning_rate": 3.9577461413844684e-05,
"loss": 0.0044,
"step": 11770
},
{
"epoch": 40.342465753424655,
"grad_norm": 0.043367937207221985,
"learning_rate": 3.949661830734172e-05,
"loss": 0.003,
"step": 11780
},
{
"epoch": 40.37671232876713,
"grad_norm": 0.046338461339473724,
"learning_rate": 3.9415803916683224e-05,
"loss": 0.0045,
"step": 11790
},
{
"epoch": 40.41095890410959,
"grad_norm": 0.04636251553893089,
"learning_rate": 3.933501846281267e-05,
"loss": 0.0038,
"step": 11800
},
{
"epoch": 40.445205479452056,
"grad_norm": 0.03807734698057175,
"learning_rate": 3.925426216659438e-05,
"loss": 0.0028,
"step": 11810
},
{
"epoch": 40.47945205479452,
"grad_norm": 0.0459088459610939,
"learning_rate": 3.917353524881302e-05,
"loss": 0.0032,
"step": 11820
},
{
"epoch": 40.513698630136986,
"grad_norm": 0.032536957412958145,
"learning_rate": 3.9092837930172884e-05,
"loss": 0.0029,
"step": 11830
},
{
"epoch": 40.54794520547945,
"grad_norm": 0.03757496923208237,
"learning_rate": 3.901217043129735e-05,
"loss": 0.0031,
"step": 11840
},
{
"epoch": 40.582191780821915,
"grad_norm": 0.048789843916893005,
"learning_rate": 3.8931532972728285e-05,
"loss": 0.003,
"step": 11850
},
{
"epoch": 40.61643835616438,
"grad_norm": 0.07031738758087158,
"learning_rate": 3.8850925774925425e-05,
"loss": 0.0037,
"step": 11860
},
{
"epoch": 40.65068493150685,
"grad_norm": 0.05289865657687187,
"learning_rate": 3.877034905826577e-05,
"loss": 0.0043,
"step": 11870
},
{
"epoch": 40.68493150684932,
"grad_norm": 0.044006284326314926,
"learning_rate": 3.8689803043043e-05,
"loss": 0.0038,
"step": 11880
},
{
"epoch": 40.71917808219178,
"grad_norm": 0.041351962834596634,
"learning_rate": 3.860928794946682e-05,
"loss": 0.0033,
"step": 11890
},
{
"epoch": 40.75342465753425,
"grad_norm": 0.04072045534849167,
"learning_rate": 3.852880399766243e-05,
"loss": 0.0037,
"step": 11900
},
{
"epoch": 40.78767123287671,
"grad_norm": 0.06456096470355988,
"learning_rate": 3.844835140766988e-05,
"loss": 0.0038,
"step": 11910
},
{
"epoch": 40.821917808219176,
"grad_norm": 0.05244871601462364,
"learning_rate": 3.836793039944349e-05,
"loss": 0.0033,
"step": 11920
},
{
"epoch": 40.85616438356164,
"grad_norm": 0.04571353644132614,
"learning_rate": 3.828754119285123e-05,
"loss": 0.0031,
"step": 11930
},
{
"epoch": 40.89041095890411,
"grad_norm": 0.03916588053107262,
"learning_rate": 3.820718400767409e-05,
"loss": 0.0034,
"step": 11940
},
{
"epoch": 40.92465753424658,
"grad_norm": 0.03788131847977638,
"learning_rate": 3.812685906360557e-05,
"loss": 0.004,
"step": 11950
},
{
"epoch": 40.95890410958904,
"grad_norm": 0.0459442101418972,
"learning_rate": 3.8046566580251e-05,
"loss": 0.0031,
"step": 11960
},
{
"epoch": 40.99315068493151,
"grad_norm": 0.058368634432554245,
"learning_rate": 3.796630677712697e-05,
"loss": 0.0029,
"step": 11970
},
{
"epoch": 41.02739726027397,
"grad_norm": 0.04359531030058861,
"learning_rate": 3.788607987366069e-05,
"loss": 0.0039,
"step": 11980
},
{
"epoch": 41.06164383561644,
"grad_norm": 0.04517539590597153,
"learning_rate": 3.780588608918947e-05,
"loss": 0.0025,
"step": 11990
},
{
"epoch": 41.0958904109589,
"grad_norm": 0.0433451309800148,
"learning_rate": 3.772572564296005e-05,
"loss": 0.0034,
"step": 12000
},
{
"epoch": 41.13013698630137,
"grad_norm": 0.04753587022423744,
"learning_rate": 3.764559875412803e-05,
"loss": 0.0037,
"step": 12010
},
{
"epoch": 41.16438356164384,
"grad_norm": 0.06799773871898651,
"learning_rate": 3.756550564175727e-05,
"loss": 0.0033,
"step": 12020
},
{
"epoch": 41.1986301369863,
"grad_norm": 0.05588128790259361,
"learning_rate": 3.748544652481927e-05,
"loss": 0.0044,
"step": 12030
},
{
"epoch": 41.23287671232877,
"grad_norm": 0.06136851757764816,
"learning_rate": 3.74054216221926e-05,
"loss": 0.0029,
"step": 12040
},
{
"epoch": 41.26712328767123,
"grad_norm": 0.052555352449417114,
"learning_rate": 3.73254311526623e-05,
"loss": 0.0039,
"step": 12050
},
{
"epoch": 41.3013698630137,
"grad_norm": 0.04874827712774277,
"learning_rate": 3.7245475334919246e-05,
"loss": 0.0036,
"step": 12060
},
{
"epoch": 41.33561643835616,
"grad_norm": 0.047369617968797684,
"learning_rate": 3.716555438755961e-05,
"loss": 0.0034,
"step": 12070
},
{
"epoch": 41.36986301369863,
"grad_norm": 0.04781021550297737,
"learning_rate": 3.7085668529084184e-05,
"loss": 0.0035,
"step": 12080
},
{
"epoch": 41.4041095890411,
"grad_norm": 0.05383175238966942,
"learning_rate": 3.700581797789786e-05,
"loss": 0.0037,
"step": 12090
},
{
"epoch": 41.43835616438356,
"grad_norm": 0.03811519220471382,
"learning_rate": 3.6926002952309016e-05,
"loss": 0.0037,
"step": 12100
},
{
"epoch": 41.47260273972603,
"grad_norm": 0.03701299428939819,
"learning_rate": 3.684622367052887e-05,
"loss": 0.0049,
"step": 12110
},
{
"epoch": 41.50684931506849,
"grad_norm": 0.03665095567703247,
"learning_rate": 3.676648035067093e-05,
"loss": 0.0041,
"step": 12120
},
{
"epoch": 41.54109589041096,
"grad_norm": 0.043359316885471344,
"learning_rate": 3.6686773210750385e-05,
"loss": 0.0034,
"step": 12130
},
{
"epoch": 41.57534246575342,
"grad_norm": 0.03668743371963501,
"learning_rate": 3.6607102468683526e-05,
"loss": 0.0033,
"step": 12140
},
{
"epoch": 41.60958904109589,
"grad_norm": 0.033484235405921936,
"learning_rate": 3.65274683422871e-05,
"loss": 0.0044,
"step": 12150
},
{
"epoch": 41.64383561643836,
"grad_norm": 0.04815152660012245,
"learning_rate": 3.6447871049277796e-05,
"loss": 0.0038,
"step": 12160
},
{
"epoch": 41.678082191780824,
"grad_norm": 0.05247364938259125,
"learning_rate": 3.636831080727154e-05,
"loss": 0.0033,
"step": 12170
},
{
"epoch": 41.71232876712329,
"grad_norm": 0.051211703568696976,
"learning_rate": 3.628878783378302e-05,
"loss": 0.0035,
"step": 12180
},
{
"epoch": 41.74657534246575,
"grad_norm": 0.056743912398815155,
"learning_rate": 3.6209302346225006e-05,
"loss": 0.0044,
"step": 12190
},
{
"epoch": 41.78082191780822,
"grad_norm": 0.044049229472875595,
"learning_rate": 3.612985456190778e-05,
"loss": 0.0036,
"step": 12200
},
{
"epoch": 41.81506849315068,
"grad_norm": 0.04488535597920418,
"learning_rate": 3.605044469803854e-05,
"loss": 0.0036,
"step": 12210
},
{
"epoch": 41.84931506849315,
"grad_norm": 0.05490657687187195,
"learning_rate": 3.597107297172084e-05,
"loss": 0.0049,
"step": 12220
},
{
"epoch": 41.88356164383562,
"grad_norm": 0.05091523379087448,
"learning_rate": 3.5891739599953945e-05,
"loss": 0.0035,
"step": 12230
},
{
"epoch": 41.917808219178085,
"grad_norm": 0.04404463246464729,
"learning_rate": 3.581244479963225e-05,
"loss": 0.0034,
"step": 12240
},
{
"epoch": 41.95205479452055,
"grad_norm": 0.05935285985469818,
"learning_rate": 3.5733188787544745e-05,
"loss": 0.0044,
"step": 12250
},
{
"epoch": 41.986301369863014,
"grad_norm": 0.048552870750427246,
"learning_rate": 3.5653971780374295e-05,
"loss": 0.0038,
"step": 12260
},
{
"epoch": 42.02054794520548,
"grad_norm": 0.06793242692947388,
"learning_rate": 3.557479399469721e-05,
"loss": 0.0049,
"step": 12270
},
{
"epoch": 42.054794520547944,
"grad_norm": 0.05885540693998337,
"learning_rate": 3.5495655646982505e-05,
"loss": 0.004,
"step": 12280
},
{
"epoch": 42.08904109589041,
"grad_norm": 0.05235716328024864,
"learning_rate": 3.541655695359142e-05,
"loss": 0.0045,
"step": 12290
},
{
"epoch": 42.12328767123287,
"grad_norm": 0.05270376428961754,
"learning_rate": 3.533749813077677e-05,
"loss": 0.0039,
"step": 12300
},
{
"epoch": 42.157534246575345,
"grad_norm": 0.06084022298455238,
"learning_rate": 3.525847939468233e-05,
"loss": 0.0041,
"step": 12310
},
{
"epoch": 42.19178082191781,
"grad_norm": 0.04897018149495125,
"learning_rate": 3.517950096134232e-05,
"loss": 0.003,
"step": 12320
},
{
"epoch": 42.226027397260275,
"grad_norm": 0.04482729360461235,
"learning_rate": 3.5100563046680764e-05,
"loss": 0.0035,
"step": 12330
},
{
"epoch": 42.26027397260274,
"grad_norm": 0.03667287901043892,
"learning_rate": 3.5021665866510925e-05,
"loss": 0.0038,
"step": 12340
},
{
"epoch": 42.294520547945204,
"grad_norm": 0.03215208277106285,
"learning_rate": 3.494280963653463e-05,
"loss": 0.0042,
"step": 12350
},
{
"epoch": 42.32876712328767,
"grad_norm": 0.061448678374290466,
"learning_rate": 3.4863994572341843e-05,
"loss": 0.0042,
"step": 12360
},
{
"epoch": 42.363013698630134,
"grad_norm": 0.043668776750564575,
"learning_rate": 3.478522088940993e-05,
"loss": 0.0034,
"step": 12370
},
{
"epoch": 42.397260273972606,
"grad_norm": 0.05667470395565033,
"learning_rate": 3.470648880310313e-05,
"loss": 0.0048,
"step": 12380
},
{
"epoch": 42.43150684931507,
"grad_norm": 0.04313276335597038,
"learning_rate": 3.462779852867197e-05,
"loss": 0.0031,
"step": 12390
},
{
"epoch": 42.465753424657535,
"grad_norm": 0.04941844195127487,
"learning_rate": 3.4549150281252636e-05,
"loss": 0.0037,
"step": 12400
},
{
"epoch": 42.5,
"grad_norm": 0.04372037574648857,
"learning_rate": 3.447054427586644e-05,
"loss": 0.004,
"step": 12410
},
{
"epoch": 42.534246575342465,
"grad_norm": 0.03683243691921234,
"learning_rate": 3.439198072741921e-05,
"loss": 0.0032,
"step": 12420
},
{
"epoch": 42.56849315068493,
"grad_norm": 0.03871821239590645,
"learning_rate": 3.431345985070067e-05,
"loss": 0.0033,
"step": 12430
},
{
"epoch": 42.602739726027394,
"grad_norm": 0.04553896188735962,
"learning_rate": 3.423498186038393e-05,
"loss": 0.003,
"step": 12440
},
{
"epoch": 42.636986301369866,
"grad_norm": 0.042918987572193146,
"learning_rate": 3.4156546971024784e-05,
"loss": 0.0029,
"step": 12450
},
{
"epoch": 42.67123287671233,
"grad_norm": 0.04090527072548866,
"learning_rate": 3.407815539706124e-05,
"loss": 0.0036,
"step": 12460
},
{
"epoch": 42.705479452054796,
"grad_norm": 0.059398066252470016,
"learning_rate": 3.399980735281286e-05,
"loss": 0.0032,
"step": 12470
},
{
"epoch": 42.73972602739726,
"grad_norm": 0.050417881458997726,
"learning_rate": 3.392150305248024e-05,
"loss": 0.0033,
"step": 12480
},
{
"epoch": 42.773972602739725,
"grad_norm": 0.05215463042259216,
"learning_rate": 3.384324271014429e-05,
"loss": 0.004,
"step": 12490
},
{
"epoch": 42.80821917808219,
"grad_norm": 0.04774712771177292,
"learning_rate": 3.3765026539765834e-05,
"loss": 0.0041,
"step": 12500
},
{
"epoch": 42.842465753424655,
"grad_norm": 0.04074811935424805,
"learning_rate": 3.368685475518488e-05,
"loss": 0.0031,
"step": 12510
},
{
"epoch": 42.87671232876713,
"grad_norm": 0.04318001866340637,
"learning_rate": 3.360872757012011e-05,
"loss": 0.0034,
"step": 12520
},
{
"epoch": 42.91095890410959,
"grad_norm": 0.04521534591913223,
"learning_rate": 3.3530645198168295e-05,
"loss": 0.0039,
"step": 12530
},
{
"epoch": 42.945205479452056,
"grad_norm": 0.045407313853502274,
"learning_rate": 3.3452607852803584e-05,
"loss": 0.0046,
"step": 12540
},
{
"epoch": 42.97945205479452,
"grad_norm": 0.03356282040476799,
"learning_rate": 3.337461574737716e-05,
"loss": 0.0033,
"step": 12550
},
{
"epoch": 43.013698630136986,
"grad_norm": 0.03246859833598137,
"learning_rate": 3.329666909511645e-05,
"loss": 0.0036,
"step": 12560
},
{
"epoch": 43.04794520547945,
"grad_norm": 0.054344963282346725,
"learning_rate": 3.321876810912461e-05,
"loss": 0.0037,
"step": 12570
},
{
"epoch": 43.082191780821915,
"grad_norm": 0.04024481028318405,
"learning_rate": 3.3140913002379995e-05,
"loss": 0.0031,
"step": 12580
},
{
"epoch": 43.11643835616438,
"grad_norm": 0.036860208958387375,
"learning_rate": 3.3063103987735433e-05,
"loss": 0.0033,
"step": 12590
},
{
"epoch": 43.15068493150685,
"grad_norm": 0.036103636026382446,
"learning_rate": 3.298534127791785e-05,
"loss": 0.0028,
"step": 12600
},
{
"epoch": 43.18493150684932,
"grad_norm": 0.047203242778778076,
"learning_rate": 3.2907625085527503e-05,
"loss": 0.0034,
"step": 12610
},
{
"epoch": 43.21917808219178,
"grad_norm": 0.034795694053173065,
"learning_rate": 3.282995562303754e-05,
"loss": 0.0041,
"step": 12620
},
{
"epoch": 43.25342465753425,
"grad_norm": 0.041322045028209686,
"learning_rate": 3.275233310279321e-05,
"loss": 0.003,
"step": 12630
},
{
"epoch": 43.28767123287671,
"grad_norm": 0.03881575167179108,
"learning_rate": 3.267475773701161e-05,
"loss": 0.0036,
"step": 12640
},
{
"epoch": 43.321917808219176,
"grad_norm": 0.0376458577811718,
"learning_rate": 3.2597229737780774e-05,
"loss": 0.0042,
"step": 12650
},
{
"epoch": 43.35616438356164,
"grad_norm": 0.06743606925010681,
"learning_rate": 3.251974931705933e-05,
"loss": 0.0043,
"step": 12660
},
{
"epoch": 43.39041095890411,
"grad_norm": 0.04050120338797569,
"learning_rate": 3.244231668667578e-05,
"loss": 0.0031,
"step": 12670
},
{
"epoch": 43.42465753424658,
"grad_norm": 0.03748546540737152,
"learning_rate": 3.236493205832795e-05,
"loss": 0.0026,
"step": 12680
},
{
"epoch": 43.45890410958904,
"grad_norm": 0.026954837143421173,
"learning_rate": 3.228759564358248e-05,
"loss": 0.0042,
"step": 12690
},
{
"epoch": 43.49315068493151,
"grad_norm": 0.03144029900431633,
"learning_rate": 3.221030765387417e-05,
"loss": 0.0044,
"step": 12700
},
{
"epoch": 43.52739726027397,
"grad_norm": 0.03990132361650467,
"learning_rate": 3.2133068300505455e-05,
"loss": 0.0059,
"step": 12710
},
{
"epoch": 43.56164383561644,
"grad_norm": 0.03905215859413147,
"learning_rate": 3.205587779464576e-05,
"loss": 0.0041,
"step": 12720
},
{
"epoch": 43.5958904109589,
"grad_norm": 0.0459834523499012,
"learning_rate": 3.197873634733096e-05,
"loss": 0.0036,
"step": 12730
},
{
"epoch": 43.63013698630137,
"grad_norm": 0.04976179450750351,
"learning_rate": 3.190164416946285e-05,
"loss": 0.0059,
"step": 12740
},
{
"epoch": 43.66438356164384,
"grad_norm": 0.05985915660858154,
"learning_rate": 3.18246014718085e-05,
"loss": 0.004,
"step": 12750
},
{
"epoch": 43.6986301369863,
"grad_norm": 0.06645234674215317,
"learning_rate": 3.1747608464999725e-05,
"loss": 0.0047,
"step": 12760
},
{
"epoch": 43.73287671232877,
"grad_norm": 0.05306819826364517,
"learning_rate": 3.167066535953242e-05,
"loss": 0.0046,
"step": 12770
},
{
"epoch": 43.76712328767123,
"grad_norm": 0.0343354307115078,
"learning_rate": 3.1593772365766105e-05,
"loss": 0.0024,
"step": 12780
},
{
"epoch": 43.8013698630137,
"grad_norm": 0.06609547883272171,
"learning_rate": 3.1516929693923315e-05,
"loss": 0.0038,
"step": 12790
},
{
"epoch": 43.83561643835616,
"grad_norm": 0.030476383864879608,
"learning_rate": 3.144013755408895e-05,
"loss": 0.0046,
"step": 12800
},
{
"epoch": 43.86986301369863,
"grad_norm": 0.03590785339474678,
"learning_rate": 3.136339615620985e-05,
"loss": 0.0033,
"step": 12810
},
{
"epoch": 43.9041095890411,
"grad_norm": 0.0393015556037426,
"learning_rate": 3.128670571009399e-05,
"loss": 0.0033,
"step": 12820
},
{
"epoch": 43.93835616438356,
"grad_norm": 0.04334140568971634,
"learning_rate": 3.121006642541014e-05,
"loss": 0.0033,
"step": 12830
},
{
"epoch": 43.97260273972603,
"grad_norm": 0.04587549716234207,
"learning_rate": 3.113347851168721e-05,
"loss": 0.0049,
"step": 12840
},
{
"epoch": 44.00684931506849,
"grad_norm": 0.06654663383960724,
"learning_rate": 3.105694217831361e-05,
"loss": 0.0032,
"step": 12850
},
{
"epoch": 44.04109589041096,
"grad_norm": 0.049101341515779495,
"learning_rate": 3.098045763453678e-05,
"loss": 0.0036,
"step": 12860
},
{
"epoch": 44.07534246575342,
"grad_norm": 0.0578986294567585,
"learning_rate": 3.090402508946249e-05,
"loss": 0.0041,
"step": 12870
},
{
"epoch": 44.10958904109589,
"grad_norm": 0.05904621630907059,
"learning_rate": 3.082764475205442e-05,
"loss": 0.0043,
"step": 12880
},
{
"epoch": 44.14383561643836,
"grad_norm": 0.03861738368868828,
"learning_rate": 3.075131683113352e-05,
"loss": 0.0032,
"step": 12890
},
{
"epoch": 44.178082191780824,
"grad_norm": 0.032082729041576385,
"learning_rate": 3.0675041535377405e-05,
"loss": 0.003,
"step": 12900
},
{
"epoch": 44.21232876712329,
"grad_norm": 0.06508829444646835,
"learning_rate": 3.059881907331979e-05,
"loss": 0.0037,
"step": 12910
},
{
"epoch": 44.24657534246575,
"grad_norm": 0.04459870606660843,
"learning_rate": 3.052264965335e-05,
"loss": 0.0046,
"step": 12920
},
{
"epoch": 44.28082191780822,
"grad_norm": 0.05662244185805321,
"learning_rate": 3.0446533483712304e-05,
"loss": 0.0053,
"step": 12930
},
{
"epoch": 44.31506849315068,
"grad_norm": 0.05471611022949219,
"learning_rate": 3.0370470772505433e-05,
"loss": 0.004,
"step": 12940
},
{
"epoch": 44.34931506849315,
"grad_norm": 0.053697649389505386,
"learning_rate": 3.0294461727681932e-05,
"loss": 0.0034,
"step": 12950
},
{
"epoch": 44.38356164383562,
"grad_norm": 0.05044024437665939,
"learning_rate": 3.0218506557047598e-05,
"loss": 0.0047,
"step": 12960
},
{
"epoch": 44.417808219178085,
"grad_norm": 0.04237433522939682,
"learning_rate": 3.0142605468260978e-05,
"loss": 0.0026,
"step": 12970
},
{
"epoch": 44.45205479452055,
"grad_norm": 0.041244782507419586,
"learning_rate": 3.006675866883275e-05,
"loss": 0.003,
"step": 12980
},
{
"epoch": 44.486301369863014,
"grad_norm": 0.031918175518512726,
"learning_rate": 2.999096636612518e-05,
"loss": 0.0026,
"step": 12990
},
{
"epoch": 44.52054794520548,
"grad_norm": 0.03653609752655029,
"learning_rate": 2.991522876735154e-05,
"loss": 0.0026,
"step": 13000
},
{
"epoch": 44.554794520547944,
"grad_norm": 0.05688665062189102,
"learning_rate": 2.9839546079575497e-05,
"loss": 0.0044,
"step": 13010
},
{
"epoch": 44.58904109589041,
"grad_norm": 0.060732051730155945,
"learning_rate": 2.976391850971065e-05,
"loss": 0.0038,
"step": 13020
},
{
"epoch": 44.62328767123287,
"grad_norm": 0.047593869268894196,
"learning_rate": 2.9688346264519866e-05,
"loss": 0.0028,
"step": 13030
},
{
"epoch": 44.657534246575345,
"grad_norm": 0.03735283017158508,
"learning_rate": 2.9612829550614836e-05,
"loss": 0.0036,
"step": 13040
},
{
"epoch": 44.69178082191781,
"grad_norm": 0.056196749210357666,
"learning_rate": 2.9537368574455304e-05,
"loss": 0.0039,
"step": 13050
},
{
"epoch": 44.726027397260275,
"grad_norm": 0.05041581392288208,
"learning_rate": 2.9461963542348737e-05,
"loss": 0.0027,
"step": 13060
},
{
"epoch": 44.76027397260274,
"grad_norm": 0.04259680584073067,
"learning_rate": 2.9386614660449596e-05,
"loss": 0.0033,
"step": 13070
},
{
"epoch": 44.794520547945204,
"grad_norm": 0.03420880436897278,
"learning_rate": 2.931132213475884e-05,
"loss": 0.0033,
"step": 13080
},
{
"epoch": 44.82876712328767,
"grad_norm": 0.03196268528699875,
"learning_rate": 2.9236086171123404e-05,
"loss": 0.0039,
"step": 13090
},
{
"epoch": 44.863013698630134,
"grad_norm": 0.035604946315288544,
"learning_rate": 2.916090697523549e-05,
"loss": 0.0037,
"step": 13100
},
{
"epoch": 44.897260273972606,
"grad_norm": 0.048967644572257996,
"learning_rate": 2.9085784752632157e-05,
"loss": 0.0044,
"step": 13110
},
{
"epoch": 44.93150684931507,
"grad_norm": 0.0407651923596859,
"learning_rate": 2.9010719708694722e-05,
"loss": 0.0038,
"step": 13120
},
{
"epoch": 44.965753424657535,
"grad_norm": 0.03494444489479065,
"learning_rate": 2.8935712048648112e-05,
"loss": 0.0029,
"step": 13130
},
{
"epoch": 45.0,
"grad_norm": 0.026488201692700386,
"learning_rate": 2.8860761977560436e-05,
"loss": 0.0028,
"step": 13140
},
{
"epoch": 45.034246575342465,
"grad_norm": 0.0324229933321476,
"learning_rate": 2.878586970034232e-05,
"loss": 0.003,
"step": 13150
},
{
"epoch": 45.06849315068493,
"grad_norm": 0.035328809171915054,
"learning_rate": 2.8711035421746367e-05,
"loss": 0.0032,
"step": 13160
},
{
"epoch": 45.102739726027394,
"grad_norm": 0.040484555065631866,
"learning_rate": 2.8636259346366666e-05,
"loss": 0.004,
"step": 13170
},
{
"epoch": 45.136986301369866,
"grad_norm": 0.041628774255514145,
"learning_rate": 2.8561541678638142e-05,
"loss": 0.0046,
"step": 13180
},
{
"epoch": 45.17123287671233,
"grad_norm": 0.06758181750774384,
"learning_rate": 2.8486882622836026e-05,
"loss": 0.0034,
"step": 13190
},
{
"epoch": 45.205479452054796,
"grad_norm": 0.04015596583485603,
"learning_rate": 2.8412282383075363e-05,
"loss": 0.0037,
"step": 13200
},
{
"epoch": 45.23972602739726,
"grad_norm": 0.048500169068574905,
"learning_rate": 2.8337741163310317e-05,
"loss": 0.0035,
"step": 13210
},
{
"epoch": 45.273972602739725,
"grad_norm": 0.04756450653076172,
"learning_rate": 2.8263259167333777e-05,
"loss": 0.0038,
"step": 13220
},
{
"epoch": 45.30821917808219,
"grad_norm": 0.04274963214993477,
"learning_rate": 2.8188836598776662e-05,
"loss": 0.0034,
"step": 13230
},
{
"epoch": 45.342465753424655,
"grad_norm": 0.04680255800485611,
"learning_rate": 2.811447366110741e-05,
"loss": 0.0035,
"step": 13240
},
{
"epoch": 45.37671232876713,
"grad_norm": 0.04105055332183838,
"learning_rate": 2.804017055763149e-05,
"loss": 0.0035,
"step": 13250
},
{
"epoch": 45.41095890410959,
"grad_norm": 0.05548069253563881,
"learning_rate": 2.7965927491490705e-05,
"loss": 0.0033,
"step": 13260
},
{
"epoch": 45.445205479452056,
"grad_norm": 0.045111995190382004,
"learning_rate": 2.7891744665662823e-05,
"loss": 0.0039,
"step": 13270
},
{
"epoch": 45.47945205479452,
"grad_norm": 0.04081454873085022,
"learning_rate": 2.7817622282960815e-05,
"loss": 0.004,
"step": 13280
},
{
"epoch": 45.513698630136986,
"grad_norm": 0.04935455694794655,
"learning_rate": 2.774356054603243e-05,
"loss": 0.0049,
"step": 13290
},
{
"epoch": 45.54794520547945,
"grad_norm": 0.037979427725076675,
"learning_rate": 2.766955965735968e-05,
"loss": 0.0032,
"step": 13300
},
{
"epoch": 45.582191780821915,
"grad_norm": 0.03660598769783974,
"learning_rate": 2.7595619819258116e-05,
"loss": 0.0052,
"step": 13310
},
{
"epoch": 45.61643835616438,
"grad_norm": 0.048113249242305756,
"learning_rate": 2.7521741233876496e-05,
"loss": 0.0031,
"step": 13320
},
{
"epoch": 45.65068493150685,
"grad_norm": 0.03831634670495987,
"learning_rate": 2.7447924103195976e-05,
"loss": 0.0033,
"step": 13330
},
{
"epoch": 45.68493150684932,
"grad_norm": 0.03267529979348183,
"learning_rate": 2.7374168629029813e-05,
"loss": 0.0029,
"step": 13340
},
{
"epoch": 45.71917808219178,
"grad_norm": 0.03972616046667099,
"learning_rate": 2.7300475013022663e-05,
"loss": 0.0029,
"step": 13350
},
{
"epoch": 45.75342465753425,
"grad_norm": 0.033614449203014374,
"learning_rate": 2.7226843456650037e-05,
"loss": 0.0039,
"step": 13360
},
{
"epoch": 45.78767123287671,
"grad_norm": 0.04760490730404854,
"learning_rate": 2.7153274161217846e-05,
"loss": 0.0025,
"step": 13370
},
{
"epoch": 45.821917808219176,
"grad_norm": 0.04591522365808487,
"learning_rate": 2.707976732786166e-05,
"loss": 0.0034,
"step": 13380
},
{
"epoch": 45.85616438356164,
"grad_norm": 0.0396135076880455,
"learning_rate": 2.7006323157546386e-05,
"loss": 0.0036,
"step": 13390
},
{
"epoch": 45.89041095890411,
"grad_norm": 0.04728582501411438,
"learning_rate": 2.693294185106562e-05,
"loss": 0.0029,
"step": 13400
},
{
"epoch": 45.92465753424658,
"grad_norm": 0.032608333975076675,
"learning_rate": 2.6859623609040984e-05,
"loss": 0.0033,
"step": 13410
},
{
"epoch": 45.95890410958904,
"grad_norm": 0.04741152003407478,
"learning_rate": 2.6786368631921836e-05,
"loss": 0.0036,
"step": 13420
},
{
"epoch": 45.99315068493151,
"grad_norm": 0.04052167013287544,
"learning_rate": 2.67131771199844e-05,
"loss": 0.0026,
"step": 13430
},
{
"epoch": 46.02739726027397,
"grad_norm": 0.03956746309995651,
"learning_rate": 2.6640049273331515e-05,
"loss": 0.0042,
"step": 13440
},
{
"epoch": 46.06164383561644,
"grad_norm": 0.04623987898230553,
"learning_rate": 2.656698529189193e-05,
"loss": 0.0045,
"step": 13450
},
{
"epoch": 46.0958904109589,
"grad_norm": 0.043264828622341156,
"learning_rate": 2.6493985375419778e-05,
"loss": 0.0031,
"step": 13460
},
{
"epoch": 46.13013698630137,
"grad_norm": 0.04458721727132797,
"learning_rate": 2.642104972349403e-05,
"loss": 0.0035,
"step": 13470
},
{
"epoch": 46.16438356164384,
"grad_norm": 0.04978759214282036,
"learning_rate": 2.6348178535517966e-05,
"loss": 0.0041,
"step": 13480
},
{
"epoch": 46.1986301369863,
"grad_norm": 0.038638100028038025,
"learning_rate": 2.6275372010718635e-05,
"loss": 0.003,
"step": 13490
},
{
"epoch": 46.23287671232877,
"grad_norm": 0.03764040768146515,
"learning_rate": 2.6202630348146324e-05,
"loss": 0.0033,
"step": 13500
},
{
"epoch": 46.26712328767123,
"grad_norm": 0.040804632008075714,
"learning_rate": 2.612995374667394e-05,
"loss": 0.0034,
"step": 13510
},
{
"epoch": 46.3013698630137,
"grad_norm": 0.041531484574079514,
"learning_rate": 2.6057342404996522e-05,
"loss": 0.003,
"step": 13520
},
{
"epoch": 46.33561643835616,
"grad_norm": 0.061668556183576584,
"learning_rate": 2.5984796521630737e-05,
"loss": 0.0034,
"step": 13530
},
{
"epoch": 46.36986301369863,
"grad_norm": 0.049322471022605896,
"learning_rate": 2.591231629491423e-05,
"loss": 0.004,
"step": 13540
},
{
"epoch": 46.4041095890411,
"grad_norm": 0.03611714765429497,
"learning_rate": 2.5839901923005205e-05,
"loss": 0.0025,
"step": 13550
},
{
"epoch": 46.43835616438356,
"grad_norm": 0.02910764142870903,
"learning_rate": 2.5767553603881767e-05,
"loss": 0.003,
"step": 13560
},
{
"epoch": 46.47260273972603,
"grad_norm": 0.03090509958565235,
"learning_rate": 2.5695271535341443e-05,
"loss": 0.0028,
"step": 13570
},
{
"epoch": 46.50684931506849,
"grad_norm": 0.04449021816253662,
"learning_rate": 2.562305591500069e-05,
"loss": 0.0046,
"step": 13580
},
{
"epoch": 46.54109589041096,
"grad_norm": 0.029060401022434235,
"learning_rate": 2.555090694029421e-05,
"loss": 0.0041,
"step": 13590
},
{
"epoch": 46.57534246575342,
"grad_norm": 0.06126280874013901,
"learning_rate": 2.547882480847461e-05,
"loss": 0.004,
"step": 13600
},
{
"epoch": 46.60958904109589,
"grad_norm": 0.0393584705889225,
"learning_rate": 2.540680971661161e-05,
"loss": 0.0031,
"step": 13610
},
{
"epoch": 46.64383561643836,
"grad_norm": 0.043196532875299454,
"learning_rate": 2.5334861861591753e-05,
"loss": 0.0027,
"step": 13620
},
{
"epoch": 46.678082191780824,
"grad_norm": 0.0547700971364975,
"learning_rate": 2.526298144011775e-05,
"loss": 0.0038,
"step": 13630
},
{
"epoch": 46.71232876712329,
"grad_norm": 0.04245225712656975,
"learning_rate": 2.5191168648707887e-05,
"loss": 0.0034,
"step": 13640
},
{
"epoch": 46.74657534246575,
"grad_norm": 0.04763708636164665,
"learning_rate": 2.511942368369566e-05,
"loss": 0.0046,
"step": 13650
},
{
"epoch": 46.78082191780822,
"grad_norm": 0.04791140928864479,
"learning_rate": 2.5047746741228978e-05,
"loss": 0.0034,
"step": 13660
},
{
"epoch": 46.81506849315068,
"grad_norm": 0.03143971785902977,
"learning_rate": 2.4976138017269908e-05,
"loss": 0.0026,
"step": 13670
},
{
"epoch": 46.84931506849315,
"grad_norm": 0.02771608904004097,
"learning_rate": 2.490459770759398e-05,
"loss": 0.0023,
"step": 13680
},
{
"epoch": 46.88356164383562,
"grad_norm": 0.03285845369100571,
"learning_rate": 2.4833126007789653e-05,
"loss": 0.0038,
"step": 13690
},
{
"epoch": 46.917808219178085,
"grad_norm": 0.033361777663230896,
"learning_rate": 2.476172311325783e-05,
"loss": 0.0028,
"step": 13700
},
{
"epoch": 46.95205479452055,
"grad_norm": 0.03274751454591751,
"learning_rate": 2.4690389219211273e-05,
"loss": 0.0032,
"step": 13710
},
{
"epoch": 46.986301369863014,
"grad_norm": 0.04675106331706047,
"learning_rate": 2.4619124520674146e-05,
"loss": 0.0038,
"step": 13720
},
{
"epoch": 47.02054794520548,
"grad_norm": 0.039955805987119675,
"learning_rate": 2.4547929212481435e-05,
"loss": 0.0035,
"step": 13730
},
{
"epoch": 47.054794520547944,
"grad_norm": 0.04543524235486984,
"learning_rate": 2.447680348927837e-05,
"loss": 0.0041,
"step": 13740
},
{
"epoch": 47.08904109589041,
"grad_norm": 0.031714826822280884,
"learning_rate": 2.4405747545519963e-05,
"loss": 0.0032,
"step": 13750
},
{
"epoch": 47.12328767123287,
"grad_norm": 0.033325448632240295,
"learning_rate": 2.433476157547044e-05,
"loss": 0.003,
"step": 13760
},
{
"epoch": 47.157534246575345,
"grad_norm": 0.0679163858294487,
"learning_rate": 2.4263845773202736e-05,
"loss": 0.0029,
"step": 13770
},
{
"epoch": 47.19178082191781,
"grad_norm": 0.03381791710853577,
"learning_rate": 2.419300033259798e-05,
"loss": 0.0028,
"step": 13780
},
{
"epoch": 47.226027397260275,
"grad_norm": 0.055893249809741974,
"learning_rate": 2.4122225447344875e-05,
"loss": 0.0034,
"step": 13790
},
{
"epoch": 47.26027397260274,
"grad_norm": 0.04048413038253784,
"learning_rate": 2.405152131093926e-05,
"loss": 0.0042,
"step": 13800
},
{
"epoch": 47.294520547945204,
"grad_norm": 0.0625983402132988,
"learning_rate": 2.3980888116683515e-05,
"loss": 0.005,
"step": 13810
},
{
"epoch": 47.32876712328767,
"grad_norm": 0.04345778375864029,
"learning_rate": 2.3910326057686127e-05,
"loss": 0.0036,
"step": 13820
},
{
"epoch": 47.363013698630134,
"grad_norm": 0.04610544443130493,
"learning_rate": 2.3839835326861104e-05,
"loss": 0.0031,
"step": 13830
},
{
"epoch": 47.397260273972606,
"grad_norm": 0.042664166539907455,
"learning_rate": 2.3769416116927335e-05,
"loss": 0.0023,
"step": 13840
},
{
"epoch": 47.43150684931507,
"grad_norm": 0.0539637915790081,
"learning_rate": 2.3699068620408304e-05,
"loss": 0.0038,
"step": 13850
},
{
"epoch": 47.465753424657535,
"grad_norm": 0.027709294110536575,
"learning_rate": 2.362879302963135e-05,
"loss": 0.0026,
"step": 13860
},
{
"epoch": 47.5,
"grad_norm": 0.044280145317316055,
"learning_rate": 2.3558589536727277e-05,
"loss": 0.0032,
"step": 13870
},
{
"epoch": 47.534246575342465,
"grad_norm": 0.03847116231918335,
"learning_rate": 2.3488458333629777e-05,
"loss": 0.0036,
"step": 13880
},
{
"epoch": 47.56849315068493,
"grad_norm": 0.04217471554875374,
"learning_rate": 2.341839961207482e-05,
"loss": 0.0034,
"step": 13890
},
{
"epoch": 47.602739726027394,
"grad_norm": 0.037517059594392776,
"learning_rate": 2.3348413563600325e-05,
"loss": 0.0036,
"step": 13900
},
{
"epoch": 47.636986301369866,
"grad_norm": 0.035919900983572006,
"learning_rate": 2.3278500379545436e-05,
"loss": 0.0035,
"step": 13910
},
{
"epoch": 47.67123287671233,
"grad_norm": 0.03642423823475838,
"learning_rate": 2.3208660251050158e-05,
"loss": 0.0027,
"step": 13920
},
{
"epoch": 47.705479452054796,
"grad_norm": 0.04969945177435875,
"learning_rate": 2.3138893369054766e-05,
"loss": 0.003,
"step": 13930
},
{
"epoch": 47.73972602739726,
"grad_norm": 0.032009631395339966,
"learning_rate": 2.3069199924299174e-05,
"loss": 0.0034,
"step": 13940
},
{
"epoch": 47.773972602739725,
"grad_norm": 0.053595248609781265,
"learning_rate": 2.2999580107322653e-05,
"loss": 0.0037,
"step": 13950
},
{
"epoch": 47.80821917808219,
"grad_norm": 0.03953272104263306,
"learning_rate": 2.29300341084631e-05,
"loss": 0.0039,
"step": 13960
},
{
"epoch": 47.842465753424655,
"grad_norm": 0.02748740464448929,
"learning_rate": 2.2860562117856647e-05,
"loss": 0.0033,
"step": 13970
},
{
"epoch": 47.87671232876713,
"grad_norm": 0.048432186245918274,
"learning_rate": 2.279116432543705e-05,
"loss": 0.0037,
"step": 13980
},
{
"epoch": 47.91095890410959,
"grad_norm": 0.02375647984445095,
"learning_rate": 2.2721840920935196e-05,
"loss": 0.0031,
"step": 13990
},
{
"epoch": 47.945205479452056,
"grad_norm": 0.0455491729080677,
"learning_rate": 2.2652592093878666e-05,
"loss": 0.0033,
"step": 14000
},
{
"epoch": 47.97945205479452,
"grad_norm": 0.03882705047726631,
"learning_rate": 2.258341803359108e-05,
"loss": 0.0032,
"step": 14010
},
{
"epoch": 48.013698630136986,
"grad_norm": 0.04311921074986458,
"learning_rate": 2.251431892919171e-05,
"loss": 0.003,
"step": 14020
},
{
"epoch": 48.04794520547945,
"grad_norm": 0.03556806966662407,
"learning_rate": 2.2445294969594844e-05,
"loss": 0.0037,
"step": 14030
},
{
"epoch": 48.082191780821915,
"grad_norm": 0.04763215407729149,
"learning_rate": 2.237634634350934e-05,
"loss": 0.0027,
"step": 14040
},
{
"epoch": 48.11643835616438,
"grad_norm": 0.048188772052526474,
"learning_rate": 2.2307473239438154e-05,
"loss": 0.0037,
"step": 14050
},
{
"epoch": 48.15068493150685,
"grad_norm": 0.041279006749391556,
"learning_rate": 2.2238675845677663e-05,
"loss": 0.0025,
"step": 14060
},
{
"epoch": 48.18493150684932,
"grad_norm": 0.03583381325006485,
"learning_rate": 2.2169954350317374e-05,
"loss": 0.0028,
"step": 14070
},
{
"epoch": 48.21917808219178,
"grad_norm": 0.0358072929084301,
"learning_rate": 2.2101308941239203e-05,
"loss": 0.0028,
"step": 14080
},
{
"epoch": 48.25342465753425,
"grad_norm": 0.028081903234124184,
"learning_rate": 2.2032739806117058e-05,
"loss": 0.0034,
"step": 14090
},
{
"epoch": 48.28767123287671,
"grad_norm": 0.036171264946460724,
"learning_rate": 2.196424713241637e-05,
"loss": 0.0033,
"step": 14100
},
{
"epoch": 48.321917808219176,
"grad_norm": 0.034019824117422104,
"learning_rate": 2.1895831107393484e-05,
"loss": 0.0033,
"step": 14110
},
{
"epoch": 48.35616438356164,
"grad_norm": 0.04331124201416969,
"learning_rate": 2.182749191809518e-05,
"loss": 0.0028,
"step": 14120
},
{
"epoch": 48.39041095890411,
"grad_norm": 0.026034552603960037,
"learning_rate": 2.1759229751358217e-05,
"loss": 0.0031,
"step": 14130
},
{
"epoch": 48.42465753424658,
"grad_norm": 0.02854372002184391,
"learning_rate": 2.1691044793808734e-05,
"loss": 0.0028,
"step": 14140
},
{
"epoch": 48.45890410958904,
"grad_norm": 0.0506640300154686,
"learning_rate": 2.1622937231861822e-05,
"loss": 0.0028,
"step": 14150
},
{
"epoch": 48.49315068493151,
"grad_norm": 0.06169329211115837,
"learning_rate": 2.1554907251720945e-05,
"loss": 0.0043,
"step": 14160
},
{
"epoch": 48.52739726027397,
"grad_norm": 0.0488462932407856,
"learning_rate": 2.148695503937745e-05,
"loss": 0.0025,
"step": 14170
},
{
"epoch": 48.56164383561644,
"grad_norm": 0.05333937332034111,
"learning_rate": 2.1419080780610123e-05,
"loss": 0.0024,
"step": 14180
},
{
"epoch": 48.5958904109589,
"grad_norm": 0.03566636145114899,
"learning_rate": 2.1351284660984572e-05,
"loss": 0.0029,
"step": 14190
},
{
"epoch": 48.63013698630137,
"grad_norm": 0.04205214977264404,
"learning_rate": 2.128356686585282e-05,
"loss": 0.0028,
"step": 14200
},
{
"epoch": 48.66438356164384,
"grad_norm": 0.03965020179748535,
"learning_rate": 2.121592758035273e-05,
"loss": 0.0041,
"step": 14210
},
{
"epoch": 48.6986301369863,
"grad_norm": 0.037797197699546814,
"learning_rate": 2.1148366989407496e-05,
"loss": 0.0035,
"step": 14220
},
{
"epoch": 48.73287671232877,
"grad_norm": 0.04017401114106178,
"learning_rate": 2.1080885277725236e-05,
"loss": 0.0029,
"step": 14230
},
{
"epoch": 48.76712328767123,
"grad_norm": 0.05713287740945816,
"learning_rate": 2.1013482629798333e-05,
"loss": 0.0042,
"step": 14240
},
{
"epoch": 48.8013698630137,
"grad_norm": 0.04046434909105301,
"learning_rate": 2.094615922990309e-05,
"loss": 0.003,
"step": 14250
},
{
"epoch": 48.83561643835616,
"grad_norm": 0.03594409301877022,
"learning_rate": 2.0878915262099098e-05,
"loss": 0.0023,
"step": 14260
},
{
"epoch": 48.86986301369863,
"grad_norm": 0.038105227053165436,
"learning_rate": 2.0811750910228774e-05,
"loss": 0.0034,
"step": 14270
},
{
"epoch": 48.9041095890411,
"grad_norm": 0.06692781299352646,
"learning_rate": 2.0744666357916925e-05,
"loss": 0.0037,
"step": 14280
},
{
"epoch": 48.93835616438356,
"grad_norm": 0.04800930246710777,
"learning_rate": 2.067766178857013e-05,
"loss": 0.0032,
"step": 14290
},
{
"epoch": 48.97260273972603,
"grad_norm": 0.04606781154870987,
"learning_rate": 2.061073738537635e-05,
"loss": 0.0028,
"step": 14300
},
{
"epoch": 49.00684931506849,
"grad_norm": 0.03653561696410179,
"learning_rate": 2.0543893331304333e-05,
"loss": 0.003,
"step": 14310
},
{
"epoch": 49.04109589041096,
"grad_norm": 0.04066811501979828,
"learning_rate": 2.0477129809103147e-05,
"loss": 0.0036,
"step": 14320
},
{
"epoch": 49.07534246575342,
"grad_norm": 0.028658408671617508,
"learning_rate": 2.0410447001301753e-05,
"loss": 0.0026,
"step": 14330
},
{
"epoch": 49.10958904109589,
"grad_norm": 0.04720501974225044,
"learning_rate": 2.0343845090208368e-05,
"loss": 0.0029,
"step": 14340
},
{
"epoch": 49.14383561643836,
"grad_norm": 0.03682386875152588,
"learning_rate": 2.0277324257910106e-05,
"loss": 0.0033,
"step": 14350
},
{
"epoch": 49.178082191780824,
"grad_norm": 0.029543591663241386,
"learning_rate": 2.0210884686272368e-05,
"loss": 0.0031,
"step": 14360
},
{
"epoch": 49.21232876712329,
"grad_norm": 0.03623099625110626,
"learning_rate": 2.0144526556938387e-05,
"loss": 0.0028,
"step": 14370
},
{
"epoch": 49.24657534246575,
"grad_norm": 0.02651667222380638,
"learning_rate": 2.0078250051328784e-05,
"loss": 0.003,
"step": 14380
},
{
"epoch": 49.28082191780822,
"grad_norm": 0.031783703714609146,
"learning_rate": 2.0012055350640986e-05,
"loss": 0.0037,
"step": 14390
},
{
"epoch": 49.31506849315068,
"grad_norm": 0.03946804255247116,
"learning_rate": 1.9945942635848748e-05,
"loss": 0.0026,
"step": 14400
},
{
"epoch": 49.34931506849315,
"grad_norm": 0.028189141303300858,
"learning_rate": 1.9879912087701753e-05,
"loss": 0.0037,
"step": 14410
},
{
"epoch": 49.38356164383562,
"grad_norm": 0.037286076694726944,
"learning_rate": 1.981396388672496e-05,
"loss": 0.0028,
"step": 14420
},
{
"epoch": 49.417808219178085,
"grad_norm": 0.03162837401032448,
"learning_rate": 1.974809821321827e-05,
"loss": 0.0026,
"step": 14430
},
{
"epoch": 49.45205479452055,
"grad_norm": 0.05570969358086586,
"learning_rate": 1.9682315247255894e-05,
"loss": 0.0032,
"step": 14440
},
{
"epoch": 49.486301369863014,
"grad_norm": 0.0389660969376564,
"learning_rate": 1.9616615168685943e-05,
"loss": 0.0028,
"step": 14450
},
{
"epoch": 49.52054794520548,
"grad_norm": 0.03000059723854065,
"learning_rate": 1.9550998157129946e-05,
"loss": 0.0023,
"step": 14460
},
{
"epoch": 49.554794520547944,
"grad_norm": 0.03354468196630478,
"learning_rate": 1.9485464391982284e-05,
"loss": 0.0033,
"step": 14470
},
{
"epoch": 49.58904109589041,
"grad_norm": 0.037038616836071014,
"learning_rate": 1.942001405240979e-05,
"loss": 0.0039,
"step": 14480
},
{
"epoch": 49.62328767123287,
"grad_norm": 0.03484483063220978,
"learning_rate": 1.9354647317351188e-05,
"loss": 0.0029,
"step": 14490
},
{
"epoch": 49.657534246575345,
"grad_norm": 0.04799222946166992,
"learning_rate": 1.928936436551661e-05,
"loss": 0.0036,
"step": 14500
},
{
"epoch": 49.69178082191781,
"grad_norm": 0.0432819202542305,
"learning_rate": 1.9224165375387193e-05,
"loss": 0.0032,
"step": 14510
},
{
"epoch": 49.726027397260275,
"grad_norm": 0.039506372064352036,
"learning_rate": 1.9159050525214452e-05,
"loss": 0.0042,
"step": 14520
},
{
"epoch": 49.76027397260274,
"grad_norm": 0.03616689145565033,
"learning_rate": 1.909401999301993e-05,
"loss": 0.0022,
"step": 14530
},
{
"epoch": 49.794520547945204,
"grad_norm": 0.042690783739089966,
"learning_rate": 1.9029073956594606e-05,
"loss": 0.0033,
"step": 14540
},
{
"epoch": 49.82876712328767,
"grad_norm": 0.046874433755874634,
"learning_rate": 1.8964212593498442e-05,
"loss": 0.0032,
"step": 14550
},
{
"epoch": 49.863013698630134,
"grad_norm": 0.037254612892866135,
"learning_rate": 1.8899436081059975e-05,
"loss": 0.0028,
"step": 14560
},
{
"epoch": 49.897260273972606,
"grad_norm": 0.04147499427199364,
"learning_rate": 1.8834744596375666e-05,
"loss": 0.0038,
"step": 14570
},
{
"epoch": 49.93150684931507,
"grad_norm": 0.030911028385162354,
"learning_rate": 1.877013831630961e-05,
"loss": 0.0034,
"step": 14580
},
{
"epoch": 49.965753424657535,
"grad_norm": 0.026016168296337128,
"learning_rate": 1.8705617417492883e-05,
"loss": 0.0029,
"step": 14590
},
{
"epoch": 50.0,
"grad_norm": 0.026868095621466637,
"learning_rate": 1.8641182076323148e-05,
"loss": 0.0028,
"step": 14600
},
{
"epoch": 50.034246575342465,
"grad_norm": 0.039373837411403656,
"learning_rate": 1.85768324689642e-05,
"loss": 0.0034,
"step": 14610
},
{
"epoch": 50.06849315068493,
"grad_norm": 0.047938112169504166,
"learning_rate": 1.851256877134538e-05,
"loss": 0.004,
"step": 14620
},
{
"epoch": 50.102739726027394,
"grad_norm": 0.029682587832212448,
"learning_rate": 1.8448391159161204e-05,
"loss": 0.0029,
"step": 14630
},
{
"epoch": 50.136986301369866,
"grad_norm": 0.035953033715486526,
"learning_rate": 1.838429980787081e-05,
"loss": 0.0028,
"step": 14640
},
{
"epoch": 50.17123287671233,
"grad_norm": 0.04639003053307533,
"learning_rate": 1.8320294892697478e-05,
"loss": 0.0038,
"step": 14650
},
{
"epoch": 50.205479452054796,
"grad_norm": 0.03600938618183136,
"learning_rate": 1.8256376588628238e-05,
"loss": 0.0029,
"step": 14660
},
{
"epoch": 50.23972602739726,
"grad_norm": 0.03340703994035721,
"learning_rate": 1.8192545070413282e-05,
"loss": 0.0029,
"step": 14670
},
{
"epoch": 50.273972602739725,
"grad_norm": 0.05106693133711815,
"learning_rate": 1.8128800512565513e-05,
"loss": 0.0036,
"step": 14680
},
{
"epoch": 50.30821917808219,
"grad_norm": 0.05128314346075058,
"learning_rate": 1.8065143089360172e-05,
"loss": 0.0029,
"step": 14690
},
{
"epoch": 50.342465753424655,
"grad_norm": 0.04804209992289543,
"learning_rate": 1.800157297483417e-05,
"loss": 0.0026,
"step": 14700
},
{
"epoch": 50.37671232876713,
"grad_norm": 0.040086787194013596,
"learning_rate": 1.7938090342785817e-05,
"loss": 0.0021,
"step": 14710
},
{
"epoch": 50.41095890410959,
"grad_norm": 0.0328865684568882,
"learning_rate": 1.787469536677419e-05,
"loss": 0.0035,
"step": 14720
},
{
"epoch": 50.445205479452056,
"grad_norm": 0.02870897389948368,
"learning_rate": 1.7811388220118707e-05,
"loss": 0.0033,
"step": 14730
},
{
"epoch": 50.47945205479452,
"grad_norm": 0.03273727372288704,
"learning_rate": 1.774816907589873e-05,
"loss": 0.0026,
"step": 14740
},
{
"epoch": 50.513698630136986,
"grad_norm": 0.03714567795395851,
"learning_rate": 1.768503810695295e-05,
"loss": 0.0027,
"step": 14750
},
{
"epoch": 50.54794520547945,
"grad_norm": 0.051201384514570236,
"learning_rate": 1.7621995485879062e-05,
"loss": 0.0036,
"step": 14760
},
{
"epoch": 50.582191780821915,
"grad_norm": 0.04567592591047287,
"learning_rate": 1.755904138503316e-05,
"loss": 0.0036,
"step": 14770
},
{
"epoch": 50.61643835616438,
"grad_norm": 0.04741727560758591,
"learning_rate": 1.749617597652934e-05,
"loss": 0.0032,
"step": 14780
},
{
"epoch": 50.65068493150685,
"grad_norm": 0.02346430905163288,
"learning_rate": 1.743339943223926e-05,
"loss": 0.0026,
"step": 14790
},
{
"epoch": 50.68493150684932,
"grad_norm": 0.033810559660196304,
"learning_rate": 1.7370711923791567e-05,
"loss": 0.0033,
"step": 14800
},
{
"epoch": 50.71917808219178,
"grad_norm": 0.04429788514971733,
"learning_rate": 1.7308113622571544e-05,
"loss": 0.0043,
"step": 14810
},
{
"epoch": 50.75342465753425,
"grad_norm": 0.034124165773391724,
"learning_rate": 1.7245604699720535e-05,
"loss": 0.0023,
"step": 14820
},
{
"epoch": 50.78767123287671,
"grad_norm": 0.04748954623937607,
"learning_rate": 1.7183185326135543e-05,
"loss": 0.0029,
"step": 14830
},
{
"epoch": 50.821917808219176,
"grad_norm": 0.02713291347026825,
"learning_rate": 1.712085567246878e-05,
"loss": 0.0023,
"step": 14840
},
{
"epoch": 50.85616438356164,
"grad_norm": 0.036193542182445526,
"learning_rate": 1.70586159091271e-05,
"loss": 0.003,
"step": 14850
},
{
"epoch": 50.89041095890411,
"grad_norm": 0.027815349400043488,
"learning_rate": 1.699646620627168e-05,
"loss": 0.003,
"step": 14860
},
{
"epoch": 50.92465753424658,
"grad_norm": 0.02768586575984955,
"learning_rate": 1.6934406733817414e-05,
"loss": 0.0039,
"step": 14870
},
{
"epoch": 50.95890410958904,
"grad_norm": 0.0356876440346241,
"learning_rate": 1.6872437661432517e-05,
"loss": 0.0039,
"step": 14880
},
{
"epoch": 50.99315068493151,
"grad_norm": 0.030868323519825935,
"learning_rate": 1.6810559158538092e-05,
"loss": 0.0036,
"step": 14890
},
{
"epoch": 51.02739726027397,
"grad_norm": 0.04372343793511391,
"learning_rate": 1.6748771394307585e-05,
"loss": 0.0041,
"step": 14900
},
{
"epoch": 51.06164383561644,
"grad_norm": 0.026267332956194878,
"learning_rate": 1.6687074537666398e-05,
"loss": 0.0023,
"step": 14910
},
{
"epoch": 51.0958904109589,
"grad_norm": 0.035959236323833466,
"learning_rate": 1.662546875729138e-05,
"loss": 0.0028,
"step": 14920
},
{
"epoch": 51.13013698630137,
"grad_norm": 0.03822488337755203,
"learning_rate": 1.6563954221610355e-05,
"loss": 0.0043,
"step": 14930
},
{
"epoch": 51.16438356164384,
"grad_norm": 0.03191686421632767,
"learning_rate": 1.6502531098801753e-05,
"loss": 0.0039,
"step": 14940
},
{
"epoch": 51.1986301369863,
"grad_norm": 0.046950504183769226,
"learning_rate": 1.6441199556794033e-05,
"loss": 0.0036,
"step": 14950
},
{
"epoch": 51.23287671232877,
"grad_norm": 0.03460918739438057,
"learning_rate": 1.637995976326527e-05,
"loss": 0.0039,
"step": 14960
},
{
"epoch": 51.26712328767123,
"grad_norm": 0.045809391885995865,
"learning_rate": 1.631881188564275e-05,
"loss": 0.0049,
"step": 14970
},
{
"epoch": 51.3013698630137,
"grad_norm": 0.04050698131322861,
"learning_rate": 1.62577560911024e-05,
"loss": 0.0028,
"step": 14980
},
{
"epoch": 51.33561643835616,
"grad_norm": 0.02764354832470417,
"learning_rate": 1.6196792546568472e-05,
"loss": 0.0021,
"step": 14990
},
{
"epoch": 51.36986301369863,
"grad_norm": 0.035305511206388474,
"learning_rate": 1.6135921418712956e-05,
"loss": 0.0028,
"step": 15000
},
{
"epoch": 51.4041095890411,
"grad_norm": 0.030398089438676834,
"learning_rate": 1.6075142873955164e-05,
"loss": 0.0039,
"step": 15010
},
{
"epoch": 51.43835616438356,
"grad_norm": 0.03482901677489281,
"learning_rate": 1.6014457078461353e-05,
"loss": 0.0026,
"step": 15020
},
{
"epoch": 51.47260273972603,
"grad_norm": 0.035669729113578796,
"learning_rate": 1.5953864198144135e-05,
"loss": 0.0042,
"step": 15030
},
{
"epoch": 51.50684931506849,
"grad_norm": 0.03193089738488197,
"learning_rate": 1.5893364398662176e-05,
"loss": 0.0024,
"step": 15040
},
{
"epoch": 51.54109589041096,
"grad_norm": 0.027457116171717644,
"learning_rate": 1.583295784541958e-05,
"loss": 0.0038,
"step": 15050
},
{
"epoch": 51.57534246575342,
"grad_norm": 0.037244558334350586,
"learning_rate": 1.5772644703565565e-05,
"loss": 0.0032,
"step": 15060
},
{
"epoch": 51.60958904109589,
"grad_norm": 0.026973290368914604,
"learning_rate": 1.5712425137993973e-05,
"loss": 0.0044,
"step": 15070
},
{
"epoch": 51.64383561643836,
"grad_norm": 0.027435757219791412,
"learning_rate": 1.5652299313342773e-05,
"loss": 0.0032,
"step": 15080
},
{
"epoch": 51.678082191780824,
"grad_norm": 0.033582866191864014,
"learning_rate": 1.5592267393993716e-05,
"loss": 0.0035,
"step": 15090
},
{
"epoch": 51.71232876712329,
"grad_norm": 0.029963036999106407,
"learning_rate": 1.553232954407171e-05,
"loss": 0.0036,
"step": 15100
},
{
"epoch": 51.74657534246575,
"grad_norm": 0.029700949788093567,
"learning_rate": 1.5472485927444597e-05,
"loss": 0.0045,
"step": 15110
},
{
"epoch": 51.78082191780822,
"grad_norm": 0.021908050402998924,
"learning_rate": 1.5412736707722537e-05,
"loss": 0.0026,
"step": 15120
},
{
"epoch": 51.81506849315068,
"grad_norm": 0.04361306503415108,
"learning_rate": 1.5353082048257596e-05,
"loss": 0.0045,
"step": 15130
},
{
"epoch": 51.84931506849315,
"grad_norm": 0.047619931399822235,
"learning_rate": 1.5293522112143373e-05,
"loss": 0.0045,
"step": 15140
},
{
"epoch": 51.88356164383562,
"grad_norm": 0.04392608627676964,
"learning_rate": 1.5234057062214402e-05,
"loss": 0.0031,
"step": 15150
},
{
"epoch": 51.917808219178085,
"grad_norm": 0.03222033753991127,
"learning_rate": 1.517468706104589e-05,
"loss": 0.0032,
"step": 15160
},
{
"epoch": 51.95205479452055,
"grad_norm": 0.04997098818421364,
"learning_rate": 1.5115412270953167e-05,
"loss": 0.0036,
"step": 15170
},
{
"epoch": 51.986301369863014,
"grad_norm": 0.04557951167225838,
"learning_rate": 1.5056232853991209e-05,
"loss": 0.0036,
"step": 15180
},
{
"epoch": 52.02054794520548,
"grad_norm": 0.026306597515940666,
"learning_rate": 1.4997148971954344e-05,
"loss": 0.0033,
"step": 15190
},
{
"epoch": 52.054794520547944,
"grad_norm": 0.034577708691358566,
"learning_rate": 1.4938160786375572e-05,
"loss": 0.0026,
"step": 15200
},
{
"epoch": 52.08904109589041,
"grad_norm": 0.030455907806754112,
"learning_rate": 1.4879268458526379e-05,
"loss": 0.0029,
"step": 15210
},
{
"epoch": 52.12328767123287,
"grad_norm": 0.021716345101594925,
"learning_rate": 1.4820472149416154e-05,
"loss": 0.0017,
"step": 15220
},
{
"epoch": 52.157534246575345,
"grad_norm": 0.031647682189941406,
"learning_rate": 1.4761772019791748e-05,
"loss": 0.0024,
"step": 15230
},
{
"epoch": 52.19178082191781,
"grad_norm": 0.026279931887984276,
"learning_rate": 1.470316823013707e-05,
"loss": 0.0025,
"step": 15240
},
{
"epoch": 52.226027397260275,
"grad_norm": 0.03701292723417282,
"learning_rate": 1.4644660940672627e-05,
"loss": 0.0032,
"step": 15250
},
{
"epoch": 52.26027397260274,
"grad_norm": 0.03941259905695915,
"learning_rate": 1.4586250311355132e-05,
"loss": 0.0028,
"step": 15260
},
{
"epoch": 52.294520547945204,
"grad_norm": 0.037332188338041306,
"learning_rate": 1.4527936501877032e-05,
"loss": 0.0033,
"step": 15270
},
{
"epoch": 52.32876712328767,
"grad_norm": 0.027855148538947105,
"learning_rate": 1.4469719671666043e-05,
"loss": 0.0033,
"step": 15280
},
{
"epoch": 52.363013698630134,
"grad_norm": 0.030381757766008377,
"learning_rate": 1.4411599979884744e-05,
"loss": 0.0047,
"step": 15290
},
{
"epoch": 52.397260273972606,
"grad_norm": 0.030294453725218773,
"learning_rate": 1.435357758543015e-05,
"loss": 0.0024,
"step": 15300
},
{
"epoch": 52.43150684931507,
"grad_norm": 0.03383754566311836,
"learning_rate": 1.4295652646933277e-05,
"loss": 0.003,
"step": 15310
},
{
"epoch": 52.465753424657535,
"grad_norm": 0.03104538284242153,
"learning_rate": 1.4237825322758736e-05,
"loss": 0.0036,
"step": 15320
},
{
"epoch": 52.5,
"grad_norm": 0.034425560384988785,
"learning_rate": 1.4180095771004154e-05,
"loss": 0.0032,
"step": 15330
},
{
"epoch": 52.534246575342465,
"grad_norm": 0.028029220178723335,
"learning_rate": 1.412246414949997e-05,
"loss": 0.0024,
"step": 15340
},
{
"epoch": 52.56849315068493,
"grad_norm": 0.03750409930944443,
"learning_rate": 1.4064930615808808e-05,
"loss": 0.0032,
"step": 15350
},
{
"epoch": 52.602739726027394,
"grad_norm": 0.02901621349155903,
"learning_rate": 1.4007495327225162e-05,
"loss": 0.0032,
"step": 15360
},
{
"epoch": 52.636986301369866,
"grad_norm": 0.03773738816380501,
"learning_rate": 1.3950158440774957e-05,
"loss": 0.0032,
"step": 15370
},
{
"epoch": 52.67123287671233,
"grad_norm": 0.035674892365932465,
"learning_rate": 1.389292011321498e-05,
"loss": 0.0027,
"step": 15380
},
{
"epoch": 52.705479452054796,
"grad_norm": 0.03301068767905235,
"learning_rate": 1.383578050103268e-05,
"loss": 0.0037,
"step": 15390
},
{
"epoch": 52.73972602739726,
"grad_norm": 0.04170006886124611,
"learning_rate": 1.3778739760445552e-05,
"loss": 0.0029,
"step": 15400
},
{
"epoch": 52.773972602739725,
"grad_norm": 0.04138687998056412,
"learning_rate": 1.3721798047400813e-05,
"loss": 0.0037,
"step": 15410
},
{
"epoch": 52.80821917808219,
"grad_norm": 0.031535543501377106,
"learning_rate": 1.3664955517574968e-05,
"loss": 0.0032,
"step": 15420
},
{
"epoch": 52.842465753424655,
"grad_norm": 0.04763416200876236,
"learning_rate": 1.3608212326373249e-05,
"loss": 0.0031,
"step": 15430
},
{
"epoch": 52.87671232876713,
"grad_norm": 0.05117536336183548,
"learning_rate": 1.3551568628929434e-05,
"loss": 0.0045,
"step": 15440
},
{
"epoch": 52.91095890410959,
"grad_norm": 0.040727127343416214,
"learning_rate": 1.3495024580105192e-05,
"loss": 0.0026,
"step": 15450
},
{
"epoch": 52.945205479452056,
"grad_norm": 0.03748713806271553,
"learning_rate": 1.343858033448982e-05,
"loss": 0.0029,
"step": 15460
},
{
"epoch": 52.97945205479452,
"grad_norm": 0.026867395266890526,
"learning_rate": 1.3382236046399722e-05,
"loss": 0.0024,
"step": 15470
},
{
"epoch": 53.013698630136986,
"grad_norm": 0.03179669752717018,
"learning_rate": 1.3325991869878013e-05,
"loss": 0.0029,
"step": 15480
},
{
"epoch": 53.04794520547945,
"grad_norm": 0.0422259159386158,
"learning_rate": 1.3269847958694148e-05,
"loss": 0.0026,
"step": 15490
},
{
"epoch": 53.082191780821915,
"grad_norm": 0.030288219451904297,
"learning_rate": 1.3213804466343421e-05,
"loss": 0.0033,
"step": 15500
},
{
"epoch": 53.11643835616438,
"grad_norm": 0.03154587373137474,
"learning_rate": 1.3157861546046613e-05,
"loss": 0.0032,
"step": 15510
},
{
"epoch": 53.15068493150685,
"grad_norm": 0.0416083000600338,
"learning_rate": 1.3102019350749528e-05,
"loss": 0.0038,
"step": 15520
},
{
"epoch": 53.18493150684932,
"grad_norm": 0.030634721741080284,
"learning_rate": 1.3046278033122577e-05,
"loss": 0.0027,
"step": 15530
},
{
"epoch": 53.21917808219178,
"grad_norm": 0.03578125312924385,
"learning_rate": 1.299063774556042e-05,
"loss": 0.0035,
"step": 15540
},
{
"epoch": 53.25342465753425,
"grad_norm": 0.02459009923040867,
"learning_rate": 1.293509864018146e-05,
"loss": 0.0029,
"step": 15550
},
{
"epoch": 53.28767123287671,
"grad_norm": 0.034552790224552155,
"learning_rate": 1.2879660868827508e-05,
"loss": 0.0035,
"step": 15560
},
{
"epoch": 53.321917808219176,
"grad_norm": 0.027246346697211266,
"learning_rate": 1.2824324583063302e-05,
"loss": 0.0026,
"step": 15570
},
{
"epoch": 53.35616438356164,
"grad_norm": 0.03363962098956108,
"learning_rate": 1.2769089934176126e-05,
"loss": 0.0032,
"step": 15580
},
{
"epoch": 53.39041095890411,
"grad_norm": 0.03963744267821312,
"learning_rate": 1.2713957073175425e-05,
"loss": 0.0028,
"step": 15590
},
{
"epoch": 53.42465753424658,
"grad_norm": 0.02972957119345665,
"learning_rate": 1.2658926150792322e-05,
"loss": 0.0024,
"step": 15600
},
{
"epoch": 53.45890410958904,
"grad_norm": 0.04657153785228729,
"learning_rate": 1.2603997317479238e-05,
"loss": 0.0037,
"step": 15610
},
{
"epoch": 53.49315068493151,
"grad_norm": 0.022585947066545486,
"learning_rate": 1.2549170723409549e-05,
"loss": 0.0038,
"step": 15620
},
{
"epoch": 53.52739726027397,
"grad_norm": 0.027521610260009766,
"learning_rate": 1.2494446518477022e-05,
"loss": 0.0028,
"step": 15630
},
{
"epoch": 53.56164383561644,
"grad_norm": 0.02755691297352314,
"learning_rate": 1.243982485229559e-05,
"loss": 0.0044,
"step": 15640
},
{
"epoch": 53.5958904109589,
"grad_norm": 0.027248825877904892,
"learning_rate": 1.2385305874198776e-05,
"loss": 0.0041,
"step": 15650
},
{
"epoch": 53.63013698630137,
"grad_norm": 0.026916412636637688,
"learning_rate": 1.233088973323937e-05,
"loss": 0.0023,
"step": 15660
},
{
"epoch": 53.66438356164384,
"grad_norm": 0.026474550366401672,
"learning_rate": 1.2276576578189064e-05,
"loss": 0.003,
"step": 15670
},
{
"epoch": 53.6986301369863,
"grad_norm": 0.0351148284971714,
"learning_rate": 1.2222366557537911e-05,
"loss": 0.0034,
"step": 15680
},
{
"epoch": 53.73287671232877,
"grad_norm": 0.03675288334488869,
"learning_rate": 1.2168259819494066e-05,
"loss": 0.0033,
"step": 15690
},
{
"epoch": 53.76712328767123,
"grad_norm": 0.027912992984056473,
"learning_rate": 1.2114256511983274e-05,
"loss": 0.0037,
"step": 15700
},
{
"epoch": 53.8013698630137,
"grad_norm": 0.026471871882677078,
"learning_rate": 1.2060356782648503e-05,
"loss": 0.0042,
"step": 15710
},
{
"epoch": 53.83561643835616,
"grad_norm": 0.0362430065870285,
"learning_rate": 1.2006560778849578e-05,
"loss": 0.0029,
"step": 15720
},
{
"epoch": 53.86986301369863,
"grad_norm": 0.03203180804848671,
"learning_rate": 1.1952868647662696e-05,
"loss": 0.0029,
"step": 15730
},
{
"epoch": 53.9041095890411,
"grad_norm": 0.04257775843143463,
"learning_rate": 1.1899280535880119e-05,
"loss": 0.0028,
"step": 15740
},
{
"epoch": 53.93835616438356,
"grad_norm": 0.03652791678905487,
"learning_rate": 1.1845796590009683e-05,
"loss": 0.0032,
"step": 15750
},
{
"epoch": 53.97260273972603,
"grad_norm": 0.02293156087398529,
"learning_rate": 1.1792416956274444e-05,
"loss": 0.0024,
"step": 15760
},
{
"epoch": 54.00684931506849,
"grad_norm": 0.038182858377695084,
"learning_rate": 1.1739141780612306e-05,
"loss": 0.0032,
"step": 15770
},
{
"epoch": 54.04109589041096,
"grad_norm": 0.04348289966583252,
"learning_rate": 1.1685971208675539e-05,
"loss": 0.004,
"step": 15780
},
{
"epoch": 54.07534246575342,
"grad_norm": 0.03379713371396065,
"learning_rate": 1.1632905385830484e-05,
"loss": 0.0021,
"step": 15790
},
{
"epoch": 54.10958904109589,
"grad_norm": 0.03406383469700813,
"learning_rate": 1.157994445715706e-05,
"loss": 0.0028,
"step": 15800
},
{
"epoch": 54.14383561643836,
"grad_norm": 0.047310084104537964,
"learning_rate": 1.1527088567448407e-05,
"loss": 0.0038,
"step": 15810
},
{
"epoch": 54.178082191780824,
"grad_norm": 0.04336090013384819,
"learning_rate": 1.1474337861210543e-05,
"loss": 0.003,
"step": 15820
},
{
"epoch": 54.21232876712329,
"grad_norm": 0.040994029492139816,
"learning_rate": 1.1421692482661856e-05,
"loss": 0.0037,
"step": 15830
},
{
"epoch": 54.24657534246575,
"grad_norm": 0.037672173231840134,
"learning_rate": 1.1369152575732822e-05,
"loss": 0.004,
"step": 15840
},
{
"epoch": 54.28082191780822,
"grad_norm": 0.03158077225089073,
"learning_rate": 1.1316718284065537e-05,
"loss": 0.0027,
"step": 15850
},
{
"epoch": 54.31506849315068,
"grad_norm": 0.03162799030542374,
"learning_rate": 1.1264389751013326e-05,
"loss": 0.0023,
"step": 15860
},
{
"epoch": 54.34931506849315,
"grad_norm": 0.032345883548259735,
"learning_rate": 1.1212167119640438e-05,
"loss": 0.0028,
"step": 15870
},
{
"epoch": 54.38356164383562,
"grad_norm": 0.027058375999331474,
"learning_rate": 1.1160050532721528e-05,
"loss": 0.0033,
"step": 15880
},
{
"epoch": 54.417808219178085,
"grad_norm": 0.03421015664935112,
"learning_rate": 1.1108040132741354e-05,
"loss": 0.003,
"step": 15890
},
{
"epoch": 54.45205479452055,
"grad_norm": 0.04903862252831459,
"learning_rate": 1.1056136061894384e-05,
"loss": 0.003,
"step": 15900
},
{
"epoch": 54.486301369863014,
"grad_norm": 0.033128440380096436,
"learning_rate": 1.100433846208434e-05,
"loss": 0.0027,
"step": 15910
},
{
"epoch": 54.52054794520548,
"grad_norm": 0.02966536581516266,
"learning_rate": 1.095264747492391e-05,
"loss": 0.0029,
"step": 15920
},
{
"epoch": 54.554794520547944,
"grad_norm": 0.02673506550490856,
"learning_rate": 1.090106324173426e-05,
"loss": 0.0027,
"step": 15930
},
{
"epoch": 54.58904109589041,
"grad_norm": 0.02429800108075142,
"learning_rate": 1.0849585903544706e-05,
"loss": 0.0024,
"step": 15940
},
{
"epoch": 54.62328767123287,
"grad_norm": 0.02877028100192547,
"learning_rate": 1.0798215601092354e-05,
"loss": 0.002,
"step": 15950
},
{
"epoch": 54.657534246575345,
"grad_norm": 0.020034709945321083,
"learning_rate": 1.0746952474821614e-05,
"loss": 0.0028,
"step": 15960
},
{
"epoch": 54.69178082191781,
"grad_norm": 0.03347136452794075,
"learning_rate": 1.069579666488395e-05,
"loss": 0.0037,
"step": 15970
},
{
"epoch": 54.726027397260275,
"grad_norm": 0.04249145835638046,
"learning_rate": 1.0644748311137376e-05,
"loss": 0.004,
"step": 15980
},
{
"epoch": 54.76027397260274,
"grad_norm": 0.03087807074189186,
"learning_rate": 1.059380755314613e-05,
"loss": 0.0031,
"step": 15990
},
{
"epoch": 54.794520547945204,
"grad_norm": 0.054326847195625305,
"learning_rate": 1.0542974530180327e-05,
"loss": 0.0028,
"step": 16000
},
{
"epoch": 54.82876712328767,
"grad_norm": 0.027016691863536835,
"learning_rate": 1.049224938121548e-05,
"loss": 0.0036,
"step": 16010
},
{
"epoch": 54.863013698630134,
"grad_norm": 0.025528740137815475,
"learning_rate": 1.0441632244932237e-05,
"loss": 0.0022,
"step": 16020
},
{
"epoch": 54.897260273972606,
"grad_norm": 0.023421315476298332,
"learning_rate": 1.0391123259715906e-05,
"loss": 0.0025,
"step": 16030
},
{
"epoch": 54.93150684931507,
"grad_norm": 0.03620325028896332,
"learning_rate": 1.0340722563656107e-05,
"loss": 0.0026,
"step": 16040
},
{
"epoch": 54.965753424657535,
"grad_norm": 0.036633092910051346,
"learning_rate": 1.0290430294546449e-05,
"loss": 0.0027,
"step": 16050
},
{
"epoch": 55.0,
"grad_norm": 0.04909895360469818,
"learning_rate": 1.0240246589884044e-05,
"loss": 0.0029,
"step": 16060
},
{
"epoch": 55.034246575342465,
"grad_norm": 0.02480817213654518,
"learning_rate": 1.0190171586869258e-05,
"loss": 0.0031,
"step": 16070
},
{
"epoch": 55.06849315068493,
"grad_norm": 0.03510754182934761,
"learning_rate": 1.0140205422405214e-05,
"loss": 0.0036,
"step": 16080
},
{
"epoch": 55.102739726027394,
"grad_norm": 0.02947348728775978,
"learning_rate": 1.009034823309749e-05,
"loss": 0.0026,
"step": 16090
},
{
"epoch": 55.136986301369866,
"grad_norm": 0.023402495309710503,
"learning_rate": 1.0040600155253765e-05,
"loss": 0.002,
"step": 16100
},
{
"epoch": 55.17123287671233,
"grad_norm": 0.04765753448009491,
"learning_rate": 9.990961324883358e-06,
"loss": 0.0046,
"step": 16110
},
{
"epoch": 55.205479452054796,
"grad_norm": 0.034412067383527756,
"learning_rate": 9.941431877696955e-06,
"loss": 0.0031,
"step": 16120
},
{
"epoch": 55.23972602739726,
"grad_norm": 0.0393243208527565,
"learning_rate": 9.892011949106172e-06,
"loss": 0.0033,
"step": 16130
},
{
"epoch": 55.273972602739725,
"grad_norm": 0.0417923778295517,
"learning_rate": 9.842701674223187e-06,
"loss": 0.0036,
"step": 16140
},
{
"epoch": 55.30821917808219,
"grad_norm": 0.03421523794531822,
"learning_rate": 9.793501187860432e-06,
"loss": 0.0032,
"step": 16150
},
{
"epoch": 55.342465753424655,
"grad_norm": 0.02373417280614376,
"learning_rate": 9.744410624530148e-06,
"loss": 0.002,
"step": 16160
},
{
"epoch": 55.37671232876713,
"grad_norm": 0.03909434750676155,
"learning_rate": 9.695430118444048e-06,
"loss": 0.0028,
"step": 16170
},
{
"epoch": 55.41095890410959,
"grad_norm": 0.031819798052310944,
"learning_rate": 9.646559803512994e-06,
"loss": 0.0029,
"step": 16180
},
{
"epoch": 55.445205479452056,
"grad_norm": 0.024081602692604065,
"learning_rate": 9.597799813346525e-06,
"loss": 0.0026,
"step": 16190
},
{
"epoch": 55.47945205479452,
"grad_norm": 0.02767317369580269,
"learning_rate": 9.549150281252633e-06,
"loss": 0.0042,
"step": 16200
},
{
"epoch": 55.513698630136986,
"grad_norm": 0.03075326606631279,
"learning_rate": 9.500611340237258e-06,
"loss": 0.0044,
"step": 16210
},
{
"epoch": 55.54794520547945,
"grad_norm": 0.022155923768877983,
"learning_rate": 9.452183123004e-06,
"loss": 0.0025,
"step": 16220
},
{
"epoch": 55.582191780821915,
"grad_norm": 0.027297066524624825,
"learning_rate": 9.403865761953779e-06,
"loss": 0.0033,
"step": 16230
},
{
"epoch": 55.61643835616438,
"grad_norm": 0.04104280099272728,
"learning_rate": 9.355659389184396e-06,
"loss": 0.0037,
"step": 16240
},
{
"epoch": 55.65068493150685,
"grad_norm": 0.02392885647714138,
"learning_rate": 9.307564136490254e-06,
"loss": 0.0023,
"step": 16250
},
{
"epoch": 55.68493150684932,
"grad_norm": 0.040937427431344986,
"learning_rate": 9.259580135361929e-06,
"loss": 0.003,
"step": 16260
},
{
"epoch": 55.71917808219178,
"grad_norm": 0.025108788162469864,
"learning_rate": 9.211707516985829e-06,
"loss": 0.0043,
"step": 16270
},
{
"epoch": 55.75342465753425,
"grad_norm": 0.03981754183769226,
"learning_rate": 9.163946412243896e-06,
"loss": 0.0024,
"step": 16280
},
{
"epoch": 55.78767123287671,
"grad_norm": 0.027174200862646103,
"learning_rate": 9.116296951713133e-06,
"loss": 0.0035,
"step": 16290
},
{
"epoch": 55.821917808219176,
"grad_norm": 0.03212342411279678,
"learning_rate": 9.068759265665384e-06,
"loss": 0.0048,
"step": 16300
},
{
"epoch": 55.85616438356164,
"grad_norm": 0.022607458755373955,
"learning_rate": 9.02133348406684e-06,
"loss": 0.0017,
"step": 16310
},
{
"epoch": 55.89041095890411,
"grad_norm": 0.0344407893717289,
"learning_rate": 8.974019736577777e-06,
"loss": 0.003,
"step": 16320
},
{
"epoch": 55.92465753424658,
"grad_norm": 0.03668694943189621,
"learning_rate": 8.92681815255219e-06,
"loss": 0.0025,
"step": 16330
},
{
"epoch": 55.95890410958904,
"grad_norm": 0.03393989056348801,
"learning_rate": 8.879728861037384e-06,
"loss": 0.004,
"step": 16340
},
{
"epoch": 55.99315068493151,
"grad_norm": 0.03064884804189205,
"learning_rate": 8.832751990773714e-06,
"loss": 0.0029,
"step": 16350
},
{
"epoch": 56.02739726027397,
"grad_norm": 0.024468744173645973,
"learning_rate": 8.785887670194138e-06,
"loss": 0.002,
"step": 16360
},
{
"epoch": 56.06164383561644,
"grad_norm": 0.02453785017132759,
"learning_rate": 8.739136027423894e-06,
"loss": 0.0029,
"step": 16370
},
{
"epoch": 56.0958904109589,
"grad_norm": 0.019738109782338142,
"learning_rate": 8.692497190280224e-06,
"loss": 0.0029,
"step": 16380
},
{
"epoch": 56.13013698630137,
"grad_norm": 0.02169376239180565,
"learning_rate": 8.645971286271904e-06,
"loss": 0.0036,
"step": 16390
},
{
"epoch": 56.16438356164384,
"grad_norm": 0.02179548889398575,
"learning_rate": 8.599558442598998e-06,
"loss": 0.0023,
"step": 16400
},
{
"epoch": 56.1986301369863,
"grad_norm": 0.025071581825613976,
"learning_rate": 8.55325878615244e-06,
"loss": 0.0022,
"step": 16410
},
{
"epoch": 56.23287671232877,
"grad_norm": 0.027042483910918236,
"learning_rate": 8.507072443513702e-06,
"loss": 0.0035,
"step": 16420
},
{
"epoch": 56.26712328767123,
"grad_norm": 0.02304648794233799,
"learning_rate": 8.460999540954517e-06,
"loss": 0.0024,
"step": 16430
},
{
"epoch": 56.3013698630137,
"grad_norm": 0.02609025314450264,
"learning_rate": 8.415040204436426e-06,
"loss": 0.0034,
"step": 16440
},
{
"epoch": 56.33561643835616,
"grad_norm": 0.029359575361013412,
"learning_rate": 8.369194559610482e-06,
"loss": 0.0022,
"step": 16450
},
{
"epoch": 56.36986301369863,
"grad_norm": 0.031987905502319336,
"learning_rate": 8.323462731816961e-06,
"loss": 0.0022,
"step": 16460
},
{
"epoch": 56.4041095890411,
"grad_norm": 0.04589890316128731,
"learning_rate": 8.277844846084898e-06,
"loss": 0.0036,
"step": 16470
},
{
"epoch": 56.43835616438356,
"grad_norm": 0.019137799739837646,
"learning_rate": 8.232341027131885e-06,
"loss": 0.0029,
"step": 16480
},
{
"epoch": 56.47260273972603,
"grad_norm": 0.02907939814031124,
"learning_rate": 8.186951399363613e-06,
"loss": 0.0024,
"step": 16490
},
{
"epoch": 56.50684931506849,
"grad_norm": 0.030412210151553154,
"learning_rate": 8.141676086873572e-06,
"loss": 0.0033,
"step": 16500
},
{
"epoch": 56.54109589041096,
"grad_norm": 0.03413422778248787,
"learning_rate": 8.096515213442762e-06,
"loss": 0.0026,
"step": 16510
},
{
"epoch": 56.57534246575342,
"grad_norm": 0.01989554800093174,
"learning_rate": 8.051468902539272e-06,
"loss": 0.0042,
"step": 16520
},
{
"epoch": 56.60958904109589,
"grad_norm": 0.021041251718997955,
"learning_rate": 8.00653727731801e-06,
"loss": 0.0032,
"step": 16530
},
{
"epoch": 56.64383561643836,
"grad_norm": 0.036524537950754166,
"learning_rate": 7.96172046062032e-06,
"loss": 0.0034,
"step": 16540
},
{
"epoch": 56.678082191780824,
"grad_norm": 0.03467152640223503,
"learning_rate": 7.917018574973645e-06,
"loss": 0.0038,
"step": 16550
},
{
"epoch": 56.71232876712329,
"grad_norm": 0.03052785061299801,
"learning_rate": 7.872431742591268e-06,
"loss": 0.0025,
"step": 16560
},
{
"epoch": 56.74657534246575,
"grad_norm": 0.04574688896536827,
"learning_rate": 7.827960085371855e-06,
"loss": 0.0036,
"step": 16570
},
{
"epoch": 56.78082191780822,
"grad_norm": 0.04330058395862579,
"learning_rate": 7.783603724899257e-06,
"loss": 0.0031,
"step": 16580
},
{
"epoch": 56.81506849315068,
"grad_norm": 0.024963831529021263,
"learning_rate": 7.739362782442021e-06,
"loss": 0.004,
"step": 16590
},
{
"epoch": 56.84931506849315,
"grad_norm": 0.02104303240776062,
"learning_rate": 7.695237378953223e-06,
"loss": 0.0022,
"step": 16600
},
{
"epoch": 56.88356164383562,
"grad_norm": 0.04167972132563591,
"learning_rate": 7.651227635070041e-06,
"loss": 0.0036,
"step": 16610
},
{
"epoch": 56.917808219178085,
"grad_norm": 0.03367381915450096,
"learning_rate": 7.607333671113409e-06,
"loss": 0.0029,
"step": 16620
},
{
"epoch": 56.95205479452055,
"grad_norm": 0.03414791822433472,
"learning_rate": 7.56355560708778e-06,
"loss": 0.0039,
"step": 16630
},
{
"epoch": 56.986301369863014,
"grad_norm": 0.027016418054699898,
"learning_rate": 7.519893562680663e-06,
"loss": 0.0034,
"step": 16640
},
{
"epoch": 57.02054794520548,
"grad_norm": 0.024846762418746948,
"learning_rate": 7.476347657262456e-06,
"loss": 0.0027,
"step": 16650
},
{
"epoch": 57.054794520547944,
"grad_norm": 0.018531423062086105,
"learning_rate": 7.432918009885997e-06,
"loss": 0.0025,
"step": 16660
},
{
"epoch": 57.08904109589041,
"grad_norm": 0.03298579528927803,
"learning_rate": 7.389604739286271e-06,
"loss": 0.0045,
"step": 16670
},
{
"epoch": 57.12328767123287,
"grad_norm": 0.030705546960234642,
"learning_rate": 7.3464079638801365e-06,
"loss": 0.0024,
"step": 16680
},
{
"epoch": 57.157534246575345,
"grad_norm": 0.03241143003106117,
"learning_rate": 7.30332780176588e-06,
"loss": 0.0031,
"step": 16690
},
{
"epoch": 57.19178082191781,
"grad_norm": 0.025262603536248207,
"learning_rate": 7.260364370723044e-06,
"loss": 0.0031,
"step": 16700
},
{
"epoch": 57.226027397260275,
"grad_norm": 0.028922202065587044,
"learning_rate": 7.217517788212025e-06,
"loss": 0.0026,
"step": 16710
},
{
"epoch": 57.26027397260274,
"grad_norm": 0.028081277385354042,
"learning_rate": 7.174788171373731e-06,
"loss": 0.0045,
"step": 16720
},
{
"epoch": 57.294520547945204,
"grad_norm": 0.0246548093855381,
"learning_rate": 7.132175637029293e-06,
"loss": 0.0022,
"step": 16730
},
{
"epoch": 57.32876712328767,
"grad_norm": 0.02839839644730091,
"learning_rate": 7.089680301679752e-06,
"loss": 0.0039,
"step": 16740
},
{
"epoch": 57.363013698630134,
"grad_norm": 0.016727443784475327,
"learning_rate": 7.047302281505736e-06,
"loss": 0.0023,
"step": 16750
},
{
"epoch": 57.397260273972606,
"grad_norm": 0.022300872951745987,
"learning_rate": 7.005041692367154e-06,
"loss": 0.0035,
"step": 16760
},
{
"epoch": 57.43150684931507,
"grad_norm": 0.025143135339021683,
"learning_rate": 6.962898649802823e-06,
"loss": 0.0033,
"step": 16770
},
{
"epoch": 57.465753424657535,
"grad_norm": 0.034999918192625046,
"learning_rate": 6.92087326903022e-06,
"loss": 0.0029,
"step": 16780
},
{
"epoch": 57.5,
"grad_norm": 0.04293488711118698,
"learning_rate": 6.878965664945108e-06,
"loss": 0.0028,
"step": 16790
},
{
"epoch": 57.534246575342465,
"grad_norm": 0.022644592449069023,
"learning_rate": 6.837175952121306e-06,
"loss": 0.0025,
"step": 16800
},
{
"epoch": 57.56849315068493,
"grad_norm": 0.019077636301517487,
"learning_rate": 6.795504244810285e-06,
"loss": 0.0032,
"step": 16810
},
{
"epoch": 57.602739726027394,
"grad_norm": 0.024243632331490517,
"learning_rate": 6.753950656940905e-06,
"loss": 0.003,
"step": 16820
},
{
"epoch": 57.636986301369866,
"grad_norm": 0.04237434267997742,
"learning_rate": 6.712515302119077e-06,
"loss": 0.0034,
"step": 16830
},
{
"epoch": 57.67123287671233,
"grad_norm": 0.019914429634809494,
"learning_rate": 6.671198293627479e-06,
"loss": 0.0024,
"step": 16840
},
{
"epoch": 57.705479452054796,
"grad_norm": 0.015781041234731674,
"learning_rate": 6.629999744425236e-06,
"loss": 0.0031,
"step": 16850
},
{
"epoch": 57.73972602739726,
"grad_norm": 0.018441669642925262,
"learning_rate": 6.588919767147639e-06,
"loss": 0.003,
"step": 16860
},
{
"epoch": 57.773972602739725,
"grad_norm": 0.017452578991651535,
"learning_rate": 6.5479584741057255e-06,
"loss": 0.0022,
"step": 16870
},
{
"epoch": 57.80821917808219,
"grad_norm": 0.03509654104709625,
"learning_rate": 6.5071159772861436e-06,
"loss": 0.0029,
"step": 16880
},
{
"epoch": 57.842465753424655,
"grad_norm": 0.019614599645137787,
"learning_rate": 6.466392388350695e-06,
"loss": 0.0025,
"step": 16890
},
{
"epoch": 57.87671232876713,
"grad_norm": 0.022444887086749077,
"learning_rate": 6.425787818636131e-06,
"loss": 0.003,
"step": 16900
},
{
"epoch": 57.91095890410959,
"grad_norm": 0.017676763236522675,
"learning_rate": 6.385302379153818e-06,
"loss": 0.0023,
"step": 16910
},
{
"epoch": 57.945205479452056,
"grad_norm": 0.017485516145825386,
"learning_rate": 6.344936180589351e-06,
"loss": 0.0028,
"step": 16920
},
{
"epoch": 57.97945205479452,
"grad_norm": 0.019938629120588303,
"learning_rate": 6.304689333302416e-06,
"loss": 0.0025,
"step": 16930
},
{
"epoch": 58.013698630136986,
"grad_norm": 0.03070191666483879,
"learning_rate": 6.264561947326331e-06,
"loss": 0.0027,
"step": 16940
},
{
"epoch": 58.04794520547945,
"grad_norm": 0.021726680919528008,
"learning_rate": 6.22455413236786e-06,
"loss": 0.0034,
"step": 16950
},
{
"epoch": 58.082191780821915,
"grad_norm": 0.03220411017537117,
"learning_rate": 6.184665997806832e-06,
"loss": 0.0021,
"step": 16960
},
{
"epoch": 58.11643835616438,
"grad_norm": 0.032874926924705505,
"learning_rate": 6.144897652695864e-06,
"loss": 0.0034,
"step": 16970
},
{
"epoch": 58.15068493150685,
"grad_norm": 0.022236965596675873,
"learning_rate": 6.1052492057601275e-06,
"loss": 0.0026,
"step": 16980
},
{
"epoch": 58.18493150684932,
"grad_norm": 0.05407319590449333,
"learning_rate": 6.0657207653969315e-06,
"loss": 0.0037,
"step": 16990
},
{
"epoch": 58.21917808219178,
"grad_norm": 0.04553509131073952,
"learning_rate": 6.026312439675552e-06,
"loss": 0.0033,
"step": 17000
},
{
"epoch": 58.25342465753425,
"grad_norm": 0.028858445584774017,
"learning_rate": 5.9870243363368275e-06,
"loss": 0.0023,
"step": 17010
},
{
"epoch": 58.28767123287671,
"grad_norm": 0.02405349723994732,
"learning_rate": 5.947856562792925e-06,
"loss": 0.0037,
"step": 17020
},
{
"epoch": 58.321917808219176,
"grad_norm": 0.030620204284787178,
"learning_rate": 5.908809226127054e-06,
"loss": 0.0034,
"step": 17030
},
{
"epoch": 58.35616438356164,
"grad_norm": 0.03459925949573517,
"learning_rate": 5.869882433093155e-06,
"loss": 0.003,
"step": 17040
},
{
"epoch": 58.39041095890411,
"grad_norm": 0.03197428211569786,
"learning_rate": 5.831076290115573e-06,
"loss": 0.0027,
"step": 17050
},
{
"epoch": 58.42465753424658,
"grad_norm": 0.022777985781431198,
"learning_rate": 5.79239090328883e-06,
"loss": 0.0038,
"step": 17060
},
{
"epoch": 58.45890410958904,
"grad_norm": 0.025515226647257805,
"learning_rate": 5.753826378377286e-06,
"loss": 0.0026,
"step": 17070
},
{
"epoch": 58.49315068493151,
"grad_norm": 0.01934850960969925,
"learning_rate": 5.715382820814885e-06,
"loss": 0.0026,
"step": 17080
},
{
"epoch": 58.52739726027397,
"grad_norm": 0.02293264865875244,
"learning_rate": 5.67706033570487e-06,
"loss": 0.003,
"step": 17090
},
{
"epoch": 58.56164383561644,
"grad_norm": 0.02049412578344345,
"learning_rate": 5.6388590278194096e-06,
"loss": 0.002,
"step": 17100
},
{
"epoch": 58.5958904109589,
"grad_norm": 0.015052354894578457,
"learning_rate": 5.600779001599455e-06,
"loss": 0.0021,
"step": 17110
},
{
"epoch": 58.63013698630137,
"grad_norm": 0.04019205644726753,
"learning_rate": 5.562820361154314e-06,
"loss": 0.0032,
"step": 17120
},
{
"epoch": 58.66438356164384,
"grad_norm": 0.03583867847919464,
"learning_rate": 5.524983210261481e-06,
"loss": 0.0034,
"step": 17130
},
{
"epoch": 58.6986301369863,
"grad_norm": 0.028554566204547882,
"learning_rate": 5.48726765236629e-06,
"loss": 0.0034,
"step": 17140
},
{
"epoch": 58.73287671232877,
"grad_norm": 0.017591096460819244,
"learning_rate": 5.449673790581611e-06,
"loss": 0.0025,
"step": 17150
},
{
"epoch": 58.76712328767123,
"grad_norm": 0.03151347115635872,
"learning_rate": 5.412201727687644e-06,
"loss": 0.0028,
"step": 17160
},
{
"epoch": 58.8013698630137,
"grad_norm": 0.019181104376912117,
"learning_rate": 5.374851566131561e-06,
"loss": 0.0023,
"step": 17170
},
{
"epoch": 58.83561643835616,
"grad_norm": 0.03802407905459404,
"learning_rate": 5.337623408027293e-06,
"loss": 0.0031,
"step": 17180
},
{
"epoch": 58.86986301369863,
"grad_norm": 0.03747876361012459,
"learning_rate": 5.300517355155215e-06,
"loss": 0.0025,
"step": 17190
},
{
"epoch": 58.9041095890411,
"grad_norm": 0.0215825904160738,
"learning_rate": 5.263533508961827e-06,
"loss": 0.0039,
"step": 17200
},
{
"epoch": 58.93835616438356,
"grad_norm": 0.016658857464790344,
"learning_rate": 5.226671970559577e-06,
"loss": 0.0021,
"step": 17210
},
{
"epoch": 58.97260273972603,
"grad_norm": 0.018328074365854263,
"learning_rate": 5.1899328407264855e-06,
"loss": 0.0023,
"step": 17220
},
{
"epoch": 59.00684931506849,
"grad_norm": 0.025236770510673523,
"learning_rate": 5.153316219905946e-06,
"loss": 0.0028,
"step": 17230
},
{
"epoch": 59.04109589041096,
"grad_norm": 0.016753623262047768,
"learning_rate": 5.116822208206396e-06,
"loss": 0.0025,
"step": 17240
},
{
"epoch": 59.07534246575342,
"grad_norm": 0.020218942314386368,
"learning_rate": 5.080450905401057e-06,
"loss": 0.0025,
"step": 17250
},
{
"epoch": 59.10958904109589,
"grad_norm": 0.03711342811584473,
"learning_rate": 5.044202410927706e-06,
"loss": 0.0034,
"step": 17260
},
{
"epoch": 59.14383561643836,
"grad_norm": 0.039715610444545746,
"learning_rate": 5.008076823888319e-06,
"loss": 0.003,
"step": 17270
},
{
"epoch": 59.178082191780824,
"grad_norm": 0.03945466876029968,
"learning_rate": 4.972074243048897e-06,
"loss": 0.0026,
"step": 17280
},
{
"epoch": 59.21232876712329,
"grad_norm": 0.017290910705924034,
"learning_rate": 4.936194766839103e-06,
"loss": 0.0037,
"step": 17290
},
{
"epoch": 59.24657534246575,
"grad_norm": 0.018677933141589165,
"learning_rate": 4.900438493352055e-06,
"loss": 0.0023,
"step": 17300
},
{
"epoch": 59.28082191780822,
"grad_norm": 0.023664385080337524,
"learning_rate": 4.864805520344051e-06,
"loss": 0.0033,
"step": 17310
},
{
"epoch": 59.31506849315068,
"grad_norm": 0.02739626169204712,
"learning_rate": 4.829295945234258e-06,
"loss": 0.0041,
"step": 17320
},
{
"epoch": 59.34931506849315,
"grad_norm": 0.023249467834830284,
"learning_rate": 4.7939098651045235e-06,
"loss": 0.0036,
"step": 17330
},
{
"epoch": 59.38356164383562,
"grad_norm": 0.02616041526198387,
"learning_rate": 4.758647376699032e-06,
"loss": 0.0028,
"step": 17340
},
{
"epoch": 59.417808219178085,
"grad_norm": 0.04718932509422302,
"learning_rate": 4.723508576424062e-06,
"loss": 0.0029,
"step": 17350
},
{
"epoch": 59.45205479452055,
"grad_norm": 0.02695685438811779,
"learning_rate": 4.688493560347773e-06,
"loss": 0.003,
"step": 17360
},
{
"epoch": 59.486301369863014,
"grad_norm": 0.02369818091392517,
"learning_rate": 4.653602424199876e-06,
"loss": 0.0031,
"step": 17370
},
{
"epoch": 59.52054794520548,
"grad_norm": 0.028814973309636116,
"learning_rate": 4.618835263371396e-06,
"loss": 0.0028,
"step": 17380
},
{
"epoch": 59.554794520547944,
"grad_norm": 0.02037746086716652,
"learning_rate": 4.5841921729144424e-06,
"loss": 0.0023,
"step": 17390
},
{
"epoch": 59.58904109589041,
"grad_norm": 0.020026013255119324,
"learning_rate": 4.549673247541875e-06,
"loss": 0.0023,
"step": 17400
},
{
"epoch": 59.62328767123287,
"grad_norm": 0.023346390575170517,
"learning_rate": 4.515278581627141e-06,
"loss": 0.0027,
"step": 17410
},
{
"epoch": 59.657534246575345,
"grad_norm": 0.02217704802751541,
"learning_rate": 4.48100826920394e-06,
"loss": 0.0027,
"step": 17420
},
{
"epoch": 59.69178082191781,
"grad_norm": 0.012821310199797153,
"learning_rate": 4.446862403965984e-06,
"loss": 0.002,
"step": 17430
},
{
"epoch": 59.726027397260275,
"grad_norm": 0.021954253315925598,
"learning_rate": 4.412841079266777e-06,
"loss": 0.0034,
"step": 17440
},
{
"epoch": 59.76027397260274,
"grad_norm": 0.02877684310078621,
"learning_rate": 4.378944388119311e-06,
"loss": 0.0027,
"step": 17450
},
{
"epoch": 59.794520547945204,
"grad_norm": 0.0313013419508934,
"learning_rate": 4.3451724231958644e-06,
"loss": 0.0022,
"step": 17460
},
{
"epoch": 59.82876712328767,
"grad_norm": 0.03267759829759598,
"learning_rate": 4.311525276827682e-06,
"loss": 0.0033,
"step": 17470
},
{
"epoch": 59.863013698630134,
"grad_norm": 0.026436539366841316,
"learning_rate": 4.27800304100478e-06,
"loss": 0.0025,
"step": 17480
},
{
"epoch": 59.897260273972606,
"grad_norm": 0.03464627265930176,
"learning_rate": 4.244605807375679e-06,
"loss": 0.003,
"step": 17490
},
{
"epoch": 59.93150684931507,
"grad_norm": 0.019924577325582504,
"learning_rate": 4.2113336672471245e-06,
"loss": 0.0028,
"step": 17500
},
{
"epoch": 59.965753424657535,
"grad_norm": 0.02339211106300354,
"learning_rate": 4.178186711583904e-06,
"loss": 0.0046,
"step": 17510
},
{
"epoch": 60.0,
"grad_norm": 0.02530355006456375,
"learning_rate": 4.145165031008508e-06,
"loss": 0.0031,
"step": 17520
},
{
"epoch": 60.034246575342465,
"grad_norm": 0.012683791108429432,
"learning_rate": 4.112268715800943e-06,
"loss": 0.0021,
"step": 17530
},
{
"epoch": 60.06849315068493,
"grad_norm": 0.03863707557320595,
"learning_rate": 4.079497855898501e-06,
"loss": 0.0044,
"step": 17540
},
{
"epoch": 60.102739726027394,
"grad_norm": 0.027493145316839218,
"learning_rate": 4.046852540895446e-06,
"loss": 0.0031,
"step": 17550
},
{
"epoch": 60.136986301369866,
"grad_norm": 0.041166216135025024,
"learning_rate": 4.01433286004283e-06,
"loss": 0.0042,
"step": 17560
},
{
"epoch": 60.17123287671233,
"grad_norm": 0.021608727052807808,
"learning_rate": 3.981938902248222e-06,
"loss": 0.0024,
"step": 17570
},
{
"epoch": 60.205479452054796,
"grad_norm": 0.026905011385679245,
"learning_rate": 3.949670756075447e-06,
"loss": 0.0023,
"step": 17580
},
{
"epoch": 60.23972602739726,
"grad_norm": 0.02121490240097046,
"learning_rate": 3.917528509744412e-06,
"loss": 0.0032,
"step": 17590
},
{
"epoch": 60.273972602739725,
"grad_norm": 0.04649144038558006,
"learning_rate": 3.885512251130763e-06,
"loss": 0.0049,
"step": 17600
},
{
"epoch": 60.30821917808219,
"grad_norm": 0.03161786124110222,
"learning_rate": 3.8536220677657495e-06,
"loss": 0.0025,
"step": 17610
},
{
"epoch": 60.342465753424655,
"grad_norm": 0.02871571108698845,
"learning_rate": 3.821858046835913e-06,
"loss": 0.0029,
"step": 17620
},
{
"epoch": 60.37671232876713,
"grad_norm": 0.01877393200993538,
"learning_rate": 3.790220275182854e-06,
"loss": 0.0031,
"step": 17630
},
{
"epoch": 60.41095890410959,
"grad_norm": 0.028997721150517464,
"learning_rate": 3.75870883930306e-06,
"loss": 0.0021,
"step": 17640
},
{
"epoch": 60.445205479452056,
"grad_norm": 0.019930049777030945,
"learning_rate": 3.7273238253475785e-06,
"loss": 0.0032,
"step": 17650
},
{
"epoch": 60.47945205479452,
"grad_norm": 0.025048483163118362,
"learning_rate": 3.696065319121833e-06,
"loss": 0.0029,
"step": 17660
},
{
"epoch": 60.513698630136986,
"grad_norm": 0.028792202472686768,
"learning_rate": 3.664933406085402e-06,
"loss": 0.0039,
"step": 17670
},
{
"epoch": 60.54794520547945,
"grad_norm": 0.020279956981539726,
"learning_rate": 3.6339281713517303e-06,
"loss": 0.0029,
"step": 17680
},
{
"epoch": 60.582191780821915,
"grad_norm": 0.02139876037836075,
"learning_rate": 3.60304969968796e-06,
"loss": 0.0035,
"step": 17690
},
{
"epoch": 60.61643835616438,
"grad_norm": 0.02807740494608879,
"learning_rate": 3.5722980755146517e-06,
"loss": 0.0034,
"step": 17700
},
{
"epoch": 60.65068493150685,
"grad_norm": 0.03502603620290756,
"learning_rate": 3.541673382905558e-06,
"loss": 0.0032,
"step": 17710
},
{
"epoch": 60.68493150684932,
"grad_norm": 0.032354071736335754,
"learning_rate": 3.511175705587433e-06,
"loss": 0.004,
"step": 17720
},
{
"epoch": 60.71917808219178,
"grad_norm": 0.019513679668307304,
"learning_rate": 3.4808051269397512e-06,
"loss": 0.0035,
"step": 17730
},
{
"epoch": 60.75342465753425,
"grad_norm": 0.03239310160279274,
"learning_rate": 3.4505617299945336e-06,
"loss": 0.0034,
"step": 17740
},
{
"epoch": 60.78767123287671,
"grad_norm": 0.019614113494753838,
"learning_rate": 3.420445597436056e-06,
"loss": 0.0026,
"step": 17750
},
{
"epoch": 60.821917808219176,
"grad_norm": 0.016276845708489418,
"learning_rate": 3.390456811600673e-06,
"loss": 0.004,
"step": 17760
},
{
"epoch": 60.85616438356164,
"grad_norm": 0.032702166587114334,
"learning_rate": 3.360595454476595e-06,
"loss": 0.0032,
"step": 17770
},
{
"epoch": 60.89041095890411,
"grad_norm": 0.020163316279649734,
"learning_rate": 3.3308616077036115e-06,
"loss": 0.0026,
"step": 17780
},
{
"epoch": 60.92465753424658,
"grad_norm": 0.02356639876961708,
"learning_rate": 3.301255352572946e-06,
"loss": 0.0026,
"step": 17790
},
{
"epoch": 60.95890410958904,
"grad_norm": 0.0164373517036438,
"learning_rate": 3.271776770026963e-06,
"loss": 0.0028,
"step": 17800
},
{
"epoch": 60.99315068493151,
"grad_norm": 0.01919223740696907,
"learning_rate": 3.2424259406589664e-06,
"loss": 0.0035,
"step": 17810
},
{
"epoch": 61.02739726027397,
"grad_norm": 0.059007029980421066,
"learning_rate": 3.213202944713023e-06,
"loss": 0.0046,
"step": 17820
},
{
"epoch": 61.06164383561644,
"grad_norm": 0.03275210037827492,
"learning_rate": 3.1841078620836683e-06,
"loss": 0.0026,
"step": 17830
},
{
"epoch": 61.0958904109589,
"grad_norm": 0.013377784751355648,
"learning_rate": 3.155140772315773e-06,
"loss": 0.0028,
"step": 17840
},
{
"epoch": 61.13013698630137,
"grad_norm": 0.016845788806676865,
"learning_rate": 3.126301754604233e-06,
"loss": 0.004,
"step": 17850
},
{
"epoch": 61.16438356164384,
"grad_norm": 0.02182050235569477,
"learning_rate": 3.0975908877938277e-06,
"loss": 0.0039,
"step": 17860
},
{
"epoch": 61.1986301369863,
"grad_norm": 0.02985861524939537,
"learning_rate": 3.0690082503789742e-06,
"loss": 0.003,
"step": 17870
},
{
"epoch": 61.23287671232877,
"grad_norm": 0.014906318858265877,
"learning_rate": 3.040553920503503e-06,
"loss": 0.0024,
"step": 17880
},
{
"epoch": 61.26712328767123,
"grad_norm": 0.013918554410338402,
"learning_rate": 3.0122279759604745e-06,
"loss": 0.0026,
"step": 17890
},
{
"epoch": 61.3013698630137,
"grad_norm": 0.04059552773833275,
"learning_rate": 2.9840304941919415e-06,
"loss": 0.0029,
"step": 17900
},
{
"epoch": 61.33561643835616,
"grad_norm": 0.01856929622590542,
"learning_rate": 2.9559615522887273e-06,
"loss": 0.0032,
"step": 17910
},
{
"epoch": 61.36986301369863,
"grad_norm": 0.021468475461006165,
"learning_rate": 2.928021226990263e-06,
"loss": 0.0027,
"step": 17920
},
{
"epoch": 61.4041095890411,
"grad_norm": 0.031402189284563065,
"learning_rate": 2.9002095946843277e-06,
"loss": 0.0022,
"step": 17930
},
{
"epoch": 61.43835616438356,
"grad_norm": 0.018215125426650047,
"learning_rate": 2.8725267314068495e-06,
"loss": 0.0033,
"step": 17940
},
{
"epoch": 61.47260273972603,
"grad_norm": 0.019874971359968185,
"learning_rate": 2.844972712841737e-06,
"loss": 0.0022,
"step": 17950
},
{
"epoch": 61.50684931506849,
"grad_norm": 0.017870550975203514,
"learning_rate": 2.817547614320615e-06,
"loss": 0.003,
"step": 17960
},
{
"epoch": 61.54109589041096,
"grad_norm": 0.011960655450820923,
"learning_rate": 2.790251510822661e-06,
"loss": 0.0025,
"step": 17970
},
{
"epoch": 61.57534246575342,
"grad_norm": 0.011642039753496647,
"learning_rate": 2.7630844769743757e-06,
"loss": 0.0023,
"step": 17980
},
{
"epoch": 61.60958904109589,
"grad_norm": 0.03464807942509651,
"learning_rate": 2.73604658704939e-06,
"loss": 0.0031,
"step": 17990
},
{
"epoch": 61.64383561643836,
"grad_norm": 0.017682120203971863,
"learning_rate": 2.7091379149682685e-06,
"loss": 0.0036,
"step": 18000
},
{
"epoch": 61.678082191780824,
"grad_norm": 0.019320419058203697,
"learning_rate": 2.682358534298285e-06,
"loss": 0.0027,
"step": 18010
},
{
"epoch": 61.71232876712329,
"grad_norm": 0.01240335963666439,
"learning_rate": 2.6557085182532582e-06,
"loss": 0.0044,
"step": 18020
},
{
"epoch": 61.74657534246575,
"grad_norm": 0.01913302391767502,
"learning_rate": 2.6291879396933004e-06,
"loss": 0.0031,
"step": 18030
},
{
"epoch": 61.78082191780822,
"grad_norm": 0.025125738233327866,
"learning_rate": 2.602796871124663e-06,
"loss": 0.0032,
"step": 18040
},
{
"epoch": 61.81506849315068,
"grad_norm": 0.03669289872050285,
"learning_rate": 2.57653538469953e-06,
"loss": 0.0035,
"step": 18050
},
{
"epoch": 61.84931506849315,
"grad_norm": 0.012292813509702682,
"learning_rate": 2.5504035522157854e-06,
"loss": 0.003,
"step": 18060
},
{
"epoch": 61.88356164383562,
"grad_norm": 0.02633073925971985,
"learning_rate": 2.5244014451168863e-06,
"loss": 0.0022,
"step": 18070
},
{
"epoch": 61.917808219178085,
"grad_norm": 0.014214101247489452,
"learning_rate": 2.4985291344915674e-06,
"loss": 0.0024,
"step": 18080
},
{
"epoch": 61.95205479452055,
"grad_norm": 0.016861189156770706,
"learning_rate": 2.4727866910737583e-06,
"loss": 0.003,
"step": 18090
},
{
"epoch": 61.986301369863014,
"grad_norm": 0.02316778339445591,
"learning_rate": 2.4471741852423237e-06,
"loss": 0.0023,
"step": 18100
},
{
"epoch": 62.02054794520548,
"grad_norm": 0.013381525874137878,
"learning_rate": 2.421691687020855e-06,
"loss": 0.002,
"step": 18110
},
{
"epoch": 62.054794520547944,
"grad_norm": 0.020613618195056915,
"learning_rate": 2.3963392660775575e-06,
"loss": 0.0035,
"step": 18120
},
{
"epoch": 62.08904109589041,
"grad_norm": 0.032079510390758514,
"learning_rate": 2.371116991724953e-06,
"loss": 0.0028,
"step": 18130
},
{
"epoch": 62.12328767123287,
"grad_norm": 0.02837025187909603,
"learning_rate": 2.3460249329197824e-06,
"loss": 0.003,
"step": 18140
},
{
"epoch": 62.157534246575345,
"grad_norm": 0.02057802490890026,
"learning_rate": 2.321063158262793e-06,
"loss": 0.0027,
"step": 18150
},
{
"epoch": 62.19178082191781,
"grad_norm": 0.02426522970199585,
"learning_rate": 2.296231735998511e-06,
"loss": 0.0023,
"step": 18160
},
{
"epoch": 62.226027397260275,
"grad_norm": 0.041218966245651245,
"learning_rate": 2.271530734015104e-06,
"loss": 0.0037,
"step": 18170
},
{
"epoch": 62.26027397260274,
"grad_norm": 0.023262323811650276,
"learning_rate": 2.2469602198441573e-06,
"loss": 0.0022,
"step": 18180
},
{
"epoch": 62.294520547945204,
"grad_norm": 0.022797662764787674,
"learning_rate": 2.222520260660521e-06,
"loss": 0.0032,
"step": 18190
},
{
"epoch": 62.32876712328767,
"grad_norm": 0.018522756174206734,
"learning_rate": 2.1982109232821178e-06,
"loss": 0.0031,
"step": 18200
},
{
"epoch": 62.363013698630134,
"grad_norm": 0.028718652203679085,
"learning_rate": 2.174032274169746e-06,
"loss": 0.0024,
"step": 18210
},
{
"epoch": 62.397260273972606,
"grad_norm": 0.031062902882695198,
"learning_rate": 2.149984379426906e-06,
"loss": 0.003,
"step": 18220
},
{
"epoch": 62.43150684931507,
"grad_norm": 0.01933165453374386,
"learning_rate": 2.1260673047996227e-06,
"loss": 0.0024,
"step": 18230
},
{
"epoch": 62.465753424657535,
"grad_norm": 0.034511059522628784,
"learning_rate": 2.102281115676258e-06,
"loss": 0.0025,
"step": 18240
},
{
"epoch": 62.5,
"grad_norm": 0.029437880963087082,
"learning_rate": 2.0786258770873647e-06,
"loss": 0.0024,
"step": 18250
},
{
"epoch": 62.534246575342465,
"grad_norm": 0.008569435216486454,
"learning_rate": 2.0551016537054493e-06,
"loss": 0.0016,
"step": 18260
},
{
"epoch": 62.56849315068493,
"grad_norm": 0.025799725204706192,
"learning_rate": 2.0317085098448372e-06,
"loss": 0.0025,
"step": 18270
},
{
"epoch": 62.602739726027394,
"grad_norm": 0.011235746555030346,
"learning_rate": 2.008446509461498e-06,
"loss": 0.002,
"step": 18280
},
{
"epoch": 62.636986301369866,
"grad_norm": 0.022236861288547516,
"learning_rate": 1.985315716152847e-06,
"loss": 0.0034,
"step": 18290
},
{
"epoch": 62.67123287671233,
"grad_norm": 0.022197648882865906,
"learning_rate": 1.962316193157593e-06,
"loss": 0.0038,
"step": 18300
},
{
"epoch": 62.705479452054796,
"grad_norm": 0.01615208201110363,
"learning_rate": 1.939448003355554e-06,
"loss": 0.0035,
"step": 18310
},
{
"epoch": 62.73972602739726,
"grad_norm": 0.035238660871982574,
"learning_rate": 1.91671120926748e-06,
"loss": 0.0022,
"step": 18320
},
{
"epoch": 62.773972602739725,
"grad_norm": 0.024143965914845467,
"learning_rate": 1.8941058730549132e-06,
"loss": 0.0027,
"step": 18330
},
{
"epoch": 62.80821917808219,
"grad_norm": 0.016463087871670723,
"learning_rate": 1.8716320565199618e-06,
"loss": 0.0025,
"step": 18340
},
{
"epoch": 62.842465753424655,
"grad_norm": 0.020290328189730644,
"learning_rate": 1.849289821105199e-06,
"loss": 0.0033,
"step": 18350
},
{
"epoch": 62.87671232876713,
"grad_norm": 0.04143396392464638,
"learning_rate": 1.8270792278934302e-06,
"loss": 0.0029,
"step": 18360
},
{
"epoch": 62.91095890410959,
"grad_norm": 0.016841132193803787,
"learning_rate": 1.8050003376075707e-06,
"loss": 0.0022,
"step": 18370
},
{
"epoch": 62.945205479452056,
"grad_norm": 0.02761237323284149,
"learning_rate": 1.7830532106104747e-06,
"loss": 0.0039,
"step": 18380
},
{
"epoch": 62.97945205479452,
"grad_norm": 0.01891123317182064,
"learning_rate": 1.7612379069047335e-06,
"loss": 0.0019,
"step": 18390
},
{
"epoch": 63.013698630136986,
"grad_norm": 0.03597179055213928,
"learning_rate": 1.7395544861325718e-06,
"loss": 0.0032,
"step": 18400
},
{
"epoch": 63.04794520547945,
"grad_norm": 0.03108893521130085,
"learning_rate": 1.7180030075756136e-06,
"loss": 0.0027,
"step": 18410
},
{
"epoch": 63.082191780821915,
"grad_norm": 0.021627260372042656,
"learning_rate": 1.696583530154794e-06,
"loss": 0.0032,
"step": 18420
},
{
"epoch": 63.11643835616438,
"grad_norm": 0.016512632369995117,
"learning_rate": 1.6752961124301415e-06,
"loss": 0.0033,
"step": 18430
},
{
"epoch": 63.15068493150685,
"grad_norm": 0.016389215365052223,
"learning_rate": 1.6541408126006463e-06,
"loss": 0.0027,
"step": 18440
},
{
"epoch": 63.18493150684932,
"grad_norm": 0.02537810057401657,
"learning_rate": 1.6331176885040878e-06,
"loss": 0.0021,
"step": 18450
},
{
"epoch": 63.21917808219178,
"grad_norm": 0.014781366102397442,
"learning_rate": 1.6122267976168781e-06,
"loss": 0.0038,
"step": 18460
},
{
"epoch": 63.25342465753425,
"grad_norm": 0.024314258247613907,
"learning_rate": 1.5914681970539192e-06,
"loss": 0.003,
"step": 18470
},
{
"epoch": 63.28767123287671,
"grad_norm": 0.0542307011783123,
"learning_rate": 1.5708419435684462e-06,
"loss": 0.0055,
"step": 18480
},
{
"epoch": 63.321917808219176,
"grad_norm": 0.01515932660549879,
"learning_rate": 1.550348093551829e-06,
"loss": 0.0029,
"step": 18490
},
{
"epoch": 63.35616438356164,
"grad_norm": 0.01770406775176525,
"learning_rate": 1.5299867030334814e-06,
"loss": 0.003,
"step": 18500
},
{
"epoch": 63.39041095890411,
"grad_norm": 0.016681650653481483,
"learning_rate": 1.5097578276806633e-06,
"loss": 0.0025,
"step": 18510
},
{
"epoch": 63.42465753424658,
"grad_norm": 0.012945062480866909,
"learning_rate": 1.4896615227983468e-06,
"loss": 0.0037,
"step": 18520
},
{
"epoch": 63.45890410958904,
"grad_norm": 0.027772676199674606,
"learning_rate": 1.4696978433290653e-06,
"loss": 0.0037,
"step": 18530
},
{
"epoch": 63.49315068493151,
"grad_norm": 0.01776418834924698,
"learning_rate": 1.4498668438527597e-06,
"loss": 0.0024,
"step": 18540
},
{
"epoch": 63.52739726027397,
"grad_norm": 0.012397520244121552,
"learning_rate": 1.4301685785866214e-06,
"loss": 0.0032,
"step": 18550
},
{
"epoch": 63.56164383561644,
"grad_norm": 0.017262982204556465,
"learning_rate": 1.4106031013849496e-06,
"loss": 0.002,
"step": 18560
},
{
"epoch": 63.5958904109589,
"grad_norm": 0.010316437110304832,
"learning_rate": 1.3911704657390113e-06,
"loss": 0.002,
"step": 18570
},
{
"epoch": 63.63013698630137,
"grad_norm": 0.020639773458242416,
"learning_rate": 1.3718707247769135e-06,
"loss": 0.0026,
"step": 18580
},
{
"epoch": 63.66438356164384,
"grad_norm": 0.023104477673768997,
"learning_rate": 1.3527039312633827e-06,
"loss": 0.0036,
"step": 18590
},
{
"epoch": 63.6986301369863,
"grad_norm": 0.02620391547679901,
"learning_rate": 1.333670137599713e-06,
"loss": 0.0044,
"step": 18600
},
{
"epoch": 63.73287671232877,
"grad_norm": 0.013353808782994747,
"learning_rate": 1.3147693958235618e-06,
"loss": 0.0038,
"step": 18610
},
{
"epoch": 63.76712328767123,
"grad_norm": 0.027655024081468582,
"learning_rate": 1.2960017576088446e-06,
"loss": 0.0032,
"step": 18620
},
{
"epoch": 63.8013698630137,
"grad_norm": 0.014509730041027069,
"learning_rate": 1.2773672742655784e-06,
"loss": 0.0027,
"step": 18630
},
{
"epoch": 63.83561643835616,
"grad_norm": 0.042286377400159836,
"learning_rate": 1.2588659967397e-06,
"loss": 0.0031,
"step": 18640
},
{
"epoch": 63.86986301369863,
"grad_norm": 0.025881841778755188,
"learning_rate": 1.2404979756130142e-06,
"loss": 0.0025,
"step": 18650
},
{
"epoch": 63.9041095890411,
"grad_norm": 0.03276398405432701,
"learning_rate": 1.222263261102985e-06,
"loss": 0.0037,
"step": 18660
},
{
"epoch": 63.93835616438356,
"grad_norm": 0.03172963857650757,
"learning_rate": 1.2041619030626284e-06,
"loss": 0.0036,
"step": 18670
},
{
"epoch": 63.97260273972603,
"grad_norm": 0.02655917964875698,
"learning_rate": 1.1861939509803687e-06,
"loss": 0.0034,
"step": 18680
},
{
"epoch": 64.0068493150685,
"grad_norm": 0.033081576228141785,
"learning_rate": 1.1683594539798893e-06,
"loss": 0.0025,
"step": 18690
},
{
"epoch": 64.04109589041096,
"grad_norm": 0.017231125384569168,
"learning_rate": 1.1506584608200367e-06,
"loss": 0.0033,
"step": 18700
},
{
"epoch": 64.07534246575342,
"grad_norm": 0.027252597734332085,
"learning_rate": 1.1330910198946442e-06,
"loss": 0.0034,
"step": 18710
},
{
"epoch": 64.10958904109589,
"grad_norm": 0.009583823382854462,
"learning_rate": 1.1156571792324211e-06,
"loss": 0.0033,
"step": 18720
},
{
"epoch": 64.14383561643835,
"grad_norm": 0.014770534820854664,
"learning_rate": 1.0983569864968346e-06,
"loss": 0.0032,
"step": 18730
},
{
"epoch": 64.17808219178082,
"grad_norm": 0.017628680914640427,
"learning_rate": 1.0811904889859336e-06,
"loss": 0.0033,
"step": 18740
},
{
"epoch": 64.21232876712328,
"grad_norm": 0.02731098234653473,
"learning_rate": 1.064157733632276e-06,
"loss": 0.0034,
"step": 18750
},
{
"epoch": 64.24657534246575,
"grad_norm": 0.029084838926792145,
"learning_rate": 1.0472587670027678e-06,
"loss": 0.0036,
"step": 18760
},
{
"epoch": 64.28082191780823,
"grad_norm": 0.015242715366184711,
"learning_rate": 1.030493635298535e-06,
"loss": 0.0032,
"step": 18770
},
{
"epoch": 64.31506849315069,
"grad_norm": 0.010613277554512024,
"learning_rate": 1.0138623843548078e-06,
"loss": 0.0029,
"step": 18780
},
{
"epoch": 64.34931506849315,
"grad_norm": 0.031174929812550545,
"learning_rate": 9.97365059640787e-07,
"loss": 0.0034,
"step": 18790
},
{
"epoch": 64.38356164383562,
"grad_norm": 0.011623591184616089,
"learning_rate": 9.810017062595322e-07,
"loss": 0.002,
"step": 18800
},
{
"epoch": 64.41780821917808,
"grad_norm": 0.017375310882925987,
"learning_rate": 9.647723689478305e-07,
"loss": 0.0022,
"step": 18810
},
{
"epoch": 64.45205479452055,
"grad_norm": 0.014030944555997849,
"learning_rate": 9.486770920760668e-07,
"loss": 0.0021,
"step": 18820
},
{
"epoch": 64.48630136986301,
"grad_norm": 0.025326306000351906,
"learning_rate": 9.327159196481138e-07,
"loss": 0.0037,
"step": 18830
},
{
"epoch": 64.52054794520548,
"grad_norm": 0.03436637669801712,
"learning_rate": 9.168888953011989e-07,
"loss": 0.0042,
"step": 18840
},
{
"epoch": 64.55479452054794,
"grad_norm": 0.01274149864912033,
"learning_rate": 9.011960623058202e-07,
"loss": 0.0029,
"step": 18850
},
{
"epoch": 64.58904109589041,
"grad_norm": 0.012670198455452919,
"learning_rate": 8.856374635655695e-07,
"loss": 0.0025,
"step": 18860
},
{
"epoch": 64.62328767123287,
"grad_norm": 0.021424753591418266,
"learning_rate": 8.702131416170656e-07,
"loss": 0.0025,
"step": 18870
},
{
"epoch": 64.65753424657534,
"grad_norm": 0.015697501599788666,
"learning_rate": 8.549231386298151e-07,
"loss": 0.0026,
"step": 18880
},
{
"epoch": 64.6917808219178,
"grad_norm": 0.028277793899178505,
"learning_rate": 8.397674964061075e-07,
"loss": 0.0032,
"step": 18890
},
{
"epoch": 64.72602739726027,
"grad_norm": 0.02876514196395874,
"learning_rate": 8.247462563808817e-07,
"loss": 0.0021,
"step": 18900
},
{
"epoch": 64.76027397260275,
"grad_norm": 0.0304822139441967,
"learning_rate": 8.098594596216424e-07,
"loss": 0.003,
"step": 18910
},
{
"epoch": 64.79452054794521,
"grad_norm": 0.029384471476078033,
"learning_rate": 7.951071468283167e-07,
"loss": 0.0023,
"step": 18920
},
{
"epoch": 64.82876712328768,
"grad_norm": 0.01294246967881918,
"learning_rate": 7.804893583331696e-07,
"loss": 0.0028,
"step": 18930
},
{
"epoch": 64.86301369863014,
"grad_norm": 0.022310776636004448,
"learning_rate": 7.66006134100672e-07,
"loss": 0.0021,
"step": 18940
},
{
"epoch": 64.8972602739726,
"grad_norm": 0.012138908728957176,
"learning_rate": 7.516575137274162e-07,
"loss": 0.002,
"step": 18950
},
{
"epoch": 64.93150684931507,
"grad_norm": 0.01461056899279356,
"learning_rate": 7.374435364419674e-07,
"loss": 0.0021,
"step": 18960
},
{
"epoch": 64.96575342465754,
"grad_norm": 0.019607581198215485,
"learning_rate": 7.233642411048014e-07,
"loss": 0.0022,
"step": 18970
},
{
"epoch": 65.0,
"grad_norm": 0.009313903748989105,
"learning_rate": 7.094196662081831e-07,
"loss": 0.0027,
"step": 18980
},
{
"epoch": 65.03424657534246,
"grad_norm": 0.010933526791632175,
"learning_rate": 6.956098498760389e-07,
"loss": 0.0028,
"step": 18990
},
{
"epoch": 65.06849315068493,
"grad_norm": 0.03836876153945923,
"learning_rate": 6.819348298638839e-07,
"loss": 0.003,
"step": 19000
},
{
"epoch": 65.1027397260274,
"grad_norm": 0.020728355273604393,
"learning_rate": 6.683946435586952e-07,
"loss": 0.0024,
"step": 19010
},
{
"epoch": 65.13698630136986,
"grad_norm": 0.02146265283226967,
"learning_rate": 6.549893279788277e-07,
"loss": 0.0019,
"step": 19020
},
{
"epoch": 65.17123287671232,
"grad_norm": 0.020000096410512924,
"learning_rate": 6.417189197739093e-07,
"loss": 0.0029,
"step": 19030
},
{
"epoch": 65.20547945205479,
"grad_norm": 0.02361264079809189,
"learning_rate": 6.285834552247128e-07,
"loss": 0.0024,
"step": 19040
},
{
"epoch": 65.23972602739725,
"grad_norm": 0.03703237324953079,
"learning_rate": 6.15582970243117e-07,
"loss": 0.0046,
"step": 19050
},
{
"epoch": 65.27397260273973,
"grad_norm": 0.028757376596331596,
"learning_rate": 6.027175003719354e-07,
"loss": 0.0026,
"step": 19060
},
{
"epoch": 65.3082191780822,
"grad_norm": 0.009351376444101334,
"learning_rate": 5.899870807848762e-07,
"loss": 0.0032,
"step": 19070
},
{
"epoch": 65.34246575342466,
"grad_norm": 0.019105447456240654,
"learning_rate": 5.773917462864264e-07,
"loss": 0.0026,
"step": 19080
},
{
"epoch": 65.37671232876713,
"grad_norm": 0.00916428305208683,
"learning_rate": 5.64931531311741e-07,
"loss": 0.0029,
"step": 19090
},
{
"epoch": 65.41095890410959,
"grad_norm": 0.022843752056360245,
"learning_rate": 5.526064699265753e-07,
"loss": 0.0026,
"step": 19100
},
{
"epoch": 65.44520547945206,
"grad_norm": 0.014886964112520218,
"learning_rate": 5.404165958271811e-07,
"loss": 0.0026,
"step": 19110
},
{
"epoch": 65.47945205479452,
"grad_norm": 0.016025792807340622,
"learning_rate": 5.283619423401998e-07,
"loss": 0.0029,
"step": 19120
},
{
"epoch": 65.51369863013699,
"grad_norm": 0.023562895134091377,
"learning_rate": 5.164425424226016e-07,
"loss": 0.0031,
"step": 19130
},
{
"epoch": 65.54794520547945,
"grad_norm": 0.018180225044488907,
"learning_rate": 5.046584286615697e-07,
"loss": 0.0025,
"step": 19140
},
{
"epoch": 65.58219178082192,
"grad_norm": 0.01071920245885849,
"learning_rate": 4.930096332744105e-07,
"loss": 0.0026,
"step": 19150
},
{
"epoch": 65.61643835616438,
"grad_norm": 0.014726242981851101,
"learning_rate": 4.814961881085045e-07,
"loss": 0.003,
"step": 19160
},
{
"epoch": 65.65068493150685,
"grad_norm": 0.032696496695280075,
"learning_rate": 4.701181246411501e-07,
"loss": 0.0044,
"step": 19170
},
{
"epoch": 65.68493150684931,
"grad_norm": 0.0267886221408844,
"learning_rate": 4.5887547397955864e-07,
"loss": 0.0027,
"step": 19180
},
{
"epoch": 65.71917808219177,
"grad_norm": 0.029734879732131958,
"learning_rate": 4.4776826686069305e-07,
"loss": 0.0032,
"step": 19190
},
{
"epoch": 65.75342465753425,
"grad_norm": 0.01782161183655262,
"learning_rate": 4.367965336512403e-07,
"loss": 0.0022,
"step": 19200
},
{
"epoch": 65.78767123287672,
"grad_norm": 0.01377611793577671,
"learning_rate": 4.259603043475002e-07,
"loss": 0.0041,
"step": 19210
},
{
"epoch": 65.82191780821918,
"grad_norm": 0.019605727866292,
"learning_rate": 4.1525960857530243e-07,
"loss": 0.0026,
"step": 19220
},
{
"epoch": 65.85616438356165,
"grad_norm": 0.021362723782658577,
"learning_rate": 4.0469447558995065e-07,
"loss": 0.0031,
"step": 19230
},
{
"epoch": 65.89041095890411,
"grad_norm": 0.010247836820781231,
"learning_rate": 3.9426493427611177e-07,
"loss": 0.003,
"step": 19240
},
{
"epoch": 65.92465753424658,
"grad_norm": 0.016217583790421486,
"learning_rate": 3.839710131477492e-07,
"loss": 0.0039,
"step": 19250
},
{
"epoch": 65.95890410958904,
"grad_norm": 0.010646031238138676,
"learning_rate": 3.738127403480507e-07,
"loss": 0.0029,
"step": 19260
},
{
"epoch": 65.9931506849315,
"grad_norm": 0.013961025513708591,
"learning_rate": 3.637901436493507e-07,
"loss": 0.0031,
"step": 19270
},
{
"epoch": 66.02739726027397,
"grad_norm": 0.009408863261342049,
"learning_rate": 3.5390325045304706e-07,
"loss": 0.0034,
"step": 19280
},
{
"epoch": 66.06164383561644,
"grad_norm": 0.019813723862171173,
"learning_rate": 3.441520877895288e-07,
"loss": 0.0028,
"step": 19290
},
{
"epoch": 66.0958904109589,
"grad_norm": 0.015585197135806084,
"learning_rate": 3.3453668231809286e-07,
"loss": 0.0034,
"step": 19300
},
{
"epoch": 66.13013698630137,
"grad_norm": 0.014616301283240318,
"learning_rate": 3.250570603268943e-07,
"loss": 0.0036,
"step": 19310
},
{
"epoch": 66.16438356164383,
"grad_norm": 0.017365731298923492,
"learning_rate": 3.157132477328628e-07,
"loss": 0.0024,
"step": 19320
},
{
"epoch": 66.1986301369863,
"grad_norm": 0.023117030039429665,
"learning_rate": 3.0650527008162513e-07,
"loss": 0.0031,
"step": 19330
},
{
"epoch": 66.23287671232876,
"grad_norm": 0.012441856786608696,
"learning_rate": 2.9743315254743833e-07,
"loss": 0.0028,
"step": 19340
},
{
"epoch": 66.26712328767124,
"grad_norm": 0.013054047711193562,
"learning_rate": 2.8849691993311777e-07,
"loss": 0.0024,
"step": 19350
},
{
"epoch": 66.3013698630137,
"grad_norm": 0.040562987327575684,
"learning_rate": 2.796965966699927e-07,
"loss": 0.003,
"step": 19360
},
{
"epoch": 66.33561643835617,
"grad_norm": 0.02737599052488804,
"learning_rate": 2.7103220681780615e-07,
"loss": 0.0035,
"step": 19370
},
{
"epoch": 66.36986301369863,
"grad_norm": 0.021345140412449837,
"learning_rate": 2.625037740646763e-07,
"loss": 0.0031,
"step": 19380
},
{
"epoch": 66.4041095890411,
"grad_norm": 0.019200876355171204,
"learning_rate": 2.5411132172700194e-07,
"loss": 0.0031,
"step": 19390
},
{
"epoch": 66.43835616438356,
"grad_norm": 0.010418230667710304,
"learning_rate": 2.458548727494292e-07,
"loss": 0.0025,
"step": 19400
},
{
"epoch": 66.47260273972603,
"grad_norm": 0.013329196721315384,
"learning_rate": 2.3773444970477955e-07,
"loss": 0.0031,
"step": 19410
},
{
"epoch": 66.5068493150685,
"grad_norm": 0.012985438108444214,
"learning_rate": 2.2975007479397738e-07,
"loss": 0.003,
"step": 19420
},
{
"epoch": 66.54109589041096,
"grad_norm": 0.022846786305308342,
"learning_rate": 2.219017698460002e-07,
"loss": 0.0041,
"step": 19430
},
{
"epoch": 66.57534246575342,
"grad_norm": 0.02029629796743393,
"learning_rate": 2.1418955631781202e-07,
"loss": 0.0028,
"step": 19440
},
{
"epoch": 66.60958904109589,
"grad_norm": 0.016922811046242714,
"learning_rate": 2.0661345529430775e-07,
"loss": 0.0033,
"step": 19450
},
{
"epoch": 66.64383561643835,
"grad_norm": 0.023142823949456215,
"learning_rate": 1.9917348748826335e-07,
"loss": 0.0028,
"step": 19460
},
{
"epoch": 66.67808219178082,
"grad_norm": 0.013606252148747444,
"learning_rate": 1.918696732402636e-07,
"loss": 0.0021,
"step": 19470
},
{
"epoch": 66.71232876712328,
"grad_norm": 0.015818113461136818,
"learning_rate": 1.847020325186577e-07,
"loss": 0.0036,
"step": 19480
},
{
"epoch": 66.74657534246575,
"grad_norm": 0.010209470987319946,
"learning_rate": 1.776705849195037e-07,
"loss": 0.0026,
"step": 19490
},
{
"epoch": 66.78082191780823,
"grad_norm": 0.010678197257220745,
"learning_rate": 1.7077534966650766e-07,
"loss": 0.0031,
"step": 19500
},
{
"epoch": 66.81506849315069,
"grad_norm": 0.017493901774287224,
"learning_rate": 1.6401634561098444e-07,
"loss": 0.0032,
"step": 19510
},
{
"epoch": 66.84931506849315,
"grad_norm": 0.029743684455752373,
"learning_rate": 1.5739359123178587e-07,
"loss": 0.0038,
"step": 19520
},
{
"epoch": 66.88356164383562,
"grad_norm": 0.020673219114542007,
"learning_rate": 1.5090710463527836e-07,
"loss": 0.0028,
"step": 19530
},
{
"epoch": 66.91780821917808,
"grad_norm": 0.023049013689160347,
"learning_rate": 1.4455690355525964e-07,
"loss": 0.0028,
"step": 19540
},
{
"epoch": 66.95205479452055,
"grad_norm": 0.026155423372983932,
"learning_rate": 1.383430053529422e-07,
"loss": 0.0032,
"step": 19550
},
{
"epoch": 66.98630136986301,
"grad_norm": 0.032859109342098236,
"learning_rate": 1.3226542701689215e-07,
"loss": 0.0032,
"step": 19560
},
{
"epoch": 67.02054794520548,
"grad_norm": 0.0173659510910511,
"learning_rate": 1.2632418516296262e-07,
"loss": 0.0038,
"step": 19570
},
{
"epoch": 67.05479452054794,
"grad_norm": 0.03304281830787659,
"learning_rate": 1.2051929603428825e-07,
"loss": 0.0021,
"step": 19580
},
{
"epoch": 67.08904109589041,
"grad_norm": 0.010505401529371738,
"learning_rate": 1.1485077550122402e-07,
"loss": 0.0019,
"step": 19590
},
{
"epoch": 67.12328767123287,
"grad_norm": 0.019299061968922615,
"learning_rate": 1.0931863906127327e-07,
"loss": 0.003,
"step": 19600
},
{
"epoch": 67.15753424657534,
"grad_norm": 0.010144400410354137,
"learning_rate": 1.0392290183909304e-07,
"loss": 0.0029,
"step": 19610
},
{
"epoch": 67.1917808219178,
"grad_norm": 0.04509196802973747,
"learning_rate": 9.866357858642205e-08,
"loss": 0.0043,
"step": 19620
},
{
"epoch": 67.22602739726027,
"grad_norm": 0.01752866618335247,
"learning_rate": 9.354068368204739e-08,
"loss": 0.0017,
"step": 19630
},
{
"epoch": 67.26027397260275,
"grad_norm": 0.03391791135072708,
"learning_rate": 8.855423113177664e-08,
"loss": 0.0036,
"step": 19640
},
{
"epoch": 67.29452054794521,
"grad_norm": 0.027711069211363792,
"learning_rate": 8.37042345683714e-08,
"loss": 0.0034,
"step": 19650
},
{
"epoch": 67.32876712328768,
"grad_norm": 0.03244870901107788,
"learning_rate": 7.899070725153613e-08,
"loss": 0.0033,
"step": 19660
},
{
"epoch": 67.36301369863014,
"grad_norm": 0.009590478613972664,
"learning_rate": 7.44136620678848e-08,
"loss": 0.0029,
"step": 19670
},
{
"epoch": 67.3972602739726,
"grad_norm": 0.02920118160545826,
"learning_rate": 6.997311153086883e-08,
"loss": 0.0035,
"step": 19680
},
{
"epoch": 67.43150684931507,
"grad_norm": 0.015596888959407806,
"learning_rate": 6.566906778079917e-08,
"loss": 0.0024,
"step": 19690
},
{
"epoch": 67.46575342465754,
"grad_norm": 0.022850381210446358,
"learning_rate": 6.150154258476315e-08,
"loss": 0.0023,
"step": 19700
},
{
"epoch": 67.5,
"grad_norm": 0.028931666165590286,
"learning_rate": 5.747054733660773e-08,
"loss": 0.0028,
"step": 19710
},
{
"epoch": 67.53424657534246,
"grad_norm": 0.01714816689491272,
"learning_rate": 5.3576093056922906e-08,
"loss": 0.0037,
"step": 19720
},
{
"epoch": 67.56849315068493,
"grad_norm": 0.02712251991033554,
"learning_rate": 4.981819039300284e-08,
"loss": 0.0026,
"step": 19730
},
{
"epoch": 67.6027397260274,
"grad_norm": 0.01652734912931919,
"learning_rate": 4.619684961881254e-08,
"loss": 0.0023,
"step": 19740
},
{
"epoch": 67.63698630136986,
"grad_norm": 0.022933751344680786,
"learning_rate": 4.2712080634949024e-08,
"loss": 0.0025,
"step": 19750
},
{
"epoch": 67.67123287671232,
"grad_norm": 0.024571429938077927,
"learning_rate": 3.936389296864129e-08,
"loss": 0.0049,
"step": 19760
},
{
"epoch": 67.70547945205479,
"grad_norm": 0.019797664135694504,
"learning_rate": 3.615229577371149e-08,
"loss": 0.0023,
"step": 19770
},
{
"epoch": 67.73972602739725,
"grad_norm": 0.009142549708485603,
"learning_rate": 3.3077297830541584e-08,
"loss": 0.0021,
"step": 19780
},
{
"epoch": 67.77397260273973,
"grad_norm": 0.012059146538376808,
"learning_rate": 3.01389075460512e-08,
"loss": 0.0031,
"step": 19790
},
{
"epoch": 67.8082191780822,
"grad_norm": 0.020258858799934387,
"learning_rate": 2.7337132953697554e-08,
"loss": 0.003,
"step": 19800
},
{
"epoch": 67.84246575342466,
"grad_norm": 0.021773777902126312,
"learning_rate": 2.467198171342e-08,
"loss": 0.0026,
"step": 19810
},
{
"epoch": 67.87671232876713,
"grad_norm": 0.014160028658807278,
"learning_rate": 2.214346111164556e-08,
"loss": 0.0024,
"step": 19820
},
{
"epoch": 67.91095890410959,
"grad_norm": 0.021640092134475708,
"learning_rate": 1.9751578061244504e-08,
"loss": 0.0021,
"step": 19830
},
{
"epoch": 67.94520547945206,
"grad_norm": 0.01956215500831604,
"learning_rate": 1.749633910153592e-08,
"loss": 0.0029,
"step": 19840
},
{
"epoch": 67.97945205479452,
"grad_norm": 0.017366180196404457,
"learning_rate": 1.5377750398265502e-08,
"loss": 0.0028,
"step": 19850
},
{
"epoch": 68.01369863013699,
"grad_norm": 0.01383445505052805,
"learning_rate": 1.3395817743561134e-08,
"loss": 0.003,
"step": 19860
},
{
"epoch": 68.04794520547945,
"grad_norm": 0.023640619590878487,
"learning_rate": 1.1550546555960662e-08,
"loss": 0.0042,
"step": 19870
},
{
"epoch": 68.08219178082192,
"grad_norm": 0.029182305559515953,
"learning_rate": 9.841941880361916e-09,
"loss": 0.0023,
"step": 19880
},
{
"epoch": 68.11643835616438,
"grad_norm": 0.026307787746191025,
"learning_rate": 8.270008388022721e-09,
"loss": 0.0025,
"step": 19890
},
{
"epoch": 68.15068493150685,
"grad_norm": 0.017121490091085434,
"learning_rate": 6.834750376549792e-09,
"loss": 0.0038,
"step": 19900
},
{
"epoch": 68.18493150684931,
"grad_norm": 0.023319967091083527,
"learning_rate": 5.536171769887632e-09,
"loss": 0.0033,
"step": 19910
},
{
"epoch": 68.21917808219177,
"grad_norm": 0.022718269377946854,
"learning_rate": 4.3742761183018784e-09,
"loss": 0.0027,
"step": 19920
},
{
"epoch": 68.25342465753425,
"grad_norm": 0.009096325375139713,
"learning_rate": 3.349066598362649e-09,
"loss": 0.0034,
"step": 19930
},
{
"epoch": 68.28767123287672,
"grad_norm": 0.014013983309268951,
"learning_rate": 2.4605460129556445e-09,
"loss": 0.0023,
"step": 19940
},
{
"epoch": 68.32191780821918,
"grad_norm": 0.03348590061068535,
"learning_rate": 1.7087167912710478e-09,
"loss": 0.0042,
"step": 19950
},
{
"epoch": 68.35616438356165,
"grad_norm": 0.02735082246363163,
"learning_rate": 1.0935809887702154e-09,
"loss": 0.0029,
"step": 19960
},
{
"epoch": 68.39041095890411,
"grad_norm": 0.009715776890516281,
"learning_rate": 6.151402872134337e-10,
"loss": 0.0033,
"step": 19970
},
{
"epoch": 68.42465753424658,
"grad_norm": 0.017182039096951485,
"learning_rate": 2.7339599464326627e-10,
"loss": 0.0025,
"step": 19980
},
{
"epoch": 68.45890410958904,
"grad_norm": 0.02159772627055645,
"learning_rate": 6.834904537900144e-11,
"loss": 0.0023,
"step": 19990
},
{
"epoch": 68.4931506849315,
"grad_norm": 0.01271827332675457,
"learning_rate": 0.0,
"loss": 0.002,
"step": 20000
}
],
"logging_steps": 10,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 69,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.923804113456333e+19,
"train_batch_size": 96,
"trial_name": null,
"trial_params": null
}