{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 46.3768115942029,
"eval_steps": 500,
"global_step": 16000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.028985507246376812,
"grad_norm": 3.0505027770996094,
"learning_rate": 9e-07,
"loss": 1.5657,
"step": 10
},
{
"epoch": 0.057971014492753624,
"grad_norm": 2.080113410949707,
"learning_rate": 1.9e-06,
"loss": 1.5118,
"step": 20
},
{
"epoch": 0.08695652173913043,
"grad_norm": 2.4925858974456787,
"learning_rate": 2.9e-06,
"loss": 1.2433,
"step": 30
},
{
"epoch": 0.11594202898550725,
"grad_norm": 1.7392232418060303,
"learning_rate": 3.9e-06,
"loss": 1.3809,
"step": 40
},
{
"epoch": 0.14492753623188406,
"grad_norm": 1.8013620376586914,
"learning_rate": 4.9000000000000005e-06,
"loss": 1.3118,
"step": 50
},
{
"epoch": 0.17391304347826086,
"grad_norm": 1.5780786275863647,
"learning_rate": 5.9e-06,
"loss": 1.1046,
"step": 60
},
{
"epoch": 0.2028985507246377,
"grad_norm": 1.2937341928482056,
"learning_rate": 6.900000000000001e-06,
"loss": 1.2428,
"step": 70
},
{
"epoch": 0.2318840579710145,
"grad_norm": 1.147234559059143,
"learning_rate": 7.9e-06,
"loss": 1.2453,
"step": 80
},
{
"epoch": 0.2608695652173913,
"grad_norm": 0.7600051760673523,
"learning_rate": 8.9e-06,
"loss": 0.6767,
"step": 90
},
{
"epoch": 0.2898550724637681,
"grad_norm": 0.8683933615684509,
"learning_rate": 9.900000000000002e-06,
"loss": 1.0279,
"step": 100
},
{
"epoch": 0.3188405797101449,
"grad_norm": 0.6988456845283508,
"learning_rate": 1.09e-05,
"loss": 0.857,
"step": 110
},
{
"epoch": 0.34782608695652173,
"grad_norm": 0.8881454467773438,
"learning_rate": 1.19e-05,
"loss": 0.8046,
"step": 120
},
{
"epoch": 0.37681159420289856,
"grad_norm": 0.40000322461128235,
"learning_rate": 1.29e-05,
"loss": 0.774,
"step": 130
},
{
"epoch": 0.4057971014492754,
"grad_norm": 1.2142903804779053,
"learning_rate": 1.3900000000000002e-05,
"loss": 0.5516,
"step": 140
},
{
"epoch": 0.43478260869565216,
"grad_norm": 0.9134606719017029,
"learning_rate": 1.49e-05,
"loss": 0.7689,
"step": 150
},
{
"epoch": 0.463768115942029,
"grad_norm": 0.978635311126709,
"learning_rate": 1.59e-05,
"loss": 0.7034,
"step": 160
},
{
"epoch": 0.4927536231884058,
"grad_norm": 0.9544720649719238,
"learning_rate": 1.69e-05,
"loss": 0.5249,
"step": 170
},
{
"epoch": 0.5217391304347826,
"grad_norm": 0.750441312789917,
"learning_rate": 1.79e-05,
"loss": 0.5178,
"step": 180
},
{
"epoch": 0.5507246376811594,
"grad_norm": 0.8242844939231873,
"learning_rate": 1.8900000000000002e-05,
"loss": 0.4664,
"step": 190
},
{
"epoch": 0.5797101449275363,
"grad_norm": 0.9814381003379822,
"learning_rate": 1.9900000000000003e-05,
"loss": 0.6898,
"step": 200
},
{
"epoch": 0.6086956521739131,
"grad_norm": 0.7581349611282349,
"learning_rate": 2.09e-05,
"loss": 0.5289,
"step": 210
},
{
"epoch": 0.6376811594202898,
"grad_norm": 0.9911883473396301,
"learning_rate": 2.19e-05,
"loss": 0.4249,
"step": 220
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.9439306855201721,
"learning_rate": 2.29e-05,
"loss": 0.4135,
"step": 230
},
{
"epoch": 0.6956521739130435,
"grad_norm": 1.3483587503433228,
"learning_rate": 2.39e-05,
"loss": 0.3891,
"step": 240
},
{
"epoch": 0.7246376811594203,
"grad_norm": 0.819063663482666,
"learning_rate": 2.4900000000000002e-05,
"loss": 0.4549,
"step": 250
},
{
"epoch": 0.7536231884057971,
"grad_norm": 0.8383818864822388,
"learning_rate": 2.5900000000000003e-05,
"loss": 0.4251,
"step": 260
},
{
"epoch": 0.782608695652174,
"grad_norm": 0.9047835469245911,
"learning_rate": 2.6900000000000003e-05,
"loss": 0.4383,
"step": 270
},
{
"epoch": 0.8115942028985508,
"grad_norm": 0.7909944653511047,
"learning_rate": 2.7900000000000004e-05,
"loss": 0.3907,
"step": 280
},
{
"epoch": 0.8405797101449275,
"grad_norm": 0.8012731075286865,
"learning_rate": 2.8899999999999998e-05,
"loss": 0.4489,
"step": 290
},
{
"epoch": 0.8695652173913043,
"grad_norm": 1.4028682708740234,
"learning_rate": 2.9900000000000002e-05,
"loss": 0.3037,
"step": 300
},
{
"epoch": 0.8985507246376812,
"grad_norm": 1.488762617111206,
"learning_rate": 3.09e-05,
"loss": 0.3911,
"step": 310
},
{
"epoch": 0.927536231884058,
"grad_norm": 0.7830433249473572,
"learning_rate": 3.19e-05,
"loss": 0.3919,
"step": 320
},
{
"epoch": 0.9565217391304348,
"grad_norm": 1.5407651662826538,
"learning_rate": 3.29e-05,
"loss": 0.3686,
"step": 330
},
{
"epoch": 0.9855072463768116,
"grad_norm": 0.9575673937797546,
"learning_rate": 3.3900000000000004e-05,
"loss": 0.3367,
"step": 340
},
{
"epoch": 1.0144927536231885,
"grad_norm": 1.3226127624511719,
"learning_rate": 3.49e-05,
"loss": 0.3767,
"step": 350
},
{
"epoch": 1.0434782608695652,
"grad_norm": 1.4169162511825562,
"learning_rate": 3.59e-05,
"loss": 0.3338,
"step": 360
},
{
"epoch": 1.0724637681159421,
"grad_norm": 1.7206474542617798,
"learning_rate": 3.69e-05,
"loss": 0.3345,
"step": 370
},
{
"epoch": 1.1014492753623188,
"grad_norm": 1.4332363605499268,
"learning_rate": 3.79e-05,
"loss": 0.3272,
"step": 380
},
{
"epoch": 1.1304347826086956,
"grad_norm": 1.1724469661712646,
"learning_rate": 3.8900000000000004e-05,
"loss": 0.2866,
"step": 390
},
{
"epoch": 1.1594202898550725,
"grad_norm": 0.83205646276474,
"learning_rate": 3.99e-05,
"loss": 0.356,
"step": 400
},
{
"epoch": 1.1884057971014492,
"grad_norm": 1.1642824411392212,
"learning_rate": 4.09e-05,
"loss": 0.2858,
"step": 410
},
{
"epoch": 1.2173913043478262,
"grad_norm": 1.0703731775283813,
"learning_rate": 4.19e-05,
"loss": 0.3091,
"step": 420
},
{
"epoch": 1.2463768115942029,
"grad_norm": 0.886146605014801,
"learning_rate": 4.29e-05,
"loss": 0.3163,
"step": 430
},
{
"epoch": 1.2753623188405796,
"grad_norm": 0.8630309104919434,
"learning_rate": 4.39e-05,
"loss": 0.2843,
"step": 440
},
{
"epoch": 1.3043478260869565,
"grad_norm": 0.8399680256843567,
"learning_rate": 4.49e-05,
"loss": 0.2451,
"step": 450
},
{
"epoch": 1.3333333333333333,
"grad_norm": 1.4553627967834473,
"learning_rate": 4.5900000000000004e-05,
"loss": 0.2888,
"step": 460
},
{
"epoch": 1.3623188405797102,
"grad_norm": 1.8121979236602783,
"learning_rate": 4.69e-05,
"loss": 0.257,
"step": 470
},
{
"epoch": 1.391304347826087,
"grad_norm": 1.165885329246521,
"learning_rate": 4.79e-05,
"loss": 0.2579,
"step": 480
},
{
"epoch": 1.4202898550724639,
"grad_norm": 0.8950861096382141,
"learning_rate": 4.89e-05,
"loss": 0.279,
"step": 490
},
{
"epoch": 1.4492753623188406,
"grad_norm": 0.8436807990074158,
"learning_rate": 4.99e-05,
"loss": 0.2503,
"step": 500
},
{
"epoch": 1.4782608695652173,
"grad_norm": 0.5035578012466431,
"learning_rate": 5.0900000000000004e-05,
"loss": 0.2177,
"step": 510
},
{
"epoch": 1.5072463768115942,
"grad_norm": 1.0943514108657837,
"learning_rate": 5.19e-05,
"loss": 0.3226,
"step": 520
},
{
"epoch": 1.5362318840579712,
"grad_norm": 0.7721551060676575,
"learning_rate": 5.2900000000000005e-05,
"loss": 0.2258,
"step": 530
},
{
"epoch": 1.5652173913043477,
"grad_norm": 1.0129557847976685,
"learning_rate": 5.390000000000001e-05,
"loss": 0.299,
"step": 540
},
{
"epoch": 1.5942028985507246,
"grad_norm": 1.014032006263733,
"learning_rate": 5.4900000000000006e-05,
"loss": 0.2733,
"step": 550
},
{
"epoch": 1.6231884057971016,
"grad_norm": 1.73903489112854,
"learning_rate": 5.590000000000001e-05,
"loss": 0.2611,
"step": 560
},
{
"epoch": 1.6521739130434783,
"grad_norm": 2.070592164993286,
"learning_rate": 5.69e-05,
"loss": 0.2514,
"step": 570
},
{
"epoch": 1.681159420289855,
"grad_norm": 1.2096529006958008,
"learning_rate": 5.79e-05,
"loss": 0.2635,
"step": 580
},
{
"epoch": 1.710144927536232,
"grad_norm": 0.9375045895576477,
"learning_rate": 5.89e-05,
"loss": 0.2542,
"step": 590
},
{
"epoch": 1.7391304347826086,
"grad_norm": 0.8468955755233765,
"learning_rate": 5.99e-05,
"loss": 0.2525,
"step": 600
},
{
"epoch": 1.7681159420289854,
"grad_norm": 0.9298123717308044,
"learning_rate": 6.09e-05,
"loss": 0.2511,
"step": 610
},
{
"epoch": 1.7971014492753623,
"grad_norm": 0.8824529647827148,
"learning_rate": 6.19e-05,
"loss": 0.2373,
"step": 620
},
{
"epoch": 1.8260869565217392,
"grad_norm": 0.7265031337738037,
"learning_rate": 6.29e-05,
"loss": 0.2139,
"step": 630
},
{
"epoch": 1.855072463768116,
"grad_norm": 1.0328197479248047,
"learning_rate": 6.390000000000001e-05,
"loss": 0.2141,
"step": 640
},
{
"epoch": 1.8840579710144927,
"grad_norm": 0.5020371079444885,
"learning_rate": 6.49e-05,
"loss": 0.2348,
"step": 650
},
{
"epoch": 1.9130434782608696,
"grad_norm": 0.7637607455253601,
"learning_rate": 6.59e-05,
"loss": 0.2097,
"step": 660
},
{
"epoch": 1.9420289855072463,
"grad_norm": 0.8997554779052734,
"learning_rate": 6.690000000000001e-05,
"loss": 0.2228,
"step": 670
},
{
"epoch": 1.971014492753623,
"grad_norm": 0.7784063816070557,
"learning_rate": 6.790000000000001e-05,
"loss": 0.2412,
"step": 680
},
{
"epoch": 2.0,
"grad_norm": 0.9886015057563782,
"learning_rate": 6.89e-05,
"loss": 0.2263,
"step": 690
},
{
"epoch": 2.028985507246377,
"grad_norm": 0.7861230969429016,
"learning_rate": 6.99e-05,
"loss": 0.2281,
"step": 700
},
{
"epoch": 2.0579710144927534,
"grad_norm": 0.6980922222137451,
"learning_rate": 7.09e-05,
"loss": 0.2295,
"step": 710
},
{
"epoch": 2.0869565217391304,
"grad_norm": 0.9516819715499878,
"learning_rate": 7.19e-05,
"loss": 0.2485,
"step": 720
},
{
"epoch": 2.1159420289855073,
"grad_norm": 0.7088673710823059,
"learning_rate": 7.29e-05,
"loss": 0.2256,
"step": 730
},
{
"epoch": 2.1449275362318843,
"grad_norm": 0.8767524361610413,
"learning_rate": 7.390000000000001e-05,
"loss": 0.2105,
"step": 740
},
{
"epoch": 2.1739130434782608,
"grad_norm": 0.5966852903366089,
"learning_rate": 7.49e-05,
"loss": 0.2192,
"step": 750
},
{
"epoch": 2.2028985507246377,
"grad_norm": 0.7955141067504883,
"learning_rate": 7.59e-05,
"loss": 0.2166,
"step": 760
},
{
"epoch": 2.2318840579710146,
"grad_norm": 0.8298391699790955,
"learning_rate": 7.69e-05,
"loss": 0.2583,
"step": 770
},
{
"epoch": 2.260869565217391,
"grad_norm": 1.2403712272644043,
"learning_rate": 7.790000000000001e-05,
"loss": 0.2428,
"step": 780
},
{
"epoch": 2.289855072463768,
"grad_norm": 1.2657474279403687,
"learning_rate": 7.890000000000001e-05,
"loss": 0.2025,
"step": 790
},
{
"epoch": 2.318840579710145,
"grad_norm": 1.0565385818481445,
"learning_rate": 7.99e-05,
"loss": 0.1949,
"step": 800
},
{
"epoch": 2.3478260869565215,
"grad_norm": 1.0542415380477905,
"learning_rate": 8.090000000000001e-05,
"loss": 0.2692,
"step": 810
},
{
"epoch": 2.3768115942028984,
"grad_norm": 0.6383161544799805,
"learning_rate": 8.19e-05,
"loss": 0.221,
"step": 820
},
{
"epoch": 2.4057971014492754,
"grad_norm": 0.8400139808654785,
"learning_rate": 8.29e-05,
"loss": 0.1894,
"step": 830
},
{
"epoch": 2.4347826086956523,
"grad_norm": 0.9511343240737915,
"learning_rate": 8.39e-05,
"loss": 0.2402,
"step": 840
},
{
"epoch": 2.463768115942029,
"grad_norm": 1.1040838956832886,
"learning_rate": 8.49e-05,
"loss": 0.1974,
"step": 850
},
{
"epoch": 2.4927536231884058,
"grad_norm": 0.8064889311790466,
"learning_rate": 8.59e-05,
"loss": 0.2312,
"step": 860
},
{
"epoch": 2.5217391304347827,
"grad_norm": 0.7647086381912231,
"learning_rate": 8.69e-05,
"loss": 0.1977,
"step": 870
},
{
"epoch": 2.550724637681159,
"grad_norm": 0.8380846977233887,
"learning_rate": 8.790000000000001e-05,
"loss": 0.2233,
"step": 880
},
{
"epoch": 2.579710144927536,
"grad_norm": 0.48276486992836,
"learning_rate": 8.89e-05,
"loss": 0.1741,
"step": 890
},
{
"epoch": 2.608695652173913,
"grad_norm": 0.7797939777374268,
"learning_rate": 8.99e-05,
"loss": 0.1951,
"step": 900
},
{
"epoch": 2.63768115942029,
"grad_norm": 0.8178322911262512,
"learning_rate": 9.090000000000001e-05,
"loss": 0.2087,
"step": 910
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.8776262998580933,
"learning_rate": 9.190000000000001e-05,
"loss": 0.1914,
"step": 920
},
{
"epoch": 2.6956521739130435,
"grad_norm": 0.46826550364494324,
"learning_rate": 9.290000000000001e-05,
"loss": 0.1902,
"step": 930
},
{
"epoch": 2.7246376811594204,
"grad_norm": 0.9637788534164429,
"learning_rate": 9.39e-05,
"loss": 0.2052,
"step": 940
},
{
"epoch": 2.753623188405797,
"grad_norm": 1.1427522897720337,
"learning_rate": 9.49e-05,
"loss": 0.2212,
"step": 950
},
{
"epoch": 2.782608695652174,
"grad_norm": 0.6853973865509033,
"learning_rate": 9.59e-05,
"loss": 0.1921,
"step": 960
},
{
"epoch": 2.8115942028985508,
"grad_norm": 0.8581897020339966,
"learning_rate": 9.69e-05,
"loss": 0.2215,
"step": 970
},
{
"epoch": 2.8405797101449277,
"grad_norm": 0.9988269805908203,
"learning_rate": 9.790000000000001e-05,
"loss": 0.1838,
"step": 980
},
{
"epoch": 2.869565217391304,
"grad_norm": 0.3667157292366028,
"learning_rate": 9.89e-05,
"loss": 0.156,
"step": 990
},
{
"epoch": 2.898550724637681,
"grad_norm": 0.8472527265548706,
"learning_rate": 9.99e-05,
"loss": 0.1936,
"step": 1000
},
{
"epoch": 2.927536231884058,
"grad_norm": 0.6419370770454407,
"learning_rate": 9.999994463727085e-05,
"loss": 0.1723,
"step": 1010
},
{
"epoch": 2.9565217391304346,
"grad_norm": 1.061924934387207,
"learning_rate": 9.999975326009292e-05,
"loss": 0.24,
"step": 1020
},
{
"epoch": 2.9855072463768115,
"grad_norm": 0.44797030091285706,
"learning_rate": 9.999942518549879e-05,
"loss": 0.1526,
"step": 1030
},
{
"epoch": 3.0144927536231885,
"grad_norm": 0.5258593559265137,
"learning_rate": 9.999896041438544e-05,
"loss": 0.2082,
"step": 1040
},
{
"epoch": 3.0434782608695654,
"grad_norm": 0.7527342438697815,
"learning_rate": 9.999835894802353e-05,
"loss": 0.166,
"step": 1050
},
{
"epoch": 3.072463768115942,
"grad_norm": 0.8045316934585571,
"learning_rate": 9.999762078805743e-05,
"loss": 0.1526,
"step": 1060
},
{
"epoch": 3.101449275362319,
"grad_norm": 0.6621928215026855,
"learning_rate": 9.999674593650526e-05,
"loss": 0.1965,
"step": 1070
},
{
"epoch": 3.130434782608696,
"grad_norm": 0.6621638536453247,
"learning_rate": 9.99957343957588e-05,
"loss": 0.1575,
"step": 1080
},
{
"epoch": 3.1594202898550723,
"grad_norm": 0.6635481715202332,
"learning_rate": 9.99945861685836e-05,
"loss": 0.1943,
"step": 1090
},
{
"epoch": 3.1884057971014492,
"grad_norm": 1.0563372373580933,
"learning_rate": 9.999330125811884e-05,
"loss": 0.1864,
"step": 1100
},
{
"epoch": 3.217391304347826,
"grad_norm": 0.7428378462791443,
"learning_rate": 9.999187966787744e-05,
"loss": 0.2003,
"step": 1110
},
{
"epoch": 3.246376811594203,
"grad_norm": 0.6000686287879944,
"learning_rate": 9.999032140174595e-05,
"loss": 0.1587,
"step": 1120
},
{
"epoch": 3.2753623188405796,
"grad_norm": 0.8239452838897705,
"learning_rate": 9.998862646398464e-05,
"loss": 0.1838,
"step": 1130
},
{
"epoch": 3.3043478260869565,
"grad_norm": 0.6900084018707275,
"learning_rate": 9.998679485922739e-05,
"loss": 0.2002,
"step": 1140
},
{
"epoch": 3.3333333333333335,
"grad_norm": 1.6132053136825562,
"learning_rate": 9.998482659248174e-05,
"loss": 0.2293,
"step": 1150
},
{
"epoch": 3.36231884057971,
"grad_norm": 0.6084638237953186,
"learning_rate": 9.998272166912883e-05,
"loss": 0.1645,
"step": 1160
},
{
"epoch": 3.391304347826087,
"grad_norm": 0.5943679809570312,
"learning_rate": 9.998048009492347e-05,
"loss": 0.1763,
"step": 1170
},
{
"epoch": 3.420289855072464,
"grad_norm": 0.5672821998596191,
"learning_rate": 9.997810187599403e-05,
"loss": 0.1679,
"step": 1180
},
{
"epoch": 3.449275362318841,
"grad_norm": 1.185848593711853,
"learning_rate": 9.997558701884249e-05,
"loss": 0.2152,
"step": 1190
},
{
"epoch": 3.4782608695652173,
"grad_norm": 0.7329660058021545,
"learning_rate": 9.997293553034433e-05,
"loss": 0.1943,
"step": 1200
},
{
"epoch": 3.5072463768115942,
"grad_norm": 0.6363108158111572,
"learning_rate": 9.997014741774866e-05,
"loss": 0.1579,
"step": 1210
},
{
"epoch": 3.536231884057971,
"grad_norm": 1.2481898069381714,
"learning_rate": 9.996722268867803e-05,
"loss": 0.1869,
"step": 1220
},
{
"epoch": 3.5652173913043477,
"grad_norm": 0.8098170757293701,
"learning_rate": 9.996416135112858e-05,
"loss": 0.2126,
"step": 1230
},
{
"epoch": 3.5942028985507246,
"grad_norm": 0.6532134413719177,
"learning_rate": 9.996096341346988e-05,
"loss": 0.2359,
"step": 1240
},
{
"epoch": 3.6231884057971016,
"grad_norm": 0.774456262588501,
"learning_rate": 9.995762888444495e-05,
"loss": 0.2043,
"step": 1250
},
{
"epoch": 3.6521739130434785,
"grad_norm": 0.7362341284751892,
"learning_rate": 9.995415777317027e-05,
"loss": 0.1705,
"step": 1260
},
{
"epoch": 3.681159420289855,
"grad_norm": 0.6909469366073608,
"learning_rate": 9.995055008913574e-05,
"loss": 0.1981,
"step": 1270
},
{
"epoch": 3.710144927536232,
"grad_norm": 0.5451234579086304,
"learning_rate": 9.994680584220463e-05,
"loss": 0.1705,
"step": 1280
},
{
"epoch": 3.7391304347826084,
"grad_norm": 0.7192392945289612,
"learning_rate": 9.994292504261355e-05,
"loss": 0.1707,
"step": 1290
},
{
"epoch": 3.7681159420289854,
"grad_norm": 0.5111631751060486,
"learning_rate": 9.993890770097247e-05,
"loss": 0.2049,
"step": 1300
},
{
"epoch": 3.7971014492753623,
"grad_norm": 0.5530916452407837,
"learning_rate": 9.993475382826467e-05,
"loss": 0.1931,
"step": 1310
},
{
"epoch": 3.8260869565217392,
"grad_norm": 0.4613671898841858,
"learning_rate": 9.993046343584664e-05,
"loss": 0.1553,
"step": 1320
},
{
"epoch": 3.855072463768116,
"grad_norm": 0.5719594359397888,
"learning_rate": 9.992603653544816e-05,
"loss": 0.1865,
"step": 1330
},
{
"epoch": 3.8840579710144927,
"grad_norm": 0.6633929014205933,
"learning_rate": 9.992147313917222e-05,
"loss": 0.1901,
"step": 1340
},
{
"epoch": 3.9130434782608696,
"grad_norm": 0.3168647587299347,
"learning_rate": 9.991677325949497e-05,
"loss": 0.1871,
"step": 1350
},
{
"epoch": 3.942028985507246,
"grad_norm": 0.35858315229415894,
"learning_rate": 9.991193690926568e-05,
"loss": 0.1533,
"step": 1360
},
{
"epoch": 3.971014492753623,
"grad_norm": 0.35452893376350403,
"learning_rate": 9.990696410170678e-05,
"loss": 0.1843,
"step": 1370
},
{
"epoch": 4.0,
"grad_norm": 1.4836504459381104,
"learning_rate": 9.990185485041371e-05,
"loss": 0.1691,
"step": 1380
},
{
"epoch": 4.028985507246377,
"grad_norm": 0.7394298315048218,
"learning_rate": 9.989660916935498e-05,
"loss": 0.1648,
"step": 1390
},
{
"epoch": 4.057971014492754,
"grad_norm": 0.8527777791023254,
"learning_rate": 9.989122707287208e-05,
"loss": 0.1741,
"step": 1400
},
{
"epoch": 4.086956521739131,
"grad_norm": 0.6024882197380066,
"learning_rate": 9.988570857567945e-05,
"loss": 0.1863,
"step": 1410
},
{
"epoch": 4.115942028985507,
"grad_norm": 0.6260817050933838,
"learning_rate": 9.988005369286446e-05,
"loss": 0.1815,
"step": 1420
},
{
"epoch": 4.144927536231884,
"grad_norm": 0.9622341394424438,
"learning_rate": 9.987426243988734e-05,
"loss": 0.1698,
"step": 1430
},
{
"epoch": 4.173913043478261,
"grad_norm": 0.5575575232505798,
"learning_rate": 9.986833483258114e-05,
"loss": 0.1753,
"step": 1440
},
{
"epoch": 4.202898550724638,
"grad_norm": 0.24518761038780212,
"learning_rate": 9.986227088715173e-05,
"loss": 0.16,
"step": 1450
},
{
"epoch": 4.231884057971015,
"grad_norm": 0.5677102208137512,
"learning_rate": 9.98560706201777e-05,
"loss": 0.1746,
"step": 1460
},
{
"epoch": 4.260869565217392,
"grad_norm": 0.35185858607292175,
"learning_rate": 9.984973404861036e-05,
"loss": 0.152,
"step": 1470
},
{
"epoch": 4.2898550724637685,
"grad_norm": 0.5845288038253784,
"learning_rate": 9.984326118977361e-05,
"loss": 0.1458,
"step": 1480
},
{
"epoch": 4.318840579710145,
"grad_norm": 0.5872308611869812,
"learning_rate": 9.983665206136406e-05,
"loss": 0.1783,
"step": 1490
},
{
"epoch": 4.3478260869565215,
"grad_norm": 0.6161956787109375,
"learning_rate": 9.982990668145075e-05,
"loss": 0.1617,
"step": 1500
},
{
"epoch": 4.3768115942028984,
"grad_norm": 0.48462975025177,
"learning_rate": 9.982302506847534e-05,
"loss": 0.1544,
"step": 1510
},
{
"epoch": 4.405797101449275,
"grad_norm": 0.43805649876594543,
"learning_rate": 9.981600724125189e-05,
"loss": 0.1632,
"step": 1520
},
{
"epoch": 4.434782608695652,
"grad_norm": 0.6712663173675537,
"learning_rate": 9.980885321896685e-05,
"loss": 0.1681,
"step": 1530
},
{
"epoch": 4.463768115942029,
"grad_norm": 0.46296727657318115,
"learning_rate": 9.980156302117905e-05,
"loss": 0.147,
"step": 1540
},
{
"epoch": 4.492753623188406,
"grad_norm": 0.47002753615379333,
"learning_rate": 9.979413666781963e-05,
"loss": 0.1285,
"step": 1550
},
{
"epoch": 4.521739130434782,
"grad_norm": 0.508978009223938,
"learning_rate": 9.978657417919193e-05,
"loss": 0.1611,
"step": 1560
},
{
"epoch": 4.550724637681159,
"grad_norm": 0.5047881007194519,
"learning_rate": 9.977887557597153e-05,
"loss": 0.169,
"step": 1570
},
{
"epoch": 4.579710144927536,
"grad_norm": 0.5661750435829163,
"learning_rate": 9.97710408792061e-05,
"loss": 0.1745,
"step": 1580
},
{
"epoch": 4.608695652173913,
"grad_norm": 0.33027854561805725,
"learning_rate": 9.976307011031542e-05,
"loss": 0.1515,
"step": 1590
},
{
"epoch": 4.63768115942029,
"grad_norm": 0.5191190838813782,
"learning_rate": 9.975496329109126e-05,
"loss": 0.1812,
"step": 1600
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.6009054183959961,
"learning_rate": 9.974672044369732e-05,
"loss": 0.154,
"step": 1610
},
{
"epoch": 4.695652173913043,
"grad_norm": 0.83514004945755,
"learning_rate": 9.97383415906693e-05,
"loss": 0.1915,
"step": 1620
},
{
"epoch": 4.72463768115942,
"grad_norm": 0.7153990864753723,
"learning_rate": 9.97298267549146e-05,
"loss": 0.151,
"step": 1630
},
{
"epoch": 4.753623188405797,
"grad_norm": 0.5760650634765625,
"learning_rate": 9.972117595971249e-05,
"loss": 0.1613,
"step": 1640
},
{
"epoch": 4.782608695652174,
"grad_norm": 0.46681898832321167,
"learning_rate": 9.971238922871391e-05,
"loss": 0.1547,
"step": 1650
},
{
"epoch": 4.811594202898551,
"grad_norm": 0.6712074875831604,
"learning_rate": 9.970346658594142e-05,
"loss": 0.1693,
"step": 1660
},
{
"epoch": 4.840579710144928,
"grad_norm": 0.41927066445350647,
"learning_rate": 9.969440805578923e-05,
"loss": 0.1537,
"step": 1670
},
{
"epoch": 4.869565217391305,
"grad_norm": 0.718482255935669,
"learning_rate": 9.968521366302298e-05,
"loss": 0.1503,
"step": 1680
},
{
"epoch": 4.898550724637682,
"grad_norm": 0.41100355982780457,
"learning_rate": 9.967588343277981e-05,
"loss": 0.131,
"step": 1690
},
{
"epoch": 4.927536231884058,
"grad_norm": 0.6164652705192566,
"learning_rate": 9.966641739056818e-05,
"loss": 0.1633,
"step": 1700
},
{
"epoch": 4.956521739130435,
"grad_norm": 0.6644942760467529,
"learning_rate": 9.965681556226793e-05,
"loss": 0.1686,
"step": 1710
},
{
"epoch": 4.9855072463768115,
"grad_norm": 0.6024698615074158,
"learning_rate": 9.964707797413006e-05,
"loss": 0.1629,
"step": 1720
},
{
"epoch": 5.0144927536231885,
"grad_norm": 0.37680429220199585,
"learning_rate": 9.963720465277679e-05,
"loss": 0.1634,
"step": 1730
},
{
"epoch": 5.043478260869565,
"grad_norm": 0.6451659798622131,
"learning_rate": 9.96271956252014e-05,
"loss": 0.1613,
"step": 1740
},
{
"epoch": 5.072463768115942,
"grad_norm": 0.28793832659721375,
"learning_rate": 9.961705091876816e-05,
"loss": 0.1589,
"step": 1750
},
{
"epoch": 5.101449275362318,
"grad_norm": 0.59237140417099,
"learning_rate": 9.960677056121235e-05,
"loss": 0.1607,
"step": 1760
},
{
"epoch": 5.130434782608695,
"grad_norm": 0.47422319650650024,
"learning_rate": 9.959635458064005e-05,
"loss": 0.1916,
"step": 1770
},
{
"epoch": 5.159420289855072,
"grad_norm": 0.681136965751648,
"learning_rate": 9.958580300552815e-05,
"loss": 0.1624,
"step": 1780
},
{
"epoch": 5.188405797101449,
"grad_norm": 0.6878365874290466,
"learning_rate": 9.957511586472426e-05,
"loss": 0.1762,
"step": 1790
},
{
"epoch": 5.217391304347826,
"grad_norm": 0.5597853064537048,
"learning_rate": 9.956429318744662e-05,
"loss": 0.1648,
"step": 1800
},
{
"epoch": 5.246376811594203,
"grad_norm": 0.5032410621643066,
"learning_rate": 9.955333500328404e-05,
"loss": 0.1482,
"step": 1810
},
{
"epoch": 5.27536231884058,
"grad_norm": 0.6717603802680969,
"learning_rate": 9.95422413421957e-05,
"loss": 0.1815,
"step": 1820
},
{
"epoch": 5.304347826086957,
"grad_norm": 0.5992377400398254,
"learning_rate": 9.953101223451133e-05,
"loss": 0.1551,
"step": 1830
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.35043808817863464,
"learning_rate": 9.951964771093085e-05,
"loss": 0.1493,
"step": 1840
},
{
"epoch": 5.36231884057971,
"grad_norm": 0.49411511421203613,
"learning_rate": 9.950814780252442e-05,
"loss": 0.1561,
"step": 1850
},
{
"epoch": 5.391304347826087,
"grad_norm": 0.5951570868492126,
"learning_rate": 9.949651254073236e-05,
"loss": 0.1675,
"step": 1860
},
{
"epoch": 5.420289855072464,
"grad_norm": 0.6489980220794678,
"learning_rate": 9.948474195736504e-05,
"loss": 0.1579,
"step": 1870
},
{
"epoch": 5.449275362318841,
"grad_norm": 0.5115748047828674,
"learning_rate": 9.947283608460277e-05,
"loss": 0.1999,
"step": 1880
},
{
"epoch": 5.478260869565218,
"grad_norm": 0.4821164906024933,
"learning_rate": 9.946079495499577e-05,
"loss": 0.1695,
"step": 1890
},
{
"epoch": 5.507246376811594,
"grad_norm": 0.40529024600982666,
"learning_rate": 9.944861860146401e-05,
"loss": 0.1764,
"step": 1900
},
{
"epoch": 5.536231884057971,
"grad_norm": 0.46906864643096924,
"learning_rate": 9.943630705729719e-05,
"loss": 0.1572,
"step": 1910
},
{
"epoch": 5.565217391304348,
"grad_norm": 0.34866201877593994,
"learning_rate": 9.942386035615459e-05,
"loss": 0.1155,
"step": 1920
},
{
"epoch": 5.594202898550725,
"grad_norm": 0.6494722962379456,
"learning_rate": 9.941127853206503e-05,
"loss": 0.1588,
"step": 1930
},
{
"epoch": 5.6231884057971016,
"grad_norm": 0.4848741292953491,
"learning_rate": 9.939856161942673e-05,
"loss": 0.1489,
"step": 1940
},
{
"epoch": 5.6521739130434785,
"grad_norm": 0.5746407508850098,
"learning_rate": 9.938570965300724e-05,
"loss": 0.1503,
"step": 1950
},
{
"epoch": 5.681159420289855,
"grad_norm": 0.6178921461105347,
"learning_rate": 9.937272266794335e-05,
"loss": 0.1297,
"step": 1960
},
{
"epoch": 5.710144927536232,
"grad_norm": 0.48752641677856445,
"learning_rate": 9.935960069974096e-05,
"loss": 0.1125,
"step": 1970
},
{
"epoch": 5.739130434782608,
"grad_norm": 0.4455469846725464,
"learning_rate": 9.934634378427506e-05,
"loss": 0.1523,
"step": 1980
},
{
"epoch": 5.768115942028985,
"grad_norm": 0.8876426219940186,
"learning_rate": 9.933295195778954e-05,
"loss": 0.1284,
"step": 1990
},
{
"epoch": 5.797101449275362,
"grad_norm": 0.5639053583145142,
"learning_rate": 9.931942525689715e-05,
"loss": 0.1557,
"step": 2000
},
{
"epoch": 5.826086956521739,
"grad_norm": 0.5348621606826782,
"learning_rate": 9.930576371857936e-05,
"loss": 0.1416,
"step": 2010
},
{
"epoch": 5.855072463768116,
"grad_norm": 0.4637743830680847,
"learning_rate": 9.929196738018629e-05,
"loss": 0.1387,
"step": 2020
},
{
"epoch": 5.884057971014493,
"grad_norm": 0.7224751114845276,
"learning_rate": 9.927803627943662e-05,
"loss": 0.1483,
"step": 2030
},
{
"epoch": 5.913043478260869,
"grad_norm": 0.4575344920158386,
"learning_rate": 9.926397045441744e-05,
"loss": 0.1525,
"step": 2040
},
{
"epoch": 5.942028985507246,
"grad_norm": 0.4177353084087372,
"learning_rate": 9.924976994358417e-05,
"loss": 0.137,
"step": 2050
},
{
"epoch": 5.971014492753623,
"grad_norm": 0.5887998938560486,
"learning_rate": 9.923543478576048e-05,
"loss": 0.1799,
"step": 2060
},
{
"epoch": 6.0,
"grad_norm": 0.6577372550964355,
"learning_rate": 9.922096502013813e-05,
"loss": 0.1675,
"step": 2070
},
{
"epoch": 6.028985507246377,
"grad_norm": 0.6861566305160522,
"learning_rate": 9.92063606862769e-05,
"loss": 0.143,
"step": 2080
},
{
"epoch": 6.057971014492754,
"grad_norm": 0.5720553994178772,
"learning_rate": 9.919162182410453e-05,
"loss": 0.1264,
"step": 2090
},
{
"epoch": 6.086956521739131,
"grad_norm": 0.6558146476745605,
"learning_rate": 9.917674847391645e-05,
"loss": 0.1398,
"step": 2100
},
{
"epoch": 6.115942028985507,
"grad_norm": 0.4062115252017975,
"learning_rate": 9.916174067637584e-05,
"loss": 0.1402,
"step": 2110
},
{
"epoch": 6.144927536231884,
"grad_norm": 0.5962466597557068,
"learning_rate": 9.914659847251348e-05,
"loss": 0.1459,
"step": 2120
},
{
"epoch": 6.173913043478261,
"grad_norm": 0.5116047263145447,
"learning_rate": 9.913132190372753e-05,
"loss": 0.1502,
"step": 2130
},
{
"epoch": 6.202898550724638,
"grad_norm": 0.6019411683082581,
"learning_rate": 9.911591101178359e-05,
"loss": 0.1373,
"step": 2140
},
{
"epoch": 6.231884057971015,
"grad_norm": 0.7383087873458862,
"learning_rate": 9.910036583881443e-05,
"loss": 0.1614,
"step": 2150
},
{
"epoch": 6.260869565217392,
"grad_norm": 0.6318684816360474,
"learning_rate": 9.908468642731995e-05,
"loss": 0.1571,
"step": 2160
},
{
"epoch": 6.2898550724637685,
"grad_norm": 0.4686439633369446,
"learning_rate": 9.906887282016707e-05,
"loss": 0.1431,
"step": 2170
},
{
"epoch": 6.318840579710145,
"grad_norm": 0.5213261842727661,
"learning_rate": 9.90529250605896e-05,
"loss": 0.1661,
"step": 2180
},
{
"epoch": 6.3478260869565215,
"grad_norm": 0.5317389369010925,
"learning_rate": 9.903684319218809e-05,
"loss": 0.1251,
"step": 2190
},
{
"epoch": 6.3768115942028984,
"grad_norm": 0.4725372791290283,
"learning_rate": 9.902062725892976e-05,
"loss": 0.1367,
"step": 2200
},
{
"epoch": 6.405797101449275,
"grad_norm": 0.5488022565841675,
"learning_rate": 9.900427730514834e-05,
"loss": 0.1295,
"step": 2210
},
{
"epoch": 6.434782608695652,
"grad_norm": 0.402173787355423,
"learning_rate": 9.8987793375544e-05,
"loss": 0.1478,
"step": 2220
},
{
"epoch": 6.463768115942029,
"grad_norm": 0.6250830292701721,
"learning_rate": 9.897117551518318e-05,
"loss": 0.1516,
"step": 2230
},
{
"epoch": 6.492753623188406,
"grad_norm": 0.4163563549518585,
"learning_rate": 9.895442376949844e-05,
"loss": 0.1209,
"step": 2240
},
{
"epoch": 6.521739130434782,
"grad_norm": 0.709176778793335,
"learning_rate": 9.893753818428845e-05,
"loss": 0.1412,
"step": 2250
},
{
"epoch": 6.550724637681159,
"grad_norm": 0.526637077331543,
"learning_rate": 9.892051880571773e-05,
"loss": 0.1622,
"step": 2260
},
{
"epoch": 6.579710144927536,
"grad_norm": 0.5909827351570129,
"learning_rate": 9.890336568031663e-05,
"loss": 0.156,
"step": 2270
},
{
"epoch": 6.608695652173913,
"grad_norm": 0.6670017838478088,
"learning_rate": 9.888607885498113e-05,
"loss": 0.1487,
"step": 2280
},
{
"epoch": 6.63768115942029,
"grad_norm": 0.6181092858314514,
"learning_rate": 9.886865837697275e-05,
"loss": 0.151,
"step": 2290
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.4304220378398895,
"learning_rate": 9.88511042939184e-05,
"loss": 0.1463,
"step": 2300
},
{
"epoch": 6.695652173913043,
"grad_norm": 0.40652596950531006,
"learning_rate": 9.883341665381028e-05,
"loss": 0.1495,
"step": 2310
},
{
"epoch": 6.72463768115942,
"grad_norm": 0.43385979533195496,
"learning_rate": 9.881559550500575e-05,
"loss": 0.1357,
"step": 2320
},
{
"epoch": 6.753623188405797,
"grad_norm": 0.4716493487358093,
"learning_rate": 9.879764089622712e-05,
"loss": 0.1589,
"step": 2330
},
{
"epoch": 6.782608695652174,
"grad_norm": 0.4198303520679474,
"learning_rate": 9.87795528765616e-05,
"loss": 0.1314,
"step": 2340
},
{
"epoch": 6.811594202898551,
"grad_norm": 0.5235840082168579,
"learning_rate": 9.876133149546118e-05,
"loss": 0.1525,
"step": 2350
},
{
"epoch": 6.840579710144928,
"grad_norm": 0.3913216292858124,
"learning_rate": 9.874297680274238e-05,
"loss": 0.1571,
"step": 2360
},
{
"epoch": 6.869565217391305,
"grad_norm": 0.38975727558135986,
"learning_rate": 9.872448884858624e-05,
"loss": 0.1561,
"step": 2370
},
{
"epoch": 6.898550724637682,
"grad_norm": 0.2768588662147522,
"learning_rate": 9.870586768353815e-05,
"loss": 0.1152,
"step": 2380
},
{
"epoch": 6.927536231884058,
"grad_norm": 0.48241758346557617,
"learning_rate": 9.868711335850764e-05,
"loss": 0.1588,
"step": 2390
},
{
"epoch": 6.956521739130435,
"grad_norm": 0.4768286347389221,
"learning_rate": 9.866822592476833e-05,
"loss": 0.1518,
"step": 2400
},
{
"epoch": 6.9855072463768115,
"grad_norm": 0.5642341375350952,
"learning_rate": 9.86492054339577e-05,
"loss": 0.1345,
"step": 2410
},
{
"epoch": 7.0144927536231885,
"grad_norm": 0.4740188717842102,
"learning_rate": 9.863005193807711e-05,
"loss": 0.1148,
"step": 2420
},
{
"epoch": 7.043478260869565,
"grad_norm": 0.3090324103832245,
"learning_rate": 9.861076548949143e-05,
"loss": 0.1197,
"step": 2430
},
{
"epoch": 7.072463768115942,
"grad_norm": 0.4523588716983795,
"learning_rate": 9.859134614092912e-05,
"loss": 0.1443,
"step": 2440
},
{
"epoch": 7.101449275362318,
"grad_norm": 0.539725124835968,
"learning_rate": 9.857179394548191e-05,
"loss": 0.1371,
"step": 2450
},
{
"epoch": 7.130434782608695,
"grad_norm": 0.5571834444999695,
"learning_rate": 9.855210895660477e-05,
"loss": 0.1456,
"step": 2460
},
{
"epoch": 7.159420289855072,
"grad_norm": 0.4227403402328491,
"learning_rate": 9.853229122811568e-05,
"loss": 0.1377,
"step": 2470
},
{
"epoch": 7.188405797101449,
"grad_norm": 0.4217086434364319,
"learning_rate": 9.851234081419559e-05,
"loss": 0.1331,
"step": 2480
},
{
"epoch": 7.217391304347826,
"grad_norm": 0.47015127539634705,
"learning_rate": 9.849225776938814e-05,
"loss": 0.1382,
"step": 2490
},
{
"epoch": 7.246376811594203,
"grad_norm": 0.6300743818283081,
"learning_rate": 9.847204214859964e-05,
"loss": 0.1437,
"step": 2500
},
{
"epoch": 7.27536231884058,
"grad_norm": 0.49502405524253845,
"learning_rate": 9.845169400709879e-05,
"loss": 0.1415,
"step": 2510
},
{
"epoch": 7.304347826086957,
"grad_norm": 0.5468514561653137,
"learning_rate": 9.843121340051664e-05,
"loss": 0.1363,
"step": 2520
},
{
"epoch": 7.333333333333333,
"grad_norm": 0.5560225248336792,
"learning_rate": 9.841060038484641e-05,
"loss": 0.14,
"step": 2530
},
{
"epoch": 7.36231884057971,
"grad_norm": 0.6520473957061768,
"learning_rate": 9.838985501644328e-05,
"loss": 0.1538,
"step": 2540
},
{
"epoch": 7.391304347826087,
"grad_norm": 0.71478271484375,
"learning_rate": 9.83689773520243e-05,
"loss": 0.1521,
"step": 2550
},
{
"epoch": 7.420289855072464,
"grad_norm": 0.41255566477775574,
"learning_rate": 9.834796744866819e-05,
"loss": 0.1469,
"step": 2560
},
{
"epoch": 7.449275362318841,
"grad_norm": 0.41565924882888794,
"learning_rate": 9.832682536381525e-05,
"loss": 0.1522,
"step": 2570
},
{
"epoch": 7.478260869565218,
"grad_norm": 0.6504526138305664,
"learning_rate": 9.830555115526711e-05,
"loss": 0.1318,
"step": 2580
},
{
"epoch": 7.507246376811594,
"grad_norm": 0.3729122281074524,
"learning_rate": 9.828414488118667e-05,
"loss": 0.108,
"step": 2590
},
{
"epoch": 7.536231884057971,
"grad_norm": 0.6625639796257019,
"learning_rate": 9.826260660009785e-05,
"loss": 0.1773,
"step": 2600
},
{
"epoch": 7.565217391304348,
"grad_norm": 1.0479519367218018,
"learning_rate": 9.824093637088547e-05,
"loss": 0.1384,
"step": 2610
},
{
"epoch": 7.594202898550725,
"grad_norm": 0.4728688597679138,
"learning_rate": 9.821913425279514e-05,
"loss": 0.144,
"step": 2620
},
{
"epoch": 7.6231884057971016,
"grad_norm": 0.5890956521034241,
"learning_rate": 9.8197200305433e-05,
"loss": 0.1556,
"step": 2630
},
{
"epoch": 7.6521739130434785,
"grad_norm": 0.5349107384681702,
"learning_rate": 9.817513458876564e-05,
"loss": 0.1333,
"step": 2640
},
{
"epoch": 7.681159420289855,
"grad_norm": 0.3802502155303955,
"learning_rate": 9.815293716311987e-05,
"loss": 0.1366,
"step": 2650
},
{
"epoch": 7.710144927536232,
"grad_norm": 0.539300262928009,
"learning_rate": 9.813060808918262e-05,
"loss": 0.1531,
"step": 2660
},
{
"epoch": 7.739130434782608,
"grad_norm": 0.45709091424942017,
"learning_rate": 9.810814742800069e-05,
"loss": 0.1543,
"step": 2670
},
{
"epoch": 7.768115942028985,
"grad_norm": 0.44815441966056824,
"learning_rate": 9.808555524098074e-05,
"loss": 0.1281,
"step": 2680
},
{
"epoch": 7.797101449275362,
"grad_norm": 0.45325276255607605,
"learning_rate": 9.806283158988887e-05,
"loss": 0.136,
"step": 2690
},
{
"epoch": 7.826086956521739,
"grad_norm": 0.41119185090065,
"learning_rate": 9.803997653685072e-05,
"loss": 0.1382,
"step": 2700
},
{
"epoch": 7.855072463768116,
"grad_norm": 0.5879584550857544,
"learning_rate": 9.801699014435112e-05,
"loss": 0.1433,
"step": 2710
},
{
"epoch": 7.884057971014493,
"grad_norm": 0.3625235855579376,
"learning_rate": 9.799387247523398e-05,
"loss": 0.127,
"step": 2720
},
{
"epoch": 7.913043478260869,
"grad_norm": 0.6583592891693115,
"learning_rate": 9.797062359270215e-05,
"loss": 0.16,
"step": 2730
},
{
"epoch": 7.942028985507246,
"grad_norm": 0.3526526689529419,
"learning_rate": 9.794724356031715e-05,
"loss": 0.1129,
"step": 2740
},
{
"epoch": 7.971014492753623,
"grad_norm": 0.4039490818977356,
"learning_rate": 9.792373244199913e-05,
"loss": 0.145,
"step": 2750
},
{
"epoch": 8.0,
"grad_norm": 0.9839149117469788,
"learning_rate": 9.790009030202658e-05,
"loss": 0.1548,
"step": 2760
},
{
"epoch": 8.028985507246377,
"grad_norm": 0.5473302602767944,
"learning_rate": 9.78763172050362e-05,
"loss": 0.1357,
"step": 2770
},
{
"epoch": 8.057971014492754,
"grad_norm": 0.4842037260532379,
"learning_rate": 9.785241321602274e-05,
"loss": 0.1599,
"step": 2780
},
{
"epoch": 8.08695652173913,
"grad_norm": 0.6084038615226746,
"learning_rate": 9.782837840033879e-05,
"loss": 0.1236,
"step": 2790
},
{
"epoch": 8.115942028985508,
"grad_norm": 0.5223290324211121,
"learning_rate": 9.780421282369461e-05,
"loss": 0.1185,
"step": 2800
},
{
"epoch": 8.144927536231885,
"grad_norm": 0.49084579944610596,
"learning_rate": 9.777991655215797e-05,
"loss": 0.1335,
"step": 2810
},
{
"epoch": 8.173913043478262,
"grad_norm": 0.5133453607559204,
"learning_rate": 9.775548965215394e-05,
"loss": 0.143,
"step": 2820
},
{
"epoch": 8.202898550724637,
"grad_norm": 0.5703955292701721,
"learning_rate": 9.773093219046474e-05,
"loss": 0.1714,
"step": 2830
},
{
"epoch": 8.231884057971014,
"grad_norm": 0.3753199279308319,
"learning_rate": 9.770624423422954e-05,
"loss": 0.1514,
"step": 2840
},
{
"epoch": 8.26086956521739,
"grad_norm": 0.3518688678741455,
"learning_rate": 9.768142585094426e-05,
"loss": 0.1448,
"step": 2850
},
{
"epoch": 8.289855072463768,
"grad_norm": 0.5194658041000366,
"learning_rate": 9.765647710846142e-05,
"loss": 0.1319,
"step": 2860
},
{
"epoch": 8.318840579710145,
"grad_norm": 0.4543875455856323,
"learning_rate": 9.763139807498991e-05,
"loss": 0.1525,
"step": 2870
},
{
"epoch": 8.347826086956522,
"grad_norm": 0.5964239239692688,
"learning_rate": 9.760618881909487e-05,
"loss": 0.1428,
"step": 2880
},
{
"epoch": 8.376811594202898,
"grad_norm": 0.31862547993659973,
"learning_rate": 9.758084940969744e-05,
"loss": 0.1424,
"step": 2890
},
{
"epoch": 8.405797101449275,
"grad_norm": 0.5183411836624146,
"learning_rate": 9.755537991607459e-05,
"loss": 0.1235,
"step": 2900
},
{
"epoch": 8.434782608695652,
"grad_norm": 0.5497164130210876,
"learning_rate": 9.752978040785895e-05,
"loss": 0.1226,
"step": 2910
},
{
"epoch": 8.46376811594203,
"grad_norm": 0.5015374422073364,
"learning_rate": 9.750405095503859e-05,
"loss": 0.126,
"step": 2920
},
{
"epoch": 8.492753623188406,
"grad_norm": 0.3834163546562195,
"learning_rate": 9.747819162795686e-05,
"loss": 0.1299,
"step": 2930
},
{
"epoch": 8.521739130434783,
"grad_norm": 0.4107052981853485,
"learning_rate": 9.745220249731217e-05,
"loss": 0.1399,
"step": 2940
},
{
"epoch": 8.55072463768116,
"grad_norm": 0.6754370331764221,
"learning_rate": 9.742608363415781e-05,
"loss": 0.1369,
"step": 2950
},
{
"epoch": 8.579710144927537,
"grad_norm": 0.38062620162963867,
"learning_rate": 9.739983510990176e-05,
"loss": 0.1303,
"step": 2960
},
{
"epoch": 8.608695652173914,
"grad_norm": 0.5319868326187134,
"learning_rate": 9.737345699630647e-05,
"loss": 0.1393,
"step": 2970
},
{
"epoch": 8.63768115942029,
"grad_norm": 0.28532159328460693,
"learning_rate": 9.734694936548869e-05,
"loss": 0.1368,
"step": 2980
},
{
"epoch": 8.666666666666666,
"grad_norm": 0.6283175945281982,
"learning_rate": 9.732031228991932e-05,
"loss": 0.137,
"step": 2990
},
{
"epoch": 8.695652173913043,
"grad_norm": 0.4746125042438507,
"learning_rate": 9.729354584242302e-05,
"loss": 0.1409,
"step": 3000
},
{
"epoch": 8.72463768115942,
"grad_norm": 0.6005597114562988,
"learning_rate": 9.726665009617832e-05,
"loss": 0.1407,
"step": 3010
},
{
"epoch": 8.753623188405797,
"grad_norm": 0.4808926284313202,
"learning_rate": 9.723962512471714e-05,
"loss": 0.1552,
"step": 3020
},
{
"epoch": 8.782608695652174,
"grad_norm": 0.5887641310691833,
"learning_rate": 9.72124710019247e-05,
"loss": 0.1336,
"step": 3030
},
{
"epoch": 8.81159420289855,
"grad_norm": 0.34358280897140503,
"learning_rate": 9.718518780203934e-05,
"loss": 0.1367,
"step": 3040
},
{
"epoch": 8.840579710144928,
"grad_norm": 0.4416921138763428,
"learning_rate": 9.715777559965228e-05,
"loss": 0.1232,
"step": 3050
},
{
"epoch": 8.869565217391305,
"grad_norm": 0.6384701132774353,
"learning_rate": 9.713023446970746e-05,
"loss": 0.1429,
"step": 3060
},
{
"epoch": 8.898550724637682,
"grad_norm": 0.5382649302482605,
"learning_rate": 9.710256448750126e-05,
"loss": 0.1606,
"step": 3070
},
{
"epoch": 8.927536231884059,
"grad_norm": 0.3950713276863098,
"learning_rate": 9.707476572868235e-05,
"loss": 0.131,
"step": 3080
},
{
"epoch": 8.956521739130435,
"grad_norm": 0.38749822974205017,
"learning_rate": 9.704683826925149e-05,
"loss": 0.1158,
"step": 3090
},
{
"epoch": 8.985507246376812,
"grad_norm": 0.4517150819301605,
"learning_rate": 9.701878218556129e-05,
"loss": 0.166,
"step": 3100
},
{
"epoch": 9.014492753623188,
"grad_norm": 0.47911375761032104,
"learning_rate": 9.699059755431598e-05,
"loss": 0.1177,
"step": 3110
},
{
"epoch": 9.043478260869565,
"grad_norm": 0.2541674077510834,
"learning_rate": 9.696228445257132e-05,
"loss": 0.1254,
"step": 3120
},
{
"epoch": 9.072463768115941,
"grad_norm": 0.498009592294693,
"learning_rate": 9.693384295773419e-05,
"loss": 0.1603,
"step": 3130
},
{
"epoch": 9.101449275362318,
"grad_norm": 0.443220317363739,
"learning_rate": 9.690527314756259e-05,
"loss": 0.1382,
"step": 3140
},
{
"epoch": 9.130434782608695,
"grad_norm": 0.32711514830589294,
"learning_rate": 9.687657510016527e-05,
"loss": 0.1351,
"step": 3150
},
{
"epoch": 9.159420289855072,
"grad_norm": 0.4041106402873993,
"learning_rate": 9.684774889400161e-05,
"loss": 0.132,
"step": 3160
},
{
"epoch": 9.18840579710145,
"grad_norm": 0.3735228180885315,
"learning_rate": 9.681879460788135e-05,
"loss": 0.1204,
"step": 3170
},
{
"epoch": 9.217391304347826,
"grad_norm": 0.4736388921737671,
"learning_rate": 9.67897123209644e-05,
"loss": 0.1156,
"step": 3180
},
{
"epoch": 9.246376811594203,
"grad_norm": 0.39969536662101746,
"learning_rate": 9.676050211276062e-05,
"loss": 0.1488,
"step": 3190
},
{
"epoch": 9.27536231884058,
"grad_norm": 0.5019108057022095,
"learning_rate": 9.673116406312962e-05,
"loss": 0.1351,
"step": 3200
},
{
"epoch": 9.304347826086957,
"grad_norm": 0.45118093490600586,
"learning_rate": 9.67016982522805e-05,
"loss": 0.1263,
"step": 3210
},
{
"epoch": 9.333333333333334,
"grad_norm": 0.5472857356071472,
"learning_rate": 9.667210476077164e-05,
"loss": 0.1648,
"step": 3220
},
{
"epoch": 9.36231884057971,
"grad_norm": 0.32493582367897034,
"learning_rate": 9.664238366951055e-05,
"loss": 0.1309,
"step": 3230
},
{
"epoch": 9.391304347826088,
"grad_norm": 0.7096918821334839,
"learning_rate": 9.661253505975355e-05,
"loss": 0.1383,
"step": 3240
},
{
"epoch": 9.420289855072463,
"grad_norm": 0.5345839858055115,
"learning_rate": 9.658255901310557e-05,
"loss": 0.1198,
"step": 3250
},
{
"epoch": 9.44927536231884,
"grad_norm": 0.5087151527404785,
"learning_rate": 9.655245561152e-05,
"loss": 0.1199,
"step": 3260
},
{
"epoch": 9.478260869565217,
"grad_norm": 0.2939687967300415,
"learning_rate": 9.65222249372984e-05,
"loss": 0.1342,
"step": 3270
},
{
"epoch": 9.507246376811594,
"grad_norm": 0.3696477711200714,
"learning_rate": 9.649186707309026e-05,
"loss": 0.1361,
"step": 3280
},
{
"epoch": 9.53623188405797,
"grad_norm": 0.4263698160648346,
"learning_rate": 9.646138210189283e-05,
"loss": 0.1453,
"step": 3290
},
{
"epoch": 9.565217391304348,
"grad_norm": 0.40898415446281433,
"learning_rate": 9.643077010705087e-05,
"loss": 0.112,
"step": 3300
},
{
"epoch": 9.594202898550725,
"grad_norm": 0.37168997526168823,
"learning_rate": 9.640003117225637e-05,
"loss": 0.1338,
"step": 3310
},
{
"epoch": 9.623188405797102,
"grad_norm": 0.4604577124118805,
"learning_rate": 9.636916538154846e-05,
"loss": 0.1511,
"step": 3320
},
{
"epoch": 9.652173913043478,
"grad_norm": 0.5092346668243408,
"learning_rate": 9.633817281931296e-05,
"loss": 0.1197,
"step": 3330
},
{
"epoch": 9.681159420289855,
"grad_norm": 0.43370747566223145,
"learning_rate": 9.630705357028242e-05,
"loss": 0.144,
"step": 3340
},
{
"epoch": 9.710144927536232,
"grad_norm": 0.4658154249191284,
"learning_rate": 9.627580771953563e-05,
"loss": 0.1453,
"step": 3350
},
{
"epoch": 9.73913043478261,
"grad_norm": 0.4420405924320221,
"learning_rate": 9.624443535249759e-05,
"loss": 0.1331,
"step": 3360
},
{
"epoch": 9.768115942028986,
"grad_norm": 0.4711594879627228,
"learning_rate": 9.621293655493913e-05,
"loss": 0.1204,
"step": 3370
},
{
"epoch": 9.797101449275363,
"grad_norm": 0.2817968428134918,
"learning_rate": 9.618131141297675e-05,
"loss": 0.1309,
"step": 3380
},
{
"epoch": 9.826086956521738,
"grad_norm": 0.3537946343421936,
"learning_rate": 9.614956001307242e-05,
"loss": 0.1464,
"step": 3390
},
{
"epoch": 9.855072463768115,
"grad_norm": 0.30007612705230713,
"learning_rate": 9.611768244203321e-05,
"loss": 0.1186,
"step": 3400
},
{
"epoch": 9.884057971014492,
"grad_norm": 0.41064971685409546,
"learning_rate": 9.60856787870112e-05,
"loss": 0.1263,
"step": 3410
},
{
"epoch": 9.91304347826087,
"grad_norm": 0.4655996263027191,
"learning_rate": 9.605354913550318e-05,
"loss": 0.1514,
"step": 3420
},
{
"epoch": 9.942028985507246,
"grad_norm": 0.5630468726158142,
"learning_rate": 9.602129357535037e-05,
"loss": 0.1315,
"step": 3430
},
{
"epoch": 9.971014492753623,
"grad_norm": 0.7113257646560669,
"learning_rate": 9.598891219473825e-05,
"loss": 0.1179,
"step": 3440
},
{
"epoch": 10.0,
"grad_norm": 0.7581853866577148,
"learning_rate": 9.595640508219625e-05,
"loss": 0.1434,
"step": 3450
},
{
"epoch": 10.028985507246377,
"grad_norm": 0.6476505994796753,
"learning_rate": 9.592377232659761e-05,
"loss": 0.1276,
"step": 3460
},
{
"epoch": 10.057971014492754,
"grad_norm": 0.4075034260749817,
"learning_rate": 9.589101401715904e-05,
"loss": 0.142,
"step": 3470
},
{
"epoch": 10.08695652173913,
"grad_norm": 0.7294759154319763,
"learning_rate": 9.585813024344045e-05,
"loss": 0.1464,
"step": 3480
},
{
"epoch": 10.115942028985508,
"grad_norm": 0.3397752642631531,
"learning_rate": 9.58251210953449e-05,
"loss": 0.1374,
"step": 3490
},
{
"epoch": 10.144927536231885,
"grad_norm": 0.4181293547153473,
"learning_rate": 9.579198666311809e-05,
"loss": 0.1442,
"step": 3500
},
{
"epoch": 10.173913043478262,
"grad_norm": 0.45683369040489197,
"learning_rate": 9.575872703734832e-05,
"loss": 0.142,
"step": 3510
},
{
"epoch": 10.202898550724637,
"grad_norm": 0.37618064880371094,
"learning_rate": 9.572534230896611e-05,
"loss": 0.1256,
"step": 3520
},
{
"epoch": 10.231884057971014,
"grad_norm": 0.581132709980011,
"learning_rate": 9.569183256924403e-05,
"loss": 0.1547,
"step": 3530
},
{
"epoch": 10.26086956521739,
"grad_norm": 0.35314807295799255,
"learning_rate": 9.565819790979646e-05,
"loss": 0.119,
"step": 3540
},
{
"epoch": 10.289855072463768,
"grad_norm": 0.43084269762039185,
"learning_rate": 9.562443842257925e-05,
"loss": 0.1155,
"step": 3550
},
{
"epoch": 10.318840579710145,
"grad_norm": 0.37022560834884644,
"learning_rate": 9.559055419988956e-05,
"loss": 0.1609,
"step": 3560
},
{
"epoch": 10.347826086956522,
"grad_norm": 0.2883586883544922,
"learning_rate": 9.555654533436557e-05,
"loss": 0.1052,
"step": 3570
},
{
"epoch": 10.376811594202898,
"grad_norm": 0.5148602724075317,
"learning_rate": 9.552241191898621e-05,
"loss": 0.1423,
"step": 3580
},
{
"epoch": 10.405797101449275,
"grad_norm": 0.4749770164489746,
"learning_rate": 9.548815404707092e-05,
"loss": 0.1194,
"step": 3590
},
{
"epoch": 10.434782608695652,
"grad_norm": 0.4021095335483551,
"learning_rate": 9.545377181227942e-05,
"loss": 0.124,
"step": 3600
},
{
"epoch": 10.46376811594203,
"grad_norm": 0.30841973423957825,
"learning_rate": 9.541926530861145e-05,
"loss": 0.1195,
"step": 3610
},
{
"epoch": 10.492753623188406,
"grad_norm": 0.3576466739177704,
"learning_rate": 9.538463463040645e-05,
"loss": 0.1169,
"step": 3620
},
{
"epoch": 10.521739130434783,
"grad_norm": 0.5112766027450562,
"learning_rate": 9.534987987234337e-05,
"loss": 0.1283,
"step": 3630
},
{
"epoch": 10.55072463768116,
"grad_norm": 0.27624791860580444,
"learning_rate": 9.53150011294404e-05,
"loss": 0.1042,
"step": 3640
},
{
"epoch": 10.579710144927537,
"grad_norm": 0.4616936445236206,
"learning_rate": 9.527999849705471e-05,
"loss": 0.1214,
"step": 3650
},
{
"epoch": 10.608695652173914,
"grad_norm": 0.2872353792190552,
"learning_rate": 9.524487207088213e-05,
"loss": 0.1272,
"step": 3660
},
{
"epoch": 10.63768115942029,
"grad_norm": 0.3924836218357086,
"learning_rate": 9.520962194695698e-05,
"loss": 0.1152,
"step": 3670
},
{
"epoch": 10.666666666666666,
"grad_norm": 0.5018351078033447,
"learning_rate": 9.517424822165175e-05,
"loss": 0.1425,
"step": 3680
},
{
"epoch": 10.695652173913043,
"grad_norm": 0.4114161431789398,
"learning_rate": 9.513875099167685e-05,
"loss": 0.1287,
"step": 3690
},
{
"epoch": 10.72463768115942,
"grad_norm": 0.8867626190185547,
"learning_rate": 9.510313035408035e-05,
"loss": 0.1243,
"step": 3700
},
{
"epoch": 10.753623188405797,
"grad_norm": 0.48639723658561707,
"learning_rate": 9.506738640624775e-05,
"loss": 0.1244,
"step": 3710
},
{
"epoch": 10.782608695652174,
"grad_norm": 0.5300337672233582,
"learning_rate": 9.50315192459016e-05,
"loss": 0.1339,
"step": 3720
},
{
"epoch": 10.81159420289855,
"grad_norm": 0.4277614951133728,
"learning_rate": 9.499552897110136e-05,
"loss": 0.148,
"step": 3730
},
{
"epoch": 10.840579710144928,
"grad_norm": 0.41532713174819946,
"learning_rate": 9.495941568024304e-05,
"loss": 0.1276,
"step": 3740
},
{
"epoch": 10.869565217391305,
"grad_norm": 0.37189435958862305,
"learning_rate": 9.492317947205904e-05,
"loss": 0.1215,
"step": 3750
},
{
"epoch": 10.898550724637682,
"grad_norm": 0.4247940182685852,
"learning_rate": 9.488682044561775e-05,
"loss": 0.1248,
"step": 3760
},
{
"epoch": 10.927536231884059,
"grad_norm": 0.4739855229854584,
"learning_rate": 9.485033870032335e-05,
"loss": 0.1156,
"step": 3770
},
{
"epoch": 10.956521739130435,
"grad_norm": 0.275510311126709,
"learning_rate": 9.481373433591556e-05,
"loss": 0.129,
"step": 3780
},
{
"epoch": 10.985507246376812,
"grad_norm": 0.4555635154247284,
"learning_rate": 9.47770074524693e-05,
"loss": 0.1377,
"step": 3790
},
{
"epoch": 11.014492753623188,
"grad_norm": 0.4588840901851654,
"learning_rate": 9.474015815039446e-05,
"loss": 0.1352,
"step": 3800
},
{
"epoch": 11.043478260869565,
"grad_norm": 0.27891016006469727,
"learning_rate": 9.470318653043565e-05,
"loss": 0.1242,
"step": 3810
},
{
"epoch": 11.072463768115941,
"grad_norm": 0.34980854392051697,
"learning_rate": 9.466609269367185e-05,
"loss": 0.1303,
"step": 3820
},
{
"epoch": 11.101449275362318,
"grad_norm": 0.4605090022087097,
"learning_rate": 9.46288767415162e-05,
"loss": 0.1186,
"step": 3830
},
{
"epoch": 11.130434782608695,
"grad_norm": 0.2761806845664978,
"learning_rate": 9.459153877571567e-05,
"loss": 0.1285,
"step": 3840
},
{
"epoch": 11.159420289855072,
"grad_norm": 0.4459534287452698,
"learning_rate": 9.455407889835087e-05,
"loss": 0.1129,
"step": 3850
},
{
"epoch": 11.18840579710145,
"grad_norm": 0.40482795238494873,
"learning_rate": 9.451649721183564e-05,
"loss": 0.1553,
"step": 3860
},
{
"epoch": 11.217391304347826,
"grad_norm": 0.596967875957489,
"learning_rate": 9.447879381891692e-05,
"loss": 0.1389,
"step": 3870
},
{
"epoch": 11.246376811594203,
"grad_norm": 0.4592018127441406,
"learning_rate": 9.444096882267428e-05,
"loss": 0.1375,
"step": 3880
},
{
"epoch": 11.27536231884058,
"grad_norm": 0.4663671851158142,
"learning_rate": 9.440302232651988e-05,
"loss": 0.1164,
"step": 3890
},
{
"epoch": 11.304347826086957,
"grad_norm": 0.42845603823661804,
"learning_rate": 9.436495443419795e-05,
"loss": 0.1206,
"step": 3900
},
{
"epoch": 11.333333333333334,
"grad_norm": 0.39661505818367004,
"learning_rate": 9.432676524978466e-05,
"loss": 0.1007,
"step": 3910
},
{
"epoch": 11.36231884057971,
"grad_norm": 0.3809431195259094,
"learning_rate": 9.42884548776878e-05,
"loss": 0.147,
"step": 3920
},
{
"epoch": 11.391304347826088,
"grad_norm": 0.3601577877998352,
"learning_rate": 9.425002342264646e-05,
"loss": 0.1223,
"step": 3930
},
{
"epoch": 11.420289855072463,
"grad_norm": 0.4095447063446045,
"learning_rate": 9.421147098973077e-05,
"loss": 0.1101,
"step": 3940
},
{
"epoch": 11.44927536231884,
"grad_norm": 0.43890243768692017,
"learning_rate": 9.41727976843416e-05,
"loss": 0.1257,
"step": 3950
},
{
"epoch": 11.478260869565217,
"grad_norm": 0.31772735714912415,
"learning_rate": 9.413400361221029e-05,
"loss": 0.1126,
"step": 3960
},
{
"epoch": 11.507246376811594,
"grad_norm": 0.3342031240463257,
"learning_rate": 9.409508887939835e-05,
"loss": 0.1275,
"step": 3970
},
{
"epoch": 11.53623188405797,
"grad_norm": 0.3726749122142792,
"learning_rate": 9.40560535922972e-05,
"loss": 0.1108,
"step": 3980
},
{
"epoch": 11.565217391304348,
"grad_norm": 0.4039180278778076,
"learning_rate": 9.40168978576278e-05,
"loss": 0.1288,
"step": 3990
},
{
"epoch": 11.594202898550725,
"grad_norm": 0.4435559809207916,
"learning_rate": 9.397762178244043e-05,
"loss": 0.1298,
"step": 4000
},
{
"epoch": 11.623188405797102,
"grad_norm": 0.48986756801605225,
"learning_rate": 9.393822547411439e-05,
"loss": 0.1584,
"step": 4010
},
{
"epoch": 11.652173913043478,
"grad_norm": 0.33243680000305176,
"learning_rate": 9.389870904035769e-05,
"loss": 0.1322,
"step": 4020
},
{
"epoch": 11.681159420289855,
"grad_norm": 0.27870336174964905,
"learning_rate": 9.385907258920672e-05,
"loss": 0.1187,
"step": 4030
},
{
"epoch": 11.710144927536232,
"grad_norm": 0.4363289773464203,
"learning_rate": 9.381931622902607e-05,
"loss": 0.1322,
"step": 4040
},
{
"epoch": 11.73913043478261,
"grad_norm": 0.39369621872901917,
"learning_rate": 9.377944006850807e-05,
"loss": 0.1221,
"step": 4050
},
{
"epoch": 11.768115942028986,
"grad_norm": 0.4057519733905792,
"learning_rate": 9.373944421667265e-05,
"loss": 0.1439,
"step": 4060
},
{
"epoch": 11.797101449275363,
"grad_norm": 0.4745919406414032,
"learning_rate": 9.369932878286691e-05,
"loss": 0.1367,
"step": 4070
},
{
"epoch": 11.826086956521738,
"grad_norm": 0.5527012944221497,
"learning_rate": 9.365909387676494e-05,
"loss": 0.1388,
"step": 4080
},
{
"epoch": 11.855072463768115,
"grad_norm": 0.4839910566806793,
"learning_rate": 9.361873960836744e-05,
"loss": 0.1204,
"step": 4090
},
{
"epoch": 11.884057971014492,
"grad_norm": 0.4102983772754669,
"learning_rate": 9.357826608800142e-05,
"loss": 0.1202,
"step": 4100
},
{
"epoch": 11.91304347826087,
"grad_norm": 0.382380872964859,
"learning_rate": 9.353767342631994e-05,
"loss": 0.1247,
"step": 4110
},
{
"epoch": 11.942028985507246,
"grad_norm": 0.384352445602417,
"learning_rate": 9.34969617343018e-05,
"loss": 0.1364,
"step": 4120
},
{
"epoch": 11.971014492753623,
"grad_norm": 0.46882691979408264,
"learning_rate": 9.345613112325122e-05,
"loss": 0.1298,
"step": 4130
},
{
"epoch": 12.0,
"grad_norm": 0.986838698387146,
"learning_rate": 9.34151817047975e-05,
"loss": 0.1259,
"step": 4140
},
{
"epoch": 12.028985507246377,
"grad_norm": 0.41458386182785034,
"learning_rate": 9.33741135908948e-05,
"loss": 0.1174,
"step": 4150
},
{
"epoch": 12.057971014492754,
"grad_norm": 0.5669786930084229,
"learning_rate": 9.33329268938218e-05,
"loss": 0.1255,
"step": 4160
},
{
"epoch": 12.08695652173913,
"grad_norm": 0.418151319026947,
"learning_rate": 9.329162172618132e-05,
"loss": 0.1223,
"step": 4170
},
{
"epoch": 12.115942028985508,
"grad_norm": 0.4376254677772522,
"learning_rate": 9.325019820090013e-05,
"loss": 0.1284,
"step": 4180
},
{
"epoch": 12.144927536231885,
"grad_norm": 0.5084844827651978,
"learning_rate": 9.320865643122855e-05,
"loss": 0.1225,
"step": 4190
},
{
"epoch": 12.173913043478262,
"grad_norm": 0.3965758979320526,
"learning_rate": 9.316699653074023e-05,
"loss": 0.1332,
"step": 4200
},
{
"epoch": 12.202898550724637,
"grad_norm": 0.39621663093566895,
"learning_rate": 9.312521861333172e-05,
"loss": 0.1141,
"step": 4210
},
{
"epoch": 12.231884057971014,
"grad_norm": 0.4206016957759857,
"learning_rate": 9.308332279322224e-05,
"loss": 0.1282,
"step": 4220
},
{
"epoch": 12.26086956521739,
"grad_norm": 0.34612345695495605,
"learning_rate": 9.304130918495338e-05,
"loss": 0.1067,
"step": 4230
},
{
"epoch": 12.289855072463768,
"grad_norm": 0.5288470387458801,
"learning_rate": 9.299917790338874e-05,
"loss": 0.1364,
"step": 4240
},
{
"epoch": 12.318840579710145,
"grad_norm": 0.49290069937705994,
"learning_rate": 9.295692906371363e-05,
"loss": 0.1348,
"step": 4250
},
{
"epoch": 12.347826086956522,
"grad_norm": 0.2043665647506714,
"learning_rate": 9.291456278143476e-05,
"loss": 0.1127,
"step": 4260
},
{
"epoch": 12.376811594202898,
"grad_norm": 0.6402058005332947,
"learning_rate": 9.287207917237994e-05,
"loss": 0.1295,
"step": 4270
},
{
"epoch": 12.405797101449275,
"grad_norm": 0.29695194959640503,
"learning_rate": 9.282947835269773e-05,
"loss": 0.102,
"step": 4280
},
{
"epoch": 12.434782608695652,
"grad_norm": 0.3424241244792938,
"learning_rate": 9.278676043885715e-05,
"loss": 0.1275,
"step": 4290
},
{
"epoch": 12.46376811594203,
"grad_norm": 0.4913289546966553,
"learning_rate": 9.274392554764733e-05,
"loss": 0.1413,
"step": 4300
},
{
"epoch": 12.492753623188406,
"grad_norm": 0.3618018627166748,
"learning_rate": 9.270097379617723e-05,
"loss": 0.1103,
"step": 4310
},
{
"epoch": 12.521739130434783,
"grad_norm": 0.42373889684677124,
"learning_rate": 9.26579053018753e-05,
"loss": 0.1198,
"step": 4320
},
{
"epoch": 12.55072463768116,
"grad_norm": 0.3397703170776367,
"learning_rate": 9.261472018248918e-05,
"loss": 0.1132,
"step": 4330
},
{
"epoch": 12.579710144927537,
"grad_norm": 0.4344271719455719,
"learning_rate": 9.25714185560853e-05,
"loss": 0.1173,
"step": 4340
},
{
"epoch": 12.608695652173914,
"grad_norm": 0.4063388705253601,
"learning_rate": 9.252800054104868e-05,
"loss": 0.108,
"step": 4350
},
{
"epoch": 12.63768115942029,
"grad_norm": 0.3664158880710602,
"learning_rate": 9.248446625608252e-05,
"loss": 0.1152,
"step": 4360
},
{
"epoch": 12.666666666666666,
"grad_norm": 0.35261791944503784,
"learning_rate": 9.244081582020789e-05,
"loss": 0.1277,
"step": 4370
},
{
"epoch": 12.695652173913043,
"grad_norm": 0.4147641062736511,
"learning_rate": 9.239704935276339e-05,
"loss": 0.1108,
"step": 4380
},
{
"epoch": 12.72463768115942,
"grad_norm": 0.5231832265853882,
"learning_rate": 9.235316697340489e-05,
"loss": 0.1287,
"step": 4390
},
{
"epoch": 12.753623188405797,
"grad_norm": 0.40551823377609253,
"learning_rate": 9.230916880210512e-05,
"loss": 0.1171,
"step": 4400
},
{
"epoch": 12.782608695652174,
"grad_norm": 0.27819085121154785,
"learning_rate": 9.226505495915342e-05,
"loss": 0.1384,
"step": 4410
},
{
"epoch": 12.81159420289855,
"grad_norm": 0.40564286708831787,
"learning_rate": 9.222082556515536e-05,
"loss": 0.1157,
"step": 4420
},
{
"epoch": 12.840579710144928,
"grad_norm": 0.4431588351726532,
"learning_rate": 9.217648074103242e-05,
"loss": 0.1224,
"step": 4430
},
{
"epoch": 12.869565217391305,
"grad_norm": 0.34970754384994507,
"learning_rate": 9.213202060802161e-05,
"loss": 0.1189,
"step": 4440
},
{
"epoch": 12.898550724637682,
"grad_norm": 0.29916661977767944,
"learning_rate": 9.208744528767528e-05,
"loss": 0.1139,
"step": 4450
},
{
"epoch": 12.927536231884059,
"grad_norm": 0.3757326304912567,
"learning_rate": 9.204275490186064e-05,
"loss": 0.1073,
"step": 4460
},
{
"epoch": 12.956521739130435,
"grad_norm": 0.43750470876693726,
"learning_rate": 9.199794957275949e-05,
"loss": 0.1354,
"step": 4470
},
{
"epoch": 12.985507246376812,
"grad_norm": 0.3462923467159271,
"learning_rate": 9.19530294228679e-05,
"loss": 0.109,
"step": 4480
},
{
"epoch": 13.014492753623188,
"grad_norm": 0.23552751541137695,
"learning_rate": 9.190799457499583e-05,
"loss": 0.1315,
"step": 4490
},
{
"epoch": 13.043478260869565,
"grad_norm": 0.44175973534584045,
"learning_rate": 9.186284515226686e-05,
"loss": 0.1313,
"step": 4500
},
{
"epoch": 13.072463768115941,
"grad_norm": 0.43847179412841797,
"learning_rate": 9.181758127811777e-05,
"loss": 0.1329,
"step": 4510
},
{
"epoch": 13.101449275362318,
"grad_norm": 0.31816014647483826,
"learning_rate": 9.177220307629825e-05,
"loss": 0.1265,
"step": 4520
},
{
"epoch": 13.130434782608695,
"grad_norm": 0.4455469846725464,
"learning_rate": 9.172671067087059e-05,
"loss": 0.1069,
"step": 4530
},
{
"epoch": 13.159420289855072,
"grad_norm": 0.2768830358982086,
"learning_rate": 9.16811041862093e-05,
"loss": 0.1166,
"step": 4540
},
{
"epoch": 13.18840579710145,
"grad_norm": 0.39586612582206726,
"learning_rate": 9.163538374700076e-05,
"loss": 0.1239,
"step": 4550
},
{
"epoch": 13.217391304347826,
"grad_norm": 0.6842658519744873,
"learning_rate": 9.158954947824287e-05,
"loss": 0.1196,
"step": 4560
},
{
"epoch": 13.246376811594203,
"grad_norm": 0.3051077127456665,
"learning_rate": 9.154360150524482e-05,
"loss": 0.1277,
"step": 4570
},
{
"epoch": 13.27536231884058,
"grad_norm": 0.32419049739837646,
"learning_rate": 9.14975399536266e-05,
"loss": 0.1328,
"step": 4580
},
{
"epoch": 13.304347826086957,
"grad_norm": 0.49009594321250916,
"learning_rate": 9.14513649493187e-05,
"loss": 0.11,
"step": 4590
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.41023188829421997,
"learning_rate": 9.140507661856187e-05,
"loss": 0.1204,
"step": 4600
},
{
"epoch": 13.36231884057971,
"grad_norm": 0.27681684494018555,
"learning_rate": 9.135867508790661e-05,
"loss": 0.127,
"step": 4610
},
{
"epoch": 13.391304347826088,
"grad_norm": 0.33429259061813354,
"learning_rate": 9.131216048421291e-05,
"loss": 0.1056,
"step": 4620
},
{
"epoch": 13.420289855072463,
"grad_norm": 0.3825032114982605,
"learning_rate": 9.126553293464998e-05,
"loss": 0.1296,
"step": 4630
},
{
"epoch": 13.44927536231884,
"grad_norm": 0.28926411271095276,
"learning_rate": 9.121879256669572e-05,
"loss": 0.1088,
"step": 4640
},
{
"epoch": 13.478260869565217,
"grad_norm": 0.24572978913784027,
"learning_rate": 9.117193950813652e-05,
"loss": 0.1068,
"step": 4650
},
{
"epoch": 13.507246376811594,
"grad_norm": 0.462626576423645,
"learning_rate": 9.112497388706685e-05,
"loss": 0.1119,
"step": 4660
},
{
"epoch": 13.53623188405797,
"grad_norm": 0.4677536189556122,
"learning_rate": 9.10778958318889e-05,
"loss": 0.1113,
"step": 4670
},
{
"epoch": 13.565217391304348,
"grad_norm": 0.3768196105957031,
"learning_rate": 9.103070547131232e-05,
"loss": 0.111,
"step": 4680
},
{
"epoch": 13.594202898550725,
"grad_norm": 0.28670257329940796,
"learning_rate": 9.098340293435375e-05,
"loss": 0.1007,
"step": 4690
},
{
"epoch": 13.623188405797102,
"grad_norm": 0.3326264023780823,
"learning_rate": 9.093598835033649e-05,
"loss": 0.1417,
"step": 4700
},
{
"epoch": 13.652173913043478,
"grad_norm": 0.4190509021282196,
"learning_rate": 9.088846184889021e-05,
"loss": 0.1094,
"step": 4710
},
{
"epoch": 13.681159420289855,
"grad_norm": 0.48827919363975525,
"learning_rate": 9.084082355995057e-05,
"loss": 0.1145,
"step": 4720
},
{
"epoch": 13.710144927536232,
"grad_norm": 0.42035019397735596,
"learning_rate": 9.079307361375882e-05,
"loss": 0.1408,
"step": 4730
},
{
"epoch": 13.73913043478261,
"grad_norm": 0.35590943694114685,
"learning_rate": 9.074521214086149e-05,
"loss": 0.1125,
"step": 4740
},
{
"epoch": 13.768115942028986,
"grad_norm": 0.3481467068195343,
"learning_rate": 9.069723927211001e-05,
"loss": 0.1306,
"step": 4750
},
{
"epoch": 13.797101449275363,
"grad_norm": 0.5402430891990662,
"learning_rate": 9.064915513866037e-05,
"loss": 0.131,
"step": 4760
},
{
"epoch": 13.826086956521738,
"grad_norm": 0.4278501272201538,
"learning_rate": 9.060095987197279e-05,
"loss": 0.1275,
"step": 4770
},
{
"epoch": 13.855072463768115,
"grad_norm": 0.27769970893859863,
"learning_rate": 9.055265360381126e-05,
"loss": 0.1186,
"step": 4780
},
{
"epoch": 13.884057971014492,
"grad_norm": 0.258645236492157,
"learning_rate": 9.050423646624326e-05,
"loss": 0.1288,
"step": 4790
},
{
"epoch": 13.91304347826087,
"grad_norm": 0.39688029885292053,
"learning_rate": 9.045570859163943e-05,
"loss": 0.1174,
"step": 4800
},
{
"epoch": 13.942028985507246,
"grad_norm": 0.4738856554031372,
"learning_rate": 9.04070701126731e-05,
"loss": 0.1179,
"step": 4810
},
{
"epoch": 13.971014492753623,
"grad_norm": 0.4535987675189972,
"learning_rate": 9.035832116232001e-05,
"loss": 0.123,
"step": 4820
},
{
"epoch": 14.0,
"grad_norm": 0.941590428352356,
"learning_rate": 9.030946187385796e-05,
"loss": 0.1209,
"step": 4830
},
{
"epoch": 14.028985507246377,
"grad_norm": 0.5170802474021912,
"learning_rate": 9.026049238086635e-05,
"loss": 0.1152,
"step": 4840
},
{
"epoch": 14.057971014492754,
"grad_norm": 0.2910565435886383,
"learning_rate": 9.021141281722591e-05,
"loss": 0.1163,
"step": 4850
},
{
"epoch": 14.08695652173913,
"grad_norm": 0.4192429482936859,
"learning_rate": 9.01622233171183e-05,
"loss": 0.0962,
"step": 4860
},
{
"epoch": 14.115942028985508,
"grad_norm": 0.3841983377933502,
"learning_rate": 9.011292401502574e-05,
"loss": 0.122,
"step": 4870
},
{
"epoch": 14.144927536231885,
"grad_norm": 0.42944851517677307,
"learning_rate": 9.006351504573063e-05,
"loss": 0.1251,
"step": 4880
},
{
"epoch": 14.173913043478262,
"grad_norm": 0.3717688322067261,
"learning_rate": 9.001399654431519e-05,
"loss": 0.1096,
"step": 4890
},
{
"epoch": 14.202898550724637,
"grad_norm": 0.37804022431373596,
"learning_rate": 8.996436864616116e-05,
"loss": 0.1209,
"step": 4900
},
{
"epoch": 14.231884057971014,
"grad_norm": 0.3129970133304596,
"learning_rate": 8.991463148694925e-05,
"loss": 0.1097,
"step": 4910
},
{
"epoch": 14.26086956521739,
"grad_norm": 0.40859848260879517,
"learning_rate": 8.986478520265902e-05,
"loss": 0.1214,
"step": 4920
},
{
"epoch": 14.289855072463768,
"grad_norm": 0.3709128797054291,
"learning_rate": 8.981482992956827e-05,
"loss": 0.1264,
"step": 4930
},
{
"epoch": 14.318840579710145,
"grad_norm": 0.3855811059474945,
"learning_rate": 8.976476580425282e-05,
"loss": 0.1113,
"step": 4940
},
{
"epoch": 14.347826086956522,
"grad_norm": 0.28712448477745056,
"learning_rate": 8.971459296358606e-05,
"loss": 0.0821,
"step": 4950
},
{
"epoch": 14.376811594202898,
"grad_norm": 0.48466065526008606,
"learning_rate": 8.966431154473864e-05,
"loss": 0.1493,
"step": 4960
},
{
"epoch": 14.405797101449275,
"grad_norm": 0.30625486373901367,
"learning_rate": 8.961392168517803e-05,
"loss": 0.1163,
"step": 4970
},
{
"epoch": 14.434782608695652,
"grad_norm": 0.32612621784210205,
"learning_rate": 8.956342352266821e-05,
"loss": 0.1294,
"step": 4980
},
{
"epoch": 14.46376811594203,
"grad_norm": 0.39533373713493347,
"learning_rate": 8.95128171952692e-05,
"loss": 0.1141,
"step": 4990
},
{
"epoch": 14.492753623188406,
"grad_norm": 0.5708385109901428,
"learning_rate": 8.946210284133676e-05,
"loss": 0.1435,
"step": 5000
},
{
"epoch": 14.521739130434783,
"grad_norm": 0.41702768206596375,
"learning_rate": 8.941128059952201e-05,
"loss": 0.1244,
"step": 5010
},
{
"epoch": 14.55072463768116,
"grad_norm": 0.581706702709198,
"learning_rate": 8.936035060877102e-05,
"loss": 0.1013,
"step": 5020
},
{
"epoch": 14.579710144927537,
"grad_norm": 0.508090078830719,
"learning_rate": 8.930931300832443e-05,
"loss": 0.0987,
"step": 5030
},
{
"epoch": 14.608695652173914,
"grad_norm": 0.32675766944885254,
"learning_rate": 8.925816793771711e-05,
"loss": 0.1173,
"step": 5040
},
{
"epoch": 14.63768115942029,
"grad_norm": 0.4030362069606781,
"learning_rate": 8.92069155367777e-05,
"loss": 0.1054,
"step": 5050
},
{
"epoch": 14.666666666666666,
"grad_norm": 0.4901740550994873,
"learning_rate": 8.915555594562834e-05,
"loss": 0.1197,
"step": 5060
},
{
"epoch": 14.695652173913043,
"grad_norm": 0.43186917901039124,
"learning_rate": 8.910408930468416e-05,
"loss": 0.1146,
"step": 5070
},
{
"epoch": 14.72463768115942,
"grad_norm": 0.3401460647583008,
"learning_rate": 8.905251575465303e-05,
"loss": 0.1237,
"step": 5080
},
{
"epoch": 14.753623188405797,
"grad_norm": 0.2620072662830353,
"learning_rate": 8.900083543653502e-05,
"loss": 0.123,
"step": 5090
},
{
"epoch": 14.782608695652174,
"grad_norm": 0.3774551451206207,
"learning_rate": 8.894904849162218e-05,
"loss": 0.1237,
"step": 5100
},
{
"epoch": 14.81159420289855,
"grad_norm": 0.4038746654987335,
"learning_rate": 8.889715506149802e-05,
"loss": 0.115,
"step": 5110
},
{
"epoch": 14.840579710144928,
"grad_norm": 0.4395363926887512,
"learning_rate": 8.884515528803722e-05,
"loss": 0.1139,
"step": 5120
},
{
"epoch": 14.869565217391305,
"grad_norm": 0.34769847989082336,
"learning_rate": 8.879304931340517e-05,
"loss": 0.1211,
"step": 5130
},
{
"epoch": 14.898550724637682,
"grad_norm": 0.3238866925239563,
"learning_rate": 8.874083728005759e-05,
"loss": 0.1181,
"step": 5140
},
{
"epoch": 14.927536231884059,
"grad_norm": 0.43937593698501587,
"learning_rate": 8.868851933074021e-05,
"loss": 0.1232,
"step": 5150
},
{
"epoch": 14.956521739130435,
"grad_norm": 0.4402833580970764,
"learning_rate": 8.863609560848829e-05,
"loss": 0.1365,
"step": 5160
},
{
"epoch": 14.985507246376812,
"grad_norm": 0.6102784276008606,
"learning_rate": 8.85835662566263e-05,
"loss": 0.1248,
"step": 5170
},
{
"epoch": 15.014492753623188,
"grad_norm": 0.28894439339637756,
"learning_rate": 8.853093141876747e-05,
"loss": 0.1016,
"step": 5180
},
{
"epoch": 15.043478260869565,
"grad_norm": 0.2645789086818695,
"learning_rate": 8.847819123881343e-05,
"loss": 0.1256,
"step": 5190
},
{
"epoch": 15.072463768115941,
"grad_norm": 0.38724544644355774,
"learning_rate": 8.842534586095383e-05,
"loss": 0.1432,
"step": 5200
},
{
"epoch": 15.101449275362318,
"grad_norm": 0.2536871135234833,
"learning_rate": 8.837239542966593e-05,
"loss": 0.1033,
"step": 5210
},
{
"epoch": 15.130434782608695,
"grad_norm": 0.337372750043869,
"learning_rate": 8.831934008971417e-05,
"loss": 0.1231,
"step": 5220
},
{
"epoch": 15.159420289855072,
"grad_norm": 0.3590666353702545,
"learning_rate": 8.826617998614982e-05,
"loss": 0.109,
"step": 5230
},
{
"epoch": 15.18840579710145,
"grad_norm": 0.37052637338638306,
"learning_rate": 8.821291526431056e-05,
"loss": 0.1001,
"step": 5240
},
{
"epoch": 15.217391304347826,
"grad_norm": 0.5083751082420349,
"learning_rate": 8.815954606982015e-05,
"loss": 0.1224,
"step": 5250
},
{
"epoch": 15.246376811594203,
"grad_norm": 0.3430265486240387,
"learning_rate": 8.810607254858789e-05,
"loss": 0.1201,
"step": 5260
},
{
"epoch": 15.27536231884058,
"grad_norm": 0.6075800061225891,
"learning_rate": 8.805249484680838e-05,
"loss": 0.1281,
"step": 5270
},
{
"epoch": 15.304347826086957,
"grad_norm": 0.6015037894248962,
"learning_rate": 8.799881311096096e-05,
"loss": 0.1337,
"step": 5280
},
{
"epoch": 15.333333333333334,
"grad_norm": 0.3478599786758423,
"learning_rate": 8.794502748780949e-05,
"loss": 0.1363,
"step": 5290
},
{
"epoch": 15.36231884057971,
"grad_norm": 0.39971593022346497,
"learning_rate": 8.78911381244018e-05,
"loss": 0.1015,
"step": 5300
},
{
"epoch": 15.391304347826088,
"grad_norm": 0.38049763441085815,
"learning_rate": 8.783714516806933e-05,
"loss": 0.1209,
"step": 5310
},
{
"epoch": 15.420289855072463,
"grad_norm": 0.33554980158805847,
"learning_rate": 8.77830487664268e-05,
"loss": 0.1077,
"step": 5320
},
{
"epoch": 15.44927536231884,
"grad_norm": 0.2598898410797119,
"learning_rate": 8.772884906737167e-05,
"loss": 0.1056,
"step": 5330
},
{
"epoch": 15.478260869565217,
"grad_norm": 0.30635103583335876,
"learning_rate": 8.767454621908387e-05,
"loss": 0.1182,
"step": 5340
},
{
"epoch": 15.507246376811594,
"grad_norm": 0.31595268845558167,
"learning_rate": 8.76201403700253e-05,
"loss": 0.0987,
"step": 5350
},
{
"epoch": 15.53623188405797,
"grad_norm": 0.4669897258281708,
"learning_rate": 8.756563166893949e-05,
"loss": 0.1093,
"step": 5360
},
{
"epoch": 15.565217391304348,
"grad_norm": 0.41924533247947693,
"learning_rate": 8.751102026485113e-05,
"loss": 0.0981,
"step": 5370
},
{
"epoch": 15.594202898550725,
"grad_norm": 0.3114607334136963,
"learning_rate": 8.745630630706571e-05,
"loss": 0.1265,
"step": 5380
},
{
"epoch": 15.623188405797102,
"grad_norm": 0.33994221687316895,
"learning_rate": 8.740148994516912e-05,
"loss": 0.1061,
"step": 5390
},
{
"epoch": 15.652173913043478,
"grad_norm": 0.5424929857254028,
"learning_rate": 8.73465713290272e-05,
"loss": 0.1112,
"step": 5400
},
{
"epoch": 15.681159420289855,
"grad_norm": 0.4351734519004822,
"learning_rate": 8.729155060878533e-05,
"loss": 0.1043,
"step": 5410
},
{
"epoch": 15.710144927536232,
"grad_norm": 0.33228495717048645,
"learning_rate": 8.723642793486809e-05,
"loss": 0.1257,
"step": 5420
},
{
"epoch": 15.73913043478261,
"grad_norm": 0.4116186201572418,
"learning_rate": 8.718120345797873e-05,
"loss": 0.1102,
"step": 5430
},
{
"epoch": 15.768115942028986,
"grad_norm": 0.38537874817848206,
"learning_rate": 8.712587732909889e-05,
"loss": 0.1315,
"step": 5440
},
{
"epoch": 15.797101449275363,
"grad_norm": 0.2920888066291809,
"learning_rate": 8.707044969948806e-05,
"loss": 0.1393,
"step": 5450
},
{
"epoch": 15.826086956521738,
"grad_norm": 0.3017374277114868,
"learning_rate": 8.701492072068329e-05,
"loss": 0.1181,
"step": 5460
},
{
"epoch": 15.855072463768115,
"grad_norm": 0.3454197645187378,
"learning_rate": 8.695929054449869e-05,
"loss": 0.1144,
"step": 5470
},
{
"epoch": 15.884057971014492,
"grad_norm": 0.3054383099079132,
"learning_rate": 8.690355932302501e-05,
"loss": 0.1149,
"step": 5480
},
{
"epoch": 15.91304347826087,
"grad_norm": 0.6223363280296326,
"learning_rate": 8.684772720862931e-05,
"loss": 0.1138,
"step": 5490
},
{
"epoch": 15.942028985507246,
"grad_norm": 0.33070531487464905,
"learning_rate": 8.679179435395446e-05,
"loss": 0.1074,
"step": 5500
},
{
"epoch": 15.971014492753623,
"grad_norm": 0.3179458677768707,
"learning_rate": 8.673576091191874e-05,
"loss": 0.109,
"step": 5510
},
{
"epoch": 16.0,
"grad_norm": 0.2908962070941925,
"learning_rate": 8.667962703571541e-05,
"loss": 0.0997,
"step": 5520
},
{
"epoch": 16.028985507246375,
"grad_norm": 0.45387428998947144,
"learning_rate": 8.662339287881238e-05,
"loss": 0.1132,
"step": 5530
},
{
"epoch": 16.057971014492754,
"grad_norm": 0.34306666254997253,
"learning_rate": 8.656705859495169e-05,
"loss": 0.099,
"step": 5540
},
{
"epoch": 16.08695652173913,
"grad_norm": 0.317571222782135,
"learning_rate": 8.651062433814912e-05,
"loss": 0.1142,
"step": 5550
},
{
"epoch": 16.115942028985508,
"grad_norm": 0.2807283103466034,
"learning_rate": 8.645409026269375e-05,
"loss": 0.1085,
"step": 5560
},
{
"epoch": 16.144927536231883,
"grad_norm": 0.48772743344306946,
"learning_rate": 8.639745652314759e-05,
"loss": 0.1222,
"step": 5570
},
{
"epoch": 16.17391304347826,
"grad_norm": 0.3181246519088745,
"learning_rate": 8.634072327434515e-05,
"loss": 0.1113,
"step": 5580
},
{
"epoch": 16.202898550724637,
"grad_norm": 0.28259527683258057,
"learning_rate": 8.628389067139294e-05,
"loss": 0.0973,
"step": 5590
},
{
"epoch": 16.231884057971016,
"grad_norm": 0.21859432756900787,
"learning_rate": 8.622695886966911e-05,
"loss": 0.105,
"step": 5600
},
{
"epoch": 16.26086956521739,
"grad_norm": 0.38870155811309814,
"learning_rate": 8.616992802482308e-05,
"loss": 0.1054,
"step": 5610
},
{
"epoch": 16.28985507246377,
"grad_norm": 0.30381137132644653,
"learning_rate": 8.611279829277496e-05,
"loss": 0.1095,
"step": 5620
},
{
"epoch": 16.318840579710145,
"grad_norm": 0.33329617977142334,
"learning_rate": 8.605556982971528e-05,
"loss": 0.0896,
"step": 5630
},
{
"epoch": 16.347826086956523,
"grad_norm": 0.3171881437301636,
"learning_rate": 8.599824279210447e-05,
"loss": 0.1097,
"step": 5640
},
{
"epoch": 16.3768115942029,
"grad_norm": 0.36195775866508484,
"learning_rate": 8.594081733667243e-05,
"loss": 0.1088,
"step": 5650
},
{
"epoch": 16.405797101449274,
"grad_norm": 0.3968923091888428,
"learning_rate": 8.58832936204182e-05,
"loss": 0.0995,
"step": 5660
},
{
"epoch": 16.434782608695652,
"grad_norm": 0.515150249004364,
"learning_rate": 8.582567180060942e-05,
"loss": 0.107,
"step": 5670
},
{
"epoch": 16.463768115942027,
"grad_norm": 0.4465225040912628,
"learning_rate": 8.576795203478194e-05,
"loss": 0.123,
"step": 5680
},
{
"epoch": 16.492753623188406,
"grad_norm": 0.27907755970954895,
"learning_rate": 8.571013448073939e-05,
"loss": 0.1023,
"step": 5690
},
{
"epoch": 16.52173913043478,
"grad_norm": 0.4790158271789551,
"learning_rate": 8.565221929655275e-05,
"loss": 0.1154,
"step": 5700
},
{
"epoch": 16.55072463768116,
"grad_norm": 0.5309686660766602,
"learning_rate": 8.559420664055992e-05,
"loss": 0.1308,
"step": 5710
},
{
"epoch": 16.579710144927535,
"grad_norm": 0.36980125308036804,
"learning_rate": 8.553609667136532e-05,
"loss": 0.1177,
"step": 5720
},
{
"epoch": 16.608695652173914,
"grad_norm": 0.33945196866989136,
"learning_rate": 8.547788954783936e-05,
"loss": 0.1511,
"step": 5730
},
{
"epoch": 16.63768115942029,
"grad_norm": 0.26327815651893616,
"learning_rate": 8.541958542911808e-05,
"loss": 0.1238,
"step": 5740
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.351123571395874,
"learning_rate": 8.536118447460275e-05,
"loss": 0.0927,
"step": 5750
},
{
"epoch": 16.695652173913043,
"grad_norm": 0.3815719187259674,
"learning_rate": 8.530268684395932e-05,
"loss": 0.1071,
"step": 5760
},
{
"epoch": 16.72463768115942,
"grad_norm": 0.4307467043399811,
"learning_rate": 8.524409269711807e-05,
"loss": 0.1098,
"step": 5770
},
{
"epoch": 16.753623188405797,
"grad_norm": 0.33247116208076477,
"learning_rate": 8.51854021942732e-05,
"loss": 0.121,
"step": 5780
},
{
"epoch": 16.782608695652176,
"grad_norm": 0.29645007848739624,
"learning_rate": 8.512661549588227e-05,
"loss": 0.0983,
"step": 5790
},
{
"epoch": 16.81159420289855,
"grad_norm": 0.3584914207458496,
"learning_rate": 8.506773276266588e-05,
"loss": 0.1093,
"step": 5800
},
{
"epoch": 16.840579710144926,
"grad_norm": 0.4944992959499359,
"learning_rate": 8.500875415560721e-05,
"loss": 0.12,
"step": 5810
},
{
"epoch": 16.869565217391305,
"grad_norm": 0.3007963001728058,
"learning_rate": 8.494967983595144e-05,
"loss": 0.1165,
"step": 5820
},
{
"epoch": 16.89855072463768,
"grad_norm": 0.2885436415672302,
"learning_rate": 8.489050996520558e-05,
"loss": 0.1295,
"step": 5830
},
{
"epoch": 16.92753623188406,
"grad_norm": 0.45604297518730164,
"learning_rate": 8.483124470513775e-05,
"loss": 0.1038,
"step": 5840
},
{
"epoch": 16.956521739130434,
"grad_norm": 0.34743809700012207,
"learning_rate": 8.477188421777692e-05,
"loss": 0.1114,
"step": 5850
},
{
"epoch": 16.985507246376812,
"grad_norm": 0.3643774390220642,
"learning_rate": 8.47124286654124e-05,
"loss": 0.1207,
"step": 5860
},
{
"epoch": 17.014492753623188,
"grad_norm": 0.40412595868110657,
"learning_rate": 8.465287821059341e-05,
"loss": 0.1355,
"step": 5870
},
{
"epoch": 17.043478260869566,
"grad_norm": 0.27604588866233826,
"learning_rate": 8.45932330161286e-05,
"loss": 0.1053,
"step": 5880
},
{
"epoch": 17.07246376811594,
"grad_norm": 0.472373366355896,
"learning_rate": 8.453349324508567e-05,
"loss": 0.1032,
"step": 5890
},
{
"epoch": 17.10144927536232,
"grad_norm": 0.4434383511543274,
"learning_rate": 8.447365906079088e-05,
"loss": 0.1033,
"step": 5900
},
{
"epoch": 17.130434782608695,
"grad_norm": 0.38238826394081116,
"learning_rate": 8.441373062682856e-05,
"loss": 0.1056,
"step": 5910
},
{
"epoch": 17.159420289855074,
"grad_norm": 0.34880152344703674,
"learning_rate": 8.43537081070408e-05,
"loss": 0.0964,
"step": 5920
},
{
"epoch": 17.18840579710145,
"grad_norm": 0.46405625343322754,
"learning_rate": 8.429359166552689e-05,
"loss": 0.1363,
"step": 5930
},
{
"epoch": 17.217391304347824,
"grad_norm": 0.35732561349868774,
"learning_rate": 8.423338146664284e-05,
"loss": 0.1046,
"step": 5940
},
{
"epoch": 17.246376811594203,
"grad_norm": 0.3758239150047302,
"learning_rate": 8.417307767500107e-05,
"loss": 0.0963,
"step": 5950
},
{
"epoch": 17.27536231884058,
"grad_norm": 0.40921303629875183,
"learning_rate": 8.411268045546983e-05,
"loss": 0.12,
"step": 5960
},
{
"epoch": 17.304347826086957,
"grad_norm": 0.30313900113105774,
"learning_rate": 8.405218997317281e-05,
"loss": 0.1213,
"step": 5970
},
{
"epoch": 17.333333333333332,
"grad_norm": 0.27081504464149475,
"learning_rate": 8.399160639348869e-05,
"loss": 0.106,
"step": 5980
},
{
"epoch": 17.36231884057971,
"grad_norm": 0.32741713523864746,
"learning_rate": 8.393092988205065e-05,
"loss": 0.089,
"step": 5990
},
{
"epoch": 17.391304347826086,
"grad_norm": 0.350293904542923,
"learning_rate": 8.387016060474597e-05,
"loss": 0.1074,
"step": 6000
},
{
"epoch": 17.420289855072465,
"grad_norm": 0.46384942531585693,
"learning_rate": 8.380929872771551e-05,
"loss": 0.1151,
"step": 6010
},
{
"epoch": 17.44927536231884,
"grad_norm": 0.38906311988830566,
"learning_rate": 8.374834441735335e-05,
"loss": 0.0944,
"step": 6020
},
{
"epoch": 17.47826086956522,
"grad_norm": 0.4116496443748474,
"learning_rate": 8.368729784030622e-05,
"loss": 0.1009,
"step": 6030
},
{
"epoch": 17.507246376811594,
"grad_norm": 0.36326107382774353,
"learning_rate": 8.362615916347315e-05,
"loss": 0.117,
"step": 6040
},
{
"epoch": 17.536231884057973,
"grad_norm": 0.4073273539543152,
"learning_rate": 8.356492855400493e-05,
"loss": 0.1196,
"step": 6050
},
{
"epoch": 17.565217391304348,
"grad_norm": 0.3261200189590454,
"learning_rate": 8.350360617930371e-05,
"loss": 0.0887,
"step": 6060
},
{
"epoch": 17.594202898550726,
"grad_norm": 0.2686854302883148,
"learning_rate": 8.344219220702255e-05,
"loss": 0.103,
"step": 6070
},
{
"epoch": 17.6231884057971,
"grad_norm": 0.30324316024780273,
"learning_rate": 8.338068680506485e-05,
"loss": 0.1244,
"step": 6080
},
{
"epoch": 17.652173913043477,
"grad_norm": 0.3971955180168152,
"learning_rate": 8.33190901415841e-05,
"loss": 0.1114,
"step": 6090
},
{
"epoch": 17.681159420289855,
"grad_norm": 0.23621766269207,
"learning_rate": 8.325740238498317e-05,
"loss": 0.1151,
"step": 6100
},
{
"epoch": 17.71014492753623,
"grad_norm": 0.3847745954990387,
"learning_rate": 8.319562370391406e-05,
"loss": 0.1067,
"step": 6110
},
{
"epoch": 17.73913043478261,
"grad_norm": 0.29595401883125305,
"learning_rate": 8.31337542672773e-05,
"loss": 0.0946,
"step": 6120
},
{
"epoch": 17.768115942028984,
"grad_norm": 0.36179453134536743,
"learning_rate": 8.307179424422158e-05,
"loss": 0.1058,
"step": 6130
},
{
"epoch": 17.797101449275363,
"grad_norm": 0.28337523341178894,
"learning_rate": 8.300974380414327e-05,
"loss": 0.0932,
"step": 6140
},
{
"epoch": 17.82608695652174,
"grad_norm": 0.3820880651473999,
"learning_rate": 8.294760311668586e-05,
"loss": 0.1179,
"step": 6150
},
{
"epoch": 17.855072463768117,
"grad_norm": 0.19762246310710907,
"learning_rate": 8.288537235173961e-05,
"loss": 0.1321,
"step": 6160
},
{
"epoch": 17.884057971014492,
"grad_norm": 0.4157634973526001,
"learning_rate": 8.282305167944108e-05,
"loss": 0.1054,
"step": 6170
},
{
"epoch": 17.91304347826087,
"grad_norm": 0.4183441698551178,
"learning_rate": 8.276064127017262e-05,
"loss": 0.11,
"step": 6180
},
{
"epoch": 17.942028985507246,
"grad_norm": 0.41134294867515564,
"learning_rate": 8.269814129456189e-05,
"loss": 0.0956,
"step": 6190
},
{
"epoch": 17.971014492753625,
"grad_norm": 0.4862001836299896,
"learning_rate": 8.263555192348143e-05,
"loss": 0.1139,
"step": 6200
},
{
"epoch": 18.0,
"grad_norm": 0.4908180236816406,
"learning_rate": 8.257287332804819e-05,
"loss": 0.094,
"step": 6210
},
{
"epoch": 18.028985507246375,
"grad_norm": 0.37922942638397217,
"learning_rate": 8.251010567962307e-05,
"loss": 0.1156,
"step": 6220
},
{
"epoch": 18.057971014492754,
"grad_norm": 0.4405559003353119,
"learning_rate": 8.244724914981041e-05,
"loss": 0.1219,
"step": 6230
},
{
"epoch": 18.08695652173913,
"grad_norm": 0.3812404274940491,
"learning_rate": 8.238430391045757e-05,
"loss": 0.1019,
"step": 6240
},
{
"epoch": 18.115942028985508,
"grad_norm": 0.4499792754650116,
"learning_rate": 8.232127013365445e-05,
"loss": 0.0975,
"step": 6250
},
{
"epoch": 18.144927536231883,
"grad_norm": 0.5252455472946167,
"learning_rate": 8.225814799173295e-05,
"loss": 0.1159,
"step": 6260
},
{
"epoch": 18.17391304347826,
"grad_norm": 0.44270288944244385,
"learning_rate": 8.219493765726663e-05,
"loss": 0.1103,
"step": 6270
},
{
"epoch": 18.202898550724637,
"grad_norm": 0.2811424136161804,
"learning_rate": 8.21316393030701e-05,
"loss": 0.1243,
"step": 6280
},
{
"epoch": 18.231884057971016,
"grad_norm": 0.4186232388019562,
"learning_rate": 8.206825310219865e-05,
"loss": 0.1174,
"step": 6290
},
{
"epoch": 18.26086956521739,
"grad_norm": 0.2894100844860077,
"learning_rate": 8.200477922794776e-05,
"loss": 0.1061,
"step": 6300
},
{
"epoch": 18.28985507246377,
"grad_norm": 0.3997898995876312,
"learning_rate": 8.194121785385256e-05,
"loss": 0.1153,
"step": 6310
},
{
"epoch": 18.318840579710145,
"grad_norm": 0.2798968255519867,
"learning_rate": 8.187756915368741e-05,
"loss": 0.0976,
"step": 6320
},
{
"epoch": 18.347826086956523,
"grad_norm": 0.3138371407985687,
"learning_rate": 8.181383330146544e-05,
"loss": 0.1135,
"step": 6330
},
{
"epoch": 18.3768115942029,
"grad_norm": 0.3996056914329529,
"learning_rate": 8.175001047143804e-05,
"loss": 0.1135,
"step": 6340
},
{
"epoch": 18.405797101449274,
"grad_norm": 0.39477649331092834,
"learning_rate": 8.168610083809438e-05,
"loss": 0.1052,
"step": 6350
},
{
"epoch": 18.434782608695652,
"grad_norm": 0.35394132137298584,
"learning_rate": 8.162210457616095e-05,
"loss": 0.0962,
"step": 6360
},
{
"epoch": 18.463768115942027,
"grad_norm": 0.28887924551963806,
"learning_rate": 8.155802186060109e-05,
"loss": 0.1019,
"step": 6370
},
{
"epoch": 18.492753623188406,
"grad_norm": 0.272850900888443,
"learning_rate": 8.149385286661453e-05,
"loss": 0.127,
"step": 6380
},
{
"epoch": 18.52173913043478,
"grad_norm": 0.5005936026573181,
"learning_rate": 8.14295977696368e-05,
"loss": 0.1174,
"step": 6390
},
{
"epoch": 18.55072463768116,
"grad_norm": 0.38646388053894043,
"learning_rate": 8.13652567453389e-05,
"loss": 0.1069,
"step": 6400
},
{
"epoch": 18.579710144927535,
"grad_norm": 0.3873347043991089,
"learning_rate": 8.130082996962676e-05,
"loss": 0.1235,
"step": 6410
},
{
"epoch": 18.608695652173914,
"grad_norm": 0.33470475673675537,
"learning_rate": 8.123631761864068e-05,
"loss": 0.1031,
"step": 6420
},
{
"epoch": 18.63768115942029,
"grad_norm": 0.4067200720310211,
"learning_rate": 8.1171719868755e-05,
"loss": 0.1193,
"step": 6430
},
{
"epoch": 18.666666666666668,
"grad_norm": 0.34601399302482605,
"learning_rate": 8.110703689657748e-05,
"loss": 0.0968,
"step": 6440
},
{
"epoch": 18.695652173913043,
"grad_norm": 0.4860565662384033,
"learning_rate": 8.104226887894892e-05,
"loss": 0.1008,
"step": 6450
},
{
"epoch": 18.72463768115942,
"grad_norm": 0.51555997133255,
"learning_rate": 8.097741599294257e-05,
"loss": 0.1193,
"step": 6460
},
{
"epoch": 18.753623188405797,
"grad_norm": 0.36039650440216064,
"learning_rate": 8.091247841586378e-05,
"loss": 0.1029,
"step": 6470
},
{
"epoch": 18.782608695652176,
"grad_norm": 0.315164178609848,
"learning_rate": 8.084745632524939e-05,
"loss": 0.0904,
"step": 6480
},
{
"epoch": 18.81159420289855,
"grad_norm": 0.3153921365737915,
"learning_rate": 8.07823498988673e-05,
"loss": 0.1038,
"step": 6490
},
{
"epoch": 18.840579710144926,
"grad_norm": 0.3011777102947235,
"learning_rate": 8.071715931471602e-05,
"loss": 0.1245,
"step": 6500
},
{
"epoch": 18.869565217391305,
"grad_norm": 0.3314365744590759,
"learning_rate": 8.06518847510241e-05,
"loss": 0.1001,
"step": 6510
},
{
"epoch": 18.89855072463768,
"grad_norm": 0.3859410881996155,
"learning_rate": 8.058652638624971e-05,
"loss": 0.1122,
"step": 6520
},
{
"epoch": 18.92753623188406,
"grad_norm": 0.3356384336948395,
"learning_rate": 8.052108439908013e-05,
"loss": 0.1144,
"step": 6530
},
{
"epoch": 18.956521739130434,
"grad_norm": 0.35082948207855225,
"learning_rate": 8.045555896843125e-05,
"loss": 0.1079,
"step": 6540
},
{
"epoch": 18.985507246376812,
"grad_norm": 0.5260385274887085,
"learning_rate": 8.03899502734471e-05,
"loss": 0.1046,
"step": 6550
},
{
"epoch": 19.014492753623188,
"grad_norm": 0.3151768445968628,
"learning_rate": 8.032425849349931e-05,
"loss": 0.105,
"step": 6560
},
{
"epoch": 19.043478260869566,
"grad_norm": 0.362244188785553,
"learning_rate": 8.025848380818674e-05,
"loss": 0.1005,
"step": 6570
},
{
"epoch": 19.07246376811594,
"grad_norm": 0.41462433338165283,
"learning_rate": 8.019262639733487e-05,
"loss": 0.1198,
"step": 6580
},
{
"epoch": 19.10144927536232,
"grad_norm": 0.40146404504776,
"learning_rate": 8.012668644099531e-05,
"loss": 0.0886,
"step": 6590
},
{
"epoch": 19.130434782608695,
"grad_norm": 0.39127445220947266,
"learning_rate": 8.006066411944542e-05,
"loss": 0.0906,
"step": 6600
},
{
"epoch": 19.159420289855074,
"grad_norm": 0.42401593923568726,
"learning_rate": 7.999455961318769e-05,
"loss": 0.1092,
"step": 6610
},
{
"epoch": 19.18840579710145,
"grad_norm": 0.49508869647979736,
"learning_rate": 7.992837310294932e-05,
"loss": 0.1114,
"step": 6620
},
{
"epoch": 19.217391304347824,
"grad_norm": 0.4461759328842163,
"learning_rate": 7.986210476968167e-05,
"loss": 0.1251,
"step": 6630
},
{
"epoch": 19.246376811594203,
"grad_norm": 0.41962409019470215,
"learning_rate": 7.97957547945599e-05,
"loss": 0.0899,
"step": 6640
},
{
"epoch": 19.27536231884058,
"grad_norm": 0.3262649178504944,
"learning_rate": 7.972932335898226e-05,
"loss": 0.0868,
"step": 6650
},
{
"epoch": 19.304347826086957,
"grad_norm": 0.3167392611503601,
"learning_rate": 7.966281064456975e-05,
"loss": 0.1008,
"step": 6660
},
{
"epoch": 19.333333333333332,
"grad_norm": 0.35801073908805847,
"learning_rate": 7.959621683316563e-05,
"loss": 0.0931,
"step": 6670
},
{
"epoch": 19.36231884057971,
"grad_norm": 0.31944307684898376,
"learning_rate": 7.952954210683481e-05,
"loss": 0.1218,
"step": 6680
},
{
"epoch": 19.391304347826086,
"grad_norm": 0.3943234384059906,
"learning_rate": 7.946278664786345e-05,
"loss": 0.1194,
"step": 6690
},
{
"epoch": 19.420289855072465,
"grad_norm": 0.46412956714630127,
"learning_rate": 7.939595063875842e-05,
"loss": 0.0976,
"step": 6700
},
{
"epoch": 19.44927536231884,
"grad_norm": 0.2884758412837982,
"learning_rate": 7.932903426224683e-05,
"loss": 0.1143,
"step": 6710
},
{
"epoch": 19.47826086956522,
"grad_norm": 0.24540093541145325,
"learning_rate": 7.926203770127552e-05,
"loss": 0.096,
"step": 6720
},
{
"epoch": 19.507246376811594,
"grad_norm": 0.40125906467437744,
"learning_rate": 7.919496113901046e-05,
"loss": 0.0998,
"step": 6730
},
{
"epoch": 19.536231884057973,
"grad_norm": 0.366150438785553,
"learning_rate": 7.912780475883649e-05,
"loss": 0.1104,
"step": 6740
},
{
"epoch": 19.565217391304348,
"grad_norm": 0.28204023838043213,
"learning_rate": 7.906056874435652e-05,
"loss": 0.0938,
"step": 6750
},
{
"epoch": 19.594202898550726,
"grad_norm": 0.39345285296440125,
"learning_rate": 7.899325327939131e-05,
"loss": 0.0835,
"step": 6760
},
{
"epoch": 19.6231884057971,
"grad_norm": 0.43696972727775574,
"learning_rate": 7.892585854797872e-05,
"loss": 0.1175,
"step": 6770
},
{
"epoch": 19.652173913043477,
"grad_norm": 0.5411075949668884,
"learning_rate": 7.88583847343734e-05,
"loss": 0.1016,
"step": 6780
},
{
"epoch": 19.681159420289855,
"grad_norm": 0.41619637608528137,
"learning_rate": 7.879083202304616e-05,
"loss": 0.0956,
"step": 6790
},
{
"epoch": 19.71014492753623,
"grad_norm": 0.24727730453014374,
"learning_rate": 7.872320059868355e-05,
"loss": 0.102,
"step": 6800
},
{
"epoch": 19.73913043478261,
"grad_norm": 0.2794191539287567,
"learning_rate": 7.865549064618729e-05,
"loss": 0.1155,
"step": 6810
},
{
"epoch": 19.768115942028984,
"grad_norm": 0.4851526618003845,
"learning_rate": 7.858770235067381e-05,
"loss": 0.1132,
"step": 6820
},
{
"epoch": 19.797101449275363,
"grad_norm": 0.38266780972480774,
"learning_rate": 7.851983589747374e-05,
"loss": 0.1163,
"step": 6830
},
{
"epoch": 19.82608695652174,
"grad_norm": 0.31224480271339417,
"learning_rate": 7.845189147213133e-05,
"loss": 0.1113,
"step": 6840
},
{
"epoch": 19.855072463768117,
"grad_norm": 0.26829686760902405,
"learning_rate": 7.838386926040407e-05,
"loss": 0.0976,
"step": 6850
},
{
"epoch": 19.884057971014492,
"grad_norm": 0.46314260363578796,
"learning_rate": 7.83157694482621e-05,
"loss": 0.1108,
"step": 6860
},
{
"epoch": 19.91304347826087,
"grad_norm": 0.3253716230392456,
"learning_rate": 7.824759222188768e-05,
"loss": 0.1163,
"step": 6870
},
{
"epoch": 19.942028985507246,
"grad_norm": 0.22694610059261322,
"learning_rate": 7.817933776767478e-05,
"loss": 0.0966,
"step": 6880
},
{
"epoch": 19.971014492753625,
"grad_norm": 0.424565851688385,
"learning_rate": 7.811100627222842e-05,
"loss": 0.1106,
"step": 6890
},
{
"epoch": 20.0,
"grad_norm": 0.7507173418998718,
"learning_rate": 7.804259792236435e-05,
"loss": 0.1034,
"step": 6900
},
{
"epoch": 20.028985507246375,
"grad_norm": 0.3750985264778137,
"learning_rate": 7.797411290510835e-05,
"loss": 0.1016,
"step": 6910
},
{
"epoch": 20.057971014492754,
"grad_norm": 0.2645069658756256,
"learning_rate": 7.790555140769586e-05,
"loss": 0.1106,
"step": 6920
},
{
"epoch": 20.08695652173913,
"grad_norm": 0.473185658454895,
"learning_rate": 7.78369136175714e-05,
"loss": 0.0891,
"step": 6930
},
{
"epoch": 20.115942028985508,
"grad_norm": 0.3787136673927307,
"learning_rate": 7.776819972238806e-05,
"loss": 0.1062,
"step": 6940
},
{
"epoch": 20.144927536231883,
"grad_norm": 0.20881570875644684,
"learning_rate": 7.7699409910007e-05,
"loss": 0.0942,
"step": 6950
},
{
"epoch": 20.17391304347826,
"grad_norm": 0.36029985547065735,
"learning_rate": 7.763054436849694e-05,
"loss": 0.1139,
"step": 6960
},
{
"epoch": 20.202898550724637,
"grad_norm": 0.3305976986885071,
"learning_rate": 7.756160328613364e-05,
"loss": 0.107,
"step": 6970
},
{
"epoch": 20.231884057971016,
"grad_norm": 0.35445329546928406,
"learning_rate": 7.749258685139942e-05,
"loss": 0.0989,
"step": 6980
},
{
"epoch": 20.26086956521739,
"grad_norm": 0.39083990454673767,
"learning_rate": 7.742349525298253e-05,
"loss": 0.1105,
"step": 6990
},
{
"epoch": 20.28985507246377,
"grad_norm": 0.2868945896625519,
"learning_rate": 7.735432867977679e-05,
"loss": 0.0906,
"step": 7000
},
{
"epoch": 20.318840579710145,
"grad_norm": 0.3364221155643463,
"learning_rate": 7.728508732088096e-05,
"loss": 0.1045,
"step": 7010
},
{
"epoch": 20.347826086956523,
"grad_norm": 0.27390968799591064,
"learning_rate": 7.721577136559825e-05,
"loss": 0.0983,
"step": 7020
},
{
"epoch": 20.3768115942029,
"grad_norm": 0.30403977632522583,
"learning_rate": 7.714638100343588e-05,
"loss": 0.0966,
"step": 7030
},
{
"epoch": 20.405797101449274,
"grad_norm": 0.476375937461853,
"learning_rate": 7.707691642410444e-05,
"loss": 0.1009,
"step": 7040
},
{
"epoch": 20.434782608695652,
"grad_norm": 0.39029183983802795,
"learning_rate": 7.70073778175174e-05,
"loss": 0.0917,
"step": 7050
},
{
"epoch": 20.463768115942027,
"grad_norm": 0.29170235991477966,
"learning_rate": 7.69377653737907e-05,
"loss": 0.093,
"step": 7060
},
{
"epoch": 20.492753623188406,
"grad_norm": 0.3420107364654541,
"learning_rate": 7.686807928324209e-05,
"loss": 0.0887,
"step": 7070
},
{
"epoch": 20.52173913043478,
"grad_norm": 0.46952491998672485,
"learning_rate": 7.679831973639065e-05,
"loss": 0.1035,
"step": 7080
},
{
"epoch": 20.55072463768116,
"grad_norm": 0.3527598977088928,
"learning_rate": 7.672848692395637e-05,
"loss": 0.1112,
"step": 7090
},
{
"epoch": 20.579710144927535,
"grad_norm": 0.35709843039512634,
"learning_rate": 7.665858103685944e-05,
"loss": 0.1215,
"step": 7100
},
{
"epoch": 20.608695652173914,
"grad_norm": 0.24026872217655182,
"learning_rate": 7.658860226621991e-05,
"loss": 0.1099,
"step": 7110
},
{
"epoch": 20.63768115942029,
"grad_norm": 0.3530397117137909,
"learning_rate": 7.651855080335708e-05,
"loss": 0.1098,
"step": 7120
},
{
"epoch": 20.666666666666668,
"grad_norm": 0.36064979434013367,
"learning_rate": 7.644842683978896e-05,
"loss": 0.0866,
"step": 7130
},
{
"epoch": 20.695652173913043,
"grad_norm": 0.32545793056488037,
"learning_rate": 7.63782305672318e-05,
"loss": 0.0964,
"step": 7140
},
{
"epoch": 20.72463768115942,
"grad_norm": 0.41704756021499634,
"learning_rate": 7.63079621775995e-05,
"loss": 0.1204,
"step": 7150
},
{
"epoch": 20.753623188405797,
"grad_norm": 0.2773784399032593,
"learning_rate": 7.623762186300319e-05,
"loss": 0.1231,
"step": 7160
},
{
"epoch": 20.782608695652176,
"grad_norm": 0.314942866563797,
"learning_rate": 7.616720981575057e-05,
"loss": 0.1082,
"step": 7170
},
{
"epoch": 20.81159420289855,
"grad_norm": 0.28883859515190125,
"learning_rate": 7.609672622834552e-05,
"loss": 0.1026,
"step": 7180
},
{
"epoch": 20.840579710144926,
"grad_norm": 0.3052704334259033,
"learning_rate": 7.602617129348747e-05,
"loss": 0.1078,
"step": 7190
},
{
"epoch": 20.869565217391305,
"grad_norm": 0.40418189764022827,
"learning_rate": 7.595554520407088e-05,
"loss": 0.1008,
"step": 7200
},
{
"epoch": 20.89855072463768,
"grad_norm": 0.547788143157959,
"learning_rate": 7.588484815318484e-05,
"loss": 0.1019,
"step": 7210
},
{
"epoch": 20.92753623188406,
"grad_norm": 0.29421624541282654,
"learning_rate": 7.581408033411234e-05,
"loss": 0.0984,
"step": 7220
},
{
"epoch": 20.956521739130434,
"grad_norm": 0.2703758478164673,
"learning_rate": 7.574324194032995e-05,
"loss": 0.105,
"step": 7230
},
{
"epoch": 20.985507246376812,
"grad_norm": 0.3435475528240204,
"learning_rate": 7.567233316550705e-05,
"loss": 0.1285,
"step": 7240
},
{
"epoch": 21.014492753623188,
"grad_norm": 0.33567100763320923,
"learning_rate": 7.560135420350562e-05,
"loss": 0.0879,
"step": 7250
},
{
"epoch": 21.043478260869566,
"grad_norm": 0.28695228695869446,
"learning_rate": 7.553030524837935e-05,
"loss": 0.0969,
"step": 7260
},
{
"epoch": 21.07246376811594,
"grad_norm": 0.41675615310668945,
"learning_rate": 7.545918649437341e-05,
"loss": 0.1269,
"step": 7270
},
{
"epoch": 21.10144927536232,
"grad_norm": 0.501548171043396,
"learning_rate": 7.538799813592377e-05,
"loss": 0.1074,
"step": 7280
},
{
"epoch": 21.130434782608695,
"grad_norm": 0.3565016984939575,
"learning_rate": 7.531674036765662e-05,
"loss": 0.1011,
"step": 7290
},
{
"epoch": 21.159420289855074,
"grad_norm": 0.4155751168727875,
"learning_rate": 7.524541338438807e-05,
"loss": 0.1139,
"step": 7300
},
{
"epoch": 21.18840579710145,
"grad_norm": 0.21688665449619293,
"learning_rate": 7.517401738112328e-05,
"loss": 0.0914,
"step": 7310
},
{
"epoch": 21.217391304347824,
"grad_norm": 0.28088539838790894,
"learning_rate": 7.510255255305628e-05,
"loss": 0.1125,
"step": 7320
},
{
"epoch": 21.246376811594203,
"grad_norm": 0.3268051743507385,
"learning_rate": 7.503101909556911e-05,
"loss": 0.0877,
"step": 7330
},
{
"epoch": 21.27536231884058,
"grad_norm": 0.34027546644210815,
"learning_rate": 7.495941720423154e-05,
"loss": 0.1026,
"step": 7340
},
{
"epoch": 21.304347826086957,
"grad_norm": 0.28073224425315857,
"learning_rate": 7.488774707480042e-05,
"loss": 0.0913,
"step": 7350
},
{
"epoch": 21.333333333333332,
"grad_norm": 0.37974223494529724,
"learning_rate": 7.481600890321911e-05,
"loss": 0.1041,
"step": 7360
},
{
"epoch": 21.36231884057971,
"grad_norm": 0.32456913590431213,
"learning_rate": 7.474420288561708e-05,
"loss": 0.1021,
"step": 7370
},
{
"epoch": 21.391304347826086,
"grad_norm": 0.3720680773258209,
"learning_rate": 7.467232921830921e-05,
"loss": 0.0958,
"step": 7380
},
{
"epoch": 21.420289855072465,
"grad_norm": 0.3506243824958801,
"learning_rate": 7.460038809779537e-05,
"loss": 0.1062,
"step": 7390
},
{
"epoch": 21.44927536231884,
"grad_norm": 0.33805230259895325,
"learning_rate": 7.452837972075983e-05,
"loss": 0.0969,
"step": 7400
},
{
"epoch": 21.47826086956522,
"grad_norm": 0.260945200920105,
"learning_rate": 7.445630428407074e-05,
"loss": 0.0879,
"step": 7410
},
{
"epoch": 21.507246376811594,
"grad_norm": 0.3978862762451172,
"learning_rate": 7.43841619847796e-05,
"loss": 0.0979,
"step": 7420
},
{
"epoch": 21.536231884057973,
"grad_norm": 0.3000033497810364,
"learning_rate": 7.431195302012072e-05,
"loss": 0.1194,
"step": 7430
},
{
"epoch": 21.565217391304348,
"grad_norm": 0.30280905961990356,
"learning_rate": 7.423967758751061e-05,
"loss": 0.0883,
"step": 7440
},
{
"epoch": 21.594202898550726,
"grad_norm": 0.26231029629707336,
"learning_rate": 7.416733588454758e-05,
"loss": 0.0773,
"step": 7450
},
{
"epoch": 21.6231884057971,
"grad_norm": 0.3744719922542572,
"learning_rate": 7.409492810901106e-05,
"loss": 0.1047,
"step": 7460
},
{
"epoch": 21.652173913043477,
"grad_norm": 0.27250558137893677,
"learning_rate": 7.402245445886116e-05,
"loss": 0.0996,
"step": 7470
},
{
"epoch": 21.681159420289855,
"grad_norm": 0.44744259119033813,
"learning_rate": 7.394991513223806e-05,
"loss": 0.1119,
"step": 7480
},
{
"epoch": 21.71014492753623,
"grad_norm": 0.41720837354660034,
"learning_rate": 7.38773103274615e-05,
"loss": 0.1009,
"step": 7490
},
{
"epoch": 21.73913043478261,
"grad_norm": 0.4026874303817749,
"learning_rate": 7.380464024303028e-05,
"loss": 0.0899,
"step": 7500
},
{
"epoch": 21.768115942028984,
"grad_norm": 0.39172300696372986,
"learning_rate": 7.373190507762162e-05,
"loss": 0.101,
"step": 7510
},
{
"epoch": 21.797101449275363,
"grad_norm": 0.3168098032474518,
"learning_rate": 7.365910503009066e-05,
"loss": 0.074,
"step": 7520
},
{
"epoch": 21.82608695652174,
"grad_norm": 0.28811538219451904,
"learning_rate": 7.358624029946996e-05,
"loss": 0.0993,
"step": 7530
},
{
"epoch": 21.855072463768117,
"grad_norm": 0.4034368693828583,
"learning_rate": 7.351331108496893e-05,
"loss": 0.115,
"step": 7540
},
{
"epoch": 21.884057971014492,
"grad_norm": 0.3656509220600128,
"learning_rate": 7.344031758597325e-05,
"loss": 0.112,
"step": 7550
},
{
"epoch": 21.91304347826087,
"grad_norm": 0.4922838807106018,
"learning_rate": 7.336726000204435e-05,
"loss": 0.0972,
"step": 7560
},
{
"epoch": 21.942028985507246,
"grad_norm": 0.3745553195476532,
"learning_rate": 7.32941385329189e-05,
"loss": 0.1027,
"step": 7570
},
{
"epoch": 21.971014492753625,
"grad_norm": 0.39149320125579834,
"learning_rate": 7.322095337850816e-05,
"loss": 0.1151,
"step": 7580
},
{
"epoch": 22.0,
"grad_norm": 0.8151898384094238,
"learning_rate": 7.314770473889758e-05,
"loss": 0.1026,
"step": 7590
},
{
"epoch": 22.028985507246375,
"grad_norm": 0.4377081096172333,
"learning_rate": 7.307439281434615e-05,
"loss": 0.0823,
"step": 7600
},
{
"epoch": 22.057971014492754,
"grad_norm": 0.35784757137298584,
"learning_rate": 7.300101780528585e-05,
"loss": 0.11,
"step": 7610
},
{
"epoch": 22.08695652173913,
"grad_norm": 0.25670677423477173,
"learning_rate": 7.292757991232117e-05,
"loss": 0.1015,
"step": 7620
},
{
"epoch": 22.115942028985508,
"grad_norm": 0.35505029559135437,
"learning_rate": 7.285407933622848e-05,
"loss": 0.1097,
"step": 7630
},
{
"epoch": 22.144927536231883,
"grad_norm": 0.27871453762054443,
"learning_rate": 7.278051627795557e-05,
"loss": 0.0951,
"step": 7640
},
{
"epoch": 22.17391304347826,
"grad_norm": 0.4752453565597534,
"learning_rate": 7.270689093862105e-05,
"loss": 0.1036,
"step": 7650
},
{
"epoch": 22.202898550724637,
"grad_norm": 0.5493319630622864,
"learning_rate": 7.263320351951374e-05,
"loss": 0.1031,
"step": 7660
},
{
"epoch": 22.231884057971016,
"grad_norm": 0.43251800537109375,
"learning_rate": 7.255945422209227e-05,
"loss": 0.0984,
"step": 7670
},
{
"epoch": 22.26086956521739,
"grad_norm": 0.4127131998538971,
"learning_rate": 7.248564324798437e-05,
"loss": 0.0907,
"step": 7680
},
{
"epoch": 22.28985507246377,
"grad_norm": 0.28903988003730774,
"learning_rate": 7.241177079898644e-05,
"loss": 0.086,
"step": 7690
},
{
"epoch": 22.318840579710145,
"grad_norm": 0.35488802194595337,
"learning_rate": 7.233783707706295e-05,
"loss": 0.1017,
"step": 7700
},
{
"epoch": 22.347826086956523,
"grad_norm": 0.2337232232093811,
"learning_rate": 7.226384228434586e-05,
"loss": 0.0888,
"step": 7710
},
{
"epoch": 22.3768115942029,
"grad_norm": 0.2909092307090759,
"learning_rate": 7.21897866231341e-05,
"loss": 0.1124,
"step": 7720
},
{
"epoch": 22.405797101449274,
"grad_norm": 0.3277481496334076,
"learning_rate": 7.211567029589303e-05,
"loss": 0.1086,
"step": 7730
},
{
"epoch": 22.434782608695652,
"grad_norm": 0.2835393249988556,
"learning_rate": 7.204149350525387e-05,
"loss": 0.1085,
"step": 7740
},
{
"epoch": 22.463768115942027,
"grad_norm": 0.3101160526275635,
"learning_rate": 7.196725645401309e-05,
"loss": 0.0959,
"step": 7750
},
{
"epoch": 22.492753623188406,
"grad_norm": 0.42514339089393616,
"learning_rate": 7.1892959345132e-05,
"loss": 0.1108,
"step": 7760
},
{
"epoch": 22.52173913043478,
"grad_norm": 0.39696502685546875,
"learning_rate": 7.181860238173605e-05,
"loss": 0.1352,
"step": 7770
},
{
"epoch": 22.55072463768116,
"grad_norm": 0.43896979093551636,
"learning_rate": 7.174418576711432e-05,
"loss": 0.0971,
"step": 7780
},
{
"epoch": 22.579710144927535,
"grad_norm": 0.47712811827659607,
"learning_rate": 7.1669709704719e-05,
"loss": 0.0877,
"step": 7790
},
{
"epoch": 22.608695652173914,
"grad_norm": 0.3447103202342987,
"learning_rate": 7.159517439816481e-05,
"loss": 0.0971,
"step": 7800
},
{
"epoch": 22.63768115942029,
"grad_norm": 0.39140835404396057,
"learning_rate": 7.152058005122842e-05,
"loss": 0.0885,
"step": 7810
},
{
"epoch": 22.666666666666668,
"grad_norm": 0.28053638339042664,
"learning_rate": 7.144592686784793e-05,
"loss": 0.0945,
"step": 7820
},
{
"epoch": 22.695652173913043,
"grad_norm": 0.3110656142234802,
"learning_rate": 7.137121505212229e-05,
"loss": 0.1094,
"step": 7830
},
{
"epoch": 22.72463768115942,
"grad_norm": 0.38985612988471985,
"learning_rate": 7.129644480831077e-05,
"loss": 0.0794,
"step": 7840
},
{
"epoch": 22.753623188405797,
"grad_norm": 0.49533525109291077,
"learning_rate": 7.122161634083234e-05,
"loss": 0.1002,
"step": 7850
},
{
"epoch": 22.782608695652176,
"grad_norm": 0.43202659487724304,
"learning_rate": 7.114672985426516e-05,
"loss": 0.0962,
"step": 7860
},
{
"epoch": 22.81159420289855,
"grad_norm": 0.4098835587501526,
"learning_rate": 7.107178555334606e-05,
"loss": 0.1022,
"step": 7870
},
{
"epoch": 22.840579710144926,
"grad_norm": 0.39185699820518494,
"learning_rate": 7.099678364296989e-05,
"loss": 0.0911,
"step": 7880
},
{
"epoch": 22.869565217391305,
"grad_norm": 0.38475117087364197,
"learning_rate": 7.0921724328189e-05,
"loss": 0.1046,
"step": 7890
},
{
"epoch": 22.89855072463768,
"grad_norm": 0.3477749824523926,
"learning_rate": 7.084660781421268e-05,
"loss": 0.1047,
"step": 7900
},
{
"epoch": 22.92753623188406,
"grad_norm": 0.32388657331466675,
"learning_rate": 7.077143430640662e-05,
"loss": 0.1111,
"step": 7910
},
{
"epoch": 22.956521739130434,
"grad_norm": 0.28074944019317627,
"learning_rate": 7.069620401029232e-05,
"loss": 0.0952,
"step": 7920
},
{
"epoch": 22.985507246376812,
"grad_norm": 0.4655712842941284,
"learning_rate": 7.062091713154655e-05,
"loss": 0.119,
"step": 7930
},
{
"epoch": 23.014492753623188,
"grad_norm": 0.3347054719924927,
"learning_rate": 7.054557387600075e-05,
"loss": 0.1116,
"step": 7940
},
{
"epoch": 23.043478260869566,
"grad_norm": 0.3056691288948059,
"learning_rate": 7.04701744496405e-05,
"loss": 0.0995,
"step": 7950
},
{
"epoch": 23.07246376811594,
"grad_norm": 0.29524263739585876,
"learning_rate": 7.039471905860495e-05,
"loss": 0.0923,
"step": 7960
},
{
"epoch": 23.10144927536232,
"grad_norm": 0.3292746841907501,
"learning_rate": 7.031920790918628e-05,
"loss": 0.102,
"step": 7970
},
{
"epoch": 23.130434782608695,
"grad_norm": 0.3358573019504547,
"learning_rate": 7.024364120782906e-05,
"loss": 0.0919,
"step": 7980
},
{
"epoch": 23.159420289855074,
"grad_norm": 0.4067601263523102,
"learning_rate": 7.016801916112978e-05,
"loss": 0.093,
"step": 7990
},
{
"epoch": 23.18840579710145,
"grad_norm": 0.3560484051704407,
"learning_rate": 7.009234197583623e-05,
"loss": 0.1045,
"step": 8000
},
{
"epoch": 23.217391304347824,
"grad_norm": 0.4393708407878876,
"learning_rate": 7.001660985884692e-05,
"loss": 0.0963,
"step": 8010
},
{
"epoch": 23.246376811594203,
"grad_norm": 0.5236015915870667,
"learning_rate": 6.994082301721063e-05,
"loss": 0.1136,
"step": 8020
},
{
"epoch": 23.27536231884058,
"grad_norm": 0.4401554465293884,
"learning_rate": 6.986498165812563e-05,
"loss": 0.0955,
"step": 8030
},
{
"epoch": 23.304347826086957,
"grad_norm": 0.30517131090164185,
"learning_rate": 6.978908598893932e-05,
"loss": 0.0939,
"step": 8040
},
{
"epoch": 23.333333333333332,
"grad_norm": 0.3316713869571686,
"learning_rate": 6.971313621714756e-05,
"loss": 0.0912,
"step": 8050
},
{
"epoch": 23.36231884057971,
"grad_norm": 0.388837069272995,
"learning_rate": 6.96371325503941e-05,
"loss": 0.1064,
"step": 8060
},
{
"epoch": 23.391304347826086,
"grad_norm": 0.2927514612674713,
"learning_rate": 6.956107519647014e-05,
"loss": 0.1115,
"step": 8070
},
{
"epoch": 23.420289855072465,
"grad_norm": 0.4804588556289673,
"learning_rate": 6.94849643633135e-05,
"loss": 0.1035,
"step": 8080
},
{
"epoch": 23.44927536231884,
"grad_norm": 0.32680946588516235,
"learning_rate": 6.940880025900834e-05,
"loss": 0.0984,
"step": 8090
},
{
"epoch": 23.47826086956522,
"grad_norm": 0.392529159784317,
"learning_rate": 6.933258309178438e-05,
"loss": 0.1002,
"step": 8100
},
{
"epoch": 23.507246376811594,
"grad_norm": 0.451831579208374,
"learning_rate": 6.925631307001646e-05,
"loss": 0.094,
"step": 8110
},
{
"epoch": 23.536231884057973,
"grad_norm": 0.314008504152298,
"learning_rate": 6.91799904022239e-05,
"loss": 0.084,
"step": 8120
},
{
"epoch": 23.565217391304348,
"grad_norm": 0.33511435985565186,
"learning_rate": 6.910361529706997e-05,
"loss": 0.0823,
"step": 8130
},
{
"epoch": 23.594202898550726,
"grad_norm": 0.36016684770584106,
"learning_rate": 6.902718796336131e-05,
"loss": 0.1009,
"step": 8140
},
{
"epoch": 23.6231884057971,
"grad_norm": 0.29572927951812744,
"learning_rate": 6.895070861004729e-05,
"loss": 0.1142,
"step": 8150
},
{
"epoch": 23.652173913043477,
"grad_norm": 0.3417483866214752,
"learning_rate": 6.887417744621956e-05,
"loss": 0.1075,
"step": 8160
},
{
"epoch": 23.681159420289855,
"grad_norm": 0.3952733278274536,
"learning_rate": 6.87975946811114e-05,
"loss": 0.0899,
"step": 8170
},
{
"epoch": 23.71014492753623,
"grad_norm": 0.4189750552177429,
"learning_rate": 6.872096052409718e-05,
"loss": 0.0903,
"step": 8180
},
{
"epoch": 23.73913043478261,
"grad_norm": 0.3113269805908203,
"learning_rate": 6.864427518469174e-05,
"loss": 0.098,
"step": 8190
},
{
"epoch": 23.768115942028984,
"grad_norm": 0.29072266817092896,
"learning_rate": 6.856753887254986e-05,
"loss": 0.0961,
"step": 8200
},
{
"epoch": 23.797101449275363,
"grad_norm": 0.31375062465667725,
"learning_rate": 6.849075179746572e-05,
"loss": 0.1149,
"step": 8210
},
{
"epoch": 23.82608695652174,
"grad_norm": 0.3995482325553894,
"learning_rate": 6.841391416937221e-05,
"loss": 0.0941,
"step": 8220
},
{
"epoch": 23.855072463768117,
"grad_norm": 0.4191873371601105,
"learning_rate": 6.833702619834053e-05,
"loss": 0.1021,
"step": 8230
},
{
"epoch": 23.884057971014492,
"grad_norm": 0.3322891891002655,
"learning_rate": 6.82600880945794e-05,
"loss": 0.1022,
"step": 8240
},
{
"epoch": 23.91304347826087,
"grad_norm": 0.37546929717063904,
"learning_rate": 6.818310006843468e-05,
"loss": 0.1051,
"step": 8250
},
{
"epoch": 23.942028985507246,
"grad_norm": 0.4086068868637085,
"learning_rate": 6.810606233038868e-05,
"loss": 0.115,
"step": 8260
},
{
"epoch": 23.971014492753625,
"grad_norm": 0.3562030494213104,
"learning_rate": 6.802897509105966e-05,
"loss": 0.094,
"step": 8270
},
{
"epoch": 24.0,
"grad_norm": 0.8567777276039124,
"learning_rate": 6.79518385612012e-05,
"loss": 0.1065,
"step": 8280
},
{
"epoch": 24.028985507246375,
"grad_norm": 0.6033879518508911,
"learning_rate": 6.787465295170157e-05,
"loss": 0.1118,
"step": 8290
},
{
"epoch": 24.057971014492754,
"grad_norm": 0.4588029384613037,
"learning_rate": 6.779741847358332e-05,
"loss": 0.1,
"step": 8300
},
{
"epoch": 24.08695652173913,
"grad_norm": 0.5255804657936096,
"learning_rate": 6.772013533800256e-05,
"loss": 0.1236,
"step": 8310
},
{
"epoch": 24.115942028985508,
"grad_norm": 0.4105243980884552,
"learning_rate": 6.764280375624843e-05,
"loss": 0.1017,
"step": 8320
},
{
"epoch": 24.144927536231883,
"grad_norm": 0.27915531396865845,
"learning_rate": 6.756542393974252e-05,
"loss": 0.1041,
"step": 8330
},
{
"epoch": 24.17391304347826,
"grad_norm": 0.24191172420978546,
"learning_rate": 6.748799610003828e-05,
"loss": 0.0786,
"step": 8340
},
{
"epoch": 24.202898550724637,
"grad_norm": 0.3112497329711914,
"learning_rate": 6.741052044882048e-05,
"loss": 0.108,
"step": 8350
},
{
"epoch": 24.231884057971016,
"grad_norm": 0.42907461524009705,
"learning_rate": 6.73329971979046e-05,
"loss": 0.1122,
"step": 8360
},
{
"epoch": 24.26086956521739,
"grad_norm": 0.39249175786972046,
"learning_rate": 6.725542655923625e-05,
"loss": 0.1023,
"step": 8370
},
{
"epoch": 24.28985507246377,
"grad_norm": 0.3118097186088562,
"learning_rate": 6.717780874489057e-05,
"loss": 0.0926,
"step": 8380
},
{
"epoch": 24.318840579710145,
"grad_norm": 0.4916854500770569,
"learning_rate": 6.710014396707172e-05,
"loss": 0.1157,
"step": 8390
},
{
"epoch": 24.347826086956523,
"grad_norm": 0.30792731046676636,
"learning_rate": 6.702243243811221e-05,
"loss": 0.103,
"step": 8400
},
{
"epoch": 24.3768115942029,
"grad_norm": 0.438876748085022,
"learning_rate": 6.694467437047244e-05,
"loss": 0.1035,
"step": 8410
},
{
"epoch": 24.405797101449274,
"grad_norm": 0.4188535511493683,
"learning_rate": 6.686686997673997e-05,
"loss": 0.094,
"step": 8420
},
{
"epoch": 24.434782608695652,
"grad_norm": 0.35675615072250366,
"learning_rate": 6.678901946962903e-05,
"loss": 0.0814,
"step": 8430
},
{
"epoch": 24.463768115942027,
"grad_norm": 0.3431568145751953,
"learning_rate": 6.671112306197996e-05,
"loss": 0.0866,
"step": 8440
},
{
"epoch": 24.492753623188406,
"grad_norm": 0.35794076323509216,
"learning_rate": 6.663318096675854e-05,
"loss": 0.1105,
"step": 8450
},
{
"epoch": 24.52173913043478,
"grad_norm": 0.41272181272506714,
"learning_rate": 6.655519339705552e-05,
"loss": 0.0897,
"step": 8460
},
{
"epoch": 24.55072463768116,
"grad_norm": 0.32626742124557495,
"learning_rate": 6.647716056608588e-05,
"loss": 0.0956,
"step": 8470
},
{
"epoch": 24.579710144927535,
"grad_norm": 0.3472849130630493,
"learning_rate": 6.639908268718843e-05,
"loss": 0.0892,
"step": 8480
},
{
"epoch": 24.608695652173914,
"grad_norm": 0.40613627433776855,
"learning_rate": 6.632095997382514e-05,
"loss": 0.0968,
"step": 8490
},
{
"epoch": 24.63768115942029,
"grad_norm": 0.367157518863678,
"learning_rate": 6.624279263958047e-05,
"loss": 0.0907,
"step": 8500
},
{
"epoch": 24.666666666666668,
"grad_norm": 0.3079644441604614,
"learning_rate": 6.616458089816097e-05,
"loss": 0.1052,
"step": 8510
},
{
"epoch": 24.695652173913043,
"grad_norm": 0.29992493987083435,
"learning_rate": 6.608632496339454e-05,
"loss": 0.0841,
"step": 8520
},
{
"epoch": 24.72463768115942,
"grad_norm": 0.4213980734348297,
"learning_rate": 6.600802504922988e-05,
"loss": 0.1172,
"step": 8530
},
{
"epoch": 24.753623188405797,
"grad_norm": 0.42781922221183777,
"learning_rate": 6.592968136973604e-05,
"loss": 0.1114,
"step": 8540
},
{
"epoch": 24.782608695652176,
"grad_norm": 0.3830643892288208,
"learning_rate": 6.585129413910159e-05,
"loss": 0.0979,
"step": 8550
},
{
"epoch": 24.81159420289855,
"grad_norm": 0.33465775847435,
"learning_rate": 6.577286357163424e-05,
"loss": 0.1,
"step": 8560
},
{
"epoch": 24.840579710144926,
"grad_norm": 0.4267924427986145,
"learning_rate": 6.569438988176018e-05,
"loss": 0.0926,
"step": 8570
},
{
"epoch": 24.869565217391305,
"grad_norm": 0.3853652775287628,
"learning_rate": 6.561587328402347e-05,
"loss": 0.0893,
"step": 8580
},
{
"epoch": 24.89855072463768,
"grad_norm": 0.3373638689517975,
"learning_rate": 6.553731399308549e-05,
"loss": 0.091,
"step": 8590
},
{
"epoch": 24.92753623188406,
"grad_norm": 0.38048073649406433,
"learning_rate": 6.545871222372436e-05,
"loss": 0.0821,
"step": 8600
},
{
"epoch": 24.956521739130434,
"grad_norm": 0.30811434984207153,
"learning_rate": 6.538006819083426e-05,
"loss": 0.0819,
"step": 8610
},
{
"epoch": 24.985507246376812,
"grad_norm": 0.34115296602249146,
"learning_rate": 6.530138210942505e-05,
"loss": 0.1111,
"step": 8620
},
{
"epoch": 25.014492753623188,
"grad_norm": 0.23322944343090057,
"learning_rate": 6.522265419462141e-05,
"loss": 0.0829,
"step": 8630
},
{
"epoch": 25.043478260869566,
"grad_norm": 0.4348927438259125,
"learning_rate": 6.514388466166248e-05,
"loss": 0.0903,
"step": 8640
},
{
"epoch": 25.07246376811594,
"grad_norm": 0.42378875613212585,
"learning_rate": 6.506507372590119e-05,
"loss": 0.0964,
"step": 8650
},
{
"epoch": 25.10144927536232,
"grad_norm": 0.4011875092983246,
"learning_rate": 6.498622160280355e-05,
"loss": 0.0741,
"step": 8660
},
{
"epoch": 25.130434782608695,
"grad_norm": 0.2823712229728699,
"learning_rate": 6.490732850794832e-05,
"loss": 0.0806,
"step": 8670
},
{
"epoch": 25.159420289855074,
"grad_norm": 0.40900862216949463,
"learning_rate": 6.482839465702616e-05,
"loss": 0.0823,
"step": 8680
},
{
"epoch": 25.18840579710145,
"grad_norm": 0.31911250948905945,
"learning_rate": 6.474942026583923e-05,
"loss": 0.0901,
"step": 8690
},
{
"epoch": 25.217391304347824,
"grad_norm": 0.42689287662506104,
"learning_rate": 6.467040555030052e-05,
"loss": 0.1052,
"step": 8700
},
{
"epoch": 25.246376811594203,
"grad_norm": 0.4792473018169403,
"learning_rate": 6.459135072643321e-05,
"loss": 0.1032,
"step": 8710
},
{
"epoch": 25.27536231884058,
"grad_norm": 0.39220404624938965,
"learning_rate": 6.451225601037019e-05,
"loss": 0.0866,
"step": 8720
},
{
"epoch": 25.304347826086957,
"grad_norm": 0.3560490012168884,
"learning_rate": 6.443312161835338e-05,
"loss": 0.0755,
"step": 8730
},
{
"epoch": 25.333333333333332,
"grad_norm": 0.36267733573913574,
"learning_rate": 6.43539477667332e-05,
"loss": 0.1098,
"step": 8740
},
{
"epoch": 25.36231884057971,
"grad_norm": 0.31615450978279114,
"learning_rate": 6.427473467196793e-05,
"loss": 0.1033,
"step": 8750
},
{
"epoch": 25.391304347826086,
"grad_norm": 0.4009799659252167,
"learning_rate": 6.419548255062315e-05,
"loss": 0.0904,
"step": 8760
},
{
"epoch": 25.420289855072465,
"grad_norm": 0.6001753211021423,
"learning_rate": 6.411619161937112e-05,
"loss": 0.0924,
"step": 8770
},
{
"epoch": 25.44927536231884,
"grad_norm": 0.2777409851551056,
"learning_rate": 6.403686209499022e-05,
"loss": 0.0911,
"step": 8780
},
{
"epoch": 25.47826086956522,
"grad_norm": 0.25605079531669617,
"learning_rate": 6.395749419436437e-05,
"loss": 0.1018,
"step": 8790
},
{
"epoch": 25.507246376811594,
"grad_norm": 0.3277330696582794,
"learning_rate": 6.387808813448234e-05,
"loss": 0.09,
"step": 8800
},
{
"epoch": 25.536231884057973,
"grad_norm": 0.22537122666835785,
"learning_rate": 6.37986441324373e-05,
"loss": 0.0835,
"step": 8810
},
{
"epoch": 25.565217391304348,
"grad_norm": 0.427733838558197,
"learning_rate": 6.37191624054261e-05,
"loss": 0.0812,
"step": 8820
},
{
"epoch": 25.594202898550726,
"grad_norm": 0.20617811381816864,
"learning_rate": 6.363964317074872e-05,
"loss": 0.0638,
"step": 8830
},
{
"epoch": 25.6231884057971,
"grad_norm": 0.32776620984077454,
"learning_rate": 6.356008664580776e-05,
"loss": 0.0969,
"step": 8840
},
{
"epoch": 25.652173913043477,
"grad_norm": 0.49045297503471375,
"learning_rate": 6.348049304810771e-05,
"loss": 0.0872,
"step": 8850
},
{
"epoch": 25.681159420289855,
"grad_norm": 0.25885435938835144,
"learning_rate": 6.340086259525442e-05,
"loss": 0.0876,
"step": 8860
},
{
"epoch": 25.71014492753623,
"grad_norm": 0.23793990910053253,
"learning_rate": 6.332119550495448e-05,
"loss": 0.0994,
"step": 8870
},
{
"epoch": 25.73913043478261,
"grad_norm": 0.5432042479515076,
"learning_rate": 6.324149199501473e-05,
"loss": 0.0867,
"step": 8880
},
{
"epoch": 25.768115942028984,
"grad_norm": 0.21161885559558868,
"learning_rate": 6.316175228334146e-05,
"loss": 0.0952,
"step": 8890
},
{
"epoch": 25.797101449275363,
"grad_norm": 0.5830066204071045,
"learning_rate": 6.308197658794003e-05,
"loss": 0.1331,
"step": 8900
},
{
"epoch": 25.82608695652174,
"grad_norm": 0.4310133159160614,
"learning_rate": 6.300216512691417e-05,
"loss": 0.1281,
"step": 8910
},
{
"epoch": 25.855072463768117,
"grad_norm": 0.48247355222702026,
"learning_rate": 6.292231811846532e-05,
"loss": 0.0932,
"step": 8920
},
{
"epoch": 25.884057971014492,
"grad_norm": 0.31100866198539734,
"learning_rate": 6.284243578089217e-05,
"loss": 0.0934,
"step": 8930
},
{
"epoch": 25.91304347826087,
"grad_norm": 0.6842138171195984,
"learning_rate": 6.276251833258999e-05,
"loss": 0.0881,
"step": 8940
},
{
"epoch": 25.942028985507246,
"grad_norm": 0.31012195348739624,
"learning_rate": 6.268256599205003e-05,
"loss": 0.1034,
"step": 8950
},
{
"epoch": 25.971014492753625,
"grad_norm": 0.4207480847835541,
"learning_rate": 6.260257897785892e-05,
"loss": 0.1123,
"step": 8960
},
{
"epoch": 26.0,
"grad_norm": 0.4856835901737213,
"learning_rate": 6.252255750869811e-05,
"loss": 0.0968,
"step": 8970
},
{
"epoch": 26.028985507246375,
"grad_norm": 0.34793731570243835,
"learning_rate": 6.244250180334325e-05,
"loss": 0.0958,
"step": 8980
},
{
"epoch": 26.057971014492754,
"grad_norm": 0.36127743124961853,
"learning_rate": 6.236241208066356e-05,
"loss": 0.0995,
"step": 8990
},
{
"epoch": 26.08695652173913,
"grad_norm": 0.3173960745334625,
"learning_rate": 6.228228855962133e-05,
"loss": 0.089,
"step": 9000
},
{
"epoch": 26.115942028985508,
"grad_norm": 0.45852774381637573,
"learning_rate": 6.220213145927115e-05,
"loss": 0.1077,
"step": 9010
},
{
"epoch": 26.144927536231883,
"grad_norm": 0.3714202344417572,
"learning_rate": 6.212194099875951e-05,
"loss": 0.0766,
"step": 9020
},
{
"epoch": 26.17391304347826,
"grad_norm": 0.5989710688591003,
"learning_rate": 6.204171739732405e-05,
"loss": 0.0974,
"step": 9030
},
{
"epoch": 26.202898550724637,
"grad_norm": 0.3582770824432373,
"learning_rate": 6.196146087429303e-05,
"loss": 0.1153,
"step": 9040
},
{
"epoch": 26.231884057971016,
"grad_norm": 0.3930160105228424,
"learning_rate": 6.188117164908474e-05,
"loss": 0.1032,
"step": 9050
},
{
"epoch": 26.26086956521739,
"grad_norm": 0.4734560549259186,
"learning_rate": 6.180084994120684e-05,
"loss": 0.0911,
"step": 9060
},
{
"epoch": 26.28985507246377,
"grad_norm": 0.36610832810401917,
"learning_rate": 6.17204959702558e-05,
"loss": 0.0814,
"step": 9070
},
{
"epoch": 26.318840579710145,
"grad_norm": 0.37070533633232117,
"learning_rate": 6.164010995591635e-05,
"loss": 0.0913,
"step": 9080
},
{
"epoch": 26.347826086956523,
"grad_norm": 0.36717358231544495,
"learning_rate": 6.155969211796076e-05,
"loss": 0.1182,
"step": 9090
},
{
"epoch": 26.3768115942029,
"grad_norm": 0.39474284648895264,
"learning_rate": 6.147924267624829e-05,
"loss": 0.0764,
"step": 9100
},
{
"epoch": 26.405797101449274,
"grad_norm": 0.3292117118835449,
"learning_rate": 6.13987618507247e-05,
"loss": 0.0933,
"step": 9110
},
{
"epoch": 26.434782608695652,
"grad_norm": 0.4586057662963867,
"learning_rate": 6.131824986142147e-05,
"loss": 0.104,
"step": 9120
},
{
"epoch": 26.463768115942027,
"grad_norm": 0.4136529266834259,
"learning_rate": 6.123770692845529e-05,
"loss": 0.1009,
"step": 9130
},
{
"epoch": 26.492753623188406,
"grad_norm": 0.21014559268951416,
"learning_rate": 6.11571332720275e-05,
"loss": 0.097,
"step": 9140
},
{
"epoch": 26.52173913043478,
"grad_norm": 0.34362557530403137,
"learning_rate": 6.107652911242336e-05,
"loss": 0.0935,
"step": 9150
},
{
"epoch": 26.55072463768116,
"grad_norm": 0.40612903237342834,
"learning_rate": 6.0995894670011586e-05,
"loss": 0.1103,
"step": 9160
},
{
"epoch": 26.579710144927535,
"grad_norm": 0.5520173907279968,
"learning_rate": 6.091523016524368e-05,
"loss": 0.08,
"step": 9170
},
{
"epoch": 26.608695652173914,
"grad_norm": 0.34539029002189636,
"learning_rate": 6.083453581865328e-05,
"loss": 0.081,
"step": 9180
},
{
"epoch": 26.63768115942029,
"grad_norm": 0.2292974442243576,
"learning_rate": 6.075381185085568e-05,
"loss": 0.0913,
"step": 9190
},
{
"epoch": 26.666666666666668,
"grad_norm": 0.530166506767273,
"learning_rate": 6.067305848254709e-05,
"loss": 0.1242,
"step": 9200
},
{
"epoch": 26.695652173913043,
"grad_norm": 0.313507616519928,
"learning_rate": 6.059227593450418e-05,
"loss": 0.091,
"step": 9210
},
{
"epoch": 26.72463768115942,
"grad_norm": 0.22776463627815247,
"learning_rate": 6.051146442758333e-05,
"loss": 0.0891,
"step": 9220
},
{
"epoch": 26.753623188405797,
"grad_norm": 0.35936057567596436,
"learning_rate": 6.043062418272012e-05,
"loss": 0.0893,
"step": 9230
},
{
"epoch": 26.782608695652176,
"grad_norm": 0.4251636564731598,
"learning_rate": 6.0349755420928666e-05,
"loss": 0.0899,
"step": 9240
},
{
"epoch": 26.81159420289855,
"grad_norm": 0.420236736536026,
"learning_rate": 6.0268858363301105e-05,
"loss": 0.0914,
"step": 9250
},
{
"epoch": 26.840579710144926,
"grad_norm": 0.4716984033584595,
"learning_rate": 6.018793323100689e-05,
"loss": 0.1019,
"step": 9260
},
{
"epoch": 26.869565217391305,
"grad_norm": 0.2790106534957886,
"learning_rate": 6.0106980245292255e-05,
"loss": 0.0795,
"step": 9270
},
{
"epoch": 26.89855072463768,
"grad_norm": 0.6252140402793884,
"learning_rate": 6.002599962747957e-05,
"loss": 0.0852,
"step": 9280
},
{
"epoch": 26.92753623188406,
"grad_norm": 0.43576961755752563,
"learning_rate": 5.994499159896673e-05,
"loss": 0.0998,
"step": 9290
},
{
"epoch": 26.956521739130434,
"grad_norm": 0.6333770751953125,
"learning_rate": 5.9863956381226607e-05,
"loss": 0.0915,
"step": 9300
},
{
"epoch": 26.985507246376812,
"grad_norm": 0.5500407814979553,
"learning_rate": 5.9782894195806394e-05,
"loss": 0.104,
"step": 9310
},
{
"epoch": 27.014492753623188,
"grad_norm": 0.44380778074264526,
"learning_rate": 5.9701805264327004e-05,
"loss": 0.0836,
"step": 9320
},
{
"epoch": 27.043478260869566,
"grad_norm": 0.41339370608329773,
"learning_rate": 5.96206898084825e-05,
"loss": 0.0898,
"step": 9330
},
{
"epoch": 27.07246376811594,
"grad_norm": 0.42062732577323914,
"learning_rate": 5.953954805003942e-05,
"loss": 0.1016,
"step": 9340
},
{
"epoch": 27.10144927536232,
"grad_norm": 0.26764097809791565,
"learning_rate": 5.945838021083623e-05,
"loss": 0.0953,
"step": 9350
},
{
"epoch": 27.130434782608695,
"grad_norm": 0.3174140453338623,
"learning_rate": 5.9377186512782714e-05,
"loss": 0.1038,
"step": 9360
},
{
"epoch": 27.159420289855074,
"grad_norm": 0.5403830409049988,
"learning_rate": 5.929596717785935e-05,
"loss": 0.0998,
"step": 9370
},
{
"epoch": 27.18840579710145,
"grad_norm": 0.29460418224334717,
"learning_rate": 5.921472242811668e-05,
"loss": 0.0998,
"step": 9380
},
{
"epoch": 27.217391304347824,
"grad_norm": 0.3835254907608032,
"learning_rate": 5.913345248567475e-05,
"loss": 0.0895,
"step": 9390
},
{
"epoch": 27.246376811594203,
"grad_norm": 0.26222512125968933,
"learning_rate": 5.905215757272248e-05,
"loss": 0.0829,
"step": 9400
},
{
"epoch": 27.27536231884058,
"grad_norm": 0.3459964990615845,
"learning_rate": 5.897083791151706e-05,
"loss": 0.0762,
"step": 9410
},
{
"epoch": 27.304347826086957,
"grad_norm": 0.4421097934246063,
"learning_rate": 5.888949372438336e-05,
"loss": 0.0809,
"step": 9420
},
{
"epoch": 27.333333333333332,
"grad_norm": 0.3622925877571106,
"learning_rate": 5.8808125233713255e-05,
"loss": 0.0906,
"step": 9430
},
{
"epoch": 27.36231884057971,
"grad_norm": 0.25134244561195374,
"learning_rate": 5.872673266196509e-05,
"loss": 0.0879,
"step": 9440
},
{
"epoch": 27.391304347826086,
"grad_norm": 0.5268398523330688,
"learning_rate": 5.864531623166305e-05,
"loss": 0.0896,
"step": 9450
},
{
"epoch": 27.420289855072465,
"grad_norm": 0.2773943841457367,
"learning_rate": 5.856387616539656e-05,
"loss": 0.0992,
"step": 9460
},
{
"epoch": 27.44927536231884,
"grad_norm": 0.40491020679473877,
"learning_rate": 5.848241268581967e-05,
"loss": 0.1059,
"step": 9470
},
{
"epoch": 27.47826086956522,
"grad_norm": 0.5842623710632324,
"learning_rate": 5.840092601565037e-05,
"loss": 0.0952,
"step": 9480
},
{
"epoch": 27.507246376811594,
"grad_norm": 0.4927104115486145,
"learning_rate": 5.8319416377670144e-05,
"loss": 0.1123,
"step": 9490
},
{
"epoch": 27.536231884057973,
"grad_norm": 0.29302486777305603,
"learning_rate": 5.82378839947232e-05,
"loss": 0.106,
"step": 9500
},
{
"epoch": 27.565217391304348,
"grad_norm": 0.4573745131492615,
"learning_rate": 5.815632908971599e-05,
"loss": 0.0952,
"step": 9510
},
{
"epoch": 27.594202898550726,
"grad_norm": 0.4357374906539917,
"learning_rate": 5.80747518856165e-05,
"loss": 0.0924,
"step": 9520
},
{
"epoch": 27.6231884057971,
"grad_norm": 0.3612167537212372,
"learning_rate": 5.799315260545367e-05,
"loss": 0.1071,
"step": 9530
},
{
"epoch": 27.652173913043477,
"grad_norm": 0.3312841057777405,
"learning_rate": 5.791153147231686e-05,
"loss": 0.1093,
"step": 9540
},
{
"epoch": 27.681159420289855,
"grad_norm": 0.43029627203941345,
"learning_rate": 5.782988870935509e-05,
"loss": 0.0969,
"step": 9550
},
{
"epoch": 27.71014492753623,
"grad_norm": 0.371330201625824,
"learning_rate": 5.774822453977657e-05,
"loss": 0.0935,
"step": 9560
},
{
"epoch": 27.73913043478261,
"grad_norm": 0.35629457235336304,
"learning_rate": 5.7666539186848036e-05,
"loss": 0.0972,
"step": 9570
},
{
"epoch": 27.768115942028984,
"grad_norm": 0.37646907567977905,
"learning_rate": 5.758483287389411e-05,
"loss": 0.0836,
"step": 9580
},
{
"epoch": 27.797101449275363,
"grad_norm": 0.26607057452201843,
"learning_rate": 5.7503105824296735e-05,
"loss": 0.1109,
"step": 9590
},
{
"epoch": 27.82608695652174,
"grad_norm": 0.3088560998439789,
"learning_rate": 5.742135826149453e-05,
"loss": 0.0888,
"step": 9600
},
{
"epoch": 27.855072463768117,
"grad_norm": 0.2338147908449173,
"learning_rate": 5.7339590408982223e-05,
"loss": 0.0929,
"step": 9610
},
{
"epoch": 27.884057971014492,
"grad_norm": 0.33873507380485535,
"learning_rate": 5.725780249031e-05,
"loss": 0.0971,
"step": 9620
},
{
"epoch": 27.91304347826087,
"grad_norm": 0.2373759001493454,
"learning_rate": 5.717599472908292e-05,
"loss": 0.0844,
"step": 9630
},
{
"epoch": 27.942028985507246,
"grad_norm": 0.36954036355018616,
"learning_rate": 5.7094167348960237e-05,
"loss": 0.0979,
"step": 9640
},
{
"epoch": 27.971014492753625,
"grad_norm": 0.44296813011169434,
"learning_rate": 5.7012320573654945e-05,
"loss": 0.0953,
"step": 9650
},
{
"epoch": 28.0,
"grad_norm": 0.5584344267845154,
"learning_rate": 5.693045462693295e-05,
"loss": 0.0984,
"step": 9660
},
{
"epoch": 28.028985507246375,
"grad_norm": 0.2427714765071869,
"learning_rate": 5.684856973261266e-05,
"loss": 0.0809,
"step": 9670
},
{
"epoch": 28.057971014492754,
"grad_norm": 0.33059096336364746,
"learning_rate": 5.6766666114564215e-05,
"loss": 0.0917,
"step": 9680
},
{
"epoch": 28.08695652173913,
"grad_norm": 0.32649749517440796,
"learning_rate": 5.668474399670899e-05,
"loss": 0.0882,
"step": 9690
},
{
"epoch": 28.115942028985508,
"grad_norm": 0.2927171289920807,
"learning_rate": 5.660280360301896e-05,
"loss": 0.0931,
"step": 9700
},
{
"epoch": 28.144927536231883,
"grad_norm": 0.3866276443004608,
"learning_rate": 5.652084515751599e-05,
"loss": 0.1069,
"step": 9710
},
{
"epoch": 28.17391304347826,
"grad_norm": 0.30153888463974,
"learning_rate": 5.643886888427137e-05,
"loss": 0.0833,
"step": 9720
},
{
"epoch": 28.202898550724637,
"grad_norm": 0.4071616232395172,
"learning_rate": 5.6356875007405074e-05,
"loss": 0.0932,
"step": 9730
},
{
"epoch": 28.231884057971016,
"grad_norm": 0.3093550503253937,
"learning_rate": 5.627486375108525e-05,
"loss": 0.0786,
"step": 9740
},
{
"epoch": 28.26086956521739,
"grad_norm": 0.4130619168281555,
"learning_rate": 5.619283533952754e-05,
"loss": 0.1169,
"step": 9750
},
{
"epoch": 28.28985507246377,
"grad_norm": 0.19634698331356049,
"learning_rate": 5.6110789996994474e-05,
"loss": 0.0808,
"step": 9760
},
{
"epoch": 28.318840579710145,
"grad_norm": 0.27509117126464844,
"learning_rate": 5.602872794779491e-05,
"loss": 0.0818,
"step": 9770
},
{
"epoch": 28.347826086956523,
"grad_norm": 0.36869877576828003,
"learning_rate": 5.594664941628334e-05,
"loss": 0.0808,
"step": 9780
},
{
"epoch": 28.3768115942029,
"grad_norm": 0.28021812438964844,
"learning_rate": 5.5864554626859324e-05,
"loss": 0.086,
"step": 9790
},
{
"epoch": 28.405797101449274,
"grad_norm": 0.26831555366516113,
"learning_rate": 5.578244380396691e-05,
"loss": 0.0951,
"step": 9800
},
{
"epoch": 28.434782608695652,
"grad_norm": 0.3775530457496643,
"learning_rate": 5.570031717209394e-05,
"loss": 0.0837,
"step": 9810
},
{
"epoch": 28.463768115942027,
"grad_norm": 0.24371632933616638,
"learning_rate": 5.561817495577147e-05,
"loss": 0.082,
"step": 9820
},
{
"epoch": 28.492753623188406,
"grad_norm": 0.3440195620059967,
"learning_rate": 5.5536017379573215e-05,
"loss": 0.086,
"step": 9830
},
{
"epoch": 28.52173913043478,
"grad_norm": 0.26935017108917236,
"learning_rate": 5.545384466811483e-05,
"loss": 0.0972,
"step": 9840
},
{
"epoch": 28.55072463768116,
"grad_norm": 0.38084691762924194,
"learning_rate": 5.5371657046053384e-05,
"loss": 0.1017,
"step": 9850
},
{
"epoch": 28.579710144927535,
"grad_norm": 0.290239155292511,
"learning_rate": 5.528945473808669e-05,
"loss": 0.0915,
"step": 9860
},
{
"epoch": 28.608695652173914,
"grad_norm": 0.33482253551483154,
"learning_rate": 5.520723796895272e-05,
"loss": 0.0908,
"step": 9870
},
{
"epoch": 28.63768115942029,
"grad_norm": 0.3747408986091614,
"learning_rate": 5.512500696342897e-05,
"loss": 0.0844,
"step": 9880
},
{
"epoch": 28.666666666666668,
"grad_norm": 0.4802875816822052,
"learning_rate": 5.504276194633188e-05,
"loss": 0.078,
"step": 9890
},
{
"epoch": 28.695652173913043,
"grad_norm": 0.27169641852378845,
"learning_rate": 5.49605031425162e-05,
"loss": 0.0952,
"step": 9900
},
{
"epoch": 28.72463768115942,
"grad_norm": 0.4197971522808075,
"learning_rate": 5.487823077687434e-05,
"loss": 0.0876,
"step": 9910
},
{
"epoch": 28.753623188405797,
"grad_norm": 0.37185847759246826,
"learning_rate": 5.4795945074335806e-05,
"loss": 0.1035,
"step": 9920
},
{
"epoch": 28.782608695652176,
"grad_norm": 0.4602510929107666,
"learning_rate": 5.471364625986657e-05,
"loss": 0.1092,
"step": 9930
},
{
"epoch": 28.81159420289855,
"grad_norm": 0.26933249831199646,
"learning_rate": 5.463133455846845e-05,
"loss": 0.0695,
"step": 9940
},
{
"epoch": 28.840579710144926,
"grad_norm": 0.4972953796386719,
"learning_rate": 5.4549010195178505e-05,
"loss": 0.0927,
"step": 9950
},
{
"epoch": 28.869565217391305,
"grad_norm": 0.33794844150543213,
"learning_rate": 5.446667339506838e-05,
"loss": 0.0836,
"step": 9960
},
{
"epoch": 28.89855072463768,
"grad_norm": 0.4375225007534027,
"learning_rate": 5.4384324383243756e-05,
"loss": 0.0749,
"step": 9970
},
{
"epoch": 28.92753623188406,
"grad_norm": 0.3220159411430359,
"learning_rate": 5.430196338484368e-05,
"loss": 0.1062,
"step": 9980
},
{
"epoch": 28.956521739130434,
"grad_norm": 0.2979547381401062,
"learning_rate": 5.4219590625039975e-05,
"loss": 0.0926,
"step": 9990
},
{
"epoch": 28.985507246376812,
"grad_norm": 0.3251277208328247,
"learning_rate": 5.413720632903664e-05,
"loss": 0.0753,
"step": 10000
},
{
"epoch": 29.014492753623188,
"grad_norm": 0.5778645873069763,
"learning_rate": 5.405481072206917e-05,
"loss": 0.1194,
"step": 10010
},
{
"epoch": 29.043478260869566,
"grad_norm": 0.5020672082901001,
"learning_rate": 5.397240402940402e-05,
"loss": 0.0799,
"step": 10020
},
{
"epoch": 29.07246376811594,
"grad_norm": 0.27410048246383667,
"learning_rate": 5.388998647633794e-05,
"loss": 0.0948,
"step": 10030
},
{
"epoch": 29.10144927536232,
"grad_norm": 0.42518341541290283,
"learning_rate": 5.380755828819737e-05,
"loss": 0.0803,
"step": 10040
},
{
"epoch": 29.130434782608695,
"grad_norm": 0.4766830503940582,
"learning_rate": 5.3725119690337846e-05,
"loss": 0.1079,
"step": 10050
},
{
"epoch": 29.159420289855074,
"grad_norm": 0.3795958161354065,
"learning_rate": 5.3642670908143324e-05,
"loss": 0.1114,
"step": 10060
},
{
"epoch": 29.18840579710145,
"grad_norm": 0.2962549924850464,
"learning_rate": 5.356021216702562e-05,
"loss": 0.1028,
"step": 10070
},
{
"epoch": 29.217391304347824,
"grad_norm": 0.5517275929450989,
"learning_rate": 5.347774369242381e-05,
"loss": 0.1054,
"step": 10080
},
{
"epoch": 29.246376811594203,
"grad_norm": 0.26086458563804626,
"learning_rate": 5.3395265709803545e-05,
"loss": 0.1065,
"step": 10090
},
{
"epoch": 29.27536231884058,
"grad_norm": 0.287026971578598,
"learning_rate": 5.331277844465647e-05,
"loss": 0.0849,
"step": 10100
},
{
"epoch": 29.304347826086957,
"grad_norm": 0.23269617557525635,
"learning_rate": 5.323028212249963e-05,
"loss": 0.0786,
"step": 10110
},
{
"epoch": 29.333333333333332,
"grad_norm": 0.384395569562912,
"learning_rate": 5.314777696887481e-05,
"loss": 0.0739,
"step": 10120
},
{
"epoch": 29.36231884057971,
"grad_norm": 0.3276943266391754,
"learning_rate": 5.306526320934796e-05,
"loss": 0.0752,
"step": 10130
},
{
"epoch": 29.391304347826086,
"grad_norm": 0.4074258506298065,
"learning_rate": 5.298274106950854e-05,
"loss": 0.0975,
"step": 10140
},
{
"epoch": 29.420289855072465,
"grad_norm": 0.48793792724609375,
"learning_rate": 5.290021077496893e-05,
"loss": 0.088,
"step": 10150
},
{
"epoch": 29.44927536231884,
"grad_norm": 0.3513041138648987,
"learning_rate": 5.2817672551363816e-05,
"loss": 0.1068,
"step": 10160
},
{
"epoch": 29.47826086956522,
"grad_norm": 0.4190158247947693,
"learning_rate": 5.273512662434952e-05,
"loss": 0.0749,
"step": 10170
},
{
"epoch": 29.507246376811594,
"grad_norm": 0.41182804107666016,
"learning_rate": 5.265257321960349e-05,
"loss": 0.0832,
"step": 10180
},
{
"epoch": 29.536231884057973,
"grad_norm": 0.406429648399353,
"learning_rate": 5.257001256282357e-05,
"loss": 0.0894,
"step": 10190
},
{
"epoch": 29.565217391304348,
"grad_norm": 0.3909933269023895,
"learning_rate": 5.248744487972742e-05,
"loss": 0.0981,
"step": 10200
},
{
"epoch": 29.594202898550726,
"grad_norm": 0.45473939180374146,
"learning_rate": 5.240487039605196e-05,
"loss": 0.0875,
"step": 10210
},
{
"epoch": 29.6231884057971,
"grad_norm": 0.3364003300666809,
"learning_rate": 5.232228933755267e-05,
"loss": 0.0938,
"step": 10220
},
{
"epoch": 29.652173913043477,
"grad_norm": 0.40386608242988586,
"learning_rate": 5.2239701930003006e-05,
"loss": 0.0972,
"step": 10230
},
{
"epoch": 29.681159420289855,
"grad_norm": 0.4128904342651367,
"learning_rate": 5.215710839919379e-05,
"loss": 0.085,
"step": 10240
},
{
"epoch": 29.71014492753623,
"grad_norm": 0.4223697781562805,
"learning_rate": 5.207450897093257e-05,
"loss": 0.0874,
"step": 10250
},
{
"epoch": 29.73913043478261,
"grad_norm": 0.4211285710334778,
"learning_rate": 5.1991903871043046e-05,
"loss": 0.103,
"step": 10260
},
{
"epoch": 29.768115942028984,
"grad_norm": 0.5267713665962219,
"learning_rate": 5.190929332536439e-05,
"loss": 0.0863,
"step": 10270
},
{
"epoch": 29.797101449275363,
"grad_norm": 0.275651752948761,
"learning_rate": 5.182667755975071e-05,
"loss": 0.0865,
"step": 10280
},
{
"epoch": 29.82608695652174,
"grad_norm": 0.3134634792804718,
"learning_rate": 5.1744056800070315e-05,
"loss": 0.0766,
"step": 10290
},
{
"epoch": 29.855072463768117,
"grad_norm": 0.39577049016952515,
"learning_rate": 5.166143127220524e-05,
"loss": 0.0986,
"step": 10300
},
{
"epoch": 29.884057971014492,
"grad_norm": 0.3079846501350403,
"learning_rate": 5.1578801202050485e-05,
"loss": 0.0919,
"step": 10310
},
{
"epoch": 29.91304347826087,
"grad_norm": 0.3528546690940857,
"learning_rate": 5.149616681551355e-05,
"loss": 0.1022,
"step": 10320
},
{
"epoch": 29.942028985507246,
"grad_norm": 0.45117315649986267,
"learning_rate": 5.141352833851367e-05,
"loss": 0.0895,
"step": 10330
},
{
"epoch": 29.971014492753625,
"grad_norm": 0.3826615810394287,
"learning_rate": 5.1330885996981285e-05,
"loss": 0.0746,
"step": 10340
},
{
"epoch": 30.0,
"grad_norm": 0.5462546348571777,
"learning_rate": 5.124824001685741e-05,
"loss": 0.0873,
"step": 10350
},
{
"epoch": 30.028985507246375,
"grad_norm": 0.38107597827911377,
"learning_rate": 5.116559062409298e-05,
"loss": 0.0957,
"step": 10360
},
{
"epoch": 30.057971014492754,
"grad_norm": 0.30049923062324524,
"learning_rate": 5.10829380446483e-05,
"loss": 0.0765,
"step": 10370
},
{
"epoch": 30.08695652173913,
"grad_norm": 0.3036685585975647,
"learning_rate": 5.100028250449235e-05,
"loss": 0.0828,
"step": 10380
},
{
"epoch": 30.115942028985508,
"grad_norm": 0.3812878727912903,
"learning_rate": 5.0917624229602234e-05,
"loss": 0.0929,
"step": 10390
},
{
"epoch": 30.144927536231883,
"grad_norm": 0.3203854262828827,
"learning_rate": 5.0834963445962524e-05,
"loss": 0.0942,
"step": 10400
},
{
"epoch": 30.17391304347826,
"grad_norm": 0.2617367208003998,
"learning_rate": 5.075230037956461e-05,
"loss": 0.0991,
"step": 10410
},
{
"epoch": 30.202898550724637,
"grad_norm": 0.39057719707489014,
"learning_rate": 5.0669635256406213e-05,
"loss": 0.0895,
"step": 10420
},
{
"epoch": 30.231884057971016,
"grad_norm": 0.4870263636112213,
"learning_rate": 5.058696830249058e-05,
"loss": 0.0991,
"step": 10430
},
{
"epoch": 30.26086956521739,
"grad_norm": 0.40686681866645813,
"learning_rate": 5.050429974382602e-05,
"loss": 0.0821,
"step": 10440
},
{
"epoch": 30.28985507246377,
"grad_norm": 0.32615211606025696,
"learning_rate": 5.042162980642523e-05,
"loss": 0.1024,
"step": 10450
},
{
"epoch": 30.318840579710145,
"grad_norm": 0.47428640723228455,
"learning_rate": 5.033895871630462e-05,
"loss": 0.0855,
"step": 10460
},
{
"epoch": 30.347826086956523,
"grad_norm": 0.45208922028541565,
"learning_rate": 5.025628669948386e-05,
"loss": 0.0922,
"step": 10470
},
{
"epoch": 30.3768115942029,
"grad_norm": 0.41651803255081177,
"learning_rate": 5.017361398198502e-05,
"loss": 0.0776,
"step": 10480
},
{
"epoch": 30.405797101449274,
"grad_norm": 0.40021809935569763,
"learning_rate": 5.009094078983221e-05,
"loss": 0.0828,
"step": 10490
},
{
"epoch": 30.434782608695652,
"grad_norm": 0.40372738242149353,
"learning_rate": 5.000826734905073e-05,
"loss": 0.0947,
"step": 10500
},
{
"epoch": 30.463768115942027,
"grad_norm": 0.21877968311309814,
"learning_rate": 4.9925593885666645e-05,
"loss": 0.0826,
"step": 10510
},
{
"epoch": 30.492753623188406,
"grad_norm": 0.32412436604499817,
"learning_rate": 4.984292062570602e-05,
"loss": 0.1022,
"step": 10520
},
{
"epoch": 30.52173913043478,
"grad_norm": 0.3431316018104553,
"learning_rate": 4.976024779519442e-05,
"loss": 0.079,
"step": 10530
},
{
"epoch": 30.55072463768116,
"grad_norm": 0.3585143983364105,
"learning_rate": 4.9677575620156194e-05,
"loss": 0.0885,
"step": 10540
},
{
"epoch": 30.579710144927535,
"grad_norm": 0.3738825023174286,
"learning_rate": 4.959490432661391e-05,
"loss": 0.1003,
"step": 10550
},
{
"epoch": 30.608695652173914,
"grad_norm": 0.45717304944992065,
"learning_rate": 4.9512234140587726e-05,
"loss": 0.0908,
"step": 10560
},
{
"epoch": 30.63768115942029,
"grad_norm": 0.43018513917922974,
"learning_rate": 4.942956528809477e-05,
"loss": 0.0899,
"step": 10570
},
{
"epoch": 30.666666666666668,
"grad_norm": 0.4122094511985779,
"learning_rate": 4.934689799514854e-05,
"loss": 0.0976,
"step": 10580
},
{
"epoch": 30.695652173913043,
"grad_norm": 0.4348907172679901,
"learning_rate": 4.926423248775827e-05,
"loss": 0.0883,
"step": 10590
},
{
"epoch": 30.72463768115942,
"grad_norm": 0.45371150970458984,
"learning_rate": 4.918156899192826e-05,
"loss": 0.1057,
"step": 10600
},
{
"epoch": 30.753623188405797,
"grad_norm": 0.3190701901912689,
"learning_rate": 4.909890773365738e-05,
"loss": 0.0998,
"step": 10610
},
{
"epoch": 30.782608695652176,
"grad_norm": 0.26156431436538696,
"learning_rate": 4.9016248938938344e-05,
"loss": 0.086,
"step": 10620
},
{
"epoch": 30.81159420289855,
"grad_norm": 0.24229975044727325,
"learning_rate": 4.8933592833757156e-05,
"loss": 0.075,
"step": 10630
},
{
"epoch": 30.840579710144926,
"grad_norm": 0.47945863008499146,
"learning_rate": 4.8850939644092435e-05,
"loss": 0.0942,
"step": 10640
},
{
"epoch": 30.869565217391305,
"grad_norm": 0.42868760228157043,
"learning_rate": 4.876828959591485e-05,
"loss": 0.1054,
"step": 10650
},
{
"epoch": 30.89855072463768,
"grad_norm": 0.5427827835083008,
"learning_rate": 4.8685642915186474e-05,
"loss": 0.0908,
"step": 10660
},
{
"epoch": 30.92753623188406,
"grad_norm": 0.546563982963562,
"learning_rate": 4.860299982786018e-05,
"loss": 0.0944,
"step": 10670
},
{
"epoch": 30.956521739130434,
"grad_norm": 0.3557523190975189,
"learning_rate": 4.852036055987901e-05,
"loss": 0.0871,
"step": 10680
},
{
"epoch": 30.985507246376812,
"grad_norm": 0.50401771068573,
"learning_rate": 4.843772533717558e-05,
"loss": 0.0869,
"step": 10690
},
{
"epoch": 31.014492753623188,
"grad_norm": 0.3340211808681488,
"learning_rate": 4.835509438567142e-05,
"loss": 0.0857,
"step": 10700
},
{
"epoch": 31.043478260869566,
"grad_norm": 0.2681577205657959,
"learning_rate": 4.827246793127639e-05,
"loss": 0.0765,
"step": 10710
},
{
"epoch": 31.07246376811594,
"grad_norm": 0.5375443696975708,
"learning_rate": 4.818984619988807e-05,
"loss": 0.0977,
"step": 10720
},
{
"epoch": 31.10144927536232,
"grad_norm": 0.33494704961776733,
"learning_rate": 4.810722941739115e-05,
"loss": 0.0857,
"step": 10730
},
{
"epoch": 31.130434782608695,
"grad_norm": 0.44509807229042053,
"learning_rate": 4.8024617809656684e-05,
"loss": 0.0814,
"step": 10740
},
{
"epoch": 31.159420289855074,
"grad_norm": 0.42321598529815674,
"learning_rate": 4.794201160254171e-05,
"loss": 0.0832,
"step": 10750
},
{
"epoch": 31.18840579710145,
"grad_norm": 0.41145583987236023,
"learning_rate": 4.785941102188844e-05,
"loss": 0.101,
"step": 10760
},
{
"epoch": 31.217391304347824,
"grad_norm": 0.23340976238250732,
"learning_rate": 4.7776816293523686e-05,
"loss": 0.0987,
"step": 10770
},
{
"epoch": 31.246376811594203,
"grad_norm": 0.355365514755249,
"learning_rate": 4.769422764325832e-05,
"loss": 0.1148,
"step": 10780
},
{
"epoch": 31.27536231884058,
"grad_norm": 0.3993210792541504,
"learning_rate": 4.76116452968865e-05,
"loss": 0.0802,
"step": 10790
},
{
"epoch": 31.304347826086957,
"grad_norm": 0.23266702890396118,
"learning_rate": 4.752906948018525e-05,
"loss": 0.0755,
"step": 10800
},
{
"epoch": 31.333333333333332,
"grad_norm": 0.2677353024482727,
"learning_rate": 4.7446500418913684e-05,
"loss": 0.0754,
"step": 10810
},
{
"epoch": 31.36231884057971,
"grad_norm": 0.39404717087745667,
"learning_rate": 4.736393833881247e-05,
"loss": 0.0813,
"step": 10820
},
{
"epoch": 31.391304347826086,
"grad_norm": 0.39271312952041626,
"learning_rate": 4.7281383465603194e-05,
"loss": 0.0935,
"step": 10830
},
{
"epoch": 31.420289855072465,
"grad_norm": 0.40351206064224243,
"learning_rate": 4.71988360249877e-05,
"loss": 0.0677,
"step": 10840
},
{
"epoch": 31.44927536231884,
"grad_norm": 0.40005189180374146,
"learning_rate": 4.7116296242647554e-05,
"loss": 0.1069,
"step": 10850
},
{
"epoch": 31.47826086956522,
"grad_norm": 0.3219447433948517,
"learning_rate": 4.703376434424336e-05,
"loss": 0.0806,
"step": 10860
},
{
"epoch": 31.507246376811594,
"grad_norm": 0.4746580123901367,
"learning_rate": 4.695124055541421e-05,
"loss": 0.0851,
"step": 10870
},
{
"epoch": 31.536231884057973,
"grad_norm": 0.3610043525695801,
"learning_rate": 4.6868725101776934e-05,
"loss": 0.1042,
"step": 10880
},
{
"epoch": 31.565217391304348,
"grad_norm": 0.3520298898220062,
"learning_rate": 4.678621820892567e-05,
"loss": 0.0718,
"step": 10890
},
{
"epoch": 31.594202898550726,
"grad_norm": 0.4144718050956726,
"learning_rate": 4.670372010243111e-05,
"loss": 0.0957,
"step": 10900
},
{
"epoch": 31.6231884057971,
"grad_norm": 0.35748976469039917,
"learning_rate": 4.662123100783992e-05,
"loss": 0.0911,
"step": 10910
},
{
"epoch": 31.652173913043477,
"grad_norm": 0.21652653813362122,
"learning_rate": 4.653875115067415e-05,
"loss": 0.0731,
"step": 10920
},
{
"epoch": 31.681159420289855,
"grad_norm": 0.3723653256893158,
"learning_rate": 4.6456280756430545e-05,
"loss": 0.0888,
"step": 10930
},
{
"epoch": 31.71014492753623,
"grad_norm": 0.4222668409347534,
"learning_rate": 4.637382005058004e-05,
"loss": 0.1013,
"step": 10940
},
{
"epoch": 31.73913043478261,
"grad_norm": 0.4263753294944763,
"learning_rate": 4.629136925856705e-05,
"loss": 0.0847,
"step": 10950
},
{
"epoch": 31.768115942028984,
"grad_norm": 0.3686303198337555,
"learning_rate": 4.6208928605808895e-05,
"loss": 0.0952,
"step": 10960
},
{
"epoch": 31.797101449275363,
"grad_norm": 0.4002050459384918,
"learning_rate": 4.612649831769519e-05,
"loss": 0.0825,
"step": 10970
},
{
"epoch": 31.82608695652174,
"grad_norm": 0.2441813200712204,
"learning_rate": 4.604407861958715e-05,
"loss": 0.0955,
"step": 10980
},
{
"epoch": 31.855072463768117,
"grad_norm": 0.30742359161376953,
"learning_rate": 4.5961669736817114e-05,
"loss": 0.0881,
"step": 10990
},
{
"epoch": 31.884057971014492,
"grad_norm": 0.23788172006607056,
"learning_rate": 4.5879271894687814e-05,
"loss": 0.0935,
"step": 11000
},
{
"epoch": 31.91304347826087,
"grad_norm": 0.3632306158542633,
"learning_rate": 4.5796885318471826e-05,
"loss": 0.0848,
"step": 11010
},
{
"epoch": 31.942028985507246,
"grad_norm": 0.39893579483032227,
"learning_rate": 4.571451023341086e-05,
"loss": 0.1019,
"step": 11020
},
{
"epoch": 31.971014492753625,
"grad_norm": 0.4167952835559845,
"learning_rate": 4.563214686471527e-05,
"loss": 0.0897,
"step": 11030
},
{
"epoch": 32.0,
"grad_norm": 0.3372804820537567,
"learning_rate": 4.5549795437563365e-05,
"loss": 0.0789,
"step": 11040
},
{
"epoch": 32.028985507246375,
"grad_norm": 0.3572154939174652,
"learning_rate": 4.546745617710081e-05,
"loss": 0.0829,
"step": 11050
},
{
"epoch": 32.05797101449275,
"grad_norm": 0.32375410199165344,
"learning_rate": 4.5385129308440014e-05,
"loss": 0.0787,
"step": 11060
},
{
"epoch": 32.08695652173913,
"grad_norm": 0.3995456099510193,
"learning_rate": 4.530281505665944e-05,
"loss": 0.1026,
"step": 11070
},
{
"epoch": 32.11594202898551,
"grad_norm": 0.4150542616844177,
"learning_rate": 4.5220513646803134e-05,
"loss": 0.0941,
"step": 11080
},
{
"epoch": 32.14492753623188,
"grad_norm": 0.32070857286453247,
"learning_rate": 4.513822530388003e-05,
"loss": 0.0693,
"step": 11090
},
{
"epoch": 32.17391304347826,
"grad_norm": 0.36070406436920166,
"learning_rate": 4.5055950252863296e-05,
"loss": 0.0719,
"step": 11100
},
{
"epoch": 32.20289855072464,
"grad_norm": 0.4651089906692505,
"learning_rate": 4.4973688718689803e-05,
"loss": 0.0776,
"step": 11110
},
{
"epoch": 32.231884057971016,
"grad_norm": 0.39821431040763855,
"learning_rate": 4.4891440926259406e-05,
"loss": 0.0979,
"step": 11120
},
{
"epoch": 32.26086956521739,
"grad_norm": 0.3794202506542206,
"learning_rate": 4.480920710043443e-05,
"loss": 0.1005,
"step": 11130
},
{
"epoch": 32.289855072463766,
"grad_norm": 0.5193749070167542,
"learning_rate": 4.4726987466039044e-05,
"loss": 0.0971,
"step": 11140
},
{
"epoch": 32.31884057971015,
"grad_norm": 0.2910986840724945,
"learning_rate": 4.46447822478586e-05,
"loss": 0.079,
"step": 11150
},
{
"epoch": 32.34782608695652,
"grad_norm": 0.3999570310115814,
"learning_rate": 4.4562591670638974e-05,
"loss": 0.0967,
"step": 11160
},
{
"epoch": 32.3768115942029,
"grad_norm": 0.33184731006622314,
"learning_rate": 4.4480415959086105e-05,
"loss": 0.0931,
"step": 11170
},
{
"epoch": 32.405797101449274,
"grad_norm": 0.3531089723110199,
"learning_rate": 4.439825533786522e-05,
"loss": 0.0847,
"step": 11180
},
{
"epoch": 32.43478260869565,
"grad_norm": 0.45204806327819824,
"learning_rate": 4.431611003160035e-05,
"loss": 0.0856,
"step": 11190
},
{
"epoch": 32.46376811594203,
"grad_norm": 0.328259140253067,
"learning_rate": 4.4233980264873636e-05,
"loss": 0.0916,
"step": 11200
},
{
"epoch": 32.492753623188406,
"grad_norm": 0.30385860800743103,
"learning_rate": 4.4151866262224684e-05,
"loss": 0.0831,
"step": 11210
},
{
"epoch": 32.52173913043478,
"grad_norm": 0.34350085258483887,
"learning_rate": 4.406976824815006e-05,
"loss": 0.0829,
"step": 11220
},
{
"epoch": 32.55072463768116,
"grad_norm": 0.381274551153183,
"learning_rate": 4.3987686447102595e-05,
"loss": 0.0889,
"step": 11230
},
{
"epoch": 32.57971014492754,
"grad_norm": 0.4919489920139313,
"learning_rate": 4.3905621083490804e-05,
"loss": 0.0786,
"step": 11240
},
{
"epoch": 32.608695652173914,
"grad_norm": 0.4313332438468933,
"learning_rate": 4.3823572381678286e-05,
"loss": 0.0832,
"step": 11250
},
{
"epoch": 32.63768115942029,
"grad_norm": 0.3867364823818207,
"learning_rate": 4.374154056598301e-05,
"loss": 0.0911,
"step": 11260
},
{
"epoch": 32.666666666666664,
"grad_norm": 0.4290856719017029,
"learning_rate": 4.3659525860676845e-05,
"loss": 0.0818,
"step": 11270
},
{
"epoch": 32.69565217391305,
"grad_norm": 0.2989586591720581,
"learning_rate": 4.3577528489984854e-05,
"loss": 0.0816,
"step": 11280
},
{
"epoch": 32.72463768115942,
"grad_norm": 0.3265022039413452,
"learning_rate": 4.349554867808476e-05,
"loss": 0.077,
"step": 11290
},
{
"epoch": 32.7536231884058,
"grad_norm": 0.5287574529647827,
"learning_rate": 4.34135866491062e-05,
"loss": 0.0736,
"step": 11300
},
{
"epoch": 32.78260869565217,
"grad_norm": 0.4195975661277771,
"learning_rate": 4.333164262713022e-05,
"loss": 0.0734,
"step": 11310
},
{
"epoch": 32.81159420289855,
"grad_norm": 0.27101531624794006,
"learning_rate": 4.324971683618868e-05,
"loss": 0.0776,
"step": 11320
},
{
"epoch": 32.84057971014493,
"grad_norm": 0.28514423966407776,
"learning_rate": 4.316780950026354e-05,
"loss": 0.0958,
"step": 11330
},
{
"epoch": 32.869565217391305,
"grad_norm": 0.45822855830192566,
"learning_rate": 4.308592084328637e-05,
"loss": 0.0972,
"step": 11340
},
{
"epoch": 32.89855072463768,
"grad_norm": 0.4056869149208069,
"learning_rate": 4.3004051089137576e-05,
"loss": 0.0871,
"step": 11350
},
{
"epoch": 32.927536231884055,
"grad_norm": 0.4822801649570465,
"learning_rate": 4.292220046164597e-05,
"loss": 0.0781,
"step": 11360
},
{
"epoch": 32.95652173913044,
"grad_norm": 0.4903472661972046,
"learning_rate": 4.2840369184588035e-05,
"loss": 0.1022,
"step": 11370
},
{
"epoch": 32.98550724637681,
"grad_norm": 0.2708165645599365,
"learning_rate": 4.2758557481687345e-05,
"loss": 0.0724,
"step": 11380
},
{
"epoch": 33.01449275362319,
"grad_norm": 0.46164244413375854,
"learning_rate": 4.267676557661403e-05,
"loss": 0.063,
"step": 11390
},
{
"epoch": 33.04347826086956,
"grad_norm": 0.3026619553565979,
"learning_rate": 4.2594993692983955e-05,
"loss": 0.0824,
"step": 11400
},
{
"epoch": 33.072463768115945,
"grad_norm": 0.28057217597961426,
"learning_rate": 4.251324205435837e-05,
"loss": 0.089,
"step": 11410
},
{
"epoch": 33.10144927536232,
"grad_norm": 0.18814432621002197,
"learning_rate": 4.243151088424312e-05,
"loss": 0.0838,
"step": 11420
},
{
"epoch": 33.130434782608695,
"grad_norm": 0.3627355098724365,
"learning_rate": 4.234980040608813e-05,
"loss": 0.0754,
"step": 11430
},
{
"epoch": 33.15942028985507,
"grad_norm": 0.3194730877876282,
"learning_rate": 4.22681108432867e-05,
"loss": 0.0857,
"step": 11440
},
{
"epoch": 33.18840579710145,
"grad_norm": 0.387783020734787,
"learning_rate": 4.2186442419174984e-05,
"loss": 0.0851,
"step": 11450
},
{
"epoch": 33.21739130434783,
"grad_norm": 0.34020793437957764,
"learning_rate": 4.210479535703133e-05,
"loss": 0.0821,
"step": 11460
},
{
"epoch": 33.2463768115942,
"grad_norm": 0.48423564434051514,
"learning_rate": 4.202316988007567e-05,
"loss": 0.0985,
"step": 11470
},
{
"epoch": 33.27536231884058,
"grad_norm": 0.4145282506942749,
"learning_rate": 4.194156621146901e-05,
"loss": 0.0704,
"step": 11480
},
{
"epoch": 33.30434782608695,
"grad_norm": 0.602695643901825,
"learning_rate": 4.1859984574312596e-05,
"loss": 0.0846,
"step": 11490
},
{
"epoch": 33.333333333333336,
"grad_norm": 0.23501792550086975,
"learning_rate": 4.177842519164752e-05,
"loss": 0.0817,
"step": 11500
},
{
"epoch": 33.36231884057971,
"grad_norm": 0.43396809697151184,
"learning_rate": 4.169688828645404e-05,
"loss": 0.103,
"step": 11510
},
{
"epoch": 33.391304347826086,
"grad_norm": 0.4772212505340576,
"learning_rate": 4.161537408165092e-05,
"loss": 0.0721,
"step": 11520
},
{
"epoch": 33.42028985507246,
"grad_norm": 0.3769497573375702,
"learning_rate": 4.1533882800094924e-05,
"loss": 0.1031,
"step": 11530
},
{
"epoch": 33.44927536231884,
"grad_norm": 0.4361927807331085,
"learning_rate": 4.145241466458005e-05,
"loss": 0.0785,
"step": 11540
},
{
"epoch": 33.47826086956522,
"grad_norm": 0.47106435894966125,
"learning_rate": 4.13709698978371e-05,
"loss": 0.0805,
"step": 11550
},
{
"epoch": 33.507246376811594,
"grad_norm": 0.19365593791007996,
"learning_rate": 4.1289548722532944e-05,
"loss": 0.0749,
"step": 11560
},
{
"epoch": 33.53623188405797,
"grad_norm": 0.47546547651290894,
"learning_rate": 4.120815136126999e-05,
"loss": 0.0852,
"step": 11570
},
{
"epoch": 33.56521739130435,
"grad_norm": 0.41180577874183655,
"learning_rate": 4.112677803658548e-05,
"loss": 0.0806,
"step": 11580
},
{
"epoch": 33.594202898550726,
"grad_norm": 0.2787127196788788,
"learning_rate": 4.1045428970951e-05,
"loss": 0.0899,
"step": 11590
},
{
"epoch": 33.6231884057971,
"grad_norm": 0.3546220660209656,
"learning_rate": 4.0964104386771785e-05,
"loss": 0.0813,
"step": 11600
},
{
"epoch": 33.65217391304348,
"grad_norm": 0.4572994112968445,
"learning_rate": 4.0882804506386144e-05,
"loss": 0.09,
"step": 11610
},
{
"epoch": 33.68115942028985,
"grad_norm": 0.33741870522499084,
"learning_rate": 4.080152955206485e-05,
"loss": 0.074,
"step": 11620
},
{
"epoch": 33.710144927536234,
"grad_norm": 0.24237462878227234,
"learning_rate": 4.0720279746010505e-05,
"loss": 0.0767,
"step": 11630
},
{
"epoch": 33.73913043478261,
"grad_norm": 0.37967872619628906,
"learning_rate": 4.063905531035699e-05,
"loss": 0.0715,
"step": 11640
},
{
"epoch": 33.768115942028984,
"grad_norm": 0.25618433952331543,
"learning_rate": 4.055785646716882e-05,
"loss": 0.0743,
"step": 11650
},
{
"epoch": 33.79710144927536,
"grad_norm": 0.3028956949710846,
"learning_rate": 4.047668343844051e-05,
"loss": 0.0948,
"step": 11660
},
{
"epoch": 33.82608695652174,
"grad_norm": 0.28945979475975037,
"learning_rate": 4.039553644609604e-05,
"loss": 0.0783,
"step": 11670
},
{
"epoch": 33.85507246376812,
"grad_norm": 0.4274953603744507,
"learning_rate": 4.0314415711988176e-05,
"loss": 0.0846,
"step": 11680
},
{
"epoch": 33.88405797101449,
"grad_norm": 0.4359511137008667,
"learning_rate": 4.023332145789792e-05,
"loss": 0.0772,
"step": 11690
},
{
"epoch": 33.91304347826087,
"grad_norm": 0.2297302633523941,
"learning_rate": 4.015225390553385e-05,
"loss": 0.0663,
"step": 11700
},
{
"epoch": 33.94202898550725,
"grad_norm": 0.4641404449939728,
"learning_rate": 4.007121327653158e-05,
"loss": 0.0822,
"step": 11710
},
{
"epoch": 33.971014492753625,
"grad_norm": 0.3523867726325989,
"learning_rate": 3.9990199792453064e-05,
"loss": 0.0897,
"step": 11720
},
{
"epoch": 34.0,
"grad_norm": 0.6376750469207764,
"learning_rate": 3.9909213674786103e-05,
"loss": 0.082,
"step": 11730
},
{
"epoch": 34.028985507246375,
"grad_norm": 0.4435945749282837,
"learning_rate": 3.982825514494363e-05,
"loss": 0.0849,
"step": 11740
},
{
"epoch": 34.05797101449275,
"grad_norm": 0.3722585439682007,
"learning_rate": 3.974732442426319e-05,
"loss": 0.0991,
"step": 11750
},
{
"epoch": 34.08695652173913,
"grad_norm": 0.4315265715122223,
"learning_rate": 3.966642173400629e-05,
"loss": 0.0878,
"step": 11760
},
{
"epoch": 34.11594202898551,
"grad_norm": 0.45117440819740295,
"learning_rate": 3.9585547295357764e-05,
"loss": 0.0948,
"step": 11770
},
{
"epoch": 34.14492753623188,
"grad_norm": 0.4286547005176544,
"learning_rate": 3.950470132942526e-05,
"loss": 0.082,
"step": 11780
},
{
"epoch": 34.17391304347826,
"grad_norm": 0.34111738204956055,
"learning_rate": 3.942388405723856e-05,
"loss": 0.073,
"step": 11790
},
{
"epoch": 34.20289855072464,
"grad_norm": 0.24257983267307281,
"learning_rate": 3.9343095699749e-05,
"loss": 0.0707,
"step": 11800
},
{
"epoch": 34.231884057971016,
"grad_norm": 0.40664252638816833,
"learning_rate": 3.9262336477828874e-05,
"loss": 0.1052,
"step": 11810
},
{
"epoch": 34.26086956521739,
"grad_norm": 0.2790059745311737,
"learning_rate": 3.9181606612270794e-05,
"loss": 0.0863,
"step": 11820
},
{
"epoch": 34.289855072463766,
"grad_norm": 0.3338426351547241,
"learning_rate": 3.910090632378713e-05,
"loss": 0.1013,
"step": 11830
},
{
"epoch": 34.31884057971015,
"grad_norm": 0.3759063184261322,
"learning_rate": 3.90202358330094e-05,
"loss": 0.089,
"step": 11840
},
{
"epoch": 34.34782608695652,
"grad_norm": 0.3479987680912018,
"learning_rate": 3.8939595360487656e-05,
"loss": 0.0699,
"step": 11850
},
{
"epoch": 34.3768115942029,
"grad_norm": 0.42943084239959717,
"learning_rate": 3.885898512668984e-05,
"loss": 0.1114,
"step": 11860
},
{
"epoch": 34.405797101449274,
"grad_norm": 0.2323223501443863,
"learning_rate": 3.877840535200127e-05,
"loss": 0.0878,
"step": 11870
},
{
"epoch": 34.43478260869565,
"grad_norm": 0.5184713006019592,
"learning_rate": 3.869785625672397e-05,
"loss": 0.0974,
"step": 11880
},
{
"epoch": 34.46376811594203,
"grad_norm": 0.267502099275589,
"learning_rate": 3.8617338061076094e-05,
"loss": 0.0563,
"step": 11890
},
{
"epoch": 34.492753623188406,
"grad_norm": 0.42632079124450684,
"learning_rate": 3.853685098519132e-05,
"loss": 0.0739,
"step": 11900
},
{
"epoch": 34.52173913043478,
"grad_norm": 0.30418580770492554,
"learning_rate": 3.845639524911823e-05,
"loss": 0.0976,
"step": 11910
},
{
"epoch": 34.55072463768116,
"grad_norm": 0.38783854246139526,
"learning_rate": 3.837597107281974e-05,
"loss": 0.0738,
"step": 11920
},
{
"epoch": 34.57971014492754,
"grad_norm": 0.19843190908432007,
"learning_rate": 3.829557867617247e-05,
"loss": 0.0796,
"step": 11930
},
{
"epoch": 34.608695652173914,
"grad_norm": 0.3146209120750427,
"learning_rate": 3.821521827896618e-05,
"loss": 0.0826,
"step": 11940
},
{
"epoch": 34.63768115942029,
"grad_norm": 0.42972853779792786,
"learning_rate": 3.81348901009031e-05,
"loss": 0.0984,
"step": 11950
},
{
"epoch": 34.666666666666664,
"grad_norm": 0.28957119584083557,
"learning_rate": 3.805459436159741e-05,
"loss": 0.0714,
"step": 11960
},
{
"epoch": 34.69565217391305,
"grad_norm": 0.3170105814933777,
"learning_rate": 3.797433128057461e-05,
"loss": 0.0817,
"step": 11970
},
{
"epoch": 34.72463768115942,
"grad_norm": 0.443141907453537,
"learning_rate": 3.789410107727089e-05,
"loss": 0.0931,
"step": 11980
},
{
"epoch": 34.7536231884058,
"grad_norm": 0.4638511538505554,
"learning_rate": 3.781390397103257e-05,
"loss": 0.0917,
"step": 11990
},
{
"epoch": 34.78260869565217,
"grad_norm": 0.5074764490127563,
"learning_rate": 3.7733740181115455e-05,
"loss": 0.0919,
"step": 12000
},
{
"epoch": 34.81159420289855,
"grad_norm": 0.32013916969299316,
"learning_rate": 3.7653609926684306e-05,
"loss": 0.0784,
"step": 12010
},
{
"epoch": 34.84057971014493,
"grad_norm": 0.29025906324386597,
"learning_rate": 3.757351342681217e-05,
"loss": 0.0751,
"step": 12020
},
{
"epoch": 34.869565217391305,
"grad_norm": 0.33754485845565796,
"learning_rate": 3.749345090047982e-05,
"loss": 0.082,
"step": 12030
},
{
"epoch": 34.89855072463768,
"grad_norm": 0.2577219307422638,
"learning_rate": 3.741342256657515e-05,
"loss": 0.083,
"step": 12040
},
{
"epoch": 34.927536231884055,
"grad_norm": 0.4835989773273468,
"learning_rate": 3.7333428643892567e-05,
"loss": 0.096,
"step": 12050
},
{
"epoch": 34.95652173913044,
"grad_norm": 0.5097367763519287,
"learning_rate": 3.725346935113239e-05,
"loss": 0.0939,
"step": 12060
},
{
"epoch": 34.98550724637681,
"grad_norm": 0.47239720821380615,
"learning_rate": 3.717354490690029e-05,
"loss": 0.0732,
"step": 12070
},
{
"epoch": 35.01449275362319,
"grad_norm": 0.36919161677360535,
"learning_rate": 3.709365552970664e-05,
"loss": 0.0824,
"step": 12080
},
{
"epoch": 35.04347826086956,
"grad_norm": 0.3409859836101532,
"learning_rate": 3.7013801437965945e-05,
"loss": 0.0803,
"step": 12090
},
{
"epoch": 35.072463768115945,
"grad_norm": 0.3615312874317169,
"learning_rate": 3.693398284999623e-05,
"loss": 0.0951,
"step": 12100
},
{
"epoch": 35.10144927536232,
"grad_norm": 0.5234674215316772,
"learning_rate": 3.6854199984018484e-05,
"loss": 0.0834,
"step": 12110
},
{
"epoch": 35.130434782608695,
"grad_norm": 0.2838694155216217,
"learning_rate": 3.677445305815601e-05,
"loss": 0.091,
"step": 12120
},
{
"epoch": 35.15942028985507,
"grad_norm": 0.5254635810852051,
"learning_rate": 3.669474229043387e-05,
"loss": 0.0929,
"step": 12130
},
{
"epoch": 35.18840579710145,
"grad_norm": 0.32632967829704285,
"learning_rate": 3.6615067898778235e-05,
"loss": 0.0873,
"step": 12140
},
{
"epoch": 35.21739130434783,
"grad_norm": 0.3260731101036072,
"learning_rate": 3.6535430101015866e-05,
"loss": 0.054,
"step": 12150
},
{
"epoch": 35.2463768115942,
"grad_norm": 0.3727055788040161,
"learning_rate": 3.645582911487345e-05,
"loss": 0.0738,
"step": 12160
},
{
"epoch": 35.27536231884058,
"grad_norm": 0.27279332280158997,
"learning_rate": 3.637626515797706e-05,
"loss": 0.0718,
"step": 12170
},
{
"epoch": 35.30434782608695,
"grad_norm": 0.4319758415222168,
"learning_rate": 3.629673844785152e-05,
"loss": 0.0754,
"step": 12180
},
{
"epoch": 35.333333333333336,
"grad_norm": 0.49372681975364685,
"learning_rate": 3.621724920191979e-05,
"loss": 0.0778,
"step": 12190
},
{
"epoch": 35.36231884057971,
"grad_norm": 0.27620404958724976,
"learning_rate": 3.6137797637502444e-05,
"loss": 0.0776,
"step": 12200
},
{
"epoch": 35.391304347826086,
"grad_norm": 0.4745093286037445,
"learning_rate": 3.6058383971817035e-05,
"loss": 0.091,
"step": 12210
},
{
"epoch": 35.42028985507246,
"grad_norm": 0.49664023518562317,
"learning_rate": 3.59790084219775e-05,
"loss": 0.0892,
"step": 12220
},
{
"epoch": 35.44927536231884,
"grad_norm": 0.30979496240615845,
"learning_rate": 3.589967120499353e-05,
"loss": 0.074,
"step": 12230
},
{
"epoch": 35.47826086956522,
"grad_norm": 0.460953950881958,
"learning_rate": 3.5820372537770075e-05,
"loss": 0.08,
"step": 12240
},
{
"epoch": 35.507246376811594,
"grad_norm": 0.31548449397087097,
"learning_rate": 3.5741112637106655e-05,
"loss": 0.0892,
"step": 12250
},
{
"epoch": 35.53623188405797,
"grad_norm": 0.38868752121925354,
"learning_rate": 3.5661891719696804e-05,
"loss": 0.0803,
"step": 12260
},
{
"epoch": 35.56521739130435,
"grad_norm": 0.39552441239356995,
"learning_rate": 3.5582710002127504e-05,
"loss": 0.0709,
"step": 12270
},
{
"epoch": 35.594202898550726,
"grad_norm": 0.3134962022304535,
"learning_rate": 3.550356770087853e-05,
"loss": 0.0835,
"step": 12280
},
{
"epoch": 35.6231884057971,
"grad_norm": 0.42194268107414246,
"learning_rate": 3.5424465032321914e-05,
"loss": 0.076,
"step": 12290
},
{
"epoch": 35.65217391304348,
"grad_norm": 0.44927000999450684,
"learning_rate": 3.5345402212721335e-05,
"loss": 0.1047,
"step": 12300
},
{
"epoch": 35.68115942028985,
"grad_norm": 0.4046900272369385,
"learning_rate": 3.526637945823152e-05,
"loss": 0.0871,
"step": 12310
},
{
"epoch": 35.710144927536234,
"grad_norm": 0.34118810296058655,
"learning_rate": 3.518739698489767e-05,
"loss": 0.076,
"step": 12320
},
{
"epoch": 35.73913043478261,
"grad_norm": 0.1889665573835373,
"learning_rate": 3.510845500865485e-05,
"loss": 0.078,
"step": 12330
},
{
"epoch": 35.768115942028984,
"grad_norm": 0.25734132528305054,
"learning_rate": 3.502955374532739e-05,
"loss": 0.0808,
"step": 12340
},
{
"epoch": 35.79710144927536,
"grad_norm": 0.4329688549041748,
"learning_rate": 3.495069341062836e-05,
"loss": 0.0949,
"step": 12350
},
{
"epoch": 35.82608695652174,
"grad_norm": 0.4507119655609131,
"learning_rate": 3.4871874220158896e-05,
"loss": 0.0868,
"step": 12360
},
{
"epoch": 35.85507246376812,
"grad_norm": 0.42284590005874634,
"learning_rate": 3.479309638940762e-05,
"loss": 0.0928,
"step": 12370
},
{
"epoch": 35.88405797101449,
"grad_norm": 0.31752341985702515,
"learning_rate": 3.4714360133750146e-05,
"loss": 0.0824,
"step": 12380
},
{
"epoch": 35.91304347826087,
"grad_norm": 0.31320276856422424,
"learning_rate": 3.463566566844839e-05,
"loss": 0.0768,
"step": 12390
},
{
"epoch": 35.94202898550725,
"grad_norm": 0.46019718050956726,
"learning_rate": 3.4557013208650016e-05,
"loss": 0.0783,
"step": 12400
},
{
"epoch": 35.971014492753625,
"grad_norm": 0.3470844626426697,
"learning_rate": 3.4478402969387857e-05,
"loss": 0.0874,
"step": 12410
},
{
"epoch": 36.0,
"grad_norm": 1.3857176303863525,
"learning_rate": 3.4399835165579266e-05,
"loss": 0.0754,
"step": 12420
},
{
"epoch": 36.028985507246375,
"grad_norm": 0.3289060592651367,
"learning_rate": 3.4321310012025645e-05,
"loss": 0.0942,
"step": 12430
},
{
"epoch": 36.05797101449275,
"grad_norm": 0.2967238426208496,
"learning_rate": 3.424282772341176e-05,
"loss": 0.076,
"step": 12440
},
{
"epoch": 36.08695652173913,
"grad_norm": 0.3292827308177948,
"learning_rate": 3.416438851430519e-05,
"loss": 0.0995,
"step": 12450
},
{
"epoch": 36.11594202898551,
"grad_norm": 0.3444810211658478,
"learning_rate": 3.408599259915577e-05,
"loss": 0.0739,
"step": 12460
},
{
"epoch": 36.14492753623188,
"grad_norm": 0.40988513827323914,
"learning_rate": 3.400764019229487e-05,
"loss": 0.0793,
"step": 12470
},
{
"epoch": 36.17391304347826,
"grad_norm": 0.37536290287971497,
"learning_rate": 3.3929331507935035e-05,
"loss": 0.0983,
"step": 12480
},
{
"epoch": 36.20289855072464,
"grad_norm": 0.45117539167404175,
"learning_rate": 3.3851066760169196e-05,
"loss": 0.0981,
"step": 12490
},
{
"epoch": 36.231884057971016,
"grad_norm": 0.4401688575744629,
"learning_rate": 3.377284616297021e-05,
"loss": 0.0702,
"step": 12500
},
{
"epoch": 36.26086956521739,
"grad_norm": 0.24332067370414734,
"learning_rate": 3.3694669930190166e-05,
"loss": 0.0741,
"step": 12510
},
{
"epoch": 36.289855072463766,
"grad_norm": 0.38454926013946533,
"learning_rate": 3.36165382755599e-05,
"loss": 0.0926,
"step": 12520
},
{
"epoch": 36.31884057971015,
"grad_norm": 0.35665246844291687,
"learning_rate": 3.35384514126884e-05,
"loss": 0.0686,
"step": 12530
},
{
"epoch": 36.34782608695652,
"grad_norm": 0.4824955463409424,
"learning_rate": 3.3460409555062154e-05,
"loss": 0.084,
"step": 12540
},
{
"epoch": 36.3768115942029,
"grad_norm": 0.4470244348049164,
"learning_rate": 3.3382412916044645e-05,
"loss": 0.1034,
"step": 12550
},
{
"epoch": 36.405797101449274,
"grad_norm": 0.3308650553226471,
"learning_rate": 3.330446170887566e-05,
"loss": 0.0708,
"step": 12560
},
{
"epoch": 36.43478260869565,
"grad_norm": 0.2681847810745239,
"learning_rate": 3.3226556146670834e-05,
"loss": 0.0748,
"step": 12570
},
{
"epoch": 36.46376811594203,
"grad_norm": 0.4676291048526764,
"learning_rate": 3.314869644242102e-05,
"loss": 0.0849,
"step": 12580
},
{
"epoch": 36.492753623188406,
"grad_norm": 0.468152791261673,
"learning_rate": 3.3070882808991674e-05,
"loss": 0.0726,
"step": 12590
},
{
"epoch": 36.52173913043478,
"grad_norm": 0.423662930727005,
"learning_rate": 3.2993115459122305e-05,
"loss": 0.0832,
"step": 12600
},
{
"epoch": 36.55072463768116,
"grad_norm": 0.4952705502510071,
"learning_rate": 3.2915394605425835e-05,
"loss": 0.086,
"step": 12610
},
{
"epoch": 36.57971014492754,
"grad_norm": 0.3361116945743561,
"learning_rate": 3.283772046038816e-05,
"loss": 0.0686,
"step": 12620
},
{
"epoch": 36.608695652173914,
"grad_norm": 0.35378262400627136,
"learning_rate": 3.276009323636739e-05,
"loss": 0.0956,
"step": 12630
},
{
"epoch": 36.63768115942029,
"grad_norm": 0.26826876401901245,
"learning_rate": 3.268251314559344e-05,
"loss": 0.0725,
"step": 12640
},
{
"epoch": 36.666666666666664,
"grad_norm": 0.4471190571784973,
"learning_rate": 3.2604980400167254e-05,
"loss": 0.0886,
"step": 12650
},
{
"epoch": 36.69565217391305,
"grad_norm": 0.26007452607154846,
"learning_rate": 3.252749521206042e-05,
"loss": 0.0736,
"step": 12660
},
{
"epoch": 36.72463768115942,
"grad_norm": 0.3644675016403198,
"learning_rate": 3.2450057793114494e-05,
"loss": 0.0859,
"step": 12670
},
{
"epoch": 36.7536231884058,
"grad_norm": 0.3555355966091156,
"learning_rate": 3.2372668355040435e-05,
"loss": 0.0952,
"step": 12680
},
{
"epoch": 36.78260869565217,
"grad_norm": 0.3508759140968323,
"learning_rate": 3.2295327109418005e-05,
"loss": 0.0761,
"step": 12690
},
{
"epoch": 36.81159420289855,
"grad_norm": 0.3372611999511719,
"learning_rate": 3.221803426769518e-05,
"loss": 0.1055,
"step": 12700
},
{
"epoch": 36.84057971014493,
"grad_norm": 0.45002785325050354,
"learning_rate": 3.214079004118768e-05,
"loss": 0.0677,
"step": 12710
},
{
"epoch": 36.869565217391305,
"grad_norm": 0.5220909118652344,
"learning_rate": 3.2063594641078234e-05,
"loss": 0.0679,
"step": 12720
},
{
"epoch": 36.89855072463768,
"grad_norm": 0.33023321628570557,
"learning_rate": 3.198644827841616e-05,
"loss": 0.0854,
"step": 12730
},
{
"epoch": 36.927536231884055,
"grad_norm": 0.37969428300857544,
"learning_rate": 3.1909351164116654e-05,
"loss": 0.0975,
"step": 12740
},
{
"epoch": 36.95652173913044,
"grad_norm": 0.39646878838539124,
"learning_rate": 3.183230350896026e-05,
"loss": 0.0651,
"step": 12750
},
{
"epoch": 36.98550724637681,
"grad_norm": 0.42903590202331543,
"learning_rate": 3.1755305523592337e-05,
"loss": 0.0964,
"step": 12760
},
{
"epoch": 37.01449275362319,
"grad_norm": 0.3350338339805603,
"learning_rate": 3.167835741852245e-05,
"loss": 0.0747,
"step": 12770
},
{
"epoch": 37.04347826086956,
"grad_norm": 0.5324596762657166,
"learning_rate": 3.160145940412378e-05,
"loss": 0.0865,
"step": 12780
},
{
"epoch": 37.072463768115945,
"grad_norm": 0.5436109900474548,
"learning_rate": 3.1524611690632545e-05,
"loss": 0.0853,
"step": 12790
},
{
"epoch": 37.10144927536232,
"grad_norm": 0.4058521091938019,
"learning_rate": 3.144781448814746e-05,
"loss": 0.0611,
"step": 12800
},
{
"epoch": 37.130434782608695,
"grad_norm": 0.222909078001976,
"learning_rate": 3.1371068006629145e-05,
"loss": 0.0849,
"step": 12810
},
{
"epoch": 37.15942028985507,
"grad_norm": 0.3150401711463928,
"learning_rate": 3.129437245589956e-05,
"loss": 0.0661,
"step": 12820
},
{
"epoch": 37.18840579710145,
"grad_norm": 0.5720604062080383,
"learning_rate": 3.121772804564143e-05,
"loss": 0.1058,
"step": 12830
},
{
"epoch": 37.21739130434783,
"grad_norm": 0.36148929595947266,
"learning_rate": 3.11411349853976e-05,
"loss": 0.0647,
"step": 12840
},
{
"epoch": 37.2463768115942,
"grad_norm": 0.4873165190219879,
"learning_rate": 3.10645934845706e-05,
"loss": 0.0919,
"step": 12850
},
{
"epoch": 37.27536231884058,
"grad_norm": 0.6560083627700806,
"learning_rate": 3.098810375242196e-05,
"loss": 0.0857,
"step": 12860
},
{
"epoch": 37.30434782608695,
"grad_norm": 0.37037011981010437,
"learning_rate": 3.0911665998071704e-05,
"loss": 0.084,
"step": 12870
},
{
"epoch": 37.333333333333336,
"grad_norm": 0.2736794650554657,
"learning_rate": 3.083528043049774e-05,
"loss": 0.0629,
"step": 12880
},
{
"epoch": 37.36231884057971,
"grad_norm": 0.39787065982818604,
"learning_rate": 3.0758947258535255e-05,
"loss": 0.0937,
"step": 12890
},
{
"epoch": 37.391304347826086,
"grad_norm": 0.2980014979839325,
"learning_rate": 3.068266669087625e-05,
"loss": 0.0747,
"step": 12900
},
{
"epoch": 37.42028985507246,
"grad_norm": 0.38902172446250916,
"learning_rate": 3.060643893606887e-05,
"loss": 0.0922,
"step": 12910
},
{
"epoch": 37.44927536231884,
"grad_norm": 0.412036269903183,
"learning_rate": 3.053026420251693e-05,
"loss": 0.0877,
"step": 12920
},
{
"epoch": 37.47826086956522,
"grad_norm": 0.36954089999198914,
"learning_rate": 3.0454142698479183e-05,
"loss": 0.1029,
"step": 12930
},
{
"epoch": 37.507246376811594,
"grad_norm": 0.521973192691803,
"learning_rate": 3.0378074632068954e-05,
"loss": 0.0682,
"step": 12940
},
{
"epoch": 37.53623188405797,
"grad_norm": 0.2521456182003021,
"learning_rate": 3.0302060211253408e-05,
"loss": 0.07,
"step": 12950
},
{
"epoch": 37.56521739130435,
"grad_norm": 0.4917527139186859,
"learning_rate": 3.0226099643853073e-05,
"loss": 0.0878,
"step": 12960
},
{
"epoch": 37.594202898550726,
"grad_norm": 0.339530348777771,
"learning_rate": 3.0150193137541283e-05,
"loss": 0.069,
"step": 12970
},
{
"epoch": 37.6231884057971,
"grad_norm": 0.3518831133842468,
"learning_rate": 3.0074340899843467e-05,
"loss": 0.0816,
"step": 12980
},
{
"epoch": 37.65217391304348,
"grad_norm": 0.4143315553665161,
"learning_rate": 2.999854313813677e-05,
"loss": 0.0988,
"step": 12990
},
{
"epoch": 37.68115942028985,
"grad_norm": 0.31359317898750305,
"learning_rate": 2.9922800059649382e-05,
"loss": 0.0671,
"step": 13000
},
{
"epoch": 37.710144927536234,
"grad_norm": 0.49539920687675476,
"learning_rate": 2.9847111871459976e-05,
"loss": 0.0752,
"step": 13010
},
{
"epoch": 37.73913043478261,
"grad_norm": 0.42408648133277893,
"learning_rate": 2.977147878049721e-05,
"loss": 0.0762,
"step": 13020
},
{
"epoch": 37.768115942028984,
"grad_norm": 0.5186890959739685,
"learning_rate": 2.9695900993539006e-05,
"loss": 0.0895,
"step": 13030
},
{
"epoch": 37.79710144927536,
"grad_norm": 0.46351712942123413,
"learning_rate": 2.9620378717212183e-05,
"loss": 0.1007,
"step": 13040
},
{
"epoch": 37.82608695652174,
"grad_norm": 0.6148757934570312,
"learning_rate": 2.9544912157991745e-05,
"loss": 0.0661,
"step": 13050
},
{
"epoch": 37.85507246376812,
"grad_norm": 0.43662676215171814,
"learning_rate": 2.9469501522200405e-05,
"loss": 0.0761,
"step": 13060
},
{
"epoch": 37.88405797101449,
"grad_norm": 0.4326452910900116,
"learning_rate": 2.9394147016007946e-05,
"loss": 0.0965,
"step": 13070
},
{
"epoch": 37.91304347826087,
"grad_norm": 0.5132485032081604,
"learning_rate": 2.9318848845430702e-05,
"loss": 0.0817,
"step": 13080
},
{
"epoch": 37.94202898550725,
"grad_norm": 0.4048340618610382,
"learning_rate": 2.9243607216331013e-05,
"loss": 0.0867,
"step": 13090
},
{
"epoch": 37.971014492753625,
"grad_norm": 0.5179027915000916,
"learning_rate": 2.916842233441661e-05,
"loss": 0.0914,
"step": 13100
},
{
"epoch": 38.0,
"grad_norm": 0.6405589580535889,
"learning_rate": 2.90932944052401e-05,
"loss": 0.0758,
"step": 13110
},
{
"epoch": 38.028985507246375,
"grad_norm": 0.3282417356967926,
"learning_rate": 2.9018223634198354e-05,
"loss": 0.0814,
"step": 13120
},
{
"epoch": 38.05797101449275,
"grad_norm": 0.25214284658432007,
"learning_rate": 2.8943210226532025e-05,
"loss": 0.0662,
"step": 13130
},
{
"epoch": 38.08695652173913,
"grad_norm": 0.6154152750968933,
"learning_rate": 2.8868254387324857e-05,
"loss": 0.0793,
"step": 13140
},
{
"epoch": 38.11594202898551,
"grad_norm": 0.4001002907752991,
"learning_rate": 2.8793356321503306e-05,
"loss": 0.0851,
"step": 13150
},
{
"epoch": 38.14492753623188,
"grad_norm": 0.2872644066810608,
"learning_rate": 2.87185162338358e-05,
"loss": 0.0664,
"step": 13160
},
{
"epoch": 38.17391304347826,
"grad_norm": 0.385065495967865,
"learning_rate": 2.8643734328932253e-05,
"loss": 0.077,
"step": 13170
},
{
"epoch": 38.20289855072464,
"grad_norm": 0.32745644450187683,
"learning_rate": 2.856901081124359e-05,
"loss": 0.0762,
"step": 13180
},
{
"epoch": 38.231884057971016,
"grad_norm": 0.3578251004219055,
"learning_rate": 2.8494345885061002e-05,
"loss": 0.0873,
"step": 13190
},
{
"epoch": 38.26086956521739,
"grad_norm": 0.4024776816368103,
"learning_rate": 2.8419739754515616e-05,
"loss": 0.0674,
"step": 13200
},
{
"epoch": 38.289855072463766,
"grad_norm": 0.23126451671123505,
"learning_rate": 2.8345192623577666e-05,
"loss": 0.096,
"step": 13210
},
{
"epoch": 38.31884057971015,
"grad_norm": 0.44609886407852173,
"learning_rate": 2.8270704696056193e-05,
"loss": 0.0924,
"step": 13220
},
{
"epoch": 38.34782608695652,
"grad_norm": 0.28004297614097595,
"learning_rate": 2.8196276175598367e-05,
"loss": 0.0824,
"step": 13230
},
{
"epoch": 38.3768115942029,
"grad_norm": 0.4256015419960022,
"learning_rate": 2.8121907265688884e-05,
"loss": 0.0793,
"step": 13240
},
{
"epoch": 38.405797101449274,
"grad_norm": 0.28294479846954346,
"learning_rate": 2.804759816964957e-05,
"loss": 0.0757,
"step": 13250
},
{
"epoch": 38.43478260869565,
"grad_norm": 0.36253151297569275,
"learning_rate": 2.797334909063857e-05,
"loss": 0.0638,
"step": 13260
},
{
"epoch": 38.46376811594203,
"grad_norm": 0.3807222247123718,
"learning_rate": 2.7899160231650056e-05,
"loss": 0.0824,
"step": 13270
},
{
"epoch": 38.492753623188406,
"grad_norm": 0.2997818887233734,
"learning_rate": 2.7825031795513585e-05,
"loss": 0.084,
"step": 13280
},
{
"epoch": 38.52173913043478,
"grad_norm": 0.24102069437503815,
"learning_rate": 2.775096398489341e-05,
"loss": 0.0893,
"step": 13290
},
{
"epoch": 38.55072463768116,
"grad_norm": 0.258094847202301,
"learning_rate": 2.7676957002288163e-05,
"loss": 0.0814,
"step": 13300
},
{
"epoch": 38.57971014492754,
"grad_norm": 0.4139418303966522,
"learning_rate": 2.760301105003003e-05,
"loss": 0.0803,
"step": 13310
},
{
"epoch": 38.608695652173914,
"grad_norm": 0.31138837337493896,
"learning_rate": 2.752912633028446e-05,
"loss": 0.0783,
"step": 13320
},
{
"epoch": 38.63768115942029,
"grad_norm": 0.4925903379917145,
"learning_rate": 2.7455303045049474e-05,
"loss": 0.0839,
"step": 13330
},
{
"epoch": 38.666666666666664,
"grad_norm": 0.3583664894104004,
"learning_rate": 2.7381541396155098e-05,
"loss": 0.071,
"step": 13340
},
{
"epoch": 38.69565217391305,
"grad_norm": 0.28774356842041016,
"learning_rate": 2.730784158526286e-05,
"loss": 0.0875,
"step": 13350
},
{
"epoch": 38.72463768115942,
"grad_norm": 0.43696558475494385,
"learning_rate": 2.723420381386521e-05,
"loss": 0.0782,
"step": 13360
},
{
"epoch": 38.7536231884058,
"grad_norm": 0.3710800111293793,
"learning_rate": 2.7160628283285018e-05,
"loss": 0.0719,
"step": 13370
},
{
"epoch": 38.78260869565217,
"grad_norm": 0.3696930408477783,
"learning_rate": 2.7087115194675007e-05,
"loss": 0.0656,
"step": 13380
},
{
"epoch": 38.81159420289855,
"grad_norm": 0.3197194039821625,
"learning_rate": 2.701366474901712e-05,
"loss": 0.0755,
"step": 13390
},
{
"epoch": 38.84057971014493,
"grad_norm": 0.3476333022117615,
"learning_rate": 2.6940277147122085e-05,
"loss": 0.0834,
"step": 13400
},
{
"epoch": 38.869565217391305,
"grad_norm": 0.3637937307357788,
"learning_rate": 2.686695258962878e-05,
"loss": 0.0745,
"step": 13410
},
{
"epoch": 38.89855072463768,
"grad_norm": 0.5231657028198242,
"learning_rate": 2.679369127700375e-05,
"loss": 0.0807,
"step": 13420
},
{
"epoch": 38.927536231884055,
"grad_norm": 0.35336682200431824,
"learning_rate": 2.672049340954067e-05,
"loss": 0.072,
"step": 13430
},
{
"epoch": 38.95652173913044,
"grad_norm": 0.5302248597145081,
"learning_rate": 2.6647359187359676e-05,
"loss": 0.0931,
"step": 13440
},
{
"epoch": 38.98550724637681,
"grad_norm": 0.4057472348213196,
"learning_rate": 2.6574288810406946e-05,
"loss": 0.0808,
"step": 13450
},
{
"epoch": 39.01449275362319,
"grad_norm": 0.40481290221214294,
"learning_rate": 2.6501282478454083e-05,
"loss": 0.0742,
"step": 13460
},
{
"epoch": 39.04347826086956,
"grad_norm": 0.5995214581489563,
"learning_rate": 2.6428340391097618e-05,
"loss": 0.0842,
"step": 13470
},
{
"epoch": 39.072463768115945,
"grad_norm": 0.46385887265205383,
"learning_rate": 2.6355462747758485e-05,
"loss": 0.0764,
"step": 13480
},
{
"epoch": 39.10144927536232,
"grad_norm": 0.21818841993808746,
"learning_rate": 2.6282649747681304e-05,
"loss": 0.0689,
"step": 13490
},
{
"epoch": 39.130434782608695,
"grad_norm": 0.24269723892211914,
"learning_rate": 2.620990158993406e-05,
"loss": 0.0674,
"step": 13500
},
{
"epoch": 39.15942028985507,
"grad_norm": 0.18235942721366882,
"learning_rate": 2.6137218473407477e-05,
"loss": 0.0781,
"step": 13510
},
{
"epoch": 39.18840579710145,
"grad_norm": 0.30598685145378113,
"learning_rate": 2.606460059681436e-05,
"loss": 0.0881,
"step": 13520
},
{
"epoch": 39.21739130434783,
"grad_norm": 0.3079904317855835,
"learning_rate": 2.599204815868928e-05,
"loss": 0.0796,
"step": 13530
},
{
"epoch": 39.2463768115942,
"grad_norm": 0.6565821170806885,
"learning_rate": 2.5919561357387756e-05,
"loss": 0.0723,
"step": 13540
},
{
"epoch": 39.27536231884058,
"grad_norm": 0.30793699622154236,
"learning_rate": 2.5847140391085972e-05,
"loss": 0.0741,
"step": 13550
},
{
"epoch": 39.30434782608695,
"grad_norm": 0.40903565287590027,
"learning_rate": 2.5774785457780103e-05,
"loss": 0.0895,
"step": 13560
},
{
"epoch": 39.333333333333336,
"grad_norm": 0.31521743535995483,
"learning_rate": 2.5702496755285753e-05,
"loss": 0.0635,
"step": 13570
},
{
"epoch": 39.36231884057971,
"grad_norm": 0.49470698833465576,
"learning_rate": 2.5630274481237483e-05,
"loss": 0.0812,
"step": 13580
},
{
"epoch": 39.391304347826086,
"grad_norm": 0.49831944704055786,
"learning_rate": 2.5558118833088197e-05,
"loss": 0.0764,
"step": 13590
},
{
"epoch": 39.42028985507246,
"grad_norm": 0.3357720971107483,
"learning_rate": 2.548603000810872e-05,
"loss": 0.0789,
"step": 13600
},
{
"epoch": 39.44927536231884,
"grad_norm": 0.49669551849365234,
"learning_rate": 2.5414008203387152e-05,
"loss": 0.0775,
"step": 13610
},
{
"epoch": 39.47826086956522,
"grad_norm": 0.45243167877197266,
"learning_rate": 2.534205361582834e-05,
"loss": 0.0931,
"step": 13620
},
{
"epoch": 39.507246376811594,
"grad_norm": 0.4529440701007843,
"learning_rate": 2.527016644215338e-05,
"loss": 0.0857,
"step": 13630
},
{
"epoch": 39.53623188405797,
"grad_norm": 0.3923579156398773,
"learning_rate": 2.519834687889905e-05,
"loss": 0.0907,
"step": 13640
},
{
"epoch": 39.56521739130435,
"grad_norm": 0.46026331186294556,
"learning_rate": 2.5126595122417295e-05,
"loss": 0.0653,
"step": 13650
},
{
"epoch": 39.594202898550726,
"grad_norm": 0.3783218264579773,
"learning_rate": 2.5054911368874713e-05,
"loss": 0.0894,
"step": 13660
},
{
"epoch": 39.6231884057971,
"grad_norm": 0.2474319487810135,
"learning_rate": 2.4983295814251916e-05,
"loss": 0.0855,
"step": 13670
},
{
"epoch": 39.65217391304348,
"grad_norm": 0.2657444179058075,
"learning_rate": 2.4911748654343105e-05,
"loss": 0.0811,
"step": 13680
},
{
"epoch": 39.68115942028985,
"grad_norm": 0.3964589238166809,
"learning_rate": 2.4840270084755463e-05,
"loss": 0.0719,
"step": 13690
},
{
"epoch": 39.710144927536234,
"grad_norm": 0.4461621046066284,
"learning_rate": 2.4768860300908685e-05,
"loss": 0.069,
"step": 13700
},
{
"epoch": 39.73913043478261,
"grad_norm": 0.32302120327949524,
"learning_rate": 2.469751949803443e-05,
"loss": 0.0827,
"step": 13710
},
{
"epoch": 39.768115942028984,
"grad_norm": 0.29357752203941345,
"learning_rate": 2.4626247871175666e-05,
"loss": 0.0605,
"step": 13720
},
{
"epoch": 39.79710144927536,
"grad_norm": 0.5546101331710815,
"learning_rate": 2.4555045615186346e-05,
"loss": 0.0806,
"step": 13730
},
{
"epoch": 39.82608695652174,
"grad_norm": 0.4854411482810974,
"learning_rate": 2.4483912924730677e-05,
"loss": 0.0825,
"step": 13740
},
{
"epoch": 39.85507246376812,
"grad_norm": 0.3219527304172516,
"learning_rate": 2.4412849994282742e-05,
"loss": 0.072,
"step": 13750
},
{
"epoch": 39.88405797101449,
"grad_norm": 0.3878593146800995,
"learning_rate": 2.434185701812592e-05,
"loss": 0.0763,
"step": 13760
},
{
"epoch": 39.91304347826087,
"grad_norm": 0.43568170070648193,
"learning_rate": 2.4270934190352218e-05,
"loss": 0.0837,
"step": 13770
},
{
"epoch": 39.94202898550725,
"grad_norm": 0.3280969560146332,
"learning_rate": 2.4200081704861998e-05,
"loss": 0.0852,
"step": 13780
},
{
"epoch": 39.971014492753625,
"grad_norm": 0.4428047239780426,
"learning_rate": 2.412929975536321e-05,
"loss": 0.0778,
"step": 13790
},
{
"epoch": 40.0,
"grad_norm": 1.2345435619354248,
"learning_rate": 2.4058588535371017e-05,
"loss": 0.0667,
"step": 13800
},
{
"epoch": 40.028985507246375,
"grad_norm": 0.3271735906600952,
"learning_rate": 2.3987948238207243e-05,
"loss": 0.0644,
"step": 13810
},
{
"epoch": 40.05797101449275,
"grad_norm": 0.37233906984329224,
"learning_rate": 2.3917379056999678e-05,
"loss": 0.0809,
"step": 13820
},
{
"epoch": 40.08695652173913,
"grad_norm": 0.34821170568466187,
"learning_rate": 2.3846881184681824e-05,
"loss": 0.0712,
"step": 13830
},
{
"epoch": 40.11594202898551,
"grad_norm": 0.47559064626693726,
"learning_rate": 2.377645481399214e-05,
"loss": 0.071,
"step": 13840
},
{
"epoch": 40.14492753623188,
"grad_norm": 0.40675798058509827,
"learning_rate": 2.3706100137473667e-05,
"loss": 0.0876,
"step": 13850
},
{
"epoch": 40.17391304347826,
"grad_norm": 0.33387240767478943,
"learning_rate": 2.3635817347473394e-05,
"loss": 0.0763,
"step": 13860
},
{
"epoch": 40.20289855072464,
"grad_norm": 0.38296619057655334,
"learning_rate": 2.3565606636141757e-05,
"loss": 0.0861,
"step": 13870
},
{
"epoch": 40.231884057971016,
"grad_norm": 0.35619163513183594,
"learning_rate": 2.3495468195432203e-05,
"loss": 0.0875,
"step": 13880
},
{
"epoch": 40.26086956521739,
"grad_norm": 0.38837236166000366,
"learning_rate": 2.3425402217100507e-05,
"loss": 0.076,
"step": 13890
},
{
"epoch": 40.289855072463766,
"grad_norm": 0.5374419093132019,
"learning_rate": 2.3355408892704424e-05,
"loss": 0.0872,
"step": 13900
},
{
"epoch": 40.31884057971015,
"grad_norm": 0.399505078792572,
"learning_rate": 2.3285488413603003e-05,
"loss": 0.0688,
"step": 13910
},
{
"epoch": 40.34782608695652,
"grad_norm": 0.41612479090690613,
"learning_rate": 2.321564097095615e-05,
"loss": 0.0845,
"step": 13920
},
{
"epoch": 40.3768115942029,
"grad_norm": 0.5337821841239929,
"learning_rate": 2.3145866755724142e-05,
"loss": 0.0841,
"step": 13930
},
{
"epoch": 40.405797101449274,
"grad_norm": 0.4810619652271271,
"learning_rate": 2.307616595866699e-05,
"loss": 0.0757,
"step": 13940
},
{
"epoch": 40.43478260869565,
"grad_norm": 0.7072311043739319,
"learning_rate": 2.3006538770344032e-05,
"loss": 0.0872,
"step": 13950
},
{
"epoch": 40.46376811594203,
"grad_norm": 0.4162898659706116,
"learning_rate": 2.293698538111334e-05,
"loss": 0.0834,
"step": 13960
},
{
"epoch": 40.492753623188406,
"grad_norm": 0.40901780128479004,
"learning_rate": 2.28675059811312e-05,
"loss": 0.0647,
"step": 13970
},
{
"epoch": 40.52173913043478,
"grad_norm": 0.32501867413520813,
"learning_rate": 2.279810076035167e-05,
"loss": 0.0776,
"step": 13980
},
{
"epoch": 40.55072463768116,
"grad_norm": 0.32248783111572266,
"learning_rate": 2.272876990852596e-05,
"loss": 0.0749,
"step": 13990
},
{
"epoch": 40.57971014492754,
"grad_norm": 0.36385807394981384,
"learning_rate": 2.265951361520195e-05,
"loss": 0.0853,
"step": 14000
},
{
"epoch": 40.608695652173914,
"grad_norm": 0.3925250470638275,
"learning_rate": 2.2590332069723748e-05,
"loss": 0.0877,
"step": 14010
},
{
"epoch": 40.63768115942029,
"grad_norm": 0.3202069401741028,
"learning_rate": 2.2521225461231004e-05,
"loss": 0.0622,
"step": 14020
},
{
"epoch": 40.666666666666664,
"grad_norm": 0.335860013961792,
"learning_rate": 2.2452193978658597e-05,
"loss": 0.0798,
"step": 14030
},
{
"epoch": 40.69565217391305,
"grad_norm": 0.2558061480522156,
"learning_rate": 2.238323781073594e-05,
"loss": 0.0728,
"step": 14040
},
{
"epoch": 40.72463768115942,
"grad_norm": 0.551850438117981,
"learning_rate": 2.2314357145986552e-05,
"loss": 0.0729,
"step": 14050
},
{
"epoch": 40.7536231884058,
"grad_norm": 0.36312124133110046,
"learning_rate": 2.224555217272757e-05,
"loss": 0.0964,
"step": 14060
},
{
"epoch": 40.78260869565217,
"grad_norm": 0.2239280343055725,
"learning_rate": 2.2176823079069127e-05,
"loss": 0.079,
"step": 14070
},
{
"epoch": 40.81159420289855,
"grad_norm": 0.35991764068603516,
"learning_rate": 2.210817005291398e-05,
"loss": 0.085,
"step": 14080
},
{
"epoch": 40.84057971014493,
"grad_norm": 0.46211162209510803,
"learning_rate": 2.203959328195686e-05,
"loss": 0.0719,
"step": 14090
},
{
"epoch": 40.869565217391305,
"grad_norm": 0.4440068006515503,
"learning_rate": 2.1971092953684026e-05,
"loss": 0.0933,
"step": 14100
},
{
"epoch": 40.89855072463768,
"grad_norm": 0.5218793749809265,
"learning_rate": 2.1902669255372788e-05,
"loss": 0.0777,
"step": 14110
},
{
"epoch": 40.927536231884055,
"grad_norm": 0.23008324205875397,
"learning_rate": 2.1834322374090897e-05,
"loss": 0.0849,
"step": 14120
},
{
"epoch": 40.95652173913044,
"grad_norm": 0.2903602719306946,
"learning_rate": 2.1766052496696153e-05,
"loss": 0.0825,
"step": 14130
},
{
"epoch": 40.98550724637681,
"grad_norm": 0.22217999398708344,
"learning_rate": 2.169785980983577e-05,
"loss": 0.0672,
"step": 14140
},
{
"epoch": 41.01449275362319,
"grad_norm": 0.2826724350452423,
"learning_rate": 2.162974449994593e-05,
"loss": 0.0776,
"step": 14150
},
{
"epoch": 41.04347826086956,
"grad_norm": 0.4253155589103699,
"learning_rate": 2.1561706753251337e-05,
"loss": 0.0682,
"step": 14160
},
{
"epoch": 41.072463768115945,
"grad_norm": 0.5486535429954529,
"learning_rate": 2.1493746755764544e-05,
"loss": 0.0831,
"step": 14170
},
{
"epoch": 41.10144927536232,
"grad_norm": 0.31310802698135376,
"learning_rate": 2.1425864693285635e-05,
"loss": 0.0705,
"step": 14180
},
{
"epoch": 41.130434782608695,
"grad_norm": 0.4277971088886261,
"learning_rate": 2.1358060751401547e-05,
"loss": 0.0734,
"step": 14190
},
{
"epoch": 41.15942028985507,
"grad_norm": 0.25638988614082336,
"learning_rate": 2.129033511548566e-05,
"loss": 0.0608,
"step": 14200
},
{
"epoch": 41.18840579710145,
"grad_norm": 0.3612518608570099,
"learning_rate": 2.1222687970697315e-05,
"loss": 0.0726,
"step": 14210
},
{
"epoch": 41.21739130434783,
"grad_norm": 0.438911110162735,
"learning_rate": 2.1155119501981173e-05,
"loss": 0.0758,
"step": 14220
},
{
"epoch": 41.2463768115942,
"grad_norm": 0.45236918330192566,
"learning_rate": 2.1087629894066895e-05,
"loss": 0.0752,
"step": 14230
},
{
"epoch": 41.27536231884058,
"grad_norm": 0.3085053861141205,
"learning_rate": 2.1020219331468473e-05,
"loss": 0.07,
"step": 14240
},
{
"epoch": 41.30434782608695,
"grad_norm": 0.4090859889984131,
"learning_rate": 2.095288799848379e-05,
"loss": 0.073,
"step": 14250
},
{
"epoch": 41.333333333333336,
"grad_norm": 0.25771570205688477,
"learning_rate": 2.088563607919417e-05,
"loss": 0.0696,
"step": 14260
},
{
"epoch": 41.36231884057971,
"grad_norm": 0.3390887975692749,
"learning_rate": 2.0818463757463786e-05,
"loss": 0.0845,
"step": 14270
},
{
"epoch": 41.391304347826086,
"grad_norm": 0.41618505120277405,
"learning_rate": 2.0751371216939175e-05,
"loss": 0.0863,
"step": 14280
},
{
"epoch": 41.42028985507246,
"grad_norm": 0.3998986780643463,
"learning_rate": 2.068435864104882e-05,
"loss": 0.0709,
"step": 14290
},
{
"epoch": 41.44927536231884,
"grad_norm": 0.6030775308609009,
"learning_rate": 2.0617426213002506e-05,
"loss": 0.0828,
"step": 14300
},
{
"epoch": 41.47826086956522,
"grad_norm": 0.35719752311706543,
"learning_rate": 2.055057411579097e-05,
"loss": 0.08,
"step": 14310
},
{
"epoch": 41.507246376811594,
"grad_norm": 0.3750839829444885,
"learning_rate": 2.0483802532185286e-05,
"loss": 0.062,
"step": 14320
},
{
"epoch": 41.53623188405797,
"grad_norm": 0.303724467754364,
"learning_rate": 2.041711164473638e-05,
"loss": 0.0762,
"step": 14330
},
{
"epoch": 41.56521739130435,
"grad_norm": 0.3491968810558319,
"learning_rate": 2.0350501635774637e-05,
"loss": 0.0681,
"step": 14340
},
{
"epoch": 41.594202898550726,
"grad_norm": 0.31283631920814514,
"learning_rate": 2.0283972687409247e-05,
"loss": 0.099,
"step": 14350
},
{
"epoch": 41.6231884057971,
"grad_norm": 0.3491584360599518,
"learning_rate": 2.021752498152784e-05,
"loss": 0.0799,
"step": 14360
},
{
"epoch": 41.65217391304348,
"grad_norm": 0.40594348311424255,
"learning_rate": 2.015115869979589e-05,
"loss": 0.0673,
"step": 14370
},
{
"epoch": 41.68115942028985,
"grad_norm": 0.2607230842113495,
"learning_rate": 2.0084874023656265e-05,
"loss": 0.0678,
"step": 14380
},
{
"epoch": 41.710144927536234,
"grad_norm": 0.4588332176208496,
"learning_rate": 2.001867113432877e-05,
"loss": 0.0739,
"step": 14390
},
{
"epoch": 41.73913043478261,
"grad_norm": 0.47973960638046265,
"learning_rate": 1.995255021280954e-05,
"loss": 0.0799,
"step": 14400
},
{
"epoch": 41.768115942028984,
"grad_norm": 0.39496302604675293,
"learning_rate": 1.9886511439870688e-05,
"loss": 0.0755,
"step": 14410
},
{
"epoch": 41.79710144927536,
"grad_norm": 0.47075384855270386,
"learning_rate": 1.9820554996059675e-05,
"loss": 0.0843,
"step": 14420
},
{
"epoch": 41.82608695652174,
"grad_norm": 0.3595154583454132,
"learning_rate": 1.9754681061698893e-05,
"loss": 0.0847,
"step": 14430
},
{
"epoch": 41.85507246376812,
"grad_norm": 0.4555226266384125,
"learning_rate": 1.9688889816885185e-05,
"loss": 0.0703,
"step": 14440
},
{
"epoch": 41.88405797101449,
"grad_norm": 0.4868208169937134,
"learning_rate": 1.962318144148928e-05,
"loss": 0.0852,
"step": 14450
},
{
"epoch": 41.91304347826087,
"grad_norm": 0.3341791331768036,
"learning_rate": 1.955755611515539e-05,
"loss": 0.0665,
"step": 14460
},
{
"epoch": 41.94202898550725,
"grad_norm": 0.4766240119934082,
"learning_rate": 1.9492014017300642e-05,
"loss": 0.0717,
"step": 14470
},
{
"epoch": 41.971014492753625,
"grad_norm": 0.4072614908218384,
"learning_rate": 1.942655532711461e-05,
"loss": 0.0689,
"step": 14480
},
{
"epoch": 42.0,
"grad_norm": 0.5508348345756531,
"learning_rate": 1.9361180223558882e-05,
"loss": 0.0654,
"step": 14490
},
{
"epoch": 42.028985507246375,
"grad_norm": 0.3589998483657837,
"learning_rate": 1.929588888536647e-05,
"loss": 0.0736,
"step": 14500
},
{
"epoch": 42.05797101449275,
"grad_norm": 0.33736053109169006,
"learning_rate": 1.9230681491041425e-05,
"loss": 0.0682,
"step": 14510
},
{
"epoch": 42.08695652173913,
"grad_norm": 0.4881956875324249,
"learning_rate": 1.9165558218858264e-05,
"loss": 0.0817,
"step": 14520
},
{
"epoch": 42.11594202898551,
"grad_norm": 0.5997191071510315,
"learning_rate": 1.9100519246861505e-05,
"loss": 0.0722,
"step": 14530
},
{
"epoch": 42.14492753623188,
"grad_norm": 0.4747546911239624,
"learning_rate": 1.9035564752865248e-05,
"loss": 0.0624,
"step": 14540
},
{
"epoch": 42.17391304347826,
"grad_norm": 0.391609251499176,
"learning_rate": 1.897069491445258e-05,
"loss": 0.088,
"step": 14550
},
{
"epoch": 42.20289855072464,
"grad_norm": 0.5286002159118652,
"learning_rate": 1.890590990897515e-05,
"loss": 0.0921,
"step": 14560
},
{
"epoch": 42.231884057971016,
"grad_norm": 0.3322617709636688,
"learning_rate": 1.884120991355272e-05,
"loss": 0.0753,
"step": 14570
},
{
"epoch": 42.26086956521739,
"grad_norm": 0.366778165102005,
"learning_rate": 1.8776595105072576e-05,
"loss": 0.071,
"step": 14580
},
{
"epoch": 42.289855072463766,
"grad_norm": 0.5647521018981934,
"learning_rate": 1.8712065660189166e-05,
"loss": 0.0901,
"step": 14590
},
{
"epoch": 42.31884057971015,
"grad_norm": 0.44216540455818176,
"learning_rate": 1.8647621755323513e-05,
"loss": 0.0754,
"step": 14600
},
{
"epoch": 42.34782608695652,
"grad_norm": 0.41718125343322754,
"learning_rate": 1.858326356666278e-05,
"loss": 0.0798,
"step": 14610
},
{
"epoch": 42.3768115942029,
"grad_norm": 0.3692278563976288,
"learning_rate": 1.851899127015983e-05,
"loss": 0.0687,
"step": 14620
},
{
"epoch": 42.405797101449274,
"grad_norm": 0.5888849496841431,
"learning_rate": 1.8454805041532626e-05,
"loss": 0.0605,
"step": 14630
},
{
"epoch": 42.43478260869565,
"grad_norm": 0.366144061088562,
"learning_rate": 1.8390705056263906e-05,
"loss": 0.0665,
"step": 14640
},
{
"epoch": 42.46376811594203,
"grad_norm": 0.4007920026779175,
"learning_rate": 1.832669148960057e-05,
"loss": 0.0707,
"step": 14650
},
{
"epoch": 42.492753623188406,
"grad_norm": 0.36319825053215027,
"learning_rate": 1.8262764516553233e-05,
"loss": 0.0604,
"step": 14660
},
{
"epoch": 42.52173913043478,
"grad_norm": 0.5968917012214661,
"learning_rate": 1.8198924311895843e-05,
"loss": 0.0792,
"step": 14670
},
{
"epoch": 42.55072463768116,
"grad_norm": 0.3557155132293701,
"learning_rate": 1.813517105016505e-05,
"loss": 0.0587,
"step": 14680
},
{
"epoch": 42.57971014492754,
"grad_norm": 0.3647300899028778,
"learning_rate": 1.8071504905659888e-05,
"loss": 0.0678,
"step": 14690
},
{
"epoch": 42.608695652173914,
"grad_norm": 0.5016182065010071,
"learning_rate": 1.800792605244109e-05,
"loss": 0.0726,
"step": 14700
},
{
"epoch": 42.63768115942029,
"grad_norm": 0.39856255054473877,
"learning_rate": 1.7944434664330844e-05,
"loss": 0.0852,
"step": 14710
},
{
"epoch": 42.666666666666664,
"grad_norm": 0.3633764684200287,
"learning_rate": 1.7881030914912212e-05,
"loss": 0.08,
"step": 14720
},
{
"epoch": 42.69565217391305,
"grad_norm": 0.36024579405784607,
"learning_rate": 1.7817714977528577e-05,
"loss": 0.0686,
"step": 14730
},
{
"epoch": 42.72463768115942,
"grad_norm": 0.40388357639312744,
"learning_rate": 1.7754487025283332e-05,
"loss": 0.0657,
"step": 14740
},
{
"epoch": 42.7536231884058,
"grad_norm": 0.5098476409912109,
"learning_rate": 1.7691347231039275e-05,
"loss": 0.0651,
"step": 14750
},
{
"epoch": 42.78260869565217,
"grad_norm": 0.4363411068916321,
"learning_rate": 1.7628295767418164e-05,
"loss": 0.0966,
"step": 14760
},
{
"epoch": 42.81159420289855,
"grad_norm": 0.48385173082351685,
"learning_rate": 1.7565332806800333e-05,
"loss": 0.0751,
"step": 14770
},
{
"epoch": 42.84057971014493,
"grad_norm": 0.4358624815940857,
"learning_rate": 1.750245852132408e-05,
"loss": 0.087,
"step": 14780
},
{
"epoch": 42.869565217391305,
"grad_norm": 0.4145340621471405,
"learning_rate": 1.7439673082885323e-05,
"loss": 0.0738,
"step": 14790
},
{
"epoch": 42.89855072463768,
"grad_norm": 0.4053754508495331,
"learning_rate": 1.7376976663137047e-05,
"loss": 0.0895,
"step": 14800
},
{
"epoch": 42.927536231884055,
"grad_norm": 0.2905048131942749,
"learning_rate": 1.7314369433488853e-05,
"loss": 0.0622,
"step": 14810
},
{
"epoch": 42.95652173913044,
"grad_norm": 0.5020401477813721,
"learning_rate": 1.7251851565106548e-05,
"loss": 0.0642,
"step": 14820
},
{
"epoch": 42.98550724637681,
"grad_norm": 0.4154917597770691,
"learning_rate": 1.7189423228911574e-05,
"loss": 0.0807,
"step": 14830
},
{
"epoch": 43.01449275362319,
"grad_norm": 0.5019571781158447,
"learning_rate": 1.7127084595580606e-05,
"loss": 0.0779,
"step": 14840
},
{
"epoch": 43.04347826086956,
"grad_norm": 0.3335070312023163,
"learning_rate": 1.706483583554513e-05,
"loss": 0.0811,
"step": 14850
},
{
"epoch": 43.072463768115945,
"grad_norm": 0.3166472911834717,
"learning_rate": 1.700267711899083e-05,
"loss": 0.0729,
"step": 14860
},
{
"epoch": 43.10144927536232,
"grad_norm": 0.45485633611679077,
"learning_rate": 1.69406086158573e-05,
"loss": 0.0674,
"step": 14870
},
{
"epoch": 43.130434782608695,
"grad_norm": 0.27782437205314636,
"learning_rate": 1.6878630495837455e-05,
"loss": 0.0833,
"step": 14880
},
{
"epoch": 43.15942028985507,
"grad_norm": 0.24997830390930176,
"learning_rate": 1.681674292837707e-05,
"loss": 0.0649,
"step": 14890
},
{
"epoch": 43.18840579710145,
"grad_norm": 0.291838675737381,
"learning_rate": 1.6754946082674444e-05,
"loss": 0.0664,
"step": 14900
},
{
"epoch": 43.21739130434783,
"grad_norm": 0.3121786117553711,
"learning_rate": 1.6693240127679748e-05,
"loss": 0.0733,
"step": 14910
},
{
"epoch": 43.2463768115942,
"grad_norm": 0.38471075892448425,
"learning_rate": 1.663162523209475e-05,
"loss": 0.0821,
"step": 14920
},
{
"epoch": 43.27536231884058,
"grad_norm": 0.5700430274009705,
"learning_rate": 1.6570101564372193e-05,
"loss": 0.0669,
"step": 14930
},
{
"epoch": 43.30434782608695,
"grad_norm": 0.5257859230041504,
"learning_rate": 1.650866929271543e-05,
"loss": 0.0602,
"step": 14940
},
{
"epoch": 43.333333333333336,
"grad_norm": 0.4088708162307739,
"learning_rate": 1.644732858507797e-05,
"loss": 0.0871,
"step": 14950
},
{
"epoch": 43.36231884057971,
"grad_norm": 0.5116233825683594,
"learning_rate": 1.6386079609162943e-05,
"loss": 0.0598,
"step": 14960
},
{
"epoch": 43.391304347826086,
"grad_norm": 0.2616664469242096,
"learning_rate": 1.6324922532422742e-05,
"loss": 0.0606,
"step": 14970
},
{
"epoch": 43.42028985507246,
"grad_norm": 0.5427923798561096,
"learning_rate": 1.6263857522058434e-05,
"loss": 0.0937,
"step": 14980
},
{
"epoch": 43.44927536231884,
"grad_norm": 0.3789597153663635,
"learning_rate": 1.6202884745019443e-05,
"loss": 0.0851,
"step": 14990
},
{
"epoch": 43.47826086956522,
"grad_norm": 0.46611571311950684,
"learning_rate": 1.614200436800304e-05,
"loss": 0.0783,
"step": 15000
},
{
"epoch": 43.507246376811594,
"grad_norm": 0.37547364830970764,
"learning_rate": 1.6081216557453814e-05,
"loss": 0.0833,
"step": 15010
},
{
"epoch": 43.53623188405797,
"grad_norm": 0.3774726688861847,
"learning_rate": 1.6020521479563367e-05,
"loss": 0.0767,
"step": 15020
},
{
"epoch": 43.56521739130435,
"grad_norm": 0.44292446970939636,
"learning_rate": 1.5959919300269654e-05,
"loss": 0.0728,
"step": 15030
},
{
"epoch": 43.594202898550726,
"grad_norm": 0.5792534351348877,
"learning_rate": 1.5899410185256764e-05,
"loss": 0.0593,
"step": 15040
},
{
"epoch": 43.6231884057971,
"grad_norm": 0.2785523235797882,
"learning_rate": 1.583899429995431e-05,
"loss": 0.0612,
"step": 15050
},
{
"epoch": 43.65217391304348,
"grad_norm": 0.29454028606414795,
"learning_rate": 1.5778671809536993e-05,
"loss": 0.0751,
"step": 15060
},
{
"epoch": 43.68115942028985,
"grad_norm": 0.2879396378993988,
"learning_rate": 1.5718442878924246e-05,
"loss": 0.0883,
"step": 15070
},
{
"epoch": 43.710144927536234,
"grad_norm": 1.1070629358291626,
"learning_rate": 1.5658307672779593e-05,
"loss": 0.093,
"step": 15080
},
{
"epoch": 43.73913043478261,
"grad_norm": 0.29835617542266846,
"learning_rate": 1.5598266355510427e-05,
"loss": 0.0657,
"step": 15090
},
{
"epoch": 43.768115942028984,
"grad_norm": 0.4190385937690735,
"learning_rate": 1.553831909126744e-05,
"loss": 0.0742,
"step": 15100
},
{
"epoch": 43.79710144927536,
"grad_norm": 0.34586817026138306,
"learning_rate": 1.5478466043944135e-05,
"loss": 0.0715,
"step": 15110
},
{
"epoch": 43.82608695652174,
"grad_norm": 0.37232398986816406,
"learning_rate": 1.5418707377176468e-05,
"loss": 0.0695,
"step": 15120
},
{
"epoch": 43.85507246376812,
"grad_norm": 0.42787492275238037,
"learning_rate": 1.535904325434233e-05,
"loss": 0.0959,
"step": 15130
},
{
"epoch": 43.88405797101449,
"grad_norm": 0.8969880938529968,
"learning_rate": 1.529947383856118e-05,
"loss": 0.0693,
"step": 15140
},
{
"epoch": 43.91304347826087,
"grad_norm": 0.38823625445365906,
"learning_rate": 1.5239999292693524e-05,
"loss": 0.0825,
"step": 15150
},
{
"epoch": 43.94202898550725,
"grad_norm": 0.2747124135494232,
"learning_rate": 1.5180619779340505e-05,
"loss": 0.0809,
"step": 15160
},
{
"epoch": 43.971014492753625,
"grad_norm": 0.420537531375885,
"learning_rate": 1.5121335460843428e-05,
"loss": 0.0634,
"step": 15170
},
{
"epoch": 44.0,
"grad_norm": 0.784938395023346,
"learning_rate": 1.5062146499283347e-05,
"loss": 0.073,
"step": 15180
},
{
"epoch": 44.028985507246375,
"grad_norm": 0.47105634212493896,
"learning_rate": 1.5003053056480643e-05,
"loss": 0.0736,
"step": 15190
},
{
"epoch": 44.05797101449275,
"grad_norm": 0.2737712860107422,
"learning_rate": 1.4944055293994551e-05,
"loss": 0.0674,
"step": 15200
},
{
"epoch": 44.08695652173913,
"grad_norm": 0.6026032567024231,
"learning_rate": 1.4885153373122656e-05,
"loss": 0.0922,
"step": 15210
},
{
"epoch": 44.11594202898551,
"grad_norm": 0.3727162182331085,
"learning_rate": 1.482634745490059e-05,
"loss": 0.0644,
"step": 15220
},
{
"epoch": 44.14492753623188,
"grad_norm": 0.47362762689590454,
"learning_rate": 1.4767637700101466e-05,
"loss": 0.066,
"step": 15230
},
{
"epoch": 44.17391304347826,
"grad_norm": 0.35355237126350403,
"learning_rate": 1.4709024269235528e-05,
"loss": 0.0617,
"step": 15240
},
{
"epoch": 44.20289855072464,
"grad_norm": 0.3178042471408844,
"learning_rate": 1.4650507322549684e-05,
"loss": 0.1073,
"step": 15250
},
{
"epoch": 44.231884057971016,
"grad_norm": 0.5713096857070923,
"learning_rate": 1.4592087020026972e-05,
"loss": 0.0697,
"step": 15260
},
{
"epoch": 44.26086956521739,
"grad_norm": 0.39644819498062134,
"learning_rate": 1.4533763521386318e-05,
"loss": 0.0787,
"step": 15270
},
{
"epoch": 44.289855072463766,
"grad_norm": 0.3511520326137543,
"learning_rate": 1.44755369860819e-05,
"loss": 0.0637,
"step": 15280
},
{
"epoch": 44.31884057971015,
"grad_norm": 0.5535669326782227,
"learning_rate": 1.441740757330287e-05,
"loss": 0.0936,
"step": 15290
},
{
"epoch": 44.34782608695652,
"grad_norm": 0.5639561414718628,
"learning_rate": 1.4359375441972844e-05,
"loss": 0.0809,
"step": 15300
},
{
"epoch": 44.3768115942029,
"grad_norm": 0.3432080149650574,
"learning_rate": 1.4301440750749395e-05,
"loss": 0.0813,
"step": 15310
},
{
"epoch": 44.405797101449274,
"grad_norm": 0.3394940495491028,
"learning_rate": 1.4243603658023808e-05,
"loss": 0.0816,
"step": 15320
},
{
"epoch": 44.43478260869565,
"grad_norm": 0.3588254451751709,
"learning_rate": 1.4185864321920444e-05,
"loss": 0.0711,
"step": 15330
},
{
"epoch": 44.46376811594203,
"grad_norm": 0.3964613676071167,
"learning_rate": 1.4128222900296485e-05,
"loss": 0.0795,
"step": 15340
},
{
"epoch": 44.492753623188406,
"grad_norm": 0.38622230291366577,
"learning_rate": 1.407067955074135e-05,
"loss": 0.0716,
"step": 15350
},
{
"epoch": 44.52173913043478,
"grad_norm": 0.28652891516685486,
"learning_rate": 1.4013234430576356e-05,
"loss": 0.067,
"step": 15360
},
{
"epoch": 44.55072463768116,
"grad_norm": 0.3979763388633728,
"learning_rate": 1.3955887696854286e-05,
"loss": 0.0761,
"step": 15370
},
{
"epoch": 44.57971014492754,
"grad_norm": 0.4278284013271332,
"learning_rate": 1.38986395063589e-05,
"loss": 0.073,
"step": 15380
},
{
"epoch": 44.608695652173914,
"grad_norm": 0.40081092715263367,
"learning_rate": 1.3841490015604597e-05,
"loss": 0.0859,
"step": 15390
},
{
"epoch": 44.63768115942029,
"grad_norm": 0.45146530866622925,
"learning_rate": 1.3784439380835879e-05,
"loss": 0.0809,
"step": 15400
},
{
"epoch": 44.666666666666664,
"grad_norm": 0.3806000351905823,
"learning_rate": 1.3727487758026986e-05,
"loss": 0.0725,
"step": 15410
},
{
"epoch": 44.69565217391305,
"grad_norm": 0.5500205755233765,
"learning_rate": 1.3670635302881525e-05,
"loss": 0.0737,
"step": 15420
},
{
"epoch": 44.72463768115942,
"grad_norm": 0.2973146438598633,
"learning_rate": 1.3613882170831888e-05,
"loss": 0.0739,
"step": 15430
},
{
"epoch": 44.7536231884058,
"grad_norm": 0.4235207736492157,
"learning_rate": 1.355722851703901e-05,
"loss": 0.0837,
"step": 15440
},
{
"epoch": 44.78260869565217,
"grad_norm": 0.3844519853591919,
"learning_rate": 1.3500674496391814e-05,
"loss": 0.0669,
"step": 15450
},
{
"epoch": 44.81159420289855,
"grad_norm": 0.3494715988636017,
"learning_rate": 1.3444220263506795e-05,
"loss": 0.0587,
"step": 15460
},
{
"epoch": 44.84057971014493,
"grad_norm": 0.5101982355117798,
"learning_rate": 1.3387865972727714e-05,
"loss": 0.0871,
"step": 15470
},
{
"epoch": 44.869565217391305,
"grad_norm": 0.3597027361392975,
"learning_rate": 1.3331611778125036e-05,
"loss": 0.0728,
"step": 15480
},
{
"epoch": 44.89855072463768,
"grad_norm": 0.5626224279403687,
"learning_rate": 1.3275457833495564e-05,
"loss": 0.0804,
"step": 15490
},
{
"epoch": 44.927536231884055,
"grad_norm": 0.3257477581501007,
"learning_rate": 1.3219404292362065e-05,
"loss": 0.0632,
"step": 15500
},
{
"epoch": 44.95652173913044,
"grad_norm": 0.4441049098968506,
"learning_rate": 1.3163451307972751e-05,
"loss": 0.0695,
"step": 15510
},
{
"epoch": 44.98550724637681,
"grad_norm": 0.3859218657016754,
"learning_rate": 1.3107599033300977e-05,
"loss": 0.0671,
"step": 15520
},
{
"epoch": 45.01449275362319,
"grad_norm": 0.4354454278945923,
"learning_rate": 1.305184762104471e-05,
"loss": 0.0645,
"step": 15530
},
{
"epoch": 45.04347826086956,
"grad_norm": 0.2836010158061981,
"learning_rate": 1.2996197223626178e-05,
"loss": 0.0645,
"step": 15540
},
{
"epoch": 45.072463768115945,
"grad_norm": 0.3890087306499481,
"learning_rate": 1.2940647993191457e-05,
"loss": 0.0631,
"step": 15550
},
{
"epoch": 45.10144927536232,
"grad_norm": 0.3880113959312439,
"learning_rate": 1.2885200081610005e-05,
"loss": 0.0642,
"step": 15560
},
{
"epoch": 45.130434782608695,
"grad_norm": 0.47158360481262207,
"learning_rate": 1.2829853640474316e-05,
"loss": 0.076,
"step": 15570
},
{
"epoch": 45.15942028985507,
"grad_norm": 0.4908730089664459,
"learning_rate": 1.2774608821099438e-05,
"loss": 0.082,
"step": 15580
},
{
"epoch": 45.18840579710145,
"grad_norm": 0.44620388746261597,
"learning_rate": 1.2719465774522577e-05,
"loss": 0.0805,
"step": 15590
},
{
"epoch": 45.21739130434783,
"grad_norm": 0.39248040318489075,
"learning_rate": 1.2664424651502755e-05,
"loss": 0.0798,
"step": 15600
},
{
"epoch": 45.2463768115942,
"grad_norm": 0.4180006980895996,
"learning_rate": 1.260948560252026e-05,
"loss": 0.0856,
"step": 15610
},
{
"epoch": 45.27536231884058,
"grad_norm": 0.44177964329719543,
"learning_rate": 1.2554648777776396e-05,
"loss": 0.0949,
"step": 15620
},
{
"epoch": 45.30434782608695,
"grad_norm": 0.33813127875328064,
"learning_rate": 1.2499914327192919e-05,
"loss": 0.0781,
"step": 15630
},
{
"epoch": 45.333333333333336,
"grad_norm": 0.3105308711528778,
"learning_rate": 1.2445282400411722e-05,
"loss": 0.07,
"step": 15640
},
{
"epoch": 45.36231884057971,
"grad_norm": 0.2976597249507904,
"learning_rate": 1.2390753146794437e-05,
"loss": 0.0752,
"step": 15650
},
{
"epoch": 45.391304347826086,
"grad_norm": 0.4583851993083954,
"learning_rate": 1.2336326715421925e-05,
"loss": 0.0767,
"step": 15660
},
{
"epoch": 45.42028985507246,
"grad_norm": 0.41092222929000854,
"learning_rate": 1.2282003255094005e-05,
"loss": 0.0728,
"step": 15670
},
{
"epoch": 45.44927536231884,
"grad_norm": 0.3140925467014313,
"learning_rate": 1.2227782914328928e-05,
"loss": 0.069,
"step": 15680
},
{
"epoch": 45.47826086956522,
"grad_norm": 0.355333149433136,
"learning_rate": 1.2173665841363018e-05,
"loss": 0.0711,
"step": 15690
},
{
"epoch": 45.507246376811594,
"grad_norm": 0.3979286253452301,
"learning_rate": 1.211965218415032e-05,
"loss": 0.0755,
"step": 15700
},
{
"epoch": 45.53623188405797,
"grad_norm": 0.27833595871925354,
"learning_rate": 1.2065742090362082e-05,
"loss": 0.0804,
"step": 15710
},
{
"epoch": 45.56521739130435,
"grad_norm": 0.3665226101875305,
"learning_rate": 1.2011935707386457e-05,
"loss": 0.0959,
"step": 15720
},
{
"epoch": 45.594202898550726,
"grad_norm": 0.3983865976333618,
"learning_rate": 1.1958233182328044e-05,
"loss": 0.0809,
"step": 15730
},
{
"epoch": 45.6231884057971,
"grad_norm": 0.3656999468803406,
"learning_rate": 1.1904634662007474e-05,
"loss": 0.0675,
"step": 15740
},
{
"epoch": 45.65217391304348,
"grad_norm": 0.47683125734329224,
"learning_rate": 1.1851140292961088e-05,
"loss": 0.0754,
"step": 15750
},
{
"epoch": 45.68115942028985,
"grad_norm": 0.5368967652320862,
"learning_rate": 1.1797750221440424e-05,
"loss": 0.0786,
"step": 15760
},
{
"epoch": 45.710144927536234,
"grad_norm": 0.37085482478141785,
"learning_rate": 1.1744464593411897e-05,
"loss": 0.0735,
"step": 15770
},
{
"epoch": 45.73913043478261,
"grad_norm": 0.3648932874202728,
"learning_rate": 1.1691283554556399e-05,
"loss": 0.0688,
"step": 15780
},
{
"epoch": 45.768115942028984,
"grad_norm": 0.25463685393333435,
"learning_rate": 1.1638207250268834e-05,
"loss": 0.0658,
"step": 15790
},
{
"epoch": 45.79710144927536,
"grad_norm": 0.2738022804260254,
"learning_rate": 1.158523582565782e-05,
"loss": 0.0851,
"step": 15800
},
{
"epoch": 45.82608695652174,
"grad_norm": 0.43908044695854187,
"learning_rate": 1.1532369425545192e-05,
"loss": 0.079,
"step": 15810
},
{
"epoch": 45.85507246376812,
"grad_norm": 0.424430251121521,
"learning_rate": 1.1479608194465662e-05,
"loss": 0.0783,
"step": 15820
},
{
"epoch": 45.88405797101449,
"grad_norm": 0.4064854681491852,
"learning_rate": 1.1426952276666442e-05,
"loss": 0.0687,
"step": 15830
},
{
"epoch": 45.91304347826087,
"grad_norm": 0.5900323987007141,
"learning_rate": 1.1374401816106778e-05,
"loss": 0.0811,
"step": 15840
},
{
"epoch": 45.94202898550725,
"grad_norm": 0.3530072867870331,
"learning_rate": 1.1321956956457646e-05,
"loss": 0.0707,
"step": 15850
},
{
"epoch": 45.971014492753625,
"grad_norm": 0.4914955794811249,
"learning_rate": 1.1269617841101277e-05,
"loss": 0.0663,
"step": 15860
},
{
"epoch": 46.0,
"grad_norm": 0.6903124451637268,
"learning_rate": 1.1217384613130804e-05,
"loss": 0.0757,
"step": 15870
},
{
"epoch": 46.028985507246375,
"grad_norm": 0.35140737891197205,
"learning_rate": 1.11652574153499e-05,
"loss": 0.0689,
"step": 15880
},
{
"epoch": 46.05797101449275,
"grad_norm": 0.45175376534461975,
"learning_rate": 1.1113236390272303e-05,
"loss": 0.0698,
"step": 15890
},
{
"epoch": 46.08695652173913,
"grad_norm": 0.5367652773857117,
"learning_rate": 1.106132168012155e-05,
"loss": 0.0757,
"step": 15900
},
{
"epoch": 46.11594202898551,
"grad_norm": 0.47009265422821045,
"learning_rate": 1.1009513426830448e-05,
"loss": 0.0658,
"step": 15910
},
{
"epoch": 46.14492753623188,
"grad_norm": 0.26874783635139465,
"learning_rate": 1.0957811772040777e-05,
"loss": 0.0735,
"step": 15920
},
{
"epoch": 46.17391304347826,
"grad_norm": 0.5538775324821472,
"learning_rate": 1.0906216857102913e-05,
"loss": 0.073,
"step": 15930
},
{
"epoch": 46.20289855072464,
"grad_norm": 0.33384883403778076,
"learning_rate": 1.0854728823075355e-05,
"loss": 0.0662,
"step": 15940
},
{
"epoch": 46.231884057971016,
"grad_norm": 0.35423901677131653,
"learning_rate": 1.0803347810724452e-05,
"loss": 0.0773,
"step": 15950
},
{
"epoch": 46.26086956521739,
"grad_norm": 0.3087175488471985,
"learning_rate": 1.0752073960523911e-05,
"loss": 0.0588,
"step": 15960
},
{
"epoch": 46.289855072463766,
"grad_norm": 0.22049643099308014,
"learning_rate": 1.070090741265447e-05,
"loss": 0.0737,
"step": 15970
},
{
"epoch": 46.31884057971015,
"grad_norm": 0.3322051763534546,
"learning_rate": 1.0649848307003547e-05,
"loss": 0.0654,
"step": 15980
},
{
"epoch": 46.34782608695652,
"grad_norm": 0.42505577206611633,
"learning_rate": 1.0598896783164757e-05,
"loss": 0.0815,
"step": 15990
},
{
"epoch": 46.3768115942029,
"grad_norm": 0.26743263006210327,
"learning_rate": 1.0548052980437645e-05,
"loss": 0.0557,
"step": 16000
}
],
"logging_steps": 10,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 58,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}