{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 62500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00032,
"grad_norm": 0.39110425114631653,
"learning_rate": 0.0001999456,
"loss": 2.0348,
"step": 20
},
{
"epoch": 0.00064,
"grad_norm": 0.17977817356586456,
"learning_rate": 0.0001998816,
"loss": 0.9027,
"step": 40
},
{
"epoch": 0.00096,
"grad_norm": 0.1247190609574318,
"learning_rate": 0.00019981760000000002,
"loss": 0.9304,
"step": 60
},
{
"epoch": 0.00128,
"grad_norm": 0.1341821253299713,
"learning_rate": 0.0001997536,
"loss": 0.8894,
"step": 80
},
{
"epoch": 0.0016,
"grad_norm": 0.1190188005566597,
"learning_rate": 0.0001996896,
"loss": 0.924,
"step": 100
},
{
"epoch": 0.00192,
"grad_norm": 0.12215295433998108,
"learning_rate": 0.0001996256,
"loss": 0.896,
"step": 120
},
{
"epoch": 0.00224,
"grad_norm": 0.12413129210472107,
"learning_rate": 0.00019956160000000002,
"loss": 0.9179,
"step": 140
},
{
"epoch": 0.00256,
"grad_norm": 0.119780533015728,
"learning_rate": 0.00019949760000000002,
"loss": 0.9253,
"step": 160
},
{
"epoch": 0.00288,
"grad_norm": 0.12296301126480103,
"learning_rate": 0.00019943360000000001,
"loss": 0.934,
"step": 180
},
{
"epoch": 0.0032,
"grad_norm": 0.10840147733688354,
"learning_rate": 0.0001993696,
"loss": 0.9346,
"step": 200
},
{
"epoch": 0.00352,
"grad_norm": 0.11459454894065857,
"learning_rate": 0.0001993056,
"loss": 0.9282,
"step": 220
},
{
"epoch": 0.00384,
"grad_norm": 0.1370021402835846,
"learning_rate": 0.0001992416,
"loss": 0.8889,
"step": 240
},
{
"epoch": 0.00416,
"grad_norm": 0.12761865556240082,
"learning_rate": 0.0001991776,
"loss": 0.9154,
"step": 260
},
{
"epoch": 0.00448,
"grad_norm": 0.10725900530815125,
"learning_rate": 0.00019911360000000002,
"loss": 0.8488,
"step": 280
},
{
"epoch": 0.0048,
"grad_norm": 0.12192831188440323,
"learning_rate": 0.0001990496,
"loss": 0.9009,
"step": 300
},
{
"epoch": 0.00512,
"grad_norm": 0.1291641741991043,
"learning_rate": 0.0001989856,
"loss": 0.8447,
"step": 320
},
{
"epoch": 0.00544,
"grad_norm": 0.1057133749127388,
"learning_rate": 0.00019892160000000003,
"loss": 0.8813,
"step": 340
},
{
"epoch": 0.00576,
"grad_norm": 0.1342541128396988,
"learning_rate": 0.0001988576,
"loss": 0.8812,
"step": 360
},
{
"epoch": 0.00608,
"grad_norm": 0.11218508332967758,
"learning_rate": 0.0001987936,
"loss": 0.9021,
"step": 380
},
{
"epoch": 0.0064,
"grad_norm": 0.16854852437973022,
"learning_rate": 0.00019872960000000002,
"loss": 0.9124,
"step": 400
},
{
"epoch": 0.00672,
"grad_norm": 0.11709938943386078,
"learning_rate": 0.0001986656,
"loss": 0.8405,
"step": 420
},
{
"epoch": 0.00704,
"grad_norm": 0.12850341200828552,
"learning_rate": 0.0001986016,
"loss": 0.8414,
"step": 440
},
{
"epoch": 0.00736,
"grad_norm": 0.14519518613815308,
"learning_rate": 0.0001985376,
"loss": 0.8812,
"step": 460
},
{
"epoch": 0.00768,
"grad_norm": 0.12263692915439606,
"learning_rate": 0.00019847360000000002,
"loss": 0.8983,
"step": 480
},
{
"epoch": 0.008,
"grad_norm": 0.13009226322174072,
"learning_rate": 0.00019840960000000002,
"loss": 0.9046,
"step": 500
},
{
"epoch": 0.00832,
"grad_norm": 0.11333148181438446,
"learning_rate": 0.00019834560000000001,
"loss": 0.9265,
"step": 520
},
{
"epoch": 0.00864,
"grad_norm": 0.1445179581642151,
"learning_rate": 0.0001982816,
"loss": 0.8138,
"step": 540
},
{
"epoch": 0.00896,
"grad_norm": 0.12200130522251129,
"learning_rate": 0.0001982176,
"loss": 0.9048,
"step": 560
},
{
"epoch": 0.00928,
"grad_norm": 0.13597029447555542,
"learning_rate": 0.0001981536,
"loss": 0.8862,
"step": 580
},
{
"epoch": 0.0096,
"grad_norm": 0.130451962351799,
"learning_rate": 0.00019808960000000002,
"loss": 0.9051,
"step": 600
},
{
"epoch": 0.00992,
"grad_norm": 0.12426720559597015,
"learning_rate": 0.00019802560000000002,
"loss": 0.8529,
"step": 620
},
{
"epoch": 0.01024,
"grad_norm": 0.13389454782009125,
"learning_rate": 0.0001979616,
"loss": 0.8612,
"step": 640
},
{
"epoch": 0.01056,
"grad_norm": 0.14324034750461578,
"learning_rate": 0.0001978976,
"loss": 0.8377,
"step": 660
},
{
"epoch": 0.01088,
"grad_norm": 0.13510741293430328,
"learning_rate": 0.00019783360000000003,
"loss": 0.8786,
"step": 680
},
{
"epoch": 0.0112,
"grad_norm": 0.13480916619300842,
"learning_rate": 0.0001977696,
"loss": 0.8832,
"step": 700
},
{
"epoch": 0.01152,
"grad_norm": 0.14060954749584198,
"learning_rate": 0.0001977056,
"loss": 0.9028,
"step": 720
},
{
"epoch": 0.01184,
"grad_norm": 0.1472562700510025,
"learning_rate": 0.00019764160000000002,
"loss": 0.8943,
"step": 740
},
{
"epoch": 0.01216,
"grad_norm": 0.15105944871902466,
"learning_rate": 0.0001975776,
"loss": 0.8931,
"step": 760
},
{
"epoch": 0.01248,
"grad_norm": 0.14458748698234558,
"learning_rate": 0.0001975136,
"loss": 0.8901,
"step": 780
},
{
"epoch": 0.0128,
"grad_norm": 0.14904917776584625,
"learning_rate": 0.0001974496,
"loss": 0.8967,
"step": 800
},
{
"epoch": 0.01312,
"grad_norm": 0.1423230618238449,
"learning_rate": 0.00019738560000000002,
"loss": 0.891,
"step": 820
},
{
"epoch": 0.01344,
"grad_norm": 0.16175036132335663,
"learning_rate": 0.00019732160000000002,
"loss": 0.9115,
"step": 840
},
{
"epoch": 0.01376,
"grad_norm": 0.1510002315044403,
"learning_rate": 0.00019725760000000001,
"loss": 0.8754,
"step": 860
},
{
"epoch": 0.01408,
"grad_norm": 0.13992249965667725,
"learning_rate": 0.0001971936,
"loss": 0.8817,
"step": 880
},
{
"epoch": 0.0144,
"grad_norm": 0.17271611094474792,
"learning_rate": 0.0001971296,
"loss": 0.8966,
"step": 900
},
{
"epoch": 0.01472,
"grad_norm": 0.13203832507133484,
"learning_rate": 0.0001970656,
"loss": 0.9368,
"step": 920
},
{
"epoch": 0.01504,
"grad_norm": 0.1591019332408905,
"learning_rate": 0.00019700160000000002,
"loss": 0.8906,
"step": 940
},
{
"epoch": 0.01536,
"grad_norm": 0.14198246598243713,
"learning_rate": 0.00019693760000000002,
"loss": 0.8643,
"step": 960
},
{
"epoch": 0.01568,
"grad_norm": 0.17151997983455658,
"learning_rate": 0.0001968736,
"loss": 0.8888,
"step": 980
},
{
"epoch": 0.016,
"grad_norm": 0.14477385580539703,
"learning_rate": 0.0001968096,
"loss": 0.8929,
"step": 1000
},
{
"epoch": 0.01632,
"grad_norm": 0.18318715691566467,
"learning_rate": 0.00019674560000000003,
"loss": 0.8512,
"step": 1020
},
{
"epoch": 0.01664,
"grad_norm": 0.13533398509025574,
"learning_rate": 0.0001966816,
"loss": 0.8791,
"step": 1040
},
{
"epoch": 0.01696,
"grad_norm": 0.15874390304088593,
"learning_rate": 0.0001966176,
"loss": 0.8834,
"step": 1060
},
{
"epoch": 0.01728,
"grad_norm": 0.15447142720222473,
"learning_rate": 0.00019655360000000001,
"loss": 0.9151,
"step": 1080
},
{
"epoch": 0.0176,
"grad_norm": 0.1866873800754547,
"learning_rate": 0.0001964896,
"loss": 0.9385,
"step": 1100
},
{
"epoch": 0.01792,
"grad_norm": 0.14127598702907562,
"learning_rate": 0.0001964256,
"loss": 0.9416,
"step": 1120
},
{
"epoch": 0.01824,
"grad_norm": 0.17219585180282593,
"learning_rate": 0.0001963616,
"loss": 0.9493,
"step": 1140
},
{
"epoch": 0.01856,
"grad_norm": 0.1528492569923401,
"learning_rate": 0.00019629760000000002,
"loss": 0.8794,
"step": 1160
},
{
"epoch": 0.01888,
"grad_norm": 0.1348702758550644,
"learning_rate": 0.00019623360000000002,
"loss": 0.9214,
"step": 1180
},
{
"epoch": 0.0192,
"grad_norm": 0.16409145295619965,
"learning_rate": 0.0001961696,
"loss": 0.9187,
"step": 1200
},
{
"epoch": 0.01952,
"grad_norm": 0.15834647417068481,
"learning_rate": 0.0001961056,
"loss": 0.8428,
"step": 1220
},
{
"epoch": 0.01984,
"grad_norm": 0.18810701370239258,
"learning_rate": 0.0001960416,
"loss": 0.9302,
"step": 1240
},
{
"epoch": 0.02016,
"grad_norm": 0.1573120802640915,
"learning_rate": 0.0001959776,
"loss": 0.8962,
"step": 1260
},
{
"epoch": 0.02048,
"grad_norm": 0.1655122935771942,
"learning_rate": 0.00019591360000000002,
"loss": 0.927,
"step": 1280
},
{
"epoch": 0.0208,
"grad_norm": 0.1711716651916504,
"learning_rate": 0.00019584960000000002,
"loss": 0.8683,
"step": 1300
},
{
"epoch": 0.02112,
"grad_norm": 0.1399732083082199,
"learning_rate": 0.0001957856,
"loss": 0.8753,
"step": 1320
},
{
"epoch": 0.02144,
"grad_norm": 0.19218869507312775,
"learning_rate": 0.0001957216,
"loss": 0.8943,
"step": 1340
},
{
"epoch": 0.02176,
"grad_norm": 0.15489013493061066,
"learning_rate": 0.00019565760000000003,
"loss": 0.8872,
"step": 1360
},
{
"epoch": 0.02208,
"grad_norm": 0.17431455850601196,
"learning_rate": 0.0001955936,
"loss": 0.9016,
"step": 1380
},
{
"epoch": 0.0224,
"grad_norm": 0.13751237094402313,
"learning_rate": 0.0001955296,
"loss": 0.8206,
"step": 1400
},
{
"epoch": 0.02272,
"grad_norm": 0.15201833844184875,
"learning_rate": 0.00019546560000000001,
"loss": 0.824,
"step": 1420
},
{
"epoch": 0.02304,
"grad_norm": 0.1994636058807373,
"learning_rate": 0.0001954016,
"loss": 0.8673,
"step": 1440
},
{
"epoch": 0.02336,
"grad_norm": 0.17202576994895935,
"learning_rate": 0.0001953376,
"loss": 0.834,
"step": 1460
},
{
"epoch": 0.02368,
"grad_norm": 0.19487006962299347,
"learning_rate": 0.0001952736,
"loss": 0.9347,
"step": 1480
},
{
"epoch": 0.024,
"grad_norm": 0.16891010105609894,
"learning_rate": 0.00019520960000000002,
"loss": 0.8873,
"step": 1500
},
{
"epoch": 0.02432,
"grad_norm": 0.18789614737033844,
"learning_rate": 0.00019514560000000002,
"loss": 0.8883,
"step": 1520
},
{
"epoch": 0.02464,
"grad_norm": 0.19695357978343964,
"learning_rate": 0.0001950816,
"loss": 0.9197,
"step": 1540
},
{
"epoch": 0.02496,
"grad_norm": 0.13254858553409576,
"learning_rate": 0.0001950176,
"loss": 0.8877,
"step": 1560
},
{
"epoch": 0.02528,
"grad_norm": 0.1932552009820938,
"learning_rate": 0.0001949536,
"loss": 0.8719,
"step": 1580
},
{
"epoch": 0.0256,
"grad_norm": 0.18450401723384857,
"learning_rate": 0.0001948896,
"loss": 0.8889,
"step": 1600
},
{
"epoch": 0.02592,
"grad_norm": 0.14565275609493256,
"learning_rate": 0.00019482560000000002,
"loss": 0.8882,
"step": 1620
},
{
"epoch": 0.02624,
"grad_norm": 0.17898815870285034,
"learning_rate": 0.00019476160000000002,
"loss": 0.9387,
"step": 1640
},
{
"epoch": 0.02656,
"grad_norm": 0.1418757438659668,
"learning_rate": 0.0001946976,
"loss": 0.835,
"step": 1660
},
{
"epoch": 0.02688,
"grad_norm": 0.1738288700580597,
"learning_rate": 0.0001946336,
"loss": 0.8402,
"step": 1680
},
{
"epoch": 0.0272,
"grad_norm": 0.15658064186573029,
"learning_rate": 0.00019456960000000003,
"loss": 0.8626,
"step": 1700
},
{
"epoch": 0.02752,
"grad_norm": 0.1640857756137848,
"learning_rate": 0.0001945056,
"loss": 0.9208,
"step": 1720
},
{
"epoch": 0.02784,
"grad_norm": 0.18185724318027496,
"learning_rate": 0.0001944416,
"loss": 0.9001,
"step": 1740
},
{
"epoch": 0.02816,
"grad_norm": 0.1771153062582016,
"learning_rate": 0.00019437760000000001,
"loss": 0.9175,
"step": 1760
},
{
"epoch": 0.02848,
"grad_norm": 0.1369091272354126,
"learning_rate": 0.0001943136,
"loss": 0.9019,
"step": 1780
},
{
"epoch": 0.0288,
"grad_norm": 0.18259896337985992,
"learning_rate": 0.0001942496,
"loss": 0.9223,
"step": 1800
},
{
"epoch": 0.02912,
"grad_norm": 0.15459062159061432,
"learning_rate": 0.0001941856,
"loss": 0.9302,
"step": 1820
},
{
"epoch": 0.02944,
"grad_norm": 0.19653448462486267,
"learning_rate": 0.00019412160000000002,
"loss": 0.9116,
"step": 1840
},
{
"epoch": 0.02976,
"grad_norm": 0.18698687851428986,
"learning_rate": 0.00019405760000000002,
"loss": 0.8874,
"step": 1860
},
{
"epoch": 0.03008,
"grad_norm": 0.21010226011276245,
"learning_rate": 0.0001939936,
"loss": 0.8956,
"step": 1880
},
{
"epoch": 0.0304,
"grad_norm": 0.15704917907714844,
"learning_rate": 0.0001939296,
"loss": 0.8816,
"step": 1900
},
{
"epoch": 0.03072,
"grad_norm": 0.16836212575435638,
"learning_rate": 0.0001938656,
"loss": 0.8419,
"step": 1920
},
{
"epoch": 0.03104,
"grad_norm": 0.15333925187587738,
"learning_rate": 0.0001938016,
"loss": 0.9068,
"step": 1940
},
{
"epoch": 0.03136,
"grad_norm": 0.18370755016803741,
"learning_rate": 0.00019373760000000002,
"loss": 0.8932,
"step": 1960
},
{
"epoch": 0.03168,
"grad_norm": 0.16511815786361694,
"learning_rate": 0.00019367360000000002,
"loss": 0.9328,
"step": 1980
},
{
"epoch": 0.032,
"grad_norm": 0.16475580632686615,
"learning_rate": 0.0001936096,
"loss": 0.9234,
"step": 2000
},
{
"epoch": 0.03232,
"grad_norm": 0.17609569430351257,
"learning_rate": 0.0001935456,
"loss": 0.9281,
"step": 2020
},
{
"epoch": 0.03264,
"grad_norm": 0.1759602576494217,
"learning_rate": 0.00019348160000000003,
"loss": 0.8592,
"step": 2040
},
{
"epoch": 0.03296,
"grad_norm": 0.1785658448934555,
"learning_rate": 0.0001934176,
"loss": 0.9156,
"step": 2060
},
{
"epoch": 0.03328,
"grad_norm": 0.20041823387145996,
"learning_rate": 0.0001933536,
"loss": 0.8585,
"step": 2080
},
{
"epoch": 0.0336,
"grad_norm": 0.2025129646062851,
"learning_rate": 0.00019328960000000001,
"loss": 0.883,
"step": 2100
},
{
"epoch": 0.03392,
"grad_norm": 0.1853547841310501,
"learning_rate": 0.0001932256,
"loss": 0.9493,
"step": 2120
},
{
"epoch": 0.03424,
"grad_norm": 0.1714346706867218,
"learning_rate": 0.0001931616,
"loss": 0.9102,
"step": 2140
},
{
"epoch": 0.03456,
"grad_norm": 0.14508432149887085,
"learning_rate": 0.0001930976,
"loss": 0.8636,
"step": 2160
},
{
"epoch": 0.03488,
"grad_norm": 0.15658248960971832,
"learning_rate": 0.00019303360000000002,
"loss": 0.8495,
"step": 2180
},
{
"epoch": 0.0352,
"grad_norm": 0.1980847865343094,
"learning_rate": 0.00019296960000000002,
"loss": 0.8814,
"step": 2200
},
{
"epoch": 0.03552,
"grad_norm": 0.18244528770446777,
"learning_rate": 0.0001929056,
"loss": 0.896,
"step": 2220
},
{
"epoch": 0.03584,
"grad_norm": 0.19880063831806183,
"learning_rate": 0.0001928416,
"loss": 0.8725,
"step": 2240
},
{
"epoch": 0.03616,
"grad_norm": 0.2539379894733429,
"learning_rate": 0.0001927776,
"loss": 0.936,
"step": 2260
},
{
"epoch": 0.03648,
"grad_norm": 0.17734292149543762,
"learning_rate": 0.0001927136,
"loss": 0.8839,
"step": 2280
},
{
"epoch": 0.0368,
"grad_norm": 0.15432968735694885,
"learning_rate": 0.00019264960000000002,
"loss": 0.8977,
"step": 2300
},
{
"epoch": 0.03712,
"grad_norm": 0.17004595696926117,
"learning_rate": 0.00019258560000000001,
"loss": 0.8974,
"step": 2320
},
{
"epoch": 0.03744,
"grad_norm": 0.16686637699604034,
"learning_rate": 0.0001925216,
"loss": 0.8822,
"step": 2340
},
{
"epoch": 0.03776,
"grad_norm": 0.16283023357391357,
"learning_rate": 0.0001924576,
"loss": 0.9273,
"step": 2360
},
{
"epoch": 0.03808,
"grad_norm": 0.1839868277311325,
"learning_rate": 0.00019239360000000003,
"loss": 0.8829,
"step": 2380
},
{
"epoch": 0.0384,
"grad_norm": 0.1701708436012268,
"learning_rate": 0.0001923296,
"loss": 0.9052,
"step": 2400
},
{
"epoch": 0.03872,
"grad_norm": 0.16713082790374756,
"learning_rate": 0.0001922656,
"loss": 0.8653,
"step": 2420
},
{
"epoch": 0.03904,
"grad_norm": 0.16699771583080292,
"learning_rate": 0.0001922016,
"loss": 0.8644,
"step": 2440
},
{
"epoch": 0.03936,
"grad_norm": 0.15876609086990356,
"learning_rate": 0.0001921376,
"loss": 0.8703,
"step": 2460
},
{
"epoch": 0.03968,
"grad_norm": 0.1910441368818283,
"learning_rate": 0.0001920736,
"loss": 0.8637,
"step": 2480
},
{
"epoch": 0.04,
"grad_norm": 0.18075905740261078,
"learning_rate": 0.0001920096,
"loss": 0.8566,
"step": 2500
},
{
"epoch": 0.04032,
"grad_norm": 0.19470706582069397,
"learning_rate": 0.00019194560000000002,
"loss": 0.9013,
"step": 2520
},
{
"epoch": 0.04064,
"grad_norm": 0.19072532653808594,
"learning_rate": 0.00019188160000000002,
"loss": 0.9026,
"step": 2540
},
{
"epoch": 0.04096,
"grad_norm": 0.17622806131839752,
"learning_rate": 0.0001918176,
"loss": 0.8705,
"step": 2560
},
{
"epoch": 0.04128,
"grad_norm": 0.19638915359973907,
"learning_rate": 0.0001917536,
"loss": 0.8606,
"step": 2580
},
{
"epoch": 0.0416,
"grad_norm": 0.18957193195819855,
"learning_rate": 0.0001916896,
"loss": 0.9041,
"step": 2600
},
{
"epoch": 0.04192,
"grad_norm": 0.1762382835149765,
"learning_rate": 0.0001916256,
"loss": 0.8593,
"step": 2620
},
{
"epoch": 0.04224,
"grad_norm": 0.16159483790397644,
"learning_rate": 0.00019156160000000002,
"loss": 0.9049,
"step": 2640
},
{
"epoch": 0.04256,
"grad_norm": 0.19137801229953766,
"learning_rate": 0.00019149760000000001,
"loss": 0.9062,
"step": 2660
},
{
"epoch": 0.04288,
"grad_norm": 0.19132420420646667,
"learning_rate": 0.0001914336,
"loss": 0.8678,
"step": 2680
},
{
"epoch": 0.0432,
"grad_norm": 0.1738004982471466,
"learning_rate": 0.0001913696,
"loss": 0.898,
"step": 2700
},
{
"epoch": 0.04352,
"grad_norm": 0.19048957526683807,
"learning_rate": 0.00019130560000000003,
"loss": 0.8471,
"step": 2720
},
{
"epoch": 0.04384,
"grad_norm": 0.19051052629947662,
"learning_rate": 0.0001912416,
"loss": 0.8878,
"step": 2740
},
{
"epoch": 0.04416,
"grad_norm": 0.18549174070358276,
"learning_rate": 0.0001911776,
"loss": 0.9166,
"step": 2760
},
{
"epoch": 0.04448,
"grad_norm": 0.20678356289863586,
"learning_rate": 0.0001911136,
"loss": 0.8624,
"step": 2780
},
{
"epoch": 0.0448,
"grad_norm": 0.20438261330127716,
"learning_rate": 0.0001910496,
"loss": 0.882,
"step": 2800
},
{
"epoch": 0.04512,
"grad_norm": 0.1805305778980255,
"learning_rate": 0.0001909856,
"loss": 0.8867,
"step": 2820
},
{
"epoch": 0.04544,
"grad_norm": 0.2102346122264862,
"learning_rate": 0.0001909216,
"loss": 0.8605,
"step": 2840
},
{
"epoch": 0.04576,
"grad_norm": 0.17274044454097748,
"learning_rate": 0.00019085760000000002,
"loss": 0.8529,
"step": 2860
},
{
"epoch": 0.04608,
"grad_norm": 0.19794899225234985,
"learning_rate": 0.00019079360000000002,
"loss": 0.8778,
"step": 2880
},
{
"epoch": 0.0464,
"grad_norm": 0.19638848304748535,
"learning_rate": 0.0001907296,
"loss": 0.848,
"step": 2900
},
{
"epoch": 0.04672,
"grad_norm": 0.20513470470905304,
"learning_rate": 0.0001906656,
"loss": 0.8791,
"step": 2920
},
{
"epoch": 0.04704,
"grad_norm": 0.18168902397155762,
"learning_rate": 0.0001906016,
"loss": 0.9258,
"step": 2940
},
{
"epoch": 0.04736,
"grad_norm": 0.1906946301460266,
"learning_rate": 0.0001905376,
"loss": 0.9339,
"step": 2960
},
{
"epoch": 0.04768,
"grad_norm": 0.20983171463012695,
"learning_rate": 0.00019047360000000002,
"loss": 0.9209,
"step": 2980
},
{
"epoch": 0.048,
"grad_norm": 0.18700706958770752,
"learning_rate": 0.00019040960000000001,
"loss": 0.8569,
"step": 3000
},
{
"epoch": 0.04832,
"grad_norm": 0.18951478600502014,
"learning_rate": 0.0001903456,
"loss": 0.9087,
"step": 3020
},
{
"epoch": 0.04864,
"grad_norm": 0.18202978372573853,
"learning_rate": 0.0001902816,
"loss": 0.9239,
"step": 3040
},
{
"epoch": 0.04896,
"grad_norm": 0.21562401950359344,
"learning_rate": 0.00019021760000000003,
"loss": 0.8721,
"step": 3060
},
{
"epoch": 0.04928,
"grad_norm": 0.18537688255310059,
"learning_rate": 0.0001901536,
"loss": 0.8798,
"step": 3080
},
{
"epoch": 0.0496,
"grad_norm": 0.1878584325313568,
"learning_rate": 0.0001900896,
"loss": 0.9315,
"step": 3100
},
{
"epoch": 0.04992,
"grad_norm": 0.1872929185628891,
"learning_rate": 0.0001900256,
"loss": 0.9202,
"step": 3120
},
{
"epoch": 0.05024,
"grad_norm": 0.1833094209432602,
"learning_rate": 0.0001899616,
"loss": 0.8837,
"step": 3140
},
{
"epoch": 0.05056,
"grad_norm": 0.18516699969768524,
"learning_rate": 0.0001898976,
"loss": 0.892,
"step": 3160
},
{
"epoch": 0.05088,
"grad_norm": 0.1559123992919922,
"learning_rate": 0.0001898336,
"loss": 0.8998,
"step": 3180
},
{
"epoch": 0.0512,
"grad_norm": 0.17760765552520752,
"learning_rate": 0.00018976960000000002,
"loss": 0.9178,
"step": 3200
},
{
"epoch": 0.05152,
"grad_norm": 0.1603628695011139,
"learning_rate": 0.00018970560000000002,
"loss": 0.8732,
"step": 3220
},
{
"epoch": 0.05184,
"grad_norm": 0.17330580949783325,
"learning_rate": 0.0001896416,
"loss": 0.9528,
"step": 3240
},
{
"epoch": 0.05216,
"grad_norm": 0.1774517297744751,
"learning_rate": 0.0001895776,
"loss": 0.9112,
"step": 3260
},
{
"epoch": 0.05248,
"grad_norm": 0.19834113121032715,
"learning_rate": 0.0001895136,
"loss": 0.8799,
"step": 3280
},
{
"epoch": 0.0528,
"grad_norm": 0.197114035487175,
"learning_rate": 0.0001894496,
"loss": 0.8898,
"step": 3300
},
{
"epoch": 0.05312,
"grad_norm": 0.21631449460983276,
"learning_rate": 0.00018938560000000002,
"loss": 0.8889,
"step": 3320
},
{
"epoch": 0.05344,
"grad_norm": 0.1554328352212906,
"learning_rate": 0.00018932160000000001,
"loss": 0.9055,
"step": 3340
},
{
"epoch": 0.05376,
"grad_norm": 0.17191193997859955,
"learning_rate": 0.0001892576,
"loss": 0.8713,
"step": 3360
},
{
"epoch": 0.05408,
"grad_norm": 0.18753254413604736,
"learning_rate": 0.0001891936,
"loss": 0.9223,
"step": 3380
},
{
"epoch": 0.0544,
"grad_norm": 0.172084778547287,
"learning_rate": 0.00018912960000000003,
"loss": 0.931,
"step": 3400
},
{
"epoch": 0.05472,
"grad_norm": 0.19548653066158295,
"learning_rate": 0.0001890656,
"loss": 0.8795,
"step": 3420
},
{
"epoch": 0.05504,
"grad_norm": 0.19771696627140045,
"learning_rate": 0.0001890016,
"loss": 0.8904,
"step": 3440
},
{
"epoch": 0.05536,
"grad_norm": 0.18042775988578796,
"learning_rate": 0.0001889376,
"loss": 0.8289,
"step": 3460
},
{
"epoch": 0.05568,
"grad_norm": 0.20334866642951965,
"learning_rate": 0.0001888736,
"loss": 0.8988,
"step": 3480
},
{
"epoch": 0.056,
"grad_norm": 0.2053702026605606,
"learning_rate": 0.0001888096,
"loss": 0.85,
"step": 3500
},
{
"epoch": 0.05632,
"grad_norm": 0.18091996014118195,
"learning_rate": 0.00018874560000000002,
"loss": 0.8816,
"step": 3520
},
{
"epoch": 0.05664,
"grad_norm": 0.1538042575120926,
"learning_rate": 0.00018868160000000002,
"loss": 0.8792,
"step": 3540
},
{
"epoch": 0.05696,
"grad_norm": 0.21067845821380615,
"learning_rate": 0.00018861760000000002,
"loss": 0.9104,
"step": 3560
},
{
"epoch": 0.05728,
"grad_norm": 0.17531852424144745,
"learning_rate": 0.0001885536,
"loss": 0.9073,
"step": 3580
},
{
"epoch": 0.0576,
"grad_norm": 0.16701558232307434,
"learning_rate": 0.0001884896,
"loss": 0.8782,
"step": 3600
},
{
"epoch": 0.05792,
"grad_norm": 0.20766527950763702,
"learning_rate": 0.0001884256,
"loss": 0.8751,
"step": 3620
},
{
"epoch": 0.05824,
"grad_norm": 0.19526097178459167,
"learning_rate": 0.0001883616,
"loss": 0.87,
"step": 3640
},
{
"epoch": 0.05856,
"grad_norm": 0.16312770545482635,
"learning_rate": 0.00018829760000000002,
"loss": 0.8904,
"step": 3660
},
{
"epoch": 0.05888,
"grad_norm": 0.18951712548732758,
"learning_rate": 0.0001882336,
"loss": 0.9183,
"step": 3680
},
{
"epoch": 0.0592,
"grad_norm": 0.1615159958600998,
"learning_rate": 0.0001881696,
"loss": 0.8602,
"step": 3700
},
{
"epoch": 0.05952,
"grad_norm": 0.20840367674827576,
"learning_rate": 0.0001881056,
"loss": 0.9207,
"step": 3720
},
{
"epoch": 0.05984,
"grad_norm": 0.19745437800884247,
"learning_rate": 0.00018804160000000003,
"loss": 0.8962,
"step": 3740
},
{
"epoch": 0.06016,
"grad_norm": 0.1767299473285675,
"learning_rate": 0.0001879776,
"loss": 0.8357,
"step": 3760
},
{
"epoch": 0.06048,
"grad_norm": 0.16729581356048584,
"learning_rate": 0.0001879136,
"loss": 0.8993,
"step": 3780
},
{
"epoch": 0.0608,
"grad_norm": 0.1816299855709076,
"learning_rate": 0.0001878496,
"loss": 0.8775,
"step": 3800
},
{
"epoch": 0.06112,
"grad_norm": 0.17500704526901245,
"learning_rate": 0.0001877856,
"loss": 0.8829,
"step": 3820
},
{
"epoch": 0.06144,
"grad_norm": 0.1851237714290619,
"learning_rate": 0.0001877216,
"loss": 0.8478,
"step": 3840
},
{
"epoch": 0.06176,
"grad_norm": 0.19113439321517944,
"learning_rate": 0.00018765760000000002,
"loss": 0.9024,
"step": 3860
},
{
"epoch": 0.06208,
"grad_norm": 0.1793053150177002,
"learning_rate": 0.00018759360000000002,
"loss": 0.9191,
"step": 3880
},
{
"epoch": 0.0624,
"grad_norm": 0.19696858525276184,
"learning_rate": 0.00018752960000000001,
"loss": 0.9023,
"step": 3900
},
{
"epoch": 0.06272,
"grad_norm": 0.19326741993427277,
"learning_rate": 0.0001874656,
"loss": 0.8434,
"step": 3920
},
{
"epoch": 0.06304,
"grad_norm": 0.1995677947998047,
"learning_rate": 0.0001874016,
"loss": 0.8569,
"step": 3940
},
{
"epoch": 0.06336,
"grad_norm": 0.1579284369945526,
"learning_rate": 0.0001873376,
"loss": 0.8722,
"step": 3960
},
{
"epoch": 0.06368,
"grad_norm": 0.20145860314369202,
"learning_rate": 0.0001872736,
"loss": 0.8586,
"step": 3980
},
{
"epoch": 0.064,
"grad_norm": 0.16962005198001862,
"learning_rate": 0.00018720960000000002,
"loss": 0.8256,
"step": 4000
},
{
"epoch": 0.06432,
"grad_norm": 0.14154337346553802,
"learning_rate": 0.0001871456,
"loss": 0.9169,
"step": 4020
},
{
"epoch": 0.06464,
"grad_norm": 0.18831445276737213,
"learning_rate": 0.0001870816,
"loss": 0.8717,
"step": 4040
},
{
"epoch": 0.06496,
"grad_norm": 0.2613060176372528,
"learning_rate": 0.0001870176,
"loss": 0.8946,
"step": 4060
},
{
"epoch": 0.06528,
"grad_norm": 0.1657022088766098,
"learning_rate": 0.00018695360000000003,
"loss": 0.8721,
"step": 4080
},
{
"epoch": 0.0656,
"grad_norm": 0.17723548412322998,
"learning_rate": 0.0001868896,
"loss": 0.8481,
"step": 4100
},
{
"epoch": 0.06592,
"grad_norm": 0.1840563416481018,
"learning_rate": 0.0001868256,
"loss": 0.8963,
"step": 4120
},
{
"epoch": 0.06624,
"grad_norm": 0.19427619874477386,
"learning_rate": 0.0001867616,
"loss": 0.8473,
"step": 4140
},
{
"epoch": 0.06656,
"grad_norm": 0.20632588863372803,
"learning_rate": 0.0001866976,
"loss": 0.8962,
"step": 4160
},
{
"epoch": 0.06688,
"grad_norm": 0.17780327796936035,
"learning_rate": 0.0001866336,
"loss": 0.9016,
"step": 4180
},
{
"epoch": 0.0672,
"grad_norm": 0.17626479268074036,
"learning_rate": 0.00018656960000000002,
"loss": 0.8949,
"step": 4200
},
{
"epoch": 0.06752,
"grad_norm": 0.19475996494293213,
"learning_rate": 0.00018650560000000002,
"loss": 0.9152,
"step": 4220
},
{
"epoch": 0.06784,
"grad_norm": 0.2053624838590622,
"learning_rate": 0.00018644160000000001,
"loss": 0.9467,
"step": 4240
},
{
"epoch": 0.06816,
"grad_norm": 0.17303887009620667,
"learning_rate": 0.0001863776,
"loss": 0.9104,
"step": 4260
},
{
"epoch": 0.06848,
"grad_norm": 0.19969859719276428,
"learning_rate": 0.0001863136,
"loss": 0.8578,
"step": 4280
},
{
"epoch": 0.0688,
"grad_norm": 0.23917217552661896,
"learning_rate": 0.0001862496,
"loss": 0.8999,
"step": 4300
},
{
"epoch": 0.06912,
"grad_norm": 0.18194426596164703,
"learning_rate": 0.0001861856,
"loss": 0.9014,
"step": 4320
},
{
"epoch": 0.06944,
"grad_norm": 0.21291664242744446,
"learning_rate": 0.00018612160000000002,
"loss": 0.9131,
"step": 4340
},
{
"epoch": 0.06976,
"grad_norm": 0.18465067446231842,
"learning_rate": 0.0001860576,
"loss": 0.8859,
"step": 4360
},
{
"epoch": 0.07008,
"grad_norm": 0.22093325853347778,
"learning_rate": 0.0001859936,
"loss": 0.9038,
"step": 4380
},
{
"epoch": 0.0704,
"grad_norm": 0.1888457089662552,
"learning_rate": 0.0001859296,
"loss": 0.8468,
"step": 4400
},
{
"epoch": 0.07072,
"grad_norm": 0.19705061614513397,
"learning_rate": 0.00018586560000000003,
"loss": 0.8871,
"step": 4420
},
{
"epoch": 0.07104,
"grad_norm": 0.20150603353977203,
"learning_rate": 0.0001858016,
"loss": 0.8391,
"step": 4440
},
{
"epoch": 0.07136,
"grad_norm": 0.21136346459388733,
"learning_rate": 0.0001857376,
"loss": 0.8528,
"step": 4460
},
{
"epoch": 0.07168,
"grad_norm": 0.20985183119773865,
"learning_rate": 0.0001856736,
"loss": 0.9093,
"step": 4480
},
{
"epoch": 0.072,
"grad_norm": 0.1725299060344696,
"learning_rate": 0.0001856096,
"loss": 0.849,
"step": 4500
},
{
"epoch": 0.07232,
"grad_norm": 0.19184072315692902,
"learning_rate": 0.0001855456,
"loss": 0.8414,
"step": 4520
},
{
"epoch": 0.07264,
"grad_norm": 0.1758476197719574,
"learning_rate": 0.00018548160000000002,
"loss": 0.9081,
"step": 4540
},
{
"epoch": 0.07296,
"grad_norm": 0.1840459555387497,
"learning_rate": 0.00018541760000000002,
"loss": 0.9149,
"step": 4560
},
{
"epoch": 0.07328,
"grad_norm": 0.1862034946680069,
"learning_rate": 0.00018535360000000001,
"loss": 0.8879,
"step": 4580
},
{
"epoch": 0.0736,
"grad_norm": 0.21543624997138977,
"learning_rate": 0.0001852896,
"loss": 0.8753,
"step": 4600
},
{
"epoch": 0.07392,
"grad_norm": 0.18351414799690247,
"learning_rate": 0.0001852256,
"loss": 0.8977,
"step": 4620
},
{
"epoch": 0.07424,
"grad_norm": 0.2166828215122223,
"learning_rate": 0.0001851616,
"loss": 0.8669,
"step": 4640
},
{
"epoch": 0.07456,
"grad_norm": 0.19744159281253815,
"learning_rate": 0.0001850976,
"loss": 0.8846,
"step": 4660
},
{
"epoch": 0.07488,
"grad_norm": 0.19065077602863312,
"learning_rate": 0.00018503360000000002,
"loss": 0.8715,
"step": 4680
},
{
"epoch": 0.0752,
"grad_norm": 0.17913594841957092,
"learning_rate": 0.0001849696,
"loss": 0.8777,
"step": 4700
},
{
"epoch": 0.07552,
"grad_norm": 0.2282969057559967,
"learning_rate": 0.0001849056,
"loss": 0.8598,
"step": 4720
},
{
"epoch": 0.07584,
"grad_norm": 0.2031577080488205,
"learning_rate": 0.0001848416,
"loss": 0.928,
"step": 4740
},
{
"epoch": 0.07616,
"grad_norm": 0.24187202751636505,
"learning_rate": 0.00018477760000000002,
"loss": 0.9169,
"step": 4760
},
{
"epoch": 0.07648,
"grad_norm": 0.2227555513381958,
"learning_rate": 0.0001847136,
"loss": 0.914,
"step": 4780
},
{
"epoch": 0.0768,
"grad_norm": 0.2157488912343979,
"learning_rate": 0.0001846496,
"loss": 0.8697,
"step": 4800
},
{
"epoch": 0.07712,
"grad_norm": 0.19421465694904327,
"learning_rate": 0.0001845856,
"loss": 0.9358,
"step": 4820
},
{
"epoch": 0.07744,
"grad_norm": 0.2111523300409317,
"learning_rate": 0.0001845216,
"loss": 0.8708,
"step": 4840
},
{
"epoch": 0.07776,
"grad_norm": 0.23789940774440765,
"learning_rate": 0.0001844576,
"loss": 0.9036,
"step": 4860
},
{
"epoch": 0.07808,
"grad_norm": 0.19063900411128998,
"learning_rate": 0.00018439360000000002,
"loss": 0.8825,
"step": 4880
},
{
"epoch": 0.0784,
"grad_norm": 0.18922486901283264,
"learning_rate": 0.00018432960000000002,
"loss": 0.9094,
"step": 4900
},
{
"epoch": 0.07872,
"grad_norm": 0.19124048948287964,
"learning_rate": 0.0001842656,
"loss": 0.9422,
"step": 4920
},
{
"epoch": 0.07904,
"grad_norm": 0.19916868209838867,
"learning_rate": 0.0001842016,
"loss": 0.9341,
"step": 4940
},
{
"epoch": 0.07936,
"grad_norm": 0.19486361742019653,
"learning_rate": 0.0001841376,
"loss": 0.8836,
"step": 4960
},
{
"epoch": 0.07968,
"grad_norm": 0.20217594504356384,
"learning_rate": 0.0001840736,
"loss": 0.8485,
"step": 4980
},
{
"epoch": 0.08,
"grad_norm": 0.18520930409431458,
"learning_rate": 0.0001840096,
"loss": 0.8887,
"step": 5000
},
{
"epoch": 0.08032,
"grad_norm": 0.1816449910402298,
"learning_rate": 0.00018394560000000002,
"loss": 0.8668,
"step": 5020
},
{
"epoch": 0.08064,
"grad_norm": 0.21598085761070251,
"learning_rate": 0.0001838816,
"loss": 0.947,
"step": 5040
},
{
"epoch": 0.08096,
"grad_norm": 0.21336813271045685,
"learning_rate": 0.0001838176,
"loss": 0.928,
"step": 5060
},
{
"epoch": 0.08128,
"grad_norm": 0.18636910617351532,
"learning_rate": 0.0001837536,
"loss": 0.858,
"step": 5080
},
{
"epoch": 0.0816,
"grad_norm": 0.20049895346164703,
"learning_rate": 0.00018368960000000002,
"loss": 0.8937,
"step": 5100
},
{
"epoch": 0.08192,
"grad_norm": 0.2153417468070984,
"learning_rate": 0.00018362560000000002,
"loss": 0.9052,
"step": 5120
},
{
"epoch": 0.08224,
"grad_norm": 0.2149072140455246,
"learning_rate": 0.0001835616,
"loss": 0.8961,
"step": 5140
},
{
"epoch": 0.08256,
"grad_norm": 0.19339273869991302,
"learning_rate": 0.0001834976,
"loss": 0.9128,
"step": 5160
},
{
"epoch": 0.08288,
"grad_norm": 0.23768258094787598,
"learning_rate": 0.0001834336,
"loss": 0.877,
"step": 5180
},
{
"epoch": 0.0832,
"grad_norm": 0.20677222311496735,
"learning_rate": 0.0001833696,
"loss": 0.8989,
"step": 5200
},
{
"epoch": 0.08352,
"grad_norm": 0.2008122056722641,
"learning_rate": 0.00018330560000000002,
"loss": 0.8911,
"step": 5220
},
{
"epoch": 0.08384,
"grad_norm": 0.1981019526720047,
"learning_rate": 0.00018324160000000002,
"loss": 0.9085,
"step": 5240
},
{
"epoch": 0.08416,
"grad_norm": 0.22739489376544952,
"learning_rate": 0.0001831776,
"loss": 0.8804,
"step": 5260
},
{
"epoch": 0.08448,
"grad_norm": 0.2044532150030136,
"learning_rate": 0.0001831136,
"loss": 0.8438,
"step": 5280
},
{
"epoch": 0.0848,
"grad_norm": 0.23086583614349365,
"learning_rate": 0.0001830496,
"loss": 0.8904,
"step": 5300
},
{
"epoch": 0.08512,
"grad_norm": 0.1737246811389923,
"learning_rate": 0.0001829856,
"loss": 0.8399,
"step": 5320
},
{
"epoch": 0.08544,
"grad_norm": 0.19789084792137146,
"learning_rate": 0.0001829216,
"loss": 0.8928,
"step": 5340
},
{
"epoch": 0.08576,
"grad_norm": 0.19274166226387024,
"learning_rate": 0.00018285760000000002,
"loss": 0.9071,
"step": 5360
},
{
"epoch": 0.08608,
"grad_norm": 0.18289533257484436,
"learning_rate": 0.0001827936,
"loss": 0.885,
"step": 5380
},
{
"epoch": 0.0864,
"grad_norm": 0.20274992287158966,
"learning_rate": 0.0001827296,
"loss": 0.8716,
"step": 5400
},
{
"epoch": 0.08672,
"grad_norm": 0.20618405938148499,
"learning_rate": 0.0001826656,
"loss": 0.9022,
"step": 5420
},
{
"epoch": 0.08704,
"grad_norm": 0.18017026782035828,
"learning_rate": 0.00018260160000000002,
"loss": 0.8997,
"step": 5440
},
{
"epoch": 0.08736,
"grad_norm": 0.17250943183898926,
"learning_rate": 0.00018253760000000002,
"loss": 0.8778,
"step": 5460
},
{
"epoch": 0.08768,
"grad_norm": 0.21039535105228424,
"learning_rate": 0.0001824736,
"loss": 0.8629,
"step": 5480
},
{
"epoch": 0.088,
"grad_norm": 0.1946125328540802,
"learning_rate": 0.0001824096,
"loss": 0.9527,
"step": 5500
},
{
"epoch": 0.08832,
"grad_norm": 0.20565049350261688,
"learning_rate": 0.0001823456,
"loss": 0.8627,
"step": 5520
},
{
"epoch": 0.08864,
"grad_norm": 0.16778771579265594,
"learning_rate": 0.0001822816,
"loss": 0.879,
"step": 5540
},
{
"epoch": 0.08896,
"grad_norm": 0.1957644522190094,
"learning_rate": 0.00018221760000000002,
"loss": 0.9253,
"step": 5560
},
{
"epoch": 0.08928,
"grad_norm": 0.20745377242565155,
"learning_rate": 0.00018215360000000002,
"loss": 0.9006,
"step": 5580
},
{
"epoch": 0.0896,
"grad_norm": 0.19847019016742706,
"learning_rate": 0.0001820896,
"loss": 0.9176,
"step": 5600
},
{
"epoch": 0.08992,
"grad_norm": 0.22231200337409973,
"learning_rate": 0.0001820256,
"loss": 0.9174,
"step": 5620
},
{
"epoch": 0.09024,
"grad_norm": 0.21002036333084106,
"learning_rate": 0.0001819616,
"loss": 0.8773,
"step": 5640
},
{
"epoch": 0.09056,
"grad_norm": 0.18204717338085175,
"learning_rate": 0.0001818976,
"loss": 0.9038,
"step": 5660
},
{
"epoch": 0.09088,
"grad_norm": 0.21081459522247314,
"learning_rate": 0.0001818336,
"loss": 0.8409,
"step": 5680
},
{
"epoch": 0.0912,
"grad_norm": 0.1905379593372345,
"learning_rate": 0.00018176960000000002,
"loss": 0.9125,
"step": 5700
},
{
"epoch": 0.09152,
"grad_norm": 0.17761899530887604,
"learning_rate": 0.0001817056,
"loss": 0.8617,
"step": 5720
},
{
"epoch": 0.09184,
"grad_norm": 0.20881423354148865,
"learning_rate": 0.0001816416,
"loss": 0.8769,
"step": 5740
},
{
"epoch": 0.09216,
"grad_norm": 0.22868691384792328,
"learning_rate": 0.0001815776,
"loss": 0.8426,
"step": 5760
},
{
"epoch": 0.09248,
"grad_norm": 0.2537609040737152,
"learning_rate": 0.00018151360000000002,
"loss": 0.9347,
"step": 5780
},
{
"epoch": 0.0928,
"grad_norm": 0.2280977964401245,
"learning_rate": 0.00018144960000000002,
"loss": 0.89,
"step": 5800
},
{
"epoch": 0.09312,
"grad_norm": 0.22828595340251923,
"learning_rate": 0.0001813856,
"loss": 0.8818,
"step": 5820
},
{
"epoch": 0.09344,
"grad_norm": 0.19653092324733734,
"learning_rate": 0.0001813216,
"loss": 0.8944,
"step": 5840
},
{
"epoch": 0.09376,
"grad_norm": 0.2112797498703003,
"learning_rate": 0.0001812576,
"loss": 0.8945,
"step": 5860
},
{
"epoch": 0.09408,
"grad_norm": 0.21034376323223114,
"learning_rate": 0.0001811936,
"loss": 0.877,
"step": 5880
},
{
"epoch": 0.0944,
"grad_norm": 0.20544138550758362,
"learning_rate": 0.00018112960000000002,
"loss": 0.8955,
"step": 5900
},
{
"epoch": 0.09472,
"grad_norm": 0.18214848637580872,
"learning_rate": 0.00018106560000000002,
"loss": 0.8538,
"step": 5920
},
{
"epoch": 0.09504,
"grad_norm": 0.19273880124092102,
"learning_rate": 0.0001810016,
"loss": 0.9267,
"step": 5940
},
{
"epoch": 0.09536,
"grad_norm": 0.16388094425201416,
"learning_rate": 0.0001809376,
"loss": 0.8903,
"step": 5960
},
{
"epoch": 0.09568,
"grad_norm": 0.19152410328388214,
"learning_rate": 0.0001808736,
"loss": 0.8994,
"step": 5980
},
{
"epoch": 0.096,
"grad_norm": 0.20129649341106415,
"learning_rate": 0.0001808096,
"loss": 0.9065,
"step": 6000
},
{
"epoch": 0.09632,
"grad_norm": 0.2275884598493576,
"learning_rate": 0.0001807456,
"loss": 0.8745,
"step": 6020
},
{
"epoch": 0.09664,
"grad_norm": 0.1939428150653839,
"learning_rate": 0.00018068160000000002,
"loss": 0.9147,
"step": 6040
},
{
"epoch": 0.09696,
"grad_norm": 0.21504884958267212,
"learning_rate": 0.0001806176,
"loss": 0.8575,
"step": 6060
},
{
"epoch": 0.09728,
"grad_norm": 0.21252253651618958,
"learning_rate": 0.0001805536,
"loss": 0.8554,
"step": 6080
},
{
"epoch": 0.0976,
"grad_norm": 0.213465616106987,
"learning_rate": 0.0001804896,
"loss": 0.9016,
"step": 6100
},
{
"epoch": 0.09792,
"grad_norm": 0.19815479218959808,
"learning_rate": 0.00018042560000000002,
"loss": 0.9675,
"step": 6120
},
{
"epoch": 0.09824,
"grad_norm": 0.19477008283138275,
"learning_rate": 0.00018036160000000002,
"loss": 0.9025,
"step": 6140
},
{
"epoch": 0.09856,
"grad_norm": 0.20203906297683716,
"learning_rate": 0.0001802976,
"loss": 0.8952,
"step": 6160
},
{
"epoch": 0.09888,
"grad_norm": 0.2099459171295166,
"learning_rate": 0.0001802336,
"loss": 0.9044,
"step": 6180
},
{
"epoch": 0.0992,
"grad_norm": 0.2077176868915558,
"learning_rate": 0.0001801696,
"loss": 0.8826,
"step": 6200
},
{
"epoch": 0.09952,
"grad_norm": 0.18981848657131195,
"learning_rate": 0.0001801056,
"loss": 0.8455,
"step": 6220
},
{
"epoch": 0.09984,
"grad_norm": 0.20933973789215088,
"learning_rate": 0.00018004160000000002,
"loss": 0.902,
"step": 6240
},
{
"epoch": 0.10016,
"grad_norm": 0.20591773092746735,
"learning_rate": 0.00017997760000000002,
"loss": 0.8667,
"step": 6260
},
{
"epoch": 0.10048,
"grad_norm": 0.258956640958786,
"learning_rate": 0.0001799136,
"loss": 0.8949,
"step": 6280
},
{
"epoch": 0.1008,
"grad_norm": 0.19157810509204865,
"learning_rate": 0.0001798496,
"loss": 0.8713,
"step": 6300
},
{
"epoch": 0.10112,
"grad_norm": 0.21302878856658936,
"learning_rate": 0.0001797856,
"loss": 0.8584,
"step": 6320
},
{
"epoch": 0.10144,
"grad_norm": 0.1915074735879898,
"learning_rate": 0.0001797216,
"loss": 0.9583,
"step": 6340
},
{
"epoch": 0.10176,
"grad_norm": 0.22054611146450043,
"learning_rate": 0.0001796576,
"loss": 0.9125,
"step": 6360
},
{
"epoch": 0.10208,
"grad_norm": 0.22295401990413666,
"learning_rate": 0.00017959360000000001,
"loss": 0.8893,
"step": 6380
},
{
"epoch": 0.1024,
"grad_norm": 0.19963820278644562,
"learning_rate": 0.0001795296,
"loss": 0.8944,
"step": 6400
},
{
"epoch": 0.10272,
"grad_norm": 0.17585329711437225,
"learning_rate": 0.0001794656,
"loss": 0.869,
"step": 6420
},
{
"epoch": 0.10304,
"grad_norm": 0.20457583665847778,
"learning_rate": 0.00017940160000000003,
"loss": 0.8894,
"step": 6440
},
{
"epoch": 0.10336,
"grad_norm": 0.2085409164428711,
"learning_rate": 0.00017933760000000002,
"loss": 0.9218,
"step": 6460
},
{
"epoch": 0.10368,
"grad_norm": 0.14747366309165955,
"learning_rate": 0.00017927360000000002,
"loss": 0.8441,
"step": 6480
},
{
"epoch": 0.104,
"grad_norm": 0.24237246811389923,
"learning_rate": 0.0001792096,
"loss": 0.9292,
"step": 6500
},
{
"epoch": 0.10432,
"grad_norm": 0.2079431265592575,
"learning_rate": 0.0001791456,
"loss": 0.8364,
"step": 6520
},
{
"epoch": 0.10464,
"grad_norm": 0.2067815363407135,
"learning_rate": 0.0001790816,
"loss": 0.9069,
"step": 6540
},
{
"epoch": 0.10496,
"grad_norm": 0.18671968579292297,
"learning_rate": 0.0001790176,
"loss": 0.8582,
"step": 6560
},
{
"epoch": 0.10528,
"grad_norm": 0.18874432146549225,
"learning_rate": 0.00017895360000000002,
"loss": 0.8791,
"step": 6580
},
{
"epoch": 0.1056,
"grad_norm": 0.22563117742538452,
"learning_rate": 0.00017888960000000002,
"loss": 0.8395,
"step": 6600
},
{
"epoch": 0.10592,
"grad_norm": 0.19527731835842133,
"learning_rate": 0.0001788256,
"loss": 0.8675,
"step": 6620
},
{
"epoch": 0.10624,
"grad_norm": 0.21411758661270142,
"learning_rate": 0.0001787616,
"loss": 0.9045,
"step": 6640
},
{
"epoch": 0.10656,
"grad_norm": 0.2257653772830963,
"learning_rate": 0.0001786976,
"loss": 0.9009,
"step": 6660
},
{
"epoch": 0.10688,
"grad_norm": 0.18150146305561066,
"learning_rate": 0.0001786336,
"loss": 0.9246,
"step": 6680
},
{
"epoch": 0.1072,
"grad_norm": 0.1973322033882141,
"learning_rate": 0.0001785696,
"loss": 0.9191,
"step": 6700
},
{
"epoch": 0.10752,
"grad_norm": 0.19496308267116547,
"learning_rate": 0.00017850560000000001,
"loss": 0.8449,
"step": 6720
},
{
"epoch": 0.10784,
"grad_norm": 0.19810955226421356,
"learning_rate": 0.0001784416,
"loss": 0.8846,
"step": 6740
},
{
"epoch": 0.10816,
"grad_norm": 0.24701924622058868,
"learning_rate": 0.0001783776,
"loss": 0.8716,
"step": 6760
},
{
"epoch": 0.10848,
"grad_norm": 0.22664742171764374,
"learning_rate": 0.00017831360000000003,
"loss": 0.884,
"step": 6780
},
{
"epoch": 0.1088,
"grad_norm": 0.228456512093544,
"learning_rate": 0.00017824960000000002,
"loss": 0.8975,
"step": 6800
},
{
"epoch": 0.10912,
"grad_norm": 0.21849101781845093,
"learning_rate": 0.00017818560000000002,
"loss": 0.9255,
"step": 6820
},
{
"epoch": 0.10944,
"grad_norm": 0.2064104974269867,
"learning_rate": 0.0001781216,
"loss": 0.8829,
"step": 6840
},
{
"epoch": 0.10976,
"grad_norm": 0.22377945482730865,
"learning_rate": 0.0001780576,
"loss": 0.8715,
"step": 6860
},
{
"epoch": 0.11008,
"grad_norm": 0.202182337641716,
"learning_rate": 0.0001779936,
"loss": 0.9154,
"step": 6880
},
{
"epoch": 0.1104,
"grad_norm": 0.15783466398715973,
"learning_rate": 0.0001779296,
"loss": 0.9463,
"step": 6900
},
{
"epoch": 0.11072,
"grad_norm": 0.2259039580821991,
"learning_rate": 0.00017786560000000002,
"loss": 0.8754,
"step": 6920
},
{
"epoch": 0.11104,
"grad_norm": 0.23525789380073547,
"learning_rate": 0.00017780160000000002,
"loss": 0.8665,
"step": 6940
},
{
"epoch": 0.11136,
"grad_norm": 0.2006695419549942,
"learning_rate": 0.0001777376,
"loss": 0.8832,
"step": 6960
},
{
"epoch": 0.11168,
"grad_norm": 0.2209470272064209,
"learning_rate": 0.0001776736,
"loss": 0.8867,
"step": 6980
},
{
"epoch": 0.112,
"grad_norm": 0.22054742276668549,
"learning_rate": 0.0001776096,
"loss": 0.8876,
"step": 7000
},
{
"epoch": 0.11232,
"grad_norm": 0.24601756036281586,
"learning_rate": 0.0001775456,
"loss": 0.8667,
"step": 7020
},
{
"epoch": 0.11264,
"grad_norm": 0.20692676305770874,
"learning_rate": 0.0001774816,
"loss": 0.8659,
"step": 7040
},
{
"epoch": 0.11296,
"grad_norm": 0.18839353322982788,
"learning_rate": 0.00017741760000000001,
"loss": 0.8503,
"step": 7060
},
{
"epoch": 0.11328,
"grad_norm": 0.2029074728488922,
"learning_rate": 0.0001773536,
"loss": 0.8643,
"step": 7080
},
{
"epoch": 0.1136,
"grad_norm": 0.22685612738132477,
"learning_rate": 0.0001772896,
"loss": 0.9198,
"step": 7100
},
{
"epoch": 0.11392,
"grad_norm": 0.22184133529663086,
"learning_rate": 0.00017722560000000003,
"loss": 0.8725,
"step": 7120
},
{
"epoch": 0.11424,
"grad_norm": 0.19977827370166779,
"learning_rate": 0.00017716160000000002,
"loss": 0.8217,
"step": 7140
},
{
"epoch": 0.11456,
"grad_norm": 0.22433121502399445,
"learning_rate": 0.00017709760000000002,
"loss": 0.9014,
"step": 7160
},
{
"epoch": 0.11488,
"grad_norm": 0.2040790170431137,
"learning_rate": 0.0001770336,
"loss": 0.9144,
"step": 7180
},
{
"epoch": 0.1152,
"grad_norm": 0.22500857710838318,
"learning_rate": 0.0001769696,
"loss": 0.8332,
"step": 7200
},
{
"epoch": 0.11552,
"grad_norm": 0.2294531762599945,
"learning_rate": 0.0001769056,
"loss": 0.9003,
"step": 7220
},
{
"epoch": 0.11584,
"grad_norm": 0.2060810774564743,
"learning_rate": 0.0001768416,
"loss": 0.9065,
"step": 7240
},
{
"epoch": 0.11616,
"grad_norm": 0.21327152848243713,
"learning_rate": 0.00017677760000000002,
"loss": 0.9172,
"step": 7260
},
{
"epoch": 0.11648,
"grad_norm": 0.2296830266714096,
"learning_rate": 0.00017671360000000002,
"loss": 0.9246,
"step": 7280
},
{
"epoch": 0.1168,
"grad_norm": 0.18748362362384796,
"learning_rate": 0.0001766496,
"loss": 0.8971,
"step": 7300
},
{
"epoch": 0.11712,
"grad_norm": 0.1924070417881012,
"learning_rate": 0.0001765856,
"loss": 0.8685,
"step": 7320
},
{
"epoch": 0.11744,
"grad_norm": 0.2428852766752243,
"learning_rate": 0.0001765216,
"loss": 0.9398,
"step": 7340
},
{
"epoch": 0.11776,
"grad_norm": 0.24050328135490417,
"learning_rate": 0.0001764576,
"loss": 0.8048,
"step": 7360
},
{
"epoch": 0.11808,
"grad_norm": 0.2360570877790451,
"learning_rate": 0.0001763936,
"loss": 0.8465,
"step": 7380
},
{
"epoch": 0.1184,
"grad_norm": 0.21176236867904663,
"learning_rate": 0.0001763296,
"loss": 0.8985,
"step": 7400
},
{
"epoch": 0.11872,
"grad_norm": 0.20678134262561798,
"learning_rate": 0.0001762656,
"loss": 0.8958,
"step": 7420
},
{
"epoch": 0.11904,
"grad_norm": 0.28033092617988586,
"learning_rate": 0.0001762016,
"loss": 0.8752,
"step": 7440
},
{
"epoch": 0.11936,
"grad_norm": 0.1989385336637497,
"learning_rate": 0.00017613760000000003,
"loss": 0.9008,
"step": 7460
},
{
"epoch": 0.11968,
"grad_norm": 0.22315728664398193,
"learning_rate": 0.00017607360000000002,
"loss": 0.8795,
"step": 7480
},
{
"epoch": 0.12,
"grad_norm": 0.2524365186691284,
"learning_rate": 0.00017600960000000002,
"loss": 0.9486,
"step": 7500
},
{
"epoch": 0.12032,
"grad_norm": 0.25160396099090576,
"learning_rate": 0.0001759456,
"loss": 0.9099,
"step": 7520
},
{
"epoch": 0.12064,
"grad_norm": 0.22552479803562164,
"learning_rate": 0.0001758816,
"loss": 0.8352,
"step": 7540
},
{
"epoch": 0.12096,
"grad_norm": 0.17683327198028564,
"learning_rate": 0.0001758176,
"loss": 0.8771,
"step": 7560
},
{
"epoch": 0.12128,
"grad_norm": 0.21366801857948303,
"learning_rate": 0.0001757536,
"loss": 0.9409,
"step": 7580
},
{
"epoch": 0.1216,
"grad_norm": 0.19283446669578552,
"learning_rate": 0.00017568960000000002,
"loss": 0.9305,
"step": 7600
},
{
"epoch": 0.12192,
"grad_norm": 0.22334997355937958,
"learning_rate": 0.00017562560000000001,
"loss": 0.8974,
"step": 7620
},
{
"epoch": 0.12224,
"grad_norm": 0.252670019865036,
"learning_rate": 0.0001755616,
"loss": 0.8787,
"step": 7640
},
{
"epoch": 0.12256,
"grad_norm": 0.2769858241081238,
"learning_rate": 0.0001754976,
"loss": 0.898,
"step": 7660
},
{
"epoch": 0.12288,
"grad_norm": 0.1979377120733261,
"learning_rate": 0.0001754336,
"loss": 0.8994,
"step": 7680
},
{
"epoch": 0.1232,
"grad_norm": 0.2033649981021881,
"learning_rate": 0.0001753696,
"loss": 0.8465,
"step": 7700
},
{
"epoch": 0.12352,
"grad_norm": 0.19611379504203796,
"learning_rate": 0.0001753056,
"loss": 0.9224,
"step": 7720
},
{
"epoch": 0.12384,
"grad_norm": 0.33501213788986206,
"learning_rate": 0.0001752416,
"loss": 0.9225,
"step": 7740
},
{
"epoch": 0.12416,
"grad_norm": 0.17307236790657043,
"learning_rate": 0.0001751776,
"loss": 0.9069,
"step": 7760
},
{
"epoch": 0.12448,
"grad_norm": 0.21077322959899902,
"learning_rate": 0.0001751136,
"loss": 0.9084,
"step": 7780
},
{
"epoch": 0.1248,
"grad_norm": 0.2217060923576355,
"learning_rate": 0.00017504960000000003,
"loss": 0.8567,
"step": 7800
},
{
"epoch": 0.12512,
"grad_norm": 0.2257986068725586,
"learning_rate": 0.00017498560000000002,
"loss": 0.8508,
"step": 7820
},
{
"epoch": 0.12544,
"grad_norm": 0.2513684332370758,
"learning_rate": 0.00017492160000000002,
"loss": 0.8808,
"step": 7840
},
{
"epoch": 0.12576,
"grad_norm": 0.3284933865070343,
"learning_rate": 0.0001748576,
"loss": 0.8912,
"step": 7860
},
{
"epoch": 0.12608,
"grad_norm": 0.20665164291858673,
"learning_rate": 0.0001747936,
"loss": 0.8869,
"step": 7880
},
{
"epoch": 0.1264,
"grad_norm": 0.2463517189025879,
"learning_rate": 0.0001747296,
"loss": 0.9119,
"step": 7900
},
{
"epoch": 0.12672,
"grad_norm": 0.19471873342990875,
"learning_rate": 0.0001746656,
"loss": 0.898,
"step": 7920
},
{
"epoch": 0.12704,
"grad_norm": 0.2780425250530243,
"learning_rate": 0.00017460160000000002,
"loss": 0.9174,
"step": 7940
},
{
"epoch": 0.12736,
"grad_norm": 0.22313277423381805,
"learning_rate": 0.00017453760000000001,
"loss": 0.9054,
"step": 7960
},
{
"epoch": 0.12768,
"grad_norm": 0.22709155082702637,
"learning_rate": 0.0001744736,
"loss": 0.887,
"step": 7980
},
{
"epoch": 0.128,
"grad_norm": 0.22096025943756104,
"learning_rate": 0.0001744096,
"loss": 0.8977,
"step": 8000
},
{
"epoch": 0.12832,
"grad_norm": 0.2423054575920105,
"learning_rate": 0.0001743456,
"loss": 0.9106,
"step": 8020
},
{
"epoch": 0.12864,
"grad_norm": 0.20658574998378754,
"learning_rate": 0.0001742816,
"loss": 0.8476,
"step": 8040
},
{
"epoch": 0.12896,
"grad_norm": 0.22077764570713043,
"learning_rate": 0.0001742176,
"loss": 0.911,
"step": 8060
},
{
"epoch": 0.12928,
"grad_norm": 0.22980265319347382,
"learning_rate": 0.0001741536,
"loss": 0.9451,
"step": 8080
},
{
"epoch": 0.1296,
"grad_norm": 0.25283125042915344,
"learning_rate": 0.0001740896,
"loss": 0.8582,
"step": 8100
},
{
"epoch": 0.12992,
"grad_norm": 0.22836875915527344,
"learning_rate": 0.0001740256,
"loss": 0.8644,
"step": 8120
},
{
"epoch": 0.13024,
"grad_norm": 0.20451593399047852,
"learning_rate": 0.00017396160000000003,
"loss": 0.9361,
"step": 8140
},
{
"epoch": 0.13056,
"grad_norm": 0.20466330647468567,
"learning_rate": 0.00017389760000000002,
"loss": 0.9134,
"step": 8160
},
{
"epoch": 0.13088,
"grad_norm": 0.20562607049942017,
"learning_rate": 0.00017383360000000002,
"loss": 0.9157,
"step": 8180
},
{
"epoch": 0.1312,
"grad_norm": 0.23010079562664032,
"learning_rate": 0.0001737696,
"loss": 0.8571,
"step": 8200
},
{
"epoch": 0.13152,
"grad_norm": 0.2761363387107849,
"learning_rate": 0.0001737056,
"loss": 0.863,
"step": 8220
},
{
"epoch": 0.13184,
"grad_norm": 0.19927144050598145,
"learning_rate": 0.0001736416,
"loss": 0.9056,
"step": 8240
},
{
"epoch": 0.13216,
"grad_norm": 0.21809734404087067,
"learning_rate": 0.0001735776,
"loss": 0.8547,
"step": 8260
},
{
"epoch": 0.13248,
"grad_norm": 0.2040037214756012,
"learning_rate": 0.00017351360000000002,
"loss": 0.8567,
"step": 8280
},
{
"epoch": 0.1328,
"grad_norm": 0.19414140284061432,
"learning_rate": 0.00017344960000000001,
"loss": 0.8773,
"step": 8300
},
{
"epoch": 0.13312,
"grad_norm": 0.17483866214752197,
"learning_rate": 0.0001733856,
"loss": 0.9026,
"step": 8320
},
{
"epoch": 0.13344,
"grad_norm": 0.2505808472633362,
"learning_rate": 0.0001733216,
"loss": 0.8348,
"step": 8340
},
{
"epoch": 0.13376,
"grad_norm": 0.2515566051006317,
"learning_rate": 0.0001732576,
"loss": 0.8657,
"step": 8360
},
{
"epoch": 0.13408,
"grad_norm": 0.2105536013841629,
"learning_rate": 0.0001731936,
"loss": 0.8864,
"step": 8380
},
{
"epoch": 0.1344,
"grad_norm": 0.22910176217556,
"learning_rate": 0.0001731296,
"loss": 0.8379,
"step": 8400
},
{
"epoch": 0.13472,
"grad_norm": 0.20737454295158386,
"learning_rate": 0.0001730656,
"loss": 0.8684,
"step": 8420
},
{
"epoch": 0.13504,
"grad_norm": 0.22466444969177246,
"learning_rate": 0.0001730016,
"loss": 0.9522,
"step": 8440
},
{
"epoch": 0.13536,
"grad_norm": 0.19258467853069305,
"learning_rate": 0.0001729376,
"loss": 0.8525,
"step": 8460
},
{
"epoch": 0.13568,
"grad_norm": 0.2092629224061966,
"learning_rate": 0.00017287360000000002,
"loss": 0.8658,
"step": 8480
},
{
"epoch": 0.136,
"grad_norm": 0.20756912231445312,
"learning_rate": 0.00017280960000000002,
"loss": 0.9148,
"step": 8500
},
{
"epoch": 0.13632,
"grad_norm": 0.22604379057884216,
"learning_rate": 0.00017274560000000002,
"loss": 0.8489,
"step": 8520
},
{
"epoch": 0.13664,
"grad_norm": 0.2140427976846695,
"learning_rate": 0.0001726816,
"loss": 0.8702,
"step": 8540
},
{
"epoch": 0.13696,
"grad_norm": 0.22593297064304352,
"learning_rate": 0.0001726176,
"loss": 0.8572,
"step": 8560
},
{
"epoch": 0.13728,
"grad_norm": 0.2053360491991043,
"learning_rate": 0.0001725536,
"loss": 0.8283,
"step": 8580
},
{
"epoch": 0.1376,
"grad_norm": 0.2059011608362198,
"learning_rate": 0.0001724896,
"loss": 0.9218,
"step": 8600
},
{
"epoch": 0.13792,
"grad_norm": 0.19691585004329681,
"learning_rate": 0.00017242560000000002,
"loss": 0.8718,
"step": 8620
},
{
"epoch": 0.13824,
"grad_norm": 0.2076309472322464,
"learning_rate": 0.00017236480000000002,
"loss": 0.9297,
"step": 8640
},
{
"epoch": 0.13856,
"grad_norm": 0.26082372665405273,
"learning_rate": 0.00017230080000000002,
"loss": 0.8568,
"step": 8660
},
{
"epoch": 0.13888,
"grad_norm": 0.22894443571567535,
"learning_rate": 0.0001722368,
"loss": 0.858,
"step": 8680
},
{
"epoch": 0.1392,
"grad_norm": 0.2583048939704895,
"learning_rate": 0.0001721728,
"loss": 0.9089,
"step": 8700
},
{
"epoch": 0.13952,
"grad_norm": 0.23365485668182373,
"learning_rate": 0.0001721088,
"loss": 0.8283,
"step": 8720
},
{
"epoch": 0.13984,
"grad_norm": 0.23852278292179108,
"learning_rate": 0.0001720448,
"loss": 0.8573,
"step": 8740
},
{
"epoch": 0.14016,
"grad_norm": 0.22304783761501312,
"learning_rate": 0.00017198080000000002,
"loss": 0.8772,
"step": 8760
},
{
"epoch": 0.14048,
"grad_norm": 0.2686362862586975,
"learning_rate": 0.00017191680000000001,
"loss": 0.8857,
"step": 8780
},
{
"epoch": 0.1408,
"grad_norm": 0.17005324363708496,
"learning_rate": 0.0001718528,
"loss": 0.9015,
"step": 8800
},
{
"epoch": 0.14112,
"grad_norm": 0.22986558079719543,
"learning_rate": 0.0001717888,
"loss": 0.9192,
"step": 8820
},
{
"epoch": 0.14144,
"grad_norm": 0.21427962183952332,
"learning_rate": 0.00017172480000000003,
"loss": 0.8947,
"step": 8840
},
{
"epoch": 0.14176,
"grad_norm": 0.262226402759552,
"learning_rate": 0.0001716608,
"loss": 0.8878,
"step": 8860
},
{
"epoch": 0.14208,
"grad_norm": 0.23082557320594788,
"learning_rate": 0.0001715968,
"loss": 0.9263,
"step": 8880
},
{
"epoch": 0.1424,
"grad_norm": 0.2226615846157074,
"learning_rate": 0.0001715328,
"loss": 0.9526,
"step": 8900
},
{
"epoch": 0.14272,
"grad_norm": 0.2389681190252304,
"learning_rate": 0.0001714688,
"loss": 0.8784,
"step": 8920
},
{
"epoch": 0.14304,
"grad_norm": 0.20122146606445312,
"learning_rate": 0.0001714048,
"loss": 0.9487,
"step": 8940
},
{
"epoch": 0.14336,
"grad_norm": 0.24507276713848114,
"learning_rate": 0.00017134080000000002,
"loss": 0.8501,
"step": 8960
},
{
"epoch": 0.14368,
"grad_norm": 0.23927843570709229,
"learning_rate": 0.00017127680000000002,
"loss": 0.8898,
"step": 8980
},
{
"epoch": 0.144,
"grad_norm": 0.22527576982975006,
"learning_rate": 0.00017121280000000002,
"loss": 0.8939,
"step": 9000
},
{
"epoch": 0.14432,
"grad_norm": 0.23542018234729767,
"learning_rate": 0.0001711488,
"loss": 0.9004,
"step": 9020
},
{
"epoch": 0.14464,
"grad_norm": 0.21746650338172913,
"learning_rate": 0.0001710848,
"loss": 0.8618,
"step": 9040
},
{
"epoch": 0.14496,
"grad_norm": 0.2594437003135681,
"learning_rate": 0.0001710208,
"loss": 0.9052,
"step": 9060
},
{
"epoch": 0.14528,
"grad_norm": 0.23847267031669617,
"learning_rate": 0.0001709568,
"loss": 0.8654,
"step": 9080
},
{
"epoch": 0.1456,
"grad_norm": 0.2352636456489563,
"learning_rate": 0.00017089280000000002,
"loss": 0.8685,
"step": 9100
},
{
"epoch": 0.14592,
"grad_norm": 0.21218867599964142,
"learning_rate": 0.0001708288,
"loss": 0.8865,
"step": 9120
},
{
"epoch": 0.14624,
"grad_norm": 0.22339680790901184,
"learning_rate": 0.0001707648,
"loss": 0.9414,
"step": 9140
},
{
"epoch": 0.14656,
"grad_norm": 0.2145155370235443,
"learning_rate": 0.0001707008,
"loss": 0.871,
"step": 9160
},
{
"epoch": 0.14688,
"grad_norm": 0.24632301926612854,
"learning_rate": 0.00017063680000000003,
"loss": 0.9003,
"step": 9180
},
{
"epoch": 0.1472,
"grad_norm": 0.21344535052776337,
"learning_rate": 0.0001705728,
"loss": 0.9132,
"step": 9200
},
{
"epoch": 0.14752,
"grad_norm": 0.2178122103214264,
"learning_rate": 0.0001705088,
"loss": 0.9213,
"step": 9220
},
{
"epoch": 0.14784,
"grad_norm": 0.23042111098766327,
"learning_rate": 0.0001704448,
"loss": 0.9325,
"step": 9240
},
{
"epoch": 0.14816,
"grad_norm": 0.246158629655838,
"learning_rate": 0.0001703808,
"loss": 0.87,
"step": 9260
},
{
"epoch": 0.14848,
"grad_norm": 0.22557534277439117,
"learning_rate": 0.0001703168,
"loss": 0.8192,
"step": 9280
},
{
"epoch": 0.1488,
"grad_norm": 0.20784518122673035,
"learning_rate": 0.00017025280000000002,
"loss": 0.8372,
"step": 9300
},
{
"epoch": 0.14912,
"grad_norm": 0.23057977855205536,
"learning_rate": 0.00017018880000000002,
"loss": 0.9297,
"step": 9320
},
{
"epoch": 0.14944,
"grad_norm": 0.2289903163909912,
"learning_rate": 0.00017012480000000001,
"loss": 0.9167,
"step": 9340
},
{
"epoch": 0.14976,
"grad_norm": 0.22998815774917603,
"learning_rate": 0.0001700608,
"loss": 0.889,
"step": 9360
},
{
"epoch": 0.15008,
"grad_norm": 0.22863976657390594,
"learning_rate": 0.0001699968,
"loss": 0.8884,
"step": 9380
},
{
"epoch": 0.1504,
"grad_norm": 0.24748341739177704,
"learning_rate": 0.0001699328,
"loss": 0.8961,
"step": 9400
},
{
"epoch": 0.15072,
"grad_norm": 0.21250346302986145,
"learning_rate": 0.0001698688,
"loss": 0.8683,
"step": 9420
},
{
"epoch": 0.15104,
"grad_norm": 0.239846333861351,
"learning_rate": 0.00016980480000000002,
"loss": 0.8927,
"step": 9440
},
{
"epoch": 0.15136,
"grad_norm": 0.2487175464630127,
"learning_rate": 0.0001697408,
"loss": 0.9144,
"step": 9460
},
{
"epoch": 0.15168,
"grad_norm": 0.23323270678520203,
"learning_rate": 0.0001696768,
"loss": 0.9251,
"step": 9480
},
{
"epoch": 0.152,
"grad_norm": 0.19210824370384216,
"learning_rate": 0.0001696128,
"loss": 0.9244,
"step": 9500
},
{
"epoch": 0.15232,
"grad_norm": 0.23382435739040375,
"learning_rate": 0.00016954880000000003,
"loss": 0.9249,
"step": 9520
},
{
"epoch": 0.15264,
"grad_norm": 0.20494690537452698,
"learning_rate": 0.0001694848,
"loss": 0.878,
"step": 9540
},
{
"epoch": 0.15296,
"grad_norm": 0.23017622530460358,
"learning_rate": 0.0001694208,
"loss": 0.8656,
"step": 9560
},
{
"epoch": 0.15328,
"grad_norm": 0.26027923822402954,
"learning_rate": 0.0001693568,
"loss": 0.912,
"step": 9580
},
{
"epoch": 0.1536,
"grad_norm": 0.19583414494991302,
"learning_rate": 0.0001692928,
"loss": 0.8411,
"step": 9600
},
{
"epoch": 0.15392,
"grad_norm": 0.25373271107673645,
"learning_rate": 0.0001692288,
"loss": 0.8781,
"step": 9620
},
{
"epoch": 0.15424,
"grad_norm": 0.27190205454826355,
"learning_rate": 0.00016916480000000002,
"loss": 0.8691,
"step": 9640
},
{
"epoch": 0.15456,
"grad_norm": 0.22996129095554352,
"learning_rate": 0.00016910080000000002,
"loss": 0.8277,
"step": 9660
},
{
"epoch": 0.15488,
"grad_norm": 0.1947249174118042,
"learning_rate": 0.00016903680000000001,
"loss": 0.8873,
"step": 9680
},
{
"epoch": 0.1552,
"grad_norm": 0.18230539560317993,
"learning_rate": 0.0001689728,
"loss": 0.8315,
"step": 9700
},
{
"epoch": 0.15552,
"grad_norm": 0.25768032670021057,
"learning_rate": 0.0001689088,
"loss": 0.8645,
"step": 9720
},
{
"epoch": 0.15584,
"grad_norm": 0.2460031509399414,
"learning_rate": 0.0001688448,
"loss": 0.9031,
"step": 9740
},
{
"epoch": 0.15616,
"grad_norm": 0.22613097727298737,
"learning_rate": 0.0001687808,
"loss": 0.9065,
"step": 9760
},
{
"epoch": 0.15648,
"grad_norm": 0.2073383629322052,
"learning_rate": 0.00016871680000000002,
"loss": 0.8825,
"step": 9780
},
{
"epoch": 0.1568,
"grad_norm": 0.2087622731924057,
"learning_rate": 0.0001686528,
"loss": 0.9253,
"step": 9800
},
{
"epoch": 0.15712,
"grad_norm": 0.2113562375307083,
"learning_rate": 0.0001685888,
"loss": 0.8639,
"step": 9820
},
{
"epoch": 0.15744,
"grad_norm": 0.23061156272888184,
"learning_rate": 0.0001685248,
"loss": 0.8818,
"step": 9840
},
{
"epoch": 0.15776,
"grad_norm": 0.2453097254037857,
"learning_rate": 0.00016846080000000003,
"loss": 0.91,
"step": 9860
},
{
"epoch": 0.15808,
"grad_norm": 0.2568601071834564,
"learning_rate": 0.0001683968,
"loss": 0.9147,
"step": 9880
},
{
"epoch": 0.1584,
"grad_norm": 0.238372802734375,
"learning_rate": 0.0001683328,
"loss": 0.8514,
"step": 9900
},
{
"epoch": 0.15872,
"grad_norm": 0.2500544786453247,
"learning_rate": 0.0001682688,
"loss": 0.9219,
"step": 9920
},
{
"epoch": 0.15904,
"grad_norm": 0.22526203095912933,
"learning_rate": 0.0001682048,
"loss": 0.8553,
"step": 9940
},
{
"epoch": 0.15936,
"grad_norm": 0.2296661138534546,
"learning_rate": 0.0001681408,
"loss": 0.8867,
"step": 9960
},
{
"epoch": 0.15968,
"grad_norm": 0.19159358739852905,
"learning_rate": 0.00016807680000000002,
"loss": 0.8231,
"step": 9980
},
{
"epoch": 0.16,
"grad_norm": 0.21099399030208588,
"learning_rate": 0.00016801280000000002,
"loss": 0.9614,
"step": 10000
},
{
"epoch": 0.16032,
"grad_norm": 0.19851434230804443,
"learning_rate": 0.00016794880000000001,
"loss": 0.8711,
"step": 10020
},
{
"epoch": 0.16064,
"grad_norm": 0.255908340215683,
"learning_rate": 0.0001678848,
"loss": 0.8584,
"step": 10040
},
{
"epoch": 0.16096,
"grad_norm": 0.17037171125411987,
"learning_rate": 0.0001678208,
"loss": 0.8858,
"step": 10060
},
{
"epoch": 0.16128,
"grad_norm": 0.18440371751785278,
"learning_rate": 0.0001677568,
"loss": 0.8785,
"step": 10080
},
{
"epoch": 0.1616,
"grad_norm": 0.22271201014518738,
"learning_rate": 0.0001676928,
"loss": 0.8777,
"step": 10100
},
{
"epoch": 0.16192,
"grad_norm": 0.23368695378303528,
"learning_rate": 0.00016762880000000002,
"loss": 0.9383,
"step": 10120
},
{
"epoch": 0.16224,
"grad_norm": 0.2024698108434677,
"learning_rate": 0.0001675648,
"loss": 0.8235,
"step": 10140
},
{
"epoch": 0.16256,
"grad_norm": 0.24644511938095093,
"learning_rate": 0.0001675008,
"loss": 0.9375,
"step": 10160
},
{
"epoch": 0.16288,
"grad_norm": 0.21530281007289886,
"learning_rate": 0.0001674368,
"loss": 0.8697,
"step": 10180
},
{
"epoch": 0.1632,
"grad_norm": 0.2107221782207489,
"learning_rate": 0.00016737280000000002,
"loss": 0.8798,
"step": 10200
},
{
"epoch": 0.16352,
"grad_norm": 0.18811015784740448,
"learning_rate": 0.0001673088,
"loss": 0.9518,
"step": 10220
},
{
"epoch": 0.16384,
"grad_norm": 0.20447804033756256,
"learning_rate": 0.0001672448,
"loss": 0.8528,
"step": 10240
},
{
"epoch": 0.16416,
"grad_norm": 0.22877538204193115,
"learning_rate": 0.0001671808,
"loss": 0.9376,
"step": 10260
},
{
"epoch": 0.16448,
"grad_norm": 0.24324432015419006,
"learning_rate": 0.0001671168,
"loss": 0.8818,
"step": 10280
},
{
"epoch": 0.1648,
"grad_norm": 0.20559096336364746,
"learning_rate": 0.0001670528,
"loss": 0.9382,
"step": 10300
},
{
"epoch": 0.16512,
"grad_norm": 0.23329490423202515,
"learning_rate": 0.00016698880000000002,
"loss": 0.9457,
"step": 10320
},
{
"epoch": 0.16544,
"grad_norm": 0.23040834069252014,
"learning_rate": 0.00016692480000000002,
"loss": 0.8943,
"step": 10340
},
{
"epoch": 0.16576,
"grad_norm": 0.21570099890232086,
"learning_rate": 0.0001668608,
"loss": 0.8714,
"step": 10360
},
{
"epoch": 0.16608,
"grad_norm": 0.20824502408504486,
"learning_rate": 0.0001667968,
"loss": 0.8851,
"step": 10380
},
{
"epoch": 0.1664,
"grad_norm": 0.19650331139564514,
"learning_rate": 0.0001667328,
"loss": 0.8649,
"step": 10400
},
{
"epoch": 0.16672,
"grad_norm": 0.22227755188941956,
"learning_rate": 0.0001666688,
"loss": 0.9556,
"step": 10420
},
{
"epoch": 0.16704,
"grad_norm": 0.21929942071437836,
"learning_rate": 0.0001666048,
"loss": 0.9107,
"step": 10440
},
{
"epoch": 0.16736,
"grad_norm": 0.21728375554084778,
"learning_rate": 0.00016654080000000002,
"loss": 0.9389,
"step": 10460
},
{
"epoch": 0.16768,
"grad_norm": 0.257927805185318,
"learning_rate": 0.0001664768,
"loss": 0.852,
"step": 10480
},
{
"epoch": 0.168,
"grad_norm": 0.23964323103427887,
"learning_rate": 0.0001664128,
"loss": 0.8966,
"step": 10500
},
{
"epoch": 0.16832,
"grad_norm": 0.21869444847106934,
"learning_rate": 0.00016634880000000003,
"loss": 0.9496,
"step": 10520
},
{
"epoch": 0.16864,
"grad_norm": 0.2491443157196045,
"learning_rate": 0.00016628480000000002,
"loss": 0.8853,
"step": 10540
},
{
"epoch": 0.16896,
"grad_norm": 0.19421234726905823,
"learning_rate": 0.0001662208,
"loss": 0.9094,
"step": 10560
},
{
"epoch": 0.16928,
"grad_norm": 0.2546538710594177,
"learning_rate": 0.00016615680000000001,
"loss": 0.9055,
"step": 10580
},
{
"epoch": 0.1696,
"grad_norm": 0.21943865716457367,
"learning_rate": 0.0001660928,
"loss": 0.9036,
"step": 10600
},
{
"epoch": 0.16992,
"grad_norm": 0.26403695344924927,
"learning_rate": 0.0001660288,
"loss": 0.8961,
"step": 10620
},
{
"epoch": 0.17024,
"grad_norm": 0.2386874556541443,
"learning_rate": 0.0001659648,
"loss": 0.8756,
"step": 10640
},
{
"epoch": 0.17056,
"grad_norm": 0.2226932942867279,
"learning_rate": 0.00016590080000000002,
"loss": 0.847,
"step": 10660
},
{
"epoch": 0.17088,
"grad_norm": 0.19772516191005707,
"learning_rate": 0.00016583680000000002,
"loss": 0.8771,
"step": 10680
},
{
"epoch": 0.1712,
"grad_norm": 0.20000356435775757,
"learning_rate": 0.0001657728,
"loss": 0.922,
"step": 10700
},
{
"epoch": 0.17152,
"grad_norm": 0.24227920174598694,
"learning_rate": 0.0001657088,
"loss": 0.8792,
"step": 10720
},
{
"epoch": 0.17184,
"grad_norm": 0.2312862128019333,
"learning_rate": 0.0001656448,
"loss": 0.8606,
"step": 10740
},
{
"epoch": 0.17216,
"grad_norm": 0.229568749666214,
"learning_rate": 0.0001655808,
"loss": 0.8763,
"step": 10760
},
{
"epoch": 0.17248,
"grad_norm": 0.22286683320999146,
"learning_rate": 0.0001655168,
"loss": 0.9215,
"step": 10780
},
{
"epoch": 0.1728,
"grad_norm": 0.21545717120170593,
"learning_rate": 0.00016545280000000002,
"loss": 0.8683,
"step": 10800
},
{
"epoch": 0.17312,
"grad_norm": 0.2119383066892624,
"learning_rate": 0.0001653888,
"loss": 0.9104,
"step": 10820
},
{
"epoch": 0.17344,
"grad_norm": 0.25230464339256287,
"learning_rate": 0.0001653248,
"loss": 0.9178,
"step": 10840
},
{
"epoch": 0.17376,
"grad_norm": 0.20645944774150848,
"learning_rate": 0.00016526080000000003,
"loss": 0.8743,
"step": 10860
},
{
"epoch": 0.17408,
"grad_norm": 0.24283145368099213,
"learning_rate": 0.00016519680000000002,
"loss": 0.917,
"step": 10880
},
{
"epoch": 0.1744,
"grad_norm": 0.24862386286258698,
"learning_rate": 0.0001651328,
"loss": 0.8957,
"step": 10900
},
{
"epoch": 0.17472,
"grad_norm": 0.16515551507472992,
"learning_rate": 0.00016506880000000001,
"loss": 0.9213,
"step": 10920
},
{
"epoch": 0.17504,
"grad_norm": 0.21619679033756256,
"learning_rate": 0.0001650048,
"loss": 0.8658,
"step": 10940
},
{
"epoch": 0.17536,
"grad_norm": 0.19346758723258972,
"learning_rate": 0.0001649408,
"loss": 0.8456,
"step": 10960
},
{
"epoch": 0.17568,
"grad_norm": 0.21540650725364685,
"learning_rate": 0.0001648768,
"loss": 0.9633,
"step": 10980
},
{
"epoch": 0.176,
"grad_norm": 0.21067962050437927,
"learning_rate": 0.00016481280000000002,
"loss": 0.8907,
"step": 11000
},
{
"epoch": 0.17632,
"grad_norm": 0.2155253291130066,
"learning_rate": 0.00016474880000000002,
"loss": 0.8985,
"step": 11020
},
{
"epoch": 0.17664,
"grad_norm": 0.27138301730155945,
"learning_rate": 0.0001646848,
"loss": 0.8525,
"step": 11040
},
{
"epoch": 0.17696,
"grad_norm": 0.20680946111679077,
"learning_rate": 0.0001646208,
"loss": 0.9124,
"step": 11060
},
{
"epoch": 0.17728,
"grad_norm": 0.2446873039007187,
"learning_rate": 0.0001645568,
"loss": 0.8922,
"step": 11080
},
{
"epoch": 0.1776,
"grad_norm": 0.19545750319957733,
"learning_rate": 0.0001644928,
"loss": 0.8919,
"step": 11100
},
{
"epoch": 0.17792,
"grad_norm": 0.20573855936527252,
"learning_rate": 0.0001644288,
"loss": 0.8495,
"step": 11120
},
{
"epoch": 0.17824,
"grad_norm": 0.1951497346162796,
"learning_rate": 0.00016436480000000002,
"loss": 0.8981,
"step": 11140
},
{
"epoch": 0.17856,
"grad_norm": 0.25471144914627075,
"learning_rate": 0.0001643008,
"loss": 0.9582,
"step": 11160
},
{
"epoch": 0.17888,
"grad_norm": 0.22080758213996887,
"learning_rate": 0.0001642368,
"loss": 0.9398,
"step": 11180
},
{
"epoch": 0.1792,
"grad_norm": 0.23357786238193512,
"learning_rate": 0.00016417280000000003,
"loss": 0.8585,
"step": 11200
},
{
"epoch": 0.17952,
"grad_norm": 0.3059156537055969,
"learning_rate": 0.00016410880000000002,
"loss": 0.9087,
"step": 11220
},
{
"epoch": 0.17984,
"grad_norm": 0.21788957715034485,
"learning_rate": 0.0001640448,
"loss": 0.9112,
"step": 11240
},
{
"epoch": 0.18016,
"grad_norm": 0.2401525229215622,
"learning_rate": 0.00016398080000000001,
"loss": 0.9099,
"step": 11260
},
{
"epoch": 0.18048,
"grad_norm": 0.22227467596530914,
"learning_rate": 0.0001639168,
"loss": 0.8272,
"step": 11280
},
{
"epoch": 0.1808,
"grad_norm": 0.21627697348594666,
"learning_rate": 0.0001638528,
"loss": 0.8753,
"step": 11300
},
{
"epoch": 0.18112,
"grad_norm": 0.21134355664253235,
"learning_rate": 0.0001637888,
"loss": 0.9182,
"step": 11320
},
{
"epoch": 0.18144,
"grad_norm": 0.22719112038612366,
"learning_rate": 0.00016372480000000002,
"loss": 0.8454,
"step": 11340
},
{
"epoch": 0.18176,
"grad_norm": 0.22609511017799377,
"learning_rate": 0.00016366080000000002,
"loss": 0.888,
"step": 11360
},
{
"epoch": 0.18208,
"grad_norm": 0.19711975753307343,
"learning_rate": 0.0001635968,
"loss": 0.942,
"step": 11380
},
{
"epoch": 0.1824,
"grad_norm": 0.2588805854320526,
"learning_rate": 0.0001635328,
"loss": 0.9463,
"step": 11400
},
{
"epoch": 0.18272,
"grad_norm": 0.25787708163261414,
"learning_rate": 0.0001634688,
"loss": 0.9114,
"step": 11420
},
{
"epoch": 0.18304,
"grad_norm": 0.2743508219718933,
"learning_rate": 0.0001634048,
"loss": 0.873,
"step": 11440
},
{
"epoch": 0.18336,
"grad_norm": 0.23172695934772491,
"learning_rate": 0.0001633408,
"loss": 0.8495,
"step": 11460
},
{
"epoch": 0.18368,
"grad_norm": 0.18422289192676544,
"learning_rate": 0.00016327680000000002,
"loss": 0.8821,
"step": 11480
},
{
"epoch": 0.184,
"grad_norm": 0.2328750044107437,
"learning_rate": 0.0001632128,
"loss": 0.8885,
"step": 11500
},
{
"epoch": 0.18432,
"grad_norm": 0.26465412974357605,
"learning_rate": 0.0001631488,
"loss": 0.8943,
"step": 11520
},
{
"epoch": 0.18464,
"grad_norm": 0.27734020352363586,
"learning_rate": 0.00016308480000000003,
"loss": 0.855,
"step": 11540
},
{
"epoch": 0.18496,
"grad_norm": 0.24460507929325104,
"learning_rate": 0.00016302080000000002,
"loss": 0.8996,
"step": 11560
},
{
"epoch": 0.18528,
"grad_norm": 0.2152118980884552,
"learning_rate": 0.0001629568,
"loss": 0.839,
"step": 11580
},
{
"epoch": 0.1856,
"grad_norm": 0.22813241183757782,
"learning_rate": 0.00016289280000000001,
"loss": 0.9257,
"step": 11600
},
{
"epoch": 0.18592,
"grad_norm": 0.2076783925294876,
"learning_rate": 0.0001628288,
"loss": 0.88,
"step": 11620
},
{
"epoch": 0.18624,
"grad_norm": 0.23828792572021484,
"learning_rate": 0.0001627648,
"loss": 0.9087,
"step": 11640
},
{
"epoch": 0.18656,
"grad_norm": 0.24277402460575104,
"learning_rate": 0.0001627008,
"loss": 0.9419,
"step": 11660
},
{
"epoch": 0.18688,
"grad_norm": 0.24770581722259521,
"learning_rate": 0.00016263680000000002,
"loss": 0.9184,
"step": 11680
},
{
"epoch": 0.1872,
"grad_norm": 0.23547635972499847,
"learning_rate": 0.00016257280000000002,
"loss": 0.9069,
"step": 11700
},
{
"epoch": 0.18752,
"grad_norm": 0.17838741838932037,
"learning_rate": 0.0001625088,
"loss": 0.9483,
"step": 11720
},
{
"epoch": 0.18784,
"grad_norm": 0.23091432452201843,
"learning_rate": 0.0001624448,
"loss": 0.9225,
"step": 11740
},
{
"epoch": 0.18816,
"grad_norm": 0.2132597118616104,
"learning_rate": 0.0001623808,
"loss": 0.8979,
"step": 11760
},
{
"epoch": 0.18848,
"grad_norm": 0.2296367734670639,
"learning_rate": 0.0001623168,
"loss": 0.8762,
"step": 11780
},
{
"epoch": 0.1888,
"grad_norm": 0.20997250080108643,
"learning_rate": 0.0001622528,
"loss": 0.9156,
"step": 11800
},
{
"epoch": 0.18912,
"grad_norm": 0.2033025026321411,
"learning_rate": 0.00016218880000000001,
"loss": 0.8847,
"step": 11820
},
{
"epoch": 0.18944,
"grad_norm": 0.21794314682483673,
"learning_rate": 0.0001621248,
"loss": 0.8564,
"step": 11840
},
{
"epoch": 0.18976,
"grad_norm": 0.23999591171741486,
"learning_rate": 0.0001620608,
"loss": 0.8581,
"step": 11860
},
{
"epoch": 0.19008,
"grad_norm": 0.2366144210100174,
"learning_rate": 0.00016199680000000003,
"loss": 0.8745,
"step": 11880
},
{
"epoch": 0.1904,
"grad_norm": 0.2415480762720108,
"learning_rate": 0.00016193280000000002,
"loss": 0.9004,
"step": 11900
},
{
"epoch": 0.19072,
"grad_norm": 0.22656038403511047,
"learning_rate": 0.0001618688,
"loss": 0.8871,
"step": 11920
},
{
"epoch": 0.19104,
"grad_norm": 0.2326974719762802,
"learning_rate": 0.0001618048,
"loss": 0.954,
"step": 11940
},
{
"epoch": 0.19136,
"grad_norm": 0.212848499417305,
"learning_rate": 0.0001617408,
"loss": 0.9154,
"step": 11960
},
{
"epoch": 0.19168,
"grad_norm": 0.16706988215446472,
"learning_rate": 0.0001616768,
"loss": 0.9052,
"step": 11980
},
{
"epoch": 0.192,
"grad_norm": 0.2651592791080475,
"learning_rate": 0.0001616128,
"loss": 0.9448,
"step": 12000
},
{
"epoch": 0.19232,
"grad_norm": 0.24427416920661926,
"learning_rate": 0.00016154880000000002,
"loss": 0.8794,
"step": 12020
},
{
"epoch": 0.19264,
"grad_norm": 0.19025467336177826,
"learning_rate": 0.00016148480000000002,
"loss": 0.8535,
"step": 12040
},
{
"epoch": 0.19296,
"grad_norm": 0.21214129030704498,
"learning_rate": 0.0001614208,
"loss": 0.8756,
"step": 12060
},
{
"epoch": 0.19328,
"grad_norm": 0.2451871931552887,
"learning_rate": 0.0001613568,
"loss": 0.8858,
"step": 12080
},
{
"epoch": 0.1936,
"grad_norm": 0.23217494785785675,
"learning_rate": 0.0001612928,
"loss": 0.9066,
"step": 12100
},
{
"epoch": 0.19392,
"grad_norm": 0.2479615956544876,
"learning_rate": 0.0001612288,
"loss": 0.8477,
"step": 12120
},
{
"epoch": 0.19424,
"grad_norm": 0.20965996384620667,
"learning_rate": 0.0001611648,
"loss": 0.8573,
"step": 12140
},
{
"epoch": 0.19456,
"grad_norm": 0.19635817408561707,
"learning_rate": 0.00016110080000000001,
"loss": 0.9182,
"step": 12160
},
{
"epoch": 0.19488,
"grad_norm": 0.2266317903995514,
"learning_rate": 0.0001610368,
"loss": 0.9243,
"step": 12180
},
{
"epoch": 0.1952,
"grad_norm": 0.24232080578804016,
"learning_rate": 0.0001609728,
"loss": 0.8944,
"step": 12200
},
{
"epoch": 0.19552,
"grad_norm": 0.18726186454296112,
"learning_rate": 0.00016090880000000003,
"loss": 0.9084,
"step": 12220
},
{
"epoch": 0.19584,
"grad_norm": 0.25809457898139954,
"learning_rate": 0.00016084480000000002,
"loss": 0.8629,
"step": 12240
},
{
"epoch": 0.19616,
"grad_norm": 0.24405358731746674,
"learning_rate": 0.0001607808,
"loss": 0.9071,
"step": 12260
},
{
"epoch": 0.19648,
"grad_norm": 0.21723495423793793,
"learning_rate": 0.0001607168,
"loss": 0.8814,
"step": 12280
},
{
"epoch": 0.1968,
"grad_norm": 0.23140837252140045,
"learning_rate": 0.0001606528,
"loss": 0.8499,
"step": 12300
},
{
"epoch": 0.19712,
"grad_norm": 0.22470901906490326,
"learning_rate": 0.0001605888,
"loss": 0.9249,
"step": 12320
},
{
"epoch": 0.19744,
"grad_norm": 0.19264104962348938,
"learning_rate": 0.0001605248,
"loss": 0.9057,
"step": 12340
},
{
"epoch": 0.19776,
"grad_norm": 0.23376864194869995,
"learning_rate": 0.00016046080000000002,
"loss": 0.9535,
"step": 12360
},
{
"epoch": 0.19808,
"grad_norm": 0.2225295752286911,
"learning_rate": 0.00016039680000000002,
"loss": 0.865,
"step": 12380
},
{
"epoch": 0.1984,
"grad_norm": 0.23474235832691193,
"learning_rate": 0.0001603328,
"loss": 0.9137,
"step": 12400
},
{
"epoch": 0.19872,
"grad_norm": 0.29955846071243286,
"learning_rate": 0.0001602688,
"loss": 0.8618,
"step": 12420
},
{
"epoch": 0.19904,
"grad_norm": 0.25170376896858215,
"learning_rate": 0.0001602048,
"loss": 0.9341,
"step": 12440
},
{
"epoch": 0.19936,
"grad_norm": 0.23932316899299622,
"learning_rate": 0.0001601408,
"loss": 0.876,
"step": 12460
},
{
"epoch": 0.19968,
"grad_norm": 0.24285189807415009,
"learning_rate": 0.0001600768,
"loss": 0.8858,
"step": 12480
},
{
"epoch": 0.2,
"grad_norm": 0.23304852843284607,
"learning_rate": 0.00016001280000000001,
"loss": 0.8659,
"step": 12500
},
{
"epoch": 0.20032,
"grad_norm": 0.21106384694576263,
"learning_rate": 0.0001599488,
"loss": 0.8626,
"step": 12520
},
{
"epoch": 0.20064,
"grad_norm": 0.20884625613689423,
"learning_rate": 0.0001598848,
"loss": 0.8872,
"step": 12540
},
{
"epoch": 0.20096,
"grad_norm": 0.20588114857673645,
"learning_rate": 0.00015982080000000003,
"loss": 0.8462,
"step": 12560
},
{
"epoch": 0.20128,
"grad_norm": 0.2657853066921234,
"learning_rate": 0.00015975680000000002,
"loss": 0.9246,
"step": 12580
},
{
"epoch": 0.2016,
"grad_norm": 0.22846530377864838,
"learning_rate": 0.0001596928,
"loss": 0.8847,
"step": 12600
},
{
"epoch": 0.20192,
"grad_norm": 0.20565031468868256,
"learning_rate": 0.0001596288,
"loss": 0.8966,
"step": 12620
},
{
"epoch": 0.20224,
"grad_norm": 0.19185014069080353,
"learning_rate": 0.0001595648,
"loss": 0.9018,
"step": 12640
},
{
"epoch": 0.20256,
"grad_norm": 0.23399049043655396,
"learning_rate": 0.0001595008,
"loss": 0.9258,
"step": 12660
},
{
"epoch": 0.20288,
"grad_norm": 0.2446955144405365,
"learning_rate": 0.0001594368,
"loss": 0.8574,
"step": 12680
},
{
"epoch": 0.2032,
"grad_norm": 0.2344285249710083,
"learning_rate": 0.00015937280000000002,
"loss": 0.9078,
"step": 12700
},
{
"epoch": 0.20352,
"grad_norm": 0.2038036733865738,
"learning_rate": 0.00015930880000000002,
"loss": 0.9086,
"step": 12720
},
{
"epoch": 0.20384,
"grad_norm": 0.23228555917739868,
"learning_rate": 0.0001592448,
"loss": 0.931,
"step": 12740
},
{
"epoch": 0.20416,
"grad_norm": 0.2811441719532013,
"learning_rate": 0.0001591808,
"loss": 0.8438,
"step": 12760
},
{
"epoch": 0.20448,
"grad_norm": 0.2014266848564148,
"learning_rate": 0.0001591168,
"loss": 0.9311,
"step": 12780
},
{
"epoch": 0.2048,
"grad_norm": 0.23992010951042175,
"learning_rate": 0.0001590528,
"loss": 0.8965,
"step": 12800
},
{
"epoch": 0.20512,
"grad_norm": 0.25870153307914734,
"learning_rate": 0.0001589888,
"loss": 0.8959,
"step": 12820
},
{
"epoch": 0.20544,
"grad_norm": 0.24375873804092407,
"learning_rate": 0.00015892480000000001,
"loss": 0.8786,
"step": 12840
},
{
"epoch": 0.20576,
"grad_norm": 0.20621752738952637,
"learning_rate": 0.0001588608,
"loss": 0.8796,
"step": 12860
},
{
"epoch": 0.20608,
"grad_norm": 0.23437882959842682,
"learning_rate": 0.0001587968,
"loss": 0.8592,
"step": 12880
},
{
"epoch": 0.2064,
"grad_norm": 0.23581136763095856,
"learning_rate": 0.00015873280000000003,
"loss": 0.8651,
"step": 12900
},
{
"epoch": 0.20672,
"grad_norm": 0.24483484029769897,
"learning_rate": 0.00015866880000000002,
"loss": 0.9199,
"step": 12920
},
{
"epoch": 0.20704,
"grad_norm": 0.3012985289096832,
"learning_rate": 0.0001586048,
"loss": 0.859,
"step": 12940
},
{
"epoch": 0.20736,
"grad_norm": 0.26789209246635437,
"learning_rate": 0.0001585408,
"loss": 0.8816,
"step": 12960
},
{
"epoch": 0.20768,
"grad_norm": 0.21916130185127258,
"learning_rate": 0.0001584768,
"loss": 0.9345,
"step": 12980
},
{
"epoch": 0.208,
"grad_norm": 0.8556731343269348,
"learning_rate": 0.0001584128,
"loss": 0.9199,
"step": 13000
},
{
"epoch": 0.20832,
"grad_norm": 0.22015364468097687,
"learning_rate": 0.0001583488,
"loss": 0.8742,
"step": 13020
},
{
"epoch": 0.20864,
"grad_norm": 0.2598000168800354,
"learning_rate": 0.00015828480000000002,
"loss": 0.8665,
"step": 13040
},
{
"epoch": 0.20896,
"grad_norm": 0.22221586108207703,
"learning_rate": 0.00015822080000000001,
"loss": 0.892,
"step": 13060
},
{
"epoch": 0.20928,
"grad_norm": 0.2682360112667084,
"learning_rate": 0.0001581568,
"loss": 0.91,
"step": 13080
},
{
"epoch": 0.2096,
"grad_norm": 0.24058601260185242,
"learning_rate": 0.0001580928,
"loss": 0.8826,
"step": 13100
},
{
"epoch": 0.20992,
"grad_norm": 0.25506773591041565,
"learning_rate": 0.0001580288,
"loss": 0.8979,
"step": 13120
},
{
"epoch": 0.21024,
"grad_norm": 0.2581631541252136,
"learning_rate": 0.0001579648,
"loss": 0.9204,
"step": 13140
},
{
"epoch": 0.21056,
"grad_norm": 0.2511695623397827,
"learning_rate": 0.0001579008,
"loss": 0.8796,
"step": 13160
},
{
"epoch": 0.21088,
"grad_norm": 0.20950326323509216,
"learning_rate": 0.0001578368,
"loss": 0.8267,
"step": 13180
},
{
"epoch": 0.2112,
"grad_norm": 0.2644106149673462,
"learning_rate": 0.0001577728,
"loss": 0.9124,
"step": 13200
},
{
"epoch": 0.21152,
"grad_norm": 0.1935633271932602,
"learning_rate": 0.0001577088,
"loss": 0.8468,
"step": 13220
},
{
"epoch": 0.21184,
"grad_norm": 0.2543448507785797,
"learning_rate": 0.00015764480000000003,
"loss": 0.8965,
"step": 13240
},
{
"epoch": 0.21216,
"grad_norm": 0.27806851267814636,
"learning_rate": 0.00015758080000000002,
"loss": 0.894,
"step": 13260
},
{
"epoch": 0.21248,
"grad_norm": 0.18095877766609192,
"learning_rate": 0.0001575168,
"loss": 0.8876,
"step": 13280
},
{
"epoch": 0.2128,
"grad_norm": 0.21904884278774261,
"learning_rate": 0.0001574528,
"loss": 0.8835,
"step": 13300
},
{
"epoch": 0.21312,
"grad_norm": 0.25367972254753113,
"learning_rate": 0.0001573888,
"loss": 0.8688,
"step": 13320
},
{
"epoch": 0.21344,
"grad_norm": 0.261203408241272,
"learning_rate": 0.0001573248,
"loss": 0.9173,
"step": 13340
},
{
"epoch": 0.21376,
"grad_norm": 0.25779855251312256,
"learning_rate": 0.0001572608,
"loss": 0.9127,
"step": 13360
},
{
"epoch": 0.21408,
"grad_norm": 0.20082098245620728,
"learning_rate": 0.00015719680000000002,
"loss": 0.9433,
"step": 13380
},
{
"epoch": 0.2144,
"grad_norm": 0.22630241513252258,
"learning_rate": 0.00015713280000000001,
"loss": 0.9027,
"step": 13400
},
{
"epoch": 0.21472,
"grad_norm": 0.2328576296567917,
"learning_rate": 0.0001570688,
"loss": 0.8736,
"step": 13420
},
{
"epoch": 0.21504,
"grad_norm": 0.24743099510669708,
"learning_rate": 0.00015700480000000003,
"loss": 0.9568,
"step": 13440
},
{
"epoch": 0.21536,
"grad_norm": 0.23386693000793457,
"learning_rate": 0.0001569408,
"loss": 0.9131,
"step": 13460
},
{
"epoch": 0.21568,
"grad_norm": 0.2177802473306656,
"learning_rate": 0.0001568768,
"loss": 0.8948,
"step": 13480
},
{
"epoch": 0.216,
"grad_norm": 0.19793163239955902,
"learning_rate": 0.00015681280000000002,
"loss": 0.9037,
"step": 13500
},
{
"epoch": 0.21632,
"grad_norm": 0.6092952489852905,
"learning_rate": 0.0001567488,
"loss": 0.8912,
"step": 13520
},
{
"epoch": 0.21664,
"grad_norm": 0.21942676603794098,
"learning_rate": 0.0001566848,
"loss": 0.8476,
"step": 13540
},
{
"epoch": 0.21696,
"grad_norm": 0.2475002408027649,
"learning_rate": 0.000156624,
"loss": 0.8796,
"step": 13560
},
{
"epoch": 0.21728,
"grad_norm": 0.25338417291641235,
"learning_rate": 0.00015656,
"loss": 0.9155,
"step": 13580
},
{
"epoch": 0.2176,
"grad_norm": 0.22608576714992523,
"learning_rate": 0.000156496,
"loss": 0.8685,
"step": 13600
},
{
"epoch": 0.21792,
"grad_norm": 0.20519301295280457,
"learning_rate": 0.000156432,
"loss": 0.8913,
"step": 13620
},
{
"epoch": 0.21824,
"grad_norm": 0.20905616879463196,
"learning_rate": 0.000156368,
"loss": 0.9382,
"step": 13640
},
{
"epoch": 0.21856,
"grad_norm": 0.21286025643348694,
"learning_rate": 0.000156304,
"loss": 0.8659,
"step": 13660
},
{
"epoch": 0.21888,
"grad_norm": 0.23173551261425018,
"learning_rate": 0.00015624,
"loss": 0.845,
"step": 13680
},
{
"epoch": 0.2192,
"grad_norm": 0.2360743284225464,
"learning_rate": 0.000156176,
"loss": 0.9332,
"step": 13700
},
{
"epoch": 0.21952,
"grad_norm": 0.23367565870285034,
"learning_rate": 0.00015611200000000003,
"loss": 0.8805,
"step": 13720
},
{
"epoch": 0.21984,
"grad_norm": 0.2483336627483368,
"learning_rate": 0.00015604800000000002,
"loss": 0.9189,
"step": 13740
},
{
"epoch": 0.22016,
"grad_norm": 0.23518161475658417,
"learning_rate": 0.000155984,
"loss": 0.8927,
"step": 13760
},
{
"epoch": 0.22048,
"grad_norm": 0.2596130073070526,
"learning_rate": 0.00015592,
"loss": 0.8879,
"step": 13780
},
{
"epoch": 0.2208,
"grad_norm": 0.20567701756954193,
"learning_rate": 0.000155856,
"loss": 0.8677,
"step": 13800
},
{
"epoch": 0.22112,
"grad_norm": 0.21333087980747223,
"learning_rate": 0.000155792,
"loss": 0.8599,
"step": 13820
},
{
"epoch": 0.22144,
"grad_norm": 0.21102353930473328,
"learning_rate": 0.000155728,
"loss": 0.955,
"step": 13840
},
{
"epoch": 0.22176,
"grad_norm": 0.23368091881275177,
"learning_rate": 0.00015566400000000002,
"loss": 0.9107,
"step": 13860
},
{
"epoch": 0.22208,
"grad_norm": 0.2646392285823822,
"learning_rate": 0.00015560000000000001,
"loss": 0.8605,
"step": 13880
},
{
"epoch": 0.2224,
"grad_norm": 0.2340191900730133,
"learning_rate": 0.000155536,
"loss": 0.8651,
"step": 13900
},
{
"epoch": 0.22272,
"grad_norm": 0.22169966995716095,
"learning_rate": 0.000155472,
"loss": 0.8232,
"step": 13920
},
{
"epoch": 0.22304,
"grad_norm": 0.2382878214120865,
"learning_rate": 0.000155408,
"loss": 0.8581,
"step": 13940
},
{
"epoch": 0.22336,
"grad_norm": 0.22548457980155945,
"learning_rate": 0.000155344,
"loss": 0.9245,
"step": 13960
},
{
"epoch": 0.22368,
"grad_norm": 0.2386041283607483,
"learning_rate": 0.00015528,
"loss": 0.9159,
"step": 13980
},
{
"epoch": 0.224,
"grad_norm": 0.2749132812023163,
"learning_rate": 0.000155216,
"loss": 0.9209,
"step": 14000
},
{
"epoch": 0.22432,
"grad_norm": 0.21053732931613922,
"learning_rate": 0.000155152,
"loss": 0.8694,
"step": 14020
},
{
"epoch": 0.22464,
"grad_norm": 0.21479672193527222,
"learning_rate": 0.000155088,
"loss": 0.8775,
"step": 14040
},
{
"epoch": 0.22496,
"grad_norm": 0.21168935298919678,
"learning_rate": 0.00015502400000000003,
"loss": 0.886,
"step": 14060
},
{
"epoch": 0.22528,
"grad_norm": 0.23790377378463745,
"learning_rate": 0.00015496000000000002,
"loss": 0.9171,
"step": 14080
},
{
"epoch": 0.2256,
"grad_norm": 0.2546534240245819,
"learning_rate": 0.000154896,
"loss": 0.9024,
"step": 14100
},
{
"epoch": 0.22592,
"grad_norm": 0.21047984063625336,
"learning_rate": 0.000154832,
"loss": 0.9181,
"step": 14120
},
{
"epoch": 0.22624,
"grad_norm": 0.18703001737594604,
"learning_rate": 0.000154768,
"loss": 0.9229,
"step": 14140
},
{
"epoch": 0.22656,
"grad_norm": 0.2910281717777252,
"learning_rate": 0.000154704,
"loss": 0.8769,
"step": 14160
},
{
"epoch": 0.22688,
"grad_norm": 0.253282368183136,
"learning_rate": 0.00015464,
"loss": 0.8899,
"step": 14180
},
{
"epoch": 0.2272,
"grad_norm": 0.23244041204452515,
"learning_rate": 0.00015457600000000002,
"loss": 0.8847,
"step": 14200
},
{
"epoch": 0.22752,
"grad_norm": 0.2044428586959839,
"learning_rate": 0.00015451200000000001,
"loss": 0.8558,
"step": 14220
},
{
"epoch": 0.22784,
"grad_norm": 0.2259109914302826,
"learning_rate": 0.000154448,
"loss": 0.9359,
"step": 14240
},
{
"epoch": 0.22816,
"grad_norm": 0.19026106595993042,
"learning_rate": 0.000154384,
"loss": 0.888,
"step": 14260
},
{
"epoch": 0.22848,
"grad_norm": 0.26393407583236694,
"learning_rate": 0.00015432,
"loss": 0.9065,
"step": 14280
},
{
"epoch": 0.2288,
"grad_norm": 0.23802846670150757,
"learning_rate": 0.000154256,
"loss": 0.859,
"step": 14300
},
{
"epoch": 0.22912,
"grad_norm": 0.20962855219841003,
"learning_rate": 0.000154192,
"loss": 0.9316,
"step": 14320
},
{
"epoch": 0.22944,
"grad_norm": 0.24111364781856537,
"learning_rate": 0.000154128,
"loss": 0.8761,
"step": 14340
},
{
"epoch": 0.22976,
"grad_norm": 0.24475687742233276,
"learning_rate": 0.000154064,
"loss": 0.8639,
"step": 14360
},
{
"epoch": 0.23008,
"grad_norm": 0.2179078459739685,
"learning_rate": 0.000154,
"loss": 0.9153,
"step": 14380
},
{
"epoch": 0.2304,
"grad_norm": 0.21389590203762054,
"learning_rate": 0.00015393600000000003,
"loss": 0.8965,
"step": 14400
},
{
"epoch": 0.23072,
"grad_norm": 0.25422388315200806,
"learning_rate": 0.00015387200000000002,
"loss": 0.9019,
"step": 14420
},
{
"epoch": 0.23104,
"grad_norm": 0.25789642333984375,
"learning_rate": 0.000153808,
"loss": 0.8862,
"step": 14440
},
{
"epoch": 0.23136,
"grad_norm": 0.24445413053035736,
"learning_rate": 0.000153744,
"loss": 0.8686,
"step": 14460
},
{
"epoch": 0.23168,
"grad_norm": 0.2562089264392853,
"learning_rate": 0.00015368,
"loss": 0.8724,
"step": 14480
},
{
"epoch": 0.232,
"grad_norm": 0.22422178089618683,
"learning_rate": 0.000153616,
"loss": 0.8126,
"step": 14500
},
{
"epoch": 0.23232,
"grad_norm": 0.2669355571269989,
"learning_rate": 0.000153552,
"loss": 0.9321,
"step": 14520
},
{
"epoch": 0.23264,
"grad_norm": 0.22260543704032898,
"learning_rate": 0.00015348800000000002,
"loss": 0.9001,
"step": 14540
},
{
"epoch": 0.23296,
"grad_norm": 0.2247844934463501,
"learning_rate": 0.00015342400000000001,
"loss": 0.8703,
"step": 14560
},
{
"epoch": 0.23328,
"grad_norm": 0.21349264681339264,
"learning_rate": 0.00015336,
"loss": 0.9012,
"step": 14580
},
{
"epoch": 0.2336,
"grad_norm": 0.20764821767807007,
"learning_rate": 0.000153296,
"loss": 0.8589,
"step": 14600
},
{
"epoch": 0.23392,
"grad_norm": 0.2439945936203003,
"learning_rate": 0.000153232,
"loss": 0.8685,
"step": 14620
},
{
"epoch": 0.23424,
"grad_norm": 0.189644455909729,
"learning_rate": 0.000153168,
"loss": 0.8888,
"step": 14640
},
{
"epoch": 0.23456,
"grad_norm": 0.2418312132358551,
"learning_rate": 0.000153104,
"loss": 0.8894,
"step": 14660
},
{
"epoch": 0.23488,
"grad_norm": 0.2261509746313095,
"learning_rate": 0.00015304,
"loss": 0.8834,
"step": 14680
},
{
"epoch": 0.2352,
"grad_norm": 0.20159967243671417,
"learning_rate": 0.000152976,
"loss": 0.8805,
"step": 14700
},
{
"epoch": 0.23552,
"grad_norm": 0.20319266617298126,
"learning_rate": 0.000152912,
"loss": 0.9304,
"step": 14720
},
{
"epoch": 0.23584,
"grad_norm": 0.26556146144866943,
"learning_rate": 0.00015284800000000002,
"loss": 0.8703,
"step": 14740
},
{
"epoch": 0.23616,
"grad_norm": 0.2388124316930771,
"learning_rate": 0.00015278400000000002,
"loss": 0.9027,
"step": 14760
},
{
"epoch": 0.23648,
"grad_norm": 0.2560880184173584,
"learning_rate": 0.00015272,
"loss": 0.9153,
"step": 14780
},
{
"epoch": 0.2368,
"grad_norm": 0.2266043722629547,
"learning_rate": 0.000152656,
"loss": 0.915,
"step": 14800
},
{
"epoch": 0.23712,
"grad_norm": 0.21880818903446198,
"learning_rate": 0.000152592,
"loss": 0.8509,
"step": 14820
},
{
"epoch": 0.23744,
"grad_norm": 0.2733529806137085,
"learning_rate": 0.000152528,
"loss": 0.8412,
"step": 14840
},
{
"epoch": 0.23776,
"grad_norm": 0.2371928095817566,
"learning_rate": 0.000152464,
"loss": 0.9024,
"step": 14860
},
{
"epoch": 0.23808,
"grad_norm": 0.21131671965122223,
"learning_rate": 0.00015240000000000002,
"loss": 0.9014,
"step": 14880
},
{
"epoch": 0.2384,
"grad_norm": 0.22599981725215912,
"learning_rate": 0.000152336,
"loss": 0.8418,
"step": 14900
},
{
"epoch": 0.23872,
"grad_norm": 0.210512176156044,
"learning_rate": 0.000152272,
"loss": 0.8215,
"step": 14920
},
{
"epoch": 0.23904,
"grad_norm": 0.24387352168560028,
"learning_rate": 0.00015220800000000003,
"loss": 0.9528,
"step": 14940
},
{
"epoch": 0.23936,
"grad_norm": 0.23596692085266113,
"learning_rate": 0.000152144,
"loss": 0.9321,
"step": 14960
},
{
"epoch": 0.23968,
"grad_norm": 0.2662867307662964,
"learning_rate": 0.00015208,
"loss": 0.8687,
"step": 14980
},
{
"epoch": 0.24,
"grad_norm": 0.27276721596717834,
"learning_rate": 0.00015201600000000002,
"loss": 0.8885,
"step": 15000
},
{
"epoch": 0.24032,
"grad_norm": 0.2904922366142273,
"learning_rate": 0.000151952,
"loss": 0.905,
"step": 15020
},
{
"epoch": 0.24064,
"grad_norm": 0.22744856774806976,
"learning_rate": 0.000151888,
"loss": 0.875,
"step": 15040
},
{
"epoch": 0.24096,
"grad_norm": 0.21145053207874298,
"learning_rate": 0.000151824,
"loss": 0.9173,
"step": 15060
},
{
"epoch": 0.24128,
"grad_norm": 0.2397310584783554,
"learning_rate": 0.00015176000000000002,
"loss": 0.9416,
"step": 15080
},
{
"epoch": 0.2416,
"grad_norm": 0.255487322807312,
"learning_rate": 0.00015169600000000002,
"loss": 0.9074,
"step": 15100
},
{
"epoch": 0.24192,
"grad_norm": 0.20825912058353424,
"learning_rate": 0.000151632,
"loss": 0.9081,
"step": 15120
},
{
"epoch": 0.24224,
"grad_norm": 0.21789099276065826,
"learning_rate": 0.000151568,
"loss": 0.8599,
"step": 15140
},
{
"epoch": 0.24256,
"grad_norm": 0.26202690601348877,
"learning_rate": 0.000151504,
"loss": 0.9323,
"step": 15160
},
{
"epoch": 0.24288,
"grad_norm": 0.24351023137569427,
"learning_rate": 0.00015144,
"loss": 0.8613,
"step": 15180
},
{
"epoch": 0.2432,
"grad_norm": 0.24698816239833832,
"learning_rate": 0.000151376,
"loss": 0.9229,
"step": 15200
},
{
"epoch": 0.24352,
"grad_norm": 0.28698813915252686,
"learning_rate": 0.00015131200000000002,
"loss": 0.902,
"step": 15220
},
{
"epoch": 0.24384,
"grad_norm": 0.2190207839012146,
"learning_rate": 0.000151248,
"loss": 0.8347,
"step": 15240
},
{
"epoch": 0.24416,
"grad_norm": 0.25162091851234436,
"learning_rate": 0.000151184,
"loss": 0.9738,
"step": 15260
},
{
"epoch": 0.24448,
"grad_norm": 0.22512441873550415,
"learning_rate": 0.00015112000000000003,
"loss": 0.8972,
"step": 15280
},
{
"epoch": 0.2448,
"grad_norm": 0.2120593637228012,
"learning_rate": 0.000151056,
"loss": 0.9371,
"step": 15300
},
{
"epoch": 0.24512,
"grad_norm": 0.26042282581329346,
"learning_rate": 0.000150992,
"loss": 0.9792,
"step": 15320
},
{
"epoch": 0.24544,
"grad_norm": 0.23215824365615845,
"learning_rate": 0.00015092800000000002,
"loss": 0.8731,
"step": 15340
},
{
"epoch": 0.24576,
"grad_norm": 0.23564760386943817,
"learning_rate": 0.000150864,
"loss": 0.8924,
"step": 15360
},
{
"epoch": 0.24608,
"grad_norm": 0.234059140086174,
"learning_rate": 0.0001508,
"loss": 0.8987,
"step": 15380
},
{
"epoch": 0.2464,
"grad_norm": 0.24413174390792847,
"learning_rate": 0.000150736,
"loss": 0.8814,
"step": 15400
},
{
"epoch": 0.24672,
"grad_norm": 0.19461284577846527,
"learning_rate": 0.00015067200000000002,
"loss": 0.8885,
"step": 15420
},
{
"epoch": 0.24704,
"grad_norm": 0.21257640421390533,
"learning_rate": 0.00015060800000000002,
"loss": 0.8726,
"step": 15440
},
{
"epoch": 0.24736,
"grad_norm": 0.19599197804927826,
"learning_rate": 0.000150544,
"loss": 0.8769,
"step": 15460
},
{
"epoch": 0.24768,
"grad_norm": 0.2362959086894989,
"learning_rate": 0.00015048,
"loss": 0.9387,
"step": 15480
},
{
"epoch": 0.248,
"grad_norm": 0.22756963968276978,
"learning_rate": 0.000150416,
"loss": 0.8997,
"step": 15500
},
{
"epoch": 0.24832,
"grad_norm": 0.20421338081359863,
"learning_rate": 0.000150352,
"loss": 0.8393,
"step": 15520
},
{
"epoch": 0.24864,
"grad_norm": 0.22076188027858734,
"learning_rate": 0.000150288,
"loss": 0.9485,
"step": 15540
},
{
"epoch": 0.24896,
"grad_norm": 0.2736372649669647,
"learning_rate": 0.00015022400000000002,
"loss": 0.9198,
"step": 15560
},
{
"epoch": 0.24928,
"grad_norm": 0.21683241426944733,
"learning_rate": 0.00015016,
"loss": 0.896,
"step": 15580
},
{
"epoch": 0.2496,
"grad_norm": 0.2821408212184906,
"learning_rate": 0.000150096,
"loss": 0.9136,
"step": 15600
},
{
"epoch": 0.24992,
"grad_norm": 0.2574373781681061,
"learning_rate": 0.00015003200000000003,
"loss": 0.867,
"step": 15620
},
{
"epoch": 0.25024,
"grad_norm": 0.25345656275749207,
"learning_rate": 0.000149968,
"loss": 0.8558,
"step": 15640
},
{
"epoch": 0.25056,
"grad_norm": 0.22354301810264587,
"learning_rate": 0.000149904,
"loss": 0.8877,
"step": 15660
},
{
"epoch": 0.25088,
"grad_norm": 0.24617154896259308,
"learning_rate": 0.00014984000000000002,
"loss": 0.9129,
"step": 15680
},
{
"epoch": 0.2512,
"grad_norm": 0.2457919418811798,
"learning_rate": 0.000149776,
"loss": 0.8808,
"step": 15700
},
{
"epoch": 0.25152,
"grad_norm": 0.24559831619262695,
"learning_rate": 0.000149712,
"loss": 0.8843,
"step": 15720
},
{
"epoch": 0.25184,
"grad_norm": 0.23059086501598358,
"learning_rate": 0.0001496512,
"loss": 0.8913,
"step": 15740
},
{
"epoch": 0.25216,
"grad_norm": 0.23776483535766602,
"learning_rate": 0.0001495872,
"loss": 0.8903,
"step": 15760
},
{
"epoch": 0.25248,
"grad_norm": 0.22387710213661194,
"learning_rate": 0.0001495232,
"loss": 0.8357,
"step": 15780
},
{
"epoch": 0.2528,
"grad_norm": 0.25012654066085815,
"learning_rate": 0.0001494592,
"loss": 0.9193,
"step": 15800
},
{
"epoch": 0.25312,
"grad_norm": 0.24608831107616425,
"learning_rate": 0.0001493952,
"loss": 0.9439,
"step": 15820
},
{
"epoch": 0.25344,
"grad_norm": 0.2951606512069702,
"learning_rate": 0.0001493312,
"loss": 0.9266,
"step": 15840
},
{
"epoch": 0.25376,
"grad_norm": 0.2593064606189728,
"learning_rate": 0.0001492672,
"loss": 0.8889,
"step": 15860
},
{
"epoch": 0.25408,
"grad_norm": 0.26488035917282104,
"learning_rate": 0.0001492032,
"loss": 0.8961,
"step": 15880
},
{
"epoch": 0.2544,
"grad_norm": 0.23882393538951874,
"learning_rate": 0.00014913920000000002,
"loss": 0.8546,
"step": 15900
},
{
"epoch": 0.25472,
"grad_norm": 0.22206124663352966,
"learning_rate": 0.00014907520000000002,
"loss": 0.8872,
"step": 15920
},
{
"epoch": 0.25504,
"grad_norm": 0.2286020815372467,
"learning_rate": 0.00014901120000000001,
"loss": 0.8716,
"step": 15940
},
{
"epoch": 0.25536,
"grad_norm": 0.2474469095468521,
"learning_rate": 0.0001489472,
"loss": 0.876,
"step": 15960
},
{
"epoch": 0.25568,
"grad_norm": 0.24693526327610016,
"learning_rate": 0.0001488832,
"loss": 0.8774,
"step": 15980
},
{
"epoch": 0.256,
"grad_norm": 0.25829070806503296,
"learning_rate": 0.0001488192,
"loss": 0.9659,
"step": 16000
},
{
"epoch": 0.25632,
"grad_norm": 0.19258326292037964,
"learning_rate": 0.0001487552,
"loss": 0.8895,
"step": 16020
},
{
"epoch": 0.25664,
"grad_norm": 0.22530287504196167,
"learning_rate": 0.00014869120000000002,
"loss": 0.8625,
"step": 16040
},
{
"epoch": 0.25696,
"grad_norm": 0.23685386776924133,
"learning_rate": 0.0001486272,
"loss": 0.8744,
"step": 16060
},
{
"epoch": 0.25728,
"grad_norm": 0.2815619111061096,
"learning_rate": 0.0001485632,
"loss": 0.9046,
"step": 16080
},
{
"epoch": 0.2576,
"grad_norm": 0.2278144359588623,
"learning_rate": 0.0001484992,
"loss": 0.9233,
"step": 16100
},
{
"epoch": 0.25792,
"grad_norm": 0.23006929457187653,
"learning_rate": 0.0001484352,
"loss": 0.8888,
"step": 16120
},
{
"epoch": 0.25824,
"grad_norm": 0.23313170671463013,
"learning_rate": 0.0001483712,
"loss": 0.8838,
"step": 16140
},
{
"epoch": 0.25856,
"grad_norm": 0.1918276846408844,
"learning_rate": 0.0001483072,
"loss": 0.9102,
"step": 16160
},
{
"epoch": 0.25888,
"grad_norm": 0.22128306329250336,
"learning_rate": 0.0001482432,
"loss": 0.9111,
"step": 16180
},
{
"epoch": 0.2592,
"grad_norm": 0.2663705348968506,
"learning_rate": 0.0001481792,
"loss": 0.8658,
"step": 16200
},
{
"epoch": 0.25952,
"grad_norm": 0.2246493250131607,
"learning_rate": 0.0001481152,
"loss": 0.9076,
"step": 16220
},
{
"epoch": 0.25984,
"grad_norm": 0.276429146528244,
"learning_rate": 0.00014805120000000002,
"loss": 0.893,
"step": 16240
},
{
"epoch": 0.26016,
"grad_norm": 0.25722208619117737,
"learning_rate": 0.00014798720000000002,
"loss": 0.9124,
"step": 16260
},
{
"epoch": 0.26048,
"grad_norm": 0.2293781042098999,
"learning_rate": 0.00014792320000000001,
"loss": 0.9055,
"step": 16280
},
{
"epoch": 0.2608,
"grad_norm": 0.2425844967365265,
"learning_rate": 0.0001478592,
"loss": 0.9552,
"step": 16300
},
{
"epoch": 0.26112,
"grad_norm": 0.21894103288650513,
"learning_rate": 0.0001477952,
"loss": 0.9094,
"step": 16320
},
{
"epoch": 0.26144,
"grad_norm": 0.18762537837028503,
"learning_rate": 0.0001477312,
"loss": 0.864,
"step": 16340
},
{
"epoch": 0.26176,
"grad_norm": 0.24802207946777344,
"learning_rate": 0.0001476672,
"loss": 0.94,
"step": 16360
},
{
"epoch": 0.26208,
"grad_norm": 0.2168876677751541,
"learning_rate": 0.00014760320000000002,
"loss": 0.8795,
"step": 16380
},
{
"epoch": 0.2624,
"grad_norm": 0.21225641667842865,
"learning_rate": 0.0001475392,
"loss": 0.8783,
"step": 16400
},
{
"epoch": 0.26272,
"grad_norm": 0.22440536320209503,
"learning_rate": 0.0001474752,
"loss": 0.8984,
"step": 16420
},
{
"epoch": 0.26304,
"grad_norm": 0.21807946264743805,
"learning_rate": 0.0001474112,
"loss": 0.8949,
"step": 16440
},
{
"epoch": 0.26336,
"grad_norm": 0.27320024371147156,
"learning_rate": 0.0001473472,
"loss": 0.9225,
"step": 16460
},
{
"epoch": 0.26368,
"grad_norm": 0.2062409669160843,
"learning_rate": 0.0001472832,
"loss": 0.8705,
"step": 16480
},
{
"epoch": 0.264,
"grad_norm": 0.2158362716436386,
"learning_rate": 0.00014721920000000002,
"loss": 0.8971,
"step": 16500
},
{
"epoch": 0.26432,
"grad_norm": 0.38786885142326355,
"learning_rate": 0.0001471552,
"loss": 0.9119,
"step": 16520
},
{
"epoch": 0.26464,
"grad_norm": 0.17009785771369934,
"learning_rate": 0.0001470912,
"loss": 0.8909,
"step": 16540
},
{
"epoch": 0.26496,
"grad_norm": 0.1993030607700348,
"learning_rate": 0.0001470272,
"loss": 0.8962,
"step": 16560
},
{
"epoch": 0.26528,
"grad_norm": 0.20363526046276093,
"learning_rate": 0.00014696320000000002,
"loss": 0.8829,
"step": 16580
},
{
"epoch": 0.2656,
"grad_norm": 0.21756501495838165,
"learning_rate": 0.00014689920000000002,
"loss": 0.8796,
"step": 16600
},
{
"epoch": 0.26592,
"grad_norm": 0.23114700615406036,
"learning_rate": 0.0001468352,
"loss": 0.8857,
"step": 16620
},
{
"epoch": 0.26624,
"grad_norm": 0.2643290162086487,
"learning_rate": 0.0001467712,
"loss": 0.8865,
"step": 16640
},
{
"epoch": 0.26656,
"grad_norm": 0.2651253640651703,
"learning_rate": 0.0001467072,
"loss": 0.9395,
"step": 16660
},
{
"epoch": 0.26688,
"grad_norm": 0.2604687511920929,
"learning_rate": 0.0001466432,
"loss": 0.9293,
"step": 16680
},
{
"epoch": 0.2672,
"grad_norm": 0.26383012533187866,
"learning_rate": 0.0001465792,
"loss": 0.9427,
"step": 16700
},
{
"epoch": 0.26752,
"grad_norm": 0.21108990907669067,
"learning_rate": 0.00014651520000000002,
"loss": 0.878,
"step": 16720
},
{
"epoch": 0.26784,
"grad_norm": 0.19841574132442474,
"learning_rate": 0.0001464512,
"loss": 0.9178,
"step": 16740
},
{
"epoch": 0.26816,
"grad_norm": 0.18484389781951904,
"learning_rate": 0.0001463872,
"loss": 0.8609,
"step": 16760
},
{
"epoch": 0.26848,
"grad_norm": 0.22383546829223633,
"learning_rate": 0.00014632320000000003,
"loss": 0.8582,
"step": 16780
},
{
"epoch": 0.2688,
"grad_norm": 0.23623542487621307,
"learning_rate": 0.0001462592,
"loss": 0.9065,
"step": 16800
},
{
"epoch": 0.26912,
"grad_norm": 0.20453502237796783,
"learning_rate": 0.0001461952,
"loss": 0.873,
"step": 16820
},
{
"epoch": 0.26944,
"grad_norm": 0.2092786431312561,
"learning_rate": 0.00014613120000000001,
"loss": 0.8592,
"step": 16840
},
{
"epoch": 0.26976,
"grad_norm": 0.35000941157341003,
"learning_rate": 0.0001460672,
"loss": 0.9135,
"step": 16860
},
{
"epoch": 0.27008,
"grad_norm": 0.21759675443172455,
"learning_rate": 0.0001460032,
"loss": 0.9318,
"step": 16880
},
{
"epoch": 0.2704,
"grad_norm": 0.21129532158374786,
"learning_rate": 0.0001459392,
"loss": 0.8726,
"step": 16900
},
{
"epoch": 0.27072,
"grad_norm": 0.2558363974094391,
"learning_rate": 0.00014587520000000002,
"loss": 0.8606,
"step": 16920
},
{
"epoch": 0.27104,
"grad_norm": 0.24021393060684204,
"learning_rate": 0.00014581120000000002,
"loss": 0.9545,
"step": 16940
},
{
"epoch": 0.27136,
"grad_norm": 0.22521884739398956,
"learning_rate": 0.0001457472,
"loss": 0.8983,
"step": 16960
},
{
"epoch": 0.27168,
"grad_norm": 0.19116589426994324,
"learning_rate": 0.0001456832,
"loss": 0.8628,
"step": 16980
},
{
"epoch": 0.272,
"grad_norm": 0.25496795773506165,
"learning_rate": 0.0001456192,
"loss": 0.8648,
"step": 17000
},
{
"epoch": 0.27232,
"grad_norm": 0.21997754275798798,
"learning_rate": 0.0001455552,
"loss": 0.8864,
"step": 17020
},
{
"epoch": 0.27264,
"grad_norm": 0.2255709022283554,
"learning_rate": 0.0001454912,
"loss": 0.8818,
"step": 17040
},
{
"epoch": 0.27296,
"grad_norm": 0.2319864183664322,
"learning_rate": 0.00014542720000000002,
"loss": 0.9234,
"step": 17060
},
{
"epoch": 0.27328,
"grad_norm": 0.25236740708351135,
"learning_rate": 0.0001453632,
"loss": 0.9205,
"step": 17080
},
{
"epoch": 0.2736,
"grad_norm": 0.25668826699256897,
"learning_rate": 0.0001452992,
"loss": 0.8697,
"step": 17100
},
{
"epoch": 0.27392,
"grad_norm": 0.23997314274311066,
"learning_rate": 0.00014523520000000003,
"loss": 0.8752,
"step": 17120
},
{
"epoch": 0.27424,
"grad_norm": 0.26037994027137756,
"learning_rate": 0.0001451712,
"loss": 0.8955,
"step": 17140
},
{
"epoch": 0.27456,
"grad_norm": 0.2793257534503937,
"learning_rate": 0.0001451072,
"loss": 0.9114,
"step": 17160
},
{
"epoch": 0.27488,
"grad_norm": 0.2247275561094284,
"learning_rate": 0.00014504320000000001,
"loss": 0.8595,
"step": 17180
},
{
"epoch": 0.2752,
"grad_norm": 0.17990809679031372,
"learning_rate": 0.0001449792,
"loss": 0.8705,
"step": 17200
},
{
"epoch": 0.27552,
"grad_norm": 0.25493231415748596,
"learning_rate": 0.0001449152,
"loss": 0.8974,
"step": 17220
},
{
"epoch": 0.27584,
"grad_norm": 0.2407209277153015,
"learning_rate": 0.0001448512,
"loss": 0.8411,
"step": 17240
},
{
"epoch": 0.27616,
"grad_norm": 0.23049895465373993,
"learning_rate": 0.00014478720000000002,
"loss": 0.8879,
"step": 17260
},
{
"epoch": 0.27648,
"grad_norm": 0.2609173059463501,
"learning_rate": 0.00014472320000000002,
"loss": 0.841,
"step": 17280
},
{
"epoch": 0.2768,
"grad_norm": 0.25201836228370667,
"learning_rate": 0.0001446592,
"loss": 0.9323,
"step": 17300
},
{
"epoch": 0.27712,
"grad_norm": 0.2098960429430008,
"learning_rate": 0.0001445952,
"loss": 0.9157,
"step": 17320
},
{
"epoch": 0.27744,
"grad_norm": 0.20928995311260223,
"learning_rate": 0.0001445312,
"loss": 0.9038,
"step": 17340
},
{
"epoch": 0.27776,
"grad_norm": 0.27435973286628723,
"learning_rate": 0.0001444672,
"loss": 0.9151,
"step": 17360
},
{
"epoch": 0.27808,
"grad_norm": 0.21046458184719086,
"learning_rate": 0.0001444032,
"loss": 0.916,
"step": 17380
},
{
"epoch": 0.2784,
"grad_norm": 0.21415123343467712,
"learning_rate": 0.00014433920000000002,
"loss": 0.8068,
"step": 17400
},
{
"epoch": 0.27872,
"grad_norm": 0.23788805305957794,
"learning_rate": 0.0001442752,
"loss": 0.897,
"step": 17420
},
{
"epoch": 0.27904,
"grad_norm": 0.27183711528778076,
"learning_rate": 0.0001442112,
"loss": 0.9256,
"step": 17440
},
{
"epoch": 0.27936,
"grad_norm": 0.23290027678012848,
"learning_rate": 0.00014414720000000003,
"loss": 0.8824,
"step": 17460
},
{
"epoch": 0.27968,
"grad_norm": 0.228986918926239,
"learning_rate": 0.0001440832,
"loss": 0.8986,
"step": 17480
},
{
"epoch": 0.28,
"grad_norm": 0.2455400675535202,
"learning_rate": 0.0001440192,
"loss": 0.9284,
"step": 17500
},
{
"epoch": 0.28032,
"grad_norm": 0.23576192557811737,
"learning_rate": 0.00014395520000000001,
"loss": 0.9038,
"step": 17520
},
{
"epoch": 0.28064,
"grad_norm": 0.24957694113254547,
"learning_rate": 0.0001438912,
"loss": 0.8735,
"step": 17540
},
{
"epoch": 0.28096,
"grad_norm": 0.2399289608001709,
"learning_rate": 0.0001438272,
"loss": 0.8819,
"step": 17560
},
{
"epoch": 0.28128,
"grad_norm": 0.2208934873342514,
"learning_rate": 0.0001437664,
"loss": 0.8615,
"step": 17580
},
{
"epoch": 0.2816,
"grad_norm": 0.2641635835170746,
"learning_rate": 0.0001437024,
"loss": 0.8801,
"step": 17600
},
{
"epoch": 0.28192,
"grad_norm": 0.21068689227104187,
"learning_rate": 0.00014363840000000002,
"loss": 0.8928,
"step": 17620
},
{
"epoch": 0.28224,
"grad_norm": 0.22611364722251892,
"learning_rate": 0.0001435744,
"loss": 0.8997,
"step": 17640
},
{
"epoch": 0.28256,
"grad_norm": 0.21559686958789825,
"learning_rate": 0.0001435104,
"loss": 0.8757,
"step": 17660
},
{
"epoch": 0.28288,
"grad_norm": 0.2164774090051651,
"learning_rate": 0.0001434464,
"loss": 0.8621,
"step": 17680
},
{
"epoch": 0.2832,
"grad_norm": 0.2710246443748474,
"learning_rate": 0.0001433824,
"loss": 0.8788,
"step": 17700
},
{
"epoch": 0.28352,
"grad_norm": 0.22402608394622803,
"learning_rate": 0.0001433184,
"loss": 0.8655,
"step": 17720
},
{
"epoch": 0.28384,
"grad_norm": 0.240563303232193,
"learning_rate": 0.00014325440000000002,
"loss": 0.831,
"step": 17740
},
{
"epoch": 0.28416,
"grad_norm": 0.2877085208892822,
"learning_rate": 0.00014319040000000002,
"loss": 0.8939,
"step": 17760
},
{
"epoch": 0.28448,
"grad_norm": 0.21592877805233002,
"learning_rate": 0.0001431264,
"loss": 0.8441,
"step": 17780
},
{
"epoch": 0.2848,
"grad_norm": 1.138694167137146,
"learning_rate": 0.0001430624,
"loss": 0.921,
"step": 17800
},
{
"epoch": 0.28512,
"grad_norm": 0.22125540673732758,
"learning_rate": 0.0001429984,
"loss": 0.8695,
"step": 17820
},
{
"epoch": 0.28544,
"grad_norm": 0.2059083729982376,
"learning_rate": 0.0001429344,
"loss": 0.8554,
"step": 17840
},
{
"epoch": 0.28576,
"grad_norm": 0.2463064044713974,
"learning_rate": 0.0001428704,
"loss": 0.9121,
"step": 17860
},
{
"epoch": 0.28608,
"grad_norm": 0.228254035115242,
"learning_rate": 0.00014280640000000002,
"loss": 0.8945,
"step": 17880
},
{
"epoch": 0.2864,
"grad_norm": 0.23662838339805603,
"learning_rate": 0.0001427424,
"loss": 0.9431,
"step": 17900
},
{
"epoch": 0.28672,
"grad_norm": 0.22641515731811523,
"learning_rate": 0.0001426784,
"loss": 0.9033,
"step": 17920
},
{
"epoch": 0.28704,
"grad_norm": 0.23892265558242798,
"learning_rate": 0.0001426144,
"loss": 0.8623,
"step": 17940
},
{
"epoch": 0.28736,
"grad_norm": 0.2975479066371918,
"learning_rate": 0.00014255040000000002,
"loss": 0.8474,
"step": 17960
},
{
"epoch": 0.28768,
"grad_norm": 0.23718002438545227,
"learning_rate": 0.0001424864,
"loss": 0.9099,
"step": 17980
},
{
"epoch": 0.288,
"grad_norm": 0.2706455886363983,
"learning_rate": 0.0001424224,
"loss": 0.9357,
"step": 18000
},
{
"epoch": 0.28832,
"grad_norm": 0.260484516620636,
"learning_rate": 0.0001423584,
"loss": 0.9144,
"step": 18020
},
{
"epoch": 0.28864,
"grad_norm": 0.2651614844799042,
"learning_rate": 0.0001422944,
"loss": 0.8764,
"step": 18040
},
{
"epoch": 0.28896,
"grad_norm": 0.18904343247413635,
"learning_rate": 0.0001422304,
"loss": 0.9312,
"step": 18060
},
{
"epoch": 0.28928,
"grad_norm": 0.22476926445960999,
"learning_rate": 0.00014216640000000002,
"loss": 0.8732,
"step": 18080
},
{
"epoch": 0.2896,
"grad_norm": 0.25984928011894226,
"learning_rate": 0.00014210240000000002,
"loss": 0.8511,
"step": 18100
},
{
"epoch": 0.28992,
"grad_norm": 0.24901899695396423,
"learning_rate": 0.0001420384,
"loss": 0.8844,
"step": 18120
},
{
"epoch": 0.29024,
"grad_norm": 0.2536833882331848,
"learning_rate": 0.0001419744,
"loss": 0.9403,
"step": 18140
},
{
"epoch": 0.29056,
"grad_norm": 0.24617129564285278,
"learning_rate": 0.0001419104,
"loss": 0.9463,
"step": 18160
},
{
"epoch": 0.29088,
"grad_norm": 0.23471851646900177,
"learning_rate": 0.0001418464,
"loss": 0.8977,
"step": 18180
},
{
"epoch": 0.2912,
"grad_norm": 0.22955600917339325,
"learning_rate": 0.0001417824,
"loss": 0.87,
"step": 18200
},
{
"epoch": 0.29152,
"grad_norm": 0.24333995580673218,
"learning_rate": 0.00014171840000000002,
"loss": 0.8879,
"step": 18220
},
{
"epoch": 0.29184,
"grad_norm": 0.20572280883789062,
"learning_rate": 0.0001416544,
"loss": 0.8896,
"step": 18240
},
{
"epoch": 0.29216,
"grad_norm": 0.26233595609664917,
"learning_rate": 0.0001415904,
"loss": 0.9508,
"step": 18260
},
{
"epoch": 0.29248,
"grad_norm": 0.25485649704933167,
"learning_rate": 0.00014152640000000003,
"loss": 0.8671,
"step": 18280
},
{
"epoch": 0.2928,
"grad_norm": 0.2335824966430664,
"learning_rate": 0.00014146240000000002,
"loss": 0.8607,
"step": 18300
},
{
"epoch": 0.29312,
"grad_norm": 0.23609523475170135,
"learning_rate": 0.0001413984,
"loss": 0.8851,
"step": 18320
},
{
"epoch": 0.29344,
"grad_norm": 0.23384864628314972,
"learning_rate": 0.0001413344,
"loss": 0.8432,
"step": 18340
},
{
"epoch": 0.29376,
"grad_norm": 0.20632806420326233,
"learning_rate": 0.0001412704,
"loss": 0.8929,
"step": 18360
},
{
"epoch": 0.29408,
"grad_norm": 0.23396439850330353,
"learning_rate": 0.0001412064,
"loss": 0.9085,
"step": 18380
},
{
"epoch": 0.2944,
"grad_norm": 0.22463780641555786,
"learning_rate": 0.0001411424,
"loss": 0.882,
"step": 18400
},
{
"epoch": 0.29472,
"grad_norm": 0.2543090581893921,
"learning_rate": 0.00014107840000000002,
"loss": 0.8529,
"step": 18420
},
{
"epoch": 0.29504,
"grad_norm": 0.23137034475803375,
"learning_rate": 0.00014101440000000002,
"loss": 0.8763,
"step": 18440
},
{
"epoch": 0.29536,
"grad_norm": 0.2112882137298584,
"learning_rate": 0.0001409504,
"loss": 0.8764,
"step": 18460
},
{
"epoch": 0.29568,
"grad_norm": 0.26310333609580994,
"learning_rate": 0.0001408864,
"loss": 0.8922,
"step": 18480
},
{
"epoch": 0.296,
"grad_norm": 0.17188023030757904,
"learning_rate": 0.0001408224,
"loss": 0.8667,
"step": 18500
},
{
"epoch": 0.29632,
"grad_norm": 0.21832036972045898,
"learning_rate": 0.0001407584,
"loss": 0.8365,
"step": 18520
},
{
"epoch": 0.29664,
"grad_norm": 0.22188645601272583,
"learning_rate": 0.0001406944,
"loss": 0.8851,
"step": 18540
},
{
"epoch": 0.29696,
"grad_norm": 0.21572217345237732,
"learning_rate": 0.00014063040000000001,
"loss": 0.9154,
"step": 18560
},
{
"epoch": 0.29728,
"grad_norm": 0.27105310559272766,
"learning_rate": 0.0001405664,
"loss": 0.9377,
"step": 18580
},
{
"epoch": 0.2976,
"grad_norm": 0.24213473498821259,
"learning_rate": 0.0001405024,
"loss": 0.8637,
"step": 18600
},
{
"epoch": 0.29792,
"grad_norm": 0.2412949949502945,
"learning_rate": 0.00014043840000000003,
"loss": 0.8406,
"step": 18620
},
{
"epoch": 0.29824,
"grad_norm": 0.22756318747997284,
"learning_rate": 0.00014037440000000002,
"loss": 0.9136,
"step": 18640
},
{
"epoch": 0.29856,
"grad_norm": 0.27023327350616455,
"learning_rate": 0.0001403104,
"loss": 0.903,
"step": 18660
},
{
"epoch": 0.29888,
"grad_norm": 0.2557690739631653,
"learning_rate": 0.0001402464,
"loss": 0.91,
"step": 18680
},
{
"epoch": 0.2992,
"grad_norm": 0.2063320279121399,
"learning_rate": 0.0001401824,
"loss": 0.8563,
"step": 18700
},
{
"epoch": 0.29952,
"grad_norm": 0.2544916570186615,
"learning_rate": 0.0001401184,
"loss": 0.874,
"step": 18720
},
{
"epoch": 0.29984,
"grad_norm": 0.2347075492143631,
"learning_rate": 0.0001400544,
"loss": 0.9113,
"step": 18740
},
{
"epoch": 0.30016,
"grad_norm": 0.22835484147071838,
"learning_rate": 0.00013999040000000002,
"loss": 0.9441,
"step": 18760
},
{
"epoch": 0.30048,
"grad_norm": 0.2611948847770691,
"learning_rate": 0.00013992640000000002,
"loss": 0.8588,
"step": 18780
},
{
"epoch": 0.3008,
"grad_norm": 0.2237863838672638,
"learning_rate": 0.0001398624,
"loss": 0.8968,
"step": 18800
},
{
"epoch": 0.30112,
"grad_norm": 0.25444677472114563,
"learning_rate": 0.0001397984,
"loss": 0.8699,
"step": 18820
},
{
"epoch": 0.30144,
"grad_norm": 0.22238390147686005,
"learning_rate": 0.0001397344,
"loss": 0.8958,
"step": 18840
},
{
"epoch": 0.30176,
"grad_norm": 0.2588092088699341,
"learning_rate": 0.0001396704,
"loss": 0.8379,
"step": 18860
},
{
"epoch": 0.30208,
"grad_norm": 0.32045722007751465,
"learning_rate": 0.0001396064,
"loss": 0.8985,
"step": 18880
},
{
"epoch": 0.3024,
"grad_norm": 0.277260959148407,
"learning_rate": 0.00013954240000000001,
"loss": 0.9429,
"step": 18900
},
{
"epoch": 0.30272,
"grad_norm": 0.21729040145874023,
"learning_rate": 0.0001394784,
"loss": 0.8797,
"step": 18920
},
{
"epoch": 0.30304,
"grad_norm": 0.21643051505088806,
"learning_rate": 0.0001394144,
"loss": 0.8601,
"step": 18940
},
{
"epoch": 0.30336,
"grad_norm": 0.2492791712284088,
"learning_rate": 0.00013935040000000003,
"loss": 0.8796,
"step": 18960
},
{
"epoch": 0.30368,
"grad_norm": 0.22013212740421295,
"learning_rate": 0.00013928640000000002,
"loss": 0.8746,
"step": 18980
},
{
"epoch": 0.304,
"grad_norm": 0.25737157464027405,
"learning_rate": 0.0001392224,
"loss": 0.9122,
"step": 19000
},
{
"epoch": 0.30432,
"grad_norm": 0.2505769729614258,
"learning_rate": 0.0001391584,
"loss": 0.9256,
"step": 19020
},
{
"epoch": 0.30464,
"grad_norm": 0.23468899726867676,
"learning_rate": 0.0001390944,
"loss": 0.9104,
"step": 19040
},
{
"epoch": 0.30496,
"grad_norm": 0.2267698347568512,
"learning_rate": 0.0001390304,
"loss": 0.9157,
"step": 19060
},
{
"epoch": 0.30528,
"grad_norm": 0.25038978457450867,
"learning_rate": 0.0001389664,
"loss": 0.9413,
"step": 19080
},
{
"epoch": 0.3056,
"grad_norm": 0.2678689658641815,
"learning_rate": 0.00013890240000000002,
"loss": 0.879,
"step": 19100
},
{
"epoch": 0.30592,
"grad_norm": 0.2314527928829193,
"learning_rate": 0.00013883840000000002,
"loss": 0.8693,
"step": 19120
},
{
"epoch": 0.30624,
"grad_norm": 0.2365267276763916,
"learning_rate": 0.0001387744,
"loss": 0.8498,
"step": 19140
},
{
"epoch": 0.30656,
"grad_norm": 0.24371282756328583,
"learning_rate": 0.0001387104,
"loss": 0.8604,
"step": 19160
},
{
"epoch": 0.30688,
"grad_norm": 0.3142736554145813,
"learning_rate": 0.0001386464,
"loss": 0.8808,
"step": 19180
},
{
"epoch": 0.3072,
"grad_norm": 0.29017287492752075,
"learning_rate": 0.0001385824,
"loss": 0.9096,
"step": 19200
},
{
"epoch": 0.30752,
"grad_norm": 0.26269444823265076,
"learning_rate": 0.0001385184,
"loss": 0.8378,
"step": 19220
},
{
"epoch": 0.30784,
"grad_norm": 0.22420069575309753,
"learning_rate": 0.00013845440000000001,
"loss": 0.8637,
"step": 19240
},
{
"epoch": 0.30816,
"grad_norm": 0.237029567360878,
"learning_rate": 0.0001383904,
"loss": 0.9015,
"step": 19260
},
{
"epoch": 0.30848,
"grad_norm": 0.2280898541212082,
"learning_rate": 0.0001383264,
"loss": 0.888,
"step": 19280
},
{
"epoch": 0.3088,
"grad_norm": 0.27057039737701416,
"learning_rate": 0.00013826240000000003,
"loss": 0.8871,
"step": 19300
},
{
"epoch": 0.30912,
"grad_norm": 0.20367485284805298,
"learning_rate": 0.00013819840000000002,
"loss": 0.8596,
"step": 19320
},
{
"epoch": 0.30944,
"grad_norm": 0.2941890060901642,
"learning_rate": 0.0001381344,
"loss": 0.9363,
"step": 19340
},
{
"epoch": 0.30976,
"grad_norm": 0.2736791968345642,
"learning_rate": 0.0001380704,
"loss": 0.8547,
"step": 19360
},
{
"epoch": 0.31008,
"grad_norm": 0.26755064725875854,
"learning_rate": 0.0001380064,
"loss": 0.8846,
"step": 19380
},
{
"epoch": 0.3104,
"grad_norm": 0.2217639535665512,
"learning_rate": 0.0001379424,
"loss": 0.9354,
"step": 19400
},
{
"epoch": 0.31072,
"grad_norm": 0.26769986748695374,
"learning_rate": 0.0001378784,
"loss": 0.8951,
"step": 19420
},
{
"epoch": 0.31104,
"grad_norm": 0.2696346938610077,
"learning_rate": 0.00013781440000000002,
"loss": 0.886,
"step": 19440
},
{
"epoch": 0.31136,
"grad_norm": 0.24080106616020203,
"learning_rate": 0.00013775040000000002,
"loss": 0.8534,
"step": 19460
},
{
"epoch": 0.31168,
"grad_norm": 0.2640572786331177,
"learning_rate": 0.0001376864,
"loss": 0.9043,
"step": 19480
},
{
"epoch": 0.312,
"grad_norm": 0.22785134613513947,
"learning_rate": 0.0001376224,
"loss": 0.9025,
"step": 19500
},
{
"epoch": 0.31232,
"grad_norm": 0.25118863582611084,
"learning_rate": 0.0001375584,
"loss": 0.9085,
"step": 19520
},
{
"epoch": 0.31264,
"grad_norm": 0.25034481287002563,
"learning_rate": 0.0001374944,
"loss": 0.9055,
"step": 19540
},
{
"epoch": 0.31296,
"grad_norm": 0.2614835798740387,
"learning_rate": 0.0001374304,
"loss": 0.8826,
"step": 19560
},
{
"epoch": 0.31328,
"grad_norm": 0.24192634224891663,
"learning_rate": 0.0001373664,
"loss": 0.8654,
"step": 19580
},
{
"epoch": 0.3136,
"grad_norm": 0.21241247653961182,
"learning_rate": 0.0001373024,
"loss": 0.8953,
"step": 19600
},
{
"epoch": 0.31392,
"grad_norm": 0.2508241832256317,
"learning_rate": 0.0001372384,
"loss": 0.8859,
"step": 19620
},
{
"epoch": 0.31424,
"grad_norm": 0.21773919463157654,
"learning_rate": 0.00013717440000000003,
"loss": 0.9142,
"step": 19640
},
{
"epoch": 0.31456,
"grad_norm": 0.24207216501235962,
"learning_rate": 0.00013711040000000002,
"loss": 0.9151,
"step": 19660
},
{
"epoch": 0.31488,
"grad_norm": 0.2574104368686676,
"learning_rate": 0.0001370464,
"loss": 0.9325,
"step": 19680
},
{
"epoch": 0.3152,
"grad_norm": 0.27459943294525146,
"learning_rate": 0.0001369824,
"loss": 0.9121,
"step": 19700
},
{
"epoch": 0.31552,
"grad_norm": 0.29292821884155273,
"learning_rate": 0.0001369184,
"loss": 0.8728,
"step": 19720
},
{
"epoch": 0.31584,
"grad_norm": 0.24154408276081085,
"learning_rate": 0.0001368544,
"loss": 0.9036,
"step": 19740
},
{
"epoch": 0.31616,
"grad_norm": 0.24163678288459778,
"learning_rate": 0.0001367904,
"loss": 0.894,
"step": 19760
},
{
"epoch": 0.31648,
"grad_norm": 0.24694600701332092,
"learning_rate": 0.00013672640000000002,
"loss": 0.8377,
"step": 19780
},
{
"epoch": 0.3168,
"grad_norm": 0.24963949620723724,
"learning_rate": 0.00013666240000000001,
"loss": 0.8749,
"step": 19800
},
{
"epoch": 0.31712,
"grad_norm": 0.22724537551403046,
"learning_rate": 0.0001365984,
"loss": 0.8355,
"step": 19820
},
{
"epoch": 0.31744,
"grad_norm": 0.25323203206062317,
"learning_rate": 0.0001365344,
"loss": 0.8674,
"step": 19840
},
{
"epoch": 0.31776,
"grad_norm": 0.23880919814109802,
"learning_rate": 0.0001364704,
"loss": 0.8944,
"step": 19860
},
{
"epoch": 0.31808,
"grad_norm": 0.27053314447402954,
"learning_rate": 0.0001364064,
"loss": 0.9309,
"step": 19880
},
{
"epoch": 0.3184,
"grad_norm": 0.23681063950061798,
"learning_rate": 0.0001363424,
"loss": 0.9299,
"step": 19900
},
{
"epoch": 0.31872,
"grad_norm": 0.23930998146533966,
"learning_rate": 0.0001362784,
"loss": 0.87,
"step": 19920
},
{
"epoch": 0.31904,
"grad_norm": 0.2663067877292633,
"learning_rate": 0.0001362144,
"loss": 0.8916,
"step": 19940
},
{
"epoch": 0.31936,
"grad_norm": 0.24097581207752228,
"learning_rate": 0.0001361504,
"loss": 0.8894,
"step": 19960
},
{
"epoch": 0.31968,
"grad_norm": 0.2457173615694046,
"learning_rate": 0.00013608640000000003,
"loss": 0.882,
"step": 19980
},
{
"epoch": 0.32,
"grad_norm": 0.24961303174495697,
"learning_rate": 0.00013602240000000002,
"loss": 0.9111,
"step": 20000
},
{
"epoch": 0.32032,
"grad_norm": 0.2094404697418213,
"learning_rate": 0.0001359584,
"loss": 0.8509,
"step": 20020
},
{
"epoch": 0.32064,
"grad_norm": 0.259491890668869,
"learning_rate": 0.0001358944,
"loss": 0.9049,
"step": 20040
},
{
"epoch": 0.32096,
"grad_norm": 0.23153385519981384,
"learning_rate": 0.0001358304,
"loss": 0.9332,
"step": 20060
},
{
"epoch": 0.32128,
"grad_norm": 0.22706463932991028,
"learning_rate": 0.0001357664,
"loss": 0.9113,
"step": 20080
},
{
"epoch": 0.3216,
"grad_norm": 0.24029311537742615,
"learning_rate": 0.0001357024,
"loss": 0.8789,
"step": 20100
},
{
"epoch": 0.32192,
"grad_norm": 0.2065390646457672,
"learning_rate": 0.00013563840000000002,
"loss": 0.8984,
"step": 20120
},
{
"epoch": 0.32224,
"grad_norm": 0.2206522673368454,
"learning_rate": 0.00013557440000000001,
"loss": 0.8654,
"step": 20140
},
{
"epoch": 0.32256,
"grad_norm": 0.25168558955192566,
"learning_rate": 0.0001355104,
"loss": 0.8857,
"step": 20160
},
{
"epoch": 0.32288,
"grad_norm": 0.33041876554489136,
"learning_rate": 0.0001354464,
"loss": 0.8478,
"step": 20180
},
{
"epoch": 0.3232,
"grad_norm": 0.24307800829410553,
"learning_rate": 0.0001353824,
"loss": 0.8422,
"step": 20200
},
{
"epoch": 0.32352,
"grad_norm": 0.21959048509597778,
"learning_rate": 0.0001353184,
"loss": 0.9439,
"step": 20220
},
{
"epoch": 0.32384,
"grad_norm": 0.2573561668395996,
"learning_rate": 0.0001352544,
"loss": 0.9243,
"step": 20240
},
{
"epoch": 0.32416,
"grad_norm": 0.21520912647247314,
"learning_rate": 0.0001351904,
"loss": 0.8948,
"step": 20260
},
{
"epoch": 0.32448,
"grad_norm": 0.1714809685945511,
"learning_rate": 0.0001351264,
"loss": 0.8826,
"step": 20280
},
{
"epoch": 0.3248,
"grad_norm": 0.24606648087501526,
"learning_rate": 0.0001350624,
"loss": 0.8686,
"step": 20300
},
{
"epoch": 0.32512,
"grad_norm": 0.2979150116443634,
"learning_rate": 0.00013499840000000003,
"loss": 0.9369,
"step": 20320
},
{
"epoch": 0.32544,
"grad_norm": 0.2468058317899704,
"learning_rate": 0.00013493440000000002,
"loss": 0.9155,
"step": 20340
},
{
"epoch": 0.32576,
"grad_norm": 0.24256190657615662,
"learning_rate": 0.0001348704,
"loss": 0.8497,
"step": 20360
},
{
"epoch": 0.32608,
"grad_norm": 0.18389752507209778,
"learning_rate": 0.0001348064,
"loss": 0.904,
"step": 20380
},
{
"epoch": 0.3264,
"grad_norm": 0.3476732075214386,
"learning_rate": 0.0001347424,
"loss": 0.8942,
"step": 20400
},
{
"epoch": 0.32672,
"grad_norm": 0.24657464027404785,
"learning_rate": 0.0001346784,
"loss": 0.8558,
"step": 20420
},
{
"epoch": 0.32704,
"grad_norm": 0.23047630488872528,
"learning_rate": 0.0001346144,
"loss": 0.8671,
"step": 20440
},
{
"epoch": 0.32736,
"grad_norm": 0.26599422097206116,
"learning_rate": 0.00013455040000000002,
"loss": 0.8681,
"step": 20460
},
{
"epoch": 0.32768,
"grad_norm": 0.21226702630519867,
"learning_rate": 0.00013448640000000001,
"loss": 0.8737,
"step": 20480
},
{
"epoch": 0.328,
"grad_norm": 0.24917374551296234,
"learning_rate": 0.0001344224,
"loss": 0.8631,
"step": 20500
},
{
"epoch": 0.32832,
"grad_norm": 0.23581089079380035,
"learning_rate": 0.0001343584,
"loss": 0.8835,
"step": 20520
},
{
"epoch": 0.32864,
"grad_norm": 0.20437762141227722,
"learning_rate": 0.0001342944,
"loss": 0.8462,
"step": 20540
},
{
"epoch": 0.32896,
"grad_norm": 0.2745993733406067,
"learning_rate": 0.0001342304,
"loss": 0.8921,
"step": 20560
},
{
"epoch": 0.32928,
"grad_norm": 0.2283765822649002,
"learning_rate": 0.0001341664,
"loss": 0.9724,
"step": 20580
},
{
"epoch": 0.3296,
"grad_norm": 0.23891101777553558,
"learning_rate": 0.0001341024,
"loss": 0.8836,
"step": 20600
},
{
"epoch": 0.32992,
"grad_norm": 0.29253366589546204,
"learning_rate": 0.0001340384,
"loss": 0.8708,
"step": 20620
},
{
"epoch": 0.33024,
"grad_norm": 0.21287086606025696,
"learning_rate": 0.0001339744,
"loss": 0.8523,
"step": 20640
},
{
"epoch": 0.33056,
"grad_norm": 0.25029948353767395,
"learning_rate": 0.00013391040000000002,
"loss": 0.93,
"step": 20660
},
{
"epoch": 0.33088,
"grad_norm": 0.2445763796567917,
"learning_rate": 0.00013384640000000002,
"loss": 0.8435,
"step": 20680
},
{
"epoch": 0.3312,
"grad_norm": 0.23780953884124756,
"learning_rate": 0.0001337824,
"loss": 0.8524,
"step": 20700
},
{
"epoch": 0.33152,
"grad_norm": 0.24498897790908813,
"learning_rate": 0.0001337184,
"loss": 0.8806,
"step": 20720
},
{
"epoch": 0.33184,
"grad_norm": 0.24283845722675323,
"learning_rate": 0.0001336544,
"loss": 0.9041,
"step": 20740
},
{
"epoch": 0.33216,
"grad_norm": 0.25686827301979065,
"learning_rate": 0.0001335904,
"loss": 0.8836,
"step": 20760
},
{
"epoch": 0.33248,
"grad_norm": 0.19892160594463348,
"learning_rate": 0.0001335264,
"loss": 0.846,
"step": 20780
},
{
"epoch": 0.3328,
"grad_norm": 0.22868989408016205,
"learning_rate": 0.00013346240000000002,
"loss": 0.9193,
"step": 20800
},
{
"epoch": 0.33312,
"grad_norm": 0.22255489230155945,
"learning_rate": 0.0001333984,
"loss": 0.898,
"step": 20820
},
{
"epoch": 0.33344,
"grad_norm": 0.20357553660869598,
"learning_rate": 0.0001333344,
"loss": 0.8621,
"step": 20840
},
{
"epoch": 0.33376,
"grad_norm": 0.21813243627548218,
"learning_rate": 0.0001332704,
"loss": 0.8745,
"step": 20860
},
{
"epoch": 0.33408,
"grad_norm": 0.2878071069717407,
"learning_rate": 0.0001332064,
"loss": 0.8639,
"step": 20880
},
{
"epoch": 0.3344,
"grad_norm": 0.2555156946182251,
"learning_rate": 0.0001331424,
"loss": 0.8748,
"step": 20900
},
{
"epoch": 0.33472,
"grad_norm": 0.23196488618850708,
"learning_rate": 0.00013307840000000002,
"loss": 0.8921,
"step": 20920
},
{
"epoch": 0.33504,
"grad_norm": 0.24895872175693512,
"learning_rate": 0.0001330144,
"loss": 0.9024,
"step": 20940
},
{
"epoch": 0.33536,
"grad_norm": 0.26333311200141907,
"learning_rate": 0.0001329504,
"loss": 0.9119,
"step": 20960
},
{
"epoch": 0.33568,
"grad_norm": 0.23664534091949463,
"learning_rate": 0.0001328864,
"loss": 0.8766,
"step": 20980
},
{
"epoch": 0.336,
"grad_norm": 0.2326672226190567,
"learning_rate": 0.00013282240000000002,
"loss": 0.8263,
"step": 21000
},
{
"epoch": 0.33632,
"grad_norm": 0.23202668130397797,
"learning_rate": 0.00013275840000000002,
"loss": 0.9158,
"step": 21020
},
{
"epoch": 0.33664,
"grad_norm": 0.2137191891670227,
"learning_rate": 0.0001326944,
"loss": 0.9182,
"step": 21040
},
{
"epoch": 0.33696,
"grad_norm": 0.22848786413669586,
"learning_rate": 0.0001326304,
"loss": 0.8647,
"step": 21060
},
{
"epoch": 0.33728,
"grad_norm": 0.2685950994491577,
"learning_rate": 0.0001325664,
"loss": 0.8808,
"step": 21080
},
{
"epoch": 0.3376,
"grad_norm": 0.22779199481010437,
"learning_rate": 0.0001325024,
"loss": 0.9204,
"step": 21100
},
{
"epoch": 0.33792,
"grad_norm": 0.23398426175117493,
"learning_rate": 0.0001324384,
"loss": 0.8765,
"step": 21120
},
{
"epoch": 0.33824,
"grad_norm": 0.2746240794658661,
"learning_rate": 0.00013237440000000002,
"loss": 0.9218,
"step": 21140
},
{
"epoch": 0.33856,
"grad_norm": 0.21966886520385742,
"learning_rate": 0.0001323104,
"loss": 0.8135,
"step": 21160
},
{
"epoch": 0.33888,
"grad_norm": 0.24707438051700592,
"learning_rate": 0.0001322464,
"loss": 0.9269,
"step": 21180
},
{
"epoch": 0.3392,
"grad_norm": 0.23954932391643524,
"learning_rate": 0.00013218240000000003,
"loss": 0.8461,
"step": 21200
},
{
"epoch": 0.33952,
"grad_norm": 0.2673095762729645,
"learning_rate": 0.0001321184,
"loss": 0.8173,
"step": 21220
},
{
"epoch": 0.33984,
"grad_norm": 0.2744886577129364,
"learning_rate": 0.0001320544,
"loss": 0.9024,
"step": 21240
},
{
"epoch": 0.34016,
"grad_norm": 0.2099352777004242,
"learning_rate": 0.00013199040000000002,
"loss": 0.8744,
"step": 21260
},
{
"epoch": 0.34048,
"grad_norm": 0.24516913294792175,
"learning_rate": 0.0001319264,
"loss": 0.9172,
"step": 21280
},
{
"epoch": 0.3408,
"grad_norm": 0.2523512840270996,
"learning_rate": 0.0001318624,
"loss": 0.8656,
"step": 21300
},
{
"epoch": 0.34112,
"grad_norm": 0.23334811627864838,
"learning_rate": 0.0001317984,
"loss": 0.9486,
"step": 21320
},
{
"epoch": 0.34144,
"grad_norm": 0.2607022225856781,
"learning_rate": 0.00013173440000000002,
"loss": 0.9262,
"step": 21340
},
{
"epoch": 0.34176,
"grad_norm": 0.2326965034008026,
"learning_rate": 0.00013167040000000002,
"loss": 0.88,
"step": 21360
},
{
"epoch": 0.34208,
"grad_norm": 0.26053765416145325,
"learning_rate": 0.0001316064,
"loss": 0.926,
"step": 21380
},
{
"epoch": 0.3424,
"grad_norm": 0.24075692892074585,
"learning_rate": 0.0001315424,
"loss": 0.8301,
"step": 21400
},
{
"epoch": 0.34272,
"grad_norm": 0.2621121108531952,
"learning_rate": 0.0001314784,
"loss": 0.8895,
"step": 21420
},
{
"epoch": 0.34304,
"grad_norm": 0.26628807187080383,
"learning_rate": 0.0001314144,
"loss": 0.8634,
"step": 21440
},
{
"epoch": 0.34336,
"grad_norm": 0.301937073469162,
"learning_rate": 0.0001313504,
"loss": 0.8876,
"step": 21460
},
{
"epoch": 0.34368,
"grad_norm": 0.2092495709657669,
"learning_rate": 0.00013128640000000002,
"loss": 0.917,
"step": 21480
},
{
"epoch": 0.344,
"grad_norm": 0.2542109191417694,
"learning_rate": 0.0001312224,
"loss": 0.8835,
"step": 21500
},
{
"epoch": 0.34432,
"grad_norm": 0.3914921283721924,
"learning_rate": 0.0001311584,
"loss": 0.8868,
"step": 21520
},
{
"epoch": 0.34464,
"grad_norm": 0.23783642053604126,
"learning_rate": 0.00013109440000000003,
"loss": 0.9062,
"step": 21540
},
{
"epoch": 0.34496,
"grad_norm": 0.22890570759773254,
"learning_rate": 0.0001310304,
"loss": 0.914,
"step": 21560
},
{
"epoch": 0.34528,
"grad_norm": 0.23722144961357117,
"learning_rate": 0.0001309664,
"loss": 0.9393,
"step": 21580
},
{
"epoch": 0.3456,
"grad_norm": 0.264466255903244,
"learning_rate": 0.00013090240000000002,
"loss": 0.9294,
"step": 21600
},
{
"epoch": 0.34592,
"grad_norm": 0.23583443462848663,
"learning_rate": 0.0001308384,
"loss": 0.8729,
"step": 21620
},
{
"epoch": 0.34624,
"grad_norm": 0.25647860765457153,
"learning_rate": 0.0001307776,
"loss": 0.8512,
"step": 21640
},
{
"epoch": 0.34656,
"grad_norm": 0.2854239344596863,
"learning_rate": 0.0001307136,
"loss": 0.925,
"step": 21660
},
{
"epoch": 0.34688,
"grad_norm": 0.26097196340560913,
"learning_rate": 0.0001306496,
"loss": 0.9127,
"step": 21680
},
{
"epoch": 0.3472,
"grad_norm": 0.20337998867034912,
"learning_rate": 0.0001305856,
"loss": 0.9048,
"step": 21700
},
{
"epoch": 0.34752,
"grad_norm": 0.2021179497241974,
"learning_rate": 0.0001305216,
"loss": 0.9213,
"step": 21720
},
{
"epoch": 0.34784,
"grad_norm": 0.2213645875453949,
"learning_rate": 0.0001304576,
"loss": 0.8891,
"step": 21740
},
{
"epoch": 0.34816,
"grad_norm": 0.2201976180076599,
"learning_rate": 0.0001303936,
"loss": 0.8425,
"step": 21760
},
{
"epoch": 0.34848,
"grad_norm": 0.21747338771820068,
"learning_rate": 0.0001303296,
"loss": 0.9035,
"step": 21780
},
{
"epoch": 0.3488,
"grad_norm": 0.23967601358890533,
"learning_rate": 0.0001302656,
"loss": 0.9168,
"step": 21800
},
{
"epoch": 0.34912,
"grad_norm": 0.2570231854915619,
"learning_rate": 0.00013020160000000002,
"loss": 0.8813,
"step": 21820
},
{
"epoch": 0.34944,
"grad_norm": 0.24394749104976654,
"learning_rate": 0.00013013760000000002,
"loss": 0.884,
"step": 21840
},
{
"epoch": 0.34976,
"grad_norm": 0.25966569781303406,
"learning_rate": 0.0001300736,
"loss": 0.8981,
"step": 21860
},
{
"epoch": 0.35008,
"grad_norm": 0.24895374476909637,
"learning_rate": 0.0001300096,
"loss": 0.9225,
"step": 21880
},
{
"epoch": 0.3504,
"grad_norm": 0.22816093266010284,
"learning_rate": 0.0001299456,
"loss": 0.8729,
"step": 21900
},
{
"epoch": 0.35072,
"grad_norm": 0.22073081135749817,
"learning_rate": 0.0001298816,
"loss": 0.8505,
"step": 21920
},
{
"epoch": 0.35104,
"grad_norm": 0.24581517279148102,
"learning_rate": 0.0001298176,
"loss": 0.8963,
"step": 21940
},
{
"epoch": 0.35136,
"grad_norm": 0.22976480424404144,
"learning_rate": 0.00012975360000000002,
"loss": 0.8694,
"step": 21960
},
{
"epoch": 0.35168,
"grad_norm": 0.2811223566532135,
"learning_rate": 0.0001296896,
"loss": 0.9338,
"step": 21980
},
{
"epoch": 0.352,
"grad_norm": 0.2338571697473526,
"learning_rate": 0.0001296256,
"loss": 0.883,
"step": 22000
},
{
"epoch": 0.35232,
"grad_norm": 0.2589928209781647,
"learning_rate": 0.0001295616,
"loss": 0.8674,
"step": 22020
},
{
"epoch": 0.35264,
"grad_norm": 0.2552894949913025,
"learning_rate": 0.0001294976,
"loss": 0.9449,
"step": 22040
},
{
"epoch": 0.35296,
"grad_norm": 0.2796124219894409,
"learning_rate": 0.0001294336,
"loss": 0.9292,
"step": 22060
},
{
"epoch": 0.35328,
"grad_norm": 0.25079146027565,
"learning_rate": 0.0001293696,
"loss": 0.8741,
"step": 22080
},
{
"epoch": 0.3536,
"grad_norm": 0.25996407866477966,
"learning_rate": 0.0001293056,
"loss": 0.8642,
"step": 22100
},
{
"epoch": 0.35392,
"grad_norm": 0.24768070876598358,
"learning_rate": 0.0001292416,
"loss": 0.8876,
"step": 22120
},
{
"epoch": 0.35424,
"grad_norm": 0.2748354375362396,
"learning_rate": 0.0001291776,
"loss": 0.8686,
"step": 22140
},
{
"epoch": 0.35456,
"grad_norm": 0.26020100712776184,
"learning_rate": 0.00012911360000000002,
"loss": 0.8978,
"step": 22160
},
{
"epoch": 0.35488,
"grad_norm": 0.22740164399147034,
"learning_rate": 0.00012904960000000002,
"loss": 0.8806,
"step": 22180
},
{
"epoch": 0.3552,
"grad_norm": 0.22539575397968292,
"learning_rate": 0.0001289856,
"loss": 0.9586,
"step": 22200
},
{
"epoch": 0.35552,
"grad_norm": 0.224917471408844,
"learning_rate": 0.0001289216,
"loss": 0.929,
"step": 22220
},
{
"epoch": 0.35584,
"grad_norm": 0.2811584174633026,
"learning_rate": 0.0001288576,
"loss": 0.8816,
"step": 22240
},
{
"epoch": 0.35616,
"grad_norm": 0.2050054520368576,
"learning_rate": 0.0001287936,
"loss": 0.886,
"step": 22260
},
{
"epoch": 0.35648,
"grad_norm": 0.26352056860923767,
"learning_rate": 0.0001287296,
"loss": 0.8631,
"step": 22280
},
{
"epoch": 0.3568,
"grad_norm": 0.2189260572195053,
"learning_rate": 0.00012866560000000002,
"loss": 0.8886,
"step": 22300
},
{
"epoch": 0.35712,
"grad_norm": 0.20122426748275757,
"learning_rate": 0.0001286016,
"loss": 0.8899,
"step": 22320
},
{
"epoch": 0.35744,
"grad_norm": 0.2483946979045868,
"learning_rate": 0.0001285376,
"loss": 0.9277,
"step": 22340
},
{
"epoch": 0.35776,
"grad_norm": 0.2784961462020874,
"learning_rate": 0.0001284736,
"loss": 0.8697,
"step": 22360
},
{
"epoch": 0.35808,
"grad_norm": 0.253579318523407,
"learning_rate": 0.0001284096,
"loss": 0.9001,
"step": 22380
},
{
"epoch": 0.3584,
"grad_norm": 0.24485689401626587,
"learning_rate": 0.0001283456,
"loss": 0.8837,
"step": 22400
},
{
"epoch": 0.35872,
"grad_norm": 0.28353968262672424,
"learning_rate": 0.0001282816,
"loss": 0.8641,
"step": 22420
},
{
"epoch": 0.35904,
"grad_norm": 0.22458963096141815,
"learning_rate": 0.0001282176,
"loss": 0.8424,
"step": 22440
},
{
"epoch": 0.35936,
"grad_norm": 0.21924658119678497,
"learning_rate": 0.0001281536,
"loss": 0.9238,
"step": 22460
},
{
"epoch": 0.35968,
"grad_norm": 0.2481856644153595,
"learning_rate": 0.0001280896,
"loss": 0.914,
"step": 22480
},
{
"epoch": 0.36,
"grad_norm": 0.22141766548156738,
"learning_rate": 0.00012802560000000002,
"loss": 0.883,
"step": 22500
},
{
"epoch": 0.36032,
"grad_norm": 0.241195410490036,
"learning_rate": 0.00012796160000000002,
"loss": 0.8758,
"step": 22520
},
{
"epoch": 0.36064,
"grad_norm": 0.21804146468639374,
"learning_rate": 0.0001278976,
"loss": 0.892,
"step": 22540
},
{
"epoch": 0.36096,
"grad_norm": 0.21555864810943604,
"learning_rate": 0.0001278336,
"loss": 0.9042,
"step": 22560
},
{
"epoch": 0.36128,
"grad_norm": 0.2855897545814514,
"learning_rate": 0.0001277696,
"loss": 0.8698,
"step": 22580
},
{
"epoch": 0.3616,
"grad_norm": 0.18174555897712708,
"learning_rate": 0.0001277056,
"loss": 0.9076,
"step": 22600
},
{
"epoch": 0.36192,
"grad_norm": 0.24485164880752563,
"learning_rate": 0.0001276416,
"loss": 0.8773,
"step": 22620
},
{
"epoch": 0.36224,
"grad_norm": 0.24945834279060364,
"learning_rate": 0.00012757760000000002,
"loss": 0.8947,
"step": 22640
},
{
"epoch": 0.36256,
"grad_norm": 0.27107083797454834,
"learning_rate": 0.0001275136,
"loss": 0.8908,
"step": 22660
},
{
"epoch": 0.36288,
"grad_norm": 0.2725388705730438,
"learning_rate": 0.0001274496,
"loss": 0.9117,
"step": 22680
},
{
"epoch": 0.3632,
"grad_norm": 0.2100275754928589,
"learning_rate": 0.00012738560000000003,
"loss": 0.841,
"step": 22700
},
{
"epoch": 0.36352,
"grad_norm": 0.18476144969463348,
"learning_rate": 0.0001273216,
"loss": 0.8546,
"step": 22720
},
{
"epoch": 0.36384,
"grad_norm": 0.2054418921470642,
"learning_rate": 0.0001272576,
"loss": 0.9456,
"step": 22740
},
{
"epoch": 0.36416,
"grad_norm": 0.23518037796020508,
"learning_rate": 0.00012719360000000001,
"loss": 0.8499,
"step": 22760
},
{
"epoch": 0.36448,
"grad_norm": 0.22273370623588562,
"learning_rate": 0.0001271296,
"loss": 0.8961,
"step": 22780
},
{
"epoch": 0.3648,
"grad_norm": 0.2261369377374649,
"learning_rate": 0.0001270656,
"loss": 0.8655,
"step": 22800
},
{
"epoch": 0.36512,
"grad_norm": 0.2787102460861206,
"learning_rate": 0.0001270016,
"loss": 0.8457,
"step": 22820
},
{
"epoch": 0.36544,
"grad_norm": 0.287702351808548,
"learning_rate": 0.00012693760000000002,
"loss": 0.8998,
"step": 22840
},
{
"epoch": 0.36576,
"grad_norm": 0.24665424227714539,
"learning_rate": 0.00012687360000000002,
"loss": 0.9337,
"step": 22860
},
{
"epoch": 0.36608,
"grad_norm": 0.23870235681533813,
"learning_rate": 0.00012680959999999999,
"loss": 0.9023,
"step": 22880
},
{
"epoch": 0.3664,
"grad_norm": 0.2987755835056305,
"learning_rate": 0.0001267456,
"loss": 0.9055,
"step": 22900
},
{
"epoch": 0.36672,
"grad_norm": 0.271150678396225,
"learning_rate": 0.0001266816,
"loss": 0.8995,
"step": 22920
},
{
"epoch": 0.36704,
"grad_norm": 0.2605392038822174,
"learning_rate": 0.0001266176,
"loss": 0.8712,
"step": 22940
},
{
"epoch": 0.36736,
"grad_norm": 0.20183727145195007,
"learning_rate": 0.0001265536,
"loss": 0.9121,
"step": 22960
},
{
"epoch": 0.36768,
"grad_norm": 0.22325585782527924,
"learning_rate": 0.00012648960000000002,
"loss": 0.9287,
"step": 22980
},
{
"epoch": 0.368,
"grad_norm": 0.2461370825767517,
"learning_rate": 0.0001264256,
"loss": 0.9411,
"step": 23000
},
{
"epoch": 0.36832,
"grad_norm": 0.26270198822021484,
"learning_rate": 0.0001263616,
"loss": 0.8686,
"step": 23020
},
{
"epoch": 0.36864,
"grad_norm": 0.2455090433359146,
"learning_rate": 0.00012629760000000003,
"loss": 0.8735,
"step": 23040
},
{
"epoch": 0.36896,
"grad_norm": 0.22023002803325653,
"learning_rate": 0.0001262336,
"loss": 0.9235,
"step": 23060
},
{
"epoch": 0.36928,
"grad_norm": 0.19227732717990875,
"learning_rate": 0.0001261696,
"loss": 0.8781,
"step": 23080
},
{
"epoch": 0.3696,
"grad_norm": 0.230510413646698,
"learning_rate": 0.00012610560000000001,
"loss": 0.8865,
"step": 23100
},
{
"epoch": 0.36992,
"grad_norm": 0.2508642077445984,
"learning_rate": 0.0001260416,
"loss": 0.9123,
"step": 23120
},
{
"epoch": 0.37024,
"grad_norm": 0.22809596359729767,
"learning_rate": 0.0001259776,
"loss": 0.8799,
"step": 23140
},
{
"epoch": 0.37056,
"grad_norm": 0.2615736722946167,
"learning_rate": 0.0001259136,
"loss": 0.8927,
"step": 23160
},
{
"epoch": 0.37088,
"grad_norm": 0.23064741492271423,
"learning_rate": 0.00012584960000000002,
"loss": 0.9142,
"step": 23180
},
{
"epoch": 0.3712,
"grad_norm": 0.2514834403991699,
"learning_rate": 0.00012578560000000002,
"loss": 0.9439,
"step": 23200
},
{
"epoch": 0.37152,
"grad_norm": 0.27186328172683716,
"learning_rate": 0.00012572159999999999,
"loss": 0.8197,
"step": 23220
},
{
"epoch": 0.37184,
"grad_norm": 0.2310495674610138,
"learning_rate": 0.0001256576,
"loss": 0.925,
"step": 23240
},
{
"epoch": 0.37216,
"grad_norm": 0.21828240156173706,
"learning_rate": 0.0001255936,
"loss": 0.8944,
"step": 23260
},
{
"epoch": 0.37248,
"grad_norm": 0.25522857904434204,
"learning_rate": 0.0001255296,
"loss": 0.8338,
"step": 23280
},
{
"epoch": 0.3728,
"grad_norm": 0.2428404837846756,
"learning_rate": 0.0001254656,
"loss": 0.8788,
"step": 23300
},
{
"epoch": 0.37312,
"grad_norm": 0.2573212683200836,
"learning_rate": 0.00012540160000000002,
"loss": 0.8674,
"step": 23320
},
{
"epoch": 0.37344,
"grad_norm": 0.26089292764663696,
"learning_rate": 0.0001253376,
"loss": 0.8811,
"step": 23340
},
{
"epoch": 0.37376,
"grad_norm": 0.2231331765651703,
"learning_rate": 0.0001252736,
"loss": 0.8762,
"step": 23360
},
{
"epoch": 0.37408,
"grad_norm": 0.22179879248142242,
"learning_rate": 0.00012520960000000003,
"loss": 0.9226,
"step": 23380
},
{
"epoch": 0.3744,
"grad_norm": 0.29986852407455444,
"learning_rate": 0.0001251456,
"loss": 0.8889,
"step": 23400
},
{
"epoch": 0.37472,
"grad_norm": 0.30950990319252014,
"learning_rate": 0.0001250816,
"loss": 0.9255,
"step": 23420
},
{
"epoch": 0.37504,
"grad_norm": 0.2493577003479004,
"learning_rate": 0.00012501760000000001,
"loss": 0.8823,
"step": 23440
},
{
"epoch": 0.37536,
"grad_norm": 0.20476320385932922,
"learning_rate": 0.0001249536,
"loss": 0.9095,
"step": 23460
},
{
"epoch": 0.37568,
"grad_norm": 0.22957487404346466,
"learning_rate": 0.0001248896,
"loss": 0.8832,
"step": 23480
},
{
"epoch": 0.376,
"grad_norm": 0.20240527391433716,
"learning_rate": 0.0001248256,
"loss": 0.8569,
"step": 23500
},
{
"epoch": 0.37632,
"grad_norm": 0.2227647453546524,
"learning_rate": 0.00012476160000000002,
"loss": 0.8729,
"step": 23520
},
{
"epoch": 0.37664,
"grad_norm": 0.2590673565864563,
"learning_rate": 0.00012469760000000002,
"loss": 0.9006,
"step": 23540
},
{
"epoch": 0.37696,
"grad_norm": 0.2456459403038025,
"learning_rate": 0.0001246336,
"loss": 0.9238,
"step": 23560
},
{
"epoch": 0.37728,
"grad_norm": 0.23964524269104004,
"learning_rate": 0.0001245696,
"loss": 0.9148,
"step": 23580
},
{
"epoch": 0.3776,
"grad_norm": 0.2621648907661438,
"learning_rate": 0.0001245056,
"loss": 0.91,
"step": 23600
},
{
"epoch": 0.37792,
"grad_norm": 0.2700936794281006,
"learning_rate": 0.0001244416,
"loss": 0.8256,
"step": 23620
},
{
"epoch": 0.37824,
"grad_norm": 0.2757120430469513,
"learning_rate": 0.0001243776,
"loss": 0.9158,
"step": 23640
},
{
"epoch": 0.37856,
"grad_norm": 0.25307512283325195,
"learning_rate": 0.00012431360000000002,
"loss": 0.9564,
"step": 23660
},
{
"epoch": 0.37888,
"grad_norm": 0.22811047732830048,
"learning_rate": 0.0001242496,
"loss": 0.9005,
"step": 23680
},
{
"epoch": 0.3792,
"grad_norm": 0.2570401430130005,
"learning_rate": 0.0001241856,
"loss": 0.903,
"step": 23700
},
{
"epoch": 0.37952,
"grad_norm": 0.2455683946609497,
"learning_rate": 0.00012412160000000003,
"loss": 0.8849,
"step": 23720
},
{
"epoch": 0.37984,
"grad_norm": 0.2513890564441681,
"learning_rate": 0.0001240576,
"loss": 0.916,
"step": 23740
},
{
"epoch": 0.38016,
"grad_norm": 0.2542913556098938,
"learning_rate": 0.0001239936,
"loss": 0.8913,
"step": 23760
},
{
"epoch": 0.38048,
"grad_norm": 0.23636704683303833,
"learning_rate": 0.00012392960000000001,
"loss": 0.9071,
"step": 23780
},
{
"epoch": 0.3808,
"grad_norm": 0.23358510434627533,
"learning_rate": 0.00012386880000000002,
"loss": 0.8727,
"step": 23800
},
{
"epoch": 0.38112,
"grad_norm": 0.2075459063053131,
"learning_rate": 0.0001238048,
"loss": 0.8602,
"step": 23820
},
{
"epoch": 0.38144,
"grad_norm": 0.24009796977043152,
"learning_rate": 0.0001237408,
"loss": 0.8787,
"step": 23840
},
{
"epoch": 0.38176,
"grad_norm": 0.20129810273647308,
"learning_rate": 0.0001236768,
"loss": 0.9057,
"step": 23860
},
{
"epoch": 0.38208,
"grad_norm": 0.2360943704843521,
"learning_rate": 0.0001236128,
"loss": 0.8914,
"step": 23880
},
{
"epoch": 0.3824,
"grad_norm": 0.2023976743221283,
"learning_rate": 0.0001235488,
"loss": 0.8671,
"step": 23900
},
{
"epoch": 0.38272,
"grad_norm": 0.28492018580436707,
"learning_rate": 0.0001234848,
"loss": 0.8466,
"step": 23920
},
{
"epoch": 0.38304,
"grad_norm": 0.2340991497039795,
"learning_rate": 0.0001234208,
"loss": 0.8719,
"step": 23940
},
{
"epoch": 0.38336,
"grad_norm": 0.2417367696762085,
"learning_rate": 0.0001233568,
"loss": 0.8661,
"step": 23960
},
{
"epoch": 0.38368,
"grad_norm": 0.2661448121070862,
"learning_rate": 0.0001232928,
"loss": 0.9087,
"step": 23980
},
{
"epoch": 0.384,
"grad_norm": 0.2731974720954895,
"learning_rate": 0.00012322880000000002,
"loss": 0.8759,
"step": 24000
},
{
"epoch": 0.38432,
"grad_norm": 0.25823774933815,
"learning_rate": 0.00012316480000000002,
"loss": 0.8722,
"step": 24020
},
{
"epoch": 0.38464,
"grad_norm": 0.25573599338531494,
"learning_rate": 0.0001231008,
"loss": 0.9005,
"step": 24040
},
{
"epoch": 0.38496,
"grad_norm": 0.22409121692180634,
"learning_rate": 0.0001230368,
"loss": 0.8797,
"step": 24060
},
{
"epoch": 0.38528,
"grad_norm": 0.2514369785785675,
"learning_rate": 0.0001229728,
"loss": 0.9225,
"step": 24080
},
{
"epoch": 0.3856,
"grad_norm": 0.2252058982849121,
"learning_rate": 0.0001229088,
"loss": 0.8665,
"step": 24100
},
{
"epoch": 0.38592,
"grad_norm": 0.29812994599342346,
"learning_rate": 0.0001228448,
"loss": 0.8914,
"step": 24120
},
{
"epoch": 0.38624,
"grad_norm": 0.23113328218460083,
"learning_rate": 0.00012278080000000002,
"loss": 0.9096,
"step": 24140
},
{
"epoch": 0.38656,
"grad_norm": 0.26779311895370483,
"learning_rate": 0.0001227168,
"loss": 0.8901,
"step": 24160
},
{
"epoch": 0.38688,
"grad_norm": 0.26052671670913696,
"learning_rate": 0.0001226528,
"loss": 0.8874,
"step": 24180
},
{
"epoch": 0.3872,
"grad_norm": 0.24518652260303497,
"learning_rate": 0.00012258880000000003,
"loss": 0.8906,
"step": 24200
},
{
"epoch": 0.38752,
"grad_norm": 0.22408343851566315,
"learning_rate": 0.0001225248,
"loss": 0.9066,
"step": 24220
},
{
"epoch": 0.38784,
"grad_norm": 0.24065548181533813,
"learning_rate": 0.0001224608,
"loss": 0.8831,
"step": 24240
},
{
"epoch": 0.38816,
"grad_norm": 0.2512281835079193,
"learning_rate": 0.00012239680000000001,
"loss": 0.9191,
"step": 24260
},
{
"epoch": 0.38848,
"grad_norm": 0.212532177567482,
"learning_rate": 0.0001223328,
"loss": 0.8254,
"step": 24280
},
{
"epoch": 0.3888,
"grad_norm": 0.27028560638427734,
"learning_rate": 0.0001222688,
"loss": 0.9266,
"step": 24300
},
{
"epoch": 0.38912,
"grad_norm": 0.25514838099479675,
"learning_rate": 0.0001222048,
"loss": 0.8876,
"step": 24320
},
{
"epoch": 0.38944,
"grad_norm": 0.23313501477241516,
"learning_rate": 0.00012214080000000002,
"loss": 0.8999,
"step": 24340
},
{
"epoch": 0.38976,
"grad_norm": 0.24142177402973175,
"learning_rate": 0.00012207680000000002,
"loss": 0.8689,
"step": 24360
},
{
"epoch": 0.39008,
"grad_norm": 0.26181671023368835,
"learning_rate": 0.00012201280000000001,
"loss": 0.9077,
"step": 24380
},
{
"epoch": 0.3904,
"grad_norm": 0.20765413343906403,
"learning_rate": 0.0001219488,
"loss": 0.8583,
"step": 24400
},
{
"epoch": 0.39072,
"grad_norm": 0.23421642184257507,
"learning_rate": 0.0001218848,
"loss": 0.8741,
"step": 24420
},
{
"epoch": 0.39104,
"grad_norm": 0.24042420089244843,
"learning_rate": 0.0001218208,
"loss": 0.8836,
"step": 24440
},
{
"epoch": 0.39136,
"grad_norm": 0.23314060270786285,
"learning_rate": 0.0001217568,
"loss": 0.8494,
"step": 24460
},
{
"epoch": 0.39168,
"grad_norm": 0.21119163930416107,
"learning_rate": 0.0001216928,
"loss": 0.8662,
"step": 24480
},
{
"epoch": 0.392,
"grad_norm": 0.2608548700809479,
"learning_rate": 0.00012163200000000002,
"loss": 0.8893,
"step": 24500
},
{
"epoch": 0.39232,
"grad_norm": 0.21324241161346436,
"learning_rate": 0.00012156800000000001,
"loss": 0.9002,
"step": 24520
},
{
"epoch": 0.39264,
"grad_norm": 0.22912296652793884,
"learning_rate": 0.000121504,
"loss": 0.866,
"step": 24540
},
{
"epoch": 0.39296,
"grad_norm": 0.21215850114822388,
"learning_rate": 0.00012144,
"loss": 0.8513,
"step": 24560
},
{
"epoch": 0.39328,
"grad_norm": 0.24554443359375,
"learning_rate": 0.000121376,
"loss": 0.8919,
"step": 24580
},
{
"epoch": 0.3936,
"grad_norm": 0.20404468476772308,
"learning_rate": 0.000121312,
"loss": 0.8908,
"step": 24600
},
{
"epoch": 0.39392,
"grad_norm": 0.22551661729812622,
"learning_rate": 0.000121248,
"loss": 0.8854,
"step": 24620
},
{
"epoch": 0.39424,
"grad_norm": 0.21961207687854767,
"learning_rate": 0.00012118400000000001,
"loss": 0.9231,
"step": 24640
},
{
"epoch": 0.39456,
"grad_norm": 0.22458186745643616,
"learning_rate": 0.00012112,
"loss": 0.9015,
"step": 24660
},
{
"epoch": 0.39488,
"grad_norm": 0.29484352469444275,
"learning_rate": 0.00012105600000000001,
"loss": 0.8869,
"step": 24680
},
{
"epoch": 0.3952,
"grad_norm": 0.228530153632164,
"learning_rate": 0.00012099200000000001,
"loss": 0.8622,
"step": 24700
},
{
"epoch": 0.39552,
"grad_norm": 0.2094821333885193,
"learning_rate": 0.00012092799999999999,
"loss": 0.8878,
"step": 24720
},
{
"epoch": 0.39584,
"grad_norm": 0.22119556367397308,
"learning_rate": 0.000120864,
"loss": 0.8235,
"step": 24740
},
{
"epoch": 0.39616,
"grad_norm": 0.23812732100486755,
"learning_rate": 0.0001208,
"loss": 0.8358,
"step": 24760
},
{
"epoch": 0.39648,
"grad_norm": 0.26022717356681824,
"learning_rate": 0.000120736,
"loss": 0.9271,
"step": 24780
},
{
"epoch": 0.3968,
"grad_norm": 0.21341145038604736,
"learning_rate": 0.00012067200000000001,
"loss": 0.7994,
"step": 24800
},
{
"epoch": 0.39712,
"grad_norm": 0.24168658256530762,
"learning_rate": 0.00012060800000000001,
"loss": 0.8575,
"step": 24820
},
{
"epoch": 0.39744,
"grad_norm": 0.24363411962985992,
"learning_rate": 0.00012054400000000002,
"loss": 0.9,
"step": 24840
},
{
"epoch": 0.39776,
"grad_norm": 0.23571257293224335,
"learning_rate": 0.00012048000000000001,
"loss": 0.8862,
"step": 24860
},
{
"epoch": 0.39808,
"grad_norm": 0.24681545794010162,
"learning_rate": 0.000120416,
"loss": 0.8989,
"step": 24880
},
{
"epoch": 0.3984,
"grad_norm": 0.2692868113517761,
"learning_rate": 0.000120352,
"loss": 0.9006,
"step": 24900
},
{
"epoch": 0.39872,
"grad_norm": 0.21049508452415466,
"learning_rate": 0.000120288,
"loss": 0.8785,
"step": 24920
},
{
"epoch": 0.39904,
"grad_norm": 0.23448914289474487,
"learning_rate": 0.000120224,
"loss": 0.8677,
"step": 24940
},
{
"epoch": 0.39936,
"grad_norm": 0.21739456057548523,
"learning_rate": 0.00012016,
"loss": 0.8595,
"step": 24960
},
{
"epoch": 0.39968,
"grad_norm": 0.2921290397644043,
"learning_rate": 0.00012009600000000001,
"loss": 0.8987,
"step": 24980
},
{
"epoch": 0.4,
"grad_norm": 0.2572174668312073,
"learning_rate": 0.000120032,
"loss": 0.8608,
"step": 25000
},
{
"epoch": 0.40032,
"grad_norm": 0.22153301537036896,
"learning_rate": 0.00011996800000000001,
"loss": 0.924,
"step": 25020
},
{
"epoch": 0.40064,
"grad_norm": 0.25618571043014526,
"learning_rate": 0.00011990400000000002,
"loss": 0.9082,
"step": 25040
},
{
"epoch": 0.40096,
"grad_norm": 0.23285400867462158,
"learning_rate": 0.00011983999999999999,
"loss": 0.8467,
"step": 25060
},
{
"epoch": 0.40128,
"grad_norm": 0.22959965467453003,
"learning_rate": 0.000119776,
"loss": 0.8813,
"step": 25080
},
{
"epoch": 0.4016,
"grad_norm": 0.2747071385383606,
"learning_rate": 0.00011971200000000001,
"loss": 0.8736,
"step": 25100
},
{
"epoch": 0.40192,
"grad_norm": 0.22911617159843445,
"learning_rate": 0.000119648,
"loss": 0.8696,
"step": 25120
},
{
"epoch": 0.40224,
"grad_norm": 0.2593874931335449,
"learning_rate": 0.00011958400000000001,
"loss": 0.8975,
"step": 25140
},
{
"epoch": 0.40256,
"grad_norm": 0.25185978412628174,
"learning_rate": 0.00011952000000000001,
"loss": 0.9214,
"step": 25160
},
{
"epoch": 0.40288,
"grad_norm": 0.20723138749599457,
"learning_rate": 0.00011945600000000002,
"loss": 0.8532,
"step": 25180
},
{
"epoch": 0.4032,
"grad_norm": 0.2371417135000229,
"learning_rate": 0.00011939200000000001,
"loss": 0.9435,
"step": 25200
},
{
"epoch": 0.40352,
"grad_norm": 0.22036102414131165,
"learning_rate": 0.00011932799999999999,
"loss": 0.9591,
"step": 25220
},
{
"epoch": 0.40384,
"grad_norm": 0.1885063499212265,
"learning_rate": 0.000119264,
"loss": 0.8855,
"step": 25240
},
{
"epoch": 0.40416,
"grad_norm": 0.22148434817790985,
"learning_rate": 0.0001192,
"loss": 0.8941,
"step": 25260
},
{
"epoch": 0.40448,
"grad_norm": 0.245897576212883,
"learning_rate": 0.000119136,
"loss": 0.8979,
"step": 25280
},
{
"epoch": 0.4048,
"grad_norm": 0.2093392014503479,
"learning_rate": 0.000119072,
"loss": 0.8549,
"step": 25300
},
{
"epoch": 0.40512,
"grad_norm": 0.20682351291179657,
"learning_rate": 0.00011900800000000001,
"loss": 0.8412,
"step": 25320
},
{
"epoch": 0.40544,
"grad_norm": 0.39095112681388855,
"learning_rate": 0.000118944,
"loss": 0.9389,
"step": 25340
},
{
"epoch": 0.40576,
"grad_norm": 0.2154461294412613,
"learning_rate": 0.00011888000000000001,
"loss": 0.9047,
"step": 25360
},
{
"epoch": 0.40608,
"grad_norm": 0.2192692905664444,
"learning_rate": 0.00011881600000000002,
"loss": 0.8371,
"step": 25380
},
{
"epoch": 0.4064,
"grad_norm": 0.30516675114631653,
"learning_rate": 0.00011875199999999999,
"loss": 0.9068,
"step": 25400
},
{
"epoch": 0.40672,
"grad_norm": 0.24160155653953552,
"learning_rate": 0.000118688,
"loss": 0.8778,
"step": 25420
},
{
"epoch": 0.40704,
"grad_norm": 0.2394413948059082,
"learning_rate": 0.00011862400000000001,
"loss": 0.9009,
"step": 25440
},
{
"epoch": 0.40736,
"grad_norm": 0.2312084585428238,
"learning_rate": 0.00011856,
"loss": 0.9599,
"step": 25460
},
{
"epoch": 0.40768,
"grad_norm": 0.24847859144210815,
"learning_rate": 0.00011849600000000001,
"loss": 0.9424,
"step": 25480
},
{
"epoch": 0.408,
"grad_norm": 0.2651779055595398,
"learning_rate": 0.00011843200000000001,
"loss": 0.8898,
"step": 25500
},
{
"epoch": 0.40832,
"grad_norm": 0.22847053408622742,
"learning_rate": 0.00011836800000000002,
"loss": 0.8775,
"step": 25520
},
{
"epoch": 0.40864,
"grad_norm": 0.25370457768440247,
"learning_rate": 0.00011830400000000001,
"loss": 0.8691,
"step": 25540
},
{
"epoch": 0.40896,
"grad_norm": 0.24085932970046997,
"learning_rate": 0.00011823999999999999,
"loss": 0.8863,
"step": 25560
},
{
"epoch": 0.40928,
"grad_norm": 0.2516380548477173,
"learning_rate": 0.000118176,
"loss": 0.8686,
"step": 25580
},
{
"epoch": 0.4096,
"grad_norm": 0.24218106269836426,
"learning_rate": 0.000118112,
"loss": 0.9093,
"step": 25600
},
{
"epoch": 0.40992,
"grad_norm": 0.22466421127319336,
"learning_rate": 0.000118048,
"loss": 0.8652,
"step": 25620
},
{
"epoch": 0.41024,
"grad_norm": 0.2240326702594757,
"learning_rate": 0.000117984,
"loss": 0.8873,
"step": 25640
},
{
"epoch": 0.41056,
"grad_norm": 0.24201804399490356,
"learning_rate": 0.00011792000000000001,
"loss": 0.8556,
"step": 25660
},
{
"epoch": 0.41088,
"grad_norm": 0.2758803963661194,
"learning_rate": 0.000117856,
"loss": 0.9015,
"step": 25680
},
{
"epoch": 0.4112,
"grad_norm": 0.23030854761600494,
"learning_rate": 0.00011779200000000001,
"loss": 0.8259,
"step": 25700
},
{
"epoch": 0.41152,
"grad_norm": 0.21517449617385864,
"learning_rate": 0.00011772800000000002,
"loss": 0.9246,
"step": 25720
},
{
"epoch": 0.41184,
"grad_norm": 0.2662043571472168,
"learning_rate": 0.00011766399999999999,
"loss": 0.9175,
"step": 25740
},
{
"epoch": 0.41216,
"grad_norm": 0.23844832181930542,
"learning_rate": 0.0001176,
"loss": 0.8416,
"step": 25760
},
{
"epoch": 0.41248,
"grad_norm": 0.23714718222618103,
"learning_rate": 0.00011753600000000001,
"loss": 0.8802,
"step": 25780
},
{
"epoch": 0.4128,
"grad_norm": 0.2341051995754242,
"learning_rate": 0.000117472,
"loss": 0.8629,
"step": 25800
},
{
"epoch": 0.41312,
"grad_norm": 0.2298164963722229,
"learning_rate": 0.00011740800000000001,
"loss": 0.8794,
"step": 25820
},
{
"epoch": 0.41344,
"grad_norm": 0.26338857412338257,
"learning_rate": 0.00011734400000000001,
"loss": 0.8953,
"step": 25840
},
{
"epoch": 0.41376,
"grad_norm": 0.2441425770521164,
"learning_rate": 0.00011728000000000002,
"loss": 0.888,
"step": 25860
},
{
"epoch": 0.41408,
"grad_norm": 0.22573915123939514,
"learning_rate": 0.00011721600000000001,
"loss": 0.8417,
"step": 25880
},
{
"epoch": 0.4144,
"grad_norm": 0.24974480271339417,
"learning_rate": 0.00011715199999999999,
"loss": 0.8313,
"step": 25900
},
{
"epoch": 0.41472,
"grad_norm": 0.22177425026893616,
"learning_rate": 0.000117088,
"loss": 0.8615,
"step": 25920
},
{
"epoch": 0.41504,
"grad_norm": 0.25715529918670654,
"learning_rate": 0.000117024,
"loss": 0.9622,
"step": 25940
},
{
"epoch": 0.41536,
"grad_norm": 0.22982242703437805,
"learning_rate": 0.00011696,
"loss": 0.8944,
"step": 25960
},
{
"epoch": 0.41568,
"grad_norm": 0.2524280250072479,
"learning_rate": 0.000116896,
"loss": 0.9148,
"step": 25980
},
{
"epoch": 0.416,
"grad_norm": 0.19068706035614014,
"learning_rate": 0.00011683200000000001,
"loss": 0.8383,
"step": 26000
},
{
"epoch": 0.41632,
"grad_norm": 0.20955216884613037,
"learning_rate": 0.000116768,
"loss": 0.8297,
"step": 26020
},
{
"epoch": 0.41664,
"grad_norm": 0.28669893741607666,
"learning_rate": 0.00011670400000000001,
"loss": 0.9302,
"step": 26040
},
{
"epoch": 0.41696,
"grad_norm": 0.2136538028717041,
"learning_rate": 0.00011664000000000002,
"loss": 0.8628,
"step": 26060
},
{
"epoch": 0.41728,
"grad_norm": 0.24216507375240326,
"learning_rate": 0.00011657599999999999,
"loss": 0.8645,
"step": 26080
},
{
"epoch": 0.4176,
"grad_norm": 0.23418830335140228,
"learning_rate": 0.000116512,
"loss": 0.8744,
"step": 26100
},
{
"epoch": 0.41792,
"grad_norm": 0.21107226610183716,
"learning_rate": 0.00011644800000000001,
"loss": 0.882,
"step": 26120
},
{
"epoch": 0.41824,
"grad_norm": 0.25925180315971375,
"learning_rate": 0.000116384,
"loss": 0.9287,
"step": 26140
},
{
"epoch": 0.41856,
"grad_norm": 0.24545122683048248,
"learning_rate": 0.00011632000000000001,
"loss": 0.871,
"step": 26160
},
{
"epoch": 0.41888,
"grad_norm": 0.24683259427547455,
"learning_rate": 0.000116256,
"loss": 0.9309,
"step": 26180
},
{
"epoch": 0.4192,
"grad_norm": 0.271581768989563,
"learning_rate": 0.00011619200000000002,
"loss": 0.8901,
"step": 26200
},
{
"epoch": 0.41952,
"grad_norm": 0.19227302074432373,
"learning_rate": 0.00011612800000000001,
"loss": 0.8457,
"step": 26220
},
{
"epoch": 0.41984,
"grad_norm": 0.2621937692165375,
"learning_rate": 0.00011606399999999999,
"loss": 0.9007,
"step": 26240
},
{
"epoch": 0.42016,
"grad_norm": 0.23038643598556519,
"learning_rate": 0.000116,
"loss": 0.8869,
"step": 26260
},
{
"epoch": 0.42048,
"grad_norm": 0.18889521062374115,
"learning_rate": 0.000115936,
"loss": 0.8939,
"step": 26280
},
{
"epoch": 0.4208,
"grad_norm": 0.22690792381763458,
"learning_rate": 0.000115872,
"loss": 0.8544,
"step": 26300
},
{
"epoch": 0.42112,
"grad_norm": 0.23775628209114075,
"learning_rate": 0.000115808,
"loss": 0.8847,
"step": 26320
},
{
"epoch": 0.42144,
"grad_norm": 0.22833390533924103,
"learning_rate": 0.00011574400000000001,
"loss": 0.8963,
"step": 26340
},
{
"epoch": 0.42176,
"grad_norm": 0.26199871301651,
"learning_rate": 0.00011568000000000002,
"loss": 0.8717,
"step": 26360
},
{
"epoch": 0.42208,
"grad_norm": 0.21491499245166779,
"learning_rate": 0.00011561600000000001,
"loss": 0.8355,
"step": 26380
},
{
"epoch": 0.4224,
"grad_norm": 0.24193742871284485,
"learning_rate": 0.00011555200000000002,
"loss": 0.8916,
"step": 26400
},
{
"epoch": 0.42272,
"grad_norm": 0.23363493382930756,
"learning_rate": 0.000115488,
"loss": 0.882,
"step": 26420
},
{
"epoch": 0.42304,
"grad_norm": 0.2511495053768158,
"learning_rate": 0.000115424,
"loss": 0.856,
"step": 26440
},
{
"epoch": 0.42336,
"grad_norm": 0.2527294456958771,
"learning_rate": 0.00011536000000000001,
"loss": 0.9213,
"step": 26460
},
{
"epoch": 0.42368,
"grad_norm": 0.24384371936321259,
"learning_rate": 0.000115296,
"loss": 0.8766,
"step": 26480
},
{
"epoch": 0.424,
"grad_norm": 0.22618679702281952,
"learning_rate": 0.00011523200000000001,
"loss": 0.8849,
"step": 26500
},
{
"epoch": 0.42432,
"grad_norm": 0.2192445695400238,
"learning_rate": 0.000115168,
"loss": 0.8945,
"step": 26520
},
{
"epoch": 0.42464,
"grad_norm": 0.21766866743564606,
"learning_rate": 0.00011510400000000001,
"loss": 0.9218,
"step": 26540
},
{
"epoch": 0.42496,
"grad_norm": 0.25323477387428284,
"learning_rate": 0.00011504000000000001,
"loss": 0.8973,
"step": 26560
},
{
"epoch": 0.42528,
"grad_norm": 0.22795149683952332,
"learning_rate": 0.0001149792,
"loss": 0.842,
"step": 26580
},
{
"epoch": 0.4256,
"grad_norm": 0.26310163736343384,
"learning_rate": 0.00011491520000000001,
"loss": 0.9459,
"step": 26600
},
{
"epoch": 0.42592,
"grad_norm": 0.2246372550725937,
"learning_rate": 0.0001148512,
"loss": 0.9001,
"step": 26620
},
{
"epoch": 0.42624,
"grad_norm": 0.23721113801002502,
"learning_rate": 0.00011478720000000001,
"loss": 0.9489,
"step": 26640
},
{
"epoch": 0.42656,
"grad_norm": 0.2320554107427597,
"learning_rate": 0.0001147232,
"loss": 0.8728,
"step": 26660
},
{
"epoch": 0.42688,
"grad_norm": 0.24293909966945648,
"learning_rate": 0.00011465920000000002,
"loss": 0.8921,
"step": 26680
},
{
"epoch": 0.4272,
"grad_norm": 0.23679333925247192,
"learning_rate": 0.00011459520000000001,
"loss": 0.8383,
"step": 26700
},
{
"epoch": 0.42752,
"grad_norm": 0.27467820048332214,
"learning_rate": 0.00011453120000000002,
"loss": 0.8646,
"step": 26720
},
{
"epoch": 0.42784,
"grad_norm": 0.2692321538925171,
"learning_rate": 0.0001144672,
"loss": 0.9117,
"step": 26740
},
{
"epoch": 0.42816,
"grad_norm": 0.2061707228422165,
"learning_rate": 0.0001144032,
"loss": 0.8794,
"step": 26760
},
{
"epoch": 0.42848,
"grad_norm": 0.24439223110675812,
"learning_rate": 0.0001143392,
"loss": 0.8522,
"step": 26780
},
{
"epoch": 0.4288,
"grad_norm": 0.2587136924266815,
"learning_rate": 0.0001142752,
"loss": 0.8729,
"step": 26800
},
{
"epoch": 0.42912,
"grad_norm": 0.2559018135070801,
"learning_rate": 0.00011421120000000001,
"loss": 0.8797,
"step": 26820
},
{
"epoch": 0.42944,
"grad_norm": 0.2468901127576828,
"learning_rate": 0.0001141472,
"loss": 0.9022,
"step": 26840
},
{
"epoch": 0.42976,
"grad_norm": 0.24873799085617065,
"learning_rate": 0.00011408320000000001,
"loss": 0.8634,
"step": 26860
},
{
"epoch": 0.43008,
"grad_norm": 0.23509488999843597,
"learning_rate": 0.00011401920000000002,
"loss": 0.8995,
"step": 26880
},
{
"epoch": 0.4304,
"grad_norm": 0.22719904780387878,
"learning_rate": 0.00011395519999999999,
"loss": 0.8288,
"step": 26900
},
{
"epoch": 0.43072,
"grad_norm": 0.26962873339653015,
"learning_rate": 0.0001138912,
"loss": 0.9099,
"step": 26920
},
{
"epoch": 0.43104,
"grad_norm": 0.2308361977338791,
"learning_rate": 0.00011382720000000001,
"loss": 0.8927,
"step": 26940
},
{
"epoch": 0.43136,
"grad_norm": 0.23821701109409332,
"learning_rate": 0.0001137632,
"loss": 0.8926,
"step": 26960
},
{
"epoch": 0.43168,
"grad_norm": 0.2621578574180603,
"learning_rate": 0.00011369920000000001,
"loss": 0.8996,
"step": 26980
},
{
"epoch": 0.432,
"grad_norm": 0.1956038624048233,
"learning_rate": 0.0001136352,
"loss": 0.8429,
"step": 27000
},
{
"epoch": 0.43232,
"grad_norm": 0.25825899839401245,
"learning_rate": 0.00011357120000000001,
"loss": 0.8912,
"step": 27020
},
{
"epoch": 0.43264,
"grad_norm": 0.2325858324766159,
"learning_rate": 0.00011350720000000001,
"loss": 0.9094,
"step": 27040
},
{
"epoch": 0.43296,
"grad_norm": 0.24896900355815887,
"learning_rate": 0.00011344320000000002,
"loss": 0.9526,
"step": 27060
},
{
"epoch": 0.43328,
"grad_norm": 0.2929576337337494,
"learning_rate": 0.0001133792,
"loss": 0.8513,
"step": 27080
},
{
"epoch": 0.4336,
"grad_norm": 0.23895148932933807,
"learning_rate": 0.0001133152,
"loss": 0.8708,
"step": 27100
},
{
"epoch": 0.43392,
"grad_norm": 0.22470715641975403,
"learning_rate": 0.0001132512,
"loss": 0.9009,
"step": 27120
},
{
"epoch": 0.43424,
"grad_norm": 0.25981777906417847,
"learning_rate": 0.0001131872,
"loss": 0.9252,
"step": 27140
},
{
"epoch": 0.43456,
"grad_norm": 0.26591363549232483,
"learning_rate": 0.00011312320000000001,
"loss": 0.878,
"step": 27160
},
{
"epoch": 0.43488,
"grad_norm": 0.24026769399642944,
"learning_rate": 0.0001130592,
"loss": 0.838,
"step": 27180
},
{
"epoch": 0.4352,
"grad_norm": 0.243183895945549,
"learning_rate": 0.00011299520000000001,
"loss": 0.945,
"step": 27200
},
{
"epoch": 0.43552,
"grad_norm": 0.28983068466186523,
"learning_rate": 0.00011293120000000002,
"loss": 0.8779,
"step": 27220
},
{
"epoch": 0.43584,
"grad_norm": 0.25985220074653625,
"learning_rate": 0.00011286719999999999,
"loss": 0.8852,
"step": 27240
},
{
"epoch": 0.43616,
"grad_norm": 0.2521764934062958,
"learning_rate": 0.0001128032,
"loss": 0.8459,
"step": 27260
},
{
"epoch": 0.43648,
"grad_norm": 0.2260691374540329,
"learning_rate": 0.0001127392,
"loss": 0.8478,
"step": 27280
},
{
"epoch": 0.4368,
"grad_norm": 0.24375227093696594,
"learning_rate": 0.0001126752,
"loss": 0.8566,
"step": 27300
},
{
"epoch": 0.43712,
"grad_norm": 0.23803727328777313,
"learning_rate": 0.00011261120000000001,
"loss": 0.8832,
"step": 27320
},
{
"epoch": 0.43744,
"grad_norm": 0.35262176394462585,
"learning_rate": 0.0001125472,
"loss": 0.9305,
"step": 27340
},
{
"epoch": 0.43776,
"grad_norm": 0.22310085594654083,
"learning_rate": 0.00011248320000000001,
"loss": 0.8806,
"step": 27360
},
{
"epoch": 0.43808,
"grad_norm": 0.22547666728496552,
"learning_rate": 0.00011241920000000001,
"loss": 0.8617,
"step": 27380
},
{
"epoch": 0.4384,
"grad_norm": 0.23973605036735535,
"learning_rate": 0.00011235520000000002,
"loss": 0.8415,
"step": 27400
},
{
"epoch": 0.43872,
"grad_norm": 0.2396925538778305,
"learning_rate": 0.0001122912,
"loss": 0.9221,
"step": 27420
},
{
"epoch": 0.43904,
"grad_norm": 0.2549417018890381,
"learning_rate": 0.0001122272,
"loss": 0.8289,
"step": 27440
},
{
"epoch": 0.43936,
"grad_norm": 0.3001738488674164,
"learning_rate": 0.0001121632,
"loss": 0.8951,
"step": 27460
},
{
"epoch": 0.43968,
"grad_norm": 0.28858518600463867,
"learning_rate": 0.0001120992,
"loss": 0.9271,
"step": 27480
},
{
"epoch": 0.44,
"grad_norm": 0.22798220813274384,
"learning_rate": 0.00011203520000000001,
"loss": 0.9133,
"step": 27500
},
{
"epoch": 0.44032,
"grad_norm": 0.2488940805196762,
"learning_rate": 0.0001119712,
"loss": 0.9054,
"step": 27520
},
{
"epoch": 0.44064,
"grad_norm": 0.26057055592536926,
"learning_rate": 0.00011190720000000001,
"loss": 0.9136,
"step": 27540
},
{
"epoch": 0.44096,
"grad_norm": 0.21583379805088043,
"learning_rate": 0.00011184320000000002,
"loss": 0.8559,
"step": 27560
},
{
"epoch": 0.44128,
"grad_norm": 0.2284022718667984,
"learning_rate": 0.00011177919999999999,
"loss": 0.8976,
"step": 27580
},
{
"epoch": 0.4416,
"grad_norm": 0.25823116302490234,
"learning_rate": 0.0001117152,
"loss": 0.863,
"step": 27600
},
{
"epoch": 0.44192,
"grad_norm": 0.206822007894516,
"learning_rate": 0.0001116512,
"loss": 0.8877,
"step": 27620
},
{
"epoch": 0.44224,
"grad_norm": 0.21545498073101044,
"learning_rate": 0.0001115872,
"loss": 0.907,
"step": 27640
},
{
"epoch": 0.44256,
"grad_norm": 0.20267954468727112,
"learning_rate": 0.00011152320000000001,
"loss": 0.8363,
"step": 27660
},
{
"epoch": 0.44288,
"grad_norm": 0.22508728504180908,
"learning_rate": 0.0001114592,
"loss": 0.8913,
"step": 27680
},
{
"epoch": 0.4432,
"grad_norm": 0.2211364060640335,
"learning_rate": 0.00011139520000000001,
"loss": 0.8492,
"step": 27700
},
{
"epoch": 0.44352,
"grad_norm": 0.23222848773002625,
"learning_rate": 0.00011133120000000001,
"loss": 0.8941,
"step": 27720
},
{
"epoch": 0.44384,
"grad_norm": 0.2355644702911377,
"learning_rate": 0.00011126720000000002,
"loss": 0.9167,
"step": 27740
},
{
"epoch": 0.44416,
"grad_norm": 0.25646787881851196,
"learning_rate": 0.0001112032,
"loss": 0.8852,
"step": 27760
},
{
"epoch": 0.44448,
"grad_norm": 0.24109818041324615,
"learning_rate": 0.0001111392,
"loss": 0.8864,
"step": 27780
},
{
"epoch": 0.4448,
"grad_norm": 0.2541385591030121,
"learning_rate": 0.0001110752,
"loss": 0.8674,
"step": 27800
},
{
"epoch": 0.44512,
"grad_norm": 0.2090396136045456,
"learning_rate": 0.0001110112,
"loss": 0.8601,
"step": 27820
},
{
"epoch": 0.44544,
"grad_norm": 0.249611958861351,
"learning_rate": 0.00011094720000000001,
"loss": 0.8973,
"step": 27840
},
{
"epoch": 0.44576,
"grad_norm": 0.24944797158241272,
"learning_rate": 0.00011088320000000002,
"loss": 0.9308,
"step": 27860
},
{
"epoch": 0.44608,
"grad_norm": 0.2585296332836151,
"learning_rate": 0.00011081920000000001,
"loss": 0.8865,
"step": 27880
},
{
"epoch": 0.4464,
"grad_norm": 0.2178046554327011,
"learning_rate": 0.00011075520000000002,
"loss": 0.8511,
"step": 27900
},
{
"epoch": 0.44672,
"grad_norm": 0.2413053810596466,
"learning_rate": 0.0001106912,
"loss": 0.9125,
"step": 27920
},
{
"epoch": 0.44704,
"grad_norm": 0.23110991716384888,
"learning_rate": 0.0001106272,
"loss": 0.9167,
"step": 27940
},
{
"epoch": 0.44736,
"grad_norm": 0.2292502075433731,
"learning_rate": 0.0001105632,
"loss": 0.8573,
"step": 27960
},
{
"epoch": 0.44768,
"grad_norm": 0.2460576891899109,
"learning_rate": 0.0001104992,
"loss": 0.8469,
"step": 27980
},
{
"epoch": 0.448,
"grad_norm": 0.2802644371986389,
"learning_rate": 0.00011043520000000001,
"loss": 0.8421,
"step": 28000
},
{
"epoch": 0.44832,
"grad_norm": 0.21653008460998535,
"learning_rate": 0.0001103712,
"loss": 0.9343,
"step": 28020
},
{
"epoch": 0.44864,
"grad_norm": 0.21708372235298157,
"learning_rate": 0.00011030720000000001,
"loss": 0.8866,
"step": 28040
},
{
"epoch": 0.44896,
"grad_norm": 0.2733645737171173,
"learning_rate": 0.00011024320000000001,
"loss": 0.9055,
"step": 28060
},
{
"epoch": 0.44928,
"grad_norm": 0.2751225531101227,
"learning_rate": 0.00011017920000000002,
"loss": 0.8615,
"step": 28080
},
{
"epoch": 0.4496,
"grad_norm": 0.23991945385932922,
"learning_rate": 0.0001101152,
"loss": 0.8963,
"step": 28100
},
{
"epoch": 0.44992,
"grad_norm": 0.3005094528198242,
"learning_rate": 0.0001100512,
"loss": 0.8711,
"step": 28120
},
{
"epoch": 0.45024,
"grad_norm": 0.2634584903717041,
"learning_rate": 0.0001099872,
"loss": 0.8764,
"step": 28140
},
{
"epoch": 0.45056,
"grad_norm": 0.22322441637516022,
"learning_rate": 0.0001099232,
"loss": 0.8467,
"step": 28160
},
{
"epoch": 0.45088,
"grad_norm": 0.21676741540431976,
"learning_rate": 0.00010985920000000001,
"loss": 0.8433,
"step": 28180
},
{
"epoch": 0.4512,
"grad_norm": 0.26323333382606506,
"learning_rate": 0.00010979520000000002,
"loss": 0.9486,
"step": 28200
},
{
"epoch": 0.45152,
"grad_norm": 0.21226100623607635,
"learning_rate": 0.00010973120000000001,
"loss": 0.8896,
"step": 28220
},
{
"epoch": 0.45184,
"grad_norm": 0.22679319977760315,
"learning_rate": 0.00010966720000000002,
"loss": 0.8528,
"step": 28240
},
{
"epoch": 0.45216,
"grad_norm": 0.18700149655342102,
"learning_rate": 0.0001096032,
"loss": 0.8368,
"step": 28260
},
{
"epoch": 0.45248,
"grad_norm": 0.23498637974262238,
"learning_rate": 0.0001095392,
"loss": 0.9194,
"step": 28280
},
{
"epoch": 0.4528,
"grad_norm": 0.22683313488960266,
"learning_rate": 0.0001094752,
"loss": 0.906,
"step": 28300
},
{
"epoch": 0.45312,
"grad_norm": 0.23284801840782166,
"learning_rate": 0.0001094112,
"loss": 0.9314,
"step": 28320
},
{
"epoch": 0.45344,
"grad_norm": 0.24406535923480988,
"learning_rate": 0.00010934720000000001,
"loss": 0.9017,
"step": 28340
},
{
"epoch": 0.45376,
"grad_norm": 0.22531583905220032,
"learning_rate": 0.0001092832,
"loss": 0.8906,
"step": 28360
},
{
"epoch": 0.45408,
"grad_norm": 0.21459725499153137,
"learning_rate": 0.00010921920000000001,
"loss": 0.8495,
"step": 28380
},
{
"epoch": 0.4544,
"grad_norm": 0.2690097689628601,
"learning_rate": 0.00010915520000000001,
"loss": 0.959,
"step": 28400
},
{
"epoch": 0.45472,
"grad_norm": 0.3007669448852539,
"learning_rate": 0.00010909120000000002,
"loss": 0.8716,
"step": 28420
},
{
"epoch": 0.45504,
"grad_norm": 0.2667557895183563,
"learning_rate": 0.0001090272,
"loss": 0.8909,
"step": 28440
},
{
"epoch": 0.45536,
"grad_norm": 0.24151913821697235,
"learning_rate": 0.0001089632,
"loss": 0.8808,
"step": 28460
},
{
"epoch": 0.45568,
"grad_norm": 0.2797807455062866,
"learning_rate": 0.0001088992,
"loss": 0.872,
"step": 28480
},
{
"epoch": 0.456,
"grad_norm": 0.3146251440048218,
"learning_rate": 0.0001088352,
"loss": 0.8632,
"step": 28500
},
{
"epoch": 0.45632,
"grad_norm": 0.26347050070762634,
"learning_rate": 0.0001087712,
"loss": 0.8863,
"step": 28520
},
{
"epoch": 0.45664,
"grad_norm": 0.24761775135993958,
"learning_rate": 0.00010870720000000002,
"loss": 0.8692,
"step": 28540
},
{
"epoch": 0.45696,
"grad_norm": 0.258346825838089,
"learning_rate": 0.00010864320000000001,
"loss": 0.8733,
"step": 28560
},
{
"epoch": 0.45728,
"grad_norm": 0.2539924085140228,
"learning_rate": 0.00010857920000000002,
"loss": 0.8691,
"step": 28580
},
{
"epoch": 0.4576,
"grad_norm": 0.5414292216300964,
"learning_rate": 0.0001085152,
"loss": 0.9207,
"step": 28600
},
{
"epoch": 0.45792,
"grad_norm": 0.21679574251174927,
"learning_rate": 0.0001084512,
"loss": 0.8953,
"step": 28620
},
{
"epoch": 0.45824,
"grad_norm": 0.236148864030838,
"learning_rate": 0.0001083872,
"loss": 0.8793,
"step": 28640
},
{
"epoch": 0.45856,
"grad_norm": 0.21311239898204803,
"learning_rate": 0.0001083232,
"loss": 0.8276,
"step": 28660
},
{
"epoch": 0.45888,
"grad_norm": 0.23734121024608612,
"learning_rate": 0.00010825920000000001,
"loss": 0.8464,
"step": 28680
},
{
"epoch": 0.4592,
"grad_norm": 0.2556433081626892,
"learning_rate": 0.0001081952,
"loss": 0.8775,
"step": 28700
},
{
"epoch": 0.45952,
"grad_norm": 0.25167569518089294,
"learning_rate": 0.00010813120000000001,
"loss": 0.8801,
"step": 28720
},
{
"epoch": 0.45984,
"grad_norm": 0.21533095836639404,
"learning_rate": 0.00010806720000000001,
"loss": 0.9013,
"step": 28740
},
{
"epoch": 0.46016,
"grad_norm": 0.2581512928009033,
"learning_rate": 0.00010800320000000002,
"loss": 0.8842,
"step": 28760
},
{
"epoch": 0.46048,
"grad_norm": 0.22151516377925873,
"learning_rate": 0.0001079392,
"loss": 0.9151,
"step": 28780
},
{
"epoch": 0.4608,
"grad_norm": 0.2598574459552765,
"learning_rate": 0.0001078752,
"loss": 0.8817,
"step": 28800
},
{
"epoch": 0.46112,
"grad_norm": 0.22162817418575287,
"learning_rate": 0.0001078112,
"loss": 0.8887,
"step": 28820
},
{
"epoch": 0.46144,
"grad_norm": 0.2202143371105194,
"learning_rate": 0.0001077472,
"loss": 0.9206,
"step": 28840
},
{
"epoch": 0.46176,
"grad_norm": 0.24929936230182648,
"learning_rate": 0.0001076832,
"loss": 0.9279,
"step": 28860
},
{
"epoch": 0.46208,
"grad_norm": 0.26587414741516113,
"learning_rate": 0.00010761920000000001,
"loss": 0.9087,
"step": 28880
},
{
"epoch": 0.4624,
"grad_norm": 0.2536023259162903,
"learning_rate": 0.00010755520000000001,
"loss": 0.9128,
"step": 28900
},
{
"epoch": 0.46272,
"grad_norm": 0.1964925229549408,
"learning_rate": 0.00010749120000000002,
"loss": 0.8445,
"step": 28920
},
{
"epoch": 0.46304,
"grad_norm": 0.21544238924980164,
"learning_rate": 0.0001074272,
"loss": 0.856,
"step": 28940
},
{
"epoch": 0.46336,
"grad_norm": 0.25481197237968445,
"learning_rate": 0.0001073632,
"loss": 0.8555,
"step": 28960
},
{
"epoch": 0.46368,
"grad_norm": 0.24622678756713867,
"learning_rate": 0.0001072992,
"loss": 0.8975,
"step": 28980
},
{
"epoch": 0.464,
"grad_norm": 0.2316320389509201,
"learning_rate": 0.0001072352,
"loss": 0.8802,
"step": 29000
},
{
"epoch": 0.46432,
"grad_norm": 0.22140583395957947,
"learning_rate": 0.00010717120000000001,
"loss": 0.8845,
"step": 29020
},
{
"epoch": 0.46464,
"grad_norm": 0.21848374605178833,
"learning_rate": 0.0001071072,
"loss": 0.8766,
"step": 29040
},
{
"epoch": 0.46496,
"grad_norm": 0.2609255015850067,
"learning_rate": 0.00010704320000000001,
"loss": 0.9335,
"step": 29060
},
{
"epoch": 0.46528,
"grad_norm": 0.25037628412246704,
"learning_rate": 0.00010697920000000001,
"loss": 0.8705,
"step": 29080
},
{
"epoch": 0.4656,
"grad_norm": 0.2289629876613617,
"learning_rate": 0.00010691520000000002,
"loss": 0.9401,
"step": 29100
},
{
"epoch": 0.46592,
"grad_norm": 0.28347960114479065,
"learning_rate": 0.0001068512,
"loss": 0.9032,
"step": 29120
},
{
"epoch": 0.46624,
"grad_norm": 0.26885560154914856,
"learning_rate": 0.00010678719999999999,
"loss": 0.8777,
"step": 29140
},
{
"epoch": 0.46656,
"grad_norm": 0.240605428814888,
"learning_rate": 0.0001067232,
"loss": 0.8381,
"step": 29160
},
{
"epoch": 0.46688,
"grad_norm": 0.21176287531852722,
"learning_rate": 0.0001066592,
"loss": 0.8988,
"step": 29180
},
{
"epoch": 0.4672,
"grad_norm": 0.2735714316368103,
"learning_rate": 0.0001065952,
"loss": 0.8879,
"step": 29200
},
{
"epoch": 0.46752,
"grad_norm": 0.22908750176429749,
"learning_rate": 0.00010653120000000001,
"loss": 0.9259,
"step": 29220
},
{
"epoch": 0.46784,
"grad_norm": 0.2262643426656723,
"learning_rate": 0.00010646720000000001,
"loss": 0.8774,
"step": 29240
},
{
"epoch": 0.46816,
"grad_norm": 0.1967533677816391,
"learning_rate": 0.00010640320000000002,
"loss": 0.8512,
"step": 29260
},
{
"epoch": 0.46848,
"grad_norm": 0.23780053853988647,
"learning_rate": 0.0001063392,
"loss": 0.8703,
"step": 29280
},
{
"epoch": 0.4688,
"grad_norm": 0.2447732537984848,
"learning_rate": 0.0001062752,
"loss": 0.8656,
"step": 29300
},
{
"epoch": 0.46912,
"grad_norm": 0.23653922975063324,
"learning_rate": 0.0001062112,
"loss": 0.9124,
"step": 29320
},
{
"epoch": 0.46944,
"grad_norm": 0.22936119139194489,
"learning_rate": 0.0001061472,
"loss": 0.9262,
"step": 29340
},
{
"epoch": 0.46976,
"grad_norm": 0.26746055483818054,
"learning_rate": 0.00010608320000000001,
"loss": 0.9279,
"step": 29360
},
{
"epoch": 0.47008,
"grad_norm": 0.27925965189933777,
"learning_rate": 0.0001060192,
"loss": 0.9249,
"step": 29380
},
{
"epoch": 0.4704,
"grad_norm": 0.25772443413734436,
"learning_rate": 0.00010595520000000001,
"loss": 0.857,
"step": 29400
},
{
"epoch": 0.47072,
"grad_norm": 0.31103163957595825,
"learning_rate": 0.00010589120000000001,
"loss": 0.8858,
"step": 29420
},
{
"epoch": 0.47104,
"grad_norm": 0.2527211010456085,
"learning_rate": 0.00010582720000000002,
"loss": 0.8851,
"step": 29440
},
{
"epoch": 0.47136,
"grad_norm": 0.2501220405101776,
"learning_rate": 0.0001057632,
"loss": 0.8829,
"step": 29460
},
{
"epoch": 0.47168,
"grad_norm": 0.20719179511070251,
"learning_rate": 0.00010569919999999999,
"loss": 0.8934,
"step": 29480
},
{
"epoch": 0.472,
"grad_norm": 0.24948135018348694,
"learning_rate": 0.0001056352,
"loss": 0.9229,
"step": 29500
},
{
"epoch": 0.47232,
"grad_norm": 0.2544534206390381,
"learning_rate": 0.00010557120000000001,
"loss": 0.9075,
"step": 29520
},
{
"epoch": 0.47264,
"grad_norm": 0.24410614371299744,
"learning_rate": 0.0001055072,
"loss": 0.8189,
"step": 29540
},
{
"epoch": 0.47296,
"grad_norm": 0.2736496925354004,
"learning_rate": 0.00010544320000000001,
"loss": 0.8778,
"step": 29560
},
{
"epoch": 0.47328,
"grad_norm": 0.2509610652923584,
"learning_rate": 0.00010537920000000001,
"loss": 0.9481,
"step": 29580
},
{
"epoch": 0.4736,
"grad_norm": 0.23943458497524261,
"learning_rate": 0.00010531520000000002,
"loss": 0.8921,
"step": 29600
},
{
"epoch": 0.47392,
"grad_norm": 0.2580275237560272,
"learning_rate": 0.0001052512,
"loss": 0.9293,
"step": 29620
},
{
"epoch": 0.47424,
"grad_norm": 0.27012938261032104,
"learning_rate": 0.0001051872,
"loss": 0.8833,
"step": 29640
},
{
"epoch": 0.47456,
"grad_norm": 0.25766292214393616,
"learning_rate": 0.0001051232,
"loss": 0.8901,
"step": 29660
},
{
"epoch": 0.47488,
"grad_norm": 0.2404147982597351,
"learning_rate": 0.0001050592,
"loss": 0.875,
"step": 29680
},
{
"epoch": 0.4752,
"grad_norm": 0.24435456097126007,
"learning_rate": 0.00010499520000000001,
"loss": 0.9058,
"step": 29700
},
{
"epoch": 0.47552,
"grad_norm": 0.21276427805423737,
"learning_rate": 0.0001049312,
"loss": 0.851,
"step": 29720
},
{
"epoch": 0.47584,
"grad_norm": 0.26436007022857666,
"learning_rate": 0.00010486720000000001,
"loss": 0.8794,
"step": 29740
},
{
"epoch": 0.47616,
"grad_norm": 0.2028064727783203,
"learning_rate": 0.0001048032,
"loss": 0.9011,
"step": 29760
},
{
"epoch": 0.47648,
"grad_norm": 0.2643204629421234,
"learning_rate": 0.00010473920000000002,
"loss": 0.8951,
"step": 29780
},
{
"epoch": 0.4768,
"grad_norm": 0.2519238293170929,
"learning_rate": 0.0001046752,
"loss": 0.9099,
"step": 29800
},
{
"epoch": 0.47712,
"grad_norm": 0.2304890900850296,
"learning_rate": 0.00010461119999999999,
"loss": 0.9054,
"step": 29820
},
{
"epoch": 0.47744,
"grad_norm": 0.24808572232723236,
"learning_rate": 0.0001045472,
"loss": 0.9119,
"step": 29840
},
{
"epoch": 0.47776,
"grad_norm": 0.23621508479118347,
"learning_rate": 0.00010448320000000001,
"loss": 0.9018,
"step": 29860
},
{
"epoch": 0.47808,
"grad_norm": 0.23834584653377533,
"learning_rate": 0.0001044192,
"loss": 0.8939,
"step": 29880
},
{
"epoch": 0.4784,
"grad_norm": 0.2608813941478729,
"learning_rate": 0.00010435520000000001,
"loss": 0.898,
"step": 29900
},
{
"epoch": 0.47872,
"grad_norm": 0.22944658994674683,
"learning_rate": 0.00010429120000000001,
"loss": 0.8535,
"step": 29920
},
{
"epoch": 0.47904,
"grad_norm": 0.18182271718978882,
"learning_rate": 0.00010422720000000002,
"loss": 0.9109,
"step": 29940
},
{
"epoch": 0.47936,
"grad_norm": 0.25479796528816223,
"learning_rate": 0.0001041632,
"loss": 0.8445,
"step": 29960
},
{
"epoch": 0.47968,
"grad_norm": 0.2711828052997589,
"learning_rate": 0.0001040992,
"loss": 0.8993,
"step": 29980
},
{
"epoch": 0.48,
"grad_norm": 0.23804309964179993,
"learning_rate": 0.0001040352,
"loss": 0.826,
"step": 30000
},
{
"epoch": 0.48032,
"grad_norm": 0.2019588053226471,
"learning_rate": 0.0001039712,
"loss": 0.8777,
"step": 30020
},
{
"epoch": 0.48064,
"grad_norm": 0.2302328497171402,
"learning_rate": 0.00010390720000000001,
"loss": 0.9068,
"step": 30040
},
{
"epoch": 0.48096,
"grad_norm": 0.20539428293704987,
"learning_rate": 0.0001038432,
"loss": 0.894,
"step": 30060
},
{
"epoch": 0.48128,
"grad_norm": 0.3114759624004364,
"learning_rate": 0.00010377920000000001,
"loss": 0.8664,
"step": 30080
},
{
"epoch": 0.4816,
"grad_norm": 0.22987259924411774,
"learning_rate": 0.0001037152,
"loss": 0.8831,
"step": 30100
},
{
"epoch": 0.48192,
"grad_norm": 0.29275548458099365,
"learning_rate": 0.00010365120000000001,
"loss": 0.9069,
"step": 30120
},
{
"epoch": 0.48224,
"grad_norm": 0.21175888180732727,
"learning_rate": 0.0001035872,
"loss": 0.9181,
"step": 30140
},
{
"epoch": 0.48256,
"grad_norm": 0.32916828989982605,
"learning_rate": 0.00010352319999999999,
"loss": 0.9072,
"step": 30160
},
{
"epoch": 0.48288,
"grad_norm": 0.24162223935127258,
"learning_rate": 0.0001034592,
"loss": 0.8855,
"step": 30180
},
{
"epoch": 0.4832,
"grad_norm": 0.21086947619915009,
"learning_rate": 0.00010339520000000001,
"loss": 0.9223,
"step": 30200
},
{
"epoch": 0.48352,
"grad_norm": 0.24102705717086792,
"learning_rate": 0.0001033312,
"loss": 0.8887,
"step": 30220
},
{
"epoch": 0.48384,
"grad_norm": 0.24661841988563538,
"learning_rate": 0.00010326720000000001,
"loss": 0.9,
"step": 30240
},
{
"epoch": 0.48416,
"grad_norm": 0.2146749347448349,
"learning_rate": 0.00010320320000000001,
"loss": 0.9188,
"step": 30260
},
{
"epoch": 0.48448,
"grad_norm": 0.31963422894477844,
"learning_rate": 0.00010313920000000002,
"loss": 0.8681,
"step": 30280
},
{
"epoch": 0.4848,
"grad_norm": 0.25737327337265015,
"learning_rate": 0.0001030752,
"loss": 0.866,
"step": 30300
},
{
"epoch": 0.48512,
"grad_norm": 0.25805580615997314,
"learning_rate": 0.0001030112,
"loss": 0.9203,
"step": 30320
},
{
"epoch": 0.48544,
"grad_norm": 0.2447681874036789,
"learning_rate": 0.0001029472,
"loss": 0.8897,
"step": 30340
},
{
"epoch": 0.48576,
"grad_norm": 0.2577648460865021,
"learning_rate": 0.0001028832,
"loss": 0.8548,
"step": 30360
},
{
"epoch": 0.48608,
"grad_norm": 0.24042245745658875,
"learning_rate": 0.0001028192,
"loss": 0.9512,
"step": 30380
},
{
"epoch": 0.4864,
"grad_norm": 0.24115043878555298,
"learning_rate": 0.0001027552,
"loss": 0.9036,
"step": 30400
},
{
"epoch": 0.48672,
"grad_norm": 0.2144307792186737,
"learning_rate": 0.00010269120000000001,
"loss": 0.9172,
"step": 30420
},
{
"epoch": 0.48704,
"grad_norm": 0.21385303139686584,
"learning_rate": 0.0001026272,
"loss": 0.8796,
"step": 30440
},
{
"epoch": 0.48736,
"grad_norm": 0.21479547023773193,
"learning_rate": 0.00010256320000000001,
"loss": 0.8923,
"step": 30460
},
{
"epoch": 0.48768,
"grad_norm": 0.24400416016578674,
"learning_rate": 0.0001024992,
"loss": 0.9339,
"step": 30480
},
{
"epoch": 0.488,
"grad_norm": 0.19768428802490234,
"learning_rate": 0.00010243519999999999,
"loss": 0.8924,
"step": 30500
},
{
"epoch": 0.48832,
"grad_norm": 0.23109054565429688,
"learning_rate": 0.0001023712,
"loss": 0.9223,
"step": 30520
},
{
"epoch": 0.48864,
"grad_norm": 0.1906929314136505,
"learning_rate": 0.00010230720000000001,
"loss": 0.8876,
"step": 30540
},
{
"epoch": 0.48896,
"grad_norm": 0.24491210281848907,
"learning_rate": 0.0001022432,
"loss": 0.8911,
"step": 30560
},
{
"epoch": 0.48928,
"grad_norm": 0.21576926112174988,
"learning_rate": 0.00010217920000000001,
"loss": 0.8618,
"step": 30580
},
{
"epoch": 0.4896,
"grad_norm": 0.2178792953491211,
"learning_rate": 0.00010211520000000001,
"loss": 0.8744,
"step": 30600
},
{
"epoch": 0.48992,
"grad_norm": 0.2320430725812912,
"learning_rate": 0.00010205120000000002,
"loss": 0.8853,
"step": 30620
},
{
"epoch": 0.49024,
"grad_norm": 0.2017570436000824,
"learning_rate": 0.0001019872,
"loss": 0.878,
"step": 30640
},
{
"epoch": 0.49056,
"grad_norm": 0.26477140188217163,
"learning_rate": 0.0001019232,
"loss": 0.9664,
"step": 30660
},
{
"epoch": 0.49088,
"grad_norm": 0.22188952565193176,
"learning_rate": 0.0001018592,
"loss": 0.8757,
"step": 30680
},
{
"epoch": 0.4912,
"grad_norm": 0.2646230161190033,
"learning_rate": 0.0001017952,
"loss": 0.8465,
"step": 30700
},
{
"epoch": 0.49152,
"grad_norm": 0.19764351844787598,
"learning_rate": 0.0001017312,
"loss": 0.8602,
"step": 30720
},
{
"epoch": 0.49184,
"grad_norm": 0.2683072090148926,
"learning_rate": 0.0001016672,
"loss": 0.9249,
"step": 30740
},
{
"epoch": 0.49216,
"grad_norm": 0.2290680855512619,
"learning_rate": 0.00010160320000000001,
"loss": 0.8733,
"step": 30760
},
{
"epoch": 0.49248,
"grad_norm": 0.2023002654314041,
"learning_rate": 0.00010153920000000002,
"loss": 0.9078,
"step": 30780
},
{
"epoch": 0.4928,
"grad_norm": 0.2146545648574829,
"learning_rate": 0.00010147520000000001,
"loss": 0.901,
"step": 30800
},
{
"epoch": 0.49312,
"grad_norm": 0.2241715043783188,
"learning_rate": 0.0001014112,
"loss": 0.8551,
"step": 30820
},
{
"epoch": 0.49344,
"grad_norm": 0.21123602986335754,
"learning_rate": 0.0001013472,
"loss": 0.8821,
"step": 30840
},
{
"epoch": 0.49376,
"grad_norm": 0.2702392637729645,
"learning_rate": 0.0001012832,
"loss": 0.9438,
"step": 30860
},
{
"epoch": 0.49408,
"grad_norm": 0.22311244904994965,
"learning_rate": 0.00010121920000000001,
"loss": 0.8835,
"step": 30880
},
{
"epoch": 0.4944,
"grad_norm": 0.20762741565704346,
"learning_rate": 0.0001011552,
"loss": 0.8668,
"step": 30900
},
{
"epoch": 0.49472,
"grad_norm": 0.2043907791376114,
"learning_rate": 0.00010109120000000001,
"loss": 0.8684,
"step": 30920
},
{
"epoch": 0.49504,
"grad_norm": 0.23084862530231476,
"learning_rate": 0.00010102720000000001,
"loss": 0.9253,
"step": 30940
},
{
"epoch": 0.49536,
"grad_norm": 0.23380133509635925,
"learning_rate": 0.00010096320000000002,
"loss": 0.8374,
"step": 30960
},
{
"epoch": 0.49568,
"grad_norm": 0.23766806721687317,
"learning_rate": 0.00010089920000000001,
"loss": 0.8712,
"step": 30980
},
{
"epoch": 0.496,
"grad_norm": 0.24725081026554108,
"learning_rate": 0.00010083519999999999,
"loss": 0.8836,
"step": 31000
},
{
"epoch": 0.49632,
"grad_norm": 0.25081491470336914,
"learning_rate": 0.0001007712,
"loss": 0.8037,
"step": 31020
},
{
"epoch": 0.49664,
"grad_norm": 0.3002113401889801,
"learning_rate": 0.0001007072,
"loss": 0.8463,
"step": 31040
},
{
"epoch": 0.49696,
"grad_norm": 0.24691928923130035,
"learning_rate": 0.0001006432,
"loss": 0.8808,
"step": 31060
},
{
"epoch": 0.49728,
"grad_norm": 0.22657333314418793,
"learning_rate": 0.0001005792,
"loss": 0.8678,
"step": 31080
},
{
"epoch": 0.4976,
"grad_norm": 0.2616662085056305,
"learning_rate": 0.00010051520000000001,
"loss": 0.8786,
"step": 31100
},
{
"epoch": 0.49792,
"grad_norm": 0.23406346142292023,
"learning_rate": 0.00010045120000000002,
"loss": 0.8488,
"step": 31120
},
{
"epoch": 0.49824,
"grad_norm": 0.2088574469089508,
"learning_rate": 0.00010038720000000001,
"loss": 0.8433,
"step": 31140
},
{
"epoch": 0.49856,
"grad_norm": 0.24721549451351166,
"learning_rate": 0.0001003264,
"loss": 0.8545,
"step": 31160
},
{
"epoch": 0.49888,
"grad_norm": 0.2351522445678711,
"learning_rate": 0.0001002624,
"loss": 0.929,
"step": 31180
},
{
"epoch": 0.4992,
"grad_norm": 0.24752940237522125,
"learning_rate": 0.0001001984,
"loss": 0.8709,
"step": 31200
},
{
"epoch": 0.49952,
"grad_norm": 0.2309713363647461,
"learning_rate": 0.0001001344,
"loss": 0.8782,
"step": 31220
},
{
"epoch": 0.49984,
"grad_norm": 0.24925391376018524,
"learning_rate": 0.00010007040000000001,
"loss": 0.8542,
"step": 31240
},
{
"epoch": 0.50016,
"grad_norm": 0.22123312950134277,
"learning_rate": 0.0001000064,
"loss": 0.8859,
"step": 31260
},
{
"epoch": 0.50048,
"grad_norm": 0.24795830249786377,
"learning_rate": 9.99424e-05,
"loss": 0.8789,
"step": 31280
},
{
"epoch": 0.5008,
"grad_norm": 0.22997990250587463,
"learning_rate": 9.987840000000001e-05,
"loss": 0.8454,
"step": 31300
},
{
"epoch": 0.50112,
"grad_norm": 0.24946443736553192,
"learning_rate": 9.98144e-05,
"loss": 0.9236,
"step": 31320
},
{
"epoch": 0.50144,
"grad_norm": 0.21945710480213165,
"learning_rate": 9.97504e-05,
"loss": 0.9018,
"step": 31340
},
{
"epoch": 0.50176,
"grad_norm": 0.23725731670856476,
"learning_rate": 9.968640000000001e-05,
"loss": 0.8932,
"step": 31360
},
{
"epoch": 0.50208,
"grad_norm": 0.256510466337204,
"learning_rate": 9.96224e-05,
"loss": 0.8957,
"step": 31380
},
{
"epoch": 0.5024,
"grad_norm": 0.2660234272480011,
"learning_rate": 9.955840000000001e-05,
"loss": 0.8573,
"step": 31400
},
{
"epoch": 0.50272,
"grad_norm": 0.2343997359275818,
"learning_rate": 9.949440000000001e-05,
"loss": 0.8419,
"step": 31420
},
{
"epoch": 0.50304,
"grad_norm": 0.25852516293525696,
"learning_rate": 9.94304e-05,
"loss": 0.8699,
"step": 31440
},
{
"epoch": 0.50336,
"grad_norm": 0.21607396006584167,
"learning_rate": 9.93664e-05,
"loss": 0.8752,
"step": 31460
},
{
"epoch": 0.50368,
"grad_norm": 0.23309437930583954,
"learning_rate": 9.93024e-05,
"loss": 0.8992,
"step": 31480
},
{
"epoch": 0.504,
"grad_norm": 0.24827729165554047,
"learning_rate": 9.923840000000002e-05,
"loss": 0.9009,
"step": 31500
},
{
"epoch": 0.50432,
"grad_norm": 0.22215944528579712,
"learning_rate": 9.91744e-05,
"loss": 0.8935,
"step": 31520
},
{
"epoch": 0.50464,
"grad_norm": 0.23230740427970886,
"learning_rate": 9.91104e-05,
"loss": 0.9087,
"step": 31540
},
{
"epoch": 0.50496,
"grad_norm": 0.25434383749961853,
"learning_rate": 9.90464e-05,
"loss": 0.9246,
"step": 31560
},
{
"epoch": 0.50528,
"grad_norm": 0.22664053738117218,
"learning_rate": 9.898240000000001e-05,
"loss": 0.8454,
"step": 31580
},
{
"epoch": 0.5056,
"grad_norm": 0.2654976546764374,
"learning_rate": 9.89184e-05,
"loss": 0.8788,
"step": 31600
},
{
"epoch": 0.50592,
"grad_norm": 0.23799720406532288,
"learning_rate": 9.88544e-05,
"loss": 0.9061,
"step": 31620
},
{
"epoch": 0.50624,
"grad_norm": 0.22206011414527893,
"learning_rate": 9.879040000000001e-05,
"loss": 0.9175,
"step": 31640
},
{
"epoch": 0.50656,
"grad_norm": 0.20660768449306488,
"learning_rate": 9.87264e-05,
"loss": 0.9249,
"step": 31660
},
{
"epoch": 0.50688,
"grad_norm": 0.2204529196023941,
"learning_rate": 9.86624e-05,
"loss": 0.8608,
"step": 31680
},
{
"epoch": 0.5072,
"grad_norm": 0.23114994168281555,
"learning_rate": 9.859840000000001e-05,
"loss": 0.8453,
"step": 31700
},
{
"epoch": 0.50752,
"grad_norm": 0.23731987178325653,
"learning_rate": 9.85344e-05,
"loss": 0.8556,
"step": 31720
},
{
"epoch": 0.50784,
"grad_norm": 0.2295464277267456,
"learning_rate": 9.847040000000001e-05,
"loss": 0.8361,
"step": 31740
},
{
"epoch": 0.50816,
"grad_norm": 0.2154739946126938,
"learning_rate": 9.840640000000001e-05,
"loss": 0.9396,
"step": 31760
},
{
"epoch": 0.50848,
"grad_norm": 0.2496858537197113,
"learning_rate": 9.83424e-05,
"loss": 0.9095,
"step": 31780
},
{
"epoch": 0.5088,
"grad_norm": 0.2576257288455963,
"learning_rate": 9.82784e-05,
"loss": 0.928,
"step": 31800
},
{
"epoch": 0.50912,
"grad_norm": 0.2754540741443634,
"learning_rate": 9.82144e-05,
"loss": 0.9274,
"step": 31820
},
{
"epoch": 0.50944,
"grad_norm": 0.21684108674526215,
"learning_rate": 9.815040000000001e-05,
"loss": 0.9536,
"step": 31840
},
{
"epoch": 0.50976,
"grad_norm": 0.20123428106307983,
"learning_rate": 9.80864e-05,
"loss": 0.8948,
"step": 31860
},
{
"epoch": 0.51008,
"grad_norm": 0.19840183854103088,
"learning_rate": 9.80224e-05,
"loss": 0.8843,
"step": 31880
},
{
"epoch": 0.5104,
"grad_norm": 0.21900126338005066,
"learning_rate": 9.79584e-05,
"loss": 0.861,
"step": 31900
},
{
"epoch": 0.51072,
"grad_norm": 0.27311161160469055,
"learning_rate": 9.789440000000001e-05,
"loss": 0.8824,
"step": 31920
},
{
"epoch": 0.51104,
"grad_norm": 0.2307424545288086,
"learning_rate": 9.78304e-05,
"loss": 0.9077,
"step": 31940
},
{
"epoch": 0.51136,
"grad_norm": 0.23477308452129364,
"learning_rate": 9.77664e-05,
"loss": 0.8819,
"step": 31960
},
{
"epoch": 0.51168,
"grad_norm": 0.24617180228233337,
"learning_rate": 9.770240000000001e-05,
"loss": 0.8832,
"step": 31980
},
{
"epoch": 0.512,
"grad_norm": 0.28253304958343506,
"learning_rate": 9.76384e-05,
"loss": 0.9143,
"step": 32000
},
{
"epoch": 0.51232,
"grad_norm": 0.21168233454227448,
"learning_rate": 9.75744e-05,
"loss": 0.8644,
"step": 32020
},
{
"epoch": 0.51264,
"grad_norm": 0.2240704596042633,
"learning_rate": 9.751040000000001e-05,
"loss": 0.8768,
"step": 32040
},
{
"epoch": 0.51296,
"grad_norm": 0.26020580530166626,
"learning_rate": 9.74464e-05,
"loss": 0.8708,
"step": 32060
},
{
"epoch": 0.51328,
"grad_norm": 0.2664453387260437,
"learning_rate": 9.738240000000001e-05,
"loss": 0.8823,
"step": 32080
},
{
"epoch": 0.5136,
"grad_norm": 0.24020379781723022,
"learning_rate": 9.73184e-05,
"loss": 0.9324,
"step": 32100
},
{
"epoch": 0.51392,
"grad_norm": 0.25187745690345764,
"learning_rate": 9.72544e-05,
"loss": 0.9079,
"step": 32120
},
{
"epoch": 0.51424,
"grad_norm": 0.2086835503578186,
"learning_rate": 9.71904e-05,
"loss": 0.9136,
"step": 32140
},
{
"epoch": 0.51456,
"grad_norm": 0.20220904052257538,
"learning_rate": 9.71264e-05,
"loss": 0.9192,
"step": 32160
},
{
"epoch": 0.51488,
"grad_norm": 0.245720773935318,
"learning_rate": 9.706240000000001e-05,
"loss": 0.8716,
"step": 32180
},
{
"epoch": 0.5152,
"grad_norm": 0.24418127536773682,
"learning_rate": 9.69984e-05,
"loss": 0.9469,
"step": 32200
},
{
"epoch": 0.51552,
"grad_norm": 0.20389395952224731,
"learning_rate": 9.69344e-05,
"loss": 0.8574,
"step": 32220
},
{
"epoch": 0.51584,
"grad_norm": 0.25827401876449585,
"learning_rate": 9.68704e-05,
"loss": 0.8792,
"step": 32240
},
{
"epoch": 0.51616,
"grad_norm": 0.24777017533779144,
"learning_rate": 9.680640000000001e-05,
"loss": 0.9179,
"step": 32260
},
{
"epoch": 0.51648,
"grad_norm": 0.24638600647449493,
"learning_rate": 9.67424e-05,
"loss": 0.8133,
"step": 32280
},
{
"epoch": 0.5168,
"grad_norm": 0.24801717698574066,
"learning_rate": 9.66784e-05,
"loss": 0.9329,
"step": 32300
},
{
"epoch": 0.51712,
"grad_norm": 0.23096071183681488,
"learning_rate": 9.661440000000001e-05,
"loss": 0.8527,
"step": 32320
},
{
"epoch": 0.51744,
"grad_norm": 0.25937584042549133,
"learning_rate": 9.65504e-05,
"loss": 0.8984,
"step": 32340
},
{
"epoch": 0.51776,
"grad_norm": 0.22245679795742035,
"learning_rate": 9.648640000000001e-05,
"loss": 0.888,
"step": 32360
},
{
"epoch": 0.51808,
"grad_norm": 0.24738770723342896,
"learning_rate": 9.642240000000001e-05,
"loss": 0.9783,
"step": 32380
},
{
"epoch": 0.5184,
"grad_norm": 0.24137365818023682,
"learning_rate": 9.63584e-05,
"loss": 0.9105,
"step": 32400
},
{
"epoch": 0.51872,
"grad_norm": 0.2397020161151886,
"learning_rate": 9.629440000000001e-05,
"loss": 0.8195,
"step": 32420
},
{
"epoch": 0.51904,
"grad_norm": 0.2638731598854065,
"learning_rate": 9.62304e-05,
"loss": 0.94,
"step": 32440
},
{
"epoch": 0.51936,
"grad_norm": 0.24911172688007355,
"learning_rate": 9.61664e-05,
"loss": 0.9078,
"step": 32460
},
{
"epoch": 0.51968,
"grad_norm": 0.2063673883676529,
"learning_rate": 9.610240000000001e-05,
"loss": 0.8501,
"step": 32480
},
{
"epoch": 0.52,
"grad_norm": 0.2300567924976349,
"learning_rate": 9.60384e-05,
"loss": 0.8857,
"step": 32500
},
{
"epoch": 0.52032,
"grad_norm": 0.26897576451301575,
"learning_rate": 9.597440000000001e-05,
"loss": 0.9256,
"step": 32520
},
{
"epoch": 0.52064,
"grad_norm": 0.25261190533638,
"learning_rate": 9.59104e-05,
"loss": 0.8997,
"step": 32540
},
{
"epoch": 0.52096,
"grad_norm": 0.24318355321884155,
"learning_rate": 9.58464e-05,
"loss": 0.8751,
"step": 32560
},
{
"epoch": 0.52128,
"grad_norm": 0.24040265381336212,
"learning_rate": 9.57824e-05,
"loss": 0.8543,
"step": 32580
},
{
"epoch": 0.5216,
"grad_norm": 0.22509385645389557,
"learning_rate": 9.571840000000001e-05,
"loss": 0.8858,
"step": 32600
},
{
"epoch": 0.52192,
"grad_norm": 0.2367594838142395,
"learning_rate": 9.56544e-05,
"loss": 0.8506,
"step": 32620
},
{
"epoch": 0.52224,
"grad_norm": 0.2382354438304901,
"learning_rate": 9.55904e-05,
"loss": 0.875,
"step": 32640
},
{
"epoch": 0.52256,
"grad_norm": 0.24895018339157104,
"learning_rate": 9.552640000000001e-05,
"loss": 0.9271,
"step": 32660
},
{
"epoch": 0.52288,
"grad_norm": 0.2637403905391693,
"learning_rate": 9.54624e-05,
"loss": 0.8485,
"step": 32680
},
{
"epoch": 0.5232,
"grad_norm": 0.2029896229505539,
"learning_rate": 9.539840000000001e-05,
"loss": 0.8569,
"step": 32700
},
{
"epoch": 0.52352,
"grad_norm": 0.28035101294517517,
"learning_rate": 9.53344e-05,
"loss": 0.8745,
"step": 32720
},
{
"epoch": 0.52384,
"grad_norm": 0.24843214452266693,
"learning_rate": 9.52704e-05,
"loss": 0.8853,
"step": 32740
},
{
"epoch": 0.52416,
"grad_norm": 0.26682519912719727,
"learning_rate": 9.520640000000001e-05,
"loss": 0.9163,
"step": 32760
},
{
"epoch": 0.52448,
"grad_norm": 0.2349347323179245,
"learning_rate": 9.51424e-05,
"loss": 0.91,
"step": 32780
},
{
"epoch": 0.5248,
"grad_norm": 0.2493859827518463,
"learning_rate": 9.50784e-05,
"loss": 0.8412,
"step": 32800
},
{
"epoch": 0.52512,
"grad_norm": 0.2364472597837448,
"learning_rate": 9.501440000000001e-05,
"loss": 0.8777,
"step": 32820
},
{
"epoch": 0.52544,
"grad_norm": 0.2579153776168823,
"learning_rate": 9.49504e-05,
"loss": 0.9061,
"step": 32840
},
{
"epoch": 0.52576,
"grad_norm": 0.23014868795871735,
"learning_rate": 9.488640000000001e-05,
"loss": 0.9311,
"step": 32860
},
{
"epoch": 0.52608,
"grad_norm": 0.2836184501647949,
"learning_rate": 9.48224e-05,
"loss": 0.8641,
"step": 32880
},
{
"epoch": 0.5264,
"grad_norm": 0.36536288261413574,
"learning_rate": 9.47584e-05,
"loss": 0.8722,
"step": 32900
},
{
"epoch": 0.52672,
"grad_norm": 0.23661687970161438,
"learning_rate": 9.46944e-05,
"loss": 0.8349,
"step": 32920
},
{
"epoch": 0.52704,
"grad_norm": 0.24428099393844604,
"learning_rate": 9.463040000000001e-05,
"loss": 0.8688,
"step": 32940
},
{
"epoch": 0.52736,
"grad_norm": 0.24511151015758514,
"learning_rate": 9.45664e-05,
"loss": 0.88,
"step": 32960
},
{
"epoch": 0.52768,
"grad_norm": 0.244853213429451,
"learning_rate": 9.45024e-05,
"loss": 0.8739,
"step": 32980
},
{
"epoch": 0.528,
"grad_norm": 0.2549150884151459,
"learning_rate": 9.443840000000001e-05,
"loss": 0.8941,
"step": 33000
},
{
"epoch": 0.52832,
"grad_norm": 0.24175408482551575,
"learning_rate": 9.43744e-05,
"loss": 0.8919,
"step": 33020
},
{
"epoch": 0.52864,
"grad_norm": 0.25348398089408875,
"learning_rate": 9.431040000000001e-05,
"loss": 0.8914,
"step": 33040
},
{
"epoch": 0.52896,
"grad_norm": 0.21426767110824585,
"learning_rate": 9.42464e-05,
"loss": 0.8783,
"step": 33060
},
{
"epoch": 0.52928,
"grad_norm": 0.2478022277355194,
"learning_rate": 9.41824e-05,
"loss": 0.866,
"step": 33080
},
{
"epoch": 0.5296,
"grad_norm": 0.21202678978443146,
"learning_rate": 9.411840000000001e-05,
"loss": 0.8285,
"step": 33100
},
{
"epoch": 0.52992,
"grad_norm": 0.2358037382364273,
"learning_rate": 9.40544e-05,
"loss": 0.8479,
"step": 33120
},
{
"epoch": 0.53024,
"grad_norm": 0.2295175939798355,
"learning_rate": 9.39904e-05,
"loss": 0.8979,
"step": 33140
},
{
"epoch": 0.53056,
"grad_norm": 0.22576481103897095,
"learning_rate": 9.392640000000001e-05,
"loss": 0.919,
"step": 33160
},
{
"epoch": 0.53088,
"grad_norm": 0.20744270086288452,
"learning_rate": 9.38624e-05,
"loss": 0.9019,
"step": 33180
},
{
"epoch": 0.5312,
"grad_norm": 0.26612403988838196,
"learning_rate": 9.379840000000001e-05,
"loss": 0.8855,
"step": 33200
},
{
"epoch": 0.53152,
"grad_norm": 0.24272586405277252,
"learning_rate": 9.37344e-05,
"loss": 0.8907,
"step": 33220
},
{
"epoch": 0.53184,
"grad_norm": 0.25545746088027954,
"learning_rate": 9.36704e-05,
"loss": 0.8881,
"step": 33240
},
{
"epoch": 0.53216,
"grad_norm": 0.22786740958690643,
"learning_rate": 9.36064e-05,
"loss": 0.8739,
"step": 33260
},
{
"epoch": 0.53248,
"grad_norm": 0.2257343977689743,
"learning_rate": 9.354240000000001e-05,
"loss": 0.8873,
"step": 33280
},
{
"epoch": 0.5328,
"grad_norm": 0.23881380259990692,
"learning_rate": 9.34784e-05,
"loss": 0.8811,
"step": 33300
},
{
"epoch": 0.53312,
"grad_norm": 0.2408117800951004,
"learning_rate": 9.34144e-05,
"loss": 0.8528,
"step": 33320
},
{
"epoch": 0.53344,
"grad_norm": 0.20471978187561035,
"learning_rate": 9.33504e-05,
"loss": 0.8313,
"step": 33340
},
{
"epoch": 0.53376,
"grad_norm": 0.24219338595867157,
"learning_rate": 9.32864e-05,
"loss": 0.8821,
"step": 33360
},
{
"epoch": 0.53408,
"grad_norm": 0.24901708960533142,
"learning_rate": 9.322240000000001e-05,
"loss": 0.9017,
"step": 33380
},
{
"epoch": 0.5344,
"grad_norm": 0.2642413079738617,
"learning_rate": 9.31584e-05,
"loss": 0.8627,
"step": 33400
},
{
"epoch": 0.53472,
"grad_norm": 0.20527620613574982,
"learning_rate": 9.30944e-05,
"loss": 0.8729,
"step": 33420
},
{
"epoch": 0.53504,
"grad_norm": 0.2573811411857605,
"learning_rate": 9.303040000000001e-05,
"loss": 0.8767,
"step": 33440
},
{
"epoch": 0.53536,
"grad_norm": 0.2389804571866989,
"learning_rate": 9.29664e-05,
"loss": 0.8553,
"step": 33460
},
{
"epoch": 0.53568,
"grad_norm": 0.25284579396247864,
"learning_rate": 9.29024e-05,
"loss": 0.9157,
"step": 33480
},
{
"epoch": 0.536,
"grad_norm": 0.24692294001579285,
"learning_rate": 9.283840000000001e-05,
"loss": 0.8612,
"step": 33500
},
{
"epoch": 0.53632,
"grad_norm": 0.23540472984313965,
"learning_rate": 9.27744e-05,
"loss": 0.9049,
"step": 33520
},
{
"epoch": 0.53664,
"grad_norm": 0.23751689493656158,
"learning_rate": 9.271040000000001e-05,
"loss": 0.837,
"step": 33540
},
{
"epoch": 0.53696,
"grad_norm": 0.24152640998363495,
"learning_rate": 9.26464e-05,
"loss": 0.8772,
"step": 33560
},
{
"epoch": 0.53728,
"grad_norm": 0.2199302613735199,
"learning_rate": 9.25824e-05,
"loss": 0.9278,
"step": 33580
},
{
"epoch": 0.5376,
"grad_norm": 0.24747338891029358,
"learning_rate": 9.25184e-05,
"loss": 0.8802,
"step": 33600
},
{
"epoch": 0.53792,
"grad_norm": 0.21488319337368011,
"learning_rate": 9.245440000000001e-05,
"loss": 0.9052,
"step": 33620
},
{
"epoch": 0.53824,
"grad_norm": 0.2535870671272278,
"learning_rate": 9.23904e-05,
"loss": 0.8781,
"step": 33640
},
{
"epoch": 0.53856,
"grad_norm": 0.2381758987903595,
"learning_rate": 9.23264e-05,
"loss": 0.8088,
"step": 33660
},
{
"epoch": 0.53888,
"grad_norm": 0.2485072910785675,
"learning_rate": 9.226560000000001e-05,
"loss": 0.8592,
"step": 33680
},
{
"epoch": 0.5392,
"grad_norm": 0.21843871474266052,
"learning_rate": 9.22016e-05,
"loss": 0.8427,
"step": 33700
},
{
"epoch": 0.53952,
"grad_norm": 0.2665683627128601,
"learning_rate": 9.21376e-05,
"loss": 0.879,
"step": 33720
},
{
"epoch": 0.53984,
"grad_norm": 0.22822092473506927,
"learning_rate": 9.20736e-05,
"loss": 0.8936,
"step": 33740
},
{
"epoch": 0.54016,
"grad_norm": 0.22950272262096405,
"learning_rate": 9.200960000000001e-05,
"loss": 0.838,
"step": 33760
},
{
"epoch": 0.54048,
"grad_norm": 0.2490173578262329,
"learning_rate": 9.19456e-05,
"loss": 0.8914,
"step": 33780
},
{
"epoch": 0.5408,
"grad_norm": 0.2716629207134247,
"learning_rate": 9.18816e-05,
"loss": 0.8936,
"step": 33800
},
{
"epoch": 0.54112,
"grad_norm": 0.28845784068107605,
"learning_rate": 9.18176e-05,
"loss": 0.9278,
"step": 33820
},
{
"epoch": 0.54144,
"grad_norm": 0.2092408537864685,
"learning_rate": 9.17536e-05,
"loss": 0.8818,
"step": 33840
},
{
"epoch": 0.54176,
"grad_norm": 0.19223183393478394,
"learning_rate": 9.168960000000001e-05,
"loss": 0.8626,
"step": 33860
},
{
"epoch": 0.54208,
"grad_norm": 0.2201627492904663,
"learning_rate": 9.16256e-05,
"loss": 0.8946,
"step": 33880
},
{
"epoch": 0.5424,
"grad_norm": 0.22081099450588226,
"learning_rate": 9.15616e-05,
"loss": 0.8691,
"step": 33900
},
{
"epoch": 0.54272,
"grad_norm": 0.229042649269104,
"learning_rate": 9.149760000000001e-05,
"loss": 0.856,
"step": 33920
},
{
"epoch": 0.54304,
"grad_norm": 0.23319824039936066,
"learning_rate": 9.14336e-05,
"loss": 0.8748,
"step": 33940
},
{
"epoch": 0.54336,
"grad_norm": 0.21704263985157013,
"learning_rate": 9.13696e-05,
"loss": 0.8736,
"step": 33960
},
{
"epoch": 0.54368,
"grad_norm": 0.2763427197933197,
"learning_rate": 9.130560000000001e-05,
"loss": 0.8392,
"step": 33980
},
{
"epoch": 0.544,
"grad_norm": 0.24619615077972412,
"learning_rate": 9.12416e-05,
"loss": 0.9046,
"step": 34000
},
{
"epoch": 0.54432,
"grad_norm": 0.2581463158130646,
"learning_rate": 9.117760000000001e-05,
"loss": 0.8873,
"step": 34020
},
{
"epoch": 0.54464,
"grad_norm": 0.25280770659446716,
"learning_rate": 9.11136e-05,
"loss": 0.9039,
"step": 34040
},
{
"epoch": 0.54496,
"grad_norm": 0.22729690372943878,
"learning_rate": 9.10496e-05,
"loss": 0.9558,
"step": 34060
},
{
"epoch": 0.54528,
"grad_norm": 0.25227198004722595,
"learning_rate": 9.09856e-05,
"loss": 0.8474,
"step": 34080
},
{
"epoch": 0.5456,
"grad_norm": 0.22505003213882446,
"learning_rate": 9.092160000000001e-05,
"loss": 0.8884,
"step": 34100
},
{
"epoch": 0.54592,
"grad_norm": 0.19984784722328186,
"learning_rate": 9.085760000000002e-05,
"loss": 0.864,
"step": 34120
},
{
"epoch": 0.54624,
"grad_norm": 0.22763599455356598,
"learning_rate": 9.07936e-05,
"loss": 0.8398,
"step": 34140
},
{
"epoch": 0.54656,
"grad_norm": 0.2618582248687744,
"learning_rate": 9.07296e-05,
"loss": 0.9464,
"step": 34160
},
{
"epoch": 0.54688,
"grad_norm": 0.2371044158935547,
"learning_rate": 9.06656e-05,
"loss": 0.8829,
"step": 34180
},
{
"epoch": 0.5472,
"grad_norm": 0.21414311230182648,
"learning_rate": 9.060160000000001e-05,
"loss": 0.8997,
"step": 34200
},
{
"epoch": 0.54752,
"grad_norm": 0.24445602297782898,
"learning_rate": 9.05376e-05,
"loss": 0.8741,
"step": 34220
},
{
"epoch": 0.54784,
"grad_norm": 0.22683311998844147,
"learning_rate": 9.04736e-05,
"loss": 0.905,
"step": 34240
},
{
"epoch": 0.54816,
"grad_norm": 0.23930495977401733,
"learning_rate": 9.040960000000001e-05,
"loss": 0.8318,
"step": 34260
},
{
"epoch": 0.54848,
"grad_norm": 0.2092512547969818,
"learning_rate": 9.03456e-05,
"loss": 0.9077,
"step": 34280
},
{
"epoch": 0.5488,
"grad_norm": 0.24851293861865997,
"learning_rate": 9.02816e-05,
"loss": 0.8201,
"step": 34300
},
{
"epoch": 0.54912,
"grad_norm": 0.25093013048171997,
"learning_rate": 9.021760000000001e-05,
"loss": 0.9003,
"step": 34320
},
{
"epoch": 0.54944,
"grad_norm": 0.22838135063648224,
"learning_rate": 9.01536e-05,
"loss": 0.8897,
"step": 34340
},
{
"epoch": 0.54976,
"grad_norm": 0.23337380588054657,
"learning_rate": 9.008960000000001e-05,
"loss": 0.8721,
"step": 34360
},
{
"epoch": 0.55008,
"grad_norm": 0.2049025148153305,
"learning_rate": 9.00256e-05,
"loss": 0.8554,
"step": 34380
},
{
"epoch": 0.5504,
"grad_norm": 0.24730657041072845,
"learning_rate": 8.99616e-05,
"loss": 0.8799,
"step": 34400
},
{
"epoch": 0.55072,
"grad_norm": 0.24854011833667755,
"learning_rate": 8.98976e-05,
"loss": 0.7941,
"step": 34420
},
{
"epoch": 0.55104,
"grad_norm": 0.2421095371246338,
"learning_rate": 8.98336e-05,
"loss": 0.874,
"step": 34440
},
{
"epoch": 0.55136,
"grad_norm": 0.23288048803806305,
"learning_rate": 8.976960000000002e-05,
"loss": 0.8883,
"step": 34460
},
{
"epoch": 0.55168,
"grad_norm": 0.227716326713562,
"learning_rate": 8.97056e-05,
"loss": 0.9004,
"step": 34480
},
{
"epoch": 0.552,
"grad_norm": 0.24987129867076874,
"learning_rate": 8.96416e-05,
"loss": 0.8758,
"step": 34500
},
{
"epoch": 0.55232,
"grad_norm": 0.24900184571743011,
"learning_rate": 8.95776e-05,
"loss": 0.8657,
"step": 34520
},
{
"epoch": 0.55264,
"grad_norm": 0.2685990035533905,
"learning_rate": 8.951360000000001e-05,
"loss": 0.8833,
"step": 34540
},
{
"epoch": 0.55296,
"grad_norm": 0.23856236040592194,
"learning_rate": 8.94496e-05,
"loss": 0.9229,
"step": 34560
},
{
"epoch": 0.55328,
"grad_norm": 0.24043366312980652,
"learning_rate": 8.93856e-05,
"loss": 0.8985,
"step": 34580
},
{
"epoch": 0.5536,
"grad_norm": 0.26800286769866943,
"learning_rate": 8.932160000000001e-05,
"loss": 0.9129,
"step": 34600
},
{
"epoch": 0.55392,
"grad_norm": 0.2030034214258194,
"learning_rate": 8.92576e-05,
"loss": 0.8625,
"step": 34620
},
{
"epoch": 0.55424,
"grad_norm": 0.21988297998905182,
"learning_rate": 8.91936e-05,
"loss": 0.8638,
"step": 34640
},
{
"epoch": 0.55456,
"grad_norm": 0.24161766469478607,
"learning_rate": 8.912960000000001e-05,
"loss": 0.9061,
"step": 34660
},
{
"epoch": 0.55488,
"grad_norm": 0.24580594897270203,
"learning_rate": 8.90656e-05,
"loss": 0.8638,
"step": 34680
},
{
"epoch": 0.5552,
"grad_norm": 0.23829227685928345,
"learning_rate": 8.900160000000001e-05,
"loss": 0.8996,
"step": 34700
},
{
"epoch": 0.55552,
"grad_norm": 0.2329644411802292,
"learning_rate": 8.893760000000001e-05,
"loss": 0.8453,
"step": 34720
},
{
"epoch": 0.55584,
"grad_norm": 0.22091227769851685,
"learning_rate": 8.88736e-05,
"loss": 0.8795,
"step": 34740
},
{
"epoch": 0.55616,
"grad_norm": 0.21813109517097473,
"learning_rate": 8.88096e-05,
"loss": 0.8929,
"step": 34760
},
{
"epoch": 0.55648,
"grad_norm": 0.2232077419757843,
"learning_rate": 8.87456e-05,
"loss": 0.8585,
"step": 34780
},
{
"epoch": 0.5568,
"grad_norm": 0.27467960119247437,
"learning_rate": 8.868160000000002e-05,
"loss": 0.9287,
"step": 34800
},
{
"epoch": 0.55712,
"grad_norm": 0.22533021867275238,
"learning_rate": 8.86176e-05,
"loss": 0.8408,
"step": 34820
},
{
"epoch": 0.55744,
"grad_norm": 0.2704051733016968,
"learning_rate": 8.85536e-05,
"loss": 0.8606,
"step": 34840
},
{
"epoch": 0.55776,
"grad_norm": 0.2197883278131485,
"learning_rate": 8.84896e-05,
"loss": 0.8271,
"step": 34860
},
{
"epoch": 0.55808,
"grad_norm": 0.2424239218235016,
"learning_rate": 8.842560000000001e-05,
"loss": 0.9006,
"step": 34880
},
{
"epoch": 0.5584,
"grad_norm": 0.21460562944412231,
"learning_rate": 8.83616e-05,
"loss": 0.8882,
"step": 34900
},
{
"epoch": 0.55872,
"grad_norm": 0.2590295076370239,
"learning_rate": 8.82976e-05,
"loss": 0.906,
"step": 34920
},
{
"epoch": 0.55904,
"grad_norm": 0.2667274475097656,
"learning_rate": 8.823360000000001e-05,
"loss": 0.8522,
"step": 34940
},
{
"epoch": 0.55936,
"grad_norm": 0.20394787192344666,
"learning_rate": 8.81696e-05,
"loss": 0.9169,
"step": 34960
},
{
"epoch": 0.55968,
"grad_norm": 0.2314232587814331,
"learning_rate": 8.81056e-05,
"loss": 0.847,
"step": 34980
},
{
"epoch": 0.56,
"grad_norm": 0.25091394782066345,
"learning_rate": 8.804160000000001e-05,
"loss": 0.9109,
"step": 35000
},
{
"epoch": 0.56032,
"grad_norm": 0.26859068870544434,
"learning_rate": 8.79776e-05,
"loss": 0.879,
"step": 35020
},
{
"epoch": 0.56064,
"grad_norm": 0.2730168104171753,
"learning_rate": 8.791360000000001e-05,
"loss": 0.8548,
"step": 35040
},
{
"epoch": 0.56096,
"grad_norm": 0.21965494751930237,
"learning_rate": 8.78496e-05,
"loss": 0.8965,
"step": 35060
},
{
"epoch": 0.56128,
"grad_norm": 0.2608742117881775,
"learning_rate": 8.77856e-05,
"loss": 0.9028,
"step": 35080
},
{
"epoch": 0.5616,
"grad_norm": 0.19259628653526306,
"learning_rate": 8.77216e-05,
"loss": 0.8716,
"step": 35100
},
{
"epoch": 0.56192,
"grad_norm": 0.2763505280017853,
"learning_rate": 8.766080000000001e-05,
"loss": 0.8783,
"step": 35120
},
{
"epoch": 0.56224,
"grad_norm": 0.23790399730205536,
"learning_rate": 8.75968e-05,
"loss": 0.8931,
"step": 35140
},
{
"epoch": 0.56256,
"grad_norm": 0.21415534615516663,
"learning_rate": 8.75328e-05,
"loss": 0.8931,
"step": 35160
},
{
"epoch": 0.56288,
"grad_norm": 0.23109756410121918,
"learning_rate": 8.746880000000001e-05,
"loss": 0.9333,
"step": 35180
},
{
"epoch": 0.5632,
"grad_norm": 0.22907577455043793,
"learning_rate": 8.740480000000001e-05,
"loss": 0.8739,
"step": 35200
},
{
"epoch": 0.56352,
"grad_norm": 0.22961348295211792,
"learning_rate": 8.73408e-05,
"loss": 0.883,
"step": 35220
},
{
"epoch": 0.56384,
"grad_norm": 0.19997790455818176,
"learning_rate": 8.72768e-05,
"loss": 0.8698,
"step": 35240
},
{
"epoch": 0.56416,
"grad_norm": 0.22530966997146606,
"learning_rate": 8.72128e-05,
"loss": 0.8538,
"step": 35260
},
{
"epoch": 0.56448,
"grad_norm": 0.2729052007198334,
"learning_rate": 8.71488e-05,
"loss": 0.8882,
"step": 35280
},
{
"epoch": 0.5648,
"grad_norm": 0.23470643162727356,
"learning_rate": 8.70848e-05,
"loss": 0.8615,
"step": 35300
},
{
"epoch": 0.56512,
"grad_norm": 0.24945303797721863,
"learning_rate": 8.70208e-05,
"loss": 0.8993,
"step": 35320
},
{
"epoch": 0.56544,
"grad_norm": 0.18795213103294373,
"learning_rate": 8.69568e-05,
"loss": 0.8676,
"step": 35340
},
{
"epoch": 0.56576,
"grad_norm": 0.2676798403263092,
"learning_rate": 8.689280000000001e-05,
"loss": 0.9417,
"step": 35360
},
{
"epoch": 0.56608,
"grad_norm": 0.2519102096557617,
"learning_rate": 8.68288e-05,
"loss": 0.8696,
"step": 35380
},
{
"epoch": 0.5664,
"grad_norm": 0.2239411622285843,
"learning_rate": 8.67648e-05,
"loss": 0.8518,
"step": 35400
},
{
"epoch": 0.56672,
"grad_norm": 0.25220707058906555,
"learning_rate": 8.670080000000001e-05,
"loss": 0.8406,
"step": 35420
},
{
"epoch": 0.56704,
"grad_norm": 0.24866600334644318,
"learning_rate": 8.66368e-05,
"loss": 0.8726,
"step": 35440
},
{
"epoch": 0.56736,
"grad_norm": 0.23390260338783264,
"learning_rate": 8.657280000000001e-05,
"loss": 0.8616,
"step": 35460
},
{
"epoch": 0.56768,
"grad_norm": 0.23465383052825928,
"learning_rate": 8.650880000000001e-05,
"loss": 0.8897,
"step": 35480
},
{
"epoch": 0.568,
"grad_norm": 0.23219779133796692,
"learning_rate": 8.64448e-05,
"loss": 0.892,
"step": 35500
},
{
"epoch": 0.56832,
"grad_norm": 0.23238353431224823,
"learning_rate": 8.638080000000001e-05,
"loss": 0.8887,
"step": 35520
},
{
"epoch": 0.56864,
"grad_norm": 0.23893140256404877,
"learning_rate": 8.631680000000001e-05,
"loss": 0.868,
"step": 35540
},
{
"epoch": 0.56896,
"grad_norm": 0.23007500171661377,
"learning_rate": 8.62528e-05,
"loss": 0.8564,
"step": 35560
},
{
"epoch": 0.56928,
"grad_norm": 0.2285536676645279,
"learning_rate": 8.61888e-05,
"loss": 0.8967,
"step": 35580
},
{
"epoch": 0.5696,
"grad_norm": 0.2179461121559143,
"learning_rate": 8.61248e-05,
"loss": 0.8468,
"step": 35600
},
{
"epoch": 0.56992,
"grad_norm": 0.26246190071105957,
"learning_rate": 8.606080000000001e-05,
"loss": 0.866,
"step": 35620
},
{
"epoch": 0.57024,
"grad_norm": 0.2367112636566162,
"learning_rate": 8.59968e-05,
"loss": 0.8862,
"step": 35640
},
{
"epoch": 0.57056,
"grad_norm": 0.26484036445617676,
"learning_rate": 8.59328e-05,
"loss": 0.8992,
"step": 35660
},
{
"epoch": 0.57088,
"grad_norm": 0.2554921507835388,
"learning_rate": 8.58688e-05,
"loss": 0.9095,
"step": 35680
},
{
"epoch": 0.5712,
"grad_norm": 0.20814883708953857,
"learning_rate": 8.580480000000001e-05,
"loss": 0.8306,
"step": 35700
},
{
"epoch": 0.57152,
"grad_norm": 0.22327591478824615,
"learning_rate": 8.57408e-05,
"loss": 0.8366,
"step": 35720
},
{
"epoch": 0.57184,
"grad_norm": 0.2438846230506897,
"learning_rate": 8.56768e-05,
"loss": 0.8424,
"step": 35740
},
{
"epoch": 0.57216,
"grad_norm": 0.20804153382778168,
"learning_rate": 8.561280000000001e-05,
"loss": 0.8852,
"step": 35760
},
{
"epoch": 0.57248,
"grad_norm": 0.22227492928504944,
"learning_rate": 8.55488e-05,
"loss": 0.8586,
"step": 35780
},
{
"epoch": 0.5728,
"grad_norm": 0.28952184319496155,
"learning_rate": 8.548480000000001e-05,
"loss": 0.9095,
"step": 35800
},
{
"epoch": 0.57312,
"grad_norm": 0.22480730712413788,
"learning_rate": 8.542080000000001e-05,
"loss": 0.8834,
"step": 35820
},
{
"epoch": 0.57344,
"grad_norm": 0.19908693432807922,
"learning_rate": 8.53568e-05,
"loss": 0.84,
"step": 35840
},
{
"epoch": 0.57376,
"grad_norm": 0.24293170869350433,
"learning_rate": 8.529280000000001e-05,
"loss": 0.9019,
"step": 35860
},
{
"epoch": 0.57408,
"grad_norm": 0.1839456409215927,
"learning_rate": 8.52288e-05,
"loss": 0.923,
"step": 35880
},
{
"epoch": 0.5744,
"grad_norm": 0.23249760270118713,
"learning_rate": 8.51648e-05,
"loss": 0.8667,
"step": 35900
},
{
"epoch": 0.57472,
"grad_norm": 0.2784735858440399,
"learning_rate": 8.51008e-05,
"loss": 0.9178,
"step": 35920
},
{
"epoch": 0.57504,
"grad_norm": 0.23797404766082764,
"learning_rate": 8.50368e-05,
"loss": 0.9291,
"step": 35940
},
{
"epoch": 0.57536,
"grad_norm": 0.241110160946846,
"learning_rate": 8.497280000000001e-05,
"loss": 0.8447,
"step": 35960
},
{
"epoch": 0.57568,
"grad_norm": 0.2260485291481018,
"learning_rate": 8.49088e-05,
"loss": 0.8532,
"step": 35980
},
{
"epoch": 0.576,
"grad_norm": 0.22092276811599731,
"learning_rate": 8.48448e-05,
"loss": 0.8768,
"step": 36000
},
{
"epoch": 0.57632,
"grad_norm": 0.2721387445926666,
"learning_rate": 8.47808e-05,
"loss": 0.863,
"step": 36020
},
{
"epoch": 0.57664,
"grad_norm": 0.21206919848918915,
"learning_rate": 8.471680000000001e-05,
"loss": 0.8636,
"step": 36040
},
{
"epoch": 0.57696,
"grad_norm": 0.23496432602405548,
"learning_rate": 8.46528e-05,
"loss": 0.8745,
"step": 36060
},
{
"epoch": 0.57728,
"grad_norm": 0.2214774489402771,
"learning_rate": 8.45888e-05,
"loss": 0.873,
"step": 36080
},
{
"epoch": 0.5776,
"grad_norm": 0.2665559649467468,
"learning_rate": 8.452480000000001e-05,
"loss": 0.9061,
"step": 36100
},
{
"epoch": 0.57792,
"grad_norm": 0.25471359491348267,
"learning_rate": 8.44608e-05,
"loss": 0.8592,
"step": 36120
},
{
"epoch": 0.57824,
"grad_norm": 0.24169199168682098,
"learning_rate": 8.439680000000001e-05,
"loss": 0.8867,
"step": 36140
},
{
"epoch": 0.57856,
"grad_norm": 0.25281229615211487,
"learning_rate": 8.433280000000001e-05,
"loss": 0.8702,
"step": 36160
},
{
"epoch": 0.57888,
"grad_norm": 0.2565683424472809,
"learning_rate": 8.42688e-05,
"loss": 0.8812,
"step": 36180
},
{
"epoch": 0.5792,
"grad_norm": 0.2228873074054718,
"learning_rate": 8.420480000000001e-05,
"loss": 0.8556,
"step": 36200
},
{
"epoch": 0.57952,
"grad_norm": 0.23793266713619232,
"learning_rate": 8.41408e-05,
"loss": 0.8878,
"step": 36220
},
{
"epoch": 0.57984,
"grad_norm": 0.24681511521339417,
"learning_rate": 8.40768e-05,
"loss": 0.9124,
"step": 36240
},
{
"epoch": 0.58016,
"grad_norm": 0.2208309918642044,
"learning_rate": 8.40128e-05,
"loss": 0.8988,
"step": 36260
},
{
"epoch": 0.58048,
"grad_norm": 0.2527472972869873,
"learning_rate": 8.39488e-05,
"loss": 0.886,
"step": 36280
},
{
"epoch": 0.5808,
"grad_norm": 0.23893190920352936,
"learning_rate": 8.388480000000001e-05,
"loss": 0.8487,
"step": 36300
},
{
"epoch": 0.58112,
"grad_norm": 0.22206924855709076,
"learning_rate": 8.38208e-05,
"loss": 0.9371,
"step": 36320
},
{
"epoch": 0.58144,
"grad_norm": 0.28712591528892517,
"learning_rate": 8.37568e-05,
"loss": 0.8959,
"step": 36340
},
{
"epoch": 0.58176,
"grad_norm": 0.20586076378822327,
"learning_rate": 8.36928e-05,
"loss": 0.8394,
"step": 36360
},
{
"epoch": 0.58208,
"grad_norm": 0.20877981185913086,
"learning_rate": 8.362880000000001e-05,
"loss": 0.8793,
"step": 36380
},
{
"epoch": 0.5824,
"grad_norm": 0.2150806486606598,
"learning_rate": 8.35648e-05,
"loss": 0.8575,
"step": 36400
},
{
"epoch": 0.58272,
"grad_norm": 0.2607544958591461,
"learning_rate": 8.35008e-05,
"loss": 0.8492,
"step": 36420
},
{
"epoch": 0.58304,
"grad_norm": 0.21660766005516052,
"learning_rate": 8.343680000000001e-05,
"loss": 0.8599,
"step": 36440
},
{
"epoch": 0.58336,
"grad_norm": 0.19799566268920898,
"learning_rate": 8.33728e-05,
"loss": 0.8754,
"step": 36460
},
{
"epoch": 0.58368,
"grad_norm": 0.2376536726951599,
"learning_rate": 8.330880000000001e-05,
"loss": 0.8645,
"step": 36480
},
{
"epoch": 0.584,
"grad_norm": 0.2775854766368866,
"learning_rate": 8.32448e-05,
"loss": 0.8341,
"step": 36500
},
{
"epoch": 0.58432,
"grad_norm": 0.2808210849761963,
"learning_rate": 8.31808e-05,
"loss": 0.9145,
"step": 36520
},
{
"epoch": 0.58464,
"grad_norm": 0.2942677140235901,
"learning_rate": 8.311680000000001e-05,
"loss": 0.8645,
"step": 36540
},
{
"epoch": 0.58496,
"grad_norm": 0.24506719410419464,
"learning_rate": 8.30528e-05,
"loss": 0.9266,
"step": 36560
},
{
"epoch": 0.58528,
"grad_norm": 0.23690511286258698,
"learning_rate": 8.29888e-05,
"loss": 0.8701,
"step": 36580
},
{
"epoch": 0.5856,
"grad_norm": 0.2386239618062973,
"learning_rate": 8.29248e-05,
"loss": 0.894,
"step": 36600
},
{
"epoch": 0.58592,
"grad_norm": 0.23407141864299774,
"learning_rate": 8.28608e-05,
"loss": 0.8795,
"step": 36620
},
{
"epoch": 0.58624,
"grad_norm": 0.2837297320365906,
"learning_rate": 8.279680000000001e-05,
"loss": 0.8844,
"step": 36640
},
{
"epoch": 0.58656,
"grad_norm": 0.18929681181907654,
"learning_rate": 8.27328e-05,
"loss": 0.881,
"step": 36660
},
{
"epoch": 0.58688,
"grad_norm": 0.2538854479789734,
"learning_rate": 8.26688e-05,
"loss": 0.8326,
"step": 36680
},
{
"epoch": 0.5872,
"grad_norm": 0.23304085433483124,
"learning_rate": 8.26048e-05,
"loss": 0.8535,
"step": 36700
},
{
"epoch": 0.58752,
"grad_norm": 0.2668757736682892,
"learning_rate": 8.254080000000001e-05,
"loss": 0.9134,
"step": 36720
},
{
"epoch": 0.58784,
"grad_norm": 0.2611462473869324,
"learning_rate": 8.24768e-05,
"loss": 0.865,
"step": 36740
},
{
"epoch": 0.58816,
"grad_norm": 0.20947878062725067,
"learning_rate": 8.24128e-05,
"loss": 0.8719,
"step": 36760
},
{
"epoch": 0.58848,
"grad_norm": 0.23722214996814728,
"learning_rate": 8.234880000000001e-05,
"loss": 0.8801,
"step": 36780
},
{
"epoch": 0.5888,
"grad_norm": 0.2517217993736267,
"learning_rate": 8.22848e-05,
"loss": 0.929,
"step": 36800
},
{
"epoch": 0.58912,
"grad_norm": 0.21423856914043427,
"learning_rate": 8.222080000000001e-05,
"loss": 0.8452,
"step": 36820
},
{
"epoch": 0.58944,
"grad_norm": 0.224490687251091,
"learning_rate": 8.21568e-05,
"loss": 0.8617,
"step": 36840
},
{
"epoch": 0.58976,
"grad_norm": 0.1989527940750122,
"learning_rate": 8.20928e-05,
"loss": 0.9057,
"step": 36860
},
{
"epoch": 0.59008,
"grad_norm": 0.20397096872329712,
"learning_rate": 8.202880000000001e-05,
"loss": 0.8504,
"step": 36880
},
{
"epoch": 0.5904,
"grad_norm": 0.22488151490688324,
"learning_rate": 8.19648e-05,
"loss": 0.8383,
"step": 36900
},
{
"epoch": 0.59072,
"grad_norm": 0.23070698976516724,
"learning_rate": 8.19008e-05,
"loss": 0.8249,
"step": 36920
},
{
"epoch": 0.59104,
"grad_norm": 0.23242853581905365,
"learning_rate": 8.183680000000001e-05,
"loss": 0.8153,
"step": 36940
},
{
"epoch": 0.59136,
"grad_norm": 0.2361801415681839,
"learning_rate": 8.17728e-05,
"loss": 0.913,
"step": 36960
},
{
"epoch": 0.59168,
"grad_norm": 0.20463335514068604,
"learning_rate": 8.170880000000001e-05,
"loss": 0.8239,
"step": 36980
},
{
"epoch": 0.592,
"grad_norm": 0.2715516984462738,
"learning_rate": 8.16448e-05,
"loss": 0.8807,
"step": 37000
},
{
"epoch": 0.59232,
"grad_norm": 0.260455459356308,
"learning_rate": 8.15808e-05,
"loss": 0.874,
"step": 37020
},
{
"epoch": 0.59264,
"grad_norm": 0.23766261339187622,
"learning_rate": 8.15168e-05,
"loss": 0.8706,
"step": 37040
},
{
"epoch": 0.59296,
"grad_norm": 0.2649995684623718,
"learning_rate": 8.145280000000001e-05,
"loss": 0.8938,
"step": 37060
},
{
"epoch": 0.59328,
"grad_norm": 0.20467999577522278,
"learning_rate": 8.13888e-05,
"loss": 0.8686,
"step": 37080
},
{
"epoch": 0.5936,
"grad_norm": 0.2401207685470581,
"learning_rate": 8.13248e-05,
"loss": 0.8919,
"step": 37100
},
{
"epoch": 0.59392,
"grad_norm": 0.2349073439836502,
"learning_rate": 8.12608e-05,
"loss": 0.8998,
"step": 37120
},
{
"epoch": 0.59424,
"grad_norm": 0.22856348752975464,
"learning_rate": 8.11968e-05,
"loss": 0.8892,
"step": 37140
},
{
"epoch": 0.59456,
"grad_norm": 0.22828173637390137,
"learning_rate": 8.113280000000001e-05,
"loss": 0.8411,
"step": 37160
},
{
"epoch": 0.59488,
"grad_norm": 0.21084712445735931,
"learning_rate": 8.10688e-05,
"loss": 0.8682,
"step": 37180
},
{
"epoch": 0.5952,
"grad_norm": 0.2080162763595581,
"learning_rate": 8.10048e-05,
"loss": 0.9049,
"step": 37200
},
{
"epoch": 0.59552,
"grad_norm": 0.2635326385498047,
"learning_rate": 8.094080000000001e-05,
"loss": 0.8978,
"step": 37220
},
{
"epoch": 0.59584,
"grad_norm": 0.2089320570230484,
"learning_rate": 8.08768e-05,
"loss": 0.9324,
"step": 37240
},
{
"epoch": 0.59616,
"grad_norm": 0.2738167643547058,
"learning_rate": 8.08128e-05,
"loss": 0.8635,
"step": 37260
},
{
"epoch": 0.59648,
"grad_norm": 0.19724565744400024,
"learning_rate": 8.074880000000001e-05,
"loss": 0.8171,
"step": 37280
},
{
"epoch": 0.5968,
"grad_norm": 0.208595871925354,
"learning_rate": 8.06848e-05,
"loss": 0.8812,
"step": 37300
},
{
"epoch": 0.59712,
"grad_norm": 0.22307556867599487,
"learning_rate": 8.062080000000001e-05,
"loss": 0.8931,
"step": 37320
},
{
"epoch": 0.59744,
"grad_norm": 0.23288871347904205,
"learning_rate": 8.05568e-05,
"loss": 0.8764,
"step": 37340
},
{
"epoch": 0.59776,
"grad_norm": 0.228603333234787,
"learning_rate": 8.04928e-05,
"loss": 0.8742,
"step": 37360
},
{
"epoch": 0.59808,
"grad_norm": 0.2657196521759033,
"learning_rate": 8.04288e-05,
"loss": 0.8955,
"step": 37380
},
{
"epoch": 0.5984,
"grad_norm": 0.21586276590824127,
"learning_rate": 8.036480000000001e-05,
"loss": 0.8867,
"step": 37400
},
{
"epoch": 0.59872,
"grad_norm": 0.24376487731933594,
"learning_rate": 8.030080000000002e-05,
"loss": 0.8941,
"step": 37420
},
{
"epoch": 0.59904,
"grad_norm": 0.24991095066070557,
"learning_rate": 8.02368e-05,
"loss": 0.8832,
"step": 37440
},
{
"epoch": 0.59936,
"grad_norm": 0.2291015237569809,
"learning_rate": 8.01728e-05,
"loss": 0.8427,
"step": 37460
},
{
"epoch": 0.59968,
"grad_norm": 0.2516835331916809,
"learning_rate": 8.01088e-05,
"loss": 0.8526,
"step": 37480
},
{
"epoch": 0.6,
"grad_norm": 0.2229020595550537,
"learning_rate": 8.004480000000001e-05,
"loss": 0.8766,
"step": 37500
},
{
"epoch": 0.60032,
"grad_norm": 0.24501658976078033,
"learning_rate": 7.99808e-05,
"loss": 0.9391,
"step": 37520
},
{
"epoch": 0.60064,
"grad_norm": 0.21306291222572327,
"learning_rate": 7.99168e-05,
"loss": 0.8318,
"step": 37540
},
{
"epoch": 0.60096,
"grad_norm": 0.28489160537719727,
"learning_rate": 7.985280000000001e-05,
"loss": 0.8845,
"step": 37560
},
{
"epoch": 0.60128,
"grad_norm": 0.24311847984790802,
"learning_rate": 7.97888e-05,
"loss": 0.8619,
"step": 37580
},
{
"epoch": 0.6016,
"grad_norm": 0.2386752963066101,
"learning_rate": 7.97248e-05,
"loss": 0.8554,
"step": 37600
},
{
"epoch": 0.60192,
"grad_norm": 0.22845840454101562,
"learning_rate": 7.966080000000001e-05,
"loss": 0.8505,
"step": 37620
},
{
"epoch": 0.60224,
"grad_norm": 0.21545352041721344,
"learning_rate": 7.95968e-05,
"loss": 0.8725,
"step": 37640
},
{
"epoch": 0.60256,
"grad_norm": 0.24536795914173126,
"learning_rate": 7.953280000000001e-05,
"loss": 0.9144,
"step": 37660
},
{
"epoch": 0.60288,
"grad_norm": 0.29689520597457886,
"learning_rate": 7.94688e-05,
"loss": 0.8949,
"step": 37680
},
{
"epoch": 0.6032,
"grad_norm": 0.22102287411689758,
"learning_rate": 7.94048e-05,
"loss": 0.8847,
"step": 37700
},
{
"epoch": 0.60352,
"grad_norm": 0.2358902543783188,
"learning_rate": 7.93408e-05,
"loss": 0.8841,
"step": 37720
},
{
"epoch": 0.60384,
"grad_norm": 0.23542214930057526,
"learning_rate": 7.92768e-05,
"loss": 0.8899,
"step": 37740
},
{
"epoch": 0.60416,
"grad_norm": 0.23006409406661987,
"learning_rate": 7.921280000000002e-05,
"loss": 0.824,
"step": 37760
},
{
"epoch": 0.60448,
"grad_norm": 0.29189014434814453,
"learning_rate": 7.91488e-05,
"loss": 0.9226,
"step": 37780
},
{
"epoch": 0.6048,
"grad_norm": 0.23212820291519165,
"learning_rate": 7.90848e-05,
"loss": 0.883,
"step": 37800
},
{
"epoch": 0.60512,
"grad_norm": 0.2882430851459503,
"learning_rate": 7.90208e-05,
"loss": 0.8327,
"step": 37820
},
{
"epoch": 0.60544,
"grad_norm": 0.21909599006175995,
"learning_rate": 7.895680000000001e-05,
"loss": 0.8446,
"step": 37840
},
{
"epoch": 0.60576,
"grad_norm": 0.23385344445705414,
"learning_rate": 7.88928e-05,
"loss": 0.8822,
"step": 37860
},
{
"epoch": 0.60608,
"grad_norm": 0.23137474060058594,
"learning_rate": 7.88288e-05,
"loss": 0.8669,
"step": 37880
},
{
"epoch": 0.6064,
"grad_norm": 0.234337717294693,
"learning_rate": 7.876480000000001e-05,
"loss": 0.9464,
"step": 37900
},
{
"epoch": 0.60672,
"grad_norm": 0.19100037217140198,
"learning_rate": 7.87008e-05,
"loss": 0.8538,
"step": 37920
},
{
"epoch": 0.60704,
"grad_norm": 0.231523334980011,
"learning_rate": 7.86368e-05,
"loss": 0.8869,
"step": 37940
},
{
"epoch": 0.60736,
"grad_norm": 0.19973509013652802,
"learning_rate": 7.857280000000001e-05,
"loss": 0.8765,
"step": 37960
},
{
"epoch": 0.60768,
"grad_norm": 0.2176535725593567,
"learning_rate": 7.85088e-05,
"loss": 0.864,
"step": 37980
},
{
"epoch": 0.608,
"grad_norm": 0.2557581961154938,
"learning_rate": 7.844480000000001e-05,
"loss": 0.8782,
"step": 38000
},
{
"epoch": 0.60832,
"grad_norm": 0.273478627204895,
"learning_rate": 7.83808e-05,
"loss": 0.9086,
"step": 38020
},
{
"epoch": 0.60864,
"grad_norm": 0.26042431592941284,
"learning_rate": 7.83168e-05,
"loss": 0.8562,
"step": 38040
},
{
"epoch": 0.60896,
"grad_norm": 0.23801887035369873,
"learning_rate": 7.82528e-05,
"loss": 0.9028,
"step": 38060
},
{
"epoch": 0.60928,
"grad_norm": 0.22711488604545593,
"learning_rate": 7.81888e-05,
"loss": 0.9305,
"step": 38080
},
{
"epoch": 0.6096,
"grad_norm": 0.19922931492328644,
"learning_rate": 7.812480000000001e-05,
"loss": 0.8843,
"step": 38100
},
{
"epoch": 0.60992,
"grad_norm": 0.20743116736412048,
"learning_rate": 7.80608e-05,
"loss": 0.8837,
"step": 38120
},
{
"epoch": 0.61024,
"grad_norm": 0.2508273422718048,
"learning_rate": 7.79968e-05,
"loss": 0.8681,
"step": 38140
},
{
"epoch": 0.61056,
"grad_norm": 0.24501731991767883,
"learning_rate": 7.79328e-05,
"loss": 0.8787,
"step": 38160
},
{
"epoch": 0.61088,
"grad_norm": 0.258413702249527,
"learning_rate": 7.786880000000001e-05,
"loss": 0.9128,
"step": 38180
},
{
"epoch": 0.6112,
"grad_norm": 0.22945356369018555,
"learning_rate": 7.78048e-05,
"loss": 0.8835,
"step": 38200
},
{
"epoch": 0.61152,
"grad_norm": 0.2546984553337097,
"learning_rate": 7.77408e-05,
"loss": 0.8806,
"step": 38220
},
{
"epoch": 0.61184,
"grad_norm": 0.2060491144657135,
"learning_rate": 7.767680000000001e-05,
"loss": 0.8551,
"step": 38240
},
{
"epoch": 0.61216,
"grad_norm": 0.21388421952724457,
"learning_rate": 7.76128e-05,
"loss": 0.8576,
"step": 38260
},
{
"epoch": 0.61248,
"grad_norm": 0.22443564236164093,
"learning_rate": 7.75488e-05,
"loss": 0.8807,
"step": 38280
},
{
"epoch": 0.6128,
"grad_norm": 0.24119791388511658,
"learning_rate": 7.748480000000001e-05,
"loss": 0.9135,
"step": 38300
},
{
"epoch": 0.61312,
"grad_norm": 0.22703927755355835,
"learning_rate": 7.74208e-05,
"loss": 0.9121,
"step": 38320
},
{
"epoch": 0.61344,
"grad_norm": 0.23478037118911743,
"learning_rate": 7.735680000000001e-05,
"loss": 0.9024,
"step": 38340
},
{
"epoch": 0.61376,
"grad_norm": 0.22331403195858002,
"learning_rate": 7.729279999999999e-05,
"loss": 0.9034,
"step": 38360
},
{
"epoch": 0.61408,
"grad_norm": 0.22578079998493195,
"learning_rate": 7.72288e-05,
"loss": 0.855,
"step": 38380
},
{
"epoch": 0.6144,
"grad_norm": 0.22246988117694855,
"learning_rate": 7.716480000000001e-05,
"loss": 0.8515,
"step": 38400
},
{
"epoch": 0.61472,
"grad_norm": 0.20506010949611664,
"learning_rate": 7.71008e-05,
"loss": 0.8626,
"step": 38420
},
{
"epoch": 0.61504,
"grad_norm": 0.23380227386951447,
"learning_rate": 7.703680000000001e-05,
"loss": 0.8963,
"step": 38440
},
{
"epoch": 0.61536,
"grad_norm": 0.2194732278585434,
"learning_rate": 7.69728e-05,
"loss": 0.835,
"step": 38460
},
{
"epoch": 0.61568,
"grad_norm": 0.20710738003253937,
"learning_rate": 7.69088e-05,
"loss": 0.8783,
"step": 38480
},
{
"epoch": 0.616,
"grad_norm": 0.23248372972011566,
"learning_rate": 7.68448e-05,
"loss": 0.8603,
"step": 38500
},
{
"epoch": 0.61632,
"grad_norm": 0.21198534965515137,
"learning_rate": 7.678080000000001e-05,
"loss": 0.8963,
"step": 38520
},
{
"epoch": 0.61664,
"grad_norm": 0.2391924113035202,
"learning_rate": 7.67168e-05,
"loss": 0.8964,
"step": 38540
},
{
"epoch": 0.61696,
"grad_norm": 0.21630340814590454,
"learning_rate": 7.66528e-05,
"loss": 0.8468,
"step": 38560
},
{
"epoch": 0.61728,
"grad_norm": 0.22936932742595673,
"learning_rate": 7.658880000000001e-05,
"loss": 0.8638,
"step": 38580
},
{
"epoch": 0.6176,
"grad_norm": 0.15402059257030487,
"learning_rate": 7.65248e-05,
"loss": 0.8881,
"step": 38600
},
{
"epoch": 0.61792,
"grad_norm": 0.18893806636333466,
"learning_rate": 7.64608e-05,
"loss": 0.9137,
"step": 38620
},
{
"epoch": 0.61824,
"grad_norm": 0.19027528166770935,
"learning_rate": 7.639680000000001e-05,
"loss": 0.8402,
"step": 38640
},
{
"epoch": 0.61856,
"grad_norm": 0.2610226571559906,
"learning_rate": 7.63328e-05,
"loss": 0.8644,
"step": 38660
},
{
"epoch": 0.61888,
"grad_norm": 0.22377745807170868,
"learning_rate": 7.626880000000001e-05,
"loss": 0.8621,
"step": 38680
},
{
"epoch": 0.6192,
"grad_norm": 0.2550973892211914,
"learning_rate": 7.620479999999999e-05,
"loss": 0.8982,
"step": 38700
},
{
"epoch": 0.61952,
"grad_norm": 0.21758711338043213,
"learning_rate": 7.61408e-05,
"loss": 0.8616,
"step": 38720
},
{
"epoch": 0.61984,
"grad_norm": 0.2632993459701538,
"learning_rate": 7.607680000000001e-05,
"loss": 0.8565,
"step": 38740
},
{
"epoch": 0.62016,
"grad_norm": 0.26186972856521606,
"learning_rate": 7.60128e-05,
"loss": 0.883,
"step": 38760
},
{
"epoch": 0.62048,
"grad_norm": 0.22264538705348969,
"learning_rate": 7.594880000000001e-05,
"loss": 0.8625,
"step": 38780
},
{
"epoch": 0.6208,
"grad_norm": 0.2717147171497345,
"learning_rate": 7.58848e-05,
"loss": 0.9014,
"step": 38800
},
{
"epoch": 0.62112,
"grad_norm": 0.2638401687145233,
"learning_rate": 7.58208e-05,
"loss": 0.933,
"step": 38820
},
{
"epoch": 0.62144,
"grad_norm": 0.20931828022003174,
"learning_rate": 7.57568e-05,
"loss": 0.8697,
"step": 38840
},
{
"epoch": 0.62176,
"grad_norm": 0.22345200181007385,
"learning_rate": 7.569280000000001e-05,
"loss": 0.8404,
"step": 38860
},
{
"epoch": 0.62208,
"grad_norm": 0.21052759885787964,
"learning_rate": 7.56288e-05,
"loss": 0.8458,
"step": 38880
},
{
"epoch": 0.6224,
"grad_norm": 0.24614858627319336,
"learning_rate": 7.55648e-05,
"loss": 0.9028,
"step": 38900
},
{
"epoch": 0.62272,
"grad_norm": 0.276304692029953,
"learning_rate": 7.550080000000001e-05,
"loss": 0.9097,
"step": 38920
},
{
"epoch": 0.62304,
"grad_norm": 0.27636557817459106,
"learning_rate": 7.54368e-05,
"loss": 0.8969,
"step": 38940
},
{
"epoch": 0.62336,
"grad_norm": 0.2127619832754135,
"learning_rate": 7.53728e-05,
"loss": 0.9,
"step": 38960
},
{
"epoch": 0.62368,
"grad_norm": 0.23187151551246643,
"learning_rate": 7.53088e-05,
"loss": 0.9081,
"step": 38980
},
{
"epoch": 0.624,
"grad_norm": 0.2539467215538025,
"learning_rate": 7.52448e-05,
"loss": 0.8887,
"step": 39000
},
{
"epoch": 0.62432,
"grad_norm": 0.30993226170539856,
"learning_rate": 7.518080000000001e-05,
"loss": 0.9242,
"step": 39020
},
{
"epoch": 0.62464,
"grad_norm": 0.18882299959659576,
"learning_rate": 7.511679999999999e-05,
"loss": 0.8939,
"step": 39040
},
{
"epoch": 0.62496,
"grad_norm": 0.21862895786762238,
"learning_rate": 7.50528e-05,
"loss": 0.8707,
"step": 39060
},
{
"epoch": 0.62528,
"grad_norm": 0.26119473576545715,
"learning_rate": 7.498880000000001e-05,
"loss": 0.8699,
"step": 39080
},
{
"epoch": 0.6256,
"grad_norm": 0.24526222050189972,
"learning_rate": 7.49248e-05,
"loss": 0.8787,
"step": 39100
},
{
"epoch": 0.62592,
"grad_norm": 0.2376752644777298,
"learning_rate": 7.486080000000001e-05,
"loss": 0.9359,
"step": 39120
},
{
"epoch": 0.62624,
"grad_norm": 0.27865490317344666,
"learning_rate": 7.47968e-05,
"loss": 0.8842,
"step": 39140
},
{
"epoch": 0.62656,
"grad_norm": 0.22411134839057922,
"learning_rate": 7.47328e-05,
"loss": 0.9039,
"step": 39160
},
{
"epoch": 0.62688,
"grad_norm": 0.24417252838611603,
"learning_rate": 7.46688e-05,
"loss": 0.915,
"step": 39180
},
{
"epoch": 0.6272,
"grad_norm": 0.21417266130447388,
"learning_rate": 7.460480000000001e-05,
"loss": 0.8408,
"step": 39200
},
{
"epoch": 0.62752,
"grad_norm": 0.21754087507724762,
"learning_rate": 7.45408e-05,
"loss": 0.8663,
"step": 39220
},
{
"epoch": 0.62784,
"grad_norm": 0.2340565174818039,
"learning_rate": 7.44768e-05,
"loss": 0.8353,
"step": 39240
},
{
"epoch": 0.62816,
"grad_norm": 0.25035715103149414,
"learning_rate": 7.441280000000001e-05,
"loss": 0.8729,
"step": 39260
},
{
"epoch": 0.62848,
"grad_norm": 0.2600226104259491,
"learning_rate": 7.43488e-05,
"loss": 0.8794,
"step": 39280
},
{
"epoch": 0.6288,
"grad_norm": 0.27380433678627014,
"learning_rate": 7.42848e-05,
"loss": 0.8578,
"step": 39300
},
{
"epoch": 0.62912,
"grad_norm": 0.18544994294643402,
"learning_rate": 7.42208e-05,
"loss": 0.9026,
"step": 39320
},
{
"epoch": 0.62944,
"grad_norm": 0.23277778923511505,
"learning_rate": 7.41568e-05,
"loss": 0.8759,
"step": 39340
},
{
"epoch": 0.62976,
"grad_norm": 0.21839508414268494,
"learning_rate": 7.409280000000001e-05,
"loss": 0.884,
"step": 39360
},
{
"epoch": 0.63008,
"grad_norm": 0.20547005534172058,
"learning_rate": 7.402879999999999e-05,
"loss": 0.8755,
"step": 39380
},
{
"epoch": 0.6304,
"grad_norm": 0.21153508126735687,
"learning_rate": 7.39648e-05,
"loss": 0.8551,
"step": 39400
},
{
"epoch": 0.63072,
"grad_norm": 0.22079287469387054,
"learning_rate": 7.390080000000001e-05,
"loss": 0.8424,
"step": 39420
},
{
"epoch": 0.63104,
"grad_norm": 0.25068333745002747,
"learning_rate": 7.38368e-05,
"loss": 0.893,
"step": 39440
},
{
"epoch": 0.63136,
"grad_norm": 0.18888238072395325,
"learning_rate": 7.377280000000001e-05,
"loss": 0.8803,
"step": 39460
},
{
"epoch": 0.63168,
"grad_norm": 0.258759081363678,
"learning_rate": 7.37088e-05,
"loss": 0.921,
"step": 39480
},
{
"epoch": 0.632,
"grad_norm": 0.22620119154453278,
"learning_rate": 7.36448e-05,
"loss": 0.8552,
"step": 39500
},
{
"epoch": 0.63232,
"grad_norm": 0.2477254718542099,
"learning_rate": 7.35808e-05,
"loss": 0.8884,
"step": 39520
},
{
"epoch": 0.63264,
"grad_norm": 0.23816423118114471,
"learning_rate": 7.351680000000001e-05,
"loss": 0.8148,
"step": 39540
},
{
"epoch": 0.63296,
"grad_norm": 0.19741494953632355,
"learning_rate": 7.34528e-05,
"loss": 0.8668,
"step": 39560
},
{
"epoch": 0.63328,
"grad_norm": 0.25120246410369873,
"learning_rate": 7.33888e-05,
"loss": 0.9149,
"step": 39580
},
{
"epoch": 0.6336,
"grad_norm": 0.21695558726787567,
"learning_rate": 7.33248e-05,
"loss": 0.8762,
"step": 39600
},
{
"epoch": 0.63392,
"grad_norm": 0.2556038498878479,
"learning_rate": 7.32608e-05,
"loss": 0.8438,
"step": 39620
},
{
"epoch": 0.63424,
"grad_norm": 0.265425443649292,
"learning_rate": 7.31968e-05,
"loss": 0.913,
"step": 39640
},
{
"epoch": 0.63456,
"grad_norm": 0.23394323885440826,
"learning_rate": 7.31328e-05,
"loss": 0.8572,
"step": 39660
},
{
"epoch": 0.63488,
"grad_norm": 0.25647103786468506,
"learning_rate": 7.30688e-05,
"loss": 0.9088,
"step": 39680
},
{
"epoch": 0.6352,
"grad_norm": 0.2558782994747162,
"learning_rate": 7.300480000000001e-05,
"loss": 0.887,
"step": 39700
},
{
"epoch": 0.63552,
"grad_norm": 0.23038125038146973,
"learning_rate": 7.29408e-05,
"loss": 0.8505,
"step": 39720
},
{
"epoch": 0.63584,
"grad_norm": 0.22082971036434174,
"learning_rate": 7.28768e-05,
"loss": 0.8159,
"step": 39740
},
{
"epoch": 0.63616,
"grad_norm": 0.2407037317752838,
"learning_rate": 7.281280000000001e-05,
"loss": 0.8703,
"step": 39760
},
{
"epoch": 0.63648,
"grad_norm": 0.2498258352279663,
"learning_rate": 7.27488e-05,
"loss": 0.902,
"step": 39780
},
{
"epoch": 0.6368,
"grad_norm": 0.212127685546875,
"learning_rate": 7.268480000000001e-05,
"loss": 0.8748,
"step": 39800
},
{
"epoch": 0.63712,
"grad_norm": 0.2286374717950821,
"learning_rate": 7.26208e-05,
"loss": 0.8932,
"step": 39820
},
{
"epoch": 0.63744,
"grad_norm": 0.23190194368362427,
"learning_rate": 7.25568e-05,
"loss": 0.8772,
"step": 39840
},
{
"epoch": 0.63776,
"grad_norm": 0.2264523208141327,
"learning_rate": 7.249280000000001e-05,
"loss": 0.8824,
"step": 39860
},
{
"epoch": 0.63808,
"grad_norm": 0.267228901386261,
"learning_rate": 7.242880000000001e-05,
"loss": 0.808,
"step": 39880
},
{
"epoch": 0.6384,
"grad_norm": 0.25287920236587524,
"learning_rate": 7.23648e-05,
"loss": 0.8901,
"step": 39900
},
{
"epoch": 0.63872,
"grad_norm": 0.22771823406219482,
"learning_rate": 7.23008e-05,
"loss": 0.9111,
"step": 39920
},
{
"epoch": 0.63904,
"grad_norm": 0.20720939338207245,
"learning_rate": 7.22368e-05,
"loss": 0.89,
"step": 39940
},
{
"epoch": 0.63936,
"grad_norm": 0.24383579194545746,
"learning_rate": 7.21728e-05,
"loss": 0.9141,
"step": 39960
},
{
"epoch": 0.63968,
"grad_norm": 0.22674813866615295,
"learning_rate": 7.21088e-05,
"loss": 0.8842,
"step": 39980
},
{
"epoch": 0.64,
"grad_norm": 0.22404788434505463,
"learning_rate": 7.20448e-05,
"loss": 0.8501,
"step": 40000
},
{
"epoch": 0.64032,
"grad_norm": 0.21472766995429993,
"learning_rate": 7.19808e-05,
"loss": 0.9002,
"step": 40020
},
{
"epoch": 0.64064,
"grad_norm": 0.25677719712257385,
"learning_rate": 7.191680000000001e-05,
"loss": 0.9173,
"step": 40040
},
{
"epoch": 0.64096,
"grad_norm": 0.21479903161525726,
"learning_rate": 7.18528e-05,
"loss": 0.8685,
"step": 40060
},
{
"epoch": 0.64128,
"grad_norm": 0.24473252892494202,
"learning_rate": 7.17888e-05,
"loss": 0.8955,
"step": 40080
},
{
"epoch": 0.6416,
"grad_norm": 0.23189175128936768,
"learning_rate": 7.172480000000001e-05,
"loss": 0.8438,
"step": 40100
},
{
"epoch": 0.64192,
"grad_norm": 0.25505882501602173,
"learning_rate": 7.16608e-05,
"loss": 0.8654,
"step": 40120
},
{
"epoch": 0.64224,
"grad_norm": 0.23652783036231995,
"learning_rate": 7.159680000000001e-05,
"loss": 0.907,
"step": 40140
},
{
"epoch": 0.64256,
"grad_norm": 0.26799604296684265,
"learning_rate": 7.15328e-05,
"loss": 0.8599,
"step": 40160
},
{
"epoch": 0.64288,
"grad_norm": 0.2281332015991211,
"learning_rate": 7.14688e-05,
"loss": 0.9087,
"step": 40180
},
{
"epoch": 0.6432,
"grad_norm": 0.22313934564590454,
"learning_rate": 7.140480000000001e-05,
"loss": 0.8987,
"step": 40200
},
{
"epoch": 0.64352,
"grad_norm": 0.21266809105873108,
"learning_rate": 7.13408e-05,
"loss": 0.9406,
"step": 40220
},
{
"epoch": 0.64384,
"grad_norm": 0.21366780996322632,
"learning_rate": 7.12768e-05,
"loss": 0.8144,
"step": 40240
},
{
"epoch": 0.64416,
"grad_norm": 0.2068609744310379,
"learning_rate": 7.12128e-05,
"loss": 0.8527,
"step": 40260
},
{
"epoch": 0.64448,
"grad_norm": 0.2570587396621704,
"learning_rate": 7.11488e-05,
"loss": 0.8938,
"step": 40280
},
{
"epoch": 0.6448,
"grad_norm": 0.21306006610393524,
"learning_rate": 7.10848e-05,
"loss": 0.8601,
"step": 40300
},
{
"epoch": 0.64512,
"grad_norm": 0.23386195302009583,
"learning_rate": 7.10208e-05,
"loss": 0.8709,
"step": 40320
},
{
"epoch": 0.64544,
"grad_norm": 0.2133599817752838,
"learning_rate": 7.09568e-05,
"loss": 0.9178,
"step": 40340
},
{
"epoch": 0.64576,
"grad_norm": 0.23202918469905853,
"learning_rate": 7.08928e-05,
"loss": 0.8588,
"step": 40360
},
{
"epoch": 0.64608,
"grad_norm": 0.246184840798378,
"learning_rate": 7.082880000000001e-05,
"loss": 0.8732,
"step": 40380
},
{
"epoch": 0.6464,
"grad_norm": 0.22465592622756958,
"learning_rate": 7.07648e-05,
"loss": 0.8496,
"step": 40400
},
{
"epoch": 0.64672,
"grad_norm": 0.27027273178100586,
"learning_rate": 7.07008e-05,
"loss": 0.8732,
"step": 40420
},
{
"epoch": 0.64704,
"grad_norm": 0.23711097240447998,
"learning_rate": 7.063680000000001e-05,
"loss": 0.9601,
"step": 40440
},
{
"epoch": 0.64736,
"grad_norm": 0.21715228259563446,
"learning_rate": 7.05728e-05,
"loss": 0.93,
"step": 40460
},
{
"epoch": 0.64768,
"grad_norm": 0.18533211946487427,
"learning_rate": 7.050880000000001e-05,
"loss": 0.8923,
"step": 40480
},
{
"epoch": 0.648,
"grad_norm": 0.2325373739004135,
"learning_rate": 7.04448e-05,
"loss": 0.8445,
"step": 40500
},
{
"epoch": 0.64832,
"grad_norm": 0.20923930406570435,
"learning_rate": 7.03808e-05,
"loss": 0.8714,
"step": 40520
},
{
"epoch": 0.64864,
"grad_norm": 0.16755761206150055,
"learning_rate": 7.031680000000001e-05,
"loss": 0.856,
"step": 40540
},
{
"epoch": 0.64896,
"grad_norm": 0.20746345818042755,
"learning_rate": 7.02528e-05,
"loss": 0.9456,
"step": 40560
},
{
"epoch": 0.64928,
"grad_norm": 0.2445952445268631,
"learning_rate": 7.01888e-05,
"loss": 0.8962,
"step": 40580
},
{
"epoch": 0.6496,
"grad_norm": 0.21696268022060394,
"learning_rate": 7.01248e-05,
"loss": 0.8501,
"step": 40600
},
{
"epoch": 0.64992,
"grad_norm": 0.18578511476516724,
"learning_rate": 7.00608e-05,
"loss": 0.8168,
"step": 40620
},
{
"epoch": 0.65024,
"grad_norm": 0.25534483790397644,
"learning_rate": 6.99968e-05,
"loss": 0.8867,
"step": 40640
},
{
"epoch": 0.65056,
"grad_norm": 0.24341151118278503,
"learning_rate": 6.99328e-05,
"loss": 0.8426,
"step": 40660
},
{
"epoch": 0.65088,
"grad_norm": 0.229604572057724,
"learning_rate": 6.98688e-05,
"loss": 0.9253,
"step": 40680
},
{
"epoch": 0.6512,
"grad_norm": 0.24505998194217682,
"learning_rate": 6.98048e-05,
"loss": 0.9244,
"step": 40700
},
{
"epoch": 0.65152,
"grad_norm": 0.19099657237529755,
"learning_rate": 6.974080000000001e-05,
"loss": 0.8625,
"step": 40720
},
{
"epoch": 0.65184,
"grad_norm": 0.24071238934993744,
"learning_rate": 6.96768e-05,
"loss": 0.8457,
"step": 40740
},
{
"epoch": 0.65216,
"grad_norm": 0.2676192820072174,
"learning_rate": 6.96128e-05,
"loss": 0.8621,
"step": 40760
},
{
"epoch": 0.65248,
"grad_norm": 0.2141886055469513,
"learning_rate": 6.954880000000001e-05,
"loss": 0.8369,
"step": 40780
},
{
"epoch": 0.6528,
"grad_norm": 0.24451769888401031,
"learning_rate": 6.94848e-05,
"loss": 0.8665,
"step": 40800
},
{
"epoch": 0.65312,
"grad_norm": 0.20462900400161743,
"learning_rate": 6.942080000000001e-05,
"loss": 0.9034,
"step": 40820
},
{
"epoch": 0.65344,
"grad_norm": 0.2098025679588318,
"learning_rate": 6.935679999999999e-05,
"loss": 0.8874,
"step": 40840
},
{
"epoch": 0.65376,
"grad_norm": 0.2127532809972763,
"learning_rate": 6.92928e-05,
"loss": 0.8651,
"step": 40860
},
{
"epoch": 0.65408,
"grad_norm": 0.23097112774848938,
"learning_rate": 6.922880000000001e-05,
"loss": 0.8587,
"step": 40880
},
{
"epoch": 0.6544,
"grad_norm": 0.23748517036437988,
"learning_rate": 6.91648e-05,
"loss": 0.8306,
"step": 40900
},
{
"epoch": 0.65472,
"grad_norm": 0.2403116524219513,
"learning_rate": 6.91008e-05,
"loss": 0.8822,
"step": 40920
},
{
"epoch": 0.65504,
"grad_norm": 0.25235334038734436,
"learning_rate": 6.90368e-05,
"loss": 0.9339,
"step": 40940
},
{
"epoch": 0.65536,
"grad_norm": 0.23442967236042023,
"learning_rate": 6.89728e-05,
"loss": 0.9065,
"step": 40960
},
{
"epoch": 0.65568,
"grad_norm": 0.22697308659553528,
"learning_rate": 6.89088e-05,
"loss": 0.9111,
"step": 40980
},
{
"epoch": 0.656,
"grad_norm": 0.21100306510925293,
"learning_rate": 6.88448e-05,
"loss": 0.8536,
"step": 41000
},
{
"epoch": 0.65632,
"grad_norm": 0.24288100004196167,
"learning_rate": 6.87808e-05,
"loss": 0.8757,
"step": 41020
},
{
"epoch": 0.65664,
"grad_norm": 0.26507681608200073,
"learning_rate": 6.87168e-05,
"loss": 0.8786,
"step": 41040
},
{
"epoch": 0.65696,
"grad_norm": 0.2742130160331726,
"learning_rate": 6.865280000000001e-05,
"loss": 0.8888,
"step": 41060
},
{
"epoch": 0.65728,
"grad_norm": 0.23026636242866516,
"learning_rate": 6.85888e-05,
"loss": 0.8212,
"step": 41080
},
{
"epoch": 0.6576,
"grad_norm": 0.20454558730125427,
"learning_rate": 6.85248e-05,
"loss": 0.8761,
"step": 41100
},
{
"epoch": 0.65792,
"grad_norm": 0.20581161975860596,
"learning_rate": 6.846080000000001e-05,
"loss": 0.8897,
"step": 41120
},
{
"epoch": 0.65824,
"grad_norm": 0.24633437395095825,
"learning_rate": 6.83968e-05,
"loss": 0.8448,
"step": 41140
},
{
"epoch": 0.65856,
"grad_norm": 0.246739000082016,
"learning_rate": 6.833280000000001e-05,
"loss": 0.8769,
"step": 41160
},
{
"epoch": 0.65888,
"grad_norm": 0.22334247827529907,
"learning_rate": 6.82688e-05,
"loss": 0.8975,
"step": 41180
},
{
"epoch": 0.6592,
"grad_norm": 0.23122014105319977,
"learning_rate": 6.82048e-05,
"loss": 0.9277,
"step": 41200
},
{
"epoch": 0.65952,
"grad_norm": 0.26595011353492737,
"learning_rate": 6.8144e-05,
"loss": 0.8685,
"step": 41220
},
{
"epoch": 0.65984,
"grad_norm": 0.22354891896247864,
"learning_rate": 6.808e-05,
"loss": 0.8693,
"step": 41240
},
{
"epoch": 0.66016,
"grad_norm": 0.23355019092559814,
"learning_rate": 6.801600000000001e-05,
"loss": 0.8979,
"step": 41260
},
{
"epoch": 0.66048,
"grad_norm": 0.2354181855916977,
"learning_rate": 6.7952e-05,
"loss": 0.8735,
"step": 41280
},
{
"epoch": 0.6608,
"grad_norm": 0.26578792929649353,
"learning_rate": 6.788800000000001e-05,
"loss": 0.8971,
"step": 41300
},
{
"epoch": 0.66112,
"grad_norm": 0.26312100887298584,
"learning_rate": 6.782399999999999e-05,
"loss": 0.9185,
"step": 41320
},
{
"epoch": 0.66144,
"grad_norm": 0.22569258511066437,
"learning_rate": 6.776e-05,
"loss": 0.8838,
"step": 41340
},
{
"epoch": 0.66176,
"grad_norm": 0.30978450179100037,
"learning_rate": 6.769600000000001e-05,
"loss": 0.9312,
"step": 41360
},
{
"epoch": 0.66208,
"grad_norm": 0.23343753814697266,
"learning_rate": 6.7632e-05,
"loss": 0.8593,
"step": 41380
},
{
"epoch": 0.6624,
"grad_norm": 0.22287525236606598,
"learning_rate": 6.7568e-05,
"loss": 0.9196,
"step": 41400
},
{
"epoch": 0.66272,
"grad_norm": 0.20180395245552063,
"learning_rate": 6.7504e-05,
"loss": 0.8573,
"step": 41420
},
{
"epoch": 0.66304,
"grad_norm": 0.24834296107292175,
"learning_rate": 6.744e-05,
"loss": 0.8839,
"step": 41440
},
{
"epoch": 0.66336,
"grad_norm": 0.2042527049779892,
"learning_rate": 6.7376e-05,
"loss": 0.8805,
"step": 41460
},
{
"epoch": 0.66368,
"grad_norm": 0.22171486914157867,
"learning_rate": 6.731200000000001e-05,
"loss": 0.8742,
"step": 41480
},
{
"epoch": 0.664,
"grad_norm": 0.27567192912101746,
"learning_rate": 6.7248e-05,
"loss": 0.8997,
"step": 41500
},
{
"epoch": 0.66432,
"grad_norm": 0.22618427872657776,
"learning_rate": 6.7184e-05,
"loss": 0.8679,
"step": 41520
},
{
"epoch": 0.66464,
"grad_norm": 0.24690526723861694,
"learning_rate": 6.712000000000001e-05,
"loss": 0.8564,
"step": 41540
},
{
"epoch": 0.66496,
"grad_norm": 0.23084665834903717,
"learning_rate": 6.7056e-05,
"loss": 0.9033,
"step": 41560
},
{
"epoch": 0.66528,
"grad_norm": 0.2343718707561493,
"learning_rate": 6.6992e-05,
"loss": 0.8987,
"step": 41580
},
{
"epoch": 0.6656,
"grad_norm": 0.24334469437599182,
"learning_rate": 6.692800000000001e-05,
"loss": 0.8943,
"step": 41600
},
{
"epoch": 0.66592,
"grad_norm": 0.24616220593452454,
"learning_rate": 6.6864e-05,
"loss": 0.831,
"step": 41620
},
{
"epoch": 0.66624,
"grad_norm": 0.21528421342372894,
"learning_rate": 6.680000000000001e-05,
"loss": 0.8642,
"step": 41640
},
{
"epoch": 0.66656,
"grad_norm": 0.20237964391708374,
"learning_rate": 6.673599999999999e-05,
"loss": 0.9171,
"step": 41660
},
{
"epoch": 0.66688,
"grad_norm": 0.2018793821334839,
"learning_rate": 6.6672e-05,
"loss": 0.8975,
"step": 41680
},
{
"epoch": 0.6672,
"grad_norm": 0.2155706137418747,
"learning_rate": 6.660800000000001e-05,
"loss": 0.8354,
"step": 41700
},
{
"epoch": 0.66752,
"grad_norm": 0.23168103396892548,
"learning_rate": 6.6544e-05,
"loss": 0.916,
"step": 41720
},
{
"epoch": 0.66784,
"grad_norm": 0.20231425762176514,
"learning_rate": 6.648e-05,
"loss": 0.8639,
"step": 41740
},
{
"epoch": 0.66816,
"grad_norm": 0.20722989737987518,
"learning_rate": 6.6416e-05,
"loss": 0.8197,
"step": 41760
},
{
"epoch": 0.66848,
"grad_norm": 0.20583872497081757,
"learning_rate": 6.6352e-05,
"loss": 0.8644,
"step": 41780
},
{
"epoch": 0.6688,
"grad_norm": 0.22469474375247955,
"learning_rate": 6.6288e-05,
"loss": 0.8431,
"step": 41800
},
{
"epoch": 0.66912,
"grad_norm": 0.22593587636947632,
"learning_rate": 6.622400000000001e-05,
"loss": 0.8428,
"step": 41820
},
{
"epoch": 0.66944,
"grad_norm": 0.21532204747200012,
"learning_rate": 6.616e-05,
"loss": 0.8634,
"step": 41840
},
{
"epoch": 0.66976,
"grad_norm": 0.1992572546005249,
"learning_rate": 6.6096e-05,
"loss": 0.8744,
"step": 41860
},
{
"epoch": 0.67008,
"grad_norm": 0.23626761138439178,
"learning_rate": 6.603200000000001e-05,
"loss": 0.8928,
"step": 41880
},
{
"epoch": 0.6704,
"grad_norm": 0.2587644159793854,
"learning_rate": 6.5968e-05,
"loss": 0.8501,
"step": 41900
},
{
"epoch": 0.67072,
"grad_norm": 0.23042425513267517,
"learning_rate": 6.5904e-05,
"loss": 0.8317,
"step": 41920
},
{
"epoch": 0.67104,
"grad_norm": 0.20776692032814026,
"learning_rate": 6.584e-05,
"loss": 0.8842,
"step": 41940
},
{
"epoch": 0.67136,
"grad_norm": 0.2233342081308365,
"learning_rate": 6.5776e-05,
"loss": 0.9062,
"step": 41960
},
{
"epoch": 0.67168,
"grad_norm": 0.22281095385551453,
"learning_rate": 6.571200000000001e-05,
"loss": 0.8696,
"step": 41980
},
{
"epoch": 0.672,
"grad_norm": 0.2269035130739212,
"learning_rate": 6.564799999999999e-05,
"loss": 0.8983,
"step": 42000
},
{
"epoch": 0.67232,
"grad_norm": 0.21187534928321838,
"learning_rate": 6.5584e-05,
"loss": 0.8666,
"step": 42020
},
{
"epoch": 0.67264,
"grad_norm": 0.2288711965084076,
"learning_rate": 6.552000000000001e-05,
"loss": 0.8519,
"step": 42040
},
{
"epoch": 0.67296,
"grad_norm": 0.24002696573734283,
"learning_rate": 6.5456e-05,
"loss": 0.843,
"step": 42060
},
{
"epoch": 0.67328,
"grad_norm": 0.19838085770606995,
"learning_rate": 6.5392e-05,
"loss": 0.8429,
"step": 42080
},
{
"epoch": 0.6736,
"grad_norm": 0.24605266749858856,
"learning_rate": 6.5328e-05,
"loss": 0.9016,
"step": 42100
},
{
"epoch": 0.67392,
"grad_norm": 0.273473858833313,
"learning_rate": 6.5264e-05,
"loss": 0.9013,
"step": 42120
},
{
"epoch": 0.67424,
"grad_norm": 0.2528668940067291,
"learning_rate": 6.52e-05,
"loss": 0.9072,
"step": 42140
},
{
"epoch": 0.67456,
"grad_norm": 0.23695510625839233,
"learning_rate": 6.513600000000001e-05,
"loss": 0.8652,
"step": 42160
},
{
"epoch": 0.67488,
"grad_norm": 0.23263618350028992,
"learning_rate": 6.5072e-05,
"loss": 0.9238,
"step": 42180
},
{
"epoch": 0.6752,
"grad_norm": 0.2174840271472931,
"learning_rate": 6.5008e-05,
"loss": 0.8806,
"step": 42200
},
{
"epoch": 0.67552,
"grad_norm": 0.22841788828372955,
"learning_rate": 6.494400000000001e-05,
"loss": 0.8422,
"step": 42220
},
{
"epoch": 0.67584,
"grad_norm": 0.24447223544120789,
"learning_rate": 6.488e-05,
"loss": 0.8361,
"step": 42240
},
{
"epoch": 0.67616,
"grad_norm": 0.24879607558250427,
"learning_rate": 6.4816e-05,
"loss": 0.8944,
"step": 42260
},
{
"epoch": 0.67648,
"grad_norm": 0.26324090361595154,
"learning_rate": 6.4752e-05,
"loss": 0.8424,
"step": 42280
},
{
"epoch": 0.6768,
"grad_norm": 0.28680363297462463,
"learning_rate": 6.4688e-05,
"loss": 0.8763,
"step": 42300
},
{
"epoch": 0.67712,
"grad_norm": 0.2222435623407364,
"learning_rate": 6.462400000000001e-05,
"loss": 0.8901,
"step": 42320
},
{
"epoch": 0.67744,
"grad_norm": 0.2362917810678482,
"learning_rate": 6.455999999999999e-05,
"loss": 0.9079,
"step": 42340
},
{
"epoch": 0.67776,
"grad_norm": 0.23679310083389282,
"learning_rate": 6.4496e-05,
"loss": 0.832,
"step": 42360
},
{
"epoch": 0.67808,
"grad_norm": 0.23579975962638855,
"learning_rate": 6.443200000000001e-05,
"loss": 0.8957,
"step": 42380
},
{
"epoch": 0.6784,
"grad_norm": 0.2289842814207077,
"learning_rate": 6.4368e-05,
"loss": 0.9169,
"step": 42400
},
{
"epoch": 0.67872,
"grad_norm": 0.2322479486465454,
"learning_rate": 6.4304e-05,
"loss": 0.8729,
"step": 42420
},
{
"epoch": 0.67904,
"grad_norm": 0.2532987892627716,
"learning_rate": 6.42432e-05,
"loss": 0.8365,
"step": 42440
},
{
"epoch": 0.67936,
"grad_norm": 0.31229642033576965,
"learning_rate": 6.417920000000001e-05,
"loss": 0.9106,
"step": 42460
},
{
"epoch": 0.67968,
"grad_norm": 0.19338229298591614,
"learning_rate": 6.41152e-05,
"loss": 0.8812,
"step": 42480
},
{
"epoch": 0.68,
"grad_norm": 0.2138776332139969,
"learning_rate": 6.40512e-05,
"loss": 0.8538,
"step": 42500
},
{
"epoch": 0.68032,
"grad_norm": 0.2549976408481598,
"learning_rate": 6.39872e-05,
"loss": 0.8651,
"step": 42520
},
{
"epoch": 0.68064,
"grad_norm": 0.24992278218269348,
"learning_rate": 6.39232e-05,
"loss": 0.9094,
"step": 42540
},
{
"epoch": 0.68096,
"grad_norm": 0.2768593728542328,
"learning_rate": 6.385920000000001e-05,
"loss": 0.9047,
"step": 42560
},
{
"epoch": 0.68128,
"grad_norm": 0.2133874148130417,
"learning_rate": 6.37952e-05,
"loss": 0.9259,
"step": 42580
},
{
"epoch": 0.6816,
"grad_norm": 0.24320970475673676,
"learning_rate": 6.37312e-05,
"loss": 0.875,
"step": 42600
},
{
"epoch": 0.68192,
"grad_norm": 0.21263545751571655,
"learning_rate": 6.36672e-05,
"loss": 0.8896,
"step": 42620
},
{
"epoch": 0.68224,
"grad_norm": 0.18488876521587372,
"learning_rate": 6.360320000000001e-05,
"loss": 0.8557,
"step": 42640
},
{
"epoch": 0.68256,
"grad_norm": 0.2456846386194229,
"learning_rate": 6.35392e-05,
"loss": 0.8991,
"step": 42660
},
{
"epoch": 0.68288,
"grad_norm": 0.20528970658779144,
"learning_rate": 6.34752e-05,
"loss": 0.8908,
"step": 42680
},
{
"epoch": 0.6832,
"grad_norm": 0.225137397646904,
"learning_rate": 6.341120000000001e-05,
"loss": 0.8635,
"step": 42700
},
{
"epoch": 0.68352,
"grad_norm": 0.24640017747879028,
"learning_rate": 6.33472e-05,
"loss": 0.8551,
"step": 42720
},
{
"epoch": 0.68384,
"grad_norm": 0.22672517597675323,
"learning_rate": 6.32832e-05,
"loss": 0.8665,
"step": 42740
},
{
"epoch": 0.68416,
"grad_norm": 0.229408398270607,
"learning_rate": 6.32192e-05,
"loss": 0.9198,
"step": 42760
},
{
"epoch": 0.68448,
"grad_norm": 0.19723407924175262,
"learning_rate": 6.31552e-05,
"loss": 0.8959,
"step": 42780
},
{
"epoch": 0.6848,
"grad_norm": 0.2351776361465454,
"learning_rate": 6.309120000000001e-05,
"loss": 0.8286,
"step": 42800
},
{
"epoch": 0.68512,
"grad_norm": 0.17581576108932495,
"learning_rate": 6.30272e-05,
"loss": 0.8849,
"step": 42820
},
{
"epoch": 0.68544,
"grad_norm": 0.22729769349098206,
"learning_rate": 6.29632e-05,
"loss": 0.8592,
"step": 42840
},
{
"epoch": 0.68576,
"grad_norm": 0.25973424315452576,
"learning_rate": 6.289920000000001e-05,
"loss": 0.8797,
"step": 42860
},
{
"epoch": 0.68608,
"grad_norm": 0.24774223566055298,
"learning_rate": 6.28352e-05,
"loss": 0.8542,
"step": 42880
},
{
"epoch": 0.6864,
"grad_norm": 0.25668323040008545,
"learning_rate": 6.277120000000001e-05,
"loss": 0.9054,
"step": 42900
},
{
"epoch": 0.68672,
"grad_norm": 0.26286524534225464,
"learning_rate": 6.27072e-05,
"loss": 0.8654,
"step": 42920
},
{
"epoch": 0.68704,
"grad_norm": 0.24494454264640808,
"learning_rate": 6.26432e-05,
"loss": 0.9516,
"step": 42940
},
{
"epoch": 0.68736,
"grad_norm": 0.2337479293346405,
"learning_rate": 6.25792e-05,
"loss": 0.8931,
"step": 42960
},
{
"epoch": 0.68768,
"grad_norm": 0.2087046056985855,
"learning_rate": 6.251520000000001e-05,
"loss": 0.9421,
"step": 42980
},
{
"epoch": 0.688,
"grad_norm": 0.21605008840560913,
"learning_rate": 6.24512e-05,
"loss": 0.883,
"step": 43000
},
{
"epoch": 0.68832,
"grad_norm": 0.21600419282913208,
"learning_rate": 6.23872e-05,
"loss": 0.8408,
"step": 43020
},
{
"epoch": 0.68864,
"grad_norm": 0.2731294333934784,
"learning_rate": 6.23232e-05,
"loss": 0.8508,
"step": 43040
},
{
"epoch": 0.68896,
"grad_norm": 0.20644868910312653,
"learning_rate": 6.22592e-05,
"loss": 0.8578,
"step": 43060
},
{
"epoch": 0.68928,
"grad_norm": 0.22507797181606293,
"learning_rate": 6.21952e-05,
"loss": 0.8556,
"step": 43080
},
{
"epoch": 0.6896,
"grad_norm": 0.20173804461956024,
"learning_rate": 6.21312e-05,
"loss": 0.8631,
"step": 43100
},
{
"epoch": 0.68992,
"grad_norm": 0.2198924422264099,
"learning_rate": 6.20672e-05,
"loss": 0.9,
"step": 43120
},
{
"epoch": 0.69024,
"grad_norm": 0.2248951494693756,
"learning_rate": 6.200320000000001e-05,
"loss": 0.9064,
"step": 43140
},
{
"epoch": 0.69056,
"grad_norm": 0.23556740581989288,
"learning_rate": 6.19392e-05,
"loss": 0.866,
"step": 43160
},
{
"epoch": 0.69088,
"grad_norm": 0.2064543068408966,
"learning_rate": 6.18752e-05,
"loss": 0.8798,
"step": 43180
},
{
"epoch": 0.6912,
"grad_norm": 0.22137311100959778,
"learning_rate": 6.181120000000001e-05,
"loss": 0.8844,
"step": 43200
},
{
"epoch": 0.69152,
"grad_norm": 0.21415813267230988,
"learning_rate": 6.17472e-05,
"loss": 0.8576,
"step": 43220
},
{
"epoch": 0.69184,
"grad_norm": 0.22798651456832886,
"learning_rate": 6.168320000000001e-05,
"loss": 0.8471,
"step": 43240
},
{
"epoch": 0.69216,
"grad_norm": 0.233371764421463,
"learning_rate": 6.16192e-05,
"loss": 0.8734,
"step": 43260
},
{
"epoch": 0.69248,
"grad_norm": 0.20008385181427002,
"learning_rate": 6.15552e-05,
"loss": 0.8683,
"step": 43280
},
{
"epoch": 0.6928,
"grad_norm": 0.22969180345535278,
"learning_rate": 6.14912e-05,
"loss": 0.8694,
"step": 43300
},
{
"epoch": 0.69312,
"grad_norm": 0.2556081712245941,
"learning_rate": 6.142720000000001e-05,
"loss": 0.8439,
"step": 43320
},
{
"epoch": 0.69344,
"grad_norm": 0.2534750699996948,
"learning_rate": 6.13632e-05,
"loss": 0.9054,
"step": 43340
},
{
"epoch": 0.69376,
"grad_norm": 0.2144964188337326,
"learning_rate": 6.12992e-05,
"loss": 0.9026,
"step": 43360
},
{
"epoch": 0.69408,
"grad_norm": 0.21919748187065125,
"learning_rate": 6.12352e-05,
"loss": 0.901,
"step": 43380
},
{
"epoch": 0.6944,
"grad_norm": 0.19432856142520905,
"learning_rate": 6.11712e-05,
"loss": 0.9081,
"step": 43400
},
{
"epoch": 0.69472,
"grad_norm": 0.25123217701911926,
"learning_rate": 6.11072e-05,
"loss": 0.9019,
"step": 43420
},
{
"epoch": 0.69504,
"grad_norm": 0.29103386402130127,
"learning_rate": 6.10432e-05,
"loss": 0.9118,
"step": 43440
},
{
"epoch": 0.69536,
"grad_norm": 0.2519950270652771,
"learning_rate": 6.09792e-05,
"loss": 0.8581,
"step": 43460
},
{
"epoch": 0.69568,
"grad_norm": 0.2215908318758011,
"learning_rate": 6.09152e-05,
"loss": 0.8958,
"step": 43480
},
{
"epoch": 0.696,
"grad_norm": 0.23915638029575348,
"learning_rate": 6.085120000000001e-05,
"loss": 0.8523,
"step": 43500
},
{
"epoch": 0.69632,
"grad_norm": 0.2282445728778839,
"learning_rate": 6.07872e-05,
"loss": 0.8991,
"step": 43520
},
{
"epoch": 0.69664,
"grad_norm": 0.2502846419811249,
"learning_rate": 6.07232e-05,
"loss": 0.8664,
"step": 43540
},
{
"epoch": 0.69696,
"grad_norm": 0.208401620388031,
"learning_rate": 6.0659200000000004e-05,
"loss": 0.8687,
"step": 43560
},
{
"epoch": 0.69728,
"grad_norm": 0.20891068875789642,
"learning_rate": 6.0595200000000006e-05,
"loss": 0.9093,
"step": 43580
},
{
"epoch": 0.6976,
"grad_norm": 0.26877716183662415,
"learning_rate": 6.05312e-05,
"loss": 0.847,
"step": 43600
},
{
"epoch": 0.69792,
"grad_norm": 0.21002227067947388,
"learning_rate": 6.04672e-05,
"loss": 0.8587,
"step": 43620
},
{
"epoch": 0.69824,
"grad_norm": 0.19834822416305542,
"learning_rate": 6.0403200000000005e-05,
"loss": 0.8871,
"step": 43640
},
{
"epoch": 0.69856,
"grad_norm": 0.23921220004558563,
"learning_rate": 6.033920000000001e-05,
"loss": 0.8202,
"step": 43660
},
{
"epoch": 0.69888,
"grad_norm": 0.23094278573989868,
"learning_rate": 6.0275199999999995e-05,
"loss": 0.8753,
"step": 43680
},
{
"epoch": 0.6992,
"grad_norm": 0.25601616501808167,
"learning_rate": 6.0211200000000004e-05,
"loss": 0.8652,
"step": 43700
},
{
"epoch": 0.69952,
"grad_norm": 0.25069522857666016,
"learning_rate": 6.0147200000000006e-05,
"loss": 0.8942,
"step": 43720
},
{
"epoch": 0.69984,
"grad_norm": 0.19572977721691132,
"learning_rate": 6.008320000000001e-05,
"loss": 0.9245,
"step": 43740
},
{
"epoch": 0.70016,
"grad_norm": 0.24047626554965973,
"learning_rate": 6.0019199999999996e-05,
"loss": 0.899,
"step": 43760
},
{
"epoch": 0.70048,
"grad_norm": 0.21386469900608063,
"learning_rate": 5.99552e-05,
"loss": 0.874,
"step": 43780
},
{
"epoch": 0.7008,
"grad_norm": 0.22829948365688324,
"learning_rate": 5.98912e-05,
"loss": 0.9313,
"step": 43800
},
{
"epoch": 0.70112,
"grad_norm": 0.23667655885219574,
"learning_rate": 5.98272e-05,
"loss": 0.8878,
"step": 43820
},
{
"epoch": 0.70144,
"grad_norm": 0.2182048112154007,
"learning_rate": 5.976320000000001e-05,
"loss": 0.8397,
"step": 43840
},
{
"epoch": 0.70176,
"grad_norm": 0.20164678990840912,
"learning_rate": 5.96992e-05,
"loss": 0.8768,
"step": 43860
},
{
"epoch": 0.70208,
"grad_norm": 0.23960982263088226,
"learning_rate": 5.96352e-05,
"loss": 0.8932,
"step": 43880
},
{
"epoch": 0.7024,
"grad_norm": 0.20772390067577362,
"learning_rate": 5.95712e-05,
"loss": 0.9277,
"step": 43900
},
{
"epoch": 0.70272,
"grad_norm": 0.24492938816547394,
"learning_rate": 5.9507200000000005e-05,
"loss": 0.8923,
"step": 43920
},
{
"epoch": 0.70304,
"grad_norm": 0.23545905947685242,
"learning_rate": 5.94432e-05,
"loss": 0.9141,
"step": 43940
},
{
"epoch": 0.70336,
"grad_norm": 0.2978091239929199,
"learning_rate": 5.93792e-05,
"loss": 0.9643,
"step": 43960
},
{
"epoch": 0.70368,
"grad_norm": 0.19800467789173126,
"learning_rate": 5.9315200000000004e-05,
"loss": 0.8799,
"step": 43980
},
{
"epoch": 0.704,
"grad_norm": 0.24483546614646912,
"learning_rate": 5.9251200000000006e-05,
"loss": 0.8587,
"step": 44000
},
{
"epoch": 0.70432,
"grad_norm": 0.32727476954460144,
"learning_rate": 5.91872e-05,
"loss": 0.8876,
"step": 44020
},
{
"epoch": 0.70464,
"grad_norm": 0.21823062002658844,
"learning_rate": 5.9123200000000003e-05,
"loss": 0.8695,
"step": 44040
},
{
"epoch": 0.70496,
"grad_norm": 0.2308553159236908,
"learning_rate": 5.9059200000000005e-05,
"loss": 0.8926,
"step": 44060
},
{
"epoch": 0.70528,
"grad_norm": 0.219979926943779,
"learning_rate": 5.899520000000001e-05,
"loss": 0.9193,
"step": 44080
},
{
"epoch": 0.7056,
"grad_norm": 0.24282580614089966,
"learning_rate": 5.8931199999999996e-05,
"loss": 0.8812,
"step": 44100
},
{
"epoch": 0.70592,
"grad_norm": 0.20185592770576477,
"learning_rate": 5.88672e-05,
"loss": 0.8588,
"step": 44120
},
{
"epoch": 0.70624,
"grad_norm": 0.24580541253089905,
"learning_rate": 5.88032e-05,
"loss": 0.849,
"step": 44140
},
{
"epoch": 0.70656,
"grad_norm": 0.2542431950569153,
"learning_rate": 5.873920000000001e-05,
"loss": 0.882,
"step": 44160
},
{
"epoch": 0.70688,
"grad_norm": 0.24872715771198273,
"learning_rate": 5.867520000000001e-05,
"loss": 0.8544,
"step": 44180
},
{
"epoch": 0.7072,
"grad_norm": 0.19842933118343353,
"learning_rate": 5.86112e-05,
"loss": 0.8803,
"step": 44200
},
{
"epoch": 0.70752,
"grad_norm": 0.2545991539955139,
"learning_rate": 5.85472e-05,
"loss": 0.9178,
"step": 44220
},
{
"epoch": 0.70784,
"grad_norm": 0.2342890352010727,
"learning_rate": 5.84832e-05,
"loss": 0.8403,
"step": 44240
},
{
"epoch": 0.70816,
"grad_norm": 0.2353144884109497,
"learning_rate": 5.8419200000000005e-05,
"loss": 0.9219,
"step": 44260
},
{
"epoch": 0.70848,
"grad_norm": 0.21412351727485657,
"learning_rate": 5.83552e-05,
"loss": 0.8837,
"step": 44280
},
{
"epoch": 0.7088,
"grad_norm": 0.18827536702156067,
"learning_rate": 5.82912e-05,
"loss": 0.8567,
"step": 44300
},
{
"epoch": 0.70912,
"grad_norm": 0.23062194883823395,
"learning_rate": 5.8227200000000004e-05,
"loss": 0.903,
"step": 44320
},
{
"epoch": 0.70944,
"grad_norm": 0.23226912319660187,
"learning_rate": 5.8163200000000006e-05,
"loss": 0.8446,
"step": 44340
},
{
"epoch": 0.70976,
"grad_norm": 0.23661820590496063,
"learning_rate": 5.80992e-05,
"loss": 0.9511,
"step": 44360
},
{
"epoch": 0.71008,
"grad_norm": 0.2356158196926117,
"learning_rate": 5.80352e-05,
"loss": 0.8817,
"step": 44380
},
{
"epoch": 0.7104,
"grad_norm": 0.23160752654075623,
"learning_rate": 5.7971200000000005e-05,
"loss": 0.8637,
"step": 44400
},
{
"epoch": 0.71072,
"grad_norm": 0.20803622901439667,
"learning_rate": 5.790720000000001e-05,
"loss": 0.8552,
"step": 44420
},
{
"epoch": 0.71104,
"grad_norm": 0.22061729431152344,
"learning_rate": 5.7843199999999995e-05,
"loss": 0.8751,
"step": 44440
},
{
"epoch": 0.71136,
"grad_norm": 0.233897864818573,
"learning_rate": 5.77792e-05,
"loss": 0.8686,
"step": 44460
},
{
"epoch": 0.71168,
"grad_norm": 0.21677446365356445,
"learning_rate": 5.77152e-05,
"loss": 0.8967,
"step": 44480
},
{
"epoch": 0.712,
"grad_norm": 0.24504272639751434,
"learning_rate": 5.765120000000001e-05,
"loss": 0.8934,
"step": 44500
},
{
"epoch": 0.71232,
"grad_norm": 0.21646228432655334,
"learning_rate": 5.758720000000001e-05,
"loss": 0.8452,
"step": 44520
},
{
"epoch": 0.71264,
"grad_norm": 0.22801847755908966,
"learning_rate": 5.75232e-05,
"loss": 0.8388,
"step": 44540
},
{
"epoch": 0.71296,
"grad_norm": 0.19865715503692627,
"learning_rate": 5.74592e-05,
"loss": 0.9077,
"step": 44560
},
{
"epoch": 0.71328,
"grad_norm": 0.24044495820999146,
"learning_rate": 5.73952e-05,
"loss": 0.9122,
"step": 44580
},
{
"epoch": 0.7136,
"grad_norm": 0.23846623301506042,
"learning_rate": 5.7331200000000004e-05,
"loss": 0.8906,
"step": 44600
},
{
"epoch": 0.71392,
"grad_norm": 0.21420036256313324,
"learning_rate": 5.72672e-05,
"loss": 0.877,
"step": 44620
},
{
"epoch": 0.71424,
"grad_norm": 0.2217768281698227,
"learning_rate": 5.72032e-05,
"loss": 0.8717,
"step": 44640
},
{
"epoch": 0.71456,
"grad_norm": 0.23392203450202942,
"learning_rate": 5.7139200000000003e-05,
"loss": 0.898,
"step": 44660
},
{
"epoch": 0.71488,
"grad_norm": 0.22015775740146637,
"learning_rate": 5.7075200000000005e-05,
"loss": 0.8809,
"step": 44680
},
{
"epoch": 0.7152,
"grad_norm": 0.21397672593593597,
"learning_rate": 5.70112e-05,
"loss": 0.8441,
"step": 44700
},
{
"epoch": 0.71552,
"grad_norm": 0.25773394107818604,
"learning_rate": 5.69472e-05,
"loss": 0.8623,
"step": 44720
},
{
"epoch": 0.71584,
"grad_norm": 0.24330535531044006,
"learning_rate": 5.6883200000000005e-05,
"loss": 0.8889,
"step": 44740
},
{
"epoch": 0.71616,
"grad_norm": 0.20773817598819733,
"learning_rate": 5.6819200000000006e-05,
"loss": 0.8711,
"step": 44760
},
{
"epoch": 0.71648,
"grad_norm": 0.21590672433376312,
"learning_rate": 5.6755199999999995e-05,
"loss": 0.8426,
"step": 44780
},
{
"epoch": 0.7168,
"grad_norm": 0.1878194808959961,
"learning_rate": 5.66912e-05,
"loss": 0.873,
"step": 44800
},
{
"epoch": 0.71712,
"grad_norm": 0.2268812656402588,
"learning_rate": 5.6627200000000006e-05,
"loss": 0.8769,
"step": 44820
},
{
"epoch": 0.71744,
"grad_norm": 0.24054917693138123,
"learning_rate": 5.6566399999999994e-05,
"loss": 0.8766,
"step": 44840
},
{
"epoch": 0.71776,
"grad_norm": 0.2115447223186493,
"learning_rate": 5.65024e-05,
"loss": 0.8531,
"step": 44860
},
{
"epoch": 0.71808,
"grad_norm": 0.20368890464305878,
"learning_rate": 5.6438400000000005e-05,
"loss": 0.8418,
"step": 44880
},
{
"epoch": 0.7184,
"grad_norm": 0.2356366366147995,
"learning_rate": 5.637440000000001e-05,
"loss": 0.9,
"step": 44900
},
{
"epoch": 0.71872,
"grad_norm": 0.22393269836902618,
"learning_rate": 5.631040000000001e-05,
"loss": 0.8868,
"step": 44920
},
{
"epoch": 0.71904,
"grad_norm": 0.2569195032119751,
"learning_rate": 5.62464e-05,
"loss": 0.895,
"step": 44940
},
{
"epoch": 0.71936,
"grad_norm": 0.20783191919326782,
"learning_rate": 5.61824e-05,
"loss": 0.8627,
"step": 44960
},
{
"epoch": 0.71968,
"grad_norm": 0.2164582461118698,
"learning_rate": 5.61184e-05,
"loss": 0.8982,
"step": 44980
},
{
"epoch": 0.72,
"grad_norm": 0.38153156638145447,
"learning_rate": 5.605440000000001e-05,
"loss": 0.9014,
"step": 45000
},
{
"epoch": 0.72032,
"grad_norm": 0.2400229126214981,
"learning_rate": 5.59904e-05,
"loss": 0.8997,
"step": 45020
},
{
"epoch": 0.72064,
"grad_norm": 0.25559934973716736,
"learning_rate": 5.59264e-05,
"loss": 0.8984,
"step": 45040
},
{
"epoch": 0.72096,
"grad_norm": 0.2528096139431,
"learning_rate": 5.58624e-05,
"loss": 0.914,
"step": 45060
},
{
"epoch": 0.72128,
"grad_norm": 0.18854907155036926,
"learning_rate": 5.5798400000000004e-05,
"loss": 0.8876,
"step": 45080
},
{
"epoch": 0.7216,
"grad_norm": 0.22515028715133667,
"learning_rate": 5.57344e-05,
"loss": 0.8684,
"step": 45100
},
{
"epoch": 0.72192,
"grad_norm": 0.2182977795600891,
"learning_rate": 5.56704e-05,
"loss": 0.8838,
"step": 45120
},
{
"epoch": 0.72224,
"grad_norm": 0.21532991528511047,
"learning_rate": 5.5606400000000003e-05,
"loss": 0.8298,
"step": 45140
},
{
"epoch": 0.72256,
"grad_norm": 0.2378109246492386,
"learning_rate": 5.5542400000000005e-05,
"loss": 0.867,
"step": 45160
},
{
"epoch": 0.72288,
"grad_norm": 0.22187520563602448,
"learning_rate": 5.547840000000001e-05,
"loss": 0.87,
"step": 45180
},
{
"epoch": 0.7232,
"grad_norm": 0.2259528785943985,
"learning_rate": 5.54144e-05,
"loss": 0.885,
"step": 45200
},
{
"epoch": 0.72352,
"grad_norm": 0.19351425766944885,
"learning_rate": 5.5350400000000005e-05,
"loss": 0.8984,
"step": 45220
},
{
"epoch": 0.72384,
"grad_norm": 0.2292325645685196,
"learning_rate": 5.5286400000000007e-05,
"loss": 0.8794,
"step": 45240
},
{
"epoch": 0.72416,
"grad_norm": 0.20458444952964783,
"learning_rate": 5.522240000000001e-05,
"loss": 0.8585,
"step": 45260
},
{
"epoch": 0.72448,
"grad_norm": 0.22770562767982483,
"learning_rate": 5.51584e-05,
"loss": 0.9062,
"step": 45280
},
{
"epoch": 0.7248,
"grad_norm": 0.21661782264709473,
"learning_rate": 5.50944e-05,
"loss": 0.8842,
"step": 45300
},
{
"epoch": 0.72512,
"grad_norm": 0.19377048313617706,
"learning_rate": 5.50304e-05,
"loss": 0.8405,
"step": 45320
},
{
"epoch": 0.72544,
"grad_norm": 0.2309509813785553,
"learning_rate": 5.496640000000001e-05,
"loss": 0.8799,
"step": 45340
},
{
"epoch": 0.72576,
"grad_norm": 0.18839353322982788,
"learning_rate": 5.49024e-05,
"loss": 0.878,
"step": 45360
},
{
"epoch": 0.72608,
"grad_norm": 0.248517245054245,
"learning_rate": 5.48384e-05,
"loss": 0.9094,
"step": 45380
},
{
"epoch": 0.7264,
"grad_norm": 0.21810860931873322,
"learning_rate": 5.47744e-05,
"loss": 0.8679,
"step": 45400
},
{
"epoch": 0.72672,
"grad_norm": 0.2429954707622528,
"learning_rate": 5.4710400000000004e-05,
"loss": 0.8592,
"step": 45420
},
{
"epoch": 0.72704,
"grad_norm": 0.20929422974586487,
"learning_rate": 5.46464e-05,
"loss": 0.9011,
"step": 45440
},
{
"epoch": 0.72736,
"grad_norm": 0.2323046624660492,
"learning_rate": 5.45824e-05,
"loss": 0.8614,
"step": 45460
},
{
"epoch": 0.72768,
"grad_norm": 0.22738327085971832,
"learning_rate": 5.45184e-05,
"loss": 0.8795,
"step": 45480
},
{
"epoch": 0.728,
"grad_norm": 0.2241695523262024,
"learning_rate": 5.4454400000000005e-05,
"loss": 0.9009,
"step": 45500
},
{
"epoch": 0.72832,
"grad_norm": 0.21020178496837616,
"learning_rate": 5.439040000000001e-05,
"loss": 0.8568,
"step": 45520
},
{
"epoch": 0.72864,
"grad_norm": 0.24524196982383728,
"learning_rate": 5.43264e-05,
"loss": 0.8667,
"step": 45540
},
{
"epoch": 0.72896,
"grad_norm": 0.2374972701072693,
"learning_rate": 5.4262400000000004e-05,
"loss": 0.9086,
"step": 45560
},
{
"epoch": 0.72928,
"grad_norm": 0.2362067550420761,
"learning_rate": 5.4198400000000006e-05,
"loss": 0.876,
"step": 45580
},
{
"epoch": 0.7296,
"grad_norm": 0.21441881358623505,
"learning_rate": 5.413440000000001e-05,
"loss": 0.8941,
"step": 45600
},
{
"epoch": 0.72992,
"grad_norm": 0.22504673898220062,
"learning_rate": 5.4070399999999996e-05,
"loss": 0.9268,
"step": 45620
},
{
"epoch": 0.73024,
"grad_norm": 0.20583686232566833,
"learning_rate": 5.40064e-05,
"loss": 0.8298,
"step": 45640
},
{
"epoch": 0.73056,
"grad_norm": 0.21706163883209229,
"learning_rate": 5.394240000000001e-05,
"loss": 0.8833,
"step": 45660
},
{
"epoch": 0.73088,
"grad_norm": 0.202799990773201,
"learning_rate": 5.387840000000001e-05,
"loss": 0.8792,
"step": 45680
},
{
"epoch": 0.7312,
"grad_norm": 0.2602541446685791,
"learning_rate": 5.38144e-05,
"loss": 0.9037,
"step": 45700
},
{
"epoch": 0.73152,
"grad_norm": 0.22036013007164001,
"learning_rate": 5.37504e-05,
"loss": 0.8917,
"step": 45720
},
{
"epoch": 0.73184,
"grad_norm": 0.22023898363113403,
"learning_rate": 5.36864e-05,
"loss": 0.8671,
"step": 45740
},
{
"epoch": 0.73216,
"grad_norm": 0.23420779407024384,
"learning_rate": 5.3622400000000003e-05,
"loss": 0.9288,
"step": 45760
},
{
"epoch": 0.73248,
"grad_norm": 0.20039279758930206,
"learning_rate": 5.35584e-05,
"loss": 0.8755,
"step": 45780
},
{
"epoch": 0.7328,
"grad_norm": 0.2586964964866638,
"learning_rate": 5.34944e-05,
"loss": 0.8896,
"step": 45800
},
{
"epoch": 0.73312,
"grad_norm": 0.2525421380996704,
"learning_rate": 5.34304e-05,
"loss": 0.8514,
"step": 45820
},
{
"epoch": 0.73344,
"grad_norm": 0.2144252359867096,
"learning_rate": 5.3366400000000005e-05,
"loss": 0.9074,
"step": 45840
},
{
"epoch": 0.73376,
"grad_norm": 0.21878720819950104,
"learning_rate": 5.3302400000000007e-05,
"loss": 0.9488,
"step": 45860
},
{
"epoch": 0.73408,
"grad_norm": 0.24089403450489044,
"learning_rate": 5.32384e-05,
"loss": 0.9008,
"step": 45880
},
{
"epoch": 0.7344,
"grad_norm": 0.25092679262161255,
"learning_rate": 5.3174400000000004e-05,
"loss": 0.8905,
"step": 45900
},
{
"epoch": 0.73472,
"grad_norm": 0.24005566537380219,
"learning_rate": 5.3110400000000006e-05,
"loss": 0.8812,
"step": 45920
},
{
"epoch": 0.73504,
"grad_norm": 0.2381397932767868,
"learning_rate": 5.304640000000001e-05,
"loss": 0.8944,
"step": 45940
},
{
"epoch": 0.73536,
"grad_norm": 0.23799841105937958,
"learning_rate": 5.2982399999999996e-05,
"loss": 0.8684,
"step": 45960
},
{
"epoch": 0.73568,
"grad_norm": 0.22827275097370148,
"learning_rate": 5.29184e-05,
"loss": 0.8657,
"step": 45980
},
{
"epoch": 0.736,
"grad_norm": 0.24510063230991364,
"learning_rate": 5.285440000000001e-05,
"loss": 0.9598,
"step": 46000
},
{
"epoch": 0.73632,
"grad_norm": 0.22655485570430756,
"learning_rate": 5.279040000000001e-05,
"loss": 0.8511,
"step": 46020
},
{
"epoch": 0.73664,
"grad_norm": 0.23105552792549133,
"learning_rate": 5.27264e-05,
"loss": 0.8809,
"step": 46040
},
{
"epoch": 0.73696,
"grad_norm": 0.21182331442832947,
"learning_rate": 5.26624e-05,
"loss": 0.8786,
"step": 46060
},
{
"epoch": 0.73728,
"grad_norm": 0.2535363435745239,
"learning_rate": 5.25984e-05,
"loss": 0.8697,
"step": 46080
},
{
"epoch": 0.7376,
"grad_norm": 0.2286081463098526,
"learning_rate": 5.25344e-05,
"loss": 0.8694,
"step": 46100
},
{
"epoch": 0.73792,
"grad_norm": 0.2347458302974701,
"learning_rate": 5.24704e-05,
"loss": 0.9251,
"step": 46120
},
{
"epoch": 0.73824,
"grad_norm": 0.21052898466587067,
"learning_rate": 5.24064e-05,
"loss": 0.8944,
"step": 46140
},
{
"epoch": 0.73856,
"grad_norm": 0.23154202103614807,
"learning_rate": 5.23424e-05,
"loss": 0.8367,
"step": 46160
},
{
"epoch": 0.73888,
"grad_norm": 0.23162192106246948,
"learning_rate": 5.2278400000000004e-05,
"loss": 0.8762,
"step": 46180
},
{
"epoch": 0.7392,
"grad_norm": 0.2610846757888794,
"learning_rate": 5.2214400000000006e-05,
"loss": 0.9275,
"step": 46200
},
{
"epoch": 0.73952,
"grad_norm": 0.20983512699604034,
"learning_rate": 5.21504e-05,
"loss": 0.8411,
"step": 46220
},
{
"epoch": 0.73984,
"grad_norm": 0.27023133635520935,
"learning_rate": 5.20864e-05,
"loss": 0.8825,
"step": 46240
},
{
"epoch": 0.74016,
"grad_norm": 0.22272150218486786,
"learning_rate": 5.2022400000000005e-05,
"loss": 0.8826,
"step": 46260
},
{
"epoch": 0.74048,
"grad_norm": 0.22320957481861115,
"learning_rate": 5.195840000000001e-05,
"loss": 0.8204,
"step": 46280
},
{
"epoch": 0.7408,
"grad_norm": 0.20854775607585907,
"learning_rate": 5.1894399999999996e-05,
"loss": 0.8643,
"step": 46300
},
{
"epoch": 0.74112,
"grad_norm": 0.23853574693202972,
"learning_rate": 5.1830400000000004e-05,
"loss": 0.8504,
"step": 46320
},
{
"epoch": 0.74144,
"grad_norm": 0.2031133770942688,
"learning_rate": 5.1766400000000006e-05,
"loss": 0.8798,
"step": 46340
},
{
"epoch": 0.74176,
"grad_norm": 0.23090733587741852,
"learning_rate": 5.170240000000001e-05,
"loss": 0.8771,
"step": 46360
},
{
"epoch": 0.74208,
"grad_norm": 0.22893227636814117,
"learning_rate": 5.16384e-05,
"loss": 0.9161,
"step": 46380
},
{
"epoch": 0.7424,
"grad_norm": 0.24600179493427277,
"learning_rate": 5.15744e-05,
"loss": 0.9158,
"step": 46400
},
{
"epoch": 0.74272,
"grad_norm": 0.22234416007995605,
"learning_rate": 5.15104e-05,
"loss": 0.9224,
"step": 46420
},
{
"epoch": 0.74304,
"grad_norm": 0.22974424064159393,
"learning_rate": 5.14464e-05,
"loss": 0.8871,
"step": 46440
},
{
"epoch": 0.74336,
"grad_norm": 0.2495729774236679,
"learning_rate": 5.13824e-05,
"loss": 0.9271,
"step": 46460
},
{
"epoch": 0.74368,
"grad_norm": 0.2178795486688614,
"learning_rate": 5.13184e-05,
"loss": 0.86,
"step": 46480
},
{
"epoch": 0.744,
"grad_norm": 0.2375311255455017,
"learning_rate": 5.12544e-05,
"loss": 0.8524,
"step": 46500
},
{
"epoch": 0.74432,
"grad_norm": 0.21281583607196808,
"learning_rate": 5.1190400000000004e-05,
"loss": 0.9261,
"step": 46520
},
{
"epoch": 0.74464,
"grad_norm": 0.26535019278526306,
"learning_rate": 5.1126400000000006e-05,
"loss": 0.9067,
"step": 46540
},
{
"epoch": 0.74496,
"grad_norm": 0.1832839846611023,
"learning_rate": 5.10624e-05,
"loss": 0.8801,
"step": 46560
},
{
"epoch": 0.74528,
"grad_norm": 0.21736547350883484,
"learning_rate": 5.09984e-05,
"loss": 0.8953,
"step": 46580
},
{
"epoch": 0.7456,
"grad_norm": 0.2637736201286316,
"learning_rate": 5.0934400000000005e-05,
"loss": 0.8551,
"step": 46600
},
{
"epoch": 0.74592,
"grad_norm": 0.19663706421852112,
"learning_rate": 5.087040000000001e-05,
"loss": 0.9004,
"step": 46620
},
{
"epoch": 0.74624,
"grad_norm": 0.21443675458431244,
"learning_rate": 5.0806399999999995e-05,
"loss": 0.8947,
"step": 46640
},
{
"epoch": 0.74656,
"grad_norm": 0.2313489317893982,
"learning_rate": 5.0742400000000004e-05,
"loss": 0.8798,
"step": 46660
},
{
"epoch": 0.74688,
"grad_norm": 0.2411520630121231,
"learning_rate": 5.0678400000000006e-05,
"loss": 0.8833,
"step": 46680
},
{
"epoch": 0.7472,
"grad_norm": 0.24178458750247955,
"learning_rate": 5.061440000000001e-05,
"loss": 0.8768,
"step": 46700
},
{
"epoch": 0.74752,
"grad_norm": 0.24031583964824677,
"learning_rate": 5.0550399999999996e-05,
"loss": 0.9053,
"step": 46720
},
{
"epoch": 0.74784,
"grad_norm": 0.24462060630321503,
"learning_rate": 5.04864e-05,
"loss": 0.8677,
"step": 46740
},
{
"epoch": 0.74816,
"grad_norm": 0.18988333642482758,
"learning_rate": 5.04224e-05,
"loss": 0.877,
"step": 46760
},
{
"epoch": 0.74848,
"grad_norm": 0.23754200339317322,
"learning_rate": 5.03584e-05,
"loss": 0.8808,
"step": 46780
},
{
"epoch": 0.7488,
"grad_norm": 0.2371503710746765,
"learning_rate": 5.02944e-05,
"loss": 0.9283,
"step": 46800
},
{
"epoch": 0.74912,
"grad_norm": 0.21101176738739014,
"learning_rate": 5.02304e-05,
"loss": 0.8769,
"step": 46820
},
{
"epoch": 0.74944,
"grad_norm": 0.23707903921604156,
"learning_rate": 5.01664e-05,
"loss": 0.876,
"step": 46840
},
{
"epoch": 0.74976,
"grad_norm": 0.25081855058670044,
"learning_rate": 5.01024e-05,
"loss": 0.8429,
"step": 46860
},
{
"epoch": 0.75008,
"grad_norm": 0.2517668604850769,
"learning_rate": 5.0038400000000005e-05,
"loss": 0.8936,
"step": 46880
},
{
"epoch": 0.7504,
"grad_norm": 0.2321518510580063,
"learning_rate": 4.997440000000001e-05,
"loss": 0.9427,
"step": 46900
},
{
"epoch": 0.75072,
"grad_norm": 0.3038017749786377,
"learning_rate": 4.99104e-05,
"loss": 0.9116,
"step": 46920
},
{
"epoch": 0.75104,
"grad_norm": 0.22047431766986847,
"learning_rate": 4.9846400000000004e-05,
"loss": 0.9356,
"step": 46940
},
{
"epoch": 0.75136,
"grad_norm": 0.2446911334991455,
"learning_rate": 4.97824e-05,
"loss": 0.8855,
"step": 46960
},
{
"epoch": 0.75168,
"grad_norm": 0.23208874464035034,
"learning_rate": 4.97184e-05,
"loss": 0.9064,
"step": 46980
},
{
"epoch": 0.752,
"grad_norm": 0.22263742983341217,
"learning_rate": 4.9654400000000004e-05,
"loss": 0.9262,
"step": 47000
},
{
"epoch": 0.75232,
"grad_norm": 0.24141012132167816,
"learning_rate": 4.9590400000000006e-05,
"loss": 0.8755,
"step": 47020
},
{
"epoch": 0.75264,
"grad_norm": 0.21403875946998596,
"learning_rate": 4.95264e-05,
"loss": 0.9046,
"step": 47040
},
{
"epoch": 0.75296,
"grad_norm": 0.22028857469558716,
"learning_rate": 4.94624e-05,
"loss": 0.8604,
"step": 47060
},
{
"epoch": 0.75328,
"grad_norm": 0.23686060309410095,
"learning_rate": 4.93984e-05,
"loss": 0.9034,
"step": 47080
},
{
"epoch": 0.7536,
"grad_norm": 0.21621714532375336,
"learning_rate": 4.93344e-05,
"loss": 0.8753,
"step": 47100
},
{
"epoch": 0.75392,
"grad_norm": 0.19985179603099823,
"learning_rate": 4.92704e-05,
"loss": 0.8533,
"step": 47120
},
{
"epoch": 0.75424,
"grad_norm": 0.25167474150657654,
"learning_rate": 4.9206400000000004e-05,
"loss": 0.899,
"step": 47140
},
{
"epoch": 0.75456,
"grad_norm": 0.22282272577285767,
"learning_rate": 4.9142400000000006e-05,
"loss": 0.8312,
"step": 47160
},
{
"epoch": 0.75488,
"grad_norm": 0.219001904129982,
"learning_rate": 4.90784e-05,
"loss": 0.9131,
"step": 47180
},
{
"epoch": 0.7552,
"grad_norm": 0.244069442152977,
"learning_rate": 4.90144e-05,
"loss": 0.8767,
"step": 47200
},
{
"epoch": 0.75552,
"grad_norm": 0.2010125070810318,
"learning_rate": 4.8950400000000005e-05,
"loss": 0.8617,
"step": 47220
},
{
"epoch": 0.75584,
"grad_norm": 0.19826588034629822,
"learning_rate": 4.888640000000001e-05,
"loss": 0.8655,
"step": 47240
},
{
"epoch": 0.75616,
"grad_norm": 0.2718552052974701,
"learning_rate": 4.88224e-05,
"loss": 0.8918,
"step": 47260
},
{
"epoch": 0.75648,
"grad_norm": 0.1939408779144287,
"learning_rate": 4.8758400000000004e-05,
"loss": 0.8851,
"step": 47280
},
{
"epoch": 0.7568,
"grad_norm": 0.2180645763874054,
"learning_rate": 4.86944e-05,
"loss": 0.8959,
"step": 47300
},
{
"epoch": 0.75712,
"grad_norm": 0.18682503700256348,
"learning_rate": 4.86304e-05,
"loss": 0.8892,
"step": 47320
},
{
"epoch": 0.75744,
"grad_norm": 0.24464449286460876,
"learning_rate": 4.85664e-05,
"loss": 0.8513,
"step": 47340
},
{
"epoch": 0.75776,
"grad_norm": 0.22836542129516602,
"learning_rate": 4.8502400000000005e-05,
"loss": 0.7955,
"step": 47360
},
{
"epoch": 0.75808,
"grad_norm": 0.236654594540596,
"learning_rate": 4.84384e-05,
"loss": 0.9108,
"step": 47380
},
{
"epoch": 0.7584,
"grad_norm": 0.23842214047908783,
"learning_rate": 4.83744e-05,
"loss": 0.9084,
"step": 47400
},
{
"epoch": 0.75872,
"grad_norm": 0.26700836420059204,
"learning_rate": 4.83104e-05,
"loss": 0.8792,
"step": 47420
},
{
"epoch": 0.75904,
"grad_norm": 0.19707651436328888,
"learning_rate": 4.82464e-05,
"loss": 0.885,
"step": 47440
},
{
"epoch": 0.75936,
"grad_norm": 0.2712419927120209,
"learning_rate": 4.81824e-05,
"loss": 0.891,
"step": 47460
},
{
"epoch": 0.75968,
"grad_norm": 0.22818304598331451,
"learning_rate": 4.81184e-05,
"loss": 0.8668,
"step": 47480
},
{
"epoch": 0.76,
"grad_norm": 0.24705687165260315,
"learning_rate": 4.8054400000000005e-05,
"loss": 0.853,
"step": 47500
},
{
"epoch": 0.76032,
"grad_norm": 0.2136003077030182,
"learning_rate": 4.79904e-05,
"loss": 0.8795,
"step": 47520
},
{
"epoch": 0.76064,
"grad_norm": 0.22492119669914246,
"learning_rate": 4.79264e-05,
"loss": 0.8962,
"step": 47540
},
{
"epoch": 0.76096,
"grad_norm": 0.21469563245773315,
"learning_rate": 4.7862400000000004e-05,
"loss": 0.8804,
"step": 47560
},
{
"epoch": 0.76128,
"grad_norm": 0.229572594165802,
"learning_rate": 4.7798400000000006e-05,
"loss": 0.898,
"step": 47580
},
{
"epoch": 0.7616,
"grad_norm": 0.2185087352991104,
"learning_rate": 4.77344e-05,
"loss": 0.9312,
"step": 47600
},
{
"epoch": 0.76192,
"grad_norm": 0.24852368235588074,
"learning_rate": 4.7670400000000004e-05,
"loss": 0.8916,
"step": 47620
},
{
"epoch": 0.76224,
"grad_norm": 0.20700128376483917,
"learning_rate": 4.76064e-05,
"loss": 0.8553,
"step": 47640
},
{
"epoch": 0.76256,
"grad_norm": 0.1880226880311966,
"learning_rate": 4.75424e-05,
"loss": 0.8134,
"step": 47660
},
{
"epoch": 0.76288,
"grad_norm": 0.24719256162643433,
"learning_rate": 4.74784e-05,
"loss": 0.9169,
"step": 47680
},
{
"epoch": 0.7632,
"grad_norm": 0.2389199137687683,
"learning_rate": 4.7414400000000005e-05,
"loss": 0.8902,
"step": 47700
},
{
"epoch": 0.76352,
"grad_norm": 0.26046285033226013,
"learning_rate": 4.73504e-05,
"loss": 0.9075,
"step": 47720
},
{
"epoch": 0.76384,
"grad_norm": 0.2406904399394989,
"learning_rate": 4.72864e-05,
"loss": 0.8481,
"step": 47740
},
{
"epoch": 0.76416,
"grad_norm": 0.24346570670604706,
"learning_rate": 4.72224e-05,
"loss": 0.8482,
"step": 47760
},
{
"epoch": 0.76448,
"grad_norm": 0.2557404935359955,
"learning_rate": 4.7158400000000006e-05,
"loss": 0.9096,
"step": 47780
},
{
"epoch": 0.7648,
"grad_norm": 0.22144544124603271,
"learning_rate": 4.70944e-05,
"loss": 0.8489,
"step": 47800
},
{
"epoch": 0.76512,
"grad_norm": 0.2356208860874176,
"learning_rate": 4.70304e-05,
"loss": 0.8703,
"step": 47820
},
{
"epoch": 0.76544,
"grad_norm": 0.20454536378383636,
"learning_rate": 4.6966400000000005e-05,
"loss": 0.9135,
"step": 47840
},
{
"epoch": 0.76576,
"grad_norm": 0.2013743370771408,
"learning_rate": 4.69024e-05,
"loss": 0.8641,
"step": 47860
},
{
"epoch": 0.76608,
"grad_norm": 0.20594638586044312,
"learning_rate": 4.68384e-05,
"loss": 0.9012,
"step": 47880
},
{
"epoch": 0.7664,
"grad_norm": 0.23454588651657104,
"learning_rate": 4.6774400000000004e-05,
"loss": 0.8529,
"step": 47900
},
{
"epoch": 0.76672,
"grad_norm": 0.2404514104127884,
"learning_rate": 4.6710400000000006e-05,
"loss": 0.9,
"step": 47920
},
{
"epoch": 0.76704,
"grad_norm": 0.21869786083698273,
"learning_rate": 4.66496e-05,
"loss": 0.8843,
"step": 47940
},
{
"epoch": 0.76736,
"grad_norm": 0.228584423661232,
"learning_rate": 4.65856e-05,
"loss": 0.935,
"step": 47960
},
{
"epoch": 0.76768,
"grad_norm": 0.2123897522687912,
"learning_rate": 4.6521600000000005e-05,
"loss": 0.8897,
"step": 47980
},
{
"epoch": 0.768,
"grad_norm": 0.27578243613243103,
"learning_rate": 4.64576e-05,
"loss": 0.8803,
"step": 48000
},
{
"epoch": 0.76832,
"grad_norm": 0.2598460614681244,
"learning_rate": 4.63936e-05,
"loss": 0.8164,
"step": 48020
},
{
"epoch": 0.76864,
"grad_norm": 0.21342791616916656,
"learning_rate": 4.63296e-05,
"loss": 0.9123,
"step": 48040
},
{
"epoch": 0.76896,
"grad_norm": 0.2282058596611023,
"learning_rate": 4.6265600000000006e-05,
"loss": 0.8956,
"step": 48060
},
{
"epoch": 0.76928,
"grad_norm": 0.21980886161327362,
"learning_rate": 4.62016e-05,
"loss": 0.8567,
"step": 48080
},
{
"epoch": 0.7696,
"grad_norm": 0.24570724368095398,
"learning_rate": 4.6137600000000004e-05,
"loss": 0.9293,
"step": 48100
},
{
"epoch": 0.76992,
"grad_norm": 0.21538405120372772,
"learning_rate": 4.60736e-05,
"loss": 0.8453,
"step": 48120
},
{
"epoch": 0.77024,
"grad_norm": 0.19840775430202484,
"learning_rate": 4.60096e-05,
"loss": 0.8911,
"step": 48140
},
{
"epoch": 0.77056,
"grad_norm": 0.24362660944461823,
"learning_rate": 4.59456e-05,
"loss": 0.8839,
"step": 48160
},
{
"epoch": 0.77088,
"grad_norm": 0.23664100468158722,
"learning_rate": 4.5881600000000005e-05,
"loss": 0.9147,
"step": 48180
},
{
"epoch": 0.7712,
"grad_norm": 0.22470878064632416,
"learning_rate": 4.581760000000001e-05,
"loss": 0.9081,
"step": 48200
},
{
"epoch": 0.77152,
"grad_norm": 0.25879278779029846,
"learning_rate": 4.57536e-05,
"loss": 0.8678,
"step": 48220
},
{
"epoch": 0.77184,
"grad_norm": 0.22820644080638885,
"learning_rate": 4.5689600000000004e-05,
"loss": 0.8437,
"step": 48240
},
{
"epoch": 0.77216,
"grad_norm": 0.24052444100379944,
"learning_rate": 4.56256e-05,
"loss": 0.8222,
"step": 48260
},
{
"epoch": 0.77248,
"grad_norm": 0.2304847240447998,
"learning_rate": 4.55616e-05,
"loss": 0.9303,
"step": 48280
},
{
"epoch": 0.7728,
"grad_norm": 0.2518431544303894,
"learning_rate": 4.54976e-05,
"loss": 0.8785,
"step": 48300
},
{
"epoch": 0.77312,
"grad_norm": 0.2376391738653183,
"learning_rate": 4.5433600000000005e-05,
"loss": 0.8469,
"step": 48320
},
{
"epoch": 0.77344,
"grad_norm": 0.24182195961475372,
"learning_rate": 4.53696e-05,
"loss": 0.916,
"step": 48340
},
{
"epoch": 0.77376,
"grad_norm": 0.25106081366539,
"learning_rate": 4.53056e-05,
"loss": 0.8624,
"step": 48360
},
{
"epoch": 0.77408,
"grad_norm": 0.22700931131839752,
"learning_rate": 4.52416e-05,
"loss": 0.9024,
"step": 48380
},
{
"epoch": 0.7744,
"grad_norm": 0.21767041087150574,
"learning_rate": 4.5177600000000006e-05,
"loss": 0.9082,
"step": 48400
},
{
"epoch": 0.77472,
"grad_norm": 0.2539537250995636,
"learning_rate": 4.51136e-05,
"loss": 0.8893,
"step": 48420
},
{
"epoch": 0.77504,
"grad_norm": 0.27352043986320496,
"learning_rate": 4.50496e-05,
"loss": 0.8678,
"step": 48440
},
{
"epoch": 0.77536,
"grad_norm": 0.22831988334655762,
"learning_rate": 4.49856e-05,
"loss": 0.8654,
"step": 48460
},
{
"epoch": 0.77568,
"grad_norm": 0.24554172158241272,
"learning_rate": 4.49216e-05,
"loss": 0.8604,
"step": 48480
},
{
"epoch": 0.776,
"grad_norm": 0.22556883096694946,
"learning_rate": 4.48576e-05,
"loss": 0.9112,
"step": 48500
},
{
"epoch": 0.77632,
"grad_norm": 0.22238677740097046,
"learning_rate": 4.4793600000000004e-05,
"loss": 0.8963,
"step": 48520
},
{
"epoch": 0.77664,
"grad_norm": 0.18963344395160675,
"learning_rate": 4.4729600000000006e-05,
"loss": 0.8464,
"step": 48540
},
{
"epoch": 0.77696,
"grad_norm": 0.21553830802440643,
"learning_rate": 4.46656e-05,
"loss": 0.8701,
"step": 48560
},
{
"epoch": 0.77728,
"grad_norm": 0.25254547595977783,
"learning_rate": 4.4601600000000003e-05,
"loss": 0.8503,
"step": 48580
},
{
"epoch": 0.7776,
"grad_norm": 0.21796059608459473,
"learning_rate": 4.45376e-05,
"loss": 0.8817,
"step": 48600
},
{
"epoch": 0.77792,
"grad_norm": 0.19532719254493713,
"learning_rate": 4.447360000000001e-05,
"loss": 0.9274,
"step": 48620
},
{
"epoch": 0.77824,
"grad_norm": 0.19615907967090607,
"learning_rate": 4.44096e-05,
"loss": 0.8781,
"step": 48640
},
{
"epoch": 0.77856,
"grad_norm": 0.20943795144557953,
"learning_rate": 4.4345600000000004e-05,
"loss": 0.8958,
"step": 48660
},
{
"epoch": 0.77888,
"grad_norm": 0.1854809671640396,
"learning_rate": 4.42816e-05,
"loss": 0.8712,
"step": 48680
},
{
"epoch": 0.7792,
"grad_norm": 0.23485055565834045,
"learning_rate": 4.42208e-05,
"loss": 0.8504,
"step": 48700
},
{
"epoch": 0.77952,
"grad_norm": 0.2727571725845337,
"learning_rate": 4.4156800000000004e-05,
"loss": 0.8882,
"step": 48720
},
{
"epoch": 0.77984,
"grad_norm": 0.2016323059797287,
"learning_rate": 4.40928e-05,
"loss": 0.8817,
"step": 48740
},
{
"epoch": 0.78016,
"grad_norm": 0.22555996477603912,
"learning_rate": 4.40288e-05,
"loss": 0.8778,
"step": 48760
},
{
"epoch": 0.78048,
"grad_norm": 0.19512006640434265,
"learning_rate": 4.39648e-05,
"loss": 0.8641,
"step": 48780
},
{
"epoch": 0.7808,
"grad_norm": 0.21679182350635529,
"learning_rate": 4.3900800000000005e-05,
"loss": 0.8819,
"step": 48800
},
{
"epoch": 0.78112,
"grad_norm": 0.23714877665042877,
"learning_rate": 4.38368e-05,
"loss": 0.8771,
"step": 48820
},
{
"epoch": 0.78144,
"grad_norm": 0.2215253859758377,
"learning_rate": 4.37728e-05,
"loss": 0.8592,
"step": 48840
},
{
"epoch": 0.78176,
"grad_norm": 0.21237672865390778,
"learning_rate": 4.37088e-05,
"loss": 0.8735,
"step": 48860
},
{
"epoch": 0.78208,
"grad_norm": 0.24682950973510742,
"learning_rate": 4.36448e-05,
"loss": 0.8988,
"step": 48880
},
{
"epoch": 0.7824,
"grad_norm": 0.27274882793426514,
"learning_rate": 4.35808e-05,
"loss": 0.9076,
"step": 48900
},
{
"epoch": 0.78272,
"grad_norm": 0.20632825791835785,
"learning_rate": 4.35168e-05,
"loss": 0.8541,
"step": 48920
},
{
"epoch": 0.78304,
"grad_norm": 0.21251200139522552,
"learning_rate": 4.3452800000000005e-05,
"loss": 0.8965,
"step": 48940
},
{
"epoch": 0.78336,
"grad_norm": 0.2018088847398758,
"learning_rate": 4.33888e-05,
"loss": 0.8742,
"step": 48960
},
{
"epoch": 0.78368,
"grad_norm": 0.24776096642017365,
"learning_rate": 4.33248e-05,
"loss": 0.8693,
"step": 48980
},
{
"epoch": 0.784,
"grad_norm": 0.24149677157402039,
"learning_rate": 4.3260800000000004e-05,
"loss": 0.883,
"step": 49000
},
{
"epoch": 0.78432,
"grad_norm": 0.2117341160774231,
"learning_rate": 4.3196800000000006e-05,
"loss": 0.8831,
"step": 49020
},
{
"epoch": 0.78464,
"grad_norm": 0.25594037771224976,
"learning_rate": 4.31328e-05,
"loss": 0.9239,
"step": 49040
},
{
"epoch": 0.78496,
"grad_norm": 0.20546288788318634,
"learning_rate": 4.3068800000000003e-05,
"loss": 0.8446,
"step": 49060
},
{
"epoch": 0.78528,
"grad_norm": 0.23239131271839142,
"learning_rate": 4.30048e-05,
"loss": 0.8878,
"step": 49080
},
{
"epoch": 0.7856,
"grad_norm": 0.24074342846870422,
"learning_rate": 4.29408e-05,
"loss": 0.9033,
"step": 49100
},
{
"epoch": 0.78592,
"grad_norm": 0.24424532055854797,
"learning_rate": 4.28768e-05,
"loss": 0.9051,
"step": 49120
},
{
"epoch": 0.78624,
"grad_norm": 0.23116187751293182,
"learning_rate": 4.2812800000000005e-05,
"loss": 0.911,
"step": 49140
},
{
"epoch": 0.78656,
"grad_norm": 0.2513030171394348,
"learning_rate": 4.27488e-05,
"loss": 0.8569,
"step": 49160
},
{
"epoch": 0.78688,
"grad_norm": 0.2296024113893509,
"learning_rate": 4.26848e-05,
"loss": 0.8666,
"step": 49180
},
{
"epoch": 0.7872,
"grad_norm": 0.2069111168384552,
"learning_rate": 4.26208e-05,
"loss": 0.9025,
"step": 49200
},
{
"epoch": 0.78752,
"grad_norm": 0.21525107324123383,
"learning_rate": 4.25568e-05,
"loss": 0.8218,
"step": 49220
},
{
"epoch": 0.78784,
"grad_norm": 0.21345154941082,
"learning_rate": 4.24928e-05,
"loss": 0.8456,
"step": 49240
},
{
"epoch": 0.78816,
"grad_norm": 0.25389420986175537,
"learning_rate": 4.24288e-05,
"loss": 0.842,
"step": 49260
},
{
"epoch": 0.78848,
"grad_norm": 0.2326725423336029,
"learning_rate": 4.2364800000000005e-05,
"loss": 0.8504,
"step": 49280
},
{
"epoch": 0.7888,
"grad_norm": 0.21930308640003204,
"learning_rate": 4.23008e-05,
"loss": 0.8697,
"step": 49300
},
{
"epoch": 0.78912,
"grad_norm": 0.23466825485229492,
"learning_rate": 4.22368e-05,
"loss": 0.9128,
"step": 49320
},
{
"epoch": 0.78944,
"grad_norm": 0.24129875004291534,
"learning_rate": 4.2172800000000004e-05,
"loss": 0.8499,
"step": 49340
},
{
"epoch": 0.78976,
"grad_norm": 0.17660856246948242,
"learning_rate": 4.2108800000000006e-05,
"loss": 0.8559,
"step": 49360
},
{
"epoch": 0.79008,
"grad_norm": 0.24038086831569672,
"learning_rate": 4.20448e-05,
"loss": 0.8669,
"step": 49380
},
{
"epoch": 0.7904,
"grad_norm": 0.23702336847782135,
"learning_rate": 4.19808e-05,
"loss": 0.846,
"step": 49400
},
{
"epoch": 0.79072,
"grad_norm": 0.2305484116077423,
"learning_rate": 4.19168e-05,
"loss": 0.8462,
"step": 49420
},
{
"epoch": 0.79104,
"grad_norm": 0.24989739060401917,
"learning_rate": 4.18528e-05,
"loss": 0.9404,
"step": 49440
},
{
"epoch": 0.79136,
"grad_norm": 0.23767246305942535,
"learning_rate": 4.17888e-05,
"loss": 0.9021,
"step": 49460
},
{
"epoch": 0.79168,
"grad_norm": 0.244027242064476,
"learning_rate": 4.1724800000000004e-05,
"loss": 0.8757,
"step": 49480
},
{
"epoch": 0.792,
"grad_norm": 0.21049901843070984,
"learning_rate": 4.16608e-05,
"loss": 0.8554,
"step": 49500
},
{
"epoch": 0.79232,
"grad_norm": 0.2375907301902771,
"learning_rate": 4.15968e-05,
"loss": 0.8256,
"step": 49520
},
{
"epoch": 0.79264,
"grad_norm": 0.23760604858398438,
"learning_rate": 4.1532799999999996e-05,
"loss": 0.898,
"step": 49540
},
{
"epoch": 0.79296,
"grad_norm": 0.16031509637832642,
"learning_rate": 4.1468800000000005e-05,
"loss": 0.8516,
"step": 49560
},
{
"epoch": 0.79328,
"grad_norm": 0.21544058620929718,
"learning_rate": 4.140480000000001e-05,
"loss": 0.857,
"step": 49580
},
{
"epoch": 0.7936,
"grad_norm": 0.23034314811229706,
"learning_rate": 4.13408e-05,
"loss": 0.8784,
"step": 49600
},
{
"epoch": 0.79392,
"grad_norm": 0.23492272198200226,
"learning_rate": 4.1276800000000004e-05,
"loss": 0.9037,
"step": 49620
},
{
"epoch": 0.79424,
"grad_norm": 0.2450007051229477,
"learning_rate": 4.12128e-05,
"loss": 0.8969,
"step": 49640
},
{
"epoch": 0.79456,
"grad_norm": 0.21207576990127563,
"learning_rate": 4.11488e-05,
"loss": 0.8492,
"step": 49660
},
{
"epoch": 0.79488,
"grad_norm": 0.21204914152622223,
"learning_rate": 4.1084800000000003e-05,
"loss": 0.9025,
"step": 49680
},
{
"epoch": 0.7952,
"grad_norm": 0.2355094999074936,
"learning_rate": 4.1020800000000005e-05,
"loss": 0.8533,
"step": 49700
},
{
"epoch": 0.79552,
"grad_norm": 0.21224915981292725,
"learning_rate": 4.09568e-05,
"loss": 0.8374,
"step": 49720
},
{
"epoch": 0.79584,
"grad_norm": 0.2114105373620987,
"learning_rate": 4.08928e-05,
"loss": 0.8823,
"step": 49740
},
{
"epoch": 0.79616,
"grad_norm": 0.3748084306716919,
"learning_rate": 4.08288e-05,
"loss": 0.8607,
"step": 49760
},
{
"epoch": 0.79648,
"grad_norm": 0.2285369336605072,
"learning_rate": 4.07648e-05,
"loss": 0.8654,
"step": 49780
},
{
"epoch": 0.7968,
"grad_norm": 0.17516812682151794,
"learning_rate": 4.07008e-05,
"loss": 0.8728,
"step": 49800
},
{
"epoch": 0.79712,
"grad_norm": 0.25664079189300537,
"learning_rate": 4.0636800000000004e-05,
"loss": 0.9342,
"step": 49820
},
{
"epoch": 0.79744,
"grad_norm": 0.2027619630098343,
"learning_rate": 4.05728e-05,
"loss": 0.8644,
"step": 49840
},
{
"epoch": 0.79776,
"grad_norm": 0.22048500180244446,
"learning_rate": 4.05088e-05,
"loss": 0.8586,
"step": 49860
},
{
"epoch": 0.79808,
"grad_norm": 0.2323845624923706,
"learning_rate": 4.0444799999999996e-05,
"loss": 0.8298,
"step": 49880
},
{
"epoch": 0.7984,
"grad_norm": 0.21872085332870483,
"learning_rate": 4.0380800000000005e-05,
"loss": 0.8584,
"step": 49900
},
{
"epoch": 0.79872,
"grad_norm": 0.20625688135623932,
"learning_rate": 4.031680000000001e-05,
"loss": 0.8122,
"step": 49920
},
{
"epoch": 0.79904,
"grad_norm": 0.20388665795326233,
"learning_rate": 4.02528e-05,
"loss": 0.8838,
"step": 49940
},
{
"epoch": 0.79936,
"grad_norm": 0.2362195998430252,
"learning_rate": 4.0188800000000004e-05,
"loss": 0.9139,
"step": 49960
},
{
"epoch": 0.79968,
"grad_norm": 0.19558613002300262,
"learning_rate": 4.01248e-05,
"loss": 0.8761,
"step": 49980
},
{
"epoch": 0.8,
"grad_norm": 0.18318922817707062,
"learning_rate": 4.00608e-05,
"loss": 0.8705,
"step": 50000
},
{
"epoch": 0.80032,
"grad_norm": 0.2216624766588211,
"learning_rate": 3.99968e-05,
"loss": 0.891,
"step": 50020
},
{
"epoch": 0.80064,
"grad_norm": 0.22886547446250916,
"learning_rate": 3.9932800000000005e-05,
"loss": 0.8926,
"step": 50040
},
{
"epoch": 0.80096,
"grad_norm": 0.2386888712644577,
"learning_rate": 3.98688e-05,
"loss": 0.8946,
"step": 50060
},
{
"epoch": 0.80128,
"grad_norm": 0.22732418775558472,
"learning_rate": 3.98048e-05,
"loss": 0.8874,
"step": 50080
},
{
"epoch": 0.8016,
"grad_norm": 0.19502227008342743,
"learning_rate": 3.97408e-05,
"loss": 0.8628,
"step": 50100
},
{
"epoch": 0.80192,
"grad_norm": 0.22442220151424408,
"learning_rate": 3.9676800000000006e-05,
"loss": 0.8888,
"step": 50120
},
{
"epoch": 0.80224,
"grad_norm": 0.2765730917453766,
"learning_rate": 3.96128e-05,
"loss": 0.8692,
"step": 50140
},
{
"epoch": 0.80256,
"grad_norm": 0.20345774292945862,
"learning_rate": 3.95488e-05,
"loss": 0.8961,
"step": 50160
},
{
"epoch": 0.80288,
"grad_norm": 0.17965848743915558,
"learning_rate": 3.94848e-05,
"loss": 0.8693,
"step": 50180
},
{
"epoch": 0.8032,
"grad_norm": 0.2365620881319046,
"learning_rate": 3.94208e-05,
"loss": 0.8645,
"step": 50200
},
{
"epoch": 0.80352,
"grad_norm": 0.22703875601291656,
"learning_rate": 3.93568e-05,
"loss": 0.9219,
"step": 50220
},
{
"epoch": 0.80384,
"grad_norm": 0.21852591633796692,
"learning_rate": 3.9292800000000004e-05,
"loss": 0.8594,
"step": 50240
},
{
"epoch": 0.80416,
"grad_norm": 0.2254893034696579,
"learning_rate": 3.9228800000000006e-05,
"loss": 0.8786,
"step": 50260
},
{
"epoch": 0.80448,
"grad_norm": 0.21090517938137054,
"learning_rate": 3.91648e-05,
"loss": 0.8894,
"step": 50280
},
{
"epoch": 0.8048,
"grad_norm": 0.21594619750976562,
"learning_rate": 3.9100800000000003e-05,
"loss": 0.8921,
"step": 50300
},
{
"epoch": 0.80512,
"grad_norm": 0.1902090162038803,
"learning_rate": 3.90368e-05,
"loss": 0.8454,
"step": 50320
},
{
"epoch": 0.80544,
"grad_norm": 0.3519250452518463,
"learning_rate": 3.89728e-05,
"loss": 0.8987,
"step": 50340
},
{
"epoch": 0.80576,
"grad_norm": 0.2199210673570633,
"learning_rate": 3.89088e-05,
"loss": 0.8827,
"step": 50360
},
{
"epoch": 0.80608,
"grad_norm": 0.23292851448059082,
"learning_rate": 3.8844800000000005e-05,
"loss": 0.8713,
"step": 50380
},
{
"epoch": 0.8064,
"grad_norm": 0.21949096024036407,
"learning_rate": 3.87808e-05,
"loss": 0.8707,
"step": 50400
},
{
"epoch": 0.80672,
"grad_norm": 0.25161877274513245,
"learning_rate": 3.87168e-05,
"loss": 0.9223,
"step": 50420
},
{
"epoch": 0.80704,
"grad_norm": 0.23557806015014648,
"learning_rate": 3.86528e-05,
"loss": 0.8855,
"step": 50440
},
{
"epoch": 0.80736,
"grad_norm": 0.2316737025976181,
"learning_rate": 3.8588800000000006e-05,
"loss": 0.8749,
"step": 50460
},
{
"epoch": 0.80768,
"grad_norm": 0.20035359263420105,
"learning_rate": 3.85248e-05,
"loss": 0.838,
"step": 50480
},
{
"epoch": 0.808,
"grad_norm": 0.17797014117240906,
"learning_rate": 3.84608e-05,
"loss": 0.839,
"step": 50500
},
{
"epoch": 0.80832,
"grad_norm": 0.2226208597421646,
"learning_rate": 3.83968e-05,
"loss": 0.8729,
"step": 50520
},
{
"epoch": 0.80864,
"grad_norm": 0.2194611132144928,
"learning_rate": 3.83328e-05,
"loss": 0.9066,
"step": 50540
},
{
"epoch": 0.80896,
"grad_norm": 0.21009878814220428,
"learning_rate": 3.82688e-05,
"loss": 0.8454,
"step": 50560
},
{
"epoch": 0.80928,
"grad_norm": 0.24309992790222168,
"learning_rate": 3.8204800000000004e-05,
"loss": 0.8612,
"step": 50580
},
{
"epoch": 0.8096,
"grad_norm": 0.19760167598724365,
"learning_rate": 3.8140800000000006e-05,
"loss": 0.9226,
"step": 50600
},
{
"epoch": 0.80992,
"grad_norm": 0.24959954619407654,
"learning_rate": 3.80768e-05,
"loss": 0.8667,
"step": 50620
},
{
"epoch": 0.81024,
"grad_norm": 0.2227480560541153,
"learning_rate": 3.80128e-05,
"loss": 0.9303,
"step": 50640
},
{
"epoch": 0.81056,
"grad_norm": 0.2228361815214157,
"learning_rate": 3.79488e-05,
"loss": 0.8365,
"step": 50660
},
{
"epoch": 0.81088,
"grad_norm": 0.22961002588272095,
"learning_rate": 3.788480000000001e-05,
"loss": 0.8257,
"step": 50680
},
{
"epoch": 0.8112,
"grad_norm": 0.19517934322357178,
"learning_rate": 3.78208e-05,
"loss": 0.8486,
"step": 50700
},
{
"epoch": 0.81152,
"grad_norm": 0.21257704496383667,
"learning_rate": 3.7756800000000004e-05,
"loss": 0.8665,
"step": 50720
},
{
"epoch": 0.81184,
"grad_norm": 0.17308840155601501,
"learning_rate": 3.76928e-05,
"loss": 0.8682,
"step": 50740
},
{
"epoch": 0.81216,
"grad_norm": 0.2145150750875473,
"learning_rate": 3.76288e-05,
"loss": 0.8652,
"step": 50760
},
{
"epoch": 0.81248,
"grad_norm": 0.20915599167346954,
"learning_rate": 3.75648e-05,
"loss": 0.881,
"step": 50780
},
{
"epoch": 0.8128,
"grad_norm": 0.2703164219856262,
"learning_rate": 3.7500800000000005e-05,
"loss": 0.8451,
"step": 50800
},
{
"epoch": 0.81312,
"grad_norm": 0.218171164393425,
"learning_rate": 3.74368e-05,
"loss": 0.8879,
"step": 50820
},
{
"epoch": 0.81344,
"grad_norm": 0.2015322595834732,
"learning_rate": 3.73728e-05,
"loss": 0.8583,
"step": 50840
},
{
"epoch": 0.81376,
"grad_norm": 0.2532670199871063,
"learning_rate": 3.73088e-05,
"loss": 0.9111,
"step": 50860
},
{
"epoch": 0.81408,
"grad_norm": 0.22806140780448914,
"learning_rate": 3.72448e-05,
"loss": 0.8677,
"step": 50880
},
{
"epoch": 0.8144,
"grad_norm": 0.20651741325855255,
"learning_rate": 3.71808e-05,
"loss": 0.9084,
"step": 50900
},
{
"epoch": 0.81472,
"grad_norm": 0.23538829386234283,
"learning_rate": 3.7116800000000004e-05,
"loss": 0.888,
"step": 50920
},
{
"epoch": 0.81504,
"grad_norm": 0.23798079788684845,
"learning_rate": 3.7052800000000005e-05,
"loss": 0.8974,
"step": 50940
},
{
"epoch": 0.81536,
"grad_norm": 0.24513110518455505,
"learning_rate": 3.69888e-05,
"loss": 0.9018,
"step": 50960
},
{
"epoch": 0.81568,
"grad_norm": 0.24939313530921936,
"learning_rate": 3.69248e-05,
"loss": 0.9033,
"step": 50980
},
{
"epoch": 0.816,
"grad_norm": 0.20573210716247559,
"learning_rate": 3.68608e-05,
"loss": 0.874,
"step": 51000
},
{
"epoch": 0.81632,
"grad_norm": 0.22649157047271729,
"learning_rate": 3.6796800000000007e-05,
"loss": 0.9008,
"step": 51020
},
{
"epoch": 0.81664,
"grad_norm": 0.24870529770851135,
"learning_rate": 3.67328e-05,
"loss": 0.8949,
"step": 51040
},
{
"epoch": 0.81696,
"grad_norm": 0.22699517011642456,
"learning_rate": 3.6668800000000004e-05,
"loss": 0.9049,
"step": 51060
},
{
"epoch": 0.81728,
"grad_norm": 0.24480411410331726,
"learning_rate": 3.66048e-05,
"loss": 0.8855,
"step": 51080
},
{
"epoch": 0.8176,
"grad_norm": 0.2394665777683258,
"learning_rate": 3.65408e-05,
"loss": 0.8868,
"step": 51100
},
{
"epoch": 0.81792,
"grad_norm": 0.20132653415203094,
"learning_rate": 3.64768e-05,
"loss": 0.8209,
"step": 51120
},
{
"epoch": 0.81824,
"grad_norm": 0.2139676958322525,
"learning_rate": 3.6412800000000005e-05,
"loss": 0.8852,
"step": 51140
},
{
"epoch": 0.81856,
"grad_norm": 0.23041175305843353,
"learning_rate": 3.63488e-05,
"loss": 0.8864,
"step": 51160
},
{
"epoch": 0.81888,
"grad_norm": 0.18776430189609528,
"learning_rate": 3.62848e-05,
"loss": 0.8675,
"step": 51180
},
{
"epoch": 0.8192,
"grad_norm": 0.24612362682819366,
"learning_rate": 3.62208e-05,
"loss": 0.9132,
"step": 51200
},
{
"epoch": 0.81952,
"grad_norm": 0.235810786485672,
"learning_rate": 3.61568e-05,
"loss": 0.8947,
"step": 51220
},
{
"epoch": 0.81984,
"grad_norm": 0.20968469977378845,
"learning_rate": 3.60928e-05,
"loss": 0.8943,
"step": 51240
},
{
"epoch": 0.82016,
"grad_norm": 0.20469830930233002,
"learning_rate": 3.60288e-05,
"loss": 0.8561,
"step": 51260
},
{
"epoch": 0.82048,
"grad_norm": 0.19832849502563477,
"learning_rate": 3.5964800000000005e-05,
"loss": 0.8784,
"step": 51280
},
{
"epoch": 0.8208,
"grad_norm": 0.2282322198152542,
"learning_rate": 3.59008e-05,
"loss": 0.8491,
"step": 51300
},
{
"epoch": 0.82112,
"grad_norm": 0.21050651371479034,
"learning_rate": 3.58368e-05,
"loss": 0.8547,
"step": 51320
},
{
"epoch": 0.82144,
"grad_norm": 0.21298284828662872,
"learning_rate": 3.57728e-05,
"loss": 0.8696,
"step": 51340
},
{
"epoch": 0.82176,
"grad_norm": 0.1952245980501175,
"learning_rate": 3.5708800000000006e-05,
"loss": 0.8184,
"step": 51360
},
{
"epoch": 0.82208,
"grad_norm": 0.23376013338565826,
"learning_rate": 3.56448e-05,
"loss": 0.8348,
"step": 51380
},
{
"epoch": 0.8224,
"grad_norm": 0.22029918432235718,
"learning_rate": 3.55808e-05,
"loss": 0.8956,
"step": 51400
},
{
"epoch": 0.82272,
"grad_norm": 0.23580487072467804,
"learning_rate": 3.55168e-05,
"loss": 0.9059,
"step": 51420
},
{
"epoch": 0.82304,
"grad_norm": 0.2074773609638214,
"learning_rate": 3.54528e-05,
"loss": 0.8591,
"step": 51440
},
{
"epoch": 0.82336,
"grad_norm": 0.21169452369213104,
"learning_rate": 3.53888e-05,
"loss": 0.8603,
"step": 51460
},
{
"epoch": 0.82368,
"grad_norm": 0.21765758097171783,
"learning_rate": 3.5324800000000004e-05,
"loss": 0.9107,
"step": 51480
},
{
"epoch": 0.824,
"grad_norm": 0.23709143698215485,
"learning_rate": 3.52608e-05,
"loss": 0.8909,
"step": 51500
},
{
"epoch": 0.82432,
"grad_norm": 0.18285530805587769,
"learning_rate": 3.51968e-05,
"loss": 0.8699,
"step": 51520
},
{
"epoch": 0.82464,
"grad_norm": 0.19453024864196777,
"learning_rate": 3.51328e-05,
"loss": 0.8841,
"step": 51540
},
{
"epoch": 0.82496,
"grad_norm": 0.20472079515457153,
"learning_rate": 3.50688e-05,
"loss": 0.8651,
"step": 51560
},
{
"epoch": 0.82528,
"grad_norm": 0.20934510231018066,
"learning_rate": 3.500480000000001e-05,
"loss": 0.9298,
"step": 51580
},
{
"epoch": 0.8256,
"grad_norm": 0.20935463905334473,
"learning_rate": 3.49408e-05,
"loss": 0.8238,
"step": 51600
},
{
"epoch": 0.82592,
"grad_norm": 0.19985993206501007,
"learning_rate": 3.4876800000000005e-05,
"loss": 0.8879,
"step": 51620
},
{
"epoch": 0.82624,
"grad_norm": 0.21266649663448334,
"learning_rate": 3.48128e-05,
"loss": 0.8754,
"step": 51640
},
{
"epoch": 0.82656,
"grad_norm": 0.21522794663906097,
"learning_rate": 3.47488e-05,
"loss": 0.8851,
"step": 51660
},
{
"epoch": 0.82688,
"grad_norm": 0.21246004104614258,
"learning_rate": 3.4684800000000004e-05,
"loss": 0.8588,
"step": 51680
},
{
"epoch": 0.8272,
"grad_norm": 0.23577255010604858,
"learning_rate": 3.4620800000000006e-05,
"loss": 0.8502,
"step": 51700
},
{
"epoch": 0.82752,
"grad_norm": 0.22873230278491974,
"learning_rate": 3.45568e-05,
"loss": 0.9434,
"step": 51720
},
{
"epoch": 0.82784,
"grad_norm": 0.24500973522663116,
"learning_rate": 3.44928e-05,
"loss": 0.8575,
"step": 51740
},
{
"epoch": 0.82816,
"grad_norm": 0.24861827492713928,
"learning_rate": 3.44288e-05,
"loss": 0.8601,
"step": 51760
},
{
"epoch": 0.82848,
"grad_norm": 0.25244006514549255,
"learning_rate": 3.43648e-05,
"loss": 0.8957,
"step": 51780
},
{
"epoch": 0.8288,
"grad_norm": 0.24348974227905273,
"learning_rate": 3.43008e-05,
"loss": 0.8433,
"step": 51800
},
{
"epoch": 0.82912,
"grad_norm": 0.23009903728961945,
"learning_rate": 3.4236800000000004e-05,
"loss": 0.9117,
"step": 51820
},
{
"epoch": 0.82944,
"grad_norm": 0.2047446370124817,
"learning_rate": 3.41728e-05,
"loss": 0.8102,
"step": 51840
},
{
"epoch": 0.82976,
"grad_norm": 0.2244807630777359,
"learning_rate": 3.41088e-05,
"loss": 0.8878,
"step": 51860
},
{
"epoch": 0.83008,
"grad_norm": 0.23874083161354065,
"learning_rate": 3.4044799999999996e-05,
"loss": 0.9476,
"step": 51880
},
{
"epoch": 0.8304,
"grad_norm": 0.21802900731563568,
"learning_rate": 3.39808e-05,
"loss": 0.8457,
"step": 51900
},
{
"epoch": 0.83072,
"grad_norm": 0.32707545161247253,
"learning_rate": 3.391680000000001e-05,
"loss": 0.8705,
"step": 51920
},
{
"epoch": 0.83104,
"grad_norm": 0.22364617884159088,
"learning_rate": 3.38528e-05,
"loss": 0.9154,
"step": 51940
},
{
"epoch": 0.83136,
"grad_norm": 0.23513180017471313,
"learning_rate": 3.3788800000000004e-05,
"loss": 0.863,
"step": 51960
},
{
"epoch": 0.83168,
"grad_norm": 0.24189570546150208,
"learning_rate": 3.37248e-05,
"loss": 0.9223,
"step": 51980
},
{
"epoch": 0.832,
"grad_norm": 0.2324758619070053,
"learning_rate": 3.36608e-05,
"loss": 0.8858,
"step": 52000
},
{
"epoch": 0.83232,
"grad_norm": 0.21225537359714508,
"learning_rate": 3.35968e-05,
"loss": 0.8573,
"step": 52020
},
{
"epoch": 0.83264,
"grad_norm": 0.2314990609884262,
"learning_rate": 3.3532800000000005e-05,
"loss": 0.9258,
"step": 52040
},
{
"epoch": 0.83296,
"grad_norm": 0.22183631360530853,
"learning_rate": 3.34688e-05,
"loss": 0.8842,
"step": 52060
},
{
"epoch": 0.83328,
"grad_norm": 0.23704120516777039,
"learning_rate": 3.34048e-05,
"loss": 0.8513,
"step": 52080
},
{
"epoch": 0.8336,
"grad_norm": 0.21080462634563446,
"learning_rate": 3.33408e-05,
"loss": 0.8745,
"step": 52100
},
{
"epoch": 0.83392,
"grad_norm": 0.25182968378067017,
"learning_rate": 3.32768e-05,
"loss": 0.8492,
"step": 52120
},
{
"epoch": 0.83424,
"grad_norm": 0.22719983756542206,
"learning_rate": 3.32128e-05,
"loss": 0.8727,
"step": 52140
},
{
"epoch": 0.83456,
"grad_norm": 0.36781367659568787,
"learning_rate": 3.3148800000000004e-05,
"loss": 0.9106,
"step": 52160
},
{
"epoch": 0.83488,
"grad_norm": 0.23190808296203613,
"learning_rate": 3.30848e-05,
"loss": 0.8574,
"step": 52180
},
{
"epoch": 0.8352,
"grad_norm": 0.19346857070922852,
"learning_rate": 3.30208e-05,
"loss": 0.8695,
"step": 52200
},
{
"epoch": 0.83552,
"grad_norm": 0.21294453740119934,
"learning_rate": 3.29568e-05,
"loss": 0.8724,
"step": 52220
},
{
"epoch": 0.83584,
"grad_norm": 0.18836593627929688,
"learning_rate": 3.2892800000000005e-05,
"loss": 0.8623,
"step": 52240
},
{
"epoch": 0.83616,
"grad_norm": 0.21922753751277924,
"learning_rate": 3.2828800000000007e-05,
"loss": 0.8477,
"step": 52260
},
{
"epoch": 0.83648,
"grad_norm": 0.2198483943939209,
"learning_rate": 3.27648e-05,
"loss": 0.9069,
"step": 52280
},
{
"epoch": 0.8368,
"grad_norm": 0.20438086986541748,
"learning_rate": 3.2700800000000004e-05,
"loss": 0.8833,
"step": 52300
},
{
"epoch": 0.83712,
"grad_norm": 0.2795603573322296,
"learning_rate": 3.26368e-05,
"loss": 0.9141,
"step": 52320
},
{
"epoch": 0.83744,
"grad_norm": 0.22276397049427032,
"learning_rate": 3.25728e-05,
"loss": 0.8741,
"step": 52340
},
{
"epoch": 0.83776,
"grad_norm": 0.21769918501377106,
"learning_rate": 3.25088e-05,
"loss": 0.88,
"step": 52360
},
{
"epoch": 0.83808,
"grad_norm": 0.22383011877536774,
"learning_rate": 3.2444800000000005e-05,
"loss": 0.8504,
"step": 52380
},
{
"epoch": 0.8384,
"grad_norm": 0.23771865665912628,
"learning_rate": 3.23808e-05,
"loss": 0.9323,
"step": 52400
},
{
"epoch": 0.83872,
"grad_norm": 0.20924776792526245,
"learning_rate": 3.23168e-05,
"loss": 0.8668,
"step": 52420
},
{
"epoch": 0.83904,
"grad_norm": 0.27309560775756836,
"learning_rate": 3.22528e-05,
"loss": 0.9336,
"step": 52440
},
{
"epoch": 0.83936,
"grad_norm": 0.24357040226459503,
"learning_rate": 3.21888e-05,
"loss": 0.9199,
"step": 52460
},
{
"epoch": 0.83968,
"grad_norm": 0.23052769899368286,
"learning_rate": 3.21248e-05,
"loss": 0.9209,
"step": 52480
},
{
"epoch": 0.84,
"grad_norm": 0.23305866122245789,
"learning_rate": 3.20608e-05,
"loss": 0.9089,
"step": 52500
},
{
"epoch": 0.84032,
"grad_norm": 0.21440325677394867,
"learning_rate": 3.19968e-05,
"loss": 0.8483,
"step": 52520
},
{
"epoch": 0.84064,
"grad_norm": 0.18805745244026184,
"learning_rate": 3.19328e-05,
"loss": 0.8436,
"step": 52540
},
{
"epoch": 0.84096,
"grad_norm": 0.24525205790996552,
"learning_rate": 3.18688e-05,
"loss": 0.8988,
"step": 52560
},
{
"epoch": 0.84128,
"grad_norm": 0.3257310688495636,
"learning_rate": 3.1804800000000004e-05,
"loss": 0.8461,
"step": 52580
},
{
"epoch": 0.8416,
"grad_norm": 0.21542541682720184,
"learning_rate": 3.1740800000000006e-05,
"loss": 0.908,
"step": 52600
},
{
"epoch": 0.84192,
"grad_norm": 0.18117113411426544,
"learning_rate": 3.16768e-05,
"loss": 0.9024,
"step": 52620
},
{
"epoch": 0.84224,
"grad_norm": 0.21288667619228363,
"learning_rate": 3.16128e-05,
"loss": 0.8981,
"step": 52640
},
{
"epoch": 0.84256,
"grad_norm": 0.20366021990776062,
"learning_rate": 3.15488e-05,
"loss": 0.8251,
"step": 52660
},
{
"epoch": 0.84288,
"grad_norm": 0.21234050393104553,
"learning_rate": 3.14848e-05,
"loss": 0.8548,
"step": 52680
},
{
"epoch": 0.8432,
"grad_norm": 0.23660403490066528,
"learning_rate": 3.14208e-05,
"loss": 0.8707,
"step": 52700
},
{
"epoch": 0.84352,
"grad_norm": 0.22134044766426086,
"learning_rate": 3.1356800000000004e-05,
"loss": 0.8742,
"step": 52720
},
{
"epoch": 0.84384,
"grad_norm": 0.23551689088344574,
"learning_rate": 3.12928e-05,
"loss": 0.8716,
"step": 52740
},
{
"epoch": 0.84416,
"grad_norm": 0.184543177485466,
"learning_rate": 3.12288e-05,
"loss": 0.8352,
"step": 52760
},
{
"epoch": 0.84448,
"grad_norm": 0.2306751012802124,
"learning_rate": 3.11648e-05,
"loss": 0.9043,
"step": 52780
},
{
"epoch": 0.8448,
"grad_norm": 0.2092065066099167,
"learning_rate": 3.1100800000000006e-05,
"loss": 0.9103,
"step": 52800
},
{
"epoch": 0.84512,
"grad_norm": 0.1914425492286682,
"learning_rate": 3.10368e-05,
"loss": 0.8702,
"step": 52820
},
{
"epoch": 0.84544,
"grad_norm": 0.24771615862846375,
"learning_rate": 3.09728e-05,
"loss": 0.8429,
"step": 52840
},
{
"epoch": 0.84576,
"grad_norm": 0.1887243092060089,
"learning_rate": 3.09088e-05,
"loss": 0.8835,
"step": 52860
},
{
"epoch": 0.84608,
"grad_norm": 0.2107224017381668,
"learning_rate": 3.08448e-05,
"loss": 0.8995,
"step": 52880
},
{
"epoch": 0.8464,
"grad_norm": 0.2114105075597763,
"learning_rate": 3.07808e-05,
"loss": 0.8316,
"step": 52900
},
{
"epoch": 0.84672,
"grad_norm": 0.22698649764060974,
"learning_rate": 3.0716800000000004e-05,
"loss": 0.917,
"step": 52920
},
{
"epoch": 0.84704,
"grad_norm": 0.23728908598423004,
"learning_rate": 3.0652800000000006e-05,
"loss": 0.8641,
"step": 52940
},
{
"epoch": 0.84736,
"grad_norm": 0.24342834949493408,
"learning_rate": 3.05888e-05,
"loss": 0.9067,
"step": 52960
},
{
"epoch": 0.84768,
"grad_norm": 0.23248563706874847,
"learning_rate": 3.05248e-05,
"loss": 0.8918,
"step": 52980
},
{
"epoch": 0.848,
"grad_norm": 0.22036781907081604,
"learning_rate": 3.04608e-05,
"loss": 0.8689,
"step": 53000
},
{
"epoch": 0.84832,
"grad_norm": 0.21692028641700745,
"learning_rate": 3.0396800000000003e-05,
"loss": 0.8941,
"step": 53020
},
{
"epoch": 0.84864,
"grad_norm": 0.21200671792030334,
"learning_rate": 3.03328e-05,
"loss": 0.8787,
"step": 53040
},
{
"epoch": 0.84896,
"grad_norm": 0.31840503215789795,
"learning_rate": 3.0268800000000004e-05,
"loss": 0.9181,
"step": 53060
},
{
"epoch": 0.84928,
"grad_norm": 0.2347812056541443,
"learning_rate": 3.02048e-05,
"loss": 0.885,
"step": 53080
},
{
"epoch": 0.8496,
"grad_norm": 0.22629977762699127,
"learning_rate": 3.01408e-05,
"loss": 0.9208,
"step": 53100
},
{
"epoch": 0.84992,
"grad_norm": 0.20610420405864716,
"learning_rate": 3.00768e-05,
"loss": 0.8698,
"step": 53120
},
{
"epoch": 0.85024,
"grad_norm": 0.17655342817306519,
"learning_rate": 3.0012800000000002e-05,
"loss": 0.8767,
"step": 53140
},
{
"epoch": 0.85056,
"grad_norm": 0.2303926944732666,
"learning_rate": 2.99488e-05,
"loss": 0.9493,
"step": 53160
},
{
"epoch": 0.85088,
"grad_norm": 0.20119711756706238,
"learning_rate": 2.9884800000000002e-05,
"loss": 0.8769,
"step": 53180
},
{
"epoch": 0.8512,
"grad_norm": 0.2533680200576782,
"learning_rate": 2.9820799999999997e-05,
"loss": 0.9568,
"step": 53200
},
{
"epoch": 0.85152,
"grad_norm": 0.2610546350479126,
"learning_rate": 2.9756800000000003e-05,
"loss": 0.9215,
"step": 53220
},
{
"epoch": 0.85184,
"grad_norm": 0.2684386074542999,
"learning_rate": 2.9692800000000005e-05,
"loss": 0.8739,
"step": 53240
},
{
"epoch": 0.85216,
"grad_norm": 0.2054203301668167,
"learning_rate": 2.96288e-05,
"loss": 0.8909,
"step": 53260
},
{
"epoch": 0.85248,
"grad_norm": 0.24058941006660461,
"learning_rate": 2.9564800000000002e-05,
"loss": 0.8314,
"step": 53280
},
{
"epoch": 0.8528,
"grad_norm": 0.2767840325832367,
"learning_rate": 2.95008e-05,
"loss": 0.8588,
"step": 53300
},
{
"epoch": 0.85312,
"grad_norm": 0.1769871711730957,
"learning_rate": 2.9436800000000002e-05,
"loss": 0.8978,
"step": 53320
},
{
"epoch": 0.85344,
"grad_norm": 0.23680894076824188,
"learning_rate": 2.93728e-05,
"loss": 0.879,
"step": 53340
},
{
"epoch": 0.85376,
"grad_norm": 0.2106921374797821,
"learning_rate": 2.9308800000000003e-05,
"loss": 0.8792,
"step": 53360
},
{
"epoch": 0.85408,
"grad_norm": 0.20546332001686096,
"learning_rate": 2.9248000000000002e-05,
"loss": 0.8953,
"step": 53380
},
{
"epoch": 0.8544,
"grad_norm": 0.2114826738834381,
"learning_rate": 2.9184e-05,
"loss": 0.889,
"step": 53400
},
{
"epoch": 0.85472,
"grad_norm": 0.2232695072889328,
"learning_rate": 2.9120000000000002e-05,
"loss": 0.8441,
"step": 53420
},
{
"epoch": 0.85504,
"grad_norm": 0.25499632954597473,
"learning_rate": 2.9056e-05,
"loss": 0.9132,
"step": 53440
},
{
"epoch": 0.85536,
"grad_norm": 0.22726675868034363,
"learning_rate": 2.8992000000000003e-05,
"loss": 0.8905,
"step": 53460
},
{
"epoch": 0.85568,
"grad_norm": 0.18875838816165924,
"learning_rate": 2.8927999999999998e-05,
"loss": 0.8665,
"step": 53480
},
{
"epoch": 0.856,
"grad_norm": 0.23504668474197388,
"learning_rate": 2.8864000000000004e-05,
"loss": 0.9092,
"step": 53500
},
{
"epoch": 0.85632,
"grad_norm": 0.23671472072601318,
"learning_rate": 2.88e-05,
"loss": 0.8858,
"step": 53520
},
{
"epoch": 0.85664,
"grad_norm": 0.22494633495807648,
"learning_rate": 2.8736e-05,
"loss": 0.8988,
"step": 53540
},
{
"epoch": 0.85696,
"grad_norm": 0.20699791610240936,
"learning_rate": 2.8672e-05,
"loss": 0.8523,
"step": 53560
},
{
"epoch": 0.85728,
"grad_norm": 0.24414889514446259,
"learning_rate": 2.8608e-05,
"loss": 0.8734,
"step": 53580
},
{
"epoch": 0.8576,
"grad_norm": 0.23562034964561462,
"learning_rate": 2.8544000000000003e-05,
"loss": 0.8349,
"step": 53600
},
{
"epoch": 0.85792,
"grad_norm": 0.18858417868614197,
"learning_rate": 2.8480000000000002e-05,
"loss": 0.8784,
"step": 53620
},
{
"epoch": 0.85824,
"grad_norm": 0.23060615360736847,
"learning_rate": 2.8416000000000004e-05,
"loss": 0.8874,
"step": 53640
},
{
"epoch": 0.85856,
"grad_norm": 0.2229340374469757,
"learning_rate": 2.8352000000000002e-05,
"loss": 0.8772,
"step": 53660
},
{
"epoch": 0.85888,
"grad_norm": 0.23819968104362488,
"learning_rate": 2.8288000000000004e-05,
"loss": 0.9138,
"step": 53680
},
{
"epoch": 0.8592,
"grad_norm": 0.22570669651031494,
"learning_rate": 2.8224e-05,
"loss": 0.8543,
"step": 53700
},
{
"epoch": 0.85952,
"grad_norm": 0.1779906302690506,
"learning_rate": 2.816e-05,
"loss": 0.8453,
"step": 53720
},
{
"epoch": 0.85984,
"grad_norm": 0.18111290037631989,
"learning_rate": 2.8096e-05,
"loss": 0.8571,
"step": 53740
},
{
"epoch": 0.86016,
"grad_norm": 0.23945818841457367,
"learning_rate": 2.8032000000000002e-05,
"loss": 0.9347,
"step": 53760
},
{
"epoch": 0.86048,
"grad_norm": 0.2050149291753769,
"learning_rate": 2.7968e-05,
"loss": 0.8871,
"step": 53780
},
{
"epoch": 0.8608,
"grad_norm": 0.20724190771579742,
"learning_rate": 2.7904000000000003e-05,
"loss": 0.8506,
"step": 53800
},
{
"epoch": 0.86112,
"grad_norm": 0.21259485185146332,
"learning_rate": 2.7839999999999998e-05,
"loss": 0.8642,
"step": 53820
},
{
"epoch": 0.86144,
"grad_norm": 0.2348015457391739,
"learning_rate": 2.7776000000000003e-05,
"loss": 0.8883,
"step": 53840
},
{
"epoch": 0.86176,
"grad_norm": 0.2268034815788269,
"learning_rate": 2.7711999999999998e-05,
"loss": 0.842,
"step": 53860
},
{
"epoch": 0.86208,
"grad_norm": 0.20903776586055756,
"learning_rate": 2.7648e-05,
"loss": 0.898,
"step": 53880
},
{
"epoch": 0.8624,
"grad_norm": 0.2246081382036209,
"learning_rate": 2.7584e-05,
"loss": 0.9053,
"step": 53900
},
{
"epoch": 0.86272,
"grad_norm": 0.20664174854755402,
"learning_rate": 2.752e-05,
"loss": 0.9123,
"step": 53920
},
{
"epoch": 0.86304,
"grad_norm": 0.199870765209198,
"learning_rate": 2.7456000000000003e-05,
"loss": 0.8782,
"step": 53940
},
{
"epoch": 0.86336,
"grad_norm": 0.2337988168001175,
"learning_rate": 2.7392e-05,
"loss": 0.8961,
"step": 53960
},
{
"epoch": 0.86368,
"grad_norm": 0.24039073288440704,
"learning_rate": 2.7328000000000003e-05,
"loss": 0.9463,
"step": 53980
},
{
"epoch": 0.864,
"grad_norm": 0.21727830171585083,
"learning_rate": 2.7264000000000002e-05,
"loss": 0.8852,
"step": 54000
},
{
"epoch": 0.86432,
"grad_norm": 0.25286540389060974,
"learning_rate": 2.7200000000000004e-05,
"loss": 0.8877,
"step": 54020
},
{
"epoch": 0.86464,
"grad_norm": 0.2132069319486618,
"learning_rate": 2.7136e-05,
"loss": 0.8378,
"step": 54040
},
{
"epoch": 0.86496,
"grad_norm": 0.19726885855197906,
"learning_rate": 2.7072000000000004e-05,
"loss": 0.8827,
"step": 54060
},
{
"epoch": 0.86528,
"grad_norm": 0.19055317342281342,
"learning_rate": 2.7008e-05,
"loss": 0.845,
"step": 54080
},
{
"epoch": 0.8656,
"grad_norm": 0.21198777854442596,
"learning_rate": 2.6944e-05,
"loss": 0.8468,
"step": 54100
},
{
"epoch": 0.86592,
"grad_norm": 0.1932942420244217,
"learning_rate": 2.688e-05,
"loss": 0.8163,
"step": 54120
},
{
"epoch": 0.86624,
"grad_norm": 0.21990489959716797,
"learning_rate": 2.6816000000000002e-05,
"loss": 0.8721,
"step": 54140
},
{
"epoch": 0.86656,
"grad_norm": 0.2552103102207184,
"learning_rate": 2.6752e-05,
"loss": 0.9031,
"step": 54160
},
{
"epoch": 0.86688,
"grad_norm": 0.20564943552017212,
"learning_rate": 2.6688000000000003e-05,
"loss": 0.8891,
"step": 54180
},
{
"epoch": 0.8672,
"grad_norm": 0.21163496375083923,
"learning_rate": 2.6623999999999998e-05,
"loss": 0.8529,
"step": 54200
},
{
"epoch": 0.86752,
"grad_norm": 0.19944727420806885,
"learning_rate": 2.6560000000000003e-05,
"loss": 0.8493,
"step": 54220
},
{
"epoch": 0.86784,
"grad_norm": 0.20173463225364685,
"learning_rate": 2.6496e-05,
"loss": 0.8716,
"step": 54240
},
{
"epoch": 0.86816,
"grad_norm": 0.23707176744937897,
"learning_rate": 2.6432e-05,
"loss": 0.9124,
"step": 54260
},
{
"epoch": 0.86848,
"grad_norm": 0.24114908277988434,
"learning_rate": 2.6368000000000002e-05,
"loss": 0.8897,
"step": 54280
},
{
"epoch": 0.8688,
"grad_norm": 0.2588618993759155,
"learning_rate": 2.6304e-05,
"loss": 0.8567,
"step": 54300
},
{
"epoch": 0.86912,
"grad_norm": 0.21841663122177124,
"learning_rate": 2.6240000000000003e-05,
"loss": 0.8804,
"step": 54320
},
{
"epoch": 0.86944,
"grad_norm": 0.20472241938114166,
"learning_rate": 2.6176e-05,
"loss": 0.8423,
"step": 54340
},
{
"epoch": 0.86976,
"grad_norm": 0.21747057139873505,
"learning_rate": 2.6112000000000003e-05,
"loss": 0.8352,
"step": 54360
},
{
"epoch": 0.87008,
"grad_norm": 0.26556268334388733,
"learning_rate": 2.6048e-05,
"loss": 0.8188,
"step": 54380
},
{
"epoch": 0.8704,
"grad_norm": 0.20089897513389587,
"learning_rate": 2.5984000000000004e-05,
"loss": 0.8916,
"step": 54400
},
{
"epoch": 0.87072,
"grad_norm": 0.24367238581180573,
"learning_rate": 2.592e-05,
"loss": 0.9158,
"step": 54420
},
{
"epoch": 0.87104,
"grad_norm": 0.24030858278274536,
"learning_rate": 2.5856e-05,
"loss": 0.9347,
"step": 54440
},
{
"epoch": 0.87136,
"grad_norm": 0.22043825685977936,
"learning_rate": 2.5792e-05,
"loss": 0.8796,
"step": 54460
},
{
"epoch": 0.87168,
"grad_norm": 0.243258535861969,
"learning_rate": 2.5728e-05,
"loss": 0.9063,
"step": 54480
},
{
"epoch": 0.872,
"grad_norm": 0.2250966578722,
"learning_rate": 2.5664e-05,
"loss": 0.8686,
"step": 54500
},
{
"epoch": 0.87232,
"grad_norm": 0.24469095468521118,
"learning_rate": 2.5600000000000002e-05,
"loss": 0.8392,
"step": 54520
},
{
"epoch": 0.87264,
"grad_norm": 0.2351893037557602,
"learning_rate": 2.5535999999999997e-05,
"loss": 0.861,
"step": 54540
},
{
"epoch": 0.87296,
"grad_norm": 0.22012612223625183,
"learning_rate": 2.5472000000000003e-05,
"loss": 0.8925,
"step": 54560
},
{
"epoch": 0.87328,
"grad_norm": 0.22451180219650269,
"learning_rate": 2.5407999999999998e-05,
"loss": 0.8572,
"step": 54580
},
{
"epoch": 0.8736,
"grad_norm": 0.2381567507982254,
"learning_rate": 2.5344e-05,
"loss": 0.9039,
"step": 54600
},
{
"epoch": 0.87392,
"grad_norm": 0.2319832742214203,
"learning_rate": 2.5280000000000005e-05,
"loss": 0.898,
"step": 54620
},
{
"epoch": 0.87424,
"grad_norm": 0.24306103587150574,
"learning_rate": 2.5216e-05,
"loss": 0.8999,
"step": 54640
},
{
"epoch": 0.87456,
"grad_norm": 0.22463731467723846,
"learning_rate": 2.5152000000000002e-05,
"loss": 0.8828,
"step": 54660
},
{
"epoch": 0.87488,
"grad_norm": 0.2395946979522705,
"learning_rate": 2.5088e-05,
"loss": 0.898,
"step": 54680
},
{
"epoch": 0.8752,
"grad_norm": 0.22378131747245789,
"learning_rate": 2.5024000000000003e-05,
"loss": 0.8967,
"step": 54700
},
{
"epoch": 0.87552,
"grad_norm": 0.24939024448394775,
"learning_rate": 2.496e-05,
"loss": 0.8861,
"step": 54720
},
{
"epoch": 0.87584,
"grad_norm": 0.23700961470603943,
"learning_rate": 2.4896e-05,
"loss": 0.9274,
"step": 54740
},
{
"epoch": 0.87616,
"grad_norm": 0.2053232043981552,
"learning_rate": 2.4832000000000002e-05,
"loss": 0.8325,
"step": 54760
},
{
"epoch": 0.87648,
"grad_norm": 0.20047616958618164,
"learning_rate": 2.4768e-05,
"loss": 0.9201,
"step": 54780
},
{
"epoch": 0.8768,
"grad_norm": 0.30408886075019836,
"learning_rate": 2.4704000000000003e-05,
"loss": 0.8911,
"step": 54800
},
{
"epoch": 0.87712,
"grad_norm": 0.20753300189971924,
"learning_rate": 2.464e-05,
"loss": 0.8867,
"step": 54820
},
{
"epoch": 0.87744,
"grad_norm": 0.2093687355518341,
"learning_rate": 2.4576000000000003e-05,
"loss": 0.8762,
"step": 54840
},
{
"epoch": 0.87776,
"grad_norm": 0.226734921336174,
"learning_rate": 2.4512000000000002e-05,
"loss": 0.8893,
"step": 54860
},
{
"epoch": 0.87808,
"grad_norm": 0.21793030202388763,
"learning_rate": 2.4448e-05,
"loss": 0.8665,
"step": 54880
},
{
"epoch": 0.8784,
"grad_norm": 0.2046462595462799,
"learning_rate": 2.4384000000000002e-05,
"loss": 0.8999,
"step": 54900
},
{
"epoch": 0.87872,
"grad_norm": 0.2325984686613083,
"learning_rate": 2.432e-05,
"loss": 0.9291,
"step": 54920
},
{
"epoch": 0.87904,
"grad_norm": 0.20662260055541992,
"learning_rate": 2.4256e-05,
"loss": 0.8944,
"step": 54940
},
{
"epoch": 0.87936,
"grad_norm": 0.2617480158805847,
"learning_rate": 2.4192e-05,
"loss": 0.9573,
"step": 54960
},
{
"epoch": 0.87968,
"grad_norm": 0.18719272315502167,
"learning_rate": 2.4128e-05,
"loss": 0.8767,
"step": 54980
},
{
"epoch": 0.88,
"grad_norm": 0.27199268341064453,
"learning_rate": 2.4064000000000002e-05,
"loss": 0.9066,
"step": 55000
},
{
"epoch": 0.88032,
"grad_norm": 0.21330726146697998,
"learning_rate": 2.4e-05,
"loss": 0.8594,
"step": 55020
},
{
"epoch": 0.88064,
"grad_norm": 0.1909005492925644,
"learning_rate": 2.3936e-05,
"loss": 0.8807,
"step": 55040
},
{
"epoch": 0.88096,
"grad_norm": 0.21157494187355042,
"learning_rate": 2.3872e-05,
"loss": 0.9019,
"step": 55060
},
{
"epoch": 0.88128,
"grad_norm": 0.19137850403785706,
"learning_rate": 2.3808000000000003e-05,
"loss": 0.8873,
"step": 55080
},
{
"epoch": 0.8816,
"grad_norm": 0.17469841241836548,
"learning_rate": 2.3744000000000002e-05,
"loss": 0.8225,
"step": 55100
},
{
"epoch": 0.88192,
"grad_norm": 0.21227526664733887,
"learning_rate": 2.3680000000000004e-05,
"loss": 0.9273,
"step": 55120
},
{
"epoch": 0.88224,
"grad_norm": 0.22155140340328217,
"learning_rate": 2.3616000000000002e-05,
"loss": 0.8716,
"step": 55140
},
{
"epoch": 0.88256,
"grad_norm": 0.2392839938402176,
"learning_rate": 2.3552e-05,
"loss": 0.8905,
"step": 55160
},
{
"epoch": 0.88288,
"grad_norm": 0.2480785995721817,
"learning_rate": 2.3488000000000003e-05,
"loss": 0.8906,
"step": 55180
},
{
"epoch": 0.8832,
"grad_norm": 0.23919089138507843,
"learning_rate": 2.3424e-05,
"loss": 0.8912,
"step": 55200
},
{
"epoch": 0.88352,
"grad_norm": 0.2384289801120758,
"learning_rate": 2.336e-05,
"loss": 0.8556,
"step": 55220
},
{
"epoch": 0.88384,
"grad_norm": 0.23314107954502106,
"learning_rate": 2.3296000000000002e-05,
"loss": 0.8638,
"step": 55240
},
{
"epoch": 0.88416,
"grad_norm": 0.20176903903484344,
"learning_rate": 2.3232e-05,
"loss": 0.8767,
"step": 55260
},
{
"epoch": 0.88448,
"grad_norm": 0.23449799418449402,
"learning_rate": 2.3168000000000002e-05,
"loss": 0.8811,
"step": 55280
},
{
"epoch": 0.8848,
"grad_norm": 0.2195407897233963,
"learning_rate": 2.3104e-05,
"loss": 0.8956,
"step": 55300
},
{
"epoch": 0.88512,
"grad_norm": 0.2260621041059494,
"learning_rate": 2.304e-05,
"loss": 0.8781,
"step": 55320
},
{
"epoch": 0.88544,
"grad_norm": 0.21167124807834625,
"learning_rate": 2.2976e-05,
"loss": 0.8251,
"step": 55340
},
{
"epoch": 0.88576,
"grad_norm": 0.219034343957901,
"learning_rate": 2.2912e-05,
"loss": 0.8413,
"step": 55360
},
{
"epoch": 0.88608,
"grad_norm": 0.24565385282039642,
"learning_rate": 2.2848e-05,
"loss": 0.9033,
"step": 55380
},
{
"epoch": 0.8864,
"grad_norm": 0.2161465734243393,
"learning_rate": 2.2784e-05,
"loss": 0.884,
"step": 55400
},
{
"epoch": 0.88672,
"grad_norm": 0.2424006164073944,
"learning_rate": 2.2720000000000003e-05,
"loss": 0.9424,
"step": 55420
},
{
"epoch": 0.88704,
"grad_norm": 0.2734192907810211,
"learning_rate": 2.2656e-05,
"loss": 0.9217,
"step": 55440
},
{
"epoch": 0.88736,
"grad_norm": 0.2107728272676468,
"learning_rate": 2.2592000000000003e-05,
"loss": 0.8789,
"step": 55460
},
{
"epoch": 0.88768,
"grad_norm": 0.2616425156593323,
"learning_rate": 2.2528000000000002e-05,
"loss": 0.918,
"step": 55480
},
{
"epoch": 0.888,
"grad_norm": 0.23145994544029236,
"learning_rate": 2.2464e-05,
"loss": 0.8626,
"step": 55500
},
{
"epoch": 0.88832,
"grad_norm": 0.22918421030044556,
"learning_rate": 2.2400000000000002e-05,
"loss": 0.8656,
"step": 55520
},
{
"epoch": 0.88864,
"grad_norm": 0.2246193289756775,
"learning_rate": 2.2336e-05,
"loss": 0.8569,
"step": 55540
},
{
"epoch": 0.88896,
"grad_norm": 0.23850585520267487,
"learning_rate": 2.2272e-05,
"loss": 0.8612,
"step": 55560
},
{
"epoch": 0.88928,
"grad_norm": 0.21243111789226532,
"learning_rate": 2.2208e-05,
"loss": 0.8612,
"step": 55580
},
{
"epoch": 0.8896,
"grad_norm": 0.1799386590719223,
"learning_rate": 2.2144e-05,
"loss": 0.8601,
"step": 55600
},
{
"epoch": 0.88992,
"grad_norm": 0.20039919018745422,
"learning_rate": 2.2080000000000002e-05,
"loss": 0.8781,
"step": 55620
},
{
"epoch": 0.89024,
"grad_norm": 0.20583823323249817,
"learning_rate": 2.2016e-05,
"loss": 0.8552,
"step": 55640
},
{
"epoch": 0.89056,
"grad_norm": 0.25878387689590454,
"learning_rate": 2.1952e-05,
"loss": 0.8694,
"step": 55660
},
{
"epoch": 0.89088,
"grad_norm": 0.26383817195892334,
"learning_rate": 2.1888e-05,
"loss": 0.916,
"step": 55680
},
{
"epoch": 0.8912,
"grad_norm": 0.2270156741142273,
"learning_rate": 2.1824e-05,
"loss": 0.9219,
"step": 55700
},
{
"epoch": 0.89152,
"grad_norm": 0.22747768461704254,
"learning_rate": 2.176e-05,
"loss": 0.9066,
"step": 55720
},
{
"epoch": 0.89184,
"grad_norm": 0.2369069904088974,
"learning_rate": 2.1696e-05,
"loss": 0.9127,
"step": 55740
},
{
"epoch": 0.89216,
"grad_norm": 0.22584250569343567,
"learning_rate": 2.1632000000000002e-05,
"loss": 0.8571,
"step": 55760
},
{
"epoch": 0.89248,
"grad_norm": 0.2100561112165451,
"learning_rate": 2.1568e-05,
"loss": 0.8911,
"step": 55780
},
{
"epoch": 0.8928,
"grad_norm": 0.24614600837230682,
"learning_rate": 2.1504000000000003e-05,
"loss": 0.8826,
"step": 55800
},
{
"epoch": 0.89312,
"grad_norm": 0.25491416454315186,
"learning_rate": 2.144e-05,
"loss": 0.8619,
"step": 55820
},
{
"epoch": 0.89344,
"grad_norm": 0.22057655453681946,
"learning_rate": 2.1376e-05,
"loss": 0.8607,
"step": 55840
},
{
"epoch": 0.89376,
"grad_norm": 0.2257709503173828,
"learning_rate": 2.1312000000000002e-05,
"loss": 0.8979,
"step": 55860
},
{
"epoch": 0.89408,
"grad_norm": 0.20789384841918945,
"learning_rate": 2.1248e-05,
"loss": 0.8385,
"step": 55880
},
{
"epoch": 0.8944,
"grad_norm": 0.22959135472774506,
"learning_rate": 2.1184000000000002e-05,
"loss": 0.866,
"step": 55900
},
{
"epoch": 0.89472,
"grad_norm": 0.21270857751369476,
"learning_rate": 2.112e-05,
"loss": 0.8747,
"step": 55920
},
{
"epoch": 0.89504,
"grad_norm": 0.22662252187728882,
"learning_rate": 2.1056e-05,
"loss": 0.8836,
"step": 55940
},
{
"epoch": 0.89536,
"grad_norm": 0.22201156616210938,
"learning_rate": 2.0992e-05,
"loss": 0.924,
"step": 55960
},
{
"epoch": 0.89568,
"grad_norm": 0.21177421510219574,
"learning_rate": 2.0928e-05,
"loss": 0.8794,
"step": 55980
},
{
"epoch": 0.896,
"grad_norm": 0.23896725475788116,
"learning_rate": 2.0864e-05,
"loss": 0.8967,
"step": 56000
},
{
"epoch": 0.89632,
"grad_norm": 0.21801073849201202,
"learning_rate": 2.08e-05,
"loss": 0.9033,
"step": 56020
},
{
"epoch": 0.89664,
"grad_norm": 0.19565339386463165,
"learning_rate": 2.0736e-05,
"loss": 0.8857,
"step": 56040
},
{
"epoch": 0.89696,
"grad_norm": 0.21768838167190552,
"learning_rate": 2.0672e-05,
"loss": 0.8463,
"step": 56060
},
{
"epoch": 0.89728,
"grad_norm": 0.20696651935577393,
"learning_rate": 2.0608000000000003e-05,
"loss": 0.8725,
"step": 56080
},
{
"epoch": 0.8976,
"grad_norm": 0.2298385500907898,
"learning_rate": 2.0544000000000002e-05,
"loss": 0.902,
"step": 56100
},
{
"epoch": 0.89792,
"grad_norm": 0.22712761163711548,
"learning_rate": 2.048e-05,
"loss": 0.8109,
"step": 56120
},
{
"epoch": 0.89824,
"grad_norm": 0.23746636509895325,
"learning_rate": 2.0416000000000002e-05,
"loss": 0.9398,
"step": 56140
},
{
"epoch": 0.89856,
"grad_norm": 0.2161879539489746,
"learning_rate": 2.0352e-05,
"loss": 0.9345,
"step": 56160
},
{
"epoch": 0.89888,
"grad_norm": 0.18641656637191772,
"learning_rate": 2.0288000000000003e-05,
"loss": 0.8868,
"step": 56180
},
{
"epoch": 0.8992,
"grad_norm": 0.21114569902420044,
"learning_rate": 2.0224e-05,
"loss": 0.8638,
"step": 56200
},
{
"epoch": 0.89952,
"grad_norm": 0.2475218027830124,
"learning_rate": 2.016e-05,
"loss": 0.8695,
"step": 56220
},
{
"epoch": 0.89984,
"grad_norm": 0.2249087691307068,
"learning_rate": 2.0096000000000002e-05,
"loss": 0.8671,
"step": 56240
},
{
"epoch": 0.90016,
"grad_norm": 0.23106272518634796,
"learning_rate": 2.0032e-05,
"loss": 0.8591,
"step": 56260
},
{
"epoch": 0.90048,
"grad_norm": 0.23417605459690094,
"learning_rate": 1.9968e-05,
"loss": 0.8675,
"step": 56280
},
{
"epoch": 0.9008,
"grad_norm": 0.25275781750679016,
"learning_rate": 1.9904e-05,
"loss": 0.9146,
"step": 56300
},
{
"epoch": 0.90112,
"grad_norm": 0.2217005491256714,
"learning_rate": 1.984e-05,
"loss": 0.9167,
"step": 56320
},
{
"epoch": 0.90144,
"grad_norm": 0.2137831449508667,
"learning_rate": 1.9776000000000002e-05,
"loss": 0.8699,
"step": 56340
},
{
"epoch": 0.90176,
"grad_norm": 0.2010769098997116,
"learning_rate": 1.9712e-05,
"loss": 0.8862,
"step": 56360
},
{
"epoch": 0.90208,
"grad_norm": 0.23932960629463196,
"learning_rate": 1.9648e-05,
"loss": 0.866,
"step": 56380
},
{
"epoch": 0.9024,
"grad_norm": 0.22078342735767365,
"learning_rate": 1.9584e-05,
"loss": 0.914,
"step": 56400
},
{
"epoch": 0.90272,
"grad_norm": 0.2117830365896225,
"learning_rate": 1.9520000000000003e-05,
"loss": 0.8706,
"step": 56420
},
{
"epoch": 0.90304,
"grad_norm": 0.22050690650939941,
"learning_rate": 1.9456e-05,
"loss": 0.8842,
"step": 56440
},
{
"epoch": 0.90336,
"grad_norm": 0.21533265709877014,
"learning_rate": 1.9392000000000003e-05,
"loss": 0.8478,
"step": 56460
},
{
"epoch": 0.90368,
"grad_norm": 0.23256616294384003,
"learning_rate": 1.9328000000000002e-05,
"loss": 0.8912,
"step": 56480
},
{
"epoch": 0.904,
"grad_norm": 0.24610291421413422,
"learning_rate": 1.9264e-05,
"loss": 0.8493,
"step": 56500
},
{
"epoch": 0.90432,
"grad_norm": 0.2145715057849884,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.8626,
"step": 56520
},
{
"epoch": 0.90464,
"grad_norm": 0.21198943257331848,
"learning_rate": 1.91392e-05,
"loss": 0.9013,
"step": 56540
},
{
"epoch": 0.90496,
"grad_norm": 0.2638920843601227,
"learning_rate": 1.90752e-05,
"loss": 0.9176,
"step": 56560
},
{
"epoch": 0.90528,
"grad_norm": 0.17651067674160004,
"learning_rate": 1.9011200000000002e-05,
"loss": 0.9293,
"step": 56580
},
{
"epoch": 0.9056,
"grad_norm": 0.20234732329845428,
"learning_rate": 1.89472e-05,
"loss": 0.8781,
"step": 56600
},
{
"epoch": 0.90592,
"grad_norm": 0.2263152003288269,
"learning_rate": 1.8883200000000002e-05,
"loss": 0.9071,
"step": 56620
},
{
"epoch": 0.90624,
"grad_norm": 0.2451239675283432,
"learning_rate": 1.88192e-05,
"loss": 0.9174,
"step": 56640
},
{
"epoch": 0.90656,
"grad_norm": 0.22056809067726135,
"learning_rate": 1.87552e-05,
"loss": 0.9132,
"step": 56660
},
{
"epoch": 0.90688,
"grad_norm": 0.2163904458284378,
"learning_rate": 1.86912e-05,
"loss": 0.9208,
"step": 56680
},
{
"epoch": 0.9072,
"grad_norm": 0.23965485394001007,
"learning_rate": 1.86272e-05,
"loss": 0.8778,
"step": 56700
},
{
"epoch": 0.90752,
"grad_norm": 0.2319948375225067,
"learning_rate": 1.85632e-05,
"loss": 0.8673,
"step": 56720
},
{
"epoch": 0.90784,
"grad_norm": 0.17709515988826752,
"learning_rate": 1.84992e-05,
"loss": 0.8635,
"step": 56740
},
{
"epoch": 0.90816,
"grad_norm": 0.22696132957935333,
"learning_rate": 1.84352e-05,
"loss": 0.8179,
"step": 56760
},
{
"epoch": 0.90848,
"grad_norm": 0.25801581144332886,
"learning_rate": 1.8371199999999998e-05,
"loss": 0.9357,
"step": 56780
},
{
"epoch": 0.9088,
"grad_norm": 0.20928123593330383,
"learning_rate": 1.8307200000000003e-05,
"loss": 0.8228,
"step": 56800
},
{
"epoch": 0.90912,
"grad_norm": 0.18093565106391907,
"learning_rate": 1.8243200000000002e-05,
"loss": 0.8613,
"step": 56820
},
{
"epoch": 0.90944,
"grad_norm": 0.21541033685207367,
"learning_rate": 1.81792e-05,
"loss": 0.8624,
"step": 56840
},
{
"epoch": 0.90976,
"grad_norm": 0.1991436630487442,
"learning_rate": 1.8115200000000002e-05,
"loss": 0.8876,
"step": 56860
},
{
"epoch": 0.91008,
"grad_norm": 0.2240942418575287,
"learning_rate": 1.80512e-05,
"loss": 0.9089,
"step": 56880
},
{
"epoch": 0.9104,
"grad_norm": 0.2239503711462021,
"learning_rate": 1.7987200000000003e-05,
"loss": 0.8968,
"step": 56900
},
{
"epoch": 0.91072,
"grad_norm": 0.20320284366607666,
"learning_rate": 1.79232e-05,
"loss": 0.8932,
"step": 56920
},
{
"epoch": 0.91104,
"grad_norm": 0.2082601636648178,
"learning_rate": 1.78592e-05,
"loss": 0.8576,
"step": 56940
},
{
"epoch": 0.91136,
"grad_norm": 0.24213466048240662,
"learning_rate": 1.7795200000000002e-05,
"loss": 0.9186,
"step": 56960
},
{
"epoch": 0.91168,
"grad_norm": 0.21737167239189148,
"learning_rate": 1.77312e-05,
"loss": 0.8754,
"step": 56980
},
{
"epoch": 0.912,
"grad_norm": 0.22130608558654785,
"learning_rate": 1.76672e-05,
"loss": 0.8952,
"step": 57000
},
{
"epoch": 0.91232,
"grad_norm": 0.24348904192447662,
"learning_rate": 1.76032e-05,
"loss": 0.8855,
"step": 57020
},
{
"epoch": 0.91264,
"grad_norm": 0.2024615854024887,
"learning_rate": 1.75392e-05,
"loss": 0.8279,
"step": 57040
},
{
"epoch": 0.91296,
"grad_norm": 0.23168401420116425,
"learning_rate": 1.7475199999999998e-05,
"loss": 0.833,
"step": 57060
},
{
"epoch": 0.91328,
"grad_norm": 0.2201065719127655,
"learning_rate": 1.74112e-05,
"loss": 0.9255,
"step": 57080
},
{
"epoch": 0.9136,
"grad_norm": 0.22319328784942627,
"learning_rate": 1.73472e-05,
"loss": 0.8326,
"step": 57100
},
{
"epoch": 0.91392,
"grad_norm": 0.1963931918144226,
"learning_rate": 1.72832e-05,
"loss": 0.8416,
"step": 57120
},
{
"epoch": 0.91424,
"grad_norm": 0.23881016671657562,
"learning_rate": 1.7219200000000003e-05,
"loss": 0.8786,
"step": 57140
},
{
"epoch": 0.91456,
"grad_norm": 0.20957258343696594,
"learning_rate": 1.71552e-05,
"loss": 0.8972,
"step": 57160
},
{
"epoch": 0.91488,
"grad_norm": 0.25064921379089355,
"learning_rate": 1.7091200000000003e-05,
"loss": 0.8594,
"step": 57180
},
{
"epoch": 0.9152,
"grad_norm": 0.22620342671871185,
"learning_rate": 1.7027200000000002e-05,
"loss": 0.855,
"step": 57200
},
{
"epoch": 0.91552,
"grad_norm": 0.22465457022190094,
"learning_rate": 1.69632e-05,
"loss": 0.8507,
"step": 57220
},
{
"epoch": 0.91584,
"grad_norm": 0.33410021662712097,
"learning_rate": 1.6899200000000002e-05,
"loss": 0.8608,
"step": 57240
},
{
"epoch": 0.91616,
"grad_norm": 0.20130544900894165,
"learning_rate": 1.68352e-05,
"loss": 0.8672,
"step": 57260
},
{
"epoch": 0.91648,
"grad_norm": 0.2350778877735138,
"learning_rate": 1.67712e-05,
"loss": 0.8942,
"step": 57280
},
{
"epoch": 0.9168,
"grad_norm": 0.21875150501728058,
"learning_rate": 1.67072e-05,
"loss": 0.9244,
"step": 57300
},
{
"epoch": 0.91712,
"grad_norm": 0.18828479945659637,
"learning_rate": 1.66432e-05,
"loss": 0.9132,
"step": 57320
},
{
"epoch": 0.91744,
"grad_norm": 0.19180278480052948,
"learning_rate": 1.65792e-05,
"loss": 0.8833,
"step": 57340
},
{
"epoch": 0.91776,
"grad_norm": 0.2215159833431244,
"learning_rate": 1.65152e-05,
"loss": 0.909,
"step": 57360
},
{
"epoch": 0.91808,
"grad_norm": 0.19516055285930634,
"learning_rate": 1.64512e-05,
"loss": 0.801,
"step": 57380
},
{
"epoch": 0.9184,
"grad_norm": 0.19184818863868713,
"learning_rate": 1.63872e-05,
"loss": 0.8942,
"step": 57400
},
{
"epoch": 0.91872,
"grad_norm": 0.21622811257839203,
"learning_rate": 1.63232e-05,
"loss": 0.8772,
"step": 57420
},
{
"epoch": 0.91904,
"grad_norm": 0.19087673723697662,
"learning_rate": 1.62592e-05,
"loss": 0.8736,
"step": 57440
},
{
"epoch": 0.91936,
"grad_norm": 0.21483179926872253,
"learning_rate": 1.61952e-05,
"loss": 0.8696,
"step": 57460
},
{
"epoch": 0.91968,
"grad_norm": 0.18231695890426636,
"learning_rate": 1.6131200000000002e-05,
"loss": 0.8429,
"step": 57480
},
{
"epoch": 0.92,
"grad_norm": 0.24777543544769287,
"learning_rate": 1.60672e-05,
"loss": 0.9207,
"step": 57500
},
{
"epoch": 0.92032,
"grad_norm": 0.21233485639095306,
"learning_rate": 1.6003200000000003e-05,
"loss": 0.9358,
"step": 57520
},
{
"epoch": 0.92064,
"grad_norm": 0.257861465215683,
"learning_rate": 1.59392e-05,
"loss": 0.9123,
"step": 57540
},
{
"epoch": 0.92096,
"grad_norm": 0.18959666788578033,
"learning_rate": 1.58752e-05,
"loss": 0.8834,
"step": 57560
},
{
"epoch": 0.92128,
"grad_norm": 0.22325819730758667,
"learning_rate": 1.5811200000000002e-05,
"loss": 0.8358,
"step": 57580
},
{
"epoch": 0.9216,
"grad_norm": 0.25303423404693604,
"learning_rate": 1.57472e-05,
"loss": 0.8783,
"step": 57600
},
{
"epoch": 0.92192,
"grad_norm": 0.18842332065105438,
"learning_rate": 1.56832e-05,
"loss": 0.8752,
"step": 57620
},
{
"epoch": 0.92224,
"grad_norm": 0.20843419432640076,
"learning_rate": 1.56192e-05,
"loss": 0.8911,
"step": 57640
},
{
"epoch": 0.92256,
"grad_norm": 0.2596145570278168,
"learning_rate": 1.55552e-05,
"loss": 0.8641,
"step": 57660
},
{
"epoch": 0.92288,
"grad_norm": 0.22827741503715515,
"learning_rate": 1.5491200000000002e-05,
"loss": 0.8859,
"step": 57680
},
{
"epoch": 0.9232,
"grad_norm": 0.21393606066703796,
"learning_rate": 1.54272e-05,
"loss": 0.9042,
"step": 57700
},
{
"epoch": 0.92352,
"grad_norm": 0.1834941953420639,
"learning_rate": 1.53632e-05,
"loss": 0.8484,
"step": 57720
},
{
"epoch": 0.92384,
"grad_norm": 0.21815276145935059,
"learning_rate": 1.52992e-05,
"loss": 0.9284,
"step": 57740
},
{
"epoch": 0.92416,
"grad_norm": 0.20257651805877686,
"learning_rate": 1.52352e-05,
"loss": 0.8286,
"step": 57760
},
{
"epoch": 0.92448,
"grad_norm": 0.258215993642807,
"learning_rate": 1.5171200000000001e-05,
"loss": 0.9197,
"step": 57780
},
{
"epoch": 0.9248,
"grad_norm": 0.21582092344760895,
"learning_rate": 1.5107200000000002e-05,
"loss": 0.8859,
"step": 57800
},
{
"epoch": 0.92512,
"grad_norm": 0.2228287011384964,
"learning_rate": 1.5043200000000002e-05,
"loss": 0.8918,
"step": 57820
},
{
"epoch": 0.92544,
"grad_norm": 0.22294846177101135,
"learning_rate": 1.4979200000000002e-05,
"loss": 0.9276,
"step": 57840
},
{
"epoch": 0.92576,
"grad_norm": 0.19540980458259583,
"learning_rate": 1.49152e-05,
"loss": 0.8801,
"step": 57860
},
{
"epoch": 0.92608,
"grad_norm": 0.19074346125125885,
"learning_rate": 1.4851200000000001e-05,
"loss": 0.8475,
"step": 57880
},
{
"epoch": 0.9264,
"grad_norm": 0.19179295003414154,
"learning_rate": 1.4787200000000001e-05,
"loss": 0.9089,
"step": 57900
},
{
"epoch": 0.92672,
"grad_norm": 0.20641054213047028,
"learning_rate": 1.4723200000000002e-05,
"loss": 0.8313,
"step": 57920
},
{
"epoch": 0.92704,
"grad_norm": 0.19102010130882263,
"learning_rate": 1.46592e-05,
"loss": 0.8915,
"step": 57940
},
{
"epoch": 0.92736,
"grad_norm": 0.1892133504152298,
"learning_rate": 1.45952e-05,
"loss": 0.8708,
"step": 57960
},
{
"epoch": 0.92768,
"grad_norm": 0.21764519810676575,
"learning_rate": 1.45312e-05,
"loss": 0.9107,
"step": 57980
},
{
"epoch": 0.928,
"grad_norm": 0.2371160387992859,
"learning_rate": 1.4467200000000001e-05,
"loss": 0.9397,
"step": 58000
},
{
"epoch": 0.92832,
"grad_norm": 0.2032940536737442,
"learning_rate": 1.44032e-05,
"loss": 0.8406,
"step": 58020
},
{
"epoch": 0.92864,
"grad_norm": 0.20223209261894226,
"learning_rate": 1.43392e-05,
"loss": 0.8387,
"step": 58040
},
{
"epoch": 0.92896,
"grad_norm": 0.19026170670986176,
"learning_rate": 1.42752e-05,
"loss": 0.8826,
"step": 58060
},
{
"epoch": 0.92928,
"grad_norm": 0.23420760035514832,
"learning_rate": 1.4211199999999999e-05,
"loss": 0.8779,
"step": 58080
},
{
"epoch": 0.9296,
"grad_norm": 0.19909054040908813,
"learning_rate": 1.4147199999999999e-05,
"loss": 0.8931,
"step": 58100
},
{
"epoch": 0.92992,
"grad_norm": 0.23734207451343536,
"learning_rate": 1.4083200000000003e-05,
"loss": 0.8522,
"step": 58120
},
{
"epoch": 0.93024,
"grad_norm": 0.1749676764011383,
"learning_rate": 1.4019200000000001e-05,
"loss": 0.8714,
"step": 58140
},
{
"epoch": 0.93056,
"grad_norm": 0.21267655491828918,
"learning_rate": 1.3955200000000002e-05,
"loss": 0.8442,
"step": 58160
},
{
"epoch": 0.93088,
"grad_norm": 0.25414636731147766,
"learning_rate": 1.3891200000000002e-05,
"loss": 0.9268,
"step": 58180
},
{
"epoch": 0.9312,
"grad_norm": 0.20942141115665436,
"learning_rate": 1.3827200000000002e-05,
"loss": 0.8509,
"step": 58200
},
{
"epoch": 0.93152,
"grad_norm": 0.2121083289384842,
"learning_rate": 1.37632e-05,
"loss": 0.8751,
"step": 58220
},
{
"epoch": 0.93184,
"grad_norm": 0.21758875250816345,
"learning_rate": 1.3699200000000001e-05,
"loss": 0.8875,
"step": 58240
},
{
"epoch": 0.93216,
"grad_norm": 0.188105046749115,
"learning_rate": 1.3635200000000001e-05,
"loss": 0.8876,
"step": 58260
},
{
"epoch": 0.93248,
"grad_norm": 0.18917614221572876,
"learning_rate": 1.35712e-05,
"loss": 0.8453,
"step": 58280
},
{
"epoch": 0.9328,
"grad_norm": 0.19364288449287415,
"learning_rate": 1.35072e-05,
"loss": 0.829,
"step": 58300
},
{
"epoch": 0.93312,
"grad_norm": 0.19416366517543793,
"learning_rate": 1.34432e-05,
"loss": 0.8625,
"step": 58320
},
{
"epoch": 0.93344,
"grad_norm": 0.2294871211051941,
"learning_rate": 1.33792e-05,
"loss": 0.8696,
"step": 58340
},
{
"epoch": 0.93376,
"grad_norm": 0.21654824912548065,
"learning_rate": 1.33152e-05,
"loss": 0.8733,
"step": 58360
},
{
"epoch": 0.93408,
"grad_norm": 0.2053345888853073,
"learning_rate": 1.32512e-05,
"loss": 0.9046,
"step": 58380
},
{
"epoch": 0.9344,
"grad_norm": 0.18881118297576904,
"learning_rate": 1.31872e-05,
"loss": 0.8252,
"step": 58400
},
{
"epoch": 0.93472,
"grad_norm": 0.23716068267822266,
"learning_rate": 1.31232e-05,
"loss": 0.8928,
"step": 58420
},
{
"epoch": 0.93504,
"grad_norm": 0.19801940023899078,
"learning_rate": 1.3059200000000002e-05,
"loss": 0.8692,
"step": 58440
},
{
"epoch": 0.93536,
"grad_norm": 0.2585828900337219,
"learning_rate": 1.2995200000000002e-05,
"loss": 0.9274,
"step": 58460
},
{
"epoch": 0.93568,
"grad_norm": 0.31189459562301636,
"learning_rate": 1.29312e-05,
"loss": 0.8347,
"step": 58480
},
{
"epoch": 0.936,
"grad_norm": 0.24190790951251984,
"learning_rate": 1.2867200000000001e-05,
"loss": 0.8476,
"step": 58500
},
{
"epoch": 0.93632,
"grad_norm": 0.2098657637834549,
"learning_rate": 1.2803200000000001e-05,
"loss": 0.8735,
"step": 58520
},
{
"epoch": 0.93664,
"grad_norm": 0.2108910232782364,
"learning_rate": 1.2739200000000002e-05,
"loss": 0.874,
"step": 58540
},
{
"epoch": 0.93696,
"grad_norm": 0.2129533439874649,
"learning_rate": 1.26752e-05,
"loss": 0.8792,
"step": 58560
},
{
"epoch": 0.93728,
"grad_norm": 0.20992882549762726,
"learning_rate": 1.26112e-05,
"loss": 0.9094,
"step": 58580
},
{
"epoch": 0.9376,
"grad_norm": 0.24041472375392914,
"learning_rate": 1.25472e-05,
"loss": 0.927,
"step": 58600
},
{
"epoch": 0.93792,
"grad_norm": 0.20306578278541565,
"learning_rate": 1.2483200000000001e-05,
"loss": 0.8776,
"step": 58620
},
{
"epoch": 0.93824,
"grad_norm": 0.22501681745052338,
"learning_rate": 1.2422400000000002e-05,
"loss": 0.9315,
"step": 58640
},
{
"epoch": 0.93856,
"grad_norm": 0.23260138928890228,
"learning_rate": 1.23584e-05,
"loss": 0.8216,
"step": 58660
},
{
"epoch": 0.93888,
"grad_norm": 0.18364718556404114,
"learning_rate": 1.22944e-05,
"loss": 0.8805,
"step": 58680
},
{
"epoch": 0.9392,
"grad_norm": 0.23733025789260864,
"learning_rate": 1.22304e-05,
"loss": 0.8966,
"step": 58700
},
{
"epoch": 0.93952,
"grad_norm": 0.19902455806732178,
"learning_rate": 1.21664e-05,
"loss": 0.9097,
"step": 58720
},
{
"epoch": 0.93984,
"grad_norm": 0.25381171703338623,
"learning_rate": 1.2102400000000001e-05,
"loss": 0.9078,
"step": 58740
},
{
"epoch": 0.94016,
"grad_norm": 0.22020556032657623,
"learning_rate": 1.2038400000000001e-05,
"loss": 0.8428,
"step": 58760
},
{
"epoch": 0.94048,
"grad_norm": 0.21431581676006317,
"learning_rate": 1.19744e-05,
"loss": 0.8228,
"step": 58780
},
{
"epoch": 0.9408,
"grad_norm": 0.24102531373500824,
"learning_rate": 1.19104e-05,
"loss": 0.886,
"step": 58800
},
{
"epoch": 0.94112,
"grad_norm": 0.20612405240535736,
"learning_rate": 1.18464e-05,
"loss": 0.8438,
"step": 58820
},
{
"epoch": 0.94144,
"grad_norm": 0.2482701539993286,
"learning_rate": 1.1782400000000001e-05,
"loss": 0.9063,
"step": 58840
},
{
"epoch": 0.94176,
"grad_norm": 0.2273882031440735,
"learning_rate": 1.17184e-05,
"loss": 0.8267,
"step": 58860
},
{
"epoch": 0.94208,
"grad_norm": 0.19269466400146484,
"learning_rate": 1.16544e-05,
"loss": 0.8889,
"step": 58880
},
{
"epoch": 0.9424,
"grad_norm": 0.24158763885498047,
"learning_rate": 1.15904e-05,
"loss": 0.8983,
"step": 58900
},
{
"epoch": 0.94272,
"grad_norm": 0.18431192636489868,
"learning_rate": 1.15264e-05,
"loss": 0.8202,
"step": 58920
},
{
"epoch": 0.94304,
"grad_norm": 0.2194778025150299,
"learning_rate": 1.14624e-05,
"loss": 0.8591,
"step": 58940
},
{
"epoch": 0.94336,
"grad_norm": 0.2087930589914322,
"learning_rate": 1.13984e-05,
"loss": 0.882,
"step": 58960
},
{
"epoch": 0.94368,
"grad_norm": 0.20091105997562408,
"learning_rate": 1.1334400000000001e-05,
"loss": 0.8729,
"step": 58980
},
{
"epoch": 0.944,
"grad_norm": 0.22069287300109863,
"learning_rate": 1.12704e-05,
"loss": 0.8489,
"step": 59000
},
{
"epoch": 0.94432,
"grad_norm": 0.19174374639987946,
"learning_rate": 1.12064e-05,
"loss": 0.8284,
"step": 59020
},
{
"epoch": 0.94464,
"grad_norm": 0.20969411730766296,
"learning_rate": 1.11424e-05,
"loss": 0.8967,
"step": 59040
},
{
"epoch": 0.94496,
"grad_norm": 0.21621152758598328,
"learning_rate": 1.10784e-05,
"loss": 0.8646,
"step": 59060
},
{
"epoch": 0.94528,
"grad_norm": 0.2245556265115738,
"learning_rate": 1.10144e-05,
"loss": 0.961,
"step": 59080
},
{
"epoch": 0.9456,
"grad_norm": 0.2487545758485794,
"learning_rate": 1.0950400000000001e-05,
"loss": 0.9026,
"step": 59100
},
{
"epoch": 0.94592,
"grad_norm": 0.2444257289171219,
"learning_rate": 1.0886400000000001e-05,
"loss": 0.861,
"step": 59120
},
{
"epoch": 0.94624,
"grad_norm": 0.19312791526317596,
"learning_rate": 1.08224e-05,
"loss": 0.8604,
"step": 59140
},
{
"epoch": 0.94656,
"grad_norm": 0.20798088610172272,
"learning_rate": 1.07584e-05,
"loss": 0.8886,
"step": 59160
},
{
"epoch": 0.94688,
"grad_norm": 0.22413522005081177,
"learning_rate": 1.06944e-05,
"loss": 0.8656,
"step": 59180
},
{
"epoch": 0.9472,
"grad_norm": 0.19229187071323395,
"learning_rate": 1.06304e-05,
"loss": 0.8742,
"step": 59200
},
{
"epoch": 0.94752,
"grad_norm": 0.21749311685562134,
"learning_rate": 1.05664e-05,
"loss": 0.9038,
"step": 59220
},
{
"epoch": 0.94784,
"grad_norm": 0.21696510910987854,
"learning_rate": 1.0502400000000001e-05,
"loss": 0.8729,
"step": 59240
},
{
"epoch": 0.94816,
"grad_norm": 0.21962523460388184,
"learning_rate": 1.0438400000000002e-05,
"loss": 0.9075,
"step": 59260
},
{
"epoch": 0.94848,
"grad_norm": 0.2188694030046463,
"learning_rate": 1.03744e-05,
"loss": 0.8401,
"step": 59280
},
{
"epoch": 0.9488,
"grad_norm": 0.17384332418441772,
"learning_rate": 1.03104e-05,
"loss": 0.8886,
"step": 59300
},
{
"epoch": 0.94912,
"grad_norm": 0.18422313034534454,
"learning_rate": 1.02464e-05,
"loss": 0.9251,
"step": 59320
},
{
"epoch": 0.94944,
"grad_norm": 0.2241748720407486,
"learning_rate": 1.0182400000000001e-05,
"loss": 0.8966,
"step": 59340
},
{
"epoch": 0.94976,
"grad_norm": 0.2653813660144806,
"learning_rate": 1.01184e-05,
"loss": 0.853,
"step": 59360
},
{
"epoch": 0.95008,
"grad_norm": 0.2018147110939026,
"learning_rate": 1.00544e-05,
"loss": 0.8624,
"step": 59380
},
{
"epoch": 0.9504,
"grad_norm": 0.23722539842128754,
"learning_rate": 9.990400000000002e-06,
"loss": 0.943,
"step": 59400
},
{
"epoch": 0.95072,
"grad_norm": 0.2132965326309204,
"learning_rate": 9.9264e-06,
"loss": 0.8511,
"step": 59420
},
{
"epoch": 0.95104,
"grad_norm": 0.19337671995162964,
"learning_rate": 9.8624e-06,
"loss": 0.8419,
"step": 59440
},
{
"epoch": 0.95136,
"grad_norm": 0.1993594765663147,
"learning_rate": 9.798400000000001e-06,
"loss": 0.8809,
"step": 59460
},
{
"epoch": 0.95168,
"grad_norm": 0.20051348209381104,
"learning_rate": 9.734400000000001e-06,
"loss": 0.8246,
"step": 59480
},
{
"epoch": 0.952,
"grad_norm": 0.2141229212284088,
"learning_rate": 9.6704e-06,
"loss": 0.8575,
"step": 59500
},
{
"epoch": 0.95232,
"grad_norm": 0.20707592368125916,
"learning_rate": 9.6096e-06,
"loss": 0.8953,
"step": 59520
},
{
"epoch": 0.95264,
"grad_norm": 0.2202858179807663,
"learning_rate": 9.5456e-06,
"loss": 0.893,
"step": 59540
},
{
"epoch": 0.95296,
"grad_norm": 0.22861933708190918,
"learning_rate": 9.4816e-06,
"loss": 0.8534,
"step": 59560
},
{
"epoch": 0.95328,
"grad_norm": 0.2285875827074051,
"learning_rate": 9.4176e-06,
"loss": 0.8686,
"step": 59580
},
{
"epoch": 0.9536,
"grad_norm": 0.19898247718811035,
"learning_rate": 9.3536e-06,
"loss": 0.8573,
"step": 59600
},
{
"epoch": 0.95392,
"grad_norm": 0.19063091278076172,
"learning_rate": 9.289600000000002e-06,
"loss": 0.8889,
"step": 59620
},
{
"epoch": 0.95424,
"grad_norm": 0.2302456945180893,
"learning_rate": 9.2256e-06,
"loss": 0.8927,
"step": 59640
},
{
"epoch": 0.95456,
"grad_norm": 0.23316220939159393,
"learning_rate": 9.1616e-06,
"loss": 0.8461,
"step": 59660
},
{
"epoch": 0.95488,
"grad_norm": 0.21499593555927277,
"learning_rate": 9.0976e-06,
"loss": 0.847,
"step": 59680
},
{
"epoch": 0.9552,
"grad_norm": 0.2145620733499527,
"learning_rate": 9.033600000000001e-06,
"loss": 0.9328,
"step": 59700
},
{
"epoch": 0.95552,
"grad_norm": 0.22874926030635834,
"learning_rate": 8.9696e-06,
"loss": 0.8883,
"step": 59720
},
{
"epoch": 0.95584,
"grad_norm": 0.2054702192544937,
"learning_rate": 8.9056e-06,
"loss": 0.9,
"step": 59740
},
{
"epoch": 0.95616,
"grad_norm": 0.24557848274707794,
"learning_rate": 8.8416e-06,
"loss": 0.8828,
"step": 59760
},
{
"epoch": 0.95648,
"grad_norm": 0.21342433989048004,
"learning_rate": 8.7776e-06,
"loss": 0.8664,
"step": 59780
},
{
"epoch": 0.9568,
"grad_norm": 0.21812336146831512,
"learning_rate": 8.7136e-06,
"loss": 0.8613,
"step": 59800
},
{
"epoch": 0.95712,
"grad_norm": 0.21000798046588898,
"learning_rate": 8.649600000000001e-06,
"loss": 0.8553,
"step": 59820
},
{
"epoch": 0.95744,
"grad_norm": 0.23609398305416107,
"learning_rate": 8.585600000000001e-06,
"loss": 0.8952,
"step": 59840
},
{
"epoch": 0.95776,
"grad_norm": 0.23479294776916504,
"learning_rate": 8.5216e-06,
"loss": 0.9345,
"step": 59860
},
{
"epoch": 0.95808,
"grad_norm": 0.1910203993320465,
"learning_rate": 8.4576e-06,
"loss": 0.891,
"step": 59880
},
{
"epoch": 0.9584,
"grad_norm": 0.21888737380504608,
"learning_rate": 8.3936e-06,
"loss": 0.8615,
"step": 59900
},
{
"epoch": 0.95872,
"grad_norm": 0.2583872973918915,
"learning_rate": 8.3296e-06,
"loss": 0.8452,
"step": 59920
},
{
"epoch": 0.95904,
"grad_norm": 0.21396338939666748,
"learning_rate": 8.2656e-06,
"loss": 0.8582,
"step": 59940
},
{
"epoch": 0.95936,
"grad_norm": 0.18018275499343872,
"learning_rate": 8.201600000000001e-06,
"loss": 0.9053,
"step": 59960
},
{
"epoch": 0.95968,
"grad_norm": 0.24706120789051056,
"learning_rate": 8.137600000000002e-06,
"loss": 0.8794,
"step": 59980
},
{
"epoch": 0.96,
"grad_norm": 0.19996990263462067,
"learning_rate": 8.0736e-06,
"loss": 0.9093,
"step": 60000
},
{
"epoch": 0.96032,
"grad_norm": 0.1947757452726364,
"learning_rate": 8.0096e-06,
"loss": 0.871,
"step": 60020
},
{
"epoch": 0.96064,
"grad_norm": 0.19846396148204803,
"learning_rate": 7.9456e-06,
"loss": 0.8555,
"step": 60040
},
{
"epoch": 0.96096,
"grad_norm": 0.2101791501045227,
"learning_rate": 7.881600000000001e-06,
"loss": 0.8805,
"step": 60060
},
{
"epoch": 0.96128,
"grad_norm": 0.2531636953353882,
"learning_rate": 7.8176e-06,
"loss": 0.8721,
"step": 60080
},
{
"epoch": 0.9616,
"grad_norm": 0.21089531481266022,
"learning_rate": 7.7536e-06,
"loss": 0.8266,
"step": 60100
},
{
"epoch": 0.96192,
"grad_norm": 0.16842779517173767,
"learning_rate": 7.689600000000002e-06,
"loss": 0.8531,
"step": 60120
},
{
"epoch": 0.96224,
"grad_norm": 0.21880729496479034,
"learning_rate": 7.625600000000001e-06,
"loss": 0.9024,
"step": 60140
},
{
"epoch": 0.96256,
"grad_norm": 0.196882426738739,
"learning_rate": 7.561600000000001e-06,
"loss": 0.8672,
"step": 60160
},
{
"epoch": 0.96288,
"grad_norm": 0.16088998317718506,
"learning_rate": 7.497600000000001e-06,
"loss": 0.8431,
"step": 60180
},
{
"epoch": 0.9632,
"grad_norm": 0.19500739872455597,
"learning_rate": 7.4336e-06,
"loss": 0.8772,
"step": 60200
},
{
"epoch": 0.96352,
"grad_norm": 0.22621487081050873,
"learning_rate": 7.3696e-06,
"loss": 0.89,
"step": 60220
},
{
"epoch": 0.96384,
"grad_norm": 0.20538878440856934,
"learning_rate": 7.3056e-06,
"loss": 0.9046,
"step": 60240
},
{
"epoch": 0.96416,
"grad_norm": 0.21844108402729034,
"learning_rate": 7.241599999999999e-06,
"loss": 0.8146,
"step": 60260
},
{
"epoch": 0.96448,
"grad_norm": 0.19265195727348328,
"learning_rate": 7.177600000000001e-06,
"loss": 0.919,
"step": 60280
},
{
"epoch": 0.9648,
"grad_norm": 0.2025534063577652,
"learning_rate": 7.113600000000001e-06,
"loss": 0.8966,
"step": 60300
},
{
"epoch": 0.96512,
"grad_norm": 0.2134266346693039,
"learning_rate": 7.0496e-06,
"loss": 0.91,
"step": 60320
},
{
"epoch": 0.96544,
"grad_norm": 0.25129690766334534,
"learning_rate": 6.9856000000000005e-06,
"loss": 0.8821,
"step": 60340
},
{
"epoch": 0.96576,
"grad_norm": 0.23355615139007568,
"learning_rate": 6.9216e-06,
"loss": 0.8755,
"step": 60360
},
{
"epoch": 0.96608,
"grad_norm": 0.22555451095104218,
"learning_rate": 6.8576e-06,
"loss": 0.8959,
"step": 60380
},
{
"epoch": 0.9664,
"grad_norm": 0.23309843242168427,
"learning_rate": 6.7936e-06,
"loss": 0.8482,
"step": 60400
},
{
"epoch": 0.96672,
"grad_norm": 0.2550283372402191,
"learning_rate": 6.7296e-06,
"loss": 0.8756,
"step": 60420
},
{
"epoch": 0.96704,
"grad_norm": 0.24770893156528473,
"learning_rate": 6.665600000000001e-06,
"loss": 0.8266,
"step": 60440
},
{
"epoch": 0.96736,
"grad_norm": 0.19504638016223907,
"learning_rate": 6.6016000000000005e-06,
"loss": 0.8924,
"step": 60460
},
{
"epoch": 0.96768,
"grad_norm": 0.1870088279247284,
"learning_rate": 6.537600000000001e-06,
"loss": 0.8819,
"step": 60480
},
{
"epoch": 0.968,
"grad_norm": 0.19606348872184753,
"learning_rate": 6.4736e-06,
"loss": 0.8583,
"step": 60500
},
{
"epoch": 0.96832,
"grad_norm": 0.21062546968460083,
"learning_rate": 6.4096000000000004e-06,
"loss": 0.8318,
"step": 60520
},
{
"epoch": 0.96864,
"grad_norm": 0.2244090735912323,
"learning_rate": 6.3456e-06,
"loss": 0.8503,
"step": 60540
},
{
"epoch": 0.96896,
"grad_norm": 0.2132522016763687,
"learning_rate": 6.2816e-06,
"loss": 0.8805,
"step": 60560
},
{
"epoch": 0.96928,
"grad_norm": 0.21387845277786255,
"learning_rate": 6.2176e-06,
"loss": 0.8843,
"step": 60580
},
{
"epoch": 0.9696,
"grad_norm": 0.20109587907791138,
"learning_rate": 6.153600000000001e-06,
"loss": 0.8487,
"step": 60600
},
{
"epoch": 0.96992,
"grad_norm": 0.20364026725292206,
"learning_rate": 6.0896e-06,
"loss": 0.8737,
"step": 60620
},
{
"epoch": 0.97024,
"grad_norm": 0.22072643041610718,
"learning_rate": 6.0256e-06,
"loss": 0.881,
"step": 60640
},
{
"epoch": 0.97056,
"grad_norm": 0.24078206717967987,
"learning_rate": 5.961600000000001e-06,
"loss": 0.8459,
"step": 60660
},
{
"epoch": 0.97088,
"grad_norm": 0.24617841839790344,
"learning_rate": 5.8976e-06,
"loss": 0.894,
"step": 60680
},
{
"epoch": 0.9712,
"grad_norm": 0.2186897248029709,
"learning_rate": 5.8336e-06,
"loss": 0.8612,
"step": 60700
},
{
"epoch": 0.97152,
"grad_norm": 0.1752256453037262,
"learning_rate": 5.7696e-06,
"loss": 0.8619,
"step": 60720
},
{
"epoch": 0.97184,
"grad_norm": 0.2205258458852768,
"learning_rate": 5.705600000000001e-06,
"loss": 0.8847,
"step": 60740
},
{
"epoch": 0.97216,
"grad_norm": 0.20608656108379364,
"learning_rate": 5.6416e-06,
"loss": 0.8728,
"step": 60760
},
{
"epoch": 0.97248,
"grad_norm": 0.18989993631839752,
"learning_rate": 5.577600000000001e-06,
"loss": 0.8496,
"step": 60780
},
{
"epoch": 0.9728,
"grad_norm": 0.2024667114019394,
"learning_rate": 5.5136e-06,
"loss": 0.8883,
"step": 60800
},
{
"epoch": 0.97312,
"grad_norm": 0.17910663783550262,
"learning_rate": 5.4496e-06,
"loss": 0.8628,
"step": 60820
},
{
"epoch": 0.97344,
"grad_norm": 0.19510726630687714,
"learning_rate": 5.385600000000001e-06,
"loss": 0.8632,
"step": 60840
},
{
"epoch": 0.97376,
"grad_norm": 0.20516471564769745,
"learning_rate": 5.3216e-06,
"loss": 0.9158,
"step": 60860
},
{
"epoch": 0.97408,
"grad_norm": 0.2369288057088852,
"learning_rate": 5.2576e-06,
"loss": 0.9067,
"step": 60880
},
{
"epoch": 0.9744,
"grad_norm": 0.23473519086837769,
"learning_rate": 5.1936000000000006e-06,
"loss": 0.8675,
"step": 60900
},
{
"epoch": 0.97472,
"grad_norm": 0.20721520483493805,
"learning_rate": 5.1296e-06,
"loss": 0.857,
"step": 60920
},
{
"epoch": 0.97504,
"grad_norm": 0.25758302211761475,
"learning_rate": 5.0656e-06,
"loss": 0.854,
"step": 60940
},
{
"epoch": 0.97536,
"grad_norm": 0.23826448619365692,
"learning_rate": 5.0016e-06,
"loss": 0.865,
"step": 60960
},
{
"epoch": 0.97568,
"grad_norm": 0.21797384321689606,
"learning_rate": 4.937600000000001e-06,
"loss": 0.8956,
"step": 60980
},
{
"epoch": 0.976,
"grad_norm": 0.2073042243719101,
"learning_rate": 4.8736e-06,
"loss": 0.8522,
"step": 61000
},
{
"epoch": 0.97632,
"grad_norm": 0.1850445568561554,
"learning_rate": 4.8096000000000005e-06,
"loss": 0.945,
"step": 61020
},
{
"epoch": 0.97664,
"grad_norm": 0.2104647010564804,
"learning_rate": 4.7456e-06,
"loss": 0.8828,
"step": 61040
},
{
"epoch": 0.97696,
"grad_norm": 0.17378270626068115,
"learning_rate": 4.6816e-06,
"loss": 0.896,
"step": 61060
},
{
"epoch": 0.97728,
"grad_norm": 0.21518754959106445,
"learning_rate": 4.6176000000000005e-06,
"loss": 0.8639,
"step": 61080
},
{
"epoch": 0.9776,
"grad_norm": 0.22130274772644043,
"learning_rate": 4.5536e-06,
"loss": 0.8755,
"step": 61100
},
{
"epoch": 0.97792,
"grad_norm": 0.24225564301013947,
"learning_rate": 4.4896e-06,
"loss": 0.8747,
"step": 61120
},
{
"epoch": 0.97824,
"grad_norm": 0.19240306317806244,
"learning_rate": 4.4256e-06,
"loss": 0.9133,
"step": 61140
},
{
"epoch": 0.97856,
"grad_norm": 0.18395653367042542,
"learning_rate": 4.361600000000001e-06,
"loss": 0.8321,
"step": 61160
},
{
"epoch": 0.97888,
"grad_norm": 0.21432001888751984,
"learning_rate": 4.2976e-06,
"loss": 0.9217,
"step": 61180
},
{
"epoch": 0.9792,
"grad_norm": 0.22985559701919556,
"learning_rate": 4.2336000000000004e-06,
"loss": 0.8819,
"step": 61200
},
{
"epoch": 0.97952,
"grad_norm": 0.18436340987682343,
"learning_rate": 4.1696e-06,
"loss": 0.9066,
"step": 61220
},
{
"epoch": 0.97984,
"grad_norm": 0.2279936820268631,
"learning_rate": 4.1056e-06,
"loss": 0.8828,
"step": 61240
},
{
"epoch": 0.98016,
"grad_norm": 0.23359614610671997,
"learning_rate": 4.0416e-06,
"loss": 0.873,
"step": 61260
},
{
"epoch": 0.98048,
"grad_norm": 0.21372786164283752,
"learning_rate": 3.9776e-06,
"loss": 0.8726,
"step": 61280
},
{
"epoch": 0.9808,
"grad_norm": 0.17267848551273346,
"learning_rate": 3.9136e-06,
"loss": 0.8408,
"step": 61300
},
{
"epoch": 0.98112,
"grad_norm": 0.19871732592582703,
"learning_rate": 3.8496e-06,
"loss": 0.8697,
"step": 61320
},
{
"epoch": 0.98144,
"grad_norm": 0.22875666618347168,
"learning_rate": 3.7856000000000002e-06,
"loss": 0.9083,
"step": 61340
},
{
"epoch": 0.98176,
"grad_norm": 0.22814328968524933,
"learning_rate": 3.7216e-06,
"loss": 0.8542,
"step": 61360
},
{
"epoch": 0.98208,
"grad_norm": 0.19292208552360535,
"learning_rate": 3.6576e-06,
"loss": 0.7783,
"step": 61380
},
{
"epoch": 0.9824,
"grad_norm": 0.20703838765621185,
"learning_rate": 3.5936000000000006e-06,
"loss": 0.8342,
"step": 61400
},
{
"epoch": 0.98272,
"grad_norm": 0.1766250729560852,
"learning_rate": 3.5296000000000005e-06,
"loss": 0.9091,
"step": 61420
},
{
"epoch": 0.98304,
"grad_norm": 0.1929435431957245,
"learning_rate": 3.4656e-06,
"loss": 0.8704,
"step": 61440
},
{
"epoch": 0.98336,
"grad_norm": 0.269422709941864,
"learning_rate": 3.4015999999999998e-06,
"loss": 0.8677,
"step": 61460
},
{
"epoch": 0.98368,
"grad_norm": 0.17369483411312103,
"learning_rate": 3.3376000000000005e-06,
"loss": 0.8388,
"step": 61480
},
{
"epoch": 0.984,
"grad_norm": 0.24999183416366577,
"learning_rate": 3.2736000000000003e-06,
"loss": 0.8884,
"step": 61500
},
{
"epoch": 0.98432,
"grad_norm": 0.2378091812133789,
"learning_rate": 3.2096e-06,
"loss": 0.8644,
"step": 61520
},
{
"epoch": 0.98464,
"grad_norm": 0.23447105288505554,
"learning_rate": 3.1456e-06,
"loss": 0.8962,
"step": 61540
},
{
"epoch": 0.98496,
"grad_norm": 0.22000399231910706,
"learning_rate": 3.0816000000000003e-06,
"loss": 0.8297,
"step": 61560
},
{
"epoch": 0.98528,
"grad_norm": 0.20318488776683807,
"learning_rate": 3.0176e-06,
"loss": 0.8639,
"step": 61580
},
{
"epoch": 0.9856,
"grad_norm": 0.22560527920722961,
"learning_rate": 2.9536e-06,
"loss": 0.8701,
"step": 61600
},
{
"epoch": 0.98592,
"grad_norm": 0.2516303062438965,
"learning_rate": 2.8896000000000003e-06,
"loss": 0.9267,
"step": 61620
},
{
"epoch": 0.98624,
"grad_norm": 0.2284599393606186,
"learning_rate": 2.8256e-06,
"loss": 0.858,
"step": 61640
},
{
"epoch": 0.98656,
"grad_norm": 0.22970664501190186,
"learning_rate": 2.7616000000000004e-06,
"loss": 0.8553,
"step": 61660
},
{
"epoch": 0.98688,
"grad_norm": 0.19490259885787964,
"learning_rate": 2.6976000000000002e-06,
"loss": 0.8,
"step": 61680
},
{
"epoch": 0.9872,
"grad_norm": 0.2025730013847351,
"learning_rate": 2.6336e-06,
"loss": 0.8664,
"step": 61700
},
{
"epoch": 0.98752,
"grad_norm": 0.202545627951622,
"learning_rate": 2.5696e-06,
"loss": 0.8784,
"step": 61720
},
{
"epoch": 0.98784,
"grad_norm": 0.21990488469600677,
"learning_rate": 2.5055999999999998e-06,
"loss": 0.8768,
"step": 61740
},
{
"epoch": 0.98816,
"grad_norm": 0.2364804446697235,
"learning_rate": 2.4416e-06,
"loss": 0.9087,
"step": 61760
},
{
"epoch": 0.98848,
"grad_norm": 0.20863565802574158,
"learning_rate": 2.3776e-06,
"loss": 0.8558,
"step": 61780
},
{
"epoch": 0.9888,
"grad_norm": 0.21479357779026031,
"learning_rate": 2.3136e-06,
"loss": 0.8801,
"step": 61800
},
{
"epoch": 0.98912,
"grad_norm": 0.2164417803287506,
"learning_rate": 2.2496e-06,
"loss": 0.8483,
"step": 61820
},
{
"epoch": 0.98944,
"grad_norm": 0.2229296714067459,
"learning_rate": 2.1856000000000003e-06,
"loss": 0.8708,
"step": 61840
},
{
"epoch": 0.98976,
"grad_norm": 0.15766002237796783,
"learning_rate": 2.1216e-06,
"loss": 0.8485,
"step": 61860
},
{
"epoch": 0.99008,
"grad_norm": 0.1808682680130005,
"learning_rate": 2.0576e-06,
"loss": 0.8397,
"step": 61880
},
{
"epoch": 0.9904,
"grad_norm": 0.23974210023880005,
"learning_rate": 1.9936e-06,
"loss": 0.8173,
"step": 61900
},
{
"epoch": 0.99072,
"grad_norm": 0.1957724243402481,
"learning_rate": 1.9296e-06,
"loss": 0.8513,
"step": 61920
},
{
"epoch": 0.99104,
"grad_norm": 0.1994854062795639,
"learning_rate": 1.8656e-06,
"loss": 0.8988,
"step": 61940
},
{
"epoch": 0.99136,
"grad_norm": 0.21945121884346008,
"learning_rate": 1.8016000000000003e-06,
"loss": 0.8666,
"step": 61960
},
{
"epoch": 0.99168,
"grad_norm": 0.23173430562019348,
"learning_rate": 1.7375999999999999e-06,
"loss": 0.8751,
"step": 61980
},
{
"epoch": 0.992,
"grad_norm": 0.23692530393600464,
"learning_rate": 1.6736000000000002e-06,
"loss": 0.8905,
"step": 62000
},
{
"epoch": 0.99232,
"grad_norm": 0.2442493587732315,
"learning_rate": 1.6096e-06,
"loss": 0.8955,
"step": 62020
},
{
"epoch": 0.99264,
"grad_norm": 0.22609354555606842,
"learning_rate": 1.5456e-06,
"loss": 0.8639,
"step": 62040
},
{
"epoch": 0.99296,
"grad_norm": 0.19429120421409607,
"learning_rate": 1.4816e-06,
"loss": 0.8911,
"step": 62060
},
{
"epoch": 0.99328,
"grad_norm": 0.23542903363704681,
"learning_rate": 1.4176e-06,
"loss": 0.9274,
"step": 62080
},
{
"epoch": 0.9936,
"grad_norm": 0.21632255613803864,
"learning_rate": 1.3536e-06,
"loss": 0.9395,
"step": 62100
},
{
"epoch": 0.99392,
"grad_norm": 0.21293993294239044,
"learning_rate": 1.2896000000000001e-06,
"loss": 0.8639,
"step": 62120
},
{
"epoch": 0.99424,
"grad_norm": 0.20467883348464966,
"learning_rate": 1.2256e-06,
"loss": 0.837,
"step": 62140
},
{
"epoch": 0.99456,
"grad_norm": 0.2215726226568222,
"learning_rate": 1.1616e-06,
"loss": 0.9212,
"step": 62160
},
{
"epoch": 0.99488,
"grad_norm": 0.2058229297399521,
"learning_rate": 1.0976e-06,
"loss": 0.8627,
"step": 62180
},
{
"epoch": 0.9952,
"grad_norm": 0.19800324738025665,
"learning_rate": 1.0336e-06,
"loss": 0.8536,
"step": 62200
},
{
"epoch": 0.99552,
"grad_norm": 0.24008409678936005,
"learning_rate": 9.696e-07,
"loss": 0.9235,
"step": 62220
},
{
"epoch": 0.99584,
"grad_norm": 0.1996374875307083,
"learning_rate": 9.056000000000001e-07,
"loss": 0.8699,
"step": 62240
},
{
"epoch": 0.99616,
"grad_norm": 0.20383226871490479,
"learning_rate": 8.416e-07,
"loss": 0.9042,
"step": 62260
},
{
"epoch": 0.99648,
"grad_norm": 0.23892252147197723,
"learning_rate": 7.776000000000001e-07,
"loss": 0.8895,
"step": 62280
},
{
"epoch": 0.9968,
"grad_norm": 0.23595763742923737,
"learning_rate": 7.136e-07,
"loss": 0.9093,
"step": 62300
},
{
"epoch": 0.99712,
"grad_norm": 0.168944850564003,
"learning_rate": 6.496e-07,
"loss": 0.8377,
"step": 62320
},
{
"epoch": 0.99744,
"grad_norm": 0.22417670488357544,
"learning_rate": 5.856000000000001e-07,
"loss": 0.9073,
"step": 62340
},
{
"epoch": 0.99776,
"grad_norm": 0.25286805629730225,
"learning_rate": 5.216e-07,
"loss": 0.8761,
"step": 62360
},
{
"epoch": 0.99808,
"grad_norm": 0.25818583369255066,
"learning_rate": 4.576e-07,
"loss": 0.9176,
"step": 62380
},
{
"epoch": 0.9984,
"grad_norm": 0.20294295251369476,
"learning_rate": 3.9360000000000003e-07,
"loss": 0.9181,
"step": 62400
},
{
"epoch": 0.99872,
"grad_norm": 0.2515595555305481,
"learning_rate": 3.296e-07,
"loss": 0.8951,
"step": 62420
},
{
"epoch": 0.99904,
"grad_norm": 0.21728461980819702,
"learning_rate": 2.656e-07,
"loss": 0.8226,
"step": 62440
},
{
"epoch": 0.99936,
"grad_norm": 0.2443869262933731,
"learning_rate": 2.016e-07,
"loss": 0.8448,
"step": 62460
},
{
"epoch": 0.99968,
"grad_norm": 0.2046569287776947,
"learning_rate": 1.3760000000000001e-07,
"loss": 0.881,
"step": 62480
},
{
"epoch": 1.0,
"grad_norm": 0.21948722004890442,
"learning_rate": 7.36e-08,
"loss": 0.9002,
"step": 62500
}
],
"logging_steps": 20,
"max_steps": 62500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 600,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1005725769728e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}