{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 62500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00032,
"grad_norm": 0.39110425114631653,
"learning_rate": 0.0001999456,
"loss": 2.0348,
"step": 20
},
{
"epoch": 0.00064,
"grad_norm": 0.17977817356586456,
"learning_rate": 0.0001998816,
"loss": 0.9027,
"step": 40
},
{
"epoch": 0.00096,
"grad_norm": 0.1247190609574318,
"learning_rate": 0.00019981760000000002,
"loss": 0.9304,
"step": 60
},
{
"epoch": 0.00128,
"grad_norm": 0.1341821253299713,
"learning_rate": 0.0001997536,
"loss": 0.8894,
"step": 80
},
{
"epoch": 0.0016,
"grad_norm": 0.1190188005566597,
"learning_rate": 0.0001996896,
"loss": 0.924,
"step": 100
},
{
"epoch": 0.00192,
"grad_norm": 0.12215295433998108,
"learning_rate": 0.0001996256,
"loss": 0.896,
"step": 120
},
{
"epoch": 0.00224,
"grad_norm": 0.12413129210472107,
"learning_rate": 0.00019956160000000002,
"loss": 0.9179,
"step": 140
},
{
"epoch": 0.00256,
"grad_norm": 0.119780533015728,
"learning_rate": 0.00019949760000000002,
"loss": 0.9253,
"step": 160
},
{
"epoch": 0.00288,
"grad_norm": 0.12296301126480103,
"learning_rate": 0.00019943360000000001,
"loss": 0.934,
"step": 180
},
{
"epoch": 0.0032,
"grad_norm": 0.10840147733688354,
"learning_rate": 0.0001993696,
"loss": 0.9346,
"step": 200
},
{
"epoch": 0.00352,
"grad_norm": 0.11459454894065857,
"learning_rate": 0.0001993056,
"loss": 0.9282,
"step": 220
},
{
"epoch": 0.00384,
"grad_norm": 0.1370021402835846,
"learning_rate": 0.0001992416,
"loss": 0.8889,
"step": 240
},
{
"epoch": 0.00416,
"grad_norm": 0.12761865556240082,
"learning_rate": 0.0001991776,
"loss": 0.9154,
"step": 260
},
{
"epoch": 0.00448,
"grad_norm": 0.10725900530815125,
"learning_rate": 0.00019911360000000002,
"loss": 0.8488,
"step": 280
},
{
"epoch": 0.0048,
"grad_norm": 0.12192831188440323,
"learning_rate": 0.0001990496,
"loss": 0.9009,
"step": 300
},
{
"epoch": 0.00512,
"grad_norm": 0.1291641741991043,
"learning_rate": 0.0001989856,
"loss": 0.8447,
"step": 320
},
{
"epoch": 0.00544,
"grad_norm": 0.1057133749127388,
"learning_rate": 0.00019892160000000003,
"loss": 0.8813,
"step": 340
},
{
"epoch": 0.00576,
"grad_norm": 0.1342541128396988,
"learning_rate": 0.0001988576,
"loss": 0.8812,
"step": 360
},
{
"epoch": 0.00608,
"grad_norm": 0.11218508332967758,
"learning_rate": 0.0001987936,
"loss": 0.9021,
"step": 380
},
{
"epoch": 0.0064,
"grad_norm": 0.16854852437973022,
"learning_rate": 0.00019872960000000002,
"loss": 0.9124,
"step": 400
},
{
"epoch": 0.00672,
"grad_norm": 0.11709938943386078,
"learning_rate": 0.0001986656,
"loss": 0.8405,
"step": 420
},
{
"epoch": 0.00704,
"grad_norm": 0.12850341200828552,
"learning_rate": 0.0001986016,
"loss": 0.8414,
"step": 440
},
{
"epoch": 0.00736,
"grad_norm": 0.14519518613815308,
"learning_rate": 0.0001985376,
"loss": 0.8812,
"step": 460
},
{
"epoch": 0.00768,
"grad_norm": 0.12263692915439606,
"learning_rate": 0.00019847360000000002,
"loss": 0.8983,
"step": 480
},
{
"epoch": 0.008,
"grad_norm": 0.13009226322174072,
"learning_rate": 0.00019840960000000002,
"loss": 0.9046,
"step": 500
},
{
"epoch": 0.00832,
"grad_norm": 0.11333148181438446,
"learning_rate": 0.00019834560000000001,
"loss": 0.9265,
"step": 520
},
{
"epoch": 0.00864,
"grad_norm": 0.1445179581642151,
"learning_rate": 0.0001982816,
"loss": 0.8138,
"step": 540
},
{
"epoch": 0.00896,
"grad_norm": 0.12200130522251129,
"learning_rate": 0.0001982176,
"loss": 0.9048,
"step": 560
},
{
"epoch": 0.00928,
"grad_norm": 0.13597029447555542,
"learning_rate": 0.0001981536,
"loss": 0.8862,
"step": 580
},
{
"epoch": 0.0096,
"grad_norm": 0.130451962351799,
"learning_rate": 0.00019808960000000002,
"loss": 0.9051,
"step": 600
},
{
"epoch": 0.00992,
"grad_norm": 0.12426720559597015,
"learning_rate": 0.00019802560000000002,
"loss": 0.8529,
"step": 620
},
{
"epoch": 0.01024,
"grad_norm": 0.13389454782009125,
"learning_rate": 0.0001979616,
"loss": 0.8612,
"step": 640
},
{
"epoch": 0.01056,
"grad_norm": 0.14324034750461578,
"learning_rate": 0.0001978976,
"loss": 0.8377,
"step": 660
},
{
"epoch": 0.01088,
"grad_norm": 0.13510741293430328,
"learning_rate": 0.00019783360000000003,
"loss": 0.8786,
"step": 680
},
{
"epoch": 0.0112,
"grad_norm": 0.13480916619300842,
"learning_rate": 0.0001977696,
"loss": 0.8832,
"step": 700
},
{
"epoch": 0.01152,
"grad_norm": 0.14060954749584198,
"learning_rate": 0.0001977056,
"loss": 0.9028,
"step": 720
},
{
"epoch": 0.01184,
"grad_norm": 0.1472562700510025,
"learning_rate": 0.00019764160000000002,
"loss": 0.8943,
"step": 740
},
{
"epoch": 0.01216,
"grad_norm": 0.15105944871902466,
"learning_rate": 0.0001975776,
"loss": 0.8931,
"step": 760
},
{
"epoch": 0.01248,
"grad_norm": 0.14458748698234558,
"learning_rate": 0.0001975136,
"loss": 0.8901,
"step": 780
},
{
"epoch": 0.0128,
"grad_norm": 0.14904917776584625,
"learning_rate": 0.0001974496,
"loss": 0.8967,
"step": 800
},
{
"epoch": 0.01312,
"grad_norm": 0.1423230618238449,
"learning_rate": 0.00019738560000000002,
"loss": 0.891,
"step": 820
},
{
"epoch": 0.01344,
"grad_norm": 0.16175036132335663,
"learning_rate": 0.00019732160000000002,
"loss": 0.9115,
"step": 840
},
{
"epoch": 0.01376,
"grad_norm": 0.1510002315044403,
"learning_rate": 0.00019725760000000001,
"loss": 0.8754,
"step": 860
},
{
"epoch": 0.01408,
"grad_norm": 0.13992249965667725,
"learning_rate": 0.0001971936,
"loss": 0.8817,
"step": 880
},
{
"epoch": 0.0144,
"grad_norm": 0.17271611094474792,
"learning_rate": 0.0001971296,
"loss": 0.8966,
"step": 900
},
{
"epoch": 0.01472,
"grad_norm": 0.13203832507133484,
"learning_rate": 0.0001970656,
"loss": 0.9368,
"step": 920
},
{
"epoch": 0.01504,
"grad_norm": 0.1591019332408905,
"learning_rate": 0.00019700160000000002,
"loss": 0.8906,
"step": 940
},
{
"epoch": 0.01536,
"grad_norm": 0.14198246598243713,
"learning_rate": 0.00019693760000000002,
"loss": 0.8643,
"step": 960
},
{
"epoch": 0.01568,
"grad_norm": 0.17151997983455658,
"learning_rate": 0.0001968736,
"loss": 0.8888,
"step": 980
},
{
"epoch": 0.016,
"grad_norm": 0.14477385580539703,
"learning_rate": 0.0001968096,
"loss": 0.8929,
"step": 1000
},
{
"epoch": 0.01632,
"grad_norm": 0.18318715691566467,
"learning_rate": 0.00019674560000000003,
"loss": 0.8512,
"step": 1020
},
{
"epoch": 0.01664,
"grad_norm": 0.13533398509025574,
"learning_rate": 0.0001966816,
"loss": 0.8791,
"step": 1040
},
{
"epoch": 0.01696,
"grad_norm": 0.15874390304088593,
"learning_rate": 0.0001966176,
"loss": 0.8834,
"step": 1060
},
{
"epoch": 0.01728,
"grad_norm": 0.15447142720222473,
"learning_rate": 0.00019655360000000001,
"loss": 0.9151,
"step": 1080
},
{
"epoch": 0.0176,
"grad_norm": 0.1866873800754547,
"learning_rate": 0.0001964896,
"loss": 0.9385,
"step": 1100
},
{
"epoch": 0.01792,
"grad_norm": 0.14127598702907562,
"learning_rate": 0.0001964256,
"loss": 0.9416,
"step": 1120
},
{
"epoch": 0.01824,
"grad_norm": 0.17219585180282593,
"learning_rate": 0.0001963616,
"loss": 0.9493,
"step": 1140
},
{
"epoch": 0.01856,
"grad_norm": 0.1528492569923401,
"learning_rate": 0.00019629760000000002,
"loss": 0.8794,
"step": 1160
},
{
"epoch": 0.01888,
"grad_norm": 0.1348702758550644,
"learning_rate": 0.00019623360000000002,
"loss": 0.9214,
"step": 1180
},
{
"epoch": 0.0192,
"grad_norm": 0.16409145295619965,
"learning_rate": 0.0001961696,
"loss": 0.9187,
"step": 1200
},
{
"epoch": 0.01952,
"grad_norm": 0.15834647417068481,
"learning_rate": 0.0001961056,
"loss": 0.8428,
"step": 1220
},
{
"epoch": 0.01984,
"grad_norm": 0.18810701370239258,
"learning_rate": 0.0001960416,
"loss": 0.9302,
"step": 1240
},
{
"epoch": 0.02016,
"grad_norm": 0.1573120802640915,
"learning_rate": 0.0001959776,
"loss": 0.8962,
"step": 1260
},
{
"epoch": 0.02048,
"grad_norm": 0.1655122935771942,
"learning_rate": 0.00019591360000000002,
"loss": 0.927,
"step": 1280
},
{
"epoch": 0.0208,
"grad_norm": 0.1711716651916504,
"learning_rate": 0.00019584960000000002,
"loss": 0.8683,
"step": 1300
},
{
"epoch": 0.02112,
"grad_norm": 0.1399732083082199,
"learning_rate": 0.0001957856,
"loss": 0.8753,
"step": 1320
},
{
"epoch": 0.02144,
"grad_norm": 0.19218869507312775,
"learning_rate": 0.0001957216,
"loss": 0.8943,
"step": 1340
},
{
"epoch": 0.02176,
"grad_norm": 0.15489013493061066,
"learning_rate": 0.00019565760000000003,
"loss": 0.8872,
"step": 1360
},
{
"epoch": 0.02208,
"grad_norm": 0.17431455850601196,
"learning_rate": 0.0001955936,
"loss": 0.9016,
"step": 1380
},
{
"epoch": 0.0224,
"grad_norm": 0.13751237094402313,
"learning_rate": 0.0001955296,
"loss": 0.8206,
"step": 1400
},
{
"epoch": 0.02272,
"grad_norm": 0.15201833844184875,
"learning_rate": 0.00019546560000000001,
"loss": 0.824,
"step": 1420
},
{
"epoch": 0.02304,
"grad_norm": 0.1994636058807373,
"learning_rate": 0.0001954016,
"loss": 0.8673,
"step": 1440
},
{
"epoch": 0.02336,
"grad_norm": 0.17202576994895935,
"learning_rate": 0.0001953376,
"loss": 0.834,
"step": 1460
},
{
"epoch": 0.02368,
"grad_norm": 0.19487006962299347,
"learning_rate": 0.0001952736,
"loss": 0.9347,
"step": 1480
},
{
"epoch": 0.024,
"grad_norm": 0.16891010105609894,
"learning_rate": 0.00019520960000000002,
"loss": 0.8873,
"step": 1500
},
{
"epoch": 0.02432,
"grad_norm": 0.18789614737033844,
"learning_rate": 0.00019514560000000002,
"loss": 0.8883,
"step": 1520
},
{
"epoch": 0.02464,
"grad_norm": 0.19695357978343964,
"learning_rate": 0.0001950816,
"loss": 0.9197,
"step": 1540
},
{
"epoch": 0.02496,
"grad_norm": 0.13254858553409576,
"learning_rate": 0.0001950176,
"loss": 0.8877,
"step": 1560
},
{
"epoch": 0.02528,
"grad_norm": 0.1932552009820938,
"learning_rate": 0.0001949536,
"loss": 0.8719,
"step": 1580
},
{
"epoch": 0.0256,
"grad_norm": 0.18450401723384857,
"learning_rate": 0.0001948896,
"loss": 0.8889,
"step": 1600
},
{
"epoch": 0.02592,
"grad_norm": 0.14565275609493256,
"learning_rate": 0.00019482560000000002,
"loss": 0.8882,
"step": 1620
},
{
"epoch": 0.02624,
"grad_norm": 0.17898815870285034,
"learning_rate": 0.00019476160000000002,
"loss": 0.9387,
"step": 1640
},
{
"epoch": 0.02656,
"grad_norm": 0.1418757438659668,
"learning_rate": 0.0001946976,
"loss": 0.835,
"step": 1660
},
{
"epoch": 0.02688,
"grad_norm": 0.1738288700580597,
"learning_rate": 0.0001946336,
"loss": 0.8402,
"step": 1680
},
{
"epoch": 0.0272,
"grad_norm": 0.15658064186573029,
"learning_rate": 0.00019456960000000003,
"loss": 0.8626,
"step": 1700
},
{
"epoch": 0.02752,
"grad_norm": 0.1640857756137848,
"learning_rate": 0.0001945056,
"loss": 0.9208,
"step": 1720
},
{
"epoch": 0.02784,
"grad_norm": 0.18185724318027496,
"learning_rate": 0.0001944416,
"loss": 0.9001,
"step": 1740
},
{
"epoch": 0.02816,
"grad_norm": 0.1771153062582016,
"learning_rate": 0.00019437760000000001,
"loss": 0.9175,
"step": 1760
},
{
"epoch": 0.02848,
"grad_norm": 0.1369091272354126,
"learning_rate": 0.0001943136,
"loss": 0.9019,
"step": 1780
},
{
"epoch": 0.0288,
"grad_norm": 0.18259896337985992,
"learning_rate": 0.0001942496,
"loss": 0.9223,
"step": 1800
},
{
"epoch": 0.02912,
"grad_norm": 0.15459062159061432,
"learning_rate": 0.0001941856,
"loss": 0.9302,
"step": 1820
},
{
"epoch": 0.02944,
"grad_norm": 0.19653448462486267,
"learning_rate": 0.00019412160000000002,
"loss": 0.9116,
"step": 1840
},
{
"epoch": 0.02976,
"grad_norm": 0.18698687851428986,
"learning_rate": 0.00019405760000000002,
"loss": 0.8874,
"step": 1860
},
{
"epoch": 0.03008,
"grad_norm": 0.21010226011276245,
"learning_rate": 0.0001939936,
"loss": 0.8956,
"step": 1880
},
{
"epoch": 0.0304,
"grad_norm": 0.15704917907714844,
"learning_rate": 0.0001939296,
"loss": 0.8816,
"step": 1900
},
{
"epoch": 0.03072,
"grad_norm": 0.16836212575435638,
"learning_rate": 0.0001938656,
"loss": 0.8419,
"step": 1920
},
{
"epoch": 0.03104,
"grad_norm": 0.15333925187587738,
"learning_rate": 0.0001938016,
"loss": 0.9068,
"step": 1940
},
{
"epoch": 0.03136,
"grad_norm": 0.18370755016803741,
"learning_rate": 0.00019373760000000002,
"loss": 0.8932,
"step": 1960
},
{
"epoch": 0.03168,
"grad_norm": 0.16511815786361694,
"learning_rate": 0.00019367360000000002,
"loss": 0.9328,
"step": 1980
},
{
"epoch": 0.032,
"grad_norm": 0.16475580632686615,
"learning_rate": 0.0001936096,
"loss": 0.9234,
"step": 2000
},
{
"epoch": 0.03232,
"grad_norm": 0.17609569430351257,
"learning_rate": 0.0001935456,
"loss": 0.9281,
"step": 2020
},
{
"epoch": 0.03264,
"grad_norm": 0.1759602576494217,
"learning_rate": 0.00019348160000000003,
"loss": 0.8592,
"step": 2040
},
{
"epoch": 0.03296,
"grad_norm": 0.1785658448934555,
"learning_rate": 0.0001934176,
"loss": 0.9156,
"step": 2060
},
{
"epoch": 0.03328,
"grad_norm": 0.20041823387145996,
"learning_rate": 0.0001933536,
"loss": 0.8585,
"step": 2080
},
{
"epoch": 0.0336,
"grad_norm": 0.2025129646062851,
"learning_rate": 0.00019328960000000001,
"loss": 0.883,
"step": 2100
},
{
"epoch": 0.03392,
"grad_norm": 0.1853547841310501,
"learning_rate": 0.0001932256,
"loss": 0.9493,
"step": 2120
},
{
"epoch": 0.03424,
"grad_norm": 0.1714346706867218,
"learning_rate": 0.0001931616,
"loss": 0.9102,
"step": 2140
},
{
"epoch": 0.03456,
"grad_norm": 0.14508432149887085,
"learning_rate": 0.0001930976,
"loss": 0.8636,
"step": 2160
},
{
"epoch": 0.03488,
"grad_norm": 0.15658248960971832,
"learning_rate": 0.00019303360000000002,
"loss": 0.8495,
"step": 2180
},
{
"epoch": 0.0352,
"grad_norm": 0.1980847865343094,
"learning_rate": 0.00019296960000000002,
"loss": 0.8814,
"step": 2200
},
{
"epoch": 0.03552,
"grad_norm": 0.18244528770446777,
"learning_rate": 0.0001929056,
"loss": 0.896,
"step": 2220
},
{
"epoch": 0.03584,
"grad_norm": 0.19880063831806183,
"learning_rate": 0.0001928416,
"loss": 0.8725,
"step": 2240
},
{
"epoch": 0.03616,
"grad_norm": 0.2539379894733429,
"learning_rate": 0.0001927776,
"loss": 0.936,
"step": 2260
},
{
"epoch": 0.03648,
"grad_norm": 0.17734292149543762,
"learning_rate": 0.0001927136,
"loss": 0.8839,
"step": 2280
},
{
"epoch": 0.0368,
"grad_norm": 0.15432968735694885,
"learning_rate": 0.00019264960000000002,
"loss": 0.8977,
"step": 2300
},
{
"epoch": 0.03712,
"grad_norm": 0.17004595696926117,
"learning_rate": 0.00019258560000000001,
"loss": 0.8974,
"step": 2320
},
{
"epoch": 0.03744,
"grad_norm": 0.16686637699604034,
"learning_rate": 0.0001925216,
"loss": 0.8822,
"step": 2340
},
{
"epoch": 0.03776,
"grad_norm": 0.16283023357391357,
"learning_rate": 0.0001924576,
"loss": 0.9273,
"step": 2360
},
{
"epoch": 0.03808,
"grad_norm": 0.1839868277311325,
"learning_rate": 0.00019239360000000003,
"loss": 0.8829,
"step": 2380
},
{
"epoch": 0.0384,
"grad_norm": 0.1701708436012268,
"learning_rate": 0.0001923296,
"loss": 0.9052,
"step": 2400
},
{
"epoch": 0.03872,
"grad_norm": 0.16713082790374756,
"learning_rate": 0.0001922656,
"loss": 0.8653,
"step": 2420
},
{
"epoch": 0.03904,
"grad_norm": 0.16699771583080292,
"learning_rate": 0.0001922016,
"loss": 0.8644,
"step": 2440
},
{
"epoch": 0.03936,
"grad_norm": 0.15876609086990356,
"learning_rate": 0.0001921376,
"loss": 0.8703,
"step": 2460
},
{
"epoch": 0.03968,
"grad_norm": 0.1910441368818283,
"learning_rate": 0.0001920736,
"loss": 0.8637,
"step": 2480
},
{
"epoch": 0.04,
"grad_norm": 0.18075905740261078,
"learning_rate": 0.0001920096,
"loss": 0.8566,
"step": 2500
},
{
"epoch": 0.04032,
"grad_norm": 0.19470706582069397,
"learning_rate": 0.00019194560000000002,
"loss": 0.9013,
"step": 2520
},
{
"epoch": 0.04064,
"grad_norm": 0.19072532653808594,
"learning_rate": 0.00019188160000000002,
"loss": 0.9026,
"step": 2540
},
{
"epoch": 0.04096,
"grad_norm": 0.17622806131839752,
"learning_rate": 0.0001918176,
"loss": 0.8705,
"step": 2560
},
{
"epoch": 0.04128,
"grad_norm": 0.19638915359973907,
"learning_rate": 0.0001917536,
"loss": 0.8606,
"step": 2580
},
{
"epoch": 0.0416,
"grad_norm": 0.18957193195819855,
"learning_rate": 0.0001916896,
"loss": 0.9041,
"step": 2600
},
{
"epoch": 0.04192,
"grad_norm": 0.1762382835149765,
"learning_rate": 0.0001916256,
"loss": 0.8593,
"step": 2620
},
{
"epoch": 0.04224,
"grad_norm": 0.16159483790397644,
"learning_rate": 0.00019156160000000002,
"loss": 0.9049,
"step": 2640
},
{
"epoch": 0.04256,
"grad_norm": 0.19137801229953766,
"learning_rate": 0.00019149760000000001,
"loss": 0.9062,
"step": 2660
},
{
"epoch": 0.04288,
"grad_norm": 0.19132420420646667,
"learning_rate": 0.0001914336,
"loss": 0.8678,
"step": 2680
},
{
"epoch": 0.0432,
"grad_norm": 0.1738004982471466,
"learning_rate": 0.0001913696,
"loss": 0.898,
"step": 2700
},
{
"epoch": 0.04352,
"grad_norm": 0.19048957526683807,
"learning_rate": 0.00019130560000000003,
"loss": 0.8471,
"step": 2720
},
{
"epoch": 0.04384,
"grad_norm": 0.19051052629947662,
"learning_rate": 0.0001912416,
"loss": 0.8878,
"step": 2740
},
{
"epoch": 0.04416,
"grad_norm": 0.18549174070358276,
"learning_rate": 0.0001911776,
"loss": 0.9166,
"step": 2760
},
{
"epoch": 0.04448,
"grad_norm": 0.20678356289863586,
"learning_rate": 0.0001911136,
"loss": 0.8624,
"step": 2780
},
{
"epoch": 0.0448,
"grad_norm": 0.20438261330127716,
"learning_rate": 0.0001910496,
"loss": 0.882,
"step": 2800
},
{
"epoch": 0.04512,
"grad_norm": 0.1805305778980255,
"learning_rate": 0.0001909856,
"loss": 0.8867,
"step": 2820
},
{
"epoch": 0.04544,
"grad_norm": 0.2102346122264862,
"learning_rate": 0.0001909216,
"loss": 0.8605,
"step": 2840
},
{
"epoch": 0.04576,
"grad_norm": 0.17274044454097748,
"learning_rate": 0.00019085760000000002,
"loss": 0.8529,
"step": 2860
},
{
"epoch": 0.04608,
"grad_norm": 0.19794899225234985,
"learning_rate": 0.00019079360000000002,
"loss": 0.8778,
"step": 2880
},
{
"epoch": 0.0464,
"grad_norm": 0.19638848304748535,
"learning_rate": 0.0001907296,
"loss": 0.848,
"step": 2900
},
{
"epoch": 0.04672,
"grad_norm": 0.20513470470905304,
"learning_rate": 0.0001906656,
"loss": 0.8791,
"step": 2920
},
{
"epoch": 0.04704,
"grad_norm": 0.18168902397155762,
"learning_rate": 0.0001906016,
"loss": 0.9258,
"step": 2940
},
{
"epoch": 0.04736,
"grad_norm": 0.1906946301460266,
"learning_rate": 0.0001905376,
"loss": 0.9339,
"step": 2960
},
{
"epoch": 0.04768,
"grad_norm": 0.20983171463012695,
"learning_rate": 0.00019047360000000002,
"loss": 0.9209,
"step": 2980
},
{
"epoch": 0.048,
"grad_norm": 0.18700706958770752,
"learning_rate": 0.00019040960000000001,
"loss": 0.8569,
"step": 3000
},
{
"epoch": 0.04832,
"grad_norm": 0.18951478600502014,
"learning_rate": 0.0001903456,
"loss": 0.9087,
"step": 3020
},
{
"epoch": 0.04864,
"grad_norm": 0.18202978372573853,
"learning_rate": 0.0001902816,
"loss": 0.9239,
"step": 3040
},
{
"epoch": 0.04896,
"grad_norm": 0.21562401950359344,
"learning_rate": 0.00019021760000000003,
"loss": 0.8721,
"step": 3060
},
{
"epoch": 0.04928,
"grad_norm": 0.18537688255310059,
"learning_rate": 0.0001901536,
"loss": 0.8798,
"step": 3080
},
{
"epoch": 0.0496,
"grad_norm": 0.1878584325313568,
"learning_rate": 0.0001900896,
"loss": 0.9315,
"step": 3100
},
{
"epoch": 0.04992,
"grad_norm": 0.1872929185628891,
"learning_rate": 0.0001900256,
"loss": 0.9202,
"step": 3120
},
{
"epoch": 0.05024,
"grad_norm": 0.1833094209432602,
"learning_rate": 0.0001899616,
"loss": 0.8837,
"step": 3140
},
{
"epoch": 0.05056,
"grad_norm": 0.18516699969768524,
"learning_rate": 0.0001898976,
"loss": 0.892,
"step": 3160
},
{
"epoch": 0.05088,
"grad_norm": 0.1559123992919922,
"learning_rate": 0.0001898336,
"loss": 0.8998,
"step": 3180
},
{
"epoch": 0.0512,
"grad_norm": 0.17760765552520752,
"learning_rate": 0.00018976960000000002,
"loss": 0.9178,
"step": 3200
},
{
"epoch": 0.05152,
"grad_norm": 0.1603628695011139,
"learning_rate": 0.00018970560000000002,
"loss": 0.8732,
"step": 3220
},
{
"epoch": 0.05184,
"grad_norm": 0.17330580949783325,
"learning_rate": 0.0001896416,
"loss": 0.9528,
"step": 3240
},
{
"epoch": 0.05216,
"grad_norm": 0.1774517297744751,
"learning_rate": 0.0001895776,
"loss": 0.9112,
"step": 3260
},
{
"epoch": 0.05248,
"grad_norm": 0.19834113121032715,
"learning_rate": 0.0001895136,
"loss": 0.8799,
"step": 3280
},
{
"epoch": 0.0528,
"grad_norm": 0.197114035487175,
"learning_rate": 0.0001894496,
"loss": 0.8898,
"step": 3300
},
{
"epoch": 0.05312,
"grad_norm": 0.21631449460983276,
"learning_rate": 0.00018938560000000002,
"loss": 0.8889,
"step": 3320
},
{
"epoch": 0.05344,
"grad_norm": 0.1554328352212906,
"learning_rate": 0.00018932160000000001,
"loss": 0.9055,
"step": 3340
},
{
"epoch": 0.05376,
"grad_norm": 0.17191193997859955,
"learning_rate": 0.0001892576,
"loss": 0.8713,
"step": 3360
},
{
"epoch": 0.05408,
"grad_norm": 0.18753254413604736,
"learning_rate": 0.0001891936,
"loss": 0.9223,
"step": 3380
},
{
"epoch": 0.0544,
"grad_norm": 0.172084778547287,
"learning_rate": 0.00018912960000000003,
"loss": 0.931,
"step": 3400
},
{
"epoch": 0.05472,
"grad_norm": 0.19548653066158295,
"learning_rate": 0.0001890656,
"loss": 0.8795,
"step": 3420
},
{
"epoch": 0.05504,
"grad_norm": 0.19771696627140045,
"learning_rate": 0.0001890016,
"loss": 0.8904,
"step": 3440
},
{
"epoch": 0.05536,
"grad_norm": 0.18042775988578796,
"learning_rate": 0.0001889376,
"loss": 0.8289,
"step": 3460
},
{
"epoch": 0.05568,
"grad_norm": 0.20334866642951965,
"learning_rate": 0.0001888736,
"loss": 0.8988,
"step": 3480
},
{
"epoch": 0.056,
"grad_norm": 0.2053702026605606,
"learning_rate": 0.0001888096,
"loss": 0.85,
"step": 3500
},
{
"epoch": 0.05632,
"grad_norm": 0.18091996014118195,
"learning_rate": 0.00018874560000000002,
"loss": 0.8816,
"step": 3520
},
{
"epoch": 0.05664,
"grad_norm": 0.1538042575120926,
"learning_rate": 0.00018868160000000002,
"loss": 0.8792,
"step": 3540
},
{
"epoch": 0.05696,
"grad_norm": 0.21067845821380615,
"learning_rate": 0.00018861760000000002,
"loss": 0.9104,
"step": 3560
},
{
"epoch": 0.05728,
"grad_norm": 0.17531852424144745,
"learning_rate": 0.0001885536,
"loss": 0.9073,
"step": 3580
},
{
"epoch": 0.0576,
"grad_norm": 0.16701558232307434,
"learning_rate": 0.0001884896,
"loss": 0.8782,
"step": 3600
},
{
"epoch": 0.05792,
"grad_norm": 0.20766527950763702,
"learning_rate": 0.0001884256,
"loss": 0.8751,
"step": 3620
},
{
"epoch": 0.05824,
"grad_norm": 0.19526097178459167,
"learning_rate": 0.0001883616,
"loss": 0.87,
"step": 3640
},
{
"epoch": 0.05856,
"grad_norm": 0.16312770545482635,
"learning_rate": 0.00018829760000000002,
"loss": 0.8904,
"step": 3660
},
{
"epoch": 0.05888,
"grad_norm": 0.18951712548732758,
"learning_rate": 0.0001882336,
"loss": 0.9183,
"step": 3680
},
{
"epoch": 0.0592,
"grad_norm": 0.1615159958600998,
"learning_rate": 0.0001881696,
"loss": 0.8602,
"step": 3700
},
{
"epoch": 0.05952,
"grad_norm": 0.20840367674827576,
"learning_rate": 0.0001881056,
"loss": 0.9207,
"step": 3720
},
{
"epoch": 0.05984,
"grad_norm": 0.19745437800884247,
"learning_rate": 0.00018804160000000003,
"loss": 0.8962,
"step": 3740
},
{
"epoch": 0.06016,
"grad_norm": 0.1767299473285675,
"learning_rate": 0.0001879776,
"loss": 0.8357,
"step": 3760
},
{
"epoch": 0.06048,
"grad_norm": 0.16729581356048584,
"learning_rate": 0.0001879136,
"loss": 0.8993,
"step": 3780
},
{
"epoch": 0.0608,
"grad_norm": 0.1816299855709076,
"learning_rate": 0.0001878496,
"loss": 0.8775,
"step": 3800
},
{
"epoch": 0.06112,
"grad_norm": 0.17500704526901245,
"learning_rate": 0.0001877856,
"loss": 0.8829,
"step": 3820
},
{
"epoch": 0.06144,
"grad_norm": 0.1851237714290619,
"learning_rate": 0.0001877216,
"loss": 0.8478,
"step": 3840
},
{
"epoch": 0.06176,
"grad_norm": 0.19113439321517944,
"learning_rate": 0.00018765760000000002,
"loss": 0.9024,
"step": 3860
},
{
"epoch": 0.06208,
"grad_norm": 0.1793053150177002,
"learning_rate": 0.00018759360000000002,
"loss": 0.9191,
"step": 3880
},
{
"epoch": 0.0624,
"grad_norm": 0.19696858525276184,
"learning_rate": 0.00018752960000000001,
"loss": 0.9023,
"step": 3900
},
{
"epoch": 0.06272,
"grad_norm": 0.19326741993427277,
"learning_rate": 0.0001874656,
"loss": 0.8434,
"step": 3920
},
{
"epoch": 0.06304,
"grad_norm": 0.1995677947998047,
"learning_rate": 0.0001874016,
"loss": 0.8569,
"step": 3940
},
{
"epoch": 0.06336,
"grad_norm": 0.1579284369945526,
"learning_rate": 0.0001873376,
"loss": 0.8722,
"step": 3960
},
{
"epoch": 0.06368,
"grad_norm": 0.20145860314369202,
"learning_rate": 0.0001872736,
"loss": 0.8586,
"step": 3980
},
{
"epoch": 0.064,
"grad_norm": 0.16962005198001862,
"learning_rate": 0.00018720960000000002,
"loss": 0.8256,
"step": 4000
},
{
"epoch": 0.06432,
"grad_norm": 0.14154337346553802,
"learning_rate": 0.0001871456,
"loss": 0.9169,
"step": 4020
},
{
"epoch": 0.06464,
"grad_norm": 0.18831445276737213,
"learning_rate": 0.0001870816,
"loss": 0.8717,
"step": 4040
},
{
"epoch": 0.06496,
"grad_norm": 0.2613060176372528,
"learning_rate": 0.0001870176,
"loss": 0.8946,
"step": 4060
},
{
"epoch": 0.06528,
"grad_norm": 0.1657022088766098,
"learning_rate": 0.00018695360000000003,
"loss": 0.8721,
"step": 4080
},
{
"epoch": 0.0656,
"grad_norm": 0.17723548412322998,
"learning_rate": 0.0001868896,
"loss": 0.8481,
"step": 4100
},
{
"epoch": 0.06592,
"grad_norm": 0.1840563416481018,
"learning_rate": 0.0001868256,
"loss": 0.8963,
"step": 4120
},
{
"epoch": 0.06624,
"grad_norm": 0.19427619874477386,
"learning_rate": 0.0001867616,
"loss": 0.8473,
"step": 4140
},
{
"epoch": 0.06656,
"grad_norm": 0.20632588863372803,
"learning_rate": 0.0001866976,
"loss": 0.8962,
"step": 4160
},
{
"epoch": 0.06688,
"grad_norm": 0.17780327796936035,
"learning_rate": 0.0001866336,
"loss": 0.9016,
"step": 4180
},
{
"epoch": 0.0672,
"grad_norm": 0.17626479268074036,
"learning_rate": 0.00018656960000000002,
"loss": 0.8949,
"step": 4200
},
{
"epoch": 0.06752,
"grad_norm": 0.19475996494293213,
"learning_rate": 0.00018650560000000002,
"loss": 0.9152,
"step": 4220
},
{
"epoch": 0.06784,
"grad_norm": 0.2053624838590622,
"learning_rate": 0.00018644160000000001,
"loss": 0.9467,
"step": 4240
},
{
"epoch": 0.06816,
"grad_norm": 0.17303887009620667,
"learning_rate": 0.0001863776,
"loss": 0.9104,
"step": 4260
},
{
"epoch": 0.06848,
"grad_norm": 0.19969859719276428,
"learning_rate": 0.0001863136,
"loss": 0.8578,
"step": 4280
},
{
"epoch": 0.0688,
"grad_norm": 0.23917217552661896,
"learning_rate": 0.0001862496,
"loss": 0.8999,
"step": 4300
},
{
"epoch": 0.06912,
"grad_norm": 0.18194426596164703,
"learning_rate": 0.0001861856,
"loss": 0.9014,
"step": 4320
},
{
"epoch": 0.06944,
"grad_norm": 0.21291664242744446,
"learning_rate": 0.00018612160000000002,
"loss": 0.9131,
"step": 4340
},
{
"epoch": 0.06976,
"grad_norm": 0.18465067446231842,
"learning_rate": 0.0001860576,
"loss": 0.8859,
"step": 4360
},
{
"epoch": 0.07008,
"grad_norm": 0.22093325853347778,
"learning_rate": 0.0001859936,
"loss": 0.9038,
"step": 4380
},
{
"epoch": 0.0704,
"grad_norm": 0.1888457089662552,
"learning_rate": 0.0001859296,
"loss": 0.8468,
"step": 4400
},
{
"epoch": 0.07072,
"grad_norm": 0.19705061614513397,
"learning_rate": 0.00018586560000000003,
"loss": 0.8871,
"step": 4420
},
{
"epoch": 0.07104,
"grad_norm": 0.20150603353977203,
"learning_rate": 0.0001858016,
"loss": 0.8391,
"step": 4440
},
{
"epoch": 0.07136,
"grad_norm": 0.21136346459388733,
"learning_rate": 0.0001857376,
"loss": 0.8528,
"step": 4460
},
{
"epoch": 0.07168,
"grad_norm": 0.20985183119773865,
"learning_rate": 0.0001856736,
"loss": 0.9093,
"step": 4480
},
{
"epoch": 0.072,
"grad_norm": 0.1725299060344696,
"learning_rate": 0.0001856096,
"loss": 0.849,
"step": 4500
},
{
"epoch": 0.07232,
"grad_norm": 0.19184072315692902,
"learning_rate": 0.0001855456,
"loss": 0.8414,
"step": 4520
},
{
"epoch": 0.07264,
"grad_norm": 0.1758476197719574,
"learning_rate": 0.00018548160000000002,
"loss": 0.9081,
"step": 4540
},
{
"epoch": 0.07296,
"grad_norm": 0.1840459555387497,
"learning_rate": 0.00018541760000000002,
"loss": 0.9149,
"step": 4560
},
{
"epoch": 0.07328,
"grad_norm": 0.1862034946680069,
"learning_rate": 0.00018535360000000001,
"loss": 0.8879,
"step": 4580
},
{
"epoch": 0.0736,
"grad_norm": 0.21543624997138977,
"learning_rate": 0.0001852896,
"loss": 0.8753,
"step": 4600
},
{
"epoch": 0.07392,
"grad_norm": 0.18351414799690247,
"learning_rate": 0.0001852256,
"loss": 0.8977,
"step": 4620
},
{
"epoch": 0.07424,
"grad_norm": 0.2166828215122223,
"learning_rate": 0.0001851616,
"loss": 0.8669,
"step": 4640
},
{
"epoch": 0.07456,
"grad_norm": 0.19744159281253815,
"learning_rate": 0.0001850976,
"loss": 0.8846,
"step": 4660
},
{
"epoch": 0.07488,
"grad_norm": 0.19065077602863312,
"learning_rate": 0.00018503360000000002,
"loss": 0.8715,
"step": 4680
},
{
"epoch": 0.0752,
"grad_norm": 0.17913594841957092,
"learning_rate": 0.0001849696,
"loss": 0.8777,
"step": 4700
},
{
"epoch": 0.07552,
"grad_norm": 0.2282969057559967,
"learning_rate": 0.0001849056,
"loss": 0.8598,
"step": 4720
},
{
"epoch": 0.07584,
"grad_norm": 0.2031577080488205,
"learning_rate": 0.0001848416,
"loss": 0.928,
"step": 4740
},
{
"epoch": 0.07616,
"grad_norm": 0.24187202751636505,
"learning_rate": 0.00018477760000000002,
"loss": 0.9169,
"step": 4760
},
{
"epoch": 0.07648,
"grad_norm": 0.2227555513381958,
"learning_rate": 0.0001847136,
"loss": 0.914,
"step": 4780
},
{
"epoch": 0.0768,
"grad_norm": 0.2157488912343979,
"learning_rate": 0.0001846496,
"loss": 0.8697,
"step": 4800
},
{
"epoch": 0.07712,
"grad_norm": 0.19421465694904327,
"learning_rate": 0.0001845856,
"loss": 0.9358,
"step": 4820
},
{
"epoch": 0.07744,
"grad_norm": 0.2111523300409317,
"learning_rate": 0.0001845216,
"loss": 0.8708,
"step": 4840
},
{
"epoch": 0.07776,
"grad_norm": 0.23789940774440765,
"learning_rate": 0.0001844576,
"loss": 0.9036,
"step": 4860
},
{
"epoch": 0.07808,
"grad_norm": 0.19063900411128998,
"learning_rate": 0.00018439360000000002,
"loss": 0.8825,
"step": 4880
},
{
"epoch": 0.0784,
"grad_norm": 0.18922486901283264,
"learning_rate": 0.00018432960000000002,
"loss": 0.9094,
"step": 4900
},
{
"epoch": 0.07872,
"grad_norm": 0.19124048948287964,
"learning_rate": 0.0001842656,
"loss": 0.9422,
"step": 4920
},
{
"epoch": 0.07904,
"grad_norm": 0.19916868209838867,
"learning_rate": 0.0001842016,
"loss": 0.9341,
"step": 4940
},
{
"epoch": 0.07936,
"grad_norm": 0.19486361742019653,
"learning_rate": 0.0001841376,
"loss": 0.8836,
"step": 4960
},
{
"epoch": 0.07968,
"grad_norm": 0.20217594504356384,
"learning_rate": 0.0001840736,
"loss": 0.8485,
"step": 4980
},
{
"epoch": 0.08,
"grad_norm": 0.18520930409431458,
"learning_rate": 0.0001840096,
"loss": 0.8887,
"step": 5000
},
{
"epoch": 0.08032,
"grad_norm": 0.1816449910402298,
"learning_rate": 0.00018394560000000002,
"loss": 0.8668,
"step": 5020
},
{
"epoch": 0.08064,
"grad_norm": 0.21598085761070251,
"learning_rate": 0.0001838816,
"loss": 0.947,
"step": 5040
},
{
"epoch": 0.08096,
"grad_norm": 0.21336813271045685,
"learning_rate": 0.0001838176,
"loss": 0.928,
"step": 5060
},
{
"epoch": 0.08128,
"grad_norm": 0.18636910617351532,
"learning_rate": 0.0001837536,
"loss": 0.858,
"step": 5080
},
{
"epoch": 0.0816,
"grad_norm": 0.20049895346164703,
"learning_rate": 0.00018368960000000002,
"loss": 0.8937,
"step": 5100
},
{
"epoch": 0.08192,
"grad_norm": 0.2153417468070984,
"learning_rate": 0.00018362560000000002,
"loss": 0.9052,
"step": 5120
},
{
"epoch": 0.08224,
"grad_norm": 0.2149072140455246,
"learning_rate": 0.0001835616,
"loss": 0.8961,
"step": 5140
},
{
"epoch": 0.08256,
"grad_norm": 0.19339273869991302,
"learning_rate": 0.0001834976,
"loss": 0.9128,
"step": 5160
},
{
"epoch": 0.08288,
"grad_norm": 0.23768258094787598,
"learning_rate": 0.0001834336,
"loss": 0.877,
"step": 5180
},
{
"epoch": 0.0832,
"grad_norm": 0.20677222311496735,
"learning_rate": 0.0001833696,
"loss": 0.8989,
"step": 5200
},
{
"epoch": 0.08352,
"grad_norm": 0.2008122056722641,
"learning_rate": 0.00018330560000000002,
"loss": 0.8911,
"step": 5220
},
{
"epoch": 0.08384,
"grad_norm": 0.1981019526720047,
"learning_rate": 0.00018324160000000002,
"loss": 0.9085,
"step": 5240
},
{
"epoch": 0.08416,
"grad_norm": 0.22739489376544952,
"learning_rate": 0.0001831776,
"loss": 0.8804,
"step": 5260
},
{
"epoch": 0.08448,
"grad_norm": 0.2044532150030136,
"learning_rate": 0.0001831136,
"loss": 0.8438,
"step": 5280
},
{
"epoch": 0.0848,
"grad_norm": 0.23086583614349365,
"learning_rate": 0.0001830496,
"loss": 0.8904,
"step": 5300
},
{
"epoch": 0.08512,
"grad_norm": 0.1737246811389923,
"learning_rate": 0.0001829856,
"loss": 0.8399,
"step": 5320
},
{
"epoch": 0.08544,
"grad_norm": 0.19789084792137146,
"learning_rate": 0.0001829216,
"loss": 0.8928,
"step": 5340
},
{
"epoch": 0.08576,
"grad_norm": 0.19274166226387024,
"learning_rate": 0.00018285760000000002,
"loss": 0.9071,
"step": 5360
},
{
"epoch": 0.08608,
"grad_norm": 0.18289533257484436,
"learning_rate": 0.0001827936,
"loss": 0.885,
"step": 5380
},
{
"epoch": 0.0864,
"grad_norm": 0.20274992287158966,
"learning_rate": 0.0001827296,
"loss": 0.8716,
"step": 5400
},
{
"epoch": 0.08672,
"grad_norm": 0.20618405938148499,
"learning_rate": 0.0001826656,
"loss": 0.9022,
"step": 5420
},
{
"epoch": 0.08704,
"grad_norm": 0.18017026782035828,
"learning_rate": 0.00018260160000000002,
"loss": 0.8997,
"step": 5440
},
{
"epoch": 0.08736,
"grad_norm": 0.17250943183898926,
"learning_rate": 0.00018253760000000002,
"loss": 0.8778,
"step": 5460
},
{
"epoch": 0.08768,
"grad_norm": 0.21039535105228424,
"learning_rate": 0.0001824736,
"loss": 0.8629,
"step": 5480
},
{
"epoch": 0.088,
"grad_norm": 0.1946125328540802,
"learning_rate": 0.0001824096,
"loss": 0.9527,
"step": 5500
},
{
"epoch": 0.08832,
"grad_norm": 0.20565049350261688,
"learning_rate": 0.0001823456,
"loss": 0.8627,
"step": 5520
},
{
"epoch": 0.08864,
"grad_norm": 0.16778771579265594,
"learning_rate": 0.0001822816,
"loss": 0.879,
"step": 5540
},
{
"epoch": 0.08896,
"grad_norm": 0.1957644522190094,
"learning_rate": 0.00018221760000000002,
"loss": 0.9253,
"step": 5560
},
{
"epoch": 0.08928,
"grad_norm": 0.20745377242565155,
"learning_rate": 0.00018215360000000002,
"loss": 0.9006,
"step": 5580
},
{
"epoch": 0.0896,
"grad_norm": 0.19847019016742706,
"learning_rate": 0.0001820896,
"loss": 0.9176,
"step": 5600
},
{
"epoch": 0.08992,
"grad_norm": 0.22231200337409973,
"learning_rate": 0.0001820256,
"loss": 0.9174,
"step": 5620
},
{
"epoch": 0.09024,
"grad_norm": 0.21002036333084106,
"learning_rate": 0.0001819616,
"loss": 0.8773,
"step": 5640
},
{
"epoch": 0.09056,
"grad_norm": 0.18204717338085175,
"learning_rate": 0.0001818976,
"loss": 0.9038,
"step": 5660
},
{
"epoch": 0.09088,
"grad_norm": 0.21081459522247314,
"learning_rate": 0.0001818336,
"loss": 0.8409,
"step": 5680
},
{
"epoch": 0.0912,
"grad_norm": 0.1905379593372345,
"learning_rate": 0.00018176960000000002,
"loss": 0.9125,
"step": 5700
},
{
"epoch": 0.09152,
"grad_norm": 0.17761899530887604,
"learning_rate": 0.0001817056,
"loss": 0.8617,
"step": 5720
},
{
"epoch": 0.09184,
"grad_norm": 0.20881423354148865,
"learning_rate": 0.0001816416,
"loss": 0.8769,
"step": 5740
},
{
"epoch": 0.09216,
"grad_norm": 0.22868691384792328,
"learning_rate": 0.0001815776,
"loss": 0.8426,
"step": 5760
},
{
"epoch": 0.09248,
"grad_norm": 0.2537609040737152,
"learning_rate": 0.00018151360000000002,
"loss": 0.9347,
"step": 5780
},
{
"epoch": 0.0928,
"grad_norm": 0.2280977964401245,
"learning_rate": 0.00018144960000000002,
"loss": 0.89,
"step": 5800
},
{
"epoch": 0.09312,
"grad_norm": 0.22828595340251923,
"learning_rate": 0.0001813856,
"loss": 0.8818,
"step": 5820
},
{
"epoch": 0.09344,
"grad_norm": 0.19653092324733734,
"learning_rate": 0.0001813216,
"loss": 0.8944,
"step": 5840
},
{
"epoch": 0.09376,
"grad_norm": 0.2112797498703003,
"learning_rate": 0.0001812576,
"loss": 0.8945,
"step": 5860
},
{
"epoch": 0.09408,
"grad_norm": 0.21034376323223114,
"learning_rate": 0.0001811936,
"loss": 0.877,
"step": 5880
},
{
"epoch": 0.0944,
"grad_norm": 0.20544138550758362,
"learning_rate": 0.00018112960000000002,
"loss": 0.8955,
"step": 5900
},
{
"epoch": 0.09472,
"grad_norm": 0.18214848637580872,
"learning_rate": 0.00018106560000000002,
"loss": 0.8538,
"step": 5920
},
{
"epoch": 0.09504,
"grad_norm": 0.19273880124092102,
"learning_rate": 0.0001810016,
"loss": 0.9267,
"step": 5940
},
{
"epoch": 0.09536,
"grad_norm": 0.16388094425201416,
"learning_rate": 0.0001809376,
"loss": 0.8903,
"step": 5960
},
{
"epoch": 0.09568,
"grad_norm": 0.19152410328388214,
"learning_rate": 0.0001808736,
"loss": 0.8994,
"step": 5980
},
{
"epoch": 0.096,
"grad_norm": 0.20129649341106415,
"learning_rate": 0.0001808096,
"loss": 0.9065,
"step": 6000
},
{
"epoch": 0.09632,
"grad_norm": 0.2275884598493576,
"learning_rate": 0.0001807456,
"loss": 0.8745,
"step": 6020
},
{
"epoch": 0.09664,
"grad_norm": 0.1939428150653839,
"learning_rate": 0.00018068160000000002,
"loss": 0.9147,
"step": 6040
},
{
"epoch": 0.09696,
"grad_norm": 0.21504884958267212,
"learning_rate": 0.0001806176,
"loss": 0.8575,
"step": 6060
},
{
"epoch": 0.09728,
"grad_norm": 0.21252253651618958,
"learning_rate": 0.0001805536,
"loss": 0.8554,
"step": 6080
},
{
"epoch": 0.0976,
"grad_norm": 0.213465616106987,
"learning_rate": 0.0001804896,
"loss": 0.9016,
"step": 6100
},
{
"epoch": 0.09792,
"grad_norm": 0.19815479218959808,
"learning_rate": 0.00018042560000000002,
"loss": 0.9675,
"step": 6120
},
{
"epoch": 0.09824,
"grad_norm": 0.19477008283138275,
"learning_rate": 0.00018036160000000002,
"loss": 0.9025,
"step": 6140
},
{
"epoch": 0.09856,
"grad_norm": 0.20203906297683716,
"learning_rate": 0.0001802976,
"loss": 0.8952,
"step": 6160
},
{
"epoch": 0.09888,
"grad_norm": 0.2099459171295166,
"learning_rate": 0.0001802336,
"loss": 0.9044,
"step": 6180
},
{
"epoch": 0.0992,
"grad_norm": 0.2077176868915558,
"learning_rate": 0.0001801696,
"loss": 0.8826,
"step": 6200
},
{
"epoch": 0.09952,
"grad_norm": 0.18981848657131195,
"learning_rate": 0.0001801056,
"loss": 0.8455,
"step": 6220
},
{
"epoch": 0.09984,
"grad_norm": 0.20933973789215088,
"learning_rate": 0.00018004160000000002,
"loss": 0.902,
"step": 6240
},
{
"epoch": 0.10016,
"grad_norm": 0.20591773092746735,
"learning_rate": 0.00017997760000000002,
"loss": 0.8667,
"step": 6260
},
{
"epoch": 0.10048,
"grad_norm": 0.258956640958786,
"learning_rate": 0.0001799136,
"loss": 0.8949,
"step": 6280
},
{
"epoch": 0.1008,
"grad_norm": 0.19157810509204865,
"learning_rate": 0.0001798496,
"loss": 0.8713,
"step": 6300
},
{
"epoch": 0.10112,
"grad_norm": 0.21302878856658936,
"learning_rate": 0.0001797856,
"loss": 0.8584,
"step": 6320
},
{
"epoch": 0.10144,
"grad_norm": 0.1915074735879898,
"learning_rate": 0.0001797216,
"loss": 0.9583,
"step": 6340
},
{
"epoch": 0.10176,
"grad_norm": 0.22054611146450043,
"learning_rate": 0.0001796576,
"loss": 0.9125,
"step": 6360
},
{
"epoch": 0.10208,
"grad_norm": 0.22295401990413666,
"learning_rate": 0.00017959360000000001,
"loss": 0.8893,
"step": 6380
},
{
"epoch": 0.1024,
"grad_norm": 0.19963820278644562,
"learning_rate": 0.0001795296,
"loss": 0.8944,
"step": 6400
},
{
"epoch": 0.10272,
"grad_norm": 0.17585329711437225,
"learning_rate": 0.0001794656,
"loss": 0.869,
"step": 6420
},
{
"epoch": 0.10304,
"grad_norm": 0.20457583665847778,
"learning_rate": 0.00017940160000000003,
"loss": 0.8894,
"step": 6440
},
{
"epoch": 0.10336,
"grad_norm": 0.2085409164428711,
"learning_rate": 0.00017933760000000002,
"loss": 0.9218,
"step": 6460
},
{
"epoch": 0.10368,
"grad_norm": 0.14747366309165955,
"learning_rate": 0.00017927360000000002,
"loss": 0.8441,
"step": 6480
},
{
"epoch": 0.104,
"grad_norm": 0.24237246811389923,
"learning_rate": 0.0001792096,
"loss": 0.9292,
"step": 6500
},
{
"epoch": 0.10432,
"grad_norm": 0.2079431265592575,
"learning_rate": 0.0001791456,
"loss": 0.8364,
"step": 6520
},
{
"epoch": 0.10464,
"grad_norm": 0.2067815363407135,
"learning_rate": 0.0001790816,
"loss": 0.9069,
"step": 6540
},
{
"epoch": 0.10496,
"grad_norm": 0.18671968579292297,
"learning_rate": 0.0001790176,
"loss": 0.8582,
"step": 6560
},
{
"epoch": 0.10528,
"grad_norm": 0.18874432146549225,
"learning_rate": 0.00017895360000000002,
"loss": 0.8791,
"step": 6580
},
{
"epoch": 0.1056,
"grad_norm": 0.22563117742538452,
"learning_rate": 0.00017888960000000002,
"loss": 0.8395,
"step": 6600
},
{
"epoch": 0.10592,
"grad_norm": 0.19527731835842133,
"learning_rate": 0.0001788256,
"loss": 0.8675,
"step": 6620
},
{
"epoch": 0.10624,
"grad_norm": 0.21411758661270142,
"learning_rate": 0.0001787616,
"loss": 0.9045,
"step": 6640
},
{
"epoch": 0.10656,
"grad_norm": 0.2257653772830963,
"learning_rate": 0.0001786976,
"loss": 0.9009,
"step": 6660
},
{
"epoch": 0.10688,
"grad_norm": 0.18150146305561066,
"learning_rate": 0.0001786336,
"loss": 0.9246,
"step": 6680
},
{
"epoch": 0.1072,
"grad_norm": 0.1973322033882141,
"learning_rate": 0.0001785696,
"loss": 0.9191,
"step": 6700
},
{
"epoch": 0.10752,
"grad_norm": 0.19496308267116547,
"learning_rate": 0.00017850560000000001,
"loss": 0.8449,
"step": 6720
},
{
"epoch": 0.10784,
"grad_norm": 0.19810955226421356,
"learning_rate": 0.0001784416,
"loss": 0.8846,
"step": 6740
},
{
"epoch": 0.10816,
"grad_norm": 0.24701924622058868,
"learning_rate": 0.0001783776,
"loss": 0.8716,
"step": 6760
},
{
"epoch": 0.10848,
"grad_norm": 0.22664742171764374,
"learning_rate": 0.00017831360000000003,
"loss": 0.884,
"step": 6780
},
{
"epoch": 0.1088,
"grad_norm": 0.228456512093544,
"learning_rate": 0.00017824960000000002,
"loss": 0.8975,
"step": 6800
},
{
"epoch": 0.10912,
"grad_norm": 0.21849101781845093,
"learning_rate": 0.00017818560000000002,
"loss": 0.9255,
"step": 6820
},
{
"epoch": 0.10944,
"grad_norm": 0.2064104974269867,
"learning_rate": 0.0001781216,
"loss": 0.8829,
"step": 6840
},
{
"epoch": 0.10976,
"grad_norm": 0.22377945482730865,
"learning_rate": 0.0001780576,
"loss": 0.8715,
"step": 6860
},
{
"epoch": 0.11008,
"grad_norm": 0.202182337641716,
"learning_rate": 0.0001779936,
"loss": 0.9154,
"step": 6880
},
{
"epoch": 0.1104,
"grad_norm": 0.15783466398715973,
"learning_rate": 0.0001779296,
"loss": 0.9463,
"step": 6900
},
{
"epoch": 0.11072,
"grad_norm": 0.2259039580821991,
"learning_rate": 0.00017786560000000002,
"loss": 0.8754,
"step": 6920
},
{
"epoch": 0.11104,
"grad_norm": 0.23525789380073547,
"learning_rate": 0.00017780160000000002,
"loss": 0.8665,
"step": 6940
},
{
"epoch": 0.11136,
"grad_norm": 0.2006695419549942,
"learning_rate": 0.0001777376,
"loss": 0.8832,
"step": 6960
},
{
"epoch": 0.11168,
"grad_norm": 0.2209470272064209,
"learning_rate": 0.0001776736,
"loss": 0.8867,
"step": 6980
},
{
"epoch": 0.112,
"grad_norm": 0.22054742276668549,
"learning_rate": 0.0001776096,
"loss": 0.8876,
"step": 7000
},
{
"epoch": 0.11232,
"grad_norm": 0.24601756036281586,
"learning_rate": 0.0001775456,
"loss": 0.8667,
"step": 7020
},
{
"epoch": 0.11264,
"grad_norm": 0.20692676305770874,
"learning_rate": 0.0001774816,
"loss": 0.8659,
"step": 7040
},
{
"epoch": 0.11296,
"grad_norm": 0.18839353322982788,
"learning_rate": 0.00017741760000000001,
"loss": 0.8503,
"step": 7060
},
{
"epoch": 0.11328,
"grad_norm": 0.2029074728488922,
"learning_rate": 0.0001773536,
"loss": 0.8643,
"step": 7080
},
{
"epoch": 0.1136,
"grad_norm": 0.22685612738132477,
"learning_rate": 0.0001772896,
"loss": 0.9198,
"step": 7100
},
{
"epoch": 0.11392,
"grad_norm": 0.22184133529663086,
"learning_rate": 0.00017722560000000003,
"loss": 0.8725,
"step": 7120
},
{
"epoch": 0.11424,
"grad_norm": 0.19977827370166779,
"learning_rate": 0.00017716160000000002,
"loss": 0.8217,
"step": 7140
},
{
"epoch": 0.11456,
"grad_norm": 0.22433121502399445,
"learning_rate": 0.00017709760000000002,
"loss": 0.9014,
"step": 7160
},
{
"epoch": 0.11488,
"grad_norm": 0.2040790170431137,
"learning_rate": 0.0001770336,
"loss": 0.9144,
"step": 7180
},
{
"epoch": 0.1152,
"grad_norm": 0.22500857710838318,
"learning_rate": 0.0001769696,
"loss": 0.8332,
"step": 7200
},
{
"epoch": 0.11552,
"grad_norm": 0.2294531762599945,
"learning_rate": 0.0001769056,
"loss": 0.9003,
"step": 7220
},
{
"epoch": 0.11584,
"grad_norm": 0.2060810774564743,
"learning_rate": 0.0001768416,
"loss": 0.9065,
"step": 7240
},
{
"epoch": 0.11616,
"grad_norm": 0.21327152848243713,
"learning_rate": 0.00017677760000000002,
"loss": 0.9172,
"step": 7260
},
{
"epoch": 0.11648,
"grad_norm": 0.2296830266714096,
"learning_rate": 0.00017671360000000002,
"loss": 0.9246,
"step": 7280
},
{
"epoch": 0.1168,
"grad_norm": 0.18748362362384796,
"learning_rate": 0.0001766496,
"loss": 0.8971,
"step": 7300
},
{
"epoch": 0.11712,
"grad_norm": 0.1924070417881012,
"learning_rate": 0.0001765856,
"loss": 0.8685,
"step": 7320
},
{
"epoch": 0.11744,
"grad_norm": 0.2428852766752243,
"learning_rate": 0.0001765216,
"loss": 0.9398,
"step": 7340
},
{
"epoch": 0.11776,
"grad_norm": 0.24050328135490417,
"learning_rate": 0.0001764576,
"loss": 0.8048,
"step": 7360
},
{
"epoch": 0.11808,
"grad_norm": 0.2360570877790451,
"learning_rate": 0.0001763936,
"loss": 0.8465,
"step": 7380
},
{
"epoch": 0.1184,
"grad_norm": 0.21176236867904663,
"learning_rate": 0.0001763296,
"loss": 0.8985,
"step": 7400
},
{
"epoch": 0.11872,
"grad_norm": 0.20678134262561798,
"learning_rate": 0.0001762656,
"loss": 0.8958,
"step": 7420
},
{
"epoch": 0.11904,
"grad_norm": 0.28033092617988586,
"learning_rate": 0.0001762016,
"loss": 0.8752,
"step": 7440
},
{
"epoch": 0.11936,
"grad_norm": 0.1989385336637497,
"learning_rate": 0.00017613760000000003,
"loss": 0.9008,
"step": 7460
},
{
"epoch": 0.11968,
"grad_norm": 0.22315728664398193,
"learning_rate": 0.00017607360000000002,
"loss": 0.8795,
"step": 7480
},
{
"epoch": 0.12,
"grad_norm": 0.2524365186691284,
"learning_rate": 0.00017600960000000002,
"loss": 0.9486,
"step": 7500
},
{
"epoch": 0.12032,
"grad_norm": 0.25160396099090576,
"learning_rate": 0.0001759456,
"loss": 0.9099,
"step": 7520
},
{
"epoch": 0.12064,
"grad_norm": 0.22552479803562164,
"learning_rate": 0.0001758816,
"loss": 0.8352,
"step": 7540
},
{
"epoch": 0.12096,
"grad_norm": 0.17683327198028564,
"learning_rate": 0.0001758176,
"loss": 0.8771,
"step": 7560
},
{
"epoch": 0.12128,
"grad_norm": 0.21366801857948303,
"learning_rate": 0.0001757536,
"loss": 0.9409,
"step": 7580
},
{
"epoch": 0.1216,
"grad_norm": 0.19283446669578552,
"learning_rate": 0.00017568960000000002,
"loss": 0.9305,
"step": 7600
},
{
"epoch": 0.12192,
"grad_norm": 0.22334997355937958,
"learning_rate": 0.00017562560000000001,
"loss": 0.8974,
"step": 7620
},
{
"epoch": 0.12224,
"grad_norm": 0.252670019865036,
"learning_rate": 0.0001755616,
"loss": 0.8787,
"step": 7640
},
{
"epoch": 0.12256,
"grad_norm": 0.2769858241081238,
"learning_rate": 0.0001754976,
"loss": 0.898,
"step": 7660
},
{
"epoch": 0.12288,
"grad_norm": 0.1979377120733261,
"learning_rate": 0.0001754336,
"loss": 0.8994,
"step": 7680
},
{
"epoch": 0.1232,
"grad_norm": 0.2033649981021881,
"learning_rate": 0.0001753696,
"loss": 0.8465,
"step": 7700
},
{
"epoch": 0.12352,
"grad_norm": 0.19611379504203796,
"learning_rate": 0.0001753056,
"loss": 0.9224,
"step": 7720
},
{
"epoch": 0.12384,
"grad_norm": 0.33501213788986206,
"learning_rate": 0.0001752416,
"loss": 0.9225,
"step": 7740
},
{
"epoch": 0.12416,
"grad_norm": 0.17307236790657043,
"learning_rate": 0.0001751776,
"loss": 0.9069,
"step": 7760
},
{
"epoch": 0.12448,
"grad_norm": 0.21077322959899902,
"learning_rate": 0.0001751136,
"loss": 0.9084,
"step": 7780
},
{
"epoch": 0.1248,
"grad_norm": 0.2217060923576355,
"learning_rate": 0.00017504960000000003,
"loss": 0.8567,
"step": 7800
},
{
"epoch": 0.12512,
"grad_norm": 0.2257986068725586,
"learning_rate": 0.00017498560000000002,
"loss": 0.8508,
"step": 7820
},
{
"epoch": 0.12544,
"grad_norm": 0.2513684332370758,
"learning_rate": 0.00017492160000000002,
"loss": 0.8808,
"step": 7840
},
{
"epoch": 0.12576,
"grad_norm": 0.3284933865070343,
"learning_rate": 0.0001748576,
"loss": 0.8912,
"step": 7860
},
{
"epoch": 0.12608,
"grad_norm": 0.20665164291858673,
"learning_rate": 0.0001747936,
"loss": 0.8869,
"step": 7880
},
{
"epoch": 0.1264,
"grad_norm": 0.2463517189025879,
"learning_rate": 0.0001747296,
"loss": 0.9119,
"step": 7900
},
{
"epoch": 0.12672,
"grad_norm": 0.19471873342990875,
"learning_rate": 0.0001746656,
"loss": 0.898,
"step": 7920
},
{
"epoch": 0.12704,
"grad_norm": 0.2780425250530243,
"learning_rate": 0.00017460160000000002,
"loss": 0.9174,
"step": 7940
},
{
"epoch": 0.12736,
"grad_norm": 0.22313277423381805,
"learning_rate": 0.00017453760000000001,
"loss": 0.9054,
"step": 7960
},
{
"epoch": 0.12768,
"grad_norm": 0.22709155082702637,
"learning_rate": 0.0001744736,
"loss": 0.887,
"step": 7980
},
{
"epoch": 0.128,
"grad_norm": 0.22096025943756104,
"learning_rate": 0.0001744096,
"loss": 0.8977,
"step": 8000
},
{
"epoch": 0.12832,
"grad_norm": 0.2423054575920105,
"learning_rate": 0.0001743456,
"loss": 0.9106,
"step": 8020
},
{
"epoch": 0.12864,
"grad_norm": 0.20658574998378754,
"learning_rate": 0.0001742816,
"loss": 0.8476,
"step": 8040
},
{
"epoch": 0.12896,
"grad_norm": 0.22077764570713043,
"learning_rate": 0.0001742176,
"loss": 0.911,
"step": 8060
},
{
"epoch": 0.12928,
"grad_norm": 0.22980265319347382,
"learning_rate": 0.0001741536,
"loss": 0.9451,
"step": 8080
},
{
"epoch": 0.1296,
"grad_norm": 0.25283125042915344,
"learning_rate": 0.0001740896,
"loss": 0.8582,
"step": 8100
},
{
"epoch": 0.12992,
"grad_norm": 0.22836875915527344,
"learning_rate": 0.0001740256,
"loss": 0.8644,
"step": 8120
},
{
"epoch": 0.13024,
"grad_norm": 0.20451593399047852,
"learning_rate": 0.00017396160000000003,
"loss": 0.9361,
"step": 8140
},
{
"epoch": 0.13056,
"grad_norm": 0.20466330647468567,
"learning_rate": 0.00017389760000000002,
"loss": 0.9134,
"step": 8160
},
{
"epoch": 0.13088,
"grad_norm": 0.20562607049942017,
"learning_rate": 0.00017383360000000002,
"loss": 0.9157,
"step": 8180
},
{
"epoch": 0.1312,
"grad_norm": 0.23010079562664032,
"learning_rate": 0.0001737696,
"loss": 0.8571,
"step": 8200
},
{
"epoch": 0.13152,
"grad_norm": 0.2761363387107849,
"learning_rate": 0.0001737056,
"loss": 0.863,
"step": 8220
},
{
"epoch": 0.13184,
"grad_norm": 0.19927144050598145,
"learning_rate": 0.0001736416,
"loss": 0.9056,
"step": 8240
},
{
"epoch": 0.13216,
"grad_norm": 0.21809734404087067,
"learning_rate": 0.0001735776,
"loss": 0.8547,
"step": 8260
},
{
"epoch": 0.13248,
"grad_norm": 0.2040037214756012,
"learning_rate": 0.00017351360000000002,
"loss": 0.8567,
"step": 8280
},
{
"epoch": 0.1328,
"grad_norm": 0.19414140284061432,
"learning_rate": 0.00017344960000000001,
"loss": 0.8773,
"step": 8300
},
{
"epoch": 0.13312,
"grad_norm": 0.17483866214752197,
"learning_rate": 0.0001733856,
"loss": 0.9026,
"step": 8320
},
{
"epoch": 0.13344,
"grad_norm": 0.2505808472633362,
"learning_rate": 0.0001733216,
"loss": 0.8348,
"step": 8340
},
{
"epoch": 0.13376,
"grad_norm": 0.2515566051006317,
"learning_rate": 0.0001732576,
"loss": 0.8657,
"step": 8360
},
{
"epoch": 0.13408,
"grad_norm": 0.2105536013841629,
"learning_rate": 0.0001731936,
"loss": 0.8864,
"step": 8380
},
{
"epoch": 0.1344,
"grad_norm": 0.22910176217556,
"learning_rate": 0.0001731296,
"loss": 0.8379,
"step": 8400
},
{
"epoch": 0.13472,
"grad_norm": 0.20737454295158386,
"learning_rate": 0.0001730656,
"loss": 0.8684,
"step": 8420
},
{
"epoch": 0.13504,
"grad_norm": 0.22466444969177246,
"learning_rate": 0.0001730016,
"loss": 0.9522,
"step": 8440
},
{
"epoch": 0.13536,
"grad_norm": 0.19258467853069305,
"learning_rate": 0.0001729376,
"loss": 0.8525,
"step": 8460
},
{
"epoch": 0.13568,
"grad_norm": 0.2092629224061966,
"learning_rate": 0.00017287360000000002,
"loss": 0.8658,
"step": 8480
},
{
"epoch": 0.136,
"grad_norm": 0.20756912231445312,
"learning_rate": 0.00017280960000000002,
"loss": 0.9148,
"step": 8500
},
{
"epoch": 0.13632,
"grad_norm": 0.22604379057884216,
"learning_rate": 0.00017274560000000002,
"loss": 0.8489,
"step": 8520
},
{
"epoch": 0.13664,
"grad_norm": 0.2140427976846695,
"learning_rate": 0.0001726816,
"loss": 0.8702,
"step": 8540
},
{
"epoch": 0.13696,
"grad_norm": 0.22593297064304352,
"learning_rate": 0.0001726176,
"loss": 0.8572,
"step": 8560
},
{
"epoch": 0.13728,
"grad_norm": 0.2053360491991043,
"learning_rate": 0.0001725536,
"loss": 0.8283,
"step": 8580
},
{
"epoch": 0.1376,
"grad_norm": 0.2059011608362198,
"learning_rate": 0.0001724896,
"loss": 0.9218,
"step": 8600
},
{
"epoch": 0.13792,
"grad_norm": 0.19691585004329681,
"learning_rate": 0.00017242560000000002,
"loss": 0.8718,
"step": 8620
},
{
"epoch": 0.13824,
"grad_norm": 0.2076309472322464,
"learning_rate": 0.00017236480000000002,
"loss": 0.9297,
"step": 8640
},
{
"epoch": 0.13856,
"grad_norm": 0.26082372665405273,
"learning_rate": 0.00017230080000000002,
"loss": 0.8568,
"step": 8660
},
{
"epoch": 0.13888,
"grad_norm": 0.22894443571567535,
"learning_rate": 0.0001722368,
"loss": 0.858,
"step": 8680
},
{
"epoch": 0.1392,
"grad_norm": 0.2583048939704895,
"learning_rate": 0.0001721728,
"loss": 0.9089,
"step": 8700
},
{
"epoch": 0.13952,
"grad_norm": 0.23365485668182373,
"learning_rate": 0.0001721088,
"loss": 0.8283,
"step": 8720
},
{
"epoch": 0.13984,
"grad_norm": 0.23852278292179108,
"learning_rate": 0.0001720448,
"loss": 0.8573,
"step": 8740
},
{
"epoch": 0.14016,
"grad_norm": 0.22304783761501312,
"learning_rate": 0.00017198080000000002,
"loss": 0.8772,
"step": 8760
},
{
"epoch": 0.14048,
"grad_norm": 0.2686362862586975,
"learning_rate": 0.00017191680000000001,
"loss": 0.8857,
"step": 8780
},
{
"epoch": 0.1408,
"grad_norm": 0.17005324363708496,
"learning_rate": 0.0001718528,
"loss": 0.9015,
"step": 8800
},
{
"epoch": 0.14112,
"grad_norm": 0.22986558079719543,
"learning_rate": 0.0001717888,
"loss": 0.9192,
"step": 8820
},
{
"epoch": 0.14144,
"grad_norm": 0.21427962183952332,
"learning_rate": 0.00017172480000000003,
"loss": 0.8947,
"step": 8840
},
{
"epoch": 0.14176,
"grad_norm": 0.262226402759552,
"learning_rate": 0.0001716608,
"loss": 0.8878,
"step": 8860
},
{
"epoch": 0.14208,
"grad_norm": 0.23082557320594788,
"learning_rate": 0.0001715968,
"loss": 0.9263,
"step": 8880
},
{
"epoch": 0.1424,
"grad_norm": 0.2226615846157074,
"learning_rate": 0.0001715328,
"loss": 0.9526,
"step": 8900
},
{
"epoch": 0.14272,
"grad_norm": 0.2389681190252304,
"learning_rate": 0.0001714688,
"loss": 0.8784,
"step": 8920
},
{
"epoch": 0.14304,
"grad_norm": 0.20122146606445312,
"learning_rate": 0.0001714048,
"loss": 0.9487,
"step": 8940
},
{
"epoch": 0.14336,
"grad_norm": 0.24507276713848114,
"learning_rate": 0.00017134080000000002,
"loss": 0.8501,
"step": 8960
},
{
"epoch": 0.14368,
"grad_norm": 0.23927843570709229,
"learning_rate": 0.00017127680000000002,
"loss": 0.8898,
"step": 8980
},
{
"epoch": 0.144,
"grad_norm": 0.22527576982975006,
"learning_rate": 0.00017121280000000002,
"loss": 0.8939,
"step": 9000
},
{
"epoch": 0.14432,
"grad_norm": 0.23542018234729767,
"learning_rate": 0.0001711488,
"loss": 0.9004,
"step": 9020
},
{
"epoch": 0.14464,
"grad_norm": 0.21746650338172913,
"learning_rate": 0.0001710848,
"loss": 0.8618,
"step": 9040
},
{
"epoch": 0.14496,
"grad_norm": 0.2594437003135681,
"learning_rate": 0.0001710208,
"loss": 0.9052,
"step": 9060
},
{
"epoch": 0.14528,
"grad_norm": 0.23847267031669617,
"learning_rate": 0.0001709568,
"loss": 0.8654,
"step": 9080
},
{
"epoch": 0.1456,
"grad_norm": 0.2352636456489563,
"learning_rate": 0.00017089280000000002,
"loss": 0.8685,
"step": 9100
},
{
"epoch": 0.14592,
"grad_norm": 0.21218867599964142,
"learning_rate": 0.0001708288,
"loss": 0.8865,
"step": 9120
},
{
"epoch": 0.14624,
"grad_norm": 0.22339680790901184,
"learning_rate": 0.0001707648,
"loss": 0.9414,
"step": 9140
},
{
"epoch": 0.14656,
"grad_norm": 0.2145155370235443,
"learning_rate": 0.0001707008,
"loss": 0.871,
"step": 9160
},
{
"epoch": 0.14688,
"grad_norm": 0.24632301926612854,
"learning_rate": 0.00017063680000000003,
"loss": 0.9003,
"step": 9180
},
{
"epoch": 0.1472,
"grad_norm": 0.21344535052776337,
"learning_rate": 0.0001705728,
"loss": 0.9132,
"step": 9200
},
{
"epoch": 0.14752,
"grad_norm": 0.2178122103214264,
"learning_rate": 0.0001705088,
"loss": 0.9213,
"step": 9220
},
{
"epoch": 0.14784,
"grad_norm": 0.23042111098766327,
"learning_rate": 0.0001704448,
"loss": 0.9325,
"step": 9240
},
{
"epoch": 0.14816,
"grad_norm": 0.246158629655838,
"learning_rate": 0.0001703808,
"loss": 0.87,
"step": 9260
},
{
"epoch": 0.14848,
"grad_norm": 0.22557534277439117,
"learning_rate": 0.0001703168,
"loss": 0.8192,
"step": 9280
},
{
"epoch": 0.1488,
"grad_norm": 0.20784518122673035,
"learning_rate": 0.00017025280000000002,
"loss": 0.8372,
"step": 9300
},
{
"epoch": 0.14912,
"grad_norm": 0.23057977855205536,
"learning_rate": 0.00017018880000000002,
"loss": 0.9297,
"step": 9320
},
{
"epoch": 0.14944,
"grad_norm": 0.2289903163909912,
"learning_rate": 0.00017012480000000001,
"loss": 0.9167,
"step": 9340
},
{
"epoch": 0.14976,
"grad_norm": 0.22998815774917603,
"learning_rate": 0.0001700608,
"loss": 0.889,
"step": 9360
},
{
"epoch": 0.15008,
"grad_norm": 0.22863976657390594,
"learning_rate": 0.0001699968,
"loss": 0.8884,
"step": 9380
},
{
"epoch": 0.1504,
"grad_norm": 0.24748341739177704,
"learning_rate": 0.0001699328,
"loss": 0.8961,
"step": 9400
},
{
"epoch": 0.15072,
"grad_norm": 0.21250346302986145,
"learning_rate": 0.0001698688,
"loss": 0.8683,
"step": 9420
},
{
"epoch": 0.15104,
"grad_norm": 0.239846333861351,
"learning_rate": 0.00016980480000000002,
"loss": 0.8927,
"step": 9440
},
{
"epoch": 0.15136,
"grad_norm": 0.2487175464630127,
"learning_rate": 0.0001697408,
"loss": 0.9144,
"step": 9460
},
{
"epoch": 0.15168,
"grad_norm": 0.23323270678520203,
"learning_rate": 0.0001696768,
"loss": 0.9251,
"step": 9480
},
{
"epoch": 0.152,
"grad_norm": 0.19210824370384216,
"learning_rate": 0.0001696128,
"loss": 0.9244,
"step": 9500
},
{
"epoch": 0.15232,
"grad_norm": 0.23382435739040375,
"learning_rate": 0.00016954880000000003,
"loss": 0.9249,
"step": 9520
},
{
"epoch": 0.15264,
"grad_norm": 0.20494690537452698,
"learning_rate": 0.0001694848,
"loss": 0.878,
"step": 9540
},
{
"epoch": 0.15296,
"grad_norm": 0.23017622530460358,
"learning_rate": 0.0001694208,
"loss": 0.8656,
"step": 9560
},
{
"epoch": 0.15328,
"grad_norm": 0.26027923822402954,
"learning_rate": 0.0001693568,
"loss": 0.912,
"step": 9580
},
{
"epoch": 0.1536,
"grad_norm": 0.19583414494991302,
"learning_rate": 0.0001692928,
"loss": 0.8411,
"step": 9600
},
{
"epoch": 0.15392,
"grad_norm": 0.25373271107673645,
"learning_rate": 0.0001692288,
"loss": 0.8781,
"step": 9620
},
{
"epoch": 0.15424,
"grad_norm": 0.27190205454826355,
"learning_rate": 0.00016916480000000002,
"loss": 0.8691,
"step": 9640
},
{
"epoch": 0.15456,
"grad_norm": 0.22996129095554352,
"learning_rate": 0.00016910080000000002,
"loss": 0.8277,
"step": 9660
},
{
"epoch": 0.15488,
"grad_norm": 0.1947249174118042,
"learning_rate": 0.00016903680000000001,
"loss": 0.8873,
"step": 9680
},
{
"epoch": 0.1552,
"grad_norm": 0.18230539560317993,
"learning_rate": 0.0001689728,
"loss": 0.8315,
"step": 9700
},
{
"epoch": 0.15552,
"grad_norm": 0.25768032670021057,
"learning_rate": 0.0001689088,
"loss": 0.8645,
"step": 9720
},
{
"epoch": 0.15584,
"grad_norm": 0.2460031509399414,
"learning_rate": 0.0001688448,
"loss": 0.9031,
"step": 9740
},
{
"epoch": 0.15616,
"grad_norm": 0.22613097727298737,
"learning_rate": 0.0001687808,
"loss": 0.9065,
"step": 9760
},
{
"epoch": 0.15648,
"grad_norm": 0.2073383629322052,
"learning_rate": 0.00016871680000000002,
"loss": 0.8825,
"step": 9780
},
{
"epoch": 0.1568,
"grad_norm": 0.2087622731924057,
"learning_rate": 0.0001686528,
"loss": 0.9253,
"step": 9800
},
{
"epoch": 0.15712,
"grad_norm": 0.2113562375307083,
"learning_rate": 0.0001685888,
"loss": 0.8639,
"step": 9820
},
{
"epoch": 0.15744,
"grad_norm": 0.23061156272888184,
"learning_rate": 0.0001685248,
"loss": 0.8818,
"step": 9840
},
{
"epoch": 0.15776,
"grad_norm": 0.2453097254037857,
"learning_rate": 0.00016846080000000003,
"loss": 0.91,
"step": 9860
},
{
"epoch": 0.15808,
"grad_norm": 0.2568601071834564,
"learning_rate": 0.0001683968,
"loss": 0.9147,
"step": 9880
},
{
"epoch": 0.1584,
"grad_norm": 0.238372802734375,
"learning_rate": 0.0001683328,
"loss": 0.8514,
"step": 9900
},
{
"epoch": 0.15872,
"grad_norm": 0.2500544786453247,
"learning_rate": 0.0001682688,
"loss": 0.9219,
"step": 9920
},
{
"epoch": 0.15904,
"grad_norm": 0.22526203095912933,
"learning_rate": 0.0001682048,
"loss": 0.8553,
"step": 9940
},
{
"epoch": 0.15936,
"grad_norm": 0.2296661138534546,
"learning_rate": 0.0001681408,
"loss": 0.8867,
"step": 9960
},
{
"epoch": 0.15968,
"grad_norm": 0.19159358739852905,
"learning_rate": 0.00016807680000000002,
"loss": 0.8231,
"step": 9980
},
{
"epoch": 0.16,
"grad_norm": 0.21099399030208588,
"learning_rate": 0.00016801280000000002,
"loss": 0.9614,
"step": 10000
},
{
"epoch": 0.16032,
"grad_norm": 0.19851434230804443,
"learning_rate": 0.00016794880000000001,
"loss": 0.8711,
"step": 10020
},
{
"epoch": 0.16064,
"grad_norm": 0.255908340215683,
"learning_rate": 0.0001678848,
"loss": 0.8584,
"step": 10040
},
{
"epoch": 0.16096,
"grad_norm": 0.17037171125411987,
"learning_rate": 0.0001678208,
"loss": 0.8858,
"step": 10060
},
{
"epoch": 0.16128,
"grad_norm": 0.18440371751785278,
"learning_rate": 0.0001677568,
"loss": 0.8785,
"step": 10080
},
{
"epoch": 0.1616,
"grad_norm": 0.22271201014518738,
"learning_rate": 0.0001676928,
"loss": 0.8777,
"step": 10100
},
{
"epoch": 0.16192,
"grad_norm": 0.23368695378303528,
"learning_rate": 0.00016762880000000002,
"loss": 0.9383,
"step": 10120
},
{
"epoch": 0.16224,
"grad_norm": 0.2024698108434677,
"learning_rate": 0.0001675648,
"loss": 0.8235,
"step": 10140
},
{
"epoch": 0.16256,
"grad_norm": 0.24644511938095093,
"learning_rate": 0.0001675008,
"loss": 0.9375,
"step": 10160
},
{
"epoch": 0.16288,
"grad_norm": 0.21530281007289886,
"learning_rate": 0.0001674368,
"loss": 0.8697,
"step": 10180
},
{
"epoch": 0.1632,
"grad_norm": 0.2107221782207489,
"learning_rate": 0.00016737280000000002,
"loss": 0.8798,
"step": 10200
},
{
"epoch": 0.16352,
"grad_norm": 0.18811015784740448,
"learning_rate": 0.0001673088,
"loss": 0.9518,
"step": 10220
},
{
"epoch": 0.16384,
"grad_norm": 0.20447804033756256,
"learning_rate": 0.0001672448,
"loss": 0.8528,
"step": 10240
},
{
"epoch": 0.16416,
"grad_norm": 0.22877538204193115,
"learning_rate": 0.0001671808,
"loss": 0.9376,
"step": 10260
},
{
"epoch": 0.16448,
"grad_norm": 0.24324432015419006,
"learning_rate": 0.0001671168,
"loss": 0.8818,
"step": 10280
},
{
"epoch": 0.1648,
"grad_norm": 0.20559096336364746,
"learning_rate": 0.0001670528,
"loss": 0.9382,
"step": 10300
},
{
"epoch": 0.16512,
"grad_norm": 0.23329490423202515,
"learning_rate": 0.00016698880000000002,
"loss": 0.9457,
"step": 10320
},
{
"epoch": 0.16544,
"grad_norm": 0.23040834069252014,
"learning_rate": 0.00016692480000000002,
"loss": 0.8943,
"step": 10340
},
{
"epoch": 0.16576,
"grad_norm": 0.21570099890232086,
"learning_rate": 0.0001668608,
"loss": 0.8714,
"step": 10360
},
{
"epoch": 0.16608,
"grad_norm": 0.20824502408504486,
"learning_rate": 0.0001667968,
"loss": 0.8851,
"step": 10380
},
{
"epoch": 0.1664,
"grad_norm": 0.19650331139564514,
"learning_rate": 0.0001667328,
"loss": 0.8649,
"step": 10400
},
{
"epoch": 0.16672,
"grad_norm": 0.22227755188941956,
"learning_rate": 0.0001666688,
"loss": 0.9556,
"step": 10420
},
{
"epoch": 0.16704,
"grad_norm": 0.21929942071437836,
"learning_rate": 0.0001666048,
"loss": 0.9107,
"step": 10440
},
{
"epoch": 0.16736,
"grad_norm": 0.21728375554084778,
"learning_rate": 0.00016654080000000002,
"loss": 0.9389,
"step": 10460
},
{
"epoch": 0.16768,
"grad_norm": 0.257927805185318,
"learning_rate": 0.0001664768,
"loss": 0.852,
"step": 10480
},
{
"epoch": 0.168,
"grad_norm": 0.23964323103427887,
"learning_rate": 0.0001664128,
"loss": 0.8966,
"step": 10500
},
{
"epoch": 0.16832,
"grad_norm": 0.21869444847106934,
"learning_rate": 0.00016634880000000003,
"loss": 0.9496,
"step": 10520
},
{
"epoch": 0.16864,
"grad_norm": 0.2491443157196045,
"learning_rate": 0.00016628480000000002,
"loss": 0.8853,
"step": 10540
},
{
"epoch": 0.16896,
"grad_norm": 0.19421234726905823,
"learning_rate": 0.0001662208,
"loss": 0.9094,
"step": 10560
},
{
"epoch": 0.16928,
"grad_norm": 0.2546538710594177,
"learning_rate": 0.00016615680000000001,
"loss": 0.9055,
"step": 10580
},
{
"epoch": 0.1696,
"grad_norm": 0.21943865716457367,
"learning_rate": 0.0001660928,
"loss": 0.9036,
"step": 10600
},
{
"epoch": 0.16992,
"grad_norm": 0.26403695344924927,
"learning_rate": 0.0001660288,
"loss": 0.8961,
"step": 10620
},
{
"epoch": 0.17024,
"grad_norm": 0.2386874556541443,
"learning_rate": 0.0001659648,
"loss": 0.8756,
"step": 10640
},
{
"epoch": 0.17056,
"grad_norm": 0.2226932942867279,
"learning_rate": 0.00016590080000000002,
"loss": 0.847,
"step": 10660
},
{
"epoch": 0.17088,
"grad_norm": 0.19772516191005707,
"learning_rate": 0.00016583680000000002,
"loss": 0.8771,
"step": 10680
},
{
"epoch": 0.1712,
"grad_norm": 0.20000356435775757,
"learning_rate": 0.0001657728,
"loss": 0.922,
"step": 10700
},
{
"epoch": 0.17152,
"grad_norm": 0.24227920174598694,
"learning_rate": 0.0001657088,
"loss": 0.8792,
"step": 10720
},
{
"epoch": 0.17184,
"grad_norm": 0.2312862128019333,
"learning_rate": 0.0001656448,
"loss": 0.8606,
"step": 10740
},
{
"epoch": 0.17216,
"grad_norm": 0.229568749666214,
"learning_rate": 0.0001655808,
"loss": 0.8763,
"step": 10760
},
{
"epoch": 0.17248,
"grad_norm": 0.22286683320999146,
"learning_rate": 0.0001655168,
"loss": 0.9215,
"step": 10780
},
{
"epoch": 0.1728,
"grad_norm": 0.21545717120170593,
"learning_rate": 0.00016545280000000002,
"loss": 0.8683,
"step": 10800
},
{
"epoch": 0.17312,
"grad_norm": 0.2119383066892624,
"learning_rate": 0.0001653888,
"loss": 0.9104,
"step": 10820
},
{
"epoch": 0.17344,
"grad_norm": 0.25230464339256287,
"learning_rate": 0.0001653248,
"loss": 0.9178,
"step": 10840
},
{
"epoch": 0.17376,
"grad_norm": 0.20645944774150848,
"learning_rate": 0.00016526080000000003,
"loss": 0.8743,
"step": 10860
},
{
"epoch": 0.17408,
"grad_norm": 0.24283145368099213,
"learning_rate": 0.00016519680000000002,
"loss": 0.917,
"step": 10880
},
{
"epoch": 0.1744,
"grad_norm": 0.24862386286258698,
"learning_rate": 0.0001651328,
"loss": 0.8957,
"step": 10900
},
{
"epoch": 0.17472,
"grad_norm": 0.16515551507472992,
"learning_rate": 0.00016506880000000001,
"loss": 0.9213,
"step": 10920
},
{
"epoch": 0.17504,
"grad_norm": 0.21619679033756256,
"learning_rate": 0.0001650048,
"loss": 0.8658,
"step": 10940
},
{
"epoch": 0.17536,
"grad_norm": 0.19346758723258972,
"learning_rate": 0.0001649408,
"loss": 0.8456,
"step": 10960
},
{
"epoch": 0.17568,
"grad_norm": 0.21540650725364685,
"learning_rate": 0.0001648768,
"loss": 0.9633,
"step": 10980
},
{
"epoch": 0.176,
"grad_norm": 0.21067962050437927,
"learning_rate": 0.00016481280000000002,
"loss": 0.8907,
"step": 11000
},
{
"epoch": 0.17632,
"grad_norm": 0.2155253291130066,
"learning_rate": 0.00016474880000000002,
"loss": 0.8985,
"step": 11020
},
{
"epoch": 0.17664,
"grad_norm": 0.27138301730155945,
"learning_rate": 0.0001646848,
"loss": 0.8525,
"step": 11040
},
{
"epoch": 0.17696,
"grad_norm": 0.20680946111679077,
"learning_rate": 0.0001646208,
"loss": 0.9124,
"step": 11060
},
{
"epoch": 0.17728,
"grad_norm": 0.2446873039007187,
"learning_rate": 0.0001645568,
"loss": 0.8922,
"step": 11080
},
{
"epoch": 0.1776,
"grad_norm": 0.19545750319957733,
"learning_rate": 0.0001644928,
"loss": 0.8919,
"step": 11100
},
{
"epoch": 0.17792,
"grad_norm": 0.20573855936527252,
"learning_rate": 0.0001644288,
"loss": 0.8495,
"step": 11120
},
{
"epoch": 0.17824,
"grad_norm": 0.1951497346162796,
"learning_rate": 0.00016436480000000002,
"loss": 0.8981,
"step": 11140
},
{
"epoch": 0.17856,
"grad_norm": 0.25471144914627075,
"learning_rate": 0.0001643008,
"loss": 0.9582,
"step": 11160
},
{
"epoch": 0.17888,
"grad_norm": 0.22080758213996887,
"learning_rate": 0.0001642368,
"loss": 0.9398,
"step": 11180
},
{
"epoch": 0.1792,
"grad_norm": 0.23357786238193512,
"learning_rate": 0.00016417280000000003,
"loss": 0.8585,
"step": 11200
},
{
"epoch": 0.17952,
"grad_norm": 0.3059156537055969,
"learning_rate": 0.00016410880000000002,
"loss": 0.9087,
"step": 11220
},
{
"epoch": 0.17984,
"grad_norm": 0.21788957715034485,
"learning_rate": 0.0001640448,
"loss": 0.9112,
"step": 11240
},
{
"epoch": 0.18016,
"grad_norm": 0.2401525229215622,
"learning_rate": 0.00016398080000000001,
"loss": 0.9099,
"step": 11260
},
{
"epoch": 0.18048,
"grad_norm": 0.22227467596530914,
"learning_rate": 0.0001639168,
"loss": 0.8272,
"step": 11280
},
{
"epoch": 0.1808,
"grad_norm": 0.21627697348594666,
"learning_rate": 0.0001638528,
"loss": 0.8753,
"step": 11300
},
{
"epoch": 0.18112,
"grad_norm": 0.21134355664253235,
"learning_rate": 0.0001637888,
"loss": 0.9182,
"step": 11320
},
{
"epoch": 0.18144,
"grad_norm": 0.22719112038612366,
"learning_rate": 0.00016372480000000002,
"loss": 0.8454,
"step": 11340
},
{
"epoch": 0.18176,
"grad_norm": 0.22609511017799377,
"learning_rate": 0.00016366080000000002,
"loss": 0.888,
"step": 11360
},
{
"epoch": 0.18208,
"grad_norm": 0.19711975753307343,
"learning_rate": 0.0001635968,
"loss": 0.942,
"step": 11380
},
{
"epoch": 0.1824,
"grad_norm": 0.2588805854320526,
"learning_rate": 0.0001635328,
"loss": 0.9463,
"step": 11400
},
{
"epoch": 0.18272,
"grad_norm": 0.25787708163261414,
"learning_rate": 0.0001634688,
"loss": 0.9114,
"step": 11420
},
{
"epoch": 0.18304,
"grad_norm": 0.2743508219718933,
"learning_rate": 0.0001634048,
"loss": 0.873,
"step": 11440
},
{
"epoch": 0.18336,
"grad_norm": 0.23172695934772491,
"learning_rate": 0.0001633408,
"loss": 0.8495,
"step": 11460
},
{
"epoch": 0.18368,
"grad_norm": 0.18422289192676544,
"learning_rate": 0.00016327680000000002,
"loss": 0.8821,
"step": 11480
},
{
"epoch": 0.184,
"grad_norm": 0.2328750044107437,
"learning_rate": 0.0001632128,
"loss": 0.8885,
"step": 11500
},
{
"epoch": 0.18432,
"grad_norm": 0.26465412974357605,
"learning_rate": 0.0001631488,
"loss": 0.8943,
"step": 11520
},
{
"epoch": 0.18464,
"grad_norm": 0.27734020352363586,
"learning_rate": 0.00016308480000000003,
"loss": 0.855,
"step": 11540
},
{
"epoch": 0.18496,
"grad_norm": 0.24460507929325104,
"learning_rate": 0.00016302080000000002,
"loss": 0.8996,
"step": 11560
},
{
"epoch": 0.18528,
"grad_norm": 0.2152118980884552,
"learning_rate": 0.0001629568,
"loss": 0.839,
"step": 11580
},
{
"epoch": 0.1856,
"grad_norm": 0.22813241183757782,
"learning_rate": 0.00016289280000000001,
"loss": 0.9257,
"step": 11600
},
{
"epoch": 0.18592,
"grad_norm": 0.2076783925294876,
"learning_rate": 0.0001628288,
"loss": 0.88,
"step": 11620
},
{
"epoch": 0.18624,
"grad_norm": 0.23828792572021484,
"learning_rate": 0.0001627648,
"loss": 0.9087,
"step": 11640
},
{
"epoch": 0.18656,
"grad_norm": 0.24277402460575104,
"learning_rate": 0.0001627008,
"loss": 0.9419,
"step": 11660
},
{
"epoch": 0.18688,
"grad_norm": 0.24770581722259521,
"learning_rate": 0.00016263680000000002,
"loss": 0.9184,
"step": 11680
},
{
"epoch": 0.1872,
"grad_norm": 0.23547635972499847,
"learning_rate": 0.00016257280000000002,
"loss": 0.9069,
"step": 11700
},
{
"epoch": 0.18752,
"grad_norm": 0.17838741838932037,
"learning_rate": 0.0001625088,
"loss": 0.9483,
"step": 11720
},
{
"epoch": 0.18784,
"grad_norm": 0.23091432452201843,
"learning_rate": 0.0001624448,
"loss": 0.9225,
"step": 11740
},
{
"epoch": 0.18816,
"grad_norm": 0.2132597118616104,
"learning_rate": 0.0001623808,
"loss": 0.8979,
"step": 11760
},
{
"epoch": 0.18848,
"grad_norm": 0.2296367734670639,
"learning_rate": 0.0001623168,
"loss": 0.8762,
"step": 11780
},
{
"epoch": 0.1888,
"grad_norm": 0.20997250080108643,
"learning_rate": 0.0001622528,
"loss": 0.9156,
"step": 11800
},
{
"epoch": 0.18912,
"grad_norm": 0.2033025026321411,
"learning_rate": 0.00016218880000000001,
"loss": 0.8847,
"step": 11820
},
{
"epoch": 0.18944,
"grad_norm": 0.21794314682483673,
"learning_rate": 0.0001621248,
"loss": 0.8564,
"step": 11840
},
{
"epoch": 0.18976,
"grad_norm": 0.23999591171741486,
"learning_rate": 0.0001620608,
"loss": 0.8581,
"step": 11860
},
{
"epoch": 0.19008,
"grad_norm": 0.2366144210100174,
"learning_rate": 0.00016199680000000003,
"loss": 0.8745,
"step": 11880
},
{
"epoch": 0.1904,
"grad_norm": 0.2415480762720108,
"learning_rate": 0.00016193280000000002,
"loss": 0.9004,
"step": 11900
},
{
"epoch": 0.19072,
"grad_norm": 0.22656038403511047,
"learning_rate": 0.0001618688,
"loss": 0.8871,
"step": 11920
},
{
"epoch": 0.19104,
"grad_norm": 0.2326974719762802,
"learning_rate": 0.0001618048,
"loss": 0.954,
"step": 11940
},
{
"epoch": 0.19136,
"grad_norm": 0.212848499417305,
"learning_rate": 0.0001617408,
"loss": 0.9154,
"step": 11960
},
{
"epoch": 0.19168,
"grad_norm": 0.16706988215446472,
"learning_rate": 0.0001616768,
"loss": 0.9052,
"step": 11980
},
{
"epoch": 0.192,
"grad_norm": 0.2651592791080475,
"learning_rate": 0.0001616128,
"loss": 0.9448,
"step": 12000
},
{
"epoch": 0.19232,
"grad_norm": 0.24427416920661926,
"learning_rate": 0.00016154880000000002,
"loss": 0.8794,
"step": 12020
},
{
"epoch": 0.19264,
"grad_norm": 0.19025467336177826,
"learning_rate": 0.00016148480000000002,
"loss": 0.8535,
"step": 12040
},
{
"epoch": 0.19296,
"grad_norm": 0.21214129030704498,
"learning_rate": 0.0001614208,
"loss": 0.8756,
"step": 12060
},
{
"epoch": 0.19328,
"grad_norm": 0.2451871931552887,
"learning_rate": 0.0001613568,
"loss": 0.8858,
"step": 12080
},
{
"epoch": 0.1936,
"grad_norm": 0.23217494785785675,
"learning_rate": 0.0001612928,
"loss": 0.9066,
"step": 12100
},
{
"epoch": 0.19392,
"grad_norm": 0.2479615956544876,
"learning_rate": 0.0001612288,
"loss": 0.8477,
"step": 12120
},
{
"epoch": 0.19424,
"grad_norm": 0.20965996384620667,
"learning_rate": 0.0001611648,
"loss": 0.8573,
"step": 12140
},
{
"epoch": 0.19456,
"grad_norm": 0.19635817408561707,
"learning_rate": 0.00016110080000000001,
"loss": 0.9182,
"step": 12160
},
{
"epoch": 0.19488,
"grad_norm": 0.2266317903995514,
"learning_rate": 0.0001610368,
"loss": 0.9243,
"step": 12180
},
{
"epoch": 0.1952,
"grad_norm": 0.24232080578804016,
"learning_rate": 0.0001609728,
"loss": 0.8944,
"step": 12200
},
{
"epoch": 0.19552,
"grad_norm": 0.18726186454296112,
"learning_rate": 0.00016090880000000003,
"loss": 0.9084,
"step": 12220
},
{
"epoch": 0.19584,
"grad_norm": 0.25809457898139954,
"learning_rate": 0.00016084480000000002,
"loss": 0.8629,
"step": 12240
},
{
"epoch": 0.19616,
"grad_norm": 0.24405358731746674,
"learning_rate": 0.0001607808,
"loss": 0.9071,
"step": 12260
},
{
"epoch": 0.19648,
"grad_norm": 0.21723495423793793,
"learning_rate": 0.0001607168,
"loss": 0.8814,
"step": 12280
},
{
"epoch": 0.1968,
"grad_norm": 0.23140837252140045,
"learning_rate": 0.0001606528,
"loss": 0.8499,
"step": 12300
},
{
"epoch": 0.19712,
"grad_norm": 0.22470901906490326,
"learning_rate": 0.0001605888,
"loss": 0.9249,
"step": 12320
},
{
"epoch": 0.19744,
"grad_norm": 0.19264104962348938,
"learning_rate": 0.0001605248,
"loss": 0.9057,
"step": 12340
},
{
"epoch": 0.19776,
"grad_norm": 0.23376864194869995,
"learning_rate": 0.00016046080000000002,
"loss": 0.9535,
"step": 12360
},
{
"epoch": 0.19808,
"grad_norm": 0.2225295752286911,
"learning_rate": 0.00016039680000000002,
"loss": 0.865,
"step": 12380
},
{
"epoch": 0.1984,
"grad_norm": 0.23474235832691193,
"learning_rate": 0.0001603328,
"loss": 0.9137,
"step": 12400
},
{
"epoch": 0.19872,
"grad_norm": 0.29955846071243286,
"learning_rate": 0.0001602688,
"loss": 0.8618,
"step": 12420
},
{
"epoch": 0.19904,
"grad_norm": 0.25170376896858215,
"learning_rate": 0.0001602048,
"loss": 0.9341,
"step": 12440
},
{
"epoch": 0.19936,
"grad_norm": 0.23932316899299622,
"learning_rate": 0.0001601408,
"loss": 0.876,
"step": 12460
},
{
"epoch": 0.19968,
"grad_norm": 0.24285189807415009,
"learning_rate": 0.0001600768,
"loss": 0.8858,
"step": 12480
},
{
"epoch": 0.2,
"grad_norm": 0.23304852843284607,
"learning_rate": 0.00016001280000000001,
"loss": 0.8659,
"step": 12500
},
{
"epoch": 0.20032,
"grad_norm": 0.21106384694576263,
"learning_rate": 0.0001599488,
"loss": 0.8626,
"step": 12520
},
{
"epoch": 0.20064,
"grad_norm": 0.20884625613689423,
"learning_rate": 0.0001598848,
"loss": 0.8872,
"step": 12540
},
{
"epoch": 0.20096,
"grad_norm": 0.20588114857673645,
"learning_rate": 0.00015982080000000003,
"loss": 0.8462,
"step": 12560
},
{
"epoch": 0.20128,
"grad_norm": 0.2657853066921234,
"learning_rate": 0.00015975680000000002,
"loss": 0.9246,
"step": 12580
},
{
"epoch": 0.2016,
"grad_norm": 0.22846530377864838,
"learning_rate": 0.0001596928,
"loss": 0.8847,
"step": 12600
},
{
"epoch": 0.20192,
"grad_norm": 0.20565031468868256,
"learning_rate": 0.0001596288,
"loss": 0.8966,
"step": 12620
},
{
"epoch": 0.20224,
"grad_norm": 0.19185014069080353,
"learning_rate": 0.0001595648,
"loss": 0.9018,
"step": 12640
},
{
"epoch": 0.20256,
"grad_norm": 0.23399049043655396,
"learning_rate": 0.0001595008,
"loss": 0.9258,
"step": 12660
},
{
"epoch": 0.20288,
"grad_norm": 0.2446955144405365,
"learning_rate": 0.0001594368,
"loss": 0.8574,
"step": 12680
},
{
"epoch": 0.2032,
"grad_norm": 0.2344285249710083,
"learning_rate": 0.00015937280000000002,
"loss": 0.9078,
"step": 12700
},
{
"epoch": 0.20352,
"grad_norm": 0.2038036733865738,
"learning_rate": 0.00015930880000000002,
"loss": 0.9086,
"step": 12720
},
{
"epoch": 0.20384,
"grad_norm": 0.23228555917739868,
"learning_rate": 0.0001592448,
"loss": 0.931,
"step": 12740
},
{
"epoch": 0.20416,
"grad_norm": 0.2811441719532013,
"learning_rate": 0.0001591808,
"loss": 0.8438,
"step": 12760
},
{
"epoch": 0.20448,
"grad_norm": 0.2014266848564148,
"learning_rate": 0.0001591168,
"loss": 0.9311,
"step": 12780
},
{
"epoch": 0.2048,
"grad_norm": 0.23992010951042175,
"learning_rate": 0.0001590528,
"loss": 0.8965,
"step": 12800
},
{
"epoch": 0.20512,
"grad_norm": 0.25870153307914734,
"learning_rate": 0.0001589888,
"loss": 0.8959,
"step": 12820
},
{
"epoch": 0.20544,
"grad_norm": 0.24375873804092407,
"learning_rate": 0.00015892480000000001,
"loss": 0.8786,
"step": 12840
},
{
"epoch": 0.20576,
"grad_norm": 0.20621752738952637,
"learning_rate": 0.0001588608,
"loss": 0.8796,
"step": 12860
},
{
"epoch": 0.20608,
"grad_norm": 0.23437882959842682,
"learning_rate": 0.0001587968,
"loss": 0.8592,
"step": 12880
},
{
"epoch": 0.2064,
"grad_norm": 0.23581136763095856,
"learning_rate": 0.00015873280000000003,
"loss": 0.8651,
"step": 12900
},
{
"epoch": 0.20672,
"grad_norm": 0.24483484029769897,
"learning_rate": 0.00015866880000000002,
"loss": 0.9199,
"step": 12920
},
{
"epoch": 0.20704,
"grad_norm": 0.3012985289096832,
"learning_rate": 0.0001586048,
"loss": 0.859,
"step": 12940
},
{
"epoch": 0.20736,
"grad_norm": 0.26789209246635437,
"learning_rate": 0.0001585408,
"loss": 0.8816,
"step": 12960
},
{
"epoch": 0.20768,
"grad_norm": 0.21916130185127258,
"learning_rate": 0.0001584768,
"loss": 0.9345,
"step": 12980
},
{
"epoch": 0.208,
"grad_norm": 0.8556731343269348,
"learning_rate": 0.0001584128,
"loss": 0.9199,
"step": 13000
},
{
"epoch": 0.20832,
"grad_norm": 0.22015364468097687,
"learning_rate": 0.0001583488,
"loss": 0.8742,
"step": 13020
},
{
"epoch": 0.20864,
"grad_norm": 0.2598000168800354,
"learning_rate": 0.00015828480000000002,
"loss": 0.8665,
"step": 13040
},
{
"epoch": 0.20896,
"grad_norm": 0.22221586108207703,
"learning_rate": 0.00015822080000000001,
"loss": 0.892,
"step": 13060
},
{
"epoch": 0.20928,
"grad_norm": 0.2682360112667084,
"learning_rate": 0.0001581568,
"loss": 0.91,
"step": 13080
},
{
"epoch": 0.2096,
"grad_norm": 0.24058601260185242,
"learning_rate": 0.0001580928,
"loss": 0.8826,
"step": 13100
},
{
"epoch": 0.20992,
"grad_norm": 0.25506773591041565,
"learning_rate": 0.0001580288,
"loss": 0.8979,
"step": 13120
},
{
"epoch": 0.21024,
"grad_norm": 0.2581631541252136,
"learning_rate": 0.0001579648,
"loss": 0.9204,
"step": 13140
},
{
"epoch": 0.21056,
"grad_norm": 0.2511695623397827,
"learning_rate": 0.0001579008,
"loss": 0.8796,
"step": 13160
},
{
"epoch": 0.21088,
"grad_norm": 0.20950326323509216,
"learning_rate": 0.0001578368,
"loss": 0.8267,
"step": 13180
},
{
"epoch": 0.2112,
"grad_norm": 0.2644106149673462,
"learning_rate": 0.0001577728,
"loss": 0.9124,
"step": 13200
},
{
"epoch": 0.21152,
"grad_norm": 0.1935633271932602,
"learning_rate": 0.0001577088,
"loss": 0.8468,
"step": 13220
},
{
"epoch": 0.21184,
"grad_norm": 0.2543448507785797,
"learning_rate": 0.00015764480000000003,
"loss": 0.8965,
"step": 13240
},
{
"epoch": 0.21216,
"grad_norm": 0.27806851267814636,
"learning_rate": 0.00015758080000000002,
"loss": 0.894,
"step": 13260
},
{
"epoch": 0.21248,
"grad_norm": 0.18095877766609192,
"learning_rate": 0.0001575168,
"loss": 0.8876,
"step": 13280
},
{
"epoch": 0.2128,
"grad_norm": 0.21904884278774261,
"learning_rate": 0.0001574528,
"loss": 0.8835,
"step": 13300
},
{
"epoch": 0.21312,
"grad_norm": 0.25367972254753113,
"learning_rate": 0.0001573888,
"loss": 0.8688,
"step": 13320
},
{
"epoch": 0.21344,
"grad_norm": 0.261203408241272,
"learning_rate": 0.0001573248,
"loss": 0.9173,
"step": 13340
},
{
"epoch": 0.21376,
"grad_norm": 0.25779855251312256,
"learning_rate": 0.0001572608,
"loss": 0.9127,
"step": 13360
},
{
"epoch": 0.21408,
"grad_norm": 0.20082098245620728,
"learning_rate": 0.00015719680000000002,
"loss": 0.9433,
"step": 13380
},
{
"epoch": 0.2144,
"grad_norm": 0.22630241513252258,
"learning_rate": 0.00015713280000000001,
"loss": 0.9027,
"step": 13400
},
{
"epoch": 0.21472,
"grad_norm": 0.2328576296567917,
"learning_rate": 0.0001570688,
"loss": 0.8736,
"step": 13420
},
{
"epoch": 0.21504,
"grad_norm": 0.24743099510669708,
"learning_rate": 0.00015700480000000003,
"loss": 0.9568,
"step": 13440
},
{
"epoch": 0.21536,
"grad_norm": 0.23386693000793457,
"learning_rate": 0.0001569408,
"loss": 0.9131,
"step": 13460
},
{
"epoch": 0.21568,
"grad_norm": 0.2177802473306656,
"learning_rate": 0.0001568768,
"loss": 0.8948,
"step": 13480
},
{
"epoch": 0.216,
"grad_norm": 0.19793163239955902,
"learning_rate": 0.00015681280000000002,
"loss": 0.9037,
"step": 13500
},
{
"epoch": 0.21632,
"grad_norm": 0.6092952489852905,
"learning_rate": 0.0001567488,
"loss": 0.8912,
"step": 13520
},
{
"epoch": 0.21664,
"grad_norm": 0.21942676603794098,
"learning_rate": 0.0001566848,
"loss": 0.8476,
"step": 13540
},
{
"epoch": 0.21696,
"grad_norm": 0.2475002408027649,
"learning_rate": 0.000156624,
"loss": 0.8796,
"step": 13560
},
{
"epoch": 0.21728,
"grad_norm": 0.25338417291641235,
"learning_rate": 0.00015656,
"loss": 0.9155,
"step": 13580
},
{
"epoch": 0.2176,
"grad_norm": 0.22608576714992523,
"learning_rate": 0.000156496,
"loss": 0.8685,
"step": 13600
},
{
"epoch": 0.21792,
"grad_norm": 0.20519301295280457,
"learning_rate": 0.000156432,
"loss": 0.8913,
"step": 13620
},
{
"epoch": 0.21824,
"grad_norm": 0.20905616879463196,
"learning_rate": 0.000156368,
"loss": 0.9382,
"step": 13640
},
{
"epoch": 0.21856,
"grad_norm": 0.21286025643348694,
"learning_rate": 0.000156304,
"loss": 0.8659,
"step": 13660
},
{
"epoch": 0.21888,
"grad_norm": 0.23173551261425018,
"learning_rate": 0.00015624,
"loss": 0.845,
"step": 13680
},
{
"epoch": 0.2192,
"grad_norm": 0.2360743284225464,
"learning_rate": 0.000156176,
"loss": 0.9332,
"step": 13700
},
{
"epoch": 0.21952,
"grad_norm": 0.23367565870285034,
"learning_rate": 0.00015611200000000003,
"loss": 0.8805,
"step": 13720
},
{
"epoch": 0.21984,
"grad_norm": 0.2483336627483368,
"learning_rate": 0.00015604800000000002,
"loss": 0.9189,
"step": 13740
},
{
"epoch": 0.22016,
"grad_norm": 0.23518161475658417,
"learning_rate": 0.000155984,
"loss": 0.8927,
"step": 13760
},
{
"epoch": 0.22048,
"grad_norm": 0.2596130073070526,
"learning_rate": 0.00015592,
"loss": 0.8879,
"step": 13780
},
{
"epoch": 0.2208,
"grad_norm": 0.20567701756954193,
"learning_rate": 0.000155856,
"loss": 0.8677,
"step": 13800
},
{
"epoch": 0.22112,
"grad_norm": 0.21333087980747223,
"learning_rate": 0.000155792,
"loss": 0.8599,
"step": 13820
},
{
"epoch": 0.22144,
"grad_norm": 0.21102353930473328,
"learning_rate": 0.000155728,
"loss": 0.955,
"step": 13840
},
{
"epoch": 0.22176,
"grad_norm": 0.23368091881275177,
"learning_rate": 0.00015566400000000002,
"loss": 0.9107,
"step": 13860
},
{
"epoch": 0.22208,
"grad_norm": 0.2646392285823822,
"learning_rate": 0.00015560000000000001,
"loss": 0.8605,
"step": 13880
},
{
"epoch": 0.2224,
"grad_norm": 0.2340191900730133,
"learning_rate": 0.000155536,
"loss": 0.8651,
"step": 13900
},
{
"epoch": 0.22272,
"grad_norm": 0.22169966995716095,
"learning_rate": 0.000155472,
"loss": 0.8232,
"step": 13920
},
{
"epoch": 0.22304,
"grad_norm": 0.2382878214120865,
"learning_rate": 0.000155408,
"loss": 0.8581,
"step": 13940
},
{
"epoch": 0.22336,
"grad_norm": 0.22548457980155945,
"learning_rate": 0.000155344,
"loss": 0.9245,
"step": 13960
},
{
"epoch": 0.22368,
"grad_norm": 0.2386041283607483,
"learning_rate": 0.00015528,
"loss": 0.9159,
"step": 13980
},
{
"epoch": 0.224,
"grad_norm": 0.2749132812023163,
"learning_rate": 0.000155216,
"loss": 0.9209,
"step": 14000
},
{
"epoch": 0.22432,
"grad_norm": 0.21053732931613922,
"learning_rate": 0.000155152,
"loss": 0.8694,
"step": 14020
},
{
"epoch": 0.22464,
"grad_norm": 0.21479672193527222,
"learning_rate": 0.000155088,
"loss": 0.8775,
"step": 14040
},
{
"epoch": 0.22496,
"grad_norm": 0.21168935298919678,
"learning_rate": 0.00015502400000000003,
"loss": 0.886,
"step": 14060
},
{
"epoch": 0.22528,
"grad_norm": 0.23790377378463745,
"learning_rate": 0.00015496000000000002,
"loss": 0.9171,
"step": 14080
},
{
"epoch": 0.2256,
"grad_norm": 0.2546534240245819,
"learning_rate": 0.000154896,
"loss": 0.9024,
"step": 14100
},
{
"epoch": 0.22592,
"grad_norm": 0.21047984063625336,
"learning_rate": 0.000154832,
"loss": 0.9181,
"step": 14120
},
{
"epoch": 0.22624,
"grad_norm": 0.18703001737594604,
"learning_rate": 0.000154768,
"loss": 0.9229,
"step": 14140
},
{
"epoch": 0.22656,
"grad_norm": 0.2910281717777252,
"learning_rate": 0.000154704,
"loss": 0.8769,
"step": 14160
},
{
"epoch": 0.22688,
"grad_norm": 0.253282368183136,
"learning_rate": 0.00015464,
"loss": 0.8899,
"step": 14180
},
{
"epoch": 0.2272,
"grad_norm": 0.23244041204452515,
"learning_rate": 0.00015457600000000002,
"loss": 0.8847,
"step": 14200
},
{
"epoch": 0.22752,
"grad_norm": 0.2044428586959839,
"learning_rate": 0.00015451200000000001,
"loss": 0.8558,
"step": 14220
},
{
"epoch": 0.22784,
"grad_norm": 0.2259109914302826,
"learning_rate": 0.000154448,
"loss": 0.9359,
"step": 14240
},
{
"epoch": 0.22816,
"grad_norm": 0.19026106595993042,
"learning_rate": 0.000154384,
"loss": 0.888,
"step": 14260
},
{
"epoch": 0.22848,
"grad_norm": 0.26393407583236694,
"learning_rate": 0.00015432,
"loss": 0.9065,
"step": 14280
},
{
"epoch": 0.2288,
"grad_norm": 0.23802846670150757,
"learning_rate": 0.000154256,
"loss": 0.859,
"step": 14300
},
{
"epoch": 0.22912,
"grad_norm": 0.20962855219841003,
"learning_rate": 0.000154192,
"loss": 0.9316,
"step": 14320
},
{
"epoch": 0.22944,
"grad_norm": 0.24111364781856537,
"learning_rate": 0.000154128,
"loss": 0.8761,
"step": 14340
},
{
"epoch": 0.22976,
"grad_norm": 0.24475687742233276,
"learning_rate": 0.000154064,
"loss": 0.8639,
"step": 14360
},
{
"epoch": 0.23008,
"grad_norm": 0.2179078459739685,
"learning_rate": 0.000154,
"loss": 0.9153,
"step": 14380
},
{
"epoch": 0.2304,
"grad_norm": 0.21389590203762054,
"learning_rate": 0.00015393600000000003,
"loss": 0.8965,
"step": 14400
},
{
"epoch": 0.23072,
"grad_norm": 0.25422388315200806,
"learning_rate": 0.00015387200000000002,
"loss": 0.9019,
"step": 14420
},
{
"epoch": 0.23104,
"grad_norm": 0.25789642333984375,
"learning_rate": 0.000153808,
"loss": 0.8862,
"step": 14440
},
{
"epoch": 0.23136,
"grad_norm": 0.24445413053035736,
"learning_rate": 0.000153744,
"loss": 0.8686,
"step": 14460
},
{
"epoch": 0.23168,
"grad_norm": 0.2562089264392853,
"learning_rate": 0.00015368,
"loss": 0.8724,
"step": 14480
},
{
"epoch": 0.232,
"grad_norm": 0.22422178089618683,
"learning_rate": 0.000153616,
"loss": 0.8126,
"step": 14500
},
{
"epoch": 0.23232,
"grad_norm": 0.2669355571269989,
"learning_rate": 0.000153552,
"loss": 0.9321,
"step": 14520
},
{
"epoch": 0.23264,
"grad_norm": 0.22260543704032898,
"learning_rate": 0.00015348800000000002,
"loss": 0.9001,
"step": 14540
},
{
"epoch": 0.23296,
"grad_norm": 0.2247844934463501,
"learning_rate": 0.00015342400000000001,
"loss": 0.8703,
"step": 14560
},
{
"epoch": 0.23328,
"grad_norm": 0.21349264681339264,
"learning_rate": 0.00015336,
"loss": 0.9012,
"step": 14580
},
{
"epoch": 0.2336,
"grad_norm": 0.20764821767807007,
"learning_rate": 0.000153296,
"loss": 0.8589,
"step": 14600
},
{
"epoch": 0.23392,
"grad_norm": 0.2439945936203003,
"learning_rate": 0.000153232,
"loss": 0.8685,
"step": 14620
},
{
"epoch": 0.23424,
"grad_norm": 0.189644455909729,
"learning_rate": 0.000153168,
"loss": 0.8888,
"step": 14640
},
{
"epoch": 0.23456,
"grad_norm": 0.2418312132358551,
"learning_rate": 0.000153104,
"loss": 0.8894,
"step": 14660
},
{
"epoch": 0.23488,
"grad_norm": 0.2261509746313095,
"learning_rate": 0.00015304,
"loss": 0.8834,
"step": 14680
},
{
"epoch": 0.2352,
"grad_norm": 0.20159967243671417,
"learning_rate": 0.000152976,
"loss": 0.8805,
"step": 14700
},
{
"epoch": 0.23552,
"grad_norm": 0.20319266617298126,
"learning_rate": 0.000152912,
"loss": 0.9304,
"step": 14720
},
{
"epoch": 0.23584,
"grad_norm": 0.26556146144866943,
"learning_rate": 0.00015284800000000002,
"loss": 0.8703,
"step": 14740
},
{
"epoch": 0.23616,
"grad_norm": 0.2388124316930771,
"learning_rate": 0.00015278400000000002,
"loss": 0.9027,
"step": 14760
},
{
"epoch": 0.23648,
"grad_norm": 0.2560880184173584,
"learning_rate": 0.00015272,
"loss": 0.9153,
"step": 14780
},
{
"epoch": 0.2368,
"grad_norm": 0.2266043722629547,
"learning_rate": 0.000152656,
"loss": 0.915,
"step": 14800
},
{
"epoch": 0.23712,
"grad_norm": 0.21880818903446198,
"learning_rate": 0.000152592,
"loss": 0.8509,
"step": 14820
},
{
"epoch": 0.23744,
"grad_norm": 0.2733529806137085,
"learning_rate": 0.000152528,
"loss": 0.8412,
"step": 14840
},
{
"epoch": 0.23776,
"grad_norm": 0.2371928095817566,
"learning_rate": 0.000152464,
"loss": 0.9024,
"step": 14860
},
{
"epoch": 0.23808,
"grad_norm": 0.21131671965122223,
"learning_rate": 0.00015240000000000002,
"loss": 0.9014,
"step": 14880
},
{
"epoch": 0.2384,
"grad_norm": 0.22599981725215912,
"learning_rate": 0.000152336,
"loss": 0.8418,
"step": 14900
},
{
"epoch": 0.23872,
"grad_norm": 0.210512176156044,
"learning_rate": 0.000152272,
"loss": 0.8215,
"step": 14920
},
{
"epoch": 0.23904,
"grad_norm": 0.24387352168560028,
"learning_rate": 0.00015220800000000003,
"loss": 0.9528,
"step": 14940
},
{
"epoch": 0.23936,
"grad_norm": 0.23596692085266113,
"learning_rate": 0.000152144,
"loss": 0.9321,
"step": 14960
},
{
"epoch": 0.23968,
"grad_norm": 0.2662867307662964,
"learning_rate": 0.00015208,
"loss": 0.8687,
"step": 14980
},
{
"epoch": 0.24,
"grad_norm": 0.27276721596717834,
"learning_rate": 0.00015201600000000002,
"loss": 0.8885,
"step": 15000
},
{
"epoch": 0.24032,
"grad_norm": 0.2904922366142273,
"learning_rate": 0.000151952,
"loss": 0.905,
"step": 15020
},
{
"epoch": 0.24064,
"grad_norm": 0.22744856774806976,
"learning_rate": 0.000151888,
"loss": 0.875,
"step": 15040
},
{
"epoch": 0.24096,
"grad_norm": 0.21145053207874298,
"learning_rate": 0.000151824,
"loss": 0.9173,
"step": 15060
},
{
"epoch": 0.24128,
"grad_norm": 0.2397310584783554,
"learning_rate": 0.00015176000000000002,
"loss": 0.9416,
"step": 15080
},
{
"epoch": 0.2416,
"grad_norm": 0.255487322807312,
"learning_rate": 0.00015169600000000002,
"loss": 0.9074,
"step": 15100
},
{
"epoch": 0.24192,
"grad_norm": 0.20825912058353424,
"learning_rate": 0.000151632,
"loss": 0.9081,
"step": 15120
},
{
"epoch": 0.24224,
"grad_norm": 0.21789099276065826,
"learning_rate": 0.000151568,
"loss": 0.8599,
"step": 15140
},
{
"epoch": 0.24256,
"grad_norm": 0.26202690601348877,
"learning_rate": 0.000151504,
"loss": 0.9323,
"step": 15160
},
{
"epoch": 0.24288,
"grad_norm": 0.24351023137569427,
"learning_rate": 0.00015144,
"loss": 0.8613,
"step": 15180
},
{
"epoch": 0.2432,
"grad_norm": 0.24698816239833832,
"learning_rate": 0.000151376,
"loss": 0.9229,
"step": 15200
},
{
"epoch": 0.24352,
"grad_norm": 0.28698813915252686,
"learning_rate": 0.00015131200000000002,
"loss": 0.902,
"step": 15220
},
{
"epoch": 0.24384,
"grad_norm": 0.2190207839012146,
"learning_rate": 0.000151248,
"loss": 0.8347,
"step": 15240
},
{
"epoch": 0.24416,
"grad_norm": 0.25162091851234436,
"learning_rate": 0.000151184,
"loss": 0.9738,
"step": 15260
},
{
"epoch": 0.24448,
"grad_norm": 0.22512441873550415,
"learning_rate": 0.00015112000000000003,
"loss": 0.8972,
"step": 15280
},
{
"epoch": 0.2448,
"grad_norm": 0.2120593637228012,
"learning_rate": 0.000151056,
"loss": 0.9371,
"step": 15300
},
{
"epoch": 0.24512,
"grad_norm": 0.26042282581329346,
"learning_rate": 0.000150992,
"loss": 0.9792,
"step": 15320
},
{
"epoch": 0.24544,
"grad_norm": 0.23215824365615845,
"learning_rate": 0.00015092800000000002,
"loss": 0.8731,
"step": 15340
},
{
"epoch": 0.24576,
"grad_norm": 0.23564760386943817,
"learning_rate": 0.000150864,
"loss": 0.8924,
"step": 15360
},
{
"epoch": 0.24608,
"grad_norm": 0.234059140086174,
"learning_rate": 0.0001508,
"loss": 0.8987,
"step": 15380
},
{
"epoch": 0.2464,
"grad_norm": 0.24413174390792847,
"learning_rate": 0.000150736,
"loss": 0.8814,
"step": 15400
},
{
"epoch": 0.24672,
"grad_norm": 0.19461284577846527,
"learning_rate": 0.00015067200000000002,
"loss": 0.8885,
"step": 15420
},
{
"epoch": 0.24704,
"grad_norm": 0.21257640421390533,
"learning_rate": 0.00015060800000000002,
"loss": 0.8726,
"step": 15440
},
{
"epoch": 0.24736,
"grad_norm": 0.19599197804927826,
"learning_rate": 0.000150544,
"loss": 0.8769,
"step": 15460
},
{
"epoch": 0.24768,
"grad_norm": 0.2362959086894989,
"learning_rate": 0.00015048,
"loss": 0.9387,
"step": 15480
},
{
"epoch": 0.248,
"grad_norm": 0.22756963968276978,
"learning_rate": 0.000150416,
"loss": 0.8997,
"step": 15500
},
{
"epoch": 0.24832,
"grad_norm": 0.20421338081359863,
"learning_rate": 0.000150352,
"loss": 0.8393,
"step": 15520
},
{
"epoch": 0.24864,
"grad_norm": 0.22076188027858734,
"learning_rate": 0.000150288,
"loss": 0.9485,
"step": 15540
},
{
"epoch": 0.24896,
"grad_norm": 0.2736372649669647,
"learning_rate": 0.00015022400000000002,
"loss": 0.9198,
"step": 15560
},
{
"epoch": 0.24928,
"grad_norm": 0.21683241426944733,
"learning_rate": 0.00015016,
"loss": 0.896,
"step": 15580
},
{
"epoch": 0.2496,
"grad_norm": 0.2821408212184906,
"learning_rate": 0.000150096,
"loss": 0.9136,
"step": 15600
},
{
"epoch": 0.24992,
"grad_norm": 0.2574373781681061,
"learning_rate": 0.00015003200000000003,
"loss": 0.867,
"step": 15620
},
{
"epoch": 0.25024,
"grad_norm": 0.25345656275749207,
"learning_rate": 0.000149968,
"loss": 0.8558,
"step": 15640
},
{
"epoch": 0.25056,
"grad_norm": 0.22354301810264587,
"learning_rate": 0.000149904,
"loss": 0.8877,
"step": 15660
},
{
"epoch": 0.25088,
"grad_norm": 0.24617154896259308,
"learning_rate": 0.00014984000000000002,
"loss": 0.9129,
"step": 15680
},
{
"epoch": 0.2512,
"grad_norm": 0.2457919418811798,
"learning_rate": 0.000149776,
"loss": 0.8808,
"step": 15700
},
{
"epoch": 0.25152,
"grad_norm": 0.24559831619262695,
"learning_rate": 0.000149712,
"loss": 0.8843,
"step": 15720
},
{
"epoch": 0.25184,
"grad_norm": 0.23059086501598358,
"learning_rate": 0.0001496512,
"loss": 0.8913,
"step": 15740
},
{
"epoch": 0.25216,
"grad_norm": 0.23776483535766602,
"learning_rate": 0.0001495872,
"loss": 0.8903,
"step": 15760
},
{
"epoch": 0.25248,
"grad_norm": 0.22387710213661194,
"learning_rate": 0.0001495232,
"loss": 0.8357,
"step": 15780
},
{
"epoch": 0.2528,
"grad_norm": 0.25012654066085815,
"learning_rate": 0.0001494592,
"loss": 0.9193,
"step": 15800
},
{
"epoch": 0.25312,
"grad_norm": 0.24608831107616425,
"learning_rate": 0.0001493952,
"loss": 0.9439,
"step": 15820
},
{
"epoch": 0.25344,
"grad_norm": 0.2951606512069702,
"learning_rate": 0.0001493312,
"loss": 0.9266,
"step": 15840
},
{
"epoch": 0.25376,
"grad_norm": 0.2593064606189728,
"learning_rate": 0.0001492672,
"loss": 0.8889,
"step": 15860
},
{
"epoch": 0.25408,
"grad_norm": 0.26488035917282104,
"learning_rate": 0.0001492032,
"loss": 0.8961,
"step": 15880
},
{
"epoch": 0.2544,
"grad_norm": 0.23882393538951874,
"learning_rate": 0.00014913920000000002,
"loss": 0.8546,
"step": 15900
},
{
"epoch": 0.25472,
"grad_norm": 0.22206124663352966,
"learning_rate": 0.00014907520000000002,
"loss": 0.8872,
"step": 15920
},
{
"epoch": 0.25504,
"grad_norm": 0.2286020815372467,
"learning_rate": 0.00014901120000000001,
"loss": 0.8716,
"step": 15940
},
{
"epoch": 0.25536,
"grad_norm": 0.2474469095468521,
"learning_rate": 0.0001489472,
"loss": 0.876,
"step": 15960
},
{
"epoch": 0.25568,
"grad_norm": 0.24693526327610016,
"learning_rate": 0.0001488832,
"loss": 0.8774,
"step": 15980
},
{
"epoch": 0.256,
"grad_norm": 0.25829070806503296,
"learning_rate": 0.0001488192,
"loss": 0.9659,
"step": 16000
},
{
"epoch": 0.25632,
"grad_norm": 0.19258326292037964,
"learning_rate": 0.0001487552,
"loss": 0.8895,
"step": 16020
},
{
"epoch": 0.25664,
"grad_norm": 0.22530287504196167,
"learning_rate": 0.00014869120000000002,
"loss": 0.8625,
"step": 16040
},
{
"epoch": 0.25696,
"grad_norm": 0.23685386776924133,
"learning_rate": 0.0001486272,
"loss": 0.8744,
"step": 16060
},
{
"epoch": 0.25728,
"grad_norm": 0.2815619111061096,
"learning_rate": 0.0001485632,
"loss": 0.9046,
"step": 16080
},
{
"epoch": 0.2576,
"grad_norm": 0.2278144359588623,
"learning_rate": 0.0001484992,
"loss": 0.9233,
"step": 16100
},
{
"epoch": 0.25792,
"grad_norm": 0.23006929457187653,
"learning_rate": 0.0001484352,
"loss": 0.8888,
"step": 16120
},
{
"epoch": 0.25824,
"grad_norm": 0.23313170671463013,
"learning_rate": 0.0001483712,
"loss": 0.8838,
"step": 16140
},
{
"epoch": 0.25856,
"grad_norm": 0.1918276846408844,
"learning_rate": 0.0001483072,
"loss": 0.9102,
"step": 16160
},
{
"epoch": 0.25888,
"grad_norm": 0.22128306329250336,
"learning_rate": 0.0001482432,
"loss": 0.9111,
"step": 16180
},
{
"epoch": 0.2592,
"grad_norm": 0.2663705348968506,
"learning_rate": 0.0001481792,
"loss": 0.8658,
"step": 16200
},
{
"epoch": 0.25952,
"grad_norm": 0.2246493250131607,
"learning_rate": 0.0001481152,
"loss": 0.9076,
"step": 16220
},
{
"epoch": 0.25984,
"grad_norm": 0.276429146528244,
"learning_rate": 0.00014805120000000002,
"loss": 0.893,
"step": 16240
},
{
"epoch": 0.26016,
"grad_norm": 0.25722208619117737,
"learning_rate": 0.00014798720000000002,
"loss": 0.9124,
"step": 16260
},
{
"epoch": 0.26048,
"grad_norm": 0.2293781042098999,
"learning_rate": 0.00014792320000000001,
"loss": 0.9055,
"step": 16280
},
{
"epoch": 0.2608,
"grad_norm": 0.2425844967365265,
"learning_rate": 0.0001478592,
"loss": 0.9552,
"step": 16300
},
{
"epoch": 0.26112,
"grad_norm": 0.21894103288650513,
"learning_rate": 0.0001477952,
"loss": 0.9094,
"step": 16320
},
{
"epoch": 0.26144,
"grad_norm": 0.18762537837028503,
"learning_rate": 0.0001477312,
"loss": 0.864,
"step": 16340
},
{
"epoch": 0.26176,
"grad_norm": 0.24802207946777344,
"learning_rate": 0.0001476672,
"loss": 0.94,
"step": 16360
},
{
"epoch": 0.26208,
"grad_norm": 0.2168876677751541,
"learning_rate": 0.00014760320000000002,
"loss": 0.8795,
"step": 16380
},
{
"epoch": 0.2624,
"grad_norm": 0.21225641667842865,
"learning_rate": 0.0001475392,
"loss": 0.8783,
"step": 16400
},
{
"epoch": 0.26272,
"grad_norm": 0.22440536320209503,
"learning_rate": 0.0001474752,
"loss": 0.8984,
"step": 16420
},
{
"epoch": 0.26304,
"grad_norm": 0.21807946264743805,
"learning_rate": 0.0001474112,
"loss": 0.8949,
"step": 16440
},
{
"epoch": 0.26336,
"grad_norm": 0.27320024371147156,
"learning_rate": 0.0001473472,
"loss": 0.9225,
"step": 16460
},
{
"epoch": 0.26368,
"grad_norm": 0.2062409669160843,
"learning_rate": 0.0001472832,
"loss": 0.8705,
"step": 16480
},
{
"epoch": 0.264,
"grad_norm": 0.2158362716436386,
"learning_rate": 0.00014721920000000002,
"loss": 0.8971,
"step": 16500
},
{
"epoch": 0.26432,
"grad_norm": 0.38786885142326355,
"learning_rate": 0.0001471552,
"loss": 0.9119,
"step": 16520
},
{
"epoch": 0.26464,
"grad_norm": 0.17009785771369934,
"learning_rate": 0.0001470912,
"loss": 0.8909,
"step": 16540
},
{
"epoch": 0.26496,
"grad_norm": 0.1993030607700348,
"learning_rate": 0.0001470272,
"loss": 0.8962,
"step": 16560
},
{
"epoch": 0.26528,
"grad_norm": 0.20363526046276093,
"learning_rate": 0.00014696320000000002,
"loss": 0.8829,
"step": 16580
},
{
"epoch": 0.2656,
"grad_norm": 0.21756501495838165,
"learning_rate": 0.00014689920000000002,
"loss": 0.8796,
"step": 16600
},
{
"epoch": 0.26592,
"grad_norm": 0.23114700615406036,
"learning_rate": 0.0001468352,
"loss": 0.8857,
"step": 16620
},
{
"epoch": 0.26624,
"grad_norm": 0.2643290162086487,
"learning_rate": 0.0001467712,
"loss": 0.8865,
"step": 16640
},
{
"epoch": 0.26656,
"grad_norm": 0.2651253640651703,
"learning_rate": 0.0001467072,
"loss": 0.9395,
"step": 16660
},
{
"epoch": 0.26688,
"grad_norm": 0.2604687511920929,
"learning_rate": 0.0001466432,
"loss": 0.9293,
"step": 16680
},
{
"epoch": 0.2672,
"grad_norm": 0.26383012533187866,
"learning_rate": 0.0001465792,
"loss": 0.9427,
"step": 16700
},
{
"epoch": 0.26752,
"grad_norm": 0.21108990907669067,
"learning_rate": 0.00014651520000000002,
"loss": 0.878,
"step": 16720
},
{
"epoch": 0.26784,
"grad_norm": 0.19841574132442474,
"learning_rate": 0.0001464512,
"loss": 0.9178,
"step": 16740
},
{
"epoch": 0.26816,
"grad_norm": 0.18484389781951904,
"learning_rate": 0.0001463872,
"loss": 0.8609,
"step": 16760
},
{
"epoch": 0.26848,
"grad_norm": 0.22383546829223633,
"learning_rate": 0.00014632320000000003,
"loss": 0.8582,
"step": 16780
},
{
"epoch": 0.2688,
"grad_norm": 0.23623542487621307,
"learning_rate": 0.0001462592,
"loss": 0.9065,
"step": 16800
},
{
"epoch": 0.26912,
"grad_norm": 0.20453502237796783,
"learning_rate": 0.0001461952,
"loss": 0.873,
"step": 16820
},
{
"epoch": 0.26944,
"grad_norm": 0.2092786431312561,
"learning_rate": 0.00014613120000000001,
"loss": 0.8592,
"step": 16840
},
{
"epoch": 0.26976,
"grad_norm": 0.35000941157341003,
"learning_rate": 0.0001460672,
"loss": 0.9135,
"step": 16860
},
{
"epoch": 0.27008,
"grad_norm": 0.21759675443172455,
"learning_rate": 0.0001460032,
"loss": 0.9318,
"step": 16880
},
{
"epoch": 0.2704,
"grad_norm": 0.21129532158374786,
"learning_rate": 0.0001459392,
"loss": 0.8726,
"step": 16900
},
{
"epoch": 0.27072,
"grad_norm": 0.2558363974094391,
"learning_rate": 0.00014587520000000002,
"loss": 0.8606,
"step": 16920
},
{
"epoch": 0.27104,
"grad_norm": 0.24021393060684204,
"learning_rate": 0.00014581120000000002,
"loss": 0.9545,
"step": 16940
},
{
"epoch": 0.27136,
"grad_norm": 0.22521884739398956,
"learning_rate": 0.0001457472,
"loss": 0.8983,
"step": 16960
},
{
"epoch": 0.27168,
"grad_norm": 0.19116589426994324,
"learning_rate": 0.0001456832,
"loss": 0.8628,
"step": 16980
},
{
"epoch": 0.272,
"grad_norm": 0.25496795773506165,
"learning_rate": 0.0001456192,
"loss": 0.8648,
"step": 17000
},
{
"epoch": 0.27232,
"grad_norm": 0.21997754275798798,
"learning_rate": 0.0001455552,
"loss": 0.8864,
"step": 17020
},
{
"epoch": 0.27264,
"grad_norm": 0.2255709022283554,
"learning_rate": 0.0001454912,
"loss": 0.8818,
"step": 17040
},
{
"epoch": 0.27296,
"grad_norm": 0.2319864183664322,
"learning_rate": 0.00014542720000000002,
"loss": 0.9234,
"step": 17060
},
{
"epoch": 0.27328,
"grad_norm": 0.25236740708351135,
"learning_rate": 0.0001453632,
"loss": 0.9205,
"step": 17080
},
{
"epoch": 0.2736,
"grad_norm": 0.25668826699256897,
"learning_rate": 0.0001452992,
"loss": 0.8697,
"step": 17100
},
{
"epoch": 0.27392,
"grad_norm": 0.23997314274311066,
"learning_rate": 0.00014523520000000003,
"loss": 0.8752,
"step": 17120
},
{
"epoch": 0.27424,
"grad_norm": 0.26037994027137756,
"learning_rate": 0.0001451712,
"loss": 0.8955,
"step": 17140
},
{
"epoch": 0.27456,
"grad_norm": 0.2793257534503937,
"learning_rate": 0.0001451072,
"loss": 0.9114,
"step": 17160
},
{
"epoch": 0.27488,
"grad_norm": 0.2247275561094284,
"learning_rate": 0.00014504320000000001,
"loss": 0.8595,
"step": 17180
},
{
"epoch": 0.2752,
"grad_norm": 0.17990809679031372,
"learning_rate": 0.0001449792,
"loss": 0.8705,
"step": 17200
},
{
"epoch": 0.27552,
"grad_norm": 0.25493231415748596,
"learning_rate": 0.0001449152,
"loss": 0.8974,
"step": 17220
},
{
"epoch": 0.27584,
"grad_norm": 0.2407209277153015,
"learning_rate": 0.0001448512,
"loss": 0.8411,
"step": 17240
},
{
"epoch": 0.27616,
"grad_norm": 0.23049895465373993,
"learning_rate": 0.00014478720000000002,
"loss": 0.8879,
"step": 17260
},
{
"epoch": 0.27648,
"grad_norm": 0.2609173059463501,
"learning_rate": 0.00014472320000000002,
"loss": 0.841,
"step": 17280
},
{
"epoch": 0.2768,
"grad_norm": 0.25201836228370667,
"learning_rate": 0.0001446592,
"loss": 0.9323,
"step": 17300
},
{
"epoch": 0.27712,
"grad_norm": 0.2098960429430008,
"learning_rate": 0.0001445952,
"loss": 0.9157,
"step": 17320
},
{
"epoch": 0.27744,
"grad_norm": 0.20928995311260223,
"learning_rate": 0.0001445312,
"loss": 0.9038,
"step": 17340
},
{
"epoch": 0.27776,
"grad_norm": 0.27435973286628723,
"learning_rate": 0.0001444672,
"loss": 0.9151,
"step": 17360
},
{
"epoch": 0.27808,
"grad_norm": 0.21046458184719086,
"learning_rate": 0.0001444032,
"loss": 0.916,
"step": 17380
},
{
"epoch": 0.2784,
"grad_norm": 0.21415123343467712,
"learning_rate": 0.00014433920000000002,
"loss": 0.8068,
"step": 17400
},
{
"epoch": 0.27872,
"grad_norm": 0.23788805305957794,
"learning_rate": 0.0001442752,
"loss": 0.897,
"step": 17420
},
{
"epoch": 0.27904,
"grad_norm": 0.27183711528778076,
"learning_rate": 0.0001442112,
"loss": 0.9256,
"step": 17440
},
{
"epoch": 0.27936,
"grad_norm": 0.23290027678012848,
"learning_rate": 0.00014414720000000003,
"loss": 0.8824,
"step": 17460
},
{
"epoch": 0.27968,
"grad_norm": 0.228986918926239,
"learning_rate": 0.0001440832,
"loss": 0.8986,
"step": 17480
},
{
"epoch": 0.28,
"grad_norm": 0.2455400675535202,
"learning_rate": 0.0001440192,
"loss": 0.9284,
"step": 17500
},
{
"epoch": 0.28032,
"grad_norm": 0.23576192557811737,
"learning_rate": 0.00014395520000000001,
"loss": 0.9038,
"step": 17520
},
{
"epoch": 0.28064,
"grad_norm": 0.24957694113254547,
"learning_rate": 0.0001438912,
"loss": 0.8735,
"step": 17540
},
{
"epoch": 0.28096,
"grad_norm": 0.2399289608001709,
"learning_rate": 0.0001438272,
"loss": 0.8819,
"step": 17560
},
{
"epoch": 0.28128,
"grad_norm": 0.2208934873342514,
"learning_rate": 0.0001437664,
"loss": 0.8615,
"step": 17580
},
{
"epoch": 0.2816,
"grad_norm": 0.2641635835170746,
"learning_rate": 0.0001437024,
"loss": 0.8801,
"step": 17600
},
{
"epoch": 0.28192,
"grad_norm": 0.21068689227104187,
"learning_rate": 0.00014363840000000002,
"loss": 0.8928,
"step": 17620
},
{
"epoch": 0.28224,
"grad_norm": 0.22611364722251892,
"learning_rate": 0.0001435744,
"loss": 0.8997,
"step": 17640
},
{
"epoch": 0.28256,
"grad_norm": 0.21559686958789825,
"learning_rate": 0.0001435104,
"loss": 0.8757,
"step": 17660
},
{
"epoch": 0.28288,
"grad_norm": 0.2164774090051651,
"learning_rate": 0.0001434464,
"loss": 0.8621,
"step": 17680
},
{
"epoch": 0.2832,
"grad_norm": 0.2710246443748474,
"learning_rate": 0.0001433824,
"loss": 0.8788,
"step": 17700
},
{
"epoch": 0.28352,
"grad_norm": 0.22402608394622803,
"learning_rate": 0.0001433184,
"loss": 0.8655,
"step": 17720
},
{
"epoch": 0.28384,
"grad_norm": 0.240563303232193,
"learning_rate": 0.00014325440000000002,
"loss": 0.831,
"step": 17740
},
{
"epoch": 0.28416,
"grad_norm": 0.2877085208892822,
"learning_rate": 0.00014319040000000002,
"loss": 0.8939,
"step": 17760
},
{
"epoch": 0.28448,
"grad_norm": 0.21592877805233002,
"learning_rate": 0.0001431264,
"loss": 0.8441,
"step": 17780
},
{
"epoch": 0.2848,
"grad_norm": 1.138694167137146,
"learning_rate": 0.0001430624,
"loss": 0.921,
"step": 17800
},
{
"epoch": 0.28512,
"grad_norm": 0.22125540673732758,
"learning_rate": 0.0001429984,
"loss": 0.8695,
"step": 17820
},
{
"epoch": 0.28544,
"grad_norm": 0.2059083729982376,
"learning_rate": 0.0001429344,
"loss": 0.8554,
"step": 17840
},
{
"epoch": 0.28576,
"grad_norm": 0.2463064044713974,
"learning_rate": 0.0001428704,
"loss": 0.9121,
"step": 17860
},
{
"epoch": 0.28608,
"grad_norm": 0.228254035115242,
"learning_rate": 0.00014280640000000002,
"loss": 0.8945,
"step": 17880
},
{
"epoch": 0.2864,
"grad_norm": 0.23662838339805603,
"learning_rate": 0.0001427424,
"loss": 0.9431,
"step": 17900
},
{
"epoch": 0.28672,
"grad_norm": 0.22641515731811523,
"learning_rate": 0.0001426784,
"loss": 0.9033,
"step": 17920
},
{
"epoch": 0.28704,
"grad_norm": 0.23892265558242798,
"learning_rate": 0.0001426144,
"loss": 0.8623,
"step": 17940
},
{
"epoch": 0.28736,
"grad_norm": 0.2975479066371918,
"learning_rate": 0.00014255040000000002,
"loss": 0.8474,
"step": 17960
},
{
"epoch": 0.28768,
"grad_norm": 0.23718002438545227,
"learning_rate": 0.0001424864,
"loss": 0.9099,
"step": 17980
},
{
"epoch": 0.288,
"grad_norm": 0.2706455886363983,
"learning_rate": 0.0001424224,
"loss": 0.9357,
"step": 18000
},
{
"epoch": 0.28832,
"grad_norm": 0.260484516620636,
"learning_rate": 0.0001423584,
"loss": 0.9144,
"step": 18020
},
{
"epoch": 0.28864,
"grad_norm": 0.2651614844799042,
"learning_rate": 0.0001422944,
"loss": 0.8764,
"step": 18040
},
{
"epoch": 0.28896,
"grad_norm": 0.18904343247413635,
"learning_rate": 0.0001422304,
"loss": 0.9312,
"step": 18060
},
{
"epoch": 0.28928,
"grad_norm": 0.22476926445960999,
"learning_rate": 0.00014216640000000002,
"loss": 0.8732,
"step": 18080
},
{
"epoch": 0.2896,
"grad_norm": 0.25984928011894226,
"learning_rate": 0.00014210240000000002,
"loss": 0.8511,
"step": 18100
},
{
"epoch": 0.28992,
"grad_norm": 0.24901899695396423,
"learning_rate": 0.0001420384,
"loss": 0.8844,
"step": 18120
},
{
"epoch": 0.29024,
"grad_norm": 0.2536833882331848,
"learning_rate": 0.0001419744,
"loss": 0.9403,
"step": 18140
},
{
"epoch": 0.29056,
"grad_norm": 0.24617129564285278,
"learning_rate": 0.0001419104,
"loss": 0.9463,
"step": 18160
},
{
"epoch": 0.29088,
"grad_norm": 0.23471851646900177,
"learning_rate": 0.0001418464,
"loss": 0.8977,
"step": 18180
},
{
"epoch": 0.2912,
"grad_norm": 0.22955600917339325,
"learning_rate": 0.0001417824,
"loss": 0.87,
"step": 18200
},
{
"epoch": 0.29152,
"grad_norm": 0.24333995580673218,
"learning_rate": 0.00014171840000000002,
"loss": 0.8879,
"step": 18220
},
{
"epoch": 0.29184,
"grad_norm": 0.20572280883789062,
"learning_rate": 0.0001416544,
"loss": 0.8896,
"step": 18240
},
{
"epoch": 0.29216,
"grad_norm": 0.26233595609664917,
"learning_rate": 0.0001415904,
"loss": 0.9508,
"step": 18260
},
{
"epoch": 0.29248,
"grad_norm": 0.25485649704933167,
"learning_rate": 0.00014152640000000003,
"loss": 0.8671,
"step": 18280
},
{
"epoch": 0.2928,
"grad_norm": 0.2335824966430664,
"learning_rate": 0.00014146240000000002,
"loss": 0.8607,
"step": 18300
},
{
"epoch": 0.29312,
"grad_norm": 0.23609523475170135,
"learning_rate": 0.0001413984,
"loss": 0.8851,
"step": 18320
},
{
"epoch": 0.29344,
"grad_norm": 0.23384864628314972,
"learning_rate": 0.0001413344,
"loss": 0.8432,
"step": 18340
},
{
"epoch": 0.29376,
"grad_norm": 0.20632806420326233,
"learning_rate": 0.0001412704,
"loss": 0.8929,
"step": 18360
},
{
"epoch": 0.29408,
"grad_norm": 0.23396439850330353,
"learning_rate": 0.0001412064,
"loss": 0.9085,
"step": 18380
},
{
"epoch": 0.2944,
"grad_norm": 0.22463780641555786,
"learning_rate": 0.0001411424,
"loss": 0.882,
"step": 18400
},
{
"epoch": 0.29472,
"grad_norm": 0.2543090581893921,
"learning_rate": 0.00014107840000000002,
"loss": 0.8529,
"step": 18420
},
{
"epoch": 0.29504,
"grad_norm": 0.23137034475803375,
"learning_rate": 0.00014101440000000002,
"loss": 0.8763,
"step": 18440
},
{
"epoch": 0.29536,
"grad_norm": 0.2112882137298584,
"learning_rate": 0.0001409504,
"loss": 0.8764,
"step": 18460
},
{
"epoch": 0.29568,
"grad_norm": 0.26310333609580994,
"learning_rate": 0.0001408864,
"loss": 0.8922,
"step": 18480
},
{
"epoch": 0.296,
"grad_norm": 0.17188023030757904,
"learning_rate": 0.0001408224,
"loss": 0.8667,
"step": 18500
},
{
"epoch": 0.29632,
"grad_norm": 0.21832036972045898,
"learning_rate": 0.0001407584,
"loss": 0.8365,
"step": 18520
},
{
"epoch": 0.29664,
"grad_norm": 0.22188645601272583,
"learning_rate": 0.0001406944,
"loss": 0.8851,
"step": 18540
},
{
"epoch": 0.29696,
"grad_norm": 0.21572217345237732,
"learning_rate": 0.00014063040000000001,
"loss": 0.9154,
"step": 18560
},
{
"epoch": 0.29728,
"grad_norm": 0.27105310559272766,
"learning_rate": 0.0001405664,
"loss": 0.9377,
"step": 18580
},
{
"epoch": 0.2976,
"grad_norm": 0.24213473498821259,
"learning_rate": 0.0001405024,
"loss": 0.8637,
"step": 18600
},
{
"epoch": 0.29792,
"grad_norm": 0.2412949949502945,
"learning_rate": 0.00014043840000000003,
"loss": 0.8406,
"step": 18620
},
{
"epoch": 0.29824,
"grad_norm": 0.22756318747997284,
"learning_rate": 0.00014037440000000002,
"loss": 0.9136,
"step": 18640
},
{
"epoch": 0.29856,
"grad_norm": 0.27023327350616455,
"learning_rate": 0.0001403104,
"loss": 0.903,
"step": 18660
},
{
"epoch": 0.29888,
"grad_norm": 0.2557690739631653,
"learning_rate": 0.0001402464,
"loss": 0.91,
"step": 18680
},
{
"epoch": 0.2992,
"grad_norm": 0.2063320279121399,
"learning_rate": 0.0001401824,
"loss": 0.8563,
"step": 18700
},
{
"epoch": 0.29952,
"grad_norm": 0.2544916570186615,
"learning_rate": 0.0001401184,
"loss": 0.874,
"step": 18720
},
{
"epoch": 0.29984,
"grad_norm": 0.2347075492143631,
"learning_rate": 0.0001400544,
"loss": 0.9113,
"step": 18740
},
{
"epoch": 0.30016,
"grad_norm": 0.22835484147071838,
"learning_rate": 0.00013999040000000002,
"loss": 0.9441,
"step": 18760
},
{
"epoch": 0.30048,
"grad_norm": 0.2611948847770691,
"learning_rate": 0.00013992640000000002,
"loss": 0.8588,
"step": 18780
},
{
"epoch": 0.3008,
"grad_norm": 0.2237863838672638,
"learning_rate": 0.0001398624,
"loss": 0.8968,
"step": 18800
},
{
"epoch": 0.30112,
"grad_norm": 0.25444677472114563,
"learning_rate": 0.0001397984,
"loss": 0.8699,
"step": 18820
},
{
"epoch": 0.30144,
"grad_norm": 0.22238390147686005,
"learning_rate": 0.0001397344,
"loss": 0.8958,
"step": 18840
},
{
"epoch": 0.30176,
"grad_norm": 0.2588092088699341,
"learning_rate": 0.0001396704,
"loss": 0.8379,
"step": 18860
},
{
"epoch": 0.30208,
"grad_norm": 0.32045722007751465,
"learning_rate": 0.0001396064,
"loss": 0.8985,
"step": 18880
},
{
"epoch": 0.3024,
"grad_norm": 0.277260959148407,
"learning_rate": 0.00013954240000000001,
"loss": 0.9429,
"step": 18900
},
{
"epoch": 0.30272,
"grad_norm": 0.21729040145874023,
"learning_rate": 0.0001394784,
"loss": 0.8797,
"step": 18920
},
{
"epoch": 0.30304,
"grad_norm": 0.21643051505088806,
"learning_rate": 0.0001394144,
"loss": 0.8601,
"step": 18940
},
{
"epoch": 0.30336,
"grad_norm": 0.2492791712284088,
"learning_rate": 0.00013935040000000003,
"loss": 0.8796,
"step": 18960
},
{
"epoch": 0.30368,
"grad_norm": 0.22013212740421295,
"learning_rate": 0.00013928640000000002,
"loss": 0.8746,
"step": 18980
},
{
"epoch": 0.304,
"grad_norm": 0.25737157464027405,
"learning_rate": 0.0001392224,
"loss": 0.9122,
"step": 19000
},
{
"epoch": 0.30432,
"grad_norm": 0.2505769729614258,
"learning_rate": 0.0001391584,
"loss": 0.9256,
"step": 19020
},
{
"epoch": 0.30464,
"grad_norm": 0.23468899726867676,
"learning_rate": 0.0001390944,
"loss": 0.9104,
"step": 19040
},
{
"epoch": 0.30496,
"grad_norm": 0.2267698347568512,
"learning_rate": 0.0001390304,
"loss": 0.9157,
"step": 19060
},
{
"epoch": 0.30528,
"grad_norm": 0.25038978457450867,
"learning_rate": 0.0001389664,
"loss": 0.9413,
"step": 19080
},
{
"epoch": 0.3056,
"grad_norm": 0.2678689658641815,
"learning_rate": 0.00013890240000000002,
"loss": 0.879,
"step": 19100
},
{
"epoch": 0.30592,
"grad_norm": 0.2314527928829193,
"learning_rate": 0.00013883840000000002,
"loss": 0.8693,
"step": 19120
},
{
"epoch": 0.30624,
"grad_norm": 0.2365267276763916,
"learning_rate": 0.0001387744,
"loss": 0.8498,
"step": 19140
},
{
"epoch": 0.30656,
"grad_norm": 0.24371282756328583,
"learning_rate": 0.0001387104,
"loss": 0.8604,
"step": 19160
},
{
"epoch": 0.30688,
"grad_norm": 0.3142736554145813,
"learning_rate": 0.0001386464,
"loss": 0.8808,
"step": 19180
},
{
"epoch": 0.3072,
"grad_norm": 0.29017287492752075,
"learning_rate": 0.0001385824,
"loss": 0.9096,
"step": 19200
},
{
"epoch": 0.30752,
"grad_norm": 0.26269444823265076,
"learning_rate": 0.0001385184,
"loss": 0.8378,
"step": 19220
},
{
"epoch": 0.30784,
"grad_norm": 0.22420069575309753,
"learning_rate": 0.00013845440000000001,
"loss": 0.8637,
"step": 19240
},
{
"epoch": 0.30816,
"grad_norm": 0.237029567360878,
"learning_rate": 0.0001383904,
"loss": 0.9015,
"step": 19260
},
{
"epoch": 0.30848,
"grad_norm": 0.2280898541212082,
"learning_rate": 0.0001383264,
"loss": 0.888,
"step": 19280
},
{
"epoch": 0.3088,
"grad_norm": 0.27057039737701416,
"learning_rate": 0.00013826240000000003,
"loss": 0.8871,
"step": 19300
},
{
"epoch": 0.30912,
"grad_norm": 0.20367485284805298,
"learning_rate": 0.00013819840000000002,
"loss": 0.8596,
"step": 19320
},
{
"epoch": 0.30944,
"grad_norm": 0.2941890060901642,
"learning_rate": 0.0001381344,
"loss": 0.9363,
"step": 19340
},
{
"epoch": 0.30976,
"grad_norm": 0.2736791968345642,
"learning_rate": 0.0001380704,
"loss": 0.8547,
"step": 19360
},
{
"epoch": 0.31008,
"grad_norm": 0.26755064725875854,
"learning_rate": 0.0001380064,
"loss": 0.8846,
"step": 19380
},
{
"epoch": 0.3104,
"grad_norm": 0.2217639535665512,
"learning_rate": 0.0001379424,
"loss": 0.9354,
"step": 19400
},
{
"epoch": 0.31072,
"grad_norm": 0.26769986748695374,
"learning_rate": 0.0001378784,
"loss": 0.8951,
"step": 19420
},
{
"epoch": 0.31104,
"grad_norm": 0.2696346938610077,
"learning_rate": 0.00013781440000000002,
"loss": 0.886,
"step": 19440
},
{
"epoch": 0.31136,
"grad_norm": 0.24080106616020203,
"learning_rate": 0.00013775040000000002,
"loss": 0.8534,
"step": 19460
},
{
"epoch": 0.31168,
"grad_norm": 0.2640572786331177,
"learning_rate": 0.0001376864,
"loss": 0.9043,
"step": 19480
},
{
"epoch": 0.312,
"grad_norm": 0.22785134613513947,
"learning_rate": 0.0001376224,
"loss": 0.9025,
"step": 19500
},
{
"epoch": 0.31232,
"grad_norm": 0.25118863582611084,
"learning_rate": 0.0001375584,
"loss": 0.9085,
"step": 19520
},
{
"epoch": 0.31264,
"grad_norm": 0.25034481287002563,
"learning_rate": 0.0001374944,
"loss": 0.9055,
"step": 19540
},
{
"epoch": 0.31296,
"grad_norm": 0.2614835798740387,
"learning_rate": 0.0001374304,
"loss": 0.8826,
"step": 19560
},
{
"epoch": 0.31328,
"grad_norm": 0.24192634224891663,
"learning_rate": 0.0001373664,
"loss": 0.8654,
"step": 19580
},
{
"epoch": 0.3136,
"grad_norm": 0.21241247653961182,
"learning_rate": 0.0001373024,
"loss": 0.8953,
"step": 19600
},
{
"epoch": 0.31392,
"grad_norm": 0.2508241832256317,
"learning_rate": 0.0001372384,
"loss": 0.8859,
"step": 19620
},
{
"epoch": 0.31424,
"grad_norm": 0.21773919463157654,
"learning_rate": 0.00013717440000000003,
"loss": 0.9142,
"step": 19640
},
{
"epoch": 0.31456,
"grad_norm": 0.24207216501235962,
"learning_rate": 0.00013711040000000002,
"loss": 0.9151,
"step": 19660
},
{
"epoch": 0.31488,
"grad_norm": 0.2574104368686676,
"learning_rate": 0.0001370464,
"loss": 0.9325,
"step": 19680
},
{
"epoch": 0.3152,
"grad_norm": 0.27459943294525146,
"learning_rate": 0.0001369824,
"loss": 0.9121,
"step": 19700
},
{
"epoch": 0.31552,
"grad_norm": 0.29292821884155273,
"learning_rate": 0.0001369184,
"loss": 0.8728,
"step": 19720
},
{
"epoch": 0.31584,
"grad_norm": 0.24154408276081085,
"learning_rate": 0.0001368544,
"loss": 0.9036,
"step": 19740
},
{
"epoch": 0.31616,
"grad_norm": 0.24163678288459778,
"learning_rate": 0.0001367904,
"loss": 0.894,
"step": 19760
},
{
"epoch": 0.31648,
"grad_norm": 0.24694600701332092,
"learning_rate": 0.00013672640000000002,
"loss": 0.8377,
"step": 19780
},
{
"epoch": 0.3168,
"grad_norm": 0.24963949620723724,
"learning_rate": 0.00013666240000000001,
"loss": 0.8749,
"step": 19800
},
{
"epoch": 0.31712,
"grad_norm": 0.22724537551403046,
"learning_rate": 0.0001365984,
"loss": 0.8355,
"step": 19820
},
{
"epoch": 0.31744,
"grad_norm": 0.25323203206062317,
"learning_rate": 0.0001365344,
"loss": 0.8674,
"step": 19840
},
{
"epoch": 0.31776,
"grad_norm": 0.23880919814109802,
"learning_rate": 0.0001364704,
"loss": 0.8944,
"step": 19860
},
{
"epoch": 0.31808,
"grad_norm": 0.27053314447402954,
"learning_rate": 0.0001364064,
"loss": 0.9309,
"step": 19880
},
{
"epoch": 0.3184,
"grad_norm": 0.23681063950061798,
"learning_rate": 0.0001363424,
"loss": 0.9299,
"step": 19900
},
{
"epoch": 0.31872,
"grad_norm": 0.23930998146533966,
"learning_rate": 0.0001362784,
"loss": 0.87,
"step": 19920
},
{
"epoch": 0.31904,
"grad_norm": 0.2663067877292633,
"learning_rate": 0.0001362144,
"loss": 0.8916,
"step": 19940
},
{
"epoch": 0.31936,
"grad_norm": 0.24097581207752228,
"learning_rate": 0.0001361504,
"loss": 0.8894,
"step": 19960
},
{
"epoch": 0.31968,
"grad_norm": 0.2457173615694046,
"learning_rate": 0.00013608640000000003,
"loss": 0.882,
"step": 19980
},
{
"epoch": 0.32,
"grad_norm": 0.24961303174495697,
"learning_rate": 0.00013602240000000002,
"loss": 0.9111,
"step": 20000
},
{
"epoch": 0.32032,
"grad_norm": 0.2094404697418213,
"learning_rate": 0.0001359584,
"loss": 0.8509,
"step": 20020
},
{
"epoch": 0.32064,
"grad_norm": 0.259491890668869,
"learning_rate": 0.0001358944,
"loss": 0.9049,
"step": 20040
},
{
"epoch": 0.32096,
"grad_norm": 0.23153385519981384,
"learning_rate": 0.0001358304,
"loss": 0.9332,
"step": 20060
},
{
"epoch": 0.32128,
"grad_norm": 0.22706463932991028,
"learning_rate": 0.0001357664,
"loss": 0.9113,
"step": 20080
},
{
"epoch": 0.3216,
"grad_norm": 0.24029311537742615,
"learning_rate": 0.0001357024,
"loss": 0.8789,
"step": 20100
},
{
"epoch": 0.32192,
"grad_norm": 0.2065390646457672,
"learning_rate": 0.00013563840000000002,
"loss": 0.8984,
"step": 20120
},
{
"epoch": 0.32224,
"grad_norm": 0.2206522673368454,
"learning_rate": 0.00013557440000000001,
"loss": 0.8654,
"step": 20140
},
{
"epoch": 0.32256,
"grad_norm": 0.25168558955192566,
"learning_rate": 0.0001355104,
"loss": 0.8857,
"step": 20160
},
{
"epoch": 0.32288,
"grad_norm": 0.33041876554489136,
"learning_rate": 0.0001354464,
"loss": 0.8478,
"step": 20180
},
{
"epoch": 0.3232,
"grad_norm": 0.24307800829410553,
"learning_rate": 0.0001353824,
"loss": 0.8422,
"step": 20200
},
{
"epoch": 0.32352,
"grad_norm": 0.21959048509597778,
"learning_rate": 0.0001353184,
"loss": 0.9439,
"step": 20220
},
{
"epoch": 0.32384,
"grad_norm": 0.2573561668395996,
"learning_rate": 0.0001352544,
"loss": 0.9243,
"step": 20240
},
{
"epoch": 0.32416,
"grad_norm": 0.21520912647247314,
"learning_rate": 0.0001351904,
"loss": 0.8948,
"step": 20260
},
{
"epoch": 0.32448,
"grad_norm": 0.1714809685945511,
"learning_rate": 0.0001351264,
"loss": 0.8826,
"step": 20280
},
{
"epoch": 0.3248,
"grad_norm": 0.24606648087501526,
"learning_rate": 0.0001350624,
"loss": 0.8686,
"step": 20300
},
{
"epoch": 0.32512,
"grad_norm": 0.2979150116443634,
"learning_rate": 0.00013499840000000003,
"loss": 0.9369,
"step": 20320
},
{
"epoch": 0.32544,
"grad_norm": 0.2468058317899704,
"learning_rate": 0.00013493440000000002,
"loss": 0.9155,
"step": 20340
},
{
"epoch": 0.32576,
"grad_norm": 0.24256190657615662,
"learning_rate": 0.0001348704,
"loss": 0.8497,
"step": 20360
},
{
"epoch": 0.32608,
"grad_norm": 0.18389752507209778,
"learning_rate": 0.0001348064,
"loss": 0.904,
"step": 20380
},
{
"epoch": 0.3264,
"grad_norm": 0.3476732075214386,
"learning_rate": 0.0001347424,
"loss": 0.8942,
"step": 20400
},
{
"epoch": 0.32672,
"grad_norm": 0.24657464027404785,
"learning_rate": 0.0001346784,
"loss": 0.8558,
"step": 20420
},
{
"epoch": 0.32704,
"grad_norm": 0.23047630488872528,
"learning_rate": 0.0001346144,
"loss": 0.8671,
"step": 20440
},
{
"epoch": 0.32736,
"grad_norm": 0.26599422097206116,
"learning_rate": 0.00013455040000000002,
"loss": 0.8681,
"step": 20460
},
{
"epoch": 0.32768,
"grad_norm": 0.21226702630519867,
"learning_rate": 0.00013448640000000001,
"loss": 0.8737,
"step": 20480
},
{
"epoch": 0.328,
"grad_norm": 0.24917374551296234,
"learning_rate": 0.0001344224,
"loss": 0.8631,
"step": 20500
},
{
"epoch": 0.32832,
"grad_norm": 0.23581089079380035,
"learning_rate": 0.0001343584,
"loss": 0.8835,
"step": 20520
},
{
"epoch": 0.32864,
"grad_norm": 0.20437762141227722,
"learning_rate": 0.0001342944,
"loss": 0.8462,
"step": 20540
},
{
"epoch": 0.32896,
"grad_norm": 0.2745993733406067,
"learning_rate": 0.0001342304,
"loss": 0.8921,
"step": 20560
},
{
"epoch": 0.32928,
"grad_norm": 0.2283765822649002,
"learning_rate": 0.0001341664,
"loss": 0.9724,
"step": 20580
},
{
"epoch": 0.3296,
"grad_norm": 0.23891101777553558,
"learning_rate": 0.0001341024,
"loss": 0.8836,
"step": 20600
},
{
"epoch": 0.32992,
"grad_norm": 0.29253366589546204,
"learning_rate": 0.0001340384,
"loss": 0.8708,
"step": 20620
},
{
"epoch": 0.33024,
"grad_norm": 0.21287086606025696,
"learning_rate": 0.0001339744,
"loss": 0.8523,
"step": 20640
},
{
"epoch": 0.33056,
"grad_norm": 0.25029948353767395,
"learning_rate": 0.00013391040000000002,
"loss": 0.93,
"step": 20660
},
{
"epoch": 0.33088,
"grad_norm": 0.2445763796567917,
"learning_rate": 0.00013384640000000002,
"loss": 0.8435,
"step": 20680
},
{
"epoch": 0.3312,
"grad_norm": 0.23780953884124756,
"learning_rate": 0.0001337824,
"loss": 0.8524,
"step": 20700
},
{
"epoch": 0.33152,
"grad_norm": 0.24498897790908813,
"learning_rate": 0.0001337184,
"loss": 0.8806,
"step": 20720
},
{
"epoch": 0.33184,
"grad_norm": 0.24283845722675323,
"learning_rate": 0.0001336544,
"loss": 0.9041,
"step": 20740
},
{
"epoch": 0.33216,
"grad_norm": 0.25686827301979065,
"learning_rate": 0.0001335904,
"loss": 0.8836,
"step": 20760
},
{
"epoch": 0.33248,
"grad_norm": 0.19892160594463348,
"learning_rate": 0.0001335264,
"loss": 0.846,
"step": 20780
},
{
"epoch": 0.3328,
"grad_norm": 0.22868989408016205,
"learning_rate": 0.00013346240000000002,
"loss": 0.9193,
"step": 20800
},
{
"epoch": 0.33312,
"grad_norm": 0.22255489230155945,
"learning_rate": 0.0001333984,
"loss": 0.898,
"step": 20820
},
{
"epoch": 0.33344,
"grad_norm": 0.20357553660869598,
"learning_rate": 0.0001333344,
"loss": 0.8621,
"step": 20840
},
{
"epoch": 0.33376,
"grad_norm": 0.21813243627548218,
"learning_rate": 0.0001332704,
"loss": 0.8745,
"step": 20860
},
{
"epoch": 0.33408,
"grad_norm": 0.2878071069717407,
"learning_rate": 0.0001332064,
"loss": 0.8639,
"step": 20880
},
{
"epoch": 0.3344,
"grad_norm": 0.2555156946182251,
"learning_rate": 0.0001331424,
"loss": 0.8748,
"step": 20900
},
{
"epoch": 0.33472,
"grad_norm": 0.23196488618850708,
"learning_rate": 0.00013307840000000002,
"loss": 0.8921,
"step": 20920
},
{
"epoch": 0.33504,
"grad_norm": 0.24895872175693512,
"learning_rate": 0.0001330144,
"loss": 0.9024,
"step": 20940
},
{
"epoch": 0.33536,
"grad_norm": 0.26333311200141907,
"learning_rate": 0.0001329504,
"loss": 0.9119,
"step": 20960
},
{
"epoch": 0.33568,
"grad_norm": 0.23664534091949463,
"learning_rate": 0.0001328864,
"loss": 0.8766,
"step": 20980
},
{
"epoch": 0.336,
"grad_norm": 0.2326672226190567,
"learning_rate": 0.00013282240000000002,
"loss": 0.8263,
"step": 21000
},
{
"epoch": 0.33632,
"grad_norm": 0.23202668130397797,
"learning_rate": 0.00013275840000000002,
"loss": 0.9158,
"step": 21020
},
{
"epoch": 0.33664,
"grad_norm": 0.2137191891670227,
"learning_rate": 0.0001326944,
"loss": 0.9182,
"step": 21040
},
{
"epoch": 0.33696,
"grad_norm": 0.22848786413669586,
"learning_rate": 0.0001326304,
"loss": 0.8647,
"step": 21060
},
{
"epoch": 0.33728,
"grad_norm": 0.2685950994491577,
"learning_rate": 0.0001325664,
"loss": 0.8808,
"step": 21080
},
{
"epoch": 0.3376,
"grad_norm": 0.22779199481010437,
"learning_rate": 0.0001325024,
"loss": 0.9204,
"step": 21100
},
{
"epoch": 0.33792,
"grad_norm": 0.23398426175117493,
"learning_rate": 0.0001324384,
"loss": 0.8765,
"step": 21120
},
{
"epoch": 0.33824,
"grad_norm": 0.2746240794658661,
"learning_rate": 0.00013237440000000002,
"loss": 0.9218,
"step": 21140
},
{
"epoch": 0.33856,
"grad_norm": 0.21966886520385742,
"learning_rate": 0.0001323104,
"loss": 0.8135,
"step": 21160
},
{
"epoch": 0.33888,
"grad_norm": 0.24707438051700592,
"learning_rate": 0.0001322464,
"loss": 0.9269,
"step": 21180
},
{
"epoch": 0.3392,
"grad_norm": 0.23954932391643524,
"learning_rate": 0.00013218240000000003,
"loss": 0.8461,
"step": 21200
},
{
"epoch": 0.33952,
"grad_norm": 0.2673095762729645,
"learning_rate": 0.0001321184,
"loss": 0.8173,
"step": 21220
},
{
"epoch": 0.33984,
"grad_norm": 0.2744886577129364,
"learning_rate": 0.0001320544,
"loss": 0.9024,
"step": 21240
},
{
"epoch": 0.34016,
"grad_norm": 0.2099352777004242,
"learning_rate": 0.00013199040000000002,
"loss": 0.8744,
"step": 21260
},
{
"epoch": 0.34048,
"grad_norm": 0.24516913294792175,
"learning_rate": 0.0001319264,
"loss": 0.9172,
"step": 21280
},
{
"epoch": 0.3408,
"grad_norm": 0.2523512840270996,
"learning_rate": 0.0001318624,
"loss": 0.8656,
"step": 21300
},
{
"epoch": 0.34112,
"grad_norm": 0.23334811627864838,
"learning_rate": 0.0001317984,
"loss": 0.9486,
"step": 21320
},
{
"epoch": 0.34144,
"grad_norm": 0.2607022225856781,
"learning_rate": 0.00013173440000000002,
"loss": 0.9262,
"step": 21340
},
{
"epoch": 0.34176,
"grad_norm": 0.2326965034008026,
"learning_rate": 0.00013167040000000002,
"loss": 0.88,
"step": 21360
},
{
"epoch": 0.34208,
"grad_norm": 0.26053765416145325,
"learning_rate": 0.0001316064,
"loss": 0.926,
"step": 21380
},
{
"epoch": 0.3424,
"grad_norm": 0.24075692892074585,
"learning_rate": 0.0001315424,
"loss": 0.8301,
"step": 21400
},
{
"epoch": 0.34272,
"grad_norm": 0.2621121108531952,
"learning_rate": 0.0001314784,
"loss": 0.8895,
"step": 21420
},
{
"epoch": 0.34304,
"grad_norm": 0.26628807187080383,
"learning_rate": 0.0001314144,
"loss": 0.8634,
"step": 21440
},
{
"epoch": 0.34336,
"grad_norm": 0.301937073469162,
"learning_rate": 0.0001313504,
"loss": 0.8876,
"step": 21460
},
{
"epoch": 0.34368,
"grad_norm": 0.2092495709657669,
"learning_rate": 0.00013128640000000002,
"loss": 0.917,
"step": 21480
},
{
"epoch": 0.344,
"grad_norm": 0.2542109191417694,
"learning_rate": 0.0001312224,
"loss": 0.8835,
"step": 21500
},
{
"epoch": 0.34432,
"grad_norm": 0.3914921283721924,
"learning_rate": 0.0001311584,
"loss": 0.8868,
"step": 21520
},
{
"epoch": 0.34464,
"grad_norm": 0.23783642053604126,
"learning_rate": 0.00013109440000000003,
"loss": 0.9062,
"step": 21540
},
{
"epoch": 0.34496,
"grad_norm": 0.22890570759773254,
"learning_rate": 0.0001310304,
"loss": 0.914,
"step": 21560
},
{
"epoch": 0.34528,
"grad_norm": 0.23722144961357117,
"learning_rate": 0.0001309664,
"loss": 0.9393,
"step": 21580
},
{
"epoch": 0.3456,
"grad_norm": 0.264466255903244,
"learning_rate": 0.00013090240000000002,
"loss": 0.9294,
"step": 21600
},
{
"epoch": 0.34592,
"grad_norm": 0.23583443462848663,
"learning_rate": 0.0001308384,
"loss": 0.8729,
"step": 21620
},
{
"epoch": 0.34624,
"grad_norm": 0.25647860765457153,
"learning_rate": 0.0001307776,
"loss": 0.8512,
"step": 21640
},
{
"epoch": 0.34656,
"grad_norm": 0.2854239344596863,
"learning_rate": 0.0001307136,
"loss": 0.925,
"step": 21660
},
{
"epoch": 0.34688,
"grad_norm": 0.26097196340560913,
"learning_rate": 0.0001306496,
"loss": 0.9127,
"step": 21680
},
{
"epoch": 0.3472,
"grad_norm": 0.20337998867034912,
"learning_rate": 0.0001305856,
"loss": 0.9048,
"step": 21700
},
{
"epoch": 0.34752,
"grad_norm": 0.2021179497241974,
"learning_rate": 0.0001305216,
"loss": 0.9213,
"step": 21720
},
{
"epoch": 0.34784,
"grad_norm": 0.2213645875453949,
"learning_rate": 0.0001304576,
"loss": 0.8891,
"step": 21740
},
{
"epoch": 0.34816,
"grad_norm": 0.2201976180076599,
"learning_rate": 0.0001303936,
"loss": 0.8425,
"step": 21760
},
{
"epoch": 0.34848,
"grad_norm": 0.21747338771820068,
"learning_rate": 0.0001303296,
"loss": 0.9035,
"step": 21780
},
{
"epoch": 0.3488,
"grad_norm": 0.23967601358890533,
"learning_rate": 0.0001302656,
"loss": 0.9168,
"step": 21800
},
{
"epoch": 0.34912,
"grad_norm": 0.2570231854915619,
"learning_rate": 0.00013020160000000002,
"loss": 0.8813,
"step": 21820
},
{
"epoch": 0.34944,
"grad_norm": 0.24394749104976654,
"learning_rate": 0.00013013760000000002,
"loss": 0.884,
"step": 21840
},
{
"epoch": 0.34976,
"grad_norm": 0.25966569781303406,
"learning_rate": 0.0001300736,
"loss": 0.8981,
"step": 21860
},
{
"epoch": 0.35008,
"grad_norm": 0.24895374476909637,
"learning_rate": 0.0001300096,
"loss": 0.9225,
"step": 21880
},
{
"epoch": 0.3504,
"grad_norm": 0.22816093266010284,
"learning_rate": 0.0001299456,
"loss": 0.8729,
"step": 21900
},
{
"epoch": 0.35072,
"grad_norm": 0.22073081135749817,
"learning_rate": 0.0001298816,
"loss": 0.8505,
"step": 21920
},
{
"epoch": 0.35104,
"grad_norm": 0.24581517279148102,
"learning_rate": 0.0001298176,
"loss": 0.8963,
"step": 21940
},
{
"epoch": 0.35136,
"grad_norm": 0.22976480424404144,
"learning_rate": 0.00012975360000000002,
"loss": 0.8694,
"step": 21960
},
{
"epoch": 0.35168,
"grad_norm": 0.2811223566532135,
"learning_rate": 0.0001296896,
"loss": 0.9338,
"step": 21980
},
{
"epoch": 0.352,
"grad_norm": 0.2338571697473526,
"learning_rate": 0.0001296256,
"loss": 0.883,
"step": 22000
},
{
"epoch": 0.35232,
"grad_norm": 0.2589928209781647,
"learning_rate": 0.0001295616,
"loss": 0.8674,
"step": 22020
},
{
"epoch": 0.35264,
"grad_norm": 0.2552894949913025,
"learning_rate": 0.0001294976,
"loss": 0.9449,
"step": 22040
},
{
"epoch": 0.35296,
"grad_norm": 0.2796124219894409,
"learning_rate": 0.0001294336,
"loss": 0.9292,
"step": 22060
},
{
"epoch": 0.35328,
"grad_norm": 0.25079146027565,
"learning_rate": 0.0001293696,
"loss": 0.8741,
"step": 22080
},
{
"epoch": 0.3536,
"grad_norm": 0.25996407866477966,
"learning_rate": 0.0001293056,
"loss": 0.8642,
"step": 22100
},
{
"epoch": 0.35392,
"grad_norm": 0.24768070876598358,
"learning_rate": 0.0001292416,
"loss": 0.8876,
"step": 22120
},
{
"epoch": 0.35424,
"grad_norm": 0.2748354375362396,
"learning_rate": 0.0001291776,
"loss": 0.8686,
"step": 22140
},
{
"epoch": 0.35456,
"grad_norm": 0.26020100712776184,
"learning_rate": 0.00012911360000000002,
"loss": 0.8978,
"step": 22160
},
{
"epoch": 0.35488,
"grad_norm": 0.22740164399147034,
"learning_rate": 0.00012904960000000002,
"loss": 0.8806,
"step": 22180
},
{
"epoch": 0.3552,
"grad_norm": 0.22539575397968292,
"learning_rate": 0.0001289856,
"loss": 0.9586,
"step": 22200
},
{
"epoch": 0.35552,
"grad_norm": 0.224917471408844,
"learning_rate": 0.0001289216,
"loss": 0.929,
"step": 22220
},
{
"epoch": 0.35584,
"grad_norm": 0.2811584174633026,
"learning_rate": 0.0001288576,
"loss": 0.8816,
"step": 22240
},
{
"epoch": 0.35616,
"grad_norm": 0.2050054520368576,
"learning_rate": 0.0001287936,
"loss": 0.886,
"step": 22260
},
{
"epoch": 0.35648,
"grad_norm": 0.26352056860923767,
"learning_rate": 0.0001287296,
"loss": 0.8631,
"step": 22280
},
{
"epoch": 0.3568,
"grad_norm": 0.2189260572195053,
"learning_rate": 0.00012866560000000002,
"loss": 0.8886,
"step": 22300
},
{
"epoch": 0.35712,
"grad_norm": 0.20122426748275757,
"learning_rate": 0.0001286016,
"loss": 0.8899,
"step": 22320
},
{
"epoch": 0.35744,
"grad_norm": 0.2483946979045868,
"learning_rate": 0.0001285376,
"loss": 0.9277,
"step": 22340
},
{
"epoch": 0.35776,
"grad_norm": 0.2784961462020874,
"learning_rate": 0.0001284736,
"loss": 0.8697,
"step": 22360
},
{
"epoch": 0.35808,
"grad_norm": 0.253579318523407,
"learning_rate": 0.0001284096,
"loss": 0.9001,
"step": 22380
},
{
"epoch": 0.3584,
"grad_norm": 0.24485689401626587,
"learning_rate": 0.0001283456,
"loss": 0.8837,
"step": 22400
},
{
"epoch": 0.35872,
"grad_norm": 0.28353968262672424,
"learning_rate": 0.0001282816,
"loss": 0.8641,
"step": 22420
},
{
"epoch": 0.35904,
"grad_norm": 0.22458963096141815,
"learning_rate": 0.0001282176,
"loss": 0.8424,
"step": 22440
},
{
"epoch": 0.35936,
"grad_norm": 0.21924658119678497,
"learning_rate": 0.0001281536,
"loss": 0.9238,
"step": 22460
},
{
"epoch": 0.35968,
"grad_norm": 0.2481856644153595,
"learning_rate": 0.0001280896,
"loss": 0.914,
"step": 22480
},
{
"epoch": 0.36,
"grad_norm": 0.22141766548156738,
"learning_rate": 0.00012802560000000002,
"loss": 0.883,
"step": 22500
},
{
"epoch": 0.36032,
"grad_norm": 0.241195410490036,
"learning_rate": 0.00012796160000000002,
"loss": 0.8758,
"step": 22520
},
{
"epoch": 0.36064,
"grad_norm": 0.21804146468639374,
"learning_rate": 0.0001278976,
"loss": 0.892,
"step": 22540
},
{
"epoch": 0.36096,
"grad_norm": 0.21555864810943604,
"learning_rate": 0.0001278336,
"loss": 0.9042,
"step": 22560
},
{
"epoch": 0.36128,
"grad_norm": 0.2855897545814514,
"learning_rate": 0.0001277696,
"loss": 0.8698,
"step": 22580
},
{
"epoch": 0.3616,
"grad_norm": 0.18174555897712708,
"learning_rate": 0.0001277056,
"loss": 0.9076,
"step": 22600
},
{
"epoch": 0.36192,
"grad_norm": 0.24485164880752563,
"learning_rate": 0.0001276416,
"loss": 0.8773,
"step": 22620
},
{
"epoch": 0.36224,
"grad_norm": 0.24945834279060364,
"learning_rate": 0.00012757760000000002,
"loss": 0.8947,
"step": 22640
},
{
"epoch": 0.36256,
"grad_norm": 0.27107083797454834,
"learning_rate": 0.0001275136,
"loss": 0.8908,
"step": 22660
},
{
"epoch": 0.36288,
"grad_norm": 0.2725388705730438,
"learning_rate": 0.0001274496,
"loss": 0.9117,
"step": 22680
},
{
"epoch": 0.3632,
"grad_norm": 0.2100275754928589,
"learning_rate": 0.00012738560000000003,
"loss": 0.841,
"step": 22700
},
{
"epoch": 0.36352,
"grad_norm": 0.18476144969463348,
"learning_rate": 0.0001273216,
"loss": 0.8546,
"step": 22720
},
{
"epoch": 0.36384,
"grad_norm": 0.2054418921470642,
"learning_rate": 0.0001272576,
"loss": 0.9456,
"step": 22740
},
{
"epoch": 0.36416,
"grad_norm": 0.23518037796020508,
"learning_rate": 0.00012719360000000001,
"loss": 0.8499,
"step": 22760
},
{
"epoch": 0.36448,
"grad_norm": 0.22273370623588562,
"learning_rate": 0.0001271296,
"loss": 0.8961,
"step": 22780
},
{
"epoch": 0.3648,
"grad_norm": 0.2261369377374649,
"learning_rate": 0.0001270656,
"loss": 0.8655,
"step": 22800
},
{
"epoch": 0.36512,
"grad_norm": 0.2787102460861206,
"learning_rate": 0.0001270016,
"loss": 0.8457,
"step": 22820
},
{
"epoch": 0.36544,
"grad_norm": 0.287702351808548,
"learning_rate": 0.00012693760000000002,
"loss": 0.8998,
"step": 22840
},
{
"epoch": 0.36576,
"grad_norm": 0.24665424227714539,
"learning_rate": 0.00012687360000000002,
"loss": 0.9337,
"step": 22860
},
{
"epoch": 0.36608,
"grad_norm": 0.23870235681533813,
"learning_rate": 0.00012680959999999999,
"loss": 0.9023,
"step": 22880
},
{
"epoch": 0.3664,
"grad_norm": 0.2987755835056305,
"learning_rate": 0.0001267456,
"loss": 0.9055,
"step": 22900
},
{
"epoch": 0.36672,
"grad_norm": 0.271150678396225,
"learning_rate": 0.0001266816,
"loss": 0.8995,
"step": 22920
},
{
"epoch": 0.36704,
"grad_norm": 0.2605392038822174,
"learning_rate": 0.0001266176,
"loss": 0.8712,
"step": 22940
},
{
"epoch": 0.36736,
"grad_norm": 0.20183727145195007,
"learning_rate": 0.0001265536,
"loss": 0.9121,
"step": 22960
},
{
"epoch": 0.36768,
"grad_norm": 0.22325585782527924,
"learning_rate": 0.00012648960000000002,
"loss": 0.9287,
"step": 22980
},
{
"epoch": 0.368,
"grad_norm": 0.2461370825767517,
"learning_rate": 0.0001264256,
"loss": 0.9411,
"step": 23000
},
{
"epoch": 0.36832,
"grad_norm": 0.26270198822021484,
"learning_rate": 0.0001263616,
"loss": 0.8686,
"step": 23020
},
{
"epoch": 0.36864,
"grad_norm": 0.2455090433359146,
"learning_rate": 0.00012629760000000003,
"loss": 0.8735,
"step": 23040
},
{
"epoch": 0.36896,
"grad_norm": 0.22023002803325653,
"learning_rate": 0.0001262336,
"loss": 0.9235,
"step": 23060
},
{
"epoch": 0.36928,
"grad_norm": 0.19227732717990875,
"learning_rate": 0.0001261696,
"loss": 0.8781,
"step": 23080
},
{
"epoch": 0.3696,
"grad_norm": 0.230510413646698,
"learning_rate": 0.00012610560000000001,
"loss": 0.8865,
"step": 23100
},
{
"epoch": 0.36992,
"grad_norm": 0.2508642077445984,
"learning_rate": 0.0001260416,
"loss": 0.9123,
"step": 23120
},
{
"epoch": 0.37024,
"grad_norm": 0.22809596359729767,
"learning_rate": 0.0001259776,
"loss": 0.8799,
"step": 23140
},
{
"epoch": 0.37056,
"grad_norm": 0.2615736722946167,
"learning_rate": 0.0001259136,
"loss": 0.8927,
"step": 23160
},
{
"epoch": 0.37088,
"grad_norm": 0.23064741492271423,
"learning_rate": 0.00012584960000000002,
"loss": 0.9142,
"step": 23180
},
{
"epoch": 0.3712,
"grad_norm": 0.2514834403991699,
"learning_rate": 0.00012578560000000002,
"loss": 0.9439,
"step": 23200
},
{
"epoch": 0.37152,
"grad_norm": 0.27186328172683716,
"learning_rate": 0.00012572159999999999,
"loss": 0.8197,
"step": 23220
},
{
"epoch": 0.37184,
"grad_norm": 0.2310495674610138,
"learning_rate": 0.0001256576,
"loss": 0.925,
"step": 23240
},
{
"epoch": 0.37216,
"grad_norm": 0.21828240156173706,
"learning_rate": 0.0001255936,
"loss": 0.8944,
"step": 23260
},
{
"epoch": 0.37248,
"grad_norm": 0.25522857904434204,
"learning_rate": 0.0001255296,
"loss": 0.8338,
"step": 23280
},
{
"epoch": 0.3728,
"grad_norm": 0.2428404837846756,
"learning_rate": 0.0001254656,
"loss": 0.8788,
"step": 23300
},
{
"epoch": 0.37312,
"grad_norm": 0.2573212683200836,
"learning_rate": 0.00012540160000000002,
"loss": 0.8674,
"step": 23320
},
{
"epoch": 0.37344,
"grad_norm": 0.26089292764663696,
"learning_rate": 0.0001253376,
"loss": 0.8811,
"step": 23340
},
{
"epoch": 0.37376,
"grad_norm": 0.2231331765651703,
"learning_rate": 0.0001252736,
"loss": 0.8762,
"step": 23360
},
{
"epoch": 0.37408,
"grad_norm": 0.22179879248142242,
"learning_rate": 0.00012520960000000003,
"loss": 0.9226,
"step": 23380
},
{
"epoch": 0.3744,
"grad_norm": 0.29986852407455444,
"learning_rate": 0.0001251456,
"loss": 0.8889,
"step": 23400
},
{
"epoch": 0.37472,
"grad_norm": 0.30950990319252014,
"learning_rate": 0.0001250816,
"loss": 0.9255,
"step": 23420
},
{
"epoch": 0.37504,
"grad_norm": 0.2493577003479004,
"learning_rate": 0.00012501760000000001,
"loss": 0.8823,
"step": 23440
},
{
"epoch": 0.37536,
"grad_norm": 0.20476320385932922,
"learning_rate": 0.0001249536,
"loss": 0.9095,
"step": 23460
},
{
"epoch": 0.37568,
"grad_norm": 0.22957487404346466,
"learning_rate": 0.0001248896,
"loss": 0.8832,
"step": 23480
},
{
"epoch": 0.376,
"grad_norm": 0.20240527391433716,
"learning_rate": 0.0001248256,
"loss": 0.8569,
"step": 23500
},
{
"epoch": 0.37632,
"grad_norm": 0.2227647453546524,
"learning_rate": 0.00012476160000000002,
"loss": 0.8729,
"step": 23520
},
{
"epoch": 0.37664,
"grad_norm": 0.2590673565864563,
"learning_rate": 0.00012469760000000002,
"loss": 0.9006,
"step": 23540
},
{
"epoch": 0.37696,
"grad_norm": 0.2456459403038025,
"learning_rate": 0.0001246336,
"loss": 0.9238,
"step": 23560
},
{
"epoch": 0.37728,
"grad_norm": 0.23964524269104004,
"learning_rate": 0.0001245696,
"loss": 0.9148,
"step": 23580
},
{
"epoch": 0.3776,
"grad_norm": 0.2621648907661438,
"learning_rate": 0.0001245056,
"loss": 0.91,
"step": 23600
},
{
"epoch": 0.37792,
"grad_norm": 0.2700936794281006,
"learning_rate": 0.0001244416,
"loss": 0.8256,
"step": 23620
},
{
"epoch": 0.37824,
"grad_norm": 0.2757120430469513,
"learning_rate": 0.0001243776,
"loss": 0.9158,
"step": 23640
},
{
"epoch": 0.37856,
"grad_norm": 0.25307512283325195,
"learning_rate": 0.00012431360000000002,
"loss": 0.9564,
"step": 23660
},
{
"epoch": 0.37888,
"grad_norm": 0.22811047732830048,
"learning_rate": 0.0001242496,
"loss": 0.9005,
"step": 23680
},
{
"epoch": 0.3792,
"grad_norm": 0.2570401430130005,
"learning_rate": 0.0001241856,
"loss": 0.903,
"step": 23700
},
{
"epoch": 0.37952,
"grad_norm": 0.2455683946609497,
"learning_rate": 0.00012412160000000003,
"loss": 0.8849,
"step": 23720
},
{
"epoch": 0.37984,
"grad_norm": 0.2513890564441681,
"learning_rate": 0.0001240576,
"loss": 0.916,
"step": 23740
},
{
"epoch": 0.38016,
"grad_norm": 0.2542913556098938,
"learning_rate": 0.0001239936,
"loss": 0.8913,
"step": 23760
},
{
"epoch": 0.38048,
"grad_norm": 0.23636704683303833,
"learning_rate": 0.00012392960000000001,
"loss": 0.9071,
"step": 23780
},
{
"epoch": 0.3808,
"grad_norm": 0.23358510434627533,
"learning_rate": 0.00012386880000000002,
"loss": 0.8727,
"step": 23800
},
{
"epoch": 0.38112,
"grad_norm": 0.2075459063053131,
"learning_rate": 0.0001238048,
"loss": 0.8602,
"step": 23820
},
{
"epoch": 0.38144,
"grad_norm": 0.24009796977043152,
"learning_rate": 0.0001237408,
"loss": 0.8787,
"step": 23840
},
{
"epoch": 0.38176,
"grad_norm": 0.20129810273647308,
"learning_rate": 0.0001236768,
"loss": 0.9057,
"step": 23860
},
{
"epoch": 0.38208,
"grad_norm": 0.2360943704843521,
"learning_rate": 0.0001236128,
"loss": 0.8914,
"step": 23880
},
{
"epoch": 0.3824,
"grad_norm": 0.2023976743221283,
"learning_rate": 0.0001235488,
"loss": 0.8671,
"step": 23900
},
{
"epoch": 0.38272,
"grad_norm": 0.28492018580436707,
"learning_rate": 0.0001234848,
"loss": 0.8466,
"step": 23920
},
{
"epoch": 0.38304,
"grad_norm": 0.2340991497039795,
"learning_rate": 0.0001234208,
"loss": 0.8719,
"step": 23940
},
{
"epoch": 0.38336,
"grad_norm": 0.2417367696762085,
"learning_rate": 0.0001233568,
"loss": 0.8661,
"step": 23960
},
{
"epoch": 0.38368,
"grad_norm": 0.2661448121070862,
"learning_rate": 0.0001232928,
"loss": 0.9087,
"step": 23980
},
{
"epoch": 0.384,
"grad_norm": 0.2731974720954895,
"learning_rate": 0.00012322880000000002,
"loss": 0.8759,
"step": 24000
},
{
"epoch": 0.38432,
"grad_norm": 0.25823774933815,
"learning_rate": 0.00012316480000000002,
"loss": 0.8722,
"step": 24020
},
{
"epoch": 0.38464,
"grad_norm": 0.25573599338531494,
"learning_rate": 0.0001231008,
"loss": 0.9005,
"step": 24040
},
{
"epoch": 0.38496,
"grad_norm": 0.22409121692180634,
"learning_rate": 0.0001230368,
"loss": 0.8797,
"step": 24060
},
{
"epoch": 0.38528,
"grad_norm": 0.2514369785785675,
"learning_rate": 0.0001229728,
"loss": 0.9225,
"step": 24080
},
{
"epoch": 0.3856,
"grad_norm": 0.2252058982849121,
"learning_rate": 0.0001229088,
"loss": 0.8665,
"step": 24100
},
{
"epoch": 0.38592,
"grad_norm": 0.29812994599342346,
"learning_rate": 0.0001228448,
"loss": 0.8914,
"step": 24120
},
{
"epoch": 0.38624,
"grad_norm": 0.23113328218460083,
"learning_rate": 0.00012278080000000002,
"loss": 0.9096,
"step": 24140
},
{
"epoch": 0.38656,
"grad_norm": 0.26779311895370483,
"learning_rate": 0.0001227168,
"loss": 0.8901,
"step": 24160
},
{
"epoch": 0.38688,
"grad_norm": 0.26052671670913696,
"learning_rate": 0.0001226528,
"loss": 0.8874,
"step": 24180
},
{
"epoch": 0.3872,
"grad_norm": 0.24518652260303497,
"learning_rate": 0.00012258880000000003,
"loss": 0.8906,
"step": 24200
},
{
"epoch": 0.38752,
"grad_norm": 0.22408343851566315,
"learning_rate": 0.0001225248,
"loss": 0.9066,
"step": 24220
},
{
"epoch": 0.38784,
"grad_norm": 0.24065548181533813,
"learning_rate": 0.0001224608,
"loss": 0.8831,
"step": 24240
},
{
"epoch": 0.38816,
"grad_norm": 0.2512281835079193,
"learning_rate": 0.00012239680000000001,
"loss": 0.9191,
"step": 24260
},
{
"epoch": 0.38848,
"grad_norm": 0.212532177567482,
"learning_rate": 0.0001223328,
"loss": 0.8254,
"step": 24280
},
{
"epoch": 0.3888,
"grad_norm": 0.27028560638427734,
"learning_rate": 0.0001222688,
"loss": 0.9266,
"step": 24300
},
{
"epoch": 0.38912,
"grad_norm": 0.25514838099479675,
"learning_rate": 0.0001222048,
"loss": 0.8876,
"step": 24320
},
{
"epoch": 0.38944,
"grad_norm": 0.23313501477241516,
"learning_rate": 0.00012214080000000002,
"loss": 0.8999,
"step": 24340
},
{
"epoch": 0.38976,
"grad_norm": 0.24142177402973175,
"learning_rate": 0.00012207680000000002,
"loss": 0.8689,
"step": 24360
},
{
"epoch": 0.39008,
"grad_norm": 0.26181671023368835,
"learning_rate": 0.00012201280000000001,
"loss": 0.9077,
"step": 24380
},
{
"epoch": 0.3904,
"grad_norm": 0.20765413343906403,
"learning_rate": 0.0001219488,
"loss": 0.8583,
"step": 24400
},
{
"epoch": 0.39072,
"grad_norm": 0.23421642184257507,
"learning_rate": 0.0001218848,
"loss": 0.8741,
"step": 24420
},
{
"epoch": 0.39104,
"grad_norm": 0.24042420089244843,
"learning_rate": 0.0001218208,
"loss": 0.8836,
"step": 24440
},
{
"epoch": 0.39136,
"grad_norm": 0.23314060270786285,
"learning_rate": 0.0001217568,
"loss": 0.8494,
"step": 24460
},
{
"epoch": 0.39168,
"grad_norm": 0.21119163930416107,
"learning_rate": 0.0001216928,
"loss": 0.8662,
"step": 24480
},
{
"epoch": 0.392,
"grad_norm": 0.2608548700809479,
"learning_rate": 0.00012163200000000002,
"loss": 0.8893,
"step": 24500
},
{
"epoch": 0.39232,
"grad_norm": 0.21324241161346436,
"learning_rate": 0.00012156800000000001,
"loss": 0.9002,
"step": 24520
},
{
"epoch": 0.39264,
"grad_norm": 0.22912296652793884,
"learning_rate": 0.000121504,
"loss": 0.866,
"step": 24540
},
{
"epoch": 0.39296,
"grad_norm": 0.21215850114822388,
"learning_rate": 0.00012144,
"loss": 0.8513,
"step": 24560
},
{
"epoch": 0.39328,
"grad_norm": 0.24554443359375,
"learning_rate": 0.000121376,
"loss": 0.8919,
"step": 24580
},
{
"epoch": 0.3936,
"grad_norm": 0.20404468476772308,
"learning_rate": 0.000121312,
"loss": 0.8908,
"step": 24600
},
{
"epoch": 0.39392,
"grad_norm": 0.22551661729812622,
"learning_rate": 0.000121248,
"loss": 0.8854,
"step": 24620
},
{
"epoch": 0.39424,
"grad_norm": 0.21961207687854767,
"learning_rate": 0.00012118400000000001,
"loss": 0.9231,
"step": 24640
},
{
"epoch": 0.39456,
"grad_norm": 0.22458186745643616,
"learning_rate": 0.00012112,
"loss": 0.9015,
"step": 24660
},
{
"epoch": 0.39488,
"grad_norm": 0.29484352469444275,
"learning_rate": 0.00012105600000000001,
"loss": 0.8869,
"step": 24680
},
{
"epoch": 0.3952,
"grad_norm": 0.228530153632164,
"learning_rate": 0.00012099200000000001,
"loss": 0.8622,
"step": 24700
},
{
"epoch": 0.39552,
"grad_norm": 0.2094821333885193,
"learning_rate": 0.00012092799999999999,
"loss": 0.8878,
"step": 24720
},
{
"epoch": 0.39584,
"grad_norm": 0.22119556367397308,
"learning_rate": 0.000120864,
"loss": 0.8235,
"step": 24740
},
{
"epoch": 0.39616,
"grad_norm": 0.23812732100486755,
"learning_rate": 0.0001208,
"loss": 0.8358,
"step": 24760
},
{
"epoch": 0.39648,
"grad_norm": 0.26022717356681824,
"learning_rate": 0.000120736,
"loss": 0.9271,
"step": 24780
},
{
"epoch": 0.3968,
"grad_norm": 0.21341145038604736,
"learning_rate": 0.00012067200000000001,
"loss": 0.7994,
"step": 24800
},
{
"epoch": 0.39712,
"grad_norm": 0.24168658256530762,
"learning_rate": 0.00012060800000000001,
"loss": 0.8575,
"step": 24820
},
{
"epoch": 0.39744,
"grad_norm": 0.24363411962985992,
"learning_rate": 0.00012054400000000002,
"loss": 0.9,
"step": 24840
},
{
"epoch": 0.39776,
"grad_norm": 0.23571257293224335,
"learning_rate": 0.00012048000000000001,
"loss": 0.8862,
"step": 24860
},
{
"epoch": 0.39808,
"grad_norm": 0.24681545794010162,
"learning_rate": 0.000120416,
"loss": 0.8989,
"step": 24880
},
{
"epoch": 0.3984,
"grad_norm": 0.2692868113517761,
"learning_rate": 0.000120352,
"loss": 0.9006,
"step": 24900
},
{
"epoch": 0.39872,
"grad_norm": 0.21049508452415466,
"learning_rate": 0.000120288,
"loss": 0.8785,
"step": 24920
},
{
"epoch": 0.39904,
"grad_norm": 0.23448914289474487,
"learning_rate": 0.000120224,
"loss": 0.8677,
"step": 24940
},
{
"epoch": 0.39936,
"grad_norm": 0.21739456057548523,
"learning_rate": 0.00012016,
"loss": 0.8595,
"step": 24960
},
{
"epoch": 0.39968,
"grad_norm": 0.2921290397644043,
"learning_rate": 0.00012009600000000001,
"loss": 0.8987,
"step": 24980
},
{
"epoch": 0.4,
"grad_norm": 0.2572174668312073,
"learning_rate": 0.000120032,
"loss": 0.8608,
"step": 25000
},
{
"epoch": 0.40032,
"grad_norm": 0.22153301537036896,
"learning_rate": 0.00011996800000000001,
"loss": 0.924,
"step": 25020
},
{
"epoch": 0.40064,
"grad_norm": 0.25618571043014526,
"learning_rate": 0.00011990400000000002,
"loss": 0.9082,
"step": 25040
},
{
"epoch": 0.40096,
"grad_norm": 0.23285400867462158,
"learning_rate": 0.00011983999999999999,
"loss": 0.8467,
"step": 25060
},
{
"epoch": 0.40128,
"grad_norm": 0.22959965467453003,
"learning_rate": 0.000119776,
"loss": 0.8813,
"step": 25080
},
{
"epoch": 0.4016,
"grad_norm": 0.2747071385383606,
"learning_rate": 0.00011971200000000001,
"loss": 0.8736,
"step": 25100
},
{
"epoch": 0.40192,
"grad_norm": 0.22911617159843445,
"learning_rate": 0.000119648,
"loss": 0.8696,
"step": 25120
},
{
"epoch": 0.40224,
"grad_norm": 0.2593874931335449,
"learning_rate": 0.00011958400000000001,
"loss": 0.8975,
"step": 25140
},
{
"epoch": 0.40256,
"grad_norm": 0.25185978412628174,
"learning_rate": 0.00011952000000000001,
"loss": 0.9214,
"step": 25160
},
{
"epoch": 0.40288,
"grad_norm": 0.20723138749599457,
"learning_rate": 0.00011945600000000002,
"loss": 0.8532,
"step": 25180
},
{
"epoch": 0.4032,
"grad_norm": 0.2371417135000229,
"learning_rate": 0.00011939200000000001,
"loss": 0.9435,
"step": 25200
},
{
"epoch": 0.40352,
"grad_norm": 0.22036102414131165,
"learning_rate": 0.00011932799999999999,
"loss": 0.9591,
"step": 25220
},
{
"epoch": 0.40384,
"grad_norm": 0.1885063499212265,
"learning_rate": 0.000119264,
"loss": 0.8855,
"step": 25240
},
{
"epoch": 0.40416,
"grad_norm": 0.22148434817790985,
"learning_rate": 0.0001192,
"loss": 0.8941,
"step": 25260
},
{
"epoch": 0.40448,
"grad_norm": 0.245897576212883,
"learning_rate": 0.000119136,
"loss": 0.8979,
"step": 25280
},
{
"epoch": 0.4048,
"grad_norm": 0.2093392014503479,
"learning_rate": 0.000119072,
"loss": 0.8549,
"step": 25300
},
{
"epoch": 0.40512,
"grad_norm": 0.20682351291179657,
"learning_rate": 0.00011900800000000001,
"loss": 0.8412,
"step": 25320
},
{
"epoch": 0.40544,
"grad_norm": 0.39095112681388855,
"learning_rate": 0.000118944,
"loss": 0.9389,
"step": 25340
},
{
"epoch": 0.40576,
"grad_norm": 0.2154461294412613,
"learning_rate": 0.00011888000000000001,
"loss": 0.9047,
"step": 25360
},
{
"epoch": 0.40608,
"grad_norm": 0.2192692905664444,
"learning_rate": 0.00011881600000000002,
"loss": 0.8371,
"step": 25380
},
{
"epoch": 0.4064,
"grad_norm": 0.30516675114631653,
"learning_rate": 0.00011875199999999999,
"loss": 0.9068,
"step": 25400
},
{
"epoch": 0.40672,
"grad_norm": 0.24160155653953552,
"learning_rate": 0.000118688,
"loss": 0.8778,
"step": 25420
},
{
"epoch": 0.40704,
"grad_norm": 0.2394413948059082,
"learning_rate": 0.00011862400000000001,
"loss": 0.9009,
"step": 25440
},
{
"epoch": 0.40736,
"grad_norm": 0.2312084585428238,
"learning_rate": 0.00011856,
"loss": 0.9599,
"step": 25460
},
{
"epoch": 0.40768,
"grad_norm": 0.24847859144210815,
"learning_rate": 0.00011849600000000001,
"loss": 0.9424,
"step": 25480
},
{
"epoch": 0.408,
"grad_norm": 0.2651779055595398,
"learning_rate": 0.00011843200000000001,
"loss": 0.8898,
"step": 25500
},
{
"epoch": 0.40832,
"grad_norm": 0.22847053408622742,
"learning_rate": 0.00011836800000000002,
"loss": 0.8775,
"step": 25520
},
{
"epoch": 0.40864,
"grad_norm": 0.25370457768440247,
"learning_rate": 0.00011830400000000001,
"loss": 0.8691,
"step": 25540
},
{
"epoch": 0.40896,
"grad_norm": 0.24085932970046997,
"learning_rate": 0.00011823999999999999,
"loss": 0.8863,
"step": 25560
},
{
"epoch": 0.40928,
"grad_norm": 0.2516380548477173,
"learning_rate": 0.000118176,
"loss": 0.8686,
"step": 25580
},
{
"epoch": 0.4096,
"grad_norm": 0.24218106269836426,
"learning_rate": 0.000118112,
"loss": 0.9093,
"step": 25600
},
{
"epoch": 0.40992,
"grad_norm": 0.22466421127319336,
"learning_rate": 0.000118048,
"loss": 0.8652,
"step": 25620
},
{
"epoch": 0.41024,
"grad_norm": 0.2240326702594757,
"learning_rate": 0.000117984,
"loss": 0.8873,
"step": 25640
},
{
"epoch": 0.41056,
"grad_norm": 0.24201804399490356,
"learning_rate": 0.00011792000000000001,
"loss": 0.8556,
"step": 25660
},
{
"epoch": 0.41088,
"grad_norm": 0.2758803963661194,
"learning_rate": 0.000117856,
"loss": 0.9015,
"step": 25680
},
{
"epoch": 0.4112,
"grad_norm": 0.23030854761600494,
"learning_rate": 0.00011779200000000001,
"loss": 0.8259,
"step": 25700
},
{
"epoch": 0.41152,
"grad_norm": 0.21517449617385864,
"learning_rate": 0.00011772800000000002,
"loss": 0.9246,
"step": 25720
},
{
"epoch": 0.41184,
"grad_norm": 0.2662043571472168,
"learning_rate": 0.00011766399999999999,
"loss": 0.9175,
"step": 25740
},
{
"epoch": 0.41216,
"grad_norm": 0.23844832181930542,
"learning_rate": 0.0001176,
"loss": 0.8416,
"step": 25760
},
{
"epoch": 0.41248,
"grad_norm": 0.23714718222618103,
"learning_rate": 0.00011753600000000001,
"loss": 0.8802,
"step": 25780
},
{
"epoch": 0.4128,
"grad_norm": 0.2341051995754242,
"learning_rate": 0.000117472,
"loss": 0.8629,
"step": 25800
},
{
"epoch": 0.41312,
"grad_norm": 0.2298164963722229,
"learning_rate": 0.00011740800000000001,
"loss": 0.8794,
"step": 25820
},
{
"epoch": 0.41344,
"grad_norm": 0.26338857412338257,
"learning_rate": 0.00011734400000000001,
"loss": 0.8953,
"step": 25840
},
{
"epoch": 0.41376,
"grad_norm": 0.2441425770521164,
"learning_rate": 0.00011728000000000002,
"loss": 0.888,
"step": 25860
},
{
"epoch": 0.41408,
"grad_norm": 0.22573915123939514,
"learning_rate": 0.00011721600000000001,
"loss": 0.8417,
"step": 25880
},
{
"epoch": 0.4144,
"grad_norm": 0.24974480271339417,
"learning_rate": 0.00011715199999999999,
"loss": 0.8313,
"step": 25900
},
{
"epoch": 0.41472,
"grad_norm": 0.22177425026893616,
"learning_rate": 0.000117088,
"loss": 0.8615,
"step": 25920
},
{
"epoch": 0.41504,
"grad_norm": 0.25715529918670654,
"learning_rate": 0.000117024,
"loss": 0.9622,
"step": 25940
},
{
"epoch": 0.41536,
"grad_norm": 0.22982242703437805,
"learning_rate": 0.00011696,
"loss": 0.8944,
"step": 25960
},
{
"epoch": 0.41568,
"grad_norm": 0.2524280250072479,
"learning_rate": 0.000116896,
"loss": 0.9148,
"step": 25980
},
{
"epoch": 0.416,
"grad_norm": 0.19068706035614014,
"learning_rate": 0.00011683200000000001,
"loss": 0.8383,
"step": 26000
},
{
"epoch": 0.41632,
"grad_norm": 0.20955216884613037,
"learning_rate": 0.000116768,
"loss": 0.8297,
"step": 26020
},
{
"epoch": 0.41664,
"grad_norm": 0.28669893741607666,
"learning_rate": 0.00011670400000000001,
"loss": 0.9302,
"step": 26040
},
{
"epoch": 0.41696,
"grad_norm": 0.2136538028717041,
"learning_rate": 0.00011664000000000002,
"loss": 0.8628,
"step": 26060
},
{
"epoch": 0.41728,
"grad_norm": 0.24216507375240326,
"learning_rate": 0.00011657599999999999,
"loss": 0.8645,
"step": 26080
},
{
"epoch": 0.4176,
"grad_norm": 0.23418830335140228,
"learning_rate": 0.000116512,
"loss": 0.8744,
"step": 26100
},
{
"epoch": 0.41792,
"grad_norm": 0.21107226610183716,
"learning_rate": 0.00011644800000000001,
"loss": 0.882,
"step": 26120
},
{
"epoch": 0.41824,
"grad_norm": 0.25925180315971375,
"learning_rate": 0.000116384,
"loss": 0.9287,
"step": 26140
},
{
"epoch": 0.41856,
"grad_norm": 0.24545122683048248,
"learning_rate": 0.00011632000000000001,
"loss": 0.871,
"step": 26160
},
{
"epoch": 0.41888,
"grad_norm": 0.24683259427547455,
"learning_rate": 0.000116256,
"loss": 0.9309,
"step": 26180
},
{
"epoch": 0.4192,
"grad_norm": 0.271581768989563,
"learning_rate": 0.00011619200000000002,
"loss": 0.8901,
"step": 26200
},
{
"epoch": 0.41952,
"grad_norm": 0.19227302074432373,
"learning_rate": 0.00011612800000000001,
"loss": 0.8457,
"step": 26220
},
{
"epoch": 0.41984,
"grad_norm": 0.2621937692165375,
"learning_rate": 0.00011606399999999999,
"loss": 0.9007,
"step": 26240
},
{
"epoch": 0.42016,
"grad_norm": 0.23038643598556519,
"learning_rate": 0.000116,
"loss": 0.8869,
"step": 26260
},
{
"epoch": 0.42048,
"grad_norm": 0.18889521062374115,
"learning_rate": 0.000115936,
"loss": 0.8939,
"step": 26280
},
{
"epoch": 0.4208,
"grad_norm": 0.22690792381763458,
"learning_rate": 0.000115872,
"loss": 0.8544,
"step": 26300
},
{
"epoch": 0.42112,
"grad_norm": 0.23775628209114075,
"learning_rate": 0.000115808,
"loss": 0.8847,
"step": 26320
},
{
"epoch": 0.42144,
"grad_norm": 0.22833390533924103,
"learning_rate": 0.00011574400000000001,
"loss": 0.8963,
"step": 26340
},
{
"epoch": 0.42176,
"grad_norm": 0.26199871301651,
"learning_rate": 0.00011568000000000002,
"loss": 0.8717,
"step": 26360
},
{
"epoch": 0.42208,
"grad_norm": 0.21491499245166779,
"learning_rate": 0.00011561600000000001,
"loss": 0.8355,
"step": 26380
},
{
"epoch": 0.4224,
"grad_norm": 0.24193742871284485,
"learning_rate": 0.00011555200000000002,
"loss": 0.8916,
"step": 26400
},
{
"epoch": 0.42272,
"grad_norm": 0.23363493382930756,
"learning_rate": 0.000115488,
"loss": 0.882,
"step": 26420
},
{
"epoch": 0.42304,
"grad_norm": 0.2511495053768158,
"learning_rate": 0.000115424,
"loss": 0.856,
"step": 26440
},
{
"epoch": 0.42336,
"grad_norm": 0.2527294456958771,
"learning_rate": 0.00011536000000000001,
"loss": 0.9213,
"step": 26460
},
{
"epoch": 0.42368,
"grad_norm": 0.24384371936321259,
"learning_rate": 0.000115296,
"loss": 0.8766,
"step": 26480
},
{
"epoch": 0.424,
"grad_norm": 0.22618679702281952,
"learning_rate": 0.00011523200000000001,
"loss": 0.8849,
"step": 26500
},
{
"epoch": 0.42432,
"grad_norm": 0.2192445695400238,
"learning_rate": 0.000115168,
"loss": 0.8945,
"step": 26520
},
{
"epoch": 0.42464,
"grad_norm": 0.21766866743564606,
"learning_rate": 0.00011510400000000001,
"loss": 0.9218,
"step": 26540
},
{
"epoch": 0.42496,
"grad_norm": 0.25323477387428284,
"learning_rate": 0.00011504000000000001,
"loss": 0.8973,
"step": 26560
},
{
"epoch": 0.42528,
"grad_norm": 0.22795149683952332,
"learning_rate": 0.0001149792,
"loss": 0.842,
"step": 26580
},
{
"epoch": 0.4256,
"grad_norm": 0.26310163736343384,
"learning_rate": 0.00011491520000000001,
"loss": 0.9459,
"step": 26600
},
{
"epoch": 0.42592,
"grad_norm": 0.2246372550725937,
"learning_rate": 0.0001148512,
"loss": 0.9001,
"step": 26620
},
{
"epoch": 0.42624,
"grad_norm": 0.23721113801002502,
"learning_rate": 0.00011478720000000001,
"loss": 0.9489,
"step": 26640
},
{
"epoch": 0.42656,
"grad_norm": 0.2320554107427597,
"learning_rate": 0.0001147232,
"loss": 0.8728,
"step": 26660
},
{
"epoch": 0.42688,
"grad_norm": 0.24293909966945648,
"learning_rate": 0.00011465920000000002,
"loss": 0.8921,
"step": 26680
},
{
"epoch": 0.4272,
"grad_norm": 0.23679333925247192,
"learning_rate": 0.00011459520000000001,
"loss": 0.8383,
"step": 26700
},
{
"epoch": 0.42752,
"grad_norm": 0.27467820048332214,
"learning_rate": 0.00011453120000000002,
"loss": 0.8646,
"step": 26720
},
{
"epoch": 0.42784,
"grad_norm": 0.2692321538925171,
"learning_rate": 0.0001144672,
"loss": 0.9117,
"step": 26740
},
{
"epoch": 0.42816,
"grad_norm": 0.2061707228422165,
"learning_rate": 0.0001144032,
"loss": 0.8794,
"step": 26760
},
{
"epoch": 0.42848,
"grad_norm": 0.24439223110675812,
"learning_rate": 0.0001143392,
"loss": 0.8522,
"step": 26780
},
{
"epoch": 0.4288,
"grad_norm": 0.2587136924266815,
"learning_rate": 0.0001142752,
"loss": 0.8729,
"step": 26800
},
{
"epoch": 0.42912,
"grad_norm": 0.2559018135070801,
"learning_rate": 0.00011421120000000001,
"loss": 0.8797,
"step": 26820
},
{
"epoch": 0.42944,
"grad_norm": 0.2468901127576828,
"learning_rate": 0.0001141472,
"loss": 0.9022,
"step": 26840
},
{
"epoch": 0.42976,
"grad_norm": 0.24873799085617065,
"learning_rate": 0.00011408320000000001,
"loss": 0.8634,
"step": 26860
},
{
"epoch": 0.43008,
"grad_norm": 0.23509488999843597,
"learning_rate": 0.00011401920000000002,
"loss": 0.8995,
"step": 26880
},
{
"epoch": 0.4304,
"grad_norm": 0.22719904780387878,
"learning_rate": 0.00011395519999999999,
"loss": 0.8288,
"step": 26900
},
{
"epoch": 0.43072,
"grad_norm": 0.26962873339653015,
"learning_rate": 0.0001138912,
"loss": 0.9099,
"step": 26920
},
{
"epoch": 0.43104,
"grad_norm": 0.2308361977338791,
"learning_rate": 0.00011382720000000001,
"loss": 0.8927,
"step": 26940
},
{
"epoch": 0.43136,
"grad_norm": 0.23821701109409332,
"learning_rate": 0.0001137632,
"loss": 0.8926,
"step": 26960
},
{
"epoch": 0.43168,
"grad_norm": 0.2621578574180603,
"learning_rate": 0.00011369920000000001,
"loss": 0.8996,
"step": 26980
},
{
"epoch": 0.432,
"grad_norm": 0.1956038624048233,
"learning_rate": 0.0001136352,
"loss": 0.8429,
"step": 27000
},
{
"epoch": 0.43232,
"grad_norm": 0.25825899839401245,
"learning_rate": 0.00011357120000000001,
"loss": 0.8912,
"step": 27020
},
{
"epoch": 0.43264,
"grad_norm": 0.2325858324766159,
"learning_rate": 0.00011350720000000001,
"loss": 0.9094,
"step": 27040
},
{
"epoch": 0.43296,
"grad_norm": 0.24896900355815887,
"learning_rate": 0.00011344320000000002,
"loss": 0.9526,
"step": 27060
},
{
"epoch": 0.43328,
"grad_norm": 0.2929576337337494,
"learning_rate": 0.0001133792,
"loss": 0.8513,
"step": 27080
},
{
"epoch": 0.4336,
"grad_norm": 0.23895148932933807,
"learning_rate": 0.0001133152,
"loss": 0.8708,
"step": 27100
},
{
"epoch": 0.43392,
"grad_norm": 0.22470715641975403,
"learning_rate": 0.0001132512,
"loss": 0.9009,
"step": 27120
},
{
"epoch": 0.43424,
"grad_norm": 0.25981777906417847,
"learning_rate": 0.0001131872,
"loss": 0.9252,
"step": 27140
},
{
"epoch": 0.43456,
"grad_norm": 0.26591363549232483,
"learning_rate": 0.00011312320000000001,
"loss": 0.878,
"step": 27160
},
{
"epoch": 0.43488,
"grad_norm": 0.24026769399642944,
"learning_rate": 0.0001130592,
"loss": 0.838,
"step": 27180
},
{
"epoch": 0.4352,
"grad_norm": 0.243183895945549,
"learning_rate": 0.00011299520000000001,
"loss": 0.945,
"step": 27200
},
{
"epoch": 0.43552,
"grad_norm": 0.28983068466186523,
"learning_rate": 0.00011293120000000002,
"loss": 0.8779,
"step": 27220
},
{
"epoch": 0.43584,
"grad_norm": 0.25985220074653625,
"learning_rate": 0.00011286719999999999,
"loss": 0.8852,
"step": 27240
},
{
"epoch": 0.43616,
"grad_norm": 0.2521764934062958,
"learning_rate": 0.0001128032,
"loss": 0.8459,
"step": 27260
},
{
"epoch": 0.43648,
"grad_norm": 0.2260691374540329,
"learning_rate": 0.0001127392,
"loss": 0.8478,
"step": 27280
},
{
"epoch": 0.4368,
"grad_norm": 0.24375227093696594,
"learning_rate": 0.0001126752,
"loss": 0.8566,
"step": 27300
},
{
"epoch": 0.43712,
"grad_norm": 0.23803727328777313,
"learning_rate": 0.00011261120000000001,
"loss": 0.8832,
"step": 27320
},
{
"epoch": 0.43744,
"grad_norm": 0.35262176394462585,
"learning_rate": 0.0001125472,
"loss": 0.9305,
"step": 27340
},
{
"epoch": 0.43776,
"grad_norm": 0.22310085594654083,
"learning_rate": 0.00011248320000000001,
"loss": 0.8806,
"step": 27360
},
{
"epoch": 0.43808,
"grad_norm": 0.22547666728496552,
"learning_rate": 0.00011241920000000001,
"loss": 0.8617,
"step": 27380
},
{
"epoch": 0.4384,
"grad_norm": 0.23973605036735535,
"learning_rate": 0.00011235520000000002,
"loss": 0.8415,
"step": 27400
},
{
"epoch": 0.43872,
"grad_norm": 0.2396925538778305,
"learning_rate": 0.0001122912,
"loss": 0.9221,
"step": 27420
},
{
"epoch": 0.43904,
"grad_norm": 0.2549417018890381,
"learning_rate": 0.0001122272,
"loss": 0.8289,
"step": 27440
},
{
"epoch": 0.43936,
"grad_norm": 0.3001738488674164,
"learning_rate": 0.0001121632,
"loss": 0.8951,
"step": 27460
},
{
"epoch": 0.43968,
"grad_norm": 0.28858518600463867,
"learning_rate": 0.0001120992,
"loss": 0.9271,
"step": 27480
},
{
"epoch": 0.44,
"grad_norm": 0.22798220813274384,
"learning_rate": 0.00011203520000000001,
"loss": 0.9133,
"step": 27500
},
{
"epoch": 0.44032,
"grad_norm": 0.2488940805196762,
"learning_rate": 0.0001119712,
"loss": 0.9054,
"step": 27520
},
{
"epoch": 0.44064,
"grad_norm": 0.26057055592536926,
"learning_rate": 0.00011190720000000001,
"loss": 0.9136,
"step": 27540
},
{
"epoch": 0.44096,
"grad_norm": 0.21583379805088043,
"learning_rate": 0.00011184320000000002,
"loss": 0.8559,
"step": 27560
},
{
"epoch": 0.44128,
"grad_norm": 0.2284022718667984,
"learning_rate": 0.00011177919999999999,
"loss": 0.8976,
"step": 27580
},
{
"epoch": 0.4416,
"grad_norm": 0.25823116302490234,
"learning_rate": 0.0001117152,
"loss": 0.863,
"step": 27600
},
{
"epoch": 0.44192,
"grad_norm": 0.206822007894516,
"learning_rate": 0.0001116512,
"loss": 0.8877,
"step": 27620
},
{
"epoch": 0.44224,
"grad_norm": 0.21545498073101044,
"learning_rate": 0.0001115872,
"loss": 0.907,
"step": 27640
},
{
"epoch": 0.44256,
"grad_norm": 0.20267954468727112,
"learning_rate": 0.00011152320000000001,
"loss": 0.8363,
"step": 27660
},
{
"epoch": 0.44288,
"grad_norm": 0.22508728504180908,
"learning_rate": 0.0001114592,
"loss": 0.8913,
"step": 27680
},
{
"epoch": 0.4432,
"grad_norm": 0.2211364060640335,
"learning_rate": 0.00011139520000000001,
"loss": 0.8492,
"step": 27700
},
{
"epoch": 0.44352,
"grad_norm": 0.23222848773002625,
"learning_rate": 0.00011133120000000001,
"loss": 0.8941,
"step": 27720
},
{
"epoch": 0.44384,
"grad_norm": 0.2355644702911377,
"learning_rate": 0.00011126720000000002,
"loss": 0.9167,
"step": 27740
},
{
"epoch": 0.44416,
"grad_norm": 0.25646787881851196,
"learning_rate": 0.0001112032,
"loss": 0.8852,
"step": 27760
},
{
"epoch": 0.44448,
"grad_norm": 0.24109818041324615,
"learning_rate": 0.0001111392,
"loss": 0.8864,
"step": 27780
},
{
"epoch": 0.4448,
"grad_norm": 0.2541385591030121,
"learning_rate": 0.0001110752,
"loss": 0.8674,
"step": 27800
},
{
"epoch": 0.44512,
"grad_norm": 0.2090396136045456,
"learning_rate": 0.0001110112,
"loss": 0.8601,
"step": 27820
},
{
"epoch": 0.44544,
"grad_norm": 0.249611958861351,
"learning_rate": 0.00011094720000000001,
"loss": 0.8973,
"step": 27840
},
{
"epoch": 0.44576,
"grad_norm": 0.24944797158241272,
"learning_rate": 0.00011088320000000002,
"loss": 0.9308,
"step": 27860
},
{
"epoch": 0.44608,
"grad_norm": 0.2585296332836151,
"learning_rate": 0.00011081920000000001,
"loss": 0.8865,
"step": 27880
},
{
"epoch": 0.4464,
"grad_norm": 0.2178046554327011,
"learning_rate": 0.00011075520000000002,
"loss": 0.8511,
"step": 27900
},
{
"epoch": 0.44672,
"grad_norm": 0.2413053810596466,
"learning_rate": 0.0001106912,
"loss": 0.9125,
"step": 27920
},
{
"epoch": 0.44704,
"grad_norm": 0.23110991716384888,
"learning_rate": 0.0001106272,
"loss": 0.9167,
"step": 27940
},
{
"epoch": 0.44736,
"grad_norm": 0.2292502075433731,
"learning_rate": 0.0001105632,
"loss": 0.8573,
"step": 27960
},
{
"epoch": 0.44768,
"grad_norm": 0.2460576891899109,
"learning_rate": 0.0001104992,
"loss": 0.8469,
"step": 27980
},
{
"epoch": 0.448,
"grad_norm": 0.2802644371986389,
"learning_rate": 0.00011043520000000001,
"loss": 0.8421,
"step": 28000
},
{
"epoch": 0.44832,
"grad_norm": 0.21653008460998535,
"learning_rate": 0.0001103712,
"loss": 0.9343,
"step": 28020
},
{
"epoch": 0.44864,
"grad_norm": 0.21708372235298157,
"learning_rate": 0.00011030720000000001,
"loss": 0.8866,
"step": 28040
},
{
"epoch": 0.44896,
"grad_norm": 0.2733645737171173,
"learning_rate": 0.00011024320000000001,
"loss": 0.9055,
"step": 28060
},
{
"epoch": 0.44928,
"grad_norm": 0.2751225531101227,
"learning_rate": 0.00011017920000000002,
"loss": 0.8615,
"step": 28080
},
{
"epoch": 0.4496,
"grad_norm": 0.23991945385932922,
"learning_rate": 0.0001101152,
"loss": 0.8963,
"step": 28100
},
{
"epoch": 0.44992,
"grad_norm": 0.3005094528198242,
"learning_rate": 0.0001100512,
"loss": 0.8711,
"step": 28120
},
{
"epoch": 0.45024,
"grad_norm": 0.2634584903717041,
"learning_rate": 0.0001099872,
"loss": 0.8764,
"step": 28140
},
{
"epoch": 0.45056,
"grad_norm": 0.22322441637516022,
"learning_rate": 0.0001099232,
"loss": 0.8467,
"step": 28160
},
{
"epoch": 0.45088,
"grad_norm": 0.21676741540431976,
"learning_rate": 0.00010985920000000001,
"loss": 0.8433,
"step": 28180
},
{
"epoch": 0.4512,
"grad_norm": 0.26323333382606506,
"learning_rate": 0.00010979520000000002,
"loss": 0.9486,
"step": 28200
},
{
"epoch": 0.45152,
"grad_norm": 0.21226100623607635,
"learning_rate": 0.00010973120000000001,
"loss": 0.8896,
"step": 28220
},
{
"epoch": 0.45184,
"grad_norm": 0.22679319977760315,
"learning_rate": 0.00010966720000000002,
"loss": 0.8528,
"step": 28240
},
{
"epoch": 0.45216,
"grad_norm": 0.18700149655342102,
"learning_rate": 0.0001096032,
"loss": 0.8368,
"step": 28260
},
{
"epoch": 0.45248,
"grad_norm": 0.23498637974262238,
"learning_rate": 0.0001095392,
"loss": 0.9194,
"step": 28280
},
{
"epoch": 0.4528,
"grad_norm": 0.22683313488960266,
"learning_rate": 0.0001094752,
"loss": 0.906,
"step": 28300
},
{
"epoch": 0.45312,
"grad_norm": 0.23284801840782166,
"learning_rate": 0.0001094112,
"loss": 0.9314,
"step": 28320
},
{
"epoch": 0.45344,
"grad_norm": 0.24406535923480988,
"learning_rate": 0.00010934720000000001,
"loss": 0.9017,
"step": 28340
},
{
"epoch": 0.45376,
"grad_norm": 0.22531583905220032,
"learning_rate": 0.0001092832,
"loss": 0.8906,
"step": 28360
},
{
"epoch": 0.45408,
"grad_norm": 0.21459725499153137,
"learning_rate": 0.00010921920000000001,
"loss": 0.8495,
"step": 28380
},
{
"epoch": 0.4544,
"grad_norm": 0.2690097689628601,
"learning_rate": 0.00010915520000000001,
"loss": 0.959,
"step": 28400
},
{
"epoch": 0.45472,
"grad_norm": 0.3007669448852539,
"learning_rate": 0.00010909120000000002,
"loss": 0.8716,
"step": 28420
},
{
"epoch": 0.45504,
"grad_norm": 0.2667557895183563,
"learning_rate": 0.0001090272,
"loss": 0.8909,
"step": 28440
},
{
"epoch": 0.45536,
"grad_norm": 0.24151913821697235,
"learning_rate": 0.0001089632,
"loss": 0.8808,
"step": 28460
},
{
"epoch": 0.45568,
"grad_norm": 0.2797807455062866,
"learning_rate": 0.0001088992,
"loss": 0.872,
"step": 28480
},
{
"epoch": 0.456,
"grad_norm": 0.3146251440048218,
"learning_rate": 0.0001088352,
"loss": 0.8632,
"step": 28500
},
{
"epoch": 0.45632,
"grad_norm": 0.26347050070762634,
"learning_rate": 0.0001087712,
"loss": 0.8863,
"step": 28520
},
{
"epoch": 0.45664,
"grad_norm": 0.24761775135993958,
"learning_rate": 0.00010870720000000002,
"loss": 0.8692,
"step": 28540
},
{
"epoch": 0.45696,
"grad_norm": 0.258346825838089,
"learning_rate": 0.00010864320000000001,
"loss": 0.8733,
"step": 28560
},
{
"epoch": 0.45728,
"grad_norm": 0.2539924085140228,
"learning_rate": 0.00010857920000000002,
"loss": 0.8691,
"step": 28580
},
{
"epoch": 0.4576,
"grad_norm": 0.5414292216300964,
"learning_rate": 0.0001085152,
"loss": 0.9207,
"step": 28600
},
{
"epoch": 0.45792,
"grad_norm": 0.21679574251174927,
"learning_rate": 0.0001084512,
"loss": 0.8953,
"step": 28620
},
{
"epoch": 0.45824,
"grad_norm": 0.236148864030838,
"learning_rate": 0.0001083872,
"loss": 0.8793,
"step": 28640
},
{
"epoch": 0.45856,
"grad_norm": 0.21311239898204803,
"learning_rate": 0.0001083232,
"loss": 0.8276,
"step": 28660
},
{
"epoch": 0.45888,
"grad_norm": 0.23734121024608612,
"learning_rate": 0.00010825920000000001,
"loss": 0.8464,
"step": 28680
},
{
"epoch": 0.4592,
"grad_norm": 0.2556433081626892,
"learning_rate": 0.0001081952,
"loss": 0.8775,
"step": 28700
},
{
"epoch": 0.45952,
"grad_norm": 0.25167569518089294,
"learning_rate": 0.00010813120000000001,
"loss": 0.8801,
"step": 28720
},
{
"epoch": 0.45984,
"grad_norm": 0.21533095836639404,
"learning_rate": 0.00010806720000000001,
"loss": 0.9013,
"step": 28740
},
{
"epoch": 0.46016,
"grad_norm": 0.2581512928009033,
"learning_rate": 0.00010800320000000002,
"loss": 0.8842,
"step": 28760
},
{
"epoch": 0.46048,
"grad_norm": 0.22151516377925873,
"learning_rate": 0.0001079392,
"loss": 0.9151,
"step": 28780
},
{
"epoch": 0.4608,
"grad_norm": 0.2598574459552765,
"learning_rate": 0.0001078752,
"loss": 0.8817,
"step": 28800
},
{
"epoch": 0.46112,
"grad_norm": 0.22162817418575287,
"learning_rate": 0.0001078112,
"loss": 0.8887,
"step": 28820
},
{
"epoch": 0.46144,
"grad_norm": 0.2202143371105194,
"learning_rate": 0.0001077472,
"loss": 0.9206,
"step": 28840
},
{
"epoch": 0.46176,
"grad_norm": 0.24929936230182648,
"learning_rate": 0.0001076832,
"loss": 0.9279,
"step": 28860
},
{
"epoch": 0.46208,
"grad_norm": 0.26587414741516113,
"learning_rate": 0.00010761920000000001,
"loss": 0.9087,
"step": 28880
},
{
"epoch": 0.4624,
"grad_norm": 0.2536023259162903,
"learning_rate": 0.00010755520000000001,
"loss": 0.9128,
"step": 28900
},
{
"epoch": 0.46272,
"grad_norm": 0.1964925229549408,
"learning_rate": 0.00010749120000000002,
"loss": 0.8445,
"step": 28920
},
{
"epoch": 0.46304,
"grad_norm": 0.21544238924980164,
"learning_rate": 0.0001074272,
"loss": 0.856,
"step": 28940
},
{
"epoch": 0.46336,
"grad_norm": 0.25481197237968445,
"learning_rate": 0.0001073632,
"loss": 0.8555,
"step": 28960
},
{
"epoch": 0.46368,
"grad_norm": 0.24622678756713867,
"learning_rate": 0.0001072992,
"loss": 0.8975,
"step": 28980
},
{
"epoch": 0.464,
"grad_norm": 0.2316320389509201,
"learning_rate": 0.0001072352,
"loss": 0.8802,
"step": 29000
},
{
"epoch": 0.46432,
"grad_norm": 0.22140583395957947,
"learning_rate": 0.00010717120000000001,
"loss": 0.8845,
"step": 29020
},
{
"epoch": 0.46464,
"grad_norm": 0.21848374605178833,
"learning_rate": 0.0001071072,
"loss": 0.8766,
"step": 29040
},
{
"epoch": 0.46496,
"grad_norm": 0.2609255015850067,
"learning_rate": 0.00010704320000000001,
"loss": 0.9335,
"step": 29060
},
{
"epoch": 0.46528,
"grad_norm": 0.25037628412246704,
"learning_rate": 0.00010697920000000001,
"loss": 0.8705,
"step": 29080
},
{
"epoch": 0.4656,
"grad_norm": 0.2289629876613617,
"learning_rate": 0.00010691520000000002,
"loss": 0.9401,
"step": 29100
},
{
"epoch": 0.46592,
"grad_norm": 0.28347960114479065,
"learning_rate": 0.0001068512,
"loss": 0.9032,
"step": 29120
},
{
"epoch": 0.46624,
"grad_norm": 0.26885560154914856,
"learning_rate": 0.00010678719999999999,
"loss": 0.8777,
"step": 29140
},
{
"epoch": 0.46656,
"grad_norm": 0.240605428814888,
"learning_rate": 0.0001067232,
"loss": 0.8381,
"step": 29160
},
{
"epoch": 0.46688,
"grad_norm": 0.21176287531852722,
"learning_rate": 0.0001066592,
"loss": 0.8988,
"step": 29180
},
{
"epoch": 0.4672,
"grad_norm": 0.2735714316368103,
"learning_rate": 0.0001065952,
"loss": 0.8879,
"step": 29200
},
{
"epoch": 0.46752,
"grad_norm": 0.22908750176429749,
"learning_rate": 0.00010653120000000001,
"loss": 0.9259,
"step": 29220
},
{
"epoch": 0.46784,
"grad_norm": 0.2262643426656723,
"learning_rate": 0.00010646720000000001,
"loss": 0.8774,
"step": 29240
},
{
"epoch": 0.46816,
"grad_norm": 0.1967533677816391,
"learning_rate": 0.00010640320000000002,
"loss": 0.8512,
"step": 29260
},
{
"epoch": 0.46848,
"grad_norm": 0.23780053853988647,
"learning_rate": 0.0001063392,
"loss": 0.8703,
"step": 29280
},
{
"epoch": 0.4688,
"grad_norm": 0.2447732537984848,
"learning_rate": 0.0001062752,
"loss": 0.8656,
"step": 29300
},
{
"epoch": 0.46912,
"grad_norm": 0.23653922975063324,
"learning_rate": 0.0001062112,
"loss": 0.9124,
"step": 29320
},
{
"epoch": 0.46944,
"grad_norm": 0.22936119139194489,
"learning_rate": 0.0001061472,
"loss": 0.9262,
"step": 29340
},
{
"epoch": 0.46976,
"grad_norm": 0.26746055483818054,
"learning_rate": 0.00010608320000000001,
"loss": 0.9279,
"step": 29360
},
{
"epoch": 0.47008,
"grad_norm": 0.27925965189933777,
"learning_rate": 0.0001060192,
"loss": 0.9249,
"step": 29380
},
{
"epoch": 0.4704,
"grad_norm": 0.25772443413734436,
"learning_rate": 0.00010595520000000001,
"loss": 0.857,
"step": 29400
},
{
"epoch": 0.47072,
"grad_norm": 0.31103163957595825,
"learning_rate": 0.00010589120000000001,
"loss": 0.8858,
"step": 29420
},
{
"epoch": 0.47104,
"grad_norm": 0.2527211010456085,
"learning_rate": 0.00010582720000000002,
"loss": 0.8851,
"step": 29440
},
{
"epoch": 0.47136,
"grad_norm": 0.2501220405101776,
"learning_rate": 0.0001057632,
"loss": 0.8829,
"step": 29460
},
{
"epoch": 0.47168,
"grad_norm": 0.20719179511070251,
"learning_rate": 0.00010569919999999999,
"loss": 0.8934,
"step": 29480
},
{
"epoch": 0.472,
"grad_norm": 0.24948135018348694,
"learning_rate": 0.0001056352,
"loss": 0.9229,
"step": 29500
},
{
"epoch": 0.47232,
"grad_norm": 0.2544534206390381,
"learning_rate": 0.00010557120000000001,
"loss": 0.9075,
"step": 29520
},
{
"epoch": 0.47264,
"grad_norm": 0.24410614371299744,
"learning_rate": 0.0001055072,
"loss": 0.8189,
"step": 29540
},
{
"epoch": 0.47296,
"grad_norm": 0.2736496925354004,
"learning_rate": 0.00010544320000000001,
"loss": 0.8778,
"step": 29560
},
{
"epoch": 0.47328,
"grad_norm": 0.2509610652923584,
"learning_rate": 0.00010537920000000001,
"loss": 0.9481,
"step": 29580
},
{
"epoch": 0.4736,
"grad_norm": 0.23943458497524261,
"learning_rate": 0.00010531520000000002,
"loss": 0.8921,
"step": 29600
},
{
"epoch": 0.47392,
"grad_norm": 0.2580275237560272,
"learning_rate": 0.0001052512,
"loss": 0.9293,
"step": 29620
},
{
"epoch": 0.47424,
"grad_norm": 0.27012938261032104,
"learning_rate": 0.0001051872,
"loss": 0.8833,
"step": 29640
},
{
"epoch": 0.47456,
"grad_norm": 0.25766292214393616,
"learning_rate": 0.0001051232,
"loss": 0.8901,
"step": 29660
},
{
"epoch": 0.47488,
"grad_norm": 0.2404147982597351,
"learning_rate": 0.0001050592,
"loss": 0.875,
"step": 29680
},
{
"epoch": 0.4752,
"grad_norm": 0.24435456097126007,
"learning_rate": 0.00010499520000000001,
"loss": 0.9058,
"step": 29700
},
{
"epoch": 0.47552,
"grad_norm": 0.21276427805423737,
"learning_rate": 0.0001049312,
"loss": 0.851,
"step": 29720
},
{
"epoch": 0.47584,
"grad_norm": 0.26436007022857666,
"learning_rate": 0.00010486720000000001,
"loss": 0.8794,
"step": 29740
},
{
"epoch": 0.47616,
"grad_norm": 0.2028064727783203,
"learning_rate": 0.0001048032,
"loss": 0.9011,
"step": 29760
},
{
"epoch": 0.47648,
"grad_norm": 0.2643204629421234,
"learning_rate": 0.00010473920000000002,
"loss": 0.8951,
"step": 29780
},
{
"epoch": 0.4768,
"grad_norm": 0.2519238293170929,
"learning_rate": 0.0001046752,
"loss": 0.9099,
"step": 29800
},
{
"epoch": 0.47712,
"grad_norm": 0.2304890900850296,
"learning_rate": 0.00010461119999999999,
"loss": 0.9054,
"step": 29820
},
{
"epoch": 0.47744,
"grad_norm": 0.24808572232723236,
"learning_rate": 0.0001045472,
"loss": 0.9119,
"step": 29840
},
{
"epoch": 0.47776,
"grad_norm": 0.23621508479118347,
"learning_rate": 0.00010448320000000001,
"loss": 0.9018,
"step": 29860
},
{
"epoch": 0.47808,
"grad_norm": 0.23834584653377533,
"learning_rate": 0.0001044192,
"loss": 0.8939,
"step": 29880
},
{
"epoch": 0.4784,
"grad_norm": 0.2608813941478729,
"learning_rate": 0.00010435520000000001,
"loss": 0.898,
"step": 29900
},
{
"epoch": 0.47872,
"grad_norm": 0.22944658994674683,
"learning_rate": 0.00010429120000000001,
"loss": 0.8535,
"step": 29920
},
{
"epoch": 0.47904,
"grad_norm": 0.18182271718978882,
"learning_rate": 0.00010422720000000002,
"loss": 0.9109,
"step": 29940
},
{
"epoch": 0.47936,
"grad_norm": 0.25479796528816223,
"learning_rate": 0.0001041632,
"loss": 0.8445,
"step": 29960
},
{
"epoch": 0.47968,
"grad_norm": 0.2711828052997589,
"learning_rate": 0.0001040992,
"loss": 0.8993,
"step": 29980
},
{
"epoch": 0.48,
"grad_norm": 0.23804309964179993,
"learning_rate": 0.0001040352,
"loss": 0.826,
"step": 30000
},
{
"epoch": 0.48032,
"grad_norm": 0.2019588053226471,
"learning_rate": 0.0001039712,
"loss": 0.8777,
"step": 30020
},
{
"epoch": 0.48064,
"grad_norm": 0.2302328497171402,
"learning_rate": 0.00010390720000000001,
"loss": 0.9068,
"step": 30040
},
{
"epoch": 0.48096,
"grad_norm": 0.20539428293704987,
"learning_rate": 0.0001038432,
"loss": 0.894,
"step": 30060
},
{
"epoch": 0.48128,
"grad_norm": 0.3114759624004364,
"learning_rate": 0.00010377920000000001,
"loss": 0.8664,
"step": 30080
},
{
"epoch": 0.4816,
"grad_norm": 0.22987259924411774,
"learning_rate": 0.0001037152,
"loss": 0.8831,
"step": 30100
},
{
"epoch": 0.48192,
"grad_norm": 0.29275548458099365,
"learning_rate": 0.00010365120000000001,
"loss": 0.9069,
"step": 30120
},
{
"epoch": 0.48224,
"grad_norm": 0.21175888180732727,
"learning_rate": 0.0001035872,
"loss": 0.9181,
"step": 30140
},
{
"epoch": 0.48256,
"grad_norm": 0.32916828989982605,
"learning_rate": 0.00010352319999999999,
"loss": 0.9072,
"step": 30160
},
{
"epoch": 0.48288,
"grad_norm": 0.24162223935127258,
"learning_rate": 0.0001034592,
"loss": 0.8855,
"step": 30180
},
{
"epoch": 0.4832,
"grad_norm": 0.21086947619915009,
"learning_rate": 0.00010339520000000001,
"loss": 0.9223,
"step": 30200
},
{
"epoch": 0.48352,
"grad_norm": 0.24102705717086792,
"learning_rate": 0.0001033312,
"loss": 0.8887,
"step": 30220
},
{
"epoch": 0.48384,
"grad_norm": 0.24661841988563538,
"learning_rate": 0.00010326720000000001,
"loss": 0.9,
"step": 30240
},
{
"epoch": 0.48416,
"grad_norm": 0.2146749347448349,
"learning_rate": 0.00010320320000000001,
"loss": 0.9188,
"step": 30260
},
{
"epoch": 0.48448,
"grad_norm": 0.31963422894477844,
"learning_rate": 0.00010313920000000002,
"loss": 0.8681,
"step": 30280
},
{
"epoch": 0.4848,
"grad_norm": 0.25737327337265015,
"learning_rate": 0.0001030752,
"loss": 0.866,
"step": 30300
},
{
"epoch": 0.48512,
"grad_norm": 0.25805580615997314,
"learning_rate": 0.0001030112,
"loss": 0.9203,
"step": 30320
},
{
"epoch": 0.48544,
"grad_norm": 0.2447681874036789,
"learning_rate": 0.0001029472,
"loss": 0.8897,
"step": 30340
},
{
"epoch": 0.48576,
"grad_norm": 0.2577648460865021,
"learning_rate": 0.0001028832,
"loss": 0.8548,
"step": 30360
},
{
"epoch": 0.48608,
"grad_norm": 0.24042245745658875,
"learning_rate": 0.0001028192,
"loss": 0.9512,
"step": 30380
},
{
"epoch": 0.4864,
"grad_norm": 0.24115043878555298,
"learning_rate": 0.0001027552,
"loss": 0.9036,
"step": 30400
},
{
"epoch": 0.48672,
"grad_norm": 0.2144307792186737,
"learning_rate": 0.00010269120000000001,
"loss": 0.9172,
"step": 30420
},
{
"epoch": 0.48704,
"grad_norm": 0.21385303139686584,
"learning_rate": 0.0001026272,
"loss": 0.8796,
"step": 30440
},
{
"epoch": 0.48736,
"grad_norm": 0.21479547023773193,
"learning_rate": 0.00010256320000000001,
"loss": 0.8923,
"step": 30460
},
{
"epoch": 0.48768,
"grad_norm": 0.24400416016578674,
"learning_rate": 0.0001024992,
"loss": 0.9339,
"step": 30480
},
{
"epoch": 0.488,
"grad_norm": 0.19768428802490234,
"learning_rate": 0.00010243519999999999,
"loss": 0.8924,
"step": 30500
},
{
"epoch": 0.48832,
"grad_norm": 0.23109054565429688,
"learning_rate": 0.0001023712,
"loss": 0.9223,
"step": 30520
},
{
"epoch": 0.48864,
"grad_norm": 0.1906929314136505,
"learning_rate": 0.00010230720000000001,
"loss": 0.8876,
"step": 30540
},
{
"epoch": 0.48896,
"grad_norm": 0.24491210281848907,
"learning_rate": 0.0001022432,
"loss": 0.8911,
"step": 30560
},
{
"epoch": 0.48928,
"grad_norm": 0.21576926112174988,
"learning_rate": 0.00010217920000000001,
"loss": 0.8618,
"step": 30580
},
{
"epoch": 0.4896,
"grad_norm": 0.2178792953491211,
"learning_rate": 0.00010211520000000001,
"loss": 0.8744,
"step": 30600
},
{
"epoch": 0.48992,
"grad_norm": 0.2320430725812912,
"learning_rate": 0.00010205120000000002,
"loss": 0.8853,
"step": 30620
},
{
"epoch": 0.49024,
"grad_norm": 0.2017570436000824,
"learning_rate": 0.0001019872,
"loss": 0.878,
"step": 30640
},
{
"epoch": 0.49056,
"grad_norm": 0.26477140188217163,
"learning_rate": 0.0001019232,
"loss": 0.9664,
"step": 30660
},
{
"epoch": 0.49088,
"grad_norm": 0.22188952565193176,
"learning_rate": 0.0001018592,
"loss": 0.8757,
"step": 30680
},
{
"epoch": 0.4912,
"grad_norm": 0.2646230161190033,
"learning_rate": 0.0001017952,
"loss": 0.8465,
"step": 30700
},
{
"epoch": 0.49152,
"grad_norm": 0.19764351844787598,
"learning_rate": 0.0001017312,
"loss": 0.8602,
"step": 30720
},
{
"epoch": 0.49184,
"grad_norm": 0.2683072090148926,
"learning_rate": 0.0001016672,
"loss": 0.9249,
"step": 30740
},
{
"epoch": 0.49216,
"grad_norm": 0.2290680855512619,
"learning_rate": 0.00010160320000000001,
"loss": 0.8733,
"step": 30760
},
{
"epoch": 0.49248,
"grad_norm": 0.2023002654314041,
"learning_rate": 0.00010153920000000002,
"loss": 0.9078,
"step": 30780
},
{
"epoch": 0.4928,
"grad_norm": 0.2146545648574829,
"learning_rate": 0.00010147520000000001,
"loss": 0.901,
"step": 30800
},
{
"epoch": 0.49312,
"grad_norm": 0.2241715043783188,
"learning_rate": 0.0001014112,
"loss": 0.8551,
"step": 30820
},
{
"epoch": 0.49344,
"grad_norm": 0.21123602986335754,
"learning_rate": 0.0001013472,
"loss": 0.8821,
"step": 30840
},
{
"epoch": 0.49376,
"grad_norm": 0.2702392637729645,
"learning_rate": 0.0001012832,
"loss": 0.9438,
"step": 30860
},
{
"epoch": 0.49408,
"grad_norm": 0.22311244904994965,
"learning_rate": 0.00010121920000000001,
"loss": 0.8835,
"step": 30880
},
{
"epoch": 0.4944,
"grad_norm": 0.20762741565704346,
"learning_rate": 0.0001011552,
"loss": 0.8668,
"step": 30900
},
{
"epoch": 0.49472,
"grad_norm": 0.2043907791376114,
"learning_rate": 0.00010109120000000001,
"loss": 0.8684,
"step": 30920
},
{
"epoch": 0.49504,
"grad_norm": 0.23084862530231476,
"learning_rate": 0.00010102720000000001,
"loss": 0.9253,
"step": 30940
},
{
"epoch": 0.49536,
"grad_norm": 0.23380133509635925,
"learning_rate": 0.00010096320000000002,
"loss": 0.8374,
"step": 30960
},
{
"epoch": 0.49568,
"grad_norm": 0.23766806721687317,
"learning_rate": 0.00010089920000000001,
"loss": 0.8712,
"step": 30980
},
{
"epoch": 0.496,
"grad_norm": 0.24725081026554108,
"learning_rate": 0.00010083519999999999,
"loss": 0.8836,
"step": 31000
},
{
"epoch": 0.49632,
"grad_norm": 0.25081491470336914,
"learning_rate": 0.0001007712,
"loss": 0.8037,
"step": 31020
},
{
"epoch": 0.49664,
"grad_norm": 0.3002113401889801,
"learning_rate": 0.0001007072,
"loss": 0.8463,
"step": 31040
},
{
"epoch": 0.49696,
"grad_norm": 0.24691928923130035,
"learning_rate": 0.0001006432,
"loss": 0.8808,
"step": 31060
},
{
"epoch": 0.49728,
"grad_norm": 0.22657333314418793,
"learning_rate": 0.0001005792,
"loss": 0.8678,
"step": 31080
},
{
"epoch": 0.4976,
"grad_norm": 0.2616662085056305,
"learning_rate": 0.00010051520000000001,
"loss": 0.8786,
"step": 31100
},
{
"epoch": 0.49792,
"grad_norm": 0.23406346142292023,
"learning_rate": 0.00010045120000000002,
"loss": 0.8488,
"step": 31120
},
{
"epoch": 0.49824,
"grad_norm": 0.2088574469089508,
"learning_rate": 0.00010038720000000001,
"loss": 0.8433,
"step": 31140
},
{
"epoch": 0.49856,
"grad_norm": 0.24721549451351166,
"learning_rate": 0.0001003264,
"loss": 0.8545,
"step": 31160
},
{
"epoch": 0.49888,
"grad_norm": 0.2351522445678711,
"learning_rate": 0.0001002624,
"loss": 0.929,
"step": 31180
},
{
"epoch": 0.4992,
"grad_norm": 0.24752940237522125,
"learning_rate": 0.0001001984,
"loss": 0.8709,
"step": 31200
},
{
"epoch": 0.49952,
"grad_norm": 0.2309713363647461,
"learning_rate": 0.0001001344,
"loss": 0.8782,
"step": 31220
},
{
"epoch": 0.49984,
"grad_norm": 0.24925391376018524,
"learning_rate": 0.00010007040000000001,
"loss": 0.8542,
"step": 31240
},
{
"epoch": 0.50016,
"grad_norm": 0.22123312950134277,
"learning_rate": 0.0001000064,
"loss": 0.8859,
"step": 31260
},
{
"epoch": 0.50048,
"grad_norm": 0.24795830249786377,
"learning_rate": 9.99424e-05,
"loss": 0.8789,
"step": 31280
},
{
"epoch": 0.5008,
"grad_norm": 0.22997990250587463,
"learning_rate": 9.987840000000001e-05,
"loss": 0.8454,
"step": 31300
},
{
"epoch": 0.50112,
"grad_norm": 0.24946443736553192,
"learning_rate": 9.98144e-05,
"loss": 0.9236,
"step": 31320
},
{
"epoch": 0.50144,
"grad_norm": 0.21945710480213165,
"learning_rate": 9.97504e-05,
"loss": 0.9018,
"step": 31340
},
{
"epoch": 0.50176,
"grad_norm": 0.23725731670856476,
"learning_rate": 9.968640000000001e-05,
"loss": 0.8932,
"step": 31360
},
{
"epoch": 0.50208,
"grad_norm": 0.256510466337204,
"learning_rate": 9.96224e-05,
"loss": 0.8957,
"step": 31380
},
{
"epoch": 0.5024,
"grad_norm": 0.2660234272480011,
"learning_rate": 9.955840000000001e-05,
"loss": 0.8573,
"step": 31400
},
{
"epoch": 0.50272,
"grad_norm": 0.2343997359275818,
"learning_rate": 9.949440000000001e-05,
"loss": 0.8419,
"step": 31420
},
{
"epoch": 0.50304,
"grad_norm": 0.25852516293525696,
"learning_rate": 9.94304e-05,
"loss": 0.8699,
"step": 31440
},
{
"epoch": 0.50336,
"grad_norm": 0.21607396006584167,
"learning_rate": 9.93664e-05,
"loss": 0.8752,
"step": 31460
},
{
"epoch": 0.50368,
"grad_norm": 0.23309437930583954,
"learning_rate": 9.93024e-05,
"loss": 0.8992,
"step": 31480
},
{
"epoch": 0.504,
"grad_norm": 0.24827729165554047,
"learning_rate": 9.923840000000002e-05,
"loss": 0.9009,
"step": 31500
},
{
"epoch": 0.50432,
"grad_norm": 0.22215944528579712,
"learning_rate": 9.91744e-05,
"loss": 0.8935,
"step": 31520
},
{
"epoch": 0.50464,
"grad_norm": 0.23230740427970886,
"learning_rate": 9.91104e-05,
"loss": 0.9087,
"step": 31540
},
{
"epoch": 0.50496,
"grad_norm": 0.25434383749961853,
"learning_rate": 9.90464e-05,
"loss": 0.9246,
"step": 31560
},
{
"epoch": 0.50528,
"grad_norm": 0.22664053738117218,
"learning_rate": 9.898240000000001e-05,
"loss": 0.8454,
"step": 31580
},
{
"epoch": 0.5056,
"grad_norm": 0.2654976546764374,
"learning_rate": 9.89184e-05,
"loss": 0.8788,
"step": 31600
},
{
"epoch": 0.50592,
"grad_norm": 0.23799720406532288,
"learning_rate": 9.88544e-05,
"loss": 0.9061,
"step": 31620
},
{
"epoch": 0.50624,
"grad_norm": 0.22206011414527893,
"learning_rate": 9.879040000000001e-05,
"loss": 0.9175,
"step": 31640
},
{
"epoch": 0.50656,
"grad_norm": 0.20660768449306488,
"learning_rate": 9.87264e-05,
"loss": 0.9249,
"step": 31660
},
{
"epoch": 0.50688,
"grad_norm": 0.2204529196023941,
"learning_rate": 9.86624e-05,
"loss": 0.8608,
"step": 31680
},
{
"epoch": 0.5072,
"grad_norm": 0.23114994168281555,
"learning_rate": 9.859840000000001e-05,
"loss": 0.8453,
"step": 31700
},
{
"epoch": 0.50752,
"grad_norm": 0.23731987178325653,
"learning_rate": 9.85344e-05,
"loss": 0.8556,
"step": 31720
},
{
"epoch": 0.50784,
"grad_norm": 0.2295464277267456,
"learning_rate": 9.847040000000001e-05,
"loss": 0.8361,
"step": 31740
},
{
"epoch": 0.50816,
"grad_norm": 0.2154739946126938,
"learning_rate": 9.840640000000001e-05,
"loss": 0.9396,
"step": 31760
},
{
"epoch": 0.50848,
"grad_norm": 0.2496858537197113,
"learning_rate": 9.83424e-05,
"loss": 0.9095,
"step": 31780
},
{
"epoch": 0.5088,
"grad_norm": 0.2576257288455963,
"learning_rate": 9.82784e-05,
"loss": 0.928,
"step": 31800
},
{
"epoch": 0.50912,
"grad_norm": 0.2754540741443634,
"learning_rate": 9.82144e-05,
"loss": 0.9274,
"step": 31820
},
{
"epoch": 0.50944,
"grad_norm": 0.21684108674526215,
"learning_rate": 9.815040000000001e-05,
"loss": 0.9536,
"step": 31840
},
{
"epoch": 0.50976,
"grad_norm": 0.20123428106307983,
"learning_rate": 9.80864e-05,
"loss": 0.8948,
"step": 31860
},
{
"epoch": 0.51008,
"grad_norm": 0.19840183854103088,
"learning_rate": 9.80224e-05,
"loss": 0.8843,
"step": 31880
},
{
"epoch": 0.5104,
"grad_norm": 0.21900126338005066,
"learning_rate": 9.79584e-05,
"loss": 0.861,
"step": 31900
},
{
"epoch": 0.51072,
"grad_norm": 0.27311161160469055,
"learning_rate": 9.789440000000001e-05,
"loss": 0.8824,
"step": 31920
},
{
"epoch": 0.51104,
"grad_norm": 0.2307424545288086,
"learning_rate": 9.78304e-05,
"loss": 0.9077,
"step": 31940
},
{
"epoch": 0.51136,
"grad_norm": 0.23477308452129364,
"learning_rate": 9.77664e-05,
"loss": 0.8819,
"step": 31960
},
{
"epoch": 0.51168,
"grad_norm": 0.24617180228233337,
"learning_rate": 9.770240000000001e-05,
"loss": 0.8832,
"step": 31980
},
{
"epoch": 0.512,
"grad_norm": 0.28253304958343506,
"learning_rate": 9.76384e-05,
"loss": 0.9143,
"step": 32000
},
{
"epoch": 0.51232,
"grad_norm": 0.21168233454227448,
"learning_rate": 9.75744e-05,
"loss": 0.8644,
"step": 32020
},
{
"epoch": 0.51264,
"grad_norm": 0.2240704596042633,
"learning_rate": 9.751040000000001e-05,
"loss": 0.8768,
"step": 32040
},
{
"epoch": 0.51296,
"grad_norm": 0.26020580530166626,
"learning_rate": 9.74464e-05,
"loss": 0.8708,
"step": 32060
},
{
"epoch": 0.51328,
"grad_norm": 0.2664453387260437,
"learning_rate": 9.738240000000001e-05,
"loss": 0.8823,
"step": 32080
},
{
"epoch": 0.5136,
"grad_norm": 0.24020379781723022,
"learning_rate": 9.73184e-05,
"loss": 0.9324,
"step": 32100
},
{
"epoch": 0.51392,
"grad_norm": 0.25187745690345764,
"learning_rate": 9.72544e-05,
"loss": 0.9079,
"step": 32120
},
{
"epoch": 0.51424,
"grad_norm": 0.2086835503578186,
"learning_rate": 9.71904e-05,
"loss": 0.9136,
"step": 32140
},
{
"epoch": 0.51456,
"grad_norm": 0.20220904052257538,
"learning_rate": 9.71264e-05,
"loss": 0.9192,
"step": 32160
},
{
"epoch": 0.51488,
"grad_norm": 0.245720773935318,
"learning_rate": 9.706240000000001e-05,
"loss": 0.8716,
"step": 32180
},
{
"epoch": 0.5152,
"grad_norm": 0.24418127536773682,
"learning_rate": 9.69984e-05,
"loss": 0.9469,
"step": 32200
},
{
"epoch": 0.51552,
"grad_norm": 0.20389395952224731,
"learning_rate": 9.69344e-05,
"loss": 0.8574,
"step": 32220
},
{
"epoch": 0.51584,
"grad_norm": 0.25827401876449585,
"learning_rate": 9.68704e-05,
"loss": 0.8792,
"step": 32240
},
{
"epoch": 0.51616,
"grad_norm": 0.24777017533779144,
"learning_rate": 9.680640000000001e-05,
"loss": 0.9179,
"step": 32260
},
{
"epoch": 0.51648,
"grad_norm": 0.24638600647449493,
"learning_rate": 9.67424e-05,
"loss": 0.8133,
"step": 32280
},
{
"epoch": 0.5168,
"grad_norm": 0.24801717698574066,
"learning_rate": 9.66784e-05,
"loss": 0.9329,
"step": 32300
},
{
"epoch": 0.51712,
"grad_norm": 0.23096071183681488,
"learning_rate": 9.661440000000001e-05,
"loss": 0.8527,
"step": 32320
},
{
"epoch": 0.51744,
"grad_norm": 0.25937584042549133,
"learning_rate": 9.65504e-05,
"loss": 0.8984,
"step": 32340
},
{
"epoch": 0.51776,
"grad_norm": 0.22245679795742035,
"learning_rate": 9.648640000000001e-05,
"loss": 0.888,
"step": 32360
},
{
"epoch": 0.51808,
"grad_norm": 0.24738770723342896,
"learning_rate": 9.642240000000001e-05,
"loss": 0.9783,
"step": 32380
},
{
"epoch": 0.5184,
"grad_norm": 0.24137365818023682,
"learning_rate": 9.63584e-05,
"loss": 0.9105,
"step": 32400
},
{
"epoch": 0.51872,
"grad_norm": 0.2397020161151886,
"learning_rate": 9.629440000000001e-05,
"loss": 0.8195,
"step": 32420
},
{
"epoch": 0.51904,
"grad_norm": 0.2638731598854065,
"learning_rate": 9.62304e-05,
"loss": 0.94,
"step": 32440
},
{
"epoch": 0.51936,
"grad_norm": 0.24911172688007355,
"learning_rate": 9.61664e-05,
"loss": 0.9078,
"step": 32460
},
{
"epoch": 0.51968,
"grad_norm": 0.2063673883676529,
"learning_rate": 9.610240000000001e-05,
"loss": 0.8501,
"step": 32480
},
{
"epoch": 0.52,
"grad_norm": 0.2300567924976349,
"learning_rate": 9.60384e-05,
"loss": 0.8857,
"step": 32500
},
{
"epoch": 0.52032,
"grad_norm": 0.26897576451301575,
"learning_rate": 9.597440000000001e-05,
"loss": 0.9256,
"step": 32520
},
{
"epoch": 0.52064,
"grad_norm": 0.25261190533638,
"learning_rate": 9.59104e-05,
"loss": 0.8997,
"step": 32540
},
{
"epoch": 0.52096,
"grad_norm": 0.24318355321884155,
"learning_rate": 9.58464e-05,
"loss": 0.8751,
"step": 32560
},
{
"epoch": 0.52128,
"grad_norm": 0.24040265381336212,
"learning_rate": 9.57824e-05,
"loss": 0.8543,
"step": 32580
},
{
"epoch": 0.5216,
"grad_norm": 0.22509385645389557,
"learning_rate": 9.571840000000001e-05,
"loss": 0.8858,
"step": 32600
},
{
"epoch": 0.52192,
"grad_norm": 0.2367594838142395,
"learning_rate": 9.56544e-05,
"loss": 0.8506,
"step": 32620
},
{
"epoch": 0.52224,
"grad_norm": 0.2382354438304901,
"learning_rate": 9.55904e-05,
"loss": 0.875,
"step": 32640
},
{
"epoch": 0.52256,
"grad_norm": 0.24895018339157104,
"learning_rate": 9.552640000000001e-05,
"loss": 0.9271,
"step": 32660
},
{
"epoch": 0.52288,
"grad_norm": 0.2637403905391693,
"learning_rate": 9.54624e-05,
"loss": 0.8485,
"step": 32680
},
{
"epoch": 0.5232,
"grad_norm": 0.2029896229505539,
"learning_rate": 9.539840000000001e-05,
"loss": 0.8569,
"step": 32700
},
{
"epoch": 0.52352,
"grad_norm": 0.28035101294517517,
"learning_rate": 9.53344e-05,
"loss": 0.8745,
"step": 32720
},
{
"epoch": 0.52384,
"grad_norm": 0.24843214452266693,
"learning_rate": 9.52704e-05,
"loss": 0.8853,
"step": 32740
},
{
"epoch": 0.52416,
"grad_norm": 0.26682519912719727,
"learning_rate": 9.520640000000001e-05,
"loss": 0.9163,
"step": 32760
},
{
"epoch": 0.52448,
"grad_norm": 0.2349347323179245,
"learning_rate": 9.51424e-05,
"loss": 0.91,
"step": 32780
},
{
"epoch": 0.5248,
"grad_norm": 0.2493859827518463,
"learning_rate": 9.50784e-05,
"loss": 0.8412,
"step": 32800
},
{
"epoch": 0.52512,
"grad_norm": 0.2364472597837448,
"learning_rate": 9.501440000000001e-05,
"loss": 0.8777,
"step": 32820
},
{
"epoch": 0.52544,
"grad_norm": 0.2579153776168823,
"learning_rate": 9.49504e-05,
"loss": 0.9061,
"step": 32840
},
{
"epoch": 0.52576,
"grad_norm": 0.23014868795871735,
"learning_rate": 9.488640000000001e-05,
"loss": 0.9311,
"step": 32860
},
{
"epoch": 0.52608,
"grad_norm": 0.2836184501647949,
"learning_rate": 9.48224e-05,
"loss": 0.8641,
"step": 32880
},
{
"epoch": 0.5264,
"grad_norm": 0.36536288261413574,
"learning_rate": 9.47584e-05,
"loss": 0.8722,
"step": 32900
},
{
"epoch": 0.52672,
"grad_norm": 0.23661687970161438,
"learning_rate": 9.46944e-05,
"loss": 0.8349,
"step": 32920
},
{
"epoch": 0.52704,
"grad_norm": 0.24428099393844604,
"learning_rate": 9.463040000000001e-05,
"loss": 0.8688,
"step": 32940
},
{
"epoch": 0.52736,
"grad_norm": 0.24511151015758514,
"learning_rate": 9.45664e-05,
"loss": 0.88,
"step": 32960
},
{
"epoch": 0.52768,
"grad_norm": 0.244853213429451,
"learning_rate": 9.45024e-05,
"loss": 0.8739,
"step": 32980
},
{
"epoch": 0.528,
"grad_norm": 0.2549150884151459,
"learning_rate": 9.443840000000001e-05,
"loss": 0.8941,
"step": 33000
},
{
"epoch": 0.52832,
"grad_norm": 0.24175408482551575,
"learning_rate": 9.43744e-05,
"loss": 0.8919,
"step": 33020
},
{
"epoch": 0.52864,
"grad_norm": 0.25348398089408875,
"learning_rate": 9.431040000000001e-05,
"loss": 0.8914,
"step": 33040
},
{
"epoch": 0.52896,
"grad_norm": 0.21426767110824585,
"learning_rate": 9.42464e-05,
"loss": 0.8783,
"step": 33060
},
{
"epoch": 0.52928,
"grad_norm": 0.2478022277355194,
"learning_rate": 9.41824e-05,
"loss": 0.866,
"step": 33080
},
{
"epoch": 0.5296,
"grad_norm": 0.21202678978443146,
"learning_rate": 9.411840000000001e-05,
"loss": 0.8285,
"step": 33100
},
{
"epoch": 0.52992,
"grad_norm": 0.2358037382364273,
"learning_rate": 9.40544e-05,
"loss": 0.8479,
"step": 33120
},
{
"epoch": 0.53024,
"grad_norm": 0.2295175939798355,
"learning_rate": 9.39904e-05,
"loss": 0.8979,
"step": 33140
},
{
"epoch": 0.53056,
"grad_norm": 0.22576481103897095,
"learning_rate": 9.392640000000001e-05,
"loss": 0.919,
"step": 33160
},
{
"epoch": 0.53088,
"grad_norm": 0.20744270086288452,
"learning_rate": 9.38624e-05,
"loss": 0.9019,
"step": 33180
},
{
"epoch": 0.5312,
"grad_norm": 0.26612403988838196,
"learning_rate": 9.379840000000001e-05,
"loss": 0.8855,
"step": 33200
},
{
"epoch": 0.53152,
"grad_norm": 0.24272586405277252,
"learning_rate": 9.37344e-05,
"loss": 0.8907,
"step": 33220
},
{
"epoch": 0.53184,
"grad_norm": 0.25545746088027954,
"learning_rate": 9.36704e-05,
"loss": 0.8881,
"step": 33240
},
{
"epoch": 0.53216,
"grad_norm": 0.22786740958690643,
"learning_rate": 9.36064e-05,
"loss": 0.8739,
"step": 33260
},
{
"epoch": 0.53248,
"grad_norm": 0.2257343977689743,
"learning_rate": 9.354240000000001e-05,
"loss": 0.8873,
"step": 33280
},
{
"epoch": 0.5328,
"grad_norm": 0.23881380259990692,
"learning_rate": 9.34784e-05,
"loss": 0.8811,
"step": 33300
},
{
"epoch": 0.53312,
"grad_norm": 0.2408117800951004,
"learning_rate": 9.34144e-05,
"loss": 0.8528,
"step": 33320
},
{
"epoch": 0.53344,
"grad_norm": 0.20471978187561035,
"learning_rate": 9.33504e-05,
"loss": 0.8313,
"step": 33340
},
{
"epoch": 0.53376,
"grad_norm": 0.24219338595867157,
"learning_rate": 9.32864e-05,
"loss": 0.8821,
"step": 33360
},
{
"epoch": 0.53408,
"grad_norm": 0.24901708960533142,
"learning_rate": 9.322240000000001e-05,
"loss": 0.9017,
"step": 33380
},
{
"epoch": 0.5344,
"grad_norm": 0.2642413079738617,
"learning_rate": 9.31584e-05,
"loss": 0.8627,
"step": 33400
},
{
"epoch": 0.53472,
"grad_norm": 0.20527620613574982,
"learning_rate": 9.30944e-05,
"loss": 0.8729,
"step": 33420
},
{
"epoch": 0.53504,
"grad_norm": 0.2573811411857605,
"learning_rate": 9.303040000000001e-05,
"loss": 0.8767,
"step": 33440
},
{
"epoch": 0.53536,
"grad_norm": 0.2389804571866989,
"learning_rate": 9.29664e-05,
"loss": 0.8553,
"step": 33460
},
{
"epoch": 0.53568,
"grad_norm": 0.25284579396247864,
"learning_rate": 9.29024e-05,
"loss": 0.9157,
"step": 33480
},
{
"epoch": 0.536,
"grad_norm": 0.24692294001579285,
"learning_rate": 9.283840000000001e-05,
"loss": 0.8612,
"step": 33500
},
{
"epoch": 0.53632,
"grad_norm": 0.23540472984313965,
"learning_rate": 9.27744e-05,
"loss": 0.9049,
"step": 33520
},
{
"epoch": 0.53664,
"grad_norm": 0.23751689493656158,
"learning_rate": 9.271040000000001e-05,
"loss": 0.837,
"step": 33540
},
{
"epoch": 0.53696,
"grad_norm": 0.24152640998363495,
"learning_rate": 9.26464e-05,
"loss": 0.8772,
"step": 33560
},
{
"epoch": 0.53728,
"grad_norm": 0.2199302613735199,
"learning_rate": 9.25824e-05,
"loss": 0.9278,
"step": 33580
},
{
"epoch": 0.5376,
"grad_norm": 0.24747338891029358,
"learning_rate": 9.25184e-05,
"loss": 0.8802,
"step": 33600
},
{
"epoch": 0.53792,
"grad_norm": 0.21488319337368011,
"learning_rate": 9.245440000000001e-05,
"loss": 0.9052,
"step": 33620
},
{
"epoch": 0.53824,
"grad_norm": 0.2535870671272278,
"learning_rate": 9.23904e-05,
"loss": 0.8781,
"step": 33640
},
{
"epoch": 0.53856,
"grad_norm": 0.2381758987903595,
"learning_rate": 9.23264e-05,
"loss": 0.8088,
"step": 33660
},
{
"epoch": 0.53888,
"grad_norm": 0.2485072910785675,
"learning_rate": 9.226560000000001e-05,
"loss": 0.8592,
"step": 33680
},
{
"epoch": 0.5392,
"grad_norm": 0.21843871474266052,
"learning_rate": 9.22016e-05,
"loss": 0.8427,
"step": 33700
},
{
"epoch": 0.53952,
"grad_norm": 0.2665683627128601,
"learning_rate": 9.21376e-05,
"loss": 0.879,
"step": 33720
},
{
"epoch": 0.53984,
"grad_norm": 0.22822092473506927,
"learning_rate": 9.20736e-05,
"loss": 0.8936,
"step": 33740
},
{
"epoch": 0.54016,
"grad_norm": 0.22950272262096405,
"learning_rate": 9.200960000000001e-05,
"loss": 0.838,
"step": 33760
},
{
"epoch": 0.54048,
"grad_norm": 0.2490173578262329,
"learning_rate": 9.19456e-05,
"loss": 0.8914,
"step": 33780
},
{
"epoch": 0.5408,
"grad_norm": 0.2716629207134247,
"learning_rate": 9.18816e-05,
"loss": 0.8936,
"step": 33800
},
{
"epoch": 0.54112,
"grad_norm": 0.28845784068107605,
"learning_rate": 9.18176e-05,
"loss": 0.9278,
"step": 33820
},
{
"epoch": 0.54144,
"grad_norm": 0.2092408537864685,
"learning_rate": 9.17536e-05,
"loss": 0.8818,
"step": 33840
},
{
"epoch": 0.54176,
"grad_norm": 0.19223183393478394,
"learning_rate": 9.168960000000001e-05,
"loss": 0.8626,
"step": 33860
},
{
"epoch": 0.54208,
"grad_norm": 0.2201627492904663,
"learning_rate": 9.16256e-05,
"loss": 0.8946,
"step": 33880
},
{
"epoch": 0.5424,
"grad_norm": 0.22081099450588226,
"learning_rate": 9.15616e-05,
"loss": 0.8691,
"step": 33900
},
{
"epoch": 0.54272,
"grad_norm": 0.229042649269104,
"learning_rate": 9.149760000000001e-05,
"loss": 0.856,
"step": 33920
},
{
"epoch": 0.54304,
"grad_norm": 0.23319824039936066,
"learning_rate": 9.14336e-05,
"loss": 0.8748,
"step": 33940
},
{
"epoch": 0.54336,
"grad_norm": 0.21704263985157013,
"learning_rate": 9.13696e-05,
"loss": 0.8736,
"step": 33960
},
{
"epoch": 0.54368,
"grad_norm": 0.2763427197933197,
"learning_rate": 9.130560000000001e-05,
"loss": 0.8392,
"step": 33980
},
{
"epoch": 0.544,
"grad_norm": 0.24619615077972412,
"learning_rate": 9.12416e-05,
"loss": 0.9046,
"step": 34000
},
{
"epoch": 0.54432,
"grad_norm": 0.2581463158130646,
"learning_rate": 9.117760000000001e-05,
"loss": 0.8873,
"step": 34020
},
{
"epoch": 0.54464,
"grad_norm": 0.25280770659446716,
"learning_rate": 9.11136e-05,
"loss": 0.9039,
"step": 34040
},
{
"epoch": 0.54496,
"grad_norm": 0.22729690372943878,
"learning_rate": 9.10496e-05,
"loss": 0.9558,
"step": 34060
},
{
"epoch": 0.54528,
"grad_norm": 0.25227198004722595,
"learning_rate": 9.09856e-05,
"loss": 0.8474,
"step": 34080
},
{
"epoch": 0.5456,
"grad_norm": 0.22505003213882446,
"learning_rate": 9.092160000000001e-05,
"loss": 0.8884,
"step": 34100
},
{
"epoch": 0.54592,
"grad_norm": 0.19984784722328186,
"learning_rate": 9.085760000000002e-05,
"loss": 0.864,
"step": 34120
},
{
"epoch": 0.54624,
"grad_norm": 0.22763599455356598,
"learning_rate": 9.07936e-05,
"loss": 0.8398,
"step": 34140
},
{
"epoch": 0.54656,
"grad_norm": 0.2618582248687744,
"learning_rate": 9.07296e-05,
"loss": 0.9464,
"step": 34160
},
{
"epoch": 0.54688,
"grad_norm": 0.2371044158935547,
"learning_rate": 9.06656e-05,
"loss": 0.8829,
"step": 34180
},
{
"epoch": 0.5472,
"grad_norm": 0.21414311230182648,
"learning_rate": 9.060160000000001e-05,
"loss": 0.8997,
"step": 34200
},
{
"epoch": 0.54752,
"grad_norm": 0.24445602297782898,
"learning_rate": 9.05376e-05,
"loss": 0.8741,
"step": 34220
},
{
"epoch": 0.54784,
"grad_norm": 0.22683311998844147,
"learning_rate": 9.04736e-05,
"loss": 0.905,
"step": 34240
},
{
"epoch": 0.54816,
"grad_norm": 0.23930495977401733,
"learning_rate": 9.040960000000001e-05,
"loss": 0.8318,
"step": 34260
},
{
"epoch": 0.54848,
"grad_norm": 0.2092512547969818,
"learning_rate": 9.03456e-05,
"loss": 0.9077,
"step": 34280
},
{
"epoch": 0.5488,
"grad_norm": 0.24851293861865997,
"learning_rate": 9.02816e-05,
"loss": 0.8201,
"step": 34300
},
{
"epoch": 0.54912,
"grad_norm": 0.25093013048171997,
"learning_rate": 9.021760000000001e-05,
"loss": 0.9003,
"step": 34320
},
{
"epoch": 0.54944,
"grad_norm": 0.22838135063648224,
"learning_rate": 9.01536e-05,
"loss": 0.8897,
"step": 34340
},
{
"epoch": 0.54976,
"grad_norm": 0.23337380588054657,
"learning_rate": 9.008960000000001e-05,
"loss": 0.8721,
"step": 34360
},
{
"epoch": 0.55008,
"grad_norm": 0.2049025148153305,
"learning_rate": 9.00256e-05,
"loss": 0.8554,
"step": 34380
},
{
"epoch": 0.5504,
"grad_norm": 0.24730657041072845,
"learning_rate": 8.99616e-05,
"loss": 0.8799,
"step": 34400
},
{
"epoch": 0.55072,
"grad_norm": 0.24854011833667755,
"learning_rate": 8.98976e-05,
"loss": 0.7941,
"step": 34420
},
{
"epoch": 0.55104,
"grad_norm": 0.2421095371246338,
"learning_rate": 8.98336e-05,
"loss": 0.874,
"step": 34440
},
{
"epoch": 0.55136,
"grad_norm": 0.23288048803806305,
"learning_rate": 8.976960000000002e-05,
"loss": 0.8883,
"step": 34460
},
{
"epoch": 0.55168,
"grad_norm": 0.227716326713562,
"learning_rate": 8.97056e-05,
"loss": 0.9004,
"step": 34480
},
{
"epoch": 0.552,
"grad_norm": 0.24987129867076874,
"learning_rate": 8.96416e-05,
"loss": 0.8758,
"step": 34500
},
{
"epoch": 0.55232,
"grad_norm": 0.24900184571743011,
"learning_rate": 8.95776e-05,
"loss": 0.8657,
"step": 34520
},
{
"epoch": 0.55264,
"grad_norm": 0.2685990035533905,
"learning_rate": 8.951360000000001e-05,
"loss": 0.8833,
"step": 34540
},
{
"epoch": 0.55296,
"grad_norm": 0.23856236040592194,
"learning_rate": 8.94496e-05,
"loss": 0.9229,
"step": 34560
},
{
"epoch": 0.55328,
"grad_norm": 0.24043366312980652,
"learning_rate": 8.93856e-05,
"loss": 0.8985,
"step": 34580
},
{
"epoch": 0.5536,
"grad_norm": 0.26800286769866943,
"learning_rate": 8.932160000000001e-05,
"loss": 0.9129,
"step": 34600
},
{
"epoch": 0.55392,
"grad_norm": 0.2030034214258194,
"learning_rate": 8.92576e-05,
"loss": 0.8625,
"step": 34620
},
{
"epoch": 0.55424,
"grad_norm": 0.21988297998905182,
"learning_rate": 8.91936e-05,
"loss": 0.8638,
"step": 34640
},
{
"epoch": 0.55456,
"grad_norm": 0.24161766469478607,
"learning_rate": 8.912960000000001e-05,
"loss": 0.9061,
"step": 34660
},
{
"epoch": 0.55488,
"grad_norm": 0.24580594897270203,
"learning_rate": 8.90656e-05,
"loss": 0.8638,
"step": 34680
},
{
"epoch": 0.5552,
"grad_norm": 0.23829227685928345,
"learning_rate": 8.900160000000001e-05,
"loss": 0.8996,
"step": 34700
},
{
"epoch": 0.55552,
"grad_norm": 0.2329644411802292,
"learning_rate": 8.893760000000001e-05,
"loss": 0.8453,
"step": 34720
},
{
"epoch": 0.55584,
"grad_norm": 0.22091227769851685,
"learning_rate": 8.88736e-05,
"loss": 0.8795,
"step": 34740
},
{
"epoch": 0.55616,
"grad_norm": 0.21813109517097473,
"learning_rate": 8.88096e-05,
"loss": 0.8929,
"step": 34760
},
{
"epoch": 0.55648,
"grad_norm": 0.2232077419757843,
"learning_rate": 8.87456e-05,
"loss": 0.8585,
"step": 34780
},
{
"epoch": 0.5568,
"grad_norm": 0.27467960119247437,
"learning_rate": 8.868160000000002e-05,
"loss": 0.9287,
"step": 34800
},
{
"epoch": 0.55712,
"grad_norm": 0.22533021867275238,
"learning_rate": 8.86176e-05,
"loss": 0.8408,
"step": 34820
},
{
"epoch": 0.55744,
"grad_norm": 0.2704051733016968,
"learning_rate": 8.85536e-05,
"loss": 0.8606,
"step": 34840
},
{
"epoch": 0.55776,
"grad_norm": 0.2197883278131485,
"learning_rate": 8.84896e-05,
"loss": 0.8271,
"step": 34860
},
{
"epoch": 0.55808,
"grad_norm": 0.2424239218235016,
"learning_rate": 8.842560000000001e-05,
"loss": 0.9006,
"step": 34880
},
{
"epoch": 0.5584,
"grad_norm": 0.21460562944412231,
"learning_rate": 8.83616e-05,
"loss": 0.8882,
"step": 34900
},
{
"epoch": 0.55872,
"grad_norm": 0.2590295076370239,
"learning_rate": 8.82976e-05,
"loss": 0.906,
"step": 34920
},
{
"epoch": 0.55904,
"grad_norm": 0.2667274475097656,
"learning_rate": 8.823360000000001e-05,
"loss": 0.8522,
"step": 34940
},
{
"epoch": 0.55936,
"grad_norm": 0.20394787192344666,
"learning_rate": 8.81696e-05,
"loss": 0.9169,
"step": 34960
},
{
"epoch": 0.55968,
"grad_norm": 0.2314232587814331,
"learning_rate": 8.81056e-05,
"loss": 0.847,
"step": 34980
},
{
"epoch": 0.56,
"grad_norm": 0.25091394782066345,
"learning_rate": 8.804160000000001e-05,
"loss": 0.9109,
"step": 35000
},
{
"epoch": 0.56032,
"grad_norm": 0.26859068870544434,
"learning_rate": 8.79776e-05,
"loss": 0.879,
"step": 35020
},
{
"epoch": 0.56064,
"grad_norm": 0.2730168104171753,
"learning_rate": 8.791360000000001e-05,
"loss": 0.8548,
"step": 35040
},
{
"epoch": 0.56096,
"grad_norm": 0.21965494751930237,
"learning_rate": 8.78496e-05,
"loss": 0.8965,
"step": 35060
},
{
"epoch": 0.56128,
"grad_norm": 0.2608742117881775,
"learning_rate": 8.77856e-05,
"loss": 0.9028,
"step": 35080
},
{
"epoch": 0.5616,
"grad_norm": 0.19259628653526306,
"learning_rate": 8.77216e-05,
"loss": 0.8716,
"step": 35100
},
{
"epoch": 0.56192,
"grad_norm": 0.2763505280017853,
"learning_rate": 8.766080000000001e-05,
"loss": 0.8783,
"step": 35120
},
{
"epoch": 0.56224,
"grad_norm": 0.23790399730205536,
"learning_rate": 8.75968e-05,
"loss": 0.8931,
"step": 35140
},
{
"epoch": 0.56256,
"grad_norm": 0.21415534615516663,
"learning_rate": 8.75328e-05,
"loss": 0.8931,
"step": 35160
},
{
"epoch": 0.56288,
"grad_norm": 0.23109756410121918,
"learning_rate": 8.746880000000001e-05,
"loss": 0.9333,
"step": 35180
},
{
"epoch": 0.5632,
"grad_norm": 0.22907577455043793,
"learning_rate": 8.740480000000001e-05,
"loss": 0.8739,
"step": 35200
},
{
"epoch": 0.56352,
"grad_norm": 0.22961348295211792,
"learning_rate": 8.73408e-05,
"loss": 0.883,
"step": 35220
},
{
"epoch": 0.56384,
"grad_norm": 0.19997790455818176,
"learning_rate": 8.72768e-05,
"loss": 0.8698,
"step": 35240
},
{
"epoch": 0.56416,
"grad_norm": 0.22530966997146606,
"learning_rate": 8.72128e-05,
"loss": 0.8538,
"step": 35260
},
{
"epoch": 0.56448,
"grad_norm": 0.2729052007198334,
"learning_rate": 8.71488e-05,
"loss": 0.8882,
"step": 35280
},
{
"epoch": 0.5648,
"grad_norm": 0.23470643162727356,
"learning_rate": 8.70848e-05,
"loss": 0.8615,
"step": 35300
},
{
"epoch": 0.56512,
"grad_norm": 0.24945303797721863,
"learning_rate": 8.70208e-05,
"loss": 0.8993,
"step": 35320
},
{
"epoch": 0.56544,
"grad_norm": 0.18795213103294373,
"learning_rate": 8.69568e-05,
"loss": 0.8676,
"step": 35340
},
{
"epoch": 0.56576,
"grad_norm": 0.2676798403263092,
"learning_rate": 8.689280000000001e-05,
"loss": 0.9417,
"step": 35360
},
{
"epoch": 0.56608,
"grad_norm": 0.2519102096557617,
"learning_rate": 8.68288e-05,
"loss": 0.8696,
"step": 35380
},
{
"epoch": 0.5664,
"grad_norm": 0.2239411622285843,
"learning_rate": 8.67648e-05,
"loss": 0.8518,
"step": 35400
},
{
"epoch": 0.56672,
"grad_norm": 0.25220707058906555,
"learning_rate": 8.670080000000001e-05,
"loss": 0.8406,
"step": 35420
},
{
"epoch": 0.56704,
"grad_norm": 0.24866600334644318,
"learning_rate": 8.66368e-05,
"loss": 0.8726,
"step": 35440
},
{
"epoch": 0.56736,
"grad_norm": 0.23390260338783264,
"learning_rate": 8.657280000000001e-05,
"loss": 0.8616,
"step": 35460
},
{
"epoch": 0.56768,
"grad_norm": 0.23465383052825928,
"learning_rate": 8.650880000000001e-05,
"loss": 0.8897,
"step": 35480
},
{
"epoch": 0.568,
"grad_norm": 0.23219779133796692,
"learning_rate": 8.64448e-05,
"loss": 0.892,
"step": 35500
},
{
"epoch": 0.56832,
"grad_norm": 0.23238353431224823,
"learning_rate": 8.638080000000001e-05,
"loss": 0.8887,
"step": 35520
},
{
"epoch": 0.56864,
"grad_norm": 0.23893140256404877,
"learning_rate": 8.631680000000001e-05,
"loss": 0.868,
"step": 35540
},
{
"epoch": 0.56896,
"grad_norm": 0.23007500171661377,
"learning_rate": 8.62528e-05,
"loss": 0.8564,
"step": 35560
},
{
"epoch": 0.56928,
"grad_norm": 0.2285536676645279,
"learning_rate": 8.61888e-05,
"loss": 0.8967,
"step": 35580
},
{
"epoch": 0.5696,
"grad_norm": 0.2179461121559143,
"learning_rate": 8.61248e-05,
"loss": 0.8468,
"step": 35600
},
{
"epoch": 0.56992,
"grad_norm": 0.26246190071105957,
"learning_rate": 8.606080000000001e-05,
"loss": 0.866,
"step": 35620
},
{
"epoch": 0.57024,
"grad_norm": 0.2367112636566162,
"learning_rate": 8.59968e-05,
"loss": 0.8862,
"step": 35640
},
{
"epoch": 0.57056,
"grad_norm": 0.26484036445617676,
"learning_rate": 8.59328e-05,
"loss": 0.8992,
"step": 35660
},
{
"epoch": 0.57088,
"grad_norm": 0.2554921507835388,
"learning_rate": 8.58688e-05,
"loss": 0.9095,
"step": 35680
},
{
"epoch": 0.5712,
"grad_norm": 0.20814883708953857,
"learning_rate": 8.580480000000001e-05,
"loss": 0.8306,
"step": 35700
},
{
"epoch": 0.57152,
"grad_norm": 0.22327591478824615,
"learning_rate": 8.57408e-05,
"loss": 0.8366,
"step": 35720
},
{
"epoch": 0.57184,
"grad_norm": 0.2438846230506897,
"learning_rate": 8.56768e-05,
"loss": 0.8424,
"step": 35740
},
{
"epoch": 0.57216,
"grad_norm": 0.20804153382778168,
"learning_rate": 8.561280000000001e-05,
"loss": 0.8852,
"step": 35760
},
{
"epoch": 0.57248,
"grad_norm": 0.22227492928504944,
"learning_rate": 8.55488e-05,
"loss": 0.8586,
"step": 35780
},
{
"epoch": 0.5728,
"grad_norm": 0.28952184319496155,
"learning_rate": 8.548480000000001e-05,
"loss": 0.9095,
"step": 35800
},
{
"epoch": 0.57312,
"grad_norm": 0.22480730712413788,
"learning_rate": 8.542080000000001e-05,
"loss": 0.8834,
"step": 35820
},
{
"epoch": 0.57344,
"grad_norm": 0.19908693432807922,
"learning_rate": 8.53568e-05,
"loss": 0.84,
"step": 35840
},
{
"epoch": 0.57376,
"grad_norm": 0.24293170869350433,
"learning_rate": 8.529280000000001e-05,
"loss": 0.9019,
"step": 35860
},
{
"epoch": 0.57408,
"grad_norm": 0.1839456409215927,
"learning_rate": 8.52288e-05,
"loss": 0.923,
"step": 35880
},
{
"epoch": 0.5744,
"grad_norm": 0.23249760270118713,
"learning_rate": 8.51648e-05,
"loss": 0.8667,
"step": 35900
},
{
"epoch": 0.57472,
"grad_norm": 0.2784735858440399,
"learning_rate": 8.51008e-05,
"loss": 0.9178,
"step": 35920
},
{
"epoch": 0.57504,
"grad_norm": 0.23797404766082764,
"learning_rate": 8.50368e-05,
"loss": 0.9291,
"step": 35940
},
{
"epoch": 0.57536,
"grad_norm": 0.241110160946846,
"learning_rate": 8.497280000000001e-05,
"loss": 0.8447,
"step": 35960
},
{
"epoch": 0.57568,
"grad_norm": 0.2260485291481018,
"learning_rate": 8.49088e-05,
"loss": 0.8532,
"step": 35980
},
{
"epoch": 0.576,
"grad_norm": 0.22092276811599731,
"learning_rate": 8.48448e-05,
"loss": 0.8768,
"step": 36000
},
{
"epoch": 0.57632,
"grad_norm": 0.2721387445926666,
"learning_rate": 8.47808e-05,
"loss": 0.863,
"step": 36020
},
{
"epoch": 0.57664,
"grad_norm": 0.21206919848918915,
"learning_rate": 8.471680000000001e-05,
"loss": 0.8636,
"step": 36040
},
{
"epoch": 0.57696,
"grad_norm": 0.23496432602405548,
"learning_rate": 8.46528e-05,
"loss": 0.8745,
"step": 36060
},
{
"epoch": 0.57728,
"grad_norm": 0.2214774489402771,
"learning_rate": 8.45888e-05,
"loss": 0.873,
"step": 36080
},
{
"epoch": 0.5776,
"grad_norm": 0.2665559649467468,
"learning_rate": 8.452480000000001e-05,
"loss": 0.9061,
"step": 36100
},
{
"epoch": 0.57792,
"grad_norm": 0.25471359491348267,
"learning_rate": 8.44608e-05,
"loss": 0.8592,
"step": 36120
},
{
"epoch": 0.57824,
"grad_norm": 0.24169199168682098,
"learning_rate": 8.439680000000001e-05,
"loss": 0.8867,
"step": 36140
},
{
"epoch": 0.57856,
"grad_norm": 0.25281229615211487,
"learning_rate": 8.433280000000001e-05,
"loss": 0.8702,
"step": 36160
},
{
"epoch": 0.57888,
"grad_norm": 0.2565683424472809,
"learning_rate": 8.42688e-05,
"loss": 0.8812,
"step": 36180
},
{
"epoch": 0.5792,
"grad_norm": 0.2228873074054718,
"learning_rate": 8.420480000000001e-05,
"loss": 0.8556,
"step": 36200
},
{
"epoch": 0.57952,
"grad_norm": 0.23793266713619232,
"learning_rate": 8.41408e-05,
"loss": 0.8878,
"step": 36220
},
{
"epoch": 0.57984,
"grad_norm": 0.24681511521339417,
"learning_rate": 8.40768e-05,
"loss": 0.9124,
"step": 36240
},
{
"epoch": 0.58016,
"grad_norm": 0.2208309918642044,
"learning_rate": 8.40128e-05,
"loss": 0.8988,
"step": 36260
},
{
"epoch": 0.58048,
"grad_norm": 0.2527472972869873,
"learning_rate": 8.39488e-05,
"loss": 0.886,
"step": 36280
},
{
"epoch": 0.5808,
"grad_norm": 0.23893190920352936,
"learning_rate": 8.388480000000001e-05,
"loss": 0.8487,
"step": 36300
},
{
"epoch": 0.58112,
"grad_norm": 0.22206924855709076,
"learning_rate": 8.38208e-05,
"loss": 0.9371,
"step": 36320
},
{
"epoch": 0.58144,
"grad_norm": 0.28712591528892517,
"learning_rate": 8.37568e-05,
"loss": 0.8959,
"step": 36340
},
{
"epoch": 0.58176,
"grad_norm": 0.20586076378822327,
"learning_rate": 8.36928e-05,
"loss": 0.8394,
"step": 36360
},
{
"epoch": 0.58208,
"grad_norm": 0.20877981185913086,
"learning_rate": 8.362880000000001e-05,
"loss": 0.8793,
"step": 36380
},
{
"epoch": 0.5824,
"grad_norm": 0.2150806486606598,
"learning_rate": 8.35648e-05,
"loss": 0.8575,
"step": 36400
},
{
"epoch": 0.58272,
"grad_norm": 0.2607544958591461,
"learning_rate": 8.35008e-05,
"loss": 0.8492,
"step": 36420
},
{
"epoch": 0.58304,
"grad_norm": 0.21660766005516052,
"learning_rate": 8.343680000000001e-05,
"loss": 0.8599,
"step": 36440
},
{
"epoch": 0.58336,
"grad_norm": 0.19799566268920898,
"learning_rate": 8.33728e-05,
"loss": 0.8754,
"step": 36460
},
{
"epoch": 0.58368,
"grad_norm": 0.2376536726951599,
"learning_rate": 8.330880000000001e-05,
"loss": 0.8645,
"step": 36480
},
{
"epoch": 0.584,
"grad_norm": 0.2775854766368866,
"learning_rate": 8.32448e-05,
"loss": 0.8341,
"step": 36500
},
{
"epoch": 0.58432,
"grad_norm": 0.2808210849761963,
"learning_rate": 8.31808e-05,
"loss": 0.9145,
"step": 36520
},
{
"epoch": 0.58464,
"grad_norm": 0.2942677140235901,
"learning_rate": 8.311680000000001e-05,
"loss": 0.8645,
"step": 36540
},
{
"epoch": 0.58496,
"grad_norm": 0.24506719410419464,
"learning_rate": 8.30528e-05,
"loss": 0.9266,
"step": 36560
},
{
"epoch": 0.58528,
"grad_norm": 0.23690511286258698,
"learning_rate": 8.29888e-05,
"loss": 0.8701,
"step": 36580
},
{
"epoch": 0.5856,
"grad_norm": 0.2386239618062973,
"learning_rate": 8.29248e-05,
"loss": 0.894,
"step": 36600
},
{
"epoch": 0.58592,
"grad_norm": 0.23407141864299774,
"learning_rate": 8.28608e-05,
"loss": 0.8795,
"step": 36620
},
{
"epoch": 0.58624,
"grad_norm": 0.2837297320365906,
"learning_rate": 8.279680000000001e-05,
"loss": 0.8844,
"step": 36640
},
{
"epoch": 0.58656,
"grad_norm": 0.18929681181907654,
"learning_rate": 8.27328e-05,
"loss": 0.881,
"step": 36660
},
{
"epoch": 0.58688,
"grad_norm": 0.2538854479789734,
"learning_rate": 8.26688e-05,
"loss": 0.8326,
"step": 36680
},
{
"epoch": 0.5872,
"grad_norm": 0.23304085433483124,
"learning_rate": 8.26048e-05,
"loss": 0.8535,
"step": 36700
},
{
"epoch": 0.58752,
"grad_norm": 0.2668757736682892,
"learning_rate": 8.254080000000001e-05,
"loss": 0.9134,
"step": 36720
},
{
"epoch": 0.58784,
"grad_norm": 0.2611462473869324,
"learning_rate": 8.24768e-05,
"loss": 0.865,
"step": 36740
},
{
"epoch": 0.58816,
"grad_norm": 0.20947878062725067,
"learning_rate": 8.24128e-05,
"loss": 0.8719,
"step": 36760
},
{
"epoch": 0.58848,
"grad_norm": 0.23722214996814728,
"learning_rate": 8.234880000000001e-05,
"loss": 0.8801,
"step": 36780
},
{
"epoch": 0.5888,
"grad_norm": 0.2517217993736267,
"learning_rate": 8.22848e-05,
"loss": 0.929,
"step": 36800
},
{
"epoch": 0.58912,
"grad_norm": 0.21423856914043427,
"learning_rate": 8.222080000000001e-05,
"loss": 0.8452,
"step": 36820
},
{
"epoch": 0.58944,
"grad_norm": 0.224490687251091,
"learning_rate": 8.21568e-05,
"loss": 0.8617,
"step": 36840
},
{
"epoch": 0.58976,
"grad_norm": 0.1989527940750122,
"learning_rate": 8.20928e-05,
"loss": 0.9057,
"step": 36860
},
{
"epoch": 0.59008,
"grad_norm": 0.20397096872329712,
"learning_rate": 8.202880000000001e-05,
"loss": 0.8504,
"step": 36880
},
{
"epoch": 0.5904,
"grad_norm": 0.22488151490688324,
"learning_rate": 8.19648e-05,
"loss": 0.8383,
"step": 36900
},
{
"epoch": 0.59072,
"grad_norm": 0.23070698976516724,
"learning_rate": 8.19008e-05,
"loss": 0.8249,
"step": 36920
},
{
"epoch": 0.59104,
"grad_norm": 0.23242853581905365,
"learning_rate": 8.183680000000001e-05,
"loss": 0.8153,
"step": 36940
},
{
"epoch": 0.59136,
"grad_norm": 0.2361801415681839,
"learning_rate": 8.17728e-05,
"loss": 0.913,
"step": 36960
},
{
"epoch": 0.59168,
"grad_norm": 0.20463335514068604,
"learning_rate": 8.170880000000001e-05,
"loss": 0.8239,
"step": 36980
},
{
"epoch": 0.592,
"grad_norm": 0.2715516984462738,
"learning_rate": 8.16448e-05,
"loss": 0.8807,
"step": 37000
},
{
"epoch": 0.59232,
"grad_norm": 0.260455459356308,
"learning_rate": 8.15808e-05,
"loss": 0.874,
"step": 37020
},
{
"epoch": 0.59264,
"grad_norm": 0.23766261339187622,
"learning_rate": 8.15168e-05,
"loss": 0.8706,
"step": 37040
},
{
"epoch": 0.59296,
"grad_norm": 0.2649995684623718,
"learning_rate": 8.145280000000001e-05,
"loss": 0.8938,
"step": 37060
},
{
"epoch": 0.59328,
"grad_norm": 0.20467999577522278,
"learning_rate": 8.13888e-05,
"loss": 0.8686,
"step": 37080
},
{
"epoch": 0.5936,
"grad_norm": 0.2401207685470581,
"learning_rate": 8.13248e-05,
"loss": 0.8919,
"step": 37100
},
{
"epoch": 0.59392,
"grad_norm": 0.2349073439836502,
"learning_rate": 8.12608e-05,
"loss": 0.8998,
"step": 37120
},
{
"epoch": 0.59424,
"grad_norm": 0.22856348752975464,
"learning_rate": 8.11968e-05,
"loss": 0.8892,
"step": 37140
},
{
"epoch": 0.59456,
"grad_norm": 0.22828173637390137,
"learning_rate": 8.113280000000001e-05,
"loss": 0.8411,
"step": 37160
},
{
"epoch": 0.59488,
"grad_norm": 0.21084712445735931,
"learning_rate": 8.10688e-05,
"loss": 0.8682,
"step": 37180
},
{
"epoch": 0.5952,
"grad_norm": 0.2080162763595581,
"learning_rate": 8.10048e-05,
"loss": 0.9049,
"step": 37200
},
{
"epoch": 0.59552,
"grad_norm": 0.2635326385498047,
"learning_rate": 8.094080000000001e-05,
"loss": 0.8978,
"step": 37220
},
{
"epoch": 0.59584,
"grad_norm": 0.2089320570230484,
"learning_rate": 8.08768e-05,
"loss": 0.9324,
"step": 37240
},
{
"epoch": 0.59616,
"grad_norm": 0.2738167643547058,
"learning_rate": 8.08128e-05,
"loss": 0.8635,
"step": 37260
},
{
"epoch": 0.59648,
"grad_norm": 0.19724565744400024,
"learning_rate": 8.074880000000001e-05,
"loss": 0.8171,
"step": 37280
},
{
"epoch": 0.5968,
"grad_norm": 0.208595871925354,
"learning_rate": 8.06848e-05,
"loss": 0.8812,
"step": 37300
},
{
"epoch": 0.59712,
"grad_norm": 0.22307556867599487,
"learning_rate": 8.062080000000001e-05,
"loss": 0.8931,
"step": 37320
},
{
"epoch": 0.59744,
"grad_norm": 0.23288871347904205,
"learning_rate": 8.05568e-05,
"loss": 0.8764,
"step": 37340
},
{
"epoch": 0.59776,
"grad_norm": 0.228603333234787,
"learning_rate": 8.04928e-05,
"loss": 0.8742,
"step": 37360
},
{
"epoch": 0.59808,
"grad_norm": 0.2657196521759033,
"learning_rate": 8.04288e-05,
"loss": 0.8955,
"step": 37380
},
{
"epoch": 0.5984,
"grad_norm": 0.21586276590824127,
"learning_rate": 8.036480000000001e-05,
"loss": 0.8867,
"step": 37400
},
{
"epoch": 0.59872,
"grad_norm": 0.24376487731933594,
"learning_rate": 8.030080000000002e-05,
"loss": 0.8941,
"step": 37420
},
{
"epoch": 0.59904,
"grad_norm": 0.24991095066070557,
"learning_rate": 8.02368e-05,
"loss": 0.8832,
"step": 37440
},
{
"epoch": 0.59936,
"grad_norm": 0.2291015237569809,
"learning_rate": 8.01728e-05,
"loss": 0.8427,
"step": 37460
},
{
"epoch": 0.59968,
"grad_norm": 0.2516835331916809,
"learning_rate": 8.01088e-05,
"loss": 0.8526,
"step": 37480
},
{
"epoch": 0.6,
"grad_norm": 0.2229020595550537,
"learning_rate": 8.004480000000001e-05,
"loss": 0.8766,
"step": 37500
},
{
"epoch": 0.60032,
"grad_norm": 0.24501658976078033,
"learning_rate": 7.99808e-05,
"loss": 0.9391,
"step": 37520
},
{
"epoch": 0.60064,
"grad_norm": 0.21306291222572327,
"learning_rate": 7.99168e-05,
"loss": 0.8318,
"step": 37540
},
{
"epoch": 0.60096,
"grad_norm": 0.28489160537719727,
"learning_rate": 7.985280000000001e-05,
"loss": 0.8845,
"step": 37560
},
{
"epoch": 0.60128,
"grad_norm": 0.24311847984790802,
"learning_rate": 7.97888e-05,
"loss": 0.8619,
"step": 37580
},
{
"epoch": 0.6016,
"grad_norm": 0.2386752963066101,
"learning_rate": 7.97248e-05,
"loss": 0.8554,
"step": 37600
},
{
"epoch": 0.60192,
"grad_norm": 0.22845840454101562,
"learning_rate": 7.966080000000001e-05,
"loss": 0.8505,
"step": 37620
},
{
"epoch": 0.60224,
"grad_norm": 0.21545352041721344,
"learning_rate": 7.95968e-05,
"loss": 0.8725,
"step": 37640
},
{
"epoch": 0.60256,
"grad_norm": 0.24536795914173126,
"learning_rate": 7.953280000000001e-05,
"loss": 0.9144,
"step": 37660
},
{
"epoch": 0.60288,
"grad_norm": 0.29689520597457886,
"learning_rate": 7.94688e-05,
"loss": 0.8949,
"step": 37680
},
{
"epoch": 0.6032,
"grad_norm": 0.22102287411689758,
"learning_rate": 7.94048e-05,
"loss": 0.8847,
"step": 37700
},
{
"epoch": 0.60352,
"grad_norm": 0.2358902543783188,
"learning_rate": 7.93408e-05,
"loss": 0.8841,
"step": 37720
},
{
"epoch": 0.60384,
"grad_norm": 0.23542214930057526,
"learning_rate": 7.92768e-05,
"loss": 0.8899,
"step": 37740
},
{
"epoch": 0.60416,
"grad_norm": 0.23006409406661987,
"learning_rate": 7.921280000000002e-05,
"loss": 0.824,
"step": 37760
},
{
"epoch": 0.60448,
"grad_norm": 0.29189014434814453,
"learning_rate": 7.91488e-05,
"loss": 0.9226,
"step": 37780
},
{
"epoch": 0.6048,
"grad_norm": 0.23212820291519165,
"learning_rate": 7.90848e-05,
"loss": 0.883,
"step": 37800
},
{
"epoch": 0.60512,
"grad_norm": 0.2882430851459503,
"learning_rate": 7.90208e-05,
"loss": 0.8327,
"step": 37820
},
{
"epoch": 0.60544,
"grad_norm": 0.21909599006175995,
"learning_rate": 7.895680000000001e-05,
"loss": 0.8446,
"step": 37840
},
{
"epoch": 0.60576,
"grad_norm": 0.23385344445705414,
"learning_rate": 7.88928e-05,
"loss": 0.8822,
"step": 37860
},
{
"epoch": 0.60608,
"grad_norm": 0.23137474060058594,
"learning_rate": 7.88288e-05,
"loss": 0.8669,
"step": 37880
},
{
"epoch": 0.6064,
"grad_norm": 0.234337717294693,
"learning_rate": 7.876480000000001e-05,
"loss": 0.9464,
"step": 37900
},
{
"epoch": 0.60672,
"grad_norm": 0.19100037217140198,
"learning_rate": 7.87008e-05,
"loss": 0.8538,
"step": 37920
},
{
"epoch": 0.60704,
"grad_norm": 0.231523334980011,
"learning_rate": 7.86368e-05,
"loss": 0.8869,
"step": 37940
},
{
"epoch": 0.60736,
"grad_norm": 0.19973509013652802,
"learning_rate": 7.857280000000001e-05,
"loss": 0.8765,
"step": 37960
},
{
"epoch": 0.60768,
"grad_norm": 0.2176535725593567,
"learning_rate": 7.85088e-05,
"loss": 0.864,
"step": 37980
},
{
"epoch": 0.608,
"grad_norm": 0.2557581961154938,
"learning_rate": 7.844480000000001e-05,
"loss": 0.8782,
"step": 38000
},
{
"epoch": 0.60832,
"grad_norm": 0.273478627204895,
"learning_rate": 7.83808e-05,
"loss": 0.9086,
"step": 38020
},
{
"epoch": 0.60864,
"grad_norm": 0.26042431592941284,
"learning_rate": 7.83168e-05,
"loss": 0.8562,
"step": 38040
},
{
"epoch": 0.60896,
"grad_norm": 0.23801887035369873,
"learning_rate": 7.82528e-05,
"loss": 0.9028,
"step": 38060
},
{
"epoch": 0.60928,
"grad_norm": 0.22711488604545593,
"learning_rate": 7.81888e-05,
"loss": 0.9305,
"step": 38080
},
{
"epoch": 0.6096,
"grad_norm": 0.19922931492328644,
"learning_rate": 7.812480000000001e-05,
"loss": 0.8843,
"step": 38100
},
{
"epoch": 0.60992,
"grad_norm": 0.20743116736412048,
"learning_rate": 7.80608e-05,
"loss": 0.8837,
"step": 38120
},
{
"epoch": 0.61024,
"grad_norm": 0.2508273422718048,
"learning_rate": 7.79968e-05,
"loss": 0.8681,
"step": 38140
},
{
"epoch": 0.61056,
"grad_norm": 0.24501731991767883,
"learning_rate": 7.79328e-05,
"loss": 0.8787,
"step": 38160
},
{
"epoch": 0.61088,
"grad_norm": 0.258413702249527,
"learning_rate": 7.786880000000001e-05,
"loss": 0.9128,
"step": 38180
},
{
"epoch": 0.6112,
"grad_norm": 0.22945356369018555,
"learning_rate": 7.78048e-05,
"loss": 0.8835,
"step": 38200
},
{
"epoch": 0.61152,
"grad_norm": 0.2546984553337097,
"learning_rate": 7.77408e-05,
"loss": 0.8806,
"step": 38220
},
{
"epoch": 0.61184,
"grad_norm": 0.2060491144657135,
"learning_rate": 7.767680000000001e-05,
"loss": 0.8551,
"step": 38240
},
{
"epoch": 0.61216,
"grad_norm": 0.21388421952724457,
"learning_rate": 7.76128e-05,
"loss": 0.8576,
"step": 38260
},
{
"epoch": 0.61248,
"grad_norm": 0.22443564236164093,
"learning_rate": 7.75488e-05,
"loss": 0.8807,
"step": 38280
},
{
"epoch": 0.6128,
"grad_norm": 0.24119791388511658,
"learning_rate": 7.748480000000001e-05,
"loss": 0.9135,
"step": 38300
},
{
"epoch": 0.61312,
"grad_norm": 0.22703927755355835,
"learning_rate": 7.74208e-05,
"loss": 0.9121,
"step": 38320
},
{
"epoch": 0.61344,
"grad_norm": 0.23478037118911743,
"learning_rate": 7.735680000000001e-05,
"loss": 0.9024,
"step": 38340
},
{
"epoch": 0.61376,
"grad_norm": 0.22331403195858002,
"learning_rate": 7.729279999999999e-05,
"loss": 0.9034,
"step": 38360
},
{
"epoch": 0.61408,
"grad_norm": 0.22578079998493195,
"learning_rate": 7.72288e-05,
"loss": 0.855,
"step": 38380
},
{
"epoch": 0.6144,
"grad_norm": 0.22246988117694855,
"learning_rate": 7.716480000000001e-05,
"loss": 0.8515,
"step": 38400
},
{
"epoch": 0.61472,
"grad_norm": 0.20506010949611664,
"learning_rate": 7.71008e-05,
"loss": 0.8626,
"step": 38420
},
{
"epoch": 0.61504,
"grad_norm": 0.23380227386951447,
"learning_rate": 7.703680000000001e-05,
"loss": 0.8963,
"step": 38440
},
{
"epoch": 0.61536,
"grad_norm": 0.2194732278585434,
"learning_rate": 7.69728e-05,
"loss": 0.835,
"step": 38460
},
{
"epoch": 0.61568,
"grad_norm": 0.20710738003253937,
"learning_rate": 7.69088e-05,
"loss": 0.8783,
"step": 38480
},
{
"epoch": 0.616,
"grad_norm": 0.23248372972011566,
"learning_rate": 7.68448e-05,
"loss": 0.8603,
"step": 38500
},
{
"epoch": 0.61632,
"grad_norm": 0.21198534965515137,
"learning_rate": 7.678080000000001e-05,
"loss": 0.8963,
"step": 38520
},
{
"epoch": 0.61664,
"grad_norm": 0.2391924113035202,
"learning_rate": 7.67168e-05,
"loss": 0.8964,
"step": 38540
},
{
"epoch": 0.61696,
"grad_norm": 0.21630340814590454,
"learning_rate": 7.66528e-05,
"loss": 0.8468,
"step": 38560
},
{
"epoch": 0.61728,
"grad_norm": 0.22936932742595673,
"learning_rate": 7.658880000000001e-05,
"loss": 0.8638,
"step": 38580
},
{
"epoch": 0.6176,
"grad_norm": 0.15402059257030487,
"learning_rate": 7.65248e-05,
"loss": 0.8881,
"step": 38600
},
{
"epoch": 0.61792,
"grad_norm": 0.18893806636333466,
"learning_rate": 7.64608e-05,
"loss": 0.9137,
"step": 38620
},
{
"epoch": 0.61824,
"grad_norm": 0.19027528166770935,
"learning_rate": 7.639680000000001e-05,
"loss": 0.8402,
"step": 38640
},
{
"epoch": 0.61856,
"grad_norm": 0.2610226571559906,
"learning_rate": 7.63328e-05,
"loss": 0.8644,
"step": 38660
},
{
"epoch": 0.61888,
"grad_norm": 0.22377745807170868,
"learning_rate": 7.626880000000001e-05,
"loss": 0.8621,
"step": 38680
},
{
"epoch": 0.6192,
"grad_norm": 0.2550973892211914,
"learning_rate": 7.620479999999999e-05,
"loss": 0.8982,
"step": 38700
},
{
"epoch": 0.61952,
"grad_norm": 0.21758711338043213,
"learning_rate": 7.61408e-05,
"loss": 0.8616,
"step": 38720
},
{
"epoch": 0.61984,
"grad_norm": 0.2632993459701538,
"learning_rate": 7.607680000000001e-05,
"loss": 0.8565,
"step": 38740
},
{
"epoch": 0.62016,
"grad_norm": 0.26186972856521606,
"learning_rate": 7.60128e-05,
"loss": 0.883,
"step": 38760
},
{
"epoch": 0.62048,
"grad_norm": 0.22264538705348969,
"learning_rate": 7.594880000000001e-05,
"loss": 0.8625,
"step": 38780
},
{
"epoch": 0.6208,
"grad_norm": 0.2717147171497345,
"learning_rate": 7.58848e-05,
"loss": 0.9014,
"step": 38800
},
{
"epoch": 0.62112,
"grad_norm": 0.2638401687145233,
"learning_rate": 7.58208e-05,
"loss": 0.933,
"step": 38820
},
{
"epoch": 0.62144,
"grad_norm": 0.20931828022003174,
"learning_rate": 7.57568e-05,
"loss": 0.8697,
"step": 38840
},
{
"epoch": 0.62176,
"grad_norm": 0.22345200181007385,
"learning_rate": 7.569280000000001e-05,
"loss": 0.8404,
"step": 38860
},
{
"epoch": 0.62208,
"grad_norm": 0.21052759885787964,
"learning_rate": 7.56288e-05,
"loss": 0.8458,
"step": 38880
},
{
"epoch": 0.6224,
"grad_norm": 0.24614858627319336,
"learning_rate": 7.55648e-05,
"loss": 0.9028,
"step": 38900
},
{
"epoch": 0.62272,
"grad_norm": 0.276304692029953,
"learning_rate": 7.550080000000001e-05,
"loss": 0.9097,
"step": 38920
},
{
"epoch": 0.62304,
"grad_norm": 0.27636557817459106,
"learning_rate": 7.54368e-05,
"loss": 0.8969,
"step": 38940
},
{
"epoch": 0.62336,
"grad_norm": 0.2127619832754135,
"learning_rate": 7.53728e-05,
"loss": 0.9,
"step": 38960
},
{
"epoch": 0.62368,
"grad_norm": 0.23187151551246643,
"learning_rate": 7.53088e-05,
"loss": 0.9081,
"step": 38980
},
{
"epoch": 0.624,
"grad_norm": 0.2539467215538025,
"learning_rate": 7.52448e-05,
"loss": 0.8887,
"step": 39000
},
{
"epoch": 0.62432,
"grad_norm": 0.30993226170539856,
"learning_rate": 7.518080000000001e-05,
"loss": 0.9242,
"step": 39020
},
{
"epoch": 0.62464,
"grad_norm": 0.18882299959659576,
"learning_rate": 7.511679999999999e-05,
"loss": 0.8939,
"step": 39040
},
{
"epoch": 0.62496,
"grad_norm": 0.21862895786762238,
"learning_rate": 7.50528e-05,
"loss": 0.8707,
"step": 39060
},
{
"epoch": 0.62528,
"grad_norm": 0.26119473576545715,
"learning_rate": 7.498880000000001e-05,
"loss": 0.8699,
"step": 39080
},
{
"epoch": 0.6256,
"grad_norm": 0.24526222050189972,
"learning_rate": 7.49248e-05,
"loss": 0.8787,
"step": 39100
},
{
"epoch": 0.62592,
"grad_norm": 0.2376752644777298,
"learning_rate": 7.486080000000001e-05,
"loss": 0.9359,
"step": 39120
},
{
"epoch": 0.62624,
"grad_norm": 0.27865490317344666,
"learning_rate": 7.47968e-05,
"loss": 0.8842,
"step": 39140
},
{
"epoch": 0.62656,
"grad_norm": 0.22411134839057922,
"learning_rate": 7.47328e-05,
"loss": 0.9039,
"step": 39160
},
{
"epoch": 0.62688,
"grad_norm": 0.24417252838611603,
"learning_rate": 7.46688e-05,
"loss": 0.915,
"step": 39180
},
{
"epoch": 0.6272,
"grad_norm": 0.21417266130447388,
"learning_rate": 7.460480000000001e-05,
"loss": 0.8408,
"step": 39200
},
{
"epoch": 0.62752,
"grad_norm": 0.21754087507724762,
"learning_rate": 7.45408e-05,
"loss": 0.8663,
"step": 39220
},
{
"epoch": 0.62784,
"grad_norm": 0.2340565174818039,
"learning_rate": 7.44768e-05,
"loss": 0.8353,
"step": 39240
},
{
"epoch": 0.62816,
"grad_norm": 0.25035715103149414,
"learning_rate": 7.441280000000001e-05,
"loss": 0.8729,
"step": 39260
},
{
"epoch": 0.62848,
"grad_norm": 0.2600226104259491,
"learning_rate": 7.43488e-05,
"loss": 0.8794,
"step": 39280
},
{
"epoch": 0.6288,
"grad_norm": 0.27380433678627014,
"learning_rate": 7.42848e-05,
"loss": 0.8578,
"step": 39300
},
{
"epoch": 0.62912,
"grad_norm": 0.18544994294643402,
"learning_rate": 7.42208e-05,
"loss": 0.9026,
"step": 39320
},
{
"epoch": 0.62944,
"grad_norm": 0.23277778923511505,
"learning_rate": 7.41568e-05,
"loss": 0.8759,
"step": 39340
},
{
"epoch": 0.62976,
"grad_norm": 0.21839508414268494,
"learning_rate": 7.409280000000001e-05,
"loss": 0.884,
"step": 39360
},
{
"epoch": 0.63008,
"grad_norm": 0.20547005534172058,
"learning_rate": 7.402879999999999e-05,
"loss": 0.8755,
"step": 39380
},
{
"epoch": 0.6304,
"grad_norm": 0.21153508126735687,
"learning_rate": 7.39648e-05,
"loss": 0.8551,
"step": 39400
},
{
"epoch": 0.63072,
"grad_norm": 0.22079287469387054,
"learning_rate": 7.390080000000001e-05,
"loss": 0.8424,
"step": 39420
},
{
"epoch": 0.63104,
"grad_norm": 0.25068333745002747,
"learning_rate": 7.38368e-05,
"loss": 0.893,
"step": 39440
},
{
"epoch": 0.63136,
"grad_norm": 0.18888238072395325,
"learning_rate": 7.377280000000001e-05,
"loss": 0.8803,
"step": 39460
},
{
"epoch": 0.63168,
"grad_norm": 0.258759081363678,
"learning_rate": 7.37088e-05,
"loss": 0.921,
"step": 39480
},
{
"epoch": 0.632,
"grad_norm": 0.22620119154453278,
"learning_rate": 7.36448e-05,
"loss": 0.8552,
"step": 39500
},
{
"epoch": 0.63232,
"grad_norm": 0.2477254718542099,
"learning_rate": 7.35808e-05,
"loss": 0.8884,
"step": 39520
},
{
"epoch": 0.63264,
"grad_norm": 0.23816423118114471,
"learning_rate": 7.351680000000001e-05,
"loss": 0.8148,
"step": 39540
},
{
"epoch": 0.63296,
"grad_norm": 0.19741494953632355,
"learning_rate": 7.34528e-05,
"loss": 0.8668,
"step": 39560
},
{
"epoch": 0.63328,
"grad_norm": 0.25120246410369873,
"learning_rate": 7.33888e-05,
"loss": 0.9149,
"step": 39580
},
{
"epoch": 0.6336,
"grad_norm": 0.21695558726787567,
"learning_rate": 7.33248e-05,
"loss": 0.8762,
"step": 39600
},
{
"epoch": 0.63392,
"grad_norm": 0.2556038498878479,
"learning_rate": 7.32608e-05,
"loss": 0.8438,
"step": 39620
},
{
"epoch": 0.63424,
"grad_norm": 0.265425443649292,
"learning_rate": 7.31968e-05,
"loss": 0.913,
"step": 39640
},
{
"epoch": 0.63456,
"grad_norm": 0.23394323885440826,
"learning_rate": 7.31328e-05,
"loss": 0.8572,
"step": 39660
},
{
"epoch": 0.63488,
"grad_norm": 0.25647103786468506,
"learning_rate": 7.30688e-05,
"loss": 0.9088,
"step": 39680
},
{
"epoch": 0.6352,
"grad_norm": 0.2558782994747162,
"learning_rate": 7.300480000000001e-05,
"loss": 0.887,
"step": 39700
},
{
"epoch": 0.63552,
"grad_norm": 0.23038125038146973,
"learning_rate": 7.29408e-05,
"loss": 0.8505,
"step": 39720
},
{
"epoch": 0.63584,
"grad_norm": 0.22082971036434174,
"learning_rate": 7.28768e-05,
"loss": 0.8159,
"step": 39740
},
{
"epoch": 0.63616,
"grad_norm": 0.2407037317752838,
"learning_rate": 7.281280000000001e-05,
"loss": 0.8703,
"step": 39760
},
{
"epoch": 0.63648,
"grad_norm": 0.2498258352279663,
"learning_rate": 7.27488e-05,
"loss": 0.902,
"step": 39780
},
{
"epoch": 0.6368,
"grad_norm": 0.212127685546875,
"learning_rate": 7.268480000000001e-05,
"loss": 0.8748,
"step": 39800
},
{
"epoch": 0.63712,
"grad_norm": 0.2286374717950821,
"learning_rate": 7.26208e-05,
"loss": 0.8932,
"step": 39820
},
{
"epoch": 0.63744,
"grad_norm": 0.23190194368362427,
"learning_rate": 7.25568e-05,
"loss": 0.8772,
"step": 39840
},
{
"epoch": 0.63776,
"grad_norm": 0.2264523208141327,
"learning_rate": 7.249280000000001e-05,
"loss": 0.8824,
"step": 39860
},
{
"epoch": 0.63808,
"grad_norm": 0.267228901386261,
"learning_rate": 7.242880000000001e-05,
"loss": 0.808,
"step": 39880
},
{
"epoch": 0.6384,
"grad_norm": 0.25287920236587524,
"learning_rate": 7.23648e-05,
"loss": 0.8901,
"step": 39900
},
{
"epoch": 0.63872,
"grad_norm": 0.22771823406219482,
"learning_rate": 7.23008e-05,
"loss": 0.9111,
"step": 39920
},
{
"epoch": 0.63904,
"grad_norm": 0.20720939338207245,
"learning_rate": 7.22368e-05,
"loss": 0.89,
"step": 39940
},
{
"epoch": 0.63936,
"grad_norm": 0.24383579194545746,
"learning_rate": 7.21728e-05,
"loss": 0.9141,
"step": 39960
},
{
"epoch": 0.63968,
"grad_norm": 0.22674813866615295,
"learning_rate": 7.21088e-05,
"loss": 0.8842,
"step": 39980
},
{
"epoch": 0.64,
"grad_norm": 0.22404788434505463,
"learning_rate": 7.20448e-05,
"loss": 0.8501,
"step": 40000
},
{
"epoch": 0.64032,
"grad_norm": 0.21472766995429993,
"learning_rate": 7.19808e-05,
"loss": 0.9002,
"step": 40020
},
{
"epoch": 0.64064,
"grad_norm": 0.25677719712257385,
"learning_rate": 7.191680000000001e-05,
"loss": 0.9173,
"step": 40040
},
{
"epoch": 0.64096,
"grad_norm": 0.21479903161525726,
"learning_rate": 7.18528e-05,
"loss": 0.8685,
"step": 40060
},
{
"epoch": 0.64128,
"grad_norm": 0.24473252892494202,
"learning_rate": 7.17888e-05,
"loss": 0.8955,
"step": 40080
},
{
"epoch": 0.6416,
"grad_norm": 0.23189175128936768,
"learning_rate": 7.172480000000001e-05,
"loss": 0.8438,
"step": 40100
},
{
"epoch": 0.64192,
"grad_norm": 0.25505882501602173,
"learning_rate": 7.16608e-05,
"loss": 0.8654,
"step": 40120
},
{
"epoch": 0.64224,
"grad_norm": 0.23652783036231995,
"learning_rate": 7.159680000000001e-05,
"loss": 0.907,
"step": 40140
},
{
"epoch": 0.64256,
"grad_norm": 0.26799604296684265,
"learning_rate": 7.15328e-05,
"loss": 0.8599,
"step": 40160
},
{
"epoch": 0.64288,
"grad_norm": 0.2281332015991211,
"learning_rate": 7.14688e-05,
"loss": 0.9087,
"step": 40180
},
{
"epoch": 0.6432,
"grad_norm": 0.22313934564590454,
"learning_rate": 7.140480000000001e-05,
"loss": 0.8987,
"step": 40200
},
{
"epoch": 0.64352,
"grad_norm": 0.21266809105873108,
"learning_rate": 7.13408e-05,
"loss": 0.9406,
"step": 40220
},
{
"epoch": 0.64384,
"grad_norm": 0.21366780996322632,
"learning_rate": 7.12768e-05,
"loss": 0.8144,
"step": 40240
},
{
"epoch": 0.64416,
"grad_norm": 0.2068609744310379,
"learning_rate": 7.12128e-05,
"loss": 0.8527,
"step": 40260
},
{
"epoch": 0.64448,
"grad_norm": 0.2570587396621704,
"learning_rate": 7.11488e-05,
"loss": 0.8938,
"step": 40280
},
{
"epoch": 0.6448,
"grad_norm": 0.21306006610393524,
"learning_rate": 7.10848e-05,
"loss": 0.8601,
"step": 40300
},
{
"epoch": 0.64512,
"grad_norm": 0.23386195302009583,
"learning_rate": 7.10208e-05,
"loss": 0.8709,
"step": 40320
},
{
"epoch": 0.64544,
"grad_norm": 0.2133599817752838,
"learning_rate": 7.09568e-05,
"loss": 0.9178,
"step": 40340
},
{
"epoch": 0.64576,
"grad_norm": 0.23202918469905853,
"learning_rate": 7.08928e-05,
"loss": 0.8588,
"step": 40360
},
{
"epoch": 0.64608,
"grad_norm": 0.246184840798378,
"learning_rate": 7.082880000000001e-05,
"loss": 0.8732,
"step": 40380
},
{
"epoch": 0.6464,
"grad_norm": 0.22465592622756958,
"learning_rate": 7.07648e-05,
"loss": 0.8496,
"step": 40400
},
{
"epoch": 0.64672,
"grad_norm": 0.27027273178100586,
"learning_rate": 7.07008e-05,
"loss": 0.8732,
"step": 40420
},
{
"epoch": 0.64704,
"grad_norm": 0.23711097240447998,
"learning_rate": 7.063680000000001e-05,
"loss": 0.9601,
"step": 40440
},
{
"epoch": 0.64736,
"grad_norm": 0.21715228259563446,
"learning_rate": 7.05728e-05,
"loss": 0.93,
"step": 40460
},
{
"epoch": 0.64768,
"grad_norm": 0.18533211946487427,
"learning_rate": 7.050880000000001e-05,
"loss": 0.8923,
"step": 40480
},
{
"epoch": 0.648,
"grad_norm": 0.2325373739004135,
"learning_rate": 7.04448e-05,
"loss": 0.8445,
"step": 40500
},
{
"epoch": 0.64832,
"grad_norm": 0.20923930406570435,
"learning_rate": 7.03808e-05,
"loss": 0.8714,
"step": 40520
},
{
"epoch": 0.64864,
"grad_norm": 0.16755761206150055,
"learning_rate": 7.031680000000001e-05,
"loss": 0.856,
"step": 40540
},
{
"epoch": 0.64896,
"grad_norm": 0.20746345818042755,
"learning_rate": 7.02528e-05,
"loss": 0.9456,
"step": 40560
},
{
"epoch": 0.64928,
"grad_norm": 0.2445952445268631,
"learning_rate": 7.01888e-05,
"loss": 0.8962,
"step": 40580
},
{
"epoch": 0.6496,
"grad_norm": 0.21696268022060394,
"learning_rate": 7.01248e-05,
"loss": 0.8501,
"step": 40600
},
{
"epoch": 0.64992,
"grad_norm": 0.18578511476516724,
"learning_rate": 7.00608e-05,
"loss": 0.8168,
"step": 40620
},
{
"epoch": 0.65024,
"grad_norm": 0.25534483790397644,
"learning_rate": 6.99968e-05,
"loss": 0.8867,
"step": 40640
},
{
"epoch": 0.65056,
"grad_norm": 0.24341151118278503,
"learning_rate": 6.99328e-05,
"loss": 0.8426,
"step": 40660
},
{
"epoch": 0.65088,
"grad_norm": 0.229604572057724,
"learning_rate": 6.98688e-05,
"loss": 0.9253,
"step": 40680
},
{
"epoch": 0.6512,
"grad_norm": 0.24505998194217682,
"learning_rate": 6.98048e-05,
"loss": 0.9244,
"step": 40700
},
{
"epoch": 0.65152,
"grad_norm": 0.19099657237529755,
"learning_rate": 6.974080000000001e-05,
"loss": 0.8625,
"step": 40720
},
{
"epoch": 0.65184,
"grad_norm": 0.24071238934993744,
"learning_rate": 6.96768e-05,
"loss": 0.8457,
"step": 40740
},
{
"epoch": 0.65216,
"grad_norm": 0.2676192820072174,
"learning_rate": 6.96128e-05,
"loss": 0.8621,
"step": 40760
},
{
"epoch": 0.65248,
"grad_norm": 0.2141886055469513,
"learning_rate": 6.954880000000001e-05,
"loss": 0.8369,
"step": 40780
},
{
"epoch": 0.6528,
"grad_norm": 0.24451769888401031,
"learning_rate": 6.94848e-05,
"loss": 0.8665,
"step": 40800
},
{
"epoch": 0.65312,
"grad_norm": 0.20462900400161743,
"learning_rate": 6.942080000000001e-05,
"loss": 0.9034,
"step": 40820
},
{
"epoch": 0.65344,
"grad_norm": 0.2098025679588318,
"learning_rate": 6.935679999999999e-05,
"loss": 0.8874,
"step": 40840
},
{
"epoch": 0.65376,
"grad_norm": 0.2127532809972763,
"learning_rate": 6.92928e-05,
"loss": 0.8651,
"step": 40860
},
{
"epoch": 0.65408,
"grad_norm": 0.23097112774848938,
"learning_rate": 6.922880000000001e-05,
"loss": 0.8587,
"step": 40880
},
{
"epoch": 0.6544,
"grad_norm": 0.23748517036437988,
"learning_rate": 6.91648e-05,
"loss": 0.8306,
"step": 40900
},
{
"epoch": 0.65472,
"grad_norm": 0.2403116524219513,
"learning_rate": 6.91008e-05,
"loss": 0.8822,
"step": 40920
},
{
"epoch": 0.65504,
"grad_norm": 0.25235334038734436,
"learning_rate": 6.90368e-05,
"loss": 0.9339,
"step": 40940
},
{
"epoch": 0.65536,
"grad_norm": 0.23442967236042023,
"learning_rate": 6.89728e-05,
"loss": 0.9065,
"step": 40960
},
{
"epoch": 0.65568,
"grad_norm": 0.22697308659553528,
"learning_rate": 6.89088e-05,
"loss": 0.9111,
"step": 40980
},
{
"epoch": 0.656,
"grad_norm": 0.21100306510925293,
"learning_rate": 6.88448e-05,
"loss": 0.8536,
"step": 41000
},
{
"epoch": 0.65632,
"grad_norm": 0.24288100004196167,
"learning_rate": 6.87808e-05,
"loss": 0.8757,
"step": 41020
},
{
"epoch": 0.65664,
"grad_norm": 0.26507681608200073,
"learning_rate": 6.87168e-05,
"loss": 0.8786,
"step": 41040
},
{
"epoch": 0.65696,
"grad_norm": 0.2742130160331726,
"learning_rate": 6.865280000000001e-05,
"loss": 0.8888,
"step": 41060
},
{
"epoch": 0.65728,
"grad_norm": 0.23026636242866516,
"learning_rate": 6.85888e-05,
"loss": 0.8212,
"step": 41080
},
{
"epoch": 0.6576,
"grad_norm": 0.20454558730125427,
"learning_rate": 6.85248e-05,
"loss": 0.8761,
"step": 41100
},
{
"epoch": 0.65792,
"grad_norm": 0.20581161975860596,
"learning_rate": 6.846080000000001e-05,
"loss": 0.8897,
"step": 41120
},
{
"epoch": 0.65824,
"grad_norm": 0.24633437395095825,
"learning_rate": 6.83968e-05,
"loss": 0.8448,
"step": 41140
},
{
"epoch": 0.65856,
"grad_norm": 0.246739000082016,
"learning_rate": 6.833280000000001e-05,
"loss": 0.8769,
"step": 41160
},
{
"epoch": 0.65888,
"grad_norm": 0.22334247827529907,
"learning_rate": 6.82688e-05,
"loss": 0.8975,
"step": 41180
},
{
"epoch": 0.6592,
"grad_norm": 0.23122014105319977,
"learning_rate": 6.82048e-05,
"loss": 0.9277,
"step": 41200
},
{
"epoch": 0.65952,
"grad_norm": 0.26595011353492737,
"learning_rate": 6.8144e-05,
"loss": 0.8685,
"step": 41220
},
{
"epoch": 0.65984,
"grad_norm": 0.22354891896247864,
"learning_rate": 6.808e-05,
"loss": 0.8693,
"step": 41240
},
{
"epoch": 0.66016,
"grad_norm": 0.23355019092559814,
"learning_rate": 6.801600000000001e-05,
"loss": 0.8979,
"step": 41260
},
{
"epoch": 0.66048,
"grad_norm": 0.2354181855916977,
"learning_rate": 6.7952e-05,
"loss": 0.8735,
"step": 41280
},
{
"epoch": 0.6608,
"grad_norm": 0.26578792929649353,
"learning_rate": 6.788800000000001e-05,
"loss": 0.8971,
"step": 41300
},
{
"epoch": 0.66112,
"grad_norm": 0.26312100887298584,
"learning_rate": 6.782399999999999e-05,
"loss": 0.9185,
"step": 41320
},
{
"epoch": 0.66144,
"grad_norm": 0.22569258511066437,
"learning_rate": 6.776e-05,
"loss": 0.8838,
"step": 41340
},
{
"epoch": 0.66176,
"grad_norm": 0.30978450179100037,
"learning_rate": 6.769600000000001e-05,
"loss": 0.9312,
"step": 41360
},
{
"epoch": 0.66208,
"grad_norm": 0.23343753814697266,
"learning_rate": 6.7632e-05,
"loss": 0.8593,
"step": 41380
},
{
"epoch": 0.6624,
"grad_norm": 0.22287525236606598,
"learning_rate": 6.7568e-05,
"loss": 0.9196,
"step": 41400
},
{
"epoch": 0.66272,
"grad_norm": 0.20180395245552063,
"learning_rate": 6.7504e-05,
"loss": 0.8573,
"step": 41420
},
{
"epoch": 0.66304,
"grad_norm": 0.24834296107292175,
"learning_rate": 6.744e-05,
"loss": 0.8839,
"step": 41440
},
{
"epoch": 0.66336,
"grad_norm": 0.2042527049779892,
"learning_rate": 6.7376e-05,
"loss": 0.8805,
"step": 41460
},
{
"epoch": 0.66368,
"grad_norm": 0.22171486914157867,
"learning_rate": 6.731200000000001e-05,
"loss": 0.8742,
"step": 41480
},
{
"epoch": 0.664,
"grad_norm": 0.27567192912101746,
"learning_rate": 6.7248e-05,
"loss": 0.8997,
"step": 41500
},
{
"epoch": 0.66432,
"grad_norm": 0.22618427872657776,
"learning_rate": 6.7184e-05,
"loss": 0.8679,
"step": 41520
},
{
"epoch": 0.66464,
"grad_norm": 0.24690526723861694,
"learning_rate": 6.712000000000001e-05,
"loss": 0.8564,
"step": 41540
},
{
"epoch": 0.66496,
"grad_norm": 0.23084665834903717,
"learning_rate": 6.7056e-05,
"loss": 0.9033,
"step": 41560
},
{
"epoch": 0.66528,
"grad_norm": 0.2343718707561493,
"learning_rate": 6.6992e-05,
"loss": 0.8987,
"step": 41580
},
{
"epoch": 0.6656,
"grad_norm": 0.24334469437599182,
"learning_rate": 6.692800000000001e-05,
"loss": 0.8943,
"step": 41600
},
{
"epoch": 0.66592,
"grad_norm": 0.24616220593452454,
"learning_rate": 6.6864e-05,
"loss": 0.831,
"step": 41620
},
{
"epoch": 0.66624,
"grad_norm": 0.21528421342372894,
"learning_rate": 6.680000000000001e-05,
"loss": 0.8642,
"step": 41640
},
{
"epoch": 0.66656,
"grad_norm": 0.20237964391708374,
"learning_rate": 6.673599999999999e-05,
"loss": 0.9171,
"step": 41660
},
{
"epoch": 0.66688,
"grad_norm": 0.2018793821334839,
"learning_rate": 6.6672e-05,
"loss": 0.8975,
"step": 41680
},
{
"epoch": 0.6672,
"grad_norm": 0.2155706137418747,
"learning_rate": 6.660800000000001e-05,
"loss": 0.8354,
"step": 41700
},
{
"epoch": 0.66752,
"grad_norm": 0.23168103396892548,
"learning_rate": 6.6544e-05,
"loss": 0.916,
"step": 41720
},
{
"epoch": 0.66784,
"grad_norm": 0.20231425762176514,
"learning_rate": 6.648e-05,
"loss": 0.8639,
"step": 41740
},
{
"epoch": 0.66816,
"grad_norm": 0.20722989737987518,
"learning_rate": 6.6416e-05,
"loss": 0.8197,
"step": 41760
},
{
"epoch": 0.66848,
"grad_norm": 0.20583872497081757,
"learning_rate": 6.6352e-05,
"loss": 0.8644,
"step": 41780
},
{
"epoch": 0.6688,
"grad_norm": 0.22469474375247955,
"learning_rate": 6.6288e-05,
"loss": 0.8431,
"step": 41800
},
{
"epoch": 0.66912,
"grad_norm": 0.22593587636947632,
"learning_rate": 6.622400000000001e-05,
"loss": 0.8428,
"step": 41820
},
{
"epoch": 0.66944,
"grad_norm": 0.21532204747200012,
"learning_rate": 6.616e-05,
"loss": 0.8634,
"step": 41840
},
{
"epoch": 0.66976,
"grad_norm": 0.1992572546005249,
"learning_rate": 6.6096e-05,
"loss": 0.8744,
"step": 41860
},
{
"epoch": 0.67008,
"grad_norm": 0.23626761138439178,
"learning_rate": 6.603200000000001e-05,
"loss": 0.8928,
"step": 41880
},
{
"epoch": 0.6704,
"grad_norm": 0.2587644159793854,
"learning_rate": 6.5968e-05,
"loss": 0.8501,
"step": 41900
},
{
"epoch": 0.67072,
"grad_norm": 0.23042425513267517,
"learning_rate": 6.5904e-05,
"loss": 0.8317,
"step": 41920
},
{
"epoch": 0.67104,
"grad_norm": 0.20776692032814026,
"learning_rate": 6.584e-05,
"loss": 0.8842,
"step": 41940
},
{
"epoch": 0.67136,
"grad_norm": 0.2233342081308365,
"learning_rate": 6.5776e-05,
"loss": 0.9062,
"step": 41960
},
{
"epoch": 0.67168,
"grad_norm": 0.22281095385551453,
"learning_rate": 6.571200000000001e-05,
"loss": 0.8696,
"step": 41980
},
{
"epoch": 0.672,
"grad_norm": 0.2269035130739212,
"learning_rate": 6.564799999999999e-05,
"loss": 0.8983,
"step": 42000
},
{
"epoch": 0.67232,
"grad_norm": 0.21187534928321838,
"learning_rate": 6.5584e-05,
"loss": 0.8666,
"step": 42020
},
{
"epoch": 0.67264,
"grad_norm": 0.2288711965084076,
"learning_rate": 6.552000000000001e-05,
"loss": 0.8519,
"step": 42040
},
{
"epoch": 0.67296,
"grad_norm": 0.24002696573734283,
"learning_rate": 6.5456e-05,
"loss": 0.843,
"step": 42060
},
{
"epoch": 0.67328,
"grad_norm": 0.19838085770606995,
"learning_rate": 6.5392e-05,
"loss": 0.8429,
"step": 42080
},
{
"epoch": 0.6736,
"grad_norm": 0.24605266749858856,
"learning_rate": 6.5328e-05,
"loss": 0.9016,
"step": 42100
},
{
"epoch": 0.67392,
"grad_norm": 0.273473858833313,
"learning_rate": 6.5264e-05,
"loss": 0.9013,
"step": 42120
},
{
"epoch": 0.67424,
"grad_norm": 0.2528668940067291,
"learning_rate": 6.52e-05,
"loss": 0.9072,
"step": 42140
},
{
"epoch": 0.67456,
"grad_norm": 0.23695510625839233,
"learning_rate": 6.513600000000001e-05,
"loss": 0.8652,
"step": 42160
},
{
"epoch": 0.67488,
"grad_norm": 0.23263618350028992,
"learning_rate": 6.5072e-05,
"loss": 0.9238,
"step": 42180
},
{
"epoch": 0.6752,
"grad_norm": 0.2174840271472931,
"learning_rate": 6.5008e-05,
"loss": 0.8806,
"step": 42200
},
{
"epoch": 0.67552,
"grad_norm": 0.22841788828372955,
"learning_rate": 6.494400000000001e-05,
"loss": 0.8422,
"step": 42220
},
{
"epoch": 0.67584,
"grad_norm": 0.24447223544120789,
"learning_rate": 6.488e-05,
"loss": 0.8361,
"step": 42240
},
{
"epoch": 0.67616,
"grad_norm": 0.24879607558250427,
"learning_rate": 6.4816e-05,
"loss": 0.8944,
"step": 42260
},
{
"epoch": 0.67648,
"grad_norm": 0.26324090361595154,
"learning_rate": 6.4752e-05,
"loss": 0.8424,
"step": 42280
},
{
"epoch": 0.6768,
"grad_norm": 0.28680363297462463,
"learning_rate": 6.4688e-05,
"loss": 0.8763,
"step": 42300
},
{
"epoch": 0.67712,
"grad_norm": 0.2222435623407364,
"learning_rate": 6.462400000000001e-05,
"loss": 0.8901,
"step": 42320
},
{
"epoch": 0.67744,
"grad_norm": 0.2362917810678482,
"learning_rate": 6.455999999999999e-05,
"loss": 0.9079,
"step": 42340
},
{
"epoch": 0.67776,
"grad_norm": 0.23679310083389282,
"learning_rate": 6.4496e-05,
"loss": 0.832,
"step": 42360
},
{
"epoch": 0.67808,
"grad_norm": 0.23579975962638855,
"learning_rate": 6.443200000000001e-05,
"loss": 0.8957,
"step": 42380
},
{
"epoch": 0.6784,
"grad_norm": 0.2289842814207077,
"learning_rate": 6.4368e-05,
"loss": 0.9169,
"step": 42400
},
{
"epoch": 0.67872,
"grad_norm": 0.2322479486465454,
"learning_rate": 6.4304e-05,
"loss": 0.8729,
"step": 42420
},
{
"epoch": 0.67904,
"grad_norm": 0.2532987892627716,
"learning_rate": 6.42432e-05,
"loss": 0.8365,
"step": 42440
},
{
"epoch": 0.67936,
"grad_norm": 0.31229642033576965,
"learning_rate": 6.417920000000001e-05,
"loss": 0.9106,
"step": 42460
},
{
"epoch": 0.67968,
"grad_norm": 0.19338229298591614,
"learning_rate": 6.41152e-05,
"loss": 0.8812,
"step": 42480
},
{
"epoch": 0.68,
"grad_norm": 0.2138776332139969,
"learning_rate": 6.40512e-05,
"loss": 0.8538,
"step": 42500
},
{
"epoch": 0.68032,
"grad_norm": 0.2549976408481598,
"learning_rate": 6.39872e-05,
"loss": 0.8651,
"step": 42520
},
{
"epoch": 0.68064,
"grad_norm": 0.24992278218269348,
"learning_rate": 6.39232e-05,
"loss": 0.9094,
"step": 42540
},
{
"epoch": 0.68096,
"grad_norm": 0.2768593728542328,
"learning_rate": 6.385920000000001e-05,
"loss": 0.9047,
"step": 42560
},
{
"epoch": 0.68128,
"grad_norm": 0.2133874148130417,
"learning_rate": 6.37952e-05,
"loss": 0.9259,
"step": 42580
},
{
"epoch": 0.6816,
"grad_norm": 0.24320970475673676,
"learning_rate": 6.37312e-05,
"loss": 0.875,
"step": 42600
},
{
"epoch": 0.68192,
"grad_norm": 0.21263545751571655,
"learning_rate": 6.36672e-05,
"loss": 0.8896,
"step": 42620
},
{
"epoch": 0.68224,
"grad_norm": 0.18488876521587372,
"learning_rate": 6.360320000000001e-05,
"loss": 0.8557,
"step": 42640
},
{
"epoch": 0.68256,
"grad_norm": 0.2456846386194229,
"learning_rate": 6.35392e-05,
"loss": 0.8991,
"step": 42660
},
{
"epoch": 0.68288,
"grad_norm": 0.20528970658779144,
"learning_rate": 6.34752e-05,
"loss": 0.8908,
"step": 42680
},
{
"epoch": 0.6832,
"grad_norm": 0.225137397646904,
"learning_rate": 6.341120000000001e-05,
"loss": 0.8635,
"step": 42700
},
{
"epoch": 0.68352,
"grad_norm": 0.24640017747879028,
"learning_rate": 6.33472e-05,
"loss": 0.8551,
"step": 42720
},
{
"epoch": 0.68384,
"grad_norm": 0.22672517597675323,
"learning_rate": 6.32832e-05,
"loss": 0.8665,
"step": 42740
},
{
"epoch": 0.68416,
"grad_norm": 0.229408398270607,
"learning_rate": 6.32192e-05,
"loss": 0.9198,
"step": 42760
},
{
"epoch": 0.68448,
"grad_norm": 0.19723407924175262,
"learning_rate": 6.31552e-05,
"loss": 0.8959,
"step": 42780
},
{
"epoch": 0.6848,
"grad_norm": 0.2351776361465454,
"learning_rate": 6.309120000000001e-05,
"loss": 0.8286,
"step": 42800
},
{
"epoch": 0.68512,
"grad_norm": 0.17581576108932495,
"learning_rate": 6.30272e-05,
"loss": 0.8849,
"step": 42820
},
{
"epoch": 0.68544,
"grad_norm": 0.22729769349098206,
"learning_rate": 6.29632e-05,
"loss": 0.8592,
"step": 42840
},
{
"epoch": 0.68576,
"grad_norm": 0.25973424315452576,
"learning_rate": 6.289920000000001e-05,
"loss": 0.8797,
"step": 42860
},
{
"epoch": 0.68608,
"grad_norm": 0.24774223566055298,
"learning_rate": 6.28352e-05,
"loss": 0.8542,
"step": 42880
},
{
"epoch": 0.6864,
"grad_norm": 0.25668323040008545,
"learning_rate": 6.277120000000001e-05,
"loss": 0.9054,
"step": 42900
},
{
"epoch": 0.68672,
"grad_norm": 0.26286524534225464,
"learning_rate": 6.27072e-05,
"loss": 0.8654,
"step": 42920
},
{
"epoch": 0.68704,
"grad_norm": 0.24494454264640808,
"learning_rate": 6.26432e-05,
"loss": 0.9516,
"step": 42940
},
{
"epoch": 0.68736,
"grad_norm": 0.2337479293346405,
"learning_rate": 6.25792e-05,
"loss": 0.8931,
"step": 42960
},
{
"epoch": 0.68768,
"grad_norm": 0.2087046056985855,
"learning_rate": 6.251520000000001e-05,
"loss": 0.9421,
"step": 42980
},
{
"epoch": 0.688,
"grad_norm": 0.21605008840560913,
"learning_rate": 6.24512e-05,
"loss": 0.883,
"step": 43000
},
{
"epoch": 0.68832,
"grad_norm": 0.21600419282913208,
"learning_rate": 6.23872e-05,
"loss": 0.8408,
"step": 43020
},
{
"epoch": 0.68864,
"grad_norm": 0.2731294333934784,
"learning_rate": 6.23232e-05,
"loss": 0.8508,
"step": 43040
},
{
"epoch": 0.68896,
"grad_norm": 0.20644868910312653,
"learning_rate": 6.22592e-05,
"loss": 0.8578,
"step": 43060
},
{
"epoch": 0.68928,
"grad_norm": 0.22507797181606293,
"learning_rate": 6.21952e-05,
"loss": 0.8556,
"step": 43080
},
{
"epoch": 0.6896,
"grad_norm": 0.20173804461956024,
"learning_rate": 6.21312e-05,
"loss": 0.8631,
"step": 43100
},
{
"epoch": 0.68992,
"grad_norm": 0.2198924422264099,
"learning_rate": 6.20672e-05,
"loss": 0.9,
"step": 43120
},
{
"epoch": 0.69024,
"grad_norm": 0.2248951494693756,
"learning_rate": 6.200320000000001e-05,
"loss": 0.9064,
"step": 43140
},
{
"epoch": 0.69056,
"grad_norm": 0.23556740581989288,
"learning_rate": 6.19392e-05,
"loss": 0.866,
"step": 43160
},
{
"epoch": 0.69088,
"grad_norm": 0.2064543068408966,
"learning_rate": 6.18752e-05,
"loss": 0.8798,
"step": 43180
},
{
"epoch": 0.6912,
"grad_norm": 0.22137311100959778,
"learning_rate": 6.181120000000001e-05,
"loss": 0.8844,
"step": 43200
},
{
"epoch": 0.69152,
"grad_norm": 0.21415813267230988,
"learning_rate": 6.17472e-05,
"loss": 0.8576,
"step": 43220
},
{
"epoch": 0.69184,
"grad_norm": 0.22798651456832886,
"learning_rate": 6.168320000000001e-05,
"loss": 0.8471,
"step": 43240
},
{
"epoch": 0.69216,
"grad_norm": 0.233371764421463,
"learning_rate": 6.16192e-05,
"loss": 0.8734,
"step": 43260
},
{
"epoch": 0.69248,
"grad_norm": 0.20008385181427002,
"learning_rate": 6.15552e-05,
"loss": 0.8683,
"step": 43280
},
{
"epoch": 0.6928,
"grad_norm": 0.22969180345535278,
"learning_rate": 6.14912e-05,
"loss": 0.8694,
"step": 43300
},
{
"epoch": 0.69312,
"grad_norm": 0.2556081712245941,
"learning_rate": 6.142720000000001e-05,
"loss": 0.8439,
"step": 43320
},
{
"epoch": 0.69344,
"grad_norm": 0.2534750699996948,
"learning_rate": 6.13632e-05,
"loss": 0.9054,
"step": 43340
},
{
"epoch": 0.69376,
"grad_norm": 0.2144964188337326,
"learning_rate": 6.12992e-05,
"loss": 0.9026,
"step": 43360
},
{
"epoch": 0.69408,
"grad_norm": 0.21919748187065125,
"learning_rate": 6.12352e-05,
"loss": 0.901,
"step": 43380
},
{
"epoch": 0.6944,
"grad_norm": 0.19432856142520905,
"learning_rate": 6.11712e-05,
"loss": 0.9081,
"step": 43400
},
{
"epoch": 0.69472,
"grad_norm": 0.25123217701911926,
"learning_rate": 6.11072e-05,
"loss": 0.9019,
"step": 43420
},
{
"epoch": 0.69504,
"grad_norm": 0.29103386402130127,
"learning_rate": 6.10432e-05,
"loss": 0.9118,
"step": 43440
},
{
"epoch": 0.69536,
"grad_norm": 0.2519950270652771,
"learning_rate": 6.09792e-05,
"loss": 0.8581,
"step": 43460
},
{
"epoch": 0.69568,
"grad_norm": 0.2215908318758011,
"learning_rate": 6.09152e-05,
"loss": 0.8958,
"step": 43480
},
{
"epoch": 0.696,
"grad_norm": 0.23915638029575348,
"learning_rate": 6.085120000000001e-05,
"loss": 0.8523,
"step": 43500
},
{
"epoch": 0.69632,
"grad_norm": 0.2282445728778839,
"learning_rate": 6.07872e-05,
"loss": 0.8991,
"step": 43520
},
{
"epoch": 0.69664,
"grad_norm": 0.2502846419811249,
"learning_rate": 6.07232e-05,
"loss": 0.8664,
"step": 43540
},
{
"epoch": 0.69696,
"grad_norm": 0.208401620388031,
"learning_rate": 6.0659200000000004e-05,
"loss": 0.8687,
"step": 43560
},
{
"epoch": 0.69728,
"grad_norm": 0.20891068875789642,
"learning_rate": 6.0595200000000006e-05,
"loss": 0.9093,
"step": 43580
},
{
"epoch": 0.6976,
"grad_norm": 0.26877716183662415,
"learning_rate": 6.05312e-05,
"loss": 0.847,
"step": 43600
},
{
"epoch": 0.69792,
"grad_norm": 0.21002227067947388,
"learning_rate": 6.04672e-05,
"loss": 0.8587,
"step": 43620
},
{
"epoch": 0.69824,
"grad_norm": 0.19834822416305542,
"learning_rate": 6.0403200000000005e-05,
"loss": 0.8871,
"step": 43640
},
{
"epoch": 0.69856,
"grad_norm": 0.23921220004558563,
"learning_rate": 6.033920000000001e-05,
"loss": 0.8202,
"step": 43660
},
{
"epoch": 0.69888,
"grad_norm": 0.23094278573989868,
"learning_rate": 6.0275199999999995e-05,
"loss": 0.8753,
"step": 43680
},
{
"epoch": 0.6992,
"grad_norm": 0.25601616501808167,
"learning_rate": 6.0211200000000004e-05,
"loss": 0.8652,
"step": 43700
},
{
"epoch": 0.69952,
"grad_norm": 0.25069522857666016,
"learning_rate": 6.0147200000000006e-05,
"loss": 0.8942,
"step": 43720
},
{
"epoch": 0.69984,
"grad_norm": 0.19572977721691132,
"learning_rate": 6.008320000000001e-05,
"loss": 0.9245,
"step": 43740
},
{
"epoch": 0.70016,
"grad_norm": 0.24047626554965973,
"learning_rate": 6.0019199999999996e-05,
"loss": 0.899,
"step": 43760
},
{
"epoch": 0.70048,
"grad_norm": 0.21386469900608063,
"learning_rate": 5.99552e-05,
"loss": 0.874,
"step": 43780
},
{
"epoch": 0.7008,
"grad_norm": 0.22829948365688324,
"learning_rate": 5.98912e-05,
"loss": 0.9313,
"step": 43800
},
{
"epoch": 0.70112,
"grad_norm": 0.23667655885219574,
"learning_rate": 5.98272e-05,
"loss": 0.8878,
"step": 43820
},
{
"epoch": 0.70144,
"grad_norm": 0.2182048112154007,
"learning_rate": 5.976320000000001e-05,
"loss": 0.8397,
"step": 43840
},
{
"epoch": 0.70176,
"grad_norm": 0.20164678990840912,
"learning_rate": 5.96992e-05,
"loss": 0.8768,
"step": 43860
},
{
"epoch": 0.70208,
"grad_norm": 0.23960982263088226,
"learning_rate": 5.96352e-05,
"loss": 0.8932,
"step": 43880
},
{
"epoch": 0.7024,
"grad_norm": 0.20772390067577362,
"learning_rate": 5.95712e-05,
"loss": 0.9277,
"step": 43900
},
{
"epoch": 0.70272,
"grad_norm": 0.24492938816547394,
"learning_rate": 5.9507200000000005e-05,
"loss": 0.8923,
"step": 43920
},
{
"epoch": 0.70304,
"grad_norm": 0.23545905947685242,
"learning_rate": 5.94432e-05,
"loss": 0.9141,
"step": 43940
},
{
"epoch": 0.70336,
"grad_norm": 0.2978091239929199,
"learning_rate": 5.93792e-05,
"loss": 0.9643,
"step": 43960
},
{
"epoch": 0.70368,
"grad_norm": 0.19800467789173126,
"learning_rate": 5.9315200000000004e-05,
"loss": 0.8799,
"step": 43980
},
{
"epoch": 0.704,
"grad_norm": 0.24483546614646912,
"learning_rate": 5.9251200000000006e-05,
"loss": 0.8587,
"step": 44000
},
{
"epoch": 0.70432,
"grad_norm": 0.32727476954460144,
"learning_rate": 5.91872e-05,
"loss": 0.8876,
"step": 44020
},
{
"epoch": 0.70464,
"grad_norm": 0.21823062002658844,
"learning_rate": 5.9123200000000003e-05,
"loss": 0.8695,
"step": 44040
},
{
"epoch": 0.70496,
"grad_norm": 0.2308553159236908,
"learning_rate": 5.9059200000000005e-05,
"loss": 0.8926,
"step": 44060
},
{
"epoch": 0.70528,
"grad_norm": 0.219979926943779,
"learning_rate": 5.899520000000001e-05,
"loss": 0.9193,
"step": 44080
},
{
"epoch": 0.7056,
"grad_norm": 0.24282580614089966,
"learning_rate": 5.8931199999999996e-05,
"loss": 0.8812,
"step": 44100
},
{
"epoch": 0.70592,
"grad_norm": 0.20185592770576477,
"learning_rate": 5.88672e-05,
"loss": 0.8588,
"step": 44120
},
{
"epoch": 0.70624,
"grad_norm": 0.24580541253089905,
"learning_rate": 5.88032e-05,
"loss": 0.849,
"step": 44140
},
{
"epoch": 0.70656,
"grad_norm": 0.2542431950569153,
"learning_rate": 5.873920000000001e-05,
"loss": 0.882,
"step": 44160
},
{
"epoch": 0.70688,
"grad_norm": 0.24872715771198273,
"learning_rate": 5.867520000000001e-05,
"loss": 0.8544,
"step": 44180
},
{
"epoch": 0.7072,
"grad_norm": 0.19842933118343353,
"learning_rate": 5.86112e-05,
"loss": 0.8803,
"step": 44200
},
{
"epoch": 0.70752,
"grad_norm": 0.2545991539955139,
"learning_rate": 5.85472e-05,
"loss": 0.9178,
"step": 44220
},
{
"epoch": 0.70784,
"grad_norm": 0.2342890352010727,
"learning_rate": 5.84832e-05,
"loss": 0.8403,
"step": 44240
},
{
"epoch": 0.70816,
"grad_norm": 0.2353144884109497,
"learning_rate": 5.8419200000000005e-05,
"loss": 0.9219,
"step": 44260
},
{
"epoch": 0.70848,
"grad_norm": 0.21412351727485657,
"learning_rate": 5.83552e-05,
"loss": 0.8837,
"step": 44280
},
{
"epoch": 0.7088,
"grad_norm": 0.18827536702156067,
"learning_rate": 5.82912e-05,
"loss": 0.8567,
"step": 44300
},
{
"epoch": 0.70912,
"grad_norm": 0.23062194883823395,
"learning_rate": 5.8227200000000004e-05,
"loss": 0.903,
"step": 44320
},
{
"epoch": 0.70944,
"grad_norm": 0.23226912319660187,
"learning_rate": 5.8163200000000006e-05,
"loss": 0.8446,
"step": 44340
},
{
"epoch": 0.70976,
"grad_norm": 0.23661820590496063,
"learning_rate": 5.80992e-05,
"loss": 0.9511,
"step": 44360
},
{
"epoch": 0.71008,
"grad_norm": 0.2356158196926117,
"learning_rate": 5.80352e-05,
"loss": 0.8817,
"step": 44380
},
{
"epoch": 0.7104,
"grad_norm": 0.23160752654075623,
"learning_rate": 5.7971200000000005e-05,
"loss": 0.8637,
"step": 44400
},
{
"epoch": 0.71072,
"grad_norm": 0.20803622901439667,
"learning_rate": 5.790720000000001e-05,
"loss": 0.8552,
"step": 44420
},
{
"epoch": 0.71104,
"grad_norm": 0.22061729431152344,
"learning_rate": 5.7843199999999995e-05,
"loss": 0.8751,
"step": 44440
},
{
"epoch": 0.71136,
"grad_norm": 0.233897864818573,
"learning_rate": 5.77792e-05,
"loss": 0.8686,
"step": 44460
},
{
"epoch": 0.71168,
"grad_norm": 0.21677446365356445,
"learning_rate": 5.77152e-05,
"loss": 0.8967,
"step": 44480
},
{
"epoch": 0.712,
"grad_norm": 0.24504272639751434,
"learning_rate": 5.765120000000001e-05,
"loss": 0.8934,
"step": 44500
},
{
"epoch": 0.71232,
"grad_norm": 0.21646228432655334,
"learning_rate": 5.758720000000001e-05,
"loss": 0.8452,
"step": 44520
},
{
"epoch": 0.71264,
"grad_norm": 0.22801847755908966,
"learning_rate": 5.75232e-05,
"loss": 0.8388,
"step": 44540
},
{
"epoch": 0.71296,
"grad_norm": 0.19865715503692627,
"learning_rate": 5.74592e-05,
"loss": 0.9077,
"step": 44560
},
{
"epoch": 0.71328,
"grad_norm": 0.24044495820999146,
"learning_rate": 5.73952e-05,
"loss": 0.9122,
"step": 44580
},
{
"epoch": 0.7136,
"grad_norm": 0.23846623301506042,
"learning_rate": 5.7331200000000004e-05,
"loss": 0.8906,
"step": 44600
},
{
"epoch": 0.71392,
"grad_norm": 0.21420036256313324,
"learning_rate": 5.72672e-05,
"loss": 0.877,
"step": 44620
},
{
"epoch": 0.71424,
"grad_norm": 0.2217768281698227,
"learning_rate": 5.72032e-05,
"loss": 0.8717,
"step": 44640
},
{
"epoch": 0.71456,
"grad_norm": 0.23392203450202942,
"learning_rate": 5.7139200000000003e-05,
"loss": 0.898,
"step": 44660
},
{
"epoch": 0.71488,
"grad_norm": 0.22015775740146637,
"learning_rate": 5.7075200000000005e-05,
"loss": 0.8809,
"step": 44680
},
{
"epoch": 0.7152,
"grad_norm": 0.21397672593593597,
"learning_rate": 5.70112e-05,
"loss": 0.8441,
"step": 44700
},
{
"epoch": 0.71552,
"grad_norm": 0.25773394107818604,
"learning_rate": 5.69472e-05,
"loss": 0.8623,
"step": 44720
},
{
"epoch": 0.71584,
"grad_norm": 0.24330535531044006,
"learning_rate": 5.6883200000000005e-05,
"loss": 0.8889,
"step": 44740
},
{
"epoch": 0.71616,
"grad_norm": 0.20773817598819733,
"learning_rate": 5.6819200000000006e-05,
"loss": 0.8711,
"step": 44760
},
{
"epoch": 0.71648,
"grad_norm": 0.21590672433376312,
"learning_rate": 5.6755199999999995e-05,
"loss": 0.8426,
"step": 44780
},
{
"epoch": 0.7168,
"grad_norm": 0.1878194808959961,
"learning_rate": 5.66912e-05,
"loss": 0.873,
"step": 44800
},
{
"epoch": 0.71712,
"grad_norm": 0.2268812656402588,
"learning_rate": 5.6627200000000006e-05,
"loss": 0.8769,
"step": 44820
},
{
"epoch": 0.71744,
"grad_norm": 0.24054917693138123,
"learning_rate": 5.6566399999999994e-05,
"loss": 0.8766,
"step": 44840
},
{
"epoch": 0.71776,
"grad_norm": 0.2115447223186493,
"learning_rate": 5.65024e-05,
"loss": 0.8531,
"step": 44860
},
{
"epoch": 0.71808,
"grad_norm": 0.20368890464305878,
"learning_rate": 5.6438400000000005e-05,
"loss": 0.8418,
"step": 44880
},
{
"epoch": 0.7184,
"grad_norm": 0.2356366366147995,
"learning_rate": 5.637440000000001e-05,
"loss": 0.9,
"step": 44900
},
{
"epoch": 0.71872,
"grad_norm": 0.22393269836902618,
"learning_rate": 5.631040000000001e-05,
"loss": 0.8868,
"step": 44920
},
{
"epoch": 0.71904,
"grad_norm": 0.2569195032119751,
"learning_rate": 5.62464e-05,
"loss": 0.895,
"step": 44940
},
{
"epoch": 0.71936,
"grad_norm": 0.20783191919326782,
"learning_rate": 5.61824e-05,
"loss": 0.8627,
"step": 44960
},
{
"epoch": 0.71968,
"grad_norm": 0.2164582461118698,
"learning_rate": 5.61184e-05,
"loss": 0.8982,
"step": 44980
},
{
"epoch": 0.72,
"grad_norm": 0.38153156638145447,
"learning_rate": 5.605440000000001e-05,
"loss": 0.9014,
"step": 45000
},
{
"epoch": 0.72032,
"grad_norm": 0.2400229126214981,
"learning_rate": 5.59904e-05,
"loss": 0.8997,
"step": 45020
},
{
"epoch": 0.72064,
"grad_norm": 0.25559934973716736,
"learning_rate": 5.59264e-05,
"loss": 0.8984,
"step": 45040
},
{
"epoch": 0.72096,
"grad_norm": 0.2528096139431,
"learning_rate": 5.58624e-05,
"loss": 0.914,
"step": 45060
},
{
"epoch": 0.72128,
"grad_norm": 0.18854907155036926,
"learning_rate": 5.5798400000000004e-05,
"loss": 0.8876,
"step": 45080
},
{
"epoch": 0.7216,
"grad_norm": 0.22515028715133667,
"learning_rate": 5.57344e-05,
"loss": 0.8684,
"step": 45100
},
{
"epoch": 0.72192,
"grad_norm": 0.2182977795600891,
"learning_rate": 5.56704e-05,
"loss": 0.8838,
"step": 45120
},
{
"epoch": 0.72224,
"grad_norm": 0.21532991528511047,
"learning_rate": 5.5606400000000003e-05,
"loss": 0.8298,
"step": 45140
},
{
"epoch": 0.72256,
"grad_norm": 0.2378109246492386,
"learning_rate": 5.5542400000000005e-05,
"loss": 0.867,
"step": 45160
},
{
"epoch": 0.72288,
"grad_norm": 0.22187520563602448,
"learning_rate": 5.547840000000001e-05,
"loss": 0.87,
"step": 45180
},
{
"epoch": 0.7232,
"grad_norm": 0.2259528785943985,
"learning_rate": 5.54144e-05,
"loss": 0.885,
"step": 45200
},
{
"epoch": 0.72352,
"grad_norm": 0.19351425766944885,
"learning_rate": 5.5350400000000005e-05,
"loss": 0.8984,
"step": 45220
},
{
"epoch": 0.72384,
"grad_norm": 0.2292325645685196,
"learning_rate": 5.5286400000000007e-05,
"loss": 0.8794,
"step": 45240
},
{
"epoch": 0.72416,
"grad_norm": 0.20458444952964783,
"learning_rate": 5.522240000000001e-05,
"loss": 0.8585,
"step": 45260
},
{
"epoch": 0.72448,
"grad_norm": 0.22770562767982483,
"learning_rate": 5.51584e-05,
"loss": 0.9062,
"step": 45280
},
{
"epoch": 0.7248,
"grad_norm": 0.21661782264709473,
"learning_rate": 5.50944e-05,
"loss": 0.8842,
"step": 45300
},
{
"epoch": 0.72512,
"grad_norm": 0.19377048313617706,
"learning_rate": 5.50304e-05,
"loss": 0.8405,
"step": 45320
},
{
"epoch": 0.72544,
"grad_norm": 0.2309509813785553,
"learning_rate": 5.496640000000001e-05,
"loss": 0.8799,
"step": 45340
},
{
"epoch": 0.72576,
"grad_norm": 0.18839353322982788,
"learning_rate": 5.49024e-05,
"loss": 0.878,
"step": 45360
},
{
"epoch": 0.72608,
"grad_norm": 0.248517245054245,
"learning_rate": 5.48384e-05,
"loss": 0.9094,
"step": 45380
},
{
"epoch": 0.7264,
"grad_norm": 0.21810860931873322,
"learning_rate": 5.47744e-05,
"loss": 0.8679,
"step": 45400
},
{
"epoch": 0.72672,
"grad_norm": 0.2429954707622528,
"learning_rate": 5.4710400000000004e-05,
"loss": 0.8592,
"step": 45420
},
{
"epoch": 0.72704,
"grad_norm": 0.20929422974586487,
"learning_rate": 5.46464e-05,
"loss": 0.9011,
"step": 45440
},
{
"epoch": 0.72736,
"grad_norm": 0.2323046624660492,
"learning_rate": 5.45824e-05,
"loss": 0.8614,
"step": 45460
},
{
"epoch": 0.72768,
"grad_norm": 0.22738327085971832,
"learning_rate": 5.45184e-05,
"loss": 0.8795,
"step": 45480
},
{
"epoch": 0.728,
"grad_norm": 0.2241695523262024,
"learning_rate": 5.4454400000000005e-05,
"loss": 0.9009,
"step": 45500
},
{
"epoch": 0.72832,
"grad_norm": 0.21020178496837616,
"learning_rate": 5.439040000000001e-05,
"loss": 0.8568,
"step": 45520
},
{
"epoch": 0.72864,
"grad_norm": 0.24524196982383728,
"learning_rate": 5.43264e-05,
"loss": 0.8667,
"step": 45540
},
{
"epoch": 0.72896,
"grad_norm": 0.2374972701072693,
"learning_rate": 5.4262400000000004e-05,
"loss": 0.9086,
"step": 45560
},
{
"epoch": 0.72928,
"grad_norm": 0.2362067550420761,
"learning_rate": 5.4198400000000006e-05,
"loss": 0.876,
"step": 45580
},
{
"epoch": 0.7296,
"grad_norm": 0.21441881358623505,
"learning_rate": 5.413440000000001e-05,
"loss": 0.8941,
"step": 45600
},
{
"epoch": 0.72992,
"grad_norm": 0.22504673898220062,
"learning_rate": 5.4070399999999996e-05,
"loss": 0.9268,
"step": 45620
},
{
"epoch": 0.73024,
"grad_norm": 0.20583686232566833,
"learning_rate": 5.40064e-05,
"loss": 0.8298,
"step": 45640
},
{
"epoch": 0.73056,
"grad_norm": 0.21706163883209229,
"learning_rate": 5.394240000000001e-05,
"loss": 0.8833,
"step": 45660
},
{
"epoch": 0.73088,
"grad_norm": 0.202799990773201,
"learning_rate": 5.387840000000001e-05,
"loss": 0.8792,
"step": 45680
},
{
"epoch": 0.7312,
"grad_norm": 0.2602541446685791,
"learning_rate": 5.38144e-05,
"loss": 0.9037,
"step": 45700
},
{
"epoch": 0.73152,
"grad_norm": 0.22036013007164001,
"learning_rate": 5.37504e-05,
"loss": 0.8917,
"step": 45720
},
{
"epoch": 0.73184,
"grad_norm": 0.22023898363113403,
"learning_rate": 5.36864e-05,
"loss": 0.8671,
"step": 45740
},
{
"epoch": 0.73216,
"grad_norm": 0.23420779407024384,
"learning_rate": 5.3622400000000003e-05,
"loss": 0.9288,
"step": 45760
},
{
"epoch": 0.73248,
"grad_norm": 0.20039279758930206,
"learning_rate": 5.35584e-05,
"loss": 0.8755,
"step": 45780
},
{
"epoch": 0.7328,
"grad_norm": 0.2586964964866638,
"learning_rate": 5.34944e-05,
"loss": 0.8896,
"step": 45800
},
{
"epoch": 0.73312,
"grad_norm": 0.2525421380996704,
"learning_rate": 5.34304e-05,
"loss": 0.8514,
"step": 45820
},
{
"epoch": 0.73344,
"grad_norm": 0.2144252359867096,
"learning_rate": 5.3366400000000005e-05,
"loss": 0.9074,
"step": 45840
},
{
"epoch": 0.73376,
"grad_norm": 0.21878720819950104,
"learning_rate": 5.3302400000000007e-05,
"loss": 0.9488,
"step": 45860
},
{
"epoch": 0.73408,
"grad_norm": 0.24089403450489044,
"learning_rate": 5.32384e-05,
"loss": 0.9008,
"step": 45880
},
{
"epoch": 0.7344,
"grad_norm": 0.25092679262161255,
"learning_rate": 5.3174400000000004e-05,
"loss": 0.8905,
"step": 45900
},
{
"epoch": 0.73472,
"grad_norm": 0.24005566537380219,
"learning_rate": 5.3110400000000006e-05,
"loss": 0.8812,
"step": 45920
},
{
"epoch": 0.73504,
"grad_norm": 0.2381397932767868,
"learning_rate": 5.304640000000001e-05,
"loss": 0.8944,
"step": 45940
},
{
"epoch": 0.73536,
"grad_norm": 0.23799841105937958,
"learning_rate": 5.2982399999999996e-05,
"loss": 0.8684,
"step": 45960
},
{
"epoch": 0.73568,
"grad_norm": 0.22827275097370148,
"learning_rate": 5.29184e-05,
"loss": 0.8657,
"step": 45980
},
{
"epoch": 0.736,
"grad_norm": 0.24510063230991364,
"learning_rate": 5.285440000000001e-05,
"loss": 0.9598,
"step": 46000
},
{
"epoch": 0.73632,
"grad_norm": 0.22655485570430756,
"learning_rate": 5.279040000000001e-05,
"loss": 0.8511,
"step": 46020
},
{
"epoch": 0.73664,
"grad_norm": 0.23105552792549133,
"learning_rate": 5.27264e-05,
"loss": 0.8809,
"step": 46040
},
{
"epoch": 0.73696,
"grad_norm": 0.21182331442832947,
"learning_rate": 5.26624e-05,
"loss": 0.8786,
"step": 46060
},
{
"epoch": 0.73728,
"grad_norm": 0.2535363435745239,
"learning_rate": 5.25984e-05,
"loss": 0.8697,
"step": 46080
},
{
"epoch": 0.7376,
"grad_norm": 0.2286081463098526,
"learning_rate": 5.25344e-05,
"loss": 0.8694,
"step": 46100
},
{
"epoch": 0.73792,
"grad_norm": 0.2347458302974701,
"learning_rate": 5.24704e-05,
"loss": 0.9251,
"step": 46120
},
{
"epoch": 0.73824,
"grad_norm": 0.21052898466587067,
"learning_rate": 5.24064e-05,
"loss": 0.8944,
"step": 46140
},
{
"epoch": 0.73856,
"grad_norm": 0.23154202103614807,
"learning_rate": 5.23424e-05,
"loss": 0.8367,
"step": 46160
},
{
"epoch": 0.73888,
"grad_norm": 0.23162192106246948,
"learning_rate": 5.2278400000000004e-05,
"loss": 0.8762,
"step": 46180
},
{
"epoch": 0.7392,
"grad_norm": 0.2610846757888794,
"learning_rate": 5.2214400000000006e-05,
"loss": 0.9275,
"step": 46200
},
{
"epoch": 0.73952,
"grad_norm": 0.20983512699604034,
"learning_rate": 5.21504e-05,
"loss": 0.8411,
"step": 46220
},
{
"epoch": 0.73984,
"grad_norm": 0.27023133635520935,
"learning_rate": 5.20864e-05,
"loss": 0.8825,
"step": 46240
},
{
"epoch": 0.74016,
"grad_norm": 0.22272150218486786,
"learning_rate": 5.2022400000000005e-05,
"loss": 0.8826,
"step": 46260
},
{
"epoch": 0.74048,
"grad_norm": 0.22320957481861115,
"learning_rate": 5.195840000000001e-05,
"loss": 0.8204,
"step": 46280
},
{
"epoch": 0.7408,
"grad_norm": 0.20854775607585907,
"learning_rate": 5.1894399999999996e-05,
"loss": 0.8643,
"step": 46300
},
{
"epoch": 0.74112,
"grad_norm": 0.23853574693202972,
"learning_rate": 5.1830400000000004e-05,
"loss": 0.8504,
"step": 46320
},
{
"epoch": 0.74144,
"grad_norm": 0.2031133770942688,
"learning_rate": 5.1766400000000006e-05,
"loss": 0.8798,
"step": 46340
},
{
"epoch": 0.74176,
"grad_norm": 0.23090733587741852,
"learning_rate": 5.170240000000001e-05,
"loss": 0.8771,
"step": 46360
},
{
"epoch": 0.74208,
"grad_norm": 0.22893227636814117,
"learning_rate": 5.16384e-05,
"loss": 0.9161,
"step": 46380
},
{
"epoch": 0.7424,
"grad_norm": 0.24600179493427277,
"learning_rate": 5.15744e-05,
"loss": 0.9158,
"step": 46400
},
{
"epoch": 0.74272,
"grad_norm": 0.22234416007995605,
"learning_rate": 5.15104e-05,
"loss": 0.9224,
"step": 46420
},
{
"epoch": 0.74304,
"grad_norm": 0.22974424064159393,
"learning_rate": 5.14464e-05,
"loss": 0.8871,
"step": 46440
},
{
"epoch": 0.74336,
"grad_norm": 0.2495729774236679,
"learning_rate": 5.13824e-05,
"loss": 0.9271,
"step": 46460
},
{
"epoch": 0.74368,
"grad_norm": 0.2178795486688614,
"learning_rate": 5.13184e-05,
"loss": 0.86,
"step": 46480
},
{
"epoch": 0.744,
"grad_norm": 0.2375311255455017,
"learning_rate": 5.12544e-05,
"loss": 0.8524,
"step": 46500
},
{
"epoch": 0.74432,
"grad_norm": 0.21281583607196808,
"learning_rate": 5.1190400000000004e-05,
"loss": 0.9261,
"step": 46520
},
{
"epoch": 0.74464,
"grad_norm": 0.26535019278526306,
"learning_rate": 5.1126400000000006e-05,
"loss": 0.9067,
"step": 46540
},
{
"epoch": 0.74496,
"grad_norm": 0.1832839846611023,
"learning_rate": 5.10624e-05,
"loss": 0.8801,
"step": 46560
},
{
"epoch": 0.74528,
"grad_norm": 0.21736547350883484,
"learning_rate": 5.09984e-05,
"loss": 0.8953,
"step": 46580
},
{
"epoch": 0.7456,
"grad_norm": 0.2637736201286316,
"learning_rate": 5.0934400000000005e-05,
"loss": 0.8551,
"step": 46600
},
{
"epoch": 0.74592,
"grad_norm": 0.19663706421852112,
"learning_rate": 5.087040000000001e-05,
"loss": 0.9004,
"step": 46620
},
{
"epoch": 0.74624,
"grad_norm": 0.21443675458431244,
"learning_rate": 5.0806399999999995e-05,
"loss": 0.8947,
"step": 46640
},
{
"epoch": 0.74656,
"grad_norm": 0.2313489317893982,
"learning_rate": 5.0742400000000004e-05,
"loss": 0.8798,
"step": 46660
},
{
"epoch": 0.74688,
"grad_norm": 0.2411520630121231,
"learning_rate": 5.0678400000000006e-05,
"loss": 0.8833,
"step": 46680
},
{
"epoch": 0.7472,
"grad_norm": 0.24178458750247955,
"learning_rate": 5.061440000000001e-05,
"loss": 0.8768,
"step": 46700
},
{
"epoch": 0.74752,
"grad_norm": 0.24031583964824677,
"learning_rate": 5.0550399999999996e-05,
"loss": 0.9053,
"step": 46720
},
{
"epoch": 0.74784,
"grad_norm": 0.24462060630321503,
"learning_rate": 5.04864e-05,
"loss": 0.8677,
"step": 46740
},
{
"epoch": 0.74816,
"grad_norm": 0.18988333642482758,
"learning_rate": 5.04224e-05,
"loss": 0.877,
"step": 46760
},
{
"epoch": 0.74848,
"grad_norm": 0.23754200339317322,
"learning_rate": 5.03584e-05,
"loss": 0.8808,
"step": 46780
},
{
"epoch": 0.7488,
"grad_norm": 0.2371503710746765,
"learning_rate": 5.02944e-05,
"loss": 0.9283,
"step": 46800
},
{
"epoch": 0.74912,
"grad_norm": 0.21101176738739014,
"learning_rate": 5.02304e-05,
"loss": 0.8769,
"step": 46820
},
{
"epoch": 0.74944,
"grad_norm": 0.23707903921604156,
"learning_rate": 5.01664e-05,
"loss": 0.876,
"step": 46840
},
{
"epoch": 0.74976,
"grad_norm": 0.25081855058670044,
"learning_rate": 5.01024e-05,
"loss": 0.8429,
"step": 46860
},
{
"epoch": 0.75008,
"grad_norm": 0.2517668604850769,
"learning_rate": 5.0038400000000005e-05,
"loss": 0.8936,
"step": 46880
},
{
"epoch": 0.7504,
"grad_norm": 0.2321518510580063,
"learning_rate": 4.997440000000001e-05,
"loss": 0.9427,
"step": 46900
},
{
"epoch": 0.75072,
"grad_norm": 0.3038017749786377,
"learning_rate": 4.99104e-05,
"loss": 0.9116,
"step": 46920
},
{
"epoch": 0.75104,
"grad_norm": 0.22047431766986847,
"learning_rate": 4.9846400000000004e-05,
"loss": 0.9356,
"step": 46940
},
{
"epoch": 0.75136,
"grad_norm": 0.2446911334991455,
"learning_rate": 4.97824e-05,
"loss": 0.8855,
"step": 46960
},
{
"epoch": 0.75168,
"grad_norm": 0.23208874464035034,
"learning_rate": 4.97184e-05,
"loss": 0.9064,
"step": 46980
},
{
"epoch": 0.752,
"grad_norm": 0.22263742983341217,
"learning_rate": 4.9654400000000004e-05,
"loss": 0.9262,
"step": 47000
},
{
"epoch": 0.75232,
"grad_norm": 0.24141012132167816,
"learning_rate": 4.9590400000000006e-05,
"loss": 0.8755,
"step": 47020
},
{
"epoch": 0.75264,
"grad_norm": 0.21403875946998596,
"learning_rate": 4.95264e-05,
"loss": 0.9046,
"step": 47040
},
{
"epoch": 0.75296,
"grad_norm": 0.22028857469558716,
"learning_rate": 4.94624e-05,
"loss": 0.8604,
"step": 47060
},
{
"epoch": 0.75328,
"grad_norm": 0.23686060309410095,
"learning_rate": 4.93984e-05,
"loss": 0.9034,
"step": 47080
},
{
"epoch": 0.7536,
"grad_norm": 0.21621714532375336,
"learning_rate": 4.93344e-05,
"loss": 0.8753,
"step": 47100
},
{
"epoch": 0.75392,
"grad_norm": 0.19985179603099823,
"learning_rate": 4.92704e-05,
"loss": 0.8533,
"step": 47120
},
{
"epoch": 0.75424,
"grad_norm": 0.25167474150657654,
"learning_rate": 4.9206400000000004e-05,
"loss": 0.899,
"step": 47140
},
{
"epoch": 0.75456,
"grad_norm": 0.22282272577285767,
"learning_rate": 4.9142400000000006e-05,
"loss": 0.8312,
"step": 47160
},
{
"epoch": 0.75488,
"grad_norm": 0.219001904129982,
"learning_rate": 4.90784e-05,
"loss": 0.9131,
"step": 47180
},
{
"epoch": 0.7552,
"grad_norm": 0.244069442152977,
"learning_rate": 4.90144e-05,
"loss": 0.8767,
"step": 47200
},
{
"epoch": 0.75552,
"grad_norm": 0.2010125070810318,
"learning_rate": 4.8950400000000005e-05,
"loss": 0.8617,
"step": 47220
},
{
"epoch": 0.75584,
"grad_norm": 0.19826588034629822,
"learning_rate": 4.888640000000001e-05,
"loss": 0.8655,
"step": 47240
},
{
"epoch": 0.75616,
"grad_norm": 0.2718552052974701,
"learning_rate": 4.88224e-05,
"loss": 0.8918,
"step": 47260
},
{
"epoch": 0.75648,
"grad_norm": 0.1939408779144287,
"learning_rate": 4.8758400000000004e-05,
"loss": 0.8851,
"step": 47280
},
{
"epoch": 0.7568,
"grad_norm": 0.2180645763874054,
"learning_rate": 4.86944e-05,
"loss": 0.8959,
"step": 47300
},
{
"epoch": 0.75712,
"grad_norm": 0.18682503700256348,
"learning_rate": 4.86304e-05,
"loss": 0.8892,
"step": 47320
},
{
"epoch": 0.75744,
"grad_norm": 0.24464449286460876,
"learning_rate": 4.85664e-05,
"loss": 0.8513,
"step": 47340
},
{
"epoch": 0.75776,
"grad_norm": 0.22836542129516602,
"learning_rate": 4.8502400000000005e-05,
"loss": 0.7955,
"step": 47360
},
{
"epoch": 0.75808,
"grad_norm": 0.236654594540596,
"learning_rate": 4.84384e-05,
"loss": 0.9108,
"step": 47380
},
{
"epoch": 0.7584,
"grad_norm": 0.23842214047908783,
"learning_rate": 4.83744e-05,
"loss": 0.9084,
"step": 47400
},
{
"epoch": 0.75872,
"grad_norm": 0.26700836420059204,
"learning_rate": 4.83104e-05,
"loss": 0.8792,
"step": 47420
},
{
"epoch": 0.75904,
"grad_norm": 0.19707651436328888,
"learning_rate": 4.82464e-05,
"loss": 0.885,
"step": 47440
},
{
"epoch": 0.75936,
"grad_norm": 0.2712419927120209,
"learning_rate": 4.81824e-05,
"loss": 0.891,
"step": 47460
},
{
"epoch": 0.75968,
"grad_norm": 0.22818304598331451,
"learning_rate": 4.81184e-05,
"loss": 0.8668,
"step": 47480
},
{
"epoch": 0.76,
"grad_norm": 0.24705687165260315,
"learning_rate": 4.8054400000000005e-05,
"loss": 0.853,
"step": 47500
},
{
"epoch": 0.76032,
"grad_norm": 0.2136003077030182,
"learning_rate": 4.79904e-05,
"loss": 0.8795,
"step": 47520
},
{
"epoch": 0.76064,
"grad_norm": 0.22492119669914246,
"learning_rate": 4.79264e-05,
"loss": 0.8962,
"step": 47540
},
{
"epoch": 0.76096,
"grad_norm": 0.21469563245773315,
"learning_rate": 4.7862400000000004e-05,
"loss": 0.8804,
"step": 47560
},
{
"epoch": 0.76128,
"grad_norm": 0.229572594165802,
"learning_rate": 4.7798400000000006e-05,
"loss": 0.898,
"step": 47580
},
{
"epoch": 0.7616,
"grad_norm": 0.2185087352991104,
"learning_rate": 4.77344e-05,
"loss": 0.9312,
"step": 47600
},
{
"epoch": 0.76192,
"grad_norm": 0.24852368235588074,
"learning_rate": 4.7670400000000004e-05,
"loss": 0.8916,
"step": 47620
},
{
"epoch": 0.76224,
"grad_norm": 0.20700128376483917,
"learning_rate": 4.76064e-05,
"loss": 0.8553,
"step": 47640
},
{
"epoch": 0.76256,
"grad_norm": 0.1880226880311966,
"learning_rate": 4.75424e-05,
"loss": 0.8134,
"step": 47660
},
{
"epoch": 0.76288,
"grad_norm": 0.24719256162643433,
"learning_rate": 4.74784e-05,
"loss": 0.9169,
"step": 47680
},
{
"epoch": 0.7632,
"grad_norm": 0.2389199137687683,
"learning_rate": 4.7414400000000005e-05,
"loss": 0.8902,
"step": 47700
},
{
"epoch": 0.76352,
"grad_norm": 0.26046285033226013,
"learning_rate": 4.73504e-05,
"loss": 0.9075,
"step": 47720
},
{
"epoch": 0.76384,
"grad_norm": 0.2406904399394989,
"learning_rate": 4.72864e-05,
"loss": 0.8481,
"step": 47740
},
{
"epoch": 0.76416,
"grad_norm": 0.24346570670604706,
"learning_rate": 4.72224e-05,
"loss": 0.8482,
"step": 47760
},
{
"epoch": 0.76448,
"grad_norm": 0.2557404935359955,
"learning_rate": 4.7158400000000006e-05,
"loss": 0.9096,
"step": 47780
},
{
"epoch": 0.7648,
"grad_norm": 0.22144544124603271,
"learning_rate": 4.70944e-05,
"loss": 0.8489,
"step": 47800
},
{
"epoch": 0.76512,
"grad_norm": 0.2356208860874176,
"learning_rate": 4.70304e-05,
"loss": 0.8703,
"step": 47820
},
{
"epoch": 0.76544,
"grad_norm": 0.20454536378383636,
"learning_rate": 4.6966400000000005e-05,
"loss": 0.9135,
"step": 47840
},
{
"epoch": 0.76576,
"grad_norm": 0.2013743370771408,
"learning_rate": 4.69024e-05,
"loss": 0.8641,
"step": 47860
},
{
"epoch": 0.76608,
"grad_norm": 0.20594638586044312,
"learning_rate": 4.68384e-05,
"loss": 0.9012,
"step": 47880
},
{
"epoch": 0.7664,
"grad_norm": 0.23454588651657104,
"learning_rate": 4.6774400000000004e-05,
"loss": 0.8529,
"step": 47900
},
{
"epoch": 0.76672,
"grad_norm": 0.2404514104127884,
"learning_rate": 4.6710400000000006e-05,
"loss": 0.9,
"step": 47920
},
{
"epoch": 0.76704,
"grad_norm": 0.21869786083698273,
"learning_rate": 4.66496e-05,
"loss": 0.8843,
"step": 47940
},
{
"epoch": 0.76736,
"grad_norm": 0.228584423661232,
"learning_rate": 4.65856e-05,
"loss": 0.935,
"step": 47960
},
{
"epoch": 0.76768,
"grad_norm": 0.2123897522687912,
"learning_rate": 4.6521600000000005e-05,
"loss": 0.8897,
"step": 47980
},
{
"epoch": 0.768,
"grad_norm": 0.27578243613243103,
"learning_rate": 4.64576e-05,
"loss": 0.8803,
"step": 48000
},
{
"epoch": 0.76832,
"grad_norm": 0.2598460614681244,
"learning_rate": 4.63936e-05,
"loss": 0.8164,
"step": 48020
},
{
"epoch": 0.76864,
"grad_norm": 0.21342791616916656,
"learning_rate": 4.63296e-05,
"loss": 0.9123,
"step": 48040
},
{
"epoch": 0.76896,
"grad_norm": 0.2282058596611023,
"learning_rate": 4.6265600000000006e-05,
"loss": 0.8956,
"step": 48060
},
{
"epoch": 0.76928,
"grad_norm": 0.21980886161327362,
"learning_rate": 4.62016e-05,
"loss": 0.8567,
"step": 48080
},
{
"epoch": 0.7696,
"grad_norm": 0.24570724368095398,
"learning_rate": 4.6137600000000004e-05,
"loss": 0.9293,
"step": 48100
},
{
"epoch": 0.76992,
"grad_norm": 0.21538405120372772,
"learning_rate": 4.60736e-05,
"loss": 0.8453,
"step": 48120
},
{
"epoch": 0.77024,
"grad_norm": 0.19840775430202484,
"learning_rate": 4.60096e-05,
"loss": 0.8911,
"step": 48140
},
{
"epoch": 0.77056,
"grad_norm": 0.24362660944461823,
"learning_rate": 4.59456e-05,
"loss": 0.8839,
"step": 48160
},
{
"epoch": 0.77088,
"grad_norm": 0.23664100468158722,
"learning_rate": 4.5881600000000005e-05,
"loss": 0.9147,
"step": 48180
},
{
"epoch": 0.7712,
"grad_norm": 0.22470878064632416,
"learning_rate": 4.581760000000001e-05,
"loss": 0.9081,
"step": 48200
},
{
"epoch": 0.77152,
"grad_norm": 0.25879278779029846,
"learning_rate": 4.57536e-05,
"loss": 0.8678,
"step": 48220
},
{
"epoch": 0.77184,
"grad_norm": 0.22820644080638885,
"learning_rate": 4.5689600000000004e-05,
"loss": 0.8437,
"step": 48240
},
{
"epoch": 0.77216,
"grad_norm": 0.24052444100379944,
"learning_rate": 4.56256e-05,
"loss": 0.8222,
"step": 48260
},
{
"epoch": 0.77248,
"grad_norm": 0.2304847240447998,
"learning_rate": 4.55616e-05,
"loss": 0.9303,
"step": 48280
},
{
"epoch": 0.7728,
"grad_norm": 0.2518431544303894,
"learning_rate": 4.54976e-05,
"loss": 0.8785,
"step": 48300
},
{
"epoch": 0.77312,
"grad_norm": 0.2376391738653183,
"learning_rate": 4.5433600000000005e-05,
"loss": 0.8469,
"step": 48320
},
{
"epoch": 0.77344,
"grad_norm": 0.24182195961475372,
"learning_rate": 4.53696e-05,
"loss": 0.916,
"step": 48340
},
{
"epoch": 0.77376,
"grad_norm": 0.25106081366539,
"learning_rate": 4.53056e-05,
"loss": 0.8624,
"step": 48360
},
{
"epoch": 0.77408,
"grad_norm": 0.22700931131839752,
"learning_rate": 4.52416e-05,
"loss": 0.9024,
"step": 48380
},
{
"epoch": 0.7744,
"grad_norm": 0.21767041087150574,
"learning_rate": 4.5177600000000006e-05,
"loss": 0.9082,
"step": 48400
},
{
"epoch": 0.77472,
"grad_norm": 0.2539537250995636,
"learning_rate": 4.51136e-05,
"loss": 0.8893,
"step": 48420
},
{
"epoch": 0.77504,
"grad_norm": 0.27352043986320496,
"learning_rate": 4.50496e-05,
"loss": 0.8678,
"step": 48440
},
{
"epoch": 0.77536,
"grad_norm": 0.22831988334655762,
"learning_rate": 4.49856e-05,
"loss": 0.8654,
"step": 48460
},
{
"epoch": 0.77568,
"grad_norm": 0.24554172158241272,
"learning_rate": 4.49216e-05,
"loss": 0.8604,
"step": 48480
},
{
"epoch": 0.776,
"grad_norm": 0.22556883096694946,
"learning_rate": 4.48576e-05,
"loss": 0.9112,
"step": 48500
},
{
"epoch": 0.77632,
"grad_norm": 0.22238677740097046,
"learning_rate": 4.4793600000000004e-05,
"loss": 0.8963,
"step": 48520
},
{
"epoch": 0.77664,
"grad_norm": 0.18963344395160675,
"learning_rate": 4.4729600000000006e-05,
"loss": 0.8464,
"step": 48540
},
{
"epoch": 0.77696,
"grad_norm": 0.21553830802440643,
"learning_rate": 4.46656e-05,
"loss": 0.8701,
"step": 48560
},
{
"epoch": 0.77728,
"grad_norm": 0.25254547595977783,
"learning_rate": 4.4601600000000003e-05,
"loss": 0.8503,
"step": 48580
},
{
"epoch": 0.7776,
"grad_norm": 0.21796059608459473,
"learning_rate": 4.45376e-05,
"loss": 0.8817,
"step": 48600
},
{
"epoch": 0.77792,
"grad_norm": 0.19532719254493713,
"learning_rate": 4.447360000000001e-05,
"loss": 0.9274,
"step": 48620
},
{
"epoch": 0.77824,
"grad_norm": 0.19615907967090607,
"learning_rate": 4.44096e-05,
"loss": 0.8781,
"step": 48640
},
{
"epoch": 0.77856,
"grad_norm": 0.20943795144557953,
"learning_rate": 4.4345600000000004e-05,
"loss": 0.8958,
"step": 48660
},
{
"epoch": 0.77888,
"grad_norm": 0.1854809671640396,
"learning_rate": 4.42816e-05,
"loss": 0.8712,
"step": 48680
},
{
"epoch": 0.7792,
"grad_norm": 0.23485055565834045,
"learning_rate": 4.42208e-05,
"loss": 0.8504,
"step": 48700
},
{
"epoch": 0.77952,
"grad_norm": 0.2727571725845337,
"learning_rate": 4.4156800000000004e-05,
"loss": 0.8882,
"step": 48720
},
{
"epoch": 0.77984,
"grad_norm": 0.2016323059797287,
"learning_rate": 4.40928e-05,
"loss": 0.8817,
"step": 48740
},
{
"epoch": 0.78016,
"grad_norm": 0.22555996477603912,
"learning_rate": 4.40288e-05,
"loss": 0.8778,
"step": 48760
},
{
"epoch": 0.78048,
"grad_norm": 0.19512006640434265,
"learning_rate": 4.39648e-05,
"loss": 0.8641,
"step": 48780
},
{
"epoch": 0.7808,
"grad_norm": 0.21679182350635529,
"learning_rate": 4.3900800000000005e-05,
"loss": 0.8819,
"step": 48800
},
{
"epoch": 0.78112,
"grad_norm": 0.23714877665042877,
"learning_rate": 4.38368e-05,
"loss": 0.8771,
"step": 48820
},
{
"epoch": 0.78144,
"grad_norm": 0.2215253859758377,
"learning_rate": 4.37728e-05,
"loss": 0.8592,
"step": 48840
},
{
"epoch": 0.78176,
"grad_norm": 0.21237672865390778,
"learning_rate": 4.37088e-05,
"loss": 0.8735,
"step": 48860
},
{
"epoch": 0.78208,
"grad_norm": 0.24682950973510742,
"learning_rate": 4.36448e-05,
"loss": 0.8988,
"step": 48880
},
{
"epoch": 0.7824,
"grad_norm": 0.27274882793426514,
"learning_rate": 4.35808e-05,
"loss": 0.9076,
"step": 48900
},
{
"epoch": 0.78272,
"grad_norm": 0.20632825791835785,
"learning_rate": 4.35168e-05,
"loss": 0.8541,
"step": 48920
},
{
"epoch": 0.78304,
"grad_norm": 0.21251200139522552,
"learning_rate": 4.3452800000000005e-05,
"loss": 0.8965,
"step": 48940
},
{
"epoch": 0.78336,
"grad_norm": 0.2018088847398758,
"learning_rate": 4.33888e-05,
"loss": 0.8742,
"step": 48960
},
{
"epoch": 0.78368,
"grad_norm": 0.24776096642017365,
"learning_rate": 4.33248e-05,
"loss": 0.8693,
"step": 48980
},
{
"epoch": 0.784,
"grad_norm": 0.24149677157402039,
"learning_rate": 4.3260800000000004e-05,
"loss": 0.883,
"step": 49000
},
{
"epoch": 0.78432,
"grad_norm": 0.2117341160774231,
"learning_rate": 4.3196800000000006e-05,
"loss": 0.8831,
"step": 49020
},
{
"epoch": 0.78464,
"grad_norm": 0.25594037771224976,
"learning_rate": 4.31328e-05,
"loss": 0.9239,
"step": 49040
},
{
"epoch": 0.78496,
"grad_norm": 0.20546288788318634,
"learning_rate": 4.3068800000000003e-05,
"loss": 0.8446,
"step": 49060
},
{
"epoch": 0.78528,
"grad_norm": 0.23239131271839142,
"learning_rate": 4.30048e-05,
"loss": 0.8878,
"step": 49080
},
{
"epoch": 0.7856,
"grad_norm": 0.24074342846870422,
"learning_rate": 4.29408e-05,
"loss": 0.9033,
"step": 49100
},
{
"epoch": 0.78592,
"grad_norm": 0.24424532055854797,
"learning_rate": 4.28768e-05,
"loss": 0.9051,
"step": 49120
},
{
"epoch": 0.78624,
"grad_norm": 0.23116187751293182,
"learning_rate": 4.2812800000000005e-05,
"loss": 0.911,
"step": 49140
},
{
"epoch": 0.78656,
"grad_norm": 0.2513030171394348,
"learning_rate": 4.27488e-05,
"loss": 0.8569,
"step": 49160
},
{
"epoch": 0.78688,
"grad_norm": 0.2296024113893509,
"learning_rate": 4.26848e-05,
"loss": 0.8666,
"step": 49180
},
{
"epoch": 0.7872,
"grad_norm": 0.2069111168384552,
"learning_rate": 4.26208e-05,
"loss": 0.9025,
"step": 49200
},
{
"epoch": 0.78752,
"grad_norm": 0.21525107324123383,
"learning_rate": 4.25568e-05,
"loss": 0.8218,
"step": 49220
},
{
"epoch": 0.78784,
"grad_norm": 0.21345154941082,
"learning_rate": 4.24928e-05,
"loss": 0.8456,
"step": 49240
},
{
"epoch": 0.78816,
"grad_norm": 0.25389420986175537,
"learning_rate": 4.24288e-05,
"loss": 0.842,
"step": 49260
},
{
"epoch": 0.78848,
"grad_norm": 0.2326725423336029,
"learning_rate": 4.2364800000000005e-05,
"loss": 0.8504,
"step": 49280
},
{
"epoch": 0.7888,
"grad_norm": 0.21930308640003204,
"learning_rate": 4.23008e-05,
"loss": 0.8697,
"step": 49300
},
{
"epoch": 0.78912,
"grad_norm": 0.23466825485229492,
"learning_rate": 4.22368e-05,
"loss": 0.9128,
"step": 49320
},
{
"epoch": 0.78944,
"grad_norm": 0.24129875004291534,
"learning_rate": 4.2172800000000004e-05,
"loss": 0.8499,
"step": 49340
},
{
"epoch": 0.78976,
"grad_norm": 0.17660856246948242,
"learning_rate": 4.2108800000000006e-05,
"loss": 0.8559,
"step": 49360
},
{
"epoch": 0.79008,
"grad_norm": 0.24038086831569672,
"learning_rate": 4.20448e-05,
"loss": 0.8669,
"step": 49380
},
{
"epoch": 0.7904,
"grad_norm": 0.23702336847782135,
"learning_rate": 4.19808e-05,
"loss": 0.846,
"step": 49400
},
{
"epoch": 0.79072,
"grad_norm": 0.2305484116077423,
"learning_rate": 4.19168e-05,
"loss": 0.8462,
"step": 49420
},
{
"epoch": 0.79104,
"grad_norm": 0.24989739060401917,
"learning_rate": 4.18528e-05,
"loss": 0.9404,
"step": 49440
},
{
"epoch": 0.79136,
"grad_norm": 0.23767246305942535,
"learning_rate": 4.17888e-05,
"loss": 0.9021,
"step": 49460
},
{
"epoch": 0.79168,
"grad_norm": 0.244027242064476,
"learning_rate": 4.1724800000000004e-05,
"loss": 0.8757,
"step": 49480
},
{
"epoch": 0.792,
"grad_norm": 0.21049901843070984,
"learning_rate": 4.16608e-05,
"loss": 0.8554,
"step": 49500
},
{
"epoch": 0.79232,
"grad_norm": 0.2375907301902771,
"learning_rate": 4.15968e-05,
"loss": 0.8256,
"step": 49520
},
{
"epoch": 0.79264,
"grad_norm": 0.23760604858398438,
"learning_rate": 4.1532799999999996e-05,
"loss": 0.898,
"step": 49540
},
{
"epoch": 0.79296,
"grad_norm": 0.16031509637832642,
"learning_rate": 4.1468800000000005e-05,
"loss": 0.8516,
"step": 49560
},
{
"epoch": 0.79328,
"grad_norm": 0.21544058620929718,
"learning_rate": 4.140480000000001e-05,
"loss": 0.857,
"step": 49580
},
{
"epoch": 0.7936,
"grad_norm": 0.23034314811229706,
"learning_rate": 4.13408e-05,
"loss": 0.8784,
"step": 49600
},
{
"epoch": 0.79392,
"grad_norm": 0.23492272198200226,
"learning_rate": 4.1276800000000004e-05,
"loss": 0.9037,
"step": 49620
},
{
"epoch": 0.79424,
"grad_norm": 0.2450007051229477,
"learning_rate": 4.12128e-05,
"loss": 0.8969,
"step": 49640
},
{
"epoch": 0.79456,
"grad_norm": 0.21207576990127563,
"learning_rate": 4.11488e-05,
"loss": 0.8492,
"step": 49660
},
{
"epoch": 0.79488,
"grad_norm": 0.21204914152622223,
"learning_rate": 4.1084800000000003e-05,
"loss": 0.9025,
"step": 49680
},
{
"epoch": 0.7952,
"grad_norm": 0.2355094999074936,
"learning_rate": 4.1020800000000005e-05,
"loss": 0.8533,
"step": 49700
},
{
"epoch": 0.79552,
"grad_norm": 0.21224915981292725,
"learning_rate": 4.09568e-05,
"loss": 0.8374,
"step": 49720
},
{
"epoch": 0.79584,
"grad_norm": 0.2114105373620987,
"learning_rate": 4.08928e-05,
"loss": 0.8823,
"step": 49740
},
{
"epoch": 0.79616,
"grad_norm": 0.3748084306716919,
"learning_rate": 4.08288e-05,
"loss": 0.8607,
"step": 49760
},
{
"epoch": 0.79648,
"grad_norm": 0.2285369336605072,
"learning_rate": 4.07648e-05,
"loss": 0.8654,
"step": 49780
},
{
"epoch": 0.7968,
"grad_norm": 0.17516812682151794,
"learning_rate": 4.07008e-05,
"loss": 0.8728,
"step": 49800
},
{
"epoch": 0.79712,
"grad_norm": 0.25664079189300537,
"learning_rate": 4.0636800000000004e-05,
"loss": 0.9342,
"step": 49820
},
{
"epoch": 0.79744,
"grad_norm": 0.2027619630098343,
"learning_rate": 4.05728e-05,
"loss": 0.8644,
"step": 49840
},
{
"epoch": 0.79776,
"grad_norm": 0.22048500180244446,
"learning_rate": 4.05088e-05,
"loss": 0.8586,
"step": 49860
},
{
"epoch": 0.79808,
"grad_norm": 0.2323845624923706,
"learning_rate": 4.0444799999999996e-05,
"loss": 0.8298,
"step": 49880
},
{
"epoch": 0.7984,
"grad_norm": 0.21872085332870483,
"learning_rate": 4.0380800000000005e-05,
"loss": 0.8584,
"step": 49900
},
{
"epoch": 0.79872,
"grad_norm": 0.20625688135623932,
"learning_rate": 4.031680000000001e-05,
"loss": 0.8122,
"step": 49920
},
{
"epoch": 0.79904,
"grad_norm": 0.20388665795326233,
"learning_rate": 4.02528e-05,
"loss": 0.8838,
"step": 49940
},
{
"epoch": 0.79936,
"grad_norm": 0.2362195998430252,
"learning_rate": 4.0188800000000004e-05,
"loss": 0.9139,
"step": 49960
},
{
"epoch": 0.79968,
"grad_norm": 0.19558613002300262,
"learning_rate": 4.01248e-05,
"loss": 0.8761,
"step": 49980
},
{
"epoch": 0.8,
"grad_norm": 0.18318922817707062,
"learning_rate": 4.00608e-05,
"loss": 0.8705,
"step": 50000
},
{
"epoch": 0.80032,
"grad_norm": 0.2216624766588211,
"learning_rate": 3.99968e-05,
"loss": 0.891,
"step": 50020
},
{
"epoch": 0.80064,
"grad_norm": 0.22886547446250916,
"learning_rate": 3.9932800000000005e-05,
"loss": 0.8926,
"step": 50040
},
{
"epoch": 0.80096,
"grad_norm": 0.2386888712644577,
"learning_rate": 3.98688e-05,
"loss": 0.8946,
"step": 50060
},
{
"epoch": 0.80128,
"grad_norm": 0.22732418775558472,
"learning_rate": 3.98048e-05,
"loss": 0.8874,
"step": 50080
},
{
"epoch": 0.8016,
"grad_norm": 0.19502227008342743,
"learning_rate": 3.97408e-05,
"loss": 0.8628,
"step": 50100
},
{
"epoch": 0.80192,
"grad_norm": 0.22442220151424408,
"learning_rate": 3.9676800000000006e-05,
"loss": 0.8888,
"step": 50120
},
{
"epoch": 0.80224,
"grad_norm": 0.2765730917453766,
"learning_rate": 3.96128e-05,
"loss": 0.8692,
"step": 50140
},
{
"epoch": 0.80256,
"grad_norm": 0.20345774292945862,
"learning_rate": 3.95488e-05,
"loss": 0.8961,
"step": 50160
},
{
"epoch": 0.80288,
"grad_norm": 0.17965848743915558,
"learning_rate": 3.94848e-05,
"loss": 0.8693,
"step": 50180
},
{
"epoch": 0.8032,
"grad_norm": 0.2365620881319046,
"learning_rate": 3.94208e-05,
"loss": 0.8645,
"step": 50200
},
{
"epoch": 0.80352,
"grad_norm": 0.22703875601291656,
"learning_rate": 3.93568e-05,
"loss": 0.9219,
"step": 50220
},
{
"epoch": 0.80384,
"grad_norm": 0.21852591633796692,
"learning_rate": 3.9292800000000004e-05,
"loss": 0.8594,
"step": 50240
},
{
"epoch": 0.80416,
"grad_norm": 0.2254893034696579,
"learning_rate": 3.9228800000000006e-05,
"loss": 0.8786,
"step": 50260
},
{
"epoch": 0.80448,
"grad_norm": 0.21090517938137054,
"learning_rate": 3.91648e-05,
"loss": 0.8894,
"step": 50280
},
{
"epoch": 0.8048,
"grad_norm": 0.21594619750976562,
"learning_rate": 3.9100800000000003e-05,
"loss": 0.8921,
"step": 50300
},
{
"epoch": 0.80512,
"grad_norm": 0.1902090162038803,
"learning_rate": 3.90368e-05,
"loss": 0.8454,
"step": 50320
},
{
"epoch": 0.80544,
"grad_norm": 0.3519250452518463,
"learning_rate": 3.89728e-05,
"loss": 0.8987,
"step": 50340
},
{
"epoch": 0.80576,
"grad_norm": 0.2199210673570633,
"learning_rate": 3.89088e-05,
"loss": 0.8827,
"step": 50360
},
{
"epoch": 0.80608,
"grad_norm": 0.23292851448059082,
"learning_rate": 3.8844800000000005e-05,
"loss": 0.8713,
"step": 50380
},
{
"epoch": 0.8064,
"grad_norm": 0.21949096024036407,
"learning_rate": 3.87808e-05,
"loss": 0.8707,
"step": 50400
},
{
"epoch": 0.80672,
"grad_norm": 0.25161877274513245,
"learning_rate": 3.87168e-05,
"loss": 0.9223,
"step": 50420
},
{
"epoch": 0.80704,
"grad_norm": 0.23557806015014648,
"learning_rate": 3.86528e-05,
"loss": 0.8855,
"step": 50440
},
{
"epoch": 0.80736,
"grad_norm": 0.2316737025976181,
"learning_rate": 3.8588800000000006e-05,
"loss": 0.8749,
"step": 50460
},
{
"epoch": 0.80768,
"grad_norm": 0.20035359263420105,
"learning_rate": 3.85248e-05,
"loss": 0.838,
"step": 50480
},
{
"epoch": 0.808,
"grad_norm": 0.17797014117240906,
"learning_rate": 3.84608e-05,
"loss": 0.839,
"step": 50500
},
{
"epoch": 0.80832,
"grad_norm": 0.2226208597421646,
"learning_rate": 3.83968e-05,
"loss": 0.8729,
"step": 50520
},
{
"epoch": 0.80864,
"grad_norm": 0.2194611132144928,
"learning_rate": 3.83328e-05,
"loss": 0.9066,
"step": 50540
},
{
"epoch": 0.80896,
"grad_norm": 0.21009878814220428,
"learning_rate": 3.82688e-05,
"loss": 0.8454,
"step": 50560
},
{
"epoch": 0.80928,
"grad_norm": 0.24309992790222168,
"learning_rate": 3.8204800000000004e-05,
"loss": 0.8612,
"step": 50580
},
{
"epoch": 0.8096,
"grad_norm": 0.19760167598724365,
"learning_rate": 3.8140800000000006e-05,
"loss": 0.9226,
"step": 50600
},
{
"epoch": 0.80992,
"grad_norm": 0.24959954619407654,
"learning_rate": 3.80768e-05,
"loss": 0.8667,
"step": 50620
},
{
"epoch": 0.81024,
"grad_norm": 0.2227480560541153,
"learning_rate": 3.80128e-05,
"loss": 0.9303,
"step": 50640
},
{
"epoch": 0.81056,
"grad_norm": 0.2228361815214157,
"learning_rate": 3.79488e-05,
"loss": 0.8365,
"step": 50660
},
{
"epoch": 0.81088,
"grad_norm": 0.22961002588272095,
"learning_rate": 3.788480000000001e-05,
"loss": 0.8257,
"step": 50680
},
{
"epoch": 0.8112,
"grad_norm": 0.19517934322357178,
"learning_rate": 3.78208e-05,
"loss": 0.8486,
"step": 50700
},
{
"epoch": 0.81152,
"grad_norm": 0.21257704496383667,
"learning_rate": 3.7756800000000004e-05,
"loss": 0.8665,
"step": 50720
},
{
"epoch": 0.81184,
"grad_norm": 0.17308840155601501,
"learning_rate": 3.76928e-05,
"loss": 0.8682,
"step": 50740
},
{
"epoch": 0.81216,
"grad_norm": 0.2145150750875473,
"learning_rate": 3.76288e-05,
"loss": 0.8652,
"step": 50760
},
{
"epoch": 0.81248,
"grad_norm": 0.20915599167346954,
"learning_rate": 3.75648e-05,
"loss": 0.881,
"step": 50780
},
{
"epoch": 0.8128,
"grad_norm": 0.2703164219856262,
"learning_rate": 3.7500800000000005e-05,
"loss": 0.8451,
"step": 50800
},
{
"epoch": 0.81312,
"grad_norm": 0.218171164393425,
"learning_rate": 3.74368e-05,
"loss": 0.8879,
"step": 50820
},
{
"epoch": 0.81344,
"grad_norm": 0.2015322595834732,
"learning_rate": 3.73728e-05,
"loss": 0.8583,
"step": 50840
},
{
"epoch": 0.81376,
"grad_norm": 0.2532670199871063,
"learning_rate": 3.73088e-05,
"loss": 0.9111,
"step": 50860
},
{
"epoch": 0.81408,
"grad_norm": 0.22806140780448914,
"learning_rate": 3.72448e-05,
"loss": 0.8677,
"step": 50880
},
{
"epoch": 0.8144,
"grad_norm": 0.20651741325855255,
"learning_rate": 3.71808e-05,
"loss": 0.9084,
"step": 50900
},
{
"epoch": 0.81472,
"grad_norm": 0.23538829386234283,
"learning_rate": 3.7116800000000004e-05,
"loss": 0.888,
"step": 50920
},
{
"epoch": 0.81504,
"grad_norm": 0.23798079788684845,
"learning_rate": 3.7052800000000005e-05,
"loss": 0.8974,
"step": 50940
},
{
"epoch": 0.81536,
"grad_norm": 0.24513110518455505,
"learning_rate": 3.69888e-05,
"loss": 0.9018,
"step": 50960
},
{
"epoch": 0.81568,
"grad_norm": 0.24939313530921936,
"learning_rate": 3.69248e-05,
"loss": 0.9033,
"step": 50980
},
{
"epoch": 0.816,
"grad_norm": 0.20573210716247559,
"learning_rate": 3.68608e-05,
"loss": 0.874,
"step": 51000
},
{
"epoch": 0.81632,
"grad_norm": 0.22649157047271729,
"learning_rate": 3.6796800000000007e-05,
"loss": 0.9008,
"step": 51020
},
{
"epoch": 0.81664,
"grad_norm": 0.24870529770851135,
"learning_rate": 3.67328e-05,
"loss": 0.8949,
"step": 51040
},
{
"epoch": 0.81696,
"grad_norm": 0.22699517011642456,
"learning_rate": 3.6668800000000004e-05,
"loss": 0.9049,
"step": 51060
},
{
"epoch": 0.81728,
"grad_norm": 0.24480411410331726,
"learning_rate": 3.66048e-05,
"loss": 0.8855,
"step": 51080
},
{
"epoch": 0.8176,
"grad_norm": 0.2394665777683258,
"learning_rate": 3.65408e-05,
"loss": 0.8868,
"step": 51100
},
{
"epoch": 0.81792,
"grad_norm": 0.20132653415203094,
"learning_rate": 3.64768e-05,
"loss": 0.8209,
"step": 51120
},
{
"epoch": 0.81824,
"grad_norm": 0.2139676958322525,
"learning_rate": 3.6412800000000005e-05,
"loss": 0.8852,
"step": 51140
},
{
"epoch": 0.81856,
"grad_norm": 0.23041175305843353,
"learning_rate": 3.63488e-05,
"loss": 0.8864,
"step": 51160
},
{
"epoch": 0.81888,
"grad_norm": 0.18776430189609528,
"learning_rate": 3.62848e-05,
"loss": 0.8675,
"step": 51180
},
{
"epoch": 0.8192,
"grad_norm": 0.24612362682819366,
"learning_rate": 3.62208e-05,
"loss": 0.9132,
"step": 51200
},
{
"epoch": 0.81952,
"grad_norm": 0.235810786485672,
"learning_rate": 3.61568e-05,
"loss": 0.8947,
"step": 51220
},
{
"epoch": 0.81984,
"grad_norm": 0.20968469977378845,
"learning_rate": 3.60928e-05,
"loss": 0.8943,
"step": 51240
},
{
"epoch": 0.82016,
"grad_norm": 0.20469830930233002,
"learning_rate": 3.60288e-05,
"loss": 0.8561,
"step": 51260
},
{
"epoch": 0.82048,
"grad_norm": 0.19832849502563477,
"learning_rate": 3.5964800000000005e-05,
"loss": 0.8784,
"step": 51280
},
{
"epoch": 0.8208,
"grad_norm": 0.2282322198152542,
"learning_rate": 3.59008e-05,
"loss": 0.8491,
"step": 51300
},
{
"epoch": 0.82112,
"grad_norm": 0.21050651371479034,
"learning_rate": 3.58368e-05,
"loss": 0.8547,
"step": 51320
},
{
"epoch": 0.82144,
"grad_norm": 0.21298284828662872,
"learning_rate": 3.57728e-05,
"loss": 0.8696,
"step": 51340
},
{
"epoch": 0.82176,
"grad_norm": 0.1952245980501175,
"learning_rate": 3.5708800000000006e-05,
"loss": 0.8184,
"step": 51360
},
{
"epoch": 0.82208,
"grad_norm": 0.23376013338565826,
"learning_rate": 3.56448e-05,
"loss": 0.8348,
"step": 51380
},
{
"epoch": 0.8224,
"grad_norm": 0.22029918432235718,
"learning_rate": 3.55808e-05,
"loss": 0.8956,
"step": 51400
},
{
"epoch": 0.82272,
"grad_norm": 0.23580487072467804,
"learning_rate": 3.55168e-05,
"loss": 0.9059,
"step": 51420
},
{
"epoch": 0.82304,
"grad_norm": 0.2074773609638214,
"learning_rate": 3.54528e-05,
"loss": 0.8591,
"step": 51440
},
{
"epoch": 0.82336,
"grad_norm": 0.21169452369213104,
"learning_rate": 3.53888e-05,
"loss": 0.8603,
"step": 51460
},
{
"epoch": 0.82368,
"grad_norm": 0.21765758097171783,
"learning_rate": 3.5324800000000004e-05,
"loss": 0.9107,
"step": 51480
},
{
"epoch": 0.824,
"grad_norm": 0.23709143698215485,
"learning_rate": 3.52608e-05,
"loss": 0.8909,
"step": 51500
},
{
"epoch": 0.82432,
"grad_norm": 0.18285530805587769,
"learning_rate": 3.51968e-05,
"loss": 0.8699,
"step": 51520
},
{
"epoch": 0.82464,
"grad_norm": 0.19453024864196777,
"learning_rate": 3.51328e-05,
"loss": 0.8841,
"step": 51540
},
{
"epoch": 0.82496,
"grad_norm": 0.20472079515457153,
"learning_rate": 3.50688e-05,
"loss": 0.8651,
"step": 51560
},
{
"epoch": 0.82528,
"grad_norm": 0.20934510231018066,
"learning_rate": 3.500480000000001e-05,
"loss": 0.9298,
"step": 51580
},
{
"epoch": 0.8256,
"grad_norm": 0.20935463905334473,
"learning_rate": 3.49408e-05,
"loss": 0.8238,
"step": 51600
},
{
"epoch": 0.82592,
"grad_norm": 0.19985993206501007,
"learning_rate": 3.4876800000000005e-05,
"loss": 0.8879,
"step": 51620
},
{
"epoch": 0.82624,
"grad_norm": 0.21266649663448334,
"learning_rate": 3.48128e-05,
"loss": 0.8754,
"step": 51640
},
{
"epoch": 0.82656,
"grad_norm": 0.21522794663906097,
"learning_rate": 3.47488e-05,
"loss": 0.8851,
"step": 51660
},
{
"epoch": 0.82688,
"grad_norm": 0.21246004104614258,
"learning_rate": 3.4684800000000004e-05,
"loss": 0.8588,
"step": 51680
},
{
"epoch": 0.8272,
"grad_norm": 0.23577255010604858,
"learning_rate": 3.4620800000000006e-05,
"loss": 0.8502,
"step": 51700
},
{
"epoch": 0.82752,
"grad_norm": 0.22873230278491974,
"learning_rate": 3.45568e-05,
"loss": 0.9434,
"step": 51720
},
{
"epoch": 0.82784,
"grad_norm": 0.24500973522663116,
"learning_rate": 3.44928e-05,
"loss": 0.8575,
"step": 51740
},
{
"epoch": 0.82816,
"grad_norm": 0.24861827492713928,
"learning_rate": 3.44288e-05,
"loss": 0.8601,
"step": 51760
},
{
"epoch": 0.82848,
"grad_norm": 0.25244006514549255,
"learning_rate": 3.43648e-05,
"loss": 0.8957,
"step": 51780
},
{
"epoch": 0.8288,
"grad_norm": 0.24348974227905273,
"learning_rate": 3.43008e-05,
"loss": 0.8433,
"step": 51800
},
{
"epoch": 0.82912,
"grad_norm": 0.23009903728961945,
"learning_rate": 3.4236800000000004e-05,
"loss": 0.9117,
"step": 51820
},
{
"epoch": 0.82944,
"grad_norm": 0.2047446370124817,
"learning_rate": 3.41728e-05,
"loss": 0.8102,
"step": 51840
},
{
"epoch": 0.82976,
"grad_norm": 0.2244807630777359,
"learning_rate": 3.41088e-05,
"loss": 0.8878,
"step": 51860
},
{
"epoch": 0.83008,
"grad_norm": 0.23874083161354065,
"learning_rate": 3.4044799999999996e-05,
"loss": 0.9476,
"step": 51880
},
{
"epoch": 0.8304,
"grad_norm": 0.21802900731563568,
"learning_rate": 3.39808e-05,
"loss": 0.8457,
"step": 51900
},
{
"epoch": 0.83072,
"grad_norm": 0.32707545161247253,
"learning_rate": 3.391680000000001e-05,
"loss": 0.8705,
"step": 51920
},
{
"epoch": 0.83104,
"grad_norm": 0.22364617884159088,
"learning_rate": 3.38528e-05,
"loss": 0.9154,
"step": 51940
},
{
"epoch": 0.83136,
"grad_norm": 0.23513180017471313,
"learning_rate": 3.3788800000000004e-05,
"loss": 0.863,
"step": 51960
},
{
"epoch": 0.83168,
"grad_norm": 0.24189570546150208,
"learning_rate": 3.37248e-05,
"loss": 0.9223,
"step": 51980
},
{
"epoch": 0.832,
"grad_norm": 0.2324758619070053,
"learning_rate": 3.36608e-05,
"loss": 0.8858,
"step": 52000
},
{
"epoch": 0.83232,
"grad_norm": 0.21225537359714508,
"learning_rate": 3.35968e-05,
"loss": 0.8573,
"step": 52020
},
{
"epoch": 0.83264,
"grad_norm": 0.2314990609884262,
"learning_rate": 3.3532800000000005e-05,
"loss": 0.9258,
"step": 52040
},
{
"epoch": 0.83296,
"grad_norm": 0.22183631360530853,
"learning_rate": 3.34688e-05,
"loss": 0.8842,
"step": 52060
},
{
"epoch": 0.83328,
"grad_norm": 0.23704120516777039,
"learning_rate": 3.34048e-05,
"loss": 0.8513,
"step": 52080
},
{
"epoch": 0.8336,
"grad_norm": 0.21080462634563446,
"learning_rate": 3.33408e-05,
"loss": 0.8745,
"step": 52100
},
{
"epoch": 0.83392,
"grad_norm": 0.25182968378067017,
"learning_rate": 3.32768e-05,
"loss": 0.8492,
"step": 52120
},
{
"epoch": 0.83424,
"grad_norm": 0.22719983756542206,
"learning_rate": 3.32128e-05,
"loss": 0.8727,
"step": 52140
},
{
"epoch": 0.83456,
"grad_norm": 0.36781367659568787,
"learning_rate": 3.3148800000000004e-05,
"loss": 0.9106,
"step": 52160
},
{
"epoch": 0.83488,
"grad_norm": 0.23190808296203613,
"learning_rate": 3.30848e-05,
"loss": 0.8574,
"step": 52180
},
{
"epoch": 0.8352,
"grad_norm": 0.19346857070922852,
"learning_rate": 3.30208e-05,
"loss": 0.8695,
"step": 52200
},
{
"epoch": 0.83552,
"grad_norm": 0.21294453740119934,
"learning_rate": 3.29568e-05,
"loss": 0.8724,
"step": 52220
},
{
"epoch": 0.83584,
"grad_norm": 0.18836593627929688,
"learning_rate": 3.2892800000000005e-05,
"loss": 0.8623,
"step": 52240
},
{
"epoch": 0.83616,
"grad_norm": 0.21922753751277924,
"learning_rate": 3.2828800000000007e-05,
"loss": 0.8477,
"step": 52260
},
{
"epoch": 0.83648,
"grad_norm": 0.2198483943939209,
"learning_rate": 3.27648e-05,
"loss": 0.9069,
"step": 52280
},
{
"epoch": 0.8368,
"grad_norm": 0.20438086986541748,
"learning_rate": 3.2700800000000004e-05,
"loss": 0.8833,
"step": 52300
},
{
"epoch": 0.83712,
"grad_norm": 0.2795603573322296,
"learning_rate": 3.26368e-05,
"loss": 0.9141,
"step": 52320
},
{
"epoch": 0.83744,
"grad_norm": 0.22276397049427032,
"learning_rate": 3.25728e-05,
"loss": 0.8741,
"step": 52340
},
{
"epoch": 0.83776,
"grad_norm": 0.21769918501377106,
"learning_rate": 3.25088e-05,
"loss": 0.88,
"step": 52360
},
{
"epoch": 0.83808,
"grad_norm": 0.22383011877536774,
"learning_rate": 3.2444800000000005e-05,
"loss": 0.8504,
"step": 52380
},
{
"epoch": 0.8384,
"grad_norm": 0.23771865665912628,
"learning_rate": 3.23808e-05,
"loss": 0.9323,
"step": 52400
},
{
"epoch": 0.83872,
"grad_norm": 0.20924776792526245,
"learning_rate": 3.23168e-05,
"loss": 0.8668,
"step": 52420
},
{
"epoch": 0.83904,
"grad_norm": 0.27309560775756836,
"learning_rate": 3.22528e-05,
"loss": 0.9336,
"step": 52440
},
{
"epoch": 0.83936,
"grad_norm": 0.24357040226459503,
"learning_rate": 3.21888e-05,
"loss": 0.9199,
"step": 52460
},
{
"epoch": 0.83968,
"grad_norm": 0.23052769899368286,
"learning_rate": 3.21248e-05,
"loss": 0.9209,
"step": 52480
},
{
"epoch": 0.84,
"grad_norm": 0.23305866122245789,
"learning_rate": 3.20608e-05,
"loss": 0.9089,
"step": 52500
},
{
"epoch": 0.84032,
"grad_norm": 0.21440325677394867,
"learning_rate": 3.19968e-05,
"loss": 0.8483,
"step": 52520
},
{
"epoch": 0.84064,
"grad_norm": 0.18805745244026184,
"learning_rate": 3.19328e-05,
"loss": 0.8436,
"step": 52540
},
{
"epoch": 0.84096,
"grad_norm": 0.24525205790996552,
"learning_rate": 3.18688e-05,
"loss": 0.8988,
"step": 52560
},
{
"epoch": 0.84128,
"grad_norm": 0.3257310688495636,
"learning_rate": 3.1804800000000004e-05,
"loss": 0.8461,
"step": 52580
},
{
"epoch": 0.8416,
"grad_norm": 0.21542541682720184,
"learning_rate": 3.1740800000000006e-05,
"loss": 0.908,
"step": 52600
},
{
"epoch": 0.84192,
"grad_norm": 0.18117113411426544,
"learning_rate": 3.16768e-05,
"loss": 0.9024,
"step": 52620
},
{
"epoch": 0.84224,
"grad_norm": 0.21288667619228363,
"learning_rate": 3.16128e-05,
"loss": 0.8981,
"step": 52640
},
{
"epoch": 0.84256,
"grad_norm": 0.20366021990776062,
"learning_rate": 3.15488e-05,
"loss": 0.8251,
"step": 52660
},
{
"epoch": 0.84288,
"grad_norm": 0.21234050393104553,
"learning_rate": 3.14848e-05,
"loss": 0.8548,
"step": 52680
},
{
"epoch": 0.8432,
"grad_norm": 0.23660403490066528,
"learning_rate": 3.14208e-05,
"loss": 0.8707,
"step": 52700
},
{
"epoch": 0.84352,
"grad_norm": 0.22134044766426086,
"learning_rate": 3.1356800000000004e-05,
"loss": 0.8742,
"step": 52720
},
{
"epoch": 0.84384,
"grad_norm": 0.23551689088344574,
"learning_rate": 3.12928e-05,
"loss": 0.8716,
"step": 52740
},
{
"epoch": 0.84416,
"grad_norm": 0.184543177485466,
"learning_rate": 3.12288e-05,
"loss": 0.8352,
"step": 52760
},
{
"epoch": 0.84448,
"grad_norm": 0.2306751012802124,
"learning_rate": 3.11648e-05,
"loss": 0.9043,
"step": 52780
},
{
"epoch": 0.8448,
"grad_norm": 0.2092065066099167,
"learning_rate": 3.1100800000000006e-05,
"loss": 0.9103,
"step": 52800
},
{
"epoch": 0.84512,
"grad_norm": 0.1914425492286682,
"learning_rate": 3.10368e-05,
"loss": 0.8702,
"step": 52820
},
{
"epoch": 0.84544,
"grad_norm": 0.24771615862846375,
"learning_rate": 3.09728e-05,
"loss": 0.8429,
"step": 52840
},
{
"epoch": 0.84576,
"grad_norm": 0.1887243092060089,
"learning_rate": 3.09088e-05,
"loss": 0.8835,
"step": 52860
},
{
"epoch": 0.84608,
"grad_norm": 0.2107224017381668,
"learning_rate": 3.08448e-05,
"loss": 0.8995,
"step": 52880
},
{
"epoch": 0.8464,
"grad_norm": 0.2114105075597763,
"learning_rate": 3.07808e-05,
"loss": 0.8316,
"step": 52900
},
{
"epoch": 0.84672,
"grad_norm": 0.22698649764060974,
"learning_rate": 3.0716800000000004e-05,
"loss": 0.917,
"step": 52920
},
{
"epoch": 0.84704,
"grad_norm": 0.23728908598423004,
"learning_rate": 3.0652800000000006e-05,
"loss": 0.8641,
"step": 52940
},
{
"epoch": 0.84736,
"grad_norm": 0.24342834949493408,
"learning_rate": 3.05888e-05,
"loss": 0.9067,
"step": 52960
},
{
"epoch": 0.84768,
"grad_norm": 0.23248563706874847,
"learning_rate": 3.05248e-05,
"loss": 0.8918,
"step": 52980
},
{
"epoch": 0.848,
"grad_norm": 0.22036781907081604,
"learning_rate": 3.04608e-05,
"loss": 0.8689,
"step": 53000
},
{
"epoch": 0.84832,
"grad_norm": 0.21692028641700745,
"learning_rate": 3.0396800000000003e-05,
"loss": 0.8941,
"step": 53020
},
{
"epoch": 0.84864,
"grad_norm": 0.21200671792030334,
"learning_rate": 3.03328e-05,
"loss": 0.8787,
"step": 53040
},
{
"epoch": 0.84896,
"grad_norm": 0.31840503215789795,
"learning_rate": 3.0268800000000004e-05,
"loss": 0.9181,
"step": 53060
},
{
"epoch": 0.84928,
"grad_norm": 0.2347812056541443,
"learning_rate": 3.02048e-05,
"loss": 0.885,
"step": 53080
},
{
"epoch": 0.8496,
"grad_norm": 0.22629977762699127,
"learning_rate": 3.01408e-05,
"loss": 0.9208,
"step": 53100
},
{
"epoch": 0.84992,
"grad_norm": 0.20610420405864716,
"learning_rate": 3.00768e-05,
"loss": 0.8698,
"step": 53120
},
{
"epoch": 0.85024,
"grad_norm": 0.17655342817306519,
"learning_rate": 3.0012800000000002e-05,
"loss": 0.8767,
"step": 53140
},
{
"epoch": 0.85056,
"grad_norm": 0.2303926944732666,
"learning_rate": 2.99488e-05,
"loss": 0.9493,
"step": 53160
},
{
"epoch": 0.85088,
"grad_norm": 0.20119711756706238,
"learning_rate": 2.9884800000000002e-05,
"loss": 0.8769,
"step": 53180
},
{
"epoch": 0.8512,
"grad_norm": 0.2533680200576782,
"learning_rate": 2.9820799999999997e-05,
"loss": 0.9568,
"step": 53200
},
{
"epoch": 0.85152,
"grad_norm": 0.2610546350479126,
"learning_rate": 2.9756800000000003e-05,
"loss": 0.9215,
"step": 53220
},
{
"epoch": 0.85184,
"grad_norm": 0.2684386074542999,
"learning_rate": 2.9692800000000005e-05,
"loss": 0.8739,
"step": 53240
},
{
"epoch": 0.85216,
"grad_norm": 0.2054203301668167,
"learning_rate": 2.96288e-05,
"loss": 0.8909,
"step": 53260
},
{
"epoch": 0.85248,
"grad_norm": 0.24058941006660461,
"learning_rate": 2.9564800000000002e-05,
"loss": 0.8314,
"step": 53280
},
{
"epoch": 0.8528,
"grad_norm": 0.2767840325832367,
"learning_rate": 2.95008e-05,
"loss": 0.8588,
"step": 53300
},
{
"epoch": 0.85312,
"grad_norm": 0.1769871711730957,
"learning_rate": 2.9436800000000002e-05,
"loss": 0.8978,
"step": 53320
},
{
"epoch": 0.85344,
"grad_norm": 0.23680894076824188,
"learning_rate": 2.93728e-05,
"loss": 0.879,
"step": 53340
},
{
"epoch": 0.85376,
"grad_norm": 0.2106921374797821,
"learning_rate": 2.9308800000000003e-05,
"loss": 0.8792,
"step": 53360
},
{
"epoch": 0.85408,
"grad_norm": 0.20546332001686096,
"learning_rate": 2.9248000000000002e-05,
"loss": 0.8953,
"step": 53380
},
{
"epoch": 0.8544,
"grad_norm": 0.2114826738834381,
"learning_rate": 2.9184e-05,
"loss": 0.889,
"step": 53400
},
{
"epoch": 0.85472,
"grad_norm": 0.2232695072889328,
"learning_rate": 2.9120000000000002e-05,
"loss": 0.8441,
"step": 53420
},
{
"epoch": 0.85504,
"grad_norm": 0.25499632954597473,
"learning_rate": 2.9056e-05,
"loss": 0.9132,
"step": 53440
},
{
"epoch": 0.85536,
"grad_norm": 0.22726675868034363,
"learning_rate": 2.8992000000000003e-05,
"loss": 0.8905,
"step": 53460
},
{
"epoch": 0.85568,
"grad_norm": 0.18875838816165924,
"learning_rate": 2.8927999999999998e-05,
"loss": 0.8665,
"step": 53480
},
{
"epoch": 0.856,
"grad_norm": 0.23504668474197388,
"learning_rate": 2.8864000000000004e-05,
"loss": 0.9092,
"step": 53500
},
{
"epoch": 0.85632,
"grad_norm": 0.23671472072601318,
"learning_rate": 2.88e-05,
"loss": 0.8858,
"step": 53520
},
{
"epoch": 0.85664,
"grad_norm": 0.22494633495807648,
"learning_rate": 2.8736e-05,
"loss": 0.8988,
"step": 53540
},
{
"epoch": 0.85696,
"grad_norm": 0.20699791610240936,
"learning_rate": 2.8672e-05,
"loss": 0.8523,
"step": 53560
},
{
"epoch": 0.85728,
"grad_norm": 0.24414889514446259,
"learning_rate": 2.8608e-05,
"loss": 0.8734,
"step": 53580
},
{
"epoch": 0.8576,
"grad_norm": 0.23562034964561462,
"learning_rate": 2.8544000000000003e-05,
"loss": 0.8349,
"step": 53600
},
{
"epoch": 0.85792,
"grad_norm": 0.18858417868614197,
"learning_rate": 2.8480000000000002e-05,
"loss": 0.8784,
"step": 53620
},
{
"epoch": 0.85824,
"grad_norm": 0.23060615360736847,
"learning_rate": 2.8416000000000004e-05,
"loss": 0.8874,
"step": 53640
},
{
"epoch": 0.85856,
"grad_norm": 0.2229340374469757,
"learning_rate": 2.8352000000000002e-05,
"loss": 0.8772,
"step": 53660
},
{
"epoch": 0.85888,
"grad_norm": 0.23819968104362488,
"learning_rate": 2.8288000000000004e-05,
"loss": 0.9138,
"step": 53680
},
{
"epoch": 0.8592,
"grad_norm": 0.22570669651031494,
"learning_rate": 2.8224e-05,
"loss": 0.8543,
"step": 53700
},
{
"epoch": 0.85952,
"grad_norm": 0.1779906302690506,
"learning_rate": 2.816e-05,
"loss": 0.8453,
"step": 53720
},
{
"epoch": 0.85984,
"grad_norm": 0.18111290037631989,
"learning_rate": 2.8096e-05,
"loss": 0.8571,
"step": 53740
},
{
"epoch": 0.86016,
"grad_norm": 0.23945818841457367,
"learning_rate": 2.8032000000000002e-05,
"loss": 0.9347,
"step": 53760
},
{
"epoch": 0.86048,
"grad_norm": 0.2050149291753769,
"learning_rate": 2.7968e-05,
"loss": 0.8871,
"step": 53780
},
{
"epoch": 0.8608,
"grad_norm": 0.20724190771579742,
"learning_rate": 2.7904000000000003e-05,
"loss": 0.8506,
"step": 53800
},
{
"epoch": 0.86112,
"grad_norm": 0.21259485185146332,
"learning_rate": 2.7839999999999998e-05,
"loss": 0.8642,
"step": 53820
},
{
"epoch": 0.86144,
"grad_norm": 0.2348015457391739,
"learning_rate": 2.7776000000000003e-05,
"loss": 0.8883,
"step": 53840
},
{
"epoch": 0.86176,
"grad_norm": 0.2268034815788269,
"learning_rate": 2.7711999999999998e-05,
"loss": 0.842,
"step": 53860
},
{
"epoch": 0.86208,
"grad_norm": 0.20903776586055756,
"learning_rate": 2.7648e-05,
"loss": 0.898,
"step": 53880
},
{
"epoch": 0.8624,
"grad_norm": 0.2246081382036209,
"learning_rate": 2.7584e-05,
"loss": 0.9053,
"step": 53900
},
{
"epoch": 0.86272,
"grad_norm": 0.20664174854755402,
"learning_rate": 2.752e-05,
"loss": 0.9123,
"step": 53920
},
{
"epoch": 0.86304,
"grad_norm": 0.199870765209198,
"learning_rate": 2.7456000000000003e-05,
"loss": 0.8782,
"step": 53940
},
{
"epoch": 0.86336,
"grad_norm": 0.2337988168001175,
"learning_rate": 2.7392e-05,
"loss": 0.8961,
"step": 53960
},
{
"epoch": 0.86368,
"grad_norm": 0.24039073288440704,
"learning_rate": 2.7328000000000003e-05,
"loss": 0.9463,
"step": 53980
},
{
"epoch": 0.864,
"grad_norm": 0.21727830171585083,
"learning_rate": 2.7264000000000002e-05,
"loss": 0.8852,
"step": 54000
},
{
"epoch": 0.86432,
"grad_norm": 0.25286540389060974,
"learning_rate": 2.7200000000000004e-05,
"loss": 0.8877,
"step": 54020
},
{
"epoch": 0.86464,
"grad_norm": 0.2132069319486618,
"learning_rate": 2.7136e-05,
"loss": 0.8378,
"step": 54040
},
{
"epoch": 0.86496,
"grad_norm": 0.19726885855197906,
"learning_rate": 2.7072000000000004e-05,
"loss": 0.8827,
"step": 54060
},
{
"epoch": 0.86528,
"grad_norm": 0.19055317342281342,
"learning_rate": 2.7008e-05,
"loss": 0.845,
"step": 54080
},
{
"epoch": 0.8656,
"grad_norm": 0.21198777854442596,
"learning_rate": 2.6944e-05,
"loss": 0.8468,
"step": 54100
},
{
"epoch": 0.86592,
"grad_norm": 0.1932942420244217,
"learning_rate": 2.688e-05,
"loss": 0.8163,
"step": 54120
},
{
"epoch": 0.86624,
"grad_norm": 0.21990489959716797,
"learning_rate": 2.6816000000000002e-05,
"loss": 0.8721,
"step": 54140
},
{
"epoch": 0.86656,
"grad_norm": 0.2552103102207184,
"learning_rate": 2.6752e-05,
"loss": 0.9031,
"step": 54160
},
{
"epoch": 0.86688,
"grad_norm": 0.20564943552017212,
"learning_rate": 2.6688000000000003e-05,
"loss": 0.8891,
"step": 54180
},
{
"epoch": 0.8672,
"grad_norm": 0.21163496375083923,
"learning_rate": 2.6623999999999998e-05,
"loss": 0.8529,
"step": 54200
},
{
"epoch": 0.86752,
"grad_norm": 0.19944727420806885,
"learning_rate": 2.6560000000000003e-05,
"loss": 0.8493,
"step": 54220
},
{
"epoch": 0.86784,
"grad_norm": 0.20173463225364685,
"learning_rate": 2.6496e-05,
"loss": 0.8716,
"step": 54240
},
{
"epoch": 0.86816,
"grad_norm": 0.23707176744937897,
"learning_rate": 2.6432e-05,
"loss": 0.9124,
"step": 54260
},
{
"epoch": 0.86848,
"grad_norm": 0.24114908277988434,
"learning_rate": 2.6368000000000002e-05,
"loss": 0.8897,
"step": 54280
},
{
"epoch": 0.8688,
"grad_norm": 0.2588618993759155,
"learning_rate": 2.6304e-05,
"loss": 0.8567,
"step": 54300
},
{
"epoch": 0.86912,
"grad_norm": 0.21841663122177124,
"learning_rate": 2.6240000000000003e-05,
"loss": 0.8804,
"step": 54320
},
{
"epoch": 0.86944,
"grad_norm": 0.20472241938114166,
"learning_rate": 2.6176e-05,
"loss": 0.8423,
"step": 54340
},
{
"epoch": 0.86976,
"grad_norm": 0.21747057139873505,
"learning_rate": 2.6112000000000003e-05,
"loss": 0.8352,
"step": 54360
},
{
"epoch": 0.87008,
"grad_norm": 0.26556268334388733,
"learning_rate": 2.6048e-05,
"loss": 0.8188,
"step": 54380
},
{
"epoch": 0.8704,
"grad_norm": 0.20089897513389587,
"learning_rate": 2.5984000000000004e-05,
"loss": 0.8916,
"step": 54400
},
{
"epoch": 0.87072,
"grad_norm": 0.24367238581180573,
"learning_rate": 2.592e-05,
"loss": 0.9158,
"step": 54420
},
{
"epoch": 0.87104,
"grad_norm": 0.24030858278274536,
"learning_rate": 2.5856e-05,
"loss": 0.9347,
"step": 54440
},
{
"epoch": 0.87136,
"grad_norm": 0.22043825685977936,
"learning_rate": 2.5792e-05,
"loss": 0.8796,
"step": 54460
},
{
"epoch": 0.87168,
"grad_norm": 0.243258535861969,
"learning_rate": 2.5728e-05,
"loss": 0.9063,
"step": 54480
},
{
"epoch": 0.872,
"grad_norm": 0.2250966578722,
"learning_rate": 2.5664e-05,
"loss": 0.8686,
"step": 54500
},
{
"epoch": 0.87232,
"grad_norm": 0.24469095468521118,
"learning_rate": 2.5600000000000002e-05,
"loss": 0.8392,
"step": 54520
},
{
"epoch": 0.87264,
"grad_norm": 0.2351893037557602,
"learning_rate": 2.5535999999999997e-05,
"loss": 0.861,
"step": 54540
},
{
"epoch": 0.87296,
"grad_norm": 0.22012612223625183,
"learning_rate": 2.5472000000000003e-05,
"loss": 0.8925,
"step": 54560
},
{
"epoch": 0.87328,
"grad_norm": 0.22451180219650269,
"learning_rate": 2.5407999999999998e-05,
"loss": 0.8572,
"step": 54580
},
{
"epoch": 0.8736,
"grad_norm": 0.2381567507982254,
"learning_rate": 2.5344e-05,
"loss": 0.9039,
"step": 54600
},
{
"epoch": 0.87392,
"grad_norm": 0.2319832742214203,
"learning_rate": 2.5280000000000005e-05,
"loss": 0.898,
"step": 54620
},
{
"epoch": 0.87424,
"grad_norm": 0.24306103587150574,
"learning_rate": 2.5216e-05,
"loss": 0.8999,
"step": 54640
},
{
"epoch": 0.87456,
"grad_norm": 0.22463731467723846,
"learning_rate": 2.5152000000000002e-05,
"loss": 0.8828,
"step": 54660
},
{
"epoch": 0.87488,
"grad_norm": 0.2395946979522705,
"learning_rate": 2.5088e-05,
"loss": 0.898,
"step": 54680
},
{
"epoch": 0.8752,
"grad_norm": 0.22378131747245789,
"learning_rate": 2.5024000000000003e-05,
"loss": 0.8967,
"step": 54700
},
{
"epoch": 0.87552,
"grad_norm": 0.24939024448394775,
"learning_rate": 2.496e-05,
"loss": 0.8861,
"step": 54720
},
{
"epoch": 0.87584,
"grad_norm": 0.23700961470603943,
"learning_rate": 2.4896e-05,
"loss": 0.9274,
"step": 54740
},
{
"epoch": 0.87616,
"grad_norm": 0.2053232043981552,
"learning_rate": 2.4832000000000002e-05,
"loss": 0.8325,
"step": 54760
},
{
"epoch": 0.87648,
"grad_norm": 0.20047616958618164,
"learning_rate": 2.4768e-05,
"loss": 0.9201,
"step": 54780
},
{
"epoch": 0.8768,
"grad_norm": 0.30408886075019836,
"learning_rate": 2.4704000000000003e-05,
"loss": 0.8911,
"step": 54800
},
{
"epoch": 0.87712,
"grad_norm": 0.20753300189971924,
"learning_rate": 2.464e-05,
"loss": 0.8867,
"step": 54820
},
{
"epoch": 0.87744,
"grad_norm": 0.2093687355518341,
"learning_rate": 2.4576000000000003e-05,
"loss": 0.8762,
"step": 54840
},
{
"epoch": 0.87776,
"grad_norm": 0.226734921336174,
"learning_rate": 2.4512000000000002e-05,
"loss": 0.8893,
"step": 54860
},
{
"epoch": 0.87808,
"grad_norm": 0.21793030202388763,
"learning_rate": 2.4448e-05,
"loss": 0.8665,
"step": 54880
},
{
"epoch": 0.8784,
"grad_norm": 0.2046462595462799,
"learning_rate": 2.4384000000000002e-05,
"loss": 0.8999,
"step": 54900
},
{
"epoch": 0.87872,
"grad_norm": 0.2325984686613083,
"learning_rate": 2.432e-05,
"loss": 0.9291,
"step": 54920
},
{
"epoch": 0.87904,
"grad_norm": 0.20662260055541992,
"learning_rate": 2.4256e-05,
"loss": 0.8944,
"step": 54940
},
{
"epoch": 0.87936,
"grad_norm": 0.2617480158805847,
"learning_rate": 2.4192e-05,
"loss": 0.9573,
"step": 54960
},
{
"epoch": 0.87968,
"grad_norm": 0.18719272315502167,
"learning_rate": 2.4128e-05,
"loss": 0.8767,
"step": 54980
},
{
"epoch": 0.88,
"grad_norm": 0.27199268341064453,
"learning_rate": 2.4064000000000002e-05,
"loss": 0.9066,
"step": 55000
},
{
"epoch": 0.88032,
"grad_norm": 0.21330726146697998,
"learning_rate": 2.4e-05,
"loss": 0.8594,
"step": 55020
},
{
"epoch": 0.88064,
"grad_norm": 0.1909005492925644,
"learning_rate": 2.3936e-05,
"loss": 0.8807,
"step": 55040
},
{
"epoch": 0.88096,
"grad_norm": 0.21157494187355042,
"learning_rate": 2.3872e-05,
"loss": 0.9019,
"step": 55060
},
{
"epoch": 0.88128,
"grad_norm": 0.19137850403785706,
"learning_rate": 2.3808000000000003e-05,
"loss": 0.8873,
"step": 55080
},
{
"epoch": 0.8816,
"grad_norm": 0.17469841241836548,
"learning_rate": 2.3744000000000002e-05,
"loss": 0.8225,
"step": 55100
},
{
"epoch": 0.88192,
"grad_norm": 0.21227526664733887,
"learning_rate": 2.3680000000000004e-05,
"loss": 0.9273,
"step": 55120
},
{
"epoch": 0.88224,
"grad_norm": 0.22155140340328217,
"learning_rate": 2.3616000000000002e-05,
"loss": 0.8716,
"step": 55140
},
{
"epoch": 0.88256,
"grad_norm": 0.2392839938402176,
"learning_rate": 2.3552e-05,
"loss": 0.8905,
"step": 55160
},
{
"epoch": 0.88288,
"grad_norm": 0.2480785995721817,
"learning_rate": 2.3488000000000003e-05,
"loss": 0.8906,
"step": 55180
},
{
"epoch": 0.8832,
"grad_norm": 0.23919089138507843,
"learning_rate": 2.3424e-05,
"loss": 0.8912,
"step": 55200
},
{
"epoch": 0.88352,
"grad_norm": 0.2384289801120758,
"learning_rate": 2.336e-05,
"loss": 0.8556,
"step": 55220
},
{
"epoch": 0.88384,
"grad_norm": 0.23314107954502106,
"learning_rate": 2.3296000000000002e-05,
"loss": 0.8638,
"step": 55240
},
{
"epoch": 0.88416,
"grad_norm": 0.20176903903484344,
"learning_rate": 2.3232e-05,
"loss": 0.8767,
"step": 55260
},
{
"epoch": 0.88448,
"grad_norm": 0.23449799418449402,
"learning_rate": 2.3168000000000002e-05,
"loss": 0.8811,
"step": 55280
},
{
"epoch": 0.8848,
"grad_norm": 0.2195407897233963,
"learning_rate": 2.3104e-05,
"loss": 0.8956,
"step": 55300
},
{
"epoch": 0.88512,
"grad_norm": 0.2260621041059494,
"learning_rate": 2.304e-05,
"loss": 0.8781,
"step": 55320
},
{
"epoch": 0.88544,
"grad_norm": 0.21167124807834625,
"learning_rate": 2.2976e-05,
"loss": 0.8251,
"step": 55340
},
{
"epoch": 0.88576,
"grad_norm": 0.219034343957901,
"learning_rate": 2.2912e-05,
"loss": 0.8413,
"step": 55360
},
{
"epoch": 0.88608,
"grad_norm": 0.24565385282039642,
"learning_rate": 2.2848e-05,
"loss": 0.9033,
"step": 55380
},
{
"epoch": 0.8864,
"grad_norm": 0.2161465734243393,
"learning_rate": 2.2784e-05,
"loss": 0.884,
"step": 55400
},
{
"epoch": 0.88672,
"grad_norm": 0.2424006164073944,
"learning_rate": 2.2720000000000003e-05,
"loss": 0.9424,
"step": 55420
},
{
"epoch": 0.88704,
"grad_norm": 0.2734192907810211,
"learning_rate": 2.2656e-05,
"loss": 0.9217,
"step": 55440
},
{
"epoch": 0.88736,
"grad_norm": 0.2107728272676468,
"learning_rate": 2.2592000000000003e-05,
"loss": 0.8789,
"step": 55460
},
{
"epoch": 0.88768,
"grad_norm": 0.2616425156593323,
"learning_rate": 2.2528000000000002e-05,
"loss": 0.918,
"step": 55480
},
{
"epoch": 0.888,
"grad_norm": 0.23145994544029236,
"learning_rate": 2.2464e-05,
"loss": 0.8626,
"step": 55500
},
{
"epoch": 0.88832,
"grad_norm": 0.22918421030044556,
"learning_rate": 2.2400000000000002e-05,
"loss": 0.8656,
"step": 55520
},
{
"epoch": 0.88864,
"grad_norm": 0.2246193289756775,
"learning_rate": 2.2336e-05,
"loss": 0.8569,
"step": 55540
},
{
"epoch": 0.88896,
"grad_norm": 0.23850585520267487,
"learning_rate": 2.2272e-05,
"loss": 0.8612,
"step": 55560
},
{
"epoch": 0.88928,
"grad_norm": 0.21243111789226532,
"learning_rate": 2.2208e-05,
"loss": 0.8612,
"step": 55580
},
{
"epoch": 0.8896,
"grad_norm": 0.1799386590719223,
"learning_rate": 2.2144e-05,
"loss": 0.8601,
"step": 55600
},
{
"epoch": 0.88992,
"grad_norm": 0.20039919018745422,
"learning_rate": 2.2080000000000002e-05,
"loss": 0.8781,
"step": 55620
},
{
"epoch": 0.89024,
"grad_norm": 0.20583823323249817,
"learning_rate": 2.2016e-05,
"loss": 0.8552,
"step": 55640
},
{
"epoch": 0.89056,
"grad_norm": 0.25878387689590454,
"learning_rate": 2.1952e-05,
"loss": 0.8694,
"step": 55660
},
{
"epoch": 0.89088,
"grad_norm": 0.26383817195892334,
"learning_rate": 2.1888e-05,
"loss": 0.916,
"step": 55680
},
{
"epoch": 0.8912,
"grad_norm": 0.2270156741142273,
"learning_rate": 2.1824e-05,
"loss": 0.9219,
"step": 55700
},
{
"epoch": 0.89152,
"grad_norm": 0.22747768461704254,
"learning_rate": 2.176e-05,
"loss": 0.9066,
"step": 55720
},
{
"epoch": 0.89184,
"grad_norm": 0.2369069904088974,
"learning_rate": 2.1696e-05,
"loss": 0.9127,
"step": 55740
},
{
"epoch": 0.89216,
"grad_norm": 0.22584250569343567,
"learning_rate": 2.1632000000000002e-05,
"loss": 0.8571,
"step": 55760
},
{
"epoch": 0.89248,
"grad_norm": 0.2100561112165451,
"learning_rate": 2.1568e-05,
"loss": 0.8911,
"step": 55780
},
{
"epoch": 0.8928,
"grad_norm": 0.24614600837230682,
"learning_rate": 2.1504000000000003e-05,
"loss": 0.8826,
"step": 55800
},
{
"epoch": 0.89312,
"grad_norm": 0.25491416454315186,
"learning_rate": 2.144e-05,
"loss": 0.8619,
"step": 55820
},
{
"epoch": 0.89344,
"grad_norm": 0.22057655453681946,
"learning_rate": 2.1376e-05,
"loss": 0.8607,
"step": 55840
},
{
"epoch": 0.89376,
"grad_norm": 0.2257709503173828,
"learning_rate": 2.1312000000000002e-05,
"loss": 0.8979,
"step": 55860
},
{
"epoch": 0.89408,
"grad_norm": 0.20789384841918945,
"learning_rate": 2.1248e-05,
"loss": 0.8385,
"step": 55880
},
{
"epoch": 0.8944,
"grad_norm": 0.22959135472774506,
"learning_rate": 2.1184000000000002e-05,
"loss": 0.866,
"step": 55900
},
{
"epoch": 0.89472,
"grad_norm": 0.21270857751369476,
"learning_rate": 2.112e-05,
"loss": 0.8747,
"step": 55920
},
{
"epoch": 0.89504,
"grad_norm": 0.22662252187728882,
"learning_rate": 2.1056e-05,
"loss": 0.8836,
"step": 55940
},
{
"epoch": 0.89536,
"grad_norm": 0.22201156616210938,
"learning_rate": 2.0992e-05,
"loss": 0.924,
"step": 55960
},
{
"epoch": 0.89568,
"grad_norm": 0.21177421510219574,
"learning_rate": 2.0928e-05,
"loss": 0.8794,
"step": 55980
},
{
"epoch": 0.896,
"grad_norm": 0.23896725475788116,
"learning_rate": 2.0864e-05,
"loss": 0.8967,
"step": 56000
},
{
"epoch": 0.89632,
"grad_norm": 0.21801073849201202,
"learning_rate": 2.08e-05,
"loss": 0.9033,
"step": 56020
},
{
"epoch": 0.89664,
"grad_norm": 0.19565339386463165,
"learning_rate": 2.0736e-05,
"loss": 0.8857,
"step": 56040
},
{
"epoch": 0.89696,
"grad_norm": 0.21768838167190552,
"learning_rate": 2.0672e-05,
"loss": 0.8463,
"step": 56060
},
{
"epoch": 0.89728,
"grad_norm": 0.20696651935577393,
"learning_rate": 2.0608000000000003e-05,
"loss": 0.8725,
"step": 56080
},
{
"epoch": 0.8976,
"grad_norm": 0.2298385500907898,
"learning_rate": 2.0544000000000002e-05,
"loss": 0.902,
"step": 56100
},
{
"epoch": 0.89792,
"grad_norm": 0.22712761163711548,
"learning_rate": 2.048e-05,
"loss": 0.8109,
"step": 56120
},
{
"epoch": 0.89824,
"grad_norm": 0.23746636509895325,
"learning_rate": 2.0416000000000002e-05,
"loss": 0.9398,
"step": 56140
},
{
"epoch": 0.89856,
"grad_norm": 0.2161879539489746,
"learning_rate": 2.0352e-05,
"loss": 0.9345,
"step": 56160
},
{
"epoch": 0.89888,
"grad_norm": 0.18641656637191772,
"learning_rate": 2.0288000000000003e-05,
"loss": 0.8868,
"step": 56180
},
{
"epoch": 0.8992,
"grad_norm": 0.21114569902420044,
"learning_rate": 2.0224e-05,
"loss": 0.8638,
"step": 56200
},
{
"epoch": 0.89952,
"grad_norm": 0.2475218027830124,
"learning_rate": 2.016e-05,
"loss": 0.8695,
"step": 56220
},
{
"epoch": 0.89984,
"grad_norm": 0.2249087691307068,
"learning_rate": 2.0096000000000002e-05,
"loss": 0.8671,
"step": 56240
},
{
"epoch": 0.90016,
"grad_norm": 0.23106272518634796,
"learning_rate": 2.0032e-05,
"loss": 0.8591,
"step": 56260
},
{
"epoch": 0.90048,
"grad_norm": 0.23417605459690094,
"learning_rate": 1.9968e-05,
"loss": 0.8675,
"step": 56280
},
{
"epoch": 0.9008,
"grad_norm": 0.25275781750679016,
"learning_rate": 1.9904e-05,
"loss": 0.9146,
"step": 56300
},
{
"epoch": 0.90112,
"grad_norm": 0.2217005491256714,
"learning_rate": 1.984e-05,
"loss": 0.9167,
"step": 56320
},
{
"epoch": 0.90144,
"grad_norm": 0.2137831449508667,
"learning_rate": 1.9776000000000002e-05,
"loss": 0.8699,
"step": 56340
},
{
"epoch": 0.90176,
"grad_norm": 0.2010769098997116,
"learning_rate": 1.9712e-05,
"loss": 0.8862,
"step": 56360
},
{
"epoch": 0.90208,
"grad_norm": 0.23932960629463196,
"learning_rate": 1.9648e-05,
"loss": 0.866,
"step": 56380
},
{
"epoch": 0.9024,
"grad_norm": 0.22078342735767365,
"learning_rate": 1.9584e-05,
"loss": 0.914,
"step": 56400
},
{
"epoch": 0.90272,
"grad_norm": 0.2117830365896225,
"learning_rate": 1.9520000000000003e-05,
"loss": 0.8706,
"step": 56420
},
{
"epoch": 0.90304,
"grad_norm": 0.22050690650939941,
"learning_rate": 1.9456e-05,
"loss": 0.8842,
"step": 56440
},
{
"epoch": 0.90336,
"grad_norm": 0.21533265709877014,
"learning_rate": 1.9392000000000003e-05,
"loss": 0.8478,
"step": 56460
},
{
"epoch": 0.90368,
"grad_norm": 0.23256616294384003,
"learning_rate": 1.9328000000000002e-05,
"loss": 0.8912,
"step": 56480
},
{
"epoch": 0.904,
"grad_norm": 0.24610291421413422,
"learning_rate": 1.9264e-05,
"loss": 0.8493,
"step": 56500
},
{
"epoch": 0.90432,
"grad_norm": 0.2145715057849884,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.8626,
"step": 56520
},
{
"epoch": 0.90464,
"grad_norm": 0.21198943257331848,
"learning_rate": 1.91392e-05,
"loss": 0.9013,
"step": 56540
},
{
"epoch": 0.90496,
"grad_norm": 0.2638920843601227,
"learning_rate": 1.90752e-05,
"loss": 0.9176,
"step": 56560
},
{
"epoch": 0.90528,
"grad_norm": 0.17651067674160004,
"learning_rate": 1.9011200000000002e-05,
"loss": 0.9293,
"step": 56580
},
{
"epoch": 0.9056,
"grad_norm": 0.20234732329845428,
"learning_rate": 1.89472e-05,
"loss": 0.8781,
"step": 56600
},
{
"epoch": 0.90592,
"grad_norm": 0.2263152003288269,
"learning_rate": 1.8883200000000002e-05,
"loss": 0.9071,
"step": 56620
},
{
"epoch": 0.90624,
"grad_norm": 0.2451239675283432,
"learning_rate": 1.88192e-05,
"loss": 0.9174,
"step": 56640
},
{
"epoch": 0.90656,
"grad_norm": 0.22056809067726135,
"learning_rate": 1.87552e-05,
"loss": 0.9132,
"step": 56660
},
{
"epoch": 0.90688,
"grad_norm": 0.2163904458284378,
"learning_rate": 1.86912e-05,
"loss": 0.9208,
"step": 56680
},
{
"epoch": 0.9072,
"grad_norm": 0.23965485394001007,
"learning_rate": 1.86272e-05,
"loss": 0.8778,
"step": 56700
},
{
"epoch": 0.90752,
"grad_norm": 0.2319948375225067,
"learning_rate": 1.85632e-05,
"loss": 0.8673,
"step": 56720
},
{
"epoch": 0.90784,
"grad_norm": 0.17709515988826752,
"learning_rate": 1.84992e-05,
"loss": 0.8635,
"step": 56740
},
{
"epoch": 0.90816,
"grad_norm": 0.22696132957935333,
"learning_rate": 1.84352e-05,
"loss": 0.8179,
"step": 56760
},
{
"epoch": 0.90848,
"grad_norm": 0.25801581144332886,
"learning_rate": 1.8371199999999998e-05,
"loss": 0.9357,
"step": 56780
},
{
"epoch": 0.9088,
"grad_norm": 0.20928123593330383,
"learning_rate": 1.8307200000000003e-05,
"loss": 0.8228,
"step": 56800
},
{
"epoch": 0.90912,
"grad_norm": 0.18093565106391907,
"learning_rate": 1.8243200000000002e-05,
"loss": 0.8613,
"step": 56820
},
{
"epoch": 0.90944,
"grad_norm": 0.21541033685207367,
"learning_rate": 1.81792e-05,
"loss": 0.8624,
"step": 56840
},
{
"epoch": 0.90976,
"grad_norm": 0.1991436630487442,
"learning_rate": 1.8115200000000002e-05,
"loss": 0.8876,
"step": 56860
},
{
"epoch": 0.91008,
"grad_norm": 0.2240942418575287,
"learning_rate": 1.80512e-05,
"loss": 0.9089,
"step": 56880
},
{
"epoch": 0.9104,
"grad_norm": 0.2239503711462021,
"learning_rate": 1.7987200000000003e-05,
"loss": 0.8968,
"step": 56900
},
{
"epoch": 0.91072,
"grad_norm": 0.20320284366607666,
"learning_rate": 1.79232e-05,
"loss": 0.8932,
"step": 56920
},
{
"epoch": 0.91104,
"grad_norm": 0.2082601636648178,
"learning_rate": 1.78592e-05,
"loss": 0.8576,
"step": 56940
},
{
"epoch": 0.91136,
"grad_norm": 0.24213466048240662,
"learning_rate": 1.7795200000000002e-05,
"loss": 0.9186,
"step": 56960
},
{
"epoch": 0.91168,
"grad_norm": 0.21737167239189148,
"learning_rate": 1.77312e-05,
"loss": 0.8754,
"step": 56980
},
{
"epoch": 0.912,
"grad_norm": 0.22130608558654785,
"learning_rate": 1.76672e-05,
"loss": 0.8952,
"step": 57000
},
{
"epoch": 0.91232,
"grad_norm": 0.24348904192447662,
"learning_rate": 1.76032e-05,
"loss": 0.8855,
"step": 57020
},
{
"epoch": 0.91264,
"grad_norm": 0.2024615854024887,
"learning_rate": 1.75392e-05,
"loss": 0.8279,
"step": 57040
},
{
"epoch": 0.91296,
"grad_norm": 0.23168401420116425,
"learning_rate": 1.7475199999999998e-05,
"loss": 0.833,
"step": 57060
},
{
"epoch": 0.91328,
"grad_norm": 0.2201065719127655,
"learning_rate": 1.74112e-05,
"loss": 0.9255,
"step": 57080
},
{
"epoch": 0.9136,
"grad_norm": 0.22319328784942627,
"learning_rate": 1.73472e-05,
"loss": 0.8326,
"step": 57100
},
{
"epoch": 0.91392,
"grad_norm": 0.1963931918144226,
"learning_rate": 1.72832e-05,
"loss": 0.8416,
"step": 57120
},
{
"epoch": 0.91424,
"grad_norm": 0.23881016671657562,
"learning_rate": 1.7219200000000003e-05,
"loss": 0.8786,
"step": 57140
},
{
"epoch": 0.91456,
"grad_norm": 0.20957258343696594,
"learning_rate": 1.71552e-05,
"loss": 0.8972,
"step": 57160
},
{
"epoch": 0.91488,
"grad_norm": 0.25064921379089355,
"learning_rate": 1.7091200000000003e-05,
"loss": 0.8594,
"step": 57180
},
{
"epoch": 0.9152,
"grad_norm": 0.22620342671871185,
"learning_rate": 1.7027200000000002e-05,
"loss": 0.855,
"step": 57200
},
{
"epoch": 0.91552,
"grad_norm": 0.22465457022190094,
"learning_rate": 1.69632e-05,
"loss": 0.8507,
"step": 57220
},
{
"epoch": 0.91584,
"grad_norm": 0.33410021662712097,
"learning_rate": 1.6899200000000002e-05,
"loss": 0.8608,
"step": 57240
},
{
"epoch": 0.91616,
"grad_norm": 0.20130544900894165,
"learning_rate": 1.68352e-05,
"loss": 0.8672,
"step": 57260
},
{
"epoch": 0.91648,
"grad_norm": 0.2350778877735138,
"learning_rate": 1.67712e-05,
"loss": 0.8942,
"step": 57280
},
{
"epoch": 0.9168,
"grad_norm": 0.21875150501728058,
"learning_rate": 1.67072e-05,
"loss": 0.9244,
"step": 57300
},
{
"epoch": 0.91712,
"grad_norm": 0.18828479945659637,
"learning_rate": 1.66432e-05,
"loss": 0.9132,
"step": 57320
},
{
"epoch": 0.91744,
"grad_norm": 0.19180278480052948,
"learning_rate": 1.65792e-05,
"loss": 0.8833,
"step": 57340
},
{
"epoch": 0.91776,
"grad_norm": 0.2215159833431244,
"learning_rate": 1.65152e-05,
"loss": 0.909,
"step": 57360
},
{
"epoch": 0.91808,
"grad_norm": 0.19516055285930634,
"learning_rate": 1.64512e-05,
"loss": 0.801,
"step": 57380
},
{
"epoch": 0.9184,
"grad_norm": 0.19184818863868713,
"learning_rate": 1.63872e-05,
"loss": 0.8942,
"step": 57400
},
{
"epoch": 0.91872,
"grad_norm": 0.21622811257839203,
"learning_rate": 1.63232e-05,
"loss": 0.8772,
"step": 57420
},
{
"epoch": 0.91904,
"grad_norm": 0.19087673723697662,
"learning_rate": 1.62592e-05,
"loss": 0.8736,
"step": 57440
},
{
"epoch": 0.91936,
"grad_norm": 0.21483179926872253,
"learning_rate": 1.61952e-05,
"loss": 0.8696,
"step": 57460
},
{
"epoch": 0.91968,
"grad_norm": 0.18231695890426636,
"learning_rate": 1.6131200000000002e-05,
"loss": 0.8429,
"step": 57480
},
{
"epoch": 0.92,
"grad_norm": 0.24777543544769287,
"learning_rate": 1.60672e-05,
"loss": 0.9207,
"step": 57500
},
{
"epoch": 0.92032,
"grad_norm": 0.21233485639095306,
"learning_rate": 1.6003200000000003e-05,
"loss": 0.9358,
"step": 57520
},
{
"epoch": 0.92064,
"grad_norm": 0.257861465215683,
"learning_rate": 1.59392e-05,
"loss": 0.9123,
"step": 57540
},
{
"epoch": 0.92096,
"grad_norm": 0.18959666788578033,
"learning_rate": 1.58752e-05,
"loss": 0.8834,
"step": 57560
},
{
"epoch": 0.92128,
"grad_norm": 0.22325819730758667,
"learning_rate": 1.5811200000000002e-05,
"loss": 0.8358,
"step": 57580
},
{
"epoch": 0.9216,
"grad_norm": 0.25303423404693604,
"learning_rate": 1.57472e-05,
"loss": 0.8783,
"step": 57600
},
{
"epoch": 0.92192,
"grad_norm": 0.18842332065105438,
"learning_rate": 1.56832e-05,
"loss": 0.8752,
"step": 57620
},
{
"epoch": 0.92224,
"grad_norm": 0.20843419432640076,
"learning_rate": 1.56192e-05,
"loss": 0.8911,
"step": 57640
},
{
"epoch": 0.92256,
"grad_norm": 0.2596145570278168,
"learning_rate": 1.55552e-05,
"loss": 0.8641,
"step": 57660
},
{
"epoch": 0.92288,
"grad_norm": 0.22827741503715515,
"learning_rate": 1.5491200000000002e-05,
"loss": 0.8859,
"step": 57680
},
{
"epoch": 0.9232,
"grad_norm": 0.21393606066703796,
"learning_rate": 1.54272e-05,
"loss": 0.9042,
"step": 57700
},
{
"epoch": 0.92352,
"grad_norm": 0.1834941953420639,
"learning_rate": 1.53632e-05,
"loss": 0.8484,
"step": 57720
},
{
"epoch": 0.92384,
"grad_norm": 0.21815276145935059,
"learning_rate": 1.52992e-05,
"loss": 0.9284,
"step": 57740
},
{
"epoch": 0.92416,
"grad_norm": 0.20257651805877686,
"learning_rate": 1.52352e-05,
"loss": 0.8286,
"step": 57760
},
{
"epoch": 0.92448,
"grad_norm": 0.258215993642807,
"learning_rate": 1.5171200000000001e-05,
"loss": 0.9197,
"step": 57780
},
{
"epoch": 0.9248,
"grad_norm": 0.21582092344760895,
"learning_rate": 1.5107200000000002e-05,
"loss": 0.8859,
"step": 57800
},
{
"epoch": 0.92512,
"grad_norm": 0.2228287011384964,
"learning_rate": 1.5043200000000002e-05,
"loss": 0.8918,
"step": 57820
},
{
"epoch": 0.92544,
"grad_norm": 0.22294846177101135,
"learning_rate": 1.4979200000000002e-05,
"loss": 0.9276,
"step": 57840
},
{
"epoch": 0.92576,
"grad_norm": 0.19540980458259583,
"learning_rate": 1.49152e-05,
"loss": 0.8801,
"step": 57860
},
{
"epoch": 0.92608,
"grad_norm": 0.19074346125125885,
"learning_rate": 1.4851200000000001e-05,
"loss": 0.8475,
"step": 57880
},
{
"epoch": 0.9264,
"grad_norm": 0.19179295003414154,
"learning_rate": 1.4787200000000001e-05,
"loss": 0.9089,
"step": 57900
},
{
"epoch": 0.92672,
"grad_norm": 0.20641054213047028,
"learning_rate": 1.4723200000000002e-05,
"loss": 0.8313,
"step": 57920
},
{
"epoch": 0.92704,
"grad_norm": 0.19102010130882263,
"learning_rate": 1.46592e-05,
"loss": 0.8915,
"step": 57940
},
{
"epoch": 0.92736,
"grad_norm": 0.1892133504152298,
"learning_rate": 1.45952e-05,
"loss": 0.8708,
"step": 57960
},
{
"epoch": 0.92768,
"grad_norm": 0.21764519810676575,
"learning_rate": 1.45312e-05,
"loss": 0.9107,
"step": 57980
},
{
"epoch": 0.928,
"grad_norm": 0.2371160387992859,
"learning_rate": 1.4467200000000001e-05,
"loss": 0.9397,
"step": 58000
},
{
"epoch": 0.92832,
"grad_norm": 0.2032940536737442,
"learning_rate": 1.44032e-05,
"loss": 0.8406,
"step": 58020
},
{
"epoch": 0.92864,
"grad_norm": 0.20223209261894226,
"learning_rate": 1.43392e-05,
"loss": 0.8387,
"step": 58040
},
{
"epoch": 0.92896,
"grad_norm": 0.19026170670986176,
"learning_rate": 1.42752e-05,
"loss": 0.8826,
"step": 58060
},
{
"epoch": 0.92928,
"grad_norm": 0.23420760035514832,
"learning_rate": 1.4211199999999999e-05,
"loss": 0.8779,
"step": 58080
},
{
"epoch": 0.9296,
"grad_norm": 0.19909054040908813,
"learning_rate": 1.4147199999999999e-05,
"loss": 0.8931,
"step": 58100
},
{
"epoch": 0.92992,
"grad_norm": 0.23734207451343536,
"learning_rate": 1.4083200000000003e-05,
"loss": 0.8522,
"step": 58120
},
{
"epoch": 0.93024,
"grad_norm": 0.1749676764011383,
"learning_rate": 1.4019200000000001e-05,
"loss": 0.8714,
"step": 58140
},
{
"epoch": 0.93056,
"grad_norm": 0.21267655491828918,
"learning_rate": 1.3955200000000002e-05,
"loss": 0.8442,
"step": 58160
},
{
"epoch": 0.93088,
"grad_norm": 0.25414636731147766,
"learning_rate": 1.3891200000000002e-05,
"loss": 0.9268,
"step": 58180
},
{
"epoch": 0.9312,
"grad_norm": 0.20942141115665436,
"learning_rate": 1.3827200000000002e-05,
"loss": 0.8509,
"step": 58200
},
{
"epoch": 0.93152,
"grad_norm": 0.2121083289384842,
"learning_rate": 1.37632e-05,
"loss": 0.8751,
"step": 58220
},
{
"epoch": 0.93184,
"grad_norm": 0.21758875250816345,
"learning_rate": 1.3699200000000001e-05,
"loss": 0.8875,
"step": 58240
},
{
"epoch": 0.93216,
"grad_norm": 0.188105046749115,
"learning_rate": 1.3635200000000001e-05,
"loss": 0.8876,
"step": 58260
},
{
"epoch": 0.93248,
"grad_norm": 0.18917614221572876,
"learning_rate": 1.35712e-05,
"loss": 0.8453,
"step": 58280
},
{
"epoch": 0.9328,
"grad_norm": 0.19364288449287415,
"learning_rate": 1.35072e-05,
"loss": 0.829,
"step": 58300
},
{
"epoch": 0.93312,
"grad_norm": 0.19416366517543793,
"learning_rate": 1.34432e-05,
"loss": 0.8625,
"step": 58320
},
{
"epoch": 0.93344,
"grad_norm": 0.2294871211051941,
"learning_rate": 1.33792e-05,
"loss": 0.8696,
"step": 58340
},
{
"epoch": 0.93376,
"grad_norm": 0.21654824912548065,
"learning_rate": 1.33152e-05,
"loss": 0.8733,
"step": 58360
},
{
"epoch": 0.93408,
"grad_norm": 0.2053345888853073,
"learning_rate": 1.32512e-05,
"loss": 0.9046,
"step": 58380
},
{
"epoch": 0.9344,
"grad_norm": 0.18881118297576904,
"learning_rate": 1.31872e-05,
"loss": 0.8252,
"step": 58400
},
{
"epoch": 0.93472,
"grad_norm": 0.23716068267822266,
"learning_rate": 1.31232e-05,
"loss": 0.8928,
"step": 58420
},
{
"epoch": 0.93504,
"grad_norm": 0.19801940023899078,
"learning_rate": 1.3059200000000002e-05,
"loss": 0.8692,
"step": 58440
},
{
"epoch": 0.93536,
"grad_norm": 0.2585828900337219,
"learning_rate": 1.2995200000000002e-05,
"loss": 0.9274,
"step": 58460
},
{
"epoch": 0.93568,
"grad_norm": 0.31189459562301636,
"learning_rate": 1.29312e-05,
"loss": 0.8347,
"step": 58480
},
{
"epoch": 0.936,
"grad_norm": 0.24190790951251984,
"learning_rate": 1.2867200000000001e-05,
"loss": 0.8476,
"step": 58500
},
{
"epoch": 0.93632,
"grad_norm": 0.2098657637834549,
"learning_rate": 1.2803200000000001e-05,
"loss": 0.8735,
"step": 58520
},
{
"epoch": 0.93664,
"grad_norm": 0.2108910232782364,
"learning_rate": 1.2739200000000002e-05,
"loss": 0.874,
"step": 58540
},
{
"epoch": 0.93696,
"grad_norm": 0.2129533439874649,
"learning_rate": 1.26752e-05,
"loss": 0.8792,
"step": 58560
},
{
"epoch": 0.93728,
"grad_norm": 0.20992882549762726,
"learning_rate": 1.26112e-05,
"loss": 0.9094,
"step": 58580
},
{
"epoch": 0.9376,
"grad_norm": 0.24041472375392914,
"learning_rate": 1.25472e-05,
"loss": 0.927,
"step": 58600
},
{
"epoch": 0.93792,
"grad_norm": 0.20306578278541565,
"learning_rate": 1.2483200000000001e-05,
"loss": 0.8776,
"step": 58620
},
{
"epoch": 0.93824,
"grad_norm": 0.22501681745052338,
"learning_rate": 1.2422400000000002e-05,
"loss": 0.9315,
"step": 58640
},
{
"epoch": 0.93856,
"grad_norm": 0.23260138928890228,
"learning_rate": 1.23584e-05,
"loss": 0.8216,
"step": 58660
},
{
"epoch": 0.93888,
"grad_norm": 0.18364718556404114,
"learning_rate": 1.22944e-05,
"loss": 0.8805,
"step": 58680
},
{
"epoch": 0.9392,
"grad_norm": 0.23733025789260864,
"learning_rate": 1.22304e-05,
"loss": 0.8966,
"step": 58700
},
{
"epoch": 0.93952,
"grad_norm": 0.19902455806732178,
"learning_rate": 1.21664e-05,
"loss": 0.9097,
"step": 58720
},
{
"epoch": 0.93984,
"grad_norm": 0.25381171703338623,
"learning_rate": 1.2102400000000001e-05,
"loss": 0.9078,
"step": 58740
},
{
"epoch": 0.94016,
"grad_norm": 0.22020556032657623,
"learning_rate": 1.2038400000000001e-05,
"loss": 0.8428,
"step": 58760
},
{
"epoch": 0.94048,
"grad_norm": 0.21431581676006317,
"learning_rate": 1.19744e-05,
"loss": 0.8228,
"step": 58780
},
{
"epoch": 0.9408,
"grad_norm": 0.24102531373500824,
"learning_rate": 1.19104e-05,
"loss": 0.886,
"step": 58800
},
{
"epoch": 0.94112,
"grad_norm": 0.20612405240535736,
"learning_rate": 1.18464e-05,
"loss": 0.8438,
"step": 58820
},
{
"epoch": 0.94144,
"grad_norm": 0.2482701539993286,
"learning_rate": 1.1782400000000001e-05,
"loss": 0.9063,
"step": 58840
},
{
"epoch": 0.94176,
"grad_norm": 0.2273882031440735,
"learning_rate": 1.17184e-05,
"loss": 0.8267,
"step": 58860
},
{
"epoch": 0.94208,
"grad_norm": 0.19269466400146484,
"learning_rate": 1.16544e-05,
"loss": 0.8889,
"step": 58880
},
{
"epoch": 0.9424,
"grad_norm": 0.24158763885498047,
"learning_rate": 1.15904e-05,
"loss": 0.8983,
"step": 58900
},
{
"epoch": 0.94272,
"grad_norm": 0.18431192636489868,
"learning_rate": 1.15264e-05,
"loss": 0.8202,
"step": 58920
},
{
"epoch": 0.94304,
"grad_norm": 0.2194778025150299,
"learning_rate": 1.14624e-05,
"loss": 0.8591,
"step": 58940
},
{
"epoch": 0.94336,
"grad_norm": 0.2087930589914322,
"learning_rate": 1.13984e-05,
"loss": 0.882,
"step": 58960
},
{
"epoch": 0.94368,
"grad_norm": 0.20091105997562408,
"learning_rate": 1.1334400000000001e-05,
"loss": 0.8729,
"step": 58980
},
{
"epoch": 0.944,
"grad_norm": 0.22069287300109863,
"learning_rate": 1.12704e-05,
"loss": 0.8489,
"step": 59000
},
{
"epoch": 0.94432,
"grad_norm": 0.19174374639987946,
"learning_rate": 1.12064e-05,
"loss": 0.8284,
"step": 59020
},
{
"epoch": 0.94464,
"grad_norm": 0.20969411730766296,
"learning_rate": 1.11424e-05,
"loss": 0.8967,
"step": 59040
},
{
"epoch": 0.94496,
"grad_norm": 0.21621152758598328,
"learning_rate": 1.10784e-05,
"loss": 0.8646,
"step": 59060
},
{
"epoch": 0.94528,
"grad_norm": 0.2245556265115738,
"learning_rate": 1.10144e-05,
"loss": 0.961,
"step": 59080
},
{
"epoch": 0.9456,
"grad_norm": 0.2487545758485794,
"learning_rate": 1.0950400000000001e-05,
"loss": 0.9026,
"step": 59100
},
{
"epoch": 0.94592,
"grad_norm": 0.2444257289171219,
"learning_rate": 1.0886400000000001e-05,
"loss": 0.861,
"step": 59120
},
{
"epoch": 0.94624,
"grad_norm": 0.19312791526317596,
"learning_rate": 1.08224e-05,
"loss": 0.8604,
"step": 59140
},
{
"epoch": 0.94656,
"grad_norm": 0.20798088610172272,
"learning_rate": 1.07584e-05,
"loss": 0.8886,
"step": 59160
},
{
"epoch": 0.94688,
"grad_norm": 0.22413522005081177,
"learning_rate": 1.06944e-05,
"loss": 0.8656,
"step": 59180
},
{
"epoch": 0.9472,
"grad_norm": 0.19229187071323395,
"learning_rate": 1.06304e-05,
"loss": 0.8742,
"step": 59200
},
{
"epoch": 0.94752,
"grad_norm": 0.21749311685562134,
"learning_rate": 1.05664e-05,
"loss": 0.9038,
"step": 59220
},
{
"epoch": 0.94784,
"grad_norm": 0.21696510910987854,
"learning_rate": 1.0502400000000001e-05,
"loss": 0.8729,
"step": 59240
},
{
"epoch": 0.94816,
"grad_norm": 0.21962523460388184,
"learning_rate": 1.0438400000000002e-05,
"loss": 0.9075,
"step": 59260
},
{
"epoch": 0.94848,
"grad_norm": 0.2188694030046463,
"learning_rate": 1.03744e-05,
"loss": 0.8401,
"step": 59280
},
{
"epoch": 0.9488,
"grad_norm": 0.17384332418441772,
"learning_rate": 1.03104e-05,
"loss": 0.8886,
"step": 59300
},
{
"epoch": 0.94912,
"grad_norm": 0.18422313034534454,
"learning_rate": 1.02464e-05,
"loss": 0.9251,
"step": 59320
},
{
"epoch": 0.94944,
"grad_norm": 0.2241748720407486,
"learning_rate": 1.0182400000000001e-05,
"loss": 0.8966,
"step": 59340
},
{
"epoch": 0.94976,
"grad_norm": 0.2653813660144806,
"learning_rate": 1.01184e-05,
"loss": 0.853,
"step": 59360
},
{
"epoch": 0.95008,
"grad_norm": 0.2018147110939026,
"learning_rate": 1.00544e-05,
"loss": 0.8624,
"step": 59380
},
{
"epoch": 0.9504,
"grad_norm": 0.23722539842128754,
"learning_rate": 9.990400000000002e-06,
"loss": 0.943,
"step": 59400
},
{
"epoch": 0.95072,
"grad_norm": 0.2132965326309204,
"learning_rate": 9.9264e-06,
"loss": 0.8511,
"step": 59420
},
{
"epoch": 0.95104,
"grad_norm": 0.19337671995162964,
"learning_rate": 9.8624e-06,
"loss": 0.8419,
"step": 59440
},
{
"epoch": 0.95136,
"grad_norm": 0.1993594765663147,
"learning_rate": 9.798400000000001e-06,
"loss": 0.8809,
"step": 59460
},
{
"epoch": 0.95168,
"grad_norm": 0.20051348209381104,
"learning_rate": 9.734400000000001e-06,
"loss": 0.8246,
"step": 59480
},
{
"epoch": 0.952,
"grad_norm": 0.2141229212284088,
"learning_rate": 9.6704e-06,
"loss": 0.8575,
"step": 59500
},
{
"epoch": 0.95232,
"grad_norm": 0.20707592368125916,
"learning_rate": 9.6096e-06,
"loss": 0.8953,
"step": 59520
},
{
"epoch": 0.95264,
"grad_norm": 0.2202858179807663,
"learning_rate": 9.5456e-06,
"loss": 0.893,
"step": 59540
},
{
"epoch": 0.95296,
"grad_norm": 0.22861933708190918,
"learning_rate": 9.4816e-06,
"loss": 0.8534,
"step": 59560
},
{
"epoch": 0.95328,
"grad_norm": 0.2285875827074051,
"learning_rate": 9.4176e-06,
"loss": 0.8686,
"step": 59580
},
{
"epoch": 0.9536,
"grad_norm": 0.19898247718811035,
"learning_rate": 9.3536e-06,
"loss": 0.8573,
"step": 59600
},
{
"epoch": 0.95392,
"grad_norm": 0.19063091278076172,
"learning_rate": 9.289600000000002e-06,
"loss": 0.8889,
"step": 59620
},
{
"epoch": 0.95424,
"grad_norm": 0.2302456945180893,
"learning_rate": 9.2256e-06,
"loss": 0.8927,
"step": 59640
},
{
"epoch": 0.95456,
"grad_norm": 0.23316220939159393,
"learning_rate": 9.1616e-06,
"loss": 0.8461,
"step": 59660
},
{
"epoch": 0.95488,
"grad_norm": 0.21499593555927277,
"learning_rate": 9.0976e-06,
"loss": 0.847,
"step": 59680
},
{
"epoch": 0.9552,
"grad_norm": 0.2145620733499527,
"learning_rate": 9.033600000000001e-06,
"loss": 0.9328,
"step": 59700
},
{
"epoch": 0.95552,
"grad_norm": 0.22874926030635834,
"learning_rate": 8.9696e-06,
"loss": 0.8883,
"step": 59720
},
{
"epoch": 0.95584,
"grad_norm": 0.2054702192544937,
"learning_rate": 8.9056e-06,
"loss": 0.9,
"step": 59740
},
{
"epoch": 0.95616,
"grad_norm": 0.24557848274707794,
"learning_rate": 8.8416e-06,
"loss": 0.8828,
"step": 59760
},
{
"epoch": 0.95648,
"grad_norm": 0.21342433989048004,
"learning_rate": 8.7776e-06,
"loss": 0.8664,
"step": 59780
},
{
"epoch": 0.9568,
"grad_norm": 0.21812336146831512,
"learning_rate": 8.7136e-06,
"loss": 0.8613,
"step": 59800
},
{
"epoch": 0.95712,
"grad_norm": 0.21000798046588898,
"learning_rate": 8.649600000000001e-06,
"loss": 0.8553,
"step": 59820
},
{
"epoch": 0.95744,
"grad_norm": 0.23609398305416107,
"learning_rate": 8.585600000000001e-06,
"loss": 0.8952,
"step": 59840
},
{
"epoch": 0.95776,
"grad_norm": 0.23479294776916504,
"learning_rate": 8.5216e-06,
"loss": 0.9345,
"step": 59860
},
{
"epoch": 0.95808,
"grad_norm": 0.1910203993320465,
"learning_rate": 8.4576e-06,
"loss": 0.891,
"step": 59880
},
{
"epoch": 0.9584,
"grad_norm": 0.21888737380504608,
"learning_rate": 8.3936e-06,
"loss": 0.8615,
"step": 59900
},
{
"epoch": 0.95872,
"grad_norm": 0.2583872973918915,
"learning_rate": 8.3296e-06,
"loss": 0.8452,
"step": 59920
},
{
"epoch": 0.95904,
"grad_norm": 0.21396338939666748,
"learning_rate": 8.2656e-06,
"loss": 0.8582,
"step": 59940
},
{
"epoch": 0.95936,
"grad_norm": 0.18018275499343872,
"learning_rate": 8.201600000000001e-06,
"loss": 0.9053,
"step": 59960
},
{
"epoch": 0.95968,
"grad_norm": 0.24706120789051056,
"learning_rate": 8.137600000000002e-06,
"loss": 0.8794,
"step": 59980
},
{
"epoch": 0.96,
"grad_norm": 0.19996990263462067,
"learning_rate": 8.0736e-06,
"loss": 0.9093,
"step": 60000
},
{
"epoch": 0.96032,
"grad_norm": 0.1947757452726364,
"learning_rate": 8.0096e-06,
"loss": 0.871,
"step": 60020
},
{
"epoch": 0.96064,
"grad_norm": 0.19846396148204803,
"learning_rate": 7.9456e-06,
"loss": 0.8555,
"step": 60040
},
{
"epoch": 0.96096,
"grad_norm": 0.2101791501045227,
"learning_rate": 7.881600000000001e-06,
"loss": 0.8805,
"step": 60060
},
{
"epoch": 0.96128,
"grad_norm": 0.2531636953353882,
"learning_rate": 7.8176e-06,
"loss": 0.8721,
"step": 60080
},
{
"epoch": 0.9616,
"grad_norm": 0.21089531481266022,
"learning_rate": 7.7536e-06,
"loss": 0.8266,
"step": 60100
},
{
"epoch": 0.96192,
"grad_norm": 0.16842779517173767,
"learning_rate": 7.689600000000002e-06,
"loss": 0.8531,
"step": 60120
},
{
"epoch": 0.96224,
"grad_norm": 0.21880729496479034,
"learning_rate": 7.625600000000001e-06,
"loss": 0.9024,
"step": 60140
},
{
"epoch": 0.96256,
"grad_norm": 0.196882426738739,
"learning_rate": 7.561600000000001e-06,
"loss": 0.8672,
"step": 60160
},
{
"epoch": 0.96288,
"grad_norm": 0.16088998317718506,
"learning_rate": 7.497600000000001e-06,
"loss": 0.8431,
"step": 60180
},
{
"epoch": 0.9632,
"grad_norm": 0.19500739872455597,
"learning_rate": 7.4336e-06,
"loss": 0.8772,
"step": 60200
},
{
"epoch": 0.96352,
"grad_norm": 0.22621487081050873,
"learning_rate": 7.3696e-06,
"loss": 0.89,
"step": 60220
},
{
"epoch": 0.96384,
"grad_norm": 0.20538878440856934,
"learning_rate": 7.3056e-06,
"loss": 0.9046,
"step": 60240
},
{
"epoch": 0.96416,
"grad_norm": 0.21844108402729034,
"learning_rate": 7.241599999999999e-06,
"loss": 0.8146,
"step": 60260
},
{
"epoch": 0.96448,
"grad_norm": 0.19265195727348328,
"learning_rate": 7.177600000000001e-06,
"loss": 0.919,
"step": 60280
},
{
"epoch": 0.9648,
"grad_norm": 0.2025534063577652,
"learning_rate": 7.113600000000001e-06,
"loss": 0.8966,
"step": 60300
},
{
"epoch": 0.96512,
"grad_norm": 0.2134266346693039,
"learning_rate": 7.0496e-06,
"loss": 0.91,
"step": 60320
},
{
"epoch": 0.96544,
"grad_norm": 0.25129690766334534,
"learning_rate": 6.9856000000000005e-06,
"loss": 0.8821,
"step": 60340
},
{
"epoch": 0.96576,
"grad_norm": 0.23355615139007568,
"learning_rate": 6.9216e-06,
"loss": 0.8755,
"step": 60360
},
{
"epoch": 0.96608,
"grad_norm": 0.22555451095104218,
"learning_rate": 6.8576e-06,
"loss": 0.8959,
"step": 60380
},
{
"epoch": 0.9664,
"grad_norm": 0.23309843242168427,
"learning_rate": 6.7936e-06,
"loss": 0.8482,
"step": 60400
},
{
"epoch": 0.96672,
"grad_norm": 0.2550283372402191,
"learning_rate": 6.7296e-06,
"loss": 0.8756,
"step": 60420
},
{
"epoch": 0.96704,
"grad_norm": 0.24770893156528473,
"learning_rate": 6.665600000000001e-06,
"loss": 0.8266,
"step": 60440
},
{
"epoch": 0.96736,
"grad_norm": 0.19504638016223907,
"learning_rate": 6.6016000000000005e-06,
"loss": 0.8924,
"step": 60460
},
{
"epoch": 0.96768,
"grad_norm": 0.1870088279247284,
"learning_rate": 6.537600000000001e-06,
"loss": 0.8819,
"step": 60480
},
{
"epoch": 0.968,
"grad_norm": 0.19606348872184753,
"learning_rate": 6.4736e-06,
"loss": 0.8583,
"step": 60500
},
{
"epoch": 0.96832,
"grad_norm": 0.21062546968460083,
"learning_rate": 6.4096000000000004e-06,
"loss": 0.8318,
"step": 60520
},
{
"epoch": 0.96864,
"grad_norm": 0.2244090735912323,
"learning_rate": 6.3456e-06,
"loss": 0.8503,
"step": 60540
},
{
"epoch": 0.96896,
"grad_norm": 0.2132522016763687,
"learning_rate": 6.2816e-06,
"loss": 0.8805,
"step": 60560
},
{
"epoch": 0.96928,
"grad_norm": 0.21387845277786255,
"learning_rate": 6.2176e-06,
"loss": 0.8843,
"step": 60580
},
{
"epoch": 0.9696,
"grad_norm": 0.20109587907791138,
"learning_rate": 6.153600000000001e-06,
"loss": 0.8487,
"step": 60600
},
{
"epoch": 0.96992,
"grad_norm": 0.20364026725292206,
"learning_rate": 6.0896e-06,
"loss": 0.8737,
"step": 60620
},
{
"epoch": 0.97024,
"grad_norm": 0.22072643041610718,
"learning_rate": 6.0256e-06,
"loss": 0.881,
"step": 60640
},
{
"epoch": 0.97056,
"grad_norm": 0.24078206717967987,
"learning_rate": 5.961600000000001e-06,
"loss": 0.8459,
"step": 60660
},
{
"epoch": 0.97088,
"grad_norm": 0.24617841839790344,
"learning_rate": 5.8976e-06,
"loss": 0.894,
"step": 60680
},
{
"epoch": 0.9712,
"grad_norm": 0.2186897248029709,
"learning_rate": 5.8336e-06,
"loss": 0.8612,
"step": 60700
},
{
"epoch": 0.97152,
"grad_norm": 0.1752256453037262,
"learning_rate": 5.7696e-06,
"loss": 0.8619,
"step": 60720
},
{
"epoch": 0.97184,
"grad_norm": 0.2205258458852768,
"learning_rate": 5.705600000000001e-06,
"loss": 0.8847,
"step": 60740
},
{
"epoch": 0.97216,
"grad_norm": 0.20608656108379364,
"learning_rate": 5.6416e-06,
"loss": 0.8728,
"step": 60760
},
{
"epoch": 0.97248,
"grad_norm": 0.18989993631839752,
"learning_rate": 5.577600000000001e-06,
"loss": 0.8496,
"step": 60780
},
{
"epoch": 0.9728,
"grad_norm": 0.2024667114019394,
"learning_rate": 5.5136e-06,
"loss": 0.8883,
"step": 60800
},
{
"epoch": 0.97312,
"grad_norm": 0.17910663783550262,
"learning_rate": 5.4496e-06,
"loss": 0.8628,
"step": 60820
},
{
"epoch": 0.97344,
"grad_norm": 0.19510726630687714,
"learning_rate": 5.385600000000001e-06,
"loss": 0.8632,
"step": 60840
},
{
"epoch": 0.97376,
"grad_norm": 0.20516471564769745,
"learning_rate": 5.3216e-06,
"loss": 0.9158,
"step": 60860
},
{
"epoch": 0.97408,
"grad_norm": 0.2369288057088852,
"learning_rate": 5.2576e-06,
"loss": 0.9067,
"step": 60880
},
{
"epoch": 0.9744,
"grad_norm": 0.23473519086837769,
"learning_rate": 5.1936000000000006e-06,
"loss": 0.8675,
"step": 60900
},
{
"epoch": 0.97472,
"grad_norm": 0.20721520483493805,
"learning_rate": 5.1296e-06,
"loss": 0.857,
"step": 60920
},
{
"epoch": 0.97504,
"grad_norm": 0.25758302211761475,
"learning_rate": 5.0656e-06,
"loss": 0.854,
"step": 60940
},
{
"epoch": 0.97536,
"grad_norm": 0.23826448619365692,
"learning_rate": 5.0016e-06,
"loss": 0.865,
"step": 60960
},
{
"epoch": 0.97568,
"grad_norm": 0.21797384321689606,
"learning_rate": 4.937600000000001e-06,
"loss": 0.8956,
"step": 60980
},
{
"epoch": 0.976,
"grad_norm": 0.2073042243719101,
"learning_rate": 4.8736e-06,
"loss": 0.8522,
"step": 61000
},
{
"epoch": 0.97632,
"grad_norm": 0.1850445568561554,
"learning_rate": 4.8096000000000005e-06,
"loss": 0.945,
"step": 61020
},
{
"epoch": 0.97664,
"grad_norm": 0.2104647010564804,
"learning_rate": 4.7456e-06,
"loss": 0.8828,
"step": 61040
},
{
"epoch": 0.97696,
"grad_norm": 0.17378270626068115,
"learning_rate": 4.6816e-06,
"loss": 0.896,
"step": 61060
},
{
"epoch": 0.97728,
"grad_norm": 0.21518754959106445,
"learning_rate": 4.6176000000000005e-06,
"loss": 0.8639,
"step": 61080
},
{
"epoch": 0.9776,
"grad_norm": 0.22130274772644043,
"learning_rate": 4.5536e-06,
"loss": 0.8755,
"step": 61100
},
{
"epoch": 0.97792,
"grad_norm": 0.24225564301013947,
"learning_rate": 4.4896e-06,
"loss": 0.8747,
"step": 61120
},
{
"epoch": 0.97824,
"grad_norm": 0.19240306317806244,
"learning_rate": 4.4256e-06,
"loss": 0.9133,
"step": 61140
},
{
"epoch": 0.97856,
"grad_norm": 0.18395653367042542,
"learning_rate": 4.361600000000001e-06,
"loss": 0.8321,
"step": 61160
},
{
"epoch": 0.97888,
"grad_norm": 0.21432001888751984,
"learning_rate": 4.2976e-06,
"loss": 0.9217,
"step": 61180
},
{
"epoch": 0.9792,
"grad_norm": 0.22985559701919556,
"learning_rate": 4.2336000000000004e-06,
"loss": 0.8819,
"step": 61200
},
{
"epoch": 0.97952,
"grad_norm": 0.18436340987682343,
"learning_rate": 4.1696e-06,
"loss": 0.9066,
"step": 61220
},
{
"epoch": 0.97984,
"grad_norm": 0.2279936820268631,
"learning_rate": 4.1056e-06,
"loss": 0.8828,
"step": 61240
},
{
"epoch": 0.98016,
"grad_norm": 0.23359614610671997,
"learning_rate": 4.0416e-06,
"loss": 0.873,
"step": 61260
},
{
"epoch": 0.98048,
"grad_norm": 0.21372786164283752,
"learning_rate": 3.9776e-06,
"loss": 0.8726,
"step": 61280
},
{
"epoch": 0.9808,
"grad_norm": 0.17267848551273346,
"learning_rate": 3.9136e-06,
"loss": 0.8408,
"step": 61300
},
{
"epoch": 0.98112,
"grad_norm": 0.19871732592582703,
"learning_rate": 3.8496e-06,
"loss": 0.8697,
"step": 61320
},
{
"epoch": 0.98144,
"grad_norm": 0.22875666618347168,
"learning_rate": 3.7856000000000002e-06,
"loss": 0.9083,
"step": 61340
},
{
"epoch": 0.98176,
"grad_norm": 0.22814328968524933,
"learning_rate": 3.7216e-06,
"loss": 0.8542,
"step": 61360
},
{
"epoch": 0.98208,
"grad_norm": 0.19292208552360535,
"learning_rate": 3.6576e-06,
"loss": 0.7783,
"step": 61380
},
{
"epoch": 0.9824,
"grad_norm": 0.20703838765621185,
"learning_rate": 3.5936000000000006e-06,
"loss": 0.8342,
"step": 61400
},
{
"epoch": 0.98272,
"grad_norm": 0.1766250729560852,
"learning_rate": 3.5296000000000005e-06,
"loss": 0.9091,
"step": 61420
},
{
"epoch": 0.98304,
"grad_norm": 0.1929435431957245,
"learning_rate": 3.4656e-06,
"loss": 0.8704,
"step": 61440
},
{
"epoch": 0.98336,
"grad_norm": 0.269422709941864,
"learning_rate": 3.4015999999999998e-06,
"loss": 0.8677,
"step": 61460
},
{
"epoch": 0.98368,
"grad_norm": 0.17369483411312103,
"learning_rate": 3.3376000000000005e-06,
"loss": 0.8388,
"step": 61480
},
{
"epoch": 0.984,
"grad_norm": 0.24999183416366577,
"learning_rate": 3.2736000000000003e-06,
"loss": 0.8884,
"step": 61500
},
{
"epoch": 0.98432,
"grad_norm": 0.2378091812133789,
"learning_rate": 3.2096e-06,
"loss": 0.8644,
"step": 61520
},
{
"epoch": 0.98464,
"grad_norm": 0.23447105288505554,
"learning_rate": 3.1456e-06,
"loss": 0.8962,
"step": 61540
},
{
"epoch": 0.98496,
"grad_norm": 0.22000399231910706,
"learning_rate": 3.0816000000000003e-06,
"loss": 0.8297,
"step": 61560
},
{
"epoch": 0.98528,
"grad_norm": 0.20318488776683807,
"learning_rate": 3.0176e-06,
"loss": 0.8639,
"step": 61580
},
{
"epoch": 0.9856,
"grad_norm": 0.22560527920722961,
"learning_rate": 2.9536e-06,
"loss": 0.8701,
"step": 61600
},
{
"epoch": 0.98592,
"grad_norm": 0.2516303062438965,
"learning_rate": 2.8896000000000003e-06,
"loss": 0.9267,
"step": 61620
},
{
"epoch": 0.98624,
"grad_norm": 0.2284599393606186,
"learning_rate": 2.8256e-06,
"loss": 0.858,
"step": 61640
},
{
"epoch": 0.98656,
"grad_norm": 0.22970664501190186,
"learning_rate": 2.7616000000000004e-06,
"loss": 0.8553,
"step": 61660
},
{
"epoch": 0.98688,
"grad_norm": 0.19490259885787964,
"learning_rate": 2.6976000000000002e-06,
"loss": 0.8,
"step": 61680
},
{
"epoch": 0.9872,
"grad_norm": 0.2025730013847351,
"learning_rate": 2.6336e-06,
"loss": 0.8664,
"step": 61700
},
{
"epoch": 0.98752,
"grad_norm": 0.202545627951622,
"learning_rate": 2.5696e-06,
"loss": 0.8784,
"step": 61720
},
{
"epoch": 0.98784,
"grad_norm": 0.21990488469600677,
"learning_rate": 2.5055999999999998e-06,
"loss": 0.8768,
"step": 61740
},
{
"epoch": 0.98816,
"grad_norm": 0.2364804446697235,
"learning_rate": 2.4416e-06,
"loss": 0.9087,
"step": 61760
},
{
"epoch": 0.98848,
"grad_norm": 0.20863565802574158,
"learning_rate": 2.3776e-06,
"loss": 0.8558,
"step": 61780
},
{
"epoch": 0.9888,
"grad_norm": 0.21479357779026031,
"learning_rate": 2.3136e-06,
"loss": 0.8801,
"step": 61800
},
{
"epoch": 0.98912,
"grad_norm": 0.2164417803287506,
"learning_rate": 2.2496e-06,
"loss": 0.8483,
"step": 61820
},
{
"epoch": 0.98944,
"grad_norm": 0.2229296714067459,
"learning_rate": 2.1856000000000003e-06,
"loss": 0.8708,
"step": 61840
},
{
"epoch": 0.98976,
"grad_norm": 0.15766002237796783,
"learning_rate": 2.1216e-06,
"loss": 0.8485,
"step": 61860
},
{
"epoch": 0.99008,
"grad_norm": 0.1808682680130005,
"learning_rate": 2.0576e-06,
"loss": 0.8397,
"step": 61880
},
{
"epoch": 0.9904,
"grad_norm": 0.23974210023880005,
"learning_rate": 1.9936e-06,
"loss": 0.8173,
"step": 61900
},
{
"epoch": 0.99072,
"grad_norm": 0.1957724243402481,
"learning_rate": 1.9296e-06,
"loss": 0.8513,
"step": 61920
},
{
"epoch": 0.99104,
"grad_norm": 0.1994854062795639,
"learning_rate": 1.8656e-06,
"loss": 0.8988,
"step": 61940
},
{
"epoch": 0.99136,
"grad_norm": 0.21945121884346008,
"learning_rate": 1.8016000000000003e-06,
"loss": 0.8666,
"step": 61960
},
{
"epoch": 0.99168,
"grad_norm": 0.23173430562019348,
"learning_rate": 1.7375999999999999e-06,
"loss": 0.8751,
"step": 61980
},
{
"epoch": 0.992,
"grad_norm": 0.23692530393600464,
"learning_rate": 1.6736000000000002e-06,
"loss": 0.8905,
"step": 62000
},
{
"epoch": 0.99232,
"grad_norm": 0.2442493587732315,
"learning_rate": 1.6096e-06,
"loss": 0.8955,
"step": 62020
},
{
"epoch": 0.99264,
"grad_norm": 0.22609354555606842,
"learning_rate": 1.5456e-06,
"loss": 0.8639,
"step": 62040
},
{
"epoch": 0.99296,
"grad_norm": 0.19429120421409607,
"learning_rate": 1.4816e-06,
"loss": 0.8911,
"step": 62060
},
{
"epoch": 0.99328,
"grad_norm": 0.23542903363704681,
"learning_rate": 1.4176e-06,
"loss": 0.9274,
"step": 62080
},
{
"epoch": 0.9936,
"grad_norm": 0.21632255613803864,
"learning_rate": 1.3536e-06,
"loss": 0.9395,
"step": 62100
},
{
"epoch": 0.99392,
"grad_norm": 0.21293993294239044,
"learning_rate": 1.2896000000000001e-06,
"loss": 0.8639,
"step": 62120
},
{
"epoch": 0.99424,
"grad_norm": 0.20467883348464966,
"learning_rate": 1.2256e-06,
"loss": 0.837,
"step": 62140
},
{
"epoch": 0.99456,
"grad_norm": 0.2215726226568222,
"learning_rate": 1.1616e-06,
"loss": 0.9212,
"step": 62160
},
{
"epoch": 0.99488,
"grad_norm": 0.2058229297399521,
"learning_rate": 1.0976e-06,
"loss": 0.8627,
"step": 62180
},
{
"epoch": 0.9952,
"grad_norm": 0.19800324738025665,
"learning_rate": 1.0336e-06,
"loss": 0.8536,
"step": 62200
},
{
"epoch": 0.99552,
"grad_norm": 0.24008409678936005,
"learning_rate": 9.696e-07,
"loss": 0.9235,
"step": 62220
},
{
"epoch": 0.99584,
"grad_norm": 0.1996374875307083,
"learning_rate": 9.056000000000001e-07,
"loss": 0.8699,
"step": 62240
},
{
"epoch": 0.99616,
"grad_norm": 0.20383226871490479,
"learning_rate": 8.416e-07,
"loss": 0.9042,
"step": 62260
},
{
"epoch": 0.99648,
"grad_norm": 0.23892252147197723,
"learning_rate": 7.776000000000001e-07,
"loss": 0.8895,
"step": 62280
},
{
"epoch": 0.9968,
"grad_norm": 0.23595763742923737,
"learning_rate": 7.136e-07,
"loss": 0.9093,
"step": 62300
},
{
"epoch": 0.99712,
"grad_norm": 0.168944850564003,
"learning_rate": 6.496e-07,
"loss": 0.8377,
"step": 62320
},
{
"epoch": 0.99744,
"grad_norm": 0.22417670488357544,
"learning_rate": 5.856000000000001e-07,
"loss": 0.9073,
"step": 62340
},
{
"epoch": 0.99776,
"grad_norm": 0.25286805629730225,
"learning_rate": 5.216e-07,
"loss": 0.8761,
"step": 62360
},
{
"epoch": 0.99808,
"grad_norm": 0.25818583369255066,
"learning_rate": 4.576e-07,
"loss": 0.9176,
"step": 62380
},
{
"epoch": 0.9984,
"grad_norm": 0.20294295251369476,
"learning_rate": 3.9360000000000003e-07,
"loss": 0.9181,
"step": 62400
},
{
"epoch": 0.99872,
"grad_norm": 0.2515595555305481,
"learning_rate": 3.296e-07,
"loss": 0.8951,
"step": 62420
},
{
"epoch": 0.99904,
"grad_norm": 0.21728461980819702,
"learning_rate": 2.656e-07,
"loss": 0.8226,
"step": 62440
},
{
"epoch": 0.99936,
"grad_norm": 0.2443869262933731,
"learning_rate": 2.016e-07,
"loss": 0.8448,
"step": 62460
},
{
"epoch": 0.99968,
"grad_norm": 0.2046569287776947,
"learning_rate": 1.3760000000000001e-07,
"loss": 0.881,
"step": 62480
},
{
"epoch": 1.0,
"grad_norm": 0.21948722004890442,
"learning_rate": 7.36e-08,
"loss": 0.9002,
"step": 62500
}
],
"logging_steps": 20,
"max_steps": 62500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 600,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1005725769728e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}