{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 62980,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015878056525881232,
"grad_norm": 69456.203125,
"learning_rate": 4.992219752302319e-05,
"loss": 3.3489,
"step": 50
},
{
"epoch": 0.031756113051762465,
"grad_norm": 28315.216796875,
"learning_rate": 4.984280724039378e-05,
"loss": 2.1825,
"step": 100
},
{
"epoch": 0.047634169577643694,
"grad_norm": 22939.28515625,
"learning_rate": 4.976341695776437e-05,
"loss": 1.8984,
"step": 150
},
{
"epoch": 0.06351222610352493,
"grad_norm": 23446.751953125,
"learning_rate": 4.968402667513497e-05,
"loss": 1.8356,
"step": 200
},
{
"epoch": 0.07939028262940616,
"grad_norm": 15620.2763671875,
"learning_rate": 4.960463639250556e-05,
"loss": 1.8063,
"step": 250
},
{
"epoch": 0.09526833915528739,
"grad_norm": 17849.13671875,
"learning_rate": 4.9525246109876156e-05,
"loss": 1.7904,
"step": 300
},
{
"epoch": 0.11114639568116863,
"grad_norm": 16811.81640625,
"learning_rate": 4.944585582724674e-05,
"loss": 1.7763,
"step": 350
},
{
"epoch": 0.12702445220704986,
"grad_norm": 15963.05859375,
"learning_rate": 4.936646554461734e-05,
"loss": 1.7694,
"step": 400
},
{
"epoch": 0.1429025087329311,
"grad_norm": 13125.708984375,
"learning_rate": 4.928707526198794e-05,
"loss": 1.7613,
"step": 450
},
{
"epoch": 0.15878056525881232,
"grad_norm": 13808.8876953125,
"learning_rate": 4.920768497935853e-05,
"loss": 1.7565,
"step": 500
},
{
"epoch": 0.17465862178469355,
"grad_norm": 15005.7353515625,
"learning_rate": 4.912829469672912e-05,
"loss": 1.7512,
"step": 550
},
{
"epoch": 0.19053667831057478,
"grad_norm": 15644.3916015625,
"learning_rate": 4.904890441409972e-05,
"loss": 1.7457,
"step": 600
},
{
"epoch": 0.206414734836456,
"grad_norm": 13973.876953125,
"learning_rate": 4.896951413147031e-05,
"loss": 1.7395,
"step": 650
},
{
"epoch": 0.22229279136233726,
"grad_norm": 16523.75390625,
"learning_rate": 4.88901238488409e-05,
"loss": 1.7285,
"step": 700
},
{
"epoch": 0.2381708478882185,
"grad_norm": 25229.689453125,
"learning_rate": 4.88107335662115e-05,
"loss": 1.7046,
"step": 750
},
{
"epoch": 0.2540489044140997,
"grad_norm": 48292.5078125,
"learning_rate": 4.8731343283582094e-05,
"loss": 1.664,
"step": 800
},
{
"epoch": 0.2699269609399809,
"grad_norm": 38503.4296875,
"learning_rate": 4.865195300095269e-05,
"loss": 1.6121,
"step": 850
},
{
"epoch": 0.2858050174658622,
"grad_norm": 48101.1484375,
"learning_rate": 4.8572562718323275e-05,
"loss": 1.5622,
"step": 900
},
{
"epoch": 0.30168307399174343,
"grad_norm": 63038.48046875,
"learning_rate": 4.8493172435693875e-05,
"loss": 1.5095,
"step": 950
},
{
"epoch": 0.31756113051762463,
"grad_norm": 80867.75,
"learning_rate": 4.841378215306447e-05,
"loss": 1.4587,
"step": 1000
},
{
"epoch": 0.3334391870435059,
"grad_norm": 42233.703125,
"learning_rate": 4.8334391870435056e-05,
"loss": 1.4149,
"step": 1050
},
{
"epoch": 0.3493172435693871,
"grad_norm": 53614.875,
"learning_rate": 4.825500158780566e-05,
"loss": 1.375,
"step": 1100
},
{
"epoch": 0.36519530009526835,
"grad_norm": 43879.546875,
"learning_rate": 4.817561130517625e-05,
"loss": 1.3372,
"step": 1150
},
{
"epoch": 0.38107335662114955,
"grad_norm": 43690.40625,
"learning_rate": 4.8096221022546844e-05,
"loss": 1.3026,
"step": 1200
},
{
"epoch": 0.3969514131470308,
"grad_norm": 46655.68359375,
"learning_rate": 4.801683073991743e-05,
"loss": 1.2798,
"step": 1250
},
{
"epoch": 0.412829469672912,
"grad_norm": 39245.97265625,
"learning_rate": 4.793744045728803e-05,
"loss": 1.2521,
"step": 1300
},
{
"epoch": 0.42870752619879327,
"grad_norm": 52106.55859375,
"learning_rate": 4.7858050174658626e-05,
"loss": 1.2299,
"step": 1350
},
{
"epoch": 0.4445855827246745,
"grad_norm": 41391.16796875,
"learning_rate": 4.777865989202922e-05,
"loss": 1.2141,
"step": 1400
},
{
"epoch": 0.4604636392505557,
"grad_norm": 41273.65625,
"learning_rate": 4.769926960939981e-05,
"loss": 1.1954,
"step": 1450
},
{
"epoch": 0.476341695776437,
"grad_norm": 48099.3203125,
"learning_rate": 4.761987932677041e-05,
"loss": 1.1785,
"step": 1500
},
{
"epoch": 0.4922197523023182,
"grad_norm": 41167.43359375,
"learning_rate": 4.7540489044141e-05,
"loss": 1.1642,
"step": 1550
},
{
"epoch": 0.5080978088281994,
"grad_norm": 40066.16796875,
"learning_rate": 4.746109876151159e-05,
"loss": 1.15,
"step": 1600
},
{
"epoch": 0.5239758653540807,
"grad_norm": 57272.24609375,
"learning_rate": 4.738170847888219e-05,
"loss": 1.1426,
"step": 1650
},
{
"epoch": 0.5398539218799618,
"grad_norm": 38700.19921875,
"learning_rate": 4.730231819625278e-05,
"loss": 1.1286,
"step": 1700
},
{
"epoch": 0.5557319784058431,
"grad_norm": 39080.23828125,
"learning_rate": 4.7222927913623376e-05,
"loss": 1.116,
"step": 1750
},
{
"epoch": 0.5716100349317244,
"grad_norm": 40904.78515625,
"learning_rate": 4.714353763099397e-05,
"loss": 1.1037,
"step": 1800
},
{
"epoch": 0.5874880914576056,
"grad_norm": 46008.6328125,
"learning_rate": 4.7064147348364564e-05,
"loss": 1.1006,
"step": 1850
},
{
"epoch": 0.6033661479834869,
"grad_norm": 51325.14453125,
"learning_rate": 4.698475706573516e-05,
"loss": 1.0899,
"step": 1900
},
{
"epoch": 0.619244204509368,
"grad_norm": 38173.07421875,
"learning_rate": 4.6905366783105745e-05,
"loss": 1.0767,
"step": 1950
},
{
"epoch": 0.6351222610352493,
"grad_norm": 36432.33203125,
"learning_rate": 4.6825976500476345e-05,
"loss": 1.0737,
"step": 2000
},
{
"epoch": 0.6510003175611305,
"grad_norm": 38897.46484375,
"learning_rate": 4.674658621784694e-05,
"loss": 1.0631,
"step": 2050
},
{
"epoch": 0.6668783740870118,
"grad_norm": 36565.90234375,
"learning_rate": 4.666719593521753e-05,
"loss": 1.0611,
"step": 2100
},
{
"epoch": 0.6827564306128929,
"grad_norm": 38040.09765625,
"learning_rate": 4.658780565258813e-05,
"loss": 1.049,
"step": 2150
},
{
"epoch": 0.6986344871387742,
"grad_norm": 40119.8984375,
"learning_rate": 4.650841536995872e-05,
"loss": 1.0401,
"step": 2200
},
{
"epoch": 0.7145125436646554,
"grad_norm": 38403.37890625,
"learning_rate": 4.6429025087329314e-05,
"loss": 1.0373,
"step": 2250
},
{
"epoch": 0.7303906001905367,
"grad_norm": 35558.62109375,
"learning_rate": 4.634963480469991e-05,
"loss": 1.0327,
"step": 2300
},
{
"epoch": 0.746268656716418,
"grad_norm": 41901.265625,
"learning_rate": 4.62702445220705e-05,
"loss": 1.0258,
"step": 2350
},
{
"epoch": 0.7621467132422991,
"grad_norm": 35863.7109375,
"learning_rate": 4.6190854239441096e-05,
"loss": 1.0181,
"step": 2400
},
{
"epoch": 0.7780247697681804,
"grad_norm": 34477.07421875,
"learning_rate": 4.611146395681169e-05,
"loss": 1.0156,
"step": 2450
},
{
"epoch": 0.7939028262940616,
"grad_norm": 36488.30078125,
"learning_rate": 4.603207367418228e-05,
"loss": 1.0109,
"step": 2500
},
{
"epoch": 0.8097808828199429,
"grad_norm": 37530.74609375,
"learning_rate": 4.595268339155288e-05,
"loss": 1.0063,
"step": 2550
},
{
"epoch": 0.825658939345824,
"grad_norm": 38817.25390625,
"learning_rate": 4.587329310892347e-05,
"loss": 1.0007,
"step": 2600
},
{
"epoch": 0.8415369958717053,
"grad_norm": 36987.03515625,
"learning_rate": 4.5793902826294065e-05,
"loss": 0.9925,
"step": 2650
},
{
"epoch": 0.8574150523975865,
"grad_norm": 36874.86328125,
"learning_rate": 4.571451254366466e-05,
"loss": 0.9931,
"step": 2700
},
{
"epoch": 0.8732931089234678,
"grad_norm": 38325.61328125,
"learning_rate": 4.563512226103525e-05,
"loss": 0.984,
"step": 2750
},
{
"epoch": 0.889171165449349,
"grad_norm": 35897.0859375,
"learning_rate": 4.5555731978405846e-05,
"loss": 0.982,
"step": 2800
},
{
"epoch": 0.9050492219752302,
"grad_norm": 38165.0,
"learning_rate": 4.547634169577643e-05,
"loss": 0.9786,
"step": 2850
},
{
"epoch": 0.9209272785011114,
"grad_norm": 36260.8671875,
"learning_rate": 4.5396951413147034e-05,
"loss": 0.9736,
"step": 2900
},
{
"epoch": 0.9368053350269927,
"grad_norm": 35681.60546875,
"learning_rate": 4.531756113051763e-05,
"loss": 0.9657,
"step": 2950
},
{
"epoch": 0.952683391552874,
"grad_norm": 33949.65234375,
"learning_rate": 4.523817084788822e-05,
"loss": 0.9691,
"step": 3000
},
{
"epoch": 0.9685614480787551,
"grad_norm": 34695.6796875,
"learning_rate": 4.5158780565258815e-05,
"loss": 0.9621,
"step": 3050
},
{
"epoch": 0.9844395046046364,
"grad_norm": 42460.8359375,
"learning_rate": 4.507939028262941e-05,
"loss": 0.9627,
"step": 3100
},
{
"epoch": 1.0003175611305177,
"grad_norm": 37083.6796875,
"learning_rate": 4.5e-05,
"loss": 0.9538,
"step": 3150
},
{
"epoch": 1.0161956176563989,
"grad_norm": 35333.8359375,
"learning_rate": 4.49206097173706e-05,
"loss": 0.951,
"step": 3200
},
{
"epoch": 1.03207367418228,
"grad_norm": 34177.54296875,
"learning_rate": 4.484121943474119e-05,
"loss": 0.9496,
"step": 3250
},
{
"epoch": 1.0479517307081614,
"grad_norm": 33729.51953125,
"learning_rate": 4.4761829152111784e-05,
"loss": 0.9431,
"step": 3300
},
{
"epoch": 1.0638297872340425,
"grad_norm": 35274.5625,
"learning_rate": 4.468243886948238e-05,
"loss": 0.9405,
"step": 3350
},
{
"epoch": 1.0797078437599237,
"grad_norm": 35014.6796875,
"learning_rate": 4.460304858685297e-05,
"loss": 0.9404,
"step": 3400
},
{
"epoch": 1.095585900285805,
"grad_norm": 34381.96484375,
"learning_rate": 4.4523658304223566e-05,
"loss": 0.9367,
"step": 3450
},
{
"epoch": 1.1114639568116862,
"grad_norm": 36695.0625,
"learning_rate": 4.444426802159416e-05,
"loss": 0.9309,
"step": 3500
},
{
"epoch": 1.1273420133375676,
"grad_norm": 37935.7734375,
"learning_rate": 4.436487773896475e-05,
"loss": 0.9331,
"step": 3550
},
{
"epoch": 1.1432200698634487,
"grad_norm": 34156.03125,
"learning_rate": 4.428548745633535e-05,
"loss": 0.9251,
"step": 3600
},
{
"epoch": 1.1590981263893299,
"grad_norm": 34130.3046875,
"learning_rate": 4.420609717370594e-05,
"loss": 0.9263,
"step": 3650
},
{
"epoch": 1.1749761829152112,
"grad_norm": 35052.70703125,
"learning_rate": 4.4126706891076535e-05,
"loss": 0.9217,
"step": 3700
},
{
"epoch": 1.1908542394410924,
"grad_norm": 36061.64453125,
"learning_rate": 4.404731660844713e-05,
"loss": 0.9197,
"step": 3750
},
{
"epoch": 1.2067322959669737,
"grad_norm": 32517.8984375,
"learning_rate": 4.396792632581772e-05,
"loss": 0.9208,
"step": 3800
},
{
"epoch": 1.2226103524928549,
"grad_norm": 35002.6328125,
"learning_rate": 4.3888536043188316e-05,
"loss": 0.9197,
"step": 3850
},
{
"epoch": 1.238488409018736,
"grad_norm": 32863.40234375,
"learning_rate": 4.380914576055891e-05,
"loss": 0.9089,
"step": 3900
},
{
"epoch": 1.2543664655446174,
"grad_norm": 34469.52734375,
"learning_rate": 4.3729755477929504e-05,
"loss": 0.9127,
"step": 3950
},
{
"epoch": 1.2702445220704985,
"grad_norm": 32288.01171875,
"learning_rate": 4.36503651953001e-05,
"loss": 0.9062,
"step": 4000
},
{
"epoch": 1.28612257859638,
"grad_norm": 34935.56640625,
"learning_rate": 4.357097491267069e-05,
"loss": 0.9076,
"step": 4050
},
{
"epoch": 1.302000635122261,
"grad_norm": 32266.908203125,
"learning_rate": 4.3491584630041285e-05,
"loss": 0.9079,
"step": 4100
},
{
"epoch": 1.3178786916481422,
"grad_norm": 33226.34375,
"learning_rate": 4.341219434741188e-05,
"loss": 0.9044,
"step": 4150
},
{
"epoch": 1.3337567481740236,
"grad_norm": 34449.453125,
"learning_rate": 4.333280406478247e-05,
"loss": 0.9038,
"step": 4200
},
{
"epoch": 1.3496348046999047,
"grad_norm": 32593.271484375,
"learning_rate": 4.325341378215307e-05,
"loss": 0.8969,
"step": 4250
},
{
"epoch": 1.365512861225786,
"grad_norm": 33997.08984375,
"learning_rate": 4.317402349952366e-05,
"loss": 0.8955,
"step": 4300
},
{
"epoch": 1.3813909177516672,
"grad_norm": 34988.44921875,
"learning_rate": 4.3094633216894254e-05,
"loss": 0.8925,
"step": 4350
},
{
"epoch": 1.3972689742775484,
"grad_norm": 31535.099609375,
"learning_rate": 4.301524293426485e-05,
"loss": 0.8931,
"step": 4400
},
{
"epoch": 1.4131470308034297,
"grad_norm": 32298.515625,
"learning_rate": 4.293585265163544e-05,
"loss": 0.8914,
"step": 4450
},
{
"epoch": 1.4290250873293109,
"grad_norm": 33798.9140625,
"learning_rate": 4.2856462369006036e-05,
"loss": 0.889,
"step": 4500
},
{
"epoch": 1.4449031438551923,
"grad_norm": 33727.1484375,
"learning_rate": 4.277707208637663e-05,
"loss": 0.8891,
"step": 4550
},
{
"epoch": 1.4607812003810734,
"grad_norm": 39095.109375,
"learning_rate": 4.269768180374722e-05,
"loss": 0.8932,
"step": 4600
},
{
"epoch": 1.4766592569069545,
"grad_norm": 33266.66796875,
"learning_rate": 4.261829152111782e-05,
"loss": 0.8843,
"step": 4650
},
{
"epoch": 1.4925373134328357,
"grad_norm": 33800.22265625,
"learning_rate": 4.253890123848841e-05,
"loss": 0.882,
"step": 4700
},
{
"epoch": 1.508415369958717,
"grad_norm": 31127.240234375,
"learning_rate": 4.2459510955859005e-05,
"loss": 0.8828,
"step": 4750
},
{
"epoch": 1.5242934264845984,
"grad_norm": 33765.43359375,
"learning_rate": 4.23801206732296e-05,
"loss": 0.8802,
"step": 4800
},
{
"epoch": 1.5401714830104796,
"grad_norm": 32892.484375,
"learning_rate": 4.230073039060019e-05,
"loss": 0.882,
"step": 4850
},
{
"epoch": 1.5560495395363607,
"grad_norm": 32505.76953125,
"learning_rate": 4.2221340107970786e-05,
"loss": 0.8772,
"step": 4900
},
{
"epoch": 1.5719275960622419,
"grad_norm": 32439.31640625,
"learning_rate": 4.214194982534138e-05,
"loss": 0.8742,
"step": 4950
},
{
"epoch": 1.5878056525881232,
"grad_norm": 32518.017578125,
"learning_rate": 4.2062559542711974e-05,
"loss": 0.873,
"step": 5000
},
{
"epoch": 1.6036837091140046,
"grad_norm": 34613.1640625,
"learning_rate": 4.198316926008257e-05,
"loss": 0.873,
"step": 5050
},
{
"epoch": 1.6195617656398857,
"grad_norm": 32058.451171875,
"learning_rate": 4.190377897745316e-05,
"loss": 0.873,
"step": 5100
},
{
"epoch": 1.6354398221657669,
"grad_norm": 31566.736328125,
"learning_rate": 4.1824388694823755e-05,
"loss": 0.8675,
"step": 5150
},
{
"epoch": 1.651317878691648,
"grad_norm": 33988.2734375,
"learning_rate": 4.174499841219435e-05,
"loss": 0.8667,
"step": 5200
},
{
"epoch": 1.6671959352175294,
"grad_norm": 33580.97265625,
"learning_rate": 4.166560812956494e-05,
"loss": 0.8638,
"step": 5250
},
{
"epoch": 1.6830739917434108,
"grad_norm": 33622.66015625,
"learning_rate": 4.158621784693554e-05,
"loss": 0.8668,
"step": 5300
},
{
"epoch": 1.698952048269292,
"grad_norm": 32790.08203125,
"learning_rate": 4.150682756430613e-05,
"loss": 0.8616,
"step": 5350
},
{
"epoch": 1.714830104795173,
"grad_norm": 31265.09765625,
"learning_rate": 4.1427437281676724e-05,
"loss": 0.8578,
"step": 5400
},
{
"epoch": 1.7307081613210542,
"grad_norm": 31559.154296875,
"learning_rate": 4.134804699904732e-05,
"loss": 0.86,
"step": 5450
},
{
"epoch": 1.7465862178469356,
"grad_norm": 31868.90234375,
"learning_rate": 4.126865671641791e-05,
"loss": 0.8612,
"step": 5500
},
{
"epoch": 1.7624642743728167,
"grad_norm": 32017.955078125,
"learning_rate": 4.1189266433788506e-05,
"loss": 0.8599,
"step": 5550
},
{
"epoch": 1.778342330898698,
"grad_norm": 32002.77734375,
"learning_rate": 4.11098761511591e-05,
"loss": 0.8592,
"step": 5600
},
{
"epoch": 1.7942203874245792,
"grad_norm": 30244.373046875,
"learning_rate": 4.103048586852969e-05,
"loss": 0.855,
"step": 5650
},
{
"epoch": 1.8100984439504604,
"grad_norm": 32472.248046875,
"learning_rate": 4.095109558590029e-05,
"loss": 0.857,
"step": 5700
},
{
"epoch": 1.8259765004763417,
"grad_norm": 32474.484375,
"learning_rate": 4.087170530327088e-05,
"loss": 0.8526,
"step": 5750
},
{
"epoch": 1.841854557002223,
"grad_norm": 31490.275390625,
"learning_rate": 4.0792315020641475e-05,
"loss": 0.8559,
"step": 5800
},
{
"epoch": 1.8577326135281043,
"grad_norm": 31051.58203125,
"learning_rate": 4.071292473801207e-05,
"loss": 0.8522,
"step": 5850
},
{
"epoch": 1.8736106700539854,
"grad_norm": 31409.7578125,
"learning_rate": 4.063353445538267e-05,
"loss": 0.8486,
"step": 5900
},
{
"epoch": 1.8894887265798666,
"grad_norm": 32382.998046875,
"learning_rate": 4.0554144172753256e-05,
"loss": 0.8515,
"step": 5950
},
{
"epoch": 1.9053667831057477,
"grad_norm": 31483.380859375,
"learning_rate": 4.047475389012385e-05,
"loss": 0.8477,
"step": 6000
},
{
"epoch": 1.921244839631629,
"grad_norm": 32504.404296875,
"learning_rate": 4.0395363607494444e-05,
"loss": 0.8506,
"step": 6050
},
{
"epoch": 1.9371228961575104,
"grad_norm": 31748.40234375,
"learning_rate": 4.031597332486504e-05,
"loss": 0.8461,
"step": 6100
},
{
"epoch": 1.9530009526833916,
"grad_norm": 31397.05078125,
"learning_rate": 4.023658304223563e-05,
"loss": 0.8459,
"step": 6150
},
{
"epoch": 1.9688790092092727,
"grad_norm": 32932.44140625,
"learning_rate": 4.0157192759606225e-05,
"loss": 0.8431,
"step": 6200
},
{
"epoch": 1.9847570657351539,
"grad_norm": 32290.40234375,
"learning_rate": 4.007780247697682e-05,
"loss": 0.8435,
"step": 6250
},
{
"epoch": 2.0006351222610355,
"grad_norm": 31642.935546875,
"learning_rate": 3.999841219434741e-05,
"loss": 0.8403,
"step": 6300
},
{
"epoch": 2.0165131787869166,
"grad_norm": 31722.009765625,
"learning_rate": 3.991902191171801e-05,
"loss": 0.8393,
"step": 6350
},
{
"epoch": 2.0323912353127978,
"grad_norm": 30855.62890625,
"learning_rate": 3.98396316290886e-05,
"loss": 0.8356,
"step": 6400
},
{
"epoch": 2.048269291838679,
"grad_norm": 31825.283203125,
"learning_rate": 3.9760241346459194e-05,
"loss": 0.8342,
"step": 6450
},
{
"epoch": 2.06414734836456,
"grad_norm": 32278.908203125,
"learning_rate": 3.968085106382979e-05,
"loss": 0.8337,
"step": 6500
},
{
"epoch": 2.080025404890441,
"grad_norm": 31714.283203125,
"learning_rate": 3.960146078120038e-05,
"loss": 0.8348,
"step": 6550
},
{
"epoch": 2.0959034614163228,
"grad_norm": 31567.390625,
"learning_rate": 3.9522070498570976e-05,
"loss": 0.8281,
"step": 6600
},
{
"epoch": 2.111781517942204,
"grad_norm": 32440.802734375,
"learning_rate": 3.944268021594157e-05,
"loss": 0.831,
"step": 6650
},
{
"epoch": 2.127659574468085,
"grad_norm": 32060.837890625,
"learning_rate": 3.936328993331216e-05,
"loss": 0.831,
"step": 6700
},
{
"epoch": 2.143537630993966,
"grad_norm": 31656.82421875,
"learning_rate": 3.928389965068276e-05,
"loss": 0.829,
"step": 6750
},
{
"epoch": 2.1594156875198474,
"grad_norm": 31442.107421875,
"learning_rate": 3.920450936805336e-05,
"loss": 0.8266,
"step": 6800
},
{
"epoch": 2.175293744045729,
"grad_norm": 31876.103515625,
"learning_rate": 3.9125119085423945e-05,
"loss": 0.8256,
"step": 6850
},
{
"epoch": 2.19117180057161,
"grad_norm": 31034.79296875,
"learning_rate": 3.904572880279454e-05,
"loss": 0.8217,
"step": 6900
},
{
"epoch": 2.2070498570974912,
"grad_norm": 32682.744140625,
"learning_rate": 3.896633852016513e-05,
"loss": 0.8301,
"step": 6950
},
{
"epoch": 2.2229279136233724,
"grad_norm": 32618.359375,
"learning_rate": 3.8886948237535726e-05,
"loss": 0.8211,
"step": 7000
},
{
"epoch": 2.2388059701492535,
"grad_norm": 30968.298828125,
"learning_rate": 3.880755795490632e-05,
"loss": 0.825,
"step": 7050
},
{
"epoch": 2.254684026675135,
"grad_norm": 30861.9921875,
"learning_rate": 3.8728167672276914e-05,
"loss": 0.8201,
"step": 7100
},
{
"epoch": 2.2705620832010163,
"grad_norm": 30823.109375,
"learning_rate": 3.8648777389647514e-05,
"loss": 0.8197,
"step": 7150
},
{
"epoch": 2.2864401397268974,
"grad_norm": 30141.990234375,
"learning_rate": 3.85693871070181e-05,
"loss": 0.8224,
"step": 7200
},
{
"epoch": 2.3023181962527786,
"grad_norm": 34504.34375,
"learning_rate": 3.8489996824388695e-05,
"loss": 0.821,
"step": 7250
},
{
"epoch": 2.3181962527786597,
"grad_norm": 30797.966796875,
"learning_rate": 3.841060654175929e-05,
"loss": 0.8203,
"step": 7300
},
{
"epoch": 2.3340743093045413,
"grad_norm": 30129.06640625,
"learning_rate": 3.833121625912988e-05,
"loss": 0.8178,
"step": 7350
},
{
"epoch": 2.3499523658304224,
"grad_norm": 30087.42578125,
"learning_rate": 3.825182597650048e-05,
"loss": 0.8219,
"step": 7400
},
{
"epoch": 2.3658304223563036,
"grad_norm": 30917.18359375,
"learning_rate": 3.817243569387107e-05,
"loss": 0.8163,
"step": 7450
},
{
"epoch": 2.3817084788821847,
"grad_norm": 33235.78515625,
"learning_rate": 3.809304541124167e-05,
"loss": 0.8149,
"step": 7500
},
{
"epoch": 2.397586535408066,
"grad_norm": 30538.533203125,
"learning_rate": 3.801365512861226e-05,
"loss": 0.818,
"step": 7550
},
{
"epoch": 2.4134645919339475,
"grad_norm": 31500.251953125,
"learning_rate": 3.793426484598285e-05,
"loss": 0.8171,
"step": 7600
},
{
"epoch": 2.4293426484598286,
"grad_norm": 30612.111328125,
"learning_rate": 3.7854874563353446e-05,
"loss": 0.8132,
"step": 7650
},
{
"epoch": 2.4452207049857098,
"grad_norm": 30479.9140625,
"learning_rate": 3.7775484280724046e-05,
"loss": 0.8147,
"step": 7700
},
{
"epoch": 2.461098761511591,
"grad_norm": 30102.48046875,
"learning_rate": 3.769609399809463e-05,
"loss": 0.8112,
"step": 7750
},
{
"epoch": 2.476976818037472,
"grad_norm": 30969.11328125,
"learning_rate": 3.761670371546523e-05,
"loss": 0.8161,
"step": 7800
},
{
"epoch": 2.4928548745633536,
"grad_norm": 30215.08203125,
"learning_rate": 3.753731343283583e-05,
"loss": 0.8118,
"step": 7850
},
{
"epoch": 2.508732931089235,
"grad_norm": 30152.841796875,
"learning_rate": 3.7457923150206415e-05,
"loss": 0.813,
"step": 7900
},
{
"epoch": 2.524610987615116,
"grad_norm": 30816.564453125,
"learning_rate": 3.737853286757701e-05,
"loss": 0.8124,
"step": 7950
},
{
"epoch": 2.540489044140997,
"grad_norm": 31137.095703125,
"learning_rate": 3.72991425849476e-05,
"loss": 0.8105,
"step": 8000
},
{
"epoch": 2.5563671006668782,
"grad_norm": 30815.490234375,
"learning_rate": 3.72197523023182e-05,
"loss": 0.8117,
"step": 8050
},
{
"epoch": 2.57224515719276,
"grad_norm": 31679.986328125,
"learning_rate": 3.714036201968879e-05,
"loss": 0.808,
"step": 8100
},
{
"epoch": 2.588123213718641,
"grad_norm": 30918.755859375,
"learning_rate": 3.7060971737059384e-05,
"loss": 0.807,
"step": 8150
},
{
"epoch": 2.604001270244522,
"grad_norm": 30721.857421875,
"learning_rate": 3.6981581454429984e-05,
"loss": 0.8077,
"step": 8200
},
{
"epoch": 2.6198793267704032,
"grad_norm": 30369.298828125,
"learning_rate": 3.690219117180057e-05,
"loss": 0.8067,
"step": 8250
},
{
"epoch": 2.6357573832962844,
"grad_norm": 30493.7421875,
"learning_rate": 3.6822800889171165e-05,
"loss": 0.8063,
"step": 8300
},
{
"epoch": 2.651635439822166,
"grad_norm": 30101.5859375,
"learning_rate": 3.674341060654176e-05,
"loss": 0.8065,
"step": 8350
},
{
"epoch": 2.667513496348047,
"grad_norm": 30403.134765625,
"learning_rate": 3.666402032391236e-05,
"loss": 0.8023,
"step": 8400
},
{
"epoch": 2.6833915528739283,
"grad_norm": 30230.22265625,
"learning_rate": 3.658463004128295e-05,
"loss": 0.8025,
"step": 8450
},
{
"epoch": 2.6992696093998094,
"grad_norm": 30146.58203125,
"learning_rate": 3.650523975865354e-05,
"loss": 0.8035,
"step": 8500
},
{
"epoch": 2.7151476659256906,
"grad_norm": 30205.19921875,
"learning_rate": 3.6425849476024134e-05,
"loss": 0.8023,
"step": 8550
},
{
"epoch": 2.731025722451572,
"grad_norm": 29393.84375,
"learning_rate": 3.6346459193394735e-05,
"loss": 0.7985,
"step": 8600
},
{
"epoch": 2.7469037789774533,
"grad_norm": 30105.896484375,
"learning_rate": 3.626706891076532e-05,
"loss": 0.798,
"step": 8650
},
{
"epoch": 2.7627818355033344,
"grad_norm": 31707.62109375,
"learning_rate": 3.6187678628135916e-05,
"loss": 0.8019,
"step": 8700
},
{
"epoch": 2.7786598920292156,
"grad_norm": 30057.185546875,
"learning_rate": 3.6108288345506516e-05,
"loss": 0.8019,
"step": 8750
},
{
"epoch": 2.7945379485550967,
"grad_norm": 31893.216796875,
"learning_rate": 3.60288980628771e-05,
"loss": 0.8004,
"step": 8800
},
{
"epoch": 2.8104160050809783,
"grad_norm": 29928.91015625,
"learning_rate": 3.59495077802477e-05,
"loss": 0.7989,
"step": 8850
},
{
"epoch": 2.8262940616068595,
"grad_norm": 29859.1015625,
"learning_rate": 3.587011749761829e-05,
"loss": 0.7987,
"step": 8900
},
{
"epoch": 2.8421721181327406,
"grad_norm": 29658.259765625,
"learning_rate": 3.579072721498889e-05,
"loss": 0.8001,
"step": 8950
},
{
"epoch": 2.8580501746586218,
"grad_norm": 29646.7421875,
"learning_rate": 3.571133693235948e-05,
"loss": 0.7946,
"step": 9000
},
{
"epoch": 2.873928231184503,
"grad_norm": 29755.7890625,
"learning_rate": 3.563194664973007e-05,
"loss": 0.7983,
"step": 9050
},
{
"epoch": 2.8898062877103845,
"grad_norm": 29201.306640625,
"learning_rate": 3.555255636710067e-05,
"loss": 0.7966,
"step": 9100
},
{
"epoch": 2.905684344236265,
"grad_norm": 30059.353515625,
"learning_rate": 3.547316608447126e-05,
"loss": 0.7968,
"step": 9150
},
{
"epoch": 2.921562400762147,
"grad_norm": 29836.736328125,
"learning_rate": 3.5393775801841854e-05,
"loss": 0.7915,
"step": 9200
},
{
"epoch": 2.937440457288028,
"grad_norm": 29474.76171875,
"learning_rate": 3.531438551921245e-05,
"loss": 0.7951,
"step": 9250
},
{
"epoch": 2.953318513813909,
"grad_norm": 29720.64453125,
"learning_rate": 3.523499523658305e-05,
"loss": 0.7973,
"step": 9300
},
{
"epoch": 2.9691965703397907,
"grad_norm": 30905.662109375,
"learning_rate": 3.5155604953953635e-05,
"loss": 0.796,
"step": 9350
},
{
"epoch": 2.9850746268656714,
"grad_norm": 29326.87109375,
"learning_rate": 3.507621467132423e-05,
"loss": 0.7943,
"step": 9400
},
{
"epoch": 3.000952683391553,
"grad_norm": 29983.439453125,
"learning_rate": 3.499682438869483e-05,
"loss": 0.7913,
"step": 9450
},
{
"epoch": 3.016830739917434,
"grad_norm": 31416.537109375,
"learning_rate": 3.4917434106065423e-05,
"loss": 0.7875,
"step": 9500
},
{
"epoch": 3.0327087964433153,
"grad_norm": 29326.58984375,
"learning_rate": 3.483804382343601e-05,
"loss": 0.788,
"step": 9550
},
{
"epoch": 3.0485868529691964,
"grad_norm": 30804.06640625,
"learning_rate": 3.4758653540806604e-05,
"loss": 0.7874,
"step": 9600
},
{
"epoch": 3.064464909495078,
"grad_norm": 30785.3125,
"learning_rate": 3.4679263258177205e-05,
"loss": 0.7889,
"step": 9650
},
{
"epoch": 3.080342966020959,
"grad_norm": 29545.892578125,
"learning_rate": 3.459987297554779e-05,
"loss": 0.786,
"step": 9700
},
{
"epoch": 3.0962210225468403,
"grad_norm": 29333.875,
"learning_rate": 3.4520482692918386e-05,
"loss": 0.7869,
"step": 9750
},
{
"epoch": 3.1120990790727214,
"grad_norm": 29140.724609375,
"learning_rate": 3.4441092410288986e-05,
"loss": 0.7848,
"step": 9800
},
{
"epoch": 3.1279771355986026,
"grad_norm": 29844.708984375,
"learning_rate": 3.436170212765958e-05,
"loss": 0.7833,
"step": 9850
},
{
"epoch": 3.143855192124484,
"grad_norm": 29902.115234375,
"learning_rate": 3.428231184503017e-05,
"loss": 0.7868,
"step": 9900
},
{
"epoch": 3.1597332486503653,
"grad_norm": 29875.98828125,
"learning_rate": 3.420292156240076e-05,
"loss": 0.7824,
"step": 9950
},
{
"epoch": 3.1756113051762465,
"grad_norm": 29536.19921875,
"learning_rate": 3.412353127977136e-05,
"loss": 0.7819,
"step": 10000
},
{
"epoch": 3.1914893617021276,
"grad_norm": 29816.875,
"learning_rate": 3.404414099714195e-05,
"loss": 0.7804,
"step": 10050
},
{
"epoch": 3.2073674182280087,
"grad_norm": 30143.560546875,
"learning_rate": 3.396475071451254e-05,
"loss": 0.7819,
"step": 10100
},
{
"epoch": 3.22324547475389,
"grad_norm": 29888.271484375,
"learning_rate": 3.388536043188314e-05,
"loss": 0.7817,
"step": 10150
},
{
"epoch": 3.2391235312797715,
"grad_norm": 29966.201171875,
"learning_rate": 3.380597014925374e-05,
"loss": 0.7809,
"step": 10200
},
{
"epoch": 3.2550015878056526,
"grad_norm": 30786.470703125,
"learning_rate": 3.3726579866624324e-05,
"loss": 0.7765,
"step": 10250
},
{
"epoch": 3.2708796443315338,
"grad_norm": 30272.826171875,
"learning_rate": 3.364718958399492e-05,
"loss": 0.7806,
"step": 10300
},
{
"epoch": 3.286757700857415,
"grad_norm": 29083.400390625,
"learning_rate": 3.356779930136552e-05,
"loss": 0.7799,
"step": 10350
},
{
"epoch": 3.302635757383296,
"grad_norm": 29236.140625,
"learning_rate": 3.348840901873611e-05,
"loss": 0.7801,
"step": 10400
},
{
"epoch": 3.3185138139091777,
"grad_norm": 29460.9296875,
"learning_rate": 3.34090187361067e-05,
"loss": 0.7779,
"step": 10450
},
{
"epoch": 3.334391870435059,
"grad_norm": 29783.908203125,
"learning_rate": 3.332962845347729e-05,
"loss": 0.7782,
"step": 10500
},
{
"epoch": 3.35026992696094,
"grad_norm": 30124.951171875,
"learning_rate": 3.3250238170847893e-05,
"loss": 0.7778,
"step": 10550
},
{
"epoch": 3.366147983486821,
"grad_norm": 29516.72265625,
"learning_rate": 3.317084788821848e-05,
"loss": 0.7771,
"step": 10600
},
{
"epoch": 3.3820260400127022,
"grad_norm": 29329.431640625,
"learning_rate": 3.3091457605589074e-05,
"loss": 0.7777,
"step": 10650
},
{
"epoch": 3.397904096538584,
"grad_norm": 29930.46875,
"learning_rate": 3.3012067322959675e-05,
"loss": 0.7776,
"step": 10700
},
{
"epoch": 3.413782153064465,
"grad_norm": 30200.29296875,
"learning_rate": 3.293267704033027e-05,
"loss": 0.7774,
"step": 10750
},
{
"epoch": 3.429660209590346,
"grad_norm": 29434.279296875,
"learning_rate": 3.2853286757700856e-05,
"loss": 0.7794,
"step": 10800
},
{
"epoch": 3.4455382661162273,
"grad_norm": 29533.7265625,
"learning_rate": 3.277389647507145e-05,
"loss": 0.78,
"step": 10850
},
{
"epoch": 3.4614163226421084,
"grad_norm": 30062.75390625,
"learning_rate": 3.269450619244205e-05,
"loss": 0.7715,
"step": 10900
},
{
"epoch": 3.47729437916799,
"grad_norm": 30203.49609375,
"learning_rate": 3.261511590981264e-05,
"loss": 0.7754,
"step": 10950
},
{
"epoch": 3.493172435693871,
"grad_norm": 29971.337890625,
"learning_rate": 3.253572562718323e-05,
"loss": 0.7743,
"step": 11000
},
{
"epoch": 3.5090504922197523,
"grad_norm": 30626.96875,
"learning_rate": 3.245633534455383e-05,
"loss": 0.7722,
"step": 11050
},
{
"epoch": 3.5249285487456334,
"grad_norm": 29813.41796875,
"learning_rate": 3.2376945061924425e-05,
"loss": 0.7762,
"step": 11100
},
{
"epoch": 3.5408066052715146,
"grad_norm": 30018.158203125,
"learning_rate": 3.229755477929501e-05,
"loss": 0.7708,
"step": 11150
},
{
"epoch": 3.556684661797396,
"grad_norm": 28745.65625,
"learning_rate": 3.2218164496665606e-05,
"loss": 0.7703,
"step": 11200
},
{
"epoch": 3.5725627183232773,
"grad_norm": 29827.396484375,
"learning_rate": 3.213877421403621e-05,
"loss": 0.7731,
"step": 11250
},
{
"epoch": 3.5884407748491585,
"grad_norm": 29104.228515625,
"learning_rate": 3.20593839314068e-05,
"loss": 0.7697,
"step": 11300
},
{
"epoch": 3.6043188313750396,
"grad_norm": 29951.318359375,
"learning_rate": 3.197999364877739e-05,
"loss": 0.7719,
"step": 11350
},
{
"epoch": 3.6201968879009208,
"grad_norm": 29854.1171875,
"learning_rate": 3.190060336614799e-05,
"loss": 0.7707,
"step": 11400
},
{
"epoch": 3.6360749444268023,
"grad_norm": 29064.115234375,
"learning_rate": 3.182121308351858e-05,
"loss": 0.7679,
"step": 11450
},
{
"epoch": 3.6519530009526835,
"grad_norm": 29561.064453125,
"learning_rate": 3.174182280088917e-05,
"loss": 0.773,
"step": 11500
},
{
"epoch": 3.6678310574785646,
"grad_norm": 30519.935546875,
"learning_rate": 3.166243251825976e-05,
"loss": 0.7715,
"step": 11550
},
{
"epoch": 3.683709114004446,
"grad_norm": 29498.1796875,
"learning_rate": 3.1583042235630363e-05,
"loss": 0.7696,
"step": 11600
},
{
"epoch": 3.699587170530327,
"grad_norm": 29892.5,
"learning_rate": 3.150365195300096e-05,
"loss": 0.7722,
"step": 11650
},
{
"epoch": 3.7154652270562085,
"grad_norm": 29588.62890625,
"learning_rate": 3.1424261670371544e-05,
"loss": 0.7695,
"step": 11700
},
{
"epoch": 3.7313432835820897,
"grad_norm": 28297.962890625,
"learning_rate": 3.1344871387742145e-05,
"loss": 0.767,
"step": 11750
},
{
"epoch": 3.747221340107971,
"grad_norm": 29926.146484375,
"learning_rate": 3.126548110511274e-05,
"loss": 0.7686,
"step": 11800
},
{
"epoch": 3.763099396633852,
"grad_norm": 29238.9375,
"learning_rate": 3.1186090822483326e-05,
"loss": 0.7657,
"step": 11850
},
{
"epoch": 3.778977453159733,
"grad_norm": 29130.10546875,
"learning_rate": 3.110670053985392e-05,
"loss": 0.7664,
"step": 11900
},
{
"epoch": 3.7948555096856147,
"grad_norm": 28701.7890625,
"learning_rate": 3.102731025722452e-05,
"loss": 0.7658,
"step": 11950
},
{
"epoch": 3.810733566211496,
"grad_norm": 29051.44140625,
"learning_rate": 3.0947919974595114e-05,
"loss": 0.7693,
"step": 12000
},
{
"epoch": 3.826611622737377,
"grad_norm": 29298.595703125,
"learning_rate": 3.08685296919657e-05,
"loss": 0.767,
"step": 12050
},
{
"epoch": 3.842489679263258,
"grad_norm": 28832.44921875,
"learning_rate": 3.07891394093363e-05,
"loss": 0.7655,
"step": 12100
},
{
"epoch": 3.8583677357891393,
"grad_norm": 28990.67578125,
"learning_rate": 3.0709749126706895e-05,
"loss": 0.7618,
"step": 12150
},
{
"epoch": 3.874245792315021,
"grad_norm": 28622.34765625,
"learning_rate": 3.063035884407749e-05,
"loss": 0.7664,
"step": 12200
},
{
"epoch": 3.890123848840902,
"grad_norm": 29283.064453125,
"learning_rate": 3.0550968561448076e-05,
"loss": 0.7678,
"step": 12250
},
{
"epoch": 3.906001905366783,
"grad_norm": 29989.224609375,
"learning_rate": 3.0471578278818673e-05,
"loss": 0.7631,
"step": 12300
},
{
"epoch": 3.9218799618926643,
"grad_norm": 30515.478515625,
"learning_rate": 3.039218799618927e-05,
"loss": 0.7634,
"step": 12350
},
{
"epoch": 3.9377580184185454,
"grad_norm": 29057.341796875,
"learning_rate": 3.031279771355986e-05,
"loss": 0.7649,
"step": 12400
},
{
"epoch": 3.953636074944427,
"grad_norm": 29256.357421875,
"learning_rate": 3.0233407430930455e-05,
"loss": 0.7617,
"step": 12450
},
{
"epoch": 3.969514131470308,
"grad_norm": 29832.517578125,
"learning_rate": 3.015401714830105e-05,
"loss": 0.762,
"step": 12500
},
{
"epoch": 3.9853921879961893,
"grad_norm": 30056.580078125,
"learning_rate": 3.0074626865671646e-05,
"loss": 0.7635,
"step": 12550
},
{
"epoch": 4.001270244522071,
"grad_norm": 30290.673828125,
"learning_rate": 2.9995236583042236e-05,
"loss": 0.761,
"step": 12600
},
{
"epoch": 4.017148301047952,
"grad_norm": 30393.2421875,
"learning_rate": 2.991584630041283e-05,
"loss": 0.7576,
"step": 12650
},
{
"epoch": 4.033026357573833,
"grad_norm": 29898.7734375,
"learning_rate": 2.9836456017783427e-05,
"loss": 0.7592,
"step": 12700
},
{
"epoch": 4.048904414099714,
"grad_norm": 29665.693359375,
"learning_rate": 2.9757065735154018e-05,
"loss": 0.7564,
"step": 12750
},
{
"epoch": 4.0647824706255955,
"grad_norm": 29677.63671875,
"learning_rate": 2.967767545252461e-05,
"loss": 0.7564,
"step": 12800
},
{
"epoch": 4.080660527151477,
"grad_norm": 29343.1328125,
"learning_rate": 2.9598285169895205e-05,
"loss": 0.7589,
"step": 12850
},
{
"epoch": 4.096538583677358,
"grad_norm": 28720.55078125,
"learning_rate": 2.9518894887265802e-05,
"loss": 0.7543,
"step": 12900
},
{
"epoch": 4.112416640203239,
"grad_norm": 28604.630859375,
"learning_rate": 2.9439504604636393e-05,
"loss": 0.7554,
"step": 12950
},
{
"epoch": 4.12829469672912,
"grad_norm": 29508.154296875,
"learning_rate": 2.9360114322006987e-05,
"loss": 0.7555,
"step": 13000
},
{
"epoch": 4.144172753255002,
"grad_norm": 28996.201171875,
"learning_rate": 2.9280724039377584e-05,
"loss": 0.7552,
"step": 13050
},
{
"epoch": 4.160050809780882,
"grad_norm": 31208.623046875,
"learning_rate": 2.9201333756748178e-05,
"loss": 0.7584,
"step": 13100
},
{
"epoch": 4.175928866306764,
"grad_norm": 29778.853515625,
"learning_rate": 2.9121943474118768e-05,
"loss": 0.7549,
"step": 13150
},
{
"epoch": 4.1918069228326456,
"grad_norm": 29337.587890625,
"learning_rate": 2.9042553191489362e-05,
"loss": 0.7546,
"step": 13200
},
{
"epoch": 4.207684979358526,
"grad_norm": 28682.619140625,
"learning_rate": 2.896316290885996e-05,
"loss": 0.749,
"step": 13250
},
{
"epoch": 4.223563035884408,
"grad_norm": 29200.912109375,
"learning_rate": 2.888377262623055e-05,
"loss": 0.7545,
"step": 13300
},
{
"epoch": 4.2394410924102885,
"grad_norm": 30967.3515625,
"learning_rate": 2.8804382343601143e-05,
"loss": 0.7558,
"step": 13350
},
{
"epoch": 4.25531914893617,
"grad_norm": 30665.638671875,
"learning_rate": 2.872499206097174e-05,
"loss": 0.7532,
"step": 13400
},
{
"epoch": 4.271197205462052,
"grad_norm": 28959.0625,
"learning_rate": 2.8645601778342334e-05,
"loss": 0.7553,
"step": 13450
},
{
"epoch": 4.287075261987932,
"grad_norm": 29186.98046875,
"learning_rate": 2.8566211495712925e-05,
"loss": 0.7535,
"step": 13500
},
{
"epoch": 4.302953318513814,
"grad_norm": 29713.626953125,
"learning_rate": 2.848682121308352e-05,
"loss": 0.7526,
"step": 13550
},
{
"epoch": 4.318831375039695,
"grad_norm": 30507.126953125,
"learning_rate": 2.8407430930454116e-05,
"loss": 0.7557,
"step": 13600
},
{
"epoch": 4.334709431565576,
"grad_norm": 28860.220703125,
"learning_rate": 2.8328040647824706e-05,
"loss": 0.7497,
"step": 13650
},
{
"epoch": 4.350587488091458,
"grad_norm": 28531.17578125,
"learning_rate": 2.82486503651953e-05,
"loss": 0.7536,
"step": 13700
},
{
"epoch": 4.366465544617339,
"grad_norm": 29637.4296875,
"learning_rate": 2.8169260082565897e-05,
"loss": 0.7536,
"step": 13750
},
{
"epoch": 4.38234360114322,
"grad_norm": 29468.94140625,
"learning_rate": 2.808986979993649e-05,
"loss": 0.7517,
"step": 13800
},
{
"epoch": 4.398221657669101,
"grad_norm": 29931.30859375,
"learning_rate": 2.801047951730708e-05,
"loss": 0.7514,
"step": 13850
},
{
"epoch": 4.4140997141949825,
"grad_norm": 29154.953125,
"learning_rate": 2.7931089234677675e-05,
"loss": 0.7545,
"step": 13900
},
{
"epoch": 4.429977770720864,
"grad_norm": 29288.595703125,
"learning_rate": 2.7851698952048273e-05,
"loss": 0.7535,
"step": 13950
},
{
"epoch": 4.445855827246745,
"grad_norm": 29386.255859375,
"learning_rate": 2.7772308669418866e-05,
"loss": 0.7517,
"step": 14000
},
{
"epoch": 4.461733883772626,
"grad_norm": 28722.88671875,
"learning_rate": 2.7692918386789457e-05,
"loss": 0.7482,
"step": 14050
},
{
"epoch": 4.477611940298507,
"grad_norm": 29279.75,
"learning_rate": 2.7613528104160054e-05,
"loss": 0.7481,
"step": 14100
},
{
"epoch": 4.493489996824389,
"grad_norm": 29140.587890625,
"learning_rate": 2.7534137821530648e-05,
"loss": 0.7462,
"step": 14150
},
{
"epoch": 4.50936805335027,
"grad_norm": 28993.6484375,
"learning_rate": 2.7454747538901238e-05,
"loss": 0.7473,
"step": 14200
},
{
"epoch": 4.525246109876151,
"grad_norm": 30390.90234375,
"learning_rate": 2.7375357256271832e-05,
"loss": 0.7515,
"step": 14250
},
{
"epoch": 4.5411241664020325,
"grad_norm": 29218.51953125,
"learning_rate": 2.729596697364243e-05,
"loss": 0.7471,
"step": 14300
},
{
"epoch": 4.557002222927913,
"grad_norm": 30701.79296875,
"learning_rate": 2.7216576691013023e-05,
"loss": 0.7487,
"step": 14350
},
{
"epoch": 4.572880279453795,
"grad_norm": 29310.14453125,
"learning_rate": 2.7137186408383613e-05,
"loss": 0.7492,
"step": 14400
},
{
"epoch": 4.588758335979676,
"grad_norm": 29325.59375,
"learning_rate": 2.705779612575421e-05,
"loss": 0.7479,
"step": 14450
},
{
"epoch": 4.604636392505557,
"grad_norm": 29217.953125,
"learning_rate": 2.6978405843124804e-05,
"loss": 0.7512,
"step": 14500
},
{
"epoch": 4.620514449031439,
"grad_norm": 29466.189453125,
"learning_rate": 2.6899015560495395e-05,
"loss": 0.7477,
"step": 14550
},
{
"epoch": 4.636392505557319,
"grad_norm": 28055.72265625,
"learning_rate": 2.681962527786599e-05,
"loss": 0.749,
"step": 14600
},
{
"epoch": 4.652270562083201,
"grad_norm": 28245.34375,
"learning_rate": 2.6740234995236586e-05,
"loss": 0.7479,
"step": 14650
},
{
"epoch": 4.668148618609083,
"grad_norm": 29280.736328125,
"learning_rate": 2.666084471260718e-05,
"loss": 0.7484,
"step": 14700
},
{
"epoch": 4.684026675134963,
"grad_norm": 29603.513671875,
"learning_rate": 2.658145442997777e-05,
"loss": 0.7499,
"step": 14750
},
{
"epoch": 4.699904731660845,
"grad_norm": 28774.55078125,
"learning_rate": 2.6502064147348364e-05,
"loss": 0.7463,
"step": 14800
},
{
"epoch": 4.715782788186726,
"grad_norm": 29580.22265625,
"learning_rate": 2.642267386471896e-05,
"loss": 0.7424,
"step": 14850
},
{
"epoch": 4.731660844712607,
"grad_norm": 28998.916015625,
"learning_rate": 2.6343283582089555e-05,
"loss": 0.7457,
"step": 14900
},
{
"epoch": 4.747538901238489,
"grad_norm": 28983.86328125,
"learning_rate": 2.6263893299460145e-05,
"loss": 0.7412,
"step": 14950
},
{
"epoch": 4.7634169577643695,
"grad_norm": 30523.78125,
"learning_rate": 2.6184503016830743e-05,
"loss": 0.7468,
"step": 15000
},
{
"epoch": 4.779295014290251,
"grad_norm": 29167.177734375,
"learning_rate": 2.6105112734201336e-05,
"loss": 0.7435,
"step": 15050
},
{
"epoch": 4.795173070816132,
"grad_norm": 29224.529296875,
"learning_rate": 2.6025722451571927e-05,
"loss": 0.7409,
"step": 15100
},
{
"epoch": 4.811051127342013,
"grad_norm": 28679.3046875,
"learning_rate": 2.594633216894252e-05,
"loss": 0.7394,
"step": 15150
},
{
"epoch": 4.826929183867895,
"grad_norm": 29085.837890625,
"learning_rate": 2.5866941886313118e-05,
"loss": 0.745,
"step": 15200
},
{
"epoch": 4.842807240393776,
"grad_norm": 28241.09765625,
"learning_rate": 2.578755160368371e-05,
"loss": 0.7456,
"step": 15250
},
{
"epoch": 4.858685296919657,
"grad_norm": 29162.150390625,
"learning_rate": 2.5708161321054302e-05,
"loss": 0.7428,
"step": 15300
},
{
"epoch": 4.874563353445538,
"grad_norm": 29517.7578125,
"learning_rate": 2.56287710384249e-05,
"loss": 0.7418,
"step": 15350
},
{
"epoch": 4.8904414099714195,
"grad_norm": 28757.634765625,
"learning_rate": 2.5549380755795493e-05,
"loss": 0.7475,
"step": 15400
},
{
"epoch": 4.906319466497301,
"grad_norm": 29112.775390625,
"learning_rate": 2.5469990473166083e-05,
"loss": 0.742,
"step": 15450
},
{
"epoch": 4.922197523023182,
"grad_norm": 29536.763671875,
"learning_rate": 2.5390600190536677e-05,
"loss": 0.7423,
"step": 15500
},
{
"epoch": 4.938075579549063,
"grad_norm": 29418.62890625,
"learning_rate": 2.5311209907907274e-05,
"loss": 0.7445,
"step": 15550
},
{
"epoch": 4.953953636074944,
"grad_norm": 29107.703125,
"learning_rate": 2.5231819625277868e-05,
"loss": 0.7421,
"step": 15600
},
{
"epoch": 4.969831692600826,
"grad_norm": 28584.513671875,
"learning_rate": 2.515242934264846e-05,
"loss": 0.7416,
"step": 15650
},
{
"epoch": 4.985709749126707,
"grad_norm": 29202.330078125,
"learning_rate": 2.5073039060019056e-05,
"loss": 0.7404,
"step": 15700
},
{
"epoch": 5.001587805652588,
"grad_norm": 28508.923828125,
"learning_rate": 2.499364877738965e-05,
"loss": 0.7401,
"step": 15750
},
{
"epoch": 5.01746586217847,
"grad_norm": 29855.150390625,
"learning_rate": 2.4914258494760243e-05,
"loss": 0.7377,
"step": 15800
},
{
"epoch": 5.03334391870435,
"grad_norm": 28641.822265625,
"learning_rate": 2.4834868212130834e-05,
"loss": 0.7373,
"step": 15850
},
{
"epoch": 5.049221975230232,
"grad_norm": 29080.0234375,
"learning_rate": 2.475547792950143e-05,
"loss": 0.7387,
"step": 15900
},
{
"epoch": 5.0651000317561135,
"grad_norm": 29752.501953125,
"learning_rate": 2.467608764687202e-05,
"loss": 0.7364,
"step": 15950
},
{
"epoch": 5.080978088281994,
"grad_norm": 29244.943359375,
"learning_rate": 2.459669736424262e-05,
"loss": 0.7386,
"step": 16000
},
{
"epoch": 5.096856144807876,
"grad_norm": 30756.474609375,
"learning_rate": 2.4517307081613213e-05,
"loss": 0.7365,
"step": 16050
},
{
"epoch": 5.1127342013337564,
"grad_norm": 29783.80078125,
"learning_rate": 2.4437916798983806e-05,
"loss": 0.7345,
"step": 16100
},
{
"epoch": 5.128612257859638,
"grad_norm": 28871.20703125,
"learning_rate": 2.43585265163544e-05,
"loss": 0.7385,
"step": 16150
},
{
"epoch": 5.14449031438552,
"grad_norm": 29666.92578125,
"learning_rate": 2.4279136233724994e-05,
"loss": 0.7364,
"step": 16200
},
{
"epoch": 5.1603683709114,
"grad_norm": 27965.49609375,
"learning_rate": 2.4199745951095588e-05,
"loss": 0.7349,
"step": 16250
},
{
"epoch": 5.176246427437282,
"grad_norm": 29663.21484375,
"learning_rate": 2.4120355668466178e-05,
"loss": 0.7336,
"step": 16300
},
{
"epoch": 5.192124483963163,
"grad_norm": 29286.236328125,
"learning_rate": 2.4040965385836775e-05,
"loss": 0.7353,
"step": 16350
},
{
"epoch": 5.208002540489044,
"grad_norm": 28999.544921875,
"learning_rate": 2.396157510320737e-05,
"loss": 0.7364,
"step": 16400
},
{
"epoch": 5.223880597014926,
"grad_norm": 29259.37890625,
"learning_rate": 2.3882184820577963e-05,
"loss": 0.7386,
"step": 16450
},
{
"epoch": 5.2397586535408065,
"grad_norm": 29506.087890625,
"learning_rate": 2.3802794537948557e-05,
"loss": 0.734,
"step": 16500
},
{
"epoch": 5.255636710066688,
"grad_norm": 29443.67578125,
"learning_rate": 2.372340425531915e-05,
"loss": 0.7408,
"step": 16550
},
{
"epoch": 5.271514766592569,
"grad_norm": 29926.109375,
"learning_rate": 2.3644013972689744e-05,
"loss": 0.7368,
"step": 16600
},
{
"epoch": 5.28739282311845,
"grad_norm": 30687.26953125,
"learning_rate": 2.3564623690060338e-05,
"loss": 0.7345,
"step": 16650
},
{
"epoch": 5.303270879644332,
"grad_norm": 28573.580078125,
"learning_rate": 2.3485233407430932e-05,
"loss": 0.7348,
"step": 16700
},
{
"epoch": 5.319148936170213,
"grad_norm": 30217.728515625,
"learning_rate": 2.3405843124801526e-05,
"loss": 0.7348,
"step": 16750
},
{
"epoch": 5.335026992696094,
"grad_norm": 28830.787109375,
"learning_rate": 2.332645284217212e-05,
"loss": 0.7389,
"step": 16800
},
{
"epoch": 5.350905049221975,
"grad_norm": 29589.51953125,
"learning_rate": 2.3247062559542713e-05,
"loss": 0.7364,
"step": 16850
},
{
"epoch": 5.3667831057478566,
"grad_norm": 28568.32421875,
"learning_rate": 2.3167672276913307e-05,
"loss": 0.7348,
"step": 16900
},
{
"epoch": 5.382661162273738,
"grad_norm": 30656.90234375,
"learning_rate": 2.30882819942839e-05,
"loss": 0.7346,
"step": 16950
},
{
"epoch": 5.398539218799619,
"grad_norm": 29044.07421875,
"learning_rate": 2.3008891711654495e-05,
"loss": 0.7363,
"step": 17000
},
{
"epoch": 5.4144172753255,
"grad_norm": 29246.9765625,
"learning_rate": 2.292950142902509e-05,
"loss": 0.7376,
"step": 17050
},
{
"epoch": 5.430295331851381,
"grad_norm": 29861.26171875,
"learning_rate": 2.2850111146395683e-05,
"loss": 0.7322,
"step": 17100
},
{
"epoch": 5.446173388377263,
"grad_norm": 29695.34765625,
"learning_rate": 2.2770720863766276e-05,
"loss": 0.7324,
"step": 17150
},
{
"epoch": 5.462051444903144,
"grad_norm": 29459.607421875,
"learning_rate": 2.269133058113687e-05,
"loss": 0.733,
"step": 17200
},
{
"epoch": 5.477929501429025,
"grad_norm": 28512.21484375,
"learning_rate": 2.2611940298507464e-05,
"loss": 0.7274,
"step": 17250
},
{
"epoch": 5.493807557954907,
"grad_norm": 29732.572265625,
"learning_rate": 2.2532550015878058e-05,
"loss": 0.7333,
"step": 17300
},
{
"epoch": 5.509685614480787,
"grad_norm": 29110.30859375,
"learning_rate": 2.245315973324865e-05,
"loss": 0.7344,
"step": 17350
},
{
"epoch": 5.525563671006669,
"grad_norm": 29294.5859375,
"learning_rate": 2.2373769450619245e-05,
"loss": 0.7292,
"step": 17400
},
{
"epoch": 5.5414417275325505,
"grad_norm": 28999.29296875,
"learning_rate": 2.229437916798984e-05,
"loss": 0.7322,
"step": 17450
},
{
"epoch": 5.557319784058431,
"grad_norm": 28789.3359375,
"learning_rate": 2.2214988885360433e-05,
"loss": 0.7334,
"step": 17500
},
{
"epoch": 5.573197840584313,
"grad_norm": 29767.94140625,
"learning_rate": 2.2135598602731027e-05,
"loss": 0.7324,
"step": 17550
},
{
"epoch": 5.5890758971101935,
"grad_norm": 29029.384765625,
"learning_rate": 2.205620832010162e-05,
"loss": 0.7318,
"step": 17600
},
{
"epoch": 5.604953953636075,
"grad_norm": 29995.126953125,
"learning_rate": 2.1976818037472214e-05,
"loss": 0.7332,
"step": 17650
},
{
"epoch": 5.620832010161957,
"grad_norm": 29895.734375,
"learning_rate": 2.1897427754842808e-05,
"loss": 0.7323,
"step": 17700
},
{
"epoch": 5.636710066687837,
"grad_norm": 28998.80078125,
"learning_rate": 2.1818037472213402e-05,
"loss": 0.7308,
"step": 17750
},
{
"epoch": 5.652588123213719,
"grad_norm": 29375.140625,
"learning_rate": 2.1738647189583996e-05,
"loss": 0.7307,
"step": 17800
},
{
"epoch": 5.6684661797396,
"grad_norm": 30084.34375,
"learning_rate": 2.165925690695459e-05,
"loss": 0.7292,
"step": 17850
},
{
"epoch": 5.684344236265481,
"grad_norm": 29009.86328125,
"learning_rate": 2.1579866624325183e-05,
"loss": 0.7311,
"step": 17900
},
{
"epoch": 5.700222292791363,
"grad_norm": 28996.576171875,
"learning_rate": 2.1500476341695777e-05,
"loss": 0.731,
"step": 17950
},
{
"epoch": 5.7161003493172435,
"grad_norm": 28874.802734375,
"learning_rate": 2.142108605906637e-05,
"loss": 0.7301,
"step": 18000
},
{
"epoch": 5.731978405843125,
"grad_norm": 29940.734375,
"learning_rate": 2.1341695776436965e-05,
"loss": 0.73,
"step": 18050
},
{
"epoch": 5.747856462369006,
"grad_norm": 29607.59765625,
"learning_rate": 2.126230549380756e-05,
"loss": 0.7311,
"step": 18100
},
{
"epoch": 5.763734518894887,
"grad_norm": 29006.7890625,
"learning_rate": 2.1182915211178153e-05,
"loss": 0.7301,
"step": 18150
},
{
"epoch": 5.779612575420768,
"grad_norm": 30157.11328125,
"learning_rate": 2.1103524928548746e-05,
"loss": 0.7281,
"step": 18200
},
{
"epoch": 5.79549063194665,
"grad_norm": 29396.783203125,
"learning_rate": 2.102413464591934e-05,
"loss": 0.7315,
"step": 18250
},
{
"epoch": 5.811368688472531,
"grad_norm": 29335.1015625,
"learning_rate": 2.0944744363289934e-05,
"loss": 0.7279,
"step": 18300
},
{
"epoch": 5.827246744998412,
"grad_norm": 28650.416015625,
"learning_rate": 2.0865354080660528e-05,
"loss": 0.7347,
"step": 18350
},
{
"epoch": 5.843124801524294,
"grad_norm": 29561.095703125,
"learning_rate": 2.078596379803112e-05,
"loss": 0.7263,
"step": 18400
},
{
"epoch": 5.859002858050174,
"grad_norm": 28661.658203125,
"learning_rate": 2.070657351540172e-05,
"loss": 0.7314,
"step": 18450
},
{
"epoch": 5.874880914576056,
"grad_norm": 28372.755859375,
"learning_rate": 2.062718323277231e-05,
"loss": 0.7265,
"step": 18500
},
{
"epoch": 5.8907589711019375,
"grad_norm": 28641.00390625,
"learning_rate": 2.0547792950142903e-05,
"loss": 0.7296,
"step": 18550
},
{
"epoch": 5.906637027627818,
"grad_norm": 29402.455078125,
"learning_rate": 2.0468402667513497e-05,
"loss": 0.7257,
"step": 18600
},
{
"epoch": 5.9225150841537,
"grad_norm": 29028.78515625,
"learning_rate": 2.038901238488409e-05,
"loss": 0.7262,
"step": 18650
},
{
"epoch": 5.9383931406795805,
"grad_norm": 29547.03125,
"learning_rate": 2.0309622102254684e-05,
"loss": 0.728,
"step": 18700
},
{
"epoch": 5.954271197205462,
"grad_norm": 29396.15234375,
"learning_rate": 2.0230231819625278e-05,
"loss": 0.725,
"step": 18750
},
{
"epoch": 5.970149253731344,
"grad_norm": 29648.634765625,
"learning_rate": 2.0150841536995872e-05,
"loss": 0.7264,
"step": 18800
},
{
"epoch": 5.986027310257224,
"grad_norm": 29708.830078125,
"learning_rate": 2.0071451254366466e-05,
"loss": 0.7263,
"step": 18850
},
{
"epoch": 6.001905366783106,
"grad_norm": 29569.521484375,
"learning_rate": 1.9992060971737063e-05,
"loss": 0.7249,
"step": 18900
},
{
"epoch": 6.017783423308987,
"grad_norm": 29247.328125,
"learning_rate": 1.9912670689107653e-05,
"loss": 0.7248,
"step": 18950
},
{
"epoch": 6.033661479834868,
"grad_norm": 29513.408203125,
"learning_rate": 1.9833280406478247e-05,
"loss": 0.7243,
"step": 19000
},
{
"epoch": 6.04953953636075,
"grad_norm": 29599.560546875,
"learning_rate": 1.975389012384884e-05,
"loss": 0.7228,
"step": 19050
},
{
"epoch": 6.0654175928866305,
"grad_norm": 28947.3515625,
"learning_rate": 1.9674499841219435e-05,
"loss": 0.7261,
"step": 19100
},
{
"epoch": 6.081295649412512,
"grad_norm": 28986.390625,
"learning_rate": 1.959510955859003e-05,
"loss": 0.721,
"step": 19150
},
{
"epoch": 6.097173705938393,
"grad_norm": 29116.353515625,
"learning_rate": 1.9515719275960623e-05,
"loss": 0.7193,
"step": 19200
},
{
"epoch": 6.113051762464274,
"grad_norm": 29632.84375,
"learning_rate": 1.943632899333122e-05,
"loss": 0.7228,
"step": 19250
},
{
"epoch": 6.128929818990156,
"grad_norm": 29931.40234375,
"learning_rate": 1.935693871070181e-05,
"loss": 0.7233,
"step": 19300
},
{
"epoch": 6.144807875516037,
"grad_norm": 29751.517578125,
"learning_rate": 1.9277548428072407e-05,
"loss": 0.7249,
"step": 19350
},
{
"epoch": 6.160685932041918,
"grad_norm": 30318.1796875,
"learning_rate": 1.9198158145442998e-05,
"loss": 0.7232,
"step": 19400
},
{
"epoch": 6.176563988567799,
"grad_norm": 28459.033203125,
"learning_rate": 1.911876786281359e-05,
"loss": 0.7219,
"step": 19450
},
{
"epoch": 6.192442045093681,
"grad_norm": 29604.791015625,
"learning_rate": 1.9039377580184185e-05,
"loss": 0.7232,
"step": 19500
},
{
"epoch": 6.208320101619562,
"grad_norm": 29528.1484375,
"learning_rate": 1.895998729755478e-05,
"loss": 0.7192,
"step": 19550
},
{
"epoch": 6.224198158145443,
"grad_norm": 28977.87109375,
"learning_rate": 1.8880597014925376e-05,
"loss": 0.7198,
"step": 19600
},
{
"epoch": 6.2400762146713245,
"grad_norm": 29568.908203125,
"learning_rate": 1.8801206732295967e-05,
"loss": 0.7231,
"step": 19650
},
{
"epoch": 6.255954271197205,
"grad_norm": 28843.431640625,
"learning_rate": 1.8721816449666564e-05,
"loss": 0.7188,
"step": 19700
},
{
"epoch": 6.271832327723087,
"grad_norm": 29127.72265625,
"learning_rate": 1.8642426167037154e-05,
"loss": 0.7218,
"step": 19750
},
{
"epoch": 6.287710384248968,
"grad_norm": 29642.048828125,
"learning_rate": 1.856303588440775e-05,
"loss": 0.7182,
"step": 19800
},
{
"epoch": 6.303588440774849,
"grad_norm": 29874.93359375,
"learning_rate": 1.8483645601778342e-05,
"loss": 0.719,
"step": 19850
},
{
"epoch": 6.319466497300731,
"grad_norm": 29941.87890625,
"learning_rate": 1.8404255319148936e-05,
"loss": 0.7198,
"step": 19900
},
{
"epoch": 6.335344553826611,
"grad_norm": 29865.37890625,
"learning_rate": 1.832486503651953e-05,
"loss": 0.7249,
"step": 19950
},
{
"epoch": 6.351222610352493,
"grad_norm": 30395.484375,
"learning_rate": 1.8245474753890123e-05,
"loss": 0.7195,
"step": 20000
},
{
"epoch": 6.367100666878374,
"grad_norm": 30510.9765625,
"learning_rate": 1.816608447126072e-05,
"loss": 0.7206,
"step": 20050
},
{
"epoch": 6.382978723404255,
"grad_norm": 30179.208984375,
"learning_rate": 1.808669418863131e-05,
"loss": 0.7208,
"step": 20100
},
{
"epoch": 6.398856779930137,
"grad_norm": 30193.3125,
"learning_rate": 1.8007303906001908e-05,
"loss": 0.7227,
"step": 20150
},
{
"epoch": 6.4147348364560175,
"grad_norm": 30033.546875,
"learning_rate": 1.79279136233725e-05,
"loss": 0.7172,
"step": 20200
},
{
"epoch": 6.430612892981899,
"grad_norm": 30468.65625,
"learning_rate": 1.7848523340743096e-05,
"loss": 0.7215,
"step": 20250
},
{
"epoch": 6.44649094950778,
"grad_norm": 29339.623046875,
"learning_rate": 1.7769133058113686e-05,
"loss": 0.7174,
"step": 20300
},
{
"epoch": 6.462369006033661,
"grad_norm": 29816.017578125,
"learning_rate": 1.768974277548428e-05,
"loss": 0.7194,
"step": 20350
},
{
"epoch": 6.478247062559543,
"grad_norm": 29318.12109375,
"learning_rate": 1.7610352492854877e-05,
"loss": 0.7196,
"step": 20400
},
{
"epoch": 6.494125119085424,
"grad_norm": 29063.029296875,
"learning_rate": 1.7530962210225468e-05,
"loss": 0.7185,
"step": 20450
},
{
"epoch": 6.510003175611305,
"grad_norm": 30219.3359375,
"learning_rate": 1.7451571927596065e-05,
"loss": 0.7168,
"step": 20500
},
{
"epoch": 6.525881232137186,
"grad_norm": 29047.94140625,
"learning_rate": 1.7372181644966655e-05,
"loss": 0.721,
"step": 20550
},
{
"epoch": 6.5417592886630676,
"grad_norm": 28919.94140625,
"learning_rate": 1.7292791362337253e-05,
"loss": 0.7172,
"step": 20600
},
{
"epoch": 6.557637345188949,
"grad_norm": 30273.14453125,
"learning_rate": 1.7213401079707843e-05,
"loss": 0.7188,
"step": 20650
},
{
"epoch": 6.57351540171483,
"grad_norm": 30020.40625,
"learning_rate": 1.713401079707844e-05,
"loss": 0.7184,
"step": 20700
},
{
"epoch": 6.589393458240711,
"grad_norm": 29766.990234375,
"learning_rate": 1.705462051444903e-05,
"loss": 0.7162,
"step": 20750
},
{
"epoch": 6.605271514766592,
"grad_norm": 29873.83203125,
"learning_rate": 1.6975230231819624e-05,
"loss": 0.7176,
"step": 20800
},
{
"epoch": 6.621149571292474,
"grad_norm": 29591.2578125,
"learning_rate": 1.689583994919022e-05,
"loss": 0.715,
"step": 20850
},
{
"epoch": 6.637027627818355,
"grad_norm": 30105.244140625,
"learning_rate": 1.6816449666560812e-05,
"loss": 0.716,
"step": 20900
},
{
"epoch": 6.652905684344236,
"grad_norm": 29515.470703125,
"learning_rate": 1.673705938393141e-05,
"loss": 0.7165,
"step": 20950
},
{
"epoch": 6.668783740870118,
"grad_norm": 29495.97265625,
"learning_rate": 1.6657669101302e-05,
"loss": 0.7157,
"step": 21000
},
{
"epoch": 6.684661797395998,
"grad_norm": 29483.634765625,
"learning_rate": 1.6578278818672597e-05,
"loss": 0.7207,
"step": 21050
},
{
"epoch": 6.70053985392188,
"grad_norm": 29576.375,
"learning_rate": 1.6498888536043187e-05,
"loss": 0.7158,
"step": 21100
},
{
"epoch": 6.7164179104477615,
"grad_norm": 29991.36328125,
"learning_rate": 1.6419498253413784e-05,
"loss": 0.7142,
"step": 21150
},
{
"epoch": 6.732295966973642,
"grad_norm": 30472.951171875,
"learning_rate": 1.6340107970784378e-05,
"loss": 0.7157,
"step": 21200
},
{
"epoch": 6.748174023499524,
"grad_norm": 29432.845703125,
"learning_rate": 1.626071768815497e-05,
"loss": 0.7164,
"step": 21250
},
{
"epoch": 6.7640520800254045,
"grad_norm": 30286.595703125,
"learning_rate": 1.6181327405525566e-05,
"loss": 0.7215,
"step": 21300
},
{
"epoch": 6.779930136551286,
"grad_norm": 29090.42578125,
"learning_rate": 1.6101937122896156e-05,
"loss": 0.7189,
"step": 21350
},
{
"epoch": 6.795808193077168,
"grad_norm": 29895.060546875,
"learning_rate": 1.6022546840266754e-05,
"loss": 0.7172,
"step": 21400
},
{
"epoch": 6.811686249603048,
"grad_norm": 29853.85546875,
"learning_rate": 1.5943156557637344e-05,
"loss": 0.713,
"step": 21450
},
{
"epoch": 6.82756430612893,
"grad_norm": 30071.662109375,
"learning_rate": 1.586376627500794e-05,
"loss": 0.712,
"step": 21500
},
{
"epoch": 6.843442362654811,
"grad_norm": 28977.619140625,
"learning_rate": 1.5784375992378535e-05,
"loss": 0.7134,
"step": 21550
},
{
"epoch": 6.859320419180692,
"grad_norm": 29966.154296875,
"learning_rate": 1.570498570974913e-05,
"loss": 0.7147,
"step": 21600
},
{
"epoch": 6.875198475706574,
"grad_norm": 29842.240234375,
"learning_rate": 1.5625595427119723e-05,
"loss": 0.7201,
"step": 21650
},
{
"epoch": 6.8910765322324545,
"grad_norm": 29326.939453125,
"learning_rate": 1.5546205144490313e-05,
"loss": 0.7159,
"step": 21700
},
{
"epoch": 6.906954588758336,
"grad_norm": 30401.865234375,
"learning_rate": 1.546681486186091e-05,
"loss": 0.7154,
"step": 21750
},
{
"epoch": 6.922832645284217,
"grad_norm": 30828.583984375,
"learning_rate": 1.53874245792315e-05,
"loss": 0.7167,
"step": 21800
},
{
"epoch": 6.938710701810098,
"grad_norm": 30084.78125,
"learning_rate": 1.5308034296602098e-05,
"loss": 0.7147,
"step": 21850
},
{
"epoch": 6.95458875833598,
"grad_norm": 29893.818359375,
"learning_rate": 1.522864401397269e-05,
"loss": 0.7174,
"step": 21900
},
{
"epoch": 6.970466814861861,
"grad_norm": 29464.3125,
"learning_rate": 1.5149253731343285e-05,
"loss": 0.7158,
"step": 21950
},
{
"epoch": 6.986344871387742,
"grad_norm": 28943.29296875,
"learning_rate": 1.5069863448713878e-05,
"loss": 0.7111,
"step": 22000
},
{
"epoch": 7.002222927913623,
"grad_norm": 30447.416015625,
"learning_rate": 1.4990473166084473e-05,
"loss": 0.7167,
"step": 22050
},
{
"epoch": 7.018100984439505,
"grad_norm": 30325.32421875,
"learning_rate": 1.4911082883455065e-05,
"loss": 0.7102,
"step": 22100
},
{
"epoch": 7.033979040965386,
"grad_norm": 30525.169921875,
"learning_rate": 1.4831692600825659e-05,
"loss": 0.7102,
"step": 22150
},
{
"epoch": 7.049857097491267,
"grad_norm": 30165.91015625,
"learning_rate": 1.4752302318196254e-05,
"loss": 0.7124,
"step": 22200
},
{
"epoch": 7.0657351540171485,
"grad_norm": 29645.66796875,
"learning_rate": 1.4672912035566847e-05,
"loss": 0.709,
"step": 22250
},
{
"epoch": 7.081613210543029,
"grad_norm": 29299.05859375,
"learning_rate": 1.4593521752937442e-05,
"loss": 0.7105,
"step": 22300
},
{
"epoch": 7.097491267068911,
"grad_norm": 29655.083984375,
"learning_rate": 1.4514131470308034e-05,
"loss": 0.7106,
"step": 22350
},
{
"epoch": 7.113369323594792,
"grad_norm": 30337.484375,
"learning_rate": 1.443474118767863e-05,
"loss": 0.7153,
"step": 22400
},
{
"epoch": 7.129247380120673,
"grad_norm": 29374.36328125,
"learning_rate": 1.4355350905049222e-05,
"loss": 0.7101,
"step": 22450
},
{
"epoch": 7.145125436646555,
"grad_norm": 29468.953125,
"learning_rate": 1.4275960622419817e-05,
"loss": 0.7074,
"step": 22500
},
{
"epoch": 7.161003493172435,
"grad_norm": 30660.234375,
"learning_rate": 1.4196570339790411e-05,
"loss": 0.7107,
"step": 22550
},
{
"epoch": 7.176881549698317,
"grad_norm": 29092.36328125,
"learning_rate": 1.4117180057161003e-05,
"loss": 0.7102,
"step": 22600
},
{
"epoch": 7.1927596062241985,
"grad_norm": 29264.359375,
"learning_rate": 1.4037789774531599e-05,
"loss": 0.7075,
"step": 22650
},
{
"epoch": 7.208637662750079,
"grad_norm": 30579.66015625,
"learning_rate": 1.3958399491902191e-05,
"loss": 0.7097,
"step": 22700
},
{
"epoch": 7.224515719275961,
"grad_norm": 29726.57421875,
"learning_rate": 1.3879009209272786e-05,
"loss": 0.7095,
"step": 22750
},
{
"epoch": 7.2403937758018415,
"grad_norm": 30103.4921875,
"learning_rate": 1.3799618926643379e-05,
"loss": 0.7108,
"step": 22800
},
{
"epoch": 7.256271832327723,
"grad_norm": 29993.138671875,
"learning_rate": 1.3720228644013974e-05,
"loss": 0.709,
"step": 22850
},
{
"epoch": 7.272149888853605,
"grad_norm": 30139.607421875,
"learning_rate": 1.3640838361384566e-05,
"loss": 0.7087,
"step": 22900
},
{
"epoch": 7.288027945379485,
"grad_norm": 29735.54296875,
"learning_rate": 1.3561448078755162e-05,
"loss": 0.7094,
"step": 22950
},
{
"epoch": 7.303906001905367,
"grad_norm": 29745.779296875,
"learning_rate": 1.3482057796125755e-05,
"loss": 0.7102,
"step": 23000
},
{
"epoch": 7.319784058431248,
"grad_norm": 30882.021484375,
"learning_rate": 1.3402667513496348e-05,
"loss": 0.7108,
"step": 23050
},
{
"epoch": 7.335662114957129,
"grad_norm": 29850.26171875,
"learning_rate": 1.3323277230866943e-05,
"loss": 0.7105,
"step": 23100
},
{
"epoch": 7.351540171483011,
"grad_norm": 30374.646484375,
"learning_rate": 1.3243886948237535e-05,
"loss": 0.7119,
"step": 23150
},
{
"epoch": 7.367418228008892,
"grad_norm": 29295.6640625,
"learning_rate": 1.316449666560813e-05,
"loss": 0.7082,
"step": 23200
},
{
"epoch": 7.383296284534773,
"grad_norm": 30273.5703125,
"learning_rate": 1.3085106382978723e-05,
"loss": 0.7087,
"step": 23250
},
{
"epoch": 7.399174341060654,
"grad_norm": 30317.859375,
"learning_rate": 1.3005716100349318e-05,
"loss": 0.7079,
"step": 23300
},
{
"epoch": 7.4150523975865354,
"grad_norm": 29483.0,
"learning_rate": 1.2926325817719912e-05,
"loss": 0.7065,
"step": 23350
},
{
"epoch": 7.430930454112417,
"grad_norm": 29725.1875,
"learning_rate": 1.2846935535090508e-05,
"loss": 0.7103,
"step": 23400
},
{
"epoch": 7.446808510638298,
"grad_norm": 29491.5234375,
"learning_rate": 1.27675452524611e-05,
"loss": 0.7102,
"step": 23450
},
{
"epoch": 7.462686567164179,
"grad_norm": 30156.08984375,
"learning_rate": 1.2688154969831692e-05,
"loss": 0.7093,
"step": 23500
},
{
"epoch": 7.47856462369006,
"grad_norm": 29961.205078125,
"learning_rate": 1.2608764687202287e-05,
"loss": 0.7082,
"step": 23550
},
{
"epoch": 7.494442680215942,
"grad_norm": 29865.1640625,
"learning_rate": 1.252937440457288e-05,
"loss": 0.7043,
"step": 23600
},
{
"epoch": 7.510320736741823,
"grad_norm": 30199.93359375,
"learning_rate": 1.2449984121943473e-05,
"loss": 0.7076,
"step": 23650
},
{
"epoch": 7.526198793267704,
"grad_norm": 30157.5,
"learning_rate": 1.2370593839314069e-05,
"loss": 0.7079,
"step": 23700
},
{
"epoch": 7.5420768497935855,
"grad_norm": 29760.25390625,
"learning_rate": 1.2291203556684663e-05,
"loss": 0.7048,
"step": 23750
},
{
"epoch": 7.557954906319466,
"grad_norm": 30412.80078125,
"learning_rate": 1.2211813274055256e-05,
"loss": 0.7067,
"step": 23800
},
{
"epoch": 7.573832962845348,
"grad_norm": 29912.32421875,
"learning_rate": 1.213242299142585e-05,
"loss": 0.7092,
"step": 23850
},
{
"epoch": 7.589711019371229,
"grad_norm": 30394.833984375,
"learning_rate": 1.2053032708796444e-05,
"loss": 0.7085,
"step": 23900
},
{
"epoch": 7.60558907589711,
"grad_norm": 30330.65625,
"learning_rate": 1.1973642426167038e-05,
"loss": 0.7055,
"step": 23950
},
{
"epoch": 7.621467132422992,
"grad_norm": 30122.162109375,
"learning_rate": 1.1894252143537632e-05,
"loss": 0.7078,
"step": 24000
},
{
"epoch": 7.637345188948872,
"grad_norm": 29642.91796875,
"learning_rate": 1.1814861860908225e-05,
"loss": 0.7062,
"step": 24050
},
{
"epoch": 7.653223245474754,
"grad_norm": 30002.765625,
"learning_rate": 1.173547157827882e-05,
"loss": 0.7054,
"step": 24100
},
{
"epoch": 7.669101302000636,
"grad_norm": 29660.470703125,
"learning_rate": 1.1656081295649413e-05,
"loss": 0.7048,
"step": 24150
},
{
"epoch": 7.684979358526516,
"grad_norm": 30191.8828125,
"learning_rate": 1.1576691013020007e-05,
"loss": 0.7085,
"step": 24200
},
{
"epoch": 7.700857415052398,
"grad_norm": 29762.623046875,
"learning_rate": 1.14973007303906e-05,
"loss": 0.7102,
"step": 24250
},
{
"epoch": 7.7167354715782785,
"grad_norm": 29867.5625,
"learning_rate": 1.1417910447761194e-05,
"loss": 0.704,
"step": 24300
},
{
"epoch": 7.73261352810416,
"grad_norm": 30145.470703125,
"learning_rate": 1.1338520165131788e-05,
"loss": 0.7043,
"step": 24350
},
{
"epoch": 7.748491584630042,
"grad_norm": 29339.95703125,
"learning_rate": 1.1259129882502382e-05,
"loss": 0.7092,
"step": 24400
},
{
"epoch": 7.764369641155922,
"grad_norm": 30186.40234375,
"learning_rate": 1.1179739599872976e-05,
"loss": 0.7063,
"step": 24450
},
{
"epoch": 7.780247697681804,
"grad_norm": 30043.974609375,
"learning_rate": 1.110034931724357e-05,
"loss": 0.7053,
"step": 24500
},
{
"epoch": 7.796125754207685,
"grad_norm": 30453.666015625,
"learning_rate": 1.1020959034614164e-05,
"loss": 0.7066,
"step": 24550
},
{
"epoch": 7.812003810733566,
"grad_norm": 30049.93359375,
"learning_rate": 1.0941568751984757e-05,
"loss": 0.71,
"step": 24600
},
{
"epoch": 7.827881867259448,
"grad_norm": 30561.3203125,
"learning_rate": 1.0862178469355351e-05,
"loss": 0.7065,
"step": 24650
},
{
"epoch": 7.843759923785329,
"grad_norm": 30853.423828125,
"learning_rate": 1.0782788186725945e-05,
"loss": 0.706,
"step": 24700
},
{
"epoch": 7.85963798031121,
"grad_norm": 29456.958984375,
"learning_rate": 1.0703397904096539e-05,
"loss": 0.7093,
"step": 24750
},
{
"epoch": 7.875516036837091,
"grad_norm": 31488.560546875,
"learning_rate": 1.0624007621467133e-05,
"loss": 0.7063,
"step": 24800
},
{
"epoch": 7.8913940933629725,
"grad_norm": 30248.27734375,
"learning_rate": 1.0544617338837726e-05,
"loss": 0.7062,
"step": 24850
},
{
"epoch": 7.907272149888854,
"grad_norm": 29547.48828125,
"learning_rate": 1.046522705620832e-05,
"loss": 0.7069,
"step": 24900
},
{
"epoch": 7.923150206414735,
"grad_norm": 29567.212890625,
"learning_rate": 1.0385836773578916e-05,
"loss": 0.7059,
"step": 24950
},
{
"epoch": 7.939028262940616,
"grad_norm": 30756.359375,
"learning_rate": 1.0306446490949508e-05,
"loss": 0.7056,
"step": 25000
},
{
"epoch": 7.954906319466497,
"grad_norm": 29843.1953125,
"learning_rate": 1.0227056208320102e-05,
"loss": 0.7058,
"step": 25050
},
{
"epoch": 7.970784375992379,
"grad_norm": 30317.7734375,
"learning_rate": 1.0147665925690695e-05,
"loss": 0.7062,
"step": 25100
},
{
"epoch": 7.98666243251826,
"grad_norm": 29659.53125,
"learning_rate": 1.006827564306129e-05,
"loss": 0.703,
"step": 25150
},
{
"epoch": 8.002540489044142,
"grad_norm": 30429.38671875,
"learning_rate": 9.988885360431883e-06,
"loss": 0.7054,
"step": 25200
},
{
"epoch": 8.018418545570022,
"grad_norm": 31551.69140625,
"learning_rate": 9.909495077802477e-06,
"loss": 0.7015,
"step": 25250
},
{
"epoch": 8.034296602095903,
"grad_norm": 30067.037109375,
"learning_rate": 9.83010479517307e-06,
"loss": 0.7018,
"step": 25300
},
{
"epoch": 8.050174658621785,
"grad_norm": 30266.275390625,
"learning_rate": 9.750714512543666e-06,
"loss": 0.704,
"step": 25350
},
{
"epoch": 8.066052715147666,
"grad_norm": 30958.685546875,
"learning_rate": 9.67132422991426e-06,
"loss": 0.7018,
"step": 25400
},
{
"epoch": 8.081930771673548,
"grad_norm": 30467.9921875,
"learning_rate": 9.591933947284852e-06,
"loss": 0.7016,
"step": 25450
},
{
"epoch": 8.097808828199428,
"grad_norm": 30163.07421875,
"learning_rate": 9.512543664655446e-06,
"loss": 0.7042,
"step": 25500
},
{
"epoch": 8.11368688472531,
"grad_norm": 31141.931640625,
"learning_rate": 9.43315338202604e-06,
"loss": 0.6998,
"step": 25550
},
{
"epoch": 8.129564941251191,
"grad_norm": 30357.98828125,
"learning_rate": 9.353763099396634e-06,
"loss": 0.7008,
"step": 25600
},
{
"epoch": 8.145442997777073,
"grad_norm": 30259.6171875,
"learning_rate": 9.274372816767227e-06,
"loss": 0.7012,
"step": 25650
},
{
"epoch": 8.161321054302954,
"grad_norm": 29641.35546875,
"learning_rate": 9.194982534137823e-06,
"loss": 0.6998,
"step": 25700
},
{
"epoch": 8.177199110828834,
"grad_norm": 30559.46875,
"learning_rate": 9.115592251508417e-06,
"loss": 0.706,
"step": 25750
},
{
"epoch": 8.193077167354716,
"grad_norm": 30127.62890625,
"learning_rate": 9.03620196887901e-06,
"loss": 0.7063,
"step": 25800
},
{
"epoch": 8.208955223880597,
"grad_norm": 30027.09375,
"learning_rate": 8.956811686249604e-06,
"loss": 0.7025,
"step": 25850
},
{
"epoch": 8.224833280406479,
"grad_norm": 30069.939453125,
"learning_rate": 8.877421403620196e-06,
"loss": 0.7002,
"step": 25900
},
{
"epoch": 8.24071133693236,
"grad_norm": 30299.6171875,
"learning_rate": 8.79803112099079e-06,
"loss": 0.6996,
"step": 25950
},
{
"epoch": 8.25658939345824,
"grad_norm": 30868.3671875,
"learning_rate": 8.718640838361384e-06,
"loss": 0.7012,
"step": 26000
},
{
"epoch": 8.272467449984122,
"grad_norm": 30317.115234375,
"learning_rate": 8.639250555731978e-06,
"loss": 0.7003,
"step": 26050
},
{
"epoch": 8.288345506510003,
"grad_norm": 30630.19921875,
"learning_rate": 8.559860273102573e-06,
"loss": 0.7023,
"step": 26100
},
{
"epoch": 8.304223563035885,
"grad_norm": 31124.72265625,
"learning_rate": 8.480469990473167e-06,
"loss": 0.6989,
"step": 26150
},
{
"epoch": 8.320101619561765,
"grad_norm": 30239.1875,
"learning_rate": 8.401079707843761e-06,
"loss": 0.7013,
"step": 26200
},
{
"epoch": 8.335979676087646,
"grad_norm": 30741.263671875,
"learning_rate": 8.321689425214355e-06,
"loss": 0.7029,
"step": 26250
},
{
"epoch": 8.351857732613528,
"grad_norm": 29795.580078125,
"learning_rate": 8.242299142584949e-06,
"loss": 0.6995,
"step": 26300
},
{
"epoch": 8.36773578913941,
"grad_norm": 30709.451171875,
"learning_rate": 8.16290885995554e-06,
"loss": 0.7009,
"step": 26350
},
{
"epoch": 8.383613845665291,
"grad_norm": 30076.46875,
"learning_rate": 8.083518577326135e-06,
"loss": 0.6988,
"step": 26400
},
{
"epoch": 8.399491902191173,
"grad_norm": 29763.3828125,
"learning_rate": 8.004128294696728e-06,
"loss": 0.7028,
"step": 26450
},
{
"epoch": 8.415369958717053,
"grad_norm": 30590.60546875,
"learning_rate": 7.924738012067324e-06,
"loss": 0.7067,
"step": 26500
},
{
"epoch": 8.431248015242934,
"grad_norm": 30087.3671875,
"learning_rate": 7.845347729437918e-06,
"loss": 0.702,
"step": 26550
},
{
"epoch": 8.447126071768816,
"grad_norm": 30778.0859375,
"learning_rate": 7.765957446808511e-06,
"loss": 0.7028,
"step": 26600
},
{
"epoch": 8.463004128294697,
"grad_norm": 30542.35546875,
"learning_rate": 7.686567164179105e-06,
"loss": 0.6996,
"step": 26650
},
{
"epoch": 8.478882184820577,
"grad_norm": 30533.51171875,
"learning_rate": 7.607176881549699e-06,
"loss": 0.7023,
"step": 26700
},
{
"epoch": 8.494760241346459,
"grad_norm": 30710.169921875,
"learning_rate": 7.527786598920293e-06,
"loss": 0.6998,
"step": 26750
},
{
"epoch": 8.51063829787234,
"grad_norm": 30740.416015625,
"learning_rate": 7.448396316290886e-06,
"loss": 0.7036,
"step": 26800
},
{
"epoch": 8.526516354398222,
"grad_norm": 29898.5390625,
"learning_rate": 7.36900603366148e-06,
"loss": 0.7007,
"step": 26850
},
{
"epoch": 8.542394410924103,
"grad_norm": 30001.548828125,
"learning_rate": 7.2896157510320734e-06,
"loss": 0.6958,
"step": 26900
},
{
"epoch": 8.558272467449985,
"grad_norm": 30819.171875,
"learning_rate": 7.210225468402667e-06,
"loss": 0.6976,
"step": 26950
},
{
"epoch": 8.574150523975865,
"grad_norm": 30720.3125,
"learning_rate": 7.130835185773262e-06,
"loss": 0.6985,
"step": 27000
},
{
"epoch": 8.590028580501746,
"grad_norm": 31220.265625,
"learning_rate": 7.051444903143856e-06,
"loss": 0.7008,
"step": 27050
},
{
"epoch": 8.605906637027628,
"grad_norm": 31095.111328125,
"learning_rate": 6.9720546205144495e-06,
"loss": 0.7011,
"step": 27100
},
{
"epoch": 8.62178469355351,
"grad_norm": 30474.576171875,
"learning_rate": 6.892664337885043e-06,
"loss": 0.6994,
"step": 27150
},
{
"epoch": 8.63766275007939,
"grad_norm": 30038.474609375,
"learning_rate": 6.813274055255638e-06,
"loss": 0.7002,
"step": 27200
},
{
"epoch": 8.653540806605271,
"grad_norm": 30336.1953125,
"learning_rate": 6.73388377262623e-06,
"loss": 0.6994,
"step": 27250
},
{
"epoch": 8.669418863131153,
"grad_norm": 30122.53125,
"learning_rate": 6.654493489996824e-06,
"loss": 0.7004,
"step": 27300
},
{
"epoch": 8.685296919657034,
"grad_norm": 30615.431640625,
"learning_rate": 6.575103207367419e-06,
"loss": 0.7021,
"step": 27350
},
{
"epoch": 8.701174976182916,
"grad_norm": 30339.583984375,
"learning_rate": 6.495712924738012e-06,
"loss": 0.7007,
"step": 27400
},
{
"epoch": 8.717053032708796,
"grad_norm": 30189.884765625,
"learning_rate": 6.416322642108606e-06,
"loss": 0.6986,
"step": 27450
},
{
"epoch": 8.732931089234677,
"grad_norm": 30549.412109375,
"learning_rate": 6.3369323594792e-06,
"loss": 0.6996,
"step": 27500
},
{
"epoch": 8.748809145760559,
"grad_norm": 30457.353515625,
"learning_rate": 6.257542076849794e-06,
"loss": 0.6991,
"step": 27550
},
{
"epoch": 8.76468720228644,
"grad_norm": 31163.732421875,
"learning_rate": 6.178151794220388e-06,
"loss": 0.702,
"step": 27600
},
{
"epoch": 8.780565258812322,
"grad_norm": 29878.865234375,
"learning_rate": 6.0987615115909814e-06,
"loss": 0.6999,
"step": 27650
},
{
"epoch": 8.796443315338202,
"grad_norm": 30267.07421875,
"learning_rate": 6.019371228961575e-06,
"loss": 0.7014,
"step": 27700
},
{
"epoch": 8.812321371864083,
"grad_norm": 30354.353515625,
"learning_rate": 5.939980946332169e-06,
"loss": 0.6982,
"step": 27750
},
{
"epoch": 8.828199428389965,
"grad_norm": 30582.365234375,
"learning_rate": 5.860590663702763e-06,
"loss": 0.7002,
"step": 27800
},
{
"epoch": 8.844077484915847,
"grad_norm": 30702.970703125,
"learning_rate": 5.781200381073357e-06,
"loss": 0.6975,
"step": 27850
},
{
"epoch": 8.859955541441728,
"grad_norm": 31331.51953125,
"learning_rate": 5.7018100984439505e-06,
"loss": 0.6966,
"step": 27900
},
{
"epoch": 8.875833597967608,
"grad_norm": 31188.05859375,
"learning_rate": 5.622419815814544e-06,
"loss": 0.6976,
"step": 27950
},
{
"epoch": 8.89171165449349,
"grad_norm": 30835.2578125,
"learning_rate": 5.543029533185139e-06,
"loss": 0.6985,
"step": 28000
},
{
"epoch": 8.907589711019371,
"grad_norm": 30251.89453125,
"learning_rate": 5.463639250555732e-06,
"loss": 0.7002,
"step": 28050
},
{
"epoch": 8.923467767545253,
"grad_norm": 30508.73828125,
"learning_rate": 5.384248967926326e-06,
"loss": 0.6986,
"step": 28100
},
{
"epoch": 8.939345824071134,
"grad_norm": 30640.4375,
"learning_rate": 5.3048586852969195e-06,
"loss": 0.7019,
"step": 28150
},
{
"epoch": 8.955223880597014,
"grad_norm": 31159.490234375,
"learning_rate": 5.225468402667514e-06,
"loss": 0.6987,
"step": 28200
},
{
"epoch": 8.971101937122896,
"grad_norm": 30732.70703125,
"learning_rate": 5.146078120038107e-06,
"loss": 0.6996,
"step": 28250
},
{
"epoch": 8.986979993648777,
"grad_norm": 30562.67578125,
"learning_rate": 5.066687837408701e-06,
"loss": 0.7028,
"step": 28300
},
{
"epoch": 9.002858050174659,
"grad_norm": 31258.55078125,
"learning_rate": 4.987297554779296e-06,
"loss": 0.6952,
"step": 28350
},
{
"epoch": 9.01873610670054,
"grad_norm": 30664.671875,
"learning_rate": 4.9079072721498894e-06,
"loss": 0.6945,
"step": 28400
},
{
"epoch": 9.03461416322642,
"grad_norm": 31010.13671875,
"learning_rate": 4.828516989520483e-06,
"loss": 0.6963,
"step": 28450
},
{
"epoch": 9.050492219752302,
"grad_norm": 31770.1484375,
"learning_rate": 4.749126706891076e-06,
"loss": 0.6951,
"step": 28500
},
{
"epoch": 9.066370276278183,
"grad_norm": 30270.96484375,
"learning_rate": 4.669736424261671e-06,
"loss": 0.6962,
"step": 28550
},
{
"epoch": 9.082248332804065,
"grad_norm": 31095.724609375,
"learning_rate": 4.590346141632265e-06,
"loss": 0.6951,
"step": 28600
},
{
"epoch": 9.098126389329947,
"grad_norm": 30564.689453125,
"learning_rate": 4.5109558590028585e-06,
"loss": 0.6958,
"step": 28650
},
{
"epoch": 9.114004445855826,
"grad_norm": 30729.578125,
"learning_rate": 4.4315655763734514e-06,
"loss": 0.6965,
"step": 28700
},
{
"epoch": 9.129882502381708,
"grad_norm": 30804.77734375,
"learning_rate": 4.352175293744046e-06,
"loss": 0.6987,
"step": 28750
},
{
"epoch": 9.14576055890759,
"grad_norm": 30098.283203125,
"learning_rate": 4.27278501111464e-06,
"loss": 0.693,
"step": 28800
},
{
"epoch": 9.161638615433471,
"grad_norm": 31217.767578125,
"learning_rate": 4.193394728485234e-06,
"loss": 0.6952,
"step": 28850
},
{
"epoch": 9.177516671959353,
"grad_norm": 30664.6953125,
"learning_rate": 4.1140044458558275e-06,
"loss": 0.6961,
"step": 28900
},
{
"epoch": 9.193394728485233,
"grad_norm": 30483.142578125,
"learning_rate": 4.034614163226421e-06,
"loss": 0.6929,
"step": 28950
},
{
"epoch": 9.209272785011114,
"grad_norm": 30538.34765625,
"learning_rate": 3.955223880597015e-06,
"loss": 0.6964,
"step": 29000
},
{
"epoch": 9.225150841536996,
"grad_norm": 31039.91796875,
"learning_rate": 3.875833597967609e-06,
"loss": 0.6946,
"step": 29050
},
{
"epoch": 9.241028898062877,
"grad_norm": 30256.009765625,
"learning_rate": 3.796443315338203e-06,
"loss": 0.6946,
"step": 29100
},
{
"epoch": 9.256906954588759,
"grad_norm": 31257.78125,
"learning_rate": 3.717053032708796e-06,
"loss": 0.6942,
"step": 29150
},
{
"epoch": 9.272785011114639,
"grad_norm": 30951.634765625,
"learning_rate": 3.6376627500793904e-06,
"loss": 0.6944,
"step": 29200
},
{
"epoch": 9.28866306764052,
"grad_norm": 30461.291015625,
"learning_rate": 3.558272467449984e-06,
"loss": 0.6942,
"step": 29250
},
{
"epoch": 9.304541124166402,
"grad_norm": 30459.7109375,
"learning_rate": 3.4788821848205784e-06,
"loss": 0.699,
"step": 29300
},
{
"epoch": 9.320419180692284,
"grad_norm": 30297.685546875,
"learning_rate": 3.3994919021911722e-06,
"loss": 0.6909,
"step": 29350
},
{
"epoch": 9.336297237218165,
"grad_norm": 30345.24609375,
"learning_rate": 3.3201016195617656e-06,
"loss": 0.6962,
"step": 29400
},
{
"epoch": 9.352175293744045,
"grad_norm": 30085.6484375,
"learning_rate": 3.2407113369323594e-06,
"loss": 0.6957,
"step": 29450
},
{
"epoch": 9.368053350269927,
"grad_norm": 31095.689453125,
"learning_rate": 3.1613210543029537e-06,
"loss": 0.698,
"step": 29500
},
{
"epoch": 9.383931406795808,
"grad_norm": 30798.33203125,
"learning_rate": 3.081930771673547e-06,
"loss": 0.6938,
"step": 29550
},
{
"epoch": 9.39980946332169,
"grad_norm": 30953.390625,
"learning_rate": 3.0025404890441413e-06,
"loss": 0.6934,
"step": 29600
},
{
"epoch": 9.415687519847571,
"grad_norm": 30614.728515625,
"learning_rate": 2.9231502064147347e-06,
"loss": 0.6936,
"step": 29650
},
{
"epoch": 9.431565576373451,
"grad_norm": 30555.818359375,
"learning_rate": 2.843759923785329e-06,
"loss": 0.6925,
"step": 29700
},
{
"epoch": 9.447443632899333,
"grad_norm": 30385.14453125,
"learning_rate": 2.7643696411559227e-06,
"loss": 0.6989,
"step": 29750
},
{
"epoch": 9.463321689425214,
"grad_norm": 30924.46875,
"learning_rate": 2.6849793585265165e-06,
"loss": 0.697,
"step": 29800
},
{
"epoch": 9.479199745951096,
"grad_norm": 31371.728515625,
"learning_rate": 2.6055890758971103e-06,
"loss": 0.6955,
"step": 29850
},
{
"epoch": 9.495077802476978,
"grad_norm": 30738.732421875,
"learning_rate": 2.526198793267704e-06,
"loss": 0.6945,
"step": 29900
},
{
"epoch": 9.510955859002857,
"grad_norm": 30800.96484375,
"learning_rate": 2.446808510638298e-06,
"loss": 0.6951,
"step": 29950
},
{
"epoch": 9.526833915528739,
"grad_norm": 30367.607421875,
"learning_rate": 2.3674182280088918e-06,
"loss": 0.6975,
"step": 30000
},
{
"epoch": 9.54271197205462,
"grad_norm": 30162.599609375,
"learning_rate": 2.2880279453794856e-06,
"loss": 0.6949,
"step": 30050
},
{
"epoch": 9.558590028580502,
"grad_norm": 30217.197265625,
"learning_rate": 2.2086376627500794e-06,
"loss": 0.6921,
"step": 30100
},
{
"epoch": 9.574468085106384,
"grad_norm": 30000.412109375,
"learning_rate": 2.129247380120673e-06,
"loss": 0.6967,
"step": 30150
},
{
"epoch": 9.590346141632264,
"grad_norm": 30676.177734375,
"learning_rate": 2.0498570974912674e-06,
"loss": 0.6957,
"step": 30200
},
{
"epoch": 9.606224198158145,
"grad_norm": 30816.064453125,
"learning_rate": 1.970466814861861e-06,
"loss": 0.6944,
"step": 30250
},
{
"epoch": 9.622102254684027,
"grad_norm": 30859.091796875,
"learning_rate": 1.891076532232455e-06,
"loss": 0.695,
"step": 30300
},
{
"epoch": 9.637980311209908,
"grad_norm": 30824.66015625,
"learning_rate": 1.8116862496030486e-06,
"loss": 0.6955,
"step": 30350
},
{
"epoch": 9.65385836773579,
"grad_norm": 31179.345703125,
"learning_rate": 1.7322959669736427e-06,
"loss": 0.692,
"step": 30400
},
{
"epoch": 9.66973642426167,
"grad_norm": 29985.033203125,
"learning_rate": 1.6529056843442363e-06,
"loss": 0.6961,
"step": 30450
},
{
"epoch": 9.685614480787551,
"grad_norm": 29948.798828125,
"learning_rate": 1.5735154017148303e-06,
"loss": 0.6948,
"step": 30500
},
{
"epoch": 9.701492537313433,
"grad_norm": 30504.94140625,
"learning_rate": 1.494125119085424e-06,
"loss": 0.6955,
"step": 30550
},
{
"epoch": 9.717370593839314,
"grad_norm": 30339.3515625,
"learning_rate": 1.414734836456018e-06,
"loss": 0.6962,
"step": 30600
},
{
"epoch": 9.733248650365196,
"grad_norm": 30103.78125,
"learning_rate": 1.3353445538266117e-06,
"loss": 0.6953,
"step": 30650
},
{
"epoch": 9.749126706891076,
"grad_norm": 30390.306640625,
"learning_rate": 1.2559542711972055e-06,
"loss": 0.6962,
"step": 30700
},
{
"epoch": 9.765004763416957,
"grad_norm": 30574.888671875,
"learning_rate": 1.1765639885677993e-06,
"loss": 0.6963,
"step": 30750
},
{
"epoch": 9.780882819942839,
"grad_norm": 30706.4765625,
"learning_rate": 1.0971737059383931e-06,
"loss": 0.6928,
"step": 30800
},
{
"epoch": 9.79676087646872,
"grad_norm": 29788.431640625,
"learning_rate": 1.0177834233089872e-06,
"loss": 0.6918,
"step": 30850
},
{
"epoch": 9.812638932994602,
"grad_norm": 30614.6875,
"learning_rate": 9.383931406795809e-07,
"loss": 0.6931,
"step": 30900
},
{
"epoch": 9.828516989520482,
"grad_norm": 30468.765625,
"learning_rate": 8.590028580501748e-07,
"loss": 0.6976,
"step": 30950
},
{
"epoch": 9.844395046046364,
"grad_norm": 29652.828125,
"learning_rate": 7.796125754207686e-07,
"loss": 0.6938,
"step": 31000
},
{
"epoch": 9.860273102572245,
"grad_norm": 31199.251953125,
"learning_rate": 7.002222927913624e-07,
"loss": 0.6965,
"step": 31050
},
{
"epoch": 9.876151159098127,
"grad_norm": 30488.6796875,
"learning_rate": 6.208320101619562e-07,
"loss": 0.6938,
"step": 31100
},
{
"epoch": 9.892029215624008,
"grad_norm": 30915.556640625,
"learning_rate": 5.4144172753255e-07,
"loss": 0.6951,
"step": 31150
},
{
"epoch": 9.907907272149888,
"grad_norm": 30898.82421875,
"learning_rate": 4.620514449031439e-07,
"loss": 0.6929,
"step": 31200
},
{
"epoch": 9.92378532867577,
"grad_norm": 31000.388671875,
"learning_rate": 3.826611622737377e-07,
"loss": 0.6931,
"step": 31250
},
{
"epoch": 9.939663385201651,
"grad_norm": 30619.94140625,
"learning_rate": 3.0327087964433156e-07,
"loss": 0.6963,
"step": 31300
},
{
"epoch": 9.955541441727533,
"grad_norm": 30345.3515625,
"learning_rate": 2.238805970149254e-07,
"loss": 0.6932,
"step": 31350
},
{
"epoch": 9.971419498253415,
"grad_norm": 31431.78515625,
"learning_rate": 1.444903143855192e-07,
"loss": 0.6936,
"step": 31400
},
{
"epoch": 9.987297554779294,
"grad_norm": 30420.556640625,
"learning_rate": 6.510003175611305e-08,
"loss": 0.6958,
"step": 31450
},
{
"epoch": 10.0,
"step": 31490,
"total_flos": 1.05298598559744e+18,
"train_loss": 0.8101965235240424,
"train_runtime": 86746.076,
"train_samples_per_second": 23.228,
"train_steps_per_second": 0.363
},
{
"epoch": 10.003175611305176,
"grad_norm": 34598.17578125,
"learning_rate": 4.9992854874563356e-05,
"loss": 0.7254,
"step": 31500
},
{
"epoch": 10.019053667831058,
"grad_norm": 32862.16015625,
"learning_rate": 4.995315973324865e-05,
"loss": 0.7148,
"step": 31550
},
{
"epoch": 10.03493172435694,
"grad_norm": 34942.2890625,
"learning_rate": 4.991346459193395e-05,
"loss": 0.7129,
"step": 31600
},
{
"epoch": 10.05080978088282,
"grad_norm": 32553.541015625,
"learning_rate": 4.987376945061924e-05,
"loss": 0.7192,
"step": 31650
},
{
"epoch": 10.0666878374087,
"grad_norm": 32205.275390625,
"learning_rate": 4.983407430930454e-05,
"loss": 0.719,
"step": 31700
},
{
"epoch": 10.082565893934582,
"grad_norm": 32618.0078125,
"learning_rate": 4.9794379167989844e-05,
"loss": 0.7201,
"step": 31750
},
{
"epoch": 10.098443950460464,
"grad_norm": 31251.701171875,
"learning_rate": 4.975468402667514e-05,
"loss": 0.7209,
"step": 31800
},
{
"epoch": 10.114322006986345,
"grad_norm": 30804.701171875,
"learning_rate": 4.971498888536044e-05,
"loss": 0.7199,
"step": 31850
},
{
"epoch": 10.130200063512227,
"grad_norm": 31691.65234375,
"learning_rate": 4.967529374404573e-05,
"loss": 0.7242,
"step": 31900
},
{
"epoch": 10.146078120038107,
"grad_norm": 31379.8046875,
"learning_rate": 4.9635598602731024e-05,
"loss": 0.7244,
"step": 31950
},
{
"epoch": 10.161956176563988,
"grad_norm": 31711.45703125,
"learning_rate": 4.9595903461416325e-05,
"loss": 0.7231,
"step": 32000
},
{
"epoch": 10.17783423308987,
"grad_norm": 30201.376953125,
"learning_rate": 4.955620832010162e-05,
"loss": 0.7212,
"step": 32050
},
{
"epoch": 10.193712289615751,
"grad_norm": 30604.0234375,
"learning_rate": 4.951651317878692e-05,
"loss": 0.7217,
"step": 32100
},
{
"epoch": 10.209590346141633,
"grad_norm": 30721.6171875,
"learning_rate": 4.947681803747222e-05,
"loss": 0.7186,
"step": 32150
},
{
"epoch": 10.225468402667513,
"grad_norm": 30430.392578125,
"learning_rate": 4.943712289615751e-05,
"loss": 0.7247,
"step": 32200
},
{
"epoch": 10.241346459193394,
"grad_norm": 29437.306640625,
"learning_rate": 4.939742775484281e-05,
"loss": 0.7216,
"step": 32250
},
{
"epoch": 10.257224515719276,
"grad_norm": 31316.037109375,
"learning_rate": 4.9357732613528106e-05,
"loss": 0.7232,
"step": 32300
},
{
"epoch": 10.273102572245158,
"grad_norm": 30285.005859375,
"learning_rate": 4.93180374722134e-05,
"loss": 0.7232,
"step": 32350
},
{
"epoch": 10.28898062877104,
"grad_norm": 30771.517578125,
"learning_rate": 4.92783423308987e-05,
"loss": 0.7211,
"step": 32400
},
{
"epoch": 10.304858685296919,
"grad_norm": 31089.01953125,
"learning_rate": 4.9238647189584e-05,
"loss": 0.719,
"step": 32450
},
{
"epoch": 10.3207367418228,
"grad_norm": 29312.451171875,
"learning_rate": 4.9198952048269294e-05,
"loss": 0.7217,
"step": 32500
},
{
"epoch": 10.336614798348682,
"grad_norm": 29717.74609375,
"learning_rate": 4.9159256906954594e-05,
"loss": 0.7204,
"step": 32550
},
{
"epoch": 10.352492854874564,
"grad_norm": 29353.939453125,
"learning_rate": 4.911956176563989e-05,
"loss": 0.7209,
"step": 32600
},
{
"epoch": 10.368370911400445,
"grad_norm": 30583.625,
"learning_rate": 4.907986662432518e-05,
"loss": 0.719,
"step": 32650
},
{
"epoch": 10.384248967926325,
"grad_norm": 30315.1484375,
"learning_rate": 4.904017148301048e-05,
"loss": 0.7224,
"step": 32700
},
{
"epoch": 10.400127024452207,
"grad_norm": 28785.408203125,
"learning_rate": 4.9000476341695775e-05,
"loss": 0.7227,
"step": 32750
},
{
"epoch": 10.416005080978088,
"grad_norm": 29400.603515625,
"learning_rate": 4.8960781200381075e-05,
"loss": 0.7191,
"step": 32800
},
{
"epoch": 10.43188313750397,
"grad_norm": 28836.03125,
"learning_rate": 4.8921086059066375e-05,
"loss": 0.7191,
"step": 32850
},
{
"epoch": 10.447761194029852,
"grad_norm": 29112.181640625,
"learning_rate": 4.888139091775167e-05,
"loss": 0.7196,
"step": 32900
},
{
"epoch": 10.463639250555731,
"grad_norm": 29085.232421875,
"learning_rate": 4.884169577643697e-05,
"loss": 0.7202,
"step": 32950
},
{
"epoch": 10.479517307081613,
"grad_norm": 29157.4609375,
"learning_rate": 4.880200063512226e-05,
"loss": 0.7185,
"step": 33000
},
{
"epoch": 10.495395363607495,
"grad_norm": 29475.744140625,
"learning_rate": 4.8762305493807556e-05,
"loss": 0.7221,
"step": 33050
},
{
"epoch": 10.511273420133376,
"grad_norm": 29044.5703125,
"learning_rate": 4.8722610352492857e-05,
"loss": 0.7194,
"step": 33100
},
{
"epoch": 10.527151476659256,
"grad_norm": 28844.75390625,
"learning_rate": 4.868291521117816e-05,
"loss": 0.7178,
"step": 33150
},
{
"epoch": 10.543029533185138,
"grad_norm": 28828.00390625,
"learning_rate": 4.864322006986345e-05,
"loss": 0.7173,
"step": 33200
},
{
"epoch": 10.55890758971102,
"grad_norm": 28988.736328125,
"learning_rate": 4.860352492854875e-05,
"loss": 0.7163,
"step": 33250
},
{
"epoch": 10.5747856462369,
"grad_norm": 27665.376953125,
"learning_rate": 4.8563829787234044e-05,
"loss": 0.7177,
"step": 33300
},
{
"epoch": 10.590663702762782,
"grad_norm": 28359.263671875,
"learning_rate": 4.852413464591934e-05,
"loss": 0.7215,
"step": 33350
},
{
"epoch": 10.606541759288664,
"grad_norm": 28809.84375,
"learning_rate": 4.848443950460464e-05,
"loss": 0.7186,
"step": 33400
},
{
"epoch": 10.622419815814544,
"grad_norm": 27395.1171875,
"learning_rate": 4.844474436328993e-05,
"loss": 0.7172,
"step": 33450
},
{
"epoch": 10.638297872340425,
"grad_norm": 27581.38671875,
"learning_rate": 4.840504922197523e-05,
"loss": 0.721,
"step": 33500
},
{
"epoch": 10.654175928866307,
"grad_norm": 27007.13671875,
"learning_rate": 4.836535408066053e-05,
"loss": 0.7198,
"step": 33550
},
{
"epoch": 10.670053985392189,
"grad_norm": 28538.033203125,
"learning_rate": 4.8325658939345826e-05,
"loss": 0.717,
"step": 33600
},
{
"epoch": 10.685932041918068,
"grad_norm": 28231.92578125,
"learning_rate": 4.8285963798031126e-05,
"loss": 0.7173,
"step": 33650
},
{
"epoch": 10.70181009844395,
"grad_norm": 27481.81640625,
"learning_rate": 4.824626865671642e-05,
"loss": 0.7179,
"step": 33700
},
{
"epoch": 10.717688154969832,
"grad_norm": 29380.140625,
"learning_rate": 4.820657351540171e-05,
"loss": 0.7185,
"step": 33750
},
{
"epoch": 10.733566211495713,
"grad_norm": 27961.125,
"learning_rate": 4.816687837408701e-05,
"loss": 0.7158,
"step": 33800
},
{
"epoch": 10.749444268021595,
"grad_norm": 28789.4453125,
"learning_rate": 4.8127183232772314e-05,
"loss": 0.7184,
"step": 33850
},
{
"epoch": 10.765322324547476,
"grad_norm": 28526.916015625,
"learning_rate": 4.808748809145761e-05,
"loss": 0.719,
"step": 33900
},
{
"epoch": 10.781200381073356,
"grad_norm": 28261.802734375,
"learning_rate": 4.804779295014291e-05,
"loss": 0.7155,
"step": 33950
},
{
"epoch": 10.797078437599238,
"grad_norm": 28534.755859375,
"learning_rate": 4.80080978088282e-05,
"loss": 0.7169,
"step": 34000
},
{
"epoch": 10.81295649412512,
"grad_norm": 28207.771484375,
"learning_rate": 4.79684026675135e-05,
"loss": 0.713,
"step": 34050
},
{
"epoch": 10.828834550651,
"grad_norm": 27474.365234375,
"learning_rate": 4.7928707526198795e-05,
"loss": 0.7183,
"step": 34100
},
{
"epoch": 10.84471260717688,
"grad_norm": 28205.083984375,
"learning_rate": 4.788901238488409e-05,
"loss": 0.7189,
"step": 34150
},
{
"epoch": 10.860590663702762,
"grad_norm": 27502.6015625,
"learning_rate": 4.784931724356939e-05,
"loss": 0.7161,
"step": 34200
},
{
"epoch": 10.876468720228644,
"grad_norm": 27558.001953125,
"learning_rate": 4.780962210225469e-05,
"loss": 0.7159,
"step": 34250
},
{
"epoch": 10.892346776754525,
"grad_norm": 28145.595703125,
"learning_rate": 4.776992696093998e-05,
"loss": 0.7111,
"step": 34300
},
{
"epoch": 10.908224833280407,
"grad_norm": 27374.37109375,
"learning_rate": 4.773023181962528e-05,
"loss": 0.718,
"step": 34350
},
{
"epoch": 10.924102889806289,
"grad_norm": 27972.486328125,
"learning_rate": 4.7690536678310576e-05,
"loss": 0.7195,
"step": 34400
},
{
"epoch": 10.939980946332168,
"grad_norm": 27874.939453125,
"learning_rate": 4.765084153699587e-05,
"loss": 0.717,
"step": 34450
},
{
"epoch": 10.95585900285805,
"grad_norm": 27341.05859375,
"learning_rate": 4.761114639568117e-05,
"loss": 0.716,
"step": 34500
},
{
"epoch": 10.971737059383932,
"grad_norm": 28336.388671875,
"learning_rate": 4.757145125436647e-05,
"loss": 0.7177,
"step": 34550
},
{
"epoch": 10.987615115909813,
"grad_norm": 26653.798828125,
"learning_rate": 4.753175611305177e-05,
"loss": 0.7111,
"step": 34600
},
{
"epoch": 11.003493172435693,
"grad_norm": 27900.7265625,
"learning_rate": 4.7492060971737064e-05,
"loss": 0.7124,
"step": 34650
},
{
"epoch": 11.019371228961575,
"grad_norm": 27794.94921875,
"learning_rate": 4.745236583042236e-05,
"loss": 0.7103,
"step": 34700
},
{
"epoch": 11.035249285487456,
"grad_norm": 27183.654296875,
"learning_rate": 4.741267068910766e-05,
"loss": 0.7052,
"step": 34750
},
{
"epoch": 11.051127342013338,
"grad_norm": 27468.392578125,
"learning_rate": 4.737297554779295e-05,
"loss": 0.7084,
"step": 34800
},
{
"epoch": 11.06700539853922,
"grad_norm": 26358.43359375,
"learning_rate": 4.7333280406478245e-05,
"loss": 0.7038,
"step": 34850
},
{
"epoch": 11.0828834550651,
"grad_norm": 26982.095703125,
"learning_rate": 4.7293585265163545e-05,
"loss": 0.7023,
"step": 34900
},
{
"epoch": 11.09876151159098,
"grad_norm": 27181.15625,
"learning_rate": 4.7253890123848845e-05,
"loss": 0.7062,
"step": 34950
},
{
"epoch": 11.114639568116862,
"grad_norm": 27455.166015625,
"learning_rate": 4.721419498253414e-05,
"loss": 0.7088,
"step": 35000
},
{
"epoch": 11.130517624642744,
"grad_norm": 27479.68359375,
"learning_rate": 4.717449984121944e-05,
"loss": 0.707,
"step": 35050
},
{
"epoch": 11.146395681168626,
"grad_norm": 28365.30859375,
"learning_rate": 4.713480469990473e-05,
"loss": 0.7052,
"step": 35100
},
{
"epoch": 11.162273737694505,
"grad_norm": 27870.494140625,
"learning_rate": 4.7095109558590026e-05,
"loss": 0.7089,
"step": 35150
},
{
"epoch": 11.178151794220387,
"grad_norm": 27870.310546875,
"learning_rate": 4.7055414417275327e-05,
"loss": 0.7066,
"step": 35200
},
{
"epoch": 11.194029850746269,
"grad_norm": 28179.03125,
"learning_rate": 4.701571927596063e-05,
"loss": 0.7069,
"step": 35250
},
{
"epoch": 11.20990790727215,
"grad_norm": 27911.45703125,
"learning_rate": 4.697602413464593e-05,
"loss": 0.707,
"step": 35300
},
{
"epoch": 11.225785963798032,
"grad_norm": 26871.20703125,
"learning_rate": 4.693632899333122e-05,
"loss": 0.7073,
"step": 35350
},
{
"epoch": 11.241664020323912,
"grad_norm": 27323.912109375,
"learning_rate": 4.6896633852016514e-05,
"loss": 0.7095,
"step": 35400
},
{
"epoch": 11.257542076849793,
"grad_norm": 27162.650390625,
"learning_rate": 4.6856938710701815e-05,
"loss": 0.7047,
"step": 35450
},
{
"epoch": 11.273420133375675,
"grad_norm": 26953.41796875,
"learning_rate": 4.681724356938711e-05,
"loss": 0.7061,
"step": 35500
},
{
"epoch": 11.289298189901556,
"grad_norm": 26672.6953125,
"learning_rate": 4.67775484280724e-05,
"loss": 0.7059,
"step": 35550
},
{
"epoch": 11.305176246427438,
"grad_norm": 27072.08203125,
"learning_rate": 4.67378532867577e-05,
"loss": 0.7047,
"step": 35600
},
{
"epoch": 11.321054302953318,
"grad_norm": 26242.37109375,
"learning_rate": 4.6698158145443e-05,
"loss": 0.7042,
"step": 35650
},
{
"epoch": 11.3369323594792,
"grad_norm": 26593.7890625,
"learning_rate": 4.6658463004128296e-05,
"loss": 0.7022,
"step": 35700
},
{
"epoch": 11.352810416005081,
"grad_norm": 26419.05078125,
"learning_rate": 4.6618767862813596e-05,
"loss": 0.7077,
"step": 35750
},
{
"epoch": 11.368688472530962,
"grad_norm": 26334.150390625,
"learning_rate": 4.657907272149889e-05,
"loss": 0.7084,
"step": 35800
},
{
"epoch": 11.384566529056844,
"grad_norm": 26394.89453125,
"learning_rate": 4.653937758018419e-05,
"loss": 0.7064,
"step": 35850
},
{
"epoch": 11.400444585582724,
"grad_norm": 26715.767578125,
"learning_rate": 4.649968243886948e-05,
"loss": 0.7055,
"step": 35900
},
{
"epoch": 11.416322642108605,
"grad_norm": 27197.98828125,
"learning_rate": 4.645998729755478e-05,
"loss": 0.708,
"step": 35950
},
{
"epoch": 11.432200698634487,
"grad_norm": 27616.12109375,
"learning_rate": 4.6420292156240084e-05,
"loss": 0.708,
"step": 36000
},
{
"epoch": 11.448078755160369,
"grad_norm": 26575.294921875,
"learning_rate": 4.638059701492538e-05,
"loss": 0.7039,
"step": 36050
},
{
"epoch": 11.46395681168625,
"grad_norm": 26070.689453125,
"learning_rate": 4.634090187361067e-05,
"loss": 0.7075,
"step": 36100
},
{
"epoch": 11.47983486821213,
"grad_norm": 26675.865234375,
"learning_rate": 4.630120673229597e-05,
"loss": 0.7058,
"step": 36150
},
{
"epoch": 11.495712924738012,
"grad_norm": 26788.8671875,
"learning_rate": 4.6261511590981265e-05,
"loss": 0.7056,
"step": 36200
},
{
"epoch": 11.511590981263893,
"grad_norm": 26736.013671875,
"learning_rate": 4.622181644966656e-05,
"loss": 0.7049,
"step": 36250
},
{
"epoch": 11.527469037789775,
"grad_norm": 27259.62109375,
"learning_rate": 4.618212130835186e-05,
"loss": 0.7033,
"step": 36300
},
{
"epoch": 11.543347094315656,
"grad_norm": 26500.08984375,
"learning_rate": 4.614242616703716e-05,
"loss": 0.7055,
"step": 36350
},
{
"epoch": 11.559225150841536,
"grad_norm": 26374.705078125,
"learning_rate": 4.610273102572246e-05,
"loss": 0.7017,
"step": 36400
},
{
"epoch": 11.575103207367418,
"grad_norm": 27614.83984375,
"learning_rate": 4.606303588440775e-05,
"loss": 0.7059,
"step": 36450
},
{
"epoch": 11.5909812638933,
"grad_norm": 25694.1015625,
"learning_rate": 4.6023340743093046e-05,
"loss": 0.7048,
"step": 36500
},
{
"epoch": 11.606859320419181,
"grad_norm": 26899.56640625,
"learning_rate": 4.5983645601778346e-05,
"loss": 0.7045,
"step": 36550
},
{
"epoch": 11.622737376945063,
"grad_norm": 25786.490234375,
"learning_rate": 4.594395046046364e-05,
"loss": 0.7038,
"step": 36600
},
{
"epoch": 11.638615433470942,
"grad_norm": 26447.548828125,
"learning_rate": 4.5904255319148933e-05,
"loss": 0.7031,
"step": 36650
},
{
"epoch": 11.654493489996824,
"grad_norm": 25961.685546875,
"learning_rate": 4.5864560177834234e-05,
"loss": 0.7059,
"step": 36700
},
{
"epoch": 11.670371546522706,
"grad_norm": 26539.162109375,
"learning_rate": 4.5824865036519534e-05,
"loss": 0.7033,
"step": 36750
},
{
"epoch": 11.686249603048587,
"grad_norm": 26035.525390625,
"learning_rate": 4.578516989520483e-05,
"loss": 0.7057,
"step": 36800
},
{
"epoch": 11.702127659574469,
"grad_norm": 26614.791015625,
"learning_rate": 4.574547475389013e-05,
"loss": 0.7048,
"step": 36850
},
{
"epoch": 11.718005716100349,
"grad_norm": 26289.15234375,
"learning_rate": 4.570577961257542e-05,
"loss": 0.6975,
"step": 36900
},
{
"epoch": 11.73388377262623,
"grad_norm": 26606.12109375,
"learning_rate": 4.5666084471260715e-05,
"loss": 0.705,
"step": 36950
},
{
"epoch": 11.749761829152112,
"grad_norm": 25736.138671875,
"learning_rate": 4.5626389329946015e-05,
"loss": 0.7004,
"step": 37000
},
{
"epoch": 11.765639885677993,
"grad_norm": 26059.619140625,
"learning_rate": 4.5586694188631315e-05,
"loss": 0.705,
"step": 37050
},
{
"epoch": 11.781517942203875,
"grad_norm": 25117.2734375,
"learning_rate": 4.5546999047316616e-05,
"loss": 0.7088,
"step": 37100
},
{
"epoch": 11.797395998729755,
"grad_norm": 27240.82421875,
"learning_rate": 4.550730390600191e-05,
"loss": 0.7003,
"step": 37150
},
{
"epoch": 11.813274055255636,
"grad_norm": 26413.03125,
"learning_rate": 4.54676087646872e-05,
"loss": 0.7029,
"step": 37200
},
{
"epoch": 11.829152111781518,
"grad_norm": 25459.921875,
"learning_rate": 4.54279136233725e-05,
"loss": 0.6975,
"step": 37250
},
{
"epoch": 11.8450301683074,
"grad_norm": 26057.275390625,
"learning_rate": 4.5388218482057797e-05,
"loss": 0.6994,
"step": 37300
},
{
"epoch": 11.860908224833281,
"grad_norm": 25720.384765625,
"learning_rate": 4.534852334074309e-05,
"loss": 0.7072,
"step": 37350
},
{
"epoch": 11.876786281359161,
"grad_norm": 26672.6015625,
"learning_rate": 4.530882819942839e-05,
"loss": 0.7017,
"step": 37400
},
{
"epoch": 11.892664337885043,
"grad_norm": 25892.302734375,
"learning_rate": 4.526913305811369e-05,
"loss": 0.7031,
"step": 37450
},
{
"epoch": 11.908542394410924,
"grad_norm": 26435.544921875,
"learning_rate": 4.5229437916798984e-05,
"loss": 0.7011,
"step": 37500
},
{
"epoch": 11.924420450936806,
"grad_norm": 25747.921875,
"learning_rate": 4.5189742775484285e-05,
"loss": 0.6979,
"step": 37550
},
{
"epoch": 11.940298507462687,
"grad_norm": 25117.19140625,
"learning_rate": 4.515004763416958e-05,
"loss": 0.6996,
"step": 37600
},
{
"epoch": 11.956176563988567,
"grad_norm": 26094.021484375,
"learning_rate": 4.511035249285488e-05,
"loss": 0.703,
"step": 37650
},
{
"epoch": 11.972054620514449,
"grad_norm": 25621.94140625,
"learning_rate": 4.507065735154017e-05,
"loss": 0.7034,
"step": 37700
},
{
"epoch": 11.98793267704033,
"grad_norm": 25541.908203125,
"learning_rate": 4.503096221022547e-05,
"loss": 0.7004,
"step": 37750
},
{
"epoch": 12.003810733566212,
"grad_norm": 25723.466796875,
"learning_rate": 4.499126706891077e-05,
"loss": 0.6991,
"step": 37800
},
{
"epoch": 12.019688790092093,
"grad_norm": 26813.052734375,
"learning_rate": 4.4951571927596066e-05,
"loss": 0.696,
"step": 37850
},
{
"epoch": 12.035566846617973,
"grad_norm": 25913.0390625,
"learning_rate": 4.491187678628136e-05,
"loss": 0.6931,
"step": 37900
},
{
"epoch": 12.051444903143855,
"grad_norm": 25332.591796875,
"learning_rate": 4.487218164496666e-05,
"loss": 0.6939,
"step": 37950
},
{
"epoch": 12.067322959669736,
"grad_norm": 25602.212890625,
"learning_rate": 4.483248650365195e-05,
"loss": 0.6935,
"step": 38000
},
{
"epoch": 12.083201016195618,
"grad_norm": 26077.806640625,
"learning_rate": 4.479279136233725e-05,
"loss": 0.6946,
"step": 38050
},
{
"epoch": 12.0990790727215,
"grad_norm": 26038.025390625,
"learning_rate": 4.475309622102255e-05,
"loss": 0.6927,
"step": 38100
},
{
"epoch": 12.11495712924738,
"grad_norm": 25922.17578125,
"learning_rate": 4.471340107970785e-05,
"loss": 0.691,
"step": 38150
},
{
"epoch": 12.130835185773261,
"grad_norm": 25839.078125,
"learning_rate": 4.467370593839315e-05,
"loss": 0.6924,
"step": 38200
},
{
"epoch": 12.146713242299143,
"grad_norm": 25702.123046875,
"learning_rate": 4.463401079707844e-05,
"loss": 0.6918,
"step": 38250
},
{
"epoch": 12.162591298825024,
"grad_norm": 26183.7890625,
"learning_rate": 4.4594315655763735e-05,
"loss": 0.6922,
"step": 38300
},
{
"epoch": 12.178469355350906,
"grad_norm": 26349.01953125,
"learning_rate": 4.4554620514449035e-05,
"loss": 0.6919,
"step": 38350
},
{
"epoch": 12.194347411876786,
"grad_norm": 25578.62890625,
"learning_rate": 4.451492537313433e-05,
"loss": 0.6911,
"step": 38400
},
{
"epoch": 12.210225468402667,
"grad_norm": 25342.06640625,
"learning_rate": 4.447523023181963e-05,
"loss": 0.6948,
"step": 38450
},
{
"epoch": 12.226103524928549,
"grad_norm": 25582.328125,
"learning_rate": 4.443553509050493e-05,
"loss": 0.6965,
"step": 38500
},
{
"epoch": 12.24198158145443,
"grad_norm": 26281.44921875,
"learning_rate": 4.439583994919022e-05,
"loss": 0.6947,
"step": 38550
},
{
"epoch": 12.257859637980312,
"grad_norm": 25423.970703125,
"learning_rate": 4.4356144807875516e-05,
"loss": 0.6949,
"step": 38600
},
{
"epoch": 12.273737694506192,
"grad_norm": 25645.67578125,
"learning_rate": 4.4316449666560816e-05,
"loss": 0.692,
"step": 38650
},
{
"epoch": 12.289615751032073,
"grad_norm": 25957.416015625,
"learning_rate": 4.427675452524611e-05,
"loss": 0.6914,
"step": 38700
},
{
"epoch": 12.305493807557955,
"grad_norm": 25020.30859375,
"learning_rate": 4.4237059383931403e-05,
"loss": 0.6956,
"step": 38750
},
{
"epoch": 12.321371864083837,
"grad_norm": 25936.24609375,
"learning_rate": 4.4197364242616704e-05,
"loss": 0.6909,
"step": 38800
},
{
"epoch": 12.337249920609718,
"grad_norm": 25477.5,
"learning_rate": 4.4157669101302004e-05,
"loss": 0.6915,
"step": 38850
},
{
"epoch": 12.353127977135598,
"grad_norm": 24679.041015625,
"learning_rate": 4.4117973959987304e-05,
"loss": 0.6959,
"step": 38900
},
{
"epoch": 12.36900603366148,
"grad_norm": 25305.9140625,
"learning_rate": 4.40782788186726e-05,
"loss": 0.6958,
"step": 38950
},
{
"epoch": 12.384884090187361,
"grad_norm": 25186.720703125,
"learning_rate": 4.403858367735789e-05,
"loss": 0.6899,
"step": 39000
},
{
"epoch": 12.400762146713243,
"grad_norm": 25848.09375,
"learning_rate": 4.399888853604319e-05,
"loss": 0.6954,
"step": 39050
},
{
"epoch": 12.416640203239124,
"grad_norm": 25214.7265625,
"learning_rate": 4.3959193394728485e-05,
"loss": 0.6918,
"step": 39100
},
{
"epoch": 12.432518259765004,
"grad_norm": 25850.787109375,
"learning_rate": 4.3919498253413785e-05,
"loss": 0.6889,
"step": 39150
},
{
"epoch": 12.448396316290886,
"grad_norm": 25535.595703125,
"learning_rate": 4.3879803112099086e-05,
"loss": 0.6913,
"step": 39200
},
{
"epoch": 12.464274372816767,
"grad_norm": 25293.98828125,
"learning_rate": 4.384010797078438e-05,
"loss": 0.6909,
"step": 39250
},
{
"epoch": 12.480152429342649,
"grad_norm": 25519.966796875,
"learning_rate": 4.380041282946967e-05,
"loss": 0.693,
"step": 39300
},
{
"epoch": 12.49603048586853,
"grad_norm": 25590.5390625,
"learning_rate": 4.376071768815497e-05,
"loss": 0.6901,
"step": 39350
},
{
"epoch": 12.51190854239441,
"grad_norm": 25909.888671875,
"learning_rate": 4.3721022546840267e-05,
"loss": 0.6908,
"step": 39400
},
{
"epoch": 12.527786598920292,
"grad_norm": 25248.21484375,
"learning_rate": 4.368132740552557e-05,
"loss": 0.6932,
"step": 39450
},
{
"epoch": 12.543664655446173,
"grad_norm": 24925.970703125,
"learning_rate": 4.364163226421086e-05,
"loss": 0.6914,
"step": 39500
},
{
"epoch": 12.559542711972055,
"grad_norm": 25199.6171875,
"learning_rate": 4.360193712289616e-05,
"loss": 0.6937,
"step": 39550
},
{
"epoch": 12.575420768497937,
"grad_norm": 25989.927734375,
"learning_rate": 4.356224198158146e-05,
"loss": 0.6912,
"step": 39600
},
{
"epoch": 12.591298825023816,
"grad_norm": 24462.072265625,
"learning_rate": 4.3522546840266755e-05,
"loss": 0.6919,
"step": 39650
},
{
"epoch": 12.607176881549698,
"grad_norm": 25439.693359375,
"learning_rate": 4.348285169895205e-05,
"loss": 0.6903,
"step": 39700
},
{
"epoch": 12.62305493807558,
"grad_norm": 25596.04296875,
"learning_rate": 4.344315655763735e-05,
"loss": 0.6912,
"step": 39750
},
{
"epoch": 12.638932994601461,
"grad_norm": 24989.56640625,
"learning_rate": 4.340346141632264e-05,
"loss": 0.6935,
"step": 39800
},
{
"epoch": 12.654811051127343,
"grad_norm": 24844.279296875,
"learning_rate": 4.336376627500794e-05,
"loss": 0.6895,
"step": 39850
},
{
"epoch": 12.670689107653223,
"grad_norm": 25176.060546875,
"learning_rate": 4.332407113369324e-05,
"loss": 0.6924,
"step": 39900
},
{
"epoch": 12.686567164179104,
"grad_norm": 25582.01953125,
"learning_rate": 4.3284375992378536e-05,
"loss": 0.6927,
"step": 39950
},
{
"epoch": 12.702445220704986,
"grad_norm": 25024.33203125,
"learning_rate": 4.3244680851063836e-05,
"loss": 0.6904,
"step": 40000
},
{
"epoch": 12.718323277230867,
"grad_norm": 25088.216796875,
"learning_rate": 4.320498570974913e-05,
"loss": 0.6904,
"step": 40050
},
{
"epoch": 12.734201333756747,
"grad_norm": 24689.61328125,
"learning_rate": 4.316529056843442e-05,
"loss": 0.687,
"step": 40100
},
{
"epoch": 12.750079390282629,
"grad_norm": 25595.205078125,
"learning_rate": 4.3125595427119724e-05,
"loss": 0.6911,
"step": 40150
},
{
"epoch": 12.76595744680851,
"grad_norm": 25500.544921875,
"learning_rate": 4.308590028580502e-05,
"loss": 0.6894,
"step": 40200
},
{
"epoch": 12.781835503334392,
"grad_norm": 24525.78125,
"learning_rate": 4.304620514449032e-05,
"loss": 0.6904,
"step": 40250
},
{
"epoch": 12.797713559860274,
"grad_norm": 24815.94921875,
"learning_rate": 4.300651000317562e-05,
"loss": 0.6879,
"step": 40300
},
{
"epoch": 12.813591616386155,
"grad_norm": 24612.96875,
"learning_rate": 4.296681486186091e-05,
"loss": 0.693,
"step": 40350
},
{
"epoch": 12.829469672912035,
"grad_norm": 25878.453125,
"learning_rate": 4.2927119720546205e-05,
"loss": 0.6877,
"step": 40400
},
{
"epoch": 12.845347729437917,
"grad_norm": 25819.634765625,
"learning_rate": 4.2887424579231505e-05,
"loss": 0.6889,
"step": 40450
},
{
"epoch": 12.861225785963798,
"grad_norm": 24711.671875,
"learning_rate": 4.28477294379168e-05,
"loss": 0.6897,
"step": 40500
},
{
"epoch": 12.87710384248968,
"grad_norm": 25272.193359375,
"learning_rate": 4.280803429660209e-05,
"loss": 0.6916,
"step": 40550
},
{
"epoch": 12.89298189901556,
"grad_norm": 24950.6953125,
"learning_rate": 4.27683391552874e-05,
"loss": 0.6874,
"step": 40600
},
{
"epoch": 12.908859955541441,
"grad_norm": 25394.751953125,
"learning_rate": 4.272864401397269e-05,
"loss": 0.6909,
"step": 40650
},
{
"epoch": 12.924738012067323,
"grad_norm": 25527.599609375,
"learning_rate": 4.268894887265799e-05,
"loss": 0.6888,
"step": 40700
},
{
"epoch": 12.940616068593204,
"grad_norm": 25220.8125,
"learning_rate": 4.2649253731343286e-05,
"loss": 0.6902,
"step": 40750
},
{
"epoch": 12.956494125119086,
"grad_norm": 24796.541015625,
"learning_rate": 4.260955859002858e-05,
"loss": 0.6894,
"step": 40800
},
{
"epoch": 12.972372181644968,
"grad_norm": 25115.955078125,
"learning_rate": 4.256986344871388e-05,
"loss": 0.6908,
"step": 40850
},
{
"epoch": 12.988250238170847,
"grad_norm": 24197.728515625,
"learning_rate": 4.2530168307399174e-05,
"loss": 0.6864,
"step": 40900
},
{
"epoch": 13.004128294696729,
"grad_norm": 24724.552734375,
"learning_rate": 4.2490473166084474e-05,
"loss": 0.6864,
"step": 40950
},
{
"epoch": 13.02000635122261,
"grad_norm": 25012.1640625,
"learning_rate": 4.2450778024769774e-05,
"loss": 0.6799,
"step": 41000
},
{
"epoch": 13.035884407748492,
"grad_norm": 24815.64453125,
"learning_rate": 4.241108288345507e-05,
"loss": 0.6837,
"step": 41050
},
{
"epoch": 13.051762464274374,
"grad_norm": 25473.265625,
"learning_rate": 4.237138774214036e-05,
"loss": 0.6821,
"step": 41100
},
{
"epoch": 13.067640520800254,
"grad_norm": 24979.189453125,
"learning_rate": 4.233169260082566e-05,
"loss": 0.6825,
"step": 41150
},
{
"epoch": 13.083518577326135,
"grad_norm": 24369.861328125,
"learning_rate": 4.2291997459510955e-05,
"loss": 0.6802,
"step": 41200
},
{
"epoch": 13.099396633852017,
"grad_norm": 24936.068359375,
"learning_rate": 4.2252302318196255e-05,
"loss": 0.6817,
"step": 41250
},
{
"epoch": 13.115274690377898,
"grad_norm": 25067.654296875,
"learning_rate": 4.221260717688155e-05,
"loss": 0.68,
"step": 41300
},
{
"epoch": 13.13115274690378,
"grad_norm": 24868.97265625,
"learning_rate": 4.217291203556685e-05,
"loss": 0.6811,
"step": 41350
},
{
"epoch": 13.14703080342966,
"grad_norm": 24265.5546875,
"learning_rate": 4.213321689425215e-05,
"loss": 0.6813,
"step": 41400
},
{
"epoch": 13.162908859955541,
"grad_norm": 24921.30078125,
"learning_rate": 4.209352175293744e-05,
"loss": 0.6796,
"step": 41450
},
{
"epoch": 13.178786916481423,
"grad_norm": 24588.80078125,
"learning_rate": 4.2053826611622737e-05,
"loss": 0.6828,
"step": 41500
},
{
"epoch": 13.194664973007304,
"grad_norm": 24551.5,
"learning_rate": 4.201413147030804e-05,
"loss": 0.6805,
"step": 41550
},
{
"epoch": 13.210543029533184,
"grad_norm": 25206.583984375,
"learning_rate": 4.197443632899333e-05,
"loss": 0.6852,
"step": 41600
},
{
"epoch": 13.226421086059066,
"grad_norm": 24448.646484375,
"learning_rate": 4.193474118767863e-05,
"loss": 0.6832,
"step": 41650
},
{
"epoch": 13.242299142584947,
"grad_norm": 24609.46484375,
"learning_rate": 4.189504604636393e-05,
"loss": 0.6842,
"step": 41700
},
{
"epoch": 13.258177199110829,
"grad_norm": 24898.201171875,
"learning_rate": 4.1855350905049225e-05,
"loss": 0.6812,
"step": 41750
},
{
"epoch": 13.27405525563671,
"grad_norm": 24523.78515625,
"learning_rate": 4.1815655763734525e-05,
"loss": 0.6842,
"step": 41800
},
{
"epoch": 13.28993331216259,
"grad_norm": 24928.837890625,
"learning_rate": 4.177596062241982e-05,
"loss": 0.6813,
"step": 41850
},
{
"epoch": 13.305811368688472,
"grad_norm": 24684.125,
"learning_rate": 4.173626548110511e-05,
"loss": 0.6772,
"step": 41900
},
{
"epoch": 13.321689425214354,
"grad_norm": 25091.099609375,
"learning_rate": 4.169657033979041e-05,
"loss": 0.6804,
"step": 41950
},
{
"epoch": 13.337567481740235,
"grad_norm": 24772.115234375,
"learning_rate": 4.1656875198475706e-05,
"loss": 0.6788,
"step": 42000
},
{
"epoch": 13.353445538266117,
"grad_norm": 25392.5,
"learning_rate": 4.1617180057161006e-05,
"loss": 0.6814,
"step": 42050
},
{
"epoch": 13.369323594791997,
"grad_norm": 24344.09375,
"learning_rate": 4.1577484915846306e-05,
"loss": 0.683,
"step": 42100
},
{
"epoch": 13.385201651317878,
"grad_norm": 24251.580078125,
"learning_rate": 4.15377897745316e-05,
"loss": 0.6826,
"step": 42150
},
{
"epoch": 13.40107970784376,
"grad_norm": 25104.28125,
"learning_rate": 4.149809463321689e-05,
"loss": 0.6812,
"step": 42200
},
{
"epoch": 13.416957764369641,
"grad_norm": 24021.865234375,
"learning_rate": 4.1458399491902194e-05,
"loss": 0.6802,
"step": 42250
},
{
"epoch": 13.432835820895523,
"grad_norm": 24694.013671875,
"learning_rate": 4.141870435058749e-05,
"loss": 0.683,
"step": 42300
},
{
"epoch": 13.448713877421403,
"grad_norm": 24235.306640625,
"learning_rate": 4.137900920927279e-05,
"loss": 0.6818,
"step": 42350
},
{
"epoch": 13.464591933947284,
"grad_norm": 24905.02734375,
"learning_rate": 4.133931406795809e-05,
"loss": 0.68,
"step": 42400
},
{
"epoch": 13.480469990473166,
"grad_norm": 24339.927734375,
"learning_rate": 4.129961892664338e-05,
"loss": 0.6799,
"step": 42450
},
{
"epoch": 13.496348046999048,
"grad_norm": 24592.8828125,
"learning_rate": 4.125992378532868e-05,
"loss": 0.6776,
"step": 42500
},
{
"epoch": 13.51222610352493,
"grad_norm": 24951.8125,
"learning_rate": 4.1220228644013975e-05,
"loss": 0.6841,
"step": 42550
},
{
"epoch": 13.528104160050809,
"grad_norm": 24222.96875,
"learning_rate": 4.118053350269927e-05,
"loss": 0.6817,
"step": 42600
},
{
"epoch": 13.54398221657669,
"grad_norm": 24539.65234375,
"learning_rate": 4.114083836138457e-05,
"loss": 0.6812,
"step": 42650
},
{
"epoch": 13.559860273102572,
"grad_norm": 24310.98828125,
"learning_rate": 4.110114322006986e-05,
"loss": 0.6847,
"step": 42700
},
{
"epoch": 13.575738329628454,
"grad_norm": 24087.740234375,
"learning_rate": 4.106144807875516e-05,
"loss": 0.6795,
"step": 42750
},
{
"epoch": 13.591616386154335,
"grad_norm": 24772.966796875,
"learning_rate": 4.102175293744046e-05,
"loss": 0.68,
"step": 42800
},
{
"epoch": 13.607494442680215,
"grad_norm": 24550.08203125,
"learning_rate": 4.0982057796125756e-05,
"loss": 0.6789,
"step": 42850
},
{
"epoch": 13.623372499206097,
"grad_norm": 24223.595703125,
"learning_rate": 4.094236265481105e-05,
"loss": 0.6837,
"step": 42900
},
{
"epoch": 13.639250555731978,
"grad_norm": 24113.8984375,
"learning_rate": 4.090266751349635e-05,
"loss": 0.681,
"step": 42950
},
{
"epoch": 13.65512861225786,
"grad_norm": 24669.611328125,
"learning_rate": 4.0862972372181644e-05,
"loss": 0.6832,
"step": 43000
},
{
"epoch": 13.671006668783741,
"grad_norm": 23798.759765625,
"learning_rate": 4.0823277230866944e-05,
"loss": 0.677,
"step": 43050
},
{
"epoch": 13.686884725309621,
"grad_norm": 24341.833984375,
"learning_rate": 4.0783582089552244e-05,
"loss": 0.6814,
"step": 43100
},
{
"epoch": 13.702762781835503,
"grad_norm": 24048.4609375,
"learning_rate": 4.074388694823754e-05,
"loss": 0.6784,
"step": 43150
},
{
"epoch": 13.718640838361384,
"grad_norm": 24171.9921875,
"learning_rate": 4.070419180692284e-05,
"loss": 0.6803,
"step": 43200
},
{
"epoch": 13.734518894887266,
"grad_norm": 24474.33203125,
"learning_rate": 4.066449666560813e-05,
"loss": 0.6785,
"step": 43250
},
{
"epoch": 13.750396951413148,
"grad_norm": 23632.65625,
"learning_rate": 4.0624801524293425e-05,
"loss": 0.6811,
"step": 43300
},
{
"epoch": 13.766275007939027,
"grad_norm": 23942.4140625,
"learning_rate": 4.0585106382978725e-05,
"loss": 0.6802,
"step": 43350
},
{
"epoch": 13.782153064464909,
"grad_norm": 24818.43359375,
"learning_rate": 4.054541124166402e-05,
"loss": 0.6817,
"step": 43400
},
{
"epoch": 13.79803112099079,
"grad_norm": 23369.2265625,
"learning_rate": 4.050571610034932e-05,
"loss": 0.6811,
"step": 43450
},
{
"epoch": 13.813909177516672,
"grad_norm": 24389.009765625,
"learning_rate": 4.046602095903462e-05,
"loss": 0.6805,
"step": 43500
},
{
"epoch": 13.829787234042554,
"grad_norm": 24440.2421875,
"learning_rate": 4.042632581771991e-05,
"loss": 0.681,
"step": 43550
},
{
"epoch": 13.845665290568434,
"grad_norm": 24627.8203125,
"learning_rate": 4.038663067640521e-05,
"loss": 0.6775,
"step": 43600
},
{
"epoch": 13.861543347094315,
"grad_norm": 23863.171875,
"learning_rate": 4.034693553509051e-05,
"loss": 0.6803,
"step": 43650
},
{
"epoch": 13.877421403620197,
"grad_norm": 24950.375,
"learning_rate": 4.03072403937758e-05,
"loss": 0.6825,
"step": 43700
},
{
"epoch": 13.893299460146078,
"grad_norm": 23713.75390625,
"learning_rate": 4.02675452524611e-05,
"loss": 0.6774,
"step": 43750
},
{
"epoch": 13.90917751667196,
"grad_norm": 24111.923828125,
"learning_rate": 4.02278501111464e-05,
"loss": 0.6769,
"step": 43800
},
{
"epoch": 13.92505557319784,
"grad_norm": 24068.44140625,
"learning_rate": 4.0188154969831695e-05,
"loss": 0.6792,
"step": 43850
},
{
"epoch": 13.940933629723721,
"grad_norm": 24496.923828125,
"learning_rate": 4.0148459828516995e-05,
"loss": 0.6786,
"step": 43900
},
{
"epoch": 13.956811686249603,
"grad_norm": 24324.5234375,
"learning_rate": 4.010876468720229e-05,
"loss": 0.6818,
"step": 43950
},
{
"epoch": 13.972689742775485,
"grad_norm": 24043.82421875,
"learning_rate": 4.006906954588758e-05,
"loss": 0.6756,
"step": 44000
},
{
"epoch": 13.988567799301366,
"grad_norm": 23718.05859375,
"learning_rate": 4.002937440457288e-05,
"loss": 0.6742,
"step": 44050
},
{
"epoch": 14.004445855827246,
"grad_norm": 24011.693359375,
"learning_rate": 3.9989679263258176e-05,
"loss": 0.6766,
"step": 44100
},
{
"epoch": 14.020323912353128,
"grad_norm": 23874.05859375,
"learning_rate": 3.9949984121943476e-05,
"loss": 0.6721,
"step": 44150
},
{
"epoch": 14.03620196887901,
"grad_norm": 24170.900390625,
"learning_rate": 3.9910288980628776e-05,
"loss": 0.6746,
"step": 44200
},
{
"epoch": 14.05208002540489,
"grad_norm": 23806.9921875,
"learning_rate": 3.987059383931407e-05,
"loss": 0.6702,
"step": 44250
},
{
"epoch": 14.067958081930772,
"grad_norm": 24097.408203125,
"learning_rate": 3.983089869799937e-05,
"loss": 0.6726,
"step": 44300
},
{
"epoch": 14.083836138456652,
"grad_norm": 24183.365234375,
"learning_rate": 3.9791203556684664e-05,
"loss": 0.6688,
"step": 44350
},
{
"epoch": 14.099714194982534,
"grad_norm": 25200.125,
"learning_rate": 3.975150841536996e-05,
"loss": 0.6743,
"step": 44400
},
{
"epoch": 14.115592251508415,
"grad_norm": 24078.8359375,
"learning_rate": 3.971181327405526e-05,
"loss": 0.6707,
"step": 44450
},
{
"epoch": 14.131470308034297,
"grad_norm": 24092.71875,
"learning_rate": 3.967211813274056e-05,
"loss": 0.6727,
"step": 44500
},
{
"epoch": 14.147348364560179,
"grad_norm": 24398.52734375,
"learning_rate": 3.963242299142585e-05,
"loss": 0.6726,
"step": 44550
},
{
"epoch": 14.163226421086058,
"grad_norm": 25464.837890625,
"learning_rate": 3.959272785011115e-05,
"loss": 0.6692,
"step": 44600
},
{
"epoch": 14.17910447761194,
"grad_norm": 24340.91796875,
"learning_rate": 3.9553032708796445e-05,
"loss": 0.6726,
"step": 44650
},
{
"epoch": 14.194982534137822,
"grad_norm": 24626.837890625,
"learning_rate": 3.951333756748174e-05,
"loss": 0.6719,
"step": 44700
},
{
"epoch": 14.210860590663703,
"grad_norm": 24861.15234375,
"learning_rate": 3.947364242616704e-05,
"loss": 0.6724,
"step": 44750
},
{
"epoch": 14.226738647189585,
"grad_norm": 23964.291015625,
"learning_rate": 3.943394728485233e-05,
"loss": 0.674,
"step": 44800
},
{
"epoch": 14.242616703715465,
"grad_norm": 23936.125,
"learning_rate": 3.939425214353763e-05,
"loss": 0.6743,
"step": 44850
},
{
"epoch": 14.258494760241346,
"grad_norm": 24653.296875,
"learning_rate": 3.935455700222293e-05,
"loss": 0.6723,
"step": 44900
},
{
"epoch": 14.274372816767228,
"grad_norm": 24398.095703125,
"learning_rate": 3.9314861860908226e-05,
"loss": 0.6774,
"step": 44950
},
{
"epoch": 14.29025087329311,
"grad_norm": 24063.818359375,
"learning_rate": 3.927516671959353e-05,
"loss": 0.6701,
"step": 45000
},
{
"epoch": 14.30612892981899,
"grad_norm": 24251.955078125,
"learning_rate": 3.923547157827882e-05,
"loss": 0.6749,
"step": 45050
},
{
"epoch": 14.32200698634487,
"grad_norm": 24572.671875,
"learning_rate": 3.9195776436964114e-05,
"loss": 0.6754,
"step": 45100
},
{
"epoch": 14.337885042870752,
"grad_norm": 24464.720703125,
"learning_rate": 3.9156081295649414e-05,
"loss": 0.6718,
"step": 45150
},
{
"epoch": 14.353763099396634,
"grad_norm": 24511.29296875,
"learning_rate": 3.9116386154334714e-05,
"loss": 0.6752,
"step": 45200
},
{
"epoch": 14.369641155922515,
"grad_norm": 24030.92578125,
"learning_rate": 3.907669101302001e-05,
"loss": 0.6737,
"step": 45250
},
{
"epoch": 14.385519212448397,
"grad_norm": 23963.9453125,
"learning_rate": 3.903699587170531e-05,
"loss": 0.6726,
"step": 45300
},
{
"epoch": 14.401397268974277,
"grad_norm": 24408.703125,
"learning_rate": 3.89973007303906e-05,
"loss": 0.6755,
"step": 45350
},
{
"epoch": 14.417275325500158,
"grad_norm": 24175.90234375,
"learning_rate": 3.89576055890759e-05,
"loss": 0.6733,
"step": 45400
},
{
"epoch": 14.43315338202604,
"grad_norm": 24181.03515625,
"learning_rate": 3.8917910447761195e-05,
"loss": 0.6715,
"step": 45450
},
{
"epoch": 14.449031438551922,
"grad_norm": 24088.134765625,
"learning_rate": 3.887821530644649e-05,
"loss": 0.6734,
"step": 45500
},
{
"epoch": 14.464909495077803,
"grad_norm": 24348.998046875,
"learning_rate": 3.883852016513179e-05,
"loss": 0.673,
"step": 45550
},
{
"epoch": 14.480787551603683,
"grad_norm": 23610.765625,
"learning_rate": 3.879882502381709e-05,
"loss": 0.6733,
"step": 45600
},
{
"epoch": 14.496665608129565,
"grad_norm": 23721.083984375,
"learning_rate": 3.875912988250238e-05,
"loss": 0.6731,
"step": 45650
},
{
"epoch": 14.512543664655446,
"grad_norm": 24457.9375,
"learning_rate": 3.871943474118768e-05,
"loss": 0.6733,
"step": 45700
},
{
"epoch": 14.528421721181328,
"grad_norm": 24163.458984375,
"learning_rate": 3.867973959987298e-05,
"loss": 0.6721,
"step": 45750
},
{
"epoch": 14.54429977770721,
"grad_norm": 23760.05859375,
"learning_rate": 3.864004445855827e-05,
"loss": 0.6715,
"step": 45800
},
{
"epoch": 14.56017783423309,
"grad_norm": 23812.14453125,
"learning_rate": 3.860034931724357e-05,
"loss": 0.6727,
"step": 45850
},
{
"epoch": 14.57605589075897,
"grad_norm": 23580.501953125,
"learning_rate": 3.8560654175928864e-05,
"loss": 0.6704,
"step": 45900
},
{
"epoch": 14.591933947284852,
"grad_norm": 24045.09765625,
"learning_rate": 3.8520959034614165e-05,
"loss": 0.6718,
"step": 45950
},
{
"epoch": 14.607812003810734,
"grad_norm": 24375.74609375,
"learning_rate": 3.8481263893299465e-05,
"loss": 0.6709,
"step": 46000
},
{
"epoch": 14.623690060336616,
"grad_norm": 24000.484375,
"learning_rate": 3.844156875198476e-05,
"loss": 0.674,
"step": 46050
},
{
"epoch": 14.639568116862495,
"grad_norm": 23421.57421875,
"learning_rate": 3.840187361067006e-05,
"loss": 0.6735,
"step": 46100
},
{
"epoch": 14.655446173388377,
"grad_norm": 24476.041015625,
"learning_rate": 3.836217846935535e-05,
"loss": 0.6737,
"step": 46150
},
{
"epoch": 14.671324229914259,
"grad_norm": 23659.337890625,
"learning_rate": 3.8322483328040646e-05,
"loss": 0.6752,
"step": 46200
},
{
"epoch": 14.68720228644014,
"grad_norm": 24077.185546875,
"learning_rate": 3.8282788186725946e-05,
"loss": 0.6684,
"step": 46250
},
{
"epoch": 14.703080342966022,
"grad_norm": 24052.515625,
"learning_rate": 3.8243093045411246e-05,
"loss": 0.673,
"step": 46300
},
{
"epoch": 14.718958399491902,
"grad_norm": 23948.08203125,
"learning_rate": 3.820339790409654e-05,
"loss": 0.6745,
"step": 46350
},
{
"epoch": 14.734836456017783,
"grad_norm": 23770.400390625,
"learning_rate": 3.816370276278184e-05,
"loss": 0.6699,
"step": 46400
},
{
"epoch": 14.750714512543665,
"grad_norm": 23651.755859375,
"learning_rate": 3.8124007621467134e-05,
"loss": 0.6722,
"step": 46450
},
{
"epoch": 14.766592569069546,
"grad_norm": 24653.626953125,
"learning_rate": 3.808431248015243e-05,
"loss": 0.6698,
"step": 46500
},
{
"epoch": 14.782470625595428,
"grad_norm": 24360.767578125,
"learning_rate": 3.804461733883773e-05,
"loss": 0.6701,
"step": 46550
},
{
"epoch": 14.798348682121308,
"grad_norm": 23147.37109375,
"learning_rate": 3.800492219752302e-05,
"loss": 0.6687,
"step": 46600
},
{
"epoch": 14.81422673864719,
"grad_norm": 24035.583984375,
"learning_rate": 3.796522705620832e-05,
"loss": 0.6703,
"step": 46650
},
{
"epoch": 14.830104795173071,
"grad_norm": 23670.650390625,
"learning_rate": 3.792553191489362e-05,
"loss": 0.67,
"step": 46700
},
{
"epoch": 14.845982851698952,
"grad_norm": 23496.2265625,
"learning_rate": 3.7885836773578915e-05,
"loss": 0.6741,
"step": 46750
},
{
"epoch": 14.861860908224834,
"grad_norm": 24511.904296875,
"learning_rate": 3.7846141632264215e-05,
"loss": 0.673,
"step": 46800
},
{
"epoch": 14.877738964750714,
"grad_norm": 23906.30859375,
"learning_rate": 3.780644649094951e-05,
"loss": 0.6745,
"step": 46850
},
{
"epoch": 14.893617021276595,
"grad_norm": 23655.87109375,
"learning_rate": 3.77667513496348e-05,
"loss": 0.668,
"step": 46900
},
{
"epoch": 14.909495077802477,
"grad_norm": 23849.982421875,
"learning_rate": 3.77270562083201e-05,
"loss": 0.6671,
"step": 46950
},
{
"epoch": 14.925373134328359,
"grad_norm": 24548.85546875,
"learning_rate": 3.76873610670054e-05,
"loss": 0.6698,
"step": 47000
},
{
"epoch": 14.94125119085424,
"grad_norm": 24144.28515625,
"learning_rate": 3.7647665925690696e-05,
"loss": 0.668,
"step": 47050
},
{
"epoch": 14.95712924738012,
"grad_norm": 23972.701171875,
"learning_rate": 3.7607970784376e-05,
"loss": 0.6677,
"step": 47100
},
{
"epoch": 14.973007303906002,
"grad_norm": 23551.353515625,
"learning_rate": 3.756827564306129e-05,
"loss": 0.6699,
"step": 47150
},
{
"epoch": 14.988885360431883,
"grad_norm": 23527.978515625,
"learning_rate": 3.752858050174659e-05,
"loss": 0.6702,
"step": 47200
},
{
"epoch": 15.004763416957765,
"grad_norm": 23273.08984375,
"learning_rate": 3.7488885360431884e-05,
"loss": 0.6702,
"step": 47250
},
{
"epoch": 15.020641473483646,
"grad_norm": 23773.09765625,
"learning_rate": 3.744919021911718e-05,
"loss": 0.6671,
"step": 47300
},
{
"epoch": 15.036519530009526,
"grad_norm": 24133.150390625,
"learning_rate": 3.740949507780248e-05,
"loss": 0.6607,
"step": 47350
},
{
"epoch": 15.052397586535408,
"grad_norm": 23611.69140625,
"learning_rate": 3.736979993648778e-05,
"loss": 0.6608,
"step": 47400
},
{
"epoch": 15.06827564306129,
"grad_norm": 24616.044921875,
"learning_rate": 3.733010479517307e-05,
"loss": 0.6678,
"step": 47450
},
{
"epoch": 15.084153699587171,
"grad_norm": 24132.029296875,
"learning_rate": 3.729040965385837e-05,
"loss": 0.6634,
"step": 47500
},
{
"epoch": 15.100031756113053,
"grad_norm": 24181.455078125,
"learning_rate": 3.7250714512543665e-05,
"loss": 0.6655,
"step": 47550
},
{
"epoch": 15.115909812638932,
"grad_norm": 23974.443359375,
"learning_rate": 3.721101937122896e-05,
"loss": 0.662,
"step": 47600
},
{
"epoch": 15.131787869164814,
"grad_norm": 24213.7890625,
"learning_rate": 3.717132422991426e-05,
"loss": 0.6608,
"step": 47650
},
{
"epoch": 15.147665925690696,
"grad_norm": 24070.62890625,
"learning_rate": 3.713162908859956e-05,
"loss": 0.6658,
"step": 47700
},
{
"epoch": 15.163543982216577,
"grad_norm": 23968.4296875,
"learning_rate": 3.709193394728485e-05,
"loss": 0.6609,
"step": 47750
},
{
"epoch": 15.179422038742459,
"grad_norm": 23617.162109375,
"learning_rate": 3.7052238805970153e-05,
"loss": 0.6613,
"step": 47800
},
{
"epoch": 15.195300095268339,
"grad_norm": 24029.837890625,
"learning_rate": 3.701254366465545e-05,
"loss": 0.664,
"step": 47850
},
{
"epoch": 15.21117815179422,
"grad_norm": 23731.349609375,
"learning_rate": 3.697284852334075e-05,
"loss": 0.6636,
"step": 47900
},
{
"epoch": 15.227056208320102,
"grad_norm": 24603.748046875,
"learning_rate": 3.693315338202604e-05,
"loss": 0.6629,
"step": 47950
},
{
"epoch": 15.242934264845983,
"grad_norm": 23958.181640625,
"learning_rate": 3.6893458240711334e-05,
"loss": 0.6648,
"step": 48000
},
{
"epoch": 15.258812321371865,
"grad_norm": 23479.33984375,
"learning_rate": 3.6853763099396635e-05,
"loss": 0.6653,
"step": 48050
},
{
"epoch": 15.274690377897745,
"grad_norm": 24116.53515625,
"learning_rate": 3.6814067958081935e-05,
"loss": 0.6622,
"step": 48100
},
{
"epoch": 15.290568434423626,
"grad_norm": 23710.822265625,
"learning_rate": 3.677437281676723e-05,
"loss": 0.6632,
"step": 48150
},
{
"epoch": 15.306446490949508,
"grad_norm": 24976.740234375,
"learning_rate": 3.673467767545253e-05,
"loss": 0.6648,
"step": 48200
},
{
"epoch": 15.32232454747539,
"grad_norm": 24450.333984375,
"learning_rate": 3.669498253413782e-05,
"loss": 0.6665,
"step": 48250
},
{
"epoch": 15.338202604001271,
"grad_norm": 24068.060546875,
"learning_rate": 3.6655287392823116e-05,
"loss": 0.6653,
"step": 48300
},
{
"epoch": 15.354080660527151,
"grad_norm": 23732.517578125,
"learning_rate": 3.6615592251508416e-05,
"loss": 0.6649,
"step": 48350
},
{
"epoch": 15.369958717053033,
"grad_norm": 23714.439453125,
"learning_rate": 3.6575897110193716e-05,
"loss": 0.6644,
"step": 48400
},
{
"epoch": 15.385836773578914,
"grad_norm": 24033.36328125,
"learning_rate": 3.6536201968879017e-05,
"loss": 0.6648,
"step": 48450
},
{
"epoch": 15.401714830104796,
"grad_norm": 23467.201171875,
"learning_rate": 3.649650682756431e-05,
"loss": 0.6632,
"step": 48500
},
{
"epoch": 15.417592886630676,
"grad_norm": 23929.56640625,
"learning_rate": 3.6456811686249604e-05,
"loss": 0.6621,
"step": 48550
},
{
"epoch": 15.433470943156557,
"grad_norm": 23940.3125,
"learning_rate": 3.6417116544934904e-05,
"loss": 0.6666,
"step": 48600
},
{
"epoch": 15.449348999682439,
"grad_norm": 23760.220703125,
"learning_rate": 3.63774214036202e-05,
"loss": 0.6657,
"step": 48650
},
{
"epoch": 15.46522705620832,
"grad_norm": 24150.181640625,
"learning_rate": 3.633772626230549e-05,
"loss": 0.6635,
"step": 48700
},
{
"epoch": 15.481105112734202,
"grad_norm": 23952.111328125,
"learning_rate": 3.629803112099079e-05,
"loss": 0.6659,
"step": 48750
},
{
"epoch": 15.496983169260083,
"grad_norm": 23915.330078125,
"learning_rate": 3.625833597967609e-05,
"loss": 0.6649,
"step": 48800
},
{
"epoch": 15.512861225785963,
"grad_norm": 24483.07421875,
"learning_rate": 3.6218640838361385e-05,
"loss": 0.6633,
"step": 48850
},
{
"epoch": 15.528739282311845,
"grad_norm": 24314.33203125,
"learning_rate": 3.6178945697046685e-05,
"loss": 0.6649,
"step": 48900
},
{
"epoch": 15.544617338837726,
"grad_norm": 24762.5625,
"learning_rate": 3.613925055573198e-05,
"loss": 0.6652,
"step": 48950
},
{
"epoch": 15.560495395363608,
"grad_norm": 24496.2109375,
"learning_rate": 3.609955541441727e-05,
"loss": 0.6636,
"step": 49000
},
{
"epoch": 15.576373451889488,
"grad_norm": 24142.921875,
"learning_rate": 3.605986027310257e-05,
"loss": 0.665,
"step": 49050
},
{
"epoch": 15.59225150841537,
"grad_norm": 23896.888671875,
"learning_rate": 3.602016513178787e-05,
"loss": 0.6636,
"step": 49100
},
{
"epoch": 15.608129564941251,
"grad_norm": 23628.111328125,
"learning_rate": 3.598046999047317e-05,
"loss": 0.6615,
"step": 49150
},
{
"epoch": 15.624007621467133,
"grad_norm": 23748.744140625,
"learning_rate": 3.594077484915847e-05,
"loss": 0.6648,
"step": 49200
},
{
"epoch": 15.639885677993014,
"grad_norm": 23825.630859375,
"learning_rate": 3.590107970784376e-05,
"loss": 0.6652,
"step": 49250
},
{
"epoch": 15.655763734518896,
"grad_norm": 23878.716796875,
"learning_rate": 3.586138456652906e-05,
"loss": 0.6615,
"step": 49300
},
{
"epoch": 15.671641791044776,
"grad_norm": 23718.876953125,
"learning_rate": 3.5821689425214354e-05,
"loss": 0.6622,
"step": 49350
},
{
"epoch": 15.687519847570657,
"grad_norm": 23546.572265625,
"learning_rate": 3.578199428389965e-05,
"loss": 0.6641,
"step": 49400
},
{
"epoch": 15.703397904096539,
"grad_norm": 23565.736328125,
"learning_rate": 3.574229914258495e-05,
"loss": 0.6656,
"step": 49450
},
{
"epoch": 15.71927596062242,
"grad_norm": 23384.71484375,
"learning_rate": 3.570260400127025e-05,
"loss": 0.6618,
"step": 49500
},
{
"epoch": 15.7351540171483,
"grad_norm": 23340.80078125,
"learning_rate": 3.566290885995554e-05,
"loss": 0.6653,
"step": 49550
},
{
"epoch": 15.751032073674182,
"grad_norm": 24208.947265625,
"learning_rate": 3.562321371864084e-05,
"loss": 0.6661,
"step": 49600
},
{
"epoch": 15.766910130200063,
"grad_norm": 23574.521484375,
"learning_rate": 3.5583518577326135e-05,
"loss": 0.6666,
"step": 49650
},
{
"epoch": 15.782788186725945,
"grad_norm": 23982.78125,
"learning_rate": 3.5543823436011436e-05,
"loss": 0.663,
"step": 49700
},
{
"epoch": 15.798666243251827,
"grad_norm": 23564.3359375,
"learning_rate": 3.550412829469673e-05,
"loss": 0.6606,
"step": 49750
},
{
"epoch": 15.814544299777708,
"grad_norm": 23933.189453125,
"learning_rate": 3.546443315338203e-05,
"loss": 0.6647,
"step": 49800
},
{
"epoch": 15.830422356303588,
"grad_norm": 23510.328125,
"learning_rate": 3.542473801206733e-05,
"loss": 0.6635,
"step": 49850
},
{
"epoch": 15.84630041282947,
"grad_norm": 23416.091796875,
"learning_rate": 3.5385042870752623e-05,
"loss": 0.6646,
"step": 49900
},
{
"epoch": 15.862178469355351,
"grad_norm": 23560.341796875,
"learning_rate": 3.534534772943792e-05,
"loss": 0.663,
"step": 49950
},
{
"epoch": 15.878056525881233,
"grad_norm": 23726.447265625,
"learning_rate": 3.530565258812322e-05,
"loss": 0.6649,
"step": 50000
},
{
"epoch": 15.893934582407113,
"grad_norm": 23844.755859375,
"learning_rate": 3.526595744680851e-05,
"loss": 0.6622,
"step": 50050
},
{
"epoch": 15.909812638932994,
"grad_norm": 23132.53125,
"learning_rate": 3.5226262305493804e-05,
"loss": 0.6612,
"step": 50100
},
{
"epoch": 15.925690695458876,
"grad_norm": 24354.9453125,
"learning_rate": 3.5186567164179105e-05,
"loss": 0.6633,
"step": 50150
},
{
"epoch": 15.941568751984757,
"grad_norm": 23439.099609375,
"learning_rate": 3.5146872022864405e-05,
"loss": 0.6619,
"step": 50200
},
{
"epoch": 15.957446808510639,
"grad_norm": 24292.439453125,
"learning_rate": 3.5107176881549705e-05,
"loss": 0.6629,
"step": 50250
},
{
"epoch": 15.973324865036519,
"grad_norm": 23685.8515625,
"learning_rate": 3.5067481740235e-05,
"loss": 0.6622,
"step": 50300
},
{
"epoch": 15.9892029215624,
"grad_norm": 23704.876953125,
"learning_rate": 3.502778659892029e-05,
"loss": 0.663,
"step": 50350
},
{
"epoch": 16.005080978088284,
"grad_norm": 23387.416015625,
"learning_rate": 3.498809145760559e-05,
"loss": 0.659,
"step": 50400
},
{
"epoch": 16.02095903461416,
"grad_norm": 23603.845703125,
"learning_rate": 3.4948396316290886e-05,
"loss": 0.6574,
"step": 50450
},
{
"epoch": 16.036837091140043,
"grad_norm": 24016.529296875,
"learning_rate": 3.490870117497618e-05,
"loss": 0.657,
"step": 50500
},
{
"epoch": 16.052715147665925,
"grad_norm": 24235.45703125,
"learning_rate": 3.4869006033661487e-05,
"loss": 0.6557,
"step": 50550
},
{
"epoch": 16.068593204191806,
"grad_norm": 23704.05859375,
"learning_rate": 3.482931089234678e-05,
"loss": 0.6547,
"step": 50600
},
{
"epoch": 16.084471260717688,
"grad_norm": 23739.4921875,
"learning_rate": 3.4789615751032074e-05,
"loss": 0.6541,
"step": 50650
},
{
"epoch": 16.10034931724357,
"grad_norm": 24145.8046875,
"learning_rate": 3.4749920609717374e-05,
"loss": 0.6594,
"step": 50700
},
{
"epoch": 16.11622737376945,
"grad_norm": 24285.50390625,
"learning_rate": 3.471022546840267e-05,
"loss": 0.6569,
"step": 50750
},
{
"epoch": 16.132105430295333,
"grad_norm": 23721.884765625,
"learning_rate": 3.467053032708796e-05,
"loss": 0.6585,
"step": 50800
},
{
"epoch": 16.147983486821214,
"grad_norm": 23684.302734375,
"learning_rate": 3.463083518577326e-05,
"loss": 0.6563,
"step": 50850
},
{
"epoch": 16.163861543347096,
"grad_norm": 24330.8046875,
"learning_rate": 3.459114004445856e-05,
"loss": 0.6547,
"step": 50900
},
{
"epoch": 16.179739599872974,
"grad_norm": 23832.5703125,
"learning_rate": 3.455144490314386e-05,
"loss": 0.6578,
"step": 50950
},
{
"epoch": 16.195617656398856,
"grad_norm": 23988.150390625,
"learning_rate": 3.4511749761829155e-05,
"loss": 0.6558,
"step": 51000
},
{
"epoch": 16.211495712924737,
"grad_norm": 23709.25,
"learning_rate": 3.447205462051445e-05,
"loss": 0.655,
"step": 51050
},
{
"epoch": 16.22737376945062,
"grad_norm": 23120.591796875,
"learning_rate": 3.443235947919975e-05,
"loss": 0.6565,
"step": 51100
},
{
"epoch": 16.2432518259765,
"grad_norm": 23713.447265625,
"learning_rate": 3.439266433788504e-05,
"loss": 0.6583,
"step": 51150
},
{
"epoch": 16.259129882502382,
"grad_norm": 23596.0625,
"learning_rate": 3.4352969196570336e-05,
"loss": 0.6585,
"step": 51200
},
{
"epoch": 16.275007939028264,
"grad_norm": 24150.869140625,
"learning_rate": 3.4313274055255636e-05,
"loss": 0.6575,
"step": 51250
},
{
"epoch": 16.290885995554145,
"grad_norm": 24331.630859375,
"learning_rate": 3.427357891394094e-05,
"loss": 0.6594,
"step": 51300
},
{
"epoch": 16.306764052080027,
"grad_norm": 23451.88671875,
"learning_rate": 3.423388377262623e-05,
"loss": 0.6596,
"step": 51350
},
{
"epoch": 16.32264210860591,
"grad_norm": 23804.421875,
"learning_rate": 3.419418863131153e-05,
"loss": 0.6565,
"step": 51400
},
{
"epoch": 16.338520165131786,
"grad_norm": 24613.421875,
"learning_rate": 3.4154493489996824e-05,
"loss": 0.6608,
"step": 51450
},
{
"epoch": 16.354398221657668,
"grad_norm": 23770.623046875,
"learning_rate": 3.4114798348682124e-05,
"loss": 0.6558,
"step": 51500
},
{
"epoch": 16.37027627818355,
"grad_norm": 23497.1171875,
"learning_rate": 3.407510320736742e-05,
"loss": 0.6559,
"step": 51550
},
{
"epoch": 16.38615433470943,
"grad_norm": 23999.451171875,
"learning_rate": 3.403540806605272e-05,
"loss": 0.66,
"step": 51600
},
{
"epoch": 16.402032391235313,
"grad_norm": 23710.712890625,
"learning_rate": 3.399571292473802e-05,
"loss": 0.6605,
"step": 51650
},
{
"epoch": 16.417910447761194,
"grad_norm": 23660.166015625,
"learning_rate": 3.395601778342331e-05,
"loss": 0.6549,
"step": 51700
},
{
"epoch": 16.433788504287076,
"grad_norm": 23169.587890625,
"learning_rate": 3.3916322642108605e-05,
"loss": 0.6604,
"step": 51750
},
{
"epoch": 16.449666560812958,
"grad_norm": 24434.888671875,
"learning_rate": 3.3876627500793906e-05,
"loss": 0.6586,
"step": 51800
},
{
"epoch": 16.46554461733884,
"grad_norm": 23537.533203125,
"learning_rate": 3.38369323594792e-05,
"loss": 0.6588,
"step": 51850
},
{
"epoch": 16.48142267386472,
"grad_norm": 23304.681640625,
"learning_rate": 3.379723721816449e-05,
"loss": 0.6571,
"step": 51900
},
{
"epoch": 16.4973007303906,
"grad_norm": 23815.26171875,
"learning_rate": 3.375754207684979e-05,
"loss": 0.6593,
"step": 51950
},
{
"epoch": 16.51317878691648,
"grad_norm": 24492.578125,
"learning_rate": 3.3717846935535093e-05,
"loss": 0.6583,
"step": 52000
},
{
"epoch": 16.529056843442362,
"grad_norm": 23740.220703125,
"learning_rate": 3.3678151794220394e-05,
"loss": 0.6548,
"step": 52050
},
{
"epoch": 16.544934899968244,
"grad_norm": 23588.91015625,
"learning_rate": 3.363845665290569e-05,
"loss": 0.6556,
"step": 52100
},
{
"epoch": 16.560812956494125,
"grad_norm": 23579.50390625,
"learning_rate": 3.359876151159098e-05,
"loss": 0.6572,
"step": 52150
},
{
"epoch": 16.576691013020007,
"grad_norm": 23243.826171875,
"learning_rate": 3.355906637027628e-05,
"loss": 0.658,
"step": 52200
},
{
"epoch": 16.59256906954589,
"grad_norm": 23816.73828125,
"learning_rate": 3.3519371228961575e-05,
"loss": 0.6573,
"step": 52250
},
{
"epoch": 16.60844712607177,
"grad_norm": 23419.2265625,
"learning_rate": 3.3479676087646875e-05,
"loss": 0.6569,
"step": 52300
},
{
"epoch": 16.62432518259765,
"grad_norm": 24114.97265625,
"learning_rate": 3.3439980946332175e-05,
"loss": 0.6556,
"step": 52350
},
{
"epoch": 16.64020323912353,
"grad_norm": 23384.103515625,
"learning_rate": 3.340028580501747e-05,
"loss": 0.6595,
"step": 52400
},
{
"epoch": 16.65608129564941,
"grad_norm": 23055.017578125,
"learning_rate": 3.336059066370276e-05,
"loss": 0.6539,
"step": 52450
},
{
"epoch": 16.671959352175293,
"grad_norm": 23221.4609375,
"learning_rate": 3.332089552238806e-05,
"loss": 0.6587,
"step": 52500
},
{
"epoch": 16.687837408701174,
"grad_norm": 23152.85546875,
"learning_rate": 3.3281200381073356e-05,
"loss": 0.6583,
"step": 52550
},
{
"epoch": 16.703715465227056,
"grad_norm": 23718.134765625,
"learning_rate": 3.324150523975865e-05,
"loss": 0.6575,
"step": 52600
},
{
"epoch": 16.719593521752937,
"grad_norm": 23835.32421875,
"learning_rate": 3.320181009844395e-05,
"loss": 0.652,
"step": 52650
},
{
"epoch": 16.73547157827882,
"grad_norm": 24064.998046875,
"learning_rate": 3.316211495712925e-05,
"loss": 0.6544,
"step": 52700
},
{
"epoch": 16.7513496348047,
"grad_norm": 23508.853515625,
"learning_rate": 3.312241981581455e-05,
"loss": 0.6558,
"step": 52750
},
{
"epoch": 16.767227691330582,
"grad_norm": 23061.650390625,
"learning_rate": 3.3082724674499844e-05,
"loss": 0.6591,
"step": 52800
},
{
"epoch": 16.783105747856464,
"grad_norm": 23995.44140625,
"learning_rate": 3.304302953318514e-05,
"loss": 0.6559,
"step": 52850
},
{
"epoch": 16.798983804382345,
"grad_norm": 23488.259765625,
"learning_rate": 3.300333439187044e-05,
"loss": 0.6571,
"step": 52900
},
{
"epoch": 16.814861860908223,
"grad_norm": 24552.5625,
"learning_rate": 3.296363925055573e-05,
"loss": 0.6547,
"step": 52950
},
{
"epoch": 16.830739917434105,
"grad_norm": 23736.818359375,
"learning_rate": 3.292394410924103e-05,
"loss": 0.6578,
"step": 53000
},
{
"epoch": 16.846617973959987,
"grad_norm": 24375.546875,
"learning_rate": 3.288424896792633e-05,
"loss": 0.655,
"step": 53050
},
{
"epoch": 16.862496030485868,
"grad_norm": 23373.5,
"learning_rate": 3.2844553826611625e-05,
"loss": 0.6555,
"step": 53100
},
{
"epoch": 16.87837408701175,
"grad_norm": 24301.333984375,
"learning_rate": 3.280485868529692e-05,
"loss": 0.6574,
"step": 53150
},
{
"epoch": 16.89425214353763,
"grad_norm": 23277.462890625,
"learning_rate": 3.276516354398222e-05,
"loss": 0.6555,
"step": 53200
},
{
"epoch": 16.910130200063513,
"grad_norm": 23834.724609375,
"learning_rate": 3.272546840266751e-05,
"loss": 0.6582,
"step": 53250
},
{
"epoch": 16.926008256589395,
"grad_norm": 23567.853515625,
"learning_rate": 3.268577326135281e-05,
"loss": 0.6547,
"step": 53300
},
{
"epoch": 16.941886313115276,
"grad_norm": 23326.3671875,
"learning_rate": 3.2646078120038106e-05,
"loss": 0.6567,
"step": 53350
},
{
"epoch": 16.957764369641154,
"grad_norm": 23291.1953125,
"learning_rate": 3.260638297872341e-05,
"loss": 0.6552,
"step": 53400
},
{
"epoch": 16.973642426167036,
"grad_norm": 23493.248046875,
"learning_rate": 3.256668783740871e-05,
"loss": 0.6562,
"step": 53450
},
{
"epoch": 16.989520482692917,
"grad_norm": 23239.197265625,
"learning_rate": 3.2526992696094e-05,
"loss": 0.6562,
"step": 53500
},
{
"epoch": 17.0053985392188,
"grad_norm": 23646.5,
"learning_rate": 3.2487297554779294e-05,
"loss": 0.6553,
"step": 53550
},
{
"epoch": 17.02127659574468,
"grad_norm": 23330.103515625,
"learning_rate": 3.2447602413464594e-05,
"loss": 0.6497,
"step": 53600
},
{
"epoch": 17.037154652270562,
"grad_norm": 24091.298828125,
"learning_rate": 3.240790727214989e-05,
"loss": 0.6515,
"step": 53650
},
{
"epoch": 17.053032708796444,
"grad_norm": 24229.26171875,
"learning_rate": 3.236821213083519e-05,
"loss": 0.6518,
"step": 53700
},
{
"epoch": 17.068910765322325,
"grad_norm": 23416.974609375,
"learning_rate": 3.232851698952049e-05,
"loss": 0.6496,
"step": 53750
},
{
"epoch": 17.084788821848207,
"grad_norm": 23611.005859375,
"learning_rate": 3.228882184820578e-05,
"loss": 0.6491,
"step": 53800
},
{
"epoch": 17.10066687837409,
"grad_norm": 23050.642578125,
"learning_rate": 3.224912670689108e-05,
"loss": 0.6507,
"step": 53850
},
{
"epoch": 17.116544934899967,
"grad_norm": 23595.267578125,
"learning_rate": 3.2209431565576376e-05,
"loss": 0.6475,
"step": 53900
},
{
"epoch": 17.132422991425848,
"grad_norm": 23561.833984375,
"learning_rate": 3.216973642426167e-05,
"loss": 0.6487,
"step": 53950
},
{
"epoch": 17.14830104795173,
"grad_norm": 23513.59375,
"learning_rate": 3.213004128294697e-05,
"loss": 0.6474,
"step": 54000
},
{
"epoch": 17.16417910447761,
"grad_norm": 23998.837890625,
"learning_rate": 3.209034614163226e-05,
"loss": 0.6507,
"step": 54050
},
{
"epoch": 17.180057161003493,
"grad_norm": 23122.234375,
"learning_rate": 3.2050651000317563e-05,
"loss": 0.649,
"step": 54100
},
{
"epoch": 17.195935217529374,
"grad_norm": 23222.94921875,
"learning_rate": 3.2010955859002864e-05,
"loss": 0.6503,
"step": 54150
},
{
"epoch": 17.211813274055256,
"grad_norm": 23577.119140625,
"learning_rate": 3.197126071768816e-05,
"loss": 0.6511,
"step": 54200
},
{
"epoch": 17.227691330581138,
"grad_norm": 23782.28125,
"learning_rate": 3.193156557637345e-05,
"loss": 0.6523,
"step": 54250
},
{
"epoch": 17.24356938710702,
"grad_norm": 24278.03125,
"learning_rate": 3.189187043505875e-05,
"loss": 0.654,
"step": 54300
},
{
"epoch": 17.2594474436329,
"grad_norm": 23616.5390625,
"learning_rate": 3.1852175293744045e-05,
"loss": 0.651,
"step": 54350
},
{
"epoch": 17.27532550015878,
"grad_norm": 23676.12890625,
"learning_rate": 3.181248015242934e-05,
"loss": 0.6503,
"step": 54400
},
{
"epoch": 17.29120355668466,
"grad_norm": 23172.5859375,
"learning_rate": 3.1772785011114645e-05,
"loss": 0.6541,
"step": 54450
},
{
"epoch": 17.307081613210542,
"grad_norm": 23444.791015625,
"learning_rate": 3.173308986979994e-05,
"loss": 0.6491,
"step": 54500
},
{
"epoch": 17.322959669736424,
"grad_norm": 23441.435546875,
"learning_rate": 3.169339472848524e-05,
"loss": 0.6495,
"step": 54550
},
{
"epoch": 17.338837726262305,
"grad_norm": 23380.716796875,
"learning_rate": 3.165369958717053e-05,
"loss": 0.6504,
"step": 54600
},
{
"epoch": 17.354715782788187,
"grad_norm": 23601.03125,
"learning_rate": 3.1614004445855826e-05,
"loss": 0.6543,
"step": 54650
},
{
"epoch": 17.37059383931407,
"grad_norm": 23646.830078125,
"learning_rate": 3.1574309304541126e-05,
"loss": 0.6514,
"step": 54700
},
{
"epoch": 17.38647189583995,
"grad_norm": 23490.85546875,
"learning_rate": 3.153461416322642e-05,
"loss": 0.6513,
"step": 54750
},
{
"epoch": 17.40234995236583,
"grad_norm": 23827.6796875,
"learning_rate": 3.149491902191172e-05,
"loss": 0.6484,
"step": 54800
},
{
"epoch": 17.418228008891713,
"grad_norm": 23096.40625,
"learning_rate": 3.145522388059702e-05,
"loss": 0.6483,
"step": 54850
},
{
"epoch": 17.43410606541759,
"grad_norm": 23743.8359375,
"learning_rate": 3.1415528739282314e-05,
"loss": 0.652,
"step": 54900
},
{
"epoch": 17.449984121943473,
"grad_norm": 23075.265625,
"learning_rate": 3.137583359796761e-05,
"loss": 0.6501,
"step": 54950
},
{
"epoch": 17.465862178469354,
"grad_norm": 24035.71875,
"learning_rate": 3.133613845665291e-05,
"loss": 0.652,
"step": 55000
},
{
"epoch": 17.481740234995236,
"grad_norm": 23361.478515625,
"learning_rate": 3.12964433153382e-05,
"loss": 0.6536,
"step": 55050
},
{
"epoch": 17.497618291521118,
"grad_norm": 23745.171875,
"learning_rate": 3.12567481740235e-05,
"loss": 0.6504,
"step": 55100
},
{
"epoch": 17.513496348047,
"grad_norm": 23789.912109375,
"learning_rate": 3.12170530327088e-05,
"loss": 0.6491,
"step": 55150
},
{
"epoch": 17.52937440457288,
"grad_norm": 23335.9609375,
"learning_rate": 3.1177357891394095e-05,
"loss": 0.6503,
"step": 55200
},
{
"epoch": 17.545252461098762,
"grad_norm": 23689.12890625,
"learning_rate": 3.1137662750079396e-05,
"loss": 0.6527,
"step": 55250
},
{
"epoch": 17.561130517624644,
"grad_norm": 23400.572265625,
"learning_rate": 3.109796760876469e-05,
"loss": 0.6519,
"step": 55300
},
{
"epoch": 17.577008574150526,
"grad_norm": 23527.71875,
"learning_rate": 3.105827246744998e-05,
"loss": 0.6494,
"step": 55350
},
{
"epoch": 17.592886630676404,
"grad_norm": 23462.595703125,
"learning_rate": 3.101857732613528e-05,
"loss": 0.6507,
"step": 55400
},
{
"epoch": 17.608764687202285,
"grad_norm": 23832.5625,
"learning_rate": 3.0978882184820576e-05,
"loss": 0.648,
"step": 55450
},
{
"epoch": 17.624642743728167,
"grad_norm": 23171.41796875,
"learning_rate": 3.093918704350588e-05,
"loss": 0.6515,
"step": 55500
},
{
"epoch": 17.64052080025405,
"grad_norm": 22859.158203125,
"learning_rate": 3.089949190219118e-05,
"loss": 0.65,
"step": 55550
},
{
"epoch": 17.65639885677993,
"grad_norm": 23794.48828125,
"learning_rate": 3.085979676087647e-05,
"loss": 0.6494,
"step": 55600
},
{
"epoch": 17.67227691330581,
"grad_norm": 23437.626953125,
"learning_rate": 3.082010161956177e-05,
"loss": 0.6507,
"step": 55650
},
{
"epoch": 17.688154969831693,
"grad_norm": 23454.169921875,
"learning_rate": 3.0780406478247064e-05,
"loss": 0.6528,
"step": 55700
},
{
"epoch": 17.704033026357575,
"grad_norm": 23537.875,
"learning_rate": 3.074071133693236e-05,
"loss": 0.6508,
"step": 55750
},
{
"epoch": 17.719911082883456,
"grad_norm": 23073.7421875,
"learning_rate": 3.070101619561766e-05,
"loss": 0.6514,
"step": 55800
},
{
"epoch": 17.735789139409338,
"grad_norm": 24199.0234375,
"learning_rate": 3.066132105430295e-05,
"loss": 0.652,
"step": 55850
},
{
"epoch": 17.751667195935216,
"grad_norm": 24246.693359375,
"learning_rate": 3.062162591298825e-05,
"loss": 0.6506,
"step": 55900
},
{
"epoch": 17.767545252461098,
"grad_norm": 23131.373046875,
"learning_rate": 3.058193077167355e-05,
"loss": 0.6473,
"step": 55950
},
{
"epoch": 17.78342330898698,
"grad_norm": 23967.60546875,
"learning_rate": 3.0542235630358846e-05,
"loss": 0.649,
"step": 56000
},
{
"epoch": 17.79930136551286,
"grad_norm": 23857.62890625,
"learning_rate": 3.050254048904414e-05,
"loss": 0.6498,
"step": 56050
},
{
"epoch": 17.815179422038742,
"grad_norm": 23931.333984375,
"learning_rate": 3.046284534772944e-05,
"loss": 0.651,
"step": 56100
},
{
"epoch": 17.831057478564624,
"grad_norm": 23383.25390625,
"learning_rate": 3.0423150206414736e-05,
"loss": 0.6526,
"step": 56150
},
{
"epoch": 17.846935535090505,
"grad_norm": 23175.05078125,
"learning_rate": 3.038345506510003e-05,
"loss": 0.6494,
"step": 56200
},
{
"epoch": 17.862813591616387,
"grad_norm": 23574.662109375,
"learning_rate": 3.034375992378533e-05,
"loss": 0.6479,
"step": 56250
},
{
"epoch": 17.87869164814227,
"grad_norm": 23581.66015625,
"learning_rate": 3.0304064782470627e-05,
"loss": 0.6514,
"step": 56300
},
{
"epoch": 17.89456970466815,
"grad_norm": 23191.83203125,
"learning_rate": 3.0264369641155928e-05,
"loss": 0.649,
"step": 56350
},
{
"epoch": 17.91044776119403,
"grad_norm": 23138.642578125,
"learning_rate": 3.022467449984122e-05,
"loss": 0.6523,
"step": 56400
},
{
"epoch": 17.92632581771991,
"grad_norm": 24247.412109375,
"learning_rate": 3.0184979358526515e-05,
"loss": 0.6508,
"step": 56450
},
{
"epoch": 17.94220387424579,
"grad_norm": 23328.904296875,
"learning_rate": 3.0145284217211818e-05,
"loss": 0.6521,
"step": 56500
},
{
"epoch": 17.958081930771673,
"grad_norm": 23195.806640625,
"learning_rate": 3.0105589075897112e-05,
"loss": 0.6519,
"step": 56550
},
{
"epoch": 17.973959987297555,
"grad_norm": 24111.8359375,
"learning_rate": 3.0065893934582405e-05,
"loss": 0.6504,
"step": 56600
},
{
"epoch": 17.989838043823436,
"grad_norm": 23394.666015625,
"learning_rate": 3.0026198793267706e-05,
"loss": 0.6499,
"step": 56650
},
{
"epoch": 18.005716100349318,
"grad_norm": 24451.033203125,
"learning_rate": 2.9986503651953002e-05,
"loss": 0.6493,
"step": 56700
},
{
"epoch": 18.0215941568752,
"grad_norm": 24162.71875,
"learning_rate": 2.9946808510638296e-05,
"loss": 0.6437,
"step": 56750
},
{
"epoch": 18.03747221340108,
"grad_norm": 23387.994140625,
"learning_rate": 2.9907113369323596e-05,
"loss": 0.6487,
"step": 56800
},
{
"epoch": 18.053350269926963,
"grad_norm": 23269.083984375,
"learning_rate": 2.9867418228008893e-05,
"loss": 0.6469,
"step": 56850
},
{
"epoch": 18.06922832645284,
"grad_norm": 23347.12890625,
"learning_rate": 2.9827723086694193e-05,
"loss": 0.6428,
"step": 56900
},
{
"epoch": 18.085106382978722,
"grad_norm": 23654.724609375,
"learning_rate": 2.9788027945379487e-05,
"loss": 0.6485,
"step": 56950
},
{
"epoch": 18.100984439504604,
"grad_norm": 23451.236328125,
"learning_rate": 2.9748332804064784e-05,
"loss": 0.6458,
"step": 57000
},
{
"epoch": 18.116862496030485,
"grad_norm": 24515.484375,
"learning_rate": 2.9708637662750084e-05,
"loss": 0.6451,
"step": 57050
},
{
"epoch": 18.132740552556367,
"grad_norm": 23294.755859375,
"learning_rate": 2.9668942521435378e-05,
"loss": 0.6473,
"step": 57100
},
{
"epoch": 18.14861860908225,
"grad_norm": 24213.658203125,
"learning_rate": 2.962924738012067e-05,
"loss": 0.6435,
"step": 57150
},
{
"epoch": 18.16449666560813,
"grad_norm": 23097.380859375,
"learning_rate": 2.958955223880597e-05,
"loss": 0.6448,
"step": 57200
},
{
"epoch": 18.18037472213401,
"grad_norm": 23379.626953125,
"learning_rate": 2.954985709749127e-05,
"loss": 0.6455,
"step": 57250
},
{
"epoch": 18.196252778659893,
"grad_norm": 23119.544921875,
"learning_rate": 2.9510161956176562e-05,
"loss": 0.6435,
"step": 57300
},
{
"epoch": 18.212130835185775,
"grad_norm": 23745.896484375,
"learning_rate": 2.9470466814861862e-05,
"loss": 0.6469,
"step": 57350
},
{
"epoch": 18.228008891711653,
"grad_norm": 23996.830078125,
"learning_rate": 2.943077167354716e-05,
"loss": 0.6471,
"step": 57400
},
{
"epoch": 18.243886948237535,
"grad_norm": 23478.310546875,
"learning_rate": 2.939107653223246e-05,
"loss": 0.6435,
"step": 57450
},
{
"epoch": 18.259765004763416,
"grad_norm": 23451.203125,
"learning_rate": 2.9351381390917753e-05,
"loss": 0.6466,
"step": 57500
},
{
"epoch": 18.275643061289298,
"grad_norm": 23465.240234375,
"learning_rate": 2.931168624960305e-05,
"loss": 0.6473,
"step": 57550
},
{
"epoch": 18.29152111781518,
"grad_norm": 23575.08203125,
"learning_rate": 2.927199110828835e-05,
"loss": 0.6465,
"step": 57600
},
{
"epoch": 18.30739917434106,
"grad_norm": 23251.138671875,
"learning_rate": 2.9232295966973644e-05,
"loss": 0.6434,
"step": 57650
},
{
"epoch": 18.323277230866942,
"grad_norm": 23791.912109375,
"learning_rate": 2.919260082565894e-05,
"loss": 0.6462,
"step": 57700
},
{
"epoch": 18.339155287392824,
"grad_norm": 23962.904296875,
"learning_rate": 2.915290568434424e-05,
"loss": 0.6464,
"step": 57750
},
{
"epoch": 18.355033343918706,
"grad_norm": 23311.55078125,
"learning_rate": 2.9113210543029534e-05,
"loss": 0.6432,
"step": 57800
},
{
"epoch": 18.370911400444587,
"grad_norm": 23348.361328125,
"learning_rate": 2.9073515401714828e-05,
"loss": 0.6439,
"step": 57850
},
{
"epoch": 18.386789456970465,
"grad_norm": 23554.818359375,
"learning_rate": 2.9033820260400128e-05,
"loss": 0.6468,
"step": 57900
},
{
"epoch": 18.402667513496347,
"grad_norm": 23781.22265625,
"learning_rate": 2.8994125119085425e-05,
"loss": 0.6444,
"step": 57950
},
{
"epoch": 18.41854557002223,
"grad_norm": 22692.29296875,
"learning_rate": 2.895442997777072e-05,
"loss": 0.6442,
"step": 58000
},
{
"epoch": 18.43442362654811,
"grad_norm": 23058.107421875,
"learning_rate": 2.891473483645602e-05,
"loss": 0.6439,
"step": 58050
},
{
"epoch": 18.45030168307399,
"grad_norm": 22859.33203125,
"learning_rate": 2.8875039695141316e-05,
"loss": 0.645,
"step": 58100
},
{
"epoch": 18.466179739599873,
"grad_norm": 23191.587890625,
"learning_rate": 2.8835344553826616e-05,
"loss": 0.6449,
"step": 58150
},
{
"epoch": 18.482057796125755,
"grad_norm": 23647.953125,
"learning_rate": 2.879564941251191e-05,
"loss": 0.6466,
"step": 58200
},
{
"epoch": 18.497935852651636,
"grad_norm": 23992.3828125,
"learning_rate": 2.8755954271197207e-05,
"loss": 0.6473,
"step": 58250
},
{
"epoch": 18.513813909177518,
"grad_norm": 23166.861328125,
"learning_rate": 2.8716259129882507e-05,
"loss": 0.6451,
"step": 58300
},
{
"epoch": 18.5296919657034,
"grad_norm": 23637.072265625,
"learning_rate": 2.86765639885678e-05,
"loss": 0.6481,
"step": 58350
},
{
"epoch": 18.545570022229278,
"grad_norm": 23371.7265625,
"learning_rate": 2.8636868847253094e-05,
"loss": 0.6406,
"step": 58400
},
{
"epoch": 18.56144807875516,
"grad_norm": 23572.712890625,
"learning_rate": 2.8597173705938398e-05,
"loss": 0.6437,
"step": 58450
},
{
"epoch": 18.57732613528104,
"grad_norm": 23404.9765625,
"learning_rate": 2.855747856462369e-05,
"loss": 0.6467,
"step": 58500
},
{
"epoch": 18.593204191806922,
"grad_norm": 23365.392578125,
"learning_rate": 2.8517783423308985e-05,
"loss": 0.6464,
"step": 58550
},
{
"epoch": 18.609082248332804,
"grad_norm": 22837.333984375,
"learning_rate": 2.8478088281994285e-05,
"loss": 0.6476,
"step": 58600
},
{
"epoch": 18.624960304858686,
"grad_norm": 23265.06640625,
"learning_rate": 2.8438393140679582e-05,
"loss": 0.6445,
"step": 58650
},
{
"epoch": 18.640838361384567,
"grad_norm": 23657.7265625,
"learning_rate": 2.8398697999364882e-05,
"loss": 0.6487,
"step": 58700
},
{
"epoch": 18.65671641791045,
"grad_norm": 23775.322265625,
"learning_rate": 2.8359002858050176e-05,
"loss": 0.6461,
"step": 58750
},
{
"epoch": 18.67259447443633,
"grad_norm": 23216.8359375,
"learning_rate": 2.8319307716735472e-05,
"loss": 0.6445,
"step": 58800
},
{
"epoch": 18.68847253096221,
"grad_norm": 23683.677734375,
"learning_rate": 2.8279612575420773e-05,
"loss": 0.6464,
"step": 58850
},
{
"epoch": 18.70435058748809,
"grad_norm": 24212.357421875,
"learning_rate": 2.8239917434106066e-05,
"loss": 0.6458,
"step": 58900
},
{
"epoch": 18.72022864401397,
"grad_norm": 23845.314453125,
"learning_rate": 2.8200222292791363e-05,
"loss": 0.6434,
"step": 58950
},
{
"epoch": 18.736106700539853,
"grad_norm": 23429.07421875,
"learning_rate": 2.8160527151476663e-05,
"loss": 0.6454,
"step": 59000
},
{
"epoch": 18.751984757065735,
"grad_norm": 23693.30078125,
"learning_rate": 2.8120832010161957e-05,
"loss": 0.6449,
"step": 59050
},
{
"epoch": 18.767862813591616,
"grad_norm": 24399.693359375,
"learning_rate": 2.808113686884725e-05,
"loss": 0.6442,
"step": 59100
},
{
"epoch": 18.783740870117498,
"grad_norm": 23742.171875,
"learning_rate": 2.8041441727532554e-05,
"loss": 0.645,
"step": 59150
},
{
"epoch": 18.79961892664338,
"grad_norm": 23861.95703125,
"learning_rate": 2.8001746586217848e-05,
"loss": 0.6476,
"step": 59200
},
{
"epoch": 18.81549698316926,
"grad_norm": 23694.40625,
"learning_rate": 2.7962051444903148e-05,
"loss": 0.6453,
"step": 59250
},
{
"epoch": 18.831375039695143,
"grad_norm": 22883.59375,
"learning_rate": 2.792235630358844e-05,
"loss": 0.6441,
"step": 59300
},
{
"epoch": 18.847253096221024,
"grad_norm": 24172.53125,
"learning_rate": 2.788266116227374e-05,
"loss": 0.643,
"step": 59350
},
{
"epoch": 18.863131152746902,
"grad_norm": 23304.0859375,
"learning_rate": 2.784296602095904e-05,
"loss": 0.6468,
"step": 59400
},
{
"epoch": 18.879009209272784,
"grad_norm": 23574.00390625,
"learning_rate": 2.7803270879644332e-05,
"loss": 0.6473,
"step": 59450
},
{
"epoch": 18.894887265798666,
"grad_norm": 23528.5859375,
"learning_rate": 2.776357573832963e-05,
"loss": 0.6455,
"step": 59500
},
{
"epoch": 18.910765322324547,
"grad_norm": 23860.66015625,
"learning_rate": 2.772388059701493e-05,
"loss": 0.647,
"step": 59550
},
{
"epoch": 18.92664337885043,
"grad_norm": 23734.267578125,
"learning_rate": 2.7684185455700223e-05,
"loss": 0.6422,
"step": 59600
},
{
"epoch": 18.94252143537631,
"grad_norm": 23822.1875,
"learning_rate": 2.764449031438552e-05,
"loss": 0.643,
"step": 59650
},
{
"epoch": 18.958399491902192,
"grad_norm": 22949.701171875,
"learning_rate": 2.760479517307082e-05,
"loss": 0.6441,
"step": 59700
},
{
"epoch": 18.974277548428073,
"grad_norm": 23238.943359375,
"learning_rate": 2.7565100031756114e-05,
"loss": 0.6449,
"step": 59750
},
{
"epoch": 18.990155604953955,
"grad_norm": 23398.4140625,
"learning_rate": 2.7525404890441407e-05,
"loss": 0.6429,
"step": 59800
},
{
"epoch": 19.006033661479837,
"grad_norm": 23431.22265625,
"learning_rate": 2.7485709749126707e-05,
"loss": 0.6427,
"step": 59850
},
{
"epoch": 19.021911718005715,
"grad_norm": 23309.41796875,
"learning_rate": 2.7446014607812004e-05,
"loss": 0.6407,
"step": 59900
},
{
"epoch": 19.037789774531596,
"grad_norm": 23369.134765625,
"learning_rate": 2.7406319466497305e-05,
"loss": 0.6337,
"step": 59950
},
{
"epoch": 19.053667831057478,
"grad_norm": 23830.84375,
"learning_rate": 2.7366624325182598e-05,
"loss": 0.6405,
"step": 60000
},
{
"epoch": 19.06954588758336,
"grad_norm": 23755.240234375,
"learning_rate": 2.7326929183867895e-05,
"loss": 0.6404,
"step": 60050
},
{
"epoch": 19.08542394410924,
"grad_norm": 23902.658203125,
"learning_rate": 2.7287234042553195e-05,
"loss": 0.639,
"step": 60100
},
{
"epoch": 19.101302000635123,
"grad_norm": 23243.98828125,
"learning_rate": 2.724753890123849e-05,
"loss": 0.6352,
"step": 60150
},
{
"epoch": 19.117180057161004,
"grad_norm": 23601.951171875,
"learning_rate": 2.7207843759923786e-05,
"loss": 0.6374,
"step": 60200
},
{
"epoch": 19.133058113686886,
"grad_norm": 23469.58203125,
"learning_rate": 2.7168148618609086e-05,
"loss": 0.6391,
"step": 60250
},
{
"epoch": 19.148936170212767,
"grad_norm": 24173.52734375,
"learning_rate": 2.712845347729438e-05,
"loss": 0.6387,
"step": 60300
},
{
"epoch": 19.164814226738645,
"grad_norm": 23875.79296875,
"learning_rate": 2.7088758335979677e-05,
"loss": 0.6399,
"step": 60350
},
{
"epoch": 19.180692283264527,
"grad_norm": 23351.52734375,
"learning_rate": 2.7049063194664977e-05,
"loss": 0.6418,
"step": 60400
},
{
"epoch": 19.19657033979041,
"grad_norm": 24030.84765625,
"learning_rate": 2.700936805335027e-05,
"loss": 0.6417,
"step": 60450
},
{
"epoch": 19.21244839631629,
"grad_norm": 23502.681640625,
"learning_rate": 2.696967291203557e-05,
"loss": 0.6419,
"step": 60500
},
{
"epoch": 19.228326452842172,
"grad_norm": 23972.9609375,
"learning_rate": 2.6929977770720864e-05,
"loss": 0.6385,
"step": 60550
},
{
"epoch": 19.244204509368053,
"grad_norm": 23450.828125,
"learning_rate": 2.689028262940616e-05,
"loss": 0.6383,
"step": 60600
},
{
"epoch": 19.260082565893935,
"grad_norm": 23348.533203125,
"learning_rate": 2.685058748809146e-05,
"loss": 0.6438,
"step": 60650
},
{
"epoch": 19.275960622419817,
"grad_norm": 23530.572265625,
"learning_rate": 2.6810892346776755e-05,
"loss": 0.642,
"step": 60700
},
{
"epoch": 19.291838678945698,
"grad_norm": 23539.4375,
"learning_rate": 2.6771197205462052e-05,
"loss": 0.6403,
"step": 60750
},
{
"epoch": 19.30771673547158,
"grad_norm": 23497.1015625,
"learning_rate": 2.6731502064147352e-05,
"loss": 0.6384,
"step": 60800
},
{
"epoch": 19.323594791997458,
"grad_norm": 23695.337890625,
"learning_rate": 2.6691806922832646e-05,
"loss": 0.6428,
"step": 60850
},
{
"epoch": 19.33947284852334,
"grad_norm": 23628.791015625,
"learning_rate": 2.6652111781517942e-05,
"loss": 0.6371,
"step": 60900
},
{
"epoch": 19.35535090504922,
"grad_norm": 23643.2421875,
"learning_rate": 2.6612416640203243e-05,
"loss": 0.643,
"step": 60950
},
{
"epoch": 19.371228961575103,
"grad_norm": 23056.48828125,
"learning_rate": 2.6572721498888536e-05,
"loss": 0.6387,
"step": 61000
},
{
"epoch": 19.387107018100984,
"grad_norm": 23687.0859375,
"learning_rate": 2.6533026357573837e-05,
"loss": 0.64,
"step": 61050
},
{
"epoch": 19.402985074626866,
"grad_norm": 23476.853515625,
"learning_rate": 2.6493331216259133e-05,
"loss": 0.6406,
"step": 61100
},
{
"epoch": 19.418863131152747,
"grad_norm": 24086.623046875,
"learning_rate": 2.6453636074944427e-05,
"loss": 0.6428,
"step": 61150
},
{
"epoch": 19.43474118767863,
"grad_norm": 23353.08984375,
"learning_rate": 2.6413940933629727e-05,
"loss": 0.6435,
"step": 61200
},
{
"epoch": 19.45061924420451,
"grad_norm": 23448.341796875,
"learning_rate": 2.637424579231502e-05,
"loss": 0.6423,
"step": 61250
},
{
"epoch": 19.466497300730392,
"grad_norm": 23803.828125,
"learning_rate": 2.6334550651000318e-05,
"loss": 0.6419,
"step": 61300
},
{
"epoch": 19.48237535725627,
"grad_norm": 24309.66796875,
"learning_rate": 2.6294855509685618e-05,
"loss": 0.6434,
"step": 61350
},
{
"epoch": 19.49825341378215,
"grad_norm": 23195.96875,
"learning_rate": 2.625516036837091e-05,
"loss": 0.6378,
"step": 61400
},
{
"epoch": 19.514131470308033,
"grad_norm": 23841.748046875,
"learning_rate": 2.621546522705621e-05,
"loss": 0.6397,
"step": 61450
},
{
"epoch": 19.530009526833915,
"grad_norm": 23373.90625,
"learning_rate": 2.617577008574151e-05,
"loss": 0.6405,
"step": 61500
},
{
"epoch": 19.545887583359796,
"grad_norm": 23817.623046875,
"learning_rate": 2.6136074944426802e-05,
"loss": 0.6402,
"step": 61550
},
{
"epoch": 19.561765639885678,
"grad_norm": 23955.10546875,
"learning_rate": 2.60963798031121e-05,
"loss": 0.6394,
"step": 61600
},
{
"epoch": 19.57764369641156,
"grad_norm": 23063.484375,
"learning_rate": 2.60566846617974e-05,
"loss": 0.6426,
"step": 61650
},
{
"epoch": 19.59352175293744,
"grad_norm": 23891.98046875,
"learning_rate": 2.6016989520482693e-05,
"loss": 0.6411,
"step": 61700
},
{
"epoch": 19.609399809463323,
"grad_norm": 23992.01171875,
"learning_rate": 2.5977294379167993e-05,
"loss": 0.6414,
"step": 61750
},
{
"epoch": 19.625277865989204,
"grad_norm": 23503.697265625,
"learning_rate": 2.5937599237853287e-05,
"loss": 0.6375,
"step": 61800
},
{
"epoch": 19.641155922515082,
"grad_norm": 24320.890625,
"learning_rate": 2.5897904096538584e-05,
"loss": 0.6395,
"step": 61850
},
{
"epoch": 19.657033979040964,
"grad_norm": 23124.4375,
"learning_rate": 2.5858208955223884e-05,
"loss": 0.6425,
"step": 61900
},
{
"epoch": 19.672912035566846,
"grad_norm": 24162.05078125,
"learning_rate": 2.5818513813909177e-05,
"loss": 0.6399,
"step": 61950
},
{
"epoch": 19.688790092092727,
"grad_norm": 23305.732421875,
"learning_rate": 2.5778818672594474e-05,
"loss": 0.6375,
"step": 62000
},
{
"epoch": 19.70466814861861,
"grad_norm": 23611.771484375,
"learning_rate": 2.5739123531279775e-05,
"loss": 0.6402,
"step": 62050
},
{
"epoch": 19.72054620514449,
"grad_norm": 23453.798828125,
"learning_rate": 2.5699428389965068e-05,
"loss": 0.6365,
"step": 62100
},
{
"epoch": 19.736424261670372,
"grad_norm": 23632.29296875,
"learning_rate": 2.5659733248650365e-05,
"loss": 0.6387,
"step": 62150
},
{
"epoch": 19.752302318196254,
"grad_norm": 23645.380859375,
"learning_rate": 2.5620038107335665e-05,
"loss": 0.642,
"step": 62200
},
{
"epoch": 19.768180374722135,
"grad_norm": 23833.201171875,
"learning_rate": 2.558034296602096e-05,
"loss": 0.6401,
"step": 62250
},
{
"epoch": 19.784058431248017,
"grad_norm": 23208.59765625,
"learning_rate": 2.554064782470626e-05,
"loss": 0.6398,
"step": 62300
},
{
"epoch": 19.799936487773895,
"grad_norm": 23972.072265625,
"learning_rate": 2.5500952683391556e-05,
"loss": 0.6371,
"step": 62350
},
{
"epoch": 19.815814544299776,
"grad_norm": 23224.896484375,
"learning_rate": 2.546125754207685e-05,
"loss": 0.6391,
"step": 62400
},
{
"epoch": 19.831692600825658,
"grad_norm": 24514.93359375,
"learning_rate": 2.542156240076215e-05,
"loss": 0.6403,
"step": 62450
},
{
"epoch": 19.84757065735154,
"grad_norm": 23860.24609375,
"learning_rate": 2.5381867259447443e-05,
"loss": 0.6409,
"step": 62500
},
{
"epoch": 19.86344871387742,
"grad_norm": 23272.37890625,
"learning_rate": 2.534217211813274e-05,
"loss": 0.6391,
"step": 62550
},
{
"epoch": 19.879326770403303,
"grad_norm": 23465.796875,
"learning_rate": 2.530247697681804e-05,
"loss": 0.6403,
"step": 62600
},
{
"epoch": 19.895204826929184,
"grad_norm": 23239.646484375,
"learning_rate": 2.5262781835503334e-05,
"loss": 0.641,
"step": 62650
},
{
"epoch": 19.911082883455066,
"grad_norm": 23455.052734375,
"learning_rate": 2.522308669418863e-05,
"loss": 0.641,
"step": 62700
},
{
"epoch": 19.926960939980948,
"grad_norm": 23780.283203125,
"learning_rate": 2.518339155287393e-05,
"loss": 0.6403,
"step": 62750
},
{
"epoch": 19.94283899650683,
"grad_norm": 24348.8515625,
"learning_rate": 2.5143696411559225e-05,
"loss": 0.638,
"step": 62800
},
{
"epoch": 19.958717053032707,
"grad_norm": 23591.212890625,
"learning_rate": 2.5104001270244525e-05,
"loss": 0.6367,
"step": 62850
},
{
"epoch": 19.97459510955859,
"grad_norm": 23728.669921875,
"learning_rate": 2.5064306128929822e-05,
"loss": 0.6397,
"step": 62900
},
{
"epoch": 19.99047316608447,
"grad_norm": 23320.609375,
"learning_rate": 2.5024610987615116e-05,
"loss": 0.6411,
"step": 62950
},
{
"epoch": 20.0,
"step": 62980,
"total_flos": 2.10597197119488e+18,
"train_loss": 0.33622724912249125,
"train_runtime": 72693.7372,
"train_samples_per_second": 55.437,
"train_steps_per_second": 0.866
}
],
"logging_steps": 50,
"max_steps": 62980,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.10597197119488e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}