{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 7106,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00028145229383619476,
"grad_norm": 1.9831818342208862,
"learning_rate": 2.8129395218002816e-07,
"loss": 2.1254,
"step": 1
},
{
"epoch": 0.0014072614691809737,
"grad_norm": 1.082022786140442,
"learning_rate": 1.4064697609001406e-06,
"loss": 1.2669,
"step": 5
},
{
"epoch": 0.0028145229383619475,
"grad_norm": 1.065866231918335,
"learning_rate": 2.8129395218002813e-06,
"loss": 1.3653,
"step": 10
},
{
"epoch": 0.004221784407542921,
"grad_norm": 2.5712316036224365,
"learning_rate": 4.219409282700422e-06,
"loss": 1.53,
"step": 15
},
{
"epoch": 0.005629045876723895,
"grad_norm": 2.248335838317871,
"learning_rate": 5.6258790436005626e-06,
"loss": 1.4474,
"step": 20
},
{
"epoch": 0.007036307345904869,
"grad_norm": 1.0500571727752686,
"learning_rate": 7.032348804500703e-06,
"loss": 1.3372,
"step": 25
},
{
"epoch": 0.008443568815085843,
"grad_norm": 1.867329716682434,
"learning_rate": 8.438818565400844e-06,
"loss": 1.2333,
"step": 30
},
{
"epoch": 0.009850830284266816,
"grad_norm": 3.1149637699127197,
"learning_rate": 9.845288326300985e-06,
"loss": 1.2918,
"step": 35
},
{
"epoch": 0.01125809175344779,
"grad_norm": 1.9895963668823242,
"learning_rate": 1.1251758087201125e-05,
"loss": 1.3152,
"step": 40
},
{
"epoch": 0.012665353222628765,
"grad_norm": 2.0947887897491455,
"learning_rate": 1.2658227848101267e-05,
"loss": 1.3231,
"step": 45
},
{
"epoch": 0.014072614691809739,
"grad_norm": 1.4856278896331787,
"learning_rate": 1.4064697609001406e-05,
"loss": 1.2376,
"step": 50
},
{
"epoch": 0.015479876160990712,
"grad_norm": 1.2920206785202026,
"learning_rate": 1.547116736990155e-05,
"loss": 1.1083,
"step": 55
},
{
"epoch": 0.016887137630171686,
"grad_norm": 1.3694531917572021,
"learning_rate": 1.6877637130801688e-05,
"loss": 0.9554,
"step": 60
},
{
"epoch": 0.01829439909935266,
"grad_norm": 1.335752248764038,
"learning_rate": 1.828410689170183e-05,
"loss": 0.8074,
"step": 65
},
{
"epoch": 0.019701660568533633,
"grad_norm": 0.8360928297042847,
"learning_rate": 1.969057665260197e-05,
"loss": 0.9003,
"step": 70
},
{
"epoch": 0.021108922037714608,
"grad_norm": 1.4033712148666382,
"learning_rate": 2.1097046413502112e-05,
"loss": 1.0069,
"step": 75
},
{
"epoch": 0.02251618350689558,
"grad_norm": 3.524489164352417,
"learning_rate": 2.250351617440225e-05,
"loss": 0.8655,
"step": 80
},
{
"epoch": 0.023923444976076555,
"grad_norm": 5.065525054931641,
"learning_rate": 2.3909985935302392e-05,
"loss": 0.8884,
"step": 85
},
{
"epoch": 0.02533070644525753,
"grad_norm": 1.1002086400985718,
"learning_rate": 2.5316455696202533e-05,
"loss": 0.7538,
"step": 90
},
{
"epoch": 0.026737967914438502,
"grad_norm": 1.5529321432113647,
"learning_rate": 2.672292545710267e-05,
"loss": 0.9944,
"step": 95
},
{
"epoch": 0.028145229383619477,
"grad_norm": 2.5230774879455566,
"learning_rate": 2.8129395218002813e-05,
"loss": 0.742,
"step": 100
},
{
"epoch": 0.02955249085280045,
"grad_norm": 1.8407468795776367,
"learning_rate": 2.9535864978902954e-05,
"loss": 0.6106,
"step": 105
},
{
"epoch": 0.030959752321981424,
"grad_norm": 1.8544448614120483,
"learning_rate": 3.09423347398031e-05,
"loss": 0.7784,
"step": 110
},
{
"epoch": 0.0323670137911624,
"grad_norm": 1.5393428802490234,
"learning_rate": 3.234880450070324e-05,
"loss": 0.6225,
"step": 115
},
{
"epoch": 0.03377427526034337,
"grad_norm": 0.9650129675865173,
"learning_rate": 3.3755274261603375e-05,
"loss": 0.466,
"step": 120
},
{
"epoch": 0.03518153672952434,
"grad_norm": 1.2199194431304932,
"learning_rate": 3.516174402250352e-05,
"loss": 0.7885,
"step": 125
},
{
"epoch": 0.03658879819870532,
"grad_norm": 3.1491034030914307,
"learning_rate": 3.656821378340366e-05,
"loss": 0.642,
"step": 130
},
{
"epoch": 0.037996059667886294,
"grad_norm": 1.015199899673462,
"learning_rate": 3.79746835443038e-05,
"loss": 1.0289,
"step": 135
},
{
"epoch": 0.039403321137067265,
"grad_norm": 1.211543321609497,
"learning_rate": 3.938115330520394e-05,
"loss": 0.8134,
"step": 140
},
{
"epoch": 0.040810582606248244,
"grad_norm": 1.6816538572311401,
"learning_rate": 4.078762306610408e-05,
"loss": 0.8157,
"step": 145
},
{
"epoch": 0.042217844075429216,
"grad_norm": 1.9145057201385498,
"learning_rate": 4.2194092827004224e-05,
"loss": 0.6091,
"step": 150
},
{
"epoch": 0.04362510554461019,
"grad_norm": 1.168205976486206,
"learning_rate": 4.3600562587904366e-05,
"loss": 0.5557,
"step": 155
},
{
"epoch": 0.04503236701379116,
"grad_norm": 0.8458957076072693,
"learning_rate": 4.50070323488045e-05,
"loss": 0.4024,
"step": 160
},
{
"epoch": 0.04643962848297214,
"grad_norm": 1.442372441291809,
"learning_rate": 4.641350210970464e-05,
"loss": 0.8014,
"step": 165
},
{
"epoch": 0.04784688995215311,
"grad_norm": 1.6391854286193848,
"learning_rate": 4.7819971870604783e-05,
"loss": 1.012,
"step": 170
},
{
"epoch": 0.04925415142133408,
"grad_norm": 1.334926724433899,
"learning_rate": 4.9226441631504925e-05,
"loss": 0.6832,
"step": 175
},
{
"epoch": 0.05066141289051506,
"grad_norm": 1.0498499870300293,
"learning_rate": 5.0632911392405066e-05,
"loss": 0.6351,
"step": 180
},
{
"epoch": 0.05206867435969603,
"grad_norm": 2.0023510456085205,
"learning_rate": 5.203938115330521e-05,
"loss": 0.6497,
"step": 185
},
{
"epoch": 0.053475935828877004,
"grad_norm": 1.9690536260604858,
"learning_rate": 5.344585091420534e-05,
"loss": 0.7836,
"step": 190
},
{
"epoch": 0.054883197298057976,
"grad_norm": 1.4102208614349365,
"learning_rate": 5.4852320675105484e-05,
"loss": 0.5955,
"step": 195
},
{
"epoch": 0.056290458767238954,
"grad_norm": 0.9214100241661072,
"learning_rate": 5.6258790436005626e-05,
"loss": 0.7519,
"step": 200
},
{
"epoch": 0.057697720236419926,
"grad_norm": 1.3210060596466064,
"learning_rate": 5.766526019690577e-05,
"loss": 0.5468,
"step": 205
},
{
"epoch": 0.0591049817056009,
"grad_norm": 1.723496437072754,
"learning_rate": 5.907172995780591e-05,
"loss": 0.4599,
"step": 210
},
{
"epoch": 0.06051224317478188,
"grad_norm": 1.1883797645568848,
"learning_rate": 6.047819971870605e-05,
"loss": 0.7824,
"step": 215
},
{
"epoch": 0.06191950464396285,
"grad_norm": 1.0189827680587769,
"learning_rate": 6.18846694796062e-05,
"loss": 0.5021,
"step": 220
},
{
"epoch": 0.06332676611314382,
"grad_norm": 1.1384845972061157,
"learning_rate": 6.329113924050633e-05,
"loss": 0.7703,
"step": 225
},
{
"epoch": 0.0647340275823248,
"grad_norm": 2.097339391708374,
"learning_rate": 6.469760900140648e-05,
"loss": 1.0998,
"step": 230
},
{
"epoch": 0.06614128905150576,
"grad_norm": 2.55668044090271,
"learning_rate": 6.610407876230662e-05,
"loss": 0.5333,
"step": 235
},
{
"epoch": 0.06754855052068674,
"grad_norm": 1.1277037858963013,
"learning_rate": 6.751054852320675e-05,
"loss": 0.6361,
"step": 240
},
{
"epoch": 0.06895581198986772,
"grad_norm": 2.0660481452941895,
"learning_rate": 6.89170182841069e-05,
"loss": 0.7486,
"step": 245
},
{
"epoch": 0.07036307345904869,
"grad_norm": 2.1117303371429443,
"learning_rate": 7.032348804500703e-05,
"loss": 0.7103,
"step": 250
},
{
"epoch": 0.07177033492822966,
"grad_norm": 1.1796034574508667,
"learning_rate": 7.172995780590718e-05,
"loss": 0.6379,
"step": 255
},
{
"epoch": 0.07317759639741064,
"grad_norm": 1.470502257347107,
"learning_rate": 7.313642756680732e-05,
"loss": 0.4737,
"step": 260
},
{
"epoch": 0.07458485786659161,
"grad_norm": 1.443248987197876,
"learning_rate": 7.454289732770746e-05,
"loss": 0.812,
"step": 265
},
{
"epoch": 0.07599211933577259,
"grad_norm": 3.0095481872558594,
"learning_rate": 7.59493670886076e-05,
"loss": 0.4456,
"step": 270
},
{
"epoch": 0.07739938080495357,
"grad_norm": 1.157353401184082,
"learning_rate": 7.735583684950773e-05,
"loss": 0.524,
"step": 275
},
{
"epoch": 0.07880664227413453,
"grad_norm": 1.1761438846588135,
"learning_rate": 7.876230661040788e-05,
"loss": 0.7222,
"step": 280
},
{
"epoch": 0.08021390374331551,
"grad_norm": 0.64066082239151,
"learning_rate": 8.016877637130802e-05,
"loss": 0.5886,
"step": 285
},
{
"epoch": 0.08162116521249649,
"grad_norm": 0.9376239776611328,
"learning_rate": 8.157524613220817e-05,
"loss": 0.6901,
"step": 290
},
{
"epoch": 0.08302842668167745,
"grad_norm": 0.9339331388473511,
"learning_rate": 8.29817158931083e-05,
"loss": 0.389,
"step": 295
},
{
"epoch": 0.08443568815085843,
"grad_norm": 1.1914637088775635,
"learning_rate": 8.438818565400845e-05,
"loss": 0.535,
"step": 300
},
{
"epoch": 0.0858429496200394,
"grad_norm": 1.1882398128509521,
"learning_rate": 8.579465541490858e-05,
"loss": 0.3909,
"step": 305
},
{
"epoch": 0.08725021108922038,
"grad_norm": 1.5186290740966797,
"learning_rate": 8.720112517580873e-05,
"loss": 0.6317,
"step": 310
},
{
"epoch": 0.08865747255840135,
"grad_norm": 1.509944200515747,
"learning_rate": 8.860759493670887e-05,
"loss": 0.4739,
"step": 315
},
{
"epoch": 0.09006473402758232,
"grad_norm": 1.4957388639450073,
"learning_rate": 9.0014064697609e-05,
"loss": 0.6078,
"step": 320
},
{
"epoch": 0.0914719954967633,
"grad_norm": 1.8821747303009033,
"learning_rate": 9.142053445850915e-05,
"loss": 0.8152,
"step": 325
},
{
"epoch": 0.09287925696594428,
"grad_norm": 0.9399609565734863,
"learning_rate": 9.282700421940928e-05,
"loss": 0.6356,
"step": 330
},
{
"epoch": 0.09428651843512524,
"grad_norm": 1.4053034782409668,
"learning_rate": 9.423347398030943e-05,
"loss": 0.7405,
"step": 335
},
{
"epoch": 0.09569377990430622,
"grad_norm": 0.9742883443832397,
"learning_rate": 9.563994374120957e-05,
"loss": 0.7251,
"step": 340
},
{
"epoch": 0.0971010413734872,
"grad_norm": 3.047891616821289,
"learning_rate": 9.704641350210972e-05,
"loss": 0.7387,
"step": 345
},
{
"epoch": 0.09850830284266816,
"grad_norm": 0.8324292898178101,
"learning_rate": 9.845288326300985e-05,
"loss": 0.584,
"step": 350
},
{
"epoch": 0.09991556431184914,
"grad_norm": 1.0198436975479126,
"learning_rate": 9.985935302391e-05,
"loss": 0.4691,
"step": 355
},
{
"epoch": 0.10132282578103012,
"grad_norm": 3.0640432834625244,
"learning_rate": 0.00010126582278481013,
"loss": 0.4508,
"step": 360
},
{
"epoch": 0.10273008725021109,
"grad_norm": 0.9727720022201538,
"learning_rate": 0.00010267229254571027,
"loss": 0.4544,
"step": 365
},
{
"epoch": 0.10413734871939206,
"grad_norm": 1.4771376848220825,
"learning_rate": 0.00010407876230661042,
"loss": 0.5085,
"step": 370
},
{
"epoch": 0.10554461018857304,
"grad_norm": 1.5016095638275146,
"learning_rate": 0.00010548523206751055,
"loss": 0.5482,
"step": 375
},
{
"epoch": 0.10695187165775401,
"grad_norm": 1.5180020332336426,
"learning_rate": 0.00010689170182841069,
"loss": 0.7243,
"step": 380
},
{
"epoch": 0.10835913312693499,
"grad_norm": 1.8111554384231567,
"learning_rate": 0.00010829817158931083,
"loss": 0.5539,
"step": 385
},
{
"epoch": 0.10976639459611595,
"grad_norm": 1.488231897354126,
"learning_rate": 0.00010970464135021097,
"loss": 0.4533,
"step": 390
},
{
"epoch": 0.11117365606529693,
"grad_norm": 1.7389737367630005,
"learning_rate": 0.00011111111111111112,
"loss": 0.6554,
"step": 395
},
{
"epoch": 0.11258091753447791,
"grad_norm": 0.9282882213592529,
"learning_rate": 0.00011251758087201125,
"loss": 0.5665,
"step": 400
},
{
"epoch": 0.11398817900365887,
"grad_norm": 1.2808202505111694,
"learning_rate": 0.0001139240506329114,
"loss": 0.8137,
"step": 405
},
{
"epoch": 0.11539544047283985,
"grad_norm": 1.520807147026062,
"learning_rate": 0.00011533052039381153,
"loss": 0.7432,
"step": 410
},
{
"epoch": 0.11680270194202083,
"grad_norm": 1.4392223358154297,
"learning_rate": 0.0001167369901547117,
"loss": 0.478,
"step": 415
},
{
"epoch": 0.1182099634112018,
"grad_norm": 0.8880683779716492,
"learning_rate": 0.00011814345991561182,
"loss": 0.4246,
"step": 420
},
{
"epoch": 0.11961722488038277,
"grad_norm": 0.832594633102417,
"learning_rate": 0.00011954992967651195,
"loss": 0.5505,
"step": 425
},
{
"epoch": 0.12102448634956375,
"grad_norm": 0.4944693148136139,
"learning_rate": 0.0001209563994374121,
"loss": 0.4342,
"step": 430
},
{
"epoch": 0.12243174781874472,
"grad_norm": 0.8733665943145752,
"learning_rate": 0.00012236286919831225,
"loss": 0.5839,
"step": 435
},
{
"epoch": 0.1238390092879257,
"grad_norm": 1.1832093000411987,
"learning_rate": 0.0001237693389592124,
"loss": 0.6976,
"step": 440
},
{
"epoch": 0.12524627075710668,
"grad_norm": 1.0406477451324463,
"learning_rate": 0.00012517580872011252,
"loss": 0.6353,
"step": 445
},
{
"epoch": 0.12665353222628764,
"grad_norm": 0.788364827632904,
"learning_rate": 0.00012658227848101267,
"loss": 0.3272,
"step": 450
},
{
"epoch": 0.1280607936954686,
"grad_norm": 1.2941433191299438,
"learning_rate": 0.00012798874824191281,
"loss": 0.7372,
"step": 455
},
{
"epoch": 0.1294680551646496,
"grad_norm": 0.9147971272468567,
"learning_rate": 0.00012939521800281296,
"loss": 0.5474,
"step": 460
},
{
"epoch": 0.13087531663383056,
"grad_norm": 1.0644923448562622,
"learning_rate": 0.00013080168776371308,
"loss": 0.6286,
"step": 465
},
{
"epoch": 0.13228257810301153,
"grad_norm": 0.8214511275291443,
"learning_rate": 0.00013220815752461323,
"loss": 0.3655,
"step": 470
},
{
"epoch": 0.13368983957219252,
"grad_norm": 0.7348743677139282,
"learning_rate": 0.00013361462728551338,
"loss": 0.5278,
"step": 475
},
{
"epoch": 0.13509710104137349,
"grad_norm": 1.0437523126602173,
"learning_rate": 0.0001350210970464135,
"loss": 0.4665,
"step": 480
},
{
"epoch": 0.13650436251055445,
"grad_norm": 1.6613603830337524,
"learning_rate": 0.00013642756680731365,
"loss": 0.7575,
"step": 485
},
{
"epoch": 0.13791162397973544,
"grad_norm": 1.0844550132751465,
"learning_rate": 0.0001378340365682138,
"loss": 0.6744,
"step": 490
},
{
"epoch": 0.1393188854489164,
"grad_norm": 1.3651305437088013,
"learning_rate": 0.00013924050632911395,
"loss": 0.8377,
"step": 495
},
{
"epoch": 0.14072614691809737,
"grad_norm": 1.256631851196289,
"learning_rate": 0.00014064697609001407,
"loss": 0.6523,
"step": 500
},
{
"epoch": 0.14213340838727836,
"grad_norm": 1.7894726991653442,
"learning_rate": 0.0001420534458509142,
"loss": 0.5191,
"step": 505
},
{
"epoch": 0.14354066985645933,
"grad_norm": 0.8206887245178223,
"learning_rate": 0.00014345991561181436,
"loss": 0.378,
"step": 510
},
{
"epoch": 0.1449479313256403,
"grad_norm": 1.6677026748657227,
"learning_rate": 0.00014486638537271449,
"loss": 0.326,
"step": 515
},
{
"epoch": 0.1463551927948213,
"grad_norm": 1.4679995775222778,
"learning_rate": 0.00014627285513361463,
"loss": 0.6619,
"step": 520
},
{
"epoch": 0.14776245426400225,
"grad_norm": 0.829093337059021,
"learning_rate": 0.00014767932489451478,
"loss": 0.7372,
"step": 525
},
{
"epoch": 0.14916971573318322,
"grad_norm": 1.6188422441482544,
"learning_rate": 0.00014908579465541493,
"loss": 0.5666,
"step": 530
},
{
"epoch": 0.1505769772023642,
"grad_norm": 1.319091558456421,
"learning_rate": 0.00015049226441631505,
"loss": 0.8461,
"step": 535
},
{
"epoch": 0.15198423867154517,
"grad_norm": 1.7154995203018188,
"learning_rate": 0.0001518987341772152,
"loss": 0.6463,
"step": 540
},
{
"epoch": 0.15339150014072614,
"grad_norm": 1.4643100500106812,
"learning_rate": 0.00015330520393811535,
"loss": 0.6149,
"step": 545
},
{
"epoch": 0.15479876160990713,
"grad_norm": 1.554081916809082,
"learning_rate": 0.00015471167369901547,
"loss": 0.7509,
"step": 550
},
{
"epoch": 0.1562060230790881,
"grad_norm": 1.040045976638794,
"learning_rate": 0.00015611814345991562,
"loss": 0.6607,
"step": 555
},
{
"epoch": 0.15761328454826906,
"grad_norm": 1.9093159437179565,
"learning_rate": 0.00015752461322081577,
"loss": 0.6108,
"step": 560
},
{
"epoch": 0.15902054601745005,
"grad_norm": 0.8650393486022949,
"learning_rate": 0.0001589310829817159,
"loss": 0.629,
"step": 565
},
{
"epoch": 0.16042780748663102,
"grad_norm": 1.011257529258728,
"learning_rate": 0.00016033755274261603,
"loss": 0.2586,
"step": 570
},
{
"epoch": 0.16183506895581198,
"grad_norm": 0.8653711676597595,
"learning_rate": 0.00016174402250351618,
"loss": 0.6063,
"step": 575
},
{
"epoch": 0.16324233042499298,
"grad_norm": 1.7408281564712524,
"learning_rate": 0.00016315049226441633,
"loss": 0.4728,
"step": 580
},
{
"epoch": 0.16464959189417394,
"grad_norm": 0.7200327515602112,
"learning_rate": 0.00016455696202531648,
"loss": 0.6803,
"step": 585
},
{
"epoch": 0.1660568533633549,
"grad_norm": 2.032118320465088,
"learning_rate": 0.0001659634317862166,
"loss": 0.6615,
"step": 590
},
{
"epoch": 0.1674641148325359,
"grad_norm": 1.1240061521530151,
"learning_rate": 0.00016736990154711675,
"loss": 0.4675,
"step": 595
},
{
"epoch": 0.16887137630171686,
"grad_norm": 0.8609156012535095,
"learning_rate": 0.0001687763713080169,
"loss": 0.6737,
"step": 600
},
{
"epoch": 0.17027863777089783,
"grad_norm": 1.4271563291549683,
"learning_rate": 0.00017018284106891702,
"loss": 0.6479,
"step": 605
},
{
"epoch": 0.1716858992400788,
"grad_norm": 0.8409131765365601,
"learning_rate": 0.00017158931082981717,
"loss": 0.5877,
"step": 610
},
{
"epoch": 0.17309316070925979,
"grad_norm": 1.002172827720642,
"learning_rate": 0.00017299578059071731,
"loss": 0.5572,
"step": 615
},
{
"epoch": 0.17450042217844075,
"grad_norm": 0.7729489207267761,
"learning_rate": 0.00017440225035161746,
"loss": 0.64,
"step": 620
},
{
"epoch": 0.17590768364762172,
"grad_norm": 1.3359206914901733,
"learning_rate": 0.00017580872011251758,
"loss": 0.5652,
"step": 625
},
{
"epoch": 0.1773149451168027,
"grad_norm": 2.492105722427368,
"learning_rate": 0.00017721518987341773,
"loss": 0.584,
"step": 630
},
{
"epoch": 0.17872220658598367,
"grad_norm": 1.271020770072937,
"learning_rate": 0.00017862165963431788,
"loss": 0.4011,
"step": 635
},
{
"epoch": 0.18012946805516464,
"grad_norm": 0.8744266629219055,
"learning_rate": 0.000180028129395218,
"loss": 0.616,
"step": 640
},
{
"epoch": 0.18153672952434563,
"grad_norm": 1.2818926572799683,
"learning_rate": 0.00018143459915611815,
"loss": 0.463,
"step": 645
},
{
"epoch": 0.1829439909935266,
"grad_norm": 1.3106176853179932,
"learning_rate": 0.0001828410689170183,
"loss": 0.4851,
"step": 650
},
{
"epoch": 0.18435125246270756,
"grad_norm": 1.068864345550537,
"learning_rate": 0.00018424753867791845,
"loss": 0.6297,
"step": 655
},
{
"epoch": 0.18575851393188855,
"grad_norm": 1.879895567893982,
"learning_rate": 0.00018565400843881857,
"loss": 0.6638,
"step": 660
},
{
"epoch": 0.18716577540106952,
"grad_norm": 1.4671173095703125,
"learning_rate": 0.00018706047819971872,
"loss": 0.7588,
"step": 665
},
{
"epoch": 0.18857303687025048,
"grad_norm": 1.5851764678955078,
"learning_rate": 0.00018846694796061886,
"loss": 0.7505,
"step": 670
},
{
"epoch": 0.18998029833943147,
"grad_norm": 0.7149075269699097,
"learning_rate": 0.00018987341772151899,
"loss": 0.3806,
"step": 675
},
{
"epoch": 0.19138755980861244,
"grad_norm": 1.049310326576233,
"learning_rate": 0.00019127988748241913,
"loss": 0.5908,
"step": 680
},
{
"epoch": 0.1927948212777934,
"grad_norm": 0.950442373752594,
"learning_rate": 0.00019268635724331928,
"loss": 0.6755,
"step": 685
},
{
"epoch": 0.1942020827469744,
"grad_norm": 0.9287855625152588,
"learning_rate": 0.00019409282700421943,
"loss": 0.5703,
"step": 690
},
{
"epoch": 0.19560934421615536,
"grad_norm": 0.7228776216506958,
"learning_rate": 0.00019549929676511955,
"loss": 0.5971,
"step": 695
},
{
"epoch": 0.19701660568533633,
"grad_norm": 1.04582941532135,
"learning_rate": 0.0001969057665260197,
"loss": 0.9477,
"step": 700
},
{
"epoch": 0.19842386715451732,
"grad_norm": 1.6367225646972656,
"learning_rate": 0.00019831223628691985,
"loss": 0.5875,
"step": 705
},
{
"epoch": 0.19983112862369828,
"grad_norm": 0.724415123462677,
"learning_rate": 0.00019971870604782,
"loss": 0.5531,
"step": 710
},
{
"epoch": 0.20123839009287925,
"grad_norm": 1.1167938709259033,
"learning_rate": 0.00019999980693280142,
"loss": 0.4568,
"step": 715
},
{
"epoch": 0.20264565156206024,
"grad_norm": 3.7291440963745117,
"learning_rate": 0.00019999902259858484,
"loss": 0.4796,
"step": 720
},
{
"epoch": 0.2040529130312412,
"grad_norm": 1.0626037120819092,
"learning_rate": 0.00019999763493537887,
"loss": 0.5454,
"step": 725
},
{
"epoch": 0.20546017450042217,
"grad_norm": 1.1673458814620972,
"learning_rate": 0.00019999564395155577,
"loss": 0.6261,
"step": 730
},
{
"epoch": 0.20686743596960316,
"grad_norm": 1.1592299938201904,
"learning_rate": 0.00019999304965912784,
"loss": 0.6726,
"step": 735
},
{
"epoch": 0.20827469743878413,
"grad_norm": 1.117803692817688,
"learning_rate": 0.00019998985207374736,
"loss": 0.8504,
"step": 740
},
{
"epoch": 0.2096819589079651,
"grad_norm": 0.8449244499206543,
"learning_rate": 0.00019998605121470645,
"loss": 0.4394,
"step": 745
},
{
"epoch": 0.2110892203771461,
"grad_norm": 0.9696683883666992,
"learning_rate": 0.00019998164710493705,
"loss": 0.3861,
"step": 750
},
{
"epoch": 0.21249648184632705,
"grad_norm": 1.5206379890441895,
"learning_rate": 0.00019997663977101068,
"loss": 0.6289,
"step": 755
},
{
"epoch": 0.21390374331550802,
"grad_norm": 1.5071372985839844,
"learning_rate": 0.00019997102924313836,
"loss": 0.8584,
"step": 760
},
{
"epoch": 0.215311004784689,
"grad_norm": 0.9600889086723328,
"learning_rate": 0.00019996481555517028,
"loss": 0.3949,
"step": 765
},
{
"epoch": 0.21671826625386997,
"grad_norm": 0.8249372839927673,
"learning_rate": 0.00019995799874459585,
"loss": 0.559,
"step": 770
},
{
"epoch": 0.21812552772305094,
"grad_norm": 1.2509324550628662,
"learning_rate": 0.00019995057885254333,
"loss": 0.5327,
"step": 775
},
{
"epoch": 0.2195327891922319,
"grad_norm": 0.8242643475532532,
"learning_rate": 0.00019994255592377936,
"loss": 0.4605,
"step": 780
},
{
"epoch": 0.2209400506614129,
"grad_norm": 0.7586041688919067,
"learning_rate": 0.00019993393000670916,
"loss": 0.4722,
"step": 785
},
{
"epoch": 0.22234731213059386,
"grad_norm": 1.2805287837982178,
"learning_rate": 0.00019992470115337592,
"loss": 0.2861,
"step": 790
},
{
"epoch": 0.22375457359977483,
"grad_norm": 1.6375665664672852,
"learning_rate": 0.00019991486941946048,
"loss": 0.5846,
"step": 795
},
{
"epoch": 0.22516183506895582,
"grad_norm": 0.8348977565765381,
"learning_rate": 0.00019990443486428118,
"loss": 0.4657,
"step": 800
},
{
"epoch": 0.22656909653813678,
"grad_norm": 1.1735246181488037,
"learning_rate": 0.0001998933975507933,
"loss": 0.6255,
"step": 805
},
{
"epoch": 0.22797635800731775,
"grad_norm": 1.1627134084701538,
"learning_rate": 0.00019988175754558874,
"loss": 0.7479,
"step": 810
},
{
"epoch": 0.22938361947649874,
"grad_norm": 1.916646957397461,
"learning_rate": 0.00019986951491889578,
"loss": 0.4814,
"step": 815
},
{
"epoch": 0.2307908809456797,
"grad_norm": 1.5607751607894897,
"learning_rate": 0.00019985666974457847,
"loss": 0.5807,
"step": 820
},
{
"epoch": 0.23219814241486067,
"grad_norm": 0.759840726852417,
"learning_rate": 0.0001998432221001362,
"loss": 0.5299,
"step": 825
},
{
"epoch": 0.23360540388404166,
"grad_norm": 0.8141459226608276,
"learning_rate": 0.0001998291720667033,
"loss": 0.584,
"step": 830
},
{
"epoch": 0.23501266535322263,
"grad_norm": 1.113457441329956,
"learning_rate": 0.00019981451972904854,
"loss": 0.5733,
"step": 835
},
{
"epoch": 0.2364199268224036,
"grad_norm": 1.1313204765319824,
"learning_rate": 0.00019979926517557458,
"loss": 0.6995,
"step": 840
},
{
"epoch": 0.23782718829158458,
"grad_norm": 0.8379271626472473,
"learning_rate": 0.00019978340849831743,
"loss": 0.3914,
"step": 845
},
{
"epoch": 0.23923444976076555,
"grad_norm": 0.8467435240745544,
"learning_rate": 0.00019976694979294596,
"loss": 0.6813,
"step": 850
},
{
"epoch": 0.24064171122994651,
"grad_norm": 1.30973219871521,
"learning_rate": 0.00019974988915876134,
"loss": 0.4174,
"step": 855
},
{
"epoch": 0.2420489726991275,
"grad_norm": 0.9715356230735779,
"learning_rate": 0.0001997322266986963,
"loss": 0.4208,
"step": 860
},
{
"epoch": 0.24345623416830847,
"grad_norm": 1.0101361274719238,
"learning_rate": 0.0001997139625193146,
"loss": 0.602,
"step": 865
},
{
"epoch": 0.24486349563748944,
"grad_norm": 0.9341487288475037,
"learning_rate": 0.0001996950967308104,
"loss": 0.3989,
"step": 870
},
{
"epoch": 0.24627075710667043,
"grad_norm": 1.2196135520935059,
"learning_rate": 0.00019967562944700763,
"loss": 0.4883,
"step": 875
},
{
"epoch": 0.2476780185758514,
"grad_norm": 1.2374253273010254,
"learning_rate": 0.00019965556078535917,
"loss": 0.7397,
"step": 880
},
{
"epoch": 0.24908528004503236,
"grad_norm": 0.561997652053833,
"learning_rate": 0.00019963489086694626,
"loss": 0.7548,
"step": 885
},
{
"epoch": 0.25049254151421335,
"grad_norm": 0.8023036122322083,
"learning_rate": 0.00019961361981647775,
"loss": 0.4486,
"step": 890
},
{
"epoch": 0.2518998029833943,
"grad_norm": 0.9484225511550903,
"learning_rate": 0.00019959174776228928,
"loss": 0.4158,
"step": 895
},
{
"epoch": 0.2533070644525753,
"grad_norm": 1.119430661201477,
"learning_rate": 0.0001995692748363426,
"loss": 0.7553,
"step": 900
},
{
"epoch": 0.25471432592175625,
"grad_norm": 1.4776628017425537,
"learning_rate": 0.0001995462011742247,
"loss": 0.2808,
"step": 905
},
{
"epoch": 0.2561215873909372,
"grad_norm": 1.370290756225586,
"learning_rate": 0.00019952252691514706,
"loss": 0.4522,
"step": 910
},
{
"epoch": 0.25752884886011823,
"grad_norm": 1.1513909101486206,
"learning_rate": 0.00019949825220194468,
"loss": 0.5382,
"step": 915
},
{
"epoch": 0.2589361103292992,
"grad_norm": 1.0892587900161743,
"learning_rate": 0.00019947337718107547,
"loss": 0.5407,
"step": 920
},
{
"epoch": 0.26034337179848016,
"grad_norm": 1.1014186143875122,
"learning_rate": 0.00019944790200261903,
"loss": 0.5723,
"step": 925
},
{
"epoch": 0.2617506332676611,
"grad_norm": 1.4293971061706543,
"learning_rate": 0.000199421826820276,
"loss": 0.7333,
"step": 930
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.5284586548805237,
"learning_rate": 0.00019939515179136713,
"loss": 0.6351,
"step": 935
},
{
"epoch": 0.26456515620602306,
"grad_norm": 0.7904505133628845,
"learning_rate": 0.0001993678770768321,
"loss": 0.6792,
"step": 940
},
{
"epoch": 0.2659724176752041,
"grad_norm": 0.5654340982437134,
"learning_rate": 0.0001993400028412288,
"loss": 0.4223,
"step": 945
},
{
"epoch": 0.26737967914438504,
"grad_norm": 0.9616327285766602,
"learning_rate": 0.00019931152925273225,
"loss": 0.4585,
"step": 950
},
{
"epoch": 0.268786940613566,
"grad_norm": 1.3930063247680664,
"learning_rate": 0.00019928245648313347,
"loss": 0.7828,
"step": 955
},
{
"epoch": 0.27019420208274697,
"grad_norm": 1.6367273330688477,
"learning_rate": 0.00019925278470783866,
"loss": 0.6883,
"step": 960
},
{
"epoch": 0.27160146355192794,
"grad_norm": 0.9764294028282166,
"learning_rate": 0.00019922251410586802,
"loss": 0.4474,
"step": 965
},
{
"epoch": 0.2730087250211089,
"grad_norm": 0.7450019121170044,
"learning_rate": 0.00019919164485985463,
"loss": 0.436,
"step": 970
},
{
"epoch": 0.2744159864902899,
"grad_norm": 0.774627149105072,
"learning_rate": 0.0001991601771560434,
"loss": 0.3708,
"step": 975
},
{
"epoch": 0.2758232479594709,
"grad_norm": 1.1829273700714111,
"learning_rate": 0.00019912811118429,
"loss": 0.4453,
"step": 980
},
{
"epoch": 0.27723050942865185,
"grad_norm": 1.0340484380722046,
"learning_rate": 0.0001990954471380596,
"loss": 0.3123,
"step": 985
},
{
"epoch": 0.2786377708978328,
"grad_norm": 0.6128121018409729,
"learning_rate": 0.00019906218521442576,
"loss": 0.3459,
"step": 990
},
{
"epoch": 0.2800450323670138,
"grad_norm": 0.8443979024887085,
"learning_rate": 0.00019902832561406934,
"loss": 0.7583,
"step": 995
},
{
"epoch": 0.28145229383619474,
"grad_norm": 1.4136847257614136,
"learning_rate": 0.00019899386854127705,
"loss": 0.6206,
"step": 1000
},
{
"epoch": 0.28285955530537576,
"grad_norm": 0.7922631502151489,
"learning_rate": 0.00019895881420394052,
"loss": 0.5676,
"step": 1005
},
{
"epoch": 0.28426681677455673,
"grad_norm": 1.7876763343811035,
"learning_rate": 0.0001989231628135547,
"loss": 0.5216,
"step": 1010
},
{
"epoch": 0.2856740782437377,
"grad_norm": 1.3975410461425781,
"learning_rate": 0.00019888691458521692,
"loss": 0.5053,
"step": 1015
},
{
"epoch": 0.28708133971291866,
"grad_norm": 1.0760260820388794,
"learning_rate": 0.00019885006973762535,
"loss": 0.3415,
"step": 1020
},
{
"epoch": 0.2884886011820996,
"grad_norm": 1.2067842483520508,
"learning_rate": 0.00019881262849307785,
"loss": 0.4352,
"step": 1025
},
{
"epoch": 0.2898958626512806,
"grad_norm": 0.8484588265419006,
"learning_rate": 0.0001987745910774705,
"loss": 0.6558,
"step": 1030
},
{
"epoch": 0.29130312412046155,
"grad_norm": 1.2854669094085693,
"learning_rate": 0.00019873595772029628,
"loss": 0.5144,
"step": 1035
},
{
"epoch": 0.2927103855896426,
"grad_norm": 0.8903659582138062,
"learning_rate": 0.00019869672865464373,
"loss": 0.7212,
"step": 1040
},
{
"epoch": 0.29411764705882354,
"grad_norm": 1.1273301839828491,
"learning_rate": 0.00019865690411719546,
"loss": 0.5763,
"step": 1045
},
{
"epoch": 0.2955249085280045,
"grad_norm": 1.6163692474365234,
"learning_rate": 0.00019861648434822687,
"loss": 0.8076,
"step": 1050
},
{
"epoch": 0.29693216999718547,
"grad_norm": 1.0796860456466675,
"learning_rate": 0.00019857546959160444,
"loss": 0.8208,
"step": 1055
},
{
"epoch": 0.29833943146636643,
"grad_norm": 0.8399056792259216,
"learning_rate": 0.00019853386009478454,
"loss": 0.5939,
"step": 1060
},
{
"epoch": 0.2997466929355474,
"grad_norm": 1.2428550720214844,
"learning_rate": 0.0001984916561088118,
"loss": 0.2594,
"step": 1065
},
{
"epoch": 0.3011539544047284,
"grad_norm": 2.2983717918395996,
"learning_rate": 0.00019844885788831756,
"loss": 0.7697,
"step": 1070
},
{
"epoch": 0.3025612158739094,
"grad_norm": 1.0774344205856323,
"learning_rate": 0.0001984054656915184,
"loss": 0.6441,
"step": 1075
},
{
"epoch": 0.30396847734309035,
"grad_norm": 0.6637004613876343,
"learning_rate": 0.00019836147978021467,
"loss": 0.4219,
"step": 1080
},
{
"epoch": 0.3053757388122713,
"grad_norm": 0.9496357440948486,
"learning_rate": 0.00019831690041978862,
"loss": 0.6518,
"step": 1085
},
{
"epoch": 0.3067830002814523,
"grad_norm": 1.3843315839767456,
"learning_rate": 0.00019827172787920315,
"loss": 0.6269,
"step": 1090
},
{
"epoch": 0.30819026175063324,
"grad_norm": 0.9899942278862,
"learning_rate": 0.0001982259624309999,
"loss": 0.5791,
"step": 1095
},
{
"epoch": 0.30959752321981426,
"grad_norm": 0.8998156785964966,
"learning_rate": 0.00019817960435129778,
"loss": 0.7362,
"step": 1100
},
{
"epoch": 0.31100478468899523,
"grad_norm": 0.615544319152832,
"learning_rate": 0.00019813265391979137,
"loss": 0.457,
"step": 1105
},
{
"epoch": 0.3124120461581762,
"grad_norm": 1.026685118675232,
"learning_rate": 0.00019808511141974886,
"loss": 0.5494,
"step": 1110
},
{
"epoch": 0.31381930762735716,
"grad_norm": 1.0256643295288086,
"learning_rate": 0.00019803697713801084,
"loss": 0.3588,
"step": 1115
},
{
"epoch": 0.3152265690965381,
"grad_norm": 0.8720577359199524,
"learning_rate": 0.00019798825136498814,
"loss": 0.5563,
"step": 1120
},
{
"epoch": 0.3166338305657191,
"grad_norm": 0.8864659667015076,
"learning_rate": 0.00019793893439466043,
"loss": 0.3091,
"step": 1125
},
{
"epoch": 0.3180410920349001,
"grad_norm": 1.0853145122528076,
"learning_rate": 0.00019788902652457412,
"loss": 0.6204,
"step": 1130
},
{
"epoch": 0.3194483535040811,
"grad_norm": 1.6496775150299072,
"learning_rate": 0.0001978385280558409,
"loss": 0.4948,
"step": 1135
},
{
"epoch": 0.32085561497326204,
"grad_norm": 1.668879508972168,
"learning_rate": 0.00019778743929313555,
"loss": 0.7545,
"step": 1140
},
{
"epoch": 0.322262876442443,
"grad_norm": 0.7751437425613403,
"learning_rate": 0.00019773576054469446,
"loss": 0.4416,
"step": 1145
},
{
"epoch": 0.32367013791162397,
"grad_norm": 1.3606644868850708,
"learning_rate": 0.0001976834921223135,
"loss": 0.4837,
"step": 1150
},
{
"epoch": 0.32507739938080493,
"grad_norm": 0.5276009440422058,
"learning_rate": 0.0001976306343413463,
"loss": 0.2264,
"step": 1155
},
{
"epoch": 0.32648466084998595,
"grad_norm": 1.034682035446167,
"learning_rate": 0.00019757718752070239,
"loss": 0.5388,
"step": 1160
},
{
"epoch": 0.3278919223191669,
"grad_norm": 0.9205548763275146,
"learning_rate": 0.00019752315198284497,
"loss": 0.7432,
"step": 1165
},
{
"epoch": 0.3292991837883479,
"grad_norm": 0.2892135977745056,
"learning_rate": 0.00019746852805378932,
"loss": 0.2681,
"step": 1170
},
{
"epoch": 0.33070644525752885,
"grad_norm": 1.4844127893447876,
"learning_rate": 0.0001974133160631007,
"loss": 0.4837,
"step": 1175
},
{
"epoch": 0.3321137067267098,
"grad_norm": 0.7771471738815308,
"learning_rate": 0.00019735751634389226,
"loss": 0.7133,
"step": 1180
},
{
"epoch": 0.3335209681958908,
"grad_norm": 1.23273766040802,
"learning_rate": 0.00019730112923282321,
"loss": 0.789,
"step": 1185
},
{
"epoch": 0.3349282296650718,
"grad_norm": 1.751483678817749,
"learning_rate": 0.0001972441550700966,
"loss": 0.7569,
"step": 1190
},
{
"epoch": 0.33633549113425276,
"grad_norm": 0.31647899746894836,
"learning_rate": 0.00019718659419945756,
"loss": 0.4276,
"step": 1195
},
{
"epoch": 0.3377427526034337,
"grad_norm": 1.3560551404953003,
"learning_rate": 0.00019712844696819076,
"loss": 0.4853,
"step": 1200
},
{
"epoch": 0.3391500140726147,
"grad_norm": 1.571906328201294,
"learning_rate": 0.00019706971372711882,
"loss": 0.3889,
"step": 1205
},
{
"epoch": 0.34055727554179566,
"grad_norm": 1.2469801902770996,
"learning_rate": 0.00019701039483059981,
"loss": 0.5063,
"step": 1210
},
{
"epoch": 0.3419645370109766,
"grad_norm": 0.660874605178833,
"learning_rate": 0.00019695049063652543,
"loss": 0.4789,
"step": 1215
},
{
"epoch": 0.3433717984801576,
"grad_norm": 0.9069953560829163,
"learning_rate": 0.00019689000150631845,
"loss": 0.393,
"step": 1220
},
{
"epoch": 0.3447790599493386,
"grad_norm": 1.9359229803085327,
"learning_rate": 0.000196828927804931,
"loss": 0.4297,
"step": 1225
},
{
"epoch": 0.34618632141851957,
"grad_norm": 1.063952088356018,
"learning_rate": 0.00019676726990084195,
"loss": 0.5455,
"step": 1230
},
{
"epoch": 0.34759358288770054,
"grad_norm": 1.7802363634109497,
"learning_rate": 0.000196705028166055,
"loss": 0.5684,
"step": 1235
},
{
"epoch": 0.3490008443568815,
"grad_norm": 1.1787841320037842,
"learning_rate": 0.00019664220297609624,
"loss": 0.6942,
"step": 1240
},
{
"epoch": 0.35040810582606247,
"grad_norm": 1.146467924118042,
"learning_rate": 0.00019657879471001195,
"loss": 0.6188,
"step": 1245
},
{
"epoch": 0.35181536729524343,
"grad_norm": 1.322690486907959,
"learning_rate": 0.0001965148037503663,
"loss": 0.5142,
"step": 1250
},
{
"epoch": 0.35322262876442445,
"grad_norm": 0.8079725503921509,
"learning_rate": 0.0001964502304832391,
"loss": 0.4729,
"step": 1255
},
{
"epoch": 0.3546298902336054,
"grad_norm": 1.8152616024017334,
"learning_rate": 0.0001963850752982234,
"loss": 0.7246,
"step": 1260
},
{
"epoch": 0.3560371517027864,
"grad_norm": 1.4570809602737427,
"learning_rate": 0.00019631933858842317,
"loss": 0.8113,
"step": 1265
},
{
"epoch": 0.35744441317196735,
"grad_norm": 1.1229805946350098,
"learning_rate": 0.00019625302075045088,
"loss": 0.5401,
"step": 1270
},
{
"epoch": 0.3588516746411483,
"grad_norm": 0.693499743938446,
"learning_rate": 0.00019618612218442517,
"loss": 0.3536,
"step": 1275
},
{
"epoch": 0.3602589361103293,
"grad_norm": 1.592119574546814,
"learning_rate": 0.00019611864329396853,
"loss": 0.5994,
"step": 1280
},
{
"epoch": 0.3616661975795103,
"grad_norm": 1.087098479270935,
"learning_rate": 0.00019605058448620452,
"loss": 0.5211,
"step": 1285
},
{
"epoch": 0.36307345904869126,
"grad_norm": 1.002854585647583,
"learning_rate": 0.0001959819461717557,
"loss": 0.6473,
"step": 1290
},
{
"epoch": 0.3644807205178722,
"grad_norm": 1.2526451349258423,
"learning_rate": 0.00019591272876474106,
"loss": 0.4721,
"step": 1295
},
{
"epoch": 0.3658879819870532,
"grad_norm": 0.9391024112701416,
"learning_rate": 0.00019584293268277324,
"loss": 0.5849,
"step": 1300
},
{
"epoch": 0.36729524345623416,
"grad_norm": 1.1725986003875732,
"learning_rate": 0.00019577255834695643,
"loss": 0.4718,
"step": 1305
},
{
"epoch": 0.3687025049254151,
"grad_norm": 1.1449577808380127,
"learning_rate": 0.00019570160618188353,
"loss": 0.5429,
"step": 1310
},
{
"epoch": 0.37010976639459614,
"grad_norm": 1.8632793426513672,
"learning_rate": 0.00019563007661563367,
"loss": 0.5791,
"step": 1315
},
{
"epoch": 0.3715170278637771,
"grad_norm": 0.6620994210243225,
"learning_rate": 0.00019555797007976975,
"loss": 0.4016,
"step": 1320
},
{
"epoch": 0.37292428933295807,
"grad_norm": 1.7540533542633057,
"learning_rate": 0.00019548528700933559,
"loss": 0.5039,
"step": 1325
},
{
"epoch": 0.37433155080213903,
"grad_norm": 0.9329980611801147,
"learning_rate": 0.00019541202784285352,
"loss": 0.403,
"step": 1330
},
{
"epoch": 0.37573881227132,
"grad_norm": 0.4586445689201355,
"learning_rate": 0.00019533819302232168,
"loss": 0.3944,
"step": 1335
},
{
"epoch": 0.37714607374050096,
"grad_norm": 1.575636863708496,
"learning_rate": 0.00019526378299321127,
"loss": 0.5372,
"step": 1340
},
{
"epoch": 0.378553335209682,
"grad_norm": 1.2038066387176514,
"learning_rate": 0.00019518879820446398,
"loss": 0.4409,
"step": 1345
},
{
"epoch": 0.37996059667886295,
"grad_norm": 0.9737414121627808,
"learning_rate": 0.0001951132391084892,
"loss": 0.7155,
"step": 1350
},
{
"epoch": 0.3813678581480439,
"grad_norm": 1.0166410207748413,
"learning_rate": 0.00019503710616116128,
"loss": 0.6772,
"step": 1355
},
{
"epoch": 0.3827751196172249,
"grad_norm": 1.1660302877426147,
"learning_rate": 0.0001949603998218169,
"loss": 0.7076,
"step": 1360
},
{
"epoch": 0.38418238108640584,
"grad_norm": 0.576275110244751,
"learning_rate": 0.0001948831205532521,
"loss": 0.5392,
"step": 1365
},
{
"epoch": 0.3855896425555868,
"grad_norm": 1.453596830368042,
"learning_rate": 0.00019480526882171976,
"loss": 0.7963,
"step": 1370
},
{
"epoch": 0.38699690402476783,
"grad_norm": 0.7829164862632751,
"learning_rate": 0.00019472684509692646,
"loss": 0.3505,
"step": 1375
},
{
"epoch": 0.3884041654939488,
"grad_norm": 0.9208312630653381,
"learning_rate": 0.0001946478498520299,
"loss": 0.5539,
"step": 1380
},
{
"epoch": 0.38981142696312976,
"grad_norm": 1.0814006328582764,
"learning_rate": 0.00019456828356363598,
"loss": 0.3839,
"step": 1385
},
{
"epoch": 0.3912186884323107,
"grad_norm": 1.592490553855896,
"learning_rate": 0.00019448814671179585,
"loss": 0.6688,
"step": 1390
},
{
"epoch": 0.3926259499014917,
"grad_norm": 0.880333662033081,
"learning_rate": 0.00019440743978000312,
"loss": 0.6542,
"step": 1395
},
{
"epoch": 0.39403321137067265,
"grad_norm": 0.516769528388977,
"learning_rate": 0.00019432616325519084,
"loss": 0.4571,
"step": 1400
},
{
"epoch": 0.3954404728398536,
"grad_norm": 1.1296850442886353,
"learning_rate": 0.00019424431762772866,
"loss": 0.5596,
"step": 1405
},
{
"epoch": 0.39684773430903464,
"grad_norm": 0.8967404365539551,
"learning_rate": 0.00019416190339141976,
"loss": 0.4144,
"step": 1410
},
{
"epoch": 0.3982549957782156,
"grad_norm": 1.983446478843689,
"learning_rate": 0.00019407892104349804,
"loss": 0.2378,
"step": 1415
},
{
"epoch": 0.39966225724739657,
"grad_norm": 0.868871808052063,
"learning_rate": 0.00019399537108462494,
"loss": 0.8016,
"step": 1420
},
{
"epoch": 0.40106951871657753,
"grad_norm": 1.9956140518188477,
"learning_rate": 0.00019391125401888644,
"loss": 0.5541,
"step": 1425
},
{
"epoch": 0.4024767801857585,
"grad_norm": 1.437330961227417,
"learning_rate": 0.00019382657035379026,
"loss": 0.299,
"step": 1430
},
{
"epoch": 0.40388404165493946,
"grad_norm": 1.0055358409881592,
"learning_rate": 0.00019374132060026242,
"loss": 0.5419,
"step": 1435
},
{
"epoch": 0.4052913031241205,
"grad_norm": 1.3034961223602295,
"learning_rate": 0.00019365550527264443,
"loss": 0.7488,
"step": 1440
},
{
"epoch": 0.40669856459330145,
"grad_norm": 1.9104148149490356,
"learning_rate": 0.0001935691248886901,
"loss": 0.4039,
"step": 1445
},
{
"epoch": 0.4081058260624824,
"grad_norm": 1.3824232816696167,
"learning_rate": 0.00019348217996956245,
"loss": 0.5864,
"step": 1450
},
{
"epoch": 0.4095130875316634,
"grad_norm": 0.18742340803146362,
"learning_rate": 0.00019339467103983044,
"loss": 0.3931,
"step": 1455
},
{
"epoch": 0.41092034900084434,
"grad_norm": 1.0197157859802246,
"learning_rate": 0.00019330659862746603,
"loss": 0.4888,
"step": 1460
},
{
"epoch": 0.4123276104700253,
"grad_norm": 1.248344898223877,
"learning_rate": 0.00019321796326384082,
"loss": 0.4607,
"step": 1465
},
{
"epoch": 0.41373487193920633,
"grad_norm": 0.8360584378242493,
"learning_rate": 0.00019312876548372286,
"loss": 0.5113,
"step": 1470
},
{
"epoch": 0.4151421334083873,
"grad_norm": 1.7348827123641968,
"learning_rate": 0.00019303900582527344,
"loss": 0.511,
"step": 1475
},
{
"epoch": 0.41654939487756826,
"grad_norm": 1.2273963689804077,
"learning_rate": 0.00019294868483004396,
"loss": 0.3603,
"step": 1480
},
{
"epoch": 0.4179566563467492,
"grad_norm": 1.0628288984298706,
"learning_rate": 0.00019285780304297245,
"loss": 0.5377,
"step": 1485
},
{
"epoch": 0.4193639178159302,
"grad_norm": 1.1135960817337036,
"learning_rate": 0.00019276636101238045,
"loss": 0.3928,
"step": 1490
},
{
"epoch": 0.42077117928511115,
"grad_norm": 0.8842063546180725,
"learning_rate": 0.00019267435928996962,
"loss": 0.4252,
"step": 1495
},
{
"epoch": 0.4221784407542922,
"grad_norm": 0.56885826587677,
"learning_rate": 0.00019258179843081847,
"loss": 0.5456,
"step": 1500
},
{
"epoch": 0.42358570222347314,
"grad_norm": 0.5579463243484497,
"learning_rate": 0.00019248867899337896,
"loss": 0.3585,
"step": 1505
},
{
"epoch": 0.4249929636926541,
"grad_norm": 1.1640398502349854,
"learning_rate": 0.00019239500153947305,
"loss": 0.5048,
"step": 1510
},
{
"epoch": 0.42640022516183507,
"grad_norm": 0.8812012672424316,
"learning_rate": 0.00019230076663428962,
"loss": 0.4503,
"step": 1515
},
{
"epoch": 0.42780748663101603,
"grad_norm": 1.1245768070220947,
"learning_rate": 0.0001922059748463807,
"loss": 0.364,
"step": 1520
},
{
"epoch": 0.429214748100197,
"grad_norm": 1.0180691480636597,
"learning_rate": 0.00019211062674765817,
"loss": 0.4229,
"step": 1525
},
{
"epoch": 0.430622009569378,
"grad_norm": 1.3053510189056396,
"learning_rate": 0.0001920147229133904,
"loss": 0.4794,
"step": 1530
},
{
"epoch": 0.432029271038559,
"grad_norm": 0.8506336808204651,
"learning_rate": 0.00019191826392219867,
"loss": 0.5524,
"step": 1535
},
{
"epoch": 0.43343653250773995,
"grad_norm": 1.0151127576828003,
"learning_rate": 0.00019182125035605376,
"loss": 0.5024,
"step": 1540
},
{
"epoch": 0.4348437939769209,
"grad_norm": 1.094344973564148,
"learning_rate": 0.00019172368280027233,
"loss": 0.5535,
"step": 1545
},
{
"epoch": 0.4362510554461019,
"grad_norm": 1.0190297365188599,
"learning_rate": 0.00019162556184351348,
"loss": 0.393,
"step": 1550
},
{
"epoch": 0.43765831691528284,
"grad_norm": 1.502398133277893,
"learning_rate": 0.00019152688807777516,
"loss": 0.4018,
"step": 1555
},
{
"epoch": 0.4390655783844638,
"grad_norm": 0.8518544435501099,
"learning_rate": 0.00019142766209839064,
"loss": 0.5682,
"step": 1560
},
{
"epoch": 0.4404728398536448,
"grad_norm": 0.42057764530181885,
"learning_rate": 0.0001913278845040249,
"loss": 0.2624,
"step": 1565
},
{
"epoch": 0.4418801013228258,
"grad_norm": 0.8204036951065063,
"learning_rate": 0.00019122755589667093,
"loss": 0.6987,
"step": 1570
},
{
"epoch": 0.44328736279200676,
"grad_norm": 1.2145869731903076,
"learning_rate": 0.00019112667688164626,
"loss": 0.575,
"step": 1575
},
{
"epoch": 0.4446946242611877,
"grad_norm": 1.5361616611480713,
"learning_rate": 0.0001910252480675891,
"loss": 0.466,
"step": 1580
},
{
"epoch": 0.4461018857303687,
"grad_norm": 1.8853634595870972,
"learning_rate": 0.00019092327006645497,
"loss": 0.4938,
"step": 1585
},
{
"epoch": 0.44750914719954965,
"grad_norm": 1.2990604639053345,
"learning_rate": 0.00019082074349351268,
"loss": 0.5759,
"step": 1590
},
{
"epoch": 0.44891640866873067,
"grad_norm": 1.3845807313919067,
"learning_rate": 0.0001907176689673408,
"loss": 0.6341,
"step": 1595
},
{
"epoch": 0.45032367013791164,
"grad_norm": 0.8449406027793884,
"learning_rate": 0.0001906140471098239,
"loss": 0.546,
"step": 1600
},
{
"epoch": 0.4517309316070926,
"grad_norm": 1.2000244855880737,
"learning_rate": 0.00019050987854614886,
"loss": 0.5149,
"step": 1605
},
{
"epoch": 0.45313819307627357,
"grad_norm": 0.8644974827766418,
"learning_rate": 0.0001904051639048009,
"loss": 0.5419,
"step": 1610
},
{
"epoch": 0.45454545454545453,
"grad_norm": 0.4699718654155731,
"learning_rate": 0.00019029990381756002,
"loss": 0.3501,
"step": 1615
},
{
"epoch": 0.4559527160146355,
"grad_norm": 0.6143896579742432,
"learning_rate": 0.00019019409891949703,
"loss": 0.4732,
"step": 1620
},
{
"epoch": 0.4573599774838165,
"grad_norm": 1.4060841798782349,
"learning_rate": 0.0001900877498489698,
"loss": 0.6648,
"step": 1625
},
{
"epoch": 0.4587672389529975,
"grad_norm": 1.3622968196868896,
"learning_rate": 0.00018998085724761935,
"loss": 0.3465,
"step": 1630
},
{
"epoch": 0.46017450042217845,
"grad_norm": 0.6618224382400513,
"learning_rate": 0.00018987342176036607,
"loss": 0.5135,
"step": 1635
},
{
"epoch": 0.4615817618913594,
"grad_norm": 1.253423810005188,
"learning_rate": 0.0001897654440354057,
"loss": 0.5411,
"step": 1640
},
{
"epoch": 0.4629890233605404,
"grad_norm": 1.0359442234039307,
"learning_rate": 0.00018965692472420554,
"loss": 0.5266,
"step": 1645
},
{
"epoch": 0.46439628482972134,
"grad_norm": 1.4265358448028564,
"learning_rate": 0.00018954786448150047,
"loss": 0.481,
"step": 1650
},
{
"epoch": 0.46580354629890236,
"grad_norm": 0.6981240510940552,
"learning_rate": 0.00018943826396528897,
"loss": 0.287,
"step": 1655
},
{
"epoch": 0.4672108077680833,
"grad_norm": 0.8274213671684265,
"learning_rate": 0.00018932812383682917,
"loss": 0.4081,
"step": 1660
},
{
"epoch": 0.4686180692372643,
"grad_norm": 0.7835836410522461,
"learning_rate": 0.0001892174447606349,
"loss": 0.344,
"step": 1665
},
{
"epoch": 0.47002533070644525,
"grad_norm": 1.9255175590515137,
"learning_rate": 0.00018910622740447167,
"loss": 0.6834,
"step": 1670
},
{
"epoch": 0.4714325921756262,
"grad_norm": 1.7480101585388184,
"learning_rate": 0.00018899447243935256,
"loss": 0.4431,
"step": 1675
},
{
"epoch": 0.4728398536448072,
"grad_norm": 0.7691779136657715,
"learning_rate": 0.00018888218053953425,
"loss": 0.5831,
"step": 1680
},
{
"epoch": 0.4742471151139882,
"grad_norm": 0.6671115756034851,
"learning_rate": 0.00018876935238251296,
"loss": 0.3096,
"step": 1685
},
{
"epoch": 0.47565437658316917,
"grad_norm": 0.7756052613258362,
"learning_rate": 0.00018865598864902035,
"loss": 0.4505,
"step": 1690
},
{
"epoch": 0.47706163805235013,
"grad_norm": 0.7612590193748474,
"learning_rate": 0.00018854209002301932,
"loss": 0.5595,
"step": 1695
},
{
"epoch": 0.4784688995215311,
"grad_norm": 0.9925332069396973,
"learning_rate": 0.00018842765719170006,
"loss": 0.3256,
"step": 1700
},
{
"epoch": 0.47987616099071206,
"grad_norm": 1.4211307764053345,
"learning_rate": 0.00018831269084547574,
"loss": 0.3897,
"step": 1705
},
{
"epoch": 0.48128342245989303,
"grad_norm": 0.8699591159820557,
"learning_rate": 0.00018819719167797842,
"loss": 0.348,
"step": 1710
},
{
"epoch": 0.48269068392907405,
"grad_norm": 1.1962676048278809,
"learning_rate": 0.00018808116038605493,
"loss": 0.6022,
"step": 1715
},
{
"epoch": 0.484097945398255,
"grad_norm": 1.0962321758270264,
"learning_rate": 0.00018796459766976247,
"loss": 0.4853,
"step": 1720
},
{
"epoch": 0.485505206867436,
"grad_norm": 1.8502682447433472,
"learning_rate": 0.00018784750423236462,
"loss": 0.5438,
"step": 1725
},
{
"epoch": 0.48691246833661694,
"grad_norm": 0.8780159950256348,
"learning_rate": 0.0001877298807803269,
"loss": 0.4728,
"step": 1730
},
{
"epoch": 0.4883197298057979,
"grad_norm": 1.3143213987350464,
"learning_rate": 0.00018761172802331263,
"loss": 0.648,
"step": 1735
},
{
"epoch": 0.4897269912749789,
"grad_norm": 1.3124626874923706,
"learning_rate": 0.00018749304667417863,
"loss": 0.568,
"step": 1740
},
{
"epoch": 0.49113425274415984,
"grad_norm": 1.2247035503387451,
"learning_rate": 0.0001873738374489709,
"loss": 0.3325,
"step": 1745
},
{
"epoch": 0.49254151421334086,
"grad_norm": 0.8056420683860779,
"learning_rate": 0.00018725410106692025,
"loss": 0.5355,
"step": 1750
},
{
"epoch": 0.4939487756825218,
"grad_norm": 1.782456636428833,
"learning_rate": 0.00018713383825043806,
"loss": 0.3927,
"step": 1755
},
{
"epoch": 0.4953560371517028,
"grad_norm": 0.9671362638473511,
"learning_rate": 0.00018701304972511187,
"loss": 0.4428,
"step": 1760
},
{
"epoch": 0.49676329862088375,
"grad_norm": 0.8646135330200195,
"learning_rate": 0.00018689173621970096,
"loss": 0.396,
"step": 1765
},
{
"epoch": 0.4981705600900647,
"grad_norm": 1.406186580657959,
"learning_rate": 0.00018676989846613205,
"loss": 0.4296,
"step": 1770
},
{
"epoch": 0.4995778215592457,
"grad_norm": 1.2148306369781494,
"learning_rate": 0.00018664753719949478,
"loss": 0.3217,
"step": 1775
},
{
"epoch": 0.5009850830284267,
"grad_norm": 2.317777395248413,
"learning_rate": 0.00018652465315803745,
"loss": 0.5039,
"step": 1780
},
{
"epoch": 0.5023923444976076,
"grad_norm": 2.461662530899048,
"learning_rate": 0.00018640124708316225,
"loss": 0.5716,
"step": 1785
},
{
"epoch": 0.5037996059667886,
"grad_norm": 1.3684732913970947,
"learning_rate": 0.0001862773197194211,
"loss": 0.3489,
"step": 1790
},
{
"epoch": 0.5052068674359697,
"grad_norm": 0.7968658208847046,
"learning_rate": 0.00018615287181451108,
"loss": 0.4202,
"step": 1795
},
{
"epoch": 0.5066141289051506,
"grad_norm": 1.1133559942245483,
"learning_rate": 0.00018602790411926975,
"loss": 0.4799,
"step": 1800
},
{
"epoch": 0.5080213903743316,
"grad_norm": 1.4438867568969727,
"learning_rate": 0.0001859024173876709,
"loss": 0.5841,
"step": 1805
},
{
"epoch": 0.5094286518435125,
"grad_norm": 0.5369459986686707,
"learning_rate": 0.0001857764123768196,
"loss": 0.4793,
"step": 1810
},
{
"epoch": 0.5108359133126935,
"grad_norm": 0.7949886918067932,
"learning_rate": 0.0001856498898469482,
"loss": 0.4041,
"step": 1815
},
{
"epoch": 0.5122431747818744,
"grad_norm": 0.5967936515808105,
"learning_rate": 0.00018552285056141124,
"loss": 0.3951,
"step": 1820
},
{
"epoch": 0.5136504362510554,
"grad_norm": 0.32833540439605713,
"learning_rate": 0.00018539529528668094,
"loss": 0.2362,
"step": 1825
},
{
"epoch": 0.5150576977202365,
"grad_norm": 0.7846612334251404,
"learning_rate": 0.00018526722479234286,
"loss": 0.4279,
"step": 1830
},
{
"epoch": 0.5164649591894174,
"grad_norm": 1.5786385536193848,
"learning_rate": 0.00018513863985109095,
"loss": 0.429,
"step": 1835
},
{
"epoch": 0.5178722206585984,
"grad_norm": 1.2571947574615479,
"learning_rate": 0.00018500954123872303,
"loss": 0.6325,
"step": 1840
},
{
"epoch": 0.5192794821277793,
"grad_norm": 0.807839035987854,
"learning_rate": 0.00018487992973413605,
"loss": 0.3732,
"step": 1845
},
{
"epoch": 0.5206867435969603,
"grad_norm": 0.9321346282958984,
"learning_rate": 0.00018474980611932144,
"loss": 0.5329,
"step": 1850
},
{
"epoch": 0.5220940050661413,
"grad_norm": 1.1516450643539429,
"learning_rate": 0.0001846191711793604,
"loss": 0.553,
"step": 1855
},
{
"epoch": 0.5235012665353223,
"grad_norm": 1.2552000284194946,
"learning_rate": 0.000184488025702419,
"loss": 0.5088,
"step": 1860
},
{
"epoch": 0.5249085280045033,
"grad_norm": 0.7412288188934326,
"learning_rate": 0.00018435637047974375,
"loss": 0.623,
"step": 1865
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.7325606942176819,
"learning_rate": 0.0001842242063056565,
"loss": 0.4663,
"step": 1870
},
{
"epoch": 0.5277230509428652,
"grad_norm": 0.7041971683502197,
"learning_rate": 0.0001840915339775498,
"loss": 0.3317,
"step": 1875
},
{
"epoch": 0.5291303124120461,
"grad_norm": 0.8097009062767029,
"learning_rate": 0.00018395835429588215,
"loss": 0.5374,
"step": 1880
},
{
"epoch": 0.5305375738812271,
"grad_norm": 0.5471770763397217,
"learning_rate": 0.000183824668064173,
"loss": 0.6708,
"step": 1885
},
{
"epoch": 0.5319448353504082,
"grad_norm": 0.9955052137374878,
"learning_rate": 0.00018369047608899798,
"loss": 0.3958,
"step": 1890
},
{
"epoch": 0.5333520968195891,
"grad_norm": 0.980060875415802,
"learning_rate": 0.00018355577917998414,
"loss": 0.5356,
"step": 1895
},
{
"epoch": 0.5347593582887701,
"grad_norm": 0.8592010736465454,
"learning_rate": 0.00018342057814980494,
"loss": 0.5253,
"step": 1900
},
{
"epoch": 0.536166619757951,
"grad_norm": 0.8325905799865723,
"learning_rate": 0.00018328487381417532,
"loss": 0.5743,
"step": 1905
},
{
"epoch": 0.537573881227132,
"grad_norm": 1.0972857475280762,
"learning_rate": 0.00018314866699184687,
"loss": 0.6613,
"step": 1910
},
{
"epoch": 0.5389811426963129,
"grad_norm": 0.9051984548568726,
"learning_rate": 0.00018301195850460293,
"loss": 0.5146,
"step": 1915
},
{
"epoch": 0.5403884041654939,
"grad_norm": 0.8490184545516968,
"learning_rate": 0.00018287474917725343,
"loss": 0.6052,
"step": 1920
},
{
"epoch": 0.541795665634675,
"grad_norm": 0.9744853377342224,
"learning_rate": 0.00018273703983763017,
"loss": 0.556,
"step": 1925
},
{
"epoch": 0.5432029271038559,
"grad_norm": 0.9393332600593567,
"learning_rate": 0.0001825988313165816,
"loss": 0.6805,
"step": 1930
},
{
"epoch": 0.5446101885730369,
"grad_norm": 0.786738932132721,
"learning_rate": 0.0001824601244479679,
"loss": 0.5313,
"step": 1935
},
{
"epoch": 0.5460174500422178,
"grad_norm": 1.7297477722167969,
"learning_rate": 0.00018232092006865606,
"loss": 0.6627,
"step": 1940
},
{
"epoch": 0.5474247115113988,
"grad_norm": 0.8226016759872437,
"learning_rate": 0.00018218121901851468,
"loss": 0.4177,
"step": 1945
},
{
"epoch": 0.5488319729805798,
"grad_norm": 1.1636661291122437,
"learning_rate": 0.0001820410221404089,
"loss": 0.5303,
"step": 1950
},
{
"epoch": 0.5502392344497608,
"grad_norm": 1.3004634380340576,
"learning_rate": 0.00018190033028019534,
"loss": 0.5114,
"step": 1955
},
{
"epoch": 0.5516464959189418,
"grad_norm": 1.512581706047058,
"learning_rate": 0.00018175914428671716,
"loss": 0.5918,
"step": 1960
},
{
"epoch": 0.5530537573881227,
"grad_norm": 0.7482631206512451,
"learning_rate": 0.0001816174650117987,
"loss": 0.6304,
"step": 1965
},
{
"epoch": 0.5544610188573037,
"grad_norm": 1.3120630979537964,
"learning_rate": 0.00018147529331024044,
"loss": 0.5008,
"step": 1970
},
{
"epoch": 0.5558682803264846,
"grad_norm": 0.9526933431625366,
"learning_rate": 0.00018133263003981384,
"loss": 0.6951,
"step": 1975
},
{
"epoch": 0.5572755417956656,
"grad_norm": 0.8142489194869995,
"learning_rate": 0.0001811894760612562,
"loss": 0.478,
"step": 1980
},
{
"epoch": 0.5586828032648467,
"grad_norm": 1.5639302730560303,
"learning_rate": 0.0001810458322382654,
"loss": 0.6378,
"step": 1985
},
{
"epoch": 0.5600900647340276,
"grad_norm": 0.6878836154937744,
"learning_rate": 0.00018090169943749476,
"loss": 0.6067,
"step": 1990
},
{
"epoch": 0.5614973262032086,
"grad_norm": 1.1296664476394653,
"learning_rate": 0.0001807570785285477,
"loss": 0.6044,
"step": 1995
},
{
"epoch": 0.5629045876723895,
"grad_norm": 0.837823748588562,
"learning_rate": 0.00018061197038397268,
"loss": 0.4684,
"step": 2000
},
{
"epoch": 0.5643118491415705,
"grad_norm": 1.2144043445587158,
"learning_rate": 0.0001804663758792577,
"loss": 0.3649,
"step": 2005
},
{
"epoch": 0.5657191106107515,
"grad_norm": 0.8372750878334045,
"learning_rate": 0.00018032029589282525,
"loss": 0.4253,
"step": 2010
},
{
"epoch": 0.5671263720799324,
"grad_norm": 0.8684276342391968,
"learning_rate": 0.00018017373130602683,
"loss": 0.3992,
"step": 2015
},
{
"epoch": 0.5685336335491135,
"grad_norm": 0.9675285816192627,
"learning_rate": 0.0001800266830031377,
"loss": 0.5995,
"step": 2020
},
{
"epoch": 0.5699408950182944,
"grad_norm": 0.9824860692024231,
"learning_rate": 0.00017987915187135157,
"loss": 0.2531,
"step": 2025
},
{
"epoch": 0.5713481564874754,
"grad_norm": 2.90608549118042,
"learning_rate": 0.0001797311388007753,
"loss": 0.6474,
"step": 2030
},
{
"epoch": 0.5727554179566563,
"grad_norm": 0.922585666179657,
"learning_rate": 0.00017958264468442332,
"loss": 0.4685,
"step": 2035
},
{
"epoch": 0.5741626794258373,
"grad_norm": 1.4679278135299683,
"learning_rate": 0.00017943367041821243,
"loss": 0.4786,
"step": 2040
},
{
"epoch": 0.5755699408950183,
"grad_norm": 0.8750627040863037,
"learning_rate": 0.00017928421690095636,
"loss": 0.317,
"step": 2045
},
{
"epoch": 0.5769772023641992,
"grad_norm": 1.1974796056747437,
"learning_rate": 0.00017913428503436035,
"loss": 0.496,
"step": 2050
},
{
"epoch": 0.5783844638333803,
"grad_norm": 0.8931379914283752,
"learning_rate": 0.00017898387572301563,
"loss": 0.6886,
"step": 2055
},
{
"epoch": 0.5797917253025612,
"grad_norm": 1.0573607683181763,
"learning_rate": 0.00017883298987439404,
"loss": 0.5887,
"step": 2060
},
{
"epoch": 0.5811989867717422,
"grad_norm": 1.1087405681610107,
"learning_rate": 0.00017868162839884254,
"loss": 0.5817,
"step": 2065
},
{
"epoch": 0.5826062482409231,
"grad_norm": 0.5602430701255798,
"learning_rate": 0.00017852979220957775,
"loss": 0.4194,
"step": 2070
},
{
"epoch": 0.5840135097101041,
"grad_norm": 0.9328368306159973,
"learning_rate": 0.00017837748222268037,
"loss": 0.3816,
"step": 2075
},
{
"epoch": 0.5854207711792851,
"grad_norm": 1.4052832126617432,
"learning_rate": 0.00017822469935708965,
"loss": 0.7981,
"step": 2080
},
{
"epoch": 0.5868280326484661,
"grad_norm": 1.0276223421096802,
"learning_rate": 0.00017807144453459793,
"loss": 0.4105,
"step": 2085
},
{
"epoch": 0.5882352941176471,
"grad_norm": 1.257156491279602,
"learning_rate": 0.00017791771867984503,
"loss": 0.5565,
"step": 2090
},
{
"epoch": 0.589642555586828,
"grad_norm": 1.0978988409042358,
"learning_rate": 0.00017776352272031264,
"loss": 0.5929,
"step": 2095
},
{
"epoch": 0.591049817056009,
"grad_norm": 0.8809897303581238,
"learning_rate": 0.0001776088575863188,
"loss": 0.3527,
"step": 2100
},
{
"epoch": 0.59245707852519,
"grad_norm": 0.6997563242912292,
"learning_rate": 0.00017745372421101223,
"loss": 0.5211,
"step": 2105
},
{
"epoch": 0.5938643399943709,
"grad_norm": 0.9955636262893677,
"learning_rate": 0.00017729812353036668,
"loss": 0.5267,
"step": 2110
},
{
"epoch": 0.595271601463552,
"grad_norm": 0.8788183927536011,
"learning_rate": 0.00017714205648317535,
"loss": 0.5372,
"step": 2115
},
{
"epoch": 0.5966788629327329,
"grad_norm": 1.0072330236434937,
"learning_rate": 0.00017698552401104517,
"loss": 0.5234,
"step": 2120
},
{
"epoch": 0.5980861244019139,
"grad_norm": 1.6254470348358154,
"learning_rate": 0.00017682852705839115,
"loss": 0.4621,
"step": 2125
},
{
"epoch": 0.5994933858710948,
"grad_norm": 1.0389853715896606,
"learning_rate": 0.00017667106657243072,
"loss": 0.5439,
"step": 2130
},
{
"epoch": 0.6009006473402758,
"grad_norm": 0.9769371151924133,
"learning_rate": 0.00017651314350317787,
"loss": 0.6171,
"step": 2135
},
{
"epoch": 0.6023079088094568,
"grad_norm": 1.7502343654632568,
"learning_rate": 0.0001763547588034376,
"loss": 0.612,
"step": 2140
},
{
"epoch": 0.6037151702786377,
"grad_norm": 1.1023430824279785,
"learning_rate": 0.00017619591342880005,
"loss": 0.4228,
"step": 2145
},
{
"epoch": 0.6051224317478188,
"grad_norm": 2.0511550903320312,
"learning_rate": 0.00017603660833763476,
"loss": 0.3462,
"step": 2150
},
{
"epoch": 0.6065296932169997,
"grad_norm": 0.7986024022102356,
"learning_rate": 0.00017587684449108497,
"loss": 0.4616,
"step": 2155
},
{
"epoch": 0.6079369546861807,
"grad_norm": 0.7450430989265442,
"learning_rate": 0.00017571662285306166,
"loss": 0.5481,
"step": 2160
},
{
"epoch": 0.6093442161553617,
"grad_norm": 1.1748677492141724,
"learning_rate": 0.00017555594439023787,
"loss": 0.5419,
"step": 2165
},
{
"epoch": 0.6107514776245426,
"grad_norm": 0.7183251976966858,
"learning_rate": 0.0001753948100720429,
"loss": 0.4122,
"step": 2170
},
{
"epoch": 0.6121587390937236,
"grad_norm": 0.7296462655067444,
"learning_rate": 0.00017523322087065614,
"loss": 0.3651,
"step": 2175
},
{
"epoch": 0.6135660005629046,
"grad_norm": 0.5904517769813538,
"learning_rate": 0.00017507117776100178,
"loss": 0.3728,
"step": 2180
},
{
"epoch": 0.6149732620320856,
"grad_norm": 1.5718715190887451,
"learning_rate": 0.00017490868172074232,
"loss": 0.4729,
"step": 2185
},
{
"epoch": 0.6163805235012665,
"grad_norm": 1.053885579109192,
"learning_rate": 0.00017474573373027315,
"loss": 0.4341,
"step": 2190
},
{
"epoch": 0.6177877849704475,
"grad_norm": 0.723726212978363,
"learning_rate": 0.00017458233477271628,
"loss": 0.4755,
"step": 2195
},
{
"epoch": 0.6191950464396285,
"grad_norm": 1.133907437324524,
"learning_rate": 0.00017441848583391463,
"loss": 0.7399,
"step": 2200
},
{
"epoch": 0.6206023079088094,
"grad_norm": 0.5922422409057617,
"learning_rate": 0.00017425418790242606,
"loss": 0.4381,
"step": 2205
},
{
"epoch": 0.6220095693779905,
"grad_norm": 0.534817636013031,
"learning_rate": 0.0001740894419695172,
"loss": 0.4668,
"step": 2210
},
{
"epoch": 0.6234168308471714,
"grad_norm": 0.5950006246566772,
"learning_rate": 0.00017392424902915786,
"loss": 0.3497,
"step": 2215
},
{
"epoch": 0.6248240923163524,
"grad_norm": 3.878748655319214,
"learning_rate": 0.00017375861007801465,
"loss": 0.2247,
"step": 2220
},
{
"epoch": 0.6262313537855334,
"grad_norm": 1.3402066230773926,
"learning_rate": 0.00017359252611544505,
"loss": 0.3214,
"step": 2225
},
{
"epoch": 0.6276386152547143,
"grad_norm": 1.3445652723312378,
"learning_rate": 0.0001734259981434917,
"loss": 0.4757,
"step": 2230
},
{
"epoch": 0.6290458767238953,
"grad_norm": 0.801052987575531,
"learning_rate": 0.00017325902716687578,
"loss": 0.542,
"step": 2235
},
{
"epoch": 0.6304531381930762,
"grad_norm": 0.6313127279281616,
"learning_rate": 0.0001730916141929916,
"loss": 0.6026,
"step": 2240
},
{
"epoch": 0.6318603996622573,
"grad_norm": 0.7048347592353821,
"learning_rate": 0.00017292376023189996,
"loss": 0.4769,
"step": 2245
},
{
"epoch": 0.6332676611314382,
"grad_norm": 1.3377580642700195,
"learning_rate": 0.00017275546629632235,
"loss": 0.3727,
"step": 2250
},
{
"epoch": 0.6346749226006192,
"grad_norm": 1.3854931592941284,
"learning_rate": 0.00017258673340163485,
"loss": 0.4537,
"step": 2255
},
{
"epoch": 0.6360821840698002,
"grad_norm": 1.5850138664245605,
"learning_rate": 0.00017241756256586183,
"loss": 0.5933,
"step": 2260
},
{
"epoch": 0.6374894455389811,
"grad_norm": 1.3591883182525635,
"learning_rate": 0.00017224795480967,
"loss": 0.3786,
"step": 2265
},
{
"epoch": 0.6388967070081621,
"grad_norm": 0.685483992099762,
"learning_rate": 0.00017207791115636206,
"loss": 0.3562,
"step": 2270
},
{
"epoch": 0.640303968477343,
"grad_norm": 1.1758111715316772,
"learning_rate": 0.00017190743263187076,
"loss": 0.3506,
"step": 2275
},
{
"epoch": 0.6417112299465241,
"grad_norm": 0.9146699905395508,
"learning_rate": 0.00017173652026475247,
"loss": 0.4753,
"step": 2280
},
{
"epoch": 0.643118491415705,
"grad_norm": 0.6895302534103394,
"learning_rate": 0.00017156517508618116,
"loss": 0.2637,
"step": 2285
},
{
"epoch": 0.644525752884886,
"grad_norm": 1.011983036994934,
"learning_rate": 0.00017139339812994204,
"loss": 0.551,
"step": 2290
},
{
"epoch": 0.645933014354067,
"grad_norm": 1.5470740795135498,
"learning_rate": 0.0001712211904324254,
"loss": 0.6397,
"step": 2295
},
{
"epoch": 0.6473402758232479,
"grad_norm": 0.8334661722183228,
"learning_rate": 0.0001710485530326204,
"loss": 0.3297,
"step": 2300
},
{
"epoch": 0.648747537292429,
"grad_norm": 1.3184936046600342,
"learning_rate": 0.00017087548697210868,
"loss": 0.2933,
"step": 2305
},
{
"epoch": 0.6501547987616099,
"grad_norm": 0.6180691719055176,
"learning_rate": 0.00017070199329505815,
"loss": 0.316,
"step": 2310
},
{
"epoch": 0.6515620602307909,
"grad_norm": 1.5314627885818481,
"learning_rate": 0.00017052807304821673,
"loss": 0.4908,
"step": 2315
},
{
"epoch": 0.6529693216999719,
"grad_norm": 0.2867351472377777,
"learning_rate": 0.0001703537272809059,
"loss": 0.4078,
"step": 2320
},
{
"epoch": 0.6543765831691528,
"grad_norm": 1.513857126235962,
"learning_rate": 0.00017017895704501447,
"loss": 0.5121,
"step": 2325
},
{
"epoch": 0.6557838446383338,
"grad_norm": 0.7989262938499451,
"learning_rate": 0.00017000376339499233,
"loss": 0.4578,
"step": 2330
},
{
"epoch": 0.6571911061075147,
"grad_norm": 1.8081159591674805,
"learning_rate": 0.00016982814738784386,
"loss": 0.3809,
"step": 2335
},
{
"epoch": 0.6585983675766958,
"grad_norm": 1.2163859605789185,
"learning_rate": 0.0001696521100831216,
"loss": 0.3293,
"step": 2340
},
{
"epoch": 0.6600056290458767,
"grad_norm": 1.5051732063293457,
"learning_rate": 0.00016947565254292016,
"loss": 0.33,
"step": 2345
},
{
"epoch": 0.6614128905150577,
"grad_norm": 0.6793294548988342,
"learning_rate": 0.00016929877583186936,
"loss": 0.5292,
"step": 2350
},
{
"epoch": 0.6628201519842387,
"grad_norm": 1.8864996433258057,
"learning_rate": 0.00016912148101712814,
"loss": 0.1853,
"step": 2355
},
{
"epoch": 0.6642274134534196,
"grad_norm": 1.2697969675064087,
"learning_rate": 0.00016894376916837795,
"loss": 0.4886,
"step": 2360
},
{
"epoch": 0.6656346749226006,
"grad_norm": 1.4264556169509888,
"learning_rate": 0.00016876564135781638,
"loss": 0.5061,
"step": 2365
},
{
"epoch": 0.6670419363917816,
"grad_norm": 0.5291624665260315,
"learning_rate": 0.00016858709866015065,
"loss": 0.4241,
"step": 2370
},
{
"epoch": 0.6684491978609626,
"grad_norm": 1.5842996835708618,
"learning_rate": 0.00016840814215259112,
"loss": 0.4321,
"step": 2375
},
{
"epoch": 0.6698564593301436,
"grad_norm": 0.7339175939559937,
"learning_rate": 0.0001682287729148449,
"loss": 0.4975,
"step": 2380
},
{
"epoch": 0.6712637207993245,
"grad_norm": 0.6193541884422302,
"learning_rate": 0.00016804899202910907,
"loss": 0.1977,
"step": 2385
},
{
"epoch": 0.6726709822685055,
"grad_norm": 1.8930505514144897,
"learning_rate": 0.00016786880058006453,
"loss": 0.6117,
"step": 2390
},
{
"epoch": 0.6740782437376864,
"grad_norm": 1.268921971321106,
"learning_rate": 0.0001676881996548691,
"loss": 0.5449,
"step": 2395
},
{
"epoch": 0.6754855052068675,
"grad_norm": 1.5368669033050537,
"learning_rate": 0.00016750719034315121,
"loss": 0.4734,
"step": 2400
},
{
"epoch": 0.6768927666760484,
"grad_norm": 0.8705158233642578,
"learning_rate": 0.00016732577373700314,
"loss": 0.4644,
"step": 2405
},
{
"epoch": 0.6783000281452294,
"grad_norm": 0.3128531873226166,
"learning_rate": 0.00016714395093097458,
"loss": 0.4438,
"step": 2410
},
{
"epoch": 0.6797072896144104,
"grad_norm": 1.795952558517456,
"learning_rate": 0.00016696172302206597,
"loss": 0.463,
"step": 2415
},
{
"epoch": 0.6811145510835913,
"grad_norm": 0.8031005263328552,
"learning_rate": 0.00016677909110972183,
"loss": 0.727,
"step": 2420
},
{
"epoch": 0.6825218125527723,
"grad_norm": 1.083425760269165,
"learning_rate": 0.00016659605629582418,
"loss": 0.6498,
"step": 2425
},
{
"epoch": 0.6839290740219532,
"grad_norm": 0.9262056350708008,
"learning_rate": 0.00016641261968468598,
"loss": 0.3122,
"step": 2430
},
{
"epoch": 0.6853363354911343,
"grad_norm": 0.27757611870765686,
"learning_rate": 0.00016622878238304424,
"loss": 0.3477,
"step": 2435
},
{
"epoch": 0.6867435969603152,
"grad_norm": 0.6037611365318298,
"learning_rate": 0.00016604454550005356,
"loss": 0.2896,
"step": 2440
},
{
"epoch": 0.6881508584294962,
"grad_norm": 0.7902546525001526,
"learning_rate": 0.00016585991014727932,
"loss": 0.6687,
"step": 2445
},
{
"epoch": 0.6895581198986772,
"grad_norm": 0.8998187184333801,
"learning_rate": 0.000165674877438691,
"loss": 0.5168,
"step": 2450
},
{
"epoch": 0.6909653813678581,
"grad_norm": 0.9715900421142578,
"learning_rate": 0.0001654894484906555,
"loss": 0.6263,
"step": 2455
},
{
"epoch": 0.6923726428370391,
"grad_norm": 1.390411138534546,
"learning_rate": 0.00016530362442193037,
"loss": 0.4905,
"step": 2460
},
{
"epoch": 0.69377990430622,
"grad_norm": 0.8985224366188049,
"learning_rate": 0.00016511740635365705,
"loss": 0.5525,
"step": 2465
},
{
"epoch": 0.6951871657754011,
"grad_norm": 0.8099625110626221,
"learning_rate": 0.00016493079540935406,
"loss": 0.3906,
"step": 2470
},
{
"epoch": 0.6965944272445821,
"grad_norm": 1.9844683408737183,
"learning_rate": 0.00016474379271491033,
"loss": 0.5456,
"step": 2475
},
{
"epoch": 0.698001688713763,
"grad_norm": 1.053562045097351,
"learning_rate": 0.00016455639939857842,
"loss": 0.2934,
"step": 2480
},
{
"epoch": 0.699408950182944,
"grad_norm": 1.4200698137283325,
"learning_rate": 0.00016436861659096752,
"loss": 0.6771,
"step": 2485
},
{
"epoch": 0.7008162116521249,
"grad_norm": 0.7813885807991028,
"learning_rate": 0.00016418044542503685,
"loss": 0.357,
"step": 2490
},
{
"epoch": 0.702223473121306,
"grad_norm": 1.131839632987976,
"learning_rate": 0.00016399188703608867,
"loss": 0.528,
"step": 2495
},
{
"epoch": 0.7036307345904869,
"grad_norm": 0.7668808698654175,
"learning_rate": 0.00016380294256176155,
"loss": 0.4434,
"step": 2500
},
{
"epoch": 0.7050379960596679,
"grad_norm": 2.0037477016448975,
"learning_rate": 0.00016361361314202343,
"loss": 0.5884,
"step": 2505
},
{
"epoch": 0.7064452575288489,
"grad_norm": 0.726494550704956,
"learning_rate": 0.0001634238999191647,
"loss": 0.4555,
"step": 2510
},
{
"epoch": 0.7078525189980298,
"grad_norm": 0.5868455171585083,
"learning_rate": 0.0001632338040377915,
"loss": 0.4513,
"step": 2515
},
{
"epoch": 0.7092597804672108,
"grad_norm": 0.8666847348213196,
"learning_rate": 0.00016304332664481848,
"loss": 0.7028,
"step": 2520
},
{
"epoch": 0.7106670419363917,
"grad_norm": 1.0513399839401245,
"learning_rate": 0.00016285246888946234,
"loss": 0.3972,
"step": 2525
},
{
"epoch": 0.7120743034055728,
"grad_norm": 0.765617847442627,
"learning_rate": 0.0001626612319232344,
"loss": 0.4364,
"step": 2530
},
{
"epoch": 0.7134815648747538,
"grad_norm": 0.7804258465766907,
"learning_rate": 0.00016246961689993404,
"loss": 0.6756,
"step": 2535
},
{
"epoch": 0.7148888263439347,
"grad_norm": 1.0644882917404175,
"learning_rate": 0.00016227762497564153,
"loss": 0.4398,
"step": 2540
},
{
"epoch": 0.7162960878131157,
"grad_norm": 1.0868752002716064,
"learning_rate": 0.0001620852573087111,
"loss": 0.4097,
"step": 2545
},
{
"epoch": 0.7177033492822966,
"grad_norm": 0.877193033695221,
"learning_rate": 0.00016189251505976403,
"loss": 0.4445,
"step": 2550
},
{
"epoch": 0.7191106107514776,
"grad_norm": 1.735767126083374,
"learning_rate": 0.00016169939939168155,
"loss": 0.4002,
"step": 2555
},
{
"epoch": 0.7205178722206586,
"grad_norm": 0.679560124874115,
"learning_rate": 0.00016150591146959787,
"loss": 0.4376,
"step": 2560
},
{
"epoch": 0.7219251336898396,
"grad_norm": 0.7569028735160828,
"learning_rate": 0.00016131205246089304,
"loss": 0.5988,
"step": 2565
},
{
"epoch": 0.7233323951590206,
"grad_norm": 0.7681282758712769,
"learning_rate": 0.00016111782353518624,
"loss": 0.6736,
"step": 2570
},
{
"epoch": 0.7247396566282015,
"grad_norm": 0.9109302759170532,
"learning_rate": 0.0001609232258643282,
"loss": 0.4269,
"step": 2575
},
{
"epoch": 0.7261469180973825,
"grad_norm": 1.033499002456665,
"learning_rate": 0.00016072826062239458,
"loss": 0.4186,
"step": 2580
},
{
"epoch": 0.7275541795665634,
"grad_norm": 0.765438437461853,
"learning_rate": 0.00016053292898567876,
"loss": 0.4688,
"step": 2585
},
{
"epoch": 0.7289614410357445,
"grad_norm": 1.352359414100647,
"learning_rate": 0.00016033723213268464,
"loss": 0.4242,
"step": 2590
},
{
"epoch": 0.7303687025049254,
"grad_norm": 0.9118134379386902,
"learning_rate": 0.00016014117124411954,
"loss": 0.4915,
"step": 2595
},
{
"epoch": 0.7317759639741064,
"grad_norm": 1.1372839212417603,
"learning_rate": 0.00015994474750288725,
"loss": 0.3128,
"step": 2600
},
{
"epoch": 0.7331832254432874,
"grad_norm": 0.23089000582695007,
"learning_rate": 0.00015974796209408071,
"loss": 0.4923,
"step": 2605
},
{
"epoch": 0.7345904869124683,
"grad_norm": 1.543110728263855,
"learning_rate": 0.00015955081620497497,
"loss": 0.5901,
"step": 2610
},
{
"epoch": 0.7359977483816493,
"grad_norm": 1.474463939666748,
"learning_rate": 0.00015935331102501994,
"loss": 0.5367,
"step": 2615
},
{
"epoch": 0.7374050098508302,
"grad_norm": 0.7584693431854248,
"learning_rate": 0.00015915544774583324,
"loss": 0.6098,
"step": 2620
},
{
"epoch": 0.7388122713200113,
"grad_norm": 0.6778565645217896,
"learning_rate": 0.0001589572275611931,
"loss": 0.4514,
"step": 2625
},
{
"epoch": 0.7402195327891923,
"grad_norm": 0.7713000178337097,
"learning_rate": 0.00015875865166703105,
"loss": 0.2646,
"step": 2630
},
{
"epoch": 0.7416267942583732,
"grad_norm": 1.2152999639511108,
"learning_rate": 0.0001585597212614247,
"loss": 0.5909,
"step": 2635
},
{
"epoch": 0.7430340557275542,
"grad_norm": 1.4983125925064087,
"learning_rate": 0.00015836043754459064,
"loss": 0.4621,
"step": 2640
},
{
"epoch": 0.7444413171967351,
"grad_norm": 1.0301270484924316,
"learning_rate": 0.000158160801718877,
"loss": 0.2372,
"step": 2645
},
{
"epoch": 0.7458485786659161,
"grad_norm": 1.2305338382720947,
"learning_rate": 0.0001579608149887564,
"loss": 0.3397,
"step": 2650
},
{
"epoch": 0.747255840135097,
"grad_norm": 1.1948976516723633,
"learning_rate": 0.00015776047856081853,
"loss": 0.3388,
"step": 2655
},
{
"epoch": 0.7486631016042781,
"grad_norm": 1.539473295211792,
"learning_rate": 0.00015755979364376295,
"loss": 0.239,
"step": 2660
},
{
"epoch": 0.7500703630734591,
"grad_norm": 2.136974811553955,
"learning_rate": 0.0001573587614483918,
"loss": 0.5409,
"step": 2665
},
{
"epoch": 0.75147762454264,
"grad_norm": 1.2603963613510132,
"learning_rate": 0.0001571573831876024,
"loss": 0.3763,
"step": 2670
},
{
"epoch": 0.752884886011821,
"grad_norm": 0.9054425954818726,
"learning_rate": 0.00015695566007638013,
"loss": 0.4531,
"step": 2675
},
{
"epoch": 0.7542921474810019,
"grad_norm": 0.6948245763778687,
"learning_rate": 0.0001567535933317908,
"loss": 0.3894,
"step": 2680
},
{
"epoch": 0.755699408950183,
"grad_norm": 1.3231799602508545,
"learning_rate": 0.00015655118417297366,
"loss": 0.4352,
"step": 2685
},
{
"epoch": 0.757106670419364,
"grad_norm": 0.8093194365501404,
"learning_rate": 0.00015634843382113372,
"loss": 0.5505,
"step": 2690
},
{
"epoch": 0.7585139318885449,
"grad_norm": 0.7088418006896973,
"learning_rate": 0.0001561453434995346,
"loss": 0.4232,
"step": 2695
},
{
"epoch": 0.7599211933577259,
"grad_norm": 0.48376569151878357,
"learning_rate": 0.00015594191443349105,
"loss": 0.5123,
"step": 2700
},
{
"epoch": 0.7613284548269068,
"grad_norm": 1.2853504419326782,
"learning_rate": 0.00015573814785036164,
"loss": 0.3733,
"step": 2705
},
{
"epoch": 0.7627357162960878,
"grad_norm": 0.7034462690353394,
"learning_rate": 0.00015553404497954117,
"loss": 0.4144,
"step": 2710
},
{
"epoch": 0.7641429777652687,
"grad_norm": 1.340484380722046,
"learning_rate": 0.00015532960705245356,
"loss": 0.4388,
"step": 2715
},
{
"epoch": 0.7655502392344498,
"grad_norm": 0.7512633204460144,
"learning_rate": 0.00015512483530254412,
"loss": 0.4672,
"step": 2720
},
{
"epoch": 0.7669575007036308,
"grad_norm": 2.1453585624694824,
"learning_rate": 0.00015491973096527217,
"loss": 0.8132,
"step": 2725
},
{
"epoch": 0.7683647621728117,
"grad_norm": 1.0686702728271484,
"learning_rate": 0.00015471429527810383,
"loss": 0.3679,
"step": 2730
},
{
"epoch": 0.7697720236419927,
"grad_norm": 1.7490125894546509,
"learning_rate": 0.00015450852948050426,
"loss": 0.3288,
"step": 2735
},
{
"epoch": 0.7711792851111736,
"grad_norm": 1.7581394910812378,
"learning_rate": 0.00015430243481393024,
"loss": 0.6833,
"step": 2740
},
{
"epoch": 0.7725865465803546,
"grad_norm": 1.5255379676818848,
"learning_rate": 0.00015409601252182285,
"loss": 0.4711,
"step": 2745
},
{
"epoch": 0.7739938080495357,
"grad_norm": 1.7117855548858643,
"learning_rate": 0.00015388926384959976,
"loss": 0.6609,
"step": 2750
},
{
"epoch": 0.7754010695187166,
"grad_norm": 0.5109424591064453,
"learning_rate": 0.00015368219004464786,
"loss": 0.3426,
"step": 2755
},
{
"epoch": 0.7768083309878976,
"grad_norm": 1.3394129276275635,
"learning_rate": 0.0001534747923563156,
"loss": 0.4882,
"step": 2760
},
{
"epoch": 0.7782155924570785,
"grad_norm": 1.1809154748916626,
"learning_rate": 0.00015326707203590568,
"loss": 0.262,
"step": 2765
},
{
"epoch": 0.7796228539262595,
"grad_norm": 0.6428471207618713,
"learning_rate": 0.0001530590303366672,
"loss": 0.3657,
"step": 2770
},
{
"epoch": 0.7810301153954404,
"grad_norm": 0.5726737976074219,
"learning_rate": 0.0001528506685137883,
"loss": 0.4514,
"step": 2775
},
{
"epoch": 0.7824373768646214,
"grad_norm": 0.589094877243042,
"learning_rate": 0.00015264198782438858,
"loss": 0.5539,
"step": 2780
},
{
"epoch": 0.7838446383338025,
"grad_norm": 0.7207341194152832,
"learning_rate": 0.00015243298952751145,
"loss": 0.3529,
"step": 2785
},
{
"epoch": 0.7852518998029834,
"grad_norm": 1.0593701601028442,
"learning_rate": 0.0001522236748841165,
"loss": 0.317,
"step": 2790
},
{
"epoch": 0.7866591612721644,
"grad_norm": 1.1395798921585083,
"learning_rate": 0.000152014045157072,
"loss": 0.5062,
"step": 2795
},
{
"epoch": 0.7880664227413453,
"grad_norm": 1.3966251611709595,
"learning_rate": 0.00015180410161114724,
"loss": 0.4887,
"step": 2800
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.7492479681968689,
"learning_rate": 0.00015159384551300493,
"loss": 0.3919,
"step": 2805
},
{
"epoch": 0.7908809456797072,
"grad_norm": 1.2680071592330933,
"learning_rate": 0.00015138327813119337,
"loss": 0.3053,
"step": 2810
},
{
"epoch": 0.7922882071488883,
"grad_norm": 1.4319703578948975,
"learning_rate": 0.00015117240073613908,
"loss": 0.3683,
"step": 2815
},
{
"epoch": 0.7936954686180693,
"grad_norm": 1.0931735038757324,
"learning_rate": 0.00015096121460013895,
"loss": 0.5054,
"step": 2820
},
{
"epoch": 0.7951027300872502,
"grad_norm": 0.627133309841156,
"learning_rate": 0.00015074972099735266,
"loss": 0.4424,
"step": 2825
},
{
"epoch": 0.7965099915564312,
"grad_norm": 0.90239417552948,
"learning_rate": 0.00015053792120379476,
"loss": 0.5346,
"step": 2830
},
{
"epoch": 0.7979172530256121,
"grad_norm": 1.3932188749313354,
"learning_rate": 0.0001503258164973274,
"loss": 0.5265,
"step": 2835
},
{
"epoch": 0.7993245144947931,
"grad_norm": 1.2821606397628784,
"learning_rate": 0.0001501134081576523,
"loss": 0.3778,
"step": 2840
},
{
"epoch": 0.8007317759639742,
"grad_norm": 0.8399055600166321,
"learning_rate": 0.00014990069746630299,
"loss": 0.5459,
"step": 2845
},
{
"epoch": 0.8021390374331551,
"grad_norm": 2.0415430068969727,
"learning_rate": 0.00014968768570663735,
"loss": 0.534,
"step": 2850
},
{
"epoch": 0.8035462989023361,
"grad_norm": 1.1202126741409302,
"learning_rate": 0.00014947437416382956,
"loss": 0.3913,
"step": 2855
},
{
"epoch": 0.804953560371517,
"grad_norm": 1.3579108715057373,
"learning_rate": 0.00014926076412486263,
"loss": 0.3769,
"step": 2860
},
{
"epoch": 0.806360821840698,
"grad_norm": 1.1060523986816406,
"learning_rate": 0.00014904685687852043,
"loss": 0.4045,
"step": 2865
},
{
"epoch": 0.8077680833098789,
"grad_norm": 1.785001277923584,
"learning_rate": 0.00014883265371538,
"loss": 0.4895,
"step": 2870
},
{
"epoch": 0.80917534477906,
"grad_norm": 0.7138920426368713,
"learning_rate": 0.00014861815592780378,
"loss": 0.2431,
"step": 2875
},
{
"epoch": 0.810582606248241,
"grad_norm": 1.0932033061981201,
"learning_rate": 0.00014840336480993172,
"loss": 0.4196,
"step": 2880
},
{
"epoch": 0.8119898677174219,
"grad_norm": 1.47943115234375,
"learning_rate": 0.00014818828165767355,
"loss": 0.4288,
"step": 2885
},
{
"epoch": 0.8133971291866029,
"grad_norm": 1.5669611692428589,
"learning_rate": 0.00014797290776870101,
"loss": 0.7103,
"step": 2890
},
{
"epoch": 0.8148043906557838,
"grad_norm": 1.002616047859192,
"learning_rate": 0.0001477572444424399,
"loss": 0.2174,
"step": 2895
},
{
"epoch": 0.8162116521249648,
"grad_norm": 1.2607040405273438,
"learning_rate": 0.00014754129298006228,
"loss": 0.3312,
"step": 2900
},
{
"epoch": 0.8176189135941458,
"grad_norm": 1.2113310098648071,
"learning_rate": 0.00014732505468447867,
"loss": 0.309,
"step": 2905
},
{
"epoch": 0.8190261750633268,
"grad_norm": 0.6215373277664185,
"learning_rate": 0.00014710853086033013,
"loss": 0.3802,
"step": 2910
},
{
"epoch": 0.8204334365325078,
"grad_norm": 0.9997283220291138,
"learning_rate": 0.00014689172281398042,
"loss": 0.5467,
"step": 2915
},
{
"epoch": 0.8218406980016887,
"grad_norm": 0.7299907803535461,
"learning_rate": 0.0001466746318535082,
"loss": 0.4039,
"step": 2920
},
{
"epoch": 0.8232479594708697,
"grad_norm": 0.8940709829330444,
"learning_rate": 0.00014645725928869892,
"loss": 0.282,
"step": 2925
},
{
"epoch": 0.8246552209400506,
"grad_norm": 1.1947124004364014,
"learning_rate": 0.00014623960643103705,
"loss": 0.4364,
"step": 2930
},
{
"epoch": 0.8260624824092316,
"grad_norm": 0.6835992932319641,
"learning_rate": 0.00014602167459369826,
"loss": 0.4539,
"step": 2935
},
{
"epoch": 0.8274697438784127,
"grad_norm": 0.7021106481552124,
"learning_rate": 0.00014580346509154136,
"loss": 0.2876,
"step": 2940
},
{
"epoch": 0.8288770053475936,
"grad_norm": 1.7289482355117798,
"learning_rate": 0.00014558497924110038,
"loss": 0.4377,
"step": 2945
},
{
"epoch": 0.8302842668167746,
"grad_norm": 1.0549077987670898,
"learning_rate": 0.00014536621836057665,
"loss": 0.5667,
"step": 2950
},
{
"epoch": 0.8316915282859555,
"grad_norm": 0.5255772471427917,
"learning_rate": 0.000145147183769831,
"loss": 0.4976,
"step": 2955
},
{
"epoch": 0.8330987897551365,
"grad_norm": 2.376354694366455,
"learning_rate": 0.00014492787679037537,
"loss": 0.8001,
"step": 2960
},
{
"epoch": 0.8345060512243174,
"grad_norm": 0.8916311264038086,
"learning_rate": 0.0001447082987453654,
"loss": 0.4217,
"step": 2965
},
{
"epoch": 0.8359133126934984,
"grad_norm": 0.5236600637435913,
"learning_rate": 0.00014448845095959192,
"loss": 0.4531,
"step": 2970
},
{
"epoch": 0.8373205741626795,
"grad_norm": 1.5615344047546387,
"learning_rate": 0.00014426833475947345,
"loss": 0.3796,
"step": 2975
},
{
"epoch": 0.8387278356318604,
"grad_norm": 0.6851219534873962,
"learning_rate": 0.00014404795147304774,
"loss": 0.3966,
"step": 2980
},
{
"epoch": 0.8401350971010414,
"grad_norm": 1.6611498594284058,
"learning_rate": 0.00014382730242996404,
"loss": 0.6284,
"step": 2985
},
{
"epoch": 0.8415423585702223,
"grad_norm": 2.139336109161377,
"learning_rate": 0.00014360638896147501,
"loss": 0.4697,
"step": 2990
},
{
"epoch": 0.8429496200394033,
"grad_norm": 1.0581591129302979,
"learning_rate": 0.00014338521240042873,
"loss": 0.5119,
"step": 2995
},
{
"epoch": 0.8443568815085843,
"grad_norm": 0.885945200920105,
"learning_rate": 0.00014316377408126046,
"loss": 0.4225,
"step": 3000
},
{
"epoch": 0.8457641429777653,
"grad_norm": 2.1063387393951416,
"learning_rate": 0.00014294207533998486,
"loss": 0.4308,
"step": 3005
},
{
"epoch": 0.8471714044469463,
"grad_norm": 0.6381533741950989,
"learning_rate": 0.00014272011751418782,
"loss": 0.4063,
"step": 3010
},
{
"epoch": 0.8485786659161272,
"grad_norm": 0.740987241268158,
"learning_rate": 0.00014249790194301832,
"loss": 0.2807,
"step": 3015
},
{
"epoch": 0.8499859273853082,
"grad_norm": 0.8399060964584351,
"learning_rate": 0.0001422754299671804,
"loss": 0.3904,
"step": 3020
},
{
"epoch": 0.8513931888544891,
"grad_norm": 1.4542044401168823,
"learning_rate": 0.00014205270292892512,
"loss": 0.5098,
"step": 3025
},
{
"epoch": 0.8528004503236701,
"grad_norm": 0.8759632706642151,
"learning_rate": 0.00014182972217204238,
"loss": 0.438,
"step": 3030
},
{
"epoch": 0.8542077117928512,
"grad_norm": 1.2544376850128174,
"learning_rate": 0.00014160648904185295,
"loss": 0.3654,
"step": 3035
},
{
"epoch": 0.8556149732620321,
"grad_norm": 0.9191109538078308,
"learning_rate": 0.00014138300488520007,
"loss": 0.4855,
"step": 3040
},
{
"epoch": 0.8570222347312131,
"grad_norm": 1.2452969551086426,
"learning_rate": 0.00014115927105044172,
"loss": 0.1865,
"step": 3045
},
{
"epoch": 0.858429496200394,
"grad_norm": 1.0692249536514282,
"learning_rate": 0.00014093528888744212,
"loss": 0.3869,
"step": 3050
},
{
"epoch": 0.859836757669575,
"grad_norm": 0.9611905217170715,
"learning_rate": 0.00014071105974756382,
"loss": 0.4429,
"step": 3055
},
{
"epoch": 0.861244019138756,
"grad_norm": 1.419103741645813,
"learning_rate": 0.00014048658498365946,
"loss": 0.3828,
"step": 3060
},
{
"epoch": 0.8626512806079369,
"grad_norm": 0.70958012342453,
"learning_rate": 0.00014026186595006356,
"loss": 0.4098,
"step": 3065
},
{
"epoch": 0.864058542077118,
"grad_norm": 0.7273248434066772,
"learning_rate": 0.0001400369040025845,
"loss": 0.3795,
"step": 3070
},
{
"epoch": 0.8654658035462989,
"grad_norm": 1.2816479206085205,
"learning_rate": 0.00013981170049849614,
"loss": 0.3648,
"step": 3075
},
{
"epoch": 0.8668730650154799,
"grad_norm": 1.0046167373657227,
"learning_rate": 0.00013958625679652982,
"loss": 0.3949,
"step": 3080
},
{
"epoch": 0.8682803264846608,
"grad_norm": 0.45679983496665955,
"learning_rate": 0.000139360574256866,
"loss": 0.3828,
"step": 3085
},
{
"epoch": 0.8696875879538418,
"grad_norm": 0.7042393684387207,
"learning_rate": 0.00013913465424112627,
"loss": 0.3163,
"step": 3090
},
{
"epoch": 0.8710948494230228,
"grad_norm": 0.7769744992256165,
"learning_rate": 0.00013890849811236478,
"loss": 0.275,
"step": 3095
},
{
"epoch": 0.8725021108922038,
"grad_norm": 0.5500330328941345,
"learning_rate": 0.0001386821072350604,
"loss": 0.36,
"step": 3100
},
{
"epoch": 0.8739093723613848,
"grad_norm": 1.508569359779358,
"learning_rate": 0.00013845548297510834,
"loss": 0.3744,
"step": 3105
},
{
"epoch": 0.8753166338305657,
"grad_norm": 1.6323150396347046,
"learning_rate": 0.0001382286266998117,
"loss": 0.5385,
"step": 3110
},
{
"epoch": 0.8767238952997467,
"grad_norm": 1.0691790580749512,
"learning_rate": 0.00013800153977787364,
"loss": 0.4918,
"step": 3115
},
{
"epoch": 0.8781311567689276,
"grad_norm": 0.8545736074447632,
"learning_rate": 0.0001377742235793887,
"loss": 0.327,
"step": 3120
},
{
"epoch": 0.8795384182381086,
"grad_norm": 1.2977032661437988,
"learning_rate": 0.00013754667947583486,
"loss": 0.3627,
"step": 3125
},
{
"epoch": 0.8809456797072897,
"grad_norm": 0.8414074778556824,
"learning_rate": 0.00013731890884006507,
"loss": 0.4126,
"step": 3130
},
{
"epoch": 0.8823529411764706,
"grad_norm": 1.2440998554229736,
"learning_rate": 0.00013709091304629903,
"loss": 0.5402,
"step": 3135
},
{
"epoch": 0.8837602026456516,
"grad_norm": 1.1474038362503052,
"learning_rate": 0.00013686269347011487,
"loss": 0.4402,
"step": 3140
},
{
"epoch": 0.8851674641148325,
"grad_norm": 1.9769107103347778,
"learning_rate": 0.00013663425148844097,
"loss": 0.5528,
"step": 3145
},
{
"epoch": 0.8865747255840135,
"grad_norm": 1.071049451828003,
"learning_rate": 0.00013640558847954746,
"loss": 0.3496,
"step": 3150
},
{
"epoch": 0.8879819870531945,
"grad_norm": 1.002313494682312,
"learning_rate": 0.00013617670582303804,
"loss": 0.4351,
"step": 3155
},
{
"epoch": 0.8893892485223754,
"grad_norm": 0.8908954858779907,
"learning_rate": 0.00013594760489984167,
"loss": 0.3371,
"step": 3160
},
{
"epoch": 0.8907965099915565,
"grad_norm": 0.9060853123664856,
"learning_rate": 0.00013571828709220413,
"loss": 0.2489,
"step": 3165
},
{
"epoch": 0.8922037714607374,
"grad_norm": 0.7479000687599182,
"learning_rate": 0.00013548875378367972,
"loss": 0.2874,
"step": 3170
},
{
"epoch": 0.8936110329299184,
"grad_norm": 0.9289246201515198,
"learning_rate": 0.00013525900635912299,
"loss": 0.466,
"step": 3175
},
{
"epoch": 0.8950182943990993,
"grad_norm": 1.428377628326416,
"learning_rate": 0.0001350290462046803,
"loss": 0.5203,
"step": 3180
},
{
"epoch": 0.8964255558682803,
"grad_norm": 0.7524283528327942,
"learning_rate": 0.00013479887470778149,
"loss": 0.365,
"step": 3185
},
{
"epoch": 0.8978328173374613,
"grad_norm": 1.021815299987793,
"learning_rate": 0.0001345684932571315,
"loss": 0.5084,
"step": 3190
},
{
"epoch": 0.8992400788066423,
"grad_norm": 0.7522305846214294,
"learning_rate": 0.00013433790324270199,
"loss": 0.2659,
"step": 3195
},
{
"epoch": 0.9006473402758233,
"grad_norm": 1.3865163326263428,
"learning_rate": 0.00013410710605572294,
"loss": 0.2533,
"step": 3200
},
{
"epoch": 0.9020546017450042,
"grad_norm": 1.8485382795333862,
"learning_rate": 0.00013387610308867437,
"loss": 0.3675,
"step": 3205
},
{
"epoch": 0.9034618632141852,
"grad_norm": 1.203482985496521,
"learning_rate": 0.0001336448957352777,
"loss": 0.3284,
"step": 3210
},
{
"epoch": 0.9048691246833662,
"grad_norm": 0.9714936017990112,
"learning_rate": 0.00013341348539048752,
"loss": 0.2657,
"step": 3215
},
{
"epoch": 0.9062763861525471,
"grad_norm": 1.062326192855835,
"learning_rate": 0.00013318187345048328,
"loss": 0.3837,
"step": 3220
},
{
"epoch": 0.9076836476217282,
"grad_norm": 1.3822613954544067,
"learning_rate": 0.00013295006131266055,
"loss": 0.3584,
"step": 3225
},
{
"epoch": 0.9090909090909091,
"grad_norm": 1.2804548740386963,
"learning_rate": 0.0001327180503756228,
"loss": 0.4558,
"step": 3230
},
{
"epoch": 0.9104981705600901,
"grad_norm": 0.6253718137741089,
"learning_rate": 0.00013248584203917298,
"loss": 0.2871,
"step": 3235
},
{
"epoch": 0.911905432029271,
"grad_norm": 0.8237050175666809,
"learning_rate": 0.00013225343770430502,
"loss": 0.4014,
"step": 3240
},
{
"epoch": 0.913312693498452,
"grad_norm": 0.9199953675270081,
"learning_rate": 0.00013202083877319538,
"loss": 0.597,
"step": 3245
},
{
"epoch": 0.914719954967633,
"grad_norm": 1.0530214309692383,
"learning_rate": 0.00013178804664919444,
"loss": 0.5745,
"step": 3250
},
{
"epoch": 0.9161272164368139,
"grad_norm": 1.0369855165481567,
"learning_rate": 0.00013155506273681837,
"loss": 0.2493,
"step": 3255
},
{
"epoch": 0.917534477905995,
"grad_norm": 0.37017834186553955,
"learning_rate": 0.00013132188844174042,
"loss": 0.5125,
"step": 3260
},
{
"epoch": 0.9189417393751759,
"grad_norm": 0.5272582769393921,
"learning_rate": 0.0001310885251707824,
"loss": 0.2099,
"step": 3265
},
{
"epoch": 0.9203490008443569,
"grad_norm": 1.3228068351745605,
"learning_rate": 0.00013085497433190635,
"loss": 0.3625,
"step": 3270
},
{
"epoch": 0.9217562623135379,
"grad_norm": 1.2980788946151733,
"learning_rate": 0.000130621237334206,
"loss": 0.3258,
"step": 3275
},
{
"epoch": 0.9231635237827188,
"grad_norm": 0.7955147624015808,
"learning_rate": 0.00013038731558789816,
"loss": 0.331,
"step": 3280
},
{
"epoch": 0.9245707852518998,
"grad_norm": 0.33198082447052,
"learning_rate": 0.00013015321050431435,
"loss": 0.2828,
"step": 3285
},
{
"epoch": 0.9259780467210807,
"grad_norm": 1.193824052810669,
"learning_rate": 0.0001299189234958922,
"loss": 0.5299,
"step": 3290
},
{
"epoch": 0.9273853081902618,
"grad_norm": 0.6841180324554443,
"learning_rate": 0.00012968445597616695,
"loss": 0.2236,
"step": 3295
},
{
"epoch": 0.9287925696594427,
"grad_norm": 1.009793758392334,
"learning_rate": 0.00012944980935976295,
"loss": 0.4583,
"step": 3300
},
{
"epoch": 0.9301998311286237,
"grad_norm": 1.1918591260910034,
"learning_rate": 0.00012921498506238512,
"loss": 0.4523,
"step": 3305
},
{
"epoch": 0.9316070925978047,
"grad_norm": 0.7123336791992188,
"learning_rate": 0.00012897998450081037,
"loss": 0.3185,
"step": 3310
},
{
"epoch": 0.9330143540669856,
"grad_norm": 0.6820237040519714,
"learning_rate": 0.00012874480909287904,
"loss": 0.4963,
"step": 3315
},
{
"epoch": 0.9344216155361666,
"grad_norm": 0.6030889749526978,
"learning_rate": 0.00012850946025748643,
"loss": 0.3238,
"step": 3320
},
{
"epoch": 0.9358288770053476,
"grad_norm": 0.3159545958042145,
"learning_rate": 0.00012827393941457416,
"loss": 0.1804,
"step": 3325
},
{
"epoch": 0.9372361384745286,
"grad_norm": 0.500643789768219,
"learning_rate": 0.00012803824798512166,
"loss": 0.4421,
"step": 3330
},
{
"epoch": 0.9386433999437095,
"grad_norm": 1.0271189212799072,
"learning_rate": 0.00012780238739113755,
"loss": 0.4825,
"step": 3335
},
{
"epoch": 0.9400506614128905,
"grad_norm": 1.3835067749023438,
"learning_rate": 0.000127566359055651,
"loss": 0.5109,
"step": 3340
},
{
"epoch": 0.9414579228820715,
"grad_norm": 0.6945546269416809,
"learning_rate": 0.00012733016440270344,
"loss": 0.3438,
"step": 3345
},
{
"epoch": 0.9428651843512524,
"grad_norm": 0.5347813367843628,
"learning_rate": 0.0001270938048573395,
"loss": 0.2245,
"step": 3350
},
{
"epoch": 0.9442724458204335,
"grad_norm": 0.5110495090484619,
"learning_rate": 0.00012685728184559878,
"loss": 0.3236,
"step": 3355
},
{
"epoch": 0.9456797072896144,
"grad_norm": 1.1028776168823242,
"learning_rate": 0.00012662059679450715,
"loss": 0.3656,
"step": 3360
},
{
"epoch": 0.9470869687587954,
"grad_norm": 1.0305935144424438,
"learning_rate": 0.0001263837511320681,
"loss": 0.2271,
"step": 3365
},
{
"epoch": 0.9484942302279764,
"grad_norm": 1.1044567823410034,
"learning_rate": 0.0001261467462872541,
"loss": 0.3901,
"step": 3370
},
{
"epoch": 0.9499014916971573,
"grad_norm": 1.0489617586135864,
"learning_rate": 0.00012590958368999817,
"loss": 0.3906,
"step": 3375
},
{
"epoch": 0.9513087531663383,
"grad_norm": 0.9781221747398376,
"learning_rate": 0.0001256722647711849,
"loss": 0.3616,
"step": 3380
},
{
"epoch": 0.9527160146355192,
"grad_norm": 1.1387841701507568,
"learning_rate": 0.0001254347909626421,
"loss": 0.2382,
"step": 3385
},
{
"epoch": 0.9541232761047003,
"grad_norm": 1.3473316431045532,
"learning_rate": 0.00012519716369713214,
"loss": 0.446,
"step": 3390
},
{
"epoch": 0.9555305375738812,
"grad_norm": 1.1464128494262695,
"learning_rate": 0.00012495938440834327,
"loss": 0.341,
"step": 3395
},
{
"epoch": 0.9569377990430622,
"grad_norm": 0.9990252256393433,
"learning_rate": 0.0001247214545308808,
"loss": 0.4666,
"step": 3400
},
{
"epoch": 0.9583450605122432,
"grad_norm": 1.9256302118301392,
"learning_rate": 0.0001244833755002587,
"loss": 0.4555,
"step": 3405
},
{
"epoch": 0.9597523219814241,
"grad_norm": 0.8169670104980469,
"learning_rate": 0.00012424514875289088,
"loss": 0.6558,
"step": 3410
},
{
"epoch": 0.9611595834506051,
"grad_norm": 1.60161554813385,
"learning_rate": 0.0001240067757260824,
"loss": 0.4544,
"step": 3415
},
{
"epoch": 0.9625668449197861,
"grad_norm": 0.7437291741371155,
"learning_rate": 0.0001237682578580208,
"loss": 0.3022,
"step": 3420
},
{
"epoch": 0.9639741063889671,
"grad_norm": 0.9030975699424744,
"learning_rate": 0.00012352959658776767,
"loss": 0.4267,
"step": 3425
},
{
"epoch": 0.9653813678581481,
"grad_norm": 1.0298916101455688,
"learning_rate": 0.00012329079335524973,
"loss": 0.5084,
"step": 3430
},
{
"epoch": 0.966788629327329,
"grad_norm": 1.4346392154693604,
"learning_rate": 0.0001230518496012502,
"loss": 0.5032,
"step": 3435
},
{
"epoch": 0.96819589079651,
"grad_norm": 1.988788366317749,
"learning_rate": 0.00012281276676739996,
"loss": 0.5206,
"step": 3440
},
{
"epoch": 0.9696031522656909,
"grad_norm": 0.627189040184021,
"learning_rate": 0.00012257354629616933,
"loss": 0.3927,
"step": 3445
},
{
"epoch": 0.971010413734872,
"grad_norm": 1.1982104778289795,
"learning_rate": 0.0001223341896308588,
"loss": 0.4134,
"step": 3450
},
{
"epoch": 0.9724176752040529,
"grad_norm": 1.1405185461044312,
"learning_rate": 0.00012209469821559062,
"loss": 0.314,
"step": 3455
},
{
"epoch": 0.9738249366732339,
"grad_norm": 1.0637789964675903,
"learning_rate": 0.00012185507349530006,
"loss": 0.4855,
"step": 3460
},
{
"epoch": 0.9752321981424149,
"grad_norm": 1.1884607076644897,
"learning_rate": 0.00012161531691572665,
"loss": 0.4043,
"step": 3465
},
{
"epoch": 0.9766394596115958,
"grad_norm": 0.7082695960998535,
"learning_rate": 0.00012137542992340552,
"loss": 0.3864,
"step": 3470
},
{
"epoch": 0.9780467210807768,
"grad_norm": 1.400940179824829,
"learning_rate": 0.0001211354139656585,
"loss": 0.3179,
"step": 3475
},
{
"epoch": 0.9794539825499577,
"grad_norm": 1.0918678045272827,
"learning_rate": 0.00012089527049058566,
"loss": 0.3724,
"step": 3480
},
{
"epoch": 0.9808612440191388,
"grad_norm": 0.8317002654075623,
"learning_rate": 0.00012065500094705635,
"loss": 0.4669,
"step": 3485
},
{
"epoch": 0.9822685054883197,
"grad_norm": 2.4732000827789307,
"learning_rate": 0.00012041460678470057,
"loss": 0.536,
"step": 3490
},
{
"epoch": 0.9836757669575007,
"grad_norm": 0.4239155650138855,
"learning_rate": 0.00012017408945390009,
"loss": 0.4178,
"step": 3495
},
{
"epoch": 0.9850830284266817,
"grad_norm": 1.0096583366394043,
"learning_rate": 0.00011993345040577995,
"loss": 0.5533,
"step": 3500
},
{
"epoch": 0.9864902898958626,
"grad_norm": 1.6637718677520752,
"learning_rate": 0.00011969269109219945,
"loss": 0.1999,
"step": 3505
},
{
"epoch": 0.9878975513650436,
"grad_norm": 1.4339228868484497,
"learning_rate": 0.0001194518129657435,
"loss": 0.2913,
"step": 3510
},
{
"epoch": 0.9893048128342246,
"grad_norm": 0.9473050236701965,
"learning_rate": 0.00011921081747971392,
"loss": 0.4202,
"step": 3515
},
{
"epoch": 0.9907120743034056,
"grad_norm": 1.5468287467956543,
"learning_rate": 0.00011896970608812053,
"loss": 0.2755,
"step": 3520
},
{
"epoch": 0.9921193357725866,
"grad_norm": 1.0197608470916748,
"learning_rate": 0.00011872848024567245,
"loss": 0.399,
"step": 3525
},
{
"epoch": 0.9935265972417675,
"grad_norm": 1.9030907154083252,
"learning_rate": 0.00011848714140776936,
"loss": 0.3538,
"step": 3530
},
{
"epoch": 0.9949338587109485,
"grad_norm": 1.1370608806610107,
"learning_rate": 0.00011824569103049264,
"loss": 0.6243,
"step": 3535
},
{
"epoch": 0.9963411201801294,
"grad_norm": 0.7336493134498596,
"learning_rate": 0.0001180041305705967,
"loss": 0.287,
"step": 3540
},
{
"epoch": 0.9977483816493105,
"grad_norm": 0.8091352581977844,
"learning_rate": 0.0001177624614855,
"loss": 0.4314,
"step": 3545
},
{
"epoch": 0.9991556431184914,
"grad_norm": 0.8396396636962891,
"learning_rate": 0.0001175206852332765,
"loss": 0.243,
"step": 3550
},
{
"epoch": 1.0005629045876725,
"grad_norm": 0.4893011152744293,
"learning_rate": 0.00011727880327264667,
"loss": 0.4008,
"step": 3555
},
{
"epoch": 1.0019701660568534,
"grad_norm": 0.5934264659881592,
"learning_rate": 0.00011703681706296871,
"loss": 0.197,
"step": 3560
},
{
"epoch": 1.0033774275260343,
"grad_norm": 0.9697572588920593,
"learning_rate": 0.00011679472806422991,
"loss": 0.2565,
"step": 3565
},
{
"epoch": 1.0047846889952152,
"grad_norm": 0.6383791565895081,
"learning_rate": 0.00011655253773703763,
"loss": 0.1732,
"step": 3570
},
{
"epoch": 1.0061919504643964,
"grad_norm": 2.7294044494628906,
"learning_rate": 0.00011631024754261057,
"loss": 0.344,
"step": 3575
},
{
"epoch": 1.0075992119335773,
"grad_norm": 0.7987744212150574,
"learning_rate": 0.00011606785894277002,
"loss": 0.2462,
"step": 3580
},
{
"epoch": 1.0090064734027582,
"grad_norm": 1.0963287353515625,
"learning_rate": 0.00011582537339993102,
"loss": 0.2017,
"step": 3585
},
{
"epoch": 1.0104137348719393,
"grad_norm": 0.2937074303627014,
"learning_rate": 0.00011558279237709337,
"loss": 0.2587,
"step": 3590
},
{
"epoch": 1.0118209963411202,
"grad_norm": 1.1680563688278198,
"learning_rate": 0.00011534011733783303,
"loss": 0.3315,
"step": 3595
},
{
"epoch": 1.0132282578103011,
"grad_norm": 0.8227936029434204,
"learning_rate": 0.00011509734974629316,
"loss": 0.1936,
"step": 3600
},
{
"epoch": 1.014635519279482,
"grad_norm": 1.266236424446106,
"learning_rate": 0.0001148544910671754,
"loss": 0.283,
"step": 3605
},
{
"epoch": 1.0160427807486632,
"grad_norm": 0.4134606122970581,
"learning_rate": 0.0001146115427657308,
"loss": 0.1711,
"step": 3610
},
{
"epoch": 1.017450042217844,
"grad_norm": 0.5949440598487854,
"learning_rate": 0.00011436850630775127,
"loss": 0.2659,
"step": 3615
},
{
"epoch": 1.018857303687025,
"grad_norm": 1.2255134582519531,
"learning_rate": 0.00011412538315956051,
"loss": 0.331,
"step": 3620
},
{
"epoch": 1.0202645651562061,
"grad_norm": 0.7793748378753662,
"learning_rate": 0.00011388217478800536,
"loss": 0.3107,
"step": 3625
},
{
"epoch": 1.021671826625387,
"grad_norm": 1.5764113664627075,
"learning_rate": 0.00011363888266044668,
"loss": 0.2801,
"step": 3630
},
{
"epoch": 1.023079088094568,
"grad_norm": 0.7818349599838257,
"learning_rate": 0.0001133955082447508,
"loss": 0.4592,
"step": 3635
},
{
"epoch": 1.0244863495637488,
"grad_norm": 0.8325141072273254,
"learning_rate": 0.00011315205300928047,
"loss": 0.2221,
"step": 3640
},
{
"epoch": 1.02589361103293,
"grad_norm": 0.8759342432022095,
"learning_rate": 0.0001129085184228861,
"loss": 0.2282,
"step": 3645
},
{
"epoch": 1.0273008725021109,
"grad_norm": 0.8269652724266052,
"learning_rate": 0.00011266490595489672,
"loss": 0.288,
"step": 3650
},
{
"epoch": 1.0287081339712918,
"grad_norm": 0.9182637929916382,
"learning_rate": 0.0001124212170751114,
"loss": 0.2124,
"step": 3655
},
{
"epoch": 1.030115395440473,
"grad_norm": 0.7247250080108643,
"learning_rate": 0.00011217745325379017,
"loss": 0.2818,
"step": 3660
},
{
"epoch": 1.0315226569096538,
"grad_norm": 1.1736894845962524,
"learning_rate": 0.00011193361596164517,
"loss": 0.2349,
"step": 3665
},
{
"epoch": 1.0329299183788347,
"grad_norm": 0.3809513747692108,
"learning_rate": 0.00011168970666983184,
"loss": 0.158,
"step": 3670
},
{
"epoch": 1.0343371798480159,
"grad_norm": 1.4163240194320679,
"learning_rate": 0.0001114457268499401,
"loss": 0.3035,
"step": 3675
},
{
"epoch": 1.0357444413171968,
"grad_norm": 1.8142826557159424,
"learning_rate": 0.00011120167797398527,
"loss": 0.3572,
"step": 3680
},
{
"epoch": 1.0371517027863777,
"grad_norm": 0.9238508343696594,
"learning_rate": 0.00011095756151439934,
"loss": 0.2104,
"step": 3685
},
{
"epoch": 1.0385589642555586,
"grad_norm": 1.3922544717788696,
"learning_rate": 0.0001107133789440221,
"loss": 0.3846,
"step": 3690
},
{
"epoch": 1.0399662257247397,
"grad_norm": 0.5761235952377319,
"learning_rate": 0.00011046913173609217,
"loss": 0.1728,
"step": 3695
},
{
"epoch": 1.0413734871939206,
"grad_norm": 1.3399313688278198,
"learning_rate": 0.0001102248213642382,
"loss": 0.2158,
"step": 3700
},
{
"epoch": 1.0427807486631016,
"grad_norm": 0.5189816355705261,
"learning_rate": 0.00010998044930246985,
"loss": 0.2724,
"step": 3705
},
{
"epoch": 1.0441880101322827,
"grad_norm": 1.0454604625701904,
"learning_rate": 0.00010973601702516903,
"loss": 0.3016,
"step": 3710
},
{
"epoch": 1.0455952716014636,
"grad_norm": 0.9476893544197083,
"learning_rate": 0.00010949152600708096,
"loss": 0.161,
"step": 3715
},
{
"epoch": 1.0470025330706445,
"grad_norm": 1.1760029792785645,
"learning_rate": 0.00010924697772330525,
"loss": 0.3402,
"step": 3720
},
{
"epoch": 1.0484097945398254,
"grad_norm": 0.7986089587211609,
"learning_rate": 0.000109002373649287,
"loss": 0.3381,
"step": 3725
},
{
"epoch": 1.0498170560090065,
"grad_norm": 0.46115541458129883,
"learning_rate": 0.00010875771526080791,
"loss": 0.2121,
"step": 3730
},
{
"epoch": 1.0512243174781875,
"grad_norm": 0.8159217238426208,
"learning_rate": 0.00010851300403397741,
"loss": 0.1618,
"step": 3735
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.9532806277275085,
"learning_rate": 0.00010826824144522369,
"loss": 0.2001,
"step": 3740
},
{
"epoch": 1.0540388404165495,
"grad_norm": 0.987647294998169,
"learning_rate": 0.00010802342897128484,
"loss": 0.1255,
"step": 3745
},
{
"epoch": 1.0554461018857304,
"grad_norm": 0.5456539988517761,
"learning_rate": 0.00010777856808919993,
"loss": 0.1738,
"step": 3750
},
{
"epoch": 1.0568533633549113,
"grad_norm": 1.2354178428649902,
"learning_rate": 0.00010753366027630005,
"loss": 0.1968,
"step": 3755
},
{
"epoch": 1.0582606248240922,
"grad_norm": 1.5054504871368408,
"learning_rate": 0.00010728870701019952,
"loss": 0.3881,
"step": 3760
},
{
"epoch": 1.0596678862932734,
"grad_norm": 0.33300110697746277,
"learning_rate": 0.00010704370976878683,
"loss": 0.3455,
"step": 3765
},
{
"epoch": 1.0610751477624543,
"grad_norm": 0.28057172894477844,
"learning_rate": 0.00010679867003021582,
"loss": 0.3676,
"step": 3770
},
{
"epoch": 1.0624824092316352,
"grad_norm": 0.78326416015625,
"learning_rate": 0.0001065535892728967,
"loss": 0.2051,
"step": 3775
},
{
"epoch": 1.0638896707008163,
"grad_norm": 0.30371785163879395,
"learning_rate": 0.00010630846897548719,
"loss": 0.2172,
"step": 3780
},
{
"epoch": 1.0652969321699972,
"grad_norm": 0.951871931552887,
"learning_rate": 0.00010606331061688352,
"loss": 0.2731,
"step": 3785
},
{
"epoch": 1.0667041936391781,
"grad_norm": 0.9194802641868591,
"learning_rate": 0.00010581811567621165,
"loss": 0.437,
"step": 3790
},
{
"epoch": 1.068111455108359,
"grad_norm": 1.3185656070709229,
"learning_rate": 0.00010557288563281819,
"loss": 0.1762,
"step": 3795
},
{
"epoch": 1.0695187165775402,
"grad_norm": 0.6637858152389526,
"learning_rate": 0.00010532762196626151,
"loss": 0.3499,
"step": 3800
},
{
"epoch": 1.070925978046721,
"grad_norm": 0.5646357536315918,
"learning_rate": 0.00010508232615630291,
"loss": 0.1794,
"step": 3805
},
{
"epoch": 1.072333239515902,
"grad_norm": 0.7347474694252014,
"learning_rate": 0.00010483699968289754,
"loss": 0.2088,
"step": 3810
},
{
"epoch": 1.0737405009850831,
"grad_norm": 0.7603871822357178,
"learning_rate": 0.00010459164402618567,
"loss": 0.2723,
"step": 3815
},
{
"epoch": 1.075147762454264,
"grad_norm": 1.574090838432312,
"learning_rate": 0.0001043462606664835,
"loss": 0.3175,
"step": 3820
},
{
"epoch": 1.076555023923445,
"grad_norm": 1.8480275869369507,
"learning_rate": 0.00010410085108427448,
"loss": 0.3903,
"step": 3825
},
{
"epoch": 1.0779622853926258,
"grad_norm": 3.3462395668029785,
"learning_rate": 0.00010385541676020026,
"loss": 0.2867,
"step": 3830
},
{
"epoch": 1.079369546861807,
"grad_norm": 1.0282424688339233,
"learning_rate": 0.00010360995917505167,
"loss": 0.3542,
"step": 3835
},
{
"epoch": 1.0807768083309879,
"grad_norm": 1.081586241722107,
"learning_rate": 0.00010336447980976,
"loss": 0.1933,
"step": 3840
},
{
"epoch": 1.0821840698001688,
"grad_norm": 0.7061908841133118,
"learning_rate": 0.00010311898014538788,
"loss": 0.3673,
"step": 3845
},
{
"epoch": 1.08359133126935,
"grad_norm": 1.0589807033538818,
"learning_rate": 0.00010287346166312048,
"loss": 0.2017,
"step": 3850
},
{
"epoch": 1.0849985927385308,
"grad_norm": 0.7850357890129089,
"learning_rate": 0.0001026279258442564,
"loss": 0.3781,
"step": 3855
},
{
"epoch": 1.0864058542077117,
"grad_norm": 0.8800612688064575,
"learning_rate": 0.00010238237417019889,
"loss": 0.2454,
"step": 3860
},
{
"epoch": 1.0878131156768927,
"grad_norm": 0.8004993796348572,
"learning_rate": 0.00010213680812244693,
"loss": 0.3253,
"step": 3865
},
{
"epoch": 1.0892203771460738,
"grad_norm": 1.0395301580429077,
"learning_rate": 0.00010189122918258611,
"loss": 0.3023,
"step": 3870
},
{
"epoch": 1.0906276386152547,
"grad_norm": 0.7087461352348328,
"learning_rate": 0.00010164563883227982,
"loss": 0.258,
"step": 3875
},
{
"epoch": 1.0920349000844356,
"grad_norm": 1.0742789506912231,
"learning_rate": 0.00010140003855326034,
"loss": 0.1768,
"step": 3880
},
{
"epoch": 1.0934421615536167,
"grad_norm": 1.7721843719482422,
"learning_rate": 0.00010115442982731988,
"loss": 0.2673,
"step": 3885
},
{
"epoch": 1.0948494230227976,
"grad_norm": 0.5749943256378174,
"learning_rate": 0.00010090881413630154,
"loss": 0.2943,
"step": 3890
},
{
"epoch": 1.0962566844919786,
"grad_norm": 1.210871696472168,
"learning_rate": 0.00010066319296209043,
"loss": 0.2569,
"step": 3895
},
{
"epoch": 1.0976639459611597,
"grad_norm": 0.7546014189720154,
"learning_rate": 0.00010041756778660483,
"loss": 0.1277,
"step": 3900
},
{
"epoch": 1.0990712074303406,
"grad_norm": 0.45546409487724304,
"learning_rate": 0.0001001719400917871,
"loss": 0.2447,
"step": 3905
},
{
"epoch": 1.1004784688995215,
"grad_norm": 0.9810652136802673,
"learning_rate": 9.992631135959484e-05,
"loss": 0.1891,
"step": 3910
},
{
"epoch": 1.1018857303687024,
"grad_norm": 0.26853448152542114,
"learning_rate": 9.96806830719918e-05,
"loss": 0.2793,
"step": 3915
},
{
"epoch": 1.1032929918378835,
"grad_norm": 0.815556526184082,
"learning_rate": 9.943505671093923e-05,
"loss": 0.1589,
"step": 3920
},
{
"epoch": 1.1047002533070645,
"grad_norm": 1.1649208068847656,
"learning_rate": 9.918943375838658e-05,
"loss": 0.1692,
"step": 3925
},
{
"epoch": 1.1061075147762454,
"grad_norm": 1.3160449266433716,
"learning_rate": 9.894381569626286e-05,
"loss": 0.1748,
"step": 3930
},
{
"epoch": 1.1075147762454265,
"grad_norm": 0.7906925082206726,
"learning_rate": 9.869820400646752e-05,
"loss": 0.2706,
"step": 3935
},
{
"epoch": 1.1089220377146074,
"grad_norm": 1.7690831422805786,
"learning_rate": 9.845260017086152e-05,
"loss": 0.4101,
"step": 3940
},
{
"epoch": 1.1103292991837883,
"grad_norm": 0.7361578941345215,
"learning_rate": 9.820700567125855e-05,
"loss": 0.2352,
"step": 3945
},
{
"epoch": 1.1117365606529692,
"grad_norm": 0.7984316945075989,
"learning_rate": 9.79614219894159e-05,
"loss": 0.2466,
"step": 3950
},
{
"epoch": 1.1131438221221504,
"grad_norm": 1.6478660106658936,
"learning_rate": 9.771585060702551e-05,
"loss": 0.2434,
"step": 3955
},
{
"epoch": 1.1145510835913313,
"grad_norm": 0.8288646936416626,
"learning_rate": 9.747029300570528e-05,
"loss": 0.1954,
"step": 3960
},
{
"epoch": 1.1159583450605122,
"grad_norm": 1.0649809837341309,
"learning_rate": 9.722475066698992e-05,
"loss": 0.1995,
"step": 3965
},
{
"epoch": 1.1173656065296933,
"grad_norm": 1.0399101972579956,
"learning_rate": 9.697922507232194e-05,
"loss": 0.2972,
"step": 3970
},
{
"epoch": 1.1187728679988742,
"grad_norm": 0.9969576001167297,
"learning_rate": 9.673371770304291e-05,
"loss": 0.2133,
"step": 3975
},
{
"epoch": 1.1201801294680551,
"grad_norm": 0.7914555072784424,
"learning_rate": 9.648823004038452e-05,
"loss": 0.2006,
"step": 3980
},
{
"epoch": 1.1215873909372363,
"grad_norm": 0.8462080359458923,
"learning_rate": 9.62427635654594e-05,
"loss": 0.1759,
"step": 3985
},
{
"epoch": 1.1229946524064172,
"grad_norm": 1.5257298946380615,
"learning_rate": 9.599731975925248e-05,
"loss": 0.2961,
"step": 3990
},
{
"epoch": 1.124401913875598,
"grad_norm": 0.918910562992096,
"learning_rate": 9.575190010261179e-05,
"loss": 0.2468,
"step": 3995
},
{
"epoch": 1.125809175344779,
"grad_norm": 0.9318897128105164,
"learning_rate": 9.550650607623982e-05,
"loss": 0.2609,
"step": 4000
},
{
"epoch": 1.12721643681396,
"grad_norm": 0.49596425890922546,
"learning_rate": 9.526113916068431e-05,
"loss": 0.2369,
"step": 4005
},
{
"epoch": 1.128623698283141,
"grad_norm": 0.6530629396438599,
"learning_rate": 9.501580083632946e-05,
"loss": 0.1354,
"step": 4010
},
{
"epoch": 1.130030959752322,
"grad_norm": 0.39932572841644287,
"learning_rate": 9.477049258338694e-05,
"loss": 0.2277,
"step": 4015
},
{
"epoch": 1.131438221221503,
"grad_norm": 0.8406773805618286,
"learning_rate": 9.452521588188711e-05,
"loss": 0.1472,
"step": 4020
},
{
"epoch": 1.132845482690684,
"grad_norm": 0.7629873752593994,
"learning_rate": 9.427997221166978e-05,
"loss": 0.2421,
"step": 4025
},
{
"epoch": 1.1342527441598649,
"grad_norm": 1.1697338819503784,
"learning_rate": 9.40347630523756e-05,
"loss": 0.2181,
"step": 4030
},
{
"epoch": 1.1356600056290458,
"grad_norm": 0.924167811870575,
"learning_rate": 9.378958988343702e-05,
"loss": 0.3934,
"step": 4035
},
{
"epoch": 1.137067267098227,
"grad_norm": 0.8078356385231018,
"learning_rate": 9.354445418406924e-05,
"loss": 0.1403,
"step": 4040
},
{
"epoch": 1.1384745285674078,
"grad_norm": 0.520318329334259,
"learning_rate": 9.329935743326144e-05,
"loss": 0.2916,
"step": 4045
},
{
"epoch": 1.1398817900365887,
"grad_norm": 0.45882686972618103,
"learning_rate": 9.305430110976793e-05,
"loss": 0.1297,
"step": 4050
},
{
"epoch": 1.1412890515057699,
"grad_norm": 0.5139206051826477,
"learning_rate": 9.280928669209887e-05,
"loss": 0.2342,
"step": 4055
},
{
"epoch": 1.1426963129749508,
"grad_norm": 0.9370526671409607,
"learning_rate": 9.256431565851181e-05,
"loss": 0.1581,
"step": 4060
},
{
"epoch": 1.1441035744441317,
"grad_norm": 1.525415301322937,
"learning_rate": 9.23193894870024e-05,
"loss": 0.255,
"step": 4065
},
{
"epoch": 1.1455108359133126,
"grad_norm": 1.745328426361084,
"learning_rate": 9.207450965529571e-05,
"loss": 0.1585,
"step": 4070
},
{
"epoch": 1.1469180973824937,
"grad_norm": 0.5603808760643005,
"learning_rate": 9.18296776408372e-05,
"loss": 0.2085,
"step": 4075
},
{
"epoch": 1.1483253588516746,
"grad_norm": 0.24650625884532928,
"learning_rate": 9.158489492078381e-05,
"loss": 0.2441,
"step": 4080
},
{
"epoch": 1.1497326203208555,
"grad_norm": 1.2769076824188232,
"learning_rate": 9.134016297199506e-05,
"loss": 0.1923,
"step": 4085
},
{
"epoch": 1.1511398817900367,
"grad_norm": 0.6759532690048218,
"learning_rate": 9.109548327102424e-05,
"loss": 0.1818,
"step": 4090
},
{
"epoch": 1.1525471432592176,
"grad_norm": 1.7534480094909668,
"learning_rate": 9.085085729410928e-05,
"loss": 0.2677,
"step": 4095
},
{
"epoch": 1.1539544047283985,
"grad_norm": 1.578730583190918,
"learning_rate": 9.060628651716409e-05,
"loss": 0.3868,
"step": 4100
},
{
"epoch": 1.1553616661975794,
"grad_norm": 1.5693743228912354,
"learning_rate": 9.036177241576949e-05,
"loss": 0.4238,
"step": 4105
},
{
"epoch": 1.1567689276667605,
"grad_norm": 0.7190649509429932,
"learning_rate": 9.011731646516429e-05,
"loss": 0.2943,
"step": 4110
},
{
"epoch": 1.1581761891359414,
"grad_norm": 1.3021358251571655,
"learning_rate": 8.987292014023658e-05,
"loss": 0.282,
"step": 4115
},
{
"epoch": 1.1595834506051224,
"grad_norm": 0.7299554944038391,
"learning_rate": 8.962858491551467e-05,
"loss": 0.2086,
"step": 4120
},
{
"epoch": 1.1609907120743035,
"grad_norm": 0.8138667345046997,
"learning_rate": 8.938431226515813e-05,
"loss": 0.3847,
"step": 4125
},
{
"epoch": 1.1623979735434844,
"grad_norm": 1.6948626041412354,
"learning_rate": 8.914010366294917e-05,
"loss": 0.2519,
"step": 4130
},
{
"epoch": 1.1638052350126653,
"grad_norm": 0.4518921971321106,
"learning_rate": 8.889596058228339e-05,
"loss": 0.1481,
"step": 4135
},
{
"epoch": 1.1652124964818462,
"grad_norm": 0.9538673162460327,
"learning_rate": 8.865188449616124e-05,
"loss": 0.2342,
"step": 4140
},
{
"epoch": 1.1666197579510273,
"grad_norm": 1.5478556156158447,
"learning_rate": 8.84078768771789e-05,
"loss": 0.2741,
"step": 4145
},
{
"epoch": 1.1680270194202083,
"grad_norm": 0.8891351222991943,
"learning_rate": 8.816393919751937e-05,
"loss": 0.2279,
"step": 4150
},
{
"epoch": 1.1694342808893892,
"grad_norm": 1.0661555528640747,
"learning_rate": 8.792007292894387e-05,
"loss": 0.2588,
"step": 4155
},
{
"epoch": 1.1708415423585703,
"grad_norm": 1.0529447793960571,
"learning_rate": 8.767627954278267e-05,
"loss": 0.3593,
"step": 4160
},
{
"epoch": 1.1722488038277512,
"grad_norm": 1.0678569078445435,
"learning_rate": 8.743256050992623e-05,
"loss": 0.1596,
"step": 4165
},
{
"epoch": 1.1736560652969321,
"grad_norm": 0.7005488276481628,
"learning_rate": 8.71889173008166e-05,
"loss": 0.2517,
"step": 4170
},
{
"epoch": 1.175063326766113,
"grad_norm": 0.4683868885040283,
"learning_rate": 8.69453513854382e-05,
"loss": 0.1622,
"step": 4175
},
{
"epoch": 1.1764705882352942,
"grad_norm": 0.8689951300621033,
"learning_rate": 8.67018642333092e-05,
"loss": 0.1776,
"step": 4180
},
{
"epoch": 1.177877849704475,
"grad_norm": 0.7526000738143921,
"learning_rate": 8.645845731347248e-05,
"loss": 0.1588,
"step": 4185
},
{
"epoch": 1.179285111173656,
"grad_norm": 1.2025400400161743,
"learning_rate": 8.621513209448701e-05,
"loss": 0.197,
"step": 4190
},
{
"epoch": 1.180692372642837,
"grad_norm": 1.2456661462783813,
"learning_rate": 8.597189004441863e-05,
"loss": 0.2185,
"step": 4195
},
{
"epoch": 1.182099634112018,
"grad_norm": 0.26599639654159546,
"learning_rate": 8.572873263083152e-05,
"loss": 0.1736,
"step": 4200
},
{
"epoch": 1.183506895581199,
"grad_norm": 0.6946321725845337,
"learning_rate": 8.548566132077916e-05,
"loss": 0.2439,
"step": 4205
},
{
"epoch": 1.18491415705038,
"grad_norm": 0.8973987102508545,
"learning_rate": 8.524267758079557e-05,
"loss": 0.2171,
"step": 4210
},
{
"epoch": 1.186321418519561,
"grad_norm": 0.653135359287262,
"learning_rate": 8.499978287688648e-05,
"loss": 0.1822,
"step": 4215
},
{
"epoch": 1.1877286799887419,
"grad_norm": 1.1294854879379272,
"learning_rate": 8.475697867452028e-05,
"loss": 0.3998,
"step": 4220
},
{
"epoch": 1.189135941457923,
"grad_norm": 0.7260348200798035,
"learning_rate": 8.451426643861946e-05,
"loss": 0.3177,
"step": 4225
},
{
"epoch": 1.190543202927104,
"grad_norm": 0.9421544075012207,
"learning_rate": 8.427164763355169e-05,
"loss": 0.3644,
"step": 4230
},
{
"epoch": 1.1919504643962848,
"grad_norm": 1.8454887866973877,
"learning_rate": 8.402912372312076e-05,
"loss": 0.2601,
"step": 4235
},
{
"epoch": 1.1933577258654657,
"grad_norm": 0.7556844353675842,
"learning_rate": 8.378669617055806e-05,
"loss": 0.1539,
"step": 4240
},
{
"epoch": 1.1947649873346469,
"grad_norm": 1.1138182878494263,
"learning_rate": 8.354436643851365e-05,
"loss": 0.2221,
"step": 4245
},
{
"epoch": 1.1961722488038278,
"grad_norm": 1.7039527893066406,
"learning_rate": 8.330213598904726e-05,
"loss": 0.3543,
"step": 4250
},
{
"epoch": 1.1975795102730087,
"grad_norm": 1.6566787958145142,
"learning_rate": 8.306000628361972e-05,
"loss": 0.1975,
"step": 4255
},
{
"epoch": 1.1989867717421898,
"grad_norm": 1.0765029191970825,
"learning_rate": 8.281797878308406e-05,
"loss": 0.1358,
"step": 4260
},
{
"epoch": 1.2003940332113707,
"grad_norm": 0.7748456001281738,
"learning_rate": 8.257605494767654e-05,
"loss": 0.1821,
"step": 4265
},
{
"epoch": 1.2018012946805516,
"grad_norm": 0.32174113392829895,
"learning_rate": 8.233423623700816e-05,
"loss": 0.1391,
"step": 4270
},
{
"epoch": 1.2032085561497325,
"grad_norm": 0.5359024405479431,
"learning_rate": 8.209252411005548e-05,
"loss": 0.1476,
"step": 4275
},
{
"epoch": 1.2046158176189137,
"grad_norm": 0.9815373420715332,
"learning_rate": 8.185092002515209e-05,
"loss": 0.3173,
"step": 4280
},
{
"epoch": 1.2060230790880946,
"grad_norm": 0.6186626553535461,
"learning_rate": 8.16094254399798e-05,
"loss": 0.3268,
"step": 4285
},
{
"epoch": 1.2074303405572755,
"grad_norm": 1.598221778869629,
"learning_rate": 8.136804181155961e-05,
"loss": 0.2788,
"step": 4290
},
{
"epoch": 1.2088376020264566,
"grad_norm": 0.409020334482193,
"learning_rate": 8.112677059624316e-05,
"loss": 0.2455,
"step": 4295
},
{
"epoch": 1.2102448634956375,
"grad_norm": 1.0623451471328735,
"learning_rate": 8.088561324970396e-05,
"loss": 0.2883,
"step": 4300
},
{
"epoch": 1.2116521249648184,
"grad_norm": 0.9107158780097961,
"learning_rate": 8.064457122692828e-05,
"loss": 0.191,
"step": 4305
},
{
"epoch": 1.2130593864339994,
"grad_norm": 1.021278738975525,
"learning_rate": 8.040364598220682e-05,
"loss": 0.2287,
"step": 4310
},
{
"epoch": 1.2144666479031805,
"grad_norm": 1.0348402261734009,
"learning_rate": 8.016283896912563e-05,
"loss": 0.1455,
"step": 4315
},
{
"epoch": 1.2158739093723614,
"grad_norm": 1.06684410572052,
"learning_rate": 7.992215164055737e-05,
"loss": 0.1786,
"step": 4320
},
{
"epoch": 1.2172811708415423,
"grad_norm": 0.45586028695106506,
"learning_rate": 7.968158544865272e-05,
"loss": 0.2625,
"step": 4325
},
{
"epoch": 1.2186884323107234,
"grad_norm": 1.0333331823349,
"learning_rate": 7.944114184483144e-05,
"loss": 0.1766,
"step": 4330
},
{
"epoch": 1.2200956937799043,
"grad_norm": 1.477582335472107,
"learning_rate": 7.920082227977361e-05,
"loss": 0.2547,
"step": 4335
},
{
"epoch": 1.2215029552490853,
"grad_norm": 0.732683539390564,
"learning_rate": 7.89606282034111e-05,
"loss": 0.1894,
"step": 4340
},
{
"epoch": 1.2229102167182662,
"grad_norm": 1.199336290359497,
"learning_rate": 7.872056106491846e-05,
"loss": 0.3359,
"step": 4345
},
{
"epoch": 1.2243174781874473,
"grad_norm": 2.6119384765625,
"learning_rate": 7.848062231270458e-05,
"loss": 0.3301,
"step": 4350
},
{
"epoch": 1.2257247396566282,
"grad_norm": 1.0260940790176392,
"learning_rate": 7.824081339440364e-05,
"loss": 0.1735,
"step": 4355
},
{
"epoch": 1.2271320011258091,
"grad_norm": 0.7368533611297607,
"learning_rate": 7.800113575686643e-05,
"loss": 0.1741,
"step": 4360
},
{
"epoch": 1.2285392625949902,
"grad_norm": 0.8837445378303528,
"learning_rate": 7.776159084615183e-05,
"loss": 0.2789,
"step": 4365
},
{
"epoch": 1.2299465240641712,
"grad_norm": 1.0234431028366089,
"learning_rate": 7.752218010751786e-05,
"loss": 0.1811,
"step": 4370
},
{
"epoch": 1.231353785533352,
"grad_norm": 1.1849218606948853,
"learning_rate": 7.728290498541297e-05,
"loss": 0.2951,
"step": 4375
},
{
"epoch": 1.232761047002533,
"grad_norm": 1.1420046091079712,
"learning_rate": 7.704376692346748e-05,
"loss": 0.2964,
"step": 4380
},
{
"epoch": 1.234168308471714,
"grad_norm": 0.44826436042785645,
"learning_rate": 7.680476736448477e-05,
"loss": 0.165,
"step": 4385
},
{
"epoch": 1.235575569940895,
"grad_norm": 0.6397153735160828,
"learning_rate": 7.656590775043249e-05,
"loss": 0.138,
"step": 4390
},
{
"epoch": 1.236982831410076,
"grad_norm": 1.1096476316452026,
"learning_rate": 7.632718952243404e-05,
"loss": 0.2673,
"step": 4395
},
{
"epoch": 1.238390092879257,
"grad_norm": 0.7769279479980469,
"learning_rate": 7.608861412075987e-05,
"loss": 0.1631,
"step": 4400
},
{
"epoch": 1.239797354348438,
"grad_norm": 0.8061667084693909,
"learning_rate": 7.585018298481849e-05,
"loss": 0.1851,
"step": 4405
},
{
"epoch": 1.2412046158176189,
"grad_norm": 1.618454098701477,
"learning_rate": 7.561189755314817e-05,
"loss": 0.2377,
"step": 4410
},
{
"epoch": 1.2426118772867998,
"grad_norm": 1.1752551794052124,
"learning_rate": 7.537375926340802e-05,
"loss": 0.1806,
"step": 4415
},
{
"epoch": 1.244019138755981,
"grad_norm": 0.29463231563568115,
"learning_rate": 7.513576955236944e-05,
"loss": 0.1611,
"step": 4420
},
{
"epoch": 1.2454264002251618,
"grad_norm": 0.7407804131507874,
"learning_rate": 7.489792985590743e-05,
"loss": 0.3176,
"step": 4425
},
{
"epoch": 1.2468336616943427,
"grad_norm": 0.8456223011016846,
"learning_rate": 7.466024160899173e-05,
"loss": 0.2742,
"step": 4430
},
{
"epoch": 1.2482409231635239,
"grad_norm": 1.3502225875854492,
"learning_rate": 7.442270624567856e-05,
"loss": 0.2477,
"step": 4435
},
{
"epoch": 1.2496481846327048,
"grad_norm": 1.0241039991378784,
"learning_rate": 7.418532519910162e-05,
"loss": 0.2415,
"step": 4440
},
{
"epoch": 1.2510554461018857,
"grad_norm": 0.570637047290802,
"learning_rate": 7.394809990146356e-05,
"loss": 0.2094,
"step": 4445
},
{
"epoch": 1.2524627075710666,
"grad_norm": 0.4012211859226227,
"learning_rate": 7.371103178402731e-05,
"loss": 0.2591,
"step": 4450
},
{
"epoch": 1.2538699690402477,
"grad_norm": 1.1546359062194824,
"learning_rate": 7.347412227710766e-05,
"loss": 0.2837,
"step": 4455
},
{
"epoch": 1.2552772305094286,
"grad_norm": 0.8672778606414795,
"learning_rate": 7.32373728100622e-05,
"loss": 0.298,
"step": 4460
},
{
"epoch": 1.2566844919786098,
"grad_norm": 0.4911658465862274,
"learning_rate": 7.300078481128306e-05,
"loss": 0.1921,
"step": 4465
},
{
"epoch": 1.2580917534477907,
"grad_norm": 1.1717147827148438,
"learning_rate": 7.276435970818824e-05,
"loss": 0.1687,
"step": 4470
},
{
"epoch": 1.2594990149169716,
"grad_norm": 0.5286734104156494,
"learning_rate": 7.252809892721282e-05,
"loss": 0.2104,
"step": 4475
},
{
"epoch": 1.2609062763861525,
"grad_norm": 2.43472957611084,
"learning_rate": 7.229200389380056e-05,
"loss": 0.2763,
"step": 4480
},
{
"epoch": 1.2623135378553334,
"grad_norm": 0.9692918062210083,
"learning_rate": 7.205607603239508e-05,
"loss": 0.1913,
"step": 4485
},
{
"epoch": 1.2637207993245145,
"grad_norm": 0.8969650268554688,
"learning_rate": 7.182031676643153e-05,
"loss": 0.4249,
"step": 4490
},
{
"epoch": 1.2651280607936954,
"grad_norm": 0.7135694026947021,
"learning_rate": 7.158472751832783e-05,
"loss": 0.1957,
"step": 4495
},
{
"epoch": 1.2665353222628766,
"grad_norm": 2.911539077758789,
"learning_rate": 7.134930970947607e-05,
"loss": 0.3644,
"step": 4500
},
{
"epoch": 1.2679425837320575,
"grad_norm": 1.8338284492492676,
"learning_rate": 7.111406476023398e-05,
"loss": 0.2941,
"step": 4505
},
{
"epoch": 1.2693498452012384,
"grad_norm": 0.736365020275116,
"learning_rate": 7.087899408991651e-05,
"loss": 0.2541,
"step": 4510
},
{
"epoch": 1.2707571066704193,
"grad_norm": 1.269327163696289,
"learning_rate": 7.06440991167869e-05,
"loss": 0.2847,
"step": 4515
},
{
"epoch": 1.2721643681396002,
"grad_norm": 0.6774185299873352,
"learning_rate": 7.040938125804858e-05,
"loss": 0.2047,
"step": 4520
},
{
"epoch": 1.2735716296087813,
"grad_norm": 1.0028345584869385,
"learning_rate": 7.017484192983623e-05,
"loss": 0.2327,
"step": 4525
},
{
"epoch": 1.2749788910779623,
"grad_norm": 0.9345621466636658,
"learning_rate": 6.99404825472074e-05,
"loss": 0.2574,
"step": 4530
},
{
"epoch": 1.2763861525471434,
"grad_norm": 1.2837140560150146,
"learning_rate": 6.970630452413407e-05,
"loss": 0.298,
"step": 4535
},
{
"epoch": 1.2777934140163243,
"grad_norm": 0.5337740182876587,
"learning_rate": 6.947230927349396e-05,
"loss": 0.1538,
"step": 4540
},
{
"epoch": 1.2792006754855052,
"grad_norm": 0.5805062651634216,
"learning_rate": 6.923849820706194e-05,
"loss": 0.1483,
"step": 4545
},
{
"epoch": 1.280607936954686,
"grad_norm": 0.8201838135719299,
"learning_rate": 6.900487273550187e-05,
"loss": 0.163,
"step": 4550
},
{
"epoch": 1.282015198423867,
"grad_norm": 0.5184070467948914,
"learning_rate": 6.877143426835764e-05,
"loss": 0.2611,
"step": 4555
},
{
"epoch": 1.2834224598930482,
"grad_norm": 1.0877232551574707,
"learning_rate": 6.853818421404496e-05,
"loss": 0.3085,
"step": 4560
},
{
"epoch": 1.284829721362229,
"grad_norm": 1.616977572441101,
"learning_rate": 6.830512397984288e-05,
"loss": 0.3108,
"step": 4565
},
{
"epoch": 1.2862369828314102,
"grad_norm": 0.6340872049331665,
"learning_rate": 6.807225497188496e-05,
"loss": 0.177,
"step": 4570
},
{
"epoch": 1.287644244300591,
"grad_norm": 0.8518214821815491,
"learning_rate": 6.783957859515127e-05,
"loss": 0.1805,
"step": 4575
},
{
"epoch": 1.289051505769772,
"grad_norm": 1.280093789100647,
"learning_rate": 6.760709625345953e-05,
"loss": 0.2854,
"step": 4580
},
{
"epoch": 1.290458767238953,
"grad_norm": 0.7486845850944519,
"learning_rate": 6.737480934945677e-05,
"loss": 0.1399,
"step": 4585
},
{
"epoch": 1.291866028708134,
"grad_norm": 1.3590744733810425,
"learning_rate": 6.714271928461097e-05,
"loss": 0.1735,
"step": 4590
},
{
"epoch": 1.293273290177315,
"grad_norm": 0.6231881380081177,
"learning_rate": 6.691082745920247e-05,
"loss": 0.2083,
"step": 4595
},
{
"epoch": 1.2946805516464959,
"grad_norm": 1.0750889778137207,
"learning_rate": 6.667913527231549e-05,
"loss": 0.2304,
"step": 4600
},
{
"epoch": 1.296087813115677,
"grad_norm": 1.3983303308486938,
"learning_rate": 6.644764412182986e-05,
"loss": 0.3285,
"step": 4605
},
{
"epoch": 1.297495074584858,
"grad_norm": 0.5835619568824768,
"learning_rate": 6.621635540441249e-05,
"loss": 0.2651,
"step": 4610
},
{
"epoch": 1.2989023360540388,
"grad_norm": 0.7869633436203003,
"learning_rate": 6.598527051550882e-05,
"loss": 0.2144,
"step": 4615
},
{
"epoch": 1.3003095975232197,
"grad_norm": 0.4034360945224762,
"learning_rate": 6.575439084933468e-05,
"loss": 0.1919,
"step": 4620
},
{
"epoch": 1.3017168589924009,
"grad_norm": 1.0225868225097656,
"learning_rate": 6.552371779886756e-05,
"loss": 0.2942,
"step": 4625
},
{
"epoch": 1.3031241204615818,
"grad_norm": 1.8515701293945312,
"learning_rate": 6.52932527558385e-05,
"loss": 0.2579,
"step": 4630
},
{
"epoch": 1.3045313819307627,
"grad_norm": 1.13215172290802,
"learning_rate": 6.506299711072353e-05,
"loss": 0.189,
"step": 4635
},
{
"epoch": 1.3059386433999438,
"grad_norm": 1.1587252616882324,
"learning_rate": 6.483295225273521e-05,
"loss": 0.2055,
"step": 4640
},
{
"epoch": 1.3073459048691247,
"grad_norm": 1.6920759677886963,
"learning_rate": 6.460311956981444e-05,
"loss": 0.3108,
"step": 4645
},
{
"epoch": 1.3087531663383056,
"grad_norm": 0.5736072659492493,
"learning_rate": 6.437350044862207e-05,
"loss": 0.2675,
"step": 4650
},
{
"epoch": 1.3101604278074865,
"grad_norm": 0.9719104170799255,
"learning_rate": 6.414409627453025e-05,
"loss": 0.1933,
"step": 4655
},
{
"epoch": 1.3115676892766677,
"grad_norm": 0.8271322250366211,
"learning_rate": 6.391490843161442e-05,
"loss": 0.0908,
"step": 4660
},
{
"epoch": 1.3129749507458486,
"grad_norm": 1.2622920274734497,
"learning_rate": 6.368593830264485e-05,
"loss": 0.1837,
"step": 4665
},
{
"epoch": 1.3143822122150295,
"grad_norm": 1.0141448974609375,
"learning_rate": 6.345718726907815e-05,
"loss": 0.1396,
"step": 4670
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.5923504829406738,
"learning_rate": 6.322865671104909e-05,
"loss": 0.1631,
"step": 4675
},
{
"epoch": 1.3171967351533915,
"grad_norm": 1.8866256475448608,
"learning_rate": 6.300034800736233e-05,
"loss": 0.1407,
"step": 4680
},
{
"epoch": 1.3186039966225724,
"grad_norm": 0.8495520353317261,
"learning_rate": 6.277226253548385e-05,
"loss": 0.2345,
"step": 4685
},
{
"epoch": 1.3200112580917533,
"grad_norm": 0.8851481080055237,
"learning_rate": 6.254440167153295e-05,
"loss": 0.2431,
"step": 4690
},
{
"epoch": 1.3214185195609345,
"grad_norm": 0.5228270292282104,
"learning_rate": 6.231676679027364e-05,
"loss": 0.1606,
"step": 4695
},
{
"epoch": 1.3228257810301154,
"grad_norm": 1.2752258777618408,
"learning_rate": 6.208935926510659e-05,
"loss": 0.2588,
"step": 4700
},
{
"epoch": 1.3242330424992963,
"grad_norm": 1.6664029359817505,
"learning_rate": 6.186218046806078e-05,
"loss": 0.2418,
"step": 4705
},
{
"epoch": 1.3256403039684774,
"grad_norm": 0.7116133570671082,
"learning_rate": 6.16352317697851e-05,
"loss": 0.1839,
"step": 4710
},
{
"epoch": 1.3270475654376583,
"grad_norm": 1.6506725549697876,
"learning_rate": 6.140851453954021e-05,
"loss": 0.2076,
"step": 4715
},
{
"epoch": 1.3284548269068392,
"grad_norm": 1.0681225061416626,
"learning_rate": 6.118203014519034e-05,
"loss": 0.2491,
"step": 4720
},
{
"epoch": 1.3298620883760202,
"grad_norm": 0.969599723815918,
"learning_rate": 6.095577995319476e-05,
"loss": 0.273,
"step": 4725
},
{
"epoch": 1.3312693498452013,
"grad_norm": 1.4593223333358765,
"learning_rate": 6.072976532859982e-05,
"loss": 0.358,
"step": 4730
},
{
"epoch": 1.3326766113143822,
"grad_norm": 0.29552891850471497,
"learning_rate": 6.0503987635030656e-05,
"loss": 0.2655,
"step": 4735
},
{
"epoch": 1.334083872783563,
"grad_norm": 2.189373731613159,
"learning_rate": 6.0278448234682784e-05,
"loss": 0.2624,
"step": 4740
},
{
"epoch": 1.3354911342527442,
"grad_norm": 0.28230440616607666,
"learning_rate": 6.005314848831415e-05,
"loss": 0.1886,
"step": 4745
},
{
"epoch": 1.3368983957219251,
"grad_norm": 0.5569413304328918,
"learning_rate": 5.9828089755236714e-05,
"loss": 0.231,
"step": 4750
},
{
"epoch": 1.338305657191106,
"grad_norm": 0.8192738890647888,
"learning_rate": 5.960327339330828e-05,
"loss": 0.23,
"step": 4755
},
{
"epoch": 1.339712918660287,
"grad_norm": 1.0859158039093018,
"learning_rate": 5.9378700758924466e-05,
"loss": 0.3275,
"step": 4760
},
{
"epoch": 1.341120180129468,
"grad_norm": 0.8077869415283203,
"learning_rate": 5.915437320701025e-05,
"loss": 0.0847,
"step": 4765
},
{
"epoch": 1.342527441598649,
"grad_norm": 1.8826837539672852,
"learning_rate": 5.8930292091012015e-05,
"loss": 0.2158,
"step": 4770
},
{
"epoch": 1.3439347030678301,
"grad_norm": 0.6470653414726257,
"learning_rate": 5.870645876288938e-05,
"loss": 0.3325,
"step": 4775
},
{
"epoch": 1.345341964537011,
"grad_norm": 0.7090429067611694,
"learning_rate": 5.848287457310681e-05,
"loss": 0.2083,
"step": 4780
},
{
"epoch": 1.346749226006192,
"grad_norm": 0.1886598914861679,
"learning_rate": 5.825954087062579e-05,
"loss": 0.2118,
"step": 4785
},
{
"epoch": 1.3481564874753729,
"grad_norm": 0.5092473030090332,
"learning_rate": 5.8036459002896473e-05,
"loss": 0.253,
"step": 4790
},
{
"epoch": 1.3495637489445538,
"grad_norm": 0.9652419686317444,
"learning_rate": 5.78136303158495e-05,
"loss": 0.1499,
"step": 4795
},
{
"epoch": 1.350971010413735,
"grad_norm": 0.6111290454864502,
"learning_rate": 5.759105615388814e-05,
"loss": 0.1805,
"step": 4800
},
{
"epoch": 1.3523782718829158,
"grad_norm": 2.2469632625579834,
"learning_rate": 5.736873785987997e-05,
"loss": 0.3536,
"step": 4805
},
{
"epoch": 1.353785533352097,
"grad_norm": 0.9734948873519897,
"learning_rate": 5.714667677514882e-05,
"loss": 0.2784,
"step": 4810
},
{
"epoch": 1.3551927948212779,
"grad_norm": 1.076882243156433,
"learning_rate": 5.692487423946662e-05,
"loss": 0.1953,
"step": 4815
},
{
"epoch": 1.3566000562904588,
"grad_norm": 0.7746699452400208,
"learning_rate": 5.6703331591045524e-05,
"loss": 0.2175,
"step": 4820
},
{
"epoch": 1.3580073177596397,
"grad_norm": 0.7650654315948486,
"learning_rate": 5.6482050166529546e-05,
"loss": 0.1676,
"step": 4825
},
{
"epoch": 1.3594145792288206,
"grad_norm": 0.6610764861106873,
"learning_rate": 5.62610313009868e-05,
"loss": 0.1721,
"step": 4830
},
{
"epoch": 1.3608218406980017,
"grad_norm": 0.8137916326522827,
"learning_rate": 5.604027632790112e-05,
"loss": 0.1374,
"step": 4835
},
{
"epoch": 1.3622291021671826,
"grad_norm": 0.6320801377296448,
"learning_rate": 5.581978657916431e-05,
"loss": 0.209,
"step": 4840
},
{
"epoch": 1.3636363636363638,
"grad_norm": 1.4471935033798218,
"learning_rate": 5.5599563385067996e-05,
"loss": 0.1163,
"step": 4845
},
{
"epoch": 1.3650436251055447,
"grad_norm": 0.9794873595237732,
"learning_rate": 5.537960807429547e-05,
"loss": 0.2077,
"step": 4850
},
{
"epoch": 1.3664508865747256,
"grad_norm": 1.3119271993637085,
"learning_rate": 5.5159921973913866e-05,
"loss": 0.2667,
"step": 4855
},
{
"epoch": 1.3678581480439065,
"grad_norm": 1.156152367591858,
"learning_rate": 5.49405064093661e-05,
"loss": 0.1734,
"step": 4860
},
{
"epoch": 1.3692654095130874,
"grad_norm": 0.06259223818778992,
"learning_rate": 5.472136270446275e-05,
"loss": 0.2067,
"step": 4865
},
{
"epoch": 1.3706726709822685,
"grad_norm": 0.6296875476837158,
"learning_rate": 5.4502492181374284e-05,
"loss": 0.229,
"step": 4870
},
{
"epoch": 1.3720799324514494,
"grad_norm": 1.3139517307281494,
"learning_rate": 5.428389616062298e-05,
"loss": 0.286,
"step": 4875
},
{
"epoch": 1.3734871939206306,
"grad_norm": 0.5777654051780701,
"learning_rate": 5.40655759610748e-05,
"loss": 0.2024,
"step": 4880
},
{
"epoch": 1.3748944553898115,
"grad_norm": 0.5422516465187073,
"learning_rate": 5.384753289993173e-05,
"loss": 0.2453,
"step": 4885
},
{
"epoch": 1.3763017168589924,
"grad_norm": 1.2088871002197266,
"learning_rate": 5.3629768292723614e-05,
"loss": 0.1644,
"step": 4890
},
{
"epoch": 1.3777089783281733,
"grad_norm": 0.6206454634666443,
"learning_rate": 5.341228345330025e-05,
"loss": 0.3293,
"step": 4895
},
{
"epoch": 1.3791162397973544,
"grad_norm": 1.0353143215179443,
"learning_rate": 5.3195079693823624e-05,
"loss": 0.2197,
"step": 4900
},
{
"epoch": 1.3805235012665353,
"grad_norm": 1.076452612876892,
"learning_rate": 5.297815832475971e-05,
"loss": 0.1435,
"step": 4905
},
{
"epoch": 1.3819307627357162,
"grad_norm": 0.7797285914421082,
"learning_rate": 5.2761520654870846e-05,
"loss": 0.1499,
"step": 4910
},
{
"epoch": 1.3833380242048974,
"grad_norm": 3.2293171882629395,
"learning_rate": 5.25451679912077e-05,
"loss": 0.4037,
"step": 4915
},
{
"epoch": 1.3847452856740783,
"grad_norm": 0.7513951659202576,
"learning_rate": 5.232910163910132e-05,
"loss": 0.136,
"step": 4920
},
{
"epoch": 1.3861525471432592,
"grad_norm": 0.43260759115219116,
"learning_rate": 5.211332290215543e-05,
"loss": 0.2419,
"step": 4925
},
{
"epoch": 1.38755980861244,
"grad_norm": 0.7441173791885376,
"learning_rate": 5.189783308223841e-05,
"loss": 0.1678,
"step": 4930
},
{
"epoch": 1.3889670700816212,
"grad_norm": 0.4429182708263397,
"learning_rate": 5.1682633479475484e-05,
"loss": 0.1767,
"step": 4935
},
{
"epoch": 1.3903743315508021,
"grad_norm": 1.6440355777740479,
"learning_rate": 5.146772539224094e-05,
"loss": 0.2831,
"step": 4940
},
{
"epoch": 1.391781593019983,
"grad_norm": 1.1421854496002197,
"learning_rate": 5.1253110117150314e-05,
"loss": 0.157,
"step": 4945
},
{
"epoch": 1.3931888544891642,
"grad_norm": 1.013460397720337,
"learning_rate": 5.1038788949052344e-05,
"loss": 0.3537,
"step": 4950
},
{
"epoch": 1.394596115958345,
"grad_norm": 1.2984402179718018,
"learning_rate": 5.082476318102144e-05,
"loss": 0.2869,
"step": 4955
},
{
"epoch": 1.396003377427526,
"grad_norm": 0.8296849727630615,
"learning_rate": 5.061103410434978e-05,
"loss": 0.2029,
"step": 4960
},
{
"epoch": 1.397410638896707,
"grad_norm": 1.1972373723983765,
"learning_rate": 5.0397603008539374e-05,
"loss": 0.182,
"step": 4965
},
{
"epoch": 1.398817900365888,
"grad_norm": 1.5300724506378174,
"learning_rate": 5.0184471181294515e-05,
"loss": 0.1537,
"step": 4970
},
{
"epoch": 1.400225161835069,
"grad_norm": 0.9540086984634399,
"learning_rate": 4.997163990851381e-05,
"loss": 0.1679,
"step": 4975
},
{
"epoch": 1.4016324233042499,
"grad_norm": 0.15063901245594025,
"learning_rate": 4.975911047428263e-05,
"loss": 0.1512,
"step": 4980
},
{
"epoch": 1.403039684773431,
"grad_norm": 1.925596833229065,
"learning_rate": 4.954688416086524e-05,
"loss": 0.2077,
"step": 4985
},
{
"epoch": 1.404446946242612,
"grad_norm": 1.4239457845687866,
"learning_rate": 4.9334962248696934e-05,
"loss": 0.2464,
"step": 4990
},
{
"epoch": 1.4058542077117928,
"grad_norm": 0.3618084490299225,
"learning_rate": 4.912334601637658e-05,
"loss": 0.1579,
"step": 4995
},
{
"epoch": 1.4072614691809737,
"grad_norm": 0.8101370334625244,
"learning_rate": 4.8912036740658776e-05,
"loss": 0.2682,
"step": 5000
},
{
"epoch": 1.4086687306501549,
"grad_norm": 0.7149579524993896,
"learning_rate": 4.8701035696446064e-05,
"loss": 0.3497,
"step": 5005
},
{
"epoch": 1.4100759921193358,
"grad_norm": 1.0598907470703125,
"learning_rate": 4.849034415678131e-05,
"loss": 0.2342,
"step": 5010
},
{
"epoch": 1.4114832535885167,
"grad_norm": 1.2105034589767456,
"learning_rate": 4.8279963392840156e-05,
"loss": 0.2693,
"step": 5015
},
{
"epoch": 1.4128905150576978,
"grad_norm": 0.6534488201141357,
"learning_rate": 4.8069894673923064e-05,
"loss": 0.2475,
"step": 5020
},
{
"epoch": 1.4142977765268787,
"grad_norm": 1.4907587766647339,
"learning_rate": 4.7860139267447956e-05,
"loss": 0.2958,
"step": 5025
},
{
"epoch": 1.4157050379960596,
"grad_norm": 1.1340523958206177,
"learning_rate": 4.765069843894239e-05,
"loss": 0.1087,
"step": 5030
},
{
"epoch": 1.4171122994652405,
"grad_norm": 0.6139047145843506,
"learning_rate": 4.744157345203588e-05,
"loss": 0.1827,
"step": 5035
},
{
"epoch": 1.4185195609344217,
"grad_norm": 1.5109590291976929,
"learning_rate": 4.723276556845252e-05,
"loss": 0.1851,
"step": 5040
},
{
"epoch": 1.4199268224036026,
"grad_norm": 0.593103289604187,
"learning_rate": 4.702427604800307e-05,
"loss": 0.2019,
"step": 5045
},
{
"epoch": 1.4213340838727835,
"grad_norm": 1.3064155578613281,
"learning_rate": 4.681610614857749e-05,
"loss": 0.1086,
"step": 5050
},
{
"epoch": 1.4227413453419646,
"grad_norm": 1.4465229511260986,
"learning_rate": 4.66082571261375e-05,
"loss": 0.099,
"step": 5055
},
{
"epoch": 1.4241486068111455,
"grad_norm": 1.0164941549301147,
"learning_rate": 4.6400730234708676e-05,
"loss": 0.2006,
"step": 5060
},
{
"epoch": 1.4255558682803264,
"grad_norm": 1.600894808769226,
"learning_rate": 4.61935267263732e-05,
"loss": 0.2938,
"step": 5065
},
{
"epoch": 1.4269631297495073,
"grad_norm": 0.8022120594978333,
"learning_rate": 4.598664785126217e-05,
"loss": 0.2981,
"step": 5070
},
{
"epoch": 1.4283703912186885,
"grad_norm": 0.6564612984657288,
"learning_rate": 4.578009485754791e-05,
"loss": 0.1266,
"step": 5075
},
{
"epoch": 1.4297776526878694,
"grad_norm": 0.7073236107826233,
"learning_rate": 4.557386899143678e-05,
"loss": 0.2229,
"step": 5080
},
{
"epoch": 1.4311849141570505,
"grad_norm": 0.9632103443145752,
"learning_rate": 4.536797149716133e-05,
"loss": 0.1511,
"step": 5085
},
{
"epoch": 1.4325921756262314,
"grad_norm": 1.1304622888565063,
"learning_rate": 4.5162403616972945e-05,
"loss": 0.2341,
"step": 5090
},
{
"epoch": 1.4339994370954123,
"grad_norm": 1.135055422782898,
"learning_rate": 4.4957166591134405e-05,
"loss": 0.3898,
"step": 5095
},
{
"epoch": 1.4354066985645932,
"grad_norm": 0.6786003112792969,
"learning_rate": 4.475226165791231e-05,
"loss": 0.2129,
"step": 5100
},
{
"epoch": 1.4368139600337742,
"grad_norm": 1.3296654224395752,
"learning_rate": 4.454769005356955e-05,
"loss": 0.3128,
"step": 5105
},
{
"epoch": 1.4382212215029553,
"grad_norm": 0.7507737278938293,
"learning_rate": 4.434345301235802e-05,
"loss": 0.1069,
"step": 5110
},
{
"epoch": 1.4396284829721362,
"grad_norm": 1.4222168922424316,
"learning_rate": 4.4139551766511e-05,
"loss": 0.1529,
"step": 5115
},
{
"epoch": 1.4410357444413173,
"grad_norm": 0.21092858910560608,
"learning_rate": 4.39359875462359e-05,
"loss": 0.2159,
"step": 5120
},
{
"epoch": 1.4424430059104982,
"grad_norm": 1.0862993001937866,
"learning_rate": 4.373276157970665e-05,
"loss": 0.1262,
"step": 5125
},
{
"epoch": 1.4438502673796791,
"grad_norm": 1.6479579210281372,
"learning_rate": 4.352987509305635e-05,
"loss": 0.2165,
"step": 5130
},
{
"epoch": 1.44525752884886,
"grad_norm": 0.11600520461797714,
"learning_rate": 4.3327329310370016e-05,
"loss": 0.1696,
"step": 5135
},
{
"epoch": 1.446664790318041,
"grad_norm": 0.9424710869789124,
"learning_rate": 4.312512545367702e-05,
"loss": 0.3328,
"step": 5140
},
{
"epoch": 1.448072051787222,
"grad_norm": 0.6428975462913513,
"learning_rate": 4.292326474294372e-05,
"loss": 0.1069,
"step": 5145
},
{
"epoch": 1.449479313256403,
"grad_norm": 0.8455730676651001,
"learning_rate": 4.272174839606628e-05,
"loss": 0.3006,
"step": 5150
},
{
"epoch": 1.4508865747255841,
"grad_norm": 0.6467002034187317,
"learning_rate": 4.252057762886305e-05,
"loss": 0.1345,
"step": 5155
},
{
"epoch": 1.452293836194765,
"grad_norm": 0.7402626276016235,
"learning_rate": 4.2319753655067505e-05,
"loss": 0.1928,
"step": 5160
},
{
"epoch": 1.453701097663946,
"grad_norm": 1.142514705657959,
"learning_rate": 4.211927768632068e-05,
"loss": 0.3225,
"step": 5165
},
{
"epoch": 1.4551083591331269,
"grad_norm": 0.9843090772628784,
"learning_rate": 4.191915093216411e-05,
"loss": 0.1223,
"step": 5170
},
{
"epoch": 1.4565156206023078,
"grad_norm": 0.9305518865585327,
"learning_rate": 4.171937460003223e-05,
"loss": 0.1518,
"step": 5175
},
{
"epoch": 1.457922882071489,
"grad_norm": 0.9245863556861877,
"learning_rate": 4.1519949895245435e-05,
"loss": 0.161,
"step": 5180
},
{
"epoch": 1.4593301435406698,
"grad_norm": 0.5494176149368286,
"learning_rate": 4.1320878021002466e-05,
"loss": 0.1645,
"step": 5185
},
{
"epoch": 1.460737405009851,
"grad_norm": 0.454455703496933,
"learning_rate": 4.112216017837346e-05,
"loss": 0.1784,
"step": 5190
},
{
"epoch": 1.4621446664790319,
"grad_norm": 0.8797675967216492,
"learning_rate": 4.092379756629244e-05,
"loss": 0.1915,
"step": 5195
},
{
"epoch": 1.4635519279482128,
"grad_norm": 0.5059092044830322,
"learning_rate": 4.072579138155024e-05,
"loss": 0.1533,
"step": 5200
},
{
"epoch": 1.4649591894173937,
"grad_norm": 1.5164445638656616,
"learning_rate": 4.052814281878725e-05,
"loss": 0.3054,
"step": 5205
},
{
"epoch": 1.4663664508865748,
"grad_norm": 0.8489431738853455,
"learning_rate": 4.033085307048626e-05,
"loss": 0.1573,
"step": 5210
},
{
"epoch": 1.4677737123557557,
"grad_norm": 0.8418503999710083,
"learning_rate": 4.0133923326965073e-05,
"loss": 0.2269,
"step": 5215
},
{
"epoch": 1.4691809738249366,
"grad_norm": 0.4309021830558777,
"learning_rate": 3.9937354776369565e-05,
"loss": 0.1621,
"step": 5220
},
{
"epoch": 1.4705882352941178,
"grad_norm": 1.8004333972930908,
"learning_rate": 3.974114860466641e-05,
"loss": 0.1821,
"step": 5225
},
{
"epoch": 1.4719954967632987,
"grad_norm": 0.5034974217414856,
"learning_rate": 3.954530599563586e-05,
"loss": 0.1586,
"step": 5230
},
{
"epoch": 1.4734027582324796,
"grad_norm": 1.8636256456375122,
"learning_rate": 3.934982813086466e-05,
"loss": 0.1778,
"step": 5235
},
{
"epoch": 1.4748100197016605,
"grad_norm": 0.7782198190689087,
"learning_rate": 3.915471618973905e-05,
"loss": 0.2362,
"step": 5240
},
{
"epoch": 1.4762172811708416,
"grad_norm": 0.5170087218284607,
"learning_rate": 3.895997134943735e-05,
"loss": 0.1389,
"step": 5245
},
{
"epoch": 1.4776245426400225,
"grad_norm": 0.6563436388969421,
"learning_rate": 3.876559478492319e-05,
"loss": 0.1972,
"step": 5250
},
{
"epoch": 1.4790318041092034,
"grad_norm": 0.6524726748466492,
"learning_rate": 3.857158766893814e-05,
"loss": 0.2123,
"step": 5255
},
{
"epoch": 1.4804390655783846,
"grad_norm": 0.8341132402420044,
"learning_rate": 3.837795117199483e-05,
"loss": 0.2374,
"step": 5260
},
{
"epoch": 1.4818463270475655,
"grad_norm": 0.37632039189338684,
"learning_rate": 3.818468646236984e-05,
"loss": 0.114,
"step": 5265
},
{
"epoch": 1.4832535885167464,
"grad_norm": 2.116046190261841,
"learning_rate": 3.799179470609656e-05,
"loss": 0.3048,
"step": 5270
},
{
"epoch": 1.4846608499859273,
"grad_norm": 2.3138134479522705,
"learning_rate": 3.7799277066958205e-05,
"loss": 0.1414,
"step": 5275
},
{
"epoch": 1.4860681114551084,
"grad_norm": 1.4033293724060059,
"learning_rate": 3.760713470648093e-05,
"loss": 0.1972,
"step": 5280
},
{
"epoch": 1.4874753729242893,
"grad_norm": 0.9336678981781006,
"learning_rate": 3.741536878392654e-05,
"loss": 0.1519,
"step": 5285
},
{
"epoch": 1.4888826343934702,
"grad_norm": 1.4050379991531372,
"learning_rate": 3.7223980456285813e-05,
"loss": 0.1493,
"step": 5290
},
{
"epoch": 1.4902898958626514,
"grad_norm": 0.4991312623023987,
"learning_rate": 3.70329708782713e-05,
"loss": 0.157,
"step": 5295
},
{
"epoch": 1.4916971573318323,
"grad_norm": 1.6823819875717163,
"learning_rate": 3.6842341202310374e-05,
"loss": 0.2532,
"step": 5300
},
{
"epoch": 1.4931044188010132,
"grad_norm": 0.81031733751297,
"learning_rate": 3.665209257853843e-05,
"loss": 0.3201,
"step": 5305
},
{
"epoch": 1.494511680270194,
"grad_norm": 1.287041425704956,
"learning_rate": 3.646222615479177e-05,
"loss": 0.1398,
"step": 5310
},
{
"epoch": 1.4959189417393752,
"grad_norm": 0.4528125822544098,
"learning_rate": 3.62727430766007e-05,
"loss": 0.2131,
"step": 5315
},
{
"epoch": 1.4973262032085561,
"grad_norm": 1.0578283071517944,
"learning_rate": 3.608364448718283e-05,
"loss": 0.1415,
"step": 5320
},
{
"epoch": 1.498733464677737,
"grad_norm": 0.4122551679611206,
"learning_rate": 3.589493152743585e-05,
"loss": 0.0914,
"step": 5325
},
{
"epoch": 1.5001407261469182,
"grad_norm": 0.6634222269058228,
"learning_rate": 3.570660533593091e-05,
"loss": 0.1269,
"step": 5330
},
{
"epoch": 1.501547987616099,
"grad_norm": 0.27888017892837524,
"learning_rate": 3.551866704890564e-05,
"loss": 0.1288,
"step": 5335
},
{
"epoch": 1.50295524908528,
"grad_norm": 1.0966591835021973,
"learning_rate": 3.533111780025725e-05,
"loss": 0.1822,
"step": 5340
},
{
"epoch": 1.504362510554461,
"grad_norm": 1.1912025213241577,
"learning_rate": 3.514395872153584e-05,
"loss": 0.2205,
"step": 5345
},
{
"epoch": 1.505769772023642,
"grad_norm": 0.34254777431488037,
"learning_rate": 3.49571909419374e-05,
"loss": 0.1333,
"step": 5350
},
{
"epoch": 1.507177033492823,
"grad_norm": 0.7154930233955383,
"learning_rate": 3.4770815588297054e-05,
"loss": 0.1758,
"step": 5355
},
{
"epoch": 1.508584294962004,
"grad_norm": 0.7776800394058228,
"learning_rate": 3.4584833785082385e-05,
"loss": 0.1721,
"step": 5360
},
{
"epoch": 1.509991556431185,
"grad_norm": 1.0347821712493896,
"learning_rate": 3.43992466543865e-05,
"loss": 0.1735,
"step": 5365
},
{
"epoch": 1.511398817900366,
"grad_norm": 0.773311972618103,
"learning_rate": 3.4214055315921245e-05,
"loss": 0.1798,
"step": 5370
},
{
"epoch": 1.5128060793695468,
"grad_norm": 0.15166114270687103,
"learning_rate": 3.402926088701062e-05,
"loss": 0.2025,
"step": 5375
},
{
"epoch": 1.5142133408387277,
"grad_norm": 0.4494927227497101,
"learning_rate": 3.38448644825839e-05,
"loss": 0.1211,
"step": 5380
},
{
"epoch": 1.5156206023079088,
"grad_norm": 1.2481530904769897,
"learning_rate": 3.36608672151689e-05,
"loss": 0.1325,
"step": 5385
},
{
"epoch": 1.5170278637770898,
"grad_norm": 0.7955223321914673,
"learning_rate": 3.347727019488531e-05,
"loss": 0.1334,
"step": 5390
},
{
"epoch": 1.518435125246271,
"grad_norm": 1.1012686491012573,
"learning_rate": 3.329407452943799e-05,
"loss": 0.1978,
"step": 5395
},
{
"epoch": 1.5198423867154518,
"grad_norm": 2.147088050842285,
"learning_rate": 3.311128132411031e-05,
"loss": 0.1742,
"step": 5400
},
{
"epoch": 1.5212496481846327,
"grad_norm": 1.0812978744506836,
"learning_rate": 3.292889168175751e-05,
"loss": 0.1237,
"step": 5405
},
{
"epoch": 1.5226569096538136,
"grad_norm": 0.8602486848831177,
"learning_rate": 3.274690670279984e-05,
"loss": 0.1628,
"step": 5410
},
{
"epoch": 1.5240641711229945,
"grad_norm": 0.4767683446407318,
"learning_rate": 3.25653274852162e-05,
"loss": 0.0893,
"step": 5415
},
{
"epoch": 1.5254714325921757,
"grad_norm": 1.434166431427002,
"learning_rate": 3.238415512453741e-05,
"loss": 0.3905,
"step": 5420
},
{
"epoch": 1.5268786940613566,
"grad_norm": 3.7128000259399414,
"learning_rate": 3.220339071383948e-05,
"loss": 0.336,
"step": 5425
},
{
"epoch": 1.5282859555305377,
"grad_norm": 0.9743013381958008,
"learning_rate": 3.202303534373712e-05,
"loss": 0.17,
"step": 5430
},
{
"epoch": 1.5296932169997186,
"grad_norm": 0.4060254991054535,
"learning_rate": 3.184309010237728e-05,
"loss": 0.1817,
"step": 5435
},
{
"epoch": 1.5311004784688995,
"grad_norm": 1.3302080631256104,
"learning_rate": 3.16635560754323e-05,
"loss": 0.2442,
"step": 5440
},
{
"epoch": 1.5325077399380804,
"grad_norm": 1.5643320083618164,
"learning_rate": 3.148443434609367e-05,
"loss": 0.3225,
"step": 5445
},
{
"epoch": 1.5339150014072613,
"grad_norm": 1.2559304237365723,
"learning_rate": 3.1305725995065205e-05,
"loss": 0.1861,
"step": 5450
},
{
"epoch": 1.5353222628764425,
"grad_norm": 1.1454960107803345,
"learning_rate": 3.112743210055677e-05,
"loss": 0.1262,
"step": 5455
},
{
"epoch": 1.5367295243456234,
"grad_norm": 0.46115657687187195,
"learning_rate": 3.0949553738277634e-05,
"loss": 0.1827,
"step": 5460
},
{
"epoch": 1.5381367858148045,
"grad_norm": 1.2840021848678589,
"learning_rate": 3.077209198143002e-05,
"loss": 0.1399,
"step": 5465
},
{
"epoch": 1.5395440472839854,
"grad_norm": 1.189970850944519,
"learning_rate": 3.0595047900702564e-05,
"loss": 0.2078,
"step": 5470
},
{
"epoch": 1.5409513087531663,
"grad_norm": 0.5335509181022644,
"learning_rate": 3.041842256426404e-05,
"loss": 0.1423,
"step": 5475
},
{
"epoch": 1.5423585702223472,
"grad_norm": 0.8606838583946228,
"learning_rate": 3.024221703775665e-05,
"loss": 0.1468,
"step": 5480
},
{
"epoch": 1.5437658316915281,
"grad_norm": 1.3679966926574707,
"learning_rate": 3.0066432384289844e-05,
"loss": 0.1247,
"step": 5485
},
{
"epoch": 1.5451730931607093,
"grad_norm": 1.2723866701126099,
"learning_rate": 2.989106966443379e-05,
"loss": 0.1482,
"step": 5490
},
{
"epoch": 1.5465803546298902,
"grad_norm": 0.8712704181671143,
"learning_rate": 2.97161299362129e-05,
"loss": 0.2848,
"step": 5495
},
{
"epoch": 1.5479876160990713,
"grad_norm": 0.6967242360115051,
"learning_rate": 2.9541614255099625e-05,
"loss": 0.1604,
"step": 5500
},
{
"epoch": 1.5493948775682522,
"grad_norm": 1.0415253639221191,
"learning_rate": 2.9367523674007947e-05,
"loss": 0.1876,
"step": 5505
},
{
"epoch": 1.5508021390374331,
"grad_norm": 0.5861086845397949,
"learning_rate": 2.9193859243287036e-05,
"loss": 0.1835,
"step": 5510
},
{
"epoch": 1.552209400506614,
"grad_norm": 1.444682002067566,
"learning_rate": 2.902062201071505e-05,
"loss": 0.1588,
"step": 5515
},
{
"epoch": 1.553616661975795,
"grad_norm": 1.0231586694717407,
"learning_rate": 2.8847813021492574e-05,
"loss": 0.3833,
"step": 5520
},
{
"epoch": 1.555023923444976,
"grad_norm": 1.2998064756393433,
"learning_rate": 2.8675433318236567e-05,
"loss": 0.1849,
"step": 5525
},
{
"epoch": 1.556431184914157,
"grad_norm": 0.8349362015724182,
"learning_rate": 2.8503483940973952e-05,
"loss": 0.1391,
"step": 5530
},
{
"epoch": 1.5578384463833381,
"grad_norm": 0.9555754661560059,
"learning_rate": 2.8331965927135274e-05,
"loss": 0.2073,
"step": 5535
},
{
"epoch": 1.559245707852519,
"grad_norm": 1.703472375869751,
"learning_rate": 2.8160880311548522e-05,
"loss": 0.2548,
"step": 5540
},
{
"epoch": 1.5606529693217,
"grad_norm": 0.39019107818603516,
"learning_rate": 2.799022812643295e-05,
"loss": 0.1277,
"step": 5545
},
{
"epoch": 1.5620602307908809,
"grad_norm": 1.0451160669326782,
"learning_rate": 2.782001040139267e-05,
"loss": 0.3046,
"step": 5550
},
{
"epoch": 1.5634674922600618,
"grad_norm": 0.8136467337608337,
"learning_rate": 2.765022816341063e-05,
"loss": 0.197,
"step": 5555
},
{
"epoch": 1.564874753729243,
"grad_norm": 0.6249985098838806,
"learning_rate": 2.7480882436842335e-05,
"loss": 0.1592,
"step": 5560
},
{
"epoch": 1.566282015198424,
"grad_norm": 0.5969499945640564,
"learning_rate": 2.7311974243409565e-05,
"loss": 0.2353,
"step": 5565
},
{
"epoch": 1.567689276667605,
"grad_norm": 0.5542153716087341,
"learning_rate": 2.7143504602194448e-05,
"loss": 0.1407,
"step": 5570
},
{
"epoch": 1.5690965381367858,
"grad_norm": 0.40066176652908325,
"learning_rate": 2.697547452963307e-05,
"loss": 0.1318,
"step": 5575
},
{
"epoch": 1.5705037996059668,
"grad_norm": 0.4262009859085083,
"learning_rate": 2.680788503950944e-05,
"loss": 0.171,
"step": 5580
},
{
"epoch": 1.5719110610751477,
"grad_norm": 0.7851074934005737,
"learning_rate": 2.664073714294948e-05,
"loss": 0.2443,
"step": 5585
},
{
"epoch": 1.5733183225443286,
"grad_norm": 0.39711621403694153,
"learning_rate": 2.6474031848414704e-05,
"loss": 0.2419,
"step": 5590
},
{
"epoch": 1.5747255840135097,
"grad_norm": 0.4387623369693756,
"learning_rate": 2.6307770161696354e-05,
"loss": 0.0821,
"step": 5595
},
{
"epoch": 1.5761328454826908,
"grad_norm": 0.9057246446609497,
"learning_rate": 2.6141953085909198e-05,
"loss": 0.2652,
"step": 5600
},
{
"epoch": 1.5775401069518717,
"grad_norm": 0.7787453532218933,
"learning_rate": 2.597658162148544e-05,
"loss": 0.2335,
"step": 5605
},
{
"epoch": 1.5789473684210527,
"grad_norm": 1.116365909576416,
"learning_rate": 2.5811656766168902e-05,
"loss": 0.2092,
"step": 5610
},
{
"epoch": 1.5803546298902336,
"grad_norm": 0.741118848323822,
"learning_rate": 2.5647179515008724e-05,
"loss": 0.18,
"step": 5615
},
{
"epoch": 1.5817618913594145,
"grad_norm": 0.9240850806236267,
"learning_rate": 2.548315086035351e-05,
"loss": 0.2047,
"step": 5620
},
{
"epoch": 1.5831691528285956,
"grad_norm": 1.0324885845184326,
"learning_rate": 2.5319571791845408e-05,
"loss": 0.1117,
"step": 5625
},
{
"epoch": 1.5845764142977765,
"grad_norm": 1.108396053314209,
"learning_rate": 2.5156443296414013e-05,
"loss": 0.1582,
"step": 5630
},
{
"epoch": 1.5859836757669576,
"grad_norm": 1.0466639995574951,
"learning_rate": 2.4993766358270388e-05,
"loss": 0.2145,
"step": 5635
},
{
"epoch": 1.5873909372361386,
"grad_norm": 1.1003303527832031,
"learning_rate": 2.4831541958901293e-05,
"loss": 0.1401,
"step": 5640
},
{
"epoch": 1.5887981987053195,
"grad_norm": 0.7945972084999084,
"learning_rate": 2.4669771077063152e-05,
"loss": 0.101,
"step": 5645
},
{
"epoch": 1.5902054601745004,
"grad_norm": 1.6851614713668823,
"learning_rate": 2.4508454688776105e-05,
"loss": 0.2356,
"step": 5650
},
{
"epoch": 1.5916127216436813,
"grad_norm": 0.708411693572998,
"learning_rate": 2.434759376731819e-05,
"loss": 0.2346,
"step": 5655
},
{
"epoch": 1.5930199831128624,
"grad_norm": 0.9913239479064941,
"learning_rate": 2.4187189283219446e-05,
"loss": 0.1195,
"step": 5660
},
{
"epoch": 1.5944272445820433,
"grad_norm": 1.0097897052764893,
"learning_rate": 2.4027242204256108e-05,
"loss": 0.1723,
"step": 5665
},
{
"epoch": 1.5958345060512245,
"grad_norm": 0.8258925080299377,
"learning_rate": 2.3867753495444723e-05,
"loss": 0.1539,
"step": 5670
},
{
"epoch": 1.5972417675204054,
"grad_norm": 0.5283498764038086,
"learning_rate": 2.3708724119036262e-05,
"loss": 0.1165,
"step": 5675
},
{
"epoch": 1.5986490289895863,
"grad_norm": 1.170369267463684,
"learning_rate": 2.355015503451048e-05,
"loss": 0.1951,
"step": 5680
},
{
"epoch": 1.6000562904587672,
"grad_norm": 0.8622944355010986,
"learning_rate": 2.339204719856998e-05,
"loss": 0.153,
"step": 5685
},
{
"epoch": 1.601463551927948,
"grad_norm": 0.6249514818191528,
"learning_rate": 2.323440156513448e-05,
"loss": 0.0686,
"step": 5690
},
{
"epoch": 1.6028708133971292,
"grad_norm": 0.2732272446155548,
"learning_rate": 2.3077219085335054e-05,
"loss": 0.1054,
"step": 5695
},
{
"epoch": 1.6042780748663101,
"grad_norm": 1.5117753744125366,
"learning_rate": 2.2920500707508496e-05,
"loss": 0.1682,
"step": 5700
},
{
"epoch": 1.6056853363354913,
"grad_norm": 1.9940603971481323,
"learning_rate": 2.2764247377191405e-05,
"loss": 0.2375,
"step": 5705
},
{
"epoch": 1.6070925978046722,
"grad_norm": 1.0817060470581055,
"learning_rate": 2.2608460037114642e-05,
"loss": 0.2294,
"step": 5710
},
{
"epoch": 1.608499859273853,
"grad_norm": 0.4378751814365387,
"learning_rate": 2.2453139627197618e-05,
"loss": 0.1674,
"step": 5715
},
{
"epoch": 1.609907120743034,
"grad_norm": 0.5405195951461792,
"learning_rate": 2.22982870845425e-05,
"loss": 0.3422,
"step": 5720
},
{
"epoch": 1.611314382212215,
"grad_norm": 1.4159220457077026,
"learning_rate": 2.214390334342875e-05,
"loss": 0.2116,
"step": 5725
},
{
"epoch": 1.612721643681396,
"grad_norm": 1.1930686235427856,
"learning_rate": 2.1989989335307304e-05,
"loss": 0.0965,
"step": 5730
},
{
"epoch": 1.614128905150577,
"grad_norm": 1.2334959506988525,
"learning_rate": 2.1836545988795054e-05,
"loss": 0.1547,
"step": 5735
},
{
"epoch": 1.615536166619758,
"grad_norm": 0.7615369558334351,
"learning_rate": 2.168357422966928e-05,
"loss": 0.2468,
"step": 5740
},
{
"epoch": 1.616943428088939,
"grad_norm": 0.7710257172584534,
"learning_rate": 2.153107498086193e-05,
"loss": 0.1674,
"step": 5745
},
{
"epoch": 1.61835068955812,
"grad_norm": 0.464054673910141,
"learning_rate": 2.137904916245419e-05,
"loss": 0.2004,
"step": 5750
},
{
"epoch": 1.6197579510273008,
"grad_norm": 0.3523075580596924,
"learning_rate": 2.1227497691670894e-05,
"loss": 0.2314,
"step": 5755
},
{
"epoch": 1.6211652124964817,
"grad_norm": 0.8045745491981506,
"learning_rate": 2.1076421482874877e-05,
"loss": 0.1431,
"step": 5760
},
{
"epoch": 1.6225724739656628,
"grad_norm": 0.7054654955863953,
"learning_rate": 2.0925821447561665e-05,
"loss": 0.1056,
"step": 5765
},
{
"epoch": 1.6239797354348438,
"grad_norm": 1.5930366516113281,
"learning_rate": 2.077569849435379e-05,
"loss": 0.2394,
"step": 5770
},
{
"epoch": 1.6253869969040249,
"grad_norm": 0.678402304649353,
"learning_rate": 2.062605352899537e-05,
"loss": 0.1482,
"step": 5775
},
{
"epoch": 1.6267942583732058,
"grad_norm": 1.009436845779419,
"learning_rate": 2.0476887454346716e-05,
"loss": 0.2381,
"step": 5780
},
{
"epoch": 1.6282015198423867,
"grad_norm": 0.5717734098434448,
"learning_rate": 2.0328201170378813e-05,
"loss": 0.1877,
"step": 5785
},
{
"epoch": 1.6296087813115676,
"grad_norm": 1.0021076202392578,
"learning_rate": 2.0179995574167842e-05,
"loss": 0.1836,
"step": 5790
},
{
"epoch": 1.6310160427807485,
"grad_norm": 0.5409684777259827,
"learning_rate": 2.0032271559889915e-05,
"loss": 0.21,
"step": 5795
},
{
"epoch": 1.6324233042499297,
"grad_norm": 1.6268481016159058,
"learning_rate": 1.9885030018815487e-05,
"loss": 0.1786,
"step": 5800
},
{
"epoch": 1.6338305657191106,
"grad_norm": 1.0220392942428589,
"learning_rate": 1.9738271839304213e-05,
"loss": 0.2016,
"step": 5805
},
{
"epoch": 1.6352378271882917,
"grad_norm": 0.8178629875183105,
"learning_rate": 1.959199790679934e-05,
"loss": 0.1491,
"step": 5810
},
{
"epoch": 1.6366450886574726,
"grad_norm": 2.1935439109802246,
"learning_rate": 1.944620910382252e-05,
"loss": 0.1966,
"step": 5815
},
{
"epoch": 1.6380523501266535,
"grad_norm": 1.1369730234146118,
"learning_rate": 1.930090630996849e-05,
"loss": 0.2084,
"step": 5820
},
{
"epoch": 1.6394596115958344,
"grad_norm": 0.8570969104766846,
"learning_rate": 1.915609040189972e-05,
"loss": 0.1779,
"step": 5825
},
{
"epoch": 1.6408668730650153,
"grad_norm": 0.8881973624229431,
"learning_rate": 1.901176225334105e-05,
"loss": 0.2334,
"step": 5830
},
{
"epoch": 1.6422741345341965,
"grad_norm": 1.057015299797058,
"learning_rate": 1.886792273507457e-05,
"loss": 0.2208,
"step": 5835
},
{
"epoch": 1.6436813960033776,
"grad_norm": 0.40783455967903137,
"learning_rate": 1.8724572714934307e-05,
"loss": 0.0648,
"step": 5840
},
{
"epoch": 1.6450886574725585,
"grad_norm": 0.8724305629730225,
"learning_rate": 1.8581713057800933e-05,
"loss": 0.2695,
"step": 5845
},
{
"epoch": 1.6464959189417394,
"grad_norm": 1.3229783773422241,
"learning_rate": 1.8439344625596534e-05,
"loss": 0.1555,
"step": 5850
},
{
"epoch": 1.6479031804109203,
"grad_norm": 0.7381983399391174,
"learning_rate": 1.8297468277279618e-05,
"loss": 0.177,
"step": 5855
},
{
"epoch": 1.6493104418801012,
"grad_norm": 0.4356767535209656,
"learning_rate": 1.8156084868839617e-05,
"loss": 0.094,
"step": 5860
},
{
"epoch": 1.6507177033492821,
"grad_norm": 2.0452256202697754,
"learning_rate": 1.8015195253292016e-05,
"loss": 0.3872,
"step": 5865
},
{
"epoch": 1.6521249648184633,
"grad_norm": 0.7345725297927856,
"learning_rate": 1.7874800280672953e-05,
"loss": 0.3794,
"step": 5870
},
{
"epoch": 1.6535322262876444,
"grad_norm": 0.5564286112785339,
"learning_rate": 1.773490079803436e-05,
"loss": 0.194,
"step": 5875
},
{
"epoch": 1.6549394877568253,
"grad_norm": 1.4534375667572021,
"learning_rate": 1.7595497649438565e-05,
"loss": 0.2468,
"step": 5880
},
{
"epoch": 1.6563467492260062,
"grad_norm": 1.159037709236145,
"learning_rate": 1.745659167595337e-05,
"loss": 0.2072,
"step": 5885
},
{
"epoch": 1.6577540106951871,
"grad_norm": 0.9856454133987427,
"learning_rate": 1.7318183715647017e-05,
"loss": 0.2057,
"step": 5890
},
{
"epoch": 1.659161272164368,
"grad_norm": 0.9816296696662903,
"learning_rate": 1.7180274603583035e-05,
"loss": 0.0591,
"step": 5895
},
{
"epoch": 1.660568533633549,
"grad_norm": 0.6953201293945312,
"learning_rate": 1.7042865171815158e-05,
"loss": 0.1549,
"step": 5900
},
{
"epoch": 1.66197579510273,
"grad_norm": 0.9859986901283264,
"learning_rate": 1.6905956249382448e-05,
"loss": 0.1446,
"step": 5905
},
{
"epoch": 1.6633830565719112,
"grad_norm": 2.2135300636291504,
"learning_rate": 1.6769548662304224e-05,
"loss": 0.2074,
"step": 5910
},
{
"epoch": 1.6647903180410921,
"grad_norm": 0.7724807858467102,
"learning_rate": 1.6633643233575014e-05,
"loss": 0.1867,
"step": 5915
},
{
"epoch": 1.666197579510273,
"grad_norm": 0.6000497341156006,
"learning_rate": 1.6498240783159656e-05,
"loss": 0.3259,
"step": 5920
},
{
"epoch": 1.667604840979454,
"grad_norm": 1.0605989694595337,
"learning_rate": 1.6363342127988435e-05,
"loss": 0.2042,
"step": 5925
},
{
"epoch": 1.6690121024486348,
"grad_norm": 0.4106568396091461,
"learning_rate": 1.6228948081951943e-05,
"loss": 0.1073,
"step": 5930
},
{
"epoch": 1.670419363917816,
"grad_norm": 0.9518342614173889,
"learning_rate": 1.6095059455896387e-05,
"loss": 0.1523,
"step": 5935
},
{
"epoch": 1.671826625386997,
"grad_norm": 0.7186952829360962,
"learning_rate": 1.596167705761852e-05,
"loss": 0.1052,
"step": 5940
},
{
"epoch": 1.673233886856178,
"grad_norm": 0.5331084728240967,
"learning_rate": 1.5828801691860895e-05,
"loss": 0.1007,
"step": 5945
},
{
"epoch": 1.674641148325359,
"grad_norm": 0.530546247959137,
"learning_rate": 1.5696434160306983e-05,
"loss": 0.0948,
"step": 5950
},
{
"epoch": 1.6760484097945398,
"grad_norm": 0.9805326461791992,
"learning_rate": 1.5564575261576254e-05,
"loss": 0.2097,
"step": 5955
},
{
"epoch": 1.6774556712637207,
"grad_norm": 0.8919891715049744,
"learning_rate": 1.5433225791219407e-05,
"loss": 0.1409,
"step": 5960
},
{
"epoch": 1.6788629327329017,
"grad_norm": 0.8015194535255432,
"learning_rate": 1.5302386541713687e-05,
"loss": 0.126,
"step": 5965
},
{
"epoch": 1.6802701942020828,
"grad_norm": 0.47212257981300354,
"learning_rate": 1.5172058302457881e-05,
"loss": 0.1573,
"step": 5970
},
{
"epoch": 1.6816774556712637,
"grad_norm": 0.6983383297920227,
"learning_rate": 1.5042241859767735e-05,
"loss": 0.1209,
"step": 5975
},
{
"epoch": 1.6830847171404448,
"grad_norm": 1.2159236669540405,
"learning_rate": 1.4912937996871168e-05,
"loss": 0.1802,
"step": 5980
},
{
"epoch": 1.6844919786096257,
"grad_norm": 0.764870822429657,
"learning_rate": 1.4784147493903455e-05,
"loss": 0.2714,
"step": 5985
},
{
"epoch": 1.6858992400788066,
"grad_norm": 0.9790758490562439,
"learning_rate": 1.4655871127902655e-05,
"loss": 0.2561,
"step": 5990
},
{
"epoch": 1.6873065015479876,
"grad_norm": 2.1390011310577393,
"learning_rate": 1.4528109672804835e-05,
"loss": 0.23,
"step": 5995
},
{
"epoch": 1.6887137630171685,
"grad_norm": 0.39941343665122986,
"learning_rate": 1.4400863899439387e-05,
"loss": 0.2019,
"step": 6000
},
{
"epoch": 1.6901210244863496,
"grad_norm": 0.6225385069847107,
"learning_rate": 1.42741345755245e-05,
"loss": 0.1884,
"step": 6005
},
{
"epoch": 1.6915282859555305,
"grad_norm": 0.7307006120681763,
"learning_rate": 1.4147922465662367e-05,
"loss": 0.1126,
"step": 6010
},
{
"epoch": 1.6929355474247116,
"grad_norm": 1.095548152923584,
"learning_rate": 1.4022228331334675e-05,
"loss": 0.1279,
"step": 6015
},
{
"epoch": 1.6943428088938925,
"grad_norm": 0.45030713081359863,
"learning_rate": 1.3897052930898035e-05,
"loss": 0.1378,
"step": 6020
},
{
"epoch": 1.6957500703630735,
"grad_norm": 1.7270435094833374,
"learning_rate": 1.3772397019579242e-05,
"loss": 0.2399,
"step": 6025
},
{
"epoch": 1.6971573318322544,
"grad_norm": 1.0650115013122559,
"learning_rate": 1.3648261349470948e-05,
"loss": 0.1895,
"step": 6030
},
{
"epoch": 1.6985645933014353,
"grad_norm": 1.0545300245285034,
"learning_rate": 1.352464666952694e-05,
"loss": 0.1122,
"step": 6035
},
{
"epoch": 1.6999718547706164,
"grad_norm": 1.0150022506713867,
"learning_rate": 1.3401553725557681e-05,
"loss": 0.1585,
"step": 6040
},
{
"epoch": 1.7013791162397973,
"grad_norm": 0.5082919001579285,
"learning_rate": 1.3278983260225875e-05,
"loss": 0.2291,
"step": 6045
},
{
"epoch": 1.7027863777089784,
"grad_norm": 0.9131124019622803,
"learning_rate": 1.3156936013041898e-05,
"loss": 0.1303,
"step": 6050
},
{
"epoch": 1.7041936391781594,
"grad_norm": 0.6868187189102173,
"learning_rate": 1.3035412720359353e-05,
"loss": 0.1357,
"step": 6055
},
{
"epoch": 1.7056009006473403,
"grad_norm": 0.8841606378555298,
"learning_rate": 1.2914414115370666e-05,
"loss": 0.1271,
"step": 6060
},
{
"epoch": 1.7070081621165212,
"grad_norm": 0.7348530888557434,
"learning_rate": 1.2793940928102654e-05,
"loss": 0.1773,
"step": 6065
},
{
"epoch": 1.708415423585702,
"grad_norm": 0.7667552828788757,
"learning_rate": 1.2673993885412073e-05,
"loss": 0.2278,
"step": 6070
},
{
"epoch": 1.7098226850548832,
"grad_norm": 1.5741273164749146,
"learning_rate": 1.2554573710981276e-05,
"loss": 0.1607,
"step": 6075
},
{
"epoch": 1.7112299465240641,
"grad_norm": 1.1054571866989136,
"learning_rate": 1.2435681125313803e-05,
"loss": 0.1732,
"step": 6080
},
{
"epoch": 1.7126372079932453,
"grad_norm": 1.193298101425171,
"learning_rate": 1.2317316845730131e-05,
"loss": 0.2668,
"step": 6085
},
{
"epoch": 1.7140444694624262,
"grad_norm": 0.5256794691085815,
"learning_rate": 1.2199481586363281e-05,
"loss": 0.1741,
"step": 6090
},
{
"epoch": 1.715451730931607,
"grad_norm": 1.2280601263046265,
"learning_rate": 1.2082176058154426e-05,
"loss": 0.1479,
"step": 6095
},
{
"epoch": 1.716858992400788,
"grad_norm": 1.0573979616165161,
"learning_rate": 1.196540096884876e-05,
"loss": 0.1264,
"step": 6100
},
{
"epoch": 1.718266253869969,
"grad_norm": 1.5370665788650513,
"learning_rate": 1.1849157022991163e-05,
"loss": 0.2142,
"step": 6105
},
{
"epoch": 1.71967351533915,
"grad_norm": 0.7827951312065125,
"learning_rate": 1.1733444921921899e-05,
"loss": 0.2057,
"step": 6110
},
{
"epoch": 1.721080776808331,
"grad_norm": 1.3667113780975342,
"learning_rate": 1.1618265363772407e-05,
"loss": 0.2746,
"step": 6115
},
{
"epoch": 1.722488038277512,
"grad_norm": 1.506797432899475,
"learning_rate": 1.15036190434612e-05,
"loss": 0.1855,
"step": 6120
},
{
"epoch": 1.723895299746693,
"grad_norm": 0.9613803029060364,
"learning_rate": 1.1389506652689474e-05,
"loss": 0.1031,
"step": 6125
},
{
"epoch": 1.7253025612158739,
"grad_norm": 1.2002402544021606,
"learning_rate": 1.1275928879937114e-05,
"loss": 0.1781,
"step": 6130
},
{
"epoch": 1.7267098226850548,
"grad_norm": 0.5957798361778259,
"learning_rate": 1.1162886410458462e-05,
"loss": 0.1176,
"step": 6135
},
{
"epoch": 1.7281170841542357,
"grad_norm": 0.9620370268821716,
"learning_rate": 1.1050379926278132e-05,
"loss": 0.1515,
"step": 6140
},
{
"epoch": 1.7295243456234168,
"grad_norm": 0.9195571541786194,
"learning_rate": 1.0938410106187046e-05,
"loss": 0.1121,
"step": 6145
},
{
"epoch": 1.730931607092598,
"grad_norm": 0.4538973867893219,
"learning_rate": 1.0826977625738155e-05,
"loss": 0.1129,
"step": 6150
},
{
"epoch": 1.7323388685617789,
"grad_norm": 1.3514046669006348,
"learning_rate": 1.0716083157242484e-05,
"loss": 0.1743,
"step": 6155
},
{
"epoch": 1.7337461300309598,
"grad_norm": 0.8769412636756897,
"learning_rate": 1.0605727369765072e-05,
"loss": 0.1615,
"step": 6160
},
{
"epoch": 1.7351533915001407,
"grad_norm": 1.3082162141799927,
"learning_rate": 1.0495910929120866e-05,
"loss": 0.1344,
"step": 6165
},
{
"epoch": 1.7365606529693216,
"grad_norm": 0.8667125105857849,
"learning_rate": 1.0386634497870751e-05,
"loss": 0.2135,
"step": 6170
},
{
"epoch": 1.7379679144385025,
"grad_norm": 0.7873309850692749,
"learning_rate": 1.0277898735317614e-05,
"loss": 0.1445,
"step": 6175
},
{
"epoch": 1.7393751759076836,
"grad_norm": 1.0749235153198242,
"learning_rate": 1.016970429750218e-05,
"loss": 0.1792,
"step": 6180
},
{
"epoch": 1.7407824373768648,
"grad_norm": 0.7576783299446106,
"learning_rate": 1.0062051837199282e-05,
"loss": 0.1597,
"step": 6185
},
{
"epoch": 1.7421896988460457,
"grad_norm": 0.7447710037231445,
"learning_rate": 9.954942003913758e-06,
"loss": 0.1363,
"step": 6190
},
{
"epoch": 1.7435969603152266,
"grad_norm": 0.756251335144043,
"learning_rate": 9.848375443876578e-06,
"loss": 0.1474,
"step": 6195
},
{
"epoch": 1.7450042217844075,
"grad_norm": 0.45274704694747925,
"learning_rate": 9.742352800040988e-06,
"loss": 0.065,
"step": 6200
},
{
"epoch": 1.7464114832535884,
"grad_norm": 1.0789294242858887,
"learning_rate": 9.636874712078603e-06,
"loss": 0.2623,
"step": 6205
},
{
"epoch": 1.7478187447227693,
"grad_norm": 1.4076869487762451,
"learning_rate": 9.531941816375501e-06,
"loss": 0.2516,
"step": 6210
},
{
"epoch": 1.7492260061919505,
"grad_norm": 2.701754331588745,
"learning_rate": 9.427554746028478e-06,
"loss": 0.2951,
"step": 6215
},
{
"epoch": 1.7506332676611316,
"grad_norm": 0.36146071553230286,
"learning_rate": 9.3237141308411e-06,
"loss": 0.0842,
"step": 6220
},
{
"epoch": 1.7520405291303125,
"grad_norm": 1.120956540107727,
"learning_rate": 9.22042059732008e-06,
"loss": 0.2894,
"step": 6225
},
{
"epoch": 1.7534477905994934,
"grad_norm": 0.5138603448867798,
"learning_rate": 9.117674768671313e-06,
"loss": 0.0713,
"step": 6230
},
{
"epoch": 1.7548550520686743,
"grad_norm": 0.8469157814979553,
"learning_rate": 9.015477264796202e-06,
"loss": 0.2038,
"step": 6235
},
{
"epoch": 1.7562623135378552,
"grad_norm": 1.5071958303451538,
"learning_rate": 8.913828702287974e-06,
"loss": 0.3285,
"step": 6240
},
{
"epoch": 1.7576695750070364,
"grad_norm": 1.6233199834823608,
"learning_rate": 8.812729694427879e-06,
"loss": 0.1192,
"step": 6245
},
{
"epoch": 1.7590768364762173,
"grad_norm": 0.884638786315918,
"learning_rate": 8.712180851181462e-06,
"loss": 0.1612,
"step": 6250
},
{
"epoch": 1.7604840979453984,
"grad_norm": 1.5049396753311157,
"learning_rate": 8.612182779195021e-06,
"loss": 0.1233,
"step": 6255
},
{
"epoch": 1.7618913594145793,
"grad_norm": 1.0843751430511475,
"learning_rate": 8.512736081791772e-06,
"loss": 0.2496,
"step": 6260
},
{
"epoch": 1.7632986208837602,
"grad_norm": 0.9301806688308716,
"learning_rate": 8.413841358968332e-06,
"loss": 0.2379,
"step": 6265
},
{
"epoch": 1.7647058823529411,
"grad_norm": 1.611035943031311,
"learning_rate": 8.315499207391075e-06,
"loss": 0.1856,
"step": 6270
},
{
"epoch": 1.766113143822122,
"grad_norm": 1.3043655157089233,
"learning_rate": 8.217710220392526e-06,
"loss": 0.1456,
"step": 6275
},
{
"epoch": 1.7675204052913032,
"grad_norm": 1.800098180770874,
"learning_rate": 8.12047498796773e-06,
"loss": 0.2416,
"step": 6280
},
{
"epoch": 1.768927666760484,
"grad_norm": 0.7097885608673096,
"learning_rate": 8.023794096770808e-06,
"loss": 0.141,
"step": 6285
},
{
"epoch": 1.7703349282296652,
"grad_norm": 1.1929750442504883,
"learning_rate": 7.927668130111243e-06,
"loss": 0.3433,
"step": 6290
},
{
"epoch": 1.7717421896988461,
"grad_norm": 1.647980809211731,
"learning_rate": 7.832097667950588e-06,
"loss": 0.2052,
"step": 6295
},
{
"epoch": 1.773149451168027,
"grad_norm": 0.43591317534446716,
"learning_rate": 7.737083286898749e-06,
"loss": 0.2104,
"step": 6300
},
{
"epoch": 1.774556712637208,
"grad_norm": 1.241782546043396,
"learning_rate": 7.642625560210637e-06,
"loss": 0.1109,
"step": 6305
},
{
"epoch": 1.7759639741063888,
"grad_norm": 0.9579405784606934,
"learning_rate": 7.548725057782658e-06,
"loss": 0.1786,
"step": 6310
},
{
"epoch": 1.77737123557557,
"grad_norm": 0.7312494516372681,
"learning_rate": 7.455382346149342e-06,
"loss": 0.1228,
"step": 6315
},
{
"epoch": 1.7787784970447509,
"grad_norm": 0.7087497711181641,
"learning_rate": 7.36259798847978e-06,
"loss": 0.1424,
"step": 6320
},
{
"epoch": 1.780185758513932,
"grad_norm": 1.6807194948196411,
"learning_rate": 7.2703725445744105e-06,
"loss": 0.1199,
"step": 6325
},
{
"epoch": 1.781593019983113,
"grad_norm": 1.101808786392212,
"learning_rate": 7.178706570861515e-06,
"loss": 0.0979,
"step": 6330
},
{
"epoch": 1.7830002814522938,
"grad_norm": 1.7121551036834717,
"learning_rate": 7.087600620393864e-06,
"loss": 0.101,
"step": 6335
},
{
"epoch": 1.7844075429214747,
"grad_norm": 0.6395900845527649,
"learning_rate": 6.997055242845441e-06,
"loss": 0.2197,
"step": 6340
},
{
"epoch": 1.7858148043906557,
"grad_norm": 0.9732767343521118,
"learning_rate": 6.907070984508124e-06,
"loss": 0.1321,
"step": 6345
},
{
"epoch": 1.7872220658598368,
"grad_norm": 1.2426737546920776,
"learning_rate": 6.8176483882883e-06,
"loss": 0.2246,
"step": 6350
},
{
"epoch": 1.7886293273290177,
"grad_norm": 1.6869935989379883,
"learning_rate": 6.728787993703733e-06,
"loss": 0.2733,
"step": 6355
},
{
"epoch": 1.7900365887981988,
"grad_norm": 0.49518850445747375,
"learning_rate": 6.640490336880134e-06,
"loss": 0.1142,
"step": 6360
},
{
"epoch": 1.7914438502673797,
"grad_norm": 0.7494794726371765,
"learning_rate": 6.552755950548095e-06,
"loss": 0.2115,
"step": 6365
},
{
"epoch": 1.7928511117365606,
"grad_norm": 0.7595309019088745,
"learning_rate": 6.465585364039795e-06,
"loss": 0.1135,
"step": 6370
},
{
"epoch": 1.7942583732057416,
"grad_norm": 0.7823693752288818,
"learning_rate": 6.378979103285765e-06,
"loss": 0.1422,
"step": 6375
},
{
"epoch": 1.7956656346749225,
"grad_norm": 1.9872539043426514,
"learning_rate": 6.292937690811795e-06,
"loss": 0.22,
"step": 6380
},
{
"epoch": 1.7970728961441036,
"grad_norm": 0.46582117676734924,
"learning_rate": 6.207461645735746e-06,
"loss": 0.1519,
"step": 6385
},
{
"epoch": 1.7984801576132845,
"grad_norm": 0.40433597564697266,
"learning_rate": 6.122551483764416e-06,
"loss": 0.2422,
"step": 6390
},
{
"epoch": 1.7998874190824656,
"grad_norm": 1.4909939765930176,
"learning_rate": 6.038207717190436e-06,
"loss": 0.1638,
"step": 6395
},
{
"epoch": 1.8012946805516465,
"grad_norm": 0.7252668738365173,
"learning_rate": 5.954430854889182e-06,
"loss": 0.1053,
"step": 6400
},
{
"epoch": 1.8027019420208275,
"grad_norm": 1.4477570056915283,
"learning_rate": 5.871221402315674e-06,
"loss": 0.1934,
"step": 6405
},
{
"epoch": 1.8041092034900084,
"grad_norm": 0.43066859245300293,
"learning_rate": 5.788579861501597e-06,
"loss": 0.114,
"step": 6410
},
{
"epoch": 1.8055164649591893,
"grad_norm": 1.1360474824905396,
"learning_rate": 5.706506731052175e-06,
"loss": 0.1447,
"step": 6415
},
{
"epoch": 1.8069237264283704,
"grad_norm": 0.6951930522918701,
"learning_rate": 5.625002506143218e-06,
"loss": 0.1401,
"step": 6420
},
{
"epoch": 1.8083309878975513,
"grad_norm": 1.213666319847107,
"learning_rate": 5.544067678518194e-06,
"loss": 0.1737,
"step": 6425
},
{
"epoch": 1.8097382493667324,
"grad_norm": 0.9512806534767151,
"learning_rate": 5.46370273648511e-06,
"loss": 0.1517,
"step": 6430
},
{
"epoch": 1.8111455108359134,
"grad_norm": 1.4045182466506958,
"learning_rate": 5.3839081649137205e-06,
"loss": 0.1899,
"step": 6435
},
{
"epoch": 1.8125527723050943,
"grad_norm": 0.579311192035675,
"learning_rate": 5.304684445232522e-06,
"loss": 0.1442,
"step": 6440
},
{
"epoch": 1.8139600337742752,
"grad_norm": 1.6119418144226074,
"learning_rate": 5.2260320554258225e-06,
"loss": 0.1473,
"step": 6445
},
{
"epoch": 1.815367295243456,
"grad_norm": 1.2963722944259644,
"learning_rate": 5.147951470030976e-06,
"loss": 0.227,
"step": 6450
},
{
"epoch": 1.8167745567126372,
"grad_norm": 1.3112095594406128,
"learning_rate": 5.070443160135352e-06,
"loss": 0.116,
"step": 6455
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.49451136589050293,
"learning_rate": 4.993507593373625e-06,
"loss": 0.2077,
"step": 6460
},
{
"epoch": 1.8195890796509993,
"grad_norm": 1.0468064546585083,
"learning_rate": 4.917145233924924e-06,
"loss": 0.246,
"step": 6465
},
{
"epoch": 1.8209963411201802,
"grad_norm": 0.5947392582893372,
"learning_rate": 4.841356542510022e-06,
"loss": 0.1534,
"step": 6470
},
{
"epoch": 1.822403602589361,
"grad_norm": 0.3909468352794647,
"learning_rate": 4.766141976388494e-06,
"loss": 0.1792,
"step": 6475
},
{
"epoch": 1.823810864058542,
"grad_norm": 0.911483645439148,
"learning_rate": 4.691501989356084e-06,
"loss": 0.2147,
"step": 6480
},
{
"epoch": 1.825218125527723,
"grad_norm": 0.5338053703308105,
"learning_rate": 4.617437031741867e-06,
"loss": 0.0811,
"step": 6485
},
{
"epoch": 1.826625386996904,
"grad_norm": 0.5877882242202759,
"learning_rate": 4.54394755040558e-06,
"loss": 0.1473,
"step": 6490
},
{
"epoch": 1.8280326484660852,
"grad_norm": 0.21510696411132812,
"learning_rate": 4.471033988734885e-06,
"loss": 0.2545,
"step": 6495
},
{
"epoch": 1.829439909935266,
"grad_norm": 1.325976014137268,
"learning_rate": 4.398696786642731e-06,
"loss": 0.1934,
"step": 6500
},
{
"epoch": 1.830847171404447,
"grad_norm": 0.5961741805076599,
"learning_rate": 4.326936380564705e-06,
"loss": 0.1732,
"step": 6505
},
{
"epoch": 1.8322544328736279,
"grad_norm": 1.4790091514587402,
"learning_rate": 4.255753203456392e-06,
"loss": 0.1699,
"step": 6510
},
{
"epoch": 1.8336616943428088,
"grad_norm": 0.5095391869544983,
"learning_rate": 4.185147684790691e-06,
"loss": 0.1335,
"step": 6515
},
{
"epoch": 1.83506895581199,
"grad_norm": 0.5565084218978882,
"learning_rate": 4.115120250555349e-06,
"loss": 0.1748,
"step": 6520
},
{
"epoch": 1.8364762172811708,
"grad_norm": 1.2198169231414795,
"learning_rate": 4.045671323250333e-06,
"loss": 0.2197,
"step": 6525
},
{
"epoch": 1.837883478750352,
"grad_norm": 0.4299394488334656,
"learning_rate": 3.976801321885215e-06,
"loss": 0.1229,
"step": 6530
},
{
"epoch": 1.8392907402195329,
"grad_norm": 0.8082312345504761,
"learning_rate": 3.908510661976739e-06,
"loss": 0.2784,
"step": 6535
},
{
"epoch": 1.8406980016887138,
"grad_norm": 0.7714455723762512,
"learning_rate": 3.840799755546298e-06,
"loss": 0.1128,
"step": 6540
},
{
"epoch": 1.8421052631578947,
"grad_norm": 1.8380225896835327,
"learning_rate": 3.773669011117398e-06,
"loss": 0.2196,
"step": 6545
},
{
"epoch": 1.8435125246270756,
"grad_norm": 1.4072784185409546,
"learning_rate": 3.707118833713241e-06,
"loss": 0.1164,
"step": 6550
},
{
"epoch": 1.8449197860962567,
"grad_norm": 2.7376558780670166,
"learning_rate": 3.6411496248542897e-06,
"loss": 0.1715,
"step": 6555
},
{
"epoch": 1.8463270475654376,
"grad_norm": 1.3996756076812744,
"learning_rate": 3.5757617825557533e-06,
"loss": 0.1792,
"step": 6560
},
{
"epoch": 1.8477343090346188,
"grad_norm": 1.6355584859848022,
"learning_rate": 3.5109557013253357e-06,
"loss": 0.1213,
"step": 6565
},
{
"epoch": 1.8491415705037997,
"grad_norm": 0.6846399903297424,
"learning_rate": 3.446731772160716e-06,
"loss": 0.1431,
"step": 6570
},
{
"epoch": 1.8505488319729806,
"grad_norm": 1.0300202369689941,
"learning_rate": 3.3830903825472493e-06,
"loss": 0.1996,
"step": 6575
},
{
"epoch": 1.8519560934421615,
"grad_norm": 0.8449344038963318,
"learning_rate": 3.3200319164556683e-06,
"loss": 0.1457,
"step": 6580
},
{
"epoch": 1.8533633549113424,
"grad_norm": 0.8704646825790405,
"learning_rate": 3.2575567543396746e-06,
"loss": 0.1493,
"step": 6585
},
{
"epoch": 1.8547706163805235,
"grad_norm": 1.0447726249694824,
"learning_rate": 3.195665273133719e-06,
"loss": 0.2999,
"step": 6590
},
{
"epoch": 1.8561778778497044,
"grad_norm": 0.6128522157669067,
"learning_rate": 3.134357846250735e-06,
"loss": 0.0989,
"step": 6595
},
{
"epoch": 1.8575851393188856,
"grad_norm": 0.7889478802680969,
"learning_rate": 3.073634843579776e-06,
"loss": 0.1107,
"step": 6600
},
{
"epoch": 1.8589924007880665,
"grad_norm": 1.114986777305603,
"learning_rate": 3.0134966314839144e-06,
"loss": 0.0739,
"step": 6605
},
{
"epoch": 1.8603996622572474,
"grad_norm": 0.4977349638938904,
"learning_rate": 2.953943572797968e-06,
"loss": 0.0591,
"step": 6610
},
{
"epoch": 1.8618069237264283,
"grad_norm": 0.6706826686859131,
"learning_rate": 2.8949760268263017e-06,
"loss": 0.1383,
"step": 6615
},
{
"epoch": 1.8632141851956092,
"grad_norm": 0.6721628308296204,
"learning_rate": 2.8365943493406934e-06,
"loss": 0.1539,
"step": 6620
},
{
"epoch": 1.8646214466647903,
"grad_norm": 0.6661956310272217,
"learning_rate": 2.7787988925782048e-06,
"loss": 0.1833,
"step": 6625
},
{
"epoch": 1.8660287081339713,
"grad_norm": 1.3089790344238281,
"learning_rate": 2.7215900052389497e-06,
"loss": 0.1368,
"step": 6630
},
{
"epoch": 1.8674359696031524,
"grad_norm": 1.6742780208587646,
"learning_rate": 2.6649680324841166e-06,
"loss": 0.2486,
"step": 6635
},
{
"epoch": 1.8688432310723333,
"grad_norm": 0.8076462745666504,
"learning_rate": 2.608933315933837e-06,
"loss": 0.115,
"step": 6640
},
{
"epoch": 1.8702504925415142,
"grad_norm": 1.4497947692871094,
"learning_rate": 2.5534861936650665e-06,
"loss": 0.1807,
"step": 6645
},
{
"epoch": 1.8716577540106951,
"grad_norm": 0.8782854676246643,
"learning_rate": 2.4986270002096747e-06,
"loss": 0.1052,
"step": 6650
},
{
"epoch": 1.873065015479876,
"grad_norm": 0.6687735915184021,
"learning_rate": 2.4443560665523e-06,
"loss": 0.2023,
"step": 6655
},
{
"epoch": 1.8744722769490572,
"grad_norm": 0.698962390422821,
"learning_rate": 2.3906737201284002e-06,
"loss": 0.1023,
"step": 6660
},
{
"epoch": 1.875879538418238,
"grad_norm": 1.2811174392700195,
"learning_rate": 2.3375802848223385e-06,
"loss": 0.1374,
"step": 6665
},
{
"epoch": 1.8772867998874192,
"grad_norm": 0.8447235226631165,
"learning_rate": 2.285076080965287e-06,
"loss": 0.1569,
"step": 6670
},
{
"epoch": 1.8786940613566,
"grad_norm": 0.6996911764144897,
"learning_rate": 2.233161425333474e-06,
"loss": 0.1395,
"step": 6675
},
{
"epoch": 1.880101322825781,
"grad_norm": 1.388584852218628,
"learning_rate": 2.1818366311460946e-06,
"loss": 0.1692,
"step": 6680
},
{
"epoch": 1.881508584294962,
"grad_norm": 0.5281504988670349,
"learning_rate": 2.1311020080635346e-06,
"loss": 0.1218,
"step": 6685
},
{
"epoch": 1.8829158457641428,
"grad_norm": 0.8310534954071045,
"learning_rate": 2.080957862185484e-06,
"loss": 0.2253,
"step": 6690
},
{
"epoch": 1.884323107233324,
"grad_norm": 0.5924013257026672,
"learning_rate": 2.031404496049072e-06,
"loss": 0.0862,
"step": 6695
},
{
"epoch": 1.8857303687025049,
"grad_norm": 0.445305198431015,
"learning_rate": 1.982442208627033e-06,
"loss": 0.2208,
"step": 6700
},
{
"epoch": 1.887137630171686,
"grad_norm": 0.66776442527771,
"learning_rate": 1.9340712953259565e-06,
"loss": 0.159,
"step": 6705
},
{
"epoch": 1.888544891640867,
"grad_norm": 0.8003804683685303,
"learning_rate": 1.886292047984395e-06,
"loss": 0.1276,
"step": 6710
},
{
"epoch": 1.8899521531100478,
"grad_norm": 1.1968119144439697,
"learning_rate": 1.839104754871257e-06,
"loss": 0.1147,
"step": 6715
},
{
"epoch": 1.8913594145792287,
"grad_norm": 2.06772518157959,
"learning_rate": 1.7925097006839198e-06,
"loss": 0.1263,
"step": 6720
},
{
"epoch": 1.8927666760484096,
"grad_norm": 0.8591898083686829,
"learning_rate": 1.746507166546596e-06,
"loss": 0.1612,
"step": 6725
},
{
"epoch": 1.8941739375175908,
"grad_norm": 1.3790104389190674,
"learning_rate": 1.7010974300086358e-06,
"loss": 0.1714,
"step": 6730
},
{
"epoch": 1.8955811989867717,
"grad_norm": 0.6857600808143616,
"learning_rate": 1.656280765042828e-06,
"loss": 0.1331,
"step": 6735
},
{
"epoch": 1.8969884604559528,
"grad_norm": 0.9561905860900879,
"learning_rate": 1.612057442043724e-06,
"loss": 0.13,
"step": 6740
},
{
"epoch": 1.8983957219251337,
"grad_norm": 1.3840196132659912,
"learning_rate": 1.5684277278260718e-06,
"loss": 0.2562,
"step": 6745
},
{
"epoch": 1.8998029833943146,
"grad_norm": 0.6963467001914978,
"learning_rate": 1.525391885623173e-06,
"loss": 0.1882,
"step": 6750
},
{
"epoch": 1.9012102448634955,
"grad_norm": 0.9500248432159424,
"learning_rate": 1.4829501750852626e-06,
"loss": 0.131,
"step": 6755
},
{
"epoch": 1.9026175063326765,
"grad_norm": 0.8108523488044739,
"learning_rate": 1.4411028522779757e-06,
"loss": 0.1891,
"step": 6760
},
{
"epoch": 1.9040247678018576,
"grad_norm": 0.6868911981582642,
"learning_rate": 1.3998501696808274e-06,
"loss": 0.1761,
"step": 6765
},
{
"epoch": 1.9054320292710387,
"grad_norm": 1.8471946716308594,
"learning_rate": 1.3591923761856363e-06,
"loss": 0.2683,
"step": 6770
},
{
"epoch": 1.9068392907402196,
"grad_norm": 0.5496200919151306,
"learning_rate": 1.3191297170950578e-06,
"loss": 0.1627,
"step": 6775
},
{
"epoch": 1.9082465522094005,
"grad_norm": 0.7432734370231628,
"learning_rate": 1.2796624341210873e-06,
"loss": 0.1406,
"step": 6780
},
{
"epoch": 1.9096538136785814,
"grad_norm": 0.773916482925415,
"learning_rate": 1.2407907653836038e-06,
"loss": 0.1308,
"step": 6785
},
{
"epoch": 1.9110610751477624,
"grad_norm": 1.0941839218139648,
"learning_rate": 1.2025149454089723e-06,
"loss": 0.1269,
"step": 6790
},
{
"epoch": 1.9124683366169433,
"grad_norm": 0.5930225253105164,
"learning_rate": 1.1648352051285448e-06,
"loss": 0.1393,
"step": 6795
},
{
"epoch": 1.9138755980861244,
"grad_norm": 0.38355159759521484,
"learning_rate": 1.127751771877339e-06,
"loss": 0.128,
"step": 6800
},
{
"epoch": 1.9152828595553055,
"grad_norm": 0.8687125444412231,
"learning_rate": 1.0912648693926497e-06,
"loss": 0.128,
"step": 6805
},
{
"epoch": 1.9166901210244864,
"grad_norm": 0.9181435704231262,
"learning_rate": 1.055374717812696e-06,
"loss": 0.2078,
"step": 6810
},
{
"epoch": 1.9180973824936673,
"grad_norm": 1.5709048509597778,
"learning_rate": 1.0200815336752657e-06,
"loss": 0.1745,
"step": 6815
},
{
"epoch": 1.9195046439628483,
"grad_norm": 0.8740848302841187,
"learning_rate": 9.853855299164717e-07,
"loss": 0.1209,
"step": 6820
},
{
"epoch": 1.9209119054320292,
"grad_norm": 0.46822214126586914,
"learning_rate": 9.512869158693982e-07,
"loss": 0.1031,
"step": 6825
},
{
"epoch": 1.9223191669012103,
"grad_norm": 0.6493380665779114,
"learning_rate": 9.177858972628794e-07,
"loss": 0.1665,
"step": 6830
},
{
"epoch": 1.9237264283703912,
"grad_norm": 0.628223180770874,
"learning_rate": 8.848826762202556e-07,
"loss": 0.1375,
"step": 6835
},
{
"epoch": 1.9251336898395723,
"grad_norm": 0.8677277565002441,
"learning_rate": 8.525774512581297e-07,
"loss": 0.1193,
"step": 6840
},
{
"epoch": 1.9265409513087532,
"grad_norm": 0.34191542863845825,
"learning_rate": 8.208704172851911e-07,
"loss": 0.1605,
"step": 6845
},
{
"epoch": 1.9279482127779342,
"grad_norm": 0.3965689539909363,
"learning_rate": 7.897617656010381e-07,
"loss": 0.2008,
"step": 6850
},
{
"epoch": 1.929355474247115,
"grad_norm": 1.651140809059143,
"learning_rate": 7.592516838950348e-07,
"loss": 0.259,
"step": 6855
},
{
"epoch": 1.930762735716296,
"grad_norm": 1.2457526922225952,
"learning_rate": 7.293403562451229e-07,
"loss": 0.1243,
"step": 6860
},
{
"epoch": 1.932169997185477,
"grad_norm": 0.42919033765792847,
"learning_rate": 7.000279631168005e-07,
"loss": 0.0686,
"step": 6865
},
{
"epoch": 1.933577258654658,
"grad_norm": 1.004384160041809,
"learning_rate": 6.713146813619564e-07,
"loss": 0.1132,
"step": 6870
},
{
"epoch": 1.9349845201238391,
"grad_norm": 0.7319831252098083,
"learning_rate": 6.432006842178262e-07,
"loss": 0.0594,
"step": 6875
},
{
"epoch": 1.93639178159302,
"grad_norm": 0.9444944262504578,
"learning_rate": 6.156861413059601e-07,
"loss": 0.1181,
"step": 6880
},
{
"epoch": 1.937799043062201,
"grad_norm": 1.6310319900512695,
"learning_rate": 5.887712186312011e-07,
"loss": 0.2333,
"step": 6885
},
{
"epoch": 1.9392063045313819,
"grad_norm": 0.7760756015777588,
"learning_rate": 5.624560785806754e-07,
"loss": 0.1101,
"step": 6890
},
{
"epoch": 1.9406135660005628,
"grad_norm": 1.4316829442977905,
"learning_rate": 5.367408799227925e-07,
"loss": 0.1512,
"step": 6895
},
{
"epoch": 1.942020827469744,
"grad_norm": 0.6632144451141357,
"learning_rate": 5.116257778063238e-07,
"loss": 0.176,
"step": 6900
},
{
"epoch": 1.9434280889389248,
"grad_norm": 0.4353666603565216,
"learning_rate": 4.871109237594373e-07,
"loss": 0.1293,
"step": 6905
},
{
"epoch": 1.944835350408106,
"grad_norm": 2.0593976974487305,
"learning_rate": 4.631964656888088e-07,
"loss": 0.4206,
"step": 6910
},
{
"epoch": 1.9462426118772869,
"grad_norm": 0.8553899526596069,
"learning_rate": 4.3988254787868945e-07,
"loss": 0.2033,
"step": 6915
},
{
"epoch": 1.9476498733464678,
"grad_norm": 2.4069225788116455,
"learning_rate": 4.171693109900954e-07,
"loss": 0.1747,
"step": 6920
},
{
"epoch": 1.9490571348156487,
"grad_norm": 1.0317012071609497,
"learning_rate": 3.950568920598974e-07,
"loss": 0.1857,
"step": 6925
},
{
"epoch": 1.9504643962848296,
"grad_norm": 0.16559715569019318,
"learning_rate": 3.735454245000436e-07,
"loss": 0.1506,
"step": 6930
},
{
"epoch": 1.9518716577540107,
"grad_norm": 1.008353590965271,
"learning_rate": 3.526350380967047e-07,
"loss": 0.1661,
"step": 6935
},
{
"epoch": 1.9532789192231916,
"grad_norm": 0.8605316877365112,
"learning_rate": 3.323258590095635e-07,
"loss": 0.1547,
"step": 6940
},
{
"epoch": 1.9546861806923728,
"grad_norm": 0.8140857815742493,
"learning_rate": 3.126180097709597e-07,
"loss": 0.204,
"step": 6945
},
{
"epoch": 1.9560934421615537,
"grad_norm": 0.250213086605072,
"learning_rate": 2.9351160928522416e-07,
"loss": 0.1531,
"step": 6950
},
{
"epoch": 1.9575007036307346,
"grad_norm": 2.0146706104278564,
"learning_rate": 2.7500677282795704e-07,
"loss": 0.135,
"step": 6955
},
{
"epoch": 1.9589079650999155,
"grad_norm": 0.43031638860702515,
"learning_rate": 2.57103612045273e-07,
"loss": 0.1118,
"step": 6960
},
{
"epoch": 1.9603152265690964,
"grad_norm": 1.1351455450057983,
"learning_rate": 2.3980223495319034e-07,
"loss": 0.1474,
"step": 6965
},
{
"epoch": 1.9617224880382775,
"grad_norm": 0.6760854721069336,
"learning_rate": 2.231027459369539e-07,
"loss": 0.1577,
"step": 6970
},
{
"epoch": 1.9631297495074584,
"grad_norm": 0.6344230771064758,
"learning_rate": 2.0700524575041347e-07,
"loss": 0.0911,
"step": 6975
},
{
"epoch": 1.9645370109766396,
"grad_norm": 0.8816024661064148,
"learning_rate": 1.915098315153907e-07,
"loss": 0.1711,
"step": 6980
},
{
"epoch": 1.9659442724458205,
"grad_norm": 1.2508419752120972,
"learning_rate": 1.766165967211464e-07,
"loss": 0.2165,
"step": 6985
},
{
"epoch": 1.9673515339150014,
"grad_norm": 0.9682034254074097,
"learning_rate": 1.6232563122373645e-07,
"loss": 0.1176,
"step": 6990
},
{
"epoch": 1.9687587953841823,
"grad_norm": 0.5194812417030334,
"learning_rate": 1.4863702124554567e-07,
"loss": 0.1792,
"step": 6995
},
{
"epoch": 1.9701660568533632,
"grad_norm": 0.7501698136329651,
"learning_rate": 1.3555084937475483e-07,
"loss": 0.1375,
"step": 7000
},
{
"epoch": 1.9715733183225443,
"grad_norm": 0.8848897218704224,
"learning_rate": 1.2306719456478544e-07,
"loss": 0.1218,
"step": 7005
},
{
"epoch": 1.9729805797917253,
"grad_norm": 0.5296036601066589,
"learning_rate": 1.1118613213388918e-07,
"loss": 0.0949,
"step": 7010
},
{
"epoch": 1.9743878412609064,
"grad_norm": 0.5823400616645813,
"learning_rate": 9.990773376464812e-08,
"loss": 0.1266,
"step": 7015
},
{
"epoch": 1.9757951027300873,
"grad_norm": 1.2051528692245483,
"learning_rate": 8.923206750359736e-08,
"loss": 0.1841,
"step": 7020
},
{
"epoch": 1.9772023641992682,
"grad_norm": 2.1660141944885254,
"learning_rate": 7.915919776073644e-08,
"loss": 0.1758,
"step": 7025
},
{
"epoch": 1.9786096256684491,
"grad_norm": 0.9142996072769165,
"learning_rate": 6.968918530920742e-08,
"loss": 0.2226,
"step": 7030
},
{
"epoch": 1.98001688713763,
"grad_norm": 2.0500295162200928,
"learning_rate": 6.082208728490635e-08,
"loss": 0.1638,
"step": 7035
},
{
"epoch": 1.9814241486068112,
"grad_norm": 0.7084165811538696,
"learning_rate": 5.255795718611678e-08,
"loss": 0.1535,
"step": 7040
},
{
"epoch": 1.9828314100759923,
"grad_norm": 0.5557725429534912,
"learning_rate": 4.489684487322121e-08,
"loss": 0.1053,
"step": 7045
},
{
"epoch": 1.9842386715451732,
"grad_norm": 0.3313843905925751,
"learning_rate": 3.783879656840128e-08,
"loss": 0.1593,
"step": 7050
},
{
"epoch": 1.985645933014354,
"grad_norm": 2.084636688232422,
"learning_rate": 3.1383854855304705e-08,
"loss": 0.1938,
"step": 7055
},
{
"epoch": 1.987053194483535,
"grad_norm": 0.47041577100753784,
"learning_rate": 2.553205867884545e-08,
"loss": 0.0875,
"step": 7060
},
{
"epoch": 1.988460455952716,
"grad_norm": 0.6036000847816467,
"learning_rate": 2.0283443344959464e-08,
"loss": 0.064,
"step": 7065
},
{
"epoch": 1.9898677174218968,
"grad_norm": 0.40105298161506653,
"learning_rate": 1.5638040520382646e-08,
"loss": 0.1467,
"step": 7070
},
{
"epoch": 1.991274978891078,
"grad_norm": 0.8283329606056213,
"learning_rate": 1.1595878232428803e-08,
"loss": 0.1675,
"step": 7075
},
{
"epoch": 1.992682240360259,
"grad_norm": 0.612358570098877,
"learning_rate": 8.15698086888972e-09,
"loss": 0.1813,
"step": 7080
},
{
"epoch": 1.99408950182944,
"grad_norm": 0.3482489287853241,
"learning_rate": 5.321369177835323e-09,
"loss": 0.1543,
"step": 7085
},
{
"epoch": 1.995496763298621,
"grad_norm": 0.9294025301933289,
"learning_rate": 3.089060267480459e-09,
"loss": 0.1197,
"step": 7090
},
{
"epoch": 1.9969040247678018,
"grad_norm": 1.7287979125976562,
"learning_rate": 1.4600676061404805e-09,
"loss": 0.1638,
"step": 7095
},
{
"epoch": 1.9983112862369827,
"grad_norm": 0.451955109834671,
"learning_rate": 4.344010220980188e-10,
"loss": 0.2378,
"step": 7100
},
{
"epoch": 1.9997185477061636,
"grad_norm": 0.541246771812439,
"learning_rate": 1.20667035474753e-11,
"loss": 0.1537,
"step": 7105
},
{
"epoch": 2.0,
"step": 7106,
"total_flos": 1.54790643235396e+18,
"train_loss": 0.3593486731773929,
"train_runtime": 16225.5696,
"train_samples_per_second": 3.503,
"train_steps_per_second": 0.438
}
],
"logging_steps": 5,
"max_steps": 7106,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.54790643235396e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}