{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03164556962025317,
"grad_norm": 5.988588333129883,
"learning_rate": 0.0002,
"loss": 2.7545,
"step": 5
},
{
"epoch": 0.06329113924050633,
"grad_norm": 1.671600580215454,
"learning_rate": 0.00019872773536895675,
"loss": 0.7945,
"step": 10
},
{
"epoch": 0.0949367088607595,
"grad_norm": 1.5613313913345337,
"learning_rate": 0.00019745547073791352,
"loss": 0.5902,
"step": 15
},
{
"epoch": 0.12658227848101267,
"grad_norm": 0.9209323525428772,
"learning_rate": 0.00019618320610687023,
"loss": 0.4814,
"step": 20
},
{
"epoch": 0.15822784810126583,
"grad_norm": 0.7866256237030029,
"learning_rate": 0.00019491094147582698,
"loss": 0.4366,
"step": 25
},
{
"epoch": 0.189873417721519,
"grad_norm": 0.7535956501960754,
"learning_rate": 0.00019363867684478372,
"loss": 0.4238,
"step": 30
},
{
"epoch": 0.22151898734177214,
"grad_norm": 0.6957400441169739,
"learning_rate": 0.00019236641221374049,
"loss": 0.5012,
"step": 35
},
{
"epoch": 0.25316455696202533,
"grad_norm": 0.7030977010726929,
"learning_rate": 0.00019109414758269723,
"loss": 0.4676,
"step": 40
},
{
"epoch": 0.2848101265822785,
"grad_norm": 0.741550624370575,
"learning_rate": 0.00018982188295165394,
"loss": 0.4893,
"step": 45
},
{
"epoch": 0.31645569620253167,
"grad_norm": 0.5580260753631592,
"learning_rate": 0.00018854961832061068,
"loss": 0.4825,
"step": 50
},
{
"epoch": 0.34810126582278483,
"grad_norm": 0.5945926308631897,
"learning_rate": 0.00018727735368956745,
"loss": 0.495,
"step": 55
},
{
"epoch": 0.379746835443038,
"grad_norm": 0.570940375328064,
"learning_rate": 0.0001860050890585242,
"loss": 0.4557,
"step": 60
},
{
"epoch": 0.41139240506329117,
"grad_norm": 0.6694577932357788,
"learning_rate": 0.00018473282442748093,
"loss": 0.4674,
"step": 65
},
{
"epoch": 0.4430379746835443,
"grad_norm": 0.6412336826324463,
"learning_rate": 0.00018346055979643765,
"loss": 0.4305,
"step": 70
},
{
"epoch": 0.47468354430379744,
"grad_norm": 0.6980250477790833,
"learning_rate": 0.00018218829516539442,
"loss": 0.4357,
"step": 75
},
{
"epoch": 0.5063291139240507,
"grad_norm": 0.48515501618385315,
"learning_rate": 0.00018091603053435116,
"loss": 0.4285,
"step": 80
},
{
"epoch": 0.5379746835443038,
"grad_norm": 0.6025907397270203,
"learning_rate": 0.0001796437659033079,
"loss": 0.45,
"step": 85
},
{
"epoch": 0.569620253164557,
"grad_norm": 0.5797450542449951,
"learning_rate": 0.00017837150127226464,
"loss": 0.5033,
"step": 90
},
{
"epoch": 0.6012658227848101,
"grad_norm": 0.6783467531204224,
"learning_rate": 0.00017709923664122138,
"loss": 0.4174,
"step": 95
},
{
"epoch": 0.6329113924050633,
"grad_norm": 0.5603845119476318,
"learning_rate": 0.00017582697201017812,
"loss": 0.4505,
"step": 100
},
{
"epoch": 0.6645569620253164,
"grad_norm": 0.7022290229797363,
"learning_rate": 0.00017455470737913486,
"loss": 0.5161,
"step": 105
},
{
"epoch": 0.6962025316455697,
"grad_norm": 0.6286556124687195,
"learning_rate": 0.00017328244274809163,
"loss": 0.4525,
"step": 110
},
{
"epoch": 0.7278481012658228,
"grad_norm": 0.7144973874092102,
"learning_rate": 0.00017201017811704835,
"loss": 0.5068,
"step": 115
},
{
"epoch": 0.759493670886076,
"grad_norm": 0.55781090259552,
"learning_rate": 0.0001707379134860051,
"loss": 0.4385,
"step": 120
},
{
"epoch": 0.7911392405063291,
"grad_norm": 0.5584812760353088,
"learning_rate": 0.00016946564885496183,
"loss": 0.4206,
"step": 125
},
{
"epoch": 0.8227848101265823,
"grad_norm": 0.7030683755874634,
"learning_rate": 0.0001681933842239186,
"loss": 0.4833,
"step": 130
},
{
"epoch": 0.8544303797468354,
"grad_norm": 0.6400471329689026,
"learning_rate": 0.00016692111959287534,
"loss": 0.4646,
"step": 135
},
{
"epoch": 0.8860759493670886,
"grad_norm": 0.5747826099395752,
"learning_rate": 0.00016564885496183205,
"loss": 0.4334,
"step": 140
},
{
"epoch": 0.9177215189873418,
"grad_norm": 0.519247829914093,
"learning_rate": 0.0001643765903307888,
"loss": 0.4365,
"step": 145
},
{
"epoch": 0.9493670886075949,
"grad_norm": 0.6712743639945984,
"learning_rate": 0.00016310432569974556,
"loss": 0.4196,
"step": 150
},
{
"epoch": 0.9810126582278481,
"grad_norm": 0.8766248226165771,
"learning_rate": 0.0001618320610687023,
"loss": 0.44,
"step": 155
},
{
"epoch": 1.0126582278481013,
"grad_norm": 0.432377427816391,
"learning_rate": 0.00016055979643765905,
"loss": 0.3616,
"step": 160
},
{
"epoch": 1.0443037974683544,
"grad_norm": 0.5202181339263916,
"learning_rate": 0.0001592875318066158,
"loss": 0.3763,
"step": 165
},
{
"epoch": 1.0759493670886076,
"grad_norm": 0.5511195659637451,
"learning_rate": 0.00015801526717557253,
"loss": 0.3291,
"step": 170
},
{
"epoch": 1.1075949367088607,
"grad_norm": 0.6027284264564514,
"learning_rate": 0.00015674300254452927,
"loss": 0.3141,
"step": 175
},
{
"epoch": 1.139240506329114,
"grad_norm": 0.6925147175788879,
"learning_rate": 0.000155470737913486,
"loss": 0.3651,
"step": 180
},
{
"epoch": 1.1708860759493671,
"grad_norm": 0.6030688285827637,
"learning_rate": 0.00015419847328244275,
"loss": 0.3411,
"step": 185
},
{
"epoch": 1.2025316455696202,
"grad_norm": 0.5992720127105713,
"learning_rate": 0.0001529262086513995,
"loss": 0.3508,
"step": 190
},
{
"epoch": 1.2341772151898733,
"grad_norm": 0.7508724331855774,
"learning_rate": 0.00015165394402035624,
"loss": 0.3284,
"step": 195
},
{
"epoch": 1.2658227848101267,
"grad_norm": 0.7126018404960632,
"learning_rate": 0.00015038167938931298,
"loss": 0.3466,
"step": 200
},
{
"epoch": 1.2974683544303798,
"grad_norm": 0.8017547130584717,
"learning_rate": 0.00014910941475826972,
"loss": 0.3485,
"step": 205
},
{
"epoch": 1.3291139240506329,
"grad_norm": 0.7311880588531494,
"learning_rate": 0.0001478371501272265,
"loss": 0.3566,
"step": 210
},
{
"epoch": 1.360759493670886,
"grad_norm": 0.7723591327667236,
"learning_rate": 0.0001465648854961832,
"loss": 0.3329,
"step": 215
},
{
"epoch": 1.3924050632911391,
"grad_norm": 0.8075547814369202,
"learning_rate": 0.00014529262086513994,
"loss": 0.3584,
"step": 220
},
{
"epoch": 1.4240506329113924,
"grad_norm": 0.5989384055137634,
"learning_rate": 0.0001440203562340967,
"loss": 0.371,
"step": 225
},
{
"epoch": 1.4556962025316456,
"grad_norm": 0.678035318851471,
"learning_rate": 0.00014274809160305345,
"loss": 0.3448,
"step": 230
},
{
"epoch": 1.4873417721518987,
"grad_norm": 0.8693724274635315,
"learning_rate": 0.0001414758269720102,
"loss": 0.3644,
"step": 235
},
{
"epoch": 1.518987341772152,
"grad_norm": 0.6025015115737915,
"learning_rate": 0.0001402035623409669,
"loss": 0.3233,
"step": 240
},
{
"epoch": 1.5506329113924051,
"grad_norm": 0.679233729839325,
"learning_rate": 0.00013893129770992368,
"loss": 0.3247,
"step": 245
},
{
"epoch": 1.5822784810126582,
"grad_norm": 0.7034026980400085,
"learning_rate": 0.00013765903307888042,
"loss": 0.3527,
"step": 250
},
{
"epoch": 1.6139240506329116,
"grad_norm": 0.7514588236808777,
"learning_rate": 0.00013638676844783716,
"loss": 0.3487,
"step": 255
},
{
"epoch": 1.6455696202531644,
"grad_norm": 0.7183879017829895,
"learning_rate": 0.0001351145038167939,
"loss": 0.3407,
"step": 260
},
{
"epoch": 1.6772151898734178,
"grad_norm": 0.6752856969833374,
"learning_rate": 0.00013384223918575064,
"loss": 0.3088,
"step": 265
},
{
"epoch": 1.7088607594936709,
"grad_norm": 0.8107082843780518,
"learning_rate": 0.00013256997455470738,
"loss": 0.3841,
"step": 270
},
{
"epoch": 1.740506329113924,
"grad_norm": 0.5849813222885132,
"learning_rate": 0.00013129770992366413,
"loss": 0.3325,
"step": 275
},
{
"epoch": 1.7721518987341773,
"grad_norm": 0.8018965125083923,
"learning_rate": 0.00013002544529262087,
"loss": 0.3649,
"step": 280
},
{
"epoch": 1.8037974683544302,
"grad_norm": 0.8379972577095032,
"learning_rate": 0.0001287531806615776,
"loss": 0.3668,
"step": 285
},
{
"epoch": 1.8354430379746836,
"grad_norm": 0.6462769508361816,
"learning_rate": 0.00012748091603053435,
"loss": 0.3363,
"step": 290
},
{
"epoch": 1.8670886075949367,
"grad_norm": 0.8890714645385742,
"learning_rate": 0.0001262086513994911,
"loss": 0.3265,
"step": 295
},
{
"epoch": 1.8987341772151898,
"grad_norm": 0.797147274017334,
"learning_rate": 0.00012493638676844783,
"loss": 0.3636,
"step": 300
},
{
"epoch": 1.9303797468354431,
"grad_norm": 0.6804778575897217,
"learning_rate": 0.0001236641221374046,
"loss": 0.3442,
"step": 305
},
{
"epoch": 1.9620253164556962,
"grad_norm": 0.6891390681266785,
"learning_rate": 0.00012239185750636134,
"loss": 0.3145,
"step": 310
},
{
"epoch": 1.9936708860759493,
"grad_norm": 0.9055079817771912,
"learning_rate": 0.00012111959287531807,
"loss": 0.342,
"step": 315
},
{
"epoch": 2.0253164556962027,
"grad_norm": 0.609603762626648,
"learning_rate": 0.00011984732824427483,
"loss": 0.2504,
"step": 320
},
{
"epoch": 2.0569620253164556,
"grad_norm": 1.3054362535476685,
"learning_rate": 0.00011857506361323157,
"loss": 0.2211,
"step": 325
},
{
"epoch": 2.088607594936709,
"grad_norm": 0.8065559267997742,
"learning_rate": 0.0001173027989821883,
"loss": 0.2173,
"step": 330
},
{
"epoch": 2.1202531645569622,
"grad_norm": 0.8054972887039185,
"learning_rate": 0.00011603053435114504,
"loss": 0.2126,
"step": 335
},
{
"epoch": 2.151898734177215,
"grad_norm": 0.9218589663505554,
"learning_rate": 0.00011475826972010179,
"loss": 0.2042,
"step": 340
},
{
"epoch": 2.1835443037974684,
"grad_norm": 0.9257758259773254,
"learning_rate": 0.00011348600508905853,
"loss": 0.2102,
"step": 345
},
{
"epoch": 2.2151898734177213,
"grad_norm": 0.9863210320472717,
"learning_rate": 0.00011221374045801527,
"loss": 0.219,
"step": 350
},
{
"epoch": 2.2468354430379747,
"grad_norm": 0.8986596465110779,
"learning_rate": 0.000110941475826972,
"loss": 0.2145,
"step": 355
},
{
"epoch": 2.278481012658228,
"grad_norm": 0.869886040687561,
"learning_rate": 0.00010966921119592877,
"loss": 0.1967,
"step": 360
},
{
"epoch": 2.310126582278481,
"grad_norm": 1.0244789123535156,
"learning_rate": 0.0001083969465648855,
"loss": 0.2095,
"step": 365
},
{
"epoch": 2.3417721518987342,
"grad_norm": 0.7236781120300293,
"learning_rate": 0.00010712468193384224,
"loss": 0.2123,
"step": 370
},
{
"epoch": 2.3734177215189876,
"grad_norm": 0.7103443145751953,
"learning_rate": 0.00010585241730279898,
"loss": 0.2205,
"step": 375
},
{
"epoch": 2.4050632911392404,
"grad_norm": 0.9352710247039795,
"learning_rate": 0.00010458015267175574,
"loss": 0.2259,
"step": 380
},
{
"epoch": 2.4367088607594938,
"grad_norm": 0.8048036098480225,
"learning_rate": 0.00010330788804071248,
"loss": 0.2138,
"step": 385
},
{
"epoch": 2.4683544303797467,
"grad_norm": 0.814346194267273,
"learning_rate": 0.00010203562340966922,
"loss": 0.2311,
"step": 390
},
{
"epoch": 2.5,
"grad_norm": 0.9042171835899353,
"learning_rate": 0.00010076335877862595,
"loss": 0.2278,
"step": 395
},
{
"epoch": 2.5316455696202533,
"grad_norm": 0.7023847103118896,
"learning_rate": 9.94910941475827e-05,
"loss": 0.2357,
"step": 400
},
{
"epoch": 2.5632911392405062,
"grad_norm": 0.9368842840194702,
"learning_rate": 9.821882951653944e-05,
"loss": 0.2182,
"step": 405
},
{
"epoch": 2.5949367088607596,
"grad_norm": 0.9225996136665344,
"learning_rate": 9.694656488549618e-05,
"loss": 0.2228,
"step": 410
},
{
"epoch": 2.6265822784810124,
"grad_norm": 0.7295313477516174,
"learning_rate": 9.567430025445293e-05,
"loss": 0.2143,
"step": 415
},
{
"epoch": 2.6582278481012658,
"grad_norm": 0.9664236903190613,
"learning_rate": 9.440203562340968e-05,
"loss": 0.2152,
"step": 420
},
{
"epoch": 2.689873417721519,
"grad_norm": 0.8742689490318298,
"learning_rate": 9.312977099236642e-05,
"loss": 0.2182,
"step": 425
},
{
"epoch": 2.721518987341772,
"grad_norm": 0.8087453842163086,
"learning_rate": 9.185750636132316e-05,
"loss": 0.2184,
"step": 430
},
{
"epoch": 2.7531645569620253,
"grad_norm": 1.062659502029419,
"learning_rate": 9.05852417302799e-05,
"loss": 0.2156,
"step": 435
},
{
"epoch": 2.7848101265822782,
"grad_norm": 0.9411716461181641,
"learning_rate": 8.931297709923665e-05,
"loss": 0.2213,
"step": 440
},
{
"epoch": 2.8164556962025316,
"grad_norm": 0.993147075176239,
"learning_rate": 8.804071246819339e-05,
"loss": 0.2127,
"step": 445
},
{
"epoch": 2.848101265822785,
"grad_norm": 0.8353611826896667,
"learning_rate": 8.676844783715013e-05,
"loss": 0.2116,
"step": 450
},
{
"epoch": 2.879746835443038,
"grad_norm": 0.9915521740913391,
"learning_rate": 8.549618320610687e-05,
"loss": 0.2299,
"step": 455
},
{
"epoch": 2.911392405063291,
"grad_norm": 0.9111132621765137,
"learning_rate": 8.422391857506363e-05,
"loss": 0.2113,
"step": 460
},
{
"epoch": 2.9430379746835444,
"grad_norm": 0.8857221007347107,
"learning_rate": 8.295165394402035e-05,
"loss": 0.2285,
"step": 465
},
{
"epoch": 2.9746835443037973,
"grad_norm": 0.8553436398506165,
"learning_rate": 8.167938931297711e-05,
"loss": 0.233,
"step": 470
},
{
"epoch": 3.0063291139240507,
"grad_norm": 0.5474989414215088,
"learning_rate": 8.040712468193385e-05,
"loss": 0.1938,
"step": 475
},
{
"epoch": 3.037974683544304,
"grad_norm": 0.703250527381897,
"learning_rate": 7.913486005089059e-05,
"loss": 0.131,
"step": 480
},
{
"epoch": 3.069620253164557,
"grad_norm": 1.2964314222335815,
"learning_rate": 7.786259541984733e-05,
"loss": 0.1256,
"step": 485
},
{
"epoch": 3.1012658227848102,
"grad_norm": 0.7699221968650818,
"learning_rate": 7.659033078880407e-05,
"loss": 0.1247,
"step": 490
},
{
"epoch": 3.132911392405063,
"grad_norm": 0.6273168325424194,
"learning_rate": 7.531806615776081e-05,
"loss": 0.1173,
"step": 495
},
{
"epoch": 3.1645569620253164,
"grad_norm": 0.7778182029724121,
"learning_rate": 7.404580152671756e-05,
"loss": 0.1263,
"step": 500
},
{
"epoch": 3.1962025316455698,
"grad_norm": 1.197022795677185,
"learning_rate": 7.27735368956743e-05,
"loss": 0.1278,
"step": 505
},
{
"epoch": 3.2278481012658227,
"grad_norm": 0.7795239090919495,
"learning_rate": 7.150127226463105e-05,
"loss": 0.1253,
"step": 510
},
{
"epoch": 3.259493670886076,
"grad_norm": 0.8459110856056213,
"learning_rate": 7.022900763358778e-05,
"loss": 0.1245,
"step": 515
},
{
"epoch": 3.291139240506329,
"grad_norm": 0.6801343560218811,
"learning_rate": 6.895674300254454e-05,
"loss": 0.1284,
"step": 520
},
{
"epoch": 3.3227848101265822,
"grad_norm": 1.0283461809158325,
"learning_rate": 6.768447837150128e-05,
"loss": 0.1289,
"step": 525
},
{
"epoch": 3.3544303797468356,
"grad_norm": 1.1402161121368408,
"learning_rate": 6.641221374045802e-05,
"loss": 0.1335,
"step": 530
},
{
"epoch": 3.3860759493670884,
"grad_norm": 0.8805460333824158,
"learning_rate": 6.513994910941476e-05,
"loss": 0.127,
"step": 535
},
{
"epoch": 3.4177215189873418,
"grad_norm": 0.8641778230667114,
"learning_rate": 6.38676844783715e-05,
"loss": 0.1253,
"step": 540
},
{
"epoch": 3.449367088607595,
"grad_norm": 0.9324259161949158,
"learning_rate": 6.259541984732826e-05,
"loss": 0.1244,
"step": 545
},
{
"epoch": 3.481012658227848,
"grad_norm": 0.8999868035316467,
"learning_rate": 6.132315521628498e-05,
"loss": 0.1294,
"step": 550
},
{
"epoch": 3.5126582278481013,
"grad_norm": 0.8019403219223022,
"learning_rate": 6.005089058524174e-05,
"loss": 0.1243,
"step": 555
},
{
"epoch": 3.5443037974683547,
"grad_norm": 0.9356296062469482,
"learning_rate": 5.877862595419847e-05,
"loss": 0.1296,
"step": 560
},
{
"epoch": 3.5759493670886076,
"grad_norm": 0.8532143831253052,
"learning_rate": 5.750636132315522e-05,
"loss": 0.124,
"step": 565
},
{
"epoch": 3.607594936708861,
"grad_norm": 1.1260122060775757,
"learning_rate": 5.6234096692111956e-05,
"loss": 0.1209,
"step": 570
},
{
"epoch": 3.6392405063291138,
"grad_norm": 0.7926989793777466,
"learning_rate": 5.496183206106871e-05,
"loss": 0.1265,
"step": 575
},
{
"epoch": 3.670886075949367,
"grad_norm": 0.8992180824279785,
"learning_rate": 5.3689567430025446e-05,
"loss": 0.1311,
"step": 580
},
{
"epoch": 3.7025316455696204,
"grad_norm": 0.7314108610153198,
"learning_rate": 5.2417302798982194e-05,
"loss": 0.1254,
"step": 585
},
{
"epoch": 3.7341772151898733,
"grad_norm": 0.9207622408866882,
"learning_rate": 5.114503816793893e-05,
"loss": 0.1289,
"step": 590
},
{
"epoch": 3.7658227848101267,
"grad_norm": 0.622431218624115,
"learning_rate": 4.9872773536895677e-05,
"loss": 0.1251,
"step": 595
},
{
"epoch": 3.7974683544303796,
"grad_norm": 1.0110617876052856,
"learning_rate": 4.860050890585242e-05,
"loss": 0.1312,
"step": 600
},
{
"epoch": 3.829113924050633,
"grad_norm": 0.699611246585846,
"learning_rate": 4.7328244274809166e-05,
"loss": 0.1263,
"step": 605
},
{
"epoch": 3.8607594936708862,
"grad_norm": 0.7877194881439209,
"learning_rate": 4.605597964376591e-05,
"loss": 0.1304,
"step": 610
},
{
"epoch": 3.892405063291139,
"grad_norm": 0.8100650906562805,
"learning_rate": 4.478371501272265e-05,
"loss": 0.1311,
"step": 615
},
{
"epoch": 3.9240506329113924,
"grad_norm": 0.6674991250038147,
"learning_rate": 4.351145038167939e-05,
"loss": 0.1303,
"step": 620
},
{
"epoch": 3.9556962025316453,
"grad_norm": 0.8028637170791626,
"learning_rate": 4.223918575063613e-05,
"loss": 0.1304,
"step": 625
},
{
"epoch": 3.9873417721518987,
"grad_norm": 1.6102169752120972,
"learning_rate": 4.096692111959288e-05,
"loss": 0.125,
"step": 630
},
{
"epoch": 4.018987341772152,
"grad_norm": 0.4470888376235962,
"learning_rate": 3.969465648854962e-05,
"loss": 0.106,
"step": 635
},
{
"epoch": 4.050632911392405,
"grad_norm": 0.40415889024734497,
"learning_rate": 3.842239185750636e-05,
"loss": 0.0871,
"step": 640
},
{
"epoch": 4.082278481012658,
"grad_norm": 0.48266398906707764,
"learning_rate": 3.7150127226463104e-05,
"loss": 0.0859,
"step": 645
},
{
"epoch": 4.113924050632911,
"grad_norm": 0.836426854133606,
"learning_rate": 3.5877862595419845e-05,
"loss": 0.0883,
"step": 650
},
{
"epoch": 4.1455696202531644,
"grad_norm": 0.6731426119804382,
"learning_rate": 3.4605597964376594e-05,
"loss": 0.0876,
"step": 655
},
{
"epoch": 4.177215189873418,
"grad_norm": 0.5741623640060425,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0911,
"step": 660
},
{
"epoch": 4.208860759493671,
"grad_norm": 0.5007706880569458,
"learning_rate": 3.2061068702290076e-05,
"loss": 0.0882,
"step": 665
},
{
"epoch": 4.2405063291139244,
"grad_norm": 0.5870316028594971,
"learning_rate": 3.078880407124682e-05,
"loss": 0.0891,
"step": 670
},
{
"epoch": 4.272151898734177,
"grad_norm": 0.698828935623169,
"learning_rate": 2.9516539440203562e-05,
"loss": 0.0912,
"step": 675
},
{
"epoch": 4.30379746835443,
"grad_norm": 0.5611512064933777,
"learning_rate": 2.824427480916031e-05,
"loss": 0.0881,
"step": 680
},
{
"epoch": 4.3354430379746836,
"grad_norm": 0.9599896669387817,
"learning_rate": 2.6972010178117052e-05,
"loss": 0.0875,
"step": 685
},
{
"epoch": 4.367088607594937,
"grad_norm": 0.6073245406150818,
"learning_rate": 2.5699745547073793e-05,
"loss": 0.0887,
"step": 690
},
{
"epoch": 4.39873417721519,
"grad_norm": 0.6183071136474609,
"learning_rate": 2.4427480916030535e-05,
"loss": 0.0902,
"step": 695
},
{
"epoch": 4.430379746835443,
"grad_norm": 0.4458979368209839,
"learning_rate": 2.3155216284987276e-05,
"loss": 0.088,
"step": 700
},
{
"epoch": 4.462025316455696,
"grad_norm": 0.6202102303504944,
"learning_rate": 2.1882951653944024e-05,
"loss": 0.0905,
"step": 705
},
{
"epoch": 4.493670886075949,
"grad_norm": 0.46292412281036377,
"learning_rate": 2.0610687022900766e-05,
"loss": 0.0895,
"step": 710
},
{
"epoch": 4.525316455696203,
"grad_norm": 0.6506438255310059,
"learning_rate": 1.9338422391857507e-05,
"loss": 0.0931,
"step": 715
},
{
"epoch": 4.556962025316456,
"grad_norm": 0.5219342112541199,
"learning_rate": 1.8066157760814252e-05,
"loss": 0.0916,
"step": 720
},
{
"epoch": 4.588607594936709,
"grad_norm": 0.47599899768829346,
"learning_rate": 1.6793893129770993e-05,
"loss": 0.0867,
"step": 725
},
{
"epoch": 4.620253164556962,
"grad_norm": 0.5680922865867615,
"learning_rate": 1.5521628498727735e-05,
"loss": 0.0878,
"step": 730
},
{
"epoch": 4.651898734177215,
"grad_norm": 0.5268383622169495,
"learning_rate": 1.424936386768448e-05,
"loss": 0.0881,
"step": 735
},
{
"epoch": 4.6835443037974684,
"grad_norm": 0.6063334345817566,
"learning_rate": 1.2977099236641221e-05,
"loss": 0.0904,
"step": 740
},
{
"epoch": 4.715189873417722,
"grad_norm": 0.5388665795326233,
"learning_rate": 1.1704834605597966e-05,
"loss": 0.0877,
"step": 745
},
{
"epoch": 4.746835443037975,
"grad_norm": 0.5125636458396912,
"learning_rate": 1.0432569974554709e-05,
"loss": 0.0927,
"step": 750
},
{
"epoch": 4.7784810126582276,
"grad_norm": 0.5058565139770508,
"learning_rate": 9.16030534351145e-06,
"loss": 0.0885,
"step": 755
},
{
"epoch": 4.810126582278481,
"grad_norm": 0.39005881547927856,
"learning_rate": 7.888040712468193e-06,
"loss": 0.0892,
"step": 760
},
{
"epoch": 4.841772151898734,
"grad_norm": 0.45494306087493896,
"learning_rate": 6.615776081424936e-06,
"loss": 0.0926,
"step": 765
},
{
"epoch": 4.8734177215189876,
"grad_norm": 0.5130964517593384,
"learning_rate": 5.343511450381679e-06,
"loss": 0.0902,
"step": 770
},
{
"epoch": 4.905063291139241,
"grad_norm": 0.6438283324241638,
"learning_rate": 4.0712468193384225e-06,
"loss": 0.092,
"step": 775
},
{
"epoch": 4.936708860759493,
"grad_norm": 0.4781509041786194,
"learning_rate": 2.7989821882951656e-06,
"loss": 0.0912,
"step": 780
},
{
"epoch": 4.968354430379747,
"grad_norm": 0.42383071780204773,
"learning_rate": 1.5267175572519084e-06,
"loss": 0.0866,
"step": 785
},
{
"epoch": 5.0,
"grad_norm": 0.7937325835227966,
"learning_rate": 2.544529262086514e-07,
"loss": 0.0851,
"step": 790
}
],
"logging_steps": 5,
"max_steps": 790,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.804508938412032e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}