{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992103185048697,
  "eval_steps": 500,
  "global_step": 949,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010529086601737299,
      "grad_norm": 4.026114469450001,
      "learning_rate": 2.105263157894737e-06,
      "loss": 1.3755,
      "step": 1
    },
    {
      "epoch": 0.0052645433008686494,
      "grad_norm": 1.1251945428540173,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 1.3102,
      "step": 5
    },
    {
      "epoch": 0.010529086601737299,
      "grad_norm": 0.563945023031292,
      "learning_rate": 2.105263157894737e-05,
      "loss": 1.2626,
      "step": 10
    },
    {
      "epoch": 0.01579362990260595,
      "grad_norm": 0.4336837922055097,
      "learning_rate": 3.157894736842105e-05,
      "loss": 1.2133,
      "step": 15
    },
    {
      "epoch": 0.021058173203474598,
      "grad_norm": 0.33707427690007363,
      "learning_rate": 4.210526315789474e-05,
      "loss": 1.1636,
      "step": 20
    },
    {
      "epoch": 0.026322716504343247,
      "grad_norm": 0.2412443925117212,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 1.1845,
      "step": 25
    },
    {
      "epoch": 0.0315872598052119,
      "grad_norm": 0.2414606698915264,
      "learning_rate": 6.31578947368421e-05,
      "loss": 1.1401,
      "step": 30
    },
    {
      "epoch": 0.03685180310608055,
      "grad_norm": 0.22628642837844729,
      "learning_rate": 7.368421052631579e-05,
      "loss": 1.1591,
      "step": 35
    },
    {
      "epoch": 0.042116346406949196,
      "grad_norm": 0.2208133931977146,
      "learning_rate": 8.421052631578948e-05,
      "loss": 1.1397,
      "step": 40
    },
    {
      "epoch": 0.04738088970781785,
      "grad_norm": 0.21905054641153135,
      "learning_rate": 9.473684210526316e-05,
      "loss": 1.1295,
      "step": 45
    },
    {
      "epoch": 0.052645433008686494,
      "grad_norm": 0.1951061692287286,
      "learning_rate": 0.00010526315789473685,
      "loss": 1.141,
      "step": 50
    },
    {
      "epoch": 0.05790997630955515,
      "grad_norm": 0.18792729468212402,
      "learning_rate": 0.00011578947368421053,
      "loss": 1.121,
      "step": 55
    },
    {
      "epoch": 0.0631745196104238,
      "grad_norm": 0.189919407852987,
      "learning_rate": 0.0001263157894736842,
      "loss": 1.1347,
      "step": 60
    },
    {
      "epoch": 0.06843906291129244,
      "grad_norm": 0.18793881851552416,
      "learning_rate": 0.0001368421052631579,
      "loss": 1.0961,
      "step": 65
    },
    {
      "epoch": 0.0737036062121611,
      "grad_norm": 0.18263188517758086,
      "learning_rate": 0.00014736842105263158,
      "loss": 1.0937,
      "step": 70
    },
    {
      "epoch": 0.07896814951302975,
      "grad_norm": 0.18520098125405152,
      "learning_rate": 0.00015789473684210527,
      "loss": 1.1419,
      "step": 75
    },
    {
      "epoch": 0.08423269281389839,
      "grad_norm": 0.18675524775004465,
      "learning_rate": 0.00016842105263157895,
      "loss": 1.1094,
      "step": 80
    },
    {
      "epoch": 0.08949723611476705,
      "grad_norm": 0.18469057661828525,
      "learning_rate": 0.00017894736842105264,
      "loss": 1.0952,
      "step": 85
    },
    {
      "epoch": 0.0947617794156357,
      "grad_norm": 0.17860571701450936,
      "learning_rate": 0.00018947368421052632,
      "loss": 1.1035,
      "step": 90
    },
    {
      "epoch": 0.10002632271650434,
      "grad_norm": 0.2032976356381528,
      "learning_rate": 0.0002,
      "loss": 1.1329,
      "step": 95
    },
    {
      "epoch": 0.10529086601737299,
      "grad_norm": 0.18932762375677964,
      "learning_rate": 0.0001999830846194422,
      "loss": 1.0902,
      "step": 100
    },
    {
      "epoch": 0.11055540931824165,
      "grad_norm": 0.17823414518835126,
      "learning_rate": 0.00019993234420037073,
      "loss": 1.0951,
      "step": 105
    },
    {
      "epoch": 0.1158199526191103,
      "grad_norm": 0.19033211796864122,
      "learning_rate": 0.00019984779590865556,
      "loss": 1.11,
      "step": 110
    },
    {
      "epoch": 0.12108449591997894,
      "grad_norm": 0.1781090004150184,
      "learning_rate": 0.0001997294683476273,
      "loss": 1.1216,
      "step": 115
    },
    {
      "epoch": 0.1263490392208476,
      "grad_norm": 0.20142566240295628,
      "learning_rate": 0.0001995774015484005,
      "loss": 1.088,
      "step": 120
    },
    {
      "epoch": 0.13161358252171623,
      "grad_norm": 0.16738672746077932,
      "learning_rate": 0.00019939164695633067,
      "loss": 1.1069,
      "step": 125
    },
    {
      "epoch": 0.13687812582258488,
      "grad_norm": 0.17141306079033702,
      "learning_rate": 0.00019917226741361015,
      "loss": 1.1178,
      "step": 130
    },
    {
      "epoch": 0.14214266912345355,
      "grad_norm": 0.18242919862111662,
      "learning_rate": 0.00019891933713800798,
      "loss": 1.115,
      "step": 135
    },
    {
      "epoch": 0.1474072124243222,
      "grad_norm": 0.18858703761293544,
      "learning_rate": 0.00019863294169776148,
      "loss": 1.092,
      "step": 140
    },
    {
      "epoch": 0.15267175572519084,
      "grad_norm": 0.1851910906506613,
      "learning_rate": 0.00019831317798262786,
      "loss": 1.1015,
      "step": 145
    },
    {
      "epoch": 0.1579362990260595,
      "grad_norm": 0.17061718616532065,
      "learning_rate": 0.00019796015417110577,
      "loss": 1.0834,
      "step": 150
    },
    {
      "epoch": 0.16320084232692814,
      "grad_norm": 0.19083175263550564,
      "learning_rate": 0.0001975739896938375,
      "loss": 1.0915,
      "step": 155
    },
    {
      "epoch": 0.16846538562779678,
      "grad_norm": 0.17041981222039004,
      "learning_rate": 0.00019715481519320496,
      "loss": 1.1045,
      "step": 160
    },
    {
      "epoch": 0.17372992892866543,
      "grad_norm": 0.17539080495334333,
      "learning_rate": 0.00019670277247913205,
      "loss": 1.0822,
      "step": 165
    },
    {
      "epoch": 0.1789944722295341,
      "grad_norm": 0.16847918243353582,
      "learning_rate": 0.00019621801448110952,
      "loss": 1.1113,
      "step": 170
    },
    {
      "epoch": 0.18425901553040275,
      "grad_norm": 0.16577520965121645,
      "learning_rate": 0.00019570070519645767,
      "loss": 1.0726,
      "step": 175
    },
    {
      "epoch": 0.1895235588312714,
      "grad_norm": 0.17216940817918563,
      "learning_rate": 0.00019515101963484485,
      "loss": 1.1214,
      "step": 180
    },
    {
      "epoch": 0.19478810213214004,
      "grad_norm": 0.16717603329959776,
      "learning_rate": 0.00019456914375908023,
      "loss": 1.0749,
      "step": 185
    },
    {
      "epoch": 0.20005264543300869,
      "grad_norm": 0.16743311436795275,
      "learning_rate": 0.0001939552744222014,
      "loss": 1.0856,
      "step": 190
    },
    {
      "epoch": 0.20531718873387733,
      "grad_norm": 0.16628473925396028,
      "learning_rate": 0.00019330961930087725,
      "loss": 1.1088,
      "step": 195
    },
    {
      "epoch": 0.21058173203474598,
      "grad_norm": 0.1735673710306468,
      "learning_rate": 0.00019263239682514952,
      "loss": 1.094,
      "step": 200
    },
    {
      "epoch": 0.21584627533561462,
      "grad_norm": 0.16463208491106188,
      "learning_rate": 0.00019192383610453618,
      "loss": 1.1191,
      "step": 205
    },
    {
      "epoch": 0.2211108186364833,
      "grad_norm": 0.1697201646634803,
      "learning_rate": 0.00019118417685052194,
      "loss": 1.1188,
      "step": 210
    },
    {
      "epoch": 0.22637536193735194,
      "grad_norm": 0.15930348674889006,
      "learning_rate": 0.00019041366929546219,
      "loss": 1.1132,
      "step": 215
    },
    {
      "epoch": 0.2316399052382206,
      "grad_norm": 0.16154587528605638,
      "learning_rate": 0.0001896125741079272,
      "loss": 1.1029,
      "step": 220
    },
    {
      "epoch": 0.23690444853908924,
      "grad_norm": 0.1689593754891321,
      "learning_rate": 0.00018878116230451613,
      "loss": 1.1196,
      "step": 225
    },
    {
      "epoch": 0.24216899183995788,
      "grad_norm": 0.158620581331537,
      "learning_rate": 0.0001879197151581702,
      "loss": 1.0786,
      "step": 230
    },
    {
      "epoch": 0.24743353514082653,
      "grad_norm": 0.1591976970503649,
      "learning_rate": 0.00018702852410301554,
      "loss": 1.0861,
      "step": 235
    },
    {
      "epoch": 0.2526980784416952,
      "grad_norm": 0.16271741933565712,
      "learning_rate": 0.00018610789063576913,
      "loss": 1.077,
      "step": 240
    },
    {
      "epoch": 0.25796262174256385,
      "grad_norm": 0.16691583214883504,
      "learning_rate": 0.00018515812621373997,
      "loss": 1.0931,
      "step": 245
    },
    {
      "epoch": 0.26322716504343247,
      "grad_norm": 0.15795453416798677,
      "learning_rate": 0.00018417955214946092,
      "loss": 1.0929,
      "step": 250
    },
    {
      "epoch": 0.26849170834430114,
      "grad_norm": 0.15734119895920037,
      "learning_rate": 0.00018317249950198597,
      "loss": 1.086,
      "step": 255
    },
    {
      "epoch": 0.27375625164516976,
      "grad_norm": 0.15815121783491273,
      "learning_rate": 0.0001821373089648906,
      "loss": 1.1142,
      "step": 260
    },
    {
      "epoch": 0.27902079494603843,
      "grad_norm": 0.15790684873329372,
      "learning_rate": 0.00018107433075101252,
      "loss": 1.0907,
      "step": 265
    },
    {
      "epoch": 0.2842853382469071,
      "grad_norm": 0.1603612919235376,
      "learning_rate": 0.00017998392447397197,
      "loss": 1.103,
      "step": 270
    },
    {
      "epoch": 0.2895498815477757,
      "grad_norm": 0.1935643212000403,
      "learning_rate": 0.00017886645902651167,
      "loss": 1.1207,
      "step": 275
    },
    {
      "epoch": 0.2948144248486444,
      "grad_norm": 0.16197395404790052,
      "learning_rate": 0.0001777223124556978,
      "loss": 1.1036,
      "step": 280
    },
    {
      "epoch": 0.300078968149513,
      "grad_norm": 0.16503760296000086,
      "learning_rate": 0.00017655187183502344,
      "loss": 1.0647,
      "step": 285
    },
    {
      "epoch": 0.3053435114503817,
      "grad_norm": 0.1772283442967409,
      "learning_rate": 0.00017535553313345904,
      "loss": 1.1075,
      "step": 290
    },
    {
      "epoch": 0.3106080547512503,
      "grad_norm": 0.16282645295325013,
      "learning_rate": 0.00017413370108149286,
      "loss": 1.1094,
      "step": 295
    },
    {
      "epoch": 0.315872598052119,
      "grad_norm": 0.15561402718068354,
      "learning_rate": 0.00017288678903420762,
      "loss": 1.0776,
      "step": 300
    },
    {
      "epoch": 0.32113714135298765,
      "grad_norm": 0.15594031474920508,
      "learning_rate": 0.00017161521883143934,
      "loss": 1.1078,
      "step": 305
    },
    {
      "epoch": 0.32640168465385627,
      "grad_norm": 0.1570395383591175,
      "learning_rate": 0.00017031942065506576,
      "loss": 1.1124,
      "step": 310
    },
    {
      "epoch": 0.33166622795472495,
      "grad_norm": 0.15773152315944608,
      "learning_rate": 0.00016899983288347248,
      "loss": 1.0913,
      "step": 315
    },
    {
      "epoch": 0.33693077125559356,
      "grad_norm": 0.15310806808595664,
      "learning_rate": 0.00016765690194324616,
      "loss": 1.0845,
      "step": 320
    },
    {
      "epoch": 0.34219531455646224,
      "grad_norm": 0.16384678369715433,
      "learning_rate": 0.00016629108215814525,
      "loss": 1.1173,
      "step": 325
    },
    {
      "epoch": 0.34745985785733086,
      "grad_norm": 0.165818325184464,
      "learning_rate": 0.00016490283559539838,
      "loss": 1.1014,
      "step": 330
    },
    {
      "epoch": 0.35272440115819953,
      "grad_norm": 0.15456003221800826,
      "learning_rate": 0.000163492631909384,
      "loss": 1.0915,
      "step": 335
    },
    {
      "epoch": 0.3579889444590682,
      "grad_norm": 0.16059867173233644,
      "learning_rate": 0.00016206094818274229,
      "loss": 1.0969,
      "step": 340
    },
    {
      "epoch": 0.3632534877599368,
      "grad_norm": 0.17415674474557066,
      "learning_rate": 0.00016060826876497478,
      "loss": 1.1145,
      "step": 345
    },
    {
      "epoch": 0.3685180310608055,
      "grad_norm": 0.16440403677512835,
      "learning_rate": 0.0001591350851085851,
      "loss": 1.0683,
      "step": 350
    },
    {
      "epoch": 0.3737825743616741,
      "grad_norm": 0.15901493438320982,
      "learning_rate": 0.00015764189560281677,
      "loss": 1.1199,
      "step": 355
    },
    {
      "epoch": 0.3790471176625428,
      "grad_norm": 0.15988293404570103,
      "learning_rate": 0.00015612920540504453,
      "loss": 1.0709,
      "step": 360
    },
    {
      "epoch": 0.3843116609634114,
      "grad_norm": 0.1616109424204681,
      "learning_rate": 0.00015459752626987563,
      "loss": 1.1027,
      "step": 365
    },
    {
      "epoch": 0.3895762042642801,
      "grad_norm": 0.1513607201651111,
      "learning_rate": 0.00015304737637601926,
      "loss": 1.0956,
      "step": 370
    },
    {
      "epoch": 0.3948407475651487,
      "grad_norm": 0.15452619863423803,
      "learning_rate": 0.0001514792801509831,
      "loss": 1.0952,
      "step": 375
    },
    {
      "epoch": 0.40010529086601737,
      "grad_norm": 0.15418975657555584,
      "learning_rate": 0.00014989376809365493,
      "loss": 1.0934,
      "step": 380
    },
    {
      "epoch": 0.40536983416688604,
      "grad_norm": 0.15158447263390024,
      "learning_rate": 0.00014829137659483143,
      "loss": 1.0981,
      "step": 385
    },
    {
      "epoch": 0.41063437746775466,
      "grad_norm": 0.15420702474431047,
      "learning_rate": 0.0001466726477557527,
      "loss": 1.1013,
      "step": 390
    },
    {
      "epoch": 0.41589892076862334,
      "grad_norm": 0.1513401762569788,
      "learning_rate": 0.00014503812920470534,
      "loss": 1.1128,
      "step": 395
    },
    {
      "epoch": 0.42116346406949196,
      "grad_norm": 0.1759021276212348,
      "learning_rate": 0.00014338837391175582,
      "loss": 1.0793,
      "step": 400
    },
    {
      "epoch": 0.42642800737036063,
      "grad_norm": 0.15639002655528358,
      "learning_rate": 0.00014172394000167623,
      "loss": 1.1126,
      "step": 405
    },
    {
      "epoch": 0.43169255067122925,
      "grad_norm": 0.1558922751326013,
      "learning_rate": 0.00014004539056512667,
      "loss": 1.0864,
      "step": 410
    },
    {
      "epoch": 0.4369570939720979,
      "grad_norm": 0.15449223519766864,
      "learning_rate": 0.00013835329346815716,
      "loss": 1.1161,
      "step": 415
    },
    {
      "epoch": 0.4422216372729666,
      "grad_norm": 0.15398779214531882,
      "learning_rate": 0.0001366482211600945,
      "loss": 1.113,
      "step": 420
    },
    {
      "epoch": 0.4474861805738352,
      "grad_norm": 0.15902962443654645,
      "learning_rate": 0.000134930750479878,
      "loss": 1.0783,
      "step": 425
    },
    {
      "epoch": 0.4527507238747039,
      "grad_norm": 0.15614703146804315,
      "learning_rate": 0.00013320146246091074,
      "loss": 1.0891,
      "step": 430
    },
    {
      "epoch": 0.4580152671755725,
      "grad_norm": 0.151735228198923,
      "learning_rate": 0.00013146094213449148,
      "loss": 1.1006,
      "step": 435
    },
    {
      "epoch": 0.4632798104764412,
      "grad_norm": 0.1633743946888902,
      "learning_rate": 0.00012970977833189393,
      "loss": 1.0717,
      "step": 440
    },
    {
      "epoch": 0.4685443537773098,
      "grad_norm": 0.16534257355481496,
      "learning_rate": 0.00012794856348516095,
      "loss": 1.0778,
      "step": 445
    },
    {
      "epoch": 0.47380889707817847,
      "grad_norm": 0.1856142828881669,
      "learning_rate": 0.00012617789342668004,
      "loss": 1.0859,
      "step": 450
    },
    {
      "epoch": 0.47907344037904714,
      "grad_norm": 0.15229515578033356,
      "learning_rate": 0.00012439836718760886,
      "loss": 1.0761,
      "step": 455
    },
    {
      "epoch": 0.48433798367991576,
      "grad_norm": 0.15984985984562605,
      "learning_rate": 0.00012261058679521834,
      "loss": 1.0926,
      "step": 460
    },
    {
      "epoch": 0.48960252698078444,
      "grad_norm": 0.14896040772758903,
      "learning_rate": 0.00012081515706922227,
      "loss": 1.0834,
      "step": 465
    },
    {
      "epoch": 0.49486707028165305,
      "grad_norm": 0.1514924492192347,
      "learning_rate": 0.00011901268541716224,
      "loss": 1.0885,
      "step": 470
    },
    {
      "epoch": 0.5001316135825217,
      "grad_norm": 0.1513889418015892,
      "learning_rate": 0.00011720378162891708,
      "loss": 1.1001,
      "step": 475
    },
    {
      "epoch": 0.5053961568833903,
      "grad_norm": 0.15159825336613816,
      "learning_rate": 0.0001153890576704062,
      "loss": 1.1082,
      "step": 480
    },
    {
      "epoch": 0.510660700184259,
      "grad_norm": 0.15427722774659086,
      "learning_rate": 0.00011356912747655685,
      "loss": 1.0843,
      "step": 485
    },
    {
      "epoch": 0.5159252434851277,
      "grad_norm": 0.14639500931900093,
      "learning_rate": 0.00011174460674360549,
      "loss": 1.1058,
      "step": 490
    },
    {
      "epoch": 0.5211897867859964,
      "grad_norm": 0.15320269723203808,
      "learning_rate": 0.00010991611272080269,
      "loss": 1.1125,
      "step": 495
    },
    {
      "epoch": 0.5264543300868649,
      "grad_norm": 0.15092814943890553,
      "learning_rate": 0.00010808426400159338,
      "loss": 1.0898,
      "step": 500
    },
    {
      "epoch": 0.5317188733877336,
      "grad_norm": 0.14712598563479434,
      "learning_rate": 0.00010624968031434173,
      "loss": 1.0975,
      "step": 505
    },
    {
      "epoch": 0.5369834166886023,
      "grad_norm": 0.1506174008648404,
      "learning_rate": 0.00010441298231267242,
      "loss": 1.0789,
      "step": 510
    },
    {
      "epoch": 0.542247959989471,
      "grad_norm": 0.14915164476738402,
      "learning_rate": 0.00010257479136549889,
      "loss": 1.088,
      "step": 515
    },
    {
      "epoch": 0.5475125032903395,
      "grad_norm": 0.14933216158522156,
      "learning_rate": 0.00010073572934680919,
      "loss": 1.1012,
      "step": 520
    },
    {
      "epoch": 0.5527770465912082,
      "grad_norm": 0.1623395783916047,
      "learning_rate": 9.889641842528178e-05,
      "loss": 1.0992,
      "step": 525
    },
    {
      "epoch": 0.5580415898920769,
      "grad_norm": 0.15524883773019818,
      "learning_rate": 9.70574808538006e-05,
      "loss": 1.0558,
      "step": 530
    },
    {
      "epoch": 0.5633061331929455,
      "grad_norm": 0.14879516385003932,
      "learning_rate": 9.521953875894257e-05,
      "loss": 1.0634,
      "step": 535
    },
    {
      "epoch": 0.5685706764938142,
      "grad_norm": 0.14856407933911947,
      "learning_rate": 9.338321393050719e-05,
      "loss": 1.0513,
      "step": 540
    },
    {
      "epoch": 0.5738352197946828,
      "grad_norm": 0.1514919636398635,
      "learning_rate": 9.154912761116056e-05,
      "loss": 1.0899,
      "step": 545
    },
    {
      "epoch": 0.5790997630955514,
      "grad_norm": 0.15005939408454377,
      "learning_rate": 8.971790028626395e-05,
      "loss": 1.09,
      "step": 550
    },
    {
      "epoch": 0.5843643063964201,
      "grad_norm": 0.1541140355049706,
      "learning_rate": 8.789015147395919e-05,
      "loss": 1.072,
      "step": 555
    },
    {
      "epoch": 0.5896288496972888,
      "grad_norm": 0.14756189100480177,
      "learning_rate": 8.606649951558073e-05,
      "loss": 1.0548,
      "step": 560
    },
    {
      "epoch": 0.5948933929981574,
      "grad_norm": 0.14468591274130843,
      "learning_rate": 8.424756136646623e-05,
      "loss": 1.056,
      "step": 565
    },
    {
      "epoch": 0.600157936299026,
      "grad_norm": 0.1510683202100121,
      "learning_rate": 8.243395238723571e-05,
      "loss": 1.0999,
      "step": 570
    },
    {
      "epoch": 0.6054224795998947,
      "grad_norm": 0.14942489035639112,
      "learning_rate": 8.062628613561051e-05,
      "loss": 1.08,
      "step": 575
    },
    {
      "epoch": 0.6106870229007634,
      "grad_norm": 0.14792710995590722,
      "learning_rate": 7.8825174158842e-05,
      "loss": 1.0916,
      "step": 580
    },
    {
      "epoch": 0.615951566201632,
      "grad_norm": 0.14543568608581728,
      "learning_rate": 7.703122578682046e-05,
      "loss": 1.061,
      "step": 585
    },
    {
      "epoch": 0.6212161095025006,
      "grad_norm": 0.14792849899325772,
      "learning_rate": 7.524504792593419e-05,
      "loss": 1.1101,
      "step": 590
    },
    {
      "epoch": 0.6264806528033693,
      "grad_norm": 0.14574924924348462,
      "learning_rate": 7.346724485374837e-05,
      "loss": 1.0687,
      "step": 595
    },
    {
      "epoch": 0.631745196104238,
      "grad_norm": 0.1434166906369258,
      "learning_rate": 7.169841801457347e-05,
      "loss": 1.0825,
      "step": 600
    },
    {
      "epoch": 0.6370097394051066,
      "grad_norm": 0.14254720323207454,
      "learning_rate": 6.993916581599202e-05,
      "loss": 1.0896,
      "step": 605
    },
    {
      "epoch": 0.6422742827059753,
      "grad_norm": 0.14534591022474969,
      "learning_rate": 6.819008342641273e-05,
      "loss": 1.0805,
      "step": 610
    },
    {
      "epoch": 0.6475388260068439,
      "grad_norm": 0.1471482502229213,
      "learning_rate": 6.645176257372055e-05,
      "loss": 1.0933,
      "step": 615
    },
    {
      "epoch": 0.6528033693077125,
      "grad_norm": 0.14967562406928056,
      "learning_rate": 6.472479134509052e-05,
      "loss": 1.0987,
      "step": 620
    },
    {
      "epoch": 0.6580679126085812,
      "grad_norm": 0.14756218985788289,
      "learning_rate": 6.300975398803362e-05,
      "loss": 1.0862,
      "step": 625
    },
    {
      "epoch": 0.6633324559094499,
      "grad_norm": 0.14358810278632364,
      "learning_rate": 6.130723071274107e-05,
      "loss": 1.0736,
      "step": 630
    },
    {
      "epoch": 0.6685969992103185,
      "grad_norm": 0.14508119820046267,
      "learning_rate": 5.961779749579516e-05,
      "loss": 1.077,
      "step": 635
    },
    {
      "epoch": 0.6738615425111871,
      "grad_norm": 0.14868475648668983,
      "learning_rate": 5.794202588531166e-05,
      "loss": 1.0921,
      "step": 640
    },
    {
      "epoch": 0.6791260858120558,
      "grad_norm": 0.14136660751737096,
      "learning_rate": 5.628048280758096e-05,
      "loss": 1.0967,
      "step": 645
    },
    {
      "epoch": 0.6843906291129245,
      "grad_norm": 0.14429824406995242,
      "learning_rate": 5.4633730375272594e-05,
      "loss": 1.094,
      "step": 650
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.1435583500936634,
      "learning_rate": 5.300232569726804e-05,
      "loss": 1.0796,
      "step": 655
    },
    {
      "epoch": 0.6949197157146617,
      "grad_norm": 0.14917594264214823,
      "learning_rate": 5.13868206901867e-05,
      "loss": 1.0813,
      "step": 660
    },
    {
      "epoch": 0.7001842590155304,
      "grad_norm": 0.14484547003342338,
      "learning_rate": 4.9787761891668397e-05,
      "loss": 1.0833,
      "step": 665
    },
    {
      "epoch": 0.7054488023163991,
      "grad_norm": 0.14125281408090304,
      "learning_rate": 4.820569027547533e-05,
      "loss": 1.0813,
      "step": 670
    },
    {
      "epoch": 0.7107133456172677,
      "grad_norm": 0.1408995053360923,
      "learning_rate": 4.6641141068476666e-05,
      "loss": 1.0752,
      "step": 675
    },
    {
      "epoch": 0.7159778889181364,
      "grad_norm": 0.1414179653044325,
      "learning_rate": 4.5094643569577186e-05,
      "loss": 1.054,
      "step": 680
    },
    {
      "epoch": 0.721242432219005,
      "grad_norm": 0.14582058548503438,
      "learning_rate": 4.356672097065134e-05,
      "loss": 1.1048,
      "step": 685
    },
    {
      "epoch": 0.7265069755198736,
      "grad_norm": 0.14009606861616825,
      "learning_rate": 4.205789017954364e-05,
      "loss": 1.0683,
      "step": 690
    },
    {
      "epoch": 0.7317715188207423,
      "grad_norm": 0.14586506040118713,
      "learning_rate": 4.056866164519465e-05,
      "loss": 1.0728,
      "step": 695
    },
    {
      "epoch": 0.737036062121611,
      "grad_norm": 0.14168474565307407,
      "learning_rate": 3.909953918495234e-05,
      "loss": 1.0476,
      "step": 700
    },
    {
      "epoch": 0.7423006054224796,
      "grad_norm": 0.14476382479542646,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 1.05,
      "step": 705
    },
    {
      "epoch": 0.7475651487233482,
      "grad_norm": 0.14528550784733454,
      "learning_rate": 3.622359357784569e-05,
      "loss": 1.0611,
      "step": 710
    },
    {
      "epoch": 0.7528296920242169,
      "grad_norm": 0.14781069746763306,
      "learning_rate": 3.481774338526954e-05,
      "loss": 1.0952,
      "step": 715
    },
    {
      "epoch": 0.7580942353250856,
      "grad_norm": 0.15618197530507127,
      "learning_rate": 3.343394484621855e-05,
      "loss": 1.0836,
      "step": 720
    },
    {
      "epoch": 0.7633587786259542,
      "grad_norm": 0.22087793925041818,
      "learning_rate": 3.207266611027069e-05,
      "loss": 1.0727,
      "step": 725
    },
    {
      "epoch": 0.7686233219268228,
      "grad_norm": 0.14674869869141435,
      "learning_rate": 3.0734367708383294e-05,
      "loss": 1.0712,
      "step": 730
    },
    {
      "epoch": 0.7738878652276915,
      "grad_norm": 0.14673826341334423,
      "learning_rate": 2.9419502397091713e-05,
      "loss": 1.0852,
      "step": 735
    },
    {
      "epoch": 0.7791524085285602,
      "grad_norm": 0.1426087824509766,
      "learning_rate": 2.812851500533843e-05,
      "loss": 1.0604,
      "step": 740
    },
    {
      "epoch": 0.7844169518294288,
      "grad_norm": 0.1446320144127932,
      "learning_rate": 2.6861842283983953e-05,
      "loss": 1.0537,
      "step": 745
    },
    {
      "epoch": 0.7896814951302974,
      "grad_norm": 0.14326111319394175,
      "learning_rate": 2.5619912758050725e-05,
      "loss": 1.0942,
      "step": 750
    },
    {
      "epoch": 0.7949460384311661,
      "grad_norm": 0.14149919988871043,
      "learning_rate": 2.4403146581749925e-05,
      "loss": 1.0578,
      "step": 755
    },
    {
      "epoch": 0.8002105817320347,
      "grad_norm": 0.14034086298796508,
      "learning_rate": 2.3211955396340002e-05,
      "loss": 1.0808,
      "step": 760
    },
    {
      "epoch": 0.8054751250329034,
      "grad_norm": 0.1433790314655123,
      "learning_rate": 2.204674219086531e-05,
      "loss": 1.0906,
      "step": 765
    },
    {
      "epoch": 0.8107396683337721,
      "grad_norm": 0.138618618401559,
      "learning_rate": 2.090790116582191e-05,
      "loss": 1.0559,
      "step": 770
    },
    {
      "epoch": 0.8160042116346407,
      "grad_norm": 0.1429827381187093,
      "learning_rate": 1.9795817599796418e-05,
      "loss": 1.0792,
      "step": 775
    },
    {
      "epoch": 0.8212687549355093,
      "grad_norm": 0.14200271718072968,
      "learning_rate": 1.871086771912348e-05,
      "loss": 1.0702,
      "step": 780
    },
    {
      "epoch": 0.826533298236378,
      "grad_norm": 0.1429932480295589,
      "learning_rate": 1.7653418570605475e-05,
      "loss": 1.0715,
      "step": 785
    },
    {
      "epoch": 0.8317978415372467,
      "grad_norm": 0.14431467515210814,
      "learning_rate": 1.6623827897337762e-05,
      "loss": 1.0713,
      "step": 790
    },
    {
      "epoch": 0.8370623848381153,
      "grad_norm": 0.15238820455432608,
      "learning_rate": 1.562244401768144e-05,
      "loss": 1.0824,
      "step": 795
    },
    {
      "epoch": 0.8423269281389839,
      "grad_norm": 0.14830242766673976,
      "learning_rate": 1.4649605707424707e-05,
      "loss": 1.0787,
      "step": 800
    },
    {
      "epoch": 0.8475914714398526,
      "grad_norm": 0.14468170557092047,
      "learning_rate": 1.3705642085172366e-05,
      "loss": 1.0737,
      "step": 805
    },
    {
      "epoch": 0.8528560147407213,
      "grad_norm": 0.14674968769736463,
      "learning_rate": 1.2790872501002472e-05,
      "loss": 1.0577,
      "step": 810
    },
    {
      "epoch": 0.8581205580415899,
      "grad_norm": 0.14311627432536864,
      "learning_rate": 1.1905606428427774e-05,
      "loss": 1.0692,
      "step": 815
    },
    {
      "epoch": 0.8633851013424585,
      "grad_norm": 0.14558376197107287,
      "learning_rate": 1.105014335969855e-05,
      "loss": 1.0934,
      "step": 820
    },
    {
      "epoch": 0.8686496446433272,
      "grad_norm": 0.14414555681497093,
      "learning_rate": 1.0224772704482033e-05,
      "loss": 1.0875,
      "step": 825
    },
    {
      "epoch": 0.8739141879441958,
      "grad_norm": 0.1399627142514978,
      "learning_rate": 9.429773691952858e-06,
      "loss": 1.082,
      "step": 830
    },
    {
      "epoch": 0.8791787312450645,
      "grad_norm": 0.1392001373823857,
      "learning_rate": 8.665415276327871e-06,
      "loss": 1.0573,
      "step": 835
    },
    {
      "epoch": 0.8844432745459332,
      "grad_norm": 0.13993969105859186,
      "learning_rate": 7.931956045876688e-06,
      "loss": 1.0448,
      "step": 840
    },
    {
      "epoch": 0.8897078178468018,
      "grad_norm": 0.16741517197447736,
      "learning_rate": 7.229644135439473e-06,
      "loss": 1.104,
      "step": 845
    },
    {
      "epoch": 0.8949723611476704,
      "grad_norm": 0.14123729142229655,
      "learning_rate": 6.558717142480919e-06,
      "loss": 1.0808,
      "step": 850
    },
    {
      "epoch": 0.9002369044485391,
      "grad_norm": 0.1424278055064695,
      "learning_rate": 5.919402046709288e-06,
      "loss": 1.0709,
      "step": 855
    },
    {
      "epoch": 0.9055014477494078,
      "grad_norm": 0.13993993967003346,
      "learning_rate": 5.311915133287415e-06,
      "loss": 1.0941,
      "step": 860
    },
    {
      "epoch": 0.9107659910502763,
      "grad_norm": 0.14557850289664284,
      "learning_rate": 4.7364619196617495e-06,
      "loss": 1.0492,
      "step": 865
    },
    {
      "epoch": 0.916030534351145,
      "grad_norm": 0.1450177459066908,
      "learning_rate": 4.193237086034351e-06,
      "loss": 1.0972,
      "step": 870
    },
    {
      "epoch": 0.9212950776520137,
      "grad_norm": 0.1570091074884799,
      "learning_rate": 3.6824244095010065e-06,
      "loss": 1.0695,
      "step": 875
    },
    {
      "epoch": 0.9265596209528824,
      "grad_norm": 0.14097561405495265,
      "learning_rate": 3.2041967018780707e-06,
      "loss": 1.0948,
      "step": 880
    },
    {
      "epoch": 0.931824164253751,
      "grad_norm": 0.1420984285291773,
      "learning_rate": 2.7587157512388718e-06,
      "loss": 1.0573,
      "step": 885
    },
    {
      "epoch": 0.9370887075546196,
      "grad_norm": 0.1545471738706476,
      "learning_rate": 2.346132267179646e-06,
      "loss": 1.0786,
      "step": 890
    },
    {
      "epoch": 0.9423532508554883,
      "grad_norm": 0.14481364480205125,
      "learning_rate": 1.9665858298333005e-06,
      "loss": 1.0939,
      "step": 895
    },
    {
      "epoch": 0.9476177941563569,
      "grad_norm": 0.1446556897144525,
      "learning_rate": 1.6202048426483651e-06,
      "loss": 1.0752,
      "step": 900
    },
    {
      "epoch": 0.9528823374572256,
      "grad_norm": 0.13840641658264988,
      "learning_rate": 1.3071064889491724e-06,
      "loss": 1.0757,
      "step": 905
    },
    {
      "epoch": 0.9581468807580943,
      "grad_norm": 0.1405867091258211,
      "learning_rate": 1.0273966922918155e-06,
      "loss": 1.0886,
      "step": 910
    },
    {
      "epoch": 0.9634114240589629,
      "grad_norm": 0.15143973079201015,
      "learning_rate": 7.81170080629412e-07,
      "loss": 1.0337,
      "step": 915
    },
    {
      "epoch": 0.9686759673598315,
      "grad_norm": 0.15113893856195346,
      "learning_rate": 5.68509954298757e-07,
      "loss": 1.099,
      "step": 920
    },
    {
      "epoch": 0.9739405106607002,
      "grad_norm": 0.1436446854214333,
      "learning_rate": 3.8948825783918784e-07,
      "loss": 1.0595,
      "step": 925
    },
    {
      "epoch": 0.9792050539615689,
      "grad_norm": 0.14373165990559605,
      "learning_rate": 2.4416555565318635e-07,
      "loss": 1.0815,
      "step": 930
    },
    {
      "epoch": 0.9844695972624374,
      "grad_norm": 0.14233020784379563,
      "learning_rate": 1.3259101151694708e-07,
      "loss": 1.0569,
      "step": 935
    },
    {
      "epoch": 0.9897341405633061,
      "grad_norm": 0.13823967108377017,
      "learning_rate": 5.480237194799287e-08,
      "loss": 1.0689,
      "step": 940
    },
    {
      "epoch": 0.9949986838641748,
      "grad_norm": 0.1431568671824589,
      "learning_rate": 1.0825953435122938e-08,
      "loss": 1.0709,
      "step": 945
    },
    {
      "epoch": 0.9992103185048697,
      "eval_loss": 1.07915198802948,
      "eval_runtime": 3821.2872,
      "eval_samples_per_second": 3.522,
      "eval_steps_per_second": 0.881,
      "step": 949
    },
    {
      "epoch": 0.9992103185048697,
      "step": 949,
      "total_flos": 1959448100732928.0,
      "train_loss": 1.0930153100081064,
      "train_runtime": 22340.3866,
      "train_samples_per_second": 2.72,
      "train_steps_per_second": 0.042
    }
  ],
  "logging_steps": 5,
  "max_steps": 949,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1959448100732928.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}