{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999162712810494,
  "eval_steps": 500,
  "global_step": 895,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011163829193413341,
      "grad_norm": 0.3974737008844776,
      "learning_rate": 2.2222222222222225e-06,
      "loss": 1.607,
      "step": 1
    },
    {
      "epoch": 0.0055819145967066705,
      "grad_norm": 0.4252789938746273,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 1.5942,
      "step": 5
    },
    {
      "epoch": 0.011163829193413341,
      "grad_norm": 0.4658525758416883,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 1.5877,
      "step": 10
    },
    {
      "epoch": 0.01674574379012001,
      "grad_norm": 0.27282017063503095,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.5695,
      "step": 15
    },
    {
      "epoch": 0.022327658386826682,
      "grad_norm": 0.24165395076839943,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.558,
      "step": 20
    },
    {
      "epoch": 0.027909572983533353,
      "grad_norm": 0.1767403193777301,
      "learning_rate": 5.555555555555556e-05,
      "loss": 1.4678,
      "step": 25
    },
    {
      "epoch": 0.03349148758024002,
      "grad_norm": 0.16356442786177314,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.467,
      "step": 30
    },
    {
      "epoch": 0.039073402176946694,
      "grad_norm": 0.15556520577978836,
      "learning_rate": 7.777777777777778e-05,
      "loss": 1.429,
      "step": 35
    },
    {
      "epoch": 0.044655316773653364,
      "grad_norm": 0.1263609432879071,
      "learning_rate": 8.888888888888889e-05,
      "loss": 1.4253,
      "step": 40
    },
    {
      "epoch": 0.050237231370360035,
      "grad_norm": 0.1696978939183065,
      "learning_rate": 0.0001,
      "loss": 1.3895,
      "step": 45
    },
    {
      "epoch": 0.055819145967066705,
      "grad_norm": 0.10830406775154863,
      "learning_rate": 0.00011111111111111112,
      "loss": 1.3645,
      "step": 50
    },
    {
      "epoch": 0.061401060563773376,
      "grad_norm": 0.08414898733986972,
      "learning_rate": 0.00012222222222222224,
      "loss": 1.3082,
      "step": 55
    },
    {
      "epoch": 0.06698297516048005,
      "grad_norm": 0.07973185533121883,
      "learning_rate": 0.00013333333333333334,
      "loss": 1.2962,
      "step": 60
    },
    {
      "epoch": 0.07256488975718671,
      "grad_norm": 0.09811845100733502,
      "learning_rate": 0.00014444444444444444,
      "loss": 1.3061,
      "step": 65
    },
    {
      "epoch": 0.07814680435389339,
      "grad_norm": 0.08298371354138047,
      "learning_rate": 0.00015555555555555556,
      "loss": 1.3017,
      "step": 70
    },
    {
      "epoch": 0.08372871895060005,
      "grad_norm": 0.07510078793315819,
      "learning_rate": 0.0001666666666666667,
      "loss": 1.2989,
      "step": 75
    },
    {
      "epoch": 0.08931063354730673,
      "grad_norm": 0.07085309149624731,
      "learning_rate": 0.00017777777777777779,
      "loss": 1.2787,
      "step": 80
    },
    {
      "epoch": 0.09489254814401339,
      "grad_norm": 0.09400917029194135,
      "learning_rate": 0.00018888888888888888,
      "loss": 1.2843,
      "step": 85
    },
    {
      "epoch": 0.10047446274072007,
      "grad_norm": 0.09230059652672952,
      "learning_rate": 0.0002,
      "loss": 1.262,
      "step": 90
    },
    {
      "epoch": 0.10605637733742673,
      "grad_norm": 0.10009657676945562,
      "learning_rate": 0.00019998096274980728,
      "loss": 1.2821,
      "step": 95
    },
    {
      "epoch": 0.11163829193413341,
      "grad_norm": 0.12201167887174731,
      "learning_rate": 0.000199923858247567,
      "loss": 1.2668,
      "step": 100
    },
    {
      "epoch": 0.11722020653084007,
      "grad_norm": 0.09628889966493127,
      "learning_rate": 0.00019982870823553308,
      "loss": 1.2503,
      "step": 105
    },
    {
      "epoch": 0.12280212112754675,
      "grad_norm": 0.10028621820088561,
      "learning_rate": 0.00019969554894159723,
      "loss": 1.2632,
      "step": 110
    },
    {
      "epoch": 0.12838403572425341,
      "grad_norm": 0.08593461106683208,
      "learning_rate": 0.00019952443106549533,
      "loss": 1.2396,
      "step": 115
    },
    {
      "epoch": 0.1339659503209601,
      "grad_norm": 0.08827739693201113,
      "learning_rate": 0.00019931541975950378,
      "loss": 1.2784,
      "step": 120
    },
    {
      "epoch": 0.13954786491766677,
      "grad_norm": 0.0911508607290428,
      "learning_rate": 0.00019906859460363307,
      "loss": 1.2689,
      "step": 125
    },
    {
      "epoch": 0.14512977951437342,
      "grad_norm": 0.12157025851983183,
      "learning_rate": 0.00019878404957532814,
      "loss": 1.2563,
      "step": 130
    },
    {
      "epoch": 0.1507116941110801,
      "grad_norm": 0.10772740664174668,
      "learning_rate": 0.0001984618930136869,
      "loss": 1.2853,
      "step": 135
    },
    {
      "epoch": 0.15629360870778677,
      "grad_norm": 0.09940063564218579,
      "learning_rate": 0.00019810224757821064,
      "loss": 1.241,
      "step": 140
    },
    {
      "epoch": 0.16187552330449345,
      "grad_norm": 0.09118466185918958,
      "learning_rate": 0.00019770525020210204,
      "loss": 1.2746,
      "step": 145
    },
    {
      "epoch": 0.1674574379012001,
      "grad_norm": 0.09674538853934604,
      "learning_rate": 0.0001972710520401287,
      "loss": 1.2561,
      "step": 150
    },
    {
      "epoch": 0.17303935249790678,
      "grad_norm": 0.1126652956332537,
      "learning_rate": 0.0001967998184110713,
      "loss": 1.257,
      "step": 155
    },
    {
      "epoch": 0.17862126709461346,
      "grad_norm": 0.0869341846350413,
      "learning_rate": 0.00019629172873477995,
      "loss": 1.2529,
      "step": 160
    },
    {
      "epoch": 0.18420318169132013,
      "grad_norm": 0.09888626799953022,
      "learning_rate": 0.00019574697646386027,
      "loss": 1.244,
      "step": 165
    },
    {
      "epoch": 0.18978509628802678,
      "grad_norm": 0.09785278620381999,
      "learning_rate": 0.0001951657690100178,
      "loss": 1.2334,
      "step": 170
    },
    {
      "epoch": 0.19536701088473346,
      "grad_norm": 0.07378537831469305,
      "learning_rate": 0.0001945483276650868,
      "loss": 1.2415,
      "step": 175
    },
    {
      "epoch": 0.20094892548144014,
      "grad_norm": 0.08814263560160436,
      "learning_rate": 0.0001938948875167745,
      "loss": 1.2512,
      "step": 180
    },
    {
      "epoch": 0.20653084007814682,
      "grad_norm": 0.09775538276417937,
      "learning_rate": 0.00019320569735915271,
      "loss": 1.2213,
      "step": 185
    },
    {
      "epoch": 0.21211275467485347,
      "grad_norm": 0.09538626874304115,
      "learning_rate": 0.00019248101959793066,
      "loss": 1.2354,
      "step": 190
    },
    {
      "epoch": 0.21769466927156014,
      "grad_norm": 0.08332625788355251,
      "learning_rate": 0.00019172113015054532,
      "loss": 1.2444,
      "step": 195
    },
    {
      "epoch": 0.22327658386826682,
      "grad_norm": 0.08309090570657847,
      "learning_rate": 0.00019092631834110723,
      "loss": 1.2316,
      "step": 200
    },
    {
      "epoch": 0.2288584984649735,
      "grad_norm": 0.09054323693110126,
      "learning_rate": 0.0001900968867902419,
      "loss": 1.27,
      "step": 205
    },
    {
      "epoch": 0.23444041306168015,
      "grad_norm": 0.08549436898181585,
      "learning_rate": 0.00018923315129986835,
      "loss": 1.2348,
      "step": 210
    },
    {
      "epoch": 0.24002232765838682,
      "grad_norm": 0.086610993256363,
      "learning_rate": 0.00018833544073295917,
      "loss": 1.2461,
      "step": 215
    },
    {
      "epoch": 0.2456042422550935,
      "grad_norm": 0.08146109722648563,
      "learning_rate": 0.00018740409688832764,
      "loss": 1.2431,
      "step": 220
    },
    {
      "epoch": 0.2511861568518002,
      "grad_norm": 0.08232534290451142,
      "learning_rate": 0.00018643947437048944,
      "loss": 1.2408,
      "step": 225
    },
    {
      "epoch": 0.25676807144850683,
      "grad_norm": 0.08507739560575232,
      "learning_rate": 0.00018544194045464886,
      "loss": 1.243,
      "step": 230
    },
    {
      "epoch": 0.26234998604521353,
      "grad_norm": 0.09782665661618925,
      "learning_rate": 0.00018441187494686053,
      "loss": 1.2426,
      "step": 235
    },
    {
      "epoch": 0.2679319006419202,
      "grad_norm": 0.0809973818897895,
      "learning_rate": 0.0001833496700394202,
      "loss": 1.2345,
      "step": 240
    },
    {
      "epoch": 0.27351381523862683,
      "grad_norm": 0.09269081567542259,
      "learning_rate": 0.00018225573016153945,
      "loss": 1.2343,
      "step": 245
    },
    {
      "epoch": 0.27909572983533354,
      "grad_norm": 0.09671785308848269,
      "learning_rate": 0.00018113047182536127,
      "loss": 1.2327,
      "step": 250
    },
    {
      "epoch": 0.2846776444320402,
      "grad_norm": 0.0906432644454991,
      "learning_rate": 0.00017997432346737524,
      "loss": 1.2532,
      "step": 255
    },
    {
      "epoch": 0.29025955902874684,
      "grad_norm": 0.08371586611488784,
      "learning_rate": 0.00017878772528529232,
      "loss": 1.2384,
      "step": 260
    },
    {
      "epoch": 0.29584147362545354,
      "grad_norm": 0.08640773776491195,
      "learning_rate": 0.000177571129070442,
      "loss": 1.2193,
      "step": 265
    },
    {
      "epoch": 0.3014233882221602,
      "grad_norm": 0.08164649256677078,
      "learning_rate": 0.00017632499803575474,
      "loss": 1.2327,
      "step": 270
    },
    {
      "epoch": 0.3070053028188669,
      "grad_norm": 0.09156690890905773,
      "learning_rate": 0.00017504980663939613,
      "loss": 1.2534,
      "step": 275
    },
    {
      "epoch": 0.31258721741557355,
      "grad_norm": 0.08393163680296412,
      "learning_rate": 0.00017374604040411935,
      "loss": 1.2411,
      "step": 280
    },
    {
      "epoch": 0.3181691320122802,
      "grad_norm": 0.08340859881557235,
      "learning_rate": 0.00017241419573240462,
      "loss": 1.2398,
      "step": 285
    },
    {
      "epoch": 0.3237510466089869,
      "grad_norm": 0.08622506272483123,
      "learning_rate": 0.00017105477971745666,
      "loss": 1.2321,
      "step": 290
    },
    {
      "epoch": 0.32933296120569355,
      "grad_norm": 0.08338497396964428,
      "learning_rate": 0.00016966830995013133,
      "loss": 1.2453,
      "step": 295
    },
    {
      "epoch": 0.3349148758024002,
      "grad_norm": 0.08718794446584939,
      "learning_rate": 0.00016825531432186543,
      "loss": 1.2134,
      "step": 300
    },
    {
      "epoch": 0.3404967903991069,
      "grad_norm": 0.09158015865602193,
      "learning_rate": 0.00016681633082368498,
      "loss": 1.223,
      "step": 305
    },
    {
      "epoch": 0.34607870499581356,
      "grad_norm": 0.08768121171152027,
      "learning_rate": 0.0001653519073413675,
      "loss": 1.235,
      "step": 310
    },
    {
      "epoch": 0.3516606195925202,
      "grad_norm": 0.08907125432704804,
      "learning_rate": 0.00016386260144683745,
      "loss": 1.2169,
      "step": 315
    },
    {
      "epoch": 0.3572425341892269,
      "grad_norm": 0.08767993008424768,
      "learning_rate": 0.00016234898018587337,
      "loss": 1.2435,
      "step": 320
    },
    {
      "epoch": 0.36282444878593356,
      "grad_norm": 0.08991663909567185,
      "learning_rate": 0.00016081161986220807,
      "loss": 1.2371,
      "step": 325
    },
    {
      "epoch": 0.36840636338264027,
      "grad_norm": 0.07876061570647706,
      "learning_rate": 0.00015925110581810394,
      "loss": 1.2118,
      "step": 330
    },
    {
      "epoch": 0.3739882779793469,
      "grad_norm": 0.09088539514665886,
      "learning_rate": 0.00015766803221148673,
      "loss": 1.2333,
      "step": 335
    },
    {
      "epoch": 0.37957019257605357,
      "grad_norm": 0.09371191064756335,
      "learning_rate": 0.00015606300178972287,
      "loss": 1.2192,
      "step": 340
    },
    {
      "epoch": 0.38515210717276027,
      "grad_norm": 0.0988524027231739,
      "learning_rate": 0.00015443662566012645,
      "loss": 1.2201,
      "step": 345
    },
    {
      "epoch": 0.3907340217694669,
      "grad_norm": 0.08068655015289312,
      "learning_rate": 0.00015278952305728324,
      "loss": 1.2312,
      "step": 350
    },
    {
      "epoch": 0.39631593636617357,
      "grad_norm": 0.08530580419429784,
      "learning_rate": 0.00015112232110728015,
      "loss": 1.2103,
      "step": 355
    },
    {
      "epoch": 0.4018978509628803,
      "grad_norm": 0.0832856621155852,
      "learning_rate": 0.00014943565458893,
      "loss": 1.2049,
      "step": 360
    },
    {
      "epoch": 0.4074797655595869,
      "grad_norm": 0.10112900442930213,
      "learning_rate": 0.00014773016569208283,
      "loss": 1.2381,
      "step": 365
    },
    {
      "epoch": 0.41306168015629363,
      "grad_norm": 0.08250019530921109,
      "learning_rate": 0.00014600650377311522,
      "loss": 1.2185,
      "step": 370
    },
    {
      "epoch": 0.4186435947530003,
      "grad_norm": 0.0987578329954232,
      "learning_rate": 0.0001442653251076912,
      "loss": 1.2222,
      "step": 375
    },
    {
      "epoch": 0.42422550934970693,
      "grad_norm": 0.08530899013880136,
      "learning_rate": 0.00014250729264088843,
      "loss": 1.2556,
      "step": 380
    },
    {
      "epoch": 0.42980742394641364,
      "grad_norm": 0.10267562745822716,
      "learning_rate": 0.00014073307573478526,
      "loss": 1.2146,
      "step": 385
    },
    {
      "epoch": 0.4353893385431203,
      "grad_norm": 0.09189285950155643,
      "learning_rate": 0.00013894334991360448,
      "loss": 1.2206,
      "step": 390
    },
    {
      "epoch": 0.44097125313982694,
      "grad_norm": 0.08370196846674145,
      "learning_rate": 0.00013713879660651068,
      "loss": 1.2076,
      "step": 395
    },
    {
      "epoch": 0.44655316773653364,
      "grad_norm": 0.08423557906306067,
      "learning_rate": 0.0001353201028881598,
      "loss": 1.2223,
      "step": 400
    },
    {
      "epoch": 0.4521350823332403,
      "grad_norm": 0.08292081122541138,
      "learning_rate": 0.00013348796121709862,
      "loss": 1.2294,
      "step": 405
    },
    {
      "epoch": 0.457716996929947,
      "grad_norm": 0.08767079524531268,
      "learning_rate": 0.00013164306917211476,
      "loss": 1.2229,
      "step": 410
    },
    {
      "epoch": 0.46329891152665365,
      "grad_norm": 0.0865942463810843,
      "learning_rate": 0.000129786129186637,
      "loss": 1.2163,
      "step": 415
    },
    {
      "epoch": 0.4688808261233603,
      "grad_norm": 0.08101515714055764,
      "learning_rate": 0.00012791784828128724,
      "loss": 1.2337,
      "step": 420
    },
    {
      "epoch": 0.474462740720067,
      "grad_norm": 0.09009147490161429,
      "learning_rate": 0.00012603893779468604,
      "loss": 1.2148,
      "step": 425
    },
    {
      "epoch": 0.48004465531677365,
      "grad_norm": 0.08757351279515291,
      "learning_rate": 0.0001241501131126138,
      "loss": 1.2056,
      "step": 430
    },
    {
      "epoch": 0.4856265699134803,
      "grad_norm": 0.08418609867162384,
      "learning_rate": 0.00012225209339563145,
      "loss": 1.2419,
      "step": 435
    },
    {
      "epoch": 0.491208484510187,
      "grad_norm": 0.08790367723325618,
      "learning_rate": 0.0001203456013052634,
      "loss": 1.2115,
      "step": 440
    },
    {
      "epoch": 0.49679039910689365,
      "grad_norm": 0.08071789319204539,
      "learning_rate": 0.00011843136272884794,
      "loss": 1.2072,
      "step": 445
    },
    {
      "epoch": 0.5023723137036004,
      "grad_norm": 0.0879278395825441,
      "learning_rate": 0.00011651010650315923,
      "loss": 1.2194,
      "step": 450
    },
    {
      "epoch": 0.507954228300307,
      "grad_norm": 0.08506166782358492,
      "learning_rate": 0.00011458256413690633,
      "loss": 1.2077,
      "step": 455
    },
    {
      "epoch": 0.5135361428970137,
      "grad_norm": 0.08984730610411729,
      "learning_rate": 0.00011264946953221496,
      "loss": 1.2484,
      "step": 460
    },
    {
      "epoch": 0.5191180574937203,
      "grad_norm": 0.2978083078661545,
      "learning_rate": 0.00011071155870519777,
      "loss": 1.2491,
      "step": 465
    },
    {
      "epoch": 0.5246999720904271,
      "grad_norm": 0.08504227931172395,
      "learning_rate": 0.00010876956950572006,
      "loss": 1.2268,
      "step": 470
    },
    {
      "epoch": 0.5302818866871337,
      "grad_norm": 0.08620167875904892,
      "learning_rate": 0.0001068242413364671,
      "loss": 1.2252,
      "step": 475
    },
    {
      "epoch": 0.5358638012838404,
      "grad_norm": 0.08669957736640198,
      "learning_rate": 0.00010487631487142017,
      "loss": 1.217,
      "step": 480
    },
    {
      "epoch": 0.541445715880547,
      "grad_norm": 0.08577871896034497,
      "learning_rate": 0.00010292653177384876,
      "loss": 1.2169,
      "step": 485
    },
    {
      "epoch": 0.5470276304772537,
      "grad_norm": 0.08417260057895289,
      "learning_rate": 0.00010097563441392581,
      "loss": 1.2354,
      "step": 490
    },
    {
      "epoch": 0.5526095450739603,
      "grad_norm": 0.08676422431924583,
      "learning_rate": 9.90243655860742e-05,
      "loss": 1.2039,
      "step": 495
    },
    {
      "epoch": 0.5581914596706671,
      "grad_norm": 0.09103906295111437,
      "learning_rate": 9.707346822615128e-05,
      "loss": 1.2194,
      "step": 500
    },
    {
      "epoch": 0.5637733742673737,
      "grad_norm": 0.08594537537719427,
      "learning_rate": 9.512368512857984e-05,
      "loss": 1.1949,
      "step": 505
    },
    {
      "epoch": 0.5693552888640804,
      "grad_norm": 0.08392759057088481,
      "learning_rate": 9.317575866353292e-05,
      "loss": 1.2196,
      "step": 510
    },
    {
      "epoch": 0.574937203460787,
      "grad_norm": 0.08201912454761111,
      "learning_rate": 9.123043049427995e-05,
      "loss": 1.2131,
      "step": 515
    },
    {
      "epoch": 0.5805191180574937,
      "grad_norm": 0.08925291750313868,
      "learning_rate": 8.928844129480227e-05,
      "loss": 1.2369,
      "step": 520
    },
    {
      "epoch": 0.5861010326542004,
      "grad_norm": 0.08954980070951671,
      "learning_rate": 8.735053046778506e-05,
      "loss": 1.2175,
      "step": 525
    },
    {
      "epoch": 0.5916829472509071,
      "grad_norm": 0.08574100993825345,
      "learning_rate": 8.541743586309365e-05,
      "loss": 1.2166,
      "step": 530
    },
    {
      "epoch": 0.5972648618476137,
      "grad_norm": 0.08840883290578404,
      "learning_rate": 8.348989349684076e-05,
      "loss": 1.2271,
      "step": 535
    },
    {
      "epoch": 0.6028467764443204,
      "grad_norm": 0.08443946017557556,
      "learning_rate": 8.156863727115211e-05,
      "loss": 1.2329,
      "step": 540
    },
    {
      "epoch": 0.608428691041027,
      "grad_norm": 0.0902640782545258,
      "learning_rate": 7.965439869473664e-05,
      "loss": 1.2253,
      "step": 545
    },
    {
      "epoch": 0.6140106056377338,
      "grad_norm": 0.08988630625422679,
      "learning_rate": 7.774790660436858e-05,
      "loss": 1.1785,
      "step": 550
    },
    {
      "epoch": 0.6195925202344404,
      "grad_norm": 0.08134808753957644,
      "learning_rate": 7.584988688738622e-05,
      "loss": 1.2261,
      "step": 555
    },
    {
      "epoch": 0.6251744348311471,
      "grad_norm": 0.08768193779762151,
      "learning_rate": 7.396106220531398e-05,
      "loss": 1.2463,
      "step": 560
    },
    {
      "epoch": 0.6307563494278537,
      "grad_norm": 0.0885816930556393,
      "learning_rate": 7.208215171871277e-05,
      "loss": 1.2141,
      "step": 565
    },
    {
      "epoch": 0.6363382640245604,
      "grad_norm": 0.08553683878588977,
      "learning_rate": 7.021387081336301e-05,
      "loss": 1.2026,
      "step": 570
    },
    {
      "epoch": 0.641920178621267,
      "grad_norm": 0.09505838067263224,
      "learning_rate": 6.835693082788525e-05,
      "loss": 1.2168,
      "step": 575
    },
    {
      "epoch": 0.6475020932179738,
      "grad_norm": 0.08769224685329463,
      "learning_rate": 6.651203878290139e-05,
      "loss": 1.2493,
      "step": 580
    },
    {
      "epoch": 0.6530840078146805,
      "grad_norm": 0.07990213288377576,
      "learning_rate": 6.46798971118402e-05,
      "loss": 1.2308,
      "step": 585
    },
    {
      "epoch": 0.6586659224113871,
      "grad_norm": 0.08133261350163556,
      "learning_rate": 6.286120339348935e-05,
      "loss": 1.2014,
      "step": 590
    },
    {
      "epoch": 0.6642478370080938,
      "grad_norm": 0.09363089434544866,
      "learning_rate": 6.105665008639557e-05,
      "loss": 1.2238,
      "step": 595
    },
    {
      "epoch": 0.6698297516048004,
      "grad_norm": 0.07910287951552411,
      "learning_rate": 5.926692426521474e-05,
      "loss": 1.2473,
      "step": 600
    },
    {
      "epoch": 0.6754116662015072,
      "grad_norm": 0.0801209902764544,
      "learning_rate": 5.749270735911158e-05,
      "loss": 1.1975,
      "step": 605
    },
    {
      "epoch": 0.6809935807982138,
      "grad_norm": 0.08087293360533905,
      "learning_rate": 5.573467489230879e-05,
      "loss": 1.1966,
      "step": 610
    },
    {
      "epoch": 0.6865754953949205,
      "grad_norm": 0.08220997258417966,
      "learning_rate": 5.399349622688479e-05,
      "loss": 1.2345,
      "step": 615
    },
    {
      "epoch": 0.6921574099916271,
      "grad_norm": 0.0825575277760057,
      "learning_rate": 5.226983430791722e-05,
      "loss": 1.2289,
      "step": 620
    },
    {
      "epoch": 0.6977393245883338,
      "grad_norm": 0.08305460425818378,
      "learning_rate": 5.0564345411070025e-05,
      "loss": 1.204,
      "step": 625
    },
    {
      "epoch": 0.7033212391850404,
      "grad_norm": 0.08011105262542664,
      "learning_rate": 4.8877678892719866e-05,
      "loss": 1.1946,
      "step": 630
    },
    {
      "epoch": 0.7089031537817472,
      "grad_norm": 0.08686069747720479,
      "learning_rate": 4.721047694271676e-05,
      "loss": 1.2,
      "step": 635
    },
    {
      "epoch": 0.7144850683784538,
      "grad_norm": 0.08537977661965272,
      "learning_rate": 4.556337433987359e-05,
      "loss": 1.2054,
      "step": 640
    },
    {
      "epoch": 0.7200669829751605,
      "grad_norm": 0.08857193949478791,
      "learning_rate": 4.393699821027716e-05,
      "loss": 1.1988,
      "step": 645
    },
    {
      "epoch": 0.7256488975718671,
      "grad_norm": 0.09608004999262602,
      "learning_rate": 4.2331967788513295e-05,
      "loss": 1.2226,
      "step": 650
    },
    {
      "epoch": 0.7312308121685738,
      "grad_norm": 0.08235757922811432,
      "learning_rate": 4.074889418189608e-05,
      "loss": 1.2202,
      "step": 655
    },
    {
      "epoch": 0.7368127267652805,
      "grad_norm": 0.08660069823512372,
      "learning_rate": 3.9188380137791936e-05,
      "loss": 1.215,
      "step": 660
    },
    {
      "epoch": 0.7423946413619872,
      "grad_norm": 0.08090639704744831,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 1.2255,
      "step": 665
    },
    {
      "epoch": 0.7479765559586938,
      "grad_norm": 0.08082821477995833,
      "learning_rate": 3.613739855316257e-05,
      "loss": 1.2176,
      "step": 670
    },
    {
      "epoch": 0.7535584705554005,
      "grad_norm": 0.08469395080984878,
      "learning_rate": 3.46480926586325e-05,
      "loss": 1.2275,
      "step": 675
    },
    {
      "epoch": 0.7591403851521071,
      "grad_norm": 0.0871555466504494,
      "learning_rate": 3.3183669176315045e-05,
      "loss": 1.2351,
      "step": 680
    },
    {
      "epoch": 0.7647222997488139,
      "grad_norm": 0.08170223557553191,
      "learning_rate": 3.174468567813461e-05,
      "loss": 1.2074,
      "step": 685
    },
    {
      "epoch": 0.7703042143455205,
      "grad_norm": 0.0838318843856818,
      "learning_rate": 3.033169004986873e-05,
      "loss": 1.2396,
      "step": 690
    },
    {
      "epoch": 0.7758861289422272,
      "grad_norm": 0.08831381148889993,
      "learning_rate": 2.894522028254334e-05,
      "loss": 1.1947,
      "step": 695
    },
    {
      "epoch": 0.7814680435389338,
      "grad_norm": 0.08158536981215994,
      "learning_rate": 2.7585804267595384e-05,
      "loss": 1.208,
      "step": 700
    },
    {
      "epoch": 0.7870499581356405,
      "grad_norm": 0.08116519613000232,
      "learning_rate": 2.6253959595880673e-05,
      "loss": 1.2191,
      "step": 705
    },
    {
      "epoch": 0.7926318727323471,
      "grad_norm": 0.08294169676184929,
      "learning_rate": 2.495019336060387e-05,
      "loss": 1.195,
      "step": 710
    },
    {
      "epoch": 0.7982137873290539,
      "grad_norm": 0.08406756837278591,
      "learning_rate": 2.367500196424529e-05,
      "loss": 1.2203,
      "step": 715
    },
    {
      "epoch": 0.8037957019257606,
      "grad_norm": 0.08211403607563178,
      "learning_rate": 2.242887092955801e-05,
      "loss": 1.2041,
      "step": 720
    },
    {
      "epoch": 0.8093776165224672,
      "grad_norm": 0.07980978787138238,
      "learning_rate": 2.121227471470768e-05,
      "loss": 1.2394,
      "step": 725
    },
    {
      "epoch": 0.8149595311191739,
      "grad_norm": 0.08416184610807921,
      "learning_rate": 2.002567653262479e-05,
      "loss": 1.2228,
      "step": 730
    },
    {
      "epoch": 0.8205414457158805,
      "grad_norm": 0.08256062792318115,
      "learning_rate": 1.8869528174638752e-05,
      "loss": 1.203,
      "step": 735
    },
    {
      "epoch": 0.8261233603125873,
      "grad_norm": 0.09043351264554417,
      "learning_rate": 1.774426983846058e-05,
      "loss": 1.2275,
      "step": 740
    },
    {
      "epoch": 0.8317052749092939,
      "grad_norm": 0.08486147964302236,
      "learning_rate": 1.6650329960579792e-05,
      "loss": 1.2208,
      "step": 745
    },
    {
      "epoch": 0.8372871895060006,
      "grad_norm": 0.0935945466460169,
      "learning_rate": 1.5588125053139468e-05,
      "loss": 1.2131,
      "step": 750
    },
    {
      "epoch": 0.8428691041027072,
      "grad_norm": 0.08282716353976063,
      "learning_rate": 1.4558059545351143e-05,
      "loss": 1.2284,
      "step": 755
    },
    {
      "epoch": 0.8484510186994139,
      "grad_norm": 0.08286515378820142,
      "learning_rate": 1.3560525629510568e-05,
      "loss": 1.2086,
      "step": 760
    },
    {
      "epoch": 0.8540329332961206,
      "grad_norm": 0.08295259360853054,
      "learning_rate": 1.259590311167238e-05,
      "loss": 1.2061,
      "step": 765
    },
    {
      "epoch": 0.8596148478928273,
      "grad_norm": 0.08358389042910293,
      "learning_rate": 1.166455926704082e-05,
      "loss": 1.222,
      "step": 770
    },
    {
      "epoch": 0.8651967624895339,
      "grad_norm": 0.08388863476839661,
      "learning_rate": 1.0766848700131648e-05,
      "loss": 1.2143,
      "step": 775
    },
    {
      "epoch": 0.8707786770862406,
      "grad_norm": 0.08277339984932784,
      "learning_rate": 9.903113209758096e-06,
      "loss": 1.2192,
      "step": 780
    },
    {
      "epoch": 0.8763605916829472,
      "grad_norm": 0.08938310164317657,
      "learning_rate": 9.073681658892775e-06,
      "loss": 1.2191,
      "step": 785
    },
    {
      "epoch": 0.8819425062796539,
      "grad_norm": 0.07910593096708422,
      "learning_rate": 8.278869849454718e-06,
      "loss": 1.2269,
      "step": 790
    },
    {
      "epoch": 0.8875244208763606,
      "grad_norm": 0.08295037453317607,
      "learning_rate": 7.5189804020693536e-06,
      "loss": 1.2021,
      "step": 795
    },
    {
      "epoch": 0.8931063354730673,
      "grad_norm": 0.08199446080472911,
      "learning_rate": 6.794302640847294e-06,
      "loss": 1.1961,
      "step": 800
    },
    {
      "epoch": 0.8986882500697739,
      "grad_norm": 0.08481342663212112,
      "learning_rate": 6.1051124832254944e-06,
      "loss": 1.2069,
      "step": 805
    },
    {
      "epoch": 0.9042701646664806,
      "grad_norm": 0.08217551850800063,
      "learning_rate": 5.451672334913216e-06,
      "loss": 1.2055,
      "step": 810
    },
    {
      "epoch": 0.9098520792631872,
      "grad_norm": 0.08322503504827561,
      "learning_rate": 4.834230989982213e-06,
      "loss": 1.2156,
      "step": 815
    },
    {
      "epoch": 0.915433993859894,
      "grad_norm": 0.08125961805104615,
      "learning_rate": 4.253023536139733e-06,
      "loss": 1.2005,
      "step": 820
    },
    {
      "epoch": 0.9210159084566006,
      "grad_norm": 0.09037682759604541,
      "learning_rate": 3.7082712652200867e-06,
      "loss": 1.2079,
      "step": 825
    },
    {
      "epoch": 0.9265978230533073,
      "grad_norm": 0.08711894287392291,
      "learning_rate": 3.2001815889286856e-06,
      "loss": 1.232,
      "step": 830
    },
    {
      "epoch": 0.9321797376500139,
      "grad_norm": 0.08367132801462379,
      "learning_rate": 2.728947959871353e-06,
      "loss": 1.1858,
      "step": 835
    },
    {
      "epoch": 0.9377616522467206,
      "grad_norm": 0.0809801248589102,
      "learning_rate": 2.294749797897955e-06,
      "loss": 1.1871,
      "step": 840
    },
    {
      "epoch": 0.9433435668434274,
      "grad_norm": 0.08412969109149288,
      "learning_rate": 1.8977524217893783e-06,
      "loss": 1.2248,
      "step": 845
    },
    {
      "epoch": 0.948925481440134,
      "grad_norm": 0.08014128153610968,
      "learning_rate": 1.5381069863131037e-06,
      "loss": 1.2312,
      "step": 850
    },
    {
      "epoch": 0.9545073960368406,
      "grad_norm": 0.08040835492341503,
      "learning_rate": 1.2159504246718522e-06,
      "loss": 1.2213,
      "step": 855
    },
    {
      "epoch": 0.9600893106335473,
      "grad_norm": 0.08170226749481643,
      "learning_rate": 9.314053963669245e-07,
      "loss": 1.2114,
      "step": 860
    },
    {
      "epoch": 0.965671225230254,
      "grad_norm": 0.08123838559159317,
      "learning_rate": 6.845802404962243e-07,
      "loss": 1.2455,
      "step": 865
    },
    {
      "epoch": 0.9712531398269606,
      "grad_norm": 0.08532355248950987,
      "learning_rate": 4.7556893450466653e-07,
      "loss": 1.2017,
      "step": 870
    },
    {
      "epoch": 0.9768350544236674,
      "grad_norm": 0.07935413274906811,
      "learning_rate": 3.044510584027771e-07,
      "loss": 1.203,
      "step": 875
    },
    {
      "epoch": 0.982416969020374,
      "grad_norm": 0.07922680701516337,
      "learning_rate": 1.7129176446692984e-07,
      "loss": 1.1993,
      "step": 880
    },
    {
      "epoch": 0.9879988836170807,
      "grad_norm": 0.08007277288266887,
      "learning_rate": 7.614175243301213e-08,
      "loss": 1.221,
      "step": 885
    },
    {
      "epoch": 0.9935807982137873,
      "grad_norm": 0.08190648675567455,
      "learning_rate": 1.9037250192732726e-08,
      "loss": 1.2245,
      "step": 890
    },
    {
      "epoch": 0.999162712810494,
      "grad_norm": 0.07884795604109555,
      "learning_rate": 0.0,
      "loss": 1.2359,
      "step": 895
    },
    {
      "epoch": 0.999162712810494,
      "eval_loss": 1.1748292446136475,
      "eval_runtime": 1569.4225,
      "eval_samples_per_second": 8.524,
      "eval_steps_per_second": 0.533,
      "step": 895
    },
    {
      "epoch": 0.999162712810494,
      "step": 895,
      "total_flos": 1.1254972268150784e+16,
      "train_loss": 1.2433469767011078,
      "train_runtime": 20318.3129,
      "train_samples_per_second": 2.821,
      "train_steps_per_second": 0.044
    }
  ],
  "logging_steps": 5,
  "max_steps": 895,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1254972268150784e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}