| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 850, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05917159763313609, |
| "grad_norm": 1.1964364051818848, |
| "learning_rate": 8e-05, |
| "loss": 2.1543, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.11834319526627218, |
| "grad_norm": 1.1647157669067383, |
| "learning_rate": 0.00018, |
| "loss": 1.6346, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.17751479289940827, |
| "grad_norm": 0.6181473135948181, |
| "learning_rate": 0.00019904761904761907, |
| "loss": 0.9978, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.23668639053254437, |
| "grad_norm": 0.5694869160652161, |
| "learning_rate": 0.00019785714285714288, |
| "loss": 0.9807, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2958579881656805, |
| "grad_norm": 0.6708640456199646, |
| "learning_rate": 0.00019666666666666666, |
| "loss": 1.2485, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.35502958579881655, |
| "grad_norm": 0.7021521925926208, |
| "learning_rate": 0.00019547619047619047, |
| "loss": 1.1107, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.41420118343195267, |
| "grad_norm": 0.5067740082740784, |
| "learning_rate": 0.0001942857142857143, |
| "loss": 1.1293, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.47337278106508873, |
| "grad_norm": 0.5455656051635742, |
| "learning_rate": 0.0001930952380952381, |
| "loss": 1.1371, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5325443786982249, |
| "grad_norm": 0.6190764307975769, |
| "learning_rate": 0.00019190476190476192, |
| "loss": 1.0131, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.591715976331361, |
| "grad_norm": 0.544291615486145, |
| "learning_rate": 0.00019071428571428573, |
| "loss": 0.969, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.650887573964497, |
| "grad_norm": 0.600204348564148, |
| "learning_rate": 0.0001895238095238095, |
| "loss": 1.0509, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.7100591715976331, |
| "grad_norm": 0.5397897958755493, |
| "learning_rate": 0.00018833333333333335, |
| "loss": 0.9839, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.571107804775238, |
| "learning_rate": 0.00018714285714285716, |
| "loss": 0.9281, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.8284023668639053, |
| "grad_norm": 0.5789744257926941, |
| "learning_rate": 0.00018595238095238097, |
| "loss": 0.9543, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.8875739644970414, |
| "grad_norm": 0.6334110498428345, |
| "learning_rate": 0.00018476190476190478, |
| "loss": 1.0625, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.9467455621301775, |
| "grad_norm": 0.5225853323936462, |
| "learning_rate": 0.00018357142857142858, |
| "loss": 0.9734, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7854479551315308, |
| "learning_rate": 0.0001823809523809524, |
| "loss": 0.9452, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.0591715976331362, |
| "grad_norm": 0.47304806113243103, |
| "learning_rate": 0.0001811904761904762, |
| "loss": 0.8473, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.1183431952662721, |
| "grad_norm": 0.6840282678604126, |
| "learning_rate": 0.00018, |
| "loss": 0.6189, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.1775147928994083, |
| "grad_norm": 0.5173171162605286, |
| "learning_rate": 0.00017880952380952382, |
| "loss": 0.9301, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.2366863905325443, |
| "grad_norm": 0.4892285466194153, |
| "learning_rate": 0.00017761904761904763, |
| "loss": 0.7659, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.2958579881656804, |
| "grad_norm": 0.5754849910736084, |
| "learning_rate": 0.00017642857142857144, |
| "loss": 0.7756, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.3550295857988166, |
| "grad_norm": 0.47277289628982544, |
| "learning_rate": 0.00017523809523809525, |
| "loss": 0.7844, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.4142011834319526, |
| "grad_norm": 0.8198840022087097, |
| "learning_rate": 0.00017404761904761906, |
| "loss": 0.8969, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.4733727810650887, |
| "grad_norm": 0.5040334463119507, |
| "learning_rate": 0.00017285714285714287, |
| "loss": 0.8727, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.532544378698225, |
| "grad_norm": 0.5382494926452637, |
| "learning_rate": 0.00017166666666666667, |
| "loss": 0.7677, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.5917159763313609, |
| "grad_norm": 0.631537914276123, |
| "learning_rate": 0.00017047619047619048, |
| "loss": 0.8364, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.650887573964497, |
| "grad_norm": 0.5718739628791809, |
| "learning_rate": 0.0001692857142857143, |
| "loss": 0.7447, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.7100591715976332, |
| "grad_norm": 0.557224452495575, |
| "learning_rate": 0.0001680952380952381, |
| "loss": 0.8828, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 0.6206871271133423, |
| "learning_rate": 0.0001669047619047619, |
| "loss": 0.5674, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.8284023668639053, |
| "grad_norm": 0.6297276616096497, |
| "learning_rate": 0.00016571428571428575, |
| "loss": 0.7956, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.8875739644970415, |
| "grad_norm": 0.6178033351898193, |
| "learning_rate": 0.00016452380952380953, |
| "loss": 0.5527, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.9467455621301775, |
| "grad_norm": 0.6269710063934326, |
| "learning_rate": 0.00016333333333333334, |
| "loss": 0.9635, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7290076613426208, |
| "learning_rate": 0.00016214285714285715, |
| "loss": 0.8048, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.059171597633136, |
| "grad_norm": 0.5376546382904053, |
| "learning_rate": 0.00016095238095238096, |
| "loss": 0.602, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.1183431952662723, |
| "grad_norm": 0.720078706741333, |
| "learning_rate": 0.0001597619047619048, |
| "loss": 0.5738, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.1775147928994083, |
| "grad_norm": 0.5647716522216797, |
| "learning_rate": 0.00015857142857142857, |
| "loss": 0.5445, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.2366863905325443, |
| "grad_norm": 0.7397224307060242, |
| "learning_rate": 0.00015738095238095238, |
| "loss": 0.5383, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.2958579881656807, |
| "grad_norm": 0.8834079504013062, |
| "learning_rate": 0.0001561904761904762, |
| "loss": 0.6575, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.3550295857988166, |
| "grad_norm": 0.6497870683670044, |
| "learning_rate": 0.000155, |
| "loss": 0.6677, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.4142011834319526, |
| "grad_norm": 0.686392605304718, |
| "learning_rate": 0.00015380952380952384, |
| "loss": 0.493, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.4733727810650885, |
| "grad_norm": 0.719688892364502, |
| "learning_rate": 0.00015261904761904762, |
| "loss": 0.5356, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.532544378698225, |
| "grad_norm": 0.6884217262268066, |
| "learning_rate": 0.00015142857142857143, |
| "loss": 0.7167, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.591715976331361, |
| "grad_norm": 0.7767056822776794, |
| "learning_rate": 0.00015023809523809524, |
| "loss": 0.7346, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.6508875739644973, |
| "grad_norm": 0.6508312225341797, |
| "learning_rate": 0.00014904761904761904, |
| "loss": 0.547, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.710059171597633, |
| "grad_norm": 0.6159693598747253, |
| "learning_rate": 0.00014785714285714288, |
| "loss": 0.5539, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 0.7028509378433228, |
| "learning_rate": 0.00014666666666666666, |
| "loss": 0.609, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.828402366863905, |
| "grad_norm": 0.6096014976501465, |
| "learning_rate": 0.00014547619047619047, |
| "loss": 0.5913, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.8875739644970415, |
| "grad_norm": 0.8518397212028503, |
| "learning_rate": 0.00014428571428571428, |
| "loss": 0.6356, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.9467455621301775, |
| "grad_norm": 0.6462046504020691, |
| "learning_rate": 0.00014309523809523812, |
| "loss": 0.527, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.8931583762168884, |
| "learning_rate": 0.00014190476190476193, |
| "loss": 0.6505, |
| "step": 255 |
| }, |
| { |
| "epoch": 3.059171597633136, |
| "grad_norm": 0.6089041233062744, |
| "learning_rate": 0.00014071428571428573, |
| "loss": 0.4721, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.1183431952662723, |
| "grad_norm": 0.9845924973487854, |
| "learning_rate": 0.00013952380952380952, |
| "loss": 0.4478, |
| "step": 265 |
| }, |
| { |
| "epoch": 3.1775147928994083, |
| "grad_norm": 0.6962316036224365, |
| "learning_rate": 0.00013833333333333333, |
| "loss": 0.3862, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.2366863905325443, |
| "grad_norm": 0.6963745951652527, |
| "learning_rate": 0.00013714285714285716, |
| "loss": 0.5446, |
| "step": 275 |
| }, |
| { |
| "epoch": 3.2958579881656807, |
| "grad_norm": 0.9289587736129761, |
| "learning_rate": 0.00013595238095238097, |
| "loss": 0.4985, |
| "step": 280 |
| }, |
| { |
| "epoch": 3.3550295857988166, |
| "grad_norm": 0.7913327813148499, |
| "learning_rate": 0.00013476190476190478, |
| "loss": 0.4291, |
| "step": 285 |
| }, |
| { |
| "epoch": 3.4142011834319526, |
| "grad_norm": 0.7623841166496277, |
| "learning_rate": 0.00013357142857142856, |
| "loss": 0.4198, |
| "step": 290 |
| }, |
| { |
| "epoch": 3.4733727810650885, |
| "grad_norm": 1.1334826946258545, |
| "learning_rate": 0.00013238095238095237, |
| "loss": 0.4442, |
| "step": 295 |
| }, |
| { |
| "epoch": 3.532544378698225, |
| "grad_norm": 0.8162091374397278, |
| "learning_rate": 0.0001311904761904762, |
| "loss": 0.4249, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.591715976331361, |
| "grad_norm": 0.7582007646560669, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 0.4166, |
| "step": 305 |
| }, |
| { |
| "epoch": 3.6508875739644973, |
| "grad_norm": 0.8337474465370178, |
| "learning_rate": 0.00012880952380952382, |
| "loss": 0.3552, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.710059171597633, |
| "grad_norm": 0.7497977018356323, |
| "learning_rate": 0.0001276190476190476, |
| "loss": 0.3778, |
| "step": 315 |
| }, |
| { |
| "epoch": 3.769230769230769, |
| "grad_norm": 0.9030293226242065, |
| "learning_rate": 0.00012642857142857144, |
| "loss": 0.4048, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.828402366863905, |
| "grad_norm": 0.8548532128334045, |
| "learning_rate": 0.00012523809523809525, |
| "loss": 0.5433, |
| "step": 325 |
| }, |
| { |
| "epoch": 3.8875739644970415, |
| "grad_norm": 1.1865911483764648, |
| "learning_rate": 0.00012404761904761906, |
| "loss": 0.4465, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.9467455621301775, |
| "grad_norm": 0.6329714059829712, |
| "learning_rate": 0.00012285714285714287, |
| "loss": 0.499, |
| "step": 335 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.8335389494895935, |
| "learning_rate": 0.00012166666666666667, |
| "loss": 0.3774, |
| "step": 340 |
| }, |
| { |
| "epoch": 4.059171597633136, |
| "grad_norm": 0.7739379405975342, |
| "learning_rate": 0.00012047619047619047, |
| "loss": 0.2472, |
| "step": 345 |
| }, |
| { |
| "epoch": 4.118343195266272, |
| "grad_norm": 1.0731953382492065, |
| "learning_rate": 0.00011928571428571428, |
| "loss": 0.3058, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.177514792899408, |
| "grad_norm": 1.051379680633545, |
| "learning_rate": 0.0001180952380952381, |
| "loss": 0.3446, |
| "step": 355 |
| }, |
| { |
| "epoch": 4.236686390532545, |
| "grad_norm": 0.6324198842048645, |
| "learning_rate": 0.00011690476190476191, |
| "loss": 0.2351, |
| "step": 360 |
| }, |
| { |
| "epoch": 4.295857988165681, |
| "grad_norm": 0.9921632409095764, |
| "learning_rate": 0.00011571428571428574, |
| "loss": 0.2795, |
| "step": 365 |
| }, |
| { |
| "epoch": 4.355029585798817, |
| "grad_norm": 0.9360544085502625, |
| "learning_rate": 0.00011452380952380952, |
| "loss": 0.4056, |
| "step": 370 |
| }, |
| { |
| "epoch": 4.414201183431953, |
| "grad_norm": 0.956781268119812, |
| "learning_rate": 0.00011333333333333334, |
| "loss": 0.3001, |
| "step": 375 |
| }, |
| { |
| "epoch": 4.4733727810650885, |
| "grad_norm": 1.0604465007781982, |
| "learning_rate": 0.00011214285714285715, |
| "loss": 0.3972, |
| "step": 380 |
| }, |
| { |
| "epoch": 4.5325443786982245, |
| "grad_norm": 0.8613020181655884, |
| "learning_rate": 0.00011095238095238096, |
| "loss": 0.2828, |
| "step": 385 |
| }, |
| { |
| "epoch": 4.591715976331361, |
| "grad_norm": 0.666599690914154, |
| "learning_rate": 0.00010976190476190478, |
| "loss": 0.3187, |
| "step": 390 |
| }, |
| { |
| "epoch": 4.650887573964497, |
| "grad_norm": 0.7497467398643494, |
| "learning_rate": 0.00010857142857142856, |
| "loss": 0.4278, |
| "step": 395 |
| }, |
| { |
| "epoch": 4.710059171597633, |
| "grad_norm": 0.733259916305542, |
| "learning_rate": 0.00010738095238095239, |
| "loss": 0.2384, |
| "step": 400 |
| }, |
| { |
| "epoch": 4.769230769230769, |
| "grad_norm": 0.7570552229881287, |
| "learning_rate": 0.0001061904761904762, |
| "loss": 0.3938, |
| "step": 405 |
| }, |
| { |
| "epoch": 4.828402366863905, |
| "grad_norm": 0.8109162449836731, |
| "learning_rate": 0.000105, |
| "loss": 0.2729, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.887573964497041, |
| "grad_norm": 0.7985107898712158, |
| "learning_rate": 0.00010380952380952383, |
| "loss": 0.3805, |
| "step": 415 |
| }, |
| { |
| "epoch": 4.946745562130177, |
| "grad_norm": 0.704366147518158, |
| "learning_rate": 0.00010261904761904761, |
| "loss": 0.2468, |
| "step": 420 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.054413080215454, |
| "learning_rate": 0.00010142857142857143, |
| "loss": 0.2892, |
| "step": 425 |
| }, |
| { |
| "epoch": 5.059171597633136, |
| "grad_norm": 0.7603329420089722, |
| "learning_rate": 0.00010023809523809524, |
| "loss": 0.2865, |
| "step": 430 |
| }, |
| { |
| "epoch": 5.118343195266272, |
| "grad_norm": 0.9612646102905273, |
| "learning_rate": 9.904761904761905e-05, |
| "loss": 0.2215, |
| "step": 435 |
| }, |
| { |
| "epoch": 5.177514792899408, |
| "grad_norm": 0.8669071793556213, |
| "learning_rate": 9.785714285714286e-05, |
| "loss": 0.2078, |
| "step": 440 |
| }, |
| { |
| "epoch": 5.236686390532545, |
| "grad_norm": 0.7441051006317139, |
| "learning_rate": 9.666666666666667e-05, |
| "loss": 0.233, |
| "step": 445 |
| }, |
| { |
| "epoch": 5.295857988165681, |
| "grad_norm": 0.5900620818138123, |
| "learning_rate": 9.547619047619049e-05, |
| "loss": 0.2581, |
| "step": 450 |
| }, |
| { |
| "epoch": 5.355029585798817, |
| "grad_norm": 0.990178644657135, |
| "learning_rate": 9.428571428571429e-05, |
| "loss": 0.2603, |
| "step": 455 |
| }, |
| { |
| "epoch": 5.414201183431953, |
| "grad_norm": 0.7644340991973877, |
| "learning_rate": 9.309523809523811e-05, |
| "loss": 0.2021, |
| "step": 460 |
| }, |
| { |
| "epoch": 5.4733727810650885, |
| "grad_norm": 0.5087964534759521, |
| "learning_rate": 9.19047619047619e-05, |
| "loss": 0.1126, |
| "step": 465 |
| }, |
| { |
| "epoch": 5.5325443786982245, |
| "grad_norm": 0.7896738052368164, |
| "learning_rate": 9.071428571428571e-05, |
| "loss": 0.2084, |
| "step": 470 |
| }, |
| { |
| "epoch": 5.591715976331361, |
| "grad_norm": 0.71749347448349, |
| "learning_rate": 8.952380952380953e-05, |
| "loss": 0.1618, |
| "step": 475 |
| }, |
| { |
| "epoch": 5.650887573964497, |
| "grad_norm": 0.8466284871101379, |
| "learning_rate": 8.833333333333333e-05, |
| "loss": 0.2574, |
| "step": 480 |
| }, |
| { |
| "epoch": 5.710059171597633, |
| "grad_norm": 1.0023925304412842, |
| "learning_rate": 8.714285714285715e-05, |
| "loss": 0.1985, |
| "step": 485 |
| }, |
| { |
| "epoch": 5.769230769230769, |
| "grad_norm": 0.8096638321876526, |
| "learning_rate": 8.595238095238096e-05, |
| "loss": 0.285, |
| "step": 490 |
| }, |
| { |
| "epoch": 5.828402366863905, |
| "grad_norm": 0.9154897332191467, |
| "learning_rate": 8.476190476190477e-05, |
| "loss": 0.2051, |
| "step": 495 |
| }, |
| { |
| "epoch": 5.887573964497041, |
| "grad_norm": 0.9506188035011292, |
| "learning_rate": 8.357142857142858e-05, |
| "loss": 0.2664, |
| "step": 500 |
| }, |
| { |
| "epoch": 5.946745562130177, |
| "grad_norm": 0.8935821056365967, |
| "learning_rate": 8.238095238095238e-05, |
| "loss": 0.1717, |
| "step": 505 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.2274423837661743, |
| "learning_rate": 8.11904761904762e-05, |
| "loss": 0.2529, |
| "step": 510 |
| }, |
| { |
| "epoch": 6.059171597633136, |
| "grad_norm": 0.6341638565063477, |
| "learning_rate": 8e-05, |
| "loss": 0.1795, |
| "step": 515 |
| }, |
| { |
| "epoch": 6.118343195266272, |
| "grad_norm": 1.273710012435913, |
| "learning_rate": 7.880952380952382e-05, |
| "loss": 0.1612, |
| "step": 520 |
| }, |
| { |
| "epoch": 6.177514792899408, |
| "grad_norm": 1.065499186515808, |
| "learning_rate": 7.761904761904762e-05, |
| "loss": 0.1839, |
| "step": 525 |
| }, |
| { |
| "epoch": 6.236686390532545, |
| "grad_norm": 0.5382740497589111, |
| "learning_rate": 7.642857142857143e-05, |
| "loss": 0.1628, |
| "step": 530 |
| }, |
| { |
| "epoch": 6.295857988165681, |
| "grad_norm": 0.6181464791297913, |
| "learning_rate": 7.523809523809524e-05, |
| "loss": 0.141, |
| "step": 535 |
| }, |
| { |
| "epoch": 6.355029585798817, |
| "grad_norm": 0.7450206875801086, |
| "learning_rate": 7.404761904761905e-05, |
| "loss": 0.1453, |
| "step": 540 |
| }, |
| { |
| "epoch": 6.414201183431953, |
| "grad_norm": 0.9426142573356628, |
| "learning_rate": 7.285714285714286e-05, |
| "loss": 0.1403, |
| "step": 545 |
| }, |
| { |
| "epoch": 6.4733727810650885, |
| "grad_norm": 0.9675353169441223, |
| "learning_rate": 7.166666666666667e-05, |
| "loss": 0.1216, |
| "step": 550 |
| }, |
| { |
| "epoch": 6.5325443786982245, |
| "grad_norm": 0.5108327269554138, |
| "learning_rate": 7.047619047619048e-05, |
| "loss": 0.1484, |
| "step": 555 |
| }, |
| { |
| "epoch": 6.591715976331361, |
| "grad_norm": 0.6549590229988098, |
| "learning_rate": 6.928571428571429e-05, |
| "loss": 0.1338, |
| "step": 560 |
| }, |
| { |
| "epoch": 6.650887573964497, |
| "grad_norm": 0.843664288520813, |
| "learning_rate": 6.80952380952381e-05, |
| "loss": 0.1586, |
| "step": 565 |
| }, |
| { |
| "epoch": 6.710059171597633, |
| "grad_norm": 0.8650611639022827, |
| "learning_rate": 6.69047619047619e-05, |
| "loss": 0.1517, |
| "step": 570 |
| }, |
| { |
| "epoch": 6.769230769230769, |
| "grad_norm": 0.7471471428871155, |
| "learning_rate": 6.571428571428571e-05, |
| "loss": 0.1966, |
| "step": 575 |
| }, |
| { |
| "epoch": 6.828402366863905, |
| "grad_norm": 0.7219163775444031, |
| "learning_rate": 6.452380952380954e-05, |
| "loss": 0.1368, |
| "step": 580 |
| }, |
| { |
| "epoch": 6.887573964497041, |
| "grad_norm": 0.911191463470459, |
| "learning_rate": 6.333333333333333e-05, |
| "loss": 0.1681, |
| "step": 585 |
| }, |
| { |
| "epoch": 6.946745562130177, |
| "grad_norm": 0.6017201542854309, |
| "learning_rate": 6.214285714285714e-05, |
| "loss": 0.1675, |
| "step": 590 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.0746937990188599, |
| "learning_rate": 6.0952380952380964e-05, |
| "loss": 0.1944, |
| "step": 595 |
| }, |
| { |
| "epoch": 7.059171597633136, |
| "grad_norm": 0.8227368593215942, |
| "learning_rate": 5.9761904761904766e-05, |
| "loss": 0.076, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.118343195266272, |
| "grad_norm": 0.6826525330543518, |
| "learning_rate": 5.8571428571428575e-05, |
| "loss": 0.1048, |
| "step": 605 |
| }, |
| { |
| "epoch": 7.177514792899408, |
| "grad_norm": 1.0038989782333374, |
| "learning_rate": 5.738095238095238e-05, |
| "loss": 0.0998, |
| "step": 610 |
| }, |
| { |
| "epoch": 7.236686390532545, |
| "grad_norm": 0.8537135720252991, |
| "learning_rate": 5.619047619047619e-05, |
| "loss": 0.1035, |
| "step": 615 |
| }, |
| { |
| "epoch": 7.295857988165681, |
| "grad_norm": 0.7067388892173767, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 0.1186, |
| "step": 620 |
| }, |
| { |
| "epoch": 7.355029585798817, |
| "grad_norm": 0.7003813982009888, |
| "learning_rate": 5.380952380952381e-05, |
| "loss": 0.0803, |
| "step": 625 |
| }, |
| { |
| "epoch": 7.414201183431953, |
| "grad_norm": 0.9923582673072815, |
| "learning_rate": 5.261904761904763e-05, |
| "loss": 0.1121, |
| "step": 630 |
| }, |
| { |
| "epoch": 7.4733727810650885, |
| "grad_norm": 0.4991615116596222, |
| "learning_rate": 5.142857142857143e-05, |
| "loss": 0.1056, |
| "step": 635 |
| }, |
| { |
| "epoch": 7.5325443786982245, |
| "grad_norm": 0.7406235337257385, |
| "learning_rate": 5.023809523809524e-05, |
| "loss": 0.1135, |
| "step": 640 |
| }, |
| { |
| "epoch": 7.591715976331361, |
| "grad_norm": 0.5871267318725586, |
| "learning_rate": 4.904761904761905e-05, |
| "loss": 0.1141, |
| "step": 645 |
| }, |
| { |
| "epoch": 7.650887573964497, |
| "grad_norm": 0.5495700836181641, |
| "learning_rate": 4.785714285714286e-05, |
| "loss": 0.07, |
| "step": 650 |
| }, |
| { |
| "epoch": 7.710059171597633, |
| "grad_norm": 0.9295830130577087, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.1293, |
| "step": 655 |
| }, |
| { |
| "epoch": 7.769230769230769, |
| "grad_norm": 0.9041563272476196, |
| "learning_rate": 4.547619047619048e-05, |
| "loss": 0.1034, |
| "step": 660 |
| }, |
| { |
| "epoch": 7.828402366863905, |
| "grad_norm": 0.6490697264671326, |
| "learning_rate": 4.428571428571428e-05, |
| "loss": 0.0898, |
| "step": 665 |
| }, |
| { |
| "epoch": 7.887573964497041, |
| "grad_norm": 0.5583420991897583, |
| "learning_rate": 4.30952380952381e-05, |
| "loss": 0.0891, |
| "step": 670 |
| }, |
| { |
| "epoch": 7.946745562130177, |
| "grad_norm": 0.7829737663269043, |
| "learning_rate": 4.190476190476191e-05, |
| "loss": 0.112, |
| "step": 675 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.48943546414375305, |
| "learning_rate": 4.0714285714285717e-05, |
| "loss": 0.115, |
| "step": 680 |
| }, |
| { |
| "epoch": 8.059171597633137, |
| "grad_norm": 0.4989611506462097, |
| "learning_rate": 3.9523809523809526e-05, |
| "loss": 0.0645, |
| "step": 685 |
| }, |
| { |
| "epoch": 8.118343195266272, |
| "grad_norm": 0.5723634362220764, |
| "learning_rate": 3.8333333333333334e-05, |
| "loss": 0.0603, |
| "step": 690 |
| }, |
| { |
| "epoch": 8.177514792899409, |
| "grad_norm": 0.5361748933792114, |
| "learning_rate": 3.7142857142857143e-05, |
| "loss": 0.0626, |
| "step": 695 |
| }, |
| { |
| "epoch": 8.236686390532544, |
| "grad_norm": 0.8497764468193054, |
| "learning_rate": 3.595238095238095e-05, |
| "loss": 0.0856, |
| "step": 700 |
| }, |
| { |
| "epoch": 8.29585798816568, |
| "grad_norm": 0.48423126339912415, |
| "learning_rate": 3.476190476190476e-05, |
| "loss": 0.0568, |
| "step": 705 |
| }, |
| { |
| "epoch": 8.355029585798816, |
| "grad_norm": 0.30722182989120483, |
| "learning_rate": 3.357142857142857e-05, |
| "loss": 0.0564, |
| "step": 710 |
| }, |
| { |
| "epoch": 8.414201183431953, |
| "grad_norm": 0.637298047542572, |
| "learning_rate": 3.2380952380952386e-05, |
| "loss": 0.0792, |
| "step": 715 |
| }, |
| { |
| "epoch": 8.47337278106509, |
| "grad_norm": 1.1620301008224487, |
| "learning_rate": 3.1190476190476195e-05, |
| "loss": 0.0848, |
| "step": 720 |
| }, |
| { |
| "epoch": 8.532544378698224, |
| "grad_norm": 0.9842550158500671, |
| "learning_rate": 3e-05, |
| "loss": 0.0695, |
| "step": 725 |
| }, |
| { |
| "epoch": 8.591715976331361, |
| "grad_norm": 0.5429280996322632, |
| "learning_rate": 2.880952380952381e-05, |
| "loss": 0.0691, |
| "step": 730 |
| }, |
| { |
| "epoch": 8.650887573964496, |
| "grad_norm": 0.4466063380241394, |
| "learning_rate": 2.7619047619047622e-05, |
| "loss": 0.0427, |
| "step": 735 |
| }, |
| { |
| "epoch": 8.710059171597633, |
| "grad_norm": 0.7048435807228088, |
| "learning_rate": 2.642857142857143e-05, |
| "loss": 0.066, |
| "step": 740 |
| }, |
| { |
| "epoch": 8.76923076923077, |
| "grad_norm": 0.71544349193573, |
| "learning_rate": 2.523809523809524e-05, |
| "loss": 0.048, |
| "step": 745 |
| }, |
| { |
| "epoch": 8.828402366863905, |
| "grad_norm": 0.27939409017562866, |
| "learning_rate": 2.404761904761905e-05, |
| "loss": 0.047, |
| "step": 750 |
| }, |
| { |
| "epoch": 8.887573964497042, |
| "grad_norm": 0.30130356550216675, |
| "learning_rate": 2.2857142857142858e-05, |
| "loss": 0.0715, |
| "step": 755 |
| }, |
| { |
| "epoch": 8.946745562130177, |
| "grad_norm": 0.6010681986808777, |
| "learning_rate": 2.1666666666666667e-05, |
| "loss": 0.0478, |
| "step": 760 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.5269991159439087, |
| "learning_rate": 2.0476190476190476e-05, |
| "loss": 0.048, |
| "step": 765 |
| }, |
| { |
| "epoch": 9.059171597633137, |
| "grad_norm": 0.30882084369659424, |
| "learning_rate": 1.928571428571429e-05, |
| "loss": 0.0508, |
| "step": 770 |
| }, |
| { |
| "epoch": 9.118343195266272, |
| "grad_norm": 0.3261071741580963, |
| "learning_rate": 1.8095238095238094e-05, |
| "loss": 0.0439, |
| "step": 775 |
| }, |
| { |
| "epoch": 9.177514792899409, |
| "grad_norm": 0.33494994044303894, |
| "learning_rate": 1.6904761904761906e-05, |
| "loss": 0.0394, |
| "step": 780 |
| }, |
| { |
| "epoch": 9.236686390532544, |
| "grad_norm": 0.470735639333725, |
| "learning_rate": 1.5714285714285715e-05, |
| "loss": 0.0465, |
| "step": 785 |
| }, |
| { |
| "epoch": 9.29585798816568, |
| "grad_norm": 0.4816909432411194, |
| "learning_rate": 1.4523809523809526e-05, |
| "loss": 0.0461, |
| "step": 790 |
| }, |
| { |
| "epoch": 9.355029585798816, |
| "grad_norm": 0.332380086183548, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0438, |
| "step": 795 |
| }, |
| { |
| "epoch": 9.414201183431953, |
| "grad_norm": 1.0364826917648315, |
| "learning_rate": 1.2142857142857144e-05, |
| "loss": 0.0575, |
| "step": 800 |
| }, |
| { |
| "epoch": 9.47337278106509, |
| "grad_norm": 0.31686267256736755, |
| "learning_rate": 1.0952380952380953e-05, |
| "loss": 0.0376, |
| "step": 805 |
| }, |
| { |
| "epoch": 9.532544378698224, |
| "grad_norm": 0.19316697120666504, |
| "learning_rate": 9.761904761904762e-06, |
| "loss": 0.0546, |
| "step": 810 |
| }, |
| { |
| "epoch": 9.591715976331361, |
| "grad_norm": 0.3316756784915924, |
| "learning_rate": 8.571428571428573e-06, |
| "loss": 0.0303, |
| "step": 815 |
| }, |
| { |
| "epoch": 9.650887573964496, |
| "grad_norm": 0.38312825560569763, |
| "learning_rate": 7.380952380952382e-06, |
| "loss": 0.0407, |
| "step": 820 |
| }, |
| { |
| "epoch": 9.710059171597633, |
| "grad_norm": 0.1672104001045227, |
| "learning_rate": 6.190476190476191e-06, |
| "loss": 0.0406, |
| "step": 825 |
| }, |
| { |
| "epoch": 9.76923076923077, |
| "grad_norm": 0.30679717659950256, |
| "learning_rate": 5e-06, |
| "loss": 0.0316, |
| "step": 830 |
| }, |
| { |
| "epoch": 9.828402366863905, |
| "grad_norm": 0.3245919346809387, |
| "learning_rate": 3.8095238095238102e-06, |
| "loss": 0.0361, |
| "step": 835 |
| }, |
| { |
| "epoch": 9.887573964497042, |
| "grad_norm": 0.25631895661354065, |
| "learning_rate": 2.6190476190476192e-06, |
| "loss": 0.0351, |
| "step": 840 |
| }, |
| { |
| "epoch": 9.946745562130177, |
| "grad_norm": 0.2532467842102051, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.0397, |
| "step": 845 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.7708420157432556, |
| "learning_rate": 2.3809523809523814e-07, |
| "loss": 0.0416, |
| "step": 850 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 850, |
| "total_flos": 5.971841440128e+16, |
| "train_loss": 0.3865027742876726, |
| "train_runtime": 1732.5271, |
| "train_samples_per_second": 3.902, |
| "train_steps_per_second": 0.491 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 850, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.971841440128e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |