7B_call_sum / trainer_state.json

MaruchanPark

add model

f43ca49 over 1 year ago

25.3 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 5.0,
	"eval_steps": 500,
	"global_step": 1580,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.03164556962025317,
	"grad_norm": 0.0072021484375,
	"learning_rate": 0.0002,
	"loss": 0.0019,
	"step": 10
	},
	{
	"epoch": 0.06329113924050633,
	"grad_norm": 0.01226806640625,
	"learning_rate": 0.0002,
	"loss": 0.0012,
	"step": 20
	},
	{
	"epoch": 0.0949367088607595,
	"grad_norm": 0.013427734375,
	"learning_rate": 0.0002,
	"loss": 0.0016,
	"step": 30
	},
	{
	"epoch": 0.12658227848101267,
	"grad_norm": 0.004791259765625,
	"learning_rate": 0.0002,
	"loss": 0.0037,
	"step": 40
	},
	{
	"epoch": 0.15822784810126583,
	"grad_norm": 0.00787353515625,
	"learning_rate": 0.0002,
	"loss": 0.0015,
	"step": 50
	},
	{
	"epoch": 0.189873417721519,
	"grad_norm": 0.0164794921875,
	"learning_rate": 0.0002,
	"loss": 0.0023,
	"step": 60
	},
	{
	"epoch": 0.22151898734177214,
	"grad_norm": 0.0019989013671875,
	"learning_rate": 0.0002,
	"loss": 0.0017,
	"step": 70
	},
	{
	"epoch": 0.25316455696202533,
	"grad_norm": 0.043701171875,
	"learning_rate": 0.0002,
	"loss": 0.0018,
	"step": 80
	},
	{
	"epoch": 0.2848101265822785,
	"grad_norm": 0.04150390625,
	"learning_rate": 0.0002,
	"loss": 0.0012,
	"step": 90
	},
	{
	"epoch": 0.31645569620253167,
	"grad_norm": 0.0126953125,
	"learning_rate": 0.0002,
	"loss": 0.0016,
	"step": 100
	},
	{
	"epoch": 0.34810126582278483,
	"grad_norm": 0.0299072265625,
	"learning_rate": 0.0002,
	"loss": 0.0017,
	"step": 110
	},
	{
	"epoch": 0.379746835443038,
	"grad_norm": 0.017333984375,
	"learning_rate": 0.0002,
	"loss": 0.0021,
	"step": 120
	},
	{
	"epoch": 0.41139240506329117,
	"grad_norm": 0.0284423828125,
	"learning_rate": 0.0002,
	"loss": 0.004,
	"step": 130
	},
	{
	"epoch": 0.4430379746835443,
	"grad_norm": 0.0228271484375,
	"learning_rate": 0.0002,
	"loss": 0.0015,
	"step": 140
	},
	{
	"epoch": 0.47468354430379744,
	"grad_norm": 0.0216064453125,
	"learning_rate": 0.0002,
	"loss": 0.0012,
	"step": 150
	},
	{
	"epoch": 0.5063291139240507,
	"grad_norm": 0.0079345703125,
	"learning_rate": 0.0002,
	"loss": 0.0017,
	"step": 160
	},
	{
	"epoch": 0.5379746835443038,
	"grad_norm": 0.03564453125,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 170
	},
	{
	"epoch": 0.569620253164557,
	"grad_norm": 0.0859375,
	"learning_rate": 0.0002,
	"loss": 0.0016,
	"step": 180
	},
	{
	"epoch": 0.6012658227848101,
	"grad_norm": 0.0014801025390625,
	"learning_rate": 0.0002,
	"loss": 0.0013,
	"step": 190
	},
	{
	"epoch": 0.6329113924050633,
	"grad_norm": 0.02734375,
	"learning_rate": 0.0002,
	"loss": 0.0015,
	"step": 200
	},
	{
	"epoch": 0.6645569620253164,
	"grad_norm": 0.00836181640625,
	"learning_rate": 0.0002,
	"loss": 0.0012,
	"step": 210
	},
	{
	"epoch": 0.6962025316455697,
	"grad_norm": 0.0218505859375,
	"learning_rate": 0.0002,
	"loss": 0.0012,
	"step": 220
	},
	{
	"epoch": 0.7278481012658228,
	"grad_norm": 0.00799560546875,
	"learning_rate": 0.0002,
	"loss": 0.0013,
	"step": 230
	},
	{
	"epoch": 0.759493670886076,
	"grad_norm": 0.0478515625,
	"learning_rate": 0.0002,
	"loss": 0.003,
	"step": 240
	},
	{
	"epoch": 0.7911392405063291,
	"grad_norm": 0.091796875,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 250
	},
	{
	"epoch": 0.8227848101265823,
	"grad_norm": 0.0218505859375,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 260
	},
	{
	"epoch": 0.8544303797468354,
	"grad_norm": 0.006317138671875,
	"learning_rate": 0.0002,
	"loss": 0.0009,
	"step": 270
	},
	{
	"epoch": 0.8860759493670886,
	"grad_norm": 0.034912109375,
	"learning_rate": 0.0002,
	"loss": 0.0016,
	"step": 280
	},
	{
	"epoch": 0.9177215189873418,
	"grad_norm": 0.0115966796875,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 290
	},
	{
	"epoch": 0.9493670886075949,
	"grad_norm": 0.04296875,
	"learning_rate": 0.0002,
	"loss": 0.0027,
	"step": 300
	},
	{
	"epoch": 0.9810126582278481,
	"grad_norm": 0.0277099609375,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 310
	},
	{
	"epoch": 1.0126582278481013,
	"grad_norm": 0.0240478515625,
	"learning_rate": 0.0002,
	"loss": 0.001,
	"step": 320
	},
	{
	"epoch": 1.0443037974683544,
	"grad_norm": 0.0078125,
	"learning_rate": 0.0002,
	"loss": 0.0042,
	"step": 330
	},
	{
	"epoch": 1.0759493670886076,
	"grad_norm": 0.0047607421875,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 340
	},
	{
	"epoch": 1.1075949367088607,
	"grad_norm": 0.007598876953125,
	"learning_rate": 0.0002,
	"loss": 0.0011,
	"step": 350
	},
	{
	"epoch": 1.139240506329114,
	"grad_norm": 0.00665283203125,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 360
	},
	{
	"epoch": 1.1708860759493671,
	"grad_norm": 0.00445556640625,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 370
	},
	{
	"epoch": 1.2025316455696202,
	"grad_norm": 0.004364013671875,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 380
	},
	{
	"epoch": 1.2341772151898733,
	"grad_norm": 0.003662109375,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 390
	},
	{
	"epoch": 1.2658227848101267,
	"grad_norm": 0.020263671875,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 400
	},
	{
	"epoch": 1.2974683544303798,
	"grad_norm": 0.008544921875,
	"learning_rate": 0.0002,
	"loss": 0.0009,
	"step": 410
	},
	{
	"epoch": 1.3291139240506329,
	"grad_norm": 0.0140380859375,
	"learning_rate": 0.0002,
	"loss": 0.0015,
	"step": 420
	},
	{
	"epoch": 1.360759493670886,
	"grad_norm": 0.00616455078125,
	"learning_rate": 0.0002,
	"loss": 0.001,
	"step": 430
	},
	{
	"epoch": 1.3924050632911391,
	"grad_norm": 0.00506591796875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 440
	},
	{
	"epoch": 1.4240506329113924,
	"grad_norm": 0.041748046875,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 450
	},
	{
	"epoch": 1.4556962025316456,
	"grad_norm": 0.0849609375,
	"learning_rate": 0.0002,
	"loss": 0.0011,
	"step": 460
	},
	{
	"epoch": 1.4873417721518987,
	"grad_norm": 0.01495361328125,
	"learning_rate": 0.0002,
	"loss": 0.001,
	"step": 470
	},
	{
	"epoch": 1.518987341772152,
	"grad_norm": 0.007110595703125,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 480
	},
	{
	"epoch": 1.5506329113924051,
	"grad_norm": 0.005401611328125,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 490
	},
	{
	"epoch": 1.5822784810126582,
	"grad_norm": 0.0084228515625,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 500
	},
	{
	"epoch": 1.6139240506329116,
	"grad_norm": 0.01123046875,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 510
	},
	{
	"epoch": 1.6455696202531644,
	"grad_norm": 0.00970458984375,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 520
	},
	{
	"epoch": 1.6772151898734178,
	"grad_norm": 0.00665283203125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 530
	},
	{
	"epoch": 1.7088607594936709,
	"grad_norm": 0.00506591796875,
	"learning_rate": 0.0002,
	"loss": 0.001,
	"step": 540
	},
	{
	"epoch": 1.740506329113924,
	"grad_norm": 0.0205078125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 550
	},
	{
	"epoch": 1.7721518987341773,
	"grad_norm": 0.04052734375,
	"learning_rate": 0.0002,
	"loss": 0.0011,
	"step": 560
	},
	{
	"epoch": 1.8037974683544302,
	"grad_norm": 0.0179443359375,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 570
	},
	{
	"epoch": 1.8354430379746836,
	"grad_norm": 0.0145263671875,
	"learning_rate": 0.0002,
	"loss": 0.0009,
	"step": 580
	},
	{
	"epoch": 1.8670886075949367,
	"grad_norm": 0.004913330078125,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 590
	},
	{
	"epoch": 1.8987341772151898,
	"grad_norm": 0.014404296875,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 600
	},
	{
	"epoch": 1.9303797468354431,
	"grad_norm": 0.005126953125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 610
	},
	{
	"epoch": 1.9620253164556962,
	"grad_norm": 0.00390625,
	"learning_rate": 0.0002,
	"loss": 0.001,
	"step": 620
	},
	{
	"epoch": 1.9936708860759493,
	"grad_norm": 0.0020904541015625,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 630
	},
	{
	"epoch": 2.0253164556962027,
	"grad_norm": 0.00102996826171875,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 640
	},
	{
	"epoch": 2.0569620253164556,
	"grad_norm": 0.0079345703125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 650
	},
	{
	"epoch": 2.088607594936709,
	"grad_norm": 0.004058837890625,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 660
	},
	{
	"epoch": 2.1202531645569622,
	"grad_norm": 0.006683349609375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 670
	},
	{
	"epoch": 2.151898734177215,
	"grad_norm": 0.00799560546875,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 680
	},
	{
	"epoch": 2.1835443037974684,
	"grad_norm": 0.01416015625,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 690
	},
	{
	"epoch": 2.2151898734177213,
	"grad_norm": 0.005523681640625,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 700
	},
	{
	"epoch": 2.2468354430379747,
	"grad_norm": 0.003692626953125,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 710
	},
	{
	"epoch": 2.278481012658228,
	"grad_norm": 0.01287841796875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 720
	},
	{
	"epoch": 2.310126582278481,
	"grad_norm": 0.0101318359375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 730
	},
	{
	"epoch": 2.3417721518987342,
	"grad_norm": 0.006866455078125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 740
	},
	{
	"epoch": 2.3734177215189876,
	"grad_norm": 0.00921630859375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 750
	},
	{
	"epoch": 2.4050632911392404,
	"grad_norm": 0.0072021484375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 760
	},
	{
	"epoch": 2.4367088607594938,
	"grad_norm": 0.00921630859375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 770
	},
	{
	"epoch": 2.4683544303797467,
	"grad_norm": 0.01239013671875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 780
	},
	{
	"epoch": 2.5,
	"grad_norm": 0.01226806640625,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 790
	},
	{
	"epoch": 2.5316455696202533,
	"grad_norm": 0.00872802734375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 800
	},
	{
	"epoch": 2.5632911392405062,
	"grad_norm": 0.007781982421875,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 810
	},
	{
	"epoch": 2.5949367088607596,
	"grad_norm": 0.005767822265625,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 820
	},
	{
	"epoch": 2.6265822784810124,
	"grad_norm": 0.0111083984375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 830
	},
	{
	"epoch": 2.6582278481012658,
	"grad_norm": 0.0157470703125,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 840
	},
	{
	"epoch": 2.689873417721519,
	"grad_norm": 0.0111083984375,
	"learning_rate": 0.0002,
	"loss": 0.0008,
	"step": 850
	},
	{
	"epoch": 2.721518987341772,
	"grad_norm": 0.009521484375,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 860
	},
	{
	"epoch": 2.7531645569620253,
	"grad_norm": 0.0018463134765625,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 870
	},
	{
	"epoch": 2.7848101265822782,
	"grad_norm": 0.00168609619140625,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 880
	},
	{
	"epoch": 2.8164556962025316,
	"grad_norm": 0.01470947265625,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 890
	},
	{
	"epoch": 2.848101265822785,
	"grad_norm": 0.0211181640625,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 900
	},
	{
	"epoch": 2.879746835443038,
	"grad_norm": 0.0057373046875,
	"learning_rate": 0.0002,
	"loss": 0.0017,
	"step": 910
	},
	{
	"epoch": 2.911392405063291,
	"grad_norm": 0.00469970703125,
	"learning_rate": 0.0002,
	"loss": 0.0022,
	"step": 920
	},
	{
	"epoch": 2.9430379746835444,
	"grad_norm": 0.00982666015625,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 930
	},
	{
	"epoch": 2.9746835443037973,
	"grad_norm": 0.0087890625,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 940
	},
	{
	"epoch": 3.0063291139240507,
	"grad_norm": 0.0098876953125,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 950
	},
	{
	"epoch": 3.037974683544304,
	"grad_norm": 0.00360107421875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 960
	},
	{
	"epoch": 3.069620253164557,
	"grad_norm": 0.0150146484375,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 970
	},
	{
	"epoch": 3.1012658227848102,
	"grad_norm": 0.005828857421875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 980
	},
	{
	"epoch": 3.132911392405063,
	"grad_norm": 0.00665283203125,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 990
	},
	{
	"epoch": 3.1645569620253164,
	"grad_norm": 0.005401611328125,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1000
	},
	{
	"epoch": 3.1962025316455698,
	"grad_norm": 0.0216064453125,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 1010
	},
	{
	"epoch": 3.2278481012658227,
	"grad_norm": 0.0152587890625,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 1020
	},
	{
	"epoch": 3.259493670886076,
	"grad_norm": 0.0096435546875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1030
	},
	{
	"epoch": 3.291139240506329,
	"grad_norm": 0.007232666015625,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1040
	},
	{
	"epoch": 3.3227848101265822,
	"grad_norm": 0.006439208984375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1050
	},
	{
	"epoch": 3.3544303797468356,
	"grad_norm": 0.003692626953125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 1060
	},
	{
	"epoch": 3.3860759493670884,
	"grad_norm": 0.0028839111328125,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1070
	},
	{
	"epoch": 3.4177215189873418,
	"grad_norm": 0.005859375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1080
	},
	{
	"epoch": 3.449367088607595,
	"grad_norm": 0.0067138671875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1090
	},
	{
	"epoch": 3.481012658227848,
	"grad_norm": 0.007171630859375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1100
	},
	{
	"epoch": 3.5126582278481013,
	"grad_norm": 0.00537109375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1110
	},
	{
	"epoch": 3.5443037974683547,
	"grad_norm": 0.00775146484375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1120
	},
	{
	"epoch": 3.5759493670886076,
	"grad_norm": 0.0030517578125,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1130
	},
	{
	"epoch": 3.607594936708861,
	"grad_norm": 0.00823974609375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1140
	},
	{
	"epoch": 3.6392405063291138,
	"grad_norm": 0.005157470703125,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1150
	},
	{
	"epoch": 3.670886075949367,
	"grad_norm": 0.01202392578125,
	"learning_rate": 0.0002,
	"loss": 0.0009,
	"step": 1160
	},
	{
	"epoch": 3.7025316455696204,
	"grad_norm": 0.0233154296875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1170
	},
	{
	"epoch": 3.7341772151898733,
	"grad_norm": 0.005218505859375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1180
	},
	{
	"epoch": 3.7658227848101267,
	"grad_norm": 0.0108642578125,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1190
	},
	{
	"epoch": 3.7974683544303796,
	"grad_norm": 0.0086669921875,
	"learning_rate": 0.0002,
	"loss": 0.0006,
	"step": 1200
	},
	{
	"epoch": 3.829113924050633,
	"grad_norm": 0.002685546875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1210
	},
	{
	"epoch": 3.8607594936708862,
	"grad_norm": 0.0019683837890625,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1220
	},
	{
	"epoch": 3.892405063291139,
	"grad_norm": 0.0020294189453125,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 1230
	},
	{
	"epoch": 3.9240506329113924,
	"grad_norm": 0.003509521484375,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1240
	},
	{
	"epoch": 3.9556962025316453,
	"grad_norm": 0.004486083984375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1250
	},
	{
	"epoch": 3.9873417721518987,
	"grad_norm": 0.0034637451171875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1260
	},
	{
	"epoch": 4.018987341772152,
	"grad_norm": 0.0031890869140625,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1270
	},
	{
	"epoch": 4.050632911392405,
	"grad_norm": 0.0036163330078125,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1280
	},
	{
	"epoch": 4.082278481012658,
	"grad_norm": 0.01055908203125,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1290
	},
	{
	"epoch": 4.113924050632911,
	"grad_norm": 0.00188446044921875,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1300
	},
	{
	"epoch": 4.1455696202531644,
	"grad_norm": 0.0029296875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1310
	},
	{
	"epoch": 4.177215189873418,
	"grad_norm": 0.00167083740234375,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1320
	},
	{
	"epoch": 4.208860759493671,
	"grad_norm": 0.005584716796875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1330
	},
	{
	"epoch": 4.2405063291139244,
	"grad_norm": 0.007171630859375,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1340
	},
	{
	"epoch": 4.272151898734177,
	"grad_norm": 0.004119873046875,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1350
	},
	{
	"epoch": 4.30379746835443,
	"grad_norm": 0.00543212890625,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1360
	},
	{
	"epoch": 4.3354430379746836,
	"grad_norm": 0.00860595703125,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1370
	},
	{
	"epoch": 4.367088607594937,
	"grad_norm": 0.0130615234375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1380
	},
	{
	"epoch": 4.39873417721519,
	"grad_norm": 0.0074462890625,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1390
	},
	{
	"epoch": 4.430379746835443,
	"grad_norm": 0.00384521484375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1400
	},
	{
	"epoch": 4.462025316455696,
	"grad_norm": 0.0106201171875,
	"learning_rate": 0.0002,
	"loss": 0.0004,
	"step": 1410
	},
	{
	"epoch": 4.493670886075949,
	"grad_norm": 0.0027618408203125,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1420
	},
	{
	"epoch": 4.525316455696203,
	"grad_norm": 0.00555419921875,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1430
	},
	{
	"epoch": 4.556962025316456,
	"grad_norm": 0.006011962890625,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1440
	},
	{
	"epoch": 4.588607594936709,
	"grad_norm": 0.006256103515625,
	"learning_rate": 0.0002,
	"loss": 0.0002,
	"step": 1450
	},
	{
	"epoch": 4.620253164556962,
	"grad_norm": 0.003997802734375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1460
	},
	{
	"epoch": 4.651898734177215,
	"grad_norm": 0.006744384765625,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1470
	},
	{
	"epoch": 4.6835443037974684,
	"grad_norm": 0.00836181640625,
	"learning_rate": 0.0002,
	"loss": 0.0005,
	"step": 1480
	},
	{
	"epoch": 4.715189873417722,
	"grad_norm": 0.0076904296875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1490
	},
	{
	"epoch": 4.746835443037975,
	"grad_norm": 0.01239013671875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1500
	},
	{
	"epoch": 4.7784810126582276,
	"grad_norm": 0.00146484375,
	"learning_rate": 0.0002,
	"loss": 0.0007,
	"step": 1510
	},
	{
	"epoch": 4.810126582278481,
	"grad_norm": 0.00909423828125,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1520
	},
	{
	"epoch": 4.841772151898734,
	"grad_norm": 0.0133056640625,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1530
	},
	{
	"epoch": 4.8734177215189876,
	"grad_norm": 0.00360107421875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1540
	},
	{
	"epoch": 4.905063291139241,
	"grad_norm": 0.00799560546875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1550
	},
	{
	"epoch": 4.936708860759493,
	"grad_norm": 0.0047607421875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1560
	},
	{
	"epoch": 4.968354430379747,
	"grad_norm": 0.00157928466796875,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1570
	},
	{
	"epoch": 5.0,
	"grad_norm": 0.0101318359375,
	"learning_rate": 0.0002,
	"loss": 0.0003,
	"step": 1580
	}
	],
	"logging_steps": 10,
	"max_steps": 1580,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 5,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.151495561120973e+17,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}