{
"best_global_step": 348,
"best_metric": 0.5661588907241821,
"best_model_checkpoint": "/workspace/scripts/soutputs/8ca8a9ea-9ae3-4938-9713-015819984d61_0/checkpoint-348",
"epoch": 0.997134670487106,
"eval_steps": 500,
"global_step": 348,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014326647564469915,
"grad_norm": 1.8568024635314941,
"learning_rate": 2.3557306798236462e-05,
"loss": 0.9115,
"step": 5
},
{
"epoch": 0.02865329512893983,
"grad_norm": 0.9094077944755554,
"learning_rate": 5.300394029603203e-05,
"loss": 0.7201,
"step": 10
},
{
"epoch": 0.04297994269340974,
"grad_norm": 0.7323216199874878,
"learning_rate": 8.245057379382762e-05,
"loss": 0.6833,
"step": 15
},
{
"epoch": 0.05730659025787966,
"grad_norm": 0.5553655624389648,
"learning_rate": 0.00011189720729162319,
"loss": 0.6641,
"step": 20
},
{
"epoch": 0.07163323782234957,
"grad_norm": 0.5094953179359436,
"learning_rate": 0.00014134384078941877,
"loss": 0.6485,
"step": 25
},
{
"epoch": 0.08595988538681948,
"grad_norm": 0.4796569347381592,
"learning_rate": 0.00017079047428721436,
"loss": 0.6399,
"step": 30
},
{
"epoch": 0.10028653295128939,
"grad_norm": 0.49097880721092224,
"learning_rate": 0.00020023710778500992,
"loss": 0.6322,
"step": 35
},
{
"epoch": 0.11461318051575932,
"grad_norm": 0.4515115022659302,
"learning_rate": 0.00020611968072263296,
"loss": 0.6384,
"step": 40
},
{
"epoch": 0.12893982808022922,
"grad_norm": 0.4155406057834625,
"learning_rate": 0.0002060922453495023,
"loss": 0.6088,
"step": 45
},
{
"epoch": 0.14326647564469913,
"grad_norm": 0.4288005232810974,
"learning_rate": 0.00020604371285965804,
"loss": 0.6074,
"step": 50
},
{
"epoch": 0.15759312320916904,
"grad_norm": 0.41716331243515015,
"learning_rate": 0.000205974094945471,
"loss": 0.604,
"step": 55
},
{
"epoch": 0.17191977077363896,
"grad_norm": 0.412203311920166,
"learning_rate": 0.00020588340837917924,
"loss": 0.6138,
"step": 60
},
{
"epoch": 0.18624641833810887,
"grad_norm": 0.3845170736312866,
"learning_rate": 0.00020577167500884718,
"loss": 0.6036,
"step": 65
},
{
"epoch": 0.20057306590257878,
"grad_norm": 0.4012996256351471,
"learning_rate": 0.00020563892175310208,
"loss": 0.6313,
"step": 70
},
{
"epoch": 0.2148997134670487,
"grad_norm": 0.4158208966255188,
"learning_rate": 0.0002054851805946488,
"loss": 0.6115,
"step": 75
},
{
"epoch": 0.22922636103151864,
"grad_norm": 0.409179151058197,
"learning_rate": 0.00020531048857256465,
"loss": 0.6039,
"step": 80
},
{
"epoch": 0.24355300859598855,
"grad_norm": 0.3829016387462616,
"learning_rate": 0.00020511488777337586,
"loss": 0.6224,
"step": 85
},
{
"epoch": 0.25787965616045844,
"grad_norm": 0.3995232582092285,
"learning_rate": 0.00020489842532091834,
"loss": 0.616,
"step": 90
},
{
"epoch": 0.2722063037249284,
"grad_norm": 0.3849766254425049,
"learning_rate": 0.00020466115336498453,
"loss": 0.6012,
"step": 95
},
{
"epoch": 0.28653295128939826,
"grad_norm": 0.3875938951969147,
"learning_rate": 0.00020440312906875961,
"loss": 0.5982,
"step": 100
},
{
"epoch": 0.3008595988538682,
"grad_norm": 0.4086308777332306,
"learning_rate": 0.0002041244145950498,
"loss": 0.6051,
"step": 105
},
{
"epoch": 0.3151862464183381,
"grad_norm": 0.40100717544555664,
"learning_rate": 0.00020382507709130636,
"loss": 0.6121,
"step": 110
},
{
"epoch": 0.32951289398280803,
"grad_norm": 0.42525944113731384,
"learning_rate": 0.0002035051886734482,
"loss": 0.6111,
"step": 115
},
{
"epoch": 0.3438395415472779,
"grad_norm": 0.4180887043476105,
"learning_rate": 0.00020316482640848823,
"loss": 0.5977,
"step": 120
},
{
"epoch": 0.35816618911174786,
"grad_norm": 0.38851866126060486,
"learning_rate": 0.00020280407229596612,
"loss": 0.6037,
"step": 125
},
{
"epoch": 0.37249283667621774,
"grad_norm": 0.39135101437568665,
"learning_rate": 0.0002024230132481934,
"loss": 0.5931,
"step": 130
},
{
"epoch": 0.3868194842406877,
"grad_norm": 0.3942822813987732,
"learning_rate": 0.00020202174106931448,
"loss": 0.572,
"step": 135
},
{
"epoch": 0.40114613180515757,
"grad_norm": 0.38815903663635254,
"learning_rate": 0.0002016003524331895,
"loss": 0.6004,
"step": 140
},
{
"epoch": 0.4154727793696275,
"grad_norm": 0.3702361285686493,
"learning_rate": 0.00020115894886010366,
"loss": 0.5913,
"step": 145
},
{
"epoch": 0.4297994269340974,
"grad_norm": 0.37867555022239685,
"learning_rate": 0.00020069763669230918,
"loss": 0.5811,
"step": 150
},
{
"epoch": 0.44412607449856734,
"grad_norm": 0.36923325061798096,
"learning_rate": 0.00020021652706840554,
"loss": 0.5953,
"step": 155
},
{
"epoch": 0.4584527220630373,
"grad_norm": 0.3655109703540802,
"learning_rate": 0.00019971573589656414,
"loss": 0.5863,
"step": 160
},
{
"epoch": 0.47277936962750716,
"grad_norm": 0.37171751260757446,
"learning_rate": 0.00019919538382660374,
"loss": 0.5939,
"step": 165
},
{
"epoch": 0.4871060171919771,
"grad_norm": 0.37251758575439453,
"learning_rate": 0.00019865559622092392,
"loss": 0.5895,
"step": 170
},
{
"epoch": 0.501432664756447,
"grad_norm": 0.3676713705062866,
"learning_rate": 0.00019809650312430275,
"loss": 0.5701,
"step": 175
},
{
"epoch": 0.5157593123209169,
"grad_norm": 0.3767331540584564,
"learning_rate": 0.0001975182392325668,
"loss": 0.5874,
"step": 180
},
{
"epoch": 0.5300859598853869,
"grad_norm": 0.3921726942062378,
"learning_rate": 0.00019692094386014036,
"loss": 0.5729,
"step": 185
},
{
"epoch": 0.5444126074498568,
"grad_norm": 0.37424615025520325,
"learning_rate": 0.00019630476090648182,
"loss": 0.5826,
"step": 190
},
{
"epoch": 0.5587392550143266,
"grad_norm": 0.37583500146865845,
"learning_rate": 0.00019566983882141615,
"loss": 0.5687,
"step": 195
},
{
"epoch": 0.5730659025787965,
"grad_norm": 0.38613978028297424,
"learning_rate": 0.00019501633056936998,
"loss": 0.5918,
"step": 200
},
{
"epoch": 0.5873925501432665,
"grad_norm": 0.3772313594818115,
"learning_rate": 0.00019434439359252017,
"loss": 0.5884,
"step": 205
},
{
"epoch": 0.6017191977077364,
"grad_norm": 0.3849615752696991,
"learning_rate": 0.00019365418977286276,
"loss": 0.5598,
"step": 210
},
{
"epoch": 0.6160458452722063,
"grad_norm": 0.3819197118282318,
"learning_rate": 0.0001929458853932128,
"loss": 0.5803,
"step": 215
},
{
"epoch": 0.6303724928366762,
"grad_norm": 0.3772597908973694,
"learning_rate": 0.00019221965109714363,
"loss": 0.5858,
"step": 220
},
{
"epoch": 0.6446991404011462,
"grad_norm": 0.37635743618011475,
"learning_rate": 0.00019147566184787585,
"loss": 0.5827,
"step": 225
},
{
"epoch": 0.6590257879656161,
"grad_norm": 0.3801959455013275,
"learning_rate": 0.00019071409688612524,
"loss": 0.5683,
"step": 230
},
{
"epoch": 0.673352435530086,
"grad_norm": 0.3805936872959137,
"learning_rate": 0.00018993513968692063,
"loss": 0.5771,
"step": 235
},
{
"epoch": 0.6876790830945558,
"grad_norm": 0.3774188160896301,
"learning_rate": 0.00018913897791540107,
"loss": 0.5631,
"step": 240
},
{
"epoch": 0.7020057306590258,
"grad_norm": 0.37649068236351013,
"learning_rate": 0.00018832580338160425,
"loss": 0.5669,
"step": 245
},
{
"epoch": 0.7163323782234957,
"grad_norm": 0.38752481341362,
"learning_rate": 0.00018749581199425556,
"loss": 0.5743,
"step": 250
},
{
"epoch": 0.7306590257879656,
"grad_norm": 0.38894984126091003,
"learning_rate": 0.0001866492037135702,
"loss": 0.579,
"step": 255
},
{
"epoch": 0.7449856733524355,
"grad_norm": 0.36124807596206665,
"learning_rate": 0.00018578618250307912,
"loss": 0.5752,
"step": 260
},
{
"epoch": 0.7593123209169055,
"grad_norm": 0.3647681474685669,
"learning_rate": 0.00018490695628049046,
"loss": 0.573,
"step": 265
},
{
"epoch": 0.7736389684813754,
"grad_norm": 0.38366076350212097,
"learning_rate": 0.0001840117368675982,
"loss": 0.5677,
"step": 270
},
{
"epoch": 0.7879656160458453,
"grad_norm": 0.36573946475982666,
"learning_rate": 0.0001831007399392506,
"loss": 0.5652,
"step": 275
},
{
"epoch": 0.8022922636103151,
"grad_norm": 0.354879230260849,
"learning_rate": 0.00018217418497139,
"loss": 0.5648,
"step": 280
},
{
"epoch": 0.8166189111747851,
"grad_norm": 0.36587202548980713,
"learning_rate": 0.00018123229518817702,
"loss": 0.566,
"step": 285
},
{
"epoch": 0.830945558739255,
"grad_norm": 0.3714900314807892,
"learning_rate": 0.0001802752975082119,
"loss": 0.57,
"step": 290
},
{
"epoch": 0.8452722063037249,
"grad_norm": 0.3702372610569,
"learning_rate": 0.00017930342248986537,
"loss": 0.5569,
"step": 295
},
{
"epoch": 0.8595988538681948,
"grad_norm": 0.36726057529449463,
"learning_rate": 0.00017831690427573326,
"loss": 0.5491,
"step": 300
},
{
"epoch": 0.8739255014326648,
"grad_norm": 0.36911457777023315,
"learning_rate": 0.00017731598053622675,
"loss": 0.5596,
"step": 305
},
{
"epoch": 0.8882521489971347,
"grad_norm": 0.38149625062942505,
"learning_rate": 0.00017630089241231375,
"loss": 0.5736,
"step": 310
},
{
"epoch": 0.9025787965616046,
"grad_norm": 0.3648279011249542,
"learning_rate": 0.00017527188445742308,
"loss": 0.5605,
"step": 315
},
{
"epoch": 0.9169054441260746,
"grad_norm": 0.3748702108860016,
"learning_rate": 0.00017422920457852738,
"loss": 0.5802,
"step": 320
},
{
"epoch": 0.9312320916905444,
"grad_norm": 0.3691900372505188,
"learning_rate": 0.00017317310397641764,
"loss": 0.5431,
"step": 325
},
{
"epoch": 0.9455587392550143,
"grad_norm": 0.3742789328098297,
"learning_rate": 0.0001721038370851842,
"loss": 0.5647,
"step": 330
},
{
"epoch": 0.9598853868194842,
"grad_norm": 0.36382776498794556,
"learning_rate": 0.00017102166151091922,
"loss": 0.5631,
"step": 335
},
{
"epoch": 0.9742120343839542,
"grad_norm": 0.3645402193069458,
"learning_rate": 0.00016992683796965424,
"loss": 0.5671,
"step": 340
},
{
"epoch": 0.9885386819484241,
"grad_norm": 0.3582296669483185,
"learning_rate": 0.000168819630224549,
"loss": 0.564,
"step": 345
},
{
"epoch": 0.997134670487106,
"eval_loss": 0.5661588907241821,
"eval_runtime": 3.0246,
"eval_samples_per_second": 13.886,
"eval_steps_per_second": 13.886,
"step": 348
}
],
"logging_steps": 5,
"max_steps": 1047,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.990390930375967e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}