{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 0.0004991232148123761,
"loss": 4.3712,
"step": 20
},
{
"epoch": 0.16,
"learning_rate": 0.0004964990092676262,
"loss": 3.7266,
"step": 40
},
{
"epoch": 0.24,
"learning_rate": 0.0004921457902821578,
"loss": 3.5415,
"step": 60
},
{
"epoch": 0.32,
"learning_rate": 0.0004860940925593703,
"loss": 3.362,
"step": 80
},
{
"epoch": 0.4,
"learning_rate": 0.0004783863644106502,
"loss": 2.9872,
"step": 100
},
{
"epoch": 0.48,
"learning_rate": 0.0004690766700109659,
"loss": 2.848,
"step": 120
},
{
"epoch": 0.56,
"learning_rate": 0.00045823031017752484,
"loss": 2.4691,
"step": 140
},
{
"epoch": 0.64,
"learning_rate": 0.00044592336433145995,
"loss": 2.4321,
"step": 160
},
{
"epoch": 0.72,
"learning_rate": 0.00043224215685535287,
"loss": 2.1654,
"step": 180
},
{
"epoch": 0.8,
"learning_rate": 0.0004172826515897146,
"loss": 1.8459,
"step": 200
},
{
"epoch": 0.88,
"learning_rate": 0.00040114977871559375,
"loss": 1.7236,
"step": 220
},
{
"epoch": 0.96,
"learning_rate": 0.00038395669874474915,
"loss": 1.42,
"step": 240
},
{
"epoch": 1.04,
"learning_rate": 0.00036582400877996547,
"loss": 1.2365,
"step": 260
},
{
"epoch": 1.12,
"learning_rate": 0.00034687889661302575,
"loss": 0.8967,
"step": 280
},
{
"epoch": 1.2,
"learning_rate": 0.00032725424859373687,
"loss": 0.8539,
"step": 300
},
{
"epoch": 1.28,
"learning_rate": 0.00030708771752766396,
"loss": 0.8967,
"step": 320
},
{
"epoch": 1.36,
"learning_rate": 0.00028652075714060294,
"loss": 0.6522,
"step": 340
},
{
"epoch": 1.44,
"learning_rate": 0.0002656976298823284,
"loss": 0.6262,
"step": 360
},
{
"epoch": 1.52,
"learning_rate": 0.00024476439502916077,
"loss": 0.6353,
"step": 380
},
{
"epoch": 1.6,
"learning_rate": 0.00022386788418308668,
"loss": 0.4705,
"step": 400
},
{
"epoch": 1.68,
"learning_rate": 0.0002031546713535688,
"loss": 0.4861,
"step": 420
},
{
"epoch": 1.76,
"learning_rate": 0.00018277004484618358,
"loss": 0.4526,
"step": 440
},
{
"epoch": 1.84,
"learning_rate": 0.00016285698816954626,
"loss": 0.3909,
"step": 460
},
{
"epoch": 1.92,
"learning_rate": 0.00014355517710873183,
"loss": 0.3611,
"step": 480
},
{
"epoch": 2.0,
"learning_rate": 0.00012500000000000006,
"loss": 0.423,
"step": 500
},
{
"epoch": 2.08,
"learning_rate": 0.0001073216080788921,
"loss": 0.2621,
"step": 520
},
{
"epoch": 2.16,
"learning_rate": 9.064400256282756e-05,
"loss": 0.2386,
"step": 540
},
{
"epoch": 2.24,
"learning_rate": 7.508416487165862e-05,
"loss": 0.2187,
"step": 560
},
{
"epoch": 2.32,
"learning_rate": 6.075123608706093e-05,
"loss": 0.2298,
"step": 580
},
{
"epoch": 2.4,
"learning_rate": 4.7745751406263163e-05,
"loss": 0.21,
"step": 600
},
{
"epoch": 2.48,
"learning_rate": 3.615893495987335e-05,
"loss": 0.1537,
"step": 620
},
{
"epoch": 2.56,
"learning_rate": 2.6072059940146776e-05,
"loss": 0.2061,
"step": 640
},
{
"epoch": 2.64,
"learning_rate": 1.7555878527937163e-05,
"loss": 0.1574,
"step": 660
},
{
"epoch": 2.72,
"learning_rate": 1.067012561698319e-05,
"loss": 0.2123,
"step": 680
},
{
"epoch": 2.8,
"learning_rate": 5.463099816548578e-06,
"loss": 0.2079,
"step": 700
},
{
"epoch": 2.88,
"learning_rate": 1.9713246713805587e-06,
"loss": 0.1966,
"step": 720
},
{
"epoch": 2.96,
"learning_rate": 2.1929247528540418e-07,
"loss": 0.1839,
"step": 740
},
{
"epoch": 3.0,
"step": 750,
"total_flos": 4.158994380698419e+16,
"train_loss": 1.1695097732543944,
"train_runtime": 966.7484,
"train_samples_per_second": 3.103,
"train_steps_per_second": 0.776
}
],
"logging_steps": 20,
"max_steps": 750,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 4.158994380698419e+16,
"trial_name": null,
"trial_params": null
}