trainer_state.json · jtmaxsoft/OFKMS-Migration-Qwen3.5-9B-SFT at main

OFKMS-Migration-Qwen3.5-9B-SFT / trainer_state.json

Upload bf16 - Migration QLoRA SFT

37b6090 verified 29 days ago

5.22 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.0,
	"eval_steps": 500,
	"global_step": 243,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.12422360248447205,
	"grad_norm": 0.45446592569351196,
	"learning_rate": 7.2000000000000005e-06,
	"loss": 2.634676933288574,
	"step": 10
	},
	{
	"epoch": 0.2484472049689441,
	"grad_norm": 0.23934991657733917,
	"learning_rate": 1.5200000000000002e-05,
	"loss": 2.372325325012207,
	"step": 20
	},
	{
	"epoch": 0.37267080745341613,
	"grad_norm": 0.13367633521556854,
	"learning_rate": 1.9983390502829168e-05,
	"loss": 2.020255470275879,
	"step": 30
	},
	{
	"epoch": 0.4968944099378882,
	"grad_norm": 0.16358281672000885,
	"learning_rate": 1.9797166732215078e-05,
	"loss": 1.7821327209472657,
	"step": 40
	},
	{
	"epoch": 0.6211180124223602,
	"grad_norm": 0.1420416533946991,
	"learning_rate": 1.940783098998643e-05,
	"loss": 1.5877119064331056,
	"step": 50
	},
	{
	"epoch": 0.7453416149068323,
	"grad_norm": 0.1504518836736679,
	"learning_rate": 1.8823454869940243e-05,
	"loss": 1.4695100784301758,
	"step": 60
	},
	{
	"epoch": 0.8695652173913043,
	"grad_norm": 0.14376819133758545,
	"learning_rate": 1.8056153485471167e-05,
	"loss": 1.289743995666504,
	"step": 70
	},
	{
	"epoch": 0.9937888198757764,
	"grad_norm": 0.11340057104825974,
	"learning_rate": 1.712183430261319e-05,
	"loss": 1.292655849456787,
	"step": 80
	},
	{
	"epoch": 1.1118012422360248,
	"grad_norm": 0.12464317679405212,
	"learning_rate": 1.6039867351144778e-05,
	"loss": 1.1228573799133301,
	"step": 90
	},
	{
	"epoch": 1.236024844720497,
	"grad_norm": 0.15469282865524292,
	"learning_rate": 1.483268365084351e-05,
	"loss": 1.075217342376709,
	"step": 100
	},
	{
	"epoch": 1.360248447204969,
	"grad_norm": 0.2216130495071411,
	"learning_rate": 1.3525310178198707e-05,
	"loss": 1.010305690765381,
	"step": 110
	},
	{
	"epoch": 1.484472049689441,
	"grad_norm": 0.1635051667690277,
	"learning_rate": 1.2144851014515055e-05,
	"loss": 1.0296391487121581,
	"step": 120
	},
	{
	"epoch": 1.608695652173913,
	"grad_norm": 0.1360178142786026,
	"learning_rate": 1.0719925432091671e-05,
	"loss": 0.9926811218261719,
	"step": 130
	},
	{
	"epoch": 1.7329192546583851,
	"grad_norm": 0.1589968353509903,
	"learning_rate": 9.28007456790833e-06,
	"loss": 0.9096580505371094,
	"step": 140
	},
	{
	"epoch": 1.8571428571428572,
	"grad_norm": 0.25687724351882935,
	"learning_rate": 7.855148985484946e-06,
	"loss": 0.9168188095092773,
	"step": 150
	},
	{
	"epoch": 1.981366459627329,
	"grad_norm": 0.2077445685863495,
	"learning_rate": 6.474689821801295e-06,
	"loss": 0.8668004035949707,
	"step": 160
	},
	{
	"epoch": 2.099378881987578,
	"grad_norm": 0.20112568140029907,
	"learning_rate": 5.167316349156495e-06,
	"loss": 0.813880729675293,
	"step": 170
	},
	{
	"epoch": 2.2236024844720497,
	"grad_norm": 0.18293920159339905,
	"learning_rate": 3.960132648855226e-06,
	"loss": 0.8557339668273926,
	"step": 180
	},
	{
	"epoch": 2.3478260869565215,
	"grad_norm": 0.19826386868953705,
	"learning_rate": 2.878165697386812e-06,
	"loss": 0.8404932022094727,
	"step": 190
	},
	{
	"epoch": 2.472049689440994,
	"grad_norm": 0.26104530692100525,
	"learning_rate": 1.9438465145288377e-06,
	"loss": 0.7638469696044922,
	"step": 200
	},
	{
	"epoch": 2.596273291925466,
	"grad_norm": 0.1945190578699112,
	"learning_rate": 1.1765451300597574e-06,
	"loss": 0.8175761222839355,
	"step": 210
	},
	{
	"epoch": 2.720496894409938,
	"grad_norm": 0.21797508001327515,
	"learning_rate": 5.921690100135713e-07,
	"loss": 0.7640025615692139,
	"step": 220
	},
	{
	"epoch": 2.8447204968944098,
	"grad_norm": 0.17856864631175995,
	"learning_rate": 2.028332677849254e-07,
	"loss": 0.7627779960632324,
	"step": 230
	},
	{
	"epoch": 2.968944099378882,
	"grad_norm": 0.19549702107906342,
	"learning_rate": 1.6609497170834154e-08,
	"loss": 0.7115848541259766,
	"step": 240
	}
	],
	"logging_steps": 10,
	"max_steps": 243,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.6764376918235546e+17,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}