{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 243,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09917355371900827,
"grad_norm": 0.05352747067809105,
"learning_rate": 8.400000000000001e-05,
"loss": 0.7596395015716553,
"step": 8
},
{
"epoch": 0.19834710743801653,
"grad_norm": 0.024916447699069977,
"learning_rate": 0.00018,
"loss": 0.6222814321517944,
"step": 16
},
{
"epoch": 0.2975206611570248,
"grad_norm": 0.016597295179963112,
"learning_rate": 0.00017945376095861547,
"loss": 0.5472516417503357,
"step": 24
},
{
"epoch": 0.39669421487603307,
"grad_norm": 0.012275703251361847,
"learning_rate": 0.00017782167443646923,
"loss": 0.517052948474884,
"step": 32
},
{
"epoch": 0.49586776859504134,
"grad_norm": 0.011337196454405785,
"learning_rate": 0.00017512355175305713,
"loss": 0.501731276512146,
"step": 40
},
{
"epoch": 0.5950413223140496,
"grad_norm": 0.012592756189405918,
"learning_rate": 0.000171392144462782,
"loss": 0.5070059299468994,
"step": 48
},
{
"epoch": 0.6942148760330579,
"grad_norm": 0.012089293450117111,
"learning_rate": 0.00016667274679544943,
"loss": 0.48694121837615967,
"step": 56
},
{
"epoch": 0.7933884297520661,
"grad_norm": 0.01202303171157837,
"learning_rate": 0.00016102264584567545,
"loss": 0.4653348922729492,
"step": 64
},
{
"epoch": 0.8925619834710744,
"grad_norm": 0.011642170138657093,
"learning_rate": 0.00015451042618516063,
"loss": 0.4749022126197815,
"step": 72
},
{
"epoch": 0.9917355371900827,
"grad_norm": 0.01265915110707283,
"learning_rate": 0.00014721513733889716,
"loss": 0.475362092256546,
"step": 80
},
{
"epoch": 1.0867768595041323,
"grad_norm": 0.013994473032653332,
"learning_rate": 0.00013922533423101844,
"loss": 0.4846913516521454,
"step": 88
},
{
"epoch": 1.1859504132231404,
"grad_norm": 0.015602202154695988,
"learning_rate": 0.00013063800224798005,
"loss": 0.4634976089000702,
"step": 96
},
{
"epoch": 1.2851239669421488,
"grad_norm": 0.015496148727834225,
"learning_rate": 0.00012155737996734791,
"loss": 0.4593273103237152,
"step": 104
},
{
"epoch": 1.384297520661157,
"grad_norm": 0.014515814371407032,
"learning_rate": 0.00011209369384267194,
"loss": 0.47245365381240845,
"step": 112
},
{
"epoch": 1.4834710743801653,
"grad_norm": 0.01587059162557125,
"learning_rate": 0.00010236182020365675,
"loss": 0.4527878761291504,
"step": 120
},
{
"epoch": 1.5826446280991735,
"grad_norm": 0.016186628490686417,
"learning_rate": 9.24798908131346e-05,
"loss": 0.4529184401035309,
"step": 128
},
{
"epoch": 1.6818181818181817,
"grad_norm": 0.015668360516428947,
"learning_rate": 8.25678589074901e-05,
"loss": 0.45313313603401184,
"step": 136
},
{
"epoch": 1.78099173553719,
"grad_norm": 0.0156264491379261,
"learning_rate": 7.274604312686357e-05,
"loss": 0.44556480646133423,
"step": 144
},
{
"epoch": 1.8801652892561984,
"grad_norm": 0.01666293293237686,
"learning_rate": 6.313366700984752e-05,
"loss": 0.44562897086143494,
"step": 152
},
{
"epoch": 1.9793388429752066,
"grad_norm": 0.01604226417839527,
"learning_rate": 5.384741178123277e-05,
"loss": 0.459658145904541,
"step": 160
},
{
"epoch": 2.074380165289256,
"grad_norm": 0.017228346318006516,
"learning_rate": 4.500000000000002e-05,
"loss": 0.435981810092926,
"step": 168
},
{
"epoch": 2.1735537190082646,
"grad_norm": 0.01715254969894886,
"learning_rate": 3.669882726015181e-05,
"loss": 0.4425530731678009,
"step": 176
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.016410550102591515,
"learning_rate": 2.904465855368332e-05,
"loss": 0.4361629784107208,
"step": 184
},
{
"epoch": 2.371900826446281,
"grad_norm": 0.01654178649187088,
"learning_rate": 2.213040512007935e-05,
"loss": 0.4368935823440552,
"step": 192
},
{
"epoch": 2.4710743801652892,
"grad_norm": 0.017943061888217926,
"learning_rate": 1.6039996629727944e-05,
"loss": 0.44572409987449646,
"step": 200
},
{
"epoch": 2.5702479338842976,
"grad_norm": 0.01812249794602394,
"learning_rate": 1.0847362391415993e-05,
"loss": 0.4320479929447174,
"step": 208
},
{
"epoch": 2.669421487603306,
"grad_norm": 0.017407618463039398,
"learning_rate": 6.6155339506678416e-06,
"loss": 0.43307027220726013,
"step": 216
},
{
"epoch": 2.768595041322314,
"grad_norm": 0.017422957345843315,
"learning_rate": 3.395879972167463e-06,
"loss": 0.4369695782661438,
"step": 224
},
{
"epoch": 2.8677685950413223,
"grad_norm": 0.018260862678289413,
"learning_rate": 1.227482693754991e-06,
"loss": 0.43731415271759033,
"step": 232
},
{
"epoch": 2.9669421487603307,
"grad_norm": 0.018071839585900307,
"learning_rate": 1.3666352100154435e-07,
"loss": 0.4352337419986725,
"step": 240
}
],
"logging_steps": 8,
"max_steps": 243,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.997164774211256e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}