{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 243,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09917355371900827,
"grad_norm": 0.05352747067809105,
"learning_rate": 8.400000000000001e-05,
"loss": 0.7596395015716553,
"step": 8
},
{
"epoch": 0.19834710743801653,
"grad_norm": 0.024916447699069977,
"learning_rate": 0.00018,
"loss": 0.6222814321517944,
"step": 16
},
{
"epoch": 0.2975206611570248,
"grad_norm": 0.016597295179963112,
"learning_rate": 0.00017945376095861547,
"loss": 0.5472516417503357,
"step": 24
},
{
"epoch": 0.39669421487603307,
"grad_norm": 0.012275703251361847,
"learning_rate": 0.00017782167443646923,
"loss": 0.517052948474884,
"step": 32
},
{
"epoch": 0.49586776859504134,
"grad_norm": 0.011337196454405785,
"learning_rate": 0.00017512355175305713,
"loss": 0.501731276512146,
"step": 40
},
{
"epoch": 0.5950413223140496,
"grad_norm": 0.012592756189405918,
"learning_rate": 0.000171392144462782,
"loss": 0.5070059299468994,
"step": 48
},
{
"epoch": 0.6942148760330579,
"grad_norm": 0.012089293450117111,
"learning_rate": 0.00016667274679544943,
"loss": 0.48694121837615967,
"step": 56
},
{
"epoch": 0.7933884297520661,
"grad_norm": 0.01202303171157837,
"learning_rate": 0.00016102264584567545,
"loss": 0.4653348922729492,
"step": 64
},
{
"epoch": 0.8925619834710744,
"grad_norm": 0.011642170138657093,
"learning_rate": 0.00015451042618516063,
"loss": 0.4749022126197815,
"step": 72
},
{
"epoch": 0.9917355371900827,
"grad_norm": 0.01265915110707283,
"learning_rate": 0.00014721513733889716,
"loss": 0.475362092256546,
"step": 80
},
{
"epoch": 1.0867768595041323,
"grad_norm": 0.013994473032653332,
"learning_rate": 0.00013922533423101844,
"loss": 0.4846913516521454,
"step": 88
},
{
"epoch": 1.1859504132231404,
"grad_norm": 0.015602202154695988,
"learning_rate": 0.00013063800224798005,
"loss": 0.4634976089000702,
"step": 96
},
{
"epoch": 1.2851239669421488,
"grad_norm": 0.015496148727834225,
"learning_rate": 0.00012155737996734791,
"loss": 0.4593273103237152,
"step": 104
},
{
"epoch": 1.384297520661157,
"grad_norm": 0.014515814371407032,
"learning_rate": 0.00011209369384267194,
"loss": 0.47245365381240845,
"step": 112
},
{
"epoch": 1.4834710743801653,
"grad_norm": 0.01587059162557125,
"learning_rate": 0.00010236182020365675,
"loss": 0.4527878761291504,
"step": 120
},
{
"epoch": 1.5826446280991735,
"grad_norm": 0.016186628490686417,
"learning_rate": 9.24798908131346e-05,
"loss": 0.4529184401035309,
"step": 128
},
{
"epoch": 1.6818181818181817,
"grad_norm": 0.015668360516428947,
"learning_rate": 8.25678589074901e-05,
"loss": 0.45313313603401184,
"step": 136
},
{
"epoch": 1.78099173553719,
"grad_norm": 0.0156264491379261,
"learning_rate": 7.274604312686357e-05,
"loss": 0.44556480646133423,
"step": 144
},
{
"epoch": 1.8801652892561984,
"grad_norm": 0.01666293293237686,
"learning_rate": 6.313366700984752e-05,
"loss": 0.44562897086143494,
"step": 152
},
{
"epoch": 1.9793388429752066,
"grad_norm": 0.01604226417839527,
"learning_rate": 5.384741178123277e-05,
"loss": 0.459658145904541,
"step": 160
},
{
"epoch": 2.074380165289256,
"grad_norm": 0.017228346318006516,
"learning_rate": 4.500000000000002e-05,
"loss": 0.435981810092926,
"step": 168
},
{
"epoch": 2.1735537190082646,
"grad_norm": 0.01715254969894886,
"learning_rate": 3.669882726015181e-05,
"loss": 0.4425530731678009,
"step": 176
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.016410550102591515,
"learning_rate": 2.904465855368332e-05,
"loss": 0.4361629784107208,
"step": 184
},
{
"epoch": 2.371900826446281,
"grad_norm": 0.01654178649187088,
"learning_rate": 2.213040512007935e-05,
"loss": 0.4368935823440552,
"step": 192
},
{
"epoch": 2.4710743801652892,
"grad_norm": 0.017943061888217926,
"learning_rate": 1.6039996629727944e-05,
"loss": 0.44572409987449646,
"step": 200
},
{
"epoch": 2.5702479338842976,
"grad_norm": 0.01812249794602394,
"learning_rate": 1.0847362391415993e-05,
"loss": 0.4320479929447174,
"step": 208
},
{
"epoch": 2.669421487603306,
"grad_norm": 0.017407618463039398,
"learning_rate": 6.6155339506678416e-06,
"loss": 0.43307027220726013,
"step": 216
},
{
"epoch": 2.768595041322314,
"grad_norm": 0.017422957345843315,
"learning_rate": 3.395879972167463e-06,
"loss": 0.4369695782661438,
"step": 224
},
{
"epoch": 2.8677685950413223,
"grad_norm": 0.018260862678289413,
"learning_rate": 1.227482693754991e-06,
"loss": 0.43731415271759033,
"step": 232
},
{
"epoch": 2.9669421487603307,
"grad_norm": 0.018071839585900307,
"learning_rate": 1.3666352100154435e-07,
"loss": 0.4352337419986725,
"step": 240
}
],
"logging_steps": 8,
"max_steps": 243,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.997164774211256e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}