Sailor_chatbot / last-checkpoint /trainer_state.json
HiTruong's picture
Training in progress, step 510, checkpoint
0b7e97b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.299820466786354,
"eval_steps": 500,
"global_step": 510,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.9299820466786355,
"grad_norm": 1.410841941833496,
"learning_rate": 4.917503719406088e-05,
"loss": 2.132,
"step": 51
},
{
"epoch": 5.859964093357271,
"grad_norm": 1.4861546754837036,
"learning_rate": 4.593820100105355e-05,
"loss": 1.5508,
"step": 102
},
{
"epoch": 8.789946140035907,
"grad_norm": 1.5175491571426392,
"learning_rate": 4.056969451220282e-05,
"loss": 1.0115,
"step": 153
},
{
"epoch": 11.719928186714542,
"grad_norm": 1.5818439722061157,
"learning_rate": 3.3616073079362926e-05,
"loss": 0.5528,
"step": 204
},
{
"epoch": 14.649910233393177,
"grad_norm": 1.2990632057189941,
"learning_rate": 2.578526897695321e-05,
"loss": 0.2567,
"step": 255
},
{
"epoch": 17.579892280071814,
"grad_norm": 0.6602067351341248,
"learning_rate": 1.7874518438250597e-05,
"loss": 0.1087,
"step": 306
},
{
"epoch": 20.50987432675045,
"grad_norm": 0.31337055563926697,
"learning_rate": 1.0689196860135234e-05,
"loss": 0.0477,
"step": 357
},
{
"epoch": 23.439856373429084,
"grad_norm": 0.3955017030239105,
"learning_rate": 4.960825378228082e-06,
"loss": 0.0299,
"step": 408
},
{
"epoch": 26.36983842010772,
"grad_norm": 0.2580316364765167,
"learning_rate": 1.2725963752426379e-06,
"loss": 0.0258,
"step": 459
},
{
"epoch": 29.299820466786354,
"grad_norm": 0.2776833474636078,
"learning_rate": 0.0,
"loss": 0.0242,
"step": 510
}
],
"logging_steps": 51,
"max_steps": 510,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 4,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.4841996401198694e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}