shulijia's picture
Training in progress, step 135, checkpoint
0c8dc17 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 100,
"global_step": 135,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22535211267605634,
"grad_norm": 2.90373158454895,
"learning_rate": 6.4285714285714295e-06,
"loss": 0.8135,
"mean_token_accuracy": 0.3537426620721817,
"num_tokens": 81920.0,
"step": 10
},
{
"epoch": 0.4507042253521127,
"grad_norm": 2.4647960662841797,
"learning_rate": 9.586776859504134e-06,
"loss": 0.5292,
"mean_token_accuracy": 0.4071795493364334,
"num_tokens": 163840.0,
"step": 20
},
{
"epoch": 0.676056338028169,
"grad_norm": 2.641406297683716,
"learning_rate": 8.760330578512397e-06,
"loss": 0.5129,
"mean_token_accuracy": 0.41124021336436273,
"num_tokens": 245760.0,
"step": 30
},
{
"epoch": 0.9014084507042254,
"grad_norm": 2.4985663890838623,
"learning_rate": 7.933884297520661e-06,
"loss": 0.5123,
"mean_token_accuracy": 0.40974804051220415,
"num_tokens": 327680.0,
"step": 40
},
{
"epoch": 1.1126760563380282,
"grad_norm": 2.1662192344665527,
"learning_rate": 7.107438016528926e-06,
"loss": 0.4303,
"mean_token_accuracy": 0.4234572724501292,
"num_tokens": 404480.0,
"step": 50
},
{
"epoch": 1.3380281690140845,
"grad_norm": 2.3111164569854736,
"learning_rate": 6.280991735537191e-06,
"loss": 0.4318,
"mean_token_accuracy": 0.42314089871942995,
"num_tokens": 486400.0,
"step": 60
},
{
"epoch": 1.563380281690141,
"grad_norm": 2.6036767959594727,
"learning_rate": 5.4545454545454545e-06,
"loss": 0.4583,
"mean_token_accuracy": 0.41168052703142166,
"num_tokens": 568320.0,
"step": 70
},
{
"epoch": 1.788732394366197,
"grad_norm": 2.100604772567749,
"learning_rate": 4.62809917355372e-06,
"loss": 0.4329,
"mean_token_accuracy": 0.4169642850756645,
"num_tokens": 650240.0,
"step": 80
},
{
"epoch": 2.0,
"grad_norm": 4.51124382019043,
"learning_rate": 3.801652892561984e-06,
"loss": 0.4272,
"mean_token_accuracy": 0.4280365296204885,
"num_tokens": 727040.0,
"step": 90
},
{
"epoch": 2.2253521126760565,
"grad_norm": 2.14109468460083,
"learning_rate": 2.9752066115702483e-06,
"loss": 0.3818,
"step": 100
},
{
"epoch": 2.2253521126760565,
"eval_loss": 0.9899753332138062,
"eval_mean_token_accuracy": 0.7401255607604981,
"eval_num_tokens": 808960.0,
"eval_runtime": 2.7529,
"eval_samples_per_second": 28.697,
"eval_steps_per_second": 1.816,
"step": 100
},
{
"epoch": 2.4507042253521125,
"grad_norm": 2.369915723800659,
"learning_rate": 2.1487603305785124e-06,
"loss": 0.3967,
"mean_token_accuracy": 0.4355430521070957,
"num_tokens": 890880.0,
"step": 110
},
{
"epoch": 2.676056338028169,
"grad_norm": 2.0789310932159424,
"learning_rate": 1.322314049586777e-06,
"loss": 0.3508,
"mean_token_accuracy": 0.44587817750871184,
"num_tokens": 972800.0,
"step": 120
},
{
"epoch": 2.9014084507042255,
"grad_norm": 2.4127914905548096,
"learning_rate": 4.958677685950413e-07,
"loss": 0.3784,
"mean_token_accuracy": 0.42572162225842475,
"num_tokens": 1054720.0,
"step": 130
}
],
"logging_steps": 10,
"max_steps": 135,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2882137132892160.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}