freelancer-projects-1k-traces / trainer_state.json
mnpham's picture
End of training
cc79849 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.32,
"grad_norm": 3.0088891059134757,
"learning_rate": 2e-05,
"loss": 0.7923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04150344803929329,
"step": 5,
"valid_targets_mean": 4567.2,
"valid_targets_min": 2907
},
{
"epoch": 0.64,
"grad_norm": 0.9748835611922116,
"learning_rate": 3.998096443163716e-05,
"loss": 0.6915,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.037823863327503204,
"step": 10,
"valid_targets_mean": 4694.8,
"valid_targets_min": 1740
},
{
"epoch": 0.96,
"grad_norm": 0.5292027668233105,
"learning_rate": 3.931851652578137e-05,
"loss": 0.6339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03384627401828766,
"step": 15,
"valid_targets_mean": 6716.5,
"valid_targets_min": 3779
},
{
"epoch": 1.256,
"grad_norm": 0.381874909525484,
"learning_rate": 3.774021666356444e-05,
"loss": 0.5937,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0336514413356781,
"step": 20,
"valid_targets_mean": 4429.5,
"valid_targets_min": 1506
},
{
"epoch": 1.576,
"grad_norm": 0.3191036829485198,
"learning_rate": 3.532088886237956e-05,
"loss": 0.5791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03018685057759285,
"step": 25,
"valid_targets_mean": 6942.8,
"valid_targets_min": 5230
},
{
"epoch": 1.896,
"grad_norm": 0.2936828471213373,
"learning_rate": 3.217522858017442e-05,
"loss": 0.5804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02674776315689087,
"step": 30,
"valid_targets_mean": 3804.5,
"valid_targets_min": 1421
},
{
"epoch": 2.192,
"grad_norm": 0.25770700064193613,
"learning_rate": 2.8452365234813992e-05,
"loss": 0.569,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03195125609636307,
"step": 35,
"valid_targets_mean": 4945.2,
"valid_targets_min": 929
},
{
"epoch": 2.512,
"grad_norm": 0.2547784828064005,
"learning_rate": 2.4328792278762058e-05,
"loss": 0.5312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030446790158748627,
"step": 40,
"valid_targets_mean": 5575.0,
"valid_targets_min": 3849
},
{
"epoch": 2.832,
"grad_norm": 0.25899697668472715,
"learning_rate": 2e-05,
"loss": 0.5487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03602331131696701,
"step": 45,
"valid_targets_mean": 5869.8,
"valid_targets_min": 3374
},
{
"epoch": 3.128,
"grad_norm": 0.22099844272575836,
"learning_rate": 1.5671207721237945e-05,
"loss": 0.5284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04063715040683746,
"step": 50,
"valid_targets_mean": 8575.2,
"valid_targets_min": 5225
},
{
"epoch": 3.448,
"grad_norm": 0.2275701442675398,
"learning_rate": 1.1547634765186016e-05,
"loss": 0.5323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02942601963877678,
"step": 55,
"valid_targets_mean": 6379.2,
"valid_targets_min": 1888
},
{
"epoch": 3.768,
"grad_norm": 0.2236212833364483,
"learning_rate": 7.824771419825588e-06,
"loss": 0.5299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.034093089401721954,
"step": 60,
"valid_targets_mean": 5950.8,
"valid_targets_min": 4866
},
{
"epoch": 4.064,
"grad_norm": 0.2179261363674421,
"learning_rate": 4.679111137620442e-06,
"loss": 0.5101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027605395764112473,
"step": 65,
"valid_targets_mean": 5062.5,
"valid_targets_min": 4995
},
{
"epoch": 4.384,
"grad_norm": 0.23062712401292512,
"learning_rate": 2.259783336435566e-06,
"loss": 0.5329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0404040664434433,
"step": 70,
"valid_targets_mean": 6195.2,
"valid_targets_min": 5413
},
{
"epoch": 4.704,
"grad_norm": 0.2295496459564114,
"learning_rate": 6.814834742186361e-07,
"loss": 0.5152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02953474223613739,
"step": 75,
"valid_targets_mean": 5352.2,
"valid_targets_min": 4831
},
{
"epoch": 5.0,
"grad_norm": 0.2811848755096405,
"learning_rate": 1.9035568362844037e-08,
"loss": 0.509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06422175467014313,
"step": 80,
"valid_targets_mean": 8112.2,
"valid_targets_min": 6354
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06422175467014313,
"step": 80,
"total_flos": 2.548584738193408e+17,
"train_loss": 0.5735973387956619,
"train_runtime": 10563.2448,
"train_samples_per_second": 0.473,
"train_steps_per_second": 0.008,
"valid_targets_mean": 8112.2,
"valid_targets_min": 6354
}
],
"logging_steps": 5,
"max_steps": 80,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.548584738193408e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}