{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4922698253980463,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.9999999999999997e-05,
"loss": 3.623,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 5.9999999999999995e-05,
"loss": 3.605,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 8.999999999999999e-05,
"loss": 3.5168,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 0.00011999999999999999,
"loss": 3.3511,
"step": 40
},
{
"epoch": 0.12,
"learning_rate": 0.00015,
"loss": 3.1952,
"step": 50
},
{
"epoch": 0.15,
"learning_rate": 0.00017999999999999998,
"loss": 3.0575,
"step": 60
},
{
"epoch": 0.17,
"learning_rate": 0.00020999999999999998,
"loss": 2.8575,
"step": 70
},
{
"epoch": 0.2,
"learning_rate": 0.00023999999999999998,
"loss": 2.7029,
"step": 80
},
{
"epoch": 0.22,
"learning_rate": 0.00027,
"loss": 2.6392,
"step": 90
},
{
"epoch": 0.25,
"learning_rate": 0.0003,
"loss": 2.5679,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 0.00027,
"loss": 2.5392,
"step": 110
},
{
"epoch": 0.3,
"learning_rate": 0.00023999999999999998,
"loss": 2.5153,
"step": 120
},
{
"epoch": 0.32,
"learning_rate": 0.00020999999999999998,
"loss": 2.4822,
"step": 130
},
{
"epoch": 0.34,
"learning_rate": 0.00017999999999999998,
"loss": 2.4678,
"step": 140
},
{
"epoch": 0.37,
"learning_rate": 0.00015,
"loss": 2.4438,
"step": 150
},
{
"epoch": 0.39,
"learning_rate": 0.00011999999999999999,
"loss": 2.4351,
"step": 160
},
{
"epoch": 0.42,
"learning_rate": 8.999999999999999e-05,
"loss": 2.4147,
"step": 170
},
{
"epoch": 0.44,
"learning_rate": 5.9999999999999995e-05,
"loss": 2.3935,
"step": 180
},
{
"epoch": 0.47,
"learning_rate": 2.9999999999999997e-05,
"loss": 2.4104,
"step": 190
},
{
"epoch": 0.49,
"learning_rate": 0.0,
"loss": 2.3838,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 2.723845771535155e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}