Incomple's picture
End of training
a513610 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9996261682242991,
"eval_steps": 134,
"global_step": 1337,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05009345794392523,
"grad_norm": 2.942253589630127,
"learning_rate": 5e-07,
"loss": 0.4451,
"step": 67
},
{
"epoch": 0.10018691588785046,
"grad_norm": 6.863779067993164,
"learning_rate": 1e-06,
"loss": 0.427,
"step": 134
},
{
"epoch": 0.1502803738317757,
"grad_norm": 2.3883817195892334,
"learning_rate": 9.443059019118869e-07,
"loss": 0.3916,
"step": 201
},
{
"epoch": 0.20037383177570092,
"grad_norm": 4.304512977600098,
"learning_rate": 8.886118038237738e-07,
"loss": 0.3516,
"step": 268
},
{
"epoch": 0.2504672897196262,
"grad_norm": 2.8164753913879395,
"learning_rate": 8.329177057356608e-07,
"loss": 0.2772,
"step": 335
},
{
"epoch": 0.3005607476635514,
"grad_norm": 1.764276146888733,
"learning_rate": 7.772236076475478e-07,
"loss": 0.2265,
"step": 402
},
{
"epoch": 0.3506542056074766,
"grad_norm": 2.509056806564331,
"learning_rate": 7.215295095594347e-07,
"loss": 0.2223,
"step": 469
},
{
"epoch": 0.40074766355140184,
"grad_norm": 1.2209707498550415,
"learning_rate": 6.658354114713217e-07,
"loss": 0.1904,
"step": 536
},
{
"epoch": 0.4508411214953271,
"grad_norm": 1.6530442237854004,
"learning_rate": 6.101413133832086e-07,
"loss": 0.1972,
"step": 603
},
{
"epoch": 0.5009345794392523,
"grad_norm": 2.5448479652404785,
"learning_rate": 5.544472152950955e-07,
"loss": 0.1901,
"step": 670
},
{
"epoch": 0.5510280373831775,
"grad_norm": 2.0424203872680664,
"learning_rate": 4.987531172069825e-07,
"loss": 0.1809,
"step": 737
},
{
"epoch": 0.6011214953271028,
"grad_norm": 2.789212226867676,
"learning_rate": 4.4305901911886947e-07,
"loss": 0.1667,
"step": 804
},
{
"epoch": 0.6512149532710281,
"grad_norm": 2.038717269897461,
"learning_rate": 3.873649210307564e-07,
"loss": 0.1713,
"step": 871
},
{
"epoch": 0.7013084112149532,
"grad_norm": 1.706325888633728,
"learning_rate": 3.3167082294264335e-07,
"loss": 0.1628,
"step": 938
},
{
"epoch": 0.7514018691588785,
"grad_norm": 1.2791858911514282,
"learning_rate": 2.7597672485453034e-07,
"loss": 0.1411,
"step": 1005
},
{
"epoch": 0.8014953271028037,
"grad_norm": 2.504589557647705,
"learning_rate": 2.2028262676641728e-07,
"loss": 0.1502,
"step": 1072
},
{
"epoch": 0.851588785046729,
"grad_norm": 2.502255439758301,
"learning_rate": 1.6458852867830422e-07,
"loss": 0.1568,
"step": 1139
},
{
"epoch": 0.9016822429906542,
"grad_norm": 1.224613904953003,
"learning_rate": 1.0889443059019118e-07,
"loss": 0.134,
"step": 1206
},
{
"epoch": 0.9517757009345794,
"grad_norm": 3.104508876800537,
"learning_rate": 5.320033250207814e-08,
"loss": 0.1404,
"step": 1273
},
{
"epoch": 0.9996261682242991,
"step": 1337,
"total_flos": 9.176656366338048e+16,
"train_loss": 0.2238639939963684,
"train_runtime": 1988.5984,
"train_samples_per_second": 5.381,
"train_steps_per_second": 0.672
}
],
"logging_steps": 67,
"max_steps": 1337,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.176656366338048e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}