your-model-name / checkpoint-1467 /trainer_state.json
rememz's picture
Upload folder using huggingface_hub
19f66fb verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 1467,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10224948875255624,
"grad_norm": 0.22070330381393433,
"learning_rate": 9.999694946400538e-05,
"loss": 1.2022,
"step": 50
},
{
"epoch": 0.20449897750511248,
"grad_norm": 0.32263442873954773,
"learning_rate": 9.963133532962538e-05,
"loss": 1.0028,
"step": 100
},
{
"epoch": 0.3067484662576687,
"grad_norm": 0.2867143452167511,
"learning_rate": 9.866072190997923e-05,
"loss": 0.9375,
"step": 150
},
{
"epoch": 0.40899795501022496,
"grad_norm": 0.2979172170162201,
"learning_rate": 9.709694085177272e-05,
"loss": 0.9275,
"step": 200
},
{
"epoch": 0.5112474437627812,
"grad_norm": 0.3326389789581299,
"learning_rate": 9.495905443524156e-05,
"loss": 0.9066,
"step": 250
},
{
"epoch": 0.6134969325153374,
"grad_norm": 0.3445422649383545,
"learning_rate": 9.227312320752585e-05,
"loss": 0.9226,
"step": 300
},
{
"epoch": 0.7157464212678937,
"grad_norm": 0.3284899890422821,
"learning_rate": 8.907188830811434e-05,
"loss": 0.8938,
"step": 350
},
{
"epoch": 0.8179959100204499,
"grad_norm": 0.3464341461658478,
"learning_rate": 8.539437235876908e-05,
"loss": 0.9039,
"step": 400
},
{
"epoch": 0.9202453987730062,
"grad_norm": 0.35458436608314514,
"learning_rate": 8.1285403783028e-05,
"loss": 0.888,
"step": 450
},
{
"epoch": 1.0224948875255624,
"grad_norm": 0.3531360626220703,
"learning_rate": 7.679507035376672e-05,
"loss": 0.8834,
"step": 500
},
{
"epoch": 1.1247443762781186,
"grad_norm": 0.38273200392723083,
"learning_rate": 7.197810863000116e-05,
"loss": 0.8308,
"step": 550
},
{
"epoch": 1.2269938650306749,
"grad_norm": 0.3767881691455841,
"learning_rate": 6.689323672561398e-05,
"loss": 0.8212,
"step": 600
},
{
"epoch": 1.329243353783231,
"grad_norm": 0.4009556472301483,
"learning_rate": 6.160243854346398e-05,
"loss": 0.8223,
"step": 650
},
{
"epoch": 1.4314928425357873,
"grad_norm": 0.430261492729187,
"learning_rate": 5.617020819996831e-05,
"loss": 0.8233,
"step": 700
},
{
"epoch": 1.5337423312883436,
"grad_norm": 0.39679399132728577,
"learning_rate": 5.0662763850519936e-05,
"loss": 0.8224,
"step": 750
},
{
"epoch": 1.6359918200408998,
"grad_norm": 0.4061990976333618,
"learning_rate": 4.514724049910228e-05,
"loss": 0.8018,
"step": 800
},
{
"epoch": 1.738241308793456,
"grad_norm": 0.4112933874130249,
"learning_rate": 3.969087163164348e-05,
"loss": 0.8233,
"step": 850
},
{
"epoch": 1.8404907975460123,
"grad_norm": 0.41034209728240967,
"learning_rate": 3.436016964888865e-05,
"loss": 0.8079,
"step": 900
},
{
"epoch": 1.9427402862985685,
"grad_norm": 0.4243488907814026,
"learning_rate": 2.922011508920362e-05,
"loss": 0.8134,
"step": 950
},
{
"epoch": 2.044989775051125,
"grad_norm": 0.432036817073822,
"learning_rate": 2.433336452457431e-05,
"loss": 0.763,
"step": 1000
},
{
"epoch": 2.044989775051125,
"eval_loss": 0.8031564950942993,
"eval_runtime": 238.4795,
"eval_samples_per_second": 0.923,
"eval_steps_per_second": 0.923,
"step": 1000
},
{
"epoch": 2.147239263803681,
"grad_norm": 0.422557532787323,
"learning_rate": 1.975948678544301e-05,
"loss": 0.7697,
"step": 1050
},
{
"epoch": 2.2494887525562373,
"grad_norm": 0.44817760586738586,
"learning_rate": 1.5554236824697687e-05,
"loss": 0.7843,
"step": 1100
},
{
"epoch": 2.3517382413087935,
"grad_norm": 0.4573202133178711,
"learning_rate": 1.176887607231434e-05,
"loss": 0.7643,
"step": 1150
},
{
"epoch": 2.4539877300613497,
"grad_norm": 0.4587993919849396,
"learning_rate": 8.449547565437887e-06,
"loss": 0.7501,
"step": 1200
},
{
"epoch": 2.556237218813906,
"grad_norm": 0.4792107045650482,
"learning_rate": 5.6367134709813644e-06,
"loss": 0.763,
"step": 1250
},
{
"epoch": 2.658486707566462,
"grad_norm": 0.46617749333381653,
"learning_rate": 3.364661857267265e-06,
"loss": 0.7504,
"step": 1300
},
{
"epoch": 2.7607361963190185,
"grad_norm": 0.45423439145088196,
"learning_rate": 1.6610887270981423e-06,
"loss": 0.7539,
"step": 1350
},
{
"epoch": 2.8629856850715747,
"grad_norm": 0.4944806694984436,
"learning_rate": 5.467604072171062e-07,
"loss": 0.7662,
"step": 1400
},
{
"epoch": 2.965235173824131,
"grad_norm": 0.45094653964042664,
"learning_rate": 3.526040958494181e-08,
"loss": 0.7672,
"step": 1450
}
],
"logging_steps": 50,
"max_steps": 1467,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.441743417518326e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}