faiqwild's picture
Upload folder using huggingface_hub
c9b9fb8 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5784526391901663,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.028922631959508314,
"grad_norm": 0.3374865651130676,
"learning_rate": 0.00018,
"loss": 2.0231,
"step": 10
},
{
"epoch": 0.05784526391901663,
"grad_norm": 0.20978349447250366,
"learning_rate": 0.0001905263157894737,
"loss": 1.1149,
"step": 20
},
{
"epoch": 0.08676789587852494,
"grad_norm": 0.11324426531791687,
"learning_rate": 0.00018,
"loss": 0.8403,
"step": 30
},
{
"epoch": 0.11569052783803326,
"grad_norm": 0.10928981751203537,
"learning_rate": 0.00016947368421052633,
"loss": 0.7539,
"step": 40
},
{
"epoch": 0.14461315979754158,
"grad_norm": 0.08820035308599472,
"learning_rate": 0.00015894736842105264,
"loss": 0.7341,
"step": 50
},
{
"epoch": 0.1735357917570499,
"grad_norm": 0.1019802913069725,
"learning_rate": 0.00014842105263157895,
"loss": 0.6881,
"step": 60
},
{
"epoch": 0.2024584237165582,
"grad_norm": 0.10406459867954254,
"learning_rate": 0.00013789473684210527,
"loss": 0.6202,
"step": 70
},
{
"epoch": 0.2313810556760665,
"grad_norm": 0.1173112764954567,
"learning_rate": 0.00012736842105263158,
"loss": 0.6308,
"step": 80
},
{
"epoch": 0.2603036876355748,
"grad_norm": 0.11734358966350555,
"learning_rate": 0.00011684210526315791,
"loss": 0.6048,
"step": 90
},
{
"epoch": 0.28922631959508316,
"grad_norm": 0.13270510733127594,
"learning_rate": 0.00010631578947368421,
"loss": 0.6347,
"step": 100
},
{
"epoch": 0.31814895155459144,
"grad_norm": 0.127024307847023,
"learning_rate": 9.578947368421052e-05,
"loss": 0.5998,
"step": 110
},
{
"epoch": 0.3470715835140998,
"grad_norm": 0.1302337944507599,
"learning_rate": 8.526315789473685e-05,
"loss": 0.5987,
"step": 120
},
{
"epoch": 0.3759942154736081,
"grad_norm": 0.11779110878705978,
"learning_rate": 7.473684210526316e-05,
"loss": 0.5897,
"step": 130
},
{
"epoch": 0.4049168474331164,
"grad_norm": 0.12403657287359238,
"learning_rate": 6.421052631578948e-05,
"loss": 0.5914,
"step": 140
},
{
"epoch": 0.43383947939262474,
"grad_norm": 0.12974581122398376,
"learning_rate": 5.368421052631579e-05,
"loss": 0.603,
"step": 150
},
{
"epoch": 0.462762111352133,
"grad_norm": 0.12593378126621246,
"learning_rate": 4.3157894736842105e-05,
"loss": 0.5707,
"step": 160
},
{
"epoch": 0.49168474331164136,
"grad_norm": 0.1275719851255417,
"learning_rate": 3.2631578947368426e-05,
"loss": 0.546,
"step": 170
},
{
"epoch": 0.5206073752711496,
"grad_norm": 0.1218939870595932,
"learning_rate": 2.2105263157894736e-05,
"loss": 0.5871,
"step": 180
},
{
"epoch": 0.549530007230658,
"grad_norm": 0.10984767973423004,
"learning_rate": 1.1578947368421053e-05,
"loss": 0.5978,
"step": 190
},
{
"epoch": 0.5784526391901663,
"grad_norm": 0.1242329478263855,
"learning_rate": 1.0526315789473685e-06,
"loss": 0.5879,
"step": 200
},
{
"epoch": 0.5784526391901663,
"step": 200,
"total_flos": 4.2983824905968026e+17,
"train_loss": 0.7258491277694702,
"train_runtime": 2490.1271,
"train_samples_per_second": 2.57,
"train_steps_per_second": 0.08
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.2983824905968026e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}