Rubywong123's picture
Upload folder using huggingface_hub
fbdcce9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9820971867007673,
"eval_steps": 500,
"global_step": 64,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.030690537084398978,
"grad_norm": 0.13874784250562586,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.5217,
"step": 1
},
{
"epoch": 0.1534526854219949,
"grad_norm": 0.08867673869818674,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.5081,
"step": 5
},
{
"epoch": 0.3069053708439898,
"grad_norm": 0.1708878126934411,
"learning_rate": 9.931806517013612e-06,
"loss": 0.4752,
"step": 10
},
{
"epoch": 0.46035805626598464,
"grad_norm": 0.0963904508895781,
"learning_rate": 9.521785803487888e-06,
"loss": 0.4432,
"step": 15
},
{
"epoch": 0.6138107416879796,
"grad_norm": 0.06982228815595948,
"learning_rate": 8.770533048884483e-06,
"loss": 0.4573,
"step": 20
},
{
"epoch": 0.7672634271099744,
"grad_norm": 0.07015950515433515,
"learning_rate": 7.734740790612137e-06,
"loss": 0.4213,
"step": 25
},
{
"epoch": 0.9207161125319693,
"grad_norm": 0.054896941467642804,
"learning_rate": 6.492574055008474e-06,
"loss": 0.4237,
"step": 30
},
{
"epoch": 0.9820971867007673,
"eval_loss": 0.4264456629753113,
"eval_runtime": 28.9203,
"eval_samples_per_second": 18.845,
"eval_steps_per_second": 4.737,
"step": 32
},
{
"epoch": 1.092071611253197,
"grad_norm": 0.06382244755020999,
"learning_rate": 5.137771711840811e-06,
"loss": 0.4519,
"step": 35
},
{
"epoch": 1.2455242966751918,
"grad_norm": 0.056628063212756805,
"learning_rate": 3.7725725642960047e-06,
"loss": 0.3904,
"step": 40
},
{
"epoch": 1.3989769820971867,
"grad_norm": 0.0505084664996252,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.3524,
"step": 45
},
{
"epoch": 1.5524296675191815,
"grad_norm": 0.05364523140110051,
"learning_rate": 1.4160874341577447e-06,
"loss": 0.3426,
"step": 50
},
{
"epoch": 1.7058823529411766,
"grad_norm": 0.054002137212089386,
"learning_rate": 6.026312439675553e-07,
"loss": 0.3682,
"step": 55
},
{
"epoch": 1.8593350383631715,
"grad_norm": 0.052807283780708696,
"learning_rate": 1.210180868628219e-07,
"loss": 0.3482,
"step": 60
},
{
"epoch": 1.9820971867007673,
"eval_loss": 0.4236310124397278,
"eval_runtime": 28.6054,
"eval_samples_per_second": 19.052,
"eval_steps_per_second": 4.789,
"step": 64
},
{
"epoch": 1.9820971867007673,
"step": 64,
"total_flos": 1.6374020737440154e+17,
"train_loss": 0.41061520762741566,
"train_runtime": 932.5034,
"train_samples_per_second": 3.354,
"train_steps_per_second": 0.069
}
],
"logging_steps": 5,
"max_steps": 64,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6374020737440154e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}