saepark's picture
Upload folder using huggingface_hub
2e17716 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1,
"global_step": 17,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.058823529411764705,
"grad_norm": 27.238576889038086,
"learning_rate": 5e-06,
"loss": 2.4516,
"step": 1
},
{
"epoch": 0.11764705882352941,
"grad_norm": 13.931468963623047,
"learning_rate": 4.957432749209755e-06,
"loss": 2.069,
"step": 2
},
{
"epoch": 0.17647058823529413,
"grad_norm": 10.875990867614746,
"learning_rate": 4.83118057351089e-06,
"loss": 1.7016,
"step": 3
},
{
"epoch": 0.23529411764705882,
"grad_norm": 9.849050521850586,
"learning_rate": 4.625542839324036e-06,
"loss": 1.4704,
"step": 4
},
{
"epoch": 0.29411764705882354,
"grad_norm": 8.515420913696289,
"learning_rate": 4.3475222930516484e-06,
"loss": 1.2682,
"step": 5
},
{
"epoch": 0.35294117647058826,
"grad_norm": 7.926646709442139,
"learning_rate": 4.006586590948141e-06,
"loss": 1.0279,
"step": 6
},
{
"epoch": 0.4117647058823529,
"grad_norm": 6.248785495758057,
"learning_rate": 3.6143458894413463e-06,
"loss": 1.0175,
"step": 7
},
{
"epoch": 0.47058823529411764,
"grad_norm": 6.138355255126953,
"learning_rate": 3.184157475180208e-06,
"loss": 0.9081,
"step": 8
},
{
"epoch": 0.47058823529411764,
"eval_loss": 1.0566511154174805,
"eval_runtime": 3.7727,
"eval_samples_per_second": 0.795,
"eval_steps_per_second": 0.265,
"step": 8
},
{
"checkpoint_runtime": 60.7829
},
{
"epoch": 0.5294117647058824,
"grad_norm": 2.969385862350464,
"learning_rate": 2.730670898658255e-06,
"loss": 1.1001,
"step": 9
},
{
"epoch": 0.5294117647058824,
"eval_loss": 1.030696988105774,
"eval_runtime": 3.7741,
"eval_samples_per_second": 0.795,
"eval_steps_per_second": 0.265,
"step": 9
},
{
"checkpoint_runtime": 62.4096
},
{
"epoch": 0.5882352941176471,
"grad_norm": 2.176196813583374,
"learning_rate": 2.269329101341745e-06,
"loss": 0.9383,
"step": 10
},
{
"epoch": 0.5882352941176471,
"eval_loss": 1.0097538232803345,
"eval_runtime": 3.7773,
"eval_samples_per_second": 0.794,
"eval_steps_per_second": 0.265,
"step": 10
},
{
"checkpoint_runtime": 65.596
},
{
"epoch": 0.6470588235294118,
"grad_norm": 1.8730698823928833,
"learning_rate": 1.8158425248197931e-06,
"loss": 0.908,
"step": 11
},
{
"epoch": 0.6470588235294118,
"eval_loss": 0.9935200810432434,
"eval_runtime": 3.7904,
"eval_samples_per_second": 0.791,
"eval_steps_per_second": 0.264,
"step": 11
},
{
"checkpoint_runtime": 71.8184
},
{
"epoch": 0.7058823529411765,
"grad_norm": 1.5293439626693726,
"learning_rate": 1.3856541105586545e-06,
"loss": 0.8389,
"step": 12
},
{
"epoch": 0.7058823529411765,
"eval_loss": 0.9816959500312805,
"eval_runtime": 3.7837,
"eval_samples_per_second": 0.793,
"eval_steps_per_second": 0.264,
"step": 12
},
{
"checkpoint_runtime": 71.1631
},
{
"epoch": 0.7647058823529411,
"grad_norm": 1.3013556003570557,
"learning_rate": 9.934134090518593e-07,
"loss": 0.7742,
"step": 13
},
{
"epoch": 0.7647058823529411,
"eval_loss": 0.9741225838661194,
"eval_runtime": 3.7962,
"eval_samples_per_second": 0.79,
"eval_steps_per_second": 0.263,
"step": 13
},
{
"checkpoint_runtime": 73.8347
},
{
"epoch": 0.8235294117647058,
"grad_norm": 1.3270924091339111,
"learning_rate": 6.524777069483526e-07,
"loss": 0.8797,
"step": 14
},
{
"epoch": 0.8235294117647058,
"eval_loss": 0.9687883853912354,
"eval_runtime": 4.1861,
"eval_samples_per_second": 0.717,
"eval_steps_per_second": 0.239,
"step": 14
},
{
"checkpoint_runtime": 76.1419
},
{
"epoch": 0.8823529411764706,
"grad_norm": 1.2872895002365112,
"learning_rate": 3.7445716067596506e-07,
"loss": 0.8989,
"step": 15
},
{
"epoch": 0.8823529411764706,
"eval_loss": 0.9653826355934143,
"eval_runtime": 5.1398,
"eval_samples_per_second": 0.584,
"eval_steps_per_second": 0.195,
"step": 15
},
{
"checkpoint_runtime": 77.0676
},
{
"epoch": 0.9411764705882353,
"grad_norm": 1.2387118339538574,
"learning_rate": 1.6881942648911077e-07,
"loss": 0.8853,
"step": 16
},
{
"epoch": 0.9411764705882353,
"eval_loss": 0.9634800553321838,
"eval_runtime": 4.8803,
"eval_samples_per_second": 0.615,
"eval_steps_per_second": 0.205,
"step": 16
},
{
"checkpoint_runtime": 79.8227
},
{
"epoch": 1.0,
"grad_norm": 1.144616961479187,
"learning_rate": 4.256725079024554e-08,
"loss": 0.802,
"step": 17
},
{
"epoch": 1.0,
"eval_loss": 0.9631189703941345,
"eval_runtime": 4.9196,
"eval_samples_per_second": 0.61,
"eval_steps_per_second": 0.203,
"step": 17
}
],
"logging_steps": 1.0,
"max_steps": 17,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0006846020937318e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}