{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9917355371900827,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.024793388429752067,
"grad_norm": 0.07427436731705139,
"learning_rate": 1.25e-06,
"loss": 0.4198,
"step": 1
},
{
"epoch": 0.12396694214876033,
"grad_norm": 0.06996875359362274,
"learning_rate": 6.25e-06,
"loss": 0.4294,
"step": 5
},
{
"epoch": 0.24793388429752067,
"grad_norm": 0.07459353999937281,
"learning_rate": 9.980973490458728e-06,
"loss": 0.3886,
"step": 10
},
{
"epoch": 0.371900826446281,
"grad_norm": 0.08674231581833301,
"learning_rate": 9.768584753741134e-06,
"loss": 0.4137,
"step": 15
},
{
"epoch": 0.49586776859504134,
"grad_norm": 0.08495439793697043,
"learning_rate": 9.330127018922195e-06,
"loss": 0.4006,
"step": 20
},
{
"epoch": 0.6198347107438017,
"grad_norm": 0.06829258686322341,
"learning_rate": 8.68638668405062e-06,
"loss": 0.3411,
"step": 25
},
{
"epoch": 0.743801652892562,
"grad_norm": 0.07146602412363501,
"learning_rate": 7.86788218175523e-06,
"loss": 0.3623,
"step": 30
},
{
"epoch": 0.8677685950413223,
"grad_norm": 0.0684124113756464,
"learning_rate": 6.913417161825449e-06,
"loss": 0.3255,
"step": 35
},
{
"epoch": 0.9917355371900827,
"grad_norm": 0.08832822341085747,
"learning_rate": 5.8682408883346535e-06,
"loss": 0.3393,
"step": 40
},
{
"epoch": 0.9917355371900827,
"eval_loss": 0.432477205991745,
"eval_runtime": 28.4008,
"eval_samples_per_second": 19.33,
"eval_steps_per_second": 4.859,
"step": 40
},
{
"epoch": 1.1239669421487604,
"grad_norm": 0.07536649029047712,
"learning_rate": 4.781903063173321e-06,
"loss": 0.3563,
"step": 45
},
{
"epoch": 1.2479338842975207,
"grad_norm": 0.10590347165075693,
"learning_rate": 3.705904774487396e-06,
"loss": 0.2761,
"step": 50
},
{
"epoch": 1.3719008264462809,
"grad_norm": 0.0905785145173113,
"learning_rate": 2.6912569338248317e-06,
"loss": 0.2485,
"step": 55
},
{
"epoch": 1.4958677685950414,
"grad_norm": 0.09878550778765635,
"learning_rate": 1.7860619515673034e-06,
"loss": 0.2721,
"step": 60
},
{
"epoch": 1.6198347107438016,
"grad_norm": 0.10195708282293185,
"learning_rate": 1.0332332985438248e-06,
"loss": 0.2466,
"step": 65
},
{
"epoch": 1.743801652892562,
"grad_norm": 0.0922397167738883,
"learning_rate": 4.6846106481675035e-07,
"loss": 0.2357,
"step": 70
},
{
"epoch": 1.8677685950413223,
"grad_norm": 0.09959951208433296,
"learning_rate": 1.185199644003332e-07,
"loss": 0.228,
"step": 75
},
{
"epoch": 1.9917355371900827,
"grad_norm": 0.10210514342941603,
"learning_rate": 0.0,
"loss": 0.2399,
"step": 80
},
{
"epoch": 1.9917355371900827,
"eval_loss": 0.44621971249580383,
"eval_runtime": 28.3617,
"eval_samples_per_second": 19.357,
"eval_steps_per_second": 4.866,
"step": 80
},
{
"epoch": 1.9917355371900827,
"step": 80,
"total_flos": 1.979713786478592e+17,
"train_loss": 0.3188592839986086,
"train_runtime": 1045.198,
"train_samples_per_second": 3.703,
"train_steps_per_second": 0.077
}
],
"logging_steps": 5,
"max_steps": 80,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.979713786478592e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}