lowhipa-large-asc / trainer_state.json
jshrdt's picture
Upload folder using huggingface_hub
47386b9 verified
{
"best_metric": 0.15153372287750244,
"best_model_checkpoint": "../models/b_ll_asc1k-2/checkpoint-378",
"epoch": 5.1,
"eval_steps": 126,
"global_step": 378,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015873015873015873,
"grad_norm": 1.7664669752120972,
"learning_rate": 1e-05,
"loss": 2.9008,
"step": 1
},
{
"epoch": 0.1,
"grad_norm": 1.1034066677093506,
"learning_rate": 0.00063,
"loss": 1.2295,
"step": 63
},
{
"epoch": 1.1,
"grad_norm": 0.6373379230499268,
"learning_rate": 0.0009509433962264152,
"loss": 0.2402,
"step": 126
},
{
"epoch": 1.1,
"eval_loss": 0.20612144470214844,
"eval_runtime": 25.198,
"eval_samples_per_second": 7.223,
"eval_steps_per_second": 0.913,
"step": 126
},
{
"epoch": 2.1,
"grad_norm": 0.5278385281562805,
"learning_rate": 0.0008320754716981132,
"loss": 0.1486,
"step": 189
},
{
"epoch": 3.1,
"grad_norm": 0.562412440776825,
"learning_rate": 0.0007132075471698113,
"loss": 0.1,
"step": 252
},
{
"epoch": 3.1,
"eval_loss": 0.1705094277858734,
"eval_runtime": 24.6952,
"eval_samples_per_second": 7.37,
"eval_steps_per_second": 0.931,
"step": 252
},
{
"epoch": 4.1,
"grad_norm": 0.3925608992576599,
"learning_rate": 0.0005943396226415095,
"loss": 0.0655,
"step": 315
},
{
"epoch": 5.1,
"grad_norm": 0.39186909794807434,
"learning_rate": 0.0004754716981132076,
"loss": 0.0411,
"step": 378
},
{
"epoch": 5.1,
"eval_loss": 0.15153372287750244,
"eval_runtime": 24.8181,
"eval_samples_per_second": 7.333,
"eval_steps_per_second": 0.927,
"step": 378
}
],
"logging_steps": 63,
"max_steps": 630,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 126,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.28296230912e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}