bert_base_rand_10_v1_stsb / trainer_state.json
Hartunka's picture
End of training
91d6585 verified
{
"best_global_step": 69,
"best_metric": 2.342188835144043,
"best_model_checkpoint": "bert_base_rand_10_v1_stsb/checkpoint-69",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 184,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 6.409469127655029,
"learning_rate": 4.9e-05,
"loss": 2.5501,
"step": 23
},
{
"epoch": 1.0,
"eval_combined_score": 0.11660962062209998,
"eval_loss": 2.5206260681152344,
"eval_pearson": 0.12466393002920396,
"eval_runtime": 0.9623,
"eval_samples_per_second": 1558.766,
"eval_spearmanr": 0.10855531121499598,
"eval_steps_per_second": 6.235,
"step": 23
},
{
"epoch": 2.0,
"grad_norm": 7.4389872550964355,
"learning_rate": 4.8e-05,
"loss": 1.9123,
"step": 46
},
{
"epoch": 2.0,
"eval_combined_score": 0.15291538722011827,
"eval_loss": 2.3921282291412354,
"eval_pearson": 0.16244375887475837,
"eval_runtime": 0.9815,
"eval_samples_per_second": 1528.239,
"eval_spearmanr": 0.1433870155654782,
"eval_steps_per_second": 6.113,
"step": 46
},
{
"epoch": 3.0,
"grad_norm": 7.024801254272461,
"learning_rate": 4.7e-05,
"loss": 1.6606,
"step": 69
},
{
"epoch": 3.0,
"eval_combined_score": 0.22688924377276753,
"eval_loss": 2.342188835144043,
"eval_pearson": 0.22998859981501937,
"eval_runtime": 0.9655,
"eval_samples_per_second": 1553.543,
"eval_spearmanr": 0.2237898877305157,
"eval_steps_per_second": 6.214,
"step": 69
},
{
"epoch": 4.0,
"grad_norm": 8.618351936340332,
"learning_rate": 4.600000000000001e-05,
"loss": 1.2907,
"step": 92
},
{
"epoch": 4.0,
"eval_combined_score": 0.26417223938248946,
"eval_loss": 2.5929768085479736,
"eval_pearson": 0.2616769184082946,
"eval_runtime": 0.9533,
"eval_samples_per_second": 1573.558,
"eval_spearmanr": 0.2666675603566844,
"eval_steps_per_second": 6.294,
"step": 92
},
{
"epoch": 5.0,
"grad_norm": 9.85341739654541,
"learning_rate": 4.5e-05,
"loss": 0.9783,
"step": 115
},
{
"epoch": 5.0,
"eval_combined_score": 0.28284175884292595,
"eval_loss": 2.470913887023926,
"eval_pearson": 0.28538506851887113,
"eval_runtime": 0.9872,
"eval_samples_per_second": 1519.39,
"eval_spearmanr": 0.28029844916698077,
"eval_steps_per_second": 6.078,
"step": 115
},
{
"epoch": 6.0,
"grad_norm": 9.521453857421875,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.7673,
"step": 138
},
{
"epoch": 6.0,
"eval_combined_score": 0.3047270812690138,
"eval_loss": 2.468660593032837,
"eval_pearson": 0.3073468779629231,
"eval_runtime": 0.9776,
"eval_samples_per_second": 1534.407,
"eval_spearmanr": 0.3021072845751046,
"eval_steps_per_second": 6.138,
"step": 138
},
{
"epoch": 7.0,
"grad_norm": 14.15671443939209,
"learning_rate": 4.3e-05,
"loss": 0.5922,
"step": 161
},
{
"epoch": 7.0,
"eval_combined_score": 0.3050835905547613,
"eval_loss": 2.4916512966156006,
"eval_pearson": 0.30686700146863255,
"eval_runtime": 0.9563,
"eval_samples_per_second": 1568.561,
"eval_spearmanr": 0.30330017964089,
"eval_steps_per_second": 6.274,
"step": 161
},
{
"epoch": 8.0,
"grad_norm": 5.233458518981934,
"learning_rate": 4.2e-05,
"loss": 0.4832,
"step": 184
},
{
"epoch": 8.0,
"eval_combined_score": 0.29110873134498483,
"eval_loss": 2.7526607513427734,
"eval_pearson": 0.2930574079479448,
"eval_runtime": 1.0031,
"eval_samples_per_second": 1495.347,
"eval_spearmanr": 0.2891600547420248,
"eval_steps_per_second": 5.981,
"step": 184
},
{
"epoch": 8.0,
"step": 184,
"total_flos": 6050447504044032.0,
"train_loss": 1.2793446831081225,
"train_runtime": 98.3058,
"train_samples_per_second": 2924.039,
"train_steps_per_second": 11.698
}
],
"logging_steps": 1,
"max_steps": 1150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6050447504044032.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}