{ "best_global_step": 46, "best_metric": 2.265411138534546, "best_model_checkpoint": "bert_base_km_10_v2_stsb/checkpoint-46", "epoch": 7.0, "eval_steps": 500, "global_step": 161, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.086790084838867, "learning_rate": 4.9e-05, "loss": 2.711, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.10658543423072875, "eval_loss": 2.354522943496704, "eval_pearson": 0.11120670304289051, "eval_runtime": 0.9923, "eval_samples_per_second": 1511.625, "eval_spearmanr": 0.10196416541856697, "eval_steps_per_second": 6.046, "step": 23 }, { "epoch": 2.0, "grad_norm": 6.981199741363525, "learning_rate": 4.8e-05, "loss": 1.9988, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.2009889425479498, "eval_loss": 2.265411138534546, "eval_pearson": 0.2097104961123283, "eval_runtime": 0.9811, "eval_samples_per_second": 1528.952, "eval_spearmanr": 0.19226738898357132, "eval_steps_per_second": 6.116, "step": 46 }, { "epoch": 3.0, "grad_norm": 4.845885276794434, "learning_rate": 4.7e-05, "loss": 1.8048, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.22149173965356517, "eval_loss": 2.294152021408081, "eval_pearson": 0.2305564079119015, "eval_runtime": 1.0134, "eval_samples_per_second": 1480.098, "eval_spearmanr": 0.21242707139522884, "eval_steps_per_second": 5.92, "step": 69 }, { "epoch": 4.0, "grad_norm": 8.093894004821777, "learning_rate": 4.600000000000001e-05, "loss": 1.5374, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.2617844143516973, "eval_loss": 2.547459602355957, "eval_pearson": 0.2699579327432999, "eval_runtime": 0.9482, "eval_samples_per_second": 1581.894, "eval_spearmanr": 0.25361089596009473, "eval_steps_per_second": 6.328, "step": 92 }, { "epoch": 5.0, "grad_norm": 7.534348964691162, "learning_rate": 4.5e-05, "loss": 1.2573, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.2667936066986546, "eval_loss": 2.6120412349700928, "eval_pearson": 0.26958414545814036, "eval_runtime": 0.9704, "eval_samples_per_second": 1545.769, "eval_spearmanr": 0.26400306793916883, "eval_steps_per_second": 6.183, "step": 115 }, { "epoch": 6.0, "grad_norm": 8.248147964477539, "learning_rate": 4.4000000000000006e-05, "loss": 0.9617, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.2914851323109282, "eval_loss": 2.569234609603882, "eval_pearson": 0.29488939547637344, "eval_runtime": 0.9467, "eval_samples_per_second": 1584.49, "eval_spearmanr": 0.28808086914548303, "eval_steps_per_second": 6.338, "step": 138 }, { "epoch": 7.0, "grad_norm": 12.66100025177002, "learning_rate": 4.3e-05, "loss": 0.7474, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.30780744811664074, "eval_loss": 2.6657228469848633, "eval_pearson": 0.3060453241906782, "eval_runtime": 0.975, "eval_samples_per_second": 1538.411, "eval_spearmanr": 0.3095695720426032, "eval_steps_per_second": 6.154, "step": 161 }, { "epoch": 7.0, "step": 161, "total_flos": 5294141566038528.0, "train_loss": 1.574070948251286, "train_runtime": 86.1378, "train_samples_per_second": 3337.095, "train_steps_per_second": 13.351 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5294141566038528.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }