{ "best_global_step": 92, "best_metric": 2.239424228668213, "best_model_checkpoint": "bert_base_km_5_v1_stsb/checkpoint-92", "epoch": 9.0, "eval_steps": 500, "global_step": 207, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.2634968757629395, "learning_rate": 4.9e-05, "loss": 2.6517, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.12675379829831443, "eval_loss": 2.325453996658325, "eval_pearson": 0.1330184139107724, "eval_runtime": 0.9532, "eval_samples_per_second": 1573.642, "eval_spearmanr": 0.12048918268585644, "eval_steps_per_second": 6.295, "step": 23 }, { "epoch": 2.0, "grad_norm": 6.955192565917969, "learning_rate": 4.8e-05, "loss": 1.9658, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.2335786038892586, "eval_loss": 2.3460726737976074, "eval_pearson": 0.23856360343594119, "eval_runtime": 0.9829, "eval_samples_per_second": 1526.133, "eval_spearmanr": 0.22859360434257603, "eval_steps_per_second": 6.105, "step": 46 }, { "epoch": 3.0, "grad_norm": 9.839750289916992, "learning_rate": 4.7e-05, "loss": 1.6725, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.2386789341364155, "eval_loss": 2.437448024749756, "eval_pearson": 0.24243548982978144, "eval_runtime": 0.9837, "eval_samples_per_second": 1524.783, "eval_spearmanr": 0.23492237844304958, "eval_steps_per_second": 6.099, "step": 69 }, { "epoch": 4.0, "grad_norm": 16.305805206298828, "learning_rate": 4.600000000000001e-05, "loss": 1.2633, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.32212437726050713, "eval_loss": 2.239424228668213, "eval_pearson": 0.3226858166168006, "eval_runtime": 0.9602, "eval_samples_per_second": 1562.241, "eval_spearmanr": 0.3215629379042137, "eval_steps_per_second": 6.249, "step": 92 }, { "epoch": 5.0, "grad_norm": 11.919197082519531, "learning_rate": 4.5e-05, "loss": 0.877, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.31245569201685475, "eval_loss": 2.488182783126831, "eval_pearson": 0.3102391372649197, "eval_runtime": 0.9527, "eval_samples_per_second": 1574.471, "eval_spearmanr": 0.31467224676878985, "eval_steps_per_second": 6.298, "step": 115 }, { "epoch": 6.0, "grad_norm": 9.296478271484375, "learning_rate": 4.4000000000000006e-05, "loss": 0.6898, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.3107272405919173, "eval_loss": 2.4794673919677734, "eval_pearson": 0.3146900459752465, "eval_runtime": 0.9695, "eval_samples_per_second": 1547.22, "eval_spearmanr": 0.3067644352085881, "eval_steps_per_second": 6.189, "step": 138 }, { "epoch": 7.0, "grad_norm": 7.156578540802002, "learning_rate": 4.3e-05, "loss": 0.4742, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.2962889521612072, "eval_loss": 2.667475700378418, "eval_pearson": 0.2998348435139836, "eval_runtime": 1.004, "eval_samples_per_second": 1494.084, "eval_spearmanr": 0.2927430608084309, "eval_steps_per_second": 5.976, "step": 161 }, { "epoch": 8.0, "grad_norm": 6.3387346267700195, "learning_rate": 4.2e-05, "loss": 0.3676, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.36138029385170867, "eval_loss": 2.336973190307617, "eval_pearson": 0.3624171116259935, "eval_runtime": 0.9576, "eval_samples_per_second": 1566.335, "eval_spearmanr": 0.3603434760774238, "eval_steps_per_second": 6.265, "step": 184 }, { "epoch": 9.0, "grad_norm": 10.670768737792969, "learning_rate": 4.1e-05, "loss": 0.3107, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.3364466316742135, "eval_loss": 2.4862287044525146, "eval_pearson": 0.33953987332588476, "eval_runtime": 0.9553, "eval_samples_per_second": 1570.153, "eval_spearmanr": 0.3333533900225422, "eval_steps_per_second": 6.281, "step": 207 }, { "epoch": 9.0, "step": 207, "total_flos": 6806753442049536.0, "train_loss": 1.1414002612017202, "train_runtime": 111.1051, "train_samples_per_second": 2587.189, "train_steps_per_second": 10.351 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6806753442049536.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }