{
  "best_metric": 2.2568883895874023,
  "best_model_checkpoint": "tiny_bert_rand_50_v1_stsb/checkpoint-23",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 138,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 6.64505672454834,
      "learning_rate": 4.9e-05,
      "loss": 3.4713,
      "step": 23
    },
    {
      "epoch": 1.0,
      "eval_combined_score": 0.11346061932108288,
      "eval_loss": 2.2568883895874023,
      "eval_pearson": 0.11480426487286079,
      "eval_runtime": 0.6984,
      "eval_samples_per_second": 2147.658,
      "eval_spearmanr": 0.11211697376930498,
      "eval_steps_per_second": 8.591,
      "step": 23
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.127440452575684,
      "learning_rate": 4.8e-05,
      "loss": 2.0275,
      "step": 46
    },
    {
      "epoch": 2.0,
      "eval_combined_score": 0.08747432980447115,
      "eval_loss": 2.6621973514556885,
      "eval_pearson": 0.09797904028841499,
      "eval_runtime": 0.6495,
      "eval_samples_per_second": 2309.362,
      "eval_spearmanr": 0.0769696193205273,
      "eval_steps_per_second": 9.237,
      "step": 46
    },
    {
      "epoch": 3.0,
      "grad_norm": 6.796934604644775,
      "learning_rate": 4.7e-05,
      "loss": 1.8912,
      "step": 69
    },
    {
      "epoch": 3.0,
      "eval_combined_score": 0.1679648923583315,
      "eval_loss": 2.4156179428100586,
      "eval_pearson": 0.1744431045329717,
      "eval_runtime": 0.5709,
      "eval_samples_per_second": 2627.537,
      "eval_spearmanr": 0.16148668018369133,
      "eval_steps_per_second": 10.51,
      "step": 69
    },
    {
      "epoch": 4.0,
      "grad_norm": 7.0087785720825195,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.6966,
      "step": 92
    },
    {
      "epoch": 4.0,
      "eval_combined_score": 0.211070610209586,
      "eval_loss": 2.5946474075317383,
      "eval_pearson": 0.21027160965287675,
      "eval_runtime": 0.5525,
      "eval_samples_per_second": 2714.697,
      "eval_spearmanr": 0.2118696107662953,
      "eval_steps_per_second": 10.859,
      "step": 92
    },
    {
      "epoch": 5.0,
      "grad_norm": 17.69513702392578,
      "learning_rate": 4.5e-05,
      "loss": 1.4665,
      "step": 115
    },
    {
      "epoch": 5.0,
      "eval_combined_score": 0.231848067216258,
      "eval_loss": 2.562957286834717,
      "eval_pearson": 0.22792827229288598,
      "eval_runtime": 0.5572,
      "eval_samples_per_second": 2692.232,
      "eval_spearmanr": 0.23576786213963,
      "eval_steps_per_second": 10.769,
      "step": 115
    },
    {
      "epoch": 6.0,
      "grad_norm": 6.968521595001221,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.2375,
      "step": 138
    },
    {
      "epoch": 6.0,
      "eval_combined_score": 0.259776621258329,
      "eval_loss": 2.49996018409729,
      "eval_pearson": 0.2583030902504291,
      "eval_runtime": 0.5756,
      "eval_samples_per_second": 2605.763,
      "eval_spearmanr": 0.26125015226622894,
      "eval_steps_per_second": 10.423,
      "step": 138
    },
    {
      "epoch": 6.0,
      "step": 138,
      "total_flos": 904528923227136.0,
      "train_loss": 1.9650979664014734,
      "train_runtime": 31.6691,
      "train_samples_per_second": 9076.676,
      "train_steps_per_second": 36.313
    }
  ],
  "logging_steps": 1,
  "max_steps": 1150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 904528923227136.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}