{
  "best_metric": 2.276757001876831,
  "best_model_checkpoint": "tiny_bert_rand_100_v1_stsb/checkpoint-115",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 230,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 4.961604595184326,
      "learning_rate": 4.9e-05,
      "loss": 3.4483,
      "step": 23
    },
    {
      "epoch": 1.0,
      "eval_combined_score": 0.15705636987694221,
      "eval_loss": 2.314561128616333,
      "eval_pearson": 0.16774334792257545,
      "eval_runtime": 0.5294,
      "eval_samples_per_second": 2833.424,
      "eval_spearmanr": 0.14636939183130895,
      "eval_steps_per_second": 11.334,
      "step": 23
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.913020610809326,
      "learning_rate": 4.8e-05,
      "loss": 2.0255,
      "step": 46
    },
    {
      "epoch": 2.0,
      "eval_combined_score": 0.1126483992024648,
      "eval_loss": 2.5450494289398193,
      "eval_pearson": 0.11683712415863884,
      "eval_runtime": 0.5808,
      "eval_samples_per_second": 2582.785,
      "eval_spearmanr": 0.10845967424629079,
      "eval_steps_per_second": 10.331,
      "step": 46
    },
    {
      "epoch": 3.0,
      "grad_norm": 7.117321491241455,
      "learning_rate": 4.7e-05,
      "loss": 1.8523,
      "step": 69
    },
    {
      "epoch": 3.0,
      "eval_combined_score": 0.21422209468092734,
      "eval_loss": 2.3148019313812256,
      "eval_pearson": 0.2201949812893677,
      "eval_runtime": 0.6512,
      "eval_samples_per_second": 2303.455,
      "eval_spearmanr": 0.20824920807248695,
      "eval_steps_per_second": 9.214,
      "step": 69
    },
    {
      "epoch": 4.0,
      "grad_norm": 6.040245532989502,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.6156,
      "step": 92
    },
    {
      "epoch": 4.0,
      "eval_combined_score": 0.2691090690581144,
      "eval_loss": 2.3426806926727295,
      "eval_pearson": 0.27034978222621336,
      "eval_runtime": 0.548,
      "eval_samples_per_second": 2737.269,
      "eval_spearmanr": 0.26786835589001545,
      "eval_steps_per_second": 10.949,
      "step": 92
    },
    {
      "epoch": 5.0,
      "grad_norm": 9.559501647949219,
      "learning_rate": 4.5e-05,
      "loss": 1.3454,
      "step": 115
    },
    {
      "epoch": 5.0,
      "eval_combined_score": 0.2780937290708646,
      "eval_loss": 2.276757001876831,
      "eval_pearson": 0.2798131537425998,
      "eval_runtime": 0.5306,
      "eval_samples_per_second": 2827.107,
      "eval_spearmanr": 0.2763743043991294,
      "eval_steps_per_second": 11.308,
      "step": 115
    },
    {
      "epoch": 6.0,
      "grad_norm": 7.285312652587891,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.1616,
      "step": 138
    },
    {
      "epoch": 6.0,
      "eval_combined_score": 0.273431489839639,
      "eval_loss": 2.6383583545684814,
      "eval_pearson": 0.26860800836532117,
      "eval_runtime": 0.5919,
      "eval_samples_per_second": 2534.01,
      "eval_spearmanr": 0.2782549713139568,
      "eval_steps_per_second": 10.136,
      "step": 138
    },
    {
      "epoch": 7.0,
      "grad_norm": 10.173330307006836,
      "learning_rate": 4.3e-05,
      "loss": 0.9734,
      "step": 161
    },
    {
      "epoch": 7.0,
      "eval_combined_score": 0.2831437139838975,
      "eval_loss": 2.477155923843384,
      "eval_pearson": 0.28229446668730734,
      "eval_runtime": 0.5315,
      "eval_samples_per_second": 2822.41,
      "eval_spearmanr": 0.2839929612804877,
      "eval_steps_per_second": 11.29,
      "step": 161
    },
    {
      "epoch": 8.0,
      "grad_norm": 9.35107707977295,
      "learning_rate": 4.2e-05,
      "loss": 0.8406,
      "step": 184
    },
    {
      "epoch": 8.0,
      "eval_combined_score": 0.24872085472419414,
      "eval_loss": 2.8825998306274414,
      "eval_pearson": 0.2434565004022157,
      "eval_runtime": 0.5698,
      "eval_samples_per_second": 2632.351,
      "eval_spearmanr": 0.25398520904617256,
      "eval_steps_per_second": 10.529,
      "step": 184
    },
    {
      "epoch": 9.0,
      "grad_norm": 17.66458511352539,
      "learning_rate": 4.1e-05,
      "loss": 0.7077,
      "step": 207
    },
    {
      "epoch": 9.0,
      "eval_combined_score": 0.24927091603402246,
      "eval_loss": 2.909149169921875,
      "eval_pearson": 0.2461040064609805,
      "eval_runtime": 0.5293,
      "eval_samples_per_second": 2834.011,
      "eval_spearmanr": 0.25243782560706446,
      "eval_steps_per_second": 11.336,
      "step": 207
    },
    {
      "epoch": 10.0,
      "grad_norm": 11.106776237487793,
      "learning_rate": 4e-05,
      "loss": 0.6149,
      "step": 230
    },
    {
      "epoch": 10.0,
      "eval_combined_score": 0.2685087682776196,
      "eval_loss": 2.823472023010254,
      "eval_pearson": 0.26516905911909416,
      "eval_runtime": 0.5621,
      "eval_samples_per_second": 2668.438,
      "eval_spearmanr": 0.2718484774361451,
      "eval_steps_per_second": 10.674,
      "step": 230
    },
    {
      "epoch": 10.0,
      "step": 230,
      "total_flos": 1507548205378560.0,
      "train_loss": 1.4585458755493164,
      "train_runtime": 51.1618,
      "train_samples_per_second": 5618.449,
      "train_steps_per_second": 22.478
    }
  ],
  "logging_steps": 1,
  "max_steps": 1150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 1507548205378560.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}