{ "best_metric": 2.276757001876831, "best_model_checkpoint": "tiny_bert_rand_100_v1_stsb/checkpoint-115", "epoch": 10.0, "eval_steps": 500, "global_step": 230, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.961604595184326, "learning_rate": 4.9e-05, "loss": 3.4483, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.15705636987694221, "eval_loss": 2.314561128616333, "eval_pearson": 0.16774334792257545, "eval_runtime": 0.5294, "eval_samples_per_second": 2833.424, "eval_spearmanr": 0.14636939183130895, "eval_steps_per_second": 11.334, "step": 23 }, { "epoch": 2.0, "grad_norm": 6.913020610809326, "learning_rate": 4.8e-05, "loss": 2.0255, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.1126483992024648, "eval_loss": 2.5450494289398193, "eval_pearson": 0.11683712415863884, "eval_runtime": 0.5808, "eval_samples_per_second": 2582.785, "eval_spearmanr": 0.10845967424629079, "eval_steps_per_second": 10.331, "step": 46 }, { "epoch": 3.0, "grad_norm": 7.117321491241455, "learning_rate": 4.7e-05, "loss": 1.8523, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.21422209468092734, "eval_loss": 2.3148019313812256, "eval_pearson": 0.2201949812893677, "eval_runtime": 0.6512, "eval_samples_per_second": 2303.455, "eval_spearmanr": 0.20824920807248695, "eval_steps_per_second": 9.214, "step": 69 }, { "epoch": 4.0, "grad_norm": 6.040245532989502, "learning_rate": 4.600000000000001e-05, "loss": 1.6156, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.2691090690581144, "eval_loss": 2.3426806926727295, "eval_pearson": 0.27034978222621336, "eval_runtime": 0.548, "eval_samples_per_second": 2737.269, "eval_spearmanr": 0.26786835589001545, "eval_steps_per_second": 10.949, "step": 92 }, { "epoch": 5.0, "grad_norm": 9.559501647949219, "learning_rate": 4.5e-05, "loss": 1.3454, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.2780937290708646, "eval_loss": 2.276757001876831, "eval_pearson": 0.2798131537425998, "eval_runtime": 0.5306, "eval_samples_per_second": 2827.107, "eval_spearmanr": 0.2763743043991294, "eval_steps_per_second": 11.308, "step": 115 }, { "epoch": 6.0, "grad_norm": 7.285312652587891, "learning_rate": 4.4000000000000006e-05, "loss": 1.1616, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.273431489839639, "eval_loss": 2.6383583545684814, "eval_pearson": 0.26860800836532117, "eval_runtime": 0.5919, "eval_samples_per_second": 2534.01, "eval_spearmanr": 0.2782549713139568, "eval_steps_per_second": 10.136, "step": 138 }, { "epoch": 7.0, "grad_norm": 10.173330307006836, "learning_rate": 4.3e-05, "loss": 0.9734, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.2831437139838975, "eval_loss": 2.477155923843384, "eval_pearson": 0.28229446668730734, "eval_runtime": 0.5315, "eval_samples_per_second": 2822.41, "eval_spearmanr": 0.2839929612804877, "eval_steps_per_second": 11.29, "step": 161 }, { "epoch": 8.0, "grad_norm": 9.35107707977295, "learning_rate": 4.2e-05, "loss": 0.8406, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.24872085472419414, "eval_loss": 2.8825998306274414, "eval_pearson": 0.2434565004022157, "eval_runtime": 0.5698, "eval_samples_per_second": 2632.351, "eval_spearmanr": 0.25398520904617256, "eval_steps_per_second": 10.529, "step": 184 }, { "epoch": 9.0, "grad_norm": 17.66458511352539, "learning_rate": 4.1e-05, "loss": 0.7077, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.24927091603402246, "eval_loss": 2.909149169921875, "eval_pearson": 0.2461040064609805, "eval_runtime": 0.5293, "eval_samples_per_second": 2834.011, "eval_spearmanr": 0.25243782560706446, "eval_steps_per_second": 11.336, "step": 207 }, { "epoch": 10.0, "grad_norm": 11.106776237487793, "learning_rate": 4e-05, "loss": 0.6149, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.2685087682776196, "eval_loss": 2.823472023010254, "eval_pearson": 0.26516905911909416, "eval_runtime": 0.5621, "eval_samples_per_second": 2668.438, "eval_spearmanr": 0.2718484774361451, "eval_steps_per_second": 10.674, "step": 230 }, { "epoch": 10.0, "step": 230, "total_flos": 1507548205378560.0, "train_loss": 1.4585458755493164, "train_runtime": 51.1618, "train_samples_per_second": 5618.449, "train_steps_per_second": 22.478 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1507548205378560.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }