| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5449434979636322, |
| "eval_steps": 500, |
| "global_step": 9500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02868123673492801, |
| "grad_norm": 5.003673553466797, |
| "learning_rate": 9.54110898661568e-07, |
| "loss": 1.0608, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02868123673492801, |
| "eval_loss": 0.3829629123210907, |
| "eval_runtime": 12.7122, |
| "eval_samples_per_second": 517.929, |
| "eval_steps_per_second": 16.205, |
| "eval_sts-dev_pearson_cosine": 0.8611719010801527, |
| "eval_sts-dev_spearman_cosine": 0.8766654545382051, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05736247346985602, |
| "grad_norm": 5.119338512420654, |
| "learning_rate": 1.9101338432122374e-06, |
| "loss": 0.5244, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05736247346985602, |
| "eval_loss": 0.2714338004589081, |
| "eval_runtime": 12.7434, |
| "eval_samples_per_second": 516.659, |
| "eval_steps_per_second": 16.165, |
| "eval_sts-dev_pearson_cosine": 0.8570032340400104, |
| "eval_sts-dev_spearman_cosine": 0.8696568405511665, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.08604371020478403, |
| "grad_norm": 1.5423139333724976, |
| "learning_rate": 2.8661567877629063e-06, |
| "loss": 0.4477, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.08604371020478403, |
| "eval_loss": 0.23649391531944275, |
| "eval_runtime": 12.7309, |
| "eval_samples_per_second": 517.168, |
| "eval_steps_per_second": 16.181, |
| "eval_sts-dev_pearson_cosine": 0.8587832954043909, |
| "eval_sts-dev_spearman_cosine": 0.8684451966216453, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.11472494693971204, |
| "grad_norm": 6.727112770080566, |
| "learning_rate": 3.822179732313576e-06, |
| "loss": 0.4196, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.11472494693971204, |
| "eval_loss": 0.2210703343153, |
| "eval_runtime": 12.7293, |
| "eval_samples_per_second": 517.233, |
| "eval_steps_per_second": 16.183, |
| "eval_sts-dev_pearson_cosine": 0.8620536482725116, |
| "eval_sts-dev_spearman_cosine": 0.8689494483130697, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.14340618367464006, |
| "grad_norm": 5.841507911682129, |
| "learning_rate": 4.778202676864245e-06, |
| "loss": 0.3861, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.14340618367464006, |
| "eval_loss": 0.2159556895494461, |
| "eval_runtime": 12.6972, |
| "eval_samples_per_second": 518.541, |
| "eval_steps_per_second": 16.224, |
| "eval_sts-dev_pearson_cosine": 0.8629719099031811, |
| "eval_sts-dev_spearman_cosine": 0.8678508213750861, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.17208742040956806, |
| "grad_norm": 5.232604026794434, |
| "learning_rate": 5.734225621414914e-06, |
| "loss": 0.383, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17208742040956806, |
| "eval_loss": 0.21165093779563904, |
| "eval_runtime": 12.7074, |
| "eval_samples_per_second": 518.125, |
| "eval_steps_per_second": 16.211, |
| "eval_sts-dev_pearson_cosine": 0.8659937938410078, |
| "eval_sts-dev_spearman_cosine": 0.8714441691741636, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.20076865714449607, |
| "grad_norm": 4.678284168243408, |
| "learning_rate": 6.6902485659655835e-06, |
| "loss": 0.3692, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.20076865714449607, |
| "eval_loss": 0.21005575358867645, |
| "eval_runtime": 12.689, |
| "eval_samples_per_second": 518.876, |
| "eval_steps_per_second": 16.235, |
| "eval_sts-dev_pearson_cosine": 0.8597359500105216, |
| "eval_sts-dev_spearman_cosine": 0.8650711959150784, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.22944989387942408, |
| "grad_norm": 6.501852512359619, |
| "learning_rate": 7.646271510516251e-06, |
| "loss": 0.3543, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.22944989387942408, |
| "eval_loss": 0.21329358220100403, |
| "eval_runtime": 12.6888, |
| "eval_samples_per_second": 518.882, |
| "eval_steps_per_second": 16.235, |
| "eval_sts-dev_pearson_cosine": 0.855877914902483, |
| "eval_sts-dev_spearman_cosine": 0.8592050633377774, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2581311306143521, |
| "grad_norm": 5.672712326049805, |
| "learning_rate": 8.602294455066922e-06, |
| "loss": 0.3568, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2581311306143521, |
| "eval_loss": 0.20921653509140015, |
| "eval_runtime": 12.6834, |
| "eval_samples_per_second": 519.103, |
| "eval_steps_per_second": 16.242, |
| "eval_sts-dev_pearson_cosine": 0.8626966982304745, |
| "eval_sts-dev_spearman_cosine": 0.8661138721912987, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2868123673492801, |
| "grad_norm": 6.034841537475586, |
| "learning_rate": 9.558317399617591e-06, |
| "loss": 0.3404, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2868123673492801, |
| "eval_loss": 0.21060334146022797, |
| "eval_runtime": 12.681, |
| "eval_samples_per_second": 519.2, |
| "eval_steps_per_second": 16.245, |
| "eval_sts-dev_pearson_cosine": 0.8596459726107795, |
| "eval_sts-dev_spearman_cosine": 0.8617335699545178, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.3154936040842081, |
| "grad_norm": 4.462884426116943, |
| "learning_rate": 1.0514340344168261e-05, |
| "loss": 0.3307, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.3154936040842081, |
| "eval_loss": 0.20899620652198792, |
| "eval_runtime": 12.6837, |
| "eval_samples_per_second": 519.091, |
| "eval_steps_per_second": 16.241, |
| "eval_sts-dev_pearson_cosine": 0.8624256036447535, |
| "eval_sts-dev_spearman_cosine": 0.8646255603888939, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.34417484081913613, |
| "grad_norm": 7.7960896492004395, |
| "learning_rate": 1.147036328871893e-05, |
| "loss": 0.3359, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.34417484081913613, |
| "eval_loss": 0.2030467540025711, |
| "eval_runtime": 12.7091, |
| "eval_samples_per_second": 518.054, |
| "eval_steps_per_second": 16.209, |
| "eval_sts-dev_pearson_cosine": 0.8702937660837488, |
| "eval_sts-dev_spearman_cosine": 0.8730166667579369, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.37285607755406414, |
| "grad_norm": 6.986783981323242, |
| "learning_rate": 1.24263862332696e-05, |
| "loss": 0.3304, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.37285607755406414, |
| "eval_loss": 0.20733679831027985, |
| "eval_runtime": 12.7035, |
| "eval_samples_per_second": 518.284, |
| "eval_steps_per_second": 16.216, |
| "eval_sts-dev_pearson_cosine": 0.8611582102576957, |
| "eval_sts-dev_spearman_cosine": 0.8632160410827856, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.40153731428899214, |
| "grad_norm": 4.2199883460998535, |
| "learning_rate": 1.3382409177820268e-05, |
| "loss": 0.3319, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.40153731428899214, |
| "eval_loss": 0.19952718913555145, |
| "eval_runtime": 12.6937, |
| "eval_samples_per_second": 518.682, |
| "eval_steps_per_second": 16.229, |
| "eval_sts-dev_pearson_cosine": 0.8664693326646002, |
| "eval_sts-dev_spearman_cosine": 0.8692297087437101, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.43021855102392015, |
| "grad_norm": 5.597110271453857, |
| "learning_rate": 1.4338432122370937e-05, |
| "loss": 0.3087, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.43021855102392015, |
| "eval_loss": 0.20627431571483612, |
| "eval_runtime": 12.715, |
| "eval_samples_per_second": 517.814, |
| "eval_steps_per_second": 16.201, |
| "eval_sts-dev_pearson_cosine": 0.8590261077617714, |
| "eval_sts-dev_spearman_cosine": 0.8614239668671501, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.45889978775884815, |
| "grad_norm": 1.4251642227172852, |
| "learning_rate": 1.5294455066921608e-05, |
| "loss": 0.3058, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.45889978775884815, |
| "eval_loss": 0.19824104011058807, |
| "eval_runtime": 12.7073, |
| "eval_samples_per_second": 518.128, |
| "eval_steps_per_second": 16.211, |
| "eval_sts-dev_pearson_cosine": 0.8642348489707395, |
| "eval_sts-dev_spearman_cosine": 0.8667949296820955, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.48758102449377616, |
| "grad_norm": 6.344972610473633, |
| "learning_rate": 1.6250478011472275e-05, |
| "loss": 0.3207, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.48758102449377616, |
| "eval_loss": 0.20592595636844635, |
| "eval_runtime": 12.6964, |
| "eval_samples_per_second": 518.573, |
| "eval_steps_per_second": 16.225, |
| "eval_sts-dev_pearson_cosine": 0.8632819341519357, |
| "eval_sts-dev_spearman_cosine": 0.8663394328606723, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.5162622612287042, |
| "grad_norm": 5.220601558685303, |
| "learning_rate": 1.7206500956022945e-05, |
| "loss": 0.3184, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5162622612287042, |
| "eval_loss": 0.20682041347026825, |
| "eval_runtime": 12.6949, |
| "eval_samples_per_second": 518.635, |
| "eval_steps_per_second": 16.227, |
| "eval_sts-dev_pearson_cosine": 0.8574421776666108, |
| "eval_sts-dev_spearman_cosine": 0.8603376674379024, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5449434979636322, |
| "grad_norm": 4.097256183624268, |
| "learning_rate": 1.8162523900573612e-05, |
| "loss": 0.3085, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5449434979636322, |
| "eval_loss": 0.19450272619724274, |
| "eval_runtime": 12.6996, |
| "eval_samples_per_second": 518.44, |
| "eval_steps_per_second": 16.221, |
| "eval_sts-dev_pearson_cosine": 0.8675839523712885, |
| "eval_sts-dev_spearman_cosine": 0.8694666129275455, |
| "step": 9500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 261495, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 15, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|