| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 1468, |
| "global_step": 14672, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10005452562704471, |
| "grad_norm": 11.328764915466309, |
| "learning_rate": 3.0111524163568777e-06, |
| "loss": 4.796, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.20010905125408943, |
| "grad_norm": 8.898218154907227, |
| "learning_rate": 6.042957455596862e-06, |
| "loss": 1.3015, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.20010905125408943, |
| "eval_nli-pairs_loss": 0.9115270376205444, |
| "eval_nli-pairs_runtime": 3.7365, |
| "eval_nli-pairs_samples_per_second": 1822.03, |
| "eval_nli-pairs_steps_per_second": 57.005, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.20010905125408943, |
| "eval_qnli-contrastive_loss": 0.03581170365214348, |
| "eval_qnli-contrastive_runtime": 3.4652, |
| "eval_qnli-contrastive_samples_per_second": 1576.52, |
| "eval_qnli-contrastive_steps_per_second": 49.347, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.30016357688113415, |
| "grad_norm": 5.427567005157471, |
| "learning_rate": 9.074762494836845e-06, |
| "loss": 0.89, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.40021810250817885, |
| "grad_norm": 3.5350825786590576, |
| "learning_rate": 1.210656753407683e-05, |
| "loss": 0.716, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.40021810250817885, |
| "eval_nli-pairs_loss": 0.5944256782531738, |
| "eval_nli-pairs_runtime": 3.5093, |
| "eval_nli-pairs_samples_per_second": 1940.006, |
| "eval_nli-pairs_steps_per_second": 60.696, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.40021810250817885, |
| "eval_qnli-contrastive_loss": 0.016810204833745956, |
| "eval_qnli-contrastive_runtime": 3.3523, |
| "eval_qnli-contrastive_samples_per_second": 1629.638, |
| "eval_qnli-contrastive_steps_per_second": 51.01, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.5002726281352236, |
| "grad_norm": 9.52629280090332, |
| "learning_rate": 1.5134242048740192e-05, |
| "loss": 0.6365, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.6003271537622683, |
| "grad_norm": 11.004107475280762, |
| "learning_rate": 1.8166047087980174e-05, |
| "loss": 0.5883, |
| "step": 4404 |
| }, |
| { |
| "epoch": 0.6003271537622683, |
| "eval_nli-pairs_loss": 0.49746155738830566, |
| "eval_nli-pairs_runtime": 3.5691, |
| "eval_nli-pairs_samples_per_second": 1907.459, |
| "eval_nli-pairs_steps_per_second": 59.678, |
| "step": 4404 |
| }, |
| { |
| "epoch": 0.6003271537622683, |
| "eval_qnli-contrastive_loss": 0.016411835327744484, |
| "eval_qnli-contrastive_runtime": 3.3328, |
| "eval_qnli-contrastive_samples_per_second": 1639.167, |
| "eval_qnli-contrastive_steps_per_second": 51.308, |
| "step": 4404 |
| }, |
| { |
| "epoch": 0.700381679389313, |
| "grad_norm": 9.219084739685059, |
| "learning_rate": 1.995708117651556e-05, |
| "loss": 0.5192, |
| "step": 5138 |
| }, |
| { |
| "epoch": 0.8004362050163577, |
| "grad_norm": 7.066645622253418, |
| "learning_rate": 1.946925849011595e-05, |
| "loss": 0.4961, |
| "step": 5872 |
| }, |
| { |
| "epoch": 0.8004362050163577, |
| "eval_nli-pairs_loss": 0.44500303268432617, |
| "eval_nli-pairs_runtime": 3.6078, |
| "eval_nli-pairs_samples_per_second": 1887.014, |
| "eval_nli-pairs_steps_per_second": 59.038, |
| "step": 5872 |
| }, |
| { |
| "epoch": 0.8004362050163577, |
| "eval_qnli-contrastive_loss": 0.028794871643185616, |
| "eval_qnli-contrastive_runtime": 3.335, |
| "eval_qnli-contrastive_samples_per_second": 1638.074, |
| "eval_qnli-contrastive_steps_per_second": 51.274, |
| "step": 5872 |
| }, |
| { |
| "epoch": 0.9004907306434023, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8462745233342613e-05, |
| "loss": 0.6035, |
| "step": 6606 |
| }, |
| { |
| "epoch": 1.000545256270447, |
| "grad_norm": 3.743481397628784, |
| "learning_rate": 1.699267443860664e-05, |
| "loss": 0.4733, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.000545256270447, |
| "eval_nli-pairs_loss": 0.4215342402458191, |
| "eval_nli-pairs_runtime": 3.6783, |
| "eval_nli-pairs_samples_per_second": 1850.875, |
| "eval_nli-pairs_steps_per_second": 57.908, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.000545256270447, |
| "eval_qnli-contrastive_loss": 0.01100869383662939, |
| "eval_qnli-contrastive_runtime": 3.639, |
| "eval_qnli-contrastive_samples_per_second": 1501.242, |
| "eval_qnli-contrastive_steps_per_second": 46.991, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.1005997818974917, |
| "grad_norm": 0.39953914284706116, |
| "learning_rate": 1.513957108680355e-05, |
| "loss": 0.4002, |
| "step": 8074 |
| }, |
| { |
| "epoch": 1.2006543075245366, |
| "grad_norm": 2.542104482650757, |
| "learning_rate": 1.3004941249978107e-05, |
| "loss": 0.3929, |
| "step": 8808 |
| }, |
| { |
| "epoch": 1.2006543075245366, |
| "eval_nli-pairs_loss": 0.37960606813430786, |
| "eval_nli-pairs_runtime": 3.5792, |
| "eval_nli-pairs_samples_per_second": 1902.102, |
| "eval_nli-pairs_steps_per_second": 59.511, |
| "step": 8808 |
| }, |
| { |
| "epoch": 1.2006543075245366, |
| "eval_qnli-contrastive_loss": 0.04537490755319595, |
| "eval_qnli-contrastive_runtime": 3.371, |
| "eval_qnli-contrastive_samples_per_second": 1620.568, |
| "eval_qnli-contrastive_steps_per_second": 50.726, |
| "step": 8808 |
| }, |
| { |
| "epoch": 1.3007088331515813, |
| "grad_norm": 2.3156607151031494, |
| "learning_rate": 1.0705711968273469e-05, |
| "loss": 0.3826, |
| "step": 9542 |
| }, |
| { |
| "epoch": 1.400763358778626, |
| "grad_norm": 3.3540971279144287, |
| "learning_rate": 8.370979573663896e-06, |
| "loss": 0.3522, |
| "step": 10276 |
| }, |
| { |
| "epoch": 1.400763358778626, |
| "eval_nli-pairs_loss": 0.3714284896850586, |
| "eval_nli-pairs_runtime": 3.5826, |
| "eval_nli-pairs_samples_per_second": 1900.32, |
| "eval_nli-pairs_steps_per_second": 59.455, |
| "step": 10276 |
| }, |
| { |
| "epoch": 1.400763358778626, |
| "eval_qnli-contrastive_loss": 0.017819516360759735, |
| "eval_qnli-contrastive_runtime": 3.4236, |
| "eval_qnli-contrastive_samples_per_second": 1595.701, |
| "eval_qnli-contrastive_steps_per_second": 49.948, |
| "step": 10276 |
| }, |
| { |
| "epoch": 1.5008178844056705, |
| "grad_norm": 1.3052864074707031, |
| "learning_rate": 6.125236966193413e-06, |
| "loss": 0.3627, |
| "step": 11010 |
| }, |
| { |
| "epoch": 1.6008724100327154, |
| "grad_norm": 1.00529944896698, |
| "learning_rate": 4.088586072137575e-06, |
| "loss": 0.3553, |
| "step": 11744 |
| }, |
| { |
| "epoch": 1.6008724100327154, |
| "eval_nli-pairs_loss": 0.3628700077533722, |
| "eval_nli-pairs_runtime": 3.7016, |
| "eval_nli-pairs_samples_per_second": 1839.184, |
| "eval_nli-pairs_steps_per_second": 57.542, |
| "step": 11744 |
| }, |
| { |
| "epoch": 1.6008724100327154, |
| "eval_qnli-contrastive_loss": 0.025695964694023132, |
| "eval_qnli-contrastive_runtime": 3.365, |
| "eval_qnli-contrastive_samples_per_second": 1623.495, |
| "eval_qnli-contrastive_steps_per_second": 50.818, |
| "step": 11744 |
| }, |
| { |
| "epoch": 1.70092693565976, |
| "grad_norm": 6.129855632781982, |
| "learning_rate": 2.375740327678049e-06, |
| "loss": 0.3406, |
| "step": 12478 |
| }, |
| { |
| "epoch": 1.800981461286805, |
| "grad_norm": 0.2667868733406067, |
| "learning_rate": 1.0819684733779468e-06, |
| "loss": 0.3288, |
| "step": 13212 |
| }, |
| { |
| "epoch": 1.800981461286805, |
| "eval_nli-pairs_loss": 0.3574618101119995, |
| "eval_nli-pairs_runtime": 3.5437, |
| "eval_nli-pairs_samples_per_second": 1921.132, |
| "eval_nli-pairs_steps_per_second": 60.106, |
| "step": 13212 |
| }, |
| { |
| "epoch": 1.800981461286805, |
| "eval_qnli-contrastive_loss": 0.028892073780298233, |
| "eval_qnli-contrastive_runtime": 3.3304, |
| "eval_qnli-contrastive_samples_per_second": 1640.352, |
| "eval_qnli-contrastive_steps_per_second": 51.345, |
| "step": 13212 |
| }, |
| { |
| "epoch": 1.9010359869138496, |
| "grad_norm": 5.174046039581299, |
| "learning_rate": 2.746246447818135e-07, |
| "loss": 0.4563, |
| "step": 13946 |
| } |
| ], |
| "logging_steps": 734, |
| "max_steps": 14672, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 7336, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 94, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|