bobox's picture
Training in progress, epoch 2, checkpoint
d6796ed verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 1468,
"global_step": 14672,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10005452562704471,
"grad_norm": 11.328764915466309,
"learning_rate": 3.0111524163568777e-06,
"loss": 4.796,
"step": 734
},
{
"epoch": 0.20010905125408943,
"grad_norm": 8.898218154907227,
"learning_rate": 6.042957455596862e-06,
"loss": 1.3015,
"step": 1468
},
{
"epoch": 0.20010905125408943,
"eval_nli-pairs_loss": 0.9115270376205444,
"eval_nli-pairs_runtime": 3.7365,
"eval_nli-pairs_samples_per_second": 1822.03,
"eval_nli-pairs_steps_per_second": 57.005,
"step": 1468
},
{
"epoch": 0.20010905125408943,
"eval_qnli-contrastive_loss": 0.03581170365214348,
"eval_qnli-contrastive_runtime": 3.4652,
"eval_qnli-contrastive_samples_per_second": 1576.52,
"eval_qnli-contrastive_steps_per_second": 49.347,
"step": 1468
},
{
"epoch": 0.30016357688113415,
"grad_norm": 5.427567005157471,
"learning_rate": 9.074762494836845e-06,
"loss": 0.89,
"step": 2202
},
{
"epoch": 0.40021810250817885,
"grad_norm": 3.5350825786590576,
"learning_rate": 1.210656753407683e-05,
"loss": 0.716,
"step": 2936
},
{
"epoch": 0.40021810250817885,
"eval_nli-pairs_loss": 0.5944256782531738,
"eval_nli-pairs_runtime": 3.5093,
"eval_nli-pairs_samples_per_second": 1940.006,
"eval_nli-pairs_steps_per_second": 60.696,
"step": 2936
},
{
"epoch": 0.40021810250817885,
"eval_qnli-contrastive_loss": 0.016810204833745956,
"eval_qnli-contrastive_runtime": 3.3523,
"eval_qnli-contrastive_samples_per_second": 1629.638,
"eval_qnli-contrastive_steps_per_second": 51.01,
"step": 2936
},
{
"epoch": 0.5002726281352236,
"grad_norm": 9.52629280090332,
"learning_rate": 1.5134242048740192e-05,
"loss": 0.6365,
"step": 3670
},
{
"epoch": 0.6003271537622683,
"grad_norm": 11.004107475280762,
"learning_rate": 1.8166047087980174e-05,
"loss": 0.5883,
"step": 4404
},
{
"epoch": 0.6003271537622683,
"eval_nli-pairs_loss": 0.49746155738830566,
"eval_nli-pairs_runtime": 3.5691,
"eval_nli-pairs_samples_per_second": 1907.459,
"eval_nli-pairs_steps_per_second": 59.678,
"step": 4404
},
{
"epoch": 0.6003271537622683,
"eval_qnli-contrastive_loss": 0.016411835327744484,
"eval_qnli-contrastive_runtime": 3.3328,
"eval_qnli-contrastive_samples_per_second": 1639.167,
"eval_qnli-contrastive_steps_per_second": 51.308,
"step": 4404
},
{
"epoch": 0.700381679389313,
"grad_norm": 9.219084739685059,
"learning_rate": 1.995708117651556e-05,
"loss": 0.5192,
"step": 5138
},
{
"epoch": 0.8004362050163577,
"grad_norm": 7.066645622253418,
"learning_rate": 1.946925849011595e-05,
"loss": 0.4961,
"step": 5872
},
{
"epoch": 0.8004362050163577,
"eval_nli-pairs_loss": 0.44500303268432617,
"eval_nli-pairs_runtime": 3.6078,
"eval_nli-pairs_samples_per_second": 1887.014,
"eval_nli-pairs_steps_per_second": 59.038,
"step": 5872
},
{
"epoch": 0.8004362050163577,
"eval_qnli-contrastive_loss": 0.028794871643185616,
"eval_qnli-contrastive_runtime": 3.335,
"eval_qnli-contrastive_samples_per_second": 1638.074,
"eval_qnli-contrastive_steps_per_second": 51.274,
"step": 5872
},
{
"epoch": 0.9004907306434023,
"grad_norm": 0.0,
"learning_rate": 1.8462745233342613e-05,
"loss": 0.6035,
"step": 6606
},
{
"epoch": 1.000545256270447,
"grad_norm": 3.743481397628784,
"learning_rate": 1.699267443860664e-05,
"loss": 0.4733,
"step": 7340
},
{
"epoch": 1.000545256270447,
"eval_nli-pairs_loss": 0.4215342402458191,
"eval_nli-pairs_runtime": 3.6783,
"eval_nli-pairs_samples_per_second": 1850.875,
"eval_nli-pairs_steps_per_second": 57.908,
"step": 7340
},
{
"epoch": 1.000545256270447,
"eval_qnli-contrastive_loss": 0.01100869383662939,
"eval_qnli-contrastive_runtime": 3.639,
"eval_qnli-contrastive_samples_per_second": 1501.242,
"eval_qnli-contrastive_steps_per_second": 46.991,
"step": 7340
},
{
"epoch": 1.1005997818974917,
"grad_norm": 0.39953914284706116,
"learning_rate": 1.513957108680355e-05,
"loss": 0.4002,
"step": 8074
},
{
"epoch": 1.2006543075245366,
"grad_norm": 2.542104482650757,
"learning_rate": 1.3004941249978107e-05,
"loss": 0.3929,
"step": 8808
},
{
"epoch": 1.2006543075245366,
"eval_nli-pairs_loss": 0.37960606813430786,
"eval_nli-pairs_runtime": 3.5792,
"eval_nli-pairs_samples_per_second": 1902.102,
"eval_nli-pairs_steps_per_second": 59.511,
"step": 8808
},
{
"epoch": 1.2006543075245366,
"eval_qnli-contrastive_loss": 0.04537490755319595,
"eval_qnli-contrastive_runtime": 3.371,
"eval_qnli-contrastive_samples_per_second": 1620.568,
"eval_qnli-contrastive_steps_per_second": 50.726,
"step": 8808
},
{
"epoch": 1.3007088331515813,
"grad_norm": 2.3156607151031494,
"learning_rate": 1.0705711968273469e-05,
"loss": 0.3826,
"step": 9542
},
{
"epoch": 1.400763358778626,
"grad_norm": 3.3540971279144287,
"learning_rate": 8.370979573663896e-06,
"loss": 0.3522,
"step": 10276
},
{
"epoch": 1.400763358778626,
"eval_nli-pairs_loss": 0.3714284896850586,
"eval_nli-pairs_runtime": 3.5826,
"eval_nli-pairs_samples_per_second": 1900.32,
"eval_nli-pairs_steps_per_second": 59.455,
"step": 10276
},
{
"epoch": 1.400763358778626,
"eval_qnli-contrastive_loss": 0.017819516360759735,
"eval_qnli-contrastive_runtime": 3.4236,
"eval_qnli-contrastive_samples_per_second": 1595.701,
"eval_qnli-contrastive_steps_per_second": 49.948,
"step": 10276
},
{
"epoch": 1.5008178844056705,
"grad_norm": 1.3052864074707031,
"learning_rate": 6.125236966193413e-06,
"loss": 0.3627,
"step": 11010
},
{
"epoch": 1.6008724100327154,
"grad_norm": 1.00529944896698,
"learning_rate": 4.088586072137575e-06,
"loss": 0.3553,
"step": 11744
},
{
"epoch": 1.6008724100327154,
"eval_nli-pairs_loss": 0.3628700077533722,
"eval_nli-pairs_runtime": 3.7016,
"eval_nli-pairs_samples_per_second": 1839.184,
"eval_nli-pairs_steps_per_second": 57.542,
"step": 11744
},
{
"epoch": 1.6008724100327154,
"eval_qnli-contrastive_loss": 0.025695964694023132,
"eval_qnli-contrastive_runtime": 3.365,
"eval_qnli-contrastive_samples_per_second": 1623.495,
"eval_qnli-contrastive_steps_per_second": 50.818,
"step": 11744
},
{
"epoch": 1.70092693565976,
"grad_norm": 6.129855632781982,
"learning_rate": 2.375740327678049e-06,
"loss": 0.3406,
"step": 12478
},
{
"epoch": 1.800981461286805,
"grad_norm": 0.2667868733406067,
"learning_rate": 1.0819684733779468e-06,
"loss": 0.3288,
"step": 13212
},
{
"epoch": 1.800981461286805,
"eval_nli-pairs_loss": 0.3574618101119995,
"eval_nli-pairs_runtime": 3.5437,
"eval_nli-pairs_samples_per_second": 1921.132,
"eval_nli-pairs_steps_per_second": 60.106,
"step": 13212
},
{
"epoch": 1.800981461286805,
"eval_qnli-contrastive_loss": 0.028892073780298233,
"eval_qnli-contrastive_runtime": 3.3304,
"eval_qnli-contrastive_samples_per_second": 1640.352,
"eval_qnli-contrastive_steps_per_second": 51.345,
"step": 13212
},
{
"epoch": 1.9010359869138496,
"grad_norm": 5.174046039581299,
"learning_rate": 2.746246447818135e-07,
"loss": 0.4563,
"step": 13946
}
],
"logging_steps": 734,
"max_steps": 14672,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 7336,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 94,
"trial_name": null,
"trial_params": null
}