{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 860,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11627906976744186,
"grad_norm": 108.15798950195312,
"learning_rate": 2.3255813953488376e-06,
"loss": 4.5639,
"step": 10
},
{
"epoch": 0.23255813953488372,
"grad_norm": 82.37198638916016,
"learning_rate": 4.651162790697675e-06,
"loss": 4.3868,
"step": 20
},
{
"epoch": 0.3488372093023256,
"grad_norm": 71.77903747558594,
"learning_rate": 6.976744186046513e-06,
"loss": 4.0951,
"step": 30
},
{
"epoch": 0.46511627906976744,
"grad_norm": 66.50274658203125,
"learning_rate": 9.30232558139535e-06,
"loss": 3.7553,
"step": 40
},
{
"epoch": 0.5813953488372093,
"grad_norm": 73.22584533691406,
"learning_rate": 1.1627906976744187e-05,
"loss": 3.3389,
"step": 50
},
{
"epoch": 0.6976744186046512,
"grad_norm": 78.85617065429688,
"learning_rate": 1.3953488372093025e-05,
"loss": 2.8809,
"step": 60
},
{
"epoch": 0.813953488372093,
"grad_norm": 70.36491394042969,
"learning_rate": 1.6279069767441862e-05,
"loss": 2.4333,
"step": 70
},
{
"epoch": 0.9302325581395349,
"grad_norm": 88.5320053100586,
"learning_rate": 1.86046511627907e-05,
"loss": 1.8066,
"step": 80
},
{
"epoch": 1.0465116279069768,
"grad_norm": 90.84140014648438,
"learning_rate": 1.9896640826873385e-05,
"loss": 1.2644,
"step": 90
},
{
"epoch": 1.1627906976744187,
"grad_norm": 107.49101257324219,
"learning_rate": 1.9638242894056848e-05,
"loss": 1.0383,
"step": 100
},
{
"epoch": 1.2790697674418605,
"grad_norm": 101.91487884521484,
"learning_rate": 1.937984496124031e-05,
"loss": 1.0025,
"step": 110
},
{
"epoch": 1.3953488372093024,
"grad_norm": 92.1374740600586,
"learning_rate": 1.9121447028423774e-05,
"loss": 0.8067,
"step": 120
},
{
"epoch": 1.5116279069767442,
"grad_norm": 93.908447265625,
"learning_rate": 1.8863049095607237e-05,
"loss": 0.8832,
"step": 130
},
{
"epoch": 1.627906976744186,
"grad_norm": 70.7513198852539,
"learning_rate": 1.86046511627907e-05,
"loss": 0.7466,
"step": 140
},
{
"epoch": 1.744186046511628,
"grad_norm": 87.94328308105469,
"learning_rate": 1.8346253229974164e-05,
"loss": 0.7646,
"step": 150
},
{
"epoch": 1.8604651162790697,
"grad_norm": 73.78392028808594,
"learning_rate": 1.8087855297157624e-05,
"loss": 0.6297,
"step": 160
},
{
"epoch": 1.9767441860465116,
"grad_norm": 76.53567504882812,
"learning_rate": 1.7829457364341087e-05,
"loss": 0.5853,
"step": 170
},
{
"epoch": 2.0930232558139537,
"grad_norm": 59.62578582763672,
"learning_rate": 1.757105943152455e-05,
"loss": 0.443,
"step": 180
},
{
"epoch": 2.2093023255813953,
"grad_norm": 49.40946578979492,
"learning_rate": 1.7312661498708013e-05,
"loss": 0.4205,
"step": 190
},
{
"epoch": 2.3255813953488373,
"grad_norm": 60.58675765991211,
"learning_rate": 1.7054263565891473e-05,
"loss": 0.3815,
"step": 200
},
{
"epoch": 2.441860465116279,
"grad_norm": 84.80896759033203,
"learning_rate": 1.6795865633074936e-05,
"loss": 0.4841,
"step": 210
},
{
"epoch": 2.558139534883721,
"grad_norm": 63.476505279541016,
"learning_rate": 1.65374677002584e-05,
"loss": 0.4462,
"step": 220
},
{
"epoch": 2.6744186046511627,
"grad_norm": 93.7468032836914,
"learning_rate": 1.6279069767441862e-05,
"loss": 0.458,
"step": 230
},
{
"epoch": 2.7906976744186047,
"grad_norm": 72.0390625,
"learning_rate": 1.6020671834625325e-05,
"loss": 0.4498,
"step": 240
},
{
"epoch": 2.9069767441860463,
"grad_norm": 57.512176513671875,
"learning_rate": 1.5762273901808785e-05,
"loss": 0.3927,
"step": 250
},
{
"epoch": 3.0232558139534884,
"grad_norm": 51.03213882446289,
"learning_rate": 1.550387596899225e-05,
"loss": 0.3707,
"step": 260
},
{
"epoch": 3.13953488372093,
"grad_norm": 60.818233489990234,
"learning_rate": 1.5245478036175711e-05,
"loss": 0.3091,
"step": 270
},
{
"epoch": 3.255813953488372,
"grad_norm": 85.74930572509766,
"learning_rate": 1.4987080103359175e-05,
"loss": 0.3512,
"step": 280
},
{
"epoch": 3.3720930232558137,
"grad_norm": 61.855953216552734,
"learning_rate": 1.4728682170542636e-05,
"loss": 0.3402,
"step": 290
},
{
"epoch": 3.488372093023256,
"grad_norm": 73.42764282226562,
"learning_rate": 1.44702842377261e-05,
"loss": 0.2461,
"step": 300
},
{
"epoch": 3.604651162790698,
"grad_norm": 58.168540954589844,
"learning_rate": 1.421188630490956e-05,
"loss": 0.2955,
"step": 310
},
{
"epoch": 3.7209302325581395,
"grad_norm": 47.6633415222168,
"learning_rate": 1.3953488372093025e-05,
"loss": 0.336,
"step": 320
},
{
"epoch": 3.8372093023255816,
"grad_norm": 63.79522705078125,
"learning_rate": 1.3695090439276487e-05,
"loss": 0.3029,
"step": 330
},
{
"epoch": 3.953488372093023,
"grad_norm": 71.78118133544922,
"learning_rate": 1.343669250645995e-05,
"loss": 0.2938,
"step": 340
},
{
"epoch": 4.069767441860465,
"grad_norm": 51.58977127075195,
"learning_rate": 1.3178294573643412e-05,
"loss": 0.2736,
"step": 350
},
{
"epoch": 4.186046511627907,
"grad_norm": 62.82514572143555,
"learning_rate": 1.2919896640826875e-05,
"loss": 0.2936,
"step": 360
},
{
"epoch": 4.3023255813953485,
"grad_norm": 61.00852584838867,
"learning_rate": 1.2661498708010338e-05,
"loss": 0.323,
"step": 370
},
{
"epoch": 4.4186046511627906,
"grad_norm": 15.029560089111328,
"learning_rate": 1.24031007751938e-05,
"loss": 0.2644,
"step": 380
},
{
"epoch": 4.534883720930233,
"grad_norm": 63.83937072753906,
"learning_rate": 1.2144702842377262e-05,
"loss": 0.2595,
"step": 390
},
{
"epoch": 4.651162790697675,
"grad_norm": 50.91780090332031,
"learning_rate": 1.1886304909560724e-05,
"loss": 0.2734,
"step": 400
},
{
"epoch": 4.767441860465116,
"grad_norm": 49.75490951538086,
"learning_rate": 1.1627906976744187e-05,
"loss": 0.2704,
"step": 410
},
{
"epoch": 4.883720930232558,
"grad_norm": 49.518131256103516,
"learning_rate": 1.1369509043927648e-05,
"loss": 0.299,
"step": 420
},
{
"epoch": 5.0,
"grad_norm": 30.25467872619629,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.2598,
"step": 430
},
{
"epoch": 5.116279069767442,
"grad_norm": 40.72892379760742,
"learning_rate": 1.0852713178294573e-05,
"loss": 0.21,
"step": 440
},
{
"epoch": 5.232558139534884,
"grad_norm": 41.70243835449219,
"learning_rate": 1.0594315245478038e-05,
"loss": 0.2307,
"step": 450
},
{
"epoch": 5.348837209302325,
"grad_norm": 66.0749740600586,
"learning_rate": 1.03359173126615e-05,
"loss": 0.1893,
"step": 460
},
{
"epoch": 5.465116279069767,
"grad_norm": 55.968231201171875,
"learning_rate": 1.0077519379844963e-05,
"loss": 0.2739,
"step": 470
},
{
"epoch": 5.5813953488372094,
"grad_norm": 44.32405090332031,
"learning_rate": 9.819121447028424e-06,
"loss": 0.2613,
"step": 480
},
{
"epoch": 5.6976744186046515,
"grad_norm": 80.5435791015625,
"learning_rate": 9.560723514211887e-06,
"loss": 0.2298,
"step": 490
},
{
"epoch": 5.813953488372093,
"grad_norm": 51.336830139160156,
"learning_rate": 9.30232558139535e-06,
"loss": 0.2614,
"step": 500
},
{
"epoch": 5.930232558139535,
"grad_norm": 24.42147445678711,
"learning_rate": 9.043927648578812e-06,
"loss": 0.2813,
"step": 510
},
{
"epoch": 6.046511627906977,
"grad_norm": 43.1801872253418,
"learning_rate": 8.785529715762275e-06,
"loss": 0.1733,
"step": 520
},
{
"epoch": 6.162790697674419,
"grad_norm": 46.86786651611328,
"learning_rate": 8.527131782945736e-06,
"loss": 0.2382,
"step": 530
},
{
"epoch": 6.27906976744186,
"grad_norm": 33.578487396240234,
"learning_rate": 8.2687338501292e-06,
"loss": 0.1822,
"step": 540
},
{
"epoch": 6.395348837209302,
"grad_norm": 39.639198303222656,
"learning_rate": 8.010335917312663e-06,
"loss": 0.202,
"step": 550
},
{
"epoch": 6.511627906976744,
"grad_norm": 27.308820724487305,
"learning_rate": 7.751937984496126e-06,
"loss": 0.2184,
"step": 560
},
{
"epoch": 6.627906976744186,
"grad_norm": 30.545543670654297,
"learning_rate": 7.493540051679587e-06,
"loss": 0.185,
"step": 570
},
{
"epoch": 6.7441860465116275,
"grad_norm": 47.775875091552734,
"learning_rate": 7.23514211886305e-06,
"loss": 0.2075,
"step": 580
},
{
"epoch": 6.8604651162790695,
"grad_norm": 50.34706115722656,
"learning_rate": 6.976744186046513e-06,
"loss": 0.2519,
"step": 590
},
{
"epoch": 6.976744186046512,
"grad_norm": 23.37942886352539,
"learning_rate": 6.718346253229975e-06,
"loss": 0.2196,
"step": 600
},
{
"epoch": 7.093023255813954,
"grad_norm": 29.51810646057129,
"learning_rate": 6.459948320413437e-06,
"loss": 0.1596,
"step": 610
},
{
"epoch": 7.209302325581396,
"grad_norm": 44.744171142578125,
"learning_rate": 6.2015503875969e-06,
"loss": 0.1719,
"step": 620
},
{
"epoch": 7.325581395348837,
"grad_norm": 64.436767578125,
"learning_rate": 5.943152454780362e-06,
"loss": 0.2198,
"step": 630
},
{
"epoch": 7.441860465116279,
"grad_norm": 19.253662109375,
"learning_rate": 5.684754521963824e-06,
"loss": 0.1481,
"step": 640
},
{
"epoch": 7.558139534883721,
"grad_norm": 33.91790771484375,
"learning_rate": 5.4263565891472865e-06,
"loss": 0.2362,
"step": 650
},
{
"epoch": 7.674418604651163,
"grad_norm": 27.47454071044922,
"learning_rate": 5.16795865633075e-06,
"loss": 0.1741,
"step": 660
},
{
"epoch": 7.790697674418604,
"grad_norm": 33.89091491699219,
"learning_rate": 4.909560723514212e-06,
"loss": 0.1759,
"step": 670
},
{
"epoch": 7.906976744186046,
"grad_norm": 50.72035217285156,
"learning_rate": 4.651162790697675e-06,
"loss": 0.1852,
"step": 680
},
{
"epoch": 8.023255813953488,
"grad_norm": 15.040735244750977,
"learning_rate": 4.3927648578811375e-06,
"loss": 0.1454,
"step": 690
},
{
"epoch": 8.13953488372093,
"grad_norm": 45.22159194946289,
"learning_rate": 4.1343669250646e-06,
"loss": 0.2117,
"step": 700
},
{
"epoch": 8.255813953488373,
"grad_norm": 18.63289451599121,
"learning_rate": 3.875968992248063e-06,
"loss": 0.1718,
"step": 710
},
{
"epoch": 8.372093023255815,
"grad_norm": 16.854759216308594,
"learning_rate": 3.617571059431525e-06,
"loss": 0.176,
"step": 720
},
{
"epoch": 8.488372093023255,
"grad_norm": 49.81332015991211,
"learning_rate": 3.3591731266149875e-06,
"loss": 0.1962,
"step": 730
},
{
"epoch": 8.604651162790697,
"grad_norm": 31.597118377685547,
"learning_rate": 3.10077519379845e-06,
"loss": 0.1685,
"step": 740
},
{
"epoch": 8.720930232558139,
"grad_norm": 45.57378387451172,
"learning_rate": 2.842377260981912e-06,
"loss": 0.1693,
"step": 750
},
{
"epoch": 8.837209302325581,
"grad_norm": 33.74851989746094,
"learning_rate": 2.583979328165375e-06,
"loss": 0.161,
"step": 760
},
{
"epoch": 8.953488372093023,
"grad_norm": 32.361839294433594,
"learning_rate": 2.3255813953488376e-06,
"loss": 0.1661,
"step": 770
},
{
"epoch": 9.069767441860465,
"grad_norm": 25.80358123779297,
"learning_rate": 2.0671834625323e-06,
"loss": 0.1393,
"step": 780
},
{
"epoch": 9.186046511627907,
"grad_norm": 37.717708587646484,
"learning_rate": 1.8087855297157624e-06,
"loss": 0.1522,
"step": 790
},
{
"epoch": 9.30232558139535,
"grad_norm": 36.026527404785156,
"learning_rate": 1.550387596899225e-06,
"loss": 0.1683,
"step": 800
},
{
"epoch": 9.418604651162791,
"grad_norm": 41.67938232421875,
"learning_rate": 1.2919896640826874e-06,
"loss": 0.146,
"step": 810
},
{
"epoch": 9.534883720930232,
"grad_norm": 26.428848266601562,
"learning_rate": 1.03359173126615e-06,
"loss": 0.1653,
"step": 820
},
{
"epoch": 9.651162790697674,
"grad_norm": 20.74589729309082,
"learning_rate": 7.751937984496125e-07,
"loss": 0.1543,
"step": 830
},
{
"epoch": 9.767441860465116,
"grad_norm": 25.37066078186035,
"learning_rate": 5.16795865633075e-07,
"loss": 0.1644,
"step": 840
},
{
"epoch": 9.883720930232558,
"grad_norm": 56.05203628540039,
"learning_rate": 2.583979328165375e-07,
"loss": 0.1921,
"step": 850
},
{
"epoch": 10.0,
"grad_norm": 0.19254037737846375,
"learning_rate": 0.0,
"loss": 0.1691,
"step": 860
}
],
"logging_steps": 10,
"max_steps": 860,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 2500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}