Upload trained model from Colab

Browse files

Files changed (11) hide show

config.json +40 -0
optimizer.pt +3 -0
pytorch_model.bin +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +59 -0
trainer_state.json +636 -0
training_args.bin +3 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "architectures": [
+    "HypencoderDualEncoder"
+  ],
+  "loss_kwargs": [
+    {
+      "only_use_first_item": false,
+      "use_in_batch_negatives": true
+    }
+  ],
+  "loss_type": [
+    "cross_entropy"
+  ],
+  "passage_encoder_kwargs": {
+    "freeze_transformer": true,
+    "model_name_or_path": "cambridgeltl/SapBERT-from-PubMedBERT-fulltext",
+    "pooling_type": "cls"
+  },
+  "passage_encoder_type": "",
+  "query_encoder_kwargs": {
+    "base_encoder_output_dim": 768,
+    "converter_kwargs": {
+      "activation_type": "relu",
+      "do_residual_on_last": false,
+      "vector_dimensions": [
+        768,
+        768,
+        768,
+        1
+      ]
+    },
+    "embedding_representation": null,
+    "freeze_transformer": true,
+    "model_name_or_path": "cambridgeltl/SapBERT-from-PubMedBERT-fulltext"
+  },
+  "query_encoder_type": "",
+  "shared_encoder": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0"
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ce5b646eab1b080d83decbfef0c6acfa580ee686e473b1cfc2db60f6157d
+size 89816779

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86be474515c14dd79b12662916b17b21a28675fdfde3d9c3fe28081fcf78a4b2
+size 482938527

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ead4b63ccfe6bb0cd4044fb2c468922f48d0902fdfb976017847b7a6810c13d
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32d1942f6b31b97eec2ecc7f6945f5159a4d117bbf4eb6f962d45753fe8b0378
+size 1465

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "full_tokenizer_file": null,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,636 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 860,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.11627906976744186,
+      "grad_norm": 108.15798950195312,
+      "learning_rate": 2.3255813953488376e-06,
+      "loss": 4.5639,
+      "step": 10
+    },
+    {
+      "epoch": 0.23255813953488372,
+      "grad_norm": 82.37198638916016,
+      "learning_rate": 4.651162790697675e-06,
+      "loss": 4.3868,
+      "step": 20
+    },
+    {
+      "epoch": 0.3488372093023256,
+      "grad_norm": 71.77903747558594,
+      "learning_rate": 6.976744186046513e-06,
+      "loss": 4.0951,
+      "step": 30
+    },
+    {
+      "epoch": 0.46511627906976744,
+      "grad_norm": 66.50274658203125,
+      "learning_rate": 9.30232558139535e-06,
+      "loss": 3.7553,
+      "step": 40
+    },
+    {
+      "epoch": 0.5813953488372093,
+      "grad_norm": 73.22584533691406,
+      "learning_rate": 1.1627906976744187e-05,
+      "loss": 3.3389,
+      "step": 50
+    },
+    {
+      "epoch": 0.6976744186046512,
+      "grad_norm": 78.85617065429688,
+      "learning_rate": 1.3953488372093025e-05,
+      "loss": 2.8809,
+      "step": 60
+    },
+    {
+      "epoch": 0.813953488372093,
+      "grad_norm": 70.36491394042969,
+      "learning_rate": 1.6279069767441862e-05,
+      "loss": 2.4333,
+      "step": 70
+    },
+    {
+      "epoch": 0.9302325581395349,
+      "grad_norm": 88.5320053100586,
+      "learning_rate": 1.86046511627907e-05,
+      "loss": 1.8066,
+      "step": 80
+    },
+    {
+      "epoch": 1.0465116279069768,
+      "grad_norm": 90.84140014648438,
+      "learning_rate": 1.9896640826873385e-05,
+      "loss": 1.2644,
+      "step": 90
+    },
+    {
+      "epoch": 1.1627906976744187,
+      "grad_norm": 107.49101257324219,
+      "learning_rate": 1.9638242894056848e-05,
+      "loss": 1.0383,
+      "step": 100
+    },
+    {
+      "epoch": 1.2790697674418605,
+      "grad_norm": 101.91487884521484,
+      "learning_rate": 1.937984496124031e-05,
+      "loss": 1.0025,
+      "step": 110
+    },
+    {
+      "epoch": 1.3953488372093024,
+      "grad_norm": 92.1374740600586,
+      "learning_rate": 1.9121447028423774e-05,
+      "loss": 0.8067,
+      "step": 120
+    },
+    {
+      "epoch": 1.5116279069767442,
+      "grad_norm": 93.908447265625,
+      "learning_rate": 1.8863049095607237e-05,
+      "loss": 0.8832,
+      "step": 130
+    },
+    {
+      "epoch": 1.627906976744186,
+      "grad_norm": 70.7513198852539,
+      "learning_rate": 1.86046511627907e-05,
+      "loss": 0.7466,
+      "step": 140
+    },
+    {
+      "epoch": 1.744186046511628,
+      "grad_norm": 87.94328308105469,
+      "learning_rate": 1.8346253229974164e-05,
+      "loss": 0.7646,
+      "step": 150
+    },
+    {
+      "epoch": 1.8604651162790697,
+      "grad_norm": 73.78392028808594,
+      "learning_rate": 1.8087855297157624e-05,
+      "loss": 0.6297,
+      "step": 160
+    },
+    {
+      "epoch": 1.9767441860465116,
+      "grad_norm": 76.53567504882812,
+      "learning_rate": 1.7829457364341087e-05,
+      "loss": 0.5853,
+      "step": 170
+    },
+    {
+      "epoch": 2.0930232558139537,
+      "grad_norm": 59.62578582763672,
+      "learning_rate": 1.757105943152455e-05,
+      "loss": 0.443,
+      "step": 180
+    },
+    {
+      "epoch": 2.2093023255813953,
+      "grad_norm": 49.40946578979492,
+      "learning_rate": 1.7312661498708013e-05,
+      "loss": 0.4205,
+      "step": 190
+    },
+    {
+      "epoch": 2.3255813953488373,
+      "grad_norm": 60.58675765991211,
+      "learning_rate": 1.7054263565891473e-05,
+      "loss": 0.3815,
+      "step": 200
+    },
+    {
+      "epoch": 2.441860465116279,
+      "grad_norm": 84.80896759033203,
+      "learning_rate": 1.6795865633074936e-05,
+      "loss": 0.4841,
+      "step": 210
+    },
+    {
+      "epoch": 2.558139534883721,
+      "grad_norm": 63.476505279541016,
+      "learning_rate": 1.65374677002584e-05,
+      "loss": 0.4462,
+      "step": 220
+    },
+    {
+      "epoch": 2.6744186046511627,
+      "grad_norm": 93.7468032836914,
+      "learning_rate": 1.6279069767441862e-05,
+      "loss": 0.458,
+      "step": 230
+    },
+    {
+      "epoch": 2.7906976744186047,
+      "grad_norm": 72.0390625,
+      "learning_rate": 1.6020671834625325e-05,
+      "loss": 0.4498,
+      "step": 240
+    },
+    {
+      "epoch": 2.9069767441860463,
+      "grad_norm": 57.512176513671875,
+      "learning_rate": 1.5762273901808785e-05,
+      "loss": 0.3927,
+      "step": 250
+    },
+    {
+      "epoch": 3.0232558139534884,
+      "grad_norm": 51.03213882446289,
+      "learning_rate": 1.550387596899225e-05,
+      "loss": 0.3707,
+      "step": 260
+    },
+    {
+      "epoch": 3.13953488372093,
+      "grad_norm": 60.818233489990234,
+      "learning_rate": 1.5245478036175711e-05,
+      "loss": 0.3091,
+      "step": 270
+    },
+    {
+      "epoch": 3.255813953488372,
+      "grad_norm": 85.74930572509766,
+      "learning_rate": 1.4987080103359175e-05,
+      "loss": 0.3512,
+      "step": 280
+    },
+    {
+      "epoch": 3.3720930232558137,
+      "grad_norm": 61.855953216552734,
+      "learning_rate": 1.4728682170542636e-05,
+      "loss": 0.3402,
+      "step": 290
+    },
+    {
+      "epoch": 3.488372093023256,
+      "grad_norm": 73.42764282226562,
+      "learning_rate": 1.44702842377261e-05,
+      "loss": 0.2461,
+      "step": 300
+    },
+    {
+      "epoch": 3.604651162790698,
+      "grad_norm": 58.168540954589844,
+      "learning_rate": 1.421188630490956e-05,
+      "loss": 0.2955,
+      "step": 310
+    },
+    {
+      "epoch": 3.7209302325581395,
+      "grad_norm": 47.6633415222168,
+      "learning_rate": 1.3953488372093025e-05,
+      "loss": 0.336,
+      "step": 320
+    },
+    {
+      "epoch": 3.8372093023255816,
+      "grad_norm": 63.79522705078125,
+      "learning_rate": 1.3695090439276487e-05,
+      "loss": 0.3029,
+      "step": 330
+    },
+    {
+      "epoch": 3.953488372093023,
+      "grad_norm": 71.78118133544922,
+      "learning_rate": 1.343669250645995e-05,
+      "loss": 0.2938,
+      "step": 340
+    },
+    {
+      "epoch": 4.069767441860465,
+      "grad_norm": 51.58977127075195,
+      "learning_rate": 1.3178294573643412e-05,
+      "loss": 0.2736,
+      "step": 350
+    },
+    {
+      "epoch": 4.186046511627907,
+      "grad_norm": 62.82514572143555,
+      "learning_rate": 1.2919896640826875e-05,
+      "loss": 0.2936,
+      "step": 360
+    },
+    {
+      "epoch": 4.3023255813953485,
+      "grad_norm": 61.00852584838867,
+      "learning_rate": 1.2661498708010338e-05,
+      "loss": 0.323,
+      "step": 370
+    },
+    {
+      "epoch": 4.4186046511627906,
+      "grad_norm": 15.029560089111328,
+      "learning_rate": 1.24031007751938e-05,
+      "loss": 0.2644,
+      "step": 380
+    },
+    {
+      "epoch": 4.534883720930233,
+      "grad_norm": 63.83937072753906,
+      "learning_rate": 1.2144702842377262e-05,
+      "loss": 0.2595,
+      "step": 390
+    },
+    {
+      "epoch": 4.651162790697675,
+      "grad_norm": 50.91780090332031,
+      "learning_rate": 1.1886304909560724e-05,
+      "loss": 0.2734,
+      "step": 400
+    },
+    {
+      "epoch": 4.767441860465116,
+      "grad_norm": 49.75490951538086,
+      "learning_rate": 1.1627906976744187e-05,
+      "loss": 0.2704,
+      "step": 410
+    },
+    {
+      "epoch": 4.883720930232558,
+      "grad_norm": 49.518131256103516,
+      "learning_rate": 1.1369509043927648e-05,
+      "loss": 0.299,
+      "step": 420
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 30.25467872619629,
+      "learning_rate": 1.1111111111111113e-05,
+      "loss": 0.2598,
+      "step": 430
+    },
+    {
+      "epoch": 5.116279069767442,
+      "grad_norm": 40.72892379760742,
+      "learning_rate": 1.0852713178294573e-05,
+      "loss": 0.21,
+      "step": 440
+    },
+    {
+      "epoch": 5.232558139534884,
+      "grad_norm": 41.70243835449219,
+      "learning_rate": 1.0594315245478038e-05,
+      "loss": 0.2307,
+      "step": 450
+    },
+    {
+      "epoch": 5.348837209302325,
+      "grad_norm": 66.0749740600586,
+      "learning_rate": 1.03359173126615e-05,
+      "loss": 0.1893,
+      "step": 460
+    },
+    {
+      "epoch": 5.465116279069767,
+      "grad_norm": 55.968231201171875,
+      "learning_rate": 1.0077519379844963e-05,
+      "loss": 0.2739,
+      "step": 470
+    },
+    {
+      "epoch": 5.5813953488372094,
+      "grad_norm": 44.32405090332031,
+      "learning_rate": 9.819121447028424e-06,
+      "loss": 0.2613,
+      "step": 480
+    },
+    {
+      "epoch": 5.6976744186046515,
+      "grad_norm": 80.5435791015625,
+      "learning_rate": 9.560723514211887e-06,
+      "loss": 0.2298,
+      "step": 490
+    },
+    {
+      "epoch": 5.813953488372093,
+      "grad_norm": 51.336830139160156,
+      "learning_rate": 9.30232558139535e-06,
+      "loss": 0.2614,
+      "step": 500
+    },
+    {
+      "epoch": 5.930232558139535,
+      "grad_norm": 24.42147445678711,
+      "learning_rate": 9.043927648578812e-06,
+      "loss": 0.2813,
+      "step": 510
+    },
+    {
+      "epoch": 6.046511627906977,
+      "grad_norm": 43.1801872253418,
+      "learning_rate": 8.785529715762275e-06,
+      "loss": 0.1733,
+      "step": 520
+    },
+    {
+      "epoch": 6.162790697674419,
+      "grad_norm": 46.86786651611328,
+      "learning_rate": 8.527131782945736e-06,
+      "loss": 0.2382,
+      "step": 530
+    },
+    {
+      "epoch": 6.27906976744186,
+      "grad_norm": 33.578487396240234,
+      "learning_rate": 8.2687338501292e-06,
+      "loss": 0.1822,
+      "step": 540
+    },
+    {
+      "epoch": 6.395348837209302,
+      "grad_norm": 39.639198303222656,
+      "learning_rate": 8.010335917312663e-06,
+      "loss": 0.202,
+      "step": 550
+    },
+    {
+      "epoch": 6.511627906976744,
+      "grad_norm": 27.308820724487305,
+      "learning_rate": 7.751937984496126e-06,
+      "loss": 0.2184,
+      "step": 560
+    },
+    {
+      "epoch": 6.627906976744186,
+      "grad_norm": 30.545543670654297,
+      "learning_rate": 7.493540051679587e-06,
+      "loss": 0.185,
+      "step": 570
+    },
+    {
+      "epoch": 6.7441860465116275,
+      "grad_norm": 47.775875091552734,
+      "learning_rate": 7.23514211886305e-06,
+      "loss": 0.2075,
+      "step": 580
+    },
+    {
+      "epoch": 6.8604651162790695,
+      "grad_norm": 50.34706115722656,
+      "learning_rate": 6.976744186046513e-06,
+      "loss": 0.2519,
+      "step": 590
+    },
+    {
+      "epoch": 6.976744186046512,
+      "grad_norm": 23.37942886352539,
+      "learning_rate": 6.718346253229975e-06,
+      "loss": 0.2196,
+      "step": 600
+    },
+    {
+      "epoch": 7.093023255813954,
+      "grad_norm": 29.51810646057129,
+      "learning_rate": 6.459948320413437e-06,
+      "loss": 0.1596,
+      "step": 610
+    },
+    {
+      "epoch": 7.209302325581396,
+      "grad_norm": 44.744171142578125,
+      "learning_rate": 6.2015503875969e-06,
+      "loss": 0.1719,
+      "step": 620
+    },
+    {
+      "epoch": 7.325581395348837,
+      "grad_norm": 64.436767578125,
+      "learning_rate": 5.943152454780362e-06,
+      "loss": 0.2198,
+      "step": 630
+    },
+    {
+      "epoch": 7.441860465116279,
+      "grad_norm": 19.253662109375,
+      "learning_rate": 5.684754521963824e-06,
+      "loss": 0.1481,
+      "step": 640
+    },
+    {
+      "epoch": 7.558139534883721,
+      "grad_norm": 33.91790771484375,
+      "learning_rate": 5.4263565891472865e-06,
+      "loss": 0.2362,
+      "step": 650
+    },
+    {
+      "epoch": 7.674418604651163,
+      "grad_norm": 27.47454071044922,
+      "learning_rate": 5.16795865633075e-06,
+      "loss": 0.1741,
+      "step": 660
+    },
+    {
+      "epoch": 7.790697674418604,
+      "grad_norm": 33.89091491699219,
+      "learning_rate": 4.909560723514212e-06,
+      "loss": 0.1759,
+      "step": 670
+    },
+    {
+      "epoch": 7.906976744186046,
+      "grad_norm": 50.72035217285156,
+      "learning_rate": 4.651162790697675e-06,
+      "loss": 0.1852,
+      "step": 680
+    },
+    {
+      "epoch": 8.023255813953488,
+      "grad_norm": 15.040735244750977,
+      "learning_rate": 4.3927648578811375e-06,
+      "loss": 0.1454,
+      "step": 690
+    },
+    {
+      "epoch": 8.13953488372093,
+      "grad_norm": 45.22159194946289,
+      "learning_rate": 4.1343669250646e-06,
+      "loss": 0.2117,
+      "step": 700
+    },
+    {
+      "epoch": 8.255813953488373,
+      "grad_norm": 18.63289451599121,
+      "learning_rate": 3.875968992248063e-06,
+      "loss": 0.1718,
+      "step": 710
+    },
+    {
+      "epoch": 8.372093023255815,
+      "grad_norm": 16.854759216308594,
+      "learning_rate": 3.617571059431525e-06,
+      "loss": 0.176,
+      "step": 720
+    },
+    {
+      "epoch": 8.488372093023255,
+      "grad_norm": 49.81332015991211,
+      "learning_rate": 3.3591731266149875e-06,
+      "loss": 0.1962,
+      "step": 730
+    },
+    {
+      "epoch": 8.604651162790697,
+      "grad_norm": 31.597118377685547,
+      "learning_rate": 3.10077519379845e-06,
+      "loss": 0.1685,
+      "step": 740
+    },
+    {
+      "epoch": 8.720930232558139,
+      "grad_norm": 45.57378387451172,
+      "learning_rate": 2.842377260981912e-06,
+      "loss": 0.1693,
+      "step": 750
+    },
+    {
+      "epoch": 8.837209302325581,
+      "grad_norm": 33.74851989746094,
+      "learning_rate": 2.583979328165375e-06,
+      "loss": 0.161,
+      "step": 760
+    },
+    {
+      "epoch": 8.953488372093023,
+      "grad_norm": 32.361839294433594,
+      "learning_rate": 2.3255813953488376e-06,
+      "loss": 0.1661,
+      "step": 770
+    },
+    {
+      "epoch": 9.069767441860465,
+      "grad_norm": 25.80358123779297,
+      "learning_rate": 2.0671834625323e-06,
+      "loss": 0.1393,
+      "step": 780
+    },
+    {
+      "epoch": 9.186046511627907,
+      "grad_norm": 37.717708587646484,
+      "learning_rate": 1.8087855297157624e-06,
+      "loss": 0.1522,
+      "step": 790
+    },
+    {
+      "epoch": 9.30232558139535,
+      "grad_norm": 36.026527404785156,
+      "learning_rate": 1.550387596899225e-06,
+      "loss": 0.1683,
+      "step": 800
+    },
+    {
+      "epoch": 9.418604651162791,
+      "grad_norm": 41.67938232421875,
+      "learning_rate": 1.2919896640826874e-06,
+      "loss": 0.146,
+      "step": 810
+    },
+    {
+      "epoch": 9.534883720930232,
+      "grad_norm": 26.428848266601562,
+      "learning_rate": 1.03359173126615e-06,
+      "loss": 0.1653,
+      "step": 820
+    },
+    {
+      "epoch": 9.651162790697674,
+      "grad_norm": 20.74589729309082,
+      "learning_rate": 7.751937984496125e-07,
+      "loss": 0.1543,
+      "step": 830
+    },
+    {
+      "epoch": 9.767441860465116,
+      "grad_norm": 25.37066078186035,
+      "learning_rate": 5.16795865633075e-07,
+      "loss": 0.1644,
+      "step": 840
+    },
+    {
+      "epoch": 9.883720930232558,
+      "grad_norm": 56.05203628540039,
+      "learning_rate": 2.583979328165375e-07,
+      "loss": 0.1921,
+      "step": 850
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.19254037737846375,
+      "learning_rate": 0.0,
+      "loss": 0.1691,
+      "step": 860
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 860,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 2500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28dccd6852282dbe630c1844fcc84e1777fd602b981895a0af7dbc24b4de493a
+size 5841

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff