ltuzova committed
Commit c71656b · verified · 1 Parent(s): 5f173a1

tapt_helpfulness_unipelt_pretraining_model

README.md ADDED
@@ -0,0 +1,103 @@
+ ---
+ license: mit
+ base_model: roberta-base
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: tapt_helpfulness_unipelt_pretraining_model
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # tapt_helpfulness_unipelt_pretraining_model
+
+ This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.5289
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 21
+ - eval_batch_size: 21
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 42
+ - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-06
+ - lr_scheduler_type: linear
+ - num_epochs: 100
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:-----:|:---------------:|
+ | 4.9049 | 1.0 | 232 | 2.5347 |
+ | 2.4175 | 2.0 | 465 | 2.1097 |
+ | 2.1517 | 3.0 | 697 | 1.9547 |
+ | 2.03 | 4.0 | 930 | 1.8601 |
+ | 1.9577 | 5.0 | 1162 | 1.8151 |
+ | 1.8962 | 6.0 | 1395 | 1.7581 |
+ | 1.8651 | 7.0 | 1627 | 1.7348 |
+ | 1.8269 | 8.0 | 1860 | 1.7102 |
+ | 1.8062 | 9.0 | 2092 | 1.6833 |
+ | 1.7794 | 10.0 | 2325 | 1.6744 |
+ | 1.7685 | 11.0 | 2557 | 1.6544 |
+ | 1.7498 | 12.0 | 2790 | 1.6456 |
+ | 1.7364 | 13.0 | 3022 | 1.6298 |
+ | 1.7226 | 14.0 | 3255 | 1.6205 |
+ | 1.7154 | 15.0 | 3487 | 1.6238 |
+ | 1.6913 | 16.0 | 3720 | 1.6027 |
+ | 1.6994 | 17.0 | 3952 | 1.5928 |
+ | 1.6795 | 18.0 | 4185 | 1.5829 |
+ | 1.6809 | 19.0 | 4417 | 1.6010 |
+ | 1.6669 | 20.0 | 4650 | 1.5794 |
+ | 1.6678 | 21.0 | 4882 | 1.5784 |
+ | 1.6533 | 22.0 | 5115 | 1.5644 |
+ | 1.6547 | 23.0 | 5347 | 1.5838 |
+ | 1.6509 | 24.0 | 5580 | 1.5650 |
+ | 1.6479 | 25.0 | 5812 | 1.5533 |
+ | 1.6309 | 26.0 | 6045 | 1.5508 |
+ | 1.6382 | 27.0 | 6277 | 1.5530 |
+ | 1.6218 | 28.0 | 6510 | 1.5632 |
+ | 1.6242 | 29.0 | 6742 | 1.5617 |
+ | 1.6273 | 30.0 | 6975 | 1.5395 |
+ | 1.6205 | 31.0 | 7207 | 1.5427 |
+ | 1.612 | 32.0 | 7440 | 1.5402 |
+ | 1.6151 | 33.0 | 7672 | 1.5307 |
+ | 1.6088 | 34.0 | 7905 | 1.5265 |
+ | 1.6099 | 35.0 | 8137 | 1.5353 |
+ | 1.6006 | 36.0 | 8370 | 1.5202 |
+ | 1.6062 | 37.0 | 8602 | 1.5279 |
+ | 1.5947 | 38.0 | 8835 | 1.5318 |
+ | 1.6033 | 39.0 | 9067 | 1.5291 |
+ | 1.5891 | 40.0 | 9300 | 1.5197 |
+ | 1.5944 | 41.0 | 9532 | 1.5337 |
+ | 1.5841 | 42.0 | 9765 | 1.5208 |
+ | 1.592 | 43.0 | 9997 | 1.5223 |
+ | 1.5862 | 44.0 | 10230 | 1.5343 |
+ | 1.5895 | 45.0 | 10462 | 1.5289 |
+
+
+ ### Framework versions
+
+ - Transformers 4.36.2
+ - Pytorch 2.2.1+cu121
+ - Datasets 2.18.0
+ - Tokenizers 0.15.2
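For orientation, the hyperparameters listed in the README map onto transformers' `TrainingArguments` roughly as sketched below. This is an assumption about how the run was configured, not the author's actual training script, and `output_dir` is hypothetical. Note that `num_epochs` was set to 100 while the results table stops at epoch 45, so the run was presumably stopped early.

```python
from transformers import TrainingArguments

# Sketch only -- argument names are the standard transformers API,
# values are copied from the hyperparameter list above.
args = TrainingArguments(
    output_dir="tapt_helpfulness_unipelt_pretraining_model",  # hypothetical
    learning_rate=1e-4,
    per_device_train_batch_size=21,
    per_device_eval_batch_size=21,
    gradient_accumulation_steps=2,  # effective train batch: 21 * 2 = 42
    seed=42,
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_epsilon=1e-6,
    lr_scheduler_type="linear",
    num_train_epochs=100,
)
```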
default/head_config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "config": {
+     "activation_function": "gelu",
+     "bias": true,
+     "embedding_size": 768,
+     "head_type": "masked_lm",
+     "label2id": null,
+     "layer_norm": true,
+     "layers": 2,
+     "shift_labels": false,
+     "vocab_size": 50265
+   },
+   "hidden_size": 768,
+   "model_class": "RobertaAdapterModel",
+   "model_name": "roberta-base",
+   "model_type": "roberta",
+   "name": "default",
+   "version": "0.1.2"
+ }
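This head config describes a BERT-style masked-LM prediction head on top of RoBERTa: two layers (a 768-dim dense projection plus the vocabulary decoder), GELU activation, layer norm, and the 50,265-token RoBERTa vocabulary. A minimal sketch of creating such a head with the `adapters` library (v0.1.x, matching the `"version"` field; the actual training script is not part of this commit):

```python
from adapters import RobertaAdapterModel

model = RobertaAdapterModel.from_pretrained("roberta-base")
# Adds an MLM head named "default"; the library defaults (gelu activation,
# layer norm, two layers) match the head_config.json above.
model.add_masked_lm_head("default")
```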
default/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdff7928366b173ef2d74fcdf04581a7c98c13c3bd11623203f922ffcc7248c5
+ size 156986358
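The three lines above are a Git LFS pointer, not the weights themselves: the repository tracks only the SHA-256 digest and byte size (~157 MB here), while the binary lives in LFS storage. A sketch of resolving the actual file with `huggingface_hub`; the `repo_id` is inferred from the commit header and is an assumption:

```python
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="ltuzova/tapt_helpfulness_unipelt_pretraining_model",  # assumed repo id
    filename="default/pytorch_model_head.bin",
)
print(path)  # local cache path of the downloaded head weights
```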
runs/Apr16_20-02-09_4d4f64e167bc/events.out.tfevents.1713297751.4d4f64e167bc.3991.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:da9f64886e3c90356b91dcdfef021eafb76a1d053b948536f41c17bd183dbc8d
- size 24323
+ oid sha256:d4354cbe0810179e77d85b180406f33f9955b664973cd8f20268f2e9b2ddd942
+ size 24677
tapt_unipelt/adapter_config.json ADDED
@@ -0,0 +1,76 @@
+ {
+   "config": {
+     "architecture": "union",
+     "configs": [
+       {
+         "architecture": "prefix_tuning",
+         "bottleneck_size": 512,
+         "cross_prefix": true,
+         "dropout": 0.0,
+         "encoder_prefix": true,
+         "flat": false,
+         "leave_out": [],
+         "non_linearity": "tanh",
+         "prefix_length": 10,
+         "shared_gating": true,
+         "use_gating": true
+       },
+       {
+         "adapter_residual_before_ln": false,
+         "cross_adapter": false,
+         "factorized_phm_W": true,
+         "factorized_phm_rule": false,
+         "hypercomplex_nonlinearity": "glorot-uniform",
+         "init_weights": "bert",
+         "inv_adapter": null,
+         "inv_adapter_reduction_factor": null,
+         "is_parallel": false,
+         "learn_phm": true,
+         "leave_out": [],
+         "ln_after": false,
+         "ln_before": false,
+         "mh_adapter": false,
+         "non_linearity": "relu",
+         "original_ln_after": true,
+         "original_ln_before": true,
+         "output_adapter": true,
+         "phm_bias": true,
+         "phm_c_init": "normal",
+         "phm_dim": 4,
+         "phm_init_range": 0.0001,
+         "phm_layer": false,
+         "phm_rank": 1,
+         "reduction_factor": 16,
+         "residual_before_ln": true,
+         "scaling": 1.0,
+         "shared_W_phm": false,
+         "shared_phm_rule": true,
+         "use_gating": true
+       },
+       {
+         "alpha": 8,
+         "architecture": "lora",
+         "attn_matrices": [
+           "q",
+           "v"
+         ],
+         "composition_mode": "add",
+         "dropout": 0.0,
+         "init_weights": "lora",
+         "intermediate_lora": false,
+         "leave_out": [],
+         "output_lora": false,
+         "r": 8,
+         "selfattn_lora": true,
+         "use_gating": true
+       }
+     ]
+   },
+   "config_id": "7ee253f8cb7be91f",
+   "hidden_size": 768,
+   "model_class": "RobertaAdapterModel",
+   "model_name": "roberta-base",
+   "model_type": "roberta",
+   "name": "tapt_unipelt",
+   "version": "0.1.2"
+ }
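This config is a union of three parameter-efficient methods with gating enabled, i.e. the UniPELT setup: prefix tuning (prefix length 10), a sequential bottleneck adapter (reduction factor 16), and LoRA (r = 8, alpha = 8 on the query/value matrices). The values match the defaults of `UniPELTConfig` in the `adapters` library, so the file was most likely produced by something like the following sketch (an assumption, not the author's actual script):

```python
from adapters import RobertaAdapterModel, UniPELTConfig

model = RobertaAdapterModel.from_pretrained("roberta-base")
# UniPELTConfig is a union of PrefixTuningConfig(prefix_length=10),
# SeqBnConfig(reduction_factor=16) and LoRAConfig(r=8), each with
# use_gating=True -- the same three blocks as in the JSON above.
model.add_adapter("tapt_unipelt", config=UniPELTConfig())
model.train_adapter("tapt_unipelt")  # freeze the base model, train only the adapter
```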
tapt_unipelt/head_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "activation_function": "gelu",
4
+ "bias": true,
5
+ "embedding_size": 768,
6
+ "head_type": "masked_lm",
7
+ "label2id": null,
8
+ "layer_norm": true,
9
+ "layers": 2,
10
+ "shift_labels": false,
11
+ "vocab_size": 50265
12
+ },
13
+ "hidden_size": 768,
14
+ "model_class": "RobertaAdapterModel",
15
+ "model_name": "roberta-base",
16
+ "model_type": "roberta",
17
+ "name": "tapt_unipelt",
18
+ "version": "0.1.2"
19
+ }
tapt_unipelt/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:476db16df6032275944dd8fa0ea4f902a161d05420c1ea2147d0221f98dee599
+ size 44418864
tapt_unipelt/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74f145696a67ce7ad871ded48701e331a45f2d75a59c3323a5f538db03b0e496
+ size 156986358
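Taken together, `tapt_unipelt/` holds everything needed to reuse the pretrained UniPELT adapter: the config, the adapter weights (~44 MB), and the MLM head (~157 MB). A minimal loading sketch with the `adapters` library, assuming a local clone of this repository (the `./tapt_unipelt` path mirrors the folder layout in this commit):

```python
from adapters import RobertaAdapterModel

model = RobertaAdapterModel.from_pretrained("roberta-base")
# load_adapter reads adapter_config.json and pytorch_adapter.bin and,
# by default, the matching prediction head as well.
name = model.load_adapter("./tapt_unipelt")
model.set_active_adapters(name)
```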