Training in progress, epoch 0, checkpoint

Browse files

Files changed (11) hide show

last-checkpoint/default/head_config.json +19 -0
last-checkpoint/default/pytorch_model_head.bin +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/tapt_unipelt/adapter_config.json +76 -0
last-checkpoint/tapt_unipelt/head_config.json +19 -0
last-checkpoint/tapt_unipelt/pytorch_adapter.bin +3 -0
last-checkpoint/tapt_unipelt/pytorch_model_head.bin +3 -0
last-checkpoint/trainer_state.json +35 -0
last-checkpoint/training_args.bin +3 -0

last-checkpoint/default/head_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "config": {
+    "activation_function": "gelu",
+    "bias": true,
+    "embedding_size": 768,
+    "head_type": "masked_lm",
+    "label2id": null,
+    "layer_norm": true,
+    "layers": 2,
+    "shift_labels": false,
+    "vocab_size": 50265
+  },
+  "hidden_size": 768,
+  "model_class": "RobertaAdapterModel",
+  "model_name": "roberta-base",
+  "model_type": "roberta",
+  "name": "default",
+  "version": "0.1.2"
+}

last-checkpoint/default/pytorch_model_head.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bdff7928366b173ef2d74fcdf04581a7c98c13c3bd11623203f922ffcc7248c5
+size 156986358

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a46534c06e096874f8b363e21727329e759dd5a284a12b8ca6b4a90aacf4531
+size 93571514

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba062fb616648fe95a7387c720bd7229051939a15b5efb41d6bab65bc4443944
+size 14244

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42cfbbb71aacede8d916cd2c75ee19ec5e636b312ce77040498932f13f3d26d9
+size 1064

last-checkpoint/tapt_unipelt/adapter_config.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "config": {
+    "architecture": "union",
+    "configs": [
+      {
+        "architecture": "prefix_tuning",
+        "bottleneck_size": 512,
+        "cross_prefix": true,
+        "dropout": 0.0,
+        "encoder_prefix": true,
+        "flat": false,
+        "leave_out": [],
+        "non_linearity": "tanh",
+        "prefix_length": 10,
+        "shared_gating": true,
+        "use_gating": true
+      },
+      {
+        "adapter_residual_before_ln": false,
+        "cross_adapter": false,
+        "factorized_phm_W": true,
+        "factorized_phm_rule": false,
+        "hypercomplex_nonlinearity": "glorot-uniform",
+        "init_weights": "bert",
+        "inv_adapter": null,
+        "inv_adapter_reduction_factor": null,
+        "is_parallel": false,
+        "learn_phm": true,
+        "leave_out": [],
+        "ln_after": false,
+        "ln_before": false,
+        "mh_adapter": false,
+        "non_linearity": "relu",
+        "original_ln_after": true,
+        "original_ln_before": true,
+        "output_adapter": true,
+        "phm_bias": true,
+        "phm_c_init": "normal",
+        "phm_dim": 4,
+        "phm_init_range": 0.0001,
+        "phm_layer": false,
+        "phm_rank": 1,
+        "reduction_factor": 16,
+        "residual_before_ln": true,
+        "scaling": 1.0,
+        "shared_W_phm": false,
+        "shared_phm_rule": true,
+        "use_gating": true
+      },
+      {
+        "alpha": 8,
+        "architecture": "lora",
+        "attn_matrices": [
+          "q",
+          "v"
+        ],
+        "composition_mode": "add",
+        "dropout": 0.0,
+        "init_weights": "lora",
+        "intermediate_lora": false,
+        "leave_out": [],
+        "output_lora": false,
+        "r": 8,
+        "selfattn_lora": true,
+        "use_gating": true
+      }
+    ]
+  },
+  "config_id": "7ee253f8cb7be91f",
+  "hidden_size": 768,
+  "model_class": "RobertaAdapterModel",
+  "model_name": "roberta-base",
+  "model_type": "roberta",
+  "name": "tapt_unipelt",
+  "version": "0.1.2"
+}

last-checkpoint/tapt_unipelt/head_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "config": {
+    "activation_function": "gelu",
+    "bias": true,
+    "embedding_size": 768,
+    "head_type": "masked_lm",
+    "label2id": null,
+    "layer_norm": true,
+    "layers": 2,
+    "shift_labels": false,
+    "vocab_size": 50265
+  },
+  "hidden_size": 768,
+  "model_class": "RobertaAdapterModel",
+  "model_name": "roberta-base",
+  "model_type": "roberta",
+  "name": "tapt_unipelt",
+  "version": "0.1.2"
+}

last-checkpoint/tapt_unipelt/pytorch_adapter.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:584c2f7c9333fc4cf4afe6763b017083f7a4e52495f4877babd8553dadbce535
+size 44418864

last-checkpoint/tapt_unipelt/pytorch_model_head.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0768c9d31497289722df2637a035da1d47556c67bd5fa1be54e9e12718fc7833
+size 156986358

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 2.5050151348114014,
+  "best_model_checkpoint": "tapt_helpfulness_unipelt_pretraining_model/checkpoint-232",
+  "epoch": 0.9978494623655914,
+  "eval_steps": 500,
+  "global_step": 232,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 4.7846,
+      "step": 232
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 2.5050151348114014,
+      "eval_runtime": 27.8974,
+      "eval_samples_per_second": 70.974,
+      "eval_steps_per_second": 3.405,
+      "step": 232
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 23200,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
+  "save_steps": 500,
+  "total_flos": 2936712191373312.0,
+  "train_batch_size": 21,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b04ee6a722e0a2aa2bcbdc779e700fcf9833b5a2a4670f81b1cb465375f90f3
+size 4728