Upload InkubaLM multi-task with adapters

Browse files

Files changed (14) hide show

config.json +53 -6
machine-translation/adapter_config.json +3 -3
machine-translation/head_config.json +3 -3
machine-translation/pytorch_adapter.bin +2 -2
machine-translation/pytorch_model_head.bin +2 -2
model.safetensors +2 -2
sentiment-analysis/adapter_config.json +2 -2
sentiment-analysis/head_config.json +2 -2
sentiment-analysis/pytorch_adapter.bin +2 -2
sentiment-analysis/pytorch_model_head.bin +2 -2
xnli/adapter_config.json +2 -2
xnli/head_config.json +2 -2
xnli/pytorch_adapter.bin +2 -2
xnli/pytorch_model_head.bin +2 -2

config.json CHANGED Viewed

@@ -1,12 +1,47 @@
 {
-  "_name_or_path": "YvanCarre/InkubaLM-multitask_distill",
   "adapters": {
     "adapters": {
-      "machine-translation": "seq_bn",
       "sentiment-analysis": "seq_bn",
       "xnli": "seq_bn"
     },
-    "config_map": {},
     "fusion_config_map": {},
     "fusion_name_map": {},
     "fusions": {}
@@ -23,7 +58,7 @@
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 1024,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
@@ -40,14 +75,26 @@
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers": 6,
   "num_key_value_heads": 32,
   "prediction_heads": {
     "machine-translation": {
       "activation_function": "gelu",
       "bias": true,
       "dropout_prob": null,
-      "embedding_size": 1024,
       "head_type": "causal_lm",
       "label2id": null,
       "layer_norm": true,

 {
+  "_name_or_path": "YvanCarre/InkubaLM-multitask",
   "adapters": {
     "adapters": {
+      "machine-translation": "26cd1b10db746518",
       "sentiment-analysis": "seq_bn",
       "xnli": "seq_bn"
     },
+    "config_map": {
+      "26cd1b10db746518": {
+        "adapter_residual_before_ln": false,
+        "cross_adapter": false,
+        "dropout": 0.0,
+        "factorized_phm_W": true,
+        "factorized_phm_rule": false,
+        "hypercomplex_nonlinearity": "glorot-uniform",
+        "init_weights": "bert",
+        "inv_adapter": null,
+        "inv_adapter_reduction_factor": null,
+        "is_parallel": false,
+        "learn_phm": true,
+        "leave_out": [],
+        "ln_after": false,
+        "ln_before": false,
+        "mh_adapter": false,
+        "non_linearity": "relu",
+        "original_ln_after": true,
+        "original_ln_before": true,
+        "output_adapter": true,
+        "phm_bias": true,
+        "phm_c_init": "normal",
+        "phm_dim": 4,
+        "phm_init_range": 0.0001,
+        "phm_layer": false,
+        "phm_rank": 1,
+        "reduction_factor": 8,
+        "residual_before_ln": true,
+        "scaling": 1.0,
+        "shared_W_phm": false,
+        "shared_phm_rule": true,
+        "stochastic_depth": 0.0,
+        "use_gating": false
+      }
+    },
     "fusion_config_map": {},
     "fusion_name_map": {},
     "fusions": {}
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
+  "hidden_size": 2048,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
+  "num_hidden_layers": 8,
   "num_key_value_heads": 32,
   "prediction_heads": {
+    "default": {
+      "activation_function": null,
+      "bias": false,
+      "dropout_prob": null,
+      "embedding_size": 2048,
+      "head_type": "causal_lm",
+      "label2id": null,
+      "layer_norm": false,
+      "layers": 1,
+      "shift_labels": true,
+      "vocab_size": 61788
+    },
     "machine-translation": {
       "activation_function": "gelu",
       "bias": true,
       "dropout_prob": null,
+      "embedding_size": 2048,
       "head_type": "causal_lm",
       "label2id": null,
       "layer_norm": true,

machine-translation/adapter_config.json CHANGED Viewed

@@ -25,7 +25,7 @@
     "phm_init_range": 0.0001,
     "phm_layer": false,
     "phm_rank": 1,
-    "reduction_factor": 16,
     "residual_before_ln": true,
     "scaling": 1.0,
     "shared_W_phm": false,
@@ -33,9 +33,9 @@
     "stochastic_depth": 0.0,
     "use_gating": false
   },
-  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
-  "model_name": "YvanCarre/InkubaLM-multitask_distill",
   "model_type": "llama",
   "name": "machine-translation",
   "version": "adapters.1.1.0"

     "phm_init_range": 0.0001,
     "phm_layer": false,
     "phm_rank": 1,
+    "reduction_factor": 8,
     "residual_before_ln": true,
     "scaling": 1.0,
     "shared_W_phm": false,
     "stochastic_depth": 0.0,
     "use_gating": false
   },
+  "hidden_size": 2048,
   "model_class": "LlamaAdapterModel",
+  "model_name": "YvanCarre/InkubaLM-multitask",
   "model_type": "llama",
   "name": "machine-translation",
   "version": "adapters.1.1.0"

machine-translation/head_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "activation_function": "gelu",
     "bias": true,
     "dropout_prob": null,
-    "embedding_size": 1024,
     "head_type": "causal_lm",
     "label2id": null,
     "layer_norm": true,
@@ -11,9 +11,9 @@
     "shift_labels": true,
     "vocab_size": 61788
   },
-  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
-  "model_name": "YvanCarre/InkubaLM-multitask_distill",
   "model_type": "llama",
   "name": "machine-translation",
   "version": "adapters.1.1.0"

     "activation_function": "gelu",
     "bias": true,
     "dropout_prob": null,
+    "embedding_size": 2048,
     "head_type": "causal_lm",
     "label2id": null,
     "layer_norm": true,
     "shift_labels": true,
     "vocab_size": 61788
   },
+  "hidden_size": 2048,
   "model_class": "LlamaAdapterModel",
+  "model_name": "YvanCarre/InkubaLM-multitask",
   "model_type": "llama",
   "name": "machine-translation",
   "version": "adapters.1.1.0"

machine-translation/pytorch_adapter.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bd574ea0d6427a0a3a53c284c8901a73ee0c1cdc54e8bb1cab44955d46650b6
-size 465434

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b6b002f72b489ffaa766b7d366b7cac13ad7d8eea08a8e4230cb4bd0fb6a63a
+size 4426802

machine-translation/pytorch_model_head.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41f94019959f76ba5744845bf97ea1f47a9f49a183347935c5baf8f2359c5ebd
-size 33318594

 version https://git-lfs.github.com/spec/v1
+oid sha256:be2ded33057399a641e72ed20cc47ff4c55c552069fd468d3ff3549568045d3f
+size 67587010

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f95ff5dcae433d413f63f618f2493417de985555d6af8aeb13024f8df0d33ee
-size 241922568

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9e184725fc66b99e41c839b454048e486b282e28c731e6c06c097d9c29574ac
+size 611402780

sentiment-analysis/adapter_config.json CHANGED Viewed

@@ -33,9 +33,9 @@
     "stochastic_depth": 0.0,
     "use_gating": false
   },
-  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
-  "model_name": "YvanCarre/InkubaLM-multitask_distill",
   "model_type": "llama",
   "name": "sentiment-analysis",
   "version": "adapters.1.1.0"

     "stochastic_depth": 0.0,
     "use_gating": false
   },
+  "hidden_size": 2048,
   "model_class": "LlamaAdapterModel",
+  "model_name": "YvanCarre/InkubaLM-multitask",
   "model_type": "llama",
   "name": "sentiment-analysis",
   "version": "adapters.1.1.0"

sentiment-analysis/head_config.json CHANGED Viewed

@@ -13,9 +13,9 @@
     "num_labels": 3,
     "use_pooler": false
   },
-  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
-  "model_name": "YvanCarre/InkubaLM-multitask_distill",
   "model_type": "llama",
   "name": "sentiment-analysis",
   "version": "adapters.1.1.0"

     "num_labels": 3,
     "use_pooler": false
   },
+  "hidden_size": 2048,
   "model_class": "LlamaAdapterModel",
+  "model_name": "YvanCarre/InkubaLM-multitask",
   "model_type": "llama",
   "name": "sentiment-analysis",
   "version": "adapters.1.1.0"

sentiment-analysis/pytorch_adapter.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77a3b3227ad33f4c11d8c41ebe166dc838000c4176f1201305c04db24b540c05
-size 465370

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd8ca904c0c7788981e1d3198d806cfd69cd2996ca1c2f64908baefd185a3d0a
+size 2260914

sentiment-analysis/pytorch_model_head.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cb845315a8bf12b2997c7ff74ca84978fa9120302436eed20f9b4c610d75447
-size 552306

 version https://git-lfs.github.com/spec/v1
+oid sha256:b252c53e95cd4e19f3b9e5563c2829ebbb3ba9fba76cafa05902c3298c380e64
+size 2178738

xnli/adapter_config.json CHANGED Viewed

@@ -33,9 +33,9 @@
     "stochastic_depth": 0.0,
     "use_gating": false
   },
-  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
-  "model_name": "YvanCarre/InkubaLM-multitask_distill",
   "model_type": "llama",
   "name": "xnli",
   "version": "adapters.1.1.0"

     "stochastic_depth": 0.0,
     "use_gating": false
   },
+  "hidden_size": 2048,
   "model_class": "LlamaAdapterModel",
+  "model_name": "YvanCarre/InkubaLM-multitask",
   "model_type": "llama",
   "name": "xnli",
   "version": "adapters.1.1.0"

xnli/head_config.json CHANGED Viewed

@@ -13,9 +13,9 @@
     "num_labels": 3,
     "use_pooler": false
   },
-  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
-  "model_name": "YvanCarre/InkubaLM-multitask_distill",
   "model_type": "llama",
   "name": "xnli",
   "version": "adapters.1.1.0"

     "num_labels": 3,
     "use_pooler": false
   },
+  "hidden_size": 2048,
   "model_class": "LlamaAdapterModel",
+  "model_name": "YvanCarre/InkubaLM-multitask",
   "model_type": "llama",
   "name": "xnli",
   "version": "adapters.1.1.0"

xnli/pytorch_adapter.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:859b9e906ed47986007036046853c59173bd8913f1eeec326a6aa3ce39d1714e
-size 464218

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6443b3b56f21faf143eb05f15ddd1e004f147a3a8e772e377c6c653b67f5e11
+size 2259378

xnli/pytorch_model_head.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71b9aaadc91a7329a94d861093ea1ac95e21b75f1397698cb389193ab5a4d8a7
-size 553995

 version https://git-lfs.github.com/spec/v1
+oid sha256:752e8b4111a708be5fcaff8d66cb16cbf8ba968023f1b4b170cf55f8af8da371
+size 2184971