Upload InkubaLM multi-task with adapters
Browse files- config.json +53 -6
- machine-translation/adapter_config.json +3 -3
- machine-translation/head_config.json +3 -3
- machine-translation/pytorch_adapter.bin +2 -2
- machine-translation/pytorch_model_head.bin +2 -2
- model.safetensors +2 -2
- sentiment-analysis/adapter_config.json +2 -2
- sentiment-analysis/head_config.json +2 -2
- sentiment-analysis/pytorch_adapter.bin +2 -2
- sentiment-analysis/pytorch_model_head.bin +2 -2
- xnli/adapter_config.json +2 -2
- xnli/head_config.json +2 -2
- xnli/pytorch_adapter.bin +2 -2
- xnli/pytorch_model_head.bin +2 -2
config.json
CHANGED
|
@@ -1,12 +1,47 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "YvanCarre/InkubaLM-
|
| 3 |
"adapters": {
|
| 4 |
"adapters": {
|
| 5 |
-
"machine-translation": "
|
| 6 |
"sentiment-analysis": "seq_bn",
|
| 7 |
"xnli": "seq_bn"
|
| 8 |
},
|
| 9 |
-
"config_map": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
"fusion_config_map": {},
|
| 11 |
"fusion_name_map": {},
|
| 12 |
"fusions": {}
|
|
@@ -23,7 +58,7 @@
|
|
| 23 |
"eos_token_id": 2,
|
| 24 |
"head_dim": 64,
|
| 25 |
"hidden_act": "silu",
|
| 26 |
-
"hidden_size":
|
| 27 |
"id2label": {
|
| 28 |
"0": "LABEL_0",
|
| 29 |
"1": "LABEL_1",
|
|
@@ -40,14 +75,26 @@
|
|
| 40 |
"mlp_bias": false,
|
| 41 |
"model_type": "llama",
|
| 42 |
"num_attention_heads": 32,
|
| 43 |
-
"num_hidden_layers":
|
| 44 |
"num_key_value_heads": 32,
|
| 45 |
"prediction_heads": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
"machine-translation": {
|
| 47 |
"activation_function": "gelu",
|
| 48 |
"bias": true,
|
| 49 |
"dropout_prob": null,
|
| 50 |
-
"embedding_size":
|
| 51 |
"head_type": "causal_lm",
|
| 52 |
"label2id": null,
|
| 53 |
"layer_norm": true,
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "YvanCarre/InkubaLM-multitask",
|
| 3 |
"adapters": {
|
| 4 |
"adapters": {
|
| 5 |
+
"machine-translation": "26cd1b10db746518",
|
| 6 |
"sentiment-analysis": "seq_bn",
|
| 7 |
"xnli": "seq_bn"
|
| 8 |
},
|
| 9 |
+
"config_map": {
|
| 10 |
+
"26cd1b10db746518": {
|
| 11 |
+
"adapter_residual_before_ln": false,
|
| 12 |
+
"cross_adapter": false,
|
| 13 |
+
"dropout": 0.0,
|
| 14 |
+
"factorized_phm_W": true,
|
| 15 |
+
"factorized_phm_rule": false,
|
| 16 |
+
"hypercomplex_nonlinearity": "glorot-uniform",
|
| 17 |
+
"init_weights": "bert",
|
| 18 |
+
"inv_adapter": null,
|
| 19 |
+
"inv_adapter_reduction_factor": null,
|
| 20 |
+
"is_parallel": false,
|
| 21 |
+
"learn_phm": true,
|
| 22 |
+
"leave_out": [],
|
| 23 |
+
"ln_after": false,
|
| 24 |
+
"ln_before": false,
|
| 25 |
+
"mh_adapter": false,
|
| 26 |
+
"non_linearity": "relu",
|
| 27 |
+
"original_ln_after": true,
|
| 28 |
+
"original_ln_before": true,
|
| 29 |
+
"output_adapter": true,
|
| 30 |
+
"phm_bias": true,
|
| 31 |
+
"phm_c_init": "normal",
|
| 32 |
+
"phm_dim": 4,
|
| 33 |
+
"phm_init_range": 0.0001,
|
| 34 |
+
"phm_layer": false,
|
| 35 |
+
"phm_rank": 1,
|
| 36 |
+
"reduction_factor": 8,
|
| 37 |
+
"residual_before_ln": true,
|
| 38 |
+
"scaling": 1.0,
|
| 39 |
+
"shared_W_phm": false,
|
| 40 |
+
"shared_phm_rule": true,
|
| 41 |
+
"stochastic_depth": 0.0,
|
| 42 |
+
"use_gating": false
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
"fusion_config_map": {},
|
| 46 |
"fusion_name_map": {},
|
| 47 |
"fusions": {}
|
|
|
|
| 58 |
"eos_token_id": 2,
|
| 59 |
"head_dim": 64,
|
| 60 |
"hidden_act": "silu",
|
| 61 |
+
"hidden_size": 2048,
|
| 62 |
"id2label": {
|
| 63 |
"0": "LABEL_0",
|
| 64 |
"1": "LABEL_1",
|
|
|
|
| 75 |
"mlp_bias": false,
|
| 76 |
"model_type": "llama",
|
| 77 |
"num_attention_heads": 32,
|
| 78 |
+
"num_hidden_layers": 8,
|
| 79 |
"num_key_value_heads": 32,
|
| 80 |
"prediction_heads": {
|
| 81 |
+
"default": {
|
| 82 |
+
"activation_function": null,
|
| 83 |
+
"bias": false,
|
| 84 |
+
"dropout_prob": null,
|
| 85 |
+
"embedding_size": 2048,
|
| 86 |
+
"head_type": "causal_lm",
|
| 87 |
+
"label2id": null,
|
| 88 |
+
"layer_norm": false,
|
| 89 |
+
"layers": 1,
|
| 90 |
+
"shift_labels": true,
|
| 91 |
+
"vocab_size": 61788
|
| 92 |
+
},
|
| 93 |
"machine-translation": {
|
| 94 |
"activation_function": "gelu",
|
| 95 |
"bias": true,
|
| 96 |
"dropout_prob": null,
|
| 97 |
+
"embedding_size": 2048,
|
| 98 |
"head_type": "causal_lm",
|
| 99 |
"label2id": null,
|
| 100 |
"layer_norm": true,
|
machine-translation/adapter_config.json
CHANGED
|
@@ -25,7 +25,7 @@
|
|
| 25 |
"phm_init_range": 0.0001,
|
| 26 |
"phm_layer": false,
|
| 27 |
"phm_rank": 1,
|
| 28 |
-
"reduction_factor":
|
| 29 |
"residual_before_ln": true,
|
| 30 |
"scaling": 1.0,
|
| 31 |
"shared_W_phm": false,
|
|
@@ -33,9 +33,9 @@
|
|
| 33 |
"stochastic_depth": 0.0,
|
| 34 |
"use_gating": false
|
| 35 |
},
|
| 36 |
-
"hidden_size":
|
| 37 |
"model_class": "LlamaAdapterModel",
|
| 38 |
-
"model_name": "YvanCarre/InkubaLM-
|
| 39 |
"model_type": "llama",
|
| 40 |
"name": "machine-translation",
|
| 41 |
"version": "adapters.1.1.0"
|
|
|
|
| 25 |
"phm_init_range": 0.0001,
|
| 26 |
"phm_layer": false,
|
| 27 |
"phm_rank": 1,
|
| 28 |
+
"reduction_factor": 8,
|
| 29 |
"residual_before_ln": true,
|
| 30 |
"scaling": 1.0,
|
| 31 |
"shared_W_phm": false,
|
|
|
|
| 33 |
"stochastic_depth": 0.0,
|
| 34 |
"use_gating": false
|
| 35 |
},
|
| 36 |
+
"hidden_size": 2048,
|
| 37 |
"model_class": "LlamaAdapterModel",
|
| 38 |
+
"model_name": "YvanCarre/InkubaLM-multitask",
|
| 39 |
"model_type": "llama",
|
| 40 |
"name": "machine-translation",
|
| 41 |
"version": "adapters.1.1.0"
|
machine-translation/head_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"activation_function": "gelu",
|
| 4 |
"bias": true,
|
| 5 |
"dropout_prob": null,
|
| 6 |
-
"embedding_size":
|
| 7 |
"head_type": "causal_lm",
|
| 8 |
"label2id": null,
|
| 9 |
"layer_norm": true,
|
|
@@ -11,9 +11,9 @@
|
|
| 11 |
"shift_labels": true,
|
| 12 |
"vocab_size": 61788
|
| 13 |
},
|
| 14 |
-
"hidden_size":
|
| 15 |
"model_class": "LlamaAdapterModel",
|
| 16 |
-
"model_name": "YvanCarre/InkubaLM-
|
| 17 |
"model_type": "llama",
|
| 18 |
"name": "machine-translation",
|
| 19 |
"version": "adapters.1.1.0"
|
|
|
|
| 3 |
"activation_function": "gelu",
|
| 4 |
"bias": true,
|
| 5 |
"dropout_prob": null,
|
| 6 |
+
"embedding_size": 2048,
|
| 7 |
"head_type": "causal_lm",
|
| 8 |
"label2id": null,
|
| 9 |
"layer_norm": true,
|
|
|
|
| 11 |
"shift_labels": true,
|
| 12 |
"vocab_size": 61788
|
| 13 |
},
|
| 14 |
+
"hidden_size": 2048,
|
| 15 |
"model_class": "LlamaAdapterModel",
|
| 16 |
+
"model_name": "YvanCarre/InkubaLM-multitask",
|
| 17 |
"model_type": "llama",
|
| 18 |
"name": "machine-translation",
|
| 19 |
"version": "adapters.1.1.0"
|
machine-translation/pytorch_adapter.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b6b002f72b489ffaa766b7d366b7cac13ad7d8eea08a8e4230cb4bd0fb6a63a
|
| 3 |
+
size 4426802
|
machine-translation/pytorch_model_head.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be2ded33057399a641e72ed20cc47ff4c55c552069fd468d3ff3549568045d3f
|
| 3 |
+
size 67587010
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9e184725fc66b99e41c839b454048e486b282e28c731e6c06c097d9c29574ac
|
| 3 |
+
size 611402780
|
sentiment-analysis/adapter_config.json
CHANGED
|
@@ -33,9 +33,9 @@
|
|
| 33 |
"stochastic_depth": 0.0,
|
| 34 |
"use_gating": false
|
| 35 |
},
|
| 36 |
-
"hidden_size":
|
| 37 |
"model_class": "LlamaAdapterModel",
|
| 38 |
-
"model_name": "YvanCarre/InkubaLM-
|
| 39 |
"model_type": "llama",
|
| 40 |
"name": "sentiment-analysis",
|
| 41 |
"version": "adapters.1.1.0"
|
|
|
|
| 33 |
"stochastic_depth": 0.0,
|
| 34 |
"use_gating": false
|
| 35 |
},
|
| 36 |
+
"hidden_size": 2048,
|
| 37 |
"model_class": "LlamaAdapterModel",
|
| 38 |
+
"model_name": "YvanCarre/InkubaLM-multitask",
|
| 39 |
"model_type": "llama",
|
| 40 |
"name": "sentiment-analysis",
|
| 41 |
"version": "adapters.1.1.0"
|
sentiment-analysis/head_config.json
CHANGED
|
@@ -13,9 +13,9 @@
|
|
| 13 |
"num_labels": 3,
|
| 14 |
"use_pooler": false
|
| 15 |
},
|
| 16 |
-
"hidden_size":
|
| 17 |
"model_class": "LlamaAdapterModel",
|
| 18 |
-
"model_name": "YvanCarre/InkubaLM-
|
| 19 |
"model_type": "llama",
|
| 20 |
"name": "sentiment-analysis",
|
| 21 |
"version": "adapters.1.1.0"
|
|
|
|
| 13 |
"num_labels": 3,
|
| 14 |
"use_pooler": false
|
| 15 |
},
|
| 16 |
+
"hidden_size": 2048,
|
| 17 |
"model_class": "LlamaAdapterModel",
|
| 18 |
+
"model_name": "YvanCarre/InkubaLM-multitask",
|
| 19 |
"model_type": "llama",
|
| 20 |
"name": "sentiment-analysis",
|
| 21 |
"version": "adapters.1.1.0"
|
sentiment-analysis/pytorch_adapter.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd8ca904c0c7788981e1d3198d806cfd69cd2996ca1c2f64908baefd185a3d0a
|
| 3 |
+
size 2260914
|
sentiment-analysis/pytorch_model_head.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b252c53e95cd4e19f3b9e5563c2829ebbb3ba9fba76cafa05902c3298c380e64
|
| 3 |
+
size 2178738
|
xnli/adapter_config.json
CHANGED
|
@@ -33,9 +33,9 @@
|
|
| 33 |
"stochastic_depth": 0.0,
|
| 34 |
"use_gating": false
|
| 35 |
},
|
| 36 |
-
"hidden_size":
|
| 37 |
"model_class": "LlamaAdapterModel",
|
| 38 |
-
"model_name": "YvanCarre/InkubaLM-
|
| 39 |
"model_type": "llama",
|
| 40 |
"name": "xnli",
|
| 41 |
"version": "adapters.1.1.0"
|
|
|
|
| 33 |
"stochastic_depth": 0.0,
|
| 34 |
"use_gating": false
|
| 35 |
},
|
| 36 |
+
"hidden_size": 2048,
|
| 37 |
"model_class": "LlamaAdapterModel",
|
| 38 |
+
"model_name": "YvanCarre/InkubaLM-multitask",
|
| 39 |
"model_type": "llama",
|
| 40 |
"name": "xnli",
|
| 41 |
"version": "adapters.1.1.0"
|
xnli/head_config.json
CHANGED
|
@@ -13,9 +13,9 @@
|
|
| 13 |
"num_labels": 3,
|
| 14 |
"use_pooler": false
|
| 15 |
},
|
| 16 |
-
"hidden_size":
|
| 17 |
"model_class": "LlamaAdapterModel",
|
| 18 |
-
"model_name": "YvanCarre/InkubaLM-
|
| 19 |
"model_type": "llama",
|
| 20 |
"name": "xnli",
|
| 21 |
"version": "adapters.1.1.0"
|
|
|
|
| 13 |
"num_labels": 3,
|
| 14 |
"use_pooler": false
|
| 15 |
},
|
| 16 |
+
"hidden_size": 2048,
|
| 17 |
"model_class": "LlamaAdapterModel",
|
| 18 |
+
"model_name": "YvanCarre/InkubaLM-multitask",
|
| 19 |
"model_type": "llama",
|
| 20 |
"name": "xnli",
|
| 21 |
"version": "adapters.1.1.0"
|
xnli/pytorch_adapter.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6443b3b56f21faf143eb05f15ddd1e004f147a3a8e772e377c6c653b67f5e11
|
| 3 |
+
size 2259378
|
xnli/pytorch_model_head.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:752e8b4111a708be5fcaff8d66cb16cbf8ba968023f1b4b170cf55f8af8da371
|
| 3 |
+
size 2184971
|