YvanCarre committed on
Commit
a56848a
·
verified ·
1 Parent(s): 030a1bb

Upload InkubaLM multi-task with adapters

Browse files
config.json CHANGED
@@ -1,12 +1,47 @@
1
  {
2
- "_name_or_path": "YvanCarre/InkubaLM-multitask_distill",
3
  "adapters": {
4
  "adapters": {
5
- "machine-translation": "seq_bn",
6
  "sentiment-analysis": "seq_bn",
7
  "xnli": "seq_bn"
8
  },
9
- "config_map": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "fusion_config_map": {},
11
  "fusion_name_map": {},
12
  "fusions": {}
@@ -23,7 +58,7 @@
23
  "eos_token_id": 2,
24
  "head_dim": 64,
25
  "hidden_act": "silu",
26
- "hidden_size": 1024,
27
  "id2label": {
28
  "0": "LABEL_0",
29
  "1": "LABEL_1",
@@ -40,14 +75,26 @@
40
  "mlp_bias": false,
41
  "model_type": "llama",
42
  "num_attention_heads": 32,
43
- "num_hidden_layers": 6,
44
  "num_key_value_heads": 32,
45
  "prediction_heads": {
 
 
 
 
 
 
 
 
 
 
 
 
46
  "machine-translation": {
47
  "activation_function": "gelu",
48
  "bias": true,
49
  "dropout_prob": null,
50
- "embedding_size": 1024,
51
  "head_type": "causal_lm",
52
  "label2id": null,
53
  "layer_norm": true,
 
1
  {
2
+ "_name_or_path": "YvanCarre/InkubaLM-multitask",
3
  "adapters": {
4
  "adapters": {
5
+ "machine-translation": "26cd1b10db746518",
6
  "sentiment-analysis": "seq_bn",
7
  "xnli": "seq_bn"
8
  },
9
+ "config_map": {
10
+ "26cd1b10db746518": {
11
+ "adapter_residual_before_ln": false,
12
+ "cross_adapter": false,
13
+ "dropout": 0.0,
14
+ "factorized_phm_W": true,
15
+ "factorized_phm_rule": false,
16
+ "hypercomplex_nonlinearity": "glorot-uniform",
17
+ "init_weights": "bert",
18
+ "inv_adapter": null,
19
+ "inv_adapter_reduction_factor": null,
20
+ "is_parallel": false,
21
+ "learn_phm": true,
22
+ "leave_out": [],
23
+ "ln_after": false,
24
+ "ln_before": false,
25
+ "mh_adapter": false,
26
+ "non_linearity": "relu",
27
+ "original_ln_after": true,
28
+ "original_ln_before": true,
29
+ "output_adapter": true,
30
+ "phm_bias": true,
31
+ "phm_c_init": "normal",
32
+ "phm_dim": 4,
33
+ "phm_init_range": 0.0001,
34
+ "phm_layer": false,
35
+ "phm_rank": 1,
36
+ "reduction_factor": 8,
37
+ "residual_before_ln": true,
38
+ "scaling": 1.0,
39
+ "shared_W_phm": false,
40
+ "shared_phm_rule": true,
41
+ "stochastic_depth": 0.0,
42
+ "use_gating": false
43
+ }
44
+ },
45
  "fusion_config_map": {},
46
  "fusion_name_map": {},
47
  "fusions": {}
 
58
  "eos_token_id": 2,
59
  "head_dim": 64,
60
  "hidden_act": "silu",
61
+ "hidden_size": 2048,
62
  "id2label": {
63
  "0": "LABEL_0",
64
  "1": "LABEL_1",
 
75
  "mlp_bias": false,
76
  "model_type": "llama",
77
  "num_attention_heads": 32,
78
+ "num_hidden_layers": 8,
79
  "num_key_value_heads": 32,
80
  "prediction_heads": {
81
+ "default": {
82
+ "activation_function": null,
83
+ "bias": false,
84
+ "dropout_prob": null,
85
+ "embedding_size": 2048,
86
+ "head_type": "causal_lm",
87
+ "label2id": null,
88
+ "layer_norm": false,
89
+ "layers": 1,
90
+ "shift_labels": true,
91
+ "vocab_size": 61788
92
+ },
93
  "machine-translation": {
94
  "activation_function": "gelu",
95
  "bias": true,
96
  "dropout_prob": null,
97
+ "embedding_size": 2048,
98
  "head_type": "causal_lm",
99
  "label2id": null,
100
  "layer_norm": true,
machine-translation/adapter_config.json CHANGED
@@ -25,7 +25,7 @@
25
  "phm_init_range": 0.0001,
26
  "phm_layer": false,
27
  "phm_rank": 1,
28
- "reduction_factor": 16,
29
  "residual_before_ln": true,
30
  "scaling": 1.0,
31
  "shared_W_phm": false,
@@ -33,9 +33,9 @@
33
  "stochastic_depth": 0.0,
34
  "use_gating": false
35
  },
36
- "hidden_size": 1024,
37
  "model_class": "LlamaAdapterModel",
38
- "model_name": "YvanCarre/InkubaLM-multitask_distill",
39
  "model_type": "llama",
40
  "name": "machine-translation",
41
  "version": "adapters.1.1.0"
 
25
  "phm_init_range": 0.0001,
26
  "phm_layer": false,
27
  "phm_rank": 1,
28
+ "reduction_factor": 8,
29
  "residual_before_ln": true,
30
  "scaling": 1.0,
31
  "shared_W_phm": false,
 
33
  "stochastic_depth": 0.0,
34
  "use_gating": false
35
  },
36
+ "hidden_size": 2048,
37
  "model_class": "LlamaAdapterModel",
38
+ "model_name": "YvanCarre/InkubaLM-multitask",
39
  "model_type": "llama",
40
  "name": "machine-translation",
41
  "version": "adapters.1.1.0"
machine-translation/head_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "activation_function": "gelu",
4
  "bias": true,
5
  "dropout_prob": null,
6
- "embedding_size": 1024,
7
  "head_type": "causal_lm",
8
  "label2id": null,
9
  "layer_norm": true,
@@ -11,9 +11,9 @@
11
  "shift_labels": true,
12
  "vocab_size": 61788
13
  },
14
- "hidden_size": 1024,
15
  "model_class": "LlamaAdapterModel",
16
- "model_name": "YvanCarre/InkubaLM-multitask_distill",
17
  "model_type": "llama",
18
  "name": "machine-translation",
19
  "version": "adapters.1.1.0"
 
3
  "activation_function": "gelu",
4
  "bias": true,
5
  "dropout_prob": null,
6
+ "embedding_size": 2048,
7
  "head_type": "causal_lm",
8
  "label2id": null,
9
  "layer_norm": true,
 
11
  "shift_labels": true,
12
  "vocab_size": 61788
13
  },
14
+ "hidden_size": 2048,
15
  "model_class": "LlamaAdapterModel",
16
+ "model_name": "YvanCarre/InkubaLM-multitask",
17
  "model_type": "llama",
18
  "name": "machine-translation",
19
  "version": "adapters.1.1.0"
machine-translation/pytorch_adapter.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bd574ea0d6427a0a3a53c284c8901a73ee0c1cdc54e8bb1cab44955d46650b6
3
- size 465434
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6b002f72b489ffaa766b7d366b7cac13ad7d8eea08a8e4230cb4bd0fb6a63a
3
+ size 4426802
machine-translation/pytorch_model_head.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41f94019959f76ba5744845bf97ea1f47a9f49a183347935c5baf8f2359c5ebd
3
- size 33318594
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be2ded33057399a641e72ed20cc47ff4c55c552069fd468d3ff3549568045d3f
3
+ size 67587010
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f95ff5dcae433d413f63f618f2493417de985555d6af8aeb13024f8df0d33ee
3
- size 241922568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e184725fc66b99e41c839b454048e486b282e28c731e6c06c097d9c29574ac
3
+ size 611402780
sentiment-analysis/adapter_config.json CHANGED
@@ -33,9 +33,9 @@
33
  "stochastic_depth": 0.0,
34
  "use_gating": false
35
  },
36
- "hidden_size": 1024,
37
  "model_class": "LlamaAdapterModel",
38
- "model_name": "YvanCarre/InkubaLM-multitask_distill",
39
  "model_type": "llama",
40
  "name": "sentiment-analysis",
41
  "version": "adapters.1.1.0"
 
33
  "stochastic_depth": 0.0,
34
  "use_gating": false
35
  },
36
+ "hidden_size": 2048,
37
  "model_class": "LlamaAdapterModel",
38
+ "model_name": "YvanCarre/InkubaLM-multitask",
39
  "model_type": "llama",
40
  "name": "sentiment-analysis",
41
  "version": "adapters.1.1.0"
sentiment-analysis/head_config.json CHANGED
@@ -13,9 +13,9 @@
13
  "num_labels": 3,
14
  "use_pooler": false
15
  },
16
- "hidden_size": 1024,
17
  "model_class": "LlamaAdapterModel",
18
- "model_name": "YvanCarre/InkubaLM-multitask_distill",
19
  "model_type": "llama",
20
  "name": "sentiment-analysis",
21
  "version": "adapters.1.1.0"
 
13
  "num_labels": 3,
14
  "use_pooler": false
15
  },
16
+ "hidden_size": 2048,
17
  "model_class": "LlamaAdapterModel",
18
+ "model_name": "YvanCarre/InkubaLM-multitask",
19
  "model_type": "llama",
20
  "name": "sentiment-analysis",
21
  "version": "adapters.1.1.0"
sentiment-analysis/pytorch_adapter.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a3b3227ad33f4c11d8c41ebe166dc838000c4176f1201305c04db24b540c05
3
- size 465370
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd8ca904c0c7788981e1d3198d806cfd69cd2996ca1c2f64908baefd185a3d0a
3
+ size 2260914
sentiment-analysis/pytorch_model_head.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cb845315a8bf12b2997c7ff74ca84978fa9120302436eed20f9b4c610d75447
3
- size 552306
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b252c53e95cd4e19f3b9e5563c2829ebbb3ba9fba76cafa05902c3298c380e64
3
+ size 2178738
xnli/adapter_config.json CHANGED
@@ -33,9 +33,9 @@
33
  "stochastic_depth": 0.0,
34
  "use_gating": false
35
  },
36
- "hidden_size": 1024,
37
  "model_class": "LlamaAdapterModel",
38
- "model_name": "YvanCarre/InkubaLM-multitask_distill",
39
  "model_type": "llama",
40
  "name": "xnli",
41
  "version": "adapters.1.1.0"
 
33
  "stochastic_depth": 0.0,
34
  "use_gating": false
35
  },
36
+ "hidden_size": 2048,
37
  "model_class": "LlamaAdapterModel",
38
+ "model_name": "YvanCarre/InkubaLM-multitask",
39
  "model_type": "llama",
40
  "name": "xnli",
41
  "version": "adapters.1.1.0"
xnli/head_config.json CHANGED
@@ -13,9 +13,9 @@
13
  "num_labels": 3,
14
  "use_pooler": false
15
  },
16
- "hidden_size": 1024,
17
  "model_class": "LlamaAdapterModel",
18
- "model_name": "YvanCarre/InkubaLM-multitask_distill",
19
  "model_type": "llama",
20
  "name": "xnli",
21
  "version": "adapters.1.1.0"
 
13
  "num_labels": 3,
14
  "use_pooler": false
15
  },
16
+ "hidden_size": 2048,
17
  "model_class": "LlamaAdapterModel",
18
+ "model_name": "YvanCarre/InkubaLM-multitask",
19
  "model_type": "llama",
20
  "name": "xnli",
21
  "version": "adapters.1.1.0"
xnli/pytorch_adapter.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859b9e906ed47986007036046853c59173bd8913f1eeec326a6aa3ce39d1714e
3
- size 464218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6443b3b56f21faf143eb05f15ddd1e004f147a3a8e772e377c6c653b67f5e11
3
+ size 2259378
xnli/pytorch_model_head.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71b9aaadc91a7329a94d861093ea1ac95e21b75f1397698cb389193ab5a4d8a7
3
- size 553995
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752e8b4111a708be5fcaff8d66cb16cbf8ba968023f1b4b170cf55f8af8da371
3
+ size 2184971