p1k0 committed
Commit 6b907c2 · verified · 1 Parent(s): 5bb93c8

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. glue/mistral_weights_pt/pt_layer12_sst2/adapter_config.json +53 -0
  2. glue/mistral_weights_pt/pt_layer12_sst2/head_config.json +14 -0
  3. glue/mistral_weights_pt/pt_layer12_wnli/adapter_config.json +53 -0
  4. glue/mistral_weights_pt/pt_layer12_wnli/head_config.json +14 -0
  5. glue/mistral_weights_pt/pt_layer14_mnli/adapter_config.json +53 -0
  6. glue/mistral_weights_pt/pt_layer14_mnli/head_config.json +15 -0
  7. glue/mistral_weights_pt/pt_layer14_mrpc/adapter_config.json +53 -0
  8. glue/mistral_weights_pt/pt_layer14_mrpc/head_config.json +14 -0
  9. glue/mistral_weights_pt/pt_layer14_qnli/adapter_config.json +53 -0
  10. glue/mistral_weights_pt/pt_layer14_qnli/head_config.json +14 -0
  11. glue/mistral_weights_pt/pt_layer14_qqp/adapter_config.json +53 -0
  12. glue/mistral_weights_pt/pt_layer14_qqp/head_config.json +14 -0
  13. glue/mistral_weights_pt/pt_layer14_rte/adapter_config.json +53 -0
  14. glue/mistral_weights_pt/pt_layer14_rte/head_config.json +14 -0
  15. glue/mistral_weights_pt/pt_layer14_sst2/adapter_config.json +53 -0
  16. glue/mistral_weights_pt/pt_layer14_sst2/head_config.json +14 -0
  17. glue/mistral_weights_pt/pt_layer14_wnli/adapter_config.json +53 -0
  18. glue/mistral_weights_pt/pt_layer14_wnli/head_config.json +14 -0
  19. glue/mistral_weights_pt/pt_layer22_mnli/adapter_config.json +53 -0
  20. glue/mistral_weights_pt/pt_layer22_mnli/head_config.json +15 -0
  21. glue/mistral_weights_pt/pt_layer22_mrpc/adapter_config.json +53 -0
  22. glue/mistral_weights_pt/pt_layer22_mrpc/head_config.json +14 -0
  23. glue/mistral_weights_pt/pt_layer22_qnli/adapter_config.json +53 -0
  24. glue/mistral_weights_pt/pt_layer22_qnli/head_config.json +14 -0
  25. glue/mistral_weights_pt/pt_layer22_qqp/adapter_config.json +53 -0
  26. glue/mistral_weights_pt/pt_layer22_qqp/head_config.json +14 -0
  27. glue/mistral_weights_pt/pt_layer22_rte/adapter_config.json +53 -0
  28. glue/mistral_weights_pt/pt_layer22_rte/head_config.json +14 -0
  29. glue/mistral_weights_pt/pt_layer22_sst2/adapter_config.json +53 -0
  30. glue/mistral_weights_pt/pt_layer22_sst2/head_config.json +14 -0
  31. glue/mistral_weights_pt/pt_layer22_wnli/adapter_config.json +53 -0
  32. glue/mistral_weights_pt/pt_layer22_wnli/head_config.json +14 -0
  33. glue/mistral_weights_pt/pt_layer25_mnli/adapter_config.json +53 -0
  34. glue/mistral_weights_pt/pt_layer25_mnli/head_config.json +15 -0
  35. glue/mistral_weights_pt/pt_layer25_mrpc/adapter_config.json +53 -0
  36. glue/mistral_weights_pt/pt_layer25_mrpc/head_config.json +14 -0
  37. glue/mistral_weights_pt/pt_layer25_qnli/adapter_config.json +53 -0
  38. glue/mistral_weights_pt/pt_layer25_qnli/head_config.json +14 -0
  39. glue/mistral_weights_pt/pt_layer25_qqp/adapter_config.json +53 -0
  40. glue/mistral_weights_pt/pt_layer25_qqp/head_config.json +14 -0
  41. glue/mistral_weights_pt/pt_layer25_rte/adapter_config.json +53 -0
  42. glue/mistral_weights_pt/pt_layer25_rte/head_config.json +14 -0
  43. glue/mistral_weights_pt/pt_layer25_sst2/adapter_config.json +53 -0
  44. glue/mistral_weights_pt/pt_layer25_sst2/head_config.json +14 -0
  45. glue/mistral_weights_pt/pt_layer25_wnli/adapter_config.json +53 -0
  46. glue/mistral_weights_pt/pt_layer25_wnli/head_config.json +14 -0
  47. glue/mistral_weights_pt/pt_layer31_mnli/adapter_config.json +53 -0
  48. glue/mistral_weights_pt/pt_layer31_mnli/head_config.json +15 -0
  49. glue/mistral_weights_pt/pt_layer31_mrpc/adapter_config.json +53 -0
  50. glue/mistral_weights_pt/pt_layer31_mrpc/head_config.json +14 -0
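
The listing follows a regular layout: one directory per (prefix layer, GLUE task) pair, named pt_layer{K}_{task}, and each directory carries an adapter_config.json plus a head_config.json. As a rough illustration only, the snippet below enumerates the directories visible in this truncated 50-file view; the layer indices and task names are read off the listing above, not from any manifest in the repository.

# Illustrative sketch (assumption: the layers/tasks listed are only those visible in this view).
from itertools import product

layers = [12, 14, 22, 25, 31]   # each adapter keeps the prefix at exactly one of these layers
tasks = ["mnli", "mrpc", "qnli", "qqp", "rte", "sst2", "wnli"]

for layer, task in product(layers, tasks):
    path = f"glue/mistral_weights_pt/pt_layer{layer}_{task}"
    print(path, "-> adapter_config.json, head_config.json")
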
glue/mistral_weights_pt/pt_layer12_sst2/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer12_sst2",
  "version": "adapters.1.0.0"
}
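
For reference, the sketch below shows how an equivalent prefix-tuning configuration could be constructed with the Hugging Face adapters library (the "version": "adapters.1.0.0" field above points at that library). It is a minimal sketch under the assumption that adapters and transformers are installed; the hub checkpoint name is a stand-in for the local mistral-7b path recorded in model_name.

# Minimal sketch: rebuild the prefix-tuning setup described by this adapter_config.json.
import adapters
from adapters import PrefixTuningConfig
from transformers import AutoModelForSequenceClassification

# 30-token prefix, 512-unit tanh bottleneck, no gating; "leave_out" lists every
# layer except 12, so the prefix is effectively injected at layer 12 only.
config = PrefixTuningConfig(
    flat=False,
    prefix_length=30,
    bottleneck_size=512,
    non_linearity="tanh",
    dropout=0.0,
    leave_out=[i for i in range(32) if i != 12],
)

# Stand-in checkpoint; the JSON's model_name is a local Mistral-7B path.
model = AutoModelForSequenceClassification.from_pretrained(
    "mistralai/Mistral-7B-v0.1", num_labels=2
)
adapters.init(model)                      # add adapter support to the plain HF model
model.add_adapter("pt_layer12_sst2", config=config)
model.train_adapter("pt_layer12_sst2")    # freeze the backbone, train only the prefix
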
glue/mistral_weights_pt/pt_layer12_sst2/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
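
The head_config.json above records a plain two-way classification head for SST-2 ("config": null means no extra head options were stored; the labels are the generic LABEL_0/LABEL_1). A hedged loading sketch follows: it assumes the adapters library and treats the directory path as relative to a local checkout of this repository.

# Minimal sketch: restore a saved prefix adapter (and, where supported, its head) from this repo.
import adapters
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "mistralai/Mistral-7B-v0.1", num_labels=2   # stand-in for the local mistral-7b path
)
adapters.init(model)

# load_adapter reads the adapter_config.json in the directory and restores the prefix
# weights; depending on the model class it can also restore the saved head (head_config.json).
model.load_adapter(
    "glue/mistral_weights_pt/pt_layer12_sst2",  # path inside a local checkout (assumption)
    set_active=True,
)
model.eval()
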
glue/mistral_weights_pt/pt_layer12_wnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer12_wnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer12_wnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_mnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_mnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_mnli/head_config.json ADDED
@@ -0,0 +1,15 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 3,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_mrpc/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_mrpc",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_mrpc/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_qnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_qnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_qnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_qqp/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_qqp",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_qqp/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_rte/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_rte",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_rte/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_sst2/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_sst2",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_sst2/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_wnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer14_wnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer14_wnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_mnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_mnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_mnli/head_config.json ADDED
@@ -0,0 +1,15 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 3,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_mrpc/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_mrpc",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_mrpc/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_qnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_qnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_qnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_qqp/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_qqp",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_qqp/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_rte/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_rte",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_rte/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_sst2/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_sst2",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_sst2/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_wnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer22_wnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer22_wnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_mnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_mnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_mnli/head_config.json ADDED
@@ -0,0 +1,15 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 3,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_mrpc/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_mrpc",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_mrpc/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_qnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_qnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_qnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_qqp/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_qqp",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_qqp/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_rte/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_rte",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_rte/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_sst2/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_sst2",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_sst2/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_wnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer25_wnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer25_wnli/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer31_mnli/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer31_mnli",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer31_mnli/head_config.json ADDED
@@ -0,0 +1,15 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 3,
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer31_mrpc/adapter_config.json ADDED
@@ -0,0 +1,53 @@
{
  "config": {
    "architecture": "prefix_tuning",
    "bottleneck_size": 512,
    "cross_prefix": true,
    "dropout": 0.0,
    "encoder_prefix": true,
    "flat": false,
    "leave_out": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
    "non_linearity": "tanh",
    "prefix_length": 30,
    "shared_gating": true,
    "use_gating": false
  },
  "hidden_size": 4096,
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": "pt_layer31_mrpc",
  "version": "adapters.1.0.0"
}
glue/mistral_weights_pt/pt_layer31_mrpc/head_config.json ADDED
@@ -0,0 +1,14 @@
{
  "config": null,
  "hidden_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "model_class": "MistralForSequenceClassification",
  "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b",
  "model_type": "mistral",
  "name": null,
  "num_labels": 2,
  "version": "adapters.1.0.0"
}