diff --git a/glue/mistral_weights_pt/pt_layer12_sst2/adapter_config.json b/glue/mistral_weights_pt/pt_layer12_sst2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aad2330620e5440da396b3cbcdb0c819d693a77c --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer12_sst2/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer12_sst2", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer12_sst2/head_config.json b/glue/mistral_weights_pt/pt_layer12_sst2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer12_sst2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer12_wnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer12_wnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97d96d7c4e3be9a27cb5a21ab9385c8e471827a5 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer12_wnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer12_wnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer12_wnli/head_config.json b/glue/mistral_weights_pt/pt_layer12_wnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer12_wnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_mnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_mnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a33b80ceb33684ff8a2979940c4b65439c49586 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_mnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_mnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_mnli/head_config.json b/glue/mistral_weights_pt/pt_layer14_mnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7e2a9627add563d87368dea14580ddd73b2a5c1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_mnli/head_config.json @@ -0,0 +1,15 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 3, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_mrpc/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_mrpc/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..82fb4e576fc1678dcc256a1b4b239737fa6d689f --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_mrpc/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_mrpc", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_mrpc/head_config.json b/glue/mistral_weights_pt/pt_layer14_mrpc/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_mrpc/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_qnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_qnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ea899c83e26c56d64d31cbb15227d066b515c46 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_qnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_qnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_qnli/head_config.json b/glue/mistral_weights_pt/pt_layer14_qnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_qnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_qqp/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_qqp/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bb1cefc71460e3616f3219e41469e680a2d3a0a7 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_qqp/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_qqp/head_config.json b/glue/mistral_weights_pt/pt_layer14_qqp/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_qqp/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_rte/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_rte/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf7ab9872db18740f27907a739f940da1fc8ae74 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_rte/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_rte/head_config.json b/glue/mistral_weights_pt/pt_layer14_rte/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_rte/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_sst2/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_sst2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffadecd7b6a0836da8829575555f764a59a9c6c3 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_sst2/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_sst2", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_sst2/head_config.json b/glue/mistral_weights_pt/pt_layer14_sst2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_sst2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_wnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer14_wnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10ae3c0341d94a8eb4d3fb51d93394d8612f595b --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_wnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer14_wnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer14_wnli/head_config.json b/glue/mistral_weights_pt/pt_layer14_wnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer14_wnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_mnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_mnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e76e0be4e4cfeaa6b3e713777bf7fbe820f5f8de --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_mnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_mnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_mnli/head_config.json b/glue/mistral_weights_pt/pt_layer22_mnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7e2a9627add563d87368dea14580ddd73b2a5c1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_mnli/head_config.json @@ -0,0 +1,15 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 3, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_mrpc/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_mrpc/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1befd7963cdef9f68486e82969132069ec539e86 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_mrpc/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_mrpc", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_mrpc/head_config.json b/glue/mistral_weights_pt/pt_layer22_mrpc/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_mrpc/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_qnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_qnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb9b2750d20cb929717f9ed867cf4508bea0454 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_qnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_qnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_qnli/head_config.json b/glue/mistral_weights_pt/pt_layer22_qnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_qnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_qqp/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_qqp/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..450c460396237c2173605cd39e4444c9bc394b19 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_qqp/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_qqp/head_config.json b/glue/mistral_weights_pt/pt_layer22_qqp/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_qqp/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_rte/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_rte/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..68ce524613617927c6025c2b4e74c64e542182e9 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_rte/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_rte/head_config.json b/glue/mistral_weights_pt/pt_layer22_rte/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_rte/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_sst2/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_sst2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6329a236e638ee83fafcae2607e39e8638def334 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_sst2/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_sst2", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_sst2/head_config.json b/glue/mistral_weights_pt/pt_layer22_sst2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_sst2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_wnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer22_wnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b7a641dab38ce965904aea905d63be7285252f0 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_wnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer22_wnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer22_wnli/head_config.json b/glue/mistral_weights_pt/pt_layer22_wnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer22_wnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_mnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_mnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5dd212d71acf16d40207e318a571eeaaf7e9b8e1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_mnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_mnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_mnli/head_config.json b/glue/mistral_weights_pt/pt_layer25_mnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7e2a9627add563d87368dea14580ddd73b2a5c1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_mnli/head_config.json @@ -0,0 +1,15 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 3, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_mrpc/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_mrpc/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d3fba3da9dfbb10fba2ccffd2c486f9a6099415 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_mrpc/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_mrpc", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_mrpc/head_config.json b/glue/mistral_weights_pt/pt_layer25_mrpc/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_mrpc/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_qnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_qnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..17a019c6e8ce671ebcbd7e88efea44b2eec68b63 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_qnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_qnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_qnli/head_config.json b/glue/mistral_weights_pt/pt_layer25_qnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_qnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_qqp/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_qqp/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..456d857cf6123cbdced94a1053e0558f2d2c8979 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_qqp/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_qqp/head_config.json b/glue/mistral_weights_pt/pt_layer25_qqp/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_qqp/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_rte/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_rte/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f066ad6f10744213fc330ac31588640e43ae6722 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_rte/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_rte/head_config.json b/glue/mistral_weights_pt/pt_layer25_rte/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_rte/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_sst2/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_sst2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..407d7a5c7bd1a8d35c159ebc8344b61d832a0502 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_sst2/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_sst2", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_sst2/head_config.json b/glue/mistral_weights_pt/pt_layer25_sst2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_sst2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_wnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer25_wnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3bb726e366a9103ff6b81735c709efb1eec53a --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_wnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 30, + 31 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer25_wnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer25_wnli/head_config.json b/glue/mistral_weights_pt/pt_layer25_wnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer25_wnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_mnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_mnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cff2b581271067077a223517ae339d2d6d888afb --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_mnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_mnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_mnli/head_config.json b/glue/mistral_weights_pt/pt_layer31_mnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7e2a9627add563d87368dea14580ddd73b2a5c1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_mnli/head_config.json @@ -0,0 +1,15 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 3, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_mrpc/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_mrpc/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ff4dfebbb84f265d2e5c45023bd367cbadc7755a --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_mrpc/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_mrpc", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_mrpc/head_config.json b/glue/mistral_weights_pt/pt_layer31_mrpc/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_mrpc/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_qnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_qnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6edefe1139fdd6d7be4fe05abd9a866d39ce42b2 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_qnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_qnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_qnli/head_config.json b/glue/mistral_weights_pt/pt_layer31_qnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_qnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_qqp/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_qqp/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbc9c8243dbf3176af721db6cfb1696f1e941c0e --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_qqp/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_qqp/head_config.json b/glue/mistral_weights_pt/pt_layer31_qqp/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_qqp/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_rte/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_rte/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bc30fe180e437292a766735cbd7256c1d313841 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_rte/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_rte/head_config.json b/glue/mistral_weights_pt/pt_layer31_rte/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_rte/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_sst2/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_sst2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa3f9b3c0ce4e1f95c4ef873f2b2fadbfea8d53f --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_sst2/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_sst2", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_sst2/head_config.json b/glue/mistral_weights_pt/pt_layer31_sst2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_sst2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_wnli/adapter_config.json b/glue/mistral_weights_pt/pt_layer31_wnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c6ed2a6f6c5d907c645a5f4d4bcb59137ff7f1d --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_wnli/adapter_config.json @@ -0,0 +1,53 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layer31_wnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layer31_wnli/head_config.json b/glue/mistral_weights_pt/pt_layer31_wnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layer31_wnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_mnli/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_mnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..db16379cb94fea1263260742ae14a61f2cf497e5 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_mnli/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_mnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_mnli/head_config.json b/glue/mistral_weights_pt/pt_layerfull_mnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7e2a9627add563d87368dea14580ddd73b2a5c1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_mnli/head_config.json @@ -0,0 +1,15 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 3, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_mrpc/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_mrpc/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ca4b1153c6ea71c37c3470ca700564594e7f3511 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_mrpc/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_mrpc", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_mrpc/head_config.json b/glue/mistral_weights_pt/pt_layerfull_mrpc/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_mrpc/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_mrpc_fix_lr1e-5_ep10_wd00001_lin/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_mrpc_fix_lr1e-5_ep10_wd00001_lin/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0853423be0087c6ee957d147e771fe8d14654e5 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_mrpc_fix_lr1e-5_ep10_wd00001_lin/adapter_config.json @@ -0,0 +1,22 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "config_id": "648bf22f5afeaaa6", + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_mrpc", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_mrpc_fix_lr1e-5_ep10_wd00001_lin/head_config.json b/glue/mistral_weights_pt/pt_layerfull_mrpc_fix_lr1e-5_ep10_wd00001_lin/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_mrpc_fix_lr1e-5_ep10_wd00001_lin/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qnli/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_qnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9823235c187ed7a340e219ffa21b1403d21435 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qnli/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_qnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qnli/head_config.json b/glue/mistral_weights_pt/pt_layerfull_qnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qqp/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_qqp/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b057fbbe5f668b14ef293271febb51e47d8d01d1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qqp/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qqp/head_config.json b/glue/mistral_weights_pt/pt_layerfull_qqp/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qqp/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qqp_ep2/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_qqp_ep2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b057fbbe5f668b14ef293271febb51e47d8d01d1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qqp_ep2/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qqp_ep2/head_config.json b/glue/mistral_weights_pt/pt_layerfull_qqp_ep2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qqp_ep2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qqp_old/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_qqp_old/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b057fbbe5f668b14ef293271febb51e47d8d01d1 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qqp_old/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_qqp", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_qqp_old/head_config.json b/glue/mistral_weights_pt/pt_layerfull_qqp_old/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_qqp_old/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a44f1fd79c0a660c38fa5098e01cace0ca7417fa --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a44f1fd79c0a660c38fa5098e01cace0ca7417fa --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-4/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-4/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..feea4eb9f86dfe105d5f4bc0805b4074df577e23 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-4/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.2, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 20, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.1" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-4/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-4/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e18393f4cdefa6557939ffbf228b4b795e1fc800 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-4/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.1" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-5/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c31d06cf988c3aa2fb8893b1dc6b156e8bdcc55 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-5/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 20, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.1" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-5/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-5/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e18393f4cdefa6557939ffbf228b4b795e1fc800 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_2e-5/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.1" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_peft/README.md b/glue/mistral_weights_pt/pt_layerfull_rte_fix_peft/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d674b464ff7589082283e088909596c89cc3d39b --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_peft/README.md @@ -0,0 +1,202 @@ +--- +base_model: /mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_peft/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_peft/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..193c963b3c830aec8449ebcb8437d8780b46aaaf --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_peft/adapter_config.json @@ -0,0 +1,15 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "encoder_hidden_size": 1024, + "inference_mode": true, + "num_attention_heads": 8, + "num_layers": 32, + "num_transformer_submodules": 1, + "num_virtual_tokens": 20, + "peft_type": "PREFIX_TUNING", + "prefix_projection": false, + "revision": null, + "task_type": "SEQ_CLS", + "token_dim": 1024 +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3fcb6a4e52ae89e41294f01c356c62edaee80583 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 20, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_1e-5/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_1e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c31d06cf988c3aa2fb8893b1dc6b156e8bdcc55 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_1e-5/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 20, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.1" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_1e-5/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_1e-5/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e18393f4cdefa6557939ffbf228b4b795e1fc800 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_1e-5/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.1" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_lr1e-6_ep25_wd00001_cos/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_lr1e-6_ep25_wd00001_cos/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f9f2dad736315e3864429543c22f6a4a4e12121 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_lr1e-6_ep25_wd00001_cos/adapter_config.json @@ -0,0 +1,22 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 20, + "shared_gating": true, + "use_gating": false + }, + "config_id": "a62560183dec11a0", + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_rte", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_lr1e-6_ep25_wd00001_cos/head_config.json b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_lr1e-6_ep25_wd00001_cos/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_rte_fix_pl20_lr1e-6_ep25_wd00001_cos/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_sst2/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_sst2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e420542a553bc502e2f912ca463171b724e9c74e --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_sst2/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_sst2", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_sst2/head_config.json b/glue/mistral_weights_pt/pt_layerfull_sst2/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_sst2/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_wnli/adapter_config.json b/glue/mistral_weights_pt/pt_layerfull_wnli/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd968898f0d4b865e0f9c87c8c25f6a777356b1c --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_wnli/adapter_config.json @@ -0,0 +1,21 @@ +{ + "config": { + "architecture": "prefix_tuning", + "bottleneck_size": 512, + "cross_prefix": true, + "dropout": 0.0, + "encoder_prefix": true, + "flat": false, + "leave_out": [], + "non_linearity": "tanh", + "prefix_length": 30, + "shared_gating": true, + "use_gating": false + }, + "hidden_size": 4096, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": "pt_layerfull_wnli", + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/glue/mistral_weights_pt/pt_layerfull_wnli/head_config.json b/glue/mistral_weights_pt/pt_layerfull_wnli/head_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4266014e3cc11ca014dd9c2311d7d97077dda67 --- /dev/null +++ b/glue/mistral_weights_pt/pt_layerfull_wnli/head_config.json @@ -0,0 +1,14 @@ +{ + "config": null, + "hidden_size": 4096, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_class": "MistralForSequenceClassification", + "model_name": "/mnt/data/users/liamding/data/TOX/pretrained_models/mistral-7b", + "model_type": "mistral", + "name": null, + "num_labels": 2, + "version": "adapters.1.0.0" +} \ No newline at end of file diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_0/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_0/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_0/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_1/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_1/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..539ef11fa88fba453224332717512d4ac0648f74 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_1/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ce89efbb86ffb7cae2988b1edf5e80fe5d597d22a899cf7a1f7eeb18c26fbe +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_1/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_1/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_1/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_10/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_10/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa871b23fb1c76b8f8072d4402e05ea7d592eb48 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_10/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc502130c156f5495f2e7dbb784d9c3a3bdee5a37e4dab25f48d769f8a1d92d +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_10/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_10/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_10/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_11/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_11/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b0505bfd8c7f0249a234c07a8210fed73a6bd98 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_11/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b65cd4032cc1dee39006825cb4c6db0e35bbbe13d0fab11c5c32ee1884b9a0f6 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_11/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_11/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_11/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_12/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_12/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ae5eae8c078b040b18c1c8891aac6cf055dd7a8 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_12/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4740df698580628e996d25dc753e27da34aea3089aded6be91965e264951f6bc +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_12/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_12/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_12/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_13/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_13/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..98f6a4e690b1208ce3dae15dd547320f0850616e --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_13/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a837b0f66854cafb5844d1310dcea0889ad6d260125574b8db08d16b9efe22e9 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_13/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_13/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_13/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_14/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_14/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ee28907a2ed15afce14523b684a1174ae63371b --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_14/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80e7c4a39a241f3d3136bf99c6d87c4d61ad5d272cda8326d78906f39d938a9 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_14/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_14/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_14/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_15/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_15/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c219818684ca7b06b808bcff871bc086abb8d3e --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_15/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dbccf1de5cc35abf1c01ba5e1b915570e1c58ef9e51c194dcdd1b200e229d39 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_15/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_15/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_15/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_16/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_16/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..02faaa76d2860f85b8fa4d8aaa1c4a43c93f1703 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_16/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c03d49c53c4d8f6080f9e480c9cf34e007b64e9e72e569cd7e88ca96ce6df7a +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_16/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_16/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_16/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_17/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_17/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcd0190bbbd40dfb4c8a5444e95caba99d34bed2 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_17/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b445d8db9d2edba0d7dac54a5f4c18697b34ff08759fa34d89a09543586fdc3e +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_17/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_17/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_17/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_18/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_18/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..faf5d543300efbc1a2af74be62d572bd9f1a5e89 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_18/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f5aad523ecaf5de62e99c3fe5e1ed3ea5e9d64b12166d0fa1c0e7a81a414c9 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_18/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_18/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_18/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_19/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_19/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..f8893315fcf4c16e7b69361b30a3f97930e020fe --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_19/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0432c3e084d4ef1ae883b5a4e3ce83c5fe60ba6936db267f7dfb539457b518 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_19/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_19/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_19/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_2/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_2/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fc08354f732f616bf1efff0d4f112f2a5c06368 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_2/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5e19a810a72925a8da08e0ce0f253493dbd123e002caafa6daca201e6aba00 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_2/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_2/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_2/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_20/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_20/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..c710ca50ab3a691ca0ed987a0339cab692bc049b --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_20/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf05fcf412537b481ee65e86320d9126e6aa8c4b8f3877cee8d35b06ce4f7fae +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_20/pytorch_model_head.bin b/tox/gpt2-large/toxic_gpt2-large_layer_20/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fd0a3faaad144d4fdff76f8b123d8ea198deec0 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_20/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35824ee4ac9cf7596f4b5f27426060d1242187b8cf3c846cbcee0df99453af +size 257316684 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_21/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_21/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..2db665a3313d14bf4edf738566fd3859888779b3 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_21/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:941ce4fd1dc54f941c01414f1a11635d94ef19926b63b9a04c8ef714974217f7 +size 1652575 diff --git a/tox/gpt2-large/toxic_gpt2-large_layer_22/pytorch_adapter.bin b/tox/gpt2-large/toxic_gpt2-large_layer_22/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..5145a72bb6fd45c956f20579b06f076c9ceadfb8 --- /dev/null +++ b/tox/gpt2-large/toxic_gpt2-large_layer_22/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c70aa83e127d7ad883990a3b34c086ebb4f74aaa05ef61da2097a837f8cf2e +size 1652575 diff --git a/tox/mistral/toxic_Mistral-7B_layer_full/pytorch_adapter.bin b/tox/mistral/toxic_Mistral-7B_layer_full/pytorch_adapter.bin new file mode 100644 index 0000000000000000000000000000000000000000..b33a07bdd9ec48f3542be4ab5cabf4245507248e --- /dev/null +++ b/tox/mistral/toxic_Mistral-7B_layer_full/pytorch_adapter.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982b86f6e16981d7b4955191fb8ae965de5a34cd2e7814f9f60270ae5953ad6f +size 538082834 diff --git a/tox/mistral/toxic_Mistral-7B_layer_full/pytorch_model_head.bin b/tox/mistral/toxic_Mistral-7B_layer_full/pytorch_model_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c274e843c249dc3c7ecbf56dcf2e3529e1b7139 --- /dev/null +++ b/tox/mistral/toxic_Mistral-7B_layer_full/pytorch_model_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb8c804f3131124d13a6178a19a01acc0594bdde559c985acb88733ce8e1399 +size 536872211