cgoosen commited on
Commit
c72f2e5
·
verified ·
1 Parent(s): 00325c6

cgoosen/prompt-tackler_v3

Browse files
README.md CHANGED
@@ -1,12 +1,9 @@
1
  ---
2
- license: mit
3
- base_model: microsoft/deberta-v3-base
 
4
  tags:
5
  - generated_from_trainer
6
- - prompt injection
7
- - security
8
- - jailbreak
9
- - prompt security
10
  metrics:
11
  - accuracy
12
  - precision
@@ -15,31 +12,21 @@ metrics:
15
  model-index:
16
  - name: prompt-tackler
17
  results: []
18
- datasets:
19
- - reshabhs/SPML_Chatbot_Prompt_Injection
20
- - VMware/open-instruct
21
- - jackhhao/jailbreak-classification
22
- - cgoosen/prompt_injection_combined
23
- language:
24
- - en
25
- - afr
26
- - fr
27
- #thumbnail: "url to a thumbnail used in social sharing"
28
- library_name: transformers
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
32
  should probably proofread and complete it, then remove this comment. -->
33
 
 
34
  # prompt-tackler
35
 
36
- This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the None dataset.
37
  It achieves the following results on the evaluation set:
38
- - Loss: 0.0101
39
- - Accuracy: 0.9984
40
- - Precision: 0.9984
41
- - Recall: 0.9984
42
- - F1: 0.9984
43
 
44
  ## Model description
45
 
@@ -62,25 +49,28 @@ The following hyperparameters were used during training:
62
  - train_batch_size: 8
63
  - eval_batch_size: 8
64
  - seed: 42
65
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
 
66
  - lr_scheduler_type: linear
67
  - num_epochs: 6
 
68
 
69
  ### Training results
70
 
71
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
72
- |:-------------:|:-----:|:-----:|:---------------:|:--------:|:---------:|:------:|:------:|
73
- | 0.0242 | 1.0 | 3058 | 0.0167 | 0.9967 | 0.9968 | 0.9967 | 0.9967 |
74
- | 0.0146 | 2.0 | 6116 | 0.0163 | 0.9977 | 0.9977 | 0.9977 | 0.9977 |
75
- | 0.009 | 3.0 | 9174 | 0.0112 | 0.9984 | 0.9984 | 0.9984 | 0.9984 |
76
- | 0.0029 | 4.0 | 12232 | 0.0101 | 0.9984 | 0.9984 | 0.9984 | 0.9984 |
77
- | 0.0029 | 5.0 | 15290 | 0.0179 | 0.9980 | 0.9981 | 0.9980 | 0.9980 |
78
- | 0.0012 | 6.0 | 18348 | 0.0160 | 0.9985 | 0.9985 | 0.9985 | 0.9985 |
79
 
80
 
81
  ### Framework versions
82
 
83
- - Transformers 4.40.2
84
- - Pytorch 2.5.0+cu124
85
- - Datasets 2.18.0
86
- - Tokenizers 0.19.1
 
1
  ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: protectai/deberta-v3-small-prompt-injection-v2
5
  tags:
6
  - generated_from_trainer
 
 
 
 
7
  metrics:
8
  - accuracy
9
  - precision
 
12
  model-index:
13
  - name: prompt-tackler
14
  results: []
 
 
 
 
 
 
 
 
 
 
 
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
  should probably proofread and complete it, then remove this comment. -->
19
 
20
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/christogoosen/prompt-tackler/runs/w2bjzmse)
21
  # prompt-tackler
22
 
23
+ This model is a fine-tuned version of [protectai/deberta-v3-small-prompt-injection-v2](https://huggingface.co/protectai/deberta-v3-small-prompt-injection-v2) on the None dataset.
24
  It achieves the following results on the evaluation set:
25
+ - Loss: 0.0186
26
+ - Accuracy: 0.9959
27
+ - Precision: 0.9959
28
+ - Recall: 0.9959
29
+ - F1: 0.9959
30
 
31
  ## Model description
32
 
 
49
  - train_batch_size: 8
50
  - eval_batch_size: 8
51
  - seed: 42
52
+ - gradient_accumulation_steps: 2
53
+ - total_train_batch_size: 16
54
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
55
  - lr_scheduler_type: linear
56
  - num_epochs: 6
57
+ - mixed_precision_training: Native AMP
58
 
59
  ### Training results
60
 
61
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
62
+ |:-------------:|:-----:|:------:|:---------------:|:--------:|:---------:|:------:|:------:|
63
+ | 0.0177 | 1.0 | 20686 | 0.0222 | 0.9943 | 0.9943 | 0.9943 | 0.9943 |
64
+ | 0.012 | 2.0 | 41372 | 0.0186 | 0.9959 | 0.9959 | 0.9959 | 0.9959 |
65
+ | 0.0084 | 3.0 | 62058 | 0.0278 | 0.9955 | 0.9955 | 0.9955 | 0.9955 |
66
+ | 0.0216 | 4.0 | 82744 | 0.0256 | 0.9959 | 0.9959 | 0.9959 | 0.9959 |
67
+ | 0.0038 | 5.0 | 103430 | 0.0327 | 0.9963 | 0.9963 | 0.9963 | 0.9963 |
68
+ | 0.0 | 6.0 | 124116 | 0.0383 | 0.9963 | 0.9963 | 0.9963 | 0.9963 |
69
 
70
 
71
  ### Framework versions
72
 
73
+ - Transformers 4.53.3
74
+ - Pytorch 2.9.1+cu128
75
+ - Datasets 2.21.0
76
+ - Tokenizers 0.21.4
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
@@ -8,24 +7,23 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "benign",
12
- "1": "injection",
13
- "2": "jailbreak"
14
  },
15
  "initializer_range": 0.02,
16
  "intermediate_size": 3072,
17
  "label2id": {
18
- "benign": 0,
19
- "injection": 1,
20
- "jailbreak": 2
21
  },
22
  "layer_norm_eps": 1e-07,
 
23
  "max_position_embeddings": 512,
24
  "max_relative_positions": -1,
25
  "model_type": "deberta-v2",
26
  "norm_rel_ebd": "layer_norm",
27
  "num_attention_heads": 12,
28
- "num_hidden_layers": 12,
29
  "pad_token_id": 0,
30
  "pooler_dropout": 0,
31
  "pooler_hidden_act": "gelu",
@@ -39,7 +37,7 @@
39
  "relative_attention": true,
40
  "share_att_key": true,
41
  "torch_dtype": "float32",
42
- "transformers_version": "4.40.2",
43
  "type_vocab_size": 0,
44
  "vocab_size": 128100
45
  }
 
1
  {
 
2
  "architectures": [
3
  "DebertaV2ForSequenceClassification"
4
  ],
 
7
  "hidden_dropout_prob": 0.1,
8
  "hidden_size": 768,
9
  "id2label": {
10
+ "0": "SAFE",
11
+ "1": "INJECTION"
 
12
  },
13
  "initializer_range": 0.02,
14
  "intermediate_size": 3072,
15
  "label2id": {
16
+ "INJECTION": 1,
17
+ "SAFE": 0
 
18
  },
19
  "layer_norm_eps": 1e-07,
20
+ "legacy": true,
21
  "max_position_embeddings": 512,
22
  "max_relative_positions": -1,
23
  "model_type": "deberta-v2",
24
  "norm_rel_ebd": "layer_norm",
25
  "num_attention_heads": 12,
26
+ "num_hidden_layers": 6,
27
  "pad_token_id": 0,
28
  "pooler_dropout": 0,
29
  "pooler_hidden_act": "gelu",
 
37
  "relative_attention": true,
38
  "share_att_key": true,
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.53.3",
41
  "type_vocab_size": 0,
42
  "vocab_size": 128100
43
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95341ec085da9ab7162ce1da9e0e509510567b05e2867da1487f9d041adbefb8
3
- size 737722356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b582fe4b3b0d10b9d3e42709e6bdbb79a0b2fa75571eaf937e390fa86a30fce
3
+ size 567598552
special_tokens_map.json CHANGED
@@ -1,10 +1,46 @@
1
  {
2
- "bos_token": "[CLS]",
3
- "cls_token": "[CLS]",
4
- "eos_token": "[SEP]",
5
- "mask_token": "[MASK]",
6
- "pad_token": "[PAD]",
7
- "sep_token": "[SEP]",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "unk_token": {
9
  "content": "[UNK]",
10
  "lstrip": false,
 
1
  {
2
+ "bos_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[SEP]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "[PAD]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
  "unk_token": {
45
  "content": "[UNK]",
46
  "lstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -46,13 +46,18 @@
46
  "cls_token": "[CLS]",
47
  "do_lower_case": false,
48
  "eos_token": "[SEP]",
 
49
  "mask_token": "[MASK]",
 
50
  "model_max_length": 1000000000000000019884624838656,
51
  "pad_token": "[PAD]",
52
  "sep_token": "[SEP]",
53
  "sp_model_kwargs": {},
54
  "split_by_punct": false,
 
55
  "tokenizer_class": "DebertaV2Tokenizer",
 
 
56
  "unk_token": "[UNK]",
57
  "vocab_type": "spm"
58
  }
 
46
  "cls_token": "[CLS]",
47
  "do_lower_case": false,
48
  "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
  "mask_token": "[MASK]",
51
+ "max_length": 512,
52
  "model_max_length": 1000000000000000019884624838656,
53
  "pad_token": "[PAD]",
54
  "sep_token": "[SEP]",
55
  "sp_model_kwargs": {},
56
  "split_by_punct": false,
57
+ "stride": 0,
58
  "tokenizer_class": "DebertaV2Tokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "[UNK]",
62
  "vocab_type": "spm"
63
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed263431c07caa92024d8d00226c32fef13c71cd0d6bdf65ae1f74139d3e4b73
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9e7ae0c9711428e4d19857cb7cd194a9a60d3a0b0c675ab4a6a58919573350b
3
+ size 5777