cgoosen/prompt-tackler_v3

Browse files

Files changed (7) hide show

README.md +26 -36
config.json +7 -9
model.safetensors +2 -2
special_tokens_map.json +42 -6
tokenizer.json +0 -0
tokenizer_config.json +5 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,12 +1,9 @@
 ---
-license: mit
-base_model: microsoft/deberta-v3-base
 tags:
 - generated_from_trainer
-- prompt injection
-- security
-- jailbreak
-- prompt security
 metrics:
 - accuracy
 - precision
@@ -15,31 +12,21 @@ metrics:
 model-index:
 - name: prompt-tackler
   results: []
-datasets:
-- reshabhs/SPML_Chatbot_Prompt_Injection
-- VMware/open-instruct
-- jackhhao/jailbreak-classification
-- cgoosen/prompt_injection_combined
-language:
-  - en
-  - afr
-  - fr
-#thumbnail: "url to a thumbnail used in social sharing"
-library_name: transformers
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 # prompt-tackler
-This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0101
-- Accuracy: 0.9984
-- Precision: 0.9984
-- Recall: 0.9984
-- F1: 0.9984
 ## Model description
@@ -62,25 +49,28 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 6
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Accuracy | Precision | Recall | F1     |
-|:-------------:|:-----:|:-----:|:---------------:|:--------:|:---------:|:------:|:------:|
-| 0.0242        | 1.0   | 3058  | 0.0167          | 0.9967   | 0.9968    | 0.9967 | 0.9967 |
-| 0.0146        | 2.0   | 6116  | 0.0163          | 0.9977   | 0.9977    | 0.9977 | 0.9977 |
-| 0.009         | 3.0   | 9174  | 0.0112          | 0.9984   | 0.9984    | 0.9984 | 0.9984 |
-| 0.0029        | 4.0   | 12232 | 0.0101          | 0.9984   | 0.9984    | 0.9984 | 0.9984 |
-| 0.0029        | 5.0   | 15290 | 0.0179          | 0.9980   | 0.9981    | 0.9980 | 0.9980 |
-| 0.0012        | 6.0   | 18348 | 0.0160          | 0.9985   | 0.9985    | 0.9985 | 0.9985 |
 ### Framework versions
-- Transformers 4.40.2
-- Pytorch 2.5.0+cu124
-- Datasets 2.18.0
-- Tokenizers 0.19.1

 ---
+library_name: transformers
+license: apache-2.0
+base_model: protectai/deberta-v3-small-prompt-injection-v2
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
 - precision
 model-index:
 - name: prompt-tackler
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/christogoosen/prompt-tackler/runs/w2bjzmse)
 # prompt-tackler
+This model is a fine-tuned version of [protectai/deberta-v3-small-prompt-injection-v2](https://huggingface.co/protectai/deberta-v3-small-prompt-injection-v2) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0186
+- Accuracy: 0.9959
+- Precision: 0.9959
+- Recall: 0.9959
+- F1: 0.9959
 ## Model description
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 16
+- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - num_epochs: 6
+- mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch | Step   | Validation Loss | Accuracy | Precision | Recall | F1     |
+|:-------------:|:-----:|:------:|:---------------:|:--------:|:---------:|:------:|:------:|
+| 0.0177        | 1.0   | 20686  | 0.0222          | 0.9943   | 0.9943    | 0.9943 | 0.9943 |
+| 0.012         | 2.0   | 41372  | 0.0186          | 0.9959   | 0.9959    | 0.9959 | 0.9959 |
+| 0.0084        | 3.0   | 62058  | 0.0278          | 0.9955   | 0.9955    | 0.9955 | 0.9955 |
+| 0.0216        | 4.0   | 82744  | 0.0256          | 0.9959   | 0.9959    | 0.9959 | 0.9959 |
+| 0.0038        | 5.0   | 103430 | 0.0327          | 0.9963   | 0.9963    | 0.9963 | 0.9963 |
+| 0.0           | 6.0   | 124116 | 0.0383          | 0.9963   | 0.9963    | 0.9963 | 0.9963 |
 ### Framework versions
+- Transformers 4.53.3
+- Pytorch 2.9.1+cu128
+- Datasets 2.21.0
+- Tokenizers 0.21.4

config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "microsoft/deberta-v3-base",
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
@@ -8,24 +7,23 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "benign",
-    "1": "injection",
-    "2": "jailbreak"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "benign": 0,
-    "injection": 1,
-    "jailbreak": 2
   },
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
   "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "pad_token_id": 0,
   "pooler_dropout": 0,
   "pooler_hidden_act": "gelu",
@@ -39,7 +37,7 @@
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.40.2",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }

 {
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
+    "0": "SAFE",
+    "1": "INJECTION"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
+    "INJECTION": 1,
+    "SAFE": 0
   },
   "layer_norm_eps": 1e-07,
+  "legacy": true,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
   "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
+  "num_hidden_layers": 6,
   "pad_token_id": 0,
   "pooler_dropout": 0,
   "pooler_hidden_act": "gelu",
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95341ec085da9ab7162ce1da9e0e509510567b05e2867da1487f9d041adbefb8
-size 737722356

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b582fe4b3b0d10b9d3e42709e6bdbb79a0b2fa75571eaf937e390fa86a30fce
+size 567598552

special_tokens_map.json CHANGED Viewed

@@ -1,10 +1,46 @@
 {
-  "bos_token": "[CLS]",
-  "cls_token": "[CLS]",
-  "eos_token": "[SEP]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
   "unk_token": {
     "content": "[UNK]",
     "lstrip": false,

 {
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "[UNK]",
     "lstrip": false,

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -46,13 +46,18 @@
   "cls_token": "[CLS]",
   "do_lower_case": false,
   "eos_token": "[SEP]",
   "mask_token": "[MASK]",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "sp_model_kwargs": {},
   "split_by_punct": false,
   "tokenizer_class": "DebertaV2Tokenizer",
   "unk_token": "[UNK]",
   "vocab_type": "spm"
 }

   "cls_token": "[CLS]",
   "do_lower_case": false,
   "eos_token": "[SEP]",
+  "extra_special_tokens": {},
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "sp_model_kwargs": {},
   "split_by_punct": false,
+  "stride": 0,
   "tokenizer_class": "DebertaV2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]",
   "vocab_type": "spm"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed263431c07caa92024d8d00226c32fef13c71cd0d6bdf65ae1f74139d3e4b73
-size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9e7ae0c9711428e4d19857cb7cd194a9a60d3a0b0c675ab4a6a58919573350b
+size 5777