Pankaj001
/

Opensource_attack_examples

Model card Files Files and versions

xet

Community

Pankaj001 committed on Jul 31, 2024

Commit

d2b21f1

verified ·

1 Parent(s): d35af4f

Upload txt_attk.ipynb

Browse files

Files changed (1) hide show

txt_attk.ipynb +59 -42

txt_attk.ipynb CHANGED Viewed

@@ -80,6 +80,9 @@
    },
    "outputs": [],
    "source": [
     "# Importing necessary libraries\n",
     "import os\n",
     "import numpy as np\n",
@@ -118,6 +121,10 @@
    },
    "outputs": [],
    "source": [
     "# Flag to determine whether to train a new model or use a pre-trained one\n",
     "model_train = True # False-> download from Huggingface"
    ]
@@ -145,6 +152,9 @@
    },
    "outputs": [],
    "source": [
     "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)"
    ]
   },
@@ -252,6 +262,10 @@
     }
    ],
    "source": [
     "if model_train:\n",
     "    # Setting up parameters for the IMDB dataset and model\n",
     "    vocab_size = 10000  # Number of words to keep in the vocabulary\n",
@@ -326,6 +340,10 @@
    },
    "outputs": [],
    "source": [
     "class CustomTensorFlowModelWrapper(ModelWrapper):\n",
     "    def __init__(self, model,tokenizer,model_type,max_length = None,preprocess_text = None):\n",
     "        self.model = model\n",
@@ -367,15 +385,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "id": "b1c3280d-03b0-4c06-bdb8-1e5e57700bb2",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-07-30T06:48:25.829899Z",
-     "iopub.status.busy": "2024-07-30T06:48:25.829271Z",
-     "iopub.status.idle": "2024-07-30T06:49:06.376939Z",
-     "shell.execute_reply": "2024-07-30T06:49:06.376939Z",
-     "shell.execute_reply.started": "2024-07-30T06:48:25.829899Z"
     },
     "scrolled": true
    },
@@ -412,27 +430,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 10%|████████▎                                                                          | 1/10 [00:35<05:18, 35.40s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--------------------------------------------- Result 1 ---------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:  10%|██▉                          | 1/10 [00:36<05:24, 36.06s/it]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "[[0 (96%)]] --> [[1 (91%)]]\n",
       "\n",
       "Don't [[waste]] your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\n",
@@ -446,7 +451,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[Succeeded / Failed / Skipped / Total] 1 / 1 / 0 / 2:  20%|█████▊                       | 2/10 [00:38<02:34, 19.30s/it]"
      ]
     },
     {
@@ -454,24 +459,26 @@
      "output_type": "stream",
      "text": [
       "--------------------------------------------- Result 2 ---------------------------------------------\n",
-      "[[1 (94%)]] --> [[[FAILED]]]\n",
       "\n",
-      "I am happy\n",
       "\n",
       "\n",
       "\n",
       "+-------------------------------+--------+\n",
       "| Attack Results                |        |\n",
       "+-------------------------------+--------+\n",
-      "| Number of successful attacks: | 1      |\n",
-      "| Number of failed attacks:     | 1      |\n",
       "| Number of skipped attacks:    | 0      |\n",
       "| Original accuracy:            | 100.0% |\n",
-      "| Accuracy under attack:        | 50.0%  |\n",
-      "| Attack success rate:          | 50.0%  |\n",
-      "| Average perturbed word %:     | 2.33%  |\n",
-      "| Average num. words per input: | 23.0   |\n",
-      "| Avg num queries:              | 158.5  |\n",
       "+-------------------------------+--------+\n"
      ]
     },
@@ -484,12 +491,19 @@
     }
    ],
    "source": [
     "# Wrapping the model for TextAttack\n",
-    "model_wrapper = CustomTensorFlowModelWrapper(model,tokenizer,\"lstm\",max_length)\n",
     "\n",
     "# Preparing input data for the attack\n",
     "input_data = [(\"\"\"Don't waste your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\"\"\", 0),\n",
-    "             (\"I am happy\",1)]\n",
     "dataset = textattack.datasets.Dataset(input_data)\n",
     "\n",
     "# Setting up the attack\n",
@@ -510,15 +524,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
    "id": "bef21ec2-d1d0-4752-9b0a-2c99c006291e",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-07-30T06:54:43.238422Z",
-     "iopub.status.busy": "2024-07-30T06:54:43.238422Z",
-     "iopub.status.idle": "2024-07-30T06:54:43.254191Z",
-     "shell.execute_reply": "2024-07-30T06:54:43.253694Z",
-     "shell.execute_reply.started": "2024-07-30T06:54:43.238422Z"
     }
    },
    "outputs": [
@@ -533,17 +547,20 @@
       "Perturbed_text_Label -> 1\n",
       "\n",
       "---------------------------------------------------------------------------\n",
-      "Original_text -> I am happy\n",
       "Original_text_Label -> 1\n",
       "\n",
-      "Perturbed_text -> 1 am happy\n",
-      "Perturbed_text_Label -> 1\n",
       "\n",
       "---------------------------------------------------------------------------\n"
      ]
     }
    ],
    "source": [
     "# Displaying the results of the attack\n",
     "for data in attacked_data:\n",
     "    print(f\"Original_text -> {data.original_text()}\")\n",

    },
    "outputs": [],
    "source": [
+    "\"\"\"\n",
+    "Description: Import the required libraries.\n",
+    "\"\"\"\n",
     "# Importing necessary libraries\n",
     "import os\n",
     "import numpy as np\n",
    },
    "outputs": [],
    "source": [
+    "\"\"\"\n",
+    "Description: Set a flag that selects between training a new model and loading a pre-trained one from Hugging Face.\n",
+    "\"\"\"\n",
+    "\n",
     "# Flag to determine whether to train a new model or use a pre-trained one\n",
     "model_train = True # False-> download from Huggingface"
    ]
    },
    "outputs": [],
    "source": [
+    "\"\"\"\n",
+    "Description: Load the IMDB data, restricted to a 10,000-word vocabulary.\n",
+    "\"\"\"\n",
     "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)"
    ]
   },
     }
    ],
    "source": [
+    "\"\"\"\n",
+    "Description: Training or loading the model.\n",
+    "\"\"\"\n",
+    "\n",
     "if model_train:\n",
     "    # Setting up parameters for the IMDB dataset and model\n",
     "    vocab_size = 10000  # Number of words to keep in the vocabulary\n",
    },
    "outputs": [],
    "source": [
+    "\"\"\"\n",
+    "Description: Define a custom model wrapper class for use with TextAttack.\n",
+    "\"\"\"\n",
+    "\n",
     "class CustomTensorFlowModelWrapper(ModelWrapper):\n",
     "    def __init__(self, model,tokenizer,model_type,max_length = None,preprocess_text = None):\n",
     "        self.model = model\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 16,
    "id": "b1c3280d-03b0-4c06-bdb8-1e5e57700bb2",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2024-07-31T05:10:49.360446Z",
+     "iopub.status.busy": "2024-07-31T05:10:49.359376Z",
+     "iopub.status.idle": "2024-07-31T05:11:32.103320Z",
+     "shell.execute_reply": "2024-07-31T05:11:32.103320Z",
+     "shell.execute_reply.started": "2024-07-31T05:10:49.360446Z"
     },
     "scrolled": true
    },
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:  10%|██▉                          | 1/10 [00:37<05:35, 37.25s/it]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "--------------------------------------------- Result 1 ---------------------------------------------\n",
       "[[0 (96%)]] --> [[1 (91%)]]\n",
       "\n",
       "Don't [[waste]] your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\n",
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2:  20%|█████▊                       | 2/10 [00:41<02:47, 20.93s/it]"
      ]
     },
     {
      "output_type": "stream",
      "text": [
       "--------------------------------------------- Result 2 ---------------------------------------------\n",
+      "[[1 (98%)]] --> [[0 (74%)]]\n",
       "\n",
+      "I am happy as it was a [[wonderful]] experience\n",
+      "\n",
+      "I am happy as it was a [[marvellous]] experience\n",
       "\n",
       "\n",
       "\n",
       "+-------------------------------+--------+\n",
       "| Attack Results                |        |\n",
       "+-------------------------------+--------+\n",
+      "| Number of successful attacks: | 2      |\n",
+      "| Number of failed attacks:     | 0      |\n",
       "| Number of skipped attacks:    | 0      |\n",
       "| Original accuracy:            | 100.0% |\n",
+      "| Accuracy under attack:        | 0.0%   |\n",
+      "| Attack success rate:          | 100.0% |\n",
+      "| Average perturbed word %:     | 6.72%  |\n",
+      "| Average num. words per input: | 26.0   |\n",
+      "| Avg num queries:              | 166.0  |\n",
       "+-------------------------------+--------+\n"
      ]
     },
     }
    ],
    "source": [
+    "\"\"\"\n",
+    "Description: Generating text attack vector\n",
+    "\"\"\"\n",
+    "\n",
+    "\n",
     "# Wrapping the model for TextAttack\n",
+    "model_wrapper = CustomTensorFlowModelWrapper(model,tokenizer,\"lstm\",max_length) \n",
+    "                                            \n",
+    "# For a transformer model, max_length does not need to be passed.\n",
     "\n",
     "# Preparing input data for the attack\n",
     "input_data = [(\"\"\"Don't waste your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\"\"\", 0),\n",
+    "             (\"I am happy as it was a wonderful experience\",1)]\n",
     "dataset = textattack.datasets.Dataset(input_data)\n",
     "\n",
     "# Setting up the attack\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "id": "bef21ec2-d1d0-4752-9b0a-2c99c006291e",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2024-07-31T05:11:32.105310Z",
+     "iopub.status.busy": "2024-07-31T05:11:32.105310Z",
+     "iopub.status.idle": "2024-07-31T05:11:32.120300Z",
+     "shell.execute_reply": "2024-07-31T05:11:32.119361Z",
+     "shell.execute_reply.started": "2024-07-31T05:11:32.105310Z"
     }
    },
    "outputs": [
       "Perturbed_text_Label -> 1\n",
       "\n",
       "---------------------------------------------------------------------------\n",
+      "Original_text -> I am happy as it was a wonderful experience\n",
       "Original_text_Label -> 1\n",
       "\n",
+      "Perturbed_text -> I am happy as it was a marvellous experience\n",
+      "Perturbed_text_Label -> 0\n",
       "\n",
       "---------------------------------------------------------------------------\n"
      ]
     }
    ],
    "source": [
+    "\"\"\"\n",
+    "Description: Display the results of the text attack.\n",
+    "\"\"\"\n",
     "# Displaying the results of the attack\n",
     "for data in attacked_data:\n",
     "    print(f\"Original_text -> {data.original_text()}\")\n",