Pankaj001 committed on
Commit
d2b21f1
·
verified ·
1 Parent(s): d35af4f

Upload txt_attk.ipynb

Browse files
Files changed (1) hide show
  1. txt_attk.ipynb +59 -42
txt_attk.ipynb CHANGED
@@ -80,6 +80,9 @@
80
  },
81
  "outputs": [],
82
  "source": [
 
 
 
83
  "# Importing necessary libraries\n",
84
  "import os\n",
85
  "import numpy as np\n",
@@ -118,6 +121,10 @@
118
  },
119
  "outputs": [],
120
  "source": [
 
 
 
 
121
  "# Flag to determine whether to train a new model or use a pre-trained one\n",
122
  "model_train = True # False-> download from Huggingface"
123
  ]
@@ -145,6 +152,9 @@
145
  },
146
  "outputs": [],
147
  "source": [
 
 
 
148
  "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)"
149
  ]
150
  },
@@ -252,6 +262,10 @@
252
  }
253
  ],
254
  "source": [
 
 
 
 
255
  "if model_train:\n",
256
  " # Setting up parameters for the IMDB dataset and model\n",
257
  " vocab_size = 10000 # Number of words to keep in the vocabulary\n",
@@ -326,6 +340,10 @@
326
  },
327
  "outputs": [],
328
  "source": [
 
 
 
 
329
  "class CustomTensorFlowModelWrapper(ModelWrapper):\n",
330
  " def __init__(self, model,tokenizer,model_type,max_length = None,preprocess_text = None):\n",
331
  " self.model = model\n",
@@ -367,15 +385,15 @@
367
  },
368
  {
369
  "cell_type": "code",
370
- "execution_count": 9,
371
  "id": "b1c3280d-03b0-4c06-bdb8-1e5e57700bb2",
372
  "metadata": {
373
  "execution": {
374
- "iopub.execute_input": "2024-07-30T06:48:25.829899Z",
375
- "iopub.status.busy": "2024-07-30T06:48:25.829271Z",
376
- "iopub.status.idle": "2024-07-30T06:49:06.376939Z",
377
- "shell.execute_reply": "2024-07-30T06:49:06.376939Z",
378
- "shell.execute_reply.started": "2024-07-30T06:48:25.829899Z"
379
  },
380
  "scrolled": true
381
  },
@@ -412,27 +430,14 @@
412
  "name": "stderr",
413
  "output_type": "stream",
414
  "text": [
415
- " 10%|████████▎ | 1/10 [00:35<05:18, 35.40s/it]"
416
- ]
417
- },
418
- {
419
- "name": "stdout",
420
- "output_type": "stream",
421
- "text": [
422
- "--------------------------------------------- Result 1 ---------------------------------------------\n"
423
- ]
424
- },
425
- {
426
- "name": "stderr",
427
- "output_type": "stream",
428
- "text": [
429
- "[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1: 10%|██▉ | 1/10 [00:36<05:24, 36.06s/it]"
430
  ]
431
  },
432
  {
433
  "name": "stdout",
434
  "output_type": "stream",
435
  "text": [
 
436
  "[[0 (96%)]] --> [[1 (91%)]]\n",
437
  "\n",
438
  "Don't [[waste]] your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\n",
@@ -446,7 +451,7 @@
446
  "name": "stderr",
447
  "output_type": "stream",
448
  "text": [
449
- "[Succeeded / Failed / Skipped / Total] 1 / 1 / 0 / 2: 20%|█████▊ | 2/10 [00:38<02:34, 19.30s/it]"
450
  ]
451
  },
452
  {
@@ -454,24 +459,26 @@
454
  "output_type": "stream",
455
  "text": [
456
  "--------------------------------------------- Result 2 ---------------------------------------------\n",
457
- "[[1 (94%)]] --> [[[FAILED]]]\n",
458
  "\n",
459
- "I am happy\n",
 
 
460
  "\n",
461
  "\n",
462
  "\n",
463
  "+-------------------------------+--------+\n",
464
  "| Attack Results | |\n",
465
  "+-------------------------------+--------+\n",
466
- "| Number of successful attacks: | 1 |\n",
467
- "| Number of failed attacks: | 1 |\n",
468
  "| Number of skipped attacks: | 0 |\n",
469
  "| Original accuracy: | 100.0% |\n",
470
- "| Accuracy under attack: | 50.0% |\n",
471
- "| Attack success rate: | 50.0% |\n",
472
- "| Average perturbed word %: | 2.33% |\n",
473
- "| Average num. words per input: | 23.0 |\n",
474
- "| Avg num queries: | 158.5 |\n",
475
  "+-------------------------------+--------+\n"
476
  ]
477
  },
@@ -484,12 +491,19 @@
484
  }
485
  ],
486
  "source": [
 
 
 
 
 
487
  "# Wrapping the model for TextAttack\n",
488
- "model_wrapper = CustomTensorFlowModelWrapper(model,tokenizer,\"lstm\",max_length)\n",
 
 
489
  "\n",
490
  "# Preparing input data for the attack\n",
491
  "input_data = [(\"\"\"Don't waste your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\"\"\", 0),\n",
492
- " (\"I am happy\",1)]\n",
493
  "dataset = textattack.datasets.Dataset(input_data)\n",
494
  "\n",
495
  "# Setting up the attack\n",
@@ -510,15 +524,15 @@
510
  },
511
  {
512
  "cell_type": "code",
513
- "execution_count": 13,
514
  "id": "bef21ec2-d1d0-4752-9b0a-2c99c006291e",
515
  "metadata": {
516
  "execution": {
517
- "iopub.execute_input": "2024-07-30T06:54:43.238422Z",
518
- "iopub.status.busy": "2024-07-30T06:54:43.238422Z",
519
- "iopub.status.idle": "2024-07-30T06:54:43.254191Z",
520
- "shell.execute_reply": "2024-07-30T06:54:43.253694Z",
521
- "shell.execute_reply.started": "2024-07-30T06:54:43.238422Z"
522
  }
523
  },
524
  "outputs": [
@@ -533,17 +547,20 @@
533
  "Perturbed_text_Label -> 1\n",
534
  "\n",
535
  "---------------------------------------------------------------------------\n",
536
- "Original_text -> I am happy\n",
537
  "Original_text_Label -> 1\n",
538
  "\n",
539
- "Perturbed_text -> 1 am happy\n",
540
- "Perturbed_text_Label -> 1\n",
541
  "\n",
542
  "---------------------------------------------------------------------------\n"
543
  ]
544
  }
545
  ],
546
  "source": [
 
 
 
547
  "# Displaying the results of the attack\n",
548
  "for data in attacked_data:\n",
549
  " print(f\"Original_text -> {data.original_text()}\")\n",
 
80
  },
81
  "outputs": [],
82
  "source": [
83
+ "\"\"\"\n",
84
+ "Description: import library \n",
85
+ "\"\"\"\n",
86
  "# Importing necessary libraries\n",
87
  "import os\n",
88
  "import numpy as np\n",
 
121
  },
122
  "outputs": [],
123
  "source": [
124
+ "\"\"\"\n",
125
+ "Description: Assigning a flag value for Model Training or Loading from huggingface. \n",
126
+ "\"\"\"\n",
127
+ "\n",
128
  "# Flag to determine whether to train a new model or use a pre-trained one\n",
129
  "model_train = True # False-> download from Huggingface"
130
  ]
 
152
  },
153
  "outputs": [],
154
  "source": [
155
+ "\"\"\"\n",
156
+ "Description: Load IMDB data with art functionality. \n",
157
+ "\"\"\"\n",
158
  "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)"
159
  ]
160
  },
 
262
  }
263
  ],
264
  "source": [
265
+ "\"\"\"\n",
266
+ "Description: Training or loading the model.\n",
267
+ "\"\"\"\n",
268
+ "\n",
269
  "if model_train:\n",
270
  " # Setting up parameters for the IMDB dataset and model\n",
271
  " vocab_size = 10000 # Number of words to keep in the vocabulary\n",
 
340
  },
341
  "outputs": [],
342
  "source": [
343
+ "\"\"\"\n",
344
+ "Description: create class to design architecture of model wrapper for text-attack\n",
345
+ "\"\"\"\n",
346
+ "\n",
347
  "class CustomTensorFlowModelWrapper(ModelWrapper):\n",
348
  " def __init__(self, model,tokenizer,model_type,max_length = None,preprocess_text = None):\n",
349
  " self.model = model\n",
 
385
  },
386
  {
387
  "cell_type": "code",
388
+ "execution_count": 16,
389
  "id": "b1c3280d-03b0-4c06-bdb8-1e5e57700bb2",
390
  "metadata": {
391
  "execution": {
392
+ "iopub.execute_input": "2024-07-31T05:10:49.360446Z",
393
+ "iopub.status.busy": "2024-07-31T05:10:49.359376Z",
394
+ "iopub.status.idle": "2024-07-31T05:11:32.103320Z",
395
+ "shell.execute_reply": "2024-07-31T05:11:32.103320Z",
396
+ "shell.execute_reply.started": "2024-07-31T05:10:49.360446Z"
397
  },
398
  "scrolled": true
399
  },
 
430
  "name": "stderr",
431
  "output_type": "stream",
432
  "text": [
433
+ "[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1: 10%|██▉ | 1/10 [00:37<05:35, 37.25s/it]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  ]
435
  },
436
  {
437
  "name": "stdout",
438
  "output_type": "stream",
439
  "text": [
440
+ "--------------------------------------------- Result 1 ---------------------------------------------\n",
441
  "[[0 (96%)]] --> [[1 (91%)]]\n",
442
  "\n",
443
  "Don't [[waste]] your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\n",
 
451
  "name": "stderr",
452
  "output_type": "stream",
453
  "text": [
454
+ "[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2: 20%|█████▊ | 2/10 [00:41<02:47, 20.93s/it]"
455
  ]
456
  },
457
  {
 
459
  "output_type": "stream",
460
  "text": [
461
  "--------------------------------------------- Result 2 ---------------------------------------------\n",
462
+ "[[1 (98%)]] --> [[0 (74%)]]\n",
463
  "\n",
464
+ "I am happy as it was a [[wonderful]] experience\n",
465
+ "\n",
466
+ "I am happy as it was a [[marvellous]] experience\n",
467
  "\n",
468
  "\n",
469
  "\n",
470
  "+-------------------------------+--------+\n",
471
  "| Attack Results | |\n",
472
  "+-------------------------------+--------+\n",
473
+ "| Number of successful attacks: | 2 |\n",
474
+ "| Number of failed attacks: | 0 |\n",
475
  "| Number of skipped attacks: | 0 |\n",
476
  "| Original accuracy: | 100.0% |\n",
477
+ "| Accuracy under attack: | 0.0% |\n",
478
+ "| Attack success rate: | 100.0% |\n",
479
+ "| Average perturbed word %: | 6.72% |\n",
480
+ "| Average num. words per input: | 26.0 |\n",
481
+ "| Avg num queries: | 166.0 |\n",
482
  "+-------------------------------+--------+\n"
483
  ]
484
  },
 
491
  }
492
  ],
493
  "source": [
494
+ "\"\"\"\n",
495
+ "Description: Generating text attack vector\n",
496
+ "\"\"\"\n",
497
+ "\n",
498
+ "\n",
499
  "# Wrapping the model for TextAttack\n",
500
+ "model_wrapper = CustomTensorFlowModelWrapper(model,tokenizer,\"lstm\",max_length) \n",
501
+ " \n",
502
+ "# if transformer no need to assign max length\n",
503
  "\n",
504
  "# Preparing input data for the attack\n",
505
  "input_data = [(\"\"\"Don't waste your time or money on this one. This book is terrible. Whatever happened to Amanda Quick writing great books. She used to be my favorite autor. It will be a long time before I ever purchase another one of her books.\"\"\", 0),\n",
506
+ " (\"I am happy as it was a wonderful experience\",1)]\n",
507
  "dataset = textattack.datasets.Dataset(input_data)\n",
508
  "\n",
509
  "# Setting up the attack\n",
 
524
  },
525
  {
526
  "cell_type": "code",
527
+ "execution_count": 17,
528
  "id": "bef21ec2-d1d0-4752-9b0a-2c99c006291e",
529
  "metadata": {
530
  "execution": {
531
+ "iopub.execute_input": "2024-07-31T05:11:32.105310Z",
532
+ "iopub.status.busy": "2024-07-31T05:11:32.105310Z",
533
+ "iopub.status.idle": "2024-07-31T05:11:32.120300Z",
534
+ "shell.execute_reply": "2024-07-31T05:11:32.119361Z",
535
+ "shell.execute_reply.started": "2024-07-31T05:11:32.105310Z"
536
  }
537
  },
538
  "outputs": [
 
547
  "Perturbed_text_Label -> 1\n",
548
  "\n",
549
  "---------------------------------------------------------------------------\n",
550
+ "Original_text -> I am happy as it was a wonderful experience\n",
551
  "Original_text_Label -> 1\n",
552
  "\n",
553
+ "Perturbed_text -> I am happy as it was a marvellous experience\n",
554
+ "Perturbed_text_Label -> 0\n",
555
  "\n",
556
  "---------------------------------------------------------------------------\n"
557
  ]
558
  }
559
  ],
560
  "source": [
561
+ "\"\"\"\n",
562
+ "Description: Displaying result of text attack\n",
563
+ "\"\"\"\n",
564
  "# Displaying the results of the attack\n",
565
  "for data in attacked_data:\n",
566
  " print(f\"Original_text -> {data.original_text()}\")\n",