Upload 30 files
Browse files- .gitattributes +1 -0
- sample_model_files/malicious_model_custom_layer.h5 +3 -0
- sample_model_files/malicious_model_lambda.h5 +3 -0
- sample_model_files/model_01.h5 +3 -0
- sample_model_files/model_99.h5 +3 -0
- sample_model_files/model_with_pickled_data.h5 +3 -0
- sample_model_files/pb files/linear_model.pb/fingerprint.pb +3 -0
- sample_model_files/pb files/linear_model.pb/keras_metadata.pb +3 -0
- sample_model_files/pb files/linear_model.pb/saved_model.pb +3 -0
- sample_model_files/pb files/linear_model.pb/variables/variables.data-00000-of-00001 +3 -0
- sample_model_files/pb files/linear_model.pb/variables/variables.index +3 -0
- sample_model_files/pb files/malicious_model_read_modified.pb/fingerprint.pb +3 -0
- sample_model_files/pb files/malicious_model_read_modified.pb/keras_metadata.pb +3 -0
- sample_model_files/pb files/malicious_model_read_modified.pb/saved_model.pb +3 -0
- sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.data-00000-of-00001 +3 -0
- sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.index +3 -0
- sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/fingerprint.pb +3 -0
- sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/keras_metadata.pb +3 -0
- sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/saved_model.pb +3 -0
- sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.data-00000-of-00001 +3 -0
- sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.index +3 -0
- sample_model_files/pb files/pb/saved_model.pb +3 -0
- sample_model_files/pb files/pb/variables/variables.data-00000-of-00001 +3 -0
- sample_model_files/pb files/pb/variables/variables.index +0 -0
- sample_model_files/safe_model.h5 +3 -0
- sample_model_files/sample_pickle.pkl +3 -0
- sample_notebook_files/classification_notebook.ipynb +177 -0
- sample_notebook_files/generic.ipynb +252 -0
- sample_notebook_files/prediction_notebook.ipynb +167 -0
- sample_notebook_files/safe_notebook.ipynb +138 -0
- sample_notebook_files/timeseries_notebook.ipynb +129 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
sample_model_files/pb[[:space:]]files/pb/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
sample_model_files/malicious_model_custom_layer.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:232e6d8bc42f8c821d4858a923028c3debe78e8938cf00049a0f5bb2c55ea856
|
| 3 |
+
size 50512
|
sample_model_files/malicious_model_lambda.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91f45f5c6183f78ac82ca526bae8dee3e8804a340828ff8da18f4fc2dcb93856
|
| 3 |
+
size 50512
|
sample_model_files/model_01.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d2a411dbc531120b84c37211a19107fded7947c591215894778a2dc12927b62
|
| 3 |
+
size 5881000
|
sample_model_files/model_99.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3696f7d099bf5a2efa03195cada466ef91bf678e024333a146576a5937990f9
|
| 3 |
+
size 6330600
|
sample_model_files/model_with_pickled_data.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49a0e12b37d068fa5a4c3a1c32868ede7a4fa4a7cea69af173c4131314298f28
|
| 3 |
+
size 18779
|
sample_model_files/pb files/linear_model.pb/fingerprint.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5b997df2c14a9436c89b95a8a209d74af3363d52745d83a718a97693fcd5a74
|
| 3 |
+
size 58
|
sample_model_files/pb files/linear_model.pb/keras_metadata.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa76ef4feac193ebc7ff8762cb8d082eeac9d24e69879f2745081b8b08c2321e
|
| 3 |
+
size 4425
|
sample_model_files/pb files/linear_model.pb/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e71626999238006df8851264233f38859d035948ff742210ebd5731c13f8640
|
| 3 |
+
size 39339
|
sample_model_files/pb files/linear_model.pb/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:715488bb973717280729ba9357d11cef2392bf7f4bfce8e65ec2e94d6982928b
|
| 3 |
+
size 2325
|
sample_model_files/pb files/linear_model.pb/variables/variables.index
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e58d48e3235f959186e81b264f9250385ea822d703437d7a1a674bf318907512
|
| 3 |
+
size 621
|
sample_model_files/pb files/malicious_model_read_modified.pb/fingerprint.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63413813c501e4a12f3edbc28e373f41e26f3baef03afbf2911df59d0061e242
|
| 3 |
+
size 57
|
sample_model_files/pb files/malicious_model_read_modified.pb/keras_metadata.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88004053b611bffb60ed32ac97cab30d8ff2c358ccb737d5450cc8ab7cece103
|
| 3 |
+
size 204831
|
sample_model_files/pb files/malicious_model_read_modified.pb/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ae8eebd2d4bc8fddb4a6629208456ec212375a7893a764b321280b6140abea9
|
| 3 |
+
size 2249481
|
sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:859960385d552b0b232fb6995dee6d2c2af44c20a9bd141517b62c13dc63fa98
|
| 3 |
+
size 391277
|
sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.index
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e51dca6b2deabfeddea73fd8c56c88c9320add07006959f67b01760b0b63d64
|
| 3 |
+
size 12414
|
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/fingerprint.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e670dc5c33f705fe175b5e6f802ac10dc826e4e86e17b62fcbe83bd21340fa65
|
| 3 |
+
size 57
|
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/keras_metadata.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b52251889d94b38f78f541596bdad519f3f052a2f62598272a1dbbf1c935e35
|
| 3 |
+
size 8526
|
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:200961791226861b664060751c220faecf6e7a824ec3a99c7967b0c83f5c671f
|
| 3 |
+
size 61321
|
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcbc0d986a88a44d6c30766b7b3a1a2c9840ce11002242bae9c698c225b7f7ad
|
| 3 |
+
size 3517
|
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.index
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2abc09c5f25f35f9d01650985103b49019ed9b1ff130da65bf5567b4e1016ede
|
| 3 |
+
size 621
|
sample_model_files/pb files/pb/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d55083f53c49df137a089df0c4c6d41b6e027a9ed06f6f7c201ab8ca4b306b3f
|
| 3 |
+
size 235045
|
sample_model_files/pb files/pb/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d71048fc9a5dd7aad56ebe1493174118f0da01ec5640fd1979d1448c765f26d1
|
| 3 |
+
size 6274830
|
sample_model_files/pb files/pb/variables/variables.index
ADDED
|
Binary file (3.56 kB). View file
|
|
|
sample_model_files/safe_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:913fb308a03f3485ded8ab0b3747af7996bb0ba9e151622177db0b2141fd8593
|
| 3 |
+
size 49672
|
sample_model_files/sample_pickle.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ab0c5b5c0b3775045c35f933ee98d4c1c68d5021c1d6fcf983c2f0bb9178607
|
| 3 |
+
size 186233
|
sample_notebook_files/classification_notebook.ipynb
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "d5c4b5c2-8c0a-4cbe-8997-1a98c14be2e4",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"A text classification model using libraries like NLTK or spaCy. It includes some PII data within the code (e.g., hard-coded email addresses or phone numbers for testing purposes), and includes a few API tokens/secrets."
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": null,
|
| 14 |
+
"id": "f95fa380-34d0-455d-8002-ebe5f829542c",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Vulnerable libraries\n",
|
| 19 |
+
"!pip install django==1.11.15\n",
|
| 20 |
+
"!pip install flask==0.12.2\n",
|
| 21 |
+
"!pip install numpy==1.16.0\n",
|
| 22 |
+
"!pip install requests==2.19.1\n",
|
| 23 |
+
"!pip install scikit-learn==0.19.0"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "code",
|
| 28 |
+
"execution_count": null,
|
| 29 |
+
"id": "25315022-9da9-4c29-8326-6532d261dd56",
|
| 30 |
+
"metadata": {},
|
| 31 |
+
"outputs": [],
|
| 32 |
+
"source": [
|
| 33 |
+
"# Non-permissible licensed libraries\n",
|
| 34 |
+
"import gmpy2\n",
|
| 35 |
+
"import oct2py\n",
|
| 36 |
+
"import pygsl\n",
|
| 37 |
+
"from PyQt5 import QtCore"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
+
"id": "489ad824-285c-4219-afc6-073192d54f3e",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"# Required Libraries for our task\n",
|
| 48 |
+
"import nltk\n",
|
| 49 |
+
"import sklearn"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"cell_type": "code",
|
| 54 |
+
"execution_count": null,
|
| 55 |
+
"id": "109d2f98-4d6d-42d9-acb4-2f195af051d5",
|
| 56 |
+
"metadata": {},
|
| 57 |
+
"outputs": [],
|
| 58 |
+
"source": [
|
| 59 |
+
"# PII Data\n",
|
| 60 |
+
"email = \"john.doe@example.com\"\n",
|
| 61 |
+
"phone = \"123-456-7890\""
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "code",
|
| 66 |
+
"execution_count": null,
|
| 67 |
+
"id": "d637e295-0953-4980-bf99-c7e7e509e876",
|
| 68 |
+
"metadata": {},
|
| 69 |
+
"outputs": [],
|
| 70 |
+
"source": [
|
| 71 |
+
"# API Keys and secrets\n",
|
| 72 |
+
"fb_app_secret = \"3e4a22bb7e6b2c38b7809234b3ee782b\"\n",
|
| 73 |
+
"db_credentials = \"username:password@localhost:5432/mydatabase\""
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"cell_type": "code",
|
| 78 |
+
"execution_count": null,
|
| 79 |
+
"id": "a6493567-ad7f-4b87-95e4-5068a09fca92",
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [],
|
| 82 |
+
"source": [
|
| 83 |
+
"# Download nltk data\n",
|
| 84 |
+
"nltk.download('punkt', download_dir='/nltk_data/')"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"cell_type": "code",
|
| 89 |
+
"execution_count": null,
|
| 90 |
+
"id": "7f94e191-bfe7-4e54-9dbf-4d2484b0dbe9",
|
| 91 |
+
"metadata": {},
|
| 92 |
+
"outputs": [],
|
| 93 |
+
"source": [
|
| 94 |
+
"\n",
|
| 95 |
+
"# Text Classification\n",
|
| 96 |
+
"from sklearn.datasets import fetch_20newsgroups\n",
|
| 97 |
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
| 98 |
+
"from sklearn.feature_extraction.text import TfidfTransformer\n",
|
| 99 |
+
"from sklearn.naive_bayes import MultinomialNB"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "code",
|
| 104 |
+
"execution_count": null,
|
| 105 |
+
"id": "8552e84a-e164-4519-8ce8-959c7dd277ef",
|
| 106 |
+
"metadata": {},
|
| 107 |
+
"outputs": [],
|
| 108 |
+
"source": [
|
| 109 |
+
"# Load Data\n",
|
| 110 |
+
"categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']\n",
|
| 111 |
+
"twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)\n"
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"cell_type": "code",
|
| 116 |
+
"execution_count": null,
|
| 117 |
+
"id": "abc38386-e63f-4d22-81dc-1785ac8f043b",
|
| 118 |
+
"metadata": {},
|
| 119 |
+
"outputs": [],
|
| 120 |
+
"source": [
|
| 121 |
+
"# Feature Extraction\n",
|
| 122 |
+
"count_vect = CountVectorizer()\n",
|
| 123 |
+
"X_train_counts = count_vect.fit_transform(twenty_train.data)\n",
|
| 124 |
+
"tfidf_transformer = TfidfTransformer()\n",
|
| 125 |
+
"X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "code",
|
| 130 |
+
"execution_count": null,
|
| 131 |
+
"id": "0ea57698-12ff-48b3-a8b6-bb8dffabbc5f",
|
| 132 |
+
"metadata": {},
|
| 133 |
+
"outputs": [],
|
| 134 |
+
"source": [
|
| 135 |
+
"# Train Model\n",
|
| 136 |
+
"clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)\n"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"cell_type": "code",
|
| 141 |
+
"execution_count": null,
|
| 142 |
+
"id": "a5fa6532-594c-4790-a630-83388c556591",
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"outputs": [],
|
| 145 |
+
"source": [
|
| 146 |
+
"# Predict\n",
|
| 147 |
+
"docs_new = ['God is love', 'OpenGL on the GPU is fast']\n",
|
| 148 |
+
"X_new_counts = count_vect.transform(docs_new)\n",
|
| 149 |
+
"X_new_tfidf = tfidf_transformer.transform(X_new_counts)\n",
|
| 150 |
+
"predicted = clf.predict(X_new_tfidf)\n",
|
| 151 |
+
"for doc, category in zip(docs_new, predicted):\n",
|
| 152 |
+
" print('%r => %s' % (doc, twenty_train.target_names[category]))"
|
| 153 |
+
]
|
| 154 |
+
}
|
| 155 |
+
],
|
| 156 |
+
"metadata": {
|
| 157 |
+
"kernelspec": {
|
| 158 |
+
"display_name": "Python 3 (ipykernel)",
|
| 159 |
+
"language": "python",
|
| 160 |
+
"name": "python3"
|
| 161 |
+
},
|
| 162 |
+
"language_info": {
|
| 163 |
+
"codemirror_mode": {
|
| 164 |
+
"name": "ipython",
|
| 165 |
+
"version": 3
|
| 166 |
+
},
|
| 167 |
+
"file_extension": ".py",
|
| 168 |
+
"mimetype": "text/x-python",
|
| 169 |
+
"name": "python",
|
| 170 |
+
"nbconvert_exporter": "python",
|
| 171 |
+
"pygments_lexer": "ipython3",
|
| 172 |
+
"version": "3.10.6"
|
| 173 |
+
}
|
| 174 |
+
},
|
| 175 |
+
"nbformat": 4,
|
| 176 |
+
"nbformat_minor": 5
|
| 177 |
+
}
|
sample_notebook_files/generic.ipynb
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "8de6eb89-7d92-4e9d-ab20-8c71ed062072",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import keras\n",
|
| 11 |
+
"from keras.models import Sequential\n",
|
| 12 |
+
"from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D\n",
|
| 13 |
+
"from keras.datasets import mnist\n",
|
| 14 |
+
"from keras.preprocessing.image import ImageDataGenerator\n",
|
| 15 |
+
"import matplotlib.pyplot as plt\n",
|
| 16 |
+
"import numpy as np\n",
|
| 17 |
+
"import pandas as pd"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": null,
|
| 23 |
+
"id": "1caec746-026a-4649-952d-98ff1ac69e97",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"source": [
|
| 27 |
+
"# Intentionally including deprecated library\n",
|
| 28 |
+
"import imp"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": null,
|
| 34 |
+
"id": "6e7fb59e-6d40-4be1-a51e-0162ddd02c80",
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"outputs": [],
|
| 37 |
+
"source": [
|
| 38 |
+
" # added this vulnerable library (safety might use updated safe library version for this)\n",
|
| 39 |
+
"import urllib3 \n",
|
| 40 |
+
"print(urllib3.__version__)"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"cell_type": "code",
|
| 45 |
+
"execution_count": null,
|
| 46 |
+
"id": "cf17cb05-cc6f-4ef0-a27d-fb6a5af33eb9",
|
| 47 |
+
"metadata": {},
|
| 48 |
+
"outputs": [],
|
| 49 |
+
"source": [
|
| 50 |
+
"#using vulnerable library forcefully for safety to detect\n",
|
| 51 |
+
"!pip install urllib3==1.24.1"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": null,
|
| 57 |
+
"id": "ca7abfef-f88c-4766-8db4-b1f0909c8e83",
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [],
|
| 60 |
+
"source": [
|
| 61 |
+
"!pip install scikit-learn==0.19.0\n",
|
| 62 |
+
"import sklearn\n",
|
| 63 |
+
"print(sklearn.__version__)"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"cell_type": "code",
|
| 68 |
+
"execution_count": null,
|
| 69 |
+
"id": "a78b2239-8abd-44fb-b337-9c9f0830ecaf",
|
| 70 |
+
"metadata": {},
|
| 71 |
+
"outputs": [],
|
| 72 |
+
"source": [
|
| 73 |
+
"!pip install numpy==1.16.0\n",
|
| 74 |
+
"import numpy as np\n",
|
| 75 |
+
"print(np.__version__)"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "code",
|
| 80 |
+
"execution_count": null,
|
| 81 |
+
"id": "b3fb3af1-200d-4088-b2a8-5fa445e5d0ac",
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"!pip install tensorflow==1.15.5\n",
|
| 86 |
+
"import tensorflow as tf\n",
|
| 87 |
+
"print(tf.__version__)\n"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": null,
|
| 93 |
+
"id": "95b8762f-bd02-4f4d-9fa2-e511a2b4a326",
|
| 94 |
+
"metadata": {},
|
| 95 |
+
"outputs": [],
|
| 96 |
+
"source": [
|
| 97 |
+
"# A real example of a library with a non-permissive license\n",
|
| 98 |
+
"import gmpy2"
|
| 99 |
+
]
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"cell_type": "code",
|
| 103 |
+
"execution_count": null,
|
| 104 |
+
"id": "88872ca5-c939-4b25-b37e-4351fd6ef336",
|
| 105 |
+
"metadata": {},
|
| 106 |
+
"outputs": [],
|
| 107 |
+
"source": [
|
| 108 |
+
"# GNU Octave, an interpreted high-level programming language for numerical computations\n",
|
| 109 |
+
"# Licensed under GPL\n",
|
| 110 |
+
"import oct2py"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "code",
|
| 115 |
+
"execution_count": null,
|
| 116 |
+
"id": "5e844eab-9caa-467e-b1c4-2c7aac5a31a9",
|
| 117 |
+
"metadata": {},
|
| 118 |
+
"outputs": [],
|
| 119 |
+
"source": [
|
| 120 |
+
"# Mock secret keys\n",
|
| 121 |
+
"SECRET_KEY = \"ABCDEFG\"\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"aws_secret_key_1 = \"A3TABCDEFGH1234567890\" \n",
|
| 124 |
+
"\n",
|
| 125 |
+
"AWS_SECRET_ACCESS_KEY_0 = \"AKIAIOSFODNN7EXAMPLE\"\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"AWS_SECRET_ACCESS_KEY = \"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\""
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"cell_type": "code",
|
| 132 |
+
"execution_count": null,
|
| 133 |
+
"id": "901dbdc1-76b2-47d3-9428-2ddb4c043653",
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"AWS_ACCOUNT_ID = \"1234-5678-9012\""
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": null,
|
| 143 |
+
"id": "631f40be-470b-4bf1-b645-a0b8429f0dfb",
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"outputs": [],
|
| 146 |
+
"source": [
|
| 147 |
+
"# PII Information (Just for demonstration, do not use real PII)\n",
|
| 148 |
+
"user_data = {\n",
|
| 149 |
+
" 'name': 'John Doe',\n",
|
| 150 |
+
" 'email': 'johndoe@example.com',\n",
|
| 151 |
+
" 'address': '123 Main St, Anytown, USA'\n",
|
| 152 |
+
"}"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "code",
|
| 157 |
+
"execution_count": null,
|
| 158 |
+
"id": "2056314b-486e-4b2f-923a-4194c8a955fe",
|
| 159 |
+
"metadata": {},
|
| 160 |
+
"outputs": [],
|
| 161 |
+
"source": [
|
| 162 |
+
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"# normalize to range 0-1\n",
|
| 165 |
+
"x_train = x_train / 255.0\n",
|
| 166 |
+
"x_test = x_test / 255.0\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"# reshape\n",
|
| 169 |
+
"x_train = x_train.reshape(-1, 28, 28, 1)\n",
|
| 170 |
+
"x_test = x_test.reshape(-1, 28, 28, 1)\n"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"id": "e9766dd8-260d-4184-ac37-9e768f780d8e",
|
| 177 |
+
"metadata": {},
|
| 178 |
+
"outputs": [],
|
| 179 |
+
"source": [
|
| 180 |
+
"## Define the model\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"# %%\n",
|
| 183 |
+
"model = Sequential()\n",
|
| 184 |
+
"model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))\n",
|
| 185 |
+
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
|
| 186 |
+
"model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))\n",
|
| 187 |
+
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
|
| 188 |
+
"model.add(Flatten())\n",
|
| 189 |
+
"model.add(Dense(128, activation='relu'))\n",
|
| 190 |
+
"model.add(Dense(10, activation='softmax'))"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"id": "e2e6cd12-349b-4088-a189-3037da3191ab",
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [],
|
| 199 |
+
"source": [
|
| 200 |
+
"# ## Compile the model\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"# %%\n",
|
| 203 |
+
"model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])"
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"cell_type": "code",
|
| 208 |
+
"execution_count": null,
|
| 209 |
+
"id": "156520be-06f9-45a6-8c75-5fcfa567d3de",
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"outputs": [],
|
| 212 |
+
"source": [
|
| 213 |
+
"# ## Train the model\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"# %%\n",
|
| 216 |
+
"history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)\n"
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"cell_type": "code",
|
| 221 |
+
"execution_count": null,
|
| 222 |
+
"id": "8bd92538-5655-444c-aa79-92c614f890d8",
|
| 223 |
+
"metadata": {},
|
| 224 |
+
"outputs": [],
|
| 225 |
+
"source": [
|
| 226 |
+
"test_loss, test_accuracy = model.evaluate(x_test, y_test)\n",
|
| 227 |
+
"print(f'Test loss: {test_loss}, Test accuracy: {test_accuracy}')"
|
| 228 |
+
]
|
| 229 |
+
}
|
| 230 |
+
],
|
| 231 |
+
"metadata": {
|
| 232 |
+
"kernelspec": {
|
| 233 |
+
"display_name": "Python 3 (ipykernel)",
|
| 234 |
+
"language": "python",
|
| 235 |
+
"name": "python3"
|
| 236 |
+
},
|
| 237 |
+
"language_info": {
|
| 238 |
+
"codemirror_mode": {
|
| 239 |
+
"name": "ipython",
|
| 240 |
+
"version": 3
|
| 241 |
+
},
|
| 242 |
+
"file_extension": ".py",
|
| 243 |
+
"mimetype": "text/x-python",
|
| 244 |
+
"name": "python",
|
| 245 |
+
"nbconvert_exporter": "python",
|
| 246 |
+
"pygments_lexer": "ipython3",
|
| 247 |
+
"version": "3.10.6"
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
"nbformat": 4,
|
| 251 |
+
"nbformat_minor": 5
|
| 252 |
+
}
|
sample_notebook_files/prediction_notebook.ipynb
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "a06300a0-6379-4cb8-b015-0e6e689ab64a",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"This Jupyter notebook script sets up a basic prediction model while intentionally incorporating different types of potential vulnerabilities, including the use of an older version of a library, hardcoded secrets, and PII."
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": null,
|
| 14 |
+
"id": "a4e7e5b2-3c14-44fb-808f-7241b2e75658",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Cell 1: (Forcing an installation of an older version of libraries)\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"!pip install numpy==1.16.0\n",
|
| 21 |
+
"!pip install scikit-learn==0.19.0 # vulnerable version of scikit-learn"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": null,
|
| 27 |
+
"id": "d8f3c422-d9e6-497a-a7b2-ec91fee80fa4",
|
| 28 |
+
"metadata": {},
|
| 29 |
+
"outputs": [],
|
| 30 |
+
"source": [
|
| 31 |
+
"# Cell 2: (Importing libraries including the one with older version)\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"import numpy as np\n",
|
| 34 |
+
"import pandas as pd\n",
|
| 35 |
+
"import sklearn\n",
|
| 36 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 37 |
+
"from sklearn.linear_model import LinearRegression"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
+
"id": "3fc98c03-cc4c-4a3b-a5d9-41523c26930f",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"# Including a non-permissible licensed library\n",
|
| 48 |
+
"import oct2py"
|
| 49 |
+
]
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"cell_type": "code",
|
| 53 |
+
"execution_count": null,
|
| 54 |
+
"id": "bb7a28c5-ac7f-4574-990d-d25c7670f211",
|
| 55 |
+
"metadata": {},
|
| 56 |
+
"outputs": [],
|
| 57 |
+
"source": [
|
| 58 |
+
"# Cell 3: (API tokens and secrets)\n",
|
| 59 |
+
"azure_access_key = \"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==\""
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"cell_type": "code",
|
| 64 |
+
"execution_count": null,
|
| 65 |
+
"id": "dd875b59-7454-4c81-88c2-37cf011ed332",
|
| 66 |
+
"metadata": {},
|
| 67 |
+
"outputs": [],
|
| 68 |
+
"source": [
|
| 69 |
+
"gcloud_api_key = \"AIzaQwerty12345678Xx\""
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": null,
|
| 75 |
+
"id": "87d8ed66-bb6d-46b1-9968-b7d5b2cf49df",
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"outputs": [],
|
| 78 |
+
"source": [
|
| 79 |
+
"youtube_api_key = \"AIzaSyCewf3U1ZXHH4E2mK2s8A2D\""
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"cell_type": "code",
|
| 84 |
+
"execution_count": null,
|
| 85 |
+
"id": "f2bae80c-9132-4931-8182-fafbe4a414a1",
|
| 86 |
+
"metadata": {},
|
| 87 |
+
"outputs": [],
|
| 88 |
+
"source": [
|
| 89 |
+
"# Dummy PII Data\n",
|
| 90 |
+
"person = {\n",
|
| 91 |
+
" 'first_name': 'John',\n",
|
| 92 |
+
" 'last_name': 'Doe',\n",
|
| 93 |
+
" 'ssn': '123-45-6789',\n",
|
| 94 |
+
" 'address': '1600 Amphitheatre Parkway, Mountain View, CA'\n",
|
| 95 |
+
"} # this could be seen as PII"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"cell_type": "code",
|
| 100 |
+
"execution_count": null,
|
| 101 |
+
"id": "0ff71e14-4cf7-47f8-be2f-7a2f93d7900a",
|
| 102 |
+
"metadata": {},
|
| 103 |
+
"outputs": [],
|
| 104 |
+
"source": [
|
| 105 |
+
"USER_NAME = 'Joe Smith' #another PII in another format"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"cell_type": "code",
|
| 110 |
+
"execution_count": null,
|
| 111 |
+
"id": "9a21aa9d-96ec-4555-9d1a-fa0f2cd39802",
|
| 112 |
+
"metadata": {},
|
| 113 |
+
"outputs": [],
|
| 114 |
+
"source": [
|
| 115 |
+
"EMAIL = 'john.doe@example.com' # also PII"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"cell_type": "code",
|
| 120 |
+
"execution_count": null,
|
| 121 |
+
"id": "fb202b7d-a7ef-4d6e-89ec-6e5aa01422d0",
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"outputs": [],
|
| 124 |
+
"source": [
|
| 125 |
+
"# Cell 4: (Model building)\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"# Creating a dataset\n",
|
| 128 |
+
"np.random.seed(0)\n",
|
| 129 |
+
"x = np.random.rand(100, 1)\n",
|
| 130 |
+
"y = 2 + 3 * x + np.random.rand(100, 1)\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"# Splitting the data\n",
|
| 133 |
+
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"# Model initialization\n",
|
| 136 |
+
"regression_model = LinearRegression()\n",
|
| 137 |
+
"\n",
|
| 138 |
+
"# Fit the data(train the model)\n",
|
| 139 |
+
"regression_model.fit(x_train, y_train)\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"# Predict\n",
|
| 142 |
+
"y_predicted = regression_model.predict(x_test)"
|
| 143 |
+
]
|
| 144 |
+
}
|
| 145 |
+
],
|
| 146 |
+
"metadata": {
|
| 147 |
+
"kernelspec": {
|
| 148 |
+
"display_name": "Python 3 (ipykernel)",
|
| 149 |
+
"language": "python",
|
| 150 |
+
"name": "python3"
|
| 151 |
+
},
|
| 152 |
+
"language_info": {
|
| 153 |
+
"codemirror_mode": {
|
| 154 |
+
"name": "ipython",
|
| 155 |
+
"version": 3
|
| 156 |
+
},
|
| 157 |
+
"file_extension": ".py",
|
| 158 |
+
"mimetype": "text/x-python",
|
| 159 |
+
"name": "python",
|
| 160 |
+
"nbconvert_exporter": "python",
|
| 161 |
+
"pygments_lexer": "ipython3",
|
| 162 |
+
"version": "3.10.6"
|
| 163 |
+
}
|
| 164 |
+
},
|
| 165 |
+
"nbformat": 4,
|
| 166 |
+
"nbformat_minor": 5
|
| 167 |
+
}
|
sample_notebook_files/safe_notebook.ipynb
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": []
|
| 7 |
+
},
|
| 8 |
+
"kernelspec": {
|
| 9 |
+
"name": "python3",
|
| 10 |
+
"display_name": "Python 3"
|
| 11 |
+
},
|
| 12 |
+
"language_info": {
|
| 13 |
+
"name": "python"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"cells": [
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "markdown",
|
| 19 |
+
"source": [
|
| 20 |
+
"Note: This notebook is free from any Personally Identifiable Information (PII), exposed API tokens, and outdated or vulnerable libraries."
|
| 21 |
+
],
|
| 22 |
+
"metadata": {
|
| 23 |
+
"id": "lNScDliRLnLV"
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "code",
|
| 28 |
+
"execution_count": 1,
|
| 29 |
+
"metadata": {
|
| 30 |
+
"id": "R6ZAQbX7LB5l"
|
| 31 |
+
},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"# Importing Safe and Updated Libraries\n",
|
| 35 |
+
"import pandas as pd\n",
|
| 36 |
+
"import numpy as np\n",
|
| 37 |
+
"import datetime\n",
|
| 38 |
+
"from matplotlib import pyplot as plt\n",
|
| 39 |
+
"from sklearn.linear_model import LinearRegression"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "code",
|
| 44 |
+
"source": [
|
| 45 |
+
"# Time Series Analysis using Pandas\n",
|
| 46 |
+
"# Create a date range\n",
|
| 47 |
+
"date_rng = pd.date_range(start='1/01/2023', end='1/10/2023', freq='H')"
|
| 48 |
+
],
|
| 49 |
+
"metadata": {
|
| 50 |
+
"id": "oxMnBJncLJyH"
|
| 51 |
+
},
|
| 52 |
+
"execution_count": 2,
|
| 53 |
+
"outputs": []
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"source": [
|
| 58 |
+
"# Create a DataFrame\n",
|
| 59 |
+
"df = pd.DataFrame(date_rng, columns=['date'])"
|
| 60 |
+
],
|
| 61 |
+
"metadata": {
|
| 62 |
+
"id": "gOX_vL4lLMmq"
|
| 63 |
+
},
|
| 64 |
+
"execution_count": 3,
|
| 65 |
+
"outputs": []
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"cell_type": "code",
|
| 69 |
+
"source": [
|
| 70 |
+
"# Generate some random data\n",
|
| 71 |
+
"df['data'] = np.random.randint(0,100,size=(len(date_rng)))"
|
| 72 |
+
],
|
| 73 |
+
"metadata": {
|
| 74 |
+
"id": "52rGQNM-LRsO"
|
| 75 |
+
},
|
| 76 |
+
"execution_count": 4,
|
| 77 |
+
"outputs": []
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"source": [
|
| 82 |
+
"# Set the date column as index\n",
|
| 83 |
+
"df['datetime'] = pd.to_datetime(df['date'])\n",
|
| 84 |
+
"df = df.set_index('datetime')\n",
|
| 85 |
+
"df.drop(['date'], axis=1, inplace=True)"
|
| 86 |
+
],
|
| 87 |
+
"metadata": {
|
| 88 |
+
"id": "Oz4NQyeqLXKW"
|
| 89 |
+
},
|
| 90 |
+
"execution_count": 5,
|
| 91 |
+
"outputs": []
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "code",
|
| 95 |
+
"source": [
|
| 96 |
+
"# Resample the DataFrame to calculate daily means\n",
|
| 97 |
+
"df_resampled = df.resample('D').mean()"
|
| 98 |
+
],
|
| 99 |
+
"metadata": {
|
| 100 |
+
"id": "ees6U5rfLaw-"
|
| 101 |
+
},
|
| 102 |
+
"execution_count": 6,
|
| 103 |
+
"outputs": []
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "code",
|
| 107 |
+
"source": [
|
| 108 |
+
"# Display the resampled DataFrame\n",
|
| 109 |
+
"print(df_resampled)"
|
| 110 |
+
],
|
| 111 |
+
"metadata": {
|
| 112 |
+
"id": "sdkU13xrLdKT"
|
| 113 |
+
},
|
| 114 |
+
"execution_count": null,
|
| 115 |
+
"outputs": []
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"source": [
|
| 120 |
+
"# Prediction part\n",
|
| 121 |
+
"X = [i for i in range(0, len(df_resampled))]\n",
|
| 122 |
+
"X = np.reshape(X, (len(X), 1))\n",
|
| 123 |
+
"y = df_resampled['data'].tolist()\n",
|
| 124 |
+
"model = LinearRegression()\n",
|
| 125 |
+
"model.fit(X, y)\n",
|
| 126 |
+
"# Predict the 'data' value for the next day\n",
|
| 127 |
+
"next_day = [[len(X) + 1]]\n",
|
| 128 |
+
"predicted_value = model.predict(next_day)\n",
|
| 129 |
+
"print('The predicted average value for the next day is: ', predicted_value[0])"
|
| 130 |
+
],
|
| 131 |
+
"metadata": {
|
| 132 |
+
"id": "8x0pvqnrLiKF"
|
| 133 |
+
},
|
| 134 |
+
"execution_count": null,
|
| 135 |
+
"outputs": []
|
| 136 |
+
}
|
| 137 |
+
]
|
| 138 |
+
}
|
sample_notebook_files/timeseries_notebook.ipynb
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "6cb9b97a-1641-45af-89bb-782b726bb957",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"Time-series analysis using pandas. This notebook also incorporates some vulnerable libraries and exposed API tokens."
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": null,
|
| 14 |
+
"id": "ed4a4cac-fed2-4d55-bcf9-163611851677",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Time Series Analysis using Pandas\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"# Install vulnerable versions of libraries\n",
|
| 21 |
+
"!pip install django==1.11.15\n",
|
| 22 |
+
"!pip install flask==0.12.2\n",
|
| 23 |
+
"!pip install numpy==1.16.0\n",
|
| 24 |
+
"!pip install pandas==0.24.1"
|
| 25 |
+
]
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"execution_count": null,
|
| 30 |
+
"id": "14e8b67a-5ed9-4881-be42-e7259c46f9b7",
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"# Import libraries\n",
|
| 35 |
+
"import pandas as pd\n",
|
| 36 |
+
"import numpy as np\n",
|
| 37 |
+
"import datetime\n",
|
| 38 |
+
"from matplotlib import pyplot as plt\n",
|
| 39 |
+
"from sklearn.linear_model import LinearRegression"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "code",
|
| 44 |
+
"execution_count": null,
|
| 45 |
+
"id": "dd6ffe2b-0a38-4950-ab46-4b0cbdd7b399",
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [],
|
| 48 |
+
"source": [
|
| 49 |
+
"# Exposed API Tokens\n",
|
| 50 |
+
"linkedin_api_key = \"8619zzn49n49x1\""
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"cell_type": "code",
|
| 55 |
+
"execution_count": null,
|
| 56 |
+
"id": "050a4e30-afd6-4da0-b992-630774894d42",
|
| 57 |
+
"metadata": {},
|
| 58 |
+
"outputs": [],
|
| 59 |
+
"source": [
|
| 60 |
+
"# Let's analyze some time-series data.\n",
|
| 61 |
+
"# Please note that this data is fictional and does not represent any real person or entity.\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"# Create a date range\n",
|
| 64 |
+
"date_rng = pd.date_range(start='1/01/2023', end='1/10/2023', freq='H')\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"# Create a DataFrame\n",
|
| 67 |
+
"df = pd.DataFrame(date_rng, columns=['date'])\n",
|
| 68 |
+
"\n",
|
| 69 |
+
"# Generate some random data\n",
|
| 70 |
+
"df['data'] = np.random.randint(0,100,size=(len(date_rng)))\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"# Set the date column as index\n",
|
| 73 |
+
"df['datetime'] = pd.to_datetime(df['date'])\n",
|
| 74 |
+
"df = df.set_index('datetime')\n",
|
| 75 |
+
"df.drop(['date'], axis=1, inplace=True)\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"# Resample the DataFrame to calculate daily means\n",
|
| 78 |
+
"df_resampled = df.resample('D').mean()\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"# Display the resampled DataFrame\n",
|
| 81 |
+
"print(df_resampled)\n",
|
| 82 |
+
"\n",
|
| 83 |
+
"# Prediction part\n",
|
| 84 |
+
"X = [i for i in range(0, len(df_resampled))]\n",
|
| 85 |
+
"X = np.reshape(X, (len(X), 1))\n",
|
| 86 |
+
"y = df_resampled['data'].tolist()\n",
|
| 87 |
+
"model = LinearRegression()\n",
|
| 88 |
+
"model.fit(X, y)\n",
|
| 89 |
+
"# Predict the 'data' value for the next day\n",
|
| 90 |
+
"next_day = [[len(X) + 1]]\n",
|
| 91 |
+
"predicted_value = model.predict(next_day)\n",
|
| 92 |
+
"print('The predicted average value for the next day is: ', predicted_value[0])\n"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"execution_count": null,
|
| 98 |
+
"id": "21f2e251-7f69-4f27-9041-aff5d022bac0",
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"\n",
|
| 103 |
+
"# PII in comments (phone number)\n",
|
| 104 |
+
"# Contact me if you have any questions: 123-456-7890"
|
| 105 |
+
]
|
| 106 |
+
}
|
| 107 |
+
],
|
| 108 |
+
"metadata": {
|
| 109 |
+
"kernelspec": {
|
| 110 |
+
"display_name": "Python 3 (ipykernel)",
|
| 111 |
+
"language": "python",
|
| 112 |
+
"name": "python3"
|
| 113 |
+
},
|
| 114 |
+
"language_info": {
|
| 115 |
+
"codemirror_mode": {
|
| 116 |
+
"name": "ipython",
|
| 117 |
+
"version": 3
|
| 118 |
+
},
|
| 119 |
+
"file_extension": ".py",
|
| 120 |
+
"mimetype": "text/x-python",
|
| 121 |
+
"name": "python",
|
| 122 |
+
"nbconvert_exporter": "python",
|
| 123 |
+
"pygments_lexer": "ipython3",
|
| 124 |
+
"version": "3.10.6"
|
| 125 |
+
}
|
| 126 |
+
},
|
| 127 |
+
"nbformat": 4,
|
| 128 |
+
"nbformat_minor": 5
|
| 129 |
+
}
|