Pankaj001 committed on
Commit
a3abb69
·
verified ·
1 Parent(s): eb326a7

Upload 30 files

Browse files
Files changed (31) hide show
  1. .gitattributes +1 -0
  2. sample_model_files/malicious_model_custom_layer.h5 +3 -0
  3. sample_model_files/malicious_model_lambda.h5 +3 -0
  4. sample_model_files/model_01.h5 +3 -0
  5. sample_model_files/model_99.h5 +3 -0
  6. sample_model_files/model_with_pickled_data.h5 +3 -0
  7. sample_model_files/pb files/linear_model.pb/fingerprint.pb +3 -0
  8. sample_model_files/pb files/linear_model.pb/keras_metadata.pb +3 -0
  9. sample_model_files/pb files/linear_model.pb/saved_model.pb +3 -0
  10. sample_model_files/pb files/linear_model.pb/variables/variables.data-00000-of-00001 +3 -0
  11. sample_model_files/pb files/linear_model.pb/variables/variables.index +3 -0
  12. sample_model_files/pb files/malicious_model_read_modified.pb/fingerprint.pb +3 -0
  13. sample_model_files/pb files/malicious_model_read_modified.pb/keras_metadata.pb +3 -0
  14. sample_model_files/pb files/malicious_model_read_modified.pb/saved_model.pb +3 -0
  15. sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.data-00000-of-00001 +3 -0
  16. sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.index +3 -0
  17. sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/fingerprint.pb +3 -0
  18. sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/keras_metadata.pb +3 -0
  19. sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/saved_model.pb +3 -0
  20. sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.data-00000-of-00001 +3 -0
  21. sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.index +3 -0
  22. sample_model_files/pb files/pb/saved_model.pb +3 -0
  23. sample_model_files/pb files/pb/variables/variables.data-00000-of-00001 +3 -0
  24. sample_model_files/pb files/pb/variables/variables.index +0 -0
  25. sample_model_files/safe_model.h5 +3 -0
  26. sample_model_files/sample_pickle.pkl +3 -0
  27. sample_notebook_files/classification_notebook.ipynb +177 -0
  28. sample_notebook_files/generic.ipynb +252 -0
  29. sample_notebook_files/prediction_notebook.ipynb +167 -0
  30. sample_notebook_files/safe_notebook.ipynb +138 -0
  31. sample_notebook_files/timeseries_notebook.ipynb +129 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sample_model_files/pb[[:space:]]files/pb/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
sample_model_files/malicious_model_custom_layer.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232e6d8bc42f8c821d4858a923028c3debe78e8938cf00049a0f5bb2c55ea856
3
+ size 50512
sample_model_files/malicious_model_lambda.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f45f5c6183f78ac82ca526bae8dee3e8804a340828ff8da18f4fc2dcb93856
3
+ size 50512
sample_model_files/model_01.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2a411dbc531120b84c37211a19107fded7947c591215894778a2dc12927b62
3
+ size 5881000
sample_model_files/model_99.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3696f7d099bf5a2efa03195cada466ef91bf678e024333a146576a5937990f9
3
+ size 6330600
sample_model_files/model_with_pickled_data.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a0e12b37d068fa5a4c3a1c32868ede7a4fa4a7cea69af173c4131314298f28
3
+ size 18779
sample_model_files/pb files/linear_model.pb/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b997df2c14a9436c89b95a8a209d74af3363d52745d83a718a97693fcd5a74
3
+ size 58
sample_model_files/pb files/linear_model.pb/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa76ef4feac193ebc7ff8762cb8d082eeac9d24e69879f2745081b8b08c2321e
3
+ size 4425
sample_model_files/pb files/linear_model.pb/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e71626999238006df8851264233f38859d035948ff742210ebd5731c13f8640
3
+ size 39339
sample_model_files/pb files/linear_model.pb/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:715488bb973717280729ba9357d11cef2392bf7f4bfce8e65ec2e94d6982928b
3
+ size 2325
sample_model_files/pb files/linear_model.pb/variables/variables.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e58d48e3235f959186e81b264f9250385ea822d703437d7a1a674bf318907512
3
+ size 621
sample_model_files/pb files/malicious_model_read_modified.pb/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63413813c501e4a12f3edbc28e373f41e26f3baef03afbf2911df59d0061e242
3
+ size 57
sample_model_files/pb files/malicious_model_read_modified.pb/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88004053b611bffb60ed32ac97cab30d8ff2c358ccb737d5450cc8ab7cece103
3
+ size 204831
sample_model_files/pb files/malicious_model_read_modified.pb/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae8eebd2d4bc8fddb4a6629208456ec212375a7893a764b321280b6140abea9
3
+ size 2249481
sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:859960385d552b0b232fb6995dee6d2c2af44c20a9bd141517b62c13dc63fa98
3
+ size 391277
sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e51dca6b2deabfeddea73fd8c56c88c9320add07006959f67b01760b0b63d64
3
+ size 12414
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e670dc5c33f705fe175b5e6f802ac10dc826e4e86e17b62fcbe83bd21340fa65
3
+ size 57
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b52251889d94b38f78f541596bdad519f3f052a2f62598272a1dbbf1c935e35
3
+ size 8526
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200961791226861b664060751c220faecf6e7a824ec3a99c7967b0c83f5c671f
3
+ size 61321
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcbc0d986a88a44d6c30766b7b3a1a2c9840ce11002242bae9c698c225b7f7ad
3
+ size 3517
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2abc09c5f25f35f9d01650985103b49019ed9b1ff130da65bf5567b4e1016ede
3
+ size 621
sample_model_files/pb files/pb/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d55083f53c49df137a089df0c4c6d41b6e027a9ed06f6f7c201ab8ca4b306b3f
3
+ size 235045
sample_model_files/pb files/pb/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d71048fc9a5dd7aad56ebe1493174118f0da01ec5640fd1979d1448c765f26d1
3
+ size 6274830
sample_model_files/pb files/pb/variables/variables.index ADDED
Binary file (3.56 kB). View file
 
sample_model_files/safe_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:913fb308a03f3485ded8ab0b3747af7996bb0ba9e151622177db0b2141fd8593
3
+ size 49672
sample_model_files/sample_pickle.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab0c5b5c0b3775045c35f933ee98d4c1c68d5021c1d6fcf983c2f0bb9178607
3
+ size 186233
sample_notebook_files/classification_notebook.ipynb ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "d5c4b5c2-8c0a-4cbe-8997-1a98c14be2e4",
6
+ "metadata": {},
7
+ "source": [
8
+ "A text classification model using libraries like NLTK or spaCy. It includes some PII data within the code (e.g., hard-coded email addresses or phone numbers for testing purposes), and it includes a few API tokens/secrets."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "id": "f95fa380-34d0-455d-8002-ebe5f829542c",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "# Vulnerable libraries\n",
19
+ "!pip install django==1.11.15\n",
20
+ "!pip install flask==0.12.2\n",
21
+ "!pip install numpy==1.16.0\n",
22
+ "!pip install requests==2.19.1\n",
23
+ "!pip install scikit-learn==0.19.0"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "id": "25315022-9da9-4c29-8326-6532d261dd56",
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "# Non-permissible licensed libraries\n",
34
+ "import gmpy2\n",
35
+ "import oct2py\n",
36
+ "import pygsl\n",
37
+ "from PyQt5 import QtCore"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "id": "489ad824-285c-4219-afc6-073192d54f3e",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "# Required Libraries for our task\n",
48
+ "import nltk\n",
49
+ "import sklearn"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": null,
55
+ "id": "109d2f98-4d6d-42d9-acb4-2f195af051d5",
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": [
59
+ "# PII Data\n",
60
+ "email = \"john.doe@example.com\"\n",
61
+ "phone = \"123-456-7890\""
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": null,
67
+ "id": "d637e295-0953-4980-bf99-c7e7e509e876",
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "# API Keys and secrets\n",
72
+ "fb_app_secret = \"3e4a22bb7e6b2c38b7809234b3ee782b\"\n",
73
+ "db_credentials = \"username:password@localhost:5432/mydatabase\""
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "a6493567-ad7f-4b87-95e4-5068a09fca92",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "# Download nltk data\n",
84
+ "nltk.download('punkt', download_dir='/nltk_data/')"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": null,
90
+ "id": "7f94e191-bfe7-4e54-9dbf-4d2484b0dbe9",
91
+ "metadata": {},
92
+ "outputs": [],
93
+ "source": [
94
+ "\n",
95
+ "# Text Classification\n",
96
+ "from sklearn.datasets import fetch_20newsgroups\n",
97
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
98
+ "from sklearn.feature_extraction.text import TfidfTransformer\n",
99
+ "from sklearn.naive_bayes import MultinomialNB"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": null,
105
+ "id": "8552e84a-e164-4519-8ce8-959c7dd277ef",
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "# Load Data\n",
110
+ "categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']\n",
111
+ "twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)\n"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": null,
117
+ "id": "abc38386-e63f-4d22-81dc-1785ac8f043b",
118
+ "metadata": {},
119
+ "outputs": [],
120
+ "source": [
121
+ "# Feature Extraction\n",
122
+ "count_vect = CountVectorizer()\n",
123
+ "X_train_counts = count_vect.fit_transform(twenty_train.data)\n",
124
+ "tfidf_transformer = TfidfTransformer()\n",
125
+ "X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": null,
131
+ "id": "0ea57698-12ff-48b3-a8b6-bb8dffabbc5f",
132
+ "metadata": {},
133
+ "outputs": [],
134
+ "source": [
135
+ "# Train Model\n",
136
+ "clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)\n"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "id": "a5fa6532-594c-4790-a630-83388c556591",
143
+ "metadata": {},
144
+ "outputs": [],
145
+ "source": [
146
+ "# Predict\n",
147
+ "docs_new = ['God is love', 'OpenGL on the GPU is fast']\n",
148
+ "X_new_counts = count_vect.transform(docs_new)\n",
149
+ "X_new_tfidf = tfidf_transformer.transform(X_new_counts)\n",
150
+ "predicted = clf.predict(X_new_tfidf)\n",
151
+ "for doc, category in zip(docs_new, predicted):\n",
152
+ " print('%r => %s' % (doc, twenty_train.target_names[category]))"
153
+ ]
154
+ }
155
+ ],
156
+ "metadata": {
157
+ "kernelspec": {
158
+ "display_name": "Python 3 (ipykernel)",
159
+ "language": "python",
160
+ "name": "python3"
161
+ },
162
+ "language_info": {
163
+ "codemirror_mode": {
164
+ "name": "ipython",
165
+ "version": 3
166
+ },
167
+ "file_extension": ".py",
168
+ "mimetype": "text/x-python",
169
+ "name": "python",
170
+ "nbconvert_exporter": "python",
171
+ "pygments_lexer": "ipython3",
172
+ "version": "3.10.6"
173
+ }
174
+ },
175
+ "nbformat": 4,
176
+ "nbformat_minor": 5
177
+ }
sample_notebook_files/generic.ipynb ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "8de6eb89-7d92-4e9d-ab20-8c71ed062072",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import keras\n",
11
+ "from keras.models import Sequential\n",
12
+ "from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D\n",
13
+ "from keras.datasets import mnist\n",
14
+ "from keras.preprocessing.image import ImageDataGenerator\n",
15
+ "import matplotlib.pyplot as plt\n",
16
+ "import numpy as np\n",
17
+ "import pandas as pd"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": null,
23
+ "id": "1caec746-026a-4649-952d-98ff1ac69e97",
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "# Intentionally including deprecated library\n",
28
+ "import imp"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "id": "6e7fb59e-6d40-4be1-a51e-0162ddd02c80",
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ " # added this vulnerable library (safety might use updated safe library version for this)\n",
39
+ "import urllib3 \n",
40
+ "print(urllib3.__version__)"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "id": "cf17cb05-cc6f-4ef0-a27d-fb6a5af33eb9",
47
+ "metadata": {},
48
+ "outputs": [],
49
+ "source": [
50
+ "#using vulnerable library forcefully for safety to detect\n",
51
+ "!pip install urllib3==1.24.1"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "id": "ca7abfef-f88c-4766-8db4-b1f0909c8e83",
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "!pip install scikit-learn==0.19.0\n",
62
+ "import sklearn\n",
63
+ "print(sklearn.__version__)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "id": "a78b2239-8abd-44fb-b337-9c9f0830ecaf",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "!pip install numpy==1.16.0\n",
74
+ "import numpy as np\n",
75
+ "print(np.__version__)"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": null,
81
+ "id": "b3fb3af1-200d-4088-b2a8-5fa445e5d0ac",
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "!pip install tensorflow==1.15.5\n",
86
+ "import tensorflow as tf\n",
87
+ "print(tf.__version__)\n"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": null,
93
+ "id": "95b8762f-bd02-4f4d-9fa2-e511a2b4a326",
94
+ "metadata": {},
95
+ "outputs": [],
96
+ "source": [
97
+ "# A real example of a library with a non-permissive license\n",
98
+ "import gmpy2"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": null,
104
+ "id": "88872ca5-c939-4b25-b37e-4351fd6ef336",
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "# GNU Octave, an interpreted high-level programming language for numerical computations\n",
109
+ "# Licensed under GPL\n",
110
+ "import oct2py"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": null,
116
+ "id": "5e844eab-9caa-467e-b1c4-2c7aac5a31a9",
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "# Mock secret keys\n",
121
+ "SECRET_KEY = \"ABCDEFG\"\n",
122
+ "\n",
123
+ "aws_secret_key_1 = \"A3TABCDEFGH1234567890\" \n",
124
+ "\n",
125
+ "AWS_SECRET_ACCESS_KEY_0 = \"AKIAIOSFODNN7EXAMPLE\"\n",
126
+ "\n",
127
+ "AWS_SECRET_ACCESS_KEY = \"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\""
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": null,
133
+ "id": "901dbdc1-76b2-47d3-9428-2ddb4c043653",
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "AWS_ACCOUNT_ID = \"1234-5678-9012\""
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "id": "631f40be-470b-4bf1-b645-a0b8429f0dfb",
144
+ "metadata": {},
145
+ "outputs": [],
146
+ "source": [
147
+ "# PII Information (Just for demonstration, do not use real PII)\n",
148
+ "user_data = {\n",
149
+ " 'name': 'John Doe',\n",
150
+ " 'email': 'johndoe@example.com',\n",
151
+ " 'address': '123 Main St, Anytown, USA'\n",
152
+ "}"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": null,
158
+ "id": "2056314b-486e-4b2f-923a-4194c8a955fe",
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
163
+ "\n",
164
+ "# normalize to range 0-1\n",
165
+ "x_train = x_train / 255.0\n",
166
+ "x_test = x_test / 255.0\n",
167
+ "\n",
168
+ "# reshape\n",
169
+ "x_train = x_train.reshape(-1, 28, 28, 1)\n",
170
+ "x_test = x_test.reshape(-1, 28, 28, 1)\n"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": null,
176
+ "id": "e9766dd8-260d-4184-ac37-9e768f780d8e",
177
+ "metadata": {},
178
+ "outputs": [],
179
+ "source": [
180
+ "## Define the model\n",
181
+ "\n",
182
+ "# %%\n",
183
+ "model = Sequential()\n",
184
+ "model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))\n",
185
+ "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
186
+ "model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))\n",
187
+ "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
188
+ "model.add(Flatten())\n",
189
+ "model.add(Dense(128, activation='relu'))\n",
190
+ "model.add(Dense(10, activation='softmax'))"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": null,
196
+ "id": "e2e6cd12-349b-4088-a189-3037da3191ab",
197
+ "metadata": {},
198
+ "outputs": [],
199
+ "source": [
200
+ "# ## Compile the model\n",
201
+ "\n",
202
+ "# %%\n",
203
+ "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])"
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "code",
208
+ "execution_count": null,
209
+ "id": "156520be-06f9-45a6-8c75-5fcfa567d3de",
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "# ## Train the model\n",
214
+ "\n",
215
+ "# %%\n",
216
+ "history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)\n"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "id": "8bd92538-5655-444c-aa79-92c614f890d8",
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "test_loss, test_accuracy = model.evaluate(x_test, y_test)\n",
227
+ "print(f'Test loss: {test_loss}, Test accuracy: {test_accuracy}')"
228
+ ]
229
+ }
230
+ ],
231
+ "metadata": {
232
+ "kernelspec": {
233
+ "display_name": "Python 3 (ipykernel)",
234
+ "language": "python",
235
+ "name": "python3"
236
+ },
237
+ "language_info": {
238
+ "codemirror_mode": {
239
+ "name": "ipython",
240
+ "version": 3
241
+ },
242
+ "file_extension": ".py",
243
+ "mimetype": "text/x-python",
244
+ "name": "python",
245
+ "nbconvert_exporter": "python",
246
+ "pygments_lexer": "ipython3",
247
+ "version": "3.10.6"
248
+ }
249
+ },
250
+ "nbformat": 4,
251
+ "nbformat_minor": 5
252
+ }
sample_notebook_files/prediction_notebook.ipynb ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "a06300a0-6379-4cb8-b015-0e6e689ab64a",
6
+ "metadata": {},
7
+ "source": [
8
+ "This Jupyter notebook sets up a basic prediction model while intentionally incorporating different types of potential vulnerabilities, including the use of older versions of libraries, hardcoded secrets, and PII."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "id": "a4e7e5b2-3c14-44fb-808f-7241b2e75658",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "# Cell 1: (Forcing an installation of an older version of libraries)\n",
19
+ "\n",
20
+ "!pip install numpy==1.16.0\n",
21
+ "!pip install scikit-learn==0.19.0 # vulnerable version of scikit-learn"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": null,
27
+ "id": "d8f3c422-d9e6-497a-a7b2-ec91fee80fa4",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "# Cell 2: (Importing libraries including the one with older version)\n",
32
+ "\n",
33
+ "import numpy as np\n",
34
+ "import pandas as pd\n",
35
+ "import sklearn\n",
36
+ "from sklearn.model_selection import train_test_split\n",
37
+ "from sklearn.linear_model import LinearRegression"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "id": "3fc98c03-cc4c-4a3b-a5d9-41523c26930f",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "# Including a non-permissible licensed library\n",
48
+ "import oct2py"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": null,
54
+ "id": "bb7a28c5-ac7f-4574-990d-d25c7670f211",
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "# Cell 3: (API tokens and secrets)\n",
59
+ "azure_access_key = \"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==\""
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": null,
65
+ "id": "dd875b59-7454-4c81-88c2-37cf011ed332",
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "gcloud_api_key = \"AIzaQwerty12345678Xx\""
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "id": "87d8ed66-bb6d-46b1-9968-b7d5b2cf49df",
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "youtube_api_key = \"AIzaSyCewf3U1ZXHH4E2mK2s8A2D\""
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": null,
85
+ "id": "f2bae80c-9132-4931-8182-fafbe4a414a1",
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "# Dummy PII Data\n",
90
+ "person = {\n",
91
+ " 'first_name': 'John',\n",
92
+ " 'last_name': 'Doe',\n",
93
+ " 'ssn': '123-45-6789',\n",
94
+ " 'address': '1600 Amphitheatre Parkway, Mountain View, CA'\n",
95
+ "} # this could be seen as PII"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": null,
101
+ "id": "0ff71e14-4cf7-47f8-be2f-7a2f93d7900a",
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "USER_NAME = 'Joe Smith' #another PII in another format"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": null,
111
+ "id": "9a21aa9d-96ec-4555-9d1a-fa0f2cd39802",
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "EMAIL = 'john.doe@example.com' # also PII"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": null,
121
+ "id": "fb202b7d-a7ef-4d6e-89ec-6e5aa01422d0",
122
+ "metadata": {},
123
+ "outputs": [],
124
+ "source": [
125
+ "# Cell 4: (Model building)\n",
126
+ "\n",
127
+ "# Creating a dataset\n",
128
+ "np.random.seed(0)\n",
129
+ "x = np.random.rand(100, 1)\n",
130
+ "y = 2 + 3 * x + np.random.rand(100, 1)\n",
131
+ "\n",
132
+ "# Splitting the data\n",
133
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
134
+ "\n",
135
+ "# Model initialization\n",
136
+ "regression_model = LinearRegression()\n",
137
+ "\n",
138
+ "# Fit the data(train the model)\n",
139
+ "regression_model.fit(x_train, y_train)\n",
140
+ "\n",
141
+ "# Predict\n",
142
+ "y_predicted = regression_model.predict(x_test)"
143
+ ]
144
+ }
145
+ ],
146
+ "metadata": {
147
+ "kernelspec": {
148
+ "display_name": "Python 3 (ipykernel)",
149
+ "language": "python",
150
+ "name": "python3"
151
+ },
152
+ "language_info": {
153
+ "codemirror_mode": {
154
+ "name": "ipython",
155
+ "version": 3
156
+ },
157
+ "file_extension": ".py",
158
+ "mimetype": "text/x-python",
159
+ "name": "python",
160
+ "nbconvert_exporter": "python",
161
+ "pygments_lexer": "ipython3",
162
+ "version": "3.10.6"
163
+ }
164
+ },
165
+ "nbformat": 4,
166
+ "nbformat_minor": 5
167
+ }
sample_notebook_files/safe_notebook.ipynb ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "markdown",
19
+ "source": [
20
+ "Note: This notebook is free from any Personally Identifiable Information (PII), exposed API tokens, and outdated or vulnerable libraries."
21
+ ],
22
+ "metadata": {
23
+ "id": "lNScDliRLnLV"
24
+ }
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 1,
29
+ "metadata": {
30
+ "id": "R6ZAQbX7LB5l"
31
+ },
32
+ "outputs": [],
33
+ "source": [
34
+ "# Importing Safe and Updated Libraries\n",
35
+ "import pandas as pd\n",
36
+ "import numpy as np\n",
37
+ "import datetime\n",
38
+ "from matplotlib import pyplot as plt\n",
39
+ "from sklearn.linear_model import LinearRegression"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "source": [
45
+ "# Time Series Analysis using Pandas\n",
46
+ "# Create a date range\n",
47
+ "date_rng = pd.date_range(start='1/01/2023', end='1/10/2023', freq='H')"
48
+ ],
49
+ "metadata": {
50
+ "id": "oxMnBJncLJyH"
51
+ },
52
+ "execution_count": 2,
53
+ "outputs": []
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "source": [
58
+ "# Create a DataFrame\n",
59
+ "df = pd.DataFrame(date_rng, columns=['date'])"
60
+ ],
61
+ "metadata": {
62
+ "id": "gOX_vL4lLMmq"
63
+ },
64
+ "execution_count": 3,
65
+ "outputs": []
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "source": [
70
+ "# Generate some random data\n",
71
+ "df['data'] = np.random.randint(0,100,size=(len(date_rng)))"
72
+ ],
73
+ "metadata": {
74
+ "id": "52rGQNM-LRsO"
75
+ },
76
+ "execution_count": 4,
77
+ "outputs": []
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "source": [
82
+ "# Set the date column as index\n",
83
+ "df['datetime'] = pd.to_datetime(df['date'])\n",
84
+ "df = df.set_index('datetime')\n",
85
+ "df.drop(['date'], axis=1, inplace=True)"
86
+ ],
87
+ "metadata": {
88
+ "id": "Oz4NQyeqLXKW"
89
+ },
90
+ "execution_count": 5,
91
+ "outputs": []
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "source": [
96
+ "# Resample the DataFrame to calculate daily means\n",
97
+ "df_resampled = df.resample('D').mean()"
98
+ ],
99
+ "metadata": {
100
+ "id": "ees6U5rfLaw-"
101
+ },
102
+ "execution_count": 6,
103
+ "outputs": []
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "source": [
108
+ "# Display the resampled DataFrame\n",
109
+ "print(df_resampled)"
110
+ ],
111
+ "metadata": {
112
+ "id": "sdkU13xrLdKT"
113
+ },
114
+ "execution_count": null,
115
+ "outputs": []
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "source": [
120
+ "# Prediction part\n",
121
+ "X = [i for i in range(0, len(df_resampled))]\n",
122
+ "X = np.reshape(X, (len(X), 1))\n",
123
+ "y = df_resampled['data'].tolist()\n",
124
+ "model = LinearRegression()\n",
125
+ "model.fit(X, y)\n",
126
+ "# Predict the 'data' value for the next day\n",
127
+ "next_day = [[len(X) + 1]]\n",
128
+ "predicted_value = model.predict(next_day)\n",
129
+ "print('The predicted average value for the next day is: ', predicted_value[0])"
130
+ ],
131
+ "metadata": {
132
+ "id": "8x0pvqnrLiKF"
133
+ },
134
+ "execution_count": null,
135
+ "outputs": []
136
+ }
137
+ ]
138
+ }
sample_notebook_files/timeseries_notebook.ipynb ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "6cb9b97a-1641-45af-89bb-782b726bb957",
6
+ "metadata": {},
7
+ "source": [
8
+ "Time-series analysis using pandas. This notebook also incorporates some vulnerable library versions and exposed API tokens."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "id": "ed4a4cac-fed2-4d55-bcf9-163611851677",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "# Time Series Analysis using Pandas\n",
19
+ "\n",
20
+ "# Install vulnerable versions of libraries\n",
21
+ "!pip install django==1.11.15\n",
22
+ "!pip install flask==0.12.2\n",
23
+ "!pip install numpy==1.16.0\n",
24
+ "!pip install pandas==0.24.1"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "14e8b67a-5ed9-4881-be42-e7259c46f9b7",
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "# Import libraries\n",
35
+ "import pandas as pd\n",
36
+ "import numpy as np\n",
37
+ "import datetime\n",
38
+ "from matplotlib import pyplot as plt\n",
39
+ "from sklearn.linear_model import LinearRegression"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": null,
45
+ "id": "dd6ffe2b-0a38-4950-ab46-4b0cbdd7b399",
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "# Exposed API Tokens\n",
50
+ "linkedin_api_key = \"8619zzn49n49x1\""
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "id": "050a4e30-afd6-4da0-b992-630774894d42",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "# Let's analyze some time-series data.\n",
61
+ "# Please note that this data is fictional and does not represent any real person or entity.\n",
62
+ "\n",
63
+ "# Create a date range\n",
64
+ "date_rng = pd.date_range(start='1/01/2023', end='1/10/2023', freq='H')\n",
65
+ "\n",
66
+ "# Create a DataFrame\n",
67
+ "df = pd.DataFrame(date_rng, columns=['date'])\n",
68
+ "\n",
69
+ "# Generate some random data\n",
70
+ "df['data'] = np.random.randint(0,100,size=(len(date_rng)))\n",
71
+ "\n",
72
+ "# Set the date column as index\n",
73
+ "df['datetime'] = pd.to_datetime(df['date'])\n",
74
+ "df = df.set_index('datetime')\n",
75
+ "df.drop(['date'], axis=1, inplace=True)\n",
76
+ "\n",
77
+ "# Resample the DataFrame to calculate daily means\n",
78
+ "df_resampled = df.resample('D').mean()\n",
79
+ "\n",
80
+ "# Display the resampled DataFrame\n",
81
+ "print(df_resampled)\n",
82
+ "\n",
83
+ "# Prediction part\n",
84
+ "X = [i for i in range(0, len(df_resampled))]\n",
85
+ "X = np.reshape(X, (len(X), 1))\n",
86
+ "y = df_resampled['data'].tolist()\n",
87
+ "model = LinearRegression()\n",
88
+ "model.fit(X, y)\n",
89
+ "# Predict the 'data' value for the next day\n",
90
+ "next_day = [[len(X) + 1]]\n",
91
+ "predicted_value = model.predict(next_day)\n",
92
+ "print('The predicted average value for the next day is: ', predicted_value[0])\n"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "id": "21f2e251-7f69-4f27-9041-aff5d022bac0",
99
+ "metadata": {},
100
+ "outputs": [],
101
+ "source": [
102
+ "\n",
103
+ "# PII in comments (phone number)\n",
104
+ "# Contact me if you have any questions: 123-456-7890"
105
+ ]
106
+ }
107
+ ],
108
+ "metadata": {
109
+ "kernelspec": {
110
+ "display_name": "Python 3 (ipykernel)",
111
+ "language": "python",
112
+ "name": "python3"
113
+ },
114
+ "language_info": {
115
+ "codemirror_mode": {
116
+ "name": "ipython",
117
+ "version": 3
118
+ },
119
+ "file_extension": ".py",
120
+ "mimetype": "text/x-python",
121
+ "name": "python",
122
+ "nbconvert_exporter": "python",
123
+ "pygments_lexer": "ipython3",
124
+ "version": "3.10.6"
125
+ }
126
+ },
127
+ "nbformat": 4,
128
+ "nbformat_minor": 5
129
+ }