Pankaj001 committed on May 17, 2024

Commit

a3abb69

verified ·

1 Parent(s): eb326a7

Upload 30 files

Browse files

Files changed (31) hide show

.gitattributes +1 -0
sample_model_files/malicious_model_custom_layer.h5 +3 -0
sample_model_files/malicious_model_lambda.h5 +3 -0
sample_model_files/model_01.h5 +3 -0
sample_model_files/model_99.h5 +3 -0
sample_model_files/model_with_pickled_data.h5 +3 -0
sample_model_files/pb files/linear_model.pb/fingerprint.pb +3 -0
sample_model_files/pb files/linear_model.pb/keras_metadata.pb +3 -0
sample_model_files/pb files/linear_model.pb/saved_model.pb +3 -0
sample_model_files/pb files/linear_model.pb/variables/variables.data-00000-of-00001 +3 -0
sample_model_files/pb files/linear_model.pb/variables/variables.index +3 -0
sample_model_files/pb files/malicious_model_read_modified.pb/fingerprint.pb +3 -0
sample_model_files/pb files/malicious_model_read_modified.pb/keras_metadata.pb +3 -0
sample_model_files/pb files/malicious_model_read_modified.pb/saved_model.pb +3 -0
sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.data-00000-of-00001 +3 -0
sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.index +3 -0
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/fingerprint.pb +3 -0
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/keras_metadata.pb +3 -0
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/saved_model.pb +3 -0
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.data-00000-of-00001 +3 -0
sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.index +3 -0
sample_model_files/pb files/pb/saved_model.pb +3 -0
sample_model_files/pb files/pb/variables/variables.data-00000-of-00001 +3 -0
sample_model_files/pb files/pb/variables/variables.index +0 -0
sample_model_files/safe_model.h5 +3 -0
sample_model_files/sample_pickle.pkl +3 -0
sample_notebook_files/classification_notebook.ipynb +177 -0
sample_notebook_files/generic.ipynb +252 -0
sample_notebook_files/prediction_notebook.ipynb +167 -0
sample_notebook_files/safe_notebook.ipynb +138 -0
sample_notebook_files/timeseries_notebook.ipynb +129 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+sample_model_files/pb[[:space:]]files/pb/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text

sample_model_files/malicious_model_custom_layer.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:232e6d8bc42f8c821d4858a923028c3debe78e8938cf00049a0f5bb2c55ea856
+size 50512

sample_model_files/malicious_model_lambda.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91f45f5c6183f78ac82ca526bae8dee3e8804a340828ff8da18f4fc2dcb93856
+size 50512

sample_model_files/model_01.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d2a411dbc531120b84c37211a19107fded7947c591215894778a2dc12927b62
+size 5881000

sample_model_files/model_99.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3696f7d099bf5a2efa03195cada466ef91bf678e024333a146576a5937990f9
+size 6330600

sample_model_files/model_with_pickled_data.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49a0e12b37d068fa5a4c3a1c32868ede7a4fa4a7cea69af173c4131314298f28
+size 18779

sample_model_files/pb files/linear_model.pb/fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5b997df2c14a9436c89b95a8a209d74af3363d52745d83a718a97693fcd5a74
+size 58

sample_model_files/pb files/linear_model.pb/keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa76ef4feac193ebc7ff8762cb8d082eeac9d24e69879f2745081b8b08c2321e
+size 4425

sample_model_files/pb files/linear_model.pb/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e71626999238006df8851264233f38859d035948ff742210ebd5731c13f8640
+size 39339

sample_model_files/pb files/linear_model.pb/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:715488bb973717280729ba9357d11cef2392bf7f4bfce8e65ec2e94d6982928b
+size 2325

sample_model_files/pb files/linear_model.pb/variables/variables.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e58d48e3235f959186e81b264f9250385ea822d703437d7a1a674bf318907512
+size 621

sample_model_files/pb files/malicious_model_read_modified.pb/fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:63413813c501e4a12f3edbc28e373f41e26f3baef03afbf2911df59d0061e242
+size 57

sample_model_files/pb files/malicious_model_read_modified.pb/keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88004053b611bffb60ed32ac97cab30d8ff2c358ccb737d5450cc8ab7cece103
+size 204831

sample_model_files/pb files/malicious_model_read_modified.pb/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ae8eebd2d4bc8fddb4a6629208456ec212375a7893a764b321280b6140abea9
+size 2249481

sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:859960385d552b0b232fb6995dee6d2c2af44c20a9bd141517b62c13dc63fa98
+size 391277

sample_model_files/pb files/malicious_model_read_modified.pb/variables/variables.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e51dca6b2deabfeddea73fd8c56c88c9320add07006959f67b01760b0b63d64
+size 12414

sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e670dc5c33f705fe175b5e6f802ac10dc826e4e86e17b62fcbe83bd21340fa65
+size 57

sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b52251889d94b38f78f541596bdad519f3f052a2f62598272a1dbbf1c935e35
+size 8526

sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:200961791226861b664060751c220faecf6e7a824ec3a99c7967b0c83f5c671f
+size 61321

sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcbc0d986a88a44d6c30766b7b3a1a2c9840ce11002242bae9c698c225b7f7ad
+size 3517

sample_model_files/pb files/malicious_model_unsafe_write_modified.pb/variables/variables.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2abc09c5f25f35f9d01650985103b49019ed9b1ff130da65bf5567b4e1016ede
+size 621

sample_model_files/pb files/pb/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d55083f53c49df137a089df0c4c6d41b6e027a9ed06f6f7c201ab8ca4b306b3f
+size 235045

sample_model_files/pb files/pb/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d71048fc9a5dd7aad56ebe1493174118f0da01ec5640fd1979d1448c765f26d1
+size 6274830

sample_model_files/pb files/pb/variables/variables.index ADDED Viewed

Binary file (3.56 kB). View file

sample_model_files/safe_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:913fb308a03f3485ded8ab0b3747af7996bb0ba9e151622177db0b2141fd8593
+size 49672

sample_model_files/sample_pickle.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ab0c5b5c0b3775045c35f933ee98d4c1c68d5021c1d6fcf983c2f0bb9178607
+size 186233

sample_notebook_files/classification_notebook.ipynb ADDED Viewed

	@@ -0,0 +1,177 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d5c4b5c2-8c0a-4cbe-8997-1a98c14be2e4",
+   "metadata": {},
+   "source": [
+    "A text classification model using libraries like NLTK or SpaCy. It includes some PII data within the code (e.g., hard-coded email addresses or phone numbers for testing purposes), and includes a few API tokens/secrets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f95fa380-34d0-455d-8002-ebe5f829542c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Vulnerable libraries\n",
+    "!pip install django==1.11.15\n",
+    "!pip install flask==0.12.2\n",
+    "!pip install numpy==1.16.0\n",
+    "!pip install requests==2.19.1\n",
+    "!pip install scikit-learn==0.19.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25315022-9da9-4c29-8326-6532d261dd56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Non-permissible licensed libraries\n",
+    "import gmpy2\n",
+    "import oct2py\n",
+    "import pygsl\n",
+    "from PyQt5 import QtCore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "489ad824-285c-4219-afc6-073192d54f3e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Required Libraries for our task\n",
+    "import nltk\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "109d2f98-4d6d-42d9-acb4-2f195af051d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# PII Data\n",
+    "email = \"john.doe@example.com\"\n",
+    "phone = \"123-456-7890\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d637e295-0953-4980-bf99-c7e7e509e876",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# API Keys and secrets\n",
+    "fb_app_secret = \"3e4a22bb7e6b2c38b7809234b3ee782b\"\n",
+    "db_credentials = \"username:password@localhost:5432/mydatabase\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6493567-ad7f-4b87-95e4-5068a09fca92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download nltk data\n",
+    "nltk.download('punkt', download_dir='/nltk_data/')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f94e191-bfe7-4e54-9dbf-4d2484b0dbe9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Text Classification\n",
+    "from sklearn.datasets import fetch_20newsgroups\n",
+    "from sklearn.feature_extraction.text import CountVectorizer\n",
+    "from sklearn.feature_extraction.text import TfidfTransformer\n",
+    "from sklearn.naive_bayes import MultinomialNB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8552e84a-e164-4519-8ce8-959c7dd277ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load Data\n",
+    "categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']\n",
+    "twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abc38386-e63f-4d22-81dc-1785ac8f043b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Feature Extraction\n",
+    "count_vect = CountVectorizer()\n",
+    "X_train_counts = count_vect.fit_transform(twenty_train.data)\n",
+    "tfidf_transformer = TfidfTransformer()\n",
+    "X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ea57698-12ff-48b3-a8b6-bb8dffabbc5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train Model\n",
+    "clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a5fa6532-594c-4790-a630-83388c556591",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Predict\n",
+    "docs_new = ['God is love', 'OpenGL on the GPU is fast']\n",
+    "X_new_counts = count_vect.transform(docs_new)\n",
+    "X_new_tfidf = tfidf_transformer.transform(X_new_counts)\n",
+    "predicted = clf.predict(X_new_tfidf)\n",
+    "for doc, category in zip(docs_new, predicted):\n",
+    "    print('%r => %s' % (doc, twenty_train.target_names[category]))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

sample_notebook_files/generic.ipynb ADDED Viewed

	@@ -0,0 +1,252 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8de6eb89-7d92-4e9d-ab20-8c71ed062072",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D\n",
+    "from keras.datasets import mnist\n",
+    "from keras.preprocessing.image import ImageDataGenerator\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1caec746-026a-4649-952d-98ff1ac69e97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Intentionally including deprecated library\n",
+    "import imp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e7fb59e-6d40-4be1-a51e-0162ddd02c80",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    " # added this vulnerable library (safety might use updated safe library version for this)\n",
+    "import urllib3 \n",
+    "print(urllib3.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf17cb05-cc6f-4ef0-a27d-fb6a5af33eb9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#using vulnerable library forcefully for safety to detect\n",
+    "!pip install urllib3==1.24.1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca7abfef-f88c-4766-8db4-b1f0909c8e83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install scikit-learn==0.19.0\n",
+    "import sklearn\n",
+    "print(sklearn.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a78b2239-8abd-44fb-b337-9c9f0830ecaf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install numpy==1.16.0\n",
+    "import numpy as np\n",
+    "print(np.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3fb3af1-200d-4088-b2a8-5fa445e5d0ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install tensorflow==1.15.5\n",
+    "import tensorflow as tf\n",
+    "print(tf.__version__)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95b8762f-bd02-4f4d-9fa2-e511a2b4a326",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A real example of a library with a non-permissive license\n",
+    "import gmpy2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88872ca5-c939-4b25-b37e-4351fd6ef336",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# GNU Octave, an interpreted high-level programming language for numerical computations\n",
+    "# Licensed under GPL\n",
+    "import oct2py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e844eab-9caa-467e-b1c4-2c7aac5a31a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Mock secret keys\n",
+    "SECRET_KEY = \"ABCDEFG\"\n",
+    "\n",
+    "aws_secret_key_1 = \"A3TABCDEFGH1234567890\" \n",
+    "\n",
+    "AWS_SECRET_ACCESS_KEY_0 = \"AKIAIOSFODNN7EXAMPLE\"\n",
+    "\n",
+    "AWS_SECRET_ACCESS_KEY = \"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "901dbdc1-76b2-47d3-9428-2ddb4c043653",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "AWS_ACCOUNT_ID = \"1234-5678-9012\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "631f40be-470b-4bf1-b645-a0b8429f0dfb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# PII Information (Just for demonstration, do not use real PII)\n",
+    "user_data = {\n",
+    "    'name': 'John Doe',\n",
+    "    'email': 'johndoe@example.com',\n",
+    "    'address': '123 Main St, Anytown, USA'\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2056314b-486e-4b2f-923a-4194c8a955fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
+    "\n",
+    "# normalize to range 0-1\n",
+    "x_train = x_train / 255.0\n",
+    "x_test = x_test / 255.0\n",
+    "\n",
+    "# reshape\n",
+    "x_train = x_train.reshape(-1, 28, 28, 1)\n",
+    "x_test = x_test.reshape(-1, 28, 28, 1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9766dd8-260d-4184-ac37-9e768f780d8e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Define the model\n",
+    "\n",
+    "# %%\n",
+    "model = Sequential()\n",
+    "model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))\n",
+    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))\n",
+    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "model.add(Flatten())\n",
+    "model.add(Dense(128, activation='relu'))\n",
+    "model.add(Dense(10, activation='softmax'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2e6cd12-349b-4088-a189-3037da3191ab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ## Compile the model\n",
+    "\n",
+    "# %%\n",
+    "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "156520be-06f9-45a6-8c75-5fcfa567d3de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ## Train the model\n",
+    "\n",
+    "# %%\n",
+    "history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8bd92538-5655-444c-aa79-92c614f890d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_loss, test_accuracy = model.evaluate(x_test, y_test)\n",
+    "print(f'Test loss: {test_loss}, Test accuracy: {test_accuracy}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

sample_notebook_files/prediction_notebook.ipynb ADDED Viewed

	@@ -0,0 +1,167 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a06300a0-6379-4cb8-b015-0e6e689ab64a",
+   "metadata": {},
+   "source": [
+    "This Jupyter notebook script sets up a basic prediction model while intentionally incorporating different types of potential vulnerabilities, including usage of an older version of a library, hardcoded secrets, and PII."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a4e7e5b2-3c14-44fb-808f-7241b2e75658",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cell 1: (Forcing an installation of an older version of libraries)\n",
+    "\n",
+    "!pip install numpy==1.16.0\n",
+    "!pip install scikit-learn==0.19.0  # vulnerable version of scikit-learn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8f3c422-d9e6-497a-a7b2-ec91fee80fa4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cell 2: (Importing libraries including the one with older version)\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import sklearn\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.linear_model import LinearRegression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3fc98c03-cc4c-4a3b-a5d9-41523c26930f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Including a non-permissible licensed library\n",
+    "import oct2py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb7a28c5-ac7f-4574-990d-d25c7670f211",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cell 3: (API tokens and secrets)\n",
+    "azure_access_key = \"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd875b59-7454-4c81-88c2-37cf011ed332",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gcloud_api_key = \"AIzaQwerty12345678Xx\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87d8ed66-bb6d-46b1-9968-b7d5b2cf49df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "youtube_api_key = \"AIzaSyCewf3U1ZXHH4E2mK2s8A2D\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2bae80c-9132-4931-8182-fafbe4a414a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dummy PII Data\n",
+    "person = {\n",
+    "    'first_name': 'John',\n",
+    "    'last_name': 'Doe',\n",
+    "    'ssn': '123-45-6789',\n",
+    "    'address': '1600 Amphitheatre Parkway, Mountain View, CA'\n",
+    "}  # this could be seen as PII"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ff71e14-4cf7-47f8-be2f-7a2f93d7900a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "USER_NAME = 'Joe Smith'  #another PII in another format"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9a21aa9d-96ec-4555-9d1a-fa0f2cd39802",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "EMAIL = 'john.doe@example.com'  # also PII"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb202b7d-a7ef-4d6e-89ec-6e5aa01422d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cell 4: (Model building)\n",
+    "\n",
+    "# Creating a dataset\n",
+    "np.random.seed(0)\n",
+    "x = np.random.rand(100, 1)\n",
+    "y = 2 + 3 * x + np.random.rand(100, 1)\n",
+    "\n",
+    "# Splitting the data\n",
+    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
+    "\n",
+    "# Model initialization\n",
+    "regression_model = LinearRegression()\n",
+    "\n",
+    "# Fit the data(train the model)\n",
+    "regression_model.fit(x_train, y_train)\n",
+    "\n",
+    "# Predict\n",
+    "y_predicted = regression_model.predict(x_test)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

sample_notebook_files/safe_notebook.ipynb ADDED Viewed

	@@ -0,0 +1,138 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Note: This notebook is free from any Personally Identifiable Information (PII), exposed API tokens, and outdated or vulnerable libraries."
+      ],
+      "metadata": {
+        "id": "lNScDliRLnLV"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "R6ZAQbX7LB5l"
+      },
+      "outputs": [],
+      "source": [
+        "# Importing Safe and Updated Libraries\n",
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import datetime\n",
+        "from matplotlib import pyplot as plt\n",
+        "from sklearn.linear_model import LinearRegression"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Time Series Analysis using Pandas\n",
+        "# Create a date range\n",
+        "date_rng = pd.date_range(start='1/01/2023', end='1/10/2023', freq='H')"
+      ],
+      "metadata": {
+        "id": "oxMnBJncLJyH"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Create a DataFrame\n",
+        "df = pd.DataFrame(date_rng, columns=['date'])"
+      ],
+      "metadata": {
+        "id": "gOX_vL4lLMmq"
+      },
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Generate some random data\n",
+        "df['data'] = np.random.randint(0,100,size=(len(date_rng)))"
+      ],
+      "metadata": {
+        "id": "52rGQNM-LRsO"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Set the date column as index\n",
+        "df['datetime'] = pd.to_datetime(df['date'])\n",
+        "df = df.set_index('datetime')\n",
+        "df.drop(['date'], axis=1, inplace=True)"
+      ],
+      "metadata": {
+        "id": "Oz4NQyeqLXKW"
+      },
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Resample the DataFrame to calculate daily means\n",
+        "df_resampled = df.resample('D').mean()"
+      ],
+      "metadata": {
+        "id": "ees6U5rfLaw-"
+      },
+      "execution_count": 6,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Display the resampled DataFrame\n",
+        "print(df_resampled)"
+      ],
+      "metadata": {
+        "id": "sdkU13xrLdKT"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Prediction part\n",
+        "X = [i for i in range(0, len(df_resampled))]\n",
+        "X = np.reshape(X, (len(X), 1))\n",
+        "y = df_resampled['data'].tolist()\n",
+        "model = LinearRegression()\n",
+        "model.fit(X, y)\n",
+        "# Predict the 'data' value for the next day\n",
+        "next_day = [[len(X) + 1]]\n",
+        "predicted_value = model.predict(next_day)\n",
+        "print('The predicted average value for the next day is: ', predicted_value[0])"
+      ],
+      "metadata": {
+        "id": "8x0pvqnrLiKF"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

sample_notebook_files/timeseries_notebook.ipynb ADDED Viewed

	@@ -0,0 +1,129 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6cb9b97a-1641-45af-89bb-782b726bb957",
+   "metadata": {},
+   "source": [
+    "Time-series analysis using pandas that incorporates some vulnerable library versions and exposed API tokens."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ed4a4cac-fed2-4d55-bcf9-163611851677",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Time Series Analysis using Pandas\n",
+    "\n",
+    "# Install vulnerable versions of libraries\n",
+    "!pip install django==1.11.15\n",
+    "!pip install flask==0.12.2\n",
+    "!pip install numpy==1.16.0\n",
+    "!pip install pandas==0.24.1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14e8b67a-5ed9-4881-be42-e7259c46f9b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import datetime\n",
+    "from matplotlib import pyplot as plt\n",
+    "from sklearn.linear_model import LinearRegression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd6ffe2b-0a38-4950-ab46-4b0cbdd7b399",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Exposed API Tokens\n",
+    "linkedin_api_key = \"8619zzn49n49x1\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "050a4e30-afd6-4da0-b992-630774894d42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's analyze some time-series data.\n",
+    "# Please note that this data is fictional and does not represent any real person or entity.\n",
+    "\n",
+    "# Create a date range\n",
+    "date_rng = pd.date_range(start='1/01/2023', end='1/10/2023', freq='H')\n",
+    "\n",
+    "# Create a DataFrame\n",
+    "df = pd.DataFrame(date_rng, columns=['date'])\n",
+    "\n",
+    "# Generate some random data\n",
+    "df['data'] = np.random.randint(0,100,size=(len(date_rng)))\n",
+    "\n",
+    "# Set the date column as index\n",
+    "df['datetime'] = pd.to_datetime(df['date'])\n",
+    "df = df.set_index('datetime')\n",
+    "df.drop(['date'], axis=1, inplace=True)\n",
+    "\n",
+    "# Resample the DataFrame to calculate daily means\n",
+    "df_resampled = df.resample('D').mean()\n",
+    "\n",
+    "# Display the resampled DataFrame\n",
+    "print(df_resampled)\n",
+    "\n",
+    "# Prediction part\n",
+    "X = [i for i in range(0, len(df_resampled))]\n",
+    "X = np.reshape(X, (len(X), 1))\n",
+    "y = df_resampled['data'].tolist()\n",
+    "model = LinearRegression()\n",
+    "model.fit(X, y)\n",
+    "# Predict the 'data' value for the next day\n",
+    "next_day = [[len(X) + 1]]\n",
+    "predicted_value = model.predict(next_day)\n",
+    "print('The predicted average value for the next day is: ', predicted_value[0])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "21f2e251-7f69-4f27-9041-aff5d022bac0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# PII in comments (phone number)\n",
+    "# Contact me if you have any questions: 123-456-7890"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}