{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a06300a0-6379-4cb8-b015-0e6e689ab64a",
   "metadata": {},
   "source": [
    "# Basic prediction model with intentional vulnerabilities\n",
    "\n",
    "This notebook sets up a basic linear-regression prediction model while intentionally incorporating several kinds of potential vulnerabilities:\n",
    "\n",
    "- installation of older, pinned versions of libraries,\n",
    "- import of a library with a non-permissible license,\n",
    "- hardcoded secrets (API keys and access keys),\n",
    "- personally identifiable information (PII).\n",
    "\n",
    "**Warning:** the hardcoded values below are part of the demonstration. Do not copy this pattern into real code; load credentials from environment variables or a secrets manager instead."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4e7e5b2-3c14-44fb-808f-7241b2e75658",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 1: (Forcing an installation of an older version of libraries)\n",
    "# %pip is used instead of !pip so the packages are installed into the\n",
    "# environment of the running kernel rather than whichever pip is first on PATH.\n",
    "\n",
    "%pip install numpy==1.16.0\n",
    "# The pinned scikit-learn release below is a vulnerable version (intentional).\n",
    "%pip install scikit-learn==0.19.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8f3c422-d9e6-497a-a7b2-ec91fee80fa4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2: (Importing libraries, including the ones pinned to older versions)\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fc98c03-cc4c-4a3b-a5d9-41523c26930f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Including a library with a non-permissible license (intentional)\n",
    "import oct2py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb7a28c5-ac7f-4574-990d-d25c7670f211",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 3: (API tokens and secrets)\n",
    "# Intentionally hardcoded for this demonstration; real credentials must never\n",
    "# be committed in a notebook.\n",
    "azure_access_key = \"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd875b59-7454-4c81-88c2-37cf011ed332",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Intentionally hardcoded API key (part of the demonstration)\n",
    "gcloud_api_key = \"AIzaQwerty12345678Xx\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87d8ed66-bb6d-46b1-9968-b7d5b2cf49df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Intentionally hardcoded API key (part of the demonstration)\n",
    "youtube_api_key = \"AIzaSyCewf3U1ZXHH4E2mK2s8A2D\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2bae80c-9132-4931-8182-fafbe4a414a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dummy PII Data\n",
    "person = {\n",
    "    'first_name': 'John',\n",
    "    'last_name': 'Doe',\n",
    "    'ssn': '123-45-6789',\n",
    "    'address': '1600 Amphitheatre Parkway, Mountain View, CA'\n",
    "}  # this could be seen as PII"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ff71e14-4cf7-47f8-be2f-7a2f93d7900a",
   "metadata": {},
   "outputs": [],
   "source": [
    "USER_NAME = 'Joe Smith'  # another piece of PII, in a different format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a21aa9d-96ec-4555-9d1a-fa0f2cd39802",
   "metadata": {},
   "outputs": [],
   "source": [
    "EMAIL = 'john.doe@example.com'  # also PII"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb202b7d-a7ef-4d6e-89ec-6e5aa01422d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4: (Model building)\n",
    "\n",
    "# Create a synthetic dataset: y = 2 + 3x plus random noise.\n",
    "# The seed is fixed so every run draws the same samples.\n",
    "np.random.seed(0)\n",
    "x = np.random.rand(100, 1)\n",
    "y = 2 + 3 * x + np.random.rand(100, 1)\n",
    "\n",
    "# Split the data: 80% for training, 20% held out for testing\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
    "\n",
    "# Model initialization\n",
    "regression_model = LinearRegression()\n",
    "\n",
    "# Fit the data (train the model)\n",
    "regression_model.fit(x_train, y_train)\n",
    "\n",
    "# Predict on the held-out test inputs\n",
    "y_predicted = regression_model.predict(x_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}