Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
colab_files_to_train_models/Multiple disease prediction system - Parkinsons.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Multiple disease prediction system - Parkinsons.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPgH5xu9ZLOpcMFNkcpInRX"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"9B5Zl1UOBMAJ"},"source":["Importing the Dependencies"]},{"cell_type":"code","metadata":{"id":"YOCpZ1Vm6cfW","executionInfo":{"status":"ok","timestamp":1653200307851,"user_tz":-330,"elapsed":2162,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["import numpy as np\n","import pandas as pd\n","from sklearn.model_selection import train_test_split\n","from sklearn import svm\n","from sklearn.metrics import accuracy_score"],"execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"PZm-USrtB_q4"},"source":["Data Collection & Analysis"]},{"cell_type":"code","metadata":{"id":"5YC2lGuVBiZA","executionInfo":{"status":"ok","timestamp":1653200307854,"user_tz":-330,"elapsed":23,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# loading the data from csv file to a Pandas DataFrame\n","parkinsons_data = pd.read_csv('/content/parkinsons.csv')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":299},"id":"Iw8z6w60Djd2","executionInfo":{"status":"ok","timestamp":1653200307855,"user_tz":-330,"elapsed":23,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"ca177b83-79f9-46c5-89c0-42985b1923ba"},"source":["# printing the first 5 rows of the dataframe\n","parkinsons_data.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \\\n","0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 \n","1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 \n","2 
phon_R01_S01_3 116.682 131.111 111.555 0.01050 \n","3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 \n","4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 \n","\n"," MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... \\\n","0 0.00007 0.00370 0.00554 0.01109 0.04374 ... \n","1 0.00008 0.00465 0.00696 0.01394 0.06134 ... \n","2 0.00009 0.00544 0.00781 0.01633 0.05233 ... \n","3 0.00009 0.00502 0.00698 0.01505 0.05492 ... \n","4 0.00011 0.00655 0.00908 0.01966 0.06425 ... \n","\n"," Shimmer:DDA NHR HNR status RPDE DFA spread1 \\\n","0 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 \n","1 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 \n","2 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 \n","3 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 \n","4 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 \n","\n"," spread2 D2 PPE \n","0 0.266482 2.301442 0.284654 \n","1 0.335590 2.486855 0.368674 \n","2 0.311173 2.342259 0.332634 \n","3 0.334147 2.405554 0.368975 \n","4 0.234513 2.332180 0.410335 \n","\n","[5 rows x 24 columns]"],"text/html":["\n"," <div id=\"df-09ab8692-d189-4aac-8b73-032a2a0c64be\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>name</th>\n"," <th>MDVP:Fo(Hz)</th>\n"," <th>MDVP:Fhi(Hz)</th>\n"," <th>MDVP:Flo(Hz)</th>\n"," <th>MDVP:Jitter(%)</th>\n"," <th>MDVP:Jitter(Abs)</th>\n"," <th>MDVP:RAP</th>\n"," <th>MDVP:PPQ</th>\n"," <th>Jitter:DDP</th>\n"," <th>MDVP:Shimmer</th>\n"," <th>...</th>\n"," <th>Shimmer:DDA</th>\n"," <th>NHR</th>\n"," <th>HNR</th>\n"," <th>status</th>\n"," <th>RPDE</th>\n"," <th>DFA</th>\n"," <th>spread1</th>\n"," <th>spread2</th>\n"," 
<th>D2</th>\n"," <th>PPE</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>phon_R01_S01_1</td>\n"," <td>119.992</td>\n"," <td>157.302</td>\n"," <td>74.997</td>\n"," <td>0.00784</td>\n"," <td>0.00007</td>\n"," <td>0.00370</td>\n"," <td>0.00554</td>\n"," <td>0.01109</td>\n"," <td>0.04374</td>\n"," <td>...</td>\n"," <td>0.06545</td>\n"," <td>0.02211</td>\n"," <td>21.033</td>\n"," <td>1</td>\n"," <td>0.414783</td>\n"," <td>0.815285</td>\n"," <td>-4.813031</td>\n"," <td>0.266482</td>\n"," <td>2.301442</td>\n"," <td>0.284654</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>phon_R01_S01_2</td>\n"," <td>122.400</td>\n"," <td>148.650</td>\n"," <td>113.819</td>\n"," <td>0.00968</td>\n"," <td>0.00008</td>\n"," <td>0.00465</td>\n"," <td>0.00696</td>\n"," <td>0.01394</td>\n"," <td>0.06134</td>\n"," <td>...</td>\n"," <td>0.09403</td>\n"," <td>0.01929</td>\n"," <td>19.085</td>\n"," <td>1</td>\n"," <td>0.458359</td>\n"," <td>0.819521</td>\n"," <td>-4.075192</td>\n"," <td>0.335590</td>\n"," <td>2.486855</td>\n"," <td>0.368674</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>phon_R01_S01_3</td>\n"," <td>116.682</td>\n"," <td>131.111</td>\n"," <td>111.555</td>\n"," <td>0.01050</td>\n"," <td>0.00009</td>\n"," <td>0.00544</td>\n"," <td>0.00781</td>\n"," <td>0.01633</td>\n"," <td>0.05233</td>\n"," <td>...</td>\n"," <td>0.08270</td>\n"," <td>0.01309</td>\n"," <td>20.651</td>\n"," <td>1</td>\n"," <td>0.429895</td>\n"," <td>0.825288</td>\n"," <td>-4.443179</td>\n"," <td>0.311173</td>\n"," <td>2.342259</td>\n"," <td>0.332634</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>phon_R01_S01_4</td>\n"," <td>116.676</td>\n"," <td>137.871</td>\n"," <td>111.366</td>\n"," <td>0.00997</td>\n"," <td>0.00009</td>\n"," <td>0.00502</td>\n"," <td>0.00698</td>\n"," <td>0.01505</td>\n"," <td>0.05492</td>\n"," <td>...</td>\n"," <td>0.08771</td>\n"," <td>0.01353</td>\n"," <td>20.644</td>\n"," <td>1</td>\n"," <td>0.434969</td>\n"," <td>0.819235</td>\n"," <td>-4.117501</td>\n"," 
<td>0.334147</td>\n"," <td>2.405554</td>\n"," <td>0.368975</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>phon_R01_S01_5</td>\n"," <td>116.014</td>\n"," <td>141.781</td>\n"," <td>110.655</td>\n"," <td>0.01284</td>\n"," <td>0.00011</td>\n"," <td>0.00655</td>\n"," <td>0.00908</td>\n"," <td>0.01966</td>\n"," <td>0.06425</td>\n"," <td>...</td>\n"," <td>0.10470</td>\n"," <td>0.01767</td>\n"," <td>19.649</td>\n"," <td>1</td>\n"," <td>0.417356</td>\n"," <td>0.823484</td>\n"," <td>-3.747787</td>\n"," <td>0.234513</td>\n"," <td>2.332180</td>\n"," <td>0.410335</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 24 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-09ab8692-d189-4aac-8b73-032a2a0c64be')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 
0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-09ab8692-d189-4aac-8b73-032a2a0c64be button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-09ab8692-d189-4aac-8b73-032a2a0c64be');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cK7L_o2TDuZb","executionInfo":{"status":"ok","timestamp":1653200307855,"user_tz":-330,"elapsed":19,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"ff4ba57f-ef7c-42e3-e76d-c6cd69e8b250"},"source":["# number of rows and columns in the dataframe\n","parkinsons_data.shape"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(195, 
24)"]},"metadata":{},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NLmzHIgnEGi4","executionInfo":{"status":"ok","timestamp":1653200307856,"user_tz":-330,"elapsed":17,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"59986869-f8e1-47a7-cd80-aa699514e488"},"source":["# getting more information about the dataset\n","parkinsons_data.info()"],"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 195 entries, 0 to 194\n","Data columns (total 24 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 name 195 non-null object \n"," 1 MDVP:Fo(Hz) 195 non-null float64\n"," 2 MDVP:Fhi(Hz) 195 non-null float64\n"," 3 MDVP:Flo(Hz) 195 non-null float64\n"," 4 MDVP:Jitter(%) 195 non-null float64\n"," 5 MDVP:Jitter(Abs) 195 non-null float64\n"," 6 MDVP:RAP 195 non-null float64\n"," 7 MDVP:PPQ 195 non-null float64\n"," 8 Jitter:DDP 195 non-null float64\n"," 9 MDVP:Shimmer 195 non-null float64\n"," 10 MDVP:Shimmer(dB) 195 non-null float64\n"," 11 Shimmer:APQ3 195 non-null float64\n"," 12 Shimmer:APQ5 195 non-null float64\n"," 13 MDVP:APQ 195 non-null float64\n"," 14 Shimmer:DDA 195 non-null float64\n"," 15 NHR 195 non-null float64\n"," 16 HNR 195 non-null float64\n"," 17 status 195 non-null int64 \n"," 18 RPDE 195 non-null float64\n"," 19 DFA 195 non-null float64\n"," 20 spread1 195 non-null float64\n"," 21 spread2 195 non-null float64\n"," 22 D2 195 non-null float64\n"," 23 PPE 195 non-null float64\n","dtypes: float64(22), int64(1), object(1)\n","memory usage: 36.7+ KB\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"70rgu_k4ET9F","executionInfo":{"status":"ok","timestamp":1653200307857,"user_tz":-330,"elapsed":15,"user":{"displayName":"siddhardh 
selvam","userId":"13966379820454708749"}},"outputId":"9d86783c-8f01-468a-a9a0-04552ebc10c7"},"source":["# checking for missing values in each column\n","parkinsons_data.isnull().sum()"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["name 0\n","MDVP:Fo(Hz) 0\n","MDVP:Fhi(Hz) 0\n","MDVP:Flo(Hz) 0\n","MDVP:Jitter(%) 0\n","MDVP:Jitter(Abs) 0\n","MDVP:RAP 0\n","MDVP:PPQ 0\n","Jitter:DDP 0\n","MDVP:Shimmer 0\n","MDVP:Shimmer(dB) 0\n","Shimmer:APQ3 0\n","Shimmer:APQ5 0\n","MDVP:APQ 0\n","Shimmer:DDA 0\n","NHR 0\n","HNR 0\n","status 0\n","RPDE 0\n","DFA 0\n","spread1 0\n","spread2 0\n","D2 0\n","PPE 0\n","dtype: int64"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":394},"id":"1AxFu0-nEhSA","executionInfo":{"status":"ok","timestamp":1653200308700,"user_tz":-330,"elapsed":853,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"333d5e58-f085-43dc-ccb1-e7fc75084bcf"},"source":["# getting some statistical measures about the data\n","parkinsons_data.describe()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \\\n","count 195.000000 195.000000 195.000000 195.000000 \n","mean 154.228641 197.104918 116.324631 0.006220 \n","std 41.390065 91.491548 43.521413 0.004848 \n","min 88.333000 102.145000 65.476000 0.001680 \n","25% 117.572000 134.862500 84.291000 0.003460 \n","50% 148.790000 175.829000 104.315000 0.004940 \n","75% 182.769000 224.205500 140.018500 0.007365 \n","max 260.105000 592.030000 239.170000 0.033160 \n","\n"," MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer \\\n","count 195.000000 195.000000 195.000000 195.000000 195.000000 \n","mean 0.000044 0.003306 0.003446 0.009920 0.029709 \n","std 0.000035 0.002968 0.002759 0.008903 0.018857 \n","min 0.000007 0.000680 0.000920 0.002040 0.009540 \n","25% 0.000020 0.001660 
0.001860 0.004985 0.016505 \n","50% 0.000030 0.002500 0.002690 0.007490 0.022970 \n","75% 0.000060 0.003835 0.003955 0.011505 0.037885 \n","max 0.000260 0.021440 0.019580 0.064330 0.119080 \n","\n"," MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status \\\n","count 195.000000 ... 195.000000 195.000000 195.000000 195.000000 \n","mean 0.282251 ... 0.046993 0.024847 21.885974 0.753846 \n","std 0.194877 ... 0.030459 0.040418 4.425764 0.431878 \n","min 0.085000 ... 0.013640 0.000650 8.441000 0.000000 \n","25% 0.148500 ... 0.024735 0.005925 19.198000 1.000000 \n","50% 0.221000 ... 0.038360 0.011660 22.085000 1.000000 \n","75% 0.350000 ... 0.060795 0.025640 25.075500 1.000000 \n","max 1.302000 ... 0.169420 0.314820 33.047000 1.000000 \n","\n"," RPDE DFA spread1 spread2 D2 PPE \n","count 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 \n","mean 0.498536 0.718099 -5.684397 0.226510 2.381826 0.206552 \n","std 0.103942 0.055336 1.090208 0.083406 0.382799 0.090119 \n","min 0.256570 0.574282 -7.964984 0.006274 1.423287 0.044539 \n","25% 0.421306 0.674758 -6.450096 0.174351 2.099125 0.137451 \n","50% 0.495954 0.722254 -5.720868 0.218885 2.361532 0.194052 \n","75% 0.587562 0.761881 -5.046192 0.279234 2.636456 0.252980 \n","max 0.685151 0.825288 -2.434031 0.450493 3.671155 0.527367 \n","\n","[8 rows x 23 columns]"],"text/html":["\n"," <div id=\"df-b83bb5a5-5785-498f-b146-d62b868ce2a6\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>MDVP:Fo(Hz)</th>\n"," <th>MDVP:Fhi(Hz)</th>\n"," <th>MDVP:Flo(Hz)</th>\n"," <th>MDVP:Jitter(%)</th>\n"," <th>MDVP:Jitter(Abs)</th>\n"," <th>MDVP:RAP</th>\n"," 
<th>MDVP:PPQ</th>\n"," <th>Jitter:DDP</th>\n"," <th>MDVP:Shimmer</th>\n"," <th>MDVP:Shimmer(dB)</th>\n"," <th>...</th>\n"," <th>Shimmer:DDA</th>\n"," <th>NHR</th>\n"," <th>HNR</th>\n"," <th>status</th>\n"," <th>RPDE</th>\n"," <th>DFA</th>\n"," <th>spread1</th>\n"," <th>spread2</th>\n"," <th>D2</th>\n"," <th>PPE</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>count</th>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>...</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," <td>195.000000</td>\n"," </tr>\n"," <tr>\n"," <th>mean</th>\n"," <td>154.228641</td>\n"," <td>197.104918</td>\n"," <td>116.324631</td>\n"," <td>0.006220</td>\n"," <td>0.000044</td>\n"," <td>0.003306</td>\n"," <td>0.003446</td>\n"," <td>0.009920</td>\n"," <td>0.029709</td>\n"," <td>0.282251</td>\n"," <td>...</td>\n"," <td>0.046993</td>\n"," <td>0.024847</td>\n"," <td>21.885974</td>\n"," <td>0.753846</td>\n"," <td>0.498536</td>\n"," <td>0.718099</td>\n"," <td>-5.684397</td>\n"," <td>0.226510</td>\n"," <td>2.381826</td>\n"," <td>0.206552</td>\n"," </tr>\n"," <tr>\n"," <th>std</th>\n"," <td>41.390065</td>\n"," <td>91.491548</td>\n"," <td>43.521413</td>\n"," <td>0.004848</td>\n"," <td>0.000035</td>\n"," <td>0.002968</td>\n"," <td>0.002759</td>\n"," <td>0.008903</td>\n"," <td>0.018857</td>\n"," <td>0.194877</td>\n"," <td>...</td>\n"," <td>0.030459</td>\n"," <td>0.040418</td>\n"," <td>4.425764</td>\n"," <td>0.431878</td>\n"," <td>0.103942</td>\n"," <td>0.055336</td>\n"," <td>1.090208</td>\n"," <td>0.083406</td>\n"," <td>0.382799</td>\n"," <td>0.090119</td>\n"," </tr>\n"," <tr>\n"," <th>min</th>\n"," 
<td>88.333000</td>\n"," <td>102.145000</td>\n"," <td>65.476000</td>\n"," <td>0.001680</td>\n"," <td>0.000007</td>\n"," <td>0.000680</td>\n"," <td>0.000920</td>\n"," <td>0.002040</td>\n"," <td>0.009540</td>\n"," <td>0.085000</td>\n"," <td>...</td>\n"," <td>0.013640</td>\n"," <td>0.000650</td>\n"," <td>8.441000</td>\n"," <td>0.000000</td>\n"," <td>0.256570</td>\n"," <td>0.574282</td>\n"," <td>-7.964984</td>\n"," <td>0.006274</td>\n"," <td>1.423287</td>\n"," <td>0.044539</td>\n"," </tr>\n"," <tr>\n"," <th>25%</th>\n"," <td>117.572000</td>\n"," <td>134.862500</td>\n"," <td>84.291000</td>\n"," <td>0.003460</td>\n"," <td>0.000020</td>\n"," <td>0.001660</td>\n"," <td>0.001860</td>\n"," <td>0.004985</td>\n"," <td>0.016505</td>\n"," <td>0.148500</td>\n"," <td>...</td>\n"," <td>0.024735</td>\n"," <td>0.005925</td>\n"," <td>19.198000</td>\n"," <td>1.000000</td>\n"," <td>0.421306</td>\n"," <td>0.674758</td>\n"," <td>-6.450096</td>\n"," <td>0.174351</td>\n"," <td>2.099125</td>\n"," <td>0.137451</td>\n"," </tr>\n"," <tr>\n"," <th>50%</th>\n"," <td>148.790000</td>\n"," <td>175.829000</td>\n"," <td>104.315000</td>\n"," <td>0.004940</td>\n"," <td>0.000030</td>\n"," <td>0.002500</td>\n"," <td>0.002690</td>\n"," <td>0.007490</td>\n"," <td>0.022970</td>\n"," <td>0.221000</td>\n"," <td>...</td>\n"," <td>0.038360</td>\n"," <td>0.011660</td>\n"," <td>22.085000</td>\n"," <td>1.000000</td>\n"," <td>0.495954</td>\n"," <td>0.722254</td>\n"," <td>-5.720868</td>\n"," <td>0.218885</td>\n"," <td>2.361532</td>\n"," <td>0.194052</td>\n"," </tr>\n"," <tr>\n"," <th>75%</th>\n"," <td>182.769000</td>\n"," <td>224.205500</td>\n"," <td>140.018500</td>\n"," <td>0.007365</td>\n"," <td>0.000060</td>\n"," <td>0.003835</td>\n"," <td>0.003955</td>\n"," <td>0.011505</td>\n"," <td>0.037885</td>\n"," <td>0.350000</td>\n"," <td>...</td>\n"," <td>0.060795</td>\n"," <td>0.025640</td>\n"," <td>25.075500</td>\n"," <td>1.000000</td>\n"," <td>0.587562</td>\n"," <td>0.761881</td>\n"," <td>-5.046192</td>\n"," 
<td>0.279234</td>\n"," <td>2.636456</td>\n"," <td>0.252980</td>\n"," </tr>\n"," <tr>\n"," <th>max</th>\n"," <td>260.105000</td>\n"," <td>592.030000</td>\n"," <td>239.170000</td>\n"," <td>0.033160</td>\n"," <td>0.000260</td>\n"," <td>0.021440</td>\n"," <td>0.019580</td>\n"," <td>0.064330</td>\n"," <td>0.119080</td>\n"," <td>1.302000</td>\n"," <td>...</td>\n"," <td>0.169420</td>\n"," <td>0.314820</td>\n"," <td>33.047000</td>\n"," <td>1.000000</td>\n"," <td>0.685151</td>\n"," <td>0.825288</td>\n"," <td>-2.434031</td>\n"," <td>0.450493</td>\n"," <td>3.671155</td>\n"," <td>0.527367</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>8 rows × 23 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b83bb5a5-5785-498f-b146-d62b868ce2a6')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px 
rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-b83bb5a5-5785-498f-b146-d62b868ce2a6 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-b83bb5a5-5785-498f-b146-d62b868ce2a6');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3O8AclzwExyH","executionInfo":{"status":"ok","timestamp":1653200308701,"user_tz":-330,"elapsed":21,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"6a330028-c2a3-431a-f1cb-b529137cdcc7"},"source":["# distribution of target 
Variable\n","parkinsons_data['status'].value_counts()"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1 147\n","0 48\n","Name: status, dtype: int64"]},"metadata":{},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"L1srlxtEFYfN"},"source":["1 --> Parkinson's Positive\n","\n","0 --> Healthy\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":237},"id":"zUrPan7CFTMq","executionInfo":{"status":"ok","timestamp":1653200308702,"user_tz":-330,"elapsed":18,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"9addca6f-f25f-4cde-aa11-266fbece8b9f"},"source":["# grouping the data based on the target variable\n","# numeric_only=True: the 'name' column is text (object dtype) and cannot be averaged;\n","# pandas >= 2.0 raises a TypeError here unless non-numeric columns are excluded\n","parkinsons_data.groupby('status').mean(numeric_only=True)"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \\\n","status \n","0 181.937771 223.636750 145.207292 0.003866 \n","1 145.180762 188.441463 106.893558 0.006989 \n","\n"," MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer \\\n","status \n","0 0.000023 0.001925 0.002056 0.005776 0.017615 \n","1 0.000051 0.003757 0.003900 0.011273 0.033658 \n","\n"," MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR \\\n","status ... \n","0 0.162958 ... 0.013305 0.028511 0.011483 24.678750 \n","1 0.321204 ...
0.027600 0.053027 0.029211 20.974048 \n","\n"," RPDE DFA spread1 spread2 D2 PPE \n","status \n","0 0.442552 0.695716 -6.759264 0.160292 2.154491 0.123017 \n","1 0.516816 0.725408 -5.333420 0.248133 2.456058 0.233828 \n","\n","[2 rows x 22 columns]"],"text/html":["\n"," <div id=\"df-2223dde4-4970-4602-b034-347b3317c577\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>MDVP:Fo(Hz)</th>\n"," <th>MDVP:Fhi(Hz)</th>\n"," <th>MDVP:Flo(Hz)</th>\n"," <th>MDVP:Jitter(%)</th>\n"," <th>MDVP:Jitter(Abs)</th>\n"," <th>MDVP:RAP</th>\n"," <th>MDVP:PPQ</th>\n"," <th>Jitter:DDP</th>\n"," <th>MDVP:Shimmer</th>\n"," <th>MDVP:Shimmer(dB)</th>\n"," <th>...</th>\n"," <th>MDVP:APQ</th>\n"," <th>Shimmer:DDA</th>\n"," <th>NHR</th>\n"," <th>HNR</th>\n"," <th>RPDE</th>\n"," <th>DFA</th>\n"," <th>spread1</th>\n"," <th>spread2</th>\n"," <th>D2</th>\n"," <th>PPE</th>\n"," </tr>\n"," <tr>\n"," <th>status</th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>181.937771</td>\n"," <td>223.636750</td>\n"," <td>145.207292</td>\n"," <td>0.003866</td>\n"," <td>0.000023</td>\n"," <td>0.001925</td>\n"," <td>0.002056</td>\n"," <td>0.005776</td>\n"," <td>0.017615</td>\n"," <td>0.162958</td>\n"," <td>...</td>\n"," <td>0.013305</td>\n"," <td>0.028511</td>\n"," <td>0.011483</td>\n"," <td>24.678750</td>\n"," 
<td>0.442552</td>\n"," <td>0.695716</td>\n"," <td>-6.759264</td>\n"," <td>0.160292</td>\n"," <td>2.154491</td>\n"," <td>0.123017</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>145.180762</td>\n"," <td>188.441463</td>\n"," <td>106.893558</td>\n"," <td>0.006989</td>\n"," <td>0.000051</td>\n"," <td>0.003757</td>\n"," <td>0.003900</td>\n"," <td>0.011273</td>\n"," <td>0.033658</td>\n"," <td>0.321204</td>\n"," <td>...</td>\n"," <td>0.027600</td>\n"," <td>0.053027</td>\n"," <td>0.029211</td>\n"," <td>20.974048</td>\n"," <td>0.516816</td>\n"," <td>0.725408</td>\n"," <td>-5.333420</td>\n"," <td>0.248133</td>\n"," <td>2.456058</td>\n"," <td>0.233828</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>2 rows × 22 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2223dde4-4970-4602-b034-347b3317c577')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," 
.colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-2223dde4-4970-4602-b034-347b3317c577 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-2223dde4-4970-4602-b034-347b3317c577');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? 
Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"8RY6c0waGSs7"},"source":["Data Pre-Processing"]},{"cell_type":"markdown","metadata":{"id":"We7sRYu7Gc4q"},"source":["Separating the features & Target"]},{"cell_type":"code","metadata":{"id":"UAcz8jFnFuzH","executionInfo":{"status":"ok","timestamp":1653200308702,"user_tz":-330,"elapsed":16,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["X = parkinsons_data.drop(columns=['name','status'], axis=1)\n","Y = parkinsons_data['status']"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"guRof_8WG1Yn","executionInfo":{"status":"ok","timestamp":1653200308702,"user_tz":-330,"elapsed":16,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"531b55ec-5615-47e6-dde6-51295bfe8945"},"source":["print(X)"],"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":[" MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \\\n","0 119.992 157.302 74.997 0.00784 \n","1 122.400 148.650 113.819 0.00968 \n","2 116.682 131.111 111.555 0.01050 \n","3 116.676 137.871 111.366 0.00997 \n","4 116.014 141.781 110.655 0.01284 \n",".. ... ... ... ... 
\n","190 174.188 230.978 94.261 0.00459 \n","191 209.516 253.017 89.488 0.00564 \n","192 174.688 240.005 74.287 0.01360 \n","193 198.764 396.961 74.904 0.00740 \n","194 214.289 260.277 77.973 0.00567 \n","\n"," MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer \\\n","0 0.00007 0.00370 0.00554 0.01109 0.04374 \n","1 0.00008 0.00465 0.00696 0.01394 0.06134 \n","2 0.00009 0.00544 0.00781 0.01633 0.05233 \n","3 0.00009 0.00502 0.00698 0.01505 0.05492 \n","4 0.00011 0.00655 0.00908 0.01966 0.06425 \n",".. ... ... ... ... ... \n","190 0.00003 0.00263 0.00259 0.00790 0.04087 \n","191 0.00003 0.00331 0.00292 0.00994 0.02751 \n","192 0.00008 0.00624 0.00564 0.01873 0.02308 \n","193 0.00004 0.00370 0.00390 0.01109 0.02296 \n","194 0.00003 0.00295 0.00317 0.00885 0.01884 \n","\n"," MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR RPDE \\\n","0 0.426 ... 0.02971 0.06545 0.02211 21.033 0.414783 \n","1 0.626 ... 0.04368 0.09403 0.01929 19.085 0.458359 \n","2 0.482 ... 0.03590 0.08270 0.01309 20.651 0.429895 \n","3 0.517 ... 0.03772 0.08771 0.01353 20.644 0.434969 \n","4 0.584 ... 0.04465 0.10470 0.01767 19.649 0.417356 \n",".. ... ... ... ... ... ... ... \n","190 0.405 ... 0.02745 0.07008 0.02764 19.517 0.448439 \n","191 0.263 ... 0.01879 0.04812 0.01810 19.147 0.431674 \n","192 0.256 ... 0.01667 0.03804 0.10715 17.883 0.407567 \n","193 0.241 ... 0.01588 0.03794 0.07223 19.020 0.451221 \n","194 0.190 ... 0.01373 0.03078 0.04398 21.209 0.462803 \n","\n"," DFA spread1 spread2 D2 PPE \n","0 0.815285 -4.813031 0.266482 2.301442 0.284654 \n","1 0.819521 -4.075192 0.335590 2.486855 0.368674 \n","2 0.825288 -4.443179 0.311173 2.342259 0.332634 \n","3 0.819235 -4.117501 0.334147 2.405554 0.368975 \n","4 0.823484 -3.747787 0.234513 2.332180 0.410335 \n",".. ... ... ... ... ... 
\n","190 0.657899 -6.538586 0.121952 2.657476 0.133050 \n","191 0.683244 -6.195325 0.129303 2.784312 0.168895 \n","192 0.655683 -6.787197 0.158453 2.679772 0.131728 \n","193 0.643956 -6.744577 0.207454 2.138608 0.123306 \n","194 0.664357 -5.724056 0.190667 2.555477 0.148569 \n","\n","[195 rows x 22 columns]\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xSNrvkJoG3cY","executionInfo":{"status":"ok","timestamp":1653200308703,"user_tz":-330,"elapsed":14,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"db156ede-5d9e-4ab4-de6d-ead138a71faf"},"source":["print(Y)"],"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["0 1\n","1 1\n","2 1\n","3 1\n","4 1\n"," ..\n","190 0\n","191 0\n","192 0\n","193 0\n","194 0\n","Name: status, Length: 195, dtype: int64\n"]}]},{"cell_type":"markdown","metadata":{"id":"WDeqEaaHHBAS"},"source":["Splitting the data to training data & Test data"]},{"cell_type":"code","metadata":{"id":"4c6nrCiVG6NB","executionInfo":{"status":"ok","timestamp":1653200309503,"user_tz":-330,"elapsed":810,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)"],"execution_count":13,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6OqUka96H35c","executionInfo":{"status":"ok","timestamp":1653200309504,"user_tz":-330,"elapsed":16,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"47eb1e86-5aa5-41f1-deb2-e02d9d2bffe7"},"source":["print(X.shape, X_train.shape, X_test.shape)"],"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["(195, 22) (156, 22) (39, 22)\n"]}]},{"cell_type":"markdown","metadata":{"id":"QIOAtx35JUMg"},"source":["Model Training"]},{"cell_type":"markdown","metadata":{"id":"fWlsaBNuJV5g"},"source":["Support Vector 
Machine Model"]},{"cell_type":"code","metadata":{"id":"IDInA1u5JCZ9","executionInfo":{"status":"ok","timestamp":1653200309504,"user_tz":-330,"elapsed":16,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["model = svm.SVC(kernel='linear')"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"F01DNpqWKmaW","executionInfo":{"status":"ok","timestamp":1653200309505,"user_tz":-330,"elapsed":16,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"a681ba26-aa79-4b09-a0a3-ddd653726c52"},"source":["# training the SVM model with training data\n","model.fit(X_train, Y_train)"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/plain":["SVC(kernel='linear')"]},"metadata":{},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"1z_-nZfuLJrH"},"source":["Model Evaluation"]},{"cell_type":"markdown","metadata":{"id":"Rj3XAnF8LMF4"},"source":["Accuracy Score"]},{"cell_type":"code","metadata":{"id":"5LwxNgnqK1Za","executionInfo":{"status":"ok","timestamp":1653200309505,"user_tz":-330,"elapsed":14,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# accuracy score on training data\n","X_train_prediction = model.predict(X_train)\n","training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-dS9tcGdLm41","executionInfo":{"status":"ok","timestamp":1653200309506,"user_tz":-330,"elapsed":15,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"bbc0b0cd-ed16-4430-f137-8fdaf1f2183d"},"source":["print('Accuracy score of training data : ', training_data_accuracy)"],"execution_count":18,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy score of training data : 
0.8717948717948718\n"]}]},{"cell_type":"code","metadata":{"id":"rNUO2uHmLtjY","executionInfo":{"status":"ok","timestamp":1653200309506,"user_tz":-330,"elapsed":14,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# accuracy score on test data\n","X_test_prediction = model.predict(X_test)\n","test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"],"execution_count":19,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BsF3UnQ2L_aR","executionInfo":{"status":"ok","timestamp":1653200309506,"user_tz":-330,"elapsed":14,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"80347c36-1481-4ba3-9cf3-7ed635ca5a85"},"source":["print('Accuracy score of test data : ', test_data_accuracy)"],"execution_count":20,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy score of test data : 0.8717948717948718\n"]}]},{"cell_type":"markdown","metadata":{"id":"QlR4JG4YMfOR"},"source":["Building a Predictive System"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"w0FjSoO1MGBU","executionInfo":{"status":"ok","timestamp":1653200309507,"user_tz":-330,"elapsed":13,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"a4715754-198f-4927-9df3-e8a104a9b154"},"source":["input_data = (197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569)\n","\n","# changing input data to a numpy array\n","input_data_as_numpy_array = np.asarray(input_data)\n","\n","# reshape the numpy array\n","input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n","\n","prediction = model.predict(input_data_reshaped)\n","print(prediction)\n","\n","\n","if (prediction[0] == 0):\n"," print(\"The Person does not have Parkinsons Disease\")\n","\n","else:\n"," print(\"The 
Person has Parkinsons\")\n"],"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["[0]\n","The Person does not have Parkinsons Disease\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n"," \"X does not have valid feature names, but\"\n"]}]},{"cell_type":"markdown","metadata":{"id":"FCHCMHpshHU4"},"source":["Saving the trained model"]},{"cell_type":"code","metadata":{"id":"cdmTOR4MhHCB","executionInfo":{"status":"ok","timestamp":1653200309507,"user_tz":-330,"elapsed":12,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["import pickle"],"execution_count":22,"outputs":[]},{"cell_type":"code","metadata":{"id":"4gN09lokhKuZ","executionInfo":{"status":"ok","timestamp":1653200309508,"user_tz":-330,"elapsed":13,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["filename = 'parkinsons_model.sav'\n","pickle.dump(model, open(filename, 'wb'))"],"execution_count":23,"outputs":[]},{"cell_type":"code","metadata":{"id":"IKW4D5CqhP5X","executionInfo":{"status":"ok","timestamp":1653200309510,"user_tz":-330,"elapsed":14,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# loading the saved model\n","loaded_model = pickle.load(open('parkinsons_model.sav', 'rb'))"],"execution_count":24,"outputs":[]},{"cell_type":"code","source":["for column in X.columns:\n"," print(column)"],"metadata":{"id":"m8FO1U8hRVm_","executionInfo":{"status":"ok","timestamp":1653200309511,"user_tz":-330,"elapsed":15,"user":{"displayName":"siddhardh 
selvam","userId":"13966379820454708749"}},"outputId":"079be68d-fb39-4544-9100-c6388b397091","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["MDVP:Fo(Hz)\n","MDVP:Fhi(Hz)\n","MDVP:Flo(Hz)\n","MDVP:Jitter(%)\n","MDVP:Jitter(Abs)\n","MDVP:RAP\n","MDVP:PPQ\n","Jitter:DDP\n","MDVP:Shimmer\n","MDVP:Shimmer(dB)\n","Shimmer:APQ3\n","Shimmer:APQ5\n","MDVP:APQ\n","Shimmer:DDA\n","NHR\n","HNR\n","RPDE\n","DFA\n","spread1\n","spread2\n","D2\n","PPE\n"]}]},{"cell_type":"code","source":[""],"metadata":{"id":"JPyuHFeDRXZU","executionInfo":{"status":"ok","timestamp":1653200309512,"user_tz":-330,"elapsed":15,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"execution_count":25,"outputs":[]}]}
|
colab_files_to_train_models/Multiple disease prediction system - diabetes.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Multiple disease prediction system - diabetes.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyNUMtWtqiQOs+bSuSretvw5"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"LnPbntVRnfvV"},"source":["Importing the Dependencies"]},{"cell_type":"code","metadata":{"id":"-71UtHzNVWjB","executionInfo":{"status":"ok","timestamp":1653200161038,"user_tz":-330,"elapsed":1362,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["import numpy as np\n","import pandas as pd\n","from sklearn.model_selection import train_test_split\n","from sklearn import svm\n","from sklearn.metrics import accuracy_score"],"execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"bmfOfG8joBBy"},"source":["Data Collection and Analysis\n","\n","PIMA Diabetes Dataset"]},{"cell_type":"code","metadata":{"id":"Xpw6Mj_pn_TL","executionInfo":{"status":"ok","timestamp":1653200161039,"user_tz":-330,"elapsed":37,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# loading the diabetes dataset to a pandas DataFrame\n","diabetes_dataset = pd.read_csv('/content/diabetes.csv') "],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"-tjO09ncovoh","executionInfo":{"status":"ok","timestamp":1653200161040,"user_tz":-330,"elapsed":36,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"f4faaf8a-3fc8-413a-bbd4-bb272589a73c"},"source":["# printing the first 5 rows of the dataset\n","diabetes_dataset.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n","0 6 148 72 35 0 33.6 \n","1 1 85 66 29 0 26.6 \n","2 8 183 64 0 0 23.3 \n","3 1 89 66 23 94 
28.1 \n","4 0 137 40 35 168 43.1 \n","\n"," DiabetesPedigreeFunction Age Outcome \n","0 0.627 50 1 \n","1 0.351 31 0 \n","2 0.672 32 1 \n","3 0.167 21 0 \n","4 2.288 33 1 "],"text/html":["\n"," <div id=\"df-543bb76d-9958-491b-8063-13990fcca102\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Pregnancies</th>\n"," <th>Glucose</th>\n"," <th>BloodPressure</th>\n"," <th>SkinThickness</th>\n"," <th>Insulin</th>\n"," <th>BMI</th>\n"," <th>DiabetesPedigreeFunction</th>\n"," <th>Age</th>\n"," <th>Outcome</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>6</td>\n"," <td>148</td>\n"," <td>72</td>\n"," <td>35</td>\n"," <td>0</td>\n"," <td>33.6</td>\n"," <td>0.627</td>\n"," <td>50</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>1</td>\n"," <td>85</td>\n"," <td>66</td>\n"," <td>29</td>\n"," <td>0</td>\n"," <td>26.6</td>\n"," <td>0.351</td>\n"," <td>31</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>8</td>\n"," <td>183</td>\n"," <td>64</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>23.3</td>\n"," <td>0.672</td>\n"," <td>32</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>1</td>\n"," <td>89</td>\n"," <td>66</td>\n"," <td>23</td>\n"," <td>94</td>\n"," <td>28.1</td>\n"," <td>0.167</td>\n"," <td>21</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0</td>\n"," <td>137</td>\n"," <td>40</td>\n"," <td>35</td>\n"," <td>168</td>\n"," <td>43.1</td>\n"," <td>2.288</td>\n"," <td>33</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" 
onclick=\"convertToInteractive('df-543bb76d-9958-491b-8063-13990fcca102')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-543bb76d-9958-491b-8063-13990fcca102 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 
'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-543bb76d-9958-491b-8063-13990fcca102');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lynParo6pEMB","executionInfo":{"status":"ok","timestamp":1653200161040,"user_tz":-330,"elapsed":28,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"b8e97658-d5ae-4407-e3c0-f4b7b0d0321e"},"source":["# number of rows and Columns in this dataset\n","diabetes_dataset.shape"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(768, 9)"]},"metadata":{},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":300},"id":"3NDJOlrEpmoL","executionInfo":{"status":"ok","timestamp":1653200161041,"user_tz":-330,"elapsed":26,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"1a166b9c-0ec1-42dc-e0f4-32924aa1ab65"},"source":["# getting the statistical measures of the data\n","diabetes_dataset.describe()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n","count 
768.000000 768.000000 768.000000 768.000000 768.000000 \n","mean 3.845052 120.894531 69.105469 20.536458 79.799479 \n","std 3.369578 31.972618 19.355807 15.952218 115.244002 \n","min 0.000000 0.000000 0.000000 0.000000 0.000000 \n","25% 1.000000 99.000000 62.000000 0.000000 0.000000 \n","50% 3.000000 117.000000 72.000000 23.000000 30.500000 \n","75% 6.000000 140.250000 80.000000 32.000000 127.250000 \n","max 17.000000 199.000000 122.000000 99.000000 846.000000 \n","\n"," BMI DiabetesPedigreeFunction Age Outcome \n","count 768.000000 768.000000 768.000000 768.000000 \n","mean 31.992578 0.471876 33.240885 0.348958 \n","std 7.884160 0.331329 11.760232 0.476951 \n","min 0.000000 0.078000 21.000000 0.000000 \n","25% 27.300000 0.243750 24.000000 0.000000 \n","50% 32.000000 0.372500 29.000000 0.000000 \n","75% 36.600000 0.626250 41.000000 1.000000 \n","max 67.100000 2.420000 81.000000 1.000000 "],"text/html":["\n"," <div id=\"df-e3ca105c-2704-4b75-87c3-2eac2e0e898f\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Pregnancies</th>\n"," <th>Glucose</th>\n"," <th>BloodPressure</th>\n"," <th>SkinThickness</th>\n"," <th>Insulin</th>\n"," <th>BMI</th>\n"," <th>DiabetesPedigreeFunction</th>\n"," <th>Age</th>\n"," <th>Outcome</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>count</th>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," <td>768.000000</td>\n"," </tr>\n"," <tr>\n"," <th>mean</th>\n"," <td>3.845052</td>\n"," <td>120.894531</td>\n"," 
<td>69.105469</td>\n"," <td>20.536458</td>\n"," <td>79.799479</td>\n"," <td>31.992578</td>\n"," <td>0.471876</td>\n"," <td>33.240885</td>\n"," <td>0.348958</td>\n"," </tr>\n"," <tr>\n"," <th>std</th>\n"," <td>3.369578</td>\n"," <td>31.972618</td>\n"," <td>19.355807</td>\n"," <td>15.952218</td>\n"," <td>115.244002</td>\n"," <td>7.884160</td>\n"," <td>0.331329</td>\n"," <td>11.760232</td>\n"," <td>0.476951</td>\n"," </tr>\n"," <tr>\n"," <th>min</th>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.078000</td>\n"," <td>21.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>25%</th>\n"," <td>1.000000</td>\n"," <td>99.000000</td>\n"," <td>62.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>27.300000</td>\n"," <td>0.243750</td>\n"," <td>24.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>50%</th>\n"," <td>3.000000</td>\n"," <td>117.000000</td>\n"," <td>72.000000</td>\n"," <td>23.000000</td>\n"," <td>30.500000</td>\n"," <td>32.000000</td>\n"," <td>0.372500</td>\n"," <td>29.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>75%</th>\n"," <td>6.000000</td>\n"," <td>140.250000</td>\n"," <td>80.000000</td>\n"," <td>32.000000</td>\n"," <td>127.250000</td>\n"," <td>36.600000</td>\n"," <td>0.626250</td>\n"," <td>41.000000</td>\n"," <td>1.000000</td>\n"," </tr>\n"," <tr>\n"," <th>max</th>\n"," <td>17.000000</td>\n"," <td>199.000000</td>\n"," <td>122.000000</td>\n"," <td>99.000000</td>\n"," <td>846.000000</td>\n"," <td>67.100000</td>\n"," <td>2.420000</td>\n"," <td>81.000000</td>\n"," <td>1.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e3ca105c-2704-4b75-87c3-2eac2e0e898f')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" 
height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-e3ca105c-2704-4b75-87c3-2eac2e0e898f button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 
'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-e3ca105c-2704-4b75-87c3-2eac2e0e898f');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LrpHzaGpp5dQ","executionInfo":{"status":"ok","timestamp":1653200161043,"user_tz":-330,"elapsed":27,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"5da75a35-5065-4c08-abb1-13bc921dec5a"},"source":["diabetes_dataset['Outcome'].value_counts()"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 500\n","1 268\n","Name: Outcome, dtype: int64"]},"metadata":{},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"cB1qRaNcqeh5"},"source":["0 --> Non-Diabetic\n","\n","1 --> Diabetic"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":143},"id":"I6MWR0k_qSCK","executionInfo":{"status":"ok","timestamp":1653200161045,"user_tz":-330,"elapsed":27,"user":{"displayName":"siddhardh 
selvam","userId":"13966379820454708749"}},"outputId":"1e817bc0-c41e-4181-a8e5-37885e765862"},"source":["diabetes_dataset.groupby('Outcome').mean()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n","Outcome \n","0 3.298000 109.980000 68.184000 19.664000 68.792000 \n","1 4.865672 141.257463 70.824627 22.164179 100.335821 \n","\n"," BMI DiabetesPedigreeFunction Age \n","Outcome \n","0 30.304200 0.429734 31.190000 \n","1 35.142537 0.550500 37.067164 "],"text/html":["\n"," <div id=\"df-d6444b5f-72b8-4e7e-b7a2-6fc0efdc7816\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Pregnancies</th>\n"," <th>Glucose</th>\n"," <th>BloodPressure</th>\n"," <th>SkinThickness</th>\n"," <th>Insulin</th>\n"," <th>BMI</th>\n"," <th>DiabetesPedigreeFunction</th>\n"," <th>Age</th>\n"," </tr>\n"," <tr>\n"," <th>Outcome</th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>3.298000</td>\n"," <td>109.980000</td>\n"," <td>68.184000</td>\n"," <td>19.664000</td>\n"," <td>68.792000</td>\n"," <td>30.304200</td>\n"," <td>0.429734</td>\n"," <td>31.190000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>4.865672</td>\n"," <td>141.257463</td>\n"," <td>70.824627</td>\n"," <td>22.164179</td>\n"," <td>100.335821</td>\n"," <td>35.142537</td>\n"," <td>0.550500</td>\n"," <td>37.067164</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" 
onclick=\"convertToInteractive('df-d6444b5f-72b8-4e7e-b7a2-6fc0efdc7816')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-d6444b5f-72b8-4e7e-b7a2-6fc0efdc7816 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 
'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-d6444b5f-72b8-4e7e-b7a2-6fc0efdc7816');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"RoDW7l9mqqHZ","executionInfo":{"status":"ok","timestamp":1653200161046,"user_tz":-330,"elapsed":27,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# separating the data and labels\n","X = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n","Y = diabetes_dataset['Outcome']"],"execution_count":8,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3eiRW9M9raMm","executionInfo":{"status":"ok","timestamp":1653200161046,"user_tz":-330,"elapsed":26,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"107aefe8-a40e-4a8c-efa4-e6a4929b8343"},"source":["print(X)"],"execution_count":9,"outputs":[{"output_type":"stream","name":"stdout","text":[" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n","0 6 148 72 35 0 33.6 \n","1 1 85 66 29 0 26.6 \n","2 8 183 64 0 0 23.3 \n","3 1 89 66 23 94 28.1 \n","4 0 137 40 35 168 43.1 \n",".. ... ... ... ... ... ... 
\n","763 10 101 76 48 180 32.9 \n","764 2 122 70 27 0 36.8 \n","765 5 121 72 23 112 26.2 \n","766 1 126 60 0 0 30.1 \n","767 1 93 70 31 0 30.4 \n","\n"," DiabetesPedigreeFunction Age \n","0 0.627 50 \n","1 0.351 31 \n","2 0.672 32 \n","3 0.167 21 \n","4 2.288 33 \n",".. ... ... \n","763 0.171 63 \n","764 0.340 27 \n","765 0.245 30 \n","766 0.349 47 \n","767 0.315 23 \n","\n","[768 rows x 8 columns]\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"AoxgTJAMrcCl","executionInfo":{"status":"ok","timestamp":1653200161047,"user_tz":-330,"elapsed":25,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"afb40f51-fd9c-43ff-c51f-7b49a7f9b152"},"source":["print(Y)"],"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["0 1\n","1 0\n","2 1\n","3 0\n","4 1\n"," ..\n","763 0\n","764 0\n","765 0\n","766 1\n","767 0\n","Name: Outcome, Length: 768, dtype: int64\n"]}]},{"cell_type":"markdown","metadata":{"id":"gHciEFkxsoQP"},"source":["Train Test Split"]},{"cell_type":"code","metadata":{"id":"AEfKGj_yslvD","executionInfo":{"status":"ok","timestamp":1653200161047,"user_tz":-330,"elapsed":21,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, stratify=Y, random_state=2)"],"execution_count":11,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DR05T-o0t3FQ","executionInfo":{"status":"ok","timestamp":1653200161048,"user_tz":-330,"elapsed":21,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"1e35e84b-b1ac-4cf3-c2af-31eb3bf2e71c"},"source":["print(X.shape, X_train.shape, X_test.shape)"],"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["(768, 8) (614, 8) (154, 8)\n"]}]},{"cell_type":"markdown","metadata":{"id":"ElJ3tkOtuC_n"},"source":["Training the 
Model"]},{"cell_type":"code","metadata":{"id":"5szLWHlNt9xc","executionInfo":{"status":"ok","timestamp":1653200161048,"user_tz":-330,"elapsed":19,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["classifier = svm.SVC(kernel='linear')"],"execution_count":13,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ncJWY_7suPAb","executionInfo":{"status":"ok","timestamp":1653200163540,"user_tz":-330,"elapsed":2510,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"00260e64-02fd-426f-d274-717bbf9318d6"},"source":["#training the support vector Machine Classifier\n","classifier.fit(X_train, Y_train)"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/plain":["SVC(kernel='linear')"]},"metadata":{},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"UV4-CAfquiyP"},"source":["Model Evaluation"]},{"cell_type":"markdown","metadata":{"id":"yhAjGPJWunXa"},"source":["Accuracy Score"]},{"cell_type":"code","metadata":{"id":"fJLEPQK7ueXp","executionInfo":{"status":"ok","timestamp":1653200163541,"user_tz":-330,"elapsed":27,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# accuracy score on the training data\n","X_train_prediction = classifier.predict(X_train)\n","training_data_accuracy = accuracy_score(X_train_prediction, Y_train)"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mmJ22qhVvNwj","executionInfo":{"status":"ok","timestamp":1653200163541,"user_tz":-330,"elapsed":25,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"ec01e94f-dbed-4286-9413-a05dab7eb7bc"},"source":["print('Accuracy score of the training data : ', training_data_accuracy)"],"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy score of the training data : 
0.7833876221498371\n"]}]},{"cell_type":"code","metadata":{"id":"G2CICFMEvcCl","executionInfo":{"status":"ok","timestamp":1653200163542,"user_tz":-330,"elapsed":24,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# accuracy score on the test data\n","X_test_prediction = classifier.predict(X_test)\n","test_data_accuracy = accuracy_score(X_test_prediction, Y_test)"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"i2GcW_t_vz7C","executionInfo":{"status":"ok","timestamp":1653200163542,"user_tz":-330,"elapsed":23,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"638e2f46-36ec-471a-b9f7-ac7e319e5fea"},"source":["print('Accuracy score of the test data : ', test_data_accuracy)"],"execution_count":18,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy score of the test data : 0.7727272727272727\n"]}]},{"cell_type":"markdown","metadata":{"id":"gq8ZX1xpwPF5"},"source":["Making a Predictive System"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"U-ULRe4yv5tH","executionInfo":{"status":"ok","timestamp":1653200163543,"user_tz":-330,"elapsed":21,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"6d829789-0c86-469d-ec6a-f20276f0a3ed"},"source":["input_data = (5,166,72,19,175,25.8,0.587,51)\n","\n","# changing the input_data to numpy array\n","input_data_as_numpy_array = np.asarray(input_data)\n","\n","# reshape the array as we are predicting for one instance\n","input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n","\n","prediction = classifier.predict(input_data_reshaped)\n","print(prediction)\n","\n","if (prediction[0] == 0):\n"," print('The person is not diabetic')\n","else:\n"," print('The person is diabetic')"],"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":["[1]\n","The person is 
diabetic\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n"," \"X does not have valid feature names, but\"\n"]}]},{"cell_type":"markdown","metadata":{"id":"FCHCMHpshHU4"},"source":["Saving the trained model"]},{"cell_type":"code","metadata":{"id":"cdmTOR4MhHCB","executionInfo":{"status":"ok","timestamp":1653200163543,"user_tz":-330,"elapsed":18,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["import pickle"],"execution_count":20,"outputs":[]},{"cell_type":"code","metadata":{"id":"4gN09lokhKuZ","executionInfo":{"status":"ok","timestamp":1653200163543,"user_tz":-330,"elapsed":17,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["filename = 'diabetes_model.sav'\n","pickle.dump(classifier, open(filename, 'wb'))"],"execution_count":21,"outputs":[]},{"cell_type":"code","metadata":{"id":"IKW4D5CqhP5X","executionInfo":{"status":"ok","timestamp":1653200163544,"user_tz":-330,"elapsed":18,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# loading the saved model\n","loaded_model = pickle.load(open('diabetes_model.sav', 'rb'))"],"execution_count":22,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"exbg9-VWiHRx","executionInfo":{"status":"ok","timestamp":1653200163544,"user_tz":-330,"elapsed":17,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"e9d0e88b-67b8-457a-a252-22633cdc382e"},"source":["input_data = (5,166,72,19,175,25.8,0.587,51)\n","\n","# changing the input_data to numpy array\n","input_data_as_numpy_array = np.asarray(input_data)\n","\n","# reshape the array as we are predicting for one instance\n","input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n","\n","prediction = 
loaded_model.predict(input_data_reshaped)\n","print(prediction)\n","\n","if (prediction[0] == 0):\n"," print('The person is not diabetic')\n","else:\n"," print('The person is diabetic')"],"execution_count":23,"outputs":[{"output_type":"stream","name":"stdout","text":["[1]\n","The person is diabetic\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n"," \"X does not have valid feature names, but\"\n"]}]},{"cell_type":"code","source":["for column in X.columns:\n"," print(column)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IP-TYuEFOTF4","executionInfo":{"status":"ok","timestamp":1653200163545,"user_tz":-330,"elapsed":15,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"508fa3ac-ee67-4361-e558-d6b22b9c2c2c"},"execution_count":24,"outputs":[{"output_type":"stream","name":"stdout","text":["Pregnancies\n","Glucose\n","BloodPressure\n","SkinThickness\n","Insulin\n","BMI\n","DiabetesPedigreeFunction\n","Age\n"]}]}]}
|
colab_files_to_train_models/Multiple disease prediction system - heart.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Multiple disease prediction system - heart.ipynb","provenance":[],"authorship_tag":"ABX9TyNkEJ4hIKtyX260ZMlxNVm4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"aTb-9TFFqprC"},"source":["Importing the Dependencies"]},{"cell_type":"code","metadata":{"id":"3q9U3S_whh3-","executionInfo":{"status":"ok","timestamp":1653200233515,"user_tz":-330,"elapsed":1659,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["import numpy as np\n","import pandas as pd\n","from sklearn.model_selection import train_test_split\n","from sklearn.linear_model import LogisticRegression\n","from sklearn.metrics import accuracy_score"],"execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"egMd5zeurTMR"},"source":["Data Collection and Processing"]},{"cell_type":"code","metadata":{"id":"0q-3-LkQrREV","executionInfo":{"status":"ok","timestamp":1653200233516,"user_tz":-330,"elapsed":13,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# loading the csv data to a Pandas DataFrame\n","heart_data = pd.read_csv('/content/heart.csv')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"M8dQxSTqriWD","executionInfo":{"status":"ok","timestamp":1653200233516,"user_tz":-330,"elapsed":12,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"0ab48855-8403-4103-c7c8-e307c07da055"},"source":["# print first 5 rows of the dataset\n","heart_data.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n","0 63 1 3 145 233 1 0 150 0 2.3 0 \n","1 37 1 2 130 250 0 1 187 0 3.5 0 \n","2 41 0 1 130 204 0 0 172 0 1.4 2 \n","3 56 1 1 120 236 0 1 178 0 0.8 2 
\n","4 57 0 0 120 354 0 1 163 1 0.6 2 \n","\n"," ca thal target \n","0 0 1 1 \n","1 0 2 1 \n","2 0 2 1 \n","3 0 2 1 \n","4 0 2 1 "],"text/html":["\n"," <div id=\"df-c22c32f4-9008-4881-915a-af9a1cc44c73\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>age</th>\n"," <th>sex</th>\n"," <th>cp</th>\n"," <th>trestbps</th>\n"," <th>chol</th>\n"," <th>fbs</th>\n"," <th>restecg</th>\n"," <th>thalach</th>\n"," <th>exang</th>\n"," <th>oldpeak</th>\n"," <th>slope</th>\n"," <th>ca</th>\n"," <th>thal</th>\n"," <th>target</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>63</td>\n"," <td>1</td>\n"," <td>3</td>\n"," <td>145</td>\n"," <td>233</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>150</td>\n"," <td>0</td>\n"," <td>2.3</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>37</td>\n"," <td>1</td>\n"," <td>2</td>\n"," <td>130</td>\n"," <td>250</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>187</td>\n"," <td>0</td>\n"," <td>3.5</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>41</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>130</td>\n"," <td>204</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>172</td>\n"," <td>0</td>\n"," <td>1.4</td>\n"," <td>2</td>\n"," <td>0</td>\n"," <td>2</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>56</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>120</td>\n"," <td>236</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>178</td>\n"," <td>0</td>\n"," <td>0.8</td>\n"," <td>2</td>\n"," <td>0</td>\n"," <td>2</td>\n"," <td>1</td>\n"," 
</tr>\n"," <tr>\n"," <th>4</th>\n"," <td>57</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>120</td>\n"," <td>354</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>163</td>\n"," <td>1</td>\n"," <td>0.6</td>\n"," <td>2</td>\n"," <td>0</td>\n"," <td>2</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c22c32f4-9008-4881-915a-af9a1cc44c73')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 
1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-c22c32f4-9008-4881-915a-af9a1cc44c73 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-c22c32f4-9008-4881-915a-af9a1cc44c73');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"Fx_aCZDgrqdR","executionInfo":{"status":"ok","timestamp":1653200234314,"user_tz":-330,"elapsed":808,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"a8dea85c-ba32-493b-c199-354a6e4ce093"},"source":["# print last 5 rows of the dataset\n","heart_data.tail()"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n","298 57 0 0 140 241 0 1 123 1 0.2 \n","299 45 1 3 110 264 0 1 132 0 1.2 \n","300 68 1 0 144 193 1 1 141 0 3.4 \n","301 57 1 0 130 131 0 1 115 1 1.2 \n","302 57 0 1 130 236 0 0 174 0 0.0 \n","\n"," slope ca thal target \n","298 1 0 3 0 \n","299 1 0 3 0 \n","300 1 2 3 0 \n","301 1 1 3 
0 \n","302 1 1 2 0 "],"text/html":["\n"," <div id=\"df-8a23c18a-4205-4dce-9342-e086c7334c7b\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>age</th>\n"," <th>sex</th>\n"," <th>cp</th>\n"," <th>trestbps</th>\n"," <th>chol</th>\n"," <th>fbs</th>\n"," <th>restecg</th>\n"," <th>thalach</th>\n"," <th>exang</th>\n"," <th>oldpeak</th>\n"," <th>slope</th>\n"," <th>ca</th>\n"," <th>thal</th>\n"," <th>target</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>298</th>\n"," <td>57</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>140</td>\n"," <td>241</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>123</td>\n"," <td>1</td>\n"," <td>0.2</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>3</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>299</th>\n"," <td>45</td>\n"," <td>1</td>\n"," <td>3</td>\n"," <td>110</td>\n"," <td>264</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>132</td>\n"," <td>0</td>\n"," <td>1.2</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>3</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>300</th>\n"," <td>68</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>144</td>\n"," <td>193</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>141</td>\n"," <td>0</td>\n"," <td>3.4</td>\n"," <td>1</td>\n"," <td>2</td>\n"," <td>3</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>301</th>\n"," <td>57</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>130</td>\n"," <td>131</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>115</td>\n"," <td>1</td>\n"," <td>1.2</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>3</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>302</th>\n"," <td>57</td>\n"," <td>0</td>\n"," <td>1</td>\n"," 
<td>130</td>\n"," <td>236</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>174</td>\n"," <td>0</td>\n"," <td>0.0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>2</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8a23c18a-4205-4dce-9342-e086c7334c7b')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," 
<script>\n"," const buttonEl =\n"," document.querySelector('#df-8a23c18a-4205-4dce-9342-e086c7334c7b button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-8a23c18a-4205-4dce-9342-e086c7334c7b');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8nX1tIzbrz0u","executionInfo":{"status":"ok","timestamp":1653200234314,"user_tz":-330,"elapsed":28,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"aed82820-3f95-4fc3-bd5a-21dcbd434fb7"},"source":["# number of rows and columns in the dataset\n","heart_data.shape"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(303, 14)"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7_xTcw1Sr6aJ","executionInfo":{"status":"ok","timestamp":1653200234315,"user_tz":-330,"elapsed":26,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"3dfcf6cf-f814-4b3b-f8be-57e2ea6ea7b2"},"source":["# getting some info about the 
data\n","heart_data.info()"],"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 303 entries, 0 to 302\n","Data columns (total 14 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 age 303 non-null int64 \n"," 1 sex 303 non-null int64 \n"," 2 cp 303 non-null int64 \n"," 3 trestbps 303 non-null int64 \n"," 4 chol 303 non-null int64 \n"," 5 fbs 303 non-null int64 \n"," 6 restecg 303 non-null int64 \n"," 7 thalach 303 non-null int64 \n"," 8 exang 303 non-null int64 \n"," 9 oldpeak 303 non-null float64\n"," 10 slope 303 non-null int64 \n"," 11 ca 303 non-null int64 \n"," 12 thal 303 non-null int64 \n"," 13 target 303 non-null int64 \n","dtypes: float64(1), int64(13)\n","memory usage: 33.3 KB\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"GjHtW31rsGlb","executionInfo":{"status":"ok","timestamp":1653200234316,"user_tz":-330,"elapsed":25,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"062af8fa-b836-480d-93dd-b84f335f0839"},"source":["# checking for missing values\n","heart_data.isnull().sum()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["age 0\n","sex 0\n","cp 0\n","trestbps 0\n","chol 0\n","fbs 0\n","restecg 0\n","thalach 0\n","exang 0\n","oldpeak 0\n","slope 0\n","ca 0\n","thal 0\n","target 0\n","dtype: int64"]},"metadata":{},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":364},"id":"OHmcP7DJsSEP","executionInfo":{"status":"ok","timestamp":1653200234317,"user_tz":-330,"elapsed":24,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"b08b05fa-7f04-45bb-8f9b-c676a77834c3"},"source":["# statistical measures about the data\n","heart_data.describe()"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 
age sex cp trestbps chol fbs \\\n","count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n","mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n","std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n","min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n","25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n","50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n","75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n","max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n","\n"," restecg thalach exang oldpeak slope ca \\\n","count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n","mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n","std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n","min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n","25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n","50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n","75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n","max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n","\n"," thal target \n","count 303.000000 303.000000 \n","mean 2.313531 0.544554 \n","std 0.612277 0.498835 \n","min 0.000000 0.000000 \n","25% 2.000000 0.000000 \n","50% 2.000000 1.000000 \n","75% 3.000000 1.000000 \n","max 3.000000 1.000000 "],"text/html":["\n"," <div id=\"df-8077a748-f74f-4f21-94b6-9fc7e8a6e707\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>age</th>\n"," <th>sex</th>\n"," <th>cp</th>\n"," <th>trestbps</th>\n"," <th>chol</th>\n"," 
<th>fbs</th>\n"," <th>restecg</th>\n"," <th>thalach</th>\n"," <th>exang</th>\n"," <th>oldpeak</th>\n"," <th>slope</th>\n"," <th>ca</th>\n"," <th>thal</th>\n"," <th>target</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>count</th>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," <td>303.000000</td>\n"," </tr>\n"," <tr>\n"," <th>mean</th>\n"," <td>54.366337</td>\n"," <td>0.683168</td>\n"," <td>0.966997</td>\n"," <td>131.623762</td>\n"," <td>246.264026</td>\n"," <td>0.148515</td>\n"," <td>0.528053</td>\n"," <td>149.646865</td>\n"," <td>0.326733</td>\n"," <td>1.039604</td>\n"," <td>1.399340</td>\n"," <td>0.729373</td>\n"," <td>2.313531</td>\n"," <td>0.544554</td>\n"," </tr>\n"," <tr>\n"," <th>std</th>\n"," <td>9.082101</td>\n"," <td>0.466011</td>\n"," <td>1.032052</td>\n"," <td>17.538143</td>\n"," <td>51.830751</td>\n"," <td>0.356198</td>\n"," <td>0.525860</td>\n"," <td>22.905161</td>\n"," <td>0.469794</td>\n"," <td>1.161075</td>\n"," <td>0.616226</td>\n"," <td>1.022606</td>\n"," <td>0.612277</td>\n"," <td>0.498835</td>\n"," </tr>\n"," <tr>\n"," <th>min</th>\n"," <td>29.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>94.000000</td>\n"," <td>126.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>71.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>25%</th>\n"," <td>47.500000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>120.000000</td>\n"," <td>211.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>133.500000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>1.000000</td>\n"," 
<td>0.000000</td>\n"," <td>2.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>50%</th>\n"," <td>55.000000</td>\n"," <td>1.000000</td>\n"," <td>1.000000</td>\n"," <td>130.000000</td>\n"," <td>240.000000</td>\n"," <td>0.000000</td>\n"," <td>1.000000</td>\n"," <td>153.000000</td>\n"," <td>0.000000</td>\n"," <td>0.800000</td>\n"," <td>1.000000</td>\n"," <td>0.000000</td>\n"," <td>2.000000</td>\n"," <td>1.000000</td>\n"," </tr>\n"," <tr>\n"," <th>75%</th>\n"," <td>61.000000</td>\n"," <td>1.000000</td>\n"," <td>2.000000</td>\n"," <td>140.000000</td>\n"," <td>274.500000</td>\n"," <td>0.000000</td>\n"," <td>1.000000</td>\n"," <td>166.000000</td>\n"," <td>1.000000</td>\n"," <td>1.600000</td>\n"," <td>2.000000</td>\n"," <td>1.000000</td>\n"," <td>3.000000</td>\n"," <td>1.000000</td>\n"," </tr>\n"," <tr>\n"," <th>max</th>\n"," <td>77.000000</td>\n"," <td>1.000000</td>\n"," <td>3.000000</td>\n"," <td>200.000000</td>\n"," <td>564.000000</td>\n"," <td>1.000000</td>\n"," <td>2.000000</td>\n"," <td>202.000000</td>\n"," <td>1.000000</td>\n"," <td>6.200000</td>\n"," <td>2.000000</td>\n"," <td>4.000000</td>\n"," <td>3.000000</td>\n"," <td>1.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8077a748-f74f-4f21-94b6-9fc7e8a6e707')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 
2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-8077a748-f74f-4f21-94b6-9fc7e8a6e707 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-8077a748-f74f-4f21-94b6-9fc7e8a6e707');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? 
Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4InaOSIUsfWP","executionInfo":{"status":"ok","timestamp":1653200234318,"user_tz":-330,"elapsed":24,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"fb25a289-d5e9-438f-d21d-f09474cacbc6"},"source":["# checking the distribution of Target Variable\n","heart_data['target'].value_counts()"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1 165\n","0 138\n","Name: target, dtype: int64"]},"metadata":{},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"aSOBu4qDtJy5"},"source":["1 --> Defective Heart\n","\n","0 --> Healthy Heart"]},{"cell_type":"markdown","metadata":{"id":"tW8i4igjtPRC"},"source":["Splitting the Features and Target"]},{"cell_type":"code","metadata":{"id":"Q6yfbswrs7m3","executionInfo":{"status":"ok","timestamp":1653200234318,"user_tz":-330,"elapsed":22,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["X = heart_data.drop(columns='target', axis=1)\n","Y = heart_data['target']"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XJoCp4ZKtpZy","executionInfo":{"status":"ok","timestamp":1653200234319,"user_tz":-330,"elapsed":23,"user":{"displayName":"siddhardh 
selvam","userId":"13966379820454708749"}},"outputId":"14301a08-f0ce-41ef-ea17-b6a315aebef5"},"source":["print(X)"],"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":[" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n","0 63 1 3 145 233 1 0 150 0 2.3 \n","1 37 1 2 130 250 0 1 187 0 3.5 \n","2 41 0 1 130 204 0 0 172 0 1.4 \n","3 56 1 1 120 236 0 1 178 0 0.8 \n","4 57 0 0 120 354 0 1 163 1 0.6 \n",".. ... ... .. ... ... ... ... ... ... ... \n","298 57 0 0 140 241 0 1 123 1 0.2 \n","299 45 1 3 110 264 0 1 132 0 1.2 \n","300 68 1 0 144 193 1 1 141 0 3.4 \n","301 57 1 0 130 131 0 1 115 1 1.2 \n","302 57 0 1 130 236 0 0 174 0 0.0 \n","\n"," slope ca thal \n","0 0 0 1 \n","1 0 0 2 \n","2 2 0 2 \n","3 2 0 2 \n","4 2 0 2 \n",".. ... .. ... \n","298 1 0 3 \n","299 1 0 3 \n","300 1 2 3 \n","301 1 1 3 \n","302 1 1 2 \n","\n","[303 rows x 13 columns]\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nukuj-YItq1w","executionInfo":{"status":"ok","timestamp":1653200234319,"user_tz":-330,"elapsed":21,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"ee5e6451-0b29-4a02-aebf-24367d73874b"},"source":["print(Y)"],"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["0 1\n","1 1\n","2 1\n","3 1\n","4 1\n"," ..\n","298 0\n","299 0\n","300 0\n","301 0\n","302 0\n","Name: target, Length: 303, dtype: int64\n"]}]},{"cell_type":"markdown","metadata":{"id":"_EcjSE3Et18n"},"source":["Splitting the Data into Training data & Test Data"]},{"cell_type":"code","metadata":{"id":"a-UUfRUxtuga","executionInfo":{"status":"ok","timestamp":1653200234319,"user_tz":-330,"elapsed":20,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, 
random_state=2)"],"execution_count":13,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"x7PrjC6zuf6X","executionInfo":{"status":"ok","timestamp":1653200234320,"user_tz":-330,"elapsed":21,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"d78b7e79-4659-4101-c9d6-61df1d0d5981"},"source":["print(X.shape, X_train.shape, X_test.shape)"],"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["(303, 13) (242, 13) (61, 13)\n"]}]},{"cell_type":"markdown","metadata":{"id":"beSkZmpVuvn9"},"source":["Model Training"]},{"cell_type":"markdown","metadata":{"id":"gi2NOWZjuxzw"},"source":["Logistic Regression"]},{"cell_type":"code","metadata":{"id":"4-Md74FYuqNL","executionInfo":{"status":"ok","timestamp":1653200234320,"user_tz":-330,"elapsed":20,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["model = LogisticRegression()"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kCdHYxGUu7XD","executionInfo":{"status":"ok","timestamp":1653200234321,"user_tz":-330,"elapsed":20,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"e24ea90f-2542-44ca-abbd-9d0c9cce231d"},"source":["# training the LogisticRegression model with Training data\n","model.fit(X_train, Y_train)"],"execution_count":16,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n"]},{"output_type":"execute_result","data":{"text/plain":["LogisticRegression()"]},"metadata":{},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"ZYIw8Gi9vXfU"},"source":["Model Evaluation"]},{"cell_type":"markdown","metadata":{"id":"wmxAekfZvZa9"},"source":["Accuracy Score"]},{"cell_type":"code","metadata":{"id":"g19JaUTMvPKy","executionInfo":{"status":"ok","timestamp":1653200234321,"user_tz":-330,"elapsed":18,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# accuracy on training data\n","X_train_prediction = model.predict(X_train)\n","training_data_accuracy = accuracy_score(X_train_prediction, Y_train)"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uQBZvBh8v7R_","executionInfo":{"status":"ok","timestamp":1653200234908,"user_tz":-330,"elapsed":605,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"69f64655-7f79-4bdf-a0f1-6288620f032b"},"source":["print('Accuracy on Training data : ', training_data_accuracy)"],"execution_count":18,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy on Training data : 0.8512396694214877\n"]}]},{"cell_type":"code","metadata":{"id":"mDONDJdlwBIO","executionInfo":{"status":"ok","timestamp":1653200234908,"user_tz":-330,"elapsed":12,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# accuracy on test data\n","X_test_prediction = model.predict(X_test)\n","test_data_accuracy = accuracy_score(X_test_prediction, 
Y_test)"],"execution_count":19,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_MBS-OqdwYpf","executionInfo":{"status":"ok","timestamp":1653200234909,"user_tz":-330,"elapsed":13,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"bbe13e78-0dab-4925-e541-fb39be7d847f"},"source":["print('Accuracy on Test data : ', test_data_accuracy)"],"execution_count":20,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy on Test data : 0.819672131147541\n"]}]},{"cell_type":"markdown","metadata":{"id":"jIruVh3Qwq0e"},"source":["Building a Predictive System"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9ercruC9wb4C","executionInfo":{"status":"ok","timestamp":1653200234909,"user_tz":-330,"elapsed":12,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"e4cddd3b-acdb-482c-a25f-b720f02111c9"},"source":["input_data = (62,0,0,140,268,0,0,160,0,3.6,0,2,2)\n","\n","# change the input data to a numpy array\n","input_data_as_numpy_array= np.asarray(input_data)\n","\n","# reshape the numpy array as we are predicting for only one instance\n","input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n","\n","prediction = model.predict(input_data_reshaped)\n","print(prediction)\n","\n","if (prediction[0]== 0):\n"," print('The Person does not have a Heart Disease')\n","else:\n"," print('The Person has Heart Disease')"],"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["[0]\n","The Person does not have a Heart Disease\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n"," \"X does not have valid feature names, but\"\n"]}]},{"cell_type":"markdown","metadata":{"id":"FCHCMHpshHU4"},"source":["Saving the trained 
model"]},{"cell_type":"code","metadata":{"id":"cdmTOR4MhHCB","executionInfo":{"status":"ok","timestamp":1653200234910,"user_tz":-330,"elapsed":11,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["import pickle"],"execution_count":22,"outputs":[]},{"cell_type":"code","metadata":{"id":"4gN09lokhKuZ","executionInfo":{"status":"ok","timestamp":1653200234910,"user_tz":-330,"elapsed":10,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["filename = 'heart_disease_model.sav'\n","pickle.dump(model, open(filename, 'wb'))"],"execution_count":23,"outputs":[]},{"cell_type":"code","metadata":{"id":"IKW4D5CqhP5X","executionInfo":{"status":"ok","timestamp":1653200234911,"user_tz":-330,"elapsed":11,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"source":["# loading the saved model\n","loaded_model = pickle.load(open('heart_disease_model.sav', 'rb'))"],"execution_count":24,"outputs":[]},{"cell_type":"code","source":["for column in X.columns:\n"," print(column)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1SiD-oDpPSxY","executionInfo":{"status":"ok","timestamp":1653200234911,"user_tz":-330,"elapsed":11,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}},"outputId":"13c5618c-e337-4637-a35a-452e607e5ef4"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["age\n","sex\n","cp\n","trestbps\n","chol\n","fbs\n","restecg\n","thalach\n","exang\n","oldpeak\n","slope\n","ca\n","thal\n"]}]},{"cell_type":"code","source":[""],"metadata":{"id":"A23YZI2tPaFk","executionInfo":{"status":"ok","timestamp":1653200234911,"user_tz":-330,"elapsed":8,"user":{"displayName":"siddhardh selvam","userId":"13966379820454708749"}}},"execution_count":25,"outputs":[]}]}
|