{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "# import seaborn as sns\n", "# import matplotlib.pyplot as plt\n", "import os\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.feature_selection import SelectFromModel\n", "from sklearn.ensemble import RandomForestClassifier\n", "# plt.style.use('seaborn-colorblind')\n", "# %matplotlib inline\n", "from sklearn.feature_selection import RFE\n", "from feature_selection import hybrid\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.datasets import load_breast_cancer\n", "data = load_breast_cancer()\n", "data = pd.DataFrame(np.c_[data['data'], data['target']],\n", " columns= np.append(data['feature_names'], ['target']))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensiontarget
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...17.33184.602019.00.16220.66560.71190.26540.46010.118900.0
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...23.41158.801956.00.12380.18660.24160.18600.27500.089020.0
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...25.53152.501709.00.14440.42450.45040.24300.36130.087580.0
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...26.5098.87567.70.20980.86630.68690.25750.66380.173000.0
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...16.67152.201575.00.13740.20500.40000.16250.23640.076780.0
\n", "

5 rows × 31 columns

\n", "
" ], "text/plain": [ " mean radius mean texture mean perimeter mean area mean smoothness \\\n", "0 17.99 10.38 122.80 1001.0 0.11840 \n", "1 20.57 17.77 132.90 1326.0 0.08474 \n", "2 19.69 21.25 130.00 1203.0 0.10960 \n", "3 11.42 20.38 77.58 386.1 0.14250 \n", "4 20.29 14.34 135.10 1297.0 0.10030 \n", "\n", " mean compactness mean concavity mean concave points mean symmetry \\\n", "0 0.27760 0.3001 0.14710 0.2419 \n", "1 0.07864 0.0869 0.07017 0.1812 \n", "2 0.15990 0.1974 0.12790 0.2069 \n", "3 0.28390 0.2414 0.10520 0.2597 \n", "4 0.13280 0.1980 0.10430 0.1809 \n", "\n", " mean fractal dimension ... worst texture worst perimeter worst area \\\n", "0 0.07871 ... 17.33 184.60 2019.0 \n", "1 0.05667 ... 23.41 158.80 1956.0 \n", "2 0.05999 ... 25.53 152.50 1709.0 \n", "3 0.09744 ... 26.50 98.87 567.7 \n", "4 0.05883 ... 16.67 152.20 1575.0 \n", "\n", " worst smoothness worst compactness worst concavity worst concave points \\\n", "0 0.1622 0.6656 0.7119 0.2654 \n", "1 0.1238 0.1866 0.2416 0.1860 \n", "2 0.1444 0.4245 0.4504 0.2430 \n", "3 0.2098 0.8663 0.6869 0.2575 \n", "4 0.1374 0.2050 0.4000 0.1625 \n", "\n", " worst symmetry worst fractal dimension target \n", "0 0.4601 0.11890 0.0 \n", "1 0.2750 0.08902 0.0 \n", "2 0.3613 0.08758 0.0 \n", "3 0.6638 0.17300 0.0 \n", "4 0.2364 0.07678 0.0 \n", "\n", "[5 rows x 31 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head(5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((455, 30), (114, 30))" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train, X_test, y_train, y_test = train_test_split(data.drop(labels=['target'], axis=1), \n", " data.target, test_size=0.2,\n", " random_state=0)\n", "X_train.shape, X_test.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Recursive Feature Elimination \n", "### with Random Forests Importance\n" ] 
}, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example 1\n", "This method is slightly **different from the guide**, as it uses a different stopping criterion: elimination stops once the desired number of features is reached." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RFE(estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=None,\n", " oob_score=False, random_state=None, verbose=0,\n", " warm_start=False),\n", " n_features_to_select=10, step=1, verbose=0)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# n_features_to_select sets the stopping criterion:\n", "# elimination stops once 10 features remain\n", "\n", "sel_ = RFE(RandomForestClassifier(n_estimators=20), n_features_to_select=10)\n", "sel_.fit(X_train.fillna(0), y_train)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['mean texture', 'mean perimeter', 'mean area', 'mean concavity',\n", " 'mean concave points', 'worst radius', 'worst perimeter', 'worst area',\n", " 'worst concave points', 'worst symmetry'],\n", " dtype='object')\n" ] } ], "source": [ "selected_feat = X_train.columns[(sel_.get_support())]\n", "print(selected_feat)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### Example 2\n", "Recursive feature elimination with RandomForest,\n", "using the same method as the guide:\n", "1. Rank the features according to their importance derived from a machine learning algorithm: it can be tree importance, or LASSO / Ridge, or the linear / logistic regression coefficients.\n", "2. 
Remove one feature -the least important- and build a machine learning algorithm utilizing the remaining features.\n", "3. Calculate a performance metric of your choice: roc-auc, mse, rmse, accuracy.\n", "4. If the metric decreases by more than an arbitrarily set threshold, then that feature is important and should be kept. Otherwise, we can remove that feature.\n", "5. Repeat steps 2-4 until all features have been removed (and therefore evaluated) and the drop in performance assessed.\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "testing feature: mean radius which is feature 1 out of 30\n", "New Test ROC AUC=0.9941251190854239\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.0026992696093999236\n", "keep: mean radius\n", "\n", "testing feature: mean texture which is feature 2 out of 30\n", "New Test ROC AUC=0.9936487773896475\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.0031756113051762958\n", "keep: mean texture\n", "\n", "testing feature: mean perimeter which is feature 3 out of 30\n", "New Test ROC AUC=0.9968243886948238\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.0\n", "remove: mean perimeter\n", "\n", "testing feature: mean area which is feature 4 out of 30\n", "New Test ROC AUC=0.9960304858685297\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.0007939028262941017\n", "remove: mean area\n", "\n", "testing feature: mean smoothness which is feature 5 out of 30\n", "New Test ROC AUC=0.9965068275643061\n", "All features Test ROC AUC=0.9960304858685297\n", "Drop in ROC AUC=-0.0004763416957763722\n", "remove: mean smoothness\n", "\n", "testing feature: mean compactness which is feature 6 out of 30\n", "New Test ROC AUC=0.9942838996506828\n", "All features Test ROC AUC=0.9965068275643061\n", "Drop in ROC AUC=0.0022229279136233293\n", "keep: mean compactness\n", "\n", 
"testing feature: mean concavity which is feature 7 out of 30\n", "New Test ROC AUC=0.9957129247380121\n", "All features Test ROC AUC=0.9965068275643061\n", "Drop in ROC AUC=0.0007939028262939907\n", "remove: mean concavity\n", "\n", "testing feature: mean concave points which is feature 8 out of 30\n", "New Test ROC AUC=0.9976182915211178\n", "All features Test ROC AUC=0.9957129247380121\n", "Drop in ROC AUC=-0.0019053667831057108\n", "remove: mean concave points\n", "\n", "testing feature: mean symmetry which is feature 9 out of 30\n", "New Test ROC AUC=0.9953953636074945\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0022229279136233293\n", "keep: mean symmetry\n", "\n", "testing feature: mean fractal dimension which is feature 10 out of 30\n", "New Test ROC AUC=0.9949190219117181\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0026992696093997015\n", "keep: mean fractal dimension\n", "\n", "testing feature: radius error which is feature 11 out of 30\n", "New Test ROC AUC=0.9952365830422356\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.002381708478882194\n", "keep: radius error\n", "\n", "testing feature: texture error which is feature 12 out of 30\n", "New Test ROC AUC=0.9952365830422356\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.002381708478882194\n", "keep: texture error\n", "\n", "testing feature: perimeter error which is feature 13 out of 30\n", "New Test ROC AUC=0.9939663385201651\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.003651953000952668\n", "keep: perimeter error\n", "\n", "testing feature: area error which is feature 14 out of 30\n", "New Test ROC AUC=0.994919021911718\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0026992696093998125\n", "keep: area error\n", "\n", "testing feature: smoothness error which is feature 15 out of 30\n", "New Test ROC AUC=0.995871705303271\n", "All features 
Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.001746586217846846\n", "keep: smoothness error\n", "\n", "testing feature: compactness error which is feature 16 out of 30\n", "New Test ROC AUC=0.9958717053032708\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0017465862178469571\n", "keep: compactness error\n", "\n", "testing feature: concavity error which is feature 17 out of 30\n", "New Test ROC AUC=0.9961892664337886\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0014290250873292276\n", "keep: concavity error\n", "\n", "testing feature: concave points error which is feature 18 out of 30\n", "New Test ROC AUC=0.9961892664337885\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0014290250873293386\n", "keep: concave points error\n", "\n", "testing feature: symmetry error which is feature 19 out of 30\n", "New Test ROC AUC=0.9968243886948238\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.0007939028262939907\n", "remove: symmetry error\n", "\n", "testing feature: fractal dimension error which is feature 20 out of 30\n", "New Test ROC AUC=0.9946014607812005\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.0022229279136233293\n", "keep: fractal dimension error\n", "\n", "testing feature: worst radius which is feature 21 out of 30\n", "New Test ROC AUC=0.9955541441727532\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.001270244522070585\n", "keep: worst radius\n", "\n", "testing feature: worst texture which is feature 22 out of 30\n", "New Test ROC AUC=0.9958717053032708\n", "All features Test ROC AUC=0.9968243886948238\n", "Drop in ROC AUC=0.0009526833915529664\n", "remove: worst texture\n", "\n", "testing feature: worst perimeter which is feature 23 out of 30\n", "New Test ROC AUC=0.995871705303271\n", "All features Test ROC AUC=0.9958717053032708\n", "Drop in ROC AUC=-1.1102230246251565e-16\n", "remove: worst 
perimeter\n", "\n", "testing feature: worst area which is feature 24 out of 30\n", "New Test ROC AUC=0.9938075579549063\n", "All features Test ROC AUC=0.995871705303271\n", "Drop in ROC AUC=0.0020641473483646866\n", "keep: worst area\n", "\n", "testing feature: worst smoothness which is feature 25 out of 30\n", "New Test ROC AUC=0.9939663385201651\n", "All features Test ROC AUC=0.995871705303271\n", "Drop in ROC AUC=0.0019053667831058219\n", "keep: worst smoothness\n", "\n", "testing feature: worst compactness which is feature 26 out of 30\n", "New Test ROC AUC=0.9960304858685296\n", "All features Test ROC AUC=0.995871705303271\n", "Drop in ROC AUC=-0.0001587805652586427\n", "remove: worst compactness\n", "\n", "testing feature: worst concavity which is feature 27 out of 30\n", "New Test ROC AUC=0.9966656081295648\n", "All features Test ROC AUC=0.9960304858685296\n", "Drop in ROC AUC=-0.0006351222610352369\n", "remove: worst concavity\n", "\n", "testing feature: worst concave points which is feature 28 out of 30\n", "New Test ROC AUC=0.9936487773896475\n", "All features Test ROC AUC=0.9966656081295648\n", "Drop in ROC AUC=0.00301683073991732\n", "keep: worst concave points\n", "\n", "testing feature: worst symmetry which is feature 29 out of 30\n", "New Test ROC AUC=0.9976182915211178\n", "All features Test ROC AUC=0.9966656081295648\n", "Drop in ROC AUC=-0.0009526833915529664\n", "remove: worst symmetry\n", "\n", "testing feature: worst fractal dimension which is feature 30 out of 30\n", "New Test ROC AUC=0.9973007303906002\n", "All features Test ROC AUC=0.9976182915211178\n", "Drop in ROC AUC=0.00031756113051761847\n", "remove: worst fractal dimension\n", "DONE!!\n", "total features to remove: 12\n", "total features to keep: 18\n" ] } ], "source": [ "# tol decide whether we should drop or keep the feature in current round\n", "features_to_keep = hybrid.recursive_feature_elimination_rf(X_train=X_train,\n", " y_train=y_train,\n", " X_test=X_test,\n", " 
y_test=y_test,\n", " tol=0.001)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['mean radius',\n", " 'mean texture',\n", " 'mean compactness',\n", " 'mean symmetry',\n", " 'mean fractal dimension',\n", " 'radius error',\n", " 'texture error',\n", " 'perimeter error',\n", " 'area error',\n", " 'smoothness error',\n", " 'compactness error',\n", " 'concavity error',\n", " 'concave points error',\n", " 'fractal dimension error',\n", " 'worst radius',\n", " 'worst area',\n", " 'worst smoothness',\n", " 'worst concave points']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "features_to_keep" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Recursive Feature Addition\n", "### with Random Forests Importance" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example 1\n", "Recursive feature addition with RandomForest,\n", "using the same method as the guide:\n", "1. Rank the features according to their importance derived from a machine learning algorithm: it can be tree importance, or LASSO / Ridge, or the linear / logistic regression coefficients.\n", "2. Build a machine learning model with only 1 feature, the most important one, and calculate the model metric for performance.\n", "3. Add one feature -the next most important- and build a machine learning algorithm utilizing the added feature and any features kept from previous rounds.\n", "4. Calculate a performance metric of your choice: roc-auc, mse, rmse, accuracy.\n", "5. If the metric increases by more than an arbitrarily set threshold, then that feature is important and should be kept. Otherwise, we can remove that feature.\n", "6. 
Repeat steps 3-5 until all features have been added (and therefore evaluated) and the increase in performance assessed.\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "testing feature: mean texture which is feature 1 out of 30\n", "New Test ROC AUC=0.9558590028580501\n", "All features Test ROC AUC=0.9009209272785013\n", "Increase in ROC AUC=0.054938075579548884\n", "keep: mean texture\n", "\n", "testing feature: mean perimeter which is feature 2 out of 30\n", "New Test ROC AUC=0.9609399809463322\n", "All features Test ROC AUC=0.9558590028580501\n", "Increase in ROC AUC=0.005080978088282007\n", "keep: mean perimeter\n", "\n", "testing feature: mean area which is feature 3 out of 30\n", "New Test ROC AUC=0.9609399809463322\n", "All features Test ROC AUC=0.9609399809463322\n", "Increase in ROC AUC=0.0\n", "remove: mean area\n", "\n", "testing feature: mean smoothness which is feature 4 out of 30\n", "New Test ROC AUC=0.9684026675134964\n", "All features Test ROC AUC=0.9609399809463322\n", "Increase in ROC AUC=0.007462686567164201\n", "keep: mean smoothness\n", "\n", "testing feature: mean compactness which is feature 5 out of 30\n", "New Test ROC AUC=0.9750714512543665\n", "All features Test ROC AUC=0.9684026675134964\n", "Increase in ROC AUC=0.006668783740870099\n", "keep: mean compactness\n", "\n", "testing feature: mean concavity which is feature 6 out of 30\n", "New Test ROC AUC=0.9933312162591298\n", "All features Test ROC AUC=0.9750714512543665\n", "Increase in ROC AUC=0.01825976500476334\n", "keep: mean concavity\n", "\n", "testing feature: mean concave points which is feature 7 out of 30\n", "New Test ROC AUC=0.9925373134328358\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0007939028262939907\n", "remove: mean concave points\n", "\n", "testing feature: mean symmetry which is feature 8 out of 30\n", "New Test ROC 
AUC=0.9895204826929185\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0038107335662113107\n", "remove: mean symmetry\n", "\n", "testing feature: mean fractal dimension which is feature 9 out of 30\n", "New Test ROC AUC=0.9892029215624007\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.00412829469672904\n", "remove: mean fractal dimension\n", "\n", "testing feature: radius error which is feature 10 out of 30\n", "New Test ROC AUC=0.9895204826929184\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0038107335662114217\n", "remove: radius error\n", "\n", "testing feature: texture error which is feature 11 out of 30\n", "New Test ROC AUC=0.9868212130835186\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.006510003175611234\n", "remove: texture error\n", "\n", "testing feature: perimeter error which is feature 12 out of 30\n", "New Test ROC AUC=0.9890441409971419\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.004287075261987905\n", "remove: perimeter error\n", "\n", "testing feature: area error which is feature 13 out of 30\n", "New Test ROC AUC=0.989044140997142\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.004287075261987794\n", "remove: area error\n", "\n", "testing feature: smoothness error which is feature 14 out of 30\n", "New Test ROC AUC=0.988091457605589\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.00523975865354076\n", "remove: smoothness error\n", "\n", "testing feature: compactness error which is feature 15 out of 30\n", "New Test ROC AUC=0.9895204826929184\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0038107335662114217\n", "remove: compactness error\n", "\n", "testing feature: concavity error which is feature 16 out of 30\n", "New Test ROC AUC=0.9911082883455065\n", "All features Test ROC 
AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0022229279136233293\n", "remove: concavity error\n", "\n", "testing feature: concave points error which is feature 17 out of 30\n", "New Test ROC AUC=0.9906319466497301\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0026992696093997015\n", "remove: concave points error\n", "\n", "testing feature: symmetry error which is feature 18 out of 30\n", "New Test ROC AUC=0.9876151159098127\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0057161003493171325\n", "remove: symmetry error\n", "\n", "testing feature: fractal dimension error which is feature 19 out of 30\n", "New Test ROC AUC=0.9896792632581772\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.003651953000952557\n", "remove: fractal dimension error\n", "\n", "testing feature: worst radius which is feature 20 out of 30\n", "New Test ROC AUC=0.994125119085424\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=0.0007939028262942127\n", "remove: worst radius\n", "\n", "testing feature: worst texture which is feature 21 out of 30\n", "New Test ROC AUC=0.9906319466497301\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0026992696093997015\n", "remove: worst texture\n", "\n", "testing feature: worst perimeter which is feature 22 out of 30\n", "New Test ROC AUC=0.9933312162591299\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=1.1102230246251565e-16\n", "remove: worst perimeter\n", "\n", "testing feature: worst area which is feature 23 out of 30\n", "New Test ROC AUC=0.9931724356938711\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0001587805652586427\n", "remove: worst area\n", "\n", "testing feature: worst smoothness which is feature 24 out of 30\n", "New Test ROC AUC=0.9933312162591299\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC 
AUC=1.1102230246251565e-16\n", "remove: worst smoothness\n", "\n", "testing feature: worst compactness which is feature 25 out of 30\n", "New Test ROC AUC=0.9895204826929184\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=-0.0038107335662114217\n", "remove: worst compactness\n", "\n", "testing feature: worst concavity which is feature 26 out of 30\n", "New Test ROC AUC=0.9938075579549063\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=0.0004763416957764832\n", "remove: worst concavity\n", "\n", "testing feature: worst concave points which is feature 27 out of 30\n", "New Test ROC AUC=0.9971419498253413\n", "All features Test ROC AUC=0.9933312162591298\n", "Increase in ROC AUC=0.0038107335662115327\n", "keep: worst concave points\n", "\n", "testing feature: worst symmetry which is feature 28 out of 30\n", "New Test ROC AUC=0.9957129247380121\n", "All features Test ROC AUC=0.9971419498253413\n", "Increase in ROC AUC=-0.0014290250873292276\n", "remove: worst symmetry\n", "\n", "testing feature: worst fractal dimension which is feature 29 out of 30\n", "New Test ROC AUC=0.9950778024769769\n", "All features Test ROC AUC=0.9971419498253413\n", "Increase in ROC AUC=-0.0020641473483644646\n", "remove: worst fractal dimension\n", "DONE!!\n", "total features to keep: 7\n" ] } ], "source": [ "features_to_keep = hybrid.recursive_feature_addition_rf(X_train=X_train,\n", " y_train=y_train,\n", " X_test=X_test,\n", " y_test=y_test,\n", " tol=0.001)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['mean radius',\n", " 'mean texture',\n", " 'mean perimeter',\n", " 'mean smoothness',\n", " 'mean compactness',\n", " 'mean concavity',\n", " 'worst concave points']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "features_to_keep" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": 
"python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }