{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d114e576", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "\n", "df = sns.load_dataset('titanic')" ] }, { "cell_type": "code", "execution_count": 2, "id": "62e20e15", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
................................................
88602male27.00013.0000SSecondmanTrueNaNSouthamptonnoTrue
88711female19.00030.0000SFirstwomanFalseBSouthamptonyesTrue
88803femaleNaN1223.4500SThirdwomanFalseNaNSouthamptonnoFalse
88911male26.00030.0000CFirstmanTrueCCherbourgyesTrue
89003male32.0007.7500QThirdmanTrueNaNQueenstownnoTrue
\n", "

891 rows × 15 columns

\n", "
" ], "text/plain": [ " survived pclass sex age sibsp parch fare embarked class \\\n", "0 0 3 male 22.0 1 0 7.2500 S Third \n", "1 1 1 female 38.0 1 0 71.2833 C First \n", "2 1 3 female 26.0 0 0 7.9250 S Third \n", "3 1 1 female 35.0 1 0 53.1000 S First \n", "4 0 3 male 35.0 0 0 8.0500 S Third \n", ".. ... ... ... ... ... ... ... ... ... \n", "886 0 2 male 27.0 0 0 13.0000 S Second \n", "887 1 1 female 19.0 0 0 30.0000 S First \n", "888 0 3 female NaN 1 2 23.4500 S Third \n", "889 1 1 male 26.0 0 0 30.0000 C First \n", "890 0 3 male 32.0 0 0 7.7500 Q Third \n", "\n", " who adult_male deck embark_town alive alone \n", "0 man True NaN Southampton no False \n", "1 woman False C Cherbourg yes False \n", "2 woman False NaN Southampton yes True \n", "3 woman False C Southampton yes False \n", "4 man True NaN Southampton no True \n", ".. ... ... ... ... ... ... \n", "886 man True NaN Southampton no True \n", "887 woman False B Southampton yes True \n", "888 woman False NaN Southampton no False \n", "889 man True C Cherbourg yes True \n", "890 man True NaN Queenstown no True \n", "\n", "[891 rows x 15 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 3, "id": "0e8d87f9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',\n", " 'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',\n", " 'alive', 'alone'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 4, "id": "53c30023", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexageparchfare
003male22.007.2500
111female38.0071.2833
213female26.007.9250
311female35.0053.1000
403male35.008.0500
.....................
88602male27.0013.0000
88711female19.0030.0000
88803femaleNaN223.4500
88911male26.0030.0000
89003male32.007.7500
\n", "

891 rows × 6 columns

\n", "
" ], "text/plain": [ " survived pclass sex age parch fare\n", "0 0 3 male 22.0 0 7.2500\n", "1 1 1 female 38.0 0 71.2833\n", "2 1 3 female 26.0 0 7.9250\n", "3 1 1 female 35.0 0 53.1000\n", "4 0 3 male 35.0 0 8.0500\n", ".. ... ... ... ... ... ...\n", "886 0 2 male 27.0 0 13.0000\n", "887 1 1 female 19.0 0 30.0000\n", "888 0 3 female NaN 2 23.4500\n", "889 1 1 male 26.0 0 30.0000\n", "890 0 3 male 32.0 0 7.7500\n", "\n", "[891 rows x 6 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.drop(columns=['who', 'adult_male','alive','sibsp','alone','embark_town','embarked','deck','class'])\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "id": "3223a012", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['male', 'female'], dtype=object)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['sex'].unique()" ] }, { "cell_type": "code", "execution_count": 6, "id": "d86ef363", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexageparchfare
003022.007.2500
111138.0071.2833
213126.007.9250
311135.0053.1000
403035.008.0500
.....................
88602027.0013.0000
88711119.0030.0000
888031NaN223.4500
88911026.0030.0000
89003032.007.7500
\n", "

891 rows × 6 columns

\n", "
" ], "text/plain": [ " survived pclass sex age parch fare\n", "0 0 3 0 22.0 0 7.2500\n", "1 1 1 1 38.0 0 71.2833\n", "2 1 3 1 26.0 0 7.9250\n", "3 1 1 1 35.0 0 53.1000\n", "4 0 3 0 35.0 0 8.0500\n", ".. ... ... ... ... ... ...\n", "886 0 2 0 27.0 0 13.0000\n", "887 1 1 1 19.0 0 30.0000\n", "888 0 3 1 NaN 2 23.4500\n", "889 1 1 0 26.0 0 30.0000\n", "890 0 3 0 32.0 0 7.7500\n", "\n", "[891 rows x 6 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['sex'] = df['sex'].map({'male':0,'female':1})\n", "\n", "df" ] },
{ "cell_type": "code", "execution_count": 7, "id": "c4e3253f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "survived 0\n", "pclass 0\n", "sex 0\n", "age 177\n", "parch 0\n", "fare 0\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] },
{ "cell_type": "code", "execution_count": 8, "id": "6329f3d8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 714.000000\n", "mean 29.699118\n", "std 14.526497\n", "min 0.420000\n", "25% 20.125000\n", "50% 28.000000\n", "75% 38.000000\n", "max 80.000000\n", "Name: age, dtype: float64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['age'].describe()" ] },
{ "cell_type": "code", "execution_count": 9, "id": "8fe71869", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "28.0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['age'].median()" ] },
{ "cell_type": "code", "execution_count": 10, "id": "ff80b834", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "survived 0\n", "pclass 0\n", "sex 0\n", "age 0\n", "parch 0\n", "fare 0\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fill value for the 177 missing ages. Note it is close to the mean (29.7),\n", "# not the median (28.0) printed above; presumably the mean rounded up.\n", "AGE_FILL = 30\n", "df['age'] = df['age'].fillna(AGE_FILL)\n", "\n", "df.isnull().sum()" ] },
{ "cell_type": "code", "execution_count": 11, "id": "a1920396", "metadata": {}, "outputs": [], "source": [ "x = df.drop(columns=['survived'])  # features\n", "\n", "y = df['survived']  # target" ] },
{ "cell_type": "markdown", "id": "a04b824d", "metadata": {}, "source": [ "## Importing the ensemble model" ] },
{ "cell_type": "code", "execution_count": 13, "id": "ce1742de", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier" ] },
{ "cell_type": "code", "execution_count": 14, "id": "5d4b0678", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] },
{ "cell_type": "code", "execution_count": 15, "id": "e0dec475", "metadata": {}, "outputs": [], "source": [ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)" ] },
{ "cell_type": "code", "execution_count": 24, "id": "205ddbfe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssexageparchfare
3311045.5028.5000
7332023.0013.0000
3823032.007.9250
7043026.007.8542
813316.0231.2750
..................
1063121.007.6500
2701030.0031.0000
8603041.0014.1083
4351114.02120.0000
1021021.0177.2875
\n", "

712 rows × 5 columns

\n", "
" ], "text/plain": [ " pclass sex age parch fare\n", "331 1 0 45.5 0 28.5000\n", "733 2 0 23.0 0 13.0000\n", "382 3 0 32.0 0 7.9250\n", "704 3 0 26.0 0 7.8542\n", "813 3 1 6.0 2 31.2750\n", ".. ... ... ... ... ...\n", "106 3 1 21.0 0 7.6500\n", "270 1 0 30.0 0 31.0000\n", "860 3 0 41.0 0 14.1083\n", "435 1 1 14.0 2 120.0000\n", "102 1 0 21.0 1 77.2875\n", "\n", "[712 rows x 5 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train" ] },
{ "cell_type": "code", "execution_count": null, "id": "4c79ec9f", "metadata": {}, "outputs": [], "source": [ "len(x_train)" ] },
{ "cell_type": "code", "execution_count": 17, "id": "36a7215d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "179" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(x_test)" ] },
{ "cell_type": "code", "execution_count": 18, "id": "d67f0925", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestClassifier(max_depth=7, n_estimators=50, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestClassifier(max_depth=7, n_estimators=50, random_state=42)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rf = RandomForestClassifier(n_estimators=50, random_state=42, max_depth=7)\n", "\n", "rf.fit(x_train, y_train)" ] },
{ "cell_type": "markdown", "id": "e7d6c68c", "metadata": {}, "source": [ "- n_estimators: number of trees in the forest\n", "- random_state: seed that makes the random sampling (bootstrap rows, candidate features) reproducible\n", "- max_depth: maximum depth (number of levels) of each tree" ] },
{ "cell_type": "code", "execution_count": 19, "id": "0a9b3ca8", "metadata": {}, "outputs": [], "source": [ "y_pred = rf.predict(x_test)" ] },
{ "cell_type": "code", "execution_count": 20, "id": "6f63656d", "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import classification_report" ] },
{ "cell_type": "code", "execution_count": 21, "id": "ce9e2bd6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.79 0.90 0.84 105\n", " 1 0.82 0.66 0.73 74\n", "\n", " accuracy 0.80 179\n", " macro avg 0.80 0.78 0.79 179\n", "weighted avg 0.80 0.80 0.79 179\n", "\n" ] } ], "source": [ "cr = classification_report(y_test, y_pred)\n", "\n", "print(cr)" ] },
{ "cell_type": "code", "execution_count": 22, "id": "0897a811", "metadata": {}, "outputs": [], "source": [ "from joblib import dump" ] },
{ "cell_type": "code", "execution_count": 23, "id": "0c4a3032", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['model.joblib']" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dump(rf, \"model.joblib\")" ] },
{ "cell_type": "code", "execution_count": 25, "id": "920011aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0], dtype=int64)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The model was fitted on a DataFrame, so predict on one with the same column names;\n", "# a bare list triggers sklearn's 'X does not have valid feature names' warning.\n", "# Column order: pclass, sex, age, parch, fare.\n", "sample = pd.DataFrame([[1, 0, 45, 2, 120]], columns=rf.feature_names_in_)\n", "\n", "rf.predict(sample)" ] },
{ "cell_type": "code", "execution_count": null, "id": "0504f1f2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }