{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "# import seaborn as sns\n", "# import matplotlib.pyplot as plt\n", "import os\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import roc_curve, roc_auc_score\n", "\n", "# plt.style.use('seaborn-colorblind')\n", "# %matplotlib inline\n", "#from feature_cleaning import rare_values as ra" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Restrict the load to a subset of the CSV's columns. read_csv returns them\n", "# in the file's own column order, not the order listed here.\n", "use_cols = ['Pclass', 'Sex', 'Age', 'Fare', 'SibSp', 'Survived']\n", "\n", "data = pd.read_csv(\n", "    './data/titanic.csv',\n", "    usecols=use_cols,\n", ")\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Survived | \n", "Pclass | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Fare | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "3 | \n", "male | \n", "22.0 | \n", "1 | \n", "7.2500 | \n", "
| 1 | \n", "1 | \n", "1 | \n", "female | \n", "38.0 | \n", "1 | \n", "71.2833 | \n", "
| 2 | \n", "1 | \n", "3 | \n", "female | \n", "26.0 | \n", "0 | \n", "7.9250 | \n", "