{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "37cfe918-891e-4c07-914b-a5c42ff01f12",
   "metadata": {},
   "source": [
    "### Data Import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "db8bb49a-2e2d-4408-b571-44c7547b463b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import random as rnd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8d470c8b-9188-45ba-9356-ec22c1b146bf",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 3005 entries, 0 to 3004\n",
      "Columns: 820 entries, ID to CASEDIF\n",
      "dtypes: float64(93), int64(6), object(721)\n",
      "memory usage: 18.8+ MB\n"
     ]
    }
   ],
   "source": [
    "# Load up data\n",
    "pd.set_option('display.max_rows', 10)\n",
    "df = pd.read_csv('./data.csv')\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c6fe342-0591-48e4-9d5f-d981d62d50c9",
   "metadata": {},
   "source": [
    "# Feature Deletion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6508cb2e-4304-4e2c-a101-79e7caeaffc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every feature whose share of missing values exceeds 25 %\n",
    "missing_share_pct = df.isna().sum().div(len(df)).mul(100)\n",
    "cols_to_drop = missing_share_pct[missing_share_pct > 25].index.tolist()\n",
    "df = df.drop(columns=cols_to_drop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f67860b6-5837-49de-a970-50f40a7e4bc7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 3005 entries, 0 to 3004\n",
      "Columns: 556 entries, ID to CASEDIF\n",
      "dtypes: float64(49), int64(6), object(501)\n",
      "memory usage: 12.7+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2020f8b3-bda8-459b-819c-f601c4aa6a53",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Medicine features, except for the ones with a high correlation with depression\n",
    "medicine_cols_to_keep = ['PSYCHOTHERAGENTS', 'ANTIDEPRESSANTS', 'SSRIANTIDEPRESSA',\n",
    "                         'ANXIOLYTICSSEDAT', 'ANTICONVULSANTS', 'BENZODIAZEPINES']\n",
    "\n",
    "# Lower-case medicine feature names; they are upper-cased below before dropping\n",
    "medicine_cols = [\n",
    "    \"antiinfectives\", \"amebicides\", \"antifungals\", \"antimalarialagen\",\n",
    "    \"antituberagents\", \"cephalosporins\", \"leprostatics\", \"macrolidederivat\",\n",
    "    \"miscantibiotics\", \"penicillins\", \"quinolones\", \"sulfonamides\",\n",
    "    \"tetracyclines\", \"urinaryantiinfec\", \"antihyplipagents\", \"antineoplastics\",\n",
    "    \"alkylatingagents\", \"antimetabolites\", \"hormonesantineop\", \"miscantineoplast\",\n",
    "    \"biologicals\", \"recombinanthuman\", \"cardiovascularag\", \"angiotensinconve\",\n",
    "    \"antiadrenergperi\", \"antiadrenergcent\", \"antianginalagent\", \"antiarrhythmicag\",\n",
    "    \"betaadrenergicbl\", \"calciumchannelbl\", \"diuretics\", \"inotropicagents\",\n",
    "    \"misccardiovascul\", \"peripheralvasodi\", \"vasodilators\", \"vasopressors\",\n",
    "    \"antihypertensive\", \"angiotensiniiinh\", \"centralnervoussy\", \"analgesics\",\n",
    "    \"miscanalgesics\", \"narcanalgs\", \"nonsteroidalanti\", \"salicylates\",\n",
    "    \"analgesiccombina\", \"anticonvulsants\", \"antiemeticantive\", \"antiparkinsonage\",\n",
    "    \"anxiolyticssedat\", \"barbiturates\", \"benzodiazepines\", \"miscanxiolyticss\",\n",
    "    \"cnsstimulants\", \"musclerelaxants\", \"miscantidepressa\", \"miscantipsychoti\",\n",
    "    \"psychothercombin\", \"misccentralnervo\", \"coagulationmodif\", \"anticoagulants\",\n",
    "    \"antiplateletagen\", \"misccoagulationm\", \"gastrointestinal\", \"antacids\",\n",
    "    \"anticholsantispa\", \"antidiarrheals\", \"digestiveenzymes\", \"gallstonesolubil\",\n",
    "    \"gistimulants\", \"h2antagonists\", \"laxatives\", \"miscgiagents\",\n",
    "    \"hormones\", \"adrenalcorticals\", \"antidiabeticagen\", \"mischormones\",\n",
    "    \"sexhormones\", \"contraceptives\", \"thyroiddrugs\", \"immunosuppressiv\",\n",
    "    \"miscagents\", \"antidotes\", \"chelatingagents\", \"cholinergicmuscl\",\n",
    "    \"localinjectablea\", \"miscuncategorize\", \"genitourinarytra\", \"nutritionalprods\",\n",
    "    \"ironproducts\", \"mineralsandelect\", \"vitamins\", \"vitaminmineral\",\n",
    "    \"respiratoryagent\", \"antihistamines\", \"antitussives\", \"bronchodilators\",\n",
    "    \"methylxanthines\", \"decongestants\", \"expectorants\", \"miscrespiratorya\",\n",
    "    \"respiratoryinhal\", \"upperrespiratory\", \"topicalagents\", \"dermatologicalag\",\n",
    "    \"topicalantiinfec\", \"topicalsteroids\", \"topicalanestheti\", \"misctopicalagent\",\n",
    "    \"topicalacneagent\", \"mouthandthroatpr\", \"ophthalpreparati\", \"oticpreparations\",\n",
    "    \"vaginalpreparati\", \"loopdiuretics\", \"potassiumsparing\", \"thiazidediuretic\",\n",
    "    \"carbonicanhydras\", \"firstgenerationc\", \"thirdgenerationc\", \"ophthalantiinfec\",\n",
    "    \"ophthalglaucomaa\", \"ophthalsteroids\", \"ophthalsteroidsw\", \"ophthalantiinfla\",\n",
    "    \"miscophthalagent\", \"oticsteroidswith\", \"miscoticagents\", \"hmgcoareductasei\",\n",
    "    \"miscantihyplipag\", \"skelmuscrels\", \"adrenergicbronch\", \"bronchodilatorco\",\n",
    "    \"androgensandanab\", \"estrogens\", \"progestins\", \"sexhormonecombin\",\n",
    "    \"narcanalgcombina\", \"antirheumatics\", \"antimigraineagen\", \"antigoutagents\",\n",
    "    \"fiveht3receptora\", \"phenthiazantieme\", \"anticholantiemet\", \"miscantiemetics\",\n",
    "    \"hydantoinanticon\", \"barbiturateantic\", \"benzodiazepinean\", \"miscanticonvulsa\",\n",
    "    \"anticholantipark\", \"ssriantidepressa\", \"tricyclicantidep\", \"phenthiazantipsy\",\n",
    "    \"plateletaggregat\", \"sulfonylureas\", \"nonsulfonylureas\", \"insulin\",\n",
    "    \"alphaglucosidase\", \"bisphosphonates\", \"alternativemeds\", \"nutraceuticals\",\n",
    "    \"herbalproducts\", \"penicillinaseres\", \"aminopenicillins\", \"betalactamaseinh\",\n",
    "    \"adamantaneantivi\", \"purinenucleoside\", \"miscantituberage\", \"polyenes\",\n",
    "    \"azoleantifungals\", \"miscantifungals\", \"antimalarialquin\", \"miscantimalarial\",\n",
    "    \"lincomycinderiva\", \"fibricacidderiva\", \"psychotheragents\", \"leukotrienemodif\",\n",
    "    \"nasallubricants\", \"nasalsteroids\", \"nasalantihistami\", \"nasalpreparation\",\n",
    "    \"antidepressants\", \"monoamineoxidase\", \"antipsychotics\", \"bileacidsequestr\",\n",
    "    \"anorexiants\", \"immunologicagent\", \"monoclonalantibo\", \"heparins\",\n",
    "    \"coumarinsandinda\", \"impotenceagents\", \"urinaryantispasm\", \"urinaryphmodifie\",\n",
    "    \"miscgenitourinar\", \"ophthalantihista\", \"miscvaginalagent\", \"antipsoriatics\",\n",
    "    \"thiazolidinedion\", \"protonpumpinhibi\", \"cardioselectiveb\", \"noncardioselecti\",\n",
    "    \"dopaminergicanti\", \"fiveaminosalic\", \"cox2inhibitors\", \"meglitinides\",\n",
    "    \"fivealphareducti\", \"antihyperuricemi\", \"topicalantibioti\", \"topicalantifunga\",\n",
    "    \"inhaledcorticost\", \"mastcellstabiliz\", \"anticholbronchod\", \"glucocorticoids\",\n",
    "    \"mineralocorticoi\", \"agentsforpulmona\", \"macrolides\", \"ketolides\",\n",
    "    \"phenylpiperazine\", \"tetracyclicantid\", \"ssnriantidepress\", \"miscantidiabetic\",\n",
    "    \"dibenzazepineant\", \"cholinergicagoni\", \"cholinesterasein\", \"antidiabeticcomb\",\n",
    "    \"cholesterolabsor\", \"antihyplipcombin\", \"smokingcessation\", \"othersupplements\",\n",
    "]\n",
    "\n",
    "# Upper-case each name and skip the ones listed in medicine_cols_to_keep (list order preserved)\n",
    "medicine_cols_to_drop = [\n",
    "    name.upper() for name in medicine_cols\n",
    "    if name.upper() not in medicine_cols_to_keep\n",
    "]\n",
    "\n",
    "df = df.drop(columns=medicine_cols_to_drop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1b6f4c04-d4af-48cb-941e-d206c5fea6ff",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 3005 entries, 0 to 3004\n",
      "Columns: 334 entries, ID to CASEDIF\n",
      "dtypes: float64(49), int64(6), object(279)\n",
      "memory usage: 7.7+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "a282ca80-1dfd-41a6-8773-67fbc747e254",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Remove the columns flagged as useless for the analysis\n",
    "useless_cols = [\n",
    "    'ID', 'FI_ID', 'PATH', 'VERSION', 'INT_START',\n",
    "    'WEIGHT_SEL', 'WEIGHT_ADJ', 'STRATUM', 'CLUSTER',\n",
    "]\n",
    "\n",
    "df = df.drop(columns=useless_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "33bce8f0-e10f-4adf-8775-a36b202e41ab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 3005 entries, 0 to 3004\n",
      "Columns: 325 entries, GENDER to CASEDIF\n",
      "dtypes: float64(47), int64(1), object(277)\n",
      "memory usage: 7.5+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8130d58-98ce-456e-8992-545b737fae14",
   "metadata": {},
   "source": [
    "# Depression Scale Creation and Entry 
Cleaning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4315d3d1-daa1-4add-a891-8b420b21faba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RESTLES 222\n",
      "CONFIDNT 216\n",
      "WORRY 212\n",
      "RELAXED 211\n",
      "FLTEFF 13\n",
      "NOTGETGO 10\n",
      "FLTDEP 9\n",
      "NOSLEEP 7\n",
      "NOTEAT 7\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "df_pure = df\n",
    "\n",
    "# Items the depression scale is built from\n",
    "symptoms_array = [\n",
    "    \"NOTGETGO\",  # Difficulty getting going\n",
    "    \"FLTDEP\",    # Feeling of deep sadness or emptiness\n",
    "    \"NOSLEEP\",   # Insomnia or sleeping too much\n",
    "    \"RESTLES\",   # Feeling restless\n",
    "    \"NOTEAT\",    # Changes in appetite or weight\n",
    "    \"CONFIDNT\",  # Lack of confidence\n",
    "    \"FLTEFF\",    # Feeling things are out of control\n",
    "    \"RELAXED\",   # Unable to feel relaxed\n",
    "    \"WORRY\"      # Worrying thoughts\n",
    "]\n",
    "\n",
    "df_phq9 = df_pure[symptoms_array]\n",
    "\n",
    "# Missing answers per item, most incomplete item first\n",
    "missing_counts_sorted = df_phq9.isna().sum().sort_values(ascending=False)\n",
    "\n",
    "# Print the sorted counts\n",
    "print(missing_counts_sorted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "98932270-aeae-4758-a9d9-5234087e6734",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 3005 entries, 0 to 3004\n",
      "Data columns (total 9 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 NOTGETGO 2995 non-null object\n",
      " 1 FLTDEP 2996 non-null object\n",
      " 2 NOSLEEP 2998 non-null object\n",
      " 3 RESTLES 2783 non-null object\n",
      " 4 NOTEAT 2998 non-null object\n",
      " 5 CONFIDNT 2789 non-null object\n",
      " 6 FLTEFF 2992 non-null object\n",
      " 7 RELAXED 2794 non-null object\n",
      " 8 WORRY 2793 non-null object\n",
      "dtypes: object(9)\n",
      "memory usage: 211.4+ KB\n"
     ]
    }
   ],
   "source": [
    "df_phq9.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7a9e9234-32bd-4d21-8017-db9ef7354bd9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Index: 2763 entries, 0 to 3004\n",
      "Data columns (total 9 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 NOTGETGO 2763 non-null object\n",
      " 1 FLTDEP 2763 non-null object\n",
      " 2 NOSLEEP 2763 non-null object\n",
      " 3 RESTLES 2763 non-null object\n",
      " 4 NOTEAT 2763 non-null object\n",
      " 5 CONFIDNT 2763 non-null object\n",
      " 6 FLTEFF 2763 non-null object\n",
      " 7 RELAXED 2763 non-null object\n",
      " 8 WORRY 2763 non-null object\n",
      "dtypes: object(9)\n",
      "memory usage: 215.9+ KB\n"
     ]
    }
   ],
   "source": [
    "# Delete all entries that have a missing value in one of these features.\n",
    "# dropna() returns a new frame, so df_phq9 itself stays untouched.\n",
    "df_phq9_complete = df_phq9.dropna()\n",
    "df_phq9_complete.info()  # lost 242 entries (3005 -> 2763)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5ba060e8-46e9-4f4f-a680-33660116e35e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Making depression scale\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "# Build the encoded frame from the never-mutated df_phq9_complete instead of encoding\n",
    "# in place: re-running this cell can then neither flip CONFIDNT/RELAXED a second time\n",
    "# nor feed an old total_sum into the encoder and the new sum.\n",
    "# LabelEncoder numbers the sorted answer labels 0..k-1; collecting its integer output\n",
    "# in a new frame yields integer columns rather than integers stored as object.\n",
    "df_phq9_2 = pd.DataFrame(\n",
    "    {col: LabelEncoder().fit_transform(df_phq9_complete[col]) for col in df_phq9_complete.columns},\n",
    "    index=df_phq9_complete.index,\n",
    ")\n",
    "\n",
    "# Need to reverse the order from bad-good to good-bad\n",
    "# NOTE(review): 3 - x assumes exactly four answer levels (codes 0..3) in both columns - confirm.\n",
    "reverse_scored = ['CONFIDNT', 'RELAXED']\n",
    "df_phq9_2[reverse_scored] = 3 - df_phq9_2[reverse_scored]\n",
    "\n",
    "# Sum only the item columns\n",
    "df_phq9_2['total_sum'] = df_phq9_2[list(df_phq9_complete.columns)].sum(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c7684ba0-4a07-4cf4-a7a6-3d2087e5e125",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Categorize depression\n",
    "def categorize_score(score):\n",
    "    \"\"\"Map a total scale score to a depression category.\n",
    "\n",
    "    Returns 'Normal' for 0-4, 'Mild' for 5-9 and 'ModerateSevere' for 10-27.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if score is outside 0-27 (including NaN). Returning None here\n",
    "            would make the row match none of the category filters downstream and\n",
    "            vanish from the result without any error.\n",
    "    \"\"\"\n",
    "    if not 0 <= score <= 27:\n",
    "        raise ValueError(f'score outside the expected range 0-27: {score!r}')\n",
    "    if score <= 4:\n",
    "        return 'Normal'\n",
    "    if score <= 9:\n",
    "        return 'Mild'\n",
    "    return 'ModerateSevere'\n",
    "\n",
    "# Applying the categorization function to the 'total_sum' column\n",
    "df_phq9_2['depression_category'] = df_phq9_2['total_sum'].apply(categorize_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "eb79389e-0a1e-4f8f-b27e-efb320610038",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate by category\n",
    "category_labels = df_phq9_2['depression_category']\n",
    "df_phq9_normal = df_phq9_2.loc[category_labels == 'Normal']\n",
    "df_phq9_mild = df_phq9_2.loc[category_labels == 'Mild']\n",
    "df_phq9_moderatesevere = df_phq9_2.loc[category_labels == 'ModerateSevere']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c03612d6-788b-4cdd-85f6-8e8e7c541956",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Index: 1308 entries, 0 to 3003\n",
      "Data columns (total 11 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 NOTGETGO 1308 non-null object\n",
      " 1 FLTDEP 1308 non-null object\n",
      " 2 NOSLEEP 1308 non-null object\n",
      " 3 RESTLES 1308 non-null object\n",
      " 4 NOTEAT 1308 non-null object\n",
      " 5 CONFIDNT 1308 non-null object\n",
      " 6 FLTEFF 1308 non-null object\n",
      " 7 RELAXED 1308 non-null object\n",
      " 8 WORRY 1308 non-null object\n",
      " 9 total_sum 1308 non-null object\n",
      " 10 depression_category 1308 non-null object\n",
      "dtypes: object(11)\n",
      "memory usage: 122.6+ KB\n",
      "\n",
      "Index: 940 entries, 2 to 3004\n",
      "Data columns (total 11 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 NOTGETGO 940 non-null object\n",
      " 1 FLTDEP 940 non-null object\n",
      " 2 NOSLEEP 940 non-null object\n",
      " 3 RESTLES 940 non-null object\n",
      " 4 NOTEAT 940 non-null object\n",
      " 5 CONFIDNT 940 non-null object\n",
      " 6 FLTEFF 940 non-null object\n",
      " 7 RELAXED 940 non-null object\n",
      " 8 WORRY 940 non-null object\n",
      " 9 total_sum 940 non-null object\n",
      " 10 depression_category 940 non-null object\n",
      "dtypes: object(11)\n",
      "memory usage: 88.1+ KB\n",
      "\n",
      "Index: 515 entries, 3 to 3000\n",
      "Data columns (total 11 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 NOTGETGO 515 non-null object\n",
      " 1 
FLTDEP 515 non-null object\n",
      " 2 NOSLEEP 515 non-null object\n",
      " 3 RESTLES 515 non-null object\n",
      " 4 NOTEAT 515 non-null object\n",
      " 5 CONFIDNT 515 non-null object\n",
      " 6 FLTEFF 515 non-null object\n",
      " 7 RELAXED 515 non-null object\n",
      " 8 WORRY 515 non-null object\n",
      " 9 total_sum 515 non-null object\n",
      " 10 depression_category 515 non-null object\n",
      "dtypes: object(11)\n",
      "memory usage: 48.3+ KB\n"
     ]
    }
   ],
   "source": [
    "df_phq9_normal.info()\n",
    "df_phq9_mild.info()\n",
    "df_phq9_moderatesevere.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "7b8ab44d-aab2-4b5c-ba3e-e1eabd3d7239",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connect with original dataset\n",
    "# For each category, pull the full feature rows by index label and tag them with a\n",
    "# lower-case category name. assign() returns a new frame, so df_pure is never written to.\n",
    "df_normal_connection = df_pure.loc[df_phq9_normal.index].assign(depression_category='normal')\n",
    "df_mild_connection = df_pure.loc[df_phq9_mild.index].assign(depression_category='mild')\n",
    "df_moderatesevere_connection = df_pure.loc[df_phq9_moderatesevere.index].assign(depression_category='moderatesevere')\n",
    "\n",
    "df_appended = pd.concat([df_normal_connection, df_mild_connection, df_moderatesevere_connection], ignore_index=False)\n",
    "\n",
    "# Drop exactly the items total_sum was built from (the columns of df_phq9), so that no\n",
    "# component of the target stays behind as a feature. Do not re-type this list by hand:\n",
    "# a copy that names UNCNTRL where the scale uses FLTEFF leaves FLTEFF in both exports\n",
    "# and removes UNCNTRL, which is not part of the score.\n",
    "# NOTE(review): confirm FLTEFF (not UNCNTRL) is the intended scale item.\n",
    "scale_items = list(df_phq9.columns)\n",
    "df_appended = df_appended.drop(columns=scale_items)\n",
    "\n",
    "# Index-aligned assignment: both frames still carry the original row labels\n",
    "df_appended['total_sum'] = df_phq9_2['total_sum']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d6d0ae13-8b6a-466f-af99-571509ef1201",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDERAGEAGEGRPDEGREE_RECODEEDUCRACE_RECODEHISPANICETHGRPMILITARYJAIL...IWLOC5IWLOC6STRUCTQBUILDOTBUILDCOMBUILDCASECOMPCASEDIFdepression_categorytotal_sum
0(2) female62(1) 57-64(5) masters(4) bachelors or more(1) white/caucasian(0) no(1) white(0) no(0) no...(1) 1 (quiet)(1) 1 (no smell)(02) detached single family house(4) very well kept(3) fairly well kept (needs cosmetic work)(3) average(11) eleventh case or more(2) somewhat difficultnormal2
1(2) female79(3) 75-85(2) high school diploma/equivalency(3) voc cert/some college/assoc(1) white/caucasian(0) no(1) white(0) no(0) no...(1) 1 (quiet)(1) 1 (no smell)(02) detached single family house(4) very well kept(3) fairly well kept (needs cosmetic work)(4) above average(11) eleventh case or more(3) not very difficultnormal3
17(1) male58(1) 57-64(4) bachelors(4) bachelors or more(1) white/caucasian(0) no(1) white(0) no(0) no...(1) 1 (quiet)(1) 1 (no smell)(02) detached single family house(3) fairly well kept (needs cosmetic work)(3) fairly well kept (needs cosmetic work)(3) average(11) eleventh case or more(2) somewhat difficultnormal2
24(1) male79(3) 75-85(5) masters(4) bachelors or more(1) white/caucasian(0) no(1) white(1) yes(0) no...(1) 1 (quiet)(1) 1 (no smell)(02) detached single family house(4) very well kept(4) very well kept(5) far above average(08) eighth case(2) somewhat difficultnormal4
26(2) female68(2) 65-74(6) law, md or phd(4) bachelors or more(1) white/caucasian(0) no(1) white(0) no(0) no...(1) 1 (quiet)(1) 1 (no smell)(02) detached single family house(4) very well kept(3) fairly well kept (needs cosmetic work)(4) above average(11) eleventh case or more(2) somewhat difficultnormal3
..................................................................
2988(2) female61(1) 57-64(1) none(1) < hs(1) white/caucasian(1) yes(3) hispanic, non-black(0) no(0) no...(1) 1 (quiet)(1) 1 (no smell)(01) trailer(3) fairly well kept (needs cosmetic work)(1) very poorly kept (needs major repairs)(4) above average(11) eleventh case or more(3) not very difficultmoderatesevere14
2991(2) female70(2) 65-74(1) none(1) < hs(3) asian, pacific islander, american indian o...(1) yes(3) hispanic, non-black(0) no(0) no...(1) 1 (quiet)(1) 1 (no smell)(02) detached single family houseNaNNaNNaN(11) eleventh case or more(3) not very difficultmoderatesevere11
2992(2) female70(2) 65-74(3) associates(3) voc cert/some college/assoc(1) white/caucasian(0) no(1) white(0) no(0) no...(1) 1 (quiet)(4) 4(02) detached single family house(3) fairly well kept (needs cosmetic work)(3) fairly well kept (needs cosmetic work)(3) average(11) eleventh case or more(4) not at all difficultmoderatesevere10
2993(1) male63(1) 57-64(4) bachelors(4) bachelors or more(1) white/caucasian(0) no(1) white(0) no(0) no...(2) 2(1) 1 (no smell)(02) detached single family house(4) very well kept(4) very well kept(3) average(07) seventh case(4) not at all difficultmoderatesevere11
3000(2) female73(2) 65-74(3) associates(3) voc cert/some college/assoc(1) white/caucasian(0) no(1) white(0) no(0) no...(2) 2(2) 2(02) detached single family house(4) very well kept(4) very well kept(3) average(05) fifth case(3) not very difficultmoderatesevere17
\n", "

2763 rows × 318 columns

\n", "
" ], "text/plain": [ " GENDER AGE AGEGRP DEGREE_RECODE \\\n", "0 (2) female 62 (1) 57-64 (5) masters \n", "1 (2) female 79 (3) 75-85 (2) high school diploma/equivalency \n", "17 (1) male 58 (1) 57-64 (4) bachelors \n", "24 (1) male 79 (3) 75-85 (5) masters \n", "26 (2) female 68 (2) 65-74 (6) law, md or phd \n", "... ... ... ... ... \n", "2988 (2) female 61 (1) 57-64 (1) none \n", "2991 (2) female 70 (2) 65-74 (1) none \n", "2992 (2) female 70 (2) 65-74 (3) associates \n", "2993 (1) male 63 (1) 57-64 (4) bachelors \n", "3000 (2) female 73 (2) 65-74 (3) associates \n", "\n", " EDUC \\\n", "0 (4) bachelors or more \n", "1 (3) voc cert/some college/assoc \n", "17 (4) bachelors or more \n", "24 (4) bachelors or more \n", "26 (4) bachelors or more \n", "... ... \n", "2988 (1) < hs \n", "2991 (1) < hs \n", "2992 (3) voc cert/some college/assoc \n", "2993 (4) bachelors or more \n", "3000 (3) voc cert/some college/assoc \n", "\n", " RACE_RECODE HISPANIC \\\n", "0 (1) white/caucasian (0) no \n", "1 (1) white/caucasian (0) no \n", "17 (1) white/caucasian (0) no \n", "24 (1) white/caucasian (0) no \n", "26 (1) white/caucasian (0) no \n", "... ... ... \n", "2988 (1) white/caucasian (1) yes \n", "2991 (3) asian, pacific islander, american indian o... (1) yes \n", "2992 (1) white/caucasian (0) no \n", "2993 (1) white/caucasian (0) no \n", "3000 (1) white/caucasian (0) no \n", "\n", " ETHGRP MILITARY JAIL ... IWLOC5 \\\n", "0 (1) white (0) no (0) no ... (1) 1 (quiet) \n", "1 (1) white (0) no (0) no ... (1) 1 (quiet) \n", "17 (1) white (0) no (0) no ... (1) 1 (quiet) \n", "24 (1) white (1) yes (0) no ... (1) 1 (quiet) \n", "26 (1) white (0) no (0) no ... (1) 1 (quiet) \n", "... ... ... ... ... ... \n", "2988 (3) hispanic, non-black (0) no (0) no ... (1) 1 (quiet) \n", "2991 (3) hispanic, non-black (0) no (0) no ... (1) 1 (quiet) \n", "2992 (1) white (0) no (0) no ... (1) 1 (quiet) \n", "2993 (1) white (0) no (0) no ... (2) 2 \n", "3000 (1) white (0) no (0) no ... 
(2) 2 \n", "\n", " IWLOC6 STRUCTQ \\\n", "0 (1) 1 (no smell) (02) detached single family house \n", "1 (1) 1 (no smell) (02) detached single family house \n", "17 (1) 1 (no smell) (02) detached single family house \n", "24 (1) 1 (no smell) (02) detached single family house \n", "26 (1) 1 (no smell) (02) detached single family house \n", "... ... ... \n", "2988 (1) 1 (no smell) (01) trailer \n", "2991 (1) 1 (no smell) (02) detached single family house \n", "2992 (4) 4 (02) detached single family house \n", "2993 (1) 1 (no smell) (02) detached single family house \n", "3000 (2) 2 (02) detached single family house \n", "\n", " BUILD \\\n", "0 (4) very well kept \n", "1 (4) very well kept \n", "17 (3) fairly well kept (needs cosmetic work) \n", "24 (4) very well kept \n", "26 (4) very well kept \n", "... ... \n", "2988 (3) fairly well kept (needs cosmetic work) \n", "2991 NaN \n", "2992 (3) fairly well kept (needs cosmetic work) \n", "2993 (4) very well kept \n", "3000 (4) very well kept \n", "\n", " OTBUILD COMBUILD \\\n", "0 (3) fairly well kept (needs cosmetic work) (3) average \n", "1 (3) fairly well kept (needs cosmetic work) (4) above average \n", "17 (3) fairly well kept (needs cosmetic work) (3) average \n", "24 (4) very well kept (5) far above average \n", "26 (3) fairly well kept (needs cosmetic work) (4) above average \n", "... ... ... \n", "2988 (1) very poorly kept (needs major repairs) (4) above average \n", "2991 NaN NaN \n", "2992 (3) fairly well kept (needs cosmetic work) (3) average \n", "2993 (4) very well kept (3) average \n", "3000 (4) very well kept (3) average \n", "\n", " CASECOMP CASEDIF \\\n", "0 (11) eleventh case or more (2) somewhat difficult \n", "1 (11) eleventh case or more (3) not very difficult \n", "17 (11) eleventh case or more (2) somewhat difficult \n", "24 (08) eighth case (2) somewhat difficult \n", "26 (11) eleventh case or more (2) somewhat difficult \n", "... ... ... 
\n",
       "2988 (11) eleventh case or more (3) not very difficult \n",
       "2991 (11) eleventh case or more (3) not very difficult \n",
       "2992 (11) eleventh case or more (4) not at all difficult \n",
       "2993 (07) seventh case (4) not at all difficult \n",
       "3000 (05) fifth case (3) not very difficult \n",
       "\n",
       " depression_category total_sum \n",
       "0 normal 2 \n",
       "1 normal 3 \n",
       "17 normal 2 \n",
       "24 normal 4 \n",
       "26 normal 3 \n",
       "... ... ... \n",
       "2988 moderatesevere 14 \n",
       "2991 moderatesevere 11 \n",
       "2992 moderatesevere 10 \n",
       "2993 moderatesevere 11 \n",
       "3000 moderatesevere 17 \n",
       "\n",
       "[2763 rows x 318 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_appended"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a25493c0-42b4-4ce4-8fb2-c3579ec28253",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export for regression (total_sum column included)\n",
    "df_appended.to_csv('3labelv4Regression.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4946608b-95e0-4752-b35b-a895d0a85763",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export for classification (same frame without the total_sum column)\n",
    "df_appended_classification = df_appended.drop(columns='total_sum')\n",
    "df_appended_classification.to_csv('3labelv4Classification.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}