{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "d8af933e4af4461c9130f2f39b028193": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [], "layout": "IPY_MODEL_854110ca3fea4201b3e89e4b31346e33" } }, "949502f1dad24c19b8ed255837e65480": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4d1c3e5d2d184895abc8e7dec6a133bc", "placeholder": "​", "style": "IPY_MODEL_f366bd24f8cb407b84e031e5f0d90ab5", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "f9a32d79db034bbc9221e714bb3631be": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_254b3a685431419c8ef166ff1ff8c594", "placeholder": "​", "style": "IPY_MODEL_781a4e29ef364e54a9092d786835b83a", "value": "" } }, "7b1ee4714b65418baa1b6b97358923e0": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_3ae72b8b08ab44148b759ef65e5dad0d", "style": "IPY_MODEL_1a081370130641d696b39165b8c87963", "value": true } }, "e34237ef65fb4157b63d45be1f61ab98": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_2ceeb26183604902b8d9cb8e7a256d05", "style": "IPY_MODEL_e2ed39391a0f481fb104c74a1511c40c", "tooltip": "" } }, "faf6520ec0f8433da1795094a8679490": { 
"model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8b713f9ee3e0451d91a4d7653aca8be5", "placeholder": "​", "style": "IPY_MODEL_e186852a14bc49ccada899ea8d25ed29", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. " } }, "854110ca3fea4201b3e89e4b31346e33": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "4d1c3e5d2d184895abc8e7dec6a133bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", 
"model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f366bd24f8cb407b84e031e5f0d90ab5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "254b3a685431419c8ef166ff1ff8c594": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, 
"grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "781a4e29ef364e54a9092d786835b83a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3ae72b8b08ab44148b759ef65e5dad0d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, 
"object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1a081370130641d696b39165b8c87963": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2ceeb26183604902b8d9cb8e7a256d05": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e2ed39391a0f481fb104c74a1511c40c": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "8b713f9ee3e0451d91a4d7653aca8be5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e186852a14bc49ccada899ea8d25ed29": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1145ce7e08b7470d98ca8e827fa49b20": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_abd184653ad54787a12d708c89bec8cd", "placeholder": "​", "style": "IPY_MODEL_afd1ed836ca941188e97bb6bacedc1ab", "value": "Connecting..." } }, "abd184653ad54787a12d708c89bec8cd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "afd1ed836ca941188e97bb6bacedc1ab": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", 
"description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# Lectura de los diversos Datasets" ], "metadata": { "id": "WUrdxYVGkCnz" } }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "# Ingesta de los 2 Ficheros\n", "df_train = pd.read_csv('application_train.csv')\n", "df_column_description = pd.read_csv('HomeCredit_columns_description.csv', encoding='latin1')" ], "metadata": { "id": "okRCiCTJkHVt" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Lectura Inicial del Fichero Train" ], "metadata": { "id": "Ea5mH3VWqN-4" } }, { "cell_type": "code", "source": [ "df_train.head(10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 412 }, "id": "aNnsaGLAqQe9", "outputId": "62b8e1e8-1417-4ad5-b951-f76d70521446" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR \\\n", "0 100002 1 Cash loans M N \n", "1 100003 0 Cash loans F N \n", "2 100004 0 Revolving loans M Y \n", "3 100006 0 Cash loans F N \n", "4 100007 0 Cash loans M N \n", "5 100008 0 Cash loans M N \n", "6 100009 0 Cash loans F Y \n", "7 100010 0 Cash loans M Y \n", "8 100011 0 Cash loans F N \n", "9 100012 0 Revolving loans M N \n", "\n", " FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY \\\n", "0 Y 0 202500.0 406597.5 24700.5 \n", "1 N 0 270000.0 1293502.5 35698.5 \n", "2 Y 0 67500.0 135000.0 6750.0 \n", "3 Y 0 135000.0 312682.5 29686.5 \n", "4 Y 0 121500.0 513000.0 21865.5 \n", "5 Y 0 99000.0 490495.5 27517.5 \n", "6 Y 1 171000.0 1560726.0 41301.0 \n", "7 Y 0 360000.0 1530000.0 42075.0 \n", "8 Y 0 112500.0 1019610.0 33826.5 \n", "9 Y 0 135000.0 405000.0 20250.0 \n", "\n", " ... FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 \\\n", "0 ... 0 0 0 0 \n", "1 ... 0 0 0 0 \n", "2 ... 0 0 0 0 \n", "3 ... 0 0 0 0 \n", "4 ... 
0 0 0 0 \n", "5 ... 0 0 0 0 \n", "6 ... 0 0 0 0 \n", "7 ... 0 0 0 0 \n", "8 ... 0 0 0 0 \n", "9 ... 0 0 0 0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "5 0.0 0.0 \n", "6 0.0 0.0 \n", "7 0.0 0.0 \n", "8 0.0 0.0 \n", "9 NaN NaN \n", "\n", " AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "5 0.0 0.0 \n", "6 0.0 1.0 \n", "7 0.0 0.0 \n", "8 0.0 0.0 \n", "9 NaN NaN \n", "\n", " AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0.0 1.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "5 1.0 1.0 \n", "6 1.0 2.0 \n", "7 0.0 0.0 \n", "8 0.0 1.0 \n", "9 NaN NaN \n", "\n", "[10 rows x 122 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITY...FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5...00000.00.00.00.00.01.0
11000030Cash loansFNN0270000.01293502.535698.5...00000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0...00000.00.00.00.00.00.0
31000060Cash loansFNY0135000.0312682.529686.5...0000NaNNaNNaNNaNNaNNaN
41000070Cash loansMNY0121500.0513000.021865.5...00000.00.00.00.00.00.0
51000080Cash loansMNY099000.0490495.527517.5...00000.00.00.00.01.01.0
61000090Cash loansFYY1171000.01560726.041301.0...00000.00.00.01.01.02.0
71000100Cash loansMYY0360000.01530000.042075.0...00000.00.00.00.00.00.0
81000110Cash loansFNY0112500.01019610.033826.5...00000.00.00.00.00.01.0
91000120Revolving loansMNY0135000.0405000.020250.0...0000NaNNaNNaNNaNNaNNaN
\n", "

10 rows × 122 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_train" } }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "code", "source": [ "df_train.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kXVQf-_2qTSZ", "outputId": "a264688b-33dd-498c-fccb-4d4d35027f75" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(307511, 122)" ] }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "code", "source": [ "df_train.info(verbose=True, show_counts=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sRhaYIotqvss", "outputId": "8fa5e9cd-5f21-4449-9743-3fa422109ce7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "RangeIndex: 307511 entries, 0 to 307510\n", "Data columns (total 122 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 SK_ID_CURR 307511 non-null int64 \n", " 1 TARGET 307511 non-null int64 \n", " 2 NAME_CONTRACT_TYPE 307511 non-null object \n", " 3 CODE_GENDER 307511 non-null object \n", " 4 FLAG_OWN_CAR 307511 non-null object \n", " 5 FLAG_OWN_REALTY 307511 non-null object \n", " 6 CNT_CHILDREN 307511 non-null int64 \n", " 7 AMT_INCOME_TOTAL 307511 non-null float64\n", " 8 AMT_CREDIT 307511 non-null float64\n", " 9 AMT_ANNUITY 307499 non-null float64\n", " 10 AMT_GOODS_PRICE 307233 non-null float64\n", " 11 NAME_TYPE_SUITE 306219 non-null object \n", " 12 NAME_INCOME_TYPE 307511 non-null object \n", " 13 NAME_EDUCATION_TYPE 307511 non-null object \n", " 14 NAME_FAMILY_STATUS 307511 non-null object \n", " 15 NAME_HOUSING_TYPE 307511 non-null object \n", " 16 REGION_POPULATION_RELATIVE 307511 non-null float64\n", " 17 DAYS_BIRTH 307511 non-null int64 \n", " 18 DAYS_EMPLOYED 307511 non-null int64 \n", " 19 DAYS_REGISTRATION 307511 non-null float64\n", " 20 DAYS_ID_PUBLISH 307511 non-null 
int64 \n", " 21 OWN_CAR_AGE 104582 non-null float64\n", " 22 FLAG_MOBIL 307511 non-null int64 \n", " 23 FLAG_EMP_PHONE 307511 non-null int64 \n", " 24 FLAG_WORK_PHONE 307511 non-null int64 \n", " 25 FLAG_CONT_MOBILE 307511 non-null int64 \n", " 26 FLAG_PHONE 307511 non-null int64 \n", " 27 FLAG_EMAIL 307511 non-null int64 \n", " 28 OCCUPATION_TYPE 211120 non-null object \n", " 29 CNT_FAM_MEMBERS 307509 non-null float64\n", " 30 REGION_RATING_CLIENT 307511 non-null int64 \n", " 31 REGION_RATING_CLIENT_W_CITY 307511 non-null int64 \n", " 32 WEEKDAY_APPR_PROCESS_START 307511 non-null object \n", " 33 HOUR_APPR_PROCESS_START 307511 non-null int64 \n", " 34 REG_REGION_NOT_LIVE_REGION 307511 non-null int64 \n", " 35 REG_REGION_NOT_WORK_REGION 307511 non-null int64 \n", " 36 LIVE_REGION_NOT_WORK_REGION 307511 non-null int64 \n", " 37 REG_CITY_NOT_LIVE_CITY 307511 non-null int64 \n", " 38 REG_CITY_NOT_WORK_CITY 307511 non-null int64 \n", " 39 LIVE_CITY_NOT_WORK_CITY 307511 non-null int64 \n", " 40 ORGANIZATION_TYPE 307511 non-null object \n", " 41 EXT_SOURCE_1 134133 non-null float64\n", " 42 EXT_SOURCE_2 306851 non-null float64\n", " 43 EXT_SOURCE_3 246546 non-null float64\n", " 44 APARTMENTS_AVG 151450 non-null float64\n", " 45 BASEMENTAREA_AVG 127568 non-null float64\n", " 46 YEARS_BEGINEXPLUATATION_AVG 157504 non-null float64\n", " 47 YEARS_BUILD_AVG 103023 non-null float64\n", " 48 COMMONAREA_AVG 92646 non-null float64\n", " 49 ELEVATORS_AVG 143620 non-null float64\n", " 50 ENTRANCES_AVG 152683 non-null float64\n", " 51 FLOORSMAX_AVG 154491 non-null float64\n", " 52 FLOORSMIN_AVG 98869 non-null float64\n", " 53 LANDAREA_AVG 124921 non-null float64\n", " 54 LIVINGAPARTMENTS_AVG 97312 non-null float64\n", " 55 LIVINGAREA_AVG 153161 non-null float64\n", " 56 NONLIVINGAPARTMENTS_AVG 93997 non-null float64\n", " 57 NONLIVINGAREA_AVG 137829 non-null float64\n", " 58 APARTMENTS_MODE 151450 non-null float64\n", " 59 BASEMENTAREA_MODE 127568 non-null float64\n", " 60 
YEARS_BEGINEXPLUATATION_MODE 157504 non-null float64\n", " 61 YEARS_BUILD_MODE 103023 non-null float64\n", " 62 COMMONAREA_MODE 92646 non-null float64\n", " 63 ELEVATORS_MODE 143620 non-null float64\n", " 64 ENTRANCES_MODE 152683 non-null float64\n", " 65 FLOORSMAX_MODE 154491 non-null float64\n", " 66 FLOORSMIN_MODE 98869 non-null float64\n", " 67 LANDAREA_MODE 124921 non-null float64\n", " 68 LIVINGAPARTMENTS_MODE 97312 non-null float64\n", " 69 LIVINGAREA_MODE 153161 non-null float64\n", " 70 NONLIVINGAPARTMENTS_MODE 93997 non-null float64\n", " 71 NONLIVINGAREA_MODE 137829 non-null float64\n", " 72 APARTMENTS_MEDI 151450 non-null float64\n", " 73 BASEMENTAREA_MEDI 127568 non-null float64\n", " 74 YEARS_BEGINEXPLUATATION_MEDI 157504 non-null float64\n", " 75 YEARS_BUILD_MEDI 103023 non-null float64\n", " 76 COMMONAREA_MEDI 92646 non-null float64\n", " 77 ELEVATORS_MEDI 143620 non-null float64\n", " 78 ENTRANCES_MEDI 152683 non-null float64\n", " 79 FLOORSMAX_MEDI 154491 non-null float64\n", " 80 FLOORSMIN_MEDI 98869 non-null float64\n", " 81 LANDAREA_MEDI 124921 non-null float64\n", " 82 LIVINGAPARTMENTS_MEDI 97312 non-null float64\n", " 83 LIVINGAREA_MEDI 153161 non-null float64\n", " 84 NONLIVINGAPARTMENTS_MEDI 93997 non-null float64\n", " 85 NONLIVINGAREA_MEDI 137829 non-null float64\n", " 86 FONDKAPREMONT_MODE 97216 non-null object \n", " 87 HOUSETYPE_MODE 153214 non-null object \n", " 88 TOTALAREA_MODE 159080 non-null float64\n", " 89 WALLSMATERIAL_MODE 151170 non-null object \n", " 90 EMERGENCYSTATE_MODE 161756 non-null object \n", " 91 OBS_30_CNT_SOCIAL_CIRCLE 306490 non-null float64\n", " 92 DEF_30_CNT_SOCIAL_CIRCLE 306490 non-null float64\n", " 93 OBS_60_CNT_SOCIAL_CIRCLE 306490 non-null float64\n", " 94 DEF_60_CNT_SOCIAL_CIRCLE 306490 non-null float64\n", " 95 DAYS_LAST_PHONE_CHANGE 307510 non-null float64\n", " 96 FLAG_DOCUMENT_2 307511 non-null int64 \n", " 97 FLAG_DOCUMENT_3 307511 non-null int64 \n", " 98 FLAG_DOCUMENT_4 307511 non-null int64 \n", 
" 99 FLAG_DOCUMENT_5 307511 non-null int64 \n", " 100 FLAG_DOCUMENT_6 307511 non-null int64 \n", " 101 FLAG_DOCUMENT_7 307511 non-null int64 \n", " 102 FLAG_DOCUMENT_8 307511 non-null int64 \n", " 103 FLAG_DOCUMENT_9 307511 non-null int64 \n", " 104 FLAG_DOCUMENT_10 307511 non-null int64 \n", " 105 FLAG_DOCUMENT_11 307511 non-null int64 \n", " 106 FLAG_DOCUMENT_12 307511 non-null int64 \n", " 107 FLAG_DOCUMENT_13 307511 non-null int64 \n", " 108 FLAG_DOCUMENT_14 307511 non-null int64 \n", " 109 FLAG_DOCUMENT_15 307511 non-null int64 \n", " 110 FLAG_DOCUMENT_16 307511 non-null int64 \n", " 111 FLAG_DOCUMENT_17 307511 non-null int64 \n", " 112 FLAG_DOCUMENT_18 307511 non-null int64 \n", " 113 FLAG_DOCUMENT_19 307511 non-null int64 \n", " 114 FLAG_DOCUMENT_20 307511 non-null int64 \n", " 115 FLAG_DOCUMENT_21 307511 non-null int64 \n", " 116 AMT_REQ_CREDIT_BUREAU_HOUR 265992 non-null float64\n", " 117 AMT_REQ_CREDIT_BUREAU_DAY 265992 non-null float64\n", " 118 AMT_REQ_CREDIT_BUREAU_WEEK 265992 non-null float64\n", " 119 AMT_REQ_CREDIT_BUREAU_MON 265992 non-null float64\n", " 120 AMT_REQ_CREDIT_BUREAU_QRT 265992 non-null float64\n", " 121 AMT_REQ_CREDIT_BUREAU_YEAR 265992 non-null float64\n", "dtypes: float64(65), int64(41), object(16)\n", "memory usage: 286.2+ MB\n" ] } ] }, { "cell_type": "code", "source": [ "df_train.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 350 }, "id": "f7ZmA8j0qsDM", "outputId": "6b561c5e-b8ba-463d-a75c-9024fe93e814" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SK_ID_CURR TARGET CNT_CHILDREN AMT_INCOME_TOTAL \\\n", "count 307511.000000 307511.000000 307511.000000 3.075110e+05 \n", "mean 278180.518577 0.080729 0.417052 1.687979e+05 \n", "std 102790.175348 0.272419 0.722121 2.371231e+05 \n", "min 100002.000000 0.000000 0.000000 2.565000e+04 \n", "25% 189145.500000 0.000000 0.000000 1.125000e+05 \n", "50% 278202.000000 0.000000 0.000000 
1.471500e+05 \n", "75% 367142.500000 0.000000 1.000000 2.025000e+05 \n", "max 456255.000000 1.000000 19.000000 1.170000e+08 \n", "\n", " AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE \\\n", "count 3.075110e+05 307499.000000 3.072330e+05 \n", "mean 5.990260e+05 27108.573909 5.383962e+05 \n", "std 4.024908e+05 14493.737315 3.694465e+05 \n", "min 4.500000e+04 1615.500000 4.050000e+04 \n", "25% 2.700000e+05 16524.000000 2.385000e+05 \n", "50% 5.135310e+05 24903.000000 4.500000e+05 \n", "75% 8.086500e+05 34596.000000 6.795000e+05 \n", "max 4.050000e+06 258025.500000 4.050000e+06 \n", "\n", " REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED ... \\\n", "count 307511.000000 307511.000000 307511.000000 ... \n", "mean 0.020868 -16036.995067 63815.045904 ... \n", "std 0.013831 4363.988632 141275.766519 ... \n", "min 0.000290 -25229.000000 -17912.000000 ... \n", "25% 0.010006 -19682.000000 -2760.000000 ... \n", "50% 0.018850 -15750.000000 -1213.000000 ... \n", "75% 0.028663 -12413.000000 -289.000000 ... \n", "max 0.072508 -7489.000000 365243.000000 ... 
\n", "\n", " FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 \\\n", "count 307511.000000 307511.000000 307511.000000 307511.000000 \n", "mean 0.008130 0.000595 0.000507 0.000335 \n", "std 0.089798 0.024387 0.022518 0.018299 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 0.000000 \n", "75% 0.000000 0.000000 0.000000 0.000000 \n", "max 1.000000 1.000000 1.000000 1.000000 \n", "\n", " AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY \\\n", "count 265992.000000 265992.000000 \n", "mean 0.006402 0.007000 \n", "std 0.083849 0.110757 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 4.000000 9.000000 \n", "\n", " AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON \\\n", "count 265992.000000 265992.000000 \n", "mean 0.034362 0.267395 \n", "std 0.204685 0.916002 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 8.000000 27.000000 \n", "\n", " AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "count 265992.000000 265992.000000 \n", "mean 0.265474 1.899974 \n", "std 0.794056 1.869295 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 1.000000 \n", "75% 0.000000 3.000000 \n", "max 261.000000 25.000000 \n", "\n", "[8 rows x 106 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYED...FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
count307511.000000307511.000000307511.0000003.075110e+053.075110e+05307499.0000003.072330e+05307511.000000307511.000000307511.000000...307511.000000307511.000000307511.000000307511.000000265992.000000265992.000000265992.000000265992.000000265992.000000265992.000000
mean278180.5185770.0807290.4170521.687979e+055.990260e+0527108.5739095.383962e+050.020868-16036.99506763815.045904...0.0081300.0005950.0005070.0003350.0064020.0070000.0343620.2673950.2654741.899974
std102790.1753480.2724190.7221212.371231e+054.024908e+0514493.7373153.694465e+050.0138314363.988632141275.766519...0.0897980.0243870.0225180.0182990.0838490.1107570.2046850.9160020.7940561.869295
min100002.0000000.0000000.0000002.565000e+044.500000e+041615.5000004.050000e+040.000290-25229.000000-17912.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%189145.5000000.0000000.0000001.125000e+052.700000e+0516524.0000002.385000e+050.010006-19682.000000-2760.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
50%278202.0000000.0000000.0000001.471500e+055.135310e+0524903.0000004.500000e+050.018850-15750.000000-1213.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
75%367142.5000000.0000001.0000002.025000e+058.086500e+0534596.0000006.795000e+050.028663-12413.000000-289.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000003.000000
max456255.0000001.00000019.0000001.170000e+084.050000e+06258025.5000004.050000e+060.072508-7489.000000365243.000000...1.0000001.0000001.0000001.0000004.0000009.0000008.00000027.000000261.00000025.000000
\n", "

8 rows × 106 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe" } }, "metadata": {}, "execution_count": 13 } ] }, { "cell_type": "markdown", "source": [ "# Limpieza Duplicados" ], "metadata": { "id": "MmvS5Ogirdap" } }, { "cell_type": "markdown", "source": [ "Primero voy a hacer un conteo de todos los duplicados que hay en el dataset" ], "metadata": { "id": "gghY_Wd5riZN" } }, { "cell_type": "code", "source": [ "df_train_duplicados = df_train.duplicated().sum()\n", "\n", "print(f\"El dataset tiene: {df_train_duplicados} duplicados\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LyEF5xBnrJth", "outputId": "0a132c2d-dd31-4d22-d062-75e0f7ff6e54" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "El dataset tiene: 0 duplicados\n" ] } ] }, { "cell_type": "markdown", "source": [ "Segundo voy a mirar cuántas filas no duplicadas tiene" ], "metadata": { "id": "GdHS_8-br8CL" } }, { "cell_type": "code", "source": [ "df_sin_duplicados = df_train.drop_duplicates()\n", "print(f\"Las filas no duplicadas son: {df_sin_duplicados.shape[0]}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0ODp3NWPrxwo", "outputId": "c8e1b1b8-afb0-4759-d85b-c00bcd5eb607" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Las filas no duplicadas son: 307511\n" ] } ] }, { "cell_type": "markdown", "source": [ "Con esto vemos que no tiene duplicados" ], "metadata": { "id": "JUgz9f4QsOQ1" } }, { "cell_type": "markdown", "source": [ "# Tratamiento Nulos" ], "metadata": { "id": "DZAe0LTnsS_P" } }, { "cell_type": "code", "source": [ "def missing_data(data):\n", " total = data.isnull().sum().sort_values(ascending=False)\n", " percent = (data.isnull().sum()/data.isnull().count()*100).sort_values(ascending=False)\n", "\n", " return pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])" ], "metadata": { "id": "2-F9nwzSfuxZ" }, 
"execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "missing_data(df_train).head(10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "id": "hjonoOrGgqQL", "outputId": "6caad331-6f14-4d21-b13f-52f532eb6fee" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Total Percent\n", "COMMONAREA_AVG 214865 69.872297\n", "COMMONAREA_MODE 214865 69.872297\n", "COMMONAREA_MEDI 214865 69.872297\n", "NONLIVINGAPARTMENTS_MEDI 213514 69.432963\n", "NONLIVINGAPARTMENTS_MODE 213514 69.432963\n", "NONLIVINGAPARTMENTS_AVG 213514 69.432963\n", "FONDKAPREMONT_MODE 210295 68.386172\n", "LIVINGAPARTMENTS_AVG 210199 68.354953\n", "LIVINGAPARTMENTS_MEDI 210199 68.354953\n", "LIVINGAPARTMENTS_MODE 210199 68.354953" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TotalPercent
COMMONAREA_AVG21486569.872297
COMMONAREA_MODE21486569.872297
COMMONAREA_MEDI21486569.872297
NONLIVINGAPARTMENTS_MEDI21351469.432963
NONLIVINGAPARTMENTS_MODE21351469.432963
NONLIVINGAPARTMENTS_AVG21351469.432963
FONDKAPREMONT_MODE21029568.386172
LIVINGAPARTMENTS_AVG21019968.354953
LIVINGAPARTMENTS_MEDI21019968.354953
LIVINGAPARTMENTS_MODE21019968.354953
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"missing_data(df_train)\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2121,\n \"min\": 210199,\n \"max\": 214865,\n \"num_unique_values\": 4,\n \"samples\": [\n 213514,\n 210199,\n 214865\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Percent\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6898714243691965,\n \"min\": 68.35495315614726,\n \"max\": 69.87229725115525,\n \"num_unique_values\": 4,\n \"samples\": [\n 69.43296337366793,\n 68.35495315614726,\n 69.87229725115525\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "code", "source": [ "df_train.head(10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 412 }, "id": "z5hbhq9EsKMk", "outputId": "6ddca3b2-b139-4705-f76d-4a63deab60c2" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR \\\n", "0 100002 1 Cash loans M N \n", "1 100003 0 Cash loans F N \n", "2 100004 0 Revolving loans M Y \n", "3 100006 0 Cash loans F N \n", "4 100007 0 Cash loans M N \n", "5 100008 0 Cash loans M N \n", "6 100009 0 Cash loans F Y \n", "7 100010 0 Cash loans M Y \n", "8 100011 0 Cash loans F N \n", "9 100012 0 Revolving loans M N \n", "\n", " FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY \\\n", "0 Y 0 202500.0 406597.5 24700.5 \n", "1 N 0 270000.0 1293502.5 35698.5 \n", "2 Y 0 67500.0 135000.0 6750.0 \n", "3 Y 0 135000.0 312682.5 29686.5 \n", "4 Y 0 121500.0 513000.0 21865.5 \n", "5 Y 0 99000.0 490495.5 27517.5 \n", "6 Y 1 171000.0 1560726.0 41301.0 \n", "7 Y 0 360000.0 1530000.0 42075.0 \n", "8 Y 0 112500.0 1019610.0 33826.5 \n", "9 Y 0 135000.0 
405000.0 20250.0 \n", "\n", " ... FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 \\\n", "0 ... 0 0 0 0 \n", "1 ... 0 0 0 0 \n", "2 ... 0 0 0 0 \n", "3 ... 0 0 0 0 \n", "4 ... 0 0 0 0 \n", "5 ... 0 0 0 0 \n", "6 ... 0 0 0 0 \n", "7 ... 0 0 0 0 \n", "8 ... 0 0 0 0 \n", "9 ... 0 0 0 0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "5 0.0 0.0 \n", "6 0.0 0.0 \n", "7 0.0 0.0 \n", "8 0.0 0.0 \n", "9 NaN NaN \n", "\n", " AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "5 0.0 0.0 \n", "6 0.0 1.0 \n", "7 0.0 0.0 \n", "8 0.0 0.0 \n", "9 NaN NaN \n", "\n", " AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0.0 1.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "5 1.0 1.0 \n", "6 1.0 2.0 \n", "7 0.0 0.0 \n", "8 0.0 1.0 \n", "9 NaN NaN \n", "\n", "[10 rows x 122 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITY...FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5...00000.00.00.00.00.01.0
11000030Cash loansFNN0270000.01293502.535698.5...00000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0...00000.00.00.00.00.00.0
31000060Cash loansFNY0135000.0312682.529686.5...0000NaNNaNNaNNaNNaNNaN
41000070Cash loansMNY0121500.0513000.021865.5...00000.00.00.00.00.00.0
51000080Cash loansMNY099000.0490495.527517.5...00000.00.00.00.01.01.0
61000090Cash loansFYY1171000.01560726.041301.0...00000.00.00.01.01.02.0
71000100Cash loansMYY0360000.01530000.042075.0...00000.00.00.00.00.00.0
81000110Cash loansFNY0112500.01019610.033826.5...00000.00.00.00.00.01.0
91000120Revolving loansMNY0135000.0405000.020250.0...0000NaNNaNNaNNaNNaNNaN
\n", "

10 rows × 122 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_train" } }, "metadata": {}, "execution_count": 18 } ] }, { "cell_type": "code", "source": [ "df_train.set_index('SK_ID_CURR', inplace=True)" ], "metadata": { "id": "EzMCtzF7subW" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Voy a calcular el porcentaje de Nulos por Columna" ], "metadata": { "id": "j14lz7PcSD9H" } }, { "cell_type": "code", "source": [ "total_nulos = df_train.isna().sum().sort_values(ascending=False)\n", "porcentaje = (df_train.isnull().sum() / df_train.isnull().count()).sort_values(ascending=False) * 100\n", "missing_data = pd.concat([total_nulos, porcentaje], axis=1, keys=['Total', 'Porcentaje'])\n", "missing_data.head(50)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "lituQ1OBSInO", "outputId": "13d9d6ff-504e-436c-d6d5-23f0f732137d" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Total Porcentaje\n", "COMMONAREA_AVG 214865 69.872297\n", "COMMONAREA_MEDI 214865 69.872297\n", "COMMONAREA_MODE 214865 69.872297\n", "NONLIVINGAPARTMENTS_AVG 213514 69.432963\n", "NONLIVINGAPARTMENTS_MEDI 213514 69.432963\n", "NONLIVINGAPARTMENTS_MODE 213514 69.432963\n", "FONDKAPREMONT_MODE 210295 68.386172\n", "LIVINGAPARTMENTS_MEDI 210199 68.354953\n", "LIVINGAPARTMENTS_AVG 210199 68.354953\n", "LIVINGAPARTMENTS_MODE 210199 68.354953\n", "FLOORSMIN_MODE 208642 67.848630\n", "FLOORSMIN_AVG 208642 67.848630\n", "FLOORSMIN_MEDI 208642 67.848630\n", "YEARS_BUILD_MODE 204488 66.497784\n", "YEARS_BUILD_MEDI 204488 66.497784\n", "YEARS_BUILD_AVG 204488 66.497784\n", "OWN_CAR_AGE 202929 65.990810\n", "LANDAREA_MEDI 182590 59.376738\n", "LANDAREA_AVG 182590 59.376738\n", "LANDAREA_MODE 182590 59.376738\n", "BASEMENTAREA_MODE 179943 58.515956\n", "BASEMENTAREA_AVG 179943 58.515956\n", "BASEMENTAREA_MEDI 179943 58.515956\n", 
"EXT_SOURCE_1 173378 56.381073\n", "NONLIVINGAREA_AVG 169682 55.179164\n", "NONLIVINGAREA_MODE 169682 55.179164\n", "NONLIVINGAREA_MEDI 169682 55.179164\n", "ELEVATORS_MEDI 163891 53.295980\n", "ELEVATORS_AVG 163891 53.295980\n", "ELEVATORS_MODE 163891 53.295980\n", "WALLSMATERIAL_MODE 156341 50.840783\n", "APARTMENTS_AVG 156061 50.749729\n", "APARTMENTS_MODE 156061 50.749729\n", "APARTMENTS_MEDI 156061 50.749729\n", "ENTRANCES_MEDI 154828 50.348768\n", "ENTRANCES_MODE 154828 50.348768\n", "ENTRANCES_AVG 154828 50.348768\n", "LIVINGAREA_AVG 154350 50.193326\n", "LIVINGAREA_MODE 154350 50.193326\n", "LIVINGAREA_MEDI 154350 50.193326\n", "HOUSETYPE_MODE 154297 50.176091\n", "FLOORSMAX_MODE 153020 49.760822\n", "FLOORSMAX_AVG 153020 49.760822\n", "FLOORSMAX_MEDI 153020 49.760822\n", "YEARS_BEGINEXPLUATATION_MODE 150007 48.781019\n", "YEARS_BEGINEXPLUATATION_AVG 150007 48.781019\n", "YEARS_BEGINEXPLUATATION_MEDI 150007 48.781019\n", "TOTALAREA_MODE 148431 48.268517\n", "EMERGENCYSTATE_MODE 145755 47.398304\n", "OCCUPATION_TYPE 96391 31.345545" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TotalPorcentaje
COMMONAREA_AVG21486569.872297
COMMONAREA_MEDI21486569.872297
COMMONAREA_MODE21486569.872297
NONLIVINGAPARTMENTS_AVG21351469.432963
NONLIVINGAPARTMENTS_MEDI21351469.432963
NONLIVINGAPARTMENTS_MODE21351469.432963
FONDKAPREMONT_MODE21029568.386172
LIVINGAPARTMENTS_MEDI21019968.354953
LIVINGAPARTMENTS_AVG21019968.354953
LIVINGAPARTMENTS_MODE21019968.354953
FLOORSMIN_MODE20864267.848630
FLOORSMIN_AVG20864267.848630
FLOORSMIN_MEDI20864267.848630
YEARS_BUILD_MODE20448866.497784
YEARS_BUILD_MEDI20448866.497784
YEARS_BUILD_AVG20448866.497784
OWN_CAR_AGE20292965.990810
LANDAREA_MEDI18259059.376738
LANDAREA_AVG18259059.376738
LANDAREA_MODE18259059.376738
BASEMENTAREA_MODE17994358.515956
BASEMENTAREA_AVG17994358.515956
BASEMENTAREA_MEDI17994358.515956
EXT_SOURCE_117337856.381073
NONLIVINGAREA_AVG16968255.179164
NONLIVINGAREA_MODE16968255.179164
NONLIVINGAREA_MEDI16968255.179164
ELEVATORS_MEDI16389153.295980
ELEVATORS_AVG16389153.295980
ELEVATORS_MODE16389153.295980
WALLSMATERIAL_MODE15634150.840783
APARTMENTS_AVG15606150.749729
APARTMENTS_MODE15606150.749729
APARTMENTS_MEDI15606150.749729
ENTRANCES_MEDI15482850.348768
ENTRANCES_MODE15482850.348768
ENTRANCES_AVG15482850.348768
LIVINGAREA_AVG15435050.193326
LIVINGAREA_MODE15435050.193326
LIVINGAREA_MEDI15435050.193326
HOUSETYPE_MODE15429750.176091
FLOORSMAX_MODE15302049.760822
FLOORSMAX_AVG15302049.760822
FLOORSMAX_MEDI15302049.760822
YEARS_BEGINEXPLUATATION_MODE15000748.781019
YEARS_BEGINEXPLUATATION_AVG15000748.781019
YEARS_BEGINEXPLUATATION_MEDI15000748.781019
TOTALAREA_MODE14843148.268517
EMERGENCYSTATE_MODE14575547.398304
OCCUPATION_TYPE9639131.345545
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "missing_data", "summary": "{\n \"name\": \"missing_data\",\n \"rows\": 121,\n \"fields\": [\n {\n \"column\": \"Total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 87570,\n \"min\": 0,\n \"max\": 214865,\n \"num_unique_values\": 32,\n \"samples\": [\n 2,\n 154350,\n 1292\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Porcentaje\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28.47712108258451,\n \"min\": 0.0,\n \"max\": 69.87229725115525,\n \"num_unique_values\": 32,\n \"samples\": [\n 0.0006503832383231819,\n 50.193326417591564,\n 0.42014757195677555\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "markdown", "source": [ "### Desbalanceo en el Target" ], "metadata": { "id": "BEjvoPJviMjS" } }, { "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "temp = df_train['TARGET'].value_counts()\n", "df = pd.DataFrame({'labels': temp.index,\n", " 'values': temp.values\n", " })\n", "plt.figure(figsize=(6, 6))\n", "plt.title('Train Dataset')\n", "sns.set_color_codes(\"pastel\")\n", "sns.barplot(x = 'labels', y='values', data = df)\n", "locs, labels = plt.xticks()\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 564 }, "id": "7ECWxbX2iO7u", "outputId": "c50078c3-b25e-42aa-b365-a9cfbb6defb8" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAIjCAYAAAAQrVEdAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANRVJREFUeJzt3X98znX////7sbEffhybn5ud5kcoRJxG2yoiy6HkTJbo50hU5xArpDT0S3FWEvGuLmdL5f2WisLZWPPrU5Y0yY+apIlODubHdjBssx3fP3z3ujjasIOtQ0+36+VyXE7H6/Xc63gcx3mR2+U4XsdrNrfb7RYAAIAB/Hw9AAAAQGUhbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAXJIGDx6sZs2a+XoMAH8xhA0Ar9hstgrdVq9e7etRPaxevdpjvsDAQIWFhal79+568cUXlZOTc8HH/vHHHzV58mTt2rWr8ga+CPPnz9eMGTN8PQbgEzZ+VxQAb3zwwQce9+fNm6e0tDS9//77HttvvvlmhYWFXfDjFBUVqaSkRIGBgRd8jDOtXr1aPXr00KhRo9SlSxcVFxcrJydH69at05IlSxQSEqKPPvpIN910k9fH/vjjjzVgwACtWrVK3bt3r5R5L8Ztt92mrVu3XjKhBfyZqvl6AAB/Lffdd5/H/W+++UZpaWlltv/R8ePHVaNGjQo/TvXq1S9ovvPp2rWr7rzzTo9tP/zwg3r16qX4+Hj9+OOPatSoUZU8NoCqx0dRACpd9+7d1a5dO2VmZqpbt26qUaOGnnrqKUnSZ599pj59+igiIkKBgYFq0aKFnnvuORUXF3sc44/n2OzatUs2m03/+te/9NZbb6lFixYKDAxUly5dtGHDhouat0OHDpoxY4Zyc3M1a9Ysa/tvv/2mf/7zn7rqqqsUHBysevXqacCAAR7vhKSkpGjAgAGSpB49epT5KK6iz3fHjh2Kj49XeHi4goKC1LhxYw0aNEh5eXke6z744ANFRUUpODhYdevW1aBBg7Rnzx5rf/fu3bVs2TL99ttv1iycq4TLCe/YAKgShw4d0i233KJBgwbpvvvusz6WSklJUa1atZSUlKRatWpp5cqVSk5Olsvl0vTp08973Pnz5+vo0aN6+OGHZbPZNG3aNPXv31+//vrrRb3Lc+edd2ro0KFasWKFXnjhBUnShg0btG7dOg0aNEiNGzfWrl27NGfOHHXv3l0//vijatSooW7dumnUqFGaOXOmnnrqKbVp00aSrP+tyPMtLCyUw+FQQUGBRo4cqfDwcP33v//V0qVLlZubq5CQEEnSCy+8oGeeeUZ33XWXHnroIeXk5OiNN95Qt27d9P333ys0NFRPP/208vLy9Pvvv+u1116TJNWqVeuCXxfgL8cNABchMTHR/cf/lNx4441uSe65c+eWWX/8+PEy2x5++GF3jRo13CdPnrS2JSQkuJs2bWrdz87Odkty16tXz3348GFr+2effeaW5F6yZMk551y1apVbknvhwoVnXdOhQwd3nTp1zjlrRkaGW5J73rx51raFCxe6JblXrVpVZn1Fnu/3339/3tl27drl9vf3d7/wwgse27ds2eKuVq2ax/Y+ffp4vHbA5YSPogBUicDAQA0ZMqTM9uDgYOvPR48e1cGDB9W1a1cdP35cWVlZ5z3uwIEDVadOHet+165dJUm//vrrRc9cq1YtHT16tNxZi4qKdOjQIbVs2VKhoaHauHFjhY5Zkedb+o7M8uXLdfz48XKP8+mnn6qkpER33XWXDh48aN3Cw8PVqlUrrVq1yuvnC5iIj6IAVIm//e1vCggIKLN927ZtmjhxolauXCmXy+Wx74/nk5SnSZMmHvdLI+fIkSMXMe1px44dU+3ata37J06c0NSpU/Xuu+/qv//9r9xnfIm0IrNKFXu+zZs3V1JSkl599VV9+OGH6
tq1q/7xj3/ovvvus6Jnx44dcrvdatWqVbmPU1UnWwN/NYQNgCpx5jsVpXJzc3XjjTfKbrfr2WefVYsWLRQUFKSNGzdq/PjxKikpOe9x/f39y93uvsgrVxQVFennn39Wu3btrG0jR47Uu+++q9GjRys2NlYhISGy2WwaNGhQhWb15vm+8sorGjx4sD777DOtWLFCo0aN0tSpU/XNN9+ocePGKikpkc1m0xdffFHua8B5NMBphA2AP83q1at16NAhffrpp+rWrZu1PTs724dTnfbxxx/rxIkTcjgcHtsSEhL0yiuvWNtOnjyp3Nxcj5+12WzlHtPb59u+fXu1b99eEydO1Lp163T99ddr7ty5ev7559WiRQu53W41b95cV1555Tmfy9nmAS4HnGMD4E9T+k7Dme+uFBYW6s033/TVSJJOX8dm9OjRqlOnjhITE63t/v7+Zd4JeuONN8p8VbtmzZqSVCZ4Kvp8XS6XTp065bGtffv28vPzU0FBgSSpf//+8vf315QpU8rM5Ha7dejQIY95KvpRGWAa3rEB8Ke57rrrVKdOHSUkJGjUqFGy2Wx6//33L/pjJG/8v//3/3Ty5EkVFxfr0KFD+vrrr/X5558rJCREixYtUnh4uLX2tttu0/vvv6+QkBC1bdtWGRkZ+vLLL1WvXj2PY3bs2FH+/v56+eWXlZeXp8DAQN10000Vfr4rV67UiBEjNGDAAF155ZU6deqU3n//ffn7+ys+Pl6S1KJFCz3//POaMGGCdu3apX79+ql27drKzs7WokWLNHz4cD3xxBOSpKioKC1YsEBJSUnq0qWLatWqpb59+1bxKwtcInz0bSwAhjjb172vvvrqctd//fXX7piYGHdwcLA7IiLCPW7cOPfy5cvLfF36bF/3nj59epljSnJPmjTpnHOWft279Fa9enV3gwYN3N26dXO/8MIL7gMHDpT5mSNHjriHDBnirl+/vrtWrVpuh8PhzsrKcjdt2tSdkJDgsfbtt992X3HFFW5/f3+P51KR5/vrr7+6H3zwQXeLFi3cQUFB7rp167p79Ojh/vLLL8vM9Mknn7hvuOEGd82aNd01a9Z0t27d2p2YmOjevn27tebYsWPue+65xx0aGuqWxFe/cVnhd0UBAABjcI4NAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIzBBfr+RCUlJdq7d69q167NJc8BAPCC2+3W0aNHFRERIT+/s78vQ9j8ifbu3avIyEhfjwEAwF/Wnj171Lhx47PuJ2z+RLVr15Z0+v8Uu93u42kAAPjrcLlcioyMtP4tPRvC5k9U+vGT3W4nbAAAuADnO5WDk4cBAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGKOarwdA5YkaO8/XIwBVLnP6A74eAcAljHdsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAyfhs3UqVPVpUsX1a5dWw0bNlS/fv20fft2jzXdu3eXzWbzuD3yyCMea3bv3q0+ffqoRo0aatiwocaOHatTp055rFm9erU6deqkwMBAtWzZUikpKWXmmT17tpo1a6ago
CBFR0fr22+/9dh/8uRJJSYmql69eqpVq5bi4+O1f//+ynkxAADARfNp2KxZs0aJiYn65ptvlJaWpqKiIvXq1Uv5+fke64YNG6Z9+/ZZt2nTpln7iouL1adPHxUWFmrdunV67733lJKSouTkZGtNdna2+vTpox49emjTpk0aPXq0HnroIS1fvtxas2DBAiUlJWnSpEnauHGjOnToIIfDoQMHDlhrxowZoyVLlmjhwoVas2aN9u7dq/79+1fhKwQAALxhc7vdbl8PUSonJ0cNGzbUmjVr1K1bN0mn37Hp2LGjZsyYUe7PfPHFF7rtttu0d+9ehYWFSZLmzp2r8ePHKycnRwEBARo/fryWLVumrVu3Wj83aNAg5ebmKjU1VZIUHR2tLl26aNasWZKkkpISRUZGauTIkXryySeVl5enBg0aaP78+brzzjslSVlZWWrTpo0yMjIUExNz3ufncrkUEhKivLw82e32C36dziZq7LxKPyZwqcmc/oCvRwDgAxX9N/SSOscmLy9PklS3bl2P7R9++KHq16+vdu3aacKECTp+/Li1LyMjQ+3bt7eiRpIcDodcLpe2bdtmrYmLi/M4psPhUEZGhiSpsLBQmZmZHmv8/PwUFxdnrcnMzFRRUZHHmtatW6tJkybWmj8qKCiQy+XyuAEAgKpTzdcDlCopKdHo0aN1/fXXq127dtb2e+65R02bNlVERIQ2b96s8ePHa/v27fr0008lSU6n0yNqJFn3nU7nOde4XC6dOHFCR44cUXFxcblrsrKyrGMEBAQoNDS0zJrSx/mjqVOnasqUKV6+EgAA4EJdMmGTmJiorVu36quvvvLYPnz4cOvP7du3V6NGjdSzZ0/t3LlTLVq0+LPH9MqECROUlJRk3Xe5XIqMjPThRAAAmO2S+ChqxIgRWrp0qVatWqXGjRufc210dLQk6ZdffpEkhYeHl/lmUun98PDwc66x2+0KDg5W/fr15e/vX+6aM49RWFio3Nzcs675o8DAQNntdo8bAACoOj4NG7fbrREjRmjRokVauXKlmjdvft6f2bRpkySpUaNGkqTY2Fht2bLF49tLaWlpstvtatu2rbUmPT3d4zhpaWmKjY2VJAUEBCgqKspjTUlJidLT0601UVFRql69usea7du3a/fu3dYaAADgWz79KCoxMVHz58/XZ599ptq1a1vnqoSEhCg4OFg7d+7U/Pnzdeutt6pevXravHmzxowZo27duumaa66RJPXq1Utt27bV/fffr2nTpsnpdGrixIlKTExUYGCgJOmRRx7RrFmzNG7cOD344INauXKlPvroIy1btsyaJSkpSQkJCercubOuvfZazZgxQ/n5+RoyZIg109ChQ5WUlKS6devKbrdr5MiRio2NrdA3ogAAQNXzadjMmTNH0umvdJ/p3Xff1eDBgxUQEKAvv/zSiozIyEjFx8dr4sSJ1lp/f38tXbpUjz76qGJjY1WzZk0lJCTo2WeftdY0b95cy5Yt05gxY/T666+rcePGeuedd+RwOKw1AwcOVE5OjpKTk+V0OtWxY0elpqZ6nFD82muvyc/PT/Hx8SooKJDD4dCbb75ZRa8OAADw1iV1HRvTcR0b4OJxHRvg8vSXvI4NAADAxSBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsA
ACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMn4bN1KlT1aVLF9WuXVsNGzZUv379tH37do81J0+eVGJiourVq6datWopPj5e+/fv91ize/du9enTRzVq1FDDhg01duxYnTp1ymPN6tWr1alTJwUGBqply5ZKSUkpM8/s2bPVrFkzBQUFKTo6Wt9++63XswAAAN/xadisWbNGiYmJ+uabb5SWlqaioiL16tVL+fn51poxY8ZoyZIlWrhwodasWaO9e/eqf//+1v7i4mL16dNHhYWFWrdund577z2lpKQoOTnZWpOdna0+ffqoR48e2rRpk0aPHq2HHnpIy5cvt9YsWLBASUlJmjRpkjZu3KgOHTrI4XDowIEDFZ4FAAD4ls3tdrt9PUSpnJwcNWzYUGvWrFG3bt2Ul5enBg0aaP78+brzzjslSVlZWWrTpo0yMjIUExOjL774Qrfddpv27t2rsLAwSdLcuXM1fvx45eTkKCAgQOPHj9eyZcu0detW67EGDRqk3NxcpaamSpKio6PVpUsXzZo1S5JUUlKiyMhIjRw5Uk8++WSFZvmjgoICFRQUWPddLpciIyOVl5cnu91e6a9f1Nh5lX5M4FKTOf0BX48AwAdcLpdCQkLO+2/oJXWOTV5eniSpbt26kqTMzEwVFRUpLi7OWtO6dWs1adJEGRkZkqSMjAy1b9/eihpJcjgccrlc2rZtm7XmzGOUrik9RmFhoTIzMz3W+Pn5KS4uzlpTkVn+aOrUqQoJCbFukZGRF/bCAACACrlkwqakpESjR4/W9ddfr3bt2kmSnE6nAgICFBoa6rE2LCxMTqfTWnNm1JTuL913rjUul0snTpzQwYMHVVxcXO6aM49xvln+aMKECcrLy7Nue/bsqeCrAQAALkQ1Xw9QKjExUVu3btVXX33l61EqTWBgoAIDA309BgAAl41L4h2bESNGaOnSpVq1apUaN25sbQ8PD1dhYaFyc3M91u/fv1/h4eHWmj9+M6n0/vnW2O12BQcHq379+vL39y93zZnHON8sAADAt3waNm63WyNGjNCiRYu0cuVKNW/e3GN/VFSUqlevrvT0dGvb9u3btXv3bsXGxkqSYmNjtWXLFo9vL6Wlpclut6tt27bWmjOPUbqm9BgBAQGKioryWFNSUqL09HRrTUVmAQAAvuXTj6ISExM1f/58ffbZZ6pdu7Z1rkpISIiCg4MVEhKioUOHKikpSXXr1pXdbtfIkSMVGxtrfQupV69eatu2re6//35NmzZNTqdTEydOVGJiovUx0COPPKJZs2Zp3LhxevDBB7Vy5Up99NFHWrZsmTVLUlKSEhIS1LlzZ1177bWaMWOG8vPzNWTIEGum880CAAB8y6dhM2fOHElS9+7dPba/++67Gjx4sCTptddek5+fn+Lj41VQUCCHw6E333zTWuvv76+lS5fq0UcfVWxsrGrWrKmEhAQ9++yz1prmzZtr2bJlGjNmjF5//XU1btxY77zzjhwOh7Vm4MCBysnJUXJyspxOpzp27KjU1FSPE4rPNwsAAPCtS+o6Nqar6HfwLxTXscHlgOvYAJenv+R1bAAAAC4GYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYA
AAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMIbXYXPixAkdP37cuv/bb79pxowZWrFiRaUOBgAA4C2vw+b222/XvHnzJEm5ubmKjo7WK6+8ottvv11z5syp9AEBAAAqyuuw2bhxo7p27SpJ+vjjjxUWFqbffvtN8+bN08yZMyt9QAAAgIryOmyOHz+u2rVrS5JWrFih/v37y8/PTzExMfrtt98qfUAAAICK8jpsWrZsqcWLF2vPnj1avny5evXqJUk6cOCA7HZ7pQ8IAABQUV6HTXJysp544gk1a9ZM1157rWJjYyWdfvfm73//e6UPCAAAUFHVvP2BO++8UzfccIP27dunDh06WNt79uypO+64o1KHAwAA8MYFXccmPDxctWvXVlpamk6cOCFJ6tKli1q3bl2pwwEAAHjD67A5dOiQevbsqSuvvFK33nqr9u3bJ0kaOnSoHn/88UofEAAAoKK8DpsxY8aoevXq2r17t2rUqGFtHzhwoFJTUyt1OAAAAG94fY7NihUrtHz5cjVu3Nhje6tWrfi6NwAA8Cmv37HJz8/3eKem1OHDhxUYGFgpQwEAAFwIr8Oma9eu1q9UkCSbzaaSkhJNmzZNPXr0qNThAAAAvOF12EybNk1vvfWWbrnlFhUWFmrcuHFq166d1q5dq5dfftmrY61du1Z9+/ZVRESEbDabFi9e7LF/8ODBstlsHrfevXt7rDl8+LDuvfde2e12hYaGaujQoTp27JjHms2bN6tr164KCgpSZGSkpk2bVmaWhQsXqnXr1goKClL79u31n//8x2O/2+1WcnKyGjVqpODgYMXFxWnHjh1ePV8AAFC1vA6bdu3a6eeff9YNN9yg22+/Xfn5+erfv7++//57tWjRwqtj5efnq0OHDpo9e/ZZ1/Tu3Vv79u2zbv/7v//rsf/ee+/Vtm3blJaWpqVLl2rt2rUaPny4td/lcqlXr15q2rSpMjMzNX36dE2ePFlvvfWWtWbdunW6++67NXToUH3//ffq16+f+vXrp61bt1prpk2bppkzZ2ru3Llav369atasKYfDoZMnT3r1nAEAQNWxud1ut6+HkE5/pLVo0SL169fP2jZ48GDl5uaWeSen1E8//aS2bdtqw4YN6ty5syQpNTVVt956q37//XdFRERozpw5evrpp+V0OhUQECBJevLJJ7V48WJlZWVJOv2Nrvz8fC1dutQ6dkxMjDp27Ki5c+fK7XYrIiJCjz/+uJ544glJUl5ensLCwpSSkqJBgwZV6Dm6XC6FhIQoLy+vSn79RNTYeedfBPzFZU5/wNcjAPCBiv4b6vW3otauXXvO/d26dfP2kOe0evVqNWzYUHXq1NFNN92k559/XvXq1ZMkZWRkKDQ01IoaSYqLi5Ofn5/Wr1+vO+64QxkZGerWrZsVNZLkcDj08ssv68iRI6pTp44yMjKUlJTk8bgOh8MKquzsbDmdTsXFxVn7Q0JCFB0drYyMjLOGTUFBgQoKCqz7Lpfrol8PAABwdl6HTffu3ctss9ls1p+Li4svaqAz9e7dW/3791fz5s21c+dOPfXUU7rllluUkZEhf39/OZ1ONWzY0ONnqlWrprp168rpdEqSnE6nmjdv7rEmLCzM2lenTh05nU5r25lrzjzGmT9X3pryTJ06VVOmTLmAZw4AAC6E12Fz5MgRj/tFRUX6/vvv9cwzz+iFF16otMEkebwT0r59e11zzTVq0aKFVq9erZ49e1bqY1WFCRMmeLwT5HK5FBkZ6cOJAAAwm9dhExISUmbbzTffrICAACUlJSkzM7NSBivPFVdcofr16+uXX35Rz549FR4ergMHDnisOXXqlA4fPqzw8HBJp3+v1f79+z3WlN4/35oz95dua9Sokceajh07nnXewMBAru0DAMCf6IJ+CWZ5wsLCtH379so6XLl+//13HTp0yIqL2NhY5ebmesTUypUrV
VJSoujoaGvN2rVrVVRUZK1JS0vTVVddpTp16lhr0tPTPR4rLS1NsbGxkqTmzZsrPDzcY43L5dL69eutNQAAwPe8fsdm8+bNHvfdbrf27dunl1566ZzvXpTn2LFj+uWXX6z72dnZ2rRpk+rWrau6detqypQpio+PV3h4uHbu3Klx48apZcuWcjgckqQ2bdqod+/eGjZsmObOnauioiKNGDFCgwYNUkREhCTpnnvu0ZQpUzR06FCNHz9eW7du1euvv67XXnvNetzHHntMN954o1555RX16dNH//d//6fvvvvO+kq4zWbT6NGj9fzzz6tVq1Zq3ry5nnnmGUVERHh8iwsAAPiW12HTsWNH2Ww2/fFb4jExMfr3v//t1bG+++47j6sVl56PkpCQoDlz5mjz5s167733lJubq4iICPXq1UvPPfecx8c7H374oUaMGKGePXvKz89P8fHxmjlzprU/JCREK1asUGJioqKiolS/fn0lJyd7XOvmuuuu0/z58zVx4kQ99dRTatWqlRYvXqx27dpZa8aNG6f8/HwNHz5cubm5uuGGG5SamqqgoCCvnjMAAKg6Xl/H5o+/6NLPz08NGjTgH/gK4Do2wMXjOjbA5anKrmPTtGnTixoMAACgqlQobM78aOd8Ro0adcHDAAAAXIwKhc2ZJ9qei81mI2wAAIDPVChssrOzq3oOAACAi1Zp17EBAADwNa9PHpZOXyjv888/1+7du1VYWOix79VXX62UwQAAALzlddikp6frH//4h6644gplZWWpXbt22rVrl9xutzp16lQVMwIAAFSI1x9FTZgwQU888YS2bNmioKAgffLJJ9qzZ49uvPFGDRgwoCpmBAAAqBCvw+ann37SAw+cvkBWtWrVdOLECdWqVUvPPvusXn755UofEAAAoKK8DpuaNWta59U0atRIO3futPYdPHiw8iYDAADwktfn2MTExOirr75SmzZtdOutt+rxxx/Xli1b9OmnnyomJqYqZgQAAKgQr8Pm1Vdf1bFjxyRJU6ZM0bFjx7RgwQK1atWKb0QBAACf8jpsXnzxRd13332STn8sNXfu3EofCgAA4EJ4fY5NTk6OevfurcjISI0dO1Y//PBDVcwFAADgNa/D5rPPPtO+ffv0zDPPaMOGDerUqZOuvvpqvfjii9q1a1cVjAgAAFAxF/QrFerUqaPhw4dr9erV+u233zR48GC9//77atmyZWXPBwAAUGEX9buiioqK9N1332n9+vXatWuXwsLCKmsuAAAAr11Q2KxatUrDhg1TWFiYBg8eLLvdrqVLl+r333+v7PkAAAAqzOtvRf3tb3/T4cOH1bt3b7311lvq27evAgMDq2I2AAAAr3gdNpMnT9aAAQMUGhpaBeMAAABcOK/DZtiwYVUxBwAAwEW7qJOHAQAALiWEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADCGT8Nm7dq16tu3ryIiImSz2bR48WKP/W63W8nJyWrUqJGCg4MVFxenHTt2eKw5fPiw7r33XtntdoWGhmro0KE6duyYx5rNmzera9euCgoKUmRkpKZNm1ZmloULF6p169YKCgpS+/bt9Z///MfrWQAAgG/5NGzy8/PVoUMHzZ49u9z906ZN08yZMzV37lytX79eNWvWlMPh0MmTJ6019957r7Zt26a0tDQtXbpUa9eu1fDhw
639LpdLvXr1UtOmTZWZmanp06dr8uTJeuutt6w169at0913362hQ4fq+++/V79+/dSvXz9t3brVq1kAAIBv2dxut9vXQ0iSzWbTokWL1K9fP0mn3yGJiIjQ448/rieeeEKSlJeXp7CwMKWkpGjQoEH66aef1LZtW23YsEGdO3eWJKWmpurWW2/V77//roiICM2ZM0dPP/20nE6nAgICJElPPvmkFi9erKysLEnSwIEDlZ+fr6VLl1rzxMTEqGPHjpo7d26FZqkIl8ulkJAQ5eXlyW63V8rrdqaosfMq/ZjApSZz+gO+HgGAD1T039BL9hyb7OxsOZ1OxcXFWdtCQkIUHR2tjIwMSVJGRoZCQ0OtqJGkuLg4+fn5af369daabt26WVEjSQ6HQ9u3b9eRI0esNWc+Tuma0sepyCzlKSgokMvl8rgBAICqc8mGjdPplCSFhYV5bA8LC7P2OZ1ONWzY0GN/tWrVVLduXY815R3jzMc425oz959vlvJMnTpVISEh1i0yMvI8zxoAAFyMSzZsTDBhwgTl5eVZtz179vh6JAAAjHbJhk14eLgkaf/+/R7b9+/fb+0LDw/XgQMHPPafOnVKhw8f9lhT3jHOfIyzrTlz//lmKU9gYKDsdrvHDQAAVJ1LNmyaN2+u8PBwpaenW9tcLpfWr1+v2NhYSVJsbKxyc3OVmZlprVm5cqVKSkoUHR1trVm7dq2KioqsNWlpabrqqqtUp04da82Zj1O6pvRxKjILAADwPZ+GzbFjx7Rp0yZt2rRJ0umTdDdt2qTdu3fLZrNp9OjRev755/X5559ry5YteuCBBxQREWF9c6pNmzbq3bu3hg0bpm+//VZff/21RowYoUGDBikiIkKSdM899yggIEBDhw7Vtm3btGDBAr3++utKSkqy5njssceUmpqqV155RVlZWZo8ebK+++47jRgxQpIqNAsAAPC9ar588O+++049evSw7pfGRkJCglJSUjRu3Djl5+dr+PDhys3N1Q033KDU1FQFBQVZP/Phhx9qxIgR6tmzp/z8/BQfH6+ZM2da+0NCQrRixQolJiYqKipK9evXV3Jysse1bq677jrNnz9fEydO1FNPPaVWrVpp8eLFateunbWmIrMAAADfumSuY3M54Do2wMXjOjbA5ekvfx0bAAAAbxE2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADDGJR02kydPls1m87i1bt3a2n/y5EklJiaqXr16qlWrluLj47V//36PY+zevVt9+vRRjRo11LBhQ40dO1anTp3yWLN69Wp16tRJgYGBatmypVJSUsrMMnv2bDVr1kxBQUGKjo7Wt99+WyXPGQAAXLhLOmwk6eqrr9a+ffus21dffWXtGzNmjJYsWaKFCxdqzZo12rt3r/r372/tLy4uVp8+fVRYWKh169bpvffeU0pKipKTk6012dnZ6tOnj3r06KFNmzZp9OjReuihh7R8+XJrzYIFC5SUlKRJkyZp48aN6tChgxwOhw4cOPDnvAgAAKBCbG632
+3rIc5m8uTJWrx4sTZt2lRmX15enho0aKD58+frzjvvlCRlZWWpTZs2ysjIUExMjL744gvddttt2rt3r8LCwiRJc+fO1fjx45WTk6OAgACNHz9ey5Yt09atW61jDxo0SLm5uUpNTZUkRUdHq0uXLpo1a5YkqaSkRJGRkRo5cqSefPLJCj8fl8ulkJAQ5eXlyW63X+jLclZRY+dV+jGBS03m9Ad8PQIAH6jov6GX/Ds2O3bsUEREhK644grde++92r17tyQpMzNTRUVFiouLs9a2bt1aTZo0UUZGhiQpIyND7du3t6JGkhwOh1wul7Zt22atOfMYpWtKj1FYWKjMzEyPNX5+foqLi7PWnE1BQYFcLpfHDQAAVJ1LOmyio6OVkpKi1NRUzZkzR9nZ2eratauOHj0qp9OpgIAAhYaGevxMWFiYnE6nJMnpdHpETen+0n3nWuNyuXTixAkdPHhQxcXF5a4pPcbZTJ06VSEhIdYtMjLS69cAAABUXDVfD3Aut9xyi/Xna665RtHR0WratKk++ugjBQcH+3CyipkwYYKSkpKs+y6Xi7gBAKAKXdLv2PxRaGiorrzySv3yyy8KDw9XYWGhcnNzPdbs379f4eHhkqTw8PAy35IqvX++NXa7XcHBwapfv778/f3LXVN6jLMJDAyU3W73uAEAgKrzlwqbY8eOaefOnWrUqJGioqJUvXp1paenW/u3b9+u3bt3KzY2VpIUGxurLVu2eHx7KS0tTXa7XW3btrXWnHmM0jWlxwgICFBUVJTHmpKSEqWnp1trAADApeGSDpsnnnhCa9as0a5du7Ru3Trdcccd8vf31913362QkBANHTpUSUlJWrVqlTIzMzVkyBDFxsYqJiZGktSrVy+1bdtW999/v3744QctX75cEydOVGJiogIDAyVJjzzyiH799VeNGzdOWVlZevPNN/XRRx9pzJgx1hxJSUl6++239d577+mnn37So48+qvz8fA0ZMsQnrwsAACjfJX2Oze+//667775bhw4dUoMGDXTDDTfom2++UYMGDSRJr732mvz8/BQfH6+CggI5HA69+eab1s/7+/tr6dKlevTRRxUbG6uaNWsqISFBzz77rLWmefPmWrZsmcaMGaPXX39djRs31jvvvCOHw2GtGThwoHJycpScnCyn06mOHTsqNTW1zAnFAADAty7p69iYhuvYABeP69gAlydjrmMDAABQUYQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGNU8/UAAHC5iBo7z9cjAFUuc/oDPn183rEBAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbL82ePVvNmjVTUFCQoqOj9e233/p6JAAA8P8jbLywYMECJSUladKkSdq4caM6dOggh8OhAwcO+Ho0AAAgwsYrr776qoYNG6YhQ4aobdu2mjt3rmrUqKF///vfvh4NAABIqubrAf4qCgsLlZmZqQkTJljb/Pz8FBcXp4yMjHJ/pqCgQAUFBdb9vLw8SZLL5aqSGYsLTlTJcYFLSVX9/fkz8HcUl4Oq+jtaely3233OdYRNBR08eFDFxcUKC
wvz2B4WFqasrKxyf2bq1KmaMmVKme2RkZFVMiNwOQh54xFfjwDgHKr67+jRo0cVEhJy1v2ETRWaMGGCkpKSrPslJSU6fPiw6tWrJ5vN5sPJUBlcLpciIyO1Z88e2e12X48D4A/4O2oWt9uto0ePKiIi4pzrCJsKql+/vvz9/bV//36P7fv371d4eHi5PxMYGKjAwECPbaGhoVU1InzEbrfzH03gEsbfUXOc652aUpw8XEEBAQGKiopSenq6ta2kpETp6emKjY314WQAAKAU79h4ISkpSQkJCercubOuvfZazZgxQ/n5+RoyZIivRwMAACJsvDJw4EDl5OQoOTlZTqdTHTt2VGpqapkTinF5CAwM1KRJk8p83Ajg0sDf0cuTzX2+700BAAD8RXCODQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNcIFmz56tZs2aKSgoSNHR0fr22299PRIASWvXrlXfvn0VEREhm82mxYsX+3ok/IkIG+ACLFiwQElJSZo0aZI2btyoDh06yOFw6MCBA74eDbjs5efnq0OHDpo9e7avR4EP8HVv4AJER0erS5cumjVrlqTTV6GOjIzUyJEj9eSTT/p4OgClbDabFi1apH79+vl6FPxJeMcG8FJhYaEyMzMVFxdnbfPz81NcXJwyMjJ8OBkAgLABvHTw4EEVFxeXueJ0WFiYnE6nj6YCAEiEDQAAMAhhA3ipfv368vf31/79+z2279+/X+Hh4T6aCgAgETaA1wICAhQVFaX09HRrW0lJidLT0xUbG+vDyQAA/HZv4AIkJSUpISFBnTt31rXXXqsZM2YoPz9fQ4YM8fVowGXv2LFj+uWXX6z72dnZ2rRpk+rWrasmTZr4cDL8Gfi6N3CBZs2apenTp8vpdKpjx46aOXOmoqOjfT0WcNlbvXq1evToUWZ7QkKCUlJS/vyB8KcibAAAgDE4xwYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGwF9a9+7dNXr06AqtXb16tWw2m3Jzcy/qMZs1a6YZM2Zc1DEAVA3CBgAAGIOwAQAAxiBsABjj/fffV+fOnVW7dm2Fh4frnnvu0YEDB8qs+/rrr3XNNdcoKChIMTEx2rp1q8f+r776Sl27dlVwcLAiIyM1atQo5efnl/uYbrdbkydPVpMmTRQYGKiIiAiNGjWqSp4fgPMjbAAYo6ioSM8995x++OEHLV68WLt27dLgwYPLrBs7dqxeeeUVbdiwQQ0aNFDfvn1VVFQkSdq5c6d69+6t+Ph4bd68WQsWLNBXX32lESNGlPuYn3zyiV577TX9z//8j3bs2KHFixerffv2Vfk0AZxDNV8PAACV5cEHH7T+fMUVV2jmzJnq0qWLjh07plq1aln7Jk2apJtvvlmS9N5776lx48ZatGiR7rrrLk2dOlX33nuvdUJyq1atNHPmTN14442aM2eOgoKCPB5z9+7dCg8PV1xcnKpXr64mTZro2muvrfonC6BcvGMDwBiZmZnq27evmjRpotq1a+vGG2+UdDo+zhQbG2v9uW7durrqqqv0008/SZJ++OEHpaSkqFatWtbN4XCopKRE2dnZZR5zwIABOnHihK644goNGzZMixYt0qlTp6rwWQI4F8IGgBHy8/PlcDhkt9v14YcfasOGDVq0aJEkqbCwsMLHOXbsmB5++GFt2rTJuv3www/asWOHWrRoUWZ9ZGSktm/frjfffFPBwcH65z//qW7dulkfbQH4c/FRFAAjZGVl6dChQ3rppZcUGRkpSfruu+/KXfvNN9+oSZMmkqQjR47o559/Vps2bSRJnTp10o8//qiWLVtW+LGDg4PVt29f9e3bV4mJiWrdurW2bNmiTp06XeSzAuAtwgaAEZo0aaKAgAC98cYbeuSRR7R161Y999xz5a599tlnVa9ePYWFhenpp59W/fr11a9fP0nS+PHjFRMToxEjRuihhx5SzZo19eOPPyotL
U2zZs0qc6yUlBQVFxcrOjpaNWrU0AcffKDg4GA1bdq0Kp8ugLPgoygARmjQoIFSUlK0cOFCtW3bVi+99JL+9a9/lbv2pZde0mOPPaaoqCg5nU4tWbJEAQEBkqRrrrlGa9as0c8//6yuXbvq73//u5KTkxUREVHusUJDQ/X222/r+uuv1zXXXKMvv/xSS5YsUb169arsuQI4O5vb7Xb7eggAAIDKwDs2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjPH/AVSrgG7nWmoXAAAAAElFTkSuQmCC\n" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "### 1. Gestion de la Vivienda\n", "\n", "Voy a eliminar las columnas de _Mode y _Medi para quedarme solo con _AVG, ya que dan el mismo tipo de informacion" ], "metadata": { "id": "lG4hjq_SUFsn" } },
{ "cell_type": "code", "source": [ "# Drop every column whose name ends in _MODE or _MEDI so that only the _AVG variant is kept.\n", "# NOTE(review): the filter looks only at the suffix and never checks that a matching _AVG column exists -- confirm no non-redundant *_MODE column is lost.\n", "columnas_eliminar = [c for c in df_train.columns if c.endswith(('_MODE', '_MEDI'))]\n", "df_train.drop(columns=columnas_eliminar, inplace=True)\n", "print(f\"Columnas eliminadas por redundancia: {len(columnas_eliminar)}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pZ8BGVerUXV8", "outputId": "34ec58e2-afd4-4d80-f7e2-39ebb7686965" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Columnas eliminadas por redundancia: 33\n" ] } ] },
{ "cell_type": "markdown", "source": [ "### 2. Gestion de OWN_CAR_AGE\n", "\n", "Si en esta es un nulo, voy a asumir que es un 0, ya que esto es consistente con no tener coche o un coche nuevo sin dato" ], "metadata": { "id": "AGyHbwrtU3lm" } },
{ "cell_type": "code", "source": [ "# Assign the filled Series back to the column instead of calling fillna(inplace=True) on df_train['OWN_CAR_AGE']:\n", "# pandas emits a FutureWarning for that chained in-place call and it stops modifying df_train in pandas 3.0.\n", "df_train['OWN_CAR_AGE'] = df_train['OWN_CAR_AGE'].fillna(0)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OV7FihYbVE9O", "outputId": "5479a7a6-259a-43ec-9b77-17da246d09f2" }, "execution_count": null, "outputs": [] },
{ "cell_type": "markdown", "source": [ "### 3. Gestion de OCCUPATION_TYPE\n", "\n", "En esta voy a rellenar con una nueva categoria que sea 'Unknown'" ], "metadata": { "id": "2bg9oJWQVbGm" } },
{ "cell_type": "code", "source": [ "# Missing occupations become their own 'Unknown' category.\n", "# The result is assigned back to the column because the chained fillna(inplace=True) form stops modifying df_train in pandas 3.0.\n", "df_train['OCCUPATION_TYPE'] = df_train['OCCUPATION_TYPE'].fillna('Unknown')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5tERMTqyVlkC", "outputId": "61ddec11-6b6d-4696-d53c-fe87e3492c00" }, "execution_count": null, "outputs": [] },
{ "cell_type": "markdown", "source": [ "### 4. Imputacion Inteligente (Voy a crear una columna nueva llamada FLAG para indicar el riesgo + aplicacion de la MEDIANA)" ], "metadata": { "id": "O28gQBQ5brts" } }, { "cell_type": "code", "source": [ "# 1. 
Identify the numeric columns that STILL contain nulls\n", "cols_numericas = df_train.select_dtypes(include=['float64', 'int64']).columns\n", "cols_numericas_con_nulos = cols_numericas[df_train[cols_numericas].isnull().any()]\n", "\n", "print(f\"Imputamos {len(cols_numericas_con_nulos)} columnas numericas...\")\n", "\n", "for col in cols_numericas_con_nulos:\n", "    # A. Add a 0/1 flag column recording which rows were originally missing\n", "    df_train[f'{col}_ISNAN'] = df_train[col].isna().astype(int)\n", "\n", "    # B. Impute with the median, which is robust to extreme values (outliers).\n", "    # Assign the result back rather than calling fillna(inplace=True) on df_train[col]:\n", "    # pandas emits a FutureWarning for that chained in-place call and it stops modifying df_train in pandas 3.0.\n", "    mediana = df_train[col].median()\n", "    df_train[col] = df_train[col].fillna(mediana)\n", "\n", "print(\"Imputacion completa\")\n", "\n", "# Final check of the data cleaning\n", "print(f\"Total de nulos restantes en el dataset: {df_train.isnull().sum().sum()}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pg1VFSVBcDWx", "outputId": "ba5623be-5a82-4661-c237-324d3b7a5e9f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Imputamos 31 columnas numericas...\n", "Imputacion completa\n", "Total de nulos restantes en el dataset: 1292\n" ] } ] },
{ "cell_type": "markdown", "source": [ "### 5. Gestion NAME_TYPE_SUITE (Categorica)\n", "\n", "En esta voy a rellenarla con la moda (Esta es el valor que mas se repite)" ], "metadata": { "id": "fbcZvJZneLJU" } },
{ "cell_type": "code", "source": [ "moda_suite = df_train['NAME_TYPE_SUITE'].mode()[0]\n", "\n", "df_train['NAME_TYPE_SUITE'] = df_train['NAME_TYPE_SUITE'].fillna(moda_suite)\n", "\n", "print(f\"Valor usado para rellenar: {moda_suite}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ZaT2N8coeWPJ", "outputId": "2bb4bd52-a00f-4ca3-ef89-042080e0abe5" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Valor usado para rellenar: Unaccompanied\n" ] } ] }, { "cell_type": "markdown", "source": [ "### 6. 
Verificacion final de nulos" ], "metadata": { "id": "O-yXhgCPepj2" } }, { "cell_type": "code", "source": [ "total_nulos = df_train.isna().sum().sum()\n", "\n", "print(f\"Total de nulos en el dataset: {total_nulos}\")\n", "\n", "if total_nulos == 0:\n", " print(\"Dataset totalmente limpio\")\n", "else:\n", " print(\"Aun quedan nulos\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UlX4snXNetZU", "outputId": "a396d283-8f7d-4ad4-bb87-ffacfa6b001a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Total de nulos en el dataset: 0\n", "Dataset totalmente limpio\n" ] } ] }, { "cell_type": "code", "source": [ "df_column_description.head(10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "id": "HnytRL-ms37a", "outputId": "1e9b0bd6-ce54-44c5-9309-ea34bb72c043" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Unnamed: 0 Table Row \\\n", "0 1 application_{train|test}.csv SK_ID_CURR \n", "1 2 application_{train|test}.csv TARGET \n", "2 5 application_{train|test}.csv NAME_CONTRACT_TYPE \n", "3 6 application_{train|test}.csv CODE_GENDER \n", "4 7 application_{train|test}.csv FLAG_OWN_CAR \n", "5 8 application_{train|test}.csv FLAG_OWN_REALTY \n", "6 9 application_{train|test}.csv CNT_CHILDREN \n", "7 10 application_{train|test}.csv AMT_INCOME_TOTAL \n", "8 11 application_{train|test}.csv AMT_CREDIT \n", "9 12 application_{train|test}.csv AMT_ANNUITY \n", "\n", " Description Special \n", "0 ID of loan in our sample NaN \n", "1 Target variable (1 - client with payment diffi... 
NaN \n", "2 Identification if loan is cash or revolving NaN \n", "3 Gender of the client NaN \n", "4 Flag if the client owns a car NaN \n", "5 Flag if client owns a house or flat NaN \n", "6 Number of children the client has NaN \n", "7 Income of the client NaN \n", "8 Credit amount of the loan NaN \n", "9 Loan annuity NaN " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0TableRowDescriptionSpecial
01application_{train|test}.csvSK_ID_CURRID of loan in our sampleNaN
12application_{train|test}.csvTARGETTarget variable (1 - client with payment diffi...NaN
25application_{train|test}.csvNAME_CONTRACT_TYPEIdentification if loan is cash or revolvingNaN
36application_{train|test}.csvCODE_GENDERGender of the clientNaN
47application_{train|test}.csvFLAG_OWN_CARFlag if the client owns a carNaN
58application_{train|test}.csvFLAG_OWN_REALTYFlag if client owns a house or flatNaN
69application_{train|test}.csvCNT_CHILDRENNumber of children the client hasNaN
710application_{train|test}.csvAMT_INCOME_TOTALIncome of the clientNaN
811application_{train|test}.csvAMT_CREDITCredit amount of the loanNaN
912application_{train|test}.csvAMT_ANNUITYLoan annuityNaN
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_column_description", "summary": "{\n \"name\": \"df_column_description\",\n \"rows\": 219,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 63,\n \"min\": 1,\n \"max\": 221,\n \"num_unique_values\": 219,\n \"samples\": [\n 157,\n 96,\n 219\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Table\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"application_{train|test}.csv\",\n \"bureau.csv\",\n \"previous_application.csv\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Row\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 196,\n \"samples\": [\n \"SK_ID_PREV \",\n \"FLAG_DOCUMENT_19\",\n \"REGION_POPULATION_RELATIVE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Description\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 163,\n \"samples\": [\n \"Purpose of the cash loan\",\n \"Number of other drawings during this month on the previous credit\",\n \"Flag if the application was the last application per day of the client. Sometimes clients apply for more applications a day. 
# Encoding section: preview the first 10 rows of df_train before any
# categorical column is encoded.
df_train.head(n=10)
0 \n", "100008 27517.5 454500.0 ... 0 \n", "100009 41301.0 1395000.0 ... 0 \n", "100010 42075.0 1530000.0 ... 0 \n", "100011 33826.5 913500.0 ... 0 \n", "100012 20250.0 405000.0 ... 0 \n", "\n", " OBS_60_CNT_SOCIAL_CIRCLE_ISNAN DEF_60_CNT_SOCIAL_CIRCLE_ISNAN \\\n", "SK_ID_CURR \n", "100002 0 0 \n", "100003 0 0 \n", "100004 0 0 \n", "100006 0 0 \n", "100007 0 0 \n", "100008 0 0 \n", "100009 0 0 \n", "100010 0 0 \n", "100011 0 0 \n", "100012 0 0 \n", "\n", " DAYS_LAST_PHONE_CHANGE_ISNAN AMT_REQ_CREDIT_BUREAU_HOUR_ISNAN \\\n", "SK_ID_CURR \n", "100002 0 0 \n", "100003 0 0 \n", "100004 0 0 \n", "100006 0 1 \n", "100007 0 0 \n", "100008 0 0 \n", "100009 0 0 \n", "100010 0 0 \n", "100011 0 0 \n", "100012 0 1 \n", "\n", " AMT_REQ_CREDIT_BUREAU_DAY_ISNAN AMT_REQ_CREDIT_BUREAU_WEEK_ISNAN \\\n", "SK_ID_CURR \n", "100002 0 0 \n", "100003 0 0 \n", "100004 0 0 \n", "100006 1 1 \n", "100007 0 0 \n", "100008 0 0 \n", "100009 0 0 \n", "100010 0 0 \n", "100011 0 0 \n", "100012 1 1 \n", "\n", " AMT_REQ_CREDIT_BUREAU_MON_ISNAN AMT_REQ_CREDIT_BUREAU_QRT_ISNAN \\\n", "SK_ID_CURR \n", "100002 0 0 \n", "100003 0 0 \n", "100004 0 0 \n", "100006 1 1 \n", "100007 0 0 \n", "100008 0 0 \n", "100009 0 0 \n", "100010 0 0 \n", "100011 0 0 \n", "100012 1 1 \n", "\n", " AMT_REQ_CREDIT_BUREAU_YEAR_ISNAN \n", "SK_ID_CURR \n", "100002 0 \n", "100003 0 \n", "100004 0 \n", "100006 1 \n", "100007 0 \n", "100008 0 \n", "100009 0 \n", "100010 0 \n", "100011 0 \n", "100012 1 \n", "\n", "[10 rows x 119 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICE...DEF_30_CNT_SOCIAL_CIRCLE_ISNANOBS_60_CNT_SOCIAL_CIRCLE_ISNANDEF_60_CNT_SOCIAL_CIRCLE_ISNANDAYS_LAST_PHONE_CHANGE_ISNANAMT_REQ_CREDIT_BUREAU_HOUR_ISNANAMT_REQ_CREDIT_BUREAU_DAY_ISNANAMT_REQ_CREDIT_BUREAU_WEEK_ISNANAMT_REQ_CREDIT_BUREAU_MON_ISNANAMT_REQ_CREDIT_BUREAU_QRT_ISNANAMT_REQ_CREDIT_BUREAU_YEAR_ISNAN
SK_ID_CURR
1000021Cash loansMNY0202500.0406597.524700.5351000.0...0000000000
1000030Cash loansFNN0270000.01293502.535698.51129500.0...0000000000
1000040Revolving loansMYY067500.0135000.06750.0135000.0...0000000000
1000060Cash loansFNY0135000.0312682.529686.5297000.0...0000111111
1000070Cash loansMNY0121500.0513000.021865.5513000.0...0000000000
1000080Cash loansMNY099000.0490495.527517.5454500.0...0000000000
1000090Cash loansFYY1171000.01560726.041301.01395000.0...0000000000
1000100Cash loansMYY0360000.01530000.042075.01530000.0...0000000000
1000110Cash loansFNY0112500.01019610.033826.5913500.0...0000000000
1000120Revolving loansMNY0135000.0405000.020250.0405000.0...0000111111
\n", "

10 rows × 119 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
from sklearn.preprocessing import LabelEncoder

# Gender cleanup: drop the rows whose CODE_GENDER is the placeholder 'XNA'
# (treated as data errors).
# .copy() makes df_train an independent frame instead of a slice of the
# previous one; without it every column assignment in the loop below emits
# a SettingWithCopyWarning.
df_train = df_train[df_train['CODE_GENDER'] != 'XNA'].copy()

# Label Encoding: applied to every text (object-dtype) column that has at
# most two distinct values (binary flags such as Y/N or M/F). Text columns
# with more categories are left untouched by this step.
le = LabelEncoder()
le_count = 0

for col in df_train.columns:
    # Only text columns are candidates.
    if df_train[col].dtype != 'object':
        continue
    # dropna=False counts NaN as a value, exactly like len(unique()) does.
    if df_train[col].nunique(dropna=False) > 2:
        continue
    # Refit the shared encoder on this column and replace it with the codes.
    df_train[col] = le.fit_transform(df_train[col])
    le_count += 1
    print(f"Label Encoding Aplicado a {col}")

print(f'{le_count} columnas fueron preprocesadas con Label Encoding')
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_train[col] = le.transform(df_train[col])\n", "/tmp/ipython-input-753042584.py:16: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_train[col] = le.transform(df_train[col])\n", "/tmp/ipython-input-753042584.py:16: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_train[col] = le.transform(df_train[col])\n", "/tmp/ipython-input-753042584.py:16: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_train[col] = le.transform(df_train[col])\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Label Encoding Aplicado a NAME_CONTRACT_TYPE\n", "Label Encoding Aplicado a CODE_GENDER\n", "Label Encoding Aplicado a FLAG_OWN_CAR\n", "Label Encoding Aplicado a FLAG_OWN_REALTY\n", "4 columnas fueron preprocesadas con Label Encoding\n" ] } ] }, { "cell_type": "markdown", "source": [ "### 2. 
# Count the text (object-dtype) columns that are still pending encoding.
is_text = df_train.dtypes == 'object'
cols_texto_restantes = df_train.columns[is_text.to_numpy()]
print(f"Columnas de texto pendiente de OHE: {len(cols_texto_restantes)}")

# One-Hot Encoding: let pd.get_dummies (default arguments) expand every
# remaining text column into indicator columns.
df_train = pd.get_dummies(data=df_train)

print(f"Dimensiones finales despues de Encoding completo:: {df_train.shape}")

# Preview the first 50 rows of the fully encoded frame.
df_train.head(n=50)
0 \n", "100041 0 0 0 0 \n", "100043 0 0 0 0 \n", "100044 0 0 1 0 \n", "100045 0 0 0 0 \n", "100046 0 1 1 1 \n", "100047 1 0 1 0 \n", "100048 0 0 0 0 \n", "100049 1 0 0 0 \n", "100050 0 0 0 0 \n", "100051 0 0 1 0 \n", "100052 0 1 0 0 \n", "100053 0 0 0 0 \n", "100054 0 0 0 0 \n", "100055 0 0 0 0 \n", "100056 0 0 1 1 \n", "\n", " FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT \\\n", "SK_ID_CURR \n", "100002 1 0 202500.000 406597.5 \n", "100003 0 0 270000.000 1293502.5 \n", "100004 1 0 67500.000 135000.0 \n", "100006 1 0 135000.000 312682.5 \n", "100007 1 0 121500.000 513000.0 \n", "100008 1 0 99000.000 490495.5 \n", "100009 1 1 171000.000 1560726.0 \n", "100010 1 0 360000.000 1530000.0 \n", "100011 1 0 112500.000 1019610.0 \n", "100012 1 0 135000.000 405000.0 \n", "100014 1 1 112500.000 652500.0 \n", "100015 1 0 38419.155 148365.0 \n", "100016 1 0 67500.000 80865.0 \n", "100017 0 1 225000.000 918468.0 \n", "100018 1 0 189000.000 773680.5 \n", "100019 1 0 157500.000 299772.0 \n", "100020 0 0 108000.000 509602.5 \n", "100021 1 1 81000.000 270000.0 \n", "100022 1 0 112500.000 157500.0 \n", "100023 1 1 90000.000 544491.0 \n", "100024 1 0 135000.000 427500.0 \n", "100025 1 1 202500.000 1132573.5 \n", "100026 0 1 450000.000 497520.0 \n", "100027 1 0 83250.000 239850.0 \n", "100029 0 2 135000.000 247500.0 \n", "100030 1 0 90000.000 225000.0 \n", "100031 1 0 112500.000 979992.0 \n", "100032 1 1 112500.000 327024.0 \n", "100033 1 0 270000.000 790830.0 \n", "100034 1 0 90000.000 180000.0 \n", "100035 1 0 292500.000 665892.0 \n", "100036 1 0 112500.000 512064.0 \n", "100037 0 0 90000.000 199008.0 \n", "100039 0 1 360000.000 733315.5 \n", "100040 1 0 135000.000 1125000.0 \n", "100041 0 0 112500.000 450000.0 \n", "100043 1 2 198000.000 641173.5 \n", "100044 1 0 121500.000 454500.0 \n", "100045 1 0 99000.000 247275.0 \n", "100046 1 0 180000.000 540000.0 \n", "100047 1 0 202500.000 1193580.0 \n", "100048 1 0 202500.000 604152.0 \n", "100049 0 0 135000.000 288873.0 \n", 
"100050 1 0 108000.000 746280.0 \n", "100051 1 0 202500.000 661702.5 \n", "100052 1 1 90000.000 180000.0 \n", "100053 1 0 202500.000 305221.5 \n", "100054 1 0 99000.000 260640.0 \n", "100055 0 0 130500.000 1350000.0 \n", "100056 1 0 360000.000 1506816.0 \n", "\n", " AMT_ANNUITY AMT_GOODS_PRICE ... \\\n", "SK_ID_CURR ... \n", "100002 24700.5 351000.0 ... \n", "100003 35698.5 1129500.0 ... \n", "100004 6750.0 135000.0 ... \n", "100006 29686.5 297000.0 ... \n", "100007 21865.5 513000.0 ... \n", "100008 27517.5 454500.0 ... \n", "100009 41301.0 1395000.0 ... \n", "100010 42075.0 1530000.0 ... \n", "100011 33826.5 913500.0 ... \n", "100012 20250.0 405000.0 ... \n", "100014 21177.0 652500.0 ... \n", "100015 10678.5 135000.0 ... \n", "100016 5881.5 67500.0 ... \n", "100017 28966.5 697500.0 ... \n", "100018 32778.0 679500.0 ... \n", "100019 20160.0 247500.0 ... \n", "100020 26149.5 387000.0 ... \n", "100021 13500.0 270000.0 ... \n", "100022 7875.0 157500.0 ... \n", "100023 17563.5 454500.0 ... \n", "100024 21375.0 427500.0 ... \n", "100025 37561.5 927000.0 ... \n", "100026 32521.5 450000.0 ... \n", "100027 23850.0 225000.0 ... \n", "100029 12703.5 247500.0 ... \n", "100030 11074.5 225000.0 ... \n", "100031 27076.5 702000.0 ... \n", "100032 23827.5 270000.0 ... \n", "100033 57676.5 675000.0 ... \n", "100034 9000.0 180000.0 ... \n", "100035 24592.5 477000.0 ... \n", "100036 25033.5 360000.0 ... \n", "100037 20893.5 180000.0 ... \n", "100039 39069.0 679500.0 ... \n", "100040 32895.0 1125000.0 ... \n", "100041 44509.5 450000.0 ... \n", "100043 23157.0 553500.0 ... \n", "100044 15151.5 454500.0 ... \n", "100045 17338.5 225000.0 ... \n", "100046 27000.0 540000.0 ... \n", "100047 35028.0 855000.0 ... \n", "100048 29196.0 540000.0 ... \n", "100049 16258.5 238500.0 ... \n", "100050 42970.5 675000.0 ... \n", "100051 48280.5 598500.0 ... \n", "100052 9000.0 180000.0 ... \n", "100053 17649.0 252000.0 ... \n", "100054 26838.0 225000.0 ... \n", "100055 37255.5 1350000.0 ... 
\n", "100056 49927.5 1350000.0 ... \n", "\n", " ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 \\\n", "SK_ID_CURR \n", "100002 False False \n", "100003 False False \n", "100004 False False \n", "100006 False False \n", "100007 False False \n", "100008 False False \n", "100009 False False \n", "100010 False False \n", "100011 False False \n", "100012 False False \n", "100014 False False \n", "100015 False False \n", "100016 False False \n", "100017 False False \n", "100018 False False \n", "100019 False False \n", "100020 False False \n", "100021 False False \n", "100022 False False \n", "100023 False False \n", "100024 False False \n", "100025 False False \n", "100026 False False \n", "100027 False False \n", "100029 False False \n", "100030 False False \n", "100031 False False \n", "100032 False False \n", "100033 False False \n", "100034 False False \n", "100035 False False \n", "100036 False False \n", "100037 False False \n", "100039 False False \n", "100040 False False \n", "100041 False False \n", "100043 False False \n", "100044 False False \n", "100045 False False \n", "100046 False False \n", "100047 False False \n", "100048 False False \n", "100049 False False \n", "100050 False False \n", "100051 False False \n", "100052 False False \n", "100053 False False \n", "100054 False False \n", "100055 False False \n", "100056 False False \n", "\n", " ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 \\\n", "SK_ID_CURR \n", "100002 False False \n", "100003 False False \n", "100004 False False \n", "100006 False False \n", "100007 False False \n", "100008 False False \n", "100009 False False \n", "100010 False False \n", "100011 False False \n", "100012 False False \n", "100014 False False \n", "100015 False False \n", "100016 False False \n", "100017 False False \n", "100018 False False \n", "100019 False False \n", "100020 False False \n", "100021 False False \n", "100022 False False \n", "100023 False False \n", "100024 False 
False \n", "100025 False True \n", "100026 False False \n", "100027 False False \n", "100029 False False \n", "100030 False False \n", "100031 False False \n", "100032 False False \n", "100033 False False \n", "100034 False False \n", "100035 False False \n", "100036 False False \n", "100037 False False \n", "100039 False False \n", "100040 False False \n", "100041 False False \n", "100043 False False \n", "100044 False False \n", "100045 False False \n", "100046 False False \n", "100047 False False \n", "100048 False False \n", "100049 False False \n", "100050 False False \n", "100051 False False \n", "100052 False False \n", "100053 False False \n", "100054 False False \n", "100055 False False \n", "100056 False False \n", "\n", " ORGANIZATION_TYPE_Transport: type 1 \\\n", "SK_ID_CURR \n", "100002 False \n", "100003 False \n", "100004 False \n", "100006 False \n", "100007 False \n", "100008 False \n", "100009 False \n", "100010 False \n", "100011 False \n", "100012 False \n", "100014 False \n", "100015 False \n", "100016 False \n", "100017 False \n", "100018 False \n", "100019 False \n", "100020 False \n", "100021 False \n", "100022 False \n", "100023 False \n", "100024 False \n", "100025 False \n", "100026 False \n", "100027 False \n", "100029 False \n", "100030 False \n", "100031 False \n", "100032 False \n", "100033 False \n", "100034 False \n", "100035 False \n", "100036 False \n", "100037 False \n", "100039 False \n", "100040 False \n", "100041 False \n", "100043 False \n", "100044 False \n", "100045 False \n", "100046 False \n", "100047 False \n", "100048 False \n", "100049 False \n", "100050 False \n", "100051 False \n", "100052 False \n", "100053 False \n", "100054 False \n", "100055 False \n", "100056 False \n", "\n", " ORGANIZATION_TYPE_Transport: type 2 \\\n", "SK_ID_CURR \n", "100002 False \n", "100003 False \n", "100004 False \n", "100006 False \n", "100007 False \n", "100008 False \n", "100009 False \n", "100010 False \n", "100011 False \n", "100012 
False \n", "100014 False \n", "100015 False \n", "100016 False \n", "100017 False \n", "100018 True \n", "100019 False \n", "100020 False \n", "100021 False \n", "100022 False \n", "100023 False \n", "100024 False \n", "100025 False \n", "100026 False \n", "100027 False \n", "100029 False \n", "100030 False \n", "100031 False \n", "100032 False \n", "100033 False \n", "100034 False \n", "100035 False \n", "100036 False \n", "100037 False \n", "100039 False \n", "100040 False \n", "100041 False \n", "100043 False \n", "100044 False \n", "100045 False \n", "100046 False \n", "100047 False \n", "100048 False \n", "100049 False \n", "100050 False \n", "100051 False \n", "100052 False \n", "100053 False \n", "100054 False \n", "100055 False \n", "100056 True \n", "\n", " ORGANIZATION_TYPE_Transport: type 3 \\\n", "SK_ID_CURR \n", "100002 False \n", "100003 False \n", "100004 False \n", "100006 False \n", "100007 False \n", "100008 False \n", "100009 False \n", "100010 False \n", "100011 False \n", "100012 False \n", "100014 False \n", "100015 False \n", "100016 False \n", "100017 False \n", "100018 False \n", "100019 False \n", "100020 False \n", "100021 False \n", "100022 False \n", "100023 False \n", "100024 False \n", "100025 False \n", "100026 False \n", "100027 False \n", "100029 False \n", "100030 False \n", "100031 False \n", "100032 False \n", "100033 False \n", "100034 False \n", "100035 False \n", "100036 False \n", "100037 False \n", "100039 False \n", "100040 False \n", "100041 False \n", "100043 False \n", "100044 False \n", "100045 False \n", "100046 False \n", "100047 False \n", "100048 False \n", "100049 False \n", "100050 False \n", "100051 False \n", "100052 False \n", "100053 False \n", "100054 False \n", "100055 False \n", "100056 False \n", "\n", " ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University \\\n", "SK_ID_CURR \n", "100002 False False \n", "100003 False False \n", "100004 False False \n", "100006 False False \n", "100007 False 
False \n", "100008 False False \n", "100009 False False \n", "100010 False False \n", "100011 False False \n", "100012 False False \n", "100014 False False \n", "100015 False False \n", "100016 False False \n", "100017 False False \n", "100018 False False \n", "100019 False False \n", "100020 False False \n", "100021 False False \n", "100022 False False \n", "100023 False False \n", "100024 False False \n", "100025 False False \n", "100026 False False \n", "100027 False False \n", "100029 False False \n", "100030 False False \n", "100031 False False \n", "100032 False False \n", "100033 False False \n", "100034 False False \n", "100035 False False \n", "100036 False False \n", "100037 False False \n", "100039 False False \n", "100040 False False \n", "100041 False False \n", "100043 False False \n", "100044 True False \n", "100045 False False \n", "100046 False False \n", "100047 False False \n", "100048 False False \n", "100049 False False \n", "100050 False False \n", "100051 False False \n", "100052 False False \n", "100053 False False \n", "100054 False False \n", "100055 False False \n", "100056 False False \n", "\n", " ORGANIZATION_TYPE_XNA \n", "SK_ID_CURR \n", "100002 False \n", "100003 False \n", "100004 False \n", "100006 False \n", "100007 False \n", "100008 False \n", "100009 False \n", "100010 False \n", "100011 True \n", "100012 False \n", "100014 False \n", "100015 True \n", "100016 False \n", "100017 False \n", "100018 False \n", "100019 False \n", "100020 False \n", "100021 False \n", "100022 False \n", "100023 False \n", "100024 False \n", "100025 False \n", "100026 False \n", "100027 True \n", "100029 False \n", "100030 False \n", "100031 False \n", "100032 False \n", "100033 False \n", "100034 False \n", "100035 False \n", "100036 False \n", "100037 False \n", "100039 False \n", "100040 False \n", "100041 False \n", "100043 False \n", "100044 False \n", "100045 True \n", "100046 False \n", "100047 False \n", "100048 False \n", "100049 False \n", 
"100050 True \n", "100051 False \n", "100052 False \n", "100053 True \n", "100054 False \n", "100055 False \n", "100056 False \n", "\n", "[50 rows x 227 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICE...ORGANIZATION_TYPE_Trade: type 4ORGANIZATION_TYPE_Trade: type 5ORGANIZATION_TYPE_Trade: type 6ORGANIZATION_TYPE_Trade: type 7ORGANIZATION_TYPE_Transport: type 1ORGANIZATION_TYPE_Transport: type 2ORGANIZATION_TYPE_Transport: type 3ORGANIZATION_TYPE_Transport: type 4ORGANIZATION_TYPE_UniversityORGANIZATION_TYPE_XNA
SK_ID_CURR
100002101010202500.000406597.524700.5351000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100003000000270000.0001293502.535698.51129500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10000401111067500.000135000.06750.0135000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100006000010135000.000312682.529686.5297000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100007001010121500.000513000.021865.5513000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10000800101099000.000490495.527517.5454500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100009000111171000.0001560726.041301.01395000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100010001110360000.0001530000.042075.01530000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100011000010112500.0001019610.033826.5913500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue
100012011010135000.000405000.020250.0405000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100014000011112500.000652500.021177.0652500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10001500001038419.155148365.010678.5135000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue
10001600001067500.00080865.05881.567500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100017001101225000.000918468.028966.5697500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100018000010189000.000773680.532778.0679500.0...FalseFalseFalseFalseFalseTrueFalseFalseFalseFalse
100019001110157500.000299772.020160.0247500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100020001000108000.000509602.526149.5387000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10002101001181000.000270000.013500.0270000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100022010010112500.000157500.07875.0157500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10002300001190000.000544491.017563.5454500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100024011110135000.000427500.021375.0427500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100025000111202500.0001132573.537561.5927000.0...FalseFalseFalseTrueFalseFalseFalseFalseFalseFalse
100026000001450000.000497520.032521.5450000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10002700001083250.000239850.023850.0225000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue
100029001102135000.000247500.012703.5247500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10003000001090000.000225000.011074.5225000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100031100010112500.000979992.027076.5702000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100032001011112500.000327024.023827.5270000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100033001110270000.000790830.057676.5675000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10003401101090000.000180000.09000.0180000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100035000010292500.000665892.024592.5477000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100036000010112500.000512064.025033.5360000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10003700000090000.000199008.020893.5180000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100039001101360000.000733315.539069.0679500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100040000010135000.0001125000.032895.01125000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100041000000112500.000450000.044509.5450000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100043000012198000.000641173.523157.0553500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100044001010121500.000454500.015151.5454500.0...FalseFalseFalseFalseFalseFalseFalseTrueFalseFalse
10004500001099000.000247275.017338.5225000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue
100046011110180000.000540000.027000.0540000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100047101010202500.0001193580.035028.0855000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100048000010202500.000604152.029196.0540000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100049100000135000.000288873.016258.5238500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100050000010108000.000746280.042970.5675000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue
100051001010202500.000661702.548280.5598500.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
10005201001190000.000180000.09000.0180000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100053000010202500.000305221.517649.0252000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue
10005400001099000.000260640.026838.0225000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100055000000130500.0001350000.037255.51350000.0...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
100056001110360000.0001506816.049927.51350000.0...FalseFalseFalseFalseFalseTrueFalseFalseFalseFalse
\n", "

50 rows × 227 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_train" } }, "metadata": {}, "execution_count": 34 } ] }, { "cell_type": "code", "source": [ "df_train.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 350 }, "id": "ugGadWrwVrFK", "outputId": "d636fba4-5c20-4ae9-dbb3-eac892fa0572" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR \\\n", "count 307507.00000 307507.000000 307507.000000 307507.000000 \n", "mean 0.08073 0.095201 0.341648 0.340106 \n", "std 0.27242 0.293493 0.474263 0.473745 \n", "min 0.00000 0.000000 0.000000 0.000000 \n", "25% 0.00000 0.000000 0.000000 0.000000 \n", "50% 0.00000 0.000000 0.000000 0.000000 \n", "75% 0.00000 0.000000 1.000000 1.000000 \n", "max 1.00000 1.000000 1.000000 1.000000 \n", "\n", " FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT \\\n", "count 307507.000000 307507.000000 3.075070e+05 3.075070e+05 \n", "mean 0.693669 0.417047 1.687977e+05 5.990286e+05 \n", "std 0.460970 0.722119 2.371246e+05 4.024926e+05 \n", "min 0.000000 0.000000 2.565000e+04 4.500000e+04 \n", "25% 0.000000 0.000000 1.125000e+05 2.700000e+05 \n", "50% 1.000000 0.000000 1.471500e+05 5.135310e+05 \n", "75% 1.000000 1.000000 2.025000e+05 8.086500e+05 \n", "max 1.000000 19.000000 1.170000e+08 4.050000e+06 \n", "\n", " AMT_ANNUITY AMT_GOODS_PRICE ... DEF_30_CNT_SOCIAL_CIRCLE_ISNAN \\\n", "count 307507.000000 3.075070e+05 ... 307507.000000 \n", "mean 27108.580714 5.383178e+05 ... 0.003320 \n", "std 14493.522125 3.692898e+05 ... 0.057526 \n", "min 1615.500000 4.050000e+04 ... 0.000000 \n", "25% 16524.000000 2.385000e+05 ... 0.000000 \n", "50% 24903.000000 4.500000e+05 ... 0.000000 \n", "75% 34596.000000 6.795000e+05 ... 0.000000 \n", "max 258025.500000 4.050000e+06 ... 
1.000000 \n", "\n", " OBS_60_CNT_SOCIAL_CIRCLE_ISNAN DEF_60_CNT_SOCIAL_CIRCLE_ISNAN \\\n", "count 307507.000000 307507.000000 \n", "mean 0.003320 0.003320 \n", "std 0.057526 0.057526 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 1.000000 1.000000 \n", "\n", " DAYS_LAST_PHONE_CHANGE_ISNAN AMT_REQ_CREDIT_BUREAU_HOUR_ISNAN \\\n", "count 307507.000000 307507.000000 \n", "mean 0.000003 0.135018 \n", "std 0.001803 0.341743 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 1.000000 1.000000 \n", "\n", " AMT_REQ_CREDIT_BUREAU_DAY_ISNAN AMT_REQ_CREDIT_BUREAU_WEEK_ISNAN \\\n", "count 307507.000000 307507.000000 \n", "mean 0.135018 0.135018 \n", "std 0.341743 0.341743 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 1.000000 1.000000 \n", "\n", " AMT_REQ_CREDIT_BUREAU_MON_ISNAN AMT_REQ_CREDIT_BUREAU_QRT_ISNAN \\\n", "count 307507.000000 307507.000000 \n", "mean 0.135018 0.135018 \n", "std 0.341743 0.341743 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 1.000000 1.000000 \n", "\n", " AMT_REQ_CREDIT_BUREAU_YEAR_ISNAN \n", "count 307507.000000 \n", "mean 0.135018 \n", "std 0.341743 \n", "min 0.000000 \n", "25% 0.000000 \n", "50% 0.000000 \n", "75% 0.000000 \n", "max 1.000000 \n", "\n", "[8 rows x 111 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICE...DEF_30_CNT_SOCIAL_CIRCLE_ISNANOBS_60_CNT_SOCIAL_CIRCLE_ISNANDEF_60_CNT_SOCIAL_CIRCLE_ISNANDAYS_LAST_PHONE_CHANGE_ISNANAMT_REQ_CREDIT_BUREAU_HOUR_ISNANAMT_REQ_CREDIT_BUREAU_DAY_ISNANAMT_REQ_CREDIT_BUREAU_WEEK_ISNANAMT_REQ_CREDIT_BUREAU_MON_ISNANAMT_REQ_CREDIT_BUREAU_QRT_ISNANAMT_REQ_CREDIT_BUREAU_YEAR_ISNAN
count307507.00000307507.000000307507.000000307507.000000307507.000000307507.0000003.075070e+053.075070e+05307507.0000003.075070e+05...307507.000000307507.000000307507.000000307507.000000307507.000000307507.000000307507.000000307507.000000307507.000000307507.000000
mean0.080730.0952010.3416480.3401060.6936690.4170471.687977e+055.990286e+0527108.5807145.383178e+05...0.0033200.0033200.0033200.0000030.1350180.1350180.1350180.1350180.1350180.135018
std0.272420.2934930.4742630.4737450.4609700.7221192.371246e+054.024926e+0514493.5221253.692898e+05...0.0575260.0575260.0575260.0018030.3417430.3417430.3417430.3417430.3417430.341743
min0.000000.0000000.0000000.0000000.0000000.0000002.565000e+044.500000e+041615.5000004.050000e+04...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%0.000000.0000000.0000000.0000000.0000000.0000001.125000e+052.700000e+0516524.0000002.385000e+05...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
50%0.000000.0000000.0000000.0000001.0000000.0000001.471500e+055.135310e+0524903.0000004.500000e+05...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
75%0.000000.0000001.0000001.0000001.0000001.0000002.025000e+058.086500e+0534596.0000006.795000e+05...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
max1.000001.0000001.0000001.0000001.00000019.0000001.170000e+084.050000e+06258025.5000004.050000e+06...1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", "

8 rows × 111 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe" } }, "metadata": {}, "execution_count": 35 } ] }, { "cell_type": "code", "source": [ "df_train.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Wz-IdRXYYD-b", "outputId": "5f7286a5-ed78-4c7d-a549-cf62a9faa747" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(307507, 227)" ] }, "metadata": {}, "execution_count": 36 } ] }, { "cell_type": "markdown", "source": [ "# Feature Engineering" ], "metadata": { "id": "LDbIC6xjwTAo" } }, { "cell_type": "code", "source": [
"# 1. Install the Hub client and joblib into the kernel's own environment.\n",
"%pip install -q huggingface_hub joblib\n",
"\n",
"from pathlib import Path\n",
"\n",
"import joblib  # kept for the model-persistence step sketched in step 3\n",
"from huggingface_hub import HfApi, notebook_login\n",
"\n",
"# 2. Log in interactively: paste the token into the widget when prompted so\n",
"#    that it is never hard-coded in the notebook.\n",
"notebook_login()\n",
"\n",
"# 3. Save the model and preprocessors (dry run, enable after training).\n",
"# Assuming 'model' is your Random Forest and 'le' is your encoder\n",
"# joblib.dump(model, 'model_riesgo_credito.pkl')\n",
"# joblib.dump(df_train.columns.tolist(), 'features.pkl')  # save the column names\n",
"\n",
"# 4. Create the repository (if it does not exist yet) and upload the artifacts.\n",
"repo_id = \"IzanMoya/Credit-Scoring-Risk-Model-TFM\"\n",
"notebook_name = \"TFM - Scoring de Riesgo Crediticio con ML para la Banca .ipynb\"\n",
"\n",
"# Local path -> path inside the repository. The notebook keeps the same name on\n",
"# both sides; the model entry is uploaded once the .pkl file has been written.\n",
"artifacts = {\n",
"    notebook_name: notebook_name,\n",
"    \"model_riesgo_credito.pkl\": \"model.pkl\",\n",
"}\n",
"\n",
"api = HfApi()\n",
"api.create_repo(repo_id=repo_id, exist_ok=True, repo_type=\"model\")\n",
"\n",
"uploaded = []\n",
"for local_path, path_in_repo in artifacts.items():\n",
"    # upload_file raises ValueError when the path is not a file on the local\n",
"    # file system, so skip missing files instead of aborting the whole cell.\n",
"    if not Path(local_path).is_file():\n",
"        print(f\"Omitido: '{local_path}' no existe en el sistema de archivos local.\")\n",
"        continue\n",
"    api.upload_file(\n",
"        path_or_fileobj=local_path,\n",
"        path_in_repo=path_in_repo,\n",
"        repo_id=repo_id,\n",
"        repo_type=\"model\",\n",
"    )\n",
"    uploaded.append(path_in_repo)\n",
"\n",
"# Report success only when something was actually uploaded.\n",
"if uploaded:\n",
"    print(f\"¡Subido con éxito a https://huggingface.co/{repo_id}!\")"
 ], "metadata": { "id": "tugLv6dA1p5X", "colab": { "base_uri": "https://localhost:8080/", "height": 599, "referenced_widgets": [ "d8af933e4af4461c9130f2f39b028193", "949502f1dad24c19b8ed255837e65480", "f9a32d79db034bbc9221e714bb3631be", "7b1ee4714b65418baa1b6b97358923e0", "e34237ef65fb4157b63d45be1f61ab98", "faf6520ec0f8433da1795094a8679490", "854110ca3fea4201b3e89e4b31346e33", "4d1c3e5d2d184895abc8e7dec6a133bc", "f366bd24f8cb407b84e031e5f0d90ab5", "254b3a685431419c8ef166ff1ff8c594", "781a4e29ef364e54a9092d786835b83a", "3ae72b8b08ab44148b759ef65e5dad0d", "1a081370130641d696b39165b8c87963", "2ceeb26183604902b8d9cb8e7a256d05", "e2ed39391a0f481fb104c74a1511c40c", "8b713f9ee3e0451d91a4d7653aca8be5", "e186852a14bc49ccada899ea8d25ed29", "1145ce7e08b7470d98ca8e827fa49b20", "abd184653ad54787a12d708c89bec8cd", "afd1ed836ca941188e97bb6bacedc1ab" ] }, "outputId": "d10e3b89-bb56-4d62-ca0a-6ed1e4e7fb70" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.12/dist-packages (0.36.0)\n", "Requirement already 
satisfied: joblib in /usr/local/lib/python3.12/dist-packages (1.5.3)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (3.20.3)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (2025.3.0)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (25.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (6.0.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (2.32.4)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (4.67.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (4.15.0)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (1.2.0)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (3.4.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (3.11)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (2026.1.4)\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='
\u001b[0;34m()\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;31m# Subir el Notebook\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m api.upload_file(\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mpath_or_fileobj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"TFM - Scoring de Riesgo Crediticio con ML para la Banca .ipynb\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mpath_in_repo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"TFM -Scoring de Riesgo Crediticio con ML para la Banca .ipynb\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_validators.py\u001b[0m in \u001b[0;36m_inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msmoothly_deprecate_use_auth_token\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhas_token\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhas_token\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_inner_fn\u001b[0m \u001b[0;31m# type: 
ignore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/hf_api.py\u001b[0m in \u001b[0;36m_inner\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1685\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1686\u001b[0m \u001b[0;31m# Otherwise, call the function normally\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1687\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1688\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1689\u001b[0m \u001b[0m_inner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_future_compatible\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m \u001b[0;31m# type: ignore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/hf_api.py\u001b[0m in \u001b[0;36mupload_file\u001b[0;34m(self, path_or_fileobj, path_in_repo, repo_id, token, repo_type, revision, commit_message, commit_description, create_pr, parent_commit, run_as_future)\u001b[0m\n\u001b[1;32m 4660\u001b[0m \u001b[0mcommit_message\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcommit_message\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34mf\"Upload {path_in_repo} with huggingface_hub\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4661\u001b[0m )\n\u001b[0;32m-> 4662\u001b[0;31m operation = CommitOperationAdd(\n\u001b[0m\u001b[1;32m 4663\u001b[0m 
\u001b[0mpath_or_fileobj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath_or_fileobj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4664\u001b[0m \u001b[0mpath_in_repo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath_in_repo\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/_commit_api.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, path_in_repo, path_or_fileobj)\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/_commit_api.py\u001b[0m in \u001b[0;36m__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mpath_or_fileobj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnormpath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpanduser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath_or_fileobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_fileobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 182\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Provided path: '{path_or_fileobj}' is not a file on the local file system\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 183\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath_or_fileobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mBufferedIOBase\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: Provided path: 'TFM - Scoring de Riesgo Crediticio con ML para la Banca .ipynb' is not a file on the local file system" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "uyiAV2bqqlTF" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "co_FcXb7kBFE" }, "execution_count": null, "outputs": [] } ] }