diff --git "a/DL_GroupE_Assignment1 varunan (1).ipynb" "b/DL_GroupE_Assignment1 varunan (1).ipynb" deleted file mode 100644--- "a/DL_GroupE_Assignment1 varunan (1).ipynb" +++ /dev/null @@ -1,4751 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fc419e40-5914-4300-ae2b-a5f65567a949", - "metadata": {}, - "source": [ - "MECE Table" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "91e7ae86-11b2-4450-80d8-2d4edc1b0a3f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameIDROLE
0Samil Mithani500223143DNN and Its fine tunning
1Mahima Patel500223995Data Preprocessing and Model Building
2Varunan Gurushev500225040RandomForest Model Tunning
3Gazal500225552Data Preprocessing
4Priya500225204Data Visualization
5Shiv Kapadia500203921Model Fine Tuning
6Abhishek Singh500224576Data Visualization
7Rohit500224882Data PPreprocessing
\n", - "
" - ], - "text/plain": [ - " Name ID ROLE\n", - "0 Samil Mithani 500223143 DNN and Its fine tunning\n", - "1 Mahima Patel 500223995 Data Preprocessing and Model Building \n", - "2 Varunan Gurushev 500225040 RandomForest Model Tunning \n", - "3 Gazal 500225552 Data Preprocessing\n", - "4 Priya 500225204 Data Visualization \n", - "5 Shiv Kapadia 500203921 Model Fine Tuning\n", - "6 Abhishek Singh 500224576 Data Visualization\n", - "7 Rohit 500224882 Data PPreprocessing" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "MECE = {\n", - " 'Name': ['Samil Mithani', 'Mahima Patel','Varunan Gurushev', 'Gazal', 'Priya', 'Shiv Kapadia','Abhishek Singh','Rohit'],\n", - " 'ID': [500223143, 500223995, 500225040, 500225552,500225204 , 500203921, 500224576,500224882],\n", - " 'ROLE': ['DNN and Its fine tunning','Data Preprocessing and Model Building ', 'RandomForest Model Tunning ', 'Data Preprocessing','Data Visualization ','Model Fine Tuning','Data Visualization','Data PPreprocessing']\n", - "}\n", - "\n", - "MECE_Table = pd.DataFrame(MECE)\n", - "MECE_Table" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "165352c9", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import sklearn as sl\n", - "from sklearn.preprocessing import LabelEncoder" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "a00152e2", - "metadata": {}, - "outputs": [], - "source": [ - "df=pd.read_csv('bank_data_train.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "59754131", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDCR_PROD_CNT_ILAMOUNT_RUB_CLO_PRCPRC_ACCEPTS_A_EMAIL_LINKAPP_REGISTR_RGN_CODEPRC_ACCEPTS_A_POSPRC_ACCEPTS_A_TKTURNOVER_DYNAMIC_IL_1MCNT_TRAN_AUT_TENDENCY1MSUM_TRAN_AUT_TENDENCY1M...REST_DYNAMIC_CC_3MMED_DEBT_PRC_YWZLDEAL_ACT_DAYS_PCT_TR3LDEAL_ACT_DAYS_PCT_AAVGLDEAL_DELINQ_PER_MAXYWZTURNOVER_DYNAMIC_CC_3MLDEAL_ACT_DAYS_PCT_TRLDEAL_ACT_DAYS_PCT_TR4LDEAL_ACT_DAYS_PCT_CURRTARGET
014684100.000000NaNNaNNaNNaN0.0NaNNaN...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
114684200.041033NaNNaNNaNNaN0.00.1666670.186107...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
214684300.0069150.0NaN0.00.00.0NaNNaN...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
314684400.000000NaNNaNNaNNaN0.0NaNNaN...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
414684500.000000NaNNaNNaNNaN0.0NaNNaN...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
\n", - "

5 rows × 116 columns

\n", - "
" - ], - "text/plain": [ - " ID CR_PROD_CNT_IL AMOUNT_RUB_CLO_PRC PRC_ACCEPTS_A_EMAIL_LINK \\\n", - "0 146841 0 0.000000 NaN \n", - "1 146842 0 0.041033 NaN \n", - "2 146843 0 0.006915 0.0 \n", - "3 146844 0 0.000000 NaN \n", - "4 146845 0 0.000000 NaN \n", - "\n", - " APP_REGISTR_RGN_CODE PRC_ACCEPTS_A_POS PRC_ACCEPTS_A_TK \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN 0.0 0.0 \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " TURNOVER_DYNAMIC_IL_1M CNT_TRAN_AUT_TENDENCY1M SUM_TRAN_AUT_TENDENCY1M \\\n", - "0 0.0 NaN NaN \n", - "1 0.0 0.166667 0.186107 \n", - "2 0.0 NaN NaN \n", - "3 0.0 NaN NaN \n", - "4 0.0 NaN NaN \n", - "\n", - " ... REST_DYNAMIC_CC_3M MED_DEBT_PRC_YWZ LDEAL_ACT_DAYS_PCT_TR3 \\\n", - "0 ... 0.0 NaN NaN \n", - "1 ... 0.0 NaN NaN \n", - "2 ... 0.0 NaN NaN \n", - "3 ... 0.0 NaN NaN \n", - "4 ... 0.0 NaN NaN \n", - "\n", - " LDEAL_ACT_DAYS_PCT_AAVG LDEAL_DELINQ_PER_MAXYWZ TURNOVER_DYNAMIC_CC_3M \\\n", - "0 NaN NaN 0.0 \n", - "1 NaN NaN 0.0 \n", - "2 NaN NaN 0.0 \n", - "3 NaN NaN 0.0 \n", - "4 NaN NaN 0.0 \n", - "\n", - " LDEAL_ACT_DAYS_PCT_TR LDEAL_ACT_DAYS_PCT_TR4 LDEAL_ACT_DAYS_PCT_CURR \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " TARGET \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 \n", - "\n", - "[5 rows x 116 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "0b0ecde7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 116)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "f90be4d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 104)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns_to_drop = ['PRC_ACCEPTS_A_EMAIL_LINK','PRC_ACCEPTS_A_POS','PRC_ACCEPTS_A_TK','PRC_ACCEPTS_A_AMOBILE','PRC_ACCEPTS_TK','PRC_ACCEPTS_A_MTP','CNT_ACCEPTS_TK','CLNT_JOB_POSITION','APP_DRIVING_LICENSE','PRC_ACCEPTS_A_ATM','PRC_ACCEPTS_MTP','CNT_ACCEPTS_MTP']\n", - "df = df.drop(columns=columns_to_drop)\n", - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "ab111add", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDCR_PROD_CNT_ILAMOUNT_RUB_CLO_PRCAPP_REGISTR_RGN_CODETURNOVER_DYNAMIC_IL_1MCNT_TRAN_AUT_TENDENCY1MSUM_TRAN_AUT_TENDENCY1MAMOUNT_RUB_SUP_PRCSUM_TRAN_AUT_TENDENCY3MREST_DYNAMIC_FDEP_1M...REST_DYNAMIC_CC_3MMED_DEBT_PRC_YWZLDEAL_ACT_DAYS_PCT_TR3LDEAL_ACT_DAYS_PCT_AAVGLDEAL_DELINQ_PER_MAXYWZTURNOVER_DYNAMIC_CC_3MLDEAL_ACT_DAYS_PCT_TRLDEAL_ACT_DAYS_PCT_TR4LDEAL_ACT_DAYS_PCT_CURRTARGET
count355190.000000355190.000000316867.00000060550.000000355190.00000077112.00000077112.000000316867.000000111052.000000355190.000000...355190.00000095713.00000093448.00000098175.00000095713.000000355190.00000093448.00000093448.00000093448.000000355190.000000
mean368794.6748750.1052250.04404550.9474980.0013050.4168960.4145720.0852490.6890800.000723...0.0073090.0550740.0257070.0499430.0092520.0043090.0139380.0139380.0139380.081435
std128148.8045660.4313720.10844921.7778550.0291180.3164930.3386120.1423100.3017250.014081...0.0666810.2159090.1157320.1858300.0927890.0598520.0970990.0970990.0970990.273503
min146841.0000000.0000000.0000000.0000000.0000000.0069440.0000000.0000000.0000020.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%257846.2500000.0000000.00000033.0000000.0000000.1666670.1396450.0000000.4462690.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
50%368778.5000000.0000000.00000054.0000000.0000000.3000000.2857140.0271170.7229850.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
75%479737.7500000.0000000.03660872.0000000.0000000.5714290.6611950.1100051.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
max590828.00000011.0000001.00000089.0000001.0000001.0000001.0000001.0000001.0000001.000000...1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", - "

8 rows × 93 columns

\n", - "
" - ], - "text/plain": [ - " ID CR_PROD_CNT_IL AMOUNT_RUB_CLO_PRC \\\n", - "count 355190.000000 355190.000000 316867.000000 \n", - "mean 368794.674875 0.105225 0.044045 \n", - "std 128148.804566 0.431372 0.108449 \n", - "min 146841.000000 0.000000 0.000000 \n", - "25% 257846.250000 0.000000 0.000000 \n", - "50% 368778.500000 0.000000 0.000000 \n", - "75% 479737.750000 0.000000 0.036608 \n", - "max 590828.000000 11.000000 1.000000 \n", - "\n", - " APP_REGISTR_RGN_CODE TURNOVER_DYNAMIC_IL_1M CNT_TRAN_AUT_TENDENCY1M \\\n", - "count 60550.000000 355190.000000 77112.000000 \n", - "mean 50.947498 0.001305 0.416896 \n", - "std 21.777855 0.029118 0.316493 \n", - "min 0.000000 0.000000 0.006944 \n", - "25% 33.000000 0.000000 0.166667 \n", - "50% 54.000000 0.000000 0.300000 \n", - "75% 72.000000 0.000000 0.571429 \n", - "max 89.000000 1.000000 1.000000 \n", - "\n", - " SUM_TRAN_AUT_TENDENCY1M AMOUNT_RUB_SUP_PRC SUM_TRAN_AUT_TENDENCY3M \\\n", - "count 77112.000000 316867.000000 111052.000000 \n", - "mean 0.414572 0.085249 0.689080 \n", - "std 0.338612 0.142310 0.301725 \n", - "min 0.000000 0.000000 0.000002 \n", - "25% 0.139645 0.000000 0.446269 \n", - "50% 0.285714 0.027117 0.722985 \n", - "75% 0.661195 0.110005 1.000000 \n", - "max 1.000000 1.000000 1.000000 \n", - "\n", - " REST_DYNAMIC_FDEP_1M ... REST_DYNAMIC_CC_3M MED_DEBT_PRC_YWZ \\\n", - "count 355190.000000 ... 355190.000000 95713.000000 \n", - "mean 0.000723 ... 0.007309 0.055074 \n", - "std 0.014081 ... 0.066681 0.215909 \n", - "min 0.000000 ... 0.000000 0.000000 \n", - "25% 0.000000 ... 0.000000 0.000000 \n", - "50% 0.000000 ... 0.000000 0.000000 \n", - "75% 0.000000 ... 0.000000 0.000000 \n", - "max 1.000000 ... 1.000000 1.000000 \n", - "\n", - " LDEAL_ACT_DAYS_PCT_TR3 LDEAL_ACT_DAYS_PCT_AAVG \\\n", - "count 93448.000000 98175.000000 \n", - "mean 0.025707 0.049943 \n", - "std 0.115732 0.185830 \n", - "min 0.000000 0.000000 \n", - "25% 0.000000 0.000000 \n", - "50% 0.000000 0.000000 \n", - "75% 0.000000 0.000000 \n", - "max 1.000000 1.000000 \n", - "\n", - " LDEAL_DELINQ_PER_MAXYWZ TURNOVER_DYNAMIC_CC_3M LDEAL_ACT_DAYS_PCT_TR \\\n", - "count 95713.000000 355190.000000 93448.000000 \n", - "mean 0.009252 0.004309 0.013938 \n", - "std 0.092789 0.059852 0.097099 \n", - "min 0.000000 0.000000 0.000000 \n", - "25% 0.000000 0.000000 0.000000 \n", - "50% 0.000000 0.000000 0.000000 \n", - "75% 0.000000 0.000000 0.000000 \n", - "max 1.000000 1.000000 1.000000 \n", - "\n", - " LDEAL_ACT_DAYS_PCT_TR4 LDEAL_ACT_DAYS_PCT_CURR TARGET \n", - "count 93448.000000 93448.000000 355190.000000 \n", - "mean 0.013938 0.013938 0.081435 \n", - "std 0.097099 0.097099 0.273503 \n", - "min 0.000000 0.000000 0.000000 \n", - "25% 0.000000 0.000000 0.000000 \n", - "50% 0.000000 0.000000 0.000000 \n", - "75% 0.000000 0.000000 0.000000 \n", - "max 1.000000 1.000000 1.000000 \n", - "\n", - "[8 rows x 93 columns]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "01d7003b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ID 0\n", - "CR_PROD_CNT_IL 0\n", - "AMOUNT_RUB_CLO_PRC 38323\n", - "APP_REGISTR_RGN_CODE 294640\n", - "TURNOVER_DYNAMIC_IL_1M 0\n", - " ... \n", - "TURNOVER_DYNAMIC_CC_3M 0\n", - "LDEAL_ACT_DAYS_PCT_TR 261742\n", - "LDEAL_ACT_DAYS_PCT_TR4 261742\n", - "LDEAL_ACT_DAYS_PCT_CURR 261742\n", - "TARGET 0\n", - "Length: 104, dtype: int64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.isnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "0e681922", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['CLNT_TRUST_RELATION',\n", - " 'APP_MARITAL_STATUS',\n", - " 'APP_KIND_OF_PROP_HABITATION',\n", - " 'CLNT_JOB_POSITION_TYPE',\n", - " 'APP_EDUCATION',\n", - " 'APP_TRAVEL_PASS',\n", - " 'APP_CAR',\n", - " 'APP_POSITION_TYPE',\n", - " 'APP_EMP_TYPE',\n", - " 'APP_COMP_TYPE',\n", - " 'PACK']" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "col_types = df.dtypes\n", - "cat_columns = col_types[col_types == 'object'].index.tolist()\n", - "num_columns = col_types[col_types != 'object'].index.tolist()\n", - "cat_columns" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "6775532f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 97)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns_to_drop2 = ['APP_MARITAL_STATUS','APP_KIND_OF_PROP_HABITATION','CLNT_TRUST_RELATION', 'APP_EDUCATION', 'APP_CAR','APP_COMP_TYPE','CLNT_JOB_POSITION_TYPE']\n", - "df = df.drop(columns=columns_to_drop2)\n", - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "68644d71", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['ID',\n", - " 'CR_PROD_CNT_IL',\n", - " 'AMOUNT_RUB_CLO_PRC',\n", - " 'APP_REGISTR_RGN_CODE',\n", - " 'TURNOVER_DYNAMIC_IL_1M',\n", - " 'CNT_TRAN_AUT_TENDENCY1M',\n", - " 'SUM_TRAN_AUT_TENDENCY1M',\n", - " 'AMOUNT_RUB_SUP_PRC',\n", - " 'SUM_TRAN_AUT_TENDENCY3M',\n", - " 'REST_DYNAMIC_FDEP_1M',\n", - " 'CNT_TRAN_AUT_TENDENCY3M',\n", - " 'REST_DYNAMIC_SAVE_3M',\n", - " 'CR_PROD_CNT_VCU',\n", - " 'REST_AVG_CUR',\n", - " 'CNT_TRAN_MED_TENDENCY1M',\n", - " 'AMOUNT_RUB_NAS_PRC',\n", - " 'TRANS_COUNT_SUP_PRC',\n", - " 'CNT_TRAN_CLO_TENDENCY1M',\n", - " 'SUM_TRAN_MED_TENDENCY1M',\n", - " 'TRANS_COUNT_NAS_PRC',\n", - " 'CR_PROD_CNT_TOVR',\n", - " 'CR_PROD_CNT_PIL',\n", - " 'SUM_TRAN_CLO_TENDENCY1M',\n", - " 'TURNOVER_CC',\n", - " 'TRANS_COUNT_ATM_PRC',\n", - " 'AMOUNT_RUB_ATM_PRC',\n", - " 'TURNOVER_PAYM',\n", - " 'AGE',\n", - " 'CNT_TRAN_MED_TENDENCY3M',\n", - " 'CR_PROD_CNT_CC',\n", - " 'SUM_TRAN_MED_TENDENCY3M',\n", - " 'REST_DYNAMIC_FDEP_3M',\n", - " 'REST_DYNAMIC_IL_1M',\n", - " 'SUM_TRAN_CLO_TENDENCY3M',\n", - " 'LDEAL_TENOR_MAX',\n", - " 'LDEAL_YQZ_CHRG',\n", - " 'CR_PROD_CNT_CCFP',\n", - " 'DEAL_YQZ_IR_MAX',\n", - " 'LDEAL_YQZ_COM',\n", - " 'DEAL_YQZ_IR_MIN',\n", - " 'CNT_TRAN_CLO_TENDENCY3M',\n", - " 'REST_DYNAMIC_CUR_1M',\n", - " 'REST_AVG_PAYM',\n", - " 'LDEAL_TENOR_MIN',\n", - " 'LDEAL_AMT_MONTH',\n", - " 'LDEAL_GRACE_DAYS_PCT_MED',\n", - " 'REST_DYNAMIC_CUR_3M',\n", - " 'CNT_TRAN_SUP_TENDENCY3M',\n", - " 'TURNOVER_DYNAMIC_CUR_1M',\n", - " 'REST_DYNAMIC_PAYM_3M',\n", - " 'SUM_TRAN_SUP_TENDENCY3M',\n", - " 'REST_DYNAMIC_IL_3M',\n", - " 'CNT_TRAN_ATM_TENDENCY3M',\n", - " 'CNT_TRAN_ATM_TENDENCY1M',\n", - " 'TURNOVER_DYNAMIC_IL_3M',\n", - " 'SUM_TRAN_ATM_TENDENCY3M',\n", - " 'DEAL_GRACE_DAYS_ACC_S1X1',\n", - " 'AVG_PCT_MONTH_TO_PCLOSE',\n", - " 'DEAL_YWZ_IR_MIN',\n", - " 'SUM_TRAN_SUP_TENDENCY1M',\n", - " 'DEAL_YWZ_IR_MAX',\n", - " 'SUM_TRAN_ATM_TENDENCY1M',\n", - " 'REST_DYNAMIC_PAYM_1M',\n", - " 'CNT_TRAN_SUP_TENDENCY1M',\n", - " 'DEAL_GRACE_DAYS_ACC_AVG',\n", - " 'TURNOVER_DYNAMIC_CUR_3M',\n", - " 'MAX_PCLOSE_DATE',\n", - " 'LDEAL_YQZ_PC',\n", - " 'CLNT_SETUP_TENOR',\n", - " 'DEAL_GRACE_DAYS_ACC_MAX',\n", - " 'TURNOVER_DYNAMIC_PAYM_3M',\n", - " 'LDEAL_DELINQ_PER_MAXYQZ',\n", - " 'TURNOVER_DYNAMIC_PAYM_1M',\n", - " 'CLNT_SALARY_VALUE',\n", - " 'TRANS_AMOUNT_TENDENCY3M',\n", - " 'MED_DEBT_PRC_YQZ',\n", - " 'TRANS_CNT_TENDENCY3M',\n", - " 'LDEAL_USED_AMT_AVG_YQZ',\n", - " 'REST_DYNAMIC_CC_1M',\n", - " 'LDEAL_USED_AMT_AVG_YWZ',\n", - " 'TURNOVER_DYNAMIC_CC_1M',\n", - " 'AVG_PCT_DEBT_TO_DEAL_AMT',\n", - " 'LDEAL_ACT_DAYS_ACC_PCT_AVG',\n", - " 'REST_DYNAMIC_CC_3M',\n", - " 'MED_DEBT_PRC_YWZ',\n", - " 'LDEAL_ACT_DAYS_PCT_TR3',\n", - " 'LDEAL_ACT_DAYS_PCT_AAVG',\n", - " 'LDEAL_DELINQ_PER_MAXYWZ',\n", - " 'TURNOVER_DYNAMIC_CC_3M',\n", - " 'LDEAL_ACT_DAYS_PCT_TR',\n", - " 'LDEAL_ACT_DAYS_PCT_TR4',\n", - " 'LDEAL_ACT_DAYS_PCT_CURR',\n", - " 'TARGET']" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "num_columns" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "b2c56f1b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 97)" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "fe695eae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 92)" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns_to_drop3 = ['AGE','APP_REGISTR_RGN_CODE','ID','APP_TRAVEL_PASS','APP_EMP_TYPE']\n", - "df = df.drop(columns=columns_to_drop3)\n", - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "1593b269", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CR_PROD_CNT_ILAMOUNT_RUB_CLO_PRCTURNOVER_DYNAMIC_IL_1MCNT_TRAN_AUT_TENDENCY1MSUM_TRAN_AUT_TENDENCY1MAMOUNT_RUB_SUP_PRCSUM_TRAN_AUT_TENDENCY3MREST_DYNAMIC_FDEP_1MCNT_TRAN_AUT_TENDENCY3MREST_DYNAMIC_SAVE_3M...REST_DYNAMIC_CC_3MMED_DEBT_PRC_YWZLDEAL_ACT_DAYS_PCT_TR3LDEAL_ACT_DAYS_PCT_AAVGLDEAL_DELINQ_PER_MAXYWZTURNOVER_DYNAMIC_CC_3MLDEAL_ACT_DAYS_PCT_TRLDEAL_ACT_DAYS_PCT_TR4LDEAL_ACT_DAYS_PCT_CURRTARGET
000.0919550.0NaNNaN0.000000NaN0.0NaN0.541683...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
100.0410330.00.1666670.1861070.2446780.6709680.00.6666670.000000...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
200.0069150.0NaNNaN0.000000NaN0.0NaN0.000000...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
300.0919550.0NaNNaN0.000000NaN0.0NaN0.005874...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
400.0919550.0NaNNaN0.000000NaN0.0NaN0.000000...0.0NaNNaNNaNNaN0.0NaNNaNNaN0
\n", - "

5 rows × 92 columns

\n", - "
" - ], - "text/plain": [ - " CR_PROD_CNT_IL AMOUNT_RUB_CLO_PRC TURNOVER_DYNAMIC_IL_1M \\\n", - "0 0 0.091955 0.0 \n", - "1 0 0.041033 0.0 \n", - "2 0 0.006915 0.0 \n", - "3 0 0.091955 0.0 \n", - "4 0 0.091955 0.0 \n", - "\n", - " CNT_TRAN_AUT_TENDENCY1M SUM_TRAN_AUT_TENDENCY1M AMOUNT_RUB_SUP_PRC \\\n", - "0 NaN NaN 0.000000 \n", - "1 0.166667 0.186107 0.244678 \n", - "2 NaN NaN 0.000000 \n", - "3 NaN NaN 0.000000 \n", - "4 NaN NaN 0.000000 \n", - "\n", - " SUM_TRAN_AUT_TENDENCY3M REST_DYNAMIC_FDEP_1M CNT_TRAN_AUT_TENDENCY3M \\\n", - "0 NaN 0.0 NaN \n", - "1 0.670968 0.0 0.666667 \n", - "2 NaN 0.0 NaN \n", - "3 NaN 0.0 NaN \n", - "4 NaN 0.0 NaN \n", - "\n", - " REST_DYNAMIC_SAVE_3M ... REST_DYNAMIC_CC_3M MED_DEBT_PRC_YWZ \\\n", - "0 0.541683 ... 0.0 NaN \n", - "1 0.000000 ... 0.0 NaN \n", - "2 0.000000 ... 0.0 NaN \n", - "3 0.005874 ... 0.0 NaN \n", - "4 0.000000 ... 0.0 NaN \n", - "\n", - " LDEAL_ACT_DAYS_PCT_TR3 LDEAL_ACT_DAYS_PCT_AAVG LDEAL_DELINQ_PER_MAXYWZ \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " TURNOVER_DYNAMIC_CC_3M LDEAL_ACT_DAYS_PCT_TR LDEAL_ACT_DAYS_PCT_TR4 \\\n", - "0 0.0 NaN NaN \n", - "1 0.0 NaN NaN \n", - "2 0.0 NaN NaN \n", - "3 0.0 NaN NaN \n", - "4 0.0 NaN NaN \n", - "\n", - " LDEAL_ACT_DAYS_PCT_CURR TARGET \n", - "0 NaN 0 \n", - "1 NaN 0 \n", - "2 NaN 0 \n", - "3 NaN 0 \n", - "4 NaN 0 \n", - "\n", - "[5 rows x 92 columns]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "\n", - "# Replaceing zeros with NaN for treating them as missing values\n", - "df['AMOUNT_RUB_CLO_PRC'].replace(0, np.nan, inplace=True)\n", - "\n", - "# Fill missing values with the mean of the column (excluding NaNs)\n", - "df['AMOUNT_RUB_CLO_PRC'].fillna(df['AMOUNT_RUB_CLO_PRC'].mean(), inplace=True)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "231d9efb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Non-numeric columns: Index(['APP_POSITION_TYPE'], dtype='object')\n" - ] - } - ], - "source": [ - "# checking the correlation of no of products used by customer for each category of products\n", - "columns_to_check = ['AMOUNT_RUB_CLO_PRC','CNT_TRAN_AUT_TENDENCY1M','SUM_TRAN_AUT_TENDENCY1M','AMOUNT_RUB_SUP_PRC','SUM_TRAN_AUT_TENDENCY3M','CNT_TRAN_AUT_TENDENCY3M','CNT_TRAN_MED_TENDENCY1M','AMOUNT_RUB_NAS_PRC','TRANS_COUNT_SUP_PRC','CNT_TRAN_CLO_TENDENCY1M','SUM_TRAN_MED_TENDENCY1M','TRANS_COUNT_NAS_PRC','SUM_TRAN_CLO_TENDENCY1M','APP_POSITION_TYPE','TRANS_COUNT_ATM_PRC','AMOUNT_RUB_ATM_PRC','CNT_TRAN_MED_TENDENCY3M','SUM_TRAN_MED_TENDENCY3M','SUM_TRAN_CLO_TENDENCY3M','LDEAL_TENOR_MAX','LDEAL_YQZ_CHRG','DEAL_YQZ_IR_MAX','LDEAL_YQZ_COM','DEAL_YQZ_IR_MIN','CNT_TRAN_CLO_TENDENCY3M','LDEAL_TENOR_MIN','LDEAL_AMT_MONTH','CNT_TRAN_SUP_TENDENCY3M','SUM_TRAN_SUP_TENDENCY3M','CNT_TRAN_ATM_TENDENCY3M','CNT_TRAN_ATM_TENDENCY1M','SUM_TRAN_ATM_TENDENCY3M','DEAL_GRACE_DAYS_ACC_S1X1','AVG_PCT_MONTH_TO_PCLOSE','DEAL_YWZ_IR_MIN','SUM_TRAN_SUP_TENDENCY1M','DEAL_YWZ_IR_MAX','SUM_TRAN_ATM_TENDENCY1M','CNT_TRAN_SUP_TENDENCY1M','DEAL_GRACE_DAYS_ACC_AVG','LDEAL_YQZ_PC','DEAL_GRACE_DAYS_ACC_MAX','LDEAL_DELINQ_PER_MAXYQZ','CLNT_SALARY_VALUE','TRANS_AMOUNT_TENDENCY3M','MED_DEBT_PRC_YQZ','TRANS_CNT_TENDENCY3M','LDEAL_USED_AMT_AVG_YQZ','LDEAL_USED_AMT_AVG_YWZ','AVG_PCT_DEBT_TO_DEAL_AMT','LDEAL_ACT_DAYS_ACC_PCT_AVG','MED_DEBT_PRC_YWZ','LDEAL_ACT_DAYS_PCT_TR3','LDEAL_ACT_DAYS_PCT_AAVG','LDEAL_DELINQ_PER_MAXYWZ','LDEAL_ACT_DAYS_PCT_TR','LDEAL_ACT_DAYS_PCT_TR4','LDEAL_ACT_DAYS_PCT_CURR','TARGET']\n", - "non_numeric_columns = df[columns_to_check].select_dtypes(exclude=[np.number]).columns\n", - "print(\"Non-numeric columns:\", non_numeric_columns)\n", - "label_encoders = {}\n", - "for column in non_numeric_columns:\n", - " le = LabelEncoder()\n", - " df[column] = le.fit_transform(df[column].astype(str))\n", - " label_encoders[column] = le\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "9e202315", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AMOUNT_RUB_CLO_PRCCNT_TRAN_AUT_TENDENCY1MSUM_TRAN_AUT_TENDENCY1MAMOUNT_RUB_SUP_PRCSUM_TRAN_AUT_TENDENCY3MCNT_TRAN_AUT_TENDENCY3MCNT_TRAN_MED_TENDENCY1MAMOUNT_RUB_NAS_PRCTRANS_COUNT_SUP_PRCCNT_TRAN_CLO_TENDENCY1M...AVG_PCT_DEBT_TO_DEAL_AMTLDEAL_ACT_DAYS_ACC_PCT_AVGMED_DEBT_PRC_YWZLDEAL_ACT_DAYS_PCT_TR3LDEAL_ACT_DAYS_PCT_AAVGLDEAL_DELINQ_PER_MAXYWZLDEAL_ACT_DAYS_PCT_TRLDEAL_ACT_DAYS_PCT_TR4LDEAL_ACT_DAYS_PCT_CURRTARGET
AMOUNT_RUB_CLO_PRC1.0000000.0494750.0466740.0118410.0392900.0424830.023695-0.006358-0.077084-0.079970...0.001312-0.001481-0.005629-0.006224-0.0046340.000581-0.002975-0.002975-0.002975-0.001319
CNT_TRAN_AUT_TENDENCY1M0.0494751.0000000.928355-0.0346650.5052100.5509010.200437-0.000126-0.0474600.155910...0.0024980.0140500.0065900.0065800.0118940.0115400.0052660.0052660.0052660.029565
SUM_TRAN_AUT_TENDENCY1M0.0466740.9283551.000000-0.0333170.5317480.5097640.1876980.000077-0.0453230.146325...0.0025530.0126260.0054460.0053130.0110260.0104670.0045330.0045330.0045330.027457
AMOUNT_RUB_SUP_PRC0.011841-0.034665-0.0333171.000000-0.032999-0.034831-0.055277-0.0002140.718376-0.034542...0.000390-0.0025620.001128-0.004957-0.0046980.000409-0.006974-0.006974-0.006974-0.024558
SUM_TRAN_AUT_TENDENCY3M0.0392900.5052100.531748-0.0329991.0000000.9146500.138073-0.000498-0.0432250.108307...0.0020520.0119330.0068680.0071690.0099580.0100490.0077160.0077160.0077160.020648
CNT_TRAN_AUT_TENDENCY3M0.0424830.5509010.509764-0.0348310.9146501.0000000.149022-0.001554-0.0454320.117697...0.0019490.0139580.0082600.0093020.0113540.0106440.0091340.0091340.0091340.022869
CNT_TRAN_MED_TENDENCY1M0.0236950.2004370.187698-0.0552770.1380730.1490221.0000000.000959-0.0645990.221546...-0.0004540.0155150.0085420.0090180.0101370.0089970.0068940.0068940.0068940.032242
AMOUNT_RUB_NAS_PRC-0.006358-0.0001260.000077-0.000214-0.000498-0.0015540.0009591.000000-0.0506420.003803...0.002100-0.005959-0.004743-0.005884-0.0052680.001456-0.005776-0.005776-0.005776-0.011020
TRANS_COUNT_SUP_PRC-0.077084-0.047460-0.0453230.718376-0.043225-0.045432-0.064599-0.0506421.000000-0.035499...-0.0010070.0015530.0070400.003319-0.002855-0.002627-0.000654-0.000654-0.000654-0.020416
CNT_TRAN_CLO_TENDENCY1M-0.0799700.1559100.146325-0.0345420.1083070.1176970.2215460.003803-0.0354991.000000...-0.0012330.0147360.0098180.0119480.0123100.0072190.0086590.0086590.0086590.030361
SUM_TRAN_MED_TENDENCY1M0.0211170.1785590.168158-0.0499530.1217260.1300190.8958460.000685-0.0584880.200163...-0.0003970.0136510.0077770.0086540.0093760.0081780.0065140.0065140.0065140.028469
TRANS_COUNT_NAS_PRC-0.0131940.0076890.007680-0.0437430.0047650.0045650.0148100.725117-0.1163500.015730...0.001740-0.008373-0.008053-0.008635-0.005304-0.002327-0.008357-0.008357-0.008357-0.009448
SUM_TRAN_CLO_TENDENCY1M-0.0800350.1425590.134500-0.0304900.0980740.1067370.1992940.004282-0.0319650.914132...-0.0008730.0132580.0092140.0115760.0114360.0057900.0076320.0076320.0076320.026957
APP_POSITION_TYPE0.0388540.0134970.0119310.0095080.0152530.0176820.0035020.002417-0.0126830.002217...0.014368-0.033142-0.028042-0.015945-0.082999-0.0487150.0093240.0093240.009324-0.018842
TRANS_COUNT_ATM_PRC-0.1088190.0650790.061365-0.4331550.0571590.0620470.060934-0.196057-0.5188030.058749...0.0020250.0198740.0129220.0163300.0322160.0143400.0138290.0138290.0138290.093872
AMOUNT_RUB_ATM_PRC-0.2828700.0430710.041007-0.4888290.0379860.0418380.060422-0.254324-0.3211240.063246...-0.0011340.0173420.0101790.0170930.0282350.0090570.0124910.0124910.0124910.077213
CNT_TRAN_MED_TENDENCY3M0.0269590.1582630.147939-0.0548310.2096900.2268160.500273-0.001192-0.0657310.164174...0.0016640.0152520.0097740.0099280.0115840.0102100.0087350.0087350.0087350.027036
SUM_TRAN_MED_TENDENCY3M0.0238710.1400010.131643-0.0488790.1851430.1978010.441779-0.000188-0.0586790.146871...0.0019530.0127880.0091580.0091800.0101480.0092940.0073690.0073690.0073690.024238
SUM_TRAN_CLO_TENDENCY3M-0.0718770.1221700.116668-0.0306710.1552040.1649710.1606960.004098-0.0315910.457337...-0.0013440.0137640.0116070.0110130.0116700.0088880.0091800.0091800.0091800.023270
LDEAL_TENOR_MAX0.002975-0.001951-0.000963-0.002509-0.004489-0.004008-0.0031250.004006-0.006514-0.002510...0.0150530.0154190.0129500.0406100.0894380.0298400.0022140.0022140.0022140.014318
LDEAL_YQZ_CHRG0.0002740.0024020.002152-0.0014130.0017360.0018900.001511-0.002211-0.0007820.001022...-0.061453-0.009704-0.006706-0.029382-0.069493-0.009721-0.002931-0.002931-0.002931-0.004033
DEAL_YQZ_IR_MAX-0.0030950.0025760.001660-0.0009170.0040940.0052400.002546-0.0019700.0007310.003237...-0.003961-0.001476-0.0060070.0062970.026875-0.0200530.0046550.0046550.0046550.003706
LDEAL_YQZ_COM0.0014530.0039390.0031010.0019620.0020370.0022270.002265-0.001304-0.0002050.000153...-0.129920-0.015589-0.017030-0.0077120.006229-0.041947-0.000177-0.000177-0.000177-0.002327
DEAL_YQZ_IR_MIN-0.0033960.0028210.0019190.0002580.0045470.0055240.003638-0.0012500.0020490.004496...-0.024847-0.003654-0.0090370.0009660.020671-0.0298460.0055520.0055520.0055520.004550
CNT_TRAN_CLO_TENDENCY3M-0.0681110.1336880.126785-0.0355340.1708030.1828740.1793970.003041-0.0361320.506409...-0.0011110.0148110.0117980.0110330.0125560.0099390.0099470.0099470.0099470.026280
LDEAL_TENOR_MIN0.001166-0.003053-0.002544-0.000406-0.004243-0.003982-0.0021980.002625-0.0041160.000074...-0.0572700.0077470.0085660.0333680.0189240.0284450.0011720.0011720.0011720.004597
LDEAL_AMT_MONTH0.0002970.0002740.000809-0.0026790.0035230.003467-0.001352-0.000752-0.004200-0.003256...0.487743-0.0032210.0228620.0322460.0621610.0617580.0040660.0040660.004066-0.005344
CNT_TRAN_SUP_TENDENCY3M0.0568700.2252730.209914-0.0571920.3291050.3564890.205819-0.003684-0.0841720.183961...-0.0008810.0208800.0151440.0156680.0190020.0174680.0144520.0144520.0144520.035731
SUM_TRAN_SUP_TENDENCY3M0.0538250.2143420.201377-0.0564310.3087800.3305650.193269-0.003462-0.0804520.177145...-0.0015280.0214580.0145480.0146990.0178240.0165810.0132330.0132330.0132330.032965
CNT_TRAN_ATM_TENDENCY3M0.0772760.1443020.1346740.0063150.1933720.2067630.1179930.010063-0.0475950.104679...0.0016810.0272450.0238610.0155810.0206440.0208230.0138300.0138300.0138300.045657
CNT_TRAN_ATM_TENDENCY1M0.0957740.1804290.1699130.0056270.1229660.1314260.1399360.013281-0.0641740.125310...0.0020210.0230410.0168530.0105780.0217860.0221470.0070470.0070470.0070470.071619
SUM_TRAN_ATM_TENDENCY3M0.0722340.1294540.1220160.0027170.1664080.1749450.1027650.008648-0.0481630.093932...0.0013140.0262140.0214720.0145230.0198530.0195450.0124210.0124210.0124210.045768
DEAL_GRACE_DAYS_ACC_S1X10.005087-0.001202-0.0016480.012103-0.009460-0.007931-0.0043120.0009490.010981-0.006359...0.0278260.2356920.2211670.0485460.0336940.1058310.0047260.0047260.004726-0.001347
AVG_PCT_MONTH_TO_PCLOSE0.0015470.0012000.0019220.0007430.0017090.0025230.0008250.001236-0.002415-0.003471...0.1249850.0347850.0334790.0232240.0439650.0846090.0031100.0031100.0031100.000501
DEAL_YWZ_IR_MIN-0.0030770.0021270.002931-0.003613-0.000851-0.001060-0.0018240.006273-0.0015350.000996...-0.019771-0.199341-0.276461-0.196136-0.121922-0.109328-0.149111-0.149111-0.149111-0.007858
SUM_TRAN_SUP_TENDENCY1M0.0692390.3115580.294231-0.0660690.2131170.2286540.286360-0.001171-0.1051730.262752...-0.0004370.0177750.0085580.0092720.0167390.0167010.0071810.0071810.0071810.041617
DEAL_YWZ_IR_MAX-0.002768-0.004813-0.003886-0.000954-0.009027-0.010012-0.0085580.005795-0.001168-0.007267...-0.012026-0.212901-0.279412-0.249282-0.154790-0.044269-0.192050-0.192050-0.192050-0.018765
SUM_TRAN_ATM_TENDENCY1M0.0883550.1572970.1491770.0036910.1066020.1132160.1178490.012225-0.0614670.109443...0.0017050.0204090.0144720.0096980.0198560.0192440.0064030.0064030.0064030.071122
CNT_TRAN_SUP_TENDENCY1M0.0741360.3339430.312566-0.0677950.2283210.2472220.310808-0.000633-0.1142550.271020...-0.0005330.0184490.0090970.0094310.0173790.0172810.0076130.0076130.0076130.045585
DEAL_GRACE_DAYS_ACC_AVG0.003483-0.001647-0.0018810.010462-0.008722-0.007641-0.004142-0.0008170.007802-0.005085...0.0327930.2680900.1589760.0537300.0326470.1175220.0053860.0053860.005386-0.004171
LDEAL_YQZ_PC-0.001038-0.001940-0.0027270.002057-0.005337-0.004172-0.001618-0.0010750.001834-0.001907...-0.0367990.0074990.008992-0.004903-0.0273180.024909-0.000059-0.000059-0.0000590.002900
DEAL_GRACE_DAYS_ACC_MAX0.002070-0.002073-0.0022730.008832-0.008495-0.007374-0.003964-0.0010900.007171-0.004612...0.0304360.2443030.1576720.0468270.0284520.1300190.0049270.0049270.004927-0.004527
LDEAL_DELINQ_PER_MAXYQZ0.0002960.0048840.004329-0.0011170.0047500.0052370.005515-0.001113-0.0019660.004405...0.3687610.0721600.095019-0.015328-0.1428660.2401500.0065320.0065320.006532-0.016342
CLNT_SALARY_VALUE-0.0028270.0010730.002523-0.007400-0.000758-0.001697-0.0011200.000517-0.0078570.002187...-0.000180-0.003074-0.003227-0.002174-0.002098-0.000427-0.001997-0.001997-0.0019970.000409
TRANS_AMOUNT_TENDENCY3M0.0471760.1662200.160170-0.0247330.2376790.2422430.136324-0.013183-0.0645700.143793...0.0006950.0298480.0251780.0222100.0274090.0224320.0193680.0193680.0193680.055346
MED_DEBT_PRC_YQZ0.0002480.0035090.0029350.0000390.0030510.0026410.0009220.0004580.0003640.001451...0.4759290.0068400.0147900.0453500.1627960.027048-0.000831-0.000831-0.0008310.010467
TRANS_CNT_TENDENCY3M0.0461170.1962120.182892-0.0229560.2973690.3213230.164777-0.019368-0.0546020.163305...0.0014350.0308790.0267880.0245660.0302510.0229610.0220840.0220840.0220840.051699
LDEAL_USED_AMT_AVG_YQZ0.000541-0.000357-0.000079-0.0010680.0005970.000784-0.0007750.000274-0.003949-0.002797...0.3876420.0273340.0344850.0111220.0968620.0697750.0035300.0035300.003530-0.001530
LDEAL_USED_AMT_AVG_YWZ-0.005747-0.009377-0.008470-0.006433-0.010851-0.013321-0.0057250.004554-0.000381-0.006869...0.040175-0.087228-0.054458-0.013673-0.016758-0.0086140.0211510.0211510.021151-0.006381
AVG_PCT_DEBT_TO_DEAL_AMT0.0013120.0024980.0025530.0003900.0020520.001949-0.0004540.002100-0.001007-0.001233...1.0000000.0124850.0677690.0223610.0392160.1647940.0075240.0075240.007524-0.010198
LDEAL_ACT_DAYS_ACC_PCT_AVG-0.0014810.0140500.012626-0.0025620.0119330.0139580.015515-0.0059590.0015530.014736...0.0124851.0000000.5902250.6501140.3950120.2779880.6277520.6277520.6277520.041959
MED_DEBT_PRC_YWZ-0.0056290.0065900.0054460.0011280.0068680.0082600.008542-0.0047430.0070400.009818...0.0677690.5902251.0000000.6105250.3780490.4173230.4957280.4957280.4957280.029183
LDEAL_ACT_DAYS_PCT_TR3-0.0062240.0065800.005313-0.0049570.0071690.0093020.009018-0.0058840.0033190.011948...0.0223610.6501140.6105251.0000000.6076040.1111400.7962860.7962860.7962860.048763
LDEAL_ACT_DAYS_PCT_AAVG-0.0046340.0118940.011026-0.0046980.0099580.0113540.010137-0.005268-0.0028550.012310...0.0392160.3950120.3780490.6076041.0000000.0692310.4838270.4838270.4838270.074718
LDEAL_DELINQ_PER_MAXYWZ0.0005810.0115400.0104670.0004090.0100490.0106440.0089970.001456-0.0026270.007219...0.1647940.2779880.4173230.1111400.0692311.0000000.0579050.0579050.0579050.000725
LDEAL_ACT_DAYS_PCT_TR-0.0029750.0052660.004533-0.0069740.0077160.0091340.006894-0.005776-0.0006540.008659...0.0075240.6277520.4957280.7962860.4838270.0579051.0000001.0000001.0000000.048387
LDEAL_ACT_DAYS_PCT_TR4-0.0029750.0052660.004533-0.0069740.0077160.0091340.006894-0.005776-0.0006540.008659...0.0075240.6277520.4957280.7962860.4838270.0579051.0000001.0000001.0000000.048387
LDEAL_ACT_DAYS_PCT_CURR-0.0029750.0052660.004533-0.0069740.0077160.0091340.006894-0.005776-0.0006540.008659...0.0075240.6277520.4957280.7962860.4838270.0579051.0000001.0000001.0000000.048387
TARGET-0.0013190.0295650.027457-0.0245580.0206480.0228690.032242-0.011020-0.0204160.030361...-0.0101980.0419590.0291830.0487630.0747180.0007250.0483870.0483870.0483871.000000
\n", - "

59 rows × 59 columns

\n", - "
" - ], - "text/plain": [ - " AMOUNT_RUB_CLO_PRC CNT_TRAN_AUT_TENDENCY1M \\\n", - "AMOUNT_RUB_CLO_PRC 1.000000 0.049475 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.049475 1.000000 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.046674 0.928355 \n", - "AMOUNT_RUB_SUP_PRC 0.011841 -0.034665 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.039290 0.505210 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.042483 0.550901 \n", - "CNT_TRAN_MED_TENDENCY1M 0.023695 0.200437 \n", - "AMOUNT_RUB_NAS_PRC -0.006358 -0.000126 \n", - "TRANS_COUNT_SUP_PRC -0.077084 -0.047460 \n", - "CNT_TRAN_CLO_TENDENCY1M -0.079970 0.155910 \n", - "SUM_TRAN_MED_TENDENCY1M 0.021117 0.178559 \n", - "TRANS_COUNT_NAS_PRC -0.013194 0.007689 \n", - "SUM_TRAN_CLO_TENDENCY1M -0.080035 0.142559 \n", - "APP_POSITION_TYPE 0.038854 0.013497 \n", - "TRANS_COUNT_ATM_PRC -0.108819 0.065079 \n", - "AMOUNT_RUB_ATM_PRC -0.282870 0.043071 \n", - "CNT_TRAN_MED_TENDENCY3M 0.026959 0.158263 \n", - "SUM_TRAN_MED_TENDENCY3M 0.023871 0.140001 \n", - "SUM_TRAN_CLO_TENDENCY3M -0.071877 0.122170 \n", - "LDEAL_TENOR_MAX 0.002975 -0.001951 \n", - "LDEAL_YQZ_CHRG 0.000274 0.002402 \n", - "DEAL_YQZ_IR_MAX -0.003095 0.002576 \n", - "LDEAL_YQZ_COM 0.001453 0.003939 \n", - "DEAL_YQZ_IR_MIN -0.003396 0.002821 \n", - "CNT_TRAN_CLO_TENDENCY3M -0.068111 0.133688 \n", - "LDEAL_TENOR_MIN 0.001166 -0.003053 \n", - "LDEAL_AMT_MONTH 0.000297 0.000274 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.056870 0.225273 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.053825 0.214342 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.077276 0.144302 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.095774 0.180429 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.072234 0.129454 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.005087 -0.001202 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.001547 0.001200 \n", - "DEAL_YWZ_IR_MIN -0.003077 0.002127 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.069239 0.311558 \n", - "DEAL_YWZ_IR_MAX -0.002768 -0.004813 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.088355 0.157297 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.074136 0.333943 \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.003483 -0.001647 \n", - "LDEAL_YQZ_PC -0.001038 -0.001940 \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.002070 -0.002073 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.000296 0.004884 \n", - "CLNT_SALARY_VALUE -0.002827 0.001073 \n", - "TRANS_AMOUNT_TENDENCY3M 0.047176 0.166220 \n", - "MED_DEBT_PRC_YQZ 0.000248 0.003509 \n", - "TRANS_CNT_TENDENCY3M 0.046117 0.196212 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.000541 -0.000357 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.005747 -0.009377 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.001312 0.002498 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG -0.001481 0.014050 \n", - "MED_DEBT_PRC_YWZ -0.005629 0.006590 \n", - "LDEAL_ACT_DAYS_PCT_TR3 -0.006224 0.006580 \n", - "LDEAL_ACT_DAYS_PCT_AAVG -0.004634 0.011894 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.000581 0.011540 \n", - "LDEAL_ACT_DAYS_PCT_TR -0.002975 0.005266 \n", - "LDEAL_ACT_DAYS_PCT_TR4 -0.002975 0.005266 \n", - "LDEAL_ACT_DAYS_PCT_CURR -0.002975 0.005266 \n", - "TARGET -0.001319 0.029565 \n", - "\n", - " SUM_TRAN_AUT_TENDENCY1M AMOUNT_RUB_SUP_PRC \\\n", - "AMOUNT_RUB_CLO_PRC 0.046674 0.011841 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.928355 -0.034665 \n", - "SUM_TRAN_AUT_TENDENCY1M 1.000000 -0.033317 \n", - "AMOUNT_RUB_SUP_PRC -0.033317 1.000000 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.531748 -0.032999 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.509764 -0.034831 \n", - "CNT_TRAN_MED_TENDENCY1M 0.187698 -0.055277 \n", - "AMOUNT_RUB_NAS_PRC 0.000077 -0.000214 \n", - "TRANS_COUNT_SUP_PRC -0.045323 0.718376 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.146325 -0.034542 \n", - "SUM_TRAN_MED_TENDENCY1M 0.168158 -0.049953 \n", - "TRANS_COUNT_NAS_PRC 0.007680 -0.043743 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.134500 -0.030490 \n", - "APP_POSITION_TYPE 0.011931 0.009508 \n", - "TRANS_COUNT_ATM_PRC 0.061365 -0.433155 \n", - "AMOUNT_RUB_ATM_PRC 0.041007 -0.488829 \n", - "CNT_TRAN_MED_TENDENCY3M 0.147939 -0.054831 \n", - "SUM_TRAN_MED_TENDENCY3M 0.131643 -0.048879 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.116668 -0.030671 \n", - "LDEAL_TENOR_MAX -0.000963 -0.002509 \n", - "LDEAL_YQZ_CHRG 0.002152 -0.001413 \n", - "DEAL_YQZ_IR_MAX 0.001660 -0.000917 \n", - "LDEAL_YQZ_COM 0.003101 0.001962 \n", - "DEAL_YQZ_IR_MIN 0.001919 0.000258 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.126785 -0.035534 \n", - "LDEAL_TENOR_MIN -0.002544 -0.000406 \n", - "LDEAL_AMT_MONTH 0.000809 -0.002679 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.209914 -0.057192 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.201377 -0.056431 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.134674 0.006315 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.169913 0.005627 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.122016 0.002717 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 -0.001648 0.012103 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.001922 0.000743 \n", - "DEAL_YWZ_IR_MIN 0.002931 -0.003613 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.294231 -0.066069 \n", - "DEAL_YWZ_IR_MAX -0.003886 -0.000954 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.149177 0.003691 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.312566 -0.067795 \n", - "DEAL_GRACE_DAYS_ACC_AVG -0.001881 0.010462 \n", - "LDEAL_YQZ_PC -0.002727 0.002057 \n", - "DEAL_GRACE_DAYS_ACC_MAX -0.002273 0.008832 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.004329 -0.001117 \n", - "CLNT_SALARY_VALUE 0.002523 -0.007400 \n", - "TRANS_AMOUNT_TENDENCY3M 0.160170 -0.024733 \n", - "MED_DEBT_PRC_YQZ 0.002935 0.000039 \n", - "TRANS_CNT_TENDENCY3M 0.182892 -0.022956 \n", - "LDEAL_USED_AMT_AVG_YQZ -0.000079 -0.001068 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.008470 -0.006433 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.002553 0.000390 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.012626 -0.002562 \n", - "MED_DEBT_PRC_YWZ 0.005446 0.001128 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.005313 -0.004957 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.011026 -0.004698 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.010467 0.000409 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.004533 -0.006974 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.004533 -0.006974 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.004533 -0.006974 \n", - "TARGET 0.027457 -0.024558 \n", - "\n", - " SUM_TRAN_AUT_TENDENCY3M CNT_TRAN_AUT_TENDENCY3M \\\n", - "AMOUNT_RUB_CLO_PRC 0.039290 0.042483 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.505210 0.550901 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.531748 0.509764 \n", - "AMOUNT_RUB_SUP_PRC -0.032999 -0.034831 \n", - "SUM_TRAN_AUT_TENDENCY3M 1.000000 0.914650 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.914650 1.000000 \n", - "CNT_TRAN_MED_TENDENCY1M 0.138073 0.149022 \n", - "AMOUNT_RUB_NAS_PRC -0.000498 -0.001554 \n", - "TRANS_COUNT_SUP_PRC -0.043225 -0.045432 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.108307 0.117697 \n", - "SUM_TRAN_MED_TENDENCY1M 0.121726 0.130019 \n", - "TRANS_COUNT_NAS_PRC 0.004765 0.004565 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.098074 0.106737 \n", - "APP_POSITION_TYPE 0.015253 0.017682 \n", - "TRANS_COUNT_ATM_PRC 0.057159 0.062047 \n", - "AMOUNT_RUB_ATM_PRC 0.037986 0.041838 \n", - "CNT_TRAN_MED_TENDENCY3M 0.209690 0.226816 \n", - "SUM_TRAN_MED_TENDENCY3M 0.185143 0.197801 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.155204 0.164971 \n", - "LDEAL_TENOR_MAX -0.004489 -0.004008 \n", - "LDEAL_YQZ_CHRG 0.001736 0.001890 \n", - "DEAL_YQZ_IR_MAX 0.004094 0.005240 \n", - "LDEAL_YQZ_COM 0.002037 0.002227 \n", - "DEAL_YQZ_IR_MIN 0.004547 0.005524 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.170803 0.182874 \n", - "LDEAL_TENOR_MIN -0.004243 -0.003982 \n", - "LDEAL_AMT_MONTH 0.003523 0.003467 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.329105 0.356489 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.308780 0.330565 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.193372 0.206763 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.122966 0.131426 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.166408 0.174945 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 -0.009460 -0.007931 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.001709 0.002523 \n", - "DEAL_YWZ_IR_MIN -0.000851 -0.001060 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.213117 0.228654 \n", - "DEAL_YWZ_IR_MAX -0.009027 -0.010012 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.106602 0.113216 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.228321 0.247222 \n", - "DEAL_GRACE_DAYS_ACC_AVG -0.008722 -0.007641 \n", - "LDEAL_YQZ_PC -0.005337 -0.004172 \n", - "DEAL_GRACE_DAYS_ACC_MAX -0.008495 -0.007374 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.004750 0.005237 \n", - "CLNT_SALARY_VALUE -0.000758 -0.001697 \n", - "TRANS_AMOUNT_TENDENCY3M 0.237679 0.242243 \n", - "MED_DEBT_PRC_YQZ 0.003051 0.002641 \n", - "TRANS_CNT_TENDENCY3M 0.297369 0.321323 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.000597 0.000784 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.010851 -0.013321 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.002052 0.001949 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.011933 0.013958 \n", - "MED_DEBT_PRC_YWZ 0.006868 0.008260 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.007169 0.009302 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.009958 0.011354 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.010049 0.010644 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.007716 0.009134 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.007716 0.009134 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.007716 0.009134 \n", - "TARGET 0.020648 0.022869 \n", - "\n", - " CNT_TRAN_MED_TENDENCY1M AMOUNT_RUB_NAS_PRC \\\n", - "AMOUNT_RUB_CLO_PRC 0.023695 -0.006358 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.200437 -0.000126 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.187698 0.000077 \n", - "AMOUNT_RUB_SUP_PRC -0.055277 -0.000214 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.138073 -0.000498 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.149022 -0.001554 \n", - "CNT_TRAN_MED_TENDENCY1M 1.000000 0.000959 \n", - "AMOUNT_RUB_NAS_PRC 0.000959 1.000000 \n", - "TRANS_COUNT_SUP_PRC -0.064599 -0.050642 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.221546 0.003803 \n", - "SUM_TRAN_MED_TENDENCY1M 0.895846 0.000685 \n", - "TRANS_COUNT_NAS_PRC 0.014810 0.725117 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.199294 0.004282 \n", - "APP_POSITION_TYPE 0.003502 0.002417 \n", - "TRANS_COUNT_ATM_PRC 0.060934 -0.196057 \n", - "AMOUNT_RUB_ATM_PRC 0.060422 -0.254324 \n", - "CNT_TRAN_MED_TENDENCY3M 0.500273 -0.001192 \n", - "SUM_TRAN_MED_TENDENCY3M 0.441779 -0.000188 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.160696 0.004098 \n", - "LDEAL_TENOR_MAX -0.003125 0.004006 \n", - "LDEAL_YQZ_CHRG 0.001511 -0.002211 \n", - "DEAL_YQZ_IR_MAX 0.002546 -0.001970 \n", - "LDEAL_YQZ_COM 0.002265 -0.001304 \n", - "DEAL_YQZ_IR_MIN 0.003638 -0.001250 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.179397 0.003041 \n", - "LDEAL_TENOR_MIN -0.002198 0.002625 \n", - "LDEAL_AMT_MONTH -0.001352 -0.000752 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.205819 -0.003684 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.193269 -0.003462 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.117993 0.010063 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.139936 0.013281 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.102765 0.008648 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 -0.004312 0.000949 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.000825 0.001236 \n", - "DEAL_YWZ_IR_MIN -0.001824 0.006273 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.286360 -0.001171 \n", - "DEAL_YWZ_IR_MAX -0.008558 0.005795 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.117849 0.012225 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.310808 -0.000633 \n", - "DEAL_GRACE_DAYS_ACC_AVG -0.004142 -0.000817 \n", - "LDEAL_YQZ_PC -0.001618 -0.001075 \n", - "DEAL_GRACE_DAYS_ACC_MAX -0.003964 -0.001090 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.005515 -0.001113 \n", - "CLNT_SALARY_VALUE -0.001120 0.000517 \n", - "TRANS_AMOUNT_TENDENCY3M 0.136324 -0.013183 \n", - "MED_DEBT_PRC_YQZ 0.000922 0.000458 \n", - "TRANS_CNT_TENDENCY3M 0.164777 -0.019368 \n", - "LDEAL_USED_AMT_AVG_YQZ -0.000775 0.000274 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.005725 0.004554 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT -0.000454 0.002100 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.015515 -0.005959 \n", - "MED_DEBT_PRC_YWZ 0.008542 -0.004743 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.009018 -0.005884 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.010137 -0.005268 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.008997 0.001456 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.006894 -0.005776 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.006894 -0.005776 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.006894 -0.005776 \n", - "TARGET 0.032242 -0.011020 \n", - "\n", - " TRANS_COUNT_SUP_PRC CNT_TRAN_CLO_TENDENCY1M ... \\\n", - "AMOUNT_RUB_CLO_PRC -0.077084 -0.079970 ... \n", - "CNT_TRAN_AUT_TENDENCY1M -0.047460 0.155910 ... \n", - "SUM_TRAN_AUT_TENDENCY1M -0.045323 0.146325 ... \n", - "AMOUNT_RUB_SUP_PRC 0.718376 -0.034542 ... \n", - "SUM_TRAN_AUT_TENDENCY3M -0.043225 0.108307 ... \n", - "CNT_TRAN_AUT_TENDENCY3M -0.045432 0.117697 ... \n", - "CNT_TRAN_MED_TENDENCY1M -0.064599 0.221546 ... \n", - "AMOUNT_RUB_NAS_PRC -0.050642 0.003803 ... \n", - "TRANS_COUNT_SUP_PRC 1.000000 -0.035499 ... \n", - "CNT_TRAN_CLO_TENDENCY1M -0.035499 1.000000 ... \n", - "SUM_TRAN_MED_TENDENCY1M -0.058488 0.200163 ... \n", - "TRANS_COUNT_NAS_PRC -0.116350 0.015730 ... \n", - "SUM_TRAN_CLO_TENDENCY1M -0.031965 0.914132 ... \n", - "APP_POSITION_TYPE -0.012683 0.002217 ... \n", - "TRANS_COUNT_ATM_PRC -0.518803 0.058749 ... \n", - "AMOUNT_RUB_ATM_PRC -0.321124 0.063246 ... \n", - "CNT_TRAN_MED_TENDENCY3M -0.065731 0.164174 ... \n", - "SUM_TRAN_MED_TENDENCY3M -0.058679 0.146871 ... \n", - "SUM_TRAN_CLO_TENDENCY3M -0.031591 0.457337 ... \n", - "LDEAL_TENOR_MAX -0.006514 -0.002510 ... \n", - "LDEAL_YQZ_CHRG -0.000782 0.001022 ... \n", - "DEAL_YQZ_IR_MAX 0.000731 0.003237 ... \n", - "LDEAL_YQZ_COM -0.000205 0.000153 ... \n", - "DEAL_YQZ_IR_MIN 0.002049 0.004496 ... \n", - "CNT_TRAN_CLO_TENDENCY3M -0.036132 0.506409 ... \n", - "LDEAL_TENOR_MIN -0.004116 0.000074 ... \n", - "LDEAL_AMT_MONTH -0.004200 -0.003256 ... \n", - "CNT_TRAN_SUP_TENDENCY3M -0.084172 0.183961 ... \n", - "SUM_TRAN_SUP_TENDENCY3M -0.080452 0.177145 ... \n", - "CNT_TRAN_ATM_TENDENCY3M -0.047595 0.104679 ... \n", - "CNT_TRAN_ATM_TENDENCY1M -0.064174 0.125310 ... \n", - "SUM_TRAN_ATM_TENDENCY3M -0.048163 0.093932 ... \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.010981 -0.006359 ... \n", - "AVG_PCT_MONTH_TO_PCLOSE -0.002415 -0.003471 ... \n", - "DEAL_YWZ_IR_MIN -0.001535 0.000996 ... \n", - "SUM_TRAN_SUP_TENDENCY1M -0.105173 0.262752 ... \n", - "DEAL_YWZ_IR_MAX -0.001168 -0.007267 ... \n", - "SUM_TRAN_ATM_TENDENCY1M -0.061467 0.109443 ... \n", - "CNT_TRAN_SUP_TENDENCY1M -0.114255 0.271020 ... \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.007802 -0.005085 ... \n", - "LDEAL_YQZ_PC 0.001834 -0.001907 ... \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.007171 -0.004612 ... \n", - "LDEAL_DELINQ_PER_MAXYQZ -0.001966 0.004405 ... \n", - "CLNT_SALARY_VALUE -0.007857 0.002187 ... \n", - "TRANS_AMOUNT_TENDENCY3M -0.064570 0.143793 ... \n", - "MED_DEBT_PRC_YQZ 0.000364 0.001451 ... \n", - "TRANS_CNT_TENDENCY3M -0.054602 0.163305 ... \n", - "LDEAL_USED_AMT_AVG_YQZ -0.003949 -0.002797 ... \n", - "LDEAL_USED_AMT_AVG_YWZ -0.000381 -0.006869 ... \n", - "AVG_PCT_DEBT_TO_DEAL_AMT -0.001007 -0.001233 ... \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.001553 0.014736 ... \n", - "MED_DEBT_PRC_YWZ 0.007040 0.009818 ... \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.003319 0.011948 ... \n", - "LDEAL_ACT_DAYS_PCT_AAVG -0.002855 0.012310 ... \n", - "LDEAL_DELINQ_PER_MAXYWZ -0.002627 0.007219 ... \n", - "LDEAL_ACT_DAYS_PCT_TR -0.000654 0.008659 ... \n", - "LDEAL_ACT_DAYS_PCT_TR4 -0.000654 0.008659 ... \n", - "LDEAL_ACT_DAYS_PCT_CURR -0.000654 0.008659 ... \n", - "TARGET -0.020416 0.030361 ... \n", - "\n", - " AVG_PCT_DEBT_TO_DEAL_AMT \\\n", - "AMOUNT_RUB_CLO_PRC 0.001312 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.002498 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.002553 \n", - "AMOUNT_RUB_SUP_PRC 0.000390 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.002052 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.001949 \n", - "CNT_TRAN_MED_TENDENCY1M -0.000454 \n", - "AMOUNT_RUB_NAS_PRC 0.002100 \n", - "TRANS_COUNT_SUP_PRC -0.001007 \n", - "CNT_TRAN_CLO_TENDENCY1M -0.001233 \n", - "SUM_TRAN_MED_TENDENCY1M -0.000397 \n", - "TRANS_COUNT_NAS_PRC 0.001740 \n", - "SUM_TRAN_CLO_TENDENCY1M -0.000873 \n", - "APP_POSITION_TYPE 0.014368 \n", - "TRANS_COUNT_ATM_PRC 0.002025 \n", - "AMOUNT_RUB_ATM_PRC -0.001134 \n", - "CNT_TRAN_MED_TENDENCY3M 0.001664 \n", - "SUM_TRAN_MED_TENDENCY3M 0.001953 \n", - "SUM_TRAN_CLO_TENDENCY3M -0.001344 \n", - "LDEAL_TENOR_MAX 0.015053 \n", - "LDEAL_YQZ_CHRG -0.061453 \n", - "DEAL_YQZ_IR_MAX -0.003961 \n", - "LDEAL_YQZ_COM -0.129920 \n", - "DEAL_YQZ_IR_MIN -0.024847 \n", - "CNT_TRAN_CLO_TENDENCY3M -0.001111 \n", - "LDEAL_TENOR_MIN -0.057270 \n", - "LDEAL_AMT_MONTH 0.487743 \n", - "CNT_TRAN_SUP_TENDENCY3M -0.000881 \n", - "SUM_TRAN_SUP_TENDENCY3M -0.001528 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.001681 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.002021 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.001314 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.027826 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.124985 \n", - "DEAL_YWZ_IR_MIN -0.019771 \n", - "SUM_TRAN_SUP_TENDENCY1M -0.000437 \n", - "DEAL_YWZ_IR_MAX -0.012026 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.001705 \n", - "CNT_TRAN_SUP_TENDENCY1M -0.000533 \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.032793 \n", - "LDEAL_YQZ_PC -0.036799 \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.030436 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.368761 \n", - "CLNT_SALARY_VALUE -0.000180 \n", - "TRANS_AMOUNT_TENDENCY3M 0.000695 \n", - "MED_DEBT_PRC_YQZ 0.475929 \n", - "TRANS_CNT_TENDENCY3M 0.001435 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.387642 \n", - "LDEAL_USED_AMT_AVG_YWZ 0.040175 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 1.000000 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.012485 \n", - "MED_DEBT_PRC_YWZ 0.067769 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.022361 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.039216 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.164794 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.007524 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.007524 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.007524 \n", - "TARGET -0.010198 \n", - "\n", - " LDEAL_ACT_DAYS_ACC_PCT_AVG MED_DEBT_PRC_YWZ \\\n", - "AMOUNT_RUB_CLO_PRC -0.001481 -0.005629 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.014050 0.006590 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.012626 0.005446 \n", - "AMOUNT_RUB_SUP_PRC -0.002562 0.001128 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.011933 0.006868 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.013958 0.008260 \n", - "CNT_TRAN_MED_TENDENCY1M 0.015515 0.008542 \n", - "AMOUNT_RUB_NAS_PRC -0.005959 -0.004743 \n", - "TRANS_COUNT_SUP_PRC 0.001553 0.007040 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.014736 0.009818 \n", - "SUM_TRAN_MED_TENDENCY1M 0.013651 0.007777 \n", - "TRANS_COUNT_NAS_PRC -0.008373 -0.008053 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.013258 0.009214 \n", - "APP_POSITION_TYPE -0.033142 -0.028042 \n", - "TRANS_COUNT_ATM_PRC 0.019874 0.012922 \n", - "AMOUNT_RUB_ATM_PRC 0.017342 0.010179 \n", - "CNT_TRAN_MED_TENDENCY3M 0.015252 0.009774 \n", - "SUM_TRAN_MED_TENDENCY3M 0.012788 0.009158 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.013764 0.011607 \n", - "LDEAL_TENOR_MAX 0.015419 0.012950 \n", - "LDEAL_YQZ_CHRG -0.009704 -0.006706 \n", - "DEAL_YQZ_IR_MAX -0.001476 -0.006007 \n", - "LDEAL_YQZ_COM -0.015589 -0.017030 \n", - "DEAL_YQZ_IR_MIN -0.003654 -0.009037 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.014811 0.011798 \n", - "LDEAL_TENOR_MIN 0.007747 0.008566 \n", - "LDEAL_AMT_MONTH -0.003221 0.022862 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.020880 0.015144 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.021458 0.014548 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.027245 0.023861 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.023041 0.016853 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.026214 0.021472 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.235692 0.221167 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.034785 0.033479 \n", - "DEAL_YWZ_IR_MIN -0.199341 -0.276461 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.017775 0.008558 \n", - "DEAL_YWZ_IR_MAX -0.212901 -0.279412 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.020409 0.014472 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.018449 0.009097 \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.268090 0.158976 \n", - "LDEAL_YQZ_PC 0.007499 0.008992 \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.244303 0.157672 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.072160 0.095019 \n", - "CLNT_SALARY_VALUE -0.003074 -0.003227 \n", - "TRANS_AMOUNT_TENDENCY3M 0.029848 0.025178 \n", - "MED_DEBT_PRC_YQZ 0.006840 0.014790 \n", - "TRANS_CNT_TENDENCY3M 0.030879 0.026788 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.027334 0.034485 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.087228 -0.054458 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.012485 0.067769 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 1.000000 0.590225 \n", - "MED_DEBT_PRC_YWZ 0.590225 1.000000 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.650114 0.610525 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.395012 0.378049 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.277988 0.417323 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.627752 0.495728 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.627752 0.495728 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.627752 0.495728 \n", - "TARGET 0.041959 0.029183 \n", - "\n", - " LDEAL_ACT_DAYS_PCT_TR3 LDEAL_ACT_DAYS_PCT_AAVG \\\n", - "AMOUNT_RUB_CLO_PRC -0.006224 -0.004634 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.006580 0.011894 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.005313 0.011026 \n", - "AMOUNT_RUB_SUP_PRC -0.004957 -0.004698 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.007169 0.009958 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.009302 0.011354 \n", - "CNT_TRAN_MED_TENDENCY1M 0.009018 0.010137 \n", - "AMOUNT_RUB_NAS_PRC -0.005884 -0.005268 \n", - "TRANS_COUNT_SUP_PRC 0.003319 -0.002855 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.011948 0.012310 \n", - "SUM_TRAN_MED_TENDENCY1M 0.008654 0.009376 \n", - "TRANS_COUNT_NAS_PRC -0.008635 -0.005304 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.011576 0.011436 \n", - "APP_POSITION_TYPE -0.015945 -0.082999 \n", - "TRANS_COUNT_ATM_PRC 0.016330 0.032216 \n", - "AMOUNT_RUB_ATM_PRC 0.017093 0.028235 \n", - "CNT_TRAN_MED_TENDENCY3M 0.009928 0.011584 \n", - "SUM_TRAN_MED_TENDENCY3M 0.009180 0.010148 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.011013 0.011670 \n", - "LDEAL_TENOR_MAX 0.040610 0.089438 \n", - "LDEAL_YQZ_CHRG -0.029382 -0.069493 \n", - "DEAL_YQZ_IR_MAX 0.006297 0.026875 \n", - "LDEAL_YQZ_COM -0.007712 0.006229 \n", - "DEAL_YQZ_IR_MIN 0.000966 0.020671 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.011033 0.012556 \n", - "LDEAL_TENOR_MIN 0.033368 0.018924 \n", - "LDEAL_AMT_MONTH 0.032246 0.062161 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.015668 0.019002 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.014699 0.017824 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.015581 0.020644 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.010578 0.021786 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.014523 0.019853 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.048546 0.033694 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.023224 0.043965 \n", - "DEAL_YWZ_IR_MIN -0.196136 -0.121922 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.009272 0.016739 \n", - "DEAL_YWZ_IR_MAX -0.249282 -0.154790 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.009698 0.019856 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.009431 0.017379 \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.053730 0.032647 \n", - "LDEAL_YQZ_PC -0.004903 -0.027318 \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.046827 0.028452 \n", - "LDEAL_DELINQ_PER_MAXYQZ -0.015328 -0.142866 \n", - "CLNT_SALARY_VALUE -0.002174 -0.002098 \n", - "TRANS_AMOUNT_TENDENCY3M 0.022210 0.027409 \n", - "MED_DEBT_PRC_YQZ 0.045350 0.162796 \n", - "TRANS_CNT_TENDENCY3M 0.024566 0.030251 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.011122 0.096862 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.013673 -0.016758 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.022361 0.039216 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.650114 0.395012 \n", - "MED_DEBT_PRC_YWZ 0.610525 0.378049 \n", - "LDEAL_ACT_DAYS_PCT_TR3 1.000000 0.607604 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.607604 1.000000 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.111140 0.069231 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.796286 0.483827 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.796286 0.483827 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.796286 0.483827 \n", - "TARGET 0.048763 0.074718 \n", - "\n", - " LDEAL_DELINQ_PER_MAXYWZ LDEAL_ACT_DAYS_PCT_TR \\\n", - "AMOUNT_RUB_CLO_PRC 0.000581 -0.002975 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.011540 0.005266 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.010467 0.004533 \n", - "AMOUNT_RUB_SUP_PRC 0.000409 -0.006974 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.010049 0.007716 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.010644 0.009134 \n", - "CNT_TRAN_MED_TENDENCY1M 0.008997 0.006894 \n", - "AMOUNT_RUB_NAS_PRC 0.001456 -0.005776 \n", - "TRANS_COUNT_SUP_PRC -0.002627 -0.000654 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.007219 0.008659 \n", - "SUM_TRAN_MED_TENDENCY1M 0.008178 0.006514 \n", - "TRANS_COUNT_NAS_PRC -0.002327 -0.008357 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.005790 0.007632 \n", - "APP_POSITION_TYPE -0.048715 0.009324 \n", - "TRANS_COUNT_ATM_PRC 0.014340 0.013829 \n", - "AMOUNT_RUB_ATM_PRC 0.009057 0.012491 \n", - "CNT_TRAN_MED_TENDENCY3M 0.010210 0.008735 \n", - "SUM_TRAN_MED_TENDENCY3M 0.009294 0.007369 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.008888 0.009180 \n", - "LDEAL_TENOR_MAX 0.029840 0.002214 \n", - "LDEAL_YQZ_CHRG -0.009721 -0.002931 \n", - "DEAL_YQZ_IR_MAX -0.020053 0.004655 \n", - "LDEAL_YQZ_COM -0.041947 -0.000177 \n", - "DEAL_YQZ_IR_MIN -0.029846 0.005552 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.009939 0.009947 \n", - "LDEAL_TENOR_MIN 0.028445 0.001172 \n", - "LDEAL_AMT_MONTH 0.061758 0.004066 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.017468 0.014452 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.016581 0.013233 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.020823 0.013830 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.022147 0.007047 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.019545 0.012421 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.105831 0.004726 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.084609 0.003110 \n", - "DEAL_YWZ_IR_MIN -0.109328 -0.149111 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.016701 0.007181 \n", - "DEAL_YWZ_IR_MAX -0.044269 -0.192050 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.019244 0.006403 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.017281 0.007613 \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.117522 0.005386 \n", - "LDEAL_YQZ_PC 0.024909 -0.000059 \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.130019 0.004927 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.240150 0.006532 \n", - "CLNT_SALARY_VALUE -0.000427 -0.001997 \n", - "TRANS_AMOUNT_TENDENCY3M 0.022432 0.019368 \n", - "MED_DEBT_PRC_YQZ 0.027048 -0.000831 \n", - "TRANS_CNT_TENDENCY3M 0.022961 0.022084 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.069775 0.003530 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.008614 0.021151 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.164794 0.007524 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.277988 0.627752 \n", - "MED_DEBT_PRC_YWZ 0.417323 0.495728 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.111140 0.796286 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.069231 0.483827 \n", - "LDEAL_DELINQ_PER_MAXYWZ 1.000000 0.057905 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.057905 1.000000 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.057905 1.000000 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.057905 1.000000 \n", - "TARGET 0.000725 0.048387 \n", - "\n", - " LDEAL_ACT_DAYS_PCT_TR4 LDEAL_ACT_DAYS_PCT_CURR \\\n", - "AMOUNT_RUB_CLO_PRC -0.002975 -0.002975 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.005266 0.005266 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.004533 0.004533 \n", - "AMOUNT_RUB_SUP_PRC -0.006974 -0.006974 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.007716 0.007716 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.009134 0.009134 \n", - "CNT_TRAN_MED_TENDENCY1M 0.006894 0.006894 \n", - "AMOUNT_RUB_NAS_PRC -0.005776 -0.005776 \n", - "TRANS_COUNT_SUP_PRC -0.000654 -0.000654 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.008659 0.008659 \n", - "SUM_TRAN_MED_TENDENCY1M 0.006514 0.006514 \n", - "TRANS_COUNT_NAS_PRC -0.008357 -0.008357 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.007632 0.007632 \n", - "APP_POSITION_TYPE 0.009324 0.009324 \n", - "TRANS_COUNT_ATM_PRC 0.013829 0.013829 \n", - "AMOUNT_RUB_ATM_PRC 0.012491 0.012491 \n", - "CNT_TRAN_MED_TENDENCY3M 0.008735 0.008735 \n", - "SUM_TRAN_MED_TENDENCY3M 0.007369 0.007369 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.009180 0.009180 \n", - "LDEAL_TENOR_MAX 0.002214 0.002214 \n", - "LDEAL_YQZ_CHRG -0.002931 -0.002931 \n", - "DEAL_YQZ_IR_MAX 0.004655 0.004655 \n", - "LDEAL_YQZ_COM -0.000177 -0.000177 \n", - "DEAL_YQZ_IR_MIN 0.005552 0.005552 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.009947 0.009947 \n", - "LDEAL_TENOR_MIN 0.001172 0.001172 \n", - "LDEAL_AMT_MONTH 0.004066 0.004066 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.014452 0.014452 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.013233 0.013233 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.013830 0.013830 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.007047 0.007047 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.012421 0.012421 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 0.004726 0.004726 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.003110 0.003110 \n", - "DEAL_YWZ_IR_MIN -0.149111 -0.149111 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.007181 0.007181 \n", - "DEAL_YWZ_IR_MAX -0.192050 -0.192050 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.006403 0.006403 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.007613 0.007613 \n", - "DEAL_GRACE_DAYS_ACC_AVG 0.005386 0.005386 \n", - "LDEAL_YQZ_PC -0.000059 -0.000059 \n", - "DEAL_GRACE_DAYS_ACC_MAX 0.004927 0.004927 \n", - "LDEAL_DELINQ_PER_MAXYQZ 0.006532 0.006532 \n", - "CLNT_SALARY_VALUE -0.001997 -0.001997 \n", - "TRANS_AMOUNT_TENDENCY3M 0.019368 0.019368 \n", - "MED_DEBT_PRC_YQZ -0.000831 -0.000831 \n", - "TRANS_CNT_TENDENCY3M 0.022084 0.022084 \n", - "LDEAL_USED_AMT_AVG_YQZ 0.003530 0.003530 \n", - "LDEAL_USED_AMT_AVG_YWZ 0.021151 0.021151 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT 0.007524 0.007524 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.627752 0.627752 \n", - "MED_DEBT_PRC_YWZ 0.495728 0.495728 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.796286 0.796286 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.483827 0.483827 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.057905 0.057905 \n", - "LDEAL_ACT_DAYS_PCT_TR 1.000000 1.000000 \n", - "LDEAL_ACT_DAYS_PCT_TR4 1.000000 1.000000 \n", - "LDEAL_ACT_DAYS_PCT_CURR 1.000000 1.000000 \n", - "TARGET 0.048387 0.048387 \n", - "\n", - " TARGET \n", - "AMOUNT_RUB_CLO_PRC -0.001319 \n", - "CNT_TRAN_AUT_TENDENCY1M 0.029565 \n", - "SUM_TRAN_AUT_TENDENCY1M 0.027457 \n", - "AMOUNT_RUB_SUP_PRC -0.024558 \n", - "SUM_TRAN_AUT_TENDENCY3M 0.020648 \n", - "CNT_TRAN_AUT_TENDENCY3M 0.022869 \n", - "CNT_TRAN_MED_TENDENCY1M 0.032242 \n", - "AMOUNT_RUB_NAS_PRC -0.011020 \n", - "TRANS_COUNT_SUP_PRC -0.020416 \n", - "CNT_TRAN_CLO_TENDENCY1M 0.030361 \n", - "SUM_TRAN_MED_TENDENCY1M 0.028469 \n", - "TRANS_COUNT_NAS_PRC -0.009448 \n", - "SUM_TRAN_CLO_TENDENCY1M 0.026957 \n", - "APP_POSITION_TYPE -0.018842 \n", - "TRANS_COUNT_ATM_PRC 0.093872 \n", - "AMOUNT_RUB_ATM_PRC 0.077213 \n", - "CNT_TRAN_MED_TENDENCY3M 0.027036 \n", - "SUM_TRAN_MED_TENDENCY3M 0.024238 \n", - "SUM_TRAN_CLO_TENDENCY3M 0.023270 \n", - "LDEAL_TENOR_MAX 0.014318 \n", - "LDEAL_YQZ_CHRG -0.004033 \n", - "DEAL_YQZ_IR_MAX 0.003706 \n", - "LDEAL_YQZ_COM -0.002327 \n", - "DEAL_YQZ_IR_MIN 0.004550 \n", - "CNT_TRAN_CLO_TENDENCY3M 0.026280 \n", - "LDEAL_TENOR_MIN 0.004597 \n", - "LDEAL_AMT_MONTH -0.005344 \n", - "CNT_TRAN_SUP_TENDENCY3M 0.035731 \n", - "SUM_TRAN_SUP_TENDENCY3M 0.032965 \n", - "CNT_TRAN_ATM_TENDENCY3M 0.045657 \n", - "CNT_TRAN_ATM_TENDENCY1M 0.071619 \n", - "SUM_TRAN_ATM_TENDENCY3M 0.045768 \n", - "DEAL_GRACE_DAYS_ACC_S1X1 -0.001347 \n", - "AVG_PCT_MONTH_TO_PCLOSE 0.000501 \n", - "DEAL_YWZ_IR_MIN -0.007858 \n", - "SUM_TRAN_SUP_TENDENCY1M 0.041617 \n", - "DEAL_YWZ_IR_MAX -0.018765 \n", - "SUM_TRAN_ATM_TENDENCY1M 0.071122 \n", - "CNT_TRAN_SUP_TENDENCY1M 0.045585 \n", - "DEAL_GRACE_DAYS_ACC_AVG -0.004171 \n", - "LDEAL_YQZ_PC 0.002900 \n", - "DEAL_GRACE_DAYS_ACC_MAX -0.004527 \n", - "LDEAL_DELINQ_PER_MAXYQZ -0.016342 \n", - "CLNT_SALARY_VALUE 0.000409 \n", - "TRANS_AMOUNT_TENDENCY3M 0.055346 \n", - "MED_DEBT_PRC_YQZ 0.010467 \n", - "TRANS_CNT_TENDENCY3M 0.051699 \n", - "LDEAL_USED_AMT_AVG_YQZ -0.001530 \n", - "LDEAL_USED_AMT_AVG_YWZ -0.006381 \n", - "AVG_PCT_DEBT_TO_DEAL_AMT -0.010198 \n", - "LDEAL_ACT_DAYS_ACC_PCT_AVG 0.041959 \n", - "MED_DEBT_PRC_YWZ 0.029183 \n", - "LDEAL_ACT_DAYS_PCT_TR3 0.048763 \n", - "LDEAL_ACT_DAYS_PCT_AAVG 0.074718 \n", - "LDEAL_DELINQ_PER_MAXYWZ 0.000725 \n", - "LDEAL_ACT_DAYS_PCT_TR 0.048387 \n", - "LDEAL_ACT_DAYS_PCT_TR4 0.048387 \n", - "LDEAL_ACT_DAYS_PCT_CURR 0.048387 \n", - "TARGET 1.000000 \n", - "\n", - "[59 rows x 59 columns]" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "numeric_columns_to_check = df[columns_to_check].select_dtypes(include=[np.number]).columns\n", - "df[numeric_columns_to_check] = df[numeric_columns_to_check].fillna(df[numeric_columns_to_check].mean())\n", - "correlation_matrix = df[columns_to_check].corr()\n", - "correlation_matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "82e1d93d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 90)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Dropping columns that are least correlated\n", - "columns_to_drop4 = ['AMOUNT_RUB_CLO_PRC','LDEAL_DELINQ_PER_MAXYWZ']\n", - "df = df.drop(columns=columns_to_drop4)\n", - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "bb512ca2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(355190, 90)" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Dropping rows where all elements are missing\n", - "df_rows = df.dropna(how='all')\n", - "df_rows.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "15860997", - "metadata": {}, - "outputs": [], - "source": [ - "# Importing important libraries\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.impute import SimpleImputer\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import StandardScaler, OneHotEncoder" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "806f8b15", - "metadata": {}, - "outputs": [], - "source": [ - "X = df.drop(columns=['TARGET'])\n", - "y = df['TARGET']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "064ab733", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['CR_PROD_CNT_IL',\n", - " 'TURNOVER_DYNAMIC_IL_1M',\n", - " 'CNT_TRAN_AUT_TENDENCY1M',\n", - " 'SUM_TRAN_AUT_TENDENCY1M',\n", - " 'AMOUNT_RUB_SUP_PRC',\n", - " 'SUM_TRAN_AUT_TENDENCY3M',\n", - " 'REST_DYNAMIC_FDEP_1M',\n", - " 'CNT_TRAN_AUT_TENDENCY3M',\n", - " 'REST_DYNAMIC_SAVE_3M',\n", - " 'CR_PROD_CNT_VCU',\n", - " 'REST_AVG_CUR',\n", - " 'CNT_TRAN_MED_TENDENCY1M',\n", - " 'AMOUNT_RUB_NAS_PRC',\n", - " 'TRANS_COUNT_SUP_PRC',\n", - " 'CNT_TRAN_CLO_TENDENCY1M',\n", - " 'SUM_TRAN_MED_TENDENCY1M',\n", - " 'TRANS_COUNT_NAS_PRC',\n", - " 'CR_PROD_CNT_TOVR',\n", - " 'CR_PROD_CNT_PIL',\n", - " 'SUM_TRAN_CLO_TENDENCY1M',\n", - " 'APP_POSITION_TYPE',\n", - " 'TURNOVER_CC',\n", - " 'TRANS_COUNT_ATM_PRC',\n", - " 'AMOUNT_RUB_ATM_PRC',\n", - " 'TURNOVER_PAYM',\n", - " 'CNT_TRAN_MED_TENDENCY3M',\n", - " 'CR_PROD_CNT_CC',\n", - " 'SUM_TRAN_MED_TENDENCY3M',\n", - " 'REST_DYNAMIC_FDEP_3M',\n", - " 'REST_DYNAMIC_IL_1M',\n", - " 'SUM_TRAN_CLO_TENDENCY3M',\n", - " 'LDEAL_TENOR_MAX',\n", - " 'LDEAL_YQZ_CHRG',\n", - " 'CR_PROD_CNT_CCFP',\n", - " 'DEAL_YQZ_IR_MAX',\n", - " 'LDEAL_YQZ_COM',\n", - " 'DEAL_YQZ_IR_MIN',\n", - " 'CNT_TRAN_CLO_TENDENCY3M',\n", - " 'REST_DYNAMIC_CUR_1M',\n", - " 'REST_AVG_PAYM',\n", - " 'LDEAL_TENOR_MIN',\n", - " 'LDEAL_AMT_MONTH',\n", - " 'LDEAL_GRACE_DAYS_PCT_MED',\n", - " 'REST_DYNAMIC_CUR_3M',\n", - " 'CNT_TRAN_SUP_TENDENCY3M',\n", - " 'TURNOVER_DYNAMIC_CUR_1M',\n", - " 'REST_DYNAMIC_PAYM_3M',\n", - " 'SUM_TRAN_SUP_TENDENCY3M',\n", - " 'REST_DYNAMIC_IL_3M',\n", - " 'CNT_TRAN_ATM_TENDENCY3M',\n", - " 'CNT_TRAN_ATM_TENDENCY1M',\n", - " 'TURNOVER_DYNAMIC_IL_3M',\n", - " 'SUM_TRAN_ATM_TENDENCY3M',\n", - " 'DEAL_GRACE_DAYS_ACC_S1X1',\n", - " 'AVG_PCT_MONTH_TO_PCLOSE',\n", - " 'DEAL_YWZ_IR_MIN',\n", - " 'SUM_TRAN_SUP_TENDENCY1M',\n", - " 'DEAL_YWZ_IR_MAX',\n", - " 'SUM_TRAN_ATM_TENDENCY1M',\n", - " 'REST_DYNAMIC_PAYM_1M',\n", - " 'CNT_TRAN_SUP_TENDENCY1M',\n", - " 'DEAL_GRACE_DAYS_ACC_AVG',\n", - " 'TURNOVER_DYNAMIC_CUR_3M',\n", - " 'MAX_PCLOSE_DATE',\n", - " 'LDEAL_YQZ_PC',\n", - " 'CLNT_SETUP_TENOR',\n", - " 'DEAL_GRACE_DAYS_ACC_MAX',\n", - " 'TURNOVER_DYNAMIC_PAYM_3M',\n", - " 'LDEAL_DELINQ_PER_MAXYQZ',\n", - " 'TURNOVER_DYNAMIC_PAYM_1M',\n", - " 'CLNT_SALARY_VALUE',\n", - " 'TRANS_AMOUNT_TENDENCY3M',\n", - " 'MED_DEBT_PRC_YQZ',\n", - " 'TRANS_CNT_TENDENCY3M',\n", - " 'LDEAL_USED_AMT_AVG_YQZ',\n", - " 'REST_DYNAMIC_CC_1M',\n", - " 'LDEAL_USED_AMT_AVG_YWZ',\n", - " 'TURNOVER_DYNAMIC_CC_1M',\n", - " 'AVG_PCT_DEBT_TO_DEAL_AMT',\n", - " 'LDEAL_ACT_DAYS_ACC_PCT_AVG',\n", - " 'REST_DYNAMIC_CC_3M',\n", - " 'MED_DEBT_PRC_YWZ',\n", - " 'LDEAL_ACT_DAYS_PCT_TR3',\n", - " 'LDEAL_ACT_DAYS_PCT_AAVG',\n", - " 'TURNOVER_DYNAMIC_CC_3M',\n", - " 'LDEAL_ACT_DAYS_PCT_TR',\n", - " 'LDEAL_ACT_DAYS_PCT_TR4',\n", - " 'LDEAL_ACT_DAYS_PCT_CURR']" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "numeric_features = X.select_dtypes(include=['number']).columns.tolist()\n", - "categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()\n", - "numeric_features" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "447e665e", - "metadata": {}, - "outputs": [], - "source": [ - "# Splitting the data into train and test sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", - "# Defining preprocessing steps\n", - "num_trans = Pipeline(steps=[\n", - " ('imputer', SimpleImputer(strategy='mean')),\n", - " ('scaler', StandardScaler())\n", - "])\n", - "cat_trans = Pipeline(steps=[\n", - " ('imputer', SimpleImputer(strategy='most_frequent')),\n", - " ('onehot', OneHotEncoder(handle_unknown='ignore'))\n", - "])\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', num_trans, numeric_features),\n", - " ('cat', cat_trans, categorical_features)\n", - " ])" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "36e74664", - "metadata": {}, - "outputs": [], - "source": [ - "# Checking if all specified columns exist in the DataFrame\n", - "missing_numeric_cols = [col for col in numeric_features if col not in X.columns]\n", - "missing_categorical_cols = [col for col in categorical_features if col not in X.columns]\n", - "\n", - "if missing_numeric_cols:\n", - " print(f\"Missing numeric columns in the DataFrame: {missing_numeric_cols}\")\n", - "if missing_categorical_cols:\n", - " print(f\"Missing categorical columns in the DataFrame: {missing_categorical_cols}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "34c82aab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Pipeline(steps=[('preprocessor',\n",
-       "                 ColumnTransformer(transformers=[('num',\n",
-       "                                                  Pipeline(steps=[('imputer',\n",
-       "                                                                   SimpleImputer()),\n",
-       "                                                                  ('scaler',\n",
-       "                                                                   StandardScaler())]),\n",
-       "                                                  ['CR_PROD_CNT_IL',\n",
-       "                                                   'TURNOVER_DYNAMIC_IL_1M',\n",
-       "                                                   'CNT_TRAN_AUT_TENDENCY1M',\n",
-       "                                                   'SUM_TRAN_AUT_TENDENCY1M',\n",
-       "                                                   'AMOUNT_RUB_SUP_PRC',\n",
-       "                                                   'SUM_TRAN_AUT_TENDENCY3M',\n",
-       "                                                   'REST_DYNAMIC_FDEP_1M',\n",
-       "                                                   'CNT_TRAN_AUT_TENDENCY3M',\n",
-       "                                                   'REST_DYNAMIC_...\n",
-       "                                                   'AMOUNT_RUB_ATM_PRC',\n",
-       "                                                   'TURNOVER_PAYM',\n",
-       "                                                   'CNT_TRAN_MED_TENDENCY3M',\n",
-       "                                                   'CR_PROD_CNT_CC',\n",
-       "                                                   'SUM_TRAN_MED_TENDENCY3M',\n",
-       "                                                   'REST_DYNAMIC_FDEP_3M',\n",
-       "                                                   'REST_DYNAMIC_IL_1M', ...]),\n",
-       "                                                 ('cat',\n",
-       "                                                  Pipeline(steps=[('imputer',\n",
-       "                                                                   SimpleImputer(strategy='most_frequent')),\n",
-       "                                                                  ('onehot',\n",
-       "                                                                   OneHotEncoder(handle_unknown='ignore'))]),\n",
-       "                                                  ['PACK'])])),\n",
-       "                ('classifier', RandomForestClassifier(random_state=42))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "Pipeline(steps=[('preprocessor',\n", - " ColumnTransformer(transformers=[('num',\n", - " Pipeline(steps=[('imputer',\n", - " SimpleImputer()),\n", - " ('scaler',\n", - " StandardScaler())]),\n", - " ['CR_PROD_CNT_IL',\n", - " 'TURNOVER_DYNAMIC_IL_1M',\n", - " 'CNT_TRAN_AUT_TENDENCY1M',\n", - " 'SUM_TRAN_AUT_TENDENCY1M',\n", - " 'AMOUNT_RUB_SUP_PRC',\n", - " 'SUM_TRAN_AUT_TENDENCY3M',\n", - " 'REST_DYNAMIC_FDEP_1M',\n", - " 'CNT_TRAN_AUT_TENDENCY3M',\n", - " 'REST_DYNAMIC_...\n", - " 'AMOUNT_RUB_ATM_PRC',\n", - " 'TURNOVER_PAYM',\n", - " 'CNT_TRAN_MED_TENDENCY3M',\n", - " 'CR_PROD_CNT_CC',\n", - " 'SUM_TRAN_MED_TENDENCY3M',\n", - " 'REST_DYNAMIC_FDEP_3M',\n", - " 'REST_DYNAMIC_IL_1M', ...]),\n", - " ('cat',\n", - " Pipeline(steps=[('imputer',\n", - " SimpleImputer(strategy='most_frequent')),\n", - " ('onehot',\n", - " OneHotEncoder(handle_unknown='ignore'))]),\n", - " ['PACK'])])),\n", - " ('classifier', RandomForestClassifier(random_state=42))])" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Creating a pipeline with preprocessing and the Random Forest model\n", - "rf_pipeline = Pipeline(steps=[('preprocessor', preprocessor),\n", - " ('classifier', RandomForestClassifier(random_state=42))])\n", - "# And fitting the model\n", - "rf_pipeline.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "860104f6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy: 0.920422872265548\n" - ] - } - ], - "source": [ - "# Evaluating the model\n", - "accuracy = rf_pipeline.score(X_test, y_test)\n", - "print(\"Accuracy:\", accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "a5a6202d-05a0-49da-adab-eefa3cfd9694", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "a62223ea-9f0f-42c4-abc4-d1cde01c1f28", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training Accuracy: 0.9998521917846787\n" - ] - } - ], - "source": [ - "# Training Accuracy\n", - "y_train_pred = rf_pipeline.predict(X_train)\n", - "train_accuracy = accuracy_score(y_train, y_train_pred)\n", - "print(f\"Training Accuracy: {train_accuracy}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "529e015a-9ea5-433f-959e-9f5de225b7bc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test Accuracy: 0.920422872265548\n", - "Confusion Matrix:\n", - " [[65098 215]\n", - " [ 5438 287]]\n", - "Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 0.92 1.00 0.96 65313\n", - " 1 0.57 0.05 0.09 5725\n", - "\n", - " accuracy 0.92 71038\n", - " macro avg 0.75 0.52 0.53 71038\n", - "weighted avg 0.89 0.92 0.89 71038\n", - "\n" - ] - } - ], - "source": [ - "#Testing Accuracy\n", - "y_test_pred = rf_pipeline.predict(X_test)\n", - "test_accuracy = accuracy_score(y_test, y_test_pred)\n", - "print(f\"Test Accuracy: {test_accuracy}\")\n", - "\n", - "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_test_pred))\n", - "print(\"Classification Report:\\n\", classification_report(y_test, y_test_pred))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "569eee63-660d-47a1-9ea9-6c77a90b4894", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Random Forest Model\n", - "Training Accuracy: 0.9998521917846787\n", - "Test Accuracy: 0.920422872265548\n", - "Confusion Matrix:\n", - " [[65098 215]\n", - " [ 5438 287]]\n", - "Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 0.92 1.00 0.96 65313\n", - " 1 0.57 0.05 0.09 5725\n", - "\n", - " accuracy 0.92 71038\n", - " macro avg 0.75 0.52 0.53 71038\n", - "weighted avg 0.89 0.92 0.89 71038\n", - "\n" - ] - } - ], - "source": [ - "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix\n", - "\n", - "# Predictions\n", - "y_pred_rf = rf_pipeline.predict(X_test)\n", - "train_pred_rf = rf_pipeline.predict(X_train)\n", - "\n", - "# Evaluation metrics\n", - "print(\"Random Forest Model\")\n", - "print(\"Training Accuracy:\", accuracy_score(y_train, train_pred_rf))\n", - "print(\"Test Accuracy:\", accuracy_score(y_test, y_pred_rf))\n", - "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_pred_rf))\n", - "print(\"Classification Report:\\n\", classification_report(y_test, y_pred_rf))\n" - ] - }, - { - "cell_type": "markdown", - "id": "2c9c6176-b120-4fa0-8952-08dd677af7f0", - "metadata": {}, - "source": [ - "Hyperparameter Tuning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25ad369f-7f95-4bdf-9f06-d9a19fcb66d2", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "\n", - "# Define the parameter grid\n", - "param_grid = {\n", - " 'classifier__n_estimators': [100, 200, 300],\n", - " 'classifier__max_depth': [None, 10, 20, 30],\n", - " 'classifier__min_samples_split': [2, 5, 10]\n", - "}\n", - "\n", - "# GridSearchCV\n", - "grid_search = GridSearchCV(estimator=rf_pipeline, param_grid=param_grid, cv=3, n_jobs=-1, scoring='accuracy')\n", - "grid_search.fit(X_train, y_train)\n", - "# Best parameters\n", - "print(\"Best parameters found: \", grid_search.best_params_)\n", - "print(\"Best cross-validation accuracy: \", grid_search.best_score_)\n", - "\n", - "# Refitting the pipeline with best parameters\n", - "best_rf_pipeline = grid_search.best_estimator_\n", - "best_rf_pipeline.fit(X_train, y_train)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "b100cd93-e9c0-4963-b4d2-9d816649ff6b", - "metadata": {}, - "source": [ - "Evaluating the Tuned Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4d9da891-51a0-46dc-993b-084504db2e96", - "metadata": {}, - "outputs": [], - "source": [ - "# Predictions\n", - "y_pred_rf_best = best_rf_pipeline.predict(X_test)\n", - "train_pred_rf_best = best_rf_pipeline.predict(X_train)\n", - "\n", - "# Evaluation metrics\n", - "print(\"Tuned Random Forest Model\")\n", - "print(\"Training Accuracy:\", accuracy_score(y_train, train_pred_rf_best))\n", - "print(\"Test Accuracy:\", accuracy_score(y_test, y_pred_rf_best))\n", - "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_pred_rf_best))\n", - "print(\"Classification Report:\\n\", classification_report(y_test, y_pred_rf_best))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6e38323c", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluate the fine-tuned model\n", - "fine_tuned_accuracy = best_rf_pipeline.score(X_test, y_test)\n", - "print(\"Fine-tuned Test Accuracy:\", fine_tuned_accuracy)\n", - "\n", - "# Print fine-tuned classification report\n", - "fine_tuned_y_pred = best_rf_pipeline.predict(X_test)\n", - "print(\"Fine-tuned Classification Report:\\n\", classification_report(y_test, fine_tuned_y_pred))\n", - "\n", - "# Print confusion matrix\n", - "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, fine_tuned_y_pred))" - ] - }, - { - "cell_type": "markdown", - "id": "06a6898b", - "metadata": {}, - "source": [ - "# XG Boost" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07ebe4c6", - "metadata": {}, - "outputs": [], - "source": [ - "# Installing and importing xgboost\n", - "!pip install xgboost\n", - "from xgboost import XGBClassifier\n", - "from sklearn.model_selection import train_test_split, GridSearchCV" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bdefb327", - "metadata": {}, - "outputs": [], - "source": [ - "# Creating preprocessing pipelines for both numeric and categorical data\n", - "numerical_transformer = StandardScaler()\n", - "categorical_transformer = OneHotEncoder(handle_unknown='ignore')\n", - "\n", - "# Bundling preprocessing for numerical and categorical features\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', numerical_transformer, numeric_features),\n", - " ('cat', categorical_transformer, categorical_features)\n", - " ])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "436d3875", - "metadata": {}, - "outputs": [], - "source": [ - "# Defining the model\n", - "xgb_model = XGBClassifier(random_state=42, n_jobs=-1)\n", - "\n", - "# Creating and evaluating the pipeline\n", - "xgb_pipeline = Pipeline(steps=[('preprocessor', preprocessor),\n", - " ('classifier', xgb_model)])\n", - "# Fitting the pipeline on the training data\n", - "xgb_pipeline.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "03310498", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluating the pipeline on the test data\n", - "xgb_acc = xgb_pipeline.score(X_test, y_test)\n", - "print(\"XGBoost Accuracy:\", xgb_acc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f194e04-06ff-4fbd-b32c-2b6e9ee7b4f8", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluating the pipeline on the train data\n", - "y_train_pred = xgb_pipeline.predict(X_train)\n", - "train_accuracy = accuracy_score(y_train, y_train_pred)\n", - "print(f\"Training Accuracy: {train_accuracy}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "010f0c18-8e3b-432f-a4ff-03e4bf4334bf", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluating the pipeline on the test data\n", - "y_test_pred = xgb_pipeline.predict(X_test)\n", - "test_accuracy = accuracy_score(y_test, y_test_pred)\n", - "print(f\"Test Accuracy: {test_accuracy}\")\n", - "\n", - "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_test_pred))\n", - "print(\"Classification Report:\\n\", classification_report(y_test, y_test_pred))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4620013c", - "metadata": {}, - "outputs": [], - "source": [ - "# Define the parameter grid for hyperparameter tuning\n", - "param_grid = {\n", - " 'classifier__n_estimators': [100, 200, 300],\n", - " 'classifier__max_depth': [3, 5, 7],\n", - " 'classifier__learning_rate': [0.01, 0.1, 0.2],\n", - " 'classifier__subsample': [0.6, 0.8, 1.0],\n", - " 'classifier__colsample_bytree': [0.6, 0.8, 1.0]\n", - "}\n", - "\n", - "# Set up GridSearchCV for hyperparameter tuning\n", - "grid_search = GridSearchCV(estimator=xgb_pipeline, param_grid=param_grid, cv=3, n_jobs=-1, scoring='accuracy')\n", - "\n", - "# Train the model with GridSearchCV\n", - "grid_search.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ca8252d", - "metadata": {}, - "outputs": [], - "source": [ - "# Get the best parameters and best estimator\n", - "best_params = grid_search.best_params_\n", - "print(\"Best parameters found: \", best_params)\n", - "best_xgb_pipeline = grid_search.best_estimator_\n", - "\n", - "# Evaluate the fine-tuned model\n", - "fine_tuned_accuracy = best_xgb_pipeline.score(X_test, y_test)\n", - "print(\"Fine-tuned XGBoost Accuracy:\", fine_tuned_accuracy)\n", - "\n", - "# Evaluation on train and test data for the fine-tuned model\n", - "fine_tuned_train_pred = best_xgb_pipeline.predict(X_train)\n", - "fine_tuned_train_accuracy = accuracy_score(y_train, fine_tuned_train_pred)\n", - "print(f\"Fine-tuned Training Accuracy: {fine_tuned_train_accuracy}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a57d08f", - "metadata": {}, - "outputs": [], - "source": [ - "fine_tuned_test_pred = best_xgb_pipeline.predict(X_test)\n", - "fine_tuned_test_accuracy = accuracy_score(y_test, fine_tuned_test_pred)\n", - "print(f\"Fine-tuned Test Accuracy: {fine_tuned_test_accuracy}\")\n", - "\n", - "print(\"Fine-tuned Confusion Matrix:\\n\", confusion_matrix(y_test, fine_tuned_test_pred))\n", - "print(\"Fine-tuned Classification Report:\\n\", classification_report(y_test, fine_tuned_test_pred))" - ] - }, - { - "cell_type": "markdown", - "id": "aa6d141a", - "metadata": {}, - "source": [ - "# DNN Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d50b0bfc", - "metadata": {}, - "outputs": [], - "source": [ - "# Fitting the preprocessor and transforming the training data\n", - "X_train = preprocessor.fit_transform(X_train)\n", - "X_test = preprocessor.transform(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a4d558a", - "metadata": {}, - "outputs": [], - "source": [ - "#Importing useful libraries\n", - "import tensorflow as tf\n", - "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Dense, Dropout\n", - "from tensorflow.keras.callbacks import EarlyStopping\n", - "from tensorflow.keras.optimizers import Adam\n", - "from tensorflow.keras.wrappers.scikit_learn import KerasClassifier\n", - "early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)\n", - "# Defining the model\n", - "dnn_model = Sequential([\n", - " Dense(128, activation='relu', input_shape=(X_train.shape[1],)),\n", - " Dropout(0.2),\n", - " Dense(64, activation='relu'),\n", - " Dropout(0.2),\n", - " Dense(32, activation='relu'),\n", - " Dense(1, activat ion='sigmoid') # Assuming binary classification\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1132ab3", - "metadata": {}, - "outputs": [], - "source": [ - "# Compiling the model\n", - "dnn_model.compile(optimizer='adam',\n", - " loss='binary_crossentropy', # Use 'categorical_crossentropy' for multi-class classification\n", - " metrics=['accuracy'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3c2b5c4", - "metadata": {}, - "outputs": [], - "source": [ - "# Training the model\n", - "Train = dnn_model.fit(X_train, y_train, \n", - " epochs=10, \n", - " batch_size=32, \n", - " validation_data=(X_test, y_test),\n", - " callbacks=[early_stopping])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd6d250e", - "metadata": {}, - "outputs": [], - "source": [ - "# Evaluating the model\n", - "dnn_loss, dnn_accuracy = dnn_model.evaluate(X_test, y_test)\n", - "print(f\"DNN Accuracy: {dnn_accuracy}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3a111fe7", - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap the model using the KerasClassifier\n", - "dnn_model = KerasClassifier(build_fn=create_dnn_model, verbose=0)\n", - "\n", - "# Define the parameter grid for hyperparameter tuning\n", - "param_dist = {\n", - " 'learning_rate': [0.001, 0.01, 0.1],\n", - " 'dropout_rate': [0.2, 0.3, 0.4],\n", - " 'units': [64, 128, 256],\n", - " 'batch_size': [16, 32, 64],\n", - " 'epochs': [10, 50, 100]\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb7b1b80", - "metadata": {}, - "outputs": [], - "source": [ - "# Set up RandomizedSearchCV for hyperparameter tuning\n", - "random_search = RandomizedSearchCV(estimator=dnn_model, param_distributions=param_dist, \n", - " n_iter=10, cv=3, verbose=1, n_jobs=-1)\n", - "\n", - "# Train the model with RandomizedSearchCV\n", - "random_search_result = random_search.fit(X_train, y_train, \n", - " validation_data=(X_test, y_test), \n", - " callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0dbdcf23", - "metadata": {}, - "outputs": [], - "source": [ - "# Get the best parameters and best estimator\n", - "best_params = random_search_result.best_params_\n", - "print(\"Best parameters found: \", best_params)\n", - "best_dnn_model = random_search_result.best_estimator_\n", - "\n", - "# Evaluate the fine-tuned model\n", - "fine_tuned_loss, fine_tuned_accuracy = best_dnn_model.model.evaluate(X_test, y_test)\n", - "print(f\"Fine-tuned DNN Accuracy: {fine_tuned_accuracy}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "970a8049", - "metadata": {}, - "outputs": [], - "source": [ - "# Predictions for evaluation\n", - "y_test_pred = (best_dnn_model.model.predict(X_test) > 0.5).astype(\"int32\")\n", - "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_test_pred))\n", - "print(\"Classification Report:\\n\", classification_report(y_test, y_test_pred))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}