File size: 5,866 Bytes
57224ce
 
 
 
 
 
4d9cc11
03be78e
 
 
57224ce
03be78e
 
57224ce
fd19bf9
 
 
 
 
 
 
57224ce
 
be9e24e
57224ce
 
03be78e
57224ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be9e24e
57224ce
be9e24e
57224ce
 
2d35bf6
 
 
 
 
 
 
 
 
928de13
57224ce
 
 
 
 
 
4d9cc11
 
 
946ed04
dc7688e
4d9cc11
03be78e
dc7688e
 
 
03be78e
dc7688e
 
 
 
 
 
 
 
 
 
d76235b
dc7688e
 
 
 
 
 
4d9cc11
dc7688e
 
4d9cc11
03be78e
dc7688e
 
03be78e
 
 
1dd281f
03be78e
42275c5
03be78e
61aee05
 
0b3ee8b
ca9331e
2bb7ff1
ca9331e
0b3ee8b
eb5d1ac
dc8b9d2
 
 
eb5d1ac
dc7688e
1e9b2f6
836639e
1e9b2f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c6cc90
946ed04
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st 
import pickle 
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder



# Page heading, followed by the introductory blurb that explains what the
# app predicts; the sidebar heading labels the input widgets.
st.title("Health Insurance Cross Sell Prediction")
intro_text = """This application uses XGBoost Classifier to perform cross cell predictin. Now, question arises what is Cross Sell Prediction? 
So, Cross-selling involves selling complementary products to existing customers. It is one of the highly effective techniques in the marketing industry.

The project uses the dataset of customers of Health Insurance company and the problem the statement is as follows:

to build a model to predict whether a customer would be interested in Vehicle Insurance is extremely helpful for the company because 
it can then accordingly plan its communication strategy to reach out to those customers and optimize its business model and revenue."""
st.write(intro_text)
st.sidebar.header('Customer Data')

#df = pd.read_csv('health_insurance.csv')


# DATA from user
def user_report():
    """Collect one customer's details from the sidebar widgets.

    Each categorical answer is converted to an integer code before it is
    stored. NOTE(review): the codes presumably mirror the label encoding
    applied to the training CSV in ``prediction`` -- confirm against the data.

    Returns:
        pandas.DataFrame: a single row (index ``0``) with the columns
        ``Gender``, ``Age``, ``Driving_License``, ``Region_Code``,
        ``Previously_Insured``, ``Vehicle_Age``, ``Vehicle_Damage``,
        ``Annual_Premium``, ``Policy_Sales_Channel`` and ``Vintage``,
        in that order.
    """
    sidebar = st.sidebar

    # Widgets are created in the order they should appear in the sidebar.
    gender_choice = sidebar.selectbox("Gender", ("Male", "Female"))
    gender = 0 if gender_choice == 'Female' else 1

    age = sidebar.slider('Age of Customer', 20, 85, 28)

    # Named has_license rather than license to avoid shadowing the builtin.
    license_choice = sidebar.selectbox('has Driving_License?', ("YES", "NO"))
    has_license = 0 if license_choice == 'NO' else 1

    regioncode = sidebar.number_input(
        'Enter the Region Code (any number between 0 to 52 )',
        min_value=0, max_value=52, step=1)

    insured_choice = sidebar.selectbox('is_previously_insured', ("YES", "NO"))
    is_previously_insured = 1 if insured_choice == 'YES' else 0

    vehicle_age_choice = sidebar.selectbox(
        'Vechile Age', ('<1 year', '1-2 year', '>2 years'))
    # Any option other than the two listed here ('>2 years') maps to 2.
    vehicle_age = {'1-2 year': 0, '<1 year': 1}.get(vehicle_age_choice, 2)

    damaged_choice = sidebar.selectbox('Is your Vechile Damaged', ("YES", "NO"))
    is_vehicle_damaged = 0 if damaged_choice == 'NO' else 1

    annual_premium = sidebar.slider('Enter Annual premium you pay', 2000, 60000, 5000)
    policy_sales_channel = sidebar.number_input(
        "Policy Sales Channel(Enter any number between 1 to 160)",
        step=1, min_value=1, max_value=160)
    # min_value=0 stops a negative number of days from being entered; the
    # default shown to the user is still 0.
    number_of_days_company = sidebar.number_input(
        "Enter the number of days Associaed with company(Vintage)",
        min_value=0, step=1)

    user_report_data = {
        'Gender': gender,
        'Age': age,
        'Driving_License': has_license,
        'Region_Code': regioncode,
        'Previously_Insured': is_previously_insured,
        'Vehicle_Age': vehicle_age,
        'Vehicle_Damage': is_vehicle_damaged,
        'Annual_Premium': annual_premium,
        'Policy_Sales_Channel': policy_sales_channel,
        'Vintage': number_of_days_company,
    }
    return pd.DataFrame(user_report_data, index=[0])


# Customer Data: read the sidebar inputs for this script run and echo the
# resulting one-row DataFrame on the main page so the user can review it.
user_data = user_report()
st.header("Customer Data")
st.write(user_data)


def prediction(report_data):
    """Train the cross-sell classifier and score one customer.

    Reads ``health_insurance.csv``, label-encodes the ``Gender``,
    ``Vehicle_Age`` and ``Vehicle_Damage`` columns, oversamples the data with
    SMOTE, keeps the 70 % training split, standardises it and fits an
    ``XGBClassifier`` on the standardised features. The same fitted scaler is
    then applied to ``report_data`` so the customer is scored in the feature
    space the model was trained in.

    Args:
        report_data: Feature row for the customer, laid out like the CSV
            after ``id`` and ``Response`` are dropped. Only the prediction
            for the first row is reported.

    Returns:
        str: a status message saying whether the customer is predicted to
        buy vehicle insurance (predicted class 1) or not.
    """
    # Kept as a local import, as before, so imbalanced-learn is only needed
    # once a prediction is actually requested.
    from imblearn.over_sampling import SMOTE

    # Importing data from csv
    df = pd.read_csv('health_insurance.csv')

    # Label-encode the text columns; each column gets its own encoder.
    for column in ('Gender', 'Vehicle_Age', 'Vehicle_Damage'):
        df[column] = LabelEncoder().fit_transform(df[column])

    x = df.drop(columns=['id', 'Response'])
    y = df['Response']

    # Balancing the data for the target column
    smt = SMOTE(k_neighbors=8, random_state=10)
    x_new, y_new = smt.fit_resample(x, y)

    # Same split as before so the model is fitted on the same rows; the
    # held-out part is not used by this function.
    xtrain, _xtest, ytrain, _ytest = train_test_split(
        x_new, y_new, test_size=.30, random_state=0)

    # Fit the scaler BEFORE training and train on the scaled matrix.
    # Previously the model was fitted on raw features while only the user
    # row was standardised, so training and scoring inputs did not match.
    scaler = StandardScaler()
    xtrain_scaled = scaler.fit_transform(xtrain)

    # XGBoost model building
    model_xgb = XGBClassifier()
    model_xgb.fit(xtrain_scaled, ytrain)

    # Scale the user data with the scaler fitted on the training split.
    response = model_xgb.predict(scaler.transform(report_data))
    if response[0] == 1:
        return 'Status of Customer. This customer willing to buy a vehicle insurance'
    return 'Status of Customer. This customer will not buy a vehicle insurance'
        
# Only train and score when the user presses the button: prediction() reads
# the CSV and refits the model on every call, and calling it unconditionally
# made every script run pay that cost even when no result was shown.
if st.button("Predict"):
    y_pred = prediction(user_data)
    st.subheader(y_pred)


# Reference text rendered at the bottom of the page: one entry per input
# feature, followed by the meaning of the predicted target column.
feature_notes = """Features Used:

The following are the input Varibles of a customer which Company needs to be enter, and then the application will predict whether that particular
person/customer will be willing to buy Vehicle Insurance or not

1) Gender : Gender of the customer

2) Age : Age of the customer

3) Driving_License : 0 - Customer does not have DL,1 - Customer already has DL

4) Region_Code : Unique code for the region of the customer

5) Previously_Insured : 1 - Customer already has Vehicle Insurance, 0-Customer doesn't have Vehicle Insurance

6) Vehicle_Age : Age of the Vehicle 

7) Vehicle_Damage : 1 - Customer got his/her vehicle damaged in the past. 0 -Customer didn't get his/her vehicle damaged in the past.

8) Annual_Premium : The amount customer needs to pay as premium in the year

9) PolicySalesChannel : Anonymized Code for the channel of outreaching to the customer ie. Different Agents, Over Mail, Over Phone, In Person, etc.

10) Vintage : Number of Days, Customer has been associated with the company

Target Column/Prediction

Response : 1 - Customer is interested, 0 - Customer is not interested"""
st.write(feature_notes)