Spaces:
Runtime error
Runtime error
File size: 5,866 Bytes
57224ce 4d9cc11 03be78e 57224ce 03be78e 57224ce fd19bf9 57224ce be9e24e 57224ce 03be78e 57224ce be9e24e 57224ce be9e24e 57224ce 2d35bf6 928de13 57224ce 4d9cc11 946ed04 dc7688e 4d9cc11 03be78e dc7688e 03be78e dc7688e d76235b dc7688e 4d9cc11 dc7688e 4d9cc11 03be78e dc7688e 03be78e 1dd281f 03be78e 42275c5 03be78e 61aee05 0b3ee8b ca9331e 2bb7ff1 ca9331e 0b3ee8b eb5d1ac dc8b9d2 eb5d1ac dc7688e 1e9b2f6 836639e 1e9b2f6 1c6cc90 946ed04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
import pickle
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Main-panel heading followed by the introductory description of the app.
st.title("Health Insurance Cross Sell Prediction")
app_intro = """This application uses XGBoost Classifier to perform cross cell predictin. Now, question arises what is Cross Sell Prediction?
So, Cross-selling involves selling complementary products to existing customers. It is one of the highly effective techniques in the marketing industry.
The project uses the dataset of customers of Health Insurance company and the problem the statement is as follows:
to build a model to predict whether a customer would be interested in Vehicle Insurance is extremely helpful for the company because
it can then accordingly plan its communication strategy to reach out to those customers and optimize its business model and revenue."""
st.write(app_intro)

# Heading above the sidebar widgets that collect the customer's details.
st.sidebar.header("Customer Data")
#df = pd.read_csv('health_insurance.csv')
# DATA from user
def user_report():
    """Collect one customer's features from the sidebar widgets.

    Each categorical answer is converted to the integer code the model
    input uses:

    * Gender: Female -> 0, Male -> 1
    * Driving_License: NO -> 0, YES -> 1
    * Previously_Insured: NO -> 0, YES -> 1
    * Vehicle_Age: '1-2 year' -> 0, '<1 year' -> 1, '>2 years' -> 2
    * Vehicle_Damage: NO -> 0, YES -> 1

    Returns:
        pandas.DataFrame: a single row (index ``0``) with the columns
        Gender, Age, Driving_License, Region_Code, Previously_Insured,
        Vehicle_Age, Vehicle_Damage, Annual_Premium, Policy_Sales_Channel
        and Vintage, in that order.
    """
    gender = st.sidebar.selectbox("Gender", ("Male", "Female"))
    gender_code = 0 if gender == 'Female' else 1

    age = st.sidebar.slider('Age of Customer', 20, 85, 28)

    # Named ``has_license`` rather than ``license`` to avoid shadowing the
    # ``license`` builtin that the interpreter's ``site`` module installs.
    has_license = st.sidebar.selectbox('has Driving_License?', ("YES", "NO"))
    license_code = 0 if has_license == 'NO' else 1

    region_code = st.sidebar.number_input(
        'Enter the Region Code (any number between 0 to 52 )',
        min_value=0,
        max_value=52,
        step=1,
    )

    previously_insured = st.sidebar.selectbox(
        'is_previously_insured', ("YES", "NO")
    )
    previously_insured_code = 1 if previously_insured == 'YES' else 0

    vehicle_age = st.sidebar.selectbox(
        'Vechile Age', ('<1 year', '1-2 year', '>2 years')
    )
    if vehicle_age == '1-2 year':
        vehicle_age_code = 0
    elif vehicle_age == '<1 year':
        vehicle_age_code = 1
    else:
        vehicle_age_code = 2

    vehicle_damaged = st.sidebar.selectbox(
        'Is your Vechile Damaged', ("YES", "NO")
    )
    vehicle_damage_code = 0 if vehicle_damaged == 'NO' else 1

    annual_premium = st.sidebar.slider(
        'Enter Annual premium you pay', 2000, 60000, 5000
    )
    policy_sales_channel = st.sidebar.number_input(
        "Policy Sales Channel(Enter any number between 1 to 160)",
        step=1,
        min_value=1,
        max_value=160,
    )
    # A count of days cannot be negative, so bound the input at zero; the
    # widget previously accepted any integer.
    vintage_days = st.sidebar.number_input(
        "Enter the number of days Associaed with company(Vintage)",
        step=1,
        min_value=0,
    )

    user_report_data = {
        'Gender': gender_code,
        'Age': age,
        'Driving_License': license_code,
        'Region_Code': region_code,
        'Previously_Insured': previously_insured_code,
        'Vehicle_Age': vehicle_age_code,
        'Vehicle_Damage': vehicle_damage_code,
        'Annual_Premium': annual_premium,
        'Policy_Sales_Channel': policy_sales_channel,
        'Vintage': vintage_days,
    }
    return pd.DataFrame(user_report_data, index=[0])
# Customer Data
# Build the single-row feature frame from the sidebar inputs and echo it in
# the main panel; the same frame is later passed to prediction().
user_data = user_report()
st.header("Customer Data")
st.write(user_data)
def prediction(report_data):
    """Train an XGBoost classifier and score one customer's data.

    The function loads ``health_insurance.csv``, label-encodes the
    ``Gender``, ``Vehicle_Age`` and ``Vehicle_Damage`` columns, balances the
    ``Response`` target with SMOTE, standardises the training features,
    fits an ``XGBClassifier`` on them and predicts for ``report_data``.

    The scaler is fitted *before* the model is trained and the model is
    trained on the scaled features, so the row scored at the end is on the
    same scale the model learned from. (Previously the model was fitted on
    raw features but asked to predict on standardised ones.)

    Args:
        report_data: single-row frame of customer features, with the same
            columns as the CSV minus ``id`` and ``Response``.

    Returns:
        str: a status sentence saying whether the customer is predicted to
        buy vehicle insurance.
    """
    # Only this function needs imbalanced-learn, so the import stays local.
    from imblearn.over_sampling import SMOTE

    # Importing data from csv
    df = pd.read_csv('health_insurance.csv')

    # Replace the text categories with integer codes.
    # NOTE(review): these codes must agree with the hand-written encodings
    # applied to the user's input - confirm against the CSV's label values.
    for column in ('Gender', 'Vehicle_Age', 'Vehicle_Damage'):
        df[column] = LabelEncoder().fit_transform(df[column])

    x = df.drop(columns=['id', 'Response'])
    y = df['Response']

    # Balance the target column by oversampling.
    smt = SMOTE(k_neighbors=8, random_state=10)
    x_new, y_new = smt.fit_resample(x, y)

    # Keep the same 70 % training split as before; the held-out part is not
    # used by this function.
    xtrain, _, ytrain, _ = train_test_split(
        x_new, y_new, test_size=.30, random_state=0
    )

    # Fit the scaler first so training and inference share one feature scale.
    scaler = StandardScaler()
    xtrain_scaled = scaler.fit_transform(xtrain)

    model_xgb = XGBClassifier()
    model_xgb.fit(xtrain_scaled, ytrain)

    # Apply the training-set scaling to the user's row, then predict.
    report_scaled = scaler.transform(report_data)
    response = model_xgb.predict(report_scaled)

    # ``predict`` returns one label per input row; only one row is scored.
    if response[0] == 1:
        return 'Status of Customer. This customer willing to buy a vehicle insurance'
    return 'Status of Customer. This customer will not buy a vehicle insurance'
# Run the model only when the user asks for a prediction. prediction()
# reloads the CSV and refits the classifier on every call, and Streamlit
# re-executes this script on each widget interaction, so calling it
# unconditionally retrained the model on every sidebar change.
if st.button("Predict"):
    y_pred = prediction(user_data)
    st.subheader(y_pred)
# Reference text describing each model input and the target column, rendered
# at the bottom of the main panel.
feature_notes = """Features Used:
The following are the input Varibles of a customer which Company needs to be enter, and then the application will predict whether that particular
person/customer will be willing to buy Vehicle Insurance or not
1) Gender : Gender of the customer
2) Age : Age of the customer
3) Driving_License : 0 - Customer does not have DL,1 - Customer already has DL
4) Region_Code : Unique code for the region of the customer
5) Previously_Insured : 1 - Customer already has Vehicle Insurance, 0-Customer doesn't have Vehicle Insurance
6) Vehicle_Age : Age of the Vehicle
7) Vehicle_Damage : 1 - Customer got his/her vehicle damaged in the past. 0 -Customer didn't get his/her vehicle damaged in the past.
8) Annual_Premium : The amount customer needs to pay as premium in the year
9) PolicySalesChannel : Anonymized Code for the channel of outreaching to the customer ie. Different Agents, Over Mail, Over Phone, In Person, etc.
10) Vintage : Number of Days, Customer has been associated with the company
Target Column/Prediction
Response : 1 - Customer is interested, 0 - Customer is not interested"""
st.write(feature_notes)
|