Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import streamlit as st | |
| import pickle | |
| from sklearn.preprocessing import StandardScaler | |
| from xgboost import XGBClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder | |
# Introductory copy shown under the page title; kept in one place so the
# rendering calls below stay short.
_APP_INTRO = """This application uses XGBoost Classifier to perform cross cell predictin. Now, question arises what is Cross Sell Prediction?
So, Cross-selling involves selling complementary products to existing customers. It is one of the highly effective techniques in the marketing industry.
The project uses the dataset of customers of Health Insurance company and the problem the statement is as follows:
to build a model to predict whether a customer would be interested in Vehicle Insurance is extremely helpful for the company because
it can then accordingly plan its communication strategy to reach out to those customers and optimize its business model and revenue."""

# Main pane: page title followed by the explanation of the app.
st.title("Health Insurance Cross Sell Prediction")
st.write(_APP_INTRO)

# Sidebar: heading above the input widgets created by user_report().
st.sidebar.header('Customer Data')
| #df = pd.read_csv('health_insurance.csv') | |
| # DATA from user | |
def user_report():
    """Collect one customer's attributes from the sidebar widgets.

    Each categorical answer is converted to the integer code used as a model
    feature. Widgets are created in a fixed order, which is also the order
    in which they appear in the sidebar.

    Returns:
        pandas.DataFrame: a single row (index 0) with the columns Gender,
        Age, Driving_License, Region_Code, Previously_Insured, Vehicle_Age,
        Vehicle_Damage, Annual_Premium, Policy_Sales_Channel and Vintage.
    """
    sidebar = st.sidebar

    # Gender: Female -> 0, anything else (Male) -> 1.
    gender_choice = sidebar.selectbox("Gender", ("Male", "Female"))
    gender_code = 0 if gender_choice == 'Female' else 1

    customer_age = sidebar.slider('Age of Customer', 20, 85, 28)

    # Driving licence: NO -> 0, otherwise 1.
    licence_choice = sidebar.selectbox('has Driving_License?', ("YES", "NO"))
    licence_code = 0 if licence_choice == 'NO' else 1

    region = sidebar.number_input(
        'Enter the Region Code (any number between 0 to 52 )',
        min_value=0,
        max_value=52,
        step=1,
    )

    # Previously insured: YES -> 1, otherwise 0.
    insured_choice = sidebar.selectbox('is_previously_insured', ("YES", "NO"))
    insured_code = 1 if insured_choice == 'YES' else 0

    # Vehicle age: the two listed labels get 0 and 1, any other label gets 2.
    vehicle_age_codes = {'1-2 year': 0, '<1 year': 1}
    vehicle_age_choice = sidebar.selectbox(
        'Vechile Age', ('<1 year', '1-2 year', '>2 years')
    )
    vehicle_age_code = vehicle_age_codes.get(vehicle_age_choice, 2)

    # Vehicle damage: NO -> 0, otherwise 1.
    damage_choice = sidebar.selectbox('Is your Vechile Damaged', ("YES", "NO"))
    damage_code = 0 if damage_choice == 'NO' else 1

    premium = sidebar.slider('Enter Annual premium you pay', 2000, 60000, 5000)
    sales_channel = sidebar.number_input(
        "Policy Sales Channel(Enter any number between 1 to 160)",
        step=1,
        min_value=1,
        max_value=160,
    )
    vintage_days = sidebar.number_input(
        "Enter the number of days Associaed with company(Vintage)", step=1
    )

    row = {
        'Gender': gender_code,
        'Age': customer_age,
        'Driving_License': licence_code,
        'Region_Code': region,
        'Previously_Insured': insured_code,
        'Vehicle_Age': vehicle_age_code,
        'Vehicle_Damage': damage_code,
        'Annual_Premium': premium,
        'Policy_Sales_Channel': sales_channel,
        'Vintage': vintage_days,
    }
    return pd.DataFrame(row, index=[0])
# Read the sidebar inputs once per script run and echo the resulting
# one-row table back to the user in the main pane.
user_data = user_report()
st.header("Customer Data")
st.write(user_data)
def prediction(report_data):
    """Train the cross-sell classifier and classify a single customer.

    Loads ``health_insurance.csv``, label-encodes the ``Gender``,
    ``Vehicle_Age`` and ``Vehicle_Damage`` columns, oversamples the target
    with SMOTE, standardises the features, fits an ``XGBClassifier`` and
    predicts the class of ``report_data``.

    The model is retrained from the CSV on every call.

    Args:
        report_data: one-row DataFrame containing a column for every
            training feature (every CSV column except ``id`` and
            ``Response``), already numerically encoded.

    Returns:
        str: a status sentence saying whether the customer is predicted to
        buy vehicle insurance.

    Raises:
        KeyError: if ``report_data`` lacks one of the training feature
            columns.
    """
    # Imported here, as in the original, so the dependency is only needed
    # when a prediction is actually requested.
    from imblearn.over_sampling import SMOTE

    df = pd.read_csv('health_insurance.csv')

    # Turn the categorical text columns into integer codes.
    for column in ('Gender', 'Vehicle_Age', 'Vehicle_Damage'):
        df[column] = LabelEncoder().fit_transform(df[column])

    x = df.drop(columns=['id', 'Response'])
    y = df['Response']

    # Balance the target classes before training.
    smt = SMOTE(k_neighbors=8, random_state=10)
    x_new, y_new = smt.fit_resample(x, y)

    # Keep the same 70 % training subset as before; the held-out part is
    # not used by this function.
    xtrain, _, ytrain, _ = train_test_split(
        x_new, y_new, test_size=.30, random_state=0
    )

    # Fit the scaler BEFORE training so the model learns on exactly the
    # representation it will later be asked to predict on. Previously the
    # model was fitted on raw values and then fed a scaled row, which put
    # the input on a different scale from everything it was trained on.
    scaler = StandardScaler()
    xtrain_scaled = scaler.fit_transform(xtrain)

    model_xgb = XGBClassifier()
    model_xgb.fit(xtrain_scaled, ytrain)

    # Align the user's columns with the training column order, then apply
    # the same scaling that was applied to the training data.
    user_features = scaler.transform(report_data[x.columns])
    response = model_xgb.predict(user_features)

    # predict() returns an array; inspect the single row's label.
    if response[0] == 1:
        return 'Status of Customer. This customer willing to buy a vehicle insurance'
    return 'Status of Customer. This customer will not buy a vehicle insurance'
# Streamlit re-executes the whole script on every widget change, and
# prediction() reloads the CSV and retrains the model each time it is
# called. Run it only when the user actually presses the button.
if st.button("Predict"):
    y_pred = prediction(user_data)
    st.subheader(y_pred)
# Static reference text describing every input feature and the target.
_FEATURES_HELP = """Features Used:
The following are the input Varibles of a customer which Company needs to be enter, and then the application will predict whether that particular
person/customer will be willing to buy Vehicle Insurance or not
1) Gender : Gender of the customer
2) Age : Age of the customer
3) Driving_License : 0 - Customer does not have DL,1 - Customer already has DL
4) Region_Code : Unique code for the region of the customer
5) Previously_Insured : 1 - Customer already has Vehicle Insurance, 0-Customer doesn't have Vehicle Insurance
6) Vehicle_Age : Age of the Vehicle
7) Vehicle_Damage : 1 - Customer got his/her vehicle damaged in the past. 0 -Customer didn't get his/her vehicle damaged in the past.
8) Annual_Premium : The amount customer needs to pay as premium in the year
9) PolicySalesChannel : Anonymized Code for the channel of outreaching to the customer ie. Different Agents, Over Mail, Over Phone, In Person, etc.
10) Vintage : Number of Days, Customer has been associated with the company
Target Column/Prediction
Response : 1 - Customer is interested, 0 - Customer is not interested"""

# Shown at the bottom of the main pane, below the prediction controls.
st.write(_FEATURES_HELP)