|
|
|
|
|
import streamlit as st |
|
|
import matplotlib.pyplot as plt |
|
|
import pandas as pd |
|
|
import seaborn as sns |
|
|
import pickle |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page header -----------------------------------------------------------
st.title("Insurance Cost Predictor")

# Banner image, read from the working directory and shown with a caption.
image = plt.imread("insurance.jpg")
st.image(image, caption="Health Matters the most")

# --- Dataset ---------------------------------------------------------------
# ``data`` is the module-level DataFrame that the plotting helpers and the
# user-input encoder below read from.
data = pd.read_csv("Insurance.csv")

st.header("The Dataset")
st.dataframe(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def gender_distribution():
    """Render a count plot of the ``sex`` column of ``data`` in the app.

    Reads the module-level ``data`` DataFrame and writes the finished
    figure to the Streamlit page. Returns nothing.
    """
    sns.set_theme()

    # Draw on an explicit Axes instead of pyplot's implicit "current axes",
    # so the labels below always land on the figure created here.
    fig, ax = plt.subplots()
    sns.countplot(x=data['sex'], ax=ax)
    ax.set_xlabel("Sex")
    ax.set_ylabel("Frequency")
    ax.set_title("Gender Distribution of the Applicants")

    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates until it is closed;
    # release it once it has been rendered so figures do not pile up.
    plt.close(fig)
|
|
|
|
|
|
|
|
def region():
    """Render a count plot of the ``region`` column of ``data`` in the app.

    Reads the module-level ``data`` DataFrame, colours the bars with the
    "Set3" palette and writes the figure to the Streamlit page.
    Returns nothing.
    """
    sns.set_theme()

    # Explicit Axes: avoids depending on pyplot's implicit current figure.
    fig, ax = plt.subplots()
    sns.countplot(x=data['region'], palette="Set3", ax=ax)
    ax.set_xlabel("Region")
    ax.set_ylabel("Frequency")
    ax.set_title("Distribution of Regions")

    st.pyplot(fig)
    # Close the figure after rendering so pyplot does not keep it alive.
    plt.close(fig)
|
|
|
|
|
|
|
|
def scatterplot():
    """Render a BMI-versus-charges scatter plot, coloured by smoker status.

    Reads the ``bmi``, ``charges`` and ``smoker`` columns of the module-level
    ``data`` DataFrame and writes the figure to the Streamlit page.
    Returns nothing.
    """
    sns.set_theme()

    # Explicit Axes: avoids depending on pyplot's implicit current figure.
    fig, ax = plt.subplots()
    # Columns are referenced by name; seaborn looks them up in ``data``.
    sns.scatterplot(data=data, x='bmi', y='charges', hue='smoker', ax=ax)
    ax.set_xlabel("BMI value")
    ax.set_ylabel("Insurance Cost")
    ax.set_title("Scatter plot between BMI values and Insurance Cost")

    st.pyplot(fig)
    # Close the figure after rendering so pyplot does not keep it alive.
    plt.close(fig)
|
|
|
|
|
|
|
|
def age_distribution():
    """Render the distribution of the ``age`` column of ``data`` in the app.

    Draws a density-normalised histogram with a KDE curve and a rug of the
    individual observations, then writes the figure to the Streamlit page.
    Returns nothing.
    """
    sns.set_theme()

    # Explicit Axes: avoids depending on pyplot's implicit current figure.
    fig, ax = plt.subplots()
    tone = (0.7, 0.3, 0.2)

    # ``sns.distplot`` is deprecated; ``histplot`` + ``rugplot`` is seaborn's
    # documented replacement. ``stat="density"``, ``cut=3`` and the
    # translucent bars mirror what distplot drew by default.
    sns.histplot(
        x=data['age'],
        kde=True,
        stat="density",
        kde_kws={"cut": 3},
        color=tone,
        alpha=0.4,
        edgecolor=(1, 1, 1, 0.4),
        ax=ax,
    )
    sns.rugplot(x=data['age'], color=tone, ax=ax)

    ax.set_xlabel("Age")
    ax.set_ylabel("Density")
    ax.set_title("Distribution of Age")

    st.pyplot(fig)
    # Close the figure after rendering so pyplot does not keep it alive.
    plt.close(fig)
|
|
|
|
|
|
|
|
def visulaizations():
    """Show the "Data Visualization" section with a radio-selected plot.

    Displays a radio group of plot names and calls the plotting function
    that belongs to the selected entry. Returns nothing.
    """
    st.header("Data Visualization")

    # Single source of truth: the radio labels and the function each label
    # triggers live in one mapping, so they cannot drift apart.
    plots = {
        "Gender Distribution": gender_distribution,
        "Regions and Frequencies": region,
        "BMI vs Charges": scatterplot,
        "Age": age_distribution,
    }

    title = st.radio("Select a variable", list(plots))

    # An unrecognised selection draws nothing.
    draw = plots.get(title)
    if draw is not None:
        draw()
|
|
|
|
|
|
|
|
# Draw the interactive "Data Visualization" section.
visulaizations()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def children():
    """Render a count plot of the ``children`` column of ``data`` in the app.

    Reads the module-level ``data`` DataFrame and writes the finished
    figure to the Streamlit page. Returns nothing.
    """
    sns.set_theme()

    # Explicit Axes: avoids depending on pyplot's implicit current figure.
    fig, ax = plt.subplots()
    sns.countplot(x=data['children'], ax=ax)
    ax.set_xlabel("Number of childrens")
    ax.set_ylabel("Frequency")
    ax.set_title("Frequency Distribution according to the number of children")

    st.pyplot(fig)
    # Close the figure after rendering so pyplot does not keep it alive.
    plt.close(fig)
|
|
|
|
|
|
|
|
def scatterplot1():
    """Render an age-versus-charges scatter plot, coloured by smoker status.

    Reads the ``age``, ``charges`` and ``smoker`` columns of the module-level
    ``data`` DataFrame and writes the figure to the Streamlit page.
    Returns nothing.
    """
    sns.set_theme()

    # Explicit Axes: avoids depending on pyplot's implicit current figure.
    fig, ax = plt.subplots()
    # Columns are referenced by name; seaborn looks them up in ``data``.
    sns.scatterplot(data=data, x='age', y='charges', hue='smoker', ax=ax)
    ax.set_xlabel("Age")
    ax.set_ylabel("Insurance Cost")
    ax.set_title("Scatter plot between Age and Insurance Cost")

    st.pyplot(fig)
    # Close the figure after rendering so pyplot does not keep it alive.
    plt.close(fig)
|
|
|
|
|
|
|
|
# --- Additional plots, laid out side by side in two columns ----------------
st.subheader("More Visualizations")

col1, col2 = st.columns(2)

with col1:
    children()

with col2:
    scatterplot1()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def bmi(height, weight):
    """Return the body-mass index for a height in centimetres and a weight.

    Args:
        height: Height in centimetres; it is divided by 100 to get metres.
        weight: Weight, in kilograms according to the slider label used by
            the caller in this file.

    Returns:
        ``weight`` divided by the square of the height in metres.

    Raises:
        ZeroDivisionError: If ``height`` is 0.
    """
    height_m = height / 100
    return weight / (height_m ** 2)
|
|
|
|
|
|
|
|
def _encode_user_input(dataset, input_data):
    """One-hot encode one user record alongside ``dataset``'s feature rows.

    Args:
        dataset: DataFrame containing a ``charges`` column plus the feature
            columns ``sex``, ``children``, ``smoker`` and ``region``.
        input_data: Mapping of feature name to the user's single value.

    Returns:
        A one-row DataFrame (index 0) holding the encoded user record.
    """
    features = dataset.drop(columns='charges')
    user_row = pd.DataFrame(input_data, index=[0])

    # Encode the user row together with the dataset rows: the lone user row
    # has only one level per category, so encoding it alone would not
    # produce the full set of dummy columns.
    combined = pd.concat([features, user_row], ignore_index=True, axis=0)
    encoded = pd.get_dummies(
        combined,
        columns=['sex', 'children', 'smoker', 'region'],
        drop_first=True,
    )

    # A list indexer keeps the result a DataFrame, so each column retains
    # its own dtype instead of being collapsed into one object Series.
    return encoded.iloc[[-1]].reset_index(drop=True)


def user_inputs():
    """Collect the user's details from Streamlit widgets and encode them.

    Shows the "User Dashboard" widgets, derives the BMI from the entered
    height and weight, one-hot encodes the record and displays it under
    "User Data".

    Returns:
        A one-row DataFrame with the encoded user record. Presumably its
        columns match what the pickled model was trained on; verify against
        the training code.
    """
    st.header("User Dashboard")

    age = st.slider('Your Age', min_value=1, max_value=100, value=30)
    sex = st.selectbox('Select Your Gender', ['male', 'female'])
    height = st.slider('Your Height in (cm)', min_value=50, max_value=250, value=150)
    weight = st.slider('Your Weight in (Kg)', min_value=10, max_value=150, value=50)
    # Local names avoid shadowing the module-level ``children`` and
    # ``region`` plotting functions.
    n_children = st.selectbox('Number of Childrens in Your Family', [0, 1, 2, 3, 4, 5])
    smoker = st.radio('Smoker', ['yes', 'no'])
    home_region = st.radio(
        'Select Your Region',
        ['northeast', 'northwest', 'southeast', 'southwest'],
    )

    input_data = {
        'age': age,
        'sex': sex,
        'bmi': bmi(height, weight),
        'children': n_children,
        'smoker': smoker,
        'region': home_region,
    }

    final_user_data = _encode_user_input(data, input_data)

    st.subheader("User Data")
    st.dataframe(final_user_data)

    return final_user_data
|
|
|
|
|
# --- Prediction ------------------------------------------------------------
user_results = user_inputs()

# NOTE: unpickling executes code embedded in the file, so
# "insurance_predict.pkl" must come from a trusted source.
# The ``with`` block closes the file handle once the model is loaded.
with open("insurance_predict.pkl", 'rb') as model_file:
    model = pickle.load(model_file)

results = list(model.predict(user_results))

st.subheader("Predictions and Accuracies")

col3, col4 = st.columns(2)

with col3:
    # One input row was passed in, so the first prediction is the user's.
    st.metric("Insurance cost", str(results[0]))

with col4:
    st.metric("RMSE of the model", 4674.719889567355)