File size: 5,030 Bytes
db7f6aa d92a8b5 db7f6aa d92a8b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# import libraries
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle
# Page title shown at the top of the Streamlit app
st.title("Insurance Cost Predictor")
# Load the banner image from a path relative to the working directory
image = plt.imread("insurance.jpg")
# Render the banner image together with its caption
st.image(image, caption = "Health Matters the most")
# Load the dataset; the plotting helpers and the user-input code below read `data`
data = pd.read_csv("Insurance.csv")
# Section header above the raw data table
st.header("The Dataset")
# Display the loaded dataframe on the page
st.dataframe(data)
# Plot helpers
# Count of applicants per sex
def gender_distribution():
    """Draw a count plot of the ``sex`` column and render it in Streamlit."""
    sns.set_theme()
    fig = plt.figure()
    # countplot draws on the current figure and hands back its Axes
    axes = sns.countplot(x=data['sex'])
    axes.set_xlabel("Sex")
    axes.set_ylabel("Frequency")
    axes.set_title("Gender Distribution of the Applicants")
    st.pyplot(fig)
# Count of applicants per region
def region():
    """Draw a count plot of the ``region`` column and render it in Streamlit."""
    sns.set_theme()
    fig = plt.figure()
    # countplot draws on the current figure and hands back its Axes
    axes = sns.countplot(x=data['region'], palette="Set3")
    axes.set_xlabel("Region")
    axes.set_ylabel("Frequency")
    axes.set_title("Distribution of Regions")
    st.pyplot(fig)
# BMI against charges, coloured by smoker status
def scatterplot():
    """Draw a BMI-vs-charges scatter plot (hue = smoker) and render it in Streamlit."""
    sns.set_theme()
    fig = plt.figure()
    # Columns are addressed by key; seaborn looks them up in `data`
    axes = sns.scatterplot(data=data, x='bmi', y='charges', hue='smoker')
    axes.set_xlabel("BMI value")
    axes.set_ylabel("Insurance Cost")
    axes.set_title("Scatter plot between BMI values and Insurance Cost")
    st.pyplot(fig)
# Distribution of applicant ages
def age_distribution():
    """Draw the age distribution (histogram, KDE curve and rug) in Streamlit.

    Uses ``histplot`` + ``rugplot`` instead of ``sns.distplot``, which seaborn
    has deprecated and scheduled for removal. ``stat="density"`` keeps the
    y-axis on a density scale so the "Density" label stays accurate, and
    ``kde_kws={"cut": 3}`` keeps the KDE curve extending past the data
    extremes as ``distplot`` did.
    """
    sns.set_theme()
    fig = plt.figure()
    colour = (0.7, 0.3, 0.2)
    axes = sns.histplot(
        x=data['age'],
        kde=True,
        stat="density",
        kde_kws={"cut": 3},
        color=colour,
    )
    # Rug marks were part of the original plot (rug=True)
    sns.rugplot(x=data['age'], color=colour, ax=axes)
    axes.set_xlabel("Age")
    axes.set_ylabel("Density")
    axes.set_title("Distribution of Age")
    st.pyplot(fig)
# Chart picker for the main visualizations
def visulaizations():
    """Show a radio selector and render the chart that matches the choice."""
    st.header("Data Visualization")
    # Radio label -> plotting function; dict order is the order of the options
    renderers = {
        "Gender Distribution": gender_distribution,
        "Regions and Frequencies": region,
        "BMI vs Charges": scatterplot,
        "Age": age_distribution,
    }
    title = st.radio("Select a variable", list(renderers))
    draw = renderers.get(title)
    # An unrecognised selection draws nothing
    if draw is not None:
        draw()


visulaizations()
# Additional visualizations
# Count of applicants per number of children
def children():
    """Draw a count plot of the ``children`` column and render it in Streamlit."""
    sns.set_theme()
    fig = plt.figure()
    # countplot draws on the current figure and hands back its Axes
    axes = sns.countplot(x=data['children'])
    axes.set_xlabel("Number of childrens")
    axes.set_ylabel("Frequency")
    axes.set_title("Frequency Distribution according to the number of children")
    st.pyplot(fig)
# Age against charges, coloured by smoker status
def scatterplot1():
    """Draw an age-vs-charges scatter plot (hue = smoker) and render it in Streamlit."""
    sns.set_theme()
    fig = plt.figure()
    # Columns are addressed by key; seaborn looks them up in `data`
    axes = sns.scatterplot(data=data, x='age', y='charges', hue='smoker')
    axes.set_xlabel("Age")
    axes.set_ylabel("Insurance Cost")
    axes.set_title("Scatter plot between Age and Insurance Cost")
    st.pyplot(fig)
# Two side-by-side columns, one extra chart in each
st.subheader("More Visualizations")
col1, col2 = st.columns(2)
# Left column: children count plot; right column: age-vs-charges scatter
for column, draw in ((col1, children), (col2, scatterplot1)):
    with column:
        draw()
# User input
# Body-mass index from height and weight
def bmi(height, weight):
    """Return ``weight`` divided by the square of ``height / 100``.

    The dashboard passes height in centimetres and weight in kilograms.
    """
    metres = height / 100
    return weight / (metres ** 2)
# Collect the user's inputs and encode them
def user_inputs():
    """Render the user dashboard and return the user's inputs as one encoded row.

    Returns a single-row dataframe whose columns are those produced by
    ``pd.get_dummies`` on the dataset (without ``charges``) plus the user's row.
    """
    st.header("User Dashboard")

    # Widgets appear on the page in the order they are created here
    age = st.slider('Your Age', 1, 100, 30)
    sex = st.selectbox('Select Your Gender', ['male', 'female'])
    height = st.slider('Your Height in (cm)', 50, 250, 150)
    weight = st.slider('Your Weight in (Kg)', 10, 150, 50)
    n_children = st.selectbox('Number of Childrens in Your Family', [0, 1, 2, 3, 4, 5])
    smoker = st.radio('Smoker', ['yes', 'no'])
    home_region = st.radio(
        'Select Your Region',
        ['northeast', 'northwest', 'southeast', 'southwest'],
    )

    # One-row frame holding the user's answers; BMI is derived from height/weight
    user_row = pd.DataFrame(
        {
            'age': age,
            'sex': sex,
            'bmi': bmi(height, weight),
            'children': n_children,
            'smoker': smoker,
            'region': home_region,
        },
        index=[0],
    )

    # Feature columns of the dataset (target removed); `data` itself is not modified
    features = data.drop('charges', axis=1)

    # Append the user's row and encode everything together -- presumably so the
    # dummy columns cover every category present in the dataset (confirm against
    # how the model was trained)
    combined = pd.concat([features, user_row], ignore_index=True, axis=0)
    encoded = pd.get_dummies(
        combined,
        columns=['sex', 'children', 'smoker', 'region'],
        drop_first=True,
    )

    # The last row is the user's; rebuild it as a one-row dataframe
    last_row = encoded.iloc[-1, :]
    final_user_data = pd.DataFrame([last_row.array], columns=encoded.columns)

    st.subheader("User Data")
    st.dataframe(final_user_data)
    return final_user_data
# Render the dashboard and collect the user's encoded input row
user_results = user_inputs()
# Cost prediction
# Load the trained model; the context manager closes the file handle afterwards.
# NOTE: unpickling can execute arbitrary code, so "insurance_predict.pkl" must
# come from a trusted source.
with open("insurance_predict.pkl", 'rb') as model_file:
    model = pickle.load(model_file)
# Predict the cost for the single user row
results = list(model.predict(user_results))
# Show the prediction next to the model's reported error
st.subheader("Predictions and Accuracies")
col3, col4 = st.columns(2)
with col3:
    st.metric("Insurance cost", str(results[0]))
with col4:
    # Hard-coded RMSE value; it is not computed anywhere in this script
    st.metric("RMSE of the model", 4674.719889567355)