firstapp / app.py
Lachin's picture
predictions added
d92a8b5
# import libraries
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle
# Page title shown at the top of the app
st.title("Insurance Cost Predictor")
# Read the banner image from disk (path is relative to the working directory)
image = plt.imread("insurance.jpg")
# Display the banner image together with its caption
st.image(image, caption = "Health Matters the most")
# Load the dataset; `data` is read by the plotting functions and by user_inputs()
data = pd.read_csv("Insurance.csv")
# Section header for the raw data table
st.header("The Dataset")
# Show the loaded dataset as a table
st.dataframe(data)
#creating plots
#gender distributions
def gender_distribution():
    """Render a count plot of the ``sex`` column of the module-level ``data``.

    The plot is drawn on an explicit Axes instead of pyplot's implicit
    "current" figure, and the figure is closed once Streamlit has rendered it.
    """
    sns.set_theme()
    fig, ax = plt.subplots()
    try:
        sns.countplot(x=data['sex'], ax=ax)
        ax.set_xlabel("Sex")
        ax.set_ylabel("Frequency")
        ax.set_title("Gender Distribution of the Applicants")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each script rerun would leave another figure in memory.
        plt.close(fig)
#population
def region():
    """Render a count plot of the ``region`` column of the module-level ``data``.

    The plot is drawn on an explicit Axes instead of pyplot's implicit
    "current" figure, and the figure is closed once Streamlit has rendered it.
    """
    sns.set_theme()
    fig, ax = plt.subplots()
    try:
        # NOTE(review): recent seaborn releases warn when `palette` is passed
        # without `hue`; kept as-is so older seaborn versions keep working.
        sns.countplot(x=data['region'], palette="Set3", ax=ax)
        ax.set_xlabel("Region")
        ax.set_ylabel("Frequency")
        ax.set_title("Distribution of Regions")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each script rerun would leave another figure in memory.
        plt.close(fig)
# scatterplot
def scatterplot():
    """Render a scatter plot of ``charges`` against ``bmi``, coloured by ``smoker``.

    Reads the module-level ``data`` frame. The plot is drawn on an explicit
    Axes and the figure is closed once Streamlit has rendered it.
    """
    sns.set_theme()
    fig, ax = plt.subplots()
    try:
        # Columns are referenced by name since the frame is passed as `data`.
        sns.scatterplot(data=data, x='bmi', y='charges', hue='smoker', ax=ax)
        ax.set_xlabel("BMI value")
        ax.set_ylabel("Insurance Cost")
        ax.set_title("Scatter plot between BMI values and Insurance Cost")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each script rerun would leave another figure in memory.
        plt.close(fig)
#distribution plot
def age_distribution():
    """Render the distribution of the ``age`` column of the module-level ``data``.

    Draws a density-normalised histogram with a KDE curve and a rug of the
    individual observations. ``histplot`` + ``rugplot`` replace the deprecated
    ``distplot`` call. The figure is closed once Streamlit has rendered it.
    """
    sns.set_theme()
    age_color = (0.7, 0.3, 0.2)
    fig, ax = plt.subplots()
    try:
        # stat="density" and cut=3 mirror what distplot drew by default.
        sns.histplot(
            x=data['age'],
            kde=True,
            stat="density",
            kde_kws={"cut": 3},
            color=age_color,
            ax=ax,
        )
        # distplot's rug=True is now a separate axes-level call.
        sns.rugplot(x=data['age'], color=age_color, ax=ax)
        ax.set_xlabel("Age")
        ax.set_ylabel("Density")
        ax.set_title("Distribution of Age")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each script rerun would leave another figure in memory.
        plt.close(fig)
#Function to visualize graphs
def visulaizations():
    """Show the "Data Visualization" section with a radio-button plot selector.

    The radio options map one-to-one onto the plotting functions defined in
    this module; the function for the selected option is called to draw it.
    """
    st.header("Data Visualization")

    # Option label -> function that draws the matching plot. Insertion order
    # is the order in which the radio buttons are listed.
    plotters = {
        "Gender Distribution": gender_distribution,
        "Regions and Frequencies": region,
        "BMI vs Charges": scatterplot,
        "Age": age_distribution,
    }
    choice = st.radio("Select a variable", list(plotters))

    draw = plotters.get(choice)
    if draw is not None:
        draw()
# Render the "Data Visualization" section at this point of the page
visulaizations()
#more visualizations
#barplot
def children():
    """Render a count plot of the ``children`` column of the module-level ``data``.

    The plot is drawn on an explicit Axes instead of pyplot's implicit
    "current" figure, and the figure is closed once Streamlit has rendered it.
    """
    sns.set_theme()
    fig, ax = plt.subplots()
    try:
        sns.countplot(x=data['children'], ax=ax)
        ax.set_xlabel("Number of childrens")
        ax.set_ylabel("Frequency")
        ax.set_title("Frequency Distribution according to the number of children")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each script rerun would leave another figure in memory.
        plt.close(fig)
#scatterplot
def scatterplot1():
    """Render a scatter plot of ``charges`` against ``age``, coloured by ``smoker``.

    Reads the module-level ``data`` frame. The plot is drawn on an explicit
    Axes and the figure is closed once Streamlit has rendered it.
    """
    sns.set_theme()
    fig, ax = plt.subplots()
    try:
        # Columns are referenced by name since the frame is passed as `data`.
        sns.scatterplot(data=data, x='age', y='charges', hue='smoker', ax=ax)
        ax.set_xlabel("Age")
        ax.set_ylabel("Insurance Cost")
        ax.set_title("Scatter plot between Age and Insurance Cost")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each script rerun would leave another figure in memory.
        plt.close(fig)
# "More Visualizations": two plots side by side, one per column
st.subheader("More Visualizations")
col1, col2 = st.columns(2)
# Left column gets the children count plot, right column the age/charges scatter
for column, draw_plot in ((col1, children), (col2, scatterplot1)):
    with column:
        draw_plot()
#user input
# function to find BMI value
def bmi(height, weight):
    """Return the body-mass index for a height in centimetres and a weight in kilograms.

    The height is converted to metres and the weight is divided by the square
    of that value.
    """
    return weight / ((height / 100) ** 2)
#Getting user input
def user_inputs():
    """Collect the user's details from widgets and return them one-hot encoded.

    Renders the "User Dashboard" widgets, derives the BMI from the chosen
    height and weight, encodes the categorical answers together with the
    module-level ``data`` frame, displays the encoded row under "User Data",
    and returns it.

    Returns:
        pandas.DataFrame: a single-row frame with the encoded user input,
        using the same dummy columns that ``data`` (minus ``charges``) yields.
    """
    st.header("User Dashboard")

    # Widgets are created in the order they appear on the page.
    age_years = st.slider('Your Age', 1, 100, 30)
    gender = st.selectbox('Select Your Gender', ['male', 'female'])
    height_cm = st.slider('Your Height in (cm)', 50, 250, 150)
    weight_kg = st.slider('Your Weight in (Kg)', 10, 150, 50)
    child_count = st.selectbox('Number of Childrens in Your Family', [0, 1, 2, 3, 4, 5])
    smokes = st.radio('Smoker', ['yes', 'no'])
    home_region = st.radio(
        'Select Your Region',
        ['northeast', 'northwest', 'southeast', 'southwest'],
    )

    # One-row frame holding the raw answers, keyed like the dataset columns.
    answers = pd.DataFrame(
        {
            'age': age_years,
            'sex': gender,
            'bmi': bmi(height_cm, weight_kg),
            'children': child_count,
            'smoker': smokes,
            'region': home_region,
        },
        index=[0],
    )

    # Dataset features without the prediction target; `data` itself is not modified.
    features = data.drop('charges', axis=1)

    # Encoding the answers together with the dataset makes get_dummies emit a
    # column for every category present in the data, not only the user's picks.
    combined = pd.concat([features, answers], ignore_index=True, axis=0)
    encoded = pd.get_dummies(
        combined,
        columns=['sex', 'children', 'smoker', 'region'],
        drop_first=True,
    )

    # The user's answers were appended last, so they are the final row.
    last_row = encoded.iloc[-1, :]
    final_user_data = pd.DataFrame([last_row.array], columns=encoded.columns)

    st.subheader("User Data")
    st.dataframe(final_user_data)
    return final_user_data
# Render the dashboard widgets and get the encoded single-row user input
user_results = user_inputs()

# Cost predictions
# Load the trained model; the context manager guarantees the file handle is
# closed once the model has been read.
# NOTE: unpickling can execute arbitrary code, so the model file must come
# from a trusted source.
with open("insurance_predict.pkl", 'rb') as model_file:
    model = pickle.load(model_file)

# Predict the cost for the user's row
results = list(model.predict(user_results))

# Show the prediction and the model's error metric side by side
st.subheader("Predictions and Accuracies")
col3, col4 = st.columns(2)
with col3:
    st.metric("Insurance cost", str(results[0]))
with col4:
    # NOTE(review): hard-coded value, presumably from offline evaluation of the
    # pickled model; confirm it still matches the shipped model file.
    st.metric("RMSE of the model", 4674.719889567355)