Spaces:

Lachin
/

firstapp

Runtime error

File size: 5,030 Bytes

# import libraries
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle



# Page title rendered at the top of the Streamlit app.
st.title("Insurance Cost Predictor")

# Load the banner image from the working directory with matplotlib.
image = plt.imread("insurance.jpg")
# Display the loaded image with a caption underneath.
st.image(image, caption = "Health Matters the most")

# Load the dataset once at module level; the plotting functions and
# user_inputs() below read this `data` frame directly.
data = pd.read_csv("Insurance.csv")
# Section header introducing the raw data table.
st.header("The Dataset")
# Show the full dataset as an interactive table.
st.dataframe(data)

#creating plots

#gender distributions
def gender_distribution():
    """Draw a count plot of the ``sex`` column of ``data`` and show it in the app."""
    sns.set_theme()
    fig = plt.figure()
    # countplot draws on the current axes of the new figure and returns them.
    axes = sns.countplot(x=data['sex'])
    axes.set_xlabel("Sex")
    axes.set_ylabel("Frequency")
    axes.set_title("Gender Distribution of the Applicants")
    st.pyplot(fig)

#distribution of applicants across regions
def region():
    """Draw a count plot of the ``region`` column of ``data`` and show it in the app."""
    sns.set_theme()
    fig = plt.figure()
    # countplot draws on the current axes of the new figure and returns them.
    axes = sns.countplot(x=data['region'], palette="Set3")
    axes.set_xlabel("Region")
    axes.set_ylabel("Frequency")
    axes.set_title("Distribution of Regions")
    st.pyplot(fig)

# scatterplot
def scatterplot():
    """Scatter ``bmi`` against ``charges`` from ``data``, coloured by ``smoker``."""
    sns.set_theme()
    fig = plt.figure()
    # scatterplot draws on the current axes of the new figure and returns them.
    axes = sns.scatterplot(
        data=data,
        x=data['bmi'],
        y=data['charges'],
        hue=data['smoker'],
    )
    axes.set_xlabel("BMI value")
    axes.set_ylabel("Insurance Cost")
    axes.set_title("Scatter plot between BMI values and Insurance Cost")
    st.pyplot(fig)

#distribution plot
def age_distribution():
    """Plot the distribution of the ``age`` column of ``data`` and show it in the app.

    The figure combines a density-normalised histogram with a KDE curve and a
    rug of the individual observations. It is built from ``histplot`` and
    ``rugplot`` because ``distplot`` is deprecated in seaborn and scheduled
    for removal.
    """
    sns.set_theme()
    bar_colour = (0.7, 0.3, 0.2)
    fig, ax = plt.subplots()
    # stat="density" keeps the y axis a density, matching the "Density" label;
    # cut=3 lets the KDE curve extend past the data range as distplot did.
    sns.histplot(
        x=data['age'],
        kde=True,
        stat="density",
        kde_kws={"cut": 3},
        color=bar_colour,
        ax=ax,
    )
    # The rug marks every individual age along the x axis.
    sns.rugplot(x=data['age'], color=bar_colour, ax=ax)
    ax.set_xlabel("Age")
    ax.set_ylabel("Density")
    ax.set_title("Distribution of Age")
    st.pyplot(fig)

#Function to visualize graphs
def visulaizations():
    """Show the "Data Visualization" section with a radio-selected plot.

    A radio button offers four options; the plotting function mapped to the
    chosen option is called to render its figure.
    """
    st.header("Data Visualization")

    # Option label -> plotting function; insertion order is the display order.
    plotters = {
        "Gender Distribution": gender_distribution,
        "Regions and Frequencies": region,
        "BMI vs Charges": scatterplot,
        "Age": age_distribution,
    }
    title = st.radio("Select a variable", list(plotters))

    draw = plotters.get(title)
    if draw is not None:
        draw()


visulaizations()

#more visualizations

#barplot
def children():
    """Draw a count plot of the ``children`` column of ``data`` and show it in the app."""
    sns.set_theme()
    fig = plt.figure()
    # countplot draws on the current axes of the new figure and returns them.
    axes = sns.countplot(x=data['children'])
    axes.set_xlabel("Number of childrens")
    axes.set_ylabel("Frequency")
    axes.set_title("Frequency Distribution according to the number of children")
    st.pyplot(fig)

#scatterplot
def scatterplot1():
    """Scatter ``age`` against ``charges`` from ``data``, coloured by ``smoker``."""
    sns.set_theme()
    fig = plt.figure()
    # scatterplot draws on the current axes of the new figure and returns them.
    axes = sns.scatterplot(
        data=data,
        x=data['age'],
        y=data['charges'],
        hue=data['smoker'],
    )
    axes.set_xlabel("Age")
    axes.set_ylabel("Insurance Cost")
    axes.set_title("Scatter plot between Age and Insurance Cost")
    st.pyplot(fig)

#creating columns
# Two side-by-side columns, each hosting one of the extra plots.
st.subheader("More Visualizations")
col1, col2 = st.columns(2)
for column, draw in ((col1, children), (col2, scatterplot1)):
    # Anything rendered inside the `with` lands in that column.
    with column:
        draw()


#user input

# function to find BMI value
def bmi(height, weight):
    """Return the body-mass index for a height in centimetres and a weight in kilograms.

    The height is converted to metres before applying weight / height**2.
    """
    return weight / (height / 100) ** 2

#Getting user input
def user_inputs():
    """Collect the applicant's details from dashboard widgets and encode them.

    Renders the "User Dashboard" widgets, derives the BMI from the chosen
    height and weight, and one-hot encodes the categorical answers. The
    encoded row is also displayed under a "User Data" subheader.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are all floats: ``age``,
        ``bmi`` and the dummy columns for ``sex``, ``children``, ``smoker``
        and ``region`` (first level of each dropped).
    """
    st.header("User Dashboard")

    # Dashboard widgets. The local names deliberately differ from the
    # module-level children()/region() plotting functions to avoid shadowing.
    age = st.slider('Your Age', 1, 100, 30)
    sex = st.selectbox('Select Your Gender', ['male', 'female'])
    height = st.slider('Your Height in (cm)', 50, 250, 150)
    weight = st.slider('Your Weight in (Kg)', 10, 150, 50)
    n_children = st.selectbox('Number of Childrens in Your Family', [0, 1, 2, 3, 4, 5])
    smoker = st.radio('Smoker', ['yes', 'no'])
    home_region = st.radio('Select Your Region', ['northeast', 'northwest', 'southeast', 'southwest'])

    input_data = {
        'age': age,
        'sex': sex,
        'bmi': bmi(height, weight),
        'children': n_children,
        'smoker': smoker,
        'region': home_region,
    }

    # Feature columns only; drop() returns a new frame, so `data` is untouched.
    features = data.drop(columns='charges')
    user_data = pd.DataFrame(input_data, index=[0])

    # Append the user's row to the full dataset before encoding so that the
    # dummy columns cover every category present in the data, not just the
    # single values the user picked.
    # NOTE(review): presumably this reproduces the column layout the model was
    # trained on -- confirm against the training code.
    user_df = pd.concat([features, user_data], ignore_index=True, axis=0)
    user_dum_data = pd.get_dummies(
        user_df,
        columns=['sex', 'children', 'smoker', 'region'],
        drop_first=True,
        dtype=float,
    )

    # Keep the last row as a DataFrame (double brackets). Going through a
    # Series would collapse the mixed column dtypes into one, which yields
    # object columns when the dummies are booleans on newer pandas versions.
    final_user_data = user_dum_data.iloc[[-1]].reset_index(drop=True).astype(float)

    st.subheader("User Data")
    st.dataframe(final_user_data)
    return final_user_data

user_results = user_inputs()

# Cost predictions

# Load the trained model. The context manager closes the file handle as soon
# as unpickling finishes instead of leaving it open.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# deploy a model file that comes from a trusted source.
with open("insurance_predict.pkl", 'rb') as model_file:
    model = pickle.load(model_file)
# Predict the insurance cost for the single encoded user row.
results = list(model.predict(user_results))

# Show the prediction and the model's error metric side by side.
st.subheader("Predictions and Accuracies")
col3, col4 = st.columns(2)
with col3:
    st.metric("Insurance cost", str(results[0]))
with col4:
    # Hard-coded value; presumably the RMSE measured when the model was
    # trained -- TODO confirm it matches the shipped model file.
    st.metric("RMSE of the model", 4674.719889567355)