File size: 5,030 Bytes
db7f6aa
 
 
 
 
d92a8b5
db7f6aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d92a8b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# import libraries
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle



# Page title
st.title("Insurance Cost Predictor")

# Load the banner image from disk
image = plt.imread("insurance.jpg")
# Show the banner with its caption
st.image(image, caption = "Health Matters the most")

# Read the dataset; the plotting helpers and user_inputs() below read this
# module-level frame directly
data = pd.read_csv("Insurance.csv")
# Section header for the raw data
st.header("The Dataset")
# Display the dataframe on the page
st.dataframe(data)

#creating plots

#gender distributions
def gender_distribution():
    """Render a count plot of the ``sex`` column on the Streamlit page.

    Reads the module-level ``data`` frame and returns nothing.
    """
    sns.set_theme()
    fig = plt.figure()
    try:
        ax = sns.countplot(x=data['sex'])
        ax.set(
            xlabel="Sex",
            ylabel="Frequency",
            title="Gender Distribution of the Applicants",
        )
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each call would leave another figure in memory.
        plt.close(fig)

#region distribution
def region():
    """Render a count plot of the ``region`` column on the Streamlit page.

    Reads the module-level ``data`` frame and returns nothing.
    """
    sns.set_theme()
    fig = plt.figure()
    try:
        ax = sns.countplot(x=data['region'], palette="Set3")
        ax.set(
            xlabel="Region",
            ylabel="Frequency",
            title="Distribution of Regions",
        )
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each call would leave another figure in memory.
        plt.close(fig)

# scatterplot
def scatterplot():
    """Render a BMI-versus-charges scatter plot coloured by smoker status.

    Reads the module-level ``data`` frame and returns nothing.
    """
    sns.set_theme()
    fig = plt.figure()
    try:
        # Column names are enough once ``data`` is supplied; there is no need
        # to pass the Series objects a second time.
        ax = sns.scatterplot(data=data, x='bmi', y='charges', hue='smoker')
        ax.set(
            xlabel="BMI value",
            ylabel="Insurance Cost",
            title="Scatter plot between BMI values and Insurance Cost",
        )
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each call would leave another figure in memory.
        plt.close(fig)

#distribution plot
def age_distribution():
    """Render the age distribution (histogram, KDE curve and rug marks).

    Reads the module-level ``data`` frame and returns nothing.
    """
    sns.set_theme()
    fig = plt.figure()
    try:
        tint = (0.7, 0.3, 0.2)
        # ``distplot`` is deprecated in seaborn; ``histplot`` with a density
        # statistic and a KDE overlay, plus ``rugplot``, is the replacement.
        ax = sns.histplot(
            x=data['age'],
            kde=True,
            stat="density",
            kde_kws={"cut": 3},
            alpha=0.4,
            edgecolor=(1, 1, 1, 0.4),
            color=tint,
        )
        sns.rugplot(x=data['age'], color=tint, ax=ax)
        ax.set(xlabel="Age", ylabel="Density", title="Distribution of Age")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each call would leave another figure in memory.
        plt.close(fig)

#Function to visualize graphs
def visulaizations():
    """Show the "Data Visualization" section with a radio-driven plot picker.

    The radio widget lists the available plots; the plotting function that
    matches the selected label is called to draw into the page.
    """
    st.header("Data Visualization")

    # Radio label -> function that draws the corresponding plot.
    plotters = {
        "Gender Distribution": gender_distribution,
        "Regions and Frequencies": region,
        "BMI vs Charges": scatterplot,
        "Age": age_distribution,
    }
    choice = st.radio("Select a variable", list(plotters))

    draw = plotters.get(choice)
    if draw is not None:
        draw()


# Draw the visualization section (header, radio selector and chosen plot)
visulaizations()

#more visualizations

#barplot
def children():
    """Render a count plot of the ``children`` column on the Streamlit page.

    Reads the module-level ``data`` frame and returns nothing.
    """
    sns.set_theme()
    fig = plt.figure()
    try:
        ax = sns.countplot(x=data['children'])
        ax.set(
            xlabel="Number of childrens",
            ylabel="Frequency",
            title="Frequency Distribution according to the number of children",
        )
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each call would leave another figure in memory.
        plt.close(fig)

#scatterplot
def scatterplot1():
    """Render an age-versus-charges scatter plot coloured by smoker status.

    Reads the module-level ``data`` frame and returns nothing.
    """
    sns.set_theme()
    fig = plt.figure()
    try:
        # Column names are enough once ``data`` is supplied; there is no need
        # to pass the Series objects a second time.
        ax = sns.scatterplot(data=data, x='age', y='charges', hue='smoker')
        ax.set(
            xlabel="Age",
            ylabel="Insurance Cost",
            title="Scatter plot between Age and Insurance Cost",
        )
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each call would leave another figure in memory.
        plt.close(fig)

# "More Visualizations" section: two plots side by side
st.subheader("More Visualizations")
# Split the page into two columns
col1, col2 = st.columns(2)
# First column: count plot of the number of children
with col1:
    children()
# Second column: age vs. charges scatter plot
with col2:
    scatterplot1()


#user input

# function to find BMI value
def bmi(height, weight):
    """Return the body-mass index for a height in centimetres and a weight.

    The height is divided by 100 (centimetres to metres) and the weight is
    divided by the square of that value.
    """
    return weight / (height / 100) ** 2

#Getting user input
def user_inputs():
    """Collect the applicant's details from widgets and return them encoded.

    Draws the "User Dashboard" widgets, computes BMI from the chosen height
    and weight, appends the answers as one extra row to the dataset (minus
    the ``charges`` column), dummy-encodes the categorical columns and
    returns the encoded last row as a one-row DataFrame. The same row is
    also displayed under a "User Data" subheader.
    """
    st.header("User Dashboard")

    # Widgets are created in the order they appear on the page.
    age = st.slider('Your Age',1,100,30)
    sex = st.selectbox('Select Your Gender',['male','female'])
    height = st.slider('Your Height in (cm)',50,250,150)
    weight = st.slider('Your Weight in (Kg)',10,150,50)
    n_children = st.selectbox('Number of Childrens in Your Family',[0,1,2,3,4,5])
    smoker = st.radio('Smoker', ['yes','no'])
    home_region = st.radio('Select Your Region',['northeast','northwest','southeast','southwest'])

    # One-row frame holding the answers, keyed like the dataset's columns.
    answers = pd.DataFrame(
        {
            'age': age,
            'sex': sex,
            'bmi': bmi(height, weight),
            'children': n_children,
            'smoker': smoker,
            'region': home_region,
        },
        index=[0],
    )

    # Dataset features without the target column; ``drop`` returns a new
    # frame, so the module-level ``data`` is left untouched.
    features = data.drop('charges', axis=1)

    # Encode the answers together with the dataset rows -- presumably so the
    # dummy columns match the ones the model was trained on (confirm).
    combined = pd.concat([features, answers], ignore_index=True, axis=0)
    encoded = pd.get_dummies(
        combined,
        columns=['sex','children','smoker','region'],
        drop_first=True,
    )

    # The appended answers are the final row of the encoded frame.
    answer_row = encoded.iloc[-1]
    final_user_data = pd.DataFrame([answer_row.array], columns=encoded.columns)

    st.subheader("User Data")
    st.dataframe(final_user_data)
    return final_user_data

# Collect the user's answers as a one-row, dummy-encoded frame
user_results = user_inputs()

#Cost predictions

# Load the trained model. The context manager closes the file handle as soon
# as unpickling finishes instead of leaving it open.
# NOTE(review): unpickling can execute arbitrary code -- only load a model
# file that comes from a trusted source.
with open("insurance_predict.pkl", 'rb') as model_file:
    model = pickle.load(model_file)
# Predict the insurance cost for the user's row
results = list(model.predict(user_results))

# Show the prediction next to the model's reported RMSE
st.subheader("Predictions and Accuracies")
col3,col4 = st.columns(2)
with col3:
    st.metric("Insurance cost", str(results[0]))
with col4:
    st.metric("RMSE of the model", 4674.719889567355)