Spaces:

Lachin
/

firstapp

Runtime error

App Files Files Community

firstapp / app.py

Lachin

predictions added

d92a8b5 over 3 years ago

raw

history blame contribute delete

5.03 kB

	# import libraries
	import streamlit as st
	import matplotlib.pyplot as plt
	import pandas as pd
	import seaborn as sns
	import pickle



	# Page heading shown at the top of the app
	st.title("Insurance Cost Predictor")

	# Banner picture: read it with matplotlib, then hand the result to Streamlit
	image = plt.imread("insurance.jpg")
	st.image(image, caption="Health Matters the most")

	# Load the insurance records and display the raw table under its own header
	data = pd.read_csv("Insurance.csv")
	st.header("The Dataset")
	st.dataframe(data)

	#creating plots

	#gender distributions
	def gender_distribution():
	    """Render a count plot of the ``sex`` column on the Streamlit page.

	    Reads the module-level ``data`` frame and returns nothing; the chart is
	    emitted through ``st.pyplot``.
	    """
	    sns.set_theme()
	    # Draw on explicit axes instead of relying on pyplot's "current figure"
	    fig, ax = plt.subplots()
	    sns.countplot(x=data['sex'], ax=ax)
	    ax.set_xlabel("Sex")
	    ax.set_ylabel("Frequency")
	    ax.set_title("Gender Distribution of the Applicants")
	    st.pyplot(fig)
	    # pyplot keeps a figure registered until it is closed; the script is
	    # re-executed by Streamlit, so unclosed figures would pile up in memory
	    plt.close(fig)

	# region distribution
	def region():
	    """Render a count plot of the ``region`` column on the Streamlit page.

	    Reads the module-level ``data`` frame and returns nothing; the chart is
	    emitted through ``st.pyplot``.
	    """
	    sns.set_theme()
	    # Draw on explicit axes instead of relying on pyplot's "current figure"
	    fig, ax = plt.subplots()
	    sns.countplot(x=data['region'], palette="Set3", ax=ax)
	    ax.set_xlabel("Region")
	    ax.set_ylabel("Frequency")
	    ax.set_title("Distribution of Regions")
	    st.pyplot(fig)
	    # pyplot keeps a figure registered until it is closed; the script is
	    # re-executed by Streamlit, so unclosed figures would pile up in memory
	    plt.close(fig)

	# scatterplot
	def scatterplot():
	    """Render a BMI-versus-charges scatter plot, coloured by smoker status.

	    Reads the ``bmi``, ``charges`` and ``smoker`` columns of the module-level
	    ``data`` frame and returns nothing; the chart is emitted through
	    ``st.pyplot``.
	    """
	    sns.set_theme()
	    # Draw on explicit axes instead of relying on pyplot's "current figure"
	    fig, ax = plt.subplots()
	    # Columns are referenced by name since the frame is passed as ``data``
	    sns.scatterplot(data=data, x='bmi', y='charges', hue='smoker', ax=ax)
	    ax.set_xlabel("BMI value")
	    ax.set_ylabel("Insurance Cost")
	    ax.set_title("Scatter plot between BMI values and Insurance Cost")
	    st.pyplot(fig)
	    # pyplot keeps a figure registered until it is closed; the script is
	    # re-executed by Streamlit, so unclosed figures would pile up in memory
	    plt.close(fig)

	#distribution plot
	def age_distribution():
	    """Render the distribution of the ``age`` column on the Streamlit page.

	    Draws a density-scaled histogram with a KDE curve and a rug of the
	    individual observations. Reads the module-level ``data`` frame and
	    returns nothing; the chart is emitted through ``st.pyplot``.
	    """
	    sns.set_theme()
	    # Draw on explicit axes instead of relying on pyplot's "current figure"
	    fig, ax = plt.subplots()
	    tone = (0.7, 0.3, 0.2)
	    # histplot + rugplot replace the deprecated distplot(kde=True, rug=True);
	    # stat="density" keeps the y axis on the density scale the label promises
	    sns.histplot(x=data['age'], stat="density", kde=True, color=tone, ax=ax)
	    sns.rugplot(x=data['age'], color=tone, ax=ax)
	    ax.set_xlabel("Age")
	    ax.set_ylabel("Density")
	    ax.set_title("Distribution of Age")
	    st.pyplot(fig)
	    # pyplot keeps a figure registered until it is closed; the script is
	    # re-executed by Streamlit, so unclosed figures would pile up in memory
	    plt.close(fig)

	#Function to visualize graphs
	def visulaizations():
	    """Show the "Data Visualization" section with a radio-driven chart picker.

	    The selected radio label decides which of the plot functions is called.
	    """
	    st.header("Data Visualization")
	    # Each radio label is paired with the function that draws its chart;
	    # dict order is the order in which the options are listed
	    charts = {
	        "Gender Distribution": gender_distribution,
	        "Regions and Frequencies": region,
	        "BMI vs Charges": scatterplot,
	        "Age": age_distribution,
	    }
	    title = st.radio("Select a variable", list(charts))
	    draw = charts.get(title)
	    if draw is not None:
	        draw()


	visulaizations()

	#more visualizations

	#barplot
	def children():
	    """Render a count plot of the ``children`` column on the Streamlit page.

	    Reads the module-level ``data`` frame and returns nothing; the chart is
	    emitted through ``st.pyplot``.
	    """
	    sns.set_theme()
	    # Draw on explicit axes instead of relying on pyplot's "current figure"
	    fig, ax = plt.subplots()
	    sns.countplot(x=data['children'], ax=ax)
	    ax.set_xlabel("Number of childrens")
	    ax.set_ylabel("Frequency")
	    ax.set_title("Frequency Distribution according to the number of children")
	    st.pyplot(fig)
	    # pyplot keeps a figure registered until it is closed; the script is
	    # re-executed by Streamlit, so unclosed figures would pile up in memory
	    plt.close(fig)

	#scatterplot
	def scatterplot1():
	    """Render an age-versus-charges scatter plot, coloured by smoker status.

	    Reads the ``age``, ``charges`` and ``smoker`` columns of the module-level
	    ``data`` frame and returns nothing; the chart is emitted through
	    ``st.pyplot``.
	    """
	    sns.set_theme()
	    # Draw on explicit axes instead of relying on pyplot's "current figure"
	    fig, ax = plt.subplots()
	    # Columns are referenced by name since the frame is passed as ``data``
	    sns.scatterplot(data=data, x='age', y='charges', hue='smoker', ax=ax)
	    ax.set_xlabel("Age")
	    ax.set_ylabel("Insurance Cost")
	    ax.set_title("Scatter plot between Age and Insurance Cost")
	    st.pyplot(fig)
	    # pyplot keeps a figure registered until it is closed; the script is
	    # re-executed by Streamlit, so unclosed figures would pile up in memory
	    plt.close(fig)

	# Lay the two extra charts out next to each other in two columns
	st.subheader("More Visualizations")
	col1, col2 = st.columns(2)
	# Left column gets the children chart, right column the age scatter plot
	for column, draw_chart in ((col1, children), (col2, scatterplot1)):
	    with column:
	        draw_chart()


	#user input

	# function to find BMI value
	def bmi(height,weight):
	    """Return the body-mass index for a height in centimetres and a weight.

	    The height is divided by 100 before being squared, and the weight is
	    divided by that square.
	    """
	    height_m = height / 100
	    return weight / height_m ** 2

	#Getting user input
	def user_inputs():
	    """Collect the user's details and encode them as a single feature row.

	    Renders the "User Dashboard" widgets, derives the BMI from the chosen
	    height and weight, dummy-encodes the answers together with the rows of
	    the module-level ``data`` frame, displays the encoded row and returns it.

	    Returns:
	        pandas.DataFrame: one row (index 0) holding the dummy-encoded
	        columns of the dataset without ``charges``.
	    """
	    st.header("User Dashboard")

	    # Dashboard widgets
	    age = st.slider('Your Age', 1, 100, 30)
	    sex = st.selectbox('Select Your Gender', ['male', 'female'])
	    height = st.slider('Your Height in (cm)', 50, 250, 150)
	    weight = st.slider('Your Weight in (Kg)', 10, 150, 50)
	    # Local names differ from the children()/region() plot functions so the
	    # functions are not shadowed inside this body
	    n_children = st.selectbox('Number of Childrens in Your Family', [0, 1, 2, 3, 4, 5])
	    smoker = st.radio('Smoker', ['yes', 'no'])
	    home_region = st.radio('Select Your Region', ['northeast', 'northwest', 'southeast', 'southwest'])

	    # One-row frame with the raw answers, keyed like the dataset columns
	    user_data = pd.DataFrame(
	        {
	            'age': age,
	            'sex': sex,
	            'bmi': bmi(height, weight),
	            'children': n_children,
	            'smoker': smoker,
	            'region': home_region,
	        },
	        index=[0],
	    )

	    # get_dummies derives its columns from the values it is given, so the
	    # answers are encoded together with the dataset's feature rows
	    features = data.drop(columns=['charges'])
	    user_df = pd.concat([features, user_data], ignore_index=True, axis=0)
	    user_dum_data = pd.get_dummies(user_df, columns=['sex', 'children', 'smoker', 'region'], drop_first=True)

	    # Slice the last row as a DataFrame rather than a Series: a Series would
	    # force every column into one common dtype
	    final_user_data = user_dum_data.iloc[[-1]].reset_index(drop=True)

	    st.subheader("User Data")
	    st.dataframe(final_user_data)
	    return final_user_data

	user_results = user_inputs()

	# Cost predictions

	# Load the trained model; the context manager guarantees the file handle
	# is closed once unpickling is done.
	# NOTE(review): unpickling can execute arbitrary code, so the model file
	# must come from a trusted source.
	with open("insurance_predict.pkl", 'rb') as model_file:
	    model = pickle.load(model_file)
	# Predict for the single encoded user row
	results = list(model.predict(user_results))

	# Show the prediction next to the model's error metric
	st.subheader("Predictions and Accuracies")
	col3, col4 = st.columns(2)
	with col3:
	    st.metric("Insurance cost", str(results[0]))
	with col4:
	    # Hard-coded value; presumably measured when the model was trained,
	    # TODO confirm and keep in sync with the pickled model
	    st.metric("RMSE of the model", 4674.719889567355)