import streamlit as st
import pandas as pd
import numpy as np
import shap
import pickle
# disable warning
st.set_option('deprecation.showPyplotGlobalUse', False)
# load trained model
lgbm_base = pickle.load(open('lgbm_base.pkl', 'rb'))
lgbm_opt = pickle.load(open('lgbm_opt.pkl', 'rb'))
# Explicitly set the num_classes attribute for the models
lgbm_base._n_classes = 1 # Assuming it's a regression model (single output)
lgbm_opt._n_classes = 1 # Assuming it's a regression model (single output)
np.random.seed(123)
# Set font color and background color using HTML tags
st.markdown("
", unsafe_allow_html=True)
st.markdown("Welcome to My HuggingFace Streamlit App!
", unsafe_allow_html=True)
st.markdown("This app generates house sale price predictions .
", unsafe_allow_html=True)
# Add more HTML styling to the instructions
st.markdown("Instructions:
", unsafe_allow_html=True)
st.markdown("On the sidebar to the left, you will find a series of questions about the house specifics that require your input. Once you have finished answering the questions, click the button below to generate the predicted house sale price, as well as the respective SHAP summary plot and SHAP interaction plot.
", unsafe_allow_html=True)
st.markdown("Note: Please allow the app some time (a few seconds) to generate predictions and plots.
", unsafe_allow_html=True)
name_list = ['OverallQual', 'OverallCond', 'YrSold', 'YearBuilt', 'YearRemodAdd',
'LotArea', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath', 'GrLivArea',
'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'Fireplaces',
'GarageYrBlt', 'GarageArea', 'PoolArea'
]
name_list_train = ['OverallQual', 'OverallCond', 'YrSold', 'YearBuilt', 'YearRemodAdd',
'LotArea', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath', 'GrLivArea',
'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'Fireplaces',
'GarageYrBlt', 'GarageArea', 'PoolArea'
]
data = pd.read_csv('train.csv')
data = data[name_list_train].values
description_list = [
'What is the overall material and quality finish rating?',
'What is the overall condition rating?',
'In which year was the house sold?',
'In which year was the house built?',
'In which year was the house remodeled?',
'What is the lot size (in square feet)?',
'What is the total basement area (in square feet)?',
'How many full bathrooms in the basement area?',
'How many half bathrooms in the basement area?',
'What is the total ground area (in square feet)?',
'How many full bathrooms in the ground area?',
'How many half bathrooms in the ground area?',
'How many bedrooms in the ground area?',
'How many kitchens in the ground area?',
'How many fireplaces in the ground area?',
'In which year was the garage built?',
'What is the garage size (in square feet)?',
'What is the pool area (in square feet)?'
]
min_list = [1, 1, 2006, 1872, 1950, 1300, 0, 0,
0, 334, 0, 0, 0, 0, 0, 1900, 0, 0
]
max_list = [10, 9, 2010, 2010, 2010, 215245, 6110, 3,
2, 5642, 3, 2, 8, 3, 3, 2010, 1418, 738
]
count = 0
with st.sidebar:
for i in range(len(name_list)):
variable_name = name_list[i]
globals()[variable_name] = st.slider(description_list[i] ,min_value=int(min_list[i]), max_value =int(max_list[i]),step=1)
data_df = {'OverallQual': [OverallQual], 'OverallCond': [OverallCond], 'YrSold': [YrSold],'YearBuilt': [YearBuilt],
'YearRemodAdd': [YearRemodAdd], 'LotArea': [LotArea], 'TotalBsmtSF':[TotalBsmtSF], 'BsmtFullBath': [BsmtFullBath],
'BsmtHalfBath': [BsmtHalfBath], 'GrLivArea':[GrLivArea], 'FullBath': [FullBath], 'HalfBath': [HalfBath],
'BedroomAbvGr':[BedroomAbvGr], 'KitchenAbvGr': [KitchenAbvGr], 'Fireplaces' : [Fireplaces],
'GarageYrBlt' : [GarageYrBlt], 'GarageArea' : [GarageArea], 'PoolArea' : [PoolArea]
}
data_df = pd.DataFrame.from_dict(data_df)
y_pred_base = lgbm_base.predict(data_df)
y_pred_opt = lgbm_opt.predict(data_df)
col1, col2, col3 , col4, col5 = st.columns(5)
with col1:
pass
with col2:
pass
with col4:
pass
with col5:
pass
with col3 :
center_button = st.button('Generate House Price')
if center_button:
import time
with st.spinner('Calculating....'):
time.sleep(2)
st.markdown("The price range of your house is between:
", unsafe_allow_html=True)
col1, col2, col3 = st.columns([3, 3, 3])
estimated_price_base = "{:,.0f}".format(float(y_pred_base.mean()))
estimated_price_opt = "{:,.0f}".format(float(y_pred_opt.mean()))
with col1:
st.write("")
st.markdown("LightGBM Baseline Model Prediction:
", unsafe_allow_html=True)
st.markdown(f"USD {estimated_price_base}
", unsafe_allow_html=True)
st.write("")
st.markdown("The SHAP summary and interaction plots are:
", unsafe_allow_html=True)
explainer_base = shap.TreeExplainer(lgbm_base)
shap_explainer_base = explainer_base(data_df)
shap_values_base = shap.TreeExplainer(lgbm_base).shap_values(data_df)
shap_interaction_values_base = shap.TreeExplainer(lgbm_base).shap_interaction_values(data_df)
st.subheader("SHAP Summary Plot")
st.markdown("", unsafe_allow_html=True)
shap.plots.beeswarm(shap_explainer_base, max_display=10)
st.markdown("
", unsafe_allow_html=True)
st.pyplot()
st.subheader("SHAP Interaction Plot")
st.markdown("", unsafe_allow_html=True)
shap.summary_plot(shap_interaction_values_base, data_df)
st.markdown("
", unsafe_allow_html=True)
st.pyplot()
st.markdown("LightGBM Model Optimized (via Optuna) Prediction:
", unsafe_allow_html=True)
st.markdown(f"USD {estimated_price_opt}
", unsafe_allow_html=True)
st.write("")
st.markdown("The SHAP summary and interaction plots are:
", unsafe_allow_html=True)
explainer_opt = shap.TreeExplainer(lgbm_opt)
shap_explainer_opt = explainer_opt(data_df)
shap_values_opt = shap.TreeExplainer(lgbm_opt).shap_values(data_df)
shap_interaction_values_opt = shap.TreeExplainer(lgbm_opt).shap_interaction_values(data_df)
st.subheader("SHAP Summary Plot")
st.markdown("", unsafe_allow_html=True)
shap.plots.beeswarm(shap_explainer_opt, max_display=10)
st.markdown("
", unsafe_allow_html=True)
st.pyplot()
st.subheader("SHAP Interaction Plot")
st.markdown("", unsafe_allow_html=True)
shap.summary_plot(shap_interaction_values_opt, data_df)
st.markdown("
", unsafe_allow_html=True)
st.pyplot()