# Source-page header (not code): nikhil0nk's picture / cleaning / bb0b18d
import streamlit as st
import streamlit.components.v1 as components
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import pybanking
from pybanking.value_prediction import model_value_prediction
from pybanking.EDA import data_analysis
import sklearn.metrics as metrics
from mlxtend.plotting import plot_confusion_matrix
import streamlit.components.v1 as components
from sklearn.model_selection import train_test_split
from PIL import Image
from pycaret.regression import pull,predict_model
# Page-wide settings, then a two-column header: logo in the narrow left
# column, title in the wider right column (1:2 width ratio).
st.set_page_config(
    page_title="Customer Value Prediction Model",
    layout="wide",
)

logo_column, title_column = st.columns([1, 2])

with logo_column:
    st.image(Image.open('Shorthills.png'))

with title_column:
    st.title('Customer Value Prediction Model')
# Start from the data returned by model_value_prediction.get_data(); it is
# replaced below only when the user picks 'Upload Custom' AND supplies a file.
train_df = model_value_prediction.get_data()

dataset_choice = st.selectbox(
    'Which dataset would you like to use for prediction?',
    ['Sample Dataset', 'Upload Custom'],
)

if dataset_choice == 'Upload Custom':
    uploaded_file = st.file_uploader("Choose a file")
    if uploaded_file is None:
        # Nothing uploaded yet: warn, and keep using the default data.
        st.warning("you need to upload a csv file.")
    else:
        # Parse the uploaded file as CSV.
        train_df = pd.read_csv(uploaded_file)

# Preview the first five rows of whichever dataset is active.
st.subheader('This is the Selected Data')
preview_rows = train_df.head(5)
st.dataframe(preview_rows)
# Optional exploratory data analysis: build the chosen report, save it as an
# HTML file in the working directory, then embed that file in the page.
analysis_class = data_analysis.Analysis()
option3 = st.selectbox(
    'Select Exploratory Data Analysis type',
    ['None', 'DataPrep', 'SweetViz', 'PandasProfiling'],
)

# Path of the HTML report written by the selected tool; stays None when the
# user leaves the selection on 'None'.
report_path = None
if option3 == 'SweetViz':
    res = analysis_class.sweetviz_analysis(train_df)
    # NOTE(review): open_browser=True also asks for a browser tab on the
    # machine running the app, in addition to the embed below — confirm
    # that is wanted.
    res.show_html(filepath='SweetViz.html', open_browser=True, layout='widescreen', scale=None)
    report_path = 'SweetViz.html'
elif option3 == 'DataPrep':
    res = analysis_class.dataprep_analysis(train_df)
    res.save('DataPrep.html')
    report_path = 'DataPrep.html'
elif option3 == 'PandasProfiling':
    res = analysis_class.pandas_analysis(train_df)
    res.to_file("PandasProfiling.html")
    report_path = 'PandasProfiling.html'

if report_path is not None:
    # Read through a context manager so the handle is closed right away;
    # the script reruns on every widget interaction, so an unclosed handle
    # would be leaked on each rerun.
    with open(report_path, 'r', encoding='utf-8') as html_file:
        source_code = html_file.read()
    with st.expander("See Report"):
        components.html(source_code, height=600, scrolling=True)
# Model selection, training and scoring for the chosen dataset.
model_names = [
    "Logistic_Regression",
    "Support_Vector_Machine",
    "Support_Vector_Machine_Optimized",
    "Decision_Tree",
    "Neural_Network",
    "Random_Forest",
    "Pycaret_Best",
    "LGBM",
    "Lasso",
]
option = st.selectbox(
    'Select a model to be used',
    model_names,
)

tr_df = model_value_prediction.important_feat(train_df, option)

# Load '<model name>.pkl' from the working directory. The context manager
# closes the file handle, which the bare open() call used to leak.
# SECURITY: pickle.load can execute arbitrary code contained in the file.
# The file name comes from the fixed model_names list above, so only ship
# trusted .pkl files next to this script.
with open(option + '.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
st.write("Model Loaded : ", option)

train_X, test_X, train_y, dev_X, val_X, dev_y, val_y, test_y = (
    model_value_prediction.preprocess_inputs(tr_df, option)
)

# NOTE(review): this rebinds `model`, so the pickled model loaded above is
# never used for prediction — confirm whether retraining here is intended.
model = model_value_prediction.train(tr_df, option)
y_pred = model_value_prediction.predict(test_X, model, option)

if option == "Pycaret_Best":
    # PyCaret path: predict_model() is called for its effect on the table
    # that pull() returns; the RMSLE shown is read from that table.
    predict_model(model)
    st.write("RMSLE Score:", pull()['RMSLE'][0])
else:
    # Take the square root explicitly instead of passing squared=False:
    # that keyword was deprecated and later removed from scikit-learn,
    # while this form works on every version.
    # Assumes a single target column — TODO confirm.
    rmsle = metrics.mean_squared_log_error(test_y, y_pred) ** 0.5
    st.write("RMSLE Score:", rmsle)
    # NOTE(review): mean_tweedie_deviance defaults to power=0, which the
    # scikit-learn docs describe as equivalent to mean squared error, so
    # this is not a Poisson deviance despite the label. power=1 (or
    # metrics.mean_poisson_deviance) would match the label but requires
    # strictly positive predictions — confirm which was intended.
    st.write("Poisson Score:", metrics.mean_tweedie_deviance(test_y, y_pred))