# Taylor Kirk
# Fixing errors
# 4edde41
import streamlit as st
import pandas as pd
from utils.plot_gains import plot_gains
from utils.load_pred_model import load_model, load_demo_data
def render():
    """Render the three-step demo walkthrough page.

    Loads the model and the demo dataset through the project loaders, scores
    the demo rows with ``model.predict_proba`` and keeps the result in
    ``st.session_state.demo_probs``, then draws three expanders:

    1. the demo dataframe,
    2. a button that shows the ``vote`` column next to column 1 of the
       predicted probabilities,
    3. a button that calls ``plot_gains`` and shows the figure plus a
       one-sentence summary built from its two numeric return values.
    """
    model = load_model()  # Using the cached function
    df = load_demo_data()
    y = df['vote']

    # Streamlit re-executes this function on every widget interaction, so
    # only score the demo data when no result is stored for this session
    # (or the stored result no longer lines up with the loaded rows).
    probs = st.session_state.get("demo_probs")
    if probs is None or len(probs) != len(df):
        model_columns = [
            'lemma_title',
            'lemma_text',
            'images',
            'Review Length',
            'Title Length',
        ]
        X = df[model_columns].copy()
        # Missing text is replaced by an empty string before prediction.
        X['lemma_title'] = X['lemma_title'].fillna("")
        X['lemma_text'] = X['lemma_text'].fillna("")
        probs = model.predict_proba(X)
        st.session_state.demo_probs = probs

    # Column 1 of predict_proba is what both later steps consume.
    helpful_probs = probs[:, 1]

    with st.expander("Step 1: Load Data and review", icon="🧐"):
        st.write(
            "You'll first upload your dataset (in CSV or Parquet format) and review "
            "it to make sure everything is guchi. Below is what the final dataset "
            "looks like. We'll take care of the preprocessing steps, the only "
            "columns you need to ensure exist in the uploaded data are the review "
            "title and text columns, images and the number of votes that review "
            "has so far if using existing data"
        )
        st.dataframe(df)

    with st.expander("Step 2: Get Predictions", icon=':material/self_improvement:'):
        st.write(
            "The next thing we'll do is use the model to make predictions on your data. "
            "For our purposes, we are predicting the probability that the review belongs to the positive class"
        )
        # The icon was a mis-decoded emoji ("πŸŽ†"); restored to the intended one.
        if st.button("Push to predict", icon="🎆", type='secondary'):
            prob_df = pd.DataFrame({
                "Actual": y,
                "Probability of helpful vote": helpful_probs,
            })
            st.dataframe(prob_df)

    with st.expander("Step 3: Plots the gains", icon=":material/data_thresholding:"):
        st.write(
            "Once we have our predictions, we can plot the gains curve which shows us "
            "the subset of our data is worth focusing on"
        )
        # The icon was a mis-decoded emoji ("πŸ€ͺ"); restored to the intended one.
        if st.button("Plot the gains", icon="🤪", type="secondary"):
            fig, data, total = plot_gains(y, helpful_probs)
            st.plotly_chart(fig)
            # `total` and `data` are fractions returned by plot_gains; they
            # are shown as percentages rounded to two decimals.
            st.write(
                "We can see from this plot that our best return comes from focusing "
                f"on the top **{round(total*100, 2)}%** of our customers,\n"
                f"which will lead to us capturing **{round(data*100, 2)}%** of all possible cases"
            )