Spaces:
Sleeping
Sleeping
File size: 7,777 Bytes
af2ee6a e92f616 af2ee6a e92f616 af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a 0fbf81b af2ee6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# import streamlit as st
# import pandas as pd
# import joblib
# import numpy as np
# from io import BytesIO # For in-memory file handling
# # Load the trained model
# model = joblib.load("stacked_model.pkl")
# # Set page configuration
# st.set_page_config(page_title="Retention Prediction App", layout="centered")
# st.title("๐ Retention Prediction App")
# st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")
# # Upload Excel file
# uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])
# if uploaded_file is not None:
# # Read the uploaded Excel file
# data = pd.read_excel(uploaded_file)
# st.subheader("Preview of Uploaded Data:")
# st.write(data.head())
# # Check if the required columns are in the uploaded file
# required_columns = [
# "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
# "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
# "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
# "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
# ]
# if all(col in data.columns for col in required_columns):
# st.success("All required columns are present!")
# # Extract features for prediction
# X = data[required_columns]
# # Predict probabilities and classes
# predicted_probabilities = model.predict_proba(X)[:, 1]
# predicted_classes = model.predict(X)
# # Add predictions to the original data
# data["Predicted_Probability"] = predicted_probabilities
# data["Predicted_Retention"] = predicted_classes
# st.subheader("Predictions:")
# st.write(data.head())
# # Save the DataFrame to an in-memory buffer
# output = BytesIO()
# with pd.ExcelWriter(output, engine="openpyxl") as writer:
# data.to_excel(writer, index=False, sheet_name="Predictions")
# processed_data = output.getvalue()
# # Create a download button
# st.download_button(
# label="Download Predictions as Excel",
# data=processed_data,
# file_name="predicted_retention.xlsx",
# mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
# )
# else:
# missing_cols = [col for col in required_columns if col not in data.columns]
# st.error(f"The following required columns are missing: {missing_cols}")
import streamlit as st
import joblib
import numpy as np
def calculate_failed_rates(number_of_previous: float, number_of_failed: float) -> float:
    """Return the fraction of previous episodes that failed.

    Used for both the Bup and the Meth failure-rate features.

    Args:
        number_of_previous: Count of previous episodes (the denominator).
        number_of_failed: Count of those episodes that failed (the numerator).

    Returns:
        ``number_of_failed / number_of_previous`` when ``number_of_previous``
        is greater than zero, otherwise ``0.0``.
    """
    # Guard first: without any previous episode there is nothing to divide
    # by, and the rate is defined as zero.
    if number_of_previous > 0:
        return number_of_failed / number_of_previous
    return 0.0
# Deserialize the trained classifier once at import time. The path is
# relative, so it is resolved against the process's working directory.
# NOTE(review): Streamlit re-executes this script on user interaction, so the
# load presumably repeats on every rerun; consider a cached loader if the
# installed Streamlit version provides one.
MODEL_PATH = "stacked_model.pkl"
model = joblib.load(MODEL_PATH)
# Names of the 15 model input columns. The last two (the failure rates) are
# not typed in by the user; they are derived from episode counts.
feature_names = [
    "MOUDType_Meth",
    "prior_BUP_days_supply",
    "RxLocation_O",
    "Previous_Meth_Episodes",
    "TimeSinceLastEpisode",
    "prior_Meth_days_supply",
    "YearOfInitiation",
    "MOUDType_Nalt",
    "RPL_THEME3",
    "NumberofMHVisits",
    "CtRxsEpisode",
    "RPL_THEME2",
    "CtDaysCoveredAntidepEpisode",
    "Failed_Bup_Rate",
    "Failed_Meth_Rate",
]
# Page chrome: browser-tab title, centered layout, headline and intro text.
# The headline and the sidebar header previously began with a mis-encoded
# character (the remains of an emoji). The intended glyph cannot be recovered
# from the damaged text, so the stray character is dropped rather than guessed.
st.set_page_config(page_title="Retention Prediction App", layout="centered")
st.title("Retention Prediction App")
st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")

# Sidebar: short instructions for filling in the form.
st.sidebar.header("Input Patient Data")
st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")
# Collect all model inputs inside a single form; the prediction step is driven
# by the value returned from the form's submit button (`submitted`).
with st.form("patient_form"):
    st.subheader("Patient Information")

    # Directly entered features, split over two columns to keep the form compact.
    col1, col2 = st.columns(2)
    with col1:
        moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
        prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
        rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
        previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
        timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
        ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
        # Explicit 3-decimal format so the displayed value matches the 0.001
        # step regardless of Streamlit's default float formatting.
        rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001, format="%0.3f")
    with col2:
        prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
        year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
        moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
        rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001, format="%0.3f")
        numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
        ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)

    # Raw episode counts; these are not model features themselves but feed the
    # two failure-rate features computed below.
    # NOTE(review): "Previous Meth Episodes" above and "Number of Previous Meth
    # Episodes" here are separate inputs - confirm whether they are meant to be
    # the same quantity, since they can currently be set to different values.
    st.subheader("Calculated Features *")
    col3, col4 = st.columns(2)
    with col3:
        number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
        number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
    with col4:
        number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
        number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)

    # Derived features: failed / previous, or 0.0 when there is no previous episode.
    failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
    failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)

    # The submit button has to be created inside the form context.
    submitted = st.form_submit_button("Predict Retention")
if submitted:
    # Pair every value with its column name so the input vector is ordered by
    # name rather than by position in a hand-written list. An earlier batch
    # version of this app (kept commented out in this file) listed
    # Failed_Bup_Rate in a different position, so positional assembly can
    # silently misalign columns.
    feature_values = {
        "MOUDType_Meth": moudtype_meth,
        "prior_BUP_days_supply": prior_bup_days_supply,
        "RxLocation_O": rxlocation_o,
        "Previous_Meth_Episodes": previous_meth_episodes,
        "TimeSinceLastEpisode": timesince_last_episode,
        "prior_Meth_days_supply": prior_meth_days_supply,
        "YearOfInitiation": year_of_initiation,
        "MOUDType_Nalt": moudtype_nalt,
        "RPL_THEME3": rpl_theme3,
        "NumberofMHVisits": numberof_mhvisits,
        "CtRxsEpisode": ctrxs_episode,
        "RPL_THEME2": rpl_theme2,
        "CtDaysCoveredAntidepEpisode": ct_days_covered_antidep_episode,
        "Failed_Bup_Rate": failed_bup_rate,
        "Failed_Meth_Rate": failed_meth_rate,
    }

    # Prefer the column order recorded on the fitted model, if it exposes one
    # and it names exactly these features; otherwise keep the order declared
    # in `feature_names` (the previous behavior).
    trained_columns = getattr(model, "feature_names_in_", None)
    if trained_columns is not None and set(trained_columns) == set(feature_values):
        column_order = list(trained_columns)
    else:
        column_order = feature_names

    # Single-row 2D array, the shape predict/predict_proba are called with.
    input_features = np.array(
        [feature_values[name] for name in column_order], dtype=float
    ).reshape(1, -1)

    # Compare against what the model reports when available instead of a
    # hard-coded constant; fall back to the declared feature list length.
    expected_feature_count = getattr(model, "n_features_in_", len(feature_names))

    # More failed episodes than previous episodes would give a rate above 1
    # (or a silent 0 when the previous count is 0), so reject such input.
    counts_inconsistent = (
        number_of_failed_bup_episodes > number_of_previous_bup_episodes
        or number_of_failed_meth_episodes > number_of_previous_meth_episodes
    )

    if counts_inconsistent:
        st.error("The number of failed episodes cannot exceed the number of previous episodes.")
    elif input_features.shape[1] != expected_feature_count:
        st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
    else:
        # Column 1 of predict_proba is used as the probability of retention.
        predicted_probability = model.predict_proba(input_features)[:, 1][0]
        predicted_class = model.predict(input_features)[0]

        # Display the results
        st.success("Prediction Results:")
        st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
        st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")

        # Short legend explaining the two numbers above. Built from explicit
        # lines so the markdown does not depend on source indentation.
        st.markdown(
            "\n"
            "- **Predicted Class:** Based on the highest probability.\n"
            "- **Probability:** Shows the likelihood of retention.\n"
        )
|