File size: 7,777 Bytes
af2ee6a
 
 
 
 
e92f616
af2ee6a
 
e92f616
af2ee6a
 
 
 
0fbf81b
af2ee6a
 
0fbf81b
af2ee6a
 
 
0fbf81b
af2ee6a
 
0fbf81b
af2ee6a
 
 
 
 
 
 
0fbf81b
af2ee6a
 
0fbf81b
af2ee6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fbf81b
 
 
 
 
 
af2ee6a
 
 
0fbf81b
af2ee6a
 
 
 
 
 
0fbf81b
 
af2ee6a
 
0fbf81b
af2ee6a
 
 
 
 
 
 
0fbf81b
af2ee6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# import streamlit as st
# import pandas as pd
# import joblib
# import numpy as np
# from io import BytesIO  # For in-memory file handling

# # Load the trained model
# model = joblib.load("stacked_model.pkl")

# # Set page configuration
# st.set_page_config(page_title="Retention Prediction App", layout="centered")
# st.title("📈 Retention Prediction App")
# st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")

# # Upload Excel file
# uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])

# if uploaded_file is not None:
#     # Read the uploaded Excel file
#     data = pd.read_excel(uploaded_file)

#     st.subheader("Preview of Uploaded Data:")
#     st.write(data.head())

#     # Check if the required columns are in the uploaded file
#     required_columns = [
#         "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
#         "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
#         "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
#         "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
#     ]

#     if all(col in data.columns for col in required_columns):
#         st.success("All required columns are present!")
        
#         # Extract features for prediction
#         X = data[required_columns]

#         # Predict probabilities and classes
#         predicted_probabilities = model.predict_proba(X)[:, 1]
#         predicted_classes = model.predict(X)

#         # Add predictions to the original data
#         data["Predicted_Probability"] = predicted_probabilities
#         data["Predicted_Retention"] = predicted_classes

#         st.subheader("Predictions:")
#         st.write(data.head())

#         # Save the DataFrame to an in-memory buffer
#         output = BytesIO()
#         with pd.ExcelWriter(output, engine="openpyxl") as writer:
#             data.to_excel(writer, index=False, sheet_name="Predictions")
#         processed_data = output.getvalue()

#         # Create a download button
#         st.download_button(
#             label="Download Predictions as Excel",
#             data=processed_data,
#             file_name="predicted_retention.xlsx",
#             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
#         )
#     else:
#         missing_cols = [col for col in required_columns if col not in data.columns]
#         st.error(f"The following required columns are missing: {missing_cols}")






import streamlit as st
import joblib
import numpy as np

def calculate_failed_rates(number_of_previous, number_of_failed):
    """Return the fraction of previous episodes that failed.

    Used for both the Bup and the Meth failure-rate features.

    Args:
        number_of_previous: Count of previous episodes (the denominator).
        number_of_failed: Count of failed episodes (the numerator).

    Returns:
        ``number_of_failed / number_of_previous`` when ``number_of_previous``
        is greater than zero, otherwise ``0.0`` so that no division by zero
        (or by a negative count) is attempted.
    """
    if number_of_previous > 0:
        return number_of_failed / number_of_previous
    return 0.0


@st.cache_resource(show_spinner=False)
def _load_model(path="stacked_model.pkl"):
    """Load the trained model from disk and keep it cached.

    Streamlit re-runs this script on every user interaction; caching the
    loaded estimator as a resource avoids re-reading and unpickling the file
    on each rerun. The spinner is disabled so that no UI element is emitted
    before ``st.set_page_config`` is called further down.

    Args:
        path: Location of the joblib-serialized model file.

    Returns:
        The object stored in the file (used below via ``predict`` and
        ``predict_proba``).
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only ship a model file that comes from a trusted source.
    return joblib.load(path)


# Load the trained model
model = _load_model()

# Names of the 15 model inputs. The last two (the failure rates) are not
# entered directly; they are derived from episode counts typed into the form.
feature_names = [
    "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
    "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
    "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2", 
    "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate"
]

# Page title and description.
# set_page_config is kept as the first Streamlit call that affects the page.
st.set_page_config(page_title="Retention Prediction App", layout="centered")
# The emoji below were stored as mis-decoded bytes; restored to the intended characters.
st.title("📈 Retention Prediction App")
st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")

# Sidebar description
st.sidebar.header("📝 Input Patient Data")
st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")

# Collect all patient inputs inside a single form so the script only acts on
# them when the user presses the submit button.
with st.form("patient_form"):
    st.subheader("Patient Information")

    # Directly entered model inputs, laid out in two columns.
    col1, col2 = st.columns(2)  # Two-column layout for better organization
    with col1:
        moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
        prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
        rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
        previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
        timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
        ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
        rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)

    with col2:
        prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
        year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
        moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
        rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
        numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
        ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)

    # Episode counts that are only used to derive the two failure-rate inputs.
    # NOTE(review): "Number of Previous Meth Episodes" below is entered
    # separately from "Previous Meth Episodes" above, so the two can disagree.
    # Confirm whether they are meant to be the same quantity.
    st.subheader("Calculated Features *")
    col3, col4 = st.columns(2)
    with col3:
        number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
        number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
    with col4:
        number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
        number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)

    # Derived inputs: failed / previous, or 0.0 when there are no previous episodes.
    failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
    failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)

    # Submit button
    submitted = st.form_submit_button("Predict Retention")

# Reject inconsistent episode counts before anything downstream uses the
# rates: more failed than previous episodes would give a rate above 1, or a
# silent 0.0 when the previous count is zero.
if submitted:
    count_checks = [
        ("BUP", number_of_previous_bup_episodes, number_of_failed_bup_episodes),
        ("Meth", number_of_previous_meth_episodes, number_of_failed_meth_episodes),
    ]
    invalid_counts = [
        (label, previous, failed)
        for label, previous, failed in count_checks
        if failed > previous
    ]
    if invalid_counts:
        for label, previous, failed in invalid_counts:
            st.error(
                f"Number of failed {label} episodes ({failed}) cannot exceed "
                f"the number of previous {label} episodes ({previous})."
            )
        # Halt this run so no prediction is made from invalid input.
        st.stop()

if submitted:
    # Pair every model input with its name so the column order can be taken
    # from the model instead of depending on the order of a positional list.
    feature_values = {
        "MOUDType_Meth": moudtype_meth,
        "prior_BUP_days_supply": prior_bup_days_supply,
        "RxLocation_O": rxlocation_o,
        "Previous_Meth_Episodes": previous_meth_episodes,
        "TimeSinceLastEpisode": timesince_last_episode,
        "prior_Meth_days_supply": prior_meth_days_supply,
        "YearOfInitiation": year_of_initiation,
        "MOUDType_Nalt": moudtype_nalt,
        "RPL_THEME3": rpl_theme3,
        "NumberofMHVisits": numberof_mhvisits,
        "CtRxsEpisode": ctrxs_episode,
        "RPL_THEME2": rpl_theme2,
        "CtDaysCoveredAntidepEpisode": ct_days_covered_antidep_episode,
        "Failed_Bup_Rate": failed_bup_rate,
        "Failed_Meth_Rate": failed_meth_rate,
    }

    # Prefer the column order recorded on the fitted model (scikit-learn sets
    # feature_names_in_ when fitted on named columns); otherwise fall back to
    # the order declared in feature_names.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is None:
        column_order = list(feature_names)
    else:
        column_order = [str(name) for name in fitted_names]

    # Inputs the model asks for that this form does not provide.
    unknown_names = [name for name in column_order if name not in feature_values]

    # Compare against what the model reports rather than a hard-coded count.
    expected_feature_count = getattr(model, "n_features_in_", len(feature_names))

    if unknown_names:
        st.error(f"Feature mismatch! The model expects inputs this form does not collect: {unknown_names}")
    elif len(column_order) != expected_feature_count:
        st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {len(column_order)}.")
    else:
        # Single-row 2-D array, columns in the order chosen above.
        input_features = np.array(
            [feature_values[name] for name in column_order], dtype=float
        ).reshape(1, -1)

        # Predict retention probability and class
        # NOTE(review): column 1 is taken as the retention class; this assumes
        # the model's classes are ordered [0, 1]; confirm against model.classes_.
        predicted_probability = model.predict_proba(input_features)[:, 1][0]  # Probability of positive class
        predicted_class = model.predict(input_features)[0]  # Predicted class (0 or 1)

        # Display the results
        st.success("Prediction Results:")
        st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
        st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")

        # Highlight results
        st.markdown("""
            - **Predicted Class:** Based on the highest probability.
            - **Probability:** Shows the likelihood of retention.
        """)