Nahiyan14 committed on
Commit
0fbf81b
·
verified ·
1 Parent(s): fad2288

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -89
app.py CHANGED
@@ -1,99 +1,167 @@
1
  import streamlit as st
 
2
  import joblib
3
  import numpy as np
4
 
5
- def calculate_failed_rates(number_of_previous, number_of_failed):
6
- """
7
- Calculate failure rates for Bup or Meth episodes.
8
- Handles cases where 'number_of_previous' is zero to avoid division errors.
9
- """
10
- return number_of_failed / number_of_previous if number_of_previous > 0 else 0.0
11
-
12
-
13
  # Load the trained model
14
  model = joblib.load("stacked_model.pkl")
15
 
16
- # Feature list (15 features including dynamically calculated ones)
17
- feature_names = [
18
- "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
19
- "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
20
- "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
21
- "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate"
22
- ]
23
-
24
- # Page title and description
25
  st.set_page_config(page_title="Retention Prediction App", layout="centered")
26
  st.title("📈 Retention Prediction App")
27
- st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")
28
-
29
- # Sidebar description
30
- st.sidebar.header("📝 Input Patient Data")
31
- st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")
32
-
33
- # Create a form for inputs
34
- with st.form("patient_form"):
35
- st.subheader("Patient Information")
36
-
37
- # Collect inputs
38
- col1, col2 = st.columns(2) # Two-column layout for better organization
39
- with col1:
40
- moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
41
- prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
42
- rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
43
- previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
44
- timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
45
- ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
46
- rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)
47
-
48
- with col2:
49
- prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
50
- year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
51
- moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
52
- rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
53
- numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
54
- ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)
55
-
56
- st.subheader("Calculated Features *")
57
- col3, col4 = st.columns(2)
58
- with col3:
59
- number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
60
- number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
61
- with col4:
62
- number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
63
- number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)
64
-
65
- # Dynamic calculations
66
- failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
67
- failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)
68
-
69
- # Submit button
70
- submitted = st.form_submit_button("Predict Retention")
71
-
72
- if submitted:
73
- # Create input feature array
74
- input_features = np.array([
75
- moudtype_meth, prior_bup_days_supply, rxlocation_o, previous_meth_episodes,
76
- timesince_last_episode, prior_meth_days_supply, year_of_initiation, moudtype_nalt,
77
- rpl_theme3, numberof_mhvisits, ctrxs_episode, rpl_theme2,
78
- ct_days_covered_antidep_episode, failed_bup_rate, failed_meth_rate
79
- ]).reshape(1, -1)
80
-
81
- # Check if the input matches the model's expected feature count
82
- expected_feature_count = 15
83
- if input_features.shape[1] != expected_feature_count:
84
- st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
85
  else:
86
- # Predict retention probability and class
87
- predicted_probability = model.predict_proba(input_features)[:, 1][0] # Probability of positive class
88
- predicted_class = model.predict(input_features)[0] # Predicted class (0 or 1)
89
-
90
- # Display the results
91
- st.success("Prediction Results:")
92
- st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
93
- st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")
94
-
95
- # Highlight results
96
- st.markdown("""
97
- - **Predicted Class:** Based on the highest probability.
98
- - **Probability:** Shows the likelihood of retention.
99
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
  import joblib
4
  import numpy as np
5
 
 
 
 
 
 
 
 
 
6
  # Load the trained model
7
  model = joblib.load("stacked_model.pkl")
8
 
9
+ # Set page configuration
 
 
 
 
 
 
 
 
10
  st.set_page_config(page_title="Retention Prediction App", layout="centered")
11
  st.title("📈 Retention Prediction App")
12
+ st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")
13
+
14
+ # Upload Excel file
15
+ uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])
16
+
17
+ if uploaded_file is not None:
18
+ # Read the uploaded Excel file
19
+ data = pd.read_excel(uploaded_file)
20
+
21
+ st.subheader("Preview of Uploaded Data:")
22
+ st.write(data.head())
23
+
24
+ # Check if the required columns are in the uploaded file
25
+ required_columns = [
26
+ "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
27
+ "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
28
+ "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
29
+ "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
30
+ ]
31
+
32
+ if all(col in data.columns for col in required_columns):
33
+ st.success("All required columns are present!")
34
+
35
+ # Extract features for prediction
36
+ X = data[required_columns]
37
+
38
+ # Predict probabilities and classes
39
+ predicted_probabilities = model.predict_proba(X)[:, 1]
40
+ predicted_classes = model.predict(X)
41
+
42
+ # Add predictions to the original data
43
+ data["Predicted_Probability"] = predicted_probabilities
44
+ data["Predicted_Retention"] = predicted_classes
45
+
46
+ st.subheader("Predictions:")
47
+ st.write(data.head())
48
+
49
+ # Downloadable Excel file with predictions
50
+ output_file = "predicted_retention.xlsx"
51
+ data.to_excel(output_file, index=False)
52
+
53
+ # Create a download link
54
+ st.download_button(
55
+ label="Download Predictions as Excel",
56
+ data=data.to_excel(index=False, engine="openpyxl"),
57
+ file_name="predicted_retention.xlsx",
58
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
59
+ )
 
 
 
 
 
 
 
 
 
 
60
  else:
61
+ missing_cols = [col for col in required_columns if col not in data.columns]
62
+ st.error(f"The following required columns are missing: {missing_cols}")
63
+
64
+
65
+
66
+
67
+
68
+
69
+ # import streamlit as st
70
+ # import joblib
71
+ # import numpy as np
72
+
73
+ # def calculate_failed_rates(number_of_previous, number_of_failed):
74
+ # """
75
+ # Calculate failure rates for Bup or Meth episodes.
76
+ # Handles cases where 'number_of_previous' is zero to avoid division errors.
77
+ # """
78
+ # return number_of_failed / number_of_previous if number_of_previous > 0 else 0.0
79
+
80
+
81
+ # # Load the trained model
82
+ # model = joblib.load("stacked_model.pkl")
83
+
84
+ # # Feature list (15 features including dynamically calculated ones)
85
+ # feature_names = [
86
+ # "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
87
+ # "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
88
+ # "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
89
+ # "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate"
90
+ # ]
91
+
92
+ # # Page title and description
93
+ # st.set_page_config(page_title="Retention Prediction App", layout="centered")
94
+ # st.title("📈 Retention Prediction App")
95
+ # st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")
96
+
97
+ # # Sidebar description
98
+ # st.sidebar.header("📝 Input Patient Data")
99
+ # st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")
100
+
101
+ # # Create a form for inputs
102
+ # with st.form("patient_form"):
103
+ # st.subheader("Patient Information")
104
+
105
+ # # Collect inputs
106
+ # col1, col2 = st.columns(2) # Two-column layout for better organization
107
+ # with col1:
108
+ # moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
109
+ # prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
110
+ # rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
111
+ # previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
112
+ # timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
113
+ # ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
114
+ # rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)
115
+
116
+ # with col2:
117
+ # prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
118
+ # year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
119
+ # moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
120
+ # rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
121
+ # numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
122
+ # ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)
123
+
124
+ # st.subheader("Calculated Features *")
125
+ # col3, col4 = st.columns(2)
126
+ # with col3:
127
+ # number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
128
+ # number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
129
+ # with col4:
130
+ # number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
131
+ # number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)
132
+
133
+ # # Dynamic calculations
134
+ # failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
135
+ # failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)
136
+
137
+ # # Submit button
138
+ # submitted = st.form_submit_button("Predict Retention")
139
+
140
+ # if submitted:
141
+ # # Create input feature array
142
+ # input_features = np.array([
143
+ # moudtype_meth, prior_bup_days_supply, rxlocation_o, previous_meth_episodes,
144
+ # timesince_last_episode, prior_meth_days_supply, year_of_initiation, moudtype_nalt,
145
+ # rpl_theme3, numberof_mhvisits, ctrxs_episode, rpl_theme2,
146
+ # ct_days_covered_antidep_episode, failed_bup_rate, failed_meth_rate
147
+ # ]).reshape(1, -1)
148
+
149
+ # # Check if the input matches the model's expected feature count
150
+ # expected_feature_count = 15
151
+ # if input_features.shape[1] != expected_feature_count:
152
+ # st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
153
+ # else:
154
+ # # Predict retention probability and class
155
+ # predicted_probability = model.predict_proba(input_features)[:, 1][0] # Probability of positive class
156
+ # predicted_class = model.predict(input_features)[0] # Predicted class (0 or 1)
157
+
158
+ # # Display the results
159
+ # st.success("Prediction Results:")
160
+ # st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
161
+ # st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")
162
+
163
+ # # Highlight results
164
+ # st.markdown("""
165
+ # - **Predicted Class:** Based on the highest probability.
166
+ # - **Probability:** Shows the likelihood of retention.
167
+ # """)