Nahiyan14 committed on
Commit
af2ee6a
·
verified ·
1 Parent(s): 6f996eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -151
app.py CHANGED
@@ -1,170 +1,170 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import joblib
4
- import numpy as np
5
- from io import BytesIO # For in-memory file handling
6
 
7
- # Load the trained model
8
- model = joblib.load("stacked_model.pkl")
9
 
10
- # Set page configuration
11
- st.set_page_config(page_title="Retention Prediction App", layout="centered")
12
- st.title("📈 Retention Prediction App")
13
- st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")
14
 
15
- # Upload Excel file
16
- uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])
17
 
18
- if uploaded_file is not None:
19
- # Read the uploaded Excel file
20
- data = pd.read_excel(uploaded_file)
21
 
22
- st.subheader("Preview of Uploaded Data:")
23
- st.write(data.head())
24
 
25
- # Check if the required columns are in the uploaded file
26
- required_columns = [
27
- "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
28
- "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
29
- "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
30
- "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
31
- ]
32
 
33
- if all(col in data.columns for col in required_columns):
34
- st.success("All required columns are present!")
35
 
36
- # Extract features for prediction
37
- X = data[required_columns]
38
-
39
- # Predict probabilities and classes
40
- predicted_probabilities = model.predict_proba(X)[:, 1]
41
- predicted_classes = model.predict(X)
42
-
43
- # Add predictions to the original data
44
- data["Predicted_Probability"] = predicted_probabilities
45
- data["Predicted_Retention"] = predicted_classes
46
-
47
- st.subheader("Predictions:")
48
- st.write(data.head())
49
-
50
- # Save the DataFrame to an in-memory buffer
51
- output = BytesIO()
52
- with pd.ExcelWriter(output, engine="openpyxl") as writer:
53
- data.to_excel(writer, index=False, sheet_name="Predictions")
54
- processed_data = output.getvalue()
55
-
56
- # Create a download button
57
- st.download_button(
58
- label="Download Predictions as Excel",
59
- data=processed_data,
60
- file_name="predicted_retention.xlsx",
61
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
62
- )
63
- else:
64
- missing_cols = [col for col in required_columns if col not in data.columns]
65
- st.error(f"The following required columns are missing: {missing_cols}")
66
 
67
 
68
 
69
 
70
 
71
 
72
- # import streamlit as st
73
- # import joblib
74
- # import numpy as np
75
 
76
- # def calculate_failed_rates(number_of_previous, number_of_failed):
77
- # """
78
- # Calculate failure rates for Bup or Meth episodes.
79
- # Handles cases where 'number_of_previous' is zero to avoid division errors.
80
- # """
81
- # return number_of_failed / number_of_previous if number_of_previous > 0 else 0.0
82
 
83
 
84
- # # Load the trained model
85
- # model = joblib.load("stacked_model.pkl")
86
 
87
- # # Feature list (15 features including dynamically calculated ones)
88
- # feature_names = [
89
- # "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
90
- # "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
91
- # "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
92
- # "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate"
93
- # ]
94
 
95
- # # Page title and description
96
- # st.set_page_config(page_title="Retention Prediction App", layout="centered")
97
- # st.title("📈 Retention Prediction App")
98
- # st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")
99
-
100
- # # Sidebar description
101
- # st.sidebar.header("📝 Input Patient Data")
102
- # st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")
103
-
104
- # # Create a form for inputs
105
- # with st.form("patient_form"):
106
- # st.subheader("Patient Information")
107
-
108
- # # Collect inputs
109
- # col1, col2 = st.columns(2) # Two-column layout for better organization
110
- # with col1:
111
- # moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
112
- # prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
113
- # rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
114
- # previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
115
- # timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
116
- # ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
117
- # rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)
118
-
119
- # with col2:
120
- # prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
121
- # year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
122
- # moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
123
- # rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
124
- # numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
125
- # ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)
126
-
127
- # st.subheader("Calculated Features *")
128
- # col3, col4 = st.columns(2)
129
- # with col3:
130
- # number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
131
- # number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
132
- # with col4:
133
- # number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
134
- # number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)
135
-
136
- # # Dynamic calculations
137
- # failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
138
- # failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)
139
-
140
- # # Submit button
141
- # submitted = st.form_submit_button("Predict Retention")
142
-
143
- # if submitted:
144
- # # Create input feature array
145
- # input_features = np.array([
146
- # moudtype_meth, prior_bup_days_supply, rxlocation_o, previous_meth_episodes,
147
- # timesince_last_episode, prior_meth_days_supply, year_of_initiation, moudtype_nalt,
148
- # rpl_theme3, numberof_mhvisits, ctrxs_episode, rpl_theme2,
149
- # ct_days_covered_antidep_episode, failed_bup_rate, failed_meth_rate
150
- # ]).reshape(1, -1)
151
-
152
- # # Check if the input matches the model's expected feature count
153
- # expected_feature_count = 15
154
- # if input_features.shape[1] != expected_feature_count:
155
- # st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
156
- # else:
157
- # # Predict retention probability and class
158
- # predicted_probability = model.predict_proba(input_features)[:, 1][0] # Probability of positive class
159
- # predicted_class = model.predict(input_features)[0] # Predicted class (0 or 1)
160
-
161
- # # Display the results
162
- # st.success("Prediction Results:")
163
- # st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
164
- # st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")
165
-
166
- # # Highlight results
167
- # st.markdown("""
168
- # - **Predicted Class:** Based on the highest probability.
169
- # - **Probability:** Shows the likelihood of retention.
170
- # """)
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import joblib
4
+ # import numpy as np
5
+ # from io import BytesIO # For in-memory file handling
6
 
7
+ # # Load the trained model
8
+ # model = joblib.load("stacked_model.pkl")
9
 
10
+ # # Set page configuration
11
+ # st.set_page_config(page_title="Retention Prediction App", layout="centered")
12
+ # st.title("📈 Retention Prediction App")
13
+ # st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")
14
 
15
+ # # Upload Excel file
16
+ # uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])
17
 
18
+ # if uploaded_file is not None:
19
+ # # Read the uploaded Excel file
20
+ # data = pd.read_excel(uploaded_file)
21
 
22
+ # st.subheader("Preview of Uploaded Data:")
23
+ # st.write(data.head())
24
 
25
+ # # Check if the required columns are in the uploaded file
26
+ # required_columns = [
27
+ # "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
28
+ # "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
29
+ # "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
30
+ # "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
31
+ # ]
32
 
33
+ # if all(col in data.columns for col in required_columns):
34
+ # st.success("All required columns are present!")
35
 
36
+ # # Extract features for prediction
37
+ # X = data[required_columns]
38
+
39
+ # # Predict probabilities and classes
40
+ # predicted_probabilities = model.predict_proba(X)[:, 1]
41
+ # predicted_classes = model.predict(X)
42
+
43
+ # # Add predictions to the original data
44
+ # data["Predicted_Probability"] = predicted_probabilities
45
+ # data["Predicted_Retention"] = predicted_classes
46
+
47
+ # st.subheader("Predictions:")
48
+ # st.write(data.head())
49
+
50
+ # # Save the DataFrame to an in-memory buffer
51
+ # output = BytesIO()
52
+ # with pd.ExcelWriter(output, engine="openpyxl") as writer:
53
+ # data.to_excel(writer, index=False, sheet_name="Predictions")
54
+ # processed_data = output.getvalue()
55
+
56
+ # # Create a download button
57
+ # st.download_button(
58
+ # label="Download Predictions as Excel",
59
+ # data=processed_data,
60
+ # file_name="predicted_retention.xlsx",
61
+ # mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
62
+ # )
63
+ # else:
64
+ # missing_cols = [col for col in required_columns if col not in data.columns]
65
+ # st.error(f"The following required columns are missing: {missing_cols}")
66
 
67
 
68
 
69
 
70
 
71
 
72
+ import streamlit as st
73
+ import joblib
74
+ import numpy as np
75
 
76
def calculate_failed_rates(number_of_previous, number_of_failed):
    """
    Calculate the failure rate for Bup or Meth episodes.

    Args:
        number_of_previous: Count of previous episodes (the denominator).
        number_of_failed: Count of those episodes that failed (the numerator).

    Returns:
        ``number_of_failed / number_of_previous`` clamped to the range
        [0.0, 1.0], or 0.0 when ``number_of_previous`` is not positive
        (which also avoids a division by zero).
    """
    # No previous episodes means there is nothing to fail.
    if not number_of_previous > 0:
        return 0.0

    rate = number_of_failed / number_of_previous
    # The two counts are entered independently, so "failed" can exceed
    # "previous"; a rate is a proportion, so keep it within [0, 1].
    return min(max(rate, 0.0), 1.0)
82
 
83
 
84
# Load the trained model.
# NOTE: joblib.load unpickles arbitrary Python objects, so "stacked_model.pkl"
# must only ever come from a trusted source.
model = joblib.load("stacked_model.pkl")

# Names of the 15 model inputs collected by the form below: 13 entered
# directly plus the 2 failure rates derived from episode counts.
feature_names = [
    "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
    "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
    "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
    "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate",
]

# Page title and description
st.set_page_config(page_title="Retention Prediction App", layout="centered")
st.title("📈 Retention Prediction App")
st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")

# Sidebar description
st.sidebar.header("📝 Input Patient Data")
st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")

# Create a form for inputs; widget values are only submitted when the
# form's submit button is pressed.
with st.form("patient_form"):
    st.subheader("Patient Information")

    # Collect inputs in a two-column layout for better organization
    col1, col2 = st.columns(2)
    with col1:
        moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
        prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
        rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
        # NOTE(review): this looks like the same quantity as "Number of
        # Previous Meth Episodes" below, yet it is entered separately and the
        # two can disagree -- confirm whether they should share one input.
        previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
        timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
        ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
        rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)

    with col2:
        prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
        year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
        moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
        rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
        numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
        ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)

    st.subheader("Calculated Features *")
    col3, col4 = st.columns(2)
    with col3:
        number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
        number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
    with col4:
        number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
        number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)

    # Dynamic calculations: failure rates derived from the episode counts
    failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
    failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)

    # Submit button
    submitted = st.form_submit_button("Predict Retention")

if submitted:
    # Key every input by its feature name so the vector handed to the model
    # can be assembled by name instead of by position.
    values_by_name = {
        "MOUDType_Meth": moudtype_meth,
        "prior_BUP_days_supply": prior_bup_days_supply,
        "RxLocation_O": rxlocation_o,
        "Previous_Meth_Episodes": previous_meth_episodes,
        "TimeSinceLastEpisode": timesince_last_episode,
        "prior_Meth_days_supply": prior_meth_days_supply,
        "YearOfInitiation": year_of_initiation,
        "MOUDType_Nalt": moudtype_nalt,
        "RPL_THEME3": rpl_theme3,
        "NumberofMHVisits": numberof_mhvisits,
        "CtRxsEpisode": ctrxs_episode,
        "RPL_THEME2": rpl_theme2,
        "CtDaysCoveredAntidepEpisode": ct_days_covered_antidep_episode,
        "Failed_Bup_Rate": failed_bup_rate,
        "Failed_Meth_Rate": failed_meth_rate,
    }

    # Prefer the column order recorded on the fitted model (scikit-learn sets
    # `feature_names_in_` when fitted on a DataFrame with string column
    # names); a positional array in any other order would silently feed
    # values into the wrong features. Fall back to `feature_names` otherwise.
    # NOTE(review): if the model has no `feature_names_in_`, confirm that
    # `feature_names` matches the training column order.
    model_columns = getattr(model, "feature_names_in_", None)
    if model_columns is None:
        column_order = list(feature_names)
    else:
        column_order = [str(name) for name in model_columns]

    unknown_columns = [name for name in column_order if name not in values_by_name]
    expected_feature_count = getattr(model, "n_features_in_", len(feature_names))

    if number_of_failed_bup_episodes > number_of_previous_bup_episodes:
        st.error("Number of failed BUP episodes cannot exceed the number of previous BUP episodes.")
    elif number_of_failed_meth_episodes > number_of_previous_meth_episodes:
        st.error("Number of failed Meth episodes cannot exceed the number of previous Meth episodes.")
    elif unknown_columns:
        st.error(f"The model expects features this form does not collect: {unknown_columns}")
    elif len(column_order) != expected_feature_count:
        st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {len(column_order)}.")
    else:
        # Single-row 2-D array, as required by predict / predict_proba
        input_features = np.array(
            [values_by_name[name] for name in column_order], dtype=float
        ).reshape(1, -1)

        # Predict retention probability and class
        predicted_probability = model.predict_proba(input_features)[:, 1][0]  # Probability of positive class
        predicted_class = model.predict(input_features)[0]  # Predicted class (0 or 1)

        # Display the results
        st.success("Prediction Results:")
        st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
        st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")

        # Highlight results
        st.markdown("""
        - **Predicted Class:** Based on the highest probability.
        - **Probability:** Shows the likelihood of retention.
        """)