Spaces:
Sleeping
Sleeping
| # import streamlit as st | |
| # import pandas as pd | |
| # import joblib | |
| # import numpy as np | |
| # from io import BytesIO # For in-memory file handling | |
| # # Load the trained model | |
| # model = joblib.load("stacked_model.pkl") | |
| # # Set page configuration | |
| # st.set_page_config(page_title="Retention Prediction App", layout="centered") | |
| # st.title("π Retention Prediction App") | |
| # st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.") | |
| # # Upload Excel file | |
| # uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"]) | |
| # if uploaded_file is not None: | |
| # # Read the uploaded Excel file | |
| # data = pd.read_excel(uploaded_file) | |
| # st.subheader("Preview of Uploaded Data:") | |
| # st.write(data.head()) | |
| # # Check if the required columns are in the uploaded file | |
| # required_columns = [ | |
| # "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes", | |
| # "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", | |
| # "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", | |
| # "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate" | |
| # ] | |
| # if all(col in data.columns for col in required_columns): | |
| # st.success("All required columns are present!") | |
| # # Extract features for prediction | |
| # X = data[required_columns] | |
| # # Predict probabilities and classes | |
| # predicted_probabilities = model.predict_proba(X)[:, 1] | |
| # predicted_classes = model.predict(X) | |
| # # Add predictions to the original data | |
| # data["Predicted_Probability"] = predicted_probabilities | |
| # data["Predicted_Retention"] = predicted_classes | |
| # st.subheader("Predictions:") | |
| # st.write(data.head()) | |
| # # Save the DataFrame to an in-memory buffer | |
| # output = BytesIO() | |
| # with pd.ExcelWriter(output, engine="openpyxl") as writer: | |
| # data.to_excel(writer, index=False, sheet_name="Predictions") | |
| # processed_data = output.getvalue() | |
| # # Create a download button | |
| # st.download_button( | |
| # label="Download Predictions as Excel", | |
| # data=processed_data, | |
| # file_name="predicted_retention.xlsx", | |
| # mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | |
| # ) | |
| # else: | |
| # missing_cols = [col for col in required_columns if col not in data.columns] | |
| # st.error(f"The following required columns are missing: {missing_cols}") | |
| import streamlit as st | |
| import joblib | |
| import numpy as np | |
def calculate_failed_rates(number_of_previous: float, number_of_failed: float) -> float:
    """Return the share of previous Bup or Meth episodes that failed.

    Args:
        number_of_previous: Count of previous episodes (the denominator).
        number_of_failed: Count of those episodes that failed (the numerator).

    Returns:
        ``number_of_failed / number_of_previous`` when ``number_of_previous``
        is positive; otherwise ``0.0``, so a patient with no prior episodes
        never triggers a division by zero.

    Note:
        The ratio is not clamped: if ``number_of_failed`` exceeds
        ``number_of_previous`` the result is greater than 1. Callers that
        need a rate in [0, 1] must validate the counts themselves.
    """
    # Guard clause: zero (or negative) history means "no failures on record".
    if number_of_previous <= 0:
        return 0.0
    return number_of_failed / number_of_previous
# ---------------------------------------------------------------------------
# Page setup
# ---------------------------------------------------------------------------
# Kept ahead of the cached model load below: set_page_config is expected to be
# the first Streamlit command, and a cached loader may render a spinner.
st.set_page_config(page_title="Retention Prediction App", layout="centered")


@st.cache_resource
def load_model(path="stacked_model.pkl"):
    """Load the trained classifier from *path*, cached across script reruns.

    Without caching, the model file would be deserialized again every time
    Streamlit re-executes this script (e.g. on each form submission).
    """
    # NOTE: joblib.load unpickles arbitrary objects; only load model files
    # that come from a trusted source.
    return joblib.load(path)


# Load the trained model
model = load_model()

# Feature list (15 features including dynamically calculated ones).
# This is the fallback column order, used only when the model does not record
# the feature names it was fitted with.
# NOTE(review): this order differs from `required_columns` in the legacy
# (commented-out) uploader, where "Failed_Bup_Rate" was the 5th column.
# Confirm which order the model was trained with.
feature_names = [
    "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
    "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
    "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
    "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate",
]

# Page title and description.
# NOTE(review): the title and the sidebar header originally started with an
# emoji that was lost to mis-encoding (it showed up as a stray Greek letter);
# the garbled glyph has been dropped. Restore the emoji if desired.
st.title("Retention Prediction App")
st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")

# Sidebar description
st.sidebar.header("Input Patient Data")
st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")

# ---------------------------------------------------------------------------
# Input form
# ---------------------------------------------------------------------------
with st.form("patient_form"):
    st.subheader("Patient Information")

    # Two-column layout for better organization
    col1, col2 = st.columns(2)
    with col1:
        moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
        prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
        rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
        previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
        timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
        ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
        rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)
    with col2:
        prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
        year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
        moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
        rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
        numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
        ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)

    # Raw counts from which the two failure-rate features are derived.
    st.subheader("Calculated Features *")
    col3, col4 = st.columns(2)
    with col3:
        number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
        number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
    with col4:
        # NOTE(review): this looks like the same quantity as the
        # "Previous Meth Episodes" field above; the two can be entered
        # inconsistently. Confirm whether one input should feed both.
        number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
        number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)

    # Dynamic calculations
    failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
    failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)

    # Submit button (must live inside the form)
    submitted = st.form_submit_button("Predict Retention")

# ---------------------------------------------------------------------------
# Prediction
# ---------------------------------------------------------------------------
if submitted:
    # Reject inconsistent counts: more failures than episodes would yield a
    # rate above 1 (or a silent 0.0 when the episode count is 0).
    input_problems = []
    if number_of_failed_bup_episodes > number_of_previous_bup_episodes:
        input_problems.append("Number of Failed BUP Episodes cannot exceed Number of Previous BUP Episodes.")
    if number_of_failed_meth_episodes > number_of_previous_meth_episodes:
        input_problems.append("Number of Failed Meth Episodes cannot exceed Number of Previous Meth Episodes.")
    for problem in input_problems:
        st.error(problem)

    if not input_problems:
        # Key every value by feature name so the column order can follow the
        # model rather than the order in which the widgets happen to be listed.
        values_by_feature = {
            "MOUDType_Meth": moudtype_meth,
            "prior_BUP_days_supply": prior_bup_days_supply,
            "RxLocation_O": rxlocation_o,
            "Previous_Meth_Episodes": previous_meth_episodes,
            "TimeSinceLastEpisode": timesince_last_episode,
            "prior_Meth_days_supply": prior_meth_days_supply,
            "YearOfInitiation": year_of_initiation,
            "MOUDType_Nalt": moudtype_nalt,
            "RPL_THEME3": rpl_theme3,
            "NumberofMHVisits": numberof_mhvisits,
            "CtRxsEpisode": ctrxs_episode,
            "RPL_THEME2": rpl_theme2,
            "CtDaysCoveredAntidepEpisode": ct_days_covered_antidep_episode,
            "Failed_Bup_Rate": failed_bup_rate,
            "Failed_Meth_Rate": failed_meth_rate,
        }

        # Prefer the column order recorded on the fitted model (when it
        # exposes one); otherwise fall back to `feature_names`.
        trained_names = getattr(model, "feature_names_in_", None)
        if trained_names is None:
            column_order = list(feature_names)
        else:
            column_order = [str(name) for name in trained_names]

        not_collected = [name for name in column_order if name not in values_by_feature]
        # Ask the model how many features it expects instead of hard-coding it.
        expected_feature_count = getattr(model, "n_features_in_", len(feature_names))

        if not_collected:
            st.error(f"The model expects features this form does not collect: {not_collected}")
        elif len(column_order) != expected_feature_count:
            st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {len(column_order)}.")
        else:
            # Single-row 2-D array, as the predict APIs take a batch of samples.
            input_features = np.array(
                [values_by_feature[name] for name in column_order], dtype=float
            ).reshape(1, -1)

            # Predict retention probability and class.
            # Column 1 of predict_proba is taken as the positive class
            # (assumes the model's classes are ordered [0, 1] - TODO confirm).
            predicted_probability = model.predict_proba(input_features)[:, 1][0]
            predicted_class = model.predict(input_features)[0]

            # Display the results
            st.success("Prediction Results:")
            st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
            st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")

            # Highlight results
            st.markdown(
                "- **Predicted Class:** Based on the highest probability.\n"
                "- **Probability:** Shows the likelihood of retention."
            )