Spaces:

Nahiyan14
/

Prediction_of_Retention

Sleeping

File size: 7,777 Bytes

af2ee6a
 
 
 
 
e92f616
af2ee6a
 
e92f616
af2ee6a
 
 
 
0fbf81b
af2ee6a
 
0fbf81b
af2ee6a
 
 
0fbf81b
af2ee6a
 
0fbf81b
af2ee6a
 
 
 
 
 
 
0fbf81b
af2ee6a
 
0fbf81b
af2ee6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fbf81b
 
 
 
 
 
af2ee6a
 
 
0fbf81b
af2ee6a
 
 
 
 
 
0fbf81b
 
af2ee6a
 
0fbf81b
af2ee6a
 
 
 
 
 
 
0fbf81b
af2ee6a

# import streamlit as st
# import pandas as pd
# import joblib
# import numpy as np
# from io import BytesIO  # For in-memory file handling

# # Load the trained model
# model = joblib.load("stacked_model.pkl")

# # Set page configuration
# st.set_page_config(page_title="Retention Prediction App", layout="centered")
# st.title("📈 Retention Prediction App")
# st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")

# # Upload Excel file
# uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])

# if uploaded_file is not None:
#     # Read the uploaded Excel file
#     data = pd.read_excel(uploaded_file)

#     st.subheader("Preview of Uploaded Data:")
#     st.write(data.head())

#     # Check if the required columns are in the uploaded file
#     required_columns = [
#         "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
#         "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
#         "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
#         "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
#     ]

#     if all(col in data.columns for col in required_columns):
#         st.success("All required columns are present!")
        
#         # Extract features for prediction
#         X = data[required_columns]

#         # Predict probabilities and classes
#         predicted_probabilities = model.predict_proba(X)[:, 1]
#         predicted_classes = model.predict(X)

#         # Add predictions to the original data
#         data["Predicted_Probability"] = predicted_probabilities
#         data["Predicted_Retention"] = predicted_classes

#         st.subheader("Predictions:")
#         st.write(data.head())

#         # Save the DataFrame to an in-memory buffer
#         output = BytesIO()
#         with pd.ExcelWriter(output, engine="openpyxl") as writer:
#             data.to_excel(writer, index=False, sheet_name="Predictions")
#         processed_data = output.getvalue()

#         # Create a download button
#         st.download_button(
#             label="Download Predictions as Excel",
#             data=processed_data,
#             file_name="predicted_retention.xlsx",
#             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
#         )
#     else:
#         missing_cols = [col for col in required_columns if col not in data.columns]
#         st.error(f"The following required columns are missing: {missing_cols}")






import streamlit as st
import joblib
import numpy as np

def calculate_failed_rates(number_of_previous, number_of_failed):
    """
    Return the share of previous episodes that failed.

    The result is ``number_of_failed / number_of_previous``. When
    ``number_of_previous`` is not strictly positive there is nothing to
    divide by, so the rate is reported as 0.0.
    """
    if number_of_previous > 0:
        return number_of_failed / number_of_previous
    # No prior episodes: avoid a ZeroDivisionError and report a zero rate.
    return 0.0


# Deserialize the fitted estimator from disk.
# NOTE: joblib.load is pickle-based, so the file must come from a trusted
# source. Streamlit re-executes this script on each interaction, so this
# load runs again on every rerun.
model = joblib.load("stacked_model.pkl")

# Names of the 15 model inputs; the last two (the failure rates) are derived
# from user-entered counts rather than typed in directly.
feature_names = [
    "MOUDType_Meth",
    "prior_BUP_days_supply",
    "RxLocation_O",
    "Previous_Meth_Episodes",
    "TimeSinceLastEpisode",
    "prior_Meth_days_supply",
    "YearOfInitiation",
    "MOUDType_Nalt",
    "RPL_THEME3",
    "NumberofMHVisits",
    "CtRxsEpisode",
    "RPL_THEME2",
    "CtDaysCoveredAntidepEpisode",
    "Failed_Bup_Rate",
    "Failed_Meth_Rate",
]

# Page title and description
# set_page_config is issued before any other Streamlit call in this script;
# keep it first when adding new st.* calls above this point.
st.set_page_config(page_title="Retention Prediction App", layout="centered")
st.title("📈 Retention Prediction App")
st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")

# Sidebar description
# NOTE(review): the input widgets are created in the main-page form, not in
# the sidebar, so "below" in the sidebar text may not match what the user
# sees — confirm the intended wording.
st.sidebar.header("📝 Input Patient Data")
st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")

# Input form. Widgets created inside st.form are submitted together when the
# form's submit button is pressed; `submitted` is True only on that run.
with st.form("patient_form"):
    st.subheader("Patient Information")

    # Two-column layout; widgets appear in each column in the order created.
    col1, col2 = st.columns(2)
    with col1:
        moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
        prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
        rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
        previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
        timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
        ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
        # Explicit three-decimal format so the displayed value matches the
        # 0.001 step instead of being rounded to two decimals.
        rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001, format="%.3f")

    with col2:
        prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
        year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
        moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
        # Same display fix as RPL_THEME2.
        rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001, format="%.3f")
        numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
        ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)

    # Raw counts used only to derive the two failure-rate features.
    # NOTE(review): "Previous Meth Episodes" (first column above) and
    # "Number of Previous Meth Episodes" (below) are entered separately and
    # can disagree — confirm whether they are meant to be the same quantity.
    st.subheader("Calculated Features *")
    col3, col4 = st.columns(2)
    with col3:
        number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
        number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
    with col4:
        number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
        number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)

    # Derived features: failed / previous, or 0.0 when there are no previous
    # episodes. Recomputed on every script run from the current widget values.
    failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
    failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)

    # Submit button
    submitted = st.form_submit_button("Predict Retention")

if submitted:
    # Key every value by its model column name so the vector order is explicit
    # instead of being implied by position in a literal. The insertion order
    # here is the order the input array has always been built in.
    values_by_feature = {
        "MOUDType_Meth": moudtype_meth,
        "prior_BUP_days_supply": prior_bup_days_supply,
        "RxLocation_O": rxlocation_o,
        "Previous_Meth_Episodes": previous_meth_episodes,
        "TimeSinceLastEpisode": timesince_last_episode,
        "prior_Meth_days_supply": prior_meth_days_supply,
        "YearOfInitiation": year_of_initiation,
        "MOUDType_Nalt": moudtype_nalt,
        "RPL_THEME3": rpl_theme3,
        "NumberofMHVisits": numberof_mhvisits,
        "CtRxsEpisode": ctrxs_episode,
        "RPL_THEME2": rpl_theme2,
        "CtDaysCoveredAntidepEpisode": ct_days_covered_antidep_episode,
        "Failed_Bup_Rate": failed_bup_rate,
        "Failed_Meth_Rate": failed_meth_rate,
    }

    # More failed episodes than previous episodes would yield a rate above 1
    # (or a silently zeroed rate when the previous count is 0), so reject it.
    input_problems = []
    if number_of_failed_bup_episodes > number_of_previous_bup_episodes:
        input_problems.append(
            "Number of Failed BUP Episodes cannot exceed Number of Previous BUP Episodes."
        )
    if number_of_failed_meth_episodes > number_of_previous_meth_episodes:
        input_problems.append(
            "Number of Failed Meth Episodes cannot exceed Number of Previous Meth Episodes."
        )

    # A NumPy input carries no column names, so order is all the model sees.
    # Use the order recorded on the fitted model when it exposes exactly these
    # names; otherwise keep the original positional order.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None and {str(name) for name in fitted_names} == set(values_by_feature):
        column_order = [str(name) for name in fitted_names]
    else:
        column_order = list(values_by_feature)

    input_features = np.array(
        [values_by_feature[name] for name in column_order], dtype=float
    ).reshape(1, -1)

    # Compare against what the model reports it was fitted on when available,
    # falling back to the length of the declared feature list.
    expected_feature_count = getattr(model, "n_features_in_", len(feature_names))

    if input_problems:
        for problem in input_problems:
            st.error(problem)
    elif input_features.shape[1] != expected_feature_count:
        st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
    else:
        # Column 1 of predict_proba is taken as the probability of class 1.
        predicted_probability = model.predict_proba(input_features)[:, 1][0]
        predicted_class = model.predict(input_features)[0]  # Predicted class (0 or 1)

        # Display the results
        st.success("Prediction Results:")
        st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
        st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")

        # Short legend explaining the two values shown above.
        st.markdown("""
            - **Predicted Class:** Based on the highest probability.
            - **Probability:** Shows the likelihood of retention.
        """)