Spaces:

Nahiyan14
/

Prediction_of_Retention

Sleeping

App Files Files Community

Nahiyan14 committed on Nov 27, 2024

Commit

af2ee6a

verified ·

1 Parent(s): 6f996eb

Update app.py

Browse files

Files changed (1) hide show

app.py +151 -151

app.py CHANGED Viewed

@@ -1,170 +1,170 @@
-import streamlit as st
-import pandas as pd
-import joblib
-import numpy as np
-from io import BytesIO  # For in-memory file handling
-# Load the trained model
-model = joblib.load("stacked_model.pkl")
-# Set page configuration
-st.set_page_config(page_title="Retention Prediction App", layout="centered")
-st.title("📈 Retention Prediction App")
-st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")
-# Upload Excel file
-uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])
-if uploaded_file is not None:
-    # Read the uploaded Excel file
-    data = pd.read_excel(uploaded_file)
-    st.subheader("Preview of Uploaded Data:")
-    st.write(data.head())
-    # Check if the required columns are in the uploaded file
-    required_columns = [
-        "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
-        "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
-        "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
-        "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
-    ]
-    if all(col in data.columns for col in required_columns):
-        st.success("All required columns are present!")
-        # Extract features for prediction
-        X = data[required_columns]
-        # Predict probabilities and classes
-        predicted_probabilities = model.predict_proba(X)[:, 1]
-        predicted_classes = model.predict(X)
-        # Add predictions to the original data
-        data["Predicted_Probability"] = predicted_probabilities
-        data["Predicted_Retention"] = predicted_classes
-        st.subheader("Predictions:")
-        st.write(data.head())
-        # Save the DataFrame to an in-memory buffer
-        output = BytesIO()
-        with pd.ExcelWriter(output, engine="openpyxl") as writer:
-            data.to_excel(writer, index=False, sheet_name="Predictions")
-        processed_data = output.getvalue()
-        # Create a download button
-        st.download_button(
-            label="Download Predictions as Excel",
-            data=processed_data,
-            file_name="predicted_retention.xlsx",
-            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
-        )
-    else:
-        missing_cols = [col for col in required_columns if col not in data.columns]
-        st.error(f"The following required columns are missing: {missing_cols}")
-# import streamlit as st
-# import joblib
-# import numpy as np
-# def calculate_failed_rates(number_of_previous, number_of_failed):
-#     """
-#     Calculate failure rates for Bup or Meth episodes.
-#     Handles cases where 'number_of_previous' is zero to avoid division errors.
-#     """
-#     return number_of_failed / number_of_previous if number_of_previous > 0 else 0.0
-# # Load the trained model
-# model = joblib.load("stacked_model.pkl")
-# # Feature list (15 features including dynamically calculated ones)
-# feature_names = [
-#     "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
-#     "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
-#     "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
-#     "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate"
-# ]
-# # Page title and description
-# st.set_page_config(page_title="Retention Prediction App", layout="centered")
-# st.title("📈 Retention Prediction App")
-# st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")
-# # Sidebar description
-# st.sidebar.header("📝 Input Patient Data")
-# st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")
-# # Create a form for inputs
-# with st.form("patient_form"):
-#     st.subheader("Patient Information")
-#     # Collect inputs
-#     col1, col2 = st.columns(2)  # Two-column layout for better organization
-#     with col1:
-#         moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
-#         prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
-#         rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
-#         previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
-#         timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
-#         ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
-#         rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)
-#     with col2:
-#         prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
-#         year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
-#         moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
-#         rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
-#         numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
-#         ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)
-#     st.subheader("Calculated Features *")
-#     col3, col4 = st.columns(2)
-#     with col3:
-#         number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
-#         number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
-#     with col4:
-#         number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
-#         number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)
-#     # Dynamic calculations
-#     failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
-#     failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)
-#     # Submit button
-#     submitted = st.form_submit_button("Predict Retention")
-# if submitted:
-#     # Create input feature array
-#     input_features = np.array([
-#         moudtype_meth, prior_bup_days_supply, rxlocation_o, previous_meth_episodes,
-#         timesince_last_episode, prior_meth_days_supply, year_of_initiation, moudtype_nalt,
-#         rpl_theme3, numberof_mhvisits, ctrxs_episode, rpl_theme2,
-#         ct_days_covered_antidep_episode, failed_bup_rate, failed_meth_rate
-#     ]).reshape(1, -1)
-#     # Check if the input matches the model's expected feature count
-#     expected_feature_count = 15
-#     if input_features.shape[1] != expected_feature_count:
-#         st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
-#     else:
-#         # Predict retention probability and class
-#         predicted_probability = model.predict_proba(input_features)[:, 1][0]  # Probability of positive class
-#         predicted_class = model.predict(input_features)[0]  # Predicted class (0 or 1)
-#         # Display the results
-#         st.success("Prediction Results:")
-#         st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
-#         st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")
-#         # Highlight results
-#         st.markdown("""
-#             - **Predicted Class:** Based on the highest probability.
-#             - **Probability:** Shows the likelihood of retention.
-#         """)

+# import streamlit as st
+# import pandas as pd
+# import joblib
+# import numpy as np
+# from io import BytesIO  # For in-memory file handling
+# # Load the trained model
+# model = joblib.load("stacked_model.pkl")
+# # Set page configuration
+# st.set_page_config(page_title="Retention Prediction App", layout="centered")
+# st.title("📈 Retention Prediction App")
+# st.markdown("Upload an Excel file containing **20% unseen data** to predict retention probabilities and classes.")
+# # Upload Excel file
+# uploaded_file = st.file_uploader("Upload your Excel file (with all required columns):", type=["xlsx"])
+# if uploaded_file is not None:
+#     # Read the uploaded Excel file
+#     data = pd.read_excel(uploaded_file)
+#     st.subheader("Preview of Uploaded Data:")
+#     st.write(data.head())
+#     # Check if the required columns are in the uploaded file
+#     required_columns = [
+#         "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
+#         "Failed_Bup_Rate", "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation",
+#         "MOUDType_Nalt", "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode",
+#         "RPL_THEME2", "CtDaysCoveredAntidepEpisode", "Failed_Meth_Rate"
+#     ]
+#     if all(col in data.columns for col in required_columns):
+#         st.success("All required columns are present!")
+#         # Extract features for prediction
+#         X = data[required_columns]
+#         # Predict probabilities and classes
+#         predicted_probabilities = model.predict_proba(X)[:, 1]
+#         predicted_classes = model.predict(X)
+#         # Add predictions to the original data
+#         data["Predicted_Probability"] = predicted_probabilities
+#         data["Predicted_Retention"] = predicted_classes
+#         st.subheader("Predictions:")
+#         st.write(data.head())
+#         # Save the DataFrame to an in-memory buffer
+#         output = BytesIO()
+#         with pd.ExcelWriter(output, engine="openpyxl") as writer:
+#             data.to_excel(writer, index=False, sheet_name="Predictions")
+#         processed_data = output.getvalue()
+#         # Create a download button
+#         st.download_button(
+#             label="Download Predictions as Excel",
+#             data=processed_data,
+#             file_name="predicted_retention.xlsx",
+#             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+#         )
+#     else:
+#         missing_cols = [col for col in required_columns if col not in data.columns]
+#         st.error(f"The following required columns are missing: {missing_cols}")
+import streamlit as st
+import joblib
+import numpy as np
+def calculate_failed_rates(number_of_previous, number_of_failed):
+    """
+    Return the share of previous episodes that failed.
+
+    The result is ``number_of_failed / number_of_previous``. When
+    ``number_of_previous`` is not greater than zero there is nothing to
+    divide by, so ``0.0`` is returned instead.
+    """
+    if number_of_previous > 0:
+        return number_of_failed / number_of_previous
+    # No prior episodes: report a zero rate rather than dividing by zero.
+    return 0.0
+# Load the trained model from the pickle file next to this script.
+# NOTE(review): this runs at module top level, so it presumably re-executes on
+# every Streamlit rerun -- confirm whether caching the model is worthwhile.
+model = joblib.load("stacked_model.pkl")
+# Feature list (15 features including dynamically calculated ones).
+# NOTE(review): this order differs from the `required_columns` list in the
+# commented-out batch version above (there "Failed_Bup_Rate" is the 5th
+# entry) -- confirm which order the model was actually trained with.
+feature_names = [
+    "MOUDType_Meth", "prior_BUP_days_supply", "RxLocation_O", "Previous_Meth_Episodes",
+    "TimeSinceLastEpisode", "prior_Meth_days_supply", "YearOfInitiation", "MOUDType_Nalt",
+    "RPL_THEME3", "NumberofMHVisits", "CtRxsEpisode", "RPL_THEME2",
+    "CtDaysCoveredAntidepEpisode", "Failed_Bup_Rate", "Failed_Meth_Rate"
+]
+# Page title and description
+st.set_page_config(page_title="Retention Prediction App", layout="centered")
+st.title("📈 Retention Prediction App")
+st.markdown("Predict the probability of **retention** and the corresponding class using a trained StackingClassifier model.")
+# Sidebar description
+st.sidebar.header("📝 Input Patient Data")
+st.sidebar.markdown("Enter the required patient data below. Fields marked with * are dynamically calculated.")
+# Create a form for inputs; the values are consumed by the `if submitted:`
+# branch that follows the form.
+with st.form("patient_form"):
+    st.subheader("Patient Information")
+    # Collect the 13 directly-entered model features.
+    col1, col2 = st.columns(2)  # Two-column layout for better organization
+    with col1:
+        moudtype_meth = st.selectbox("MOUDType_Meth (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Meth MOUD")
+        prior_bup_days_supply = st.number_input("Prior BUP Days Supply (days)", min_value=0, value=30, step=1)
+        rxlocation_o = st.selectbox("RxLocation_O (1=Yes, 0=No)", [0, 1], help="Whether RxLocation is O")
+        previous_meth_episodes = st.number_input("Previous Meth Episodes", min_value=0, value=0)
+        timesince_last_episode = st.number_input("Time Since Last Episode (days)", min_value=0, value=0)
+        ctrxs_episode = st.number_input("CtRxsEpisode", min_value=0, value=0)
+        rpl_theme2 = st.number_input("RPL_THEME2", min_value=0.0, value=0.5, step=0.001)
+    with col2:
+        prior_meth_days_supply = st.number_input("Prior Meth Days Supply (days)", min_value=0, value=0, step=1)
+        year_of_initiation = st.number_input("Year of Initiation", min_value=2005, value=2022, step=1)
+        moudtype_nalt = st.selectbox("MOUDType_Nalt (1=Yes, 0=No)", [0, 1], help="Whether the patient is on Nalt MOUD")
+        rpl_theme3 = st.number_input("RPL_THEME3", min_value=0.0, value=0.5, step=0.001)
+        numberof_mhvisits = st.number_input("Number of MH Visits", min_value=0, value=0)
+        ct_days_covered_antidep_episode = st.number_input("CtDaysCoveredAntidepEpisode", min_value=0, value=0)
+    # Raw counts used only to derive the two failure-rate features below.
+    st.subheader("Calculated Features *")
+    col3, col4 = st.columns(2)
+    with col3:
+        number_of_previous_bup_episodes = st.number_input("Number of Previous BUP Episodes", min_value=0, value=0)
+        number_of_failed_bup_episodes = st.number_input("Number of Failed BUP Episodes", min_value=0, value=0)
+    with col4:
+        # NOTE(review): this count is entered separately from the
+        # "Previous Meth Episodes" field in the first column, so the two can
+        # disagree -- confirm whether they are meant to be the same quantity.
+        number_of_previous_meth_episodes = st.number_input("Number of Previous Meth Episodes", min_value=0, value=0)
+        number_of_failed_meth_episodes = st.number_input("Number of Failed Meth Episodes", min_value=0, value=0)
+    # Dynamic calculations: failed / previous, or 0.0 when previous is zero.
+    failed_bup_rate = calculate_failed_rates(number_of_previous_bup_episodes, number_of_failed_bup_episodes)
+    failed_meth_rate = calculate_failed_rates(number_of_previous_meth_episodes, number_of_failed_meth_episodes)
+    # Submit button
+    submitted = st.form_submit_button("Predict Retention")
+if submitted:
+    # Key every collected value by its training column name so the input
+    # vector can be assembled in the column order the fitted model expects,
+    # instead of relying on a hand-maintained positional list.
+    feature_values = {
+        "MOUDType_Meth": moudtype_meth,
+        "prior_BUP_days_supply": prior_bup_days_supply,
+        "RxLocation_O": rxlocation_o,
+        "Previous_Meth_Episodes": previous_meth_episodes,
+        "TimeSinceLastEpisode": timesince_last_episode,
+        "prior_Meth_days_supply": prior_meth_days_supply,
+        "YearOfInitiation": year_of_initiation,
+        "MOUDType_Nalt": moudtype_nalt,
+        "RPL_THEME3": rpl_theme3,
+        "NumberofMHVisits": numberof_mhvisits,
+        "CtRxsEpisode": ctrxs_episode,
+        "RPL_THEME2": rpl_theme2,
+        "CtDaysCoveredAntidepEpisode": ct_days_covered_antidep_episode,
+        "Failed_Bup_Rate": failed_bup_rate,
+        "Failed_Meth_Rate": failed_meth_rate,
+    }
+    # Prefer the column order recorded on the fitted estimator (present when
+    # it was trained on a DataFrame); otherwise fall back to `feature_names`,
+    # which reproduces the previous positional order.
+    model_columns = getattr(model, "feature_names_in_", None)
+    ordered_names = list(model_columns) if model_columns is not None else list(feature_names)
+    unknown_names = [name for name in ordered_names if name not in feature_values]
+    # A failure count larger than the episode count would yield a rate above 1.
+    inconsistent_counts = (
+        number_of_failed_bup_episodes > number_of_previous_bup_episodes
+        or number_of_failed_meth_episodes > number_of_previous_meth_episodes
+    )
+    if inconsistent_counts:
+        st.error("Number of failed episodes cannot exceed the number of previous episodes.")
+    elif unknown_names:
+        st.error(f"The model expects features this form does not collect: {unknown_names}")
+    else:
+        # Create input feature array (one row, columns in model order)
+        input_features = np.array(
+            [feature_values[name] for name in ordered_names], dtype=float
+        ).reshape(1, -1)
+        # Check the input against the feature count the model reports, when
+        # it exposes one, rather than against a hard-coded constant.
+        expected_feature_count = getattr(model, "n_features_in_", len(feature_names))
+        if input_features.shape[1] != expected_feature_count:
+            st.error(f"Feature mismatch! Expected {expected_feature_count} features, but got {input_features.shape[1]}.")
+        else:
+            # Predict retention probability and class
+            predicted_probability = model.predict_proba(input_features)[:, 1][0]  # Probability of positive class
+            predicted_class = model.predict(input_features)[0]  # Predicted class (0 or 1)
+            # Display the results
+            st.success("Prediction Results:")
+            st.write(f"**Predicted Class:** {'Retention (1)' if predicted_class == 1 else 'No Retention (0)'}")
+            st.write(f"**Predicted Probability of Retention:** {predicted_probability:.2%}")
+            # Highlight results
+            st.markdown("""
+                - **Predicted Class:** Based on the highest probability.
+                - **Probability:** Shows the likelihood of retention.
+            """)