Spaces:

UVA-MSBA
/

T6

Sleeping

App Files Community

mns6rh committed 8 days ago

Commit

df403ba

verified ·

1 Parent(s): 6cff660

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -66

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ plt.rcParams["figure.dpi"] = 100
 # =========================
 # Load model (CatBoostClassifier saved via joblib)
 # =========================
-model = joblib.load("cat (3).joblib")
 FEATURES = [
     "Engagement",
@@ -37,6 +37,16 @@ CLUSTER_1 = {
     "Engagement": 4.9324,
 }
 CLUSTER_3 = {
     "Voice": 2.39,
     "DecisionAutonomy": 3.55,
@@ -47,13 +57,31 @@ CLUSTER_3 = {
     "Engagement": 3.3909,
 }
-VISIBLE_DRIVERS = ["Engagement", "SupportiveGM", "WellBeing", "WorkEnvironment"]
-VISIBLE_LABELS = ["Engagement", "Supportive GM", "Well-Being", "Work Environment"]
 # =========================
 # Helpers
 # =========================
-def clamp(x):
     return max(1.0, min(5.0, float(x)))
 def build_X(vals: dict) -> pd.DataFrame:
@@ -66,38 +94,30 @@ def prob_at_risk(X: pd.DataFrame) -> float:
     idx = classes.index(1)  # class 1 = At Risk
     return float(probs[idx])
-def risk_label(p):
     return "At Risk" if p >= 0.5 else "Not At Risk"
-def stable_threshold():
-    return min(CLUSTER_1[v] for v in VISIBLE_DRIVERS)
 # =========================
-# Plot: drivers vs threshold
 # =========================
-def make_driver_plot(Engagement, SupportiveGM, WellBeing, WorkEnvironment):
-    th = stable_threshold()
-    values = [Engagement, SupportiveGM, WellBeing, WorkEnvironment]
-    colors = ["seagreen" if v >= th else "firebrick" for v in values]
-    fig, ax = plt.subplots(figsize=(8.8, 3.4))
-    ax.bar(VISIBLE_LABELS, values, color=colors)
-    ax.axhline(th, linestyle="--", linewidth=2)
-    ax.text(3.05, th, "Stable threshold", va="center")
     ax.set_ylim(1, 5.4)
     ax.set_yticks([1, 2, 3, 4, 5])
     ax.set_ylabel("Score (1–5)")
-    ax.set_title("Key Drivers vs Stable Threshold")
-    ax.margins(x=0.12)
     plt.tight_layout()
-    plt.subplots_adjust(bottom=0.22)
     return fig
 # =========================
-# TRUE SHAP using CatBoost native SHAP values
 # =========================
 def make_catboost_shap_plot(X: pd.DataFrame):
     """
@@ -106,35 +126,33 @@ def make_catboost_shap_plot(X: pd.DataFrame):
     returns array shape: (n_rows, n_features + 1)
     last column is expected value; first n_features are SHAP contributions.
     """
-    fig, ax = plt.subplots(figsize=(8.8, 3.4))
     try:
         from catboost import Pool
         pool = Pool(X)  # 1-row
         shap_vals = model.get_feature_importance(pool, type="ShapValues")
-        # shap_vals shape: (1, n_features+1)
         contrib = shap_vals[0, :-1]  # drop expected value
         s = pd.Series(contrib, index=X.columns)
-        # You don't want to talk about management level in the story
         s = s.drop(labels=["ManagementLevel"], errors="ignore")
-        # top by absolute contribution
         s = s.reindex(s.abs().sort_values(ascending=False).index).head(8)
         ax.barh(s.index[::-1], s.values[::-1])
-        ax.set_title("Top Drivers of This Prediction (True SHAP)")
         ax.set_xlabel("Impact on model log-odds (signed)")
         plt.tight_layout()
         return fig
     except Exception as e:
-        # If catboost isn't installed or something fails, show the error nicely
         ax.text(
             0.5, 0.55,
-            "True SHAP chart unavailable.\nInstall 'catboost' in requirements.txt.",
             ha="center", va="center", fontsize=10
         )
         ax.text(0.5, 0.40, f"Error: {str(e)[:150]}", ha="center", va="center", fontsize=9)
@@ -145,87 +163,125 @@ def make_catboost_shap_plot(X: pd.DataFrame):
 # =========================
 # Prediction
 # =========================
-def predict(Engagement, SupportiveGM, WellBeing, WorkEnvironment):
-    Engagement = clamp(Engagement)
-    SupportiveGM = clamp(SupportiveGM)
-    WellBeing = clamp(WellBeing)
-    WorkEnvironment = clamp(WorkEnvironment)
-    # Model needs hidden vars; keep them at stable values to keep the story focused
     vals = {
-        "Engagement": Engagement,
-        "SupportiveGM": SupportiveGM,
-        "ManagementLevel": 2,  # fixed constant, not shown
-        "WellBeing": WellBeing,
-        "Voice": CLUSTER_1["Voice"],
-        "DecisionAutonomy": CLUSTER_1["DecisionAutonomy"],
-        "Workload": CLUSTER_1["Workload"],
-        "WorkEnvironment": WorkEnvironment,
     }
     X = build_X(vals)
     p = prob_at_risk(X)
     headline = f"Predicted Status: {risk_label(p)}"
-    driver_fig = make_driver_plot(Engagement, SupportiveGM, WellBeing, WorkEnvironment)
     shap_fig = make_catboost_shap_plot(X)
-    return headline, driver_fig, shap_fig
-def apply_recommendation():
-    e = CLUSTER_1["Engagement"]
-    s = CLUSTER_1["SupportiveGM"]
-    w = CLUSTER_1["WellBeing"]
-    env = CLUSTER_1["WorkEnvironment"]
-    headline, driver_fig, shap_fig = predict(e, s, w, env)
-    return e, s, w, env, headline, driver_fig, shap_fig
 # =========================
 # UI Layout (no scrolling)
 # =========================
 CSS = """
 #app-wrap { max-width: 1200px; margin: 0 auto; }
-.compact .gr-markdown { margin-bottom: 0.4rem !important; }
 """
 with gr.Blocks(css=CSS) as demo:
     gr.Markdown(
         "<div id='app-wrap' class='compact'>"
-        "<h2>Retention Recommendation Simulator</h2>"
-        "<p style='margin-top:0;'>Adjust the 4 drivers and click <b>Predict</b>. "
-        "Click <b>Apply Recommendation Plan</b> to jump to the stable target.</p>"
         "</div>"
     )
     with gr.Row():
         # LEFT: sliders + buttons
-        with gr.Column(scale=5, min_width=420):
             Engagement = gr.Slider(1, 5, value=CLUSTER_3["Engagement"], step=0.01, label="Engagement")
             SupportiveGM = gr.Slider(1, 5, value=CLUSTER_3["SupportiveGM"], step=0.01, label="Supportive GM")
             WellBeing = gr.Slider(1, 5, value=CLUSTER_3["WellBeing"], step=0.01, label="Well-Being")
             WorkEnvironment = gr.Slider(1, 5, value=CLUSTER_3["WorkEnvironment"], step=0.01, label="Work Environment")
             with gr.Row():
                 btn_predict = gr.Button("Predict")
-                btn_recommend = gr.Button("Apply Recommendation Plan")
         # RIGHT: headline + two plots stacked
         with gr.Column(scale=7, min_width=520):
             headline = gr.Textbox(label="Result", value="", interactive=False)
-            driver_plot = gr.Plot(label="Key Drivers vs Stable Threshold")
-            shap_plot = gr.Plot(label="True SHAP (CatBoost)")
     btn_predict.click(
         fn=predict,
-        inputs=[Engagement, SupportiveGM, WellBeing, WorkEnvironment],
-        outputs=[headline, driver_plot, shap_plot],
     )
-    btn_recommend.click(
-        fn=apply_recommendation,
         inputs=[],
-        outputs=[Engagement, SupportiveGM, WellBeing, WorkEnvironment, headline, driver_plot, shap_plot],
     )
 demo.launch()

 # =========================
 # Load model (CatBoostClassifier saved via joblib)
 # =========================
+model = joblib.load("cat (1).joblib")
 FEATURES = [
     "Engagement",
     "Engagement": 4.9324,
 }
+CLUSTER_2 = {
+    "Voice": 3.94,
+    "DecisionAutonomy": 4.24,
+    "Workload": 3.76,
+    "WellBeing": 4.0251,
+    "WorkEnvironment": 4.1484,
+    "SupportiveGM": 4.1275,
+    "Engagement": 4.2828,
+}
 CLUSTER_3 = {
     "Voice": 2.39,
     "DecisionAutonomy": 3.55,
     "Engagement": 3.3909,
 }
+# All seven survey variables are treated as key drivers.
+ALL_DRIVER_VARS = [
+    "Engagement",
+    "SupportiveGM",
+    "WellBeing",
+    "WorkEnvironment",
+    "Voice",
+    "DecisionAutonomy",
+    "Workload",
+]
+ALL_DRIVER_LABELS = [
+    "Engagement",
+    "Supportive GM",
+    "Well-Being",
+    "Work Environment",
+    "Voice",
+    "Decision Autonomy",
+    "Workload",
+]
 # =========================
 # Helpers
 # =========================
+def clamp_1_5(x):
     return max(1.0, min(5.0, float(x)))
 def build_X(vals: dict) -> pd.DataFrame:
     idx = classes.index(1)  # class 1 = At Risk
     return float(probs[idx])
+def risk_label(p: float) -> str:
     return "At Risk" if p >= 0.5 else "Not At Risk"
 # =========================
+# Plot: "Average of key drivers" (shows ALL driver vars)
 # =========================
+def make_driver_plot(driver_vals: dict):
+    values = [driver_vals[v] for v in ALL_DRIVER_VARS]
+    fig, ax = plt.subplots(figsize=(8.8, 3.2))
+    ax.bar(ALL_DRIVER_LABELS, values)
     ax.set_ylim(1, 5.4)
     ax.set_yticks([1, 2, 3, 4, 5])
     ax.set_ylabel("Score (1–5)")
+    ax.set_title("Average of key drivers")
+    ax.margins(x=0.08)
     plt.tight_layout()
+    plt.subplots_adjust(bottom=0.28)
     return fig
 # =========================
+# Plot: TRUE SHAP using CatBoost native SHAP values
 # =========================
 def make_catboost_shap_plot(X: pd.DataFrame):
     """
     returns array shape: (n_rows, n_features + 1)
     last column is expected value; first n_features are SHAP contributions.
     """
+    fig, ax = plt.subplots(figsize=(8.8, 3.2))
     try:
         from catboost import Pool
         pool = Pool(X)  # 1-row
         shap_vals = model.get_feature_importance(pool, type="ShapValues")
         contrib = shap_vals[0, :-1]  # drop expected value
         s = pd.Series(contrib, index=X.columns)
+        # Keep SHAP focused on survey drivers (exclude ManagementLevel)
         s = s.drop(labels=["ManagementLevel"], errors="ignore")
+        # Top 8 by absolute contribution
         s = s.reindex(s.abs().sort_values(ascending=False).index).head(8)
         ax.barh(s.index[::-1], s.values[::-1])
+        ax.set_title("Feature Importance (Shap)")
         ax.set_xlabel("Impact on model log-odds (signed)")
         plt.tight_layout()
         return fig
     except Exception as e:
         ax.text(
             0.5, 0.55,
+            "SHAP chart unavailable.\nInstall 'catboost' in requirements.txt.",
             ha="center", va="center", fontsize=10
         )
         ax.text(0.5, 0.40, f"Error: {str(e)[:150]}", ha="center", va="center", fontsize=9)
 # =========================
 # Prediction
 # =========================
+def predict(
+    Engagement,
+    SupportiveGM,
+    WellBeing,
+    WorkEnvironment,
+    Voice,
+    DecisionAutonomy,
+    Workload,
+):
+    # Clamp sliders
+    driver_vals = {
+        "Engagement": clamp_1_5(Engagement),
+        "SupportiveGM": clamp_1_5(SupportiveGM),
+        "WellBeing": clamp_1_5(WellBeing),
+        "WorkEnvironment": clamp_1_5(WorkEnvironment),
+        "Voice": clamp_1_5(Voice),
+        "DecisionAutonomy": clamp_1_5(DecisionAutonomy),
+        "Workload": clamp_1_5(Workload),
+    }
+    # Build model row (ManagementLevel fixed internally)
     vals = {
+        **driver_vals,
+        "ManagementLevel": 2,
     }
     X = build_X(vals)
     p = prob_at_risk(X)
     headline = f"Predicted Status: {risk_label(p)}"
+    drivers_fig = make_driver_plot(driver_vals)
     shap_fig = make_catboost_shap_plot(X)
+    return headline, drivers_fig, shap_fig
+# =========================
+# Button handler: "At risk group" loads the per-driver average of Cluster 1 and Cluster 2
+# =========================
+def at_risk_group():
+    avg = {}
+    for v in ALL_DRIVER_VARS:
+        avg[v] = (CLUSTER_1[v] + CLUSTER_2[v]) / 2.0
+    headline, drivers_fig, shap_fig = predict(
+        avg["Engagement"],
+        avg["SupportiveGM"],
+        avg["WellBeing"],
+        avg["WorkEnvironment"],
+        avg["Voice"],
+        avg["DecisionAutonomy"],
+        avg["Workload"],
+    )
+    # Return slider updates + outputs
+    return (
+        avg["Engagement"],
+        avg["SupportiveGM"],
+        avg["WellBeing"],
+        avg["WorkEnvironment"],
+        avg["Voice"],
+        avg["DecisionAutonomy"],
+        avg["Workload"],
+        headline,
+        drivers_fig,
+        shap_fig,
+    )
 # =========================
 # UI Layout (no scrolling)
 # =========================
 CSS = """
 #app-wrap { max-width: 1200px; margin: 0 auto; }
+.compact .gr-markdown { margin-bottom: 0.35rem !important; }
 """
 with gr.Blocks(css=CSS) as demo:
     gr.Markdown(
         "<div id='app-wrap' class='compact'>"
+        "<h2>Retention Simulator</h2>"
+        "<p style='margin-top:0;'>Adjust all drivers and click <b>Predict</b>. "
+        "Click <b>At risk group</b> to load the average of Cluster 1 and Cluster 2.</p>"
         "</div>"
     )
     with gr.Row():
         # LEFT: sliders + buttons
+        with gr.Column(scale=5, min_width=430):
+            # Default starting point: Cluster 3 (most at-risk)
             Engagement = gr.Slider(1, 5, value=CLUSTER_3["Engagement"], step=0.01, label="Engagement")
             SupportiveGM = gr.Slider(1, 5, value=CLUSTER_3["SupportiveGM"], step=0.01, label="Supportive GM")
             WellBeing = gr.Slider(1, 5, value=CLUSTER_3["WellBeing"], step=0.01, label="Well-Being")
             WorkEnvironment = gr.Slider(1, 5, value=CLUSTER_3["WorkEnvironment"], step=0.01, label="Work Environment")
+            Voice = gr.Slider(1, 5, value=CLUSTER_3["Voice"], step=0.01, label="Voice")
+            DecisionAutonomy = gr.Slider(1, 5, value=CLUSTER_3["DecisionAutonomy"], step=0.01, label="Decision Autonomy")
+            Workload = gr.Slider(1, 5, value=CLUSTER_3["Workload"], step=0.01, label="Workload")
             with gr.Row():
                 btn_predict = gr.Button("Predict")
+                btn_atrisk = gr.Button("At risk group")
         # RIGHT: headline + two plots stacked
         with gr.Column(scale=7, min_width=520):
             headline = gr.Textbox(label="Result", value="", interactive=False)
+            drivers_plot = gr.Plot(label="Average of key drivers")
+            shap_plot = gr.Plot(label="Feature Importance (Shap)")
     btn_predict.click(
         fn=predict,
+        inputs=[Engagement, SupportiveGM, WellBeing, WorkEnvironment, Voice, DecisionAutonomy, Workload],
+        outputs=[headline, drivers_plot, shap_plot],
     )
+    btn_atrisk.click(
+        fn=at_risk_group,
         inputs=[],
+        outputs=[
+            Engagement, SupportiveGM, WellBeing, WorkEnvironment, Voice, DecisionAutonomy, Workload,
+            headline, drivers_plot, shap_plot
+        ],
     )
 demo.launch()