LianHP committed on
Commit
1613eae
·
verified ·
1 Parent(s): cf40bc6

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +110 -33
  2. shap_waterfall.png +0 -0
app.py CHANGED
@@ -5,66 +5,143 @@ import numpy as np
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
7
 
 
 
 
8
  # ---------------------------
9
- # TRAIN A TINY MODEL
10
  # ---------------------------
11
  np.random.seed(42)
 
 
12
  df = pd.DataFrame({
13
  "age": np.random.randint(20, 80, 200),
14
  "bmi": np.random.uniform(18, 35, 200),
15
  "steps": np.random.randint(2000, 14000, 200),
16
  })
17
- df["cost"] = df["age"]*20 + df["bmi"]*40 - df["steps"]*0.4 + np.random.normal(0,200,200)
18
 
19
- X = df[["age","bmi","steps"]]
 
 
 
 
 
 
 
 
 
 
20
  y = df["cost"]
21
 
22
- model = xgb.XGBRegressor(n_estimators=40, max_depth=3)
 
 
 
 
 
 
 
 
23
  model.fit(X, y)
24
 
25
- explainer = shap.Explainer(model, X)
 
 
 
 
 
26
 
27
  # ---------------------------
28
- # GRADIO INFERENCE FUNCTION
29
  # ---------------------------
30
- def explain_cost(age, bmi, steps):
31
- input_data = pd.DataFrame([{
 
 
 
 
 
 
 
32
  "age": age,
33
  "bmi": bmi,
34
  "steps": steps
35
  }])
36
 
37
- shap_values = explainer(input_data)
 
38
 
39
- # Create SHAP waterfall plot
40
- plt.figure(figsize=(8,6))
41
- shap.waterfall_plot(shap_values[0], show=False)
42
- plt.tight_layout()
43
 
44
- # Save plot to temporary file
45
- plot_path = "shap_plot.png"
46
- plt.savefig(plot_path)
 
 
 
 
 
 
47
  plt.close()
48
 
49
- pred = model.predict(input_data)[0]
50
- return pred, plot_path
51
 
52
 
53
  # ---------------------------
54
- # GRADIO UI
55
  # ---------------------------
56
  with gr.Blocks() as demo:
57
- gr.Markdown("# SHAP Explainability Demo")
58
-
59
- age = gr.Slider(20, 80, value=40, label="Age")
60
- bmi = gr.Slider(18, 35, value=25, label="BMI")
61
- steps = gr.Slider(2000, 15000, value=8000, label="Daily Steps")
62
-
63
- btn = gr.Button("Explain Prediction")
64
-
65
- pred_output = gr.Number(label="Predicted Cost ($)")
66
- shap_output = gr.Image(label="SHAP Waterfall")
67
-
68
- btn.click(explain_cost, [age, bmi, steps], [pred_output, shap_output])
69
-
70
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
7
 
8
+ # Make sure matplotlib does not try to open any GUI backend
9
+ plt.switch_backend("Agg")
10
+
11
  # ---------------------------
12
+ # 1. CREATE SYNTHETIC DATA & TRAIN MODEL
13
  # ---------------------------
14
  np.random.seed(42)
15
+
16
+ # Simple synthetic dataset: "health-like" features and a fake cost
17
  df = pd.DataFrame({
18
  "age": np.random.randint(20, 80, 200),
19
  "bmi": np.random.uniform(18, 35, 200),
20
  "steps": np.random.randint(2000, 14000, 200),
21
  })
 
22
 
23
+ # Fake target: cost in dollars
24
+ df["cost"] = (
25
+ 10 * df["age"] +
26
+ 50 * (df["bmi"] - 25) -
27
+ 0.001 * df["steps"] +
28
+ np.random.normal(0, 50, size=len(df))
29
+ )
30
+
31
+ FEATURE_COLUMNS = ["age", "bmi", "steps"]
32
+
33
+ X = df[FEATURE_COLUMNS]
34
  y = df["cost"]
35
 
36
+ # Train a tiny XGBoost regressor
37
+ model = xgb.XGBRegressor(
38
+ n_estimators=40,
39
+ max_depth=3,
40
+ learning_rate=0.1,
41
+ subsample=0.8,
42
+ colsample_bytree=0.8,
43
+ random_state=42
44
+ )
45
  model.fit(X, y)
46
 
47
+ # ---------------------------
48
+ # 2. BUILD A SHAP TREE EXPLAINER (SAFE FOR TREE MODELS)
49
+ # ---------------------------
50
+ # DO NOT use shap.Explainer(model, X) -> causes TypeError on some setups
51
+ explainer = shap.TreeExplainer(model)
52
+
53
 
54
  # ---------------------------
55
+ # 3. FUNCTION TO EXPLAIN A SINGLE PREDICTION
56
  # ---------------------------
57
+ def explain_cost(age: float, bmi: float, steps: int):
58
+ """
59
+ Take user inputs, compute predicted cost, and generate a SHAP waterfall plot.
60
+ Returns:
61
+ - predicted cost (float)
62
+ - path to saved waterfall PNG (string)
63
+ """
64
+ # Build a single-row DataFrame with the same columns as training
65
+ input_df = pd.DataFrame([{
66
  "age": age,
67
  "bmi": bmi,
68
  "steps": steps
69
  }])
70
 
71
+ # Model prediction
72
+ pred = model.predict(input_df)[0]
73
 
74
+ # Compute SHAP values for this single instance
75
+ shap_explanation = explainer(input_df) # returns shap.Explanation object
 
 
76
 
77
+ # Make a waterfall plot for the first (and only) instance
78
+ plt.figure(figsize=(8, 5))
79
+ shap.plots.waterfall(shap_explanation[0], show=False)
80
+ plt.title("SHAP Waterfall Explanation", fontsize=12)
81
+
82
+ # Save plot to file for Gradio to show
83
+ output_path = "shap_waterfall.png"
84
+ plt.tight_layout()
85
+ plt.savefig(output_path, bbox_inches="tight")
86
  plt.close()
87
 
88
+ # Return prediction and image path
89
+ return float(pred), output_path
90
 
91
 
92
  # ---------------------------
93
+ # 4. GRADIO UI
94
  # ---------------------------
95
  with gr.Blocks() as demo:
96
+ gr.Markdown(
97
+ """
98
+ # SHAP Explainability Demo
99
+
100
+ This app trains a tiny XGBoost regression model on synthetic data
101
+ and explains each prediction using **SHAP TreeExplainer**.
102
+
103
+ Adjust the sliders and click **Explain Prediction** to see:
104
+ - The model's predicted cost
105
+ - A SHAP waterfall plot showing how each feature pushes the prediction
106
+ higher or lower relative to the model's base value.
107
+ """
108
+ )
109
+
110
+ with gr.Row():
111
+ age = gr.Slider(
112
+ minimum=20,
113
+ maximum=80,
114
+ value=40,
115
+ step=1,
116
+ label="Age"
117
+ )
118
+ bmi = gr.Slider(
119
+ minimum=18,
120
+ maximum=35,
121
+ value=25,
122
+ step=0.1,
123
+ label="BMI"
124
+ )
125
+ steps = gr.Slider(
126
+ minimum=2000,
127
+ maximum=15000,
128
+ value=8000,
129
+ step=500,
130
+ label="Daily Steps"
131
+ )
132
+
133
+ explain_button = gr.Button("Explain Prediction")
134
+
135
+ with gr.Row():
136
+ pred_output = gr.Number(label="Predicted Cost ($)")
137
+ shap_output = gr.Image(label="SHAP Waterfall", type="filepath")
138
+
139
+ explain_button.click(
140
+ fn=explain_cost,
141
+ inputs=[age, bmi, steps],
142
+ outputs=[pred_output, shap_output]
143
+ )
144
+
145
+ # For local debugging; on Hugging Face this is ignored but harmless
146
+ if __name__ == "__main__":
147
+ demo.launch(share=True)
shap_waterfall.png ADDED