LianHP committed on
Commit
bddd8b3
·
verified ·
1 Parent(s): 26b2d75

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. .gradio/certificate.pem +31 -0
  2. README.md +3 -9
  3. app.py +344 -0
  4. requirements.txt +4 -0
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Propensity Score
3
- emoji: 🏆
4
- colorFrom: red
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: propensity_score
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 5.47.2
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.linear_model import LogisticRegression, LinearRegression
4
+ import gradio as gr
5
+
6
# Columns the uploaded CSV must provide. The analysis refuses to run when any
# of them is absent and drops every row that has a missing value in one of them.
REQUIRED_COLS = [
    # Treatment arm indicator: 0 = control, 1 = new drug.
    "treatment",
    # Outcome: 0/1 or a continuous value.
    "outcome",
    "age",
    # Sex: 0/1, or M/F labels that can be converted.
    "sex",
    "baseline_risk_score",
    "comorbidity_index",
]
14
+
15
+
16
def _standardized_mean_difference(mean_a, mean_b, std_a, std_b):
    """Return (mean_a - mean_b) divided by the pooled standard deviation.

    The pooled standard deviation is sqrt((std_a**2 + std_b**2) / 2). When it
    is exactly zero the difference is reported as 0.0 instead of dividing by
    zero. A NaN standard deviation propagates to a NaN result.
    """
    pooled_std = np.sqrt((std_a**2 + std_b**2) / 2)
    if pooled_std == 0:
        return 0.0
    return (mean_a - mean_b) / pooled_std


def _weighted_std(values, weights):
    """Return the weighted (population-style) standard deviation of values."""
    center = np.average(values, weights=weights)
    return np.sqrt(np.average((values - center) ** 2, weights=weights))


def propensity_covariate_adjustment(file):
    """Run a propensity-score analysis on an uploaded CSV and report it.

    The report contains the crude difference in mean outcome, an
    IPTW-weighted difference, a covariate balance table (standardized mean
    differences before and after weighting) and the treatment coefficient of
    a linear regression ``outcome ~ treatment + propensity_score``.

    Args:
        file: The uploaded CSV, given either as a filesystem path or as an
            object exposing that path as ``.name``; ``None`` when nothing was
            uploaded.

    Returns:
        A Markdown string: the full report on success, or a single line
        starting with "❌" that explains why the analysis could not run.
    """
    if file is None:
        return "❌ Please upload a CSV file."

    # The upload may arrive as a plain path string or as a wrapper carrying
    # the path in `.name`; accept both instead of failing on the former.
    csv_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:  # UI boundary: report any read/parse failure
        return f"❌ Error reading file: {e}"

    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        return (
            "❌ Missing required columns: "
            + ", ".join(missing)
            + f"\n\nYour columns: {list(df.columns)}"
        )

    # Coerce the numeric columns; unparseable entries become NaN and are
    # removed together with genuinely missing values below.
    for col in (
        "treatment",
        "outcome",
        "age",
        "baseline_risk_score",
        "comorbidity_index",
    ):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Sex may be given as M/F labels. Check "not numeric" rather than
    # "dtype == object" so string-dtype columns are handled too, and strip
    # padding so values like "M " still match. Entries that are not M/F fall
    # back to numeric parsing (e.g. "0"/"1" stored as text).
    sex = df["sex"]
    if not pd.api.types.is_numeric_dtype(sex):
        labels = sex.astype(str).str.strip().str.upper().map({"M": 0, "F": 1})
        sex = labels.fillna(pd.to_numeric(sex, errors="coerce"))
    df["sex"] = pd.to_numeric(sex, errors="coerce")

    df = df.dropna(subset=REQUIRED_COLS)
    if df.shape[0] == 0:
        return "❌ After cleaning, no valid rows remain. Please check your data."

    # Any other treatment code would turn the logistic model into a
    # multiclass fit, so column 1 of predict_proba would no longer be
    # P(treatment=1), and those rows would belong to neither arm.
    unexpected = sorted(float(v) for v in set(df["treatment"].unique()) - {0, 1})
    if unexpected:
        return (
            "❌ Column 'treatment' must contain only 0 (control) and 1 (new drug). "
            f"Found other values: {unexpected}"
        )

    is_treated = df["treatment"] == 1
    if is_treated.all() or not is_treated.any():
        return "❌ Need both treated (treatment=1) and control (treatment=0) subjects."

    # Crude (unadjusted) effect: plain difference in mean outcome.
    mean_outcome_treated = df.loc[is_treated, "outcome"].mean()
    mean_outcome_control = df.loc[~is_treated, "outcome"].mean()
    crude_effect = mean_outcome_treated - mean_outcome_control

    # ----------------------------
    # Step 1: Propensity score model
    # ----------------------------
    X_ps = df[["age", "sex", "baseline_risk_score", "comorbidity_index"]]
    try:
        ps_model = LogisticRegression(max_iter=1000)
        ps_model.fit(X_ps, df["treatment"])
    except Exception as e:
        return f"❌ Error fitting propensity score model: {e}"

    # Classes are {0, 1} at this point, so column 1 is P(treatment=1 | X).
    df["propensity_score"] = ps_model.predict_proba(X_ps)[:, 1]

    # ----------------------------
    # Step 2: IPTW (Inverse Probability of Treatment Weighting)
    # ----------------------------
    # treated: 1/PS, control: 1/(1-PS). Scores are clipped away from 0 and 1
    # for the weights only, so a saturated prediction cannot produce an
    # infinite weight. (Stabilized weights would scale these by the marginal
    # treatment probability; they are not used here.)
    eps = 1e-6
    ps_for_weights = df["propensity_score"].clip(eps, 1.0 - eps)
    df["iptw_weight"] = np.where(
        is_treated,
        1.0 / ps_for_weights,
        1.0 / (1.0 - ps_for_weights),
    )

    # Split only now so both groups carry the score and weight columns.
    treated = df[is_treated]
    control = df[~is_treated]

    weighted_mean_outcome_treated = np.average(
        treated["outcome"], weights=treated["iptw_weight"]
    )
    weighted_mean_outcome_control = np.average(
        control["outcome"], weights=control["iptw_weight"]
    )
    iptw_effect = weighted_mean_outcome_treated - weighted_mean_outcome_control

    # ----------------------------
    # Step 3: Standardized Mean Differences (SMD)
    # ----------------------------
    covariates = [
        "age",
        "sex",
        "baseline_risk_score",
        "comorbidity_index",
        "propensity_score",
    ]
    table_lines = [
        "| Covariate | Mean (Treated) Before | Mean (Control) Before | SMD Before | Mean (Treated) After | Mean (Control) After | SMD After |",
        "|-----------|----------------------|----------------------|------------|---------------------|---------------------|-----------|",
    ]
    for cov in covariates:
        # Before adjustment: raw group means and sample standard deviations.
        mean_t_before = treated[cov].mean()
        mean_c_before = control[cov].mean()
        smd_before = _standardized_mean_difference(
            mean_t_before, mean_c_before, treated[cov].std(), control[cov].std()
        )

        # After adjustment: the same statistics under the IPTW weights.
        mean_t_after = np.average(treated[cov], weights=treated["iptw_weight"])
        mean_c_after = np.average(control[cov], weights=control["iptw_weight"])
        smd_after = _standardized_mean_difference(
            mean_t_after,
            mean_c_after,
            _weighted_std(treated[cov], treated["iptw_weight"]),
            _weighted_std(control[cov], control["iptw_weight"]),
        )

        table_lines.append(
            f"| {cov} | {mean_t_before:.3f} | {mean_c_before:.3f} | "
            f"{smd_before:.3f} | {mean_t_after:.3f} | {mean_c_after:.3f} | "
            f"{smd_after:.3f} |"
        )
    balance_table = "\n".join(table_lines) + "\n"

    # ----------------------------
    # Step 4: Covariate adjustment
    # outcome ~ treatment + propensity_score
    # ----------------------------
    try:
        lin_model = LinearRegression()
        lin_model.fit(df[["treatment", "propensity_score"]], df["outcome"])
    except Exception as e:
        return f"❌ Error fitting outcome regression model: {e}"

    intercept = lin_model.intercept_
    beta_treat = lin_model.coef_[0]
    beta_ps = lin_model.coef_[1]

    # Summaries
    avg_ps_treated = treated["propensity_score"].mean()
    avg_ps_control = control["propensity_score"].mean()
    avg_iptw_treated = treated["iptw_weight"].mean()
    avg_iptw_control = control["iptw_weight"].mean()

    n_treated = treated.shape[0]
    n_control = control.shape[0]

    # The report text is kept at column 0: indented Markdown would be
    # rendered as a code block and the tables would not display.
    text = f"""
# Propensity Score Covariate Adjustment – Drug Development Example

## 1. Data Summary

- Number of patients: **{df.shape[0]}**
- Treated (new drug): **{n_treated}**
- Control (standard of care): **{n_control}**

Outcome is interpreted as:
- 1 = event of interest (e.g., progression-free at 12 months)
- 0 = no event (e.g., progressed or not progression-free)

---

## 2. Crude (Unadjusted) Treatment Effect

Unadjusted difference in mean outcome:

- Mean outcome (treated): **{mean_outcome_treated:.3f}**
- Mean outcome (control): **{mean_outcome_control:.3f}**

**Crude effect (treated - control):** **{crude_effect:.3f}**

This ignores all baseline differences between the two groups.

---

## 3. Propensity Score Model

We fit a logistic regression to estimate the probability of receiving the new drug:

**P(treatment=1 | age, sex, baseline_risk_score, comorbidity_index)**

Average estimated propensity scores:

- Treated group: **{avg_ps_treated:.3f}**
- Control group: **{avg_ps_control:.3f}**

A big difference here indicates some baseline imbalance in who gets treated.

---

## 4. Standardized Mean Differences (Balance Table)

Standardized Mean Differences (SMD) measure the balance of covariates between treated and control groups.
SMD < 0.1 is generally considered well-balanced. SMD < 0.25 is often acceptable.

**Balance Before vs After IPTW Weighting:**

{balance_table}

**Interpretation:**
- SMD values closer to 0 indicate better balance
- After IPTW weighting, SMDs should be reduced, indicating improved balance
- The propensity score itself is included as a check on the propensity model

---

## 5. IPTW (Inverse Probability of Treatment Weighting)

We calculate IPTW weights as:
- **Treated subjects:** w = 1 / propensity_score
- **Control subjects:** w = 1 / (1 - propensity_score)

Average IPTW weights:
- Treated group: **{avg_iptw_treated:.3f}**
- Control group: **{avg_iptw_control:.3f}**

### Weighted Outcome Means

- Weighted mean outcome (treated): **{weighted_mean_outcome_treated:.3f}**
- Weighted mean outcome (control): **{weighted_mean_outcome_control:.3f}**

**IPTW-adjusted effect (treated - control):** **{iptw_effect:.3f}**

This is the treatment effect estimated using IPTW weighting to balance the groups.

---

## 6. Covariate Adjustment Using Propensity Scores

We also fit a linear regression:

**outcome ~ treatment + propensity_score**

- Intercept: **{intercept:.3f}**
- Coefficient on treatment (adjusted effect): **{beta_treat:.3f}**
- Coefficient on propensity score: **{beta_ps:.3f}**

**Interpretation:**

- The **crude effect** shows what happens if we just compare treated vs control.
- The **IPTW-adjusted effect** uses weighting to create a pseudo-population with balanced covariates.
- The **regression-adjusted effect** (coefficient on treatment) estimates the treatment effect
**after controlling for baseline covariates via the propensity score** in a regression model.

Both methods (IPTW and regression adjustment) should give similar results if the model is correctly specified.

---

## Summary of Treatment Effects

| Method | Treatment Effect |
|--------|------------------|
| Crude (unadjusted) | **{crude_effect:.3f}** |
| IPTW-weighted | **{iptw_effect:.3f}** |
| Regression-adjusted | **{beta_treat:.3f}** |

In a real drug development / RWE setting, you might:
- Use more covariates (labs, performance status, biomarkers)
- Use logistic or survival models for the outcome
- Compute confidence intervals and p-values
- Combine IPTW with regression adjustment (doubly robust estimation)

This app demonstrates **propensity score-based covariate adjustment** and **IPTW weighting**.
"""

    return text
306
+
307
+
308
# UI definition: an intro panel, a CSV upload, a run button and a Markdown
# area that shows whatever propensity_covariate_adjustment returns.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Propensity Score Covariate Adjustment – Drug Development (Demo)

        Upload a CSV file with observational data comparing a **new drug** vs **standard of care**.

        ### Required columns:
        - `treatment` (0 = control, 1 = new drug)
        - `outcome` (0/1 or continuous outcome)
        - `age`
        - `sex` (0/1 or M/F)
        - `baseline_risk_score`
        - `comorbidity_index`

        The app will:
        1. Estimate **propensity scores** with logistic regression
        2. Compute the **crude (unadjusted)** treatment effect
        3. Calculate **IPTW (Inverse Probability of Treatment Weighting)** and weighted means
        4. Compute **Standardized Mean Differences (SMD)** before vs after adjustment
        5. Fit an **outcome model** with outcome ~ treatment + propensity_score
        6. Report **propensity-adjusted treatment effect** and **IPTW-adjusted effect**
        """
    )

    file_input = gr.File(label="Upload CSV")
    run_button = gr.Button("Run Propensity Score Adjustment")
    output_md = gr.Markdown()

    # Clicking the button passes the uploaded file to the analysis and
    # renders the returned Markdown report (or error line).
    run_button.click(
        propensity_covariate_adjustment,
        inputs=[file_input],
        outputs=[output_md],
    )

if __name__ == "__main__":
    import os

    # A share link exposes the app, and any patient-level CSV uploaded to it,
    # at a public URL. Make that opt-in (GRADIO_SHARE=true) instead of always
    # on; hosted deployments serve the app without a share tunnel anyway.
    demo.launch(share=os.getenv("GRADIO_SHARE", "").strip().lower() == "true")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ gradio>=4.0.0