Spaces:

LianHP
/

propensity_score

Sleeping

App Files Files Community

LianHP committed on Nov 20, 2025

Commit

bddd8b3

verified ·

1 Parent(s): 26b2d75

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

.gradio/certificate.pem +31 -0
README.md +3 -9
app.py +344 -0
requirements.txt +4 -0

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
 ---
-title: Propensity Score
-emoji: 🏆
-colorFrom: red
-colorTo: gray
-sdk: gradio
-sdk_version: 5.49.1
 app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: propensity_score
 app_file: app.py
+sdk: gradio
+sdk_version: 5.47.2
 ---

app.py ADDED Viewed

	@@ -0,0 +1,344 @@

+import pandas as pd
+import numpy as np
+from sklearn.linear_model import LogisticRegression, LinearRegression
+import gradio as gr
+# Columns every uploaded CSV must contain. propensity_covariate_adjustment rejects
+# files that lack any of them and drops rows where any of them is missing or
+# cannot be parsed as a number.
+REQUIRED_COLS = [
+    "treatment",            # 0/1 (0 = control, 1 = new drug)
+    "outcome",              # 0/1 or continuous outcome
+    "age",
+    "sex",                  # 0/1 or M/F convertible
+    "baseline_risk_score",
+    "comorbidity_index",
+]
+def propensity_covariate_adjustment(file):
+    if file is None:
+        return "❌ Please upload a CSV file."
+    try:
+        df = pd.read_csv(file.name)
+    except Exception as e:
+        return f"❌ Error reading file: {e}"
+    # Check required columns
+    missing = [c for c in REQUIRED_COLS if c not in df.columns]
+    if missing:
+        return (
+            "❌ Missing required columns: "
+            + ", ".join(missing)
+            + f"\n\nYour columns: {list(df.columns)}"
+        )
+    # Make a copy to avoid warning issues
+    df = df.copy()
+    # Basic cleaning
+    # Ensure numeric types where needed
+    df["treatment"] = pd.to_numeric(df["treatment"], errors="coerce")
+    df["outcome"] = pd.to_numeric(df["outcome"], errors="coerce")
+    df["age"] = pd.to_numeric(df["age"], errors="coerce")
+    df["baseline_risk_score"] = pd.to_numeric(df["baseline_risk_score"], errors="coerce")
+    df["comorbidity_index"] = pd.to_numeric(df["comorbidity_index"], errors="coerce")
+    # Handle sex if it's "M"/"F"
+    if df["sex"].dtype == object:
+        df["sex"] = df["sex"].str.upper().map({"M": 0, "F": 1})
+    df["sex"] = pd.to_numeric(df["sex"], errors="coerce")
+    # Drop rows with any missing key values
+    df = df.dropna(subset=REQUIRED_COLS)
+    if df.shape[0] == 0:
+        return "❌ After cleaning, no valid rows remain. Please check your data."
+    # Crude (unadjusted) treatment effect: difference in mean outcome
+    treated = df[df["treatment"] == 1]
+    control = df[df["treatment"] == 0]
+    if treated.shape[0] == 0 or control.shape[0] == 0:
+        return "❌ Need both treated (treatment=1) and control (treatment=0) subjects."
+    crude_effect = treated["outcome"].mean() - control["outcome"].mean()
+    # ----------------------------
+    # Step 1: Propensity score model
+    # ----------------------------
+    X_ps = df[["age", "sex", "baseline_risk_score", "comorbidity_index"]]
+    y_treat = df["treatment"]
+    try:
+        ps_model = LogisticRegression(max_iter=1000)
+        ps_model.fit(X_ps, y_treat)
+    except Exception as e:
+        return f"❌ Error fitting propensity score model: {e}"
+    # Predicted propensity scores
+    df["propensity_score"] = ps_model.predict_proba(X_ps)[:, 1]
+    # ----------------------------
+    # Step 2: IPTW (Inverse Probability of Treatment Weighting)
+    # ----------------------------
+    # IPTW weights: treated = 1/PS, control = 1/(1-PS)
+    df["iptw_weight"] = np.where(
+        df["treatment"] == 1,
+        1.0 / df["propensity_score"],
+        1.0 / (1.0 - df["propensity_score"])
+    )
+    # Stabilized weights (optional but often used)
+    # p_treated = df["treatment"].mean()
+    # df["iptw_stabilized"] = np.where(
+    #     df["treatment"] == 1,
+    #     p_treated / df["propensity_score"],
+    #     (1 - p_treated) / (1.0 - df["propensity_score"])
+    # )
+    # Recalculate treated/control with updated df
+    treated = df[df["treatment"] == 1]
+    control = df[df["treatment"] == 0]
+    # Weighted means for outcomes
+    weighted_mean_outcome_treated = np.average(treated["outcome"], weights=treated["iptw_weight"])
+    weighted_mean_outcome_control = np.average(control["outcome"], weights=control["iptw_weight"])
+    iptw_effect = weighted_mean_outcome_treated - weighted_mean_outcome_control
+    # ----------------------------
+    # Step 3: Standardized Mean Differences (SMD)
+    # ----------------------------
+    def calculate_smd(mean1, mean2, std1, std2):
+        """Calculate standardized mean difference"""
+        pooled_std = np.sqrt((std1**2 + std2**2) / 2)
+        if pooled_std == 0:
+            return 0.0
+        return (mean1 - mean2) / pooled_std
+    def calculate_weighted_std(values, weights):
+        """Calculate weighted standard deviation"""
+        weighted_mean = np.average(values, weights=weights)
+        weighted_var = np.average((values - weighted_mean)**2, weights=weights)
+        return np.sqrt(weighted_var)
+    # Covariates to check balance for
+    covariates = ["age", "sex", "baseline_risk_score", "comorbidity_index", "propensity_score"]
+    smd_results = []
+    for cov in covariates:
+        # Before adjustment (unadjusted)
+        mean_treated_before = treated[cov].mean()
+        mean_control_before = control[cov].mean()
+        std_treated_before = treated[cov].std()
+        std_control_before = control[cov].std()
+        smd_before = calculate_smd(mean_treated_before, mean_control_before,
+                                   std_treated_before, std_control_before)
+        # After adjustment (IPTW weighted)
+        mean_treated_after = np.average(treated[cov], weights=treated["iptw_weight"])
+        mean_control_after = np.average(control[cov], weights=control["iptw_weight"])
+        std_treated_after = calculate_weighted_std(treated[cov], treated["iptw_weight"])
+        std_control_after = calculate_weighted_std(control[cov], control["iptw_weight"])
+        smd_after = calculate_smd(mean_treated_after, mean_control_after,
+                                 std_treated_after, std_control_after)
+        smd_results.append({
+            "Covariate": cov,
+            "Mean_Treated_Before": mean_treated_before,
+            "Mean_Control_Before": mean_control_before,
+            "SMD_Before": smd_before,
+            "Mean_Treated_After": mean_treated_after,
+            "Mean_Control_After": mean_control_after,
+            "SMD_After": smd_after
+        })
+    # Create balance table
+    balance_table = "| Covariate | Mean (Treated) Before | Mean (Control) Before | SMD Before | Mean (Treated) After | Mean (Control) After | SMD After |\n"
+    balance_table += "|-----------|----------------------|----------------------|------------|---------------------|---------------------|-----------|\n"
+    for r in smd_results:
+        balance_table += (
+            f"| {r['Covariate']} | {r['Mean_Treated_Before']:.3f} | {r['Mean_Control_Before']:.3f} | "
+            f"{r['SMD_Before']:.3f} | {r['Mean_Treated_After']:.3f} | {r['Mean_Control_After']:.3f} | "
+            f"{r['SMD_After']:.3f} |\n"
+        )
+    # ----------------------------
+    # Step 4: Covariate adjustment
+    # outcome ~ treatment + propensity_score
+    # ----------------------------
+    X_adj = df[["treatment", "propensity_score"]]
+    y_out = df["outcome"]
+    lin_model = LinearRegression()
+    lin_model.fit(X_adj, y_out)
+    # Coefficients: intercept + beta_treatment + beta_ps
+    intercept = lin_model.intercept_
+    beta_treat = lin_model.coef_[0]
+    beta_ps = lin_model.coef_[1]
+    # Summaries
+    avg_ps_treated = treated["propensity_score"].mean()
+    avg_ps_control = control["propensity_score"].mean()
+    avg_iptw_treated = treated["iptw_weight"].mean()
+    avg_iptw_control = control["iptw_weight"].mean()
+    n_treated = treated.shape[0]
+    n_control = control.shape[0]
+    text = f"""
+# Propensity Score Covariate Adjustment – Drug Development Example
+## 1. Data Summary
+- Number of patients: **{df.shape[0]}**
+- Treated (new drug): **{n_treated}**
+- Control (standard of care): **{n_control}**
+Outcome is interpreted as:
+- 1 = event of interest (e.g., progression-free at 12 months)
+- 0 = no event (e.g., progressed or not progression-free)
+---
+## 2. Crude (Unadjusted) Treatment Effect
+Unadjusted difference in mean outcome:
+- Mean outcome (treated): **{treated["outcome"].mean():.3f}**
+- Mean outcome (control): **{control["outcome"].mean():.3f}**
+**Crude effect (treated - control):** **{crude_effect:.3f}**
+This ignores all baseline differences between the two groups.
+---
+## 3. Propensity Score Model
+We fit a logistic regression to estimate the probability of receiving the new drug:
+**P(treatment=1 | age, sex, baseline_risk_score, comorbidity_index)**
+Average estimated propensity scores:
+- Treated group: **{avg_ps_treated:.3f}**
+- Control group: **{avg_ps_control:.3f}**
+A big difference here indicates some baseline imbalance in who gets treated.
+---
+## 4. Standardized Mean Differences (Balance Table)
+Standardized Mean Differences (SMD) measure the balance of covariates between treated and control groups.
+SMD < 0.1 is generally considered well-balanced. SMD < 0.25 is often acceptable.
+**Balance Before vs After IPTW Weighting:**
+{balance_table}
+**Interpretation:**
+- SMD values closer to 0 indicate better balance
+- After IPTW weighting, SMDs should be reduced, indicating improved balance
+- The propensity score itself is included as a check on the propensity model
+---
+## 5. IPTW (Inverse Probability of Treatment Weighting)
+We calculate IPTW weights as:
+- **Treated subjects:** w = 1 / propensity_score
+- **Control subjects:** w = 1 / (1 - propensity_score)
+Average IPTW weights:
+- Treated group: **{avg_iptw_treated:.3f}**
+- Control group: **{avg_iptw_control:.3f}**
+### Weighted Outcome Means
+- Weighted mean outcome (treated): **{weighted_mean_outcome_treated:.3f}**
+- Weighted mean outcome (control): **{weighted_mean_outcome_control:.3f}**
+**IPTW-adjusted effect (treated - control):** **{iptw_effect:.3f}**
+This is the treatment effect estimated using IPTW weighting to balance the groups.
+---
+## 6. Covariate Adjustment Using Propensity Scores
+We also fit a linear regression:
+**outcome ~ treatment + propensity_score**
+- Intercept: **{intercept:.3f}**
+- Coefficient on treatment (adjusted effect): **{beta_treat:.3f}**
+- Coefficient on propensity score: **{beta_ps:.3f}**
+**Interpretation:**
+- The **crude effect** shows what happens if we just compare treated vs control.
+- The **IPTW-adjusted effect** uses weighting to create a pseudo-population with balanced covariates.
+- The **regression-adjusted effect** (coefficient on treatment) estimates the treatment effect
+  **after controlling for baseline covariates via the propensity score** in a regression model.
+Both methods (IPTW and regression adjustment) should give similar results if the model is correctly specified.
+---
+## Summary of Treatment Effects
+| Method | Treatment Effect |
+|--------|------------------|
+| Crude (unadjusted) | **{crude_effect:.3f}** |
+| IPTW-weighted | **{iptw_effect:.3f}** |
+| Regression-adjusted | **{beta_treat:.3f}** |
+In a real drug development / RWE setting, you might:
+- Use more covariates (labs, performance status, biomarkers)
+- Use logistic or survival models for the outcome
+- Compute confidence intervals and p-values
+- Combine IPTW with regression adjustment (doubly robust estimation)
+This app demonstrates **propensity score-based covariate adjustment** and **IPTW weighting**.
+"""
+    return text
+# Build the Gradio UI. Inside the Blocks context the components are created in
+# this order: intro text, CSV upload, run button, and the Markdown area that
+# receives the report.
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+# Propensity Score Covariate Adjustment – Drug Development (Demo)
+Upload a CSV file with observational data comparing a **new drug** vs **standard of care**.
+### Required columns:
+- `treatment` (0 = control, 1 = new drug)
+- `outcome` (0/1 or continuous outcome)
+- `age`
+- `sex` (0/1 or M/F)
+- `baseline_risk_score`
+- `comorbidity_index`
+The app will:
+1. Estimate **propensity scores** with logistic regression
+2. Compute the **crude (unadjusted)** treatment effect
+3. Calculate **IPTW (Inverse Probability of Treatment Weighting)** and weighted means
+4. Compute **Standardized Mean Differences (SMD)** before vs after adjustment
+5. Fit an **outcome model** with outcome ~ treatment + propensity_score
+6. Report **propensity-adjusted treatment effect** and **IPTW-adjusted effect**
+"""
+    )
+    file_input = gr.File(label="Upload CSV")
+    run_button = gr.Button("Run Propensity Score Adjustment")
+    output_md = gr.Markdown()
+    # Clicking the button calls propensity_covariate_adjustment with the
+    # uploaded file as its only input and sends the string it returns to output_md.
+    run_button.click(
+        propensity_covariate_adjustment,
+        inputs=[file_input],
+        outputs=[output_md],
+    )
+# Start the app only when this file is executed directly, not when it is imported.
+if __name__ == "__main__":
+    # NOTE(review): share=True presumably asks Gradio for a public share link in
+    # addition to the local server — confirm that is wanted for an app that
+    # accepts patient-level data.
+    demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+pandas
+numpy
+scikit-learn
+gradio>=4.0.0