Spaces:
Sleeping
Sleeping
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, LinearRegression
# Columns every uploaded CSV must contain; rows missing any of them are dropped.
REQUIRED_COLS = [
    "treatment",  # 0/1 (0 = control, 1 = new drug)
    "outcome",  # 0/1 or continuous outcome
    "age",
    "sex",  # 0/1 or M/F convertible
    "baseline_risk_score",
    "comorbidity_index",
]
# Propensity scores are clipped to [_PS_CLIP, 1 - _PS_CLIP] so the IPTW weights
# 1/PS and 1/(1-PS) stay finite when the fitted model separates the groups.
_PS_CLIP = 1e-6

# Baseline covariates entered into the propensity score model.
_PS_COVARIATES = ["age", "sex", "baseline_risk_score", "comorbidity_index"]

# Required columns that are coerced to numeric as-is ("sex" is handled separately).
_NUMERIC_COLS = ("treatment", "outcome", "age", "baseline_risk_score", "comorbidity_index")


def _csv_source(file):
    """Return something ``pd.read_csv`` can open for the uploaded *file*.

    Accepts a plain path string as well as an object exposing the path on
    ``.name``; anything else is handed to pandas unchanged.
    """
    if isinstance(file, str):
        return file
    return getattr(file, "name", file)


def _encode_sex(column):
    """Return *column* as numbers, mapping the labels M -> 0 and F -> 1.

    Labels are matched case-insensitively after stripping whitespace. Values
    that are not M/F fall back to numeric parsing; anything else becomes NaN.
    """
    if pd.api.types.is_numeric_dtype(column):
        return pd.to_numeric(column, errors="coerce")
    labels = column.astype(str).str.strip().str.upper()
    coded = pd.to_numeric(labels.map({"M": 0, "F": 1}), errors="coerce")
    return coded.fillna(pd.to_numeric(labels, errors="coerce"))


def _clean_patient_data(df):
    """Return a cleaned copy of *df* restricted to complete rows with treatment in {0, 1}."""
    cleaned = df.copy()
    for col in _NUMERIC_COLS:
        cleaned[col] = pd.to_numeric(cleaned[col], errors="coerce")
    cleaned["sex"] = _encode_sex(cleaned["sex"])
    cleaned = cleaned.dropna(subset=REQUIRED_COLS)
    # Any other treatment code would otherwise be weighted as a control and
    # would turn the propensity model into a multi-class fit.
    return cleaned[cleaned["treatment"].isin([0, 1])].copy()


def _smd(mean1, mean2, std1, std2):
    """Return the standardized mean difference using the pooled standard deviation.

    With zero pooled spread the result is 0.0 only when the means agree;
    otherwise it is a signed infinity, since the groups do not overlap at all.
    """
    pooled_std = np.sqrt((std1**2 + std2**2) / 2)
    diff = mean1 - mean2
    if pooled_std == 0:
        return 0.0 if diff == 0 else float(np.copysign(np.inf, diff))
    return diff / pooled_std


def _weighted_std(values, weights):
    """Return the weighted (population-style) standard deviation of *values*."""
    weighted_mean = np.average(values, weights=weights)
    return np.sqrt(np.average((values - weighted_mean) ** 2, weights=weights))


def propensity_covariate_adjustment(file):
    """Run the propensity score analysis on an uploaded CSV and return a Markdown report.

    The report contains the crude difference in mean outcome, the fitted
    propensity scores, an IPTW-weighted difference, a covariate balance table
    (standardized mean differences before and after weighting) and a linear
    regression of outcome on treatment and propensity score.

    Args:
        file: The uploaded file: a path string, an object whose ``.name`` is
            the path, or ``None`` when nothing was uploaded.

    Returns:
        A Markdown string. Problems (no file, unreadable file, missing
        columns, no usable rows, a missing treatment arm, model failures) are
        reported as a message string rather than raised.
    """
    if file is None:
        return "❌ Please upload a CSV file."

    try:
        df = pd.read_csv(_csv_source(file))
    except Exception as e:  # UI boundary: show any read/parse failure to the user
        return f"❌ Error reading file: {e}"

    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        return (
            "❌ Missing required columns: "
            + ", ".join(missing)
            + f"\n\nYour columns: {list(df.columns)}"
        )

    df = _clean_patient_data(df)
    if df.empty:
        return "❌ After cleaning, no valid rows remain. Please check your data."

    is_treated = df["treatment"] == 1
    if is_treated.all() or not is_treated.any():
        return "❌ Need both treated (treatment=1) and control (treatment=0) subjects."

    # Crude (unadjusted) effect: difference in mean outcome.
    mean_outcome_treated = df.loc[is_treated, "outcome"].mean()
    mean_outcome_control = df.loc[~is_treated, "outcome"].mean()
    crude_effect = mean_outcome_treated - mean_outcome_control

    # Step 1: propensity score model, P(treatment=1 | covariates).
    X_ps = df[_PS_COVARIATES]
    try:
        ps_model = LogisticRegression(max_iter=1000)
        ps_model.fit(X_ps, df["treatment"])
        ps = ps_model.predict_proba(X_ps)[:, 1]
    except Exception as e:
        return f"❌ Error fitting propensity score model: {e}"
    ps = np.clip(ps, _PS_CLIP, 1.0 - _PS_CLIP)
    df["propensity_score"] = ps

    # Step 2: IPTW weights: treated = 1/PS, control = 1/(1-PS).
    df["iptw_weight"] = np.where(is_treated, 1.0 / ps, 1.0 / (1.0 - ps))

    treated = df[is_treated]
    control = df[~is_treated]
    w_treated = treated["iptw_weight"]
    w_control = control["iptw_weight"]

    weighted_mean_outcome_treated = np.average(treated["outcome"], weights=w_treated)
    weighted_mean_outcome_control = np.average(control["outcome"], weights=w_control)
    iptw_effect = weighted_mean_outcome_treated - weighted_mean_outcome_control

    # Step 3: covariate balance before and after weighting.
    table_lines = [
        "| Covariate | Mean (Treated) Before | Mean (Control) Before | SMD Before "
        "| Mean (Treated) After | Mean (Control) After | SMD After |",
        "|-----------|----------------------|----------------------|------------"
        "|---------------------|---------------------|-----------|",
    ]
    for cov in _PS_COVARIATES + ["propensity_score"]:
        t_vals = treated[cov]
        c_vals = control[cov]
        mean_t_before = t_vals.mean()
        mean_c_before = c_vals.mean()
        # Sample std is NaN for a one-member group, which propagates to the SMD.
        smd_before = _smd(mean_t_before, mean_c_before, t_vals.std(), c_vals.std())
        mean_t_after = np.average(t_vals, weights=w_treated)
        mean_c_after = np.average(c_vals, weights=w_control)
        smd_after = _smd(
            mean_t_after,
            mean_c_after,
            _weighted_std(t_vals, w_treated),
            _weighted_std(c_vals, w_control),
        )
        table_lines.append(
            f"| {cov} | {mean_t_before:.3f} | {mean_c_before:.3f} | {smd_before:.3f} "
            f"| {mean_t_after:.3f} | {mean_c_after:.3f} | {smd_after:.3f} |"
        )

    # Step 4: covariate adjustment, outcome ~ treatment + propensity_score.
    try:
        lin_model = LinearRegression()
        lin_model.fit(df[["treatment", "propensity_score"]], df["outcome"])
    except Exception as e:
        return f"❌ Error fitting outcome model: {e}"
    intercept = lin_model.intercept_
    beta_treat = lin_model.coef_[0]
    beta_ps = lin_model.coef_[1]

    avg_ps_treated = treated["propensity_score"].mean()
    avg_ps_control = control["propensity_score"].mean()
    avg_iptw_treated = w_treated.mean()
    avg_iptw_control = w_control.mean()
    n_treated = treated.shape[0]
    n_control = control.shape[0]

    # Blank entries matter: Markdown needs them around lists, tables and rules.
    report_lines = [
        "",
        "# Propensity Score Covariate Adjustment — Drug Development Example",
        "",
        "## 1. Data Summary",
        "",
        f"- Number of patients: **{df.shape[0]}**",
        f"- Treated (new drug): **{n_treated}**",
        f"- Control (standard of care): **{n_control}**",
        "",
        "Outcome is interpreted as:",
        "",
        "- 1 = event of interest (e.g., progression-free at 12 months)",
        "- 0 = no event (e.g., progressed or not progression-free)",
        "",
        "---",
        "",
        "## 2. Crude (Unadjusted) Treatment Effect",
        "",
        "Unadjusted difference in mean outcome:",
        "",
        f"- Mean outcome (treated): **{mean_outcome_treated:.3f}**",
        f"- Mean outcome (control): **{mean_outcome_control:.3f}**",
        "",
        f"**Crude effect (treated - control):** **{crude_effect:.3f}**",
        "",
        "This ignores all baseline differences between the two groups.",
        "",
        "---",
        "",
        "## 3. Propensity Score Model",
        "",
        "We fit a logistic regression to estimate the probability of receiving the new drug:",
        "",
        "**P(treatment=1 | age, sex, baseline_risk_score, comorbidity_index)**",
        "",
        "Average estimated propensity scores:",
        "",
        f"- Treated group: **{avg_ps_treated:.3f}**",
        f"- Control group: **{avg_ps_control:.3f}**",
        "",
        "A big difference here indicates some baseline imbalance in who gets treated.",
        "",
        "---",
        "",
        "## 4. Standardized Mean Differences (Balance Table)",
        "",
        "Standardized Mean Differences (SMD) measure the balance of covariates "
        "between treated and control groups.",
        "SMD < 0.1 is generally considered well-balanced. SMD < 0.25 is often acceptable.",
        "",
        "**Balance Before vs After IPTW Weighting:**",
        "",
        *table_lines,
        "",
        "**Interpretation:**",
        "",
        "- SMD values closer to 0 indicate better balance",
        "- After IPTW weighting, SMDs should be reduced, indicating improved balance",
        "- The propensity score itself is included as a check on the propensity model",
        "",
        "---",
        "",
        "## 5. IPTW (Inverse Probability of Treatment Weighting)",
        "",
        "We calculate IPTW weights as:",
        "",
        "- **Treated subjects:** w = 1 / propensity_score",
        "- **Control subjects:** w = 1 / (1 - propensity_score)",
        "",
        "Average IPTW weights:",
        "",
        f"- Treated group: **{avg_iptw_treated:.3f}**",
        f"- Control group: **{avg_iptw_control:.3f}**",
        "",
        "### Weighted Outcome Means",
        "",
        f"- Weighted mean outcome (treated): **{weighted_mean_outcome_treated:.3f}**",
        f"- Weighted mean outcome (control): **{weighted_mean_outcome_control:.3f}**",
        "",
        f"**IPTW-adjusted effect (treated - control):** **{iptw_effect:.3f}**",
        "",
        "This is the treatment effect estimated using IPTW weighting to balance the groups.",
        "",
        "---",
        "",
        "## 6. Covariate Adjustment Using Propensity Scores",
        "",
        "We also fit a linear regression:",
        "",
        "**outcome ~ treatment + propensity_score**",
        "",
        f"- Intercept: **{intercept:.3f}**",
        f"- Coefficient on treatment (adjusted effect): **{beta_treat:.3f}**",
        f"- Coefficient on propensity score: **{beta_ps:.3f}**",
        "",
        "**Interpretation:**",
        "",
        "- The **crude effect** shows what happens if we just compare treated vs control.",
        "- The **IPTW-adjusted effect** uses weighting to create a pseudo-population "
        "with balanced covariates.",
        "- The **regression-adjusted effect** (coefficient on treatment) estimates "
        "the treatment effect",
        "  **after controlling for baseline covariates via the propensity score** "
        "in a regression model.",
        "",
        "Both methods (IPTW and regression adjustment) should give similar results "
        "if the model is correctly specified.",
        "",
        "---",
        "",
        "## Summary of Treatment Effects",
        "",
        "| Method | Treatment Effect |",
        "|--------|------------------|",
        f"| Crude (unadjusted) | **{crude_effect:.3f}** |",
        f"| IPTW-weighted | **{iptw_effect:.3f}** |",
        f"| Regression-adjusted | **{beta_treat:.3f}** |",
        "",
        "In a real drug development / RWE setting, you might:",
        "",
        "- Use more covariates (labs, performance status, biomarkers)",
        "- Use logistic or survival models for the outcome",
        "- Compute confidence intervals and p-values",
        "- Combine IPTW with regression adjustment (doubly robust estimation)",
        "",
        "This app demonstrates **propensity score-based covariate adjustment** "
        "and **IPTW weighting**.",
        "",
    ]
    return "\n".join(report_lines)
# Intro text shown above the upload widget. Built line by line so the blank
# lines Markdown needs around headings and lists are explicit.
_INTRO_MD = "\n".join(
    [
        "# Propensity Score Covariate Adjustment — Drug Development (Demo)",
        "",
        "Upload a CSV file with observational data comparing a **new drug** "
        "vs **standard of care**.",
        "",
        "### Required columns:",
        "",
        "- `treatment` (0 = control, 1 = new drug)",
        "- `outcome` (0/1 or continuous outcome)",
        "- `age`",
        "- `sex` (0/1 or M/F)",
        "- `baseline_risk_score`",
        "- `comorbidity_index`",
        "",
        "The app will:",
        "",
        "1. Estimate **propensity scores** with logistic regression",
        "2. Compute the **crude (unadjusted)** treatment effect",
        "3. Calculate **IPTW (Inverse Probability of Treatment Weighting)** and weighted means",
        "4. Compute **Standardized Mean Differences (SMD)** before vs after adjustment",
        "5. Fit an **outcome model** with outcome ~ treatment + propensity_score",
        "6. Report **propensity-adjusted treatment effect** and **IPTW-adjusted effect**",
        "",
    ]
)

# UI: one file upload, one button, one Markdown pane that receives the string
# returned by propensity_covariate_adjustment.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MD)
    file_input = gr.File(label="Upload CSV")
    run_button = gr.Button("Run Propensity Score Adjustment")
    output_md = gr.Markdown()
    run_button.click(
        propensity_covariate_adjustment,
        inputs=[file_input],
        outputs=[output_md],
    )
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link from Gradio;
    # confirm that is wanted wherever this script is run.
    demo.launch(share=True)