"""
Compare the CheXpert and NegBio labelers against the manual ground-truth
labels of the MIMIC-CXR-JPG test set.
"""
|
|
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)
|
|
| |
# Input CSV files: the CheXpert labeler output, the NegBio labeler output,
# and the manually labeled test set that serves as ground truth.
CHEXPERT_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.0.0-chexpert.csv"
NEGBIO_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.0.0-negbio.csv"
GT_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.1.0-test-set-labeled.csv"


# How resolve_uncertain() treats labeler values equal to -1.0:
#   "positive" -> replaced with 1.0
#   "negative" -> replaced with 0.0
#   "drop"     -> every row containing a -1.0 is removed
UNCERTAIN_STRATEGY = "negative"


# Candidate label columns; main() evaluates only those present in all three
# input files.
PATHOLOGIES = [
    "Atelectasis", "Cardiomegaly", "Consolidation", "Edema",
    "Enlarged Cardiomediastinum", "Fracture", "Lung Lesion", "Lung Opacity",
    "No Finding", "Pleural Effusion", "Pleural Other", "Pneumonia",
    "Pneumothorax", "Support Devices",
]
|
|
|
|
def load_labels(path: str) -> pd.DataFrame:
    """Read a label CSV file into a DataFrame.

    Args:
        path: Location of the CSV file, as a string or path-like object.
            Compressed files are supported: the codec is inferred from the
            file extension (for example ``.gz``).

    Returns:
        The file contents as a DataFrame, with no further processing.
    """
    # pandas already picks the codec from the extension, so there is no need
    # to special-case ".gz" (which also broke on pathlib.Path arguments).
    return pd.read_csv(path, compression="infer")
|
|
|
|
def resolve_uncertain(df: pd.DataFrame, cols: list[str], strategy: str) -> pd.DataFrame:
    """Return a copy of *df* whose label columns are binarized to 0/1 integers.

    Values equal to -1.0 in *cols* are handled according to *strategy*; after
    that, missing values become 0, everything is clipped into [0, 1] and the
    columns are cast to ``int``. The input frame is never modified.

    Args:
        df: Frame containing at least the columns named in *cols*.
        cols: Label columns to process; other columns are carried over as-is.
        strategy: ``"positive"`` maps -1.0 to 1.0, ``"negative"`` maps -1.0
            to 0.0, and ``"drop"`` removes every row holding a -1.0 in any of
            *cols*.

    Returns:
        A new DataFrame with integer 0/1 values in *cols*.

    Raises:
        ValueError: If *strategy* is not one of the three names above.
    """
    substitutes = {"positive": 1.0, "negative": 0.0}

    if strategy in substitutes:
        out = df.copy()
        # One vectorized replace over all label columns.
        out[cols] = out[cols].replace(-1.0, substitutes[strategy])
    elif strategy == "drop":
        has_uncertain = (df[cols] == -1.0).any(axis=1)
        # Explicit copy: assigning into a boolean-mask slice would otherwise
        # trigger pandas' SettingWithCopyWarning.
        out = df.loc[~has_uncertain].copy()
    else:
        raise ValueError(f"Unknown strategy: {strategy}")

    out[cols] = out[cols].fillna(0.0).clip(0, 1).astype(int)
    return out
|
|
|
|
def available_pathologies(df: pd.DataFrame) -> list[str]:
    """List the PATHOLOGIES entries that are columns of *df*.

    The result keeps the order in which the names appear in PATHOLOGIES.
    """
    present = []
    for name in PATHOLOGIES:
        if name in df.columns:
            present.append(name)
    return present
|
|
|
|
def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray, label: str) -> dict:
    """Summarize how well *y_pred* matches *y_true* for one labeling tool.

    Args:
        y_true: Reference labels.
        y_pred: Labels produced by the tool under evaluation.
        label: Name of the tool; stored under the ``"tool"`` key.

    Returns:
        A dict with the keys ``tool``, ``macro_f1``, ``micro_f1``,
        ``accuracy``, ``macro_prec`` and ``macro_rec``. Undefined
        precision/recall/F1 values are reported as 0 (``zero_division=0``).
    """
    macro = {"average": "macro", "zero_division": 0}
    micro = {"average": "micro", "zero_division": 0}

    macro_f1 = f1_score(y_true, y_pred, **macro)
    micro_f1 = f1_score(y_true, y_pred, **micro)
    # NOTE(review): for 2-D multilabel indicator input (what main() passes),
    # scikit-learn's accuracy_score is subset accuracy, i.e. a sample counts
    # only if all of its labels match -- confirm that is the intended figure.
    accuracy = accuracy_score(y_true, y_pred)
    macro_prec = precision_score(y_true, y_pred, **macro)
    macro_rec = recall_score(y_true, y_pred, **macro)

    return dict(
        tool=label,
        macro_f1=macro_f1,
        micro_f1=micro_f1,
        accuracy=accuracy,
        macro_prec=macro_prec,
        macro_rec=macro_rec,
    )
|
|
|
|
def per_pathology_f1(y_true: np.ndarray, y_pred: np.ndarray, cols: list[str]) -> pd.Series:
    """Compute one F1 score per label column.

    Column ``i`` of *y_true* is compared with column ``i`` of *y_pred* and the
    score is stored under ``cols[i]``; undefined scores are reported as 0.

    Returns:
        A Series of F1 scores indexed by the names in *cols*.
    """
    return pd.Series({
        name: f1_score(y_true[:, position], y_pred[:, position], zero_division=0)
        for position, name in enumerate(cols)
    })
|
|
|
|
def main():
    """Score the CheXpert and NegBio labels against the manual ground truth.

    Loads the three CSV files, keeps the studies and pathology columns that
    all of them share, binarizes the labels, prints overall and per-pathology
    metrics plus a classification report for each labeler, and writes the
    per-pathology F1 table to ``dev/labeler_comparison.csv``.

    Raises:
        ValueError: If the files share no studies or no pathology columns,
            if their rows cannot be aligned one-to-one by ``study_id``, or if
            the "drop" strategy leaves no studies to evaluate.
    """
    rule = "=" * 60

    print("Loading files...")
    gt = load_labels(GT_PATH)
    chx = load_labels(CHEXPERT_PATH)
    neg = load_labels(NEGBIO_PATH)

    print(f" Ground truth : {len(gt):,} studies")
    print(f" CheXpert : {len(chx):,} studies")
    print(f" NegBio : {len(neg):,} studies")

    # Align the three tables on the study identifier.
    gt = gt.set_index("study_id")
    chx = chx.set_index("study_id")
    neg = neg.set_index("study_id")

    common_idx = gt.index.intersection(chx.index).intersection(neg.index)
    print(f"\n Studies in all three : {len(common_idx):,}")
    if common_idx.empty:
        raise ValueError("No study_id is shared by all three files; nothing to evaluate")

    gt = gt.loc[common_idx]
    chx = chx.loc[common_idx]
    neg = neg.loc[common_idx]
    # A repeated study_id makes .loc return extra rows, which would pair the
    # wrong rows (or different row counts) in the metric calls below.
    if not (gt.index.equals(chx.index) and gt.index.equals(neg.index)):
        raise ValueError(
            "Rows cannot be aligned by study_id; at least one file repeats a study_id"
        )

    # Evaluate only the pathologies that every file provides.
    cols = [
        p for p in available_pathologies(gt)
        if p in chx.columns and p in neg.columns
    ]
    print(f" Pathologies evaluated: {len(cols)}")
    print(f" {cols}\n")
    if not cols:
        raise ValueError("No pathology column is shared by all three files")

    # Ground truth: missing -> 0 and anything below 0 is clipped to 0,
    # independently of UNCERTAIN_STRATEGY.
    gt_clean = gt[cols].fillna(0.0).clip(0, 1).astype(int)

    # Labeler outputs: -1.0 is resolved by the configured strategy.
    # resolve_uncertain() copies its input, so no extra copy is needed here.
    chx_clean = resolve_uncertain(chx[cols], cols, UNCERTAIN_STRATEGY)
    neg_clean = resolve_uncertain(neg[cols], cols, UNCERTAIN_STRATEGY)

    # "drop" may remove different rows from each labeler; keep the overlap.
    if UNCERTAIN_STRATEGY == "drop":
        shared = chx_clean.index.intersection(neg_clean.index)
        gt_clean = gt_clean.loc[shared]
        chx_clean = chx_clean.loc[shared]
        neg_clean = neg_clean.loc[shared]
        print(f" After dropping uncertain rows: {len(shared):,} studies remain\n")
        if shared.empty:
            raise ValueError("Dropping uncertain rows left no studies to evaluate")

    y_true = gt_clean.values
    y_chx = chx_clean.values
    y_neg = neg_clean.values

    # ---- Overall metrics ----
    res_chx = compute_metrics(y_true, y_chx, "CheXpert")
    res_neg = compute_metrics(y_true, y_neg, "NegBio")

    summary = pd.DataFrame([res_chx, res_neg]).set_index("tool")
    print(rule)
    print(f"OVERALL METRICS (uncertain strategy: '{UNCERTAIN_STRATEGY}')")
    print(rule)
    print(summary.to_string(float_format="{:.4f}".format))

    # Equal scores are reported as a tie instead of being credited to one tool.
    delta = res_chx["macro_f1"] - res_neg["macro_f1"]
    if delta > 0:
        winner = "CheXpert"
    elif delta < 0:
        winner = "NegBio"
    else:
        winner = "Tie"
    print(f"\n→ Better labeler (macro-F1): {winner} (Δ = {abs(delta):.4f})")

    # ---- Per-pathology F1 ----
    f1_chx = per_pathology_f1(y_true, y_chx, cols)
    f1_neg = per_pathology_f1(y_true, y_neg, cols)

    per_path = pd.DataFrame({
        "CheXpert_F1": f1_chx,
        "NegBio_F1": f1_neg,
        # Same tie rule as the overall comparison above.
        "Winner": np.select(
            [(f1_chx > f1_neg).to_numpy(), (f1_neg > f1_chx).to_numpy()],
            ["CheXpert", "NegBio"],
            default="Tie",
        ),
        "Δ": (f1_chx - f1_neg).round(4),
    })

    print("\n" + rule)
    print("PER-PATHOLOGY F1")
    print(rule)
    print(per_path.to_string(float_format="{:.4f}".format))

    chx_wins = (per_path["Winner"] == "CheXpert").sum()
    neg_wins = (per_path["Winner"] == "NegBio").sum()
    ties = (per_path["Winner"] == "Tie").sum()
    print(f"\nPathology wins → CheXpert: {chx_wins} | NegBio: {neg_wins} | Tie: {ties}")

    # ---- Classification reports ----
    print("\n" + rule)
    print("CLASSIFICATION REPORT — CheXpert")
    print(rule)
    print(classification_report(y_true, y_chx, target_names=cols, zero_division=0))

    print(rule)
    print("CLASSIFICATION REPORT — NegBio")
    print(rule)
    print(classification_report(y_true, y_neg, target_names=cols, zero_division=0))

    # ---- Save the per-pathology table ----
    out_path = "dev/labeler_comparison.csv"
    # to_csv does not create missing directories.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    per_path.to_csv(out_path)
    print(f"Per-pathology results saved to {out_path}")
|
|
|
|
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|