""" Compare CheXpert labeler vs NegBio labeler against manual ground truth labels from MIMIC-CXR-JPG test set. """ import pandas as pd import numpy as np from sklearn.metrics import ( accuracy_score, f1_score, precision_score, recall_score, classification_report, ) # ── Cấu hình — chỉnh 4 dòng này ────────────────────────────────────────────── CHEXPERT_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.0.0-chexpert.csv" NEGBIO_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.0.0-negbio.csv" GT_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.1.0-test-set-labeled.csv" # Cách xử lý nhãn uncertain (-1): # "positive" → coi là có bệnh (mặc định, conservative) # "negative" → coi là không có bệnh # "drop" → bỏ hẳn các study có uncertain UNCERTAIN_STRATEGY = "negative" # ───────────────────────────────────────────────────────────────────────────── PATHOLOGIES = [ "Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Enlarged Cardiomediastinum", "Fracture", "Lung Lesion", "Lung Opacity", "No Finding", "Pleural Effusion", "Pleural Other", "Pneumonia", "Pneumothorax", "Support Devices", ] def load_labels(path: str) -> pd.DataFrame: if path.endswith(".gz"): df = pd.read_csv(path, compression="gzip") else: df = pd.read_csv(path) return df def resolve_uncertain(df: pd.DataFrame, cols: list[str], strategy: str) -> pd.DataFrame: df = df.copy() if strategy == "positive": # -1 (uncertain) → 1 (treat as positive, conservative) for c in cols: df[c] = df[c].replace(-1.0, 1.0) elif strategy == "negative": # -1 (uncertain) → 0 (treat as negative) for c in cols: df[c] = df[c].replace(-1.0, 0.0) elif strategy == "drop": # drop rows that have any uncertain label mask = (df[cols] == -1.0).any(axis=1) df = df[~mask] else: raise ValueError(f"Unknown strategy: {strategy}") # NaN (not mentioned) → 0 df[cols] = df[cols].fillna(0.0).clip(0, 1).astype(int) return df def available_pathologies(df: pd.DataFrame) -> list[str]: return [p for p in PATHOLOGIES if p in df.columns] def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray, label: str) -> dict: return { "tool": label, "macro_f1": f1_score(y_true, y_pred, average="macro", zero_division=0), "micro_f1": f1_score(y_true, y_pred, average="micro", zero_division=0), "accuracy": accuracy_score(y_true, y_pred), "macro_prec": precision_score(y_true, y_pred, average="macro", zero_division=0), "macro_rec": recall_score(y_true, y_pred, average="macro", zero_division=0), } def per_pathology_f1(y_true: np.ndarray, y_pred: np.ndarray, cols: list[str]) -> pd.Series: scores = {} for i, c in enumerate(cols): scores[c] = f1_score(y_true[:, i], y_pred[:, i], zero_division=0) return pd.Series(scores) def main(): print("Loading files...") gt = load_labels(GT_PATH) chx = load_labels(CHEXPERT_PATH) neg = load_labels(NEGBIO_PATH) print(f" Ground truth : {len(gt):,} studies") print(f" CheXpert : {len(chx):,} studies") print(f" NegBio : {len(neg):,} studies") # Align on study_id gt = gt.set_index("study_id") chx = chx.set_index("study_id") neg = neg.set_index("study_id") common_idx = gt.index.intersection(chx.index).intersection(neg.index) print(f"\n Studies in all three : {len(common_idx):,}") gt = gt.loc[common_idx] chx = chx.loc[common_idx] neg = neg.loc[common_idx] # Determine shared pathology columns cols = [p for p in PATHOLOGIES if p in gt.columns and p in chx.columns and p in neg.columns] print(f" Pathologies evaluated: {len(cols)}") print(f" {cols}\n") # Ground truth: NaN → 0, clip to binary gt_clean = gt[cols].fillna(0.0).clip(0, 1).astype(int) # Auto-labelers: resolve uncertain then binarise chx_clean = resolve_uncertain(chx[cols].copy(), cols, UNCERTAIN_STRATEGY) neg_clean = resolve_uncertain(neg[cols].copy(), cols, UNCERTAIN_STRATEGY) # If strategy==drop, gt must also be filtered to matching rows if UNCERTAIN_STRATEGY == "drop": shared = chx_clean.index.intersection(neg_clean.index) gt_clean = gt_clean.loc[shared] chx_clean = chx_clean.loc[shared] neg_clean = neg_clean.loc[shared] print(f" After dropping uncertain rows: {len(shared):,} studies remain\n") Y_true = gt_clean.values Y_chx = chx_clean.values Y_neg = neg_clean.values # ── Overall metrics ──────────────────────────────────────────────────────── res_chx = compute_metrics(Y_true, Y_chx, "CheXpert") res_neg = compute_metrics(Y_true, Y_neg, "NegBio") summary = pd.DataFrame([res_chx, res_neg]).set_index("tool") print("=" * 60) print("OVERALL METRICS (uncertain strategy: '{}')".format(UNCERTAIN_STRATEGY)) print("=" * 60) print(summary.to_string(float_format="{:.4f}".format)) winner = "CheXpert" if res_chx["macro_f1"] > res_neg["macro_f1"] else "NegBio" diff = abs(res_chx["macro_f1"] - res_neg["macro_f1"]) print(f"\n→ Better labeler (macro-F1): {winner} (Δ = {diff:.4f})") # ── Per-pathology F1 ────────────────────────────────────────────────────── f1_chx = per_pathology_f1(Y_true, Y_chx, cols) f1_neg = per_pathology_f1(Y_true, Y_neg, cols) per_path = pd.DataFrame({ "CheXpert_F1": f1_chx, "NegBio_F1": f1_neg, "Winner": np.where(f1_chx >= f1_neg, "CheXpert", "NegBio"), "Δ": (f1_chx - f1_neg).round(4), }) print("\n" + "=" * 60) print("PER-PATHOLOGY F1") print("=" * 60) print(per_path.to_string(float_format="{:.4f}".format)) chx_wins = (per_path["Winner"] == "CheXpert").sum() neg_wins = (per_path["Winner"] == "NegBio").sum() print(f"\nPathology wins → CheXpert: {chx_wins} | NegBio: {neg_wins}") # ── Detailed classification report ──────────────────────────────────────── print("\n" + "=" * 60) print("CLASSIFICATION REPORT — CheXpert") print("=" * 60) print(classification_report(Y_true, Y_chx, target_names=cols, zero_division=0)) print("=" * 60) print("CLASSIFICATION REPORT — NegBio") print("=" * 60) print(classification_report(Y_true, Y_neg, target_names=cols, zero_division=0)) # ── Save results ────────────────────────────────────────────────────────── out_path = "dev/labeler_comparison.csv" per_path.to_csv(out_path) print(f"Per-pathology results saved to {out_path}") if __name__ == "__main__": main()