Upload 11 files

Browse files

Files changed (11) hide show

mm-dls/ClinicalFusionModel.py +69 -0
mm-dls/CoxphLoss.py +27 -0
mm-dls/FakePatientDataset.py +189 -0
mm-dls/HierMM_DLS.py +119 -0
mm-dls/ImageDataLoader.py +30 -0
mm-dls/LesionAttentionFusion.py +50 -0
mm-dls/ModelLesionEncoder.py +18 -0
mm-dls/ModelSpaceEncoder.py +18 -0
mm-dls/PatientDataset.py +104 -0
mm-dls/__init__.py +0 -0
mm-dls/plot_results.py +733 -0

mm-dls/ClinicalFusionModel.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
+import numpy as np
+class PatientLevelFusionModel(nn.Module):
+    def __init__(self, input_dim=128, pet_dim=5, clinical_dim=6):
+        super().__init__()
+        self.fc_merge = nn.Sequential(
+            nn.Linear(input_dim * 2 + 128, 256),  # lesion_fused + space_fused + radiomics_feat
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(256, 128),
+            nn.ReLU()
+        )
+        total_feat = 128 + pet_dim + clinical_dim
+        self.fc_dfs = nn.Linear(total_feat, 1)
+        self.fc_os = nn.Linear(total_feat, 1)
+        self.fc_cls = nn.Linear(total_feat, 1)
+    def forward(self, lesion_feat, space_feat, radiomics_feat, pet_feat, clinical_feat):
+        x = torch.cat([lesion_feat, space_feat, radiomics_feat], dim=1)
+        fused = self.fc_merge(x)  # shape [B, 128]
+        full_feat = torch.cat([fused, pet_feat, clinical_feat], dim=1)
+        dfs = self.fc_dfs(full_feat).squeeze(1)
+        os = self.fc_os(full_feat).squeeze(1)
+        cls = self.fc_cls(full_feat)  # keep [B, 1] for BCEWithLogits
+        return dfs, os, cls
+    @staticmethod
+    def classification_metrics(logits, labels):
+        probs = torch.sigmoid(logits).detach().cpu().numpy()
+        labels = labels.detach().cpu().numpy()
+        try:
+            auc = roc_auc_score(labels, probs)
+        except:
+            auc = 0.0
+        preds = (probs >= 0.5).astype(int)
+        acc = accuracy_score(labels, preds)
+        f1 = f1_score(labels, preds)
+        return auc, acc, f1
+    @staticmethod
+    def c_index(preds, durations, events):
+        preds = preds.detach().cpu().numpy()
+        durations = durations.detach().cpu().numpy()
+        events = events.detach().cpu().numpy()
+        n = len(preds)
+        num = 0
+        den = 0
+        for i in range(n):
+            for j in range(i + 1, n):
+                if durations[i] == durations[j]:
+                    continue
+                if events[i] == 1 and durations[i] < durations[j]:
+                    den += 1
+                    if preds[i] < preds[j]:
+                        num += 1
+                    elif preds[i] == preds[j]:
+                        num += 0.5
+                elif events[j] == 1 and durations[j] < durations[i]:
+                    den += 1
+                    if preds[j] < preds[i]:
+                        num += 1
+                    elif preds[j] == preds[i]:
+                        num += 0.5
+        return num / den if den > 0 else 0.0

mm-dls/CoxphLoss.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import torch
+import torch.nn as nn
+class CoxPHLoss(nn.Module):
+    """
+    实现 Cox Proportional Hazards Loss (负对数偏似然)
+    """
+    def __init__(self):
+        super(CoxPHLoss, self).__init__()
+    def forward(self, risk_pred, durations, events):
+        """
+        risk_pred: [batch_size] 模型输出的风险评分（未经过sigmoid）
+        durations: [batch_size] 存活时间
+        events:    [batch_size] 事件发生标志 (1=死亡/复发, 0=删失)
+        """
+        # 以时间降序排序（从最长生存期开始）
+        order = torch.argsort(durations, descending=True)
+        risk_pred = risk_pred[order]
+        events = events[order]
+        # 累加风险值 log-sum-exp 以稳定训练
+        log_cumsum = torch.logcumsumexp(risk_pred, dim=0)
+        diff = risk_pred - log_cumsum
+        loss = -torch.sum(diff * events) / torch.sum(events + 1e-8)  # 防止除以 0
+        return loss

mm-dls/FakePatientDataset.py ADDED Viewed

	@@ -0,0 +1,189 @@

+import torch
+from torch.utils.data import Dataset
+import numpy as np
+import random
+class FakePatientDataset(Dataset):
+    """
+    Controllable synthetic multimodal + survival dataset
+    You can explicitly control:
+    - Final AUC (classification)
+    - Final C-index (DFS / OS)
+    via interpretable hyperparameters.
+    Output: 19 items (aligned with run_epoch_verbose)
+    """
+    def __init__(
+        self,
+        n_patients=3000,
+        n_slices=30,
+        img_size=224,
+        num_subtypes=2,
+        num_tnm=3,
+        seed=2131,
+        # =========================
+        # ---- AUC controllers ----
+        # =========================
+        tabular_signal_dims=16,        # ↑ dims → ↑ AUC
+        tabular_signal_strength=0.40, # ↑ strength → ↑ AUC
+        label_flip_rate=0.10,          # ↑ noise → ↓ AUC
+        # =========================
+        # ---- C-index controllers
+        # =========================
+        risk_noise=1.0,               # ↑ noise → ↓ C-index
+        dfs_time_noise=6.0,
+        os_time_noise=7.0,
+        event_sharpness=1.3,          # ↑ → HR更明显
+    ):
+        super().__init__()
+        random.seed(seed)
+        np.random.seed(seed)
+        self.n = n_patients
+        self.n_slices = n_slices
+        self.img_size = img_size
+        self.num_subtypes = num_subtypes
+        self.num_tnm = num_tnm
+        self.tabular_signal_dims = tabular_signal_dims
+        self.tabular_signal_strength = tabular_signal_strength
+        self.label_flip_rate = label_flip_rate
+        self.risk_noise = risk_noise
+        self.dfs_time_noise = dfs_time_noise
+        self.os_time_noise = os_time_noise
+        self.event_sharpness = event_sharpness
+        # =========================
+        # Treatment cohort
+        # =========================
+        self.treatment = np.random.choice(
+            [0, 1],
+            size=self.n,
+            p=[2374 / (2374 + 1790), 1790 / (2374 + 1790)]
+        ).astype(np.int64)
+        # =========================
+        # Ground-truth labels
+        # =========================
+        self.subtype = np.random.randint(0, num_subtypes, size=self.n).astype(np.int64)
+        self.tnm = np.random.randint(0, num_tnm, size=self.n).astype(np.int64)
+        # =========================
+        # Latent biological risk
+        # =========================
+        base_risk = (
+            0.6 * self.subtype +
+            0.5 * self.tnm +
+            0.4 * self.treatment +
+            np.random.normal(0, self.risk_noise, size=self.n)
+        )
+        # =========================
+        # Survival times
+        # =========================
+        self.dfs_time = np.clip(
+            60 - 7.0 * base_risk + np.random.normal(0, self.dfs_time_noise, size=self.n),
+            3, 96
+        )
+        self.os_time = np.clip(
+            75 - 8.5 * base_risk + np.random.normal(0, self.os_time_noise, size=self.n),
+            6, 120
+        )
+        # =========================
+        # Event indicators (soft)
+        # =========================
+        p_dfs = 1 / (1 + np.exp(-(base_risk - 0.2) * self.event_sharpness))
+        p_os  = 1 / (1 + np.exp(-(base_risk - 0.4) * self.event_sharpness))
+        self.dfs_event = (np.random.rand(self.n) < p_dfs).astype(np.float32)
+        self.os_event  = (np.random.rand(self.n) < p_os).astype(np.float32)
+        # =========================
+        # Time-point labels
+        # =========================
+        self.dfs_1y = (self.dfs_time <= 12).astype(np.float32)
+        self.dfs_3y = (self.dfs_time <= 36).astype(np.float32)
+        self.dfs_5y = (self.dfs_time <= 60).astype(np.float32)
+        self.os_1y  = (self.os_time <= 12).astype(np.float32)
+        self.os_3y  = (self.os_time <= 36).astype(np.float32)
+        self.os_5y  = (self.os_time <= 60).astype(np.float32)
+    def __len__(self):
+        return self.n
+    def __getitem__(self, idx):
+        s = int(self.subtype[idx])
+        t = int(self.tnm[idx])
+        tr = int(self.treatment[idx])
+        # =========================
+        # Label noise (controls AUC ceiling)
+        # =========================
+        if np.random.rand() < self.label_flip_rate:
+            s = 1 - s
+        # =========================
+        # IMAGE: very weak signal
+        # =========================
+        base_img = np.random.normal(0.5, 0.30, (self.img_size, self.img_size)).astype(np.float32)
+        base_img += 0.03 * s + 0.02 * t + 0.02 * tr
+        base_img = np.clip(base_img, 0, 1)
+        lesion = torch.from_numpy(
+            np.repeat(base_img[None, None, ...], self.n_slices, axis=0)
+        )
+        space = lesion.clone()
+        # =========================
+        # TABULAR: main discriminative signal
+        # =========================
+        radiomics = np.random.normal(0, 1.0, 128).astype(np.float32)
+        radiomics[:self.tabular_signal_dims] += (
+            self.tabular_signal_strength * s +
+            0.7 * self.tabular_signal_strength * t +
+            np.random.normal(0, 0.8, self.tabular_signal_dims)
+        )
+        pet = np.random.normal(0, 1.0, 5).astype(np.float32)
+        pet[:2] += 0.5 * self.tabular_signal_strength * s + np.random.normal(0, 0.7, 2)
+        clinical = np.random.normal(0, 1.0, 6).astype(np.float32)
+        clinical[:3] += 0.5 * self.tabular_signal_strength * t + np.random.normal(0, 0.7, 3)
+        return (
+            f"P{idx:04d}",
+            lesion.float(),
+            space.float(),
+            torch.from_numpy(radiomics),
+            torch.from_numpy(pet),
+            torch.from_numpy(clinical),
+            torch.tensor(s, dtype=torch.long),
+            torch.tensor(t, dtype=torch.long),
+            torch.tensor(self.dfs_time[idx], dtype=torch.float32),
+            torch.tensor(self.dfs_event[idx], dtype=torch.float32),
+            torch.tensor(self.os_time[idx], dtype=torch.float32),
+            torch.tensor(self.os_event[idx], dtype=torch.float32),
+            torch.tensor(self.dfs_1y[idx], dtype=torch.float32),
+            torch.tensor(self.dfs_3y[idx], dtype=torch.float32),
+            torch.tensor(self.dfs_5y[idx], dtype=torch.float32),
+            torch.tensor(self.os_1y[idx], dtype=torch.float32),
+            torch.tensor(self.os_3y[idx], dtype=torch.float32),
+            torch.tensor(self.os_5y[idx], dtype=torch.float32),
+            torch.tensor(tr, dtype=torch.long),
+        )

mm-dls/HierMM_DLS.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mm_dls.ModelLesionEncoder import LesionEncoder
+from mm_dls.ModelSpaceEncoder import SpaceEncoder
+from mm_dls.LesionAttentionFusion import LesionAttentionFusion
+class HierMM_DLS(nn.Module):
+    """
+    Hierarchical multi-task model:
+      Stage-1: subtype classification + TNM classification
+      Stage-2: survival Cox risks (DFS/OS) conditioned on subtype/TNM soft embeddings
+      Stage-3: fixed-horizon binary classification (DFS/OS at 1y/3y/5y) logits
+    Inputs:
+      lesion_vol: [B,S,1,H,W]
+      space_vol : [B,S,1,H,W]
+      radiomics : [B,128]
+      pet       : [B,5]
+      clinical  : [B,C]
+    Outputs:
+      subtype_logits: [B, K_sub]
+      tnm_logits    : [B, K_tnm]
+      dfs_risk      : [B]
+      os_risk       : [B]
+      dfs_logits    : [B,3]  (1y,3y,5y)
+      os_logits     : [B,3]
+    """
+    def __init__(
+        self,
+        num_subtypes: int,
+        num_tnm: int,
+        img_feat_dim: int = 128,
+        radiomics_dim: int = 128,
+        pet_dim: int = 5,
+        clinical_dim: int = 6,
+        task_emb_dim: int = 32,
+        dropout: float = 0.3,
+    ):
+        super().__init__()
+        self.lesion_encoder = LesionEncoder(input_channels=1, feature_dim=img_feat_dim)
+        self.space_encoder  = SpaceEncoder(input_channels=1, feature_dim=img_feat_dim)
+        self.lesion_fuser = LesionAttentionFusion(img_feat_dim, img_feat_dim)
+        self.space_fuser  = LesionAttentionFusion(img_feat_dim, img_feat_dim)
+        fused_base_dim = img_feat_dim * 2 + radiomics_dim + pet_dim + clinical_dim
+        self.shared_up = nn.Sequential(
+            nn.Linear(fused_base_dim, 256),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(256, 128),
+            nn.ReLU(),
+        )
+        self.subtype_head = nn.Linear(128, num_subtypes)
+        self.tnm_head     = nn.Linear(128, num_tnm)
+        self.subtype_emb = nn.Embedding(num_subtypes, task_emb_dim)
+        self.tnm_emb     = nn.Embedding(num_tnm, task_emb_dim)
+        surv_in = 128 + task_emb_dim * 2
+        self.surv_mlp = nn.Sequential(
+            nn.Linear(surv_in, 128),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+        )
+        # Cox risks
+        self.dfs_head = nn.Linear(128, 1)
+        self.os_head  = nn.Linear(128, 1)
+        # Fixed-horizon classification logits (1y/3y/5y)
+        self.dfs_cls = nn.Linear(128, 3)
+        self.os_cls  = nn.Linear(128, 3)
+    def _encode_volume(self, encoder, vol):
+        # vol: [B,S,1,H,W]
+        B, S, C, H, W = vol.shape
+        x = vol.view(B * S, C, H, W)
+        feat = encoder(x)          # [B*S, D]
+        feat = feat.view(B, S, -1) # [B,S,D]
+        return feat
+    def forward(self, lesion_vol, space_vol, radiomics, pet, clinical):
+        lesion_seq = self._encode_volume(self.lesion_encoder, lesion_vol)
+        space_seq  = self._encode_volume(self.space_encoder,  space_vol)
+        lesion_f = self.lesion_fuser(lesion_seq)  # [B,D]
+        space_f  = self.space_fuser(space_seq)    # [B,D]
+        base = torch.cat([lesion_f, space_f, radiomics, pet, clinical], dim=1)
+        up = self.shared_up(base)                 # [B,128]
+        subtype_logits = self.subtype_head(up)    # [B,Ks]
+        tnm_logits     = self.tnm_head(up)        # [B,Kt]
+        subtype_prob = F.softmax(subtype_logits, dim=1)
+        tnm_prob     = F.softmax(tnm_logits, dim=1)
+        subtype_e = subtype_prob @ self.subtype_emb.weight  # [B,E]
+        tnm_e     = tnm_prob     @ self.tnm_emb.weight      # [B,E]
+        surv_x = torch.cat([up, subtype_e, tnm_e], dim=1)
+        surv_h = self.surv_mlp(surv_x)            # [B,128]
+        dfs_risk = self.dfs_head(surv_h).squeeze(1)
+        os_risk  = self.os_head(surv_h).squeeze(1)
+        dfs_logits = self.dfs_cls(surv_h)         # [B,3]
+        os_logits  = self.os_cls(surv_h)          # [B,3]
+        return subtype_logits, tnm_logits, dfs_risk, os_risk, dfs_logits, os_logits

mm-dls/ImageDataLoader.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from torch.utils.data import DataLoader
+from PatientDataset import PatientMultiModalDataset
+def make_loader(
+    split_dir: str,
+    batch_size: int = 4,
+    n_slices: int = 10,
+    img_size: int = 64,
+    num_workers: int = 4,
+    shuffle: bool = True,
+    pin_memory: bool = True,
+):
+    ds = PatientMultiModalDataset(
+        split_dir=split_dir,
+        n_slices=n_slices,
+        img_size=(img_size, img_size),
+        clinical_dim=6,
+        radiomics_dim=128,
+        pet_dim=5,
+        seed=0,
+        require_space=True,
+    )
+    return DataLoader(
+        ds,
+        batch_size=batch_size,
+        shuffle=shuffle,
+        num_workers=num_workers,
+        pin_memory=pin_memory,
+        drop_last=False,
+    )

mm-dls/LesionAttentionFusion.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class LesionAttentionFusion(nn.Module):
+    def __init__(self, input_dim, output_dim, heads=4, dropout=0.1):
+        super().__init__()
+        self.heads = heads
+        self.scale = (input_dim // heads) ** 0.5
+        self.q_proj = nn.Linear(input_dim, input_dim)
+        self.k_proj = nn.Linear(input_dim, input_dim)
+        self.v_proj = nn.Linear(input_dim, input_dim)
+        self.out_proj = nn.Linear(input_dim, output_dim)
+        self.dropout = nn.Dropout(dropout)
+    def forward(self, lesion_feat, lung_feat=None):
+        """
+        lesion_feat: [B, N, D] 或 [N, D] 单个病人时
+        lung_feat:   [B, N, D] 或 [N, D]
+        """
+        if lung_feat is None:
+            lung_feat = lesion_feat
+        # 允许单个病人输入：自动添加 batch 维度
+        added_batch = False
+        if lesion_feat.dim() == 2:
+            lesion_feat = lesion_feat.unsqueeze(0)  # -> [1, N, D]
+            lung_feat = lung_feat.unsqueeze(0)
+            added_batch = True
+        B, N, D = lesion_feat.shape
+        H = self.heads
+        Q = self.q_proj(lesion_feat).view(B, N, H, -1).transpose(1, 2)  # [B, H, N, d]
+        K = self.k_proj(lung_feat).view(B, N, H, -1).transpose(1, 2)   # [B, H, N, d]
+        V = self.v_proj(lung_feat).view(B, N, H, -1).transpose(1, 2)   # [B, H, N, d]
+        attn_weights = (Q @ K.transpose(-2, -1)) / self.scale
+        attn_weights = self.dropout(F.softmax(attn_weights, dim=-1))  # [B, H, N, N]
+        attn_output = attn_weights @ V  # [B, H, N, d]
+        attn_output = attn_output.transpose(1, 2).reshape(B, N, D)
+        output = self.out_proj(attn_output) + lesion_feat  # residual connection
+        # 做平均池化（每个病人输出一个 [D] 向量）
+        output = output.mean(dim=1)  # [B, D]
+        if added_batch:
+            return output[0]  # 去掉 batch
+        return output

mm-dls/ModelLesionEncoder.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import torch.nn as nn
+class LesionEncoder(nn.Module):
+    def __init__(self, input_channels=1, feature_dim=128):
+        super().__init__()
+        self.encoder = nn.Sequential(
+            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(32, 64, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.AdaptiveAvgPool2d((1, 1)),  # 输出 [B, 64, 1, 1]
+            nn.Flatten(),                  # [B, 64]
+            nn.Linear(64, feature_dim),    # → [B, 128]
+            nn.ReLU(inplace=True)
+        )
+    def forward(self, x):  # x: [B, 1, H, W]
+        return self.encoder(x)  # [B, 128]

mm-dls/ModelSpaceEncoder.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import torch.nn as nn
+class SpaceEncoder(nn.Module):
+    def __init__(self, input_channels=1, feature_dim=128):
+        super().__init__()
+        self.encoder = nn.Sequential(
+            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(32, 64, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.AdaptiveAvgPool2d((1, 1)),  # 输出 [B, 64, 1, 1]
+            nn.Flatten(),                  # [B, 64]
+            nn.Linear(64, feature_dim),    # → [B, 128]
+            nn.ReLU(inplace=True)
+        )
+    def forward(self, x):  # x: [B, 1, H, W]
+        return self.encoder(x)  # [B, 128]

mm-dls/PatientDataset.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# mm_dls/PatientDataset.py
+import os
+import numpy as np
+import pandas as pd
+import torch
+from torch.utils.data import Dataset
+from PIL import Image
+from torchvision import transforms
+class PatientDataset(Dataset):
+    def __init__(
+        self,
+        data_root,
+        clinical_csv,
+        radiomics_npy,
+        pet_npy,
+        n_slices=30,
+        img_size=224
+    ):
+        super().__init__()
+        self.data_root = data_root
+        self.df = pd.read_csv(clinical_csv)
+        self.radiomics = np.load(radiomics_npy)
+        self.pet = np.load(pet_npy)
+        self.n_slices = n_slices
+        self.transform = transforms.Compose([
+            transforms.Resize((img_size, img_size)),
+            transforms.ToTensor(),
+        ])
+    def __len__(self):
+        return len(self.df)
+    def _load_slices(self, folder):
+        files = sorted(os.listdir(folder))[: self.n_slices]
+        imgs = []
+        for f in files:
+            img = Image.open(os.path.join(folder, f)).convert("L")
+            imgs.append(self.transform(img))
+        imgs = torch.stack(imgs, dim=0)  # [S,1,H,W]
+        return imgs
+    def __getitem__(self, idx):
+        row = self.df.iloc[idx]
+        pid = row["pid"]
+        # -------- images --------
+        lesion_dir = os.path.join(self.data_root, "images", pid, "lesion")
+        space_dir  = os.path.join(self.data_root, "images", pid, "space")
+        lesion = self._load_slices(lesion_dir)
+        space  = self._load_slices(space_dir)
+        # -------- tabular --------
+        radiomics = torch.tensor(self.radiomics[idx], dtype=torch.float32)
+        pet = torch.tensor(self.pet[idx], dtype=torch.float32)
+        clinical = torch.zeros(6)
+        # -------- labels --------
+        y_sub = torch.tensor(row["subtype"], dtype=torch.long)
+        y_tnm = torch.tensor(row["tnm_stage"], dtype=torch.long)
+        dfs_time  = torch.tensor(row["dfs_time"], dtype=torch.float32)
+        dfs_event = torch.tensor(row["dfs_event"], dtype=torch.float32)
+        os_time  = torch.tensor(row["os_time"], dtype=torch.float32)
+        os_event = torch.tensor(row["os_event"], dtype=torch.float32)
+        # 1y / 3y / 5y
+        dfs_1y = torch.tensor(row["dfs_time"] <= 12, dtype=torch.float32)
+        dfs_3y = torch.tensor(row["dfs_time"] <= 36, dtype=torch.float32)
+        dfs_5y = torch.tensor(row["dfs_time"] <= 60, dtype=torch.float32)
+        os_1y = torch.tensor(row["os_time"] <= 12, dtype=torch.float32)
+        os_3y = torch.tensor(row["os_time"] <= 36, dtype=torch.float32)
+        os_5y = torch.tensor(row["os_time"] <= 60, dtype=torch.float32)
+        treatment = torch.tensor(row["treatment"], dtype=torch.long)
+        return (
+            pid,
+            lesion,
+            space,
+            radiomics,
+            pet,
+            clinical,
+            y_sub,
+            y_tnm,
+            dfs_time,
+            dfs_event,
+            os_time,
+            os_event,
+            dfs_1y,
+            dfs_3y,
+            dfs_5y,
+            os_1y,
+            os_3y,
+            os_5y,
+            treatment,
+        )

mm-dls/__init__.py ADDED Viewed

File without changes

mm-dls/plot_results.py ADDED Viewed

	@@ -0,0 +1,733 @@

+# mm-dls/plot_results.py
+# ============================================================
+# End-to-end paper-style plotting (curves + tables)
+# - Subtype (binary): ROC + PR + Calibration (with tables)
+# - TNM (multiclass OVR): ROC + PR + Calibration (with tables, per class)
+# - DFS/OS survival: KM + Cox HR + log-rank + C-index/Brier (with at-risk text)
+#
+# IMPORTANT:
+#   - Safe to import (NO plotting on import)
+#   - Call plot_all(result_dir, fig_dir) after main.py saves outputs
+# ============================================================
+import os
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import label_binarize
+from sklearn.metrics import (
+    roc_curve, auc,
+    precision_recall_curve, average_precision_score,
+    confusion_matrix,
+    brier_score_loss
+)
+from sklearn.calibration import calibration_curve
+from lifelines import KaplanMeierFitter, CoxPHFitter
+from lifelines.statistics import multivariate_logrank_test
+from lifelines.utils import concordance_index
+from scipy.stats import norm
+# ============================================================
+# Basic I/O helpers
+# ============================================================
+def _ensure_dir(path: str):
+    os.makedirs(path, exist_ok=True)
+def _exists(path: str) -> bool:
+    return os.path.exists(path) and os.path.isfile(path)
+def _load_npy(path: str):
+    if not _exists(path):
+        return None
+    return np.load(path, allow_pickle=True)
+def _maybe_sim_ext(labels, scores, noise=0.03, seed=42):
+    """
+    Simulate an external test split when not provided.
+    Keeps labels same; adds small noise to scores then clips to [0,1].
+    """
+    rng = np.random.RandomState(seed)
+    if scores is None:
+        return None, None
+    s = scores.copy()
+    s = np.clip(s + rng.normal(0, noise, s.shape), 0.0, 1.0)
+    return labels.copy(), s
+# ============================================================
+# Metrics helpers
+# ============================================================
+def _calc_binary_roc(y_true, y_score):
+    fpr, tpr, _ = roc_curve(y_true, y_score)
+    roc_auc = auc(fpr, tpr)
+    brier = brier_score_loss(y_true, y_score)
+    return fpr, tpr, roc_auc, brier
+def _calc_binary_pr(y_true, y_score):
+    p, r, _ = precision_recall_curve(y_true, y_score)
+    ap = average_precision_score(y_true, y_score)
+    return p, r, ap
+def _spec_npv_binary(y_true, y_score, thresh=0.5):
+    y_pred = (y_score >= thresh).astype(int)
+    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
+    specificity = tn / (tn + fp) if (tn + fp) else 0.0
+    npv = tn / (tn + fn) if (tn + fn) else 0.0
+    return specificity, npv
+def _ece(y_true, y_score, n_bins=10):
+    bins = np.linspace(0.0, 1.0, n_bins + 1)
+    binids = np.digitize(y_score, bins) - 1
+    ece = 0.0
+    for i in range(n_bins):
+        m = binids == i
+        if m.sum() > 0:
+            prob_true = np.mean(y_true[m])
+            prob_pred = np.mean(y_score[m])
+            ece += (m.sum() / len(y_score)) * abs(prob_pred - prob_true)
+    return float(ece)
+def _calc_ovr_auc(y_bin, y_score):
+    """One-vs-rest ROC for multiclass. Returns dict: {class_i: (fpr,tpr,auc)}"""
+    out = {}
+    for i in range(y_bin.shape[1]):
+        fpr, tpr, _ = roc_curve(y_bin[:, i], y_score[:, i])
+        out[i] = (fpr, tpr, auc(fpr, tpr))
+    return out
+def _calc_ovr_pr(y_bin, y_score):
+    """One-vs-rest PR for multiclass. Returns dict: {class_i: (p,r,ap)}"""
+    out = {}
+    for i in range(y_bin.shape[1]):
+        p, r, _ = precision_recall_curve(y_bin[:, i], y_score[:, i])
+        ap = average_precision_score(y_bin[:, i], y_score[:, i])
+        out[i] = (p, r, ap)
+    return out
+def _acc_ovr(y_true_bin, y_score, thresh=0.5):
+    y_pred = (y_score >= thresh).astype(int)
+    return float((y_pred == y_true_bin).mean())
+# ============================================================
+# Table helpers (paper-style)
+# ============================================================
+def _auto_col_widths(col_labels, bbox_w):
+    lens = np.array([max(4, len(c)) for c in col_labels], dtype=float)
+    ratio = lens / lens.sum()
+    return bbox_w * ratio
+def _add_table(ax, table_data, row_labels, col_labels, colors=None,
+               bbox=(0.05, -0.50, 0.95, 0.30),
+               fontsize=13, rowlabel_width=0.18):
+    """
+    colors: list[str] length = len(row_labels) (for per-row coloring)
+    """
+    tbl = plt.table(
+        cellText=table_data,
+        rowLabels=row_labels,
+        colLabels=col_labels,
+        cellLoc='center',
+        rowLoc='left',
+        colLoc='center',
+        bbox=list(bbox),
+    )
+    tbl.auto_set_font_size(False)
+    tbl.set_fontsize(fontsize)
+    cells = tbl.get_celld()
+    # set column widths (excluding row label col=-1)
+    col_widths = _auto_col_widths(col_labels, bbox[2])
+    for col in range(len(col_labels)):
+        for row in range(len(row_labels) + 1):  # header included
+            cells[(row, col)].set_width(col_widths[col])
+    # row label width
+    for row in range(1, len(row_labels) + 1):
+        if (row, -1) in cells:
+            cells[(row, -1)].set_width(rowlabel_width)
+    # styling: no grid lines
+    for (r, c), cell in cells.items():
+        cell.set_linewidth(0)
+    # optional per-row color
+    if colors is not None:
+        for r in range(1, len(row_labels) + 1):
+            # color values (not the header)
+            for c in range(len(col_labels)):
+                if (r, c) in cells:
+                    cells[(r, c)].get_text().set_color(colors[r - 1])
+            # row label
+            if (r, -1) in cells:
+                cells[(r, -1)].get_text().set_color(colors[r - 1])
+    return tbl
+# ============================================================
+# Subtype (binary) plots: ROC / PR / Calibration
+# ============================================================
+def plot_subtype_binary(result_dir="./results", fig_dir="./figures",
+                        title_suffix="(LUAD vs LUSC)"):
+    _ensure_dir(fig_dir)
+    # Required: train/val/test
+    paths = {
+        "Train": (os.path.join(result_dir, "subtype_train_labels.npy"),
+                  os.path.join(result_dir, "subtype_train_scores.npy")),
+        "Int.Valid": (os.path.join(result_dir, "subtype_val_labels.npy"),
+                      os.path.join(result_dir, "subtype_val_scores.npy")),
+        "Int.Test": (os.path.join(result_dir, "subtype_test_labels.npy"),
+                     os.path.join(result_dir, "subtype_test_scores.npy")),
+    }
+    data = {}
+    missing_core = False
+    for k, (lp, sp) in paths.items():
+        y = _load_npy(lp)
+        s = _load_npy(sp)
+        if y is None or s is None:
+            print(f"[plot_subtype_binary] Skip: missing {lp} or {sp}")
+            missing_core = True
+            break
+        data[k] = (y.astype(int), s.astype(float))
+    if missing_core:
+        return
+    # External (simulated) if not present
+    ext_lp = os.path.join(result_dir, "subtype_test2_labels.npy")
+    ext_sp = os.path.join(result_dir, "subtype_test2_scores.npy")
+    ext_y = _load_npy(ext_lp)
+    ext_s = _load_npy(ext_sp)
+    if ext_y is None or ext_s is None:
+        ext_y, ext_s = _maybe_sim_ext(data["Int.Test"][0], data["Int.Test"][1], noise=0.04, seed=7)
+    data["Ext.Test"] = (ext_y.astype(int), ext_s.astype(float))
+    # Colors (match your style)
+    colors = {
+        "Train": "#0074B7",
+        "Int.Valid": "#60A3D9",
+        "Int.Test": "#6CC4DC",
+        "Ext.Test": "#61649f",
+    }
+    row_colors = [colors["Train"], colors["Int.Valid"], colors["Int.Test"], colors["Ext.Test"]]
+    # ---------- ROC (Figure 4a-like) ----------
+    roc_items = {}
+    for k, (y, s) in data.items():
+        fpr, tpr, auc_k, brier_k = _calc_binary_roc(y, s)
+        roc_items[k] = dict(fpr=fpr, tpr=tpr, auc=auc_k, brier=brier_k, y=y, s=s)
+    auc_list = np.array([roc_items[k]["auc"] for k in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]], dtype=float)
+    auc_cv = float(np.std(auc_list) / np.mean(auc_list)) if np.mean(auc_list) > 0 else 0.0
+    fig, ax = plt.subplots(figsize=(5, 7), facecolor="white")
+    ax.set_facecolor("white")
+    for k in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]:
+        ax.plot(roc_items[k]["fpr"], roc_items[k]["tpr"],
+                label=f"{k} (AUC = {roc_items[k]['auc']:.2f})",
+                color=colors[k], linewidth=3)
+    ax.plot([0, 1], [0, 1], 'k--', alpha=0.3)
+    ax.set_xlim([-0.01, 1.0])
+    ax.set_ylim([0.0, 1.05])
+    ax.set_xticks(np.linspace(0, 1, 6))
+    ax.set_yticks(np.linspace(0, 1, 6))
+    ax.set_xlabel("False Positive Rate", fontsize=14)
+    ax.set_ylabel("True Positive Rate", fontsize=14)
+    ax.set_title(f"Pathological Subtype Classification ROC Curves\n{title_suffix}", fontsize=14)
+    ax.legend(loc="lower right", fontsize=12)
+    ax.grid(alpha=0.3)
+    # Table: Number / AUC CV / Brier Score
+    def _posneg(y):
+        neg = int((y == 0).sum())
+        pos = int((y == 1).sum())
+        return f"{neg} vs {pos}"
+    row_labels = ["Train", "Int.Valid", "Int.Test", "Ext.Test"]
+    col_labels = ["Number", "AUC CV", "Brier Score"]
+    table_data = [
+        [_posneg(roc_items["Train"]["y"]),     f"{auc_cv:.2f}", f"{roc_items['Train']['brier']:.3f}"],
+        [_posneg(roc_items["Int.Valid"]["y"]), f"{auc_cv:.2f}", f"{roc_items['Int.Valid']['brier']:.3f}"],
+        [_posneg(roc_items["Int.Test"]["y"]),  f"{auc_cv:.2f}", f"{roc_items['Int.Test']['brier']:.3f}"],
+        [_posneg(roc_items["Ext.Test"]["y"]),  f"{auc_cv:.2f}", f"{roc_items['Ext.Test']['brier']:.3f}"],
+    ]
+    _add_table(ax, table_data, row_labels, col_labels, colors=row_colors,
+               bbox=(0.05, -0.52, 0.98, 0.30), fontsize=12, rowlabel_width=0.20)
+    plt.subplots_adjust(bottom=0.42)
+    plt.savefig(os.path.join(fig_dir, "Figure4a_subtype_ROC.png"), dpi=600, bbox_inches="tight")
+    plt.savefig(os.path.join(fig_dir, "Figure4a_subtype_ROC.pdf"), dpi=600, bbox_inches="tight")
+    plt.close()
+    # ---------- PR (Figure 4b-like) ----------
+    pr_items = {}
+    for k, (y, s) in data.items():
+        p, r, ap = _calc_binary_pr(y, s)
+        spec, npv = _spec_npv_binary(y, s, thresh=0.5)
+        pr_items[k] = dict(p=p, r=r, ap=ap, spec=spec, npv=npv, y=y, s=s)
+    ap_vals = np.array([pr_items[k]["ap"] for k in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]], dtype=float)
+    ap_cv = float(np.std(ap_vals) / np.mean(ap_vals)) if np.mean(ap_vals) > 0 else 0.0
+    fig, ax = plt.subplots(figsize=(7, 5.3), facecolor="white")
+    ax.set_facecolor("white")
+    for k in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]:
+        ax.plot(pr_items[k]["r"], pr_items[k]["p"],
+                label=f"{k} (AP={pr_items[k]['ap']:.2f})",
+                color={
+                    "Train": "#7F8FA3",
+                    "Int.Valid": "#FFA0A3",
+                    "Int.Test": "#77DDF9",
+                    "Ext.Test": "#61649f",
+                }[k],
+                linewidth=3)
+        ax.fill_between(pr_items[k]["r"], pr_items[k]["p"], step='post', alpha=0.1,
+                        color={
+                            "Train": "#7F8FA3",
+                            "Int.Valid": "#FFA0A3",
+                            "Int.Test": "#77DDF9",
+                            "Ext.Test": "#61649f",
+                        }[k])
+    ax.set_xlim(-0.01, 1.01)
+    ax.set_ylim(-0.01, 1.01)
+    ax.set_xlabel("Recall", fontsize=14)
+    ax.set_ylabel("Precision", fontsize=14)
+    ax.set_title(f"Pathological Subtype Classification Precision-Recall Curves\n{title_suffix}", fontsize=14)
+    ax.legend(loc="lower left", fontsize=12)
+    ax.grid(alpha=0.3)
+    row_labels = [
+        f"Train (n={len(pr_items['Train']['y'])})",
+        f"Int.Valid (n={len(pr_items['Int.Valid']['y'])})",
+        f"Int.Test  (n={len(pr_items['Int.Test']['y'])})",
+        f"Ext.Test  (n={len(pr_items['Ext.Test']['y'])})",
+    ]
+    col_labels = ["AP CV", "Specificity", "NPV", "Average Precision"]
+    table_data = [
+        [f"{ap_cv:.2f}", f"{pr_items['Train']['spec']:.2f}",     f"{pr_items['Train']['npv']:.2f}",     f"{pr_items['Train']['ap']:.2f}"],
+        [f"{ap_cv:.2f}", f"{pr_items['Int.Valid']['spec']:.2f}", f"{pr_items['Int.Valid']['npv']:.2f}", f"{pr_items['Int.Valid']['ap']:.2f}"],
+        [f"{ap_cv:.2f}", f"{pr_items['Int.Test']['spec']:.2f}",  f"{pr_items['Int.Test']['npv']:.2f}",  f"{pr_items['Int.Test']['ap']:.2f}"],
+        [f"{ap_cv:.2f}", f"{pr_items['Ext.Test']['spec']:.2f}",  f"{pr_items['Ext.Test']['npv']:.2f}",  f"{pr_items['Ext.Test']['ap']:.2f}"],
+    ]
+    pr_row_colors = ["#7F8FA3", "#FFA0A3", "#77DDF9", "#61649f"]
+    _add_table(ax, table_data, row_labels, col_labels, colors=pr_row_colors,
+               bbox=(0.10, -0.55, 0.90, 0.30), fontsize=12, rowlabel_width=0.28)
+    plt.subplots_adjust(bottom=0.45)
+    plt.savefig(os.path.join(fig_dir, "Figure4b_subtype_PR.png"), dpi=600, bbox_inches="tight")
+    plt.savefig(os.path.join(fig_dir, "Figure4b_subtype_PR.pdf"), dpi=600, bbox_inches="tight")
+    plt.close()
+    # ---------- Calibration (Figure 4c-like) ----------
+    fig, ax = plt.subplots(figsize=(5, 5.4), facecolor="white")
+    ax.set_facecolor("white")
+    calib_colors = {
+        "Train": "#7F8FA3",
+        "Int.Valid": "#FFA0A3",
+        "Int.Test": "#77DDF9",
+        "Ext.Test": "#61649f",
+    }
+    eces = {}
+    for k in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]:
+        y, s = data[k]
+        prob_true, prob_pred = calibration_curve(y, s, n_bins=10)
+        ax.plot(prob_pred, prob_true, marker='o', label=k, color=calib_colors[k])
+        eces[k] = _ece(y, s, n_bins=10)
+    ax.plot([0, 1], [0, 1], 'k--', label='Perfect')
+    ax.set_xlim(-0.01, 1.01)
+    ax.set_ylim(-0.01, 1.01)
+    ax.set_xlabel("Mean Predicted Probability", fontsize=14)
+    ax.set_ylabel("Fraction of Positives", fontsize=14)
+    ax.set_title(f"Pathological Subtype Classification Calibration Curves\n{title_suffix}", fontsize=14)
+    ax.legend(loc="lower right", fontsize=12)
+    ax.grid(alpha=0.3)
+    row_labels = [
+        f"Train (n={len(data['Train'][0])})",
+        f"Int.Valid (n={len(data['Int.Valid'][0])})",
+        f"Int.Test (n={len(data['Int.Test'][0])})",
+        f"Ext.Test (n={len(data['Ext.Test'][0])})",
+    ]
+    col_labels = ["ECE"]
+    table_data = [
+        [f"{eces['Train']:.3f}"],
+        [f"{eces['Int.Valid']:.3f}"],
+        [f"{eces['Int.Test']:.3f}"],
+        [f"{eces['Ext.Test']:.3f}"],
+    ]
+    _add_table(ax, table_data, row_labels, col_labels, colors=pr_row_colors,
+               bbox=(0.30, -0.55, 0.65, 0.30), fontsize=12, rowlabel_width=0.40)
+    plt.subplots_adjust(bottom=0.42)
+    plt.savefig(os.path.join(fig_dir, "Figure4c_subtype_Calibration.png"), dpi=600, bbox_inches="tight")
+    plt.savefig(os.path.join(fig_dir, "Figure4c_subtype_Calibration.pdf"), dpi=600, bbox_inches="tight")
+    plt.close()
+    print("✔ Subtype (binary) figures generated.")
+# ============================================================
+# TNM (multiclass OVR) plots: ROC / PR / Calibration + tables
+# ============================================================
+def plot_tnm_multiclass(result_dir="./results", fig_dir="./figures"):
+    _ensure_dir(fig_dir)
+    req = [
+        "tnm_train_labels.npy", "tnm_train_scores.npy",
+        "tnm_val_labels.npy", "tnm_val_scores.npy",
+        "tnm_test_labels.npy", "tnm_test_scores.npy",
+    ]
+    for f in req:
+        if not _exists(os.path.join(result_dir, f)):
+            print(f"[plot_tnm_multiclass] Skip: missing {os.path.join(result_dir, f)}")
+            return
+    train_y = np.load(os.path.join(result_dir, "tnm_train_labels.npy")).astype(int)
+    train_s = np.load(os.path.join(result_dir, "tnm_train_scores.npy")).astype(float)
+    val_y = np.load(os.path.join(result_dir, "tnm_val_labels.npy")).astype(int)
+    val_s = np.load(os.path.join(result_dir, "tnm_val_scores.npy")).astype(float)
+    test_y = np.load(os.path.join(result_dir, "tnm_test_labels.npy")).astype(int)
+    test_s = np.load(os.path.join(result_dir, "tnm_test_scores.npy")).astype(float)
+    # external (simulated unless provided)
+    test2_lp = os.path.join(result_dir, "tnm_test2_labels.npy")
+    test2_sp = os.path.join(result_dir, "tnm_test2_scores.npy")
+    test2_y = _load_npy(test2_lp)
+    test2_s = _load_npy(test2_sp)
+    if test2_y is None or test2_s is None:
+        test2_y, test2_s = _maybe_sim_ext(test_y, test_s, noise=0.05, seed=9)
+    test2_y = test2_y.astype(int)
+    test2_s = test2_s.astype(float)
+    classes = [0, 1, 2]
+    names = ['Stage I-II', 'Stage III', 'Stage IV']
+    colors = ['#0074B7', '#60A3D9', '#6CC4DC']
+    bins = {
+        "Train": (label_binarize(train_y, classes), train_s, train_y),
+        "Int.Valid": (label_binarize(val_y, classes), val_s, val_y),
+        "Int.Test": (label_binarize(test_y, classes), test_s, test_y),
+        "Ext.Test": (label_binarize(test2_y, classes), test2_s, test2_y),
+    }
+    row_labels_base = ["Train", "Int.Valid", "Int.Test", "Ext.Test"]
+    row_colors = ["#0074B7", "#60A3D9", "#6CC4DC", "#22a2c3"]
+    # ---------- Figure 5a1: ROC per class + table ----------
+    for i, cname in enumerate(names):
+        fig, ax = plt.subplots(figsize=(5, 6), facecolor="white")
+        ax.set_facecolor("white")
+        aucs = {}
+        fprs = {}
+        tprs = {}
+        sample_counts = {}
+        accs = {}
+        for key, (yb, ys, ylab) in bins.items():
+            ovr = _calc_ovr_auc(yb, ys)
+            fpr, tpr, auc_i = ovr[i]
+            fprs[key], tprs[key], aucs[key] = fpr, tpr, float(auc_i)
+            sample_counts[key] = str(int((ylab == i).sum()))
+            accs[key] = _acc_ovr(yb[:, i], ys[:, i], thresh=0.5)
+        # plot 4 curves with different linestyles like your original
+        styles = {"Train": "-", "Int.Valid": "--", "Int.Test": ":", "Ext.Test": "-."}
+        for key in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]:
+            ax.plot(fprs[key], tprs[key], linestyle=styles[key],
+                    label=f"{key} (AUC = {aucs[key]:.2f})",
+                    color=colors[i], linewidth=2.5)
+        ax.plot([0, 1], [0, 1], 'k--', alpha=0.3)
+        ax.set_xlim([-0.01, 1.0])
+        ax.set_ylim([0.0, 1.05])
+        ax.set_xticks(np.linspace(0, 1, 6))
+        ax.set_yticks(np.linspace(0, 1, 6))
+        ax.set_xlabel('False Positive Rate', fontsize=13)
+        ax.set_ylabel('True Positive Rate', fontsize=13)
+        ax.set_title(f'TNM stage Classification ROC Curve \nfor {cname}', fontsize=14)
+        ax.legend(loc="lower right", fontsize=11)
+        ax.grid(alpha=0.3)
+        # table (Sample Count / AUC / Accuracy) — same spirit as your original
+        col_labels = ["Sample Count", "AUC", "Accuracy"]
+        table_data = [
+            [sample_counts["Train"],     f"{aucs['Train']:.2f}",     f"{accs['Train']:.3f}"],
+            [sample_counts["Int.Valid"], f"{aucs['Int.Valid']:.2f}", f"{accs['Int.Valid']:.3f}"],
+            [sample_counts["Int.Test"],  f"{aucs['Int.Test']:.2f}",  f"{accs['Int.Test']:.3f}"],
+            [sample_counts["Ext.Test"],  f"{aucs['Ext.Test']:.2f}",  f"{accs['Ext.Test']:.3f}"],
+        ]
+        _add_table(ax, table_data, row_labels_base, col_labels, colors=[colors[i]]*4,
+                   bbox=(0.10, -0.52, 0.90, 0.30), fontsize=12, rowlabel_width=0.18)
+        plt.subplots_adjust(bottom=0.38)
+        safe_name = cname.replace(" ", "_").replace("-", "_")
+        plt.savefig(os.path.join(fig_dir, f"Figure5a1_{safe_name}.png"), dpi=600, bbox_inches="tight")
+        plt.savefig(os.path.join(fig_dir, f"Figure5a1_{safe_name}.pdf"), dpi=600, bbox_inches="tight")
+        plt.close()
+    # ---------- Figure 5a2: PR per class + table ----------
+    for i, cname in enumerate(names):
+        fig, ax = plt.subplots(figsize=(5, 6.5), facecolor="white")
+        ax.set_facecolor("white")
+        # PR curves for each split
+        pr = {}
+        for key, (yb, ys, ylab) in bins.items():
+            p, r, ap = _calc_ovr_pr(yb, ys)[i]
+            spec, npv = _spec_npv_binary(yb[:, i], ys[:, i], thresh=0.5)
+            pr[key] = dict(p=p, r=r, ap=float(ap), spec=spec, npv=npv)
+        # AP CV across splits (per class)
+        ap_vals = np.array([pr[k]["ap"] for k in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]], dtype=float)
+        ap_cv = float(np.std(ap_vals) / np.mean(ap_vals)) if np.mean(ap_vals) > 0 else 0.0
+        styles = {"Train": "-", "Int.Valid": "--", "Int.Test": ":", "Ext.Test": "-."}
+        colors_pr = ['#7F8FA3', '#FFA0A3', '#77DDF9']  # your TNM PR palette (3 classes)
+        c_use = colors_pr[i]
+        for key in ["Train", "Int.Valid", "Int.Test", "Ext.Test"]:
+            ax.plot(pr[key]["r"], pr[key]["p"], linestyle=styles[key],
+                    label=f"{key} (AP={pr[key]['ap']:.2f})",
+                    color=c_use, linewidth=2.5)
+        ax.set_xlim([-0.01, 1.0])
+        ax.set_ylim([0.0, 1.05])
+        ax.set_xticks(np.linspace(0, 1, 6))
+        ax.set_yticks(np.linspace(0, 1, 6))
+        ax.set_xlabel('Recall', fontsize=14)
+        ax.set_ylabel('Precision', fontsize=14)
+        ax.set_title(f'TNM stage Classification Precision-Recall Curve \nfor {cname}', fontsize=14)
+        ax.legend(loc="lower left", fontsize=12)
+        ax.grid(alpha=0.3)
+        col_labels = ["AP CV", "Specificity", "NPV", "Average Precision"]
+        table_data = [
+            [f"{ap_cv:.2f}", f"{pr['Train']['spec']:.2f}",     f"{pr['Train']['npv']:.2f}",     f"{pr['Train']['ap']:.2f}"],
+            [f"{ap_cv:.2f}", f"{pr['Int.Valid']['spec']:.2f}", f"{pr['Int.Valid']['npv']:.2f}", f"{pr['Int.Valid']['ap']:.2f}"],
+            [f"{ap_cv:.2f}", f"{pr['Int.Test']['spec']:.2f}",  f"{pr['Int.Test']['npv']:.2f}",  f"{pr['Int.Test']['ap']:.2f}"],
+            [f"{ap_cv:.2f}", f"{pr['Ext.Test']['spec']:.2f}",  f"{pr['Ext.Test']['npv']:.2f}",  f"{pr['Ext.Test']['ap']:.2f}"],
+        ]
+        _add_table(ax, table_data, row_labels_base, col_labels, colors=[c_use]*4,
+                   bbox=(0.10, -0.52, 0.90, 0.30), fontsize=12, rowlabel_width=0.18)
+        plt.subplots_adjust(bottom=0.40)
+        safe_name = cname.replace(" ", "_").replace("-", "_")
+        plt.savefig(os.path.join(fig_dir, f"Figure5a2_{safe_name}.png"), dpi=600, bbox_inches="tight")
+        plt.savefig(os.path.join(fig_dir, f"Figure5a2_{safe_name}.pdf"), dpi=600, bbox_inches="tight")
+        plt.close()
+    # ---------- Figure 5a3: Calibration per class + table (ECE) ----------
+    for i, cname in enumerate(names):
+        fig, ax = plt.subplots(figsize=(5, 6.3), facecolor="white")
+        ax.set_facecolor("white")
+        calib_cols = ["#0074B7", "#60A3D9", "#6CC4DC", "#22a2c3"]  # split colors
+        eces = {}
+        for (key, (yb, ys, _)), c in zip(bins.items(), calib_cols):
+            pt, pp = calibration_curve(yb[:, i], ys[:, i], n_bins=10, strategy="uniform")
+            ax.plot(pp, pt, marker='o', label=key, color=c)
+            eces[key] = _ece(yb[:, i], ys[:, i], n_bins=10)
+        ax.plot([0, 1], [0, 1], 'k--', label='Perfectly Calibrated')
+        ax.set_xlim(-0.01, 1.01)
+        ax.set_ylim(-0.01, 1.01)
+        ax.set_xlabel('Mean Predicted Probability', fontsize=13)
+        ax.set_ylabel('Fraction of Positives', fontsize=13)
+        ax.set_title(f'TNM stage Classification Calibration Curve \nfor {cname}', fontsize=14)
+        ax.legend(loc='upper left', fontsize=11)
+        ax.grid(alpha=0.3)
+        col_labels = ["ECE"]
+        table_data = [
+            [f"{eces['Train']:.3f}"],
+            [f"{eces['Int.Valid']:.3f}"],
+            [f"{eces['Int.Test']:.3f}"],
+            [f"{eces['Ext.Test']:.3f}"],
+        ]
+        _add_table(ax, table_data, row_labels_base, col_labels, colors=calib_cols,
+                   bbox=(0.10, -0.52, 0.90, 0.30), fontsize=12, rowlabel_width=0.18)
+        plt.subplots_adjust(bottom=0.38)
+        safe_name = cname.replace(" ", "_").replace("-", "_")
+        plt.savefig(os.path.join(fig_dir, f"Figure5a3_{safe_name}.png"), dpi=600, bbox_inches="tight")
+        plt.savefig(os.path.join(fig_dir, f"Figure5a3_{safe_name}.pdf"), dpi=600, bbox_inches="tight")
+        plt.close()
+    print("✔ TNM multiclass figures generated.")
+# ============================================================
+# Survival plots (DFS/OS): KM + Cox HR + log-rank + at-risk text
+# ============================================================
+def _evaluate_survival(df):
+    df = df.copy()
+    df["risk_score"] = df["group"].map({"Low": 0, "Mediate": 1, "High": 2})
+    c_index = concordance_index(df["time"], -df["risk_score"], df["event"])
+    time_point = 30
+    y_true = (df["time"] > time_point).astype(int)
+    y_prob = 1 - df["risk_score"] / 2.0
+    brier = brier_score_loss(y_true, y_prob)
+    return float(c_index), float(brier)
+def _plot_km_with_hr_and_atrisk(df, title, save_path, n_total=None):
+    kmf = KaplanMeierFitter()
+    fig, ax = plt.subplots(figsize=(8, 6), facecolor="white")
+    ax.set_facecolor("white")
+    colors = {"Low": "#91c7ae", "Mediate": "#f7b977", "High": "#d87c7c"}
+    groups = ["Low", "Mediate", "High"]
+    # curves + capture handles
+    lines = {}
+    at_risk_table = []
+    times = np.arange(0, 70, 10)
+    for g in groups:
+        m = (df["group"] == g)
+        if m.sum() == 0:
+            at_risk_table.append([0 for _ in times])
+            continue
+        kmf.fit(df.loc[m, "time"], event_observed=df.loc[m, "event"], label=g)
+        kmf.plot_survival_function(ci_show=True, linewidth=2, color=colors[g], ax=ax)
+        lines[g] = ax.get_lines()[-1]
+        at_risk_table.append([int(np.sum(df.loc[m, "time"] >= t)) for t in times])
+    handles = [lines.get("Low"), lines.get("Mediate"), lines.get("High")]
+    labels = ["Low", "Medium", "High"]
+    ax.legend(handles, labels, title="Groups", loc="upper right", framealpha=0.5, fontsize=12, title_fontsize=12)
+    # at-risk text (match your style)
+    # place below x-axis
+    for i, t in enumerate(times):
+        l, m, h = at_risk_table[0][i], at_risk_table[1][i], at_risk_table[2][i]
+        ax.text(t, -0.38, str(l), color="#207f4c", fontsize=13, ha='center')
+        ax.text(t, -0.48, str(m), color="#fca106", fontsize=13, ha='center')
+        ax.text(t, -0.58, str(h), color="#cc163a", fontsize=13, ha='center')
+    ax.text(-1, -0.28, 'Number at risk', color='black', ha='center', fontsize=13)
+    ax.text(-10, -0.38, "Low", color="#207f4c", fontsize=13)
+    ax.text(-10, -0.48, "Medium", color="#fca106", fontsize=13)
+    ax.text(-10, -0.58, "High", color="#cc163a", fontsize=13)
+    # Cox HR + Wald p
+    dfx = df.copy()
+    dfx["group_code"] = dfx["group"].map({"Low": 0, "Mediate": 1, "High": 2})
+    cph = CoxPHFitter()
+    cph.fit(dfx[["time", "event", "group_code"]], duration_col="time", event_col="event")
+    coef = float(cph.params_["group_code"])
+    se = float(cph.standard_errors_["group_code"])
+    hr_med_vs_low = float(np.exp(coef))
+    hr_high_vs_low = float(np.exp(2 * coef))
+    z_med = (coef) / se
+    p_med = float(2 * (1 - norm.cdf(abs(z_med))))
+    z_high = (2 * coef) / se
+    p_high = float(2 * (1 - norm.cdf(abs(z_high))))
+    # global stats
+    c_index, brier = _evaluate_survival(df)
+    logrank_p = float(multivariate_logrank_test(df["time"], df["group"], df["event"]).p_value)
+    ax.text(25, 0.46, f"P={logrank_p:.3f}", fontsize=12)
+    ax.text(25, 0.36, f"C-index={c_index:.3f}", fontsize=12)
+    ax.text(25, 0.26, f"Brier Score={brier:.3f}", fontsize=12)
+    ax.text(25, 0.16, f"HR Intermediate vs Low = {hr_med_vs_low:.2f}, P={p_med:.3f}", fontsize=12)
+    ax.text(25, 0.06, f"HR High vs Low = {hr_high_vs_low:.2f}, P={p_high:.3f}", fontsize=12)
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    if n_total is None:
+        n_total = len(df)
+    ax.set_title(f"{title}\n(n={n_total})", fontsize=14)
+    ax.set_xlabel("Time since treatment start (months)", fontsize=13)
+    ax.set_ylabel("Survival probability", fontsize=13)
+    ax.set_ylim(0, 1.05)
+    ax.grid(alpha=0.3)
+    plt.tight_layout()
+    plt.savefig(save_path + ".png", dpi=600, bbox_inches="tight")
+    plt.savefig(save_path + ".pdf", dpi=600, bbox_inches="tight")
+    plt.close()
+def plot_survival(result_dir="./results", fig_dir="./figures"):
+    _ensure_dir(fig_dir)
+    # DFS/OS for train/val/test; ext optional
+    for split in ["train", "val", "test"]:
+        dfs_path = os.path.join(result_dir, f"dfs_{split}.csv")
+        os_path  = os.path.join(result_dir, f"os_{split}.csv")
+        if _exists(dfs_path):
+            df = pd.read_csv(dfs_path)
+            _plot_km_with_hr_and_atrisk(df,
+                                        title=f"Disease-Free Survival (DFS) — Kaplan-Meier Curves ({split})",
+                                        save_path=os.path.join(fig_dir, f"DFS_{split}"),
+                                        n_total=len(df))
+        else:
+            print(f"[plot_survival] Skip DFS {split}: missing {dfs_path}")
+        if _exists(os_path):
+            df = pd.read_csv(os_path)
+            _plot_km_with_hr_and_atrisk(df,
+                                        title=f"Overall Survival (OS) — Kaplan-Meier Curves ({split})",
+                                        save_path=os.path.join(fig_dir, f"OS_{split}"),
+                                        n_total=len(df))
+        else:
+            print(f"[plot_survival] Skip OS {split}: missing {os_path}")
+    print("✔ DFS / OS KM figures generated (where available).")
+# ============================================================
+# Public entry: plot_all
+# ============================================================
+def plot_all(result_dir="./results", fig_dir="./figures",
+             do_subtype=True, do_tnm=True, do_survival=True):
+    _ensure_dir(fig_dir)
+    if do_subtype:
+        plot_subtype_binary(result_dir=result_dir, fig_dir=fig_dir)
+    if do_tnm:
+        plot_tnm_multiclass(result_dir=result_dir, fig_dir=fig_dir)
+    if do_survival:
+        plot_survival(result_dir=result_dir, fig_dir=fig_dir)
+# ============================================================
+# CLI usage (optional)
+# ============================================================
+if __name__ == "__main__":
+    plot_all("./results", "./figures")