ymin98 committed on
Commit dd5a134 · verified · 1 Parent(s): 8f5f7e0

Upload 15 files

Files changed (5)
  1. README +14 -0
  2. app.py +375 -0
  3. model.py +565 -0
  4. requirements.txt +8 -0
  5. wacaunet_val_f1_331_0.8757.pth +3 -0
README ADDED
@@ -0,0 +1,14 @@
1
+ ---
2
+ title: WACA-UNet
3
+ emoji: ⚡
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.0.0
8
+ app_file: app.py
9
+ ---
10
+
11
+ # WACA-UNet IR-drop Demo
12
+
13
+ Gradio demo for WACA-UNet (Weakness-Aware Channel Attention U-Net)
14
+ for static IR-drop prediction on the ICCAD-2023 benchmark.
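
For quick reference, a minimal sketch of using the uploaded checkpoint outside the Gradio UI (paths, shapes and the mV output unit follow the assumptions in `app.py`; the zero tensor below is only a placeholder input):

```python
# Minimal programmatic inference sketch; see app.py for the full demo logic.
import torch
from model import WACA_Unet

model = WACA_Unet(in_ch=25, depth=4, base_ch=64)
state = torch.load("wacaunet_val_f1_331_0.8757.pth", map_location="cpu")
if isinstance(state, dict) and "state_dict" in state:
    state = state["state_dict"]
# (app.py additionally strips a possible 'module.' prefix from DDP-saved checkpoints)
model.load_state_dict(state, strict=False)
model.eval()

x = torch.zeros(1, 25, 256, 256)  # (batch, channels, H, W) placeholder input
with torch.no_grad():
    pred = model(x)["x_recon"]    # (1, 1, H, W); IR-drop in mV per the demo's assumption
```

To launch the demo itself, install the packages from `requirements.txt` and run `python app.py`.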
app.py ADDED
@@ -0,0 +1,375 @@
1
+ # app.py
2
+ import os
3
+ import io
4
+ import math
5
+ from typing import Tuple
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ import matplotlib
12
+ matplotlib.use("Agg")
13
+ import matplotlib.pyplot as plt
14
+ from PIL import Image
15
+
16
+ # ---- Project modules ----
17
+ from model import WACA_Unet
18
+
19
+
20
+ # ==========================
21
+ # Settings
22
+ # ==========================
23
+
24
+ IN_CHANNELS = 25
25
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
26
+
27
+ # Path to the pretrained checkpoint.
28
+ # This file should be available in the Space (or local environment).
29
+ MODEL_CHECKPOINT_PATH = (
30
+ "wacaunet_val_f1_331_0.8757.pth"
31
+ )
32
+
33
+ # WACA-UNet output unit:
34
+ # - The model is assumed to predict IR-drop directly in mV.
35
+ # - If SCALE_TO_V is True, the demo will convert mV -> V (divide by 1000) for display.
36
+ SCALE_TO_V = False
37
+
38
+ # Directory containing example .npy inputs (created offline)
39
+ SAMPLES_DIR = "tools/samples"
40
+
41
+
42
+ # ==========================
43
+ # Utility functions
44
+ # ==========================
45
+
46
+ def load_checkpoint_state(path: str, device: str):
47
+ """
48
+ Load a checkpoint and return a state_dict in a robust way.
49
+ Handles common patterns: {'state_dict': ...}, {'net': ...}, or raw state_dict.
50
+ """
51
+ state = torch.load(path, map_location=device)
52
+ if isinstance(state, dict):
53
+ if "state_dict" in state:
54
+ return state["state_dict"]
55
+ if "net" in state:
56
+ return state["net"]
57
+ # Fallback: assume the object itself is a state_dict
58
+ return state
59
+
60
+
61
+ def get_model() -> Tuple[nn.Module, str]:
62
+ """
63
+ Load WACA-UNet once and cache it for reuse in the Gradio session.
64
+ """
65
+ if not hasattr(get_model, "_cache"):
66
+ if not os.path.exists(MODEL_CHECKPOINT_PATH):
67
+ raise FileNotFoundError(
68
+ f"Checkpoint not found at '{MODEL_CHECKPOINT_PATH}'. "
69
+ f"Please upload the checkpoint file and update MODEL_CHECKPOINT_PATH."
70
+ )
71
+
72
+ device = DEVICE
73
+ model = WACA_Unet(in_ch=IN_CHANNELS, depth=4, base_ch=64)
74
+
75
+ state_dict = load_checkpoint_state(MODEL_CHECKPOINT_PATH, device)
76
+ # Strip 'module.' prefix from keys if the checkpoint was saved with DDP
77
+ new_state = {}
78
+ for k, v in state_dict.items():
79
+ if k.startswith("module."):
80
+ new_state[k[len("module."):]] = v
81
+ else:
82
+ new_state[k] = v
83
+
84
+ model.load_state_dict(new_state, strict=False)
85
+ model.to(device)
86
+ model.eval()
87
+
88
+ get_model._cache = (model, device)
89
+ return get_model._cache # type: ignore
90
+
91
+
92
+ def list_sample_files() -> list:
93
+ """
94
+ List sample .npy files under SAMPLES_DIR.
95
+ Each .npy is assumed to store a (25, H, W) or (H, W, 25) array.
96
+ """
97
+ if not os.path.exists(SAMPLES_DIR):
98
+ return []
99
+ files = []
100
+ for fname in sorted(os.listdir(SAMPLES_DIR)):
101
+ if fname.lower().endswith(".npy"):
102
+ files.append(os.path.join(SAMPLES_DIR, fname))
103
+ return files
104
+
105
+
106
+ def fig_to_pil(fig: matplotlib.figure.Figure) -> Image.Image:
107
+ buf = io.BytesIO()
108
+ fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
109
+ plt.close(fig)
110
+ buf.seek(0)
111
+ img = Image.open(buf).convert("RGB")
112
+ return img
113
+
114
+
115
+ def ensure_chw(arr: np.ndarray) -> np.ndarray:
116
+ """
117
+ Ensure the array has shape (C, H, W).
118
+
119
+ Supported shapes:
120
+ - (C, H, W): returned as-is
121
+ - (H, W, C): transposed to (C, H, W)
122
+ - (H, W): treated as (1, H, W) (not recommended for this demo)
123
+ """
124
+ if arr.ndim == 3:
125
+ c_first = arr.shape[0]
126
+ c_last = arr.shape[-1]
127
+ if c_first == IN_CHANNELS:
128
+ # Already (C, H, W)
129
+ return arr.astype(np.float32)
130
+ elif c_last == IN_CHANNELS:
131
+ # (H, W, C) -> (C, H, W)
132
+ return np.transpose(arr, (2, 0, 1)).astype(np.float32)
133
+ else:
134
+ raise ValueError(
135
+ f"3D array but channel dimension is not {IN_CHANNELS}. "
136
+ f"Got shape={arr.shape}."
137
+ )
138
+ elif arr.ndim == 2:
139
+ # (H, W) -> (1, H, W)
140
+ return arr.astype(np.float32)[None, ...]
141
+ else:
142
+ raise ValueError(
143
+ f"Unsupported input ndim: {arr.ndim}, shape={arr.shape}. "
144
+ f"Expected (C,H,W) or (H,W,C) or (H,W)."
145
+ )
146
+
147
+
148
+ def preprocess_input(arr: np.ndarray) -> torch.Tensor:
149
+ """
150
+ Convert a numpy array to a (1, C, H, W) torch.FloatTensor.
151
+
152
+ Assumptions:
153
+ - The .npy already contains the same normalization used during training,
154
+ e.g., per-channel z-score from the IRDropDataset / CFIRSTNET configuration.
155
+ """
156
+ chw = ensure_chw(arr)
157
+ if chw.shape[0] != IN_CHANNELS:
158
+ raise ValueError(
159
+ f"Unexpected number of channels: expected {IN_CHANNELS}, "
160
+ f"got C={chw.shape[0]}, shape={chw.shape}."
161
+ )
162
+ x = torch.from_numpy(chw).unsqueeze(0) # (1, C, H, W)
163
+ return x
164
+
165
+
166
+ def plot_input_grid(chw: np.ndarray, title_prefix: str = "Input") -> Image.Image:
167
+ """
168
+ Visualize 25 input channels as a 5×5 grid.
169
+ """
170
+ C, H, W = chw.shape
171
+ cols = 5
172
+ rows = math.ceil(C / cols)
173
+
174
+ fig, axes = plt.subplots(rows, cols, figsize=(cols * 2.0, rows * 2.0))
175
+ axes = np.atleast_2d(axes)
176
+
177
+ for idx in range(rows * cols):
178
+ r = idx // cols
179
+ c = idx % cols
180
+ ax = axes[r, c]
181
+ if idx < C:
182
+ ch_img = chw[idx]
183
+ ax.imshow(ch_img, cmap="jet")
184
+ ax.set_title(f"{title_prefix} C{idx}", fontsize=7)
185
+ ax.axis("off")
186
+ else:
187
+ ax.axis("off")
188
+
189
+ fig.suptitle("25-channel Input (5×5)", fontsize=12)
190
+ fig.tight_layout()
191
+ return fig_to_pil(fig)
192
+
193
+
194
+ def plot_prediction(pred_2d: np.ndarray, to_v: bool = False) -> Image.Image:
195
+ """
196
+ Visualize the predicted IR-drop (single channel) with a colormap and colorbar.
197
+
198
+ Model output is assumed to be in mV:
199
+ - to_v == False: render directly in mV.
200
+ - to_v == True : convert mV -> V by dividing by 1000 for display.
201
+ """
202
+ pred_mV = np.nan_to_num(pred_2d.astype(np.float32))
203
+
204
+ if to_v:
205
+ pred = pred_mV * 1e-3 # mV -> V
206
+ unit = "V"
207
+ else:
208
+ pred = pred_mV
209
+ unit = "mV"
210
+
211
+ fig, ax = plt.subplots(figsize=(4, 4))
212
+ im = ax.imshow(pred, cmap="jet")
213
+ ax.set_title(f"Predicted IR-drop [{unit}]")
214
+ ax.axis("off")
215
+ fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
216
+ fig.tight_layout()
217
+ return fig_to_pil(fig)
218
+
219
+
220
+ def load_npy_file(path: str) -> np.ndarray:
221
+ return np.array(np.load(path))
222
+
223
+
224
+ def load_uploaded_npy(file_obj) -> np.ndarray:
225
+ """
226
+ Load a numpy array from a Gradio file input (a filepath string in recent Gradio versions, or an object exposing `.name` in older ones).
227
+ """
228
+ path = file_obj if isinstance(file_obj, str) else file_obj.name
+ return np.load(path)
229
+
230
+
231
+ # ==========================
232
+ # Gradio callback
233
+ # ==========================
234
+
235
+ def infer(
236
+ sample_path: str,
237
+ uploaded_file,
238
+ use_uploaded: bool,
239
+ scale_to_v: bool,
240
+ ) -> Tuple[Image.Image, Image.Image, str]:
241
+ """
242
+ Main inference function for Gradio.
243
+
244
+ Args:
245
+ sample_path: Selected path under tools/samples/.
246
+ uploaded_file: User-uploaded .npy file (Gradio File).
247
+ use_uploaded: If True and a file is provided, uploaded_file takes priority.
248
+ scale_to_v: If True, convert model output from mV to V for visualization.
249
+
250
+ Returns:
251
+ input_img: 5×5 grid of the 25 input channels.
252
+ pred_img: Predicted IR-drop heatmap.
253
+ info: Text summary (shapes, ranges, units).
254
+ """
255
+ # 1) Decide input source
256
+ if use_uploaded and uploaded_file is not None:
257
+ arr = load_uploaded_npy(uploaded_file)
258
+ upload_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
+ src_desc = f"Uploaded file: {os.path.basename(upload_path)}"
259
+ else:
260
+ if not sample_path:
261
+ raise gr.Error("Please select a sample file or upload your own .npy input.")
262
+ arr = load_npy_file(sample_path)
263
+ src_desc = f"Sample: {os.path.basename(sample_path)}"
264
+
265
+ # 2) Preprocess
266
+ try:
267
+ x = preprocess_input(arr) # (1, C, H, W)
268
+ except ValueError as e:
269
+ raise gr.Error(str(e))
270
+
271
+ model, device = get_model()
272
+ x = x.to(device)
273
+
274
+ # 3) Inference
275
+ with torch.no_grad():
276
+ out = model(x)
277
+ if isinstance(out, dict) and "x_recon" in out:
278
+ y = out["x_recon"]
279
+ else:
280
+ y = out
281
+
282
+ # y: (1, 1, H, W) or (1, H, W) -> (H, W)
283
+ pred_np = y.detach().cpu().squeeze().numpy()
284
+
285
+ chw = ensure_chw(arr)
286
+ pred_img = plot_prediction(pred_np, to_v=scale_to_v)
287
+ input_img = plot_input_grid(chw, title_prefix="Input")
288
+
289
+ unit_display = "V" if scale_to_v else "mV"
290
+ info = (
291
+ f"{src_desc}\n"
292
+ f"Input shape: {arr.shape}, "
293
+ f"Pred shape: {pred_np.shape}, "
294
+ f"Pred range (model output in mV): "
295
+ f"[{float(pred_np.min()):.4g}, {float(pred_np.max()):.4g}] "
296
+ f"→ displayed in [{unit_display}].\n"
297
+ f"(Input feature composition follows the 25-channel configuration "
298
+ f"from the official CFIRSTNET ICCAD-2023 public repository.)"
299
+ )
300
+
301
+ return input_img, pred_img, info
302
+
303
+
304
+ # ==========================
305
+ # Gradio UI
306
+ # ==========================
307
+
308
+ def build_demo():
309
+ sample_files = list_sample_files()
310
+ sample_choices = (
311
+ [("", "--- choose sample ---")]
312
+ + [(p, os.path.basename(p)) for p in sample_files]
313
+ )
314
+
315
+ with gr.Blocks(title="WACA-UNet IR-drop Demo") as demo:
316
+ gr.Markdown(
317
+ """
318
+ # WACA-UNet IR-drop Prediction Demo
319
+
320
+ - **Input**: 25-channel physical/power-delivery feature maps
321
+ (e.g., HIRD, WR, effective distance, PDN density, etc.).
322
+ - **Output**: Predicted static IR-drop map (visualized in mV or V).
323
+ - The 25-channel input composition follows the configuration used
324
+ in the official [CFIRSTNET GitHub repository](https://github.com/jason122490/CFIRSTNET)
325
+ for the ICCAD-2023 benchmark.
326
+ - You can either:
327
+ - select a pre-generated sample from `tools/samples/`, or
328
+ - upload your own `.npy` file with shape `(25, H, W)` or `(H, W, 25)`.
329
+ """
330
+ )
331
+
332
+
333
+ with gr.Row():
334
+ with gr.Column(scale=1):
335
+ sample_dropdown = gr.Dropdown(
336
+ choices=[c[0] for c in sample_choices],
337
+ value=sample_choices[1][0] if len(sample_choices) > 1 else "",
338
+ label="Sample .npy from tools/samples/",
339
+ info="Pre-generated example inputs (25 × H × W) stored as .npy files.",
340
+ )
341
+ use_uploaded = gr.Checkbox(
342
+ label="Use uploaded file if available",
343
+ value=True,
344
+ )
345
+ uploaded_file = gr.File(
346
+ label="Custom input (.npy, shape = (25, H, W) or (H, W, 25))",
347
+ file_types=[".npy"],
348
+ )
349
+ scale_to_v_input = gr.Checkbox(
350
+ label="Convert prediction from mV to V (divide by 1000)",
351
+ value=SCALE_TO_V,
352
+ )
353
+ run_btn = gr.Button("Run Inference")
354
+
355
+ with gr.Column(scale=2):
356
+ pred_img = gr.Image(label="Predicted IR-drop", type="pil")
357
+ input_img = gr.Image(label="25-channel Input (5×5 grid)", type="pil")
358
+ info_text = gr.Textbox(
359
+ label="Info",
360
+ interactive=False,
361
+ lines=4,
362
+ )
363
+
364
+ run_btn.click(
365
+ fn=infer,
366
+ inputs=[sample_dropdown, uploaded_file, use_uploaded, scale_to_v_input],
367
+ outputs=[input_img, pred_img, info_text],
368
+ )
369
+
370
+ return demo
371
+
372
+
373
+ if __name__ == "__main__":
374
+ demo = build_demo()
375
+ demo.launch()
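
The sample dropdown is populated from `tools/samples/` (pre-generated offline); a hedged sketch of creating a placeholder sample so the dropdown has an entry (random values, not real normalized ICCAD-2023 features):

```python
# Writes a dummy (25, H, W) float32 array that list_sample_files() will pick up.
import os
import numpy as np

os.makedirs("tools/samples", exist_ok=True)
dummy = np.random.randn(25, 256, 256).astype(np.float32)  # placeholder, not real features
np.save(os.path.join("tools/samples", "dummy_case.npy"), dummy)
```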
model.py ADDED
@@ -0,0 +1,565 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from timm.layers import DropPath
5
+ import math
6
+ from typing import List, Tuple, Optional, Dict
7
+
8
+
9
+ class WACA_CBAM(nn.Module):
10
+ def __init__(self, channels, reduction=16):
11
+ super(WACA_CBAM, self).__init__()
12
+ self.channels = channels
13
+ if channels < reduction or channels // reduction == 0:
14
+ self.reduced_channels = channels // 2 if channels > 1 else 1
15
+ else:
16
+ self.reduced_channels = channels // reduction
17
+
18
+ self.fc_layers = nn.Sequential(
19
+ nn.Conv2d(self.channels, self.reduced_channels, kernel_size=1, bias=False),
20
+ nn.ReLU(inplace=True),
21
+ nn.Conv2d(self.reduced_channels, self.channels, kernel_size=1, bias=False)
22
+ )
23
+ self.spatial_attn = nn.Sequential(
24
+ nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
25
+ nn.Sigmoid()
26
+ )
27
+
28
+ def forward(self, x):
29
+ avg_pool = F.adaptive_avg_pool2d(x, 1)
30
+ max_pool = F.adaptive_max_pool2d(x, 1)
31
+ avg_out = self.fc_layers(avg_pool)
32
+ max_out = self.fc_layers(max_pool)
33
+
34
+ gate_logits = avg_out + max_out
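+ # Weakness-aware gating: sigmoid(-logits) up-weights weakly responding channels,
+ # and attention computed from this weak-channel view is averaged with the standard channel attention below.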
35
+ weakness_scores = torch.sigmoid(-gate_logits)
36
+ attn_scores = torch.sigmoid(gate_logits)
37
+ gated_weak = x * weakness_scores
38
+ squeezed_2_avg = F.adaptive_avg_pool2d(gated_weak, 1)
39
+ squeezed_2_max = F.adaptive_max_pool2d(gated_weak, 1)
40
+ gate_logits_2 = self.fc_layers(squeezed_2_avg + squeezed_2_max) # current
41
+ # gate_logits_2 = self.fc_layers(squeezed_2_avg) + self.fc_layers(squeezed_2_max) # naive
42
+ attn_scores_2 = torch.sigmoid(gate_logits_2)
43
+ gated_attn = x * (attn_scores + attn_scores_2) * 0.5
44
+
45
+ # Spatial Attention (CBAM)
46
+ avg_out = torch.mean(gated_attn, dim=1, keepdim=True)
47
+ max_out, _ = torch.max(gated_attn, dim=1, keepdim=True)
48
+ sa_input = torch.cat([avg_out, max_out], dim=1)
49
+ sa_weight = self.spatial_attn(sa_input)
50
+ out = gated_attn * sa_weight
51
+
52
+ return out
53
+
54
+
55
+ ##################################################################################
56
+ # copied from https://github.com/facebookresearch/ConvNeXt-V2/blob/main/models/convnextv2.py
57
+ class LayerNorm(nn.Module):
58
+ """ LayerNorm that supports two data formats: channels_last (default) or channels_first.
59
+ The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
60
+ shape (batch_size, height, width, channels) while channels_first corresponds to inputs
61
+ with shape (batch_size, channels, height, width).
62
+ """
63
+ def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
64
+ super().__init__()
65
+ self.weight = nn.Parameter(torch.ones(normalized_shape))
66
+ self.bias = nn.Parameter(torch.zeros(normalized_shape))
67
+ self.eps = eps
68
+ self.data_format = data_format
69
+ if self.data_format not in ["channels_last", "channels_first"]:
70
+ raise NotImplementedError
71
+ self.normalized_shape = (normalized_shape, )
72
+
73
+ def forward(self, x):
74
+ if self.data_format == "channels_last":
75
+ return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
76
+ elif self.data_format == "channels_first":
77
+ u = x.mean(1, keepdim=True)
78
+ s = (x - u).pow(2).mean(1, keepdim=True)
79
+ x = (x - u) / torch.sqrt(s + self.eps)
80
+ x = self.weight[:, None, None] * x + self.bias[:, None, None]
81
+ return x
82
+
83
+ class GRN(nn.Module):
84
+ """ GRN (Global Response Normalization) layer
85
+ """
86
+ def __init__(self, dim):
87
+ super().__init__()
88
+ self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim))
89
+ self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim))
90
+
91
+ def forward(self, x):
92
+ Gx = torch.norm(x, p=2, dim=(1,2), keepdim=True)
93
+ Nx = Gx / (Gx.mean(dim=-1, keepdim=True) + 1e-6)
94
+ return self.gamma * (x * Nx) + self.beta + x
95
+
96
+ #################################################################################################
97
+
98
+ import torch
99
+ import torch.nn as nn
100
+ from torch.nn import functional as F
101
+
102
+ class ConvNeXtV2BlockWACA_Atrous(nn.Module):
103
+ def __init__(self, in_ch, out_ch, reduction=16, drop_path=0., dilation=3):
104
+ super().__init__()
105
+
106
+ # Atrous (dilated) depthwise convolution
107
+ # adjust padding so the dilated kernel keeps the same effective receptive field
108
+ padding = dilation * 3 # = (7-1)//2 * dilation, since kernel_size=7
109
+ self.dwconv = nn.Conv2d(
110
+ in_ch, in_ch,
111
+ kernel_size=7,
112
+ padding=padding,
113
+ groups=in_ch,
114
+ dilation=dilation # apply atrous (dilated) convolution
115
+ )
116
+
117
+ self.norm = LayerNorm(in_ch, eps=1e-6)
118
+ self.pwconv1 = nn.Linear(in_ch, 4 * in_ch)
119
+ self.act = nn.GELU()
120
+ self.grn = GRN(4 * in_ch)
121
+ self.pwconv2 = nn.Linear(4 * in_ch, out_ch)
122
+ self.fow = WACA_CBAM(out_ch, reduction=reduction)
123
+
124
+ self.proj = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1)
125
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
126
+
127
+ def forward(self, x):
128
+ input_x = x
129
+ x = self.dwconv(x)
130
+ x = x.permute(0, 2, 3, 1) # BCHW -> BHWC
131
+ x = self.norm(x)
132
+ x = self.pwconv1(x)
133
+ x = self.act(x)
134
+ x = self.grn(x)
135
+ x = self.pwconv2(x)
136
+ x = x.permute(0, 3, 1, 2) # BHWC -> BCHW
137
+ x = self.fow(x)
138
+ x = self.drop_path(x)
139
+
140
+ out = self.proj(input_x) + x
141
+ return out
142
+
143
+
144
+ # Variant using multi-scale atrous (dilated) convolutions
145
+ class ConvNeXtV2BlockWACA_MultiAtrous(nn.Module):
146
+ def __init__(self, in_ch, out_ch, reduction=16, drop_path=0., dilations=[1, 2, 4]):
147
+ super().__init__()
148
+
149
+ # Depthwise convolutions with several different dilation rates
150
+ self.dwconv_branches = nn.ModuleList([
151
+ nn.Conv2d(
152
+ in_ch, in_ch // len(dilations),
153
+ kernel_size=7,
154
+ padding=d * 3, # padding for kernel_size=7
155
+ groups=in_ch // len(dilations),
156
+ dilation=d
157
+ ) for d in dilations
158
+ ])
159
+
160
+ # After merging the branches, project back to the original channel count
161
+ self.combine_conv = nn.Conv2d(in_ch, in_ch, 1)
162
+
163
+ self.norm = LayerNorm(in_ch, eps=1e-6)
164
+ self.pwconv1 = nn.Linear(in_ch, 4 * in_ch)
165
+ self.act = nn.GELU()
166
+ self.grn = GRN(4 * in_ch)
167
+ self.pwconv2 = nn.Linear(4 * in_ch, out_ch)
168
+ self.fow = WACA_CBAM(out_ch, reduction=reduction)
169
+
170
+ self.proj = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1)
171
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
172
+
173
+ def forward(self, x):
174
+ input_x = x
175
+
176
+ # Multi-scale atrous convolution
177
+ branch_outputs = []
178
+ for i, dwconv in enumerate(self.dwconv_branches):
179
+ # Select the channel slice corresponding to this branch
180
+ channels_per_branch = x.size(1) // len(self.dwconv_branches)
181
+ start_idx = i * channels_per_branch
182
+ end_idx = (i + 1) * channels_per_branch if i < len(self.dwconv_branches) - 1 else x.size(1)
183
+ branch_input = x[:, start_idx:end_idx, :, :]
184
+ branch_outputs.append(dwconv(branch_input))
185
+
186
+ # Concatenate all branch outputs
187
+ x = torch.cat(branch_outputs, dim=1)
188
+ x = self.combine_conv(x)
189
+
190
+ x = x.permute(0, 2, 3, 1) # BCHW -> BHWC
191
+ x = self.norm(x)
192
+ x = self.pwconv1(x)
193
+ x = self.act(x)
194
+ x = self.grn(x)
195
+ x = self.pwconv2(x)
196
+ x = x.permute(0, 3, 1, 2) # BHWC -> BCHW
197
+ x = self.fow(x)
198
+ x = self.drop_path(x)
199
+
200
+ out = self.proj(input_x) + x
201
+ return out
202
+
203
+
204
+ # ASPP (Atrous Spatial Pyramid Pooling)-style variant
205
+ class ConvNeXtV2BlockWACA_ASPP(nn.Module):
206
+ def __init__(self, in_ch, out_ch, reduction=16, drop_path=0., dilations=[1, 6, 12, 18]):
207
+ super().__init__()
208
+
209
+ # ASPP-style parallel atrous convolutions
210
+ self.aspp_branches = nn.ModuleList()
211
+
212
+ for dilation in dilations:
213
+ if dilation == 1:
214
+ # First branch: a plain convolution
215
+ branch = nn.Conv2d(in_ch, in_ch // len(dilations), 1)
216
+ else:
217
+ # Remaining branches: atrous (dilated) convolutions
218
+ branch = nn.Conv2d(
219
+ in_ch, in_ch // len(dilations),
220
+ kernel_size=3,
221
+ padding=dilation,
222
+ dilation=dilation,
223
+ groups=in_ch // len(dilations)
224
+ )
225
+ self.aspp_branches.append(branch)
226
+
227
+ # Global Average Pooling branch
228
+ self.global_avg_pool = nn.Sequential(
229
+ nn.AdaptiveAvgPool2d((1, 1)),
230
+ nn.Conv2d(in_ch, in_ch // len(dilations), 1),
231
+ )
232
+
233
+ # Convolution that fuses all branches
234
+ total_channels = (len(dilations) + 1) * (in_ch // len(dilations))
235
+ self.combine_conv = nn.Conv2d(total_channels, in_ch, 1)
236
+
237
+ self.norm = LayerNorm(in_ch, eps=1e-6)
238
+ self.pwconv1 = nn.Linear(in_ch, 4 * in_ch)
239
+ self.act = nn.GELU()
240
+ self.grn = GRN(4 * in_ch)
241
+ self.pwconv2 = nn.Linear(4 * in_ch, out_ch)
242
+ self.fow = WACA_CBAM(out_ch, reduction=reduction)
243
+
244
+ self.proj = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1)
245
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
246
+
247
+ def forward(self, x):
248
+ input_x = x
249
+ h, w = x.size()[2:]
250
+
251
+ # ASPP branches
252
+ branch_outputs = []
253
+ for branch in self.aspp_branches:
254
+ branch_outputs.append(branch(x))
255
+
256
+ # Global average pooling branch
257
+ global_feat = self.global_avg_pool(x)
258
+ global_feat = F.interpolate(global_feat, size=(h, w), mode='bilinear', align_corners=False)
259
+ branch_outputs.append(global_feat)
260
+
261
+ # Concatenate all branches
262
+ x = torch.cat(branch_outputs, dim=1)
263
+ x = self.combine_conv(x)
264
+
265
+ x = x.permute(0, 2, 3, 1) # BCHW -> BHWC
266
+ x = self.norm(x)
267
+ x = self.pwconv1(x)
268
+ x = self.act(x)
269
+ x = self.grn(x)
270
+ x = self.pwconv2(x)
271
+ x = x.permute(0, 3, 1, 2) # BHWC -> BCHW
272
+ x = self.fow(x)
273
+ x = self.drop_path(x)
274
+
275
+ out = self.proj(input_x) + x
276
+ return out
277
+
278
+
279
+
280
+
281
+ #################################################################################################
282
+ class ConvNeXtV2BlockWACA(nn.Module):
283
+ def __init__(self, in_ch, out_ch, reduction=16, drop_path=0.,use_grn=True):
284
+ super().__init__()
285
+ self.dwconv = nn.Conv2d(in_ch, in_ch, kernel_size=7, padding=3, groups=in_ch)
286
+ self.norm = LayerNorm(in_ch, eps=1e-6)
287
+ self.pwconv1 = nn.Linear(in_ch, 4 * in_ch)
288
+ self.act = nn.GELU()
289
+ self.grn = GRN(4 * in_ch) if use_grn else nn.Identity()
290
+ self.pwconv2 = nn.Linear(4 * in_ch, out_ch)
291
+ self.fow = WACA_CBAM(out_ch,reduction=reduction)
292
+ self.proj = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1)
293
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
294
+
295
+ def forward(self, x):
296
+ input_x = x
297
+ x = self.dwconv(x)
298
+ x = x.permute(0, 2, 3, 1) # BCHW -> BHWC
299
+ x = self.norm(x)
300
+ x = self.pwconv1(x)
301
+ x = self.act(x)
302
+ x = self.grn(x)
303
+ x = self.pwconv2(x)
304
+ x = x.permute(0, 3, 1, 2) # BHWC -> BCHW
305
+ x = self.fow(x)
306
+ x = self.drop_path(x)
307
+ out = self.proj(input_x) + x
308
+ return out
309
+
310
+
311
+ class AttentionGate(nn.Module):
312
+ def __init__(self, in_ch_x, in_ch_g, out_ch):
313
+ super().__init__()
314
+ self.act = nn.ReLU(inplace=True)
315
+ self.w_x_g = nn.Conv2d(in_ch_x + in_ch_g, out_ch, kernel_size=1, stride=1, padding=0, bias=False)
316
+ self.attn = nn.Conv2d(out_ch, out_ch, kernel_size=1, padding=0, bias=False)
317
+ def forward(self, x, g):
318
+ res = x
319
+ xg = torch.cat([x, g], dim=1) # B, (x_c+g_c), H, W
320
+ xg = self.w_x_g(xg)
321
+ xg = self.act(xg)
322
+ attn = torch.sigmoid(self.attn(xg))
323
+ out = res * attn
324
+ return out
325
+
326
+
327
+ class WACA_Unet(nn.Module):
328
+ def __init__(self, in_ch=25, out_ch=1, base_ch=64, reduction=16,
329
+ depth=4, drop_path=0.2, block=ConvNeXtV2BlockWACA, **kwargs):
330
+ super().__init__()
331
+ self.depth = depth
332
+ chs = [base_ch * 2**i for i in range(depth+1)]
333
+ self.drop_path = drop_path
334
+
335
+ n_enc_blocks = depth + 1
336
+ n_dec_blocks = depth
337
+ total_blocks = n_enc_blocks + n_dec_blocks
338
+
339
+ drop_path_rates = torch.linspace(0, drop_path, total_blocks).tolist()
340
+ enc_dp_rates = drop_path_rates[:n_enc_blocks]
341
+ dec_dp_rates = drop_path_rates[n_enc_blocks:]
342
+
343
+ # Encoder
344
+ self.enc_blocks = nn.ModuleList([
345
+ block(in_ch, chs[0], reduction, drop_path=enc_dp_rates[0])
346
+ ] + [
347
+ block(chs[i], chs[i+1], reduction, drop_path=enc_dp_rates[i+1])
348
+ for i in range(depth)
349
+ ])
350
+ self.pool = nn.ModuleList([
351
+ nn.Conv2d(chs[i], chs[i], kernel_size=3, stride=2, padding=1, groups=chs[i])
352
+ for i in range(depth)
353
+ ])
354
+
355
+ # Decoder
356
+ self.upconvs = nn.ModuleList([
357
+ nn.ConvTranspose2d(chs[i+1], chs[i], kernel_size=2, stride=2)
358
+ for i in reversed(range(depth))
359
+ ])
360
+ self.dec_blocks = nn.ModuleList([
361
+ block(chs[i]*2, chs[i], reduction, drop_path=dec_dp_rates[i])
362
+ for i in reversed(range(depth))
363
+ ])
364
+ # Attention Gates
365
+ self.attn_gates = nn.ModuleList([
366
+ AttentionGate(chs[i], chs[i], chs[i])
367
+ for i in reversed(range(depth))
368
+ ])
369
+
370
+ self.final_head = nn.Sequential(
371
+ nn.Conv2d(chs[0], out_ch, kernel_size=1)
372
+ )
373
+
374
+ def forward(self, x):
375
+ enc_feats = []
376
+ for i, enc in enumerate(self.enc_blocks):
377
+ x = enc(x)
378
+ enc_feats.append(x)
379
+ if i < self.depth:
380
+ x = self.pool[i](x)
381
+ # Decoder
382
+ for i in range(self.depth):
383
+ x = self.upconvs[i](x)
384
+ enc_feat = enc_feats[self.depth-1-i]
385
+ # AttentionGate: (encoder feature, decoder upconv output)
386
+ attn_enc_feat = self.attn_gates[i](enc_feat, x)
387
+ x = torch.cat([attn_enc_feat, x], dim=1)
388
+ x = self.dec_blocks[i](x)
389
+
390
+ out = self.final_head(x)
391
+ return {
392
+ 'x_recon': out
393
+ }
394
+
395
+ ###############################################################################
396
+ from torch.nn.utils.rnn import pack_padded_sequence
397
+
398
+ class GRUStem(nn.Module):
399
+ """
400
+ Zero-padded variable-channel input: each channel is embedded with a shared encoder (phi),
401
+ then the channel axis is treated as a sequence axis and fused by a bidirectional GRU.
402
+ """
403
+ def __init__(self, out_channels: int = 64, embed_channels: int = 16, small_input: bool = True):
404
+ super().__init__()
405
+ stride = 1 if small_input else 2
406
+ self.phi = nn.Sequential(
407
+ nn.Conv2d(1, embed_channels, kernel_size=3, stride=stride, padding=1, bias=False),
408
+ nn.BatchNorm2d(embed_channels),
409
+ nn.ReLU(inplace=True),
410
+ )
411
+ hidden = out_channels // 2
412
+ assert hidden > 0, "out_channels must be >=2 to support BiGRU"
413
+ self.gru = nn.GRU(input_size=embed_channels, hidden_size=hidden,
414
+ num_layers=1, bidirectional=True)
415
+ self.bn = nn.BatchNorm2d(out_channels)
416
+ self.act = nn.ReLU(inplace=True)
417
+ self.out_channels = out_channels
418
+ self.small_input = small_input
419
+
420
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
421
+ # x: [B, Cmax, H, W] with zero-padded channels
422
+ B, Cmax, H, W = x.shape
423
+
424
+ # non-zero channel lengths
425
+ with torch.no_grad():
426
+ nonzero_ch = (x.abs().sum(dim=(2, 3)) > 0) # [B, Cmax]
427
+ lengths = nonzero_ch.sum(dim=1).clamp(min=1) # [B]
428
+
429
+ # shared encoder φ for each channel
430
+ feat_per_c = [self.phi(x[:, c:c+1, :, :]) for c in range(Cmax)] # list of [B,E,H',W']
431
+ Fstack = torch.stack(feat_per_c, dim=0) # [Cmax, B, E, H', W']
432
+ Cseq, Bsz, E, Hp, Wp = Fstack.shape
433
+
434
+ # sequence for GRU: [T=Cmax, N=B*Hp*Wp, E]
435
+ Fseq = Fstack.permute(0, 1, 3, 4, 2).contiguous().view(Cseq, Bsz * Hp * Wp, E)
436
+ lens = lengths.repeat_interleave(Hp * Wp).cpu() # [N]
437
+ packed = pack_padded_sequence(Fseq, lens, enforce_sorted=False)
438
+ _, h_n = self.gru(packed) # [2, N, hidden]
439
+ h_cat = torch.cat([h_n[0], h_n[1]], dim=-1) # [N, out_ch]
440
+
441
+ out = h_cat.view(Bsz, Hp, Wp, -1).permute(0, 3, 1, 2).contiguous() # [B,out,H',W']
442
+ out = self.act(self.bn(out))
443
+ return out
444
+
445
+ ##################################################################
446
+
447
+ class _PoolDownMixin:
448
+ def __init__(self, small_input: bool):
449
+ self._stride = 1 if small_input else 2
450
+ def _maybe_down(self, y: torch.Tensor) -> torch.Tensor:
451
+ if self._stride == 2:
452
+ return F.avg_pool2d(y, 2)
453
+ return y
454
+
455
+ class FourierStem2D(nn.Module, _PoolDownMixin):
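+ # Channel-to-feature stem: at every pixel, the C input channel values are projected onto
+ # out_dim fixed cosine (Fourier) or Chebyshev basis functions and then linearly mixed.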
456
+ def __init__(self, out_dim=64, basis="chebyshev", small_input: bool = True):
457
+ nn.Module.__init__(self); _PoolDownMixin.__init__(self, small_input)
458
+ assert basis in ("fourier", "chebyshev")
459
+ self.out_dim = out_dim; self.basis = basis
460
+ self.proj = nn.Linear(out_dim, out_dim)
461
+ self._basis_cache: Dict[Tuple[int, str, torch.device, torch.dtype], torch.Tensor] = {}
462
+
463
+ def _get_basis(self, C, device, dtype):
464
+ key = (C, self.basis, device, dtype)
465
+ if key in self._basis_cache: return self._basis_cache[key]
466
+ idx = torch.linspace(-1, 1, C, device=device, dtype=dtype).unsqueeze(0)
467
+ if self.basis == "fourier":
468
+ B = torch.stack([torch.cos(idx * i * math.pi) for i in range(1, self.out_dim+1)], dim=-1)
469
+ else:
470
+ B = torch.stack([torch.cos(i * torch.acos(idx)) for i in range(1, self.out_dim+1)], dim=-1)
471
+ self._basis_cache[key] = B; return B
472
+
473
+ def forward(self, x: torch.Tensor):
474
+ B, C, H, W = x.shape; device, dtype = x.device, x.dtype
475
+ x_flat = x.permute(0,2,3,1).reshape(-1, C) # [BHW,C]
476
+ basis = self._get_basis(C, device, dtype)[0] # [C,D]
477
+ emb = (x_flat @ basis) # [BHW,D]
478
+ emb = self.proj(emb).view(B, H, W, self.out_dim).permute(0,3,1,2)
479
+ return self._maybe_down(emb)
480
+
481
+
482
+ class WACA_Unet_stem(nn.Module):
483
+ def __init__(self, in_ch=25, out_ch=1, base_ch=64, reduction=16,
484
+ depth=4, drop_path=0.2, block=ConvNeXtV2BlockWACA, **kwargs):
485
+ super().__init__()
486
+ self.depth = depth
487
+ chs = [base_ch * 2**i for i in range(depth+1)]
488
+ self.drop_path = drop_path
489
+
490
+ n_enc_blocks = depth + 1
491
+ n_dec_blocks = depth
492
+ total_blocks = n_enc_blocks + n_dec_blocks
493
+
494
+ drop_path_rates = torch.linspace(0, drop_path, total_blocks).tolist()
495
+ enc_dp_rates = drop_path_rates[:n_enc_blocks]
496
+ dec_dp_rates = drop_path_rates[n_enc_blocks:]
497
+ # self.stem = GRUStem(out_channels=16,embed_channels=16,small_input=True)
498
+ self.stem = FourierStem2D(chs[0])
499
+ # self.up0 = nn.Upsample(scale_factor=2,mode='bicubic')
500
+ # Encoder
501
+ self.enc_blocks = nn.ModuleList([
502
+ block(chs[0], chs[0], reduction, drop_path=enc_dp_rates[0])
503
+ ] + [
504
+ block(chs[i], chs[i+1], reduction, drop_path=enc_dp_rates[i+1])
505
+ for i in range(depth)
506
+ ])
507
+ self.pool = nn.ModuleList([
508
+ nn.Conv2d(chs[i], chs[i], kernel_size=3, stride=2, padding=1, groups=chs[i])
509
+ for i in range(depth)
510
+ ])
511
+
512
+ # Decoder
513
+ self.upconvs = nn.ModuleList([
514
+ nn.ConvTranspose2d(chs[i+1], chs[i], kernel_size=2, stride=2)
515
+ for i in reversed(range(depth))
516
+ ])
517
+ self.dec_blocks = nn.ModuleList([
518
+ block(chs[i]*2, chs[i], reduction, drop_path=dec_dp_rates[i])
519
+ for i in reversed(range(depth))
520
+ ])
521
+ # Attention Gates
522
+ self.attn_gates = nn.ModuleList([
523
+ AttentionGate(chs[i], chs[i], chs[i])
524
+ for i in reversed(range(depth))
525
+ ])
526
+
527
+ self.final_head = nn.Sequential(
528
+ nn.Conv2d(chs[0], out_ch, kernel_size=1)
529
+ )
530
+
531
+ def forward(self, x):
532
+ enc_feats = []
533
+ x = self.stem(x)
534
+ # x = self.up0(x)
535
+ for i, enc in enumerate(self.enc_blocks):
536
+ x = enc(x)
537
+ enc_feats.append(x)
538
+ if i < self.depth:
539
+ x = self.pool[i](x)
540
+ # Decoder
541
+ for i in range(self.depth):
542
+ x = self.upconvs[i](x)
543
+ enc_feat = enc_feats[self.depth-1-i]
544
+ # AttentionGate: (encoder feature, decoder upconv output)
545
+ attn_enc_feat = self.attn_gates[i](enc_feat, x)
546
+ x = torch.cat([attn_enc_feat, x], dim=1)
547
+ x = self.dec_blocks[i](x)
548
+
549
+ out = self.final_head(x)
550
+ return {
551
+ 'x_recon': out
552
+ }
553
+
554
+
555
+
556
+ if __name__ == '__main__':
557
+ for block in [ConvNeXtV2BlockWACA]:
558
+ print(f"Testing block: {block.__name__}")
559
+ model = WACA_Unet_stem(in_ch=25, out_ch=1,block=block, depth=4)
560
+ dummy = torch.randn(2,25, 384, 384)
561
+ out = model(dummy)['x_recon']
562
+ print(f"Input shape: {dummy.shape}")
563
+ print(f"Output shape: {out.shape}")
564
+ total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
565
+ print(f"Total trainable parameters: {total_params:,}")
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ gradio>=5.0.0
2
+ numpy>=1.23
3
+ matplotlib>=3.7
4
+ pillow>=10.0
5
+ torch>=2.1.0
6
+ torchvision>=0.16.0
7
+ timm>=0.9.5
8
+ einops>=0.8.0
wacaunet_val_f1_331_0.8757.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c20f01f72d3ab9a4efb22d732bad281fc7de53b6a3506e8c0ed626385a77951
3
+ size 72818550