Spaces:
Sleeping
Sleeping
refactor: add implementations
Browse files- res/impl/DeepLabV3Plus.py +404 -0
- res/impl/FCN.py +130 -0
- res/impl/HRNetV2.py +378 -0
- res/impl/PSPNet.py +240 -0
- res/impl/SETR.py +291 -0
- res/impl/SegFormer.py +294 -0
- res/impl/UNet3PlusDeepSup.py +241 -0
- res/models/hrnetv2/best_config.json +0 -151
res/impl/DeepLabV3Plus.py
ADDED
|
@@ -0,0 +1,404 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/1802.02611
|
| 3 |
+
ref:
|
| 4 |
+
- https://github.com/tensorflow/models/tree/master/research/deeplab
|
| 5 |
+
- https://github.com/VainF/DeepLabV3Plus-Pytorch
|
| 6 |
+
- https://github.com/Hyunjulie/KR-Reading-Computer-Vision-Papers/blob/master/DeepLabv3%2B/deeplabv3p.py
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import math
|
| 10 |
+
import torch
|
| 11 |
+
from torch import nn
|
| 12 |
+
from torch.functional import F
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class AtrousSeparableConv1d(nn.Module):
    """Atrous (dilated) depthwise-separable 1-D convolution.

    A depthwise conv (one filter per input channel) extracts temporal
    features, then a 1x1 pointwise conv mixes channels.  The input is
    padded in ``forward`` so that, for stride 1, the output length equals
    the input length regardless of the dilation rate.
    """

    def __init__(
        self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False
    ):
        super(AtrousSeparableConv1d, self).__init__()

        # padding=0 here on purpose: forward() applies fixed padding itself.
        self.depthwise = nn.Conv1d(
            inplanes,
            inplanes,
            kernel_size,
            stride,
            0,
            dilation,
            groups=inplanes,
            bias=bias,
        )
        # 1x1 projection onto `planes` output channels.
        self.pointwise = nn.Conv1d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)

    def forward(self, x):
        k = self.depthwise.kernel_size[0]
        d = self.depthwise.dilation[0]
        padded = self.apply_fixed_padding(x, k, rate=d)
        return self.pointwise(self.depthwise(padded))

    def apply_fixed_padding(self, inputs, kernel_size, rate):
        """Pad `inputs` so the depthwise conv preserves the input length.

        The amount of padding depends on the (dilation) rate and the kernel
        size.  With stride >= 2 the output may still be shorter than the
        input; the network's final upsample step reconciles the lengths.
        """
        # Effective kernel size of a dilated conv: (k - 1) * rate + 1.
        effective = (kernel_size - 1) * rate + 1
        total = effective - 1
        left = total // 2
        return F.pad(inputs, (left, total - left))
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class Block(nn.Module):
    """Xception-style residual block built from AtrousSeparableConv1d layers.

    The main path stacks ``reps`` (ReLU -> separable conv -> BN) units and a
    skip connection is added on top.  When channels or stride change, the
    skip path is a strided 1x1 conv + BN; otherwise it is the identity.

    Args:
        inplanes: input channel count.
        planes: output channel count.
        reps: number of separable convs in the main path.
        kernel_size: kernel size of every separable conv.
        stride: 1 keeps the length, 2 halves it; anything else raises.
        dilation: dilation rate of the separable convs.
        start_with_relu: when False, drop the leading ReLU (used right after
            the stem, whose output was already activated).
        grow_first: widen inplanes -> planes with the first conv (True) or
            with the last conv (False).
        is_last: when stride == 1, append one extra stride-1 separable conv.
    """

    def __init__(
        self,
        inplanes,
        planes,
        reps,
        kernel_size=3,
        stride=1,
        dilation=1,
        start_with_relu=True,
        grow_first=True,
        is_last=False,
    ):
        super(Block, self).__init__()

        # Skip path: only project/downsample when the shapes differ.
        if planes != inplanes or stride != 1:
            self.skip = nn.Conv1d(inplanes, planes, 1, stride=stride, bias=False)
            self.skipbn = nn.BatchNorm1d(planes)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)
        rep = []

        filters = inplanes
        if grow_first:
            # Widen to `planes` immediately; the remaining reps keep it.
            rep.append(self.relu)
            rep.append(
                AtrousSeparableConv1d(
                    inplanes, planes, kernel_size, stride=1, dilation=dilation
                )
            )
            rep.append(nn.BatchNorm1d(planes))
            filters = planes

        # Middle reps keep the channel count (`filters`) unchanged.
        for _ in range(reps - 1):
            rep.append(self.relu)
            rep.append(
                AtrousSeparableConv1d(
                    filters, filters, kernel_size, stride=1, dilation=dilation
                )
            )
            rep.append(nn.BatchNorm1d(filters))

        if not grow_first:
            # Widen to `planes` with the final conv instead of the first.
            rep.append(self.relu)
            rep.append(
                AtrousSeparableConv1d(
                    inplanes, planes, kernel_size, stride=1, dilation=dilation
                )
            )
            rep.append(nn.BatchNorm1d(planes))

        if not start_with_relu:
            rep = rep[1:]  # drop the leading ReLU

        if stride == 2:
            # Downsample with one strided separable conv at the end.
            rep.append(AtrousSeparableConv1d(planes, planes, kernel_size, stride=2))
        elif stride == 1:
            if is_last:
                rep.append(AtrousSeparableConv1d(planes, planes, kernel_size, stride=1))
        else:
            raise NotImplementedError("stride must be 1 or 2 in Block.")

        self.rep = nn.Sequential(*rep)

    def forward(self, inp):
        x = self.rep(inp)

        if self.skip is not None:
            skip = self.skip(inp)
            skip = self.skipbn(skip)
        else:
            skip = inp

        # Residual addition (in place on the main-path output).
        x += skip

        return x
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class Xception(nn.Module):
    """Modified Aligned Xception backbone (1-D variant).

    Entry flow -> middle flow (``middle_repeat`` identical blocks) ->
    exit flow.  ``forward`` returns both the final 2048-channel feature map
    and the ``entry1`` output (128 channels), which DeepLabV3+ consumes as
    the low-level decoder feature.

    Args:
        inplanes: input channels (1 for a single-lead signal).
        output_stride: overall downsampling factor, 16 or 8; only controls
            the stride of the third entry block here.
        kernel_size: kernel size used throughout the backbone.
        middle_repeat: number of middle-flow blocks.
        middle_block_rate: dilation rate of the middle-flow blocks.
        exit_block_rates: (exit-block dilation, exit-conv dilation).
    """

    def __init__(
        self,
        inplanes=1,
        output_stride=16,
        kernel_size=3,
        middle_repeat=16,
        middle_block_rate=1,
        exit_block_rates=(1, 2),
    ):
        super(Xception, self).__init__()

        # With output_stride 8 the last entry block stops downsampling.
        if output_stride == 16:
            entry3_stride = 2
        elif output_stride == 8:
            entry3_stride = 1
        else:
            raise NotImplementedError

        # Stem: first conv halves the length, second keeps it.
        self.conv1 = nn.Conv1d(
            inplanes,
            32,
            kernel_size,
            stride=2,
            padding=(kernel_size - 1) // 2,
            bias=False,
        )
        self.bn1 = nn.BatchNorm1d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv1d(
            32, 64, kernel_size, stride=1, padding=(kernel_size - 1) // 2, bias=False
        )
        self.bn2 = nn.BatchNorm1d(64)

        # Entry flow: 64 -> 128 -> 256 -> 728 channels.
        self.entry1 = Block(
            64, 128, reps=2, kernel_size=kernel_size, stride=2, start_with_relu=False
        )
        self.entry2 = Block(
            128,
            256,
            reps=2,
            kernel_size=kernel_size,
            stride=2,
            start_with_relu=True,
            grow_first=True,
        )
        self.entry3 = Block(
            256,
            728,
            reps=2,
            kernel_size=kernel_size,
            stride=entry3_stride,
            start_with_relu=True,
            grow_first=True,
            is_last=True,
        )

        # Middle flow: `middle_repeat` identical 728-channel blocks.
        self.middle = nn.Sequential(
            *[
                Block(
                    728,
                    728,
                    reps=3,
                    kernel_size=kernel_size,
                    stride=1,
                    dilation=middle_block_rate,
                    start_with_relu=True,
                    grow_first=True,
                )
                for _ in range(middle_repeat)
            ]
        )

        # Exit flow: one block then three separable convs up to 2048 channels.
        self.exit = Block(
            728,
            1024,
            reps=2,
            kernel_size=kernel_size,
            stride=1,
            dilation=exit_block_rates[0],
            start_with_relu=True,
            grow_first=False,
            is_last=True,
        )

        self.conv3 = AtrousSeparableConv1d(
            1024, 1536, kernel_size, stride=1, dilation=exit_block_rates[1]
        )
        self.bn3 = nn.BatchNorm1d(1536)

        self.conv4 = AtrousSeparableConv1d(
            1536, 1536, kernel_size, stride=1, dilation=exit_block_rates[1]
        )
        self.bn4 = nn.BatchNorm1d(1536)

        self.conv5 = AtrousSeparableConv1d(
            1536, 2048, kernel_size, stride=1, dilation=exit_block_rates[1]
        )
        self.bn5 = nn.BatchNorm1d(2048)

    def forward(self, x: torch.Tensor):
        """Return (high-level features, low-level ``entry1`` features)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        # Keep the entry1 output as the low-level feature for the decoder.
        low_level = x = self.entry1(x)

        x = self.entry2(x)
        x = self.entry3(x)

        x = self.middle(x)

        x = self.exit(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)

        x = self.conv5(x)
        x = self.bn5(x)
        x = self.relu(x)

        return x, low_level
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
class ASPP(nn.Module):
    """One Atrous Spatial Pyramid Pooling branch: dilated conv -> BN -> ReLU.

    Args:
        inplanes: input channel count.
        planes: output channel count.
        rate: dilation rate of the branch; a rate of 1 degenerates to a
            plain 1x1 projection.
        kernel_size: kernel size used when rate > 1.
    """

    def __init__(self, inplanes, planes, rate, kernel_size=3):
        super(ASPP, self).__init__()
        if rate == 1:
            # Rate-1 branch is a 1x1 conv, so no padding is needed.
            kernel_size, padding = 1, 0
        else:
            # Pad so the dilated conv preserves the sequence length.
            padding = rate * (kernel_size - 1) // 2
        self.atrous_convolution = nn.Conv1d(
            inplanes,
            planes,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            dilation=rate,
            bias=False,
        )
        self.bn = nn.BatchNorm1d(planes)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.relu(self.bn(self.atrous_convolution(x)))
        return out
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
class DeepLabV3Plus(nn.Module):
    """DeepLabV3+ for 1-D segmentation: Xception backbone + ASPP + decoder.

    ``config`` must provide: output_stride, kernel_size, middle_block_rate,
    exit_block_rates, middle_repeat, interpolate_mode, aspp_channel,
    aspp_rate (four rates), output_size.
    """

    def __init__(self, config):
        super(DeepLabV3Plus, self).__init__()

        self.config = config
        # output_stride: (input's spatial resolution / output's resolution)
        output_stride = int(config.output_stride)
        kernel_size = int(config.kernel_size)
        middle_block_rate = int(config.middle_block_rate)
        exit_block_rates: list = config.exit_block_rates
        middle_repeat = int(config.middle_repeat)
        self.interpolate_mode = str(config.interpolate_mode)
        aspp_channel = int(config.aspp_channel)
        aspp_rate: list = config.aspp_rate
        output_size = config.output_size  # 3(p, qrs, t)

        self.xception_features = Xception(
            output_stride=output_stride,
            kernel_size=kernel_size,
            middle_repeat=middle_repeat,
            middle_block_rate=middle_block_rate,
            exit_block_rates=exit_block_rates,
        )

        # ASPP: four parallel branches at different dilation rates,
        # all reading the 2048-channel backbone output.
        self.aspp1 = ASPP(
            2048, aspp_channel, rate=aspp_rate[0], kernel_size=kernel_size
        )
        self.aspp2 = ASPP(
            2048, aspp_channel, rate=aspp_rate[1], kernel_size=kernel_size
        )
        self.aspp3 = ASPP(
            2048, aspp_channel, rate=aspp_rate[2], kernel_size=kernel_size
        )
        self.aspp4 = ASPP(
            2048, aspp_channel, rate=aspp_rate[3], kernel_size=kernel_size
        )

        self.relu = nn.ReLU()

        # Fifth ASPP branch: image-level (global) pooling.
        self.global_avg_pool = nn.Sequential(
            nn.AdaptiveAvgPool1d(1),
            nn.Conv1d(2048, aspp_channel, 1, stride=1, bias=False),
            nn.BatchNorm1d(aspp_channel),
            nn.ReLU(),
        )

        # Fuse the five concatenated ASPP branches back to aspp_channel.
        self.conv1 = nn.Conv1d(aspp_channel * 5, aspp_channel, 1, bias=False)
        self.bn1 = nn.BatchNorm1d(aspp_channel)

        # adopt [1x1, 48] for channel reduction.
        # NOTE: 128 matches the channel count of Xception's entry1 output.
        self.conv2 = nn.Conv1d(128, 48, 1, bias=False)
        self.bn2 = nn.BatchNorm1d(48)

        # Decoder head: two convs on the fused features, then the classifier.
        self.last_conv = nn.Sequential(
            nn.Conv1d(
                aspp_channel + 48,
                256,
                kernel_size=kernel_size,
                stride=1,
                padding=(kernel_size - 1) // 2,
                bias=False,
            ),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Conv1d(
                256,
                256,
                kernel_size=kernel_size,
                stride=1,
                padding=(kernel_size - 1) // 2,
                bias=False,
            ),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Conv1d(256, output_size, kernel_size=1, stride=1),
        )

    def forward(self, input):
        """Return per-position class logits of shape (N, output_size, L)."""
        x, low_level_features = self.xception_features(input)

        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        # Broadcast the pooled branch back to the spatial size of the others.
        x5 = F.interpolate(x5, size=x4.shape[2:], mode=self.interpolate_mode)

        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # Upsample to the low-level feature resolution (input length / 4).
        # NOTE(review): assumes ceil(L/4) equals the entry1 output length —
        # confirm for the input lengths used in training.
        x = F.interpolate(
            x, size=int(math.ceil(input.shape[-1] / 4)), mode=self.interpolate_mode
        )

        low_level_features = self.conv2(low_level_features)
        low_level_features = self.bn2(low_level_features)
        low_level_features = self.relu(low_level_features)

        # Decoder: fuse upsampled ASPP output with low-level features,
        # refine, then upsample to the original input length.
        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.interpolate(x, size=input.shape[2:], mode=self.interpolate_mode)

        return x
|
res/impl/FCN.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/1605.06211
|
| 3 |
+
ref: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/voc-fcn8s/net.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class FCN(nn.Module):
    """Fully Convolutional Network (FCN-8s style) adapted to 1-D signals.

    An encoder of ``num_layers`` conv+pool stages feeds a 4096-channel head;
    intermediate pool outputs (from ``combine_until`` onward) are scored,
    upsampled, and summed skip-connection style, then the result is
    upsampled back to the input length and center-cropped.

    ``config`` must provide: kernel_size, last_layer_kernel_size, inplanes,
    combine_conf (dict with "num_layers" in {4, 5, 6} and "combine_until"),
    num_convs, dilation, dropout, output_size.
    """

    def __init__(self, config):
        super().__init__()

        self.config = config
        self.kernel_size = int(config.kernel_size)
        last_layer_kernel_size = int(config.last_layer_kernel_size)
        inplanes = int(config.inplanes)
        combine_conf: dict = config.combine_conf
        self.num_layers = int(combine_conf["num_layers"])
        # Large first-layer padding (FCN's pad-100 trick) sized per depth so
        # later crops stay valid.
        self.first_padding = {6: 240, 5: 130, 4: 80}[self.num_layers]
        self.num_convs = int(config.num_convs)
        self.dilation = int(config.dilation)
        self.combine_until = int(combine_conf["combine_until"])
        assert self.combine_until < self.num_layers
        dropout = float(config.dropout)
        output_size = config.output_size  # 3(p, qrs, t)

        # Encoder: each stage doubles the channels and halves the length.
        self.layers = nn.ModuleList()
        for i in range(self.num_layers):
            self.layers.append(
                self._make_layer(
                    1 if i == 0 else inplanes * (2 ** (i - 1)),
                    inplanes * (2 ** (i)),
                    is_first=True if i == 0 else False,
                )
            )
        # Final conv stage without pooling: unlike the other stages it has a
        # fixed structure (two convs, 4096 channels) and applies dropout.
        # NOTE(review): relies on `i` leaking from the loop above
        # (== num_layers - 1); NameErrors if num_layers == 0.
        self.layers.append(
            nn.Sequential(
                nn.Conv1d(inplanes * (2 ** (i)), 4096, last_layer_kernel_size),
                nn.BatchNorm1d(4096),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Conv1d(4096, 4096, 1),
                nn.BatchNorm1d(4096),
                nn.ReLU(),
                nn.Dropout(dropout),
            )
        )
        self.score_convs = []
        self.up_convs = []
        for i in range(self.combine_until, self.num_layers - 1):
            # One score conv + one 2x up-conv per combined pool output.
            self.score_convs.append(
                nn.Conv1d(inplanes * (2 ** (i)), output_size, kernel_size=1, bias=False)
            )
            self.up_convs.append(
                nn.ConvTranspose1d(output_size, output_size, kernel_size=4, stride=2)
            )
        # Score conv applied to the pool-less final stage's output.
        # score_convs always has exactly one more entry than up_convs.
        self.score_convs.append(nn.Conv1d(4096, output_size, kernel_size=1, bias=False))

        # Reverse so score_convs[0] matches the deepest features first.
        # (up_convs are all identical, so their order does not matter.)
        self.score_convs.reverse()
        self.score_convs = nn.ModuleList(self.score_convs)
        self.up_convs = nn.ModuleList(self.up_convs)
        # Final upsample covering the remaining 2**(combine_until+1) factor.
        self.last_up_convs = nn.ConvTranspose1d(
            output_size,
            output_size,
            kernel_size=2 ** (self.combine_until + 1) * 2,  # stride * 2
            stride=2 ** (self.combine_until + 1),
        )

    def _make_layer(
        self,
        in_channel: int,
        out_channel: int,
        is_first: bool = False,
    ):
        """Build one encoder stage: num_convs x (conv-BN-ReLU) + 2x max-pool.

        The very first conv of the network uses the large ``first_padding``;
        all other convs use length-preserving padding.
        """
        layer = []
        plane = in_channel
        for idx in range(self.num_convs):
            layer.append(
                nn.Conv1d(
                    plane,
                    out_channel,
                    kernel_size=self.kernel_size,
                    padding=self.first_padding
                    if idx == 0 and is_first
                    else (self.dilation * (self.kernel_size - 1)) // 2,
                    dilation=self.dilation,
                    bias=False,
                )
            )
            layer.append(nn.BatchNorm1d(out_channel))
            layer.append(nn.ReLU())
            plane = out_channel

        layer.append(nn.MaxPool1d(2, 2, ceil_mode=True))
        return nn.Sequential(*layer)

    def forward(self, input: torch.Tensor, y=None):
        """Return logits cropped to the input length; `y` is unused here."""
        output: torch.Tensor = input

        # Run the encoder, keeping the pool outputs that will be combined.
        pools = []
        for idx, layer in enumerate(self.layers):
            output = layer(output)
            if self.combine_until <= idx < (self.num_layers - 1):
                pools.append(output)
        pools.reverse()  # deepest first, matching score_convs order

        output = self.score_convs[0](output)
        if len(pools) > 0:
            output = self.up_convs[0](output)
        for i in range(len(pools)):
            # Score the pooled features, center-crop to the current output
            # length, and fuse by addition (FCN skip connection).
            score_pool = self.score_convs[i + 1](pools[i])
            offset = (score_pool.shape[2] - output.shape[2]) // 2
            cropped_score_pool = torch.tensor_split(
                score_pool, (offset, offset + output.shape[2]), dim=2
            )[1]
            output = torch.add(cropped_score_pool, output)
            if i < len(pools) - 1:  # the final upsample uses last_up_convs
                output = self.up_convs[i + 1](output)
        output = self.last_up_convs(output)

        # Center-crop back to the original input length.
        offset = (output.shape[2] - input.shape[2]) // 2
        cropped_score_pool = torch.tensor_split(
            output, (offset, offset + input.shape[2]), dim=2
        )[1]
        return cropped_score_pool
|
res/impl/HRNetV2.py
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/1904.04514
|
| 3 |
+
ref: https://github.com/HRNet/HRNet-Semantic-Segmentation/blob/HRNet-OCR/lib/models/seg_hrnet.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.functional import F
|
| 9 |
+
import math
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _gen_same_length_conv(in_channel, out_channel, kernel_size=1, dilation=1):
|
| 13 |
+
"""길이가 변하지 않는 conv 생성, block 내에서 feature 를 추출하는 convolution 에서 사용"""
|
| 14 |
+
return nn.Conv1d(
|
| 15 |
+
in_channel,
|
| 16 |
+
out_channel,
|
| 17 |
+
kernel_size=kernel_size,
|
| 18 |
+
stride=1,
|
| 19 |
+
padding=(dilation * (kernel_size - 1)) // 2,
|
| 20 |
+
dilation=dilation,
|
| 21 |
+
bias=False,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _gen_downsample(in_channel, out_channel):
|
| 26 |
+
"""kernel_size:3, stride:2, padding:1 인 2배 downsample 하는 conv 생성"""
|
| 27 |
+
return nn.Conv1d(
|
| 28 |
+
in_channel, out_channel, kernel_size=3, stride=2, padding=1, bias=False
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _gen_channel_change_conv(in_channel, out_channel):
|
| 33 |
+
"""kernel_size:1, stride:1 인 channel 변경하는 conv 생성"""
|
| 34 |
+
return nn.Conv1d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class BasicBlock(nn.Module):
    """ResNet basic block (1-D): two convs with a residual connection.

    Channel change is inplanes -> planes; when they differ the residual
    path uses a 1x1 projection, otherwise the identity.
    """

    expansion = 1  # output channels == planes * expansion

    def __init__(self, inplanes, planes, kernel_size=3, dilation=1):
        super().__init__()
        self.conv1 = _gen_same_length_conv(inplanes, planes, kernel_size, dilation)
        self.bn1 = nn.BatchNorm1d(planes)
        self.relu = nn.ReLU()
        self.conv2 = _gen_same_length_conv(planes, planes, kernel_size, dilation)
        self.bn2 = nn.BatchNorm1d(planes)
        # Residual path: project channels only when they differ.
        if inplanes != planes:
            self.make_residual = _gen_channel_change_conv(inplanes, planes)
        else:
            self.make_residual = nn.Identity()

    def forward(self, x):
        residual = self.make_residual(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Add the shortcut, then apply the final activation.
        return self.relu(out + residual)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class Bottleneck(nn.Module):
    """ResNet bottleneck block (1-D): 1x1 reduce -> conv -> 1x1 expand.

    Channel change is inplanes -> planes * 4; when that differs from the
    input, the residual path uses a 1x1 projection, otherwise the identity.
    """

    expansion = 4  # output channels == planes * expansion

    def __init__(self, inplanes, planes, kernel_size=3, dilation=1):
        super().__init__()
        self.conv1 = _gen_same_length_conv(inplanes, planes)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = _gen_same_length_conv(planes, planes, kernel_size, dilation)
        self.bn2 = nn.BatchNorm1d(planes)
        self.conv3 = _gen_same_length_conv(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm1d(planes * self.expansion)
        self.relu = nn.ReLU()
        # Residual path: project channels only when they differ.
        if inplanes != planes * self.expansion:
            self.make_residual = _gen_channel_change_conv(
                inplanes, planes * self.expansion
            )
        else:
            self.make_residual = nn.Identity()

    def forward(self, x):
        residual = self.make_residual(x)

        out = self.relu(self.bn1(self.conv1(x)))  # 1x1 reduce
        out = self.relu(self.bn2(self.conv2(out)))  # k-sized conv
        out = self.bn3(self.conv3(out))  # 1x1 expand

        # Add the shortcut, then apply the final activation.
        return self.relu(out + residual)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
class HRModule(nn.Module):
    """One HRNet stage module: parallel branches plus cross-branch fusions.

    Builds ``stage_idx + 1`` branches (each a stack of ``num_blocks``
    residual blocks at its own resolution) and, for every output branch, a
    list of fusion layers that bring every input branch to that branch's
    channel count and length (upsample for higher-index inputs, identity or
    1x1 projection for the same index, repeated stride-2 downsamples for
    lower-index inputs).

    NOTE(review): no forward() is defined in this class as shown here —
    presumably the branches/fusions are driven by the enclosing network;
    confirm against the rest of the file.
    """

    def __init__(
        self,
        stage_idx,
        num_blocks,
        block_type_by_stage,
        in_channels_by_stage,
        out_channels_by_stage,
        data_len_by_branch,
        kernel_size,
        dilation,
        interpolate_mode,
    ):
        super().__init__()

        self.branches = nn.ModuleList()
        self.fusions = nn.ModuleList()

        block_type: type[BasicBlock | Bottleneck] = block_type_by_stage[stage_idx]
        in_channels = in_channels_by_stage[stage_idx]
        for i in range(stage_idx + 1):  # build one branch per resolution
            blocks_by_branch = []
            _channels = in_channels[i]
            blocks_by_branch.append(
                block_type(_channels, _channels, kernel_size, dilation)
            )
            for _ in range(1, num_blocks):
                # Later blocks take the expanded channel count as input
                # (expansion is 1 for BasicBlock, 4 for Bottleneck).
                blocks_by_branch.append(
                    block_type(
                        _channels * block_type.expansion,
                        _channels,
                        kernel_size,
                        dilation,
                    )
                )
            self.branches.append(nn.Sequential(*blocks_by_branch))

        out_channels = out_channels_by_stage[stage_idx]
        for i in range(stage_idx + 1):  # fusion targets (output branches)
            fusion_by_branch = nn.ModuleList()
            for j in range(stage_idx + 1):  # fusion sources (input branches)
                if i < j:
                    # Lower-resolution source: project channels, then
                    # upsample to this branch's length.
                    fusion_by_branch.append(
                        nn.Sequential(
                            _gen_channel_change_conv(out_channels[j], in_channels[i]),
                            nn.BatchNorm1d(in_channels[i]),
                            nn.Upsample(
                                size=data_len_by_branch[i], mode=interpolate_mode
                            ),
                        )
                    )
                elif i == j:
                    # Same resolution: identity, or a 1x1 projection when the
                    # channel counts differ.
                    if out_channels[i] != in_channels[j]:
                        fusion_by_branch.append(
                            nn.Sequential(
                                _gen_channel_change_conv(
                                    out_channels[i], in_channels[j]
                                ),
                                nn.BatchNorm1d(in_channels[j]),
                                nn.ReLU(),
                            )
                        )
                    else:
                        fusion_by_branch.append(nn.Identity())
                else:
                    # Higher-resolution source: downsample by 2x once per
                    # branch gap, matching channels to this branch's input.
                    downsamples = [
                        _gen_downsample(out_channels[j], in_channels[i]),
                        nn.BatchNorm1d(in_channels[i]),
                    ]
                    for _ in range(1, i - j):
                        downsamples.extend(
                            [
                                nn.ReLU(),
                                _gen_downsample(in_channels[i], in_channels[i]),
                                nn.BatchNorm1d(in_channels[i]),
                            ]
                        )
                    fusion_by_branch.append(nn.Sequential(*downsamples))
            self.fusions.append(fusion_by_branch)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
class HRNetV2(nn.Module):
    """1-D HRNetV2 for ECG segmentation.

    Maintains parallel multi-resolution branches; each stage adds one more
    (lower-resolution) branch via a transition, and modules within a stage
    repeatedly run the branches and fuse information across resolutions.
    The head concatenates all branch outputs (upsampled to the highest
    resolution) and classifies per time step.
    """

    def __init__(self, config):
        """Build stem, stages (HRModules), transitions and classifier from `config`.

        Assumes `config` carries numeric fields plus per-stage lists
        (`num_modules`, `use_bottleneck`) — TODO confirm against the config schema.
        """
        super().__init__()

        self.config = config
        data_len = int(config.data_len)  # matched to ECGPQRSTDataset.second * hz
        kernel_size = int(config.kernel_size)
        dilation = int(config.dilation)
        num_stages = int(config.num_stages)
        num_blocks = int(config.num_blocks)
        self.num_modules = config.num_modules  # modules per stage, e.g. [1, 1, 4, 3, ...]
        assert num_stages <= len(self.num_modules)
        use_bottleneck = config.use_bottleneck  # 1 -> Bottleneck, 0 -> BasicBlock, per stage, e.g. [1, 0, 0, 0, ...]
        assert num_stages <= len(use_bottleneck)
        stage1_channels = int(config.stage1_channels)  # e.g. 64, 128
        num_channels_init = int(config.num_channels_init)  # base width of branch 0, e.g. 18, 32, 48
        self.interpolate_mode = config.interpolate_mode
        output_size = config.output_size  # number of classes, e.g. 3 (p, qrs, t)

        # stem: two stride-2 convs -> overall 1/4 temporal resolution
        self.stem = nn.Sequential(
            nn.Conv1d(
                1, stage1_channels, kernel_size=3, stride=2, padding=1, bias=False
            ),
            nn.BatchNorm1d(stage1_channels),
            nn.Conv1d(
                stage1_channels,
                stage1_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm1d(stage1_channels),
            nn.ReLU(),
        )
        for _ in range(2):  # compute the data length after passing through the stem
            data_len = math.floor((data_len - 1) / 2 + 1)

        # create meta: build per-stage in/out channel (and per-branch length) info before constructing the network
        in_channels_by_stage = []
        out_channels_by_stage = []
        block_type_by_stage = []
        for stage_idx in range(num_stages):
            block_type_each_stage = (
                Bottleneck if use_bottleneck[stage_idx] == 1 else BasicBlock
            )
            if stage_idx == 0:
                in_channels_each_stage = [stage1_channels]
                out_channels_each_stage = [
                    stage1_channels * block_type_each_stage.expansion
                ]
                data_len_by_branch = [data_len]
            else:
                # branch i of stage s has num_channels_init * 2**i channels
                in_channels_each_stage = [
                    num_channels_init * 2**idx for idx in range(stage_idx + 1)
                ]
                out_channels_each_stage = [
                    (num_channels_init * 2**idx) * block_type_each_stage.expansion
                    for idx in range(stage_idx + 1)
                ]
                # each new branch halves the temporal length of the previous one
                data_len_by_branch.append(
                    math.floor((data_len_by_branch[-1] - 1) / 2 + 1)
                )

            block_type_by_stage.append(block_type_each_stage)
            in_channels_by_stage.append(in_channels_each_stage)
            out_channels_by_stage.append(out_channels_each_stage)

        # create stages: stage s holds num_modules[s] HRModules over s+1 branches
        self.stages = nn.ModuleList()
        for stage_idx in range(num_stages):
            modules_by_stage = nn.ModuleList()
            for _ in range(self.num_modules[stage_idx]):
                modules_by_stage.append(
                    HRModule(
                        stage_idx,
                        num_blocks,
                        block_type_by_stage,
                        in_channels_by_stage,
                        out_channels_by_stage,
                        data_len_by_branch,
                        kernel_size,
                        dilation,
                        self.interpolate_mode,
                    )
                )
            self.stages.append(modules_by_stage)

        # create transitions
        self.transitions = nn.ModuleList()
        for stage_idx in range(num_stages - 1):
            # here stage_idx refers to the *previous* stage; a transition adapts
            # channels between stages or creates the new (extra) branch
            transition_by_stage = nn.ModuleList()
            psc = in_channels_by_stage[stage_idx]  # psc: prev_stage_channels
            nsc = in_channels_by_stage[stage_idx + 1]  # nsc: next_stage_channels
            for nsbi in range(stage_idx + 2):  # nsbi: next_stage_branch_idx
                if nsbi < stage_idx + 1:  # same branch level as an existing branch
                    if psc[nsbi] != nsc[nsbi]:
                        transition_by_stage.append(
                            nn.Sequential(
                                _gen_channel_change_conv(psc[nsbi], nsc[nsbi]),
                                nn.BatchNorm1d(nsc[nsbi]),
                                nn.ReLU(),
                            )
                        )
                    else:
                        transition_by_stage.append(nn.Identity())
                else:  # create new branch from exists branches
                    transition_from_branches = nn.ModuleList()
                    for psbi in range(nsbi):
                        # psbi: prev_stage_branch_idx; downsample 2x per level gap
                        transition_from_one_branch = [
                            _gen_downsample(psc[psbi], nsc[nsbi]),
                            nn.BatchNorm1d(nsc[nsbi]),
                        ]
                        for _ in range(1, nsbi - psbi):
                            transition_from_one_branch.extend(
                                [
                                    nn.ReLU(),
                                    _gen_downsample(nsc[nsbi], nsc[nsbi]),
                                    nn.BatchNorm1d(nsc[nsbi]),
                                ]
                            )
                        transition_from_branches.append(
                            nn.Sequential(*transition_from_one_branch)
                        )
                    transition_by_stage.append(transition_from_branches)
            self.transitions.append(transition_by_stage)

        # head input: concatenation of all final-stage branch channels
        # (in_channels_each_stage still holds the last stage's list here)
        self.cls = nn.Conv1d(sum(in_channels_each_stage), output_size, 1, bias=False)

    def forward(self, input: torch.Tensor, y=None):
        """Run stem -> stages (branches + fusion) -> transitions -> concat head.

        `input` is expected to be (batch, 1, length); `y` is unused here —
        presumably kept for a shared training interface, TODO confirm.
        """
        output: torch.Tensor = input

        output = self.stem(output)

        outputs = [output]
        for stage_idx, stage in enumerate(self.stages):
            for module_idx in range(self.num_modules[stage_idx]):
                # run each branch's block sequence
                for branch_idx in range(stage_idx + 1):
                    outputs[branch_idx] = stage[module_idx].branches[branch_idx](
                        outputs[branch_idx]
                    )
                # fuse: each output branch sums contributions from all branches
                fusion_outputs = []
                for next in range(stage_idx + 1):
                    fusion_output_from_branches = []
                    for prev in range(stage_idx + 1):
                        fusion_output_from_branch: torch.Tensor = stage[
                            module_idx
                        ].fusions[next][prev](outputs[prev])
                        fusion_output_from_branches.append(fusion_output_from_branch)
                    fusion_outputs.append(sum(fusion_output_from_branches))
                outputs = fusion_outputs

            if stage_idx < len(self.stages) - 1:
                transition_outputs = []
                for trans_idx, transition in enumerate(self.transitions[stage_idx]):
                    # a transition holds one Sequential/ModuleList per branch of the next stage:
                    # the leading Sequentials only adapt channels (or pass through via Identity);
                    # the final ModuleList builds the new branch by downsampling and summing
                    # the fused outputs of all existing branches
                    if trans_idx < stage_idx + 1:
                        transition_outputs.append(transition(outputs[trans_idx]))
                    else:
                        transition_outputs.append(
                            sum(
                                [
                                    transition_from_each_branch(output)
                                    for transition_from_each_branch, output in zip(
                                        transition, outputs
                                    )
                                ]
                            )
                        )
                outputs = transition_outputs

        # HRNetV2 head: upsample every branch to the highest resolution and concat
        outputs = [
            F.interpolate(output, size=outputs[0].shape[-1], mode=self.interpolate_mode)
            for output in outputs
        ]
        output = torch.cat(outputs, dim=1)

        # classify, then restore the original input length
        return F.interpolate(
            self.cls(output), size=input.shape[-1], mode=self.interpolate_mode
        )
|
res/impl/PSPNet.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/1612.01105
|
| 3 |
+
ref:
|
| 4 |
+
- https://github.com/hszhao/PSPNet
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from torch import nn
|
| 9 |
+
from torch.functional import F
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class PPM(nn.Module):
    """Pyramid Pooling Module (1-D).

    For every bin size, adaptively average-pools the input to that length,
    reduces channels with a 1x1 conv (+BN+ReLU), re-interpolates back to the
    input length, and concatenates all pooled maps with the input along the
    channel axis.
    """

    def __init__(self, in_dim, reduction_dim, bins, interplate_mode):
        super(PPM, self).__init__()
        # One pooling pyramid level per requested bin size.
        self.features = nn.ModuleList(
            nn.Sequential(
                nn.AdaptiveAvgPool1d(bin_size),
                nn.Conv1d(in_dim, reduction_dim, kernel_size=1, bias=False),
                nn.BatchNorm1d(reduction_dim),
                nn.ReLU(),
            )
            for bin_size in bins
        )
        self.interplate_mode = interplate_mode

    def forward(self, x: torch.Tensor):
        # Restore every pyramid level to the input's temporal length.
        length = x.size()[2]
        pooled = [
            F.interpolate(level(x), length, mode=self.interplate_mode)
            for level in self.features
        ]
        # Output channels: in_dim + len(bins) * reduction_dim.
        return torch.cat([x] + pooled, dim=1)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class Bottleneck(nn.Module):
    """Residual bottleneck block for 1-D signals.

    1x1 reduce -> kxk conv (optionally strided/dilated) -> 1x1 expand, with a
    shortcut connection. `downsample`, when given, projects the shortcut to
    match the main path's shape.
    """

    def __init__(
        self,
        inplanes,
        planes,
        expansion=4,
        kernel_size=3,
        stride=1,
        dilation=1,
        padding=1,
        downsample=None,
    ):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv1d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(
            planes,
            planes,
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            padding=padding,
            bias=False,
        )
        self.bn2 = nn.BatchNorm1d(planes)
        self.conv3 = nn.Conv1d(planes, planes * expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm1d(planes * expansion)
        self.relu = nn.ReLU()
        self.downsample = downsample

    def forward(self, x):
        # Identity shortcut, or a projected one when shapes must change.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Final activation applied after the residual addition.
        return self.relu(out + shortcut)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
class PSPNet(nn.Module):
    """1-D PSPNet for ECG segmentation.

    ResNet-style encoder (stem + bottleneck layers) followed by a Pyramid
    Pooling Module and a conv classifier; during training an auxiliary head
    taps an intermediate layer and its prediction is blended into the output.
    """

    def __init__(self, config):
        super(PSPNet, self).__init__()

        self.config = config
        self.kernel_size = int(config.kernel_size)
        self.padding = (self.kernel_size - 1) // 2  # "same" padding for odd kernels
        self.expansion = int(config.expansion)
        self.inplanes = int(config.inplanes)  # running channel count, mutated by _make_layer
        num_layers = int(config.num_layers)
        self.num_bottlenecks = int(config.num_bottlenecks)
        self.interpolate_mode = str(config.interpolate_mode)
        self.dilation = int(config.dilation)
        ppm_bins: list = config.ppm_bins
        self.aux_idx = int(config.aux_idx)  # which encoder layer feeds the aux head
        assert self.aux_idx < num_layers
        self.aux_ratio = float(config.aux_ratio)  # blend weight of the aux prediction
        dropout = float(config.dropout)
        output_size = config.output_size  # number of classes, e.g. 3 (p, qrs, t)

        # stem downsamples by 1/4 (stride-2 conv + stride-2 maxpool) before the layers
        self.stem = nn.Sequential(
            *[
                nn.Conv1d(
                    1,
                    self.inplanes,
                    self.kernel_size,
                    stride=2,
                    padding=self.padding,
                    bias=False,
                ),
                nn.BatchNorm1d(self.inplanes),
                nn.ReLU(),
                nn.MaxPool1d(self.kernel_size, stride=2, padding=self.padding),
            ]
        )

        # encoder: layer i works at width plane * 2**i (before expansion)
        self.layers = []
        plane = self.inplanes
        for i in range(num_layers):
            self.layers.append(self._make_layer(plane * (2 ** (i))))
        self.layers = nn.ModuleList(self.layers)

        # after the loop self.inplanes holds the final encoder channel count
        encode_dim = self.inplanes
        self.ppm = PPM(
            encode_dim,
            int(encode_dim / len(ppm_bins)),
            ppm_bins,
            self.interpolate_mode,
        )
        # PPM concatenates len(bins) * (encode_dim/len(bins)) channels onto the input -> 2x
        encode_dim *= 2
        self.cls = nn.Sequential(
            nn.Conv1d(
                encode_dim,
                512,
                kernel_size=self.kernel_size,
                padding=self.padding,
                bias=False,
            ),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout1d(dropout),
            nn.Conv1d(512, output_size, kernel_size=1),
        )
        self.aux_branch = nn.Sequential(
            # in-channels must match the tapped layer's output (layer self.aux_idx)
            nn.Conv1d(
                plane * self.expansion * (2**self.aux_idx),
                256,
                kernel_size=self.kernel_size,
                padding=self.padding,
                bias=False,
            ),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout1d(0.1),
            nn.Conv1d(256, output_size, kernel_size=1),
        )

    def _make_layer(self, planes: int):
        """Return a layer of self.num_bottlenecks Bottleneck blocks.

        The first bottleneck downsamples by 2 (with a projected shortcut);
        the remaining bottlenecks use dilated convs with self.dilation.
        Side effect: advances self.inplanes to planes * self.expansion.
        """
        downsample = nn.Sequential(
            nn.Conv1d(
                self.inplanes,
                planes * self.expansion,
                kernel_size=1,
                stride=2,
                bias=False,
            ),
            nn.BatchNorm1d(planes * self.expansion),
        )

        bottlenecks = []
        bottlenecks.append(
            Bottleneck(
                self.inplanes,
                planes,
                expansion=self.expansion,
                kernel_size=self.kernel_size,
                stride=2,
                dilation=1,
                padding=self.padding,
                downsample=downsample,
            )
        )
        self.inplanes = planes * self.expansion
        for _ in range(1, self.num_bottlenecks):
            bottlenecks.append(
                Bottleneck(
                    self.inplanes,
                    planes,
                    expansion=self.expansion,
                    kernel_size=self.kernel_size,
                    stride=1,
                    dilation=self.dilation,
                    # padding chosen so the dilated conv preserves length
                    padding=(self.dilation * (self.kernel_size - 1)) // 2,
                )
            )

        return nn.Sequential(*bottlenecks)

    def forward(self, input: torch.Tensor, y=None):
        """Encode, pool, classify; blend in the auxiliary prediction when training.

        `y` is unused here — presumably kept for a shared training interface,
        TODO confirm.
        """
        output: torch.Tensor = input
        output = self.stem(output)
        for i, _layer in enumerate(self.layers):
            output = _layer(output)
            if i == self.aux_idx:
                aux = output  # tap the intermediate feature map for the aux head

        output = self.ppm(output)
        output = self.cls(output)
        # restore the original input length
        output = F.interpolate(
            output,
            input.shape[2],
            mode=self.interpolate_mode,
        )
        if self.training:
            aux = self.aux_branch(aux)
            aux = F.interpolate(
                aux,
                input.shape[2],
                mode=self.interpolate_mode,
            )
            # NOTE(review): blends predictions rather than losses — unusual PSPNet
            # aux usage, but intentional here judging by aux_ratio's role
            return torch.add(output * (1 - self.aux_ratio), aux * self.aux_ratio)
        else:
            return output
|
res/impl/SETR.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/2012.15840
|
| 3 |
+
- ref
|
| 4 |
+
- encoder:
|
| 5 |
+
- https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/backbones/vit.py
|
| 6 |
+
- https://github.com/lucidrains/vit-pytorch/blob/main/vit_pytorch/vit_1d.py
|
| 7 |
+
- decoder:
|
| 8 |
+
- https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/setr_up_head.py
|
| 9 |
+
- https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/setr_mla_head.py
|
| 10 |
+
|
| 11 |
+
- encoder: ViT 와 구조가 동일하며, PatchEmbed 의 경우 patch_size를 kernel_size와 stride 로 하는 Conv1d를 사용
|
| 12 |
+
- decoder: upsample 하는 방식으로 다음 두가지를 사용 (scale_factor: 특정 배수만큼 upsample / size: 특정 크기와 동일한 크기로 upsample)
|
| 13 |
+
- naive: 원본 길이로 size 방식 upsample
|
| 14 |
+
- pup: scale_factor 방식으로 수행하다가 마지막에 원본 길이로 size 방식으로 upsample
|
| 15 |
+
- mla: 총 두 단계로 수행하며, 첫번째 단계에서 transformer block 의 결과들을 scale_factor 방식으로 수행하고 두번째 단계에서 첫번째 결과들을 concat 한 후 size 방식으로 upsample
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import math
|
| 19 |
+
import torch
|
| 20 |
+
from torch import nn
|
| 21 |
+
from einops import rearrange
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class FeedForward(nn.Module):
    """Pre-norm transformer MLP: LayerNorm -> Linear -> GELU -> Dropout -> Linear -> Dropout."""

    def __init__(self, dim, hidden_dim, dropout=0.0):
        super().__init__()
        stages = [
            nn.LayerNorm(dim),
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        # The whole pass is the sequential pipeline; shape (B, N, dim) is preserved.
        out = self.net(x)
        return out
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class Attention(nn.Module):
    """Pre-norm multi-head self-attention.

    Projects the normalized input to packed Q/K/V, computes scaled dot-product
    attention per head, and projects the concatenated heads back to `dim`
    (skipped only in the degenerate single-head, dim_head == dim case).
    """

    def __init__(self, dim, heads=8, dim_head=64, dropout=0.0):
        super().__init__()
        inner_dim = dim_head * heads
        needs_projection = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head**-0.5  # 1/sqrt(d_k)

        self.norm = nn.LayerNorm(dim)
        self.attend = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)

        if needs_projection:
            self.to_out = nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
        else:
            self.to_out = nn.Identity()

    def forward(self, x):
        x = self.norm(x)
        b, n, _ = x.shape
        # Split packed QKV and reshape each to (b, heads, n, dim_head);
        # equivalent to einops rearrange "b n (h d) -> b h n d".
        q, k, v = (
            t.view(b, n, self.heads, -1).transpose(1, 2)
            for t in self.to_qkv(x).chunk(3, dim=-1)
        )

        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        weights = self.dropout(self.attend(scores))

        context = torch.matmul(weights, v)
        # Merge heads back: "b h n d -> b n (h d)".
        context = context.transpose(1, 2).reshape(b, n, -1)
        return self.to_out(context)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ========== 여기까지 https://github.com/lucidrains/vit-pytorch/blob/main/vit_pytorch/vit_1d.py 차용 ==========
|
| 77 |
+
# ========== 아래부터 setr 원본 참고 https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/backbones/vit.py ==========
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class TransformerBlock(nn.Module):
    """One pre-norm transformer encoder layer: self-attention then FFN, each with a residual connection."""

    def __init__(
        self,
        dim,
        num_attn_heads,
        attn_head_dim,
        mlp_dim,
        attn_dropout=0.0,
        ffn_dropout=0.0,
    ):
        super().__init__()
        self.attn = Attention(
            dim, heads=num_attn_heads, dim_head=attn_head_dim, dropout=attn_dropout
        )
        self.ffn = FeedForward(dim, mlp_dim, dropout=ffn_dropout)

    def forward(self, x):
        # Residual around attention, then residual around the feed-forward net.
        x = x + self.attn(x)
        x = x + self.ffn(x)
        return x
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class PatchEmbed(nn.Module):
    """Embed a 1-channel signal as a sequence of non-overlapping patch vectors.

    A Conv1d whose kernel size equals its stride is exactly a per-patch linear
    projection; the result is transposed to (batch, num_patches, embed_dim).
    """

    def __init__(
        self,
        embed_dim=1024,
        kernel_size=16,
        bias=False,
    ):
        super().__init__()

        self.projection = nn.Conv1d(
            in_channels=1,
            out_channels=embed_dim,
            kernel_size=kernel_size,
            stride=kernel_size,
            bias=bias,
        )

    def forward(self, x: torch.Tensor):
        embedded = self.projection(x)  # (B, embed_dim, num_patches)
        return embedded.transpose(1, 2)  # (B, num_patches, embed_dim)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class SETR(nn.Module):
    """1-D SETR (SEgmentation TRansformer) for ECG segmentation.

    ViT-style encoder (patch embed + positional embed + transformer blocks)
    with one of three upsampling decoders selected by config:
    - naive: single size-based upsample back to the input length
    - pup:   progressive conv + scale_factor upsamples, final size upsample
    - mla:   conv-upsample features tapped every `output_step` blocks, concat,
             then a final size upsample
    """

    def __init__(self, config):
        super().__init__()

        embed_dim = int(config.embed_dim)
        data_len = int(config.data_len)  # matched to ECGPQRSTDataset.second * hz
        patch_size = int(config.patch_size)
        assert data_len % patch_size == 0
        num_patches = data_len // patch_size
        patch_bias = bool(config.patch_bias)
        dropout = float(config.dropout)
        # pos_dropout_p: float = config.pos_dropout_p  # too many hyperparameters; a single dropout value is used for now
        num_layers = int(config.num_layers)  # number of transformer blocks
        num_attn_heads = int(config.num_attn_heads)
        attn_head_dim = int(config.attn_head_dim)
        mlp_dim = int(config.mlp_dim)
        # attn_dropout: float = config.attn_dropout
        # ffn_dropout: float = config.ffn_dropout
        interpolate_mode = str(config.interpolate_mode)
        dec_conf: dict = config.dec_conf  # single-key dict: {mode: params}
        assert len(dec_conf) == 1
        self.dec_mode: str = list(dec_conf.keys())[0]
        assert self.dec_mode in ["naive", "pup", "mla"]
        self.dec_param: dict = dec_conf[self.dec_mode]
        output_size = int(config.output_size)

        # patch embedding
        self.patch_embed = PatchEmbed(
            embed_dim=embed_dim,
            kernel_size=patch_size,
            bias=patch_bias,
        )

        # positional embedding (learned)
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches, embed_dim))
        self.pos_dropout = nn.Dropout(p=dropout)

        # transformer encoder
        self.layers = nn.ModuleList()
        for _ in range(num_layers):
            self.layers.append(
                TransformerBlock(
                    dim=embed_dim,
                    num_attn_heads=num_attn_heads,
                    attn_head_dim=attn_head_dim,
                    mlp_dim=mlp_dim,
                    attn_dropout=dropout,
                    ffn_dropout=dropout,
                )
            )

        # decoder
        self.dec_layers = nn.ModuleList()
        if self.dec_mode == "naive":
            self.dec_layers.append(nn.Upsample(size=data_len, mode=interpolate_mode))
            dec_out_channel = embed_dim
        elif self.dec_mode == "pup":
            self.dec_layers.append(nn.LayerNorm(embed_dim))
            dec_up_scale = int(self.dec_param["up_scale"])
            available_up_count = int(
                math.log(data_len // num_patches, dec_up_scale)
            )  # how many scale_factor upsample steps fit; the remainder is covered by the final size upsample
            pup_channels = int(self.dec_param["channels"])
            dec_in_channel = embed_dim
            dec_out_channel = pup_channels
            dec_kernel_size = int(self.dec_param["kernel_size"])
            dec_num_convs_by_layer = int(self.dec_param["num_convs_by_layer"])
            assert dec_kernel_size in [1, 3]  # same restriction as the reference code
            for i in range(available_up_count + 1):
                for _ in range(dec_num_convs_by_layer):
                    self.dec_layers.append(
                        nn.Conv1d(
                            dec_in_channel,
                            dec_out_channel,
                            kernel_size=dec_kernel_size,
                            stride=1,
                            padding=(dec_kernel_size - 1) // 2,
                        )
                    )
                    dec_in_channel = dec_out_channel
                if i < available_up_count:
                    self.dec_layers.append(
                        nn.Upsample(scale_factor=dec_up_scale, mode=interpolate_mode)
                    )
                else:  # last upsample: snap exactly to the input length
                    self.dec_layers.append(
                        nn.Upsample(size=data_len, mode=interpolate_mode)
                    )
        else:  # mla
            dec_up_scale = int(self.dec_param["up_scale"])
            assert (
                data_len >= dec_up_scale * num_patches
            )  # intermediate features upsampled by up_scale must stay shorter than the input, otherwise the final upsample is pointless
            dec_output_step = int(self.dec_param["output_step"])
            assert num_layers % dec_output_step == 0
            dec_num_convs_by_layer = int(self.dec_param["num_convs_by_layer"])
            dec_kernel_size = int(self.dec_param["kernel_size"])
            mid_feature_cnt = num_layers // dec_output_step
            mla_channel = int(self.dec_param["channels"])
            for _ in range(mid_feature_cnt):
                # conv-upsample applied to the feature map tapped every output_step transformer blocks
                dec_in_channel = embed_dim
                dec_layers_each_upsample = []
                for _ in range(dec_num_convs_by_layer):
                    dec_layers_each_upsample.append(
                        nn.Conv1d(
                            dec_in_channel,
                            mla_channel,
                            kernel_size=dec_kernel_size,
                            stride=1,
                            padding=(dec_kernel_size - 1) // 2,
                        )
                    )
                    dec_in_channel = mla_channel
                dec_layers_each_upsample.append(
                    nn.Upsample(scale_factor=dec_up_scale, mode=interpolate_mode)
                )
                self.dec_layers.append(nn.Sequential(*dec_layers_each_upsample))
            # last decoder layer: concat the intermediate feature maps, then upsample to the input length
            self.dec_layers.append(nn.Upsample(size=data_len, mode=interpolate_mode))

            dec_out_channel = (
                mla_channel * mid_feature_cnt
            )  # the per-step feature maps are concatenated channel-wise, so the classifier below must accept mid_feature_cnt times mla_channel channels

        self.cls = nn.Conv1d(dec_out_channel, output_size, 1, bias=False)

    def forward(self, input: torch.Tensor, y=None):
        """Encode patches, run the transformer, decode per dec_mode, classify.

        `y` is unused here — presumably kept for a shared training interface,
        TODO confirm.
        """
        output = input

        # patch embedding: (B, 1, L) -> (B, num_patches, embed_dim)
        output = self.patch_embed(output)

        # positional embedding
        output += self.pos_embed
        output = self.pos_dropout(output)

        outputs = []
        # transformer encoder; mla taps features every output_step blocks
        for i, layer in enumerate(self.layers):
            output = layer(output)
            if self.dec_mode == "mla":
                if (i + 1) % int(self.dec_param["output_step"]) == 0:
                    outputs.append(output.transpose(1, 2))
        if self.dec_mode != "mla":  # for mla the taps were already collected above
            outputs.append(output.transpose(1, 2))

        # decoder
        if self.dec_mode == "naive":
            assert len(outputs) == 1
            output = outputs[0]
            output = self.dec_layers[0](output)
        elif self.dec_mode == "pup":
            assert len(outputs) == 1
            output = outputs[0]
            # LayerNorm expects channels last, so transpose around it
            pup_norm = self.dec_layers[0]
            output = pup_norm(output.transpose(1, 2)).transpose(1, 2)
            for i, dec_layer in enumerate(self.dec_layers[1:]):
                output = dec_layer(output)
        else:  # mla
            dec_output_step = int(self.dec_param["output_step"])
            mid_feature_cnt = len(self.layers) // dec_output_step
            assert len(outputs) == mid_feature_cnt
            for i in range(len(outputs)):
                outputs[i] = self.dec_layers[i](outputs[i])
            output = torch.cat(outputs, dim=1)
            output = self.dec_layers[-1](output)

        return self.cls(output)
|
res/impl/SegFormer.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/2105.15203
|
| 3 |
+
- ref:
|
| 4 |
+
- encoder:
|
| 5 |
+
- https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py
|
| 6 |
+
- https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/backbones/mit.py
|
| 7 |
+
- decoder:
|
| 8 |
+
- https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/decode_heads/segformer_head.py
|
| 9 |
+
- https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/segformer_head.py
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
import torch
|
| 14 |
+
from torch import nn
|
| 15 |
+
from torch.functional import F
|
| 16 |
+
import math
|
| 17 |
+
from einops import rearrange
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class MixFFN(nn.Module):
|
| 21 |
+
def __init__(self, embed_dim, channels, dropout=0.0):
|
| 22 |
+
super().__init__()
|
| 23 |
+
|
| 24 |
+
self.layers = nn.Sequential(
|
| 25 |
+
nn.Conv1d( # fc1
|
| 26 |
+
in_channels=embed_dim, out_channels=channels, kernel_size=1, stride=1
|
| 27 |
+
),
|
| 28 |
+
nn.Conv1d( # position embed (depthwise-separable)
|
| 29 |
+
in_channels=channels,
|
| 30 |
+
out_channels=channels,
|
| 31 |
+
kernel_size=3,
|
| 32 |
+
stride=1,
|
| 33 |
+
padding=1,
|
| 34 |
+
groups=channels,
|
| 35 |
+
),
|
| 36 |
+
nn.GELU(),
|
| 37 |
+
nn.Dropout(dropout),
|
| 38 |
+
nn.Conv1d( # fc2
|
| 39 |
+
in_channels=channels, out_channels=embed_dim, kernel_size=1
|
| 40 |
+
),
|
| 41 |
+
nn.Dropout(dropout),
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
def forward(self, x):
|
| 45 |
+
out = x.transpose(1, 2)
|
| 46 |
+
out = self.layers(out)
|
| 47 |
+
out = out.transpose(1, 2)
|
| 48 |
+
return out
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class EfficientMultiheadAttention(nn.Module):
|
| 52 |
+
"""
|
| 53 |
+
PVT(Pyramid Vision Transformer)에서 사용한 Spatial-Reduction Attention 을 차용
|
| 54 |
+
변수명 중 sr 은 Spatial-Reduction 의 약어
|
| 55 |
+
"""
|
| 56 |
+
|
| 57 |
+
def __init__(
|
| 58 |
+
self, embed_dim, num_heads=8, attn_drop=0.0, proj_drop=0.0, sr_ratio=1
|
| 59 |
+
):
|
| 60 |
+
super().__init__()
|
| 61 |
+
|
| 62 |
+
assert (
|
| 63 |
+
embed_dim % num_heads == 0
|
| 64 |
+
), f"dim {embed_dim} should be divided by num_heads {num_heads}."
|
| 65 |
+
|
| 66 |
+
self.num_heads = num_heads
|
| 67 |
+
head_dim = embed_dim // num_heads
|
| 68 |
+
self.scale = head_dim**-0.5
|
| 69 |
+
|
| 70 |
+
self.q = nn.Linear(embed_dim, embed_dim, bias=False)
|
| 71 |
+
self.kv = nn.Linear(embed_dim, embed_dim * 2, bias=False)
|
| 72 |
+
self.attn_drop = nn.Dropout(attn_drop)
|
| 73 |
+
self.proj = nn.Linear(embed_dim, embed_dim)
|
| 74 |
+
self.proj_drop = nn.Dropout(proj_drop)
|
| 75 |
+
|
| 76 |
+
self.sr_ratio = sr_ratio
|
| 77 |
+
if sr_ratio > 1:
|
| 78 |
+
self.sr = nn.Conv1d(
|
| 79 |
+
embed_dim, embed_dim, kernel_size=sr_ratio, stride=sr_ratio
|
| 80 |
+
)
|
| 81 |
+
self.norm = nn.LayerNorm(embed_dim)
|
| 82 |
+
|
| 83 |
+
def forward(self, x):
|
| 84 |
+
B, N, C = x.shape
|
| 85 |
+
q = self.q(x)
|
| 86 |
+
q = rearrange(q, "b n (h c) -> b h n c", h=self.num_heads)
|
| 87 |
+
|
| 88 |
+
if self.sr_ratio > 1:
|
| 89 |
+
x_ = x.transpose(1, 2)
|
| 90 |
+
x_ = self.sr(x_).transpose(1, 2)
|
| 91 |
+
x_ = self.norm(x_)
|
| 92 |
+
kv = self.kv(x_)
|
| 93 |
+
kv = rearrange(
|
| 94 |
+
kv,
|
| 95 |
+
"b n (two_heads h c) -> two_heads b h n c",
|
| 96 |
+
two_heads=2,
|
| 97 |
+
h=self.num_heads,
|
| 98 |
+
)
|
| 99 |
+
else:
|
| 100 |
+
kv = self.kv(x)
|
| 101 |
+
kv = rearrange(
|
| 102 |
+
kv,
|
| 103 |
+
"b n (two_heads h c) -> two_heads b h n c",
|
| 104 |
+
two_heads=2,
|
| 105 |
+
h=self.num_heads,
|
| 106 |
+
)
|
| 107 |
+
k, v = kv[0], kv[1]
|
| 108 |
+
|
| 109 |
+
attn = (q @ k.transpose(-2, -1)) * self.scale
|
| 110 |
+
attn = attn.softmax(dim=-1)
|
| 111 |
+
attn = self.attn_drop(attn)
|
| 112 |
+
|
| 113 |
+
x = (attn @ v).transpose(1, 2)
|
| 114 |
+
x = x.reshape(B, N, C)
|
| 115 |
+
x = self.proj(x)
|
| 116 |
+
x = self.proj_drop(x)
|
| 117 |
+
|
| 118 |
+
return x
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class TransformerBlock(nn.Module):
|
| 122 |
+
def __init__(self, embed_dim, num_heads, ffn_channels, dropout=0.2, sr_ratio=1):
|
| 123 |
+
super().__init__()
|
| 124 |
+
|
| 125 |
+
self.attn = nn.Sequential(
|
| 126 |
+
nn.LayerNorm(embed_dim),
|
| 127 |
+
EfficientMultiheadAttention(
|
| 128 |
+
embed_dim=embed_dim,
|
| 129 |
+
num_heads=num_heads,
|
| 130 |
+
attn_drop=dropout,
|
| 131 |
+
proj_drop=dropout,
|
| 132 |
+
sr_ratio=sr_ratio,
|
| 133 |
+
),
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
self.ffn = nn.Sequential(
|
| 137 |
+
nn.LayerNorm(embed_dim),
|
| 138 |
+
MixFFN(embed_dim=embed_dim, channels=ffn_channels, dropout=dropout),
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
+
def forward(self, x):
|
| 142 |
+
x = x + self.attn(x)
|
| 143 |
+
x = x + self.ffn(x)
|
| 144 |
+
return x
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
class PatchEmbed(nn.Module):
|
| 148 |
+
def __init__(
|
| 149 |
+
self,
|
| 150 |
+
in_channels=1,
|
| 151 |
+
embed_dim=1024,
|
| 152 |
+
kernel_size=7,
|
| 153 |
+
stride=4,
|
| 154 |
+
padding=3,
|
| 155 |
+
bias=False,
|
| 156 |
+
):
|
| 157 |
+
super().__init__()
|
| 158 |
+
|
| 159 |
+
self.projection = nn.Conv1d(
|
| 160 |
+
in_channels=in_channels,
|
| 161 |
+
out_channels=embed_dim,
|
| 162 |
+
kernel_size=kernel_size,
|
| 163 |
+
stride=stride,
|
| 164 |
+
padding=padding,
|
| 165 |
+
bias=bias,
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
def forward(self, x: torch.Tensor):
|
| 169 |
+
return self.projection(x).transpose(1, 2)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
class MiT(nn.Module):
|
| 173 |
+
"""MixVisionTransformer"""
|
| 174 |
+
|
| 175 |
+
def __init__(
|
| 176 |
+
self,
|
| 177 |
+
embed_dim=512,
|
| 178 |
+
num_blocks=[2, 2, 6, 2],
|
| 179 |
+
num_heads=[1, 2, "ceil"],
|
| 180 |
+
sr_ratios=[1, 2, "ceil"],
|
| 181 |
+
mlp_ratio=4,
|
| 182 |
+
dropout=0.2,
|
| 183 |
+
):
|
| 184 |
+
super().__init__()
|
| 185 |
+
|
| 186 |
+
num_stages = len(num_blocks)
|
| 187 |
+
round_func = getattr(math, num_heads[2]) # math.ceil or match.floor
|
| 188 |
+
num_heads = [
|
| 189 |
+
round_func((num_heads[0] * math.pow(num_heads[1], itr)))
|
| 190 |
+
for itr in range(num_stages)
|
| 191 |
+
]
|
| 192 |
+
round_func = getattr(math, sr_ratios[2]) # math.ceil or match.floor
|
| 193 |
+
sr_ratios = [
|
| 194 |
+
round_func(sr_ratios[0] * math.pow(sr_ratios[1], itr))
|
| 195 |
+
for itr in range(num_stages)
|
| 196 |
+
]
|
| 197 |
+
sr_ratios.reverse()
|
| 198 |
+
|
| 199 |
+
self.embed_dims = [embed_dim * num_head for num_head in num_heads]
|
| 200 |
+
patch_kernel_sizes = [7] # [7, 3, 3, ..]
|
| 201 |
+
patch_kernel_sizes.extend([3] * (num_stages - 1))
|
| 202 |
+
patch_strides = [4] # [4, 2, 2, ..]
|
| 203 |
+
patch_strides.extend([2] * (num_stages - 1))
|
| 204 |
+
patch_paddings = [3] # [3, 1, 1, ..]
|
| 205 |
+
patch_paddings.extend([1] * (num_stages - 1))
|
| 206 |
+
|
| 207 |
+
in_channels = 1
|
| 208 |
+
self.stages = nn.ModuleList()
|
| 209 |
+
for i, num_block in enumerate(num_blocks):
|
| 210 |
+
patch_embed = PatchEmbed(
|
| 211 |
+
in_channels=in_channels,
|
| 212 |
+
embed_dim=self.embed_dims[i],
|
| 213 |
+
kernel_size=patch_kernel_sizes[i],
|
| 214 |
+
stride=patch_strides[i],
|
| 215 |
+
padding=patch_paddings[i],
|
| 216 |
+
)
|
| 217 |
+
blocks = nn.ModuleList(
|
| 218 |
+
[
|
| 219 |
+
TransformerBlock(
|
| 220 |
+
embed_dim=self.embed_dims[i],
|
| 221 |
+
num_heads=num_heads[i],
|
| 222 |
+
ffn_channels=mlp_ratio * self.embed_dims[i],
|
| 223 |
+
dropout=dropout,
|
| 224 |
+
sr_ratio=sr_ratios[i],
|
| 225 |
+
)
|
| 226 |
+
for _ in range(num_block)
|
| 227 |
+
]
|
| 228 |
+
)
|
| 229 |
+
in_channels = self.embed_dims[i]
|
| 230 |
+
norm = nn.LayerNorm(self.embed_dims[i])
|
| 231 |
+
self.stages.append(nn.ModuleList([patch_embed, blocks, norm]))
|
| 232 |
+
|
| 233 |
+
def forward(self, x):
|
| 234 |
+
outs = []
|
| 235 |
+
|
| 236 |
+
for stage in self.stages:
|
| 237 |
+
x = stage[0](x) # patch embed
|
| 238 |
+
for block in stage[1]: # transformer blocks
|
| 239 |
+
x = block(x)
|
| 240 |
+
x = stage[2](x) # norm
|
| 241 |
+
x = x.transpose(1, 2)
|
| 242 |
+
outs.append(x)
|
| 243 |
+
|
| 244 |
+
return outs
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class SegFormer(nn.Module):
|
| 248 |
+
def __init__(self, config):
|
| 249 |
+
super().__init__()
|
| 250 |
+
|
| 251 |
+
embed_dim = int(config.embed_dim)
|
| 252 |
+
num_blocks = config.num_blocks
|
| 253 |
+
num_heads = config.num_heads
|
| 254 |
+
assert len(num_heads) == 3 and num_heads[2] in ["floor", "ceil"]
|
| 255 |
+
sr_ratios = config.sr_ratios
|
| 256 |
+
assert len(sr_ratios) == 3 and sr_ratios[2] in ["floor", "ceil"]
|
| 257 |
+
mlp_ratio = int(config.mlp_ratio)
|
| 258 |
+
dropout = float(config.dropout)
|
| 259 |
+
decoder_channels = int(config.decoder_channels)
|
| 260 |
+
self.interpolate_mode = str(config.interpolate_mode)
|
| 261 |
+
output_size = int(config.output_size)
|
| 262 |
+
|
| 263 |
+
self.MiT = MiT(embed_dim, num_blocks, num_heads, sr_ratios, mlp_ratio, dropout)
|
| 264 |
+
|
| 265 |
+
num_stages = len(num_blocks)
|
| 266 |
+
self.decode_mlps = nn.ModuleList(
|
| 267 |
+
[
|
| 268 |
+
nn.Conv1d(self.MiT.embed_dims[i], decoder_channels, 1, bias=False)
|
| 269 |
+
for i in range(num_stages)
|
| 270 |
+
]
|
| 271 |
+
)
|
| 272 |
+
self.decode_fusion = nn.Conv1d(
|
| 273 |
+
decoder_channels * num_stages, decoder_channels, 1, bias=False
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
+
self.cls = nn.Conv1d(decoder_channels, output_size, 1, bias=False)
|
| 277 |
+
|
| 278 |
+
def forward(self, input: torch.Tensor, y=None):
|
| 279 |
+
output = input
|
| 280 |
+
|
| 281 |
+
output = self.MiT(output)
|
| 282 |
+
for i, (_output, decode_mlp) in enumerate(zip(output, self.decode_mlps)):
|
| 283 |
+
_output = decode_mlp(_output)
|
| 284 |
+
if i != 0:
|
| 285 |
+
_output = F.interpolate(
|
| 286 |
+
_output, size=output[0].shape[2], mode=self.interpolate_mode
|
| 287 |
+
)
|
| 288 |
+
output[i] = _output
|
| 289 |
+
|
| 290 |
+
output = torch.concat(output, dim=1)
|
| 291 |
+
output = self.decode_fusion(output)
|
| 292 |
+
output = self.cls(output)
|
| 293 |
+
|
| 294 |
+
return F.interpolate(output, size=input.shape[2], mode=self.interpolate_mode)
|
res/impl/UNet3PlusDeepSup.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
paper: https://arxiv.org/abs/2004.08790
|
| 3 |
+
ref: https://github.com/ZJUGiveLab/UNet-Version/blob/master/models/UNet_3Plus.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from torch import nn
|
| 8 |
+
from torch.functional import F
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class UNetConv(nn.Module):
|
| 12 |
+
def __init__(
|
| 13 |
+
self,
|
| 14 |
+
in_size,
|
| 15 |
+
out_size,
|
| 16 |
+
is_batchnorm=True,
|
| 17 |
+
num_layers=2,
|
| 18 |
+
kernel_size=3,
|
| 19 |
+
stride=1,
|
| 20 |
+
padding=1,
|
| 21 |
+
):
|
| 22 |
+
super().__init__()
|
| 23 |
+
self.num_layers = num_layers
|
| 24 |
+
|
| 25 |
+
for i in range(num_layers):
|
| 26 |
+
seq = [nn.Conv1d(in_size, out_size, kernel_size, stride, padding)]
|
| 27 |
+
if is_batchnorm:
|
| 28 |
+
seq.append(nn.BatchNorm1d(out_size))
|
| 29 |
+
seq.append(nn.ReLU())
|
| 30 |
+
conv = nn.Sequential(*seq)
|
| 31 |
+
setattr(self, "conv%d" % i, conv)
|
| 32 |
+
in_size = out_size
|
| 33 |
+
|
| 34 |
+
def forward(self, inputs):
|
| 35 |
+
x = inputs
|
| 36 |
+
for i in range(self.num_layers):
|
| 37 |
+
conv = getattr(self, "conv%d" % i)
|
| 38 |
+
x = conv(x)
|
| 39 |
+
|
| 40 |
+
return x
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class UNet3PlusDeepSup(nn.Module):
|
| 44 |
+
def __init__(self, config):
|
| 45 |
+
super().__init__()
|
| 46 |
+
|
| 47 |
+
self.config = config
|
| 48 |
+
inplanes = int(config.inplanes)
|
| 49 |
+
kernel_size = int(config.kernel_size)
|
| 50 |
+
padding = (kernel_size - 1) // 2
|
| 51 |
+
num_encoder_layers = int(config.num_encoder_layers)
|
| 52 |
+
encoder_batchnorm = bool(config.encoder_batchnorm)
|
| 53 |
+
self.num_depths = int(config.num_depths)
|
| 54 |
+
self.interpolate_mode = str(config.interpolate_mode)
|
| 55 |
+
dropout = float(config.dropout)
|
| 56 |
+
self.use_cgm = bool(config.use_cgm)
|
| 57 |
+
# sum_of_sup == True: 모든 sup 을 elementwise sum 하여 하나의 dense map 을 만들어 label 과 loss 를 구함
|
| 58 |
+
# sum_of_sup == False: 각 sup 과 label의 loss 를 각각 구하여 하나의 loss 에 저장
|
| 59 |
+
self.sum_of_sup = bool(config.sum_of_sup)
|
| 60 |
+
# TrialSetup._init_network_params 에서 설정됨
|
| 61 |
+
self.output_size: int = config.output_size
|
| 62 |
+
|
| 63 |
+
# Encoder
|
| 64 |
+
self.encoders = torch.nn.ModuleList()
|
| 65 |
+
for i in range(self.num_depths):
|
| 66 |
+
"""(MaxPool - UNetConv) 를 수행하는 것이 하나의 depth 이고, 예외적으로 첫번째 depth 의 encode 결과는 (UNetConv)만 수행한 것"""
|
| 67 |
+
_encoders = []
|
| 68 |
+
if i != 0:
|
| 69 |
+
_encoders.append(nn.MaxPool1d(2))
|
| 70 |
+
_encoders.append(
|
| 71 |
+
UNetConv(
|
| 72 |
+
1 if i == 0 else (inplanes * (2 ** (i - 1))),
|
| 73 |
+
inplanes * (2**i),
|
| 74 |
+
is_batchnorm=encoder_batchnorm,
|
| 75 |
+
num_layers=num_encoder_layers,
|
| 76 |
+
kernel_size=kernel_size,
|
| 77 |
+
stride=1,
|
| 78 |
+
padding=padding,
|
| 79 |
+
)
|
| 80 |
+
)
|
| 81 |
+
self.encoders.append(nn.Sequential(*_encoders))
|
| 82 |
+
|
| 83 |
+
# CGM: Classification-Guided Module
|
| 84 |
+
if self.use_cgm:
|
| 85 |
+
self.cls = nn.Sequential(
|
| 86 |
+
nn.Dropout(dropout),
|
| 87 |
+
nn.Conv1d(
|
| 88 |
+
inplanes * (2 ** (self.num_depths - 1)), 2 * self.output_size, 1
|
| 89 |
+
),
|
| 90 |
+
nn.AdaptiveMaxPool1d(1),
|
| 91 |
+
nn.Sigmoid(),
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
# Decoder
|
| 95 |
+
self.up_channels = inplanes * self.num_depths
|
| 96 |
+
|
| 97 |
+
self.decoders = torch.nn.ModuleList()
|
| 98 |
+
for i in reversed(range(self.num_depths - 1)):
|
| 99 |
+
"""
|
| 100 |
+
각 decoder 는 각 encode 결과를 MaxPool 하거나 그대로(Conv,BatchNorm,Relu 만) 사용하거나 Upsample 된 결과를 수행하고 concat 하여 (Conv,BatchNorm,Relu)를 수행할 수 있도록 구성
|
| 101 |
+
다만, Upsample 은 encode 결과와 size 를 맞추기 간편하도록 forward 단계에서 torch.functional.interpolate() 로 수행
|
| 102 |
+
"""
|
| 103 |
+
# 각 단계별 decoder 는 항상 num_depths 만큼 구성되고 내부적으로 MaxPool/그대로/Upsample 수행할지가 달라짐
|
| 104 |
+
_decoders = torch.nn.ModuleList()
|
| 105 |
+
for j in range(self.num_depths):
|
| 106 |
+
_each_decoders = []
|
| 107 |
+
if j < i:
|
| 108 |
+
_each_decoders.append(nn.MaxPool1d(2 ** (i - j), ceil_mode=True))
|
| 109 |
+
if i < j < self.num_depths - 1:
|
| 110 |
+
_each_decoders.append(
|
| 111 |
+
nn.Conv1d(
|
| 112 |
+
inplanes * self.num_depths,
|
| 113 |
+
inplanes,
|
| 114 |
+
kernel_size,
|
| 115 |
+
padding=padding,
|
| 116 |
+
)
|
| 117 |
+
)
|
| 118 |
+
else:
|
| 119 |
+
_each_decoders.append(
|
| 120 |
+
nn.Conv1d(
|
| 121 |
+
inplanes * (2**j), inplanes, kernel_size, padding=padding
|
| 122 |
+
)
|
| 123 |
+
)
|
| 124 |
+
_each_decoders.append(nn.BatchNorm1d(inplanes))
|
| 125 |
+
_each_decoders.append(nn.ReLU())
|
| 126 |
+
_decoders.append(nn.Sequential(*_each_decoders))
|
| 127 |
+
_decoders.append(
|
| 128 |
+
nn.Sequential(
|
| 129 |
+
nn.Conv1d(
|
| 130 |
+
self.up_channels, self.up_channels, kernel_size, padding=padding
|
| 131 |
+
),
|
| 132 |
+
nn.BatchNorm1d(self.up_channels),
|
| 133 |
+
nn.ReLU(),
|
| 134 |
+
)
|
| 135 |
+
)
|
| 136 |
+
self.decoders.append(_decoders)
|
| 137 |
+
|
| 138 |
+
# 앞 conv 들은 in channel 이 up_channels(inplanes*num_depths(원본에서는 320)), 마지막 conv 는 마지막 encoder 결과의 output_channel 과 맞춤
|
| 139 |
+
self.sup_conv = torch.nn.ModuleList()
|
| 140 |
+
for i in range(self.num_depths - 1):
|
| 141 |
+
self.sup_conv.append(
|
| 142 |
+
nn.Sequential(
|
| 143 |
+
nn.Conv1d(
|
| 144 |
+
self.up_channels, self.output_size, kernel_size, padding=padding
|
| 145 |
+
),
|
| 146 |
+
nn.BatchNorm1d(self.output_size),
|
| 147 |
+
nn.ReLU(),
|
| 148 |
+
)
|
| 149 |
+
)
|
| 150 |
+
self.sup_conv.append(
|
| 151 |
+
nn.Sequential(
|
| 152 |
+
nn.Conv1d(
|
| 153 |
+
inplanes * (2 ** (self.num_depths - 1)),
|
| 154 |
+
self.output_size,
|
| 155 |
+
kernel_size,
|
| 156 |
+
padding=padding,
|
| 157 |
+
),
|
| 158 |
+
nn.BatchNorm1d(self.output_size),
|
| 159 |
+
nn.ReLU(),
|
| 160 |
+
)
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
def forward(self, input: torch.Tensor, y=None):
|
| 164 |
+
# Encoder
|
| 165 |
+
output = input
|
| 166 |
+
enc_features = [] # X1Ee, X2Ee, .. , X5Ee
|
| 167 |
+
dec_features = [] # X5Ee, X4De, .. , X1De
|
| 168 |
+
for encoder in self.encoders:
|
| 169 |
+
output = encoder(output)
|
| 170 |
+
enc_features.append(output)
|
| 171 |
+
dec_features.append(output)
|
| 172 |
+
|
| 173 |
+
# CGM
|
| 174 |
+
cls_branch_max = None
|
| 175 |
+
if self.use_cgm:
|
| 176 |
+
# (B, 2*3(output_size), 1)
|
| 177 |
+
cls_branch: torch.Tensor = self.cls(enc_features[-1])
|
| 178 |
+
# (B, 3(output_size))
|
| 179 |
+
cls_branch_max = cls_branch.view(
|
| 180 |
+
input.shape[0], self.output_size, 2
|
| 181 |
+
).argmax(2)
|
| 182 |
+
|
| 183 |
+
# Decoder
|
| 184 |
+
for i in reversed(range(self.num_depths - 1)):
|
| 185 |
+
_each_dec_feature = []
|
| 186 |
+
for j in range(self.num_depths):
|
| 187 |
+
if j <= i:
|
| 188 |
+
_each_enc = enc_features[j]
|
| 189 |
+
else:
|
| 190 |
+
_each_enc = F.interpolate(
|
| 191 |
+
dec_features[self.num_depths - j - 1],
|
| 192 |
+
enc_features[i].shape[2],
|
| 193 |
+
mode=self.interpolate_mode,
|
| 194 |
+
)
|
| 195 |
+
_each_dec_feature.append(
|
| 196 |
+
self.decoders[self.num_depths - i - 2][j](_each_enc)
|
| 197 |
+
)
|
| 198 |
+
dec_features.append(
|
| 199 |
+
self.decoders[self.num_depths - i - 2][-1](
|
| 200 |
+
torch.cat(_each_dec_feature, dim=1)
|
| 201 |
+
)
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
sup = []
|
| 205 |
+
for i, (dec_feature, sup_conv) in enumerate(
|
| 206 |
+
zip(dec_features, reversed(self.sup_conv))
|
| 207 |
+
):
|
| 208 |
+
if i < self.num_depths - 1:
|
| 209 |
+
sup.append(
|
| 210 |
+
F.interpolate(
|
| 211 |
+
sup_conv(dec_feature),
|
| 212 |
+
input.shape[2],
|
| 213 |
+
mode=self.interpolate_mode,
|
| 214 |
+
)
|
| 215 |
+
)
|
| 216 |
+
else:
|
| 217 |
+
sup.append(sup_conv(dec_feature))
|
| 218 |
+
|
| 219 |
+
if self.use_cgm:
|
| 220 |
+
if self.sum_of_sup:
|
| 221 |
+
return torch.sigmoid(
|
| 222 |
+
sum(
|
| 223 |
+
[
|
| 224 |
+
torch.einsum("ijk,ij->ijk", [_sup, cls_branch_max])
|
| 225 |
+
for _sup in reversed(sup)
|
| 226 |
+
]
|
| 227 |
+
)
|
| 228 |
+
)
|
| 229 |
+
else:
|
| 230 |
+
return [
|
| 231 |
+
torch.sigmoid(
|
| 232 |
+
torch.einsum("ijk,ij->ijk", [_sup, cls_branch_max])
|
| 233 |
+
for _sup in reversed(sup)
|
| 234 |
+
)
|
| 235 |
+
]
|
| 236 |
+
|
| 237 |
+
else:
|
| 238 |
+
if self.sum_of_sup:
|
| 239 |
+
return torch.sigmoid(sum(sup))
|
| 240 |
+
else:
|
| 241 |
+
return [torch.sigmoid(_sup) for _sup in reversed(sup)]
|
res/models/hrnetv2/best_config.json
DELETED
|
@@ -1,151 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"progress": true,
|
| 4 |
-
"random_seed": 2407041220,
|
| 5 |
-
"resume_dir": [],
|
| 6 |
-
"checkpoint_dir": "/bfai/nfs_export/workspace/share/result/wogh/hrnet/train-240704_123013",
|
| 7 |
-
"checkpoint_save_freq": 1,
|
| 8 |
-
"working_dir": "",
|
| 9 |
-
"user": "wogh",
|
| 10 |
-
"name": "hrnet",
|
| 11 |
-
"exp_name": "wogh:hrnet",
|
| 12 |
-
"type": "supervised",
|
| 13 |
-
"task": "segmentation",
|
| 14 |
-
"epochs": 501,
|
| 15 |
-
"batch_size": 64,
|
| 16 |
-
"hpo": {
|
| 17 |
-
"num_samples": 256,
|
| 18 |
-
"criteria": {
|
| 19 |
-
"jaccard_avg": 1
|
| 20 |
-
},
|
| 21 |
-
"scheduler": {
|
| 22 |
-
"ASHAScheduler": {
|
| 23 |
-
"grace_period": 200,
|
| 24 |
-
"max_t": 501
|
| 25 |
-
}
|
| 26 |
-
}
|
| 27 |
-
},
|
| 28 |
-
"label": {
|
| 29 |
-
"num_labels": 3,
|
| 30 |
-
"path": [
|
| 31 |
-
"/bfai/nfs_export/workspace/share/labels/pqrst/ludb/train.csv",
|
| 32 |
-
"/bfai/nfs_export/workspace/share/labels/pqrst/ludb/valid.csv",
|
| 33 |
-
"/bfai/nfs_export/workspace/share/labels/pqrst/ludb/test.csv"
|
| 34 |
-
],
|
| 35 |
-
"target": [
|
| 36 |
-
"p_onoffs",
|
| 37 |
-
"qrs_onoffs",
|
| 38 |
-
"t_onoffs"
|
| 39 |
-
],
|
| 40 |
-
"split_ratio": [
|
| 41 |
-
1,
|
| 42 |
-
1,
|
| 43 |
-
1
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
"resource_per_trial": {
|
| 47 |
-
"num_workers": 1,
|
| 48 |
-
"num_gpus_per_worker": 1,
|
| 49 |
-
"num_cpus_per_worker": 16
|
| 50 |
-
},
|
| 51 |
-
"comment": "",
|
| 52 |
-
"tracking": true,
|
| 53 |
-
"available_resources": {
|
| 54 |
-
"available_gpus": 16.0
|
| 55 |
-
}
|
| 56 |
-
},
|
| 57 |
-
"solver": {
|
| 58 |
-
"SolverPQRST": {
|
| 59 |
-
"mixed_precision": true,
|
| 60 |
-
"gradient_clip": 0.1
|
| 61 |
-
}
|
| 62 |
-
},
|
| 63 |
-
"datasets": [
|
| 64 |
-
{
|
| 65 |
-
"ECGPQRSTDataset": {
|
| 66 |
-
"lead_type": [
|
| 67 |
-
"I",
|
| 68 |
-
"II",
|
| 69 |
-
"III",
|
| 70 |
-
"aVR",
|
| 71 |
-
"aVL",
|
| 72 |
-
"aVF",
|
| 73 |
-
"V1",
|
| 74 |
-
"V2",
|
| 75 |
-
"V3",
|
| 76 |
-
"V4",
|
| 77 |
-
"V5",
|
| 78 |
-
"V6"
|
| 79 |
-
],
|
| 80 |
-
"aux_data": [],
|
| 81 |
-
"normalization": "z_norm",
|
| 82 |
-
"second": 10,
|
| 83 |
-
"hz": 500
|
| 84 |
-
}
|
| 85 |
-
}
|
| 86 |
-
],
|
| 87 |
-
"models": [
|
| 88 |
-
{
|
| 89 |
-
"network": {
|
| 90 |
-
"HRNetV2": {
|
| 91 |
-
"data_len": 5000,
|
| 92 |
-
"kernel_size": 5,
|
| 93 |
-
"dilation": 1,
|
| 94 |
-
"num_stages": 3,
|
| 95 |
-
"num_blocks": 6,
|
| 96 |
-
"num_modules": [
|
| 97 |
-
1,
|
| 98 |
-
1,
|
| 99 |
-
1,
|
| 100 |
-
4,
|
| 101 |
-
3
|
| 102 |
-
],
|
| 103 |
-
"use_bottleneck": [
|
| 104 |
-
1,
|
| 105 |
-
0,
|
| 106 |
-
0,
|
| 107 |
-
0,
|
| 108 |
-
0
|
| 109 |
-
],
|
| 110 |
-
"stage1_channels": 128,
|
| 111 |
-
"num_channels_init": 48,
|
| 112 |
-
"interpolate_mode": "linear",
|
| 113 |
-
"task": "segmentation",
|
| 114 |
-
"num_leads": 12,
|
| 115 |
-
"num_aux": 0,
|
| 116 |
-
"output_size": 3,
|
| 117 |
-
"aux_output_size": 0
|
| 118 |
-
}
|
| 119 |
-
},
|
| 120 |
-
"optimizer": [
|
| 121 |
-
{
|
| 122 |
-
"SGD": {
|
| 123 |
-
"lr": 0.0983058839402403,
|
| 124 |
-
"momentum": 0.9,
|
| 125 |
-
"weight_decay": 0.0003850652731758502,
|
| 126 |
-
"sharpness_min": false
|
| 127 |
-
}
|
| 128 |
-
}
|
| 129 |
-
],
|
| 130 |
-
"scheduler": [
|
| 131 |
-
{
|
| 132 |
-
"PolynomialLR": {
|
| 133 |
-
"total_iters": 501,
|
| 134 |
-
"power": 0.0
|
| 135 |
-
}
|
| 136 |
-
}
|
| 137 |
-
]
|
| 138 |
-
}
|
| 139 |
-
],
|
| 140 |
-
"loss_fns": [
|
| 141 |
-
{
|
| 142 |
-
"BCEWithLogitsLoss": {}
|
| 143 |
-
}
|
| 144 |
-
],
|
| 145 |
-
"cur_epoch": 358,
|
| 146 |
-
"cutoff": [
|
| 147 |
-
0.001163482666015625,
|
| 148 |
-
0.15087890625,
|
| 149 |
-
-0.587890625
|
| 150 |
-
]
|
| 151 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|