|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
import torch.nn.functional as F
|
|
|
from einops import rearrange
|
|
|
import timm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DoubleConv(nn.Module):
    """Two consecutive 3x3 Conv -> BatchNorm -> ReLU stages.

    The first conv maps ``in_ch`` -> ``out_ch``; the second keeps ``out_ch``.
    Convs are bias-free because each is immediately followed by BatchNorm,
    which has its own affine shift.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        layers = []
        channels = in_ch
        # Build the two identical conv stages in a loop; creation order
        # (conv, bn, conv, bn) matches the parameter-init order exactly.
        for _ in range(2):
            layers += [
                nn.Conv2d(channels, out_ch, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ]
            channels = out_ch
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply both conv stages; spatial size is preserved (padding=1)."""
        return self.block(x)
|
|
|
|
|
|
|
|
|
class UpBlock(nn.Module):
    """Decoder stage: bilinear upsample + skip concatenation + DoubleConv.

    Bilinear interpolation (rather than a transposed convolution) is used
    for upsampling, so the decoder cannot introduce checkerboard/grid
    artifacts.
    """

    def __init__(self, in_ch, skip_ch, out_ch):
        super().__init__()
        # Fuse the concatenated (upsampled + skip) channels down to out_ch.
        self.conv = DoubleConv(in_ch + skip_ch, out_ch)

    def forward(self, x, skip):
        """Upsample ``x`` to the skip's spatial size, concat, and fuse."""
        target_size = skip.shape[2:]
        upsampled = F.interpolate(
            x, size=target_size, mode="bilinear", align_corners=False
        )
        merged = torch.cat((upsampled, skip), dim=1)
        return self.conv(merged)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class model(nn.Module):
    """Semantic segmentation model: SwinV2-tiny encoder + light U-Net decoder.

    The timm encoder returns four feature maps at strides 4/8/16/32; the
    decoder progressively upsamples (bilinear) and fuses them through
    UpBlocks, refines at the finest scale, and predicts per-pixel class
    logits at the input resolution.

    Args:
        in_channels: number of input image channels. If it differs from the
            pretrained stem (3 for RGB), the patch-embedding conv is rebuilt
            and initialized from the pretrained weights.
        num_classes: number of output classes (channels of the logit map).
        freeze_encoder: if True, all pretrained encoder parameters are frozen
            (a replacement stem conv, if one is created, stays trainable).
    """

    def __init__(
        self,
        in_channels=3,
        num_classes=15,
        freeze_encoder=False,
    ):
        super().__init__()

        self.encoder = timm.create_model(
            "swinv2_tiny_window8_256",
            pretrained=True,
            features_only=True,
            out_indices=(0, 1, 2, 3),
        )

        if freeze_encoder:
            for p in self.encoder.parameters():
                p.requires_grad = False

        # BUGFIX: the original code rebuilt patch_embed.proj unconditionally,
        # replacing the pretrained stem with a randomly initialized conv even
        # for the default 3-channel case. Only adapt the stem when the channel
        # count actually differs, and seed it from the pretrained weights.
        old_proj = self.encoder.patch_embed.proj
        if in_channels != old_proj.in_channels:
            new_proj = nn.Conv2d(
                in_channels=in_channels,
                out_channels=old_proj.out_channels,
                kernel_size=old_proj.kernel_size,
                stride=old_proj.stride,
                padding=old_proj.padding,
                bias=old_proj.bias is not None,
            )
            with torch.no_grad():
                # Average the pretrained kernel over its input channels and
                # replicate across the new ones — standard recipe for reusing
                # RGB-pretrained stems with a different channel count.
                mean_w = old_proj.weight.mean(dim=1, keepdim=True)
                new_proj.weight.copy_(mean_w.repeat(1, in_channels, 1, 1))
                if old_proj.bias is not None:
                    new_proj.bias.copy_(old_proj.bias)
            # Replacement happens after the freeze loop, so the adapted stem
            # remains trainable even with freeze_encoder=True (as before).
            self.encoder.patch_embed.proj = new_proj

        # Per-stage channel counts reported by timm (strides 4, 8, 16, 32).
        c0, c1, c2, c3 = self.encoder.feature_info.channels()

        # Decoder: fuse deepest-to-shallowest, halving the stride each step.
        self.up3 = UpBlock(c3, c2, c2)
        self.up2 = UpBlock(c2, c1, c1)
        self.up1 = UpBlock(c1, c0, c0)

        # Extra conv refinement at the finest decoder scale before the head.
        self.refine = nn.Sequential(
            nn.Conv2d(c0, c0, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(c0, c0, 3, padding=1),
            nn.ReLU(inplace=True),
        )

        # 1x1 conv producing per-pixel class logits.
        self.head = nn.Conv2d(c0, num_classes, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class logits of shape (B, num_classes, H, W)."""
        f0, f1, f2, f3 = self.encoder(x)

        # timm's swin feature extractor emits channels-last (B, H, W, C)
        # maps — permute to the (B, C, H, W) layout the conv decoder expects.
        # NOTE(review): assumes this timm version outputs NHWC for swin
        # features_only models — verify if timm is upgraded.
        f0 = rearrange(f0, "b h w c -> b c h w")
        f1 = rearrange(f1, "b h w c -> b c h w")
        f2 = rearrange(f2, "b h w c -> b c h w")
        f3 = rearrange(f3, "b h w c -> b c h w")

        d3 = self.up3(f3, f2)
        d2 = self.up2(d3, f1)
        d1 = self.up1(d2, f0)

        d1 = self.refine(d1)

        # Upsample to the exact input resolution (handles any H, W, not
        # just multiples of the patch stride).
        out = F.interpolate(
            d1, size=x.shape[2:], mode="bilinear", align_corners=False
        )

        return self.head(out)
|
|
|
|