Timerns
/

GlacialLakes

Model card Files Files and versions

GlacialLakes / code /lake_detection_deep_learning /trainer /models /swinv2Cnn.py

Timerns's picture

Upload folder using huggingface_hub

984cdba verified 8 days ago

history blame contribute delete

3.9 kB

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from einops import rearrange
	import timm

	# ---------------------------------------------------------
	# Basic CNN Blocks
	# ---------------------------------------------------------

	class DoubleConv(nn.Module):
	    """Two consecutive 3x3 convolution stages.

	    Each stage is a bias-free ``Conv2d`` (kernel 3, padding 1) followed by
	    ``BatchNorm2d`` and an in-place ``ReLU``. The first stage maps ``in_ch``
	    channels to ``out_ch``; the second keeps ``out_ch`` channels.
	    """

	    def __init__(self, in_ch, out_ch):
	        super().__init__()
	        stages = []
	        # First pass consumes in_ch, second pass consumes the out_ch it produced.
	        for src_ch in (in_ch, out_ch):
	            stages += [
	                nn.Conv2d(src_ch, out_ch, 3, padding=1, bias=False),
	                nn.BatchNorm2d(out_ch),
	                nn.ReLU(inplace=True),
	            ]
	        self.block = nn.Sequential(*stages)

	    def forward(self, x):
	        """Apply both conv stages to ``x`` and return the result."""
	        y = self.block(x)
	        return y


	class UpBlock(nn.Module):
	    """Decoder step: bilinear resize, concatenate the skip, fuse with DoubleConv.

	    The incoming feature map is resized to the skip tensor's spatial size
	    with bilinear interpolation; no transposed convolution is used (the
	    stated motivation being to avoid grid artifacts).
	    """

	    def __init__(self, in_ch, skip_ch, out_ch):
	        super().__init__()
	        # The fused tensor carries the channels of both inputs.
	        merged_ch = in_ch + skip_ch
	        self.conv = DoubleConv(merged_ch, out_ch)

	    def forward(self, x, skip):
	        """Resize ``x`` to ``skip``'s H x W, concat on dim 1, apply the conv."""
	        target_hw = skip.shape[2:]
	        upsampled = F.interpolate(
	            x, size=target_hw, mode="bilinear", align_corners=False
	        )
	        merged = torch.cat((upsampled, skip), dim=1)
	        return self.conv(merged)

	# ---------------------------------------------------------
	# SwinV2 + CNN Decoder
	# ---------------------------------------------------------

	class model(nn.Module):
	    """SwinV2-Tiny encoder (via timm) with a bilinear-upsampling CNN decoder.

	    The encoder is created with ``features_only=True`` and four output
	    stages. Three ``UpBlock`` steps merge those stages from deepest to
	    shallowest, a small conv stack refines the result, and the output is
	    resized to the input resolution before a 1x1 classification conv.

	    Args:
	        in_channels: Number of channels of the input image. When it differs
	            from the pretrained patch-embedding projection, that projection
	            is replaced by a freshly initialised ``nn.Conv2d`` with the same
	            geometry. When it matches, the pretrained projection is kept.
	        num_classes: Number of output channels of the final 1x1 conv.
	        freeze_encoder: If true, ``requires_grad`` is set to ``False`` on all
	            encoder parameters that exist at that point. A replacement
	            patch-embedding projection is created afterwards and therefore
	            stays trainable.
	    """

	    def __init__(
	        self,
	        in_channels=3,
	        num_classes=15,
	        freeze_encoder=False,
	    ):
	        super().__init__()

	        # -------------------------------
	        # Encoder (SwinV2)
	        # -------------------------------
	        self.encoder = timm.create_model(
	            "swinv2_tiny_window8_256",
	            pretrained=True,
	            features_only=True,
	            out_indices=(0, 1, 2, 3),
	        )

	        if freeze_encoder:
	            for p in self.encoder.parameters():
	                p.requires_grad = False

	        # Swap the patch-embedding projection only when the requested channel
	        # count differs from the pretrained one. Replacing it unconditionally
	        # would discard the pretrained weights for the default 3-channel case.
	        old_proj = self.encoder.patch_embed.proj
	        if in_channels != old_proj.in_channels:
	            # Created after the optional freeze above, so this randomly
	            # initialised layer remains trainable.
	            self.encoder.patch_embed.proj = nn.Conv2d(
	                in_channels=in_channels,
	                out_channels=old_proj.out_channels,
	                kernel_size=old_proj.kernel_size,
	                stride=old_proj.stride,
	                padding=old_proj.padding,
	                bias=old_proj.bias is not None,
	            )

	        # Encoder channel sizes, shallowest stage first.
	        c0, c1, c2, c3 = self.encoder.feature_info.channels()

	        # -------------------------------
	        # CNN Decoder (bilinear upsampling, no transposed convs)
	        # -------------------------------
	        self.up3 = UpBlock(c3, c2, c2)  # 1/32 -> 1/16
	        self.up2 = UpBlock(c2, c1, c1)  # 1/16 -> 1/8
	        self.up1 = UpBlock(c1, c0, c0)  # 1/8 -> 1/4

	        self.refine = nn.Sequential(
	            nn.Conv2d(c0, c0, 3, padding=1),
	            nn.ReLU(inplace=True),
	            nn.Conv2d(c0, c0, 3, padding=1),
	            nn.ReLU(inplace=True),
	        )

	        self.head = nn.Conv2d(c0, num_classes, kernel_size=1)

	    # ---------------------------------------------------------
	    # Forward
	    # ---------------------------------------------------------
	    def forward(self, x):
	        """Return per-pixel class scores for ``x``.

	        Args:
	            x: Input batch; its last two dimensions (``x.shape[2:]``) set the
	                spatial size of the output.

	        Returns:
	            Output of the 1x1 head: ``num_classes`` channels at the spatial
	            size of ``x``.
	        """
	        f0, f1, f2, f3 = self.encoder(x)

	        # The encoder features are treated as channels-last (B, H, W, C);
	        # the conv decoder needs channels-first.
	        f0 = rearrange(f0, "b h w c -> b c h w")
	        f1 = rearrange(f1, "b h w c -> b c h w")
	        f2 = rearrange(f2, "b h w c -> b c h w")
	        f3 = rearrange(f3, "b h w c -> b c h w")

	        # Decoder: merge from the deepest stage up to the shallowest.
	        d3 = self.up3(f3, f2)
	        d2 = self.up2(d3, f1)
	        d1 = self.up1(d2, f0)

	        d1 = self.refine(d1)

	        # Resize to the input resolution before classifying.
	        out = F.interpolate(
	            d1, size=x.shape[2:], mode="bilinear", align_corners=False
	        )

	        return self.head(out)