Zhipeng

init project

966d9af 3 months ago

9.27 kB

	from __future__ import annotations

	from typing import Iterable, Sequence

	import torch
	from torch import nn
	from torch.nn import functional as F


	def squash(x: torch.Tensor, dim: int = -1, eps: float = 1e-7) -> torch.Tensor:
	"""Squash nonlinearity used by capsule networks."""
	squared_norm = (x * x).sum(dim=dim, keepdim=True)
	scale = squared_norm / (1.0 + squared_norm)
	return scale * x / torch.sqrt(squared_norm + eps)


	class ConvBNAct(nn.Module):
	"""Convolution + BatchNorm + SiLU."""

	def __init__(
	self,
	in_channels: int,
	out_channels: int,
	kernel_size: int = 3,
	stride: int = 1,
	padding: int \| None = None,
	) -> None:
	super().__init__()
	if padding is None:
	padding = kernel_size // 2
	self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
	self.bn = nn.BatchNorm2d(out_channels)
	self.act = nn.SiLU(inplace=True)

	def forward(self, x: torch.Tensor) -> torch.Tensor:
	return self.act(self.bn(self.conv(x)))


	class PrimaryCaps2d(nn.Module):
	"""Primary capsule layer for 2D feature maps."""

	def __init__(
	self,
	in_channels: int,
	num_caps: int,
	dim_caps: int,
	kernel_size: int = 1,
	stride: int = 1,
	padding: int \| None = None,
	) -> None:
	super().__init__()
	if padding is None:
	padding = kernel_size // 2
	out_channels = num_caps * dim_caps
	self.num_caps = num_caps
	self.dim_caps = dim_caps
	self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
	self.bn = nn.BatchNorm2d(out_channels)
	self.act = nn.SiLU(inplace=True)

	def forward(self, x: torch.Tensor) -> torch.Tensor:
	x = self.act(self.bn(self.conv(x)))
	bsz, _, h, w = x.shape
	x = x.view(bsz, self.num_caps, self.dim_caps, h, w)
	return squash(x, dim=2)


	class RoutingCaps(nn.Module):
	"""Dynamic routing between capsules."""

	def __init__(
	self,
	num_in_caps: int,
	dim_in: int,
	num_out_caps: int,
	dim_out: int,
	routing_iters: int = 3,
	) -> None:
	super().__init__()
	self.num_in_caps = num_in_caps
	self.dim_in = dim_in
	self.num_out_caps = num_out_caps
	self.dim_out = dim_out
	self.routing_iters = routing_iters

	weight = torch.randn(1, num_in_caps, num_out_caps, dim_out, dim_in) * 0.01
	self.W = nn.Parameter(weight)

	def forward(self, x: torch.Tensor) -> torch.Tensor:
	if x.ndim != 3:
	raise ValueError(f"RoutingCaps expects [B, N, D], got {tuple(x.shape)}")
	bsz = x.shape[0]
	x = x.unsqueeze(2).unsqueeze(-1) # [B, N, 1, D, 1]
	u_hat = torch.matmul(self.W, x).squeeze(-1) # [B, N, M, Dout]
	b = x.new_zeros(bsz, self.num_in_caps, self.num_out_caps)
	for idx in range(self.routing_iters):
	c = F.softmax(b, dim=-1)
	s = (c.unsqueeze(-1) * u_hat).sum(dim=1)
	v = squash(s, dim=-1)
	if idx < self.routing_iters - 1:
	b = b + (u_hat * v.unsqueeze(1)).sum(dim=-1)
	return v


	class DeformableCaps2d(nn.Module):
	"""Deformable capsule layer with learned sampling offsets."""

	def __init__(
	self,
	in_channels: int,
	num_child_caps: int = 8,
	dim_child: int = 8,
	num_parent_caps: int = 8,
	dim_parent: int = 8,
	num_samples: int = 4,
	routing_iters: int = 3,
	offset_scale: float = 1.0,
	out_channels: int \| None = None,
	) -> None:
	super().__init__()
	self.num_child_caps = num_child_caps
	self.dim_child = dim_child
	self.num_parent_caps = num_parent_caps
	self.dim_parent = dim_parent
	self.num_samples = num_samples
	self.routing_iters = routing_iters
	self.offset_scale = offset_scale

	self.primary = PrimaryCaps2d(in_channels, num_child_caps, dim_child, kernel_size=1, stride=1, padding=0)
	self.offset = nn.Conv2d(in_channels, 2 * num_samples, kernel_size=3, stride=1, padding=1)
	nn.init.zeros_(self.offset.weight)
	nn.init.zeros_(self.offset.bias)

	self.routing = RoutingCaps(
	num_in_caps=num_samples * num_child_caps,
	dim_in=dim_child,
	num_out_caps=num_parent_caps,
	dim_out=dim_parent,
	routing_iters=routing_iters,
	)

	caps_channels = num_parent_caps * dim_parent
	self.out_channels = out_channels or caps_channels
	self.project = None
	if self.out_channels != caps_channels:
	self.project = ConvBNAct(caps_channels, self.out_channels, kernel_size=1, stride=1, padding=0)

	@staticmethod
	def _base_grid(h: int, w: int, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
	ys = torch.linspace(-1.0, 1.0, h, device=device, dtype=dtype)
	xs = torch.linspace(-1.0, 1.0, w, device=device, dtype=dtype)
	yy, xx = torch.meshgrid(ys, xs, indexing="ij")
	return torch.stack((xx, yy), dim=-1)

	def forward(self, x: torch.Tensor) -> torch.Tensor:
	child = self.primary(x) # [B, Nc, Dc, H, W]
	bsz, _, _, h, w = child.shape

	child_flat = child.view(bsz, self.num_child_caps * self.dim_child, h, w)
	offsets = self.offset(x).view(bsz, self.num_samples, 2, h, w)
	offsets = torch.tanh(offsets) * self.offset_scale

	scale_x = max(w - 1, 1) / 2.0
	scale_y = max(h - 1, 1) / 2.0
	scale = offsets.new_tensor([scale_x, scale_y]).view(1, 1, 2, 1, 1)
	offsets = offsets / scale

	base = self._base_grid(h, w, x.device, x.dtype).view(1, 1, h, w, 2)
	grids = base + offsets.permute(0, 1, 3, 4, 2)

	sampled = []
	for idx in range(self.num_samples):
	grid = grids[:, idx]
	feat = F.grid_sample(
	child_flat,
	grid,
	mode="bilinear",
	padding_mode="zeros",
	align_corners=True,
	)
	sampled.append(feat)

	sampled = torch.stack(sampled, dim=1)
	sampled = sampled.view(bsz, self.num_samples, self.num_child_caps, self.dim_child, h, w)
	sampled = sampled.permute(0, 4, 5, 1, 2, 3).contiguous()
	sampled = sampled.view(bsz * h * w, self.num_samples * self.num_child_caps, self.dim_child)

	routed = self.routing(sampled)
	routed = routed.view(bsz, h, w, self.num_parent_caps, self.dim_parent)
	routed = routed.permute(0, 3, 4, 1, 2).contiguous()
	out = routed.view(bsz, self.num_parent_caps * self.dim_parent, h, w)

	if self.project is not None:
	out = self.project(out)
	return out


	class DeformableCapsBlock(nn.Module):
	"""Backbone block: Conv downsample + deformable capsule routing."""

	def __init__(
	self,
	c1: int,
	c2: int,
	num_child_caps: int = 8,
	dim_child: int = 8,
	num_parent_caps: int = 8,
	dim_parent: int = 8,
	num_samples: int = 4,
	routing_iters: int = 3,
	stride: int = 1,
	) -> None:
	super().__init__()
	self.down = ConvBNAct(c1, c2, kernel_size=3, stride=stride)
	self.caps = DeformableCaps2d(
	in_channels=c2,
	num_child_caps=num_child_caps,
	dim_child=dim_child,
	num_parent_caps=num_parent_caps,
	dim_parent=dim_parent,
	num_samples=num_samples,
	routing_iters=routing_iters,
	out_channels=c2,
	)

	def forward(self, x: torch.Tensor) -> torch.Tensor:
	x = self.down(x)
	return self.caps(x)


	class CapsuleBackbone(nn.Module):
	"""Simple capsule-based backbone that returns multi-scale features."""

	def __init__(
	self,
	in_channels: int = 3,
	stem_channels: int = 64,
	stages: Sequence[int] = (128, 256, 512),
	capsule_cfgs: Iterable[dict] \| None = None,
	) -> None:
	super().__init__()
	self.stem = ConvBNAct(in_channels, stem_channels, kernel_size=3, stride=2)
	stage_cfgs = list(capsule_cfgs) if capsule_cfgs is not None else [{}] * len(stages)
	if len(stage_cfgs) != len(stages):
	raise ValueError("capsule_cfgs must match stages length")

	blocks = []
	in_ch = stem_channels
	for out_ch, cfg in zip(stages, stage_cfgs):
	blocks.append(
	nn.Sequential(
	ConvBNAct(in_ch, out_ch, kernel_size=3, stride=2),
	DeformableCaps2d(out_ch, out_channels=out_ch, **cfg),
	)
	)
	in_ch = out_ch
	self.stages = nn.ModuleList(blocks)

	def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, ...]:
	x = self.stem(x)
	outputs = []
	for stage in self.stages:
	x = stage(x)
	outputs.append(x)
	return tuple(outputs)


	__all__ = [
	"CapsuleBackbone",
	"ConvBNAct",
	"DeformableCaps2d",
	"DeformableCapsBlock",
	"PrimaryCaps2d",
	"RoutingCaps",
	"squash",
	]