"""Deep Evolved Detection Head.

10-layer MLP with interleaved depthwise 3x3 convolutions operating on
92 evolutionarily-selected feature dimensions. The dimension selection
was performed via GPU-batched evolutionary search (200 generations/s).
The MLP was trained on the selected dimensions with the backbone frozen.

182K params, 10.6 mAP, mAP@0.75 = 10.8.
"""
|
|
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
|
|
# Width of the classification output: EvolvedDeepHead.cls_head emits this
# many logits per spatial location (presumably the 80 COCO categories --
# confirm against the training dataset).
NUM_CLASSES = 80
|
|
|
|
def cofiber_decompose(f, n_scales):
    """Split a feature map into ``n_scales`` multi-resolution components.

    Each step average-pools the current map by a factor of 2, bilinearly
    upsamples the pooled map back to the current resolution, and stores the
    difference between the current map and that upsampled version (the
    detail removed by pooling). The pooled map then becomes the input of the
    next step. After the last step the remaining coarsest map is appended.

    Args:
        f: Feature tensor. The code pools and interpolates over dims 2 and 3,
            so a 4-D ``(B, C, H, W)`` layout is assumed.
        n_scales: Number of components to produce. For values below 2 no
            pooling happens and the result is ``[f]``.

    Returns:
        A list of tensors ordered fine to coarse. The first
        ``n_scales - 1`` entries are the per-level differences; the last
        entry is the coarsest pooled map. Spatial size halves (floor) at
        each level.
    """
    levels = []
    current = f
    for _ in range(n_scales - 1):
        pooled = F.avg_pool2d(current, 2)
        # Upsample to the exact size of the current level so the subtraction
        # is well-defined even when H or W is odd.
        upsampled = F.interpolate(
            pooled,
            size=current.shape[2:],
            mode="bilinear",
            align_corners=False,
        )
        levels.append(current - upsampled)
        current = pooled
    levels.append(current)
    return levels
|
|
|
|
class SpatialDWConv(nn.Module):
    """Depthwise 3x3 convolution applied to channels-last feature maps.

    The wrapped ``nn.Conv2d`` uses ``groups == channels`` (one 3x3 filter per
    channel) and ``padding=1``, so the spatial size is preserved.
    """

    def __init__(self, channels):
        """Create the depthwise convolution.

        Args:
            channels: Number of input (and output) channels.
        """
        super().__init__()
        self.conv = nn.Conv2d(
            channels, channels, 3, padding=1, groups=channels
        )

    def forward(self, x, B, H, W):
        """Apply the depthwise conv to a channels-last 4-D tensor.

        Args:
            x: Input tensor. When 4-D it is treated as channels-last
                (``(B, H, W, C)``): it is permuted to channels-first for the
                convolution and permuted back afterwards.
            B: Unused; accepted so callers can pass the batch size.
            H: Unused; accepted so callers can pass the map height.
            W: Unused; accepted so callers can pass the map width.

        Returns:
            The convolved tensor in the same channels-last layout, or ``x``
            itself, untouched, when ``x`` is not 4-D.
        """
        # Anything that is not a 4-D map bypasses the convolution entirely.
        if x.dim() != 4:
            return x
        channels_first = x.permute(0, 3, 1, 2)
        filtered = self.conv(channels_first)
        return filtered.permute(0, 2, 3, 1)
|
|
|
|
class EvolvedDeepHead(nn.Module):
    """Detection head running a deep MLP over a selected feature subset.

    The input feature map is split into ``n_scales`` components with
    ``cofiber_decompose``. For each component the channel vector at every
    location is layer-normalised (one ``LayerNorm`` per scale), reduced to
    the channels listed in ``evolved_dims``, and passed through a shared
    stack of ``Linear -> GELU`` layers with a ``SpatialDWConv`` inserted
    after every second layer. Three linear heads then produce per-location
    classification, box-regression and centerness outputs.

    Args:
        evolved_dims: Sequence of channel indices to keep after
            normalisation. Each index must lie in ``[-feat_dim, feat_dim)``.
        hidden: Width of every hidden layer.
        n_layers: Number of ``Linear -> GELU`` layers in the shared stack.
        n_scales: Number of decomposition scales (and of per-scale norms).
        feat_dim: Channel count of the incoming feature map. Defaults to
            768, the value that was previously hard-coded.

    Raises:
        ValueError: If any entry of ``evolved_dims`` is outside
            ``[-feat_dim, feat_dim)``.
    """

    name = "evolved_deep"
    needs_intermediates = False

    def __init__(self, evolved_dims, hidden=128, n_layers=10, n_scales=3,
                 feat_dim=768):
        super().__init__()
        self.evolved_dims = evolved_dims
        self.n_scales = n_scales
        self.feat_dim = feat_dim

        dim_idx = torch.tensor(evolved_dims, dtype=torch.long)
        # Fail at construction time instead of with an opaque indexing error
        # (or a device-side assert on GPU) in the first forward pass.
        if dim_idx.numel() > 0:
            lowest, highest = int(dim_idx.min()), int(dim_idx.max())
            if lowest < -feat_dim or highest >= feat_dim:
                raise ValueError(
                    f"evolved_dims must index into {feat_dim} channels, "
                    f"got values in [{lowest}, {highest}]"
                )
        # Kept as a frozen Parameter (not a buffer) so existing checkpoints
        # and parameter listings are unaffected.
        self.dim_idx = nn.Parameter(dim_idx, requires_grad=False)
        self.scale_norms = nn.ModuleList(
            [nn.LayerNorm(feat_dim) for _ in range(n_scales)]
        )

        layers = []
        in_dim = len(evolved_dims)
        for i in range(n_layers):
            layers.append(nn.Linear(in_dim, hidden))
            layers.append(nn.GELU())
            # A depthwise spatial conv follows every second Linear/GELU pair.
            if i % 2 == 1:
                layers.append(SpatialDWConv(hidden))
            in_dim = hidden
        self.backbone = nn.Sequential(*layers)

        self.cls_head = nn.Linear(hidden, NUM_CLASSES)
        self.reg_head = nn.Linear(hidden, 4)
        self.ctr_head = nn.Linear(hidden, 1)
        # One learnable multiplier per scale, applied to the raw regression
        # output before clamping and exponentiation.
        self.scale_params = nn.Parameter(torch.ones(n_scales))

    def forward(self, spatial, inter=None):
        """Compute per-scale classification, regression and centerness maps.

        Args:
            spatial: Feature map unpacked as ``(B, C, H, W)``; ``C`` must
                match ``feat_dim`` for the per-scale LayerNorm to apply.
            inter: Unused; accepted for interface compatibility.

        Returns:
            Tuple ``(cls_l, reg_l, ctr_l)`` of lists with one tensor per
            scale, all channels-first:
            ``cls`` is ``(B, NUM_CLASSES, H, W)`` raw head output,
            ``reg`` is ``(B, 4, H, W)`` and strictly positive (``exp`` of
            the scaled output clamped to ``[-10, 10]``), and
            ``ctr`` is ``(B, 1, H, W)`` raw head output.
        """
        cofibers = cofiber_decompose(spatial, self.n_scales)
        cls_l, reg_l, ctr_l = [], [], []
        for i, cof in enumerate(cofibers):
            B, C, H, W = cof.shape
            # Channels-last, one row per spatial location.
            tokens = cof.permute(0, 2, 3, 1).reshape(-1, C)
            normed = self.scale_norms[i](tokens)
            selected = normed[:, self.dim_idx].reshape(B, H, W, -1)
            h = self._forward_with_spatial(selected, B, H, W)

            # Flatten once; all three heads consume the same rows.
            flat = h.reshape(-1, h.shape[-1])
            cls = self.cls_head(flat).reshape(B, H, W, -1).permute(0, 3, 1, 2)
            # Clamp before exp so the regression output cannot overflow.
            reg_raw = (self.reg_head(flat) * self.scale_params[i]).clamp(-10, 10)
            reg = reg_raw.exp().reshape(B, H, W, 4).permute(0, 3, 1, 2)
            ctr = self.ctr_head(flat).reshape(B, H, W, 1).permute(0, 3, 1, 2)

            cls_l.append(cls)
            reg_l.append(reg)
            ctr_l.append(ctr)
        return cls_l, reg_l, ctr_l

    def _forward_with_spatial(self, x, B, H, W):
        """Run the shared stack, passing spatial dims to the conv layers.

        ``nn.Sequential`` cannot be called directly because
        ``SpatialDWConv.forward`` takes ``B``, ``H`` and ``W`` in addition
        to the tensor, so the layers are applied one by one.
        """
        for layer in self.backbone:
            if isinstance(layer, SpatialDWConv):
                x = layer(x, B, H, W)
            else:
                x = layer(x)
        return x
|
|