# NOTE: removed paste artifacts ("Spaces:", "Runtime error") that were not part of the source.
"""
Codegoni A, Lombardi G, Ferrari A.
TINYCD: A (Not So) Deep Learning Model For Change Detection[J].
arXiv preprint arXiv:2207.13159, 2022.
The code in this file is borrowed from:
https://github.com/AndreaCodegoni/Tiny_model_4_CD
"""
from typing import List, Optional

import torchvision
from torch import Tensor, reshape, stack
from torch.nn import (Conv2d, InstanceNorm2d, Module, ModuleList, PReLU,
                      Sequential, Upsample)

from opencd.registry import MODELS
class PixelwiseLinear(Module):
    """A per-pixel MLP built from a stack of 1x1 convolutions.

    Each stage is ``Conv2d(kernel_size=1)`` followed by ``PReLU``, except
    that the last stage uses ``last_activation`` when one is provided.

    Args:
        fin: Input channel count of each 1x1 convolution.
        fout: Output channel count of each 1x1 convolution; must have the
            same length as ``fin``.
        last_activation: Optional activation module for the final stage;
            when ``None`` every stage (including the last) ends with PReLU.
    """

    def __init__(
        self,
        fin: List[int],
        fout: List[int],
        # Fixed annotation: the default is None, so the type is Optional.
        last_activation: Optional[Module] = None,
    ) -> None:
        assert len(fout) == len(fin)
        super().__init__()
        n = len(fin)
        self._linears = Sequential(
            *[
                Sequential(
                    Conv2d(fin[i], fout[i], kernel_size=1, bias=True),
                    # PReLU everywhere except the final stage when a custom
                    # last activation was supplied.
                    PReLU()
                    if i < n - 1 or last_activation is None
                    else last_activation,
                )
                for i in range(n)
            ]
        )

    def forward(self, x: Tensor) -> Tensor:
        """Apply the pixel-wise MLP to a (B, C, H, W) tensor."""
        return self._linears(x)
class MixingBlock(Module):
    """Interleave two feature maps channel-wise and fuse them.

    The channels of the two inputs are zipped together as
    (x0, y0, x1, y1, ...) and then mixed by a grouped 3x3 convolution,
    so each output channel sees exactly one (x, y) channel pair.
    """

    def __init__(
        self,
        ch_in: int,
        ch_out: int,
    ):
        super().__init__()
        self._convmix = Sequential(
            Conv2d(ch_in, ch_out, 3, groups=ch_out, padding=1),
            PReLU(),
            InstanceNorm2d(ch_out),
        )

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        batch, _, height, width = x.shape
        # Zip the two tensors so their channels alternate pairwise, then
        # flatten the pair axis back into the channel axis.
        interleaved = stack((x, y), dim=2)
        interleaved = reshape(interleaved, (batch, -1, height, width))
        return self._convmix(interleaved)
class MixingMaskAttentionBlock(Module):
    """Grouped-convolution mixing followed by a pixel-wise attention mask.

    A mixing block fuses the two inputs and a pixel-wise MLP turns the
    result into a mask. When ``generate_masked`` is True, the mask
    multiplies a second, independent mixing branch and the product is
    instance-normalized; otherwise the mask itself is returned.
    """

    def __init__(
        self,
        ch_in: int,
        ch_out: int,
        fin: List[int],
        fout: List[int],
        generate_masked: bool = False,
    ):
        super().__init__()
        self._mixing = MixingBlock(ch_in, ch_out)
        self._linear = PixelwiseLinear(fin, fout)
        # Both of these exist only in the masked configuration.
        self._final_normalization = InstanceNorm2d(ch_out) if generate_masked else None
        self._mixing_out = MixingBlock(ch_in, ch_out) if generate_masked else None

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        mask = self._linear(self._mixing(x, y))
        if self._final_normalization is None:
            # Plain configuration: the pixel-wise output is the result.
            return mask
        # Masked configuration: modulate the second mixing branch.
        return self._final_normalization(self._mixing_out(x, y) * mask)
class UpMask(Module):
    """Bilinear upsampling followed by depthwise + pointwise refinement.

    The optional second input ``y`` is applied as a multiplicative mask
    right after upsampling, before the convolutional refinement.
    """

    def __init__(
        self,
        scale_factor: float,
        nin: int,
        nout: int,
    ):
        super().__init__()
        self._upsample = Upsample(
            scale_factor=scale_factor, mode="bilinear", align_corners=True
        )
        self._convolution = Sequential(
            # Depthwise 3x3 refinement, then a 1x1 projection to ``nout``.
            Conv2d(nin, nin, 3, 1, groups=nin, padding=1),
            PReLU(),
            InstanceNorm2d(nin),
            Conv2d(nin, nout, kernel_size=1, stride=1),
            PReLU(),
            InstanceNorm2d(nout),
        )

    def forward(self, x: Tensor, y: Optional[Tensor] = None) -> Tensor:
        upsampled = self._upsample(x)
        if y is not None:
            upsampled = upsampled * y
        return self._convolution(upsampled)
def _get_backbone(
    bkbn_name, pretrained, output_layer_bkbn, freeze_backbone
) -> ModuleList:
    """Build a truncated torchvision backbone.

    Instantiates ``torchvision.models.<bkbn_name>`` and keeps the children
    of its ``.features`` module up to and including the child named
    ``output_layer_bkbn``. Optionally freezes every retained parameter.
    """
    # NOTE(review): the ``pretrained=`` keyword is deprecated in recent
    # torchvision releases in favor of ``weights=`` — confirm the pinned
    # torchvision version before changing it.
    features = getattr(torchvision.models, bkbn_name)(
        pretrained=pretrained
    ).features

    # Slice the feature extractor at the requested layer (inclusive).
    sliced = ModuleList([])
    for child_name, child in features.named_children():
        sliced.append(child)
        if child_name == output_layer_bkbn:
            break

    if freeze_backbone:
        # Detach the backbone from gradient updates.
        for weight in sliced.parameters():
            weight.requires_grad = False
    return sliced
class TinyCD(Module):
    """TinyCD change-detection network.

    Two co-registered images are passed through a shared (optionally
    frozen) torchvision backbone; the features of each stage are fused by
    mixing / attention blocks, progressively upsampled with mask guidance,
    and classified per pixel into a single change channel.

    NOTE(review): ``in_channels`` is accepted but never used, and the
    ``MODELS`` registry imported at the top of the file is never applied —
    confirm whether a ``@MODELS.register_module()`` decorator was intended
    here.
    """

    def __init__(
        self,
        in_channels,
        bkbn_name="efficientnet_b4",
        pretrained=True,
        output_layer_bkbn="3",
        freeze_backbone=False,
    ):
        super().__init__()
        # Shared siamese backbone, truncated at ``output_layer_bkbn``.
        self._backbone = _get_backbone(
            bkbn_name, pretrained, output_layer_bkbn, freeze_backbone
        )
        # Fusion blocks: one for the raw image pair, one per backbone stage
        # after the first.
        self._first_mix = MixingMaskAttentionBlock(6, 3, [3, 10, 5], [10, 5, 1])
        self._mixing_mask = ModuleList(
            [
                MixingMaskAttentionBlock(48, 24, [24, 12, 6], [12, 6, 1]),
                MixingMaskAttentionBlock(64, 32, [32, 16, 8], [16, 8, 1]),
                MixingBlock(112, 56),
            ]
        )
        # Decoder: three mask-guided x2 upsampling stages.
        self._up = ModuleList(
            [
                UpMask(2, 56, 64),
                UpMask(2, 64, 64),
                UpMask(2, 64, 32),
            ]
        )
        # Per-pixel classifier producing one change channel.
        self._classify = PixelwiseLinear([32, 16], [16, 1], None)

    def forward(self, x1: Tensor, x2: Tensor) -> Tensor:
        features = self._encode(x1, x2)
        decoded = self._decode(features)
        # Returned as a 1-tuple to match the decode-head interface.
        return (self._classify(decoded),)

    def _encode(self, ref, test) -> List[Tensor]:
        """Run both images through the backbone, fusing at every stage."""
        features = [self._first_mix(ref, test)]
        for stage_idx, stage in enumerate(self._backbone):
            ref, test = stage(ref), stage(test)
            if stage_idx != 0:
                # Stage 0 has no dedicated mixing block.
                features.append(self._mixing_mask[stage_idx - 1](ref, test))
        return features

    def _decode(self, features) -> Tensor:
        """Upsample from the deepest feature, masking with skip features."""
        decoded = features[-1]
        for up_idx, feat_idx in enumerate(range(-2, -5, -1)):
            decoded = self._up[up_idx](decoded, features[feat_idx])
        return decoded