| # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved | |
| """ | |
| Backbone modules. | |
| """ | |
| from collections import OrderedDict | |
| import torch | |
| import torch.nn.functional as F | |
| import torchvision | |
| from torch import nn | |
| import models.vgg_ as vgg_models | |
| # import models.resnet_ as resnet_models | |
class BackboneBase_VGG(nn.Module):
    """Slice the ``features`` layers of a VGG-style network into a feature extractor.

    The children of ``backbone.features`` are cut by index into either four
    consecutive stages (``body1`` .. ``body4``) or a single truncated stack
    (``body``), depending on ``return_interm_layers``.

    Args:
        backbone: Module exposing a ``features`` container; its children are
            the layers that get sliced.
        num_channels: Stored unchanged on ``self.num_channels``; this class
            does not otherwise use it.
        name: Architecture name selecting the slice indices. ``'vgg16_bn'``
            and ``'vgg16'`` are recognised.
        return_interm_layers: If true, ``forward`` returns the output of each
            of the four stages; otherwise it returns only the output of the
            single truncated stack.

    Raises:
        ValueError: If ``return_interm_layers`` is false and ``name`` is not a
            recognised architecture. Previously this left ``self.body``
            undefined and only failed later, inside ``forward``.
    """

    # Boundaries (start, b1, b2, b3, end) used to cut ``features`` into four stages.
    _STAGE_BOUNDS = {
        'vgg16_bn': (0, 13, 23, 33, 43),
        'vgg16': (0, 9, 16, 23, 30),
    }
    # Exclusive end index of the single truncated stack.
    # NOTE(review): the original comments call both of these "16x down-sample".
    # With a torchvision-style vgg16_bn layout, index 43 is the final max-pool,
    # so [:44] would presumably be 32x -- confirm against models.vgg_.
    _SINGLE_STACK_END = {
        'vgg16_bn': 44,
        'vgg16': 30,
    }

    def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool):
        super().__init__()
        features = list(backbone.features.children())
        if return_interm_layers:
            # Any name other than 'vgg16_bn' uses the plain vgg16 layout, as before.
            bounds = self._STAGE_BOUNDS.get(name, self._STAGE_BOUNDS['vgg16'])
            # Attribute names and assignment order are kept so that state_dict
            # keys and parameter ordering stay compatible with existing checkpoints.
            self.body1 = nn.Sequential(*features[bounds[0]:bounds[1]])
            self.body2 = nn.Sequential(*features[bounds[1]:bounds[2]])
            self.body3 = nn.Sequential(*features[bounds[2]:bounds[3]])
            self.body4 = nn.Sequential(*features[bounds[3]:bounds[4]])
        else:
            if name not in self._SINGLE_STACK_END:
                # Fail at construction instead of leaving ``self.body`` unset.
                raise ValueError(
                    f"Unsupported VGG backbone: {name!r} "
                    f"(expected one of {sorted(self._SINGLE_STACK_END)})"
                )
            self.body = nn.Sequential(*features[:self._SINGLE_STACK_END[name]])
        self.num_channels = num_channels
        self.return_interm_layers = return_interm_layers

    def forward(self, tensor_list):
        """Run the input through the backbone and return a list of feature maps.

        Args:
            tensor_list: Input passed directly to the first layer.

        Returns:
            A list with four entries (one per stage, in order) when
            ``return_interm_layers`` is true, otherwise a single-entry list.
        """
        if not self.return_interm_layers:
            return [self.body(tensor_list)]

        out = []
        xs = tensor_list
        # Each stage consumes the previous stage's output.
        for stage in (self.body1, self.body2, self.body3, self.body4):
            xs = stage(xs)
            out.append(xs)
        return out
| # class BackboneBase_ResNet(nn.Module): | |
| # def __init__(self, backbone: models_resnet.ResNet, fpn_target_channels: list): | |
| # super().__init__() | |
| # # fpn_target_channels = [c_for_features_1, c_for_features_2, c_for_features_3] | |
| # # e.g. [256, 512, 512], which are the channel dimensions FPN expects for its P3, P4, P5 inputs respectively. | |
| # # Extract ResNet layers | |
| # self.body0_main = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool) | |
| # self.body1_main = backbone.layer1 # Corresponds to C2 level | |
| # self.body2_main = backbone.layer2 # Corresponds to C3 level | |
| # self.body3_main = backbone.layer3 # Corresponds to C4 level | |
| # self.body4_main = backbone.layer4 # Corresponds to C5 level | |
| # # Determine input channels from ResNet layers for adapters | |
| # # This logic assumes Bottleneck or BasicBlock as used in standard ResNets | |
| # if isinstance(backbone.layer1[0], models_resnet.Bottleneck): | |
| # l1_channels = 256 | |
| # l2_channels = 512 | |
| # l3_channels = 1024 | |
| # l4_channels = 2048 | |
| # elif isinstance(backbone.layer1[0], models_resnet.BasicBlock): | |
| # l1_channels = 64 | |
| # l2_channels = 128 | |
| # l3_channels = 256 | |
| # l4_channels = 512 | |
| # else: | |
| # raise NotImplementedError(f"Unknown block type in ResNet: {type(backbone.layer1[0])}. Please check resnet_.py.") | |
| # # Adapter layers to transform ResNet layer outputs to the channel dimensions | |
| # # expected by the FPN (features[1], features[2], features[3]). | |
| # # features[1] (FPN's P3 input) is derived from ResNet's layer2 (C3) output. | |
| # self.adapter1 = nn.Conv2d(l2_channels, fpn_target_channels[0], kernel_size=1) | |
| # # features[2] (FPN's P4 input) is derived from ResNet's layer3 (C4) output. | |
| # self.adapter2 = nn.Conv2d(l3_channels, fpn_target_channels[1], kernel_size=1) | |
| # # features[3] (FPN's P5 input) is derived from ResNet's layer4 (C5) output. | |
| # self.adapter3 = nn.Conv2d(l4_channels, fpn_target_channels[2], kernel_size=1) | |
| # def forward(self, tensor_list): | |
| # xs = self.body0_main(tensor_list) | |
| # out = [] | |
| # x1 = self.body1_main(xs) # Output of ResNet's layer1 (C2) | |
| # out.append(x1) # This corresponds to features[0] | |
| # x2 = self.body2_main(x1) # Output of ResNet's layer2 (C3) | |
| # out.append(self.adapter1(x2)) # Adapted, corresponds to features[1] | |
| # x3 = self.body3_main(x2) # Output of ResNet's layer3 (C4) | |
| # out.append(self.adapter2(x3)) # Adapted, corresponds to features[2] | |
| # x4 = self.body4_main(x3) # Output of ResNet's layer4 (C5) | |
| # out.append(self.adapter3(x4)) # Adapted, corresponds to features[3] | |
| # return out | |
| # class Backbone_ResNet(BackboneBase_ResNet): | |
| # def __init__(self, name: str, pretrained: bool, fpn_target_channels: list = [256, 512, 512]): | |
| # if name == 'resnet50': | |
| # # Use weights from your resnet_.py, assuming it follows torchvision's API | |
| # weights = models_resnet.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None | |
| # backbone_model = models_resnet.resnet50(weights=weights) | |
| # # Add elif for other ResNet variants like resnet18, resnet34 if needed | |
| # # elif name == 'resnet18': | |
| # # weights = models_resnet.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None | |
| # # backbone_model = models_resnet.resnet18(weights=weights) | |
| # else: | |
| # raise ValueError(f"Unsupported ResNet backbone: {name}") | |
| # super().__init__(backbone_model, fpn_target_channels) | |
| # # num_channels is the channel dimension of the FPN output features (P3, P4, P5) | |
| # # that are fed to the detection/segmentation heads. | |
| # # Your FPN's `feature_size` is 256. | |
| # self.num_channels = 256 | |
| # class BackboneBase_ResNet(nn.Module): | |
| # """ResNet backbone with frozen BatchNorm.""" | |
| # pass | |
class Backbone_VGG(BackboneBase_VGG):
    """VGG-16 backbone built from the ``models.vgg_`` constructors.

    Instantiates the requested VGG variant with ``pretrained=True`` and hands
    it to ``BackboneBase_VGG`` for slicing. ``num_channels`` is fixed at 256.

    Args:
        name: Which variant to build: ``'vgg16_bn'`` or ``'vgg16'``.
        return_interm_layers: Forwarded unchanged to ``BackboneBase_VGG``.

    Raises:
        ValueError: If ``name`` is not one of the supported variants.
            Previously this surfaced as an UnboundLocalError on ``backbone``.
    """

    def __init__(self, name: str, return_interm_layers: bool):
        if name == 'vgg16_bn':
            backbone = vgg_models.vgg16_bn(pretrained=True)
        elif name == 'vgg16':
            backbone = vgg_models.vgg16(pretrained=True)
        else:
            # Without this branch ``backbone`` would be unbound below.
            raise ValueError(
                f"Unsupported VGG backbone: {name!r} (expected 'vgg16_bn' or 'vgg16')"
            )
        num_channels = 256
        super().__init__(backbone, num_channels, name, return_interm_layers)
def build_backbone(args):
    """Create the VGG backbone named by ``args.backbone``.

    Intermediate-layer outputs are always enabled (the second argument to
    ``Backbone_VGG`` is hard-coded to ``True``).

    Args:
        args: Object with a ``backbone`` attribute holding the variant name.

    Returns:
        The constructed ``Backbone_VGG`` instance.
    """
    return Backbone_VGG(args.backbone, True)
if __name__ == '__main__':
    # Smoke test: build the plain VGG-16 backbone with intermediate outputs.
    Backbone_VGG(name='vgg16', return_interm_layers=True)