# wormcountingmlp / backbone.py
# Uploaded via huggingface_hub (revision aadf069, verified).
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Backbone modules.
"""
from collections import OrderedDict
import torch
import torch.nn.functional as F
import torchvision
from torch import nn
import models.vgg_ as vgg_models
# import models.resnet_ as resnet_models
class BackboneBase_VGG(nn.Module):
    """Wraps a VGG feature extractor, exposing either the full 16x-downsampling
    trunk or four intermediate stages.

    Args:
        backbone: a torchvision-style VGG module; only ``backbone.features``
            (a ``nn.Sequential``) is used.
        num_channels: channel count reported to downstream consumers
            (stored, not validated against the backbone).
        name: either ``'vgg16_bn'`` or ``'vgg16'`` — selects the layer-index
            split points, which differ because BN layers shift the indices.
        return_interm_layers: if True, ``forward`` returns the outputs of all
            four stages; otherwise a single-element list with the final map.
    """

    def __init__(self, backbone: nn.Module, num_channels: int, name: str, return_interm_layers: bool):
        super().__init__()
        features = list(backbone.features.children())
        if return_interm_layers:
            # Split the feature stack into four stages; index boundaries
            # correspond to the pooling stages of VGG-16 (with/without BN).
            if name == 'vgg16_bn':
                self.body1 = nn.Sequential(*features[:13])
                self.body2 = nn.Sequential(*features[13:23])
                self.body3 = nn.Sequential(*features[23:33])
                self.body4 = nn.Sequential(*features[33:43])
            else:
                self.body1 = nn.Sequential(*features[:9])
                self.body2 = nn.Sequential(*features[9:16])
                self.body3 = nn.Sequential(*features[16:23])
                self.body4 = nn.Sequential(*features[23:30])
        else:
            if name == 'vgg16_bn':
                self.body = nn.Sequential(*features[:44])  # 16x down-sample
            elif name == 'vgg16':
                self.body = nn.Sequential(*features[:30])  # 16x down-sample
            else:
                # Previously an unknown name silently left self.body unset,
                # producing a confusing AttributeError at forward() time.
                raise ValueError(f"Unsupported VGG variant: {name!r}")
        self.num_channels = num_channels
        self.return_interm_layers = return_interm_layers

    def forward(self, tensor_list):
        """Run the backbone.

        Returns a list of feature maps: four (one per stage) when
        ``return_interm_layers`` is set, otherwise one.
        """
        out = []
        if self.return_interm_layers:
            xs = tensor_list
            # Feed each stage the previous stage's output, collecting all four.
            for stage in (self.body1, self.body2, self.body3, self.body4):
                xs = stage(xs)
                out.append(xs)
        else:
            out.append(self.body(tensor_list))
        return out
# class BackboneBase_ResNet(nn.Module):
# def __init__(self, backbone: models_resnet.ResNet, fpn_target_channels: list):
# super().__init__()
# # fpn_target_channels = [c_for_features_1, c_for_features_2, c_for_features_3]
# # e.g. [256, 512, 512], which are the channel dimensions FPN expects for its P3, P4, P5 inputs respectively.
# # Extract ResNet layers
# self.body0_main = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool)
# self.body1_main = backbone.layer1 # Corresponds to C2 level
# self.body2_main = backbone.layer2 # Corresponds to C3 level
# self.body3_main = backbone.layer3 # Corresponds to C4 level
# self.body4_main = backbone.layer4 # Corresponds to C5 level
# # Determine input channels from ResNet layers for adapters
# # This logic assumes Bottleneck or BasicBlock as used in standard ResNets
# if isinstance(backbone.layer1[0], models_resnet.Bottleneck):
# l1_channels = 256
# l2_channels = 512
# l3_channels = 1024
# l4_channels = 2048
# elif isinstance(backbone.layer1[0], models_resnet.BasicBlock):
# l1_channels = 64
# l2_channels = 128
# l3_channels = 256
# l4_channels = 512
# else:
# raise NotImplementedError(f"Unknown block type in ResNet: {type(backbone.layer1[0])}. Please check resnet_.py.")
# # Adapter layers to transform ResNet layer outputs to the channel dimensions
# # expected by the FPN (features[1], features[2], features[3]).
# # features[1] (FPN's P3 input) is derived from ResNet's layer2 (C3) output.
# self.adapter1 = nn.Conv2d(l2_channels, fpn_target_channels[0], kernel_size=1)
# # features[2] (FPN's P4 input) is derived from ResNet's layer3 (C4) output.
# self.adapter2 = nn.Conv2d(l3_channels, fpn_target_channels[1], kernel_size=1)
# # features[3] (FPN's P5 input) is derived from ResNet's layer4 (C5) output.
# self.adapter3 = nn.Conv2d(l4_channels, fpn_target_channels[2], kernel_size=1)
# def forward(self, tensor_list):
# xs = self.body0_main(tensor_list)
# out = []
# x1 = self.body1_main(xs) # Output of ResNet's layer1 (C2)
# out.append(x1) # This corresponds to features[0]
# x2 = self.body2_main(x1) # Output of ResNet's layer2 (C3)
# out.append(self.adapter1(x2)) # Adapted, corresponds to features[1]
# x3 = self.body3_main(x2) # Output of ResNet's layer3 (C4)
# out.append(self.adapter2(x3)) # Adapted, corresponds to features[2]
# x4 = self.body4_main(x3) # Output of ResNet's layer4 (C5)
# out.append(self.adapter3(x4)) # Adapted, corresponds to features[3]
# return out
# class Backbone_ResNet(BackboneBase_ResNet):
# def __init__(self, name: str, pretrained: bool, fpn_target_channels: list = [256, 512, 512]):
# if name == 'resnet50':
# # Use weights from your resnet_.py, assuming it follows torchvision's API
# weights = models_resnet.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
# backbone_model = models_resnet.resnet50(weights=weights)
# # Add elif for other ResNet variants like resnet18, resnet34 if needed
# # elif name == 'resnet18':
# # weights = models_resnet.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
# # backbone_model = models_resnet.resnet18(weights=weights)
# else:
# raise ValueError(f"Unsupported ResNet backbone: {name}")
# super().__init__(backbone_model, fpn_target_channels)
# # num_channels is the channel dimension of the FPN output features (P3, P4, P5)
# # that are fed to the detection/segmentation heads.
# # Your FPN's `feature_size` is 256.
# self.num_channels = 256
# class BackboneBase_ResNet(nn.Module):
# """ResNet backbone with frozen BatchNorm."""
# pass
class Backbone_VGG(BackboneBase_VGG):
    """VGG backbone with frozen BatchNorm.

    NOTE(review): nothing visible here actually freezes BatchNorm — the
    docstring claim is inherited from upstream; confirm against vgg_models.

    Args:
        name: ``'vgg16'`` or ``'vgg16_bn'``.
        return_interm_layers: forwarded to :class:`BackboneBase_VGG`.

    Raises:
        ValueError: if ``name`` is not a supported VGG variant.
    """

    def __init__(self, name: str, return_interm_layers: bool):
        if name == 'vgg16_bn':
            backbone = vgg_models.vgg16_bn(pretrained=True)
        elif name == 'vgg16':
            backbone = vgg_models.vgg16(pretrained=True)
        else:
            # Previously an unsupported name left `backbone` unbound and the
            # super().__init__ call raised an opaque NameError.
            raise ValueError(f"Unsupported VGG backbone: {name!r}")
        num_channels = 256
        super().__init__(backbone, num_channels, name, return_interm_layers)
def build_backbone(args):
    """Build the VGG backbone named by ``args.backbone``, always returning
    intermediate feature maps (``return_interm_layers=True``)."""
    return Backbone_VGG(args.backbone, True)
if __name__ == '__main__':
    # Smoke test: instantiate a plain VGG-16 backbone with intermediate
    # layers enabled (downloads pretrained weights on first run).
    Backbone_VGG('vgg16', True)