xcssgzs
/

efficientNetV2

+from collections import OrderedDict
+from functools import partial
+from typing import Callable, Optional
+import torch.nn as nn
+import torch
+from torch import Tensor
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Stochastic depth: randomly drop whole samples of ``x`` on a residual branch.

    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf
    Adapted from rwightman's timm implementation:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140

    Args:
        x: input tensor; the first dimension is treated as the sample axis.
        drop_prob: probability of zeroing a sample.
        training: the input is returned untouched unless this is True.

    Returns:
        ``x`` itself when ``drop_prob == 0`` or ``training`` is False;
        otherwise a new tensor in which each sample is either zeroed or
        divided by ``1 - drop_prob``.
    """
    if not training or drop_prob == 0.:
        return x

    survival = 1 - drop_prob
    # One random value per sample, broadcast over every remaining dimension,
    # so this works for tensors of any rank (not only 4-D feature maps).
    mask_shape = [x.shape[0]] + [1] * (x.ndim - 1)
    noise = torch.rand(mask_shape, dtype=x.dtype, device=x.device)
    # floor(survival + U[0, 1)) is 1 with probability `survival`, else 0.
    keep_mask = torch.floor(survival + noise)
    return x.div(survival) * keep_mask
class DropPath(nn.Module):
    """
    Module wrapper around ``drop_path`` (stochastic depth per sample), meant
    for the main path of residual blocks.

    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    Args:
        drop_prob: probability of dropping a sample. ``None`` (the default)
            disables dropping.
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        """Return ``x`` with stochastic depth applied while in training mode."""
        # With the default ``drop_prob=None`` the underlying function would
        # evaluate ``1 - None`` in training mode and raise TypeError, so a
        # missing rate is treated as "never drop".
        if self.drop_prob is None:
            return x
        return drop_path(x, self.drop_prob, self.training)
class ConvBNAct(nn.Module):
    """
    Bias-free 2-D convolution followed by a normalization layer and an
    activation.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups.
        norm_layer: callable building the norm layer from the channel count;
            ``nn.BatchNorm2d`` when None.
        activation_layer: zero-argument callable building the activation;
            ``nn.SiLU`` when None.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        # SiLU is an alias of Swish (torch>=1.7).
        make_act = nn.SiLU if activation_layer is None else activation_layer

        self.conv = nn.Conv2d(in_planes,
                              out_planes,
                              kernel_size,
                              stride=stride,
                              padding=(kernel_size - 1) // 2,
                              groups=groups,
                              bias=False)
        self.bn = make_norm(out_planes)
        self.act = make_act()

    def forward(self, x):
        """Apply conv -> norm -> activation."""
        return self.act(self.bn(self.conv(x)))
class SqueezeExcite(nn.Module):
    """
    Squeeze-and-excitation gate: spatially averaged features pass through a
    1x1 reduce conv, SiLU, a 1x1 expand conv and a sigmoid, and the result
    rescales the input channel-wise.

    Args:
        input_c: channel count of the enclosing block's input; the squeezed
            width is ``int(input_c * se_ratio)``.
        expand_c: channel count of the tensor this module receives.
        se_ratio: squeeze ratio applied to ``input_c``.
    """

    def __init__(self,
                 input_c: int,
                 expand_c: int,
                 se_ratio: float = 0.25):
        super().__init__()
        bottleneck_c = int(input_c * se_ratio)
        self.conv_reduce = nn.Conv2d(expand_c, bottleneck_c, kernel_size=1)
        self.act1 = nn.SiLU()  # alias Swish
        self.conv_expand = nn.Conv2d(bottleneck_c, expand_c, kernel_size=1)
        self.act2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` multiplied by its per-channel gate."""
        # Average over dims 2 and 3, keeping them as size-1 axes for the convs.
        pooled = x.mean((2, 3), keepdim=True)
        squeezed = self.act1(self.conv_reduce(pooled))
        gate = self.act2(self.conv_expand(squeezed))
        return gate * x
class MBConv(nn.Module):
    """
    MBConv block: 1x1 expansion, depth-wise convolution, optional
    squeeze-excite, then a 1x1 linear projection. A residual shortcut (with
    optional DropPath on the branch) is used when ``stride == 1`` and
    ``input_c == out_c``.

    Args:
        kernel_size: kernel size of the depth-wise convolution.
        input_c: input channels.
        out_c: output channels.
        expand_ratio: channel multiplier for the expansion; must not be 1.
        stride: stride of the depth-wise convolution, 1 or 2.
        se_ratio: squeeze-excite ratio; values <= 0 disable the SE module.
        drop_rate: DropPath probability for the residual branch.
        norm_layer: callable building the normalization layer.

    Raises:
        ValueError: if ``stride`` is not 1 or 2.
        AssertionError: if ``expand_ratio`` is 1.
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if stride not in (1, 2):
            raise ValueError("illegal stride value.")

        self.has_shortcut = (stride == 1 and input_c == out_c)

        swish = nn.SiLU  # alias Swish
        hidden_c = input_c * expand_ratio

        # EfficientNetV2 never uses MBConv with expansion == 1, so the
        # point-wise expansion conv always exists.
        assert expand_ratio != 1

        # Point-wise expansion
        self.expand_conv = ConvBNAct(input_c,
                                     hidden_c,
                                     kernel_size=1,
                                     norm_layer=norm_layer,
                                     activation_layer=swish)

        # Depth-wise convolution
        self.dwconv = ConvBNAct(hidden_c,
                                hidden_c,
                                kernel_size=kernel_size,
                                stride=stride,
                                groups=hidden_c,
                                norm_layer=norm_layer,
                                activation_layer=swish)

        if se_ratio > 0:
            self.se = SqueezeExcite(input_c, hidden_c, se_ratio)
        else:
            self.se = nn.Identity()

        # Point-wise linear projection: no activation here, hence Identity.
        self.project_conv = ConvBNAct(hidden_c,
                                      out_planes=out_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=nn.Identity)

        self.out_channels = out_c

        # The DropPath layer is only created when the shortcut is in use.
        self.drop_rate = drop_rate
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        """Run the block and add the shortcut when it is enabled."""
        out = self.expand_conv(x)
        out = self.dwconv(out)
        out = self.se(out)
        out = self.project_conv(out)

        if not self.has_shortcut:
            return out

        if self.drop_rate > 0:
            out = self.dropout(out)
        out += x
        return out
class FusedMBConv(nn.Module):
    """
    Fused-MBConv block: a single regular convolution replaces the
    expansion + depth-wise pair. With ``expand_ratio != 1`` it is an
    expansion conv followed by a 1x1 linear projection; with
    ``expand_ratio == 1`` it is one conv with activation. A residual shortcut
    (with optional DropPath on the branch) is used when ``stride == 1`` and
    ``input_c == out_c``.

    Args:
        kernel_size: kernel size of the main convolution.
        input_c: input channels.
        out_c: output channels.
        expand_ratio: channel multiplier for the expansion conv.
        stride: stride of the main convolution, 1 or 2.
        se_ratio: must be 0; this block has no squeeze-excite.
        drop_rate: DropPath probability for the residual branch.
        norm_layer: callable building the normalization layer.

    Raises:
        AssertionError: if ``stride`` is not 1 or 2, or ``se_ratio`` is not 0.
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        assert stride in [1, 2]
        assert se_ratio == 0

        self.has_shortcut = stride == 1 and input_c == out_c
        self.drop_rate = drop_rate
        self.has_expansion = expand_ratio != 1

        swish = nn.SiLU  # alias Swish
        hidden_c = input_c * expand_ratio

        if not self.has_expansion:
            # Only the projection conv exists; it carries the activation.
            self.project_conv = ConvBNAct(input_c,
                                          out_c,
                                          kernel_size=kernel_size,
                                          stride=stride,
                                          norm_layer=norm_layer,
                                          activation_layer=swish)
        else:
            # The expansion conv exists only when expand_ratio != 1.
            self.expand_conv = ConvBNAct(input_c,
                                         hidden_c,
                                         kernel_size=kernel_size,
                                         stride=stride,
                                         norm_layer=norm_layer,
                                         activation_layer=swish)
            # Linear projection: no activation, hence Identity.
            self.project_conv = ConvBNAct(hidden_c,
                                          out_c,
                                          kernel_size=1,
                                          norm_layer=norm_layer,
                                          activation_layer=nn.Identity)

        self.out_channels = out_c

        # The DropPath layer is only created when the shortcut is in use.
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        """Run the block and add the shortcut when it is enabled."""
        out = self.expand_conv(x) if self.has_expansion else x
        out = self.project_conv(out)

        if not self.has_shortcut:
            return out

        if self.drop_rate > 0:
            out = self.dropout(out)
        out += x
        return out
class EfficientNetV2(nn.Module):
    """
    EfficientNetV2 classifier: stem conv -> stack of FusedMBConv / MBConv
    blocks -> head (1x1 conv, global average pool, flatten, optional dropout,
    linear classifier).

    Args:
        model_cnf: list of stage rows, each with exactly 8 entries:
            ``[repeat, kernel, stride, expansion, in_c, out_c, operator,
            se_ratio]``. ``operator == 0`` selects FusedMBConv, anything else
            selects MBConv.
        num_classes: output size of the classifier.
        num_features: channel count produced by the head's 1x1 conv.
        dropout_rate: dropout probability before the classifier; no dropout
            layer is added when it is <= 0.
        drop_connect_rate: upper bound of the DropPath rate, which grows
            linearly with the block index.
    """

    def __init__(self,
                 model_cnf: list,
                 num_classes: int = 1000,
                 num_features: int = 1280,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2):
        super().__init__()

        for cnf in model_cnf:
            assert len(cnf) == 8

        norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        # The stem outputs as many channels as the first stage expects.
        # The activation defaults to SiLU.
        self.stem = ConvBNAct(3,
                              model_cnf[0][4],
                              kernel_size=3,
                              stride=2,
                              norm_layer=norm_layer)

        total_blocks = sum(cnf[0] for cnf in model_cnf)
        stages = []
        for cnf in model_cnf:
            block_cls = MBConv if cnf[-2] != 0 else FusedMBConv
            for repeat_idx in range(cnf[0]):
                first = repeat_idx == 0
                # Only the first block of a stage changes channels / strides.
                # len(stages) is the global index of the block being built.
                stages.append(block_cls(kernel_size=cnf[1],
                                        input_c=cnf[4] if first else cnf[5],
                                        out_c=cnf[5],
                                        expand_ratio=cnf[3],
                                        stride=cnf[2] if first else 1,
                                        se_ratio=cnf[-1],
                                        drop_rate=drop_connect_rate * len(stages) / total_blocks,
                                        norm_layer=norm_layer))
        self.blocks = nn.Sequential(*stages)

        head = OrderedDict()
        # The activation defaults to SiLU.
        head["project_conv"] = ConvBNAct(model_cnf[-1][-3],
                                         num_features,
                                         kernel_size=1,
                                         norm_layer=norm_layer)
        head["avgpool"] = nn.AdaptiveAvgPool2d(1)
        head["flatten"] = nn.Flatten()
        if dropout_rate > 0:
            head["dropout"] = nn.Dropout(p=dropout_rate, inplace=True)
        head["classifier"] = nn.Linear(num_features, num_classes)
        self.head = nn.Sequential(head)

        # Initialize weights.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def forward(self, x: Tensor) -> Tensor:
        """Run stem, blocks and head in sequence."""
        return self.head(self.blocks(self.stem(x)))
def efficientnetv2_s(num_classes: int = 1000):
    """
    Build the EfficientNetV2-S model.
    https://arxiv.org/abs/2104.00298

    Args:
        num_classes: output size of the classifier.
    """
    # train_size: 300, eval_size: 384
    stage_specs = [
        # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
        [2, 3, 1, 1, 24, 24, 0, 0],
        [4, 3, 2, 4, 24, 48, 0, 0],
        [4, 3, 2, 4, 48, 64, 0, 0],
        [6, 3, 2, 4, 64, 128, 1, 0.25],
        [9, 3, 1, 6, 128, 160, 1, 0.25],
        [15, 3, 2, 6, 160, 256, 1, 0.25],
    ]
    return EfficientNetV2(model_cnf=stage_specs,
                          num_classes=num_classes,
                          dropout_rate=0.2)
def efficientnetv2_m(num_classes: int = 1000):
    """
    Build the EfficientNetV2-M model.
    https://arxiv.org/abs/2104.00298

    Args:
        num_classes: output size of the classifier.
    """
    # train_size: 384, eval_size: 480
    stage_specs = [
        # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
        [3, 3, 1, 1, 24, 24, 0, 0],
        [5, 3, 2, 4, 24, 48, 0, 0],
        [5, 3, 2, 4, 48, 80, 0, 0],
        [7, 3, 2, 4, 80, 160, 1, 0.25],
        [14, 3, 1, 6, 160, 176, 1, 0.25],
        [18, 3, 2, 6, 176, 304, 1, 0.25],
        [5, 3, 1, 6, 304, 512, 1, 0.25],
    ]
    return EfficientNetV2(model_cnf=stage_specs,
                          num_classes=num_classes,
                          dropout_rate=0.3)
def efficientnetv2_l(num_classes: int = 1000):
    """
    Build the EfficientNetV2-L model.
    https://arxiv.org/abs/2104.00298

    Args:
        num_classes: output size of the classifier.
    """
    # train_size: 384, eval_size: 480
    stage_specs = [
        # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
        [4, 3, 1, 1, 32, 32, 0, 0],
        [7, 3, 2, 4, 32, 64, 0, 0],
        [7, 3, 2, 4, 64, 96, 0, 0],
        [10, 3, 2, 4, 96, 192, 1, 0.25],
        [19, 3, 1, 6, 192, 224, 1, 0.25],
        [25, 3, 2, 6, 224, 384, 1, 0.25],
        [7, 3, 1, 6, 384, 640, 1, 0.25],
    ]
    return EfficientNetV2(model_cnf=stage_specs,
                          num_classes=num_classes,
                          dropout_rate=0.4)

script.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import os
+import pandas as pd
+import torch
+from PIL import Image
+from torchvision import transforms
+from model import efficientnetv2_s as create_model
def predict(test_metadata, root_path='/tmp/data/private_testset', output_csv_path='./submission.csv'):
    """
    Classify every image listed in ``test_metadata`` and write one prediction
    per observation to a CSV file.

    Each image is scored independently. When an observation has several
    images, the class of the image with the highest softmax probability is
    used (the earliest image wins ties).

    NOTE: this function reads the module-level names ``model`` and ``device``,
    which are only assigned in the ``__main__`` section of this script; they
    must exist before it is called.

    Args:
        test_metadata: DataFrame with ``observation_id`` and ``filename``
            columns. A ``class_id`` column (string class ids) is added to it
            in place.
        root_path: directory that the ``filename`` values are joined onto.
        output_csv_path: destination of the ``observation_id, class_id`` CSV.

    Raises:
        FileNotFoundError: if a listed image does not exist.
    """
    img_size = {"s": [384, 384],  # train_size, val_size
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"
    eval_size = img_size[num_model][1]

    data_transform = transforms.Compose(
        [transforms.Resize(eval_size),
         transforms.CenterCrop(eval_size),
         transforms.ToTensor(),
         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

    id_list = test_metadata['observation_id'].tolist()
    img_name_list = test_metadata['filename'].tolist()
    print(os.path.abspath(os.path.dirname(__file__)))

    # observation_id -> (best probability so far, class id of that image)
    best_by_observation = {}
    for observation_id, img_name in zip(id_list, img_name_list):
        img_path = os.path.join(root_path, img_name)
        # Explicit raise instead of assert so the check survives `python -O`.
        if not os.path.exists(img_path):
            raise FileNotFoundError("file: '{}' does not exist.".format(img_path))

        # convert() returns an independent image, so the file handle can be
        # released as soon as the pixels have been read.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        batch = torch.unsqueeze(data_transform(img), dim=0)

        with torch.no_grad():
            logits = model(batch.to(device)).cpu()
            top_prob, top_class = torch.max(torch.softmax(logits, dim=1), dim=1)
        prob = top_prob.item()
        class_id = top_class.item()

        # Strict ">" keeps the earliest image on ties.
        best = best_by_observation.get(observation_id)
        if best is None or prob > best[0]:
            best_by_observation[observation_id] = (prob, class_id)

    test_metadata["class_id"] = [str(best_by_observation[observation_id][1])
                                 for observation_id in id_list]
    user_pred_df = test_metadata.drop_duplicates("observation_id", keep="first")
    user_pred_df[["observation_id", "class_id"]].to_csv(output_csv_path, index=False)
if __name__ == '__main__':
    import zipfile

    # Unpack the private test set so the images can be read from disk.
    with zipfile.ZipFile("/tmp/data/private_testset.zip", 'r') as archive:
        archive.extractall("/tmp/data")
    root_path = '/tmp/data/private_testset'

    # `device` and `model` are read by predict() as module-level globals.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Build the network and restore the trained weights.
    model = create_model(num_classes=1784).to(device)
    model_weight_path = "./efficientNetV2.pth"
    state_dict = torch.load(model_weight_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    # Load the test metadata and write the submission file.
    metadata_file_path = "./SnakeCLEF2024_TestMetadata.csv"
    test_metadata = pd.read_csv(metadata_file_path)
    predict(test_metadata, root_path)