| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import tensorflow as tf |
| import keras |
| from keras.activations import silu, relu, relu6 |
| from keras.ops import clip |
| from keras import layers |
| from keras.src.applications import imagenet_utils |
| from keras import Model |
| from typing import List, Tuple |
| import math |
|
|
|
|
# Base number of block repetitions for each of the seven stages; passed to
# _round_repeats inside _EfficientNet, where it is scaled by the depth coefficient.
repeats = [1, 2, 2, 3, 3, 4, 1]
|
|
|
|
| def _round_expansion(expansion_factor: int, repeats: List[int]) -> List[int] : |
| """ |
| Docstring for _round_expansion |
| |
| Args: |
| expansion_factor (int): scaling coefficient for the input filters |
| repeats (list): list managing block repetition in the network and therefore depth |
| Returns: |
| exp_ratio: list of scaling coefficient |
| """ |
| exp_ratio = [] |
| flag = 1 |
| for r in repeats: |
| if (r != 0) and flag: |
| exp_ratio.append(1) |
| flag = 0 |
| else: |
| exp_ratio.append(expansion_factor) |
|
|
| return exp_ratio |
|
|
|
|
| def _num_blocks(repeats: List[int]) -> List[int]: |
| """ |
| Docstring for _num_blocks |
| |
| Args: |
| repeats (List[int]): repetition pattern along the network |
| |
| Returns: |
| blocks (List[int]): auxiliary list for network construction |
| """ |
| blocks = [] |
| for r in repeats: |
| if (r != 0): |
| blocks.append(1) |
| else: |
| blocks.append(0) |
| return blocks |
|
|
|
|
| def _round_filters(filters: int, width_coefficient: float, depth_divisor: int = 8, min_filters: int = None) -> int: |
| """ |
| Round number of filters based on depth multiplier. |
| |
| Args: |
| filters (int): base filter number of a sub-block |
| width_coefficient (int): scaling coefficient on filters |
| depth_divisor (int): scaling coefficient on depth |
| min_filters (int): minimum number of filters considered |
| |
| Returns: |
| Rounded number of filters |
| """ |
| if not width_coefficient: |
| return filters |
| filters *= width_coefficient |
| min_filters = min_filters or depth_divisor |
| new_filters = max(int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, min_filters) |
| |
| if new_filters < 0.9 * filters: |
| new_filters += depth_divisor |
| return int(new_filters) |
|
|
|
|
| def _round_repeats(repeats: List[int], depth_coefficient: float, depth_trunc: str) -> List[int]: |
| """ |
| Per-stage depth scaling. Scales the block repeats in each stage. This depth scaling maintains |
| compatibility with the EfficientNet scaling method, while allowing sensible |
| scaling for other models that may have multiple block arg definitions in each stage. |
| |
| Args: |
| repeats (list): pattern for sub-block repetition |
| depth_coefficient (float): scaling coefficient on depth |
| depth_trunc (str): method for truncation, example 'round' |
| Returns: |
| repeats_scaled (list): scaled repeat per stage |
| |
| """ |
|
|
| |
| |
| num_repeat = sum(repeats) |
| if depth_trunc == 'round': |
| |
| |
| |
| num_repeat_scaled = round(num_repeat * depth_coefficient) |
| else: |
| |
| |
| num_repeat_scaled = int(math.ceil(num_repeat * depth_coefficient)) |
| |
| |
| |
| repeats_scaled = [] |
| for r in repeats[::-1]: |
| if depth_trunc == 'round': |
| rs = round((r / num_repeat * num_repeat_scaled)) |
| else: |
| rs = max(1, round((r / num_repeat * num_repeat_scaled))) |
| repeats_scaled.append(rs) |
| num_repeat -= r |
| num_repeat_scaled -= rs |
| repeats_scaled = repeats_scaled[::-1] |
| return repeats_scaled |
|
|
|
|
def _swish(x):
    """
    Swish activation, delegated to the ``silu`` function imported from keras.activations.

    Args:
        x (tf.Tensor): input tensor
    Returns:
        result of ``silu(x)``
    """
    activated = silu(x)
    return activated
| |
|
|
def _mb_conv_block(inputs: tf.Tensor, in_channels: int, out_channels: int, num_repeat: int, stride: int, expansion_factor: int, se_ratio: float, k: int, drop_rate: float,
                   prev_block_num: int, activation) -> tf.Tensor:
    """
    Stack ``num_repeat`` MBConv sub-blocks on top of ``inputs``.

    Each sub-block consists of:
      1. an optional 1x1 expansion convolution (skipped when ``expansion_factor`` is 1),
      2. a k x k depthwise convolution (strided only in the first sub-block),
      3. a squeeze-and-excitation branch whose output rescales the feature map,
      4. a 1x1 projection convolution to ``out_channels``,
      5. a residual connection (with dropout) when the sub-block keeps both the
         spatial resolution and the channel count.

    Args:
        inputs (tf.Tensor): block input tensor
        in_channels (int): number of input channels
        out_channels (int): number of output channels
        num_repeat (int): number of sub-blocks to stack; 0 returns ``inputs`` unchanged
        stride (int): stride of the depthwise convolution of the first sub-block
            (later sub-blocks always use stride 1)
        expansion_factor (int): scaling coefficient for the input filters
        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
        k (int): kernel size of the depthwise convolution
        drop_rate (float): between 0 and 1, base dropout rate of the residual branch
        prev_block_num (int): number of sub-blocks built before this stage; used to
            scale the dropout rate and to name the squeeze-and-excitation activations
        activation: activation identifier or callable passed to ``layers.Activation``
    Returns:
        tf.Tensor output of mb_conv block
    """
    # Fixed denominator used to scale the residual dropout rate with block depth.
    total_blocks = 16

    x = inputs
    current_filters = in_channels

    for idx in range(num_repeat):
        block_index = idx + prev_block_num
        shortcut = x
        # Only the first sub-block of a stage may downsample.
        block_stride = stride if idx == 0 else 1
        expanded_filters = current_filters * expansion_factor

        # Expansion phase
        if expansion_factor != 1:
            x = layers.Conv2D(filters=expanded_filters, kernel_size=(1, 1), strides=1, padding='same')(x)
            x = layers.BatchNormalization()(x)
            x = layers.Activation(activation)(x)

        # Depthwise convolution
        x = layers.DepthwiseConv2D(kernel_size=(k, k), strides=block_stride, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation(activation)(x)

        # Squeeze-and-excitation: the squeeze width is derived from the sub-block's
        # input filters, not from the expanded filters.
        squeezed_filters = max(1, int(current_filters * se_ratio))
        gate = layers.GlobalAveragePooling2D()(x)
        gate = layers.Reshape((1, 1, expanded_filters))(gate)
        gate = layers.Conv2D(filters=squeezed_filters, kernel_size=(1, 1), padding='same')(gate)
        gate = layers.BatchNormalization()(gate)
        gate = layers.Activation(activation, name='act_{}'.format(block_index))(gate)
        gate = layers.Conv2D(filters=expanded_filters, kernel_size=(1, 1), padding='same')(gate)
        gate = layers.BatchNormalization()(gate)
        gate = layers.Activation('sigmoid', name='act2_{}'.format(block_index))(gate)
        x = layers.multiply([x, gate])

        # Projection phase
        x = layers.Conv2D(filters=out_channels, kernel_size=(1, 1), strides=1, padding='same')(x)
        x = layers.BatchNormalization()(x)

        # Residual connection, only possible when input and output shapes match.
        if block_stride == 1 and current_filters == out_channels:
            dropout_rate = drop_rate * float(block_index) / total_blocks
            if dropout_rate > 0:
                x = layers.Dropout(dropout_rate, noise_shape=(None, 1, 1, 1))(x)
            x = layers.add([x, shortcut])

        current_filters = out_channels

    return x
|
|
|
|
def _EfficientNet(width_coefficient_list: float = 1.0,
                  depth_coefficient: float = 1.0,
                  input_resolution: int = 224,
                  expansion_factor: int = 6,
                  se_ratio: float = 0.25,
                  input_channels: int = 3,
                  dropout_rate: float = 0.2,
                  drop_connect_rate: float = 0.2,
                  depth_trunc: str = 'ceil',
                  activation: str = 'relu',
                  include_top=True,
                  pooling: str = None,
                  classes: int = 101) -> keras.Model:
    """
    Build an EfficientNet-style Keras model: stem convolution, seven MBConv
    stages, a 1x1 top convolution and an optional classification head.

    Args:
        width_coefficient_list (float or sequence of float): scaling coefficient(s) for
            network width. A sequence must provide at least 9 values, used in order for
            the stem, the outputs of the seven stages and the top convolution. A single
            number is applied to all of them.
        depth_coefficient (float): scaling coefficient for network depth
        input_resolution (int): size of the (square) input in pixels
        expansion_factor (int): scaling coefficient for the input filters
        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
        input_channels (int): number of input channels
        dropout_rate (float): dropout rate before final classifier layer; no dropout
            layer is added when it is None or not positive
        drop_connect_rate (float): dropout rate at skip connections
        depth_trunc (str): method for truncation of the scaled depth ('round' or 'ceil')
        activation (str): the activation function to use; 'swish' and 'relu6' are mapped
            to the corresponding functions, any other value is passed to Keras as is
        include_top (boolean): whether to include the fully-connected layer at the top of the network
        pooling (str): pooling mode for feature extraction, None, 'avg' or 'max';
            only used when ``include_top`` is False
        classes (int): number of classes to classify images
    Returns:
        keras.model with efficientnet topology
    Raises:
        ValueError: if ``width_coefficient_list`` is a sequence with fewer than 9 values
    """
    # Width coefficients consumed below: stem + 7 stage outputs + top convolution.
    num_width_coefficients = 9
    if isinstance(width_coefficient_list, (int, float)):
        # A scalar (such as the default 1.0) applies to every position.
        width_coefficients = [width_coefficient_list] * num_width_coefficients
    else:
        width_coefficients = list(width_coefficient_list)
        if len(width_coefficients) < num_width_coefficients:
            raise ValueError(
                f"Expecting at least {num_width_coefficients} width coefficients. "
                f"Received {len(width_coefficients)}"
            )

    model_input = keras.Input(shape=(input_resolution, input_resolution, input_channels))

    # Map activation names to callables; pass the function itself, do not call it.
    if activation == 'swish':
        activation = _swish
    elif activation == 'relu6':
        activation = relu6

    # Stem
    stem_filters = _round_filters(32, width_coefficients[0])
    x = layers.Conv2D(filters=stem_filters, kernel_size=(3, 3), strides=(2, 2), padding='same')(model_input)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation, name='stem_activation')(x)

    # Per-stage depth and expansion ratios
    repeats_scaled = _round_repeats(repeats, depth_coefficient, depth_trunc)
    exp_ratio = _round_expansion(expansion_factor, repeats_scaled)

    # (base output filters, kernel size, stride) of the seven MBConv stages.
    stage_specs = [
        (16, 3, 1),
        (24, 3, 2),
        (40, 5, 2),
        (80, 3, 2),
        (112, 5, 1),
        (192, 5, 2),
        (320, 3, 1),
    ]

    in_channels = stem_filters
    blocks_built = 0  # number of sub-blocks created by the previous stages
    for stage, (base_filters, kernel_size, stage_stride) in enumerate(stage_specs):
        out_channels = _round_filters(base_filters, width_coefficients[stage + 1])
        x = _mb_conv_block(inputs=x, in_channels=in_channels, out_channels=out_channels,
                           num_repeat=repeats_scaled[stage], stride=stage_stride,
                           expansion_factor=exp_ratio[stage], se_ratio=se_ratio, k=kernel_size,
                           drop_rate=drop_connect_rate, prev_block_num=blocks_built,
                           activation=activation)
        in_channels = out_channels
        blocks_built += repeats_scaled[stage]

    # Top
    x = layers.Conv2D(filters=_round_filters(1280, width_coefficients[8]), kernel_size=(1, 1), padding='same', name='top_conv')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation, name='top_activation')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        if dropout_rate and dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name='top_dropout')(x)
        x = layers.Dense(classes, activation='softmax', name='output_probs')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # NOTE(review): the model name spelling is kept unchanged in case anything
    # depends on it.
    model = Model(model_input, x, name="st_evvicientnetlcv1")

    return model
|
|
|
|
def get_st_efficientnetlcv1(input_shape: Tuple[int, int, int] = None, num_classes: int = None, dropout: float = None, pretrained: bool = False, **kwargs) -> keras.Model:
    """
    Build the model returned by ``_EfficientNet`` with a fixed configuration:
    'relu6' activation, depth coefficient 1.0, width coefficient 0.45 for all
    layers, expansion factor 3 and 'ceil' depth truncation, with the
    classification head included.

    Args:
        input_shape (Tuple[int, int, int]): The shape of the input tensor, indexed as
            (height, width, channels). Height and width must be equal and multiples of 32.
        num_classes (int): The number of classes for the classification task.
        dropout (float): The dropout rate applied before the classifier.
        pretrained (bool): No pretrained weights are available; when True a warning
            is printed and random weights are used.
        **kwargs: accepted but not used.

    Returns:
        keras.Model: the constructed Keras model.

    Raises:
        ValueError: if the input is not square or its size is not a multiple of 32.
    """
    if pretrained:
        print("WARNING: No pretrained weights are found for 'st_efficientnet_lv_v1' model. Random weights are used instead.")

    height = input_shape[0]
    width = input_shape[1]

    if height != width:
        raise ValueError(f"Expecting image width and height to be the same. Received image shape {input_shape}")

    if any(side % 32 > 0 for side in (height, width)):
        raise ValueError(f"Expecting image width and height to be multiples of 32. Received image shape {input_shape}")

    # Fixed scaling configuration of this model variant.
    width_coefficients = [0.45] * 10
    depth_coefficient = 1.
    expansion = 3

    return _EfficientNet(
        width_coefficient_list=width_coefficients,
        depth_coefficient=depth_coefficient,
        input_resolution=height,
        expansion_factor=expansion,
        depth_trunc='ceil',
        activation='relu6',
        input_channels=input_shape[2],
        dropout_rate=dropout,
        include_top=True,
        classes=num_classes,
    )
|
|