Spaces:

STMicroelectronics
/

stm32-modelzoo-app

Running

File size: 15,738 Bytes

747451d

# /*---------------------------------------------------------------------------------------------
#  * Copyright 2015 The TensorFlow Authors. 
#  * Copyright (c) 2022 STMicroelectronics.
#  * All rights reserved.
#  *
#  * This software is licensed under terms that can be found in the LICENSE file in
#  * the root directory of this software component.
#  * If no LICENSE file comes with this software, it is provided AS-IS.
#  *--------------------------------------------------------------------------------------------*/

import tensorflow as tf
import keras
from keras.activations import silu, relu, relu6
from keras.ops import clip
from keras import layers
from keras.src.applications import imagenet_utils
from keras import Model
from typing import List, Tuple
import math


repeats = [1, 2, 2, 3, 3, 4, 1]


def _round_expansion(expansion_factor: int, repeats: List[int]) -> List[int] :
    """
    Docstring for _round_expansion
    
    Args:
        expansion_factor (int): scaling coefficient for the input filters
        repeats (list): list managing block repetition in the network and therefore depth
    Returns:
        exp_ratio: list of scaling coefficient 
    """
    exp_ratio = []
    flag = 1
    for r in repeats:
        if (r != 0) and flag:
            exp_ratio.append(1)
            flag = 0
        else:
            exp_ratio.append(expansion_factor)

    return exp_ratio


def _num_blocks(repeats: List[int]) -> List[int]:
    """
    Docstring for _num_blocks
    
    Args: 
        repeats (List[int]): repetition pattern along the network

    Returns: 
        blocks (List[int]): auxiliary list for network construction
    """
    blocks = []
    for r in repeats:
        if (r != 0):
            blocks.append(1)
        else:
            blocks.append(0)
    return blocks


def _round_filters(filters: int, width_coefficient: float, depth_divisor: int = 8, min_filters: int = None) -> int:
    """
    Round number of filters based on depth multiplier.
    
    Args: 
        filters (int): base filter number of a sub-block
        width_coefficient (int): scaling coefficient on filters
        depth_divisor (int): scaling coefficient on depth
        min_filters (int): minimum number of filters considered

    Returns: 
        Rounded number of filters
    """
    if not width_coefficient:
        return filters
    filters *= width_coefficient
    min_filters = min_filters or depth_divisor
    new_filters = max(int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, min_filters)
    # Make sure that round down does not go down by more than 10%.
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)


def _round_repeats(repeats: List[int], depth_coefficient: float, depth_trunc: str) -> List[int]:
    """ 
    Per-stage depth scaling. Scales the block repeats in each stage. This depth scaling maintains
    compatibility with the EfficientNet scaling method, while allowing sensible
    scaling for other models that may have multiple block arg definitions in each stage.

    Args:
        repeats (list): pattern for sub-block repetition
        depth_coefficient (float): scaling coefficient on depth
        depth_trunc (str): method for truncation, example 'round'
    Returns:
        repeats_scaled (list): scaled repeat per stage

    """

    # We scale the total repeat count for each stage, there may be multiple
    # block arg defs per stage so we need to sum.
    num_repeat = sum(repeats)
    if depth_trunc == 'round':
        # Truncating to int by rounding allows stages with few repeats to remain
        # proportionally smaller for longer. This is a good choice when stage definitions
        # include single repeat stages that we'd prefer to keep that way as long as possible
        num_repeat_scaled = round(num_repeat * depth_coefficient)
    else:
        # The default for EfficientNet truncates repeats to int via 'ceil'.
        # Any multiplier > 1.0 will result in an increased depth for every stage.
        num_repeat_scaled = int(math.ceil(num_repeat * depth_coefficient))
    # Proportionally distribute repeat count scaling to each block definition in the stage.
    # Allocation is done in reverse as it results in the first block being less likely to be scaled.
    # The first block makes less sense to repeat in most of the arch definitions.
    repeats_scaled = []
    for r in repeats[::-1]:
        if depth_trunc == 'round':
            rs = round((r / num_repeat * num_repeat_scaled))
        else:
            rs = max(1, round((r / num_repeat * num_repeat_scaled)))
        repeats_scaled.append(rs)
        num_repeat -= r
        num_repeat_scaled -= rs
    repeats_scaled = repeats_scaled[::-1]
    return repeats_scaled


def _swish(x):
    """
    Docstring for _swish
    
    Args:
        x (tf.Tensor): input tensor
    Returns:
        swish activation of x
    """
    return silu(x)
    

def _mb_conv_block(inputs: tf.Tensor, in_channels: int, out_channels: int, num_repeat: int, stride: int, expansion_factor: int, se_ratio: float, k: int, drop_rate: float, 
                  prev_block_num: int, activation) -> tf.Tensor:
    """
    Docstring for _mb_conv_block
    
    Args:
        inputs (tf.Tensor): block input tensor
        in_channels (int): number of input channels
        out_channels (int): number of output channels
        num_repeat (int):
        stride (int): stride of the convolution
        expansion_factor (int): scaling coefficient for the input filters
        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
        k (int): kernel size
        drop_rate (float): between 0 and 1, fraction of the input units to drop
        prev_block_num (int): dropout adjustement parameter
        activation (str): activation function
    Returns:
        tf.Tensor output of mb_conv block

    """

    x = inputs
    input_filters = in_channels
	
    for i in range(num_repeat):
        # Expansion phase: making the layer wide wide as mentioned in Inverted residual block
        input_tensor = x
        if i == 0:
            # The first block needs to take care of stride and filter size increase.
            stride = stride
        else:
            stride = 1

        expanded_filters = input_filters * expansion_factor
        if expansion_factor != 1:
            x = layers.Conv2D(filters=expanded_filters, kernel_size=(1, 1), strides=1, padding='same')(x)
            x = layers.BatchNormalization()(x)
            x = layers.Activation(activation)(x)

        x = layers.DepthwiseConv2D(kernel_size=(k, k), strides=stride, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation(activation)(x)

        # Squeeze and excitation phase: extracting global features with global average pooling and squeeze numbers of channels using se_ratio
        squeezed_filters = max (1, int(input_filters * se_ratio))
        se_tensor = layers.GlobalAveragePooling2D()(x)
        se_tensor = layers.Reshape((1, 1, expanded_filters))(se_tensor)
        se_tensor = layers.Conv2D(filters=squeezed_filters , kernel_size=(1, 1), padding='same')(se_tensor)
        se_tensor = layers.BatchNormalization()(se_tensor)  
        se_tensor = layers.Activation(activation, name='act_{}'.format(i+prev_block_num))(se_tensor)
        se_tensor = layers.Conv2D(filters=expanded_filters , kernel_size=(1, 1), padding='same')(se_tensor)
        se_tensor = layers.BatchNormalization()(se_tensor)
        se_tensor = layers.Activation('sigmoid', name='act2_{}'.format(i+prev_block_num))(se_tensor)
        x = layers.multiply([x, se_tensor])
        
        # Output phase:
        x = layers.Conv2D(filters=out_channels, kernel_size=(1, 1), strides=1, padding='same')(x)
        x = layers.BatchNormalization()(x)

        if stride == 1 and input_filters == out_channels:
            num_blocks_total = 16
            dropout_rate = drop_rate * float(prev_block_num + i) / num_blocks_total
            if dropout_rate and (dropout_rate > 0):
                x = layers.Dropout(dropout_rate, noise_shape=(None, 1, 1, 1))(x)
            x = layers.add([x, input_tensor])
            
        input_filters = out_channels

    return x


def _EfficientNet(width_coefficient_list: float = 1.0,
                depth_coefficient: float = 1.0,
                input_resolution: int = 224,
                expansion_factor: int = 6,
                se_ratio: float = 0.25,
                input_channels: int = 3,
                dropout_rate: float = 0.2,
                drop_connect_rate: float = 0.2,
                depth_trunc: str = 'ceil',
                activation: str = 'relu',
                include_top=True,
                pooling: str = None,
                classes: int = 101) -> keras.Model:
    """
    Docstring for _EfficientNet
    
    Args: 
        width_coefficient_list (float): scaling coefficient for network width
        depth_coefficient (float): scaling coefficient for network depth
        input_resolution (int): szie of input in pixels
        expansion_factor (int): scaling coefficient for the input filters
        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
        input_channels (int): number of input channels
        dropout_rate (float): dropout rate before final classifier layer
        drop_connect_rate (float): dropout rate at skip connections
        depth_trunc (str): method for truncation
        activation (str): the activation function to use
        include_top (boolean): whether to include the fully-connected layer at the top of the network
        pooling (str): pooling mode for feature extraction, 'None', 'avg' or 'max'
        classes (int): number of classes to classify images
    Returns: 
        keras.model with efficientnet topology
    """

    # Determine proper input shape
    input = keras.Input(shape=(input_resolution, input_resolution, input_channels))

    # Activation
    if activation == 'swish':
        activation = _swish()
    if activation == 'relu6':
        activation = relu6

    # Build stem
    x = layers.Conv2D(filters=_round_filters(32, width_coefficient_list[0]), kernel_size=(3, 3), strides=(2, 2), padding='same')(input)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation, name='stem_activation')(x)

    # Build blocks
    repeats_scaled = _round_repeats(repeats, depth_coefficient, depth_trunc)
    exp_ratio = _round_expansion(expansion_factor, repeats_scaled)
    
    block1 = _mb_conv_block(inputs=x, in_channels=_round_filters(32, width_coefficient_list[0]), out_channels=_round_filters(16, width_coefficient_list[1]), 
                           num_repeat=repeats_scaled[0],stride=1, expansion_factor=exp_ratio[0], se_ratio=se_ratio, k=3, drop_rate=drop_connect_rate, 
                           prev_block_num=0, activation=activation)

    block2 = _mb_conv_block(inputs=block1, in_channels=_round_filters(16, width_coefficient_list[1]), out_channels=_round_filters(24, width_coefficient_list[2]), 
                           num_repeat=repeats_scaled[1],stride=2, expansion_factor=exp_ratio[1], se_ratio=se_ratio, k=3, drop_rate=drop_connect_rate, 
                           prev_block_num=sum(repeats_scaled[0:1]), activation=activation)
    
    block3 = _mb_conv_block(inputs=block2, in_channels=_round_filters(24, width_coefficient_list[2]), out_channels=_round_filters(40, width_coefficient_list[3]), 
                           num_repeat=repeats_scaled[2],stride=2, expansion_factor=exp_ratio[2], se_ratio=se_ratio, k=5, drop_rate=drop_connect_rate, 
                           prev_block_num=sum(repeats_scaled[0:2]), activation=activation)
    
    block4 = _mb_conv_block(inputs=block3, in_channels=_round_filters(40, width_coefficient_list[3]), out_channels=_round_filters(80, width_coefficient_list[4]), 
                           num_repeat=repeats_scaled[3], stride=2, expansion_factor=exp_ratio[3], se_ratio=se_ratio, k=3, drop_rate=drop_connect_rate, 
                           prev_block_num=sum(repeats_scaled[0:3]), activation=activation)
    
    block5 = _mb_conv_block(inputs=block4, in_channels=_round_filters(80, width_coefficient_list[4]), out_channels=_round_filters(112, width_coefficient_list[5]), 
                           num_repeat=repeats_scaled[4], stride=1, expansion_factor=exp_ratio[4], se_ratio=se_ratio, k=5, drop_rate=drop_connect_rate, 
                           prev_block_num=sum(repeats_scaled[0:4]), activation=activation)
    
    block6 = _mb_conv_block(inputs=block5, in_channels=_round_filters(112, width_coefficient_list[5]), out_channels=_round_filters(192, width_coefficient_list[6]),
                            num_repeat=repeats_scaled[5], stride=2, expansion_factor=exp_ratio[5], se_ratio=se_ratio, k=5, drop_rate=drop_connect_rate, 
                            prev_block_num=sum(repeats_scaled[0:5]), activation=activation)
    
    block7 = _mb_conv_block(inputs=block6, in_channels=_round_filters(192, width_coefficient_list[6]), out_channels=_round_filters(320, width_coefficient_list[7]),
                            num_repeat=repeats_scaled[6],stride=1, expansion_factor=exp_ratio[6], se_ratio=se_ratio, k=3, drop_rate=drop_connect_rate, 
                            prev_block_num=sum(repeats_scaled[0:6]), activation=activation)

    # Build top
    x = layers.Conv2D(filters=_round_filters(1280, width_coefficient_list[8]), kernel_size=(1, 1), padding='same', name='top_conv')(block7)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation, name='top_activation')(x)
	
    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        if dropout_rate and dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name='top_dropout')(x)
        x = layers.Dense(classes, activation='softmax', name='output_probs')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Create model.
    model = Model(input, x, name="st_evvicientnetlcv1")

    return model


def get_st_efficientnetlcv1(input_shape: Tuple[int, int, int] = None, num_classes: int = None, dropout: float = None, pretrained: bool = False, **kwargs) -> keras.Model:
    """
    Creates a Keras model for fine-grained classification from scratch.

    Args:
        input_shape (Tuple[int, int, int]): The shape of the input tensor.
        num_classes (int): The number of classes for the classification task.
        dropout (float): The dropout rate.

    Returns:
        keras.Model: A Keras model for fine-grained classification.
    """
    
    if pretrained:
      print("WARNING: No pretrained weights are found for 'st_efficientnet_lv_v1' model. Random weights are used instead.")

    # Validate input_shape is square
    if input_shape[0] != input_shape[1]:
        raise ValueError(f"Expecting image width and height to be the same. Received image shape {input_shape}")
    # Validate input_shape is multiple of 32
    if (input_shape[0] % 32 > 0) or (input_shape[1] % 32 > 0):
        raise ValueError(f"Expecting image width and height to be multiples of 32. Received image shape {input_shape}")
    activation = 'relu6'
    d = 1.
    w = [0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45]
    e = 3
    model = _EfficientNet(width_coefficient_list=w, depth_coefficient=d, input_resolution=input_shape[0], expansion_factor=e, depth_trunc='ceil', activation=activation, 
							input_channels=input_shape[2], dropout_rate=dropout, include_top=True, classes=num_classes)
    
    return model