stm32-modelzoo-app / image_classification /tf /src /models /st_efficientnetlcv1.py
FBAGSTM's picture
STM32 AI Experimentation Hub
747451d
# /*---------------------------------------------------------------------------------------------
# * Copyright 2015 The TensorFlow Authors.
# * Copyright (c) 2022 STMicroelectronics.
# * All rights reserved.
# *
# * This software is licensed under terms that can be found in the LICENSE file in
# * the root directory of this software component.
# * If no LICENSE file comes with this software, it is provided AS-IS.
# *--------------------------------------------------------------------------------------------*/
import math
from typing import List, Optional, Tuple, Union

import tensorflow as tf
import keras
from keras.activations import silu, relu, relu6
from keras.ops import clip
from keras import layers
from keras.src.applications import imagenet_utils
from keras import Model
# Baseline number of MBConv sub-blocks in each of the seven stages, before depth
# scaling. _EfficientNet passes this list to _round_repeats to get the scaled counts.
repeats = [1, 2, 2, 3, 3, 4, 1]
def _round_expansion(expansion_factor: int, repeats: List[int]) -> List[int] :
"""
Docstring for _round_expansion
Args:
expansion_factor (int): scaling coefficient for the input filters
repeats (list): list managing block repetition in the network and therefore depth
Returns:
exp_ratio: list of scaling coefficient
"""
exp_ratio = []
flag = 1
for r in repeats:
if (r != 0) and flag:
exp_ratio.append(1)
flag = 0
else:
exp_ratio.append(expansion_factor)
return exp_ratio
def _num_blocks(repeats: List[int]) -> List[int]:
"""
Docstring for _num_blocks
Args:
repeats (List[int]): repetition pattern along the network
Returns:
blocks (List[int]): auxiliary list for network construction
"""
blocks = []
for r in repeats:
if (r != 0):
blocks.append(1)
else:
blocks.append(0)
return blocks
def _round_filters(filters: int, width_coefficient: float, depth_divisor: int = 8, min_filters: int = None) -> int:
"""
Round number of filters based on depth multiplier.
Args:
filters (int): base filter number of a sub-block
width_coefficient (int): scaling coefficient on filters
depth_divisor (int): scaling coefficient on depth
min_filters (int): minimum number of filters considered
Returns:
Rounded number of filters
"""
if not width_coefficient:
return filters
filters *= width_coefficient
min_filters = min_filters or depth_divisor
new_filters = max(int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, min_filters)
# Make sure that round down does not go down by more than 10%.
if new_filters < 0.9 * filters:
new_filters += depth_divisor
return int(new_filters)
def _round_repeats(repeats: List[int], depth_coefficient: float, depth_trunc: str) -> List[int]:
"""
Per-stage depth scaling. Scales the block repeats in each stage. This depth scaling maintains
compatibility with the EfficientNet scaling method, while allowing sensible
scaling for other models that may have multiple block arg definitions in each stage.
Args:
repeats (list): pattern for sub-block repetition
depth_coefficient (float): scaling coefficient on depth
depth_trunc (str): method for truncation, example 'round'
Returns:
repeats_scaled (list): scaled repeat per stage
"""
# We scale the total repeat count for each stage, there may be multiple
# block arg defs per stage so we need to sum.
num_repeat = sum(repeats)
if depth_trunc == 'round':
# Truncating to int by rounding allows stages with few repeats to remain
# proportionally smaller for longer. This is a good choice when stage definitions
# include single repeat stages that we'd prefer to keep that way as long as possible
num_repeat_scaled = round(num_repeat * depth_coefficient)
else:
# The default for EfficientNet truncates repeats to int via 'ceil'.
# Any multiplier > 1.0 will result in an increased depth for every stage.
num_repeat_scaled = int(math.ceil(num_repeat * depth_coefficient))
# Proportionally distribute repeat count scaling to each block definition in the stage.
# Allocation is done in reverse as it results in the first block being less likely to be scaled.
# The first block makes less sense to repeat in most of the arch definitions.
repeats_scaled = []
for r in repeats[::-1]:
if depth_trunc == 'round':
rs = round((r / num_repeat * num_repeat_scaled))
else:
rs = max(1, round((r / num_repeat * num_repeat_scaled)))
repeats_scaled.append(rs)
num_repeat -= r
num_repeat_scaled -= rs
repeats_scaled = repeats_scaled[::-1]
return repeats_scaled
def _swish(x):
    """
    Swish activation, implemented as a thin wrapper around keras.activations.silu.

    Args:
        x (tf.Tensor): input tensor (presumably any shape silu accepts)

    Returns:
        silu(x)
    """
    return silu(x)
def _mb_conv_block(inputs: tf.Tensor, in_channels: int, out_channels: int, num_repeat: int, stride: int, expansion_factor: int, se_ratio: float, k: int, drop_rate: float,
                   prev_block_num: int, activation) -> tf.Tensor:
    """
    Build one stage made of `num_repeat` stacked MBConv sub-blocks.

    Each sub-block is: optional 1x1 expansion conv, depthwise conv, squeeze-and-excitation
    gating, 1x1 projection conv, and a residual connection when the shapes allow it.

    Args:
        inputs (tf.Tensor): block input tensor
        in_channels (int): number of input channels
        out_channels (int): number of output channels
        num_repeat (int): number of sub-blocks stacked in this stage
        stride (int): stride of the depthwise convolution of the first sub-block
            (the following sub-blocks always use stride 1)
        expansion_factor (int): scaling coefficient for the input filters
        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
        k (int): depthwise kernel size
        drop_rate (float): between 0 and 1, base drop rate applied on the residual branch
        prev_block_num (int): number of sub-blocks built before this stage; used to
            number the activation layer names and to scale the drop rate
        activation (str or callable): activation passed to layers.Activation

    Returns:
        tf.Tensor output of mb_conv block
    """
    # Denominator of the linear drop-rate schedule.
    num_blocks_total = 16

    features = inputs
    current_filters = in_channels
    for index in range(num_repeat):
        shortcut = features
        # Only the first sub-block handles the stride; the others keep the resolution.
        block_stride = stride if index == 0 else 1
        wide_filters = current_filters * expansion_factor
        block_id = index + prev_block_num

        # Expansion phase: widen the tensor with a pointwise conv (skipped when the ratio is 1).
        if expansion_factor != 1:
            features = layers.Conv2D(filters=wide_filters, kernel_size=(1, 1), strides=1, padding='same')(features)
            features = layers.BatchNormalization()(features)
            features = layers.Activation(activation)(features)

        # Depthwise phase.
        features = layers.DepthwiseConv2D(kernel_size=(k, k), strides=block_stride, padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.Activation(activation)(features)

        # Squeeze and excitation phase: pool to one value per channel, squeeze the
        # channel count using se_ratio, then expand back into a sigmoid gate.
        reduced_filters = max(1, int(current_filters * se_ratio))
        gate = layers.GlobalAveragePooling2D()(features)
        gate = layers.Reshape((1, 1, wide_filters))(gate)
        gate = layers.Conv2D(filters=reduced_filters, kernel_size=(1, 1), padding='same')(gate)
        gate = layers.BatchNormalization()(gate)
        gate = layers.Activation(activation, name='act_{}'.format(block_id))(gate)
        gate = layers.Conv2D(filters=wide_filters, kernel_size=(1, 1), padding='same')(gate)
        gate = layers.BatchNormalization()(gate)
        gate = layers.Activation('sigmoid', name='act2_{}'.format(block_id))(gate)
        features = layers.multiply([features, gate])

        # Output phase: project back to out_channels.
        features = layers.Conv2D(filters=out_channels, kernel_size=(1, 1), strides=1, padding='same')(features)
        features = layers.BatchNormalization()(features)

        # Residual connection, only when input and output tensors have the same shape.
        if block_stride == 1 and current_filters == out_channels:
            dropout_rate = drop_rate * float(block_id) / num_blocks_total
            if dropout_rate and dropout_rate > 0:
                # noise_shape (None, 1, 1, 1): one keep/drop decision per sample,
                # shared over the spatial and channel axes.
                features = layers.Dropout(dropout_rate, noise_shape=(None, 1, 1, 1))(features)
            features = layers.add([features, shortcut])

        current_filters = out_channels
    return features
def _EfficientNet(width_coefficient_list: Union[float, List[float]] = 1.0,
                  depth_coefficient: float = 1.0,
                  input_resolution: int = 224,
                  expansion_factor: int = 6,
                  se_ratio: float = 0.25,
                  input_channels: int = 3,
                  dropout_rate: float = 0.2,
                  drop_connect_rate: float = 0.2,
                  depth_trunc: str = 'ceil',
                  activation: str = 'relu',
                  include_top=True,
                  pooling: Optional[str] = None,
                  classes: int = 101) -> keras.Model:
    """
    Build an EfficientNet-style Keras model: stem conv, seven MBConv stages, top conv
    and an optional classification head.

    Args:
        width_coefficient_list (float or list of float): scaling coefficient(s) for network
            width. A list must provide at least 9 entries, used in order for the stem,
            the outputs of the seven stages and the top convolution. A single number is
            applied to all of them.
        depth_coefficient (float): scaling coefficient for network depth
        input_resolution (int): size of the (square) input in pixels
        expansion_factor (int): scaling coefficient for the input filters
        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
        input_channels (int): number of input channels
        dropout_rate (float): dropout rate before final classifier layer (None or 0 disables it)
        drop_connect_rate (float): dropout rate at skip connections
        depth_trunc (str): method for truncation of the scaled depth, 'round' or 'ceil'
        activation (str): the activation function to use; 'swish' and 'relu6' are mapped
            to the corresponding functions, anything else is passed to layers.Activation as is
        include_top (boolean): whether to include the fully-connected layer at the top of the network
        pooling (str): pooling mode for feature extraction when include_top is False,
            None, 'avg' or 'max'
        classes (int): number of classes to classify images

    Returns:
        keras.Model with efficientnet topology
    """
    # Unscaled filter counts: stem, output of each of the seven stages, top conv.
    base_filters = [32, 16, 24, 40, 80, 112, 192, 320, 1280]
    # (stride, depthwise kernel size) of each of the seven stages.
    stage_config = [(1, 3), (2, 3), (2, 5), (2, 3), (1, 5), (2, 5), (1, 3)]

    # A scalar coefficient (such as the default 1.0) applies to every part of the network.
    if isinstance(width_coefficient_list, (int, float)):
        width_coefficient_list = [width_coefficient_list] * len(base_filters)
    channels = [_round_filters(base, width_coefficient_list[idx]) for idx, base in enumerate(base_filters)]

    # Determine proper input shape
    inputs = keras.Input(shape=(input_resolution, input_resolution, input_channels))

    # Activation: map the names handled here to callables. The function itself is
    # passed, not called, because layers.Activation applies it to the tensor later.
    if activation == 'swish':
        activation = _swish
    elif activation == 'relu6':
        activation = relu6

    # Build stem
    x = layers.Conv2D(filters=channels[0], kernel_size=(3, 3), strides=(2, 2), padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation, name='stem_activation')(x)

    # Build blocks
    repeats_scaled = _round_repeats(repeats, depth_coefficient, depth_trunc)
    exp_ratio = _round_expansion(expansion_factor, repeats_scaled)
    for stage, (stage_stride, kernel) in enumerate(stage_config):
        x = _mb_conv_block(inputs=x, in_channels=channels[stage], out_channels=channels[stage + 1],
                           num_repeat=repeats_scaled[stage], stride=stage_stride, expansion_factor=exp_ratio[stage],
                           se_ratio=se_ratio, k=kernel, drop_rate=drop_connect_rate,
                           # Number of sub-blocks already built by the previous stages.
                           prev_block_num=sum(repeats_scaled[:stage]), activation=activation)

    # Build top
    x = layers.Conv2D(filters=channels[8], kernel_size=(1, 1), padding='same', name='top_conv')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation, name='top_activation')(x)
    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        if dropout_rate and dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name='top_dropout')(x)
        x = layers.Dense(classes, activation='softmax', name='output_probs')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Create model.
    # NOTE(review): the model name looks like a typo of "st_efficientnetlcv1"; it is kept
    # unchanged here in case anything matches on the existing name - confirm before renaming.
    model = Model(inputs, x, name="st_evvicientnetlcv1")
    return model
def get_st_efficientnetlcv1(input_shape: Optional[Tuple[int, int, int]] = None, num_classes: Optional[int] = None, dropout: Optional[float] = None, pretrained: bool = False, **kwargs) -> keras.Model:
    """
    Creates the st_efficientnetlcv1 Keras classification model from scratch.

    The network is built by _EfficientNet with a width coefficient of 0.45 everywhere,
    a depth coefficient of 1, an expansion factor of 3 and 'relu6' activations.

    Args:
        input_shape (Tuple[int, int, int]): The shape of the input tensor: two equal
            spatial sizes, multiples of 32, followed by the number of channels.
        num_classes (int): The number of classes for the classification task.
        dropout (float): The dropout rate before the classifier (None disables it).
        pretrained (bool): No pretrained weights exist for this model; when True a
            warning is printed and random weights are used.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        keras.Model: A Keras model for classification.

    Raises:
        ValueError: if input_shape is missing, has fewer than 3 entries, is not square,
            or its spatial size is not a multiple of 32.
    """
    if pretrained:
        print("WARNING: No pretrained weights are found for 'st_efficientnetlcv1' model. Random weights are used instead.")

    # Fail with an explicit message instead of an indexing error further down.
    if input_shape is None or len(input_shape) < 3:
        raise ValueError(f"Expecting input_shape with two spatial sizes and a channel count. Received image shape {input_shape}")

    # Validate input_shape is square
    if input_shape[0] != input_shape[1]:
        raise ValueError(f"Expecting image width and height to be the same. Received image shape {input_shape}")

    # Validate input_shape is multiple of 32
    if (input_shape[0] % 32 > 0) or (input_shape[1] % 32 > 0):
        raise ValueError(f"Expecting image width and height to be multiples of 32. Received image shape {input_shape}")

    activation = 'relu6'
    d = 1.
    w = [0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45]
    e = 3

    model = _EfficientNet(width_coefficient_list=w, depth_coefficient=d, input_resolution=input_shape[0], expansion_factor=e, depth_trunc='ceil', activation=activation,
                          input_channels=input_shape[2], dropout_rate=dropout, include_top=True, classes=num_classes)
    return model