Spaces:

STMicroelectronics
/

stm32-modelzoo-app

Running

App Files Files Community

stm32-modelzoo-app / image_classification /tf /src /models /st_efficientnetlcv1.py

FBAGSTM

STM32 AI Experimentation Hub

747451d about 1 month ago

raw

history blame contribute delete

15.7 kB

	# /*---------------------------------------------------------------------------------------------
	# * Copyright 2015 The TensorFlow Authors.
	# * Copyright (c) 2022 STMicroelectronics.
	# * All rights reserved.
	# *
	# * This software is licensed under terms that can be found in the LICENSE file in
	# * the root directory of this software component.
	# * If no LICENSE file comes with this software, it is provided AS-IS.
	# --------------------------------------------------------------------------------------------/

	import tensorflow as tf
	import keras
	from keras.activations import silu, relu, relu6
	from keras.ops import clip
	from keras import layers
	from keras.src.applications import imagenet_utils
	from keras import Model
	from typing import List, Tuple
	import math


	# Base number of MBConv sub-block repetitions for each of the seven stages built in
	# _EfficientNet (block1..block7), before depth scaling by _round_repeats.
	repeats = [1, 2, 2, 3, 3, 4, 1]


	def _round_expansion(expansion_factor: int, repeats: List[int]) -> List[int] :
	"""
	Docstring for _round_expansion

	Args:
	expansion_factor (int): scaling coefficient for the input filters
	repeats (list): list managing block repetition in the network and therefore depth
	Returns:
	exp_ratio: list of scaling coefficient
	"""
	exp_ratio = []
	flag = 1
	for r in repeats:
	if (r != 0) and flag:
	exp_ratio.append(1)
	flag = 0
	else:
	exp_ratio.append(expansion_factor)

	return exp_ratio


	def _num_blocks(repeats: List[int]) -> List[int]:
	"""
	Docstring for _num_blocks

	Args:
	repeats (List[int]): repetition pattern along the network

	Returns:
	blocks (List[int]): auxiliary list for network construction
	"""
	blocks = []
	for r in repeats:
	if (r != 0):
	blocks.append(1)
	else:
	blocks.append(0)
	return blocks


	def _round_filters(filters: int, width_coefficient: float, depth_divisor: int = 8, min_filters: int = None) -> int:
	"""
	Round number of filters based on depth multiplier.

	Args:
	filters (int): base filter number of a sub-block
	width_coefficient (int): scaling coefficient on filters
	depth_divisor (int): scaling coefficient on depth
	min_filters (int): minimum number of filters considered

	Returns:
	Rounded number of filters
	"""
	if not width_coefficient:
	return filters
	filters *= width_coefficient
	min_filters = min_filters or depth_divisor
	new_filters = max(int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, min_filters)
	# Make sure that round down does not go down by more than 10%.
	if new_filters < 0.9 * filters:
	new_filters += depth_divisor
	return int(new_filters)


	def _round_repeats(repeats: List[int], depth_coefficient: float, depth_trunc: str) -> List[int]:
	"""
	Per-stage depth scaling. Scales the block repeats in each stage. This depth scaling maintains
	compatibility with the EfficientNet scaling method, while allowing sensible
	scaling for other models that may have multiple block arg definitions in each stage.

	Args:
	repeats (list): pattern for sub-block repetition
	depth_coefficient (float): scaling coefficient on depth
	depth_trunc (str): method for truncation, example 'round'
	Returns:
	repeats_scaled (list): scaled repeat per stage

	"""

	# We scale the total repeat count for each stage, there may be multiple
	# block arg defs per stage so we need to sum.
	num_repeat = sum(repeats)
	if depth_trunc == 'round':
	# Truncating to int by rounding allows stages with few repeats to remain
	# proportionally smaller for longer. This is a good choice when stage definitions
	# include single repeat stages that we'd prefer to keep that way as long as possible
	num_repeat_scaled = round(num_repeat * depth_coefficient)
	else:
	# The default for EfficientNet truncates repeats to int via 'ceil'.
	# Any multiplier > 1.0 will result in an increased depth for every stage.
	num_repeat_scaled = int(math.ceil(num_repeat * depth_coefficient))
	# Proportionally distribute repeat count scaling to each block definition in the stage.
	# Allocation is done in reverse as it results in the first block being less likely to be scaled.
	# The first block makes less sense to repeat in most of the arch definitions.
	repeats_scaled = []
	for r in repeats[::-1]:
	if depth_trunc == 'round':
	rs = round((r / num_repeat * num_repeat_scaled))
	else:
	rs = max(1, round((r / num_repeat * num_repeat_scaled)))
	repeats_scaled.append(rs)
	num_repeat -= r
	num_repeat_scaled -= rs
	repeats_scaled = repeats_scaled[::-1]
	return repeats_scaled


	def _swish(x):
	    """
	    Swish activation, implemented with the `silu` activation imported from keras.

	    Args:
	        x (tf.Tensor): input tensor
	    Returns:
	        result of `silu(x)`
	    """
	    activated = silu(x)
	    return activated


	def _mb_conv_block(inputs: tf.Tensor, in_channels: int, out_channels: int, num_repeat: int, stride: int, expansion_factor: int, se_ratio: float, k: int, drop_rate: float,
	                   prev_block_num: int, activation) -> tf.Tensor:
	    """
	    Build one stage made of `num_repeat` sub-blocks: optional 1x1 expansion, depthwise
	    convolution, squeeze-and-excitation, 1x1 projection and, when shapes allow it, a skip connection.

	    Args:
	        inputs (tf.Tensor): stage input tensor
	        in_channels (int): number of input channels of the first sub-block
	        out_channels (int): number of output channels of every sub-block
	        num_repeat (int): number of sub-blocks in the stage; 0 returns `inputs` unchanged
	        stride (int): stride of the depthwise convolution of the first sub-block (the others use 1)
	        expansion_factor (int): multiplier on the sub-block input filters; 1 skips the expansion conv
	        se_ratio (float): fraction of the sub-block input filters kept in the squeeze convolution
	        k (int): depthwise kernel size
	        drop_rate (float): base dropout rate applied on the residual branch
	        prev_block_num (int): number of sub-blocks built before this stage; used for the
	            dropout schedule and for the names of the squeeze-and-excitation activations
	        activation: activation identifier or callable passed to `layers.Activation`
	    Returns:
	        tf.Tensor output of the stage
	    """
	    # Denominator of the dropout schedule on the residual branch.
	    num_blocks_total = 16

	    x = inputs
	    current_filters = in_channels

	    for i in range(num_repeat):
	        shortcut = x
	        block_index = i + prev_block_num
	        # Only the first sub-block applies the requested stride.
	        block_stride = stride if i == 0 else 1
	        wide_filters = current_filters * expansion_factor

	        # Expansion phase (skipped when there is nothing to expand).
	        if expansion_factor != 1:
	            x = layers.Conv2D(filters=wide_filters, kernel_size=(1, 1), strides=1, padding='same')(x)
	            x = layers.BatchNormalization()(x)
	            x = layers.Activation(activation)(x)

	        # Depthwise convolution.
	        x = layers.DepthwiseConv2D(kernel_size=(k, k), strides=block_stride, padding='same')(x)
	        x = layers.BatchNormalization()(x)
	        x = layers.Activation(activation)(x)

	        # Squeeze and excitation: global pooling, channel squeeze, channel restore, sigmoid gate.
	        reduced_filters = max(1, int(current_filters * se_ratio))
	        gate = layers.GlobalAveragePooling2D()(x)
	        gate = layers.Reshape((1, 1, wide_filters))(gate)
	        gate = layers.Conv2D(filters=reduced_filters, kernel_size=(1, 1), padding='same')(gate)
	        gate = layers.BatchNormalization()(gate)
	        gate = layers.Activation(activation, name=f'act_{block_index}')(gate)
	        gate = layers.Conv2D(filters=wide_filters, kernel_size=(1, 1), padding='same')(gate)
	        gate = layers.BatchNormalization()(gate)
	        gate = layers.Activation('sigmoid', name=f'act2_{block_index}')(gate)
	        x = layers.multiply([x, gate])

	        # Projection phase.
	        x = layers.Conv2D(filters=out_channels, kernel_size=(1, 1), strides=1, padding='same')(x)
	        x = layers.BatchNormalization()(x)

	        # Skip connection, only possible when input and output shapes match.
	        if block_stride == 1 and current_filters == out_channels:
	            # The dropout rate grows linearly with the position of the sub-block.
	            dropout_rate = drop_rate * float(prev_block_num + i) / num_blocks_total
	            if dropout_rate and dropout_rate > 0:
	                x = layers.Dropout(dropout_rate, noise_shape=(None, 1, 1, 1))(x)
	            x = layers.add([x, shortcut])

	        current_filters = out_channels

	    return x


	def _EfficientNet(width_coefficient_list: 'float | List[float]' = 1.0,
	                  depth_coefficient: float = 1.0,
	                  input_resolution: int = 224,
	                  expansion_factor: int = 6,
	                  se_ratio: float = 0.25,
	                  input_channels: int = 3,
	                  dropout_rate: float = 0.2,
	                  drop_connect_rate: float = 0.2,
	                  depth_trunc: str = 'ceil',
	                  activation: str = 'relu',
	                  include_top: bool = True,
	                  pooling: str = None,
	                  classes: int = 101) -> keras.Model:
	    """
	    Build the EfficientNet-like network: stem convolution, seven MBConv stages, top
	    convolution and an optional classification head.

	    Args:
	        width_coefficient_list (float or list of float): width scaling coefficients. A list must
	            hold at least 9 values (index 0 for the stem, 1..7 for the stage outputs, 8 for the
	            top convolution). A single number is applied to all of them.
	        depth_coefficient (float): scaling coefficient for network depth
	        input_resolution (int): size of the (square) input in pixels
	        expansion_factor (int): scaling coefficient for the input filters
	        se_ratio (float): between 0 and 1, fraction to squeeze the input filters
	        input_channels (int): number of input channels
	        dropout_rate (float): dropout rate before final classifier layer; None or 0 disables it
	        drop_connect_rate (float): dropout rate at skip connections
	        depth_trunc (str): method for truncation of the scaled repeats, see _round_repeats
	        activation (str): the activation function to use; 'swish' and 'relu6' are mapped to
	            callables, any other value is passed as-is to `layers.Activation`
	        include_top (boolean): whether to include the fully-connected layer at the top of the network
	        pooling (str): pooling mode for feature extraction when `include_top` is False,
	            None, 'avg' or 'max'
	        classes (int): number of classes to classify images
	    Returns:
	        keras.Model with efficientnet topology
	    """
	    # Per-stage description: base_filters[s] -> base_filters[s + 1] for stage s.
	    base_filters = (32, 16, 24, 40, 80, 112, 192, 320)
	    stage_strides = (1, 2, 2, 2, 1, 2, 1)
	    stage_kernels = (3, 3, 5, 3, 5, 5, 3)

	    # A scalar coefficient (such as the default value) is applied to the stem, every stage
	    # and the top convolution.
	    if isinstance(width_coefficient_list, (int, float)):
	        width_coefficient_list = [width_coefficient_list] * (len(base_filters) + 1)

	    # Determine proper input shape
	    model_input = keras.Input(shape=(input_resolution, input_resolution, input_channels))

	    # Activation: map the names handled here to callables; the function itself is passed,
	    # it must not be called at this point.
	    if activation == 'swish':
	        activation = _swish
	    elif activation == 'relu6':
	        activation = relu6

	    # Scaled filter count at the output of the stem (index 0) and of each stage (index 1..7).
	    widths = [_round_filters(base, width_coefficient_list[idx]) for idx, base in enumerate(base_filters)]

	    # Build stem
	    x = layers.Conv2D(filters=widths[0], kernel_size=(3, 3), strides=(2, 2), padding='same')(model_input)
	    x = layers.BatchNormalization()(x)
	    x = layers.Activation(activation, name='stem_activation')(x)

	    # Build blocks
	    repeats_scaled = _round_repeats(repeats, depth_coefficient, depth_trunc)
	    exp_ratio = _round_expansion(expansion_factor, repeats_scaled)

	    for stage, (stage_stride, kernel) in enumerate(zip(stage_strides, stage_kernels)):
	        x = _mb_conv_block(inputs=x, in_channels=widths[stage], out_channels=widths[stage + 1],
	                           num_repeat=repeats_scaled[stage], stride=stage_stride, expansion_factor=exp_ratio[stage],
	                           se_ratio=se_ratio, k=kernel, drop_rate=drop_connect_rate,
	                           # Number of sub-blocks built by the previous stages.
	                           prev_block_num=sum(repeats_scaled[:stage]), activation=activation)

	    # Build top
	    x = layers.Conv2D(filters=_round_filters(1280, width_coefficient_list[8]), kernel_size=(1, 1), padding='same', name='top_conv')(x)
	    x = layers.BatchNormalization()(x)
	    x = layers.Activation(activation, name='top_activation')(x)

	    if include_top:
	        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
	        # dropout_rate may be None, hence the truthiness test before the comparison.
	        if dropout_rate and dropout_rate > 0:
	            x = layers.Dropout(dropout_rate, name='top_dropout')(x)
	        x = layers.Dense(classes, activation='softmax', name='output_probs')(x)
	    elif pooling == 'avg':
	        x = layers.GlobalAveragePooling2D()(x)
	    elif pooling == 'max':
	        x = layers.GlobalMaxPooling2D()(x)

	    # Create model.
	    # NOTE(review): the model name spelling is kept unchanged in case it is referenced elsewhere.
	    model = Model(model_input, x, name="st_evvicientnetlcv1")

	    return model


	def get_st_efficientnetlcv1(input_shape: Tuple[int, int, int] = None, num_classes: int = None, dropout: float = None, pretrained: bool = False, **kwargs) -> keras.Model:
	    """
	    Create the st_efficientnetlcv1 classification model with randomly initialized weights.

	    The network is built by _EfficientNet with a width coefficient of 0.45 everywhere,
	    a depth coefficient of 1, an expansion factor of 3 and the 'relu6' activation.

	    Args:
	        input_shape (Tuple[int, int, int]): shape of the input tensor as (height, width, channels);
	            height and width must be equal and multiples of 32
	        num_classes (int): number of classes for the classification task
	        dropout (float): dropout rate applied before the classifier
	        pretrained (bool): no pretrained weights exist; when True a warning is printed and
	            random weights are used
	        **kwargs: accepted but not used

	    Returns:
	        keras.Model: A Keras model for fine-grained classification.

	    Raises:
	        ValueError: if the image is not square or its side is not a multiple of 32
	    """
	    if pretrained:
	        print("WARNING: No pretrained weights are found for 'st_efficientnet_lv_v1' model. Random weights are used instead.")

	    height, width = input_shape[0], input_shape[1]

	    # Validate input_shape is square
	    if height != width:
	        raise ValueError(f"Expecting image width and height to be the same. Received image shape {input_shape}")
	    # Validate input_shape is multiple of 32
	    if any(side % 32 > 0 for side in (height, width)):
	        raise ValueError(f"Expecting image width and height to be multiples of 32. Received image shape {input_shape}")

	    # Same width coefficient for the stem, every stage and the top convolution.
	    width_coefficients = [0.45] * 10

	    return _EfficientNet(width_coefficient_list=width_coefficients,
	                         depth_coefficient=1.,
	                         input_resolution=height,
	                         expansion_factor=3,
	                         depth_trunc='ceil',
	                         activation='relu6',
	                         input_channels=input_shape[2],
	                         dropout_rate=dropout,
	                         include_top=True,
	                         classes=num_classes)