Spaces:

valste
/

capsnet_inference_demo

Sleeping

App Files Files Community

capsnet_inference_demo / modelbuilder.py

valste

fixed the always one class prediction issue

ecefb2d 3 months ago

raw

history blame contribute delete

13.5 kB

	"""
	Builder class for the different model types (MobileNet, ResNet50, CapsNet),
	plus the custom CapsNet layers and loss used by the CapsNet model.
	"""

	# --- Core TensorFlow/Keras
	import tensorflow as tf
	from tensorflow import keras
	from tensorflow.keras import layers, Sequential
	from tensorflow.keras.layers import Dense, Input, Rescaling
	from tensorflow.keras.applications import MobileNet, ResNet50

	# --- CapsNet-specific
	from keras.saving import register_keras_serializable # For custom layer serialization

	# --- Project-specific
	from defs import ModelType as mt


	class ModelBuilder:
	# builds the models

	def __init__(self, model_type, **model_params):

	self.model_type = model_type
	self.model_params = model_params
	self.model = None
	self.model_name = None

	# config extractor and attributes adding by model type
	if self.model_type in (mt.MOBILENET, mt.RESNET50):
	self.base_model_params = self.model_params.pop("base_model")
	self.model_name = self.base_model_params["name"]
	self.input_shape = self.base_model_params["input_shape"]
	self.base_trainable = self.model_params.pop("base_trainable")
	self.base_model = None

	elif self.model_type == mt.CAPSNET:
	self.model_name = model_params.pop("name")
	self.input_shape = model_params.pop("input_shape")
	self.prim_caps_params = model_params.pop("prim_caps")
	self.digit_caps_params = model_params.pop("digit_caps")
	self.routing_algo = model_params.pop("routing_algo") # informative only

	# model_type vs input shape validation
	if self.model_type in (
	mt.MOBILENET,
	mt.RESNET50,
	):
	if self.input_shape != (224, 224, 3):
	raise Exception(
	f"input shape for {self.model_name} model must be (224,224,3)"
	)
	elif self.model_type == mt.CAPSNET:
	if self.input_shape != (256, 256, 3):
	raise Exception(
	f"input shape for {self.model_name} model must be (256,256,3)"
	)
	else:
	raise Exception(
	f"Model not supported: {self.model_name}. The model name must contain one substring from {mt.MOBILENET, mt.RESNET50, mt.CAPSNET}"
	)

	def get_augmentation_pipe(self):
	# Random/Augmentation layers are stochastic only when training=True
	# disabled during inference/evaluation
	return Sequential(
	[
	layers.RandomRotation(0.1),
	layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
	layers.RandomZoom(0.1),
	],
	name="augmentation",
	)

	def get_compiled_model(self):
	# Extract config
	compile_params = self.model_params.pop("compile_params")

	# Define input layer
	inputs = Input(shape=self.input_shape, name="inputs")

	# --- Random/Augmentation layers are stochastic only when training=True
	x_aug = self.get_augmentation_pipe()(inputs)
	# ----- end augmentation -----

	# --- common preprocessing layer: rescaling to [0,1]
	x = Rescaling(1.0 / 255)(x_aug)

	# Model selector
	match self.model_type:
	case mt.RESNET50:
	self.base_model = ResNet50(input_tensor=x_aug, **self.base_model_params)
	self.base_model.trainable = self.base_trainable

	case mt.MOBILENET:
	self.base_model = MobileNet(
	input_tensor=x_aug, **self.base_model_params
	)
	self.base_model.trainable = self.base_trainable

	case mt.CAPSNET:
	self.base_model = None
	x = Rescaling(1.0 / 255)(x)
	outputs = self.build_capsnet(inputs=x_aug, **self.model_params)

	case _:
	raise Exception(
	f"Model type {self.model_type} not supported: {self.model_name}"
	)

	# Classification head
	if self.model_type in (mt.RESNET50, mt.MOBILENET):
	x = self.base_model.output
	outputs = Dense(4, activation="softmax")(x)
	elif self.model_type == mt.CAPSNET:
	pass
	else:
	raise Exception(f"No classifier head defined for {self.model_type}")

	# Final model
	self.model = keras.Model(name=self.model_name, inputs=inputs, outputs=outputs)
	self.model.compile(**compile_params)

	print(f"The {self.model_name} model has been compiled successfully")

	return self.base_model, self.model

	def build_capsnet(self, inputs, **params):
	"""
	Build a Capsule Network model for four class lung iseases classification: COVID, Normal, Pneumonia and Opacity.
	The batch dimension is always None internally → full input shape is (None, 256, 256, 1).
	The output shape is (None, 4, 1)
	Args:
	name (_type_): _description_
	first_Conv2DKernel_size (int, optional): _description_. Defaults to 10.
	input_shape (tuple, optional): _description_. Defaults to (256, 256, 3).
	n_class (int, optional): _description_. Defaults to 4.
	routing_iters (int, optional): _description_. Defaults to 3.
	routing_algo (str, optional): _description_. Defaults to "by_agreement".

	Returns:
	model: to be compiled
	"""

	first_Conv2DKernel_size = params.pop("first_Conv2DKernel_size")

	# --- Preprocessing Layers ---
	x = inputs

	# --- Feature Extraction ---
	# learns 64 different 3x3 filters
	x = layers.Conv2D(
	filters=64,
	kernel_size=first_Conv2DKernel_size,
	strides=2,
	padding="valid",
	activation="relu",
	)(
	x
	) # downsampling strides=2, no padding because only exposed lung area matters/contains features
	x = layers.BatchNormalization()(x)

	x = layers.Conv2D(128, 5, strides=2, padding="same", activation="relu")(
	x
	) # padding="same" because of transformed output of the 1rst conv2D-layer (None, 125, 125, 64) to not lose the spatial info
	x = layers.BatchNormalization()(x)
	x = layers.Dropout(0.25)(x) # Dropout after second block (early regularization)

	x = layers.Conv2D(128, 3, strides=1, padding="same", activation="relu")(x)
	x = layers.BatchNormalization()(x)

	x = layers.Conv2D(256, 3, strides=1, padding="same", activation="relu")(x)
	x = layers.BatchNormalization()(x)
	x = layers.Dropout(0.3)(x) # Deeper regularization after more feature maps

	x = layers.Conv2D(512, 3, strides=1, padding="same", activation="relu")(
	x
	) # out : (None, 64, 64, 512)
	x = layers.BatchNormalization()(x) # out: (None, 64, 64, 512)

	x = layers.Dropout(0.3)(
	x
	) # Final dropout before capsules, out : (None, 64, 64, 512)

	# --- Capsule Layers for classification---
	primary_caps = PrimaryCaps(**self.prim_caps_params)(
	x
	) # dim_capsule=8, # Each capsule is an 8D vector (i.e. each capsule outputs a vector of length 8)
	# n_channels=32, # There are 32 capsule "types" per spatial location (like 32 different filters)
	# kernel_size=9,
	# strides=2, # Moves the 3×3 kernel with stride x → if x > 1 it reduces spatial size by x (downsampling)
	# # stride=1 This means the kernel moves 1 pixel at a time, covering every possible position in the input.
	# padding='same') # same: No padding → output size shrinks (no border pixels used)

	digit_caps = DigitCaps(**self.digit_caps_params)(
	primary_caps
	) # num_capsule=n_class, # 1 capsule per class (e.g. 4 diseases = 4 capsules)
	# dim_capsule=16, # Each output capsule is a 16D vector → captures pose info
	# routing_iters=routing_iters # Use 3 iterations of dynamic routing (or EM routing) to refine capsule agreement
	# ) # out: (None, 4, 1, 16)

	outputs = Length()(digit_caps)

	return outputs


	# Squash function: This function shrinks small vectors to zero and large vectors to unit vectors.
	def squash(vectors, axis=-1):
	    """Apply the capsule "squash" non-linearity along ``axis``.

	    Each vector taken along ``axis`` is rescaled by
	    ``|v|^2 / (1 + |v|^2) / sqrt(|v|^2 + epsilon)``, so short vectors shrink
	    towards zero and long vectors approach unit length.

	    Args:
	        vectors: tensor to squash.
	        axis: axis along which the squared norm is computed.

	    Returns:
	        Tensor with the same shape as ``vectors``.
	    """
	    sq_norm = tf.reduce_sum(tf.square(vectors), axis, keepdims=True)
	    shrink = sq_norm / (1 + sq_norm)
	    # epsilon keeps the square root away from zero for all-zero vectors
	    # (tf.keras.backend.epsilon() was 1e-07 on Google Colab with an A100 GPU).
	    norm = tf.sqrt(sq_norm + tf.keras.backend.epsilon())
	    return (shrink / norm) * vectors


	# PrimaryCaps Layer/ Lower-level capsules (e.g. detecting edges or textures)
	@register_keras_serializable()  # make it serializable to .keras format
	class PrimaryCaps(layers.Layer):
	    """Primary (lower-level) capsule layer.

	    A ReLU Conv2D with ``dim_capsule * n_channels`` filters whose output is
	    reshaped into capsule vectors of size ``dim_capsule`` and then squashed.
	    """

	    def __init__(
	        self, dim_capsule, n_channels, kernel_size, strides, padding, **kwargs
	    ):
	        super().__init__(**kwargs)
	        # Constructor arguments are kept so get_config() can serialize them.
	        self.dim_capsule = dim_capsule
	        self.n_channels = n_channels
	        self.kernel_size = kernel_size
	        self.strides = strides
	        self.padding = padding
	        self.conv = layers.Conv2D(
	            filters=dim_capsule * n_channels,
	            kernel_size=kernel_size,
	            strides=strides,
	            padding=padding,
	            activation="relu",
	        )

	    def build(self, input_shape):
	        # The inner Conv2D has to be built explicitly with the input shape
	        # before Keras is told that this layer is built.
	        self.conv.build(input_shape)
	        super().build(input_shape)

	    def call(self, inputs):
	        feature_maps = self.conv(inputs)
	        # rows * cols * n_channels capsules, each of size dim_capsule
	        capsule_count = (
	            feature_maps.shape[1] * feature_maps.shape[2] * self.n_channels
	        )
	        capsules = tf.reshape(feature_maps, (-1, capsule_count, self.dim_capsule))
	        return squash(capsules)

	    def get_config(self):
	        # Extend the base config with the constructor arguments so the layer
	        # can be re-created when a saved model is reloaded.
	        return {
	            **super().get_config(),
	            "dim_capsule": self.dim_capsule,
	            "n_channels": self.n_channels,
	            "kernel_size": self.kernel_size,
	            "strides": self.strides,
	            "padding": self.padding,
	        }


	@register_keras_serializable()
	class DigitCaps(layers.Layer):
	    """Higher-level capsule layer with dynamic routing by agreement.

	    Expects a rank-3 input ``(batch, input_num_capsule, input_dim_capsule)``
	    and returns a tensor of shape ``(batch, num_capsule, 1, dim_capsule)``.

	    Args:
	        num_capsule: number of output capsules.
	        dim_capsule: size of each output capsule vector.
	        routing_iters: number of routing iterations; must be at least 1.

	    Raises:
	        ValueError: if ``routing_iters`` is smaller than 1.
	    """

	    def __init__(self, num_capsule, dim_capsule, routing_iters=3, **kwargs):
	        super().__init__(**kwargs)
	        # With zero iterations the routing loop never runs and call() would
	        # fail on an unbound result, so reject that at construction time.
	        if routing_iters < 1:
	            raise ValueError(
	                f"routing_iters must be >= 1, got {routing_iters}"
	            )
	        self.num_capsule = num_capsule
	        self.dim_capsule = dim_capsule
	        self.routing_iters = routing_iters

	    def build(self, input_shape):
	        self.input_num_capsule = input_shape[1]
	        self.input_dim_capsule = input_shape[2]
	        # One transformation matrix per (input capsule, output capsule) pair.
	        self.W = self.add_weight(
	            shape=[
	                self.input_num_capsule,
	                self.num_capsule,
	                self.input_dim_capsule,
	                self.dim_capsule,
	            ],
	            initializer="glorot_uniform",
	            trainable=True,
	        )

	    def call(self, inputs):
	        # (batch, in_caps, in_dim) -> (batch, in_caps, num_capsule, 1, in_dim)
	        inputs_expand = tf.expand_dims(inputs, 2)
	        inputs_tiled = tf.expand_dims(inputs_expand, 3)
	        inputs_tiled = tf.tile(inputs_tiled, [1, 1, self.num_capsule, 1, 1])
	        # Prediction vectors: (batch, in_caps, num_capsule, 1, dim_capsule)
	        inputs_hat = tf.matmul(inputs_tiled, self.W)

	        # Routing logits start at zero.
	        b = tf.zeros(
	            shape=[tf.shape(inputs)[0], self.input_num_capsule, self.num_capsule, 1, 1]
	        )

	        # Dynamic routing by agreement
	        for i in range(self.routing_iters):
	            # Coupling coefficients: softmax over the output-capsule axis, so
	            # for each input capsule they sum to 1 across the output capsules.
	            c = tf.nn.softmax(b, axis=2)
	            # Weighted sum over the input capsules (axis=1).
	            s = tf.reduce_sum(c * inputs_hat, axis=1, keepdims=True)
	            # NOTE(review): axis=-2 has size 1 here, so squash acts on every
	            # element independently instead of on each capsule vector as a
	            # whole. Left unchanged because trained weights depend on it;
	            # confirm whether axis=-1 was intended.
	            v = squash(s, axis=-2)
	            if i < self.routing_iters - 1:
	                # Agreement between predictions and current outputs.
	                b += tf.reduce_sum(inputs_hat * v, axis=-1, keepdims=True)

	        return tf.squeeze(v, axis=1)

	    def get_config(self):
	        # Add the constructor arguments to the base config so the layer can
	        # be re-created when a saved model is reloaded.
	        config = super().get_config()
	        config.update(
	            {
	                "num_capsule": self.num_capsule,
	                "dim_capsule": self.dim_capsule,
	                "routing_iters": self.routing_iters,
	            }
	        )
	        return config


	# Length Layer
	@register_keras_serializable()
	class Length(layers.Layer):
	    """Return the Euclidean length of the input along its last axis."""

	    def call(self, inputs, **kwargs):
	        squared_length = tf.reduce_sum(tf.square(inputs), -1)
	        return tf.sqrt(squared_length)


	# Margin Loss for Capsule Networks
	def margin_loss(y_true, y_pred):
	    """Margin loss for capsule networks.

	    Classes that are present are penalised when their predicted length is
	    below 0.9; absent classes are penalised (down-weighted by 0.5) when their
	    predicted length is above 0.1.

	    Args:
	        y_true: one-hot targets.
	        y_pred: output of the ``Length`` layer (one value per class).

	    Returns:
	        Scalar tensor: per-sample loss summed over axis 1, then averaged.
	    """
	    upper_margin = 0.9
	    lower_margin = 0.1
	    absent_weight = 0.5

	    present_term = y_true * tf.square(tf.maximum(0.0, upper_margin - y_pred))
	    absent_term = (
	        absent_weight
	        * (1 - y_true)
	        * tf.square(tf.maximum(0.0, y_pred - lower_margin))
	    )
	    per_class = present_term + absent_term
	    return tf.reduce_mean(tf.reduce_sum(per_class, axis=1))


	# Name -> object mapping of the custom CapsNet layers and loss defined in
	# this module.
	capsnet_custom_objects = dict(
	    PrimaryCaps=PrimaryCaps,
	    DigitCaps=DigitCaps,
	    Length=Length,
	    margin_loss=margin_loss,
	)