Spaces:
Sleeping
Sleeping
| """ | |
| VGG-19 transfer learning model for emotion recognition. | |
| """ | |
| import tensorflow as tf | |
| from tensorflow.keras.models import Model | |
| from tensorflow.keras.layers import ( | |
| Dense, Dropout, GlobalAveragePooling2D, Flatten, | |
| BatchNormalization, Input, Lambda | |
| ) | |
| from tensorflow.keras.applications import VGG19 | |
| import sys | |
| from pathlib import Path | |
| sys.path.append(str(Path(__file__).parent.parent.parent)) | |
| from src.config import IMAGE_SIZE_TRANSFER, NUM_CLASSES, NUM_CHANNELS_RGB | |
def build_vgg_model(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build a VGG-19 transfer-learning model for emotion recognition.

    Args:
        input_shape: Input image shape (height, width, channels). Pixel
            values are assumed to be in [0, 1] — presumably the loaders
            rescale to that range (TODO confirm); they are mapped back to
            [0, 255] internally before VGG preprocessing.
        num_classes: Number of emotion classes.
        trainable_layers: Number of top base-model layers to leave
            trainable for fine-tuning.
        dropout_rate: Dropout rate for the dense classification head.

    Returns:
        Keras functional Model named 'vgg19_emotion'.
    """
    # Load the pre-trained VGG19 convolutional base (no classifier head).
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze everything first, then unfreeze the top layers for fine-tuning.
    for layer in base_model.layers:
        layer.trainable = False
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    inputs = Input(shape=input_shape)

    # Bring [0, 1] inputs back to the 0-255 range, then apply the official
    # VGG19 preprocessing (per-channel ImageNet mean subtraction + RGB->BGR).
    # FIX: the previous Rescaling(scale=255.0, offset=-127.5) subtracted a
    # uniform 127.5 offset, which does not match the statistics the
    # pretrained ImageNet weights expect.
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input,
               name='vgg19_preprocess')(x)

    # FIX: do not hard-code training=True — the flag should follow the outer
    # call (fit vs. predict). VGG19 has no BatchNorm/Dropout, so results are
    # unchanged, but hard-coding the flag is misleading and fragile.
    x = base_model(x)

    # Classification head: GAP + two dense blocks with BN and dropout.
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion')
    return model
def build_vgg_from_grayscale(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, 1),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build a VGG-19 model that accepts grayscale input.

    The single channel is replicated to three channels internally so the
    RGB-pretrained VGG19 base can be reused unchanged.

    Args:
        input_shape: Input shape for grayscale images (height, width, 1).
            Pixel values are assumed to be in [0, 1] — TODO confirm against
            the data loaders.
        num_classes: Number of emotion classes.
        trainable_layers: Number of top base-model layers to make trainable.
        dropout_rate: Dropout rate for the dense head.

    Returns:
        Keras functional Model named 'vgg19_emotion_grayscale'.
    """
    # Pre-trained RGB base; the grayscale input is expanded to 3 channels
    # before reaching it.
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE_TRANSFER, 3)
    )

    # Freeze everything first, then unfreeze the top layers for fine-tuning.
    for layer in base_model.layers:
        layer.trainable = False
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    inputs = Input(shape=input_shape)

    # Grayscale -> RGB by repeating the single channel three times.
    x = tf.keras.layers.Concatenate()([inputs, inputs, inputs])

    # Scale [0, 1] -> [0, 255] and apply the official VGG19 preprocessing
    # (ImageNet per-channel mean subtraction + RGB->BGR; the channel flip is
    # a no-op here since all three channels are identical).
    # FIX: the previous Rescaling(offset=-127.5) did not match the
    # pretrained ImageNet statistics.
    x = tf.keras.layers.Rescaling(scale=255.0)(x)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input,
               name='vgg19_preprocess')(x)

    # FIX: let the training flag follow the outer call instead of
    # hard-coding training=True.
    x = base_model(x)

    # Classification head.
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_grayscale')
    return model
def build_vgg_with_flatten(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    dropout_rate: float = 0.5
) -> Model:
    """
    Alternative VGG-19 architecture using Flatten instead of GAP.

    This mirrors the original VGG classifier (two 4096-unit dense layers).
    The base model is fully frozen; only the new head is trained.

    Args:
        input_shape: Input image shape (height, width, channels). Pixel
            values are assumed to be in [0, 1] — TODO confirm.
        num_classes: Number of emotion classes.
        dropout_rate: Dropout rate for the dense head.

    Returns:
        Keras functional Model named 'vgg19_emotion_flatten'.
    """
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Base model is entirely frozen — feature extraction only.
    for layer in base_model.layers:
        layer.trainable = False

    inputs = Input(shape=input_shape)

    # Scale [0, 1] -> [0, 255] and apply the official VGG19 preprocessing.
    # FIX: the previous Rescaling(offset=-127.5) used a uniform offset that
    # does not match the per-channel ImageNet means the pretrained weights
    # were trained with.
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input,
               name='vgg19_preprocess')(x)

    # Frozen base runs in inference mode.
    x = base_model(x, training=False)

    # VGG-style classification head.
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_flatten')
    return model
def freeze_base_model(model: Model) -> Model:
    """
    Freeze the nested VGG base inside an emotion model.

    Every top-level layer whose name contains 'vgg' (case-insensitive)
    is set to non-trainable; all other layers are left untouched.

    Args:
        model: VGG emotion model.

    Returns:
        The same model instance, with its base frozen.
    """
    base_layers = (l for l in model.layers if 'vgg' in l.name.lower())
    for base in base_layers:
        base.trainable = False
    return model
def unfreeze_top_blocks(model: Model, num_blocks: int = 1) -> Model:
    """
    Unfreeze the top convolutional blocks of the nested VGG base for
    fine-tuning.

    VGG19 has 5 convolutional blocks (block1..block5) whose layers follow
    the naming scheme 'blockN_convM' / 'blockN_pool'. Conv layers are
    selected by block name, so pooling layers are ignored automatically.

    FIX: the previous implementation counted only conv layers per block
    but sliced layers[-count:], which also contains each block's pooling
    layer — so e.g. num_blocks=1 unfroze only 3 of block5's 4 conv layers.

    Args:
        model: VGG emotion model (contains the VGG base as a nested layer).
        num_blocks: Number of blocks to unfreeze, counted from the top.

    Returns:
        The same model, with the selected conv layers set trainable.
    """
    # Block-name prefixes to unfreeze, e.g. num_blocks=2 -> block4_, block5_.
    target_prefixes = tuple(f'block{i}_' for i in range(6 - num_blocks, 6))
    for layer in model.layers:
        if 'vgg' in layer.name.lower():
            for vgg_layer in layer.layers:
                if ('conv' in vgg_layer.name
                        and vgg_layer.name.startswith(target_prefixes)):
                    vgg_layer.trainable = True
    return model
def get_model_config() -> dict:
    """
    Return the default configuration metadata for the VGG-19 model.

    Returns:
        Dictionary describing the model: name, input shape, class count,
        and rough accuracy / training-time / size expectations.
    """
    config = {
        "name": "VGG-19",
        "input_shape": (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
        "num_classes": NUM_CLASSES,
        "expected_accuracy": "68-75%",
        "training_time": "~60 minutes (GPU)",
        "parameters": "~20M",
        "base_model": "VGG-19 (ImageNet)",
    }
    return config
if __name__ == "__main__":
    # Smoke test: build the default model and report parameter counts.
    print("Building VGG-19 model...")
    model = build_vgg_model()

    # Tally parameters by trainability.
    count = tf.keras.backend.count_params
    trainable = sum(count(w) for w in model.trainable_weights)
    non_trainable = sum(count(w) for w in model.non_trainable_weights)

    print(f"\nTotal parameters: {trainable + non_trainable:,}")
    print(f"Trainable parameters: {trainable:,}")
    print(f"Non-trainable parameters: {non_trainable:,}")

    print("\nModel configuration:")
    for key, value in get_model_config().items():
        print(f" {key}: {value}")