Spaces:
Sleeping
Sleeping
| """ | |
| VGG-19 transfer learning model for emotion recognition. | |
| """ | |
| import tensorflow as tf | |
| from tensorflow.keras.models import Model | |
| from tensorflow.keras.layers import ( | |
| Dense, Dropout, GlobalAveragePooling2D, Flatten, | |
| BatchNormalization, Input, Lambda | |
| ) | |
| from tensorflow.keras.applications import VGG19 | |
| import sys | |
| from pathlib import Path | |
| sys.path.append(str(Path(__file__).parent.parent.parent)) | |
| from src.config import IMAGE_SIZE_TRANSFER, NUM_CLASSES, NUM_CHANNELS_RGB | |
def build_vgg_model(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build a VGG-19 transfer-learning model for emotion recognition.

    Args:
        input_shape: Input image shape (height, width, channels). Pixel
            values are assumed to be in [0, 1] — presumably the loaders
            rescale to that range (TODO confirm); they are mapped back to
            [0, 255] internally before VGG preprocessing.
        num_classes: Number of emotion classes.
        trainable_layers: Number of top base-model layers to leave
            trainable for fine-tuning.
        dropout_rate: Dropout rate for the dense classification head.

    Returns:
        Keras functional Model named 'vgg19_emotion'.
    """
    # Load the pre-trained VGG19 convolutional base (no classifier head).
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze everything first, then unfreeze the top layers for fine-tuning.
    for layer in base_model.layers:
        layer.trainable = False
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    inputs = Input(shape=input_shape)

    # Bring [0, 1] inputs back to the 0-255 range, then apply the official
    # VGG19 preprocessing (per-channel ImageNet mean subtraction + RGB->BGR).
    # FIX: the previous Rescaling(scale=255.0, offset=-127.5) subtracted a
    # uniform 127.5 offset, which does not match the statistics the
    # pretrained ImageNet weights expect.
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input,
               name='vgg19_preprocess')(x)

    # FIX: do not hard-code training=True — the flag should follow the outer
    # call (fit vs. predict). VGG19 has no BatchNorm/Dropout, so results are
    # unchanged, but hard-coding the flag is misleading and fragile.
    x = base_model(x)

    # Classification head: GAP + two dense blocks with BN and dropout.
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion')
    return model
def build_vgg_from_grayscale(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, 1),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build a VGG-19 model that accepts grayscale input.

    The single channel is replicated to three channels internally so the
    RGB-pretrained VGG19 base can be reused unchanged.

    Args:
        input_shape: Input shape for grayscale images (height, width, 1).
            Pixel values are assumed to be in [0, 1] — TODO confirm against
            the data loaders.
        num_classes: Number of emotion classes.
        trainable_layers: Number of top base-model layers to make trainable.
        dropout_rate: Dropout rate for the dense head.

    Returns:
        Keras functional Model named 'vgg19_emotion_grayscale'.
    """
    # Pre-trained RGB base; the grayscale input is expanded to 3 channels
    # before reaching it.
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE_TRANSFER, 3)
    )

    # Freeze everything first, then unfreeze the top layers for fine-tuning.
    for layer in base_model.layers:
        layer.trainable = False
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    inputs = Input(shape=input_shape)

    # Grayscale -> RGB by repeating the single channel three times.
    x = tf.keras.layers.Concatenate()([inputs, inputs, inputs])

    # Scale [0, 1] -> [0, 255] and apply the official VGG19 preprocessing
    # (ImageNet per-channel mean subtraction + RGB->BGR; the channel flip is
    # a no-op here since all three channels are identical).
    # FIX: the previous Rescaling(offset=-127.5) did not match the
    # pretrained ImageNet statistics.
    x = tf.keras.layers.Rescaling(scale=255.0)(x)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input,
               name='vgg19_preprocess')(x)

    # FIX: let the training flag follow the outer call instead of
    # hard-coding training=True.
    x = base_model(x)

    # Classification head.
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_grayscale')
    return model
def build_vgg_with_flatten(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    dropout_rate: float = 0.5
) -> Model:
    """
    Alternative VGG-19 architecture using Flatten instead of GAP.

    This mirrors the original VGG classifier (two 4096-unit dense layers).
    The base model is fully frozen; only the new head is trained.

    Args:
        input_shape: Input image shape (height, width, channels). Pixel
            values are assumed to be in [0, 1] — TODO confirm.
        num_classes: Number of emotion classes.
        dropout_rate: Dropout rate for the dense head.

    Returns:
        Keras functional Model named 'vgg19_emotion_flatten'.
    """
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Base model is entirely frozen — feature extraction only.
    for layer in base_model.layers:
        layer.trainable = False

    inputs = Input(shape=input_shape)

    # Scale [0, 1] -> [0, 255] and apply the official VGG19 preprocessing.
    # FIX: the previous Rescaling(offset=-127.5) used a uniform offset that
    # does not match the per-channel ImageNet means the pretrained weights
    # were trained with.
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input,
               name='vgg19_preprocess')(x)

    # Frozen base runs in inference mode.
    x = base_model(x, training=False)

    # VGG-style classification head.
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_flatten')
    return model
def freeze_base_model(model: Model) -> Model:
    """
    Freeze the nested VGG base inside an emotion model.

    Every top-level layer whose name contains 'vgg' (case-insensitive)
    is set to non-trainable; all other layers are left untouched.

    Args:
        model: VGG emotion model.

    Returns:
        The same model instance, with its base frozen.
    """
    base_layers = (l for l in model.layers if 'vgg' in l.name.lower())
    for base in base_layers:
        base.trainable = False
    return model
def unfreeze_top_blocks(model: Model, num_blocks: int = 1) -> Model:
    """
    Unfreeze the top convolutional blocks of the nested VGG base for
    fine-tuning.

    VGG19 has 5 convolutional blocks (block1..block5) whose layers follow
    the naming scheme 'blockN_convM' / 'blockN_pool'. Conv layers are
    selected by block name, so pooling layers are ignored automatically.

    FIX: the previous implementation counted only conv layers per block
    but sliced layers[-count:], which also contains each block's pooling
    layer — so e.g. num_blocks=1 unfroze only 3 of block5's 4 conv layers.

    Args:
        model: VGG emotion model (contains the VGG base as a nested layer).
        num_blocks: Number of blocks to unfreeze, counted from the top.

    Returns:
        The same model, with the selected conv layers set trainable.
    """
    # Block-name prefixes to unfreeze, e.g. num_blocks=2 -> block4_, block5_.
    target_prefixes = tuple(f'block{i}_' for i in range(6 - num_blocks, 6))
    for layer in model.layers:
        if 'vgg' in layer.name.lower():
            for vgg_layer in layer.layers:
                if ('conv' in vgg_layer.name
                        and vgg_layer.name.startswith(target_prefixes)):
                    vgg_layer.trainable = True
    return model
def get_model_config() -> dict:
    """
    Return the default configuration metadata for the VGG-19 model.

    Returns:
        Dictionary describing the model: name, input shape, class count,
        and rough accuracy / training-time / size expectations.
    """
    config = {
        "name": "VGG-19",
        "input_shape": (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
        "num_classes": NUM_CLASSES,
        "expected_accuracy": "68-75%",
        "training_time": "~60 minutes (GPU)",
        "parameters": "~20M",
        "base_model": "VGG-19 (ImageNet)",
    }
    return config
if __name__ == "__main__":
    # Smoke test: build the default model and report parameter counts.
    print("Building VGG-19 model...")
    model = build_vgg_model()

    # Tally parameters by trainability.
    count = tf.keras.backend.count_params
    trainable = sum(count(w) for w in model.trainable_weights)
    non_trainable = sum(count(w) for w in model.non_trainable_weights)

    print(f"\nTotal parameters: {trainable + non_trainable:,}")
    print(f"Trainable parameters: {trainable:,}")
    print(f"Non-trainable parameters: {non_trainable:,}")

    print("\nModel configuration:")
    for key, value in get_model_config().items():
        print(f" {key}: {value}")