File size: 7,533 Bytes
e5abc2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
"""
VGG-19 transfer learning model for emotion recognition.
"""
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Dense, Dropout, GlobalAveragePooling2D, Flatten,
    BatchNormalization, Input, Lambda
)
from tensorflow.keras.applications import VGG19

import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE_TRANSFER, NUM_CLASSES, NUM_CHANNELS_RGB


def build_vgg_model(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build VGG-19 transfer learning model for emotion recognition.

    Args:
        input_shape: Input image shape (height, width, channels). Pixel
            values are expected in the [0, 1] range (they are rescaled to
            [0, 255] internally before VGG19 preprocessing).
        num_classes: Number of emotion classes
        trainable_layers: Number of top layers of the VGG base to make
            trainable for fine-tuning
        dropout_rate: Dropout rate for dense layers

    Returns:
        Keras model (uncompiled)
    """
    # Load pre-trained VGG19 convolutional base (no classifier head)
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze all layers initially
    for layer in base_model.layers:
        layer.trainable = False

    # Unfreeze top layers for fine-tuning
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    # Build the model
    inputs = Input(shape=input_shape)

    # BUG FIX: the previous Rescaling(scale=255.0, offset=-127.5) neither
    # subtracted the ImageNet per-channel means nor converted RGB->BGR, so
    # the pretrained weights saw input statistics they were never trained on.
    # vgg19.preprocess_input expects [0, 255] inputs and applies the correct
    # caffe-style preprocessing (BGR channel order + mean subtraction).
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input)(x)

    # Pass through the (partially frozen) base. Let Keras propagate the
    # learning phase rather than hard-coding training=True: VGG19's conv
    # base contains no BatchNorm/Dropout, so this is behavior-preserving,
    # and forcing training mode at inference time is misleading.
    x = base_model(x)

    # Classification head
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion')

    return model


def build_vgg_from_grayscale(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, 1),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build VGG-19 model that accepts grayscale input.
    Converts grayscale to RGB internally by channel replication.

    Args:
        input_shape: Input shape for grayscale images (height, width, 1).
            Pixel values are expected in the [0, 1] range.
        num_classes: Number of emotion classes
        trainable_layers: Number of top layers of the VGG base to make trainable
        dropout_rate: Dropout rate for dense layers

    Returns:
        Keras model (uncompiled)
    """
    # Load pre-trained VGG19 base expecting 3-channel input
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE_TRANSFER, 3)
    )

    # Freeze base layers
    for layer in base_model.layers:
        layer.trainable = False

    # Unfreeze top layers
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    # Input for grayscale image
    inputs = Input(shape=input_shape)

    # Convert grayscale to RGB by repeating the single channel 3 times
    x = tf.keras.layers.Concatenate()([inputs, inputs, inputs])

    # BUG FIX: Rescaling(scale=255.0, offset=-127.5) did not apply VGG19's
    # ImageNet preprocessing (RGB->BGR + per-channel mean subtraction).
    # Scale to [0, 255] and use the official preprocess function instead.
    x = tf.keras.layers.Rescaling(scale=255.0)(x)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input)(x)

    # Base model. Learning phase is propagated automatically; VGG19's conv
    # base has no BatchNorm/Dropout, so dropping training=True is
    # behavior-preserving.
    x = base_model(x)

    # Classification head
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_grayscale')

    return model


def build_vgg_with_flatten(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    dropout_rate: float = 0.5
) -> Model:
    """
    Alternative VGG-19 architecture using Flatten instead of GAP.
    This is closer to the original VGG classifier head (two 4096-unit
    dense layers with dropout).

    Args:
        input_shape: Input image shape. Pixel values are expected in the
            [0, 1] range.
        num_classes: Number of emotion classes
        dropout_rate: Dropout rate for the dense layers

    Returns:
        Keras model (uncompiled)
    """
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze the entire base model (no fine-tuning in this variant)
    for layer in base_model.layers:
        layer.trainable = False

    inputs = Input(shape=input_shape)

    # BUG FIX: Rescaling(scale=255.0, offset=-127.5) did not apply VGG19's
    # ImageNet preprocessing (RGB->BGR + per-channel mean subtraction).
    # Scale to [0, 255] and use the official preprocess function instead.
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input)(x)

    # Fully frozen base runs in inference mode
    x = base_model(x, training=False)

    # VGG-style classification head
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_flatten')

    return model


def freeze_base_model(model: Model) -> Model:
    """
    Freeze the VGG base inside an emotion model.

    Any layer of *model* whose name contains 'vgg' (the nested VGG
    sub-model) is marked non-trainable; all other layers are untouched.

    Args:
        model: VGG emotion model

    Returns:
        The same model, with its VGG base frozen (modified in place)
    """
    vgg_bases = (layer for layer in model.layers if 'vgg' in layer.name.lower())
    for base in vgg_bases:
        base.trainable = False
    return model


def unfreeze_top_blocks(model: Model, num_blocks: int = 1) -> Model:
    """
    Unfreeze top convolutional blocks of VGG for fine-tuning.

    VGG19 has 5 blocks; conv layers per block: block1=2, block2=2,
    block3=4, block4=4, block5=4.

    Args:
        model: VGG emotion model (contains a nested VGG sub-model whose
            layer name includes 'vgg')
        num_blocks: Number of blocks to unfreeze, counted from the top
            (block5 downward)

    Returns:
        Model with the selected conv layers made trainable (modified in place)
    """
    # BUG FIX: the previous implementation sliced the last N layers of the
    # base model by conv-layer count, but base_model.layers also contains
    # the pooling layers, so e.g. num_blocks=1 sliced
    # [block5_conv2 .. block5_pool] and never unfroze block5_conv1.
    # Select conv layers by their block-name prefix instead, which is
    # robust to pooling/input layers in the layer list.
    target_prefixes = tuple(f'block{i}' for i in range(6 - num_blocks, 6))

    for layer in model.layers:
        if 'vgg' in layer.name.lower():
            for vgg_layer in layer.layers:
                if vgg_layer.name.startswith(target_prefixes) and 'conv' in vgg_layer.name:
                    vgg_layer.trainable = True

    return model


def get_model_config() -> dict:
    """
    Get the default model configuration.

    Returns:
        Dictionary describing the VGG-19 transfer-learning setup
        (name, input shape, class count, and expected training stats)
    """
    config = dict(
        name="VGG-19",
        input_shape=(*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
        num_classes=NUM_CLASSES,
        expected_accuracy="68-75%",
        training_time="~60 minutes (GPU)",
        parameters="~20M",
        base_model="VGG-19 (ImageNet)",
    )
    return config


if __name__ == "__main__":
    # Smoke test: build the model and report its parameter counts.
    print("Building VGG-19 model...")
    model = build_vgg_model()

    # Sum parameter counts over trainable vs. non-trainable weights
    count = tf.keras.backend.count_params
    trainable = sum(count(w) for w in model.trainable_weights)
    non_trainable = sum(count(w) for w in model.non_trainable_weights)

    print(f"\nTotal parameters: {trainable + non_trainable:,}")
    print(f"Trainable parameters: {trainable:,}")
    print(f"Non-trainable parameters: {non_trainable:,}")

    print("\nModel configuration:")
    for key, value in get_model_config().items():
        print(f"  {key}: {value}")