Spaces:

joyjonesmark
/

emotion-recognition

Sleeping

File size: 5,794 Bytes

e5abc2e

"""
Custom CNN model architecture for emotion recognition.
Optimized for 48x48 grayscale images.
"""
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Dense, Dropout, Flatten,
    BatchNormalization, Input, GlobalAveragePooling2D
)
from tensorflow.keras.regularizers import l2

import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE, NUM_CLASSES, NUM_CHANNELS


def build_custom_cnn(
    input_shape: tuple = (*IMAGE_SIZE, NUM_CHANNELS),
    num_classes: int = NUM_CLASSES,
    dropout_rate: float = 0.25,
    dense_dropout: float = 0.5,
    l2_reg: float = 0.01
) -> Model:
    """
    Assemble the sequential CNN used for emotion recognition.

    Layout:
    - Four convolutional stages with 64, 128, 256 and 512 filters
    - Each stage: 2 x (3x3 Conv2D with ReLU -> BatchNorm) -> MaxPool -> Dropout
    - Head: Flatten -> 2 x (Dense with ReLU -> BatchNorm -> Dropout) -> softmax

    Args:
        input_shape: Input image shape (height, width, channels)
        num_classes: Number of units in the softmax output layer
        dropout_rate: Dropout rate applied at the end of each conv stage
        dense_dropout: Dropout rate applied after each hidden dense layer
        l2_reg: L2 kernel regularization factor for the conv layers and
            the two hidden dense layers (the output layer has none)

    Returns:
        Sequential Keras model named 'custom_emotion_cnn'. No compile()
        call is made here.
    """
    stack = [Input(shape=input_shape)]

    # Layers are created front to back; every regularized layer receives
    # its own l2 instance.
    for filters in (64, 128, 256, 512):
        for _ in range(2):
            stack.append(
                Conv2D(filters, (3, 3), padding='same', activation='relu',
                       kernel_regularizer=l2(l2_reg))
            )
            stack.append(BatchNormalization())
        stack.append(MaxPooling2D(pool_size=(2, 2)))
        stack.append(Dropout(dropout_rate))

    # Classification head
    stack.append(Flatten())
    for units in (512, 256):
        stack.append(
            Dense(units, activation='relu', kernel_regularizer=l2(l2_reg))
        )
        stack.append(BatchNormalization())
        stack.append(Dropout(dense_dropout))
    stack.append(Dense(num_classes, activation='softmax'))

    return Sequential(stack, name='custom_emotion_cnn')


def build_custom_cnn_v2(
    input_shape: tuple = (*IMAGE_SIZE, NUM_CHANNELS),
    num_classes: int = NUM_CLASSES,
    dropout_rate: float = 0.25,
    dense_dropout: float = 0.5
) -> Model:
    """
    Alternative, lighter CNN built with the functional API.

    This is a plain feed-forward stack; it contains no skip/residual
    connections.

    Architecture:
    - Stem: one 3x3 Conv2D (32 filters, ReLU) -> BatchNorm
    - Three stages with 64, 128 and 256 filters, each:
      2 x (3x3 Conv2D with ReLU -> BatchNorm) -> MaxPool -> Dropout
    - Head: GlobalAveragePooling2D -> Dense(256, ReLU) -> BatchNorm
      -> Dropout -> softmax

    Args:
        input_shape: Input image shape (height, width, channels)
        num_classes: Number of units in the softmax output layer
        dropout_rate: Dropout rate applied at the end of each conv stage
        dense_dropout: Dropout rate applied after the hidden dense layer

    Returns:
        Keras model named 'custom_emotion_cnn_v2'. No compile() call is
        made here.
    """
    inputs = Input(shape=input_shape)

    # Initial convolution
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(inputs)
    x = BatchNormalization()(x)

    # Three identical stages that differ only in filter count
    for filters in (64, 128, 256):
        for _ in range(2):
            x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
            x = BatchNormalization()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(dropout_rate)(x)

    # Global pooling and classification
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dense_dropout)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs, name='custom_emotion_cnn_v2')


def get_model_config() -> dict:
    """
    Describe the default custom CNN setup.

    The input shape and class count are taken from the project
    configuration constants. The accuracy, training-time and parameter
    entries are fixed descriptive strings, not values measured from a
    built model.

    Returns:
        Dictionary with model configuration
    """
    default_shape = (*IMAGE_SIZE, NUM_CHANNELS)
    summary = dict(
        name="Custom CNN",
        input_shape=default_shape,
        num_classes=NUM_CLASSES,
        expected_accuracy="60-68%",
        training_time="~30 minutes (GPU)",
        parameters="~5M",
    )
    return summary


if __name__ == "__main__":
    # Manual check: build the default network, print its layer summary,
    # then list the default configuration entries one per line.
    cnn = build_custom_cnn()
    cnn.summary()

    print("\nModel configuration:")
    for field, setting in get_model_config().items():
        print(f"  {field}: {setting}")