File size: 7,533 Bytes
e5abc2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
"""
VGG-19 transfer learning model for emotion recognition.
"""
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Dense, Dropout, GlobalAveragePooling2D, Flatten,
    BatchNormalization, Input, Lambda
)
from tensorflow.keras.applications import VGG19

import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.config import IMAGE_SIZE_TRANSFER, NUM_CLASSES, NUM_CHANNELS_RGB


def build_vgg_model(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build VGG-19 transfer learning model for emotion recognition.

    Args:
        input_shape: Input image shape (height, width, channels). Pixel
            values are expected in the [0, 1] range (they are rescaled to
            [0, 255] internally before VGG19 preprocessing).
        num_classes: Number of emotion classes
        trainable_layers: Number of top layers of the VGG base to make
            trainable for fine-tuning
        dropout_rate: Dropout rate for dense layers

    Returns:
        Keras model (uncompiled)
    """
    # Load pre-trained VGG19 convolutional base (no classifier head)
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze all layers initially
    for layer in base_model.layers:
        layer.trainable = False

    # Unfreeze top layers for fine-tuning
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    # Build the model
    inputs = Input(shape=input_shape)

    # BUG FIX: the previous Rescaling(scale=255.0, offset=-127.5) neither
    # subtracted the ImageNet per-channel means nor converted RGB->BGR, so
    # the pretrained weights saw input statistics they were never trained on.
    # vgg19.preprocess_input expects [0, 255] inputs and applies the correct
    # caffe-style preprocessing (BGR channel order + mean subtraction).
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input)(x)

    # Pass through the (partially frozen) base. Let Keras propagate the
    # learning phase rather than hard-coding training=True: VGG19's conv
    # base contains no BatchNorm/Dropout, so this is behavior-preserving,
    # and forcing training mode at inference time is misleading.
    x = base_model(x)

    # Classification head
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion')

    return model


def build_vgg_from_grayscale(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, 1),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build VGG-19 model that accepts grayscale input.
    Converts grayscale to RGB internally by channel replication.

    Args:
        input_shape: Input shape for grayscale images (height, width, 1).
            Pixel values are expected in the [0, 1] range.
        num_classes: Number of emotion classes
        trainable_layers: Number of top layers of the VGG base to make trainable
        dropout_rate: Dropout rate for dense layers

    Returns:
        Keras model (uncompiled)
    """
    # Load pre-trained VGG19 base expecting 3-channel input
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE_TRANSFER, 3)
    )

    # Freeze base layers
    for layer in base_model.layers:
        layer.trainable = False

    # Unfreeze top layers
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    # Input for grayscale image
    inputs = Input(shape=input_shape)

    # Convert grayscale to RGB by repeating the single channel 3 times
    x = tf.keras.layers.Concatenate()([inputs, inputs, inputs])

    # BUG FIX: Rescaling(scale=255.0, offset=-127.5) did not apply VGG19's
    # ImageNet preprocessing (RGB->BGR + per-channel mean subtraction).
    # Scale to [0, 255] and use the official preprocess function instead.
    x = tf.keras.layers.Rescaling(scale=255.0)(x)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input)(x)

    # Base model. Learning phase is propagated automatically; VGG19's conv
    # base has no BatchNorm/Dropout, so dropping training=True is
    # behavior-preserving.
    x = base_model(x)

    # Classification head
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_grayscale')

    return model


def build_vgg_with_flatten(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
    num_classes: int = NUM_CLASSES,
    dropout_rate: float = 0.5
) -> Model:
    """
    Alternative VGG-19 architecture using Flatten instead of GAP.
    This is closer to the original VGG classifier head (two 4096-unit
    dense layers with dropout).

    Args:
        input_shape: Input image shape. Pixel values are expected in the
            [0, 1] range.
        num_classes: Number of emotion classes
        dropout_rate: Dropout rate for the dense layers

    Returns:
        Keras model (uncompiled)
    """
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze the entire base model (no fine-tuning in this variant)
    for layer in base_model.layers:
        layer.trainable = False

    inputs = Input(shape=input_shape)

    # BUG FIX: Rescaling(scale=255.0, offset=-127.5) did not apply VGG19's
    # ImageNet preprocessing (RGB->BGR + per-channel mean subtraction).
    # Scale to [0, 255] and use the official preprocess function instead.
    x = tf.keras.layers.Rescaling(scale=255.0)(inputs)
    x = Lambda(tf.keras.applications.vgg19.preprocess_input)(x)

    # Fully frozen base runs in inference mode
    x = base_model(x, training=False)

    # VGG-style classification head
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_flatten')

    return model


def freeze_base_model(model: Model) -> Model:
    """
    Freeze the VGG base inside an emotion model.

    Any layer of *model* whose name contains 'vgg' (the nested VGG
    sub-model) is marked non-trainable; all other layers are untouched.

    Args:
        model: VGG emotion model

    Returns:
        The same model, with its VGG base frozen (modified in place)
    """
    vgg_bases = (layer for layer in model.layers if 'vgg' in layer.name.lower())
    for base in vgg_bases:
        base.trainable = False
    return model


def unfreeze_top_blocks(model: Model, num_blocks: int = 1) -> Model:
    """
    Unfreeze top convolutional blocks of VGG for fine-tuning.

    VGG19 has 5 blocks; conv layers per block: block1=2, block2=2,
    block3=4, block4=4, block5=4.

    Args:
        model: VGG emotion model (contains a nested VGG sub-model whose
            layer name includes 'vgg')
        num_blocks: Number of blocks to unfreeze, counted from the top
            (block5 downward)

    Returns:
        Model with the selected conv layers made trainable (modified in place)
    """
    # BUG FIX: the previous implementation sliced the last N layers of the
    # base model by conv-layer count, but base_model.layers also contains
    # the pooling layers, so e.g. num_blocks=1 sliced
    # [block5_conv2 .. block5_pool] and never unfroze block5_conv1.
    # Select conv layers by their block-name prefix instead, which is
    # robust to pooling/input layers in the layer list.
    target_prefixes = tuple(f'block{i}' for i in range(6 - num_blocks, 6))

    for layer in model.layers:
        if 'vgg' in layer.name.lower():
            for vgg_layer in layer.layers:
                if vgg_layer.name.startswith(target_prefixes) and 'conv' in vgg_layer.name:
                    vgg_layer.trainable = True

    return model


def get_model_config() -> dict:
    """
    Get the default model configuration.

    Returns:
        Dictionary describing the VGG-19 transfer-learning setup
        (name, input shape, class count, and expected training stats)
    """
    config = dict(
        name="VGG-19",
        input_shape=(*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB),
        num_classes=NUM_CLASSES,
        expected_accuracy="68-75%",
        training_time="~60 minutes (GPU)",
        parameters="~20M",
        base_model="VGG-19 (ImageNet)",
    )
    return config


if __name__ == "__main__":
    # Smoke test: build the model and report its parameter counts.
    print("Building VGG-19 model...")
    model = build_vgg_model()

    # Sum parameter counts over trainable vs. non-trainable weights
    count = tf.keras.backend.count_params
    trainable = sum(count(w) for w in model.trainable_weights)
    non_trainable = sum(count(w) for w in model.non_trainable_weights)

    print(f"\nTotal parameters: {trainable + non_trainable:,}")
    print(f"Trainable parameters: {trainable:,}")
    print(f"Non-trainable parameters: {non_trainable:,}")

    print("\nModel configuration:")
    for key, value in get_model_config().items():
        print(f"  {key}: {value}")