"""

LoRA (Low-Rank Adaptation) implementation for convolutional layers.

"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models


class LoRALayer(nn.Module):
    """

    LoRA (Low-Rank Adaptation) wrapper for convolutional layers.

    

    Args:

        original_layer: The Conv2d layer to adapt

        rank: LoRA rank (default=8)

              - Lower rank (4): Fewer parameters, less overfitting risk, less capacity

              - Medium rank (8-16): Balanced trade-off (recommended for most tasks)

              - Higher rank (32+): More capacity but approaches full fine-tuning

              

              For small datasets (<1000 images), rank=8 provides sufficient

              adaptation capacity while keeping parameters low (~2% of original layer).

    """
    
    def __init__(self, original_layer, rank=8):
        super().__init__()
        self.original_layer = original_layer
        self.rank = rank
        
        # Get dimensions from original layer
        out_channels = original_layer.out_channels
        in_channels = original_layer.in_channels
        kernel_size = original_layer.kernel_size
        
        # LoRA matrices: A (down-projection) and B (up-projection)
        # A reduces dimensions: in_channels -> rank
        # Initialized with small random values to break symmetry
        self.lora_A = nn.Parameter(
            torch.randn(rank, in_channels, *kernel_size) * 0.01
        )
        
        # B expands dimensions: rank -> out_channels
        # Initialized to zeros so LoRA starts as identity (preserves pretrained weights)
        # This initialization strategy follows the original LoRA paper
        self.lora_B = nn.Parameter(
            torch.zeros(out_channels, rank, 1, 1)
        )
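
        # Parameter-count sketch (illustrative, assuming a 3x3 conv with 512 input and
        # output channels and rank=8, as in most of ResNet34's layer4 convolutions):
        #   lora_A: 8 * 512 * 3 * 3 = 36,864
        #   lora_B: 512 * 8 * 1 * 1 =  4,096
        #   total ≈ 41K trainable params vs. 512 * 512 * 3 * 3 ≈ 2.36M frozen weights (~1.7%)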
        
        # Freeze original weights (preserve ImageNet knowledge)
        self.original_layer.weight.requires_grad = False
        if self.original_layer.bias is not None:
            self.original_layer.bias.requires_grad = False
    
    def forward(self, x):
        """

        Forward pass combining original frozen weights with LoRA adaptation.

        

        Mathematical formulation:

        output = W_frozen * x + (B * (A * x))

        

        where * denotes convolution operation.

        """
        # Original forward pass (frozen pretrained weights)
        original_output = self.original_layer(x)
        
        # LoRA adaptation pathway (low-rank decomposition)
        # Step 1: Down-project with A (in_channels → rank), matching the original
        # layer's stride/padding/dilation so the spatial dimensions line up
        # (assumes groups=1, which holds for all convolutions in ResNet34)
        lora_output = F.conv2d(
            x,
            self.lora_A,
            stride=self.original_layer.stride,
            padding=self.original_layer.padding,
            dilation=self.original_layer.dilation
        )
        
        # Step 2: Up-project with B (rank → out_channels)
        # These two sequential convolutions approximate a low-rank adaptation
        lora_output = F.conv2d(lora_output, self.lora_B)
        
        # Combine: W*x + (B*(A*x)) where * denotes convolution
        return original_output + lora_output


def get_model(num_classes=2, pretrained=True):
    """

    Load ResNet34 with optional pretrained weights.

    

    Args:

        num_classes: Number of output classes

        pretrained: Whether to load ImageNet pretrained weights

    

    Returns:

        ResNet34 model

    """
    if pretrained:
        model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
    else:
        model = models.resnet34(weights=None)
    
    # Modify last layer for classification
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    
    return model


def apply_lora_to_model(model, target_layers=['layer3', 'layer4'], rank=8):
    """

    Apply LoRA adapters to specific layers in ResNet34.

    

    Strategy: We target layer3 and layer4 (high-level feature extractors) because:

    - layer1 & layer2: Extract low-level features (edges, textures) that are 

      universal across tasks → keep frozen, no adaptation needed

    - layer3 & layer4: Extract high-level semantic features (objects, contexts)

      that are task-specific → need slight adaptation for smoking detection

    - fc: Brand new classifier head → fully trainable

    

    This approach gives us the sweet spot:

    - Full fine-tuning: 21.8M params (overfitting risk with small datasets)

    - Only fc training: ~1K params (may underfit, features not adapted)

    - LoRA on layer3+layer4: ~465K params (2.14% of model, balanced approach)

    

    Args:

        model: ResNet34 model

        target_layers: List of layer names to apply LoRA to

        rank: LoRA rank (default=8, adds ~2% params per adapted layer)

    

    Returns:

        Number of convolutional layers where LoRA was applied

    """
    # Freeze ALL layers first (preserve ImageNet features)
    for param in model.parameters():
        param.requires_grad = False
    
    # Unfreeze only the new classification head
    for param in model.fc.parameters():
        param.requires_grad = True
    
    lora_count = 0
    
    for layer_name in target_layers:
        # Get the layer dynamically (e.g., model.layer3)
        layer = getattr(model, layer_name)
        
        # Iterate through all blocks in this layer
        for block in layer:
            # Find all Conv2d layers in this block dynamically
            for name, module in block.named_modules():
                if isinstance(module, nn.Conv2d):
                    # Get parent module and attribute name to replace it
                    parent = block
                    attr_names = name.split('.')
                    
                    # Navigate to parent of the conv layer
                    for attr in attr_names[:-1]:
                        parent = getattr(parent, attr)
                    
                    # Check if not already wrapped
                    current_module = getattr(parent, attr_names[-1])
                    if not isinstance(current_module, LoRALayer):
                        # Replace with LoRA-wrapped version
                        setattr(parent, attr_names[-1], LoRALayer(current_module, rank=rank))
                        lora_count += 1
    
    return lora_count
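
# Typical follow-up (illustrative, not part of this module's pipeline): build the
# optimizer over only the trainable parameters (the LoRA matrices plus the new fc head):
#   trainable = [p for p in model.parameters() if p.requires_grad]
#   optimizer = torch.optim.AdamW(trainable, lr=1e-3)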


def count_parameters(model):
    """

    Count total and trainable parameters in the model.

    

    Returns:

        tuple: (total_params, trainable_params, trainable_percentage)

    """
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    trainable_pct = 100. * trainable_params / total_params
    
    return total_params, trainable_params, trainable_pct
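

# Quick self-check (illustrative sketch). Builds the model without downloading
# pretrained weights, applies LoRA to layer3/layer4, reports the resulting
# parameter budget, and verifies that a forward pass still produces class logits.
if __name__ == "__main__":
    model = get_model(num_classes=2, pretrained=False)  # pretrained=False avoids a download here
    num_adapted = apply_lora_to_model(model, target_layers=['layer3', 'layer4'], rank=8)
    total, trainable, pct = count_parameters(model)

    print(f"LoRA applied to {num_adapted} conv layers")
    print(f"Total params:     {total:,}")
    print(f"Trainable params: {trainable:,} ({pct:.2f}%)")

    # Sanity check: output shape should be (batch_size, num_classes)
    logits = model(torch.randn(2, 3, 224, 224))
    print(f"Output shape: {tuple(logits.shape)}")  # expected: (2, 2)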