notrito committed
Commit b6e95b3 · verified · 1 parent: a2f3682

Upload model.py with huggingface_hub

Files changed (1)
  1. model.py +179 -0
model.py ADDED
@@ -0,0 +1,179 @@
"""
LoRA (Low-Rank Adaptation) implementation for convolutional layers.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models


class LoRALayer(nn.Module):
    """
    LoRA (Low-Rank Adaptation) wrapper for convolutional layers.

    Args:
        original_layer: The Conv2d layer to adapt.
        rank: LoRA rank (default=8).
            - Lower rank (4): fewer parameters, less overfitting risk, less capacity.
            - Medium rank (8-16): balanced trade-off (recommended for most tasks).
            - Higher rank (32+): more capacity, but approaches full fine-tuning.

    For small datasets (<1000 images), rank=8 provides sufficient adaptation
    capacity while keeping the parameter count low (~2% of the original layer).
    """

    def __init__(self, original_layer, rank=8):
        super().__init__()
        self.original_layer = original_layer
        self.rank = rank

        # Get dimensions from the original layer
        out_channels = original_layer.out_channels
        in_channels = original_layer.in_channels
        kernel_size = original_layer.kernel_size

        # LoRA matrices: A (down-projection) and B (up-projection).
        # A reduces dimensions (in_channels -> rank) and is initialized
        # with small random values to break symmetry.
        self.lora_A = nn.Parameter(
            torch.randn(rank, in_channels, *kernel_size) * 0.01
        )

        # B expands dimensions (rank -> out_channels) and is initialized to
        # zeros, so the LoRA branch starts as a no-op and the wrapped layer
        # initially reproduces the pretrained output. This initialization
        # strategy follows the original LoRA paper.
        self.lora_B = nn.Parameter(
            torch.zeros(out_channels, rank, 1, 1)
        )

        # Freeze the original weights (preserve ImageNet knowledge)
        self.original_layer.weight.requires_grad = False
        if self.original_layer.bias is not None:
            self.original_layer.bias.requires_grad = False

    def forward(self, x):
        """
        Forward pass combining the original frozen weights with the LoRA adaptation.

        Mathematical formulation:
            output = W_frozen * x + B * (A * x)

        where * denotes the convolution operation.
        """
        # Original forward pass (frozen pretrained weights)
        original_output = self.original_layer(x)

        # LoRA adaptation pathway (low-rank decomposition).
        # Step 1: down-project with A (in_channels -> rank), reusing the
        # original layer's stride and padding so spatial dimensions match
        # (dilation and groups are left at their defaults, as in ResNet).
        lora_output = F.conv2d(
            x,
            self.lora_A,
            stride=self.original_layer.stride,
            padding=self.original_layer.padding,
        )

        # Step 2: up-project with B (rank -> out_channels) via a 1x1 convolution.
        # The two sequential convolutions approximate a low-rank weight update.
        lora_output = F.conv2d(lora_output, self.lora_B)

        # Combine: W*x + B*(A*x), where * denotes convolution
        return original_output + lora_output
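

# Sanity check (an illustrative sketch, not part of the uploaded file): since
# lora_B is zero-initialized, a freshly wrapped layer should reproduce the
# original convolution exactly, which is why wrapping can be applied to a
# pretrained network without disturbing its behavior.
def _check_lora_identity_at_init():
    conv = nn.Conv2d(64, 128, kernel_size=3, padding=1)
    wrapped = LoRALayer(conv, rank=8)
    x = torch.randn(2, 64, 32, 32)
    assert torch.allclose(conv(x), wrapped(x))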


def get_model(num_classes=2, pretrained=True):
    """
    Load a ResNet34, optionally with pretrained weights.

    Args:
        num_classes: Number of output classes.
        pretrained: Whether to load ImageNet-pretrained weights.

    Returns:
        ResNet34 model with a freshly initialized classification head.
    """
    if pretrained:
        model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
    else:
        model = models.resnet34(weights=None)

    # Replace the final fully connected layer for our classification task
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)

    return model
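

# Usage sketch (illustrative, not part of the uploaded file): build the
# backbone and run a dummy batch to confirm the output shape.
def _demo_get_model():
    model = get_model(num_classes=2, pretrained=False)  # False to skip the weight download
    logits = model(torch.randn(1, 3, 224, 224))
    assert logits.shape == (1, 2)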


def apply_lora_to_model(model, target_layers=['layer3', 'layer4'], rank=8):
    """
    Apply LoRA adapters to specific layers of a ResNet34.

    Strategy: we target layer3 and layer4 (the high-level feature extractors) because:
    - layer1 & layer2 extract low-level features (edges, textures) that are
      universal across tasks -> keep frozen, no adaptation needed.
    - layer3 & layer4 extract high-level semantic features (objects, contexts)
      that are task-specific -> need slight adaptation for smoking detection.
    - fc is a brand-new classifier head -> fully trainable.

    This approach hits the sweet spot:
    - Full fine-tuning: 21.8M params (overfitting risk with small datasets).
    - Training only fc: ~1K params (may underfit, features not adapted).
    - LoRA on layer3+layer4: ~465K params (2.14% of the model, a balanced approach).

    Args:
        model: ResNet34 model.
        target_layers: List of layer names to apply LoRA to.
        rank: LoRA rank (default=8, adds ~2% params per adapted layer).

    Returns:
        Number of convolutional layers where LoRA was applied.
    """
    # Freeze ALL parameters first (preserve ImageNet features)
    for param in model.parameters():
        param.requires_grad = False

    # Unfreeze only the new classification head
    for param in model.fc.parameters():
        param.requires_grad = True

    lora_count = 0

    for layer_name in target_layers:
        # Get the layer dynamically (e.g., model.layer3)
        layer = getattr(model, layer_name)

        # Iterate through all blocks in this layer
        for block in layer:
            # Find all Conv2d layers in this block. named_modules() walks
            # nested children too, so names may be dotted (e.g., "downsample.0").
            for name, module in block.named_modules():
                if isinstance(module, nn.Conv2d):
                    # Walk down to the conv layer's parent module so we can
                    # replace the attribute that holds it
                    parent = block
                    attr_names = name.split('.')
                    for attr in attr_names[:-1]:
                        parent = getattr(parent, attr)

                    # Replace the conv with a LoRA-wrapped version, unless it
                    # is already wrapped
                    current_module = getattr(parent, attr_names[-1])
                    if not isinstance(current_module, LoRALayer):
                        setattr(parent, attr_names[-1], LoRALayer(current_module, rank=rank))
                        lora_count += 1

    return lora_count
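

# Usage sketch (illustrative, not part of the uploaded file). In ResNet34,
# layer3 has 6 basic blocks and layer4 has 3, each with two 3x3 convs, and the
# first block of each layer adds a 1x1 downsample conv, so we would expect
# 6*2 + 3*2 + 2 = 20 wrapped convolutions.
def _demo_apply_lora():
    model = get_model(num_classes=2, pretrained=False)
    wrapped = apply_lora_to_model(model, target_layers=['layer3', 'layer4'], rank=8)
    print(f"LoRA applied to {wrapped} convolutional layers")
    return model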


def count_parameters(model):
    """
    Count total and trainable parameters in the model.

    Returns:
        tuple: (total_params, trainable_params, trainable_percentage)
    """
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    trainable_pct = 100.0 * trainable_params / total_params

    return total_params, trainable_params, trainable_pct
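

# End-to-end sketch (illustrative, not part of the uploaded file): build the
# model, attach LoRA adapters, and report the trainable-parameter budget.
# With rank=8 on layer3+layer4, roughly 2% of the ~21.8M parameters should
# remain trainable (the LoRA matrices plus the fc head).
if __name__ == "__main__":
    model = get_model(num_classes=2, pretrained=False)
    n_wrapped = apply_lora_to_model(model, rank=8)
    total, trainable, pct = count_parameters(model)
    print(f"Wrapped convs: {n_wrapped}")
    print(f"Total params: {total:,} | trainable: {trainable:,} ({pct:.2f}%)")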